732 files changed, 16107 insertions, 2350 deletions
diff --git a/test/Analysis/BasicAA/aligned-overread.ll b/test/Analysis/BasicAA/aligned-overread.ll
new file mode 100644
index 0000000..b05f8eb
--- /dev/null
+++ b/test/Analysis/BasicAA/aligned-overread.ll
@@ -0,0 +1,25 @@
+; RUN: opt < %s -basicaa -dse -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+%struct.S0 = type <{ i8, [4 x i8] }>
+
+@a = global { i8, i8, i8, i8, i8 } { i8 undef, i8 0, i8 0, i8 0, i8 0 }, align 8
+
+define i32 @main() nounwind uwtable ssp {
+entry:
+  %tmp = load i8* getelementptr inbounds ({ i8, i8, i8, i8, i8 }* @a, i64 0, i32 4), align 4
+  %tmp1 = or i8 %tmp, -128
+  store i8 %tmp1, i8* getelementptr inbounds ({ i8, i8, i8, i8, i8 }* @a, i64 0, i32 4), align 4
+  %tmp2 = load i64* bitcast ({ i8, i8, i8, i8, i8 }* @a to i64*), align 8
+  store i8 11, i8* getelementptr inbounds ({ i8, i8, i8, i8, i8 }* @a, i64 0, i32 4), align 4
+  %tmp3 = trunc i64 %tmp2 to i32
+  ret i32 %tmp3
+
+; Make sure we don't delete either store here
+; CHECK: @main
+; CHECK: store i8 %tmp1
+; CHECK: store i8 11
+}
+
diff --git a/test/Analysis/BasicAA/dg.exp b/test/Analysis/BasicAA/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Analysis/BasicAA/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Analysis/BasicAA/lit.local.cfg b/test/Analysis/BasicAA/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Analysis/BasicAA/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Analysis/BlockFrequencyInfo/dg.exp b/test/Analysis/BlockFrequencyInfo/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Analysis/BlockFrequencyInfo/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Analysis/BlockFrequencyInfo/lit.local.cfg b/test/Analysis/BlockFrequencyInfo/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Analysis/BlockFrequencyInfo/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Analysis/BranchProbabilityInfo/dg.exp b/test/Analysis/BranchProbabilityInfo/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Analysis/BranchProbabilityInfo/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Analysis/BranchProbabilityInfo/lit.local.cfg b/test/Analysis/BranchProbabilityInfo/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Analysis/BranchProbabilityInfo/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Analysis/BranchProbabilityInfo/noreturn.ll b/test/Analysis/BranchProbabilityInfo/noreturn.ll
index c53c1ed..8b9ae11 100644
--- a/test/Analysis/BranchProbabilityInfo/noreturn.ll
+++ b/test/Analysis/BranchProbabilityInfo/noreturn.ll
@@ -8,8 +8,8 @@ define i32 @test1(i32 %a, i32 %b) {
 entry:
   %cond = icmp eq i32 %a, 42
   br i1 %cond, label %exit, label %abort
-; CHECK: edge entry -> exit probability is 1023 / 1024
-; CHECK: edge entry -> abort probability is 1 / 1024
+; CHECK: edge entry -> exit probability is 1048575 / 1048576
+; CHECK: edge entry -> abort probability is 1 / 1048576
 
 abort:
   call void @abort() noreturn
@@ -26,11 +26,11 @@ entry:
                               i32 2, label %case_b
                               i32 3, label %case_c
                               i32 4, label %case_d]
-; CHECK: edge entry -> exit probability is 1023 / 1027
-; CHECK: edge entry -> case_a probability is 1 / 1027
-; CHECK: edge entry -> case_b probability is 1 / 1027
-; CHECK: edge entry -> case_c probability is 1 / 1027
-; CHECK: edge entry -> case_d probability is 1 / 1027
+; CHECK: edge entry -> exit probability is 1048575 / 1048579
+; CHECK: edge entry -> case_a probability is 1 / 1048579
+; CHECK: edge entry -> case_b probability is 1 / 1048579
+; CHECK: edge entry -> case_c probability is 1 / 1048579
+; CHECK: edge entry -> case_d probability is 1 / 1048579
 
 case_a:
   br label %case_b
@@ -55,8 +55,8 @@ define i32 @test3(i32 %a, i32 %b) {
 entry:
   %cond1 = icmp eq i32 %a, 42
   br i1 %cond1, label %exit, label %dom
-; CHECK: edge entry -> exit probability is 1023 / 1024
-; CHECK: edge entry -> dom probability is 1 / 1024
+; CHECK: edge entry -> exit probability is 1048575 / 1048576
+; CHECK: edge entry -> dom probability is 1 / 1048576
 
 dom:
   %cond2 = icmp ult i32 %a, 42
diff --git a/test/Analysis/CallGraph/dg.exp b/test/Analysis/CallGraph/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Analysis/CallGraph/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Analysis/CallGraph/lit.local.cfg b/test/Analysis/CallGraph/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Analysis/CallGraph/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Analysis/Dominators/dg.exp b/test/Analysis/Dominators/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Analysis/Dominators/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Analysis/Dominators/invoke.ll b/test/Analysis/Dominators/invoke.ll
new file mode 100644
index 0000000..f935750
--- /dev/null
+++ b/test/Analysis/Dominators/invoke.ll
@@ -0,0 +1,19 @@
+; RUN: opt -verify -disable-output %s
+; This tests that we handle unreachable blocks correctly
+
+define void @f() {
+  %v1 = invoke i32* @g()
+          to label %bb1 unwind label %bb2
+  invoke void @__dynamic_cast()
+          to label %bb1 unwind label %bb2
+bb1:
+  %Hidden = getelementptr inbounds i32* %v1, i64 1
+  ret void
+bb2:
+  %lpad.loopexit80 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          cleanup
+  ret void
+}
+declare i32 @__gxx_personality_v0(...)
+declare void @__dynamic_cast()
+declare i32* @g()
diff --git a/test/Analysis/Dominators/lit.local.cfg b/test/Analysis/Dominators/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Analysis/Dominators/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Analysis/GlobalsModRef/dg.exp b/test/Analysis/GlobalsModRef/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Analysis/GlobalsModRef/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Analysis/GlobalsModRef/lit.local.cfg b/test/Analysis/GlobalsModRef/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Analysis/GlobalsModRef/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Analysis/LoopDependenceAnalysis/dg.exp b/test/Analysis/LoopDependenceAnalysis/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Analysis/LoopDependenceAnalysis/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Analysis/LoopDependenceAnalysis/lit.local.cfg b/test/Analysis/LoopDependenceAnalysis/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Analysis/LoopDependenceAnalysis/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Analysis/LoopInfo/dg.exp b/test/Analysis/LoopInfo/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Analysis/LoopInfo/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Analysis/LoopInfo/lit.local.cfg b/test/Analysis/LoopInfo/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Analysis/LoopInfo/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Analysis/PostDominators/dg.exp b/test/Analysis/PostDominators/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Analysis/PostDominators/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Analysis/PostDominators/lit.local.cfg b/test/Analysis/PostDominators/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Analysis/PostDominators/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Analysis/Profiling/dg.exp b/test/Analysis/Profiling/dg.exp
deleted file mode 100644
index 1eb4755..0000000
--- a/test/Analysis/Profiling/dg.exp
+++ /dev/null
@@ -1,4 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-
diff --git a/test/Analysis/Profiling/lit.local.cfg b/test/Analysis/Profiling/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Analysis/Profiling/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Analysis/RegionInfo/dg.exp b/test/Analysis/RegionInfo/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Analysis/RegionInfo/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Analysis/RegionInfo/lit.local.cfg b/test/Analysis/RegionInfo/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Analysis/RegionInfo/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Analysis/ScalarEvolution/dg.exp b/test/Analysis/ScalarEvolution/dg.exp
deleted file mode 100644
index b65a250..0000000
--- a/test/Analysis/ScalarEvolution/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.ll]] 
diff --git a/test/Analysis/ScalarEvolution/lit.local.cfg b/test/Analysis/ScalarEvolution/lit.local.cfg
new file mode 100644
index 0000000..c6106e4
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll']
diff --git a/test/Analysis/TypeBasedAliasAnalysis/dg.exp b/test/Analysis/TypeBasedAliasAnalysis/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Analysis/TypeBasedAliasAnalysis/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll b/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll
index 8fb5fff..1ac5927 100644
--- a/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll
+++ b/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll
@@ -24,7 +24,7 @@ define void @test0_no(i32* %p) nounwind {
 ; Add the readonly attribute, since there's just a call to a function which 
 ; TBAA says doesn't modify any memory.
 
-; CHECK: define void @test1_yes(i32* %p) nounwind readonly {
+; CHECK: define void @test1_yes(i32* nocapture %p) nounwind readonly {
 define void @test1_yes(i32* %p) nounwind {
   call void @callee(i32* %p), !tbaa !1
   ret void
diff --git a/test/Analysis/TypeBasedAliasAnalysis/lit.local.cfg b/test/Analysis/TypeBasedAliasAnalysis/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Analysis/TypeBasedAliasAnalysis/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Archive/dg.exp b/test/Archive/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Archive/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Archive/lit.local.cfg b/test/Archive/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Archive/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Assembler/dg.exp b/test/Assembler/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Assembler/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Assembler/lit.local.cfg b/test/Assembler/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Assembler/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Bindings/Ocaml/dg.exp b/test/Bindings/Ocaml/dg.exp
deleted file mode 100644
index fb4bd07..0000000
--- a/test/Bindings/Ocaml/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if [ llvm_supports_binding ocaml ] then {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp,ml}]]
-}
diff --git a/test/Bindings/Ocaml/lit.local.cfg b/test/Bindings/Ocaml/lit.local.cfg
new file mode 100644
index 0000000..127c3d5
--- /dev/null
+++ b/test/Bindings/Ocaml/lit.local.cfg
@@ -0,0 +1,13 @@
+config.suffixes = ['.ll', '.c', '.cpp', '.ml']
+
+def getRoot(config):
+    if not config.parent:
+        return config
+    return getRoot(config.parent)
+
+root = getRoot(config)
+
+bindings = set([s.strip() for s in root.llvm_bindings.split(',')])
+if not 'ocaml' in bindings:
+    config.unsupported = True
+
diff --git a/test/Bitcode/dg.exp b/test/Bitcode/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Bitcode/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Bitcode/lit.local.cfg b/test/Bitcode/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Bitcode/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/BugPoint/dg.exp b/test/BugPoint/dg.exp
deleted file mode 100644
index de42dad..0000000
--- a/test/BugPoint/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.ll]]
diff --git a/test/BugPoint/lit.local.cfg b/test/BugPoint/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/BugPoint/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index cbad83c..8cebb7c 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -68,7 +68,6 @@ add_custom_target(check
               ${LIT_ARGS}
               ${CMAKE_CURRENT_BINARY_DIR}
               COMMENT "Running LLVM regression tests")
-set_target_properties(check PROPERTIES FOLDER "Tests")
 
 add_custom_target(check.deps)
 add_dependencies(check check.deps)
@@ -76,6 +75,7 @@ add_dependencies(check.deps
               UnitTests
               BugpointPasses LLVMHello
               llc lli llvm-ar llvm-as llvm-dis llvm-extract llvm-dwarfdump
-              llvm-ld llvm-link llvm-mc llvm-nm llvm-objdump macho-dump opt
-              FileCheck count not)
+              llvm-ld llvm-link llvm-mc llvm-nm llvm-objdump llvm-readobj
+              macho-dump opt
+              FileCheck count not json-bench)
 set_target_properties(check.deps PROPERTIES FOLDER "Tests")
diff --git a/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll b/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
index 3694aaa..0bfe331 100644
--- a/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
+++ b/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6,+vfp2 | FileCheck %s
+; RUN: llc < %s -mtriple=arm-apple-ios -mattr=+v6,+vfp2 | FileCheck %s
 
 @quant_coef = external global [6 x [4 x [4 x i32]]]		; <[6 x [4 x [4 x i32]]]*> [#uses=1]
 @dequant_coef = external global [6 x [4 x [4 x i32]]]		; <[6 x [4 x [4 x i32]]]*> [#uses=1]
diff --git a/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll b/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll
index df9dbca..0ae7f84 100644
--- a/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll
+++ b/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll
@@ -11,7 +11,7 @@ entry:
 
 ; THUMB:     t:
 ; THUMB-NOT: str r0, [r1], r0
-; THUMB:     str r2, [r1]
+; THUMB:     str r1, [r0]
   %0 = getelementptr inbounds %struct.foo* %this, i32 0, i32 1 ; <i64*> [#uses=1]
   store i32 0, i32* inttoptr (i32 8 to i32*), align 8
   br i1 undef, label %bb.nph96, label %bb3
diff --git a/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll b/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll
index a65cf4b..e0f50c9 100644
--- a/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll
+++ b/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon -O0 -regalloc=basic
+; RUN: llc < %s -march=arm -mattr=+neon -O0 -optimize-regalloc -regalloc=basic
 
 ; This test would crash the rewriter when trying to handle a spill after one of
 ; the @llvm.arm.neon.vld3.v8i8 defined three parts of a register.
diff --git a/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll b/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll
index b9d5600..1aee508 100644
--- a/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll
+++ b/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll
@@ -12,7 +12,7 @@ target triple = "thumbv7-apple-darwin10"
 
 ; CHECK: vld1.64 {d16, d17}, [r{{.}}]
 ; CHECK-NOT: vld1.64 {d16, d17}
-; CHECK: vmov.f64 d19, d16
+; CHECK: vmov.f64
 
 define i32 @test(i8* %arg) nounwind {
 entry:
diff --git a/test/CodeGen/ARM/2010-11-29-PrologueBug.ll b/test/CodeGen/ARM/2010-11-29-PrologueBug.ll
index e3c18ce..da4d157 100644
--- a/test/CodeGen/ARM/2010-11-29-PrologueBug.ll
+++ b/test/CodeGen/ARM/2010-11-29-PrologueBug.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=armv7-apple-darwin   | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB2
+; RUN: llc < %s -mtriple=armv7-apple-ios   | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB2
 ; rdar://8690640
 
 define i32* @t(i32* %x) nounwind {
diff --git a/test/CodeGen/ARM/2010-12-07-PEIBug.ll b/test/CodeGen/ARM/2010-12-07-PEIBug.ll
index c65952b..23e1aa1 100644
--- a/test/CodeGen/ARM/2010-12-07-PEIBug.ll
+++ b/test/CodeGen/ARM/2010-12-07-PEIBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 | FileCheck %s
 ; rdar://8728956
 
 define hidden void @foo() nounwind ssp {
diff --git a/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll b/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll
index ccda281..2faa04a 100644
--- a/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll
+++ b/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -relocation-model=pic -disable-fp-elim -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -relocation-model=pic -disable-fp-elim -disable-cgp-delete-dead-blocks -mcpu=cortex-a8 | FileCheck %s
 
 ; Do not form Thumb2 ldrd / strd if the offset is not multiple of 4.
 ; rdar://9133587
diff --git a/test/CodeGen/ARM/2011-06-16-TailCallByVal.ll b/test/CodeGen/ARM/2011-06-16-TailCallByVal.ll
index 7baacfe..3e78c46 100644
--- a/test/CodeGen/ARM/2011-06-16-TailCallByVal.ll
+++ b/test/CodeGen/ARM/2011-06-16-TailCallByVal.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -arm-tail-calls=1 | FileCheck %s
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
-target triple = "thumbv7-apple-darwin10"
+target triple = "thumbv7-apple-ios"
 
 %struct.A = type <{ i16, i16, i32, i16, i16, i32, i16, [8 x %struct.B], [418 x i8], %struct.C }>
 %struct.B = type <{ i32, i16, i16 }>
diff --git a/test/CodeGen/ARM/2011-08-25-ldmia_ret.ll b/test/CodeGen/ARM/2011-08-25-ldmia_ret.ll
index 17264ee..216057a 100644
--- a/test/CodeGen/ARM/2011-08-25-ldmia_ret.ll
+++ b/test/CodeGen/ARM/2011-08-25-ldmia_ret.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a9 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a9 | FileCheck %s
 ; Test that ldmia_ret preserves implicit operands for return values.
 ;
 ; This CFG is reduced from a benchmark miscompile. With current
diff --git a/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll b/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll
index 86e8712..6fbae19 100644
--- a/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll
+++ b/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll
@@ -8,11 +8,11 @@ define void @test_sqrt(<4 x float>* %X) nounwind {
 
 ; CHECK:      movw    r1, :lower16:{{.*}}
 ; CHECK:      movt    r1, :upper16:{{.*}}
-; CHECK:      vldmia  r1, {[[short0:s[0-9]+]], [[short1:s[0-9]+]], [[short2:s[0-9]+]], [[short3:s[0-9]+]]}
-; CHECK:      vsqrt.f32       {{s[0-9]+}}, [[short3]]
-; CHECK:      vsqrt.f32       {{s[0-9]+}}, [[short2]]
-; CHECK:      vsqrt.f32       {{s[0-9]+}}, [[short1]]
-; CHECK:      vsqrt.f32       {{s[0-9]+}}, [[short0]]
+; CHECK:      vldmia  r1
+; CHECK:      vsqrt.f32       {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK:      vsqrt.f32       {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK:      vsqrt.f32       {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK:      vsqrt.f32       {{s[0-9]+}}, {{s[0-9]+}}
 ; CHECK:      vstmia  {{.*}}
 
 L.entry:
diff --git a/test/CodeGen/ARM/2011-12-19-sjlj-clobber.ll b/test/CodeGen/ARM/2011-12-19-sjlj-clobber.ll
new file mode 100644
index 0000000..ddb7632
--- /dev/null
+++ b/test/CodeGen/ARM/2011-12-19-sjlj-clobber.ll
@@ -0,0 +1,55 @@
+; RUN: llc < %s -O0 -mtriple=thumbv7-apple-ios | FileCheck %s
+
+; Radar 10567930: Make sure that all the callee-saved registers are saved and
+; restored in a function with setjmp/longjmp EH.  In particular, r6 was not
+; being saved here.
+; CHECK: push {r4, r5, r6, r7, lr}
+
+%0 = type opaque
+%struct.NSConstantString = type { i32*, i32, i8*, i32 }
+
+define i32 @asdf(i32 %a, i32 %b, i8** %c, i8* %d) {
+bb:
+  %tmp = alloca i32, align 4
+  %tmp1 = alloca i32, align 4
+  %tmp2 = alloca i8*, align 4
+  %tmp3 = alloca i1
+  %myException = alloca %0*, align 4
+  %tmp4 = alloca i8*
+  %tmp5 = alloca i32
+  %exception = alloca %0*, align 4
+  store i32 %a, i32* %tmp, align 4
+  store i32 %b, i32* %tmp1, align 4
+  store i8* %d, i8** %tmp2, align 4
+  store i1 false, i1* %tmp3
+  %tmp7 = load i8** %c
+  %tmp10 = invoke %0* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to %0* (i8*, i8*, %0*)*)(i8* %tmp7, i8* %d, %0* null)
+          to label %bb11 unwind label %bb15
+
+bb11:                                             ; preds = %bb
+  store %0* %tmp10, %0** %myException, align 4
+  %tmp12 = load %0** %myException, align 4
+  %tmp13 = bitcast %0* %tmp12 to i8*
+  invoke void @objc_exception_throw(i8* %tmp13) noreturn
+          to label %bb14 unwind label %bb15
+
+bb14:                                             ; preds = %bb11
+  unreachable
+
+bb15:                                             ; preds = %bb11, %bb
+  %tmp16 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*)
+          catch i8* null
+  %tmp17 = extractvalue { i8*, i32 } %tmp16, 0
+  store i8* %tmp17, i8** %tmp4
+  %tmp18 = extractvalue { i8*, i32 } %tmp16, 1
+  store i32 %tmp18, i32* %tmp5
+  store i1 true, i1* %tmp3
+  br label %bb56
+
+bb56:
+  unreachable
+}
+
+declare i8* @objc_msgSend(i8*, i8*, ...) nonlazybind
+declare i32 @__objc_personality_v0(...)
+declare void @objc_exception_throw(i8*)
diff --git a/test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll b/test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll
new file mode 100644
index 0000000..926daaf
--- /dev/null
+++ b/test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll
@@ -0,0 +1,105 @@
+; RUN: llc < %s -mcpu=cortex-a8 -verify-machineinstrs
+; PR11829
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv7-none-linux-gnueabi"
+
+define arm_aapcs_vfpcc void @foo(i8* nocapture %arg) nounwind uwtable align 2 {
+bb:
+  br i1 undef, label %bb1, label %bb2
+
+bb1:                                              ; preds = %bb
+  unreachable
+
+bb2:                                              ; preds = %bb
+  br label %bb3
+
+bb3:                                              ; preds = %bb4, %bb2
+  %tmp = icmp slt i32 undef, undef
+  br i1 %tmp, label %bb4, label %bb67
+
+bb4:                                              ; preds = %bb3
+  %tmp5 = load <4 x i32>* undef, align 16, !tbaa !0
+  %tmp6 = and <4 x i32> %tmp5, <i32 8388607, i32 8388607, i32 8388607, i32 8388607>
+  %tmp7 = or <4 x i32> %tmp6, <i32 1065353216, i32 1065353216, i32 1065353216, i32 1065353216>
+  %tmp8 = bitcast <4 x i32> %tmp7 to <4 x float>
+  %tmp9 = fsub <4 x float> %tmp8, bitcast (i128 or (i128 shl (i128 zext (i64 trunc (i128 lshr (i128 bitcast (<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> to i128), i128 64) to i64) to i128), i128 64), i128 zext (i64 trunc (i128 bitcast (<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> to i128) to i64) to i128)) to <4 x float>)
+  %tmp10 = fmul <4 x float> undef, %tmp9
+  %tmp11 = fadd <4 x float> undef, %tmp10
+  %tmp12 = bitcast <4 x float> zeroinitializer to i128
+  %tmp13 = lshr i128 %tmp12, 64
+  %tmp14 = trunc i128 %tmp13 to i64
+  %tmp15 = insertvalue [2 x i64] undef, i64 %tmp14, 1
+  %tmp16 = call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %tmp11) nounwind
+  %tmp17 = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %tmp16, <4 x float> %tmp11) nounwind
+  %tmp18 = fmul <4 x float> %tmp17, %tmp16
+  %tmp19 = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %tmp18, <4 x float> %tmp11) nounwind
+  %tmp20 = fmul <4 x float> %tmp19, %tmp18
+  %tmp21 = fmul <4 x float> %tmp20, zeroinitializer
+  %tmp22 = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %tmp21, <4 x float> undef) nounwind
+  call arm_aapcs_vfpcc  void @bar(i8* null, i8* undef, <4 x i32>* undef, [2 x i64] zeroinitializer) nounwind
+  %tmp23 = bitcast <4 x float> %tmp22 to i128
+  %tmp24 = trunc i128 %tmp23 to i64
+  %tmp25 = insertvalue [2 x i64] undef, i64 %tmp24, 0
+  %tmp26 = insertvalue [2 x i64] %tmp25, i64 0, 1
+  %tmp27 = load float* undef, align 4, !tbaa !2
+  %tmp28 = insertelement <4 x float> undef, float %tmp27, i32 3
+  %tmp29 = load <4 x i32>* undef, align 16, !tbaa !0
+  %tmp30 = and <4 x i32> %tmp29, <i32 8388607, i32 8388607, i32 8388607, i32 8388607>
+  %tmp31 = or <4 x i32> %tmp30, <i32 1065353216, i32 1065353216, i32 1065353216, i32 1065353216>
+  %tmp32 = bitcast <4 x i32> %tmp31 to <4 x float>
+  %tmp33 = fsub <4 x float> %tmp32, bitcast (i128 or (i128 shl (i128 zext (i64 trunc (i128 lshr (i128 bitcast (<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> to i128), i128 64) to i64) to i128), i128 64), i128 zext (i64 trunc (i128 bitcast (<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> to i128) to i64) to i128)) to <4 x float>)
+  %tmp34 = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> undef, <4 x float> %tmp28) nounwind
+  %tmp35 = fmul <4 x float> %tmp34, undef
+  %tmp36 = fmul <4 x float> %tmp35, undef
+  %tmp37 = call arm_aapcs_vfpcc  i8* undef(i8* undef) nounwind
+  %tmp38 = load float* undef, align 4, !tbaa !2
+  %tmp39 = insertelement <2 x float> undef, float %tmp38, i32 0
+  %tmp40 = call arm_aapcs_vfpcc  i8* undef(i8* undef) nounwind
+  %tmp41 = load float* undef, align 4, !tbaa !2
+  %tmp42 = insertelement <4 x float> undef, float %tmp41, i32 3
+  %tmp43 = shufflevector <2 x float> %tmp39, <2 x float> undef, <4 x i32> zeroinitializer
+  %tmp44 = fmul <4 x float> %tmp33, %tmp43
+  %tmp45 = fadd <4 x float> %tmp42, %tmp44
+  %tmp46 = fsub <4 x float> %tmp45, undef
+  %tmp47 = fmul <4 x float> %tmp46, %tmp36
+  %tmp48 = fadd <4 x float> undef, %tmp47
+  %tmp49 = call arm_aapcs_vfpcc  i8* undef(i8* undef) nounwind
+  %tmp50 = load float* undef, align 4, !tbaa !2
+  %tmp51 = insertelement <4 x float> undef, float %tmp50, i32 3
+  %tmp52 = call arm_aapcs_vfpcc float* null(i8* undef) nounwind
+  %tmp54 = load float* %tmp52, align 4, !tbaa !2
+  %tmp55 = insertelement <4 x float> undef, float %tmp54, i32 3
+  %tmp56 = fsub <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %tmp22
+  %tmp57 = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %tmp56, <4 x float> %tmp55) nounwind
+  %tmp58 = fmul <4 x float> undef, %tmp57
+  %tmp59 = fsub <4 x float> %tmp51, %tmp48
+  %tmp60 = fsub <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %tmp58
+  %tmp61 = fmul <4 x float> %tmp59, %tmp60
+  %tmp62 = fadd <4 x float> %tmp48, %tmp61
+  call arm_aapcs_vfpcc  void @baz(i8* undef, i8* undef, [2 x i64] %tmp26, <4 x i32>* undef)
+  %tmp63 = bitcast <4 x float> %tmp62 to i128
+  %tmp64 = lshr i128 %tmp63, 64
+  %tmp65 = trunc i128 %tmp64 to i64
+  %tmp66 = insertvalue [2 x i64] zeroinitializer, i64 %tmp65, 1
+  call arm_aapcs_vfpcc  void @quux(i8* undef, i8* undef, [2 x i64] undef, i8* undef, [2 x i64] %tmp66, i8* undef, i8* undef, [2 x i64] %tmp26, [2 x i64] %tmp15, <4 x i32>* undef)
+  br label %bb3
+
+bb67:                                             ; preds = %bb3
+  ret void
+}
+
+declare arm_aapcs_vfpcc void @bar(i8*, i8*, <4 x i32>*, [2 x i64])
+
+declare arm_aapcs_vfpcc void @baz(i8*, i8* nocapture, [2 x i64], <4 x i32>* nocapture) nounwind uwtable inlinehint align 2
+
+declare arm_aapcs_vfpcc void @quux(i8*, i8*, [2 x i64], i8* nocapture, [2 x i64], i8* nocapture, i8* nocapture, [2 x i64], [2 x i64], <4 x i32>* nocapture) nounwind uwtable inlinehint align 2
+
+declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>) nounwind readnone
+
+declare <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float>, <4 x float>) nounwind readnone
+
+declare <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float>) nounwind readnone
+
+!0 = metadata !{metadata !"omnipotent char", metadata !1}
+!1 = metadata !{metadata !"Simple C/C++ TBAA", null}
+!2 = metadata !{metadata !"float", metadata !0}
diff --git a/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll b/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll
new file mode 100644
index 0000000..872eca3
--- /dev/null
+++ b/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll
@@ -0,0 +1,67 @@
+; RUN: llc < %s -mcpu=cortex-a8 -verify-machineinstrs -verify-coalescing
+; PR11841
+; PR11829
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv7-none-linux-eabi"
+
+; This test case exercises REG_SEQUENCE and chains of REG_SEQUENCE.
+define arm_aapcs_vfpcc void @foo(i8* nocapture %arg, i8* %arg1) nounwind align 2 { ; builds two <4 x float> values from one <2 x float> load and passes one lane of each to @bar
+bb:
+  %tmp = load <2 x float>* undef, align 8, !tbaa !0
+  %tmp2 = extractelement <2 x float> %tmp, i32 0
+  %tmp3 = insertelement <4 x float> undef, float %tmp2, i32 0
+  %tmp4 = insertelement <4 x float> %tmp3, float 0.000000e+00, i32 1
+  %tmp5 = insertelement <4 x float> %tmp4, float 0.000000e+00, i32 2
+  %tmp6 = insertelement <4 x float> %tmp5, float 0.000000e+00, i32 3 ; %tmp6 = <%tmp2, 0.0, 0.0, 0.0>
+  %tmp7 = extractelement <2 x float> %tmp, i32 1
+  %tmp8 = insertelement <4 x float> %tmp3, float %tmp7, i32 1
+  %tmp9 = insertelement <4 x float> %tmp8, float 0.000000e+00, i32 2
+  %tmp10 = insertelement <4 x float> %tmp9, float 0.000000e+00, i32 3 ; %tmp10 = <%tmp2, %tmp7, 0.0, 0.0>
+  %tmp11 = bitcast <4 x float> %tmp6 to <2 x i64>
+  %tmp12 = shufflevector <2 x i64> %tmp11, <2 x i64> undef, <1 x i32> zeroinitializer ; keep element 0 (the low i64) of %tmp6
+  %tmp13 = bitcast <1 x i64> %tmp12 to <2 x float>
+  %tmp14 = shufflevector <2 x float> %tmp13, <2 x float> undef, <4 x i32> zeroinitializer ; splat lane 0 across four lanes
+  %tmp15 = bitcast <4 x float> %tmp14 to <2 x i64>
+  %tmp16 = shufflevector <2 x i64> %tmp15, <2 x i64> undef, <1 x i32> zeroinitializer
+  %tmp17 = bitcast <1 x i64> %tmp16 to <2 x float>
+  %tmp18 = extractelement <2 x float> %tmp17, i32 0
+  tail call arm_aapcs_vfpcc  void @bar(i8* undef, float %tmp18, float undef, float 0.000000e+00) nounwind
+  %tmp19 = bitcast <4 x float> %tmp10 to <2 x i64>
+  %tmp20 = shufflevector <2 x i64> %tmp19, <2 x i64> undef, <1 x i32> zeroinitializer ; keep element 0 (the low i64) of %tmp10
+  %tmp21 = bitcast <1 x i64> %tmp20 to <2 x float>
+  %tmp22 = shufflevector <2 x float> %tmp21, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; splat lane 1 across four lanes
+  %tmp23 = bitcast <4 x float> %tmp22 to <2 x i64>
+  %tmp24 = shufflevector <2 x i64> %tmp23, <2 x i64> undef, <1 x i32> zeroinitializer
+  %tmp25 = bitcast <1 x i64> %tmp24 to <2 x float>
+  %tmp26 = extractelement <2 x float> %tmp25, i32 0
+  tail call arm_aapcs_vfpcc  void @bar(i8* undef, float undef, float %tmp26, float 0.000000e+00) nounwind ; same callee, value now in the third argument
+  ret void
+}
+
+define arm_aapcs_vfpcc void @foo2() nounwind uwtable { ; joins <1 x i64> halves into <2 x i64> vectors and stores them with the vst1 intrinsic
+entry:
+  br i1 undef, label %for.end, label %cond.end295
+
+cond.end295:                                      ; preds = %entry
+  %shuffle.i39.i.i1035 = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> zeroinitializer
+  %shuffle.i38.i.i1036 = shufflevector <2 x i64> zeroinitializer, <2 x i64> undef, <1 x i32> zeroinitializer
+  %shuffle.i37.i.i1037 = shufflevector <1 x i64> %shuffle.i39.i.i1035, <1 x i64> %shuffle.i38.i.i1036, <2 x i32> <i32 0, i32 1> ; concatenate the two <1 x i64> halves
+  %0 = bitcast <2 x i64> %shuffle.i37.i.i1037 to <4 x float>
+  %1 = bitcast <4 x float> undef to <2 x i64>
+  %shuffle.i36.i.i = shufflevector <2 x i64> %1, <2 x i64> undef, <1 x i32> zeroinitializer
+  %shuffle.i35.i.i = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> zeroinitializer
+  %shuffle.i34.i.i = shufflevector <1 x i64> %shuffle.i36.i.i, <1 x i64> %shuffle.i35.i.i, <2 x i32> <i32 0, i32 1> ; second concatenation, same shape as above
+  %2 = bitcast <2 x i64> %shuffle.i34.i.i to <4 x float>
+  tail call void @llvm.arm.neon.vst1.v4f32(i8* undef, <4 x float> %0, i32 4) nounwind
+  tail call void @llvm.arm.neon.vst1.v4f32(i8* undef, <4 x float> %2, i32 4) nounwind
+  unreachable
+
+for.end:                                          ; preds = %entry
+  ret void
+}
+
+declare arm_aapcs_vfpcc void @bar(i8*, float, float, float)
+declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind
+
+!0 = metadata !{metadata !"omnipotent char", metadata !1}
+!1 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/ARM/2012-01-26-CoalescerBug.ll b/test/CodeGen/ARM/2012-01-26-CoalescerBug.ll
new file mode 100644
index 0000000..ec5b2e9
--- /dev/null
+++ b/test/CodeGen/ARM/2012-01-26-CoalescerBug.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -verify-coalescing
+; PR11861
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv7-none-linux-eabi"
+
+define arm_aapcs_vfpcc void @foo() nounwind uwtable align 2 { ; one self-looping block that rebuilds a <4 x float> from a constant low half and its own high half
+  br label %1
+
+; <label>:1                                       ; preds = %1, %0
+  %2 = phi <4 x float> [ undef, %0 ], [ %11, %1 ]
+  %3 = bitcast <4 x float> %2 to <2 x i64>
+  %4 = shufflevector <2 x i64> %3, <2 x i64> undef, <1 x i32> zeroinitializer ; %4 has no uses in this function
+  %5 = xor <2 x i32> zeroinitializer, <i32 -1, i32 -1> ; %5 has no uses in this function
+  %6 = bitcast <2 x i32> zeroinitializer to <2 x float>
+  %7 = shufflevector <2 x float> zeroinitializer, <2 x float> %6, <2 x i32> <i32 0, i32 2> ; lane 0 of each operand; both operands are zero constants
+  %8 = shufflevector <2 x i64> %3, <2 x i64> undef, <1 x i32> <i32 1> ; element 1 (high i64) of the loop-carried value
+  %9 = bitcast <2 x float> %7 to <1 x i64>
+  %10 = shufflevector <1 x i64> %9, <1 x i64> %8, <2 x i32> <i32 0, i32 1> ; new value = <%9, %8>
+  %11 = bitcast <2 x i64> %10 to <4 x float>
+  br label %1
+}
diff --git a/test/CodeGen/ARM/2012-01-26-CopyPropKills.ll b/test/CodeGen/ARM/2012-01-26-CopyPropKills.ll
new file mode 100644
index 0000000..5f24e42
--- /dev/null
+++ b/test/CodeGen/ARM/2012-01-26-CopyPropKills.ll
@@ -0,0 +1,121 @@
+; RUN: llc < %s -mcpu=cortex-a9 -join-liveintervals=0 -verify-machineinstrs
+; PR11765
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv7-none-linux-gnueabi"
+
+; This test case exercises the MachineCopyPropagation pass by disabling the
+; RegisterCoalescer.
+
+define arm_aapcs_vfpcc void @foo(i8* %arg) nounwind uwtable align 2 { ; bb3 is a long chain of vector shuffles, bitcasts and float arithmetic interleaved with three calls
+bb:
+  br i1 undef, label %bb1, label %bb2
+
+bb1:                                              ; preds = %bb
+  unreachable
+
+bb2:                                              ; preds = %bb
+  br i1 undef, label %bb92, label %bb3
+
+bb3:                                              ; preds = %bb2
+  %tmp = or <4 x i32> undef, undef
+  %tmp4 = bitcast <4 x i32> %tmp to <4 x float>
+  %tmp5 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %tmp4
+  %tmp6 = bitcast <4 x i32> zeroinitializer to <4 x float>
+  %tmp7 = fmul <4 x float> %tmp6, <float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>
+  %tmp8 = bitcast <4 x float> %tmp7 to <2 x i64>
+  %tmp9 = shufflevector <2 x i64> %tmp8, <2 x i64> undef, <1 x i32> zeroinitializer
+  %tmp10 = bitcast <1 x i64> %tmp9 to <2 x float>
+  %tmp11 = shufflevector <2 x i64> %tmp8, <2 x i64> undef, <1 x i32> <i32 1>
+  %tmp12 = bitcast <1 x i64> %tmp11 to <2 x float>
+  %tmp13 = shufflevector <2 x float> %tmp10, <2 x float> %tmp12, <2 x i32> <i32 0, i32 2>
+  %tmp14 = shufflevector <2 x float> %tmp10, <2 x float> undef, <2 x i32> <i32 1, i32 2>
+  %tmp15 = bitcast <2 x float> %tmp14 to <1 x i64>
+  %tmp16 = bitcast <4 x i32> zeroinitializer to <2 x i64>
+  %tmp17 = shufflevector <2 x i64> %tmp16, <2 x i64> undef, <1 x i32> zeroinitializer
+  %tmp18 = bitcast <1 x i64> %tmp17 to <2 x i32>
+  %tmp19 = and <2 x i32> %tmp18, <i32 -1, i32 0>
+  %tmp20 = bitcast <2 x float> %tmp13 to <2 x i32>
+  %tmp21 = and <2 x i32> %tmp20, <i32 0, i32 -1>
+  %tmp22 = or <2 x i32> %tmp19, %tmp21 ; lane 0 taken from %tmp18, lane 1 from %tmp20 (complementary masks)
+  %tmp23 = bitcast <2 x i32> %tmp22 to <1 x i64>
+  %tmp24 = shufflevector <1 x i64> %tmp23, <1 x i64> undef, <2 x i32> <i32 0, i32 1>
+  %tmp25 = bitcast <2 x i64> %tmp24 to <4 x float>
+  %tmp26 = shufflevector <2 x i64> %tmp16, <2 x i64> undef, <1 x i32> <i32 1>
+  %tmp27 = bitcast <1 x i64> %tmp26 to <2 x i32>
+  %tmp28 = and <2 x i32> %tmp27, <i32 -1, i32 0>
+  %tmp29 = and <2 x i32> undef, <i32 0, i32 -1>
+  %tmp30 = or <2 x i32> %tmp28, %tmp29
+  %tmp31 = bitcast <2 x i32> %tmp30 to <1 x i64>
+  %tmp32 = insertelement <4 x float> %tmp25, float 0.000000e+00, i32 3
+  %tmp33 = fmul <4 x float> undef, <float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>
+  %tmp34 = fadd <4 x float> %tmp33, %tmp32
+  %tmp35 = fmul <4 x float> %tmp33, zeroinitializer
+  %tmp36 = fadd <4 x float> %tmp35, zeroinitializer
+  %tmp37 = fadd <4 x float> %tmp35, zeroinitializer
+  %tmp38 = bitcast <4 x float> %tmp34 to <2 x i64>
+  %tmp39 = shufflevector <2 x i64> %tmp38, <2 x i64> undef, <1 x i32> zeroinitializer
+  %tmp40 = bitcast <1 x i64> %tmp39 to <2 x float>
+  %tmp41 = shufflevector <2 x float> %tmp40, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  %tmp42 = load <4 x float>* null, align 16, !tbaa !0
+  %tmp43 = fmul <4 x float> %tmp42, %tmp41
+  %tmp44 = load <4 x float>* undef, align 16, !tbaa !0
+  %tmp45 = fadd <4 x float> undef, %tmp43
+  %tmp46 = fadd <4 x float> undef, %tmp45
+  %tmp47 = bitcast <4 x float> %tmp36 to <2 x i64>
+  %tmp48 = shufflevector <2 x i64> %tmp47, <2 x i64> undef, <1 x i32> zeroinitializer
+  %tmp49 = bitcast <1 x i64> %tmp48 to <2 x float>
+  %tmp50 = shufflevector <2 x float> %tmp49, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  %tmp51 = fmul <4 x float> %tmp42, %tmp50
+  %tmp52 = fmul <4 x float> %tmp44, undef
+  %tmp53 = fadd <4 x float> %tmp52, %tmp51
+  %tmp54 = fadd <4 x float> undef, %tmp53
+  %tmp55 = bitcast <4 x float> %tmp37 to <2 x i64>
+  %tmp56 = shufflevector <2 x i64> %tmp55, <2 x i64> undef, <1 x i32> <i32 1>
+  %tmp57 = bitcast <1 x i64> %tmp56 to <2 x float>
+  %tmp58 = shufflevector <2 x float> %tmp57, <2 x float> undef, <4 x i32> zeroinitializer
+  %tmp59 = fmul <4 x float> undef, %tmp58
+  %tmp60 = fadd <4 x float> %tmp59, undef
+  %tmp61 = fadd <4 x float> %tmp60, zeroinitializer
+  %tmp62 = load void (i8*, i8*)** undef, align 4
+  call arm_aapcs_vfpcc  void %tmp62(i8* sret undef, i8* undef) nounwind ; indirect call through the pointer loaded into %tmp62
+  %tmp63 = bitcast <4 x float> %tmp46 to i128
+  %tmp64 = bitcast <4 x float> %tmp54 to i128
+  %tmp65 = bitcast <4 x float> %tmp61 to i128
+  %tmp66 = lshr i128 %tmp63, 64
+  %tmp67 = trunc i128 %tmp66 to i64
+  %tmp68 = insertvalue [8 x i64] undef, i64 %tmp67, 1
+  %tmp69 = insertvalue [8 x i64] %tmp68, i64 undef, 2
+  %tmp70 = lshr i128 %tmp64, 64
+  %tmp71 = trunc i128 %tmp70 to i64
+  %tmp72 = insertvalue [8 x i64] %tmp69, i64 %tmp71, 3
+  %tmp73 = trunc i128 %tmp65 to i64
+  %tmp74 = insertvalue [8 x i64] %tmp72, i64 %tmp73, 4
+  %tmp75 = insertvalue [8 x i64] %tmp74, i64 undef, 5
+  %tmp76 = insertvalue [8 x i64] %tmp75, i64 undef, 6
+  %tmp77 = insertvalue [8 x i64] %tmp76, i64 undef, 7
+  call arm_aapcs_vfpcc  void @bar(i8* sret null, [8 x i64] %tmp77) nounwind ; passes the [8 x i64] aggregate assembled in %tmp68..%tmp77 by value
+  %tmp78 = call arm_aapcs_vfpcc  i8* null(i8* null) nounwind ; call through a null callee; the result is reinterpreted as an i512 pointer below
+  %tmp79 = bitcast i8* %tmp78 to i512*
+  %tmp80 = load i512* %tmp79, align 16
+  %tmp81 = lshr i512 %tmp80, 128
+  %tmp82 = trunc i512 %tmp80 to i128
+  %tmp83 = trunc i512 %tmp81 to i128 ; bits 128..255 of the loaded i512
+  %tmp84 = bitcast i128 %tmp83 to <4 x float>
+  %tmp85 = bitcast <4 x float> %tmp84 to <2 x i64>
+  %tmp86 = shufflevector <2 x i64> %tmp85, <2 x i64> undef, <1 x i32> <i32 1>
+  %tmp87 = bitcast <1 x i64> %tmp86 to <2 x float>
+  %tmp88 = shufflevector <2 x float> %tmp87, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  %tmp89 = fmul <4 x float> undef, %tmp88
+  %tmp90 = fadd <4 x float> %tmp89, undef
+  %tmp91 = fadd <4 x float> undef, %tmp90
+  store <4 x float> %tmp91, <4 x float>* undef, align 16, !tbaa !0
+  unreachable
+
+bb92:                                             ; preds = %bb2
+  ret void
+}
+
+declare arm_aapcs_vfpcc void @bar(i8* noalias nocapture sret, [8 x i64]) nounwind uwtable inlinehint
+
+!0 = metadata !{metadata !"omnipotent char", metadata !1}
+!1 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/ARM/2012-02-01-CoalescerBug.ll b/test/CodeGen/ARM/2012-02-01-CoalescerBug.ll
new file mode 100644
index 0000000..6c7aaad
--- /dev/null
+++ b/test/CodeGen/ARM/2012-02-01-CoalescerBug.ll
@@ -0,0 +1,26 @@
+; RUN: llc -verify-coalescing < %s
+; PR11868
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv7-none-linux-gnueabi"
+
+%0 = type { <4 x float> }
+%1 = type { <4 x float> }
+
+@foo = external global %0, align 16
+
+define arm_aapcs_vfpcc void @bar(float, i1 zeroext, i1 zeroext) nounwind { ; copies lanes 0-2 of @foo's vector, zeroes lane 3, stores the result and calls @baz; the three unnamed arguments are not read
+  %4 = load <4 x float>* getelementptr inbounds (%0* @foo, i32 0, i32 0), align 16
+  %5 = extractelement <4 x float> %4, i32 0
+  %6 = extractelement <4 x float> %4, i32 1
+  %7 = extractelement <4 x float> %4, i32 2
+  %8 = insertelement <4 x float> undef, float %5, i32 0
+  %9 = insertelement <4 x float> %8, float %6, i32 1
+  %10 = insertelement <4 x float> %9, float %7, i32 2
+  %11 = insertelement <4 x float> %10, float 0.000000e+00, i32 3 ; %11 = <%5, %6, %7, 0.0>
+  store <4 x float> %11, <4 x float>* undef, align 16  ; store address is undef
+  call arm_aapcs_vfpcc  void @baz(%1* undef, float 0.000000e+00) nounwind
+  ret void
+}
+
+declare arm_aapcs_vfpcc void @baz(%1*, float)
diff --git a/test/CodeGen/ARM/arm-returnaddr.ll b/test/CodeGen/ARM/arm-returnaddr.ll
index 95edaad..1272e8e 100644
--- a/test/CodeGen/ARM/arm-returnaddr.ll
+++ b/test/CodeGen/ARM/arm-returnaddr.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s
-; RUN: llc < %s -mtriple=thumbv6-apple-darwin | FileCheck %s
-; RUN: llc < %s -mtriple=arm-apple-darwin -regalloc=basic | FileCheck %s
-; RUN: llc < %s -mtriple=thumbv6-apple-darwin -regalloc=basic | FileCheck %s
+; RUN: llc < %s -mtriple=arm-apple-ios | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv6-apple-ios | FileCheck %s
+; RUN: llc < %s -mtriple=arm-apple-ios -regalloc=basic | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv6-apple-ios -regalloc=basic | FileCheck %s
 ; rdar://8015977
 ; rdar://8020118
 
diff --git a/test/CodeGen/ARM/atomic-op.ll b/test/CodeGen/ARM/atomic-op.ll
index 02ce5a1..8967730 100644
--- a/test/CodeGen/ARM/atomic-op.ll
+++ b/test/CodeGen/ARM/atomic-op.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=armv7-apple-darwin10 -verify-machineinstrs | FileCheck %s
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s
 
 define void @func(i32 %argc, i8** %argv) nounwind {
 entry:
@@ -61,7 +61,7 @@ entry:
   ; CHECK: strex
   %7 = atomicrmw min i32* %val2, i32 16 monotonic
 	store i32 %7, i32* %old
-	%neg = sub i32 0, 1		; <i32> [#uses=1]
+	%neg = sub i32 0, 1
   ; CHECK: ldrex
   ; CHECK: cmp
   ; CHECK: strex
@@ -77,5 +77,85 @@ entry:
   ; CHECK: strex
   %10 = atomicrmw max i32* %val2, i32 0 monotonic
 	store i32 %10, i32* %old
-	ret void
+  ; CHECK: ldrex
+  ; CHECK: cmp
+  ; CHECK: strex
+  %11 = atomicrmw umin i32* %val2, i32 16 monotonic
+	store i32 %11, i32* %old
+	%uneg = sub i32 0, 1
+  ; CHECK: ldrex
+  ; CHECK: cmp
+  ; CHECK: strex
+  %12 = atomicrmw umin i32* %val2, i32 %uneg monotonic
+	store i32 %12, i32* %old
+  ; CHECK: ldrex
+  ; CHECK: cmp
+  ; CHECK: strex
+  %13 = atomicrmw umax i32* %val2, i32 1 monotonic
+	store i32 %13, i32* %old
+  ; CHECK: ldrex
+  ; CHECK: cmp
+  ; CHECK: strex
+  %14 = atomicrmw umax i32* %val2, i32 0 monotonic
+	store i32 %14, i32* %old
+
+  ret void
+}
+
+define void @func2() nounwind { ; i16 variant: four unsigned min/max atomicrmw operations on a stack slot
+entry:
+  %val = alloca i16
+  %old = alloca i16
+  store i16 31, i16* %val
+  ; CHECK: ldrex
+  ; CHECK: cmp
+  ; CHECK: strex
+  %0 = atomicrmw umin i16* %val, i16 16 monotonic
+  store i16 %0, i16* %old
+  %uneg = sub i16 0, 1 ; 0 - 1 = -1, i.e. all 16 bits set
+  ; CHECK: ldrex
+  ; CHECK: cmp
+  ; CHECK: strex
+  %1 = atomicrmw umin i16* %val, i16 %uneg monotonic
+  store i16 %1, i16* %old
+  ; CHECK: ldrex
+  ; CHECK: cmp
+  ; CHECK: strex
+  %2 = atomicrmw umax i16* %val, i16 1 monotonic
+  store i16 %2, i16* %old
+  ; CHECK: ldrex
+  ; CHECK: cmp
+  ; CHECK: strex
+  %3 = atomicrmw umax i16* %val, i16 0 monotonic
+  store i16 %3, i16* %old
+  ret void
+}
+
+define void @func3() nounwind { ; i8 variant: four unsigned min/max atomicrmw operations on a stack slot
+entry:
+  %val = alloca i8
+  %old = alloca i8
+  store i8 31, i8* %val
+  ; CHECK: ldrex
+  ; CHECK: cmp
+  ; CHECK: strex
+  %0 = atomicrmw umin i8* %val, i8 16 monotonic
+  store i8 %0, i8* %old
+  %uneg = sub i8 0, 1 ; 0 - 1 = -1, i.e. all 8 bits set; placed ahead of the directives as in @func and @func2
+  ; CHECK: ldrex
+  ; CHECK: cmp
+  ; CHECK: strex
+  %1 = atomicrmw umin i8* %val, i8 %uneg monotonic
+  store i8 %1, i8* %old
+  ; CHECK: ldrex
+  ; CHECK: cmp
+  ; CHECK: strex
+  %2 = atomicrmw umax i8* %val, i8 1 monotonic
+  store i8 %2, i8* %old
+  ; CHECK: ldrex
+  ; CHECK: cmp
+  ; CHECK: strex
+  %3 = atomicrmw umax i8* %val, i8 0 monotonic
+  store i8 %3, i8* %old
+  ret void
 }
diff --git a/test/CodeGen/ARM/avoid-cpsr-rmw.ll b/test/CodeGen/ARM/avoid-cpsr-rmw.ll
index 877ec18..1b385ab 100644
--- a/test/CodeGen/ARM/avoid-cpsr-rmw.ll
+++ b/test/CodeGen/ARM/avoid-cpsr-rmw.ll
@@ -6,9 +6,9 @@
 define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) nounwind readnone {
  entry:
 ; CHECK: t1:
-; CHECK: muls [[REG:(r[0-9]+)]], r2, r3
-; CHECK-NEXT: mul  [[REG2:(r[0-9]+)]], r0, r1
-; CHECK-NEXT: muls r0, [[REG2]], [[REG]]
+; CHECK: muls [[REG:(r[0-9]+)]], r3, r2
+; CHECK-NEXT: mul  [[REG2:(r[0-9]+)]], r1, r0
+; CHECK-NEXT: muls r0, [[REG]], [[REG2]]
   %0 = mul nsw i32 %a, %b
   %1 = mul nsw i32 %c, %d
   %2 = mul nsw i32 %0, %1
diff --git a/test/CodeGen/ARM/call-tc.ll b/test/CodeGen/ARM/call-tc.ll
index f78d998..be3e105 100644
--- a/test/CodeGen/ARM/call-tc.ll
+++ b/test/CodeGen/ARM/call-tc.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -mtriple=armv6-apple-darwin -mattr=+vfp2 -arm-tail-calls | FileCheck %s -check-prefix=CHECKV6
+; RUN: llc < %s -mtriple=armv6-apple-ios -mattr=+vfp2 -arm-tail-calls | FileCheck %s -check-prefix=CHECKV6
 ; RUN: llc < %s -mtriple=armv6-linux-gnueabi -relocation-model=pic -mattr=+vfp2 -arm-tail-calls | FileCheck %s -check-prefix=CHECKELF
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -arm-tail-calls | FileCheck %s -check-prefix=CHECKT2D
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -arm-tail-calls | FileCheck %s -check-prefix=CHECKT2D
 ; RUN: llc < %s -mtriple=thumbv7-apple-ios5.0 | FileCheck %s -check-prefix=CHECKT2D
 
 ; Enable tailcall optimization for iOS 5.0
diff --git a/test/CodeGen/ARM/code-placement.ll b/test/CodeGen/ARM/code-placement.ll
index 91ef659..487ec69 100644
--- a/test/CodeGen/ARM/code-placement.ll
+++ b/test/CodeGen/ARM/code-placement.ll
@@ -12,9 +12,9 @@ entry:
   br i1 %0, label %bb2, label %bb
 
 bb:
-; CHECK: LBB0_2:
-; CHECK: bne LBB0_2
-; CHECK-NOT: b LBB0_2
+; CHECK: LBB0_1:
+; CHECK: bne LBB0_1
+; CHECK-NOT: b LBB0_1
 ; CHECK: bx lr
   %list_addr.05 = phi %struct.list_head* [ %2, %bb ], [ %list, %entry ]
   %next.04 = phi %struct.list_head* [ %list_addr.05, %bb ], [ null, %entry ]
diff --git a/test/CodeGen/ARM/cse-call.ll b/test/CodeGen/ARM/cse-call.ll
new file mode 100644
index 0000000..eff5de5
--- /dev/null
+++ b/test/CodeGen/ARM/cse-call.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -mcpu=arm1136jf-s -verify-machineinstrs | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "armv6-apple-ios0.0.0"
+
+; Don't CSE a cmp across a call that clobbers CPSR.
+;
+; CHECK: cmp
+; CHECK: S_trimzeros
+; CHECK: cmp
+; CHECK: strlen
+
+@F_floatmul.man1 = external global [200 x i8], align 1
+@F_floatmul.man2 = external global [200 x i8], align 1
+
+declare i32 @strlen(i8* nocapture) nounwind readonly
+declare void @S_trimzeros(...)
+
+define i8* @F_floatmul(i8* %f1, i8* %f2) nounwind ssp { ; reduced body: two selects over the same pair of globals feed two successive calls
+entry:
+  br i1 undef, label %while.end42, label %while.body37
+
+while.body37:                                     ; preds = %while.body37, %entry
+  br i1 false, label %while.end42, label %while.body37 ; constant-false condition selects the second target, i.e. the back edge
+
+while.end42:                                      ; preds = %while.body37, %entry
+  %. = select i1 undef, i8* getelementptr inbounds ([200 x i8]* @F_floatmul.man1, i32 0, i32 0), i8* getelementptr inbounds ([200 x i8]* @F_floatmul.man2, i32 0, i32 0)
+  %.92 = select i1 undef, i8* getelementptr inbounds ([200 x i8]* @F_floatmul.man2, i32 0, i32 0), i8* getelementptr inbounds ([200 x i8]* @F_floatmul.man1, i32 0, i32 0) ; same globals as %. with the operands swapped
+  tail call void bitcast (void (...)* @S_trimzeros to void (i8*)*)(i8* %.92) nounwind ; variadic declaration called through a bitcast to void (i8*)*
+  %call47 = tail call i32 @strlen(i8* %.) nounwind
+  unreachable
+}
diff --git a/test/CodeGen/ARM/cse-libcalls.ll b/test/CodeGen/ARM/cse-libcalls.ll
index 0dcf9dd..1d011be 100644
--- a/test/CodeGen/ARM/cse-libcalls.ll
+++ b/test/CodeGen/ARM/cse-libcalls.ll
@@ -4,7 +4,7 @@ target triple = "i386-apple-darwin8"
 
 ; Without CSE of libcalls, there are two calls in the output instead of one.
 
-define i32 @u_f_nonbon(double %lambda) nounwind {
+define double @u_f_nonbon(double %lambda) nounwind {
 entry:
 	%tmp19.i.i = load double* null, align 4		; <double> [#uses=2]
 	%tmp6.i = fcmp olt double %tmp19.i.i, 1.000000e+00		; <i1> [#uses=1]
@@ -26,5 +26,5 @@ bb502.loopexit.i:		; preds = %bb28.i
 	br i1 false, label %bb.nph53.i, label %bb508.i
 
 bb508.i:		; preds = %bb502.loopexit.i, %entry
-	ret i32 1
+	ret double %tmp10.i4
 }
diff --git a/test/CodeGen/ARM/ctor_order.ll b/test/CodeGen/ARM/ctor_order.ll
index 7f00eb3..6419292 100644
--- a/test/CodeGen/ARM/ctor_order.ll
+++ b/test/CodeGen/ARM/ctor_order.ll
@@ -6,13 +6,15 @@
 ; DARWIN:      .long _f151
 ; DARWIN-NEXT: .long _f152
 
-; ELF:      .section .ctors,"aw",%progbits
+; ELF:      .section .ctors.65384,"aw",%progbits
+; ELF:      .long    f151
+; ELF:      .section .ctors.65383,"aw",%progbits
 ; ELF:      .long    f152
-; ELF-NEXT: .long    f151
 
-; GNUEABI:      .section .init_array,"aw",%init_array
+; GNUEABI:      .section .init_array.151,"aw",%init_array
 ; GNUEABI:      .long    f151
-; GNUEABI-NEXT: .long    f152
+; GNUEABI:      .section .init_array.152,"aw",%init_array
+; GNUEABI:      .long    f152
 
 
 @llvm.global_ctors = appending global [2 x { i32, void ()* }] [ { i32, void ()* } { i32 151, void ()* @f151 }, { i32, void ()* } { i32 152, void ()* @f152 } ]
diff --git a/test/CodeGen/ARM/debug-info-arg.ll b/test/CodeGen/ARM/debug-info-arg.ll
index b0270f9..a7b44e6 100644
--- a/test/CodeGen/ARM/debug-info-arg.ll
+++ b/test/CodeGen/ARM/debug-info-arg.ll
@@ -2,7 +2,7 @@
 ; Test to check argument y's debug info uses FI
 ; Radar 10048772
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32"
-target triple = "thumbv7-apple-macosx10.7.0"
+target triple = "thumbv7-apple-ios"
 
 %struct.tag_s = type { i32, i32, i32 }
 
diff --git a/test/CodeGen/ARM/debug-info-blocks.ll b/test/CodeGen/ARM/debug-info-blocks.ll
index 00e6cb0..0ad0a15 100644
--- a/test/CodeGen/ARM/debug-info-blocks.ll
+++ b/test/CodeGen/ARM/debug-info-blocks.ll
@@ -2,7 +2,7 @@
 ; CHECK: @DEBUG_VALUE: mydata <- [sp+#{{[0-9]+}}]+#0
 ; Radar 9331779
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
-target triple = "thumbv7-apple-macosx10.7.0"
+target triple = "thumbv7-apple-ios"
 
 %0 = type opaque
 %1 = type { [4 x i32] }
diff --git a/test/CodeGen/ARM/debug-info-sreg2.ll b/test/CodeGen/ARM/debug-info-sreg2.ll
index 3972e68..ae7af0a 100644
--- a/test/CodeGen/ARM/debug-info-sreg2.ll
+++ b/test/CodeGen/ARM/debug-info-sreg2.ll
@@ -4,11 +4,11 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-
 target triple = "thumbv7-apple-macosx10.6.7"
 
 ;CHECK: Ldebug_loc0:
+;CHECK-NEXT:        .long   Ltmp0
 ;CHECK-NEXT:        .long   Ltmp1
-;CHECK-NEXT:        .long   Ltmp2
-;CHECK-NEXT: Lset[[N:[0-9]+]] = Ltmp10-Ltmp9        @ Loc expr size
+;CHECK-NEXT: Lset[[N:[0-9]+]] = Ltmp{{[0-9]+}}-Ltmp[[M:[0-9]+]]        @ Loc expr size
 ;CHECK-NEXT:        .short  Lset[[N]]
-;CHECK-NEXT: Ltmp9:
+;CHECK-NEXT: Ltmp[[M]]:
 ;CHECK-NEXT:        .byte   144                     @ DW_OP_regx for S register
 
 define void @_Z3foov() optsize ssp {
diff --git a/test/CodeGen/ARM/dg.exp b/test/CodeGen/ARM/dg.exp
deleted file mode 100644
index 3ff359a..0000000
--- a/test/CodeGen/ARM/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target ARM] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/ARM/ehabi-unwind.ll b/test/CodeGen/ARM/ehabi-unwind.ll
new file mode 100644
index 0000000..fd7d0e6
--- /dev/null
+++ b/test/CodeGen/ARM/ehabi-unwind.ll
@@ -0,0 +1,16 @@
+; Test that the EHABI unwind instruction generator does not encounter any
+; unfamiliar instructions.
+; RUN: llc < %s -mtriple=thumbv7 -arm-enable-ehabi -disable-fp-elim
+; RUN: llc < %s -mtriple=thumbv7 -arm-enable-ehabi
+; RUN: llc < %s -mtriple=thumbv7 -arm-enable-ehabi -arm-enable-ehabi-descriptors
+
+define void @_Z1fv() nounwind { ; empty function: returns immediately and makes no calls
+entry:
+  ret void
+}
+
+define void @_Z1gv() nounwind { ; calls @_Z1fv once and returns, so this function contains a call
+entry:
+  call void @_Z1fv()
+  ret void
+}
diff --git a/test/CodeGen/ARM/fast-isel-binary.ll b/test/CodeGen/ARM/fast-isel-binary.ll
new file mode 100644
index 0000000..723383e
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-binary.ll
@@ -0,0 +1,116 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+
+; Test add with non-legal types
+
+define void @add_i1(i1 %a, i1 %b) nounwind ssp { ; adds two i1 arguments and stores the sum to a stack slot
+entry:
+; ARM: add_i1
+; THUMB: add_i1
+  %a.addr = alloca i1, align 4 ; stack slot that receives the result
+  %0 = add i1 %a, %b ; the operation under test, on a non-legal i1 type
+; ARM: add r0, r0, r1
+; THUMB: add r0, r1
+  store i1 %0, i1* %a.addr, align 4 ; gives %0 a use
+  ret void
+}
+
+define void @add_i8(i8 %a, i8 %b) nounwind ssp { ; adds two i8 arguments and stores the sum to a stack slot
+entry:
+; ARM: add_i8
+; THUMB: add_i8
+  %a.addr = alloca i8, align 4 ; stack slot that receives the result
+  %0 = add i8 %a, %b ; the operation under test, on a non-legal i8 type
+; ARM: add r0, r0, r1
+; THUMB: add r0, r1
+  store i8 %0, i8* %a.addr, align 4 ; gives %0 a use
+  ret void
+}
+
+define void @add_i16(i16 %a, i16 %b) nounwind ssp { ; adds two i16 arguments and stores the sum to a stack slot
+entry:
+; ARM: add_i16
+; THUMB: add_i16
+  %a.addr = alloca i16, align 4 ; stack slot that receives the result
+  %0 = add i16 %a, %b ; the operation under test, on a non-legal i16 type
+; ARM: add r0, r0, r1
+; THUMB: add r0, r1
+  store i16 %0, i16* %a.addr, align 4 ; gives %0 a use
+  ret void
+}
+
+; Test or with non-legal types
+
+define void @or_i1(i1 %a, i1 %b) nounwind ssp { ; ORs two i1 arguments and stores the result to a stack slot
+entry:
+; ARM: or_i1
+; THUMB: or_i1
+  %a.addr = alloca i1, align 4 ; stack slot that receives the result
+  %0 = or i1 %a, %b ; the operation under test, on a non-legal i1 type
+; ARM: orr r0, r0, r1
+; THUMB: orrs r0, r1
+  store i1 %0, i1* %a.addr, align 4 ; gives %0 a use
+  ret void
+}
+
+define void @or_i8(i8 %a, i8 %b) nounwind ssp { ; ORs two i8 arguments and stores the result to a stack slot
+entry:
+; ARM: or_i8
+; THUMB: or_i8
+  %a.addr = alloca i8, align 4 ; stack slot that receives the result
+  %0 = or i8 %a, %b ; the operation under test, on a non-legal i8 type
+; ARM: orr r0, r0, r1
+; THUMB: orrs r0, r1
+  store i8 %0, i8* %a.addr, align 4 ; gives %0 a use
+  ret void
+}
+
+define void @or_i16(i16 %a, i16 %b) nounwind ssp { ; ORs two i16 arguments and stores the result to a stack slot
+entry:
+; ARM: or_i16
+; THUMB: or_i16
+  %a.addr = alloca i16, align 4 ; stack slot that receives the result
+  %0 = or i16 %a, %b ; the operation under test, on a non-legal i16 type
+; ARM: orr r0, r0, r1
+; THUMB: orrs r0, r1
+  store i16 %0, i16* %a.addr, align 4 ; gives %0 a use
+  ret void
+}
+
+; Test sub with non-legal types
+
+define void @sub_i1(i1 %a, i1 %b) nounwind ssp { ; subtracts %b from %a (i1) and stores the difference to a stack slot
+entry:
+; ARM: sub_i1
+; THUMB: sub_i1
+  %a.addr = alloca i1, align 4 ; stack slot that receives the result
+  %0 = sub i1 %a, %b ; the operation under test, on a non-legal i1 type
+; ARM: sub r0, r0, r1
+; THUMB: subs r0, r0, r1
+  store i1 %0, i1* %a.addr, align 4 ; gives %0 a use
+  ret void
+}
+
+define void @sub_i8(i8 %a, i8 %b) nounwind ssp { ; subtracts %b from %a (i8) and stores the difference to a stack slot
+entry:
+; ARM: sub_i8
+; THUMB: sub_i8
+  %a.addr = alloca i8, align 4 ; stack slot that receives the result
+  %0 = sub i8 %a, %b ; the operation under test, on a non-legal i8 type
+; ARM: sub r0, r0, r1
+; THUMB: subs r0, r0, r1
+  store i8 %0, i8* %a.addr, align 4 ; gives %0 a use
+  ret void
+}
+
+define void @sub_i16(i16 %a, i16 %b) nounwind ssp { ; subtracts %b from %a (i16) and stores the difference to a stack slot
+entry:
+; ARM: sub_i16
+; THUMB: sub_i16
+  %a.addr = alloca i16, align 4 ; stack slot that receives the result
+  %0 = sub i16 %a, %b ; the operation under test, on a non-legal i16 type
+; ARM: sub r0, r0, r1
+; THUMB: subs r0, r0, r1
+  store i16 %0, i16* %a.addr, align 4 ; gives %0 a use
+  ret void
+}
diff --git a/test/CodeGen/ARM/fast-isel-br-const.ll b/test/CodeGen/ARM/fast-isel-br-const.ll
index b7acfaa..625adc2 100644
--- a/test/CodeGen/ARM/fast-isel-br-const.ll
+++ b/test/CodeGen/ARM/fast-isel-br-const.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
 
 define i32 @t1(i32 %a, i32 %b) nounwind uwtable ssp {
 entry:
diff --git a/test/CodeGen/ARM/fast-isel-br-phi.ll b/test/CodeGen/ARM/fast-isel-br-phi.ll
new file mode 100644
index 0000000..a0aba69
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-br-phi.ll
@@ -0,0 +1,44 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios
+
+; This test ensures HandlePHINodesInSuccessorBlocks() is able to promote basic
+; non-legal integer types (i.e., i1, i8, i16).
+
+declare void @fooi8(i8)
+declare void @fooi16(i16)
+
+define void @foo(i1 %cmp) nounwind ssp { ; three diamond-shaped branches, each merging constants through a phi of type i1, i8 and i16 in turn
+entry:
+  br i1 %cmp, label %cond.true, label %cond.false
+
+cond.true:                                        ; preds = %entry
+  br label %cond.end
+
+cond.false:                                       ; preds = %entry
+  br label %cond.end
+
+cond.end:                                         ; preds = %cond.false, %cond.true
+  %cond = phi i1 [ 0, %cond.true ], [ 1, %cond.false ] ; i1 phi of constants
+  br i1 %cond, label %cond.true8, label %cond.false8
+
+cond.true8:                                       ; preds = %cond.end
+  br label %cond.end8
+
+cond.false8:                                      ; preds = %cond.end
+  br label %cond.end8
+
+cond.end8:                                        ; preds = %cond.false8, %cond.true8
+  %cond8 = phi i8 [ 0, %cond.true8 ], [ 1, %cond.false8 ] ; i8 phi of constants
+  call void @fooi8(i8 %cond8)
+  br i1 0, label %cond.true16, label %cond.false16 ; constant i1 0 condition
+
+cond.true16:                                       ; preds = %cond.end8
+  br label %cond.end16
+
+cond.false16:                                      ; preds = %cond.end8
+  br label %cond.end16
+
+cond.end16:                                        ; preds = %cond.false16, %cond.true16
+  %cond16 = phi i16 [ 0, %cond.true16 ], [ 1, %cond.false16 ] ; i16 phi of constants
+  call void @fooi16(i16 %cond16)
+  ret void
+}
diff --git a/test/CodeGen/ARM/fast-isel-call.ll b/test/CodeGen/ARM/fast-isel-call.ll
index 695dbba..dd460b2 100644
--- a/test/CodeGen/ARM/fast-isel-call.ll
+++ b/test/CodeGen/ARM/fast-isel-call.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
 
 define i32 @t0(i1 zeroext %a) nounwind {
   %1 = zext i1 %a to i32
diff --git a/test/CodeGen/ARM/fast-isel-cmp-imm.ll b/test/CodeGen/ARM/fast-isel-cmp-imm.ll
index 33c6008..1693066 100644
--- a/test/CodeGen/ARM/fast-isel-cmp-imm.ll
+++ b/test/CodeGen/ARM/fast-isel-cmp-imm.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
 
 define void @t1a(float %a) uwtable ssp {
 entry:
diff --git a/test/CodeGen/ARM/fast-isel-conversion.ll b/test/CodeGen/ARM/fast-isel-conversion.ll
index 14666a8..686ccad 100644
--- a/test/CodeGen/ARM/fast-isel-conversion.ll
+++ b/test/CodeGen/ARM/fast-isel-conversion.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
 
 ; Test sitofp
 
@@ -94,3 +94,149 @@ entry:
   store double %conv, double* %b.addr, align 8
   ret void
 }
+
+; Test uitofp
+
+define void @uitofp_single_i32(i32 %a, float %b) nounwind ssp {
+entry:
+; ARM: uitofp_single_i32
+; ARM: vmov s0, r0
+; ARM: vcvt.f32.u32 s0, s0
+; THUMB: uitofp_single_i32
+; THUMB: vmov s0, r0
+; THUMB: vcvt.f32.u32 s0, s0
+  %b.addr = alloca float, align 4
+  %conv = uitofp i32 %a to float
+  store float %conv, float* %b.addr, align 4
+  ret void
+}
+
+define void @uitofp_single_i16(i16 %a, float %b) nounwind ssp {
+entry:
+; ARM: uitofp_single_i16
+; ARM: uxth r0, r0
+; ARM: vmov s0, r0
+; ARM: vcvt.f32.u32 s0, s0
+; THUMB: uitofp_single_i16
+; THUMB: uxth r0, r0
+; THUMB: vmov s0, r0
+; THUMB: vcvt.f32.u32 s0, s0
+  %b.addr = alloca float, align 4
+  %conv = uitofp i16 %a to float
+  store float %conv, float* %b.addr, align 4
+  ret void
+}
+
+define void @uitofp_single_i8(i8 %a) nounwind ssp {
+entry:
+; ARM: uitofp_single_i8
+; ARM: uxtb r0, r0
+; ARM: vmov s0, r0
+; ARM: vcvt.f32.u32 s0, s0
+; THUMB: uitofp_single_i8
+; THUMB: uxtb r0, r0
+; THUMB: vmov s0, r0
+; THUMB: vcvt.f32.u32 s0, s0
+  %b.addr = alloca float, align 4
+  %conv = uitofp i8 %a to float
+  store float %conv, float* %b.addr, align 4
+  ret void
+}
+
+define void @uitofp_double_i32(i32 %a, double %b) nounwind ssp {
+entry:
+; ARM: uitofp_double_i32
+; ARM: vmov s0, r0
+; ARM: vcvt.f64.u32 d16, s0
+; THUMB: uitofp_double_i32
+; THUMB: vmov s0, r0
+; THUMB: vcvt.f64.u32 d16, s0
+  %b.addr = alloca double, align 8
+  %conv = uitofp i32 %a to double
+  store double %conv, double* %b.addr, align 8
+  ret void
+}
+
+define void @uitofp_double_i16(i16 %a, double %b) nounwind ssp {
+entry:
+; ARM: uitofp_double_i16
+; ARM: uxth r0, r0
+; ARM: vmov s0, r0
+; ARM: vcvt.f64.u32 d16, s0
+; THUMB: uitofp_double_i16
+; THUMB: uxth r0, r0
+; THUMB: vmov s0, r0
+; THUMB: vcvt.f64.u32 d16, s0
+  %b.addr = alloca double, align 8
+  %conv = uitofp i16 %a to double
+  store double %conv, double* %b.addr, align 8
+  ret void
+}
+
+define void @uitofp_double_i8(i8 %a, double %b) nounwind ssp {
+entry:
+; ARM: uitofp_double_i8
+; ARM: uxtb r0, r0
+; ARM: vmov s0, r0
+; ARM: vcvt.f64.u32 d16, s0
+; THUMB: uitofp_double_i8
+; THUMB: uxtb r0, r0
+; THUMB: vmov s0, r0
+; THUMB: vcvt.f64.u32 d16, s0
+  %b.addr = alloca double, align 8
+  %conv = uitofp i8 %a to double
+  store double %conv, double* %b.addr, align 8
+  ret void
+}
+
+; Test fptosi
+
+define void @fptosi_float(float %a) nounwind ssp {
+entry:
+; ARM: fptosi_float
+; ARM: vcvt.s32.f32 s0, s0
+; THUMB: fptosi_float
+; THUMB: vcvt.s32.f32 s0, s0
+  %b.addr = alloca i32, align 4
+  %conv = fptosi float %a to i32
+  store i32 %conv, i32* %b.addr, align 4
+  ret void
+}
+
+define void @fptosi_double(double %a) nounwind ssp {
+entry:
+; ARM: fptosi_double
+; ARM: vcvt.s32.f64 s0, d16
+; THUMB: fptosi_double
+; THUMB: vcvt.s32.f64 s0, d16
+  %b.addr = alloca i32, align 8
+  %conv = fptosi double %a to i32
+  store i32 %conv, i32* %b.addr, align 8
+  ret void
+}
+
+; Test fptoui
+
+define void @fptoui_float(float %a) nounwind ssp {
+entry:
+; ARM: fptoui_float
+; ARM: vcvt.u32.f32 s0, s0
+; THUMB: fptoui_float
+; THUMB: vcvt.u32.f32 s0, s0
+  %b.addr = alloca i32, align 4
+  %conv = fptoui float %a to i32
+  store i32 %conv, i32* %b.addr, align 4
+  ret void
+}
+
+define void @fptoui_double(double %a) nounwind ssp {
+entry:
+; ARM: fptoui_double
+; ARM: vcvt.u32.f64 s0, d16
+; THUMB: fptoui_double
+; THUMB: vcvt.u32.f64 s0, d16
+  %b.addr = alloca i32, align 8
+  %conv = fptoui double %a to i32
+  store i32 %conv, i32* %b.addr, align 8
+  ret void
+}
diff --git a/test/CodeGen/ARM/fast-isel-deadcode.ll b/test/CodeGen/ARM/fast-isel-deadcode.ll
index 028d940..7e147c7 100644
--- a/test/CodeGen/ARM/fast-isel-deadcode.ll
+++ b/test/CodeGen/ARM/fast-isel-deadcode.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
 
 ; Target-specific selector can't properly handle the double because it isn't
 ; being passed via a register, so the materialized arguments become dead code.
@@ -15,8 +15,7 @@ entry:
 ; THUMB-NOT: sxtb
 ; THUMB: movs r0, #0
 ; THUMB: movt r0, #0
-; THUMB: add sp, #32
-; THUMb: pop {r7, pc}
+; THUMB: pop
   ret i32 0
 }
 
diff --git a/test/CodeGen/ARM/fast-isel-icmp.ll b/test/CodeGen/ARM/fast-isel-icmp.ll
index deffe7b..8764bef 100644
--- a/test/CodeGen/ARM/fast-isel-icmp.ll
+++ b/test/CodeGen/ARM/fast-isel-icmp.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
 
 define i32 @icmp_i16_unsigned(i16 %a, i16 %b) nounwind {
 entry:
diff --git a/test/CodeGen/ARM/fast-isel-indirectbr.ll b/test/CodeGen/ARM/fast-isel-indirectbr.ll
new file mode 100644
index 0000000..be8035e
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-indirectbr.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+
+define void @t1(i8* %x) {
+entry:
+; ARM: t1
+; THUMB: t1
+  br label %L0
+
+L0:
+  br label %L1
+
+L1:
+  indirectbr i8* %x, [ label %L0, label %L1 ]
+; ARM: bx r0
+; THUMB: mov pc, r0
+}
diff --git a/test/CodeGen/ARM/fast-isel-intrinsic.ll b/test/CodeGen/ARM/fast-isel-intrinsic.ll
index 3ef8bce..e6bdfa7 100644
--- a/test/CodeGen/ARM/fast-isel-intrinsic.ll
+++ b/test/CodeGen/ARM/fast-isel-intrinsic.ll
@@ -1,19 +1,21 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
 
 @message1 = global [60 x i8] c"The LLVM Compiler Infrastructure\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", align 1
 @temp = common global [60 x i8] zeroinitializer, align 1
 
 define void @t1() nounwind ssp {
 ; ARM: t1
-; ARM: ldr r0, LCPI0_0
+; ARM: movw r0, :lower16:_message1
+; ARM: movt r0, :upper16:_message1
 ; ARM: add r0, r0, #5
 ; ARM: movw r1, #64
 ; ARM: movw r2, #10
 ; ARM: uxtb r1, r1
 ; ARM: bl _memset
 ; THUMB: t1
-; THUMB: ldr.n r0, LCPI0_0
+; THUMB: movw r0, :lower16:_message1
+; THUMB: movt r0, :upper16:_message1
 ; THUMB: adds r0, #5
 ; THUMB: movs r1, #64
 ; THUMB: movt r1, #0
@@ -29,7 +31,8 @@ declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
 
 define void @t2() nounwind ssp {
 ; ARM: t2
-; ARM: ldr r0, LCPI1_0
+; ARM: movw r0, :lower16:L_temp$non_lazy_ptr
+; ARM: movt r0, :upper16:L_temp$non_lazy_ptr
 ; ARM: ldr r0, [r0]
 ; ARM: add r1, r0, #4
 ; ARM: add r0, r0, #16
@@ -39,7 +42,8 @@ define void @t2() nounwind ssp {
 ; ARM: ldr r1, [sp]                @ 4-byte Reload
 ; ARM: bl _memcpy
 ; THUMB: t2
-; THUMB: ldr.n r0, LCPI1_0
+; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr
+; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr
 ; THUMB: ldr r0, [r0]
 ; THUMB: adds r1, r0, #4
 ; THUMB: adds r0, #16
@@ -55,7 +59,8 @@ declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32,
 
 define void @t3() nounwind ssp {
 ; ARM: t3
-; ARM: ldr r0, LCPI2_0
+; ARM: movw r0, :lower16:L_temp$non_lazy_ptr
+; ARM: movt r0, :upper16:L_temp$non_lazy_ptr
 ; ARM: ldr r0, [r0]
 ; ARM: add r1, r0, #4
 ; ARM: add r0, r0, #16
@@ -63,7 +68,8 @@ define void @t3() nounwind ssp {
 ; ARM: mov r0, r1
 ; ARM: bl _memmove
 ; THUMB: t3
-; THUMB: ldr.n r0, LCPI2_0
+; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr
+; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr
 ; THUMB: ldr r0, [r0]
 ; THUMB: adds r1, r0, #4
 ; THUMB: adds r0, #16
@@ -77,26 +83,24 @@ define void @t3() nounwind ssp {
 
 define void @t4() nounwind ssp {
 ; ARM: t4
-; ARM: ldr r0, LCPI3_0
+; ARM: movw r0, :lower16:L_temp$non_lazy_ptr
+; ARM: movt r0, :upper16:L_temp$non_lazy_ptr
 ; ARM: ldr r0, [r0]
-; ARM: ldr r1, LCPI3_1
-; ARM: ldr r1, [r1]
-; ARM: ldr r2, [r1, #16]
-; ARM: str r2, [r0, #4]
-; ARM: ldr r2, [r1, #20]
-; ARM: str r2, [r0, #8]
-; ARM: ldrh r1, [r1, #24]
+; ARM: ldr r1, [r0, #16]
+; ARM: str r1, [r0, #4]
+; ARM: ldr r1, [r0, #20]
+; ARM: str r1, [r0, #8]
+; ARM: ldrh r1, [r0, #24]
 ; ARM: strh r1, [r0, #12]
 ; ARM: bx lr
-; THUMB: ldr.n r0, LCPI3_0
+; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr
+; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr
 ; THUMB: ldr r0, [r0]
-; THUMB: ldr.n r1, LCPI3_1
-; THUMB: ldr r1, [r1]
-; THUMB: ldr r2, [r1, #16]
-; THUMB: str r2, [r0, #4]
-; THUMB: ldr r2, [r1, #20]
-; THUMB: str r2, [r0, #8]
-; THUMB: ldrh r1, [r1, #24]
+; THUMB: ldr r1, [r0, #16]
+; THUMB: str r1, [r0, #4]
+; THUMB: ldr r1, [r0, #20]
+; THUMB: str r1, [r0, #8]
+; THUMB: ldrh r1, [r0, #24]
 ; THUMB: strh r1, [r0, #12]
 ; THUMB: bx lr
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 10, i32 1, i1 false)
diff --git a/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll b/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll
index 0b8a768..2a88678 100644
--- a/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll
+++ b/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
 
 define i32 @t1(i32* nocapture %ptr) nounwind readonly {
 entry:
diff --git a/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll b/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll
index dcfc9d0..e8cc2b2 100644
--- a/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll
+++ b/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
 ; rdar://10418009
 
 define zeroext i16 @t1(i16* nocapture %a) nounwind uwtable readonly ssp {
diff --git a/test/CodeGen/ARM/fast-isel-mvn.ll b/test/CodeGen/ARM/fast-isel-mvn.ll
index daf56e7..b180e43 100644
--- a/test/CodeGen/ARM/fast-isel-mvn.ll
+++ b/test/CodeGen/ARM/fast-isel-mvn.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
 ; rdar://10412592
 
 ; Note: The Thumb code is being generated by the target-independent selector.
@@ -104,4 +104,4 @@ entry:
 ; THUMB: movt r0, #33023
   call void @foo(i32 -2130706433)
   ret void
-}
-\ No newline at end of file
+}
diff --git a/test/CodeGen/ARM/fast-isel-redefinition.ll b/test/CodeGen/ARM/fast-isel-redefinition.ll
index 4203537..e50c3a4 100644
--- a/test/CodeGen/ARM/fast-isel-redefinition.ll
+++ b/test/CodeGen/ARM/fast-isel-redefinition.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O0 -regalloc=basic < %s
+; RUN: llc -O0 -optimize-regalloc -regalloc=basic < %s
 ; This isn't exactly a useful set of command-line options, but check that it
 ; doesn't crash.  (It was crashing because a register was getting redefined.)
 
diff --git a/test/CodeGen/ARM/fast-isel-ret.ll b/test/CodeGen/ARM/fast-isel-ret.ll
index f7f4521..689b169 100644
--- a/test/CodeGen/ARM/fast-isel-ret.ll
+++ b/test/CodeGen/ARM/fast-isel-ret.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s
 
 ; Sign-extend of i1 currently not supported by fast-isel
 ;define signext i1 @ret0(i1 signext %a) nounwind uwtable ssp {
@@ -46,3 +46,12 @@ entry:
 ; CHECK: bx lr
   ret i16 %a
 }
+
+define i16 @ret6(i16 %a) nounwind uwtable ssp {
+entry:
+; CHECK: ret6
+; CHECK-NOT: uxth
+; CHECK-NOT: sxth
+; CHECK: bx lr
+  ret i16 %a
+}
diff --git a/test/CodeGen/ARM/fast-isel-select.ll b/test/CodeGen/ARM/fast-isel-select.ll
index 9ac63d6..b83a733 100644
--- a/test/CodeGen/ARM/fast-isel-select.ll
+++ b/test/CodeGen/ARM/fast-isel-select.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
 
 define i32 @t1(i1 %c) nounwind readnone {
 entry:
diff --git a/test/CodeGen/ARM/fast-isel.ll b/test/CodeGen/ARM/fast-isel.ll
index 648d711..905543a 100644
--- a/test/CodeGen/ARM/fast-isel.ll
+++ b/test/CodeGen/ARM/fast-isel.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
 
 ; Very basic fast-isel functionality.
 define i32 @add(i32 %a, i32 %b) nounwind {
@@ -142,21 +142,19 @@ define void @test4() {
   store i32 %b, i32* @test4g
   ret void
 
-; THUMB: ldr.n r0, LCPI4_1
+; THUMB: movw r0, :lower16:L_test4g$non_lazy_ptr
+; THUMB: movt r0, :upper16:L_test4g$non_lazy_ptr
 ; THUMB: ldr r0, [r0]
-; THUMB: ldr r0, [r0]
-; THUMB: adds r0, #1
-; THUMB: ldr.n r1, LCPI4_0
-; THUMB: ldr r1, [r1]
-; THUMB: str r0, [r1]
+; THUMB: ldr r1, [r0]
+; THUMB: adds r1, #1
+; THUMB: str r1, [r0]
 
-; ARM: ldr r0, LCPI4_1
+; ARM: movw r0, :lower16:L_test4g$non_lazy_ptr
+; ARM: movt r0, :upper16:L_test4g$non_lazy_ptr
 ; ARM: ldr r0, [r0]
-; ARM: ldr r0, [r0]
-; ARM: add r0, r0, #1
-; ARM: ldr r1, LCPI4_0
-; ARM: ldr r1, [r1]
-; ARM: str r0, [r1]
+; ARM: ldr r1, [r0]
+; ARM: add r1, r1, #1
+; ARM: str r1, [r0]
 }
 
 ; Check unaligned stores
diff --git a/test/CodeGen/ARM/fcopysign.ll b/test/CodeGen/ARM/fcopysign.ll
index c4dbeb9..87115cc 100644
--- a/test/CodeGen/ARM/fcopysign.ll
+++ b/test/CodeGen/ARM/fcopysign.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=armv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=SOFT
-; RUN: llc < %s -mtriple=armv7-gnueabi -float-abi=hard -mcpu=cortex-a8 | FileCheck %s -check-prefix=HARD
+; RUN: llc < %s -disable-post-ra -mtriple=armv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=SOFT
+; RUN: llc < %s -disable-post-ra -mtriple=armv7-gnueabi -float-abi=hard -mcpu=cortex-a8 | FileCheck %s -check-prefix=HARD
 
 ; rdar://8984306
 define float @test1(float %x, float %y) nounwind {
@@ -60,8 +60,8 @@ entry:
 define float @test5() nounwind {
 entry:
 ; SOFT: test5:
-; SOFT: vmov.i32 [[REG6:(d[0-9]+)]], #0x80000000
 ; SOFT: vmov [[REG7:(d[0-9]+)]], r0, r1
+; SOFT: vmov.i32 [[REG6:(d[0-9]+)]], #0x80000000
 ; SOFT: vshr.u64 [[REG7]], [[REG7]], #32
 ; SOFT: vbsl [[REG6]], [[REG7]], 
   %0 = tail call double (...)* @bar() nounwind
diff --git a/test/CodeGen/ARM/fpcmp-opt.ll b/test/CodeGen/ARM/fpcmp-opt.ll
index ad03202..80925c7 100644
--- a/test/CodeGen/ARM/fpcmp-opt.ll
+++ b/test/CodeGen/ARM/fpcmp-opt.ll
@@ -1,24 +1,16 @@
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck -check-prefix=FINITE %s
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math | FileCheck -check-prefix=NAN %s
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math | FileCheck %s
 ; rdar://7461510
+; rdar://10964603
 
+; Disable this optimization unless we know one of the operands is zero.
 define arm_apcscc i32 @t1(float* %a, float* %b) nounwind {
 entry:
-; FINITE: t1:
-; FINITE-NOT: vldr
-; FINITE: ldr
-; FINITE: ldr
-; FINITE: cmp r0, r1
-; FINITE-NOT: vcmpe.f32
-; FINITE-NOT: vmrs
-; FINITE: beq
-
-; NAN: t1:
-; NAN: vldr s0,
-; NAN: vldr s1,
-; NAN: vcmpe.f32 s1, s0
-; NAN: vmrs apsr_nzcv, fpscr
-; NAN: beq
+; CHECK: t1:
+; CHECK: vldr [[S0:s[0-9]+]],
+; CHECK: vldr [[S1:s[0-9]+]],
+; CHECK: vcmpe.f32 [[S1]], [[S0]]
+; CHECK: vmrs apsr_nzcv, fpscr
+; CHECK: beq
   %0 = load float* %a
   %1 = load float* %b
   %2 = fcmp une float %0, %1
@@ -33,17 +25,21 @@ bb2:
   ret i32 %4
 }
 
+; If one side is zero, the other side's sign bit is masked off to allow
+; +0.0 == -0.0
 define arm_apcscc i32 @t2(double* %a, double* %b) nounwind {
 entry:
-; FINITE: t2:
-; FINITE-NOT: vldr
-; FINITE: ldrd r0, r1, [r0]
-; FINITE-NOT: b LBB
-; FINITE: cmp r0, #0
-; FINITE: cmpeq r1, #0
-; FINITE-NOT: vcmpe.f32
-; FINITE-NOT: vmrs
-; FINITE: bne
+; CHECK: t2:
+; CHECK-NOT: vldr
+; CHECK: ldr [[REG1:(r[0-9]+)]], [r0]
+; CHECK: ldr [[REG2:(r[0-9]+)]], [r0, #4]
+; CHECK-NOT: b LBB
+; CHECK: cmp [[REG1]], #0
+; CHECK: bfc [[REG2]], #31, #1
+; CHECK: cmpeq [[REG2]], #0
+; CHECK-NOT: vcmpe.f32
+; CHECK-NOT: vmrs
+; CHECK: bne
   %0 = load double* %a
   %1 = fcmp oeq double %0, 0.000000e+00
   br i1 %1, label %bb1, label %bb2
@@ -59,13 +55,14 @@ bb2:
 
 define arm_apcscc i32 @t3(float* %a, float* %b) nounwind {
 entry:
-; FINITE: t3:
-; FINITE-NOT: vldr
-; FINITE: ldr r0, [r0]
-; FINITE: cmp r0, #0
-; FINITE-NOT: vcmpe.f32
-; FINITE-NOT: vmrs
-; FINITE: bne
+; CHECK: t3:
+; CHECK-NOT: vldr
+; CHECK: ldr [[REG3:(r[0-9]+)]], [r0]
+; CHECK: mvn [[REG4:(r[0-9]+)]], #-2147483648
+; CHECK: tst [[REG3]], [[REG4]]
+; CHECK-NOT: vcmpe.f32
+; CHECK-NOT: vmrs
+; CHECK: bne
   %0 = load float* %a
   %1 = fcmp oeq float %0, 0.000000e+00
   br i1 %1, label %bb1, label %bb2
diff --git a/test/CodeGen/ARM/fusedMAC.ll b/test/CodeGen/ARM/fusedMAC.ll
new file mode 100644
index 0000000..40e8bb2
--- /dev/null
+++ b/test/CodeGen/ARM/fusedMAC.ll
@@ -0,0 +1,100 @@
+; RUN: llc < %s -march=arm -mattr=+neon,+vfp4 | FileCheck %s
+; Check generated fused MAC and MLS.
+
+define double @fusedMACTest1(double %d1, double %d2, double %d3) {
+;CHECK: fusedMACTest1:
+;CHECK: vfma.f64
+  %1 = fmul double %d1, %d2
+  %2 = fadd double %1, %d3
+  ret double %2
+}
+
+define float @fusedMACTest2(float %f1, float %f2, float %f3) {
+;CHECK: fusedMACTest2:
+;CHECK: vfma.f32
+  %1 = fmul float %f1, %f2
+  %2 = fadd float %1, %f3
+  ret float %2
+}
+
+define double @fusedMACTest3(double %d1, double %d2, double %d3) {
+;CHECK: fusedMACTest3:
+;CHECK: vfms.f64
+  %1 = fmul double %d2, %d3
+  %2 = fsub double %d1, %1
+  ret double %2
+}
+
+define float @fusedMACTest4(float %f1, float %f2, float %f3) {
+;CHECK: fusedMACTest4:
+;CHECK: vfms.f32
+  %1 = fmul float %f2, %f3
+  %2 = fsub float %f1, %1
+  ret float %2
+}
+
+define double @fusedMACTest5(double %d1, double %d2, double %d3) {
+;CHECK: fusedMACTest5:
+;CHECK: vfnma.f64
+  %1 = fmul double %d1, %d2
+  %2 = fsub double -0.0, %1
+  %3 = fsub double %2, %d3
+  ret double %3
+}
+
+define float @fusedMACTest6(float %f1, float %f2, float %f3) {
+;CHECK: fusedMACTest6:
+;CHECK: vfnma.f32
+  %1 = fmul float %f1, %f2
+  %2 = fsub float -0.0, %1
+  %3 = fsub float %2, %f3
+  ret float %3
+}
+
+define double @fusedMACTest7(double %d1, double %d2, double %d3) {
+;CHECK: fusedMACTest7:
+;CHECK: vfnms.f64
+  %1 = fmul double %d1, %d2
+  %2 = fsub double %1, %d3
+  ret double %2
+}
+
+define float @fusedMACTest8(float %f1, float %f2, float %f3) {
+;CHECK: fusedMACTest8:
+;CHECK: vfnms.f32
+  %1 = fmul float %f1, %f2
+  %2 = fsub float %1, %f3
+  ret float %2
+}
+
+define <2 x float> @fusedMACTest9(<2 x float> %a, <2 x float> %b) {
+;CHECK: fusedMACTest9:
+;CHECK: vfma.f32
+  %mul = fmul <2 x float> %a, %b
+  %add = fadd <2 x float> %mul, %a
+  ret <2 x float> %add
+}
+
+define <2 x float> @fusedMACTest10(<2 x float> %a, <2 x float> %b) {
+;CHECK: fusedMACTest10:
+;CHECK: vfms.f32
+  %mul = fmul <2 x float> %a, %b
+  %sub = fsub <2 x float> %a, %mul
+  ret <2 x float> %sub
+}
+
+define <4 x float> @fusedMACTest11(<4 x float> %a, <4 x float> %b) {
+;CHECK: fusedMACTest11:
+;CHECK: vfma.f32
+  %mul = fmul <4 x float> %a, %b
+  %add = fadd <4 x float> %mul, %a
+  ret <4 x float> %add
+}
+
+define <4 x float> @fusedMACTest12(<4 x float> %a, <4 x float> %b) {
+;CHECK: fusedMACTest12:
+;CHECK: vfms.f32
+  %mul = fmul <4 x float> %a, %b
+  %sub = fsub <4 x float> %a, %mul
+  ret <4 x float> %sub
+}
diff --git a/test/CodeGen/ARM/hello.ll b/test/CodeGen/ARM/hello.ll
index 9f46ae0..893b426 100644
--- a/test/CodeGen/ARM/hello.ll
+++ b/test/CodeGen/ARM/hello.ll
@@ -2,7 +2,7 @@
 ; RUN: llc < %s -mtriple=armv6-linux-gnueabi | grep mov | count 1
 ; RUN: llc < %s -mtriple=armv6-linux-gnu --disable-fp-elim | \
 ; RUN:   grep mov | count 2
-; RUN: llc < %s -mtriple=armv6-apple-darwin | grep mov | count 2
+; RUN: llc < %s -mtriple=armv6-apple-ios | grep mov | count 2
 
 @str = internal constant [12 x i8] c"Hello World\00"
 
diff --git a/test/CodeGen/ARM/ifcvt1.ll b/test/CodeGen/ARM/ifcvt1.ll
index b073a05..cd870bb 100644
--- a/test/CodeGen/ARM/ifcvt1.ll
+++ b/test/CodeGen/ARM/ifcvt1.ll
@@ -1,15 +1,17 @@
-; RUN: llc < %s -march=arm -mattr=+v4t
-; RUN: llc < %s -march=arm -mattr=+v4t | grep bx | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
 
 define i32 @t1(i32 %a, i32 %b) {
+; CHECK: t1:
 	%tmp2 = icmp eq i32 %a, 0
 	br i1 %tmp2, label %cond_false, label %cond_true
 
 cond_true:
+; CHECK: subeq r0, r1, #1
 	%tmp5 = add i32 %b, 1
 	ret i32 %tmp5
 
 cond_false:
+; CHECK: addne r0, r1, #1
 	%tmp7 = add i32 %b, -1
 	ret i32 %tmp7
 }
diff --git a/test/CodeGen/ARM/ifcvt10.ll b/test/CodeGen/ARM/ifcvt10.ll
index 18f87bf..a5082d8 100644
--- a/test/CodeGen/ARM/ifcvt10.ll
+++ b/test/CodeGen/ARM/ifcvt10.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=arm-apple-darwin -mcpu=cortex-a9 | FileCheck %s
+; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-a9 | FileCheck %s
 ; rdar://8402126
 ; Make sure if-converter is not predicating vldmia and ldmia. These are
 ; micro-coded and would have long issue latency even if predicated on
diff --git a/test/CodeGen/ARM/ifcvt3.ll b/test/CodeGen/ARM/ifcvt3.ll
index 3e2c578..eef4de0 100644
--- a/test/CodeGen/ARM/ifcvt3.ll
+++ b/test/CodeGen/ARM/ifcvt3.ll
@@ -1,14 +1,19 @@
-; RUN: llc < %s -march=arm -mattr=+v4t
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
 ; RUN: llc < %s -march=arm -mattr=+v4t | grep cmpne | count 1
 ; RUN: llc < %s -march=arm -mattr=+v4t | grep bx | count 2
 
 define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) {
+; CHECK: t1:
+; CHECK: cmp r2, #1
+; CHECK: cmpne r2, #7
 	switch i32 %c, label %cond_next [
 		 i32 1, label %cond_true
 		 i32 7, label %cond_true
 	]
 
 cond_true:
+; CHECK: addne r0
+; CHECK: bxne
 	%tmp12 = add i32 %a, 1
 	%tmp1518 = add i32 %tmp12, %b
 	ret i32 %tmp1518
diff --git a/test/CodeGen/ARM/ifcvt5.ll b/test/CodeGen/ARM/ifcvt5.ll
index 3615055..95f5c97 100644
--- a/test/CodeGen/ARM/ifcvt5.ll
+++ b/test/CodeGen/ARM/ifcvt5.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s
 
 @x = external global i32*		; <i32**> [#uses=1]
 
diff --git a/test/CodeGen/ARM/ifcvt6.ll b/test/CodeGen/ARM/ifcvt6.ll
index 2327657..a00deda 100644
--- a/test/CodeGen/ARM/ifcvt6.ll
+++ b/test/CodeGen/ARM/ifcvt6.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s
 
 define void @foo(i32 %X, i32 %Y) {
 entry:
diff --git a/test/CodeGen/ARM/insn-sched1.ll b/test/CodeGen/ARM/insn-sched1.ll
index 1d32322..d188fae 100644
--- a/test/CodeGen/ARM/insn-sched1.ll
+++ b/test/CodeGen/ARM/insn-sched1.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=arm -mattr=+v6
-; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6 |\
+; RUN: llc < %s -mtriple=arm-apple-ios -mattr=+v6 |\
 ; RUN:   grep mov | count 3
 
 define i32 @test(i32 %x) {
diff --git a/test/CodeGen/ARM/ldrd.ll b/test/CodeGen/ARM/ldrd.ll
index d72e9bf..a588bc3 100644
--- a/test/CodeGen/ARM/ldrd.ll
+++ b/test/CodeGen/ARM/ldrd.ll
@@ -1,6 +1,8 @@
 ; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=fast | FileCheck %s -check-prefix=A8
 ; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-m3 -regalloc=fast | FileCheck %s -check-prefix=M3
 ; rdar://6949835
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=basic | FileCheck %s -check-prefix=BASIC
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=greedy | FileCheck %s -check-prefix=GREEDY
 
 ; Magic ARM pair hints works best with linearscan / fast.
 
@@ -23,3 +25,47 @@ entry:
 	%2 = mul i64 %1, %a
 	ret i64 %2
 }
+
+; rdar://10435045 mixed LDRi8/LDRi12
+;
+; In this case, LSR generates a sequence of LDRi8/LDRi12. We should be
+; able to generate an LDRD pair here, but this is highly sensitive to
+; regalloc hinting. So, this doubles as a register allocation
+; test. RABasic currently does a better job within the inner loop
+; because of its *lack* of hinting ability. Whereas RAGreedy keeps
+; R0/R1/R2 live as the three arguments, forcing the LDRD's odd
+; destination into R3. We then sensibly split LDRD again rather then
+; evict another live range or use callee saved regs. Sorry if the test
+; is sensitive to Regalloc changes, but it is an interesting case.
+;
+; BASIC: @f
+; BASIC: %bb
+; BASIC: ldrd
+; BASIC: str
+; GREEDY: @f
+; GREEDY: %bb
+; GREEDY: ldr
+; GREEDY: ldr
+; GREEDY: str
+define void @f(i32* nocapture %a, i32* nocapture %b, i32 %n) nounwind {
+entry:
+  %0 = add nsw i32 %n, -1                         ; <i32> [#uses=2]
+  %1 = icmp sgt i32 %0, 0                         ; <i1> [#uses=1]
+  br i1 %1, label %bb, label %return
+
+bb:                                               ; preds = %bb, %entry
+  %i.03 = phi i32 [ %tmp, %bb ], [ 0, %entry ]    ; <i32> [#uses=3]
+  %scevgep = getelementptr i32* %a, i32 %i.03     ; <i32*> [#uses=1]
+  %scevgep4 = getelementptr i32* %b, i32 %i.03    ; <i32*> [#uses=1]
+  %tmp = add i32 %i.03, 1                         ; <i32> [#uses=3]
+  %scevgep5 = getelementptr i32* %a, i32 %tmp     ; <i32*> [#uses=1]
+  %2 = load i32* %scevgep, align 4                ; <i32> [#uses=1]
+  %3 = load i32* %scevgep5, align 4               ; <i32> [#uses=1]
+  %4 = add nsw i32 %3, %2                         ; <i32> [#uses=1]
+  store i32 %4, i32* %scevgep4, align 4
+  %exitcond = icmp eq i32 %tmp, %0                ; <i1> [#uses=1]
+  br i1 %exitcond, label %return, label %bb
+
+return:                                           ; preds = %bb, %entry
+  ret void
+}
diff --git a/test/CodeGen/ARM/lit.local.cfg b/test/CodeGen/ARM/lit.local.cfg
new file mode 100644
index 0000000..dd6c50d
--- /dev/null
+++ b/test/CodeGen/ARM/lit.local.cfg
@@ -0,0 +1,13 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+def getRoot(config):
+    if not config.parent:
+        return config
+    return getRoot(config.parent)
+
+root = getRoot(config)
+
+targets = set(root.targets_to_build.split())
+if not 'ARM' in targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/ARM/load_i1_select.ll b/test/CodeGen/ARM/load_i1_select.ll
new file mode 100644
index 0000000..bdd4081
--- /dev/null
+++ b/test/CodeGen/ARM/load_i1_select.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios0.0.0"
+
+; Codegen should only compare one bit of the loaded value.
+; rdar://10887484
+
+; CHECK: foo:
+; CHECK: ldrb r[[R0:[0-9]+]], [r0]
+; CHECK: tst.w r[[R0]], #1
+define void @foo(i8* %call, double* %p) nounwind {
+entry:
+  %tmp2 = load i8* %call
+  %tmp3 = trunc i8 %tmp2 to i1
+  %cond = select i1 %tmp3, double 2.000000e+00, double 1.000000e+00
+  store double %cond, double* %p
+  ret void
+}
diff --git a/test/CodeGen/ARM/log2_not_readnone.ll b/test/CodeGen/ARM/log2_not_readnone.ll
new file mode 100644
index 0000000..8068abd
--- /dev/null
+++ b/test/CodeGen/ARM/log2_not_readnone.ll
@@ -0,0 +1,15 @@
+; RUN: llc -mtriple=arm-linux-gnueabi %s -o - | FileCheck %s
+
+; Log2 and exp2 are string-matched to intrinsics. If they are not declared
+; readnone, they can't be changed to intrinsics (because they can change errno).
+
+declare double @log2(double)
+declare double @exp2(double)
+
+define void @f() {
+       ; CHECK: bl log2
+       %1 = call double @log2(double 0.000000e+00)
+       ; CHECK: bl exp2
+       %2 = call double @exp2(double 0.000000e+00)
+       ret void
+}
diff --git a/test/CodeGen/ARM/lsr-unfolded-offset.ll b/test/CodeGen/ARM/lsr-unfolded-offset.ll
index bf26a96..5b4cf9d 100644
--- a/test/CodeGen/ARM/lsr-unfolded-offset.ll
+++ b/test/CodeGen/ARM/lsr-unfolded-offset.ll
@@ -12,7 +12,7 @@
 ; CHECK: add
 
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
-target triple = "thumbv7-apple-macosx10.7.0"
+target triple = "thumbv7-apple-ios"
 
 %struct.partition_entry = type { i32, i32, i64, i64 }
 
diff --git a/test/CodeGen/ARM/machine-cse-cmp.ll b/test/CodeGen/ARM/machine-cse-cmp.ll
index c77402f..f566974 100644
--- a/test/CodeGen/ARM/machine-cse-cmp.ll
+++ b/test/CodeGen/ARM/machine-cse-cmp.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s
 ;rdar://8003725
 
 @G1 = external global i32
@@ -6,6 +6,7 @@
 
 define i32 @f1(i32 %cond1, i32 %x1, i32 %x2, i32 %x3) {
 entry:
+; CHECK: f1:
 ; CHECK: cmp
 ; CHECK: moveq
 ; CHECK-NOT: cmp
@@ -16,3 +17,31 @@ entry:
     %tmp4 = add i32 %tmp2, %tmp3
     ret i32 %tmp4
 }
+
+@foo = external global i32
+@bar = external global [250 x i8], align 1
+
+; CSE of cmp across BB boundary
+; rdar://10660865
+define void @f2() nounwind ssp {
+entry:
+; CHECK: f2:
+; CHECK: cmp
+; CHECK: poplt
+; CHECK-NOT: cmp
+; CHECK: movle
+  %0 = load i32* @foo, align 4
+  %cmp28 = icmp sgt i32 %0, 0
+  br i1 %cmp28, label %for.body.lr.ph, label %for.cond1.preheader
+
+for.body.lr.ph:                                   ; preds = %entry
+  %1 = icmp sgt i32 %0, 1
+  %smax = select i1 %1, i32 %0, i32 1
+  call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([250 x i8]* @bar, i32 0, i32 0), i8 0, i32 %smax, i32 1, i1 false)
+  unreachable
+
+for.cond1.preheader:                              ; preds = %entry
+  ret void
+}
+
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
diff --git a/test/CodeGen/ARM/memfunc.ll b/test/CodeGen/ARM/memfunc.ll
index aeda022..fe0056c 100644
--- a/test/CodeGen/ARM/memfunc.ll
+++ b/test/CodeGen/ARM/memfunc.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -mtriple=armv7-apple-ios -o - | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7m-darwin-eabi -o - | FileCheck %s --check-prefix=DARWIN
 ; RUN: llc < %s -mtriple=arm-none-eabi -o - | FileCheck --check-prefix=EABI %s
 
 @from = common global [500 x i32] zeroinitializer, align 4
@@ -18,6 +19,8 @@ entry:
         ; EABI memset swaps arguments
         ; CHECK: mov r1, #0
         ; CHECK: memset
+        ; DARWIN: movs r1, #0
+        ; DARWIN: memset
         ; EABI: mov r2, #0
         ; EABI: __aeabi_memset
         call void @llvm.memset.p0i8.i32(i8* bitcast ([500 x i32]* @from to i8*), i8 0, i32 500, i32 0, i1 false)
diff --git a/test/CodeGen/ARM/neon_spill.ll b/test/CodeGen/ARM/neon_spill.ll
new file mode 100644
index 0000000..677b9c2
--- /dev/null
+++ b/test/CodeGen/ARM/neon_spill.ll
@@ -0,0 +1,53 @@
+; RUN: llc < %s -verify-machineinstrs
+; PR12177
+;
+; This test case spills a QQQQ register.
+;
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv7-none-linux-gnueabi"
+
+%0 = type { %1*, i32, i32, i32, i8 }
+%1 = type { i32 (...)** }
+%2 = type { i8*, i8*, i8*, i32 }
+%3 = type { %4 }
+%4 = type { i32 (...)**, %2, %4*, i8, i8 }
+
+declare arm_aapcs_vfpcc void @func1(%0*, float* nocapture, float* nocapture, %2*) nounwind
+
+declare arm_aapcs_vfpcc %0** @func2()
+
+declare arm_aapcs_vfpcc %2* @func3(%2*, %2*, i32)
+
+declare arm_aapcs_vfpcc %2** @func4()
+
+define arm_aapcs_vfpcc void @foo(%3* nocapture) nounwind align 2 {
+  call void @llvm.arm.neon.vst4.v4i32(i8* undef, <4 x i32> <i32 0, i32 1065353216, i32 1073741824, i32 1077936128>, <4 x i32> <i32 1082130432, i32 1084227584, i32 1086324736, i32 1088421888>, <4 x i32> <i32 1090519040, i32 1091567616, i32 1092616192, i32 1093664768>, <4 x i32> <i32 1094713344, i32 1095761920, i32 1096810496, i32 1097859072>, i32 16) nounwind
+  %2 = call arm_aapcs_vfpcc  %0** @func2() nounwind
+  %3 = load %0** %2, align 4, !tbaa !0
+  store float 0.000000e+00, float* undef, align 4
+  %4 = call arm_aapcs_vfpcc  %2* @func3(%2* undef, %2* undef, i32 2956) nounwind
+  call arm_aapcs_vfpcc  void @func1(%0* %3, float* undef, float* undef, %2* undef)
+  %5 = call arm_aapcs_vfpcc  %0** @func2() nounwind
+  store float 1.000000e+00, float* undef, align 4
+  call arm_aapcs_vfpcc  void @func1(%0* undef, float* undef, float* undef, %2* undef)
+  store float 1.500000e+01, float* undef, align 4
+  %6 = call arm_aapcs_vfpcc  %2** @func4() nounwind
+  %7 = call arm_aapcs_vfpcc  %2* @func3(%2* undef, %2* undef, i32 2971) nounwind
+  %8 = fadd float undef, -1.000000e+05
+  store float %8, float* undef, align 16, !tbaa !3
+  %9 = call arm_aapcs_vfpcc  i32 @rand() nounwind
+  %10 = fmul float undef, 2.000000e+05
+  %11 = fadd float %10, -1.000000e+05
+  store float %11, float* undef, align 4, !tbaa !3
+  call void @llvm.arm.neon.vst4.v4i32(i8* undef, <4 x i32> <i32 0, i32 1065353216, i32 1073741824, i32 1077936128>, <4 x i32> <i32 1082130432, i32 1084227584, i32 1086324736, i32 1088421888>, <4 x i32> <i32 1090519040, i32 1091567616, i32 1092616192, i32 1093664768>, <4 x i32> <i32 1094713344, i32 1095761920, i32 1096810496, i32 1097859072>, i32 16) nounwind
+  ret void
+}
+
+declare void @llvm.arm.neon.vst4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind
+
+declare arm_aapcs_vfpcc i32 @rand()
+
+!0 = metadata !{metadata !"any pointer", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
+!3 = metadata !{metadata !"float", metadata !1}
diff --git a/test/CodeGen/ARM/odr_comdat.ll b/test/CodeGen/ARM/odr_comdat.ll
new file mode 100644
index 0000000..e28b578
--- /dev/null
+++ b/test/CodeGen/ARM/odr_comdat.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s -check-prefix=ARMGNUEABI
+
+; Check that a COMDAT group is generated correctly for a static data member
+; of an instantiated C++ class template.
+; See http://sourcery.mentor.com/public/cxx-abi/abi.html#vague-itemplate
+; (section 5.2.6, "Instantiated templates"):
+; "Any static member data object is emitted in a COMDAT identified by its mangled
+;  name, in any object file with a reference to its name symbol."
+
+; Case 1: variable is not explicitly initialized, and ends up in a .bss section
+; ARMGNUEABI: .section        .bss._ZN1CIiE1iE,"aGw",%nobits,_ZN1CIiE1iE,comdat
+@_ZN1CIiE1iE = weak_odr global i32 0, align 4
+
+; Case 2: variable is explicitly initialized, and ends up in a .data section
+; ARMGNUEABI: .section        .data._ZN1CIiE1jE,"aGw",%progbits,_ZN1CIiE1jE,comdat
+@_ZN1CIiE1jE = weak_odr global i32 12, align 4
diff --git a/test/CodeGen/ARM/rev.ll b/test/CodeGen/ARM/rev.ll
index ea44c28..6bb6743 100644
--- a/test/CodeGen/ARM/rev.ll
+++ b/test/CodeGen/ARM/rev.ll
@@ -112,11 +112,11 @@ entry:
   ret i32 %conv3
 }
 
+; rdar://10750814
 define zeroext i16 @test9(i16 zeroext %v) nounwind readnone {
 entry:
 ; CHECK: test9
-; CHECK: rev r0, r0
-; CHECK: lsr r0, r0, #16
+; CHECK: rev16 r0, r0
   %conv = zext i16 %v to i32
   %shr4 = lshr i32 %conv, 8
   %shl = shl nuw nsw i32 %conv, 8
diff --git a/test/CodeGen/ARM/select_xform.ll b/test/CodeGen/ARM/select_xform.ll
index 8a3133a..3a66ec5 100644
--- a/test/CodeGen/ARM/select_xform.ll
+++ b/test/CodeGen/ARM/select_xform.ll
@@ -58,3 +58,49 @@ define i32 @t4(i32 %a, i32 %b, i32 %x, i32 %y) nounwind {
   %s = or i32 %z, %y
  ret i32 %s
 }
+
+define i32 @t5(i32 %a, i32 %b, i32 %c) nounwind {
+entry:
+; ARM: t5:
+; ARM-NOT: moveq
+; ARM: orreq r2, r2, #1
+
+; T2: t5:
+; T2-NOT: moveq
+; T2: orreq.w r2, r2, #1
+  %tmp1 = icmp eq i32 %a, %b
+  %tmp2 = zext i1 %tmp1 to i32
+  %tmp3 = or i32 %tmp2, %c
+  ret i32 %tmp3
+}
+
+define i32 @t6(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
+; ARM: t6:
+; ARM-NOT: movge
+; ARM: eorlt r3, r3, r2
+
+; T2: t6:
+; T2-NOT: movge
+; T2: eorlt.w r3, r3, r2
+  %cond = icmp slt i32 %a, %b
+  %tmp1 = select i1 %cond, i32 %c, i32 0
+  %tmp2 = xor i32 %tmp1, %d
+  ret i32 %tmp2
+}
+
+define i32 @t7(i32 %a, i32 %b, i32 %c) nounwind {
+entry:
+; ARM: t7:
+; ARM-NOT: lsleq
+; ARM: andeq r2, r2, r2, lsl #1
+
+; T2: t7:
+; T2-NOT: lsleq.w
+; T2: andeq.w r2, r2, r2, lsl #1
+  %tmp1 = shl i32 %c, 1
+  %cond = icmp eq i32 %a, %b
+  %tmp2 = select i1 %cond, i32 %tmp1, i32 -1
+  %tmp3 = and i32 %c, %tmp2
+  ret i32 %tmp3
+}
+
diff --git a/test/CodeGen/ARM/shifter_operand.ll b/test/CodeGen/ARM/shifter_operand.ll
index 964cef0..521ffa1 100644
--- a/test/CodeGen/ARM/shifter_operand.ll
+++ b/test/CodeGen/ARM/shifter_operand.ll
@@ -54,12 +54,12 @@ declare i8* @malloc(...)
 define fastcc void @test4(i16 %addr) nounwind {
 entry:
 ; A8: test4:
-; A8: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2]
-; A8: str [[REG]], [r0, r1, lsl #2]
+; A8: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2]!
+; A8: str [[REG]], [r0]
 
 ; A9: test4:
-; A9: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2]
-; A9: str [[REG]], [r0, r1, lsl #2]
+; A9: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2]!
+; A9: str [[REG]], [r0]
   %0 = tail call i8* (...)* @malloc(i32 undef) nounwind
   %1 = bitcast i8* %0 to i32*
   %2 = sext i16 %addr to i32
diff --git a/test/CodeGen/ARM/spill-q.ll b/test/CodeGen/ARM/spill-q.ll
index bf4e55c..057ea11 100644
--- a/test/CodeGen/ARM/spill-q.ll
+++ b/test/CodeGen/ARM/spill-q.ll
@@ -11,7 +11,7 @@ declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly
 
 define void @aaa(%quuz* %this, i8* %block) {
 ; CHECK: aaa:
-; CHECK: bic sp, sp, #15
+; CHECK: bic {{.*}}, #15
 ; CHECK: vst1.64 {{.*}}sp, :128
 ; CHECK: vld1.64 {{.*}}sp, :128
 entry:
diff --git a/test/CodeGen/ARM/tail-dup.ll b/test/CodeGen/ARM/tail-dup.ll
new file mode 100644
index 0000000..e015bf0
--- /dev/null
+++ b/test/CodeGen/ARM/tail-dup.ll
@@ -0,0 +1,44 @@
+; RUN: llc < %s -mtriple=thumb-apple-darwin -relocation-model=dynamic-no-pic -mcpu=cortex-a8 -asm-verbose=false | FileCheck %s
+
+; We should be able to tail-duplicate the basic block containing the indirectbr
+; into all of its predecessors.
+; CHECK: fn:
+; CHECK: mov pc
+; CHECK: mov pc
+; CHECK: mov pc
+
+@fn.codetable = internal unnamed_addr constant [3 x i8*] [i8* blockaddress(@fn, %RETURN), i8* blockaddress(@fn, %INCREMENT), i8* blockaddress(@fn, %DECREMENT)], align 4
+
+define i32 @fn(i32* nocapture %opcodes) nounwind readonly ssp {
+entry:
+  %0 = load i32* %opcodes, align 4, !tbaa !0
+  %arrayidx = getelementptr inbounds [3 x i8*]* @fn.codetable, i32 0, i32 %0
+  br label %indirectgoto
+
+INCREMENT:                                        ; preds = %indirectgoto
+  %inc = add nsw i32 %result.0, 1
+  %1 = load i32* %opcodes.addr.0, align 4, !tbaa !0
+  %arrayidx2 = getelementptr inbounds [3 x i8*]* @fn.codetable, i32 0, i32 %1
+  br label %indirectgoto
+
+DECREMENT:                                        ; preds = %indirectgoto
+  %dec = add nsw i32 %result.0, -1
+  %2 = load i32* %opcodes.addr.0, align 4, !tbaa !0
+  %arrayidx4 = getelementptr inbounds [3 x i8*]* @fn.codetable, i32 0, i32 %2
+  br label %indirectgoto
+
+indirectgoto:                                     ; preds = %DECREMENT, %INCREMENT, %entry
+  %result.0 = phi i32 [ 0, %entry ], [ %dec, %DECREMENT ], [ %inc, %INCREMENT ]
+  %opcodes.pn = phi i32* [ %opcodes, %entry ], [ %opcodes.addr.0, %DECREMENT ], [ %opcodes.addr.0, %INCREMENT ]
+  %indirect.goto.dest.in = phi i8** [ %arrayidx, %entry ], [ %arrayidx4, %DECREMENT ], [ %arrayidx2, %INCREMENT ]
+  %opcodes.addr.0 = getelementptr inbounds i32* %opcodes.pn, i32 1
+  %indirect.goto.dest = load i8** %indirect.goto.dest.in, align 4
+  indirectbr i8* %indirect.goto.dest, [label %RETURN, label %INCREMENT, label %DECREMENT]
+
+RETURN:                                           ; preds = %indirectgoto
+  ret i32 %result.0
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/ARM/test-sharedidx.ll b/test/CodeGen/ARM/test-sharedidx.ll
new file mode 100644
index 0000000..93340c3
--- /dev/null
+++ b/test/CodeGen/ARM/test-sharedidx.ll
@@ -0,0 +1,96 @@
+; RUN: llc < %s -mtriple=armv7-apple-ios -mcpu=cortex-a9 -stress-ivchain | FileCheck %s
+; REQUIRES: asserts
+
+; @sharedidx is an unrolled variant of this loop:
+;  for (unsigned long i = 0; i < len; i += s) {
+;    c[i] = a[i] + b[i];
+;  }
+; where 's' cannot be folded into the addressing mode.
+;
+; This is not quite profitable to chain. But with -stress-ivchain, we
+; can form three address chains in place of the shared induction
+; variable.
+
+; rdar://10674430
+define void @sharedidx(i8* nocapture %a, i8* nocapture %b, i8* nocapture %c, i32 %s, i32 %len) nounwind ssp {
+entry:
+; CHECK: sharedidx:
+  %cmp8 = icmp eq i32 %len, 0
+  br i1 %cmp8, label %for.end, label %for.body
+
+for.body:                                         ; preds = %entry, %for.body.3
+; CHECK: %for.body
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+  %i.09 = phi i32 [ %add5.3, %for.body.3 ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i8* %a, i32 %i.09
+  %0 = load i8* %arrayidx, align 1
+  %conv6 = zext i8 %0 to i32
+  %arrayidx1 = getelementptr inbounds i8* %b, i32 %i.09
+  %1 = load i8* %arrayidx1, align 1
+  %conv27 = zext i8 %1 to i32
+  %add = add nsw i32 %conv27, %conv6
+  %conv3 = trunc i32 %add to i8
+  %arrayidx4 = getelementptr inbounds i8* %c, i32 %i.09
+  store i8 %conv3, i8* %arrayidx4, align 1
+  %add5 = add i32 %i.09, %s
+  %cmp = icmp ult i32 %add5, %len
+  br i1 %cmp, label %for.body.1, label %for.end
+
+for.end:                                          ; preds = %for.body, %for.body.1, %for.body.2, %for.body.3, %entry
+  ret void
+
+for.body.1:                                       ; preds = %for.body
+; CHECK: %for.body.1
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+  %arrayidx.1 = getelementptr inbounds i8* %a, i32 %add5
+  %2 = load i8* %arrayidx.1, align 1
+  %conv6.1 = zext i8 %2 to i32
+  %arrayidx1.1 = getelementptr inbounds i8* %b, i32 %add5
+  %3 = load i8* %arrayidx1.1, align 1
+  %conv27.1 = zext i8 %3 to i32
+  %add.1 = add nsw i32 %conv27.1, %conv6.1
+  %conv3.1 = trunc i32 %add.1 to i8
+  %arrayidx4.1 = getelementptr inbounds i8* %c, i32 %add5
+  store i8 %conv3.1, i8* %arrayidx4.1, align 1
+  %add5.1 = add i32 %add5, %s
+  %cmp.1 = icmp ult i32 %add5.1, %len
+  br i1 %cmp.1, label %for.body.2, label %for.end
+
+for.body.2:                                       ; preds = %for.body.1
+; CHECK: %for.body.2
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+  %arrayidx.2 = getelementptr inbounds i8* %a, i32 %add5.1
+  %4 = load i8* %arrayidx.2, align 1
+  %conv6.2 = zext i8 %4 to i32
+  %arrayidx1.2 = getelementptr inbounds i8* %b, i32 %add5.1
+  %5 = load i8* %arrayidx1.2, align 1
+  %conv27.2 = zext i8 %5 to i32
+  %add.2 = add nsw i32 %conv27.2, %conv6.2
+  %conv3.2 = trunc i32 %add.2 to i8
+  %arrayidx4.2 = getelementptr inbounds i8* %c, i32 %add5.1
+  store i8 %conv3.2, i8* %arrayidx4.2, align 1
+  %add5.2 = add i32 %add5.1, %s
+  %cmp.2 = icmp ult i32 %add5.2, %len
+  br i1 %cmp.2, label %for.body.3, label %for.end
+
+for.body.3:                                       ; preds = %for.body.2
+; CHECK: %for.body.3
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+  %arrayidx.3 = getelementptr inbounds i8* %a, i32 %add5.2
+  %6 = load i8* %arrayidx.3, align 1
+  %conv6.3 = zext i8 %6 to i32
+  %arrayidx1.3 = getelementptr inbounds i8* %b, i32 %add5.2
+  %7 = load i8* %arrayidx1.3, align 1
+  %conv27.3 = zext i8 %7 to i32
+  %add.3 = add nsw i32 %conv27.3, %conv6.3
+  %conv3.3 = trunc i32 %add.3 to i8
+  %arrayidx4.3 = getelementptr inbounds i8* %c, i32 %add5.2
+  store i8 %conv3.3, i8* %arrayidx4.3, align 1
+  %add5.3 = add i32 %add5.2, %s
+  %cmp.3 = icmp ult i32 %add5.3, %len
+  br i1 %cmp.3, label %for.body, label %for.end
+}
diff --git a/test/CodeGen/ARM/vmov.ll b/test/CodeGen/ARM/vmov.ll
index be95657..0c23879 100644
--- a/test/CodeGen/ARM/vmov.ll
+++ b/test/CodeGen/ARM/vmov.ll
@@ -381,3 +381,20 @@ entry:
   store <4 x float> %b, <4 x float> *%p
   ret void
 }
+
+; Vector any_extends must be selected as either vmovl.u or vmovl.s.
+; rdar://10723651
+define void @any_extend(<4 x i1> %x, <4 x i32> %y) nounwind ssp {
+entry:
+;CHECK: any_extend
+;CHECK: vmovl
+  %and.i186 = zext <4 x i1> %x to <4 x i32>
+  %add.i185 = sub <4 x i32> %and.i186, %y
+  %sub.i = sub <4 x i32> %add.i185, zeroinitializer
+  %add.i = add <4 x i32> %sub.i, zeroinitializer
+  %vmovn.i = trunc <4 x i32> %add.i to <4 x i16>
+  tail call void @llvm.arm.neon.vst1.v4i16(i8* undef, <4 x i16> %vmovn.i, i32 2)
+  unreachable
+}
+
+declare void @llvm.arm.neon.vst1.v4i16(i8*, <4 x i16>, i32) nounwind
diff --git a/test/CodeGen/ARM/vst2.ll b/test/CodeGen/ARM/vst2.ll
index 915a84b..fb05a20 100644
--- a/test/CodeGen/ARM/vst2.ll
+++ b/test/CodeGen/ARM/vst2.ll
@@ -110,6 +110,24 @@ define void @vst2Qf(float* %A, <4 x float>* %B) nounwind {
 	ret void
 }
 
+define i8* @vst2update(i8* %out, <4 x i16>* %B) nounwind {
+;CHECK: vst2update
+;CHECK: vst2.16 {d16, d17}, [r0]!
+	%tmp1 = load <4 x i16>* %B
+	tail call void @llvm.arm.neon.vst2.v4i16(i8* %out, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 2)
+	%t5 = getelementptr inbounds i8* %out, i32 16
+	ret i8* %t5
+}
+
+define i8* @vst2update2(i8 * %out, <4 x float> * %this) nounwind optsize ssp align 2 {
+;CHECK: vst2update2
+;CHECK: vst2.32 {d16, d17, d18, d19}, [r0]!
+  %tmp1 = load <4 x float>* %this
+  call void @llvm.arm.neon.vst2.v4f32(i8* %out, <4 x float> %tmp1, <4 x float> %tmp1, i32 4) nounwind
+  %tmp2 = getelementptr inbounds i8* %out, i32  32
+  ret i8* %tmp2
+}
+
 declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind
 declare void @llvm.arm.neon.vst2.v4i16(i8*, <4 x i16>, <4 x i16>, i32) nounwind
 declare void @llvm.arm.neon.vst2.v2i32(i8*, <2 x i32>, <2 x i32>, i32) nounwind
diff --git a/test/CodeGen/CBackend/X86/dg.exp b/test/CodeGen/CBackend/X86/dg.exp
deleted file mode 100644
index 44e3a5e..0000000
--- a/test/CodeGen/CBackend/X86/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target X86] && [llvm_supports_target CBackend] } {
-    RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp,s}]]
-}
diff --git a/test/CodeGen/CBackend/X86/lit.local.cfg b/test/CodeGen/CBackend/X86/lit.local.cfg
new file mode 100644
index 0000000..037d8c3
--- /dev/null
+++ b/test/CodeGen/CBackend/X86/lit.local.cfg
@@ -0,0 +1,13 @@
+# NOTE(review): the removed dg.exp also globbed '*.s' here; confirm none are needed.
+config.suffixes = ['.ll', '.c', '.cpp']
+
+def getRoot(config):
+    while config.parent:
+        config = config.parent
+    return config
+
+root = getRoot(config)
+targets = set(root.targets_to_build.split())
+if 'CBackend' not in targets or 'X86' not in targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/CBackend/dg.exp b/test/CodeGen/CBackend/dg.exp
deleted file mode 100644
index 9d78940..0000000
--- a/test/CodeGen/CBackend/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target CBackend] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/CBackend/lit.local.cfg b/test/CodeGen/CBackend/lit.local.cfg
new file mode 100644
index 0000000..0dce170
--- /dev/null
+++ b/test/CodeGen/CBackend/lit.local.cfg
@@ -0,0 +1,13 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+# Climb to the top-level lit config; it is the one that carries targets_to_build.
+def getRoot(config):
+    while config.parent:
+        config = config.parent
+    return config
+
+root = getRoot(config)
+targets = set(root.targets_to_build.split())
+if 'CBackend' not in targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/CPP/2012-02-05-UnitVarCrash.ll b/test/CodeGen/CPP/2012-02-05-UnitVarCrash.ll
new file mode 100644
index 0000000..419f594
--- /dev/null
+++ b/test/CodeGen/CPP/2012-02-05-UnitVarCrash.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=cpp
+declare void @foo(<4 x i32>)
+define void @bar() {
+  call void @foo(<4 x i32> <i32 0, i32 1, i32 2, i32 3>)
+  ret void
+}
diff --git a/test/CodeGen/CPP/dg.exp b/test/CodeGen/CPP/dg.exp
deleted file mode 100644
index 3276dcc..0000000
--- a/test/CodeGen/CPP/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target CppBackend] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/CPP/lit.local.cfg b/test/CodeGen/CPP/lit.local.cfg
new file mode 100644
index 0000000..96596d8
--- /dev/null
+++ b/test/CodeGen/CPP/lit.local.cfg
@@ -0,0 +1,13 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+# Climb to the top-level lit config; it is the one that carries targets_to_build.
+def getRoot(config):
+    while config.parent:
+        config = config.parent
+    return config
+
+root = getRoot(config)
+targets = set(root.targets_to_build.split())
+if 'CppBackend' not in targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/CellSPU/dg.exp b/test/CodeGen/CellSPU/dg.exp
deleted file mode 100644
index d416479..0000000
--- a/test/CodeGen/CellSPU/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target CellSPU] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/CellSPU/lit.local.cfg b/test/CodeGen/CellSPU/lit.local.cfg
new file mode 100644
index 0000000..6ae0972
--- /dev/null
+++ b/test/CodeGen/CellSPU/lit.local.cfg
@@ -0,0 +1,13 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+# Climb to the top-level lit config; it is the one that carries targets_to_build.
+def getRoot(config):
+    while config.parent:
+        config = config.parent
+    return config
+
+root = getRoot(config)
+targets = set(root.targets_to_build.split())
+if 'CellSPU' not in targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/CellSPU/rotate_ops.ll b/test/CodeGen/CellSPU/rotate_ops.ll
index b1219e6..9770935 100644
--- a/test/CodeGen/CellSPU/rotate_ops.ll
+++ b/test/CodeGen/CellSPU/rotate_ops.ll
@@ -163,7 +163,7 @@ define i8 @rotri8(i8 %A) {
 define <2 x float> @test1(<4 x float> %param )
 {
 ; CHECK: test1
-; CHECK: rotqbyi
+; CHECK: shufb
   %el = extractelement <4 x float> %param, i32 1
   %vec1 = insertelement <1 x float> undef, float %el, i32 0
   %rv = shufflevector <1 x float> %vec1, <1 x float> undef, <2 x i32><i32 0,i32 0>
diff --git a/test/CodeGen/Generic/2007-12-31-UnusedSelector.ll b/test/CodeGen/Generic/2007-12-31-UnusedSelector.ll
index 943ed88..d67559e 100644
--- a/test/CodeGen/Generic/2007-12-31-UnusedSelector.ll
+++ b/test/CodeGen/Generic/2007-12-31-UnusedSelector.ll
@@ -30,8 +30,6 @@ UnifiedUnreachableBlock:		; preds = %entry
 
 declare void @__cxa_throw(i8*, i8*, void (i8*)*) noreturn 
 
-declare i32 @llvm.eh.selector.i32(i8*, i8*, ...)
-
 declare void @__cxa_end_catch()
 
 declare i32 @__gxx_personality_v0(...)
diff --git a/test/CodeGen/Generic/2009-11-16-BadKillsCrash.ll b/test/CodeGen/Generic/2009-11-16-BadKillsCrash.ll
index 3cbf4c5..b483009 100644
--- a/test/CodeGen/Generic/2009-11-16-BadKillsCrash.ll
+++ b/test/CodeGen/Generic/2009-11-16-BadKillsCrash.ll
@@ -15,8 +15,6 @@
 %"struct.std::locale::facet" = type { i32 (...)**, i32 }
 %union..0._15 = type { i32 }
 
-declare i8* @llvm.eh.exception() nounwind readonly
-
 declare i8* @__cxa_begin_catch(i8*) nounwind
 
 declare %"struct.std::ctype<char>"* @_ZSt9use_facetISt5ctypeIcEERKT_RKSt6locale(%"struct.std::locale"*)
diff --git a/test/CodeGen/Generic/dg.exp b/test/CodeGen/Generic/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/CodeGen/Generic/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/CodeGen/Generic/lit.local.cfg b/test/CodeGen/Generic/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/CodeGen/Generic/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/CodeGen/Hexagon/args.ll b/test/CodeGen/Hexagon/args.ll
index b882cf7..e9ac8b6 100644
--- a/test/CodeGen/Hexagon/args.ll
+++ b/test/CodeGen/Hexagon/args.ll
@@ -1,5 +1,4 @@
-; RUN: true
-; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv4 -disable-dfa-sched < %s | FileCheck %s
 ; CHECK: r[[T0:[0-9]+]] = #7
 ; CHECK: memw(r29 + #0) = r[[T0]]
 ; CHECK: r0 = #1
diff --git a/test/CodeGen/Hexagon/combine.ll b/test/CodeGen/Hexagon/combine.ll
index 36abd74..7219985 100644
--- a/test/CodeGen/Hexagon/combine.ll
+++ b/test/CodeGen/Hexagon/combine.ll
@@ -1,5 +1,4 @@
-; RUN: true
-; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
 ; CHECK: combine(r{{[0-9]+}}, r{{[0-9]+}})
 
 @j = external global i32
diff --git a/test/CodeGen/Hexagon/dg.exp b/test/CodeGen/Hexagon/dg.exp
deleted file mode 100644
index 89f45e6..0000000
--- a/test/CodeGen/Hexagon/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target Hexagon] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/Hexagon/double.ll b/test/CodeGen/Hexagon/double.ll
index 04c2ec1..c3b6f37 100644
--- a/test/CodeGen/Hexagon/double.ll
+++ b/test/CodeGen/Hexagon/double.ll
@@ -1,5 +1,4 @@
-; RUN: true
-; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
 ; CHECK: __hexagon_adddf3
 ; CHECK: __hexagon_subdf3
 
diff --git a/test/CodeGen/Hexagon/float.ll b/test/CodeGen/Hexagon/float.ll
index 51acf2e..bec9f58 100644
--- a/test/CodeGen/Hexagon/float.ll
+++ b/test/CodeGen/Hexagon/float.ll
@@ -1,5 +1,4 @@
-; RUN: true
-; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
 ; CHECK: __hexagon_addsf3
 ; CHECK: __hexagon_subsf3
 
diff --git a/test/CodeGen/Hexagon/frame.ll b/test/CodeGen/Hexagon/frame.ll
index c0a9fda..dc87c73 100644
--- a/test/CodeGen/Hexagon/frame.ll
+++ b/test/CodeGen/Hexagon/frame.ll
@@ -1,5 +1,4 @@
-; RUN: true
-; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
 
 @num = external global i32
 @acc = external global i32
diff --git a/test/CodeGen/Hexagon/lit.local.cfg b/test/CodeGen/Hexagon/lit.local.cfg
new file mode 100644
index 0000000..ea12f68
--- /dev/null
+++ b/test/CodeGen/Hexagon/lit.local.cfg
@@ -0,0 +1,13 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+# Climb to the top-level lit config; it is the one that carries targets_to_build.
+def getRoot(config):
+    while config.parent:
+        config = config.parent
+    return config
+
+root = getRoot(config)
+targets = set(root.targets_to_build.split())
+if 'Hexagon' not in targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/Hexagon/mpy.ll b/test/CodeGen/Hexagon/mpy.ll
index afd6fc6..d5c5ae3 100644
--- a/test/CodeGen/Hexagon/mpy.ll
+++ b/test/CodeGen/Hexagon/mpy.ll
@@ -1,5 +1,4 @@
-; RUN: true
-; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
 ; CHECK: += mpyi
 
 define void @foo(i32 %acc, i32 %num, i32 %num2) nounwind {
diff --git a/test/CodeGen/Hexagon/static.ll b/test/CodeGen/Hexagon/static.ll
index c251bd4..1105096 100644
--- a/test/CodeGen/Hexagon/static.ll
+++ b/test/CodeGen/Hexagon/static.ll
@@ -1,13 +1,12 @@
-; RUN: true
-; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv4 -disable-dfa-sched < %s | FileCheck %s
 
 @num = external global i32
 @acc = external global i32
 @val = external global i32
 
+; CHECK: CONST32(#num)
 ; CHECK: CONST32(#acc)
 ; CHECK: CONST32(#val)
-; CHECK: CONST32(#num)
 
 define void @foo() nounwind {
 entry:
diff --git a/test/CodeGen/Hexagon/struct_args.ll b/test/CodeGen/Hexagon/struct_args.ll
index 2c962d0..cc409db 100644
--- a/test/CodeGen/Hexagon/struct_args.ll
+++ b/test/CodeGen/Hexagon/struct_args.ll
@@ -1,5 +1,4 @@
-; RUN: true
-; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
 ; CHECK: r1:0 = or(r{{[0-9]}}:{{[0-9]}}, r{{[0-9]}}:{{[0-9]}})
 
 %struct.small = type { i32, i32 }
diff --git a/test/CodeGen/Hexagon/struct_args_large.ll b/test/CodeGen/Hexagon/struct_args_large.ll
index 69de4f6..af099cd 100644
--- a/test/CodeGen/Hexagon/struct_args_large.ll
+++ b/test/CodeGen/Hexagon/struct_args_large.ll
@@ -1,5 +1,4 @@
-; RUN: true
-; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
 ; CHECK: r[[T0:[0-9]+]] = CONST32(#s2)
 ; CHECK: r[[T1:[0-9]+]] = memw(r[[T0]] + #0)
 ; CHECK: memw(r29 + #0) = r[[T1]]
diff --git a/test/CodeGen/Hexagon/vaddh.ll b/test/CodeGen/Hexagon/vaddh.ll
index 788e474..01d2041 100644
--- a/test/CodeGen/Hexagon/vaddh.ll
+++ b/test/CodeGen/Hexagon/vaddh.ll
@@ -1,5 +1,4 @@
-; RUN: true
-; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
 ; CHECK: vaddh(r{{[0-9]+}}, r{{[0-9]+}})
 
 @j = external global i32
diff --git a/test/CodeGen/MBlaze/dg.exp b/test/CodeGen/MBlaze/dg.exp
deleted file mode 100644
index bfd5e47..0000000
--- a/test/CodeGen/MBlaze/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target MBlaze] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/MBlaze/lit.local.cfg b/test/CodeGen/MBlaze/lit.local.cfg
new file mode 100644
index 0000000..e43df89
--- /dev/null
+++ b/test/CodeGen/MBlaze/lit.local.cfg
@@ -0,0 +1,13 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+# Climb to the top-level lit config; it is the one that carries targets_to_build.
+def getRoot(config):
+    while config.parent:
+        config = config.parent
+    return config
+
+root = getRoot(config)
+targets = set(root.targets_to_build.split())
+if 'MBlaze' not in targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/MSP430/dg.exp b/test/CodeGen/MSP430/dg.exp
deleted file mode 100644
index e4ea13a..0000000
--- a/test/CodeGen/MSP430/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target MSP430] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/MSP430/lit.local.cfg b/test/CodeGen/MSP430/lit.local.cfg
new file mode 100644
index 0000000..b9b654d
--- /dev/null
+++ b/test/CodeGen/MSP430/lit.local.cfg
@@ -0,0 +1,13 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+# Climb to the top-level lit config; it is the one that carries targets_to_build.
+def getRoot(config):
+    while config.parent:
+        config = config.parent
+    return config
+
+root = getRoot(config)
+targets = set(root.targets_to_build.split())
+if 'MSP430' not in targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll b/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll
index e0c745f..8479ad2 100644
--- a/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll
+++ b/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll
@@ -1,20 +1,16 @@
-; DISABLED: llc < %s -march=mips -o %t
-; DISABLED: grep seh %t | count 1
-; DISABLED: grep seb %t | count 1
-; RUN: false
-; XFAIL: *
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "mipsallegrexel-unknown-psp-elf"
+; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s 
+; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s 
 
 define signext i8 @A(i8 %e.0, i8 signext %sum)  nounwind {
 entry:
+; CHECK: seb
 	add i8 %sum, %e.0		; <i8>:0 [#uses=1]
 	ret i8 %0
 }
 
 define signext i16 @B(i16 %e.0, i16 signext %sum) nounwind {
 entry:
+; CHECK: seh
 	add i16 %sum, %e.0		; <i16>:0 [#uses=1]
 	ret i16 %0
 }
diff --git a/test/CodeGen/Mips/2008-08-01-AsmInline.ll b/test/CodeGen/Mips/2008-08-01-AsmInline.ll
index f701bf1..dbde742 100644
--- a/test/CodeGen/Mips/2008-08-01-AsmInline.ll
+++ b/test/CodeGen/Mips/2008-08-01-AsmInline.ll
@@ -1,4 +1,5 @@
 ; RUN: llc -march=mips < %s | FileCheck %s
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 < %s | FileCheck %s
 
 %struct.DWstruct = type { i32, i32 }
 
@@ -13,3 +14,40 @@ entry:
   %res = add i32 %asmresult, %asmresult1
   ret i32 %res
 }
+
+@gi2 = external global i32
+@gi1 = external global i32
+@gi0 = external global i32
+@gf0 = external global float
+@gf1 = external global float
+@gd0 = external global double
+@gd1 = external global double
+
+define void @foo0() nounwind {
+entry:
+; CHECK: addu
+  %0 = load i32* @gi1, align 4
+  %1 = load i32* @gi0, align 4
+  %2 = tail call i32 asm "addu $0, $1, $2", "=r,r,r"(i32 %0, i32 %1) nounwind
+  store i32 %2, i32* @gi2, align 4
+  ret void
+}
+
+define void @foo2() nounwind {
+entry:
+; CHECK: neg.s
+  %0 = load float* @gf1, align 4
+  %1 = tail call float asm "neg.s $0, $1", "=f,f"(float %0) nounwind
+  store float %1, float* @gf0, align 4
+  ret void
+}
+
+define void @foo3() nounwind {
+entry:
+; CHECK: neg.d
+  %0 = load double* @gd1, align 8
+  %1 = tail call double asm "neg.d $0, $1", "=f,f"(double %0) nounwind
+  store double %1, double* @gd0, align 8
+  ret void
+}
+
diff --git a/test/CodeGen/Mips/2010-07-20-Switch.ll b/test/CodeGen/Mips/2010-07-20-Switch.ll
index 23b5349..785a416 100644
--- a/test/CodeGen/Mips/2010-07-20-Switch.ll
+++ b/test/CodeGen/Mips/2010-07-20-Switch.ll
@@ -15,7 +15,7 @@ entry:
 ; PIC-O32: sll ${{[0-9]+}}, ${{[0-9]+}}, 2
 ; PIC-N64: ld $[[R0:[0-9]+]], %got_page($JTI0_0)
 ; PIC-N64: daddiu ${{[0-9]+}}, $[[R0]], %got_ofst($JTI0_0)
-; PIC-N64: dsll ${{[0-9]+}}, ${{[0-9]+}}, 2
+; PIC-N64: dsll ${{[0-9]+}}, ${{[0-9]+}}, 3
   switch i32 %0, label %bb4 [
     i32 0, label %bb5
     i32 1, label %bb1
@@ -39,3 +39,23 @@ bb4:                                              ; preds = %entry
 bb5:                                              ; preds = %entry
   ret i32 1
 }
+
+; STATIC-O32: .align  2
+; STATIC-O32: $JTI0_0:
+; STATIC-O32: .4byte
+; STATIC-O32: .4byte
+; STATIC-O32: .4byte
+; STATIC-O32: .4byte
+; PIC-O32: .align  2
+; PIC-O32: $JTI0_0:
+; PIC-O32: .gpword
+; PIC-O32: .gpword
+; PIC-O32: .gpword 
+; PIC-O32: .gpword 
+; PIC-N64: .align  3
+; PIC-N64: $JTI0_0:
+; PIC-N64: .gpdword
+; PIC-N64: .gpdword
+; PIC-N64: .gpdword 
+; PIC-N64: .gpdword 
+
diff --git a/test/CodeGen/Mips/bswap.ll b/test/CodeGen/Mips/bswap.ll
new file mode 100644
index 0000000..a8fc2cd
--- /dev/null
+++ b/test/CodeGen/Mips/bswap.ll
@@ -0,0 +1,25 @@
+; RUN: llc  < %s -march=mipsel -mcpu=mips32r2 | FileCheck %s -check-prefix=MIPS32
+; RUN: llc  < %s -march=mips64el -mcpu=mips64r2 | FileCheck %s -check-prefix=MIPS64
+
+define i32 @bswap32(i32 %x) nounwind readnone { ; 32-bit byte swap; the mips32r2 run expects wsbh followed by a 16-bit rotr of its result
+entry:
+; MIPS32: bswap32:
+; MIPS32: wsbh $[[R0:[0-9]+]]
+; MIPS32: rotr ${{[0-9]+}}, $[[R0]], 16
+  %or.3 = call i32 @llvm.bswap.i32(i32 %x)
+  ret i32 %or.3
+}
+
+define i64 @bswap64(i64 %x) nounwind readnone { ; 64-bit byte swap; the mips64r2 run expects dsbh followed by dshd on its result
+entry:
+; MIPS64: bswap64:
+; MIPS64: dsbh $[[R0:[0-9]+]]
+; MIPS64: dshd ${{[0-9]+}}, $[[R0]]
+  %or.7 = call i64 @llvm.bswap.i64(i64 %x)
+  ret i64 %or.7
+}
+
+declare i32 @llvm.bswap.i32(i32) nounwind readnone
+
+declare i64 @llvm.bswap.i64(i64) nounwind readnone
+
diff --git a/test/CodeGen/Mips/dg.exp b/test/CodeGen/Mips/dg.exp
deleted file mode 100644
index adb2cac..0000000
--- a/test/CodeGen/Mips/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target Mips] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/Mips/eh.ll b/test/CodeGen/Mips/eh.ll
index e3e336b..c3facdb 100644
--- a/test/CodeGen/Mips/eh.ll
+++ b/test/CodeGen/Mips/eh.ll
@@ -54,16 +54,10 @@ unreachable:                                      ; preds = %entry
 
 declare i8* @__cxa_allocate_exception(i32)
 
-declare i8* @llvm.eh.exception() nounwind readonly
-
 declare i32 @__gxx_personality_v0(...)
 
-declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
-
 declare i32 @llvm.eh.typeid.for(i8*) nounwind
 
-declare void @llvm.eh.resume(i8*, i32)
-
 declare void @__cxa_throw(i8*, i8*, i8*)
 
 declare i8* @__cxa_begin_catch(i8*)
diff --git a/test/CodeGen/Mips/fcopysign.ll b/test/CodeGen/Mips/fcopysign.ll
index 950c437..e494fe2 100644
--- a/test/CodeGen/Mips/fcopysign.ll
+++ b/test/CodeGen/Mips/fcopysign.ll
@@ -5,9 +5,8 @@
 define double @func0(double %d0, double %d1) nounwind readnone {
 entry:
 ; MIPS32-EL: func0:
-; MIPS32-EL: lui $[[T1:[0-9]+]], 32768
-; MIPS32-EL: ori $[[MSK1:[0-9]+]], $[[T1]], 0
 ; MIPS32-EL: mfc1 $[[HI0:[0-9]+]], $f15
+; MIPS32-EL: lui $[[MSK1:[0-9]+]], 32768
 ; MIPS32-EL: and $[[AND1:[0-9]+]], $[[HI0]], $[[MSK1]]
 ; MIPS32-EL: lui $[[T0:[0-9]+]], 32767
 ; MIPS32-EL: ori $[[MSK0:[0-9]+]], $[[T0]], 65535
@@ -18,9 +17,8 @@ entry:
 ; MIPS32-EL: mtc1 $[[LO0]], $f0
 ; MIPS32-EL: mtc1 $[[OR]], $f1
 ;
-; MIPS32-EB: lui $[[T1:[0-9]+]], 32768
-; MIPS32-EB: ori $[[MSK1:[0-9]+]], $[[T1]], 0
 ; MIPS32-EB: mfc1 $[[HI1:[0-9]+]], $f14
+; MIPS32-EB: lui $[[MSK1:[0-9]+]], 32768
 ; MIPS32-EB: and $[[AND1:[0-9]+]], $[[HI1]], $[[MSK1]]
 ; MIPS32-EB: lui $[[T0:[0-9]+]], 32767
 ; MIPS32-EB: ori $[[MSK0:[0-9]+]], $[[T0]], 65535
@@ -46,9 +44,8 @@ declare double @copysign(double, double) nounwind readnone
 define float @func1(float %f0, float %f1) nounwind readnone {
 entry:
 ; MIPS32-EL: func1:
-; MIPS32-EL: lui $[[T1:[0-9]+]], 32768
-; MIPS32-EL: ori $[[MSK1:[0-9]+]], $[[T1]], 0
 ; MIPS32-EL: mfc1 $[[ARG1:[0-9]+]], $f14
+; MIPS32-EL: lui $[[MSK1:[0-9]+]], 32768
 ; MIPS32-EL: and $[[T3:[0-9]+]], $[[ARG1]], $[[MSK1]]
 ; MIPS32-EL: lui $[[T0:[0-9]+]], 32767
 ; MIPS32-EL: ori $[[MSK0:[0-9]+]], $[[T0]], 65535
diff --git a/test/CodeGen/Mips/fmadd1.ll b/test/CodeGen/Mips/fmadd1.ll
new file mode 100644
index 0000000..435b419
--- /dev/null
+++ b/test/CodeGen/Mips/fmadd1.ll
@@ -0,0 +1,103 @@
+; RUN: llc < %s -march=mipsel -mcpu=mips32r2 -enable-no-nans-fp-math | FileCheck %s -check-prefix=32R2
+; RUN: llc < %s -march=mips64el -mcpu=mips64r2 -mattr=n64 -enable-no-nans-fp-math | FileCheck %s -check-prefix=64R2
+; RUN: llc < %s -march=mipsel -mcpu=mips32r2 | FileCheck %s -check-prefix=32R2NAN
+; RUN: llc < %s -march=mips64el -mcpu=mips64r2 -mattr=n64 | FileCheck %s -check-prefix=64R2NAN
+;
+; madd/msub are expected on every configuration; the negated forms
+; (nmadd/nmsub) are only expected under -enable-no-nans-fp-math.
+
+define float @FOO0float(float %a, float %b, float %c) nounwind readnone {
+entry:
+; 32R2: madd.s
+; 64R2: madd.s
+; 32R2NAN: madd.s
+; 64R2NAN: madd.s
+  %mul = fmul float %a, %b
+  %add = fadd float %mul, %c
+  %add1 = fadd float %add, 0.000000e+00
+  ret float %add1
+}
+
+define float @FOO1float(float %a, float %b, float %c) nounwind readnone {
+entry:
+; 32R2: msub.s
+; 64R2: msub.s
+; 32R2NAN: msub.s
+; 64R2NAN: msub.s
+  %mul = fmul float %a, %b
+  %sub = fsub float %mul, %c
+  %add = fadd float %sub, 0.000000e+00
+  ret float %add
+}
+
+define float @FOO2float(float %a, float %b, float %c) nounwind readnone {
+entry:
+; 32R2: nmadd.s
+; 64R2: nmadd.s
+; 32R2NAN: madd.s
+; 64R2NAN: madd.s
+  %mul = fmul float %a, %b
+  %add = fadd float %mul, %c
+  %sub = fsub float 0.000000e+00, %add
+  ret float %sub
+}
+
+define float @FOO3float(float %a, float %b, float %c) nounwind readnone {
+entry:
+; 32R2: nmsub.s
+; 64R2: nmsub.s
+; 32R2NAN: msub.s
+; 64R2NAN: msub.s
+  %mul = fmul float %a, %b
+  %sub = fsub float %mul, %c
+  %sub1 = fsub float 0.000000e+00, %sub
+  ret float %sub1
+}
+
+define double @FOO10double(double %a, double %b, double %c) nounwind readnone {
+entry:
+; 32R2: madd.d
+; 64R2: madd.d
+; 32R2NAN: madd.d
+; 64R2NAN: madd.d
+  %mul = fmul double %a, %b
+  %add = fadd double %mul, %c
+  %add1 = fadd double %add, 0.000000e+00
+  ret double %add1
+}
+
+define double @FOO11double(double %a, double %b, double %c) nounwind readnone {
+entry:
+; 32R2: msub.d
+; 64R2: msub.d
+; 32R2NAN: msub.d
+; 64R2NAN: msub.d
+  %mul = fmul double %a, %b
+  %sub = fsub double %mul, %c
+  %add = fadd double %sub, 0.000000e+00
+  ret double %add
+}
+
+define double @FOO12double(double %a, double %b, double %c) nounwind readnone {
+entry:
+; 32R2: nmadd.d
+; 64R2: nmadd.d
+; 32R2NAN: madd.d
+; 64R2NAN: madd.d
+  %mul = fmul double %a, %b
+  %add = fadd double %mul, %c
+  %sub = fsub double 0.000000e+00, %add
+  ret double %sub
+}
+
+define double @FOO13double(double %a, double %b, double %c) nounwind readnone {
+entry:
+; 32R2: nmsub.d
+; 64R2: nmsub.d
+; 32R2NAN: msub.d
+; 64R2NAN: msub.d
+  %mul = fmul double %a, %b
+  %sub = fsub double %mul, %c
+  %sub1 = fsub double 0.000000e+00, %sub
+  ret double %sub1
+}
diff --git a/test/CodeGen/Mips/fp-indexed-ls.ll b/test/CodeGen/Mips/fp-indexed-ls.ll
new file mode 100644
index 0000000..08bd6e7
--- /dev/null
+++ b/test/CodeGen/Mips/fp-indexed-ls.ll
@@ -0,0 +1,98 @@
+; RUN: llc -march=mipsel -mcpu=mips32r2 < %s | FileCheck %s
+
+%struct.S = type <{ [4 x float] }>
+%struct.S2 = type <{ [4 x double] }>
+%struct.S3 = type <{ i8, float }>
+
+@s = external global [4 x %struct.S]
+@gf = external global float
+@gd = external global double
+@s2 = external global [4 x %struct.S2]
+@s3 = external global %struct.S3
+
+define float @foo0(float* nocapture %b, i32 %o) nounwind readonly {
+entry:
+; CHECK: lwxc1
+  %arrayidx = getelementptr inbounds float* %b, i32 %o
+  %0 = load float* %arrayidx, align 4
+  ret float %0
+}
+
+define double @foo1(double* nocapture %b, i32 %o) nounwind readonly {
+entry:
+; CHECK: ldxc1
+  %arrayidx = getelementptr inbounds double* %b, i32 %o
+  %0 = load double* %arrayidx, align 8
+  ret double %0
+}
+
+define float @foo2(i32 %b, i32 %c) nounwind readonly { ; float load with align 1 from an element of packed %struct.S; luxc1 is expected
+entry:
+; CHECK: luxc1
+  %arrayidx1 = getelementptr inbounds [4 x %struct.S]* @s, i32 0, i32 %b, i32 0, i32 %c
+  %0 = load float* %arrayidx1, align 1
+  ret float %0
+}
+
+define void @foo3(float* nocapture %b, i32 %o) nounwind {
+entry:
+; CHECK: swxc1
+  %0 = load float* @gf, align 4
+  %arrayidx = getelementptr inbounds float* %b, i32 %o
+  store float %0, float* %arrayidx, align 4
+  ret void
+}
+
+define void @foo4(double* nocapture %b, i32 %o) nounwind {
+entry:
+; CHECK: sdxc1
+  %0 = load double* @gd, align 8
+  %arrayidx = getelementptr inbounds double* %b, i32 %o
+  store double %0, double* %arrayidx, align 8
+  ret void
+}
+
+define void @foo5(i32 %b, i32 %c) nounwind {
+entry:
+; CHECK: suxc1
+  %0 = load float* @gf, align 4
+  %arrayidx1 = getelementptr inbounds [4 x %struct.S]* @s, i32 0, i32 %b, i32 0, i32 %c
+  store float %0, float* %arrayidx1, align 1
+  ret void
+}
+
+define double @foo6(i32 %b, i32 %c) nounwind readonly { ; negative test: a double load with align 1 must not be selected as ldxc1
+entry:
+; CHECK: foo6
+; CHECK-NOT: ldxc1
+  %arrayidx1 = getelementptr inbounds [4 x %struct.S2]* @s2, i32 0, i32 %b, i32 0, i32 %c
+  %0 = load double* %arrayidx1, align 1
+  ret double %0
+}
+
+define void @foo7(i32 %b, i32 %c) nounwind { ; negative test: a double store with align 1 must not be selected as sdxc1
+entry:
+; CHECK: foo7
+; CHECK-NOT: sdxc1
+  %0 = load double* @gd, align 8
+  %arrayidx1 = getelementptr inbounds [4 x %struct.S2]* @s2, i32 0, i32 %b, i32 0, i32 %c
+  store double %0, double* %arrayidx1, align 1
+  ret void
+}
+
+define float @foo8() nounwind readonly { ; align-1 load of the float field that follows the i8 in packed %struct.S3; luxc1 is expected
+entry:
+; CHECK: foo8
+; CHECK: luxc1
+  %0 = load float* getelementptr inbounds (%struct.S3* @s3, i32 0, i32 1), align 1
+  ret float %0
+}
+
+define void @foo9(float %f) nounwind { ; align-1 store to the float field that follows the i8 in packed %struct.S3; suxc1 is expected
+entry:
+; CHECK: foo9
+; CHECK: suxc1
+  store float %f, float* getelementptr inbounds (%struct.S3* @s3, i32 0, i32 1), align 1
+  ret void
+}
+
diff --git a/test/CodeGen/Mips/global-pointer-reg.ll b/test/CodeGen/Mips/global-pointer-reg.ll
new file mode 100644
index 0000000..174d1f9
--- /dev/null
+++ b/test/CodeGen/Mips/global-pointer-reg.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -march=mipsel -mips-fix-global-base-reg=false | FileCheck %s 
+
+@g0 = external global i32
+@g1 = external global i32
+@g2 = external global i32
+
+define void @foo1() nounwind { ; run with -mips-fix-global-base-reg=false: expects no .cpload/.cprestore before the _gp_disp sequence, and the %call16 load to use the register produced by the addu with $25
+entry:
+; CHECK-NOT:    .cpload
+; CHECK-NOT:    .cprestore
+; CHECK: lui    $[[R0:[0-9]+]], %hi(_gp_disp)
+; CHECK: addiu  $[[R1:[0-9]+]], $[[R0]], %lo(_gp_disp)
+; CHECK: addu   $[[GP:[0-9]+]], $[[R1]], $25
+; CHECK: lw     ${{[0-9]+}}, %call16(foo2)($[[GP]])
+
+  tail call void @foo2(i32* @g0) nounwind
+  tail call void @foo2(i32* @g1) nounwind
+  tail call void @foo2(i32* @g2) nounwind
+  ret void
+}
+
+declare void @foo2(i32*)
diff --git a/test/CodeGen/Mips/imm.ll b/test/CodeGen/Mips/imm.ll
new file mode 100644
index 0000000..eea391e
--- /dev/null
+++ b/test/CodeGen/Mips/imm.ll
@@ -0,0 +1,38 @@
+; RUN: llc -march=mipsel < %s | FileCheck %s
+
+define i32 @foo0() nounwind readnone { ; 305419896 is 0x12345678: upper half 4660 (0x1234) via lui, lower half 22136 (0x5678) via ori
+entry:
+; CHECK: foo0
+; CHECK: lui $[[R0:[0-9]+]], 4660
+; CHECK: ori ${{[0-9]+}}, $[[R0]], 22136
+  ret i32 305419896
+}
+
+define i32 @foo1() nounwind readnone { ; 305397760 is 0x12340000: the low 16 bits are zero, so lui alone is expected and no ori may follow
+entry:
+; CHECK: foo1
+; CHECK: lui ${{[0-9]+}}, 4660
+; CHECK-NOT: ori
+  ret i32 305397760
+}
+
+define i32 @foo2() nounwind readnone {
+entry:
+; CHECK: foo2
+; CHECK: addiu ${{[0-9]+}}, $zero, 4660
+  ret i32 4660
+}
+
+define i32 @foo17() nounwind readnone { ; -32204 fits in a signed 16-bit immediate, so a single addiu from $zero is expected
+entry:
+; CHECK: foo17
+; CHECK: addiu ${{[0-9]+}}, $zero, -32204
+  ret i32 -32204
+}
+
+define i32 @foo18() nounwind readnone { ; 33332 exceeds 32767 but fits in 16 unsigned bits, so a single ori from $zero is expected
+entry:
+; CHECK: foo18
+; CHECK: ori ${{[0-9]+}}, $zero, 33332
+  ret i32 33332
+}
diff --git a/test/CodeGen/Mips/inlineasm64.ll b/test/CodeGen/Mips/inlineasm64.ll
new file mode 100644
index 0000000..dbce3c3
--- /dev/null
+++ b/test/CodeGen/Mips/inlineasm64.ll
@@ -0,0 +1,17 @@
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 < %s | FileCheck %s
+
+@gl2 = external global i64
+@gl1 = external global i64
+@gl0 = external global i64
+
+define void @foo1() nounwind {
+entry:
+; CHECK: foo1
+; CHECK: daddu
+  %0 = load i64* @gl1, align 8
+  %1 = load i64* @gl0, align 8
+  %2 = tail call i64 asm "daddu $0, $1, $2", "=r,r,r"(i64 %0, i64 %1) nounwind
+  store i64 %2, i64* @gl2, align 8
+  ret void
+}
+
diff --git a/test/CodeGen/Mips/largeimmprinting.ll b/test/CodeGen/Mips/largeimmprinting.ll
index 2333f07..b7c9a9c 100644
--- a/test/CodeGen/Mips/largeimmprinting.ll
+++ b/test/CodeGen/Mips/largeimmprinting.ll
@@ -7,8 +7,8 @@
 define void @f() nounwind {
 entry:
 ; CHECK:  lui $at, 65534
-; CHECK:  addu  $at, $sp, $at
-; CHECK:  addiu $sp, $at, -24
+; CHECK:  addiu $at, $at, -24
+; CHECK:  addu  $sp, $sp, $at
 ; CHECK:  .cprestore  65536
 
   %agg.tmp = alloca %struct.S1, align 1
diff --git a/test/CodeGen/Mips/lit.local.cfg b/test/CodeGen/Mips/lit.local.cfg
new file mode 100644
index 0000000..e1cd73a
--- /dev/null
+++ b/test/CodeGen/Mips/lit.local.cfg
@@ -0,0 +1,13 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+# Follow config.parent links up to the configuration that has no parent.
+def getRoot(config):
+    while config.parent:
+        config = config.parent
+    return config
+
+root = getRoot(config)
+targets = set(root.targets_to_build.split())
+if 'Mips' not in targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/Mips/mips64-fp-indexed-ls.ll b/test/CodeGen/Mips/mips64-fp-indexed-ls.ll
new file mode 100644
index 0000000..09745fb
--- /dev/null
+++ b/test/CodeGen/Mips/mips64-fp-indexed-ls.ll
@@ -0,0 +1,110 @@
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 < %s | FileCheck %s
+
+%struct.S = type <{ [4 x float] }>
+%struct.S2 = type <{ [4 x double] }>
+%struct.S3 = type <{ i8, float }>
+
+@s = external global [4 x %struct.S]
+@gf = external global float
+@gd = external global double
+@s2 = external global [4 x %struct.S2]
+@s3 = external global %struct.S3
+
+define float @foo0(float* nocapture %b, i32 %o) nounwind readonly {
+entry:
+; CHECK: lwxc1
+  %idxprom = zext i32 %o to i64
+  %arrayidx = getelementptr inbounds float* %b, i64 %idxprom
+  %0 = load float* %arrayidx, align 4
+  ret float %0
+}
+
+define double @foo1(double* nocapture %b, i32 %o) nounwind readonly {
+entry:
+; CHECK: ldxc1
+  %idxprom = zext i32 %o to i64
+  %arrayidx = getelementptr inbounds double* %b, i64 %idxprom
+  %0 = load double* %arrayidx, align 8
+  ret double %0
+}
+
+define float @foo2(i32 %b, i32 %c) nounwind readonly {
+entry:
+; CHECK: luxc1
+  %idxprom = zext i32 %c to i64
+  %idxprom1 = zext i32 %b to i64
+  %arrayidx2 = getelementptr inbounds [4 x %struct.S]* @s, i64 0, i64 %idxprom1, i32 0, i64 %idxprom
+  %0 = load float* %arrayidx2, align 1
+  ret float %0
+}
+
+define void @foo3(float* nocapture %b, i32 %o) nounwind {
+entry:
+; CHECK: swxc1
+  %0 = load float* @gf, align 4
+  %idxprom = zext i32 %o to i64
+  %arrayidx = getelementptr inbounds float* %b, i64 %idxprom
+  store float %0, float* %arrayidx, align 4
+  ret void
+}
+
+define void @foo4(double* nocapture %b, i32 %o) nounwind {
+entry:
+; CHECK: sdxc1
+  %0 = load double* @gd, align 8
+  %idxprom = zext i32 %o to i64
+  %arrayidx = getelementptr inbounds double* %b, i64 %idxprom
+  store double %0, double* %arrayidx, align 8
+  ret void
+}
+
+define void @foo5(i32 %b, i32 %c) nounwind {
+entry:
+; CHECK: suxc1
+  %0 = load float* @gf, align 4
+  %idxprom = zext i32 %c to i64
+  %idxprom1 = zext i32 %b to i64
+  %arrayidx2 = getelementptr inbounds [4 x %struct.S]* @s, i64 0, i64 %idxprom1, i32 0, i64 %idxprom
+  store float %0, float* %arrayidx2, align 1
+  ret void
+}
+
+define double @foo6(i32 %b, i32 %c) nounwind readonly {
+entry:
+; CHECK: foo6
+; CHECK-NOT: ldxc1
+  %idxprom = zext i32 %c to i64
+  %idxprom1 = zext i32 %b to i64
+  %arrayidx2 = getelementptr inbounds [4 x %struct.S2]* @s2, i64 0, i64 %idxprom1, i32 0, i64 %idxprom
+  %0 = load double* %arrayidx2, align 1
+  ret double %0
+}
+
+define void @foo7(i32 %b, i32 %c) nounwind {
+entry:
+; CHECK: foo7
+; CHECK-NOT: sdxc1
+  %0 = load double* @gd, align 8
+  %idxprom = zext i32 %c to i64
+  %idxprom1 = zext i32 %b to i64
+  %arrayidx2 = getelementptr inbounds [4 x %struct.S2]* @s2, i64 0, i64 %idxprom1, i32 0, i64 %idxprom
+  store double %0, double* %arrayidx2, align 1
+  ret void
+}
+
+define float @foo8() nounwind readonly {
+entry:
+; CHECK: foo8
+; CHECK: luxc1
+  %0 = load float* getelementptr inbounds (%struct.S3* @s3, i64 0, i32 1), align 1
+  ret float %0
+}
+
+define void @foo9(float %f) nounwind {
+entry:
+; CHECK: foo9
+; CHECK: suxc1
+  store float %f, float* getelementptr inbounds (%struct.S3* @s3, i64 0, i32 1), align 1
+  ret void
+}
+
diff --git a/test/CodeGen/Mips/mips64countleading.ll b/test/CodeGen/Mips/mips64countleading.ll
new file mode 100644
index 0000000..b2b67e5
--- /dev/null
+++ b/test/CodeGen/Mips/mips64countleading.ll
@@ -0,0 +1,19 @@
+; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck %s
+
+define i64 @t1(i64 %X) nounwind readnone {
+entry:
+; CHECK: dclz
+  %tmp1 = tail call i64 @llvm.ctlz.i64(i64 %X, i1 true)
+  ret i64 %tmp1
+}
+
+declare i64 @llvm.ctlz.i64(i64, i1) nounwind readnone
+
+define i64 @t3(i64 %X) nounwind readnone { ; ctlz of the bitwise complement (xor with -1); dclo is expected
+entry:
+; CHECK: dclo 
+  %neg = xor i64 %X, -1
+  %tmp1 = tail call i64 @llvm.ctlz.i64(i64 %neg, i1 true)
+  ret i64 %tmp1
+}
+
diff --git a/test/CodeGen/Mips/mips64directive.ll b/test/CodeGen/Mips/mips64directive.ll
new file mode 100644
index 0000000..fa81b72
--- /dev/null
+++ b/test/CodeGen/Mips/mips64directive.ll
@@ -0,0 +1,11 @@
+; RUN: llc  < %s -march=mips64el -mcpu=mips64 -mattr=n64 | FileCheck %s
+
+@gl = global i64 1250999896321, align 8
+
+; CHECK: 8byte
+define i64 @foo1() nounwind readonly {
+entry:
+  %0 = load i64* @gl, align 8
+  ret i64 %0
+}
+
diff --git a/test/CodeGen/Mips/mips64ext.ll b/test/CodeGen/Mips/mips64ext.ll
index 33af0d8..02a35f8 100644
--- a/test/CodeGen/Mips/mips64ext.ll
+++ b/test/CodeGen/Mips/mips64ext.ll
@@ -3,9 +3,24 @@
 define i64 @zext64_32(i32 %a) nounwind readnone {
 entry:
 ; CHECK: addiu $[[R0:[0-9]+]], ${{[0-9]+}}, 2
-; CHECK: dsll32 $[[R1:[0-9]+]], $[[R0]], 0
-; CHECK: dsrl32  ${{[0-9]+}}, $[[R1]], 0
+; CHECK: dsll $[[R1:[0-9]+]], $[[R0]], 32
+; CHECK: dsrl ${{[0-9]+}}, $[[R1]], 32
   %add = add i32 %a, 2
   %conv = zext i32 %add to i64
   ret i64 %conv
 }
+
+define i64 @sext64_32(i32 %a) nounwind readnone {
+entry:
+; CHECK: sll ${{[0-9]+}}, ${{[0-9]+}}, 0
+  %conv = sext i32 %a to i64
+  ret i64 %conv
+}
+
+define i64 @i64_float(float %f) nounwind readnone {
+entry:
+; CHECK: trunc.l.s 
+  %conv = fptosi float %f to i64
+  ret i64 %conv
+}
+
diff --git a/test/CodeGen/Mips/mips64fpimm0.ll b/test/CodeGen/Mips/mips64fpimm0.ll
new file mode 100644
index 0000000..17716da
--- /dev/null
+++ b/test/CodeGen/Mips/mips64fpimm0.ll
@@ -0,0 +1,7 @@
+; RUN: llc  < %s -march=mips64el -mcpu=mips64 -mattr=n64 | FileCheck %s
+
+define double @foo1() nounwind readnone { ; returning double +0.0 is expected to move $zero into an FP register with dmtc1
+entry:
+; CHECK: dmtc1 $zero
+  ret double 0.000000e+00
+}
diff --git a/test/CodeGen/Mips/mips64imm.ll b/test/CodeGen/Mips/mips64imm.ll
index dca656c..1fc8636 100644
--- a/test/CodeGen/Mips/mips64imm.ll
+++ b/test/CodeGen/Mips/mips64imm.ll
@@ -1,10 +1,18 @@
 ; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck %s
 
+define i32 @foo1() nounwind readnone {
+entry:
+; CHECK: foo1
+; CHECK: lui ${{[0-9]+}}, 4660
+; CHECK-NOT: ori
+  ret i32 305397760
+}
+
 define i64 @foo3() nounwind readnone {
 entry:
 ; CHECK: foo3
 ; CHECK: lui $[[R0:[0-9]+]], 4660
-; CHECK: ori ${{[0-9]+}}, $[[R0]], 22136
+; CHECK: daddiu ${{[0-9]+}}, $[[R0]], 22136
   ret i64 305419896
 }
 
@@ -25,11 +33,20 @@ entry:
 define i64 @foo9() nounwind readnone {
 entry:
 ; CHECK: foo9
-; CHECK: lui $[[R0:[0-9]+]], 4660
-; CHECK: ori $[[R1:[0-9]+]], $[[R0]], 22136
-; CHECK: dsll $[[R2:[0-9]+]], $[[R1]], 16
-; CHECK: ori $[[R3:[0-9]+]], $[[R2]], 36882
-; CHECK: dsll $[[R4:[0-9]+]], $[[R3]], 16
-; CHECK: ori ${{[0-9]+}}, $[[R4]], 13398
+; CHECK: lui $[[R0:[0-9]+]], 583
+; CHECK: daddiu $[[R1:[0-9]+]], $[[R0]], -30001
+; CHECK: dsll $[[R2:[0-9]+]], $[[R1]], 18
+; CHECK: daddiu $[[R3:[0-9]+]], $[[R2]], 18441
+; CHECK: dsll $[[R4:[0-9]+]], $[[R3]], 17
+; CHECK: daddiu ${{[0-9]+}}, $[[R4]], 13398
   ret i64 1311768467284833366
 }
+
+define i64 @foo10() nounwind readnone {
+entry:
+; CHECK: foo10
+; CHECK: lui $[[R0:[0-9]+]], 34661
+; CHECK: daddiu  ${{[0-9]+}}, $[[R0]], 17185
+  ret i64 -8690466096928522240
+}
+
diff --git a/test/CodeGen/Mips/mips64lea.ll b/test/CodeGen/Mips/mips64lea.ll
new file mode 100644
index 0000000..54d504f
--- /dev/null
+++ b/test/CodeGen/Mips/mips64lea.ll
@@ -0,0 +1,12 @@
+; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck %s
+
+define void @foo3() nounwind {
+entry:
+; CHECK: daddiu ${{[0-9]+}}, $sp
+  %a = alloca i32, align 4
+  call void @foo1(i32* %a) nounwind
+  ret void
+}
+
+declare void @foo1(i32*)
+
diff --git a/test/CodeGen/Mips/mips64muldiv.ll b/test/CodeGen/Mips/mips64muldiv.ll
index a89d074..fd036a2 100644
--- a/test/CodeGen/Mips/mips64muldiv.ll
+++ b/test/CodeGen/Mips/mips64muldiv.ll
@@ -8,6 +8,14 @@ entry:
   ret i64 %mul
 }
 
+define i64 @m1(i64 %a) nounwind readnone {
+entry:
+; CHECK: dmult
+; CHECK: mfhi
+  %div = sdiv i64 %a, 3
+  ret i64 %div
+}
+
 define i64 @d0(i64 %a0, i64 %a1) nounwind readnone {
 entry:
 ; CHECK: ddivu
diff --git a/test/CodeGen/Mips/mips64shift.ll b/test/CodeGen/Mips/mips64shift.ll
index cc5e508..45d1c95 100644
--- a/test/CodeGen/Mips/mips64shift.ll
+++ b/test/CodeGen/Mips/mips64shift.ll
@@ -44,21 +44,21 @@ entry:
 
 define i64 @f6(i64 %a0) nounwind readnone {
 entry:
-; CHECK: dsll32 ${{[0-9]+}}, ${{[0-9]+}}, 8
+; CHECK: dsll ${{[0-9]+}}, ${{[0-9]+}}, 40
   %shl = shl i64 %a0, 40
   ret i64 %shl
 }
 
 define i64 @f7(i64 %a0) nounwind readnone {
 entry:
-; CHECK: dsra32 ${{[0-9]+}}, ${{[0-9]+}}, 8
+; CHECK: dsra ${{[0-9]+}}, ${{[0-9]+}}, 40
   %shr = ashr i64 %a0, 40
   ret i64 %shr
 }
 
 define i64 @f8(i64 %a0) nounwind readnone {
 entry:
-; CHECK: dsrl32 ${{[0-9]+}}, ${{[0-9]+}}, 8
+; CHECK: dsrl ${{[0-9]+}}, ${{[0-9]+}}, 40
   %shr = lshr i64 %a0, 40
   ret i64 %shr
 }
@@ -94,7 +94,7 @@ entry:
 
 define i64 @f12(i64 %a0) nounwind readnone {
 entry:
-; CHECK: drotr32 ${{[0-9]+}}, ${{[0-9]+}}, 22
+; CHECK: drotr ${{[0-9]+}}, ${{[0-9]+}}, 54
   %shl = shl i64 %a0, 10
   %shr = lshr i64 %a0, 54
   %or = or i64 %shl, %shr
diff --git a/test/CodeGen/Mips/swzero.ll b/test/CodeGen/Mips/swzero.ll
new file mode 100644
index 0000000..da1e036
--- /dev/null
+++ b/test/CodeGen/Mips/swzero.ll
@@ -0,0 +1,19 @@
+; RUN: llc -march=mipsel < %s | FileCheck %s
+
+%struct.unaligned = type <{ i32 }>
+
+define void @zero_u(%struct.unaligned* nocapture %p) nounwind { ; align-1 store of 0 into the packed struct; usw with $zero as the source is expected
+entry:
+; CHECK: usw $zero
+  %x = getelementptr inbounds %struct.unaligned* %p, i32 0, i32 0
+  store i32 0, i32* %x, align 1
+  ret void
+}
+
+define void @zero_a(i32* nocapture %p) nounwind {
+entry:
+; CHECK: sw $zero
+  store i32 0, i32* %p, align 4
+  ret void
+}
+
diff --git a/test/CodeGen/Mips/tls.ll b/test/CodeGen/Mips/tls.ll
index 3fa852b..a3c4768 100644
--- a/test/CodeGen/Mips/tls.ll
+++ b/test/CodeGen/Mips/tls.ll
@@ -1,7 +1,8 @@
 ; RUN: llc -march=mipsel < %s | FileCheck %s -check-prefix=PIC
 ; RUN: llc -march=mipsel -relocation-model=static < %s \
 ; RUN:                             | FileCheck %s -check-prefix=STATIC
-
+; RUN: llc -march=mipsel -relocation-model=static < %s \
+; RUN:   -mips-fix-global-base-reg=false | FileCheck %s -check-prefix=STATICGP
 
 @t1 = thread_local global i32 0, align 4
 
@@ -39,6 +40,11 @@ entry:
 ; PIC:   jalr    $25
 ; PIC:   lw      $2, 0($2)
 
+; STATICGP: lui     $[[R0:[0-9]+]], %hi(__gnu_local_gp)
+; STATICGP: addiu   $[[GP:[0-9]+]], $[[R0]], %lo(__gnu_local_gp)
+; STATICGP: lw      ${{[0-9]+}}, %gottprel(t2)($[[GP]])
+; STATIC:   lui     $gp, %hi(__gnu_local_gp)
+; STATIC:   addiu   $gp, $gp, %lo(__gnu_local_gp)
 ; STATIC:   rdhwr   $3, $29
 ; STATIC:   lw      $[[R0:[0-9]+]], %gottprel(t2)($gp)
 ; STATIC:   addu    $[[R1:[0-9]+]], $3, $[[R0]]
@@ -55,7 +61,7 @@ entry:
 ; PIC:   jalr    $25
 ; PIC:   lui     $[[R0:[0-9]+]], %dtprel_hi(f3.i)
 ; PIC:   addu    $[[R1:[0-9]+]], $[[R0]], $2
-; PIC:   addiu   ${{[0-9]+}}, $[[R1]], %dtprel_lo(f3.i)
+; PIC:   lw      ${{[0-9]+}}, %dtprel_lo(f3.i)($[[R1]])
 
   %0 = load i32* @f3.i, align 4
   %inc = add nsw i32 %0, 1
diff --git a/test/CodeGen/PTX/dg.exp b/test/CodeGen/PTX/dg.exp
deleted file mode 100644
index 2c304b5..0000000
--- a/test/CodeGen/PTX/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target PTX] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/PTX/lit.local.cfg b/test/CodeGen/PTX/lit.local.cfg
new file mode 100644
index 0000000..7399089
--- /dev/null
+++ b/test/CodeGen/PTX/lit.local.cfg
@@ -0,0 +1,13 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+# Follow config.parent links up to the configuration that has no parent.
+def getRoot(config):
+    while config.parent:
+        config = config.parent
+    return config
+
+root = getRoot(config)
+targets = set(root.targets_to_build.split())
+if 'PTX' not in targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/PowerPC/2010-02-12-saveCR.ll b/test/CodeGen/PowerPC/2010-02-12-saveCR.ll
index 3c01938..974a99a 100644
--- a/test/CodeGen/PowerPC/2010-02-12-saveCR.ll
+++ b/test/CodeGen/PowerPC/2010-02-12-saveCR.ll
@@ -6,16 +6,22 @@ target triple = "powerpc-apple-darwin9.6"
 
 define void @foo() nounwind {
 entry:
+;CHECK:  mfcr r2
 ;CHECK:  lis r3, 1
+;CHECK:  rlwinm r2, r2, 8, 0, 31
 ;CHECK:  ori r3, r3, 34524
+;CHECK:  stwx r2, r1, r3
+; Make sure that the register scavenger returns the same temporary register.
 ;CHECK:  mfcr r2
-;CHECK:  rlwinm r2, r2, 8, 0, 31
+;CHECK:  lis r3, 1
+;CHECK:  rlwinm r2, r2, 12, 0, 31
+;CHECK:  ori r3, r3, 34520
 ;CHECK:  stwx r2, r1, r3
   %x = alloca [100000 x i8]                       ; <[100000 x i8]*> [#uses=1]
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
   %x1 = bitcast [100000 x i8]* %x to i8*          ; <i8*> [#uses=1]
   call void @bar(i8* %x1) nounwind
-  call void asm sideeffect "", "~{cr2}"() nounwind
+  call void asm sideeffect "", "~{cr2},~{cr3}"() nounwind
   br label %return
 
 return:                                           ; preds = %entry
diff --git a/test/CodeGen/PowerPC/Frames-large.ll b/test/CodeGen/PowerPC/Frames-large.ll
index a2af87e..d07fea7 100644
--- a/test/CodeGen/PowerPC/Frames-large.ll
+++ b/test/CodeGen/PowerPC/Frames-large.ll
@@ -15,9 +15,9 @@ define i32* @f1() nounwind {
 
 ; PPC32-NOFP: _f1:
 ; PPC32-NOFP: 	lis r0, -1
-; PPC32-NOFP: 	addi r3, r1, 68
 ; PPC32-NOFP: 	ori r0, r0, 32704
 ; PPC32-NOFP: 	stwux r1, r1, r0
+; PPC32-NOFP: 	addi r3, r1, 68
 ; PPC32-NOFP: 	lwz r1, 0(r1)
 ; PPC32-NOFP: 	blr 
 
@@ -25,10 +25,10 @@ define i32* @f1() nounwind {
 ; PPC32-FP: _f1:
 ; PPC32-FP:	lis r0, -1
 ; PPC32-FP:	stw r31, -4(r1)
-; PPC32-FP:	mr r31, r1
 ; PPC32-FP:	ori r0, r0, 32704
-; PPC32-FP:	addi r3, r31, 64
 ; PPC32-FP:	stwux r1, r1, r0
+; PPC32-FP:	mr r31, r1
+; PPC32-FP:	addi r3, r31, 64
 ; PPC32-FP:	lwz r1, 0(r1)
 ; PPC32-FP:	lwz r31, -4(r1)
 ; PPC32-FP:	blr 
@@ -36,9 +36,9 @@ define i32* @f1() nounwind {
 
 ; PPC64-NOFP: _f1:
 ; PPC64-NOFP: 	lis r0, -1
-; PPC64-NOFP: 	addi r3, r1, 116
 ; PPC64-NOFP: 	ori r0, r0, 32656
 ; PPC64-NOFP: 	stdux r1, r1, r0
+; PPC64-NOFP: 	addi r3, r1, 116
 ; PPC64-NOFP: 	ld r1, 0(r1)
 ; PPC64-NOFP: 	blr 
 
@@ -46,10 +46,10 @@ define i32* @f1() nounwind {
 ; PPC64-FP: _f1:
 ; PPC64-FP:	lis r0, -1
 ; PPC64-FP:	std r31, -8(r1)
-; PPC64-FP:	mr r31, r1
 ; PPC64-FP:	ori r0, r0, 32640
-; PPC64-FP:	addi r3, r31, 124
 ; PPC64-FP:	stdux r1, r1, r0
+; PPC64-FP:	mr r31, r1
+; PPC64-FP:	addi r3, r31, 124
 ; PPC64-FP:	ld r1, 0(r1)
 ; PPC64-FP:	ld r31, -8(r1)
 ; PPC64-FP:	blr 
diff --git a/test/CodeGen/PowerPC/dbg.ll b/test/CodeGen/PowerPC/dbg.ll
new file mode 100644
index 0000000..e161cb0
--- /dev/null
+++ b/test/CodeGen/PowerPC/dbg.ll
@@ -0,0 +1,40 @@
+; RUN: llc < %s -break-anti-dependencies=all -march=ppc64 -mcpu=g5 | FileCheck %s
+; CHECK: main:
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define i32 @main(i32 %argc, i8** nocapture %argv) nounwind readnone {
+entry:
+  tail call void @llvm.dbg.value(metadata !{i32 %argc}, i64 0, metadata !15), !dbg !17
+  tail call void @llvm.dbg.value(metadata !{i8** %argv}, i64 0, metadata !16), !dbg !18
+  %add = add nsw i32 %argc, 1, !dbg !19
+  ret i32 %add, !dbg !19
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 720913, i32 0, i32 12, metadata !"dbg.c", metadata !"/src", metadata !"clang version 3.1", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 720942, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !13} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 720937, metadata !"dbg.c", metadata !"/src", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!8 = metadata !{metadata !9, metadata !9, metadata !10}
+!9 = metadata !{i32 720932, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!10 = metadata !{i32 720911, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
+!11 = metadata !{i32 720911, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !12} ; [ DW_TAG_pointer_type ]
+!12 = metadata !{i32 720932, null, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ]
+!13 = metadata !{metadata !14}
+!14 = metadata !{metadata !15, metadata !16}
+!15 = metadata !{i32 721153, metadata !5, metadata !"argc", metadata !6, i32 16777217, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!16 = metadata !{i32 721153, metadata !5, metadata !"argv", metadata !6, i32 33554433, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!17 = metadata !{i32 1, i32 14, metadata !5, null}
+!18 = metadata !{i32 1, i32 26, metadata !5, null}
+!19 = metadata !{i32 2, i32 3, metadata !20, null}
+!20 = metadata !{i32 720907, metadata !5, i32 1, i32 34, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+
diff --git a/test/CodeGen/PowerPC/dg.exp b/test/CodeGen/PowerPC/dg.exp
deleted file mode 100644
index 9e50b55..0000000
--- a/test/CodeGen/PowerPC/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target PowerPC] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/PowerPC/lit.local.cfg b/test/CodeGen/PowerPC/lit.local.cfg
new file mode 100644
index 0000000..5c7f267
--- /dev/null
+++ b/test/CodeGen/PowerPC/lit.local.cfg
@@ -0,0 +1,13 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+# Follow config.parent links up to the configuration that has no parent.
+def getRoot(config):
+    while config.parent:
+        config = config.parent
+    return config
+
+root = getRoot(config)
+targets = set(root.targets_to_build.split())
+if 'PowerPC' not in targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/PowerPC/ppc32-vaarg.ll b/test/CodeGen/PowerPC/ppc32-vaarg.ll
deleted file mode 100644
index 725c106..0000000
--- a/test/CodeGen/PowerPC/ppc32-vaarg.ll
+++ /dev/null
@@ -1,160 +0,0 @@
-; RUN: llc -O0 < %s | FileCheck %s
-;ModuleID = 'test.c'
-target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32"
-target triple = "powerpc-unknown-freebsd9.0"
-
-%struct.__va_list_tag = type { i8, i8, i16, i8*, i8* }
-
-@var1 = common global i64 0, align 8
-@var2 = common global double 0.0, align 8
-@var3 = common global i32 0, align 4
-
-define void @ppcvaargtest(%struct.__va_list_tag* %ap) nounwind {
- entry:
-  %x = va_arg %struct.__va_list_tag* %ap, i64; Get from r5,r6
-; CHECK: addi 5, 4, 1
-; CHECK-NEXT: rlwinm 6, 4, 0, 31, 31
-; CHECK-NEXT: cmplwi 0, 6, 0
-; CHECK-NEXT: stw 3, -4(1)
-; CHECK-NEXT: stw 5, -8(1)
-; CHECK-NEXT: stw 4, -12(1)
-; CHECK-NEXT: bne 0, .LBB0_2
-; CHECK-NEXT: # BB#1:                                 # %entry
-; CHECK-NEXT: lwz 3, -12(1)
-; CHECK-NEXT: stw 3, -8(1)
-; CHECK-NEXT: .LBB0_2:                                # %entry
-; CHECK-NEXT: lwz 3, -8(1)
-; CHECK-NEXT: addi 4, 3, 2
-; CHECK-NEXT: lwz 5, -4(1)
-; CHECK-NEXT: lwz 6, 4(5)
-; CHECK-NEXT: lwz 7, 8(5)
-; CHECK-NEXT: stb 4, 0(5)
-; CHECK-NEXT: cmpwi 0, 3, 8
-; CHECK-NEXT: addi 4, 6, 4
-; CHECK-NEXT: mr 8, 6
-; CHECK-NEXT: stw 7, -16(1)
-; CHECK-NEXT: stw 4, -20(1)
-; CHECK-NEXT: stw 3, -24(1)
-; CHECK-NEXT: stw 8, -28(1)
-; CHECK-NEXT: stw 6, -32(1)
-; CHECK-NEXT: mfcr 0                          # cr0
-; CHECK-NEXT: stw 0, -36(1)
-; CHECK-NEXT: blt 0, .LBB0_4
-; CHECK-NEXT: # BB#3:                                 # %entry
-; CHECK-NEXT: lwz 3, -20(1)
-; CHECK-NEXT: stw 3, -28(1)
-; CHECK-NEXT: .LBB0_4:                                # %entry
-; CHECK-NEXT: lwz 3, -28(1)
-; CHECK-NEXT: lwz 4, -4(1)
-; CHECK-NEXT: stw 3, 4(4)
-  store i64 %x, i64* @var1, align 8
-; CHECK-NEXT: lwz 3, -24(1)
-; CHECK-NEXT: slwi 5, 3, 2
-; CHECK-NEXT: lwz 6, -16(1)
-; CHECK-NEXT: add 5, 6, 5
-; CHECK-NEXT: lwz 0, -36(1)
-; CHECK-NEXT: mtcrf 128, 0
-; CHECK-NEXT: stw 5, -40(1)
-; CHECK-NEXT: blt 0, .LBB0_6
-; CHECK-NEXT: # BB#5:                                 # %entry
-; CHECK-NEXT: lwz 3, -32(1)
-; CHECK-NEXT: stw 3, -40(1)
-; CHECK-NEXT: .LBB0_6:                                # %entry
-; CHECK-NEXT: lwz 3, -40(1)
-; CHECK-NEXT: lwz 4, 0(3)
-; CHECK-NEXT: lwz 3, 4(3)
-; CHECK-NEXT: lis 5, var1@ha
-; CHECK-NEXT: la 6, var1@l(5)
-; CHECK-NEXT: stw 3, 4(6)
-; CHECK-NEXT: stw 4, var1@l(5)
-; CHECK-NEXT: lwz 3, -4(1)
-  %y = va_arg %struct.__va_list_tag* %ap, double; From f1
-; CHECK-NEXT: lbz 4, 1(3)
-; CHECK-NEXT: lwz 5, 4(3)
-; CHECK-NEXT: lwz 6, 8(3)
-; CHECK-NEXT: addi 7, 4, 1
-; CHECK-NEXT: stb 7, 1(3)
-; CHECK-NEXT: cmpwi 0, 4, 8
-; CHECK-NEXT: addi 7, 5, 8
-; CHECK-NEXT: mr 8, 5
-; CHECK-NEXT: stw 5, -44(1)
-; CHECK-NEXT: stw 7, -48(1)
-; CHECK-NEXT: stw 4, -52(1)
-; CHECK-NEXT: stw 6, -56(1)
-; CHECK-NEXT: stw 8, -60(1)
-; CHECK-NEXT: mfcr 0                          # cr0
-; CHECK-NEXT: stw 0, -64(1)
-; CHECK-NEXT: blt 0, .LBB0_8
-; CHECK-NEXT: # BB#7:                                 # %entry
-; CHECK-NEXT: lwz 3, -48(1)
-; CHECK-NEXT: stw 3, -60(1)
-; CHECK-NEXT: .LBB0_8:                                # %entry
-; CHECK-NEXT: lwz 3, -60(1)
-; CHECK-NEXT: lwz 4, -4(1)
-; CHECK-NEXT: stw 3, 4(4)
-; CHECK-NEXT: lwz 3, -52(1)
-; CHECK-NEXT: slwi 5, 3, 3
-; CHECK-NEXT: lwz 6, -56(1)
-; CHECK-NEXT: add 5, 6, 5
-; CHECK-NEXT: addi 5, 5, 32
-; CHECK-NEXT: lwz 0, -64(1)
-; CHECK-NEXT: mtcrf 128, 0
-; CHECK-NEXT: stw 5, -68(1)
-; CHECK-NEXT: blt 0, .LBB0_10
-; CHECK-NEXT: # BB#9:                                 # %entry
-; CHECK-NEXT: lwz 3, -44(1)
-; CHECK-NEXT: stw 3, -68(1)
-; CHECK-NEXT: .LBB0_10:                               # %entry
-; CHECK-NEXT: lwz 3, -68(1)
-; CHECK-NEXT: lfd 0, 0(3)
-  store double %y, double* @var2, align 8
-; CHECK-NEXT: lis 3, var2@ha
-; CHECK-NEXT: stfd 0, var2@l(3)
-  %z = va_arg %struct.__va_list_tag* %ap, i32; From r7
-; CHECK-NEXT: lwz 3, -4(1)
-; CHECK-NEXT: lbz 4, 0(3)
-; CHECK-NEXT: lwz 5, 4(3)
-; CHECK-NEXT: lwz 6, 8(3)
-; CHECK-NEXT: addi 7, 4, 1
-; CHECK-NEXT: stb 7, 0(3)
-; CHECK-NEXT: cmpwi 0, 4, 8
-; CHECK-NEXT: addi 7, 5, 4
-; CHECK-NEXT: mr 8, 5
-; CHECK-NEXT: stw 4, -72(1)
-; CHECK-NEXT: stw 6, -76(1)
-; CHECK-NEXT: mfcr 0                          # cr0
-; CHECK-NEXT: stw 0, -80(1)
-; CHECK-NEXT: stw 5, -84(1)
-; CHECK-NEXT: stw 8, -88(1)
-; CHECK-NEXT: stw 7, -92(1)
-; CHECK-NEXT: blt 0, .LBB0_12
-; CHECK-NEXT: # BB#11:                                # %entry
-; CHECK-NEXT: lwz 3, -92(1)
-; CHECK-NEXT: stw 3, -88(1)
-; CHECK-NEXT: .LBB0_12:                               # %entry
-; CHECK-NEXT: lwz 3, -88(1)
-; CHECK-NEXT: lwz 4, -4(1)
-; CHECK-NEXT: stw 3, 4(4)
-; CHECK-NEXT: lwz 3, -72(1)
-; CHECK-NEXT: slwi 5, 3, 2
-; CHECK-NEXT: lwz 6, -76(1)
-; CHECK-NEXT: add 5, 6, 5
-; CHECK-NEXT: lwz 0, -80(1)
-; CHECK-NEXT: mtcrf 128, 0
-; CHECK-NEXT: stw 5, -96(1)
-; CHECK-NEXT: blt 0, .LBB0_14
-; CHECK-NEXT: # BB#13:                                # %entry
-; CHECK-NEXT: lwz 3, -84(1)
-; CHECK-NEXT: stw 3, -96(1)
-; CHECK-NEXT: .LBB0_14:                               # %entry
-; CHECK-NEXT: lwz 3, -96(1)
-; CHECK-NEXT: lwz 3, 0(3)
-  store i32 %z, i32* @var3, align 4
-; CHECK-NEXT: lis 4, var3@ha
-; CHECK-NEXT: stw 3, var3@l(4)
-; CHECK-NEXT: lwz 3, -4(1)
-  ret void
-; CHECK-NEXT: stw 3, -100(1)
-; CHECK-NEXT: blr 
-}
-
diff --git a/test/CodeGen/PowerPC/ppc64-ind-call.ll b/test/CodeGen/PowerPC/ppc64-ind-call.ll
new file mode 100644
index 0000000..d5c4d46
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc64-ind-call.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=ppc64 | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define void @test1() {
+entry:
+  %call.i75 = call zeroext i8 undef(i8* undef, i8 zeroext 10) ; callee operand is undef, so this is a call through a pointer rather than to a named function
+  unreachable
+}
+
+; CHECK: @test1
+; CHECK: ld 11, 0(3)
+; CHECK: ld 2, 8(3)
+; CHECK: bctrl
+; CHECK: ld 2, 40(1)
+
diff --git a/test/CodeGen/PowerPC/ppc64-linux-func-size.ll b/test/CodeGen/PowerPC/ppc64-linux-func-size.ll
new file mode 100644
index 0000000..e5aa1f1
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc64-linux-func-size.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=g5 | FileCheck %s
+
+; CHECK:      .section	.opd,"aw",@progbits
+; CHECK-NEXT: test1:
+; CHECK-NEXT:	.align 3
+; CHECK-NEXT:	.quad .L.test1
+; CHECK-NEXT:	.quad .TOC.@tocbase
+; CHECK-NEXT:	.text
+; CHECK-NEXT: .L.test1:
+
+define i32 @test1(i32 %a) nounwind {
+entry:
+  ret i32 %a ; returns the argument unchanged; the test only inspects the emitted section, labels and size directive
+}
+
+; Until recently, binutils accepted the .size directive as:
+;  .size	test1, .Ltmp0-test1
+; however, using this directive with recent binutils will result in the error:
+;  .size expression for XXX does not evaluate to a constant
+; so we must use the label which actually tags the start of the function.
+; CHECK: .size	test1, .Ltmp0-.L.test1
diff --git a/test/CodeGen/SPARC/dg.exp b/test/CodeGen/SPARC/dg.exp
deleted file mode 100644
index 6c0a997..0000000
--- a/test/CodeGen/SPARC/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target Sparc] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/SPARC/lit.local.cfg b/test/CodeGen/SPARC/lit.local.cfg
new file mode 100644
index 0000000..ba81a16
--- /dev/null
+++ b/test/CodeGen/SPARC/lit.local.cfg
@@ -0,0 +1,13 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+def getRoot(config):
+    if not config.parent:
+        return config
+    return getRoot(config.parent)
+
+root = getRoot(config)
+
+targets = set(root.targets_to_build.split())
+if not 'Sparc' in targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/Thumb/dg.exp b/test/CodeGen/Thumb/dg.exp
deleted file mode 100644
index 3ff359a..0000000
--- a/test/CodeGen/Thumb/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target ARM] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/Thumb/large-stack.ll b/test/CodeGen/Thumb/large-stack.ll
index fbacaba..f8c438c 100644
--- a/test/CodeGen/Thumb/large-stack.ll
+++ b/test/CodeGen/Thumb/large-stack.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumb-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=thumb-apple-ios | FileCheck %s
 
 define void @test1() {
 ; CHECK: test1:
diff --git a/test/CodeGen/Thumb/lit.local.cfg b/test/CodeGen/Thumb/lit.local.cfg
new file mode 100644
index 0000000..dd6c50d
--- /dev/null
+++ b/test/CodeGen/Thumb/lit.local.cfg
@@ -0,0 +1,13 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+def getRoot(config):
+    if not config.parent:
+        return config
+    return getRoot(config.parent)
+
+root = getRoot(config)
+
+targets = set(root.targets_to_build.split())
+if not 'ARM' in targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/Thumb2/2009-07-21-ISelBug.ll b/test/CodeGen/Thumb2/2009-07-21-ISelBug.ll
index 4e1394f..4616dcf 100644
--- a/test/CodeGen/Thumb2/2009-07-21-ISelBug.ll
+++ b/test/CodeGen/Thumb2/2009-07-21-ISelBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mattr=+vfp2,+thumb2 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mattr=+vfp2,+thumb2 | FileCheck %s
 ; rdar://7076238
 
 @"\01LC" = external constant [36 x i8], align 1		; <[36 x i8]*> [#uses=1]
diff --git a/test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll b/test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll
index d2140a1..5cb266b 100644
--- a/test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll
+++ b/test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll
@@ -1,5 +1,5 @@
 ; rdar://8465407
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s
 
 %struct.buf = type opaque
 
diff --git a/test/CodeGen/Thumb2/2011-12-16-T2SizeReduceAssert.ll b/test/CodeGen/Thumb2/2011-12-16-T2SizeReduceAssert.ll
new file mode 100644
index 0000000..dadbdc5
--- /dev/null
+++ b/test/CodeGen/Thumb2/2011-12-16-T2SizeReduceAssert.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -relocation-model=pic -disable-fp-elim -mcpu=cortex-a8 
+
+%struct.LIST_NODE.0.16 = type { %struct.LIST_NODE.0.16*, i8* }
+
+define %struct.LIST_NODE.0.16* @list_AssocListPair(%struct.LIST_NODE.0.16* %List, i8* %Key) nounwind readonly {
+entry:
+  br label %bb3 ; jump straight to the loop test in bb3
+
+bb:                                               ; preds = %bb3
+  %Scan.0.idx7.val = load i8** undef, align 4 ; loads through an undef address
+  %.idx = getelementptr i8* %Scan.0.idx7.val, i32 4
+  %0 = bitcast i8* %.idx to i8**
+  %.idx.val = load i8** %0, align 4
+  %1 = icmp eq i8* %.idx.val, %Key ; compare the loaded pointer against %Key
+  br i1 %1, label %bb5, label %bb2 ; equal: leave the loop, otherwise continue in bb2
+
+bb2:                                              ; preds = %bb
+  %Scan.0.idx8.val = load %struct.LIST_NODE.0.16** undef, align 4 ; next %Scan.0 value, also loaded through undef
+  br label %bb3
+
+bb3:                                              ; preds = %bb2, %entry
+  %Scan.0 = phi %struct.LIST_NODE.0.16* [ %List, %entry ], [ %Scan.0.idx8.val, %bb2 ]
+  %2 = icmp eq %struct.LIST_NODE.0.16* %Scan.0, null ; a null %Scan.0 ends the loop
+  br i1 %2, label %bb5, label %bb
+
+bb5:                                              ; preds = %bb3, %bb
+  ret %struct.LIST_NODE.0.16* null ; both exits return null
+}
diff --git a/test/CodeGen/Thumb2/2012-01-13-CBNZBug.ll b/test/CodeGen/Thumb2/2012-01-13-CBNZBug.ll
new file mode 100644
index 0000000..4acdd9e
--- /dev/null
+++ b/test/CodeGen/Thumb2/2012-01-13-CBNZBug.ll
@@ -0,0 +1,103 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -relocation-model=pic -disable-fp-elim -mcpu=cortex-a8 | FileCheck %s
+; rdar://10676853
+
+%struct.Dict_node_struct = type { i8*, %struct.Word_file_struct*, %struct.Exp_struct*, %struct.Dict_node_struct*, %struct.Dict_node_struct* }
+%struct.Word_file_struct = type { [60 x i8], i32, %struct.Word_file_struct* }
+%struct.Exp_struct = type { i8, i8, i8, i8, %union.anon }
+%union.anon = type { %struct.E_list_struct* }
+%struct.E_list_struct = type { %struct.E_list_struct*, %struct.Exp_struct* }
+
+@lookup_list = external hidden unnamed_addr global %struct.Dict_node_struct*, align 4
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+
+define hidden fastcc void @rdictionary_lookup(%struct.Dict_node_struct* %dn, i8* nocapture %s) nounwind ssp {
+; CHECK: rdictionary_lookup:
+entry:
+  br label %tailrecurse
+
+tailrecurse:                                      ; preds = %if.then10, %entry
+  %dn.tr = phi %struct.Dict_node_struct* [ %dn, %entry ], [ %9, %if.then10 ] ; current node: %dn on entry, then %9 from if.then10
+  %cmp = icmp eq %struct.Dict_node_struct* %dn.tr, null
+  br i1 %cmp, label %if.end11, label %if.end ; null node: return through if.end11
+
+if.end:                                           ; preds = %tailrecurse
+  %string = getelementptr inbounds %struct.Dict_node_struct* %dn.tr, i32 0, i32 0
+  %0 = load i8** %string, align 4 ; field 0 of the current node
+  br label %while.cond.i
+
+while.cond.i:                                     ; preds = %while.body.i, %if.end
+  %1 = phi i8* [ %s, %if.end ], [ %incdec.ptr.i, %while.body.i ]
+  %storemerge.i = phi i8* [ %0, %if.end ], [ %incdec.ptr6.i, %while.body.i ]
+  %2 = load i8* %1, align 1
+  %cmp.i = icmp eq i8 %2, 0 ; zero byte read through %1, which starts at %s
+  %.pre.i = load i8* %storemerge.i, align 1
+  br i1 %cmp.i, label %lor.lhs.false.i, label %land.end.i
+
+land.end.i:                                       ; preds = %while.cond.i
+  %cmp4.i = icmp eq i8 %2, %.pre.i ; keep looping while the two current bytes are equal
+  br i1 %cmp4.i, label %while.body.i, label %while.end.i
+
+while.body.i:                                     ; preds = %land.end.i
+  %incdec.ptr.i = getelementptr inbounds i8* %1, i32 1
+  %incdec.ptr6.i = getelementptr inbounds i8* %storemerge.i, i32 1
+  br label %while.cond.i
+
+while.end.i:                                      ; preds = %land.end.i
+  %cmp8.i = icmp eq i8 %2, 42 ; 42 is ASCII '*'
+  br i1 %cmp8.i, label %if.end3, label %lor.lhs.false.i
+
+lor.lhs.false.i:                                  ; preds = %while.end.i, %while.cond.i
+  %3 = phi i8 [ %2, %while.end.i ], [ 0, %while.cond.i ]
+  %cmp11.i = icmp eq i8 %.pre.i, 42 ; 42 is ASCII '*'
+  br i1 %cmp11.i, label %if.end3, label %dict_match.exit
+
+dict_match.exit:                                  ; preds = %lor.lhs.false.i
+  %cmp14.i = icmp eq i8 %3, 46 ; 46 is ASCII '.'
+  %conv16.i = sext i8 %3 to i32
+  %.conv16.i = select i1 %cmp14.i, i32 0, i32 %conv16.i
+  %cmp18.i = icmp eq i8 %.pre.i, 46 ; 46 is ASCII '.'
+  %conv22.i = sext i8 %.pre.i to i32
+  %cond24.i = select i1 %cmp18.i, i32 0, i32 %conv22.i
+  %sub.i = sub nsw i32 %.conv16.i, %cond24.i
+  %cmp1 = icmp sgt i32 %sub.i, -1 ; true when the difference is zero or positive
+  br i1 %cmp1, label %if.end3, label %if.then10
+
+if.end3:                                          ; preds = %dict_match.exit, %lor.lhs.false.i, %while.end.i
+; CHECK: %if.end3
+; CHECK: cmp
+; CHECK-NOT: cbnz
+  %storemerge1.i3 = phi i32 [ %sub.i, %dict_match.exit ], [ 0, %lor.lhs.false.i ], [ 0, %while.end.i ]
+  %right = getelementptr inbounds %struct.Dict_node_struct* %dn.tr, i32 0, i32 4
+  %4 = load %struct.Dict_node_struct** %right, align 4
+  tail call fastcc void @rdictionary_lookup(%struct.Dict_node_struct* %4, i8* %s) ; recursive call on field 4 of the current node
+  %cmp4 = icmp eq i32 %storemerge1.i3, 0
+  br i1 %cmp4, label %if.then5, label %if.end8
+
+if.then5:                                         ; preds = %if.end3
+  %call6 = tail call fastcc i8* @xalloc(i32 20)
+  %5 = bitcast i8* %call6 to %struct.Dict_node_struct*
+  %6 = bitcast %struct.Dict_node_struct* %dn.tr to i8*
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %call6, i8* %6, i32 16, i32 4, i1 false) ; copy the first 16 bytes of the current node into the new allocation
+  %7 = load %struct.Dict_node_struct** @lookup_list, align 4
+  %right7 = getelementptr inbounds i8* %call6, i32 16
+  %8 = bitcast i8* %right7 to %struct.Dict_node_struct**
+  store %struct.Dict_node_struct* %7, %struct.Dict_node_struct** %8, align 4 ; old @lookup_list value goes at byte offset 16 of the new allocation
+  store %struct.Dict_node_struct* %5, %struct.Dict_node_struct** @lookup_list, align 4 ; new allocation becomes @lookup_list
+  br label %if.then10
+
+if.end8:                                          ; preds = %if.end3
+  %cmp9 = icmp slt i32 %storemerge1.i3, 1
+  br i1 %cmp9, label %if.then10, label %if.end11
+
+if.then10:                                        ; preds = %if.end8, %if.then5, %dict_match.exit
+  %left = getelementptr inbounds %struct.Dict_node_struct* %dn.tr, i32 0, i32 3
+  %9 = load %struct.Dict_node_struct** %left, align 4 ; field 3 of the current node becomes the next %dn.tr
+  br label %tailrecurse
+
+if.end11:                                         ; preds = %if.end8, %tailrecurse
+  ret void
+}
+
+; Materializable
+declare hidden fastcc i8* @xalloc(i32) nounwind ssp
diff --git a/test/CodeGen/Thumb2/aligned-constants.ll b/test/CodeGen/Thumb2/aligned-constants.ll
new file mode 100644
index 0000000..16b3a19
--- /dev/null
+++ b/test/CodeGen/Thumb2/aligned-constants.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -mcpu=cortex-a8 | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios"
+
+; The double in the constant pool is 8-byte aligned, forcing the function
+; alignment.
+; CHECK: .align 3
+; CHECK: func
+;
+; Constant pool with 8-byte entry before 4-byte entry:
+; CHECK: .align 3
+; CHECK: LCPI
+; CHECK:	.long	2370821947
+; CHECK:	.long	1080815255
+; CHECK: LCPI
+; CHECK:	.long	1123477881
+define void @func(float* nocapture %x, double* nocapture %y) nounwind ssp {
+entry:
+  %0 = load float* %x, align 4
+  %add = fadd float %0, 0x405EDD2F20000000 ; float literal; its 32-bit encoding is 1123477881, the single-word pool entry expected above
+  store float %add, float* %x, align 4
+  %1 = load double* %y, align 4
+  %add1 = fadd double %1, 2.234560e+02 ; double literal; its two 32-bit halves are 2370821947 and 1080815255, the 8-byte pool entry expected above
+  store double %add1, double* %y, align 4
+  ret void
+}
diff --git a/test/CodeGen/Thumb2/aligned-spill.ll b/test/CodeGen/Thumb2/aligned-spill.ll
new file mode 100644
index 0000000..c98ca80
--- /dev/null
+++ b/test/CodeGen/Thumb2/aligned-spill.ll
@@ -0,0 +1,95 @@
+; RUN: llc < %s -mcpu=cortex-a8 -align-neon-spills=0 | FileCheck %s
+; RUN: llc < %s -mcpu=cortex-a8 -align-neon-spills=1 | FileCheck %s --check-prefix=NEON
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios"
+
+; CHECK: f
+; This function is forced to spill a double.
+; Verify that the spill slot is properly aligned.
+;
+; The callee-saved r4 is used as a scratch register for stack realignment.
+; CHECK: push {r4, r7, lr}
+; CHECK: bic r4, r4, #7
+; CHECK: mov sp, r4
+define void @f(double* nocapture %p) nounwind ssp {
+entry:
+  %0 = load double* %p, align 4
+  tail call void asm sideeffect "", "~{d8},~{d9},~{d10},~{d11},~{d12},~{d13},~{d14},~{d15}"() nounwind ; empty asm string; the constraint list clobbers d8-d15
+  tail call void @g() nounwind
+  store double %0, double* %p, align 4 ; %0 is loaded before the asm and the call and stored after them
+  ret void
+}
+
+; NEON: f
+; NEON: push {r4, r7, lr}
+; NEON: sub.w r4, sp, #64
+; NEON: bic r4, r4, #15
+; Stack pointer must be updated before the spills.
+; NEON: mov sp, r4
+; NEON: vst1.64 {d8, d9, d10, d11}, [r4, :128]!
+; NEON: vst1.64 {d12, d13, d14, d15}, [r4, :128]
+; Stack pointer adjustment for the stack frame contents.
+; This could legally happen before the spills.
+; Since the spill slot is only 8 bytes, technically it would be fine to only
+; subtract #8 here. That would leave sp less aligned than some stack slots,
+; and would probably blow MFI's mind.
+; NEON: sub sp, #16
+; The epilog is free to use a scratch register other than r4.
+; NEON: add r[[R4:[0-9]+]], sp, #16
+; NEON: vld1.64 {d8, d9, d10, d11}, [r[[R4]], :128]!
+; NEON: vld1.64 {d12, d13, d14, d15}, [r[[R4]], :128]
+; The stack pointer restore must happen after the reloads.
+; NEON: mov sp,
+; NEON: pop
+
+declare void @g()
+
+; Spill 7 d-registers.
+define void @f7(double* nocapture %p) nounwind ssp {
+entry:
+  tail call void asm sideeffect "", "~{d8},~{d9},~{d10},~{d11},~{d12},~{d13},~{d14}"() nounwind ; empty asm string; the constraint list clobbers d8-d14 (seven registers)
+  ret void
+}
+
+; NEON: f7
+; NEON: push {r4, r7, lr}
+; NEON: sub.w r4, sp, #56
+; NEON: bic r4, r4, #15
+; Stack pointer must be updated before the spills.
+; NEON: mov sp, r4
+; NEON: vst1.64 {d8, d9, d10, d11}, [r4, :128]!
+; NEON: vst1.64 {d12, d13}, [r4, :128]
+; NEON: vstr d14, [r4, #16]
+; Epilog
+; NEON: vld1.64 {d8, d9, d10, d11},
+; NEON: vld1.64 {d12, d13},
+; NEON: vldr d14,
+; The stack pointer restore must happen after the reloads.
+; NEON: mov sp,
+; NEON: pop
+
+; Spill 7 d-registers, leave a hole.
+define void @f3plus4(double* nocapture %p) nounwind ssp {
+entry:
+  tail call void asm sideeffect "", "~{d8},~{d9},~{d10},~{d12},~{d13},~{d14},~{d15}"() nounwind ; empty asm string; clobbers d8-d10 and d12-d15, d11 is not listed
+  ret void
+}
+
+; Aligned spilling only works for contiguous ranges starting from d8.
+; The rest goes to the standard vpush instructions.
+; NEON: f3plus4
+; NEON: push {r4, r7, lr}
+; NEON: vpush {d12, d13, d14, d15}
+; NEON: sub.w r4, sp, #24
+; NEON: bic r4, r4, #15
+; Stack pointer must be updated before the spills.
+; NEON: mov sp, r4
+; NEON: vst1.64 {d8, d9}, [r4, :128]
+; NEON: vstr d10, [r4, #16]
+; Epilog
+; NEON: vld1.64 {d8, d9},
+; NEON: vldr d10, [{{.*}}, #16]
+; The stack pointer restore must happen after the reloads.
+; NEON: mov sp,
+; NEON: vpop {d12, d13, d14, d15}
+; NEON: pop
diff --git a/test/CodeGen/Thumb2/constant-islands.ll b/test/CodeGen/Thumb2/constant-islands.ll
new file mode 100644
index 0000000..19d2385
--- /dev/null
+++ b/test/CodeGen/Thumb2/constant-islands.ll
@@ -0,0 +1,1400 @@
+; RUN: llc < %s -march=arm   -mcpu=cortex-a8 -O0 -filetype=obj -o %t.o
+; RUN: llc < %s -march=thumb -mcpu=cortex-a8 -O0 -filetype=obj -o %t.o
+; RUN: llc < %s -march=arm   -mcpu=cortex-a8 -O2 -filetype=obj -o %t.o
+; RUN: llc < %s -march=thumb -mcpu=cortex-a8 -O2 -filetype=obj -o %t.o
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios"
+
+; This function comes from the Bullet test.  It is quite big, and exercises the
+; constant island pass a bit.  It has caused failures, including
+; <rdar://problem/10670199>
+;
+; It is unlikely that this code will continue to create the exact conditions
+; that broke the arm constant island pass in the past, but it is still useful to
+; force the pass to split basic blocks etc.
+;
+; The run lines above force the integrated assembler to be enabled so it can
+; catch any illegal displacements.  Other than that, we depend on the constant
+; island pass assertions.
+
+%class.btVector3 = type { [4 x float] }
+%class.btTransform = type { %class.btMatrix3x3, %class.btVector3 }
+%class.btMatrix3x3 = type { [3 x %class.btVector3] }
+%class.btCapsuleShape = type { %class.btConvexInternalShape, i32 }
+%class.btConvexInternalShape = type { %class.btConvexShape, %class.btVector3, %class.btVector3, float, float }
+%class.btConvexShape = type { %class.btCollisionShape }
+%class.btCollisionShape = type { i32 (...)**, i32, i8* }
+%class.RagDoll = type { i32 (...)**, %class.btDynamicsWorld*, [11 x %class.btCollisionShape*], [11 x %class.btRigidBody*], [10 x %class.btTypedConstraint*] }
+%class.btDynamicsWorld = type { %class.btCollisionWorld, void (%class.btDynamicsWorld*, float)*, void (%class.btDynamicsWorld*, float)*, i8*, %struct.btContactSolverInfo }
+%class.btCollisionWorld = type { i32 (...)**, %class.btAlignedObjectArray, %class.btDispatcher*, %struct.btDispatcherInfo, %class.btStackAlloc*, %class.btBroadphaseInterface*, %class.btIDebugDraw*, i8 }
+%class.btAlignedObjectArray = type { %class.btAlignedAllocator, i32, i32, %class.btCollisionObject**, i8 }
+%class.btAlignedAllocator = type { i8 }
+%class.btCollisionObject = type { i32 (...)**, %class.btTransform, %class.btTransform, %class.btVector3, %class.btVector3, %class.btVector3, i8, float, %struct.btBroadphaseProxy*, %class.btCollisionShape*, %class.btCollisionShape*, i32, i32, i32, i32, float, float, float, i8*, i32, float, float, float, i8, [7 x i8] }
+%struct.btBroadphaseProxy = type { i8*, i16, i16, i8*, i32, %class.btVector3, %class.btVector3 }
+%class.btDispatcher = type { i32 (...)** }
+%struct.btDispatcherInfo = type { float, i32, i32, float, i8, %class.btIDebugDraw*, i8, i8, i8, float, i8, float, %class.btStackAlloc* }
+%class.btIDebugDraw = type { i32 (...)** }
+%class.btStackAlloc = type opaque
+%class.btBroadphaseInterface = type { i32 (...)** }
+%struct.btContactSolverInfo = type { %struct.btContactSolverInfoData }
+%struct.btContactSolverInfoData = type { float, float, float, float, float, i32, float, float, float, float, float, i32, float, float, float, i32, i32 }
+%class.btRigidBody = type { %class.btCollisionObject, %class.btMatrix3x3, %class.btVector3, %class.btVector3, float, %class.btVector3, %class.btVector3, %class.btVector3, %class.btVector3, %class.btVector3, %class.btVector3, %class.btVector3, float, float, i8, float, float, float, float, float, float, %class.btMotionState*, %class.btAlignedObjectArray.22, i32, i32, i32 }
+%class.btMotionState = type { i32 (...)** }
+%class.btAlignedObjectArray.22 = type { %class.btAlignedAllocator.23, i32, i32, %class.btTypedConstraint**, i8 }
+%class.btAlignedAllocator.23 = type { i8 }
+%class.btTypedConstraint = type { i32 (...)**, %struct.btTypedObject, i32, i32, i8, %class.btRigidBody*, %class.btRigidBody*, float, float, %class.btVector3, %class.btVector3, %class.btVector3 }
+%struct.btTypedObject = type { i32 }
+%class.btHingeConstraint = type { %class.btTypedConstraint, [3 x %class.btJacobianEntry], [3 x %class.btJacobianEntry], %class.btTransform, %class.btTransform, float, float, float, float, float, float, float, float, float, float, float, float, float, i8, i8, i8, i8, i8, float }
+%class.btJacobianEntry = type { %class.btVector3, %class.btVector3, %class.btVector3, %class.btVector3, %class.btVector3, float }
+%class.btConeTwistConstraint = type { %class.btTypedConstraint, [3 x %class.btJacobianEntry], %class.btTransform, %class.btTransform, float, float, float, float, float, float, float, float, %class.btVector3, %class.btVector3, float, float, float, float, float, float, float, float, i8, i8, i8, i8, float, float, %class.btVector3, i8, i8, %class.btQuaternion, float, %class.btVector3 }
+%class.btQuaternion = type { %class.btQuadWord }
+%class.btQuadWord = type { [4 x float] }
+
+@_ZTV7RagDoll = external unnamed_addr constant [4 x i8*]
+
+declare noalias i8* @_Znwm(i32)
+
+declare i32 @__gxx_personality_sj0(...)
+
+declare void @_ZdlPv(i8*) nounwind
+
+declare %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3*, float*, float*, float*) unnamed_addr inlinehint ssp align 2
+
+declare void @_ZSt9terminatev()
+
+declare %class.btTransform* @_ZN11btTransformC1Ev(%class.btTransform*) unnamed_addr ssp align 2
+
+declare void @_ZN11btTransform11setIdentityEv(%class.btTransform*) ssp align 2
+
+declare void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform*, %class.btVector3*) nounwind inlinehint ssp align 2
+
+declare i8* @_ZN13btConvexShapenwEm(i32) inlinehint ssp align 2
+
+declare void @_ZN13btConvexShapedlEPv(i8*) inlinehint ssp align 2
+
+declare %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape*, float, float)
+
+declare %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform*) nounwind inlinehint ssp align 2
+
+define %class.RagDoll* @_ZN7RagDollC2EP15btDynamicsWorldRK9btVector3f(%class.RagDoll* %this, %class.btDynamicsWorld* %ownerWorld, %class.btVector3* %positionOffset, float %scale) unnamed_addr ssp align 2 {
+entry:
+  %retval = alloca %class.RagDoll*, align 4
+  %this.addr = alloca %class.RagDoll*, align 4
+  %ownerWorld.addr = alloca %class.btDynamicsWorld*, align 4
+  %positionOffset.addr = alloca %class.btVector3*, align 4
+  %scale.addr = alloca float, align 4
+  %exn.slot = alloca i8*
+  %ehselector.slot = alloca i32
+  %offset = alloca %class.btTransform, align 4
+  %transform = alloca %class.btTransform, align 4
+  %ref.tmp = alloca %class.btVector3, align 4
+  %ref.tmp97 = alloca %class.btVector3, align 4
+  %ref.tmp98 = alloca float, align 4
+  %ref.tmp99 = alloca float, align 4
+  %ref.tmp100 = alloca float, align 4
+  %ref.tmp102 = alloca %class.btTransform, align 4
+  %ref.tmp107 = alloca %class.btVector3, align 4
+  %ref.tmp108 = alloca %class.btVector3, align 4
+  %ref.tmp109 = alloca float, align 4
+  %ref.tmp110 = alloca float, align 4
+  %ref.tmp111 = alloca float, align 4
+  %ref.tmp113 = alloca %class.btTransform, align 4
+  %ref.tmp119 = alloca %class.btVector3, align 4
+  %ref.tmp120 = alloca %class.btVector3, align 4
+  %ref.tmp121 = alloca float, align 4
+  %ref.tmp122 = alloca float, align 4
+  %ref.tmp123 = alloca float, align 4
+  %ref.tmp125 = alloca %class.btTransform, align 4
+  %ref.tmp131 = alloca %class.btVector3, align 4
+  %ref.tmp132 = alloca %class.btVector3, align 4
+  %ref.tmp133 = alloca float, align 4
+  %ref.tmp134 = alloca float, align 4
+  %ref.tmp135 = alloca float, align 4
+  %ref.tmp137 = alloca %class.btTransform, align 4
+  %ref.tmp143 = alloca %class.btVector3, align 4
+  %ref.tmp144 = alloca %class.btVector3, align 4
+  %ref.tmp145 = alloca float, align 4
+  %ref.tmp146 = alloca float, align 4
+  %ref.tmp147 = alloca float, align 4
+  %ref.tmp149 = alloca %class.btTransform, align 4
+  %ref.tmp155 = alloca %class.btVector3, align 4
+  %ref.tmp156 = alloca %class.btVector3, align 4
+  %ref.tmp157 = alloca float, align 4
+  %ref.tmp158 = alloca float, align 4
+  %ref.tmp159 = alloca float, align 4
+  %ref.tmp161 = alloca %class.btTransform, align 4
+  %ref.tmp167 = alloca %class.btVector3, align 4
+  %ref.tmp168 = alloca %class.btVector3, align 4
+  %ref.tmp169 = alloca float, align 4
+  %ref.tmp170 = alloca float, align 4
+  %ref.tmp171 = alloca float, align 4
+  %ref.tmp173 = alloca %class.btTransform, align 4
+  %ref.tmp179 = alloca %class.btVector3, align 4
+  %ref.tmp180 = alloca %class.btVector3, align 4
+  %ref.tmp181 = alloca float, align 4
+  %ref.tmp182 = alloca float, align 4
+  %ref.tmp183 = alloca float, align 4
+  %ref.tmp186 = alloca %class.btTransform, align 4
+  %ref.tmp192 = alloca %class.btVector3, align 4
+  %ref.tmp193 = alloca %class.btVector3, align 4
+  %ref.tmp194 = alloca float, align 4
+  %ref.tmp195 = alloca float, align 4
+  %ref.tmp196 = alloca float, align 4
+  %ref.tmp199 = alloca %class.btTransform, align 4
+  %ref.tmp205 = alloca %class.btVector3, align 4
+  %ref.tmp206 = alloca %class.btVector3, align 4
+  %ref.tmp207 = alloca float, align 4
+  %ref.tmp208 = alloca float, align 4
+  %ref.tmp209 = alloca float, align 4
+  %ref.tmp212 = alloca %class.btTransform, align 4
+  %ref.tmp218 = alloca %class.btVector3, align 4
+  %ref.tmp219 = alloca %class.btVector3, align 4
+  %ref.tmp220 = alloca float, align 4
+  %ref.tmp221 = alloca float, align 4
+  %ref.tmp222 = alloca float, align 4
+  %ref.tmp225 = alloca %class.btTransform, align 4
+  %i = alloca i32, align 4
+  %hingeC = alloca %class.btHingeConstraint*, align 4
+  %coneC = alloca %class.btConeTwistConstraint*, align 4
+  %localA = alloca %class.btTransform, align 4
+  %localB = alloca %class.btTransform, align 4
+  %ref.tmp240 = alloca %class.btVector3, align 4
+  %ref.tmp241 = alloca %class.btVector3, align 4
+  %ref.tmp242 = alloca float, align 4
+  %ref.tmp243 = alloca float, align 4
+  %ref.tmp244 = alloca float, align 4
+  %ref.tmp247 = alloca %class.btVector3, align 4
+  %ref.tmp248 = alloca %class.btVector3, align 4
+  %ref.tmp249 = alloca float, align 4
+  %ref.tmp250 = alloca float, align 4
+  %ref.tmp251 = alloca float, align 4
+  %ref.tmp266 = alloca %class.btVector3, align 4
+  %ref.tmp267 = alloca %class.btVector3, align 4
+  %ref.tmp268 = alloca float, align 4
+  %ref.tmp269 = alloca float, align 4
+  %ref.tmp270 = alloca float, align 4
+  %ref.tmp273 = alloca %class.btVector3, align 4
+  %ref.tmp274 = alloca %class.btVector3, align 4
+  %ref.tmp275 = alloca float, align 4
+  %ref.tmp276 = alloca float, align 4
+  %ref.tmp277 = alloca float, align 4
+  %ref.tmp295 = alloca %class.btVector3, align 4
+  %ref.tmp296 = alloca %class.btVector3, align 4
+  %ref.tmp297 = alloca float, align 4
+  %ref.tmp298 = alloca float, align 4
+  %ref.tmp299 = alloca float, align 4
+  %ref.tmp302 = alloca %class.btVector3, align 4
+  %ref.tmp303 = alloca %class.btVector3, align 4
+  %ref.tmp304 = alloca float, align 4
+  %ref.tmp305 = alloca float, align 4
+  %ref.tmp306 = alloca float, align 4
+  %ref.tmp324 = alloca %class.btVector3, align 4
+  %ref.tmp325 = alloca %class.btVector3, align 4
+  %ref.tmp326 = alloca float, align 4
+  %ref.tmp327 = alloca float, align 4
+  %ref.tmp328 = alloca float, align 4
+  %ref.tmp331 = alloca %class.btVector3, align 4
+  %ref.tmp332 = alloca %class.btVector3, align 4
+  %ref.tmp333 = alloca float, align 4
+  %ref.tmp334 = alloca float, align 4
+  %ref.tmp335 = alloca float, align 4
+  %ref.tmp353 = alloca %class.btVector3, align 4
+  %ref.tmp354 = alloca %class.btVector3, align 4
+  %ref.tmp355 = alloca float, align 4
+  %ref.tmp356 = alloca float, align 4
+  %ref.tmp357 = alloca float, align 4
+  %ref.tmp360 = alloca %class.btVector3, align 4
+  %ref.tmp361 = alloca %class.btVector3, align 4
+  %ref.tmp362 = alloca float, align 4
+  %ref.tmp363 = alloca float, align 4
+  %ref.tmp364 = alloca float, align 4
+  %ref.tmp382 = alloca %class.btVector3, align 4
+  %ref.tmp383 = alloca %class.btVector3, align 4
+  %ref.tmp384 = alloca float, align 4
+  %ref.tmp385 = alloca float, align 4
+  %ref.tmp386 = alloca float, align 4
+  %ref.tmp389 = alloca %class.btVector3, align 4
+  %ref.tmp390 = alloca %class.btVector3, align 4
+  %ref.tmp391 = alloca float, align 4
+  %ref.tmp392 = alloca float, align 4
+  %ref.tmp393 = alloca float, align 4
+  %ref.tmp411 = alloca %class.btVector3, align 4
+  %ref.tmp412 = alloca %class.btVector3, align 4
+  %ref.tmp413 = alloca float, align 4
+  %ref.tmp414 = alloca float, align 4
+  %ref.tmp415 = alloca float, align 4
+  %ref.tmp418 = alloca %class.btVector3, align 4
+  %ref.tmp419 = alloca %class.btVector3, align 4
+  %ref.tmp420 = alloca float, align 4
+  %ref.tmp421 = alloca float, align 4
+  %ref.tmp422 = alloca float, align 4
+  %ref.tmp440 = alloca %class.btVector3, align 4
+  %ref.tmp441 = alloca %class.btVector3, align 4
+  %ref.tmp442 = alloca float, align 4
+  %ref.tmp443 = alloca float, align 4
+  %ref.tmp444 = alloca float, align 4
+  %ref.tmp447 = alloca %class.btVector3, align 4
+  %ref.tmp448 = alloca %class.btVector3, align 4
+  %ref.tmp449 = alloca float, align 4
+  %ref.tmp450 = alloca float, align 4
+  %ref.tmp451 = alloca float, align 4
+  %ref.tmp469 = alloca %class.btVector3, align 4
+  %ref.tmp470 = alloca %class.btVector3, align 4
+  %ref.tmp471 = alloca float, align 4
+  %ref.tmp472 = alloca float, align 4
+  %ref.tmp473 = alloca float, align 4
+  %ref.tmp476 = alloca %class.btVector3, align 4
+  %ref.tmp477 = alloca %class.btVector3, align 4
+  %ref.tmp478 = alloca float, align 4
+  %ref.tmp479 = alloca float, align 4
+  %ref.tmp480 = alloca float, align 4
+  %ref.tmp498 = alloca %class.btVector3, align 4
+  %ref.tmp499 = alloca %class.btVector3, align 4
+  %ref.tmp500 = alloca float, align 4
+  %ref.tmp501 = alloca float, align 4
+  %ref.tmp502 = alloca float, align 4
+  %ref.tmp505 = alloca %class.btVector3, align 4
+  %ref.tmp506 = alloca %class.btVector3, align 4
+  %ref.tmp507 = alloca float, align 4
+  %ref.tmp508 = alloca float, align 4
+  %ref.tmp509 = alloca float, align 4
+  store %class.RagDoll* %this, %class.RagDoll** %this.addr, align 4
+  store %class.btDynamicsWorld* %ownerWorld, %class.btDynamicsWorld** %ownerWorld.addr, align 4
+  store %class.btVector3* %positionOffset, %class.btVector3** %positionOffset.addr, align 4
+  store float %scale, float* %scale.addr, align 4
+  %this1 = load %class.RagDoll** %this.addr
+  store %class.RagDoll* %this1, %class.RagDoll** %retval
+  %0 = bitcast %class.RagDoll* %this1 to i8***
+  store i8** getelementptr inbounds ([4 x i8*]* @_ZTV7RagDoll, i64 0, i64 2), i8*** %0
+  %m_ownerWorld = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
+  %1 = load %class.btDynamicsWorld** %ownerWorld.addr, align 4
+  store %class.btDynamicsWorld* %1, %class.btDynamicsWorld** %m_ownerWorld, align 4
+  %call = call i8* @_ZN13btConvexShapenwEm(i32 56)
+  %2 = bitcast i8* %call to %class.btCapsuleShape*
+  %3 = load float* %scale.addr, align 4
+  %mul = fmul float 0x3FC3333340000000, %3
+  %4 = load float* %scale.addr, align 4
+  %mul2 = fmul float 0x3FC99999A0000000, %4
+  %call3 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %2, float %mul, float %mul2)
+          to label %invoke.cont unwind label %lpad
+
+invoke.cont:                                      ; preds = %entry
+  %5 = bitcast %class.btCapsuleShape* %2 to %class.btCollisionShape*
+  %m_shapes = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes, i32 0, i32 0
+  store %class.btCollisionShape* %5, %class.btCollisionShape** %arrayidx, align 4
+  %call5 = call i8* @_ZN13btConvexShapenwEm(i32 56)
+  %6 = bitcast i8* %call5 to %class.btCapsuleShape*
+  %7 = load float* %scale.addr, align 4
+  %mul6 = fmul float 0x3FC3333340000000, %7
+  %8 = load float* %scale.addr, align 4
+  %mul7 = fmul float 0x3FD1EB8520000000, %8
+  %call10 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %6, float %mul6, float %mul7)
+          to label %invoke.cont9 unwind label %lpad8
+
+invoke.cont9:                                     ; preds = %invoke.cont
+  %9 = bitcast %class.btCapsuleShape* %6 to %class.btCollisionShape*
+  %m_shapes12 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx13 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes12, i32 0, i32 1
+  store %class.btCollisionShape* %9, %class.btCollisionShape** %arrayidx13, align 4
+  %call14 = call i8* @_ZN13btConvexShapenwEm(i32 56)
+  %10 = bitcast i8* %call14 to %class.btCapsuleShape*
+  %11 = load float* %scale.addr, align 4
+  %mul15 = fmul float 0x3FB99999A0000000, %11
+  %12 = load float* %scale.addr, align 4
+  %mul16 = fmul float 0x3FA99999A0000000, %12
+  %call19 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %10, float %mul15, float %mul16)
+          to label %invoke.cont18 unwind label %lpad17
+
+invoke.cont18:                                    ; preds = %invoke.cont9
+  %13 = bitcast %class.btCapsuleShape* %10 to %class.btCollisionShape*
+  %m_shapes21 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx22 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes21, i32 0, i32 2
+  store %class.btCollisionShape* %13, %class.btCollisionShape** %arrayidx22, align 4
+  %call23 = call i8* @_ZN13btConvexShapenwEm(i32 56)
+  %14 = bitcast i8* %call23 to %class.btCapsuleShape*
+  %15 = load float* %scale.addr, align 4
+  %mul24 = fmul float 0x3FB1EB8520000000, %15
+  %16 = load float* %scale.addr, align 4
+  %mul25 = fmul float 0x3FDCCCCCC0000000, %16
+  %call28 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %14, float %mul24, float %mul25)
+          to label %invoke.cont27 unwind label %lpad26
+
+invoke.cont27:                                    ; preds = %invoke.cont18
+  %17 = bitcast %class.btCapsuleShape* %14 to %class.btCollisionShape*
+  %m_shapes30 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx31 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes30, i32 0, i32 3
+  store %class.btCollisionShape* %17, %class.btCollisionShape** %arrayidx31, align 4
+  %call32 = call i8* @_ZN13btConvexShapenwEm(i32 56)
+  %18 = bitcast i8* %call32 to %class.btCapsuleShape*
+  %19 = load float* %scale.addr, align 4
+  %mul33 = fmul float 0x3FA99999A0000000, %19
+  %20 = load float* %scale.addr, align 4
+  %mul34 = fmul float 0x3FD7AE1480000000, %20
+  %call37 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %18, float %mul33, float %mul34)
+          to label %invoke.cont36 unwind label %lpad35
+
+invoke.cont36:                                    ; preds = %invoke.cont27
+  %21 = bitcast %class.btCapsuleShape* %18 to %class.btCollisionShape*
+  %m_shapes39 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx40 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes39, i32 0, i32 4
+  store %class.btCollisionShape* %21, %class.btCollisionShape** %arrayidx40, align 4
+  %call41 = call i8* @_ZN13btConvexShapenwEm(i32 56)
+  %22 = bitcast i8* %call41 to %class.btCapsuleShape*
+  %23 = load float* %scale.addr, align 4
+  %mul42 = fmul float 0x3FB1EB8520000000, %23
+  %24 = load float* %scale.addr, align 4
+  %mul43 = fmul float 0x3FDCCCCCC0000000, %24
+  %call46 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %22, float %mul42, float %mul43)
+          to label %invoke.cont45 unwind label %lpad44
+
+invoke.cont45:                                    ; preds = %invoke.cont36
+  %25 = bitcast %class.btCapsuleShape* %22 to %class.btCollisionShape*
+  %m_shapes48 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx49 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes48, i32 0, i32 5
+  store %class.btCollisionShape* %25, %class.btCollisionShape** %arrayidx49, align 4
+  %call50 = call i8* @_ZN13btConvexShapenwEm(i32 56)
+  %26 = bitcast i8* %call50 to %class.btCapsuleShape*
+  %27 = load float* %scale.addr, align 4
+  %mul51 = fmul float 0x3FA99999A0000000, %27
+  %28 = load float* %scale.addr, align 4
+  %mul52 = fmul float 0x3FD7AE1480000000, %28
+  %call55 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %26, float %mul51, float %mul52)
+          to label %invoke.cont54 unwind label %lpad53
+
+invoke.cont54:                                    ; preds = %invoke.cont45
+  %29 = bitcast %class.btCapsuleShape* %26 to %class.btCollisionShape*
+  %m_shapes57 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx58 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes57, i32 0, i32 6
+  store %class.btCollisionShape* %29, %class.btCollisionShape** %arrayidx58, align 4
+  %call59 = call i8* @_ZN13btConvexShapenwEm(i32 56)
+  %30 = bitcast i8* %call59 to %class.btCapsuleShape*
+  %31 = load float* %scale.addr, align 4
+  %mul60 = fmul float 0x3FA99999A0000000, %31
+  %32 = load float* %scale.addr, align 4
+  %mul61 = fmul float 0x3FD51EB860000000, %32
+  %call64 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %30, float %mul60, float %mul61)
+          to label %invoke.cont63 unwind label %lpad62
+
+invoke.cont63:                                    ; preds = %invoke.cont54
+  %33 = bitcast %class.btCapsuleShape* %30 to %class.btCollisionShape*
+  %m_shapes66 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx67 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes66, i32 0, i32 7
+  store %class.btCollisionShape* %33, %class.btCollisionShape** %arrayidx67, align 4
+  %call68 = call i8* @_ZN13btConvexShapenwEm(i32 56)
+  %34 = bitcast i8* %call68 to %class.btCapsuleShape*
+  %35 = load float* %scale.addr, align 4
+  %mul69 = fmul float 0x3FA47AE140000000, %35
+  %36 = load float* %scale.addr, align 4
+  %mul70 = fmul float 2.500000e-01, %36
+  %call73 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %34, float %mul69, float %mul70)
+          to label %invoke.cont72 unwind label %lpad71
+
+invoke.cont72:                                    ; preds = %invoke.cont63
+  %37 = bitcast %class.btCapsuleShape* %34 to %class.btCollisionShape*
+  %m_shapes75 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx76 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes75, i32 0, i32 8
+  store %class.btCollisionShape* %37, %class.btCollisionShape** %arrayidx76, align 4
+  %call77 = call i8* @_ZN13btConvexShapenwEm(i32 56)
+  %38 = bitcast i8* %call77 to %class.btCapsuleShape*
+  %39 = load float* %scale.addr, align 4
+  %mul78 = fmul float 0x3FA99999A0000000, %39
+  %40 = load float* %scale.addr, align 4
+  %mul79 = fmul float 0x3FD51EB860000000, %40
+  %call82 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %38, float %mul78, float %mul79)
+          to label %invoke.cont81 unwind label %lpad80
+
+invoke.cont81:                                    ; preds = %invoke.cont72
+  %41 = bitcast %class.btCapsuleShape* %38 to %class.btCollisionShape*
+  %m_shapes84 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx85 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes84, i32 0, i32 9
+  store %class.btCollisionShape* %41, %class.btCollisionShape** %arrayidx85, align 4
+  %call86 = call i8* @_ZN13btConvexShapenwEm(i32 56)
+  %42 = bitcast i8* %call86 to %class.btCapsuleShape*
+  %43 = load float* %scale.addr, align 4
+  %mul87 = fmul float 0x3FA47AE140000000, %43
+  %44 = load float* %scale.addr, align 4
+  %mul88 = fmul float 2.500000e-01, %44
+  %call91 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %42, float %mul87, float %mul88)
+          to label %invoke.cont90 unwind label %lpad89
+
+invoke.cont90:                                    ; preds = %invoke.cont81
+  %45 = bitcast %class.btCapsuleShape* %42 to %class.btCollisionShape*
+  %m_shapes93 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx94 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes93, i32 0, i32 10
+  store %class.btCollisionShape* %45, %class.btCollisionShape** %arrayidx94, align 4
+  %call95 = call %class.btTransform* @_ZN11btTransformC1Ev(%class.btTransform* %offset)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %offset)
+  %46 = load %class.btVector3** %positionOffset.addr, align 4
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %offset, %class.btVector3* %46)
+  %call96 = call %class.btTransform* @_ZN11btTransformC1Ev(%class.btTransform* %transform)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
+  store float 0.000000e+00, float* %ref.tmp98, align 4
+  store float 1.000000e+00, float* %ref.tmp99, align 4
+  store float 0.000000e+00, float* %ref.tmp100, align 4
+  %call101 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp97, float* %ref.tmp98, float* %ref.tmp99, float* %ref.tmp100)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp, float* %scale.addr, %class.btVector3* %ref.tmp97)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %transform, %class.btVector3* %ref.tmp)
+  call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp102, %class.btTransform* %offset, %class.btTransform* %transform)
+  %m_shapes103 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx104 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes103, i32 0, i32 0
+  %47 = load %class.btCollisionShape** %arrayidx104, align 4
+  %call105 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp102, %class.btCollisionShape* %47)
+  %m_bodies = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx106 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies, i32 0, i32 0
+  store %class.btRigidBody* %call105, %class.btRigidBody** %arrayidx106, align 4
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
+  store float 0.000000e+00, float* %ref.tmp109, align 4
+  store float 0x3FF3333340000000, float* %ref.tmp110, align 4
+  store float 0.000000e+00, float* %ref.tmp111, align 4
+  %call112 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp108, float* %ref.tmp109, float* %ref.tmp110, float* %ref.tmp111)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp107, float* %scale.addr, %class.btVector3* %ref.tmp108)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %transform, %class.btVector3* %ref.tmp107)
+  call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp113, %class.btTransform* %offset, %class.btTransform* %transform)
+  %m_shapes114 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx115 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes114, i32 0, i32 1
+  %48 = load %class.btCollisionShape** %arrayidx115, align 4
+  %call116 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp113, %class.btCollisionShape* %48)
+  %m_bodies117 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx118 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies117, i32 0, i32 1
+  store %class.btRigidBody* %call116, %class.btRigidBody** %arrayidx118, align 4
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
+  store float 0.000000e+00, float* %ref.tmp121, align 4
+  store float 0x3FF99999A0000000, float* %ref.tmp122, align 4
+  store float 0.000000e+00, float* %ref.tmp123, align 4
+  %call124 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp120, float* %ref.tmp121, float* %ref.tmp122, float* %ref.tmp123)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp119, float* %scale.addr, %class.btVector3* %ref.tmp120)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %transform, %class.btVector3* %ref.tmp119)
+  call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp125, %class.btTransform* %offset, %class.btTransform* %transform)
+  %m_shapes126 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx127 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes126, i32 0, i32 2
+  %49 = load %class.btCollisionShape** %arrayidx127, align 4
+  %call128 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp125, %class.btCollisionShape* %49)
+  %m_bodies129 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx130 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies129, i32 0, i32 2
+  store %class.btRigidBody* %call128, %class.btRigidBody** %arrayidx130, align 4
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
+  store float 0xBFC70A3D80000000, float* %ref.tmp133, align 4
+  store float 0x3FE4CCCCC0000000, float* %ref.tmp134, align 4
+  store float 0.000000e+00, float* %ref.tmp135, align 4
+  %call136 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp132, float* %ref.tmp133, float* %ref.tmp134, float* %ref.tmp135)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp131, float* %scale.addr, %class.btVector3* %ref.tmp132)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %transform, %class.btVector3* %ref.tmp131)
+  call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp137, %class.btTransform* %offset, %class.btTransform* %transform)
+  %m_shapes138 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx139 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes138, i32 0, i32 3
+  %50 = load %class.btCollisionShape** %arrayidx139, align 4
+  %call140 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp137, %class.btCollisionShape* %50)
+  %m_bodies141 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx142 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies141, i32 0, i32 3
+  store %class.btRigidBody* %call140, %class.btRigidBody** %arrayidx142, align 4
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
+  store float 0xBFC70A3D80000000, float* %ref.tmp145, align 4
+  store float 0x3FC99999A0000000, float* %ref.tmp146, align 4
+  store float 0.000000e+00, float* %ref.tmp147, align 4
+  %call148 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp144, float* %ref.tmp145, float* %ref.tmp146, float* %ref.tmp147)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp143, float* %scale.addr, %class.btVector3* %ref.tmp144)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %transform, %class.btVector3* %ref.tmp143)
+  call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp149, %class.btTransform* %offset, %class.btTransform* %transform)
+  %m_shapes150 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx151 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes150, i32 0, i32 4
+  %51 = load %class.btCollisionShape** %arrayidx151, align 4
+  %call152 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp149, %class.btCollisionShape* %51)
+  %m_bodies153 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx154 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies153, i32 0, i32 4
+  store %class.btRigidBody* %call152, %class.btRigidBody** %arrayidx154, align 4
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
+  store float 0x3FC70A3D80000000, float* %ref.tmp157, align 4
+  store float 0x3FE4CCCCC0000000, float* %ref.tmp158, align 4
+  store float 0.000000e+00, float* %ref.tmp159, align 4
+  %call160 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp156, float* %ref.tmp157, float* %ref.tmp158, float* %ref.tmp159)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp155, float* %scale.addr, %class.btVector3* %ref.tmp156)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %transform, %class.btVector3* %ref.tmp155)
+  call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp161, %class.btTransform* %offset, %class.btTransform* %transform)
+  %m_shapes162 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx163 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes162, i32 0, i32 5
+  %52 = load %class.btCollisionShape** %arrayidx163, align 4
+  %call164 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp161, %class.btCollisionShape* %52)
+  %m_bodies165 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx166 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies165, i32 0, i32 5
+  store %class.btRigidBody* %call164, %class.btRigidBody** %arrayidx166, align 4
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
+  store float 0x3FC70A3D80000000, float* %ref.tmp169, align 4
+  store float 0x3FC99999A0000000, float* %ref.tmp170, align 4
+  store float 0.000000e+00, float* %ref.tmp171, align 4
+  %call172 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp168, float* %ref.tmp169, float* %ref.tmp170, float* %ref.tmp171)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp167, float* %scale.addr, %class.btVector3* %ref.tmp168)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %transform, %class.btVector3* %ref.tmp167)
+  call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp173, %class.btTransform* %offset, %class.btTransform* %transform)
+  %m_shapes174 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx175 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes174, i32 0, i32 6
+  %53 = load %class.btCollisionShape** %arrayidx175, align 4
+  %call176 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp173, %class.btCollisionShape* %53)
+  %m_bodies177 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx178 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies177, i32 0, i32 6
+  store %class.btRigidBody* %call176, %class.btRigidBody** %arrayidx178, align 4
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
+  store float 0xBFD6666660000000, float* %ref.tmp181, align 4
+  store float 0x3FF7333340000000, float* %ref.tmp182, align 4
+  store float 0.000000e+00, float* %ref.tmp183, align 4
+  %call184 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp180, float* %ref.tmp181, float* %ref.tmp182, float* %ref.tmp183)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp179, float* %scale.addr, %class.btVector3* %ref.tmp180)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %transform, %class.btVector3* %ref.tmp179)
+  %call185 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %transform)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call185, float 0.000000e+00, float 0.000000e+00, float 0x3FF921FB60000000)
+  call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp186, %class.btTransform* %offset, %class.btTransform* %transform)
+  %m_shapes187 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx188 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes187, i32 0, i32 7
+  %54 = load %class.btCollisionShape** %arrayidx188, align 4
+  %call189 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp186, %class.btCollisionShape* %54)
+  %m_bodies190 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx191 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies190, i32 0, i32 7
+  store %class.btRigidBody* %call189, %class.btRigidBody** %arrayidx191, align 4
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
+  store float 0xBFE6666660000000, float* %ref.tmp194, align 4
+  store float 0x3FF7333340000000, float* %ref.tmp195, align 4
+  store float 0.000000e+00, float* %ref.tmp196, align 4
+  %call197 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp193, float* %ref.tmp194, float* %ref.tmp195, float* %ref.tmp196)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp192, float* %scale.addr, %class.btVector3* %ref.tmp193)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %transform, %class.btVector3* %ref.tmp192)
+  %call198 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %transform)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call198, float 0.000000e+00, float 0.000000e+00, float 0x3FF921FB60000000)
+  call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp199, %class.btTransform* %offset, %class.btTransform* %transform)
+  %m_shapes200 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx201 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes200, i32 0, i32 8
+  %55 = load %class.btCollisionShape** %arrayidx201, align 4
+  %call202 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp199, %class.btCollisionShape* %55)
+  %m_bodies203 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx204 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies203, i32 0, i32 8
+  store %class.btRigidBody* %call202, %class.btRigidBody** %arrayidx204, align 4
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
+  store float 0x3FD6666660000000, float* %ref.tmp207, align 4
+  store float 0x3FF7333340000000, float* %ref.tmp208, align 4
+  store float 0.000000e+00, float* %ref.tmp209, align 4
+  %call210 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp206, float* %ref.tmp207, float* %ref.tmp208, float* %ref.tmp209)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp205, float* %scale.addr, %class.btVector3* %ref.tmp206)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %transform, %class.btVector3* %ref.tmp205)
+  %call211 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %transform)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call211, float 0.000000e+00, float 0.000000e+00, float 0xBFF921FB60000000)
+  call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp212, %class.btTransform* %offset, %class.btTransform* %transform)
+  %m_shapes213 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx214 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes213, i32 0, i32 9
+  %56 = load %class.btCollisionShape** %arrayidx214, align 4
+  %call215 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp212, %class.btCollisionShape* %56)
+  %m_bodies216 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx217 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies216, i32 0, i32 9
+  store %class.btRigidBody* %call215, %class.btRigidBody** %arrayidx217, align 4
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
+  store float 0x3FE6666660000000, float* %ref.tmp220, align 4
+  store float 0x3FF7333340000000, float* %ref.tmp221, align 4
+  store float 0.000000e+00, float* %ref.tmp222, align 4
+  %call223 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp219, float* %ref.tmp220, float* %ref.tmp221, float* %ref.tmp222)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp218, float* %scale.addr, %class.btVector3* %ref.tmp219)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %transform, %class.btVector3* %ref.tmp218)
+  %call224 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %transform)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call224, float 0.000000e+00, float 0.000000e+00, float 0xBFF921FB60000000)
+  call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp225, %class.btTransform* %offset, %class.btTransform* %transform)
+  %m_shapes226 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx227 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes226, i32 0, i32 10
+  %57 = load %class.btCollisionShape** %arrayidx227, align 4
+  %call228 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp225, %class.btCollisionShape* %57)
+  %m_bodies229 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx230 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies229, i32 0, i32 10
+  store %class.btRigidBody* %call228, %class.btRigidBody** %arrayidx230, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %invoke.cont90
+  %58 = load i32* %i, align 4
+  %cmp = icmp slt i32 %58, 11
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %59 = load i32* %i, align 4
+  %m_bodies231 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx232 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies231, i32 0, i32 %59
+  %60 = load %class.btRigidBody** %arrayidx232, align 4
+  call void @_ZN11btRigidBody10setDampingEff(%class.btRigidBody* %60, float 0x3FA99999A0000000, float 0x3FEB333340000000)
+  %61 = load i32* %i, align 4
+  %m_bodies233 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx234 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies233, i32 0, i32 %61
+  %62 = load %class.btRigidBody** %arrayidx234, align 4
+  %63 = bitcast %class.btRigidBody* %62 to %class.btCollisionObject*
+  call void @_ZN17btCollisionObject19setDeactivationTimeEf(%class.btCollisionObject* %63, float 0x3FE99999A0000000)
+  %64 = load i32* %i, align 4
+  %m_bodies235 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx236 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies235, i32 0, i32 %64
+  %65 = load %class.btRigidBody** %arrayidx236, align 4
+  call void @_ZN11btRigidBody21setSleepingThresholdsEff(%class.btRigidBody* %65, float 0x3FF99999A0000000, float 2.500000e+00)
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %66 = load i32* %i, align 4
+  %inc = add nsw i32 %66, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+lpad:                                             ; preds = %entry
+  %67 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %68 = extractvalue { i8*, i32 } %67, 0
+  store i8* %68, i8** %exn.slot
+  %69 = extractvalue { i8*, i32 } %67, 1
+  store i32 %69, i32* %ehselector.slot
+  invoke void @_ZN13btConvexShapedlEPv(i8* %call)
+          to label %invoke.cont4 unwind label %terminate.lpad
+
+invoke.cont4:                                     ; preds = %lpad
+  br label %eh.resume
+
+lpad8:                                            ; preds = %invoke.cont
+  %70 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %71 = extractvalue { i8*, i32 } %70, 0
+  store i8* %71, i8** %exn.slot
+  %72 = extractvalue { i8*, i32 } %70, 1
+  store i32 %72, i32* %ehselector.slot
+  invoke void @_ZN13btConvexShapedlEPv(i8* %call5)
+          to label %invoke.cont11 unwind label %terminate.lpad
+
+invoke.cont11:                                    ; preds = %lpad8
+  br label %eh.resume
+
+lpad17:                                           ; preds = %invoke.cont9
+  %73 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %74 = extractvalue { i8*, i32 } %73, 0
+  store i8* %74, i8** %exn.slot
+  %75 = extractvalue { i8*, i32 } %73, 1
+  store i32 %75, i32* %ehselector.slot
+  invoke void @_ZN13btConvexShapedlEPv(i8* %call14)
+          to label %invoke.cont20 unwind label %terminate.lpad
+
+invoke.cont20:                                    ; preds = %lpad17
+  br label %eh.resume
+
+lpad26:                                           ; preds = %invoke.cont18
+  %76 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %77 = extractvalue { i8*, i32 } %76, 0
+  store i8* %77, i8** %exn.slot
+  %78 = extractvalue { i8*, i32 } %76, 1
+  store i32 %78, i32* %ehselector.slot
+  invoke void @_ZN13btConvexShapedlEPv(i8* %call23)
+          to label %invoke.cont29 unwind label %terminate.lpad
+
+invoke.cont29:                                    ; preds = %lpad26
+  br label %eh.resume
+
+lpad35:                                           ; preds = %invoke.cont27
+  %79 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %80 = extractvalue { i8*, i32 } %79, 0
+  store i8* %80, i8** %exn.slot
+  %81 = extractvalue { i8*, i32 } %79, 1
+  store i32 %81, i32* %ehselector.slot
+  invoke void @_ZN13btConvexShapedlEPv(i8* %call32)
+          to label %invoke.cont38 unwind label %terminate.lpad
+
+invoke.cont38:                                    ; preds = %lpad35
+  br label %eh.resume
+
+lpad44:                                           ; preds = %invoke.cont36
+  %82 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %83 = extractvalue { i8*, i32 } %82, 0
+  store i8* %83, i8** %exn.slot
+  %84 = extractvalue { i8*, i32 } %82, 1
+  store i32 %84, i32* %ehselector.slot
+  invoke void @_ZN13btConvexShapedlEPv(i8* %call41)
+          to label %invoke.cont47 unwind label %terminate.lpad
+
+invoke.cont47:                                    ; preds = %lpad44
+  br label %eh.resume
+
+lpad53:                                           ; preds = %invoke.cont45
+  %85 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %86 = extractvalue { i8*, i32 } %85, 0
+  store i8* %86, i8** %exn.slot
+  %87 = extractvalue { i8*, i32 } %85, 1
+  store i32 %87, i32* %ehselector.slot
+  invoke void @_ZN13btConvexShapedlEPv(i8* %call50)
+          to label %invoke.cont56 unwind label %terminate.lpad
+
+invoke.cont56:                                    ; preds = %lpad53
+  br label %eh.resume
+
+lpad62:                                           ; preds = %invoke.cont54
+  %88 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %89 = extractvalue { i8*, i32 } %88, 0
+  store i8* %89, i8** %exn.slot
+  %90 = extractvalue { i8*, i32 } %88, 1
+  store i32 %90, i32* %ehselector.slot
+  invoke void @_ZN13btConvexShapedlEPv(i8* %call59)
+          to label %invoke.cont65 unwind label %terminate.lpad
+
+invoke.cont65:                                    ; preds = %lpad62
+  br label %eh.resume
+
+lpad71:                                           ; preds = %invoke.cont63
+  %91 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %92 = extractvalue { i8*, i32 } %91, 0
+  store i8* %92, i8** %exn.slot
+  %93 = extractvalue { i8*, i32 } %91, 1
+  store i32 %93, i32* %ehselector.slot
+  invoke void @_ZN13btConvexShapedlEPv(i8* %call68)
+          to label %invoke.cont74 unwind label %terminate.lpad
+
+invoke.cont74:                                    ; preds = %lpad71
+  br label %eh.resume
+
+lpad80:                                           ; preds = %invoke.cont72
+  %94 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %95 = extractvalue { i8*, i32 } %94, 0
+  store i8* %95, i8** %exn.slot
+  %96 = extractvalue { i8*, i32 } %94, 1
+  store i32 %96, i32* %ehselector.slot
+  invoke void @_ZN13btConvexShapedlEPv(i8* %call77)
+          to label %invoke.cont83 unwind label %terminate.lpad
+
+invoke.cont83:                                    ; preds = %lpad80
+  br label %eh.resume
+
+lpad89:                                           ; preds = %invoke.cont81
+  %97 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %98 = extractvalue { i8*, i32 } %97, 0
+  store i8* %98, i8** %exn.slot
+  %99 = extractvalue { i8*, i32 } %97, 1
+  store i32 %99, i32* %ehselector.slot
+  invoke void @_ZN13btConvexShapedlEPv(i8* %call86)
+          to label %invoke.cont92 unwind label %terminate.lpad
+
+invoke.cont92:                                    ; preds = %lpad89
+  br label %eh.resume
+
+for.end:                                          ; preds = %for.cond
+  %call237 = call %class.btTransform* @_ZN11btTransformC1Ev(%class.btTransform* %localA)
+  %call238 = call %class.btTransform* @_ZN11btTransformC1Ev(%class.btTransform* %localB)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
+  %call239 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localA)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call239, float 0.000000e+00, float 0x3FF921FB60000000, float 0.000000e+00)
+  store float 0.000000e+00, float* %ref.tmp242, align 4
+  store float 0x3FC3333340000000, float* %ref.tmp243, align 4
+  store float 0.000000e+00, float* %ref.tmp244, align 4
+  %call245 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp241, float* %ref.tmp242, float* %ref.tmp243, float* %ref.tmp244)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp240, float* %scale.addr, %class.btVector3* %ref.tmp241)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localA, %class.btVector3* %ref.tmp240)
+  %call246 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localB)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call246, float 0.000000e+00, float 0x3FF921FB60000000, float 0.000000e+00)
+  store float 0.000000e+00, float* %ref.tmp249, align 4
+  store float 0xBFC3333340000000, float* %ref.tmp250, align 4
+  store float 0.000000e+00, float* %ref.tmp251, align 4
+  %call252 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp248, float* %ref.tmp249, float* %ref.tmp250, float* %ref.tmp251)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp247, float* %scale.addr, %class.btVector3* %ref.tmp248)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localB, %class.btVector3* %ref.tmp247)
+  %call253 = call noalias i8* @_Znwm(i32 780)
+  %100 = bitcast i8* %call253 to %class.btHingeConstraint*
+  %m_bodies254 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx255 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies254, i32 0, i32 0
+  %101 = load %class.btRigidBody** %arrayidx255, align 4
+  %m_bodies256 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx257 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies256, i32 0, i32 1
+  %102 = load %class.btRigidBody** %arrayidx257, align 4
+  %call260 = invoke %class.btHingeConstraint* @_ZN17btHingeConstraintC1ER11btRigidBodyS1_RK11btTransformS4_b(%class.btHingeConstraint* %100, %class.btRigidBody* %101, %class.btRigidBody* %102, %class.btTransform* %localA, %class.btTransform* %localB, i1 zeroext false)
+          to label %invoke.cont259 unwind label %lpad258
+
+invoke.cont259:                                   ; preds = %for.end
+  store %class.btHingeConstraint* %100, %class.btHingeConstraint** %hingeC, align 4
+  %103 = load %class.btHingeConstraint** %hingeC, align 4
+  call void @_ZN17btHingeConstraint8setLimitEfffff(%class.btHingeConstraint* %103, float 0xBFE921FB60000000, float 0x3FF921FB60000000, float 0x3FECCCCCC0000000, float 0x3FD3333340000000, float 1.000000e+00)
+  %104 = load %class.btHingeConstraint** %hingeC, align 4
+  %105 = bitcast %class.btHingeConstraint* %104 to %class.btTypedConstraint*
+  %m_joints = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx261 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints, i32 0, i32 0
+  store %class.btTypedConstraint* %105, %class.btTypedConstraint** %arrayidx261, align 4
+  %m_ownerWorld262 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
+  %106 = load %class.btDynamicsWorld** %m_ownerWorld262, align 4
+  %107 = bitcast %class.btDynamicsWorld* %106 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
+  %vtable = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %107
+  %vfn = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable, i64 10
+  %108 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn
+  %m_joints263 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx264 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints263, i32 0, i32 0
+  %109 = load %class.btTypedConstraint** %arrayidx264, align 4
+  call void %108(%class.btDynamicsWorld* %106, %class.btTypedConstraint* %109, i1 zeroext true)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
+  %call265 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localA)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call265, float 0.000000e+00, float 0.000000e+00, float 0x3FF921FB60000000)
+  store float 0.000000e+00, float* %ref.tmp268, align 4
+  store float 0x3FD3333340000000, float* %ref.tmp269, align 4
+  store float 0.000000e+00, float* %ref.tmp270, align 4
+  %call271 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp267, float* %ref.tmp268, float* %ref.tmp269, float* %ref.tmp270)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp266, float* %scale.addr, %class.btVector3* %ref.tmp267)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localA, %class.btVector3* %ref.tmp266)
+  %call272 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localB)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call272, float 0.000000e+00, float 0.000000e+00, float 0x3FF921FB60000000)
+  store float 0.000000e+00, float* %ref.tmp275, align 4
+  store float 0xBFC1EB8520000000, float* %ref.tmp276, align 4
+  store float 0.000000e+00, float* %ref.tmp277, align 4
+  %call278 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp274, float* %ref.tmp275, float* %ref.tmp276, float* %ref.tmp277)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp273, float* %scale.addr, %class.btVector3* %ref.tmp274)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localB, %class.btVector3* %ref.tmp273)
+  %call279 = call noalias i8* @_Znwm(i32 628)
+  %110 = bitcast i8* %call279 to %class.btConeTwistConstraint*
+  %m_bodies280 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx281 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies280, i32 0, i32 1
+  %111 = load %class.btRigidBody** %arrayidx281, align 4
+  %m_bodies282 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx283 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies282, i32 0, i32 2
+  %112 = load %class.btRigidBody** %arrayidx283, align 4
+  %call286 = invoke %class.btConeTwistConstraint* @_ZN21btConeTwistConstraintC1ER11btRigidBodyS1_RK11btTransformS4_(%class.btConeTwistConstraint* %110, %class.btRigidBody* %111, %class.btRigidBody* %112, %class.btTransform* %localA, %class.btTransform* %localB)
+          to label %invoke.cont285 unwind label %lpad284
+
+invoke.cont285:                                   ; preds = %invoke.cont259
+  store %class.btConeTwistConstraint* %110, %class.btConeTwistConstraint** %coneC, align 4
+  %113 = load %class.btConeTwistConstraint** %coneC, align 4
+  call void @_ZN21btConeTwistConstraint8setLimitEffffff(%class.btConeTwistConstraint* %113, float 0x3FE921FB60000000, float 0x3FE921FB60000000, float 0x3FF921FB60000000, float 1.000000e+00, float 0x3FD3333340000000, float 1.000000e+00)
+  %114 = load %class.btConeTwistConstraint** %coneC, align 4
+  %115 = bitcast %class.btConeTwistConstraint* %114 to %class.btTypedConstraint*
+  %m_joints287 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx288 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints287, i32 0, i32 1
+  store %class.btTypedConstraint* %115, %class.btTypedConstraint** %arrayidx288, align 4
+  %m_ownerWorld289 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
+  %116 = load %class.btDynamicsWorld** %m_ownerWorld289, align 4
+  %117 = bitcast %class.btDynamicsWorld* %116 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
+  %vtable290 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %117
+  %vfn291 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable290, i64 10
+  %118 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn291
+  %m_joints292 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx293 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints292, i32 0, i32 1
+  %119 = load %class.btTypedConstraint** %arrayidx293, align 4
+  call void %118(%class.btDynamicsWorld* %116, %class.btTypedConstraint* %119, i1 zeroext true)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
+  %call294 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localA)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call294, float 0.000000e+00, float 0.000000e+00, float 0xC00F6A7A20000000)
+  store float 0xBFC70A3D80000000, float* %ref.tmp297, align 4
+  store float 0xBFB99999A0000000, float* %ref.tmp298, align 4
+  store float 0.000000e+00, float* %ref.tmp299, align 4
+  %call300 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp296, float* %ref.tmp297, float* %ref.tmp298, float* %ref.tmp299)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp295, float* %scale.addr, %class.btVector3* %ref.tmp296)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localA, %class.btVector3* %ref.tmp295)
+  %call301 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localB)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call301, float 0.000000e+00, float 0.000000e+00, float 0xC00F6A7A20000000)
+  store float 0.000000e+00, float* %ref.tmp304, align 4
+  store float 0x3FCCCCCCC0000000, float* %ref.tmp305, align 4
+  store float 0.000000e+00, float* %ref.tmp306, align 4
+  %call307 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp303, float* %ref.tmp304, float* %ref.tmp305, float* %ref.tmp306)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp302, float* %scale.addr, %class.btVector3* %ref.tmp303)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localB, %class.btVector3* %ref.tmp302)
+  %call308 = call noalias i8* @_Znwm(i32 628)
+  %120 = bitcast i8* %call308 to %class.btConeTwistConstraint*
+  %m_bodies309 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx310 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies309, i32 0, i32 0
+  %121 = load %class.btRigidBody** %arrayidx310, align 4
+  %m_bodies311 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx312 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies311, i32 0, i32 3
+  %122 = load %class.btRigidBody** %arrayidx312, align 4
+  %call315 = invoke %class.btConeTwistConstraint* @_ZN21btConeTwistConstraintC1ER11btRigidBodyS1_RK11btTransformS4_(%class.btConeTwistConstraint* %120, %class.btRigidBody* %121, %class.btRigidBody* %122, %class.btTransform* %localA, %class.btTransform* %localB)
+          to label %invoke.cont314 unwind label %lpad313
+
+invoke.cont314:                                   ; preds = %invoke.cont285
+  store %class.btConeTwistConstraint* %120, %class.btConeTwistConstraint** %coneC, align 4
+  %123 = load %class.btConeTwistConstraint** %coneC, align 4
+  call void @_ZN21btConeTwistConstraint8setLimitEffffff(%class.btConeTwistConstraint* %123, float 0x3FE921FB60000000, float 0x3FE921FB60000000, float 0.000000e+00, float 1.000000e+00, float 0x3FD3333340000000, float 1.000000e+00)
+  %124 = load %class.btConeTwistConstraint** %coneC, align 4
+  %125 = bitcast %class.btConeTwistConstraint* %124 to %class.btTypedConstraint*
+  %m_joints316 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx317 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints316, i32 0, i32 2
+  store %class.btTypedConstraint* %125, %class.btTypedConstraint** %arrayidx317, align 4
+  %m_ownerWorld318 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
+  %126 = load %class.btDynamicsWorld** %m_ownerWorld318, align 4
+  %127 = bitcast %class.btDynamicsWorld* %126 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
+  %vtable319 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %127
+  %vfn320 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable319, i64 10
+  %128 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn320
+  %m_joints321 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx322 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints321, i32 0, i32 2
+  %129 = load %class.btTypedConstraint** %arrayidx322, align 4
+  call void %128(%class.btDynamicsWorld* %126, %class.btTypedConstraint* %129, i1 zeroext true)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
+  %call323 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localA)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call323, float 0.000000e+00, float 0x3FF921FB60000000, float 0.000000e+00)
+  store float 0.000000e+00, float* %ref.tmp326, align 4
+  store float 0xBFCCCCCCC0000000, float* %ref.tmp327, align 4
+  store float 0.000000e+00, float* %ref.tmp328, align 4
+  %call329 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp325, float* %ref.tmp326, float* %ref.tmp327, float* %ref.tmp328)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp324, float* %scale.addr, %class.btVector3* %ref.tmp325)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localA, %class.btVector3* %ref.tmp324)
+  %call330 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localB)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call330, float 0.000000e+00, float 0x3FF921FB60000000, float 0.000000e+00)
+  store float 0.000000e+00, float* %ref.tmp333, align 4
+  store float 0x3FC7AE1480000000, float* %ref.tmp334, align 4
+  store float 0.000000e+00, float* %ref.tmp335, align 4
+  %call336 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp332, float* %ref.tmp333, float* %ref.tmp334, float* %ref.tmp335)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp331, float* %scale.addr, %class.btVector3* %ref.tmp332)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localB, %class.btVector3* %ref.tmp331)
+  %call337 = call noalias i8* @_Znwm(i32 780)
+  %130 = bitcast i8* %call337 to %class.btHingeConstraint*
+  %m_bodies338 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx339 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies338, i32 0, i32 3
+  %131 = load %class.btRigidBody** %arrayidx339, align 4
+  %m_bodies340 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx341 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies340, i32 0, i32 4
+  %132 = load %class.btRigidBody** %arrayidx341, align 4
+  %call344 = invoke %class.btHingeConstraint* @_ZN17btHingeConstraintC1ER11btRigidBodyS1_RK11btTransformS4_b(%class.btHingeConstraint* %130, %class.btRigidBody* %131, %class.btRigidBody* %132, %class.btTransform* %localA, %class.btTransform* %localB, i1 zeroext false)
+          to label %invoke.cont343 unwind label %lpad342
+
+invoke.cont343:                                   ; preds = %invoke.cont314
+  store %class.btHingeConstraint* %130, %class.btHingeConstraint** %hingeC, align 4
+  %133 = load %class.btHingeConstraint** %hingeC, align 4
+  call void @_ZN17btHingeConstraint8setLimitEfffff(%class.btHingeConstraint* %133, float 0.000000e+00, float 0x3FF921FB60000000, float 0x3FECCCCCC0000000, float 0x3FD3333340000000, float 1.000000e+00)
+  %134 = load %class.btHingeConstraint** %hingeC, align 4
+  %135 = bitcast %class.btHingeConstraint* %134 to %class.btTypedConstraint*
+  %m_joints345 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx346 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints345, i32 0, i32 3
+  store %class.btTypedConstraint* %135, %class.btTypedConstraint** %arrayidx346, align 4
+  %m_ownerWorld347 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
+  %136 = load %class.btDynamicsWorld** %m_ownerWorld347, align 4
+  %137 = bitcast %class.btDynamicsWorld* %136 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
+  %vtable348 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %137
+  %vfn349 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable348, i64 10
+  %138 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn349
+  %m_joints350 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx351 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints350, i32 0, i32 3
+  %139 = load %class.btTypedConstraint** %arrayidx351, align 4
+  call void %138(%class.btDynamicsWorld* %136, %class.btTypedConstraint* %139, i1 zeroext true)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
+  %call352 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localA)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call352, float 0.000000e+00, float 0.000000e+00, float 0x3FE921FB60000000)
+  store float 0x3FC70A3D80000000, float* %ref.tmp355, align 4
+  store float 0xBFB99999A0000000, float* %ref.tmp356, align 4
+  store float 0.000000e+00, float* %ref.tmp357, align 4
+  %call358 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp354, float* %ref.tmp355, float* %ref.tmp356, float* %ref.tmp357)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp353, float* %scale.addr, %class.btVector3* %ref.tmp354)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localA, %class.btVector3* %ref.tmp353)
+  %call359 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localB)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call359, float 0.000000e+00, float 0.000000e+00, float 0x3FE921FB60000000)
+  store float 0.000000e+00, float* %ref.tmp362, align 4
+  store float 0x3FCCCCCCC0000000, float* %ref.tmp363, align 4
+  store float 0.000000e+00, float* %ref.tmp364, align 4
+  %call365 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp361, float* %ref.tmp362, float* %ref.tmp363, float* %ref.tmp364)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp360, float* %scale.addr, %class.btVector3* %ref.tmp361)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localB, %class.btVector3* %ref.tmp360)
+  %call366 = call noalias i8* @_Znwm(i32 628)
+  %140 = bitcast i8* %call366 to %class.btConeTwistConstraint*
+  %m_bodies367 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx368 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies367, i32 0, i32 0
+  %141 = load %class.btRigidBody** %arrayidx368, align 4
+  %m_bodies369 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx370 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies369, i32 0, i32 5
+  %142 = load %class.btRigidBody** %arrayidx370, align 4
+  %call373 = invoke %class.btConeTwistConstraint* @_ZN21btConeTwistConstraintC1ER11btRigidBodyS1_RK11btTransformS4_(%class.btConeTwistConstraint* %140, %class.btRigidBody* %141, %class.btRigidBody* %142, %class.btTransform* %localA, %class.btTransform* %localB)
+          to label %invoke.cont372 unwind label %lpad371
+
+invoke.cont372:                                   ; preds = %invoke.cont343
+  store %class.btConeTwistConstraint* %140, %class.btConeTwistConstraint** %coneC, align 4
+  %143 = load %class.btConeTwistConstraint** %coneC, align 4
+  call void @_ZN21btConeTwistConstraint8setLimitEffffff(%class.btConeTwistConstraint* %143, float 0x3FE921FB60000000, float 0x3FE921FB60000000, float 0.000000e+00, float 1.000000e+00, float 0x3FD3333340000000, float 1.000000e+00)
+  %144 = load %class.btConeTwistConstraint** %coneC, align 4
+  %145 = bitcast %class.btConeTwistConstraint* %144 to %class.btTypedConstraint*
+  %m_joints374 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx375 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints374, i32 0, i32 4
+  store %class.btTypedConstraint* %145, %class.btTypedConstraint** %arrayidx375, align 4
+  %m_ownerWorld376 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
+  %146 = load %class.btDynamicsWorld** %m_ownerWorld376, align 4
+  %147 = bitcast %class.btDynamicsWorld* %146 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
+  %vtable377 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %147
+  %vfn378 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable377, i64 10
+  %148 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn378
+  %m_joints379 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx380 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints379, i32 0, i32 4
+  %149 = load %class.btTypedConstraint** %arrayidx380, align 4
+  call void %148(%class.btDynamicsWorld* %146, %class.btTypedConstraint* %149, i1 zeroext true)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
+  %call381 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localA)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call381, float 0.000000e+00, float 0x3FF921FB60000000, float 0.000000e+00)
+  store float 0.000000e+00, float* %ref.tmp384, align 4
+  store float 0xBFCCCCCCC0000000, float* %ref.tmp385, align 4
+  store float 0.000000e+00, float* %ref.tmp386, align 4
+  %call387 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp383, float* %ref.tmp384, float* %ref.tmp385, float* %ref.tmp386)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp382, float* %scale.addr, %class.btVector3* %ref.tmp383)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localA, %class.btVector3* %ref.tmp382)
+  %call388 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localB)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call388, float 0.000000e+00, float 0x3FF921FB60000000, float 0.000000e+00)
+  store float 0.000000e+00, float* %ref.tmp391, align 4
+  store float 0x3FC7AE1480000000, float* %ref.tmp392, align 4
+  store float 0.000000e+00, float* %ref.tmp393, align 4
+  %call394 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp390, float* %ref.tmp391, float* %ref.tmp392, float* %ref.tmp393)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp389, float* %scale.addr, %class.btVector3* %ref.tmp390)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localB, %class.btVector3* %ref.tmp389)
+  %call395 = call noalias i8* @_Znwm(i32 780)
+  %150 = bitcast i8* %call395 to %class.btHingeConstraint*
+  %m_bodies396 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx397 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies396, i32 0, i32 5
+  %151 = load %class.btRigidBody** %arrayidx397, align 4
+  %m_bodies398 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx399 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies398, i32 0, i32 6
+  %152 = load %class.btRigidBody** %arrayidx399, align 4
+  %call402 = invoke %class.btHingeConstraint* @_ZN17btHingeConstraintC1ER11btRigidBodyS1_RK11btTransformS4_b(%class.btHingeConstraint* %150, %class.btRigidBody* %151, %class.btRigidBody* %152, %class.btTransform* %localA, %class.btTransform* %localB, i1 zeroext false)
+          to label %invoke.cont401 unwind label %lpad400
+
+invoke.cont401:                                   ; preds = %invoke.cont372
+  store %class.btHingeConstraint* %150, %class.btHingeConstraint** %hingeC, align 4
+  %153 = load %class.btHingeConstraint** %hingeC, align 4
+  call void @_ZN17btHingeConstraint8setLimitEfffff(%class.btHingeConstraint* %153, float 0.000000e+00, float 0x3FF921FB60000000, float 0x3FECCCCCC0000000, float 0x3FD3333340000000, float 1.000000e+00)
+  %154 = load %class.btHingeConstraint** %hingeC, align 4
+  %155 = bitcast %class.btHingeConstraint* %154 to %class.btTypedConstraint*
+  %m_joints403 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx404 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints403, i32 0, i32 5
+  store %class.btTypedConstraint* %155, %class.btTypedConstraint** %arrayidx404, align 4
+  %m_ownerWorld405 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
+  %156 = load %class.btDynamicsWorld** %m_ownerWorld405, align 4
+  %157 = bitcast %class.btDynamicsWorld* %156 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
+  %vtable406 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %157
+  %vfn407 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable406, i64 10
+  %158 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn407
+  %m_joints408 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx409 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints408, i32 0, i32 5
+  %159 = load %class.btTypedConstraint** %arrayidx409, align 4
+  call void %158(%class.btDynamicsWorld* %156, %class.btTypedConstraint* %159, i1 zeroext true)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
+  %call410 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localA)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call410, float 0.000000e+00, float 0.000000e+00, float 0x400921FB60000000)
+  store float 0xBFC99999A0000000, float* %ref.tmp413, align 4
+  store float 0x3FC3333340000000, float* %ref.tmp414, align 4
+  store float 0.000000e+00, float* %ref.tmp415, align 4
+  %call416 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp412, float* %ref.tmp413, float* %ref.tmp414, float* %ref.tmp415)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp411, float* %scale.addr, %class.btVector3* %ref.tmp412)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localA, %class.btVector3* %ref.tmp411)
+  %call417 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localB)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call417, float 0.000000e+00, float 0.000000e+00, float 0x3FF921FB60000000)
+  store float 0.000000e+00, float* %ref.tmp420, align 4
+  store float 0xBFC70A3D80000000, float* %ref.tmp421, align 4
+  store float 0.000000e+00, float* %ref.tmp422, align 4
+  %call423 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp419, float* %ref.tmp420, float* %ref.tmp421, float* %ref.tmp422)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp418, float* %scale.addr, %class.btVector3* %ref.tmp419)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localB, %class.btVector3* %ref.tmp418)
+  %call424 = call noalias i8* @_Znwm(i32 628)
+  %160 = bitcast i8* %call424 to %class.btConeTwistConstraint*
+  %m_bodies425 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx426 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies425, i32 0, i32 1
+  %161 = load %class.btRigidBody** %arrayidx426, align 4
+  %m_bodies427 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx428 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies427, i32 0, i32 7
+  %162 = load %class.btRigidBody** %arrayidx428, align 4
+  %call431 = invoke %class.btConeTwistConstraint* @_ZN21btConeTwistConstraintC1ER11btRigidBodyS1_RK11btTransformS4_(%class.btConeTwistConstraint* %160, %class.btRigidBody* %161, %class.btRigidBody* %162, %class.btTransform* %localA, %class.btTransform* %localB)
+          to label %invoke.cont430 unwind label %lpad429
+
+invoke.cont430:                                   ; preds = %invoke.cont401
+  store %class.btConeTwistConstraint* %160, %class.btConeTwistConstraint** %coneC, align 4
+  %163 = load %class.btConeTwistConstraint** %coneC, align 4
+  call void @_ZN21btConeTwistConstraint8setLimitEffffff(%class.btConeTwistConstraint* %163, float 0x3FF921FB60000000, float 0x3FF921FB60000000, float 0.000000e+00, float 1.000000e+00, float 0x3FD3333340000000, float 1.000000e+00)
+  %164 = load %class.btConeTwistConstraint** %coneC, align 4
+  %165 = bitcast %class.btConeTwistConstraint* %164 to %class.btTypedConstraint*
+  %m_joints432 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx433 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints432, i32 0, i32 6
+  store %class.btTypedConstraint* %165, %class.btTypedConstraint** %arrayidx433, align 4
+  %m_ownerWorld434 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
+  %166 = load %class.btDynamicsWorld** %m_ownerWorld434, align 4
+  %167 = bitcast %class.btDynamicsWorld* %166 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
+  %vtable435 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %167
+  %vfn436 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable435, i64 10
+  %168 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn436
+  %m_joints437 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx438 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints437, i32 0, i32 6
+  %169 = load %class.btTypedConstraint** %arrayidx438, align 4
+  call void %168(%class.btDynamicsWorld* %166, %class.btTypedConstraint* %169, i1 zeroext true)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
+  %call439 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localA)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call439, float 0.000000e+00, float 0x3FF921FB60000000, float 0.000000e+00)
+  store float 0.000000e+00, float* %ref.tmp442, align 4
+  store float 0x3FC70A3D80000000, float* %ref.tmp443, align 4
+  store float 0.000000e+00, float* %ref.tmp444, align 4
+  %call445 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp441, float* %ref.tmp442, float* %ref.tmp443, float* %ref.tmp444)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp440, float* %scale.addr, %class.btVector3* %ref.tmp441)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localA, %class.btVector3* %ref.tmp440)
+  %call446 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localB)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call446, float 0.000000e+00, float 0x3FF921FB60000000, float 0.000000e+00)
+  store float 0.000000e+00, float* %ref.tmp449, align 4
+  store float 0xBFC1EB8520000000, float* %ref.tmp450, align 4
+  store float 0.000000e+00, float* %ref.tmp451, align 4
+  %call452 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp448, float* %ref.tmp449, float* %ref.tmp450, float* %ref.tmp451)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp447, float* %scale.addr, %class.btVector3* %ref.tmp448)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localB, %class.btVector3* %ref.tmp447)
+  %call453 = call noalias i8* @_Znwm(i32 780)
+  %170 = bitcast i8* %call453 to %class.btHingeConstraint*
+  %m_bodies454 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx455 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies454, i32 0, i32 7
+  %171 = load %class.btRigidBody** %arrayidx455, align 4
+  %m_bodies456 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx457 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies456, i32 0, i32 8
+  %172 = load %class.btRigidBody** %arrayidx457, align 4
+  %call460 = invoke %class.btHingeConstraint* @_ZN17btHingeConstraintC1ER11btRigidBodyS1_RK11btTransformS4_b(%class.btHingeConstraint* %170, %class.btRigidBody* %171, %class.btRigidBody* %172, %class.btTransform* %localA, %class.btTransform* %localB, i1 zeroext false)
+          to label %invoke.cont459 unwind label %lpad458
+
+invoke.cont459:                                   ; preds = %invoke.cont430
+  store %class.btHingeConstraint* %170, %class.btHingeConstraint** %hingeC, align 4
+  %173 = load %class.btHingeConstraint** %hingeC, align 4
+  call void @_ZN17btHingeConstraint8setLimitEfffff(%class.btHingeConstraint* %173, float 0xBFF921FB60000000, float 0.000000e+00, float 0x3FECCCCCC0000000, float 0x3FD3333340000000, float 1.000000e+00)
+  %174 = load %class.btHingeConstraint** %hingeC, align 4
+  %175 = bitcast %class.btHingeConstraint* %174 to %class.btTypedConstraint*
+  %m_joints461 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx462 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints461, i32 0, i32 7
+  store %class.btTypedConstraint* %175, %class.btTypedConstraint** %arrayidx462, align 4
+  %m_ownerWorld463 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
+  %176 = load %class.btDynamicsWorld** %m_ownerWorld463, align 4
+  %177 = bitcast %class.btDynamicsWorld* %176 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
+  %vtable464 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %177
+  %vfn465 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable464, i64 10
+  %178 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn465
+  %m_joints466 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx467 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints466, i32 0, i32 7
+  %179 = load %class.btTypedConstraint** %arrayidx467, align 4
+  call void %178(%class.btDynamicsWorld* %176, %class.btTypedConstraint* %179, i1 zeroext true)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
+  %call468 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localA)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call468, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00)
+  store float 0x3FC99999A0000000, float* %ref.tmp471, align 4
+  store float 0x3FC3333340000000, float* %ref.tmp472, align 4
+  store float 0.000000e+00, float* %ref.tmp473, align 4
+  %call474 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp470, float* %ref.tmp471, float* %ref.tmp472, float* %ref.tmp473)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp469, float* %scale.addr, %class.btVector3* %ref.tmp470)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localA, %class.btVector3* %ref.tmp469)
+  %call475 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localB)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call475, float 0.000000e+00, float 0.000000e+00, float 0x3FF921FB60000000)
+  store float 0.000000e+00, float* %ref.tmp478, align 4
+  store float 0xBFC70A3D80000000, float* %ref.tmp479, align 4
+  store float 0.000000e+00, float* %ref.tmp480, align 4
+  %call481 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp477, float* %ref.tmp478, float* %ref.tmp479, float* %ref.tmp480)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp476, float* %scale.addr, %class.btVector3* %ref.tmp477)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localB, %class.btVector3* %ref.tmp476)
+  %call482 = call noalias i8* @_Znwm(i32 628)
+  %180 = bitcast i8* %call482 to %class.btConeTwistConstraint*
+  %m_bodies483 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx484 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies483, i32 0, i32 1
+  %181 = load %class.btRigidBody** %arrayidx484, align 4
+  %m_bodies485 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx486 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies485, i32 0, i32 9
+  %182 = load %class.btRigidBody** %arrayidx486, align 4
+  %call489 = invoke %class.btConeTwistConstraint* @_ZN21btConeTwistConstraintC1ER11btRigidBodyS1_RK11btTransformS4_(%class.btConeTwistConstraint* %180, %class.btRigidBody* %181, %class.btRigidBody* %182, %class.btTransform* %localA, %class.btTransform* %localB)
+          to label %invoke.cont488 unwind label %lpad487
+
+invoke.cont488:                                   ; preds = %invoke.cont459
+  store %class.btConeTwistConstraint* %180, %class.btConeTwistConstraint** %coneC, align 4
+  %183 = load %class.btConeTwistConstraint** %coneC, align 4
+  call void @_ZN21btConeTwistConstraint8setLimitEffffff(%class.btConeTwistConstraint* %183, float 0x3FF921FB60000000, float 0x3FF921FB60000000, float 0.000000e+00, float 1.000000e+00, float 0x3FD3333340000000, float 1.000000e+00)
+  %184 = load %class.btConeTwistConstraint** %coneC, align 4
+  %185 = bitcast %class.btConeTwistConstraint* %184 to %class.btTypedConstraint*
+  %m_joints490 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx491 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints490, i32 0, i32 8
+  store %class.btTypedConstraint* %185, %class.btTypedConstraint** %arrayidx491, align 4
+  %m_ownerWorld492 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
+  %186 = load %class.btDynamicsWorld** %m_ownerWorld492, align 4
+  %187 = bitcast %class.btDynamicsWorld* %186 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
+  %vtable493 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %187
+  %vfn494 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable493, i64 10
+  %188 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn494
+  %m_joints495 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx496 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints495, i32 0, i32 8
+  %189 = load %class.btTypedConstraint** %arrayidx496, align 4
+  call void %188(%class.btDynamicsWorld* %186, %class.btTypedConstraint* %189, i1 zeroext true)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
+  %call497 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localA)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call497, float 0.000000e+00, float 0x3FF921FB60000000, float 0.000000e+00)
+  store float 0.000000e+00, float* %ref.tmp500, align 4
+  store float 0x3FC70A3D80000000, float* %ref.tmp501, align 4
+  store float 0.000000e+00, float* %ref.tmp502, align 4
+  %call503 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp499, float* %ref.tmp500, float* %ref.tmp501, float* %ref.tmp502)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp498, float* %scale.addr, %class.btVector3* %ref.tmp499)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localA, %class.btVector3* %ref.tmp498)
+  %call504 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localB)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call504, float 0.000000e+00, float 0x3FF921FB60000000, float 0.000000e+00)
+  store float 0.000000e+00, float* %ref.tmp507, align 4
+  store float 0xBFC1EB8520000000, float* %ref.tmp508, align 4
+  store float 0.000000e+00, float* %ref.tmp509, align 4
+  %call510 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp506, float* %ref.tmp507, float* %ref.tmp508, float* %ref.tmp509)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp505, float* %scale.addr, %class.btVector3* %ref.tmp506)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localB, %class.btVector3* %ref.tmp505)
+  %call511 = call noalias i8* @_Znwm(i32 780)
+  %190 = bitcast i8* %call511 to %class.btHingeConstraint*
+  %m_bodies512 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx513 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies512, i32 0, i32 9
+  %191 = load %class.btRigidBody** %arrayidx513, align 4
+  %m_bodies514 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx515 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies514, i32 0, i32 10
+  %192 = load %class.btRigidBody** %arrayidx515, align 4
+  %call518 = invoke %class.btHingeConstraint* @_ZN17btHingeConstraintC1ER11btRigidBodyS1_RK11btTransformS4_b(%class.btHingeConstraint* %190, %class.btRigidBody* %191, %class.btRigidBody* %192, %class.btTransform* %localA, %class.btTransform* %localB, i1 zeroext false)
+          to label %invoke.cont517 unwind label %lpad516
+
+invoke.cont517:                                   ; preds = %invoke.cont488
+  store %class.btHingeConstraint* %190, %class.btHingeConstraint** %hingeC, align 4
+  %193 = load %class.btHingeConstraint** %hingeC, align 4
+  call void @_ZN17btHingeConstraint8setLimitEfffff(%class.btHingeConstraint* %193, float 0xBFF921FB60000000, float 0.000000e+00, float 0x3FECCCCCC0000000, float 0x3FD3333340000000, float 1.000000e+00)
+  %194 = load %class.btHingeConstraint** %hingeC, align 4
+  %195 = bitcast %class.btHingeConstraint* %194 to %class.btTypedConstraint*
+  %m_joints519 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx520 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints519, i32 0, i32 9
+  store %class.btTypedConstraint* %195, %class.btTypedConstraint** %arrayidx520, align 4
+  %m_ownerWorld521 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
+  %196 = load %class.btDynamicsWorld** %m_ownerWorld521, align 4
+  %197 = bitcast %class.btDynamicsWorld* %196 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
+  %vtable522 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %197
+  %vfn523 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable522, i64 10
+  %198 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn523
+  %m_joints524 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx525 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints524, i32 0, i32 9
+  %199 = load %class.btTypedConstraint** %arrayidx525, align 4
+  call void %198(%class.btDynamicsWorld* %196, %class.btTypedConstraint* %199, i1 zeroext true)
+  %200 = load %class.RagDoll** %retval
+  ret %class.RagDoll* %200
+
+lpad258:                                          ; preds = %for.end
+  %201 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %202 = extractvalue { i8*, i32 } %201, 0
+  store i8* %202, i8** %exn.slot
+  %203 = extractvalue { i8*, i32 } %201, 1
+  store i32 %203, i32* %ehselector.slot
+  call void @_ZdlPv(i8* %call253) nounwind
+  br label %eh.resume
+
+lpad284:                                          ; preds = %invoke.cont259
+  %204 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %205 = extractvalue { i8*, i32 } %204, 0
+  store i8* %205, i8** %exn.slot
+  %206 = extractvalue { i8*, i32 } %204, 1
+  store i32 %206, i32* %ehselector.slot
+  call void @_ZdlPv(i8* %call279) nounwind
+  br label %eh.resume
+
+lpad313:                                          ; preds = %invoke.cont285
+  %207 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %208 = extractvalue { i8*, i32 } %207, 0
+  store i8* %208, i8** %exn.slot
+  %209 = extractvalue { i8*, i32 } %207, 1
+  store i32 %209, i32* %ehselector.slot
+  call void @_ZdlPv(i8* %call308) nounwind
+  br label %eh.resume
+
+lpad342:                                          ; preds = %invoke.cont314
+  %210 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %211 = extractvalue { i8*, i32 } %210, 0
+  store i8* %211, i8** %exn.slot
+  %212 = extractvalue { i8*, i32 } %210, 1
+  store i32 %212, i32* %ehselector.slot
+  call void @_ZdlPv(i8* %call337) nounwind
+  br label %eh.resume
+
+lpad371:                                          ; preds = %invoke.cont343
+  %213 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %214 = extractvalue { i8*, i32 } %213, 0
+  store i8* %214, i8** %exn.slot
+  %215 = extractvalue { i8*, i32 } %213, 1
+  store i32 %215, i32* %ehselector.slot
+  call void @_ZdlPv(i8* %call366) nounwind
+  br label %eh.resume
+
+lpad400:                                          ; preds = %invoke.cont372
+  %216 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %217 = extractvalue { i8*, i32 } %216, 0
+  store i8* %217, i8** %exn.slot
+  %218 = extractvalue { i8*, i32 } %216, 1
+  store i32 %218, i32* %ehselector.slot
+  call void @_ZdlPv(i8* %call395) nounwind
+  br label %eh.resume
+
+lpad429:                                          ; preds = %invoke.cont401
+  %219 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %220 = extractvalue { i8*, i32 } %219, 0
+  store i8* %220, i8** %exn.slot
+  %221 = extractvalue { i8*, i32 } %219, 1
+  store i32 %221, i32* %ehselector.slot
+  call void @_ZdlPv(i8* %call424) nounwind
+  br label %eh.resume
+
+lpad458:                                          ; preds = %invoke.cont430
+  %222 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %223 = extractvalue { i8*, i32 } %222, 0
+  store i8* %223, i8** %exn.slot
+  %224 = extractvalue { i8*, i32 } %222, 1
+  store i32 %224, i32* %ehselector.slot
+  call void @_ZdlPv(i8* %call453) nounwind
+  br label %eh.resume
+
+lpad487:                                          ; preds = %invoke.cont459
+  %225 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %226 = extractvalue { i8*, i32 } %225, 0
+  store i8* %226, i8** %exn.slot
+  %227 = extractvalue { i8*, i32 } %225, 1
+  store i32 %227, i32* %ehselector.slot
+  call void @_ZdlPv(i8* %call482) nounwind
+  br label %eh.resume
+
+lpad516:                                          ; preds = %invoke.cont488
+  %228 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %229 = extractvalue { i8*, i32 } %228, 0
+  store i8* %229, i8** %exn.slot
+  %230 = extractvalue { i8*, i32 } %228, 1
+  store i32 %230, i32* %ehselector.slot
+  call void @_ZdlPv(i8* %call511) nounwind
+  br label %eh.resume
+
+eh.resume:                                        ; preds = %lpad516, %lpad487, %lpad458, %lpad429, %lpad400, %lpad371, %lpad342, %lpad313, %lpad284, %lpad258, %invoke.cont92, %invoke.cont83, %invoke.cont74, %invoke.cont65, %invoke.cont56, %invoke.cont47, %invoke.cont38, %invoke.cont29, %invoke.cont20, %invoke.cont11, %invoke.cont4
+  %exn = load i8** %exn.slot
+  %sel = load i32* %ehselector.slot
+  %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0
+  %lpad.val526 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1
+  resume { i8*, i32 } %lpad.val526
+
+terminate.lpad:                                   ; preds = %lpad89, %lpad80, %lpad71, %lpad62, %lpad53, %lpad44, %lpad35, %lpad26, %lpad17, %lpad8, %lpad
+  %231 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          catch i8* null
+  call void @_ZSt9terminatev() noreturn nounwind
+  unreachable
+}
+
+declare void @_ZmlRKfRK9btVector3(%class.btVector3* noalias sret, float*, %class.btVector3*) inlinehint ssp ; C++: operator*(float const&, btVector3 const&); result is written through the sret pointer
+
+declare %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll*, float, %class.btTransform*, %class.btCollisionShape*) ssp align 2 ; C++: RagDoll::localCreateRigidBody(float, btTransform const&, btCollisionShape*)
+
+declare void @_ZNK11btTransformmlERKS_(%class.btTransform* noalias sret, %class.btTransform*, %class.btTransform*) inlinehint ssp align 2 ; C++: btTransform::operator*(btTransform const&) const; result is written through the sret pointer
+
+declare void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3*, float, float, float) ssp align 2 ; C++: btMatrix3x3::setEulerZYX(float, float, float)
+
+declare void @_ZN11btRigidBody10setDampingEff(%class.btRigidBody*, float, float) ; C++: btRigidBody::setDamping(float, float)
+
+declare void @_ZN17btCollisionObject19setDeactivationTimeEf(%class.btCollisionObject*, float) nounwind ssp align 2 ; C++: btCollisionObject::setDeactivationTime(float)
+
+declare void @_ZN11btRigidBody21setSleepingThresholdsEff(%class.btRigidBody*, float, float) nounwind ssp align 2 ; C++: btRigidBody::setSleepingThresholds(float, float)
+
+declare %class.btHingeConstraint* @_ZN17btHingeConstraintC1ER11btRigidBodyS1_RK11btTransformS4_b(%class.btHingeConstraint*, %class.btRigidBody*, %class.btRigidBody*, %class.btTransform*, %class.btTransform*, i1 zeroext) ; C++: btHingeConstraint::btHingeConstraint(btRigidBody&, btRigidBody&, btTransform const&, btTransform const&, bool) -- C1 (complete-object) constructor; not marked nounwind, and the callers above reach it via invoke
+
+declare void @_ZN17btHingeConstraint8setLimitEfffff(%class.btHingeConstraint*, float, float, float, float, float) ssp align 2 ; C++: btHingeConstraint::setLimit(float, float, float, float, float)
+
+declare %class.btConeTwistConstraint* @_ZN21btConeTwistConstraintC1ER11btRigidBodyS1_RK11btTransformS4_(%class.btConeTwistConstraint*, %class.btRigidBody*, %class.btRigidBody*, %class.btTransform*, %class.btTransform*) ; C++: btConeTwistConstraint::btConeTwistConstraint(btRigidBody&, btRigidBody&, btTransform const&, btTransform const&) -- C1 (complete-object) constructor; not marked nounwind, and the callers above reach it via invoke
+
+declare void @_ZN21btConeTwistConstraint8setLimitEffffff(%class.btConeTwistConstraint*, float, float, float, float, float, float) nounwind ssp align 2 ; C++: btConeTwistConstraint::setLimit(float, float, float, float, float, float)
diff --git a/test/CodeGen/Thumb2/dg.exp b/test/CodeGen/Thumb2/dg.exp
deleted file mode 100644
index 3ff359a..0000000
--- a/test/CodeGen/Thumb2/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target ARM] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/Thumb2/large-call.ll b/test/CodeGen/Thumb2/large-call.ll
new file mode 100644
index 0000000..aef6f85
--- /dev/null
+++ b/test/CodeGen/Thumb2/large-call.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -O0 -mcpu=cortex-a8 | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios0.0.0"
+
+; This test case would clobber the outgoing call arguments by writing to the
+; emergency spill slot at [sp, #4] without adjusting the stack pointer first.
+
+; CHECK: main
+; CHECK: vmov.f64
+; Adjust SP for the large call
+; CHECK: sub sp,
+; CHECK: mov [[FR:r[0-9]+]], sp
+; Store to call frame + #4
+; CHECK: str{{.*\[}}[[FR]], #4]
+; Don't clobber that store until the call.
+; CHECK-NOT: [sp, #4]
+; CHECK: variadic
+
+define i32 @main() ssp {
+entry:
+  %d = alloca double, align 8
+  store double 1.000000e+00, double* %d, align 8
+  %0 = load double* %d, align 8
+  call void (i8*, i8*, i8*, ...)* @variadic(i8* null, i8* null, i8* null, i32 1, double 1.234800e+03, double 2.363450e+03, double %0, i32 1, double 1.234560e+03, double 2.345670e+03, double 4.6334563e+03, double 2.423440e+03, double 4.234330e+03, double 2.965430e+03, i32 1, double 4.669300e+03, double 2.927500e+03, double 4.663100e+03, double 2.921000e+03, double 4.663100e+03, double 2.345100e+03, i32 1, double 3.663100e+03, double 2.905100e+03, double 4.669300e+03, double 2.898600e+03, double 4.676900e+03, double 2.898600e+03, i32 1, double 4.684600e+03, double 2.898600e+03, double 1.234800e+03, double 2.905100e+03, double 1.234800e+03, double 2.345100e+03, i32 1, double 7.719700e+03, double 2.920500e+03, double 4.713500e+03, double 2.927000e+03, double 4.705800e+03, double 2.927000e+03, i32 1, double 8.698200e+03, double 2.927000e+03, double 4.692000e+03, double 2.920500e+03, double 4.692000e+03, double 2.912500e+03, i32 1, double 4.692000e+03, double 2.945600e+03, double 4.698200e+03, double 2.898100e+03, double 4.705800e+03, double 2.898100e+03, i32 1, double 4.713500e+03, double 2.898100e+03, double 4.719700e+03, double 2.945600e+03, double 4.719700e+03, double 2.912500e+03, i32 1, double 4.749200e+03, double 2.920100e+03, double 4.743000e+03, double 2.926600e+03, double 4.735300e+03, double 2.926600e+03, i32 1, double 4.727700e+03, double 2.926600e+03, double 4.721500e+03, double 2.920100e+03, double 4.721500e+03, double 2.912100e+03, i32 1, double 4.721500e+03, double 2.945100e+03, double 4.727700e+03, double 2.897700e+03, double 4.735300e+03, double 2.897700e+03, i32 1, double 4.743000e+03, double 2.897700e+03, double 4.749200e+03, double 2.945100e+03, double 4.749200e+03, double 2.912100e+03, i32 1, double 4.778200e+03, double 2.920100e+03, double 4.772000e+03, double 2.926600e+03, double 4.764300e+03, double 2.926600e+03, i32 1, double 4.756700e+03, double 2.926600e+03, double 4.750500e+03, double 2.920100e+03, double 4.750500e+03, double 2.912100e+03, 
i32 1, double 4.750500e+03, double 2.945100e+03, double 4.756700e+03, double 2.897700e+03, double 4.764300e+03, double 2.897700e+03, i32 1, double 4.772000e+03, double 2.897700e+03, double 4.778200e+03, double 2.945100e+03, double 4.778200e+03, double 2.912100e+03, i32 1, double 4.801900e+03, double 2.942100e+03, double 4.795700e+03, double 2.948500e+03, double 4.788100e+03, double 2.948500e+03, i32 1, double 4.780500e+03, double 2.948500e+03, double 4.774300e+03, double 2.942100e+03, double 4.774300e+03, double 2.934100e+03, i32 1, double 4.774300e+03, double 2.926100e+03, double 4.780500e+03, double 2.919600e+03, double 4.788100e+03, double 2.919600e+03, i32 1, double 4.795700e+03, double 2.919600e+03, double 4.801900e+03, double 2.926100e+03, double 4.801900e+03, double 2.934100e+03, i32 1, double 4.801500e+03, double 2.972500e+03, double 4.795300e+03, double 2.978900e+03, double 4.787700e+03, double 2.978900e+03, i32 1, double 4.780000e+03, double 2.978900e+03, double 4.773800e+03, double 2.972500e+03, double 4.773800e+03, double 2.964500e+03, i32 1, double 4.773800e+03, double 2.956500e+03, double 4.780000e+03, double 2.950000e+03, double 4.787700e+03, double 2.950000e+03, i32 1, double 4.795300e+03, double 2.950000e+03, double 4.801500e+03, double 2.956500e+03, double 4.801500e+03, double 2.964500e+03, i32 1, double 4.802400e+03, double 3.010200e+03, double 4.796200e+03, double 3.016600e+03, double 4.788500e+03, double 3.016600e+03, i32 1, double 4.780900e+03, double 3.016600e+03, double 4.774700e+03, double 3.010200e+03, double 4.774700e+03, double 3.002200e+03, i32 1, double 4.774700e+03, double 2.994200e+03, double 4.780900e+03, double 2.987700e+03, double 4.788500e+03, double 2.987700e+03, i32 1, double 4.796200e+03, double 2.987700e+03, double 4.802400e+03, double 2.994200e+03, double 4.802400e+03, double 3.002200e+03, i32 1, double 4.802400e+03, double 3.039400e+03, double 4.796200e+03, double 3.455800e+03, double 4.788500e+03, double 3.455800e+03, i32 
1, double 4.780900e+03, double 3.455800e+03, double 4.774700e+03, double 3.039400e+03, double 4.774700e+03, double 3.031400e+03, i32 1, double 4.774700e+03, double 3.023400e+03, double 4.780900e+03, double 3.016900e+03, double 4.788500e+03, double 3.016900e+03, i32 1, double 4.796200e+03, double 3.016900e+03, double 4.802400e+03, double 3.023400e+03, double 4.802400e+03, double 3.031400e+03, i32 1, double 4.778600e+03, double 3.063100e+03, double 4.772400e+03, double 3.069600e+03, double 4.764700e+03, double 3.069600e+03, i32 1, double 4.757100e+03, double 3.069600e+03, double 4.750900e+03, double 3.063100e+03, double 4.750900e+03, double 3.055100e+03, i32 1, double 4.750900e+03, double 3.457100e+03, double 4.757100e+03, double 3.450700e+03, double 4.764700e+03, double 3.450700e+03, i32 1, double 4.772400e+03, double 3.450700e+03, double 4.778600e+03, double 3.457100e+03, double 4.778600e+03, double 3.055100e+03, i32 1, double 4.748600e+03, double 3.063600e+03, double 4.742400e+03, double 3.070000e+03, double 4.734700e+03, double 3.070000e+03, i32 1, double 4.727100e+03, double 3.070000e+03, double 4.720900e+03, double 3.063600e+03, double 4.720900e+03, double 3.055600e+03, i32 1, double 4.720900e+03, double 3.457600e+03, double 4.727100e+03, double 3.451100e+03, double 4.734700e+03, double 3.451100e+03, i32 1, double 4.742400e+03, double 3.451100e+03, double 4.748600e+03, double 3.457600e+03, double 4.748600e+03, double 3.055600e+03, i32 1, double 4.719500e+03, double 3.063600e+03, double 4.713300e+03, double 3.070000e+03, double 4.705700e+03, double 3.070000e+03, i32 1, double 4.698000e+03, double 3.070000e+03, double 4.691900e+03, double 3.063600e+03, double 4.691900e+03, double 3.055600e+03, i32 1, double 4.691900e+03, double 3.457600e+03, double 4.698000e+03, double 3.451100e+03, double 4.705700e+03, double 3.451100e+03, i32 1, double 4.713300e+03, double 3.451100e+03, double 4.719500e+03, double 3.457600e+03, double 4.719500e+03, double 3.055600e+03, i32 1, 
double 4.691300e+03, double 3.064000e+03, double 4.685100e+03, double 3.070500e+03, double 4.677500e+03, double 3.070500e+03, i32 1, double 4.669900e+03, double 3.070500e+03, double 4.663700e+03, double 3.064000e+03, double 4.663700e+03, double 3.056000e+03, i32 1, double 4.663700e+03, double 3.458000e+03, double 4.669900e+03, double 3.451600e+03, double 4.677500e+03, double 3.451600e+03, i32 1, double 4.685100e+03, double 3.451600e+03, double 4.691300e+03, double 3.458000e+03, double 4.691300e+03, double 3.056000e+03, i32 1, double 4.668500e+03, double 3.453000e+03, double 4.662300e+03, double 3.459400e+03, double 4.654700e+03, double 3.459400e+03, i32 1, double 4.647000e+03, double 3.459400e+03, double 4.640900e+03, double 3.453000e+03, double 4.640900e+03, double 3.035000e+03, i32 1, double 4.640900e+03, double 3.027000e+03, double 4.647000e+03, double 3.020500e+03, double 4.654700e+03, double 3.020500e+03, i32 1, double 4.662300e+03, double 3.020500e+03, double 4.668500e+03, double 3.027000e+03, double 4.668500e+03, double 3.035000e+03, i32 1, double 4.668500e+03, double 3.014300e+03, double 4.662300e+03, double 3.020800e+03, double 4.654700e+03, double 3.020800e+03, i32 1, double 4.647000e+03, double 3.020800e+03, double 4.640900e+03, double 3.014300e+03, double 4.640900e+03, double 3.006400e+03, i32 1, double 4.640900e+03, double 2.998400e+03, double 4.647000e+03, double 2.991900e+03, double 4.654700e+03, double 2.991900e+03, i32 1, double 4.662300e+03, double 2.991900e+03, double 4.668500e+03, double 2.998400e+03, double 4.668500e+03, double 3.006400e+03, i32 1, double 4.668100e+03, double 2.941100e+03, double 4.661900e+03, double 2.947600e+03, double 4.654200e+03, double 2.947600e+03, i32 1, double 4.646600e+03, double 2.947600e+03, double 4.640400e+03, double 2.941100e+03, double 4.640400e+03, double 2.933100e+03, i32 1, double 4.640400e+03, double 2.925200e+03, double 4.646600e+03, double 2.918700e+03, double 4.654200e+03, double 2.918700e+03, i32 1, 
double 4.661900e+03, double 2.918700e+03, double 4.668100e+03, double 2.925200e+03, double 4.668100e+03, double 2.933100e+03, i32 1, double 4.668500e+03, double 2.971600e+03, double 4.662300e+03, double 2.978100e+03, double 4.654700e+03, double 2.978100e+03, i32 1, double 4.647000e+03, double 2.978100e+03, double 4.640900e+03, double 2.971600e+03, double 4.640900e+03, double 2.963600e+03, i32 1, double 4.640900e+03, double 2.955700e+03, double 4.647000e+03, double 2.949200e+03, double 4.654700e+03, double 2.949200e+03, i32 1, double 4.662300e+03, double 2.949200e+03, double 4.668500e+03, double 2.955700e+03, double 4.668500e+03, double 2.963600e+03, i32 2, i32 1, double 4.691300e+03, double 3.056000e+03, i32 2, i32 1, double 4.748600e+03, double 3.055600e+03, i32 2, i32 1, double 4.778200e+03, double 2.912100e+03, i32 2, i32 1, double 4.749200e+03, double 2.912100e+03, i32 2, i32 1, double 4.802400e+03, double 3.031400e+03, i32 2, i32 1, double 4.778600e+03, double 3.055100e+03, i32 2, i32 1, double 4.801500e+03, double 2.964500e+03, i32 2, i32 1, double 4.802400e+03, double 3.002200e+03, i32 2, i32 1, double 4.719700e+03, double 2.912500e+03, i32 2, i32 1, double 4.801900e+03, double 2.934100e+03, i32 2, i32 1, double 4.719500e+03, double 3.055600e+03, i32 2, i32 1, double 4.668500e+03, double 3.006400e+03, i32 2, i32 1, double 4.668500e+03, double 3.035000e+03, i32 2, i32 1, double 4.668100e+03, double 2.933100e+03, i32 2, i32 1, double 4.668500e+03, double 2.963600e+03, i32 2, i32 48)
+  ret i32 0
+}
+
+declare void @variadic(i8*, i8*, i8*, ...)
+
diff --git a/test/CodeGen/Thumb2/lit.local.cfg b/test/CodeGen/Thumb2/lit.local.cfg
new file mode 100644
index 0000000..dd6c50d
--- /dev/null
+++ b/test/CodeGen/Thumb2/lit.local.cfg
@@ -0,0 +1,13 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+def getRoot(config):
+    if not config.parent:
+        return config
+    return getRoot(config.parent)
+
+root = getRoot(config)
+
+targets = set(root.targets_to_build.split())
+if not 'ARM' in targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/Thumb2/thumb2-cbnz.ll b/test/CodeGen/Thumb2/thumb2-cbnz.ll
index 7993bbf..893bd0f 100644
--- a/test/CodeGen/Thumb2/thumb2-cbnz.ll
+++ b/test/CodeGen/Thumb2/thumb2-cbnz.ll
@@ -24,7 +24,6 @@ bb7:                                              ; preds = %bb3
 
 bb9:                                              ; preds = %bb7
 ; CHECK:      cmp	r0, #0
-; CHECK:      cmp	r0, #0
 ; CHECK-NEXT:      cbnz
   %0 = tail call  double @foo(double %b) nounwind readnone ; <double> [#uses=0]
   br label %bb11
diff --git a/test/CodeGen/Thumb2/thumb2-ifcvt2.ll b/test/CodeGen/Thumb2/thumb2-ifcvt2.ll
index 2c57348..f577f79 100644
--- a/test/CodeGen/Thumb2/thumb2-ifcvt2.ll
+++ b/test/CodeGen/Thumb2/thumb2-ifcvt2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s
 
 define void @foo(i32 %X, i32 %Y) {
 entry:
diff --git a/test/CodeGen/Thumb2/thumb2-ldm.ll b/test/CodeGen/Thumb2/thumb2-ldm.ll
index 4f2b7c1..b2328e7 100644
--- a/test/CodeGen/Thumb2/thumb2-ldm.ll
+++ b/test/CodeGen/Thumb2/thumb2-ldm.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mattr=+thumb2 | FileCheck %s
 
 @X = external global [0 x i32]          ; <[0 x i32]*> [#uses=5]
 
diff --git a/test/CodeGen/Thumb2/thumb2-mls.ll b/test/CodeGen/Thumb2/thumb2-mls.ll
index 24c45c5..58f9add 100644
--- a/test/CodeGen/Thumb2/thumb2-mls.ll
+++ b/test/CodeGen/Thumb2/thumb2-mls.ll
@@ -15,5 +15,5 @@ define i32 @f2(i32 %a, i32 %b, i32 %c) {
     ret i32 %tmp2
 }
 ; CHECK: f2:
-; CHECK: 	muls	r0, r0, r1
+; CHECK: 	muls	r0, r1, r0
 
diff --git a/test/CodeGen/Thumb2/thumb2-mul.ll b/test/CodeGen/Thumb2/thumb2-mul.ll
index bb97d97..ac059bd 100644
--- a/test/CodeGen/Thumb2/thumb2-mul.ll
+++ b/test/CodeGen/Thumb2/thumb2-mul.ll
@@ -2,7 +2,7 @@
 
 define i32 @f1(i32 %a, i32 %b, i32 %c) {
 ; CHECK: f1:
-; CHECK: muls r0, r0, r1
+; CHECK: muls r0, r1, r0
     %tmp = mul i32 %a, %b
     ret i32 %tmp
 }
diff --git a/test/CodeGen/X86/2006-05-11-InstrSched.ll b/test/CodeGen/X86/2006-05-11-InstrSched.ll
index 8bb9b92..38bca28 100644
--- a/test/CodeGen/X86/2006-05-11-InstrSched.ll
+++ b/test/CodeGen/X86/2006-05-11-InstrSched.ll
@@ -30,7 +30,7 @@ cond_true:		; preds = %cond_true, %entry
 	%tmp87 = bitcast <16 x i8> %tmp66 to <4 x i32>		; <<4 x i32>> [#uses=1]
 	%tmp88 = add <4 x i32> %tmp87, %tmp77		; <<4 x i32>> [#uses=2]
 	%tmp88.upgrd.4 = bitcast <4 x i32> %tmp88 to <2 x i64>		; <<2 x i64>> [#uses=1]
-	%tmp99 = tail call <4 x i32> @llvm.x86.sse2.pcmpgt.d( <4 x i32> %tmp88, <4 x i32> %tmp55 )		; <<4 x i32>> [#uses=1]
+	%tmp99 = tail call <4 x i32> @llvm.x86.sse2.psra.d( <4 x i32> %tmp88, <4 x i32> %tmp55 )		; <<4 x i32>> [#uses=1]
 	%tmp99.upgrd.5 = bitcast <4 x i32> %tmp99 to <2 x i64>		; <<2 x i64>> [#uses=2]
 	%tmp110 = xor <2 x i64> %tmp99.upgrd.5, < i64 -1, i64 -1 >		; <<2 x i64>> [#uses=1]
 	%tmp111 = and <2 x i64> %tmp110, %tmp55.upgrd.2		; <<2 x i64>> [#uses=1]
@@ -48,4 +48,4 @@ return:		; preds = %cond_true, %entry
 	ret void
 }
 
-declare <4 x i32> @llvm.x86.sse2.pcmpgt.d(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>)
diff --git a/test/CodeGen/X86/2007-01-08-InstrSched.ll b/test/CodeGen/X86/2007-01-08-InstrSched.ll
index 6f8b89c..24aa5b9 100644
--- a/test/CodeGen/X86/2007-01-08-InstrSched.ll
+++ b/test/CodeGen/X86/2007-01-08-InstrSched.ll
@@ -1,5 +1,5 @@
 ; PR1075
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -O3 | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-apple-darwin -O3 | FileCheck %s
 
 define float @foo(float %x) nounwind {
     %tmp1 = fmul float %x, 3.000000e+00
diff --git a/test/CodeGen/X86/2007-11-06-InstrSched.ll b/test/CodeGen/X86/2007-11-06-InstrSched.ll
index f6db0d0..838a0c3 100644
--- a/test/CodeGen/X86/2007-11-06-InstrSched.ll
+++ b/test/CodeGen/X86/2007-11-06-InstrSched.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep lea
+; RUN: llc < %s -march=x86 -mcpu=generic -mattr=+sse2 | not grep lea
 
 define float @foo(i32* %x, float* %y, i32 %c) nounwind {
 entry:
diff --git a/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll b/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll
index 265d968..2e95082 100644
--- a/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll
+++ b/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | grep {(%esp)} | count 2
+; RUN: llc < %s -march=x86 -mcpu=generic | grep {(%esp)} | count 2
 ; PR1872
 
 	%struct.c34007g__designated___XUB = type { i32, i32, i32, i32 }
diff --git a/test/CodeGen/X86/2008-05-21-CoalescerBug.ll b/test/CodeGen/X86/2008-05-21-CoalescerBug.ll
index f4f4195..ac167b0 100644
--- a/test/CodeGen/X86/2008-05-21-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-05-21-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -O0 -fast-isel=false -regalloc=basic | grep mov | count 5
+; RUN: llc < %s -march=x86 -O0 -fast-isel=false -optimize-regalloc -regalloc=basic | grep mov | count 5
 ; PR2343
 
 	%llvm.dbg.anchor.type = type { i32, i32 }
diff --git a/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll b/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll
index 0d11546..c068f8a 100644
--- a/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll
+++ b/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll
@@ -2,8 +2,6 @@
 
 @_ZTVN10Evaluation10GridOutputILi3EEE = external constant [5 x i32 (...)*]		; <[5 x i32 (...)*]*> [#uses=1]
 
-declare i8* @llvm.eh.exception() nounwind 
-
 declare i8* @_Znwm(i32)
 
 declare i8* @__cxa_begin_catch(i8*) nounwind 
diff --git a/test/CodeGen/X86/2008-12-16-BadShift.ll b/test/CodeGen/X86/2008-12-16-BadShift.ll
deleted file mode 100644
index 6c70c5b..0000000
--- a/test/CodeGen/X86/2008-12-16-BadShift.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: llc < %s | not grep shrl
-; Note: this test is really trying to make sure that the shift
-; returns the right result; shrl is most likely wrong,
-; but if CodeGen starts legitimately using an shrl here,
-; please adjust the test appropriately.
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
-target triple = "i386-pc-linux-gnu"
-@.str = internal constant [6 x i8] c"%lld\0A\00"		; <[6 x i8]*> [#uses=1]
-
-define i64 @mebbe_shift(i32 %xx, i32 %test) nounwind {
-entry:
-	%conv = zext i32 %xx to i64		; <i64> [#uses=1]
-	%tobool = icmp ne i32 %test, 0		; <i1> [#uses=1]
-	%shl = select i1 %tobool, i64 3, i64 0		; <i64> [#uses=1]
-	%x.0 = shl i64 %conv, %shl		; <i64> [#uses=1]
-	ret i64 %x.0
-}
-
diff --git a/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll b/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll
index 75e0b8a..435adbb 100644
--- a/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll
+++ b/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -asm-verbose=0 | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=i386-apple-darwin -asm-verbose=0 | FileCheck %s
 ; PR3149
 ; Make sure the copy after inline asm is not coalesced away.
 
diff --git a/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll b/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
index 12bd285..1259cf4 100644
--- a/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
+++ b/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=x86_64-mingw32 < %s | FileCheck %s
+; RUN: llc -mcpu=generic -mtriple=x86_64-mingw32 < %s | FileCheck %s
 ; CHECK: subq    $40, %rsp
 ; CHECK: movaps  %xmm8, (%rsp)
 ; CHECK: movaps  %xmm7, 16(%rsp)
diff --git a/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll b/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll
index f6ac2ba..d4a74c9 100644
--- a/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll
+++ b/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=i386-apple-darwin -tailcallopt < %s | FileCheck %s
+; RUN: llc -mcpu=generic -mtriple=i386-apple-darwin -tailcallopt < %s | FileCheck %s
 ; Check that lowered argumens do not overwrite the return address before it is moved.
 ; Bug 6225
 ;
diff --git a/test/CodeGen/X86/2010-05-03-CoalescerSubRegClobber.ll b/test/CodeGen/X86/2010-05-03-CoalescerSubRegClobber.ll
index 5accfd7..e0c2c6c 100644
--- a/test/CodeGen/X86/2010-05-03-CoalescerSubRegClobber.ll
+++ b/test/CodeGen/X86/2010-05-03-CoalescerSubRegClobber.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s | FileCheck %s
+; RUN: llc < %s -mcpu=generic | FileCheck %s
 ; PR6941
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.0.0"
diff --git a/test/CodeGen/X86/2011-08-29-InitOrder.ll b/test/CodeGen/X86/2011-08-29-InitOrder.ll
index 72c79d2..4d5f8d7 100644
--- a/test/CodeGen/X86/2011-08-29-InitOrder.ll
+++ b/test/CodeGen/X86/2011-08-29-InitOrder.ll
@@ -3,22 +3,28 @@
 ; PR5329
 
 @llvm.global_ctors = appending global [3 x { i32, void ()* }] [{ i32, void ()* } { i32 2000, void ()* @construct_2 }, { i32, void ()* } { i32 3000, void ()* @construct_3 }, { i32, void ()* } { i32 1000, void ()* @construct_1 }]
-; CHECK-DEFAULT: construct_3
-; CHECK-DEFAULT: construct_2
-; CHECK-DEFAULT: construct_1
+; CHECK-DEFAULT: .section        .ctors.64535,"aw",@progbits
+; CHECK-DEFAULT: .long construct_1
+; CHECK-DEFAULT: .section        .ctors.63535,"aw",@progbits
+; CHECK-DEFAULT: .long construct_2
+; CHECK-DEFAULT: .section        .ctors.62535,"aw",@progbits
+; CHECK-DEFAULT: .long construct_3
 
-; CHECK-DARWIN: construct_1
-; CHECK-DARWIN: construct_2
-; CHECK-DARWIN: construct_3
+; CHECK-DARWIN: .long _construct_1
+; CHECK-DARWIN-NEXT: .long _construct_2
+; CHECK-DARWIN-NEXT: .long _construct_3
 
 @llvm.global_dtors = appending global [3 x { i32, void ()* }] [{ i32, void ()* } { i32 2000, void ()* @destruct_2 }, { i32, void ()* } { i32 1000, void ()* @destruct_1 }, { i32, void ()* } { i32 3000, void ()* @destruct_3 }]
-; CHECK-DEFAULT: destruct_3
-; CHECK-DEFAULT: destruct_2
-; CHECK-DEFAULT: destruct_1
+; CHECK-DEFAULT: .section        .dtors.64535,"aw",@progbits
+; CHECK-DEFAULT: .long destruct_1
+; CHECK-DEFAULT: .section        .dtors.63535,"aw",@progbits
+; CHECK-DEFAULT: .long destruct_2
+; CHECK-DEFAULT: .section        .dtors.62535,"aw",@progbits
+; CHECK-DEFAULT: .long destruct_3
 
-; CHECK-DARWIN: destruct_1
-; CHECK-DARWIN: destruct_2
-; CHECK-DARWIN: destruct_3
+; CHECK-DARWIN:      .long _destruct_1
+; CHECK-DARWIN-NEXT: .long _destruct_2
+; CHECK-DARWIN-NEXT: .long _destruct_3
 
 declare void @construct_1()
 declare void @construct_2()
diff --git a/test/CodeGen/X86/2011-11-09-FoldImpDefs.ll b/test/CodeGen/X86/2011-11-09-FoldImpDefs.ll
deleted file mode 100644
index 095d8c6..0000000
--- a/test/CodeGen/X86/2011-11-09-FoldImpDefs.ll
+++ /dev/null
@@ -1,105 +0,0 @@
-; RUN: llc < %s -verify-regalloc | FileCheck %s
-; PR11347
-;
-; This test case materializes the constant 1 in a register:
-;
-; %vreg19<def> = MOV32ri 1
-;
-; Then rematerializes the instruction for a sub-register copy:
-; 1168L   %vreg14:sub_8bit<def,undef> = COPY %vreg19:sub_8bit<kill>, %vreg14<imp-def>; GR32:%vreg14,%vreg19
-;        Considering merging %vreg19 with %vreg14
-;                RHS = %vreg19 = [560d,656L:0)[720L,976d:0)[1088L,1168d:0)  0@560d
-;                LHS = %vreg14 = [16d,160L:0)[160L,256L:2)[256L,1088L:1)[1168d,1184L:3)[1184L,1344L:2)  0@16d-phikill 1@256L-phidef-phikill 2@1184L-phidef-phikill 3@1168d-phikill
-; Remat: %vreg14<def> = MOV32ri 1, %vreg14<imp-def>, %vreg14<imp-def>; GR32:%vreg14
-;
-; This rematerialized constant is feeding a PHI that is spilled, so the constant
-; is written directly to a stack slot that gets the %esi function argument in
-; another basic block:
-;
-; CHECK: %entry
-; CHECK: movl %esi, [[FI:[0-9]+\(%rsp\)]]
-; CHECK: %if.else24
-; CHECK: movl $1, [[FI]]
-; CHECK: %lor.end9
-; CHECK: movl [[FI]],
-;
-; Those <imp-def> operands on the MOV32ri instruction confused the spiller
-; because they were preserved by TII.foldMemoryOperand.  It is quite rare to
-; see a rematerialized instruction spill, it can only happen when it is feeding
-; a PHI.
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-apple-macosx10.7"
-
-@g_193 = external global i32, align 4
-@g_103 = external global i32, align 4
-
-declare i32 @func_21(i16 signext, i32) nounwind uwtable readnone ssp
-
-define i32 @func_25(i32 %p_27, i8 signext %p_28, i32 %p_30) noreturn nounwind uwtable ssp {
-entry:
-  br label %for.cond
-
-for.cond28.for.cond.loopexit_crit_edge:           ; preds = %for.cond28thread-pre-split
-  store i32 0, i32* @g_103, align 4
-  br label %for.cond
-
-for.cond:                                         ; preds = %for.cond28thread-pre-split, %for.cond28.for.cond.loopexit_crit_edge, %entry
-  %l_365.0 = phi i32 [ undef, %entry ], [ %and, %for.cond28.for.cond.loopexit_crit_edge ], [ %and, %for.cond28thread-pre-split ]
-  %l_288.0 = phi i32 [ undef, %entry ], [ %l_288.1.ph, %for.cond28.for.cond.loopexit_crit_edge ], [ %l_288.1.ph, %for.cond28thread-pre-split ]
-  %l_349.0 = phi i32 [ undef, %entry ], [ %xor, %for.cond28.for.cond.loopexit_crit_edge ], [ %xor, %for.cond28thread-pre-split ]
-  %p_28.addr.0 = phi i8 [ %p_28, %entry ], [ %p_28.addr.1.ph, %for.cond28.for.cond.loopexit_crit_edge ], [ %p_28.addr.1.ph, %for.cond28thread-pre-split ]
-  br i1 undef, label %for.cond31, label %lor.end
-
-lor.end:                                          ; preds = %for.cond
-  %tobool3 = icmp eq i32 %l_349.0, 0
-  br i1 %tobool3, label %for.cond31, label %if.then
-
-if.then:                                          ; preds = %lor.end
-  br i1 undef, label %lor.rhs6, label %lor.end9
-
-lor.rhs6:                                         ; preds = %if.then
-  br label %lor.end9
-
-lor.end9:                                         ; preds = %lor.rhs6, %if.then
-  %and = and i32 %l_365.0, 1
-  %conv11 = sext i8 %p_28.addr.0 to i32
-  %xor = xor i32 %and, %conv11
-  br i1 false, label %if.else, label %if.end
-
-if.else:                                          ; preds = %lor.end9
-  br label %if.end
-
-if.end:                                           ; preds = %if.else, %lor.end9
-  %l_395.0 = phi i32 [ 0, %if.else ], [ 1, %lor.end9 ]
-  %cmp14 = icmp ne i32 %and, %conv11
-  %conv15 = zext i1 %cmp14 to i32
-  br i1 %cmp14, label %if.then16, label %for.cond28thread-pre-split
-
-if.then16:                                        ; preds = %if.end
-  %or17 = or i32 %l_288.0, 1
-  %call18 = tail call i32 @func_39(i32 0, i32 %or17, i32 0, i32 0) nounwind
-  br i1 undef, label %if.else24, label %if.then20
-
-if.then20:                                        ; preds = %if.then16
-  %conv21 = trunc i32 %l_395.0 to i16
-  %call22 = tail call i32 @func_21(i16 signext %conv21, i32 undef)
-  br label %for.cond28thread-pre-split
-
-if.else24:                                        ; preds = %if.then16
-  store i32 %conv15, i32* @g_193, align 4
-  %conv25 = trunc i32 %l_395.0 to i8
-  br label %for.cond28thread-pre-split
-
-for.cond28thread-pre-split:                       ; preds = %if.else24, %if.then20, %if.end
-  %l_288.1.ph = phi i32 [ %l_288.0, %if.end ], [ %or17, %if.else24 ], [ %or17, %if.then20 ]
-  %p_28.addr.1.ph = phi i8 [ %p_28.addr.0, %if.end ], [ %conv25, %if.else24 ], [ %p_28.addr.0, %if.then20 ]
-  %.pr = load i32* @g_103, align 4
-  %tobool2933 = icmp eq i32 %.pr, 0
-  br i1 %tobool2933, label %for.cond, label %for.cond28.for.cond.loopexit_crit_edge
-
-for.cond31:                                       ; preds = %for.cond31, %lor.end, %for.cond
-  br label %for.cond31
-}
-
-declare i32 @func_39(i32, i32, i32, i32)
diff --git a/test/CodeGen/X86/2011-12-08-AVXISelBugs.ll b/test/CodeGen/X86/2011-12-08-AVXISelBugs.ll
index d978102..1561784 100644
--- a/test/CodeGen/X86/2011-12-08-AVXISelBugs.ll
+++ b/test/CodeGen/X86/2011-12-08-AVXISelBugs.ll
@@ -69,3 +69,12 @@ entry:
   %2 = insertelement <3 x i64> <i64 undef, i64 0, i64 0>, i64 %1, i32 0
   ret <3 x i64> %2
 }
+
+define void @t5() nounwind {
+entry:
+  %0 = shufflevector <2 x i64> zeroinitializer, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %1 = shufflevector <8 x i64> <i64 0, i64 0, i64 0, i64 undef, i64 undef, i64 0, i64 0, i64 0>, <8 x i64> %0, <8 x i32> <i32 0, i32 1, i32 2, i32 9, i32 8, i32 5, i32 6, i32 7>
+  store <8 x i64> %1, <8 x i64> addrspace(1)* undef, align 64
+
+  ret void
+}
diff --git a/test/CodeGen/X86/2011-12-15-vec_shift.ll b/test/CodeGen/X86/2011-12-15-vec_shift.ll
index 2b98b5a..6f9188c 100644
--- a/test/CodeGen/X86/2011-12-15-vec_shift.ll
+++ b/test/CodeGen/X86/2011-12-15-vec_shift.ll
@@ -1,12 +1,19 @@
-; RUN: llc -march=x86-64 -mcpu=corei7 < %s | FileCheck %s
+; RUN: llc -march=x86-64 -mattr=+sse41 < %s | FileCheck %s -check-prefix=CHECK-W-SSE4
+; RUN: llc -march=x86-64 -mattr=-sse41 < %s | FileCheck %s -check-prefix=CHECK-WO-SSE4
 ; Test case for r146671
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.7"
 
 define <16 x i8> @shift(<16 x i8> %a, <16 x i8> %b) nounwind {
-  ; CHECK: psllw $4, [[REG:%xmm.]]
-  ; CHECK-NEXT: movdqa
-  ; CHECK-NEXT: pblendvb [[REG]],{{ %xmm.}}
+  ; Make sure operands to pblend are in the right order.
+  ; CHECK-W-SSE4: psllw $4, [[REG1:%xmm.]]
+  ; CHECK-W-SSE4: pblendvb [[REG1]],{{ %xmm.}}
+  ; CHECK-W-SSE4: psllw $2
+
+  ; Make sure we're masking and pcmp'ing the VSELECT condition vector.
+  ; CHECK-WO-SSE4: psllw $5, [[REG1:%xmm.]]
+  ; CHECK-WO-SSE4: pand [[REG1]], [[REG2:%xmm.]]
+  ; CHECK-WO-SSE4: pcmpeqb {{%xmm., }}[[REG2]]
   %1 = shl <16 x i8> %a, %b
   ret <16 x i8> %1
 }
diff --git a/test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll b/test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll
new file mode 100644
index 0000000..39c213f
--- /dev/null
+++ b/test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll
@@ -0,0 +1,16 @@
+; RUN: llc -march=x86-64 -mattr=-sse42,+sse41 < %s | FileCheck %s
+; Make sure we don't load from the location pointed to by %p
+; twice: it has non-obvious performance implications, and
+; the relevant transformation doesn't know how to update
+; the chains correctly.
+; PR10747
+
+; CHECK: test:
+; CHECK: pextrd $2, %xmm
+define <4 x i32> @test(<4 x i32>* %p) {
+  %v = load <4 x i32>* %p
+  %e = extractelement <4 x i32> %v, i32 2
+  %cmp = icmp eq i32 %e, 3
+  %sel = select i1 %cmp, <4 x i32> %v, <4 x i32> zeroinitializer
+  ret <4 x i32> %sel
+}
diff --git a/test/CodeGen/X86/2011-12-28-vselecti8.ll b/test/CodeGen/X86/2011-12-28-vselecti8.ll
new file mode 100644
index 0000000..dbc122a
--- /dev/null
+++ b/test/CodeGen/X86/2011-12-28-vselecti8.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin  -mcpu=corei7 | FileCheck %s
+; ModuleID = '<stdin>'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-darwin11.2.0"
+
+; CHECK: @foo8
+; CHECK: psll
+; CHECK: psraw
+; CHECK: pblendvb
+; CHECK: ret
+define void @foo8(float* nocapture %RET) nounwind {
+allocas:
+  %resultvec.i = select <8 x i1> <i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true>, <8 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, <8 x i8> <i8 100, i8 100, i8 100, i8 100, i8 100, i8 100, i8 100, i8 100>
+  %uint2float = uitofp <8 x i8> %resultvec.i to <8 x float>
+  %ptr = bitcast float * %RET to <8 x float> *
+  store <8 x float> %uint2float, <8 x float>* %ptr, align 4
+  ret void
+}
+
+
diff --git a/test/CodeGen/X86/2011-12-8-bitcastintprom.ll b/test/CodeGen/X86/2011-12-8-bitcastintprom.ll
index ceee8e6..e2b3ebc 100644
--- a/test/CodeGen/X86/2011-12-8-bitcastintprom.ll
+++ b/test/CodeGen/X86/2011-12-8-bitcastintprom.ll
@@ -2,8 +2,8 @@
 
 ; Make sure that the conversion between v4i8 to v2i16 is not a simple bitcast.
 ; CHECK: prom_bug
-; CHECK: movd
 ; CHECK: shufb
+; CHECK: movd
 ; CHECK: movw
 ; CHECK: ret
 define void @prom_bug(<4 x i8> %t, i16* %p) {
diff --git a/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll b/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll
new file mode 100644
index 0000000..832a8eb
--- /dev/null
+++ b/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll
@@ -0,0 +1,155 @@
+; RUN: llc < %s -disable-fp-elim
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
+target triple = "i386-apple-macosx10.7"
+
+; This test case has a landing pad with two predecessors, and a variable that
+; is undef on the first edge while carrying the first function return value on
+; the second edge.
+;
+; Live range splitting tries to isolate the block containing the first function
+; call, and it is important that the last split point is after the function call
+; so the return value can spill.
+;
+; <rdar://problem/10664933>
+
+@Exception = external unnamed_addr constant { i8*, i8* }
+
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
+
+define void @f(i32* nocapture %arg, i32* nocapture %arg1, i32* nocapture %arg2, i32* nocapture %arg3, i32 %arg4, i32 %arg5) optsize ssp {
+bb:
+  br i1 undef, label %bb6, label %bb7
+
+bb6:                                              ; preds = %bb
+  %tmp = select i1 false, i32 0, i32 undef
+  br label %bb7
+
+bb7:                                              ; preds = %bb6, %bb
+  %tmp8 = phi i32 [ %tmp, %bb6 ], [ 0, %bb ]
+  %tmp9 = shl i32 %tmp8, 2
+  %tmp10 = invoke noalias i8* @_Znam(i32 undef) optsize
+          to label %bb11 unwind label %bb20
+
+bb11:                                             ; preds = %bb7
+  %tmp12 = ptrtoint i8* %tmp10 to i32
+  %tmp13 = bitcast i8* %tmp10 to i32*
+  %tmp14 = shl i32 %tmp8, 2
+  %tmp15 = getelementptr i32* %tmp13, i32 undef
+  %tmp16 = getelementptr i32* %tmp13, i32 undef
+  %tmp17 = zext i32 %tmp9 to i64
+  %tmp18 = add i64 %tmp17, -1
+  %tmp19 = icmp ugt i64 %tmp18, 4294967295
+  br i1 %tmp19, label %bb29, label %bb31
+
+bb20:                                             ; preds = %bb43, %bb41, %bb29, %bb7
+  %tmp21 = phi i32 [ undef, %bb7 ], [ %tmp12, %bb43 ], [ %tmp12, %bb29 ], [ %tmp12, %bb41 ]
+  %tmp22 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          catch i8* bitcast ({ i8*, i8* }* @Exception to i8*)
+  br i1 undef, label %bb23, label %bb69
+
+bb23:                                             ; preds = %bb38, %bb20
+  %tmp24 = phi i32 [ %tmp12, %bb38 ], [ %tmp21, %bb20 ]
+  %tmp25 = icmp eq i32 %tmp24, 0
+  br i1 %tmp25, label %bb28, label %bb26
+
+bb26:                                             ; preds = %bb23
+  %tmp27 = inttoptr i32 %tmp24 to i8*
+  br label %bb28
+
+bb28:                                             ; preds = %bb26, %bb23
+  ret void
+
+bb29:                                             ; preds = %bb11
+  invoke void @OnOverFlow() optsize
+          to label %bb30 unwind label %bb20
+
+bb30:                                             ; preds = %bb29
+  unreachable
+
+bb31:                                             ; preds = %bb11
+  %tmp32 = bitcast i32* %tmp15 to i8*
+  %tmp33 = zext i32 %tmp8 to i64
+  %tmp34 = add i64 %tmp33, -1
+  %tmp35 = icmp ugt i64 %tmp34, 4294967295
+  %tmp36 = icmp sgt i32 %tmp8, 0
+  %tmp37 = add i32 %tmp9, -4
+  br label %bb38
+
+bb38:                                             ; preds = %bb67, %bb31
+  %tmp39 = phi i32 [ %tmp68, %bb67 ], [ undef, %bb31 ]
+  %tmp40 = icmp sgt i32 %tmp39, undef
+  br i1 %tmp40, label %bb41, label %bb23
+
+bb41:                                             ; preds = %bb38
+  invoke void @Pjii(i32* %tmp16, i32 0, i32 %tmp8) optsize
+          to label %bb42 unwind label %bb20
+
+bb42:                                             ; preds = %bb41
+  tail call void @llvm.memset.p0i8.i32(i8* %tmp32, i8 0, i32 %tmp9, i32 1, i1 false) nounwind
+  br i1 %tmp35, label %bb43, label %bb45
+
+bb43:                                             ; preds = %bb42
+  invoke void @OnOverFlow() optsize
+          to label %bb44 unwind label %bb20
+
+bb44:                                             ; preds = %bb43
+  unreachable
+
+bb45:                                             ; preds = %bb57, %bb42
+  %tmp46 = phi i32 [ %tmp58, %bb57 ], [ 255, %bb42 ]
+  %tmp47 = icmp slt i32 undef, 0
+  br i1 %tmp47, label %bb48, label %bb59
+
+bb48:                                             ; preds = %bb45
+  tail call void @llvm.memset.p0i8.i32(i8* %tmp32, i8 0, i32 %tmp9, i32 1, i1 false) nounwind
+  br i1 %tmp36, label %bb49, label %bb57
+
+bb49:                                             ; preds = %bb49, %bb48
+  %tmp50 = phi i32 [ %tmp55, %bb49 ], [ 0, %bb48 ]
+  %tmp51 = add i32 %tmp50, undef
+  %tmp52 = add i32 %tmp50, undef
+  %tmp53 = getelementptr i32* %tmp13, i32 %tmp52
+  %tmp54 = load i32* %tmp53, align 4, !tbaa !0
+  %tmp55 = add i32 %tmp50, 1
+  %tmp56 = icmp eq i32 %tmp55, %tmp8
+  br i1 %tmp56, label %bb57, label %bb49
+
+bb57:                                             ; preds = %bb49, %bb48
+  %tmp58 = add i32 %tmp46, -1
+  br label %bb45
+
+bb59:                                             ; preds = %bb45
+  %tmp60 = ashr i32 %tmp46, 31
+  tail call void @llvm.memset.p0i8.i32(i8* null, i8 0, i32 %tmp37, i32 1, i1 false) nounwind
+  br i1 %tmp36, label %bb61, label %bb67
+
+bb61:                                             ; preds = %bb61, %bb59
+  %tmp62 = phi i32 [ %tmp65, %bb61 ], [ 0, %bb59 ]
+  %tmp63 = add i32 %tmp62, %tmp14
+  %tmp64 = getelementptr i32* %tmp13, i32 %tmp63
+  store i32 0, i32* %tmp64, align 4, !tbaa !0
+  %tmp65 = add i32 %tmp62, 1
+  %tmp66 = icmp eq i32 %tmp65, %tmp8
+  br i1 %tmp66, label %bb67, label %bb61
+
+bb67:                                             ; preds = %bb61, %bb59
+  %tmp68 = add i32 %tmp39, -1
+  br label %bb38
+
+bb69:                                             ; preds = %bb20
+  resume { i8*, i32 } %tmp22
+}
+
+declare i32 @__gxx_personality_v0(...)
+
+declare noalias i8* @_Znam(i32) optsize
+
+declare void @Pjii(i32*, i32, i32) optsize
+
+declare i32 @llvm.eh.typeid.for(i8*) nounwind readnone
+
+declare void @OnOverFlow() noreturn optsize ssp align 2
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/X86/2012-01-11-split-cv.ll b/test/CodeGen/X86/2012-01-11-split-cv.ll
new file mode 100644
index 0000000..6b90072
--- /dev/null
+++ b/test/CodeGen/X86/2012-01-11-split-cv.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86 -mcpu=corei7-avx -mattr=+avx -mtriple=i686-pc-win32 | FileCheck %s
+
+;CHECK: add18i16
+define void @add18i16(<18 x i16>* nocapture sret %ret, <18 x i16>* %bp) nounwind {
+;CHECK: vmovups
+  %b = load <18 x i16>* %bp, align 16
+  %x = add <18 x i16> zeroinitializer, %b
+  store <18 x i16> %x, <18 x i16>* %ret, align 16
+;CHECK: ret
+  ret void
+}
+
diff --git a/test/CodeGen/X86/2012-01-12-extract-sv.ll b/test/CodeGen/X86/2012-01-12-extract-sv.ll
new file mode 100644
index 0000000..fa8e80f
--- /dev/null
+++ b/test/CodeGen/X86/2012-01-12-extract-sv.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86 -mcpu=corei7-avx -mattr=+avx -mtriple=i686-pc-win32 | FileCheck %s
+
+; CHECK: endless_loop
+define void @endless_loop() { ; builds a <16 x i32> with two shufflevectors and stores it through an undef addrspace(1) pointer
+entry:
+  %0 = load <8 x i32> addrspace(1)* undef, align 32
+  %1 = shufflevector <8 x i32> %0, <8 x i32> undef, <16 x i32> <i32 4, i32 4, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> ; widen 8 -> 16 lanes; lanes 0 and 1 both take element 4 of %0
+  %2 = shufflevector <16 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 undef>, <16 x i32> %1, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 17> ; lanes 0 and 15 come from %1 (mask indices 16 and 17); lanes 1-14 come from the constant vector
+  store <16 x i32> %2, <16 x i32> addrspace(1)* undef, align 64
+  ret void
+; CHECK: ret
+}
diff --git a/test/CodeGen/X86/2012-01-16-mfence-nosse-flags.ll b/test/CodeGen/X86/2012-01-16-mfence-nosse-flags.ll
new file mode 100644
index 0000000..a883d79
--- /dev/null
+++ b/test/CodeGen/X86/2012-01-16-mfence-nosse-flags.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -mtriple=i686-linux -mattr=-sse | FileCheck %s
+; PR11768
+
+@ptr = external global i8*
+
+define void @baz() nounwind ssp { ; loads @ptr, compares it with null, executes a seq_cst fence, then calls @foo or @bar
+entry:
+  %0 = load i8** @ptr, align 4
+  %cmp = icmp eq i8* %0, null
+  fence seq_cst ; sits between the icmp and the branch that consumes %cmp
+  br i1 %cmp, label %if.then, label %if.else
+
+; Make sure the fence comes before the comparison, since it
+; clobbers EFLAGS.
+
+; CHECK: lock
+; CHECK-NEXT: orl {{.*}}, (%esp)
+; CHECK-NEXT: cmpl $0
+
+if.then:                                          ; preds = %entry
+  tail call void bitcast (void (...)* @foo to void ()*)() nounwind
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  tail call void bitcast (void (...)* @bar to void ()*)() nounwind
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  ret void
+}
+
+declare void @foo(...)
+
+declare void @bar(...)
diff --git a/test/CodeGen/X86/2012-01-18-vbitcast.ll b/test/CodeGen/X86/2012-01-18-vbitcast.ll
new file mode 100644
index 0000000..8a3ccc8
--- /dev/null
+++ b/test/CodeGen/X86/2012-01-18-vbitcast.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mtriple=x86_64-pc-win32 | FileCheck %s
+
+;CHECK: vcast
+define <2 x i32> @vcast(<2 x float> %a, <2 x float> %b) { ; bitcasts both <2 x float> arguments to <2 x i32> and returns their difference
+;CHECK: pshufd
+;CHECK: pshufd
+  %af = bitcast <2 x float> %a to <2 x i32>
+  %bf = bitcast <2 x float> %b to <2 x i32>
+  %x = sub <2 x i32> %af, %bf
+;CHECK: psubq
+  ret <2 x i32> %x
+;CHECK: ret
+}
+
diff --git a/test/CodeGen/X86/2012-02-12-dagco.ll b/test/CodeGen/X86/2012-02-12-dagco.ll
new file mode 100644
index 0000000..13723a2
--- /dev/null
+++ b/test/CodeGen/X86/2012-02-12-dagco.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx
+target triple = "x86_64-unknown-linux-gnu"
+; Make sure llc does not crash when compiling this function.
+define void @dagco_crash() { ; fills lanes 5-7 of an <8 x i64> (two extracted elements plus undef) and stores it
+entry:
+  %srcval.i411.i = load <4 x i64>* undef, align 1
+  %0 = extractelement <4 x i64> %srcval.i411.i, i32 3
+  %srcval.i409.i = load <2 x i64>* undef, align 1
+  %1 = extractelement <2 x i64> %srcval.i409.i, i32 0
+  %2 = insertelement <8 x i64> undef, i64 %0, i32 5
+  %3 = insertelement <8 x i64> %2, i64 %1, i32 6
+  %4 = insertelement <8 x i64> %3, i64 undef, i32 7
+  store <8 x i64> %4, <8 x i64> addrspace(1)* undef, align 64
+  unreachable
+}
+
diff --git a/test/CodeGen/X86/2012-02-14-scalar.ll b/test/CodeGen/X86/2012-02-14-scalar.ll
new file mode 100644
index 0000000..1dc076b
--- /dev/null
+++ b/test/CodeGen/X86/2012-02-14-scalar.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx
+target triple = "x86_64-unknown-linux-gnu"
+; Make sure llc does not crash when compiling this function.
+define void @autogen_28112_5000() { ; extracts the single lane of a <1 x i1> compare result and uses it in a select inside a self-looping block
+BB:
+  %S17 = icmp sgt <1 x i64> undef, undef
+  %E19 = extractelement <1 x i1> %S17, i32 0
+  br label %CF
+
+CF:                                               ; preds = %CF, %BB
+  %S23 = select i1 %E19, i8 undef, i8 undef
+  br label %CF ; %CF only branches back to itself, so there is no return
+}
diff --git a/test/CodeGen/X86/2012-02-20-MachineCPBug.ll b/test/CodeGen/X86/2012-02-20-MachineCPBug.ll
new file mode 100644
index 0000000..3013f16
--- /dev/null
+++ b/test/CodeGen/X86/2012-02-20-MachineCPBug.ll
@@ -0,0 +1,77 @@
+; RUN: llc < %s -mtriple=i386-apple-macosx -mattr=+sse | FileCheck %s
+; PR11940: Do not optimize away movb %al, %ch
+
+%struct.APInt = type { i64* }
+
+declare noalias i8* @calloc(i32, i32) nounwind
+
+define void @bug(%struct.APInt* noalias nocapture sret %agg.result, %struct.APInt* nocapture %this, i32 %rotateAmt) nounwind align 2 { ; fills a calloc'd i64 buffer from %this and stores the buffer pointer into %agg.result
+entry:
+; CHECK: bug:
+  %call = tail call i8* @calloc(i32 1, i32 32)
+  %call.i = tail call i8* @calloc(i32 1, i32 32) nounwind
+  %0 = bitcast i8* %call.i to i64*
+  %rem.i = and i32 %rotateAmt, 63 ; low 6 bits of %rotateAmt
+  %div.i = lshr i32 %rotateAmt, 6 ; %rotateAmt divided by 64
+  %cmp.i = icmp eq i32 %rem.i, 0
+  br i1 %cmp.i, label %for.cond.preheader.i, label %if.end.i
+
+for.cond.preheader.i:                             ; preds = %entry
+  %sub.i = sub i32 4, %div.i
+  %cmp23.i = icmp eq i32 %div.i, 4
+  br i1 %cmp23.i, label %for.body9.lr.ph.i, label %for.body.lr.ph.i
+
+for.body.lr.ph.i:                                 ; preds = %for.cond.preheader.i
+  %pVal.i = getelementptr inbounds %struct.APInt* %this, i32 0, i32 0
+  %.pre5.i = load i64** %pVal.i, align 4
+  br label %for.body.i
+
+for.body.i:                                       ; preds = %for.body.i, %for.body.lr.ph.i
+  %i.04.i = phi i32 [ 0, %for.body.lr.ph.i ], [ %inc.i, %for.body.i ]
+  %add.i = add i32 %i.04.i, %div.i
+  %arrayidx.i = getelementptr inbounds i64* %.pre5.i, i32 %add.i
+  %1 = load i64* %arrayidx.i, align 4
+  %arrayidx3.i = getelementptr inbounds i64* %0, i32 %i.04.i
+  store i64 %1, i64* %arrayidx3.i, align 4
+  %inc.i = add i32 %i.04.i, 1
+  %cmp2.i = icmp ult i32 %inc.i, %sub.i
+  br i1 %cmp2.i, label %for.body.i, label %if.end.i
+
+if.end.i:                                         ; preds = %for.body.i, %entry
+  %cmp81.i = icmp eq i32 %div.i, 3
+  br i1 %cmp81.i, label %_ZNK5APInt4lshrEj.exit, label %for.body9.lr.ph.i
+
+for.body9.lr.ph.i:                                ; preds = %if.end.i, %for.cond.preheader.i
+  %sub58.i = sub i32 3, %div.i
+  %pVal11.i = getelementptr inbounds %struct.APInt* %this, i32 0, i32 0
+  %sh_prom.i = zext i32 %rem.i to i64
+  %sub17.i = sub i32 64, %rem.i
+  %sh_prom18.i = zext i32 %sub17.i to i64
+  %.pre.i = load i64** %pVal11.i, align 4
+  br label %for.body9.i
+
+for.body9.i:                                      ; preds = %for.body9.i, %for.body9.lr.ph.i
+; CHECK: %for.body9.i
+; CHECK: movb %al, %ch
+  %i6.02.i = phi i32 [ 0, %for.body9.lr.ph.i ], [ %inc21.i, %for.body9.i ]
+  %add10.i = add i32 %i6.02.i, %div.i
+  %arrayidx12.i = getelementptr inbounds i64* %.pre.i, i32 %add10.i
+  %2 = load i64* %arrayidx12.i, align 4
+  %shr.i = lshr i64 %2, %sh_prom.i
+  %add14.i = add i32 %add10.i, 1
+  %arrayidx16.i = getelementptr inbounds i64* %.pre.i, i32 %add14.i
+  %3 = load i64* %arrayidx16.i, align 4
+  %shl.i = shl i64 %3, %sh_prom18.i
+  %or.i = or i64 %shl.i, %shr.i ; combine the right-shifted word with the left-shifted following word
+  %arrayidx19.i = getelementptr inbounds i64* %0, i32 %i6.02.i
+  store i64 %or.i, i64* %arrayidx19.i, align 4
+  %inc21.i = add i32 %i6.02.i, 1
+  %cmp8.i = icmp ult i32 %inc21.i, %sub58.i
+  br i1 %cmp8.i, label %for.body9.i, label %_ZNK5APInt4lshrEj.exit
+
+_ZNK5APInt4lshrEj.exit:                           ; preds = %for.body9.i, %if.end.i
+  %call.i1 = tail call i8* @calloc(i32 1, i32 32) nounwind
+  %4 = getelementptr inbounds %struct.APInt* %agg.result, i32 0, i32 0
+  store i64* %0, i64** %4, align 4
+  ret void
+}
diff --git a/test/CodeGen/X86/2012-02-23-mmx-inlineasm.ll b/test/CodeGen/X86/2012-02-23-mmx-inlineasm.ll
new file mode 100644
index 0000000..a55c77b
--- /dev/null
+++ b/test/CodeGen/X86/2012-02-23-mmx-inlineasm.ll
@@ -0,0 +1,12 @@
+; RUN: llc -march=x86 -mcpu=i686 -mattr=+mmx < %s | FileCheck %s
+; <rdar://problem/10106006>
+
+define void @func() nounwind ssp { ; emits one MMX psrlw through inline asm, passing the constant 8 under the "y" constraint
+; CHECK:  psrlw %mm0, %mm1
+entry:
+  call void asm sideeffect "psrlw $0, %mm1", "y,~{dirflag},~{fpsr},~{flags}"(i32 8) nounwind
+  unreachable
+
+bb367:                                            ; no predecessors: %entry ends in unreachable
+  ret void
+}
diff --git a/test/CodeGen/X86/2012-02-29-CoalescerBug.ll b/test/CodeGen/X86/2012-02-29-CoalescerBug.ll
new file mode 100644
index 0000000..bdce853
--- /dev/null
+++ b/test/CodeGen/X86/2012-02-29-CoalescerBug.ll
@@ -0,0 +1,58 @@
+; RUN: llc -O1 <%s
+; PR12138
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
+target triple = "i386-apple-macosx10.7.0"
+
+%struct.S0 = type { i8, i32 }
+
+@d = external global [2 x [2 x %struct.S0]], align 4
+@c = external global i32, align 4
+@e = external global i32, align 4
+@b = external global i32, align 4
+@a = external global i32, align 4
+
+define void @fn2() nounwind optsize ssp { ; zeroes the first 8 bytes of @d, then loops updating @e and @b while carrying an i64 value in %g.0
+entry:
+  store i64 0, i64* bitcast ([2 x [2 x %struct.S0]]* @d to i64*), align 4
+  %0 = load i32* @c, align 4
+  %tobool2 = icmp eq i32 %0, 0
+  %1 = load i32* @a, align 4
+  %tobool4 = icmp eq i32 %1, 0
+  br label %for.cond
+
+for.cond:                                         ; preds = %if.end, %entry
+  %f.1.0 = phi i32 [ undef, %entry ], [ %sub, %if.end ]
+  %g.0 = phi i64 [ 0, %entry ], [ %ins, %if.end ]
+  %tobool = icmp eq i32 %f.1.0, 0
+  br i1 %tobool, label %for.end, label %for.body
+
+for.body:                                         ; preds = %for.cond
+  %2 = lshr i64 %g.0, 32
+  %conv = trunc i64 %2 to i16 ; bits 32-47 of %g.0
+  br i1 %tobool2, label %lor.rhs, label %lor.end
+
+lor.rhs:                                          ; preds = %for.body
+  store i32 1, i32* @e, align 4
+  br label %lor.end
+
+lor.end:                                          ; preds = %lor.rhs, %for.body
+  %xor.i = xor i16 %conv, 1
+  %p1.lobit.i8 = lshr i64 %g.0, 47
+  %p1.lobit.i8.tr = trunc i64 %p1.lobit.i8 to i16
+  %p1.lobit.i = and i16 %p1.lobit.i8.tr, 1 ; bit 47 of %g.0
+  %and.i = and i16 %p1.lobit.i, %xor.i
+  %3 = xor i16 %and.i, 1
+  %sub.conv.i = sub i16 %conv, %3
+  %conv3 = sext i16 %sub.conv.i to i32
+  store i32 %conv3, i32* @b, align 4
+  br i1 %tobool4, label %if.end, label %for.end
+
+if.end:                                           ; preds = %lor.end
+  %mask = and i64 %g.0, -256 ; clear the low 8 bits of %g.0
+  %ins = or i64 %mask, 1 ; then set bit 0
+  %sub = add nsw i32 %f.1.0, -1
+  br label %for.cond
+
+for.end:                                          ; preds = %lor.end, %for.cond
+  ret void
+}
diff --git a/test/CodeGen/X86/2012-1-10-buildvector.ll b/test/CodeGen/X86/2012-1-10-buildvector.ll
new file mode 100644
index 0000000..ff6be36
--- /dev/null
+++ b/test/CodeGen/X86/2012-1-10-buildvector.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -march=x86 -mcpu=corei7-avx -mattr=+avx -mtriple=i686-pc-win32 | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S32"
+target triple = "i686-pc-win32"
+
+;CHECK: bad_cast
+define void @bad_cast() { ; shuffles a zero <3 x i64> with a widened undef <2 x i64> and stores the <3 x i64> result
+entry:
+  %vext.i = shufflevector <2 x i64> undef, <2 x i64> undef, <3 x i32> <i32 0, i32 1, i32 undef>
+  %vecinit8.i = shufflevector <3 x i64> zeroinitializer, <3 x i64> %vext.i, <3 x i32> <i32 0, i32 3, i32 4>
+  store <3 x i64> %vecinit8.i, <3 x i64>* undef, align 32
+;CHECK: ret
+  ret void
+}
+
+
+;CHECK: bad_insert
+define void @bad_insert(i32 %t) { ; inserts %t into lane 0 of a zero <8 x i32> and stores the vector
+entry:
+;CHECK: vpinsrd
+  %v2 = insertelement <8 x i32> zeroinitializer, i32 %t, i32 0
+  store <8 x i32> %v2, <8 x i32> addrspace(1)* undef, align 32
+;CHECK: ret
+  ret void
+}
+
diff --git a/test/CodeGen/X86/GC/dg.exp b/test/CodeGen/X86/GC/dg.exp
deleted file mode 100644
index 629a147..0000000
--- a/test/CodeGen/X86/GC/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target X86] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/X86/GC/lit.local.cfg b/test/CodeGen/X86/GC/lit.local.cfg
new file mode 100644
index 0000000..b05ed3c
--- /dev/null
+++ b/test/CodeGen/X86/GC/lit.local.cfg
@@ -0,0 +1,13 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+def getRoot(config):  # follow config.parent upwards and return the config that has no parent
+    if not config.parent:
+        return config
+    return getRoot(config.parent)
+
+root = getRoot(config)
+
+targets = set(root.targets_to_build.split())  # targets_to_build is split on whitespace into a set of names
+if not 'X86' in targets:
+    config.unsupported = True  # set when 'X86' is absent from the root config's targets_to_build
+
diff --git a/test/CodeGen/X86/abi-isel.ll b/test/CodeGen/X86/abi-isel.ll
index 5068d29..658ccaa 100644
--- a/test/CodeGen/X86/abi-isel.ll
+++ b/test/CodeGen/X86/abi-isel.ll
@@ -1,16 +1,16 @@
-; RUN: llc < %s -asm-verbose=0 -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-32-STATIC
-; RUN: llc < %s -asm-verbose=0 -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-32-PIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-32-STATIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-32-PIC
 
-; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-64-STATIC
-; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=LINUX-64-PIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-64-STATIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=LINUX-64-PIC
 
-; RUN: llc < %s -asm-verbose=0 -mtriple=i686-apple-darwin -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=DARWIN-32-STATIC
-; RUN: llc < %s -asm-verbose=0 -mtriple=i686-apple-darwin -march=x86 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-32-DYNAMIC
-; RUN: llc < %s -asm-verbose=0 -mtriple=i686-apple-darwin -march=x86 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-32-PIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=DARWIN-32-STATIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-32-DYNAMIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-32-PIC
 
-; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=DARWIN-64-STATIC
-; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-DYNAMIC
-; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-PIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=DARWIN-64-STATIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-DYNAMIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-PIC
 
 @src = external global [131072 x i32]
 @dst = external global [131072 x i32]
diff --git a/test/CodeGen/X86/add.ll b/test/CodeGen/X86/add.ll
index 7bf527a..8e871f4 100644
--- a/test/CodeGen/X86/add.ll
+++ b/test/CodeGen/X86/add.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=x86 | FileCheck %s -check-prefix=X32
-; RUN: llc < %s -mtriple=x86_64-linux -join-physregs | FileCheck %s -check-prefix=X64
-; RUN: llc < %s -mtriple=x86_64-win32 -join-physregs | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mcpu=generic -march=x86 | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -join-physregs | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-win32 -join-physregs | FileCheck %s -check-prefix=X64
 
 ; Some of these tests depend on -join-physregs to commute instructions.
 
diff --git a/test/CodeGen/X86/apm.ll b/test/CodeGen/X86/apm.ll
index b514cf6..aaedf18 100644
--- a/test/CodeGen/X86/apm.ll
+++ b/test/CodeGen/X86/apm.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s -check-prefix=WIN64
+; RUN: llc < %s -mtriple=x86_64-linux -mattr=+sse3 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+sse3 | FileCheck %s -check-prefix=WIN64
 ; PR8573
 
 ; CHECK: foo:
diff --git a/test/CodeGen/X86/atom-lea-sp.ll b/test/CodeGen/X86/atom-lea-sp.ll
new file mode 100644
index 0000000..5942788
--- /dev/null
+++ b/test/CodeGen/X86/atom-lea-sp.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -mcpu=atom -mtriple=i686-linux  | FileCheck -check-prefix=atom %s
+; RUN: llc < %s -mcpu=core2 -mtriple=i686-linux | FileCheck %s
+
+declare void @use_arr(i8*)
+declare void @many_params(i32, i32, i32, i32, i32, i32)
+
+define void @test1() nounwind { ; allocates a 1024-byte stack array and passes its address to @use_arr
+; atom: test1:
+; atom: leal -1052(%esp), %esp
+; atom-NOT: sub
+; atom: call
+; atom: leal 1052(%esp), %esp
+
+; CHECK: test1:
+; CHECK: subl
+; CHECK: call
+; CHECK-NOT: lea
+  %arr = alloca [1024 x i8], align 16
+  %arr_ptr = getelementptr inbounds [1024 x i8]* %arr, i8 0, i8 0 ; pointer to the first byte of %arr
+  call void @use_arr(i8* %arr_ptr)
+  ret void
+}
+
+define void @test2() nounwind { ; makes a single call that passes six i32 constants
+; atom: test2:
+; atom: leal -28(%esp), %esp
+; atom: call
+; atom: leal 28(%esp), %esp
+
+; CHECK: test2:
+; CHECK-NOT: lea
+  call void @many_params(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6)
+  ret void
+}
+
+define void @test3() nounwind { ; allocates two i32 stack slots and stores 0 to the first; makes no calls
+; atom: test3:
+; atom: leal -8(%esp), %esp
+; atom: leal 8(%esp), %esp
+
+; CHECK: test3:
+; CHECK-NOT: lea
+  %x = alloca i32, align 4
+  %y = alloca i32, align 4 ; %y is allocated but never read or written
+  store i32 0, i32* %x, align 4
+  ret void
+}
+
diff --git a/test/CodeGen/X86/atom-sched.ll b/test/CodeGen/X86/atom-sched.ll
new file mode 100644
index 0000000..2301dfc
--- /dev/null
+++ b/test/CodeGen/X86/atom-sched.ll
@@ -0,0 +1,28 @@
+; RUN: llc <%s -O2 -mcpu=atom -march=x86 -relocation-model=static | FileCheck -check-prefix=atom %s
+; RUN: llc <%s -O2 -mcpu=core2 -march=x86 -relocation-model=static | FileCheck %s
+
+@a = common global i32 0, align 4
+@b = common global i32 0, align 4
+@c = common global i32 0, align 4
+@d = common global i32 0, align 4
+@e = common global i32 0, align 4
+@f = common global i32 0, align 4
+
+define void @func() nounwind uwtable { ; computes @a = @b * @c and @d = @e * @f as two independent load/multiply/store groups
+; atom: imull
+; atom-NOT: movl
+; atom: imull
+; CHECK: imull
+; CHECK: movl
+; CHECK: imull
+entry:
+  %0 = load i32* @b, align 4
+  %1 = load i32* @c, align 4
+  %mul = mul nsw i32 %0, %1
+  store i32 %mul, i32* @a, align 4 ; first product goes to @a
+  %2 = load i32* @e, align 4
+  %3 = load i32* @f, align 4
+  %mul1 = mul nsw i32 %2, %3
+  store i32 %mul1, i32* @d, align 4 ; second product goes to @d
+  ret void
+}
diff --git a/test/CodeGen/X86/avx-cvt.ll b/test/CodeGen/X86/avx-cvt.ll
index 6c0bd58..d0a7fe0 100644
--- a/test/CodeGen/X86/avx-cvt.ll
+++ b/test/CodeGen/X86/avx-cvt.ll
@@ -18,7 +18,7 @@ define <4 x double> @sitofp01(<4 x i32> %a) {
   ret <4 x double> %b
 }
 
-; CHECK: vcvtpd2dqy %ymm
+; CHECK: vcvttpd2dqy %ymm
 define <4 x i32> @fptosi01(<4 x double> %a) {
   %b = fptosi <4 x double> %a to <4 x i32>
   ret <4 x i32> %b
diff --git a/test/CodeGen/X86/avx-fp2int.ll b/test/CodeGen/X86/avx-fp2int.ll
new file mode 100755
index 0000000..a3aadde
--- /dev/null
+++ b/test/CodeGen/X86/avx-fp2int.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+;; Check that FP_TO_SINT and FP_TO_UINT generate a convert-with-truncate instruction
+
+; CHECK: test1:
+; CHECK: vcvttpd2dqy
+; CHECK: ret
+; CHECK: test2:
+; CHECK: vcvttpd2dqy
+; CHECK: ret
+
+define <4 x i8> @test1(<4 x double> %d) {
+  %c = fptoui <4 x double> %d to <4 x i8> ; unsigned conversion of each double lane to i8
+  ret <4 x i8> %c
+}
+define <4 x i8> @test2(<4 x double> %d) {
+  %c = fptosi <4 x double> %d to <4 x i8> ; signed conversion of each double lane to i8
+  ret <4 x i8> %c
+}
diff --git a/test/CodeGen/X86/avx-intrinsics-x86.ll b/test/CodeGen/X86/avx-intrinsics-x86.ll
index f583914..616601a 100644
--- a/test/CodeGen/X86/avx-intrinsics-x86.ll
+++ b/test/CodeGen/X86/avx-intrinsics-x86.ll
@@ -369,54 +369,6 @@ define <8 x i16> @test_x86_sse2_pavg_w(<8 x i16> %a0, <8 x i16> %a1) {
 declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone
 
 
-define <16 x i8> @test_x86_sse2_pcmpeq_b(<16 x i8> %a0, <16 x i8> %a1) {
-  ; CHECK: vpcmpeqb
-  %res = call <16 x i8> @llvm.x86.sse2.pcmpeq.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
-  ret <16 x i8> %res
-}
-declare <16 x i8> @llvm.x86.sse2.pcmpeq.b(<16 x i8>, <16 x i8>) nounwind readnone
-
-
-define <4 x i32> @test_x86_sse2_pcmpeq_d(<4 x i32> %a0, <4 x i32> %a1) {
-  ; CHECK: vpcmpeqd
-  %res = call <4 x i32> @llvm.x86.sse2.pcmpeq.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
-  ret <4 x i32> %res
-}
-declare <4 x i32> @llvm.x86.sse2.pcmpeq.d(<4 x i32>, <4 x i32>) nounwind readnone
-
-
-define <8 x i16> @test_x86_sse2_pcmpeq_w(<8 x i16> %a0, <8 x i16> %a1) {
-  ; CHECK: vpcmpeqw
-  %res = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
-  ret <8 x i16> %res
-}
-declare <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16>, <8 x i16>) nounwind readnone
-
-
-define <16 x i8> @test_x86_sse2_pcmpgt_b(<16 x i8> %a0, <16 x i8> %a1) {
-  ; CHECK: vpcmpgtb
-  %res = call <16 x i8> @llvm.x86.sse2.pcmpgt.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
-  ret <16 x i8> %res
-}
-declare <16 x i8> @llvm.x86.sse2.pcmpgt.b(<16 x i8>, <16 x i8>) nounwind readnone
-
-
-define <4 x i32> @test_x86_sse2_pcmpgt_d(<4 x i32> %a0, <4 x i32> %a1) {
-  ; CHECK: vpcmpgtd
-  %res = call <4 x i32> @llvm.x86.sse2.pcmpgt.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
-  ret <4 x i32> %res
-}
-declare <4 x i32> @llvm.x86.sse2.pcmpgt.d(<4 x i32>, <4 x i32>) nounwind readnone
-
-
-define <8 x i16> @test_x86_sse2_pcmpgt_w(<8 x i16> %a0, <8 x i16> %a1) {
-  ; CHECK: vpcmpgtw
-  %res = call <8 x i16> @llvm.x86.sse2.pcmpgt.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
-  ret <8 x i16> %res
-}
-declare <8 x i16> @llvm.x86.sse2.pcmpgt.w(<8 x i16>, <8 x i16>) nounwind readnone
-
-
 define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) {
   ; CHECK: vpmaddwd
   %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1) ; <<4 x i32>> [#uses=1]
@@ -950,14 +902,6 @@ define <8 x i16> @test_x86_sse41_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
 declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i32) nounwind readnone
 
 
-define <2 x i64> @test_x86_sse41_pcmpeqq(<2 x i64> %a0, <2 x i64> %a1) {
-  ; CHECK: vpcmpeqq
-  %res = call <2 x i64> @llvm.x86.sse41.pcmpeqq(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
-  ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.sse41.pcmpeqq(<2 x i64>, <2 x i64>) nounwind readnone
-
-
 define <8 x i16> @test_x86_sse41_phminposuw(<8 x i16> %a0) {
   ; CHECK: vphminposuw
   %res = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %a0) ; <<8 x i16>> [#uses=1]
@@ -1271,14 +1215,6 @@ define <16 x i8> @test_x86_sse42_pcmpestrm128(<16 x i8> %a0, <16 x i8> %a2) {
 declare <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
 
 
-define <2 x i64> @test_x86_sse42_pcmpgtq(<2 x i64> %a0, <2 x i64> %a1) {
-  ; CHECK: vpcmpgtq
-  %res = call <2 x i64> @llvm.x86.sse42.pcmpgtq(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
-  ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.sse42.pcmpgtq(<2 x i64>, <2 x i64>) nounwind readnone
-
-
 define i32 @test_x86_sse42_pcmpistri128(<16 x i8> %a0, <16 x i8> %a1) {
   ; CHECK: vpcmpistri
   ; CHECK: movl
@@ -1830,6 +1766,74 @@ define <8 x float> @test_x86_avx_cmp_ps_256(<8 x float> %a0, <8 x float> %a1) {
   %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
   ret <8 x float> %res
 }
+
+define <8 x float> @test_x86_avx_cmp_ps_256_pseudo_op(<8 x float> %a0, <8 x float> %a1) { ; chains 32 calls to llvm.x86.avx.cmp.ps.256, one per immediate 0 through 31; each result is the next call's second operand
+  ; CHECK: vcmpeqps
+  %a2 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 0) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpltps
+  %a3 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a2, i8 1) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpleps
+  %a4 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a3, i8 2) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpunordps
+  %a5 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a4, i8 3) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpneqps
+  %a6 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a5, i8 4) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpnltps
+  %a7 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a6, i8 5) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpnleps
+  %a8 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a7, i8 6) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpordps
+  %a9 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a8, i8 7) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpeq_uqps
+  %a10 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a9, i8 8) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpngeps
+  %a11 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a10, i8 9) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpngtps
+  %a12 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a11, i8 10) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpfalseps
+  %a13 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a12, i8 11) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpneq_oqps
+  %a14 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a13, i8 12) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpgeps
+  %a15 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a14, i8 13) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpgtps
+  %a16 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a15, i8 14) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmptrueps
+  %a17 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a16, i8 15) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpeq_osps
+  %a18 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a17, i8 16) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmplt_oqps
+  %a19 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a18, i8 17) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmple_oqps
+  %a20 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a19, i8 18) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpunord_sps
+  %a21 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a20, i8 19) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpneq_usps
+  %a22 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a21, i8 20) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpnlt_uqps
+  %a23 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a22, i8 21) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpnle_uqps
+  %a24 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a23, i8 22) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpord_sps
+  %a25 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a24, i8 23) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpeq_usps
+  %a26 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a25, i8 24) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpnge_uqps
+  %a27 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a26, i8 25) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpngt_uqps
+  %a28 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a27, i8 26) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpfalse_osps
+  %a29 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a28, i8 27) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpneq_osps
+  %a30 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a29, i8 28) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpge_oqps
+  %a31 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a30, i8 29) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpgt_oqps
+  %a32 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a31, i8 30) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmptrue_usps
+  %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a32, i8 31) ; <<8 x float>> [#uses=1]
+  ret <8 x float> %res
+}
 declare <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
 
 
@@ -2481,4 +2485,73 @@ define void @test_x86_avx_vzeroupper() {
 }
 declare void @llvm.x86.avx.vzeroupper() nounwind
 
+; Make sure instructions that have no AVX equivalents, but are associated with SSEX feature flags, still work
+
+; CHECK: monitor
+define void @monitor(i8* %P, i32 %E, i32 %H) nounwind {
+entry:
+  tail call void @llvm.x86.sse3.monitor(i8* %P, i32 %E, i32 %H) ; forwards all three arguments to the intrinsic unchanged
+  ret void
+}
+declare void @llvm.x86.sse3.monitor(i8*, i32, i32) nounwind
+
+; CHECK: mwait
+define void @mwait(i32 %E, i32 %H) nounwind {
+entry:
+  tail call void @llvm.x86.sse3.mwait(i32 %E, i32 %H) ; forwards both arguments to the intrinsic unchanged
+  ret void
+}
+declare void @llvm.x86.sse3.mwait(i32, i32) nounwind
+
+; CHECK: sfence
+define void @sfence() nounwind {
+entry:
+  tail call void @llvm.x86.sse.sfence() ; the only operation in this function
+  ret void
+}
+declare void @llvm.x86.sse.sfence() nounwind
 
+; CHECK: lfence
+define void @lfence() nounwind {
+entry:
+  tail call void @llvm.x86.sse2.lfence() ; the only operation in this function
+  ret void
+}
+declare void @llvm.x86.sse2.lfence() nounwind
+
+; CHECK: mfence
+define void @mfence() nounwind {
+entry:
+  tail call void @llvm.x86.sse2.mfence() ; the only operation in this function
+  ret void
+}
+declare void @llvm.x86.sse2.mfence() nounwind
+
+; CHECK: clflush
+define void @clflush(i8* %p) nounwind {
+entry:
+  tail call void @llvm.x86.sse2.clflush(i8* %p) ; forwards %p to the intrinsic unchanged
+  ret void
+}
+declare void @llvm.x86.sse2.clflush(i8*) nounwind
+
+; CHECK: crc32b
+define i32 @crc32_32_8(i32 %a, i8 %b) nounwind {
+  %tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b) ; i32 first operand, i8 second operand
+  ret i32 %tmp
+}
+declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind
+
+; CHECK: crc32w
+define i32 @crc32_32_16(i32 %a, i16 %b) nounwind {
+  %tmp = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a, i16 %b) ; i32 first operand, i16 second operand
+  ret i32 %tmp
+}
+declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind
+
+; CHECK: crc32l
+define i32 @crc32_32_32(i32 %a, i32 %b) nounwind {
+  %tmp = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a, i32 %b) ; i32 first operand, i32 second operand
+  ret i32 %tmp
+}
+declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind
diff --git a/test/CodeGen/X86/avx-load-store.ll b/test/CodeGen/X86/avx-load-store.ll
index 07a63ef..c9fc66a 100644
--- a/test/CodeGen/X86/avx-load-store.ll
+++ b/test/CodeGen/X86/avx-load-store.ll
@@ -25,20 +25,26 @@ declare void @dummy(<4 x double>, <8 x float>, <4 x i64>)
 
 ;;
 ;; The two tests below check that we must fold load + scalar_to_vector
-;; + ins_subvec+ zext into only a single vmovss or vmovsd
+;; + ins_subvec+ zext into only a single vmovss or vmovsd or vinsertps from memory
 
-; CHECK: vmovss (%
+; CHECK: mov00
 define <8 x float> @mov00(<8 x float> %v, float * %ptr) nounwind {
   %val = load float* %ptr
+; CHECK: vinsertps
+; CHECK: vinsertf128
   %i0 = insertelement <8 x float> zeroinitializer, float %val, i32 0
   ret <8 x float> %i0
+; CHECK: ret
 }
 
-; CHECK: vmovsd (%
+; CHECK: mov01
 define <4 x double> @mov01(<4 x double> %v, double * %ptr) nounwind {
   %val = load double* %ptr
+; CHECK: vmovlpd
+; CHECK: vinsertf128
   %i0 = insertelement <4 x double> zeroinitializer, double %val, i32 0
   ret <4 x double> %i0
+; CHECK: ret
 }
 
 ; CHECK: vmovaps  %ymm
diff --git a/test/CodeGen/X86/avx-minmax.ll b/test/CodeGen/X86/avx-minmax.ll
index f36ba7b..7c58820 100644
--- a/test/CodeGen/X86/avx-minmax.ll
+++ b/test/CodeGen/X86/avx-minmax.ll
@@ -33,7 +33,7 @@ define <4 x float> @minps(<4 x float> %x, <4 x float> %y) {
 }
 
 ; UNSAFE: vmaxpd:
-; UNSAFE: vmaxpd %ymm
+; UNSAFE: vmaxpd {{.+}}, %ymm
 define <4 x double> @vmaxpd(<4 x double> %x, <4 x double> %y) {
   %max_is_x = fcmp oge <4 x double> %x, %y
   %max = select <4 x i1> %max_is_x, <4 x double> %x, <4 x double> %y
@@ -41,7 +41,7 @@ define <4 x double> @vmaxpd(<4 x double> %x, <4 x double> %y) {
 }
 
 ; UNSAFE: vminpd:
-; UNSAFE: vminpd %ymm
+; UNSAFE: vminpd {{.+}}, %ymm
 define <4 x double> @vminpd(<4 x double> %x, <4 x double> %y) {
   %min_is_x = fcmp ole <4 x double> %x, %y
   %min = select <4 x i1> %min_is_x, <4 x double> %x, <4 x double> %y
@@ -49,7 +49,7 @@ define <4 x double> @vminpd(<4 x double> %x, <4 x double> %y) {
 }
 
 ; UNSAFE: vmaxps:
-; UNSAFE: vmaxps %ymm
+; UNSAFE: vmaxps {{.+}}, %ymm
 define <8 x float> @vmaxps(<8 x float> %x, <8 x float> %y) {
   %max_is_x = fcmp oge <8 x float> %x, %y
   %max = select <8 x i1> %max_is_x, <8 x float> %x, <8 x float> %y
@@ -57,7 +57,7 @@ define <8 x float> @vmaxps(<8 x float> %x, <8 x float> %y) {
 }
 
 ; UNSAFE: vminps:
-; UNSAFE: vminps %ymm
+; UNSAFE: vminps {{.+}}, %ymm
 define <8 x float> @vminps(<8 x float> %x, <8 x float> %y) {
   %min_is_x = fcmp ole <8 x float> %x, %y
   %min = select <8 x i1> %min_is_x, <8 x float> %x, <8 x float> %y
diff --git a/test/CodeGen/X86/avx-sext.ll b/test/CodeGen/X86/avx-sext.ll
new file mode 100755
index 0000000..3713a8c
--- /dev/null
+++ b/test/CodeGen/X86/avx-sext.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+define <8 x i32> @sext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
+;CHECK: sext_8i16_to_8i32
+;CHECK: vpmovsxwd
+
+  %B = sext <8 x i16> %A to <8 x i32>
+  ret <8 x i32>%B
+}
+
+define <4 x i64> @sext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
+;CHECK: sext_4i32_to_4i64
+;CHECK: vpmovsxdq
+
+  %B = sext <4 x i32> %A to <4 x i64>
+  ret <4 x i64>%B
+}
diff --git a/test/CodeGen/X86/avx-shuffle-x86_32.ll b/test/CodeGen/X86/avx-shuffle-x86_32.ll
new file mode 100755
index 0000000..5268ec3
--- /dev/null
+++ b/test/CodeGen/X86/avx-shuffle-x86_32.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -mtriple=i686-pc-win32 -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+define <4 x i64> @test1(<4 x i64> %a) nounwind {
+ %b = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ ret <4 x i64>%b
+ ; CHECK: test1:
+ ; CHECK: vinsertf128
+ }
diff --git a/test/CodeGen/X86/avx-shuffle.ll b/test/CodeGen/X86/avx-shuffle.ll
index e9392ae..947d79f 100644
--- a/test/CodeGen/X86/avx-shuffle.ll
+++ b/test/CodeGen/X86/avx-shuffle.ll
@@ -6,15 +6,132 @@ define <4 x float> @test1(<4 x float> %a) nounwind {
   ret <4 x float> %b
 ; CHECK: test1:
 ; CHECK: vshufps
-; CHECK: vpshufd
+; CHECK: vpermilps
 }
 
 ; rdar://10538417
 define <3 x i64> @test2(<2 x i64> %v) nounwind readnone {
 ; CHECK: test2:
-; CHECK: vxorpd
-; CHECK: vmovsd
+; CHECK: vinsertf128
   %1 = shufflevector <2 x i64> %v, <2 x i64> %v, <3 x i32> <i32 0, i32 1, i32 undef>
   %2 = shufflevector <3 x i64> zeroinitializer, <3 x i64> %1, <3 x i32> <i32 3, i32 4, i32 2>
   ret <3 x i64> %2
+; CHECK: ret
 }
+
+define <4 x i64> @test3(<4 x i64> %a, <4 x i64> %b) nounwind {
+  %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 5, i32 2, i32 undef>
+  ret <4 x i64> %c
+; CHECK: test3:
+; CHECK: vperm2f128
+; CHECK: ret
+}
+
+define <8 x float> @test4(float %a) nounwind {
+  %b = insertelement <8 x float> zeroinitializer, float %a, i32 0
+  ret <8 x float> %b
+; CHECK: test4:
+; CHECK: vinsertf128
+}
+
+; rdar://10594409
+define <8 x float> @test5(float* nocapture %f) nounwind uwtable readonly ssp {
+entry:
+  %0 = bitcast float* %f to <4 x float>*
+  %1 = load <4 x float>* %0, align 16
+; CHECK: test5
+; CHECK: vmovaps
+; CHECK-NOT: vxorps
+; CHECK-NOT: vinsertf128
+  %shuffle.i = shufflevector <4 x float> %1, <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
+  ret <8 x float> %shuffle.i
+}
+
+define <4 x double> @test6(double* nocapture %d) nounwind uwtable readonly ssp {
+entry:
+  %0 = bitcast double* %d to <2 x double>*
+  %1 = load <2 x double>* %0, align 16
+; CHECK: test6
+; CHECK: vmovaps
+; CHECK-NOT: vxorps
+; CHECK-NOT: vinsertf128
+  %shuffle.i = shufflevector <2 x double> %1, <2 x double> <double 0.000000e+00, double undef>, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
+  ret <4 x double> %shuffle.i
+}
+
+define <16 x i16> @test7(<4 x i16> %a) nounwind {
+; CHECK: test7
+  %b = shufflevector <4 x i16> %a, <4 x i16> undef, <16 x i32> <i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK: ret
+  ret <16 x i16> %b
+}
+
+; CHECK: test8
+define void @test8() {
+entry:
+  %0 = load <16 x i64> addrspace(1)* null, align 128
+  %1 = shufflevector <16 x i64> <i64 undef, i64 undef, i64 0, i64 undef, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 undef, i64 0, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i64> %0, <16 x i32> <i32 17, i32 18, i32 2, i32 undef, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 undef, i32 11, i32 undef, i32 undef, i32 undef, i32 26>
+  %2 = shufflevector <16 x i64> %1, <16 x i64> %0, <16 x i32> <i32 0, i32 1, i32 2, i32 30, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 undef, i32 11, i32 undef, i32 22, i32 20, i32 15>
+  store <16 x i64> %2, <16 x i64> addrspace(1)* undef, align 128
+; CHECK: ret
+  ret void
+}
+
+; Extract a value from a shufflevector.
+define i32 @test9(<4 x i32> %a) nounwind {
+; CHECK: test9
+; CHECK: vpextrd
+  %b = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> <i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 undef, i32 4> 
+  %r = extractelement <8 x i32> %b, i32 2
+; CHECK: ret
+  ret i32 %r
+}
+
+; Extract a value which is the result of an undef mask.
+define i32 @test10(<4 x i32> %a) nounwind {
+; CHECK: @test10
+; CHECK-NEXT: #
+; CHECK-NEXT: ret
+  %b = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> <i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %r = extractelement <8 x i32> %b, i32 2
+  ret i32 %r
+}
+
+define <4 x float> @test11(<4 x float> %a) nounwind  {
+; CHECK: test11
+; CHECK: vpermilps $27
+  %tmp1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; reverse the four lanes; mask 3,2,1,0 encodes as immediate 27
+  ret <4 x float> %tmp1
+}
+
+define <4 x float> @test12(<4 x float>* %a) nounwind  {
+; CHECK: test12
+; CHECK: vpermilps $27, (
+  %tmp0 = load <4 x float>* %a
+  %tmp1 = shufflevector <4 x float> %tmp0, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ret <4 x float> %tmp1
+}
+
+define <4 x i32> @test13(<4 x i32> %a) nounwind  {
+; CHECK: test13
+; CHECK: vpshufd $27
+  %tmp1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; reverse the four lanes; mask 3,2,1,0 encodes as immediate 27
+  ret <4 x i32> %tmp1
+}
+
+define <4 x i32> @test14(<4 x i32>* %a) nounwind  {
+; CHECK: test14
+; CHECK: vpshufd $27, (
+  %tmp0 = load <4 x i32>* %a
+  %tmp1 = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ret <4 x i32> %tmp1
+}
+
+; CHECK: test15
+; CHECK: vpshufd $8
+; CHECK: ret
+define <4 x i32> @test15(<2 x i32>%x) nounwind readnone {
+  %x1 = shufflevector <2 x i32> %x, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  ret <4 x i32>%x1
+}
+
diff --git a/test/CodeGen/X86/avx-splat.ll b/test/CodeGen/X86/avx-splat.ll
index f8522c2..94bcddd 100644
--- a/test/CodeGen/X86/avx-splat.ll
+++ b/test/CodeGen/X86/avx-splat.ll
@@ -32,7 +32,7 @@ entry:
   ret <4 x i64> %vecinit6.i
 }
 
-; CHECK: vshufpd $0
+; CHECK: vpermilpd $0
 ; CHECK-NEXT: vinsertf128 $1
 define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp {
 entry:
diff --git a/test/CodeGen/X86/avx-trunc.ll b/test/CodeGen/X86/avx-trunc.ll
new file mode 100755
index 0000000..d007736
--- /dev/null
+++ b/test/CodeGen/X86/avx-trunc.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+define <4 x i32> @trunc_64_32(<4 x i64> %A) nounwind uwtable readnone ssp{
+; CHECK: trunc_64_32
+; CHECK: pshufd
+  %B = trunc <4 x i64> %A to <4 x i32>
+  ret <4 x i32>%B
+}
+define <8 x i16> @trunc_32_16(<8 x i32> %A) nounwind uwtable readnone ssp{
+; CHECK: trunc_32_16
+; CHECK: pshufb
+  %B = trunc <8 x i32> %A to <8 x i16>
+  ret <8 x i16>%B
+}
+
diff --git a/test/CodeGen/X86/avx-unpack.ll b/test/CodeGen/X86/avx-unpack.ll
index fcd7bb6..20f5345 100644
--- a/test/CodeGen/X86/avx-unpack.ll
+++ b/test/CodeGen/X86/avx-unpack.ll
@@ -123,3 +123,39 @@ entry:
   %shuffle.i = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
   ret <4 x i64> %shuffle.i
 }
+
+; CHECK: vpunpckhwd
+; CHECK: vpunpckhwd
+; CHECK: vinsertf128
+define <16 x i16> @unpackhwd_undef(<16 x i16> %src1) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <16 x i16> %src1, <16 x i16> %src1, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+  ret <16 x i16> %shuffle.i
+}
+
+; CHECK: vpunpcklwd
+; CHECK: vpunpcklwd
+; CHECK: vinsertf128
+define <16 x i16> @unpacklwd_undef(<16 x i16> %src1) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <16 x i16> %src1, <16 x i16> %src1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
+  ret <16 x i16> %shuffle.i
+}
+
+; CHECK: vpunpckhbw
+; CHECK: vpunpckhbw
+; CHECK: vinsertf128
+define <32 x i8> @unpackhbw_undef(<32 x i8> %src1, <32 x i8> %src2) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <32 x i8> %src1, <32 x i8> %src1, <32 x i32> <i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
+  ret <32 x i8> %shuffle.i
+}
+
+; CHECK: vpunpcklbw
+; CHECK: vpunpcklbw
+; CHECK: vinsertf128
+define <32 x i8> @unpacklbw_undef(<32 x i8> %src1) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <32 x i8> %src1, <32 x i8> %src1, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55>
+  ret <32 x i8> %shuffle.i
+}
diff --git a/test/CodeGen/X86/avx-vbroadcast.ll b/test/CodeGen/X86/avx-vbroadcast.ll
index 8fbd02a..5bf9f4f 100644
--- a/test/CodeGen/X86/avx-vbroadcast.ll
+++ b/test/CodeGen/X86/avx-vbroadcast.ll
@@ -89,3 +89,13 @@ define <4 x i32> @H(<4 x i32> %a) {
   ret <4 x i32> %x
 }
 
+; CHECK: _I
+; CHECK-NOT: vbroadcastsd (%
+; CHECK: ret
+define <2 x double> @I(double* %ptr) nounwind uwtable readnone ssp {
+entry:
+  %q = load double* %ptr, align 4
+  %vecinit.i = insertelement <2 x double> undef, double %q, i32 0
+  %vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
+  ret <2 x double> %vecinit2.i
+}
diff --git a/test/CodeGen/X86/avx-vinsertf128.ll b/test/CodeGen/X86/avx-vinsertf128.ll
index cda1331..def2212 100644
--- a/test/CodeGen/X86/avx-vinsertf128.ll
+++ b/test/CodeGen/X86/avx-vinsertf128.ll
@@ -56,3 +56,51 @@ define <8 x i32> @DAGCombineB(<8 x i32> %v1, <8 x i32> %v2) nounwind readonly {
   %2 = add <8 x i32> %1, %v1
   ret <8 x i32> %2
 }
+
+; CHECK: insert_pd
+define <4 x double> @insert_pd(<4 x double> %a0, <2 x double> %a1) {
+; CHECK: vinsertf128
+%res = call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> %a0, <2 x double> %a1, i8 0)
+ret <4 x double> %res
+}
+
+; CHECK: insert_undef_pd
+define <4 x double> @insert_undef_pd(<4 x double> %a0, <2 x double> %a1) {
+; CHECK: vmovaps	%ymm1, %ymm0
+%res = call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> undef, <2 x double> %a1, i8 0)
+ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double>, <2 x double>, i8) nounwind readnone
+
+
+; CHECK: insert_ps
+define <8 x float> @insert_ps(<8 x float> %a0, <4 x float> %a1) {
+; CHECK: vinsertf128
+%res = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> %a0, <4 x float> %a1, i8 0)
+ret <8 x float> %res
+}
+
+; CHECK: insert_undef_ps
+define <8 x float> @insert_undef_ps(<8 x float> %a0, <4 x float> %a1) {
+; CHECK: vmovaps	%ymm1, %ymm0
+%res = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> undef, <4 x float> %a1, i8 0)
+ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float>, <4 x float>, i8) nounwind readnone
+
+
+; CHECK: insert_si
+define <8 x i32> @insert_si(<8 x i32> %a0, <4 x i32> %a1) {
+; CHECK: vinsertf128
+%res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> %a0, <4 x i32> %a1, i8 0)
+ret <8 x i32> %res
+}
+
+; CHECK: insert_undef_si
+define <8 x i32> @insert_undef_si(<8 x i32> %a0, <4 x i32> %a1) {
+; CHECK: vmovaps	%ymm1, %ymm0
+%res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> undef, <4 x i32> %a1, i8 0)
+ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32>, <4 x i32>, i8) nounwind readnone
+
diff --git a/test/CodeGen/X86/avx-vpermil.ll b/test/CodeGen/X86/avx-vpermil.ll
index 3d521e7..9707cd9 100644
--- a/test/CodeGen/X86/avx-vpermil.ll
+++ b/test/CodeGen/X86/avx-vpermil.ll
@@ -45,7 +45,7 @@ entry:
   ret <8 x float> %shuffle
 }
 
-; CHECK-NOT: vpermilps
+; CHECK: vpermilps
 define <8 x float> @funcF(<8 x float> %a) nounwind uwtable readnone ssp {
 entry:
   %shuffle = shufflevector <8 x float> %a, <8 x float> zeroinitializer, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
diff --git a/test/CodeGen/X86/avx-vshufp.ll b/test/CodeGen/X86/avx-vshufp.ll
index 0ccbc59..45883b7 100644
--- a/test/CodeGen/X86/avx-vshufp.ll
+++ b/test/CodeGen/X86/avx-vshufp.ll
@@ -7,7 +7,7 @@ entry:
   ret <8 x float> %shuffle
 }
 
-; CHECK: vshufps  $-53, (%
+; CHECK: vshufps  $-53, (%{{.*}}), %ymm
 define <8 x float> @A2(<8 x float>* %a, <8 x float>* %b) nounwind uwtable readnone ssp {
 entry:
   %a2 = load <8 x float>* %a
@@ -16,6 +16,22 @@ entry:
   ret <8 x float> %shuffle
 }
 
+; CHECK: vshufps  $-53, %ymm
+define <8 x i32> @A3(<8 x i32> %a, <8 x i32> %b) nounwind uwtable readnone ssp {
+entry:
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 8, i32 11, i32 7, i32 6, i32 12, i32 15>
+  ret <8 x i32> %shuffle
+}
+
+; CHECK: vshufps  $-53, (%{{.*}}), %ymm
+define <8 x i32> @A4(<8 x i32>* %a, <8 x i32>* %b) nounwind uwtable readnone ssp {
+entry:
+  %a2 = load <8 x i32>* %a
+  %b2 = load <8 x i32>* %b
+  %shuffle = shufflevector <8 x i32> %a2, <8 x i32> %b2, <8 x i32> <i32 3, i32 2, i32 8, i32 11, i32 7, i32 6, i32 12, i32 15>
+  ret <8 x i32> %shuffle
+}
+
 ; CHECK: vshufpd  $10, %ymm
 define <4 x double> @B(<4 x double> %a, <4 x double> %b) nounwind uwtable readnone ssp {
 entry:
@@ -23,7 +39,7 @@ entry:
   ret <4 x double> %shuffle
 }
 
-; CHECK: vshufpd  $10, (%
+; CHECK: vshufpd  $10, (%{{.*}}), %ymm
 define <4 x double> @B2(<4 x double>* %a, <4 x double>* %b) nounwind uwtable readnone ssp {
 entry:
   %a2 = load <4 x double>* %a
@@ -32,6 +48,22 @@ entry:
   ret <4 x double> %shuffle
 }
 
+; CHECK: vshufpd  $10, %ymm
+define <4 x i64> @B3(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
+entry:
+  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  ret <4 x i64> %shuffle
+}
+
+; CHECK: vshufpd  $10, (%{{.*}}), %ymm
+define <4 x i64> @B4(<4 x i64>* %a, <4 x i64>* %b) nounwind uwtable readnone ssp {
+entry:
+  %a2 = load <4 x i64>* %a
+  %b2 = load <4 x i64>* %b
+  %shuffle = shufflevector <4 x i64> %a2, <4 x i64> %b2, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  ret <4 x i64> %shuffle
+}
+
 ; CHECK: vshufps  $-53, %ymm
 define <8 x float> @C(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
 entry:
@@ -59,3 +91,67 @@ entry:
   %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 7>
   ret <4 x double> %shuffle
 }
+
+; CHECK: vshufps  $-53, %xmm
+define <4 x float> @A128(<4 x float> %a, <4 x float> %b) nounwind uwtable readnone ssp {
+entry:
+  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 3, i32 2, i32 4, i32 7>
+  ret <4 x float> %shuffle
+}
+
+; CHECK: vshufps  $-53, (%{{.*}}), %xmm
+define <4 x float> @A2128(<4 x float>* %a, <4 x float>* %b) nounwind uwtable readnone ssp {
+entry:
+  %a2 = load <4 x float>* %a
+  %b2 = load <4 x float>* %b
+  %shuffle = shufflevector <4 x float> %a2, <4 x float> %b2, <4 x i32> <i32 3, i32 2, i32 4, i32 7>
+  ret <4 x float> %shuffle
+}
+
+; CHECK: vshufps  $-53, %xmm
+define <4 x i32> @A3128(<4 x i32> %a, <4 x i32> %b) nounwind uwtable readnone ssp {
+entry:
+  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 2, i32 4, i32 7>
+  ret <4 x i32> %shuffle
+}
+
+; CHECK: vshufps  $-53, (%{{.*}}), %xmm
+define <4 x i32> @A4128(<4 x i32>* %a, <4 x i32>* %b) nounwind uwtable readnone ssp {
+entry:
+  %a2 = load <4 x i32>* %a
+  %b2 = load <4 x i32>* %b
+  %shuffle = shufflevector <4 x i32> %a2, <4 x i32> %b2, <4 x i32> <i32 3, i32 2, i32 4, i32 7>
+  ret <4 x i32> %shuffle
+}
+
+; CHECK: vshufpd  $1, %xmm
+define <2 x double> @B128(<2 x double> %a, <2 x double> %b) nounwind uwtable readnone ssp {
+entry:
+  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 2>
+  ret <2 x double> %shuffle
+}
+
+; CHECK: vshufpd  $1, (%{{.*}}), %xmm
+define <2 x double> @B2128(<2 x double>* %a, <2 x double>* %b) nounwind uwtable readnone ssp {
+entry:
+  %a2 = load <2 x double>* %a
+  %b2 = load <2 x double>* %b
+  %shuffle = shufflevector <2 x double> %a2, <2 x double> %b2, <2 x i32> <i32 1, i32 2>
+  ret <2 x double> %shuffle
+}
+
+; CHECK: vshufpd  $1, %xmm
+define <2 x i64> @B3128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
+entry:
+  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
+  ret <2 x i64> %shuffle
+}
+
+; CHECK: vshufpd  $1, (%{{.*}}), %xmm
+define <2 x i64> @B4128(<2 x i64>* %a, <2 x i64>* %b) nounwind uwtable readnone ssp {
+entry:
+  %a2 = load <2 x i64>* %a
+  %b2 = load <2 x i64>* %b
+  %shuffle = shufflevector <2 x i64> %a2, <2 x i64> %b2, <2 x i32> <i32 1, i32 2>
+  ret <2 x i64> %shuffle
+}
diff --git a/test/CodeGen/X86/avx-win64-args.ll b/test/CodeGen/X86/avx-win64-args.ll
new file mode 100755
index 0000000..85b2634
--- /dev/null
+++ b/test/CodeGen/X86/avx-win64-args.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+target triple = "x86_64-pc-win32"
+
+declare <8 x float> @foo(<8 x float>, i32)
+
+define <8 x float> @test1(<8 x float> %x, <8 x float> %y) nounwind uwtable readnone ssp {
+entry:
+; CHECK: test1
+; CHECK: leaq {{.*}}, %rcx
+; CHECK: movl {{.*}}, %edx
+; CHECK: call
+; CHECK: ret
+  %x1 = fadd  <8 x float>  %x, %y
+  %call = call  <8 x float> @foo(<8 x float> %x1, i32 1) nounwind
+  %y1 = fsub  <8 x float>  %call, %y
+  ret <8 x float> %y1
+}
+
diff --git a/test/CodeGen/X86/avx-win64.ll b/test/CodeGen/X86/avx-win64.ll
new file mode 100644
index 0000000..dc6bd59
--- /dev/null
+++ b/test/CodeGen/X86/avx-win64.ll
@@ -0,0 +1,47 @@
+; RUN: llc < %s -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+; PR11862
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-win32"
+
+; This function has live ymm registers across a win64 call.
+; The ymm6-15 registers are still call-clobbered even if xmm6-15 are callee-saved.
+; Verify that callee-saved registers are not being used.
+
+; CHECK: f___vyf
+; CHECK: pushq %rbp
+; CHECK: vmovmsk
+; CHECK: vmovaps %ymm{{.*}}(%r
+; CHECK: vmovaps %ymm{{.*}}(%r
+; CHECK: call
+; Two reloads. It's OK if these get folded.
+; CHECK: vmovaps {{.*\(%r.*}}, %ymm
+; CHECK: vmovaps {{.*\(%r.*}}, %ymm
+; CHECK: blend
+define <8 x float> @f___vyf(<8 x float> %x, <8 x i32> %__mask) nounwind readnone {
+allocas:
+  %bincmp = fcmp oeq <8 x float> %x, zeroinitializer
+  %val_to_boolvec32 = sext <8 x i1> %bincmp to <8 x i32>
+  %"~test" = xor <8 x i32> %val_to_boolvec32, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+  %"internal_mask&function_mask25" = and <8 x i32> %"~test", %__mask
+  %floatmask.i46 = bitcast <8 x i32> %"internal_mask&function_mask25" to <8 x float>
+  %v.i47 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %floatmask.i46) nounwind readnone
+  %any_mm_cmp27 = icmp eq i32 %v.i47, 0
+  br i1 %any_mm_cmp27, label %safe_if_after_false, label %safe_if_run_false
+
+safe_if_run_false:                                ; preds = %allocas
+  %binop = fadd <8 x float> %x, <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>
+  %calltmp = call <8 x float> @f___vyf(<8 x float> %binop, <8 x i32> %"internal_mask&function_mask25")
+  %binop33 = fadd <8 x float> %calltmp, %x
+  %mask_as_float.i48 = bitcast <8 x i32> %"~test" to <8 x float>
+  %blend.i52 = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %x, <8 x float> %binop33, <8 x float> %mask_as_float.i48) nounwind
+  br label %safe_if_after_false
+
+safe_if_after_false:                              ; preds = %safe_if_run_false, %allocas
+  %0 = phi <8 x float> [ %x, %allocas ], [ %blend.i52, %safe_if_run_false ]
+  ret <8 x float> %0
+}
+
+declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone
+declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8*, <8 x float>) nounwind readonly
+declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x float>, <8 x float>) nounwind
+declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
diff --git a/test/CodeGen/X86/avx-zext.ll b/test/CodeGen/X86/avx-zext.ll
new file mode 100755
index 0000000..b630e9d
--- /dev/null
+++ b/test/CodeGen/X86/avx-zext.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+define <8 x i32> @zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
+;CHECK: zext_8i16_to_8i32
+;CHECK: vpunpckhwd
+;CHECK: ret
+
+  %B = zext <8 x i16> %A to <8 x i32>
+  ret <8 x i32>%B
+}
+
+define <4 x i64> @zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
+;CHECK: zext_4i32_to_4i64
+;CHECK: vpunpckhdq
+;CHECK: ret
+
+  %B = zext <4 x i32> %A to <4 x i64>
+  ret <4 x i64>%B
+}
+
+
+define <8 x i32> @zext_8i8_to_8i32(<8 x i8> %z) {
+;CHECK: zext_8i8_to_8i32
+;CHECK: vpunpckhwd
+;CHECK: vpunpcklwd
+;CHECK: vinsertf128
+;CHECK: ret
+  %t = zext <8 x i8> %z to <8 x i32>
+  ret <8 x i32> %t
+}
diff --git a/test/CodeGen/X86/avx2-intrinsics-x86.ll b/test/CodeGen/X86/avx2-intrinsics-x86.ll
index a0f351d..1fb41c0 100644
--- a/test/CodeGen/X86/avx2-intrinsics-x86.ll
+++ b/test/CodeGen/X86/avx2-intrinsics-x86.ll
@@ -72,54 +72,6 @@ define <16 x i16> @test_x86_avx2_pavg_w(<16 x i16> %a0, <16 x i16> %a1) {
 declare <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16>, <16 x i16>) nounwind readnone
 
 
-define <32 x i8> @test_x86_avx2_pcmpeq_b(<32 x i8> %a0, <32 x i8> %a1) {
-  ; CHECK: vpcmpeqb
-  %res = call <32 x i8> @llvm.x86.avx2.pcmpeq.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
-  ret <32 x i8> %res
-}
-declare <32 x i8> @llvm.x86.avx2.pcmpeq.b(<32 x i8>, <32 x i8>) nounwind readnone
-
-
-define <8 x i32> @test_x86_avx2_pcmpeq_d(<8 x i32> %a0, <8 x i32> %a1) {
-  ; CHECK: vpcmpeqd
-  %res = call <8 x i32> @llvm.x86.avx2.pcmpeq.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
-  ret <8 x i32> %res
-}
-declare <8 x i32> @llvm.x86.avx2.pcmpeq.d(<8 x i32>, <8 x i32>) nounwind readnone
-
-
-define <16 x i16> @test_x86_avx2_pcmpeq_w(<16 x i16> %a0, <16 x i16> %a1) {
-  ; CHECK: vpcmpeqw
-  %res = call <16 x i16> @llvm.x86.avx2.pcmpeq.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
-  ret <16 x i16> %res
-}
-declare <16 x i16> @llvm.x86.avx2.pcmpeq.w(<16 x i16>, <16 x i16>) nounwind readnone
-
-
-define <32 x i8> @test_x86_avx2_pcmpgt_b(<32 x i8> %a0, <32 x i8> %a1) {
-  ; CHECK: vpcmpgtb
-  %res = call <32 x i8> @llvm.x86.avx2.pcmpgt.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
-  ret <32 x i8> %res
-}
-declare <32 x i8> @llvm.x86.avx2.pcmpgt.b(<32 x i8>, <32 x i8>) nounwind readnone
-
-
-define <8 x i32> @test_x86_avx2_pcmpgt_d(<8 x i32> %a0, <8 x i32> %a1) {
-  ; CHECK: vpcmpgtd
-  %res = call <8 x i32> @llvm.x86.avx2.pcmpgt.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
-  ret <8 x i32> %res
-}
-declare <8 x i32> @llvm.x86.avx2.pcmpgt.d(<8 x i32>, <8 x i32>) nounwind readnone
-
-
-define <16 x i16> @test_x86_avx2_pcmpgt_w(<16 x i16> %a0, <16 x i16> %a1) {
-  ; CHECK: vpcmpgtw
-  %res = call <16 x i16> @llvm.x86.avx2.pcmpgt.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
-  ret <16 x i16> %res
-}
-declare <16 x i16> @llvm.x86.avx2.pcmpgt.w(<16 x i16>, <16 x i16>) nounwind readnone
-
-
 define <8 x i32> @test_x86_avx2_pmadd_wd(<16 x i16> %a0, <16 x i16> %a1) {
   ; CHECK: vpmaddwd
   %res = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %a0, <16 x i16> %a1) ; <<8 x i32>> [#uses=1]
@@ -553,14 +505,6 @@ define <16 x i16> @test_x86_avx2_pblendw(<16 x i16> %a0, <16 x i16> %a1) {
 declare <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16>, <16 x i16>, i32) nounwind readnone
 
 
-define <4 x i64> @test_x86_avx2_pcmpeqq(<4 x i64> %a0, <4 x i64> %a1) {
-  ; CHECK: vpcmpeqq
-  %res = call <4 x i64> @llvm.x86.avx2.pcmpeq.q(<4 x i64> %a0, <4 x i64> %a1) ; <<4 x i64>> [#uses=1]
-  ret <4 x i64> %res
-}
-declare <4 x i64> @llvm.x86.avx2.pcmpeq.q(<4 x i64>, <4 x i64>) nounwind readnone
-
-
 define <32 x i8> @test_x86_avx2_pmaxsb(<32 x i8> %a0, <32 x i8> %a1) {
   ; CHECK: vpmaxsb
   %res = call <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
@@ -729,14 +673,6 @@ define <4 x i64> @test_x86_avx2_pmul.dq(<8 x i32> %a0, <8 x i32> %a1) {
 declare <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32>, <8 x i32>) nounwind readnone
 
 
-define <4 x i64> @test_x86_avx2_pcmpgtq(<4 x i64> %a0, <4 x i64> %a1) {
-  ; CHECK: vpcmpgtq
-  %res = call <4 x i64> @llvm.x86.avx2.pcmpgt.q(<4 x i64> %a0, <4 x i64> %a1) ; <<4 x i64>> [#uses=1]
-  ret <4 x i64> %res
-}
-declare <4 x i64> @llvm.x86.avx2.pcmpgt.q(<4 x i64>, <4 x i64>) nounwind readnone
-
-
 define <4 x i64> @test_x86_avx2_vbroadcasti128(i8* %a0) {
   ; CHECK: vbroadcasti128
   %res = call <4 x i64> @llvm.x86.avx2.vbroadcasti128(i8* %a0) ; <<4 x i64>> [#uses=1]
diff --git a/test/CodeGen/X86/avx2-nontemporal.ll b/test/CodeGen/X86/avx2-nontemporal.ll
new file mode 100644
index 0000000..0768aae
--- /dev/null
+++ b/test/CodeGen/X86/avx2-nontemporal.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -march=x86 -mattr=+avx2 | FileCheck %s
+
+define void @f(<8 x float> %A, i8* %B, <4 x double> %C, i32 %D, <4 x i64> %E) {
+; CHECK: vmovntps
+  %cast = bitcast i8* %B to <8 x float>*
+  %A2 = fadd <8 x float> %A, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x4200000000000000>
+  store <8 x float> %A2, <8 x float>* %cast, align 16, !nontemporal !0
+; CHECK: vmovntdq
+  %cast1 = bitcast i8* %B to <4 x i64>*
+  %E2 = add <4 x i64> %E, <i64 1, i64 2, i64 3, i64 4>
+  store <4 x i64> %E2, <4 x i64>* %cast1, align 16, !nontemporal !0
+; CHECK: vmovntpd
+  %cast2 = bitcast i8* %B to <4 x double>*
+  %C2 = fadd <4 x double> %C, <double 0x0, double 0x0, double 0x0, double 0x4200000000000000>
+  store <4 x double> %C2, <4 x double>* %cast2, align 16, !nontemporal !0
+; CHECK: movnti
+  %cast3 = bitcast i8* %B to i32*
+  store i32 %D, i32* %cast3, align 16, !nontemporal !0
+  ret void
+}
+
+!0 = metadata !{i32 1}
diff --git a/test/CodeGen/X86/avx2-palignr.ll b/test/CodeGen/X86/avx2-palignr.ll
new file mode 100644
index 0000000..53b9da3
--- /dev/null
+++ b/test/CodeGen/X86/avx2-palignr.ll
@@ -0,0 +1,57 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
+
+define <8 x i32> @test1(<8 x i32> %A, <8 x i32> %B) nounwind {
+; CHECK: test1:
+; CHECK: vpalignr $4
+  %C = shufflevector <8 x i32> %A, <8 x i32> %B, <8 x i32> <i32 1, i32 2, i32 3, i32 8, i32 5, i32 6, i32 7, i32 12>
+  ret <8 x i32> %C
+}
+
+define <8 x i32> @test2(<8 x i32> %A, <8 x i32> %B) nounwind {
+; CHECK: test2:
+; CHECK: vpalignr $4
+  %C = shufflevector <8 x i32> %A, <8 x i32> %B, <8 x i32> <i32 1, i32 2, i32 3, i32 8, i32 5, i32 6, i32 undef, i32 12>
+  ret <8 x i32> %C
+}
+
+define <8 x i32> @test3(<8 x i32> %A, <8 x i32> %B) nounwind {
+; CHECK: test3:
+; CHECK: vpalignr $4
+  %C = shufflevector <8 x i32> %A, <8 x i32> %B, <8 x i32> <i32 1, i32 undef, i32 3, i32 8, i32 5, i32 6, i32 7, i32 12>
+  ret <8 x i32> %C
+}
+;
+define <8 x i32> @test4(<8 x i32> %A, <8 x i32> %B) nounwind {
+; CHECK: test4:
+; CHECK: vpalignr $8
+  %C = shufflevector <8 x i32> %A, <8 x i32> %B, <8 x i32> <i32 10, i32 11, i32 undef, i32 1, i32 14, i32 15, i32 4, i32 5>
+  ret <8 x i32> %C
+}
+
+define <16 x i16> @test5(<16 x i16> %A, <16 x i16> %B) nounwind {
+; CHECK: test5:
+; CHECK: vpalignr $6
+  %C = shufflevector <16 x i16> %A, <16 x i16> %B, <16 x i32> <i32 3, i32 4, i32 undef, i32 6, i32 7, i32 16, i32 17, i32 18, i32 11, i32 12, i32 13, i32 undef, i32 15, i32 24, i32 25, i32 26>
+  ret <16 x i16> %C
+}
+
+define <16 x i16> @test6(<16 x i16> %A, <16 x i16> %B) nounwind {
+; CHECK: test6:
+; CHECK: vpalignr $6
+  %C = shufflevector <16 x i16> %A, <16 x i16> %B, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 11, i32 12, i32 13, i32 undef, i32 15, i32 24, i32 25, i32 26>
+  ret <16 x i16> %C
+}
+
+define <16 x i16> @test7(<16 x i16> %A, <16 x i16> %B) nounwind {
+; CHECK: test7:
+; CHECK: vpalignr $6
+  %C = shufflevector <16 x i16> %A, <16 x i16> %B, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  ret <16 x i16> %C
+}
+
+define <32 x i8> @test8(<32 x i8> %A, <32 x i8> %B) nounwind {
+; CHECK: test8:
+; CHECK: vpalignr $5
+  %C = shufflevector <32 x i8> %A, <32 x i8> %B, <32 x i32> <i32 5, i32 6, i32 7, i32 undef, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 33, i32 34, i32 35, i32 36, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48, i32 49, i32 50, i32 51, i32 52> ; in each 16-byte half: bytes 5..15 of %A followed by bytes 0..4 of %B
+  ret <32 x i8> %C
+}
diff --git a/test/CodeGen/X86/avx2-shift.ll b/test/CodeGen/X86/avx2-shift.ll
index b6cf54e..1f192a0 100644
--- a/test/CodeGen/X86/avx2-shift.ll
+++ b/test/CodeGen/X86/avx2-shift.ll
@@ -231,7 +231,7 @@ define <32 x i8> @sra_v32i8_7(<32 x i8> %A) nounwind {
   %B = ashr <32 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
   ret <32 x i8> %B
 ; CHECK: sra_v32i8_7:
-; CHECK: vxorps
+; CHECK: vpxor
 ; CHECK: vpcmpgtb
 ; CHECK: ret
 }
diff --git a/test/CodeGen/X86/avx2-unpack.ll b/test/CodeGen/X86/avx2-unpack.ll
index aa97308..6d17443 100644
--- a/test/CodeGen/X86/avx2-unpack.ll
+++ b/test/CodeGen/X86/avx2-unpack.ll
@@ -55,3 +55,32 @@ entry:
   %shuffle.i = shufflevector <32 x i8> %src1, <32 x i8> %src2, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55>
   ret <32 x i8> %shuffle.i
 }
+
+; CHECK: vpunpckhdq
+define <8 x i32> @unpackhidq1_undef(<8 x i32> %src1) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <8 x i32> %src1, <8 x i32> %src1, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
+  ret <8 x i32> %shuffle.i
+}
+
+; CHECK: vpunpckhqdq
+define <4 x i64> @unpackhiqdq1_undef(<4 x i64> %src1) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <4 x i64> %src1, <4 x i64> %src1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+  ret <4 x i64> %shuffle.i
+}
+
+; CHECK: vpunpckhwd
+define <16 x i16> @unpackhwd_undef(<16 x i16> %src1) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <16 x i16> %src1, <16 x i16> %src1, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+  ret <16 x i16> %shuffle.i
+}
+
+; CHECK: vpunpcklwd
+define <16 x i16> @unpacklwd_undef(<16 x i16> %src1) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <16 x i16> %src1, <16 x i16> %src1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
+  ret <16 x i16> %shuffle.i
+}
+
diff --git a/test/CodeGen/X86/avx2-vbroadcast.ll b/test/CodeGen/X86/avx2-vbroadcast.ll
index 142be33..fbabb15 100644
--- a/test/CodeGen/X86/avx2-vbroadcast.ll
+++ b/test/CodeGen/X86/avx2-vbroadcast.ll
@@ -140,3 +140,13 @@ entry:
   %q3 = insertelement <4 x i64> %q2, i64 %q, i32 3
   ret <4 x i64> %q3
 }
+
+; make sure that we still don't support broadcast double into 128-bit vector
+; this used to crash
+define <2 x double> @I(double* %ptr) nounwind uwtable readnone ssp {
+entry:
+  %q = load double* %ptr, align 4
+  %vecinit.i = insertelement <2 x double> undef, double %q, i32 0
+  %vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
+  ret <2 x double> %vecinit2.i
+}
diff --git a/test/CodeGen/X86/blend-msb.ll b/test/CodeGen/X86/blend-msb.ll
new file mode 100644
index 0000000..3a10c70
--- /dev/null
+++ b/test/CodeGen/X86/blend-msb.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -promote-elements -mattr=+sse41 | FileCheck %s
+
+
+; In this test we check that sign-extend of the mask bit is performed by
+; shifting the needed bit to the MSB, and not using shl+sra.
+
+;CHECK: vsel_float
+;CHECK: pslld
+;CHECK-NEXT: blendvps
+;CHECK: ret
+define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
+  %vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x float> %v1, <4 x float> %v2
+  ret <4 x float> %vsel
+}
+
+;CHECK: vsel_4xi8
+;CHECK: pslld
+;CHECK-NEXT: blendvps
+;CHECK: ret
+define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) {
+  %vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i8> %v1, <4 x i8> %v2
+  ret <4 x i8> %vsel
+}
+
+
+; We do not have native support for v8i16 blends and we have to use the
+; blendvb instruction or a sequence of NAND/OR/AND. Make sure that we do not
+; reduce the mask in this case.
+;CHECK: vsel_8xi16
+;CHECK: psllw
+;CHECK: psraw
+;CHECK: pblendvb
+;CHECK: ret
+define <8 x i16> @vsel_8xi16(<8 x i16> %v1, <8 x i16> %v2) {
+  %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i16> %v1, <8 x i16> %v2
+  ret <8 x i16> %vsel
+}
diff --git a/test/CodeGen/X86/bmi.ll b/test/CodeGen/X86/bmi.ll
index cde9b48..43c47c0 100644
--- a/test/CodeGen/X86/bmi.ll
+++ b/test/CodeGen/X86/bmi.ll
@@ -1,40 +1,65 @@
 ; RUN: llc < %s -march=x86-64 -mattr=+bmi,+bmi2 | FileCheck %s
 
-define i32 @t1(i32 %x) nounwind  {
-       %tmp = tail call i32 @llvm.cttz.i32( i32 %x, i1 false )
-       ret i32 %tmp
+declare i8 @llvm.cttz.i8(i8, i1) nounwind readnone
+declare i16 @llvm.cttz.i16(i16, i1) nounwind readnone
+declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone
+declare i64 @llvm.cttz.i64(i64, i1) nounwind readnone
+
+define i8 @t1(i8 %x) nounwind  {
+  %tmp = tail call i8 @llvm.cttz.i8( i8 %x, i1 false )
+  ret i8 %tmp
 ; CHECK: t1:
 ; CHECK: tzcntl
 }
 
-declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone
-
 define i16 @t2(i16 %x) nounwind  {
-       %tmp = tail call i16 @llvm.cttz.i16( i16 %x, i1 false )
-       ret i16 %tmp
+  %tmp = tail call i16 @llvm.cttz.i16( i16 %x, i1 false )
+  ret i16 %tmp
 ; CHECK: t2:
 ; CHECK: tzcntw
 }
 
-declare i16 @llvm.cttz.i16(i16, i1) nounwind readnone
-
-define i64 @t3(i64 %x) nounwind  {
-       %tmp = tail call i64 @llvm.cttz.i64( i64 %x, i1 false )
-       ret i64 %tmp
+define i32 @t3(i32 %x) nounwind  {
+  %tmp = tail call i32 @llvm.cttz.i32( i32 %x, i1 false )
+  ret i32 %tmp
 ; CHECK: t3:
+; CHECK: tzcntl
+}
+
+define i64 @t4(i64 %x) nounwind  {
+  %tmp = tail call i64 @llvm.cttz.i64( i64 %x, i1 false )
+  ret i64 %tmp
+; CHECK: t4:
 ; CHECK: tzcntq
 }
 
-declare i64 @llvm.cttz.i64(i64, i1) nounwind readnone
+define i8 @t5(i8 %x) nounwind  {
+  %tmp = tail call i8 @llvm.cttz.i8( i8 %x, i1 true )
+  ret i8 %tmp
+; CHECK: t5:
+; CHECK: tzcntl
+}
 
-define i8 @t4(i8 %x) nounwind  {
-       %tmp = tail call i8 @llvm.cttz.i8( i8 %x, i1 false )
-       ret i8 %tmp
-; CHECK: t4:
+define i16 @t6(i16 %x) nounwind  {
+  %tmp = tail call i16 @llvm.cttz.i16( i16 %x, i1 true )
+  ret i16 %tmp
+; CHECK: t6:
 ; CHECK: tzcntw
 }
 
-declare i8 @llvm.cttz.i8(i8, i1) nounwind readnone
+define i32 @t7(i32 %x) nounwind  {
+  %tmp = tail call i32 @llvm.cttz.i32( i32 %x, i1 true )
+  ret i32 %tmp
+; CHECK: t7:
+; CHECK: tzcntl
+}
+
+define i64 @t8(i64 %x) nounwind  {
+  %tmp = tail call i64 @llvm.cttz.i64( i64 %x, i1 true )
+  ret i64 %tmp
+; CHECK: t8:
+; CHECK: tzcntq
+}
 
 define i32 @andn32(i32 %x, i32 %y) nounwind readnone {
   %tmp1 = xor i32 %x, -1
diff --git a/test/CodeGen/X86/brcond.ll b/test/CodeGen/X86/brcond.ll
index 5cdc100..44670c8 100644
--- a/test/CodeGen/X86/brcond.ll
+++ b/test/CodeGen/X86/brcond.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=core2 | FileCheck %s
+; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=penryn | FileCheck %s
+
 ; rdar://7475489
 
 define i32 @test1(i32 %a, i32 %b) nounwind ssp {
@@ -106,3 +107,4 @@ bb2:                                              ; preds = %entry, %bb1
   %.0 = fptrunc double %.0.in to float            ; <float> [#uses=1]
   ret float %.0
 }
+
diff --git a/test/CodeGen/X86/byval6.ll b/test/CodeGen/X86/byval6.ll
index b060369..2d39901 100644
--- a/test/CodeGen/X86/byval6.ll
+++ b/test/CodeGen/X86/byval6.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | grep add | not grep 16
+; RUN: llc < %s -mcpu=generic -march=x86 | grep add | not grep 16
 
 	%struct.W = type { x86_fp80, x86_fp80 }
 @B = global %struct.W { x86_fp80 0xK4001A000000000000000, x86_fp80 0xK4001C000000000000000 }, align 32
diff --git a/test/CodeGen/X86/cfstring.ll b/test/CodeGen/X86/cfstring.ll
new file mode 100644
index 0000000..7420ce7
--- /dev/null
+++ b/test/CodeGen/X86/cfstring.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+; <rdar://problem/10564621>
+
+%0 = type opaque
+%struct.NSConstantString = type { i32*, i32, i8*, i32 }
+
+; Make sure that the string ends up in the correct section.
+
+; CHECK:        .section __TEXT,__cstring
+; CHECK-NEXT: l_.str3:
+
+; CHECK:        .section  __DATA,__cfstring
+; CHECK-NEXT:   .align  4
+; CHECK-NEXT: L__unnamed_cfstring_4:
+; CHECK-NEXT:   .quad  ___CFConstantStringClassReference
+; CHECK-NEXT:   .long  1992
+; CHECK-NEXT:   .space  4
+; CHECK-NEXT:   .quad  l_.str3
+; CHECK-NEXT:   .long  0
+; CHECK-NEXT:   .space  4
+
+@isLogVisible = global i8 0, align 1
+@__CFConstantStringClassReference = external global [0 x i32]
+@.str3 = linker_private unnamed_addr constant [1 x i8] zeroinitializer, align 1
+@_unnamed_cfstring_4 = private constant %struct.NSConstantString { i32* getelementptr inbounds ([0 x i32]* @__CFConstantStringClassReference, i32 0, i32 0), i32 1992, i8* getelementptr inbounds ([1 x i8]* @.str3, i32 0, i32 0), i32 0 }, section "__DATA,__cfstring"
+@null.array = weak_odr constant [1 x i8] zeroinitializer, align 1
+
+define linkonce_odr void @bar() nounwind ssp align 2 {
+entry:
+  %stack = alloca i8*, align 4
+  %call = call %0* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to %0* (i8*, i8*, %0*)*)(i8* null, i8* null, %0* bitcast (%struct.NSConstantString* @_unnamed_cfstring_4 to %0*))
+  store i8* getelementptr inbounds ([1 x i8]* @null.array, i32 0, i32 0), i8** %stack, align 4
+  ret void
+}
+
+declare i8* @objc_msgSend(i8*, i8*, ...) nonlazybind
diff --git a/test/CodeGen/X86/clz.ll b/test/CodeGen/X86/clz.ll
index 9b26efd..763079f 100644
--- a/test/CodeGen/X86/clz.ll
+++ b/test/CodeGen/X86/clz.ll
@@ -1,48 +1,141 @@
-; RUN: llc < %s -march=x86 -mcpu=yonah | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=yonah | FileCheck %s
 
-define i32 @t1(i32 %x) nounwind  {
-	%tmp = tail call i32 @llvm.ctlz.i32( i32 %x, i1 true )
-	ret i32 %tmp
-; CHECK: t1:
-; CHECK: bsrl
-; CHECK: cmov
+declare i8 @llvm.cttz.i8(i8, i1)
+declare i16 @llvm.cttz.i16(i16, i1)
+declare i32 @llvm.cttz.i32(i32, i1)
+declare i64 @llvm.cttz.i64(i64, i1)
+declare i8 @llvm.ctlz.i8(i8, i1)
+declare i16 @llvm.ctlz.i16(i16, i1)
+declare i32 @llvm.ctlz.i32(i32, i1)
+declare i64 @llvm.ctlz.i64(i64, i1)
+
+define i8 @cttz_i8(i8 %x)  {
+  %tmp = call i8 @llvm.cttz.i8( i8 %x, i1 true )
+  ret i8 %tmp
+; CHECK: cttz_i8:
+; CHECK: bsfl
+; CHECK-NOT: cmov
+; CHECK: ret
 }
 
-declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone 
+define i16 @cttz_i16(i16 %x)  {
+  %tmp = call i16 @llvm.cttz.i16( i16 %x, i1 true )
+  ret i16 %tmp
+; CHECK: cttz_i16:
+; CHECK: bsfw
+; CHECK-NOT: cmov
+; CHECK: ret
+}
 
-define i32 @t2(i32 %x) nounwind  {
-	%tmp = tail call i32 @llvm.cttz.i32( i32 %x, i1 true )
-	ret i32 %tmp
-; CHECK: t2:
+define i32 @cttz_i32(i32 %x)  {
+  %tmp = call i32 @llvm.cttz.i32( i32 %x, i1 true )
+  ret i32 %tmp
+; CHECK: cttz_i32:
 ; CHECK: bsfl
-; CHECK: cmov
+; CHECK-NOT: cmov
+; CHECK: ret
+}
+
+define i64 @cttz_i64(i64 %x)  {
+  %tmp = call i64 @llvm.cttz.i64( i64 %x, i1 true )
+  ret i64 %tmp
+; CHECK: cttz_i64:
+; CHECK: bsfq
+; CHECK-NOT: cmov
+; CHECK: ret
 }
 
-declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone 
+define i8 @ctlz_i8(i8 %x) {
+entry:
+  %tmp2 = call i8 @llvm.ctlz.i8( i8 %x, i1 true )
+  ret i8 %tmp2
+; CHECK: ctlz_i8:
+; CHECK: bsrl
+; CHECK-NOT: cmov
+; CHECK: xorl $7,
+; CHECK: ret
+}
 
-define i16 @t3(i16 %x, i16 %y) nounwind  {
+define i16 @ctlz_i16(i16 %x) {
 entry:
-        %tmp1 = add i16 %x, %y
-	%tmp2 = tail call i16 @llvm.ctlz.i16( i16 %tmp1, i1 true )		; <i16> [#uses=1]
-	ret i16 %tmp2
-; CHECK: t3:
+  %tmp2 = call i16 @llvm.ctlz.i16( i16 %x, i1 true )
+  ret i16 %tmp2
+; CHECK: ctlz_i16:
 ; CHECK: bsrw
-; CHECK: cmov
+; CHECK-NOT: cmov
+; CHECK: xorl $15,
+; CHECK: ret
+}
+
+define i32 @ctlz_i32(i32 %x) {
+  %tmp = call i32 @llvm.ctlz.i32( i32 %x, i1 true )
+  ret i32 %tmp
+; CHECK: ctlz_i32:
+; CHECK: bsrl
+; CHECK-NOT: cmov
+; CHECK: xorl $31,
+; CHECK: ret
+}
+
+define i64 @ctlz_i64(i64 %x) {
+  %tmp = call i64 @llvm.ctlz.i64( i64 %x, i1 true )
+  ret i64 %tmp
+; CHECK: ctlz_i64:
+; CHECK: bsrq
+; CHECK-NOT: cmov
+; CHECK: xorq $63,
+; CHECK: ret
 }
 
-declare i16 @llvm.ctlz.i16(i16, i1) nounwind readnone 
+define i32 @ctlz_i32_cmov(i32 %n) {
+entry:
+; Generate a cmov to handle zero inputs when necessary.
+; CHECK: ctlz_i32_cmov:
+; CHECK: bsrl
+; CHECK: cmov
+; CHECK: xorl $31,
+; CHECK: ret
+  %tmp1 = call i32 @llvm.ctlz.i32(i32 %n, i1 false)
+  ret i32 %tmp1
+}
 
+define i32 @ctlz_i32_fold_cmov(i32 %n) {
+entry:
 ; Don't generate the cmovne when the source is known non-zero (and bsr would
 ; not set ZF).
 ; rdar://9490949
-
-define i32 @t4(i32 %n) nounwind {
-entry:
-; CHECK: t4:
+; CHECK: ctlz_i32_fold_cmov:
 ; CHECK: bsrl
 ; CHECK-NOT: cmov
+; CHECK: xorl $31,
 ; CHECK: ret
   %or = or i32 %n, 1
-  %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %or, i1 true)
+  %tmp1 = call i32 @llvm.ctlz.i32(i32 %or, i1 false)
   ret i32 %tmp1
 }
+
+define i32 @ctlz_bsr(i32 %n) {
+entry:
+; Don't generate any xors when a 'ctlz' intrinsic is actually used to compute
+; the most significant bit, which is what 'bsr' does natively.
+; CHECK: ctlz_bsr:
+; CHECK: bsrl
+; CHECK-NOT: xorl
+; CHECK: ret
+  %ctlz = call i32 @llvm.ctlz.i32(i32 %n, i1 true)
+  %bsr = xor i32 %ctlz, 31
+  ret i32 %bsr
+}
+
+define i32 @ctlz_bsr_cmov(i32 %n) {
+entry:
+; Same as ctlz_bsr, but ensure this happens even when there is a potential
+; zero.
+; CHECK: ctlz_bsr_cmov:
+; CHECK: bsrl
+; CHECK-NOT: xorl
+; CHECK: ret
+  %ctlz = call i32 @llvm.ctlz.i32(i32 %n, i1 false)
+  %bsr = xor i32 %ctlz, 31
+  ret i32 %bsr
+}
diff --git a/test/CodeGen/X86/cmpxchg16b.ll b/test/CodeGen/X86/cmpxchg16b.ll
index ba1c4ef..edbd0bc 100644
--- a/test/CodeGen/X86/cmpxchg16b.ll
+++ b/test/CodeGen/X86/cmpxchg16b.ll
@@ -3,7 +3,7 @@
 ; Basic 128-bit cmpxchg
 define void @t1(i128* nocapture %p) nounwind ssp {
 entry:
-; CHECK movl	$1, %ebx
+; CHECK: movl	$1, %ebx
 ; CHECK: lock
 ; CHECK-NEXT: cmpxchg16b
   %r = cmpxchg i128* %p, i128 0, i128 1 seq_cst
diff --git a/test/CodeGen/X86/dbg-merge-loc-entry.ll b/test/CodeGen/X86/dbg-merge-loc-entry.ll
index afe1729..c35935f 100644
--- a/test/CodeGen/X86/dbg-merge-loc-entry.ll
+++ b/test/CodeGen/X86/dbg-merge-loc-entry.ll
@@ -10,7 +10,7 @@ target triple = "x86_64-apple-darwin8"
 ;CHECK-NEXT:    .short  Lset
 ;CHECK-NEXT: Ltmp
 ;CHECK-NEXT:	.byte	85                      ## DW_OP_reg5
-;CHECK-NEXT: Ltmp7
+;CHECK-NEXT: Ltmp5
 ;CHECK-NEXT:	.quad	0
 ;CHECK-NEXT:	.quad	0
 
diff --git a/test/CodeGen/X86/dec-eflags-lower.ll b/test/CodeGen/X86/dec-eflags-lower.ll
index 458160a..190819f 100644
--- a/test/CodeGen/X86/dec-eflags-lower.ll
+++ b/test/CodeGen/X86/dec-eflags-lower.ll
@@ -2,6 +2,7 @@
 
 %struct.obj = type { i64 }
 
+; CHECK: _Z7releaseP3obj
 define void @_Z7releaseP3obj(%struct.obj* nocapture %o) nounwind uwtable ssp {
 entry:
 ; CHECK: decq	(%{{rdi|rcx}})
@@ -22,8 +23,45 @@ return:                                           ; preds = %entry, %if.end
   ret void
 }
 
+@c = common global i64 0, align 8
+@a = common global i32 0, align 4
+@.str = private unnamed_addr constant [5 x i8] c"%ld\0A\00", align 1
+@b = common global i32 0, align 4
+
+; CHECK: test
+define i32 @test() nounwind uwtable ssp {
+entry:
+; CHECK: decq
+; CHECK-NOT: decq
+%0 = load i64* @c, align 8, !tbaa !0
+%dec.i = add nsw i64 %0, -1
+store i64 %dec.i, i64* @c, align 8, !tbaa !0
+%tobool.i = icmp ne i64 %dec.i, 0
+%lor.ext.i = zext i1 %tobool.i to i32
+store i32 %lor.ext.i, i32* @a, align 4, !tbaa !3
+%call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i64 0, i64 0), i64 %dec.i) nounwind
+ret i32 0
+}
+
+; CHECK: test2
+define i32 @test2() nounwind uwtable ssp {
+entry:
+; CHECK-NOT: decq ({{.*}})
+%0 = load i64* @c, align 8, !tbaa !0
+%dec.i = add nsw i64 %0, -1
+store i64 %dec.i, i64* @c, align 8, !tbaa !0
+%tobool.i = icmp ne i64 %0, 0
+%lor.ext.i = zext i1 %tobool.i to i32
+store i32 %lor.ext.i, i32* @a, align 4, !tbaa !3
+%call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i64 0, i64 0), i64 %dec.i) nounwind
+ret i32 0
+}
+
+declare i32 @printf(i8* nocapture, ...) nounwind
+
 declare void @free(i8* nocapture) nounwind
 
 !0 = metadata !{metadata !"long", metadata !1}
 !1 = metadata !{metadata !"omnipotent char", metadata !2}
 !2 = metadata !{metadata !"Simple C/C++ TBAA", null}
+!3 = metadata !{metadata !"int", metadata !1}
diff --git a/test/CodeGen/X86/dg.exp b/test/CodeGen/X86/dg.exp
deleted file mode 100644
index 629a147..0000000
--- a/test/CodeGen/X86/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target X86] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/X86/divide-by-constant.ll b/test/CodeGen/X86/divide-by-constant.ll
index 87c1be5..e577ecb 100644
--- a/test/CodeGen/X86/divide-by-constant.ll
+++ b/test/CodeGen/X86/divide-by-constant.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i686-pc-linux-gnu -asm-verbose=0 | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=i686-pc-linux-gnu -asm-verbose=0 | FileCheck %s
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
 target triple = "i686-pc-linux-gnu"
 
diff --git a/test/CodeGen/X86/empty-functions.ll b/test/CodeGen/X86/empty-functions.ll
index 874c53a..ac5174d 100644
--- a/test/CodeGen/X86/empty-functions.ll
+++ b/test/CodeGen/X86/empty-functions.ll
@@ -6,14 +6,11 @@ entry:
   unreachable
 }
 ; CHECK-NO-FP:     _func:
-; CHECK-NO-FP-NEXT: :
 ; CHECK-NO-FP-NEXT: .cfi_startproc
 ; CHECK-NO-FP:     nop
-; CHECK-NO-FP-NEXT: :
 ; CHECK-NO-FP-NEXT: .cfi_endproc
 
 ; CHECK-FP:      _func:
-; CHECK-FP-NEXT: :
 ; CHECK-FP-NEXT: .cfi_startproc
 ; CHECK-FP-NEXT: :
 ; CHECK-FP-NEXT: pushq %rbp
@@ -25,5 +22,4 @@ entry:
 ; CHECK-FP-NEXT: :
 ; CHECK-FP-NEXT: .cfi_def_cfa_register %rbp
 ; CHECK-FP-NEXT: nop
-; CHECK-FP-NEXT: :
 ; CHECK-FP-NEXT: .cfi_endproc
diff --git a/test/CodeGen/X86/epilogue.ll b/test/CodeGen/X86/epilogue.ll
index 52dcb61..0f16a64 100644
--- a/test/CodeGen/X86/epilogue.ll
+++ b/test/CodeGen/X86/epilogue.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86 | not grep lea
-; RUN: llc < %s -march=x86 | grep {movl	%ebp}
+; RUN: llc < %s -mcpu=generic -march=x86 | not grep lea
+; RUN: llc < %s -mcpu=generic -march=x86 | grep {movl	%ebp}
 
 declare void @bar(<2 x i64>* %n)
 
diff --git a/test/CodeGen/X86/fast-cc-merge-stack-adj.ll b/test/CodeGen/X86/fast-cc-merge-stack-adj.ll
index e151821..e4982f0 100644
--- a/test/CodeGen/X86/fast-cc-merge-stack-adj.ll
+++ b/test/CodeGen/X86/fast-cc-merge-stack-adj.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
+; RUN: llc < %s -mcpu=generic -march=x86 -x86-asm-syntax=intel | \
 ; RUN:   grep {add	ESP, 8}
 
 target triple = "i686-pc-linux-gnu"
diff --git a/test/CodeGen/X86/fast-isel-bc.ll b/test/CodeGen/X86/fast-isel-bc.ll
index 193e436..8ac15cd 100644
--- a/test/CodeGen/X86/fast-isel-bc.ll
+++ b/test/CodeGen/X86/fast-isel-bc.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O0 -regalloc=basic -march=x86-64 -mattr=+mmx,+sse2 | FileCheck %s
+; RUN: llc < %s -O0 -march=x86-64 -mattr=+mmx,+sse2 | FileCheck %s
 ; PR4684
 
 target datalayout =
diff --git a/test/CodeGen/X86/fast-isel-x86.ll b/test/CodeGen/X86/fast-isel-x86.ll
index 19972f7..b9598bb 100644
--- a/test/CodeGen/X86/fast-isel-x86.ll
+++ b/test/CodeGen/X86/fast-isel-x86.ll
@@ -1,4 +1,4 @@
-; RUN: llc -fast-isel -O0 -mtriple=i386-apple-darwin10 -relocation-model=pic < %s | FileCheck %s
+; RUN: llc -fast-isel -O0 -mcpu=generic -mtriple=i386-apple-darwin10 -relocation-model=pic < %s | FileCheck %s
 
 ; This should use flds to set the return value.
 ; CHECK: test0:
diff --git a/test/CodeGen/X86/fast-isel.ll b/test/CodeGen/X86/fast-isel.ll
index 8391860..c88d529 100644
--- a/test/CodeGen/X86/fast-isel.ll
+++ b/test/CodeGen/X86/fast-isel.ll
@@ -99,7 +99,6 @@ define void @load_store_i1(i1* %p, i1* %q) nounwind {
   ret void
 }
 
-
 @crash_test1x = external global <2 x i32>, align 8
 
 define void @crash_test1() nounwind ssp {
@@ -108,3 +107,13 @@ define void @crash_test1() nounwind ssp {
   ret void
 }
 
+declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
+
+define i64* @life() nounwind {
+  %a1 = alloca i64*, align 8
+  %a2 = bitcast i64** %a1 to i8*
+  call void @llvm.lifetime.start(i64 -1, i8* %a2) nounwind      
+  %a3 = load i64** %a1, align 8
+  ret i64* %a3
+}
+
diff --git a/test/CodeGen/X86/fltused.ll b/test/CodeGen/X86/fltused.ll
index 2ffcb96..81511a3 100644
--- a/test/CodeGen/X86/fltused.ll
+++ b/test/CodeGen/X86/fltused.ll
@@ -4,6 +4,8 @@
 
 ; RUN: llc < %s -mtriple i686-pc-win32 | FileCheck %s --check-prefix WIN32
 ; RUN: llc < %s -mtriple x86_64-pc-win32 | FileCheck %s --check-prefix WIN64
+; RUN: llc < %s -O0 -mtriple i686-pc-win32 | FileCheck %s --check-prefix WIN32
+; RUN: llc < %s -O0 -mtriple x86_64-pc-win32 | FileCheck %s --check-prefix WIN64
 
 @.str = private constant [4 x i8] c"%f\0A\00"
 
diff --git a/test/CodeGen/X86/fltused_function_pointer.ll b/test/CodeGen/X86/fltused_function_pointer.ll
new file mode 100644
index 0000000..cfe484a
--- /dev/null
+++ b/test/CodeGen/X86/fltused_function_pointer.ll
@@ -0,0 +1,19 @@
+; The purpose of this test is to verify that the fltused symbol is emitted when
+; any function is called with floating point arguments on Windows. And that it
+; is not emitted otherwise.
+
+; RUN: llc < %s -mtriple i686-pc-win32 | FileCheck %s --check-prefix WIN32
+; RUN: llc < %s -mtriple x86_64-pc-win32 | FileCheck %s --check-prefix WIN64
+; RUN: llc < %s -O0 -mtriple i686-pc-win32 | FileCheck %s --check-prefix WIN32
+; RUN: llc < %s -O0 -mtriple x86_64-pc-win32 | FileCheck %s --check-prefix WIN64
+
+@.str = private constant [4 x i8] c"%f\0A\00"
+
+define i32 @foo(i32 (i8*, ...)* %f) nounwind {
+entry:
+  %call = tail call i32 (i8*, ...)* %f(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), double 1.000000e+000) nounwind
+  ret i32 0
+}
+
+; WIN32: .globl __fltused
+; WIN64: .globl _fltused
diff --git a/test/CodeGen/X86/fma4-intrinsics-x86_64.ll b/test/CodeGen/X86/fma4-intrinsics-x86_64.ll
index bd94c13..5ed03ef 100644
--- a/test/CodeGen/X86/fma4-intrinsics-x86_64.ll
+++ b/test/CodeGen/X86/fma4-intrinsics-x86_64.ll
@@ -6,6 +6,20 @@ define < 4 x float > @test_x86_fma4_vfmadd_ss(< 4 x float > %a0, < 4 x float > %
   %res = call < 4 x float > @llvm.x86.fma4.vfmadd.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
   ret < 4 x float > %res
 }
+define < 4 x float > @test_x86_fma4_vfmadd_ss_load(< 4 x float > %a0, < 4 x float > %a1, float* %a2) {
+  ; CHECK: vfmaddss (%{{.*}})
+  %x = load float *%a2
+  %y = insertelement <4 x float> undef, float %x, i32 0
+  %res = call < 4 x float > @llvm.x86.fma4.vfmadd.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %y) ; <i64> [#uses=1]
+  ret < 4 x float > %res
+}
+define < 4 x float > @test_x86_fma4_vfmadd_ss_load2(< 4 x float > %a0, float* %a1, < 4 x float > %a2) {
+  ; CHECK: vfmaddss %{{.*}}, (%{{.*}})
+  %x = load float *%a1
+  %y = insertelement <4 x float> undef, float %x, i32 0
+  %res = call < 4 x float > @llvm.x86.fma4.vfmadd.ss(< 4 x float > %a0, < 4 x float > %y, < 4 x float > %a2) ; <i64> [#uses=1]
+  ret < 4 x float > %res
+}
 declare < 4 x float > @llvm.x86.fma4.vfmadd.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
 
 define < 2 x double > @test_x86_fma4_vfmadd_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
@@ -13,6 +27,20 @@ define < 2 x double > @test_x86_fma4_vfmadd_sd(< 2 x double > %a0, < 2 x double
   %res = call < 2 x double > @llvm.x86.fma4.vfmadd.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
   ret < 2 x double > %res
 }
+define < 2 x double > @test_x86_fma4_vfmadd_sd_load(< 2 x double > %a0, < 2 x double > %a1, double* %a2) {
+  ; CHECK: vfmaddsd (%{{.*}})
+  %x = load double *%a2
+  %y = insertelement <2 x double> undef, double %x, i32 0
+  %res = call < 2 x double > @llvm.x86.fma4.vfmadd.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %y) ; <i64> [#uses=1]
+  ret < 2 x double > %res
+}
+define < 2 x double > @test_x86_fma4_vfmadd_sd_load2(< 2 x double > %a0, double* %a1, < 2 x double > %a2) {
+  ; CHECK: vfmaddsd %{{.*}}, (%{{.*}})
+  %x = load double *%a1
+  %y = insertelement <2 x double> undef, double %x, i32 0
+  %res = call < 2 x double > @llvm.x86.fma4.vfmadd.sd(< 2 x double > %a0, < 2 x double > %y, < 2 x double > %a2) ; <i64> [#uses=1]
+  ret < 2 x double > %res
+}
 declare < 2 x double > @llvm.x86.fma4.vfmadd.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
 
 define < 4 x float > @test_x86_fma4_vfmadd_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
@@ -20,6 +48,18 @@ define < 4 x float > @test_x86_fma4_vfmadd_ps(< 4 x float > %a0, < 4 x float > %
   %res = call < 4 x float > @llvm.x86.fma4.vfmadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
   ret < 4 x float > %res
 }
+define < 4 x float > @test_x86_fma4_vfmadd_ps_load(< 4 x float > %a0, < 4 x float > %a1, < 4 x float >* %a2) {
+  ; CHECK: vfmaddps (%{{.*}})
+  %x = load <4 x float>* %a2
+  %res = call < 4 x float > @llvm.x86.fma4.vfmadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %x) ; <i64> [#uses=1]
+  ret < 4 x float > %res
+}
+define < 4 x float > @test_x86_fma4_vfmadd_ps_load2(< 4 x float > %a0, < 4 x float >* %a1, < 4 x float > %a2) {
+  ; CHECK: vfmaddps %{{.*}}, (%{{.*}})
+  %x = load <4 x float>* %a1
+  %res = call < 4 x float > @llvm.x86.fma4.vfmadd.ps(< 4 x float > %a0, < 4 x float > %x, < 4 x float > %a2) ; <i64> [#uses=1]
+  ret < 4 x float > %res
+}
 declare < 4 x float > @llvm.x86.fma4.vfmadd.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
 
 define < 2 x double > @test_x86_fma4_vfmadd_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
@@ -27,6 +67,18 @@ define < 2 x double > @test_x86_fma4_vfmadd_pd(< 2 x double > %a0, < 2 x double
   %res = call < 2 x double > @llvm.x86.fma4.vfmadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
   ret < 2 x double > %res
 }
+define < 2 x double > @test_x86_fma4_vfmadd_pd_load(< 2 x double > %a0, < 2 x double > %a1, < 2 x double >* %a2) {
+  ; CHECK: vfmaddpd (%{{.*}})
+  %x = load <2 x double>* %a2
+  %res = call < 2 x double > @llvm.x86.fma4.vfmadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %x) ; <i64> [#uses=1]
+  ret < 2 x double > %res
+}
+define < 2 x double > @test_x86_fma4_vfmadd_pd_load2(< 2 x double > %a0, < 2 x double >* %a1, < 2 x double > %a2) {
+  ; CHECK: vfmaddpd %{{.*}}, (%{{.*}})
+  %x = load <2 x double>* %a1
+  %res = call < 2 x double > @llvm.x86.fma4.vfmadd.pd(< 2 x double > %a0, < 2 x double > %x, < 2 x double > %a2) ; <i64> [#uses=1]
+  ret < 2 x double > %res
+}
 declare < 2 x double > @llvm.x86.fma4.vfmadd.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
 
 define < 8 x float > @test_x86_fma4_vfmadd_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
diff --git a/test/CodeGen/X86/fold-and-shift.ll b/test/CodeGen/X86/fold-and-shift.ll
index 9f79f77..93baa0e 100644
--- a/test/CodeGen/X86/fold-and-shift.ll
+++ b/test/CodeGen/X86/fold-and-shift.ll
@@ -1,21 +1,77 @@
-; RUN: llc < %s -march=x86 | not grep and
+; RUN: llc < %s -march=x86 | FileCheck %s
 
 define i32 @t1(i8* %X, i32 %i) {
+; CHECK: t1:
+; CHECK-NOT: and
+; CHECK: movzbl
+; CHECK: movl (%{{...}},%{{...}},4),
+; CHECK: ret
+
 entry:
-	%tmp2 = shl i32 %i, 2		; <i32> [#uses=1]
-	%tmp4 = and i32 %tmp2, 1020		; <i32> [#uses=1]
-	%tmp7 = getelementptr i8* %X, i32 %tmp4		; <i8*> [#uses=1]
-	%tmp78 = bitcast i8* %tmp7 to i32*		; <i32*> [#uses=1]
-	%tmp9 = load i32* %tmp78, align 4		; <i32> [#uses=1]
-	ret i32 %tmp9
+  %tmp2 = shl i32 %i, 2
+  %tmp4 = and i32 %tmp2, 1020
+  %tmp7 = getelementptr i8* %X, i32 %tmp4
+  %tmp78 = bitcast i8* %tmp7 to i32*
+  %tmp9 = load i32* %tmp78
+  ret i32 %tmp9
 }
 
 define i32 @t2(i16* %X, i32 %i) {
+; CHECK: t2:
+; CHECK-NOT: and
+; CHECK: movzwl
+; CHECK: movl (%{{...}},%{{...}},4),
+; CHECK: ret
+
+entry:
+  %tmp2 = shl i32 %i, 1
+  %tmp4 = and i32 %tmp2, 131070
+  %tmp7 = getelementptr i16* %X, i32 %tmp4
+  %tmp78 = bitcast i16* %tmp7 to i32*
+  %tmp9 = load i32* %tmp78
+  ret i32 %tmp9
+}
+
+define i32 @t3(i16* %i.ptr, i32* %arr) {
+; This case is tricky. The lshr followed by a gep will produce a lshr followed
+; by an and to remove the low bits. This can be simplified by doing the lshr by
+; a greater constant and using the addressing mode to scale the result back up.
+; To make matters worse, because of the two-phase zext of %i and their reuse in
+; the function, the DAG can get confusing trying to re-use both of them and
+; prevent easy analysis of the mask in order to match this.
+; CHECK: t3:
+; CHECK-NOT: and
+; CHECK: shrl
+; CHECK: addl (%{{...}},%{{...}},4),
+; CHECK: ret
+
+entry:
+  %i = load i16* %i.ptr
+  %i.zext = zext i16 %i to i32
+  %index = lshr i32 %i.zext, 11
+  %val.ptr = getelementptr inbounds i32* %arr, i32 %index
+  %val = load i32* %val.ptr
+  %sum = add i32 %val, %i.zext
+  ret i32 %sum
+}
+
+define i32 @t4(i16* %i.ptr, i32* %arr) {
+; A version of @t3 that has more zero extends and more re-use of intermediate
+; values. This exercises slightly different bits of canonicalization.
+; CHECK: t4:
+; CHECK-NOT: and
+; CHECK: shrl
+; CHECK: addl (%{{...}},%{{...}},4),
+; CHECK: ret
+
 entry:
-	%tmp2 = shl i32 %i, 1		; <i32> [#uses=1]
-	%tmp4 = and i32 %tmp2, 131070		; <i32> [#uses=1]
-	%tmp7 = getelementptr i16* %X, i32 %tmp4		; <i16*> [#uses=1]
-	%tmp78 = bitcast i16* %tmp7 to i32*		; <i32*> [#uses=1]
-	%tmp9 = load i32* %tmp78, align 4		; <i32> [#uses=1]
-	ret i32 %tmp9
+  %i = load i16* %i.ptr
+  %i.zext = zext i16 %i to i32
+  %index = lshr i32 %i.zext, 11
+  %index.zext = zext i32 %index to i64
+  %val.ptr = getelementptr inbounds i32* %arr, i64 %index.zext
+  %val = load i32* %val.ptr
+  %sum.1 = add i32 %val, %i.zext
+  %sum.2 = add i32 %sum.1, %index
+  ret i32 %sum.2
 }
diff --git a/test/CodeGen/X86/fold-load.ll b/test/CodeGen/X86/fold-load.ll
index 5525af2..e03cb7e 100644
--- a/test/CodeGen/X86/fold-load.ll
+++ b/test/CodeGen/X86/fold-load.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | FileCheck %s
+; RUN: llc < %s -mcpu=generic -march=x86 | FileCheck %s
 	%struct._obstack_chunk = type { i8*, %struct._obstack_chunk*, [4 x i8] }
 	%struct.obstack = type { i32, %struct._obstack_chunk*, i8*, i8*, i8*, i32, i32, %struct._obstack_chunk* (...)*, void (...)*, i8*, i8 }
 @stmt_obstack = external global %struct.obstack		; <%struct.obstack*> [#uses=1]
diff --git a/test/CodeGen/X86/fold-pcmpeqd-2.ll b/test/CodeGen/X86/fold-pcmpeqd-2.ll
index a7b3332..9cf4607 100644
--- a/test/CodeGen/X86/fold-pcmpeqd-2.ll
+++ b/test/CodeGen/X86/fold-pcmpeqd-2.ll
@@ -8,10 +8,7 @@
 ; RAGreedy defeats the test by splitting live ranges.
 
 ; Constant pool all-ones vector:
-; CHECK: .long 4294967295
-; CHECK-NEXT: .long 4294967295
-; CHECK-NEXT: .long 4294967295
-; CHECK-NEXT: .long 4294967295
+; CHECK: .space 16,255
 
 ; No pcmpeqd instructions, everybody uses the constant pool.
 ; CHECK: program_1:
diff --git a/test/CodeGen/X86/gcc_except_table.ll b/test/CodeGen/X86/gcc_except_table.ll
new file mode 100644
index 0000000..d89e9dc
--- /dev/null
+++ b/test/CodeGen/X86/gcc_except_table.ll
@@ -0,0 +1,27 @@
+; RUN: llc -mtriple x86_64-apple-darwin %s -o - | FileCheck %s
+@_ZTIi = external constant i8*
+
+define i32 @main() uwtable optsize ssp {
+entry:
+  invoke void @_Z1fv() optsize
+          to label %try.cont unwind label %lpad
+
+lpad:
+  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          catch i8* bitcast (i8** @_ZTIi to i8*)
+  br label %eh.resume
+
+try.cont:
+  ret i32 0
+
+eh.resume:
+  resume { i8*, i32 } %0
+}
+
+declare void @_Z1fv() optsize
+
+declare i32 @__gxx_personality_v0(...)
+
+; CHECK: Leh_func_end0:
+; CHECK: GCC_except_table0
+; CHECK: = Leh_func_end0-
diff --git a/test/CodeGen/X86/inline-asm-fpstack.ll b/test/CodeGen/X86/inline-asm-fpstack.ll
index c9a1c1c..2249618 100644
--- a/test/CodeGen/X86/inline-asm-fpstack.ll
+++ b/test/CodeGen/X86/inline-asm-fpstack.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=i386-apple-darwin | FileCheck %s
 
 ; There should be no stack manipulations between the inline asm and ret.
 ; CHECK: test1
diff --git a/test/CodeGen/X86/inline-asm-q-regs.ll b/test/CodeGen/X86/inline-asm-q-regs.ll
index 617bd39..fca68ba 100644
--- a/test/CodeGen/X86/inline-asm-q-regs.ll
+++ b/test/CodeGen/X86/inline-asm-q-regs.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64
+; RUN: llc < %s -march=x86-64 -mattr=+avx
 ; rdar://7066579
 
 	%0 = type { i64, i64, i64, i64, i64 }		; type %0
@@ -27,3 +27,11 @@ entry:
   %0 = tail call { i8, i8, i8, i8, i8 } asm "foo $1, $2, $3, $4, $1\0Axchgb ${0:b}, ${0:h}", "=q,={ax},={bx},={cx},={dx},0,1,2,3,4,~{dirflag},~{fpsr},~{flags}"(i8 %val, i8 %a, i8 %b, i8 %c, i8 %d) nounwind
   ret void
 }
+
+; rdar://10614894
+define <8 x float> @test5(<8 x float> %a, <8 x float> %b) nounwind {
+entry:
+  %0 = tail call <8 x float> asm "vperm2f128 $3, $2, $1, $0", "=x,x,x,i,~{dirflag},~{fpsr},~{flags}"(<8 x float> %a, <8 x float> %b, i32 16) nounwind
+  ret <8 x float> %0
+}
+
diff --git a/test/CodeGen/X86/inline-asm-tied.ll b/test/CodeGen/X86/inline-asm-tied.ll
index de6500d..91576fb 100644
--- a/test/CodeGen/X86/inline-asm-tied.ll
+++ b/test/CodeGen/X86/inline-asm-tied.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin9 -O0 -regalloc=basic | FileCheck %s
+; RUN: llc < %s -mtriple=i386-apple-darwin9 -O0 -optimize-regalloc -regalloc=basic | FileCheck %s
 ; rdar://6992609
 
 ; CHECK: movl [[EDX:%e..]], 4(%esp)
diff --git a/test/CodeGen/X86/iv-users-in-other-loops.ll b/test/CodeGen/X86/iv-users-in-other-loops.ll
index 7f2bd75..e51e61d 100644
--- a/test/CodeGen/X86/iv-users-in-other-loops.ll
+++ b/test/CodeGen/X86/iv-users-in-other-loops.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -enable-lsr-nested -o %t
+; RUN: llc < %s -mcpu=generic -march=x86-64 -enable-lsr-nested -o %t
 ; RUN: not grep inc %t
 ; RUN: grep dec %t | count 2
 ; RUN: grep addq %t | count 12
diff --git a/test/CodeGen/X86/jump_sign.ll b/test/CodeGen/X86/jump_sign.ll
index 5e8e162..dbd133c 100644
--- a/test/CodeGen/X86/jump_sign.ll
+++ b/test/CodeGen/X86/jump_sign.ll
@@ -1,7 +1,9 @@
-; RUN: llc < %s -march=x86 | grep jns
+; RUN: llc < %s -march=x86 -mcpu=pentiumpro | FileCheck %s
 
 define i32 @f(i32 %X) {
 entry:
+; CHECK: f:
+; CHECK: jns
 	%tmp1 = add i32 %X, 1		; <i32> [#uses=1]
 	%tmp = icmp slt i32 %tmp1, 0		; <i1> [#uses=1]
 	br i1 %tmp, label %cond_true, label %cond_next
@@ -18,3 +20,15 @@ cond_next:		; preds = %cond_true, %entry
 declare i32 @bar(...)
 
 declare i32 @baz(...)
+
+; rdar://10633221
+define i32 @g(i32 %a, i32 %b) nounwind {
+entry:
+; CHECK: g:
+; CHECK-NOT: test
+; CHECK: cmovs
+  %sub = sub nsw i32 %a, %b
+  %cmp = icmp sgt i32 %sub, 0
+  %cond = select i1 %cmp, i32 %sub, i32 0
+  ret i32 %cond
+}
diff --git a/test/CodeGen/X86/legalize-shift-64.ll b/test/CodeGen/X86/legalize-shift-64.ll
new file mode 100644
index 0000000..2026472
--- /dev/null
+++ b/test/CodeGen/X86/legalize-shift-64.ll
@@ -0,0 +1,56 @@
+; RUN: llc -march=x86 < %s | FileCheck %s
+
+define i64 @test1(i32 %xx, i32 %test) nounwind {
+  %conv = zext i32 %xx to i64
+  %and = and i32 %test, 7
+  %sh_prom = zext i32 %and to i64
+  %shl = shl i64 %conv, %sh_prom
+  ret i64 %shl
+; CHECK: test1:
+; CHECK: shll	%cl, %eax
+; CHECK: shrl	%edx
+; CHECK: xorb	$31
+; CHECK: shrl	%cl, %edx
+}
+
+define i64 @test2(i64 %xx, i32 %test) nounwind {
+  %and = and i32 %test, 7
+  %sh_prom = zext i32 %and to i64
+  %shl = shl i64 %xx, %sh_prom
+  ret i64 %shl
+; CHECK: test2:
+; CHECK: shll	%cl, %esi
+; CHECK: shrl	%edx
+; CHECK: xorb	$31
+; CHECK: shrl	%cl, %edx
+; CHECK: orl	%esi, %edx
+; CHECK: shll	%cl, %eax
+}
+
+define i64 @test3(i64 %xx, i32 %test) nounwind {
+  %and = and i32 %test, 7
+  %sh_prom = zext i32 %and to i64
+  %shr = lshr i64 %xx, %sh_prom
+  ret i64 %shr
+; CHECK: test3:
+; CHECK: shrl	%cl, %esi
+; CHECK: leal	(%edx,%edx), %eax
+; CHECK: xorb	$31, %cl
+; CHECK: shll	%cl, %eax
+; CHECK: orl	%esi, %eax
+; CHECK: shrl	%cl, %edx
+}
+
+define i64 @test4(i64 %xx, i32 %test) nounwind {
+  %and = and i32 %test, 7
+  %sh_prom = zext i32 %and to i64
+  %shr = ashr i64 %xx, %sh_prom
+  ret i64 %shr
+; CHECK: test4:
+; CHECK: shrl	%cl, %esi
+; CHECK: leal	(%edx,%edx), %eax
+; CHECK: xorb	$31, %cl
+; CHECK: shll	%cl, %eax
+; CHECK: orl	%esi, %eax
+; CHECK: sarl	%cl, %edx
+}
diff --git a/test/CodeGen/X86/lit.local.cfg b/test/CodeGen/X86/lit.local.cfg
new file mode 100644
index 0000000..b05ed3c
--- /dev/null
+++ b/test/CodeGen/X86/lit.local.cfg
@@ -0,0 +1,13 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+def getRoot(config):
+    if not config.parent:
+        return config
+    return getRoot(config.parent)
+
+root = getRoot(config)
+
+targets = set(root.targets_to_build.split())
+if not 'X86' in targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/X86/log2_not_readnone.ll b/test/CodeGen/X86/log2_not_readnone.ll
new file mode 100644
index 0000000..5620835
--- /dev/null
+++ b/test/CodeGen/X86/log2_not_readnone.ll
@@ -0,0 +1,15 @@
+; RUN: llc -mtriple=i386-linux-gnueabi %s -o - | FileCheck %s
+
+; Log2 and exp2 are string-matched to intrinsics. If they are not declared
+; readnone, they can't be changed to intrinsics (because they can change errno).
+
+declare double @log2(double)
+declare double @exp2(double)
+
+define void @f() {
+       ; CHECK: calll log2
+       %1 = call double @log2(double 0.000000e+00)
+       ; CHECK: calll exp2
+       %2 = call double @exp2(double 0.000000e+00)
+       ret void
+}
diff --git a/test/CodeGen/X86/lzcnt.ll b/test/CodeGen/X86/lzcnt.ll
index adfc38b..2faa24a 100644
--- a/test/CodeGen/X86/lzcnt.ll
+++ b/test/CodeGen/X86/lzcnt.ll
@@ -1,14 +1,17 @@
 ; RUN: llc < %s -march=x86-64 -mattr=+lzcnt | FileCheck %s
 
-define i32 @t1(i32 %x) nounwind  {
-	%tmp = tail call i32 @llvm.ctlz.i32( i32 %x, i1 false )
-	ret i32 %tmp
+declare i8 @llvm.ctlz.i8(i8, i1) nounwind readnone
+declare i16 @llvm.ctlz.i16(i16, i1) nounwind readnone
+declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
+declare i64 @llvm.ctlz.i64(i64, i1) nounwind readnone
+
+define i8 @t1(i8 %x) nounwind  {
+	%tmp = tail call i8 @llvm.ctlz.i8( i8 %x, i1 false )
+	ret i8 %tmp
 ; CHECK: t1:
 ; CHECK: lzcntl
 }
 
-declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
-
 define i16 @t2(i16 %x) nounwind  {
 	%tmp = tail call i16 @llvm.ctlz.i16( i16 %x, i1 false )
 	ret i16 %tmp
@@ -16,23 +19,44 @@ define i16 @t2(i16 %x) nounwind  {
 ; CHECK: lzcntw
 }
 
-declare i16 @llvm.ctlz.i16(i16, i1) nounwind readnone
+define i32 @t3(i32 %x) nounwind  {
+	%tmp = tail call i32 @llvm.ctlz.i32( i32 %x, i1 false )
+	ret i32 %tmp
+; CHECK: t3:
+; CHECK: lzcntl
+}
 
-define i64 @t3(i64 %x) nounwind  {
+define i64 @t4(i64 %x) nounwind  {
 	%tmp = tail call i64 @llvm.ctlz.i64( i64 %x, i1 false )
 	ret i64 %tmp
-; CHECK: t3:
+; CHECK: t4:
 ; CHECK: lzcntq
 }
 
-declare i64 @llvm.ctlz.i64(i64, i1) nounwind readnone
-
-define i8 @t4(i8 %x) nounwind  {
-	%tmp = tail call i8 @llvm.ctlz.i8( i8 %x, i1 false )
+define i8 @t5(i8 %x) nounwind  {
+	%tmp = tail call i8 @llvm.ctlz.i8( i8 %x, i1 true )
 	ret i8 %tmp
-; CHECK: t4:
+; CHECK: t5:
+; CHECK: lzcntl
+}
+
+define i16 @t6(i16 %x) nounwind  {
+	%tmp = tail call i16 @llvm.ctlz.i16( i16 %x, i1 true )
+	ret i16 %tmp
+; CHECK: t6:
 ; CHECK: lzcntw
 }
 
-declare i8 @llvm.ctlz.i8(i8, i1) nounwind readnone
+define i32 @t7(i32 %x) nounwind  {
+	%tmp = tail call i32 @llvm.ctlz.i32( i32 %x, i1 true )
+	ret i32 %tmp
+; CHECK: t7:
+; CHECK: lzcntl
+}
 
+define i64 @t8(i64 %x) nounwind  {
+	%tmp = tail call i64 @llvm.ctlz.i64( i64 %x, i1 true )
+	ret i64 %tmp
+; CHECK: t8:
+; CHECK: lzcntq
+}
diff --git a/test/CodeGen/X86/machine-cp.ll b/test/CodeGen/X86/machine-cp.ll
new file mode 100644
index 0000000..54fa01c
--- /dev/null
+++ b/test/CodeGen/X86/machine-cp.ll
@@ -0,0 +1,36 @@
+; RUN: llc -mtriple=x86_64-apple-macosx -mcpu=nocona < %s | FileCheck %s
+
+; After tail duplication, two copies in an early exit BB can be cancelled out.
+; rdar://10640363
+define i32 @t1(i32 %a, i32 %b) nounwind  {
+entry:
+; CHECK: t1:
+; CHECK: jne
+  %cmp1 = icmp eq i32 %b, 0
+  br i1 %cmp1, label %while.end, label %while.body
+
+; CHECK: BB
+; CHECK-NOT: mov
+; CHECK: ret
+
+while.body:                                       ; preds = %entry, %while.body
+  %a.addr.03 = phi i32 [ %b.addr.02, %while.body ], [ %a, %entry ]
+  %b.addr.02 = phi i32 [ %rem, %while.body ], [ %b, %entry ]
+  %rem = srem i32 %a.addr.03, %b.addr.02
+  %cmp = icmp eq i32 %rem, 0
+  br i1 %cmp, label %while.end, label %while.body
+
+while.end:                                        ; preds = %while.body, %entry
+  %a.addr.0.lcssa = phi i32 [ %a, %entry ], [ %b.addr.02, %while.body ]
+  ret i32 %a.addr.0.lcssa
+}
+
+; Two movdqa (from phi-elimination) in the entry BB cancel out.
+; rdar://10428165
+define <8 x i16> @t2(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
+entry:
+; CHECK: t2:
+; CHECK-NOT: movdqa
+  %tmp8 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 undef, i32 undef, i32 7, i32 2, i32 8, i32 undef, i32 undef , i32 undef >
+  ret <8 x i16> %tmp8
+}
diff --git a/test/CodeGen/X86/machine-cse.ll b/test/CodeGen/X86/machine-cse.ll
index d819fc8..a757cde 100644
--- a/test/CodeGen/X86/machine-cse.ll
+++ b/test/CodeGen/X86/machine-cse.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=x86_64-apple-darwin < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-apple-macosx < %s | FileCheck %s
 ; rdar://7610418
 
 %ptr = type { i8* }
@@ -77,3 +77,25 @@ bb.nph743.us:                                     ; preds = %for.body53.us, %if.
 sw.bb307:                                         ; preds = %sw.bb, %entry
   ret void
 }
+
+; CSE a physical-register-defining instruction across an MBB boundary.
+; rdar://10660865
+define i32 @cross_mbb_phys_cse(i32 %a, i32 %b) nounwind ssp {
+entry:
+; CHECK: cross_mbb_phys_cse:
+; CHECK: cmpl
+; CHECK: ja
+  %cmp = icmp ugt i32 %a, %b
+  br i1 %cmp, label %return, label %if.end
+
+if.end:                                           ; preds = %entry
+; CHECK-NOT: cmpl
+; CHECK: sbbl
+  %cmp1 = icmp ult i32 %a, %b
+  %. = sext i1 %cmp1 to i32
+  br label %return
+
+return:                                           ; preds = %if.end, %entry
+  %retval.0 = phi i32 [ 1, %entry ], [ %., %if.end ]
+  ret i32 %retval.0
+}
diff --git a/test/CodeGen/X86/masked-iv-safe.ll b/test/CodeGen/X86/masked-iv-safe.ll
index 3a4acb8..a7b036e 100644
--- a/test/CodeGen/X86/masked-iv-safe.ll
+++ b/test/CodeGen/X86/masked-iv-safe.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 > %t
+; RUN: llc < %s -mcpu=generic -march=x86-64 > %t
 ; RUN: not grep and %t
 ; RUN: not grep movz %t
 ; RUN: not grep sar %t
diff --git a/test/CodeGen/X86/mcinst-avx-lowering.ll b/test/CodeGen/X86/mcinst-avx-lowering.ll
new file mode 100644
index 0000000..41f96e8
--- /dev/null
+++ b/test/CodeGen/X86/mcinst-avx-lowering.ll
@@ -0,0 +1,19 @@
+; RUN: llc -mtriple=x86_64-apple-macosx10 -mattr=avx -show-mc-encoding < %s | FileCheck %s
+
+define i64 @t1(double %d_ivar) nounwind uwtable ssp {
+entry:
+; CHECK: t1
+  %0 = bitcast double %d_ivar to i64
+; CHECK: vmovd
+; CHECK: encoding: [0xc4,0xe1,0xf9,0x7e,0xc0]
+  ret i64 %0
+}
+
+define double @t2(i64 %d_ivar) nounwind uwtable ssp {
+entry:
+; CHECK: t2
+  %0 = bitcast i64 %d_ivar to double
+; CHECK: vmovd
+; CHECK: encoding: [0xc4,0xe1,0xf9,0x6e,0xc7]
+  ret double %0
+}
diff --git a/test/CodeGen/X86/memcpy.ll b/test/CodeGen/X86/memcpy.ll
index f43b0bf..86c6862 100644
--- a/test/CodeGen/X86/memcpy.ll
+++ b/test/CodeGen/X86/memcpy.ll
@@ -79,3 +79,16 @@ entry:
 ; LINUX movq
 }
 
+
+@.str = private unnamed_addr constant [30 x i8] c"\00aaaaaaaaaaaaaaaaaaaaaaaaaaaa\00", align 1
+
+define void @test5(i8* nocapture %C) nounwind uwtable ssp {
+entry:
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([30 x i8]* @.str, i64 0, i64 0), i64 16, i32 1, i1 false)
+  ret void
+
+; DARWIN: movabsq	$7016996765293437281
+; DARWIN: movabsq	$7016996765293437184
+}
+
+
diff --git a/test/CodeGen/X86/mmx-builtins.ll b/test/CodeGen/X86/mmx-builtins.ll
index 3ac0e4e..8b7200d 100644
--- a/test/CodeGen/X86/mmx-builtins.ll
+++ b/test/CodeGen/X86/mmx-builtins.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -march=x86 -mattr=+mmx,+ssse3 | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+avx | FileCheck %s
 
 declare x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx, x86_mmx) nounwind readnone
 
diff --git a/test/CodeGen/X86/negate-add-zero.ll b/test/CodeGen/X86/negate-add-zero.ll
index c3f412e..92850f2 100644
--- a/test/CodeGen/X86/negate-add-zero.ll
+++ b/test/CodeGen/X86/negate-add-zero.ll
@@ -486,10 +486,6 @@ declare void @_ZN7CDSListIP9HingeNodeEC1Eii(%"struct.CDSList<HingeNode*>"*, i32,
 
 declare i8* @_Znwm(i32)
 
-declare i8* @llvm.eh.exception() nounwind
-
-declare i32 @llvm.eh.selector.i32(i8*, i8*, ...) nounwind
-
 declare i32 @llvm.eh.typeid.for.i32(i8*) nounwind
 
 declare void @_ZdlPv(i8*) nounwind
diff --git a/test/CodeGen/X86/null-streamer.ll b/test/CodeGen/X86/null-streamer.ll
new file mode 100644
index 0000000..7c0e82f
--- /dev/null
+++ b/test/CodeGen/X86/null-streamer.ll
@@ -0,0 +1,11 @@
+; Check the MCNullStreamer operates correctly, at least on a minimal test case.
+;
+; RUN: llc -filetype=null -o %t -march=x86 %s
+
+define void @f0()  {
+  ret void
+}
+
+define void @f1() {
+  ret void
+}
diff --git a/test/CodeGen/X86/objc-gc-module-flags.ll b/test/CodeGen/X86/objc-gc-module-flags.ll
new file mode 100644
index 0000000..8cb2c03
--- /dev/null
+++ b/test/CodeGen/X86/objc-gc-module-flags.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+
+; CHECK:        .section  __DATA,__objc_imageinfo,regular,no_dead_strip
+; CHECK-NEXT: L_OBJC_IMAGE_INFO:
+; CHECK-NEXT:   .long  0
+; CHECK-NEXT:   .long  2
+
+!llvm.module.flags = !{!0, !1, !2, !3}
+
+!0 = metadata !{i32 1, metadata !"Objective-C Version", i32 2}
+!1 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0}
+!2 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
+!3 = metadata !{i32 1, metadata !"Objective-C Garbage Collection", i32 2}
diff --git a/test/CodeGen/X86/object-size.ll b/test/CodeGen/X86/object-size.ll
index 082d20c..8f1eabd 100644
--- a/test/CodeGen/X86/object-size.ll
+++ b/test/CodeGen/X86/object-size.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O0 -regalloc=basic < %s -march=x86-64 | FileCheck %s -check-prefix=X64
+; RUN: llc -O0 < %s -march=x86-64 | FileCheck %s -check-prefix=X64
 
 ; ModuleID = 'ts.c'
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
diff --git a/test/CodeGen/X86/odr_comdat.ll b/test/CodeGen/X86/odr_comdat.ll
new file mode 100644
index 0000000..547334c
--- /dev/null
+++ b/test/CodeGen/X86/odr_comdat.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s -check-prefix=X86LINUX
+
+; Checking that a comdat group gets generated correctly for a static member 
+; of instantiated C++ templates.
+; see http://sourcery.mentor.com/public/cxx-abi/abi.html#vague-itemplate
+; section 5.2.6 Instantiated templates
+; "Any static member data object is emitted in a COMDAT identified by its mangled 
+;  name, in any object file with a reference to its name symbol."
+
+; Case 1: variable is not explicitly initialized, and ends up in a .bss section
+; X86LINUX:   .section        .bss._ZN1CIiE1iE,"aGw",@nobits,_ZN1CIiE1iE,comdat
+@_ZN1CIiE1iE = weak_odr global i32 0, align 4
+
+; Case 2: variable is explicitly initialized, and ends up in a .data section
+; X86LINUX:   .section        .data._ZN1CIiE1jE,"aGw",@progbits,_ZN1CIiE1jE,comdat
+@_ZN1CIiE1jE = weak_odr global i32 12, align 4
diff --git a/test/CodeGen/X86/optimize-max-3.ll b/test/CodeGen/X86/optimize-max-3.ll
index e42aa9d..d092916 100644
--- a/test/CodeGen/X86/optimize-max-3.ll
+++ b/test/CodeGen/X86/optimize-max-3.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=x86_64-linux -asm-verbose=false | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-win32 -asm-verbose=false | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -asm-verbose=false | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-win32 -asm-verbose=false | FileCheck %s
 
 ; LSR's OptimizeMax should eliminate the select (max).
 
diff --git a/test/CodeGen/X86/peep-test-3.ll b/test/CodeGen/X86/peep-test-3.ll
index 528c4bc..a379980 100644
--- a/test/CodeGen/X86/peep-test-3.ll
+++ b/test/CodeGen/X86/peep-test-3.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -post-RA-scheduler=false | FileCheck %s
+; RUN: llc < %s -mcpu=generic -march=x86 -post-RA-scheduler=false | FileCheck %s
 ; rdar://7226797
 
 ; LLVM should omit the testl and use the flags result from the orl.
diff --git a/test/CodeGen/X86/personality_size.ll b/test/CodeGen/X86/personality_size.ll
new file mode 100644
index 0000000..30a5d39
--- /dev/null
+++ b/test/CodeGen/X86/personality_size.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -relocation-model=pic -disable-cfi -mtriple=x86_64-pc-solaris2.11 -disable-cgp-branch-opts | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -relocation-model=pic -disable-cfi -mtriple=i386-pc-solaris2.11 -disable-cgp-branch-opts | FileCheck %s -check-prefix=X32
+; PR1632
+
+define void @_Z1fv() {
+entry:
+  invoke void @_Z1gv()
+          to label %return unwind label %unwind
+
+unwind:                                           ; preds = %entry
+  %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+            cleanup
+  ret void
+
+return:                                           ; preds = %entry
+  ret void
+}
+
+declare void @_Z1gv()
+
+declare i32 @__gxx_personality_v0(...)
+
+; X64:      .size	DW.ref.__gxx_personality_v0, 8
+; X64:      .quad	__gxx_personality_v0
+
+; X32:      .size	DW.ref.__gxx_personality_v0, 4
+; X32:      .long	__gxx_personality_v0
+
diff --git a/test/CodeGen/X86/pic.ll b/test/CodeGen/X86/pic.ll
index fb60ac2..fc06309 100644
--- a/test/CodeGen/X86/pic.ll
+++ b/test/CodeGen/X86/pic.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i686-pc-linux-gnu -relocation-model=pic -asm-verbose=false -post-RA-scheduler=false | FileCheck %s -check-prefix=LINUX
+; RUN: llc < %s -mcpu=generic -mtriple=i686-pc-linux-gnu -relocation-model=pic -asm-verbose=false -post-RA-scheduler=false | FileCheck %s -check-prefix=LINUX
 
 @ptr = external global i32* 
 @dst = external global i32 
diff --git a/test/CodeGen/X86/pr11202.ll b/test/CodeGen/X86/pr11202.ll
index 2b26a69..13070d1 100644
--- a/test/CodeGen/X86/pr11202.ll
+++ b/test/CodeGen/X86/pr11202.ll
@@ -15,5 +15,5 @@ l2:                                               ; preds = %l1
   br label %l1
 }
 
-; CHECK: .Ltmp1:                                 # Address of block that was removed by CodeGen
-; CHECK: .quad	.Ltmp1
+; CHECK: .Ltmp0:                                 # Address of block that was removed by CodeGen
+; CHECK: .quad	.Ltmp0
diff --git a/test/CodeGen/X86/promote.ll b/test/CodeGen/X86/promote.ll
index b8964f2..8b30dc7 100644
--- a/test/CodeGen/X86/promote.ll
+++ b/test/CodeGen/X86/promote.ll
@@ -29,3 +29,14 @@ entry:
   ret i32 0
 ; CHECK: ret
 }
+
+; CHECK: bitcast_widen
+define <2 x float> @bitcast_widen(<4 x i32> %in) nounwind readnone {
+entry:
+; CHECK-NOT: pshufd
+ %x = shufflevector <4 x i32> %in, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+ %y = bitcast <2 x i32> %x to <2 x float>
+ ret <2 x float> %y
+; CHECK: ret
+}
+
diff --git a/test/CodeGen/X86/red-zone.ll b/test/CodeGen/X86/red-zone.ll
index d936971..d99a7a4 100644
--- a/test/CodeGen/X86/red-zone.ll
+++ b/test/CodeGen/X86/red-zone.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux | FileCheck %s
 
 ; First without noredzone.
 ; CHECK: f0:
diff --git a/test/CodeGen/X86/red-zone2.ll b/test/CodeGen/X86/red-zone2.ll
index 9557d17..f092163 100644
--- a/test/CodeGen/X86/red-zone2.ll
+++ b/test/CodeGen/X86/red-zone2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 > %t
+; RUN: llc < %s -mcpu=generic -march=x86-64 > %t
 ; RUN: grep subq %t | count 1
 ; RUN: grep addq %t | count 1
 
diff --git a/test/CodeGen/X86/reghinting.ll b/test/CodeGen/X86/reghinting.ll
index 87f65ed..6759115 100644
--- a/test/CodeGen/X86/reghinting.ll
+++ b/test/CodeGen/X86/reghinting.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-apple-macosx | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-apple-macosx | FileCheck %s
 ; PR10221
 
 ;; The registers %x and %y must both spill across the finit call.
diff --git a/test/CodeGen/X86/segmented-stacks-dynamic.ll b/test/CodeGen/X86/segmented-stacks-dynamic.ll
new file mode 100644
index 0000000..5ce08aa
--- /dev/null
+++ b/test/CodeGen/X86/segmented-stacks-dynamic.ll
@@ -0,0 +1,64 @@
+; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux  -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -segmented-stacks -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -segmented-stacks -filetype=obj
+
+; Just to prevent the alloca from being optimized away
+declare void @dummy_use(i32*, i32)
+
+define i32 @test_basic(i32 %l) {
+        %mem = alloca i32, i32 %l
+        call void @dummy_use (i32* %mem, i32 %l)
+        %terminate = icmp eq i32 %l, 0
+        br i1 %terminate, label %true, label %false
+
+true:
+        ret i32 0
+
+false:
+        %newlen = sub i32 %l, 1
+        %retvalue = call i32 @test_basic(i32 %newlen)
+        ret i32 %retvalue
+
+; X32:      test_basic:
+
+; X32:      cmpl %gs:48, %esp
+; X32-NEXT: ja      .LBB0_2
+
+; X32:      pushl $4
+; X32-NEXT: pushl $12
+; X32-NEXT: calll __morestack
+; X32-NEXT: ret
+
+; X32:      movl %esp, %eax
+; X32-NEXT: subl %ecx, %eax
+; X32-NEXT: cmpl %eax, %gs:48
+
+; X32:      movl %eax, %esp
+
+; X32:      subl $12, %esp
+; X32-NEXT: pushl %ecx
+; X32-NEXT: calll __morestack_allocate_stack_space
+; X32-NEXT: addl $16, %esp
+
+; X64:      test_basic:
+
+; X64:      cmpq %fs:112, %rsp
+; X64-NEXT: ja      .LBB0_2
+
+; X64:      movabsq $24, %r10
+; X64-NEXT: movabsq $0, %r11
+; X64-NEXT: callq __morestack
+; X64-NEXT: ret
+
+; X64:      movq %rsp, %rdi
+; X64-NEXT: subq %rax, %rdi
+; X64-NEXT: cmpq %rdi, %fs:112
+
+; X64:      movq %rdi, %rsp
+
+; X64:      movq %rax, %rdi
+; X64-NEXT: callq __morestack_allocate_stack_space
+; X64-NEXT: movq %rax, %rdi
+
+}
diff --git a/test/CodeGen/X86/segmented-stacks.ll b/test/CodeGen/X86/segmented-stacks.ll
index 4f529c1..5407b87 100644
--- a/test/CodeGen/X86/segmented-stacks.ll
+++ b/test/CodeGen/X86/segmented-stacks.ll
@@ -1,61 +1,97 @@
-; RUN: llc < %s -mtriple=i686-linux -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X32
-; RUN: llc < %s -mtriple=x86_64-linux  -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X32-Linux
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux  -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X64-Linux
+; RUN: llc < %s -mcpu=generic -mtriple=i686-darwin -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X32-Darwin
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-darwin -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X64-Darwin
+; RUN: llc < %s -mcpu=generic -mtriple=i686-mingw32 -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X32-MinGW
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-freebsd -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X64-FreeBSD
+
+; We used to crash with filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -segmented-stacks -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -segmented-stacks -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=i686-darwin -segmented-stacks -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-darwin -segmented-stacks -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=i686-mingw32 -segmented-stacks -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-freebsd -segmented-stacks -filetype=obj
+
+; RUN: not llc < %s -mcpu=generic -mtriple=x86_64-solaris -segmented-stacks 2> %t.log
+; RUN: FileCheck %s -input-file=%t.log -check-prefix=X64-Solaris
+; RUN: not llc < %s -mcpu=generic -mtriple=x86_64-mingw32 -segmented-stacks 2> %t.log
+; RUN: FileCheck %s -input-file=%t.log -check-prefix=X64-MinGW
+; RUN: not llc < %s -mcpu=generic -mtriple=i686-freebsd -segmented-stacks 2> %t.log
+; RUN: FileCheck %s -input-file=%t.log -check-prefix=X32-FreeBSD
+
+; X64-Solaris: Segmented stacks not supported on this platform
+; X64-MinGW: Segmented stacks not supported on this platform
+; X32-FreeBSD: Segmented stacks not supported on FreeBSD i386
 
 ; Just to prevent the alloca from being optimized away
 declare void @dummy_use(i32*, i32)
 
-define i32 @test_basic(i32 %l) {
-        %mem = alloca i32, i32 %l
-        call void @dummy_use (i32* %mem, i32 %l)
-        %terminate = icmp eq i32 %l, 0
-        br i1 %terminate, label %true, label %false
+define void @test_basic() {
+        %mem = alloca i32, i32 10
+        call void @dummy_use (i32* %mem, i32 10)
+	ret void
+
+; X32-Linux:       test_basic:
+
+; X32-Linux:       cmpl %gs:48, %esp
+; X32-Linux-NEXT:  ja      .LBB0_2
 
-true:
-        ret i32 0
+; X32-Linux:       pushl $0
+; X32-Linux-NEXT:  pushl $60
+; X32-Linux-NEXT:  calll __morestack
+; X32-Linux-NEXT:  ret
 
-false:
-        %newlen = sub i32 %l, 1
-        %retvalue = call i32 @test_basic(i32 %newlen)
-        ret i32 %retvalue
+; X64-Linux:       test_basic:
 
-; X32:      test_basic:
+; X64-Linux:       cmpq %fs:112, %rsp
+; X64-Linux-NEXT:  ja      .LBB0_2
 
-; X32:      cmpl %gs:48, %esp
+; X64-Linux:       movabsq $40, %r10
+; X64-Linux-NEXT:  movabsq $0, %r11
+; X64-Linux-NEXT:  callq __morestack
+; X64-Linux-NEXT:  ret
 
-; X32:      pushl $4
-; X32-NEXT: pushl $12
-; X32-NEXT: calll __morestack
-; X32-NEXT: ret 
+; X32-Darwin:      test_basic:
 
-; X32:      movl %esp, %eax
-; X32-NEXT: subl %ecx, %eax
-; X32-NEXT: cmpl %eax, %gs:48
+; X32-Darwin:      movl $432, %ecx
+; X32-Darwin-NEXT: cmpl %gs:(%ecx), %esp
+; X32-Darwin-NEXT: ja      LBB0_2
 
-; X32:      movl %eax, %esp
+; X32-Darwin:      pushl $0
+; X32-Darwin-NEXT: pushl $60
+; X32-Darwin-NEXT: calll ___morestack
+; X32-Darwin-NEXT: ret
 
-; X32:      subl $12, %esp
-; X32-NEXT: pushl %ecx
-; X32-NEXT: calll __morestack_allocate_stack_space
-; X32-NEXT: addl $16, %esp
+; X64-Darwin:      test_basic:
 
-; X64:      test_basic:
+; X64-Darwin:      cmpq %gs:816, %rsp
+; X64-Darwin-NEXT: ja      LBB0_2
 
-; X64:      cmpq %fs:112, %rsp
+; X64-Darwin:      movabsq $40, %r10
+; X64-Darwin-NEXT: movabsq $0, %r11
+; X64-Darwin-NEXT: callq ___morestack
+; X64-Darwin-NEXT: ret
 
-; X64:      movabsq $24, %r10
-; X64-NEXT: movabsq $0, %r11
-; X64-NEXT: callq __morestack
-; X64-NEXT: ret
+; X32-MinGW:       test_basic:
 
-; X64:      movq %rsp, %rdi
-; X64-NEXT: subq %rax, %rdi
-; X64-NEXT: cmpq %rdi, %fs:112
+; X32-MinGW:       cmpl %fs:20, %esp
+; X32-MinGW-NEXT:  ja      LBB0_2
 
-; X64:      movq %rdi, %rsp
+; X32-MinGW:       pushl $0
+; X32-MinGW-NEXT:  pushl $48
+; X32-MinGW-NEXT:  calll ___morestack
+; X32-MinGW-NEXT:  ret
 
-; X64:      movq %rax, %rdi
-; X64-NEXT: callq __morestack_allocate_stack_space
-; X64-NEXT: movq %rax, %rdi
+; X64-FreeBSD:       test_basic:
+
+; X64-FreeBSD:       cmpq %fs:24, %rsp
+; X64-FreeBSD-NEXT:  ja      .LBB0_2
+
+; X64-FreeBSD:       movabsq $40, %r10
+; X64-FreeBSD-NEXT:  movabsq $0, %r11
+; X64-FreeBSD-NEXT:  callq __morestack
+; X64-FreeBSD-NEXT:  ret
 
 }
 
@@ -64,21 +100,60 @@ define i32 @test_nested(i32 * nest %closure, i32 %other) {
        %result = add i32 %other, %addend
        ret i32 %result
 
-; X32:      cmpl %gs:48, %esp
+; X32-Linux:       cmpl %gs:48, %esp
+; X32-Linux-NEXT:  ja      .LBB1_2
+
+; X32-Linux:       pushl $4
+; X32-Linux-NEXT:  pushl $0
+; X32-Linux-NEXT:  calll __morestack
+; X32-Linux-NEXT:  ret
+
+; X64-Linux:       cmpq %fs:112, %rsp
+; X64-Linux-NEXT:  ja      .LBB1_2
+
+; X64-Linux:       movq %r10, %rax
+; X64-Linux-NEXT:  movabsq $0, %r10
+; X64-Linux-NEXT:  movabsq $0, %r11
+; X64-Linux-NEXT:  callq __morestack
+; X64-Linux-NEXT:  ret
+; X64-Linux-NEXT:  movq %rax, %r10
 
-; X32:      pushl $4
-; X32-NEXT: pushl $0
-; X32-NEXT: calll __morestack
-; X32-NEXT: ret
+; X32-Darwin:      movl $432, %edx
+; X32-Darwin-NEXT: cmpl %gs:(%edx), %esp
+; X32-Darwin-NEXT: ja      LBB1_2
 
-; X64:      cmpq %fs:112, %rsp
+; X32-Darwin:      pushl $4
+; X32-Darwin-NEXT: pushl $0
+; X32-Darwin-NEXT: calll ___morestack
+; X32-Darwin-NEXT: ret
 
-; X64:      movq %r10, %rax
-; X64-NEXT: movabsq $0, %r10
-; X64-NEXT: movabsq $0, %r11
-; X64-NEXT: callq __morestack
-; X64-NEXT: ret
-; X64-NEXT: movq %rax, %r10
+; X64-Darwin:      cmpq %gs:816, %rsp
+; X64-Darwin-NEXT: ja      LBB1_2
+
+; X64-Darwin:      movq %r10, %rax
+; X64-Darwin-NEXT: movabsq $0, %r10
+; X64-Darwin-NEXT: movabsq $0, %r11
+; X64-Darwin-NEXT: callq ___morestack
+; X64-Darwin-NEXT: ret
+; X64-Darwin-NEXT: movq %rax, %r10
+
+; X32-MinGW:       cmpl %fs:20, %esp
+; X32-MinGW-NEXT:  ja      LBB1_2
+
+; X32-MinGW:       pushl $4
+; X32-MinGW-NEXT:  pushl $0
+; X32-MinGW-NEXT:  calll ___morestack
+; X32-MinGW-NEXT:  ret
+
+; X64-FreeBSD:       cmpq %fs:24, %rsp
+; X64-FreeBSD-NEXT:  ja      .LBB1_2
+
+; X64-FreeBSD:       movq %r10, %rax
+; X64-FreeBSD-NEXT:  movabsq $0, %r10
+; X64-FreeBSD-NEXT:  movabsq $0, %r11
+; X64-FreeBSD-NEXT:  callq __morestack
+; X64-FreeBSD-NEXT:  ret
+; X64-FreeBSD-NEXT:  movq %rax, %r10
 
 }
 
@@ -87,20 +162,224 @@ define void @test_large() {
         call void @dummy_use (i32* %mem, i32 0)
         ret void
 
-; X32:      leal -40012(%esp), %ecx
-; X32-NEXT: cmpl %gs:48, %ecx
+; X32-Linux:       leal -40012(%esp), %ecx
+; X32-Linux-NEXT:  cmpl %gs:48, %ecx
+; X32-Linux-NEXT:  ja      .LBB2_2
+
+; X32-Linux:       pushl $0
+; X32-Linux-NEXT:  pushl $40012
+; X32-Linux-NEXT:  calll __morestack
+; X32-Linux-NEXT:  ret
+
+; X64-Linux:       leaq -40008(%rsp), %r11
+; X64-Linux-NEXT:  cmpq %fs:112, %r11
+; X64-Linux-NEXT:  ja      .LBB2_2
+
+; X64-Linux:       movabsq $40008, %r10
+; X64-Linux-NEXT:  movabsq $0, %r11
+; X64-Linux-NEXT:  callq __morestack
+; X64-Linux-NEXT:  ret
+
+; X32-Darwin:      leal -40012(%esp), %ecx
+; X32-Darwin-NEXT: movl $432, %eax
+; X32-Darwin-NEXT: cmpl %gs:(%eax), %ecx
+; X32-Darwin-NEXT: ja      LBB2_2
+
+; X32-Darwin:      pushl $0
+; X32-Darwin-NEXT: pushl $40012
+; X32-Darwin-NEXT: calll ___morestack
+; X32-Darwin-NEXT: ret
+
+; X64-Darwin:      leaq -40008(%rsp), %r11
+; X64-Darwin-NEXT: cmpq %gs:816, %r11
+; X64-Darwin-NEXT: ja      LBB2_2
+
+; X64-Darwin:      movabsq $40008, %r10
+; X64-Darwin-NEXT: movabsq $0, %r11
+; X64-Darwin-NEXT: callq ___morestack
+; X64-Darwin-NEXT: ret
+
+; X32-MinGW:       leal -40008(%esp), %ecx
+; X32-MinGW-NEXT:  cmpl %fs:20, %ecx
+; X32-MinGW-NEXT:  ja      LBB2_2
+
+; X32-MinGW:       pushl $0
+; X32-MinGW-NEXT:  pushl $40008
+; X32-MinGW-NEXT:  calll ___morestack
+; X32-MinGW-NEXT:  ret
+
+; X64-FreeBSD:       leaq -40008(%rsp), %r11
+; X64-FreeBSD-NEXT:  cmpq %fs:24, %r11
+; X64-FreeBSD-NEXT:  ja      .LBB2_2
+
+; X64-FreeBSD:       movabsq $40008, %r10
+; X64-FreeBSD-NEXT:  movabsq $0, %r11
+; X64-FreeBSD-NEXT:  callq __morestack
+; X64-FreeBSD-NEXT:  ret
+
+}
+
+define fastcc void @test_fastcc() {
+        %mem = alloca i32, i32 10
+        call void @dummy_use (i32* %mem, i32 10)
+        ret void
+
+; X32-Linux:       test_fastcc:
+
+; X32-Linux:       cmpl %gs:48, %esp
+; X32-Linux-NEXT:  ja      .LBB3_2
+
+; X32-Linux:       pushl $0
+; X32-Linux-NEXT:  pushl $60
+; X32-Linux-NEXT:  calll __morestack
+; X32-Linux-NEXT:  ret
+
+; X64-Linux:       test_fastcc:
+
+; X64-Linux:       cmpq %fs:112, %rsp
+; X64-Linux-NEXT:  ja      .LBB3_2
+
+; X64-Linux:       movabsq $40, %r10
+; X64-Linux-NEXT:  movabsq $0, %r11
+; X64-Linux-NEXT:  callq __morestack
+; X64-Linux-NEXT:  ret
+
+; X32-Darwin:      test_fastcc:
+
+; X32-Darwin:      movl $432, %eax
+; X32-Darwin-NEXT: cmpl %gs:(%eax), %esp
+; X32-Darwin-NEXT: ja      LBB3_2
+
+; X32-Darwin:      pushl $0
+; X32-Darwin-NEXT: pushl $60
+; X32-Darwin-NEXT: calll ___morestack
+; X32-Darwin-NEXT: ret
+
+; X64-Darwin:      test_fastcc:
+
+; X64-Darwin:      cmpq %gs:816, %rsp
+; X64-Darwin-NEXT: ja      LBB3_2
+
+; X64-Darwin:      movabsq $40, %r10
+; X64-Darwin-NEXT: movabsq $0, %r11
+; X64-Darwin-NEXT: callq ___morestack
+; X64-Darwin-NEXT: ret
+
+; X32-MinGW:       test_fastcc:
+
+; X32-MinGW:       cmpl %fs:20, %esp
+; X32-MinGW-NEXT:  ja      LBB3_2
+
+; X32-MinGW:       pushl $0
+; X32-MinGW-NEXT:  pushl $48
+; X32-MinGW-NEXT:  calll ___morestack
+; X32-MinGW-NEXT:  ret
+
+; X64-FreeBSD:       test_fastcc:
+
+; X64-FreeBSD:       cmpq %fs:24, %rsp
+; X64-FreeBSD-NEXT:  ja      .LBB3_2
+
+; X64-FreeBSD:       movabsq $40, %r10
+; X64-FreeBSD-NEXT:  movabsq $0, %r11
+; X64-FreeBSD-NEXT:  callq __morestack
+; X64-FreeBSD-NEXT:  ret
+
+}
+
+define fastcc void @test_fastcc_large() {
+        %mem = alloca i32, i32 10000
+        call void @dummy_use (i32* %mem, i32 0)
+        ret void
+
+; X32-Linux:       test_fastcc_large:
+
+; X32-Linux:       leal -40012(%esp), %eax
+; X32-Linux-NEXT:  cmpl %gs:48, %eax
+; X32-Linux-NEXT:  ja      .LBB4_2
+
+; X32-Linux:       pushl $0
+; X32-Linux-NEXT:  pushl $40012
+; X32-Linux-NEXT:  calll __morestack
+; X32-Linux-NEXT:  ret
+
+; X64-Linux:       test_fastcc_large:
+
+; X64-Linux:       leaq -40008(%rsp), %r11
+; X64-Linux-NEXT:  cmpq %fs:112, %r11
+; X64-Linux-NEXT:  ja      .LBB4_2
+
+; X64-Linux:       movabsq $40008, %r10
+; X64-Linux-NEXT:  movabsq $0, %r11
+; X64-Linux-NEXT:  callq __morestack
+; X64-Linux-NEXT:  ret
+
+; X32-Darwin:      test_fastcc_large:
+
+; X32-Darwin:      leal -40012(%esp), %eax
+; X32-Darwin-NEXT: movl $432, %ecx
+; X32-Darwin-NEXT: cmpl %gs:(%ecx), %eax
+; X32-Darwin-NEXT: ja      LBB4_2
+
+; X32-Darwin:      pushl $0
+; X32-Darwin-NEXT: pushl $40012
+; X32-Darwin-NEXT: calll ___morestack
+; X32-Darwin-NEXT: ret
+
+; X64-Darwin:      test_fastcc_large:
+
+; X64-Darwin:      leaq -40008(%rsp), %r11
+; X64-Darwin-NEXT: cmpq %gs:816, %r11
+; X64-Darwin-NEXT: ja      LBB4_2
+
+; X64-Darwin:      movabsq $40008, %r10
+; X64-Darwin-NEXT: movabsq $0, %r11
+; X64-Darwin-NEXT: callq ___morestack
+; X64-Darwin-NEXT: ret
+
+; X32-MinGW:       test_fastcc_large:
+
+; X32-MinGW:       leal -40008(%esp), %eax
+; X32-MinGW-NEXT:  cmpl %fs:20, %eax
+; X32-MinGW-NEXT:  ja      LBB4_2
+
+; X32-MinGW:       pushl $0
+; X32-MinGW-NEXT:  pushl $40008
+; X32-MinGW-NEXT:  calll ___morestack
+; X32-MinGW-NEXT:  ret
+
+; X64-FreeBSD:       test_fastcc_large:
+
+; X64-FreeBSD:       leaq -40008(%rsp), %r11
+; X64-FreeBSD-NEXT:  cmpq %fs:24, %r11
+; X64-FreeBSD-NEXT:  ja      .LBB4_2
+
+; X64-FreeBSD:       movabsq $40008, %r10
+; X64-FreeBSD-NEXT:  movabsq $0, %r11
+; X64-FreeBSD-NEXT:  callq __morestack
+; X64-FreeBSD-NEXT:  ret
+
+}
+
+define fastcc void @test_fastcc_large_with_ecx_arg(i32 %a) {
+        %mem = alloca i32, i32 10000
+        call void @dummy_use (i32* %mem, i32 %a)
+        ret void
+
+; This is testing that the Mac implementation preserves ecx
 
-; X32:      pushl $0
-; X32-NEXT: pushl $40012
-; X32-NEXT: calll __morestack
-; X32-NEXT: ret
+; X32-Darwin:      test_fastcc_large_with_ecx_arg:
 
-; X64:      leaq -40008(%rsp), %r11
-; X64-NEXT: cmpq %fs:112, %r11
+; X32-Darwin:      leal -40012(%esp), %eax
+; X32-Darwin-NEXT: pushl %ecx
+; X32-Darwin-NEXT: movl $432, %ecx
+; X32-Darwin-NEXT: cmpl %gs:(%ecx), %eax
+; X32-Darwin-NEXT: popl %ecx
+; X32-Darwin-NEXT: ja      LBB5_2
 
-; X64:      movabsq $40008, %r10
-; X64-NEXT: movabsq $0, %r11
-; X64-NEXT: callq __morestack
-; X64-NEXT: ret
+; X32-Darwin:      pushl $0
+; X32-Darwin-NEXT: pushl $40012
+; X32-Darwin-NEXT: calll ___morestack
+; X32-Darwin-NEXT: ret
 
 }
diff --git a/test/CodeGen/X86/shift-and.ll b/test/CodeGen/X86/shift-and.ll
index fd278c2..b747cc5 100644
--- a/test/CodeGen/X86/shift-and.ll
+++ b/test/CodeGen/X86/shift-and.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86    | grep and | count 1
+; RUN: llc < %s -march=x86    | grep and | count 2
 ; RUN: llc < %s -march=x86-64 | not grep and 
 
 define i32 @t1(i32 %t, i32 %val) nounwind {
@@ -7,9 +7,15 @@ define i32 @t1(i32 %t, i32 %val) nounwind {
        ret i32 %res
 }
 
+define i32 @t2(i32 %t, i32 %val) nounwind {
+       %shamt = and i32 %t, 63
+       %res = shl i32 %val, %shamt
+       ret i32 %res
+}
+
 @X = internal global i16 0
 
-define void @t2(i16 %t) nounwind {
+define void @t3(i16 %t) nounwind {
        %shamt = and i16 %t, 31
        %tmp = load i16* @X
        %tmp1 = ashr i16 %tmp, %shamt
@@ -17,8 +23,14 @@ define void @t2(i16 %t) nounwind {
        ret void
 }
 
-define i64 @t3(i64 %t, i64 %val) nounwind {
+define i64 @t4(i64 %t, i64 %val) nounwind {
        %shamt = and i64 %t, 63
        %res = lshr i64 %val, %shamt
        ret i64 %res
 }
+
+define i64 @t5(i64 %t, i64 %val) nounwind {
+       %shamt = and i64 %t, 191
+       %res = lshr i64 %val, %shamt
+       ret i64 %res
+}
diff --git a/test/CodeGen/X86/shift-combine.ll b/test/CodeGen/X86/shift-combine.ll
index e443ac1..51f8303 100644
--- a/test/CodeGen/X86/shift-combine.ll
+++ b/test/CodeGen/X86/shift-combine.ll
@@ -1,15 +1,19 @@
-; RUN: llc < %s | not grep shrl
+; RUN: llc -march=x86 < %s | FileCheck %s
 
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-target triple = "i686-apple-darwin8"
-@array = weak global [4 x i32] zeroinitializer		; <[4 x i32]*> [#uses=1]
+@array = weak global [4 x i32] zeroinitializer
+
+define i32 @test_lshr_and(i32 %x) {
+; CHECK: test_lshr_and:
+; CHECK-NOT: shrl
+; CHECK: andl $12,
+; CHECK: movl {{.*}}array{{.*}},
+; CHECK: ret
 
-define i32 @foo(i32 %x) {
 entry:
-	%tmp2 = lshr i32 %x, 2		; <i32> [#uses=1]
-	%tmp3 = and i32 %tmp2, 3		; <i32> [#uses=1]
-	%tmp4 = getelementptr [4 x i32]* @array, i32 0, i32 %tmp3		; <i32*> [#uses=1]
-	%tmp5 = load i32* %tmp4, align 4		; <i32> [#uses=1]
-	ret i32 %tmp5
+  %tmp2 = lshr i32 %x, 2
+  %tmp3 = and i32 %tmp2, 3
+  %tmp4 = getelementptr [4 x i32]* @array, i32 0, i32 %tmp3
+  %tmp5 = load i32* %tmp4, align 4
+  ret i32 %tmp5
 }
 
diff --git a/test/CodeGen/X86/shift-folding.ll b/test/CodeGen/X86/shift-folding.ll
index d9c3061..3ea6011 100644
--- a/test/CodeGen/X86/shift-folding.ll
+++ b/test/CodeGen/X86/shift-folding.ll
@@ -1,28 +1,70 @@
-; RUN: llc < %s -march=x86 | \
-; RUN:   grep {s\[ah\]\[rl\]l} | count 1
-
-define i32* @test1(i32* %P, i32 %X) nounwind {
-        %Y = lshr i32 %X, 2             ; <i32> [#uses=1]
-        %gep.upgrd.1 = zext i32 %Y to i64               ; <i64> [#uses=1]
-        %P2 = getelementptr i32* %P, i64 %gep.upgrd.1           ; <i32*> [#uses=1]
-        ret i32* %P2
+; RUN: llc < %s -march=x86 | FileCheck %s
+
+define i32* @test1(i32* %P, i32 %X) {
+; CHECK: test1:
+; CHECK-NOT: shrl
+; CHECK-NOT: shll
+; CHECK: ret
+
+entry:
+  %Y = lshr i32 %X, 2
+  %gep.upgrd.1 = zext i32 %Y to i64
+  %P2 = getelementptr i32* %P, i64 %gep.upgrd.1
+  ret i32* %P2
 }
 
-define i32* @test2(i32* %P, i32 %X) nounwind {
-        %Y = shl i32 %X, 2              ; <i32> [#uses=1]
-        %gep.upgrd.2 = zext i32 %Y to i64               ; <i64> [#uses=1]
-        %P2 = getelementptr i32* %P, i64 %gep.upgrd.2           ; <i32*> [#uses=1]
-        ret i32* %P2
+define i32* @test2(i32* %P, i32 %X) {
+; CHECK: test2:
+; CHECK: shll $4
+; CHECK-NOT: shll
+; CHECK: ret
+
+entry:
+  %Y = shl i32 %X, 2
+  %gep.upgrd.2 = zext i32 %Y to i64
+  %P2 = getelementptr i32* %P, i64 %gep.upgrd.2
+  ret i32* %P2
 }
 
-define i32* @test3(i32* %P, i32 %X) nounwind {
-        %Y = ashr i32 %X, 2             ; <i32> [#uses=1]
-        %P2 = getelementptr i32* %P, i32 %Y             ; <i32*> [#uses=1]
-        ret i32* %P2
+define i32* @test3(i32* %P, i32 %X) {
+; CHECK: test3:
+; CHECK-NOT: shrl
+; CHECK-NOT: shll
+; CHECK: ret
+
+entry:
+  %Y = ashr i32 %X, 2
+  %P2 = getelementptr i32* %P, i32 %Y
+  ret i32* %P2
 }
 
-define fastcc i32 @test4(i32* %d) nounwind {
+define fastcc i32 @test4(i32* %d) {
+; CHECK: test4:
+; CHECK-NOT: shrl
+; CHECK: ret
+
+entry:
   %tmp4 = load i32* %d
   %tmp512 = lshr i32 %tmp4, 24
   ret i32 %tmp512
 }
+
+define i64 @test5(i16 %i, i32* %arr) {
+; Ensure that we don't fold away shifts which have multiple uses, as they are
+; just re-introduced for the second use.
+; CHECK: test5:
+; CHECK-NOT: shrl
+; CHECK: shrl $11
+; CHECK-NOT: shrl
+; CHECK: ret
+
+entry:
+  %i.zext = zext i16 %i to i32
+  %index = lshr i32 %i.zext, 11
+  %index.zext = zext i32 %index to i64
+  %val.ptr = getelementptr inbounds i32* %arr, i64 %index.zext
+  %val = load i32* %val.ptr
+  %val.zext = zext i32 %val to i64
+  %sum = add i64 %val.zext, %index.zext
+  ret i64 %sum
+}
diff --git a/test/CodeGen/X86/shl-i64.ll b/test/CodeGen/X86/shl-i64.ll
new file mode 100644
index 0000000..f00058a
--- /dev/null
+++ b/test/CodeGen/X86/shl-i64.ll
@@ -0,0 +1,20 @@
+; RUN: llc -march=x86 -mattr=+sse2 < %s | FileCheck %s
+
+; Make sure that we don't generate an illegal i64 extract after LegalizeType.
+; CHECK: shll
+
+
+define void @test_cl(<4 x i64>*  %dst, <4 x i64>* %src, i32 %idx) {
+entry:
+  %arrayidx = getelementptr inbounds <4 x i64> * %src, i32 %idx
+  %0 = load <4 x i64> * %arrayidx, align 32
+  %arrayidx1 = getelementptr inbounds <4 x i64> * %dst, i32 %idx
+  %1 = load <4 x i64> * %arrayidx1, align 32
+  %2 = extractelement <4 x i64> %1, i32 0
+  %and = and i64 %2, 63
+  %3 = insertelement <4 x i64> undef, i64 %and, i32 0    
+  %splat = shufflevector <4 x i64> %3, <4 x i64> undef, <4 x i32> zeroinitializer
+  %shl = shl <4 x i64> %0, %splat
+  store <4 x i64> %shl, <4 x i64> * %arrayidx1, align 32
+  ret void
+}
diff --git a/test/CodeGen/X86/sibcall-5.ll b/test/CodeGen/X86/sibcall-5.ll
index 9d74121..937817e 100644
--- a/test/CodeGen/X86/sibcall-5.ll
+++ b/test/CodeGen/X86/sibcall-5.ll
@@ -1,5 +1,6 @@
 ; RUN: llc < %s -mtriple=i386-apple-darwin8 -mattr=+sse2  | FileCheck %s --check-prefix=X32
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=-sse3 | FileCheck %s --check-prefix=X64_BAD
 
 ; Sibcall optimization of expanded libcalls.
 ; rdar://8707777
@@ -29,3 +30,31 @@ entry:
 declare float @sinf(float) nounwind readonly
 
 declare double @sin(double) nounwind readonly
+
+; rdar://10930395
+%0 = type opaque
+
+@"\01L_OBJC_SELECTOR_REFERENCES_2" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+
+define hidden { double, double } @foo2(%0* %self, i8* nocapture %_cmd) uwtable optsize ssp {
+; X64_BAD: foo2:
+; X64_BAD: call
+; X64_BAD: call
+; X64_BAD: call
+  %1 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_2", align 8, !invariant.load !0
+  %2 = bitcast %0* %self to i8*
+  %3 = tail call { double, double } bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to { double, double } (i8*, i8*)*)(i8* %2, i8* %1) optsize
+  %4 = extractvalue { double, double } %3, 0
+  %5 = extractvalue { double, double } %3, 1
+  %6 = tail call double @floor(double %4) optsize
+  %7 = tail call double @floor(double %5) optsize
+  %insert.i.i = insertvalue { double, double } undef, double %6, 0
+  %insert5.i.i = insertvalue { double, double } %insert.i.i, double %7, 1
+  ret { double, double } %insert5.i.i
+}
+
+declare i8* @objc_msgSend(i8*, i8*, ...)
+
+declare double @floor(double) optsize
+
+!0 = metadata !{}
diff --git a/test/CodeGen/X86/sret.ll b/test/CodeGen/X86/sret.ll
deleted file mode 100644
index b945530..0000000
--- a/test/CodeGen/X86/sret.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; RUN: llc < %s -march=x86 | grep ret | grep 4
-
-	%struct.foo = type { [4 x i32] }
-
-define void @bar(%struct.foo* noalias sret %agg.result) nounwind  {
-entry:
-	%tmp1 = getelementptr %struct.foo* %agg.result, i32 0, i32 0
-	%tmp3 = getelementptr [4 x i32]* %tmp1, i32 0, i32 0
-	store i32 1, i32* %tmp3, align 8
-        ret void
-}
-
-@dst = external global i32
-
-define void @foo() nounwind {
-	%memtmp = alloca %struct.foo, align 4
-        call void @bar( %struct.foo* sret %memtmp ) nounwind
-        %tmp4 = getelementptr %struct.foo* %memtmp, i32 0, i32 0
-	%tmp5 = getelementptr [4 x i32]* %tmp4, i32 0, i32 0
-        %tmp6 = load i32* %tmp5
-        store i32 %tmp6, i32* @dst
-        ret void
-}
diff --git a/test/CodeGen/X86/stack-align2.ll b/test/CodeGen/X86/stack-align2.ll
new file mode 100644
index 0000000..18cce72
--- /dev/null
+++ b/test/CodeGen/X86/stack-align2.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -mcpu=generic -mtriple=i386-linux | FileCheck %s -check-prefix=LINUX-I386
+; RUN: llc < %s -mcpu=generic -mtriple=i386-netbsd | FileCheck %s -check-prefix=NETBSD-I386
+; RUN: llc < %s -mcpu=generic -mtriple=i686-apple-darwin8 | FileCheck %s -check-prefix=DARWIN-I386
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux | FileCheck %s -check-prefix=LINUX-X86_64
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-netbsd | FileCheck %s -check-prefix=NETBSD-X86_64
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-apple-darwin8 | FileCheck %s -check-prefix=DARWIN-X86_64
+
+define i32 @test() nounwind {
+entry:
+  call void @test2()
+  ret i32 0
+
+; LINUX-I386:     subl	$12, %esp
+; DARWIN-I386:    subl	$12, %esp
+; NETBSD-I386-NOT: subl	{{.*}}, %esp
+
+; LINUX-X86_64:      pushq %{{.*}}
+; LINUX-X86_64-NOT:  subq	{{.*}}, %rsp
+; DARWIN-X86_64:     pushq %{{.*}}
+; DARWIN-X86_64-NOT: subq	{{.*}}, %rsp
+; NETBSD-X86_64:     pushq %{{.*}}
+; NETBSD-X86_64-NOT: subq	{{.*}}, %rsp
+}
+
+declare void @test2()
diff --git a/test/CodeGen/X86/stride-reuse.ll b/test/CodeGen/X86/stride-reuse.ll
index 1251a24..81de22c 100644
--- a/test/CodeGen/X86/stride-reuse.ll
+++ b/test/CodeGen/X86/stride-reuse.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86            | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; RUN: llc < %s -mcpu=generic -march=x86            | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux | FileCheck %s
 ; CHECK-NOT:     lea
 
 @B = external global [1000 x float], align 32
diff --git a/test/CodeGen/X86/sub-with-overflow.ll b/test/CodeGen/X86/sub-with-overflow.ll
index 4522e91..749b5db 100644
--- a/test/CodeGen/X86/sub-with-overflow.ll
+++ b/test/CodeGen/X86/sub-with-overflow.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | FileCheck %s
+; RUN: llc < %s -mtriple=i686-linux | FileCheck %s
 
 @ok = internal constant [4 x i8] c"%d\0A\00"
 @no = internal constant [4 x i8] c"no\0A\00"
diff --git a/test/CodeGen/X86/tail-dup-addr.ll b/test/CodeGen/X86/tail-dup-addr.ll
index c5a105c..c68a8c6 100644
--- a/test/CodeGen/X86/tail-dup-addr.ll
+++ b/test/CodeGen/X86/tail-dup-addr.ll
@@ -2,8 +2,8 @@
 
 ; Test that we don't drop a block that has its address taken.
 
+; CHECK: Ltmp0:                                  ## Block address taken
 ; CHECK: Ltmp1:                                  ## Block address taken
-; CHECK: Ltmp2:                                  ## Block address taken
 
 @a = common global i32 0, align 4
 @p = common global i8* null, align 8
diff --git a/test/CodeGen/X86/tailcall-disable.ll b/test/CodeGen/X86/tailcall-disable.ll
new file mode 100644
index 0000000..b628f5e
--- /dev/null
+++ b/test/CodeGen/X86/tailcall-disable.ll
@@ -0,0 +1,40 @@
+; RUN: llc -disable-tail-calls < %s | FileCheck --check-prefix=CALL %s
+; RUN: llc < %s | FileCheck --check-prefix=JMP %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @helper() nounwind {
+entry:
+  ret i32 7
+}
+
+define i32 @test1() nounwind {
+entry:
+  %call = tail call i32 @helper()
+  ret i32 %call
+}
+
+; CALL: test1:
+; CALL-NOT: ret
+; CALL: callq helper
+; CALL: ret
+
+; JMP: test1:
+; JMP-NOT: ret
+; JMP: jmp helper # TAILCALL
+
+define i32 @test2() nounwind {
+entry:
+  %call = tail call i32 @test2()
+  ret i32 %call
+}
+
+; CALL: test2:
+; CALL-NOT: ret
+; CALL: callq test2
+; CALL: ret
+
+; JMP: test2:
+; JMP-NOT: ret
+; JMP: jmp test2 # TAILCALL
diff --git a/test/CodeGen/X86/tailcallbyval64.ll b/test/CodeGen/X86/tailcallbyval64.ll
index 7ecf379..7621602 100644
--- a/test/CodeGen/X86/tailcallbyval64.ll
+++ b/test/CodeGen/X86/tailcallbyval64.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-linux  -tailcallopt  | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux  -tailcallopt  | FileCheck %s
 
 ; FIXME: Win64 does not support byval.
 
diff --git a/test/CodeGen/X86/tailcallstack64.ll b/test/CodeGen/X86/tailcallstack64.ll
index c18c7aa..bff5f99 100644
--- a/test/CodeGen/X86/tailcallstack64.ll
+++ b/test/CodeGen/X86/tailcallstack64.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -tailcallopt -mtriple=x86_64-linux -post-RA-scheduler=true | FileCheck %s
-; RUN: llc < %s -tailcallopt -mtriple=x86_64-win32 -post-RA-scheduler=true | FileCheck %s
+; RUN: llc < %s -tailcallopt -mcpu=generic -mtriple=x86_64-linux -post-RA-scheduler=true | FileCheck %s
+; RUN: llc < %s -tailcallopt -mcpu=generic -mtriple=x86_64-win32 -post-RA-scheduler=true | FileCheck %s
 
 ; FIXME: Redundant unused stack allocation could be eliminated.
 ; CHECK: subq  ${{24|72|80}}, %rsp
diff --git a/test/CodeGen/X86/thiscall-struct-return.ll b/test/CodeGen/X86/thiscall-struct-return.ll
new file mode 100644
index 0000000..a7be483
--- /dev/null
+++ b/test/CodeGen/X86/thiscall-struct-return.ll
@@ -0,0 +1,47 @@
+; RUN: llc < %s -mtriple=i386-PC-Win32 | FileCheck %s
+
+%class.C = type { i8 }
+%struct.S = type { i32 }
+%struct.M = type { i32, i32 }
+
+declare void @_ZN1CC1Ev(%class.C* %this) unnamed_addr nounwind align 2
+declare x86_thiscallcc void @_ZNK1C5SmallEv(%struct.S* noalias sret %agg.result, %class.C* %this) nounwind align 2
+declare x86_thiscallcc void @_ZNK1C6MediumEv(%struct.M* noalias sret %agg.result, %class.C* %this) nounwind align 2
+
+define void @testv() nounwind {
+; CHECK: testv:
+; CHECK: leal
+; CHECK-NEXT: movl	%esi, (%esp)
+; CHECK-NEXT: calll _ZN1CC1Ev
+; CHECK: leal 8(%esp), %eax
+; CHECK-NEXT: movl %esi, %ecx
+; CHECK-NEXT: calll _ZNK1C5SmallEv
+entry:
+  %c = alloca %class.C, align 1
+  %tmp = alloca %struct.S, align 4
+  call void @_ZN1CC1Ev(%class.C* %c)
+  ; This call should pass the address of the sret result slot in EAX
+  ; (the struct must not be returned directly in EAX), and the 'this'
+  ; pointer should go into ECX.
+  call x86_thiscallcc void @_ZNK1C5SmallEv(%struct.S* sret %tmp, %class.C* %c)
+  ret void
+}
+
+define void @test2v() nounwind {
+; CHECK: test2v:
+; CHECK: leal
+; CHECK-NEXT: movl	%esi, (%esp)
+; CHECK-NEXT: calll _ZN1CC1Ev
+; CHECK: leal 8(%esp), %eax
+; CHECK-NEXT: movl %esi, %ecx
+; CHECK-NEXT: calll _ZNK1C6MediumEv
+entry:
+  %c = alloca %class.C, align 1
+  %tmp = alloca %struct.M, align 4
+  call void @_ZN1CC1Ev(%class.C* %c)
+  ; This call should pass the address of the sret result slot in EAX
+  ; (the struct must not be returned directly in EAX/EDX), and the 'this'
+  ; pointer should go into ECX.
+  call x86_thiscallcc void @_ZNK1C6MediumEv(%struct.M* sret %tmp, %class.C* %c)
+  ret void
+}
diff --git a/test/CodeGen/X86/tls1.ll b/test/CodeGen/X86/tls1.ll
index 0cae5c4..f39658e 100644
--- a/test/CodeGen/X86/tls1.ll
+++ b/test/CodeGen/X86/tls1.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movl	%gs:i@NTPOFF, %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movl	%fs:i@TPOFF, %eax} %t2
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu | FileCheck -check-prefix=X32_LINUX %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu | FileCheck -check-prefix=X64_LINUX %s
+; RUN: llc < %s -march=x86 -mtriple=x86-pc-win32 | FileCheck -check-prefix=X32_WIN %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-pc-win32 | FileCheck -check-prefix=X64_WIN %s
 
 @i = thread_local global i32 15
 
@@ -10,3 +10,11 @@ entry:
 	%tmp1 = load i32* @i
 	ret i32 %tmp1
 }
+; X32_LINUX: movl %gs:i@NTPOFF, %eax
+; X64_LINUX: movl %fs:i@TPOFF, %eax
+; X32_WIN: movl __tls_index, %eax
+; X32_WIN: movl %fs:__tls_array, %ecx
+; X32_WIN: movl _i@SECREL(%eax), %eax
+; X64_WIN: movl _tls_index(%rip), %eax
+; X64_WIN: movabsq $i@SECREL, %rcx
+
diff --git a/test/CodeGen/X86/tls11.ll b/test/CodeGen/X86/tls11.ll
index 514a168..cc14826 100644
--- a/test/CodeGen/X86/tls11.ll
+++ b/test/CodeGen/X86/tls11.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movzwl	%gs:i@NTPOFF, %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movzwl	%fs:i@TPOFF, %eax} %t2
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu | FileCheck -check-prefix=X32_LINUX %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu | FileCheck -check-prefix=X64_LINUX %s
+; RUN: llc < %s -march=x86 -mtriple=x86-pc-win32 | FileCheck -check-prefix=X32_WIN %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-pc-win32 | FileCheck -check-prefix=X64_WIN %s
 
 @i = thread_local global i16 15
 
@@ -10,3 +10,12 @@ entry:
 	%tmp1 = load i16* @i
 	ret i16 %tmp1
 }
+; X32_LINUX: movzwl %gs:i@NTPOFF, %eax
+; X64_LINUX: movzwl %fs:i@TPOFF, %eax
+; X32_WIN: movl __tls_index, %eax
+; X32_WIN: movl %fs:__tls_array, %ecx
+; X32_WIN: movzwl _i@SECREL(%eax), %eax
+; X64_WIN: movl _tls_index(%rip), %eax
+; X64_WIN: movq %gs:88, %rcx
+; X64_WIN: movabsq $i@SECREL, %rcx
+; X64_WIN: movzwl (%rax,%rcx), %eax
diff --git a/test/CodeGen/X86/tls12.ll b/test/CodeGen/X86/tls12.ll
index c29f6ad..3da789e 100644
--- a/test/CodeGen/X86/tls12.ll
+++ b/test/CodeGen/X86/tls12.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movb	%gs:i@NTPOFF, %al} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movb	%fs:i@TPOFF, %al} %t2
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu | FileCheck -check-prefix=X32_LINUX %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu | FileCheck -check-prefix=X64_LINUX %s
+; RUN: llc < %s -march=x86 -mtriple=x86-pc-win32 | FileCheck -check-prefix=X32_WIN %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-pc-win32 | FileCheck -check-prefix=X64_WIN %s
 
 @i = thread_local global i8 15
 
@@ -10,3 +10,12 @@ entry:
 	%tmp1 = load i8* @i
 	ret i8 %tmp1
 }
+; X32_LINUX: movb %gs:i@NTPOFF, %al
+; X64_LINUX: movb %fs:i@TPOFF, %al
+; X32_WIN: movl __tls_index, %eax
+; X32_WIN: movl %fs:__tls_array, %ecx
+; X32_WIN: movb _i@SECREL(%eax), %al
+; X64_WIN: movl _tls_index(%rip), %eax
+; X64_WIN: movq %gs:88, %rcx
+; X64_WIN: movabsq $i@SECREL, %rcx
+; X64_WIN: movb (%rax,%rcx), %al
diff --git a/test/CodeGen/X86/tls13.ll b/test/CodeGen/X86/tls13.ll
index 08778ec..0f6a98a 100644
--- a/test/CodeGen/X86/tls13.ll
+++ b/test/CodeGen/X86/tls13.ll
@@ -1,9 +1,7 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movswl	%gs:i@NTPOFF, %eax} %t
-; RUN: grep {movzwl	%gs:j@NTPOFF, %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movswl	%fs:i@TPOFF, %edi} %t2
-; RUN: grep {movzwl	%fs:j@TPOFF, %edi} %t2
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu | FileCheck -check-prefix=X32_LINUX %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu | FileCheck -check-prefix=X64_LINUX %s
+; RUN: llc < %s -march=x86 -mtriple=x86-pc-win32 | FileCheck -check-prefix=X32_WIN %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-pc-win32 | FileCheck -check-prefix=X64_WIN %s
 
 @i = thread_local global i16 0
 @j = thread_local global i16 0
@@ -22,3 +20,14 @@ entry:
 declare void @g(i32)
 
 declare void @h(i32)
+
+; X32_LINUX: movswl %gs:i@NTPOFF, %eax
+; X32_LINUX: movzwl %gs:j@NTPOFF, %eax
+; X64_LINUX: movswl %fs:i@TPOFF, %edi
+; X64_LINUX: movzwl %fs:j@TPOFF, %edi
+; X32_WIN: movswl _i@SECREL(%esi), %eax
+; X32_WIN: movzwl _j@SECREL(%esi), %eax
+; X64_WIN: movabsq $i@SECREL, %rax
+; X64_WIN: movswl (%rsi,%rax), %ecx
+; X64_WIN: movabsq $j@SECREL, %rax
+; X64_WIN: movzwl (%rsi,%rax), %ecx
diff --git a/test/CodeGen/X86/tls14.ll b/test/CodeGen/X86/tls14.ll
index 88426dd..6462571 100644
--- a/test/CodeGen/X86/tls14.ll
+++ b/test/CodeGen/X86/tls14.ll
@@ -1,9 +1,7 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movsbl	%gs:i@NTPOFF, %eax} %t
-; RUN: grep {movzbl	%gs:j@NTPOFF, %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movsbl	%fs:i@TPOFF, %edi} %t2
-; RUN: grep {movzbl	%fs:j@TPOFF, %edi} %t2
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu | FileCheck -check-prefix=X32_LINUX %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu | FileCheck -check-prefix=X64_LINUX %s
+; RUN: llc < %s -march=x86 -mtriple=x86-pc-win32 | FileCheck -check-prefix=X32_WIN %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-pc-win32 | FileCheck -check-prefix=X64_WIN %s
 
 @i = thread_local global i8 0
 @j = thread_local global i8 0
@@ -22,3 +20,14 @@ entry:
 declare void @g(i32)
 
 declare void @h(i32)
+
+; X32_LINUX: movsbl %gs:i@NTPOFF, %eax
+; X32_LINUX: movzbl %gs:j@NTPOFF, %eax
+; X64_LINUX: movsbl %fs:i@TPOFF, %edi
+; X64_LINUX: movzbl %fs:j@TPOFF, %edi
+; X32_WIN: movsbl _i@SECREL(%esi), %eax
+; X32_WIN: movzbl _j@SECREL(%esi), %eax
+; X64_WIN: movabsq $i@SECREL, %rax
+; X64_WIN: movsbl (%rsi,%rax), %ecx
+; X64_WIN: movabsq $j@SECREL, %rax
+; X64_WIN: movzbl (%rsi,%rax), %ecx
diff --git a/test/CodeGen/X86/tls2.ll b/test/CodeGen/X86/tls2.ll
index 5a94296..e882f53 100644
--- a/test/CodeGen/X86/tls2.ll
+++ b/test/CodeGen/X86/tls2.ll
@@ -1,9 +1,7 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movl	%gs:0, %eax} %t
-; RUN: grep {leal	i@NTPOFF(%eax), %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movq	%fs:0, %rax} %t2
-; RUN: grep {leaq	i@TPOFF(%rax), %rax} %t2
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu | FileCheck -check-prefix=X32_LINUX %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu | FileCheck -check-prefix=X64_LINUX %s
+; RUN: llc < %s -march=x86 -mtriple=x86-pc-win32 | FileCheck -check-prefix=X32_WIN %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-pc-win32 | FileCheck -check-prefix=X64_WIN %s
 
 @i = thread_local global i32 15
 
@@ -11,3 +9,13 @@ define i32* @f() {
 entry:
 	ret i32* @i
 }
+; X32_LINUX: movl %gs:0, %eax
+; X32_LINUX: leal i@NTPOFF(%eax), %eax
+; X64_LINUX: movq %fs:0, %rax
+; X64_LINUX: leaq i@TPOFF(%rax), %rax
+; X32_WIN: movl __tls_index, %eax
+; X32_WIN: movl %fs:__tls_array, %ecx
+; X32_WIN: leal _i@SECREL(%eax), %eax
+; X64_WIN: movl _tls_index(%rip), %eax
+; X64_WIN: movq %gs:88, %rcx
+; X64_WIN: addq $i@SECREL, %rax
diff --git a/test/CodeGen/X86/tls3.ll b/test/CodeGen/X86/tls3.ll
index 7327cc4..ee3f28f 100644
--- a/test/CodeGen/X86/tls3.ll
+++ b/test/CodeGen/X86/tls3.ll
@@ -1,9 +1,7 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movl	i@INDNTPOFF, %eax} %t
-; RUN: grep {movl	%gs:(%eax), %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movq	i@GOTTPOFF(%rip), %rax} %t2
-; RUN: grep {movl	%fs:(%rax), %eax} %t2
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu | FileCheck -check-prefix=X32_LINUX %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu | FileCheck -check-prefix=X64_LINUX %s
+; RUN: llc < %s -march=x86 -mtriple=x86-pc-win32 | FileCheck -check-prefix=X32_WIN %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-pc-win32 | FileCheck -check-prefix=X64_WIN %s
 
 @i = external thread_local global i32		; <i32*> [#uses=2]
 
@@ -12,3 +10,12 @@ entry:
 	%tmp1 = load i32* @i		; <i32> [#uses=1]
 	ret i32 %tmp1
 }
+; X32_LINUX: movl i@INDNTPOFF, %eax
+; X32_LINUX: movl %gs:(%eax), %eax
+; X64_LINUX: movq i@GOTTPOFF(%rip), %rax
+; X64_LINUX: movl %fs:(%rax), %eax
+; X32_WIN: movl __tls_index, %eax
+; X32_WIN: movl %fs:__tls_array, %ecx
+; X32_WIN: movl _i@SECREL(%eax), %eax
+; X64_WIN: movl _tls_index(%rip), %eax
+; X64_WIN: movabsq $i@SECREL, %rcx
diff --git a/test/CodeGen/X86/tls4.ll b/test/CodeGen/X86/tls4.ll
index d2e40e3..2b53ec5 100644
--- a/test/CodeGen/X86/tls4.ll
+++ b/test/CodeGen/X86/tls4.ll
@@ -1,9 +1,7 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movl	%gs:0, %eax} %t
-; RUN: grep {addl	i@INDNTPOFF, %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movq	%fs:0, %rax} %t2
-; RUN: grep {addq	i@GOTTPOFF(%rip), %rax} %t2
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu | FileCheck -check-prefix=X32_LINUX %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu | FileCheck -check-prefix=X64_LINUX %s
+; RUN: llc < %s -march=x86 -mtriple=x86-pc-win32 | FileCheck -check-prefix=X32_WIN %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-pc-win32 | FileCheck -check-prefix=X64_WIN %s
 
 @i = external thread_local global i32		; <i32*> [#uses=2]
 
@@ -11,3 +9,13 @@ define i32* @f() {
 entry:
 	ret i32* @i
 }
+; X32_LINUX: movl %gs:0, %eax
+; X32_LINUX: addl i@INDNTPOFF, %eax
+; X64_LINUX: movq %fs:0, %rax
+; X64_LINUX: addq i@GOTTPOFF(%rip), %rax
+; X32_WIN: movl __tls_index, %eax
+; X32_WIN: movl %fs:__tls_array, %ecx
+; X32_WIN: leal _i@SECREL(%eax), %eax
+; X64_WIN: movl _tls_index(%rip), %eax
+; X64_WIN: movq %gs:88, %rcx
+; X64_WIN: addq $i@SECREL, %rax
diff --git a/test/CodeGen/X86/tls5.ll b/test/CodeGen/X86/tls5.ll
index 4d2cc02..3cc6dab 100644
--- a/test/CodeGen/X86/tls5.ll
+++ b/test/CodeGen/X86/tls5.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movl	%gs:i@NTPOFF, %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movl	%fs:i@TPOFF, %eax} %t2
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu | FileCheck -check-prefix=X32_LINUX %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu | FileCheck -check-prefix=X64_LINUX %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-pc-win32 | FileCheck -check-prefix=X64_WIN %s
+; RUN: llc < %s -march=x86 -mtriple=i386-pc-win32 | FileCheck -check-prefix=X32_WIN %s
 
 @i = internal thread_local global i32 15
 
@@ -10,3 +10,10 @@ entry:
 	%tmp1 = load i32* @i
 	ret i32 %tmp1
 }
+; X32_LINUX: movl %gs:i@NTPOFF, %eax
+; X64_LINUX: movl %fs:i@TPOFF, %eax
+; X32_WIN: movl __tls_index, %eax
+; X32_WIN: movl %fs:__tls_array, %ecx
+; X32_WIN: movl _i@SECREL(%eax), %eax
+; X64_WIN: movl _tls_index(%rip), %eax
+; X64_WIN: movabsq $i@SECREL, %rcx
diff --git a/test/CodeGen/X86/tls6.ll b/test/CodeGen/X86/tls6.ll
index 505106e..c98ad7c 100644
--- a/test/CodeGen/X86/tls6.ll
+++ b/test/CodeGen/X86/tls6.ll
@@ -1,9 +1,7 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movl	%gs:0, %eax} %t
-; RUN: grep {leal	i@NTPOFF(%eax), %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movq	%fs:0, %rax} %t2
-; RUN: grep {leaq	i@TPOFF(%rax), %rax} %t2
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu | FileCheck -check-prefix=X32_LINUX %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu | FileCheck -check-prefix=X64_LINUX %s
+; RUN: llc < %s -march=x86 -mtriple=i386-pc-win32 | FileCheck -check-prefix=X32_WIN %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-pc-win32 | FileCheck -check-prefix=X64_WIN %s
 
 @i = internal thread_local global i32 15
 
@@ -11,3 +9,13 @@ define i32* @f() {
 entry:
 	ret i32* @i
 }
+; X32_LINUX: movl %gs:0, %eax
+; X32_LINUX: leal i@NTPOFF(%eax), %eax
+; X64_LINUX: movq %fs:0, %rax
+; X64_LINUX: leaq i@TPOFF(%rax), %rax
+; X32_WIN: movl __tls_index, %eax
+; X32_WIN: movl %fs:__tls_array, %ecx
+; X32_WIN: leal _i@SECREL(%eax), %eax
+; X64_WIN: movl _tls_index(%rip), %eax
+; X64_WIN: movq %gs:88, %rcx
+; X64_WIN: addq $i@SECREL, %rax
diff --git a/test/CodeGen/X86/twoaddr-lea.ll b/test/CodeGen/X86/twoaddr-lea.ll
index b7fe039..9d58019 100644
--- a/test/CodeGen/X86/twoaddr-lea.ll
+++ b/test/CodeGen/X86/twoaddr-lea.ll
@@ -5,7 +5,7 @@
 ;; allocator turns the shift into an LEA.  This also occurs for ADD.
 
 ; Check that the shift gets turned into an LEA.
-; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-apple-darwin | FileCheck %s
 
 @G = external global i32
 
diff --git a/test/CodeGen/X86/uint64-to-float.ll b/test/CodeGen/X86/uint64-to-float.ll
index 1dbbdcf..e853e77 100644
--- a/test/CodeGen/X86/uint64-to-float.ll
+++ b/test/CodeGen/X86/uint64-to-float.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 | FileCheck %s
+; RUN: llc < %s -mcpu=generic -march=x86-64 | FileCheck %s
 ; Verify that we are using the efficient uitofp --> sitofp lowering illustrated
 ; by the compiler_rt implementation of __floatundisf.
 ; <rdar://problem/8493982>
@@ -6,37 +6,12 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.0.0"
 
-; FIXME: This test could generate this code:
-;
-; ## BB#0:                                ## %entry
-; 	testq	%rdi, %rdi
-; 	jns	LBB0_2
-; ## BB#1:
-; 	movq	%rdi, %rax
-; 	shrq	%rax
-; 	andq	$1, %rdi
-; 	orq	%rax, %rdi
-; 	cvtsi2ssq	%rdi, %xmm0
-; 	addss	%xmm0, %xmm0
-; 	ret
-; LBB0_2:                                 ## %entry
-; 	cvtsi2ssq	%rdi, %xmm0
-; 	ret
-;
-; The blocks come from lowering:
-;
-;   %vreg7<def> = CMOV_FR32 %vreg6<kill>, %vreg5<kill>, 15, %EFLAGS<imp-use>; FR32:%vreg7,%vreg6,%vreg5
-;
-; If the instruction had an EFLAGS<kill> flag, it wouldn't need to mark EFLAGS
-; as live-in on the new blocks, and machine sinking would be able to sink
-; everything below the test.
-
-; CHECK: shrq
-; CHECK: andq
-; CHECK-NEXT: orq
 ; CHECK: testq %rdi, %rdi
 ; CHECK-NEXT: jns LBB0_2
-; CHECK: cvtsi2ss
+; CHECK: shrq
+; CHECK-NEXT: andq
+; CHECK-NEXT: orq
+; CHECK-NEXT: cvtsi2ss
 ; CHECK: LBB0_2
 ; CHECK-NEXT: cvtsi2ss
 define float @test(i64 %a) {
diff --git a/test/CodeGen/X86/unreachable-stack-protector.ll b/test/CodeGen/X86/unreachable-stack-protector.ll
index eeebcee..b066297 100644
--- a/test/CodeGen/X86/unreachable-stack-protector.ll
+++ b/test/CodeGen/X86/unreachable-stack-protector.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s | FileCheck %s
+; RUN: llc < %s -disable-cgp-delete-dead-blocks | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.0.0"
 
diff --git a/test/CodeGen/X86/v-binop-widen.ll b/test/CodeGen/X86/v-binop-widen.ll
index 3bee700..8655c6c 100644
--- a/test/CodeGen/X86/v-binop-widen.ll
+++ b/test/CodeGen/X86/v-binop-widen.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=x86 -mattr=+sse < %s | FileCheck %s
+; RUN: llc -mcpu=generic -march=x86 -mattr=+sse < %s | FileCheck %s
 ; CHECK: divss
 ; CHECK: divps
 ; CHECK: divps
diff --git a/test/CodeGen/X86/vec_call.ll b/test/CodeGen/X86/vec_call.ll
index b3efc7b..f2fc7e7 100644
--- a/test/CodeGen/X86/vec_call.ll
+++ b/test/CodeGen/X86/vec_call.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin8 | \
+; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin8 | \
 ; RUN:   grep {subl.*60}
-; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin8 | \
+; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin8 | \
 ; RUN:   grep {movaps.*32}
 
 
diff --git a/test/CodeGen/X86/vec_fpext.ll b/test/CodeGen/X86/vec_fpext.ll
new file mode 100644
index 0000000..05b263e
--- /dev/null
+++ b/test/CodeGen/X86/vec_fpext.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=x86 -mattr=+sse41,-avx | FileCheck %s
+
+; PR11674
+define void @fpext_frommem(<2 x float>* %in, <2 x double>* %out) {
+entry:
+; TODO: We should be able to generate cvtps2pd for the load.
+; For now, just check that we generate something sane.
+; CHECK: cvtss2sd
+; CHECK: cvtss2sd
+  %0 = load <2 x float>* %in, align 8
+  %1 = fpext <2 x float> %0 to <2 x double>
+  store <2 x double> %1, <2 x double>* %out, align 1
+  ret void
+}
diff --git a/test/CodeGen/X86/vector-gep.ll b/test/CodeGen/X86/vector-gep.ll
index d032eda..3476e36 100644
--- a/test/CodeGen/X86/vector-gep.ll
+++ b/test/CodeGen/X86/vector-gep.ll
@@ -1,27 +1,27 @@
 ; RUN: llc < %s -march=x86 -mcpu=corei7-avx | FileCheck %s
 ; RUN: opt -instsimplify %s -disable-output
 
-;CHECK: AGEP0
+;CHECK: AGEP0:
 define <4 x i32*> @AGEP0(i32* %ptr) nounwind {
 entry:
   %vecinit.i = insertelement <4 x i32*> undef, i32* %ptr, i32 0
   %vecinit2.i = insertelement <4 x i32*> %vecinit.i, i32* %ptr, i32 1
   %vecinit4.i = insertelement <4 x i32*> %vecinit2.i, i32* %ptr, i32 2
   %vecinit6.i = insertelement <4 x i32*> %vecinit4.i, i32* %ptr, i32 3
-;CHECK: pslld
+;CHECK: pslld $2
 ;CHECK: padd
   %A2 = getelementptr <4 x i32*> %vecinit6.i, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
-;CHECK: pslld
+;CHECK: pslld $2
 ;CHECK: padd
   %A3 = getelementptr <4 x i32*> %A2, <4 x i32> <i32 10, i32 14, i32 19, i32 233>
   ret <4 x i32*> %A3
 ;CHECK: ret
 }
 
-;CHECK: AGEP1
+;CHECK: AGEP1:
 define i32 @AGEP1(<4 x i32*> %param) nounwind {
 entry:
-;CHECK: pslld
+;CHECK: pslld $2
 ;CHECK: padd
   %A2 = getelementptr <4 x i32*> %param, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
   %k = extractelement <4 x i32*> %A2, i32 3
@@ -30,10 +30,10 @@ entry:
 ;CHECK: ret
 }
 
-;CHECK: AGEP2
+;CHECK: AGEP2:
 define i32 @AGEP2(<4 x i32*> %param, <4 x i32> %off) nounwind {
 entry:
-;CHECK: pslld
+;CHECK: pslld $2
 ;CHECK: padd
   %A2 = getelementptr <4 x i32*> %param, <4 x i32> %off
   %k = extractelement <4 x i32*> %A2, i32 3
@@ -42,10 +42,10 @@ entry:
 ;CHECK: ret
 }
 
-;CHECK: AGEP3
+;CHECK: AGEP3:
 define <4 x i32*> @AGEP3(<4 x i32*> %param, <4 x i32> %off) nounwind {
 entry:
-;CHECK: pslld
+;CHECK: pslld $2
 ;CHECK: padd
   %A2 = getelementptr <4 x i32*> %param, <4 x i32> %off
   %v = alloca i32
@@ -54,24 +54,35 @@ entry:
 ;CHECK: ret
 }
 
-;CHECK: AGEP4
-define <4 x i8*> @AGEP4(<4 x i8*> %param, <4 x i32> %off) nounwind {
+;CHECK: AGEP4:
+define <4 x i16*> @AGEP4(<4 x i16*> %param, <4 x i32> %off) nounwind {
 entry:
-;CHECK: pslld
+; Multiply offset by two (add it to itself).
 ;CHECK: padd
-  %A = getelementptr <4 x i8*> %param, <4 x i32> %off
-  ret <4 x i8*> %A
+; Add the base to the offset.
+;CHECK: padd
+  %A = getelementptr <4 x i16*> %param, <4 x i32> %off
+  ret <4 x i16*> %A
 ;CHECK: ret
 }
 
-;CHECK: AGEP5
+;CHECK: AGEP5:
 define <4 x i8*> @AGEP5(<4 x i8*> %param, <4 x i8> %off) nounwind {
 entry:
-;CHECK: pslld
-;CHECK: padd
+;CHECK: paddd
   %A = getelementptr <4 x i8*> %param, <4 x i8> %off
   ret <4 x i8*> %A
 ;CHECK: ret
 }
 
 
+; The size of each element is 1 byte. No need to multiply by element size.
+;CHECK: AGEP6:
+define <4 x i8*> @AGEP6(<4 x i8*> %param, <4 x i32> %off) nounwind {
+entry:
+;CHECK-NOT: pslld
+  %A = getelementptr <4 x i8*> %param, <4 x i32> %off
+  ret <4 x i8*> %A
+;CHECK: ret
+}
+
diff --git a/test/CodeGen/X86/widen_arith-1.ll b/test/CodeGen/X86/widen_arith-1.ll
index 85367e8..661cde8 100644
--- a/test/CodeGen/X86/widen_arith-1.ll
+++ b/test/CodeGen/X86/widen_arith-1.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 |  FileCheck %s
+; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+sse42 |  FileCheck %s
 
 define void @update(<3 x i8>* %dst, <3 x i8>* %src, i32 %n) nounwind {
 entry:
diff --git a/test/CodeGen/X86/widen_arith-3.ll b/test/CodeGen/X86/widen_arith-3.ll
index b959ce8..f55b184 100644
--- a/test/CodeGen/X86/widen_arith-3.ll
+++ b/test/CodeGen/X86/widen_arith-3.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 -post-RA-scheduler=true | FileCheck %s
+; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+sse42 -post-RA-scheduler=true | FileCheck %s
 ; CHECK: incl
 ; CHECK: incl
 ; CHECK: incl
diff --git a/test/CodeGen/X86/widen_load-2.ll b/test/CodeGen/X86/widen_load-2.ll
index 29689dd..79aa000 100644
--- a/test/CodeGen/X86/widen_load-2.ll
+++ b/test/CodeGen/X86/widen_load-2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -o - -march=x86-64 -mattr=+sse42 | FileCheck %s
+; RUN: llc < %s -o - -mcpu=generic -march=x86-64 -mattr=+sse42 | FileCheck %s
 
 ; Test based on pr5626 to load/store
 ;
diff --git a/test/CodeGen/X86/widen_shuffle-1.ll b/test/CodeGen/X86/widen_shuffle-1.ll
index 24608d0..7bebb27 100644
--- a/test/CodeGen/X86/widen_shuffle-1.ll
+++ b/test/CodeGen/X86/widen_shuffle-1.ll
@@ -10,6 +10,7 @@ entry:
 	%val = fadd <3 x float> %x, %src2
 	store <3 x float> %val, <3 x float>* %dst.addr
 	ret void
+; CHECK: ret
 }
 
 
@@ -23,6 +24,7 @@ entry:
 	%val = fadd <3 x float> %x, %src2
 	store <3 x float> %val, <3 x float>* %dst.addr
 	ret void
+; CHECK: ret
 }
 
 ; Example of when widening a v3float operation causes the DAG to replace a node
@@ -31,7 +33,7 @@ entry:
 define void @shuf3(<4 x float> %tmp10, <4 x float> %vecinit15, <4 x float>* %dst) nounwind {
 entry:
 ; CHECK: shuf3:
-; CHECK: pshufd
+; CHECK: shufps
   %shuffle.i.i.i12 = shufflevector <4 x float> %tmp10, <4 x float> %vecinit15, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
   %tmp25.i.i = shufflevector <4 x float> %shuffle.i.i.i12, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2> 
   %tmp1.i.i = shufflevector <3 x float> %tmp25.i.i, <3 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -45,6 +47,7 @@ entry:
   %shuffle.i.i.i21 = shufflevector <4 x float> %tmp2.i18, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
   store <4 x float> %shuffle.i.i.i21, <4 x float>* %dst
   ret void
+; CHECK: ret
 }
 
 ; PR10421: make sure we correctly handle extreme widening with CONCAT_VECTORS
@@ -53,6 +56,7 @@ define <8 x i8> @shuf4(<4 x i8> %a, <4 x i8> %b) nounwind readnone {
 ; CHECK-NOT: punpckldq
   %vshuf = shufflevector <4 x i8> %a, <4 x i8> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   ret <8 x i8> %vshuf
+; CHECK: ret
 }
 
 ; PR11389: another CONCAT_VECTORS case
@@ -61,4 +65,5 @@ define void @shuf5(<8 x i8>* %p) nounwind {
   %v = shufflevector <2 x i8> <i8 4, i8 33>, <2 x i8> undef, <8 x i32> <i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
   store <8 x i8> %v, <8 x i8>* %p, align 8
   ret void
+; CHECK: ret
 }
diff --git a/test/CodeGen/X86/win32_sret.ll b/test/CodeGen/X86/win32_sret.ll
new file mode 100644
index 0000000..878c6db
--- /dev/null
+++ b/test/CodeGen/X86/win32_sret.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN_X32
+; RUN: llc < %s -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X32
+; RUN: llc < %s -mtriple=i386-pc-linux | FileCheck %s -check-prefix=LINUX
+; RUN: llc < %s -O0 -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN_X32
+; RUN: llc < %s -O0 -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X32
+; RUN: llc < %s -O0 -mtriple=i386-pc-linux | FileCheck %s -check-prefix=LINUX
+
+; The SysV ABI used by most Unixes and Mingw on x86 specifies that an sret pointer
+; is callee-cleanup. However, in MSVC's cdecl calling convention, sret pointer
+; arguments are caller-cleanup like normal arguments.
+
+define void @sret1(i8* sret) nounwind {
+entry:
+; WIN_X32:    {{ret$}}
+; MINGW_X32:  ret $4
+; LINUX:      ret $4
+  ret void
+}
+
+define void @sret2(i32* sret %x, i32 %y) nounwind {
+entry:
+; WIN_X32:    {{ret$}}
+; MINGW_X32:  ret $4
+; LINUX:      ret $4
+  store i32 %y, i32* %x
+  ret void
+}
+
diff --git a/test/CodeGen/X86/win64_alloca_dynalloca.ll b/test/CodeGen/X86/win64_alloca_dynalloca.ll
index e39d007..a961c6a 100644
--- a/test/CodeGen/X86/win64_alloca_dynalloca.ll
+++ b/test/CodeGen/X86/win64_alloca_dynalloca.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -join-physregs -mtriple=x86_64-mingw32     | FileCheck %s -check-prefix=M64
-; RUN: llc < %s -join-physregs -mtriple=x86_64-win32       | FileCheck %s -check-prefix=W64
-; RUN: llc < %s -join-physregs -mtriple=x86_64-win32-macho | FileCheck %s -check-prefix=EFI
+; RUN: llc < %s -join-physregs -mcpu=generic -mtriple=x86_64-mingw32     | FileCheck %s -check-prefix=M64
+; RUN: llc < %s -join-physregs -mcpu=generic -mtriple=x86_64-win32       | FileCheck %s -check-prefix=W64
+; RUN: llc < %s -join-physregs -mcpu=generic -mtriple=x86_64-win32-macho | FileCheck %s -check-prefix=EFI
 ; PR8777
 ; PR8778
 
diff --git a/test/CodeGen/X86/win64_vararg.ll b/test/CodeGen/X86/win64_vararg.ll
index efe8bca..52bc509 100644
--- a/test/CodeGen/X86/win64_vararg.ll
+++ b/test/CodeGen/X86/win64_vararg.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-pc-win32 | FileCheck %s
 
 ; Verify that the var arg parameters which are passed in registers are stored
 ; in home stack slots allocated by the caller and that AP is correctly
diff --git a/test/CodeGen/X86/win_ftol2.ll b/test/CodeGen/X86/win_ftol2.ll
new file mode 100644
index 0000000..596b426
--- /dev/null
+++ b/test/CodeGen/X86/win_ftol2.ll
@@ -0,0 +1,144 @@
+; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=FTOL
+; RUN: llc < %s -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=COMPILERRT
+; RUN: llc < %s -mtriple=i686-pc-linux | FileCheck %s -check-prefix=COMPILERRT
+; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s -check-prefix=COMPILERRT
+; RUN: llc < %s -mtriple=x86_64-pc-mingw32 | FileCheck %s -check-prefix=COMPILERRT
+; RUN: llc < %s -mtriple=x86_64-pc-linux | FileCheck %s -check-prefix=COMPILERRT
+; RUN: llc < %s -mattr=-sse -O0 -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=FTOL_2
+
+; Win32 targets use the MSVCRT _ftol2 runtime function for fptoui to i64 (and,
+; per double_ui32_5 below, to i32). Its calling convention is nonstandard: the
+; input is expected on the x87 stack rather than the call stack, and is popped
+; by the callee. Mingw32 uses normal cdecl compiler-rt functions instead.
+
+define i64 @double_ui64(double %x) nounwind {
+entry:
+; COMPILERRT: @double_ui64
+; COMPILERRT-NOT: calll __ftol2
+; FTOL: @double_ui64
+; FTOL: fldl
+; FTOL: calll __ftol2
+; FTOL-NOT: fstp
+  %0 = fptoui double %x to i64
+  ret i64 %0
+}
+
+define i64 @float_ui64(float %x) nounwind {
+entry:
+; COMPILERRT: @float_ui64
+; COMPILERRT-NOT: calll __ftol2
+; FTOL: @float_ui64
+; FTOL: flds
+; FTOL: calll __ftol2
+; FTOL-NOT: fstp
+  %0 = fptoui float %x to i64
+  ret i64 %0
+}
+
+define i64 @double_ui64_2(double %x, double %y, double %z) nounwind {
+; COMPILERRT: @double_ui64_2
+; FTOL: @double_ui64_2
+; FTOL_2: @double_ui64_2
+;; stack is empty
+; FTOL_2: fldl
+;; stack is %z
+; FTOL_2: fldl
+;; stack is %y %z
+; FTOL_2: fldl
+;; stack is %x %y %z
+; FTOL_2: fdiv %st(0), %st(1)
+;; stack is %x %1 %z
+; FTOL_2: fsubp %st(2)
+;; stack is %1 %2
+; FTOL_2: fxch
+; FTOL_2-NOT: fld
+; FTOL_2-NOT: fst
+;; stack is %2 %1
+; FTOL_2: calll __ftol2
+; FTOL_2-NOT: fxch
+; FTOL_2-NOT: fld
+; FTOL_2-NOT: fst
+; FTOL_2: calll __ftol2
+;; stack is empty
+
+  %1 = fdiv double %x, %y
+  %2 = fsub double %x, %z
+  %3 = fptoui double %1 to i64
+  %4 = fptoui double %2 to i64
+  %5 = sub i64 %3, %4
+  ret i64 %5
+}
+
+define i64 @double_ui64_3(double %x, double %y, double %z) nounwind {
+; COMPILERRT: @double_ui64_3
+; FTOL: @double_ui64_3
+; FTOL_2: @double_ui64_3
+;; stack is empty
+; FTOL_2: fldl
+;; stack is %z
+; FTOL_2: fldl
+;; stack is %y %z
+; FTOL_2: fldl
+;; stack is %x %y %z
+; FTOL_2: fdiv %st(0), %st(1)
+;; stack is %x %1 %z
+; FTOL_2: fsubp %st(2)
+;; stack is %1 %2
+; FTOL_2-NOT: fxch
+; FTOL_2-NOT: fld
+; FTOL_2-NOT: fst
+;; stack is %1 %2 (still)
+; FTOL_2: calll __ftol2
+; FTOL_2-NOT: fxch
+; FTOL_2-NOT: fld
+; FTOL_2-NOT: fst
+; FTOL_2: calll __ftol2
+;; stack is empty
+
+  %1 = fdiv double %x, %y
+  %2 = fsub double %x, %z
+  %3 = fptoui double %1 to i64
+  %4 = fptoui double %2 to i64
+  %5 = sub i64 %4, %3
+  ret i64 %5
+}
+
+define {double, i64} @double_ui64_4(double %x, double %y) nounwind {
+; COMPILERRT: @double_ui64_4
+; FTOL: @double_ui64_4
+; FTOL_2: @double_ui64_4
+;; stack is empty
+; FTOL_2: fldl
+;; stack is %y
+; FTOL_2: fldl
+;; stack is %x %y
+; FTOL_2: fxch
+;; stack is %y %x
+; FTOL_2: calll __ftol2
+;; stack is %x
+; FTOL_2: fld %st(0)
+;; stack is %x %x
+; FTOL_2: calll __ftol2
+;; stack is %x
+
+  %1 = fptoui double %x to i64
+  %2 = fptoui double %y to i64
+  %3 = sub i64 %1, %2
+  %4 = insertvalue {double, i64} undef, double %x, 0
+  %5 = insertvalue {double, i64} %4, i64 %3, 1
+  ret {double, i64} %5
+}
+
+define i32 @double_ui32_5(double %X) {
+; FTOL: @double_ui32_5
+; FTOL: calll __ftol2
+  %tmp.1 = fptoui double %X to i32
+  ret i32 %tmp.1
+}
+
+define i64 @double_ui64_5(double %X) {
+; FTOL: @double_ui64_5
+; FTOL: calll __ftol2
+  %tmp.1 = fptoui double %X to i64
+  ret i64 %tmp.1
+}
diff --git a/test/CodeGen/X86/xop-intrinsics-x86_64.ll b/test/CodeGen/X86/xop-intrinsics-x86_64.ll
new file mode 100644
index 0000000..a2521b0
--- /dev/null
+++ b/test/CodeGen/X86/xop-intrinsics-x86_64.ll
@@ -0,0 +1,969 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mattr=+avx,+fma4,+xop | FileCheck %s
+
+define <2 x double> @test_int_x86_xop_vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
+  ; CHECK: vpermil2pd
+  %res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 1) ;  [#uses=1]
+  ret <2 x double> %res
+}
+define <2 x double> @test_int_x86_xop_vpermil2pd_mr(<2 x double> %a0, <2 x double>* %a1, <2 x double> %a2) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vpermil2pd
+  %vec = load <2 x double>* %a1
+  %res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %vec, <2 x double> %a2, i8 1) ;  [#uses=1]
+  ret <2 x double> %res
+}
+define <2 x double> @test_int_x86_xop_vpermil2pd_rm(<2 x double> %a0, <2 x double> %a1, <2 x double>* %a2) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vpermil2pd
+  %vec = load <2 x double>* %a2
+  %res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %vec, i8 1) ;  [#uses=1]
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
+
+define <4 x double> @test_int_x86_xop_vpermil2pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
+  ; CHECK: vpermil2pd
+  ; CHECK: ymm
+  %res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 2) ;
+  ret <4 x double> %res
+}
+define <4 x double> @test_int_x86_xop_vpermil2pd_256_mr(<4 x double> %a0, <4 x double>* %a1, <4 x double> %a2) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vpermil2pd
+  ; CHECK: ymm
+  %vec = load <4 x double>* %a1
+  %res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %vec, <4 x double> %a2, i8 2) ;
+  ret <4 x double> %res
+}
+define <4 x double> @test_int_x86_xop_vpermil2pd_256_rm(<4 x double> %a0, <4 x double> %a1, <4 x double>* %a2) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vpermil2pd
+  ; CHECK: ymm
+  %vec = load <4 x double>* %a2
+  %res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %vec, i8 2) ;
+  ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone
+
+define <4 x float> @test_int_x86_xop_vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
+  ; CHECK: vpermil2ps
+  %res = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 3) ;
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
+
+define <8 x float> @test_int_x86_xop_vpermil2ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
+  ; CHECK: vpermil2ps
+  ; CHECK: ymm
+  %res = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 4) ;
+  ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcmov(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) {
+  ; CHECK: vpcmov
+  %res = call <2 x i64> @llvm.x86.xop.vpcmov(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcmov(<2 x i64>, <2 x i64>, <2 x i64>) nounwind readnone
+
+define <4 x i64> @test_int_x86_xop_vpcmov_256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) {
+  ; CHECK: vpcmov
+  ; CHECK: ymm
+  %res = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) ;
+  ret <4 x i64> %res
+}
+define <4 x i64> @test_int_x86_xop_vpcmov_256_mr(<4 x i64> %a0, <4 x i64>* %a1, <4 x i64> %a2) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vpcmov
+  ; CHECK: ymm
+  %vec = load <4 x i64>* %a1
+  %res = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %vec, <4 x i64> %a2) ;
+  ret <4 x i64> %res
+}
+define <4 x i64> @test_int_x86_xop_vpcmov_256_rm(<4 x i64> %a0, <4 x i64> %a1, <4 x i64>* %a2) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vpcmov
+  ; CHECK: ymm
+ %vec = load <4 x i64>* %a2
+ %res = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %vec) ;
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64>, <4 x i64>, <4 x i64>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomeqb(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpcomb
+  %res = call <16 x i8> @llvm.x86.xop.vpcomeqb(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+define <16 x i8> @test_int_x86_xop_vpcomeqb_mem(<16 x i8> %a0, <16 x i8>* %a1) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vpcomb
+  %vec = load <16 x i8>* %a1
+  %res = call <16 x i8> @llvm.x86.xop.vpcomeqb(<16 x i8> %a0, <16 x i8> %vec) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomeqb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomeqw(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomeqw(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomeqw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomeqd(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomd
+  %res = call <4 x i32> @llvm.x86.xop.vpcomeqd(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomeqd(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomeqq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomeqq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomeqq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomequb(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpcomub
+  %res = call <16 x i8> @llvm.x86.xop.vpcomequb(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomequb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomequd(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomud
+  %res = call <4 x i32> @llvm.x86.xop.vpcomequd(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomequd(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomequq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomuq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomequq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomequq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomequw(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomuw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomequw(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomequw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomfalseb(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpcomb
+  %res = call <16 x i8> @llvm.x86.xop.vpcomfalseb(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomfalseb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomfalsed(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomd
+  %res = call <4 x i32> @llvm.x86.xop.vpcomfalsed(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomfalsed(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomfalseq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomfalseq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomfalseq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomfalseub(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpcomub
+  %res = call <16 x i8> @llvm.x86.xop.vpcomfalseub(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomfalseub(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomfalseud(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomud
+  %res = call <4 x i32> @llvm.x86.xop.vpcomfalseud(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomfalseud(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomfalseuq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomuq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomfalseuq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomfalseuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomfalseuw(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomuw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomfalseuw(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomfalseuw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomfalsew(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomfalsew(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomfalsew(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomgeb(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpcomb
+  %res = call <16 x i8> @llvm.x86.xop.vpcomgeb(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomgeb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomged(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomd
+  %res = call <4 x i32> @llvm.x86.xop.vpcomged(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomged(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomgeq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomgeq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomgeq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomgeub(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpcomub
+  %res = call <16 x i8> @llvm.x86.xop.vpcomgeub(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomgeub(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomgeud(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomud
+  %res = call <4 x i32> @llvm.x86.xop.vpcomgeud(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomgeud(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomgeuq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomuq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomgeuq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomgeuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomgeuw(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomuw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomgeuw(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomgeuw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomgew(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomgew(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomgew(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomgtb(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpcomb
+  %res = call <16 x i8> @llvm.x86.xop.vpcomgtb(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomgtb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomgtd(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomd
+  %res = call <4 x i32> @llvm.x86.xop.vpcomgtd(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomgtd(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomgtq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomgtq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomgtq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomgtub(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpcomub
+  %res = call <16 x i8> @llvm.x86.xop.vpcomgtub(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomgtub(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomgtud(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomud
+  %res = call <4 x i32> @llvm.x86.xop.vpcomgtud(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomgtud(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomgtuq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomuq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomgtuq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomgtuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomgtuw(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomuw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomgtuw(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomgtuw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomgtw(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomgtw(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomgtw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomleb(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpcomb
+  %res = call <16 x i8> @llvm.x86.xop.vpcomleb(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomleb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomled(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomd
+  %res = call <4 x i32> @llvm.x86.xop.vpcomled(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomled(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomleq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomleq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomleq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomleub(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpcomub
+  %res = call <16 x i8> @llvm.x86.xop.vpcomleub(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomleub(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomleud(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomud
+  %res = call <4 x i32> @llvm.x86.xop.vpcomleud(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomleud(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomleuq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomuq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomleuq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomleuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomleuw(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomuw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomleuw(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomleuw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomlew(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomlew(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomlew(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomltb(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpcomb
+  %res = call <16 x i8> @llvm.x86.xop.vpcomltb(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomltb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomltd(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomd
+  %res = call <4 x i32> @llvm.x86.xop.vpcomltd(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomltd(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomltq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomltq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomltq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomltub(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpcomub
+  %res = call <16 x i8> @llvm.x86.xop.vpcomltub(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomltub(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomltud(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomud
+  %res = call <4 x i32> @llvm.x86.xop.vpcomltud(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomltud(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomltuq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomuq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomltuq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomltuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomltuw(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomuw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomltuw(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomltuw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomltw(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomltw(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomltw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomneb(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpcomb
+  %res = call <16 x i8> @llvm.x86.xop.vpcomneb(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomneb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomned(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomd
+  %res = call <4 x i32> @llvm.x86.xop.vpcomned(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomned(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomneq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomneq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomneq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomneub(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpcomub
+  %res = call <16 x i8> @llvm.x86.xop.vpcomneub(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomneub(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomneud(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomud
+  %res = call <4 x i32> @llvm.x86.xop.vpcomneud(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomneud(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomneuq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomuq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomneuq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomneuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomneuw(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomuw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomneuw(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomneuw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomnew(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomnew(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomnew(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomtrueb(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpcomb
+  %res = call <16 x i8> @llvm.x86.xop.vpcomtrueb(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomtrueb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomtrued(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomd
+  %res = call <4 x i32> @llvm.x86.xop.vpcomtrued(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomtrued(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomtrueq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomtrueq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomtrueq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomtrueub(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpcomub
+  %res = call <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomtrueud(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomud
+  %res = call <4 x i32> @llvm.x86.xop.vpcomtrueud(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomtrueud(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomtrueuq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomuq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomtrueuq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomtrueuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomtrueuw(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomuw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomtrueuw(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomtrueuw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomtruew(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomtruew(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomtruew(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vphaddbd(<16 x i8> %a0) {
+  ; CHECK: vphaddbd
+  %res = call <4 x i32> @llvm.x86.xop.vphaddbd(<16 x i8> %a0) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vphaddbd(<16 x i8>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vphaddbq(<16 x i8> %a0) {
+  ; CHECK: vphaddbq
+  %res = call <2 x i64> @llvm.x86.xop.vphaddbq(<16 x i8> %a0) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vphaddbq(<16 x i8>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vphaddbw(<16 x i8> %a0) {
+  ; CHECK: vphaddbw
+  %res = call <8 x i16> @llvm.x86.xop.vphaddbw(<16 x i8> %a0) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vphaddbw(<16 x i8>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vphadddq(<4 x i32> %a0) {
+  ; CHECK: vphadddq
+  %res = call <2 x i64> @llvm.x86.xop.vphadddq(<4 x i32> %a0) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vphadddq(<4 x i32>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vphaddubd(<16 x i8> %a0) {
+  ; CHECK: vphaddubd
+  %res = call <4 x i32> @llvm.x86.xop.vphaddubd(<16 x i8> %a0) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vphaddubd(<16 x i8>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vphaddubq(<16 x i8> %a0) {
+  ; CHECK: vphaddubq
+  %res = call <2 x i64> @llvm.x86.xop.vphaddubq(<16 x i8> %a0) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vphaddubq(<16 x i8>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vphaddubw(<16 x i8> %a0) {
+  ; CHECK: vphaddubw
+  %res = call <8 x i16> @llvm.x86.xop.vphaddubw(<16 x i8> %a0) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vphaddubw(<16 x i8>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vphaddudq(<4 x i32> %a0) {
+  ; CHECK: vphaddudq
+  %res = call <2 x i64> @llvm.x86.xop.vphaddudq(<4 x i32> %a0) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vphaddudq(<4 x i32>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vphadduwd(<8 x i16> %a0) {
+  ; CHECK: vphadduwd
+  %res = call <4 x i32> @llvm.x86.xop.vphadduwd(<8 x i16> %a0) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vphadduwd(<8 x i16>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vphadduwq(<8 x i16> %a0) {
+  ; CHECK: vphadduwq
+  %res = call <2 x i64> @llvm.x86.xop.vphadduwq(<8 x i16> %a0) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vphadduwq(<8 x i16>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vphaddwd(<8 x i16> %a0) {
+  ; CHECK: vphaddwd
+  %res = call <4 x i32> @llvm.x86.xop.vphaddwd(<8 x i16> %a0) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vphaddwd(<8 x i16>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vphaddwq(<8 x i16> %a0) {
+  ; CHECK: vphaddwq
+  %res = call <2 x i64> @llvm.x86.xop.vphaddwq(<8 x i16> %a0) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vphaddwq(<8 x i16>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vphsubbw(<16 x i8> %a0) {
+  ; CHECK: vphsubbw
+  %res = call <8 x i16> @llvm.x86.xop.vphsubbw(<16 x i8> %a0) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vphsubbw(<16 x i8>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vphsubdq(<4 x i32> %a0) {
+  ; CHECK: vphsubdq
+  %res = call <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32> %a0) ;
+  ret <2 x i64> %res
+}
+define <2 x i64> @test_int_x86_xop_vphsubdq_mem(<4 x i32>* %a0) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vphsubdq
+  %vec = load <4 x i32>* %a0
+  %res = call <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32> %vec) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vphsubwd(<8 x i16> %a0) {
+  ; CHECK: vphsubwd
+  %res = call <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16> %a0) ;
+  ret <4 x i32> %res
+}
+define <4 x i32> @test_int_x86_xop_vphsubwd_mem(<8 x i16>* %a0) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vphsubwd
+  %vec = load <8 x i16>* %a0
+  %res = call <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16> %vec) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpmacsdd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) {
+  ; CHECK: vpmacsdd
+  %res = call <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpmacsdqh(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) {
+  ; CHECK: vpmacsdqh
+  %res = call <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpmacsdql(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) {
+  ; CHECK: vpmacsdql
+  %res = call <2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpmacssdd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) {
+  ; CHECK: vpmacssdd
+  %res = call <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpmacssdqh(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) {
+  ; CHECK: vpmacssdqh
+  %res = call <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpmacssdql(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) {
+  ; CHECK: vpmacssdql
+  %res = call <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpmacsswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) {
+  ; CHECK: vpmacsswd
+  %res = call <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpmacssww(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) {
+  ; CHECK: vpmacssww
+  %res = call <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpmacswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) {
+  ; CHECK: vpmacswd
+  %res = call <4 x i32> @llvm.x86.xop.vpmacswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpmacswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpmacsww(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) {
+  ; CHECK: vpmacsww
+  %res = call <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpmadcsswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) {
+  ; CHECK: vpmadcsswd
+  %res = call <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpmadcswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) {
+  ; CHECK: vpmadcswd
+  %res = call <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) ;
+  ret <4 x i32> %res
+}
+define <4 x i32> @test_int_x86_xop_vpmadcswd_mem(<8 x i16> %a0, <8 x i16>* %a1, <4 x i32> %a2) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vpmadcswd
+  %vec = load <8 x i16>* %a1
+  %res = call <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16> %a0, <8 x i16> %vec, <4 x i32> %a2) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) {
+  ; CHECK: vpperm
+  %res = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) ;
+  ret <16 x i8> %res
+}
+define <16 x i8> @test_int_x86_xop_vpperm_rm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8>* %a2) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vpperm
+  %vec = load <16 x i8>* %a2
+  %res = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %vec) ;
+  ret <16 x i8> %res
+}
+define <16 x i8> @test_int_x86_xop_vpperm_mr(<16 x i8> %a0, <16 x i8>* %a1, <16 x i8> %a2) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vpperm
+  %vec = load <16 x i8>* %a1
+  %res = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %vec, <16 x i8> %a2) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpperm(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vprotb(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vprotb
+  %res = call <16 x i8> @llvm.x86.xop.vprotb(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vprotb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vprotd(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vprotd
+  %res = call <4 x i32> @llvm.x86.xop.vprotd(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vprotd(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vprotq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vprotq
+  %res = call <2 x i64> @llvm.x86.xop.vprotq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vprotq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vprotw(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vprotw
+  %res = call <8 x i16> @llvm.x86.xop.vprotw(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vprotw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpshab(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpshab
+  %res = call <16 x i8> @llvm.x86.xop.vpshab(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpshab(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpshad(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpshad
+  %res = call <4 x i32> @llvm.x86.xop.vpshad(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpshad(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpshaq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpshaq
+  %res = call <2 x i64> @llvm.x86.xop.vpshaq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpshaq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpshaw(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpshaw
+  %res = call <8 x i16> @llvm.x86.xop.vpshaw(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpshaw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpshlb(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpshlb
+  %res = call <16 x i8> @llvm.x86.xop.vpshlb(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpshlb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpshld(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpshld
+  %res = call <4 x i32> @llvm.x86.xop.vpshld(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpshld(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpshlq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpshlq
+  %res = call <2 x i64> @llvm.x86.xop.vpshlq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpshlq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpshlw(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpshlw
+  %res = call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+define <8 x i16> @test_int_x86_xop_vpshlw_rm(<8 x i16> %a0, <8 x i16>* %a1) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vpshlw
+  %vec = load <8 x i16>* %a1
+  %res = call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %a0, <8 x i16> %vec) ;
+  ret <8 x i16> %res
+}
+define <8 x i16> @test_int_x86_xop_vpshlw_mr(<8 x i16>* %a0, <8 x i16> %a1) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vpshlw
+  %vec = load <8 x i16>* %a0
+  %res = call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %vec, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <4 x float> @test_int_x86_xop_vfrcz_ss(<4 x float> %a0, <4 x float> %a1) {
+  ; CHECK-NOT: mov
+  ; CHECK: vfrczss
+  %res = call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %a0, <4 x float> %a1) ;
+  ret <4 x float> %res
+}
+define <4 x float> @test_int_x86_xop_vfrcz_ss_mem(<4 x float> %a0, float* %a1) {
+  ; CHECK-NOT: mov
+  ; CHECK: vfrczss
+  %elem = load float* %a1
+  %vec = insertelement <4 x float> undef, float %elem, i32 0
+  %res = call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %a0, <4 x float> %vec) ;
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float>, <4 x float>) nounwind readnone
+
+define <2 x double> @test_int_x86_xop_vfrcz_sd(<2 x double> %a0, <2 x double> %a1) {
+  ; CHECK-NOT: mov
+  ; CHECK: vfrczsd
+  %res = call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %a0, <2 x double> %a1) ;
+  ret <2 x double> %res
+}
+define <2 x double> @test_int_x86_xop_vfrcz_sd_mem(<2 x double> %a0, double* %a1) {
+  ; CHECK-NOT: mov
+  ; CHECK: vfrczsd
+  %elem = load double* %a1
+  %vec = insertelement <2 x double> undef, double %elem, i32 0
+  %res = call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %a0, <2 x double> %vec) ;
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double>, <2 x double>) nounwind readnone
+
+define <2 x double> @test_int_x86_xop_vfrcz_pd(<2 x double> %a0) {
+  ; CHECK: vfrczpd
+  %res = call <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double> %a0) ;
+  ret <2 x double> %res
+}
+define <2 x double> @test_int_x86_xop_vfrcz_pd_mem(<2 x double>* %a0) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vfrczpd
+  %vec = load <2 x double>* %a0
+  %res = call <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double> %vec) ;
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double>) nounwind readnone
+
+define <4 x double> @test_int_x86_xop_vfrcz_pd_256(<4 x double> %a0) {
+  ; CHECK: vfrczpd
+  ; CHECK: ymm
+  %res = call <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double> %a0) ;
+  ret <4 x double> %res
+}
+define <4 x double> @test_int_x86_xop_vfrcz_pd_256_mem(<4 x double>* %a0) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vfrczpd
+  ; CHECK: ymm
+  %vec = load <4 x double>* %a0
+  %res = call <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double> %vec) ;
+  ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double>) nounwind readnone
+
+define <4 x float> @test_int_x86_xop_vfrcz_ps(<4 x float> %a0) {
+  ; CHECK: vfrczps
+  %res = call <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float> %a0) ;
+  ret <4 x float> %res
+}
+define <4 x float> @test_int_x86_xop_vfrcz_ps_mem(<4 x float>* %a0) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vfrczps
+  %vec = load <4 x float>* %a0
+  %res = call <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float> %vec) ;
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float>) nounwind readnone
+
+define <8 x float> @test_int_x86_xop_vfrcz_ps_256(<8 x float> %a0) {
+  ; CHECK: vfrczps
+  ; CHECK: ymm
+  %res = call <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float> %a0) ;
+  ret <8 x float> %res
+}
+define <8 x float> @test_int_x86_xop_vfrcz_ps_256_mem(<8 x float>* %a0) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vfrczps
+  ; CHECK: ymm
+  %vec = load <8 x float>* %a0
+  %res = call <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float> %vec) ;
+  ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float>) nounwind readnone
+
diff --git a/test/CodeGen/X86/zext-fold.ll b/test/CodeGen/X86/zext-fold.ll
index b3f5cdb..ff93c68 100644
--- a/test/CodeGen/X86/zext-fold.ll
+++ b/test/CodeGen/X86/zext-fold.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | FileCheck %s
+; RUN: llc < %s -mcpu=generic -march=x86 | FileCheck %s
 
 ;; Simple case
 define i32 @test1(i8 %x) nounwind readnone {
@@ -34,7 +34,7 @@ define void @test3(i8 %x) nounwind readnone {
   ret void
 }
 ; CHECK: test3
-; CHECK: movzbl 16(%esp), [[REGISTER:%e[a-z]{2}]]
+; CHECK: movzbl {{[0-9]+}}(%esp), [[REGISTER:%e[a-z]{2}]]
 ; CHECK-NEXT: movl [[REGISTER]], 4(%esp)
 ; CHECK-NEXT: andl $224, [[REGISTER]]
 ; CHECK-NEXT: movl [[REGISTER]], (%esp)
diff --git a/test/CodeGen/XCore/2011-08-01-DynamicAllocBug.ll b/test/CodeGen/XCore/2011-08-01-DynamicAllocBug.ll
index 7d6d7ba..84e21e4 100644
--- a/test/CodeGen/XCore/2011-08-01-DynamicAllocBug.ll
+++ b/test/CodeGen/XCore/2011-08-01-DynamicAllocBug.ll
@@ -16,5 +16,5 @@ allocas:
 ; CHECK: f:
 ; CHECK: ldaw [[REGISTER:r[0-9]+]], {{r[0-9]+}}[-r1]
 ; CHECK: set sp, [[REGISTER]]
-; CHECK extsp 1
-; CHECK bl g
+; CHECK: extsp 1
+; CHECK: bl g
diff --git a/test/CodeGen/XCore/dg.exp b/test/CodeGen/XCore/dg.exp
deleted file mode 100644
index 7110eab..0000000
--- a/test/CodeGen/XCore/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target XCore] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/XCore/lit.local.cfg b/test/CodeGen/XCore/lit.local.cfg
new file mode 100644
index 0000000..c697912
--- /dev/null
+++ b/test/CodeGen/XCore/lit.local.cfg
@@ -0,0 +1,13 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+# getRoot: follow the .parent links until reaching a config without a parent.
+def getRoot(config):
+    if not config.parent:
+        return config
+    return getRoot(config.parent)
+# Flag this directory unsupported when 'XCore' is not in targets_to_build.
+root = getRoot(config)
+
+targets = set(root.targets_to_build.split())
+if 'XCore' not in targets:
+    config.unsupported = True
+
diff --git a/test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll b/test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll
index 001e938..2557c9c 100644
--- a/test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll
+++ b/test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll
@@ -2,7 +2,6 @@
 ; Check struct X for dead variable xyz from inlined function foo.
 
 ; CHECK:	DW_TAG_structure_type
-; CHECK-NEXT:	DW_AT_sibling
 ; CHECK-NEXT:	DW_AT_name
  
 
diff --git a/test/DebugInfo/X86/2011-12-16-BadStructRef.ll b/test/DebugInfo/X86/2011-12-16-BadStructRef.ll
new file mode 100644
index 0000000..6e20169
--- /dev/null
+++ b/test/DebugInfo/X86/2011-12-16-BadStructRef.ll
@@ -0,0 +1,172 @@
+; RUN: llc -mtriple=x86_64-apple-macosx10.7 %s -o %t -filetype=obj
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+; CHECK: b_ref
+; CHECK-NOT: AT_bit_size
+
+%struct.bar = type { %struct.baz, %struct.baz* }
+%struct.baz = type { i32 }
+
+define i32 @main(i32 %argc, i8** %argv) uwtable ssp {
+entry:                                            ; spills argc/argv to stack slots, builds %myBar, returns 0
+  %retval = alloca i32, align 4
+  %argc.addr = alloca i32, align 4
+  %argv.addr = alloca i8**, align 8
+  %myBar = alloca %struct.bar, align 8
+  store i32 0, i32* %retval
+  store i32 %argc, i32* %argc.addr, align 4
+  call void @llvm.dbg.declare(metadata !{i32* %argc.addr}, metadata !49), !dbg !50
+  store i8** %argv, i8*** %argv.addr, align 8
+  call void @llvm.dbg.declare(metadata !{i8*** %argv.addr}, metadata !51), !dbg !52
+  call void @llvm.dbg.declare(metadata !{%struct.bar* %myBar}, metadata !53), !dbg !55 ; !53 is the local "myBar", typed as !5 ("bar")
+  call void @_ZN3barC1Ei(%struct.bar* %myBar, i32 1), !dbg !56 ; initializes %myBar with the constant 1
+  ret i32 0, !dbg !57
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+define linkonce_odr void @_ZN3barC1Ei(%struct.bar* %this, i32 %x) unnamed_addr uwtable ssp align 2 {
+entry:                                            ; spills %this and %x, then forwards both to @_ZN3barC2Ei
+  %this.addr = alloca %struct.bar*, align 8
+  %x.addr = alloca i32, align 4
+  store %struct.bar* %this, %struct.bar** %this.addr, align 8
+  call void @llvm.dbg.declare(metadata !{%struct.bar** %this.addr}, metadata !58), !dbg !59
+  store i32 %x, i32* %x.addr, align 4
+  call void @llvm.dbg.declare(metadata !{i32* %x.addr}, metadata !60), !dbg !61
+  %this1 = load %struct.bar** %this.addr
+  %0 = load i32* %x.addr, align 4, !dbg !62
+  call void @_ZN3barC2Ei(%struct.bar* %this1, i32 %0), !dbg !62 ; all real work happens in the callee
+  ret void, !dbg !62
+}
+
+define linkonce_odr void @_ZN3barC2Ei(%struct.bar* %this, i32 %x) unnamed_addr uwtable ssp align 2 {
+entry:                                            ; initializes field 0 through @_ZN3bazC1Ei, then points field 1 at field 0
+  %this.addr = alloca %struct.bar*, align 8
+  %x.addr = alloca i32, align 4
+  store %struct.bar* %this, %struct.bar** %this.addr, align 8
+  call void @llvm.dbg.declare(metadata !{%struct.bar** %this.addr}, metadata !63), !dbg !64
+  store i32 %x, i32* %x.addr, align 4
+  call void @llvm.dbg.declare(metadata !{i32* %x.addr}, metadata !65), !dbg !66
+  %this1 = load %struct.bar** %this.addr
+  %b = getelementptr inbounds %struct.bar* %this1, i32 0, i32 0, !dbg !67
+  %0 = load i32* %x.addr, align 4, !dbg !67
+  call void @_ZN3bazC1Ei(%struct.baz* %b, i32 %0), !dbg !67
+  %1 = getelementptr inbounds %struct.bar* %this1, i32 0, i32 1, !dbg !67
+  %b2 = getelementptr inbounds %struct.bar* %this1, i32 0, i32 0, !dbg !67
+  store %struct.baz* %b2, %struct.baz** %1, align 8, !dbg !67 ; field 1 ("b_ref" per !19) receives the address of field 0 ("b" per !8)
+  ret void, !dbg !68
+}
+
+define linkonce_odr void @_ZN3bazC1Ei(%struct.baz* %this, i32 %a) unnamed_addr uwtable ssp align 2 {
+entry:                                            ; spills %this and %a, then forwards both to @_ZN3bazC2Ei
+  %this.addr = alloca %struct.baz*, align 8
+  %a.addr = alloca i32, align 4
+  store %struct.baz* %this, %struct.baz** %this.addr, align 8
+  call void @llvm.dbg.declare(metadata !{%struct.baz** %this.addr}, metadata !70), !dbg !71
+  store i32 %a, i32* %a.addr, align 4
+  call void @llvm.dbg.declare(metadata !{i32* %a.addr}, metadata !72), !dbg !73
+  %this1 = load %struct.baz** %this.addr
+  %0 = load i32* %a.addr, align 4, !dbg !74
+  call void @_ZN3bazC2Ei(%struct.baz* %this1, i32 %0), !dbg !74 ; all real work happens in the callee
+  ret void, !dbg !74
+}
+
+define linkonce_odr void @_ZN3bazC2Ei(%struct.baz* %this, i32 %a) unnamed_addr nounwind uwtable ssp align 2 {
+entry:                                            ; spills %this and %a, then writes %a into field 0 of %this
+  %this.addr = alloca %struct.baz*, align 8
+  %a.addr = alloca i32, align 4
+  store %struct.baz* %this, %struct.baz** %this.addr, align 8
+  call void @llvm.dbg.declare(metadata !{%struct.baz** %this.addr}, metadata !75), !dbg !76
+  store i32 %a, i32* %a.addr, align 4
+  call void @llvm.dbg.declare(metadata !{i32* %a.addr}, metadata !77), !dbg !78
+  %this1 = load %struct.baz** %this.addr
+  %h = getelementptr inbounds %struct.baz* %this1, i32 0, i32 0, !dbg !79
+  %0 = load i32* %a.addr, align 4, !dbg !79
+  store i32 %0, i32* %h, align 4, !dbg !79 ; field 0 is the member "h" per !11
+  ret void, !dbg !80
+}
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 720913, i32 0, i32 4, metadata !"main.cpp", metadata !"/Users/echristo/tmp/bad-struct-ref", metadata !"clang version 3.1 (trunk 146596)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !3, metadata !27, metadata !1} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5, metadata !9}
+!5 = metadata !{i32 720898, null, metadata !"bar", metadata !6, i32 9, i64 128, i64 64, i32 0, i32 0, null, metadata !7, i32 0, null, null} ; [ DW_TAG_class_type ]
+!6 = metadata !{i32 720937, metadata !"main.cpp", metadata !"/Users/echristo/tmp/bad-struct-ref", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{metadata !8, metadata !19, metadata !21}
+!8 = metadata !{i32 720909, metadata !5, metadata !"b", metadata !6, i32 11, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ]
+!9 = metadata !{i32 720898, null, metadata !"baz", metadata !6, i32 3, i64 32, i64 32, i32 0, i32 0, null, metadata !10, i32 0, null, null} ; [ DW_TAG_class_type ]
+!10 = metadata !{metadata !11, metadata !13}
+!11 = metadata !{i32 720909, metadata !9, metadata !"h", metadata !6, i32 5, i64 32, i64 32, i64 0, i32 0, metadata !12} ; [ DW_TAG_member ]
+!12 = metadata !{i32 720932, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!13 = metadata !{i32 720942, i32 0, metadata !9, metadata !"baz", metadata !"baz", metadata !"", metadata !6, i32 6, metadata !14, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !17} ; [ DW_TAG_subprogram ]
+!14 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !15, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!15 = metadata !{null, metadata !16, metadata !12}
+!16 = metadata !{i32 720911, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !9} ; [ DW_TAG_pointer_type ]
+!17 = metadata !{metadata !18}
+!18 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
+!19 = metadata !{i32 720909, metadata !5, metadata !"b_ref", metadata !6, i32 12, i64 64, i64 64, i64 64, i32 0, metadata !20} ; [ DW_TAG_member ]
+!20 = metadata !{i32 720912, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !9} ; [ DW_TAG_reference_type ]
+!21 = metadata !{i32 720942, i32 0, metadata !5, metadata !"bar", metadata !"bar", metadata !"", metadata !6, i32 13, metadata !22, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !25} ; [ DW_TAG_subprogram ]
+!22 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !23, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!23 = metadata !{null, metadata !24, metadata !12}
+!24 = metadata !{i32 720911, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !5} ; [ DW_TAG_pointer_type ]
+!25 = metadata !{metadata !26}
+!26 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
+!27 = metadata !{metadata !28}
+!28 = metadata !{metadata !29, metadata !37, metadata !40, metadata !43, metadata !46}
+!29 = metadata !{i32 720942, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 17, metadata !30, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 (i32, i8**)* @main, null, null, metadata !35} ; [ DW_TAG_subprogram ]
+!30 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !31, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!31 = metadata !{metadata !12, metadata !12, metadata !32}
+!32 = metadata !{i32 720911, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !33} ; [ DW_TAG_pointer_type ]
+!33 = metadata !{i32 720911, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !34} ; [ DW_TAG_pointer_type ]
+!34 = metadata !{i32 720932, null, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!35 = metadata !{metadata !36}
+!36 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
+!37 = metadata !{i32 720942, i32 0, null, metadata !"bar", metadata !"bar", metadata !"_ZN3barC1Ei", metadata !6, i32 13, metadata !22, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.bar*, i32)* @_ZN3barC1Ei, null, metadata !21, metadata !38} ; [ DW_TAG_subprogram ]
+!38 = metadata !{metadata !39}
+!39 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
+!40 = metadata !{i32 720942, i32 0, null, metadata !"bar", metadata !"bar", metadata !"_ZN3barC2Ei", metadata !6, i32 13, metadata !22, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.bar*, i32)* @_ZN3barC2Ei, null, metadata !21, metadata !41} ; [ DW_TAG_subprogram ]
+!41 = metadata !{metadata !42}
+!42 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
+!43 = metadata !{i32 720942, i32 0, null, metadata !"baz", metadata !"baz", metadata !"_ZN3bazC1Ei", metadata !6, i32 6, metadata !14, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.baz*, i32)* @_ZN3bazC1Ei, null, metadata !13, metadata !44} ; [ DW_TAG_subprogram ]
+!44 = metadata !{metadata !45}
+!45 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
+!46 = metadata !{i32 720942, i32 0, null, metadata !"baz", metadata !"baz", metadata !"_ZN3bazC2Ei", metadata !6, i32 6, metadata !14, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.baz*, i32)* @_ZN3bazC2Ei, null, metadata !13, metadata !47} ; [ DW_TAG_subprogram ]
+!47 = metadata !{metadata !48}
+!48 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
+!49 = metadata !{i32 721153, metadata !29, metadata !"argc", metadata !6, i32 16777232, metadata !12, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!50 = metadata !{i32 16, i32 14, metadata !29, null}
+!51 = metadata !{i32 721153, metadata !29, metadata !"argv", metadata !6, i32 33554448, metadata !32, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!52 = metadata !{i32 16, i32 27, metadata !29, null}
+!53 = metadata !{i32 721152, metadata !54, metadata !"myBar", metadata !6, i32 18, metadata !5, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
+!54 = metadata !{i32 720907, metadata !29, i32 17, i32 1, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!55 = metadata !{i32 18, i32 9, metadata !54, null}
+!56 = metadata !{i32 18, i32 17, metadata !54, null}
+!57 = metadata !{i32 19, i32 5, metadata !54, null}
+!58 = metadata !{i32 721153, metadata !37, metadata !"this", metadata !6, i32 16777229, metadata !24, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
+!59 = metadata !{i32 13, i32 5, metadata !37, null}
+!60 = metadata !{i32 721153, metadata !37, metadata !"x", metadata !6, i32 33554445, metadata !12, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!61 = metadata !{i32 13, i32 13, metadata !37, null}
+!62 = metadata !{i32 13, i32 34, metadata !37, null}
+!63 = metadata !{i32 721153, metadata !40, metadata !"this", metadata !6, i32 16777229, metadata !24, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
+!64 = metadata !{i32 13, i32 5, metadata !40, null}
+!65 = metadata !{i32 721153, metadata !40, metadata !"x", metadata !6, i32 33554445, metadata !12, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!66 = metadata !{i32 13, i32 13, metadata !40, null}
+!67 = metadata !{i32 13, i32 33, metadata !40, null}
+!68 = metadata !{i32 13, i32 34, metadata !69, null}
+!69 = metadata !{i32 720907, metadata !40, i32 13, i32 33, metadata !6, i32 1} ; [ DW_TAG_lexical_block ]
+!70 = metadata !{i32 721153, metadata !43, metadata !"this", metadata !6, i32 16777222, metadata !16, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
+!71 = metadata !{i32 6, i32 5, metadata !43, null}
+!72 = metadata !{i32 721153, metadata !43, metadata !"a", metadata !6, i32 33554438, metadata !12, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!73 = metadata !{i32 6, i32 13, metadata !43, null}
+!74 = metadata !{i32 6, i32 24, metadata !43, null}
+!75 = metadata !{i32 721153, metadata !46, metadata !"this", metadata !6, i32 16777222, metadata !16, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
+!76 = metadata !{i32 6, i32 5, metadata !46, null}
+!77 = metadata !{i32 721153, metadata !46, metadata !"a", metadata !6, i32 33554438, metadata !12, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!78 = metadata !{i32 6, i32 13, metadata !46, null}
+!79 = metadata !{i32 6, i32 23, metadata !46, null}
+!80 = metadata !{i32 6, i32 24, metadata !81, null}
+!81 = metadata !{i32 720907, metadata !46, i32 6, i32 23, metadata !6, i32 2} ; [ DW_TAG_lexical_block ]
diff --git a/test/DebugInfo/X86/DW_AT_byte_size.ll b/test/DebugInfo/X86/DW_AT_byte_size.ll
new file mode 100644
index 0000000..59280e0
--- /dev/null
+++ b/test/DebugInfo/X86/DW_AT_byte_size.ll
@@ -0,0 +1,46 @@
+; RUN: llc -mtriple=x86_64-apple-darwin %s -o %t -filetype=obj
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+; Checks that we don't emit a size for a pointer type.
+; CHECK: DW_TAG_pointer_type
+; CHECK-NEXT: DW_AT_type
+; CHECK-NOT: DW_AT_byte_size
+; CHECK: DW_TAG
+
+%struct.A = type { i32 }
+
+define i32 @_Z3fooP1A(%struct.A* %a) nounwind uwtable ssp {
+entry:                                            ; spills %a, reloads it, and returns field 0 of the pointed-to struct
+  %a.addr = alloca %struct.A*, align 8
+  store %struct.A* %a, %struct.A** %a.addr, align 8
+  call void @llvm.dbg.declare(metadata !{%struct.A** %a.addr}, metadata !16), !dbg !17 ; !16 is the argument "a", typed as the pointer type !10
+  %0 = load %struct.A** %a.addr, align 8, !dbg !18
+  %b = getelementptr inbounds %struct.A* %0, i32 0, i32 0, !dbg !18
+  %1 = load i32* %b, align 4, !dbg !18
+  ret i32 %1, !dbg !18
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"foo.cpp", metadata !"/Users/echristo", metadata !"clang version 3.1 (trunk 150996)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"foo", metadata !"foo", metadata !"_Z3fooP1A", metadata !6, i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (%struct.A*)* @_Z3fooP1A, null, null, metadata !14} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 786473, metadata !"foo.cpp", metadata !"/Users/echristo", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!8 = metadata !{metadata !9, metadata !10}
+!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!10 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
+!11 = metadata !{i32 786434, null, metadata !"A", metadata !6, i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !12, i32 0, null, null} ; [ DW_TAG_class_type ]
+!12 = metadata !{metadata !13}
+!13 = metadata !{i32 786445, metadata !11, metadata !"b", metadata !6, i32 1, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ]
+!14 = metadata !{metadata !15}
+!15 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
+!16 = metadata !{i32 786689, metadata !5, metadata !"a", metadata !6, i32 16777219, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!17 = metadata !{i32 3, i32 13, metadata !5, null}
+!18 = metadata !{i32 4, i32 3, metadata !19, null}
+!19 = metadata !{i32 786443, metadata !5, i32 3, i32 16, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
diff --git a/test/DebugInfo/X86/DW_AT_specification.ll b/test/DebugInfo/X86/DW_AT_specification.ll
index bdd65e6..e4c5c80 100644
--- a/test/DebugInfo/X86/DW_AT_specification.ll
+++ b/test/DebugInfo/X86/DW_AT_specification.ll
@@ -3,8 +3,8 @@
 
 ; test that the DW_AT_specification is a back edge in the file.
 
-; CHECK: 0x0000003c: DW_TAG_subprogram [5] *
-; CHECK: 0x00000064: DW_AT_specification [DW_FORM_ref4]      (cu + 0x003c => {0x0000003c})
+; CHECK: 0x00000038: DW_TAG_subprogram [5] *
+; CHECK: 0x0000005e: DW_AT_specification [DW_FORM_ref4]      (cu + 0x0038 => {0x00000038})
 
 
 @_ZZN3foo3barEvE1x = constant i32 0, align 4
diff --git a/test/DebugInfo/X86/block-capture.ll b/test/DebugInfo/X86/block-capture.ll
new file mode 100644
index 0000000..4953c42
--- /dev/null
+++ b/test/DebugInfo/X86/block-capture.ll
@@ -0,0 +1,127 @@
+; RUN: llc -mtriple=x86_64-apple-darwin %s -o %t -filetype=obj
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+; Checks that we emit debug info for the block variable declare.
+; CHECK: 0x00000030:   DW_TAG_subprogram [3]
+; CHECK: 0x0000005b:     DW_TAG_variable [5]
+; CHECK: 0x0000005c:       DW_AT_name [DW_FORM_strp]     ( .debug_str[0x000000e6] = "block")
+; CHECK: 0x00000066:       DW_AT_location [DW_FORM_data4]        (0x00000023)
+
+%struct.__block_descriptor = type { i64, i64 }
+%struct.__block_literal_generic = type { i8*, i32, i32, i8*, %struct.__block_descriptor* }
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+define hidden void @__foo_block_invoke_0(i8* %.block_descriptor) uwtable ssp {
+entry:                                            ; loads the block stored in field 5 of the incoming literal and invokes it
+  %exn.slot = alloca i8*
+  %ehselector.slot = alloca i32
+  call void @llvm.dbg.value(metadata !{i8* %.block_descriptor}, i64 0, metadata !39), !dbg !51
+  %block = bitcast i8* %.block_descriptor to <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, void ()* }>*, !dbg !52
+  call void @llvm.dbg.declare(metadata !{<{ i8*, i32, i32, i8*, %struct.__block_descriptor*, void ()* }>* %block}, metadata !53), !dbg !54 ; !53 is the variable "block" that this test looks for in the DWARF dump
+  %block.capture.addr = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, void ()* }>* %block, i32 0, i32 5, !dbg !55
+  %0 = load void ()** %block.capture.addr, align 8, !dbg !55
+  %block.literal = bitcast void ()* %0 to %struct.__block_literal_generic*, !dbg !55
+  %1 = getelementptr inbounds %struct.__block_literal_generic* %block.literal, i32 0, i32 3, !dbg !55 ; field 3 is named "__FuncPtr" in !18
+  %2 = bitcast %struct.__block_literal_generic* %block.literal to i8*, !dbg !55
+  %3 = load i8** %1, !dbg !55
+  %4 = bitcast i8* %3 to void (i8*)*, !dbg !55
+  invoke void %4(i8* %2)
+          to label %invoke.cont unwind label %lpad, !dbg !55
+
+invoke.cont:                                      ; preds = %entry
+  br label %eh.cont, !dbg !58
+
+eh.cont:                                          ; preds = %catch, %invoke.cont
+  ret void, !dbg !61
+
+lpad:                                             ; preds = %entry
+  %5 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*)
+          catch i8* null, !dbg !55
+  %6 = extractvalue { i8*, i32 } %5, 0, !dbg !55
+  store i8* %6, i8** %exn.slot, !dbg !55
+  %7 = extractvalue { i8*, i32 } %5, 1, !dbg !55
+  store i32 %7, i32* %ehselector.slot, !dbg !55
+  br label %catch, !dbg !55
+
+catch:                                            ; preds = %lpad
+  %exn = load i8** %exn.slot, !dbg !62
+  %exn.adjusted = call i8* @objc_begin_catch(i8* %exn) nounwind, !dbg !62
+  call void @objc_end_catch(), !dbg !58 ; the exception is caught and immediately released; control rejoins %eh.cont
+  br label %eh.cont, !dbg !58
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+declare i8* @objc_begin_catch(i8*)
+
+declare void @objc_end_catch()
+
+declare i32 @__objc_personality_v0(...)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!35, !36, !37, !38}
+
+!0 = metadata !{i32 786449, i32 0, i32 16, metadata !"foo.m", metadata !"/Users/echristo", metadata !"clang version 3.1 (trunk 151227)", i1 true, i1 false, metadata !"", i32 2, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5, metadata !28, metadata !31, metadata !34}
+!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"foo", metadata !"foo", metadata !"", metadata !6, i32 5, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, metadata !26} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 786473, metadata !"foo.m", metadata !"/Users/echristo", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!8 = metadata !{null, metadata !9}
+!9 = metadata !{i32 786454, null, metadata !"dispatch_block_t", metadata !6, i32 1, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_typedef ]
+!10 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 0, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
+!11 = metadata !{i32 786451, metadata !6, metadata !"__block_literal_generic", metadata !6, i32 5, i64 256, i64 0, i32 0, i32 8, null, metadata !12, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!12 = metadata !{metadata !13, metadata !15, metadata !17, metadata !18, metadata !19}
+!13 = metadata !{i32 786445, metadata !6, metadata !"__isa", metadata !6, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !14} ; [ DW_TAG_member ]
+!14 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
+!15 = metadata !{i32 786445, metadata !6, metadata !"__flags", metadata !6, i32 0, i64 32, i64 32, i64 64, i32 0, metadata !16} ; [ DW_TAG_member ]
+!16 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!17 = metadata !{i32 786445, metadata !6, metadata !"__reserved", metadata !6, i32 0, i64 32, i64 32, i64 96, i32 0, metadata !16} ; [ DW_TAG_member ]
+!18 = metadata !{i32 786445, metadata !6, metadata !"__FuncPtr", metadata !6, i32 0, i64 64, i64 64, i64 128, i32 0, metadata !14} ; [ DW_TAG_member ]
+!19 = metadata !{i32 786445, metadata !6, metadata !"__descriptor", metadata !6, i32 5, i64 64, i64 64, i64 192, i32 0, metadata !20} ; [ DW_TAG_member ]
+!20 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 0, i64 0, i32 0, metadata !21} ; [ DW_TAG_pointer_type ]
+!21 = metadata !{i32 786451, metadata !6, metadata !"__block_descriptor", metadata !6, i32 5, i64 128, i64 0, i32 0, i32 8, null, metadata !22, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!22 = metadata !{metadata !23, metadata !25}
+!23 = metadata !{i32 786445, metadata !6, metadata !"reserved", metadata !6, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !24} ; [ DW_TAG_member ]
+!24 = metadata !{i32 786468, null, metadata !"long unsigned int", null, i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!25 = metadata !{i32 786445, metadata !6, metadata !"Size", metadata !6, i32 0, i64 64, i64 64, i64 64, i32 0, metadata !24} ; [ DW_TAG_member ]
+!26 = metadata !{metadata !27}
+!27 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
+!28 = metadata !{i32 786478, i32 0, metadata !6, metadata !"__foo_block_invoke_0", metadata !"__foo_block_invoke_0", metadata !"", metadata !6, i32 7, metadata !29, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i8*)* @__foo_block_invoke_0, null, null, metadata !26} ; [ DW_TAG_subprogram ]
+!29 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !30, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!30 = metadata !{null, metadata !14}
+!31 = metadata !{i32 786478, i32 0, metadata !6, metadata !"__copy_helper_block_", metadata !"__copy_helper_block_", metadata !"", metadata !6, i32 10, metadata !32, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, metadata !26} ; [ DW_TAG_subprogram ]
+!32 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !33, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!33 = metadata !{null, metadata !14, metadata !14}
+!34 = metadata !{i32 786478, i32 0, metadata !6, metadata !"__destroy_helper_block_", metadata !"__destroy_helper_block_", metadata !"", metadata !6, i32 10, metadata !29, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, metadata !26} ; [ DW_TAG_subprogram ]
+!35 = metadata !{i32 1, metadata !"Objective-C Version", i32 2}
+!36 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0}
+!37 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
+!38 = metadata !{i32 4, metadata !"Objective-C Garbage Collection", i32 0}
+!39 = metadata !{i32 786689, metadata !28, metadata !".block_descriptor", metadata !6, i32 16777223, metadata !40, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
+!40 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 0, i64 0, i32 0, metadata !41} ; [ DW_TAG_pointer_type ]
+!41 = metadata !{i32 786451, metadata !6, metadata !"__block_literal_1", metadata !6, i32 7, i64 320, i64 64, i32 0, i32 0, null, metadata !42, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!42 = metadata !{metadata !43, metadata !44, metadata !45, metadata !46, metadata !47, metadata !50}
+!43 = metadata !{i32 786445, metadata !6, metadata !"__isa", metadata !6, i32 7, i64 64, i64 64, i64 0, i32 0, metadata !14} ; [ DW_TAG_member ]
+!44 = metadata !{i32 786445, metadata !6, metadata !"__flags", metadata !6, i32 7, i64 32, i64 32, i64 64, i32 0, metadata !16} ; [ DW_TAG_member ]
+!45 = metadata !{i32 786445, metadata !6, metadata !"__reserved", metadata !6, i32 7, i64 32, i64 32, i64 96, i32 0, metadata !16} ; [ DW_TAG_member ]
+!46 = metadata !{i32 786445, metadata !6, metadata !"__FuncPtr", metadata !6, i32 7, i64 64, i64 64, i64 128, i32 0, metadata !14} ; [ DW_TAG_member ]
+!47 = metadata !{i32 786445, metadata !6, metadata !"__descriptor", metadata !6, i32 7, i64 64, i64 64, i64 192, i32 0, metadata !48} ; [ DW_TAG_member ]
+!48 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !49} ; [ DW_TAG_pointer_type ]
+!49 = metadata !{i32 786451, null, metadata !"__block_descriptor_withcopydispose", metadata !6, i32 7, i32 0, i32 0, i32 0, i32 4, null, null, i32 0} ; [ DW_TAG_structure_type ]
+!50 = metadata !{i32 786445, metadata !6, metadata !"block", metadata !6, i32 7, i64 64, i64 64, i64 256, i32 0, metadata !9} ; [ DW_TAG_member ]
+!51 = metadata !{i32 7, i32 18, metadata !28, null}
+!52 = metadata !{i32 7, i32 19, metadata !28, null}
+!53 = metadata !{i32 786688, metadata !28, metadata !"block", metadata !6, i32 5, metadata !9, i32 0, i32 0, i64 1, i64 32} ; [ DW_TAG_auto_variable ]
+!54 = metadata !{i32 5, i32 27, metadata !28, null}
+!55 = metadata !{i32 8, i32 22, metadata !56, null}
+!56 = metadata !{i32 786443, metadata !57, i32 7, i32 26, metadata !6, i32 2} ; [ DW_TAG_lexical_block ]
+!57 = metadata !{i32 786443, metadata !28, i32 7, i32 19, metadata !6, i32 1} ; [ DW_TAG_lexical_block ]
+!58 = metadata !{i32 10, i32 20, metadata !59, null}
+!59 = metadata !{i32 786443, metadata !60, i32 9, i32 35, metadata !6, i32 4} ; [ DW_TAG_lexical_block ]
+!60 = metadata !{i32 786443, metadata !57, i32 9, i32 35, metadata !6, i32 3} ; [ DW_TAG_lexical_block ]
+!61 = metadata !{i32 10, i32 21, metadata !28, null}
+!62 = metadata !{i32 9, i32 20, metadata !56, null}
diff --git a/test/DebugInfo/X86/concrete_out_of_line.ll b/test/DebugInfo/X86/concrete_out_of_line.ll
index 3da34c2..055a9bf 100644
--- a/test/DebugInfo/X86/concrete_out_of_line.ll
+++ b/test/DebugInfo/X86/concrete_out_of_line.ll
@@ -7,8 +7,7 @@
 ; first check that we have a TAG_subprogram at a given offset and it has
 ; AT_inline.
 
-; CHECK: 0x0000014a:   DW_TAG_subprogram
-; CHECK-NEXT:     DW_AT_sibling
+; CHECK: 0x00000130:   DW_TAG_subprogram [18]
 ; CHECK-NEXT:     DW_AT_MIPS_linkage_name
 ; CHECK-NEXT:     DW_AT_specification
 ; CHECK-NEXT:     DW_AT_inline
@@ -16,8 +15,8 @@
 
 ; and then that a TAG_subprogram refers to it with AT_abstract_origin.
 
-; CHECK: 0x000001a6:   DW_TAG_subprogram
-; CHECK-NEXT: DW_AT_abstract_origin [DW_FORM_ref4]    (cu + 0x014a => {0x0000014a})
+; CHECK: 0x00000180:   DW_TAG_subprogram [20]
+; CHECK-NEXT: DW_AT_abstract_origin [DW_FORM_ref4]    (cu + 0x0130 => {0x00000130})
 
 define i32 @_ZN17nsAutoRefCnt7ReleaseEv() {
 entry:
diff --git a/test/DebugInfo/X86/dg.exp b/test/DebugInfo/X86/dg.exp
deleted file mode 100644
index 7b7bd4e..0000000
--- a/test/DebugInfo/X86/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target X86] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll}]]
-}
diff --git a/test/DebugInfo/X86/lit.local.cfg b/test/DebugInfo/X86/lit.local.cfg
new file mode 100644
index 0000000..2092c29
--- /dev/null
+++ b/test/DebugInfo/X86/lit.local.cfg
@@ -0,0 +1,12 @@
+config.suffixes = ['.ll']
+
+def getRoot(config):
+    while config.parent:
+        config = config.parent
+    return config
+
+root = getRoot(config)
+# Unsupported unless 'X86' is listed in the root config's targets_to_build.
+targets = set(root.targets_to_build.split())
+if 'X86' not in targets:
+    config.unsupported = True
diff --git a/test/DebugInfo/X86/pointer-type-size.ll b/test/DebugInfo/X86/pointer-type-size.ll
new file mode 100644
index 0000000..f11fbe4
--- /dev/null
+++ b/test/DebugInfo/X86/pointer-type-size.ll
@@ -0,0 +1,25 @@
+; RUN: llc -mtriple=x86_64-apple-macosx10.7 %s -o %t -filetype=obj
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+; CHECK: ptr
+; CHECK-NOT: AT_bit_size
+
+%struct.crass = type { i8* }
+
+@crass = common global %struct.crass zeroinitializer, align 8
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 720913, i32 0, i32 12, metadata !"foo.c", metadata !"/Users/echristo/tmp", metadata !"clang version 3.1 (trunk 147882)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 720948, i32 0, null, metadata !"crass", metadata !"crass", metadata !"", metadata !6, i32 1, metadata !7, i32 0, i32 1, %struct.crass* @crass} ; [ DW_TAG_variable ]
+!6 = metadata !{i32 720937, metadata !"foo.c", metadata !"/Users/echristo/tmp", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 720915, null, metadata !"crass", metadata !6, i32 1, i64 64, i64 64, i32 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!8 = metadata !{metadata !9}
+!9 = metadata !{i32 720909, metadata !7, metadata !"ptr", metadata !6, i32 1, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ]
+!10 = metadata !{i32 720934, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !11} ; [ DW_TAG_const_type ]
+!11 = metadata !{i32 720911, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !12} ; [ DW_TAG_pointer_type ]
+!12 = metadata !{i32 720932, null, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
diff --git a/test/DebugInfo/X86/pr11300.ll b/test/DebugInfo/X86/pr11300.ll
index 8df2b46..8488434 100644
--- a/test/DebugInfo/X86/pr11300.ll
+++ b/test/DebugInfo/X86/pr11300.ll
@@ -3,8 +3,8 @@
 
 ; test that the DW_AT_specification is a back edge in the file.
 
-; CHECK: 0x00000063:     DW_TAG_subprogram [5]
-; CHECK: 0x00000089:     DW_AT_specification [DW_FORM_ref4]      (cu + 0x0063 => {0x00000063})
+; CHECK: 0x0000005a:     DW_TAG_subprogram [5]
+; CHECK: 0x0000007a:     DW_AT_specification [DW_FORM_ref4]      (cu + 0x005a => {0x0000005a})
 
 %struct.foo = type { i8 }
 
diff --git a/test/DebugInfo/dg.exp b/test/DebugInfo/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/DebugInfo/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/DebugInfo/lit.local.cfg b/test/DebugInfo/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/DebugInfo/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/ExecutionEngine/2002-12-16-ArgTest.ll b/test/ExecutionEngine/2002-12-16-ArgTest.ll
index eba58cc..b36feee 100644
--- a/test/ExecutionEngine/2002-12-16-ArgTest.ll
+++ b/test/ExecutionEngine/2002-12-16-ArgTest.ll
@@ -1,4 +1,5 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
+; XFAIL: mcjit
 
 @.LC0 = internal global [10 x i8] c"argc: %d\0A\00"		; <[10 x i8]*> [#uses=1]
 
diff --git a/test/ExecutionEngine/2003-01-04-ArgumentBug.ll b/test/ExecutionEngine/2003-01-04-ArgumentBug.ll
index 577226b..0cc0efd 100644
--- a/test/ExecutionEngine/2003-01-04-ArgumentBug.ll
+++ b/test/ExecutionEngine/2003-01-04-ArgumentBug.ll
@@ -1,5 +1,6 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
 ; XFAIL: arm
+; XFAIL: mcjit
 
 define i32 @foo(i32 %X, i32 %Y, double %A) {
 	%cond212 = fcmp une double %A, 1.000000e+00		; <i1> [#uses=1]
diff --git a/test/ExecutionEngine/2003-01-04-LoopTest.ll b/test/ExecutionEngine/2003-01-04-LoopTest.ll
index 61b0a1b..e4049a7 100644
--- a/test/ExecutionEngine/2003-01-04-LoopTest.ll
+++ b/test/ExecutionEngine/2003-01-04-LoopTest.ll
@@ -1,5 +1,6 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
 ; XFAIL: arm
+; XFAIL: mcjit
 
 define i32 @main() {
 	call i32 @mylog( i32 4 )		; <i32>:1 [#uses=0]
diff --git a/test/ExecutionEngine/2003-01-04-PhiTest.ll b/test/ExecutionEngine/2003-01-04-PhiTest.ll
index 2bc70d7..48576e7 100644
--- a/test/ExecutionEngine/2003-01-04-PhiTest.ll
+++ b/test/ExecutionEngine/2003-01-04-PhiTest.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 define i32 @main() {
 ; <label>:0
diff --git a/test/ExecutionEngine/2003-01-09-SARTest.ll b/test/ExecutionEngine/2003-01-09-SARTest.ll
index 560cd3e..ed58e11 100644
--- a/test/ExecutionEngine/2003-01-09-SARTest.ll
+++ b/test/ExecutionEngine/2003-01-09-SARTest.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 ; We were accidentally inverting the signedness of right shifts.  Whoops.
 
diff --git a/test/ExecutionEngine/2003-01-10-FUCOM.ll b/test/ExecutionEngine/2003-01-10-FUCOM.ll
index 8512f63..4960e59 100644
--- a/test/ExecutionEngine/2003-01-10-FUCOM.ll
+++ b/test/ExecutionEngine/2003-01-10-FUCOM.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 define i32 @main() {
 	%X = fadd double 0.000000e+00, 1.000000e+00		; <double> [#uses=1]
diff --git a/test/ExecutionEngine/2003-01-15-AlignmentTest.ll b/test/ExecutionEngine/2003-01-15-AlignmentTest.ll
index df15037..80e19ba 100644
--- a/test/ExecutionEngine/2003-01-15-AlignmentTest.ll
+++ b/test/ExecutionEngine/2003-01-15-AlignmentTest.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
 ; XFAIL: arm
 
 define i32 @bar(i8* %X) {
diff --git a/test/ExecutionEngine/2003-05-06-LivenessClobber.ll b/test/ExecutionEngine/2003-05-06-LivenessClobber.ll
index 26429a0..1e155ee 100644
--- a/test/ExecutionEngine/2003-05-06-LivenessClobber.ll
+++ b/test/ExecutionEngine/2003-05-06-LivenessClobber.ll
@@ -1,7 +1,8 @@
 ; This testcase should return with an exit code of 1.
 ;
-; RUN: not lli %s
+; RUN: not %lli %s
 ; XFAIL: arm
+; XFAIL: mcjit
 
 @test = global i64 0		; <i64*> [#uses=1]
 
diff --git a/test/ExecutionEngine/2003-05-07-ArgumentTest.ll b/test/ExecutionEngine/2003-05-07-ArgumentTest.ll
index 566f3ae..1a1ae5f 100644
--- a/test/ExecutionEngine/2003-05-07-ArgumentTest.ll
+++ b/test/ExecutionEngine/2003-05-07-ArgumentTest.ll
@@ -1,5 +1,6 @@
-; RUN: lli %s test
+; RUN: %lli %s test
 ; XFAIL: arm
+; XFAIL: mcjit
 
 declare i32 @puts(i8*)
 
diff --git a/test/ExecutionEngine/2003-05-11-PHIRegAllocBug.ll b/test/ExecutionEngine/2003-05-11-PHIRegAllocBug.ll
index bcdb114..45279ad 100644
--- a/test/ExecutionEngine/2003-05-11-PHIRegAllocBug.ll
+++ b/test/ExecutionEngine/2003-05-11-PHIRegAllocBug.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 target datalayout = "e-p:32:32"
 
diff --git a/test/ExecutionEngine/2003-06-04-bzip2-bug.ll b/test/ExecutionEngine/2003-06-04-bzip2-bug.ll
index 37dae86..4342aa4 100644
--- a/test/ExecutionEngine/2003-06-04-bzip2-bug.ll
+++ b/test/ExecutionEngine/2003-06-04-bzip2-bug.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 ; Testcase distilled from 256.bzip2.
 
diff --git a/test/ExecutionEngine/2003-06-05-PHIBug.ll b/test/ExecutionEngine/2003-06-05-PHIBug.ll
index f7bd8b7..03b66c4 100644
--- a/test/ExecutionEngine/2003-06-05-PHIBug.ll
+++ b/test/ExecutionEngine/2003-06-05-PHIBug.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 ; Testcase distilled from 256.bzip2.
 
diff --git a/test/ExecutionEngine/2003-08-15-AllocaAssertion.ll b/test/ExecutionEngine/2003-08-15-AllocaAssertion.ll
index 6c2f340..22dd4cc 100644
--- a/test/ExecutionEngine/2003-08-15-AllocaAssertion.ll
+++ b/test/ExecutionEngine/2003-08-15-AllocaAssertion.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
 ; XFAIL: arm
 
 ; This testcase failed to work because two variable sized allocas confused the
diff --git a/test/ExecutionEngine/2003-08-21-EnvironmentTest.ll b/test/ExecutionEngine/2003-08-21-EnvironmentTest.ll
index 29cbaac..b56025a 100644
--- a/test/ExecutionEngine/2003-08-21-EnvironmentTest.ll
+++ b/test/ExecutionEngine/2003-08-21-EnvironmentTest.ll
@@ -1,5 +1,6 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
 ; XFAIL: arm
+; XFAIL: mcjit
 
 ;
 ; Regression Test: EnvironmentTest.ll
diff --git a/test/ExecutionEngine/2003-08-23-RegisterAllocatePhysReg.ll b/test/ExecutionEngine/2003-08-23-RegisterAllocatePhysReg.ll
index 6711d4d..04a5e17 100644
--- a/test/ExecutionEngine/2003-08-23-RegisterAllocatePhysReg.ll
+++ b/test/ExecutionEngine/2003-08-23-RegisterAllocatePhysReg.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
 ; XFAIL: arm
 
 ; This testcase exposes a bug in the local register allocator where it runs out
diff --git a/test/ExecutionEngine/2003-10-18-PHINode-ConstantExpr-CondCode-Failure.ll b/test/ExecutionEngine/2003-10-18-PHINode-ConstantExpr-CondCode-Failure.ll
index fe18211..6e48c60 100644
--- a/test/ExecutionEngine/2003-10-18-PHINode-ConstantExpr-CondCode-Failure.ll
+++ b/test/ExecutionEngine/2003-10-18-PHINode-ConstantExpr-CondCode-Failure.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
 ; XFAIL: arm
 
 @A = global i32 0		; <i32*> [#uses=1]
diff --git a/test/ExecutionEngine/2005-12-02-TailCallBug.ll b/test/ExecutionEngine/2005-12-02-TailCallBug.ll
index 874ce39..4d7bd89 100644
--- a/test/ExecutionEngine/2005-12-02-TailCallBug.ll
+++ b/test/ExecutionEngine/2005-12-02-TailCallBug.ll
@@ -1,6 +1,7 @@
 ; PR672
-; RUN: lli %s
+; RUN: %lli %s
 ; XFAIL: arm
+; XFAIL: mcjit-ia32
 
 define i32 @main() {
 	%f = bitcast i32 (i32, i32*, i32)* @check_tail to i32*		; <i32*> [#uses=1]
diff --git a/test/ExecutionEngine/2007-12-10-APIntLoadStore.ll b/test/ExecutionEngine/2007-12-10-APIntLoadStore.ll
index c0dc4cf..4183611 100644
--- a/test/ExecutionEngine/2007-12-10-APIntLoadStore.ll
+++ b/test/ExecutionEngine/2007-12-10-APIntLoadStore.ll
@@ -1,4 +1,4 @@
-; RUN: lli -force-interpreter %s
+; RUN: %lli -force-interpreter %s
 ; PR1836
 
 define i32 @main() {
diff --git a/test/ExecutionEngine/2008-06-05-APInt-OverAShr.ll b/test/ExecutionEngine/2008-06-05-APInt-OverAShr.ll
index 07cc659..0ab0274 100644
--- a/test/ExecutionEngine/2008-06-05-APInt-OverAShr.ll
+++ b/test/ExecutionEngine/2008-06-05-APInt-OverAShr.ll
@@ -1,4 +1,4 @@
-; RUN: lli -force-interpreter=true %s | grep 1
+; RUN: %lli -force-interpreter=true %s | grep 1
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
 target triple = "i686-pc-linux-gnu"
diff --git a/test/ExecutionEngine/2010-01-15-UndefValue.ll b/test/ExecutionEngine/2010-01-15-UndefValue.ll
index 6e7a392..01cb21f 100644
--- a/test/ExecutionEngine/2010-01-15-UndefValue.ll
+++ b/test/ExecutionEngine/2010-01-15-UndefValue.ll
@@ -1,4 +1,4 @@
-; RUN: lli -force-interpreter=true %s
+; RUN: %lli -force-interpreter=true %s
 
 define i32 @main() {
        %a = add i32 0, undef
diff --git a/test/ExecutionEngine/dg.exp b/test/ExecutionEngine/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/ExecutionEngine/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/ExecutionEngine/fpbitcast.ll b/test/ExecutionEngine/fpbitcast.ll
index 47cbb02..fa84be4 100644
--- a/test/ExecutionEngine/fpbitcast.ll
+++ b/test/ExecutionEngine/fpbitcast.ll
@@ -1,4 +1,4 @@
-; RUN: lli -force-interpreter=true %s | grep 40091eb8
+; RUN: %lli -force-interpreter=true %s | grep 40091eb8
 ;
 define i32 @test(double %x) {
 entry:
diff --git a/test/ExecutionEngine/hello.ll b/test/ExecutionEngine/hello.ll
index 92c26a6..4d1d987 100644
--- a/test/ExecutionEngine/hello.ll
+++ b/test/ExecutionEngine/hello.ll
@@ -1,5 +1,6 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
 ; XFAIL: arm
+; XFAIL: mcjit
 
 @.LC0 = internal global [12 x i8] c"Hello World\00"		; <[12 x i8]*> [#uses=1]
 
diff --git a/test/ExecutionEngine/hello2.ll b/test/ExecutionEngine/hello2.ll
index 10557ab..05b4409 100644
--- a/test/ExecutionEngine/hello2.ll
+++ b/test/ExecutionEngine/hello2.ll
@@ -1,5 +1,6 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
 ; XFAIL: arm
+; XFAIL: mcjit
 
 @X = global i32 7		; <i32*> [#uses=0]
 @msg = internal global [13 x i8] c"Hello World\0A\00"		; <[13 x i8]*> [#uses=1]
diff --git a/test/ExecutionEngine/lit.local.cfg b/test/ExecutionEngine/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/ExecutionEngine/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/ExecutionEngine/simplesttest.ll b/test/ExecutionEngine/simplesttest.ll
index ad38485..85c1715 100644
--- a/test/ExecutionEngine/simplesttest.ll
+++ b/test/ExecutionEngine/simplesttest.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 define i32 @main() {
 	ret i32 0
diff --git a/test/ExecutionEngine/simpletest.ll b/test/ExecutionEngine/simpletest.ll
index 797b359..83f9b84 100644
--- a/test/ExecutionEngine/simpletest.ll
+++ b/test/ExecutionEngine/simpletest.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
 ; XFAIL: arm
 
 define i32 @bar() {
diff --git a/test/ExecutionEngine/stubs.ll b/test/ExecutionEngine/stubs.ll
index 2039ab5..c1134e9 100644
--- a/test/ExecutionEngine/stubs.ll
+++ b/test/ExecutionEngine/stubs.ll
@@ -1,5 +1,6 @@
-; RUN: lli -disable-lazy-compilation=false %s
+; RUN: %lli -disable-lazy-compilation=false %s
 ; XFAIL: arm
+; XFAIL: mcjit
 
 define i32 @main() nounwind {
 entry:
diff --git a/test/ExecutionEngine/test-arith.ll b/test/ExecutionEngine/test-arith.ll
index 354ecd2..79f989f 100644
--- a/test/ExecutionEngine/test-arith.ll
+++ b/test/ExecutionEngine/test-arith.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 define i32 @main() {
 	%A = add i8 0, 12		; <i8> [#uses=1]
diff --git a/test/ExecutionEngine/test-branch.ll b/test/ExecutionEngine/test-branch.ll
index 7d4fd56..3ae55d0 100644
--- a/test/ExecutionEngine/test-branch.ll
+++ b/test/ExecutionEngine/test-branch.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 ; test unconditional branch
 define i32 @main() {
diff --git a/test/ExecutionEngine/test-call-no-external-funcs.ll b/test/ExecutionEngine/test-call-no-external-funcs.ll
new file mode 100644
index 0000000..0c98379
--- /dev/null
+++ b/test/ExecutionEngine/test-call-no-external-funcs.ll
@@ -0,0 +1,16 @@
+; RUN: %lli %s > /dev/null
+; XFAIL: arm
+; XFAIL: mcjit
+
+define i32 @_Z14func_exit_codev() nounwind uwtable {
+entry:
+  ret i32 0
+}
+
+define i32 @main() nounwind uwtable {
+entry:
+  %retval = alloca i32, align 4
+  store i32 0, i32* %retval
+  %call = call i32 @_Z14func_exit_codev()
+  ret i32 %call
+}
diff --git a/test/ExecutionEngine/test-call.ll b/test/ExecutionEngine/test-call.ll
index c4131a2..eaadbba 100644
--- a/test/ExecutionEngine/test-call.ll
+++ b/test/ExecutionEngine/test-call.ll
@@ -1,5 +1,6 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
 ; XFAIL: arm
+; XFAIL: mcjit
 
 declare void @exit(i32)
 
diff --git a/test/ExecutionEngine/test-cast.ll b/test/ExecutionEngine/test-cast.ll
index f41448c..667fa80 100644
--- a/test/ExecutionEngine/test-cast.ll
+++ b/test/ExecutionEngine/test-cast.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 define i32 @foo() {
 	ret i32 0
diff --git a/test/ExecutionEngine/test-common-symbols.ll b/test/ExecutionEngine/test-common-symbols.ll
new file mode 100644
index 0000000..52ce7d1
--- /dev/null
+++ b/test/ExecutionEngine/test-common-symbols.ll
@@ -0,0 +1,90 @@
+; RUN: %lli -O0 -disable-lazy-compilation=false %s
+; XFAIL: arm
+; XFAIL: mcjit
+
+; The intention of this test is to verify that symbols mapped to COMMON in ELF
+; work as expected.
+;
+; Compiled from this C code:
+;
+; int zero_int;
+; double zero_double;
+; int zero_arr[10];
+; 
+; int main()
+; {
+;     zero_arr[zero_int + 5] = 40;
+; 
+;     if (zero_double < 1.0)
+;         zero_arr[zero_int + 2] = 70;
+; 
+;     for (int i = 1; i < 10; ++i) {
+;         zero_arr[i] = zero_arr[i - 1] + zero_arr[i];
+;     }
+;     return zero_arr[9] == 110 ? 0 : -1;
+; }
+
+@zero_int = common global i32 0, align 4
+@zero_arr = common global [10 x i32] zeroinitializer, align 16
+@zero_double = common global double 0.000000e+00, align 8
+
+define i32 @main() nounwind {
+entry:
+  %retval = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 0, i32* %retval
+  %0 = load i32* @zero_int, align 4
+  %add = add nsw i32 %0, 5
+  %idxprom = sext i32 %add to i64
+  %arrayidx = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom
+  store i32 40, i32* %arrayidx, align 4
+  %1 = load double* @zero_double, align 8
+  %cmp = fcmp olt double %1, 1.000000e+00
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %2 = load i32* @zero_int, align 4
+  %add1 = add nsw i32 %2, 2
+  %idxprom2 = sext i32 %add1 to i64
+  %arrayidx3 = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom2
+  store i32 70, i32* %arrayidx3, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  store i32 1, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %if.end
+  %3 = load i32* %i, align 4
+  %cmp4 = icmp slt i32 %3, 10
+  br i1 %cmp4, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %4 = load i32* %i, align 4
+  %sub = sub nsw i32 %4, 1
+  %idxprom5 = sext i32 %sub to i64
+  %arrayidx6 = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom5
+  %5 = load i32* %arrayidx6, align 4
+  %6 = load i32* %i, align 4
+  %idxprom7 = sext i32 %6 to i64
+  %arrayidx8 = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom7
+  %7 = load i32* %arrayidx8, align 4
+  %add9 = add nsw i32 %5, %7
+  %8 = load i32* %i, align 4
+  %idxprom10 = sext i32 %8 to i64
+  %arrayidx11 = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom10
+  store i32 %add9, i32* %arrayidx11, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %9 = load i32* %i, align 4
+  %inc = add nsw i32 %9, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %10 = load i32* getelementptr inbounds ([10 x i32]* @zero_arr, i32 0, i64 9), align 4
+  %cmp12 = icmp eq i32 %10, 110
+  %cond = select i1 %cmp12, i32 0, i32 -1
+  ret i32 %cond
+}
diff --git a/test/ExecutionEngine/test-constantexpr.ll b/test/ExecutionEngine/test-constantexpr.ll
index d6d90e3..d01479a 100644
--- a/test/ExecutionEngine/test-constantexpr.ll
+++ b/test/ExecutionEngine/test-constantexpr.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 ; This tests to make sure that we can evaluate weird constant expressions
 
diff --git a/test/ExecutionEngine/test-fp-no-external-funcs.ll b/test/ExecutionEngine/test-fp-no-external-funcs.ll
new file mode 100644
index 0000000..6b8410a
--- /dev/null
+++ b/test/ExecutionEngine/test-fp-no-external-funcs.ll
@@ -0,0 +1,22 @@
+; RUN: %lli  %s > /dev/null
+; XFAIL: mcjit
+
+define double @test(double* %DP, double %Arg) {
+	%D = load double* %DP		; <double> [#uses=1]
+	%V = fadd double %D, 1.000000e+00		; <double> [#uses=2]
+	%W = fsub double %V, %V		; <double> [#uses=3]
+	%X = fmul double %W, %W		; <double> [#uses=2]
+	%Y = fdiv double %X, %X		; <double> [#uses=2]
+	%Q = fadd double %Y, %Arg		; <double> [#uses=1]
+	%R = bitcast double %Q to double		; <double> [#uses=1]
+	store double %Q, double* %DP
+	ret double %Y
+}
+
+define i32 @main() {
+	%X = alloca double		; <double*> [#uses=2]
+	store double 0.000000e+00, double* %X
+	call double @test( double* %X, double 2.000000e+00 )		; <double>:1 [#uses=0]
+	ret i32 0
+}
+
diff --git a/test/ExecutionEngine/test-fp.ll b/test/ExecutionEngine/test-fp.ll
index f653660..3411ca1 100644
--- a/test/ExecutionEngine/test-fp.ll
+++ b/test/ExecutionEngine/test-fp.ll
@@ -1,4 +1,5 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
+; XFAIL: mcjit
 
 define double @test(double* %DP, double %Arg) {
 	%D = load double* %DP		; <double> [#uses=1]
diff --git a/test/ExecutionEngine/test-global-init-nonzero.ll b/test/ExecutionEngine/test-global-init-nonzero.ll
new file mode 100644
index 0000000..a13bfc9
--- /dev/null
+++ b/test/ExecutionEngine/test-global-init-nonzero.ll
@@ -0,0 +1,36 @@
+; RUN: %lli  %s > /dev/null
+; XFAIL: arm
+; XFAIL: mcjit
+
+@count = global i32 1, align 4
+
+define i32 @main() nounwind uwtable {
+entry:
+  %retval = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 0, i32* %retval
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 49
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* @count, align 4
+  %inc = add nsw i32 %1, 1
+  store i32 %inc, i32* @count, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %2 = load i32* %i, align 4
+  %inc1 = add nsw i32 %2, 1
+  store i32 %inc1, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %3 = load i32* @count, align 4
+  %sub = sub nsw i32 %3, 50
+  ret i32 %sub
+}
diff --git a/test/ExecutionEngine/test-global.ll b/test/ExecutionEngine/test-global.ll
new file mode 100644
index 0000000..ce25cb2
--- /dev/null
+++ b/test/ExecutionEngine/test-global.ll
@@ -0,0 +1,36 @@
+; RUN: %lli %s > /dev/null
+; XFAIL: arm
+; XFAIL: mcjit
+
+@count = global i32 0, align 4
+
+define i32 @main() nounwind uwtable {
+entry:
+  %retval = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 0, i32* %retval
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 50
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* @count, align 4
+  %inc = add nsw i32 %1, 1
+  store i32 %inc, i32* @count, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %2 = load i32* %i, align 4
+  %inc1 = add nsw i32 %2, 1
+  store i32 %inc1, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %3 = load i32* @count, align 4
+  %sub = sub nsw i32 %3, 50
+  ret i32 %sub
+}
diff --git a/test/ExecutionEngine/test-loadstore.ll b/test/ExecutionEngine/test-loadstore.ll
index 7eb57cb..b9b7798 100644
--- a/test/ExecutionEngine/test-loadstore.ll
+++ b/test/ExecutionEngine/test-loadstore.ll
@@ -1,5 +1,6 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
 ; XFAIL: arm
+; XFAIL: mcjit-ia32
 
 define void @test(i8* %P, i16* %P.upgrd.1, i32* %P.upgrd.2, i64* %P.upgrd.3) {
 	%V = load i8* %P		; <i8> [#uses=1]
diff --git a/test/ExecutionEngine/test-local.ll b/test/ExecutionEngine/test-local.ll
new file mode 100644
index 0000000..240b174
--- /dev/null
+++ b/test/ExecutionEngine/test-local.ll
@@ -0,0 +1,35 @@
+; RUN: %lli %s > /dev/null
+; XFAIL: arm
+
+define i32 @main() nounwind uwtable {
+entry:
+  %retval = alloca i32, align 4
+  %count = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 0, i32* %retval
+  store i32 0, i32* %count, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 50
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %count, align 4
+  %inc = add nsw i32 %1, 1
+  store i32 %inc, i32* %count, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %2 = load i32* %i, align 4
+  %inc1 = add nsw i32 %2, 1
+  store i32 %inc1, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %3 = load i32* %count, align 4
+  %sub = sub nsw i32 %3, 50
+  ret i32 %sub
+}
diff --git a/test/ExecutionEngine/test-logical.ll b/test/ExecutionEngine/test-logical.ll
index 710763a..05b381b 100644
--- a/test/ExecutionEngine/test-logical.ll
+++ b/test/ExecutionEngine/test-logical.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 define i32 @main() {
 	%A = and i8 4, 8		; <i8> [#uses=2]
diff --git a/test/ExecutionEngine/test-loop.ll b/test/ExecutionEngine/test-loop.ll
index f0e6f7a..e951a14 100644
--- a/test/ExecutionEngine/test-loop.ll
+++ b/test/ExecutionEngine/test-loop.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 define i32 @main() {
 ; <label>:0
diff --git a/test/ExecutionEngine/test-phi.ll b/test/ExecutionEngine/test-phi.ll
index c5848a8..c5bdfd5 100644
--- a/test/ExecutionEngine/test-phi.ll
+++ b/test/ExecutionEngine/test-phi.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 ; test phi node
 @Y = global i32 6		; <i32*> [#uses=1]
diff --git a/test/ExecutionEngine/test-ret.ll b/test/ExecutionEngine/test-ret.ll
index beec399..025f53e 100644
--- a/test/ExecutionEngine/test-ret.ll
+++ b/test/ExecutionEngine/test-ret.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 ; test return instructions
 define void @test1() {
diff --git a/test/ExecutionEngine/test-return.ll b/test/ExecutionEngine/test-return.ll
new file mode 100644
index 0000000..d464a4b
--- /dev/null
+++ b/test/ExecutionEngine/test-return.ll
@@ -0,0 +1,8 @@
+; RUN: %lli %s > /dev/null
+
+define i32 @main() nounwind uwtable {
+entry:
+  %retval = alloca i32, align 4
+  store i32 0, i32* %retval
+  ret i32 0
+}
diff --git a/test/ExecutionEngine/test-setcond-fp.ll b/test/ExecutionEngine/test-setcond-fp.ll
index d1d6d05..68276e6 100644
--- a/test/ExecutionEngine/test-setcond-fp.ll
+++ b/test/ExecutionEngine/test-setcond-fp.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 
 define i32 @main() {
diff --git a/test/ExecutionEngine/test-setcond-int.ll b/test/ExecutionEngine/test-setcond-int.ll
index f59d325..48dc021 100644
--- a/test/ExecutionEngine/test-setcond-int.ll
+++ b/test/ExecutionEngine/test-setcond-int.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 define i32 @main() {
 	%int1 = add i32 0, 0		; <i32> [#uses=6]
diff --git a/test/ExecutionEngine/test-shift.ll b/test/ExecutionEngine/test-shift.ll
index d0fb90a..590e262 100644
--- a/test/ExecutionEngine/test-shift.ll
+++ b/test/ExecutionEngine/test-shift.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 define i32 @main() {
 	%shamt = add i8 0, 1		; <i8> [#uses=8]
diff --git a/test/Feature/dg.exp b/test/Feature/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Feature/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Feature/float.ll b/test/Feature/float.ll
index 6c6c5dd..b875afe 100644
--- a/test/Feature/float.ll
+++ b/test/Feature/float.ll
@@ -2,5 +2,6 @@
 ; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
 ; RUN: diff %t1.ll %t2.ll
 
+@H1     = global half 0x4010000000000000
 @F1     = global float 0x4010000000000000
 @D1     = global double 0x4010000000000000
diff --git a/test/Feature/lit.local.cfg b/test/Feature/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Feature/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Instrumentation/AddressSanitizer/asan-vs-gvn.ll b/test/Instrumentation/AddressSanitizer/asan-vs-gvn.ll
new file mode 100644
index 0000000..c0fe15e
--- /dev/null
+++ b/test/Instrumentation/AddressSanitizer/asan-vs-gvn.ll
@@ -0,0 +1,56 @@
+; RUN: opt < %s -basicaa -gvn -asan -S | FileCheck %s
+; ASAN conflicts with load widening iff the widened load accesses data out of bounds
+; (while the original unwidened loads do not).
+; http://code.google.com/p/address-sanitizer/issues/detail?id=20#c1
+
+
+; 32-bit little endian target.
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+
+%struct_of_7_bytes_4_aligned = type { i32, i8, i8, i8}
+
+@f = global %struct_of_7_bytes_4_aligned zeroinitializer, align 4
+
+; Accessing bytes 4 and 6, not ok to widen to i32 if address_safety is set.
+
+define i32 @test_widening_bad(i8* %P) nounwind ssp noredzone address_safety {
+entry:
+  %tmp = load i8* getelementptr inbounds (%struct_of_7_bytes_4_aligned* @f, i64 0, i32 1), align 4
+  %conv = zext i8 %tmp to i32
+  %tmp1 = load i8* getelementptr inbounds (%struct_of_7_bytes_4_aligned* @f, i64 0, i32 3), align 1
+  %conv2 = zext i8 %tmp1 to i32
+  %add = add nsw i32 %conv, %conv2
+  ret i32 %add
+; CHECK: @test_widening_bad
+; CHECK: __asan_report_load1
+; CHECK: __asan_report_load1
+; CHECK-NOT: __asan_report
+; We can not use check for "ret" here because __asan_report_load1 calls live after ret.
+; CHECK: end_test_widening_bad
+}
+
+define void @end_test_widening_bad() {
+  entry:
+  ret void
+}
+
+;; Accessing bytes 4 and 5. Ok to widen to i16.
+
+define i32 @test_widening_ok(i8* %P) nounwind ssp noredzone address_safety {
+entry:
+  %tmp = load i8* getelementptr inbounds (%struct_of_7_bytes_4_aligned* @f, i64 0, i32 1), align 4
+  %conv = zext i8 %tmp to i32
+  %tmp1 = load i8* getelementptr inbounds (%struct_of_7_bytes_4_aligned* @f, i64 0, i32 2), align 1
+  %conv2 = zext i8 %tmp1 to i32
+  %add = add nsw i32 %conv, %conv2
+  ret i32 %add
+; CHECK: @test_widening_ok
+; CHECK: __asan_report_load2
+; CHECK-NOT: __asan_report
+; CHECK: end_test_widening_ok
+}
+
+define void @end_test_widening_ok() {
+  entry:
+  ret void
+}
diff --git a/test/Instrumentation/AddressSanitizer/bug_11395.ll b/test/Instrumentation/AddressSanitizer/bug_11395.ll
index c53c385..35c5c4a 100644
--- a/test/Instrumentation/AddressSanitizer/bug_11395.ll
+++ b/test/Instrumentation/AddressSanitizer/bug_11395.ll
@@ -36,14 +36,14 @@ target triple = "i386-unknown-linux-gnu"
 @ff_mlp_firorder_7 = external global i8
 @ff_mlp_firorder_8 = external global i8
 
-define void @ff_mlp_init_x86(%struct.DSPContext* nocapture %c, %struct.AVCodecContext* nocapture %avctx) nounwind {
+define void @ff_mlp_init_x86(%struct.DSPContext* nocapture %c, %struct.AVCodecContext* nocapture %avctx) nounwind address_safety {
 entry:
   %mlp_filter_channel = getelementptr inbounds %struct.DSPContext* %c, i32 0, i32 131
   store void (i32*, i32*, i32, i32, i32, i32, i32, i32*)* @mlp_filter_channel_x86, void (i32*, i32*, i32, i32, i32, i32, i32, i32*)** %mlp_filter_channel, align 4, !tbaa !0
   ret void
 }
 
-define internal void @mlp_filter_channel_x86(i32* %state, i32* %coeff, i32 %firorder, i32 %iirorder, i32 %filter_shift, i32 %mask, i32 %blocksize, i32* %sample_buffer) nounwind {
+define internal void @mlp_filter_channel_x86(i32* %state, i32* %coeff, i32 %firorder, i32 %iirorder, i32 %filter_shift, i32 %mask, i32 %blocksize, i32* %sample_buffer) nounwind address_safety {
 entry:
   %filter_shift.addr = alloca i32, align 4
   %mask.addr = alloca i32, align 4
diff --git a/test/Instrumentation/AddressSanitizer/dg.exp b/test/Instrumentation/AddressSanitizer/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Instrumentation/AddressSanitizer/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Instrumentation/AddressSanitizer/instrument-no-return.ll b/test/Instrumentation/AddressSanitizer/instrument-no-return.ll
new file mode 100644
index 0000000..80f1b1c
--- /dev/null
+++ b/test/Instrumentation/AddressSanitizer/instrument-no-return.ll
@@ -0,0 +1,17 @@
+; RUN: opt < %s -asan -S | FileCheck %s
+; AddressSanitizer must insert __asan_handle_no_return
+; before every noreturn call.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare void @MyNoReturnFunc(i32) noreturn
+
+define i32 @_Z5ChildPv(i8* nocapture %arg) uwtable address_safety {
+entry:
+  call void @MyNoReturnFunc(i32 1) noreturn
+  unreachable
+}
+
+; CHECK:        call void @__asan_handle_no_return
+; CHECK-NEXT:   call void @MyNoReturnFunc
diff --git a/test/Instrumentation/AddressSanitizer/instrument_global.ll b/test/Instrumentation/AddressSanitizer/instrument_global.ll
new file mode 100644
index 0000000..ba8d65a
--- /dev/null
+++ b/test/Instrumentation/AddressSanitizer/instrument_global.ll
@@ -0,0 +1,20 @@
+; RUN: opt < %s -asan -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+@xxx = global i32 0, align 4
+
+; If a global is present, __asan_[un]register_globals should be called from
+; module ctor/dtor
+
+; CHECK: llvm.global_dtors
+; CHECK: llvm.global_ctors
+
+; CHECK: define internal void @asan.module_ctor
+; CHECK-NOT: ret
+; CHECK: call void @__asan_register_globals
+; CHECK: ret
+
+; CHECK: define internal void @asan.module_dtor
+; CHECK-NOT: ret
+; CHECK: call void @__asan_unregister_globals
+; CHECK: ret
diff --git a/test/Instrumentation/AddressSanitizer/instrument_load_then_store.ll b/test/Instrumentation/AddressSanitizer/instrument_load_then_store.ll
new file mode 100644
index 0000000..633bf9a
--- /dev/null
+++ b/test/Instrumentation/AddressSanitizer/instrument_load_then_store.ll
@@ -0,0 +1,25 @@
+; Test that AddressSanitizer instruments "(*a)++" only once.
+; RUN: opt < %s -asan -S -asan-opt=1 | FileCheck %s -check-prefix=OPT1
+; RUN: opt < %s -asan -S -asan-opt=0 | FileCheck %s -check-prefix=OPT0
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+define void @IncrementMe(i32* %a) address_safety {
+entry:
+  %tmp1 = load i32* %a, align 4
+  %tmp2 = add i32 %tmp1,  1
+  store i32 %tmp2, i32* %a, align 4
+  ret void
+}
+
+; With optimizations enabled we should see only one call to __asan_report_*
+; OPT1: IncrementMe
+; OPT1: __asan_report_
+; OPT1-NOT: __asan_report_
+; OPT1: asan.module_ctor
+
+; Without optimizations we should see two calls to __asan_report_*
+; OPT0: IncrementMe
+; OPT0: __asan_report_
+; OPT0: __asan_report_
+; OPT0: asan.module_ctor
diff --git a/test/Instrumentation/AddressSanitizer/lit.local.cfg b/test/Instrumentation/AddressSanitizer/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Instrumentation/AddressSanitizer/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Instrumentation/AddressSanitizer/test64.ll b/test/Instrumentation/AddressSanitizer/test64.ll
index e26fb3d..fc27de9 100644
--- a/test/Instrumentation/AddressSanitizer/test64.ll
+++ b/test/Instrumentation/AddressSanitizer/test64.ll
@@ -1,7 +1,7 @@
 ; RUN: opt < %s -asan -S | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-unknown-linux-gnu"
-define i32 @read_4_bytes(i32* %a) {
+define i32 @read_4_bytes(i32* %a) address_safety {
 entry:
   %tmp1 = load i32* %a, align 4
   ret i32 %tmp1
diff --git a/test/Instrumentation/ThreadSanitizer/tsan_basic.ll b/test/Instrumentation/ThreadSanitizer/tsan_basic.ll
new file mode 100644
index 0000000..33c703b
--- /dev/null
+++ b/test/Instrumentation/ThreadSanitizer/tsan_basic.ll
@@ -0,0 +1,22 @@
+; RUN: opt < %s -tsan -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @read_4_bytes(i32* %a) {
+entry:
+  %tmp1 = load i32* %a, align 4
+  ret i32 %tmp1
+}
+
+; CHECK: @llvm.global_ctors = {{.*}}@__tsan_init
+
+; CHECK: define i32 @read_4_bytes(i32* %a) {
+; CHECK:        call void @__tsan_func_entry(i8* %0)
+; CHECK-NEXT:   %1 = bitcast i32* %a to i8*
+; CHECK-NEXT:   call void @__tsan_read4(i8* %1)
+; CHECK-NEXT:   %tmp1 = load i32* %a, align 4
+; CHECK-NEXT:   call void @__tsan_func_exit()
+; CHECK: ret i32
+
+
diff --git a/test/Integer/dg.exp b/test/Integer/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Integer/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Integer/lit.local.cfg b/test/Integer/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Integer/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Linker/dg.exp b/test/Linker/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Linker/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Linker/link-type-names.ll b/test/Linker/link-type-names.ll
new file mode 100644
index 0000000..bfc3b64
--- /dev/null
+++ b/test/Linker/link-type-names.ll
@@ -0,0 +1,10 @@
+; RUN: echo "%X = type { i32 } @G2 = global %X { i32 4 }" > %t.ll
+; RUN: llvm-link %s %t.ll -S | FileCheck %s
+; PR11464
+
+%X = type { i32 }
+@G = global %X { i32 4 }
+
+
+; CHECK: @G = global %X { i32 4 }
+; CHECK: @G2 = global %X { i32 4 }
diff --git a/test/Linker/lit.local.cfg b/test/Linker/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Linker/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Linker/module-flags-1-a.ll b/test/Linker/module-flags-1-a.ll
new file mode 100644
index 0000000..973aa80
--- /dev/null
+++ b/test/Linker/module-flags-1-a.ll
@@ -0,0 +1,16 @@
+; RUN: llvm-link %s %p/module-flags-1-b.ll -S -o - | sort | FileCheck %s
+
+; Test basic functionality of module flags.
+
+; CHECK: !0 = metadata !{i32 1, metadata !"foo", i32 37}
+; CHECK: !1 = metadata !{i32 1, metadata !"qux", i32 42}
+; CHECK: !2 = metadata !{i32 1, metadata !"mux", metadata !3}
+; CHECK: !3 = metadata !{metadata !"hello world", i32 927}
+; CHECK: !4 = metadata !{i32 2, metadata !"bar", i32 42}
+; CHECK: !llvm.module.flags = !{!0, !1, !2, !4}
+
+!0 = metadata !{ i32 1, metadata !"foo", i32 37 }
+!1 = metadata !{ i32 2, metadata !"bar", i32 42 }
+!2 = metadata !{ i32 1, metadata !"mux", metadata !{ metadata !"hello world", i32 927 } }
+
+!llvm.module.flags = !{ !0, !1, !2 }
diff --git a/test/Linker/module-flags-1-b.ll b/test/Linker/module-flags-1-b.ll
new file mode 100644
index 0000000..bf3f5e5
--- /dev/null
+++ b/test/Linker/module-flags-1-b.ll
@@ -0,0 +1,8 @@
+; This file is used with module-flags-1-a.ll
+; RUN: true
+
+!0 = metadata !{ i32 1, metadata !"foo", i32 37 }
+!1 = metadata !{ i32 1, metadata !"qux", i32 42 }
+!2 = metadata !{ i32 1, metadata !"mux", metadata !{ metadata !"hello world", i32 927 } }
+
+!llvm.module.flags = !{ !0, !1, !2 }
diff --git a/test/Linker/module-flags-2-a.ll b/test/Linker/module-flags-2-a.ll
new file mode 100644
index 0000000..3ae0288
--- /dev/null
+++ b/test/Linker/module-flags-2-a.ll
@@ -0,0 +1,10 @@
+; RUN: llvm-link %s %p/module-flags-2-b.ll -S -o - | sort | FileCheck %s
+
+; Test the 'override' behavior.
+
+; CHECK: !0 = metadata !{i32 4, metadata !"foo", i32 37}
+; CHECK: !llvm.module.flags = !{!0}
+
+!0 = metadata !{ i32 1, metadata !"foo", i32 927 }
+
+!llvm.module.flags = !{ !0 }
diff --git a/test/Linker/module-flags-2-b.ll b/test/Linker/module-flags-2-b.ll
new file mode 100644
index 0000000..ab55e4b
--- /dev/null
+++ b/test/Linker/module-flags-2-b.ll
@@ -0,0 +1,6 @@
+; This file is used with module-flags-2-a.ll
+; RUN: true
+
+!0 = metadata !{ i32 4, metadata !"foo", i32 37 } ; Override the "foo" value.
+
+!llvm.module.flags = !{ !0 }
diff --git a/test/Linker/module-flags-3-a.ll b/test/Linker/module-flags-3-a.ll
new file mode 100644
index 0000000..4233a0a
--- /dev/null
+++ b/test/Linker/module-flags-3-a.ll
@@ -0,0 +1,14 @@
+; RUN: llvm-link %s %p/module-flags-3-b.ll -S -o - | sort | FileCheck %s
+
+; Test 'require' behavior.
+
+; CHECK: !0 = metadata !{i32 1, metadata !"foo", i32 37}
+; CHECK: !1 = metadata !{i32 3, metadata !"foo", metadata !2}
+; CHECK: !2 = metadata !{metadata !"bar", i32 42}
+; CHECK: !3 = metadata !{i32 1, metadata !"bar", i32 42}
+; CHECK: !llvm.module.flags = !{!0, !1, !3}
+
+!0 = metadata !{ i32 1, metadata !"foo", i32 37 }
+!1 = metadata !{ i32 1, metadata !"bar", i32 42 }
+
+!llvm.module.flags = !{ !0, !1 }
diff --git a/test/Linker/module-flags-3-b.ll b/test/Linker/module-flags-3-b.ll
new file mode 100644
index 0000000..76be802
--- /dev/null
+++ b/test/Linker/module-flags-3-b.ll
@@ -0,0 +1,8 @@
+; This file is used with module-flags-3-a.ll
+; RUN: true
+
+!0 = metadata !{ i32 3, metadata !"foo",
+  metadata !{ metadata !"bar", i32 42 }
+}
+
+!llvm.module.flags = !{ !0 }
diff --git a/test/Linker/module-flags-4-a.ll b/test/Linker/module-flags-4-a.ll
new file mode 100644
index 0000000..f411a56
--- /dev/null
+++ b/test/Linker/module-flags-4-a.ll
@@ -0,0 +1,10 @@
+; RUN: not llvm-link %s %p/module-flags-4-b.ll -S -o - |& FileCheck %s
+
+; Test 'require' error.
+
+; CHECK: linking module flags 'bar': does not have the required value
+
+!0 = metadata !{ i32 1, metadata !"foo", i32 37 }
+!1 = metadata !{ i32 1, metadata !"bar", i32 927 }
+
+!llvm.module.flags = !{ !0, !1 }
diff --git a/test/Linker/module-flags-4-b.ll b/test/Linker/module-flags-4-b.ll
new file mode 100644
index 0000000..3a460bb
--- /dev/null
+++ b/test/Linker/module-flags-4-b.ll
@@ -0,0 +1,8 @@
+; This file is used with module-flags-4-a.ll
+; RUN: true
+
+!0 = metadata !{ i32 3, metadata !"foo",
+  metadata !{ metadata !"bar", i32 42 }
+}
+
+!llvm.module.flags = !{ !0 }
diff --git a/test/Linker/module-flags-5-a.ll b/test/Linker/module-flags-5-a.ll
new file mode 100644
index 0000000..2e59ecc
--- /dev/null
+++ b/test/Linker/module-flags-5-a.ll
@@ -0,0 +1,9 @@
+; RUN: not llvm-link %s %p/module-flags-5-b.ll -S -o - |& FileCheck %s
+
+; Test the 'override' error.
+
+; CHECK: linking module flags 'foo': IDs have conflicting override values
+
+!0 = metadata !{ i32 4, metadata !"foo", i32 927 }
+
+!llvm.module.flags = !{ !0 }
diff --git a/test/Linker/module-flags-5-b.ll b/test/Linker/module-flags-5-b.ll
new file mode 100644
index 0000000..1e99b20
--- /dev/null
+++ b/test/Linker/module-flags-5-b.ll
@@ -0,0 +1,6 @@
+; This file is used with module-flags-5-a.ll
+; RUN: true
+
+!0 = metadata !{ i32 4, metadata !"foo", i32 37 } ; Override the "foo" value.
+
+!llvm.module.flags = !{ !0 }
diff --git a/test/Linker/module-flags-6-a.ll b/test/Linker/module-flags-6-a.ll
new file mode 100644
index 0000000..c3e0225
--- /dev/null
+++ b/test/Linker/module-flags-6-a.ll
@@ -0,0 +1,9 @@
+; RUN: not llvm-link %s %p/module-flags-6-b.ll -S -o - |& FileCheck %s
+
+; Test that conflicting values for the same module flag ID are rejected.
+
+; CHECK: linking module flags 'foo': IDs have conflicting values
+
+!0 = metadata !{ i32 1, metadata !"foo", i32 37 }
+
+!llvm.module.flags = !{ !0 }
diff --git a/test/Linker/module-flags-6-b.ll b/test/Linker/module-flags-6-b.ll
new file mode 100644
index 0000000..2bc5a96
--- /dev/null
+++ b/test/Linker/module-flags-6-b.ll
@@ -0,0 +1,6 @@
+; This file is used with module-flags-6-a.ll
+; RUN: true
+
+!0 = metadata !{ i32 1, metadata !"foo", i32 38 }
+
+!llvm.module.flags = !{ !0 }
diff --git a/test/Linker/multiple-merged-structs.ll b/test/Linker/multiple-merged-structs.ll
new file mode 100644
index 0000000..348cd89
--- /dev/null
+++ b/test/Linker/multiple-merged-structs.ll
@@ -0,0 +1,19 @@
+; RUN: echo {%bug_type = type opaque \
+; RUN:     declare i32 @bug_a(%bug_type*) \
+; RUN:     declare i32 @bug_b(%bug_type*) } > %t.ll
+; RUN: llvm-link %t.ll %s
+; PR11464
+
+%bug_type = type { %bug_type* }
+%bar = type { i32 }
+
+define i32 @bug_a(%bug_type* %fp) nounwind uwtable {
+entry:
+  %d_stream = getelementptr inbounds %bug_type* %fp, i64 0, i32 0
+  ret i32 0
+}
+
+define i32 @bug_b(%bar* %a) nounwind uwtable {
+entry:
+  ret i32 0
+}
diff --git a/test/Linker/visibility1.ll b/test/Linker/visibility1.ll
new file mode 100644
index 0000000..131f6d5
--- /dev/null
+++ b/test/Linker/visibility1.ll
@@ -0,0 +1,46 @@
+; RUN: llvm-link %s %p/visibility2.ll -S | FileCheck %s
+; RUN: llvm-link %p/visibility2.ll %s -S | FileCheck %s
+
+; The values in this file are strong, the ones in visibility2.ll are weak,
+; but we should still get the visibility from the weak definitions.
+
+; Variables
+; CHECK: @v1 = hidden global i32 0
+@v1 = global i32 0
+
+; CHECK: @v2 = protected global i32 0
+@v2 = global i32 0
+
+; CHECK: @v3 = hidden global i32 0
+@v3 = protected global i32 0
+
+
+; Aliases
+; CHECK: @a1 = hidden alias i32* @v1
+@a1 = alias i32* @v1
+
+; CHECK: @a2 = protected alias i32* @v2
+@a2 = alias i32* @v2
+
+; CHECK: @a3 = hidden alias i32* @v3
+@a3 = protected alias i32* @v3
+
+
+; Functions
+; CHECK: define hidden void @f1()
+define void @f1()  {
+entry:
+  ret void
+}
+
+; CHECK: define protected void @f2()
+define void @f2()  {
+entry:
+  ret void
+}
+
+; CHECK: define hidden void @f3()
+define protected void @f3()  {
+entry:
+  ret void
+}
diff --git a/test/Linker/visibility2.ll b/test/Linker/visibility2.ll
new file mode 100644
index 0000000..e6363ca
--- /dev/null
+++ b/test/Linker/visibility2.ll
@@ -0,0 +1,27 @@
+; This file is used by visibility1.ll, so it doesn't actually do anything itself
+;
+; RUN: true
+
+; Variables
+@v1 = weak hidden global i32 0
+@v2 = weak protected global i32 0
+@v3 = weak hidden global i32 0
+
+; Aliases
+@a1 = hidden alias weak i32* @v1
+@a2 = protected alias weak i32* @v2
+@a3 = hidden alias weak i32* @v3
+
+; Functions
+define weak hidden void @f1() {
+entry:
+  ret void
+}
+define weak protected void @f2() {
+entry:
+  ret void
+}
+define weak hidden void @f3() {
+entry:
+  ret void
+}
diff --git a/test/MC/ARM/arm-aliases.s b/test/MC/ARM/arm-aliases.s
new file mode 100644
index 0000000..d4ea0df
--- /dev/null
+++ b/test/MC/ARM/arm-aliases.s
@@ -0,0 +1,17 @@
+@ RUN: llvm-mc -triple=armv7-apple-darwin -show-encoding < %s | FileCheck %s
+  .syntax unified
+
+@ Shift-by-zero should canonicalize to no shift at all (lsl #0 encoding)
+        add r1, r2, r3, lsl #0
+        sub r1, r2, r3, ror #0
+        eor r1, r2, r3, lsr #0
+        orr r1, r2, r3, asr #0
+        and r1, r2, r3, ror #0
+        bic r1, r2, r3, lsl #0
+
+@ CHECK: add	r1, r2, r3              @ encoding: [0x03,0x10,0x82,0xe0]
+@ CHECK: sub	r1, r2, r3              @ encoding: [0x03,0x10,0x42,0xe0]
+@ CHECK: eor	r1, r2, r3              @ encoding: [0x03,0x10,0x22,0xe0]
+@ CHECK: orr	r1, r2, r3              @ encoding: [0x03,0x10,0x82,0xe1]
+@ CHECK: and	r1, r2, r3              @ encoding: [0x03,0x10,0x02,0xe0]
+@ CHECK: bic	r1, r2, r3              @ encoding: [0x03,0x10,0xc2,0xe1]
diff --git a/test/MC/ARM/arm-it-block.s b/test/MC/ARM/arm-it-block.s
new file mode 100644
index 0000000..e5e5491
--- /dev/null
+++ b/test/MC/ARM/arm-it-block.s
@@ -0,0 +1,11 @@
+@ RUN: llvm-mc -triple=armv7-apple-darwin -show-encoding < %s | FileCheck %s
+  .syntax unified
+  .globl _func
+
+_func:
+@ CHECK: _func:
+        it eq
+        moveq r2, r3
+@ 'it' is parsed but not encoded.
+@ CHECK-NOT: it
+@ CHECK: moveq	r2, r3          @ encoding: [0x03,0x20,0xa0,0x01]
diff --git a/test/MC/ARM/arm-memory-instructions.s b/test/MC/ARM/arm-memory-instructions.s
index 783ac28..d8d9130 100644
--- a/test/MC/ARM/arm-memory-instructions.s
+++ b/test/MC/ARM/arm-memory-instructions.s
@@ -130,8 +130,13 @@ _func:
 
 
 @------------------------------------------------------------------------------
-@ FIXME: LDRD (label)
+@ LDRD (label)
 @------------------------------------------------------------------------------
+        ldrd r2, r3, Lbaz
+Lbaz: .quad 0
+
+@ CHECK: ldrd	r2, r3, Lbaz            @ encoding: [0xd0'A',0x20'A',0x4f'A',0xe1'A']
+
 
 @------------------------------------------------------------------------------
 @ LDRD (register)
diff --git a/test/MC/ARM/arm_fixups.s b/test/MC/ARM/arm_fixups.s
index aba0cd8..2f34748 100644
--- a/test/MC/ARM/arm_fixups.s
+++ b/test/MC/ARM/arm_fixups.s
@@ -3,7 +3,7 @@
 
     bl _printf
 @ CHECK: bl _printf @ encoding: [A,A,A,0xeb]
-@ CHECK: @ fixup A - offset: 0, value: _printf, kind: fixup_arm_uncondbranch
+@ CHECK: @ fixup A - offset: 0, value: _printf, kind: fixup_arm_bl
 
     mov r9, :lower16:(_foo)
     movw r9, :lower16:(_foo)
diff --git a/test/MC/ARM/basic-arm-instructions.s b/test/MC/ARM/basic-arm-instructions.s
index 080bc6f..4ae1ac7 100644
--- a/test/MC/ARM/basic-arm-instructions.s
+++ b/test/MC/ARM/basic-arm-instructions.s
@@ -388,9 +388,9 @@ Lforward:
         blx	#16212288
 
 @ CHECK: bl  _bar @ encoding: [A,A,A,0xeb]
-@ CHECK:   @   fixup A - offset: 0, value: _bar, kind: fixup_arm_uncondbranch
+@ CHECK:   @   fixup A - offset: 0, value: _bar, kind: fixup_arm_bl
 @ CHECK: blx	_bar @ encoding: [A,A,A,0xfa]
-           @   fixup A - offset: 0, value: _bar, kind: fixup_arm_uncondbranch
+           @   fixup A - offset: 0, value: _bar, kind: fixup_arm_blx
 @ CHECK: blls	#28634268               @ encoding: [0x27,0x3b,0x6d,0x9b]
 @ CHECK: blx	#32424576               @ encoding: [0xa0,0xb0,0x7b,0xfa]
 @ CHECK: blx	#16212288               @ encoding: [0x50,0xd8,0x3d,0xfa]
@@ -904,11 +904,28 @@ Lforward:
         movs r2, r3
         moveq r2, r3
         movseq r2, r3
+        mov r12, r8, lsl #(2 - 2)
+        lsl r2, r3, #(2 - 2)
+        mov r12, r8, lsr #(2 - 2)
+        lsr r2, r3, #(2 - 2)
+        mov r12, r8, asr #(2 - 2)
+        asr r2, r3, #(2 - 2)
+        mov r12, r8, ror #(2 - 2)
+        ror r2, r3, #(2 - 2)
 
 @ CHECK: mov	r2, r3                  @ encoding: [0x03,0x20,0xa0,0xe1]
 @ CHECK: movs	r2, r3                  @ encoding: [0x03,0x20,0xb0,0xe1]
 @ CHECK: moveq	r2, r3                  @ encoding: [0x03,0x20,0xa0,0x01]
 @ CHECK: movseq	r2, r3                  @ encoding: [0x03,0x20,0xb0,0x01]
+@ CHECK: mov	r12, r8                 @ encoding: [0x08,0xc0,0xa0,0xe1]
+@ CHECK: mov	r2, r3                  @ encoding: [0x03,0x20,0xa0,0xe1]
+@ CHECK: mov	r12, r8                 @ encoding: [0x08,0xc0,0xa0,0xe1]
+@ CHECK: mov	r2, r3                  @ encoding: [0x03,0x20,0xa0,0xe1]
+@ CHECK: mov	r12, r8                 @ encoding: [0x08,0xc0,0xa0,0xe1]
+@ CHECK: mov	r2, r3                  @ encoding: [0x03,0x20,0xa0,0xe1]
+@ CHECK: mov	r12, r8                 @ encoding: [0x08,0xc0,0xa0,0xe1]
+@ CHECK: mov	r2, r3                  @ encoding: [0x03,0x20,0xa0,0xe1]
+
 
 @------------------------------------------------------------------------------
 @ MOVT
diff --git a/test/MC/ARM/basic-thumb2-instructions.s b/test/MC/ARM/basic-thumb2-instructions.s
index be640f0..78311af 100644
--- a/test/MC/ARM/basic-thumb2-instructions.s
+++ b/test/MC/ARM/basic-thumb2-instructions.s
@@ -1155,11 +1155,37 @@ _func:
         mov r6, r2, lsr #16
         movs r6, r2, asr #32
         movs r6, r2, ror #5
+        movs r4, r4, lsl r5
+        movs r4, r4, lsr r5
+        movs r4, r4, asr r5
+        movs r4, r4, ror r5
+        mov r4, r4, lsl r5
+        movs r4, r4, ror r8
+        movs r4, r5, lsr r6
+        itttt eq
+        moveq r4, r4, lsl r5
+        moveq r4, r4, lsr r5
+        moveq r4, r4, asr r5
+        moveq r4, r4, ror r5
+        mov r4, r4, rrx
 
 @ CHECK: lsl.w	r6, r2, #16             @ encoding: [0x4f,0xea,0x02,0x46]
 @ CHECK: lsr.w	r6, r2, #16             @ encoding: [0x4f,0xea,0x12,0x46]
 @ CHECK: asrs	r6, r2, #32             @ encoding: [0x16,0x10]
 @ CHECK: rors.w	r6, r2, #5              @ encoding: [0x5f,0xea,0x72,0x16]
+@ CHECK: lsls	r4, r5                  @ encoding: [0xac,0x40]
+@ CHECK: lsrs	r4, r5                  @ encoding: [0xec,0x40]
+@ CHECK: asrs	r4, r5                  @ encoding: [0x2c,0x41]
+@ CHECK: rors	r4, r5                  @ encoding: [0xec,0x41]
+@ CHECK: lsl.w	r4, r4, r5              @ encoding: [0x04,0xfa,0x05,0xf4]
+@ CHECK: rors.w	r4, r4, r8              @ encoding: [0x74,0xfa,0x08,0xf4]
+@ CHECK: lsrs.w	r4, r5, r6              @ encoding: [0x35,0xfa,0x06,0xf4]
+@ CHECK: itttt	eq                      @ encoding: [0x01,0xbf]
+@ CHECK: lsleq	r4, r5                  @ encoding: [0xac,0x40]
+@ CHECK: lsreq	r4, r5                  @ encoding: [0xec,0x40]
+@ CHECK: asreq	r4, r5                  @ encoding: [0x2c,0x41]
+@ CHECK: roreq	r4, r5                  @ encoding: [0xec,0x41]
+@ CHECK: rrx	r4, r4                  @ encoding: [0x4f,0xea,0x34,0x04]
 
 
 @------------------------------------------------------------------------------
@@ -3297,3 +3323,30 @@ _func:
 @ CHECK: wfelt                          @ encoding: [0x20,0xbf]
 @ CHECK: wfige                          @ encoding: [0x30,0xbf]
 @ CHECK: yieldlt                        @ encoding: [0x10,0xbf]
+
+
+@------------------------------------------------------------------------------
+@ Alternate syntax for LDR*(literal) encodings
+@------------------------------------------------------------------------------
+        ldr r11, [pc, #-22]
+        ldrb r11, [pc, #-22]
+        ldrh r11, [pc, #-22]
+        ldrsb r11, [pc, #-22]
+        ldrsh r11, [pc, #-22]
+
+        ldr.w r11, [pc, #-22]
+        ldrb.w r11, [pc, #-22]
+        ldrh.w r11, [pc, #-22]
+        ldrsb.w r11, [pc, #-22]
+        ldrsh.w r11, [pc, #-22]
+
+@ CHECK: ldr.w	r11, [pc, #-22]         @ encoding: [0x5f,0xf8,0x16,0xb0]
+@ CHECK: ldrb.w	r11, [pc, #-22]         @ encoding: [0x1f,0xf8,0x16,0xb0]
+@ CHECK: ldrh.w	r11, [pc, #-22]         @ encoding: [0x3f,0xf8,0x16,0xb0]
+@ CHECK: ldrsb.w r11, [pc, #-22]        @ encoding: [0x1f,0xf9,0x16,0xb0]
+@ CHECK: ldrsh.w r11, [pc, #-22]        @ encoding: [0x3f,0xf9,0x16,0xb0]
+@ CHECK: ldr.w	r11, [pc, #-22]         @ encoding: [0x5f,0xf8,0x16,0xb0]
+@ CHECK: ldrb.w	r11, [pc, #-22]         @ encoding: [0x1f,0xf8,0x16,0xb0]
+@ CHECK: ldrh.w	r11, [pc, #-22]         @ encoding: [0x3f,0xf8,0x16,0xb0]
+@ CHECK: ldrsb.w r11, [pc, #-22]        @ encoding: [0x1f,0xf9,0x16,0xb0]
+@ CHECK: ldrsh.w r11, [pc, #-22]        @ encoding: [0x3f,0xf9,0x16,0xb0]
diff --git a/test/MC/ARM/cxx-global-constructor.ll b/test/MC/ARM/cxx-global-constructor.ll
new file mode 100644
index 0000000..e06d2c7
--- /dev/null
+++ b/test/MC/ARM/cxx-global-constructor.ll
@@ -0,0 +1,12 @@
+; RUN: llc %s -mtriple=armv7-linux-gnueabi -relocation-model=pic \
+; RUN: -filetype=obj -o - | elf-dump --dump-section-data | FileCheck %s
+
+
+@llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @f }]
+
+define void @f() {
+  ret void
+}
+
+; Check for a relocation of type R_ARM_TARGET1.
+; CHECK: ('r_type', 0x26)
diff --git a/test/MC/ARM/dg.exp b/test/MC/ARM/dg.exp
deleted file mode 100644
index 055fa25..0000000
--- a/test/MC/ARM/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target ARM] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp,s}]]
-}
diff --git a/test/MC/ARM/elf-thumbfunc-reloc.s b/test/MC/ARM/elf-thumbfunc-reloc.s
new file mode 100644
index 0000000..4a311dd
--- /dev/null
+++ b/test/MC/ARM/elf-thumbfunc-reloc.s
@@ -0,0 +1,23 @@
+@@ test that a call from a thumb function gets an R_ARM_THM_CALL relocation
+@ RUN: llvm-mc %s -triple=arm-freebsd-eabi -filetype=obj -o - | \
+@ RUN: elf-dump  | FileCheck %s
+
+
+	.syntax unified
+        .text
+        .globl  f
+        .align  2
+        .type   f,%function
+        .code   16
+        .thumb_func
+f:
+        push    {r7, lr}
+        mov     r7, sp
+        bl      g
+        pop     {r7, pc}
+
+@@ make sure an R_ARM_THM_CALL relocation is generated for the call to g
+@CHECK:        ('_relocations', [
+@CHECK:         (('r_offset', 0x00000004)
+@CHECK-NEXT:     ('r_sym', 0x{{[0-9a-fA-F]+}})
+@CHECK-NEXT:     ('r_type', 0x0a)
diff --git a/test/MC/ARM/lit.local.cfg b/test/MC/ARM/lit.local.cfg
new file mode 100644
index 0000000..92d3ff3
--- /dev/null
+++ b/test/MC/ARM/lit.local.cfg
@@ -0,0 +1,13 @@
+config.suffixes = ['.ll', '.c', '.cpp', '.s']
+
+def getRoot(config):
+    if not config.parent:
+        return config
+    return getRoot(config.parent)
+
+root = getRoot(config)
+
+targets = set(root.targets_to_build.split())
+if not 'ARM' in targets:
+    config.unsupported = True
+
diff --git a/test/MC/ARM/neon-minmax-encoding.s b/test/MC/ARM/neon-minmax-encoding.s
index 2d0d8c9..b1eb258 100644
--- a/test/MC/ARM/neon-minmax-encoding.s
+++ b/test/MC/ARM/neon-minmax-encoding.s
@@ -1,58 +1,124 @@
 @ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unknown -show-encoding < %s | FileCheck %s
 
-@ CHECK: vmin.s8	d16, d16, d17           @ encoding: [0xb1,0x06,0x40,0xf2]
-	vmin.s8	d16, d16, d17
-@ CHECK: vmin.s16	d16, d16, d17   @ encoding: [0xb1,0x06,0x50,0xf2]
-	vmin.s16	d16, d16, d17
-@ CHECK: vmin.s32	d16, d16, d17   @ encoding: [0xb1,0x06,0x60,0xf2]
-	vmin.s32	d16, d16, d17
-@ CHECK: vmin.u8	d16, d16, d17           @ encoding: [0xb1,0x06,0x40,0xf3]
-	vmin.u8	d16, d16, d17
-@ CHECK: vmin.u16	d16, d16, d17   @ encoding: [0xb1,0x06,0x50,0xf3]
-	vmin.u16	d16, d16, d17
-@ CHECK: vmin.u32	d16, d16, d17   @ encoding: [0xb1,0x06,0x60,0xf3]
-	vmin.u32	d16, d16, d17
-@ CHECK: vmin.f32	d16, d16, d17   @ encoding: [0xa1,0x0f,0x60,0xf2]
-	vmin.f32	d16, d16, d17
-@ CHECK: vmin.s8	q8, q8, q9              @ encoding: [0xf2,0x06,0x40,0xf2]
-	vmin.s8	q8, q8, q9
-@ CHECK: vmin.s16	q8, q8, q9      @ encoding: [0xf2,0x06,0x50,0xf2]
-	vmin.s16	q8, q8, q9
-@ CHECK: vmin.s32	q8, q8, q9      @ encoding: [0xf2,0x06,0x60,0xf2]
-	vmin.s32	q8, q8, q9
-@ CHECK: vmin.u8	q8, q8, q9              @ encoding: [0xf2,0x06,0x40,0xf3]
-	vmin.u8	q8, q8, q9
-@ CHECK: vmin.u16	q8, q8, q9      @ encoding: [0xf2,0x06,0x50,0xf3]
-	vmin.u16	q8, q8, q9
-@ CHECK: vmin.u32	q8, q8, q9      @ encoding: [0xf2,0x06,0x60,0xf3]
-	vmin.u32	q8, q8, q9
-@ CHECK: vmin.f32	q8, q8, q9      @ encoding: [0xe2,0x0f,0x60,0xf2]
-	vmin.f32	q8, q8, q9
-@ CHECK: vmax.s8	d16, d16, d17           @ encoding: [0xa1,0x06,0x40,0xf2]
-	vmax.s8	d16, d16, d17
-@ CHECK: vmax.s16	d16, d16, d17   @ encoding: [0xa1,0x06,0x50,0xf2]
-	vmax.s16	d16, d16, d17
-@ CHECK: vmax.s32	d16, d16, d17   @ encoding: [0xa1,0x06,0x60,0xf2]
-	vmax.s32	d16, d16, d17
-@ CHECK: vmax.u8	d16, d16, d17           @ encoding: [0xa1,0x06,0x40,0xf3]
-	vmax.u8	d16, d16, d17
-@ CHECK: vmax.u16	d16, d16, d17   @ encoding: [0xa1,0x06,0x50,0xf3]
-	vmax.u16	d16, d16, d17
-@ CHECK: vmax.u32	d16, d16, d17   @ encoding: [0xa1,0x06,0x60,0xf3]
-	vmax.u32	d16, d16, d17
-@ CHECK: vmax.f32	d16, d16, d17   @ encoding: [0xa1,0x0f,0x40,0xf2]
-	vmax.f32	d16, d16, d17
-@ CHECK: vmax.s8	q8, q8, q9              @ encoding: [0xe2,0x06,0x40,0xf2]
-	vmax.s8	q8, q8, q9
-@ CHECK: vmax.s16	q8, q8, q9      @ encoding: [0xe2,0x06,0x50,0xf2]
-	vmax.s16	q8, q8, q9
+        vmax.s8 d1, d2, d3
+        vmax.s16 d4, d5, d6
+        vmax.s32 d7, d8, d9
+        vmax.u8 d10, d11, d12
+        vmax.u16 d13, d14, d15
+        vmax.u32 d16, d17, d18
+        vmax.f32 d19, d20, d21
+
+        vmax.s8 d2, d3
+        vmax.s16 d5, d6
+        vmax.s32 d8, d9
+        vmax.u8 d11, d12
+        vmax.u16 d14, d15
+        vmax.u32 d17, d18
+        vmax.f32 d20, d21
+
+        vmax.s8 q1, q2, q3
+        vmax.s16 q4, q5, q6
+        vmax.s32 q7, q8, q9
+        vmax.u8 q10, q11, q12
+        vmax.u16 q13, q14, q15
+        vmax.u32 q6, q7, q8
+        vmax.f32 q9, q5, q1
+
+        vmax.s8 q2, q3
+        vmax.s16 q5, q6
+        vmax.s32 q8, q9
+        vmax.u8 q11, q2
+        vmax.u16 q4, q5
+        vmax.u32 q7, q8
+        vmax.f32 q2, q1
+
+@ CHECK: vmax.s8	d1, d2, d3      @ encoding: [0x03,0x16,0x02,0xf2]
+@ CHECK: vmax.s16	d4, d5, d6      @ encoding: [0x06,0x46,0x15,0xf2]
+@ CHECK: vmax.s32	d7, d8, d9      @ encoding: [0x09,0x76,0x28,0xf2]
+@ CHECK: vmax.u8	d10, d11, d12   @ encoding: [0x0c,0xa6,0x0b,0xf3]
+@ CHECK: vmax.u16	d13, d14, d15   @ encoding: [0x0f,0xd6,0x1e,0xf3]
+@ CHECK: vmax.u32	d16, d17, d18   @ encoding: [0xa2,0x06,0x61,0xf3]
+@ CHECK: vmax.f32	d19, d20, d21   @ encoding: [0xa5,0x3f,0x44,0xf2]
+@ CHECK: vmax.s8	d2, d2, d3      @ encoding: [0x03,0x26,0x02,0xf2]
+@ CHECK: vmax.s16	d5, d5, d6      @ encoding: [0x06,0x56,0x15,0xf2]
+@ CHECK: vmax.s32	d8, d8, d9      @ encoding: [0x09,0x86,0x28,0xf2]
+@ CHECK: vmax.u8	d11, d11, d12   @ encoding: [0x0c,0xb6,0x0b,0xf3]
+@ CHECK: vmax.u16	d14, d14, d15   @ encoding: [0x0f,0xe6,0x1e,0xf3]
+@ CHECK: vmax.u32	d17, d17, d18   @ encoding: [0xa2,0x16,0x61,0xf3]
+@ CHECK: vmax.f32	d20, d20, d21   @ encoding: [0xa5,0x4f,0x44,0xf2]
+@ CHECK: vmax.s8	q1, q2, q3      @ encoding: [0x46,0x26,0x04,0xf2]
+@ CHECK: vmax.s16	q4, q5, q6      @ encoding: [0x4c,0x86,0x1a,0xf2]
+@ CHECK: vmax.s32	q7, q8, q9      @ encoding: [0xe2,0xe6,0x20,0xf2]
+@ CHECK: vmax.u8	q10, q11, q12   @ encoding: [0xe8,0x46,0x46,0xf3]
+@ CHECK: vmax.u16	q13, q14, q15   @ encoding: [0xee,0xa6,0x5c,0xf3]
+@ CHECK: vmax.u32	q6, q7, q8      @ encoding: [0x60,0xc6,0x2e,0xf3]
+@ CHECK: vmax.f32	q9, q5, q1      @ encoding: [0x42,0x2f,0x4a,0xf2]
+@ CHECK: vmax.s8	q2, q2, q3      @ encoding: [0x46,0x46,0x04,0xf2]
+@ CHECK: vmax.s16	q5, q5, q6      @ encoding: [0x4c,0xa6,0x1a,0xf2]
 @ CHECK: vmax.s32	q8, q8, q9      @ encoding: [0xe2,0x06,0x60,0xf2]
-	vmax.s32	q8, q8, q9
-@ CHECK: vmax.u8	q8, q8, q9              @ encoding: [0xe2,0x06,0x40,0xf3]
-	vmax.u8	q8, q8, q9
-@ CHECK: vmax.u16	q8, q8, q9      @ encoding: [0xe2,0x06,0x50,0xf3]
-	vmax.u16	q8, q8, q9
-@ CHECK: vmax.u32	q8, q8, q9      @ encoding: [0xe2,0x06,0x60,0xf3]
-	vmax.u32	q8, q8, q9
-@ CHECK: vmax.f32	q8, q8, q9      @ encoding: [0xe2,0x0f,0x40,0xf2]
-	vmax.f32	q8, q8, q9
+@ CHECK: vmax.u8	q11, q11, q2    @ encoding: [0xc4,0x66,0x46,0xf3]
+@ CHECK: vmax.u16	q4, q4, q5      @ encoding: [0x4a,0x86,0x18,0xf3]
+@ CHECK: vmax.u32	q7, q7, q8      @ encoding: [0x60,0xe6,0x2e,0xf3]
+@ CHECK: vmax.f32	q2, q2, q1      @ encoding: [0x42,0x4f,0x04,0xf2]
+
+
+        vmin.s8 d1, d2, d3
+        vmin.s16 d4, d5, d6
+        vmin.s32 d7, d8, d9
+        vmin.u8 d10, d11, d12
+        vmin.u16 d13, d14, d15
+        vmin.u32 d16, d17, d18
+        vmin.f32 d19, d20, d21
+
+        vmin.s8 d2, d3
+        vmin.s16 d5, d6
+        vmin.s32 d8, d9
+        vmin.u8 d11, d12
+        vmin.u16 d14, d15
+        vmin.u32 d17, d18
+        vmin.f32 d20, d21
+
+        vmin.s8 q1, q2, q3
+        vmin.s16 q4, q5, q6
+        vmin.s32 q7, q8, q9
+        vmin.u8 q10, q11, q12
+        vmin.u16 q13, q14, q15
+        vmin.u32 q6, q7, q8
+        vmin.f32 q9, q5, q1
+
+        vmin.s8 q2, q3
+        vmin.s16 q5, q6
+        vmin.s32 q8, q9
+        vmin.u8 q11, q2
+        vmin.u16 q4, q5
+        vmin.u32 q7, q8
+        vmin.f32 q2, q1
+
+@ CHECK: vmin.s8	d1, d2, d3      @ encoding: [0x13,0x16,0x02,0xf2]
+@ CHECK: vmin.s16	d4, d5, d6      @ encoding: [0x16,0x46,0x15,0xf2]
+@ CHECK: vmin.s32	d7, d8, d9      @ encoding: [0x19,0x76,0x28,0xf2]
+@ CHECK: vmin.u8	d10, d11, d12   @ encoding: [0x1c,0xa6,0x0b,0xf3]
+@ CHECK: vmin.u16	d13, d14, d15   @ encoding: [0x1f,0xd6,0x1e,0xf3]
+@ CHECK: vmin.u32	d16, d17, d18   @ encoding: [0xb2,0x06,0x61,0xf3]
+@ CHECK: vmin.f32	d19, d20, d21   @ encoding: [0xa5,0x3f,0x64,0xf2]
+@ CHECK: vmin.s8	d2, d2, d3      @ encoding: [0x13,0x26,0x02,0xf2]
+@ CHECK: vmin.s16	d5, d5, d6      @ encoding: [0x16,0x56,0x15,0xf2]
+@ CHECK: vmin.s32	d8, d8, d9      @ encoding: [0x19,0x86,0x28,0xf2]
+@ CHECK: vmin.u8	d11, d11, d12   @ encoding: [0x1c,0xb6,0x0b,0xf3]
+@ CHECK: vmin.u16	d14, d14, d15   @ encoding: [0x1f,0xe6,0x1e,0xf3]
+@ CHECK: vmin.u32	d17, d17, d18   @ encoding: [0xb2,0x16,0x61,0xf3]
+@ CHECK: vmin.f32	d20, d20, d21   @ encoding: [0xa5,0x4f,0x64,0xf2]
+@ CHECK: vmin.s8	q1, q2, q3      @ encoding: [0x56,0x26,0x04,0xf2]
+@ CHECK: vmin.s16	q4, q5, q6      @ encoding: [0x5c,0x86,0x1a,0xf2]
+@ CHECK: vmin.s32	q7, q8, q9      @ encoding: [0xf2,0xe6,0x20,0xf2]
+@ CHECK: vmin.u8	q10, q11, q12   @ encoding: [0xf8,0x46,0x46,0xf3]
+@ CHECK: vmin.u16	q13, q14, q15   @ encoding: [0xfe,0xa6,0x5c,0xf3]
+@ CHECK: vmin.u32	q6, q7, q8      @ encoding: [0x70,0xc6,0x2e,0xf3]
+@ CHECK: vmin.f32	q9, q5, q1      @ encoding: [0x42,0x2f,0x6a,0xf2]
+@ CHECK: vmin.s8	q2, q2, q3      @ encoding: [0x56,0x46,0x04,0xf2]
+@ CHECK: vmin.s16	q5, q5, q6      @ encoding: [0x5c,0xa6,0x1a,0xf2]
+@ CHECK: vmin.s32	q8, q8, q9      @ encoding: [0xf2,0x06,0x60,0xf2]
+@ CHECK: vmin.u8	q11, q11, q2    @ encoding: [0xd4,0x66,0x46,0xf3]
+@ CHECK: vmin.u16	q4, q4, q5      @ encoding: [0x5a,0x86,0x18,0xf3]
+@ CHECK: vmin.u32	q7, q7, q8      @ encoding: [0x70,0xe6,0x2e,0xf3]
+@ CHECK: vmin.f32	q2, q2, q1      @ encoding: [0x42,0x4f,0x24,0xf2]
diff --git a/test/MC/ARM/neon-pairwise-encoding.s b/test/MC/ARM/neon-pairwise-encoding.s
index 65c47bd..b1e86aa 100644
--- a/test/MC/ARM/neon-pairwise-encoding.s
+++ b/test/MC/ARM/neon-pairwise-encoding.s
@@ -8,6 +8,16 @@
 	vpadd.i32	d16, d17, d16
 @ CHECK: vpadd.f32	d16, d16, d17   @ encoding: [0xa1,0x0d,0x40,0xf3]
 	vpadd.f32	d16, d16, d17
+
+@ CHECK: vpadd.i8	d17, d17, d16   @ encoding: [0xb0,0x1b,0x41,0xf2]
+	vpadd.i8	d17, d16
+@ CHECK: vpadd.i16	d17, d17, d16   @ encoding: [0xb0,0x1b,0x51,0xf2]
+	vpadd.i16	d17, d16
+@ CHECK: vpadd.i32	d17, d17, d16   @ encoding: [0xb0,0x1b,0x61,0xf2]
+	vpadd.i32	d17, d16
+@ CHECK: vpadd.f32	d16, d16, d17   @ encoding: [0xa1,0x0d,0x40,0xf3]
+	vpadd.f32	d16, d17
+
 @ CHECK: vpaddl.s8	d16, d16        @ encoding: [0x20,0x02,0xf0,0xf3]
 	vpaddl.s8	d16, d16
 @ CHECK: vpaddl.s16	d16, d16        @ encoding: [0x20,0x02,0xf4,0xf3]
diff --git a/test/MC/ARM/neon-shift-encoding.s b/test/MC/ARM/neon-shift-encoding.s
index 7e4b543..cd450a8 100644
--- a/test/MC/ARM/neon-shift-encoding.s
+++ b/test/MC/ARM/neon-shift-encoding.s
@@ -105,70 +105,151 @@ _foo:
 @ CHECK: vshr.s32	q8, q8, #31  @ encoding: [0x70,0x00,0xe1,0xf2]
 @ CHECK: vshr.s64	q8, q8, #63  @ encoding: [0xf0,0x00,0xc1,0xf2]
 
-@ CHECK: vsra.u8  d16, d16, #7   @ encoding: [0x30,0x01,0xc9,0xf3]
-	vsra.u8   d16, d16, #7
-@ CHECK: vsra.u16 d16, d16, #15  @ encoding: [0x30,0x01,0xd1,0xf3]
-	vsra.u16  d16, d16, #15
-@ CHECK: vsra.u32 d16, d16, #31  @ encoding: [0x30,0x01,0xe1,0xf3]
-	vsra.u32  d16, d16, #31
-@ CHECK: vsra.u64 d16, d16, #63  @ encoding: [0xb0,0x01,0xc1,0xf3]
-	vsra.u64  d16, d16, #63
-@ CHECK: vsra.u8  q8, q8, #7     @ encoding: [0x70,0x01,0xc9,0xf3]
-	vsra.u8   q8, q8, #7
-@ CHECK: vsra.u16 q8, q8, #15    @ encoding: [0x70,0x01,0xd1,0xf3]
-	vsra.u16  q8, q8, #15
-@ CHECK: vsra.u32 q8, q8, #31    @ encoding: [0x70,0x01,0xe1,0xf3]
-	vsra.u32  q8, q8, #31
-@ CHECK: vsra.u64 q8, q8, #63    @ encoding: [0xf0,0x01,0xc1,0xf3]
-	vsra.u64  q8, q8, #63
-@ CHECK: vsra.s8  d16, d16, #7   @ encoding: [0x30,0x01,0xc9,0xf2]
-	vsra.s8   d16, d16, #7
-@ CHECK: vsra.s16 d16, d16, #15  @ encoding: [0x30,0x01,0xd1,0xf2]
-	vsra.s16  d16, d16, #15
-@ CHECK: vsra.s32 d16, d16, #31  @ encoding: [0x30,0x01,0xe1,0xf2]
-	vsra.s32  d16, d16, #31
-@ CHECK: vsra.s64 d16, d16, #63  @ encoding: [0xb0,0x01,0xc1,0xf2]
-	vsra.s64  d16, d16, #63
-@ CHECK: vsra.s8  q8, q8, #7     @ encoding: [0x70,0x01,0xc9,0xf2]
-	vsra.s8   q8, q8, #7
-@ CHECK: vsra.s16 q8, q8, #15    @ encoding: [0x70,0x01,0xd1,0xf2]
-	vsra.s16  q8, q8, #15
-@ CHECK: vsra.s32 q8, q8, #31    @ encoding: [0x70,0x01,0xe1,0xf2]
-	vsra.s32  q8, q8, #31
-@ CHECK: vsra.s64 q8, q8, #63    @ encoding: [0xf0,0x01,0xc1,0xf2]
-	vsra.s64  q8, q8, #63
-@ CHECK: vsri.8   d16, d16, #7  @ encoding: [0x30,0x04,0xc9,0xf3]
-	vsri.8   d16, d16, #7
-@ CHECK: vsri.16  d16, d16, #15 @ encoding: [0x30,0x04,0xd1,0xf3]
-	vsri.16  d16, d16, #15
-@ CHECK: vsri.32  d16, d16, #31 @ encoding: [0x30,0x04,0xe1,0xf3]
-	vsri.32  d16, d16, #31
-@ CHECK: vsri.64  d16, d16, #63 @ encoding: [0xb0,0x04,0xc1,0xf3]
-	vsri.64  d16, d16, #63
-@ CHECK: vsri.8   q8, q8, #7    @ encoding: [0x70,0x04,0xc9,0xf3]
-	vsri.8   q8, q8, #7
-@ CHECK: vsri.16  q8, q8, #15   @ encoding: [0x70,0x04,0xd1,0xf3]
-	vsri.16  q8, q8, #15
-@ CHECK: vsri.32  q8, q8, #31   @ encoding: [0x70,0x04,0xe1,0xf3]
-	vsri.32  q8, q8, #31
-@ CHECK: vsri.64  q8, q8, #63   @ encoding: [0xf0,0x04,0xc1,0xf3]
-	vsri.64  q8, q8, #63
-@ CHECK: vsli.8   d16, d16, #7  @ encoding: [0x30,0x05,0xcf,0xf3]
-	vsli.8   d16, d16, #7
-@ CHECK: vsli.16  d16, d16, #15 @ encoding: [0x30,0x05,0xdf,0xf3]
-	vsli.16  d16, d16, #15
-@ CHECK: vsli.32  d16, d16, #31 @ encoding: [0x30,0x05,0xff,0xf3]
-	vsli.32  d16, d16, #31
-@ CHECK: vsli.64  d16, d16, #63 @ encoding: [0xb0,0x05,0xff,0xf3]
-	vsli.64  d16, d16, #63
-@ CHECK: vsli.8   q8, q8, #7    @ encoding: [0x70,0x05,0xcf,0xf3]
-	vsli.8   q8, q8, #7
-@ CHECK: vsli.16  q8, q8, #15   @ encoding: [0x70,0x05,0xdf,0xf3]
-	vsli.16  q8, q8, #15
-@ CHECK: vsli.32  q8, q8, #31   @ encoding: [0x70,0x05,0xff,0xf3]
-	vsli.32  q8, q8, #31
-@ CHECK: vsli.64  q8, q8, #63   @ encoding: [0xf0,0x05,0xff,0xf3]
-	vsli.64  q8, q8, #63
+
+	vsra.s8   d16, d6, #7
+	vsra.s16  d26, d18, #15
+	vsra.s32  d11, d10, #31
+	vsra.s64  d12, d19, #63
+	vsra.s8   q1, q8, #7
+	vsra.s16  q2, q7, #15
+	vsra.s32  q3, q6, #31
+	vsra.s64  q4, q5, #63
+
+	vsra.s8   d16, #7
+	vsra.s16  d15, #15
+	vsra.s32  d14, #31
+	vsra.s64  d13, #63
+	vsra.s8   q4, #7
+	vsra.s16  q5, #15
+	vsra.s32  q6, #31
+	vsra.s64  q7, #63
+
+@ CHECK: vsra.s8	d16, d6, #7     @ encoding: [0x16,0x01,0xc9,0xf2]
+@ CHECK: vsra.s16	d26, d18, #15   @ encoding: [0x32,0xa1,0xd1,0xf2]
+@ CHECK: vsra.s32	d11, d10, #31   @ encoding: [0x1a,0xb1,0xa1,0xf2]
+@ CHECK: vsra.s64	d12, d19, #63   @ encoding: [0xb3,0xc1,0x81,0xf2]
+@ CHECK: vsra.s8	q1, q8, #7      @ encoding: [0x70,0x21,0x89,0xf2]
+@ CHECK: vsra.s16	q2, q7, #15     @ encoding: [0x5e,0x41,0x91,0xf2]
+@ CHECK: vsra.s32	q3, q6, #31     @ encoding: [0x5c,0x61,0xa1,0xf2]
+@ CHECK: vsra.s64	q4, q5, #63     @ encoding: [0xda,0x81,0x81,0xf2]
+@ CHECK: vsra.s8	d16, d16, #7    @ encoding: [0x30,0x01,0xc9,0xf2]
+@ CHECK: vsra.s16	d15, d15, #15   @ encoding: [0x1f,0xf1,0x91,0xf2]
+@ CHECK: vsra.s32	d14, d14, #31   @ encoding: [0x1e,0xe1,0xa1,0xf2]
+@ CHECK: vsra.s64	d13, d13, #63   @ encoding: [0x9d,0xd1,0x81,0xf2]
+@ CHECK: vsra.s8	q4, q4, #7      @ encoding: [0x58,0x81,0x89,0xf2]
+@ CHECK: vsra.s16	q5, q5, #15     @ encoding: [0x5a,0xa1,0x91,0xf2]
+@ CHECK: vsra.s32	q6, q6, #31     @ encoding: [0x5c,0xc1,0xa1,0xf2]
+@ CHECK: vsra.s64	q7, q7, #63     @ encoding: [0xde,0xe1,0x81,0xf2]
+
+
+	vsra.u8   d16, d6, #7
+	vsra.u16  d26, d18, #15
+	vsra.u32  d11, d10, #31
+	vsra.u64  d12, d19, #63
+	vsra.u8   q1, q8, #7
+	vsra.u16  q2, q7, #15
+	vsra.u32  q3, q6, #31
+	vsra.u64  q4, q5, #63
+
+	vsra.u8   d16, #7
+	vsra.u16  d15, #15
+	vsra.u32  d14, #31
+	vsra.u64  d13, #63
+	vsra.u8   q4, #7
+	vsra.u16  q5, #15
+	vsra.u32  q6, #31
+	vsra.u64  q7, #63
+
+@ CHECK: vsra.u8	d16, d6, #7     @ encoding: [0x16,0x01,0xc9,0xf3]
+@ CHECK: vsra.u16	d26, d18, #15   @ encoding: [0x32,0xa1,0xd1,0xf3]
+@ CHECK: vsra.u32	d11, d10, #31   @ encoding: [0x1a,0xb1,0xa1,0xf3]
+@ CHECK: vsra.u64	d12, d19, #63   @ encoding: [0xb3,0xc1,0x81,0xf3]
+@ CHECK: vsra.u8	q1, q8, #7      @ encoding: [0x70,0x21,0x89,0xf3]
+@ CHECK: vsra.u16	q2, q7, #15     @ encoding: [0x5e,0x41,0x91,0xf3]
+@ CHECK: vsra.u32	q3, q6, #31     @ encoding: [0x5c,0x61,0xa1,0xf3]
+@ CHECK: vsra.u64	q4, q5, #63     @ encoding: [0xda,0x81,0x81,0xf3]
+@ CHECK: vsra.u8	d16, d16, #7    @ encoding: [0x30,0x01,0xc9,0xf3]
+@ CHECK: vsra.u16	d15, d15, #15   @ encoding: [0x1f,0xf1,0x91,0xf3]
+@ CHECK: vsra.u32	d14, d14, #31   @ encoding: [0x1e,0xe1,0xa1,0xf3]
+@ CHECK: vsra.u64	d13, d13, #63   @ encoding: [0x9d,0xd1,0x81,0xf3]
+@ CHECK: vsra.u8	q4, q4, #7      @ encoding: [0x58,0x81,0x89,0xf3]
+@ CHECK: vsra.u16	q5, q5, #15     @ encoding: [0x5a,0xa1,0x91,0xf3]
+@ CHECK: vsra.u32	q6, q6, #31     @ encoding: [0x5c,0xc1,0xa1,0xf3]
+@ CHECK: vsra.u64	q7, q7, #63     @ encoding: [0xde,0xe1,0x81,0xf3]
+
+
+	vsri.8   d16, d6, #7
+	vsri.16  d26, d18, #15
+	vsri.32  d11, d10, #31
+	vsri.64  d12, d19, #63
+	vsri.8   q1, q8, #7
+	vsri.16  q2, q7, #15
+	vsri.32  q3, q6, #31
+	vsri.64  q4, q5, #63
+
+	vsri.8   d16, #7
+	vsri.16  d15, #15
+	vsri.32  d14, #31
+	vsri.64  d13, #63
+	vsri.8   q4, #7
+	vsri.16  q5, #15
+	vsri.32  q6, #31
+	vsri.64  q7, #63
+
+@ CHECK: vsri.8	d16, d6, #7             @ encoding: [0x16,0x04,0xc9,0xf3]
+@ CHECK: vsri.16 d26, d18, #15          @ encoding: [0x32,0xa4,0xd1,0xf3]
+@ CHECK: vsri.32 d11, d10, #31          @ encoding: [0x1a,0xb4,0xa1,0xf3]
+@ CHECK: vsri.64 d12, d19, #63          @ encoding: [0xb3,0xc4,0x81,0xf3]
+@ CHECK: vsri.8	q1, q8, #7              @ encoding: [0x70,0x24,0x89,0xf3]
+@ CHECK: vsri.16 q2, q7, #15            @ encoding: [0x5e,0x44,0x91,0xf3]
+@ CHECK: vsri.32 q3, q6, #31            @ encoding: [0x5c,0x64,0xa1,0xf3]
+@ CHECK: vsri.64 q4, q5, #63            @ encoding: [0xda,0x84,0x81,0xf3]
+@ CHECK: vsri.8	d16, d16, #7            @ encoding: [0x30,0x04,0xc9,0xf3]
+@ CHECK: vsri.16 d15, d15, #15          @ encoding: [0x1f,0xf4,0x91,0xf3]
+@ CHECK: vsri.32 d14, d14, #31          @ encoding: [0x1e,0xe4,0xa1,0xf3]
+@ CHECK: vsri.64 d13, d13, #63          @ encoding: [0x9d,0xd4,0x81,0xf3]
+@ CHECK: vsri.8	q4, q4, #7              @ encoding: [0x58,0x84,0x89,0xf3]
+@ CHECK: vsri.16 q5, q5, #15            @ encoding: [0x5a,0xa4,0x91,0xf3]
+@ CHECK: vsri.32 q6, q6, #31            @ encoding: [0x5c,0xc4,0xa1,0xf3]
+@ CHECK: vsri.64 q7, q7, #63            @ encoding: [0xde,0xe4,0x81,0xf3]
+
+
+	vsli.8   d16, d6, #7
+	vsli.16  d26, d18, #15
+	vsli.32  d11, d10, #31
+	vsli.64  d12, d19, #63
+	vsli.8   q1, q8, #7
+	vsli.16  q2, q7, #15
+	vsli.32  q3, q6, #31
+	vsli.64  q4, q5, #63
+
+	vsli.8   d16, #7
+	vsli.16  d15, #15
+	vsli.32  d14, #31
+	vsli.64  d13, #63
+	vsli.8   q4, #7
+	vsli.16  q5, #15
+	vsli.32  q6, #31
+	vsli.64  q7, #63
+
+@ CHECK: vsli.8	d16, d6, #7             @ encoding: [0x16,0x05,0xcf,0xf3]
+@ CHECK: vsli.16 d26, d18, #15          @ encoding: [0x32,0xa5,0xdf,0xf3]
+@ CHECK: vsli.32 d11, d10, #31          @ encoding: [0x1a,0xb5,0xbf,0xf3]
+@ CHECK: vsli.64 d12, d19, #63          @ encoding: [0xb3,0xc5,0xbf,0xf3]
+@ CHECK: vsli.8	q1, q8, #7              @ encoding: [0x70,0x25,0x8f,0xf3]
+@ CHECK: vsli.16 q2, q7, #15            @ encoding: [0x5e,0x45,0x9f,0xf3]
+@ CHECK: vsli.32 q3, q6, #31            @ encoding: [0x5c,0x65,0xbf,0xf3]
+@ CHECK: vsli.64 q4, q5, #63            @ encoding: [0xda,0x85,0xbf,0xf3]
+@ CHECK: vsli.8	d16, d16, #7            @ encoding: [0x30,0x05,0xcf,0xf3]
+@ CHECK: vsli.16 d15, d15, #15          @ encoding: [0x1f,0xf5,0x9f,0xf3]
+@ CHECK: vsli.32 d14, d14, #31          @ encoding: [0x1e,0xe5,0xbf,0xf3]
+@ CHECK: vsli.64 d13, d13, #63          @ encoding: [0x9d,0xd5,0xbf,0xf3]
+@ CHECK: vsli.8	q4, q4, #7              @ encoding: [0x58,0x85,0x8f,0xf3]
+@ CHECK: vsli.16 q5, q5, #15            @ encoding: [0x5a,0xa5,0x9f,0xf3]
+@ CHECK: vsli.32 q6, q6, #31            @ encoding: [0x5c,0xc5,0xbf,0xf3]
+@ CHECK: vsli.64 q7, q7, #63            @ encoding: [0xde,0xe5,0xbf,0xf3]
+
+
 @ CHECK: vshll.s8	q8, d16, #7  @ encoding: [0x30,0x0a,0xcf,0xf2]
 	vshll.s8	q8, d16, #7
 @ CHECK: vshll.s16	q8, d16, #15  @ encoding: [0x30,0x0a,0xdf,0xf2]
diff --git a/test/MC/ARM/neon-shiftaccum-encoding.s b/test/MC/ARM/neon-shiftaccum-encoding.s
deleted file mode 100644
index 0dc630d..0000000
--- a/test/MC/ARM/neon-shiftaccum-encoding.s
+++ /dev/null
@@ -1,98 +0,0 @@
-@ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unknown -show-encoding < %s | FileCheck %s
-
-@ CHECK: vsra.s8	d17, d16, #8            @ encoding: [0x30,0x11,0xc8,0xf2]
-	vsra.s8	d17, d16, #8
-@ CHECK: vsra.s16	d17, d16, #16   @ encoding: [0x30,0x11,0xd0,0xf2]
-	vsra.s16	d17, d16, #16
-@ CHECK: vsra.s32	d17, d16, #32   @ encoding: [0x30,0x11,0xe0,0xf2]
-	vsra.s32	d17, d16, #32
-@ CHECK: vsra.s64	d17, d16, #64   @ encoding: [0xb0,0x11,0xc0,0xf2]
-	vsra.s64	d17, d16, #64
-@ CHECK: vsra.s8	q8, q9, #8              @ encoding: [0x72,0x01,0xc8,0xf2]
-	vsra.s8	q8, q9, #8
-@ CHECK: vsra.s16	q8, q9, #16     @ encoding: [0x72,0x01,0xd0,0xf2]
-	vsra.s16	q8, q9, #16
-@ CHECK: vsra.s32	q8, q9, #32     @ encoding: [0x72,0x01,0xe0,0xf2]
-	vsra.s32	q8, q9, #32
-@ CHECK: vsra.s64	q8, q9, #64     @ encoding: [0xf2,0x01,0xc0,0xf2]
-	vsra.s64	q8, q9, #64
-@ CHECK: vsra.u8	d17, d16, #8            @ encoding: [0x30,0x11,0xc8,0xf3]
-	vsra.u8	d17, d16, #8
-@ CHECK: vsra.u16	d17, d16, #16   @ encoding: [0x30,0x11,0xd0,0xf3]
-	vsra.u16	d17, d16, #16
-@ CHECK: vsra.u32	d17, d16, #32   @ encoding: [0x30,0x11,0xe0,0xf3]
-	vsra.u32	d17, d16, #32
-@ CHECK: vsra.u64	d17, d16, #64   @ encoding: [0xb0,0x11,0xc0,0xf3]
-	vsra.u64	d17, d16, #64
-@ CHECK: vsra.u8	q8, q9, #8              @ encoding: [0x72,0x01,0xc8,0xf3]
-	vsra.u8	q8, q9, #8
-@ CHECK: vsra.u16	q8, q9, #16     @ encoding: [0x72,0x01,0xd0,0xf3]
-	vsra.u16	q8, q9, #16
-@ CHECK: vsra.u32	q8, q9, #32     @ encoding: [0x72,0x01,0xe0,0xf3]
-	vsra.u32	q8, q9, #32
-@ CHECK: vsra.u64	q8, q9, #64     @ encoding: [0xf2,0x01,0xc0,0xf3]
-	vsra.u64	q8, q9, #64
-@ CHECK: vrsra.s8	d17, d16, #8    @ encoding: [0x30,0x13,0xc8,0xf2]
-	vrsra.s8	d17, d16, #8
-@ CHECK: vrsra.s16	d17, d16, #16   @ encoding: [0x30,0x13,0xd0,0xf2]
-	vrsra.s16	d17, d16, #16
-@ CHECK: vrsra.s32	d17, d16, #32   @ encoding: [0x30,0x13,0xe0,0xf2]
-	vrsra.s32	d17, d16, #32
-@ CHECK: vrsra.s64	d17, d16, #64   @ encoding: [0xb0,0x13,0xc0,0xf2]
-	vrsra.s64	d17, d16, #64
-@ CHECK: vrsra.u8	d17, d16, #8    @ encoding: [0x30,0x13,0xc8,0xf3]
-	vrsra.u8	d17, d16, #8
-@ CHECK: vrsra.u16	d17, d16, #16   @ encoding: [0x30,0x13,0xd0,0xf3]
-	vrsra.u16	d17, d16, #16
-@ CHECK: vrsra.u32	d17, d16, #32   @ encoding: [0x30,0x13,0xe0,0xf3]
-	vrsra.u32	d17, d16, #32
-@ CHECK: vrsra.u64	d17, d16, #64   @ encoding: [0xb0,0x13,0xc0,0xf3]
-	vrsra.u64	d17, d16, #64
-@ CHECK: vrsra.s8	q8, q9, #8      @ encoding: [0x72,0x03,0xc8,0xf2]
-	vrsra.s8	q8, q9, #8
-@ CHECK: vrsra.s16	q8, q9, #16     @ encoding: [0x72,0x03,0xd0,0xf2]
-	vrsra.s16	q8, q9, #16
-@ CHECK: vrsra.s32	q8, q9, #32     @ encoding: [0x72,0x03,0xe0,0xf2]
-	vrsra.s32	q8, q9, #32
-@ CHECK: vrsra.s64	q8, q9, #64     @ encoding: [0xf2,0x03,0xc0,0xf2]
-	vrsra.s64	q8, q9, #64
-@ CHECK: vrsra.u8	q8, q9, #8      @ encoding: [0x72,0x03,0xc8,0xf3]
-	vrsra.u8	q8, q9, #8
-@ CHECK: vrsra.u16	q8, q9, #16     @ encoding: [0x72,0x03,0xd0,0xf3]
-	vrsra.u16	q8, q9, #16
-@ CHECK: vrsra.u32	q8, q9, #32     @ encoding: [0x72,0x03,0xe0,0xf3]
-	vrsra.u32	q8, q9, #32
-@ CHECK: vrsra.u64	q8, q9, #64     @ encoding: [0xf2,0x03,0xc0,0xf3]
-	vrsra.u64	q8, q9, #64
-@ CHECK: vsli.8	d17, d16, #7            @ encoding: [0x30,0x15,0xcf,0xf3]
-	vsli.8	d17, d16, #7
-@ CHECK: vsli.16	d17, d16, #15           @ encoding: [0x30,0x15,0xdf,0xf3]
-	vsli.16	d17, d16, #15
-@ CHECK: vsli.32	d17, d16, #31           @ encoding: [0x30,0x15,0xff,0xf3]
-	vsli.32	d17, d16, #31
-@ CHECK: vsli.64	d17, d16, #63           @ encoding: [0xb0,0x15,0xff,0xf3]
-	vsli.64	d17, d16, #63
-@ CHECK: vsli.8	q9, q8, #7              @ encoding: [0x70,0x25,0xcf,0xf3]
-	vsli.8	q9, q8, #7
-@ CHECK: vsli.16	q9, q8, #15             @ encoding: [0x70,0x25,0xdf,0xf3]
-	vsli.16	q9, q8, #15
-@ CHECK: vsli.32	q9, q8, #31             @ encoding: [0x70,0x25,0xff,0xf3]
-	vsli.32	q9, q8, #31
-@ CHECK: vsli.64	q9, q8, #63             @ encoding: [0xf0,0x25,0xff,0xf3]
-	vsli.64	q9, q8, #63
-@ CHECK: vsri.8	d17, d16, #8            @ encoding: [0x30,0x14,0xc8,0xf3]
-	vsri.8	d17, d16, #8
-@ CHECK: vsri.16	d17, d16, #16           @ encoding: [0x30,0x14,0xd0,0xf3]
-	vsri.16	d17, d16, #16
-@ CHECK: vsri.32	d17, d16, #32           @ encoding: [0x30,0x14,0xe0,0xf3]
-	vsri.32	d17, d16, #32
-@ CHECK: vsri.64	d17, d16, #64           @ encoding: [0xb0,0x14,0xc0,0xf3]
-	vsri.64	d17, d16, #64
-@ CHECK: vsri.8	q9, q8, #8              @ encoding: [0x70,0x24,0xc8,0xf3]
-	vsri.8	q9, q8, #8
-@ CHECK: vsri.16	q9, q8, #16             @ encoding: [0x70,0x24,0xd0,0xf3]
-	vsri.16	q9, q8, #16
-@ CHECK: vsri.32	q9, q8, #32             @ encoding: [0x70,0x24,0xe0,0xf3]
-	vsri.32	q9, q8, #32
-@ CHECK: vsri.64	q9, q8, #64             @ encoding: [0xf0,0x24,0xc0,0xf3]
-	vsri.64	q9, q8, #64
diff --git a/test/MC/ARM/neon-vld-encoding.s b/test/MC/ARM/neon-vld-encoding.s
index 736e953..3cc6bf1 100644
--- a/test/MC/ARM/neon-vld-encoding.s
+++ b/test/MC/ARM/neon-vld-encoding.s
@@ -154,46 +154,87 @@
 @ CHECK: vld2.32 {d14, d15, d16, d17}, [r0, :256], r6 @ encoding: [0xb6,0xe3,0x20,0xf4]
 
 
-@	vld3.8	{d16, d17, d18}, [r0, :64]
-@	vld3.16	{d16, d17, d18}, [r0]
-@	vld3.32	{d16, d17, d18}, [r0]
-@	vld3.8	{d16, d18, d20}, [r0, :64]!
-@	vld3.8	{d17, d19, d21}, [r0, :64]!
-@	vld3.16	{d16, d18, d20}, [r0]!
-@	vld3.16	{d17, d19, d21}, [r0]!
-@	vld3.32	{d16, d18, d20}, [r0]!
-@	vld3.32	{d17, d19, d21}, [r0]!
-
-@ FIXME: vld3.8	{d16, d17, d18}, [r0, :64] @ encoding: [0x1f,0x04,0x60,0xf4]
-@ FIXME: vld3.16 {d16, d17, d18}, [r0]  @ encoding: [0x4f,0x04,0x60,0xf4]
-@ FIXME: vld3.32 {d16, d17, d18}, [r0]  @ encoding: [0x8f,0x04,0x60,0xf4]
-@ FIXME: vld3.8	{d16, d18, d20}, [r0, :64]! @ encoding: [0x1d,0x05,0x60,0xf4]
-@ FIXME: vld3.8	{d17, d19, d21}, [r0, :64]! @ encoding: [0x1d,0x15,0x60,0xf4]
-@ FIXME: vld3.16 {d16, d18, d20}, [r0]! @ encoding: [0x4d,0x05,0x60,0xf4]
-@ FIXME: vld3.16 {d17, d19, d21}, [r0]! @ encoding: [0x4d,0x15,0x60,0xf4]
-@ FIXME: vld3.32 {d16, d18, d20}, [r0]! @ encoding: [0x8d,0x05,0x60,0xf4]
-@ FIXME: vld3.32 {d17, d19, d21}, [r0]! @ encoding: [0x8d,0x15,0x60,0xf4]
-
-
-@	vld4.8	{d16, d17, d18, d19}, [r0, :64]
-@	vld4.16	{d16, d17, d18, d19}, [r0, :128]
-@	vld4.32	{d16, d17, d18, d19}, [r0, :256]
-@	vld4.8	{d16, d18, d20, d22}, [r0, :256]!
-@	vld4.8	{d17, d19, d21, d23}, [r0, :256]!
-@	vld4.16	{d16, d18, d20, d22}, [r0]!
-@	vld4.16	{d17, d19, d21, d23}, [r0]!
-@	vld4.32	{d16, d18, d20, d22}, [r0]!
-@	vld4.32	{d17, d19, d21, d23}, [r0]!
-
-@ FIXME: vld4.8	{d16, d17, d18, d19}, [r0, :64]@ encoding: [0x1f,0x00,0x60,0xf4]
-@ FIXME: vld4.16 {d16, d17, d18, d19}, [r0,:128]@ encoding:[0x6f,0x00,0x60,0xf4]
-@ FIXME: vld4.32 {d16, d17, d18, d19}, [r0,:256]@ encoding:[0xbf,0x00,0x60,0xf4]
-@ FIXME: vld4.8	{d16, d18, d20, d22}, [r0,:256]!@ encoding:[0x3d,0x01,0x60,0xf4]
-@ FIXME: vld4.8	{d17, d19, d21, d23}, [r0,:256]!@ encoding:[0x3d,0x11,0x60,0xf4]
-@ FIXME: vld4.16 {d16, d18, d20, d22}, [r0]! @ encoding: [0x4d,0x01,0x60,0xf4]
-@ FIXME: vld4.16 {d17, d19, d21, d23}, [r0]! @ encoding: [0x4d,0x11,0x60,0xf4]
-@ FIXME: vld4.32 {d16, d18, d20, d22}, [r0]! @ encoding: [0x8d,0x01,0x60,0xf4]
-@ FIXME: vld4.32 {d17, d19, d21, d23}, [r0]! @ encoding: [0x8d,0x11,0x60,0xf4]
+	vld3.8 {d16, d17, d18}, [r1]
+	vld3.16 {d6, d7, d8}, [r2]
+	vld3.32 {d1, d2, d3}, [r3]
+	vld3.8 {d16, d18, d20}, [r0, :64]
+	vld3.u16 {d27, d29, d31}, [r4]
+	vld3.i32 {d6, d8, d10}, [r5]
+
+	vld3.i8 {d12, d13, d14}, [r6], r1
+	vld3.i16 {d11, d12, d13}, [r7], r2
+	vld3.u32 {d2, d3, d4}, [r8], r3
+	vld3.8 {d4, d6, d8}, [r9], r4
+	vld3.u16 {d14, d16, d18}, [r9], r4
+	vld3.i32 {d16, d18, d20}, [r10], r5
+
+	vld3.p8 {d6, d7, d8}, [r8]!
+	vld3.16 {d9, d10, d11}, [r7]!
+	vld3.f32 {d1, d2, d3}, [r6]!
+	vld3.8 {d16, d18, d20}, [r0, :64]!
+	vld3.p16 {d20, d22, d24}, [r5]!
+	vld3.32 {d5, d7, d9}, [r4]!
+
+
+@ CHECK: vld3.8	{d16, d17, d18}, [r1]   @ encoding: [0x0f,0x04,0x61,0xf4]
+@ CHECK: vld3.16	{d6, d7, d8}, [r2]      @ encoding: [0x4f,0x64,0x22,0xf4]
+@ CHECK: vld3.32	{d1, d2, d3}, [r3]      @ encoding: [0x8f,0x14,0x23,0xf4]
+@ CHECK: vld3.8	{d16, d18, d20}, [r0, :64] @ encoding: [0x1f,0x05,0x60,0xf4]
+@ CHECK: vld3.16	{d27, d29, d31}, [r4]   @ encoding: [0x4f,0xb5,0x64,0xf4]
+@ CHECK: vld3.32	{d6, d8, d10}, [r5]     @ encoding: [0x8f,0x65,0x25,0xf4]
+@ CHECK: vld3.8	{d12, d13, d14}, [r6], r1 @ encoding: [0x01,0xc4,0x26,0xf4]
+@ CHECK: vld3.16	{d11, d12, d13}, [r7], r2 @ encoding: [0x42,0xb4,0x27,0xf4]
+@ CHECK: vld3.32	{d2, d3, d4}, [r8], r3  @ encoding: [0x83,0x24,0x28,0xf4]
+@ CHECK: vld3.8	{d4, d6, d8}, [r9], r4  @ encoding: [0x04,0x45,0x29,0xf4]
+@ CHECK: vld3.16	{d14, d16, d18}, [r9], r4 @ encoding: [0x44,0xe5,0x29,0xf4]
+@ CHECK: vld3.32	{d16, d18, d20}, [r10], r5 @ encoding: [0x85,0x05,0x6a,0xf4]
+@ CHECK: vld3.8	{d6, d7, d8}, [r8]!     @ encoding: [0x0d,0x64,0x28,0xf4]
+@ CHECK: vld3.16	{d9, d10, d11}, [r7]!   @ encoding: [0x4d,0x94,0x27,0xf4]
+@ CHECK: vld3.32	{d1, d2, d3}, [r6]!     @ encoding: [0x8d,0x14,0x26,0xf4]
+@ CHECK: vld3.8	{d16, d18, d20}, [r0, :64]! @ encoding: [0x1d,0x05,0x60,0xf4]
+@ CHECK: vld3.16	{d20, d22, d24}, [r5]!  @ encoding: [0x4d,0x45,0x65,0xf4]
+@ CHECK: vld3.32	{d5, d7, d9}, [r4]!     @ encoding: [0x8d,0x55,0x24,0xf4]
+
+
+	vld4.8 {d16, d17, d18, d19}, [r1, :64]
+	vld4.16 {d16, d17, d18, d19}, [r2, :128]
+	vld4.32 {d16, d17, d18, d19}, [r3, :256]
+	vld4.8 {d17, d19, d21, d23}, [r5, :256]
+	vld4.16 {d17, d19, d21, d23}, [r7]
+	vld4.32 {d16, d18, d20, d22}, [r8]
+
+	vld4.s8 {d16, d17, d18, d19}, [r1, :64]!
+	vld4.s16 {d16, d17, d18, d19}, [r2, :128]!
+	vld4.s32 {d16, d17, d18, d19}, [r3, :256]!
+	vld4.u8 {d17, d19, d21, d23}, [r5, :256]!
+	vld4.u16 {d17, d19, d21, d23}, [r7]!
+	vld4.u32 {d16, d18, d20, d22}, [r8]!
+
+	vld4.p8 {d16, d17, d18, d19}, [r1, :64], r8
+	vld4.p16 {d16, d17, d18, d19}, [r2], r7
+	vld4.f32 {d16, d17, d18, d19}, [r3, :64], r5
+	vld4.i8 {d16, d18, d20, d22}, [r4, :256], r2
+	vld4.i16 {d16, d18, d20, d22}, [r6], r3
+	vld4.i32 {d17, d19, d21, d23}, [r9], r4
+
+@ CHECK: vld4.8 {d16, d17, d18, d19}, [r1, :64] @ encoding: [0x1f,0x00,0x61,0xf4]
+@ CHECK: vld4.16 {d16, d17, d18, d19}, [r2, :128] @ encoding: [0x6f,0x00,0x62,0xf4]
+@ CHECK: vld4.32 {d16, d17, d18, d19}, [r3, :256] @ encoding: [0xbf,0x00,0x63,0xf4]
+@ CHECK: vld4.8 {d17, d19, d21, d23}, [r5, :256] @ encoding: [0x3f,0x11,0x65,0xf4]
+@ CHECK: vld4.16 {d17, d19, d21, d23}, [r7] @ encoding: [0x4f,0x11,0x67,0xf4]
+@ CHECK: vld4.32 {d16, d18, d20, d22}, [r8] @ encoding: [0x8f,0x01,0x68,0xf4]
+@ CHECK: vld4.8 {d16, d17, d18, d19}, [r1, :64]! @ encoding: [0x1d,0x00,0x61,0xf4]
+@ CHECK: vld4.16 {d16, d17, d18, d19}, [r2, :128]! @ encoding: [0x6d,0x00,0x62,0xf4]
+@ CHECK: vld4.32 {d16, d17, d18, d19}, [r3, :256]! @ encoding: [0xbd,0x00,0x63,0xf4]
+@ CHECK: vld4.8 {d17, d19, d21, d23}, [r5, :256]! @ encoding: [0x3d,0x11,0x65,0xf4]
+@ CHECK: vld4.16 {d17, d19, d21, d23}, [r7]! @ encoding: [0x4d,0x11,0x67,0xf4]
+@ CHECK: vld4.32 {d16, d18, d20, d22}, [r8]! @ encoding: [0x8d,0x01,0x68,0xf4]
+@ CHECK: vld4.8 {d16, d17, d18, d19}, [r1, :64], r8 @ encoding: [0x18,0x00,0x61,0xf4]
+@ CHECK: vld4.16 {d16, d17, d18, d19}, [r2], r7 @ encoding: [0x47,0x00,0x62,0xf4]
+@ CHECK: vld4.32 {d16, d17, d18, d19}, [r3, :64], r5 @ encoding: [0x95,0x00,0x63,0xf4]
+@ CHECK: vld4.8 {d16, d18, d20, d22}, [r4, :256], r2 @ encoding: [0x32,0x01,0x64,0xf4]
+@ CHECK: vld4.16 {d16, d18, d20, d22}, [r6], r3 @ encoding: [0x43,0x01,0x66,0xf4]
+@ CHECK: vld4.32 {d17, d19, d21, d23}, [r9], r4 @ encoding: [0x84,0x11,0x69,0xf4]
 
 
 	vld1.8 {d4[]}, [r1]
@@ -211,67 +252,205 @@
 @ CHECK: vld1.8	{d4[], d5[]}, [r1], r3  @ encoding: [0x23,0x4c,0xa1,0xf4]
 
 	vld1.8	{d16[3]}, [r0]
-@	vld1.16	{d16[2]}, [r0, :16]
-@	vld1.32	{d16[1]}, [r0, :32]
+	vld1.16	{d16[2]}, [r0, :16]
+	vld1.32	{d16[1]}, [r0, :32]
         vld1.p8 d12[6], [r2]!
         vld1.i8 d12[6], [r2], r2
         vld1.u16 d12[3], [r2]!
         vld1.16 d12[2], [r2], r2
 
 @ CHECK: vld1.8	{d16[3]}, [r0]          @ encoding: [0x6f,0x00,0xe0,0xf4]
-@ FIXME: vld1.16 {d16[2]}, [r0, :16]    @ encoding: [0x9f,0x04,0xe0,0xf4]
-@ FIXME: vld1.32 {d16[1]}, [r0, :32]    @ encoding: [0xbf,0x08,0xe0,0xf4]
+@ CHECK: vld1.16 {d16[2]}, [r0, :16]    @ encoding: [0x9f,0x04,0xe0,0xf4]
+@ CHECK: vld1.32 {d16[1]}, [r0, :32]    @ encoding: [0xbf,0x08,0xe0,0xf4]
 @ CHECK: vld1.8	{d12[6]}, [r2]!         @ encoding: [0xcd,0xc0,0xa2,0xf4]
 @ CHECK: vld1.8	{d12[6]}, [r2], r2      @ encoding: [0xc2,0xc0,0xa2,0xf4]
 @ CHECK: vld1.16 {d12[3]}, [r2]!        @ encoding: [0xcd,0xc4,0xa2,0xf4]
 @ CHECK: vld1.16 {d12[2]}, [r2], r2     @ encoding: [0x82,0xc4,0xa2,0xf4]
 
 
-@	vld2.8	{d16[1], d17[1]}, [r0, :16]
-@	vld2.16	{d16[1], d17[1]}, [r0, :32]
+	vld2.8	{d16[1], d17[1]}, [r0, :16]
+	vld2.16	{d16[1], d17[1]}, [r0, :32]
 	vld2.32	{d16[1], d17[1]}, [r0]
-@	vld2.16	{d17[1], d19[1]}, [r0]
-@	vld2.32	{d17[0], d19[0]}, [r0, :64]
+	vld2.16	{d17[1], d19[1]}, [r0]
+	vld2.32	{d17[0], d19[0]}, [r0, :64]
+	vld2.32	{d17[0], d19[0]}, [r0, :64]!
         vld2.8 {d2[4], d3[4]}, [r2], r3
         vld2.8 {d2[4], d3[4]}, [r2]!
         vld2.8 {d2[4], d3[4]}, [r2]
-
-@ FIXME: vld2.8	{d16[1], d17[1]}, [r0, :16] @ encoding: [0x3f,0x01,0xe0,0xf4]
-@ FIXME: vld2.16 {d16[1], d17[1]}, [r0, :32] @ encoding: [0x5f,0x05,0xe0,0xf4]
+        vld2.32 {d22[], d23[]}, [r1]
+        vld2.32 {d22[], d24[]}, [r1]
+        vld2.32 {d10[ ],d11[ ]}, [r3]!
+        vld2.32 {d14[ ],d16[ ]}, [r4]!
+        vld2.32 {d22[ ],d23[ ]}, [r5], r4
+        vld2.32 {d22[ ],d24[ ]}, [r6], r4
+
+@ CHECK: vld2.8	{d16[1], d17[1]}, [r0, :16] @ encoding: [0x3f,0x01,0xe0,0xf4]
+@ CHECK: vld2.16 {d16[1], d17[1]}, [r0, :32] @ encoding: [0x5f,0x05,0xe0,0xf4]
 @ CHECK: vld2.32 {d16[1], d17[1]}, [r0]  @ encoding: [0x8f,0x09,0xe0,0xf4]
-@ FIXME: vld2.16 {d17[1], d19[1]}, [r0]  @ encoding: [0x6f,0x15,0xe0,0xf4]
-@ FIXME: vld2.32 {d17[0], d19[0]}, [r0, :64] @ encoding: [0x5f,0x19,0xe0,0xf4]
+@ CHECK: vld2.16 {d17[1], d19[1]}, [r0]  @ encoding: [0x6f,0x15,0xe0,0xf4]
+@ CHECK: vld2.32 {d17[0], d19[0]}, [r0, :64] @ encoding: [0x5f,0x19,0xe0,0xf4]
+@ CHECK: vld2.32 {d17[0], d19[0]}, [r0, :64]! @ encoding: [0x5d,0x19,0xe0,0xf4]
 @ CHECK: vld2.8	{d2[4], d3[4]}, [r2], r3 @ encoding: [0x83,0x21,0xa2,0xf4]
 @ CHECK: vld2.8	{d2[4], d3[4]}, [r2]!   @ encoding: [0x8d,0x21,0xa2,0xf4]
 @ CHECK: vld2.8	{d2[4], d3[4]}, [r2]    @ encoding: [0x8f,0x21,0xa2,0xf4]
-
-
-
-@	vld3.8	{d16[1], d17[1], d18[1]}, [r0]
-@	vld3.16	{d16[1], d17[1], d18[1]}, [r0]
-@	vld3.32	{d16[1], d17[1], d18[1]}, [r0]
-@	vld3.16	{d16[1], d18[1], d20[1]}, [r0]
-@	vld3.32	{d17[1], d19[1], d21[1]}, [r0]
-
-@ FIXME: vld3.8	{d16[1], d17[1], d18[1]}, [r0] @ encoding: [0x2f,0x02,0xe0,0xf4]
-@ FIXME: vld3.16 {d16[1], d17[1], d18[1]}, [r0]@ encoding: [0x4f,0x06,0xe0,0xf4]
-@ FIXME: vld3.32 {d16[1], d17[1], d18[1]}, [r0]@ encoding: [0x8f,0x0a,0xe0,0xf4]
-@ FIXME: vld3.16 {d16[1], d18[1], d20[1]}, [r0]@ encoding: [0x6f,0x06,0xe0,0xf4]
-@ FIXME: vld3.32 {d17[1], d19[1], d21[1]}, [r0]@ encoding: [0xcf,0x1a,0xe0,0xf4]
-
-
-@	vld4.8	{d16[1], d17[1], d18[1], d19[1]}, [r0, :32]
-@	vld4.16	{d16[1], d17[1], d18[1], d19[1]}, [r0]
-@	vld4.32	{d16[1], d17[1], d18[1], d19[1]}, [r0, :128]
-@	vld4.16	{d16[1], d18[1], d20[1], d22[1]}, [r0, :64]
-@	vld4.32	{d17[0], d19[0], d21[0], d23[0]}, [r0]
-
-@ FIXME: vld4.8	{d16[1], d17[1], d18[1], d19[1]}, [r0, :32] @ encoding: [0x3f,0x03,0xe0,0xf4]
-@ FIXME: vld4.16 {d16[1], d17[1], d18[1], d19[1]}, [r0] @ encoding: [0x4f,0x07,0xe0,0xf4]
-@ FIXME: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0, :128] @ encoding: [0xaf,0x0b,0xe0,0xf4]
-@ FIXME: vld4.16 {d16[1], d18[1], d20[1], d22[1]}, [r0, :64] @ encoding: [0x7f,0x07,0xe0,0xf4]
-@ FIXME: vld4.32 {d17[0], d19[0], d21[0], d23[0]}, [r0] @ encoding: [0x4f,0x1b,0xe0,0xf4]
-
+@ CHECK: vld2.32 {d22[], d23[]}, [r1]    @ encoding: [0x8f,0x6d,0xe1,0xf4]
+@ CHECK: vld2.32 {d22[], d24[]}, [r1]    @ encoding: [0xaf,0x6d,0xe1,0xf4]
+@ CHECK: vld2.32 {d10[], d11[]}, [r3]!   @ encoding: [0x8d,0xad,0xa3,0xf4]
+@ CHECK: vld2.32 {d14[], d16[]}, [r4]!   @ encoding: [0xad,0xed,0xa4,0xf4]
+@ CHECK: vld2.32 {d22[], d23[]}, [r5], r4 @ encoding: [0x84,0x6d,0xe5,0xf4]
+@ CHECK: vld2.32 {d22[], d24[]}, [r6], r4 @ encoding: [0xa4,0x6d,0xe6,0xf4]
+
+
+	vld3.8 {d16[1], d17[1], d18[1]}, [r1]
+	vld3.16 {d6[1], d7[1], d8[1]}, [r2]
+	vld3.32 {d1[1], d2[1], d3[1]}, [r3]
+	vld3.u16 {d27[2], d29[2], d31[2]}, [r4]
+	vld3.i32 {d6[0], d8[0], d10[0]}, [r5]
+
+	vld3.i8 {d12[3], d13[3], d14[3]}, [r6], r1
+	vld3.i16 {d11[2], d12[2], d13[2]}, [r7], r2
+	vld3.u32 {d2[1], d3[1], d4[1]}, [r8], r3
+	vld3.u16 {d14[2], d16[2], d18[2]}, [r9], r4
+	vld3.i32 {d16[0], d18[0], d20[0]}, [r10], r5
+
+	vld3.p8 {d6[6], d7[6], d8[6]}, [r8]!
+	vld3.16 {d9[2], d10[2], d11[2]}, [r7]!
+	vld3.f32 {d1[1], d2[1], d3[1]}, [r6]!
+	vld3.p16 {d20[2], d22[2], d24[2]}, [r5]!
+	vld3.32 {d5[0], d7[0], d9[0]}, [r4]!
+
+@ CHECK: vld3.8	{d16[1], d17[1], d18[1]}, [r1] @ encoding: [0x2f,0x02,0xe1,0xf4]
+@ CHECK: vld3.16 {d6[1], d7[1], d8[1]}, [r2] @ encoding: [0x4f,0x66,0xa2,0xf4]
+@ CHECK: vld3.32 {d1[1], d2[1], d3[1]}, [r3] @ encoding: [0x8f,0x1a,0xa3,0xf4]
+@ CHECK: vld3.16 {d27[2], d29[2], d31[2]}, [r4] @ encoding: [0xaf,0xb6,0xe4,0xf4]
+@ CHECK: vld3.32 {d6[0], d8[0], d10[0]}, [r5] @ encoding: [0x4f,0x6a,0xa5,0xf4]
+@ CHECK: vld3.8	{d12[3], d13[3], d14[3]}, [r6], r1 @ encoding: [0x61,0xc2,0xa6,0xf4]
+@ CHECK: vld3.16 {d11[2], d12[2], d13[2]}, [r7], r2 @ encoding: [0x82,0xb6,0xa7,0xf4]
+@ CHECK: vld3.32 {d2[1], d3[1], d4[1]}, [r8], r3 @ encoding: [0x83,0x2a,0xa8,0xf4]
+@ CHECK: vld3.16 {d14[2], d16[2], d18[2]}, [r9], r4 @ encoding: [0xa4,0xe6,0xa9,0xf4]
+@ CHECK: vld3.32 {d16[0], d18[0], d20[0]}, [r10], r5 @ encoding: [0x45,0x0a,0xea,0xf4]
+@ CHECK: vld3.8	{d6[6], d7[6], d8[6]}, [r8]! @ encoding: [0xcd,0x62,0xa8,0xf4]
+@ CHECK: vld3.16 {d9[2], d10[2], d11[2]}, [r7]! @ encoding: [0x8d,0x96,0xa7,0xf4]
+@ CHECK: vld3.32 {d1[1], d2[1], d3[1]}, [r6]! @ encoding: [0x8d,0x1a,0xa6,0xf4]
+@ CHECK: vld3.16 {d20[2], d21[2], d22[2]}, [r5]! @ encoding: [0xad,0x46,0xe5,0xf4]
+@ CHECK: vld3.32 {d5[0], d7[0], d9[0]}, [r4]! @ encoding: [0x4d,0x5a,0xa4,0xf4]
+
+
+	vld3.8 {d16[], d17[], d18[]}, [r1]
+	vld3.16 {d16[], d17[], d18[]}, [r2]
+	vld3.32 {d16[], d17[], d18[]}, [r3]
+	vld3.8 {d17[], d19[], d21[]}, [r7]
+	vld3.16 {d17[], d19[], d21[]}, [r7]
+	vld3.32 {d16[], d18[], d20[]}, [r8]
+
+	vld3.s8 {d16[], d17[], d18[]}, [r1]!
+	vld3.s16 {d16[], d17[], d18[]}, [r2]!
+	vld3.s32 {d16[], d17[], d18[]}, [r3]!
+	vld3.u8 {d17[], d19[], d21[]}, [r7]!
+	vld3.u16 {d17[], d19[], d21[]}, [r7]!
+	vld3.u32 {d16[], d18[], d20[]}, [r8]!
+
+	vld3.p8 {d16[], d17[], d18[]}, [r1], r8
+	vld3.p16 {d16[], d17[], d18[]}, [r2], r7
+	vld3.f32 {d16[], d17[], d18[]}, [r3], r5
+	vld3.i8 {d16[], d18[], d20[]}, [r6], r3
+	vld3.i16 {d16[], d18[], d20[]}, [r6], r3
+	vld3.i32 {d17[], d19[], d21[]}, [r9], r4
+
+@ CHECK: vld3.8 {d16[], d17[], d18[]}, [r1] @ encoding: [0x0f,0x0e,0xe1,0xf4]
+@ CHECK: vld3.16 {d16[], d17[], d18[]}, [r2] @ encoding: [0x4f,0x0e,0xe2,0xf4]
+@ CHECK: vld3.32 {d16[], d17[], d18[]}, [r3] @ encoding: [0x8f,0x0e,0xe3,0xf4]
+@ CHECK: vld3.8 {d17[], d19[], d21[]}, [r7] @ encoding: [0x2f,0x1e,0xe7,0xf4]
+@ CHECK: vld3.16 {d17[], d19[], d21[]}, [r7] @ encoding: [0x6f,0x1e,0xe7,0xf4]
+@ CHECK: vld3.32 {d16[], d18[], d20[]}, [r8] @ encoding: [0xaf,0x0e,0xe8,0xf4]
+@ CHECK: vld3.8 {d16[], d17[], d18[]}, [r1]! @ encoding: [0x0d,0x0e,0xe1,0xf4]
+@ CHECK: vld3.16 {d16[], d17[], d18[]}, [r2]! @ encoding: [0x4d,0x0e,0xe2,0xf4]
+@ CHECK: vld3.32 {d16[], d17[], d18[]}, [r3]! @ encoding: [0x8d,0x0e,0xe3,0xf4]
+@ CHECK: vld3.8 {d17[], d18[], d19[]}, [r7]! @ encoding: [0x2d,0x1e,0xe7,0xf4]
+@ CHECK: vld3.16 {d17[], d18[], d19[]}, [r7]! @ encoding: [0x6d,0x1e,0xe7,0xf4]
+@ CHECK: vld3.32 {d16[], d18[], d20[]}, [r8]! @ encoding: [0xad,0x0e,0xe8,0xf4]
+@ CHECK: vld3.8 {d16[], d17[], d18[]}, [r1], r8 @ encoding: [0x08,0x0e,0xe1,0xf4]
+@ CHECK: vld3.16 {d16[], d17[], d18[]}, [r2], r7 @ encoding: [0x47,0x0e,0xe2,0xf4]
+@ CHECK: vld3.32 {d16[], d17[], d18[]}, [r3], r5 @ encoding: [0x85,0x0e,0xe3,0xf4]
+@ CHECK: vld3.8 {d16[], d18[], d20[]}, [r6], r3 @ encoding: [0x23,0x0e,0xe6,0xf4]
+@ CHECK: vld3.16 {d16[], d18[], d20[]}, [r6], r3 @ encoding: [0x63,0x0e,0xe6,0xf4]
+@ CHECK: vld3.32 {d17[], d19[], d21[]}, [r9], r4 @ encoding: [0xa4,0x1e,0xe9,0xf4]
+
+
+	vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1]
+	vld4.16 {d16[1], d17[1], d18[1], d19[1]}, [r2]
+	vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [r3]
+	vld4.16 {d17[1], d19[1], d21[1], d23[1]}, [r7]
+	vld4.32 {d16[1], d18[1], d20[1], d22[1]}, [r8]
+
+	vld4.s8 {d16[1], d17[1], d18[1], d19[1]}, [r1, :32]!
+	vld4.s16 {d16[1], d17[1], d18[1], d19[1]}, [r2, :64]!
+	vld4.s32 {d16[1], d17[1], d18[1], d19[1]}, [r3, :128]!
+	vld4.u16 {d17[1], d19[1], d21[1], d23[1]}, [r7]!
+	vld4.u32 {d16[1], d18[1], d20[1], d22[1]}, [r8]!
+
+	vld4.p8 {d16[1], d17[1], d18[1], d19[1]}, [r1, :32], r8
+	vld4.p16 {d16[1], d17[1], d18[1], d19[1]}, [r2], r7
+	vld4.f32 {d16[1], d17[1], d18[1], d19[1]}, [r3, :64], r5
+	vld4.i16 {d16[1], d18[1], d20[1], d22[1]}, [r6], r3
+	vld4.i32 {d17[1], d19[1], d21[1], d23[1]}, [r9], r4
+
+@ CHECK: vld4.8	{d16[1], d17[1], d18[1], d19[1]}, [r1] @ encoding: [0x2f,0x03,0xe1,0xf4]
+@ CHECK: vld4.16 {d16[1], d17[1], d18[1], d19[1]}, [r2] @ encoding: [0x4f,0x07,0xe2,0xf4]
+@ CHECK: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [r3] @ encoding: [0x8f,0x0b,0xe3,0xf4]
+@ CHECK: vld4.16 {d17[1], d19[1], d21[1], d23[1]}, [r7] @ encoding: [0x6f,0x17,0xe7,0xf4]
+@ CHECK: vld4.32 {d16[1], d18[1], d20[1], d22[1]}, [r8] @ encoding: [0xcf,0x0b,0xe8,0xf4]
+@ CHECK: vld4.8	{d16[1], d17[1], d18[1], d19[1]}, [r1, :32]! @ encoding: [0x3d,0x03,0xe1,0xf4]
+@ CHECK: vld4.16 {d16[1], d17[1], d18[1], d19[1]}, [r2, :64]! @ encoding: [0x5d,0x07,0xe2,0xf4]
+@ CHECK: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [r3, :128]! @ encoding: [0xad,0x0b,0xe3,0xf4]
+@ CHECK: vld4.16 {d17[1], d18[1], d19[1], d20[1]}, [r7]! @ encoding: [0x6d,0x17,0xe7,0xf4]
+@ CHECK: vld4.32 {d16[1], d18[1], d20[1], d22[1]}, [r8]! @ encoding: [0xcd,0x0b,0xe8,0xf4]
+@ CHECK: vld4.8	{d16[1], d17[1], d18[1], d19[1]}, [r1, :32], r8 @ encoding: [0x38,0x03,0xe1,0xf4]
+@ CHECK: vld4.16 {d16[1], d17[1], d18[1], d19[1]}, [r2], r7 @ encoding: [0x47,0x07,0xe2,0xf4]
+@ CHECK: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [r3, :64], r5 @ encoding: [0x95,0x0b,0xe3,0xf4]
+@ CHECK: vld4.16 {d16[1], d18[1], d20[1], d22[1]}, [r6], r3 @ encoding: [0x63,0x07,0xe6,0xf4]
+@ CHECK: vld4.32 {d17[1], d19[1], d21[1], d23[1]}, [r9], r4 @ encoding: [0xc4,0x1b,0xe9,0xf4]
+
+
+	vld4.8 {d16[], d17[], d18[], d19[]}, [r1]
+	vld4.16 {d16[], d17[], d18[], d19[]}, [r2]
+	vld4.32 {d16[], d17[], d18[], d19[]}, [r3]
+	vld4.8 {d17[], d19[], d21[], d23[]}, [r7]
+	vld4.16 {d17[], d19[], d21[], d23[]}, [r7]
+	vld4.32 {d16[], d18[], d20[], d22[]}, [r8]
+
+	vld4.s8 {d16[], d17[], d18[], d19[]}, [r1]!
+	vld4.s16 {d16[], d17[], d18[], d19[]}, [r2]!
+	vld4.s32 {d16[], d17[], d18[], d19[]}, [r3]!
+	vld4.u8 {d17[], d19[], d21[], d23[]}, [r7]!
+	vld4.u16 {d17[], d19[], d21[], d23[]}, [r7]!
+	vld4.u32 {d16[], d18[], d20[], d22[]}, [r8]!
+
+	vld4.p8 {d16[], d17[], d18[], d19[]}, [r1], r8
+	vld4.p16 {d16[], d17[], d18[], d19[]}, [r2], r7
+	vld4.f32 {d16[], d17[], d18[], d19[]}, [r3], r5
+	vld4.i8 {d16[], d18[], d20[], d22[]}, [r6], r3
+	vld4.i16 {d16[], d18[], d20[], d22[]}, [r6], r3
+	vld4.i32 {d17[], d19[], d21[], d23[]}, [r9], r4
+
+@ CHECK: vld4.8 {d16[], d17[], d18[], d19[]}, [r1] @ encoding: [0x0f,0x0f,0xe1,0xf4]
+@ CHECK: vld4.16 {d16[], d17[], d18[], d19[]}, [r2] @ encoding: [0x4f,0x0f,0xe2,0xf4]
+@ CHECK: vld4.32 {d16[], d17[], d18[], d19[]}, [r3] @ encoding: [0x8f,0x0f,0xe3,0xf4]
+@ CHECK: vld4.8 {d17[], d19[], d21[], d23[]}, [r7] @ encoding: [0x2f,0x1f,0xe7,0xf4]
+@ CHECK: vld4.16 {d17[], d19[], d21[], d23[]}, [r7] @ encoding: [0x6f,0x1f,0xe7,0xf4]
+@ CHECK: vld4.32 {d16[], d18[], d20[], d22[]}, [r8] @ encoding: [0xaf,0x0f,0xe8,0xf4]
+@ CHECK: vld4.8 {d16[], d17[], d18[], d19[]}, [r1]! @ encoding: [0x0d,0x0f,0xe1,0xf4]
+@ CHECK: vld4.16 {d16[], d17[], d18[], d19[]}, [r2]! @ encoding: [0x4d,0x0f,0xe2,0xf4]
+@ CHECK: vld4.32 {d16[], d17[], d18[], d19[]}, [r3]! @ encoding: [0x8d,0x0f,0xe3,0xf4]
+@ CHECK: vld4.8 {d17[], d18[], d19[], d20[]}, [r7]! @ encoding: [0x2d,0x1f,0xe7,0xf4]
+@ CHECK: vld4.16 {d17[], d18[], d19[], d20[]}, [r7]! @ encoding: [0x6d,0x1f,0xe7,0xf4]
+@ CHECK: vld4.32 {d16[], d18[], d20[], d22[]}, [r8]! @ encoding: [0xad,0x0f,0xe8,0xf4]
+@ CHECK: vld4.8 {d16[], d17[], d18[], d19[]}, [r1], r8 @ encoding: [0x08,0x0f,0xe1,0xf4]
+@ CHECK: vld4.16 {d16[], d17[], d18[], d19[]}, [r2], r7 @ encoding: [0x47,0x0f,0xe2,0xf4]
+@ CHECK: vld4.32 {d16[], d17[], d18[], d19[]}, [r3], r5 @ encoding: [0x85,0x0f,0xe3,0xf4]
+@ CHECK: vld4.8 {d16[], d18[], d20[], d22[]}, [r6], r3 @ encoding: [0x23,0x0f,0xe6,0xf4]
+@ CHECK: vld4.16 {d16[], d18[], d20[], d22[]}, [r6], r3 @ encoding: [0x63,0x0f,0xe6,0xf4]
+@ CHECK: vld4.32 {d17[], d19[], d21[], d23[]}, [r9], r4 @ encoding: [0xa4,0x1f,0xe9,0xf4]
 
 @ Handle 'Q' registers in register lists as if the sub-reg D regs were
 @ specified instead.
diff --git a/test/MC/ARM/neon-vst-encoding.s b/test/MC/ARM/neon-vst-encoding.s
index 3a4cb87..2b14d37 100644
--- a/test/MC/ARM/neon-vst-encoding.s
+++ b/test/MC/ARM/neon-vst-encoding.s
@@ -58,91 +58,191 @@
 @ CHECK: vst2.32	{d8, d9, d10, d11}, [r0, :256]! @ encoding: [0xbd,0x83,0x00,0xf4]
 
 
-@	vst3.8	{d16, d17, d18}, [r0, :64]
-@	vst3.16	{d16, d17, d18}, [r0]
-@	vst3.32	{d16, d17, d18}, [r0]
-@	vst3.8	{d16, d18, d20}, [r0, :64]!
-@	vst3.8	{d17, d19, d21}, [r0, :64]!
-@	vst3.16	{d16, d18, d20}, [r0]!
-@	vst3.16	{d17, d19, d21}, [r0]!
-@	vst3.32	{d16, d18, d20}, [r0]!
-@	vst3.32	{d17, d19, d21}, [r0]!
-
-@ FIXME: vst3.8	{d16, d17, d18}, [r0, :64] @ encoding: [0x1f,0x04,0x40,0xf4]
-@ FIXME: vst3.16 {d16, d17, d18}, [r0]  @ encoding: [0x4f,0x04,0x40,0xf4]
-@ FIXME: vst3.32 {d16, d17, d18}, [r0]  @ encoding: [0x8f,0x04,0x40,0xf4]
-@ FIXME: vst3.8	{d16, d18, d20}, [r0, :64]! @ encoding: [0x1d,0x05,0x40,0xf4]
-@ FIXME: vst3.8	{d17, d19, d21}, [r0, :64]! @ encoding: [0x1d,0x15,0x40,0xf4]
-@ FIXME: vst3.16 {d16, d18, d20}, [r0]! @ encoding: [0x4d,0x05,0x40,0xf4]
-@ FIXME: vst3.16 {d17, d19, d21}, [r0]! @ encoding: [0x4d,0x15,0x40,0xf4]
-@ FIXME: vst3.32 {d16, d18, d20}, [r0]! @ encoding: [0x8d,0x05,0x40,0xf4]
-@ FIXME: vst3.32 {d17, d19, d21}, [r0]! @ encoding: [0x8d,0x15,0x40,0xf4]
-
-
-@	vst4.8	{d16, d17, d18, d19}, [r0, :64]
-@	vst4.16	{d16, d17, d18, d19}, [r0, :128]
-@	vst4.8	{d16, d18, d20, d22}, [r0, :256]!
-@	vst4.8	{d17, d19, d21, d23}, [r0, :256]!
-@	vst4.16	{d16, d18, d20, d22}, [r0]!
-@	vst4.16	{d17, d19, d21, d23}, [r0]!
-@	vst4.32	{d16, d18, d20, d22}, [r0]!
-@	vst4.32	{d17, d19, d21, d23}, [r0]!
-
-@ FIXME: vst4.8	{d16, d17, d18, d19}, [r0, :64] @ encoding: [0x1f,0x00,0x40,0xf4]
-@ FIXME: vst4.16 {d16, d17, d18, d19}, [r0, :128] @ encoding: [0x6f,0x00,0x40,0xf4]
-@ FIXME: vst4.8	{d16, d18, d20, d22}, [r0, :256]! @ encoding: [0x3d,0x01,0x40,0xf4]
-@ FIXME: vst4.8	{d17, d19, d21, d23}, [r0, :256]! @ encoding: [0x3d,0x11,0x40,0xf4]
-@ FIXME: vst4.16 {d16, d18, d20, d22}, [r0]! @ encoding: [0x4d,0x01,0x40,0xf4]
-@ FIXME: vst4.16 {d17, d19, d21, d23}, [r0]! @ encoding: [0x4d,0x11,0x40,0xf4]
-@ FIXME: vst4.32 {d16, d18, d20, d22}, [r0]! @ encoding: [0x8d,0x01,0x40,0xf4]
-@ FIXME: vst4.32 {d17, d19, d21, d23}, [r0]! @ encoding: [0x8d,0x11,0x40,0xf4]
-
-
-@	vst2.8	{d16[1], d17[1]}, [r0, :16]
-@	vst2.16	{d16[1], d17[1]}, [r0, :32]
-	vst2.32	{d16[1], d17[1]}, [r0]
-@	vst2.16	{d17[1], d19[1]}, [r0]
-@	vst2.32	{d17[0], d19[0]}, [r0, :64]
+	vst3.8 {d16, d17, d18}, [r1]
+	vst3.16 {d6, d7, d8}, [r2]
+	vst3.32 {d1, d2, d3}, [r3]
+	vst3.8 {d16, d18, d20}, [r0, :64]
+	vst3.u16 {d27, d29, d31}, [r4]
+	vst3.i32 {d6, d8, d10}, [r5]
+
+	vst3.i8 {d12, d13, d14}, [r6], r1
+	vst3.i16 {d11, d12, d13}, [r7], r2
+	vst3.u32 {d2, d3, d4}, [r8], r3
+	vst3.8 {d4, d6, d8}, [r9], r4
+	vst3.u16 {d14, d16, d18}, [r9], r4
+	vst3.i32 {d16, d18, d20}, [r10], r5
+
+	vst3.p8 {d6, d7, d8}, [r8]!
+	vst3.16 {d9, d10, d11}, [r7]!
+	vst3.f32 {d1, d2, d3}, [r6]!
+	vst3.8 {d16, d18, d20}, [r0, :64]!
+	vst3.p16 {d20, d22, d24}, [r5]!
+	vst3.32 {d5, d7, d9}, [r4]!
+
+@ CHECK: vst3.8	{d16, d17, d18}, [r1]   @ encoding: [0x0f,0x04,0x41,0xf4]
+@ CHECK: vst3.16	{d6, d7, d8}, [r2]      @ encoding: [0x4f,0x64,0x02,0xf4]
+@ CHECK: vst3.32	{d1, d2, d3}, [r3]      @ encoding: [0x8f,0x14,0x03,0xf4]
+@ CHECK: vst3.8	{d16, d18, d20}, [r0, :64] @ encoding: [0x1f,0x05,0x40,0xf4]
+@ CHECK: vst3.16	{d27, d29, d31}, [r4]   @ encoding: [0x4f,0xb5,0x44,0xf4]
+@ CHECK: vst3.32	{d6, d8, d10}, [r5]     @ encoding: [0x8f,0x65,0x05,0xf4]
+@ CHECK: vst3.8	{d12, d13, d14}, [r6], r1 @ encoding: [0x01,0xc4,0x06,0xf4]
+@ CHECK: vst3.16	{d11, d12, d13}, [r7], r2 @ encoding: [0x42,0xb4,0x07,0xf4]
+@ CHECK: vst3.32	{d2, d3, d4}, [r8], r3  @ encoding: [0x83,0x24,0x08,0xf4]
+@ CHECK: vst3.8	{d4, d6, d8}, [r9], r4  @ encoding: [0x04,0x45,0x09,0xf4]
+@ CHECK: vst3.16	{d14, d16, d18}, [r9], r4 @ encoding: [0x44,0xe5,0x09,0xf4]
+@ CHECK: vst3.32	{d16, d18, d20}, [r10], r5 @ encoding: [0x85,0x05,0x4a,0xf4]
+@ CHECK: vst3.8	{d6, d7, d8}, [r8]!     @ encoding: [0x0d,0x64,0x08,0xf4]
+@ CHECK: vst3.16	{d9, d10, d11}, [r7]!   @ encoding: [0x4d,0x94,0x07,0xf4]
+@ CHECK: vst3.32	{d1, d2, d3}, [r6]!     @ encoding: [0x8d,0x14,0x06,0xf4]
+@ CHECK: vst3.8	{d16, d18, d20}, [r0, :64]! @ encoding: [0x1d,0x05,0x40,0xf4]
+@ CHECK: vst3.16	{d20, d22, d24}, [r5]!  @ encoding: [0x4d,0x45,0x45,0xf4]
+@ CHECK: vst3.32	{d5, d7, d9}, [r4]!     @ encoding: [0x8d,0x55,0x04,0xf4]
+
+
+	vst4.8 {d16, d17, d18, d19}, [r1, :64]
+	vst4.16 {d16, d17, d18, d19}, [r2, :128]
+	vst4.32 {d16, d17, d18, d19}, [r3, :256]
+	vst4.8 {d17, d19, d21, d23}, [r5, :256]
+	vst4.16 {d17, d19, d21, d23}, [r7]
+	vst4.32 {d16, d18, d20, d22}, [r8]
+
+	vst4.s8 {d16, d17, d18, d19}, [r1, :64]!
+	vst4.s16 {d16, d17, d18, d19}, [r2, :128]!
+	vst4.s32 {d16, d17, d18, d19}, [r3, :256]!
+	vst4.u8 {d17, d19, d21, d23}, [r5, :256]!
+	vst4.u16 {d17, d19, d21, d23}, [r7]!
+	vst4.u32 {d16, d18, d20, d22}, [r8]!
+
+	vst4.p8 {d16, d17, d18, d19}, [r1, :64], r8
+	vst4.p16 {d16, d17, d18, d19}, [r2], r7
+	vst4.f32 {d16, d17, d18, d19}, [r3, :64], r5
+	vst4.i8 {d16, d18, d20, d22}, [r4, :256], r2
+	vst4.i16 {d16, d18, d20, d22}, [r6], r3
+	vst4.i32 {d17, d19, d21, d23}, [r9], r4
+
+@ CHECK: vst4.8 {d16, d17, d18, d19}, [r1, :64] @ encoding: [0x1f,0x00,0x41,0xf4]
+@ CHECK: vst4.16 {d16, d17, d18, d19}, [r2, :128] @ encoding: [0x6f,0x00,0x42,0xf4]
+@ CHECK: vst4.32 {d16, d17, d18, d19}, [r3, :256] @ encoding: [0xbf,0x00,0x43,0xf4]
+@ CHECK: vst4.8 {d17, d19, d21, d23}, [r5, :256] @ encoding: [0x3f,0x11,0x45,0xf4]
+@ CHECK: vst4.16 {d17, d19, d21, d23}, [r7] @ encoding: [0x4f,0x11,0x47,0xf4]
+@ CHECK: vst4.32 {d16, d18, d20, d22}, [r8] @ encoding: [0x8f,0x01,0x48,0xf4]
+@ CHECK: vst4.8 {d16, d17, d18, d19}, [r1, :64]! @ encoding: [0x1d,0x00,0x41,0xf4]
+@ CHECK: vst4.16 {d16, d17, d18, d19}, [r2, :128]! @ encoding: [0x6d,0x00,0x42,0xf4]
+@ CHECK: vst4.32 {d16, d17, d18, d19}, [r3, :256]! @ encoding: [0xbd,0x00,0x43,0xf4]
+@ CHECK: vst4.8 {d17, d19, d21, d23}, [r5, :256]! @ encoding: [0x3d,0x11,0x45,0xf4]
+@ CHECK: vst4.16 {d17, d19, d21, d23}, [r7]! @ encoding: [0x4d,0x11,0x47,0xf4]
+@ CHECK: vst4.32 {d16, d18, d20, d22}, [r8]! @ encoding: [0x8d,0x01,0x48,0xf4]
+@ CHECK: vst4.8 {d16, d17, d18, d19}, [r1, :64], r8 @ encoding: [0x18,0x00,0x41,0xf4]
+@ CHECK: vst4.16 {d16, d17, d18, d19}, [r2], r7 @ encoding: [0x47,0x00,0x42,0xf4]
+@ CHECK: vst4.32 {d16, d17, d18, d19}, [r3, :64], r5 @ encoding: [0x95,0x00,0x43,0xf4]
+@ CHECK: vst4.8 {d16, d18, d20, d22}, [r4, :256], r2 @ encoding: [0x32,0x01,0x44,0xf4]
+@ CHECK: vst4.16 {d16, d18, d20, d22}, [r6], r3 @ encoding: [0x43,0x01,0x46,0xf4]
+@ CHECK: vst4.32 {d17, d19, d21, d23}, [r9], r4 @ encoding: [0x84,0x11,0x49,0xf4]
+
+
+	vst2.8	{d16[1], d17[1]}, [r0, :16]
+	vst2.p16	{d16[1], d17[1]}, [r0, :32]
+	vst2.i32	{d16[1], d17[1]}, [r0]
+	vst2.u16	{d17[1], d19[1]}, [r0]
+	vst2.f32	{d17[0], d19[0]}, [r0, :64]
 
         vst2.8 {d2[4], d3[4]}, [r2], r3
-        vst2.8 {d2[4], d3[4]}, [r2]!
-        vst2.8 {d2[4], d3[4]}, [r2]
+        vst2.u8 {d2[4], d3[4]}, [r2]!
+        vst2.p8 {d2[4], d3[4]}, [r2]
 
-@ FIXME: vst2.8	{d16[1], d17[1]}, [r0, :16] @ encoding: [0x3f,0x01,0xc0,0xf4]
-@ FIXME: vst2.16 {d16[1], d17[1]}, [r0, :32] @ encoding: [0x5f,0x05,0xc0,0xf4]
+        vst2.16 {d17[1], d19[1]}, [r0]
+        vst2.32 {d17[0], d19[0]}, [r0, :64]
+        vst2.i16 {d7[1], d9[1]}, [r1]!
+        vst2.32 {d6[0], d8[0]}, [r2, :64]!
+        vst2.16 {d2[1], d4[1]}, [r3], r5
+        vst2.u32 {d5[0], d7[0]}, [r4, :64], r7
+
+@ CHECK: vst2.8	{d16[1], d17[1]}, [r0, :16] @ encoding: [0x3f,0x01,0xc0,0xf4]
+@ CHECK: vst2.16 {d16[1], d17[1]}, [r0, :32] @ encoding: [0x5f,0x05,0xc0,0xf4]
 @ CHECK: vst2.32 {d16[1], d17[1]}, [r0]  @ encoding: [0x8f,0x09,0xc0,0xf4]
-@ FIXME: vst2.16 {d17[1], d19[1]}, [r0]  @ encoding: [0x6f,0x15,0xc0,0xf4]
-@ FIXME: vst2.32 {d17[0], d19[0]}, [r0, :64] @ encoding: [0x5f,0x19,0xc0,0xf4]
+@ CHECK: vst2.16 {d17[1], d19[1]}, [r0]  @ encoding: [0x6f,0x15,0xc0,0xf4]
+@ CHECK: vst2.32 {d17[0], d19[0]}, [r0, :64] @ encoding: [0x5f,0x19,0xc0,0xf4]
 
 @ CHECK: vst2.8	{d2[4], d3[4]}, [r2], r3 @ encoding: [0x83,0x21,0x82,0xf4]
 @ CHECK: vst2.8	{d2[4], d3[4]}, [r2]!   @ encoding: [0x8d,0x21,0x82,0xf4]
 @ CHECK: vst2.8	{d2[4], d3[4]}, [r2]    @ encoding: [0x8f,0x21,0x82,0xf4]
 
+@ CHECK: vst2.16 {d17[1], d19[1]}, [r0]  @ encoding: [0x6f,0x15,0xc0,0xf4]
+@ CHECK: vst2.32 {d17[0], d19[0]}, [r0, :64] @ encoding: [0x5f,0x19,0xc0,0xf4]
+@ CHECK: vst2.16 {d7[1], d9[1]}, [r1]!   @ encoding: [0x6d,0x75,0x81,0xf4]
+@ CHECK: vst2.32 {d6[0], d8[0]}, [r2, :64]! @ encoding: [0x5d,0x69,0x82,0xf4]
+@ CHECK: vst2.16 {d2[1], d4[1]}, [r3], r5 @ encoding: [0x65,0x25,0x83,0xf4]
+@ CHECK: vst2.32 {d5[0], d7[0]}, [r4, :64], r7 @ encoding: [0x57,0x59,0x84,0xf4]
+
+
+	vst3.8 {d16[1], d17[1], d18[1]}, [r1]
+	vst3.16 {d6[1], d7[1], d8[1]}, [r2]
+	vst3.32 {d1[1], d2[1], d3[1]}, [r3]
+	vst3.u16 {d27[1], d29[1], d31[1]}, [r4]
+	vst3.i32 {d6[1], d8[1], d10[1]}, [r5]
+
+	vst3.i8 {d12[1], d13[1], d14[1]}, [r6], r1
+	vst3.i16 {d11[1], d12[1], d13[1]}, [r7], r2
+	vst3.u32 {d2[1], d3[1], d4[1]}, [r8], r3
+	vst3.u16 {d14[1], d16[1], d18[1]}, [r9], r4
+	vst3.i32 {d16[1], d18[1], d20[1]}, [r10], r5
+
+	vst3.p8 {d6[1], d7[1], d8[1]}, [r8]!
+	vst3.16 {d9[1], d10[1], d11[1]}, [r7]!
+	vst3.f32 {d1[1], d2[1], d3[1]}, [r6]!
+	vst3.p16 {d20[1], d22[1], d24[1]}, [r5]!
+	vst3.32 {d5[1], d7[1], d9[1]}, [r4]!
+
+@ CHECK: vst3.8	{d16[1], d17[1], d18[1]}, [r1] @ encoding: [0x2f,0x02,0xc1,0xf4]
+@ CHECK: vst3.16	{d6[1], d7[1], d8[1]}, [r2] @ encoding: [0x4f,0x66,0x82,0xf4]
+@ CHECK: vst3.32	{d1[1], d2[1], d3[1]}, [r3] @ encoding: [0x8f,0x1a,0x83,0xf4]
+@ CHECK: vst3.16	{d27[1], d29[1], d31[1]}, [r4] @ encoding: [0x6f,0xb6,0xc4,0xf4]
+@ CHECK: vst3.32	{d6[1], d8[1], d10[1]}, [r5] @ encoding: [0xcf,0x6a,0x85,0xf4]
+@ CHECK: vst3.8	{d12[1], d13[1], d14[1]}, [r6], r1 @ encoding: [0x21,0xc2,0x86,0xf4]
+@ CHECK: vst3.16	{d11[1], d12[1], d13[1]}, [r7], r2 @ encoding: [0x42,0xb6,0x87,0xf4]
+@ CHECK: vst3.32	{d2[1], d3[1], d4[1]}, [r8], r3 @ encoding: [0x83,0x2a,0x88,0xf4]
+@ CHECK: vst3.16	{d14[1], d16[1], d18[1]}, [r9], r4 @ encoding: [0x64,0xe6,0x89,0xf4]
+@ CHECK: vst3.32	{d16[1], d18[1], d20[1]}, [r10], r5 @ encoding: [0xc5,0x0a,0xca,0xf4]
+@ CHECK: vst3.8	{d6[1], d7[1], d8[1]}, [r8]! @ encoding: [0x2d,0x62,0x88,0xf4]
+@ CHECK: vst3.16	{d9[1], d10[1], d11[1]}, [r7]! @ encoding: [0x4d,0x96,0x87,0xf4]
+@ CHECK: vst3.32	{d1[1], d2[1], d3[1]}, [r6]! @ encoding: [0x8d,0x1a,0x86,0xf4]
+@ CHECK: vst3.16	{d20[1], d21[1], d22[1]}, [r5]! @ encoding: [0x6d,0x46,0xc5,0xf4]
+@ CHECK: vst3.32	{d5[1], d7[1], d9[1]}, [r4]! @ encoding: [0xcd,0x5a,0x84,0xf4]
 
-@	vst3.8	{d16[1], d17[1], d18[1]}, [r0]
-@	vst3.16	{d16[1], d17[1], d18[1]}, [r0]
-@	vst3.32	{d16[1], d17[1], d18[1]}, [r0]
-@	vst3.16	{d17[2], d19[2], d21[2]}, [r0]
-@	vst3.32	{d16[0], d18[0], d20[0]}, [r0]
 
-@ FIXME: vst3.8	{d16[1], d17[1], d18[1]}, [r0] @ encoding: [0x2f,0x02,0xc0,0xf4]
-@ FIXME: vst3.16 {d16[1], d17[1], d18[1]}, [r0]@ encoding: [0x4f,0x06,0xc0,0xf4]
-@ FIXME: vst3.32 {d16[1], d17[1], d18[1]}, [r0]@ encoding: [0x8f,0x0a,0xc0,0xf4]
-@ FIXME: vst3.16 {d17[2], d19[2], d21[2]}, [r0]@ encoding: [0xaf,0x16,0xc0,0xf4]
-@ FIXME: vst3.32 {d16[0], d18[0], d20[0]}, [r0]@ encoding: [0x4f,0x0a,0xc0,0xf4]
+	vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1]
+	vst4.16 {d16[1], d17[1], d18[1], d19[1]}, [r2]
+	vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r3]
+	vst4.16 {d17[1], d19[1], d21[1], d23[1]}, [r7]
+	vst4.32 {d16[1], d18[1], d20[1], d22[1]}, [r8]
 
+	vst4.s8 {d16[1], d17[1], d18[1], d19[1]}, [r1, :32]!
+	vst4.s16 {d16[1], d17[1], d18[1], d19[1]}, [r2, :64]!
+	vst4.s32 {d16[1], d17[1], d18[1], d19[1]}, [r3, :128]!
+	vst4.u16 {d17[1], d19[1], d21[1], d23[1]}, [r7]!
+	vst4.u32 {d16[1], d18[1], d20[1], d22[1]}, [r8]!
 
-@	vst4.8	{d16[1], d17[1], d18[1], d19[1]}, [r0, :32]
-@	vst4.16	{d16[1], d17[1], d18[1], d19[1]}, [r0]
-@	vst4.32	{d16[1], d17[1], d18[1], d19[1]}, [r0, :128]
-@	vst4.16	{d17[3], d19[3], d21[3], d23[3]}, [r0, :64]
-@	vst4.32	{d17[0], d19[0], d21[0], d23[0]}, [r0]
+	vst4.p8 {d16[1], d17[1], d18[1], d19[1]}, [r1, :32], r8
+	vst4.p16 {d16[1], d17[1], d18[1], d19[1]}, [r2], r7
+	vst4.f32 {d16[1], d17[1], d18[1], d19[1]}, [r3, :64], r5
+	vst4.i16 {d16[1], d18[1], d20[1], d22[1]}, [r6], r3
+	vst4.i32 {d17[1], d19[1], d21[1], d23[1]}, [r9], r4
 
-@ FIXME: vst4.8	{d16[1], d17[1], d18[1], d19[1]}, [r0, :32] @ encoding: [0x3f,0x03,0xc0,0xf4]
-@ FIXME: vst4.16 {d16[1], d17[1], d18[1], d19[1]}, [r0] @ encoding: [0x4f,0x07,0xc0,0xf4]
-@ FIXME: vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0, :128] @ encoding: [0xaf,0x0b,0xc0,0xf4]
-@ FIXME: vst4.16 {d17[3], d19[3], d21[3], d23[3]}, [r0, :64] @ encoding: [0xff,0x17,0xc0,0xf4]
-@ FIXME: vst4.32 {d17[0], d19[0], d21[0], d23[0]}, [r0] @ encoding: [0x4f,0x1b,0xc0,0xf4]
+@ CHECK: vst4.8	{d16[1], d17[1], d18[1], d19[1]}, [r1] @ encoding: [0x2f,0x03,0xc1,0xf4]
+@ CHECK: vst4.16 {d16[1], d17[1], d18[1], d19[1]}, [r2] @ encoding: [0x4f,0x07,0xc2,0xf4]
+@ CHECK: vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r3] @ encoding: [0x8f,0x0b,0xc3,0xf4]
+@ CHECK: vst4.16 {d17[1], d19[1], d21[1], d23[1]}, [r7] @ encoding: [0x6f,0x17,0xc7,0xf4]
+@ CHECK: vst4.32 {d16[1], d18[1], d20[1], d22[1]}, [r8] @ encoding: [0xcf,0x0b,0xc8,0xf4]
+@ CHECK: vst4.8	{d16[1], d17[1], d18[1], d19[1]}, [r1, :32]! @ encoding: [0x3d,0x03,0xc1,0xf4]
+@ CHECK: vst4.16 {d16[1], d17[1], d18[1], d19[1]}, [r2, :64]! @ encoding: [0x5d,0x07,0xc2,0xf4]
+@ CHECK: vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r3, :128]! @ encoding: [0xad,0x0b,0xc3,0xf4]
+@ CHECK: vst4.16 {d17[1], d18[1], d19[1], d20[1]}, [r7]! @ encoding: [0x6d,0x17,0xc7,0xf4]
+@ CHECK: vst4.32 {d16[1], d18[1], d20[1], d22[1]}, [r8]! @ encoding: [0xcd,0x0b,0xc8,0xf4]
+@ CHECK: vst4.8	{d16[1], d17[1], d18[1], d19[1]}, [r1, :32], r8 @ encoding: [0x38,0x03,0xc1,0xf4]
+@ CHECK: vst4.16 {d16[1], d17[1], d18[1], d19[1]}, [r2], r7 @ encoding: [0x47,0x07,0xc2,0xf4]
+@ CHECK: vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r3, :64], r5 @ encoding: [0x95,0x0b,0xc3,0xf4]
+@ CHECK: vst4.16 {d16[1], d18[1], d20[1], d22[1]}, [r6], r3 @ encoding: [0x63,0x07,0xc6,0xf4]
+@ CHECK: vst4.32 {d17[1], d19[1], d21[1], d23[1]}, [r9], r4 @ encoding: [0xc4,0x1b,0xc9,0xf4]
 
 
 @ Spot-check additional size-suffix aliases.
diff --git a/test/MC/ARM/neont2-minmax-encoding.s b/test/MC/ARM/neont2-minmax-encoding.s
index 7e86d45..9ecadce 100644
--- a/test/MC/ARM/neont2-minmax-encoding.s
+++ b/test/MC/ARM/neont2-minmax-encoding.s
@@ -2,59 +2,125 @@
 
 .code 16
 
-@ CHECK: vmin.s8	d16, d16, d17           @ encoding: [0x40,0xef,0xb1,0x06]
-	vmin.s8	d16, d16, d17
-@ CHECK: vmin.s16	d16, d16, d17   @ encoding: [0x50,0xef,0xb1,0x06]
-	vmin.s16	d16, d16, d17
-@ CHECK: vmin.s32	d16, d16, d17   @ encoding: [0x60,0xef,0xb1,0x06]
-	vmin.s32	d16, d16, d17
-@ CHECK: vmin.u8	d16, d16, d17           @ encoding: [0x40,0xff,0xb1,0x06]
-	vmin.u8	d16, d16, d17
-@ CHECK: vmin.u16	d16, d16, d17   @ encoding: [0x50,0xff,0xb1,0x06]
-	vmin.u16	d16, d16, d17
-@ CHECK: vmin.u32	d16, d16, d17   @ encoding: [0x60,0xff,0xb1,0x06]
-	vmin.u32	d16, d16, d17
-@ CHECK: vmin.f32	d16, d16, d17   @ encoding: [0x60,0xef,0xa1,0x0f]
-	vmin.f32	d16, d16, d17
-@ CHECK: vmin.s8	q8, q8, q9              @ encoding: [0x40,0xef,0xf2,0x06]
-	vmin.s8	q8, q8, q9
-@ CHECK: vmin.s16	q8, q8, q9      @ encoding: [0x50,0xef,0xf2,0x06]
-	vmin.s16	q8, q8, q9
-@ CHECK: vmin.s32	q8, q8, q9      @ encoding: [0x60,0xef,0xf2,0x06]
-	vmin.s32	q8, q8, q9
-@ CHECK: vmin.u8	q8, q8, q9              @ encoding: [0x40,0xff,0xf2,0x06]
-	vmin.u8	q8, q8, q9
-@ CHECK: vmin.u16	q8, q8, q9      @ encoding: [0x50,0xff,0xf2,0x06]
-	vmin.u16	q8, q8, q9
-@ CHECK: vmin.u32	q8, q8, q9      @ encoding: [0x60,0xff,0xf2,0x06]
-	vmin.u32	q8, q8, q9
-@ CHECK: vmin.f32	q8, q8, q9      @ encoding: [0x60,0xef,0xe2,0x0f]
-	vmin.f32	q8, q8, q9
-@ CHECK: vmax.s8	d16, d16, d17           @ encoding: [0x40,0xef,0xa1,0x06]
-	vmax.s8	d16, d16, d17
-@ CHECK: vmax.s16	d16, d16, d17   @ encoding: [0x50,0xef,0xa1,0x06]
-	vmax.s16	d16, d16, d17
-@ CHECK: vmax.s32	d16, d16, d17   @ encoding: [0x60,0xef,0xa1,0x06]
-	vmax.s32	d16, d16, d17
-@ CHECK: vmax.u8	d16, d16, d17           @ encoding: [0x40,0xff,0xa1,0x06]
-	vmax.u8	d16, d16, d17
-@ CHECK: vmax.u16	d16, d16, d17   @ encoding: [0x50,0xff,0xa1,0x06]
-	vmax.u16	d16, d16, d17
-@ CHECK: vmax.u32	d16, d16, d17   @ encoding: [0x60,0xff,0xa1,0x06]
-	vmax.u32	d16, d16, d17
-@ CHECK: vmax.f32	d16, d16, d17   @ encoding: [0x40,0xef,0xa1,0x0f]
-	vmax.f32	d16, d16, d17
-@ CHECK: vmax.s8	q8, q8, q9              @ encoding: [0x40,0xef,0xe2,0x06]
-	vmax.s8	q8, q8, q9
-@ CHECK: vmax.s16	q8, q8, q9      @ encoding: [0x50,0xef,0xe2,0x06]
-	vmax.s16	q8, q8, q9
+        vmax.s8 d1, d2, d3
+        vmax.s16 d4, d5, d6
+        vmax.s32 d7, d8, d9
+        vmax.u8 d10, d11, d12
+        vmax.u16 d13, d14, d15
+        vmax.u32 d16, d17, d18
+        vmax.f32 d19, d20, d21
+
+        vmax.s8 d2, d3
+        vmax.s16 d5, d6
+        vmax.s32 d8, d9
+        vmax.u8 d11, d12
+        vmax.u16 d14, d15
+        vmax.u32 d17, d18
+        vmax.f32 d20, d21
+
+        vmax.s8 q1, q2, q3
+        vmax.s16 q4, q5, q6
+        vmax.s32 q7, q8, q9
+        vmax.u8 q10, q11, q12
+        vmax.u16 q13, q14, q15
+        vmax.u32 q6, q7, q8
+        vmax.f32 q9, q5, q1
+
+        vmax.s8 q2, q3
+        vmax.s16 q5, q6
+        vmax.s32 q8, q9
+        vmax.u8 q11, q2
+        vmax.u16 q4, q5
+        vmax.u32 q7, q8
+        vmax.f32 q2, q1
+
+@ CHECK: vmax.s8	d1, d2, d3      @ encoding: [0x02,0xef,0x03,0x16]
+@ CHECK: vmax.s16	d4, d5, d6      @ encoding: [0x15,0xef,0x06,0x46]
+@ CHECK: vmax.s32	d7, d8, d9      @ encoding: [0x28,0xef,0x09,0x76]
+@ CHECK: vmax.u8	d10, d11, d12   @ encoding: [0x0b,0xff,0x0c,0xa6]
+@ CHECK: vmax.u16	d13, d14, d15   @ encoding: [0x1e,0xff,0x0f,0xd6]
+@ CHECK: vmax.u32	d16, d17, d18   @ encoding: [0x61,0xff,0xa2,0x06]
+@ CHECK: vmax.f32	d19, d20, d21   @ encoding: [0x44,0xef,0xa5,0x3f]
+@ CHECK: vmax.s8	d2, d2, d3      @ encoding: [0x02,0xef,0x03,0x26]
+@ CHECK: vmax.s16	d5, d5, d6      @ encoding: [0x15,0xef,0x06,0x56]
+@ CHECK: vmax.s32	d8, d8, d9      @ encoding: [0x28,0xef,0x09,0x86]
+@ CHECK: vmax.u8	d11, d11, d12   @ encoding: [0x0b,0xff,0x0c,0xb6]
+@ CHECK: vmax.u16	d14, d14, d15   @ encoding: [0x1e,0xff,0x0f,0xe6]
+@ CHECK: vmax.u32	d17, d17, d18   @ encoding: [0x61,0xff,0xa2,0x16]
+@ CHECK: vmax.f32	d20, d20, d21   @ encoding: [0x44,0xef,0xa5,0x4f]
+@ CHECK: vmax.s8	q1, q2, q3      @ encoding: [0x04,0xef,0x46,0x26]
+@ CHECK: vmax.s16	q4, q5, q6      @ encoding: [0x1a,0xef,0x4c,0x86]
+@ CHECK: vmax.s32	q7, q8, q9      @ encoding: [0x20,0xef,0xe2,0xe6]
+@ CHECK: vmax.u8	q10, q11, q12   @ encoding: [0x46,0xff,0xe8,0x46]
+@ CHECK: vmax.u16	q13, q14, q15   @ encoding: [0x5c,0xff,0xee,0xa6]
+@ CHECK: vmax.u32	q6, q7, q8      @ encoding: [0x2e,0xff,0x60,0xc6]
+@ CHECK: vmax.f32	q9, q5, q1      @ encoding: [0x4a,0xef,0x42,0x2f]
+@ CHECK: vmax.s8	q2, q2, q3      @ encoding: [0x04,0xef,0x46,0x46]
+@ CHECK: vmax.s16	q5, q5, q6      @ encoding: [0x1a,0xef,0x4c,0xa6]
 @ CHECK: vmax.s32	q8, q8, q9      @ encoding: [0x60,0xef,0xe2,0x06]
-	vmax.s32	q8, q8, q9
-@ CHECK: vmax.u8	q8, q8, q9              @ encoding: [0x40,0xff,0xe2,0x06]
-	vmax.u8	q8, q8, q9
-@ CHECK: vmax.u16	q8, q8, q9      @ encoding: [0x50,0xff,0xe2,0x06]
-	vmax.u16	q8, q8, q9
-@ CHECK: vmax.u32	q8, q8, q9      @ encoding: [0x60,0xff,0xe2,0x06]
-	vmax.u32	q8, q8, q9
-@ CHECK: vmax.f32	q8, q8, q9      @ encoding: [0x40,0xef,0xe2,0x0f]
-	vmax.f32	q8, q8, q9
+@ CHECK: vmax.u8	q11, q11, q2    @ encoding: [0x46,0xff,0xc4,0x66]
+@ CHECK: vmax.u16	q4, q4, q5      @ encoding: [0x18,0xff,0x4a,0x86]
+@ CHECK: vmax.u32	q7, q7, q8      @ encoding: [0x2e,0xff,0x60,0xe6]
+@ CHECK: vmax.f32	q2, q2, q1      @ encoding: [0x04,0xef,0x42,0x4f]
+
+
+        vmin.s8 d1, d2, d3
+        vmin.s16 d4, d5, d6
+        vmin.s32 d7, d8, d9
+        vmin.u8 d10, d11, d12
+        vmin.u16 d13, d14, d15
+        vmin.u32 d16, d17, d18
+        vmin.f32 d19, d20, d21
+
+        vmin.s8 d2, d3
+        vmin.s16 d5, d6
+        vmin.s32 d8, d9
+        vmin.u8 d11, d12
+        vmin.u16 d14, d15
+        vmin.u32 d17, d18
+        vmin.f32 d20, d21
+
+        vmin.s8 q1, q2, q3
+        vmin.s16 q4, q5, q6
+        vmin.s32 q7, q8, q9
+        vmin.u8 q10, q11, q12
+        vmin.u16 q13, q14, q15
+        vmin.u32 q6, q7, q8
+        vmin.f32 q9, q5, q1
+
+        vmin.s8 q2, q3
+        vmin.s16 q5, q6
+        vmin.s32 q8, q9
+        vmin.u8 q11, q2
+        vmin.u16 q4, q5
+        vmin.u32 q7, q8
+        vmin.f32 q2, q1
+
+@ CHECK: vmin.s8	d1, d2, d3      @ encoding: [0x02,0xef,0x13,0x16]
+@ CHECK: vmin.s16	d4, d5, d6      @ encoding: [0x15,0xef,0x16,0x46]
+@ CHECK: vmin.s32	d7, d8, d9      @ encoding: [0x28,0xef,0x19,0x76]
+@ CHECK: vmin.u8	d10, d11, d12   @ encoding: [0x0b,0xff,0x1c,0xa6]
+@ CHECK: vmin.u16	d13, d14, d15   @ encoding: [0x1e,0xff,0x1f,0xd6]
+@ CHECK: vmin.u32	d16, d17, d18   @ encoding: [0x61,0xff,0xb2,0x06]
+@ CHECK: vmin.f32	d19, d20, d21   @ encoding: [0x64,0xef,0xa5,0x3f]
+@ CHECK: vmin.s8	d2, d2, d3      @ encoding: [0x02,0xef,0x13,0x26]
+@ CHECK: vmin.s16	d5, d5, d6      @ encoding: [0x15,0xef,0x16,0x56]
+@ CHECK: vmin.s32	d8, d8, d9      @ encoding: [0x28,0xef,0x19,0x86]
+@ CHECK: vmin.u8	d11, d11, d12   @ encoding: [0x0b,0xff,0x1c,0xb6]
+@ CHECK: vmin.u16	d14, d14, d15   @ encoding: [0x1e,0xff,0x1f,0xe6]
+@ CHECK: vmin.u32	d17, d17, d18   @ encoding: [0x61,0xff,0xb2,0x16]
+@ CHECK: vmin.f32	d20, d20, d21   @ encoding: [0x64,0xef,0xa5,0x4f]
+@ CHECK: vmin.s8	q1, q2, q3      @ encoding: [0x04,0xef,0x56,0x26]
+@ CHECK: vmin.s16	q4, q5, q6      @ encoding: [0x1a,0xef,0x5c,0x86]
+@ CHECK: vmin.s32	q7, q8, q9      @ encoding: [0x20,0xef,0xf2,0xe6]
+@ CHECK: vmin.u8	q10, q11, q12   @ encoding: [0x46,0xff,0xf8,0x46]
+@ CHECK: vmin.u16	q13, q14, q15   @ encoding: [0x5c,0xff,0xfe,0xa6]
+@ CHECK: vmin.u32	q6, q7, q8      @ encoding: [0x2e,0xff,0x70,0xc6]
+@ CHECK: vmin.f32	q9, q5, q1      @ encoding: [0x6a,0xef,0x42,0x2f]
+@ CHECK: vmin.s8	q2, q2, q3      @ encoding: [0x04,0xef,0x56,0x46]
+@ CHECK: vmin.s16	q5, q5, q6      @ encoding: [0x1a,0xef,0x5c,0xa6]
+@ CHECK: vmin.s32	q8, q8, q9      @ encoding: [0x60,0xef,0xf2,0x06]
+@ CHECK: vmin.u8	q11, q11, q2    @ encoding: [0x46,0xff,0xd4,0x66]
+@ CHECK: vmin.u16	q4, q4, q5      @ encoding: [0x18,0xff,0x5a,0x86]
+@ CHECK: vmin.u32	q7, q7, q8      @ encoding: [0x2e,0xff,0x70,0xe6]
+@ CHECK: vmin.f32	q2, q2, q1      @ encoding: [0x24,0xef,0x42,0x4f]
diff --git a/test/MC/ARM/neont2-mul-encoding.s b/test/MC/ARM/neont2-mul-encoding.s
index a9de38e..dfbb667 100644
--- a/test/MC/ARM/neont2-mul-encoding.s
+++ b/test/MC/ARM/neont2-mul-encoding.s
@@ -70,9 +70,9 @@
 
 	vqdmull.s16	q8, d16, d17
 	vqdmull.s32	q8, d16, d17
-@ vqdmull.s16	q1, d7, d1[1]
+        vqdmull.s16	q1, d7, d1[1]
 
 @ CHECK: vqdmull.s16	q8, d16, d17    @ encoding: [0xd0,0xef,0xa1,0x0d]
 @ CHECK: vqdmull.s32	q8, d16, d17    @ encoding: [0xe0,0xef,0xa1,0x0d]
-@ FIXME: vqdmull.s16	q1, d7, d1[1]    @ encoding: [0x97,0xef,0x49,0x3b]
+@ CHECK: vqdmull.s16	q1, d7, d1[1]   @ encoding: [0x97,0xef,0x49,0x2b]
 
diff --git a/test/MC/ARM/pr11877.s b/test/MC/ARM/pr11877.s
new file mode 100644
index 0000000..da3f6ad
--- /dev/null
+++ b/test/MC/ARM/pr11877.s
@@ -0,0 +1,6 @@
+// RUN: llvm-mc -triple arm-unknown-unknown %s
+
+i:
+        .long    g
+g = h
+h = i
diff --git a/test/MC/ARM/simple-fp-encoding.s b/test/MC/ARM/simple-fp-encoding.s
index 58abe88..a40e02b 100644
--- a/test/MC/ARM/simple-fp-encoding.s
+++ b/test/MC/ARM/simple-fp-encoding.s
@@ -271,3 +271,51 @@
 
 @ CHECK: vmovne	s25, s26, r2, r5
         vmovne	s25, s26, r2, r5        @ encoding: [0x39,0x2a,0x45,0x1c]
+
+@ VMOV w/ optional data type suffix.
+	vmov.32 s1, r8
+        vmov.s16 s2, r4
+        vmov.16 s3, r6
+        vmov.u32 s4, r1
+        vmov.p8 s5, r2
+        vmov.8 s6, r3
+
+        vmov.32 r1, s8
+        vmov.s16 r2, s4
+        vmov.16 r3, s6
+        vmov.u32 r4, s1
+        vmov.p8 r5, s2
+        vmov.8 r6, s3
+
+@ CHECK: vmov	s1, r8                  @ encoding: [0x90,0x8a,0x00,0xee]
+@ CHECK: vmov	s2, r4                  @ encoding: [0x10,0x4a,0x01,0xee]
+@ CHECK: vmov	s3, r6                  @ encoding: [0x90,0x6a,0x01,0xee]
+@ CHECK: vmov	s4, r1                  @ encoding: [0x10,0x1a,0x02,0xee]
+@ CHECK: vmov	s5, r2                  @ encoding: [0x90,0x2a,0x02,0xee]
+@ CHECK: vmov	s6, r3                  @ encoding: [0x10,0x3a,0x03,0xee]
+@ CHECK: vmov	r1, s8                  @ encoding: [0x10,0x1a,0x14,0xee]
+@ CHECK: vmov	r2, s4                  @ encoding: [0x10,0x2a,0x12,0xee]
+@ CHECK: vmov	r3, s6                  @ encoding: [0x10,0x3a,0x13,0xee]
+@ CHECK: vmov	r4, s1                  @ encoding: [0x90,0x4a,0x10,0xee]
+@ CHECK: vmov	r5, s2                  @ encoding: [0x10,0x5a,0x11,0xee]
+@ CHECK: vmov	r6, s3                  @ encoding: [0x90,0x6a,0x11,0xee]
+
+
+@ VCVT (between floating-point and fixed-point)
+	vcvt.f32.u32 s0, s0, #20
+        vcvt.f64.s32 d0, d0, #32
+        vcvt.f32.u16 s0, s0, #1
+        vcvt.f64.s16 d0, d0, #16
+
+@ CHECK: vcvt.f32.u32	s0, s0, #20     @ encoding: [0xc6,0x0a,0xbb,0xee]
+@ CHECK: vcvt.f64.s32	d0, d0, #32     @ encoding: [0xc0,0x0b,0xba,0xee]
+@ CHECK: vcvt.f32.u16	s0, s0, #1      @ encoding: [0x67,0x0a,0xbb,0xee]
+@ CHECK: vcvt.f64.s16	d0, d0, #16     @ encoding: [0x40,0x0b,0xba,0xee]
+
+
+@ Use NEON to load some f32 immediates that don't fit the f8 representation.
+        vmov.f32 d4, #0.0
+        vmov.f32 d4, #32.0
+
+@ CHECK: vmov.i32	d4, #0x0        @ encoding: [0x10,0x40,0x80,0xf2]
+@ CHECK: vmov.i32	d4, #0x42000000 @ encoding: [0x12,0x46,0x84,0xf2]
diff --git a/test/MC/AsmParser/cfi-unfinished-frame.s b/test/MC/AsmParser/cfi-unfinished-frame.s
new file mode 100644
index 0000000..1182d52
--- /dev/null
+++ b/test/MC/AsmParser/cfi-unfinished-frame.s
@@ -0,0 +1,5 @@
+// RUN: not llvm-mc -filetype=asm -triple x86_64-pc-linux-gnu %s -o %t 2>%t.out
+// RUN: FileCheck -input-file=%t.out %s
+
+.cfi_startproc
+// CHECK: Unfinished frame
diff --git a/test/MC/AsmParser/dg.exp b/test/MC/AsmParser/dg.exp
deleted file mode 100644
index a6d81da..0000000
--- a/test/MC/AsmParser/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target X86] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{s}]]
-}
diff --git a/test/MC/AsmParser/directive_file-errors.s b/test/MC/AsmParser/directive_file-errors.s
new file mode 100644
index 0000000..5ae2bbe
--- /dev/null
+++ b/test/MC/AsmParser/directive_file-errors.s
@@ -0,0 +1,9 @@
+// RUN: not llvm-mc -g -triple i386-unknown-unknown %s 2> %t.err | FileCheck %s
+// RUN: FileCheck --check-prefix=CHECK-ERRORS %s < %t.err
+// Test for Bug 11740
+
+        .file "hello"
+        .file 1 "world"
+
+// CHECK: .file "hello"
+// CHECK-ERRORS:6:9: error: input can't have .file dwarf directives when -g is used to generate dwarf debug info for assembly code
diff --git a/test/MC/AsmParser/lit.local.cfg b/test/MC/AsmParser/lit.local.cfg
new file mode 100644
index 0000000..1f53769
--- /dev/null
+++ b/test/MC/AsmParser/lit.local.cfg
@@ -0,0 +1,13 @@
+config.suffixes = ['.s']
+
+def getRoot(config):
+    if not config.parent:
+        return config
+    return getRoot(config.parent)
+
+root = getRoot(config)
+
+targets = set(root.targets_to_build.split())
+if not 'X86' in targets:
+    config.unsupported = True
+
diff --git a/test/MC/AsmParser/pr11865.s b/test/MC/AsmParser/pr11865.s
new file mode 100644
index 0000000..1c03e11
--- /dev/null
+++ b/test/MC/AsmParser/pr11865.s
@@ -0,0 +1,6 @@
+// RUN: llvm-mc -triple i386-unknown-unknown %s
+
+i:
+        .long    g
+g = h
+h = i
diff --git a/test/MC/AsmParser/variables-invalid.s b/test/MC/AsmParser/variables-invalid.s
index 9656889..21758d2 100644
--- a/test/MC/AsmParser/variables-invalid.s
+++ b/test/MC/AsmParser/variables-invalid.s
@@ -2,7 +2,7 @@
 // RUN: FileCheck --input-file %t %s
 
         .data
-// CHECK: invalid assignment to 't0_v0'
+// CHECK: Recursive use of 't0_v0'
         t0_v0 = t0_v0 + 1
 
         t1_v1 = 1
@@ -15,3 +15,13 @@ t2_s0:
         t3_s0 = t2_s0 + 1
 // CHECK: invalid reassignment of non-absolute variable 't3_s0'
         t3_s0 = 1
+
+
+// CHECK: Recursive use of 't4_s2'
+        t4_s0 = t4_s1
+        t4_s1 = t4_s2
+        t4_s2 = t4_s0
+
+// CHECK: Recursive use of 't5_s1'
+        t5_s0 = t5_s1 + 1
+        t5_s1 = t5_s0
diff --git a/test/MC/COFF/dg.exp b/test/MC/COFF/dg.exp
deleted file mode 100644
index d46d700..0000000
--- a/test/MC/COFF/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target X86] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,s}]]
-}
diff --git a/test/MC/COFF/global_ctors.ll b/test/MC/COFF/global_ctors.ll
new file mode 100644
index 0000000..4d6b1c7
--- /dev/null
+++ b/test/MC/COFF/global_ctors.ll
@@ -0,0 +1,28 @@
+; Test that global ctors are emitted into the proper COFF section for the
+; target. Mingw uses .ctors, whereas MSVC uses .CRT$XC*.
+; RUN: llc < %s -mtriple i686-pc-win32 | FileCheck %s --check-prefix WIN32 
+; RUN: llc < %s -mtriple x86_64-pc-win32 | FileCheck %s --check-prefix WIN32 
+; RUN: llc < %s -mtriple i686-pc-mingw32 | FileCheck %s --check-prefix MINGW32 
+; RUN: llc < %s -mtriple x86_64-pc-mingw32 | FileCheck %s --check-prefix MINGW32 
+
+@.str = private unnamed_addr constant [13 x i8] c"constructing\00", align 1
+@.str2 = private unnamed_addr constant [5 x i8] c"main\00", align 1
+
+@llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @a_global_ctor }]
+
+declare i32 @puts(i8*)
+
+define void @a_global_ctor() nounwind {
+  %1 = call i32 @puts(i8* getelementptr inbounds ([13 x i8]* @.str, i32 0, i32 0))
+  ret void
+}
+
+define i32 @main() nounwind {
+  %1 = call i32 @puts(i8* getelementptr inbounds ([5 x i8]* @.str2, i32 0, i32 0))
+  ret i32 0
+}
+
+; WIN32: .section .CRT$XCU,"r"
+; WIN32: a_global_ctor
+; MINGW32: .section .ctors,"w"
+; MINGW32: a_global_ctor
diff --git a/test/MC/COFF/lit.local.cfg b/test/MC/COFF/lit.local.cfg
new file mode 100644
index 0000000..ec8d4d3
--- /dev/null
+++ b/test/MC/COFF/lit.local.cfg
@@ -0,0 +1,13 @@
+config.suffixes = ['.s', '.ll']
+
+def getRoot(config):
+    if not config.parent:
+        return config
+    return getRoot(config.parent)
+
+root = getRoot(config)
+
+targets = set(root.targets_to_build.split())
+if not 'X86' in targets:
+    config.unsupported = True
+
diff --git a/test/MC/COFF/secrel32.s b/test/MC/COFF/secrel32.s
new file mode 100644
index 0000000..ce148db
--- /dev/null
+++ b/test/MC/COFF/secrel32.s
@@ -0,0 +1,14 @@
+// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s | coff-dump.py | FileCheck %s
+
+// check that we produce the correct relocation for .secrel32
+
+Lfoo:
+	.secrel32	Lfoo
+
+// CHECK:       Relocations              = [
+// CHECK-NEXT:    0 = {
+// CHECK-NEXT:       VirtualAddress           = 0x0
+// CHECK-NEXT:       SymbolTableIndex         = 0
+// CHECK-NEXT:       Type                     = IMAGE_REL_I386_SECREL (11)
+// CHECK-NEXT:       SymbolName               = .text
+// CHECK-NEXT:     }
diff --git a/test/MC/Disassembler/ARM/dg.exp b/test/MC/Disassembler/ARM/dg.exp
deleted file mode 100644
index fc2f17a..0000000
--- a/test/MC/Disassembler/ARM/dg.exp
+++ /dev/null
@@ -1,6 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target ARM] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{txt}]]
-}
-
diff --git a/test/MC/Disassembler/ARM/invalid-IT-CC15.txt b/test/MC/Disassembler/ARM/invalid-IT-CC15.txt
new file mode 100644
index 0000000..17e25ea
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-IT-CC15.txt
@@ -0,0 +1,18 @@
+# RUN: llvm-mc --disassemble %s -triple=thumbv7-unknown-unknown |& grep und
+# rdar://10841671
+
+0xe3 0xbf
+0xdf 0xed 0x61 0x3b
+0x71 0xee 0xe0 0x1b
+0x72 0xee 0xa3 0x2b
+0xdf 0xed 0x60 0x0b
+
+# This test is dealing with an undefined condition code value of 15 in the
+# above sequence of junk bytes and not allowing the disassembler to abort on
+# printing the final instruction in this list.
+# 
+#	ittte	al
+#	vldr	d19, [pc, #388]
+#	vsub.f64	d17, d17, d16
+#	vadd.f64	d18, d18, d19
+#	vldr<und>	d16, [pc, #384]
diff --git a/test/MC/Disassembler/ARM/lit.local.cfg b/test/MC/Disassembler/ARM/lit.local.cfg
new file mode 100644
index 0000000..c5dd3fb
--- /dev/null
+++ b/test/MC/Disassembler/ARM/lit.local.cfg
@@ -0,0 +1,13 @@
+config.suffixes = ['.txt']
+
+def getRoot(config):
+    if not config.parent:
+        return config
+    return getRoot(config.parent)
+
+root = getRoot(config)
+
+targets = set(root.targets_to_build.split())
+if not 'ARM' in targets:
+    config.unsupported = True
+
diff --git a/test/MC/Disassembler/ARM/unpredictables-thumb.txt b/test/MC/Disassembler/ARM/unpredictables-thumb.txt
new file mode 100644
index 0000000..e7645f0
--- /dev/null
+++ b/test/MC/Disassembler/ARM/unpredictables-thumb.txt
@@ -0,0 +1,5 @@
+# RUN: llvm-mc --disassemble %s -triple=thumbv7 |& FileCheck %s
+
+0x01 0x47
+# CHECK: 3:1: warning: potentially undefined
+# CHECK: bx r0
diff --git a/test/MC/Disassembler/MBlaze/dg.exp b/test/MC/Disassembler/MBlaze/dg.exp
deleted file mode 100644
index 0be99a3..0000000
--- a/test/MC/Disassembler/MBlaze/dg.exp
+++ /dev/null
@@ -1,6 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target MBlaze] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{txt}]]
-}
-
diff --git a/test/MC/Disassembler/MBlaze/lit.local.cfg b/test/MC/Disassembler/MBlaze/lit.local.cfg
new file mode 100644
index 0000000..766b980
--- /dev/null
+++ b/test/MC/Disassembler/MBlaze/lit.local.cfg
@@ -0,0 +1,13 @@
+config.suffixes = ['.txt']
+
+def getRoot(config):
+    if not config.parent:
+        return config
+    return getRoot(config.parent)
+
+root = getRoot(config)
+
+targets = set(root.targets_to_build.split())
+if not 'MBlaze' in targets:
+    config.unsupported = True
+
diff --git a/test/MC/Disassembler/X86/dg.exp b/test/MC/Disassembler/X86/dg.exp
deleted file mode 100644
index a4d0e7c..0000000
--- a/test/MC/Disassembler/X86/dg.exp
+++ /dev/null
@@ -1,6 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target X86] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{txt}]]
-}
-
diff --git a/test/MC/Disassembler/X86/lit.local.cfg b/test/MC/Disassembler/X86/lit.local.cfg
new file mode 100644
index 0000000..5f3ae7d
--- /dev/null
+++ b/test/MC/Disassembler/X86/lit.local.cfg
@@ -0,0 +1,13 @@
+config.suffixes = ['.txt']
+
+def getRoot(config):
+    if not config.parent:
+        return config
+    return getRoot(config.parent)
+
+root = getRoot(config)
+
+targets = set(root.targets_to_build.split())
+if not 'X86' in targets:
+    config.unsupported = True
+
diff --git a/test/MC/Disassembler/X86/simple-tests.txt b/test/MC/Disassembler/X86/simple-tests.txt
index 05a57d7..840d5fa 100644
--- a/test/MC/Disassembler/X86/simple-tests.txt
+++ b/test/MC/Disassembler/X86/simple-tests.txt
@@ -6,6 +6,11 @@
 # CHECK: int	$33
 0xCD 0x21
 
+# CHECK: jrcxz -127
+0xe3 0x81
+
+# CHECK: jecxz -127
+0x67 0xe3 0x81
 
 # CHECK: addb	%al, (%rax)
 0 0
@@ -28,6 +33,9 @@
 # CHECK: vmcall
 0x0f 0x01 0xc1
 
+# CHECK: vmfunc
+0x0f 0x01 0xd4
+
 # CHECK: vmlaunch
 0x0f 0x01 0xc2
 
@@ -52,6 +60,30 @@
 # CHECK: vmptrst
 0x0f 0xc7 0x38
 
+# CHECK: vmrun
+0x0f 0x01 0xd8
+
+# CHECK: vmmcall
+0x0f 0x01 0xd9
+
+# CHECK: vmload
+0x0f 0x01 0xda
+
+# CHECK: vmsave
+0x0f 0x01 0xdb
+
+# CHECK: stgi
+0x0f 0x01 0xdc
+
+# CHECK: clgi
+0x0f 0x01 0xdd
+
+# CHECK: skinit
+0x0f 0x01 0xde
+
+# CHECK: invlpga
+0x0f 0x01 0xdf
+
 # CHECK: movl $0, -4(%rbp)
 0xc7 0x45 0xfc 0x00 0x00 0x00 0x00
 
@@ -647,3 +679,48 @@
 
 # CHECK: shrxq %r12, %r11, %r10
 0xc4 0x42 0x9b 0xf7 0xd3
+
+# CHECK: vfmadd132ps %xmm11, %xmm12, %xmm10
+0xc4 0x42 0x19 0x98 0xd3
+
+# CHECK: vfmadd132pd %xmm11, %xmm12, %xmm10
+0xc4 0x42 0x99 0x98 0xd3
+
+# CHECK: vfmadd132ps %ymm11, %ymm12, %ymm10
+0xc4 0x42 0x1d 0x98 0xd3
+
+# CHECK: vfmadd132pd %ymm11, %ymm12, %ymm10
+0xc4 0x42 0x9d 0x98 0xd3
+
+# CHECK: vfmadd132ps (%rax), %xmm12, %xmm10
+0xc4 0x62 0x19 0x98 0x10
+
+# CHECK: vfmadd132pd (%rax), %xmm12, %xmm10
+0xc4 0x62 0x99 0x98 0x10
+
+# CHECK: vfmadd132ps (%rax), %ymm12, %ymm10
+0xc4 0x62 0x1d 0x98 0x10
+
+# CHECK: vfmadd132pd (%rax), %ymm12, %ymm10
+0xc4 0x62 0x9d 0x98 0x10
+
+# CHECK: vfmadd132ss %xmm11, %xmm12, %xmm10
+0xc4 0x42 0x19 0x99 0xd3
+
+# CHECK: vfmadd132sd %xmm11, %xmm12, %xmm10
+0xc4 0x42 0x99 0x99 0xd3
+
+# CHECK: vfmadd132ss (%rax), %xmm12, %xmm10
+0xc4 0x62 0x19 0x99 0x10
+
+# CHECK: vfmadd132sd (%rax), %xmm12, %xmm10
+0xc4 0x62 0x99 0x99 0x10
+
+# CHECK: vfmaddss (%rcx), %xmm1, %xmm0, %xmm0
+0xc4 0xe3 0xf9 0x6a 0x01 0x10
+
+# CHECK: vfmaddss %xmm1, (%rcx), %xmm0, %xmm0
+0xc4 0xe3 0x79 0x6a 0x01 0x10
+
+# CHECK: vpermil2ps $1, 4(%rax), %xmm2, %xmm3, %xmm0
+0xc4 0xe3 0xe1 0x48 0x40 0x04 0x21
diff --git a/test/MC/Disassembler/X86/x86-32.txt b/test/MC/Disassembler/X86/x86-32.txt
index 5d5ee5d..5f2f608 100644
--- a/test/MC/Disassembler/X86/x86-32.txt
+++ b/test/MC/Disassembler/X86/x86-32.txt
@@ -11,6 +11,12 @@
 # CHECK: calll
 0xff 0xd0
 
+# CHECK: jecxz -127
+0xe3 0x81
+
+# CHECK: jcxz -127
+0x67 0xe3 0x81
+
 # CHECK: incl
 0x40
 
@@ -63,6 +69,9 @@
 # CHECK: vmcall
 0x0f 0x01 0xc1
 
+# CHECK: vmfunc
+0x0f 0x01 0xd4
+
 # CHECK: vmlaunch
 0x0f 0x01 0xc2
 
@@ -87,6 +96,30 @@
 # CHECK: vmptrst
 0x0f 0xc7 0x38
 
+# CHECK: vmrun
+0x0f 0x01 0xd8
+
+# CHECK: vmmcall
+0x0f 0x01 0xd9
+
+# CHECK: vmload
+0x0f 0x01 0xda
+
+# CHECK: vmsave
+0x0f 0x01 0xdb
+
+# CHECK: stgi
+0x0f 0x01 0xdc
+
+# CHECK: clgi
+0x0f 0x01 0xdd
+
+# CHECK: skinit
+0x0f 0x01 0xde
+
+# CHECK: invlpga
+0x0f 0x01 0xdf
+
 # CHECK: movl $0, -4(%ebp)
 0xc7 0x45 0xfc 0x00 0x00 0x00 0x00
 
diff --git a/test/MC/ELF/cfi-escape.s b/test/MC/ELF/cfi-escape.s
new file mode 100644
index 0000000..3a5af00
--- /dev/null
+++ b/test/MC/ELF/cfi-escape.s
@@ -0,0 +1,42 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+
+f:
+	.cfi_startproc
+        nop
+	.cfi_escape 0x15, 7, 0x7f # DW_CFA_val_offset_sf, %esp, 8/-8
+        nop
+	.cfi_endproc
+
+// CHECK:       # Section 4
+// CHECK-NEXT:  (('sh_name', 0x00000011) # '.eh_frame'
+// CHECK-NEXT:   ('sh_type', 0x00000001)
+// CHECK-NEXT:   ('sh_flags', 0x0000000000000002)
+// CHECK-NEXT:   ('sh_addr', 0x0000000000000000)
+// CHECK-NEXT:   ('sh_offset', 0x0000000000000048)
+// CHECK-NEXT:   ('sh_size', 0x0000000000000030)
+// CHECK-NEXT:   ('sh_link', 0x00000000)
+// CHECK-NEXT:   ('sh_info', 0x00000000)
+// CHECK-NEXT:   ('sh_addralign', 0x0000000000000008)
+// CHECK-NEXT:   ('sh_entsize', 0x0000000000000000)
+// CHECK-NEXT:   ('_section_data', '14000000 00000000 017a5200 01781001 1b0c0708 90010000 14000000 1c000000 00000000 02000000 00411507 7f000000')
+// CHECK-NEXT:  ),
+// CHECK-NEXT:  # Section 5
+// CHECK-NEXT:  (('sh_name', 0x0000000c) # '.rela.eh_frame'
+// CHECK-NEXT:   ('sh_type', 0x00000004)
+// CHECK-NEXT:   ('sh_flags', 0x0000000000000000)
+// CHECK-NEXT:   ('sh_addr', 0x0000000000000000)
+// CHECK-NEXT:   ('sh_offset', 0x0000000000000390)
+// CHECK-NEXT:   ('sh_size', 0x0000000000000018)
+// CHECK-NEXT:   ('sh_link', 0x00000007)
+// CHECK-NEXT:   ('sh_info', 0x00000004)
+// CHECK-NEXT:   ('sh_addralign', 0x0000000000000008)
+// CHECK-NEXT:   ('sh_entsize', 0x0000000000000018)
+// CHECK-NEXT:   ('_relocations', [
+// CHECK-NEXT:    # Relocation 0
+// CHECK-NEXT:    (('r_offset', 0x0000000000000020)
+// CHECK-NEXT:     ('r_sym', 0x00000002)
+// CHECK-NEXT:     ('r_type', 0x00000002)
+// CHECK-NEXT:     ('r_addend', 0x0000000000000000)
+// CHECK-NEXT:    ),
+// CHECK-NEXT:   ])
+// CHECK-NEXT:  ),
diff --git a/test/MC/ELF/cfi-restore.s b/test/MC/ELF/cfi-restore.s
new file mode 100644
index 0000000..0fc3129
--- /dev/null
+++ b/test/MC/ELF/cfi-restore.s
@@ -0,0 +1,42 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+
+f:
+	.cfi_startproc
+        nop
+	.cfi_restore %rbp
+        nop
+	.cfi_endproc
+
+// CHECK:       # Section 4
+// CHECK-NEXT:  (('sh_name', 0x00000011) # '.eh_frame'
+// CHECK-NEXT:   ('sh_type', 0x00000001)
+// CHECK-NEXT:   ('sh_flags', 0x0000000000000002)
+// CHECK-NEXT:   ('sh_addr', 0x0000000000000000)
+// CHECK-NEXT:   ('sh_offset', 0x0000000000000048)
+// CHECK-NEXT:   ('sh_size', 0x0000000000000030)
+// CHECK-NEXT:   ('sh_link', 0x00000000)
+// CHECK-NEXT:   ('sh_info', 0x00000000)
+// CHECK-NEXT:   ('sh_addralign', 0x0000000000000008)
+// CHECK-NEXT:   ('sh_entsize', 0x0000000000000000)
+// CHECK-NEXT:   ('_section_data', '14000000 00000000 017a5200 01781001 1b0c0708 90010000 14000000 1c000000 00000000 02000000 0041c600 00000000')
+// CHECK-NEXT:  ),
+// CHECK-NEXT:  # Section 5
+// CHECK-NEXT:  (('sh_name', 0x0000000c) # '.rela.eh_frame'
+// CHECK-NEXT:   ('sh_type', 0x00000004)
+// CHECK-NEXT:   ('sh_flags', 0x0000000000000000)
+// CHECK-NEXT:   ('sh_addr', 0x0000000000000000)
+// CHECK-NEXT:   ('sh_offset', 0x0000000000000390)
+// CHECK-NEXT:   ('sh_size', 0x0000000000000018)
+// CHECK-NEXT:   ('sh_link', 0x00000007)
+// CHECK-NEXT:   ('sh_info', 0x00000004)
+// CHECK-NEXT:   ('sh_addralign', 0x0000000000000008)
+// CHECK-NEXT:   ('sh_entsize', 0x0000000000000018)
+// CHECK-NEXT:   ('_relocations', [
+// CHECK-NEXT:    # Relocation 0
+// CHECK-NEXT:    (('r_offset', 0x0000000000000020)
+// CHECK-NEXT:     ('r_sym', 0x00000002)
+// CHECK-NEXT:     ('r_type', 0x00000002)
+// CHECK-NEXT:     ('r_addend', 0x0000000000000000)
+// CHECK-NEXT:    ),
+// CHECK-NEXT:   ])
+// CHECK-NEXT:  ),
diff --git a/test/MC/ELF/cfi-signal-frame.s b/test/MC/ELF/cfi-signal-frame.s
new file mode 100644
index 0000000..cf6d160
--- /dev/null
+++ b/test/MC/ELF/cfi-signal-frame.s
@@ -0,0 +1,23 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+
+f:
+        .cfi_startproc
+        .cfi_signal_frame
+        .cfi_endproc
+
+g:
+        .cfi_startproc
+        .cfi_endproc
+
+// CHECK:      (('sh_name', 0x00000011) # '.eh_frame'
+// CHECK-NEXT:  ('sh_type', 0x00000001)
+// CHECK-NEXT:  ('sh_flags', 0x0000000000000002)
+// CHECK-NEXT:  ('sh_addr', 0x0000000000000000)
+// CHECK-NEXT:  ('sh_offset', 0x0000000000000040)
+// CHECK-NEXT:  ('sh_size', 0x0000000000000058)
+// CHECK-NEXT:  ('sh_link', 0x00000000)
+// CHECK-NEXT:  ('sh_info', 0x00000000)
+// CHECK-NEXT:  ('sh_addralign', 0x0000000000000008)
+// CHECK-NEXT:  ('sh_entsize', 0x0000000000000000)
+// CHECK-NEXT:  ('_section_data', '14000000 00000000 017a5253 00017810 011b0c07 08900100 10000000 1c000000 00000000 00000000 00000000 14000000 00000000 017a5200 01781001 1b0c0708 90010000 10000000 1c000000 00000000 00000000 00000000')
+// CHECK-NEXT: ),
diff --git a/test/MC/ELF/dg.exp b/test/MC/ELF/dg.exp
deleted file mode 100644
index d46d700..0000000
--- a/test/MC/ELF/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target X86] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,s}]]
-}
diff --git a/test/MC/ELF/gen-dwarf.s b/test/MC/ELF/gen-dwarf.s
new file mode 100644
index 0000000..b090e08
--- /dev/null
+++ b/test/MC/ELF/gen-dwarf.s
@@ -0,0 +1,70 @@
+// RUN: llvm-mc -g -triple  i686-pc-linux-gnu %s -filetype=obj -o - | elf-dump | FileCheck %s
+
+
+// Test that on ELF the debug info has a relocation to debug_abbrev and one to
+// debug_line.
+
+
+    .text
+    .globl foo
+    .type foo, @function
+    .align 4
+foo:
+    ret
+    .size foo, .-foo
+
+// Section 4 is .debug_line
+// CHECK:       # Section 4
+// CHECK-NEXT:  # '.debug_line'
+
+
+
+// The two relocations, one to symbol 6 and one to 4
+// CHECK:         # '.rel.debug_info'
+// CHECK-NEXT:   ('sh_type',
+// CHECK-NEXT:   ('sh_flags'
+// CHECK-NEXT:   ('sh_addr',
+// CHECK-NEXT:   ('sh_offset',
+// CHECK-NEXT:   ('sh_size',
+// CHECK-NEXT:   ('sh_link',
+// CHECK-NEXT:   ('sh_info',
+// CHECK-NEXT:   ('sh_addralign',
+// CHECK-NEXT:   ('sh_entsize',
+// CHECK-NEXT:   ('_relocations', [
+// CHECK-NEXT:    # Relocation 0
+// CHECK-NEXT:    (('r_offset', 0x00000006)
+// CHECK-NEXT:     ('r_sym', 0x000006)
+// CHECK-NEXT:     ('r_type', 0x01)
+// CHECK-NEXT:    ),
+// CHECK-NEXT:    # Relocation 1
+// CHECK-NEXT:    (('r_offset', 0x0000000c)
+// CHECK-NEXT:     ('r_sym', 0x000004)
+// CHECK-NEXT:     ('r_type', 0x01)
+// CHECK-NEXT:    ),
+
+
+// Section 8 is .debug_abbrev
+// CHECK:       # Section 8
+// CHECK-NEXT:  (('sh_name', 0x00000001) # '.debug_abbrev'
+
+// Symbol 4 is section 4 (.debug_line)
+// CHECK:         # Symbol 4
+// CHECK-NEXT:    (('st_name', 0x00000000) # ''
+// CHECK-NEXT:     ('st_value', 0x00000000)
+// CHECK-NEXT:     ('st_size', 0x00000000)
+// CHECK-NEXT:     ('st_bind', 0x0)
+// CHECK-NEXT:     ('st_type', 0x3)
+// CHECK-NEXT:     ('st_other', 0x00)
+// CHECK-NEXT:     ('st_shndx', 0x0004)
+// CHECK-NEXT:    ),
+
+// Symbol 6 is section 8 (.debug_abbrev)
+// CHECK:         # Symbol 6
+// CHECK-NEXT:    (('st_name', 0x00000000) # ''
+// CHECK-NEXT:     ('st_value', 0x00000000)
+// CHECK-NEXT:     ('st_size', 0x00000000)
+// CHECK-NEXT:     ('st_bind', 0x0)
+// CHECK-NEXT:     ('st_type', 0x3)
+// CHECK-NEXT:     ('st_other', 0x00)
+// CHECK-NEXT:     ('st_shndx', 0x0008)
+// CHECK-NEXT:    ),
diff --git a/test/MC/ELF/lit.local.cfg b/test/MC/ELF/lit.local.cfg
new file mode 100644
index 0000000..461c6f4
--- /dev/null
+++ b/test/MC/ELF/lit.local.cfg
@@ -0,0 +1,13 @@
+config.suffixes = ['.ll', '.s']
+
+def getRoot(config):
+    if not config.parent:
+        return config
+    return getRoot(config.parent)
+
+root = getRoot(config)
+
+targets = set(root.targets_to_build.split())
+if not 'X86' in targets:
+    config.unsupported = True
+
diff --git a/test/MC/ELF/many-section.s b/test/MC/ELF/many-section.s
index e7e723a..b729e66 100644
--- a/test/MC/ELF/many-section.s
+++ b/test/MC/ELF/many-section.s
@@ -1,5 +1,5 @@
 // RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o %t
-// RUN: llvm-nm %t | FileCheck %s
+// RUN: llvm-nm -a %t | FileCheck %s
 
 // CHECK: s000a
 // CHECK-NOT: U
diff --git a/test/MC/ELF/tls-i386.s b/test/MC/ELF/tls-i386.s
index 197418d..922d4c6 100644
--- a/test/MC/ELF/tls-i386.s
+++ b/test/MC/ELF/tls-i386.s
@@ -9,6 +9,13 @@
         movl    foo5@TPOFF(%eax), %eax
         movl    foo6@DTPOFF(%eax), %eax
         movl    foo7@INDNTPOFF, %eax
+        .long   foo8@NTPOFF
+        .long   foo9@GOTNTPOFF
+        .long   fooA@TLSGD
+        .long   fooB@TLSLDM
+        .long   fooC@TPOFF
+        .long   fooD@DTPOFF
+        .long   fooE@INDNTPOFF
 
 // CHECK:       (('st_name', 0x00000001) # 'foo1'
 // CHECK-NEXT:   ('st_value', 0x00000000)
@@ -72,3 +79,67 @@
 // CHECK-NEXT:   ('st_other', 0x00)
 // CHECK-NEXT:   ('st_shndx', 0x0000)
 // CHECK-NEXT:  ),
+// CHECK-NEXT:  # Symbol 12
+// CHECK-NEXT:  (('st_name', 0x00000024) # 'foo8'
+// CHECK-NEXT:   ('st_value', 0x00000000)
+// CHECK-NEXT:   ('st_size', 0x00000000)
+// CHECK-NEXT:   ('st_bind', 0x1)
+// CHECK-NEXT:   ('st_type', 0x6)
+// CHECK-NEXT:   ('st_other', 0x00)
+// CHECK-NEXT:   ('st_shndx', 0x0000)
+// CHECK-NEXT:  ),
+// CHECK-NEXT:  # Symbol 13
+// CHECK-NEXT:  (('st_name', 0x00000029) # 'foo9'
+// CHECK-NEXT:   ('st_value', 0x00000000)
+// CHECK-NEXT:   ('st_size', 0x00000000)
+// CHECK-NEXT:   ('st_bind', 0x1)
+// CHECK-NEXT:   ('st_type', 0x6)
+// CHECK-NEXT:   ('st_other', 0x00)
+// CHECK-NEXT:   ('st_shndx', 0x0000)
+// CHECK-NEXT:  ),
+// CHECK-NEXT:  # Symbol 14
+// CHECK-NEXT:  (('st_name', 0x0000002e) # 'fooA'
+// CHECK-NEXT:   ('st_value', 0x00000000)
+// CHECK-NEXT:   ('st_size', 0x00000000)
+// CHECK-NEXT:   ('st_bind', 0x1)
+// CHECK-NEXT:   ('st_type', 0x6)
+// CHECK-NEXT:   ('st_other', 0x00)
+// CHECK-NEXT:   ('st_shndx', 0x0000)
+// CHECK-NEXT:  ),
+// CHECK-NEXT:  # Symbol 15
+// CHECK-NEXT:  (('st_name', 0x00000033) # 'fooB'
+// CHECK-NEXT:   ('st_value', 0x00000000)
+// CHECK-NEXT:   ('st_size', 0x00000000)
+// CHECK-NEXT:   ('st_bind', 0x1)
+// CHECK-NEXT:   ('st_type', 0x6)
+// CHECK-NEXT:   ('st_other', 0x00)
+// CHECK-NEXT:   ('st_shndx', 0x0000)
+// CHECK-NEXT:  ),
+// CHECK-NEXT:  # Symbol 16
+// CHECK-NEXT:  (('st_name', 0x00000038) # 'fooC'
+// CHECK-NEXT:   ('st_value', 0x00000000)
+// CHECK-NEXT:   ('st_size', 0x00000000)
+// CHECK-NEXT:   ('st_bind', 0x1)
+// CHECK-NEXT:   ('st_type', 0x6)
+// CHECK-NEXT:   ('st_other', 0x00)
+// CHECK-NEXT:   ('st_shndx', 0x0000)
+// CHECK-NEXT:  ),
+// CHECK-NEXT:  # Symbol 17
+// CHECK-NEXT:  (('st_name', 0x0000003d) # 'fooD'
+// CHECK-NEXT:   ('st_value', 0x00000000)
+// CHECK-NEXT:   ('st_size', 0x00000000)
+// CHECK-NEXT:   ('st_bind', 0x1)
+// CHECK-NEXT:   ('st_type', 0x6)
+// CHECK-NEXT:   ('st_other', 0x00)
+// CHECK-NEXT:   ('st_shndx', 0x0000)
+// CHECK-NEXT:  ),
+// CHECK-NEXT:  # Symbol 18
+// CHECK-NEXT:  (('st_name', 0x00000042) # 'fooE'
+// CHECK-NEXT:   ('st_value', 0x00000000)
+// CHECK-NEXT:   ('st_size', 0x00000000)
+// CHECK-NEXT:   ('st_bind', 0x1)
+// CHECK-NEXT:   ('st_type', 0x6)
+// CHECK-NEXT:   ('st_other', 0x00)
+// CHECK-NEXT:   ('st_shndx', 0x0000)
+// CHECK-NEXT:  ),
+
diff --git a/test/MC/ELF/tls.s b/test/MC/ELF/tls.s
index d6d7de6..fe2bb4e 100644
--- a/test/MC/ELF/tls.s
+++ b/test/MC/ELF/tls.s
@@ -5,12 +5,14 @@
 	leaq	foo1@TLSGD(%rip), %rdi
         leaq    foo2@GOTTPOFF(%rip), %rdi
         leaq    foo3@TLSLD(%rip), %rdi
-
+	.long foo4@GOTTPOFF
+	.long foo5@TLSLD
+	.long foo6@TLSGD
 	.section	.zed,"awT",@progbits
 foobar:
 	.long	43
 
-// CHECK:      (('st_name', 0x00000010) # 'foobar'
+// CHECK:      (('st_name', 0x0000001f) # 'foobar'
 // CHECK-NEXT:  ('st_bind', 0x0)
 // CHECK-NEXT:  ('st_type', 0x6)
 // CHECK-NEXT:  ('st_other', 0x00)
@@ -46,3 +48,30 @@ foobar:
 // CHECK-NEXT:   ('st_value', 0x0000000000000000)
 // CHECK-NEXT:   ('st_size', 0x0000000000000000)
 // CHECK-NEXT:  ),
+// CHECK-NEXT:  # Symbol 10
+// CHECK-NEXT:  (('st_name', 0x00000010) # 'foo4'
+// CHECK-NEXT:   ('st_bind', 0x1)
+// CHECK-NEXT:   ('st_type', 0x6)
+// CHECK-NEXT:   ('st_other', 0x00)
+// CHECK-NEXT:   ('st_shndx', 0x0000)
+// CHECK-NEXT:   ('st_value', 0x0000000000000000)
+// CHECK-NEXT:   ('st_size', 0x0000000000000000)
+// CHECK-NEXT:  ),
+// CHECK-NEXT:  # Symbol 11
+// CHECK-NEXT:  (('st_name', 0x00000015) # 'foo5'
+// CHECK-NEXT:   ('st_bind', 0x1)
+// CHECK-NEXT:   ('st_type', 0x6)
+// CHECK-NEXT:   ('st_other', 0x00)
+// CHECK-NEXT:   ('st_shndx', 0x0000)
+// CHECK-NEXT:   ('st_value', 0x0000000000000000)
+// CHECK-NEXT:   ('st_size', 0x0000000000000000)
+// CHECK-NEXT:  ),
+// CHECK-NEXT:  # Symbol 12
+// CHECK-NEXT:  (('st_name', 0x0000001a) # 'foo6'
+// CHECK-NEXT:   ('st_bind', 0x1)
+// CHECK-NEXT:   ('st_type', 0x6)
+// CHECK-NEXT:   ('st_other', 0x00)
+// CHECK-NEXT:   ('st_shndx', 0x0000)
+// CHECK-NEXT:   ('st_value', 0x0000000000000000)
+// CHECK-NEXT:   ('st_size', 0x0000000000000000)
+// CHECK-NEXT:  ),
diff --git a/test/MC/MBlaze/dg.exp b/test/MC/MBlaze/dg.exp
deleted file mode 100644
index 0c4e78e..0000000
--- a/test/MC/MBlaze/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target MBlaze] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp,s}]]
-}
diff --git a/test/MC/MBlaze/lit.local.cfg b/test/MC/MBlaze/lit.local.cfg
new file mode 100644
index 0000000..6f92d87
--- /dev/null
+++ b/test/MC/MBlaze/lit.local.cfg
@@ -0,0 +1,13 @@
+config.suffixes = ['.ll', '.c', '.cpp', '.s']
+
+def getRoot(config):
+    if not config.parent:
+        return config
+    return getRoot(config.parent)
+
+root = getRoot(config)
+
+targets = set(root.targets_to_build.split())
+if not 'MBlaze' in targets:
+    config.unsupported = True
+
diff --git a/test/MC/MachO/ARM/darwin-ARM-reloc.s b/test/MC/MachO/ARM/darwin-ARM-reloc.s
index fe69a94..b98c80c 100644
--- a/test/MC/MachO/ARM/darwin-ARM-reloc.s
+++ b/test/MC/MachO/ARM/darwin-ARM-reloc.s
@@ -12,9 +12,9 @@ _f1:
 
         .data
 _d0:
-Ld0_0:  
+Ld0_0:
         .long Lsc0_0 - Ld0_0
-        
+
 	.section	__TEXT,__cstring,cstring_literals
 Lsc0_0:
         .long 0
diff --git a/test/MC/MachO/ARM/dg.exp b/test/MC/MachO/ARM/dg.exp
deleted file mode 100644
index 055fa25..0000000
--- a/test/MC/MachO/ARM/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target ARM] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp,s}]]
-}
diff --git a/test/MC/MachO/ARM/empty-function-nop.ll b/test/MC/MachO/ARM/empty-function-nop.ll
new file mode 100644
index 0000000..ef86ebc
--- /dev/null
+++ b/test/MC/MachO/ARM/empty-function-nop.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -filetype=obj -mtriple=thumbv6-apple-darwin -o - | macho-dump --dump-section-data | FileCheck -check-prefix=CHECK-T1 %s
+; RUN: llc < %s -filetype=obj -mtriple=thumbv7-apple-darwin -o - | macho-dump --dump-section-data | FileCheck -check-prefix=CHECK-T2 %s
+; RUN: llc < %s -filetype=obj -mtriple=armv6-apple-darwin -o - | macho-dump --dump-section-data | FileCheck -check-prefix=CHECK-ARM %s
+; RUN: llc < %s -filetype=obj -mtriple=armv7-apple-darwin -o - | macho-dump --dump-section-data | FileCheck -check-prefix=CHECK-ARMV7 %s
+
+; Empty functions need a NOP in them for MachO to prevent DWARF FDEs from
+; getting all mucked up. See lib/CodeGen/AsmPrinter/AsmPrinter.cpp for
+; details.
+define internal fastcc void @empty_function() {
+  unreachable
+}
+; CHECK-T1:    ('_section_data', 'c046')
+; CHECK-T2:    ('_section_data', '00bf')
+; CHECK-ARM:   ('_section_data', '0000a0e1')
+; CHECK-ARMV7: ('_section_data', '00f020e3')
diff --git a/test/MC/MachO/ARM/lit.local.cfg b/test/MC/MachO/ARM/lit.local.cfg
new file mode 100644
index 0000000..871e2b5
--- /dev/null
+++ b/test/MC/MachO/ARM/lit.local.cfg
@@ -0,0 +1,13 @@
+config.suffixes = ['.s']
+
+def getRoot(config):
+    if not config.parent:
+        return config
+    return getRoot(config.parent)
+
+root = getRoot(config)
+
+targets = set(root.targets_to_build.split())
+if not 'ARM' in targets:
+    config.unsupported = True
+
diff --git a/test/MC/MachO/ARM/no-subsections-reloc.s b/test/MC/MachO/ARM/no-subsections-reloc.s
new file mode 100644
index 0000000..7701c59
--- /dev/null
+++ b/test/MC/MachO/ARM/no-subsections-reloc.s
@@ -0,0 +1,18 @@
+@ RUN: llvm-mc -n -triple thumbv7-apple-darwin10 %s -filetype=obj -o %t.obj
+@ RUN: macho-dump --dump-section-data < %t.obj > %t.dump
+@ RUN: FileCheck < %t.dump %s
+
+@ When not using subsections-via-symbols, references to non-local symbols
+@ in the same section can be resolved at assembly time w/o relocations.
+
+ .syntax unified
+ .text
+ .thumb
+ .thumb_func _foo
+_foo:
+    ldr r3, bar
+bar:
+    .long 0
+
+@ CHECK: 'num_reloc', 0
+@ CHECK: '_section_data', 'dff80030 00000000'
diff --git a/test/MC/MachO/ARM/relax-thumb-ldr-literal.s b/test/MC/MachO/ARM/relax-thumb-ldr-literal.s
new file mode 100644
index 0000000..8d26f6d
--- /dev/null
+++ b/test/MC/MachO/ARM/relax-thumb-ldr-literal.s
@@ -0,0 +1,13 @@
+@ RUN: llvm-mc -n -triple thumbv7-apple-darwin10 %s -filetype=obj -o %t.obj
+@ RUN: macho-dump --dump-section-data < %t.obj > %t.dump
+@ RUN: FileCheck < %t.dump %s
+
+	.syntax unified
+        .text
+	.thumb
+	.thumb_func _foo
+_foo:
+        ldr r2, (_foo - 4)
+
+@ CHECK: ('num_reloc', 0)
+@ CHECK: ('_section_data', '5ff80820')
diff --git a/test/MC/MachO/ARM/thumb2-function-relative-load.s b/test/MC/MachO/ARM/thumb2-function-relative-load.s
new file mode 100644
index 0000000..622007d
--- /dev/null
+++ b/test/MC/MachO/ARM/thumb2-function-relative-load.s
@@ -0,0 +1,13 @@
+@ RUN: llvm-mc -n -triple thumbv7-apple-darwin10 %s -filetype=obj -o %t.obj
+@ RUN: macho-dump --dump-section-data < %t.obj > %t.dump
+@ RUN: FileCheck < %t.dump %s
+        .syntax unified
+        .text
+	.thumb
+        .thumb_func _foo
+_foo:
+	ldr lr, (_foo - 4)
+
+        .subsections_via_symbols
+
+@ CHECK: ('_section_data', '5ff808e0')
diff --git a/test/MC/MachO/darwin-x86_64-diff-reloc-assign.s b/test/MC/MachO/darwin-x86_64-diff-reloc-assign.s
new file mode 100644
index 0000000..49cfa41
--- /dev/null
+++ b/test/MC/MachO/darwin-x86_64-diff-reloc-assign.s
@@ -0,0 +1,27 @@
+// RUN: llvm-mc -triple x86_64-apple-darwin9 %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+
+// Test case for rdar://10743265
+
+// This tests that this expression does not cause a crash and produces two
+// relocation entries:
+// Relocation information (__TEXT,__text) 2 entries
+// address  pcrel length extern type    scattered symbolnum/value
+// 00000000 False long   True   SUB     False     _base
+// 00000000 False long   True   UNSIGND False     _start_ap_2
+
+_base = .
+
+.long (0x2000) + _start_ap_2 - _base 
+.word 0
+
+_start_ap_2:
+        cli
+
+// CHECK:   ('_relocations', [
+// CHECK:     # Relocation 0
+// CHECK:     (('word-0', 0x0),
+// CHECK:      ('word-1', 0x5c000000)),
+// CHECK:     # Relocation 1
+// CHECK:     (('word-0', 0x0),
+// CHECK:      ('word-1', 0xc000001)),
+// CHECK:   ])
diff --git a/test/MC/MachO/dg.exp b/test/MC/MachO/dg.exp
deleted file mode 100644
index ca6aefe..0000000
--- a/test/MC/MachO/dg.exp
+++ /dev/null
@@ -1,6 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target X86] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{s}]]
-}
-
diff --git a/test/MC/MachO/gen-dwarf.s b/test/MC/MachO/gen-dwarf.s
index a443d75..4fbc32d 100644
--- a/test/MC/MachO/gen-dwarf.s
+++ b/test/MC/MachO/gen-dwarf.s
@@ -7,6 +7,7 @@ _bar:
 L1:	leave
 	ret
 _foo:
+_baz:
 	nop
 .data
 _x:	.long 1
@@ -24,12 +25,11 @@ _x:	.long 1
 // CHECK: 	DW_AT_producer	DW_FORM_string
 // CHECK: 	DW_AT_language	DW_FORM_data2
 
-// CHECK: [2] DW_TAG_subprogram	DW_CHILDREN_yes
+// CHECK: [2] DW_TAG_label	DW_CHILDREN_yes
 // CHECK: 	DW_AT_name	DW_FORM_string
 // CHECK: 	DW_AT_decl_file	DW_FORM_data4
 // CHECK: 	DW_AT_decl_line	DW_FORM_data4
 // CHECK: 	DW_AT_low_pc	DW_FORM_addr
-// CHECK: 	DW_AT_high_pc	DW_FORM_addr
 // CHECK: 	DW_AT_prototyped	DW_FORM_flag
 
 // CHECK: [3] DW_TAG_unspecified_parameters	DW_CHILDREN_no
@@ -48,24 +48,33 @@ _x:	.long 1
 // CHECK:    DW_AT_producer [DW_FORM_string]	("llvm-mc (based on {{.*}})")
 // CHECK:    DW_AT_language [DW_FORM_data2]	(0x8001)
 
-// CHECK:    DW_TAG_subprogram [2] *
+// CHECK:    DW_TAG_label [2] *
 // CHECK:      DW_AT_name [DW_FORM_string]	("bar")
 // CHECK:      DW_AT_decl_file [DW_FORM_data4]	(0x00000001)
 // CHECK:      DW_AT_decl_line [DW_FORM_data4]	(0x00000005)
 // CHECK:      DW_AT_low_pc [DW_FORM_addr]	(0x0000000000000000)
-// CHECK:      DW_AT_high_pc [DW_FORM_addr]	(0x0000000000000007)
 // CHECK:      DW_AT_prototyped [DW_FORM_flag]	(0x00)
 
 // CHECK:      DW_TAG_unspecified_parameters [3]  
 
 // CHECK:      NULL
 
-// CHECK:    DW_TAG_subprogram [2] *
+// CHECK:    DW_TAG_label [2] *
 // CHECK:      DW_AT_name [DW_FORM_string]	("foo")
 // CHECK:      DW_AT_decl_file [DW_FORM_data4]	(0x00000001)
 // CHECK:      DW_AT_decl_line [DW_FORM_data4]	(0x00000009)
 // CHECK:      DW_AT_low_pc [DW_FORM_addr]	(0x0000000000000007)
-// CHECK:      DW_AT_high_pc [DW_FORM_addr]	(0x0000000000000008)
+// CHECK:      DW_AT_prototyped [DW_FORM_flag]	(0x00)
+
+// CHECK:      DW_TAG_unspecified_parameters [3]  
+
+// CHECK:      NULL
+
+// CHECK:    DW_TAG_label [2] *
+// CHECK:      DW_AT_name [DW_FORM_string]	("baz")
+// CHECK:      DW_AT_decl_file [DW_FORM_data4]	(0x00000001)
+// CHECK:      DW_AT_decl_line [DW_FORM_data4]	(0x0000000a)
+// CHECK:      DW_AT_low_pc [DW_FORM_addr]	(0x0000000000000007)
 // CHECK:      DW_AT_prototyped [DW_FORM_flag]	(0x00)
 
 // CHECK:      DW_TAG_unspecified_parameters [3]  
@@ -109,5 +118,5 @@ _x:	.long 1
 // CHECK: 0x0000000000000000      6      0      1   0  is_stmt
 // CHECK: 0x0000000000000005      7      0      1   0  is_stmt
 // CHECK: 0x0000000000000006      8      0      1   0  is_stmt
-// CHECK: 0x0000000000000007     10      0      1   0  is_stmt
-// CHECK: 0x0000000000000008     10      0      1   0  is_stmt end_sequence
+// CHECK: 0x0000000000000007     11      0      1   0  is_stmt
+// CHECK: 0x0000000000000008     11      0      1   0  is_stmt end_sequence
diff --git a/test/MC/MachO/lit.local.cfg b/test/MC/MachO/lit.local.cfg
new file mode 100644
index 0000000..1f53769
--- /dev/null
+++ b/test/MC/MachO/lit.local.cfg
@@ -0,0 +1,13 @@
+config.suffixes = ['.s']
+
+def getRoot(config):
+    if not config.parent:
+        return config
+    return getRoot(config.parent)
+
+root = getRoot(config)
+
+targets = set(root.targets_to_build.split())
+if not 'X86' in targets:
+    config.unsupported = True
+
diff --git a/test/MC/MachO/reloc-pcrel-offset.s b/test/MC/MachO/reloc-pcrel-offset.s
index bc611d7..e113e96 100644
--- a/test/MC/MachO/reloc-pcrel-offset.s
+++ b/test/MC/MachO/reloc-pcrel-offset.s
@@ -11,6 +11,7 @@
 
         .text
 _a:
+_b:
         call _a
 
         .subsections_via_symbols
diff --git a/test/MC/MachO/reloc-pcrel.s b/test/MC/MachO/reloc-pcrel.s
index 2684408..1133415 100644
--- a/test/MC/MachO/reloc-pcrel.s
+++ b/test/MC/MachO/reloc-pcrel.s
@@ -8,13 +8,13 @@
 // CHECK:  ('word-1', 0x6)),
 // CHECK: # Relocation 2
 // CHECK: (('word-0', 0x40),
-// CHECK:  ('word-1', 0xd000002)),
+// CHECK:  ('word-1', 0xd000003)),
 // CHECK: # Relocation 3
 // CHECK: (('word-0', 0x3b),
-// CHECK:  ('word-1', 0xd000002)),
+// CHECK:  ('word-1', 0xd000003)),
 // CHECK: # Relocation 4
 // CHECK: (('word-0', 0x36),
-// CHECK:  ('word-1', 0xd000002)),
+// CHECK:  ('word-1', 0xd000003)),
 // CHECK: # Relocation 5
 // CHECK: (('word-0', 0xe0000031),
 // CHECK:  ('word-1', 0x4)),
@@ -36,15 +36,16 @@
 // CHECK-NEXT: ])
 
         xorl %eax,%eax
-        
+
         .globl _a
 _a:
         xorl %eax,%eax
 _b:
+_d:
         xorl %eax,%eax
 L0:
         xorl %eax,%eax
-L1:     
+L1:
 
         call L0
         call L0 - 1
diff --git a/test/MC/Mips/dg.exp b/test/MC/Mips/dg.exp
deleted file mode 100644
index e469402..0000000
--- a/test/MC/Mips/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target Mips] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp,s}]]
-}
diff --git a/test/MC/Mips/elf-tls.ll b/test/MC/Mips/elf-tls.ll
new file mode 100644
index 0000000..b4183b8
--- /dev/null
+++ b/test/MC/Mips/elf-tls.ll
@@ -0,0 +1,36 @@
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux %s -o - | elf-dump --dump-section-data  | FileCheck %s
+
+; Check that the appropriate relocations were created.
+
+; CHECK:     ('r_type', 0x2b)
+; CHECK:     ('r_type', 0x2c)
+; CHECK:     ('r_type', 0x2d)
+
+@t1 = thread_local global i32 0, align 4
+
+define i32 @f1() nounwind {
+entry:
+  %tmp = load i32* @t1, align 4
+  ret i32 %tmp
+
+}
+
+
+@t2 = external thread_local global i32
+
+define i32 @f2() nounwind {
+entry:
+  %tmp = load i32* @t2, align 4
+  ret i32 %tmp
+
+}
+
+@f3.i = internal thread_local unnamed_addr global i32 1, align 4
+
+define i32 @f3() nounwind {
+entry:
+  %0 = load i32* @f3.i, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* @f3.i, align 4
+  ret i32 %inc
+}
diff --git a/test/MC/Mips/elf_basic.s b/test/MC/Mips/elf_basic.s
new file mode 100644
index 0000000..4621182
--- /dev/null
+++ b/test/MC/Mips/elf_basic.s
@@ -0,0 +1,7 @@
+; RUN: llc -filetype=obj -mtriple mips-unknown-linux %s -o - | elf-dump --dump-section-data  | FileCheck -check-prefix=CHECK-BE %s
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux %s -o - | elf-dump --dump-section-data  | FileCheck -check-prefix=CHECK-LE %s
+
+; Check that we produce the correct endianness.
+
+; CHECK-BE: ('e_indent[EI_DATA]', 0x02)
+; CHECK-LE: ('e_indent[EI_DATA]', 0x01)
diff --git a/test/MC/Mips/lit.local.cfg b/test/MC/Mips/lit.local.cfg
new file mode 100644
index 0000000..ecc61ea
--- /dev/null
+++ b/test/MC/Mips/lit.local.cfg
@@ -0,0 +1,13 @@
+config.suffixes = ['.ll', '.c', '.cpp', '.s']
+
+def getRoot(config):
+    if not config.parent:
+        return config
+    return getRoot(config.parent)
+
+root = getRoot(config)
+
+targets = set(root.targets_to_build.split())
+if not 'Mips' in targets:
+    config.unsupported = True
+
diff --git a/test/MC/Mips/pr11877.s b/test/MC/Mips/pr11877.s
new file mode 100644
index 0000000..d354ce4
--- /dev/null
+++ b/test/MC/Mips/pr11877.s
@@ -0,0 +1,6 @@
+// RUN: llvm-mc -triple mips-unknown-unknown %s
+
+i:
+        .long    g
+g = h
+h = i
diff --git a/test/MC/X86/dg.exp b/test/MC/X86/dg.exp
deleted file mode 100644
index ec87b69..0000000
--- a/test/MC/X86/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target X86] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp,s}]]
-}
diff --git a/test/MC/X86/intel-syntax-2.s b/test/MC/X86/intel-syntax-2.s
new file mode 100644
index 0000000..ca4afc3
--- /dev/null
+++ b/test/MC/X86/intel-syntax-2.s
@@ -0,0 +1,7 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown  %s | FileCheck %s
+
+	.intel_syntax
+_test:
+// CHECK:	movl	$257, -4(%rsp)
+	mov	DWORD PTR [RSP - 4], 257
+
diff --git a/test/MC/X86/intel-syntax-encoding.s b/test/MC/X86/intel-syntax-encoding.s
new file mode 100644
index 0000000..8891126
--- /dev/null
+++ b/test/MC/X86/intel-syntax-encoding.s
@@ -0,0 +1,44 @@
+// RUN: llvm-mc -x86-asm-syntax=intel -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
+
+// CHECK: encoding: [0x66,0x83,0xf0,0x0c]
+	xor	ax, 12
+// CHECK: encoding: [0x83,0xf0,0x0c]
+	xor	eax, 12
+// CHECK: encoding: [0x48,0x83,0xf0,0x0c]
+	xor	rax, 12
+
+// CHECK: encoding: [0x66,0x83,0xc8,0x0c]
+	or	ax, 12
+// CHECK: encoding: [0x83,0xc8,0x0c]
+	or	eax, 12
+// CHECK: encoding: [0x48,0x83,0xc8,0x0c]
+	or	rax, 12
+
+// CHECK: encoding: [0x66,0x83,0xf8,0x0c]
+	cmp	ax, 12
+// CHECK: encoding: [0x83,0xf8,0x0c]
+	cmp	eax, 12
+// CHECK: encoding: [0x48,0x83,0xf8,0x0c]
+	cmp	rax, 12
+
+// CHECK: encoding: [0x48,0x89,0x44,0x24,0xf0]	
+	mov	QWORD PTR [RSP - 16], RAX
+
+// CHECK: encoding: [0x66,0x83,0xc0,0xf4]
+	add	ax, -12
+// CHECK: encoding: [0x83,0xc0,0xf4]
+	add	eax, -12
+// CHECK: encoding: [0x48,0x83,0xc0,0xf4]
+	add	rax, -12
+
+LBB0_3:
+// CHECK: encoding: [0xeb,A]
+	jmp	LBB0_3
+// CHECK: encoding: [0xf2,0x0f,0x10,0x2c,0x25,0xf8,0xff,0xff,0xff]
+        movsd   XMM5, QWORD PTR [-8]
+
+// CHECK: encoding: [0xd1,0xe7]
+	shl	EDI, 1
+
+// CHECK: encoding: [0x0f,0xc2,0xd1,0x01]
+	cmpltps XMM2, XMM1
diff --git a/test/MC/X86/intel-syntax.s b/test/MC/X86/intel-syntax.s
new file mode 100644
index 0000000..7cd5677
--- /dev/null
+++ b/test/MC/X86/intel-syntax.s
@@ -0,0 +1,66 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel %s | FileCheck %s
+
+_test:
+	xor	EAX, EAX
+	ret
+
+_main:
+// CHECK:	movl	$257, -4(%rsp)
+	mov	DWORD PTR [RSP - 4], 257
+// CHECK:	movl	$258, 4(%rsp)
+	mov	DWORD PTR [RSP + 4], 258
+// CHECK:	movq	$123, -16(%rsp)
+	mov	QWORD PTR [RSP - 16], 123
+// CHECK:	movb	$97, -17(%rsp)
+	mov	BYTE PTR [RSP - 17], 97
+// CHECK:	movl	-4(%rsp), %eax
+	mov	EAX, DWORD PTR [RSP - 4]
+// CHECK:	movq    (%rsp), %rax
+	mov     RAX, QWORD PTR [RSP]
+// CHECK:	movl	$-4, -4(%rsp)
+	mov	DWORD PTR [RSP - 4], -4
+// CHECK:	movq	0, %rcx
+	mov	RCX, QWORD PTR [0]
+// CHECK:	movl	-24(%rsp,%rax,4), %eax	
+	mov	EAX, DWORD PTR [RSP + 4*RAX - 24]
+// CHECK:	movb	%dil, (%rdx,%rcx)
+	mov	BYTE PTR [RDX + RCX], DIL
+// CHECK:	movzwl	2(%rcx), %edi
+	movzx	EDI, WORD PTR [RCX + 2]
+// CHECK:	callq	_test
+	call	_test
+// CHECK:	andw	$12,	%ax
+	and	ax, 12
+// CHECK:	andw	$-12,	%ax
+	and	ax, -12
+// CHECK:	andw	$257,	%ax
+	and	ax, 257
+// CHECK:	andw	$-257,	%ax
+	and	ax, -257
+// CHECK:	andl	$12,	%eax
+	and	eax, 12
+// CHECK:	andl	$-12,	%eax
+	and	eax, -12
+// CHECK:	andl	$257,	%eax
+	and	eax, 257
+// CHECK:	andl	$-257,	%eax
+	and	eax, -257
+// CHECK:	andq	$12,	%rax
+	and	rax, 12
+// CHECK:	andq	$-12,	%rax
+	and	rax, -12
+// CHECK:	andq	$257,	%rax
+	and	rax, 257
+// CHECK:	andq	$-257,	%rax
+	and	rax, -257
+// CHECK:	fld	%st(0)
+	fld	ST(0)
+// CHECK:	movl	%fs:(%rdi), %eax
+        mov     EAX, DWORD PTR FS:[RDI]
+// CHECK:	leal	(,%rdi,4), %r8d
+        lea     R8D, DWORD PTR [4*RDI]
+// CHECK:        movl    _fnan(,%ecx,4), %ecx
+        mov     ECX, DWORD PTR [4*ECX + _fnan]
+// CHECK:       movq    %fs:320, %rax
+        mov     RAX, QWORD PTR FS:[320]
+	ret
diff --git a/test/MC/X86/lit.local.cfg b/test/MC/X86/lit.local.cfg
new file mode 100644
index 0000000..149a9a3
--- /dev/null
+++ b/test/MC/X86/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp', '.s']
diff --git a/test/MC/X86/x86-32-coverage.s b/test/MC/X86/x86-32-coverage.s
index 577ac40..6c27b85 100644
--- a/test/MC/X86/x86-32-coverage.s
+++ b/test/MC/X86/x86-32-coverage.s
@@ -18408,6 +18408,9 @@
 // CHECK: 	vmcall
         	vmcall
 
+// CHECK: 	vmfunc
+        	vmfunc
+
 // CHECK: 	vmclear	3735928559(%ebx,%ecx,8)
         	vmclear	0xdeadbeef(%ebx,%ecx,8)
 
@@ -18465,6 +18468,30 @@
 // CHECK: 	vmxon	305419896
         	vmxon	0x12345678
 
+// CHECK: 	vmrun %eax
+        	vmrun %eax
+
+// CHECK: 	vmmcall
+        	vmmcall
+
+// CHECK: 	vmload %eax
+        	vmload %eax
+
+// CHECK: 	vmsave %eax
+        	vmsave %eax
+
+// CHECK: 	stgi
+        	stgi
+
+// CHECK: 	clgi
+        	clgi
+
+// CHECK: 	skinit %eax
+        	skinit %eax
+
+// CHECK: 	invlpga %ecx, %eax
+        	invlpga %ecx, %eax
+
 // CHECK: 	phaddw	3735928559(%ebx,%ecx,8), %mm3
         	phaddw	0xdeadbeef(%ebx,%ecx,8),%mm3
 
diff --git a/test/MC/X86/x86-32.s b/test/MC/X86/x86-32.s
index 19f1445..8e11aec 100644
--- a/test/MC/X86/x86-32.s
+++ b/test/MC/X86/x86-32.s
@@ -28,6 +28,9 @@
 	vmcall
 // CHECK: vmcall
 // CHECK: encoding: [0x0f,0x01,0xc1]
+	vmfunc
+// CHECK: vmfunc
+// CHECK: encoding: [0x0f,0x01,0xd4]
 	vmlaunch
 // CHECK: vmlaunch
 // CHECK: encoding: [0x0f,0x01,0xc2]
@@ -41,7 +44,32 @@
 // CHECK: swapgs
 // CHECK: encoding: [0x0f,0x01,0xf8]
 
-rdtscp
+	vmrun %eax
+// CHECK: vmrun %eax
+// CHECK: encoding: [0x0f,0x01,0xd8]
+	vmmcall
+// CHECK: vmmcall
+// CHECK: encoding: [0x0f,0x01,0xd9]
+	vmload %eax
+// CHECK: vmload %eax
+// CHECK: encoding: [0x0f,0x01,0xda]
+	vmsave %eax
+// CHECK: vmsave %eax
+// CHECK: encoding: [0x0f,0x01,0xdb]
+	stgi
+// CHECK: stgi
+// CHECK: encoding: [0x0f,0x01,0xdc]
+	clgi
+// CHECK: clgi
+// CHECK: encoding: [0x0f,0x01,0xdd]
+	skinit %eax
+// CHECK: skinit %eax
+// CHECK: encoding: [0x0f,0x01,0xde]
+	invlpga %ecx, %eax
+// CHECK: invlpga %ecx, %eax
+// CHECK: encoding: [0x0f,0x01,0xdf]
+
+	rdtscp
 // CHECK: rdtscp
 // CHECK:  encoding: [0x0f,0x01,0xf9]
 
@@ -69,9 +97,9 @@ rdtscp
         sal $1, %eax
 
 // moffset forms of moves, rdar://7947184
-movb	0, %al    // CHECK: movb 0, %al  # encoding: [0xa0,A,A,A,A]
-movw	0, %ax    // CHECK: movw 0, %ax  # encoding: [0x66,0xa1,A,A,A,A]
-movl	0, %eax   // CHECK: movl 0, %eax  # encoding: [0xa1,A,A,A,A]
+movb	0, %al    // CHECK: movb 0, %al  # encoding: [0xa0,0x00,0x00,0x00,0x00]
+movw	0, %ax    // CHECK: movw 0, %ax  # encoding: [0x66,0xa1,0x00,0x00,0x00,0x00]
+movl	0, %eax   // CHECK: movl 0, %eax  # encoding: [0xa1,0x00,0x00,0x00,0x00]
 
 // rdar://7973775
 into
diff --git a/test/MC/X86/x86-64.s b/test/MC/X86/x86-64.s
index 4ec579a..d5e1b9c 100644
--- a/test/MC/X86/x86-64.s
+++ b/test/MC/X86/x86-64.s
@@ -339,15 +339,20 @@ rclb	$1, %bl   // CHECK: rclb %bl     # encoding: [0xd0,0xd3]
 rclb	$2, %bl   // CHECK: rclb $2, %bl # encoding: [0xc0,0xd3,0x02]
 
 // rdar://8418316
+// PR12173
+// CHECK: shldw	%cl, %bx, %bx
+// CHECK: shldw	%cl, %bx, %bx
 // CHECK: shldw	$1, %bx, %bx
-// CHECK: shldw	$1, %bx, %bx
-// CHECK: shrdw	$1, %bx, %bx
+// CHECK: shrdw	%cl, %bx, %bx
+// CHECK: shrdw	%cl, %bx, %bx
 // CHECK: shrdw	$1, %bx, %bx
 
-shld	%bx,%bx
-shld	$1, %bx,%bx
-shrd	%bx,%bx
-shrd	$1, %bx,%bx
+shld  %bx, %bx
+shld  %cl, %bx, %bx
+shld  $1, %bx, %bx
+shrd  %bx, %bx
+shrd  %cl, %bx, %bx
+shrd  $1, %bx, %bx
 
 // CHECK: sldtl	%ecx
 // CHECK: encoding: [0x0f,0x00,0xc1]
diff --git a/test/Makefile b/test/Makefile
index 1bf2874..a4e53f8 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -169,6 +169,8 @@ lit.site.cfg: site.exp
 	@$(ECHOPATH) s=@PYTHON_EXECUTABLE@=python=g >> lit.tmp
 	@$(ECHOPATH) s=@ENABLE_SHARED@=$(ENABLE_SHARED)=g >> lit.tmp
 	@$(ECHOPATH) s=@ENABLE_ASSERTIONS@=$(ENABLE_ASSERTIONS)=g >> lit.tmp
+	@$(ECHOPATH) s=@TARGETS_TO_BUILD@=$(TARGETS_TO_BUILD)=g >> lit.tmp
+	@$(ECHOPATH) s=@LLVM_BINDINGS@=$(BINDINGS_TO_BUILD)=g >> lit.tmp
 	@sed -f lit.tmp $(PROJ_SRC_DIR)/lit.site.cfg.in > $@
 	@-rm -f lit.tmp
 
diff --git a/test/Object/Inputs/shared-object-test.elf-i386 b/test/Object/Inputs/shared-object-test.elf-i386
new file mode 100644
index 0000000..fb63915
--- /dev/null
+++ b/test/Object/Inputs/shared-object-test.elf-i386
diff --git a/test/Object/Inputs/shared-object-test.elf-x86-64 b/test/Object/Inputs/shared-object-test.elf-x86-64
new file mode 100644
index 0000000..92667f5
--- /dev/null
+++ b/test/Object/Inputs/shared-object-test.elf-x86-64
diff --git a/test/Object/Inputs/shared.ll b/test/Object/Inputs/shared.ll
new file mode 100644
index 0000000..1a62d56
--- /dev/null
+++ b/test/Object/Inputs/shared.ll
@@ -0,0 +1,33 @@
+; How to make the shared objects from this file:
+;
+; LDARGS="--unresolved-symbols=ignore-all -soname=libfoo.so --no-as-needed -lc -lm"
+;
+; X86-32 ELF:
+;   llc -mtriple=i386-linux-gnu shared.ll -filetype=obj -o tmp32.o -relocation-model=pic
+;   ld -melf_i386 -shared tmp32.o -o shared-object-test.elf-i386 $LDARGS
+;
+; X86-64 ELF:
+;   llc -mtriple=x86_64-linux-gnu shared.ll -filetype=obj -o tmp64.o -relocation-model=pic
+;   ld -melf_x86_64 -shared tmp64.o -o shared-object-test.elf-x86-64 $LDARGS
+
+@defined_sym = global i32 1, align 4
+
+@tls_sym = thread_local global i32 2, align 4
+
+@undef_sym = external global i32
+
+@undef_tls_sym = external thread_local global i32
+
+@common_sym = common global i32 0, align 4
+
+define i32 @global_func() nounwind uwtable {
+entry:
+  ret i32 0
+}
+
+declare i32 @undef_func(...)
+
+define internal i32 @local_func() nounwind uwtable {
+entry:
+  ret i32 0
+}
diff --git a/test/Object/X86/dg.exp b/test/Object/X86/dg.exp
deleted file mode 100644
index 6a91de7..0000000
--- a/test/Object/X86/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target X86] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{test}]]
-}
diff --git a/test/Object/X86/lit.local.cfg b/test/Object/X86/lit.local.cfg
new file mode 100644
index 0000000..bbffb84
--- /dev/null
+++ b/test/Object/X86/lit.local.cfg
@@ -0,0 +1,13 @@
+config.suffixes = ['.test']  # file suffixes considered in this directory (presumably lit's test-discovery list)
+
+def getRoot(config):  # follow config.parent links until a config without a parent is reached; return that config
+    if not config.parent:  # no parent: this config is the top of the chain
+        return config
+    return getRoot(config.parent)  # otherwise recurse one level up
+
+root = getRoot(config)
+
+targets = set(root.targets_to_build.split())  # whitespace-separated target names turned into a set
+if not 'X86' in targets:  # the X86 target is not among the configured targets
+    config.unsupported = True  # flag this directory's config as unsupported
+
diff --git a/test/Object/dg.exp b/test/Object/dg.exp
deleted file mode 100644
index be82c51..0000000
--- a/test/Object/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{test}]]
diff --git a/test/Object/lit.local.cfg b/test/Object/lit.local.cfg
new file mode 100644
index 0000000..df9b335
--- /dev/null
+++ b/test/Object/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.test']
diff --git a/test/Object/nm-shared-object.test b/test/Object/nm-shared-object.test
new file mode 100644
index 0000000..b361df5
--- /dev/null
+++ b/test/Object/nm-shared-object.test
@@ -0,0 +1,15 @@
+RUN: llvm-nm -D %p/Inputs/shared-object-test.elf-i386 \
+RUN:         | FileCheck %s -check-prefix ELF
+RUN: llvm-nm -D %p/Inputs/shared-object-test.elf-x86-64 \
+RUN:         | FileCheck %s -check-prefix ELF
+
+; Note: tls_sym should be 'D' (not '?'), but TLS is not
+; yet recognized by ObjectFile.
+
+ELF: {{[0-9a-f]+}} A __bss_start
+ELF: {{[0-9a-f]+}} A _edata
+ELF: {{[0-9a-f]+}} A _end
+ELF: {{[0-9a-f]+}} B common_sym
+ELF: {{[0-9a-f]+}} D defined_sym
+ELF: {{[0-9a-f]+}} T global_func
+ELF:               ? tls_sym
diff --git a/test/Object/readobj-shared-object.test b/test/Object/readobj-shared-object.test
new file mode 100644
index 0000000..3b5457c
--- /dev/null
+++ b/test/Object/readobj-shared-object.test
@@ -0,0 +1,59 @@
+RUN: llvm-readobj %p/Inputs/shared-object-test.elf-i386 \
+RUN:         | FileCheck %s -check-prefix ELF
+RUN: llvm-readobj %p/Inputs/shared-object-test.elf-i386 \
+RUN:         | FileCheck %s -check-prefix ELF32
+
+RUN: llvm-readobj %p/Inputs/shared-object-test.elf-x86-64 \
+RUN:         | FileCheck %s -check-prefix ELF
+RUN: llvm-readobj %p/Inputs/shared-object-test.elf-x86-64 \
+RUN:         | FileCheck %s -check-prefix ELF64
+
+ELF64:File Format : ELF64-x86-64
+ELF64:Arch        : x86_64
+ELF64:Address Size: 64 bits
+ELF64:Load Name   : libfoo.so
+
+ELF32:File Format : ELF32-i386
+ELF32:Arch        : i386
+ELF32:Address Size: 32 bits
+ELF32:Load Name   : libfoo.so
+
+ELF:Symbols:
+ELF:  .dynsym                DBG             {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  formatspecific
+ELF:  .dynstr                DBG             {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  formatspecific
+ELF:  .text                  DBG             {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  formatspecific
+ELF:  .eh_frame              DBG             {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  formatspecific
+ELF:  .tdata                 DBG             {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  formatspecific
+ELF:  .dynamic               DBG             {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  formatspecific
+ELF:  .got.plt               DBG             {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  formatspecific
+ELF:  .data                  DBG             {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  formatspecific
+ELF:  .bss                   DBG             {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  formatspecific
+ELF:  shared.ll              FILE            {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  absolute,formatspecific
+ELF:  local_func             FUNC            {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}
+ELF:  _GLOBAL_OFFSET_TABLE_  DATA            {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  absolute
+ELF:  _DYNAMIC               DATA            {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  absolute
+ELF:  common_sym             DATA            {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  global
+ELF:  tls_sym                DATA            {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  global,threadlocal
+ELF:  defined_sym            DATA            {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  global
+ELF:  __bss_start            ?               {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  global,absolute
+ELF:  _end                   ?               {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  global,absolute
+ELF:  global_func            FUNC            {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  global
+ELF:  _edata                 ?               {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  global,absolute
+ELF:  Total: 21
+
+ELF:Dynamic Symbols:
+ELF:  common_sym             DATA            {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  global
+ELF:  tls_sym                DATA            {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  global,threadlocal
+ELF:  defined_sym            DATA            {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  global
+ELF:  __bss_start            ?               {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  global,absolute
+ELF:  _end                   ?               {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  global,absolute
+ELF:  global_func            FUNC            {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  global
+ELF:  _edata                 ?               {{[0-9a-f]+}}  {{[0-9a-f]+}}  {{[0-9a-f]+}}  global,absolute
+ELF:  Total: {{[0-9a-f]+}}
+
+ELF:Libraries needed:
+ELF:  libc.so.6
+ELF:  libm.so.6
+ELF:  Total: 2
+
+
diff --git a/test/Other/2009-03-31-CallGraph.ll b/test/Other/2009-03-31-CallGraph.ll
index d6653ec..864903c 100644
--- a/test/Other/2009-03-31-CallGraph.ll
+++ b/test/Other/2009-03-31-CallGraph.ll
@@ -15,6 +15,8 @@ ok2:
     unreachable
 
 lpad2:
+    %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+            cleanup
     unreachable
 }
 
@@ -29,3 +31,4 @@ declare void @f6() nounwind
 
 declare void @f8()
 
+declare i32 @__gxx_personality_v0(...)
diff --git a/test/Other/X86/dg.exp b/test/Other/X86/dg.exp
deleted file mode 100644
index 7b7bd4e..0000000
--- a/test/Other/X86/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target X86] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll}]]
-}
diff --git a/test/Other/X86/lit.local.cfg b/test/Other/X86/lit.local.cfg
new file mode 100644
index 0000000..84bd88c
--- /dev/null
+++ b/test/Other/X86/lit.local.cfg
@@ -0,0 +1,13 @@
+config.suffixes = ['.ll']  # file suffixes considered in this directory (presumably lit's test-discovery list)
+
+def getRoot(config):  # follow config.parent links until a config without a parent is reached; return that config
+    if not config.parent:  # no parent: this config is the top of the chain
+        return config
+    return getRoot(config.parent)  # otherwise recurse one level up
+
+root = getRoot(config)
+
+targets = set(root.targets_to_build.split())  # whitespace-separated target names turned into a set
+if not 'X86' in targets:  # the X86 target is not among the configured targets
+    config.unsupported = True  # flag this directory's config as unsupported
+
diff --git a/test/Other/dg.exp b/test/Other/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Other/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Other/lit.local.cfg b/test/Other/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Other/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/TableGen/ForeachList.td b/test/TableGen/ForeachList.td
new file mode 100644
index 0000000..99b7e14
--- /dev/null
+++ b/test/TableGen/ForeachList.td
@@ -0,0 +1,76 @@
+// RUN: llvm-tblgen %s | FileCheck %s
+// XFAIL: vg_leak
+
+class Register<string name, int idx> {
+  string Name = name;
+  int Index = idx;
+}
+
+foreach i = [0, 1, 2, 3, 4, 5, 6, 7] in {
+  def R#i : Register<"R"#i, i>;
+  def F#i : Register<"F"#i, i>;
+}
+
+// CHECK: def F0
+// CHECK: string Name = "F0";
+// CHECK: int Index = 0;
+
+// CHECK: def F1
+// CHECK: string Name = "F1";
+// CHECK: int Index = 1;
+
+// CHECK: def F2
+// CHECK: string Name = "F2";
+// CHECK: int Index = 2;
+
+// CHECK: def F3
+// CHECK: string Name = "F3";
+// CHECK: int Index = 3;
+
+// CHECK: def F4
+// CHECK: string Name = "F4";
+// CHECK: int Index = 4;
+
+// CHECK: def F5
+// CHECK: string Name = "F5";
+// CHECK: int Index = 5;
+
+// CHECK: def F6
+// CHECK: string Name = "F6";
+// CHECK: int Index = 6;
+
+// CHECK: def F7
+// CHECK: string Name = "F7";
+// CHECK: int Index = 7;
+
+// CHECK: def R0
+// CHECK: string Name = "R0";
+// CHECK: int Index = 0;
+
+// CHECK: def R1
+// CHECK: string Name = "R1";
+// CHECK: int Index = 1;
+
+// CHECK: def R2
+// CHECK: string Name = "R2";
+// CHECK: int Index = 2;
+
+// CHECK: def R3
+// CHECK: string Name = "R3";
+// CHECK: int Index = 3;
+
+// CHECK: def R4
+// CHECK: string Name = "R4";
+// CHECK: int Index = 4;
+
+// CHECK: def R5
+// CHECK: string Name = "R5";
+// CHECK: int Index = 5;
+
+// CHECK: def R6
+// CHECK: string Name = "R6";
+// CHECK: int Index = 6;
+
+// CHECK: def R7
+// CHECK: string Name = "R7";
+// CHECK: int Index = 7;
diff --git a/test/TableGen/ForeachLoop.td b/test/TableGen/ForeachLoop.td
new file mode 100644
index 0000000..e2defe9
--- /dev/null
+++ b/test/TableGen/ForeachLoop.td
@@ -0,0 +1,43 @@
+// RUN: llvm-tblgen %s | FileCheck %s
+// XFAIL: vg_leak
+
+class Register<string name, int idx> {
+  string Name = name;
+  int Index = idx;
+}
+
+foreach i = [0, 1, 2, 3, 4, 5, 6, 7] in
+  def R#i : Register<"R"#i, i>;
+
+
+// CHECK: def R0
+// CHECK: string Name = "R0";
+// CHECK: int Index = 0;
+
+// CHECK: def R1
+// CHECK: string Name = "R1";
+// CHECK: int Index = 1;
+
+// CHECK: def R2
+// CHECK: string Name = "R2";
+// CHECK: int Index = 2;
+
+// CHECK: def R3
+// CHECK: string Name = "R3";
+// CHECK: int Index = 3;
+
+// CHECK: def R4
+// CHECK: string Name = "R4";
+// CHECK: int Index = 4;
+
+// CHECK: def R5
+// CHECK: string Name = "R5";
+// CHECK: int Index = 5;
+
+// CHECK: def R6
+// CHECK: string Name = "R6";
+// CHECK: int Index = 6;
+
+// CHECK: def R7
+// CHECK: string Name = "R7";
+// CHECK: int Index = 7;
diff --git a/test/TableGen/NestedForeach.td b/test/TableGen/NestedForeach.td
new file mode 100644
index 0000000..e8c16f7
--- /dev/null
+++ b/test/TableGen/NestedForeach.td
@@ -0,0 +1,74 @@
+// RUN: llvm-tblgen %s | FileCheck %s
+// XFAIL: vg_leak
+
+class Droid<string series, int release, string model, int patchlevel> {
+  string Series = series;
+  int Release = release;
+  string Model = model;
+  int Patchlevel = patchlevel;
+}
+
+foreach S = ["R", "C"] in {
+  foreach R = [2, 3, 4] in {
+    foreach M = ["D", "P", "Q"] in {
+      foreach P = [0, 2, 4] in {
+        def S#R#M#P : Droid<S, R, M, P>;
+      }
+    }
+  }
+}
+
+// CHECK: def C2D0
+// CHECK: def C2D2
+// CHECK: def C2D4
+// CHECK: def C2P0
+// CHECK: def C2P2
+// CHECK: def C2P4
+// CHECK: def C2Q0
+// CHECK: def C2Q2
+// CHECK: def C2Q4
+// CHECK: def C3D0
+// CHECK: def C3D2
+// CHECK: def C3D4
+// CHECK: def C3P0
+// CHECK: def C3P2
+// CHECK: def C3P4
+// CHECK: def C3Q0
+// CHECK: def C3Q2
+// CHECK: def C3Q4
+// CHECK: def C4D0
+// CHECK: def C4D2
+// CHECK: def C4D4
+// CHECK: def C4P0
+// CHECK: def C4P2
+// CHECK: def C4P4
+// CHECK: def C4Q0
+// CHECK: def C4Q2
+// CHECK: def C4Q4
+// CHECK: def R2D0
+// CHECK: def R2D2
+// CHECK: def R2D4
+// CHECK: def R2P0
+// CHECK: def R2P2
+// CHECK: def R2P4
+// CHECK: def R2Q0
+// CHECK: def R2Q2
+// CHECK: def R2Q4
+// CHECK: def R3D0
+// CHECK: def R3D2
+// CHECK: def R3D4
+// CHECK: def R3P0
+// CHECK: def R3P2
+// CHECK: def R3P4
+// CHECK: def R3Q0
+// CHECK: def R3Q2
+// CHECK: def R3Q4
+// CHECK: def R4D0
+// CHECK: def R4D2
+// CHECK: def R4D4
+// CHECK: def R4P0
+// CHECK: def R4P2
+// CHECK: def R4P4
+// CHECK: def R4Q0
+// CHECK: def R4Q2
+// CHECK: def R4Q4
diff --git a/test/TableGen/SetTheory.td b/test/TableGen/SetTheory.td
index a4acea9..4d85aa3 100644
--- a/test/TableGen/SetTheory.td
+++ b/test/TableGen/SetTheory.td
@@ -165,3 +165,10 @@ def S9d : Set<(sequence "S%ua", 7, 9)>;
 // CHECK: S9b = [ e7 e6 e5 e4 e3 ]
 // CHECK: S9c = [ e0 ]
 // CHECK: S9d = [ a b c d e0 e3 e6 e9 e4 e5 e7 ]
+
+// The 'interleave' operator is almost the inverse of 'decimate'.
+def interleave;
+def T0a : Set<(interleave S9a, S9b)>;
+def T0b : Set<(interleave S8e, S8d)>;
+// CHECK: T0a = [ e3 e7 e4 e6 e5 ]
+// CHECK: T0b = [ e0 e1 e2 e3 e4 e5 e6 e7 e8 e9 ]
diff --git a/test/TableGen/SiblingForeach.td b/test/TableGen/SiblingForeach.td
new file mode 100644
index 0000000..a11f6f8
--- /dev/null
+++ b/test/TableGen/SiblingForeach.td
@@ -0,0 +1,277 @@
+// RUN: llvm-tblgen %s | FileCheck %s
+// XFAIL: vg_leak
+
+class Set<int i = 0, int j = 0, int k = 0> {
+  int I = i;
+  int J = j;
+  int K = k;
+}
+
+foreach i = [1, 2, 3] in {
+  def I1_#i : Set<i>;
+  foreach j = [1, 2, 3] in {
+    def I1_#i#_J1_#j : Set<i, j>;
+  }
+  def I2_#i : Set<i>;
+  foreach j = [4, 5, 6] in {
+    foreach k = [1, 2, 3] in {
+      def I3_#i#_J2_#j#_K1_#k : Set<i, j, k>;
+    }
+    def I4_#i#_J3_#j : Set<i, j>;
+  }
+}
+
+// CHECK: def I1_1
+// CHECK: int I = 1;
+// CHECK: int J = 0;
+// CHECK: int K = 0;
+
+// CHECK: def I1_1_J1_1
+// CHECK: int I = 1;
+// CHECK: int J = 1;
+// CHECK: int K = 0;
+
+// CHECK: def I1_1_J1_2
+// CHECK: int I = 1;
+// CHECK: int J = 2;
+// CHECK: int K = 0;
+
+// CHECK: def I1_1_J1_3
+// CHECK: int I = 1;
+// CHECK: int J = 3;
+// CHECK: int K = 0;
+
+// CHECK: def I1_2
+// CHECK: int I = 2;
+// CHECK: int J = 0;
+// CHECK: int K = 0;
+
+// CHECK: def I1_2_J1_1
+// CHECK: int I = 2;
+// CHECK: int J = 1;
+// CHECK: int K = 0;
+
+// CHECK: def I1_2_J1_2
+// CHECK: int I = 2;
+// CHECK: int J = 2;
+// CHECK: int K = 0;
+
+// CHECK: def I1_2_J1_3
+// CHECK: int I = 2;
+// CHECK: int J = 3;
+// CHECK: int K = 0;
+
+// CHECK: def I1_3
+// CHECK: int I = 3;
+// CHECK: int J = 0;
+// CHECK: int K = 0;
+
+// CHECK: def I1_3_J1_1
+// CHECK: int I = 3;
+// CHECK: int J = 1;
+// CHECK: int K = 0;
+
+// CHECK: def I1_3_J1_2
+// CHECK: int I = 3;
+// CHECK: int J = 2;
+// CHECK: int K = 0;
+
+// CHECK: def I1_3_J1_3
+// CHECK: int I = 3;
+// CHECK: int J = 3;
+// CHECK: int K = 0;
+
+// CHECK: def I2_1
+// CHECK: int I = 1;
+// CHECK: int J = 0;
+// CHECK: int K = 0;
+
+// CHECK: def I2_2
+// CHECK: int I = 2;
+// CHECK: int J = 0;
+// CHECK: int K = 0;
+
+// CHECK: def I2_3
+// CHECK: int I = 3;
+// CHECK: int J = 0;
+// CHECK: int K = 0;
+
+// CHECK: def I3_1_J2_4_K1_1
+// CHECK: int I = 1;
+// CHECK: int J = 4;
+// CHECK: int K = 1;
+
+// CHECK: def I3_1_J2_4_K1_2
+// CHECK: int I = 1;
+// CHECK: int J = 4;
+// CHECK: int K = 2;
+
+// CHECK: def I3_1_J2_4_K1_3
+// CHECK: int I = 1;
+// CHECK: int J = 4;
+// CHECK: int K = 3;
+
+// CHECK: def I3_1_J2_5_K1_1
+// CHECK: int I = 1;
+// CHECK: int J = 5;
+// CHECK: int K = 1;
+
+// CHECK: def I3_1_J2_5_K1_2
+// CHECK: int I = 1;
+// CHECK: int J = 5;
+// CHECK: int K = 2;
+
+// CHECK: def I3_1_J2_5_K1_3
+// CHECK: int I = 1;
+// CHECK: int J = 5;
+// CHECK: int K = 3;
+
+// CHECK: def I3_1_J2_6_K1_1
+// CHECK: int I = 1;
+// CHECK: int J = 6;
+// CHECK: int K = 1;
+
+// CHECK: def I3_1_J2_6_K1_2
+// CHECK: int I = 1;
+// CHECK: int J = 6;
+// CHECK: int K = 2;
+
+// CHECK: def I3_1_J2_6_K1_3
+// CHECK: int I = 1;
+// CHECK: int J = 6;
+// CHECK: int K = 3;
+
+// CHECK: def I3_2_J2_4_K1_1
+// CHECK: int I = 2;
+// CHECK: int J = 4;
+// CHECK: int K = 1;
+
+// CHECK: def I3_2_J2_4_K1_2
+// CHECK: int I = 2;
+// CHECK: int J = 4;
+// CHECK: int K = 2;
+
+// CHECK: def I3_2_J2_4_K1_3
+// CHECK: int I = 2;
+// CHECK: int J = 4;
+// CHECK: int K = 3;
+
+// CHECK: def I3_2_J2_5_K1_1
+// CHECK: int I = 2;
+// CHECK: int J = 5;
+// CHECK: int K = 1;
+
+// CHECK: def I3_2_J2_5_K1_2
+// CHECK: int I = 2;
+// CHECK: int J = 5;
+// CHECK: int K = 2;
+
+// CHECK: def I3_2_J2_5_K1_3
+// CHECK: int I = 2;
+// CHECK: int J = 5;
+// CHECK: int K = 3;
+
+// CHECK: def I3_2_J2_6_K1_1
+// CHECK: int I = 2;
+// CHECK: int J = 6;
+// CHECK: int K = 1;
+
+// CHECK: def I3_2_J2_6_K1_2
+// CHECK: int I = 2;
+// CHECK: int J = 6;
+// CHECK: int K = 2;
+
+// CHECK: def I3_2_J2_6_K1_3
+// CHECK: int I = 2;
+// CHECK: int J = 6;
+// CHECK: int K = 3;
+
+// CHECK: def I3_3_J2_4_K1_1
+// CHECK: int I = 3;
+// CHECK: int J = 4;
+// CHECK: int K = 1;
+
+// CHECK: def I3_3_J2_4_K1_2
+// CHECK: int I = 3;
+// CHECK: int J = 4;
+// CHECK: int K = 2;
+
+// CHECK: def I3_3_J2_4_K1_3
+// CHECK: int I = 3;
+// CHECK: int J = 4;
+// CHECK: int K = 3;
+
+// CHECK: def I3_3_J2_5_K1_1
+// CHECK: int I = 3;
+// CHECK: int J = 5;
+// CHECK: int K = 1;
+
+// CHECK: def I3_3_J2_5_K1_2
+// CHECK: int I = 3;
+// CHECK: int J = 5;
+// CHECK: int K = 2;
+
+// CHECK: def I3_3_J2_5_K1_3
+// CHECK: int I = 3;
+// CHECK: int J = 5;
+// CHECK: int K = 3;
+
+// CHECK: def I3_3_J2_6_K1_1
+// CHECK: int I = 3;
+// CHECK: int J = 6;
+// CHECK: int K = 1;
+
+// CHECK: def I3_3_J2_6_K1_2
+// CHECK: int I = 3;
+// CHECK: int J = 6;
+// CHECK: int K = 2;
+
+// CHECK: def I3_3_J2_6_K1_3
+// CHECK: int I = 3;
+// CHECK: int J = 6;
+// CHECK: int K = 3;
+
+// CHECK: def I4_1_J3_4
+// CHECK: int I = 1;
+// CHECK: int J = 4;
+// CHECK: int K = 0;
+
+// CHECK: def I4_1_J3_5
+// CHECK: int I = 1;
+// CHECK: int J = 5;
+// CHECK: int K = 0;
+
+// CHECK: def I4_1_J3_6
+// CHECK: int I = 1;
+// CHECK: int J = 6;
+// CHECK: int K = 0;
+
+// CHECK: def I4_2_J3_4
+// CHECK: int I = 2;
+// CHECK: int J = 4;
+// CHECK: int K = 0;
+
+// CHECK: def I4_2_J3_5
+// CHECK: int I = 2;
+// CHECK: int J = 5;
+// CHECK: int K = 0;
+
+// CHECK: def I4_2_J3_6
+// CHECK: int I = 2;
+// CHECK: int J = 6;
+// CHECK: int K = 0;
+
+// CHECK: def I4_3_J3_4
+// CHECK: int I = 3;
+// CHECK: int J = 4;
+// CHECK: int K = 0;
+
+// CHECK: def I4_3_J3_5
+// CHECK: int I = 3;
+// CHECK: int J = 5;
+// CHECK: int K = 0;
+
+// CHECK: def I4_3_J3_6
+// CHECK: int I = 3;
+// CHECK: int J = 6;
+// CHECK: int K = 0;
diff --git a/test/TableGen/dg.exp b/test/TableGen/dg.exp
deleted file mode 100644
index f7d275a..0000000
--- a/test/TableGen/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{td}]]
diff --git a/test/TableGen/lit.local.cfg b/test/TableGen/lit.local.cfg
new file mode 100644
index 0000000..9a4a014
--- /dev/null
+++ b/test/TableGen/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.td']
diff --git a/test/Transforms/ADCE/dg.exp b/test/Transforms/ADCE/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/ADCE/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/ADCE/lit.local.cfg b/test/Transforms/ADCE/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/ADCE/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/ArgumentPromotion/dg.exp b/test/Transforms/ArgumentPromotion/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/ArgumentPromotion/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/ArgumentPromotion/lit.local.cfg b/test/Transforms/ArgumentPromotion/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/ArgumentPromotion/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/BBVectorize/cycle.ll b/test/Transforms/BBVectorize/cycle.ll
new file mode 100644
index 0000000..32a91ce
--- /dev/null
+++ b/test/Transforms/BBVectorize/cycle.ll
@@ -0,0 +1,112 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
+
+; This test checks the non-trivial pairing-induced cycle avoidance. Without this cycle avoidance, the algorithm would
+; want to select the following pairs:
+; %div77 = fdiv double %sub74, %mul76.v.r1 <->   %div125 = fdiv double %mul121, %mul76.v.r2 (div125 depends on mul117)
+; %add84 = fadd double %sub83, 2.000000e+00 <->   %add127 = fadd double %mul126, 1.000000e+00 (add127 depends on div77)
+; %mul95 = fmul double %sub45.v.r1, %sub36.v.r1 <->   %mul88 = fmul double %sub36.v.r1, %sub87 (mul88 depends on add84)
+; %mul117 = fmul double %sub39.v.r1, %sub116 <->   %mul97 = fmul double %mul96, %sub39.v.r1 (mul97 depends on mul95)
+; and so a dependency cycle would be created.
+
+declare double @fabs(double) nounwind readnone
+define void @test1(double %a, double %b, double %c, double %add80, double %mul1, double %mul2.v.r1, double %mul73, double %sub, double %sub65, double %F.0, i32 %n.0, double %Bnm3.0, double %Bnm2.0, double %Bnm1.0, double %Anm3.0, double %Anm2.0, double %Anm1.0) {
+entry:
+  br label %go
+go:
+  %conv = sitofp i32 %n.0 to double
+  %add35 = fadd double %conv, %a
+  %sub36 = fadd double %add35, -1.000000e+00
+  %add38 = fadd double %conv, %b
+  %sub39 = fadd double %add38, -1.000000e+00
+  %add41 = fadd double %conv, %c
+  %sub42 = fadd double %add41, -1.000000e+00
+  %sub45 = fadd double %add35, -2.000000e+00
+  %sub48 = fadd double %add38, -2.000000e+00
+  %sub51 = fadd double %add41, -2.000000e+00
+  %mul52 = shl nsw i32 %n.0, 1
+  %sub53 = add nsw i32 %mul52, -1
+  %conv54 = sitofp i32 %sub53 to double
+  %sub56 = add nsw i32 %mul52, -3
+  %conv57 = sitofp i32 %sub56 to double
+  %sub59 = add nsw i32 %mul52, -5
+  %conv60 = sitofp i32 %sub59 to double
+  %mul61 = mul nsw i32 %n.0, %n.0
+  %conv62 = sitofp i32 %mul61 to double
+  %mul63 = fmul double %conv62, 3.000000e+00
+  %mul67 = fmul double %sub65, %conv
+  %add68 = fadd double %mul63, %mul67
+  %add69 = fadd double %add68, 2.000000e+00
+  %sub71 = fsub double %add69, %mul2.v.r1
+  %sub74 = fsub double %sub71, %mul73
+  %mul75 = fmul double %conv57, 2.000000e+00
+  %mul76 = fmul double %mul75, %sub42
+  %div77 = fdiv double %sub74, %mul76
+  %mul82 = fmul double %add80, %conv
+  %sub83 = fsub double %mul63, %mul82
+  %add84 = fadd double %sub83, 2.000000e+00
+  %sub86 = fsub double %add84, %mul2.v.r1
+  %sub87 = fsub double -0.000000e+00, %sub86
+  %mul88 = fmul double %sub36, %sub87
+  %mul89 = fmul double %mul88, %sub39
+  %mul90 = fmul double %conv54, 4.000000e+00
+  %mul91 = fmul double %mul90, %conv57
+  %mul92 = fmul double %mul91, %sub51
+  %mul93 = fmul double %mul92, %sub42
+  %div94 = fdiv double %mul89, %mul93
+  %mul95 = fmul double %sub45, %sub36
+  %mul96 = fmul double %mul95, %sub48
+  %mul97 = fmul double %mul96, %sub39
+  %sub99 = fsub double %conv, %a
+  %sub100 = fadd double %sub99, -2.000000e+00
+  %mul101 = fmul double %mul97, %sub100
+  %sub103 = fsub double %conv, %b
+  %sub104 = fadd double %sub103, -2.000000e+00
+  %mul105 = fmul double %mul101, %sub104
+  %mul106 = fmul double %conv57, 8.000000e+00
+  %mul107 = fmul double %mul106, %conv57
+  %mul108 = fmul double %mul107, %conv60
+  %sub111 = fadd double %add41, -3.000000e+00
+  %mul112 = fmul double %mul108, %sub111
+  %mul113 = fmul double %mul112, %sub51
+  %mul114 = fmul double %mul113, %sub42
+  %div115 = fdiv double %mul105, %mul114
+  %sub116 = fsub double -0.000000e+00, %sub36
+  %mul117 = fmul double %sub39, %sub116
+  %sub119 = fsub double %conv, %c
+  %sub120 = fadd double %sub119, -1.000000e+00
+  %mul121 = fmul double %mul117, %sub120
+  %mul123 = fmul double %mul75, %sub51
+  %mul124 = fmul double %mul123, %sub42
+  %div125 = fdiv double %mul121, %mul124
+  %mul126 = fmul double %div77, %sub
+  %add127 = fadd double %mul126, 1.000000e+00
+  %mul128 = fmul double %add127, %Anm1.0
+  %mul129 = fmul double %div94, %sub
+  %add130 = fadd double %div125, %mul129
+  %mul131 = fmul double %add130, %sub
+  %mul132 = fmul double %mul131, %Anm2.0
+  %add133 = fadd double %mul128, %mul132
+  %mul134 = fmul double %div115, %mul1
+  %mul135 = fmul double %mul134, %Anm3.0
+  %add136 = fadd double %add133, %mul135
+  %mul139 = fmul double %add127, %Bnm1.0
+  %mul143 = fmul double %mul131, %Bnm2.0
+  %add144 = fadd double %mul139, %mul143
+  %mul146 = fmul double %mul134, %Bnm3.0
+  %add147 = fadd double %add144, %mul146
+  %div148 = fdiv double %add136, %add147
+  %sub149 = fsub double %F.0, %div148
+  %div150 = fdiv double %sub149, %F.0
+  %call = tail call double @fabs(double %div150) nounwind readnone
+  %cmp = fcmp olt double %call, 0x3CB0000000000000
+  %cmp152 = icmp sgt i32 %n.0, 20000
+  %or.cond = or i1 %cmp, %cmp152
+  br i1 %or.cond, label %done, label %go
+done:
+  ret void
+; CHECK: @test1
+; CHECK: go:
+; CHECK-NEXT: %conv.v.i0.1 = insertelement <2 x i32> undef, i32 %n.0, i32 0
+; FIXME: When tree pruning is deterministic, include the entire output.
+}
diff --git a/test/Transforms/BBVectorize/func-alias.ll b/test/Transforms/BBVectorize/func-alias.ll
new file mode 100644
index 0000000..9d0cc07
--- /dev/null
+++ b/test/Transforms/BBVectorize/func-alias.ll
@@ -0,0 +1,244 @@
+target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+; RUN: opt < %s -basicaa -bb-vectorize -bb-vectorize-req-chain-depth=2 -instcombine -gvn -S | FileCheck %s
+; The chain length is set to 2 so that this will do some vectorization; check that the order of the function calls is unchanged.
+
+%struct.descriptor_dimension = type { i64, i64, i64 }
+%struct.__st_parameter_common = type { i32, i32, i8*, i32, i32, i8*, i32* }
+%struct.__st_parameter_dt = type { %struct.__st_parameter_common, i64, i64*, i64*, i8*, i8*, i32, i32, i8*, i8*, i32, i32, i8*, [256 x i8], i32*, i64, i8*, i32, i32, i8*, i8*, i32, i32, i8*, i8*, i32, i32, i8*, i8*, i32, [4 x i8] }
+%"struct.array4_real(kind=4)" = type { i8*, i64, i64, [4 x %struct.descriptor_dimension] }
+%"struct.array4_integer(kind=4).73" = type { i8*, i64, i64, [4 x %struct.descriptor_dimension] }
+%struct.array4_unknown = type { i8*, i64, i64, [4 x %struct.descriptor_dimension] }
+
+@.cst4 = external unnamed_addr constant [11 x i8], align 8
+@.cst823 = external unnamed_addr constant [214 x i8], align 64
+@j.4580 = external global i32
+@j1.4581 = external global i32
+@nty1.4590 = external global [2 x i8]
+@nty2.4591 = external global [2 x i8]
+@xr1.4592 = external global float
+@xr2.4593 = external global float
+@yr1.4594 = external global float
+@yr2.4595 = external global float
+
+@__main1_MOD_iave = external unnamed_addr global i32
+@__main1_MOD_igrp = external global i32
+@__main1_MOD_iounit = external global i32
+@__main1_MOD_ityp = external global i32
+@__main1_MOD_mclmsg = external unnamed_addr global %struct.array4_unknown, align 32
+@__main1_MOD_mxdate = external unnamed_addr global %"struct.array4_integer(kind=4).73", align 32
+@__main1_MOD_rmxval = external unnamed_addr global %"struct.array4_real(kind=4)", align 32
+
+declare void @_gfortran_st_write(%struct.__st_parameter_dt*)
+declare void @_gfortran_st_write_done(%struct.__st_parameter_dt*)
+declare void @_gfortran_transfer_character_write(%struct.__st_parameter_dt*, i8*, i32)
+declare void @_gfortran_transfer_integer_write(%struct.__st_parameter_dt*, i8*, i32)
+declare void @_gfortran_transfer_real_write(%struct.__st_parameter_dt*, i8*, i32)
+
+define i1 @"prtmax__<bb 3>_<bb 34>"(%struct.__st_parameter_dt* %memtmp3, i32 %D.4627_188.reload) nounwind {
+; CHECK: prtmax__
+newFuncRoot:
+  br label %"<bb 34>"
+
+codeRepl80.exitStub:                              ; preds = %"<bb 34>"
+  ret i1 true
+
+"<bb 34>.<bb 25>_crit_edge.exitStub":             ; preds = %"<bb 34>"
+  ret i1 false
+
+"<bb 34>":                                        ; preds = %newFuncRoot
+  %tmp128 = getelementptr inbounds %struct.__st_parameter_dt* %memtmp3, i32 0, i32 0
+  %tmp129 = getelementptr inbounds %struct.__st_parameter_common* %tmp128, i32 0, i32 2
+  store i8* getelementptr inbounds ([11 x i8]* @.cst4, i64 0, i64 0), i8** %tmp129, align 8
+  %tmp130 = getelementptr inbounds %struct.__st_parameter_dt* %memtmp3, i32 0, i32 0
+  %tmp131 = getelementptr inbounds %struct.__st_parameter_common* %tmp130, i32 0, i32 3
+  store i32 31495, i32* %tmp131, align 4
+  %tmp132 = getelementptr inbounds %struct.__st_parameter_dt* %memtmp3, i32 0, i32 5
+  store i8* getelementptr inbounds ([214 x i8]* @.cst823, i64 0, i64 0), i8** %tmp132, align 8
+  %tmp133 = getelementptr inbounds %struct.__st_parameter_dt* %memtmp3, i32 0, i32 6
+  store i32 214, i32* %tmp133, align 4
+  %tmp134 = getelementptr inbounds %struct.__st_parameter_dt* %memtmp3, i32 0, i32 0
+  %tmp135 = getelementptr inbounds %struct.__st_parameter_common* %tmp134, i32 0, i32 0
+  store i32 4096, i32* %tmp135, align 4
+  %iounit.8748_288 = load i32* @__main1_MOD_iounit, align 4
+  %tmp136 = getelementptr inbounds %struct.__st_parameter_dt* %memtmp3, i32 0, i32 0
+  %tmp137 = getelementptr inbounds %struct.__st_parameter_common* %tmp136, i32 0, i32 1
+  store i32 %iounit.8748_288, i32* %tmp137, align 4
+  call void @_gfortran_st_write(%struct.__st_parameter_dt* %memtmp3) nounwind
+  call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_integer_write to void (%struct.__st_parameter_dt*, i32*, i32)*)(%struct.__st_parameter_dt* %memtmp3, i32* @j.4580, i32 4) nounwind
+; CHECK: @_gfortran_transfer_integer_write
+  %D.75807_289 = load i8** getelementptr inbounds (%"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 0), align 8
+  %j.8758_290 = load i32* @j.4580, align 4
+  %D.75760_291 = sext i32 %j.8758_290 to i64
+  %iave.8736_292 = load i32* @__main1_MOD_iave, align 4
+  %D.75620_293 = sext i32 %iave.8736_292 to i64
+  %D.75808_294 = load i64* getelementptr inbounds (%"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 3, i64 2, i32 0), align 8
+  %D.75809_295 = mul nsw i64 %D.75620_293, %D.75808_294
+  %igrp.8737_296 = load i32* @__main1_MOD_igrp, align 4
+  %D.75635_297 = sext i32 %igrp.8737_296 to i64
+  %D.75810_298 = load i64* getelementptr inbounds (%"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 3, i64 1, i32 0), align 8
+  %D.75811_299 = mul nsw i64 %D.75635_297, %D.75810_298
+  %D.75812_300 = add nsw i64 %D.75809_295, %D.75811_299
+  %D.75813_301 = add nsw i64 %D.75760_291, %D.75812_300
+  %ityp.8750_302 = load i32* @__main1_MOD_ityp, align 4
+  %D.75704_303 = sext i32 %ityp.8750_302 to i64
+  %D.75814_304 = load i64* getelementptr inbounds (%"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 3, i64 3, i32 0), align 8
+  %D.75815_305 = mul nsw i64 %D.75704_303, %D.75814_304
+  %D.75816_306 = add nsw i64 %D.75813_301, %D.75815_305
+  %D.75817_307 = load i64* getelementptr inbounds (%"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 1), align 8
+  %D.75818_308 = add nsw i64 %D.75816_306, %D.75817_307
+  %tmp138 = bitcast i8* %D.75807_289 to [0 x float]*
+  %tmp139 = bitcast [0 x float]* %tmp138 to float*
+  %D.75819_309 = getelementptr inbounds float* %tmp139, i64 %D.75818_308
+  call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_real_write to void (%struct.__st_parameter_dt*, float*, i32)*)(%struct.__st_parameter_dt* %memtmp3, float* %D.75819_309, i32 4) nounwind
+; CHECK: @_gfortran_transfer_real_write
+  %D.75820_310 = load i8** getelementptr inbounds (%struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 0), align 8
+  %j.8758_311 = load i32* @j.4580, align 4
+  %D.75760_312 = sext i32 %j.8758_311 to i64
+  %iave.8736_313 = load i32* @__main1_MOD_iave, align 4
+  %D.75620_314 = sext i32 %iave.8736_313 to i64
+  %D.75821_315 = load i64* getelementptr inbounds (%struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 3, i64 2, i32 0), align 8
+  %D.75822_316 = mul nsw i64 %D.75620_314, %D.75821_315
+  %igrp.8737_317 = load i32* @__main1_MOD_igrp, align 4
+  %D.75635_318 = sext i32 %igrp.8737_317 to i64
+  %D.75823_319 = load i64* getelementptr inbounds (%struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 3, i64 1, i32 0), align 8
+  %D.75824_320 = mul nsw i64 %D.75635_318, %D.75823_319
+  %D.75825_321 = add nsw i64 %D.75822_316, %D.75824_320
+  %D.75826_322 = add nsw i64 %D.75760_312, %D.75825_321
+  %ityp.8750_323 = load i32* @__main1_MOD_ityp, align 4
+  %D.75704_324 = sext i32 %ityp.8750_323 to i64
+  %D.75827_325 = load i64* getelementptr inbounds (%struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 3, i64 3, i32 0), align 8
+  %D.75828_326 = mul nsw i64 %D.75704_324, %D.75827_325
+  %D.75829_327 = add nsw i64 %D.75826_322, %D.75828_326
+  %D.75830_328 = load i64* getelementptr inbounds (%struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 1), align 8
+  %D.75831_329 = add nsw i64 %D.75829_327, %D.75830_328
+  %tmp140 = bitcast i8* %D.75820_310 to [0 x [1 x i8]]*
+  %tmp141 = bitcast [0 x [1 x i8]]* %tmp140 to [1 x i8]*
+  %D.75832_330 = getelementptr inbounds [1 x i8]* %tmp141, i64 %D.75831_329
+  call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_character_write to void (%struct.__st_parameter_dt*, [1 x i8]*, i32)*)(%struct.__st_parameter_dt* %memtmp3, [1 x i8]* %D.75832_330, i32 1) nounwind
+; CHECK: @_gfortran_transfer_character_write
+  %D.75833_331 = load i8** getelementptr inbounds (%"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 0), align 8
+  %j.8758_332 = load i32* @j.4580, align 4
+  %D.75760_333 = sext i32 %j.8758_332 to i64
+  %iave.8736_334 = load i32* @__main1_MOD_iave, align 4
+  %D.75620_335 = sext i32 %iave.8736_334 to i64
+  %D.75834_336 = load i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 3, i64 2, i32 0), align 8
+  %D.75835_337 = mul nsw i64 %D.75620_335, %D.75834_336
+  %igrp.8737_338 = load i32* @__main1_MOD_igrp, align 4
+  %D.75635_339 = sext i32 %igrp.8737_338 to i64
+  %D.75836_340 = load i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 3, i64 1, i32 0), align 8
+  %D.75837_341 = mul nsw i64 %D.75635_339, %D.75836_340
+  %D.75838_342 = add nsw i64 %D.75835_337, %D.75837_341
+  %D.75839_343 = add nsw i64 %D.75760_333, %D.75838_342
+  %ityp.8750_344 = load i32* @__main1_MOD_ityp, align 4
+  %D.75704_345 = sext i32 %ityp.8750_344 to i64
+  %D.75840_346 = load i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 3, i64 3, i32 0), align 8
+  %D.75841_347 = mul nsw i64 %D.75704_345, %D.75840_346
+  %D.75842_348 = add nsw i64 %D.75839_343, %D.75841_347
+  %D.75843_349 = load i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 1), align 8
+  %D.75844_350 = add nsw i64 %D.75842_348, %D.75843_349
+  %tmp142 = bitcast i8* %D.75833_331 to [0 x i32]*
+  %tmp143 = bitcast [0 x i32]* %tmp142 to i32*
+  %D.75845_351 = getelementptr inbounds i32* %tmp143, i64 %D.75844_350
+  call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_integer_write to void (%struct.__st_parameter_dt*, i32*, i32)*)(%struct.__st_parameter_dt* %memtmp3, i32* %D.75845_351, i32 4) nounwind
+; CHECK: @_gfortran_transfer_integer_write
+  call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_real_write to void (%struct.__st_parameter_dt*, float*, i32)*)(%struct.__st_parameter_dt* %memtmp3, float* @xr1.4592, i32 4) nounwind
+; CHECK: @_gfortran_transfer_real_write
+  call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_real_write to void (%struct.__st_parameter_dt*, float*, i32)*)(%struct.__st_parameter_dt* %memtmp3, float* @yr1.4594, i32 4) nounwind
+; CHECK: @_gfortran_transfer_real_write
+  call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_character_write to void (%struct.__st_parameter_dt*, [2 x i8]*, i32)*)(%struct.__st_parameter_dt* %memtmp3, [2 x i8]* @nty1.4590, i32 2) nounwind
+; CHECK: @_gfortran_transfer_character_write
+  call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_integer_write to void (%struct.__st_parameter_dt*, i32*, i32)*)(%struct.__st_parameter_dt* %memtmp3, i32* @j1.4581, i32 4) nounwind
+; CHECK: @_gfortran_transfer_integer_write
+  %D.75807_352 = load i8** getelementptr inbounds (%"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 0), align 8
+  %j1.8760_353 = load i32* @j1.4581, align 4
+  %D.75773_354 = sext i32 %j1.8760_353 to i64
+  %iave.8736_355 = load i32* @__main1_MOD_iave, align 4
+  %D.75620_356 = sext i32 %iave.8736_355 to i64
+  %D.75808_357 = load i64* getelementptr inbounds (%"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 3, i64 2, i32 0), align 8
+  %D.75809_358 = mul nsw i64 %D.75620_356, %D.75808_357
+  %igrp.8737_359 = load i32* @__main1_MOD_igrp, align 4
+  %D.75635_360 = sext i32 %igrp.8737_359 to i64
+  %D.75810_361 = load i64* getelementptr inbounds (%"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 3, i64 1, i32 0), align 8
+  %D.75811_362 = mul nsw i64 %D.75635_360, %D.75810_361
+  %D.75812_363 = add nsw i64 %D.75809_358, %D.75811_362
+  %D.75846_364 = add nsw i64 %D.75773_354, %D.75812_363
+  %ityp.8750_365 = load i32* @__main1_MOD_ityp, align 4
+  %D.75704_366 = sext i32 %ityp.8750_365 to i64
+  %D.75814_367 = load i64* getelementptr inbounds (%"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 3, i64 3, i32 0), align 8
+  %D.75815_368 = mul nsw i64 %D.75704_366, %D.75814_367
+  %D.75847_369 = add nsw i64 %D.75846_364, %D.75815_368
+  %D.75817_370 = load i64* getelementptr inbounds (%"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 1), align 8
+  %D.75848_371 = add nsw i64 %D.75847_369, %D.75817_370
+  %tmp144 = bitcast i8* %D.75807_352 to [0 x float]*
+  %tmp145 = bitcast [0 x float]* %tmp144 to float*
+  %D.75849_372 = getelementptr inbounds float* %tmp145, i64 %D.75848_371
+  call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_real_write to void (%struct.__st_parameter_dt*, float*, i32)*)(%struct.__st_parameter_dt* %memtmp3, float* %D.75849_372, i32 4) nounwind
+; CHECK: @_gfortran_transfer_real_write
+  %D.75820_373 = load i8** getelementptr inbounds (%struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 0), align 8
+  %j1.8760_374 = load i32* @j1.4581, align 4
+  %D.75773_375 = sext i32 %j1.8760_374 to i64
+  %iave.8736_376 = load i32* @__main1_MOD_iave, align 4
+  %D.75620_377 = sext i32 %iave.8736_376 to i64
+  %D.75821_378 = load i64* getelementptr inbounds (%struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 3, i64 2, i32 0), align 8
+  %D.75822_379 = mul nsw i64 %D.75620_377, %D.75821_378
+  %igrp.8737_380 = load i32* @__main1_MOD_igrp, align 4
+  %D.75635_381 = sext i32 %igrp.8737_380 to i64
+  %D.75823_382 = load i64* getelementptr inbounds (%struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 3, i64 1, i32 0), align 8
+  %D.75824_383 = mul nsw i64 %D.75635_381, %D.75823_382
+  %D.75825_384 = add nsw i64 %D.75822_379, %D.75824_383
+  %D.75850_385 = add nsw i64 %D.75773_375, %D.75825_384
+  %ityp.8750_386 = load i32* @__main1_MOD_ityp, align 4
+  %D.75704_387 = sext i32 %ityp.8750_386 to i64
+  %D.75827_388 = load i64* getelementptr inbounds (%struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 3, i64 3, i32 0), align 8
+  %D.75828_389 = mul nsw i64 %D.75704_387, %D.75827_388
+  %D.75851_390 = add nsw i64 %D.75850_385, %D.75828_389
+  %D.75830_391 = load i64* getelementptr inbounds (%struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 1), align 8
+  %D.75852_392 = add nsw i64 %D.75851_390, %D.75830_391
+  %tmp146 = bitcast i8* %D.75820_373 to [0 x [1 x i8]]*
+  %tmp147 = bitcast [0 x [1 x i8]]* %tmp146 to [1 x i8]*
+  %D.75853_393 = getelementptr inbounds [1 x i8]* %tmp147, i64 %D.75852_392
+  call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_character_write to void (%struct.__st_parameter_dt*, [1 x i8]*, i32)*)(%struct.__st_parameter_dt* %memtmp3, [1 x i8]* %D.75853_393, i32 1) nounwind
+; CHECK: @_gfortran_transfer_character_write
+  %D.75833_394 = load i8** getelementptr inbounds (%"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 0), align 8
+  %j1.8760_395 = load i32* @j1.4581, align 4
+  %D.75773_396 = sext i32 %j1.8760_395 to i64
+  %iave.8736_397 = load i32* @__main1_MOD_iave, align 4
+  %D.75620_398 = sext i32 %iave.8736_397 to i64
+  %D.75834_399 = load i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 3, i64 2, i32 0), align 8
+  %D.75835_400 = mul nsw i64 %D.75620_398, %D.75834_399
+  %igrp.8737_401 = load i32* @__main1_MOD_igrp, align 4
+  %D.75635_402 = sext i32 %igrp.8737_401 to i64
+  %D.75836_403 = load i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 3, i64 1, i32 0), align 8
+  %D.75837_404 = mul nsw i64 %D.75635_402, %D.75836_403
+  %D.75838_405 = add nsw i64 %D.75835_400, %D.75837_404
+  %D.75854_406 = add nsw i64 %D.75773_396, %D.75838_405
+  %ityp.8750_407 = load i32* @__main1_MOD_ityp, align 4
+  %D.75704_408 = sext i32 %ityp.8750_407 to i64
+  %D.75840_409 = load i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 3, i64 3, i32 0), align 8
+  %D.75841_410 = mul nsw i64 %D.75704_408, %D.75840_409
+  %D.75855_411 = add nsw i64 %D.75854_406, %D.75841_410
+  %D.75843_412 = load i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 1), align 8
+  %D.75856_413 = add nsw i64 %D.75855_411, %D.75843_412
+  %tmp148 = bitcast i8* %D.75833_394 to [0 x i32]*
+  %tmp149 = bitcast [0 x i32]* %tmp148 to i32*
+  %D.75857_414 = getelementptr inbounds i32* %tmp149, i64 %D.75856_413
+  call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_integer_write to void (%struct.__st_parameter_dt*, i32*, i32)*)(%struct.__st_parameter_dt* %memtmp3, i32* %D.75857_414, i32 4) nounwind
+; CHECK: @_gfortran_transfer_integer_write
+  call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_real_write to void (%struct.__st_parameter_dt*, float*, i32)*)(%struct.__st_parameter_dt* %memtmp3, float* @xr2.4593, i32 4) nounwind
+; CHECK: @_gfortran_transfer_real_write
+  call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_real_write to void (%struct.__st_parameter_dt*, float*, i32)*)(%struct.__st_parameter_dt* %memtmp3, float* @yr2.4595, i32 4) nounwind
+; CHECK: @_gfortran_transfer_real_write
+  call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_character_write to void (%struct.__st_parameter_dt*, [2 x i8]*, i32)*)(%struct.__st_parameter_dt* %memtmp3, [2 x i8]* @nty2.4591, i32 2) nounwind
+; CHECK: @_gfortran_transfer_character_write
+  call void @_gfortran_st_write_done(%struct.__st_parameter_dt* %memtmp3) nounwind
+; CHECK: @_gfortran_st_write_done
+  %j.8758_415 = load i32* @j.4580, align 4
+  %D.4634_416 = icmp eq i32 %j.8758_415, %D.4627_188.reload
+  %j.8758_417 = load i32* @j.4580, align 4
+  %j.8770_418 = add nsw i32 %j.8758_417, 1
+  store i32 %j.8770_418, i32* @j.4580, align 4
+  %tmp150 = icmp ne i1 %D.4634_416, false
+  br i1 %tmp150, label %codeRepl80.exitStub, label %"<bb 34>.<bb 25>_crit_edge.exitStub"
+}
+
diff --git a/test/Transforms/BBVectorize/ld1.ll b/test/Transforms/BBVectorize/ld1.ll
new file mode 100644
index 0000000..cea225d
--- /dev/null
+++ b/test/Transforms/BBVectorize/ld1.ll
@@ -0,0 +1,41 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
+
+define double @test1(double* %a, double* %b, double* %c) nounwind uwtable readonly {
+entry:
+  %i0 = load double* %a, align 8
+  %i1 = load double* %b, align 8
+  %mul = fmul double %i0, %i1
+  %i2 = load double* %c, align 8
+  %add = fadd double %mul, %i2
+  %arrayidx3 = getelementptr inbounds double* %a, i64 1
+  %i3 = load double* %arrayidx3, align 8
+  %arrayidx4 = getelementptr inbounds double* %b, i64 1
+  %i4 = load double* %arrayidx4, align 8
+  %mul5 = fmul double %i3, %i4
+  %arrayidx6 = getelementptr inbounds double* %c, i64 1
+  %i5 = load double* %arrayidx6, align 8
+  %add7 = fadd double %mul5, %i5
+  %mul9 = fmul double %add, %i1
+  %add11 = fadd double %mul9, %i2
+  %mul13 = fmul double %add7, %i4
+  %add15 = fadd double %mul13, %i5
+  %mul16 = fmul double %add11, %add15
+  ret double %mul16
+; CHECK: @test1
+; CHECK: %i0.v.i0 = bitcast double* %a to <2 x double>*
+; CHECK: %i1.v.i0 = bitcast double* %b to <2 x double>*
+; CHECK: %i2.v.i0 = bitcast double* %c to <2 x double>*
+; CHECK: %i0 = load <2 x double>* %i0.v.i0, align 8
+; CHECK: %i1 = load <2 x double>* %i1.v.i0, align 8
+; CHECK: %mul = fmul <2 x double> %i0, %i1
+; CHECK: %i2 = load <2 x double>* %i2.v.i0, align 8
+; CHECK: %add = fadd <2 x double> %mul, %i2
+; CHECK: %mul9 = fmul <2 x double> %add, %i1
+; CHECK: %add11 = fadd <2 x double> %mul9, %i2
+; CHECK: %add11.v.r1 = extractelement <2 x double> %add11, i32 0
+; CHECK: %add11.v.r2 = extractelement <2 x double> %add11, i32 1
+; CHECK: %mul16 = fmul double %add11.v.r1, %add11.v.r2
+; CHECK: ret double %mul16
+}
+
diff --git a/test/Transforms/BBVectorize/lit.local.cfg b/test/Transforms/BBVectorize/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/BBVectorize/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/BBVectorize/loop1.ll b/test/Transforms/BBVectorize/loop1.ll
new file mode 100644
index 0000000..bebc91a
--- /dev/null
+++ b/test/Transforms/BBVectorize/loop1.ll
@@ -0,0 +1,93 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
+; RUN: opt < %s -basicaa -loop-unroll -unroll-threshold=45 -unroll-allow-partial -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-UNRL
+; The second RUN line covers the use of alias analysis (with loop unrolling).
+
+define void @test1(double* noalias %out, double* noalias %in1, double* noalias %in2) nounwind uwtable {
+entry:
+  br label %for.body
+; CHECK: @test1
+; CHECK-UNRL: @test1
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds double* %in1, i64 %indvars.iv
+  %0 = load double* %arrayidx, align 8
+  %arrayidx2 = getelementptr inbounds double* %in2, i64 %indvars.iv
+  %1 = load double* %arrayidx2, align 8
+  %mul = fmul double %0, %0
+  %mul3 = fmul double %0, %1
+  %add = fadd double %mul, %mul3
+  %add4 = fadd double %1, %1
+  %add5 = fadd double %add4, %0
+  %mul6 = fmul double %0, %add5
+  %add7 = fadd double %add, %mul6
+  %mul8 = fmul double %1, %1
+  %add9 = fadd double %0, %0
+  %add10 = fadd double %add9, %0
+  %mul11 = fmul double %mul8, %add10
+  %add12 = fadd double %add7, %mul11
+  %arrayidx14 = getelementptr inbounds double* %out, i64 %indvars.iv
+  store double %add12, double* %arrayidx14, align 8
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 10
+  br i1 %exitcond, label %for.end, label %for.body
+; CHECK: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+; CHECK: %arrayidx = getelementptr inbounds double* %in1, i64 %indvars.iv
+; CHECK: %0 = load double* %arrayidx, align 8
+; CHECK: %arrayidx2 = getelementptr inbounds double* %in2, i64 %indvars.iv
+; CHECK: %1 = load double* %arrayidx2, align 8
+; CHECK: %mul = fmul double %0, %0
+; CHECK: %mul3 = fmul double %0, %1
+; CHECK: %add = fadd double %mul, %mul3
+; CHECK: %add4.v.i1.1 = insertelement <2 x double> undef, double %1, i32 0
+; CHECK: %mul8 = fmul double %1, %1
+; CHECK: %add4.v.i1.2 = insertelement <2 x double> %add4.v.i1.1, double %0, i32 1
+; CHECK: %add4 = fadd <2 x double> %add4.v.i1.2, %add4.v.i1.2
+; CHECK: %add5.v.i1.1 = insertelement <2 x double> undef, double %0, i32 0
+; CHECK: %add5.v.i1.2 = insertelement <2 x double> %add5.v.i1.1, double %0, i32 1
+; CHECK: %add5 = fadd <2 x double> %add4, %add5.v.i1.2
+; CHECK: %mul6.v.i0.2 = insertelement <2 x double> %add5.v.i1.1, double %mul8, i32 1
+; CHECK: %mul6 = fmul <2 x double> %mul6.v.i0.2, %add5
+; CHECK: %mul6.v.r1 = extractelement <2 x double> %mul6, i32 0
+; CHECK: %mul6.v.r2 = extractelement <2 x double> %mul6, i32 1
+; CHECK: %add7 = fadd double %add, %mul6.v.r1
+; CHECK: %add12 = fadd double %add7, %mul6.v.r2
+; CHECK: %arrayidx14 = getelementptr inbounds double* %out, i64 %indvars.iv
+; CHECK: store double %add12, double* %arrayidx14, align 8
+; CHECK: %indvars.iv.next = add i64 %indvars.iv, 1
+; CHECK: %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+; CHECK: %exitcond = icmp eq i32 %lftr.wideiv, 10
+; CHECK: br i1 %exitcond, label %for.end, label %for.body
+; CHECK-UNRL: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next.1, %for.body ]
+; CHECK-UNRL: %arrayidx = getelementptr inbounds double* %in1, i64 %indvars.iv
+; CHECK-UNRL: %0 = bitcast double* %arrayidx to <2 x double>*
+; CHECK-UNRL: %arrayidx2 = getelementptr inbounds double* %in2, i64 %indvars.iv
+; CHECK-UNRL: %1 = bitcast double* %arrayidx2 to <2 x double>*
+; CHECK-UNRL: %arrayidx14 = getelementptr inbounds double* %out, i64 %indvars.iv
+; CHECK-UNRL: %2 = load <2 x double>* %0, align 8
+; CHECK-UNRL: %3 = load <2 x double>* %1, align 8
+; CHECK-UNRL: %mul = fmul <2 x double> %2, %2
+; CHECK-UNRL: %mul3 = fmul <2 x double> %2, %3
+; CHECK-UNRL: %add = fadd <2 x double> %mul, %mul3
+; CHECK-UNRL: %add4 = fadd <2 x double> %3, %3
+; CHECK-UNRL: %add5 = fadd <2 x double> %add4, %2
+; CHECK-UNRL: %mul6 = fmul <2 x double> %2, %add5
+; CHECK-UNRL: %add7 = fadd <2 x double> %add, %mul6
+; CHECK-UNRL: %mul8 = fmul <2 x double> %3, %3
+; CHECK-UNRL: %add9 = fadd <2 x double> %2, %2
+; CHECK-UNRL: %add10 = fadd <2 x double> %add9, %2
+; CHECK-UNRL: %mul11 = fmul <2 x double> %mul8, %add10
+; CHECK-UNRL: %add12 = fadd <2 x double> %add7, %mul11
+; CHECK-UNRL: %4 = bitcast double* %arrayidx14 to <2 x double>*
+; CHECK-UNRL: store <2 x double> %add12, <2 x double>* %4, align 8
+; CHECK-UNRL: %indvars.iv.next.1 = add i64 %indvars.iv, 2
+; CHECK-UNRL: %lftr.wideiv.1 = trunc i64 %indvars.iv.next.1 to i32
+; CHECK-UNRL: %exitcond.1 = icmp eq i32 %lftr.wideiv.1, 10
+; CHECK-UNRL: br i1 %exitcond.1, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
diff --git a/test/Transforms/BBVectorize/mem-op-depth.ll b/test/Transforms/BBVectorize/mem-op-depth.ll
new file mode 100644
index 0000000..84f16bd
--- /dev/null
+++ b/test/Transforms/BBVectorize/mem-op-depth.ll
@@ -0,0 +1,22 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=6 -instcombine -gvn -S | FileCheck %s
+
+@A = common global [1024 x float] zeroinitializer, align 16
+@B = common global [1024 x float] zeroinitializer, align 16
+
+define i32 @test1() nounwind { ; copies @A[0..3] to @B[0..3] using four scalar float load/store pairs
+; CHECK: @test1
+  %V1 = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 0), align 16
+  %V2 = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 1), align 4
+  %V3= load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 2), align 8
+  %V4 = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 3), align 4
+; CHECK:   %V1 = load <4 x float>* bitcast ([1024 x float]* @A to <4 x float>*), align 16
+  store float %V1, float* getelementptr inbounds ([1024 x float]* @B, i64 0, i64 0), align 16
+  store float %V2, float* getelementptr inbounds ([1024 x float]* @B, i64 0, i64 1), align 4
+  store float %V3, float* getelementptr inbounds ([1024 x float]* @B, i64 0, i64 2), align 8
+  store float %V4, float* getelementptr inbounds ([1024 x float]* @B, i64 0, i64 3), align 4
+; CHECK-NEXT: store <4 x float> %V1, <4 x float>* bitcast ([1024 x float]* @B to <4 x float>*), align 16
+  ret i32 0
+; CHECK-NEXT: ret i32 0
+}
diff --git a/test/Transforms/BBVectorize/req-depth.ll b/test/Transforms/BBVectorize/req-depth.ll
new file mode 100644
index 0000000..8c9cc3c
--- /dev/null
+++ b/test/Transforms/BBVectorize/req-depth.ll
@@ -0,0 +1,17 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth 3 -S | FileCheck %s -check-prefix=CHECK-RD3
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth 2 -S | FileCheck %s -check-prefix=CHECK-RD2
+
+define double @test1(double %A1, double %A2, double %B1, double %B2) { ; two parallel fsub -> fmul lanes joined by a final scalar fmul
+	%X1 = fsub double %A1, %B1
+	%X2 = fsub double %A2, %B2
+	%Y1 = fmul double %X1, %A1
+	%Y2 = fmul double %X2, %A2
+	%R  = fmul double %Y1, %Y2
+	ret double %R
+; CHECK-RD3: @test1
+; CHECK-RD2: @test1
+; CHECK-RD3-NOT: <2 x double>
+; CHECK-RD2: <2 x double>
+} ; per the directives above: vector ops are expected with required chain depth 2, none with depth 3
+
diff --git a/test/Transforms/BBVectorize/search-limit.ll b/test/Transforms/BBVectorize/search-limit.ll
new file mode 100644
index 0000000..d9945b5
--- /dev/null
+++ b/test/Transforms/BBVectorize/search-limit.ll
@@ -0,0 +1,46 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-search-limit=4 -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-SL4
+
+define double @test1(double %A1, double %A2, double %B1, double %B2) {
+; CHECK: @test1
+; CHECK-SL4: @test1
+; CHECK-SL4-NOT: <2 x double>
+; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
+; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
+	%X1 = fsub double %A1, %B1
+	%X2 = fsub double %A2, %B2
+; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
+	%Y1 = fmul double %X1, %A1
+	%Y2 = fmul double %X2, %A2
+; CHECK: %Y1 = fmul <2 x double> %X1, %X1.v.i0.2
+	%Z1 = fadd double %Y1, %B1
+        ; Here we have a dependency chain: with the short search limit the
+        ; pass will not look past this chain, so it never sees %Z2, the
+        ; second half of the (%Z1, %Z2) pair, and cannot vectorize that pair.
+        %mul41 = fmul double %Z1, %Y2
+        %sub48 = fsub double %Z1, %mul41
+        %mul62 = fmul double %Z1, %sub48
+        %sub69 = fsub double %Z1, %mul62
+        %mul83 = fmul double %Z1, %sub69
+        %sub90 = fsub double %Z1, %mul83
+        %mul104 = fmul double %Z1, %sub90
+        %sub111 = fsub double %Z1, %mul104
+        %mul125 = fmul double %Z1, %sub111
+        %sub132 = fsub double %Z1, %mul125
+        %mul146 = fmul double %Z1, %sub132
+        %sub153 = fsub double %Z1, %mul146
+        ; end of chain.
+	%Z2 = fadd double %Y2, %B2
+; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
+	%R1  = fdiv double %Z1, %Z2
+        %R   = fmul double %R1, %sub153
+; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
+; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
+; CHECK: %R1 = fdiv double %Z1.v.r1, %Z1.v.r2
+	ret double %R
+; CHECK: ret double %R
+}
+
diff --git a/test/Transforms/BBVectorize/simple-int.ll b/test/Transforms/BBVectorize/simple-int.ll
new file mode 100644
index 0000000..b2ef27b
--- /dev/null
+++ b/test/Transforms/BBVectorize/simple-int.ll
@@ -0,0 +1,59 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
+
+declare double @llvm.fma.f64(double, double, double)
+declare double @llvm.cos.f64(double)
+
+; Basic depth-3 chain with fma
+define double @test1(double %A1, double %A2, double %B1, double %B2, double %C1, double %C2) {
+	%X1 = fsub double %A1, %B1
+	%X2 = fsub double %A2, %B2
+	%Y1 = call double @llvm.fma.f64(double %X1, double %A1, double %C1)
+	%Y2 = call double @llvm.fma.f64(double %X2, double %A2, double %C2)
+	%Z1 = fadd double %Y1, %B1
+	%Z2 = fadd double %Y2, %B2
+	%R  = fmul double %Z1, %Z2
+	ret double %R
+; CHECK: @test1
+; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
+; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
+; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
+; CHECK: %Y1.v.i2.1 = insertelement <2 x double> undef, double %C1, i32 0
+; CHECK: %Y1.v.i2.2 = insertelement <2 x double> %Y1.v.i2.1, double %C2, i32 1
+; CHECK: %Y1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %X1, <2 x double> %X1.v.i0.2, <2 x double> %Y1.v.i2.2)
+; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
+; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
+; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
+; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
+; CHECK: ret double %R
+}
+
+; Basic depth-3 chain with cos
+define double @test2(double %A1, double %A2, double %B1, double %B2) {
+	%X1 = fsub double %A1, %B1
+	%X2 = fsub double %A2, %B2
+	%Y1 = call double @llvm.cos.f64(double %X1)
+	%Y2 = call double @llvm.cos.f64(double %X2)
+	%Z1 = fadd double %Y1, %B1
+	%Z2 = fadd double %Y2, %B2
+	%R  = fmul double %Z1, %Z2
+	ret double %R
+; CHECK: @test2
+; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
+; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
+; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
+; CHECK: %Y1 = call <2 x double> @llvm.cos.v2f64(<2 x double> %X1)
+; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
+; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
+; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
+; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
+; CHECK: ret double %R
+}
+
+; CHECK: declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
+; CHECK: declare <2 x double> @llvm.cos.v2f64(<2 x double>) nounwind readonly
+
diff --git a/test/Transforms/BBVectorize/simple-ldstr.ll b/test/Transforms/BBVectorize/simple-ldstr.ll
new file mode 100644
index 0000000..a5397ee
--- /dev/null
+++ b/test/Transforms/BBVectorize/simple-ldstr.ll
@@ -0,0 +1,110 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-aligned-only -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-AO
+
+; Simple 3-pair chain with loads and stores (the result is written through %c)
+define void @test1(double* %a, double* %b, double* %c) nounwind uwtable {
+entry:
+  %i0 = load double* %a, align 8
+  %i1 = load double* %b, align 8
+  %mul = fmul double %i0, %i1
+  %arrayidx3 = getelementptr inbounds double* %a, i64 1
+  %i3 = load double* %arrayidx3, align 8
+  %arrayidx4 = getelementptr inbounds double* %b, i64 1
+  %i4 = load double* %arrayidx4, align 8
+  %mul5 = fmul double %i3, %i4
+  store double %mul, double* %c, align 8
+  %arrayidx5 = getelementptr inbounds double* %c, i64 1
+  store double %mul5, double* %arrayidx5, align 8
+  ret void
+; CHECK: @test1
+; CHECK: %i0.v.i0 = bitcast double* %a to <2 x double>*
+; CHECK: %i1.v.i0 = bitcast double* %b to <2 x double>*
+; CHECK: %i0 = load <2 x double>* %i0.v.i0, align 8
+; CHECK: %i1 = load <2 x double>* %i1.v.i0, align 8
+; CHECK: %mul = fmul <2 x double> %i0, %i1
+; CHECK: %0 = bitcast double* %c to <2 x double>*
+; CHECK: store <2 x double> %mul, <2 x double>* %0, align 8
+; CHECK: ret void
+; CHECK-AO: @test1
+; CHECK-AO-NOT: <2 x double>
+}
+
+; Simple chain with extending loads (float -> double via fpext) and stores through %c
+define void @test2(float* %a, float* %b, double* %c) nounwind uwtable {
+entry:
+  %i0f = load float* %a, align 4
+  %i0 = fpext float %i0f to double
+  %i1f = load float* %b, align 4
+  %i1 = fpext float %i1f to double
+  %mul = fmul double %i0, %i1
+  %arrayidx3 = getelementptr inbounds float* %a, i64 1
+  %i3f = load float* %arrayidx3, align 4
+  %i3 = fpext float %i3f to double
+  %arrayidx4 = getelementptr inbounds float* %b, i64 1
+  %i4f = load float* %arrayidx4, align 4
+  %i4 = fpext float %i4f to double
+  %mul5 = fmul double %i3, %i4
+  store double %mul, double* %c, align 8
+  %arrayidx5 = getelementptr inbounds double* %c, i64 1
+  store double %mul5, double* %arrayidx5, align 8
+  ret void
+; CHECK: @test2
+; CHECK: %i0f.v.i0 = bitcast float* %a to <2 x float>*
+; CHECK: %i1f.v.i0 = bitcast float* %b to <2 x float>*
+; CHECK: %i0f = load <2 x float>* %i0f.v.i0, align 4
+; CHECK: %i0 = fpext <2 x float> %i0f to <2 x double>
+; CHECK: %i1f = load <2 x float>* %i1f.v.i0, align 4
+; CHECK: %i1 = fpext <2 x float> %i1f to <2 x double>
+; CHECK: %mul = fmul <2 x double> %i0, %i1
+; CHECK: %0 = bitcast double* %c to <2 x double>*
+; CHECK: store <2 x double> %mul, <2 x double>* %0, align 8
+; CHECK: ret void
+; CHECK-AO: @test2
+; CHECK-AO-NOT: <2 x double>
+}
+
+; Simple chain with loads and truncating stores (double -> float via fptrunc, written through %c)
+define void @test3(double* %a, double* %b, float* %c) nounwind uwtable {
+entry:
+  %i0 = load double* %a, align 8
+  %i1 = load double* %b, align 8
+  %mul = fmul double %i0, %i1
+  %mulf = fptrunc double %mul to float
+  %arrayidx3 = getelementptr inbounds double* %a, i64 1
+  %i3 = load double* %arrayidx3, align 8
+  %arrayidx4 = getelementptr inbounds double* %b, i64 1
+  %i4 = load double* %arrayidx4, align 8
+  %mul5 = fmul double %i3, %i4
+  %mul5f = fptrunc double %mul5 to float
+  store float %mulf, float* %c, align 8
+  %arrayidx5 = getelementptr inbounds float* %c, i64 1
+  store float %mul5f, float* %arrayidx5, align 4
+  ret void
+; CHECK: @test3
+; CHECK: %i0.v.i0 = bitcast double* %a to <2 x double>*
+; CHECK: %i1.v.i0 = bitcast double* %b to <2 x double>*
+; CHECK: %i0 = load <2 x double>* %i0.v.i0, align 8
+; CHECK: %i1 = load <2 x double>* %i1.v.i0, align 8
+; CHECK: %mul = fmul <2 x double> %i0, %i1
+; CHECK: %mulf = fptrunc <2 x double> %mul to <2 x float>
+; CHECK: %0 = bitcast float* %c to <2 x float>*
+; CHECK: store <2 x float> %mulf, <2 x float>* %0, align 8
+; CHECK: ret void
+; CHECK-AO: @test3
+; CHECK-AO: %i0 = load double* %a, align 8
+; CHECK-AO: %i1 = load double* %b, align 8
+; CHECK-AO: %mul.v.i1.1 = insertelement <2 x double> undef, double %i1, i32 0
+; CHECK-AO: %mul.v.i0.1 = insertelement <2 x double> undef, double %i0, i32 0
+; CHECK-AO: %arrayidx3 = getelementptr inbounds double* %a, i64 1
+; CHECK-AO: %i3 = load double* %arrayidx3, align 8
+; CHECK-AO: %arrayidx4 = getelementptr inbounds double* %b, i64 1
+; CHECK-AO: %i4 = load double* %arrayidx4, align 8
+; CHECK-AO: %mul.v.i1.2 = insertelement <2 x double> %mul.v.i1.1, double %i4, i32 1
+; CHECK-AO: %mul.v.i0.2 = insertelement <2 x double> %mul.v.i0.1, double %i3, i32 1
+; CHECK-AO: %mul = fmul <2 x double> %mul.v.i0.2, %mul.v.i1.2
+; CHECK-AO: %mulf = fptrunc <2 x double> %mul to <2 x float>
+; CHECK-AO: %0 = bitcast float* %c to <2 x float>*
+; CHECK-AO: store <2 x float> %mulf, <2 x float>* %0, align 8
+; CHECK-AO: ret void
+}
diff --git a/test/Transforms/BBVectorize/simple.ll b/test/Transforms/BBVectorize/simple.ll
new file mode 100644
index 0000000..904d766
--- /dev/null
+++ b/test/Transforms/BBVectorize/simple.ll
@@ -0,0 +1,152 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
+
+; Basic depth-3 chain
+define double @test1(double %A1, double %A2, double %B1, double %B2) {
+; CHECK: @test1
+; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
+; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
+	%X1 = fsub double %A1, %B1
+	%X2 = fsub double %A2, %B2
+; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
+	%Y1 = fmul double %X1, %A1
+	%Y2 = fmul double %X2, %A2
+; CHECK: %Y1 = fmul <2 x double> %X1, %X1.v.i0.2
+	%Z1 = fadd double %Y1, %B1
+	%Z2 = fadd double %Y2, %B2
+; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
+	%R  = fmul double %Z1, %Z2
+; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
+; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
+; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
+	ret double %R
+; CHECK: ret double %R
+}
+
+; Basic depth-3 chain (last pair permuted)
+define double @test2(double %A1, double %A2, double %B1, double %B2) {
+; CHECK: @test2
+; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
+; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
+	%X1 = fsub double %A1, %B1
+	%X2 = fsub double %A2, %B2
+; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
+	%Y1 = fmul double %X1, %A1
+	%Y2 = fmul double %X2, %A2
+; CHECK: %Y1 = fmul <2 x double> %X1, %X1.v.i0.2
+	%Z1 = fadd double %Y2, %B1
+	%Z2 = fadd double %Y1, %B2
+; CHECK: %Z1.v.i0 = shufflevector <2 x double> %Y1, <2 x double> undef, <2 x i32> <i32 1, i32 0>
+; CHECK: %Z1 = fadd <2 x double> %Z1.v.i0, %X1.v.i1.2
+	%R  = fmul double %Z1, %Z2
+; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
+; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
+; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
+	ret double %R
+; CHECK: ret double %R
+}
+
+; Basic depth-3 chain (last pair first splat: both final fadds use %Y2)
+define double @test3(double %A1, double %A2, double %B1, double %B2) {
+; CHECK: @test3
+; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
+; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
+	%X1 = fsub double %A1, %B1
+	%X2 = fsub double %A2, %B2
+; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
+	%Y1 = fmul double %X1, %A1
+	%Y2 = fmul double %X2, %A2
+; CHECK: %Y1 = fmul <2 x double> %X1, %X1.v.i0.2
+	%Z1 = fadd double %Y2, %B1
+	%Z2 = fadd double %Y2, %B2
+; CHECK: %Z1.v.i0 = shufflevector <2 x double> %Y1, <2 x double> undef, <2 x i32> <i32 1, i32 1>
+; CHECK: %Z1 = fadd <2 x double> %Z1.v.i0, %X1.v.i1.2
+	%R  = fmul double %Z1, %Z2
+; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
+; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
+; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
+	ret double %R
+; CHECK: ret double %R
+}
+
+; Basic depth-3 chain (last pair second splat: both final fadds use %Y1)
+define double @test4(double %A1, double %A2, double %B1, double %B2) {
+; CHECK: @test4
+; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
+; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
+	%X1 = fsub double %A1, %B1
+	%X2 = fsub double %A2, %B2
+; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
+	%Y1 = fmul double %X1, %A1
+	%Y2 = fmul double %X2, %A2
+; CHECK: %Y1 = fmul <2 x double> %X1, %X1.v.i0.2
+	%Z1 = fadd double %Y1, %B1
+	%Z2 = fadd double %Y1, %B2
+; CHECK: %Z1.v.i0 = shufflevector <2 x double> %Y1, <2 x double> undef, <2 x i32> zeroinitializer
+; CHECK: %Z1 = fadd <2 x double> %Z1.v.i0, %X1.v.i1.2
+	%R  = fmul double %Z1, %Z2
+; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
+; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
+; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
+	ret double %R
+; CHECK: ret double %R
+}
+
+; Basic depth-3 chain (<2 x float> vector inputs, fused into <4 x float> ops)
+define <2 x float> @test5(<2 x float> %A1, <2 x float> %A2, <2 x float> %B1, <2 x float> %B2) {
+; CHECK: @test5
+; CHECK: %X1.v.i1 = shufflevector <2 x float> %B1, <2 x float> %B2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK: %X1.v.i0 = shufflevector <2 x float> %A1, <2 x float> %A2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+	%X1 = fsub <2 x float> %A1, %B1
+	%X2 = fsub <2 x float> %A2, %B2
+; CHECK: %X1 = fsub <4 x float> %X1.v.i0, %X1.v.i1
+	%Y1 = fmul <2 x float> %X1, %A1
+	%Y2 = fmul <2 x float> %X2, %A2
+; CHECK: %Y1 = fmul <4 x float> %X1, %X1.v.i0
+	%Z1 = fadd <2 x float> %Y1, %B1
+	%Z2 = fadd <2 x float> %Y2, %B2
+; CHECK: %Z1 = fadd <4 x float> %Y1, %X1.v.i1
+	%R  = fmul <2 x float> %Z1, %Z2
+; CHECK: %Z1.v.r1 = shufflevector <4 x float> %Z1, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+; CHECK: %Z1.v.r2 = shufflevector <4 x float> %Z1, <4 x float> undef, <2 x i32> <i32 2, i32 3>
+; CHECK: %R = fmul <2 x float> %Z1.v.r1, %Z1.v.r2
+	ret <2 x float> %R
+; CHECK: ret <2 x float> %R
+}
+
+; Basic chain with shuffles
+define <8 x i8> @test6(<8 x i8> %A1, <8 x i8> %A2, <8 x i8> %B1, <8 x i8> %B2) {
+; CHECK: @test6
+; CHECK: %X1.v.i1 = shufflevector <8 x i8> %B1, <8 x i8> %B2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK: %X1.v.i0 = shufflevector <8 x i8> %A1, <8 x i8> %A2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+	%X1 = sub <8 x i8> %A1, %B1
+	%X2 = sub <8 x i8> %A2, %B2
+; CHECK: %X1 = sub <16 x i8> %X1.v.i0, %X1.v.i1
+	%Y1 = mul <8 x i8> %X1, %A1
+	%Y2 = mul <8 x i8> %X2, %A2
+; CHECK: %Y1 = mul <16 x i8> %X1, %X1.v.i0
+	%Z1 = add <8 x i8> %Y1, %B1
+	%Z2 = add <8 x i8> %Y2, %B2
+; CHECK: %Z1 = add <16 x i8> %Y1, %X1.v.i1
+        %Q1 = shufflevector <8 x i8> %Z1, <8 x i8> %Z2, <8 x i32> <i32 15, i32 8, i32 6, i32 1, i32 13, i32 10, i32 4, i32 3>
+        %Q2 = shufflevector <8 x i8> %Z2, <8 x i8> %Z2, <8 x i32> <i32 6, i32 7, i32 0, i32 1, i32 2, i32 4, i32 4, i32 1>
+; CHECK: %Z1.v.r2 = shufflevector <16 x i8> %Z1, <16 x i8> undef, <8 x i32> <i32 8, i32 undef, i32 10, i32 undef, i32 undef, i32 13, i32 undef, i32 15>
+; CHECK: %Q1.v.i1 = shufflevector <8 x i8> %Z1.v.r2, <8 x i8> undef, <16 x i32> <i32 0, i32 undef, i32 2, i32 undef, i32 undef, i32 5, i32 undef, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK: %Q1 = shufflevector <16 x i8> %Z1, <16 x i8> %Q1.v.i1, <16 x i32> <i32 23, i32 16, i32 6, i32 1, i32 21, i32 18, i32 4, i32 3, i32 14, i32 15, i32 8, i32 9, i32 10, i32 12, i32 12, i32 9>
+	%R  = mul <8 x i8> %Q1, %Q2
+; CHECK: %Q1.v.r1 = shufflevector <16 x i8> %Q1, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK: %Q1.v.r2 = shufflevector <16 x i8> %Q1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK: %R = mul <8 x i8> %Q1.v.r1, %Q1.v.r2
+	ret <8 x i8> %R
+; CHECK: ret <8 x i8> %R
+}
+
+
diff --git a/test/Transforms/BlockPlacement/dg.exp b/test/Transforms/BlockPlacement/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/BlockPlacement/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/BlockPlacement/lit.local.cfg b/test/Transforms/BlockPlacement/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/BlockPlacement/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/CodeExtractor/dg.exp b/test/Transforms/CodeExtractor/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/CodeExtractor/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/CodeExtractor/lit.local.cfg b/test/Transforms/CodeExtractor/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/CodeExtractor/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/CodeGenPrepare/dg.exp b/test/Transforms/CodeGenPrepare/dg.exp
deleted file mode 100644
index de42dad..0000000
--- a/test/Transforms/CodeGenPrepare/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.ll]]
diff --git a/test/Transforms/CodeGenPrepare/lit.local.cfg b/test/Transforms/CodeGenPrepare/lit.local.cfg
new file mode 100644
index 0000000..c6106e4
--- /dev/null
+++ b/test/Transforms/CodeGenPrepare/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll']
diff --git a/test/Transforms/ConstProp/basictest.ll b/test/Transforms/ConstProp/basictest.ll
index d0d0a5b..09e6e7d 100644
--- a/test/Transforms/ConstProp/basictest.ll
+++ b/test/Transforms/ConstProp/basictest.ll
@@ -1,5 +1,8 @@
 ; RUN: opt < %s -constprop -die -S | FileCheck %s
 
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.7.2"
+
 ; This is a basic sanity check for constant propagation.  The add instruction
 ; should be eliminated.
 define i32 @test1(i1 %B) {
@@ -40,3 +43,11 @@ define i1 @TNAN() {
   %C = or i1 %A, %B
   ret i1 %C
 }
+
+define i128 @vector_to_int_cast() { ; all four i32 lanes are 2^30, so the folded i128 is 2^30 + 2^62 + 2^94 + 2^126
+  %A = bitcast <4 x i32> <i32 1073741824, i32 1073741824, i32 1073741824, i32 1073741824> to i128
+  ret i128 %A
+; CHECK: @vector_to_int_cast
+; CHECK: ret i128 85070591750041656499021422275829170176
+}
+  
+\ No newline at end of file
diff --git a/test/Transforms/ConstProp/dg.exp b/test/Transforms/ConstProp/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/ConstProp/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/ConstProp/lit.local.cfg b/test/Transforms/ConstProp/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/ConstProp/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/ConstantMerge/dg.exp b/test/Transforms/ConstantMerge/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/ConstantMerge/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/ConstantMerge/linker-private.ll b/test/Transforms/ConstantMerge/linker-private.ll
new file mode 100644
index 0000000..eba7880
--- /dev/null
+++ b/test/Transforms/ConstantMerge/linker-private.ll
@@ -0,0 +1,23 @@
+; RUN: opt < %s -constmerge -S | FileCheck %s
+; <rdar://problem/10564621>
+
+%0 = type opaque
+%struct.NSConstantString = type { i32*, i32, i8*, i32 }
+
+; CHECK: @.str3 = linker_private unnamed_addr constant [1 x i8] zeroinitializer, align 1
+
+@isLogVisible = global i8 0, align 1
+@__CFConstantStringClassReference = external global [0 x i32]
+@.str3 = linker_private unnamed_addr constant [1 x i8] zeroinitializer, align 1
+@_unnamed_cfstring_4 = private constant %struct.NSConstantString { i32* getelementptr inbounds ([0 x i32]* @__CFConstantStringClassReference, i32 0, i32 0), i32 1992, i8* getelementptr inbounds ([1 x i8]* @.str3, i32 0, i32 0), i32 0 }, section "__DATA,__cfstring"
+@null.array = weak_odr constant [1 x i8] zeroinitializer, align 1
+
+define linkonce_odr void @bar() nounwind ssp align 2 {
+entry:
+  %stack = alloca i8*, align 4
+  %call = call %0* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to %0* (i8*, i8*, %0*)*)(i8* null, i8* null, %0* bitcast (%struct.NSConstantString* @_unnamed_cfstring_4 to %0*))
+  store i8* getelementptr inbounds ([1 x i8]* @null.array, i32 0, i32 0), i8** %stack, align 4
+  ret void
+}
+
+declare i8* @objc_msgSend(i8*, i8*, ...) nonlazybind
diff --git a/test/Transforms/ConstantMerge/lit.local.cfg b/test/Transforms/ConstantMerge/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/ConstantMerge/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/CorrelatedValuePropagation/dg.exp b/test/Transforms/CorrelatedValuePropagation/dg.exp
deleted file mode 100644
index de42dad..0000000
--- a/test/Transforms/CorrelatedValuePropagation/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.ll]]
diff --git a/test/Transforms/CorrelatedValuePropagation/lit.local.cfg b/test/Transforms/CorrelatedValuePropagation/lit.local.cfg
new file mode 100644
index 0000000..c6106e4
--- /dev/null
+++ b/test/Transforms/CorrelatedValuePropagation/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll']
diff --git a/test/Transforms/CorrelatedValuePropagation/range.ll b/test/Transforms/CorrelatedValuePropagation/range.ll
new file mode 100644
index 0000000..9b70ed2
--- /dev/null
+++ b/test/Transforms/CorrelatedValuePropagation/range.ll
@@ -0,0 +1,43 @@
+; RUN: opt -correlated-propagation -S < %s | FileCheck %s
+
+declare i32 @foo()
+
+define i32 @test1(i32 %a) nounwind {
+  %a.off = add i32 %a, -8
+  %cmp = icmp ult i32 %a.off, 8 ; true only when %a is in [8, 16)
+  br i1 %cmp, label %then, label %else
+
+then:
+  %dead = icmp eq i32 %a, 7 ; 7 lies outside [8, 16), so this is always false here
+  br i1 %dead, label %end, label %else
+
+else:
+  ret i32 1
+
+end:
+  ret i32 2
+
+; CHECK: @test1
+; CHECK: then:
+; CHECK-NEXT: br i1 false, label %end, label %else
+}
+
+define i32 @test2(i32 %a) nounwind {
+  %a.off = add i32 %a, -8
+  %cmp = icmp ult i32 %a.off, 8 ; true only when %a is in [8, 16)
+  br i1 %cmp, label %then, label %else
+
+then:
+  %dead = icmp ugt i32 %a, 15 ; %a is at most 15 in this block, so this is always false here
+  br i1 %dead, label %end, label %else
+
+else:
+  ret i32 1
+
+end:
+  ret i32 2
+
+; CHECK: @test2
+; CHECK: then:
+; CHECK-NEXT: br i1 false, label %end, label %else
+}
diff --git a/test/Transforms/DeadArgElim/dg.exp b/test/Transforms/DeadArgElim/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/DeadArgElim/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/DeadArgElim/lit.local.cfg b/test/Transforms/DeadArgElim/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/DeadArgElim/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/DeadStoreElimination/dg.exp b/test/Transforms/DeadStoreElimination/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/DeadStoreElimination/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/DeadStoreElimination/dominate.ll b/test/Transforms/DeadStoreElimination/dominate.ll
new file mode 100644
index 0000000..284fea4
--- /dev/null
+++ b/test/Transforms/DeadStoreElimination/dominate.ll
@@ -0,0 +1,25 @@
+; RUN: opt  %s -dse -disable-output
+; Test that -dse does not crash on this input (note that bb2 has no predecessors).
+declare void @bar()
+
+define void @foo() {
+bb1:
+  %memtmp3.i = alloca [21 x i8], align 1
+  %0 = getelementptr inbounds [21 x i8]* %memtmp3.i, i64 0, i64 0
+  br label %bb3
+
+bb2:
+  call void @llvm.lifetime.end(i64 -1, i8* %0)
+  br label %bb3
+
+bb3:
+  call void @bar()
+  call void @llvm.lifetime.end(i64 -1, i8* %0)
+  br label %bb4
+
+bb4:
+  ret void
+
+}
+
+declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind
diff --git a/test/Transforms/DeadStoreElimination/lit.local.cfg b/test/Transforms/DeadStoreElimination/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/DeadStoreElimination/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/EarlyCSE/dg.exp b/test/Transforms/EarlyCSE/dg.exp
deleted file mode 100644
index de42dad..0000000
--- a/test/Transforms/EarlyCSE/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.ll]]
diff --git a/test/Transforms/EarlyCSE/lit.local.cfg b/test/Transforms/EarlyCSE/lit.local.cfg
new file mode 100644
index 0000000..c6106e4
--- /dev/null
+++ b/test/Transforms/EarlyCSE/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll']
diff --git a/test/Transforms/FunctionAttrs/dg.exp b/test/Transforms/FunctionAttrs/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/FunctionAttrs/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/FunctionAttrs/lit.local.cfg b/test/Transforms/FunctionAttrs/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/FunctionAttrs/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/FunctionAttrs/2008-12-31-NoCapture.ll b/test/Transforms/FunctionAttrs/nocapture.ll
index e2bab19..3027acd 100644
--- a/test/Transforms/FunctionAttrs/2008-12-31-NoCapture.ll
+++ b/test/Transforms/FunctionAttrs/nocapture.ll
@@ -1,21 +1,24 @@
-; RUN: opt < %s -functionattrs -S | not grep {nocapture *%%q}
-; RUN: opt < %s -functionattrs -S | grep {nocapture *%%p} | count 6
+; RUN: opt < %s -functionattrs -S | FileCheck %s
 @g = global i32* null		; <i32**> [#uses=1]
 
+; CHECK: define i32* @c1(i32* %q)
 define i32* @c1(i32* %q) {
 	ret i32* %q
 }
 
+; CHECK: define void @c2(i32* %q)
 define void @c2(i32* %q) {
 	store i32* %q, i32** @g
 	ret void
 }
 
+; CHECK: define void @c3(i32* %q)
 define void @c3(i32* %q) {
 	call void @c2(i32* %q)
 	ret void
 }
 
+; CHECK: define i1 @c4(i32* %q, i32 %bitno)
 define i1 @c4(i32* %q, i32 %bitno) {
 	%tmp = ptrtoint i32* %q to i32
 	%tmp2 = lshr i32 %tmp, %bitno
@@ -29,6 +32,7 @@ l1:
 
 @lookup_table = global [2 x i1] [ i1 0, i1 1 ]
 
+; CHECK: define i1 @c5(i32* %q, i32 %bitno)
 define i1 @c5(i32* %q, i32 %bitno) {
 	%tmp = ptrtoint i32* %q to i32
 	%tmp2 = lshr i32 %tmp, %bitno
@@ -40,6 +44,8 @@ define i1 @c5(i32* %q, i32 %bitno) {
 }
 
 declare void @throw_if_bit_set(i8*, i8) readonly
+
+; CHECK: define i1 @c6(i8* %q, i8 %bit)
 define i1 @c6(i8* %q, i8 %bit) {
 	invoke void @throw_if_bit_set(i8* %q, i8 %bit)
 		to label %ret0 unwind label %ret1
@@ -61,6 +67,7 @@ define i1* @lookup_bit(i32* %q, i32 %bitno) readnone nounwind {
 	ret i1* %lookup
 }
 
+; CHECK: define i1 @c7(i32* %q, i32 %bitno)
 define i1 @c7(i32* %q, i32 %bitno) {
 	%ptr = call i1* @lookup_bit(i32* %q, i32 %bitno)
 	%val = load i1* %ptr
@@ -68,6 +75,7 @@ define i1 @c7(i32* %q, i32 %bitno) {
 }
 
 
+; CHECK: define i32 @nc1(i32* %q, i32* nocapture %p, i1 %b)
 define i32 @nc1(i32* %q, i32* %p, i1 %b) {
 e:
 	br label %l
@@ -82,24 +90,89 @@ l:
 	ret i32 %val
 }
 
+; CHECK: define void @nc2(i32* nocapture %p, i32* %q)
 define void @nc2(i32* %p, i32* %q) {
 	%1 = call i32 @nc1(i32* %q, i32* %p, i1 0)		; <i32> [#uses=0]
 	ret void
 }
 
+; CHECK: define void @nc3(void ()* nocapture %p)
 define void @nc3(void ()* %p) {
 	call void %p()
 	ret void
 }
 
 declare void @external(i8*) readonly nounwind
+; CHECK: define void @nc4(i8* nocapture %p)
 define void @nc4(i8* %p) {
 	call void @external(i8* %p)
 	ret void
 }
 
+; CHECK: define void @nc5(void (i8*)* nocapture %f, i8* nocapture %p)
 define void @nc5(void (i8*)* %f, i8* %p) {
 	call void %f(i8* %p) readonly nounwind
 	call void %f(i8* nocapture %p)
 	ret void
 }
+
+; CHECK: define void @test1_1(i8* nocapture %x1_1, i8* %y1_1)
+define void @test1_1(i8* %x1_1, i8* %y1_1) {
+  call i8* @test1_2(i8* %x1_1, i8* %y1_1)
+  store i32* null, i32** @g
+  ret void
+}
+
+; CHECK: define i8* @test1_2(i8* nocapture %x1_2, i8* %y1_2)
+define i8* @test1_2(i8* %x1_2, i8* %y1_2) {
+  call void @test1_1(i8* %x1_2, i8* %y1_2)
+  store i32* null, i32** @g
+  ret i8* %y1_2
+}
+
+; CHECK: define void @test2(i8* nocapture %x2)
+define void @test2(i8* %x2) {
+  call void @test2(i8* %x2)
+  store i32* null, i32** @g
+  ret void
+}
+
+; CHECK: define void @test3(i8* nocapture %x3, i8* nocapture %y3, i8* nocapture %z3)
+define void @test3(i8* %x3, i8* %y3, i8* %z3) {
+  call void @test3(i8* %z3, i8* %y3, i8* %x3)
+  store i32* null, i32** @g
+  ret void
+}
+
+; CHECK: define void @test4_1(i8* %x4_1)
+define void @test4_1(i8* %x4_1) {
+  call i8* @test4_2(i8* %x4_1, i8* %x4_1, i8* %x4_1)
+  store i32* null, i32** @g
+  ret void
+}
+
+; CHECK: define i8* @test4_2(i8* nocapture %x4_2, i8* %y4_2, i8* nocapture %z4_2)
+define i8* @test4_2(i8* %x4_2, i8* %y4_2, i8* %z4_2) {
+  call void @test4_1(i8* null)
+  store i32* null, i32** @g
+  ret i8* %y4_2
+}
+
+declare i8* @test5_1(i8* %x5_1)
+
+; CHECK: define void @test5_2(i8* %x5_2)
+define void @test5_2(i8* %x5_2) {
+  call i8* @test5_1(i8* %x5_2)
+  store i32* null, i32** @g
+  ret void
+}
+
+declare void @test6_1(i8* %x6_1, i8* nocapture %y6_1, ...)
+
+; CHECK: define void @test6_2(i8* %x6_2, i8* nocapture %y6_2, i8* %z6_2)
+define void @test6_2(i8* %x6_2, i8* %y6_2, i8* %z6_2) {
+  call void (i8*, i8*, ...)* @test6_1(i8* %x6_2, i8* %y6_2, i8* %z6_2)
+  store i32* null, i32** @g
+  ret void
+}
+
diff --git a/test/Transforms/GVN/commute.ll b/test/Transforms/GVN/commute.ll
new file mode 100644
index 0000000..cf4fb7f
--- /dev/null
+++ b/test/Transforms/GVN/commute.ll
@@ -0,0 +1,23 @@
+; RUN: opt -gvn -S < %s | FileCheck %s
+
+declare void @use(i32, i32)
+
+define void @foo(i32 %x, i32 %y) {
+  ; CHECK: @foo
+  %add1 = add i32 %x, %y
+  %add2 = add i32 %y, %x
+  call void @use(i32 %add1, i32 %add2)
+  ; CHECK: @use(i32 %add1, i32 %add1)
+  ret void
+}
+
+declare void @vse(i1, i1)
+
+define void @bar(i32 %x, i32 %y) {
+  ; CHECK: @bar
+  %cmp1 = icmp ult i32 %x, %y
+  %cmp2 = icmp ugt i32 %y, %x
+  call void @vse(i1 %cmp1, i1 %cmp2)
+  ; CHECK: @vse(i1 %cmp1, i1 %cmp1)
+  ret void
+}
diff --git a/test/Transforms/GVN/condprop.ll b/test/Transforms/GVN/condprop.ll
index 0b31b01..9c28955 100644
--- a/test/Transforms/GVN/condprop.ll
+++ b/test/Transforms/GVN/condprop.ll
@@ -55,25 +55,6 @@ return:		; preds = %bb8
 }
 
 declare void @foo(i1)
-
-; CHECK: @test2
-define void @test2(i1 %x, i1 %y) {
-  %z = or i1 %x, %y
-  br i1 %z, label %true, label %false
-true:
-; CHECK: true:
-  %z2 = or i1 %x, %y
-  call void @foo(i1 %z2)
-; CHECK: call void @foo(i1 true)
-  br label %true
-false:
-; CHECK: false:
-  %z3 = or i1 %x, %y
-  call void @foo(i1 %z3)
-; CHECK: call void @foo(i1 false)
-  br label %false
-}
-
 declare void @bar(i32)
 
 ; CHECK: @test3
@@ -130,3 +111,141 @@ case3:
 ; CHECK: call void @bar(i32 %x)
   ret void
 }
+
+; CHECK: @test5
+define i1 @test5(i32 %x, i32 %y) {
+  %cmp = icmp eq i32 %x, %y
+  br i1 %cmp, label %same, label %different
+
+same:
+  %cmp2 = icmp ne i32 %x, %y
+; CHECK: ret i1 false
+  ret i1 %cmp2
+
+different:
+  %cmp3 = icmp eq i32 %x, %y
+; CHECK: ret i1 false
+  ret i1 %cmp3
+}
+
+; CHECK: @test6
+define i1 @test6(i32 %x, i32 %y) {
+  %cmp2 = icmp ne i32 %x, %y
+  %cmp = icmp eq i32 %x, %y
+  %cmp3 = icmp eq i32 %x, %y
+  br i1 %cmp, label %same, label %different
+
+same:
+; CHECK: ret i1 false
+  ret i1 %cmp2
+
+different:
+; CHECK: ret i1 false
+  ret i1 %cmp3
+}
+
+; CHECK: @test7
+define i1 @test7(i32 %x, i32 %y) {
+  %cmp = icmp sgt i32 %x, %y
+  br i1 %cmp, label %same, label %different
+
+same:
+  %cmp2 = icmp sle i32 %x, %y
+; CHECK: ret i1 false
+  ret i1 %cmp2
+
+different:
+  %cmp3 = icmp sgt i32 %x, %y
+; CHECK: ret i1 false
+  ret i1 %cmp3
+}
+
+; CHECK: @test8
+define i1 @test8(i32 %x, i32 %y) {
+  %cmp2 = icmp sle i32 %x, %y
+  %cmp = icmp sgt i32 %x, %y
+  %cmp3 = icmp sgt i32 %x, %y
+  br i1 %cmp, label %same, label %different
+
+same:
+; CHECK: ret i1 false
+  ret i1 %cmp2
+
+different:
+; CHECK: ret i1 false
+  ret i1 %cmp3
+}
+
+; PR1768
+; CHECK: @test9
+define i32 @test9(i32 %i, i32 %j) {
+  %cmp = icmp eq i32 %i, %j
+  br i1 %cmp, label %cond_true, label %ret
+
+cond_true:
+  %diff = sub i32 %i, %j
+  ret i32 %diff
+; CHECK: ret i32 0
+
+ret:
+  ret i32 5
+; CHECK: ret i32 5
+}
+
+; PR1768
+; CHECK: @test10
+define i32 @test10(i32 %j, i32 %i) {
+  %cmp = icmp eq i32 %i, %j
+  br i1 %cmp, label %cond_true, label %ret
+
+cond_true:
+  %diff = sub i32 %i, %j
+  ret i32 %diff
+; CHECK: ret i32 0
+
+ret:
+  ret i32 5
+; CHECK: ret i32 5
+}
+
+declare i32 @yogibar()
+
+; CHECK: @test11
+define i32 @test11(i32 %x) {
+  %v0 = call i32 @yogibar()
+  %v1 = call i32 @yogibar()
+  %cmp = icmp eq i32 %v0, %v1
+  br i1 %cmp, label %cond_true, label %next
+
+cond_true:
+  ret i32 %v1
+; CHECK: ret i32 %v0
+
+next:
+  %cmp2 = icmp eq i32 %x, %v0
+  br i1 %cmp2, label %cond_true2, label %next2
+
+cond_true2:
+  ret i32 %v0
+; CHECK: ret i32 %x
+
+next2:
+  ret i32 0
+}
+
+; CHECK: @test12
+define i32 @test12(i32 %x) {
+  %cmp = icmp eq i32 %x, 0
+  br i1 %cmp, label %cond_true, label %cond_false
+
+cond_true:
+  br label %ret
+
+cond_false:
+  br label %ret
+
+ret:
+  %res = phi i32 [ %x, %cond_true ], [ %x, %cond_false ]
+; CHECK: %res = phi i32 [ 0, %cond_true ], [ %x, %cond_false ]
+  ret i32 %res
+}
diff --git a/test/Transforms/GVN/dg.exp b/test/Transforms/GVN/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/GVN/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/GVN/lit.local.cfg b/test/Transforms/GVN/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/GVN/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/GlobalDCE/dg.exp b/test/Transforms/GlobalDCE/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/GlobalDCE/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/GlobalDCE/lit.local.cfg b/test/Transforms/GlobalDCE/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/GlobalDCE/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/GlobalOpt/atomic.ll b/test/Transforms/GlobalOpt/atomic.ll
new file mode 100644
index 0000000..4c3f439
--- /dev/null
+++ b/test/Transforms/GlobalOpt/atomic.ll
@@ -0,0 +1,10 @@
+; RUN: opt -globalopt < %s -S -o - | FileCheck %s
+
+@GV1 = internal global i64 1
+; CHECK: @GV1 = internal unnamed_addr constant i64 1
+
+define void @test1() {
+entry:
+  %0 = load atomic i8* bitcast (i64* @GV1 to i8*) acquire, align 8
+  ret void
+}
diff --git a/test/Transforms/GlobalOpt/constantfold-initializers.ll b/test/Transforms/GlobalOpt/constantfold-initializers.ll
index 834bd00..af8fa32 100644
--- a/test/Transforms/GlobalOpt/constantfold-initializers.ll
+++ b/test/Transforms/GlobalOpt/constantfold-initializers.ll
@@ -6,3 +6,46 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 
 ; CHECK: @A = global i1 false
 @A = global i1 icmp ne (i64 sub nsw (i64 ptrtoint (i8* getelementptr inbounds ([3 x i8]* @.str91250, i64 0, i64 1) to i64), i64 ptrtoint ([3 x i8]* @.str91250 to i64)), i64 1)
+
+; PR11352
+
+@xs = global [2 x i32] zeroinitializer, align 4
+; CHECK: @xs = global [2 x i32] [i32 1, i32 1]
+
+define internal void @test1() {
+entry:
+  store i32 1, i32* getelementptr inbounds ([2 x i32]* @xs, i64 0, i64 0)
+  %0 = load i32* getelementptr inbounds ([2 x i32]* @xs, i32 0, i64 0), align 4
+  store i32 %0, i32* getelementptr inbounds ([2 x i32]* @xs, i64 0, i64 1)
+  ret void
+}
+
+; PR12060
+
+%closure = type { i32 }
+
+@f = internal global %closure zeroinitializer, align 4
+@m = global i32 0, align 4
+; CHECK-NOT: @f
+; CHECK: @m = global i32 13
+
+define internal i32 @test2_helper(%closure* %this, i32 %b) {
+entry:
+  %0 = getelementptr inbounds %closure* %this, i32 0, i32 0
+  %1 = load i32* %0, align 4
+  %add = add nsw i32 %1, %b
+  ret i32 %add
+}
+
+define internal void @test2() {
+entry:
+  store i32 4, i32* getelementptr inbounds (%closure* @f, i32 0, i32 0)
+  %call = call i32 @test2_helper(%closure* @f, i32 9)
+  store i32 %call, i32* @m, align 4
+  ret void
+}
+
+@llvm.global_ctors = appending constant
+  [2 x { i32, void ()* }]
+  [{ i32, void ()* } { i32 65535, void ()* @test1 },
+   { i32, void ()* } { i32 65535, void ()* @test2 }]
diff --git a/test/Transforms/GlobalOpt/ctor-list-opt-constexpr.ll b/test/Transforms/GlobalOpt/ctor-list-opt-constexpr.ll
index 204f979..e3bc473 100644
--- a/test/Transforms/GlobalOpt/ctor-list-opt-constexpr.ll
+++ b/test/Transforms/GlobalOpt/ctor-list-opt-constexpr.ll
@@ -4,20 +4,31 @@ target triple = "x86_64-apple-darwin10.0.0"
 
 %0 = type { i32, void ()* }
 %struct.foo = type { i32* }
+%struct.bar = type { i128 }
 
 @G = global i32 0, align 4
 @H = global i32 0, align 4
 @X = global %struct.foo zeroinitializer, align 8
-@llvm.global_ctors = appending global [1 x %0] [%0 { i32 65535, void ()* @init }]
+@X2 = global %struct.bar zeroinitializer, align 8
+@llvm.global_ctors = appending global [2 x %0] [%0 { i32 65535, void ()* @init1 }, %0 { i32 65535, void ()* @init2 }]
 
 ; PR8710 - GlobalOpt shouldn't change the global's initializer to have this
 ; arbitrary constant expression, the code generator can't handle it.
-define internal void @init() {
+define internal void @init1() {
 entry:
   %tmp = getelementptr inbounds %struct.foo* @X, i32 0, i32 0
   store i32* inttoptr (i64 sdiv (i64 ptrtoint (i32* @G to i64), i64 ptrtoint (i32* @H to i64)) to i32*), i32** %tmp, align 8
   ret void
 }
-
-; CHECK: @init
+; CHECK: @init1
 ; CHECK: store i32*
+
+; PR11705 - ptrtoint isn't safe in general in global initializers.
+define internal void @init2() {
+entry:
+  %tmp = getelementptr inbounds %struct.bar* @X2, i32 0, i32 0
+  store i128 ptrtoint (i32* @G to i128), i128* %tmp, align 16
+  ret void
+}
+; CHECK: @init2
+; CHECK: store i128
diff --git a/test/Transforms/GlobalOpt/cxx-dtor.ll b/test/Transforms/GlobalOpt/cxx-dtor.ll
index 2263562..7c6ae78 100644
--- a/test/Transforms/GlobalOpt/cxx-dtor.ll
+++ b/test/Transforms/GlobalOpt/cxx-dtor.ll
@@ -2,6 +2,7 @@
 
 %0 = type { i32, void ()* }
 %struct.A = type { i8 }
+%struct.B = type { }
 
 @a = global %struct.A zeroinitializer, align 1
 @__dso_handle = external global i8*
@@ -15,13 +16,14 @@ define internal void @__cxx_global_var_init() nounwind section "__TEXT,__StaticI
 }
 
 define linkonce_odr void @_ZN1AD1Ev(%struct.A* %this) nounwind align 2 {
-  call void @_ZN1AD2Ev(%struct.A* %this)
+  %t = bitcast %struct.A* %this to %struct.B*
+  call void @_ZN1BD1Ev(%struct.B* %t)
   ret void
 }
 
 declare i32 @__cxa_atexit(void (i8*)*, i8*, i8*)
 
-define linkonce_odr void @_ZN1AD2Ev(%struct.A* %this) nounwind align 2 {
+define linkonce_odr void @_ZN1BD1Ev(%struct.B* %this) nounwind align 2 {
   ret void
 }
 
diff --git a/test/Transforms/GlobalOpt/dg.exp b/test/Transforms/GlobalOpt/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/GlobalOpt/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/GlobalOpt/invariant.ll b/test/Transforms/GlobalOpt/invariant.ll
new file mode 100644
index 0000000..6b99193
--- /dev/null
+++ b/test/Transforms/GlobalOpt/invariant.ll
@@ -0,0 +1,59 @@
+; RUN: opt -globalopt -S -o - < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare {}* @llvm.invariant.start(i64 %size, i8* nocapture %ptr)
+
+define void @test1(i8* %ptr) {
+  call {}* @llvm.invariant.start(i64 4, i8* %ptr)
+  ret void
+}
+
+@object1 = global i32 0
+; CHECK: @object1 = constant i32 -1
+define void @ctor1() {
+  store i32 -1, i32* @object1
+  %A = bitcast i32* @object1 to i8*
+  call void @test1(i8* %A)
+  ret void
+}
+
+
+@object2 = global i32 0
+; CHECK: @object2 = global i32 0
+define void @ctor2() {
+  store i32 -1, i32* @object2
+  %A = bitcast i32* @object2 to i8*
+  %B = call {}* @llvm.invariant.start(i64 4, i8* %A)
+  %C = bitcast {}* %B to i8*
+  ret void
+}
+
+
+@object3 = global i32 0
+; CHECK: @object3 = global i32 -1
+define void @ctor3() {
+  store i32 -1, i32* @object3
+  %A = bitcast i32* @object3 to i8*
+  call {}* @llvm.invariant.start(i64 3, i8* %A)
+  ret void
+}
+
+
+@object4 = global i32 0
+; CHECK: @object4 = global i32 -1
+define void @ctor4() {
+  store i32 -1, i32* @object4
+  %A = bitcast i32* @object4 to i8*
+  call {}* @llvm.invariant.start(i64 -1, i8* %A)
+  ret void
+}
+
+
+@llvm.global_ctors = appending constant
+  [4 x { i32, void ()* }]
+  [ { i32, void ()* } { i32 65535, void ()* @ctor1 },
+    { i32, void ()* } { i32 65535, void ()* @ctor2 },
+    { i32, void ()* } { i32 65535, void ()* @ctor3 },
+    { i32, void ()* } { i32 65535, void ()* @ctor4 } ]
diff --git a/test/Transforms/GlobalOpt/lit.local.cfg b/test/Transforms/GlobalOpt/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/GlobalOpt/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/IPConstantProp/dg.exp b/test/Transforms/IPConstantProp/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/IPConstantProp/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/IPConstantProp/lit.local.cfg b/test/Transforms/IPConstantProp/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/IPConstantProp/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/IndVarSimplify/dg.exp b/test/Transforms/IndVarSimplify/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/IndVarSimplify/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/IndVarSimplify/lit.local.cfg b/test/Transforms/IndVarSimplify/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/IndVarSimplify/no-iv-rewrite.ll b/test/Transforms/IndVarSimplify/no-iv-rewrite.ll
index 9c2abd0..23fdc87 100644
--- a/test/Transforms/IndVarSimplify/no-iv-rewrite.ll
+++ b/test/Transforms/IndVarSimplify/no-iv-rewrite.ll
@@ -333,9 +333,9 @@ entry:
 
 ; CHECK: loop:
 ; CHECK: phi %structIF*
-; CHECK: phi i32*
-; CHECK: getelementptr inbounds
+; CHECK-NOT: phi
 ; CHECK: getelementptr inbounds
+; CHECK-NOT: getelementptr
 ; CHECK: exit:
 loop:
   %ptr.iv = phi %structIF* [ %ptr.inc, %latch ], [ %base, %entry ]
diff --git a/test/Transforms/Inline/alloca-bonus.ll b/test/Transforms/Inline/alloca-bonus.ll
new file mode 100644
index 0000000..91ab40a
--- /dev/null
+++ b/test/Transforms/Inline/alloca-bonus.ll
@@ -0,0 +1,83 @@
+; RUN: opt -inline < %s -S -o - -inline-threshold=8 | FileCheck %s
+
+declare void @llvm.lifetime.start(i64 %size, i8* nocapture %ptr)
+
+@glbl = external global i32
+
+define void @outer1() {
+; CHECK: @outer1
+; CHECK-NOT: call void @inner1
+  %ptr = alloca i32
+  call void @inner1(i32* %ptr)
+  ret void
+}
+
+define void @inner1(i32 *%ptr) {
+  %A = load i32* %ptr
+  store i32 0, i32* %ptr
+  %C = getelementptr i32* %ptr, i32 0
+  %D = getelementptr i32* %ptr, i32 1
+  %E = bitcast i32* %ptr to i8*
+  %F = select i1 false, i32* %ptr, i32* @glbl
+  call void @llvm.lifetime.start(i64 0, i8* %E)
+  ret void
+}
+
+define void @outer2() {
+; CHECK: @outer2
+; CHECK: call void @inner2
+  %ptr = alloca i32
+  call void @inner2(i32* %ptr)
+  ret void
+}
+
+; %D poisons this call, scalar-repl can't handle that instruction.
+define void @inner2(i32 *%ptr) {
+  %A = load i32* %ptr
+  store i32 0, i32* %ptr
+  %C = getelementptr i32* %ptr, i32 0
+  %D = getelementptr i32* %ptr, i32 %A
+  %E = bitcast i32* %ptr to i8*
+  %F = select i1 false, i32* %ptr, i32* @glbl
+  call void @llvm.lifetime.start(i64 0, i8* %E)
+  ret void
+}
+
+define void @outer3() {
+; CHECK: @outer3
+; CHECK-NOT: call void @inner3
+  %ptr = alloca i32
+  call void @inner3(i32* %ptr, i1 undef)
+  ret void
+}
+
+define void @inner3(i32 *%ptr, i1 %x) {
+  %A = icmp eq i32* %ptr, null
+  %B = and i1 %x, %A
+  br i1 %A, label %bb.true, label %bb.false
+bb.true:
+  ; This block mustn't be counted in the inline cost.
+  %t1 = load i32* %ptr
+  %t2 = add i32 %t1, 1
+  %t3 = add i32 %t2, 1
+  %t4 = add i32 %t3, 1
+  %t5 = add i32 %t4, 1
+  %t6 = add i32 %t5, 1
+  %t7 = add i32 %t6, 1
+  %t8 = add i32 %t7, 1
+  %t9 = add i32 %t8, 1
+  %t10 = add i32 %t9, 1
+  %t11 = add i32 %t10, 1
+  %t12 = add i32 %t11, 1
+  %t13 = add i32 %t12, 1
+  %t14 = add i32 %t13, 1
+  %t15 = add i32 %t14, 1
+  %t16 = add i32 %t15, 1
+  %t17 = add i32 %t16, 1
+  %t18 = add i32 %t17, 1
+  %t19 = add i32 %t18, 1
+  %t20 = add i32 %t19, 1
+  ret void
+bb.false:
+  ret void
+}
diff --git a/test/Transforms/Inline/dg.exp b/test/Transforms/Inline/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/Inline/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/Inline/inline-invoke-tail.ll b/test/Transforms/Inline/inline-invoke-tail.ll
index 462c29a..1f34113 100644
--- a/test/Transforms/Inline/inline-invoke-tail.ll
+++ b/test/Transforms/Inline/inline-invoke-tail.ll
@@ -23,15 +23,11 @@ invcont:
 	ret i32 %retval
 
 lpad:
-	%eh_ptr = call i8* @llvm.eh.exception()
-	%eh_select = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %eh_ptr, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i8* null)
+        %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+            catch i8* null
 	unreachable
 }
 
-declare i8* @llvm.eh.exception() nounwind readonly
-
-declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
-
 declare i32 @__gxx_personality_v0(...)
 
 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/Inline/inline_returns_twice.ll b/test/Transforms/Inline/inline_returns_twice.ll
new file mode 100644
index 0000000..ab2e954
--- /dev/null
+++ b/test/Transforms/Inline/inline_returns_twice.ll
@@ -0,0 +1,41 @@
+; RUN: opt < %s -inline -S | FileCheck %s
+
+; Check that functions with "returns_twice" calls are only inlined,
+; if they are themselves marked as such.
+
+declare i32 @a() returns_twice
+declare i32 @b() returns_twice
+
+define i32 @f() {
+entry:
+  %call = call i32 @a() returns_twice
+  %add = add nsw i32 1, %call
+  ret i32 %add
+}
+
+define i32 @g() {
+entry:
+; CHECK: define i32 @g
+; CHECK: call i32 @f()
+; CHECK-NOT: call i32 @a()
+  %call = call i32 @f()
+  %add = add nsw i32 1, %call
+  ret i32 %add
+}
+
+define i32 @h() returns_twice {
+entry:
+  %call = call i32 @b() returns_twice
+  %add = add nsw i32 1, %call
+  ret i32 %add
+}
+
+define i32 @i() {
+entry:
+; CHECK: define i32 @i
+; CHECK: call i32 @b()
+; CHECK-NOT: call i32 @h()
+  %call = call i32 @h() returns_twice
+  %add = add nsw i32 1, %call
+  ret i32 %add
+}
diff --git a/test/Transforms/Inline/lit.local.cfg b/test/Transforms/Inline/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/Inline/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/InstCombine/2012-01-11-OpaqueBitcastCrash.ll b/test/Transforms/InstCombine/2012-01-11-OpaqueBitcastCrash.ll
new file mode 100644
index 0000000..abab9dc
--- /dev/null
+++ b/test/Transforms/InstCombine/2012-01-11-OpaqueBitcastCrash.ll
@@ -0,0 +1,12 @@
+; RUN: opt < %s -instcombine -disable-output
+
+%opaque_struct = type opaque
+
+@G = external global [0 x %opaque_struct]
+
+declare void @foo(%opaque_struct*)
+
+define void @bar() {
+  call void @foo(%opaque_struct* bitcast ([0 x %opaque_struct]* @G to %opaque_struct*))
+  ret void
+}
diff --git a/test/Transforms/InstCombine/2012-02-13-FCmp.ll b/test/Transforms/InstCombine/2012-02-13-FCmp.ll
new file mode 100644
index 0000000..39b0594
--- /dev/null
+++ b/test/Transforms/InstCombine/2012-02-13-FCmp.ll
@@ -0,0 +1,35 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+; Radar 10803727
+@.str = private unnamed_addr constant [35 x i8] c"\0Ain_range input (should be 0): %f\0A\00", align 1
+@.str1 = external hidden unnamed_addr constant [35 x i8], align 1
+
+declare i32 @printf(i8*, ...)
+define i64 @_Z8tempCastj(i32 %val) uwtable ssp {
+entry:
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([35 x i8]* @.str1, i64 0, i64 0), i32 %val)
+  %conv = uitofp i32 %val to double
+  %call.i = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([35 x i8]* @.str, i64 0, i64 0), double %conv)
+  %cmp.i = fcmp oge double %conv, -1.000000e+00
+  br i1 %cmp.i, label %land.rhs.i, label %if.end.critedge
+; CHECK:  br i1 true, label %land.rhs.i, label %if.end.critedge
+
+land.rhs.i:                                       ; preds = %entry
+  %cmp1.i = fcmp olt double %conv, 1.000000e+00
+  br i1 %cmp1.i, label %if.then, label %if.end
+
+if.then:                                          ; preds = %land.rhs.i
+  %add = fadd double %conv, 5.000000e-01
+  %conv3 = fptosi double %add to i64
+  br label %return
+
+if.end.critedge:                                  ; preds = %entry
+  br label %if.end
+
+if.end:                                           ; preds = %if.end.critedge, %land.rhs.i
+  br label %return
+
+return:                                           ; preds = %if.end, %if.then
+  %retval.0 = phi i64 [ %conv3, %if.then ], [ -1, %if.end ]
+  ret i64 %retval.0
+}
+
diff --git a/test/Transforms/InstCombine/2012-02-28-ICmp.ll b/test/Transforms/InstCombine/2012-02-28-ICmp.ll
new file mode 100644
index 0000000..82cf85f
--- /dev/null
+++ b/test/Transforms/InstCombine/2012-02-28-ICmp.ll
@@ -0,0 +1,19 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; <rdar://problem/10803154>
+
+; There should be no transformation.
+; CHECK: %a = trunc i32 %x to i8
+; CHECK: %b = icmp ne i8 %a, 0
+; CHECK: %c = and i32 %x, 16711680
+; CHECK: %d = icmp ne i32 %c, 0
+; CHECK: %e = and i1 %b, %d
+; CHECK: ret i1 %e
+
+define i1 @f1(i32 %x) {
+  %a = trunc i32 %x to i8
+  %b = icmp ne i8 %a, 0
+  %c = and i32 %x, 16711680
+  %d = icmp ne i32 %c, 0
+  %e = and i1 %b, %d
+  ret i1 %e
+}
diff --git a/test/Transforms/InstCombine/dg.exp b/test/Transforms/InstCombine/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/InstCombine/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/InstCombine/icmp.ll b/test/Transforms/InstCombine/icmp.ll
index 016e8c5..a9ae221 100644
--- a/test/Transforms/InstCombine/icmp.ll
+++ b/test/Transforms/InstCombine/icmp.ll
@@ -121,8 +121,8 @@ define i1 @test12(i1 %A) {
   %B = icmp ne i64 bitcast (<2 x i32> <i32 1, i32 -1> to i64), %S
   ret i1 %B
 ; CHECK: @test12
-; CHECK-NEXT: %B = select i1
-; CHECK-NEXT: ret i1 %B
+; CHECK-NEXT: = xor i1 %A, true
+; CHECK-NEXT: ret i1
 }
 
 ; PR6481
@@ -524,7 +524,7 @@ define i1 @test53(i32 %a, i32 %b) nounwind {
 
 ; CHECK: @test54
 ; CHECK-NEXT: %and = and i8 %a, -64
-; CHECK-NEXT icmp eq i8 %and, -128
+; CHECK-NEXT: icmp eq i8 %and, -128
 define i1 @test54(i8 %a) nounwind {
   %ext = zext i8 %a to i32
   %and = and i32 %ext, 192
@@ -580,3 +580,60 @@ define zeroext i1 @cmpabs2(i64 %val) {
   %tobool = icmp ne i64 %sub.val, 0
   ret i1 %tobool
 }
+
+; CHECK: @test58
+; CHECK-NEXT: call i32 @test58_d(i64 36029346783166592)
+define void @test58() nounwind {
+  %cast = bitcast <1 x i64> <i64 36029346783166592> to i64
+  %call = call i32 @test58_d( i64 %cast) nounwind
+  ret void
+}
+declare i32 @test58_d(i64)
+
+define i1 @test59(i8* %foo) {
+  %bit = bitcast i8* %foo to i32*
+  %gep1 = getelementptr inbounds i32* %bit, i64 2
+  %gep2 = getelementptr inbounds i8* %foo, i64 10
+  %cast1 = bitcast i32* %gep1 to i8*
+  %cmp = icmp ult i8* %cast1, %gep2
+  %use = ptrtoint i8* %cast1 to i64
+  %call = call i32 @test58_d(i64 %use) nounwind
+  ret i1 %cmp
+; CHECK: @test59
+; CHECK: ret i1 true
+}
+
+define i1 @test60(i8* %foo, i64 %i, i64 %j) {
+  %bit = bitcast i8* %foo to i32*
+  %gep1 = getelementptr inbounds i32* %bit, i64 %i
+  %gep2 = getelementptr inbounds i8* %foo, i64 %j
+  %cast1 = bitcast i32* %gep1 to i8*
+  %cmp = icmp ult i8* %cast1, %gep2
+  ret i1 %cmp
+; CHECK: @test60
+; CHECK-NEXT: %gep1.idx = shl nuw i64 %i, 2
+; CHECK-NEXT: icmp slt i64 %gep1.idx, %j
+; CHECK-NEXT: ret i1
+}
+
+define i1 @test61(i8* %foo, i64 %i, i64 %j) {
+  %bit = bitcast i8* %foo to i32*
+  %gep1 = getelementptr i32* %bit, i64 %i
+  %gep2 = getelementptr  i8* %foo, i64 %j
+  %cast1 = bitcast i32* %gep1 to i8*
+  %cmp = icmp ult i8* %cast1, %gep2
+  ret i1 %cmp
+; Don't transform non-inbounds GEPs.
+; CHECK: @test61
+; CHECK: icmp ult i8* %cast1, %gep2
+; CHECK-NEXT: ret i1
+}
+
+define i1 @test62(i8* %a) {
+  %arrayidx1 = getelementptr inbounds i8* %a, i64 1
+  %arrayidx2 = getelementptr inbounds i8* %a, i64 10
+  %cmp = icmp slt i8* %arrayidx1, %arrayidx2
+  ret i1 %cmp
+; CHECK: @test62
+; CHECK-NEXT: ret i1 true
+}
diff --git a/test/Transforms/InstCombine/intrinsics.ll b/test/Transforms/InstCombine/intrinsics.ll
index e31bd7d..382e6b3 100644
--- a/test/Transforms/InstCombine/intrinsics.ll
+++ b/test/Transforms/InstCombine/intrinsics.ll
@@ -181,10 +181,10 @@ entry:
 
 define void @cmp.simplify(i32 %a, i32 %b, i1* %c) {
 entry:
-  %lz = tail call i32 @llvm.ctlz.i32(i32 %a, i1 true) nounwind readnone
+  %lz = tail call i32 @llvm.ctlz.i32(i32 %a, i1 false) nounwind readnone
   %lz.cmp = icmp eq i32 %lz, 32
   store volatile i1 %lz.cmp, i1* %c
-  %tz = tail call i32 @llvm.cttz.i32(i32 %a, i1 true) nounwind readnone
+  %tz = tail call i32 @llvm.cttz.i32(i32 %a, i1 false) nounwind readnone
   %tz.cmp = icmp ne i32 %tz, 32
   store volatile i1 %tz.cmp, i1* %c
   %pop = tail call i32 @llvm.ctpop.i32(i32 %b) nounwind readnone
@@ -201,16 +201,22 @@ entry:
 ; CHECK-NEXT: store volatile i1 %pop.cmp, i1* %c
 }
 
-
-define i32 @cttz_simplify1(i32 %x) nounwind readnone ssp {
-  %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)    ; <i32> [#uses=1]
-  %shr3 = lshr i32 %tmp1, 5                       ; <i32> [#uses=1]
+define i32 @cttz_simplify1a(i32 %x) nounwind readnone ssp {
+  %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false)
+  %shr3 = lshr i32 %tmp1, 5
   ret i32 %shr3
-  
-; CHECK: @cttz_simplify1
+
+; CHECK: @cttz_simplify1a
 ; CHECK: icmp eq i32 %x, 0
-; CHECK-NEXT: zext i1 
+; CHECK-NEXT: zext i1
 ; CHECK-NEXT: ret i32
 }
 
+define i32 @cttz_simplify1b(i32 %x) nounwind readnone ssp {
+  %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+  %shr3 = lshr i32 %tmp1, 5
+  ret i32 %shr3
 
+; CHECK: @cttz_simplify1b
+; CHECK-NEXT: ret i32 0
+}
diff --git a/test/Transforms/InstCombine/lit.local.cfg b/test/Transforms/InstCombine/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/InstCombine/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/InstCombine/shift.ll b/test/Transforms/InstCombine/shift.ll
index 132d51a..52310e3 100644
--- a/test/Transforms/InstCombine/shift.ll
+++ b/test/Transforms/InstCombine/shift.ll
@@ -542,3 +542,75 @@ define i32 @test45(i32 %a) nounwind {
 ; CHECK-NEXT: %y = lshr i32 %a, 5
 ; CHECK-NEXT: ret i32 %y
 }
+
+define i32 @test46(i32 %a) {
+  %y = ashr exact i32 %a, 3
+  %z = shl i32 %y, 1
+  ret i32 %z
+; CHECK: @test46
+; CHECK-NEXT: %z = ashr exact i32 %a, 2
+; CHECK-NEXT: ret i32 %z
+}
+
+define i32 @test47(i32 %a) {
+  %y = lshr exact i32 %a, 3
+  %z = shl i32 %y, 1
+  ret i32 %z
+; CHECK: @test47
+; CHECK-NEXT: %z = lshr exact i32 %a, 2
+; CHECK-NEXT: ret i32 %z
+}
+
+define i32 @test48(i32 %x) {
+  %A = lshr exact i32 %x, 1
+  %B = shl i32 %A, 3
+  ret i32 %B
+; CHECK: @test48
+; CHECK-NEXT: %B = shl i32 %x, 2
+; CHECK-NEXT: ret i32 %B
+}
+
+define i32 @test49(i32 %x) {
+  %A = ashr exact i32 %x, 1
+  %B = shl i32 %A, 3
+  ret i32 %B
+; CHECK: @test49
+; CHECK-NEXT: %B = shl i32 %x, 2
+; CHECK-NEXT: ret i32 %B
+}
+
+define i32 @test50(i32 %x) {
+  %A = shl nsw i32 %x, 1
+  %B = ashr i32 %A, 3
+  ret i32 %B
+; CHECK: @test50
+; CHECK-NEXT: %B = ashr i32 %x, 2
+; CHECK-NEXT: ret i32 %B
+}
+
+define i32 @test51(i32 %x) {
+  %A = shl nuw i32 %x, 1
+  %B = lshr i32 %A, 3
+  ret i32 %B
+; CHECK: @test51
+; CHECK-NEXT: %B = lshr i32 %x, 2
+; CHECK-NEXT: ret i32 %B
+}
+
+define i32 @test52(i32 %x) {
+  %A = shl nsw i32 %x, 3
+  %B = ashr i32 %A, 1
+  ret i32 %B
+; CHECK: @test52
+; CHECK-NEXT: %B = shl nsw i32 %x, 2
+; CHECK-NEXT: ret i32 %B
+}
+
+define i32 @test53(i32 %x) {
+  %A = shl nuw i32 %x, 3
+  %B = lshr i32 %A, 1
+  ret i32 %B
+; CHECK: @test53
+; CHECK-NEXT: %B = shl nuw i32 %x, 2
+; CHECK-NEXT: ret i32 %B
+}
diff --git a/test/Transforms/InstCombine/sign-test-and-or.ll b/test/Transforms/InstCombine/sign-test-and-or.ll
index 47f5f30..a6066d8 100644
--- a/test/Transforms/InstCombine/sign-test-and-or.ll
+++ b/test/Transforms/InstCombine/sign-test-and-or.ll
@@ -77,3 +77,103 @@ if.then:
 if.end:
   ret void
 }
+
+define void @test5(i32 %a) nounwind {
+  %and = and i32 %a, 134217728
+  %1 = icmp eq i32 %and, 0
+  %2 = icmp sgt i32 %a, -1
+  %or.cond = and i1 %1, %2
+  br i1 %or.cond, label %if.then, label %if.end
+
+; CHECK: @test5
+; CHECK-NEXT: %1 = and i32 %a, -2013265920
+; CHECK-NEXT: %2 = icmp eq i32 %1, 0
+; CHECK-NEXT: br i1 %2, label %if.then, label %if.end
+
+if.then:
+  tail call void @foo() nounwind
+  ret void
+
+if.end:
+  ret void
+}
+
+define void @test6(i32 %a) nounwind {
+  %1 = icmp sgt i32 %a, -1
+  %and = and i32 %a, 134217728
+  %2 = icmp eq i32 %and, 0
+  %or.cond = and i1 %1, %2
+  br i1 %or.cond, label %if.then, label %if.end
+
+; CHECK: @test6
+; CHECK-NEXT: %1 = and i32 %a, -2013265920
+; CHECK-NEXT: %2 = icmp eq i32 %1, 0
+; CHECK-NEXT: br i1 %2, label %if.then, label %if.end
+
+if.then:
+  tail call void @foo() nounwind
+  ret void
+
+if.end:
+  ret void
+}
+
+define void @test7(i32 %a) nounwind {
+  %and = and i32 %a, 134217728
+  %1 = icmp ne i32 %and, 0
+  %2 = icmp slt i32 %a, 0
+  %or.cond = or i1 %1, %2
+  br i1 %or.cond, label %if.then, label %if.end
+
+; CHECK: @test7
+; CHECK-NEXT: %1 = and i32 %a, -2013265920
+; CHECK-NEXT: %2 = icmp eq i32 %1, 0
+; CHECK-NEXT: br i1 %2, label %if.end, label %if.then
+
+if.then:
+  tail call void @foo() nounwind
+  ret void
+
+if.end:
+  ret void
+}
+
+define void @test8(i32 %a) nounwind {
+  %1 = icmp slt i32 %a, 0
+  %and = and i32 %a, 134217728
+  %2 = icmp ne i32 %and, 0
+  %or.cond = or i1 %1, %2
+  br i1 %or.cond, label %if.then, label %if.end
+
+; CHECK: @test8
+; CHECK-NEXT: %1 = and i32 %a, -2013265920
+; CHECK-NEXT: %2 = icmp eq i32 %1, 0
+; CHECK-NEXT: br i1 %2, label %if.end, label %if.then
+
+if.then:
+  tail call void @foo() nounwind
+  ret void
+
+if.end:
+  ret void
+}
+
+define void @test9(i32 %a) nounwind {
+  %1 = and i32 %a, 1073741824
+  %2 = icmp ne i32 %1, 0
+  %3 = icmp sgt i32 %a, -1
+  %or.cond = and i1 %2, %3
+  br i1 %or.cond, label %if.then, label %if.end
+
+; CHECK: @test9
+; CHECK-NEXT: %1 = and i32 %a, -1073741824
+; CHECK-NEXT: %2 = icmp eq i32 %1, 1073741824
+; CHECK-NEXT: br i1 %2, label %if.then, label %if.end
+
+if.then:
+  tail call void @foo() nounwind
+  ret void
+
+if.end:
+  ret void
+}
diff --git a/test/Transforms/InstCombine/sub-xor.ll b/test/Transforms/InstCombine/sub-xor.ll
new file mode 100644
index 0000000..279e4ac
--- /dev/null
+++ b/test/Transforms/InstCombine/sub-xor.ll
@@ -0,0 +1,37 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+define i32 @test1(i32 %x) nounwind {
+  %and = and i32 %x, 31
+  %sub = sub i32 63, %and
+  ret i32 %sub
+
+; CHECK: @test1
+; CHECK-NEXT: and i32 %x, 31
+; CHECK-NEXT: xor i32 %and, 63
+; CHECK-NEXT: ret
+}
+
+declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
+
+define i32 @test2(i32 %x) nounwind {
+  %count = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true) nounwind readnone
+  %sub = sub i32 31, %count
+  ret i32 %sub
+
+; CHECK: @test2
+; CHECK-NEXT: ctlz
+; CHECK-NEXT: xor i32 %count, 31
+; CHECK-NEXT: ret
+}
+
+define i32 @test3(i32 %x) nounwind {
+  %and = and i32 %x, 31
+  %sub = xor i32 31, %and
+  %add = add i32 %sub, 42
+  ret i32 %add
+
+; CHECK: @test3
+; CHECK-NEXT: and i32 %x, 31
+; CHECK-NEXT: sub i32 73, %and
+; CHECK-NEXT: ret
+}
diff --git a/test/Transforms/InstCombine/sub.ll b/test/Transforms/InstCombine/sub.ll
index 37de328..b71ec8c 100644
--- a/test/Transforms/InstCombine/sub.ll
+++ b/test/Transforms/InstCombine/sub.ll
@@ -301,3 +301,29 @@ define i32 @test28(i32 %x, i32 %y, i32 %z) {
 ; CHECK-NEXT: add i32
 ; CHECK-NEXT: ret i32
 }
+
+define i64 @test29(i8* %foo, i64 %i, i64 %j) {
+  %gep1 = getelementptr inbounds i8* %foo, i64 %i
+  %gep2 = getelementptr inbounds i8* %foo, i64 %j
+  %cast1 = ptrtoint i8* %gep1 to i64
+  %cast2 = ptrtoint i8* %gep2 to i64
+  %sub = sub i64 %cast1, %cast2
+  ret i64 %sub
+; CHECK: @test29
+; CHECK-NEXT: sub i64 %i, %j
+; CHECK-NEXT: ret i64
+}
+
+define i64 @test30(i8* %foo, i64 %i, i64 %j) {
+  %bit = bitcast i8* %foo to i32*
+  %gep1 = getelementptr inbounds i32* %bit, i64 %i
+  %gep2 = getelementptr inbounds i8* %foo, i64 %j
+  %cast1 = ptrtoint i32* %gep1 to i64
+  %cast2 = ptrtoint i8* %gep2 to i64
+  %sub = sub i64 %cast1, %cast2
+  ret i64 %sub
+; CHECK: @test30
+; CHECK-NEXT: %gep1.idx = shl nuw i64 %i, 2
+; CHECK-NEXT: sub i64 %gep1.idx, %j
+; CHECK-NEXT: ret i64
+}
diff --git a/test/Transforms/InstSimplify/compare.ll b/test/Transforms/InstSimplify/compare.ll
index 9f3dffe..e15bfaa 100644
--- a/test/Transforms/InstSimplify/compare.ll
+++ b/test/Transforms/InstSimplify/compare.ll
@@ -10,6 +10,99 @@ define i1 @ptrtoint() {
 ; CHECK: ret i1 false
 }
 
+define i1 @bitcast() {
+; CHECK: @bitcast
+  %a = alloca i32
+  %b = alloca i64
+  %x = bitcast i32* %a to i8*
+  %y = bitcast i64* %b to i8*
+  %cmp = icmp eq i8* %x, %y
+  ret i1 %cmp
+; CHECK-NEXT: ret i1 false
+}
+
+define i1 @gep() {
+; CHECK: @gep
+  %a = alloca [3 x i8], align 8
+  %x = getelementptr inbounds [3 x i8]* %a, i32 0, i32 0
+  %cmp = icmp eq i8* %x, null
+  ret i1 %cmp
+; CHECK-NEXT: ret i1 false
+}
+
+define i1 @gep2() {
+; CHECK: @gep2
+  %a = alloca [3 x i8], align 8
+  %x = getelementptr inbounds [3 x i8]* %a, i32 0, i32 0
+  %y = getelementptr inbounds [3 x i8]* %a, i32 0, i32 0
+  %cmp = icmp eq i8* %x, %y
+  ret i1 %cmp
+; CHECK-NEXT: ret i1 true
+}
+
+; PR11238
+%gept = type { i32, i32 }
+@gepy = global %gept zeroinitializer, align 8
+@gepz = extern_weak global %gept
+
+define i1 @gep3() {
+; CHECK: @gep3
+  %x = alloca %gept, align 8
+  %a = getelementptr %gept* %x, i64 0, i32 0
+  %b = getelementptr %gept* %x, i64 0, i32 1
+  %equal = icmp eq i32* %a, %b
+  ret i1 %equal
+; CHECK-NEXT: ret i1 false
+}
+
+define i1 @gep4() {
+; CHECK: @gep4
+  %x = alloca %gept, align 8
+  %a = getelementptr %gept* @gepy, i64 0, i32 0
+  %b = getelementptr %gept* @gepy, i64 0, i32 1
+  %equal = icmp eq i32* %a, %b
+  ret i1 %equal
+; CHECK-NEXT: ret i1 false
+}
+
+define i1 @gep5() {
+; CHECK: @gep5
+  %x = alloca %gept, align 8
+  %a = getelementptr inbounds %gept* %x, i64 0, i32 1
+  %b = getelementptr %gept* @gepy, i64 0, i32 0
+  %equal = icmp eq i32* %a, %b
+  ret i1 %equal
+; CHECK-NEXT: ret i1 false
+}
+
+define i1 @gep6(%gept* %x) {
+; Same as @gep3 but potentially null.
+; CHECK: @gep6
+  %a = getelementptr %gept* %x, i64 0, i32 0
+  %b = getelementptr %gept* %x, i64 0, i32 1
+  %equal = icmp eq i32* %a, %b
+  ret i1 %equal
+; CHECK-NEXT: ret i1 false
+}
+
+define i1 @gep7(%gept* %x) {
+; CHECK: @gep7
+  %a = getelementptr %gept* %x, i64 0, i32 0
+  %b = getelementptr %gept* @gepz, i64 0, i32 0
+  %equal = icmp eq i32* %a, %b
+  ret i1 %equal
+; CHECK: ret i1 %equal
+}
+
+define i1 @gep8(%gept* %x) {
+; CHECK: @gep8
+  %a = getelementptr %gept* %x, i32 1
+  %b = getelementptr %gept* %x, i32 -1
+  %equal = icmp ugt %gept* %a, %b
+  ret i1 %equal
+; CHECK: ret i1 %equal
+}
+
 define i1 @zext(i32 %x) {
 ; CHECK: @zext
   %e1 = zext i32 %x to i64
@@ -406,3 +499,40 @@ define i1 @mul3(i32 %X, i32 %Y) {
   ret i1 %C
 ; CHECK: ret i1 true
 }
+
+define <2 x i1> @vectorselect1(<2 x i1> %cond) {
+; CHECK: @vectorselect1
+  %invert = xor <2 x i1> %cond, <i1 1, i1 1>
+  %s = select <2 x i1> %invert, <2 x i32> <i32 0, i32 0>, <2 x i32> <i32 1, i32 1>
+  %c = icmp ne <2 x i32> %s, <i32 0, i32 0>
+  ret <2 x i1> %c
+; CHECK: ret <2 x i1> %cond
+}
+
+; PR11948
+define <2 x i1> @vectorselectcrash(i32 %arg1) {
+  %tobool40 = icmp ne i32 %arg1, 0
+  %cond43 = select i1 %tobool40, <2 x i16> <i16 -5, i16 66>, <2 x i16> <i16 46, i16 1>
+  %cmp45 = icmp ugt <2 x i16> %cond43, <i16 73, i16 21>
+  ret <2 x i1> %cmp45
+}
+
+; PR12013
+define i1 @alloca_compare(i64 %idx) {
+  %sv = alloca { i32, i32, [124 x i32] }
+  %1 = getelementptr inbounds { i32, i32, [124 x i32] }* %sv, i32 0, i32 2, i64 %idx
+  %2 = icmp eq i32* %1, null
+  ret i1 %2
+  ; CHECK: alloca_compare
+  ; CHECK: ret i1 false
+}
+
+; PR12075
+define i1 @infinite_gep() {
+  ret i1 1
+
+unreachableblock:
+  %X = getelementptr i32 *%X, i32 1
+  %Y = icmp eq i32* %X, null
+  ret i1 %Y
+}
diff --git a/test/Transforms/InstSimplify/dg.exp b/test/Transforms/InstSimplify/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/InstSimplify/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/InstSimplify/lit.local.cfg b/test/Transforms/InstSimplify/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/InstSimplify/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/Internalize/dg.exp b/test/Transforms/Internalize/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/Internalize/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/Internalize/lit.local.cfg b/test/Transforms/Internalize/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/Internalize/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/JumpThreading/dg.exp b/test/Transforms/JumpThreading/dg.exp
deleted file mode 100644
index de42dad..0000000
--- a/test/Transforms/JumpThreading/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.ll]]
diff --git a/test/Transforms/JumpThreading/lit.local.cfg b/test/Transforms/JumpThreading/lit.local.cfg
new file mode 100644
index 0000000..c6106e4
--- /dev/null
+++ b/test/Transforms/JumpThreading/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll']
diff --git a/test/Transforms/LCSSA/dg.exp b/test/Transforms/LCSSA/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/LCSSA/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/LCSSA/lit.local.cfg b/test/Transforms/LCSSA/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/LCSSA/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LICM/dg.exp b/test/Transforms/LICM/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/LICM/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/LICM/lit.local.cfg b/test/Transforms/LICM/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/LICM/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LoopDeletion/dg.exp b/test/Transforms/LoopDeletion/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/LoopDeletion/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/LoopDeletion/lit.local.cfg b/test/Transforms/LoopDeletion/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/LoopDeletion/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LoopIdiom/dg.exp b/test/Transforms/LoopIdiom/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/LoopIdiom/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/LoopIdiom/lit.local.cfg b/test/Transforms/LoopIdiom/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/LoopIdiom/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LoopRotate/alloca.ll b/test/Transforms/LoopRotate/alloca.ll
new file mode 100644
index 0000000..fd217ea
--- /dev/null
+++ b/test/Transforms/LoopRotate/alloca.ll
@@ -0,0 +1,33 @@
+; RUN: opt < %s -loop-rotate -S | FileCheck %s
+
+; Test alloca in -loop-rotate.
+
+; We expect a different value for %ptr each iteration (according to the
+; definition of alloca). I.e. each @use must be paired with an alloca.
+
+; CHECK: call void @use(i8* %
+; CHECK: %ptr = alloca i8
+
+@e = global i16 10
+
+declare void @use(i8*)
+
+define void @test() {
+entry:
+  %end = load i16* @e
+  br label %loop
+
+loop:
+  %n.phi = phi i16 [ %n, %loop.fin ], [ 0, %entry ]
+  %ptr = alloca i8
+  %cond = icmp eq i16 %n.phi, %end
+  br i1 %cond, label %exit, label %loop.fin
+
+loop.fin:
+  %n = add i16 %n.phi, 1
+  call void @use(i8* %ptr)
+  br label %loop
+
+exit:
+  ret void
+}
diff --git a/test/Transforms/LoopRotate/dg.exp b/test/Transforms/LoopRotate/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/LoopRotate/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/LoopRotate/lit.local.cfg b/test/Transforms/LoopRotate/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/LoopRotate/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LoopRotate/simplifylatch.ll b/test/Transforms/LoopRotate/simplifylatch.ll
new file mode 100644
index 0000000..f422724
--- /dev/null
+++ b/test/Transforms/LoopRotate/simplifylatch.ll
@@ -0,0 +1,39 @@
+; RUN: opt -S < %s -loop-rotate -verify-dom-info -verify-loop-info | FileCheck %s
+; PR2624 unroll multiple exits
+
+@mode_table = global [4 x i32] zeroinitializer		; <[4 x i32]*> [#uses=1]
+
+; CHECK: @f
+; CHECK-NOT: bb4
+define i8 @f() {
+entry:
+	tail call i32 @fegetround( )		; <i32>:0 [#uses=1]
+	br label %bb
+
+bb:		; preds = %bb4, %entry
+	%mode.0 = phi i8 [ 0, %entry ], [ %indvar.next, %bb4 ]		; <i8> [#uses=4]
+	zext i8 %mode.0 to i32		; <i32>:1 [#uses=1]
+	getelementptr [4 x i32]* @mode_table, i32 0, i32 %1		; <i32*>:2 [#uses=1]
+	load i32* %2, align 4		; <i32>:3 [#uses=1]
+	icmp eq i32 %3, %0		; <i1>:4 [#uses=1]
+	br i1 %4, label %bb1, label %bb2
+
+bb1:		; preds = %bb
+	ret i8 %mode.0
+
+bb2:		; preds = %bb
+	icmp eq i8 %mode.0, 1		; <i1>:5 [#uses=1]
+	br i1 %5, label %bb5, label %bb4
+
+bb4:		; preds = %bb2
+	%indvar.next = add i8 %mode.0, 1		; <i8> [#uses=1]
+	br label %bb
+
+bb5:		; preds = %bb2
+	tail call void @raise_exception( ) noreturn
+	unreachable
+}
+
+declare i32 @fegetround()
+
+declare void @raise_exception() noreturn
diff --git a/test/Transforms/LoopSimplify/dg.exp b/test/Transforms/LoopSimplify/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/LoopSimplify/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/LoopSimplify/lit.local.cfg b/test/Transforms/LoopSimplify/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/LoopSimplify/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LoopStrengthReduce/2011-12-19-PostincQuadratic.ll b/test/Transforms/LoopStrengthReduce/2011-12-19-PostincQuadratic.ll
new file mode 100644
index 0000000..392a8bc
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/2011-12-19-PostincQuadratic.ll
@@ -0,0 +1,39 @@
+; RUN: opt -loop-reduce -S < %s | FileCheck %s
+;
+; PR11571: handle a postinc user outside of for.body7 that requires
+; recursive expansion of a quadratic recurrence within for.body7. LSR
+; needs to forget that for.body7 is a postinc loop during expansion.
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
+target triple = "i386-unknown-freebsd10.0"
+
+@b = external global [121 x i32]
+
+; CHECK: @vb
+;   Outer recurrence:
+; CHECK: %lsr.iv1 = phi [121 x i32]*
+;   Inner recurrence:
+; CHECK: %lsr.iv = phi i32
+;   Outer step (relative to inner recurrence):
+; CHECK: %scevgep = getelementptr i1* %{{.*}}, i32 %lsr.iv
+;   Outer use:
+; CHECK: %lsr.iv3 = phi [121 x i32]* [ %lsr.iv1, %for.body43.preheader ]
+define void @vb() nounwind {
+for.cond.preheader:
+  br label %for.body7
+
+for.body7:
+  %indvars.iv77 = phi i32 [ %indvars.iv.next78, %for.body7 ], [ 1, %for.cond.preheader ]
+  %bf.072 = phi i32 [ %t1, %for.body7 ], [ 0, %for.cond.preheader ]
+  %t1 = add i32 %bf.072, %indvars.iv77
+  %indvars.iv.next78 = add i32 %indvars.iv77, 1
+  br i1 undef, label %for.body43, label %for.body7
+
+for.body43:
+  %bf.459 = phi i32 [ %inc44, %for.body43 ], [ %t1, %for.body7 ]
+  %inc44 = add nsw i32 %bf.459, 1
+  %arrayidx45 = getelementptr inbounds [121 x i32]* @b, i32 0, i32 %bf.459
+  %t2 = load i32* %arrayidx45, align 4
+  br label %for.body43
+}
+
diff --git a/test/Transforms/LoopStrengthReduce/2012-01-02-nopreheader.ll b/test/Transforms/LoopStrengthReduce/2012-01-02-nopreheader.ll
new file mode 100644
index 0000000..d7f5723
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/2012-01-02-nopreheader.ll
@@ -0,0 +1,88 @@
+; RUN: opt -loop-reduce -S < %s | FileCheck %s
+;
+; <rdar://10619599> "SelectionDAGBuilder shouldn't visit PHI nodes!" assert.
+; <rdar://10655343> SCEVExpander segfault on simple test case
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-f128:128:128-n8:16:32"
+target triple = "i386-apple-darwin"
+
+; LSR should convert the inner loop (bb7.us) IV (j.01.us) into float*.
+; This involves a nested AddRec, the outer AddRec's loop invariant components
+; cannot find a preheader, so they should be expanded in the loop header
+; (bb7.lr.ph.us) below the existing phi i.12.us.
+; Currently, LSR won't kick in on such loops.
+; CHECK: @nopreheader
+; CHECK: bb7.us:
+; CHECK-NOT: phi float*
+; CHECK: %j.01.us = phi i32
+; CHECK-NOT: phi float*
+define void @nopreheader(float* nocapture %a, i32 %n) nounwind {
+entry:
+  %0 = sdiv i32 %n, undef
+  indirectbr i8* undef, [label %bb10.preheader]
+
+bb10.preheader:                                   ; preds = %bb4
+  indirectbr i8* undef, [label %bb8.preheader.lr.ph, label %return]
+
+bb8.preheader.lr.ph:                              ; preds = %bb10.preheader
+  indirectbr i8* null, [label %bb7.lr.ph.us, label %bb9]
+
+bb7.lr.ph.us:                                     ; preds = %bb9.us, %bb8.preheader.lr.ph
+  %i.12.us = phi i32 [ %2, %bb9.us ], [ 0, %bb8.preheader.lr.ph ]
+  %tmp30 = mul i32 %0, %i.12.us
+  indirectbr i8* undef, [label %bb7.us]
+
+bb7.us:                                           ; preds = %bb7.lr.ph.us, %bb7.us
+  %j.01.us = phi i32 [ 0, %bb7.lr.ph.us ], [ %1, %bb7.us ]
+  %tmp31 = add i32 %tmp30, %j.01.us
+  %scevgep9 = getelementptr float* %a, i32 %tmp31
+  store float undef, float* %scevgep9, align 1
+  %1 = add nsw i32 %j.01.us, 1
+  indirectbr i8* undef, [label %bb9.us, label %bb7.us]
+
+bb9.us:                                           ; preds = %bb7.us
+  %2 = add nsw i32 %i.12.us, 1
+  indirectbr i8* undef, [label %bb7.lr.ph.us, label %return]
+
+bb9:                                              ; preds = %bb9, %bb8.preheader.lr.ph
+  indirectbr i8* undef, [label %bb9, label %return]
+
+return:                                           ; preds = %bb9, %bb9.us, %bb10.preheader
+  ret void
+}
+
+; In this case, SCEVExpander simply cannot materialize the AddRecExpr
+; that LSR picks. We must detect that %bb8.preheader does not have a
+; preheader and avoid performing LSR on %bb7.
+; CHECK: @nopreheader2
+; CHECK: bb7:
+; CHECK: %indvar = phi i32
+define fastcc void @nopreheader2([200 x i32]* nocapture %Array2) nounwind {
+entry:
+  indirectbr i8* undef, [label %bb]
+
+bb:                                               ; preds = %bb, %entry
+  indirectbr i8* undef, [label %bb3, label %bb]
+
+bb3:                                              ; preds = %bb3, %bb
+  indirectbr i8* undef, [label %bb8.preheader, label %bb3]
+
+bb8.preheader:                                    ; preds = %bb9, %bb3
+  %indvar5 = phi i32 [ %indvar.next6, %bb9 ], [ 0, %bb3 ]
+  %tmp26 = add i32 %indvar5, 13
+  indirectbr i8* null, [label %bb7]
+
+bb7:                                              ; preds = %bb8.preheader, %bb7
+  %indvar = phi i32 [ 0, %bb8.preheader ], [ %indvar.next, %bb7 ]
+  %scevgep = getelementptr [200 x i32]* %Array2, i32 %tmp26, i32 %indvar
+  store i32 undef, i32* %scevgep, align 4
+  %indvar.next = add i32 %indvar, 1
+  indirectbr i8* undef, [label %bb9, label %bb7]
+
+bb9:                                              ; preds = %bb7
+  %indvar.next6 = add i32 %indvar5, 1
+  indirectbr i8* undef, [label %return, label %bb8.preheader]
+
+return:                                           ; preds = %bb9
+  ret void
+}
diff --git a/test/Transforms/LoopStrengthReduce/2012-01-16-nopreheader.ll b/test/Transforms/LoopStrengthReduce/2012-01-16-nopreheader.ll
new file mode 100644
index 0000000..3036a7e
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/2012-01-16-nopreheader.ll
@@ -0,0 +1,113 @@
+; RUN: opt -loop-reduce -S < %s | FileCheck %s
+;
+; <rdar://10701050> "Cannot split an edge from an IndirectBrInst" assert.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+; while.cond197 dominates the simplified loop while.cond238 but
+; has no preheader.
+;
+; CHECK: @nopreheader
+; CHECK: %while.cond238
+; CHECK: phi i64
+; CHECK-NOT: phi
+; CHECK: indirectbr
+define void @nopreheader(i8* %end) nounwind {
+entry:
+  br label %while.cond179
+
+while.cond179:                                    ; preds = %if.end434, %if.end369, %if.end277, %if.end165
+  %s.1 = phi i8* [ undef, %if.end434 ], [ %incdec.ptr356, %if.end348 ], [ undef, %entry ]
+  indirectbr i8* undef, [label %land.rhs184, label %while.end453]
+
+land.rhs184:                                      ; preds = %while.cond179
+  indirectbr i8* undef, [label %while.end453, label %while.cond197]
+
+while.cond197:                                    ; preds = %land.rhs202, %land.rhs184
+  %0 = phi i64 [ %indvar.next11, %land.rhs202 ], [ 0, %land.rhs184 ]
+  indirectbr i8* undef, [label %land.rhs202, label %while.end215]
+
+land.rhs202:                                      ; preds = %while.cond197
+  %indvar.next11 = add i64 %0, 1
+  indirectbr i8* undef, [label %while.end215, label %while.cond197]
+
+while.end215:                                     ; preds = %land.rhs202, %while.cond197
+  indirectbr i8* undef, [label %PREMATURE, label %if.end221]
+
+if.end221:                                        ; preds = %while.end215
+  indirectbr i8* undef, [label %while.cond238.preheader, label %lor.lhs.false227]
+
+lor.lhs.false227:                                 ; preds = %if.end221
+  indirectbr i8* undef, [label %while.cond238.preheader, label %if.else]
+
+while.cond238.preheader:                          ; preds = %lor.lhs.false227, %if.end221
+  %tmp16 = add i64 %0, 2
+  indirectbr i8* undef, [label %while.cond238]
+
+while.cond238:                                    ; preds = %land.rhs243, %while.cond238.preheader
+  %1 = phi i64 [ %indvar.next15, %land.rhs243 ], [ 0, %while.cond238.preheader ]
+  %tmp36 = add i64 %tmp16, %1
+  %s.3 = getelementptr i8* %s.1, i64 %tmp36
+  %cmp241 = icmp ult i8* %s.3, %end
+  indirectbr i8* undef, [label %land.rhs243, label %while.end256]
+
+land.rhs243:                                      ; preds = %while.cond238
+  %indvar.next15 = add i64 %1, 1
+  indirectbr i8* undef, [label %while.end256, label %while.cond238]
+
+while.end256:                                     ; preds = %land.rhs243, %while.cond238
+  indirectbr i8* undef, [label %PREMATURE]
+
+if.else:                                          ; preds = %lor.lhs.false227
+  indirectbr i8* undef, [label %if.then297, label %if.else386]
+
+if.then297:                                       ; preds = %if.else
+  indirectbr i8* undef, [label %PREMATURE, label %if.end307]
+
+if.end307:                                        ; preds = %if.then297
+  indirectbr i8* undef, [label %if.end314, label %FAIL]
+
+if.end314:                                        ; preds = %if.end307
+  indirectbr i8* undef, [label %if.end340]
+
+if.end340:                                        ; preds = %while.end334
+  indirectbr i8* undef, [label %PREMATURE, label %if.end348]
+
+if.end348:                                        ; preds = %if.end340
+  %incdec.ptr356 = getelementptr inbounds i8* undef, i64 2
+  indirectbr i8* undef, [label %while.cond179]
+
+if.else386:                                       ; preds = %if.else
+  indirectbr i8* undef, [label %while.end453, label %if.end434]
+
+if.end434:                                        ; preds = %if.then428, %if.end421
+  indirectbr i8* undef, [label %while.cond179]
+
+while.end453:                                     ; preds = %if.else386, %land.rhs184, %while.cond179
+  indirectbr i8* undef, [label %PREMATURE, label %if.end459]
+
+if.end459:                                        ; preds = %while.end453
+  indirectbr i8* undef, [label %if.then465, label %FAIL]
+
+if.then465:                                       ; preds = %if.end459
+  indirectbr i8* undef, [label %return, label %if.then479]
+
+if.then479:                                       ; preds = %if.then465
+  indirectbr i8* undef, [label %return]
+
+FAIL:                                             ; preds = %if.end459, %if.end307, %land.lhs.true142, %land.lhs.true131, %while.end
+  indirectbr i8* undef, [label %DECL_FAIL]
+
+PREMATURE:                                        ; preds = %while.end453, %while.end415, %if.end340, %while.end334, %if.then297, %while.end256, %while.end215
+  indirectbr i8* undef, [label %return, label %if.then495]
+
+if.then495:                                       ; preds = %PREMATURE
+  indirectbr i8* undef, [label %return]
+
+DECL_FAIL:                                        ; preds = %if.then488, %FAIL, %land.lhs.true99, %lor.lhs.false, %if.end83, %if.then39, %if.end
+  indirectbr i8* undef, [label %return]
+
+return:                                           ; preds = %if.then512, %if.end504, %DECL_FAIL, %if.then495, %PREMATURE, %if.then479, %if.then465, %if.then69, %if.end52, %if.end19, %if.then
+  ret void
+}
diff --git a/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll b/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll
new file mode 100644
index 0000000..9189d79
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll
@@ -0,0 +1,292 @@
+; RUN: llc < %s -O3 -march=thumb -mcpu=cortex-a9 | FileCheck %s -check-prefix=A9
+
+; @simple is the most basic chain of address induction variables. Chaining
+; saves at least one register and avoids complex addressing and setup
+; code.
+;
+; A9: @simple
+; no expensive address computation in the preheader
+; A9: lsl
+; A9-NOT: lsl
+; A9: %loop
+; no complex address modes
+; A9-NOT: lsl
+define i32 @simple(i32* %a, i32* %b, i32 %x) nounwind {
+entry:
+  br label %loop
+loop:
+  %iv = phi i32* [ %a, %entry ], [ %iv4, %loop ]
+  %s = phi i32 [ 0, %entry ], [ %s4, %loop ]
+  %v = load i32* %iv
+  %iv1 = getelementptr inbounds i32* %iv, i32 %x
+  %v1 = load i32* %iv1
+  %iv2 = getelementptr inbounds i32* %iv1, i32 %x
+  %v2 = load i32* %iv2
+  %iv3 = getelementptr inbounds i32* %iv2, i32 %x
+  %v3 = load i32* %iv3
+  %s1 = add i32 %s, %v
+  %s2 = add i32 %s1, %v1
+  %s3 = add i32 %s2, %v2
+  %s4 = add i32 %s3, %v3
+  %iv4 = getelementptr inbounds i32* %iv3, i32 %x
+  %cmp = icmp eq i32* %iv4, %b
+  br i1 %cmp, label %exit, label %loop
+exit:
+  ret i32 %s4
+}
+
+; @user is not currently chained because the IV is live across memory ops.
+;
+; A9: @user
+; stride multiples computed in the preheader
+; A9: lsl
+; A9: lsl
+; A9: %loop
+; complex address modes
+; A9: lsl
+; A9: lsl
+define i32 @user(i32* %a, i32* %b, i32 %x) nounwind {
+entry:
+  br label %loop
+loop:
+  %iv = phi i32* [ %a, %entry ], [ %iv4, %loop ]
+  %s = phi i32 [ 0, %entry ], [ %s4, %loop ]
+  %v = load i32* %iv
+  %iv1 = getelementptr inbounds i32* %iv, i32 %x
+  %v1 = load i32* %iv1
+  %iv2 = getelementptr inbounds i32* %iv1, i32 %x
+  %v2 = load i32* %iv2
+  %iv3 = getelementptr inbounds i32* %iv2, i32 %x
+  %v3 = load i32* %iv3
+  %s1 = add i32 %s, %v
+  %s2 = add i32 %s1, %v1
+  %s3 = add i32 %s2, %v2
+  %s4 = add i32 %s3, %v3
+  %iv4 = getelementptr inbounds i32* %iv3, i32 %x
+  store i32 %s4, i32* %iv
+  %cmp = icmp eq i32* %iv4, %b
+  br i1 %cmp, label %exit, label %loop
+exit:
+  ret i32 %s4
+}
+
+; @extrastride is a slightly more interesting case of a single
+; complete chain with multiple strides. The test case IR is what LSR
+; used to do, and exactly what we don't want to do. LSR's new IV
+; chaining feature should now undo the damage.
+;
+; A9: extrastride:
+; no spills
+; A9-NOT: str
+; only one stride multiple in the preheader
+; A9: lsl
+; A9-NOT: {{str r|lsl}}
+; A9: %for.body{{$}}
+; no complex address modes or reloads
+; A9-NOT: {{ldr .*[sp]|lsl}}
+define void @extrastride(i8* nocapture %main, i32 %main_stride, i32* nocapture %res, i32 %x, i32 %y, i32 %z) nounwind {
+entry:
+  %cmp8 = icmp eq i32 %z, 0
+  br i1 %cmp8, label %for.end, label %for.body.lr.ph
+
+for.body.lr.ph:                                   ; preds = %entry
+  %add.ptr.sum = shl i32 %main_stride, 1 ; s*2
+  %add.ptr1.sum = add i32 %add.ptr.sum, %main_stride ; s*3
+  %add.ptr2.sum = add i32 %x, %main_stride ; s + x
+  %add.ptr4.sum = shl i32 %main_stride, 2 ; s*4
+  %add.ptr3.sum = add i32 %add.ptr2.sum, %add.ptr4.sum ; total IV stride = s*5+x
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %main.addr.011 = phi i8* [ %main, %for.body.lr.ph ], [ %add.ptr6, %for.body ]
+  %i.010 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+  %res.addr.09 = phi i32* [ %res, %for.body.lr.ph ], [ %add.ptr7, %for.body ]
+  %0 = bitcast i8* %main.addr.011 to i32*
+  %1 = load i32* %0, align 4
+  %add.ptr = getelementptr inbounds i8* %main.addr.011, i32 %main_stride
+  %2 = bitcast i8* %add.ptr to i32*
+  %3 = load i32* %2, align 4
+  %add.ptr1 = getelementptr inbounds i8* %main.addr.011, i32 %add.ptr.sum
+  %4 = bitcast i8* %add.ptr1 to i32*
+  %5 = load i32* %4, align 4
+  %add.ptr2 = getelementptr inbounds i8* %main.addr.011, i32 %add.ptr1.sum
+  %6 = bitcast i8* %add.ptr2 to i32*
+  %7 = load i32* %6, align 4
+  %add.ptr3 = getelementptr inbounds i8* %main.addr.011, i32 %add.ptr4.sum
+  %8 = bitcast i8* %add.ptr3 to i32*
+  %9 = load i32* %8, align 4
+  %add = add i32 %3, %1
+  %add4 = add i32 %add, %5
+  %add5 = add i32 %add4, %7
+  %add6 = add i32 %add5, %9
+  store i32 %add6, i32* %res.addr.09, align 4
+  %add.ptr6 = getelementptr inbounds i8* %main.addr.011, i32 %add.ptr3.sum
+  %add.ptr7 = getelementptr inbounds i32* %res.addr.09, i32 %y
+  %inc = add i32 %i.010, 1
+  %cmp = icmp eq i32 %inc, %z
+  br i1 %cmp, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+; @foldedidx is an unrolled variant of this loop:
+;  for (unsigned long i = 0; i < len; i += s) {
+;    c[i] = a[i] + b[i];
+;  }
+; where 's' can be folded into the addressing mode.
+; Consequently, we should *not* form any chains.
+;
+; A9: foldedidx:
+; A9: ldrb.w {{r[0-9]|lr}}, [{{r[0-9]|lr}}, #3]
+define void @foldedidx(i8* nocapture %a, i8* nocapture %b, i8* nocapture %c) nounwind ssp {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.07 = phi i32 [ 0, %entry ], [ %inc.3, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %a, i32 %i.07
+  %0 = load i8* %arrayidx, align 1
+  %conv5 = zext i8 %0 to i32
+  %arrayidx1 = getelementptr inbounds i8* %b, i32 %i.07
+  %1 = load i8* %arrayidx1, align 1
+  %conv26 = zext i8 %1 to i32
+  %add = add nsw i32 %conv26, %conv5
+  %conv3 = trunc i32 %add to i8
+  %arrayidx4 = getelementptr inbounds i8* %c, i32 %i.07
+  store i8 %conv3, i8* %arrayidx4, align 1
+  %inc1 = or i32 %i.07, 1
+  %arrayidx.1 = getelementptr inbounds i8* %a, i32 %inc1
+  %2 = load i8* %arrayidx.1, align 1
+  %conv5.1 = zext i8 %2 to i32
+  %arrayidx1.1 = getelementptr inbounds i8* %b, i32 %inc1
+  %3 = load i8* %arrayidx1.1, align 1
+  %conv26.1 = zext i8 %3 to i32
+  %add.1 = add nsw i32 %conv26.1, %conv5.1
+  %conv3.1 = trunc i32 %add.1 to i8
+  %arrayidx4.1 = getelementptr inbounds i8* %c, i32 %inc1
+  store i8 %conv3.1, i8* %arrayidx4.1, align 1
+  %inc.12 = or i32 %i.07, 2
+  %arrayidx.2 = getelementptr inbounds i8* %a, i32 %inc.12
+  %4 = load i8* %arrayidx.2, align 1
+  %conv5.2 = zext i8 %4 to i32
+  %arrayidx1.2 = getelementptr inbounds i8* %b, i32 %inc.12
+  %5 = load i8* %arrayidx1.2, align 1
+  %conv26.2 = zext i8 %5 to i32
+  %add.2 = add nsw i32 %conv26.2, %conv5.2
+  %conv3.2 = trunc i32 %add.2 to i8
+  %arrayidx4.2 = getelementptr inbounds i8* %c, i32 %inc.12
+  store i8 %conv3.2, i8* %arrayidx4.2, align 1
+  %inc.23 = or i32 %i.07, 3
+  %arrayidx.3 = getelementptr inbounds i8* %a, i32 %inc.23
+  %6 = load i8* %arrayidx.3, align 1
+  %conv5.3 = zext i8 %6 to i32
+  %arrayidx1.3 = getelementptr inbounds i8* %b, i32 %inc.23
+  %7 = load i8* %arrayidx1.3, align 1
+  %conv26.3 = zext i8 %7 to i32
+  %add.3 = add nsw i32 %conv26.3, %conv5.3
+  %conv3.3 = trunc i32 %add.3 to i8
+  %arrayidx4.3 = getelementptr inbounds i8* %c, i32 %inc.23
+  store i8 %conv3.3, i8* %arrayidx4.3, align 1
+  %inc.3 = add nsw i32 %i.07, 4
+  %exitcond.3 = icmp eq i32 %inc.3, 400
+  br i1 %exitcond.3, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+; @testNeon is an important example of the need for ivchains.
+;
+; Currently we have three extra add.w's that keep the store address
+; live past the next increment because ISEL is unfortunately undoing
+; the store chain. ISEL also fails to convert the stores to
+; post-increment addressing. However, the loads should use
+; post-increment addressing, no add's or add.w's beyond the three
+; mentioned. Most importantly, there should be no spills or reloads!
+;
+; CHECK: testNeon:
+; CHECK: %.lr.ph
+; CHECK-NOT: lsl.w
+; CHECK-NOT: {{ldr|str|adds|add r}}
+; CHECK: add.w r
+; CHECK-NOT: {{ldr|str|adds|add r}}
+; CHECK: add.w r
+; CHECK-NOT: {{ldr|str|adds|add r}}
+; CHECK: add.w r
+; CHECK-NOT: {{ldr|str|adds|add r}}
+; CHECK-NOT: add.w r
+; CHECK: bne
+define hidden void @testNeon(i8* %ref_data, i32 %ref_stride, i32 %limit, <16 x i8>* nocapture %data) nounwind optsize {
+  %1 = icmp sgt i32 %limit, 0
+  br i1 %1, label %.lr.ph, label %45
+
+.lr.ph:                                           ; preds = %0
+  %2 = shl nsw i32 %ref_stride, 1
+  %3 = mul nsw i32 %ref_stride, 3
+  %4 = shl nsw i32 %ref_stride, 2
+  %5 = mul nsw i32 %ref_stride, 5
+  %6 = mul nsw i32 %ref_stride, 6
+  %7 = mul nsw i32 %ref_stride, 7
+  %8 = shl nsw i32 %ref_stride, 3
+  %9 = sub i32 0, %8
+  %10 = mul i32 %limit, -64
+  br label %11
+
+; <label>:11                                      ; preds = %11, %.lr.ph
+  %.05 = phi i8* [ %ref_data, %.lr.ph ], [ %42, %11 ]
+  %counter.04 = phi i32 [ 0, %.lr.ph ], [ %44, %11 ]
+  %result.03 = phi <16 x i8> [ zeroinitializer, %.lr.ph ], [ %41, %11 ]
+  %.012 = phi <16 x i8>* [ %data, %.lr.ph ], [ %43, %11 ]
+  %12 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %.05, i32 1) nounwind
+  %13 = getelementptr inbounds i8* %.05, i32 %ref_stride
+  %14 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %13, i32 1) nounwind
+  %15 = shufflevector <1 x i64> %12, <1 x i64> %14, <2 x i32> <i32 0, i32 1>
+  %16 = bitcast <2 x i64> %15 to <16 x i8>
+  %17 = getelementptr inbounds <16 x i8>* %.012, i32 1
+  store <16 x i8> %16, <16 x i8>* %.012, align 4
+  %18 = getelementptr inbounds i8* %.05, i32 %2
+  %19 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %18, i32 1) nounwind
+  %20 = getelementptr inbounds i8* %.05, i32 %3
+  %21 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %20, i32 1) nounwind
+  %22 = shufflevector <1 x i64> %19, <1 x i64> %21, <2 x i32> <i32 0, i32 1>
+  %23 = bitcast <2 x i64> %22 to <16 x i8>
+  %24 = getelementptr inbounds <16 x i8>* %.012, i32 2
+  store <16 x i8> %23, <16 x i8>* %17, align 4
+  %25 = getelementptr inbounds i8* %.05, i32 %4
+  %26 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %25, i32 1) nounwind
+  %27 = getelementptr inbounds i8* %.05, i32 %5
+  %28 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %27, i32 1) nounwind
+  %29 = shufflevector <1 x i64> %26, <1 x i64> %28, <2 x i32> <i32 0, i32 1>
+  %30 = bitcast <2 x i64> %29 to <16 x i8>
+  %31 = getelementptr inbounds <16 x i8>* %.012, i32 3
+  store <16 x i8> %30, <16 x i8>* %24, align 4
+  %32 = getelementptr inbounds i8* %.05, i32 %6
+  %33 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %32, i32 1) nounwind
+  %34 = getelementptr inbounds i8* %.05, i32 %7
+  %35 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %34, i32 1) nounwind
+  %36 = shufflevector <1 x i64> %33, <1 x i64> %35, <2 x i32> <i32 0, i32 1>
+  %37 = bitcast <2 x i64> %36 to <16 x i8>
+  store <16 x i8> %37, <16 x i8>* %31, align 4
+  %38 = add <16 x i8> %16, %23
+  %39 = add <16 x i8> %38, %30
+  %40 = add <16 x i8> %39, %37
+  %41 = add <16 x i8> %result.03, %40
+  %42 = getelementptr i8* %.05, i32 %9
+  %43 = getelementptr inbounds <16 x i8>* %.012, i32 -64
+  %44 = add nsw i32 %counter.04, 1
+  %exitcond = icmp eq i32 %44, %limit
+  br i1 %exitcond, label %._crit_edge, label %11
+
+._crit_edge:                                      ; preds = %11
+  %scevgep = getelementptr <16 x i8>* %data, i32 %10
+  br label %45
+
+; <label>:45                                      ; preds = %._crit_edge, %0
+  %result.0.lcssa = phi <16 x i8> [ %41, %._crit_edge ], [ zeroinitializer, %0 ]
+  %.01.lcssa = phi <16 x i8>* [ %scevgep, %._crit_edge ], [ %data, %0 ]
+  store <16 x i8> %result.0.lcssa, <16 x i8>* %.01.lcssa, align 4
+  ret void
+}
+
+declare <1 x i64> @llvm.arm.neon.vld1.v1i64(i8*, i32) nounwind readonly
diff --git a/test/Transforms/LoopStrengthReduce/ARM/lit.local.cfg b/test/Transforms/LoopStrengthReduce/ARM/lit.local.cfg
new file mode 100644
index 0000000..d622529
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/ARM/lit.local.cfg
@@ -0,0 +1,13 @@
+config.suffixes = ['.ll']
+
+def getRoot(config):
+    if not config.parent:
+        return config
+    return getRoot(config.parent)
+
+root = getRoot(config)
+
+targets = set(root.targets_to_build.split())
+if not 'ARM' in targets:
+    config.unsupported = True
+
diff --git a/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll b/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll
new file mode 100644
index 0000000..2dcaab8
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll
@@ -0,0 +1,92 @@
+; RUN: llc < %s -O3 -march=x86-64 -mcpu=core2 | FileCheck %s
+
+declare i1 @check() nounwind
+declare i1 @foo(i8*, i8*, i8*) nounwind
+
+; Check that redundant phi elimination ran
+; CHECK: @test
+; CHECK: %while.body.i
+; CHECK: movs
+; CHECK-NOT: movs
+; CHECK: %for.end.i
+define i32 @test(i8* %base) nounwind uwtable ssp {
+entry:
+  br label %while.body.lr.ph.i
+
+while.body.lr.ph.i:                               ; preds = %entry
+  br label %while.body.i
+
+while.body.i:                                     ; preds = %cond.true29.i, %while.body.lr.ph.i
+  %indvars.iv7.i = phi i64 [ 16, %while.body.lr.ph.i ], [ %indvars.iv.next8.i, %cond.true29.i ]
+  %i.05.i = phi i64 [ 0, %while.body.lr.ph.i ], [ %indvars.iv7.i, %cond.true29.i ]
+  %sext.i = shl i64 %i.05.i, 32
+  %idx.ext.i = ashr exact i64 %sext.i, 32
+  %add.ptr.sum.i = add i64 %idx.ext.i, 16
+  br label %for.body.i
+
+for.body.i:                                       ; preds = %for.body.i, %while.body.i
+  %indvars.iv.i = phi i64 [ 0, %while.body.i ], [ %indvars.iv.next.i, %for.body.i ]
+  %add.ptr.sum = add i64 %add.ptr.sum.i, %indvars.iv.i
+  %arrayidx22.i = getelementptr inbounds i8* %base, i64 %add.ptr.sum
+  %0 = load i8* %arrayidx22.i, align 1
+  %indvars.iv.next.i = add i64 %indvars.iv.i, 1
+  %cmp = call i1 @check() nounwind
+  br i1 %cmp, label %for.end.i, label %for.body.i
+
+for.end.i:                                        ; preds = %for.body.i
+  %add.ptr.i144 = getelementptr inbounds i8* %base, i64 %add.ptr.sum.i
+  %cmp2 = tail call i1 @foo(i8* %add.ptr.i144, i8* %add.ptr.i144, i8* undef) nounwind
+  br i1 %cmp2, label %cond.true29.i, label %cond.false35.i
+
+cond.true29.i:                                    ; preds = %for.end.i
+  %indvars.iv.next8.i = add i64 %indvars.iv7.i, 16
+  br i1 false, label %exit, label %while.body.i
+
+cond.false35.i:                                   ; preds = %for.end.i
+  unreachable
+
+exit:                                 ; preds = %cond.true29.i
+  ret i32 0
+}
+
+%struct.anon.7.91.199.307.415.475.559.643.751.835.943.1003.1111.1219.1351.1375.1399.1435.1471.1483.1519.1531.1651.1771 = type { i32, i32, i32 }
+
+@tags = external global [5000 x %struct.anon.7.91.199.307.415.475.559.643.751.835.943.1003.1111.1219.1351.1375.1399.1435.1471.1483.1519.1531.1651.1771], align 16
+
+; PR11782: SCEVExpander assert
+;
+; Test phi reuse after LSR that requires SCEVExpander to hoist an
+; interesting GEP.
+;
+; CHECK: @test2
+; CHECK: %entry
+; CHECK-NOT: mov
+; CHECK: jne
+define void @test2(i32 %n) nounwind uwtable {
+entry:
+  br i1 undef, label %while.end, label %for.cond468
+
+for.cond468:                                      ; preds = %if.then477, %entry
+  %indvars.iv1163 = phi i64 [ %indvars.iv.next1164, %if.then477 ], [ 1, %entry ]
+  %k.0.in = phi i32* [ %last, %if.then477 ], [ getelementptr inbounds ([5000 x %struct.anon.7.91.199.307.415.475.559.643.751.835.943.1003.1111.1219.1351.1375.1399.1435.1471.1483.1519.1531.1651.1771]* @tags, i64 0, i64 0, i32 2), %entry ]
+  %k.0 = load i32* %k.0.in, align 4
+  %0 = trunc i64 %indvars.iv1163 to i32
+  %cmp469 = icmp slt i32 %0, %n
+  br i1 %cmp469, label %for.body471, label %for.inc498
+
+for.body471:                                      ; preds = %for.cond468
+  %first = getelementptr inbounds [5000 x %struct.anon.7.91.199.307.415.475.559.643.751.835.943.1003.1111.1219.1351.1375.1399.1435.1471.1483.1519.1531.1651.1771]* @tags, i64 0, i64 %indvars.iv1163, i32 1
+  %1 = load i32* %first, align 4
+  br i1 undef, label %if.then477, label %for.inc498
+
+if.then477:                                       ; preds = %for.body471
+  %last = getelementptr inbounds [5000 x %struct.anon.7.91.199.307.415.475.559.643.751.835.943.1003.1111.1219.1351.1375.1399.1435.1471.1483.1519.1531.1651.1771]* @tags, i64 0, i64 %indvars.iv1163, i32 2
+  %indvars.iv.next1164 = add i64 %indvars.iv1163, 1
+  br label %for.cond468
+
+for.inc498:                                       ; preds = %for.inc498, %for.body471, %for.cond468
+  br label %for.inc498
+
+while.end:                                        ; preds = %entry
+  ret void
+}
diff --git a/test/Transforms/LoopStrengthReduce/X86/dg.exp b/test/Transforms/LoopStrengthReduce/X86/dg.exp
deleted file mode 100644
index 7b7bd4e..0000000
--- a/test/Transforms/LoopStrengthReduce/X86/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target X86] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll}]]
-}
diff --git a/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll b/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
new file mode 100644
index 0000000..e42b67f
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
@@ -0,0 +1,300 @@
+; RUN: llc < %s -O3 -march=x86-64 -mcpu=core2 | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -O3 -march=x86 -mcpu=core2 | FileCheck %s -check-prefix=X32
+
+; @simple is the most basic chain of address induction variables. Chaining
+; saves at least one register and avoids complex addressing and setup
+; code.
+;
+; X64: @simple
+; %x * 4
+; X64: shlq $2
+; no other address computation in the preheader
+; X64-NEXT: xorl
+; X64-NEXT: .align
+; X64: %loop
+; no complex address modes
+; X64-NOT: (%{{[^)]+}},%{{[^)]+}},
+;
+; X32: @simple
+; no expensive address computation in the preheader
+; X32-NOT: imul
+; X32: %loop
+; no complex address modes
+; X32-NOT: (%{{[^)]+}},%{{[^)]+}},
+define i32 @simple(i32* %a, i32* %b, i32 %x) nounwind {
+entry:
+  br label %loop
+loop:
+  %iv = phi i32* [ %a, %entry ], [ %iv4, %loop ]
+  %s = phi i32 [ 0, %entry ], [ %s4, %loop ]
+  %v = load i32* %iv
+  %iv1 = getelementptr inbounds i32* %iv, i32 %x
+  %v1 = load i32* %iv1
+  %iv2 = getelementptr inbounds i32* %iv1, i32 %x
+  %v2 = load i32* %iv2
+  %iv3 = getelementptr inbounds i32* %iv2, i32 %x
+  %v3 = load i32* %iv3
+  %s1 = add i32 %s, %v
+  %s2 = add i32 %s1, %v1
+  %s3 = add i32 %s2, %v2
+  %s4 = add i32 %s3, %v3
+  %iv4 = getelementptr inbounds i32* %iv3, i32 %x
+  %cmp = icmp eq i32* %iv4, %b
+  br i1 %cmp, label %exit, label %loop
+exit:
+  ret i32 %s4
+}
+
+; @user is not currently chained because the IV is live across memory ops.
+;
+; X64: @user
+; X64: shlq $4
+; X64: lea
+; X64: lea
+; X64: %loop
+; complex address modes
+; X64: (%{{[^)]+}},%{{[^)]+}},
+;
+; X32: @user
+; expensive address computation in the preheader
+; X32: imul
+; X32: %loop
+; complex address modes
+; X32: (%{{[^)]+}},%{{[^)]+}},
+define i32 @user(i32* %a, i32* %b, i32 %x) nounwind {
+entry:
+  br label %loop
+loop:
+  %iv = phi i32* [ %a, %entry ], [ %iv4, %loop ]
+  %s = phi i32 [ 0, %entry ], [ %s4, %loop ]
+  %v = load i32* %iv
+  %iv1 = getelementptr inbounds i32* %iv, i32 %x
+  %v1 = load i32* %iv1
+  %iv2 = getelementptr inbounds i32* %iv1, i32 %x
+  %v2 = load i32* %iv2
+  %iv3 = getelementptr inbounds i32* %iv2, i32 %x
+  %v3 = load i32* %iv3
+  %s1 = add i32 %s, %v
+  %s2 = add i32 %s1, %v1
+  %s3 = add i32 %s2, %v2
+  %s4 = add i32 %s3, %v3
+  %iv4 = getelementptr inbounds i32* %iv3, i32 %x
+  store i32 %s4, i32* %iv
+  %cmp = icmp eq i32* %iv4, %b
+  br i1 %cmp, label %exit, label %loop
+exit:
+  ret i32 %s4
+}
+
+; @extrastride is a slightly more interesting case of a single
+; complete chain with multiple strides. The test case IR is what LSR
+; used to do, and exactly what we don't want to do. LSR's new IV
+; chaining feature should now undo the damage.
+;
+; X64: extrastride:
+; We currently don't handle this on X64 because the sexts cause
+; strange increment expressions like this:
+; IV + ((sext i32 (2 * %s) to i64) + (-1 * (sext i32 %s to i64)))
+;
+; X32: extrastride:
+; no spills in the preheader
+; X32-NOT: mov{{.*}}(%esp){{$}}
+; X32: %for.body{{$}}
+; no complex address modes
+; X32-NOT: (%{{[^)]+}},%{{[^)]+}},
+; no reloads
+; X32-NOT: (%esp)
+define void @extrastride(i8* nocapture %main, i32 %main_stride, i32* nocapture %res, i32 %x, i32 %y, i32 %z) nounwind {
+entry:
+  %cmp8 = icmp eq i32 %z, 0
+  br i1 %cmp8, label %for.end, label %for.body.lr.ph
+
+for.body.lr.ph:                                   ; preds = %entry
+  %add.ptr.sum = shl i32 %main_stride, 1 ; s*2
+  %add.ptr1.sum = add i32 %add.ptr.sum, %main_stride ; s*3
+  %add.ptr2.sum = add i32 %x, %main_stride ; s + x
+  %add.ptr4.sum = shl i32 %main_stride, 2 ; s*4
+  %add.ptr3.sum = add i32 %add.ptr2.sum, %add.ptr4.sum ; total IV stride = s*5+x
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %main.addr.011 = phi i8* [ %main, %for.body.lr.ph ], [ %add.ptr6, %for.body ]
+  %i.010 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+  %res.addr.09 = phi i32* [ %res, %for.body.lr.ph ], [ %add.ptr7, %for.body ]
+  %0 = bitcast i8* %main.addr.011 to i32*
+  %1 = load i32* %0, align 4
+  %add.ptr = getelementptr inbounds i8* %main.addr.011, i32 %main_stride
+  %2 = bitcast i8* %add.ptr to i32*
+  %3 = load i32* %2, align 4
+  %add.ptr1 = getelementptr inbounds i8* %main.addr.011, i32 %add.ptr.sum
+  %4 = bitcast i8* %add.ptr1 to i32*
+  %5 = load i32* %4, align 4
+  %add.ptr2 = getelementptr inbounds i8* %main.addr.011, i32 %add.ptr1.sum
+  %6 = bitcast i8* %add.ptr2 to i32*
+  %7 = load i32* %6, align 4
+  %add.ptr3 = getelementptr inbounds i8* %main.addr.011, i32 %add.ptr4.sum
+  %8 = bitcast i8* %add.ptr3 to i32*
+  %9 = load i32* %8, align 4
+  %add = add i32 %3, %1
+  %add4 = add i32 %add, %5
+  %add5 = add i32 %add4, %7
+  %add6 = add i32 %add5, %9
+  store i32 %add6, i32* %res.addr.09, align 4
+  %add.ptr6 = getelementptr inbounds i8* %main.addr.011, i32 %add.ptr3.sum
+  %add.ptr7 = getelementptr inbounds i32* %res.addr.09, i32 %y
+  %inc = add i32 %i.010, 1
+  %cmp = icmp eq i32 %inc, %z
+  br i1 %cmp, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+; @foldedidx is an unrolled variant of this loop:
+;  for (unsigned long i = 0; i < len; i += s) {
+;    c[i] = a[i] + b[i];
+;  }
+; where 's' can be folded into the addressing mode.
+; Consequently, we should *not* form any chains.
+;
+; X64: foldedidx:
+; X64: movzbl -3(
+;
+; X32: foldedidx:
+; X32: movzbl -3(
+define void @foldedidx(i8* nocapture %a, i8* nocapture %b, i8* nocapture %c) nounwind ssp {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.07 = phi i32 [ 0, %entry ], [ %inc.3, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %a, i32 %i.07
+  %0 = load i8* %arrayidx, align 1
+  %conv5 = zext i8 %0 to i32
+  %arrayidx1 = getelementptr inbounds i8* %b, i32 %i.07
+  %1 = load i8* %arrayidx1, align 1
+  %conv26 = zext i8 %1 to i32
+  %add = add nsw i32 %conv26, %conv5
+  %conv3 = trunc i32 %add to i8
+  %arrayidx4 = getelementptr inbounds i8* %c, i32 %i.07
+  store i8 %conv3, i8* %arrayidx4, align 1
+  %inc1 = or i32 %i.07, 1
+  %arrayidx.1 = getelementptr inbounds i8* %a, i32 %inc1
+  %2 = load i8* %arrayidx.1, align 1
+  %conv5.1 = zext i8 %2 to i32
+  %arrayidx1.1 = getelementptr inbounds i8* %b, i32 %inc1
+  %3 = load i8* %arrayidx1.1, align 1
+  %conv26.1 = zext i8 %3 to i32
+  %add.1 = add nsw i32 %conv26.1, %conv5.1
+  %conv3.1 = trunc i32 %add.1 to i8
+  %arrayidx4.1 = getelementptr inbounds i8* %c, i32 %inc1
+  store i8 %conv3.1, i8* %arrayidx4.1, align 1
+  %inc.12 = or i32 %i.07, 2
+  %arrayidx.2 = getelementptr inbounds i8* %a, i32 %inc.12
+  %4 = load i8* %arrayidx.2, align 1
+  %conv5.2 = zext i8 %4 to i32
+  %arrayidx1.2 = getelementptr inbounds i8* %b, i32 %inc.12
+  %5 = load i8* %arrayidx1.2, align 1
+  %conv26.2 = zext i8 %5 to i32
+  %add.2 = add nsw i32 %conv26.2, %conv5.2
+  %conv3.2 = trunc i32 %add.2 to i8
+  %arrayidx4.2 = getelementptr inbounds i8* %c, i32 %inc.12
+  store i8 %conv3.2, i8* %arrayidx4.2, align 1
+  %inc.23 = or i32 %i.07, 3
+  %arrayidx.3 = getelementptr inbounds i8* %a, i32 %inc.23
+  %6 = load i8* %arrayidx.3, align 1
+  %conv5.3 = zext i8 %6 to i32
+  %arrayidx1.3 = getelementptr inbounds i8* %b, i32 %inc.23
+  %7 = load i8* %arrayidx1.3, align 1
+  %conv26.3 = zext i8 %7 to i32
+  %add.3 = add nsw i32 %conv26.3, %conv5.3
+  %conv3.3 = trunc i32 %add.3 to i8
+  %arrayidx4.3 = getelementptr inbounds i8* %c, i32 %inc.23
+  store i8 %conv3.3, i8* %arrayidx4.3, align 1
+  %inc.3 = add nsw i32 %i.07, 4
+  %exitcond.3 = icmp eq i32 %inc.3, 400
+  br i1 %exitcond.3, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+; @multioper tests instructions with multiple IV user operands. We
+; should be able to chain them independent of each other.
+;
+; X64: @multioper
+; X64: %for.body
+; X64: movl %{{.*}},4)
+; X64-NEXT: leal 1(
+; X64-NEXT: movl %{{.*}},4)
+; X64-NEXT: leal 2(
+; X64-NEXT: movl %{{.*}},4)
+; X64-NEXT: leal 3(
+; X64-NEXT: movl %{{.*}},4)
+;
+; X32: @multioper
+; X32: %for.body
+; X32: movl %{{.*}},4)
+; X32-NEXT: leal 1(
+; X32-NEXT: movl %{{.*}},4)
+; X32-NEXT: leal 2(
+; X32-NEXT: movl %{{.*}},4)
+; X32-NEXT: leal 3(
+; X32-NEXT: movl %{{.*}},4)
+define void @multioper(i32* %a, i32 %n) nounwind {
+entry:
+  br label %for.body
+
+for.body:
+  %p = phi i32* [ %p.next, %for.body ], [ %a, %entry ]
+  %i = phi i32 [ %inc4, %for.body ], [ 0, %entry ]
+  store i32 %i, i32* %p, align 4
+  %inc1 = or i32 %i, 1
+  %add.ptr.i1 = getelementptr inbounds i32* %p, i32 1
+  store i32 %inc1, i32* %add.ptr.i1, align 4
+  %inc2 = add nsw i32 %i, 2
+  %add.ptr.i2 = getelementptr inbounds i32* %p, i32 2
+  store i32 %inc2, i32* %add.ptr.i2, align 4
+  %inc3 = add nsw i32 %i, 3
+  %add.ptr.i3 = getelementptr inbounds i32* %p, i32 3
+  store i32 %inc3, i32* %add.ptr.i3, align 4
+  %p.next = getelementptr inbounds i32* %p, i32 4
+  %inc4 = add nsw i32 %i, 4
+  %cmp = icmp slt i32 %inc4, %n
+  br i1 %cmp, label %for.body, label %exit
+
+exit:
+  ret void
+}
+
+; @testCmpZero has an ICmpZero LSR use that should not be hidden from
+; LSR. Profitable chains should have more than one nonzero increment
+; anyway.
+;
+; X32: @testCmpZero
+; X32: %for.body82.us
+; X32: dec
+; X32: jne
+define void @testCmpZero(i8* %src, i8* %dst, i32 %srcidx, i32 %dstidx, i32 %len) nounwind ssp {
+entry:
+  %dest0 = getelementptr inbounds i8* %src, i32 %srcidx
+  %source0 = getelementptr inbounds i8* %dst, i32 %dstidx
+  %add.ptr79.us.sum = add i32 %srcidx, %len
+  %lftr.limit = getelementptr i8* %src, i32 %add.ptr79.us.sum
+  br label %for.body82.us
+
+for.body82.us:
+  %dest = phi i8* [ %dest0, %entry ], [ %incdec.ptr91.us, %for.body82.us ]
+  %source = phi i8* [ %source0, %entry ], [ %add.ptr83.us, %for.body82.us ]
+  %0 = bitcast i8* %source to i32*
+  %1 = load i32* %0, align 4
+  %trunc = trunc i32 %1 to i8
+  %add.ptr83.us = getelementptr inbounds i8* %source, i32 4
+  %incdec.ptr91.us = getelementptr inbounds i8* %dest, i32 1
+  store i8 %trunc, i8* %dest, align 1
+  %exitcond = icmp eq i8* %incdec.ptr91.us, %lftr.limit
+  br i1 %exitcond, label %return, label %for.body82.us
+
+return:
+  ret void
+}
diff --git a/test/Transforms/LoopStrengthReduce/X86/ivchain-stress-X86.ll b/test/Transforms/LoopStrengthReduce/X86/ivchain-stress-X86.ll
new file mode 100644
index 0000000..d8e0aa9
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/X86/ivchain-stress-X86.ll
@@ -0,0 +1,96 @@
+; REQUIRES: asserts
+; RUN: llc < %s -O3 -march=x86-64 -mcpu=core2 -stress-ivchain | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -O3 -march=x86 -mcpu=core2 -stress-ivchain | FileCheck %s -check-prefix=X32
+
+; @sharedidx is an unrolled variant of this loop:
+;  for (unsigned long i = 0; i < len; i += s) {
+;    c[i] = a[i] + b[i];
+;  }
+; where 's' cannot be folded into the addressing mode.
+;
+; This is not quite profitable to chain. But with -stress-ivchain, we
+; can form three address chains in place of the shared induction
+; variable.
+
+; X64: sharedidx:
+; X64: %for.body.preheader
+; X64-NOT: leal ({{.*}},4)
+; X64: %for.body.1
+
+; X32: sharedidx:
+; X32: %for.body.2
+; X32: add
+; X32: add
+; X32: add
+; X32: add
+; X32: add
+; X32: %for.body.3
+define void @sharedidx(i8* nocapture %a, i8* nocapture %b, i8* nocapture %c, i32 %s, i32 %len) nounwind ssp {
+entry:
+  %cmp8 = icmp eq i32 %len, 0
+  br i1 %cmp8, label %for.end, label %for.body
+
+for.body:                                         ; preds = %entry, %for.body.3
+  %i.09 = phi i32 [ %add5.3, %for.body.3 ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i8* %a, i32 %i.09
+  %0 = load i8* %arrayidx, align 1
+  %conv6 = zext i8 %0 to i32
+  %arrayidx1 = getelementptr inbounds i8* %b, i32 %i.09
+  %1 = load i8* %arrayidx1, align 1
+  %conv27 = zext i8 %1 to i32
+  %add = add nsw i32 %conv27, %conv6
+  %conv3 = trunc i32 %add to i8
+  %arrayidx4 = getelementptr inbounds i8* %c, i32 %i.09
+  store i8 %conv3, i8* %arrayidx4, align 1
+  %add5 = add i32 %i.09, %s
+  %cmp = icmp ult i32 %add5, %len
+  br i1 %cmp, label %for.body.1, label %for.end
+
+for.end:                                          ; preds = %for.body, %for.body.1, %for.body.2, %for.body.3, %entry
+  ret void
+
+for.body.1:                                       ; preds = %for.body
+  %arrayidx.1 = getelementptr inbounds i8* %a, i32 %add5
+  %2 = load i8* %arrayidx.1, align 1
+  %conv6.1 = zext i8 %2 to i32
+  %arrayidx1.1 = getelementptr inbounds i8* %b, i32 %add5
+  %3 = load i8* %arrayidx1.1, align 1
+  %conv27.1 = zext i8 %3 to i32
+  %add.1 = add nsw i32 %conv27.1, %conv6.1
+  %conv3.1 = trunc i32 %add.1 to i8
+  %arrayidx4.1 = getelementptr inbounds i8* %c, i32 %add5
+  store i8 %conv3.1, i8* %arrayidx4.1, align 1
+  %add5.1 = add i32 %add5, %s
+  %cmp.1 = icmp ult i32 %add5.1, %len
+  br i1 %cmp.1, label %for.body.2, label %for.end
+
+for.body.2:                                       ; preds = %for.body.1
+  %arrayidx.2 = getelementptr inbounds i8* %a, i32 %add5.1
+  %4 = load i8* %arrayidx.2, align 1
+  %conv6.2 = zext i8 %4 to i32
+  %arrayidx1.2 = getelementptr inbounds i8* %b, i32 %add5.1
+  %5 = load i8* %arrayidx1.2, align 1
+  %conv27.2 = zext i8 %5 to i32
+  %add.2 = add nsw i32 %conv27.2, %conv6.2
+  %conv3.2 = trunc i32 %add.2 to i8
+  %arrayidx4.2 = getelementptr inbounds i8* %c, i32 %add5.1
+  store i8 %conv3.2, i8* %arrayidx4.2, align 1
+  %add5.2 = add i32 %add5.1, %s
+  %cmp.2 = icmp ult i32 %add5.2, %len
+  br i1 %cmp.2, label %for.body.3, label %for.end
+
+for.body.3:                                       ; preds = %for.body.2
+  %arrayidx.3 = getelementptr inbounds i8* %a, i32 %add5.2
+  %6 = load i8* %arrayidx.3, align 1
+  %conv6.3 = zext i8 %6 to i32
+  %arrayidx1.3 = getelementptr inbounds i8* %b, i32 %add5.2
+  %7 = load i8* %arrayidx1.3, align 1
+  %conv27.3 = zext i8 %7 to i32
+  %add.3 = add nsw i32 %conv27.3, %conv6.3
+  %conv3.3 = trunc i32 %add.3 to i8
+  %arrayidx4.3 = getelementptr inbounds i8* %c, i32 %add5.2
+  store i8 %conv3.3, i8* %arrayidx4.3, align 1
+  %add5.3 = add i32 %add5.2, %s
+  %cmp.3 = icmp ult i32 %add5.3, %len
+  br i1 %cmp.3, label %for.body, label %for.end
+}
diff --git a/test/Transforms/LoopStrengthReduce/X86/lit.local.cfg b/test/Transforms/LoopStrengthReduce/X86/lit.local.cfg
new file mode 100644
index 0000000..84bd88c
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/X86/lit.local.cfg
@@ -0,0 +1,13 @@
+config.suffixes = ['.ll']
+
+def getRoot(config):
+    if not config.parent:
+        return config
+    return getRoot(config.parent)
+
+root = getRoot(config)
+
+targets = set(root.targets_to_build.split())
+if not 'X86' in targets:
+    config.unsupported = True
+
diff --git a/test/Transforms/LoopStrengthReduce/dg.exp b/test/Transforms/LoopStrengthReduce/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/LoopStrengthReduce/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/LoopStrengthReduce/dominate-assert.ll b/test/Transforms/LoopStrengthReduce/dominate-assert.ll
new file mode 100644
index 0000000..b87bf62
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/dominate-assert.ll
@@ -0,0 +1,70 @@
+; RUN: opt -loop-reduce %s
+; we used to crash on this one
+
+declare i8* @_Znwm()
+declare i32 @__gxx_personality_v0(...)
+declare void @g()
+define void @f() {
+bb0:
+  br label %bb1
+bb1:
+  %v0 = phi i64 [ 0, %bb0 ], [ %v1, %bb1 ]
+  %v1 = add nsw i64 %v0, 1
+  br i1 undef, label %bb2, label %bb1
+bb2:
+  %v2 = icmp eq i64 %v0, 0
+  br i1 %v2, label %bb6, label %bb3
+bb3:
+  %v3 = invoke noalias i8* @_Znwm()
+          to label %bb5 unwind label %bb4
+bb4:
+  %v4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          cleanup
+  br label %bb9
+bb5:
+  %v5 = bitcast i8* %v3 to i32**
+  %add.ptr.i = getelementptr inbounds i32** %v5, i64 %v0
+  br label %bb6
+bb6:
+  %v6 = phi i32** [ null, %bb2 ], [ %add.ptr.i, %bb5 ]
+  invoke void @g()
+          to label %bb7 unwind label %bb8
+bb7:
+  unreachable
+bb8:
+  %v7 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          cleanup
+  br label %bb9
+bb9:
+  resume { i8*, i32 } zeroinitializer
+}
+
+
+define void @h() {
+bb1:
+  invoke void @g() optsize
+          to label %bb2 unwind label %bb5
+bb2:
+  %arrayctor.cur = phi i8* [ undef, %bb1 ], [ %arrayctor.next, %bb3 ]
+  invoke void @g() optsize
+          to label %bb3 unwind label %bb6
+bb3:
+  %arrayctor.next = getelementptr inbounds i8* %arrayctor.cur, i64 1
+  br label %bb2
+bb4:
+  ret void
+bb5:
+  %tmp = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          cleanup
+  invoke void @g() optsize
+          to label %bb4 unwind label %bb7
+bb6:
+  %tmp1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          cleanup
+  %arraydestroy.isempty = icmp eq i8* undef, %arrayctor.cur
+  ret void
+bb7:
+  %lpad.nonloopexit = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          catch i8* null
+  ret void
+}
diff --git a/test/Transforms/LoopStrengthReduce/ivchain.ll b/test/Transforms/LoopStrengthReduce/ivchain.ll
new file mode 100644
index 0000000..ce7ad19
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/ivchain.ll
@@ -0,0 +1,43 @@
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
+;
+; PR11782: bad cast to AddRecExpr.
+; A sign extend feeds an IVUser and cannot be hoisted into the AddRec.
+; CollectIVChains should bail out on this case.
+
+%struct = type { i8*, i8*, i16, i64, i16, i16, i16, i64, i64, i16, i8*, i64, i64, i64 }
+
+; CHECK: @test
+; CHECK: for.body:
+; CHECK: lsr.iv = phi %struct
+; CHECK: br
+define i32 @test(i8* %h, i32 %more) nounwind uwtable {
+entry:
+  br i1 undef, label %land.end238, label %return
+
+land.end238:                                      ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %sw.epilog, %land.end238
+  %fbh.0 = phi %struct* [ undef, %land.end238 ], [ %incdec.ptr, %sw.epilog ]
+  %column_n.0 = phi i16 [ 0, %land.end238 ], [ %inc601, %sw.epilog ]
+  %conv250 = sext i16 %column_n.0 to i32 ; the only sext in this function; its input is the i16 loop counter
+  %add257 = add nsw i32 %conv250, 1
+  %conv258 = trunc i32 %add257 to i16
+  %cmp263 = icmp ult i16 undef, 2
+  br label %if.end388
+
+if.end388:                                        ; preds = %for.body
+  %ColLength = getelementptr inbounds %struct* %fbh.0, i64 0, i32 7
+  %call405 = call signext i16 @SQLColAttribute(i8* undef, i16 zeroext %conv258, i16 zeroext 1003, i8* null, i16 signext 0, i16* null, i64* %ColLength) nounwind
+  br label %sw.epilog
+
+sw.epilog:                                        ; preds = %if.end388
+  %inc601 = add i16 %column_n.0, 1
+  %incdec.ptr = getelementptr inbounds %struct* %fbh.0, i64 1
+  br label %for.body
+
+return:                                           ; preds = %entry
+  ret i32 1
+}
+
+declare signext i16 @SQLColAttribute(i8*, i16 zeroext, i16 zeroext, i8*, i16 signext, i16*, i64*)
diff --git a/test/Transforms/LoopStrengthReduce/lit.local.cfg b/test/Transforms/LoopStrengthReduce/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll b/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
index 76aa08c..96904c6 100644
--- a/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
+++ b/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
@@ -1,7 +1,7 @@
 ; RUN: opt -loop-reduce -S < %s | FileCheck %s
 ; PR9939
 
-; LSR should property handle the post-inc offset when folding the
+; LSR should properly handle the post-inc offset when folding the
 ; non-IV operand of an icmp into the IV.
 
 ; CHECK:   %4 = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
diff --git a/test/Transforms/LoopStrengthReduce/pr12018.ll b/test/Transforms/LoopStrengthReduce/pr12018.ll
new file mode 100644
index 0000000..ee7b1e8
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/pr12018.ll
@@ -0,0 +1,38 @@
+; RUN: opt < %s -loop-reduce
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
+
+%struct.nsTArray = type { i8 }
+%struct.nsTArrayHeader = type { i32 }
+
+define void @_Z6foobarR8nsTArray(%struct.nsTArray* %aValues, i32 %foo, %struct.nsTArrayHeader* %bar) nounwind {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %_ZN8nsTArray9ElementAtEi.exit, %entry
+  %i.06 = phi i32 [ %add, %_ZN8nsTArray9ElementAtEi.exit ], [ 0, %entry ]
+  %call.i = call %struct.nsTArrayHeader* @_ZN8nsTArray4Hdr2Ev() nounwind
+  %add.ptr.i = getelementptr inbounds %struct.nsTArrayHeader* %call.i, i32 1
+  %tmp = bitcast %struct.nsTArrayHeader* %add.ptr.i to %struct.nsTArray*
+  %arrayidx = getelementptr inbounds %struct.nsTArray* %tmp, i32 %i.06
+  %add = add nsw i32 %i.06, 1
+  call void @llvm.dbg.value(metadata !{%struct.nsTArray* %aValues}, i64 0, metadata !0) nounwind
+  br label %_ZN8nsTArray9ElementAtEi.exit
+
+_ZN8nsTArray9ElementAtEi.exit:                    ; preds = %for.body
+  %arrayidx.i = getelementptr inbounds %struct.nsTArray* %tmp, i32 %add
+  call void @_ZN11nsTArray15ComputeDistanceERKS_Rd(%struct.nsTArray* %arrayidx, %struct.nsTArray* %arrayidx.i) nounwind
+  %cmp = icmp slt i32 %add, %foo
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %_ZN8nsTArray9ElementAtEi.exit
+  ret void
+}
+
+declare void @_ZN11nsTArray15ComputeDistanceERKS_Rd(%struct.nsTArray*, %struct.nsTArray*)
+
+declare %struct.nsTArrayHeader* @_ZN8nsTArray4Hdr2Ev()
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!0 = metadata !{i32 786689}                       ; [ DW_TAG_arg_variable ]
diff --git a/test/Transforms/LoopStrengthReduce/pr12048.ll b/test/Transforms/LoopStrengthReduce/pr12048.ll
new file mode 100644
index 0000000..7e0f2ad
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/pr12048.ll
@@ -0,0 +1,38 @@
+; RUN: opt < %s -loop-reduce
+
+define void @resolve_name() nounwind uwtable ssp {
+  br label %while.cond40.preheader
+while.cond132.while.cond.loopexit_crit_edge:      ; preds = %while.body139
+  br label %while.cond40.preheader
+while.cond40.preheader:                           ; preds = %while.cond132.preheader, %while.cond132.while.cond.loopexit_crit_edge, %0
+  br label %while.cond40
+while.cond40:                                     ; preds = %while.body51, %while.cond40.preheader
+  %indvars.iv194 = phi i8* [ null, %while.cond40.preheader ], [ %scevgep, %while.body51 ]
+  %tmp.1 = phi i8* [ undef, %while.cond40.preheader ], [ %incdec.ptr, %while.body51 ]
+  switch i8 undef, label %while.body51 [
+    i8 0, label %if.then59
+  ]
+while.body51:                                     ; preds = %while.cond40
+  %incdec.ptr = getelementptr inbounds i8* %tmp.1, i64 1
+  %scevgep = getelementptr i8* %indvars.iv194, i64 1
+  br label %while.cond40
+if.then59:                                        ; preds = %while.cond40
+  br i1 undef, label %if.then64, label %if.end113
+if.then64:                                        ; preds = %if.then59
+  %incdec.ptr88.tmp.2 = select i1 undef, i8* undef, i8* undef
+  br label %if.end113
+if.end113:                                        ; preds = %if.then64, %if.then59
+  %tmp.4 = phi i8* [ %incdec.ptr88.tmp.2, %if.then64 ], [ undef, %if.then59 ]
+  %tmp.4195 = ptrtoint i8* %tmp.4 to i64
+  br  label %while.cond132.preheader
+while.cond132.preheader:                          ; preds = %if.end113
+  %cmp133173 = icmp eq i8* %tmp.1, %tmp.4
+  br i1 %cmp133173, label %while.cond40.preheader, label %while.body139.lr.ph
+while.body139.lr.ph:                              ; preds = %while.cond132.preheader
+  %scevgep198 = getelementptr i8* %indvars.iv194, i64 0
+  %scevgep198199 = ptrtoint i8* %scevgep198 to i64
+  br label %while.body139
+while.body139:                                    ; preds = %while.body139, %while.body139.lr.ph
+  %start_of_var.0177 = phi i8* [ %tmp.1, %while.body139.lr.ph ], [ null, %while.body139 ]
+  br i1 undef, label %while.cond132.while.cond.loopexit_crit_edge, label %while.body139
+}
diff --git a/test/Transforms/LoopUnroll/dg.exp b/test/Transforms/LoopUnroll/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/LoopUnroll/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/LoopUnroll/lit.local.cfg b/test/Transforms/LoopUnroll/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/LoopUnroll/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll b/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll
index 21c0ec3..05d98d5 100644
--- a/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll
+++ b/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll
@@ -1,5 +1,5 @@
-; RUN: opt -loop-unswitch -loop-unswitch-threshold 30 -disable-output -stats -info-output-file - < %s | FileCheck --check-prefix=STATS %s
-; RUN: opt -S -loop-unswitch -loop-unswitch-threshold 30 -verify-loop-info -verify-dom-info %s | FileCheck %s
+; RUN: opt -loop-unswitch -loop-unswitch-threshold 13 -disable-output -stats -info-output-file - < %s | FileCheck --check-prefix=STATS %s
+; RUN: opt -S -loop-unswitch -loop-unswitch-threshold 13 -verify-loop-info -verify-dom-info %s | FileCheck %s
 
 ; STATS: 1 loop-simplify - Number of pre-header or exit blocks inserted
 ; STATS: 1 loop-unswitch - Number of switches unswitched
diff --git a/test/Transforms/LoopUnswitch/dg.exp b/test/Transforms/LoopUnswitch/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/LoopUnswitch/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/LoopUnswitch/lit.local.cfg b/test/Transforms/LoopUnswitch/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/LoopUnswitch/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LowerAtomic/dg.exp b/test/Transforms/LowerAtomic/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/LowerAtomic/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/LowerAtomic/lit.local.cfg b/test/Transforms/LowerAtomic/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/LowerAtomic/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LowerExpectIntrinsic/dg.exp b/test/Transforms/LowerExpectIntrinsic/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/LowerExpectIntrinsic/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/LowerExpectIntrinsic/lit.local.cfg b/test/Transforms/LowerExpectIntrinsic/lit.local.cfg
new file mode 100644
index 0000000..c6106e4
--- /dev/null
+++ b/test/Transforms/LowerExpectIntrinsic/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll']
diff --git a/test/Transforms/LowerInvoke/dg.exp b/test/Transforms/LowerInvoke/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/LowerInvoke/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/LowerInvoke/lit.local.cfg b/test/Transforms/LowerInvoke/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/LowerInvoke/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LowerSwitch/dg.exp b/test/Transforms/LowerSwitch/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/LowerSwitch/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/LowerSwitch/lit.local.cfg b/test/Transforms/LowerSwitch/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/LowerSwitch/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/Mem2Reg/dg.exp b/test/Transforms/Mem2Reg/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/Mem2Reg/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/Mem2Reg/lit.local.cfg b/test/Transforms/Mem2Reg/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/Mem2Reg/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/MemCpyOpt/dg.exp b/test/Transforms/MemCpyOpt/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/MemCpyOpt/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/MemCpyOpt/lit.local.cfg b/test/Transforms/MemCpyOpt/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/MemCpyOpt/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/MergeFunc/dg.exp b/test/Transforms/MergeFunc/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/MergeFunc/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/MergeFunc/lit.local.cfg b/test/Transforms/MergeFunc/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/MergeFunc/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/ObjCARC/apelim.ll b/test/Transforms/ObjCARC/apelim.ll
new file mode 100644
index 0000000..8c7b5b1
--- /dev/null
+++ b/test/Transforms/ObjCARC/apelim.ll
@@ -0,0 +1,53 @@
+; RUN: opt -S -objc-arc-apelim < %s | FileCheck %s
+; rdar://10227311
+
+@llvm.global_ctors = appending global [2 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @_GLOBAL__I_x }, { i32, void ()* } { i32 65535, void ()* @_GLOBAL__I_y }]
+
+@x = global i32 0
+
+declare i32 @bar() nounwind
+
+define i32 @foo() nounwind {
+entry:
+  ret i32 5
+}
+
+define internal void @__cxx_global_var_init() {
+entry:
+  %call = call i32 @foo()
+  store i32 %call, i32* @x, align 4
+  ret void
+}
+
+define internal void @__dxx_global_var_init() {
+entry:
+  %call = call i32 @bar()
+  store i32 %call, i32* @x, align 4
+  ret void
+}
+
+; CHECK: define internal void @_GLOBAL__I_x()
+; CHECK-NOT: @objc
+; CHECK: }
+define internal void @_GLOBAL__I_x() {
+entry:
+  %0 = call i8* @objc_autoreleasePoolPush() nounwind
+  call void @__cxx_global_var_init()
+  call void @objc_autoreleasePoolPop(i8* %0) nounwind
+  ret void
+}
+
+; CHECK: define internal void @_GLOBAL__I_y()
+; CHECK: %0 = call i8* @objc_autoreleasePoolPush() nounwind
+; CHECK: call void @objc_autoreleasePoolPop(i8* %0) nounwind
+; CHECK: }
+define internal void @_GLOBAL__I_y() {
+entry:
+  %0 = call i8* @objc_autoreleasePoolPush() nounwind
+  call void @__dxx_global_var_init()
+  call void @objc_autoreleasePoolPop(i8* %0) nounwind
+  ret void
+}
+
+declare i8* @objc_autoreleasePoolPush()
+declare void @objc_autoreleasePoolPop(i8*)
diff --git a/test/Transforms/ObjCARC/basic.ll b/test/Transforms/ObjCARC/basic.ll
index 44c2602..552f4e0 100644
--- a/test/Transforms/ObjCARC/basic.ll
+++ b/test/Transforms/ObjCARC/basic.ll
@@ -786,7 +786,7 @@ C:
 @__block_holder_tmp_1 = external constant %block1
 define void @test23() {
 entry:
-  %0 = call i8* @objc_retainBlock(i8* bitcast (%block1* @__block_holder_tmp_1 to i8*)) nounwind
+  %0 = call i8* @objc_retainBlock(i8* bitcast (%block1* @__block_holder_tmp_1 to i8*)) nounwind, !clang.arc.copy_on_escape !0
   call void @bar(i32 ()* bitcast (%block1* @__block_holder_tmp_1 to i32 ()*))
   call void @bar(i32 ()* bitcast (%block1* @__block_holder_tmp_1 to i32 ()*))
   call void @objc_release(i8* bitcast (%block1* @__block_holder_tmp_1 to i8*)) nounwind
@@ -801,13 +801,28 @@ entry:
 ; CHECK: }
 define void @test23b(i8* %p) {
 entry:
-  %0 = call i8* @objc_retainBlock(i8* %p) nounwind
+  %0 = call i8* @objc_retainBlock(i8* %p) nounwind, !clang.arc.copy_on_escape !0
   call void @callee()
   call void @use_pointer(i8* %p)
   call void @objc_release(i8* %p) nounwind
   ret void
 }
 
+; Don't optimize objc_retainBlock, because there's no copy_on_escape metadata.
+
+; CHECK: define void @test23c(
+; CHECK: @objc_retainBlock
+; CHECK: @objc_release
+; CHECK: }
+define void @test23c() {
+entry:
+  %0 = call i8* @objc_retainBlock(i8* bitcast (%block1* @__block_holder_tmp_1 to i8*)) nounwind
+  call void @bar(i32 ()* bitcast (%block1* @__block_holder_tmp_1 to i32 ()*))
+  call void @bar(i32 ()* bitcast (%block1* @__block_holder_tmp_1 to i32 ()*))
+  call void @objc_release(i8* bitcast (%block1* @__block_holder_tmp_1 to i8*)) nounwind
+  ret void
+}
+
 ; Any call can decrement a retain count.
 
 ; CHECK: define void @test24(
diff --git a/test/Transforms/ObjCARC/contract-storestrong-ivar.ll b/test/Transforms/ObjCARC/contract-storestrong-ivar.ll
index 4ad78e7..4a9b314 100644
--- a/test/Transforms/ObjCARC/contract-storestrong-ivar.ll
+++ b/test/Transforms/ObjCARC/contract-storestrong-ivar.ll
@@ -1,6 +1,6 @@
 ; RUN: opt -objc-arc-contract -S < %s | FileCheck %s
 
-; CHECK: call void @objc_storeStrong(i8**
+; CHECK: tail call void @objc_storeStrong(i8**
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin11.0.0"
diff --git a/test/Transforms/ObjCARC/contract-storestrong.ll b/test/Transforms/ObjCARC/contract-storestrong.ll
index fda2ff4..4ff0596 100644
--- a/test/Transforms/ObjCARC/contract-storestrong.ll
+++ b/test/Transforms/ObjCARC/contract-storestrong.ll
@@ -9,7 +9,7 @@ declare void @objc_release(i8*)
 
 ; CHECK: define void @test0(
 ; CHECK: entry:
-; CHECK-NEXT: call void @objc_storeStrong(i8** @x, i8* %p) nounwind
+; CHECK-NEXT: tail call void @objc_storeStrong(i8** @x, i8* %p) nounwind
 ; CHECK-NEXT: ret void
 define void @test0(i8* %p) {
 entry:
diff --git a/test/Transforms/ObjCARC/dg.exp b/test/Transforms/ObjCARC/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/ObjCARC/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/ObjCARC/lit.local.cfg b/test/Transforms/ObjCARC/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/ObjCARC/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/ObjCARC/no-objc-arc-exceptions.ll b/test/Transforms/ObjCARC/no-objc-arc-exceptions.ll
new file mode 100644
index 0000000..9728f6e
--- /dev/null
+++ b/test/Transforms/ObjCARC/no-objc-arc-exceptions.ll
@@ -0,0 +1,122 @@
+; RUN: opt -S -objc-arc < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+%struct.__block_byref_x = type { i8*, %struct.__block_byref_x*, i32, i32, i32 }
+%struct.__block_descriptor = type { i64, i64 }
+@_NSConcreteStackBlock = external global i8*
+@__block_descriptor_tmp = external hidden constant { i64, i64, i8*, i8*, i8*, i8* }
+
+; The optimizer should make use of the !clang.arc.no_objc_arc_exceptions
+; metadata and eliminate the retainBlock+release pair here.
+; rdar://10803830.
+
+; CHECK: define void @test0(
+; CHECK-NOT: @objc
+; CHECK: }
+define void @test0() {
+entry:
+  %x = alloca %struct.__block_byref_x, align 8
+  %block = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>, align 8
+  %byref.isa = getelementptr inbounds %struct.__block_byref_x* %x, i64 0, i32 0
+  store i8* null, i8** %byref.isa, align 8
+  %byref.forwarding = getelementptr inbounds %struct.__block_byref_x* %x, i64 0, i32 1
+  store %struct.__block_byref_x* %x, %struct.__block_byref_x** %byref.forwarding, align 8
+  %byref.flags = getelementptr inbounds %struct.__block_byref_x* %x, i64 0, i32 2
+  store i32 0, i32* %byref.flags, align 8
+  %byref.size = getelementptr inbounds %struct.__block_byref_x* %x, i64 0, i32 3
+  store i32 32, i32* %byref.size, align 4
+  %block.isa = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 0
+  store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** %block.isa, align 8
+  %block.flags = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 1
+  store i32 1107296256, i32* %block.flags, align 8
+  %block.reserved = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 2
+  store i32 0, i32* %block.reserved, align 4
+  %block.invoke = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 3
+  store i8* bitcast (void (i8*)* @__foo_block_invoke_0 to i8*), i8** %block.invoke, align 8
+  %block.descriptor = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 4
+  store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8*, i8*, i8* }* @__block_descriptor_tmp to %struct.__block_descriptor*), %struct.__block_descriptor** %block.descriptor, align 8
+  %block.captured = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 5
+  %t1 = bitcast %struct.__block_byref_x* %x to i8*
+  store i8* %t1, i8** %block.captured, align 8
+  %t2 = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block to i8*
+  %t3 = call i8* @objc_retainBlock(i8* %t2) nounwind, !clang.arc.copy_on_escape !4
+  %t4 = getelementptr inbounds i8* %t3, i64 16
+  %t5 = bitcast i8* %t4 to i8**
+  %t6 = load i8** %t5, align 8
+  %t7 = bitcast i8* %t6 to void (i8*)*
+  invoke void %t7(i8* %t3)
+          to label %invoke.cont unwind label %lpad, !clang.arc.no_objc_arc_exceptions !4
+
+invoke.cont:                                      ; preds = %entry
+  call void @objc_release(i8* %t3) nounwind, !clang.imprecise_release !4
+  call void @_Block_object_dispose(i8* %t1, i32 8)
+  ret void
+
+lpad:                                             ; preds = %entry
+  %t8 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*)
+          cleanup
+  call void @_Block_object_dispose(i8* %t1, i32 8)
+  resume { i8*, i32 } %t8
+}
+
+; There is no !clang.arc.no_objc_arc_exceptions
+; metadata here, so the optimizer shouldn't eliminate anything.
+
+; CHECK: define void @test0_no_metadata(
+; CHECK: call i8* @objc_retainBlock(
+; CHECK: invoke
+; CHECK: call void @objc_release(
+; CHECK: }
+define void @test0_no_metadata() {
+entry:
+  %x = alloca %struct.__block_byref_x, align 8
+  %block = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>, align 8
+  %byref.isa = getelementptr inbounds %struct.__block_byref_x* %x, i64 0, i32 0
+  store i8* null, i8** %byref.isa, align 8
+  %byref.forwarding = getelementptr inbounds %struct.__block_byref_x* %x, i64 0, i32 1
+  store %struct.__block_byref_x* %x, %struct.__block_byref_x** %byref.forwarding, align 8
+  %byref.flags = getelementptr inbounds %struct.__block_byref_x* %x, i64 0, i32 2
+  store i32 0, i32* %byref.flags, align 8
+  %byref.size = getelementptr inbounds %struct.__block_byref_x* %x, i64 0, i32 3
+  store i32 32, i32* %byref.size, align 4
+  %block.isa = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 0
+  store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** %block.isa, align 8
+  %block.flags = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 1
+  store i32 1107296256, i32* %block.flags, align 8
+  %block.reserved = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 2
+  store i32 0, i32* %block.reserved, align 4
+  %block.invoke = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 3
+  store i8* bitcast (void (i8*)* @__foo_block_invoke_0 to i8*), i8** %block.invoke, align 8
+  %block.descriptor = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 4
+  store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8*, i8*, i8* }* @__block_descriptor_tmp to %struct.__block_descriptor*), %struct.__block_descriptor** %block.descriptor, align 8
+  %block.captured = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 5
+  %t1 = bitcast %struct.__block_byref_x* %x to i8*
+  store i8* %t1, i8** %block.captured, align 8
+  %t2 = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block to i8*
+  %t3 = call i8* @objc_retainBlock(i8* %t2) nounwind, !clang.arc.copy_on_escape !4
+  %t4 = getelementptr inbounds i8* %t3, i64 16
+  %t5 = bitcast i8* %t4 to i8**
+  %t6 = load i8** %t5, align 8
+  %t7 = bitcast i8* %t6 to void (i8*)*
+  invoke void %t7(i8* %t3)
+          to label %invoke.cont unwind label %lpad
+
+invoke.cont:                                      ; preds = %entry
+  call void @objc_release(i8* %t3) nounwind, !clang.imprecise_release !4
+  call void @_Block_object_dispose(i8* %t1, i32 8)
+  ret void
+
+lpad:                                             ; preds = %entry
+  %t8 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*)
+          cleanup
+  call void @_Block_object_dispose(i8* %t1, i32 8)
+  resume { i8*, i32 } %t8
+}
+
+declare i8* @objc_retainBlock(i8*)
+declare void @objc_release(i8*)
+declare void @_Block_object_dispose(i8*, i32)
+declare i32 @__objc_personality_v0(...)
+declare void @__foo_block_invoke_0(i8* nocapture) uwtable ssp
+
+!4 = metadata !{}
diff --git a/test/Transforms/ObjCARC/retain-block-load.ll b/test/Transforms/ObjCARC/retain-block-load.ll
new file mode 100644
index 0000000..a5170e3
--- /dev/null
+++ b/test/Transforms/ObjCARC/retain-block-load.ll
@@ -0,0 +1,51 @@
+; RUN: opt -objc-arc -S < %s | FileCheck %s
+
+; rdar://10803830
+; The optimizer should be able to prove that the block does not
+; "escape", so the retainBlock+release pair can be eliminated.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+%struct.__block_descriptor = type { i64, i64 }
+
+@_NSConcreteStackBlock = external global i8*
+@__block_descriptor_tmp = external global { i64, i64, i8*, i8* }
+
+; CHECK: define void @test() {
+; CHECK-NOT: @objc
+; CHECK: declare i8* @objc_retainBlock(i8*)
+; CHECK: declare void @objc_release(i8*)
+
+define void @test() {
+entry:
+  %block = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>, align 8
+  %block.isa = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %block, i64 0, i32 0
+  store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** %block.isa, align 8
+  %block.flags = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %block, i64 0, i32 1
+  store i32 1073741824, i32* %block.flags, align 8
+  %block.reserved = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %block, i64 0, i32 2
+  store i32 0, i32* %block.reserved, align 4
+  %block.invoke = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %block, i64 0, i32 3
+  store i8* bitcast (i32 (i8*)* @__test_block_invoke_0 to i8*), i8** %block.invoke, align 8
+  %block.descriptor = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %block, i64 0, i32 4
+  store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8* }* @__block_descriptor_tmp to %struct.__block_descriptor*), %struct.__block_descriptor** %block.descriptor, align 8
+  %block.captured = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %block, i64 0, i32 5
+  store i32 4, i32* %block.captured, align 8
+  %tmp = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %block to i8*
+  %tmp1 = call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0
+  %tmp2 = getelementptr inbounds i8* %tmp1, i64 16
+  %tmp3 = bitcast i8* %tmp2 to i8**
+  %tmp4 = load i8** %tmp3, align 8
+  %tmp5 = bitcast i8* %tmp4 to i32 (i8*)*
+  %call = call i32 %tmp5(i8* %tmp1)
+  call void @objc_release(i8* %tmp1) nounwind, !clang.imprecise_release !0
+  ret void
+}
+
+declare i32 @__test_block_invoke_0(i8* nocapture %.block_descriptor) nounwind readonly
+
+declare i8* @objc_retainBlock(i8*)
+
+declare void @objc_release(i8*)
+
+!0 = metadata !{}
diff --git a/test/Transforms/ObjCARC/retain-block.ll b/test/Transforms/ObjCARC/retain-block.ll
new file mode 100644
index 0000000..b3b62d3
--- /dev/null
+++ b/test/Transforms/ObjCARC/retain-block.ll
@@ -0,0 +1,138 @@
+; RUN: opt -objc-arc -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64"
+
+!0 = metadata !{}
+
+declare i8* @objc_retain(i8*)
+declare void @callee(i8)
+declare void @use_pointer(i8*)
+declare void @objc_release(i8*)
+declare i8* @objc_retainBlock(i8*)
+declare i8* @objc_autorelease(i8*)
+
+; Basic retainBlock+release elimination.
+
+; CHECK: define void @test0(i8* %tmp) {
+; CHECK-NOT: @objc
+; CHECK: }
+define void @test0(i8* %tmp) {
+entry:
+  %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0
+  tail call void @use_pointer(i8* %tmp2)
+  tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
+  ret void
+}
+
+; Same as test0, but there's no copy_on_escape metadata, so there's no
+; optimization possible.
+
+; CHECK: define void @test0_no_metadata(i8* %tmp) {
+; CHECK: %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind
+; CHECK: tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
+; CHECK: }
+define void @test0_no_metadata(i8* %tmp) {
+entry:
+  %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind
+  tail call void @use_pointer(i8* %tmp2)
+  tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
+  ret void
+}
+
+; Same as test0, but the pointer escapes, so there's no
+; optimization possible.
+
+; CHECK: define void @test0_escape(i8* %tmp, i8** %z) {
+; CHECK: %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0
+; CHECK: tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
+; CHECK: }
+define void @test0_escape(i8* %tmp, i8** %z) {
+entry:
+  %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0
+  store i8* %tmp2, i8** %z
+  tail call void @use_pointer(i8* %tmp2)
+  tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
+  ret void
+}
+
+; Same as test0_escape, but there's no intervening call.
+
+; CHECK: define void @test0_just_escape(i8* %tmp, i8** %z) {
+; CHECK: %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0
+; CHECK: tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
+; CHECK: }
+define void @test0_just_escape(i8* %tmp, i8** %z) {
+entry:
+  %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0
+  store i8* %tmp2, i8** %z
+  tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
+  ret void
+}
+
+; Basic nested retainBlock+release elimination.
+
+; CHECK: define void @test1(i8* %tmp) {
+; CHECK-NOT: @objc
+; CHECK: tail call i8* @objc_retain(i8* %tmp) nounwind
+; CHECK-NOT: @objc
+; CHECK: tail call void @objc_release(i8* %tmp) nounwind, !clang.imprecise_release !0
+; CHECK-NOT: @objc
+; CHECK: }
+define void @test1(i8* %tmp) {
+entry:
+  %tmp1 = tail call i8* @objc_retain(i8* %tmp) nounwind
+  %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0
+  tail call void @use_pointer(i8* %tmp2)
+  tail call void @use_pointer(i8* %tmp2)
+  tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
+  tail call void @objc_release(i8* %tmp) nounwind, !clang.imprecise_release !0
+  ret void
+}
+
+; Same as test1, but there's no copy_on_escape metadata, so there's no
+; retainBlock+release optimization possible. But we can still eliminate
+; the outer retain+release.
+
+; CHECK: define void @test1_no_metadata(i8* %tmp) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: tail call i8* @objc_retainBlock(i8* %tmp) nounwind
+; CHECK-NEXT: @use_pointer(i8* %tmp2)
+; CHECK-NEXT: @use_pointer(i8* %tmp2)
+; CHECK-NEXT: tail call void @objc_release(i8* %tmp) nounwind, !clang.imprecise_release !0
+; CHECK-NOT: @objc
+; CHECK: }
+define void @test1_no_metadata(i8* %tmp) {
+entry:
+  %tmp1 = tail call i8* @objc_retain(i8* %tmp) nounwind
+  %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind
+  tail call void @use_pointer(i8* %tmp2)
+  tail call void @use_pointer(i8* %tmp2)
+  tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
+  tail call void @objc_release(i8* %tmp) nounwind, !clang.imprecise_release !0
+  ret void
+}
+
+; Same as test1, but the pointer escapes, so there's no
+; retainBlock+release optimization possible. But we can still eliminate
+; the outer retain+release.
+
+; CHECK: define void @test1_escape(i8* %tmp, i8** %z) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0
+; CHECK-NEXT: store i8* %tmp2, i8** %z
+; CHECK-NEXT: @use_pointer(i8* %tmp2)
+; CHECK-NEXT: @use_pointer(i8* %tmp2)
+; CHECK-NEXT: tail call void @objc_release(i8* %tmp) nounwind, !clang.imprecise_release !0
+; CHECK-NOT: @objc
+; CHECK: }
+define void @test1_escape(i8* %tmp, i8** %z) {
+entry:
+  %tmp1 = tail call i8* @objc_retain(i8* %tmp) nounwind
+  %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0
+  store i8* %tmp2, i8** %z
+  tail call void @use_pointer(i8* %tmp2)
+  tail call void @use_pointer(i8* %tmp2)
+  tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
+  tail call void @objc_release(i8* %tmp) nounwind, !clang.imprecise_release !0
+  ret void
+}
diff --git a/test/Transforms/PhaseOrdering/dg.exp b/test/Transforms/PhaseOrdering/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/PhaseOrdering/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/PhaseOrdering/lit.local.cfg b/test/Transforms/PhaseOrdering/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/PhaseOrdering/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/PruneEH/dg.exp b/test/Transforms/PruneEH/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/PruneEH/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/PruneEH/lit.local.cfg b/test/Transforms/PruneEH/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/PruneEH/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/Reassociate/dg.exp b/test/Transforms/Reassociate/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/Reassociate/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/Reassociate/lit.local.cfg b/test/Transforms/Reassociate/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/Reassociate/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/SCCP/2009-01-14-IPSCCP-Invoke.ll b/test/Transforms/SCCP/2009-01-14-IPSCCP-Invoke.ll
index 7546bf5..f62ed70 100644
--- a/test/Transforms/SCCP/2009-01-14-IPSCCP-Invoke.ll
+++ b/test/Transforms/SCCP/2009-01-14-IPSCCP-Invoke.ll
@@ -21,10 +21,6 @@ define internal i32 @f() {
 
 declare i8* @__cxa_begin_catch(i8*) nounwind
 
-declare i8* @llvm.eh.exception() nounwind
-
-declare i32 @llvm.eh.selector.i32(i8*, i8*, ...) nounwind
-
 declare void @__cxa_end_catch()
 
 declare i32 @__gxx_personality_v0(...)
diff --git a/test/Transforms/SCCP/dg.exp b/test/Transforms/SCCP/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/SCCP/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/SCCP/lit.local.cfg b/test/Transforms/SCCP/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/SCCP/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/ScalarRepl/dg.exp b/test/Transforms/ScalarRepl/dg.exp
deleted file mode 100644
index 39954d8..0000000
--- a/test/Transforms/ScalarRepl/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll}]]
diff --git a/test/Transforms/ScalarRepl/lit.local.cfg b/test/Transforms/ScalarRepl/lit.local.cfg
new file mode 100644
index 0000000..c6106e4
--- /dev/null
+++ b/test/Transforms/ScalarRepl/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll']
diff --git a/test/Transforms/ScalarRepl/phi-cycle.ll b/test/Transforms/ScalarRepl/phi-cycle.ll
new file mode 100644
index 0000000..cb5101c
--- /dev/null
+++ b/test/Transforms/ScalarRepl/phi-cycle.ll
@@ -0,0 +1,77 @@
+; RUN: opt -S -scalarrepl-ssa < %s | FileCheck %s
+; rdar://10589171
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.foo = type { i32, i32 }
+
+@.str = private unnamed_addr constant [6 x i8] c"x=%d\0A\00", align 1
+
+define i32 @main(i32 %argc, i8** nocapture %argv) nounwind uwtable {
+entry:
+  %f = alloca %struct.foo, align 4
+  %x.i = getelementptr inbounds %struct.foo* %f, i64 0, i32 0
+  store i32 1, i32* %x.i, align 4
+  %y.i = getelementptr inbounds %struct.foo* %f, i64 0, i32 1
+  br label %while.cond.i
+
+; CHECK: while.cond.i:
+; CHECK-NEXT: %tmp = phi i32 [ 1, %entry ], [ %tmp2, %while.cond.backedge.i ]
+; CHECK-NEXT: %pos.0.i = phi i32 [ 1, %entry ], [ %xtmp.i, %while.cond.backedge.i ]
+; CHECK-NEXT: %left.0.i = phi i32 [ 1, %entry ], [ %dec.i, %while.cond.backedge.i ]
+; CHECK-NOT: phi
+while.cond.i:                                     ; preds = %while.cond.backedge.i, %entry
+  %tmp = phi i32 [ 1, %entry ], [ %tmp2, %while.cond.backedge.i ]
+  %pos.0.i = phi i32 [ 1, %entry ], [ %xtmp.i, %while.cond.backedge.i ]
+  %left.0.i = phi i32 [ 1, %entry ], [ %dec.i, %while.cond.backedge.i ]
+  %cmp.i = icmp sgt i32 %left.0.i, 0
+  br i1 %cmp.i, label %while.body.i, label %while.cond.i.func.exit_crit_edge
+
+while.cond.i.func.exit_crit_edge:                 ; preds = %while.cond.i
+  br label %func.exit
+
+while.body.i:                                     ; preds = %while.cond.i
+  %dec.i = add nsw i32 %left.0.i, -1
+  switch i32 1, label %while.body.i.func.exit_crit_edge [
+    i32 0, label %while.cond.backedge.i
+    i32 1, label %sw.bb.i
+  ]
+
+while.body.i.func.exit_crit_edge:                 ; preds = %while.body.i
+  br label %func.exit
+
+sw.bb.i:                                          ; preds = %while.body.i
+  %cmp2.i = icmp eq i32 %tmp, 1
+  br i1 %cmp2.i, label %if.then.i, label %if.end.i
+
+if.then.i:                                        ; preds = %sw.bb.i
+  store i32 %pos.0.i, i32* %x.i, align 4
+  br label %if.end.i
+
+; CHECK: if.end.i:
+; CHECK-NEXT: %tmp1 = phi i32 [ %pos.0.i, %if.then.i ], [ %tmp, %sw.bb.i ]
+; CHECK-NOT: phi
+if.end.i:                                         ; preds = %if.then.i, %sw.bb.i
+  %tmp1 = phi i32 [ %pos.0.i, %if.then.i ], [ %tmp, %sw.bb.i ]
+  store i32 %tmp1, i32* %y.i, align 4
+  br label %while.cond.backedge.i
+
+; CHECK: while.cond.backedge.i:
+; CHECK-NEXT: %tmp2 = phi i32 [ %tmp1, %if.end.i ], [ %tmp, %while.body.i ]
+; CHECK-NOT: phi
+while.cond.backedge.i:                            ; preds = %if.end.i, %while.body.i
+  %tmp2 = phi i32 [ %tmp1, %if.end.i ], [ %tmp, %while.body.i ]
+  %xtmp.i = add i32 %pos.0.i, 1
+  br label %while.cond.i
+
+; CHECK: func.exit:
+; CHECK-NOT: load
+; CHECK: %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([6 x i8]* @.str, i64 0, i64 0), i32 %tmp) nounwind
+func.exit:                                        ; preds = %while.body.i.func.exit_crit_edge, %while.cond.i.func.exit_crit_edge
+  %tmp3 = load i32* %x.i, align 4
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([6 x i8]* @.str, i64 0, i64 0), i32 %tmp3) nounwind
+  ret i32 0
+}
+
+declare i32 @printf(i8* nocapture, ...) nounwind
diff --git a/test/Transforms/SimplifyCFG/2009-01-19-UnconditionalTrappingConstantExpr.ll b/test/Transforms/SimplifyCFG/2009-01-19-UnconditionalTrappingConstantExpr.ll
index 568e61c..e2765e5 100644
--- a/test/Transforms/SimplifyCFG/2009-01-19-UnconditionalTrappingConstantExpr.ll
+++ b/test/Transforms/SimplifyCFG/2009-01-19-UnconditionalTrappingConstantExpr.ll
@@ -1,9 +1,14 @@
-; RUN: opt < %s -simplifycfg -S | grep {br i1 } | count 4
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
 ; PR3354
 ; Do not merge bb1 into the entry block, it might trap.
 
 @G = extern_weak global i32
 
+; CHECK: @test(
+; CHECK: br i1 %tmp25
+; CHECK: bb1:
+; CHECK: sdiv
+
 define i32 @test(i32 %tmp21, i32 %tmp24) {
 	%tmp25 = icmp sle i32 %tmp21, %tmp24		
 	br i1 %tmp25, label %bb2, label %bb1	
@@ -18,6 +23,11 @@ bb6:
 	ret i32 927
 }
 
+; CHECK: @test2(
+; CHECK: br i1 %tmp34
+; CHECK: bb5:
+; CHECK: sdiv
+
 define i32 @test2(i32 %tmp21, i32 %tmp24, i1 %tmp34) {
 	br i1 %tmp34, label %bb5, label %bb6
 
diff --git a/test/Transforms/SimplifyCFG/2010-10-24-OnlyUnwindInEntry.ll b/test/Transforms/SimplifyCFG/2010-10-24-OnlyUnwindInEntry.ll
deleted file mode 100644
index ebacf2f..0000000
--- a/test/Transforms/SimplifyCFG/2010-10-24-OnlyUnwindInEntry.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: opt %s -simplifycfg -disable-output
-; PR8445
-
-define void @test() {
-      unwind
-}
diff --git a/test/Transforms/SimplifyCFG/SpeculativeExec.ll b/test/Transforms/SimplifyCFG/SpeculativeExec.ll
index 5cfc77c..a61867f 100644
--- a/test/Transforms/SimplifyCFG/SpeculativeExec.ll
+++ b/test/Transforms/SimplifyCFG/SpeculativeExec.ll
@@ -1,7 +1,10 @@
-; RUN: opt < %s -simplifycfg  -S | grep select
-; RUN: opt < %s -simplifycfg  -S | grep br | count 2
+; RUN: opt < %s -simplifycfg -phi-node-folding-threshold=2 -S | FileCheck %s
 
-define i32 @t2(i32 %a, i32 %b, i32 %c) nounwind  {
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @test1(i32 %a, i32 %b, i32 %c) nounwind  {
+; CHECK: @test1
 entry:
         %tmp1 = icmp eq i32 %b, 0
         br i1 %tmp1, label %bb1, label %bb3
@@ -9,6 +12,11 @@ entry:
 bb1:            ; preds = %entry
 	%tmp2 = icmp sgt i32 %c, 1
 	br i1 %tmp2, label %bb2, label %bb3
+; CHECK: bb1:
+; CHECK-NEXT: icmp sgt i32 %c, 1
+; CHECK-NEXT: add i32 %a, 1
+; CHECK-NEXT: select i1 %tmp2, i32 %tmp3, i32 %a
+; CHECK-NEXT: br label %bb3
 
 bb2:		; preds = bb1
 	%tmp3 = add i32 %a, 1
@@ -19,3 +27,20 @@ bb3:		; preds = %bb2, %entry
         %tmp5 = sub i32 %tmp4, 1
 	ret i32 %tmp5
 }
+
+declare i8 @llvm.cttz.i8(i8, i1)
+
+define i8 @test2(i8 %a) {
+; CHECK: @test2
+  br i1 undef, label %bb_true, label %bb_false
+bb_true:
+  %b = tail call i8 @llvm.cttz.i8(i8 %a, i1 false)
+  br label %join
+bb_false:
+  br label %join
+join:
+  %c = phi i8 [%b, %bb_true], [%a, %bb_false]
+; CHECK: select
+  ret i8 %c
+}
+
diff --git a/test/Transforms/SimplifyCFG/dg.exp b/test/Transforms/SimplifyCFG/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/SimplifyCFG/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/SimplifyCFG/lit.local.cfg b/test/Transforms/SimplifyCFG/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/SimplifyCFG/multiple-phis.ll b/test/Transforms/SimplifyCFG/multiple-phis.ll
new file mode 100644
index 0000000..7845423
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/multiple-phis.ll
@@ -0,0 +1,39 @@
+; RUN: opt -simplifycfg -S < %s | FileCheck %s
+
+; It's not worthwhile to if-convert one of the phi nodes and leave
+; the other behind, because that still requires a branch. If
+; SimplifyCFG if-converts one of the phis, it should do both.
+
+; CHECK:      %div.high.addr.0 = select i1 %cmp1, i32 %div, i32 %high.addr.0
+; CHECK-NEXT: %low.0.add2 = select i1 %cmp1, i32 %low.0, i32 %add2
+; CHECK-NEXT: br label %while.cond
+
+define i32 @upper_bound(i32* %r, i32 %high, i32 %k) nounwind {
+entry:
+  br label %while.cond
+
+while.cond:                                       ; preds = %if.then, %if.else, %entry
+  %high.addr.0 = phi i32 [ %high, %entry ], [ %div, %if.then ], [ %high.addr.0, %if.else ]
+  %low.0 = phi i32 [ 0, %entry ], [ %low.0, %if.then ], [ %add2, %if.else ]
+  %cmp = icmp ult i32 %low.0, %high.addr.0
+  br i1 %cmp, label %while.body, label %while.end
+
+while.body:                                       ; preds = %while.cond
+  %add = add i32 %low.0, %high.addr.0
+  %div = udiv i32 %add, 2
+  %idxprom = zext i32 %div to i64
+  %arrayidx = getelementptr inbounds i32* %r, i64 %idxprom
+  %0 = load i32* %arrayidx
+  %cmp1 = icmp ult i32 %k, %0
+  br i1 %cmp1, label %if.then, label %if.else
+
+if.then:                                          ; preds = %while.body
+  br label %while.cond
+
+if.else:                                          ; preds = %while.body
+  %add2 = add i32 %div, 1
+  br label %while.cond
+
+while.end:                                        ; preds = %while.cond
+  ret i32 %low.0
+}
diff --git a/test/Transforms/SimplifyCFG/preserve-branchweights.ll b/test/Transforms/SimplifyCFG/preserve-branchweights.ll
new file mode 100644
index 0000000..c791785
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/preserve-branchweights.ll
@@ -0,0 +1,88 @@
+; RUN: opt -simplifycfg -S -o - < %s | FileCheck %s
+
+declare void @helper(i32)
+
+define void @test1(i1 %a, i1 %b) {
+; CHECK: @test1
+entry:
+  br i1 %a, label %Y, label %X, !prof !0   ; input !0: weight 3 to %Y, 5 to %X
+; CHECK: br i1 %or.cond, label %Z, label %Y, !prof !0
+
+X:                                         ; reached with probability 5/8
+  %c = or i1 %b, false
+  br i1 %c, label %Z, label %Y, !prof !1   ; input !1: weight 1 to %Z, 1 to %Y, so P(%Z) = 5/8 * 1/2 = 5/16
+
+Y:                                         ; P(%Y) = 11/16, hence the expected merged weights 5 : 11
+  call void @helper(i32 0)
+  ret void
+
+Z:
+  call void @helper(i32 1)
+  ret void
+}
+
+define void @test2(i1 %a, i1 %b) {
+; CHECK: @test2
+entry:
+  br i1 %a, label %X, label %Y, !prof !1   ; input !1: weight 1 to %X, 1 to %Y
+; CHECK: br i1 %or.cond, label %Z, label %Y, !prof !1
+; CHECK-NOT: !prof
+
+X:                                         ; reached with probability 1/2
+  %c = or i1 %b, false
+  br i1 %c, label %Z, label %Y, !prof !2   ; input !2: weight 1 to %Z, 2 to %Y, so P(%Z) = 1/2 * 1/3 = 1/6
+
+Y:                                         ; P(%Y) = 5/6, hence the expected merged weights 1 : 5
+  call void @helper(i32 0)
+  ret void
+
+Z:
+  call void @helper(i32 1)
+  ret void
+}
+
+define void @test3(i1 %a, i1 %b) {
+; CHECK: @test3
+; CHECK-NOT: !prof
+entry:
+  br i1 %a, label %X, label %Y, !prof !1
+
+X:
+  %c = or i1 %b, false
+  br i1 %c, label %Z, label %Y
+
+Y:
+  call void @helper(i32 0)
+  ret void
+
+Z:
+  call void @helper(i32 1)
+  ret void
+}
+
+define void @test4(i1 %a, i1 %b) {
+; CHECK: @test4
+; CHECK-NOT: !prof
+entry:
+  br i1 %a, label %X, label %Y
+
+X:
+  %c = or i1 %b, false
+  br i1 %c, label %Z, label %Y, !prof !1
+
+Y:
+  call void @helper(i32 0)
+  ret void
+
+Z:
+  call void @helper(i32 1)
+  ret void
+}
+
+!0 = metadata !{metadata !"branch_weights", i32 3, i32 5}
+!1 = metadata !{metadata !"branch_weights", i32 1, i32 1}
+!2 = metadata !{metadata !"branch_weights", i32 1, i32 2}
+
+; CHECK: !0 = metadata !{metadata !"branch_weights", i32 5, i32 11}
+; CHECK: !1 = metadata !{metadata !"branch_weights", i32 1, i32 5}
+; CHECK-NOT: !2
diff --git a/test/Transforms/SimplifyCFG/select-gep.ll b/test/Transforms/SimplifyCFG/select-gep.ll
index 009f05e..7654d02 100644
--- a/test/Transforms/SimplifyCFG/select-gep.ll
+++ b/test/Transforms/SimplifyCFG/select-gep.ll
@@ -35,6 +35,6 @@ if.end:
   ret i8* %x.addr
 
 ; CHECK: @test2
-; CHECK: %x.addr = select i1 %cmp, i8* %incdec.ptr, i8* %y
-; CHECK: ret i8* %x.addr
+; CHECK: %incdec.ptr.y = select i1 %cmp, i8* %incdec.ptr, i8* %y
+; CHECK: ret i8* %incdec.ptr.y
 }
diff --git a/test/Transforms/SimplifyCFG/switch-masked-bits.ll b/test/Transforms/SimplifyCFG/switch-masked-bits.ll
index fc83ec2..3b0c48b 100644
--- a/test/Transforms/SimplifyCFG/switch-masked-bits.ll
+++ b/test/Transforms/SimplifyCFG/switch-masked-bits.ll
@@ -15,8 +15,8 @@ c:
   ret i32 5
 ; CHECK: @test1
 ; CHECK: %cond = icmp eq i32 %i, 24
-; CHECK: %merge = select i1 %cond, i32 5, i32 0
-; CHECK: ret i32 %merge
+; CHECK: %. = select i1 %cond, i32 5, i32 0
+; CHECK: ret i32 %.
 }
 
 
diff --git a/test/Transforms/SimplifyCFG/switch-on-const-select.ll b/test/Transforms/SimplifyCFG/switch-on-const-select.ll
index 5494a65..673a62b 100644
--- a/test/Transforms/SimplifyCFG/switch-on-const-select.ll
+++ b/test/Transforms/SimplifyCFG/switch-on-const-select.ll
@@ -115,7 +115,7 @@ entry:
 cont:
 ; CHECK: %lt = icmp slt i64 %x, %y
     %lt = icmp slt i64 %x, %y
-; CHECK-NEXT: br i1 %lt, label %a, label %r
+; CHECK-NEXT: select i1 %lt, i32 -1, i32 1
     %qux = select i1 %lt, i32 0, i32 2
     switch i32 %qux, label %bees [
         i32 0, label %a
diff --git a/test/Transforms/SimplifyCFG/unreachable-blocks.ll b/test/Transforms/SimplifyCFG/unreachable-blocks.ll
new file mode 100644
index 0000000..1df0eab
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/unreachable-blocks.ll
@@ -0,0 +1,28 @@
+; RUN: opt -simplifycfg < %s -disable-output
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-pc-linux-gnu"
+
+; PR11825
+define void @test1() {
+entry:
+  br label %return                                ; only edge out of entry: the loop blocks below are unreachable from it
+
+while_block:                                      ; preds = %and_if_cont2
+  %newlen = sub i32 %newlen, 1                    ; operand is this instruction's own result
+  %newptr = getelementptr i8* %newptr, i64 1      ; likewise self-referential
+  %test = icmp sgt i32 %newlen, 0
+  br i1 %test, label %and_if1, label %and_if_cont2
+
+and_if1:                                          ; preds = %while_block
+  %char = load i8* %newptr
+  %test2 = icmp ule i8 %char, 32
+  br label %and_if_cont2
+
+and_if_cont2:                                     ; preds = %and_if1, %while_block
+  %a18 = phi i1 [ %test, %while_block ], [ %test2, %and_if1 ]
+  br i1 %a18, label %while_block, label %return
+
+return:                                           ; preds = %and_if_cont2, %entry
+  ret void
+}
diff --git a/test/Transforms/SimplifyLibCalls/Printf.ll b/test/Transforms/SimplifyLibCalls/Printf.ll
index c98e79a..489c993 100644
--- a/test/Transforms/SimplifyLibCalls/Printf.ll
+++ b/test/Transforms/SimplifyLibCalls/Printf.ll
@@ -3,7 +3,7 @@
 @str = internal constant [13 x i8] c"hello world\0A\00"         ; <[13 x i8]*> [#uses=1]
 @str1 = internal constant [2 x i8] c"h\00"              ; <[2 x i8]*> [#uses=1]
 
-; CHECK: internal unnamed_addr constant [12 x i8] c"hello world\00"
+; CHECK: private unnamed_addr constant [12 x i8] c"hello world\00"
 
 declare i32 @printf(i8*, ...)
 
diff --git a/test/Transforms/SimplifyLibCalls/cos.ll b/test/Transforms/SimplifyLibCalls/cos.ll
new file mode 100644
index 0000000..6a8ce8c
--- /dev/null
+++ b/test/Transforms/SimplifyLibCalls/cos.ll
@@ -0,0 +1,14 @@
+; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define double @foo(double %d) nounwind readnone {
+; CHECK: @foo
+    %1 = fsub double -0.000000e+00, %d                    ; -0.0 - %d, i.e. the negation of %d
+    %2 = call double @cos(double %1) nounwind readnone    ; expected to be rewritten to take %d directly (see the check line below)
+; CHECK: call double @cos(double %d)
+    ret double %2
+}
+
+declare double @cos(double) nounwind readnone
diff --git a/test/Transforms/SimplifyLibCalls/dg.exp b/test/Transforms/SimplifyLibCalls/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/SimplifyLibCalls/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/SimplifyLibCalls/lit.local.cfg b/test/Transforms/SimplifyLibCalls/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/SimplifyLibCalls/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/Sink/dg.exp b/test/Transforms/Sink/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/Sink/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/Sink/lit.local.cfg b/test/Transforms/Sink/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/Sink/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/StripSymbols/dg.exp b/test/Transforms/StripSymbols/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/StripSymbols/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/StripSymbols/lit.local.cfg b/test/Transforms/StripSymbols/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/StripSymbols/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/TailCallElim/dg.exp b/test/Transforms/TailCallElim/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/TailCallElim/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/TailCallElim/lit.local.cfg b/test/Transforms/TailCallElim/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/TailCallElim/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/TailDup/X86/dg.exp b/test/Transforms/TailDup/X86/dg.exp
deleted file mode 100644
index 7b7bd4e..0000000
--- a/test/Transforms/TailDup/X86/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target X86] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll}]]
-}
diff --git a/test/Transforms/TailDup/X86/lit.local.cfg b/test/Transforms/TailDup/X86/lit.local.cfg
new file mode 100644
index 0000000..84bd88c
--- /dev/null
+++ b/test/Transforms/TailDup/X86/lit.local.cfg
@@ -0,0 +1,13 @@
+config.suffixes = ['.ll']  # file suffixes lit treats as tests in this directory
+
+def getRoot(config):  # follow .parent links up to the top-most config (the one with no parent)
+    if not config.parent:
+        return config
+    return getRoot(config.parent)
+
+root = getRoot(config)
+
+targets = set(root.targets_to_build.split())  # targets_to_build is split on whitespace into a set of target names
+if not 'X86' in targets:
+    config.unsupported = True  # no X86 target in this build: flag this directory's tests as unsupported
+
diff --git a/test/Transforms/TailDup/dg.exp b/test/Transforms/TailDup/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Transforms/TailDup/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/TailDup/lit.local.cfg b/test/Transforms/TailDup/lit.local.cfg
new file mode 100644
index 0000000..39c8039
--- /dev/null
+++ b/test/Transforms/TailDup/lit.local.cfg
@@ -0,0 +1,12 @@
+config.suffixes = ['.ll', '.c', '.cpp']  # file suffixes lit treats as tests in this directory
+
+def getRoot(config):  # follow .parent links up to the top-most config (the one with no parent)
+    if not config.parent:
+        return config
+    return getRoot(config.parent)
+
+root = getRoot(config)
+
+targets = set(root.targets_to_build.split())  # targets_to_build is split on whitespace into a set of target names
+if not 'X86' in targets:
+    config.unsupported = True  # no X86 target in this build: flag this directory's tests as unsupported
diff --git a/test/Verifier/dg.exp b/test/Verifier/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/Verifier/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Verifier/lit.local.cfg b/test/Verifier/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Verifier/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/lib/llvm.exp b/test/lib/llvm.exp
index a61f934..e69de29 100644
--- a/test/lib/llvm.exp
+++ b/test/lib/llvm.exp
@@ -1,269 +0,0 @@
-# This procedure executes one line of a test case's execution script.
-proc execOneLine { test PRS outcome lineno line } {
-  set status 0
-  set resultmsg ""
-  set retval [ catch { eval exec -keepnewline -- $line } errmsg ]
-  if { $retval != 0 } {
-    set code [lindex $::errorCode 0]
-    set lineno [expr $lineno + 1]
-    if { $PRS != ""} {
-      set PRS " for $PRS"
-    }
-    set errmsg " at line $lineno\nwhile running: $line\n$errmsg"
-    switch "$code" {
-      CHILDSTATUS {
-        set status [lindex $::errorCode 2]
-        if { $status != 0 } {
-          set resultmsg "$test$PRS\nFailed with exit($status)$errmsg"
-        }
-      }
-      CHILDKILLED {
-        set signal [lindex $::errorCode 2]
-        set resultmsg "$test$PRS\nFailed with signal($signal)$errmsg"
-      }
-      CHILDSUSP {
-        set signal [lindex $::errorCode 2]
-        set resultmsg "$test$PRS\nFailed with suspend($signal)$errmsg"
-      }
-      POSIX {
-        set posixNum [lindex $::errorCode 1]
-        set posixMsg [lindex $::errorCode 2]
-        set resultmsg "$test$PRS\nFailed with posix($posixNum,$posixMsg)$errmsg"
-      }
-      NONE {
-        # Any other error such as stderr output of a program, or syntax error in
-        # the RUN line.
-        set resultmsg "$test$PRS\nFailed with unknown error (or has stderr output)$errmsg"
-      }
-      default {
-        set resultmsg "$test$PRS\nFailed with unknown error$errmsg"
-      }
-    }
-  }
-  return $resultmsg
-}
-
-# This procedure performs variable substitutions on the RUN: lines of a test
-# cases.
-proc substitute { line test tmpFile } {
-  global srcroot objroot srcdir objdir subdir target_triplet
-  global ocamlopt
-  global link shlibext
-  global valgrind grep gas
-  set path [file join $srcdir $subdir]
-
-  # Substitute all Tcl variables.
-  set new_line [subst $line ]
-
-  #replace %% with _#MARKER#_ to make the replacement of %% more predictable
-  regsub -all {%%} $new_line {_#MARKER#_} new_line
-  #replace %link with C++ link command
-  regsub -all {%link} $new_line "$link" new_line
-  #replace %shlibext with shared library extension
-  regsub -all {%shlibext} $new_line "$shlibext" new_line
-  #replace %ocamlopt with ocaml compiler command
-  regsub -all {%ocamlopt} $new_line "$ocamlopt" new_line
-  #replace %p with path to source,
-  regsub -all {%p} $new_line [file join $srcdir $subdir] new_line
-  #replace %s with filename
-  regsub -all {%s} $new_line $test new_line
-  #replace %t with temp filenames
-  regsub -all {%t} $new_line $tmpFile new_line
-  #replace %abs_tmp with absolute temp filenames
-  regsub -all {%abs_tmp} $new_line [file join [pwd] $tmpFile] new_line
-  #replace _#MARKER#_ with %
-  regsub -all {_#MARKER#_} $new_line % new_line
-
-  #replace grep with GNU grep
-  regsub -all { grep } $new_line " $grep " new_line
-  #replace as with GNU as
-  regsub -all {\| as } $new_line "| $gas " new_line
-
-  #valgind related stuff
-# regsub -all {bugpoint } $new_line "$valgrind bugpoint " new_line
-  regsub -all {llc } $new_line "$valgrind llc " new_line
-  regsub -all {lli } $new_line "$valgrind lli " new_line
-  regsub -all {llvm-ar } $new_line "$valgrind llvm-ar " new_line
-  regsub -all {llvm-as } $new_line "$valgrind llvm-as " new_line
-  regsub -all {llvm-bcanalyzer } $new_line "$valgrind llvm-bcanalyzer " new_line
-  regsub -all {llvm-dis } $new_line "$valgrind llvm-dis " new_line
-  regsub -all {llvm-extract } $new_line "$valgrind llvm-extract " new_line
-  regsub -all {llvm-ld } $new_line "$valgrind llvm-ld " new_line
-  regsub -all {llvm-link } $new_line "$valgrind llvm-link " new_line
-  regsub -all {llvm-nm } $new_line "$valgrind llvm-nm " new_line
-  regsub -all {llvm-prof } $new_line "$valgrind llvm-prof " new_line
-  regsub -all {llvm-ranlib } $new_line "$valgrind llvm-ranlib " new_line
-  regsub -all {([^a-zA-Z_-])opt } $new_line "\\1$valgrind opt " new_line
-  regsub -all {^opt } $new_line "$valgrind opt " new_line
-  regsub -all {llvm-tblgen } $new_line "$valgrind llvm-tblgen " new_line
-  regsub -all "not $valgrind " $new_line "$valgrind not " new_line
-
-  return $new_line
-}
-
-# This procedure runs the set of tests for the test_source_files array.
-proc RunLLVMTests { test_source_files } {
-  global srcroot objroot srcdir objdir subdir target_triplet
-  set timeout 60
-
-  set path [file join $objdir $subdir]
-
-  #Make Output Directory if it does not exist already
-  if { [file exists path] } {
-    cd $path
-  } else {
-    file mkdir $path
-    cd $path
-  }
-
-  file mkdir Output
-  cd Output
-
-  foreach test $test_source_files {
-    #Should figure out best way to set the timeout
-    #set timeout 40
-
-    set filename [file tail $test]
-    verbose "ABOUT TO RUN: $filename" 2
-    set outcome PASS
-    set tmpFile "$filename.tmp"
-
-    # Mark that it should not be XFAIL for this target.
-    set targetPASS 0
-
-    #set hasRunline bool to check if testcase has a runline
-    set numLines 0
-
-    # Open the test file and start reading lines
-    set testFileId [ open $test r]
-    set runline ""
-    set PRNUMS ""
-    foreach line [split [read $testFileId] \n] {
-
-      # if its the END. line then stop parsing (optimization for big files)
-      if {[regexp {END.[[:space:]]*$} $line match endofscript]} {
-        break
-
-      # if the line is continued, concatenate and continue the loop
-      } elseif {[regexp {RUN: *(.+)(\\)$} $line match oneline suffix]} {
-        set runline "$runline$oneline "
-
-      # if its a terminating RUN: line then do substitution on the whole line
-      # and then save the line.
-      } elseif {[regexp {RUN: *(.+)$} $line match oneline suffix]} {
-        set runline "$runline$oneline"
-        set runline [ substitute $runline $test $tmpFile ]
-        set lines($numLines) $runline
-        set numLines [expr $numLines + 1]
-        set runline ""
-
-      # if its an PR line, save the problem report number
-      } elseif {[regexp {PR([0-9]+)} $line match prnum]} {
-        if {$PRNUMS == ""} {
-          set PRNUMS "PR$prnum"
-        } else {
-          set PRNUMS "$PRNUMS,$prnum"
-        }
-      # if its an XFAIL line, see if we should be XFAILing or not.
-      } elseif {[regexp {XFAIL:[ *](.+)} $line match targets]} {
-        set targets
-
-        #split up target if more then 1 specified
-        foreach target [split $targets ,] {
-          if { $target == "*" } {
-              if {$targetPASS != 1} {
-                 set outcome XFAIL
-              }
-          } elseif { [regexp $target $target_triplet match] } {
-              if {$targetPASS != 1} {
-                 set outcome XFAIL
-              }
-          }
-        }
-      } elseif {[regexp {XTARGET:[ *](.+)} $line match targets]} {
-        set targets
-
-        #split up target if more then 1 specified
-        foreach target [split $targets ,] {
-          if { [regexp {\*} $target match] } {
-              set targetPASS 1
-              set outcome PASS
-          } elseif { [regexp $target $target_triplet match] } {
-              set targetPASS 1
-              set outcome PASS
-          }
-        }
-      }
-    }
-
-    # Done reading the script
-    close $testFileId
-
-
-    if { $numLines == 0 } {
-      fail "$test: \nDoes not have a RUN line\n"
-    } else {
-      set failed 0
-      for { set i 0 } { $i < $numLines } { set i [ expr $i + 1 ] } {
-        regsub ^.*RUN:(.*) $lines($i) \1 theLine
-        set resultmsg [execOneLine $test $PRNUMS $outcome $i $theLine ]
-        if { $resultmsg != "" } {
-          if { $outcome == "XFAIL" } {
-            xfail "$resultmsg"
-          } else {
-            fail "$resultmsg"
-          }
-          set failed 1
-          break
-        }
-      }
-      if { $failed } {
-        continue
-      } else {
-        if { $PRNUMS != "" } {
-          set PRNUMS " for $PRNUMS"
-        }
-        if { $outcome == "XFAIL" } {
-          xpass "$test$PRNUMS"
-        } else {
-          pass "$test$PRNUMS"
-        }
-      }
-    }
-  }
-}
-
-# This procedure provides an interface to check the TARGETS_TO_BUILD makefile
-# variable to see if a particular target has been configured to build. This
-# helps avoid running tests for targets that aren't available.
-proc llvm_supports_target { tgtName } {
-  global TARGETS_TO_BUILD
-  foreach target [split $TARGETS_TO_BUILD] {
-    if { [regexp $tgtName $target match] } {
-      return 1
-    }
-  }
-  return 0
-}
-
-proc llvm_supports_darwin_and_target { tgtName } {
-  global target_triplet
-  if { [ llvm_supports_target $tgtName ] } {
-    if { [regexp darwin $target_triplet match] } {
-      return 1
-    }
-  }
-  return 0
-}
-
-# This procedure provides an interface to check the BINDINGS_TO_BUILD makefile
-# variable to see if a particular binding has been configured to build.
-proc llvm_supports_binding { name } {
-  global llvm_bindings
-  foreach item [split $llvm_bindings] {
-    if { [regexp $name $item match] } {
-      return 1
-    }
-  }
-  return 0
-}
diff --git a/test/lit.cfg b/test/lit.cfg
index 6bc170c..acbe97a 100644
--- a/test/lit.cfg
+++ b/test/lit.cfg
@@ -141,6 +141,29 @@ for line in open(os.path.join(config.llvm_obj_root, 'test', 'site.exp')):
     if m:
         site_exp[m.group(1)] = m.group(2)
 
+# Provide target_triple for use in XFAIL and XTARGET.
+config.target_triple = site_exp['target_triplet']
+
+# When running under valgrind, we mangle '-vg' or '-vg_leak' onto the end of the
+# triple so we can check it with XFAIL and XTARGET.
+config.target_triple += lit.valgrindTriple
+
+# Process jit implementation option
+jit_impl_cfg = lit.params.get('jit_impl', None)
+if jit_impl_cfg == 'mcjit':
+  # When running with mcjit, append 'mcjit' to the target triple (followed by
+  # '-ia32' on i686, '-ia64' on x86_64) and expand %lli to 'lli -use-mcjit'.
+  if 'i686' in config.target_triple:
+    config.target_triple += jit_impl_cfg + '-ia32'
+  elif 'x86_64' in config.target_triple:
+    config.target_triple += jit_impl_cfg + '-ia64'
+  else:
+    config.target_triple += jit_impl_cfg
+
+  config.substitutions.append( ('%lli', 'lli -use-mcjit') )
+else:
+  config.substitutions.append( ('%lli', 'lli') )
+
 # Add substitutions.
 for sub in ['link', 'shlibext', 'ocamlopt', 'llvmshlibdir']:
     config.substitutions.append(('%' + sub, site_exp[sub]))
@@ -197,13 +220,6 @@ for pattern in [r"\bbugpoint\b(?!-)",   r"(?<!/|-)\bclang\b(?!-)",
 
 excludes = []
 
-# Provide target_triple for use in XFAIL and XTARGET.
-config.target_triple = site_exp['target_triplet']
-
-# When running under valgrind, we mangle '-vg' or '-vg_leak' onto the end of the
-# triple so we can check it with XFAIL and XTARGET.
-config.target_triple += lit.valgrindTriple
-
 # Provide llvm_supports_target for use in local configs.
 targets = set(site_exp["TARGETS_TO_BUILD"].split())
 def llvm_supports_target(name):
@@ -237,7 +253,6 @@ def on_clone(parent, cfg, for_path):
     libPath = os.path.join(os.path.dirname(for_path),
                            'dg.exp')
     if not os.path.exists(libPath):
-        cfg.unsupported = True
         return
 
     # Reset unsupported, in case we inherited it.
diff --git a/test/lit.site.cfg.in b/test/lit.site.cfg.in
index 5a42e5c..8b81186 100644
--- a/test/lit.site.cfg.in
+++ b/test/lit.site.cfg.in
@@ -7,6 +7,8 @@ config.lit_tools_dir = "@LLVM_LIT_TOOLS_DIR@"
 config.python_executable = "@PYTHON_EXECUTABLE@"
 config.enable_shared = @ENABLE_SHARED@
 config.enable_assertions = @ENABLE_ASSERTIONS@
+config.targets_to_build = "@TARGETS_TO_BUILD@"
+config.llvm_bindings = "@LLVM_BINDINGS@"
 
 # Support substitution of the tools_dir with user parameters. This is
 # used when we can't determine the tool dir at configuration time.