317 files changed, 8268 insertions, 1149 deletions
diff --git a/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll b/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
index 3694aaa..0bfe331 100644
--- a/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
+++ b/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6,+vfp2 | FileCheck %s
+; RUN: llc < %s -mtriple=arm-apple-ios -mattr=+v6,+vfp2 | FileCheck %s
 
 @quant_coef = external global [6 x [4 x [4 x i32]]]		; <[6 x [4 x [4 x i32]]]*> [#uses=1]
 @dequant_coef = external global [6 x [4 x [4 x i32]]]		; <[6 x [4 x [4 x i32]]]*> [#uses=1]
diff --git a/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll b/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll
index df9dbca..0ae7f84 100644
--- a/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll
+++ b/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll
@@ -11,7 +11,7 @@ entry:
 
 ; THUMB:     t:
 ; THUMB-NOT: str r0, [r1], r0
-; THUMB:     str r2, [r1]
+; THUMB:     str r1, [r0]
   %0 = getelementptr inbounds %struct.foo* %this, i32 0, i32 1 ; <i64*> [#uses=1]
   store i32 0, i32* inttoptr (i32 8 to i32*), align 8
   br i1 undef, label %bb.nph96, label %bb3
diff --git a/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll b/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll
index a65cf4b..e0f50c9 100644
--- a/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll
+++ b/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon -O0 -regalloc=basic
+; RUN: llc < %s -march=arm -mattr=+neon -O0 -optimize-regalloc -regalloc=basic
 
 ; This test would crash the rewriter when trying to handle a spill after one of
 ; the @llvm.arm.neon.vld3.v8i8 defined three parts of a register.
diff --git a/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll b/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll
index b9d5600..1aee508 100644
--- a/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll
+++ b/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll
@@ -12,7 +12,7 @@ target triple = "thumbv7-apple-darwin10"
 
 ; CHECK: vld1.64 {d16, d17}, [r{{.}}]
 ; CHECK-NOT: vld1.64 {d16, d17}
-; CHECK: vmov.f64 d19, d16
+; CHECK: vmov.f64
 
 define i32 @test(i8* %arg) nounwind {
 entry:
diff --git a/test/CodeGen/ARM/2010-11-29-PrologueBug.ll b/test/CodeGen/ARM/2010-11-29-PrologueBug.ll
index e3c18ce..da4d157 100644
--- a/test/CodeGen/ARM/2010-11-29-PrologueBug.ll
+++ b/test/CodeGen/ARM/2010-11-29-PrologueBug.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=armv7-apple-darwin   | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB2
+; RUN: llc < %s -mtriple=armv7-apple-ios   | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB2
 ; rdar://8690640
 
 define i32* @t(i32* %x) nounwind {
diff --git a/test/CodeGen/ARM/2010-12-07-PEIBug.ll b/test/CodeGen/ARM/2010-12-07-PEIBug.ll
index c65952b..23e1aa1 100644
--- a/test/CodeGen/ARM/2010-12-07-PEIBug.ll
+++ b/test/CodeGen/ARM/2010-12-07-PEIBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 | FileCheck %s
 ; rdar://8728956
 
 define hidden void @foo() nounwind ssp {
diff --git a/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll b/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll
index ccda281..2faa04a 100644
--- a/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll
+++ b/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -relocation-model=pic -disable-fp-elim -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -relocation-model=pic -disable-fp-elim -disable-cgp-delete-dead-blocks -mcpu=cortex-a8 | FileCheck %s
 
 ; Do not form Thumb2 ldrd / strd if the offset is not multiple of 4.
 ; rdar://9133587
diff --git a/test/CodeGen/ARM/2011-06-16-TailCallByVal.ll b/test/CodeGen/ARM/2011-06-16-TailCallByVal.ll
index 7baacfe..3e78c46 100644
--- a/test/CodeGen/ARM/2011-06-16-TailCallByVal.ll
+++ b/test/CodeGen/ARM/2011-06-16-TailCallByVal.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -arm-tail-calls=1 | FileCheck %s
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
-target triple = "thumbv7-apple-darwin10"
+target triple = "thumbv7-apple-ios"
 
 %struct.A = type <{ i16, i16, i32, i16, i16, i32, i16, [8 x %struct.B], [418 x i8], %struct.C }>
 %struct.B = type <{ i32, i16, i16 }>
diff --git a/test/CodeGen/ARM/2011-08-25-ldmia_ret.ll b/test/CodeGen/ARM/2011-08-25-ldmia_ret.ll
index 17264ee..216057a 100644
--- a/test/CodeGen/ARM/2011-08-25-ldmia_ret.ll
+++ b/test/CodeGen/ARM/2011-08-25-ldmia_ret.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a9 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a9 | FileCheck %s
 ; Test that ldmia_ret preserves implicit operands for return values.
 ;
 ; This CFG is reduced from a benchmark miscompile. With current
diff --git a/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll b/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll
index 86e8712..6fbae19 100644
--- a/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll
+++ b/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll
@@ -8,11 +8,11 @@ define void @test_sqrt(<4 x float>* %X) nounwind {
 
 ; CHECK:      movw    r1, :lower16:{{.*}}
 ; CHECK:      movt    r1, :upper16:{{.*}}
-; CHECK:      vldmia  r1, {[[short0:s[0-9]+]], [[short1:s[0-9]+]], [[short2:s[0-9]+]], [[short3:s[0-9]+]]}
-; CHECK:      vsqrt.f32       {{s[0-9]+}}, [[short3]]
-; CHECK:      vsqrt.f32       {{s[0-9]+}}, [[short2]]
-; CHECK:      vsqrt.f32       {{s[0-9]+}}, [[short1]]
-; CHECK:      vsqrt.f32       {{s[0-9]+}}, [[short0]]
+; CHECK:      vldmia  r1
+; CHECK:      vsqrt.f32       {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK:      vsqrt.f32       {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK:      vsqrt.f32       {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK:      vsqrt.f32       {{s[0-9]+}}, {{s[0-9]+}}
 ; CHECK:      vstmia  {{.*}}
 
 L.entry:
diff --git a/test/CodeGen/ARM/2011-12-19-sjlj-clobber.ll b/test/CodeGen/ARM/2011-12-19-sjlj-clobber.ll
new file mode 100644
index 0000000..ddb7632
--- /dev/null
+++ b/test/CodeGen/ARM/2011-12-19-sjlj-clobber.ll
@@ -0,0 +1,55 @@
+; RUN: llc < %s -O0 -mtriple=thumbv7-apple-ios | FileCheck %s
+
+; Radar 10567930: Make sure that all the caller-saved registers are saved and
+; restored in a function with setjmp/longjmp EH.  In particular, r6 was not
+; being saved here.
+; CHECK: push {r4, r5, r6, r7, lr}
+
+%0 = type opaque
+%struct.NSConstantString = type { i32*, i32, i8*, i32 }
+
+define i32 @asdf(i32 %a, i32 %b, i8** %c, i8* %d) {
+bb:
+  %tmp = alloca i32, align 4
+  %tmp1 = alloca i32, align 4
+  %tmp2 = alloca i8*, align 4
+  %tmp3 = alloca i1
+  %myException = alloca %0*, align 4
+  %tmp4 = alloca i8*
+  %tmp5 = alloca i32
+  %exception = alloca %0*, align 4
+  store i32 %a, i32* %tmp, align 4
+  store i32 %b, i32* %tmp1, align 4
+  store i8* %d, i8** %tmp2, align 4
+  store i1 false, i1* %tmp3
+  %tmp7 = load i8** %c
+  %tmp10 = invoke %0* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to %0* (i8*, i8*, %0*)*)(i8* %tmp7, i8* %d, %0* null)
+          to label %bb11 unwind label %bb15
+
+bb11:                                             ; preds = %bb
+  store %0* %tmp10, %0** %myException, align 4
+  %tmp12 = load %0** %myException, align 4
+  %tmp13 = bitcast %0* %tmp12 to i8*
+  invoke void @objc_exception_throw(i8* %tmp13) noreturn
+          to label %bb14 unwind label %bb15
+
+bb14:                                             ; preds = %bb11
+  unreachable
+
+bb15:                                             ; preds = %bb11, %bb
+  %tmp16 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*)
+          catch i8* null
+  %tmp17 = extractvalue { i8*, i32 } %tmp16, 0
+  store i8* %tmp17, i8** %tmp4
+  %tmp18 = extractvalue { i8*, i32 } %tmp16, 1
+  store i32 %tmp18, i32* %tmp5
+  store i1 true, i1* %tmp3
+  br label %bb56
+
+bb56:
+  unreachable
+}
+
+declare i8* @objc_msgSend(i8*, i8*, ...) nonlazybind
+declare i32 @__objc_personality_v0(...)
+declare void @objc_exception_throw(i8*)
diff --git a/test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll b/test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll
new file mode 100644
index 0000000..926daaf
--- /dev/null
+++ b/test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll
@@ -0,0 +1,105 @@
+; RUN: llc < %s -mcpu=cortex-a8 -verify-machineinstrs
+; PR11829
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv7-none-linux-gnueabi"
+
+define arm_aapcs_vfpcc void @foo(i8* nocapture %arg) nounwind uwtable align 2 {
+bb:
+  br i1 undef, label %bb1, label %bb2
+
+bb1:                                              ; preds = %bb
+  unreachable
+
+bb2:                                              ; preds = %bb
+  br label %bb3
+
+bb3:                                              ; preds = %bb4, %bb2
+  %tmp = icmp slt i32 undef, undef
+  br i1 %tmp, label %bb4, label %bb67
+
+bb4:                                              ; preds = %bb3
+  %tmp5 = load <4 x i32>* undef, align 16, !tbaa !0
+  %tmp6 = and <4 x i32> %tmp5, <i32 8388607, i32 8388607, i32 8388607, i32 8388607>
+  %tmp7 = or <4 x i32> %tmp6, <i32 1065353216, i32 1065353216, i32 1065353216, i32 1065353216>
+  %tmp8 = bitcast <4 x i32> %tmp7 to <4 x float>
+  %tmp9 = fsub <4 x float> %tmp8, bitcast (i128 or (i128 shl (i128 zext (i64 trunc (i128 lshr (i128 bitcast (<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> to i128), i128 64) to i64) to i128), i128 64), i128 zext (i64 trunc (i128 bitcast (<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> to i128) to i64) to i128)) to <4 x float>)
+  %tmp10 = fmul <4 x float> undef, %tmp9
+  %tmp11 = fadd <4 x float> undef, %tmp10
+  %tmp12 = bitcast <4 x float> zeroinitializer to i128
+  %tmp13 = lshr i128 %tmp12, 64
+  %tmp14 = trunc i128 %tmp13 to i64
+  %tmp15 = insertvalue [2 x i64] undef, i64 %tmp14, 1
+  %tmp16 = call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %tmp11) nounwind
+  %tmp17 = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %tmp16, <4 x float> %tmp11) nounwind
+  %tmp18 = fmul <4 x float> %tmp17, %tmp16
+  %tmp19 = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %tmp18, <4 x float> %tmp11) nounwind
+  %tmp20 = fmul <4 x float> %tmp19, %tmp18
+  %tmp21 = fmul <4 x float> %tmp20, zeroinitializer
+  %tmp22 = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %tmp21, <4 x float> undef) nounwind
+  call arm_aapcs_vfpcc  void @bar(i8* null, i8* undef, <4 x i32>* undef, [2 x i64] zeroinitializer) nounwind
+  %tmp23 = bitcast <4 x float> %tmp22 to i128
+  %tmp24 = trunc i128 %tmp23 to i64
+  %tmp25 = insertvalue [2 x i64] undef, i64 %tmp24, 0
+  %tmp26 = insertvalue [2 x i64] %tmp25, i64 0, 1
+  %tmp27 = load float* undef, align 4, !tbaa !2
+  %tmp28 = insertelement <4 x float> undef, float %tmp27, i32 3
+  %tmp29 = load <4 x i32>* undef, align 16, !tbaa !0
+  %tmp30 = and <4 x i32> %tmp29, <i32 8388607, i32 8388607, i32 8388607, i32 8388607>
+  %tmp31 = or <4 x i32> %tmp30, <i32 1065353216, i32 1065353216, i32 1065353216, i32 1065353216>
+  %tmp32 = bitcast <4 x i32> %tmp31 to <4 x float>
+  %tmp33 = fsub <4 x float> %tmp32, bitcast (i128 or (i128 shl (i128 zext (i64 trunc (i128 lshr (i128 bitcast (<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> to i128), i128 64) to i64) to i128), i128 64), i128 zext (i64 trunc (i128 bitcast (<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> to i128) to i64) to i128)) to <4 x float>)
+  %tmp34 = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> undef, <4 x float> %tmp28) nounwind
+  %tmp35 = fmul <4 x float> %tmp34, undef
+  %tmp36 = fmul <4 x float> %tmp35, undef
+  %tmp37 = call arm_aapcs_vfpcc  i8* undef(i8* undef) nounwind
+  %tmp38 = load float* undef, align 4, !tbaa !2
+  %tmp39 = insertelement <2 x float> undef, float %tmp38, i32 0
+  %tmp40 = call arm_aapcs_vfpcc  i8* undef(i8* undef) nounwind
+  %tmp41 = load float* undef, align 4, !tbaa !2
+  %tmp42 = insertelement <4 x float> undef, float %tmp41, i32 3
+  %tmp43 = shufflevector <2 x float> %tmp39, <2 x float> undef, <4 x i32> zeroinitializer
+  %tmp44 = fmul <4 x float> %tmp33, %tmp43
+  %tmp45 = fadd <4 x float> %tmp42, %tmp44
+  %tmp46 = fsub <4 x float> %tmp45, undef
+  %tmp47 = fmul <4 x float> %tmp46, %tmp36
+  %tmp48 = fadd <4 x float> undef, %tmp47
+  %tmp49 = call arm_aapcs_vfpcc  i8* undef(i8* undef) nounwind
+  %tmp50 = load float* undef, align 4, !tbaa !2
+  %tmp51 = insertelement <4 x float> undef, float %tmp50, i32 3
+  %tmp52 = call arm_aapcs_vfpcc float* null(i8* undef) nounwind
+  %tmp54 = load float* %tmp52, align 4, !tbaa !2
+  %tmp55 = insertelement <4 x float> undef, float %tmp54, i32 3
+  %tmp56 = fsub <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %tmp22
+  %tmp57 = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %tmp56, <4 x float> %tmp55) nounwind
+  %tmp58 = fmul <4 x float> undef, %tmp57
+  %tmp59 = fsub <4 x float> %tmp51, %tmp48
+  %tmp60 = fsub <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %tmp58
+  %tmp61 = fmul <4 x float> %tmp59, %tmp60
+  %tmp62 = fadd <4 x float> %tmp48, %tmp61
+  call arm_aapcs_vfpcc  void @baz(i8* undef, i8* undef, [2 x i64] %tmp26, <4 x i32>* undef)
+  %tmp63 = bitcast <4 x float> %tmp62 to i128
+  %tmp64 = lshr i128 %tmp63, 64
+  %tmp65 = trunc i128 %tmp64 to i64
+  %tmp66 = insertvalue [2 x i64] zeroinitializer, i64 %tmp65, 1
+  call arm_aapcs_vfpcc  void @quux(i8* undef, i8* undef, [2 x i64] undef, i8* undef, [2 x i64] %tmp66, i8* undef, i8* undef, [2 x i64] %tmp26, [2 x i64] %tmp15, <4 x i32>* undef)
+  br label %bb3
+
+bb67:                                             ; preds = %bb3
+  ret void
+}
+
+declare arm_aapcs_vfpcc void @bar(i8*, i8*, <4 x i32>*, [2 x i64])
+
+declare arm_aapcs_vfpcc void @baz(i8*, i8* nocapture, [2 x i64], <4 x i32>* nocapture) nounwind uwtable inlinehint align 2
+
+declare arm_aapcs_vfpcc void @quux(i8*, i8*, [2 x i64], i8* nocapture, [2 x i64], i8* nocapture, i8* nocapture, [2 x i64], [2 x i64], <4 x i32>* nocapture) nounwind uwtable inlinehint align 2
+
+declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>) nounwind readnone
+
+declare <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float>, <4 x float>) nounwind readnone
+
+declare <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float>) nounwind readnone
+
+!0 = metadata !{metadata !"omnipotent char", metadata !1}
+!1 = metadata !{metadata !"Simple C/C++ TBAA", null}
+!2 = metadata !{metadata !"float", metadata !0}
diff --git a/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll b/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll
new file mode 100644
index 0000000..872eca3
--- /dev/null
+++ b/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll
@@ -0,0 +1,67 @@
+; RUN: llc < %s -mcpu=cortex-a8 -verify-machineinstrs -verify-coalescing
+; PR11841
+; PR11829
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv7-none-linux-eabi"
+
+; This test case is exercising REG_SEQUENCE, and chains of REG_SEQUENCE.
+define arm_aapcs_vfpcc void @foo(i8* nocapture %arg, i8* %arg1) nounwind align 2 {
+bb:
+  %tmp = load <2 x float>* undef, align 8, !tbaa !0
+  %tmp2 = extractelement <2 x float> %tmp, i32 0
+  %tmp3 = insertelement <4 x float> undef, float %tmp2, i32 0
+  %tmp4 = insertelement <4 x float> %tmp3, float 0.000000e+00, i32 1
+  %tmp5 = insertelement <4 x float> %tmp4, float 0.000000e+00, i32 2
+  %tmp6 = insertelement <4 x float> %tmp5, float 0.000000e+00, i32 3
+  %tmp7 = extractelement <2 x float> %tmp, i32 1
+  %tmp8 = insertelement <4 x float> %tmp3, float %tmp7, i32 1
+  %tmp9 = insertelement <4 x float> %tmp8, float 0.000000e+00, i32 2
+  %tmp10 = insertelement <4 x float> %tmp9, float 0.000000e+00, i32 3
+  %tmp11 = bitcast <4 x float> %tmp6 to <2 x i64>
+  %tmp12 = shufflevector <2 x i64> %tmp11, <2 x i64> undef, <1 x i32> zeroinitializer
+  %tmp13 = bitcast <1 x i64> %tmp12 to <2 x float>
+  %tmp14 = shufflevector <2 x float> %tmp13, <2 x float> undef, <4 x i32> zeroinitializer
+  %tmp15 = bitcast <4 x float> %tmp14 to <2 x i64>
+  %tmp16 = shufflevector <2 x i64> %tmp15, <2 x i64> undef, <1 x i32> zeroinitializer
+  %tmp17 = bitcast <1 x i64> %tmp16 to <2 x float>
+  %tmp18 = extractelement <2 x float> %tmp17, i32 0
+  tail call arm_aapcs_vfpcc  void @bar(i8* undef, float %tmp18, float undef, float 0.000000e+00) nounwind
+  %tmp19 = bitcast <4 x float> %tmp10 to <2 x i64>
+  %tmp20 = shufflevector <2 x i64> %tmp19, <2 x i64> undef, <1 x i32> zeroinitializer
+  %tmp21 = bitcast <1 x i64> %tmp20 to <2 x float>
+  %tmp22 = shufflevector <2 x float> %tmp21, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  %tmp23 = bitcast <4 x float> %tmp22 to <2 x i64>
+  %tmp24 = shufflevector <2 x i64> %tmp23, <2 x i64> undef, <1 x i32> zeroinitializer
+  %tmp25 = bitcast <1 x i64> %tmp24 to <2 x float>
+  %tmp26 = extractelement <2 x float> %tmp25, i32 0
+  tail call arm_aapcs_vfpcc  void @bar(i8* undef, float undef, float %tmp26, float 0.000000e+00) nounwind
+  ret void
+}
+
+define arm_aapcs_vfpcc void @foo2() nounwind uwtable {
+entry:
+  br i1 undef, label %for.end, label %cond.end295
+
+cond.end295:                                      ; preds = %entry
+  %shuffle.i39.i.i1035 = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> zeroinitializer
+  %shuffle.i38.i.i1036 = shufflevector <2 x i64> zeroinitializer, <2 x i64> undef, <1 x i32> zeroinitializer
+  %shuffle.i37.i.i1037 = shufflevector <1 x i64> %shuffle.i39.i.i1035, <1 x i64> %shuffle.i38.i.i1036, <2 x i32> <i32 0, i32 1>
+  %0 = bitcast <2 x i64> %shuffle.i37.i.i1037 to <4 x float>
+  %1 = bitcast <4 x float> undef to <2 x i64>
+  %shuffle.i36.i.i = shufflevector <2 x i64> %1, <2 x i64> undef, <1 x i32> zeroinitializer
+  %shuffle.i35.i.i = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> zeroinitializer
+  %shuffle.i34.i.i = shufflevector <1 x i64> %shuffle.i36.i.i, <1 x i64> %shuffle.i35.i.i, <2 x i32> <i32 0, i32 1>
+  %2 = bitcast <2 x i64> %shuffle.i34.i.i to <4 x float>
+  tail call void @llvm.arm.neon.vst1.v4f32(i8* undef, <4 x float> %0, i32 4) nounwind
+  tail call void @llvm.arm.neon.vst1.v4f32(i8* undef, <4 x float> %2, i32 4) nounwind
+  unreachable
+
+for.end:                                          ; preds = %entry
+  ret void
+}
+
+declare arm_aapcs_vfpcc void @bar(i8*, float, float, float)
+declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind
+
+!0 = metadata !{metadata !"omnipotent char", metadata !1}
+!1 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/ARM/2012-01-26-CoalescerBug.ll b/test/CodeGen/ARM/2012-01-26-CoalescerBug.ll
new file mode 100644
index 0000000..ec5b2e9
--- /dev/null
+++ b/test/CodeGen/ARM/2012-01-26-CoalescerBug.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -verify-coalescing
+; PR11861
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv7-none-linux-eabi"
+
+define arm_aapcs_vfpcc void @foo() nounwind uwtable align 2 {
+  br label %1
+
+; <label>:1                                       ; preds = %1, %0
+  %2 = phi <4 x float> [ undef, %0 ], [ %11, %1 ]
+  %3 = bitcast <4 x float> %2 to <2 x i64>
+  %4 = shufflevector <2 x i64> %3, <2 x i64> undef, <1 x i32> zeroinitializer
+  %5 = xor <2 x i32> zeroinitializer, <i32 -1, i32 -1>
+  %6 = bitcast <2 x i32> zeroinitializer to <2 x float>
+  %7 = shufflevector <2 x float> zeroinitializer, <2 x float> %6, <2 x i32> <i32 0, i32 2>
+  %8 = shufflevector <2 x i64> %3, <2 x i64> undef, <1 x i32> <i32 1>
+  %9 = bitcast <2 x float> %7 to <1 x i64>
+  %10 = shufflevector <1 x i64> %9, <1 x i64> %8, <2 x i32> <i32 0, i32 1>
+  %11 = bitcast <2 x i64> %10 to <4 x float>
+  br label %1
+}
diff --git a/test/CodeGen/ARM/2012-01-26-CopyPropKills.ll b/test/CodeGen/ARM/2012-01-26-CopyPropKills.ll
new file mode 100644
index 0000000..5f24e42
--- /dev/null
+++ b/test/CodeGen/ARM/2012-01-26-CopyPropKills.ll
@@ -0,0 +1,121 @@
+; RUN: llc < %s -mcpu=cortex-a9 -join-liveintervals=0 -verify-machineinstrs
+; PR11765
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv7-none-linux-gnueabi"
+
+; This test case exercises the MachineCopyPropagation pass by disabling the
+; RegisterCoalescer.
+
+define arm_aapcs_vfpcc void @foo(i8* %arg) nounwind uwtable align 2 {
+bb:
+  br i1 undef, label %bb1, label %bb2
+
+bb1:                                              ; preds = %bb
+  unreachable
+
+bb2:                                              ; preds = %bb
+  br i1 undef, label %bb92, label %bb3
+
+bb3:                                              ; preds = %bb2
+  %tmp = or <4 x i32> undef, undef
+  %tmp4 = bitcast <4 x i32> %tmp to <4 x float>
+  %tmp5 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %tmp4
+  %tmp6 = bitcast <4 x i32> zeroinitializer to <4 x float>
+  %tmp7 = fmul <4 x float> %tmp6, <float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>
+  %tmp8 = bitcast <4 x float> %tmp7 to <2 x i64>
+  %tmp9 = shufflevector <2 x i64> %tmp8, <2 x i64> undef, <1 x i32> zeroinitializer
+  %tmp10 = bitcast <1 x i64> %tmp9 to <2 x float>
+  %tmp11 = shufflevector <2 x i64> %tmp8, <2 x i64> undef, <1 x i32> <i32 1>
+  %tmp12 = bitcast <1 x i64> %tmp11 to <2 x float>
+  %tmp13 = shufflevector <2 x float> %tmp10, <2 x float> %tmp12, <2 x i32> <i32 0, i32 2>
+  %tmp14 = shufflevector <2 x float> %tmp10, <2 x float> undef, <2 x i32> <i32 1, i32 2>
+  %tmp15 = bitcast <2 x float> %tmp14 to <1 x i64>
+  %tmp16 = bitcast <4 x i32> zeroinitializer to <2 x i64>
+  %tmp17 = shufflevector <2 x i64> %tmp16, <2 x i64> undef, <1 x i32> zeroinitializer
+  %tmp18 = bitcast <1 x i64> %tmp17 to <2 x i32>
+  %tmp19 = and <2 x i32> %tmp18, <i32 -1, i32 0>
+  %tmp20 = bitcast <2 x float> %tmp13 to <2 x i32>
+  %tmp21 = and <2 x i32> %tmp20, <i32 0, i32 -1>
+  %tmp22 = or <2 x i32> %tmp19, %tmp21
+  %tmp23 = bitcast <2 x i32> %tmp22 to <1 x i64>
+  %tmp24 = shufflevector <1 x i64> %tmp23, <1 x i64> undef, <2 x i32> <i32 0, i32 1>
+  %tmp25 = bitcast <2 x i64> %tmp24 to <4 x float>
+  %tmp26 = shufflevector <2 x i64> %tmp16, <2 x i64> undef, <1 x i32> <i32 1>
+  %tmp27 = bitcast <1 x i64> %tmp26 to <2 x i32>
+  %tmp28 = and <2 x i32> %tmp27, <i32 -1, i32 0>
+  %tmp29 = and <2 x i32> undef, <i32 0, i32 -1>
+  %tmp30 = or <2 x i32> %tmp28, %tmp29
+  %tmp31 = bitcast <2 x i32> %tmp30 to <1 x i64>
+  %tmp32 = insertelement <4 x float> %tmp25, float 0.000000e+00, i32 3
+  %tmp33 = fmul <4 x float> undef, <float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>
+  %tmp34 = fadd <4 x float> %tmp33, %tmp32
+  %tmp35 = fmul <4 x float> %tmp33, zeroinitializer
+  %tmp36 = fadd <4 x float> %tmp35, zeroinitializer
+  %tmp37 = fadd <4 x float> %tmp35, zeroinitializer
+  %tmp38 = bitcast <4 x float> %tmp34 to <2 x i64>
+  %tmp39 = shufflevector <2 x i64> %tmp38, <2 x i64> undef, <1 x i32> zeroinitializer
+  %tmp40 = bitcast <1 x i64> %tmp39 to <2 x float>
+  %tmp41 = shufflevector <2 x float> %tmp40, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  %tmp42 = load <4 x float>* null, align 16, !tbaa !0
+  %tmp43 = fmul <4 x float> %tmp42, %tmp41
+  %tmp44 = load <4 x float>* undef, align 16, !tbaa !0
+  %tmp45 = fadd <4 x float> undef, %tmp43
+  %tmp46 = fadd <4 x float> undef, %tmp45
+  %tmp47 = bitcast <4 x float> %tmp36 to <2 x i64>
+  %tmp48 = shufflevector <2 x i64> %tmp47, <2 x i64> undef, <1 x i32> zeroinitializer
+  %tmp49 = bitcast <1 x i64> %tmp48 to <2 x float>
+  %tmp50 = shufflevector <2 x float> %tmp49, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  %tmp51 = fmul <4 x float> %tmp42, %tmp50
+  %tmp52 = fmul <4 x float> %tmp44, undef
+  %tmp53 = fadd <4 x float> %tmp52, %tmp51
+  %tmp54 = fadd <4 x float> undef, %tmp53
+  %tmp55 = bitcast <4 x float> %tmp37 to <2 x i64>
+  %tmp56 = shufflevector <2 x i64> %tmp55, <2 x i64> undef, <1 x i32> <i32 1>
+  %tmp57 = bitcast <1 x i64> %tmp56 to <2 x float>
+  %tmp58 = shufflevector <2 x float> %tmp57, <2 x float> undef, <4 x i32> zeroinitializer
+  %tmp59 = fmul <4 x float> undef, %tmp58
+  %tmp60 = fadd <4 x float> %tmp59, undef
+  %tmp61 = fadd <4 x float> %tmp60, zeroinitializer
+  %tmp62 = load void (i8*, i8*)** undef, align 4
+  call arm_aapcs_vfpcc  void %tmp62(i8* sret undef, i8* undef) nounwind
+  %tmp63 = bitcast <4 x float> %tmp46 to i128
+  %tmp64 = bitcast <4 x float> %tmp54 to i128
+  %tmp65 = bitcast <4 x float> %tmp61 to i128
+  %tmp66 = lshr i128 %tmp63, 64
+  %tmp67 = trunc i128 %tmp66 to i64
+  %tmp68 = insertvalue [8 x i64] undef, i64 %tmp67, 1
+  %tmp69 = insertvalue [8 x i64] %tmp68, i64 undef, 2
+  %tmp70 = lshr i128 %tmp64, 64
+  %tmp71 = trunc i128 %tmp70 to i64
+  %tmp72 = insertvalue [8 x i64] %tmp69, i64 %tmp71, 3
+  %tmp73 = trunc i128 %tmp65 to i64
+  %tmp74 = insertvalue [8 x i64] %tmp72, i64 %tmp73, 4
+  %tmp75 = insertvalue [8 x i64] %tmp74, i64 undef, 5
+  %tmp76 = insertvalue [8 x i64] %tmp75, i64 undef, 6
+  %tmp77 = insertvalue [8 x i64] %tmp76, i64 undef, 7
+  call arm_aapcs_vfpcc  void @bar(i8* sret null, [8 x i64] %tmp77) nounwind
+  %tmp78 = call arm_aapcs_vfpcc  i8* null(i8* null) nounwind
+  %tmp79 = bitcast i8* %tmp78 to i512*
+  %tmp80 = load i512* %tmp79, align 16
+  %tmp81 = lshr i512 %tmp80, 128
+  %tmp82 = trunc i512 %tmp80 to i128
+  %tmp83 = trunc i512 %tmp81 to i128
+  %tmp84 = bitcast i128 %tmp83 to <4 x float>
+  %tmp85 = bitcast <4 x float> %tmp84 to <2 x i64>
+  %tmp86 = shufflevector <2 x i64> %tmp85, <2 x i64> undef, <1 x i32> <i32 1>
+  %tmp87 = bitcast <1 x i64> %tmp86 to <2 x float>
+  %tmp88 = shufflevector <2 x float> %tmp87, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  %tmp89 = fmul <4 x float> undef, %tmp88
+  %tmp90 = fadd <4 x float> %tmp89, undef
+  %tmp91 = fadd <4 x float> undef, %tmp90
+  store <4 x float> %tmp91, <4 x float>* undef, align 16, !tbaa !0
+  unreachable
+
+bb92:                                             ; preds = %bb2
+  ret void
+}
+
+declare arm_aapcs_vfpcc void @bar(i8* noalias nocapture sret, [8 x i64]) nounwind uwtable inlinehint
+
+!0 = metadata !{metadata !"omnipotent char", metadata !1}
+!1 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/ARM/2012-02-01-CoalescerBug.ll b/test/CodeGen/ARM/2012-02-01-CoalescerBug.ll
new file mode 100644
index 0000000..6c7aaad
--- /dev/null
+++ b/test/CodeGen/ARM/2012-02-01-CoalescerBug.ll
@@ -0,0 +1,26 @@
+; RUN: llc -verify-coalescing < %s
+; PR11868
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv7-none-linux-gnueabi"
+
+%0 = type { <4 x float> }
+%1 = type { <4 x float> }
+
+@foo = external global %0, align 16
+
+define arm_aapcs_vfpcc void @bar(float, i1 zeroext, i1 zeroext) nounwind {
+  %4 = load <4 x float>* getelementptr inbounds (%0* @foo, i32 0, i32 0), align 16
+  %5 = extractelement <4 x float> %4, i32 0
+  %6 = extractelement <4 x float> %4, i32 1
+  %7 = extractelement <4 x float> %4, i32 2
+  %8 = insertelement <4 x float> undef, float %5, i32 0
+  %9 = insertelement <4 x float> %8, float %6, i32 1
+  %10 = insertelement <4 x float> %9, float %7, i32 2
+  %11 = insertelement <4 x float> %10, float 0.000000e+00, i32 3
+  store <4 x float> %11, <4 x float>* undef, align 16 
+  call arm_aapcs_vfpcc  void @baz(%1* undef, float 0.000000e+00) nounwind
+  ret void
+}
+
+declare arm_aapcs_vfpcc void @baz(%1*, float)
diff --git a/test/CodeGen/ARM/arm-returnaddr.ll b/test/CodeGen/ARM/arm-returnaddr.ll
index 95edaad..1272e8e 100644
--- a/test/CodeGen/ARM/arm-returnaddr.ll
+++ b/test/CodeGen/ARM/arm-returnaddr.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s
-; RUN: llc < %s -mtriple=thumbv6-apple-darwin | FileCheck %s
-; RUN: llc < %s -mtriple=arm-apple-darwin -regalloc=basic | FileCheck %s
-; RUN: llc < %s -mtriple=thumbv6-apple-darwin -regalloc=basic | FileCheck %s
+; RUN: llc < %s -mtriple=arm-apple-ios | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv6-apple-ios | FileCheck %s
+; RUN: llc < %s -mtriple=arm-apple-ios -regalloc=basic | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv6-apple-ios -regalloc=basic | FileCheck %s
 ; rdar://8015977
 ; rdar://8020118
 
diff --git a/test/CodeGen/ARM/atomic-op.ll b/test/CodeGen/ARM/atomic-op.ll
index 02ce5a1..8967730 100644
--- a/test/CodeGen/ARM/atomic-op.ll
+++ b/test/CodeGen/ARM/atomic-op.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=armv7-apple-darwin10 -verify-machineinstrs | FileCheck %s
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s
 
 define void @func(i32 %argc, i8** %argv) nounwind {
 entry:
@@ -61,7 +61,7 @@ entry:
   ; CHECK: strex
   %7 = atomicrmw min i32* %val2, i32 16 monotonic
 	store i32 %7, i32* %old
-	%neg = sub i32 0, 1		; <i32> [#uses=1]
+	%neg = sub i32 0, 1
   ; CHECK: ldrex
   ; CHECK: cmp
   ; CHECK: strex
@@ -77,5 +77,85 @@ entry:
   ; CHECK: strex
   %10 = atomicrmw max i32* %val2, i32 0 monotonic
 	store i32 %10, i32* %old
-	ret void
+  ; CHECK: ldrex
+  ; CHECK: cmp
+  ; CHECK: strex
+  %11 = atomicrmw umin i32* %val2, i32 16 monotonic
+	store i32 %11, i32* %old
+	%uneg = sub i32 0, 1
+  ; CHECK: ldrex
+  ; CHECK: cmp
+  ; CHECK: strex
+  %12 = atomicrmw umin i32* %val2, i32 %uneg monotonic
+	store i32 %12, i32* %old
+  ; CHECK: ldrex
+  ; CHECK: cmp
+  ; CHECK: strex
+  %13 = atomicrmw umax i32* %val2, i32 1 monotonic
+	store i32 %13, i32* %old
+  ; CHECK: ldrex
+  ; CHECK: cmp
+  ; CHECK: strex
+  %14 = atomicrmw umax i32* %val2, i32 0 monotonic
+	store i32 %14, i32* %old
+
+  ret void
+}
+
+define void @func2() nounwind {
+entry:
+  %val = alloca i16
+  %old = alloca i16
+  store i16 31, i16* %val
+  ; CHECK: ldrex
+  ; CHECK: cmp
+  ; CHECK: strex
+  %0 = atomicrmw umin i16* %val, i16 16 monotonic
+  store i16 %0, i16* %old
+  %uneg = sub i16 0, 1
+  ; CHECK: ldrex
+  ; CHECK: cmp
+  ; CHECK: strex
+  %1 = atomicrmw umin i16* %val, i16 %uneg monotonic
+  store i16 %1, i16* %old
+  ; CHECK: ldrex
+  ; CHECK: cmp
+  ; CHECK: strex
+  %2 = atomicrmw umax i16* %val, i16 1 monotonic
+  store i16 %2, i16* %old
+  ; CHECK: ldrex
+  ; CHECK: cmp
+  ; CHECK: strex
+  %3 = atomicrmw umax i16* %val, i16 0 monotonic
+  store i16 %3, i16* %old
+  ret void
+}
+
+define void @func3() nounwind {
+entry:
+  %val = alloca i8
+  %old = alloca i8
+  store i8 31, i8* %val
+  ; CHECK: ldrex
+  ; CHECK: cmp
+  ; CHECK: strex
+  %0 = atomicrmw umin i8* %val, i8 16 monotonic
+  store i8 %0, i8* %old
+  ; CHECK: ldrex
+  ; CHECK: cmp
+  ; CHECK: strex
+  %uneg = sub i8 0, 1
+  %1 = atomicrmw umin i8* %val, i8 %uneg monotonic
+  store i8 %1, i8* %old
+  ; CHECK: ldrex
+  ; CHECK: cmp
+  ; CHECK: strex
+  %2 = atomicrmw umax i8* %val, i8 1 monotonic
+  store i8 %2, i8* %old
+  ; CHECK: ldrex
+  ; CHECK: cmp
+  ; CHECK: strex
+  %3 = atomicrmw umax i8* %val, i8 0 monotonic
+  store i8 %3, i8* %old
+  ret void
 }
diff --git a/test/CodeGen/ARM/avoid-cpsr-rmw.ll b/test/CodeGen/ARM/avoid-cpsr-rmw.ll
index 877ec18..1b385ab 100644
--- a/test/CodeGen/ARM/avoid-cpsr-rmw.ll
+++ b/test/CodeGen/ARM/avoid-cpsr-rmw.ll
@@ -6,9 +6,9 @@
 define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) nounwind readnone {
  entry:
 ; CHECK: t1:
-; CHECK: muls [[REG:(r[0-9]+)]], r2, r3
-; CHECK-NEXT: mul  [[REG2:(r[0-9]+)]], r0, r1
-; CHECK-NEXT: muls r0, [[REG2]], [[REG]]
+; CHECK: muls [[REG:(r[0-9]+)]], r3, r2
+; CHECK-NEXT: mul  [[REG2:(r[0-9]+)]], r1, r0
+; CHECK-NEXT: muls r0, [[REG]], [[REG2]]
   %0 = mul nsw i32 %a, %b
   %1 = mul nsw i32 %c, %d
   %2 = mul nsw i32 %0, %1
diff --git a/test/CodeGen/ARM/call-tc.ll b/test/CodeGen/ARM/call-tc.ll
index f78d998..be3e105 100644
--- a/test/CodeGen/ARM/call-tc.ll
+++ b/test/CodeGen/ARM/call-tc.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -mtriple=armv6-apple-darwin -mattr=+vfp2 -arm-tail-calls | FileCheck %s -check-prefix=CHECKV6
+; RUN: llc < %s -mtriple=armv6-apple-ios -mattr=+vfp2 -arm-tail-calls | FileCheck %s -check-prefix=CHECKV6
 ; RUN: llc < %s -mtriple=armv6-linux-gnueabi -relocation-model=pic -mattr=+vfp2 -arm-tail-calls | FileCheck %s -check-prefix=CHECKELF
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -arm-tail-calls | FileCheck %s -check-prefix=CHECKT2D
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -arm-tail-calls | FileCheck %s -check-prefix=CHECKT2D
 ; RUN: llc < %s -mtriple=thumbv7-apple-ios5.0 | FileCheck %s -check-prefix=CHECKT2D
 
 ; Enable tailcall optimization for iOS 5.0
diff --git a/test/CodeGen/ARM/code-placement.ll b/test/CodeGen/ARM/code-placement.ll
index 91ef659..487ec69 100644
--- a/test/CodeGen/ARM/code-placement.ll
+++ b/test/CodeGen/ARM/code-placement.ll
@@ -12,9 +12,9 @@ entry:
   br i1 %0, label %bb2, label %bb
 
 bb:
-; CHECK: LBB0_2:
-; CHECK: bne LBB0_2
-; CHECK-NOT: b LBB0_2
+; CHECK: LBB0_1:
+; CHECK: bne LBB0_1
+; CHECK-NOT: b LBB0_1
 ; CHECK: bx lr
   %list_addr.05 = phi %struct.list_head* [ %2, %bb ], [ %list, %entry ]
   %next.04 = phi %struct.list_head* [ %list_addr.05, %bb ], [ null, %entry ]
diff --git a/test/CodeGen/ARM/cse-call.ll b/test/CodeGen/ARM/cse-call.ll
new file mode 100644
index 0000000..eff5de5
--- /dev/null
+++ b/test/CodeGen/ARM/cse-call.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -mcpu=arm1136jf-s -verify-machineinstrs | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "armv6-apple-ios0.0.0"
+
+; Don't CSE a cmp across a call that clobbers CPSR.
+;
+; CHECK: cmp
+; CHECK: S_trimzeros
+; CHECK: cmp
+; CHECK: strlen
+
+@F_floatmul.man1 = external global [200 x i8], align 1
+@F_floatmul.man2 = external global [200 x i8], align 1
+
+declare i32 @strlen(i8* nocapture) nounwind readonly
+declare void @S_trimzeros(...)
+
+define i8* @F_floatmul(i8* %f1, i8* %f2) nounwind ssp {
+entry:
+  br i1 undef, label %while.end42, label %while.body37
+
+while.body37:                                     ; preds = %while.body37, %entry
+  br i1 false, label %while.end42, label %while.body37
+
+while.end42:                                      ; preds = %while.body37, %entry
+  %. = select i1 undef, i8* getelementptr inbounds ([200 x i8]* @F_floatmul.man1, i32 0, i32 0), i8* getelementptr inbounds ([200 x i8]* @F_floatmul.man2, i32 0, i32 0)
+  %.92 = select i1 undef, i8* getelementptr inbounds ([200 x i8]* @F_floatmul.man2, i32 0, i32 0), i8* getelementptr inbounds ([200 x i8]* @F_floatmul.man1, i32 0, i32 0)
+  tail call void bitcast (void (...)* @S_trimzeros to void (i8*)*)(i8* %.92) nounwind
+  %call47 = tail call i32 @strlen(i8* %.) nounwind
+  unreachable
+}
diff --git a/test/CodeGen/ARM/cse-libcalls.ll b/test/CodeGen/ARM/cse-libcalls.ll
index 0dcf9dd..1d011be 100644
--- a/test/CodeGen/ARM/cse-libcalls.ll
+++ b/test/CodeGen/ARM/cse-libcalls.ll
@@ -4,7 +4,7 @@ target triple = "i386-apple-darwin8"
 
 ; Without CSE of libcalls, there are two calls in the output instead of one.
 
-define i32 @u_f_nonbon(double %lambda) nounwind {
+define double @u_f_nonbon(double %lambda) nounwind {
 entry:
 	%tmp19.i.i = load double* null, align 4		; <double> [#uses=2]
 	%tmp6.i = fcmp olt double %tmp19.i.i, 1.000000e+00		; <i1> [#uses=1]
@@ -26,5 +26,5 @@ bb502.loopexit.i:		; preds = %bb28.i
 	br i1 false, label %bb.nph53.i, label %bb508.i
 
 bb508.i:		; preds = %bb502.loopexit.i, %entry
-	ret i32 1
+	ret double %tmp10.i4
 }
diff --git a/test/CodeGen/ARM/ctor_order.ll b/test/CodeGen/ARM/ctor_order.ll
index 7f00eb3..6419292 100644
--- a/test/CodeGen/ARM/ctor_order.ll
+++ b/test/CodeGen/ARM/ctor_order.ll
@@ -6,13 +6,15 @@
 ; DARWIN:      .long _f151
 ; DARWIN-NEXT: .long _f152
 
-; ELF:      .section .ctors,"aw",%progbits
+; ELF:      .section .ctors.65384,"aw",%progbits
+; ELF:      .long    f151
+; ELF:      .section .ctors.65383,"aw",%progbits
 ; ELF:      .long    f152
-; ELF-NEXT: .long    f151
 
-; GNUEABI:      .section .init_array,"aw",%init_array
+; GNUEABI:      .section .init_array.151,"aw",%init_array
 ; GNUEABI:      .long    f151
-; GNUEABI-NEXT: .long    f152
+; GNUEABI:      .section .init_array.152,"aw",%init_array
+; GNUEABI:      .long    f152
 
 
 @llvm.global_ctors = appending global [2 x { i32, void ()* }] [ { i32, void ()* } { i32 151, void ()* @f151 }, { i32, void ()* } { i32 152, void ()* @f152 } ]
diff --git a/test/CodeGen/ARM/debug-info-arg.ll b/test/CodeGen/ARM/debug-info-arg.ll
index b0270f9..a7b44e6 100644
--- a/test/CodeGen/ARM/debug-info-arg.ll
+++ b/test/CodeGen/ARM/debug-info-arg.ll
@@ -2,7 +2,7 @@
 ; Test to check argument y's debug info uses FI
 ; Radar 10048772
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32"
-target triple = "thumbv7-apple-macosx10.7.0"
+target triple = "thumbv7-apple-ios"
 
 %struct.tag_s = type { i32, i32, i32 }
 
diff --git a/test/CodeGen/ARM/debug-info-blocks.ll b/test/CodeGen/ARM/debug-info-blocks.ll
index 00e6cb0..0ad0a15 100644
--- a/test/CodeGen/ARM/debug-info-blocks.ll
+++ b/test/CodeGen/ARM/debug-info-blocks.ll
@@ -2,7 +2,7 @@
 ; CHECK: @DEBUG_VALUE: mydata <- [sp+#{{[0-9]+}}]+#0
 ; Radar 9331779
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
-target triple = "thumbv7-apple-macosx10.7.0"
+target triple = "thumbv7-apple-ios"
 
 %0 = type opaque
 %1 = type { [4 x i32] }
diff --git a/test/CodeGen/ARM/debug-info-sreg2.ll b/test/CodeGen/ARM/debug-info-sreg2.ll
index 3972e68..ae7af0a 100644
--- a/test/CodeGen/ARM/debug-info-sreg2.ll
+++ b/test/CodeGen/ARM/debug-info-sreg2.ll
@@ -4,11 +4,11 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-
 target triple = "thumbv7-apple-macosx10.6.7"
 
 ;CHECK: Ldebug_loc0:
+;CHECK-NEXT:        .long   Ltmp0
 ;CHECK-NEXT:        .long   Ltmp1
-;CHECK-NEXT:        .long   Ltmp2
-;CHECK-NEXT: Lset[[N:[0-9]+]] = Ltmp10-Ltmp9        @ Loc expr size
+;CHECK-NEXT: Lset[[N:[0-9]+]] = Ltmp{{[0-9]+}}-Ltmp[[M:[0-9]+]]        @ Loc expr size
 ;CHECK-NEXT:        .short  Lset[[N]]
-;CHECK-NEXT: Ltmp9:
+;CHECK-NEXT: Ltmp[[M]]:
 ;CHECK-NEXT:        .byte   144                     @ DW_OP_regx for S register
 
 define void @_Z3foov() optsize ssp {
diff --git a/test/CodeGen/ARM/dg.exp b/test/CodeGen/ARM/dg.exp
deleted file mode 100644
index 3ff359a..0000000
--- a/test/CodeGen/ARM/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target ARM] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/ARM/ehabi-unwind.ll b/test/CodeGen/ARM/ehabi-unwind.ll
new file mode 100644
index 0000000..fd7d0e6
--- /dev/null
+++ b/test/CodeGen/ARM/ehabi-unwind.ll
@@ -0,0 +1,16 @@
+; Test that the EHABI unwind instruction generator does not encounter any
+; unfamiliar instructions.
+; RUN: llc < %s -mtriple=thumbv7 -arm-enable-ehabi -disable-fp-elim
+; RUN: llc < %s -mtriple=thumbv7 -arm-enable-ehabi
+; RUN: llc < %s -mtriple=thumbv7 -arm-enable-ehabi -arm-enable-ehabi-descriptors
+
+define void @_Z1fv() nounwind {
+entry:
+  ret void
+}
+
+define void @_Z1gv() nounwind {
+entry:
+  call void @_Z1fv()
+  ret void
+}
diff --git a/test/CodeGen/ARM/fast-isel-binary.ll b/test/CodeGen/ARM/fast-isel-binary.ll
new file mode 100644
index 0000000..723383e
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-binary.ll
@@ -0,0 +1,116 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+
+; Test add with non-legal types
+
+define void @add_i1(i1 %a, i1 %b) nounwind ssp {
+entry:
+; ARM: add_i1
+; THUMB: add_i1
+  %a.addr = alloca i1, align 4
+  %0 = add i1 %a, %b
+; ARM: add r0, r0, r1
+; THUMB: add r0, r1
+  store i1 %0, i1* %a.addr, align 4
+  ret void
+}
+
+define void @add_i8(i8 %a, i8 %b) nounwind ssp {
+entry:
+; ARM: add_i8
+; THUMB: add_i8
+  %a.addr = alloca i8, align 4
+  %0 = add i8 %a, %b
+; ARM: add r0, r0, r1
+; THUMB: add r0, r1
+  store i8 %0, i8* %a.addr, align 4
+  ret void
+}
+
+define void @add_i16(i16 %a, i16 %b) nounwind ssp {
+entry:
+; ARM: add_i16
+; THUMB: add_i16
+  %a.addr = alloca i16, align 4
+  %0 = add i16 %a, %b
+; ARM: add r0, r0, r1
+; THUMB: add r0, r1
+  store i16 %0, i16* %a.addr, align 4
+  ret void
+}
+
+; Test or with non-legal types
+
+define void @or_i1(i1 %a, i1 %b) nounwind ssp {
+entry:
+; ARM: or_i1
+; THUMB: or_i1
+  %a.addr = alloca i1, align 4
+  %0 = or i1 %a, %b
+; ARM: orr r0, r0, r1
+; THUMB: orrs r0, r1
+  store i1 %0, i1* %a.addr, align 4
+  ret void
+}
+
+define void @or_i8(i8 %a, i8 %b) nounwind ssp {
+entry:
+; ARM: or_i8
+; THUMB: or_i8
+  %a.addr = alloca i8, align 4
+  %0 = or i8 %a, %b
+; ARM: orr r0, r0, r1
+; THUMB: orrs r0, r1
+  store i8 %0, i8* %a.addr, align 4
+  ret void
+}
+
+define void @or_i16(i16 %a, i16 %b) nounwind ssp {
+entry:
+; ARM: or_i16
+; THUMB: or_i16
+  %a.addr = alloca i16, align 4
+  %0 = or i16 %a, %b
+; ARM: orr r0, r0, r1
+; THUMB: orrs r0, r1
+  store i16 %0, i16* %a.addr, align 4
+  ret void
+}
+
+; Test sub with non-legal types
+
+define void @sub_i1(i1 %a, i1 %b) nounwind ssp {
+entry:
+; ARM: sub_i1
+; THUMB: sub_i1
+  %a.addr = alloca i1, align 4
+  %0 = sub i1 %a, %b
+; ARM: sub r0, r0, r1
+; THUMB: subs r0, r0, r1
+  store i1 %0, i1* %a.addr, align 4
+  ret void
+}
+
+define void @sub_i8(i8 %a, i8 %b) nounwind ssp {
+entry:
+; ARM: sub_i8
+; THUMB: sub_i8
+  %a.addr = alloca i8, align 4
+  %0 = sub i8 %a, %b
+; ARM: sub r0, r0, r1
+; THUMB: subs r0, r0, r1
+  store i8 %0, i8* %a.addr, align 4
+  ret void
+}
+
+define void @sub_i16(i16 %a, i16 %b) nounwind ssp {
+entry:
+; ARM: sub_i16
+; THUMB: sub_i16
+  %a.addr = alloca i16, align 4
+  %0 = sub i16 %a, %b
+; ARM: sub r0, r0, r1
+; THUMB: subs r0, r0, r1
+  store i16 %0, i16* %a.addr, align 4
+  ret void
+}
diff --git a/test/CodeGen/ARM/fast-isel-br-const.ll b/test/CodeGen/ARM/fast-isel-br-const.ll
index b7acfaa..625adc2 100644
--- a/test/CodeGen/ARM/fast-isel-br-const.ll
+++ b/test/CodeGen/ARM/fast-isel-br-const.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
 
 define i32 @t1(i32 %a, i32 %b) nounwind uwtable ssp {
 entry:
diff --git a/test/CodeGen/ARM/fast-isel-br-phi.ll b/test/CodeGen/ARM/fast-isel-br-phi.ll
new file mode 100644
index 0000000..a0aba69
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-br-phi.ll
@@ -0,0 +1,44 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios
+
+; This test ensures HandlePHINodesInSuccessorBlocks() is able to promote basic
+; non-legal integer types (i.e., i1, i8, i16).
+
+declare void @fooi8(i8)
+declare void @fooi16(i16)
+
+define void @foo(i1 %cmp) nounwind ssp {
+entry:
+  br i1 %cmp, label %cond.true, label %cond.false
+
+cond.true:                                        ; preds = %entry
+  br label %cond.end
+
+cond.false:                                       ; preds = %entry
+  br label %cond.end
+
+cond.end:                                         ; preds = %cond.false, %cond.true
+  %cond = phi i1 [ 0, %cond.true ], [ 1, %cond.false ]
+  br i1 %cond, label %cond.true8, label %cond.false8
+
+cond.true8:                                       ; preds = %cond.end
+  br label %cond.end8
+
+cond.false8:                                      ; preds = %cond.end
+  br label %cond.end8
+
+cond.end8:                                        ; preds = %cond.false8, %cond.true8
+  %cond8 = phi i8 [ 0, %cond.true8 ], [ 1, %cond.false8 ]
+  call void @fooi8(i8 %cond8)
+  br i1 0, label %cond.true16, label %cond.false16
+
+cond.true16:                                       ; preds = %cond.end8
+  br label %cond.end16
+
+cond.false16:                                      ; preds = %cond.end8
+  br label %cond.end16
+
+cond.end16:                                        ; preds = %cond.false16, %cond.true16
+  %cond16 = phi i16 [ 0, %cond.true16 ], [ 1, %cond.false16 ]
+  call void @fooi16(i16 %cond16)
+  ret void
+}
diff --git a/test/CodeGen/ARM/fast-isel-call.ll b/test/CodeGen/ARM/fast-isel-call.ll
index 695dbba..dd460b2 100644
--- a/test/CodeGen/ARM/fast-isel-call.ll
+++ b/test/CodeGen/ARM/fast-isel-call.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
 
 define i32 @t0(i1 zeroext %a) nounwind {
   %1 = zext i1 %a to i32
diff --git a/test/CodeGen/ARM/fast-isel-cmp-imm.ll b/test/CodeGen/ARM/fast-isel-cmp-imm.ll
index 33c6008..1693066 100644
--- a/test/CodeGen/ARM/fast-isel-cmp-imm.ll
+++ b/test/CodeGen/ARM/fast-isel-cmp-imm.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
 
 define void @t1a(float %a) uwtable ssp {
 entry:
diff --git a/test/CodeGen/ARM/fast-isel-conversion.ll b/test/CodeGen/ARM/fast-isel-conversion.ll
index 14666a8..686ccad 100644
--- a/test/CodeGen/ARM/fast-isel-conversion.ll
+++ b/test/CodeGen/ARM/fast-isel-conversion.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
 
 ; Test sitofp
 
@@ -94,3 +94,149 @@ entry:
   store double %conv, double* %b.addr, align 8
   ret void
 }
+
+; Test uitofp
+
+define void @uitofp_single_i32(i32 %a, float %b) nounwind ssp {
+entry:
+; ARM: uitofp_single_i32
+; ARM: vmov s0, r0
+; ARM: vcvt.f32.u32 s0, s0
+; THUMB: uitofp_single_i32
+; THUMB: vmov s0, r0
+; THUMB: vcvt.f32.u32 s0, s0
+  %b.addr = alloca float, align 4
+  %conv = uitofp i32 %a to float
+  store float %conv, float* %b.addr, align 4
+  ret void
+}
+
+define void @uitofp_single_i16(i16 %a, float %b) nounwind ssp {
+entry:
+; ARM: uitofp_single_i16
+; ARM: uxth r0, r0
+; ARM: vmov s0, r0
+; ARM: vcvt.f32.u32 s0, s0
+; THUMB: uitofp_single_i16
+; THUMB: uxth r0, r0
+; THUMB: vmov s0, r0
+; THUMB: vcvt.f32.u32 s0, s0
+  %b.addr = alloca float, align 4
+  %conv = uitofp i16 %a to float
+  store float %conv, float* %b.addr, align 4
+  ret void
+}
+
+define void @uitofp_single_i8(i8 %a) nounwind ssp {
+entry:
+; ARM: uitofp_single_i8
+; ARM: uxtb r0, r0
+; ARM: vmov s0, r0
+; ARM: vcvt.f32.u32 s0, s0
+; THUMB: uitofp_single_i8
+; THUMB: uxtb r0, r0
+; THUMB: vmov s0, r0
+; THUMB: vcvt.f32.u32 s0, s0
+  %b.addr = alloca float, align 4
+  %conv = uitofp i8 %a to float
+  store float %conv, float* %b.addr, align 4
+  ret void
+}
+
+define void @uitofp_double_i32(i32 %a, double %b) nounwind ssp {
+entry:
+; ARM: uitofp_double_i32
+; ARM: vmov s0, r0
+; ARM: vcvt.f64.u32 d16, s0
+; THUMB: uitofp_double_i32
+; THUMB: vmov s0, r0
+; THUMB: vcvt.f64.u32 d16, s0
+  %b.addr = alloca double, align 8
+  %conv = uitofp i32 %a to double
+  store double %conv, double* %b.addr, align 8
+  ret void
+}
+
+define void @uitofp_double_i16(i16 %a, double %b) nounwind ssp {
+entry:
+; ARM: uitofp_double_i16
+; ARM: uxth r0, r0
+; ARM: vmov s0, r0
+; ARM: vcvt.f64.u32 d16, s0
+; THUMB: uitofp_double_i16
+; THUMB: uxth r0, r0
+; THUMB: vmov s0, r0
+; THUMB: vcvt.f64.u32 d16, s0
+  %b.addr = alloca double, align 8
+  %conv = uitofp i16 %a to double
+  store double %conv, double* %b.addr, align 8
+  ret void
+}
+
+define void @uitofp_double_i8(i8 %a, double %b) nounwind ssp {
+entry:
+; ARM: uitofp_double_i8
+; ARM: uxtb r0, r0
+; ARM: vmov s0, r0
+; ARM: vcvt.f64.u32 d16, s0
+; THUMB: uitofp_double_i8
+; THUMB: uxtb r0, r0
+; THUMB: vmov s0, r0
+; THUMB: vcvt.f64.u32 d16, s0
+  %b.addr = alloca double, align 8
+  %conv = uitofp i8 %a to double
+  store double %conv, double* %b.addr, align 8
+  ret void
+}
+
+; Test fptosi
+
+define void @fptosi_float(float %a) nounwind ssp {
+entry:
+; ARM: fptosi_float
+; ARM: vcvt.s32.f32 s0, s0
+; THUMB: fptosi_float
+; THUMB: vcvt.s32.f32 s0, s0
+  %b.addr = alloca i32, align 4
+  %conv = fptosi float %a to i32
+  store i32 %conv, i32* %b.addr, align 4
+  ret void
+}
+
+define void @fptosi_double(double %a) nounwind ssp {
+entry:
+; ARM: fptosi_double
+; ARM: vcvt.s32.f64 s0, d16
+; THUMB: fptosi_double
+; THUMB: vcvt.s32.f64 s0, d16
+  %b.addr = alloca i32, align 8
+  %conv = fptosi double %a to i32
+  store i32 %conv, i32* %b.addr, align 8
+  ret void
+}
+
+; Test fptoui
+
+define void @fptoui_float(float %a) nounwind ssp {
+entry:
+; ARM: fptoui_float
+; ARM: vcvt.u32.f32 s0, s0
+; THUMB: fptoui_float
+; THUMB: vcvt.u32.f32 s0, s0
+  %b.addr = alloca i32, align 4
+  %conv = fptoui float %a to i32
+  store i32 %conv, i32* %b.addr, align 4
+  ret void
+}
+
+define void @fptoui_double(double %a) nounwind ssp {
+entry:
+; ARM: fptoui_double
+; ARM: vcvt.u32.f64 s0, d16
+; THUMB: fptoui_double
+; THUMB: vcvt.u32.f64 s0, d16
+  %b.addr = alloca i32, align 8
+  %conv = fptoui double %a to i32
+  store i32 %conv, i32* %b.addr, align 8
+  ret void
+}
diff --git a/test/CodeGen/ARM/fast-isel-deadcode.ll b/test/CodeGen/ARM/fast-isel-deadcode.ll
index 028d940..7e147c7 100644
--- a/test/CodeGen/ARM/fast-isel-deadcode.ll
+++ b/test/CodeGen/ARM/fast-isel-deadcode.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
 
 ; Target-specific selector can't properly handle the double because it isn't
 ; being passed via a register, so the materialized arguments become dead code.
@@ -15,8 +15,7 @@ entry:
 ; THUMB-NOT: sxtb
 ; THUMB: movs r0, #0
 ; THUMB: movt r0, #0
-; THUMB: add sp, #32
-; THUMb: pop {r7, pc}
+; THUMB: pop
   ret i32 0
 }
 
diff --git a/test/CodeGen/ARM/fast-isel-icmp.ll b/test/CodeGen/ARM/fast-isel-icmp.ll
index deffe7b..8764bef 100644
--- a/test/CodeGen/ARM/fast-isel-icmp.ll
+++ b/test/CodeGen/ARM/fast-isel-icmp.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
 
 define i32 @icmp_i16_unsigned(i16 %a, i16 %b) nounwind {
 entry:
diff --git a/test/CodeGen/ARM/fast-isel-indirectbr.ll b/test/CodeGen/ARM/fast-isel-indirectbr.ll
new file mode 100644
index 0000000..be8035e
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-indirectbr.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+
+define void @t1(i8* %x) {
+entry:
+; ARM: t1
+; THUMB: t1
+  br label %L0
+
+L0:
+  br label %L1
+
+L1:
+  indirectbr i8* %x, [ label %L0, label %L1 ]
+; ARM: bx r0
+; THUMB: mov pc, r0
+}
diff --git a/test/CodeGen/ARM/fast-isel-intrinsic.ll b/test/CodeGen/ARM/fast-isel-intrinsic.ll
index 3ef8bce..e6bdfa7 100644
--- a/test/CodeGen/ARM/fast-isel-intrinsic.ll
+++ b/test/CodeGen/ARM/fast-isel-intrinsic.ll
@@ -1,19 +1,21 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
 
 @message1 = global [60 x i8] c"The LLVM Compiler Infrastructure\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", align 1
 @temp = common global [60 x i8] zeroinitializer, align 1
 
 define void @t1() nounwind ssp {
 ; ARM: t1
-; ARM: ldr r0, LCPI0_0
+; ARM: movw r0, :lower16:_message1
+; ARM: movt r0, :upper16:_message1
 ; ARM: add r0, r0, #5
 ; ARM: movw r1, #64
 ; ARM: movw r2, #10
 ; ARM: uxtb r1, r1
 ; ARM: bl _memset
 ; THUMB: t1
-; THUMB: ldr.n r0, LCPI0_0
+; THUMB: movw r0, :lower16:_message1
+; THUMB: movt r0, :upper16:_message1
 ; THUMB: adds r0, #5
 ; THUMB: movs r1, #64
 ; THUMB: movt r1, #0
@@ -29,7 +31,8 @@ declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
 
 define void @t2() nounwind ssp {
 ; ARM: t2
-; ARM: ldr r0, LCPI1_0
+; ARM: movw r0, :lower16:L_temp$non_lazy_ptr
+; ARM: movt r0, :upper16:L_temp$non_lazy_ptr
 ; ARM: ldr r0, [r0]
 ; ARM: add r1, r0, #4
 ; ARM: add r0, r0, #16
@@ -39,7 +42,8 @@ define void @t2() nounwind ssp {
 ; ARM: ldr r1, [sp]                @ 4-byte Reload
 ; ARM: bl _memcpy
 ; THUMB: t2
-; THUMB: ldr.n r0, LCPI1_0
+; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr
+; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr
 ; THUMB: ldr r0, [r0]
 ; THUMB: adds r1, r0, #4
 ; THUMB: adds r0, #16
@@ -55,7 +59,8 @@ declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32,
 
 define void @t3() nounwind ssp {
 ; ARM: t3
-; ARM: ldr r0, LCPI2_0
+; ARM: movw r0, :lower16:L_temp$non_lazy_ptr
+; ARM: movt r0, :upper16:L_temp$non_lazy_ptr
 ; ARM: ldr r0, [r0]
 ; ARM: add r1, r0, #4
 ; ARM: add r0, r0, #16
@@ -63,7 +68,8 @@ define void @t3() nounwind ssp {
 ; ARM: mov r0, r1
 ; ARM: bl _memmove
 ; THUMB: t3
-; THUMB: ldr.n r0, LCPI2_0
+; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr
+; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr
 ; THUMB: ldr r0, [r0]
 ; THUMB: adds r1, r0, #4
 ; THUMB: adds r0, #16
@@ -77,26 +83,24 @@ define void @t3() nounwind ssp {
 
 define void @t4() nounwind ssp {
 ; ARM: t4
-; ARM: ldr r0, LCPI3_0
+; ARM: movw r0, :lower16:L_temp$non_lazy_ptr
+; ARM: movt r0, :upper16:L_temp$non_lazy_ptr
 ; ARM: ldr r0, [r0]
-; ARM: ldr r1, LCPI3_1
-; ARM: ldr r1, [r1]
-; ARM: ldr r2, [r1, #16]
-; ARM: str r2, [r0, #4]
-; ARM: ldr r2, [r1, #20]
-; ARM: str r2, [r0, #8]
-; ARM: ldrh r1, [r1, #24]
+; ARM: ldr r1, [r0, #16]
+; ARM: str r1, [r0, #4]
+; ARM: ldr r1, [r0, #20]
+; ARM: str r1, [r0, #8]
+; ARM: ldrh r1, [r0, #24]
 ; ARM: strh r1, [r0, #12]
 ; ARM: bx lr
-; THUMB: ldr.n r0, LCPI3_0
+; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr
+; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr
 ; THUMB: ldr r0, [r0]
-; THUMB: ldr.n r1, LCPI3_1
-; THUMB: ldr r1, [r1]
-; THUMB: ldr r2, [r1, #16]
-; THUMB: str r2, [r0, #4]
-; THUMB: ldr r2, [r1, #20]
-; THUMB: str r2, [r0, #8]
-; THUMB: ldrh r1, [r1, #24]
+; THUMB: ldr r1, [r0, #16]
+; THUMB: str r1, [r0, #4]
+; THUMB: ldr r1, [r0, #20]
+; THUMB: str r1, [r0, #8]
+; THUMB: ldrh r1, [r0, #24]
 ; THUMB: strh r1, [r0, #12]
 ; THUMB: bx lr
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 10, i32 1, i1 false)
diff --git a/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll b/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll
index 0b8a768..2a88678 100644
--- a/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll
+++ b/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
 
 define i32 @t1(i32* nocapture %ptr) nounwind readonly {
 entry:
diff --git a/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll b/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll
index dcfc9d0..e8cc2b2 100644
--- a/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll
+++ b/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
 ; rdar://10418009
 
 define zeroext i16 @t1(i16* nocapture %a) nounwind uwtable readonly ssp {
diff --git a/test/CodeGen/ARM/fast-isel-mvn.ll b/test/CodeGen/ARM/fast-isel-mvn.ll
index daf56e7..b180e43 100644
--- a/test/CodeGen/ARM/fast-isel-mvn.ll
+++ b/test/CodeGen/ARM/fast-isel-mvn.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
 ; rdar://10412592
 
 ; Note: The Thumb code is being generated by the target-independent selector.
@@ -104,4 +104,4 @@ entry:
 ; THUMB: movt r0, #33023
   call void @foo(i32 -2130706433)
   ret void
-}
-\ No newline at end of file
+}
diff --git a/test/CodeGen/ARM/fast-isel-redefinition.ll b/test/CodeGen/ARM/fast-isel-redefinition.ll
index 4203537..e50c3a4 100644
--- a/test/CodeGen/ARM/fast-isel-redefinition.ll
+++ b/test/CodeGen/ARM/fast-isel-redefinition.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O0 -regalloc=basic < %s
+; RUN: llc -O0 -optimize-regalloc -regalloc=basic < %s
 ; This isn't exactly a useful set of command-line options, but check that it
 ; doesn't crash.  (It was crashing because a register was getting redefined.)
 
diff --git a/test/CodeGen/ARM/fast-isel-ret.ll b/test/CodeGen/ARM/fast-isel-ret.ll
index f7f4521..689b169 100644
--- a/test/CodeGen/ARM/fast-isel-ret.ll
+++ b/test/CodeGen/ARM/fast-isel-ret.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s
 
 ; Sign-extend of i1 currently not supported by fast-isel
 ;define signext i1 @ret0(i1 signext %a) nounwind uwtable ssp {
@@ -46,3 +46,12 @@ entry:
 ; CHECK: bx lr
   ret i16 %a
 }
+
+define i16 @ret6(i16 %a) nounwind uwtable ssp {
+entry:
+; CHECK: ret6
+; CHECK-NOT: uxth
+; CHECK-NOT: sxth
+; CHECK: bx lr
+  ret i16 %a
+}
diff --git a/test/CodeGen/ARM/fast-isel-select.ll b/test/CodeGen/ARM/fast-isel-select.ll
index 9ac63d6..b83a733 100644
--- a/test/CodeGen/ARM/fast-isel-select.ll
+++ b/test/CodeGen/ARM/fast-isel-select.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
 
 define i32 @t1(i1 %c) nounwind readnone {
 entry:
diff --git a/test/CodeGen/ARM/fast-isel.ll b/test/CodeGen/ARM/fast-isel.ll
index 648d711..905543a 100644
--- a/test/CodeGen/ARM/fast-isel.ll
+++ b/test/CodeGen/ARM/fast-isel.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
 
 ; Very basic fast-isel functionality.
 define i32 @add(i32 %a, i32 %b) nounwind {
@@ -142,21 +142,19 @@ define void @test4() {
   store i32 %b, i32* @test4g
   ret void
 
-; THUMB: ldr.n r0, LCPI4_1
+; THUMB: movw r0, :lower16:L_test4g$non_lazy_ptr
+; THUMB: movt r0, :upper16:L_test4g$non_lazy_ptr
 ; THUMB: ldr r0, [r0]
-; THUMB: ldr r0, [r0]
-; THUMB: adds r0, #1
-; THUMB: ldr.n r1, LCPI4_0
-; THUMB: ldr r1, [r1]
-; THUMB: str r0, [r1]
+; THUMB: ldr r1, [r0]
+; THUMB: adds r1, #1
+; THUMB: str r1, [r0]
 
-; ARM: ldr r0, LCPI4_1
+; ARM: movw r0, :lower16:L_test4g$non_lazy_ptr
+; ARM: movt r0, :upper16:L_test4g$non_lazy_ptr
 ; ARM: ldr r0, [r0]
-; ARM: ldr r0, [r0]
-; ARM: add r0, r0, #1
-; ARM: ldr r1, LCPI4_0
-; ARM: ldr r1, [r1]
-; ARM: str r0, [r1]
+; ARM: ldr r1, [r0]
+; ARM: add r1, r1, #1
+; ARM: str r1, [r0]
 }
 
 ; Check unaligned stores
diff --git a/test/CodeGen/ARM/fcopysign.ll b/test/CodeGen/ARM/fcopysign.ll
index c4dbeb9..87115cc 100644
--- a/test/CodeGen/ARM/fcopysign.ll
+++ b/test/CodeGen/ARM/fcopysign.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=armv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=SOFT
-; RUN: llc < %s -mtriple=armv7-gnueabi -float-abi=hard -mcpu=cortex-a8 | FileCheck %s -check-prefix=HARD
+; RUN: llc < %s -disable-post-ra -mtriple=armv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=SOFT
+; RUN: llc < %s -disable-post-ra -mtriple=armv7-gnueabi -float-abi=hard -mcpu=cortex-a8 | FileCheck %s -check-prefix=HARD
 
 ; rdar://8984306
 define float @test1(float %x, float %y) nounwind {
@@ -60,8 +60,8 @@ entry:
 define float @test5() nounwind {
 entry:
 ; SOFT: test5:
-; SOFT: vmov.i32 [[REG6:(d[0-9]+)]], #0x80000000
 ; SOFT: vmov [[REG7:(d[0-9]+)]], r0, r1
+; SOFT: vmov.i32 [[REG6:(d[0-9]+)]], #0x80000000
 ; SOFT: vshr.u64 [[REG7]], [[REG7]], #32
 ; SOFT: vbsl [[REG6]], [[REG7]], 
   %0 = tail call double (...)* @bar() nounwind
diff --git a/test/CodeGen/ARM/fpcmp-opt.ll b/test/CodeGen/ARM/fpcmp-opt.ll
index ad03202..80925c7 100644
--- a/test/CodeGen/ARM/fpcmp-opt.ll
+++ b/test/CodeGen/ARM/fpcmp-opt.ll
@@ -1,24 +1,16 @@
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck -check-prefix=FINITE %s
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math | FileCheck -check-prefix=NAN %s
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math | FileCheck %s
 ; rdar://7461510
+; rdar://10964603
 
+; Disable this optimization unless we know one of them is zero.
 define arm_apcscc i32 @t1(float* %a, float* %b) nounwind {
 entry:
-; FINITE: t1:
-; FINITE-NOT: vldr
-; FINITE: ldr
-; FINITE: ldr
-; FINITE: cmp r0, r1
-; FINITE-NOT: vcmpe.f32
-; FINITE-NOT: vmrs
-; FINITE: beq
-
-; NAN: t1:
-; NAN: vldr s0,
-; NAN: vldr s1,
-; NAN: vcmpe.f32 s1, s0
-; NAN: vmrs apsr_nzcv, fpscr
-; NAN: beq
+; CHECK: t1:
+; CHECK: vldr [[S0:s[0-9]+]],
+; CHECK: vldr [[S1:s[0-9]+]],
+; CHECK: vcmpe.f32 [[S1]], [[S0]]
+; CHECK: vmrs apsr_nzcv, fpscr
+; CHECK: beq
   %0 = load float* %a
   %1 = load float* %b
   %2 = fcmp une float %0, %1
@@ -33,17 +25,21 @@ bb2:
   ret i32 %4
 }
 
+; If one side is zero, the other size sign bit is masked off to allow
+; +0.0 == -0.0
 define arm_apcscc i32 @t2(double* %a, double* %b) nounwind {
 entry:
-; FINITE: t2:
-; FINITE-NOT: vldr
-; FINITE: ldrd r0, r1, [r0]
-; FINITE-NOT: b LBB
-; FINITE: cmp r0, #0
-; FINITE: cmpeq r1, #0
-; FINITE-NOT: vcmpe.f32
-; FINITE-NOT: vmrs
-; FINITE: bne
+; CHECK: t2:
+; CHECK-NOT: vldr
+; CHECK: ldr [[REG1:(r[0-9]+)]], [r0]
+; CHECK: ldr [[REG2:(r[0-9]+)]], [r0, #4]
+; CHECK-NOT: b LBB
+; CHECK: cmp [[REG1]], #0
+; CHECK: bfc [[REG2]], #31, #1
+; CHECK: cmpeq [[REG2]], #0
+; CHECK-NOT: vcmpe.f32
+; CHECK-NOT: vmrs
+; CHECK: bne
   %0 = load double* %a
   %1 = fcmp oeq double %0, 0.000000e+00
   br i1 %1, label %bb1, label %bb2
@@ -59,13 +55,14 @@ bb2:
 
 define arm_apcscc i32 @t3(float* %a, float* %b) nounwind {
 entry:
-; FINITE: t3:
-; FINITE-NOT: vldr
-; FINITE: ldr r0, [r0]
-; FINITE: cmp r0, #0
-; FINITE-NOT: vcmpe.f32
-; FINITE-NOT: vmrs
-; FINITE: bne
+; CHECK: t3:
+; CHECK-NOT: vldr
+; CHECK: ldr [[REG3:(r[0-9]+)]], [r0]
+; CHECK: mvn [[REG4:(r[0-9]+)]], #-2147483648
+; CHECK: tst [[REG3]], [[REG4]]
+; CHECK-NOT: vcmpe.f32
+; CHECK-NOT: vmrs
+; CHECK: bne
   %0 = load float* %a
   %1 = fcmp oeq float %0, 0.000000e+00
   br i1 %1, label %bb1, label %bb2
diff --git a/test/CodeGen/ARM/fusedMAC.ll b/test/CodeGen/ARM/fusedMAC.ll
new file mode 100644
index 0000000..40e8bb2
--- /dev/null
+++ b/test/CodeGen/ARM/fusedMAC.ll
@@ -0,0 +1,100 @@
+; RUN: llc < %s -march=arm -mattr=+neon,+vfp4 | FileCheck %s
+; Check generated fused MAC and MLS.
+
+define double @fusedMACTest1(double %d1, double %d2, double %d3) {
+;CHECK: fusedMACTest1:
+;CHECK: vfma.f64
+  %1 = fmul double %d1, %d2
+  %2 = fadd double %1, %d3
+  ret double %2
+}
+
+define float @fusedMACTest2(float %f1, float %f2, float %f3) {
+;CHECK: fusedMACTest2:
+;CHECK: vfma.f32
+  %1 = fmul float %f1, %f2
+  %2 = fadd float %1, %f3
+  ret float %2
+}
+
+define double @fusedMACTest3(double %d1, double %d2, double %d3) {
+;CHECK: fusedMACTest3:
+;CHECK: vfms.f64
+  %1 = fmul double %d2, %d3
+  %2 = fsub double %d1, %1
+  ret double %2
+}
+
+define float @fusedMACTest4(float %f1, float %f2, float %f3) {
+;CHECK: fusedMACTest4:
+;CHECK: vfms.f32
+  %1 = fmul float %f2, %f3
+  %2 = fsub float %f1, %1
+  ret float %2
+}
+
+define double @fusedMACTest5(double %d1, double %d2, double %d3) {
+;CHECK: fusedMACTest5:
+;CHECK: vfnma.f64
+  %1 = fmul double %d1, %d2
+  %2 = fsub double -0.0, %1
+  %3 = fsub double %2, %d3
+  ret double %3
+}
+
+define float @fusedMACTest6(float %f1, float %f2, float %f3) {
+;CHECK: fusedMACTest6:
+;CHECK: vfnma.f32
+  %1 = fmul float %f1, %f2
+  %2 = fsub float -0.0, %1
+  %3 = fsub float %2, %f3
+  ret float %3
+}
+
+define double @fusedMACTest7(double %d1, double %d2, double %d3) {
+;CHECK: fusedMACTest7:
+;CHECK: vfnms.f64
+  %1 = fmul double %d1, %d2
+  %2 = fsub double %1, %d3
+  ret double %2
+}
+
+define float @fusedMACTest8(float %f1, float %f2, float %f3) {
+;CHECK: fusedMACTest8:
+;CHECK: vfnms.f32
+  %1 = fmul float %f1, %f2
+  %2 = fsub float %1, %f3
+  ret float %2
+}
+
+define <2 x float> @fusedMACTest9(<2 x float> %a, <2 x float> %b) {
+;CHECK: fusedMACTest9:
+;CHECK: vfma.f32
+  %mul = fmul <2 x float> %a, %b
+  %add = fadd <2 x float> %mul, %a
+  ret <2 x float> %add
+}
+
+define <2 x float> @fusedMACTest10(<2 x float> %a, <2 x float> %b) {
+;CHECK: fusedMACTest10:
+;CHECK: vfms.f32
+  %mul = fmul <2 x float> %a, %b
+  %sub = fsub <2 x float> %a, %mul
+  ret <2 x float> %sub
+}
+
+define <4 x float> @fusedMACTest11(<4 x float> %a, <4 x float> %b) {
+;CHECK: fusedMACTest11:
+;CHECK: vfma.f32
+  %mul = fmul <4 x float> %a, %b
+  %add = fadd <4 x float> %mul, %a
+  ret <4 x float> %add
+}
+
+define <4 x float> @fusedMACTest12(<4 x float> %a, <4 x float> %b) {
+;CHECK: fusedMACTest12:
+;CHECK: vfms.f32
+  %mul = fmul <4 x float> %a, %b
+  %sub = fsub <4 x float> %a, %mul
+  ret <4 x float> %sub
+}
diff --git a/test/CodeGen/ARM/hello.ll b/test/CodeGen/ARM/hello.ll
index 9f46ae0..893b426 100644
--- a/test/CodeGen/ARM/hello.ll
+++ b/test/CodeGen/ARM/hello.ll
@@ -2,7 +2,7 @@
 ; RUN: llc < %s -mtriple=armv6-linux-gnueabi | grep mov | count 1
 ; RUN: llc < %s -mtriple=armv6-linux-gnu --disable-fp-elim | \
 ; RUN:   grep mov | count 2
-; RUN: llc < %s -mtriple=armv6-apple-darwin | grep mov | count 2
+; RUN: llc < %s -mtriple=armv6-apple-ios | grep mov | count 2
 
 @str = internal constant [12 x i8] c"Hello World\00"
 
diff --git a/test/CodeGen/ARM/ifcvt1.ll b/test/CodeGen/ARM/ifcvt1.ll
index b073a05..cd870bb 100644
--- a/test/CodeGen/ARM/ifcvt1.ll
+++ b/test/CodeGen/ARM/ifcvt1.ll
@@ -1,15 +1,17 @@
-; RUN: llc < %s -march=arm -mattr=+v4t
-; RUN: llc < %s -march=arm -mattr=+v4t | grep bx | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
 
 define i32 @t1(i32 %a, i32 %b) {
+; CHECK: t1:
 	%tmp2 = icmp eq i32 %a, 0
 	br i1 %tmp2, label %cond_false, label %cond_true
 
 cond_true:
+; CHECK: subeq r0, r1, #1
 	%tmp5 = add i32 %b, 1
 	ret i32 %tmp5
 
 cond_false:
+; CHECK: addne r0, r1, #1
 	%tmp7 = add i32 %b, -1
 	ret i32 %tmp7
 }
diff --git a/test/CodeGen/ARM/ifcvt10.ll b/test/CodeGen/ARM/ifcvt10.ll
index 18f87bf..a5082d8 100644
--- a/test/CodeGen/ARM/ifcvt10.ll
+++ b/test/CodeGen/ARM/ifcvt10.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=arm-apple-darwin -mcpu=cortex-a9 | FileCheck %s
+; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-a9 | FileCheck %s
 ; rdar://8402126
 ; Make sure if-converter is not predicating vldmia and ldmia. These are
 ; micro-coded and would have long issue latency even if predicated on
diff --git a/test/CodeGen/ARM/ifcvt3.ll b/test/CodeGen/ARM/ifcvt3.ll
index 3e2c578..eef4de0 100644
--- a/test/CodeGen/ARM/ifcvt3.ll
+++ b/test/CodeGen/ARM/ifcvt3.ll
@@ -1,14 +1,19 @@
-; RUN: llc < %s -march=arm -mattr=+v4t
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
 ; RUN: llc < %s -march=arm -mattr=+v4t | grep cmpne | count 1
 ; RUN: llc < %s -march=arm -mattr=+v4t | grep bx | count 2
 
 define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) {
+; CHECK: t1:
+; CHECK: cmp r2, #1
+; CHECK: cmpne r2, #7
 	switch i32 %c, label %cond_next [
 		 i32 1, label %cond_true
 		 i32 7, label %cond_true
 	]
 
 cond_true:
+; CHECK: addne r0
+; CHECK: bxne
 	%tmp12 = add i32 %a, 1
 	%tmp1518 = add i32 %tmp12, %b
 	ret i32 %tmp1518
diff --git a/test/CodeGen/ARM/ifcvt5.ll b/test/CodeGen/ARM/ifcvt5.ll
index 3615055..95f5c97 100644
--- a/test/CodeGen/ARM/ifcvt5.ll
+++ b/test/CodeGen/ARM/ifcvt5.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s
 
 @x = external global i32*		; <i32**> [#uses=1]
 
diff --git a/test/CodeGen/ARM/ifcvt6.ll b/test/CodeGen/ARM/ifcvt6.ll
index 2327657..a00deda 100644
--- a/test/CodeGen/ARM/ifcvt6.ll
+++ b/test/CodeGen/ARM/ifcvt6.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s
 
 define void @foo(i32 %X, i32 %Y) {
 entry:
diff --git a/test/CodeGen/ARM/insn-sched1.ll b/test/CodeGen/ARM/insn-sched1.ll
index 1d32322..d188fae 100644
--- a/test/CodeGen/ARM/insn-sched1.ll
+++ b/test/CodeGen/ARM/insn-sched1.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=arm -mattr=+v6
-; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6 |\
+; RUN: llc < %s -mtriple=arm-apple-ios -mattr=+v6 |\
 ; RUN:   grep mov | count 3
 
 define i32 @test(i32 %x) {
diff --git a/test/CodeGen/ARM/ldrd.ll b/test/CodeGen/ARM/ldrd.ll
index d72e9bf..a588bc3 100644
--- a/test/CodeGen/ARM/ldrd.ll
+++ b/test/CodeGen/ARM/ldrd.ll
@@ -1,6 +1,8 @@
 ; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=fast | FileCheck %s -check-prefix=A8
 ; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-m3 -regalloc=fast | FileCheck %s -check-prefix=M3
 ; rdar://6949835
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=basic | FileCheck %s -check-prefix=BASIC
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=greedy | FileCheck %s -check-prefix=GREEDY
 
 ; Magic ARM pair hints works best with linearscan / fast.
 
@@ -23,3 +25,47 @@ entry:
 	%2 = mul i64 %1, %a
 	ret i64 %2
 }
+
+; rdar://10435045 mixed LDRi8/LDRi12
+;
+; In this case, LSR generate a sequence of LDRi8/LDRi12. We should be
+; able to generate an LDRD pair here, but this is highly sensitive to
+; regalloc hinting. So, this doubles as a register allocation
+; test. RABasic currently does a better job within the inner loop
+; because of its *lack* of hinting ability. Whereas RAGreedy keeps
+; R0/R1/R2 live as the three arguments, forcing the LDRD's odd
+; destination into R3. We then sensibly split LDRD again rather then
+; evict another live range or use callee saved regs. Sorry if the test
+; is sensitive to Regalloc changes, but it is an interesting case.
+;
+; BASIC: @f
+; BASIC: %bb
+; BASIC: ldrd
+; BASIC: str
+; GREEDY: @f
+; GREEDY: %bb
+; GREEDY: ldr
+; GREEDY: ldr
+; GREEDY: str
+define void @f(i32* nocapture %a, i32* nocapture %b, i32 %n) nounwind {
+entry:
+  %0 = add nsw i32 %n, -1                         ; <i32> [#uses=2]
+  %1 = icmp sgt i32 %0, 0                         ; <i1> [#uses=1]
+  br i1 %1, label %bb, label %return
+
+bb:                                               ; preds = %bb, %entry
+  %i.03 = phi i32 [ %tmp, %bb ], [ 0, %entry ]    ; <i32> [#uses=3]
+  %scevgep = getelementptr i32* %a, i32 %i.03     ; <i32*> [#uses=1]
+  %scevgep4 = getelementptr i32* %b, i32 %i.03    ; <i32*> [#uses=1]
+  %tmp = add i32 %i.03, 1                         ; <i32> [#uses=3]
+  %scevgep5 = getelementptr i32* %a, i32 %tmp     ; <i32*> [#uses=1]
+  %2 = load i32* %scevgep, align 4                ; <i32> [#uses=1]
+  %3 = load i32* %scevgep5, align 4               ; <i32> [#uses=1]
+  %4 = add nsw i32 %3, %2                         ; <i32> [#uses=1]
+  store i32 %4, i32* %scevgep4, align 4
+  %exitcond = icmp eq i32 %tmp, %0                ; <i1> [#uses=1]
+  br i1 %exitcond, label %return, label %bb
+
+return:                                           ; preds = %bb, %entry
+  ret void
+}
diff --git a/test/CodeGen/ARM/lit.local.cfg b/test/CodeGen/ARM/lit.local.cfg
new file mode 100644
index 0000000..dd6c50d
--- /dev/null
+++ b/test/CodeGen/ARM/lit.local.cfg
@@ -0,0 +1,13 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+def getRoot(config):
+    if not config.parent:
+        return config
+    return getRoot(config.parent)
+
+root = getRoot(config)
+
+targets = set(root.targets_to_build.split())
+if not 'ARM' in targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/ARM/load_i1_select.ll b/test/CodeGen/ARM/load_i1_select.ll
new file mode 100644
index 0000000..bdd4081
--- /dev/null
+++ b/test/CodeGen/ARM/load_i1_select.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios0.0.0"
+
+; Codegen should only compare one bit of the loaded value.
+; rdar://10887484
+
+; CHECK: foo:
+; CHECK: ldrb r[[R0:[0-9]+]], [r0]
+; CHECK: tst.w r[[R0]], #1
+define void @foo(i8* %call, double* %p) nounwind {
+entry:
+  %tmp2 = load i8* %call
+  %tmp3 = trunc i8 %tmp2 to i1
+  %cond = select i1 %tmp3, double 2.000000e+00, double 1.000000e+00
+  store double %cond, double* %p
+  ret void
+}
diff --git a/test/CodeGen/ARM/log2_not_readnone.ll b/test/CodeGen/ARM/log2_not_readnone.ll
new file mode 100644
index 0000000..8068abd
--- /dev/null
+++ b/test/CodeGen/ARM/log2_not_readnone.ll
@@ -0,0 +1,15 @@
+; RUN: llc -mtriple=arm-linux-gnueabi %s -o - | FileCheck %s
+
+; Log2 and exp2 are string-matched to intrinsics. If they are not declared
+; readnone, they can't be changed to intrinsics (because they can change errno).
+
+declare double @log2(double)
+declare double @exp2(double)
+
+define void @f() {
+       ; CHECK: bl log2
+       %1 = call double @log2(double 0.000000e+00)
+       ; CHECK: bl exp2
+       %2 = call double @exp2(double 0.000000e+00)
+       ret void
+}
diff --git a/test/CodeGen/ARM/lsr-unfolded-offset.ll b/test/CodeGen/ARM/lsr-unfolded-offset.ll
index bf26a96..5b4cf9d 100644
--- a/test/CodeGen/ARM/lsr-unfolded-offset.ll
+++ b/test/CodeGen/ARM/lsr-unfolded-offset.ll
@@ -12,7 +12,7 @@
 ; CHECK: add
 
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
-target triple = "thumbv7-apple-macosx10.7.0"
+target triple = "thumbv7-apple-ios"
 
 %struct.partition_entry = type { i32, i32, i64, i64 }
 
diff --git a/test/CodeGen/ARM/machine-cse-cmp.ll b/test/CodeGen/ARM/machine-cse-cmp.ll
index c77402f..f566974 100644
--- a/test/CodeGen/ARM/machine-cse-cmp.ll
+++ b/test/CodeGen/ARM/machine-cse-cmp.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s
 ;rdar://8003725
 
 @G1 = external global i32
@@ -6,6 +6,7 @@
 
 define i32 @f1(i32 %cond1, i32 %x1, i32 %x2, i32 %x3) {
 entry:
+; CHECK: f1:
 ; CHECK: cmp
 ; CHECK: moveq
 ; CHECK-NOT: cmp
@@ -16,3 +17,31 @@ entry:
     %tmp4 = add i32 %tmp2, %tmp3
     ret i32 %tmp4
 }
+
+@foo = external global i32
+@bar = external global [250 x i8], align 1
+
+; CSE of cmp across BB boundary
+; rdar://10660865
+define void @f2() nounwind ssp {
+entry:
+; CHECK: f2:
+; CHECK: cmp
+; CHECK: poplt
+; CHECK-NOT: cmp
+; CHECK: movle
+  %0 = load i32* @foo, align 4
+  %cmp28 = icmp sgt i32 %0, 0
+  br i1 %cmp28, label %for.body.lr.ph, label %for.cond1.preheader
+
+for.body.lr.ph:                                   ; preds = %entry
+  %1 = icmp sgt i32 %0, 1
+  %smax = select i1 %1, i32 %0, i32 1
+  call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([250 x i8]* @bar, i32 0, i32 0), i8 0, i32 %smax, i32 1, i1 false)
+  unreachable
+
+for.cond1.preheader:                              ; preds = %entry
+  ret void
+}
+
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
diff --git a/test/CodeGen/ARM/memfunc.ll b/test/CodeGen/ARM/memfunc.ll
index aeda022..fe0056c 100644
--- a/test/CodeGen/ARM/memfunc.ll
+++ b/test/CodeGen/ARM/memfunc.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -mtriple=armv7-apple-ios -o - | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7m-darwin-eabi -o - | FileCheck %s --check-prefix=DARWIN
 ; RUN: llc < %s -mtriple=arm-none-eabi -o - | FileCheck --check-prefix=EABI %s
 
 @from = common global [500 x i32] zeroinitializer, align 4
@@ -18,6 +19,8 @@ entry:
         ; EABI memset swaps arguments
         ; CHECK: mov r1, #0
         ; CHECK: memset
+        ; DARWIN: movs r1, #0
+        ; DARWIN: memset
         ; EABI: mov r2, #0
         ; EABI: __aeabi_memset
         call void @llvm.memset.p0i8.i32(i8* bitcast ([500 x i32]* @from to i8*), i8 0, i32 500, i32 0, i1 false)
diff --git a/test/CodeGen/ARM/neon_spill.ll b/test/CodeGen/ARM/neon_spill.ll
new file mode 100644
index 0000000..677b9c2
--- /dev/null
+++ b/test/CodeGen/ARM/neon_spill.ll
@@ -0,0 +1,53 @@
+; RUN: llc < %s -verify-machineinstrs
+; PR12177
+;
+; This test case spills a QQQQ register.
+;
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv7-none-linux-gnueabi"
+
+%0 = type { %1*, i32, i32, i32, i8 }
+%1 = type { i32 (...)** }
+%2 = type { i8*, i8*, i8*, i32 }
+%3 = type { %4 }
+%4 = type { i32 (...)**, %2, %4*, i8, i8 }
+
+declare arm_aapcs_vfpcc void @func1(%0*, float* nocapture, float* nocapture, %2*) nounwind
+
+declare arm_aapcs_vfpcc %0** @func2()
+
+declare arm_aapcs_vfpcc %2* @func3(%2*, %2*, i32)
+
+declare arm_aapcs_vfpcc %2** @func4()
+
+define arm_aapcs_vfpcc void @foo(%3* nocapture) nounwind align 2 {
+  call void @llvm.arm.neon.vst4.v4i32(i8* undef, <4 x i32> <i32 0, i32 1065353216, i32 1073741824, i32 1077936128>, <4 x i32> <i32 1082130432, i32 1084227584, i32 1086324736, i32 1088421888>, <4 x i32> <i32 1090519040, i32 1091567616, i32 1092616192, i32 1093664768>, <4 x i32> <i32 1094713344, i32 1095761920, i32 1096810496, i32 1097859072>, i32 16) nounwind
+  %2 = call arm_aapcs_vfpcc  %0** @func2() nounwind
+  %3 = load %0** %2, align 4, !tbaa !0
+  store float 0.000000e+00, float* undef, align 4
+  %4 = call arm_aapcs_vfpcc  %2* @func3(%2* undef, %2* undef, i32 2956) nounwind
+  call arm_aapcs_vfpcc  void @func1(%0* %3, float* undef, float* undef, %2* undef)
+  %5 = call arm_aapcs_vfpcc  %0** @func2() nounwind
+  store float 1.000000e+00, float* undef, align 4
+  call arm_aapcs_vfpcc  void @func1(%0* undef, float* undef, float* undef, %2* undef)
+  store float 1.500000e+01, float* undef, align 4
+  %6 = call arm_aapcs_vfpcc  %2** @func4() nounwind
+  %7 = call arm_aapcs_vfpcc  %2* @func3(%2* undef, %2* undef, i32 2971) nounwind
+  %8 = fadd float undef, -1.000000e+05
+  store float %8, float* undef, align 16, !tbaa !3
+  %9 = call arm_aapcs_vfpcc  i32 @rand() nounwind
+  %10 = fmul float undef, 2.000000e+05
+  %11 = fadd float %10, -1.000000e+05
+  store float %11, float* undef, align 4, !tbaa !3
+  call void @llvm.arm.neon.vst4.v4i32(i8* undef, <4 x i32> <i32 0, i32 1065353216, i32 1073741824, i32 1077936128>, <4 x i32> <i32 1082130432, i32 1084227584, i32 1086324736, i32 1088421888>, <4 x i32> <i32 1090519040, i32 1091567616, i32 1092616192, i32 1093664768>, <4 x i32> <i32 1094713344, i32 1095761920, i32 1096810496, i32 1097859072>, i32 16) nounwind
+  ret void
+}
+
+declare void @llvm.arm.neon.vst4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind
+
+declare arm_aapcs_vfpcc i32 @rand()
+
+!0 = metadata !{metadata !"any pointer", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
+!3 = metadata !{metadata !"float", metadata !1}
diff --git a/test/CodeGen/ARM/odr_comdat.ll b/test/CodeGen/ARM/odr_comdat.ll
new file mode 100644
index 0000000..e28b578
--- /dev/null
+++ b/test/CodeGen/ARM/odr_comdat.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s -check-prefix=ARMGNUEABI
+
+; Checking that a comdat group gets generated correctly for a static member 
+; of instantiated C++ templates.
+; see http://sourcery.mentor.com/public/cxx-abi/abi.html#vague-itemplate
+; section 5.2.6 Instantiated templates
+; "Any static member data object is emitted in a COMDAT identified by its mangled 
+;  name, in any object file with a reference to its name symbol."
+
+; Case 1: variable is not explicitly initialized, and ends up in a .bss section
+; ARMGNUEABI: .section        .bss._ZN1CIiE1iE,"aGw",%nobits,_ZN1CIiE1iE,comdat
+@_ZN1CIiE1iE = weak_odr global i32 0, align 4
+
+; Case 2: variable is explicitly initialized, and ends up in a .data section
+; ARMGNUEABI: .section        .data._ZN1CIiE1jE,"aGw",%progbits,_ZN1CIiE1jE,comdat
+@_ZN1CIiE1jE = weak_odr global i32 12, align 4
diff --git a/test/CodeGen/ARM/rev.ll b/test/CodeGen/ARM/rev.ll
index ea44c28..6bb6743 100644
--- a/test/CodeGen/ARM/rev.ll
+++ b/test/CodeGen/ARM/rev.ll
@@ -112,11 +112,11 @@ entry:
   ret i32 %conv3
 }
 
+; rdar://10750814
 define zeroext i16 @test9(i16 zeroext %v) nounwind readnone {
 entry:
 ; CHECK: test9
-; CHECK: rev r0, r0
-; CHECK: lsr r0, r0, #16
+; CHECK: rev16 r0, r0
   %conv = zext i16 %v to i32
   %shr4 = lshr i32 %conv, 8
   %shl = shl nuw nsw i32 %conv, 8
diff --git a/test/CodeGen/ARM/select_xform.ll b/test/CodeGen/ARM/select_xform.ll
index 8a3133a..3a66ec5 100644
--- a/test/CodeGen/ARM/select_xform.ll
+++ b/test/CodeGen/ARM/select_xform.ll
@@ -58,3 +58,49 @@ define i32 @t4(i32 %a, i32 %b, i32 %x, i32 %y) nounwind {
   %s = or i32 %z, %y
  ret i32 %s
 }
+
+define i32 @t5(i32 %a, i32 %b, i32 %c) nounwind {
+entry:
+; ARM: t5:
+; ARM-NOT: moveq
+; ARM: orreq r2, r2, #1
+
+; T2: t5:
+; T2-NOT: moveq
+; T2: orreq.w r2, r2, #1
+  %tmp1 = icmp eq i32 %a, %b
+  %tmp2 = zext i1 %tmp1 to i32
+  %tmp3 = or i32 %tmp2, %c
+  ret i32 %tmp3
+}
+
+define i32 @t6(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
+; ARM: t6:
+; ARM-NOT: movge
+; ARM: eorlt r3, r3, r2
+
+; T2: t6:
+; T2-NOT: movge
+; T2: eorlt.w r3, r3, r2
+  %cond = icmp slt i32 %a, %b
+  %tmp1 = select i1 %cond, i32 %c, i32 0
+  %tmp2 = xor i32 %tmp1, %d
+  ret i32 %tmp2
+}
+
+define i32 @t7(i32 %a, i32 %b, i32 %c) nounwind {
+entry:
+; ARM: t7:
+; ARM-NOT: lsleq
+; ARM: andeq r2, r2, r2, lsl #1
+
+; T2: t7:
+; T2-NOT: lsleq.w
+; T2: andeq.w r2, r2, r2, lsl #1
+  %tmp1 = shl i32 %c, 1
+  %cond = icmp eq i32 %a, %b
+  %tmp2 = select i1 %cond, i32 %tmp1, i32 -1
+  %tmp3 = and i32 %c, %tmp2
+  ret i32 %tmp3
+}
+
diff --git a/test/CodeGen/ARM/shifter_operand.ll b/test/CodeGen/ARM/shifter_operand.ll
index 964cef0..521ffa1 100644
--- a/test/CodeGen/ARM/shifter_operand.ll
+++ b/test/CodeGen/ARM/shifter_operand.ll
@@ -54,12 +54,12 @@ declare i8* @malloc(...)
 define fastcc void @test4(i16 %addr) nounwind {
 entry:
 ; A8: test4:
-; A8: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2]
-; A8: str [[REG]], [r0, r1, lsl #2]
+; A8: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2]!
+; A8: str [[REG]], [r0]
 
 ; A9: test4:
-; A9: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2]
-; A9: str [[REG]], [r0, r1, lsl #2]
+; A9: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2]!
+; A9: str [[REG]], [r0]
   %0 = tail call i8* (...)* @malloc(i32 undef) nounwind
   %1 = bitcast i8* %0 to i32*
   %2 = sext i16 %addr to i32
diff --git a/test/CodeGen/ARM/spill-q.ll b/test/CodeGen/ARM/spill-q.ll
index bf4e55c..057ea11 100644
--- a/test/CodeGen/ARM/spill-q.ll
+++ b/test/CodeGen/ARM/spill-q.ll
@@ -11,7 +11,7 @@ declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly
 
 define void @aaa(%quuz* %this, i8* %block) {
 ; CHECK: aaa:
-; CHECK: bic sp, sp, #15
+; CHECK: bic {{.*}}, #15
 ; CHECK: vst1.64 {{.*}}sp, :128
 ; CHECK: vld1.64 {{.*}}sp, :128
 entry:
diff --git a/test/CodeGen/ARM/tail-dup.ll b/test/CodeGen/ARM/tail-dup.ll
new file mode 100644
index 0000000..e015bf0
--- /dev/null
+++ b/test/CodeGen/ARM/tail-dup.ll
@@ -0,0 +1,44 @@
+; RUN: llc < %s -mtriple=thumb-apple-darwin -relocation-model=dynamic-no-pic -mcpu=cortex-a8 -asm-verbose=false | FileCheck %s
+
+; We should be able to tail-duplicate the basic block containing the indirectbr
+; into all of its predecessors.
+; CHECK: fn:
+; CHECK: mov pc
+; CHECK: mov pc
+; CHECK: mov pc
+
+@fn.codetable = internal unnamed_addr constant [3 x i8*] [i8* blockaddress(@fn, %RETURN), i8* blockaddress(@fn, %INCREMENT), i8* blockaddress(@fn, %DECREMENT)], align 4
+
+define i32 @fn(i32* nocapture %opcodes) nounwind readonly ssp {
+entry:
+  %0 = load i32* %opcodes, align 4, !tbaa !0
+  %arrayidx = getelementptr inbounds [3 x i8*]* @fn.codetable, i32 0, i32 %0
+  br label %indirectgoto
+
+INCREMENT:                                        ; preds = %indirectgoto
+  %inc = add nsw i32 %result.0, 1
+  %1 = load i32* %opcodes.addr.0, align 4, !tbaa !0
+  %arrayidx2 = getelementptr inbounds [3 x i8*]* @fn.codetable, i32 0, i32 %1
+  br label %indirectgoto
+
+DECREMENT:                                        ; preds = %indirectgoto
+  %dec = add nsw i32 %result.0, -1
+  %2 = load i32* %opcodes.addr.0, align 4, !tbaa !0
+  %arrayidx4 = getelementptr inbounds [3 x i8*]* @fn.codetable, i32 0, i32 %2
+  br label %indirectgoto
+
+indirectgoto:                                     ; preds = %DECREMENT, %INCREMENT, %entry
+  %result.0 = phi i32 [ 0, %entry ], [ %dec, %DECREMENT ], [ %inc, %INCREMENT ]
+  %opcodes.pn = phi i32* [ %opcodes, %entry ], [ %opcodes.addr.0, %DECREMENT ], [ %opcodes.addr.0, %INCREMENT ]
+  %indirect.goto.dest.in = phi i8** [ %arrayidx, %entry ], [ %arrayidx4, %DECREMENT ], [ %arrayidx2, %INCREMENT ]
+  %opcodes.addr.0 = getelementptr inbounds i32* %opcodes.pn, i32 1
+  %indirect.goto.dest = load i8** %indirect.goto.dest.in, align 4
+  indirectbr i8* %indirect.goto.dest, [label %RETURN, label %INCREMENT, label %DECREMENT]
+
+RETURN:                                           ; preds = %indirectgoto
+  ret i32 %result.0
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/ARM/test-sharedidx.ll b/test/CodeGen/ARM/test-sharedidx.ll
new file mode 100644
index 0000000..93340c3
--- /dev/null
+++ b/test/CodeGen/ARM/test-sharedidx.ll
@@ -0,0 +1,96 @@
+; RUN: llc < %s -mtriple=armv7-apple-ios -mcpu=cortex-a9 -stress-ivchain | FileCheck %s
+; REQUIRES: asserts
+
+; @sharedidx is an unrolled variant of this loop:
+;  for (unsigned long i = 0; i < len; i += s) {
+;    c[i] = a[i] + b[i];
+;  }
+; where 's' cannot be folded into the addressing mode.
+;
+; This is not quite profitable to chain. But with -stress-ivchain, we
+; can form three address chains in place of the shared induction
+; variable.
+
+; rdar://10674430
+define void @sharedidx(i8* nocapture %a, i8* nocapture %b, i8* nocapture %c, i32 %s, i32 %len) nounwind ssp {
+entry:
+; CHECK: sharedidx:
+  %cmp8 = icmp eq i32 %len, 0
+  br i1 %cmp8, label %for.end, label %for.body
+
+for.body:                                         ; preds = %entry, %for.body.3
+; CHECK: %for.body
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+  %i.09 = phi i32 [ %add5.3, %for.body.3 ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i8* %a, i32 %i.09
+  %0 = load i8* %arrayidx, align 1
+  %conv6 = zext i8 %0 to i32
+  %arrayidx1 = getelementptr inbounds i8* %b, i32 %i.09
+  %1 = load i8* %arrayidx1, align 1
+  %conv27 = zext i8 %1 to i32
+  %add = add nsw i32 %conv27, %conv6
+  %conv3 = trunc i32 %add to i8
+  %arrayidx4 = getelementptr inbounds i8* %c, i32 %i.09
+  store i8 %conv3, i8* %arrayidx4, align 1
+  %add5 = add i32 %i.09, %s
+  %cmp = icmp ult i32 %add5, %len
+  br i1 %cmp, label %for.body.1, label %for.end
+
+for.end:                                          ; preds = %for.body, %for.body.1, %for.body.2, %for.body.3, %entry
+  ret void
+
+for.body.1:                                       ; preds = %for.body
+; CHECK: %for.body.1
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+  %arrayidx.1 = getelementptr inbounds i8* %a, i32 %add5
+  %2 = load i8* %arrayidx.1, align 1
+  %conv6.1 = zext i8 %2 to i32
+  %arrayidx1.1 = getelementptr inbounds i8* %b, i32 %add5
+  %3 = load i8* %arrayidx1.1, align 1
+  %conv27.1 = zext i8 %3 to i32
+  %add.1 = add nsw i32 %conv27.1, %conv6.1
+  %conv3.1 = trunc i32 %add.1 to i8
+  %arrayidx4.1 = getelementptr inbounds i8* %c, i32 %add5
+  store i8 %conv3.1, i8* %arrayidx4.1, align 1
+  %add5.1 = add i32 %add5, %s
+  %cmp.1 = icmp ult i32 %add5.1, %len
+  br i1 %cmp.1, label %for.body.2, label %for.end
+
+for.body.2:                                       ; preds = %for.body.1
+; CHECK: %for.body.2
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+  %arrayidx.2 = getelementptr inbounds i8* %a, i32 %add5.1
+  %4 = load i8* %arrayidx.2, align 1
+  %conv6.2 = zext i8 %4 to i32
+  %arrayidx1.2 = getelementptr inbounds i8* %b, i32 %add5.1
+  %5 = load i8* %arrayidx1.2, align 1
+  %conv27.2 = zext i8 %5 to i32
+  %add.2 = add nsw i32 %conv27.2, %conv6.2
+  %conv3.2 = trunc i32 %add.2 to i8
+  %arrayidx4.2 = getelementptr inbounds i8* %c, i32 %add5.1
+  store i8 %conv3.2, i8* %arrayidx4.2, align 1
+  %add5.2 = add i32 %add5.1, %s
+  %cmp.2 = icmp ult i32 %add5.2, %len
+  br i1 %cmp.2, label %for.body.3, label %for.end
+
+for.body.3:                                       ; preds = %for.body.2
+; CHECK: %for.body.3
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+  %arrayidx.3 = getelementptr inbounds i8* %a, i32 %add5.2
+  %6 = load i8* %arrayidx.3, align 1
+  %conv6.3 = zext i8 %6 to i32
+  %arrayidx1.3 = getelementptr inbounds i8* %b, i32 %add5.2
+  %7 = load i8* %arrayidx1.3, align 1
+  %conv27.3 = zext i8 %7 to i32
+  %add.3 = add nsw i32 %conv27.3, %conv6.3
+  %conv3.3 = trunc i32 %add.3 to i8
+  %arrayidx4.3 = getelementptr inbounds i8* %c, i32 %add5.2
+  store i8 %conv3.3, i8* %arrayidx4.3, align 1
+  %add5.3 = add i32 %add5.2, %s
+  %cmp.3 = icmp ult i32 %add5.3, %len
+  br i1 %cmp.3, label %for.body, label %for.end
+}
diff --git a/test/CodeGen/ARM/vmov.ll b/test/CodeGen/ARM/vmov.ll
index be95657..0c23879 100644
--- a/test/CodeGen/ARM/vmov.ll
+++ b/test/CodeGen/ARM/vmov.ll
@@ -381,3 +381,20 @@ entry:
   store <4 x float> %b, <4 x float> *%p
   ret void
 }
+
+; Vector any_extends must be selected as either vmovl.u or vmovl.s.
+; rdar://10723651
+define void @any_extend(<4 x i1> %x, <4 x i32> %y) nounwind ssp {
+entry:
+;CHECK: any_extend
+;CHECK: vmovl
+  %and.i186 = zext <4 x i1> %x to <4 x i32>
+  %add.i185 = sub <4 x i32> %and.i186, %y
+  %sub.i = sub <4 x i32> %add.i185, zeroinitializer
+  %add.i = add <4 x i32> %sub.i, zeroinitializer
+  %vmovn.i = trunc <4 x i32> %add.i to <4 x i16>
+  tail call void @llvm.arm.neon.vst1.v4i16(i8* undef, <4 x i16> %vmovn.i, i32 2)
+  unreachable
+}
+
+declare void @llvm.arm.neon.vst1.v4i16(i8*, <4 x i16>, i32) nounwind
diff --git a/test/CodeGen/ARM/vst2.ll b/test/CodeGen/ARM/vst2.ll
index 915a84b..fb05a20 100644
--- a/test/CodeGen/ARM/vst2.ll
+++ b/test/CodeGen/ARM/vst2.ll
@@ -110,6 +110,24 @@ define void @vst2Qf(float* %A, <4 x float>* %B) nounwind {
 	ret void
 }
 
+define i8* @vst2update(i8* %out, <4 x i16>* %B) nounwind {
+;CHECK: vst2update
+;CHECK: vst2.16 {d16, d17}, [r0]!
+	%tmp1 = load <4 x i16>* %B
+	tail call void @llvm.arm.neon.vst2.v4i16(i8* %out, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 2)
+	%t5 = getelementptr inbounds i8* %out, i32 16
+	ret i8* %t5
+}
+
+define i8* @vst2update2(i8 * %out, <4 x float> * %this) nounwind optsize ssp align 2 {
+;CHECK: vst2update2
+;CHECK: vst2.32 {d16, d17, d18, d19}, [r0]!
+  %tmp1 = load <4 x float>* %this
+  call void @llvm.arm.neon.vst2.v4f32(i8* %out, <4 x float> %tmp1, <4 x float> %tmp1, i32 4) nounwind
+  %tmp2 = getelementptr inbounds i8* %out, i32  32
+  ret i8* %tmp2
+}
+
 declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind
 declare void @llvm.arm.neon.vst2.v4i16(i8*, <4 x i16>, <4 x i16>, i32) nounwind
 declare void @llvm.arm.neon.vst2.v2i32(i8*, <2 x i32>, <2 x i32>, i32) nounwind
diff --git a/test/CodeGen/CBackend/X86/dg.exp b/test/CodeGen/CBackend/X86/dg.exp
deleted file mode 100644
index 44e3a5e..0000000
--- a/test/CodeGen/CBackend/X86/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target X86] && [llvm_supports_target CBackend] } {
-    RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp,s}]]
-}
diff --git a/test/CodeGen/CBackend/X86/lit.local.cfg b/test/CodeGen/CBackend/X86/lit.local.cfg
new file mode 100644
index 0000000..037d8c3
--- /dev/null
+++ b/test/CodeGen/CBackend/X86/lit.local.cfg
@@ -0,0 +1,13 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+def getRoot(config):
+    if not config.parent:
+        return config
+    return getRoot(config.parent)
+
+root = getRoot(config)
+
+targets = set(root.targets_to_build.split())
+if not 'CBackend' in targets or not 'X86' in targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/CBackend/dg.exp b/test/CodeGen/CBackend/dg.exp
deleted file mode 100644
index 9d78940..0000000
--- a/test/CodeGen/CBackend/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target CBackend] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/CBackend/lit.local.cfg b/test/CodeGen/CBackend/lit.local.cfg
new file mode 100644
index 0000000..0dce170
--- /dev/null
+++ b/test/CodeGen/CBackend/lit.local.cfg
@@ -0,0 +1,13 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+def getRoot(config):
+    if not config.parent:
+        return config
+    return getRoot(config.parent)
+
+root = getRoot(config)
+
+targets = set(root.targets_to_build.split())
+if not 'CBackend' in targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/CPP/2012-02-05-UnitVarCrash.ll b/test/CodeGen/CPP/2012-02-05-UnitVarCrash.ll
new file mode 100644
index 0000000..419f594
--- /dev/null
+++ b/test/CodeGen/CPP/2012-02-05-UnitVarCrash.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=cpp
+declare void @foo(<4 x i32>)
+define void @bar() {
+  call void @foo(<4 x i32> <i32 0, i32 1, i32 2, i32 3>)
+  ret void
+}
diff --git a/test/CodeGen/CPP/dg.exp b/test/CodeGen/CPP/dg.exp
deleted file mode 100644
index 3276dcc..0000000
--- a/test/CodeGen/CPP/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target CppBackend] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/CPP/lit.local.cfg b/test/CodeGen/CPP/lit.local.cfg
new file mode 100644
index 0000000..96596d8
--- /dev/null
+++ b/test/CodeGen/CPP/lit.local.cfg
@@ -0,0 +1,13 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+def getRoot(config):
+    if not config.parent:
+        return config
+    return getRoot(config.parent)
+
+root = getRoot(config)
+
+targets = set(root.targets_to_build.split())
+if not 'CppBackend' in targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/CellSPU/dg.exp b/test/CodeGen/CellSPU/dg.exp
deleted file mode 100644
index d416479..0000000
--- a/test/CodeGen/CellSPU/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target CellSPU] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/CellSPU/lit.local.cfg b/test/CodeGen/CellSPU/lit.local.cfg
new file mode 100644
index 0000000..6ae0972
--- /dev/null
+++ b/test/CodeGen/CellSPU/lit.local.cfg
@@ -0,0 +1,13 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+def getRoot(config):
+    if not config.parent:
+        return config
+    return getRoot(config.parent)
+
+root = getRoot(config)
+
+targets = set(root.targets_to_build.split())
+if not 'CellSPU' in targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/CellSPU/rotate_ops.ll b/test/CodeGen/CellSPU/rotate_ops.ll
index b1219e6..9770935 100644
--- a/test/CodeGen/CellSPU/rotate_ops.ll
+++ b/test/CodeGen/CellSPU/rotate_ops.ll
@@ -163,7 +163,7 @@ define i8 @rotri8(i8 %A) {
 define <2 x float> @test1(<4 x float> %param )
 {
 ; CHECK: test1
-; CHECK: rotqbyi
+; CHECK: shufb
   %el = extractelement <4 x float> %param, i32 1
   %vec1 = insertelement <1 x float> undef, float %el, i32 0
   %rv = shufflevector <1 x float> %vec1, <1 x float> undef, <2 x i32><i32 0,i32 0>
diff --git a/test/CodeGen/Generic/2007-12-31-UnusedSelector.ll b/test/CodeGen/Generic/2007-12-31-UnusedSelector.ll
index 943ed88..d67559e 100644
--- a/test/CodeGen/Generic/2007-12-31-UnusedSelector.ll
+++ b/test/CodeGen/Generic/2007-12-31-UnusedSelector.ll
@@ -30,8 +30,6 @@ UnifiedUnreachableBlock:		; preds = %entry
 
 declare void @__cxa_throw(i8*, i8*, void (i8*)*) noreturn 
 
-declare i32 @llvm.eh.selector.i32(i8*, i8*, ...)
-
 declare void @__cxa_end_catch()
 
 declare i32 @__gxx_personality_v0(...)
diff --git a/test/CodeGen/Generic/2009-11-16-BadKillsCrash.ll b/test/CodeGen/Generic/2009-11-16-BadKillsCrash.ll
index 3cbf4c5..b483009 100644
--- a/test/CodeGen/Generic/2009-11-16-BadKillsCrash.ll
+++ b/test/CodeGen/Generic/2009-11-16-BadKillsCrash.ll
@@ -15,8 +15,6 @@
 %"struct.std::locale::facet" = type { i32 (...)**, i32 }
 %union..0._15 = type { i32 }
 
-declare i8* @llvm.eh.exception() nounwind readonly
-
 declare i8* @__cxa_begin_catch(i8*) nounwind
 
 declare %"struct.std::ctype<char>"* @_ZSt9use_facetISt5ctypeIcEERKT_RKSt6locale(%"struct.std::locale"*)
diff --git a/test/CodeGen/Generic/dg.exp b/test/CodeGen/Generic/dg.exp
deleted file mode 100644
index f200589..0000000
--- a/test/CodeGen/Generic/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/CodeGen/Generic/lit.local.cfg b/test/CodeGen/Generic/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/CodeGen/Generic/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/CodeGen/Hexagon/args.ll b/test/CodeGen/Hexagon/args.ll
index b882cf7..e9ac8b6 100644
--- a/test/CodeGen/Hexagon/args.ll
+++ b/test/CodeGen/Hexagon/args.ll
@@ -1,5 +1,4 @@
-; RUN: true
-; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv4 -disable-dfa-sched < %s | FileCheck %s
 ; CHECK: r[[T0:[0-9]+]] = #7
 ; CHECK: memw(r29 + #0) = r[[T0]]
 ; CHECK: r0 = #1
diff --git a/test/CodeGen/Hexagon/combine.ll b/test/CodeGen/Hexagon/combine.ll
index 36abd74..7219985 100644
--- a/test/CodeGen/Hexagon/combine.ll
+++ b/test/CodeGen/Hexagon/combine.ll
@@ -1,5 +1,4 @@
-; RUN: true
-; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
 ; CHECK: combine(r{{[0-9]+}}, r{{[0-9]+}})
 
 @j = external global i32
diff --git a/test/CodeGen/Hexagon/dg.exp b/test/CodeGen/Hexagon/dg.exp
deleted file mode 100644
index 89f45e6..0000000
--- a/test/CodeGen/Hexagon/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target Hexagon] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/Hexagon/double.ll b/test/CodeGen/Hexagon/double.ll
index 04c2ec1..c3b6f37 100644
--- a/test/CodeGen/Hexagon/double.ll
+++ b/test/CodeGen/Hexagon/double.ll
@@ -1,5 +1,4 @@
-; RUN: true
-; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
 ; CHECK: __hexagon_adddf3
 ; CHECK: __hexagon_subdf3
 
diff --git a/test/CodeGen/Hexagon/float.ll b/test/CodeGen/Hexagon/float.ll
index 51acf2e..bec9f58 100644
--- a/test/CodeGen/Hexagon/float.ll
+++ b/test/CodeGen/Hexagon/float.ll
@@ -1,5 +1,4 @@
-; RUN: true
-; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
 ; CHECK: __hexagon_addsf3
 ; CHECK: __hexagon_subsf3
 
diff --git a/test/CodeGen/Hexagon/frame.ll b/test/CodeGen/Hexagon/frame.ll
index c0a9fda..dc87c73 100644
--- a/test/CodeGen/Hexagon/frame.ll
+++ b/test/CodeGen/Hexagon/frame.ll
@@ -1,5 +1,4 @@
-; RUN: true
-; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
 
 @num = external global i32
 @acc = external global i32
diff --git a/test/CodeGen/Hexagon/lit.local.cfg b/test/CodeGen/Hexagon/lit.local.cfg
new file mode 100644
index 0000000..ea12f68
--- /dev/null
+++ b/test/CodeGen/Hexagon/lit.local.cfg
@@ -0,0 +1,13 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+def getRoot(config):
+    if not config.parent:
+        return config
+    return getRoot(config.parent)
+
+root = getRoot(config)
+
+targets = set(root.targets_to_build.split())
+if not 'Hexagon' in targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/Hexagon/mpy.ll b/test/CodeGen/Hexagon/mpy.ll
index afd6fc6..d5c5ae3 100644
--- a/test/CodeGen/Hexagon/mpy.ll
+++ b/test/CodeGen/Hexagon/mpy.ll
@@ -1,5 +1,4 @@
-; RUN: true
-; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
 ; CHECK: += mpyi
 
 define void @foo(i32 %acc, i32 %num, i32 %num2) nounwind {
diff --git a/test/CodeGen/Hexagon/static.ll b/test/CodeGen/Hexagon/static.ll
index c251bd4..1105096 100644
--- a/test/CodeGen/Hexagon/static.ll
+++ b/test/CodeGen/Hexagon/static.ll
@@ -1,13 +1,12 @@
-; RUN: true
-; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv4 -disable-dfa-sched < %s | FileCheck %s
 
 @num = external global i32
 @acc = external global i32
 @val = external global i32
 
+; CHECK: CONST32(#num)
 ; CHECK: CONST32(#acc)
 ; CHECK: CONST32(#val)
-; CHECK: CONST32(#num)
 
 define void @foo() nounwind {
 entry:
diff --git a/test/CodeGen/Hexagon/struct_args.ll b/test/CodeGen/Hexagon/struct_args.ll
index 2c962d0..cc409db 100644
--- a/test/CodeGen/Hexagon/struct_args.ll
+++ b/test/CodeGen/Hexagon/struct_args.ll
@@ -1,5 +1,4 @@
-; RUN: true
-; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
 ; CHECK: r1:0 = or(r{{[0-9]}}:{{[0-9]}}, r{{[0-9]}}:{{[0-9]}})
 
 %struct.small = type { i32, i32 }
diff --git a/test/CodeGen/Hexagon/struct_args_large.ll b/test/CodeGen/Hexagon/struct_args_large.ll
index 69de4f6..af099cd 100644
--- a/test/CodeGen/Hexagon/struct_args_large.ll
+++ b/test/CodeGen/Hexagon/struct_args_large.ll
@@ -1,5 +1,4 @@
-; RUN: true
-; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
 ; CHECK: r[[T0:[0-9]+]] = CONST32(#s2)
 ; CHECK: r[[T1:[0-9]+]] = memw(r[[T0]] + #0)
 ; CHECK: memw(r29 + #0) = r[[T1]]
diff --git a/test/CodeGen/Hexagon/vaddh.ll b/test/CodeGen/Hexagon/vaddh.ll
index 788e474..01d2041 100644
--- a/test/CodeGen/Hexagon/vaddh.ll
+++ b/test/CodeGen/Hexagon/vaddh.ll
@@ -1,5 +1,4 @@
-; RUN: true
-; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
 ; CHECK: vaddh(r{{[0-9]+}}, r{{[0-9]+}})
 
 @j = external global i32
diff --git a/test/CodeGen/MBlaze/dg.exp b/test/CodeGen/MBlaze/dg.exp
deleted file mode 100644
index bfd5e47..0000000
--- a/test/CodeGen/MBlaze/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target MBlaze] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/MBlaze/lit.local.cfg b/test/CodeGen/MBlaze/lit.local.cfg
new file mode 100644
index 0000000..e43df89
--- /dev/null
+++ b/test/CodeGen/MBlaze/lit.local.cfg
@@ -0,0 +1,13 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+def getRoot(config):
+    if not config.parent:
+        return config
+    return getRoot(config.parent)
+
+root = getRoot(config)
+
+targets = set(root.targets_to_build.split())
+if not 'MBlaze' in targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/MSP430/dg.exp b/test/CodeGen/MSP430/dg.exp
deleted file mode 100644
index e4ea13a..0000000
--- a/test/CodeGen/MSP430/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target MSP430] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/MSP430/lit.local.cfg b/test/CodeGen/MSP430/lit.local.cfg
new file mode 100644
index 0000000..b9b654d
--- /dev/null
+++ b/test/CodeGen/MSP430/lit.local.cfg
@@ -0,0 +1,13 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+def getRoot(config):
+    if not config.parent:
+        return config
+    return getRoot(config.parent)
+
+root = getRoot(config)
+
+targets = set(root.targets_to_build.split())
+if not 'MSP430' in targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll b/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll
index e0c745f..8479ad2 100644
--- a/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll
+++ b/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll
@@ -1,20 +1,16 @@
-; DISABLED: llc < %s -march=mips -o %t
-; DISABLED: grep seh %t | count 1
-; DISABLED: grep seb %t | count 1
-; RUN: false
-; XFAIL: *
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "mipsallegrexel-unknown-psp-elf"
+; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s 
+; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s 
 
 define signext i8 @A(i8 %e.0, i8 signext %sum)  nounwind {
 entry:
+; CHECK: seb
 	add i8 %sum, %e.0		; <i8>:0 [#uses=1]
 	ret i8 %0
 }
 
 define signext i16 @B(i16 %e.0, i16 signext %sum) nounwind {
 entry:
+; CHECK: seh
 	add i16 %sum, %e.0		; <i16>:0 [#uses=1]
 	ret i16 %0
 }
diff --git a/test/CodeGen/Mips/2008-08-01-AsmInline.ll b/test/CodeGen/Mips/2008-08-01-AsmInline.ll
index f701bf1..dbde742 100644
--- a/test/CodeGen/Mips/2008-08-01-AsmInline.ll
+++ b/test/CodeGen/Mips/2008-08-01-AsmInline.ll
@@ -1,4 +1,5 @@
 ; RUN: llc -march=mips < %s | FileCheck %s
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 < %s | FileCheck %s
 
 %struct.DWstruct = type { i32, i32 }
 
@@ -13,3 +14,40 @@ entry:
   %res = add i32 %asmresult, %asmresult1
   ret i32 %res
 }
+
+@gi2 = external global i32
+@gi1 = external global i32
+@gi0 = external global i32
+@gf0 = external global float
+@gf1 = external global float
+@gd0 = external global double
+@gd1 = external global double
+
+define void @foo0() nounwind {
+entry:
+; CHECK: addu
+  %0 = load i32* @gi1, align 4
+  %1 = load i32* @gi0, align 4
+  %2 = tail call i32 asm "addu $0, $1, $2", "=r,r,r"(i32 %0, i32 %1) nounwind
+  store i32 %2, i32* @gi2, align 4
+  ret void
+}
+
+define void @foo2() nounwind {
+entry:
+; CHECK: neg.s
+  %0 = load float* @gf1, align 4
+  %1 = tail call float asm "neg.s $0, $1", "=f,f"(float %0) nounwind
+  store float %1, float* @gf0, align 4
+  ret void
+}
+
+define void @foo3() nounwind {
+entry:
+; CHECK: neg.d
+  %0 = load double* @gd1, align 8
+  %1 = tail call double asm "neg.d $0, $1", "=f,f"(double %0) nounwind
+  store double %1, double* @gd0, align 8
+  ret void
+}
+
diff --git a/test/CodeGen/Mips/2010-07-20-Switch.ll b/test/CodeGen/Mips/2010-07-20-Switch.ll
index 23b5349..785a416 100644
--- a/test/CodeGen/Mips/2010-07-20-Switch.ll
+++ b/test/CodeGen/Mips/2010-07-20-Switch.ll
@@ -15,7 +15,7 @@ entry:
 ; PIC-O32: sll ${{[0-9]+}}, ${{[0-9]+}}, 2
 ; PIC-N64: ld $[[R0:[0-9]+]], %got_page($JTI0_0)
 ; PIC-N64: daddiu ${{[0-9]+}}, $[[R0]], %got_ofst($JTI0_0)
-; PIC-N64: dsll ${{[0-9]+}}, ${{[0-9]+}}, 2
+; PIC-N64: dsll ${{[0-9]+}}, ${{[0-9]+}}, 3
   switch i32 %0, label %bb4 [
     i32 0, label %bb5
     i32 1, label %bb1
@@ -39,3 +39,23 @@ bb4:                                              ; preds = %entry
 bb5:                                              ; preds = %entry
   ret i32 1
 }
+
+; STATIC-O32: .align  2
+; STATIC-O32: $JTI0_0:
+; STATIC-O32: .4byte
+; STATIC-O32: .4byte
+; STATIC-O32: .4byte
+; STATIC-O32: .4byte
+; PIC-O32: .align  2
+; PIC-O32: $JTI0_0:
+; PIC-O32: .gpword
+; PIC-O32: .gpword
+; PIC-O32: .gpword 
+; PIC-O32: .gpword 
+; PIC-N64: .align  3
+; PIC-N64: $JTI0_0:
+; PIC-N64: .gpdword
+; PIC-N64: .gpdword
+; PIC-N64: .gpdword 
+; PIC-N64: .gpdword 
+
diff --git a/test/CodeGen/Mips/bswap.ll b/test/CodeGen/Mips/bswap.ll
new file mode 100644
index 0000000..a8fc2cd
--- /dev/null
+++ b/test/CodeGen/Mips/bswap.ll
@@ -0,0 +1,25 @@
+; RUN: llc  < %s -march=mipsel -mcpu=mips32r2 | FileCheck %s -check-prefix=MIPS32
+; RUN: llc  < %s -march=mips64el -mcpu=mips64r2 | FileCheck %s -check-prefix=MIPS64
+
+define i32 @bswap32(i32 %x) nounwind readnone {
+entry:
+; MIPS32: bswap32:
+; MIPS32: wsbh $[[R0:[0-9]+]]
+; MIPS32: rotr ${{[0-9]+}}, $[[R0]], 16
+  %or.3 = call i32 @llvm.bswap.i32(i32 %x)
+  ret i32 %or.3
+}
+
+define i64 @bswap64(i64 %x) nounwind readnone {
+entry:
+; MIPS64: bswap64:
+; MIPS64: dsbh $[[R0:[0-9]+]]
+; MIPS64: dshd ${{[0-9]+}}, $[[R0]]
+  %or.7 = call i64 @llvm.bswap.i64(i64 %x)
+  ret i64 %or.7
+}
+
+declare i32 @llvm.bswap.i32(i32) nounwind readnone
+
+declare i64 @llvm.bswap.i64(i64) nounwind readnone
+
diff --git a/test/CodeGen/Mips/dg.exp b/test/CodeGen/Mips/dg.exp
deleted file mode 100644
index adb2cac..0000000
--- a/test/CodeGen/Mips/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target Mips] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/Mips/eh.ll b/test/CodeGen/Mips/eh.ll
index e3e336b..c3facdb 100644
--- a/test/CodeGen/Mips/eh.ll
+++ b/test/CodeGen/Mips/eh.ll
@@ -54,16 +54,10 @@ unreachable:                                      ; preds = %entry
 
 declare i8* @__cxa_allocate_exception(i32)
 
-declare i8* @llvm.eh.exception() nounwind readonly
-
 declare i32 @__gxx_personality_v0(...)
 
-declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
-
 declare i32 @llvm.eh.typeid.for(i8*) nounwind
 
-declare void @llvm.eh.resume(i8*, i32)
-
 declare void @__cxa_throw(i8*, i8*, i8*)
 
 declare i8* @__cxa_begin_catch(i8*)
diff --git a/test/CodeGen/Mips/fcopysign.ll b/test/CodeGen/Mips/fcopysign.ll
index 950c437..e494fe2 100644
--- a/test/CodeGen/Mips/fcopysign.ll
+++ b/test/CodeGen/Mips/fcopysign.ll
@@ -5,9 +5,8 @@
 define double @func0(double %d0, double %d1) nounwind readnone {
 entry:
 ; MIPS32-EL: func0:
-; MIPS32-EL: lui $[[T1:[0-9]+]], 32768
-; MIPS32-EL: ori $[[MSK1:[0-9]+]], $[[T1]], 0
 ; MIPS32-EL: mfc1 $[[HI0:[0-9]+]], $f15
+; MIPS32-EL: lui $[[MSK1:[0-9]+]], 32768
 ; MIPS32-EL: and $[[AND1:[0-9]+]], $[[HI0]], $[[MSK1]]
 ; MIPS32-EL: lui $[[T0:[0-9]+]], 32767
 ; MIPS32-EL: ori $[[MSK0:[0-9]+]], $[[T0]], 65535
@@ -18,9 +17,8 @@ entry:
 ; MIPS32-EL: mtc1 $[[LO0]], $f0
 ; MIPS32-EL: mtc1 $[[OR]], $f1
 ;
-; MIPS32-EB: lui $[[T1:[0-9]+]], 32768
-; MIPS32-EB: ori $[[MSK1:[0-9]+]], $[[T1]], 0
 ; MIPS32-EB: mfc1 $[[HI1:[0-9]+]], $f14
+; MIPS32-EB: lui $[[MSK1:[0-9]+]], 32768
 ; MIPS32-EB: and $[[AND1:[0-9]+]], $[[HI1]], $[[MSK1]]
 ; MIPS32-EB: lui $[[T0:[0-9]+]], 32767
 ; MIPS32-EB: ori $[[MSK0:[0-9]+]], $[[T0]], 65535
@@ -46,9 +44,8 @@ declare double @copysign(double, double) nounwind readnone
 define float @func1(float %f0, float %f1) nounwind readnone {
 entry:
 ; MIPS32-EL: func1:
-; MIPS32-EL: lui $[[T1:[0-9]+]], 32768
-; MIPS32-EL: ori $[[MSK1:[0-9]+]], $[[T1]], 0
 ; MIPS32-EL: mfc1 $[[ARG1:[0-9]+]], $f14
+; MIPS32-EL: lui $[[MSK1:[0-9]+]], 32768
 ; MIPS32-EL: and $[[T3:[0-9]+]], $[[ARG1]], $[[MSK1]]
 ; MIPS32-EL: lui $[[T0:[0-9]+]], 32767
 ; MIPS32-EL: ori $[[MSK0:[0-9]+]], $[[T0]], 65535
diff --git a/test/CodeGen/Mips/fmadd1.ll b/test/CodeGen/Mips/fmadd1.ll
new file mode 100644
index 0000000..435b419
--- /dev/null
+++ b/test/CodeGen/Mips/fmadd1.ll
@@ -0,0 +1,88 @@
+; RUN: llc < %s -march=mipsel -mcpu=mips32r2 -enable-no-nans-fp-math | FileCheck %s -check-prefix=32R2
+; RUN: llc < %s -march=mips64el -mcpu=mips64r2 -mattr=n64 -enable-no-nans-fp-math | FileCheck %s -check-prefix=64R2
+; RUN: llc < %s -march=mipsel -mcpu=mips32r2 | FileCheck %s -check-prefix=32R2NAN
+; RUN: llc < %s -march=mips64el -mcpu=mips64r2 -mattr=n64 | FileCheck %s -check-prefix=64R2NAN
+
+define float @FOO0float(float %a, float %b, float %c) nounwind readnone {
+entry:
+; CHECK: madd.s 
+  %mul = fmul float %a, %b
+  %add = fadd float %mul, %c
+  %add1 = fadd float %add, 0.000000e+00
+  ret float %add1
+}
+
+define float @FOO1float(float %a, float %b, float %c) nounwind readnone {
+entry:
+; CHECK: msub.s 
+  %mul = fmul float %a, %b
+  %sub = fsub float %mul, %c
+  %add = fadd float %sub, 0.000000e+00
+  ret float %add
+}
+
+define float @FOO2float(float %a, float %b, float %c) nounwind readnone {
+entry:
+; 32R2: nmadd.s 
+; 64R2: nmadd.s 
+; 32R2NAN: madd.s 
+; 64R2NAN: madd.s 
+  %mul = fmul float %a, %b
+  %add = fadd float %mul, %c
+  %sub = fsub float 0.000000e+00, %add
+  ret float %sub
+}
+
+define float @FOO3float(float %a, float %b, float %c) nounwind readnone {
+entry:
+; 32R2: nmsub.s 
+; 64R2: nmsub.s 
+; 32R2NAN: msub.s 
+; 64R2NAN: msub.s 
+  %mul = fmul float %a, %b
+  %sub = fsub float %mul, %c
+  %sub1 = fsub float 0.000000e+00, %sub
+  ret float %sub1
+}
+
+define double @FOO10double(double %a, double %b, double %c) nounwind readnone {
+entry:
+; CHECK: madd.d
+  %mul = fmul double %a, %b
+  %add = fadd double %mul, %c
+  %add1 = fadd double %add, 0.000000e+00
+  ret double %add1
+}
+
+define double @FOO11double(double %a, double %b, double %c) nounwind readnone {
+entry:
+; CHECK: msub.d
+  %mul = fmul double %a, %b
+  %sub = fsub double %mul, %c
+  %add = fadd double %sub, 0.000000e+00
+  ret double %add
+}
+
+define double @FOO12double(double %a, double %b, double %c) nounwind readnone {
+entry:
+; 32R2: nmadd.d 
+; 64R2: nmadd.d 
+; 32R2NAN: madd.d 
+; 64R2NAN: madd.d 
+  %mul = fmul double %a, %b
+  %add = fadd double %mul, %c
+  %sub = fsub double 0.000000e+00, %add
+  ret double %sub
+}
+
+define double @FOO13double(double %a, double %b, double %c) nounwind readnone {
+entry:
+; 32R2: nmsub.d 
+; 64R2: nmsub.d 
+; 32R2NAN: msub.d 
+; 64R2NAN: msub.d 
+  %mul = fmul double %a, %b
+  %sub = fsub double %mul, %c
+  %sub1 = fsub double 0.000000e+00, %sub
+  ret double %sub1
+}
diff --git a/test/CodeGen/Mips/fp-indexed-ls.ll b/test/CodeGen/Mips/fp-indexed-ls.ll
new file mode 100644
index 0000000..08bd6e7
--- /dev/null
+++ b/test/CodeGen/Mips/fp-indexed-ls.ll
@@ -0,0 +1,98 @@
+; RUN: llc -march=mipsel -mcpu=mips32r2 < %s | FileCheck %s
+
+%struct.S = type <{ [4 x float] }>
+%struct.S2 = type <{ [4 x double] }>
+%struct.S3 = type <{ i8, float }>
+
+@s = external global [4 x %struct.S]
+@gf = external global float
+@gd = external global double
+@s2 = external global [4 x %struct.S2]
+@s3 = external global %struct.S3
+
+define float @foo0(float* nocapture %b, i32 %o) nounwind readonly {
+entry:
+; CHECK: lwxc1
+  %arrayidx = getelementptr inbounds float* %b, i32 %o
+  %0 = load float* %arrayidx, align 4
+  ret float %0
+}
+
+define double @foo1(double* nocapture %b, i32 %o) nounwind readonly {
+entry:
+; CHECK: ldxc1
+  %arrayidx = getelementptr inbounds double* %b, i32 %o
+  %0 = load double* %arrayidx, align 8
+  ret double %0
+}
+
+define float @foo2(i32 %b, i32 %c) nounwind readonly {
+entry:
+; CHECK: luxc1
+  %arrayidx1 = getelementptr inbounds [4 x %struct.S]* @s, i32 0, i32 %b, i32 0, i32 %c
+  %0 = load float* %arrayidx1, align 1
+  ret float %0
+}
+
+define void @foo3(float* nocapture %b, i32 %o) nounwind {
+entry:
+; CHECK: swxc1
+  %0 = load float* @gf, align 4
+  %arrayidx = getelementptr inbounds float* %b, i32 %o
+  store float %0, float* %arrayidx, align 4
+  ret void
+}
+
+define void @foo4(double* nocapture %b, i32 %o) nounwind {
+entry:
+; CHECK: sdxc1
+  %0 = load double* @gd, align 8
+  %arrayidx = getelementptr inbounds double* %b, i32 %o
+  store double %0, double* %arrayidx, align 8
+  ret void
+}
+
+define void @foo5(i32 %b, i32 %c) nounwind {
+entry:
+; CHECK: suxc1
+  %0 = load float* @gf, align 4
+  %arrayidx1 = getelementptr inbounds [4 x %struct.S]* @s, i32 0, i32 %b, i32 0, i32 %c
+  store float %0, float* %arrayidx1, align 1
+  ret void
+}
+
+define double @foo6(i32 %b, i32 %c) nounwind readonly {
+entry:
+; CHECK: foo6
+; CHECK-NOT: ldxc1
+  %arrayidx1 = getelementptr inbounds [4 x %struct.S2]* @s2, i32 0, i32 %b, i32 0, i32 %c
+  %0 = load double* %arrayidx1, align 1
+  ret double %0
+}
+
+define void @foo7(i32 %b, i32 %c) nounwind {
+entry:
+; CHECK: foo7
+; CHECK-NOT: sdxc1
+  %0 = load double* @gd, align 8
+  %arrayidx1 = getelementptr inbounds [4 x %struct.S2]* @s2, i32 0, i32 %b, i32 0, i32 %c
+  store double %0, double* %arrayidx1, align 1
+  ret void
+}
+
+define float @foo8() nounwind readonly {
+entry:
+; CHECK: foo8
+; CHECK: luxc1
+  %0 = load float* getelementptr inbounds (%struct.S3* @s3, i32 0, i32 1), align 1
+  ret float %0
+}
+
+define void @foo9(float %f) nounwind {
+entry:
+; CHECK: foo9
+; CHECK: suxc1
+  store float %f, float* getelementptr inbounds (%struct.S3* @s3, i32 0, i32 1), align 1
+  ret void
+}
+
diff --git a/test/CodeGen/Mips/global-pointer-reg.ll b/test/CodeGen/Mips/global-pointer-reg.ll
new file mode 100644
index 0000000..174d1f9
--- /dev/null
+++ b/test/CodeGen/Mips/global-pointer-reg.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -march=mipsel -mips-fix-global-base-reg=false | FileCheck %s 
+
+@g0 = external global i32
+@g1 = external global i32
+@g2 = external global i32
+
+define void @foo1() nounwind {
+entry:
+; CHECK-NOT:    .cpload
+; CHECK-NOT:    .cprestore
+; CHECK: lui    $[[R0:[0-9]+]], %hi(_gp_disp)
+; CHECK: addiu  $[[R1:[0-9]+]], $[[R0]], %lo(_gp_disp)
+; CHECK: addu   $[[GP:[0-9]+]], $[[R1]], $25
+; CHECK: lw     ${{[0-9]+}}, %call16(foo2)($[[GP]])
+
+  tail call void @foo2(i32* @g0) nounwind
+  tail call void @foo2(i32* @g1) nounwind
+  tail call void @foo2(i32* @g2) nounwind
+  ret void
+}
+
+declare void @foo2(i32*)
diff --git a/test/CodeGen/Mips/imm.ll b/test/CodeGen/Mips/imm.ll
new file mode 100644
index 0000000..eea391e
--- /dev/null
+++ b/test/CodeGen/Mips/imm.ll
@@ -0,0 +1,38 @@
+; RUN: llc -march=mipsel < %s | FileCheck %s
+
+define i32 @foo0() nounwind readnone {
+entry:
+; CHECK: foo0
+; CHECK: lui $[[R0:[0-9]+]], 4660
+; CHECK: ori ${{[0-9]+}}, $[[R0]], 22136
+  ret i32 305419896
+}
+
+define i32 @foo1() nounwind readnone {
+entry:
+; CHECK: foo1
+; CHECK: lui ${{[0-9]+}}, 4660
+; CHECK-NOT: ori
+  ret i32 305397760
+}
+
+define i32 @foo2() nounwind readnone {
+entry:
+; CHECK: foo2
+; CHECK: addiu ${{[0-9]+}}, $zero, 4660
+  ret i32 4660
+}
+
+define i32 @foo17() nounwind readnone {
+entry:
+; CHECK: foo17
+; CHECK: addiu ${{[0-9]+}}, $zero, -32204
+  ret i32 -32204
+}
+
+define i32 @foo18() nounwind readnone {
+entry:
+; CHECK: foo18
+; CHECK: ori ${{[0-9]+}}, $zero, 33332
+  ret i32 33332
+}
diff --git a/test/CodeGen/Mips/inlineasm64.ll b/test/CodeGen/Mips/inlineasm64.ll
new file mode 100644
index 0000000..dbce3c3
--- /dev/null
+++ b/test/CodeGen/Mips/inlineasm64.ll
@@ -0,0 +1,17 @@
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 < %s | FileCheck %s
+
+@gl2 = external global i64
+@gl1 = external global i64
+@gl0 = external global i64
+
+define void @foo1() nounwind {
+entry:
+; CHECK: foo1
+; CHECK: daddu
+  %0 = load i64* @gl1, align 8
+  %1 = load i64* @gl0, align 8
+  %2 = tail call i64 asm "daddu $0, $1, $2", "=r,r,r"(i64 %0, i64 %1) nounwind
+  store i64 %2, i64* @gl2, align 8
+  ret void
+}
+
diff --git a/test/CodeGen/Mips/largeimmprinting.ll b/test/CodeGen/Mips/largeimmprinting.ll
index 2333f07..b7c9a9c 100644
--- a/test/CodeGen/Mips/largeimmprinting.ll
+++ b/test/CodeGen/Mips/largeimmprinting.ll
@@ -7,8 +7,8 @@
 define void @f() nounwind {
 entry:
 ; CHECK:  lui $at, 65534
-; CHECK:  addu  $at, $sp, $at
-; CHECK:  addiu $sp, $at, -24
+; CHECK:  addiu $at, $at, -24
+; CHECK:  addu  $sp, $sp, $at
 ; CHECK:  .cprestore  65536
 
   %agg.tmp = alloca %struct.S1, align 1
diff --git a/test/CodeGen/Mips/lit.local.cfg b/test/CodeGen/Mips/lit.local.cfg
new file mode 100644
index 0000000..e1cd73a
--- /dev/null
+++ b/test/CodeGen/Mips/lit.local.cfg
@@ -0,0 +1,13 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+def getRoot(config):
+    if not config.parent:
+        return config
+    return getRoot(config.parent)
+
+root = getRoot(config)
+
+targets = set(root.targets_to_build.split())
+if not 'Mips' in targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/Mips/mips64-fp-indexed-ls.ll b/test/CodeGen/Mips/mips64-fp-indexed-ls.ll
new file mode 100644
index 0000000..09745fb
--- /dev/null
+++ b/test/CodeGen/Mips/mips64-fp-indexed-ls.ll
@@ -0,0 +1,110 @@
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 < %s | FileCheck %s
+
+%struct.S = type <{ [4 x float] }>
+%struct.S2 = type <{ [4 x double] }>
+%struct.S3 = type <{ i8, float }>
+
+@s = external global [4 x %struct.S]
+@gf = external global float
+@gd = external global double
+@s2 = external global [4 x %struct.S2]
+@s3 = external global %struct.S3
+
+define float @foo0(float* nocapture %b, i32 %o) nounwind readonly {
+entry:
+; CHECK: lwxc1
+  %idxprom = zext i32 %o to i64
+  %arrayidx = getelementptr inbounds float* %b, i64 %idxprom
+  %0 = load float* %arrayidx, align 4
+  ret float %0
+}
+
+define double @foo1(double* nocapture %b, i32 %o) nounwind readonly {
+entry:
+; CHECK: ldxc1
+  %idxprom = zext i32 %o to i64
+  %arrayidx = getelementptr inbounds double* %b, i64 %idxprom
+  %0 = load double* %arrayidx, align 8
+  ret double %0
+}
+
+define float @foo2(i32 %b, i32 %c) nounwind readonly {
+entry:
+; CHECK: luxc1
+  %idxprom = zext i32 %c to i64
+  %idxprom1 = zext i32 %b to i64
+  %arrayidx2 = getelementptr inbounds [4 x %struct.S]* @s, i64 0, i64 %idxprom1, i32 0, i64 %idxprom
+  %0 = load float* %arrayidx2, align 1
+  ret float %0
+}
+
+define void @foo3(float* nocapture %b, i32 %o) nounwind {
+entry:
+; CHECK: swxc1
+  %0 = load float* @gf, align 4
+  %idxprom = zext i32 %o to i64
+  %arrayidx = getelementptr inbounds float* %b, i64 %idxprom
+  store float %0, float* %arrayidx, align 4
+  ret void
+}
+
+define void @foo4(double* nocapture %b, i32 %o) nounwind {
+entry:
+; CHECK: sdxc1
+  %0 = load double* @gd, align 8
+  %idxprom = zext i32 %o to i64
+  %arrayidx = getelementptr inbounds double* %b, i64 %idxprom
+  store double %0, double* %arrayidx, align 8
+  ret void
+}
+
+define void @foo5(i32 %b, i32 %c) nounwind {
+entry:
+; CHECK: suxc1
+  %0 = load float* @gf, align 4
+  %idxprom = zext i32 %c to i64
+  %idxprom1 = zext i32 %b to i64
+  %arrayidx2 = getelementptr inbounds [4 x %struct.S]* @s, i64 0, i64 %idxprom1, i32 0, i64 %idxprom
+  store float %0, float* %arrayidx2, align 1
+  ret void
+}
+
+define double @foo6(i32 %b, i32 %c) nounwind readonly {
+entry:
+; CHECK: foo6
+; CHECK-NOT: ldxc1
+  %idxprom = zext i32 %c to i64
+  %idxprom1 = zext i32 %b to i64
+  %arrayidx2 = getelementptr inbounds [4 x %struct.S2]* @s2, i64 0, i64 %idxprom1, i32 0, i64 %idxprom
+  %0 = load double* %arrayidx2, align 1
+  ret double %0
+}
+
+define void @foo7(i32 %b, i32 %c) nounwind {
+entry:
+; CHECK: foo7
+; CHECK-NOT: sdxc1
+  %0 = load double* @gd, align 8
+  %idxprom = zext i32 %c to i64
+  %idxprom1 = zext i32 %b to i64
+  %arrayidx2 = getelementptr inbounds [4 x %struct.S2]* @s2, i64 0, i64 %idxprom1, i32 0, i64 %idxprom
+  store double %0, double* %arrayidx2, align 1
+  ret void
+}
+
+define float @foo8() nounwind readonly {
+entry:
+; CHECK: foo8
+; CHECK: luxc1
+  %0 = load float* getelementptr inbounds (%struct.S3* @s3, i64 0, i32 1), align 1
+  ret float %0
+}
+
+define void @foo9(float %f) nounwind {
+entry:
+; CHECK: foo9
+; CHECK: suxc1
+  store float %f, float* getelementptr inbounds (%struct.S3* @s3, i64 0, i32 1), align 1
+  ret void
+}
+
diff --git a/test/CodeGen/Mips/mips64countleading.ll b/test/CodeGen/Mips/mips64countleading.ll
new file mode 100644
index 0000000..b2b67e5
--- /dev/null
+++ b/test/CodeGen/Mips/mips64countleading.ll
@@ -0,0 +1,19 @@
+; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck %s
+
+define i64 @t1(i64 %X) nounwind readnone {
+entry:
+; CHECK: dclz
+  %tmp1 = tail call i64 @llvm.ctlz.i64(i64 %X, i1 true)
+  ret i64 %tmp1
+}
+
+declare i64 @llvm.ctlz.i64(i64, i1) nounwind readnone
+
+define i64 @t3(i64 %X) nounwind readnone {
+entry:
+; CHECK: dclo 
+  %neg = xor i64 %X, -1
+  %tmp1 = tail call i64 @llvm.ctlz.i64(i64 %neg, i1 true)
+  ret i64 %tmp1
+}
+
diff --git a/test/CodeGen/Mips/mips64directive.ll b/test/CodeGen/Mips/mips64directive.ll
new file mode 100644
index 0000000..fa81b72
--- /dev/null
+++ b/test/CodeGen/Mips/mips64directive.ll
@@ -0,0 +1,11 @@
+; RUN: llc  < %s -march=mips64el -mcpu=mips64 -mattr=n64 | FileCheck %s
+
+@gl = global i64 1250999896321, align 8
+
+; CHECK: 8byte
+define i64 @foo1() nounwind readonly {
+entry:
+  %0 = load i64* @gl, align 8
+  ret i64 %0
+}
+
diff --git a/test/CodeGen/Mips/mips64ext.ll b/test/CodeGen/Mips/mips64ext.ll
index 33af0d8..02a35f8 100644
--- a/test/CodeGen/Mips/mips64ext.ll
+++ b/test/CodeGen/Mips/mips64ext.ll
@@ -3,9 +3,24 @@
 define i64 @zext64_32(i32 %a) nounwind readnone {
 entry:
 ; CHECK: addiu $[[R0:[0-9]+]], ${{[0-9]+}}, 2
-; CHECK: dsll32 $[[R1:[0-9]+]], $[[R0]], 0
-; CHECK: dsrl32  ${{[0-9]+}}, $[[R1]], 0
+; CHECK: dsll $[[R1:[0-9]+]], $[[R0]], 32
+; CHECK: dsrl ${{[0-9]+}}, $[[R1]], 32
   %add = add i32 %a, 2
   %conv = zext i32 %add to i64
   ret i64 %conv
 }
+
+define i64 @sext64_32(i32 %a) nounwind readnone {
+entry:
+; CHECK: sll ${{[0-9]+}}, ${{[0-9]+}}, 0
+  %conv = sext i32 %a to i64
+  ret i64 %conv
+}
+
+define i64 @i64_float(float %f) nounwind readnone {
+entry:
+; CHECK: trunc.l.s 
+  %conv = fptosi float %f to i64
+  ret i64 %conv
+}
+
diff --git a/test/CodeGen/Mips/mips64fpimm0.ll b/test/CodeGen/Mips/mips64fpimm0.ll
new file mode 100644
index 0000000..17716da
--- /dev/null
+++ b/test/CodeGen/Mips/mips64fpimm0.ll
@@ -0,0 +1,7 @@
+; RUN: llc  < %s -march=mips64el -mcpu=mips64 -mattr=n64 | FileCheck %s
+
+define double @foo1() nounwind readnone {
+entry:
+; CHECK: dmtc1 $zero
+  ret double 0.000000e+00
+}
diff --git a/test/CodeGen/Mips/mips64imm.ll b/test/CodeGen/Mips/mips64imm.ll
index dca656c..1fc8636 100644
--- a/test/CodeGen/Mips/mips64imm.ll
+++ b/test/CodeGen/Mips/mips64imm.ll
@@ -1,10 +1,18 @@
 ; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck %s
 
+define i32 @foo1() nounwind readnone {
+entry:
+; CHECK: foo1
+; CHECK: lui ${{[0-9]+}}, 4660
+; CHECK-NOT: ori
+  ret i32 305397760
+}
+
 define i64 @foo3() nounwind readnone {
 entry:
 ; CHECK: foo3
 ; CHECK: lui $[[R0:[0-9]+]], 4660
-; CHECK: ori ${{[0-9]+}}, $[[R0]], 22136
+; CHECK: daddiu ${{[0-9]+}}, $[[R0]], 22136
   ret i64 305419896
 }
 
@@ -25,11 +33,20 @@ entry:
 define i64 @foo9() nounwind readnone {
 entry:
 ; CHECK: foo9
-; CHECK: lui $[[R0:[0-9]+]], 4660
-; CHECK: ori $[[R1:[0-9]+]], $[[R0]], 22136
-; CHECK: dsll $[[R2:[0-9]+]], $[[R1]], 16
-; CHECK: ori $[[R3:[0-9]+]], $[[R2]], 36882
-; CHECK: dsll $[[R4:[0-9]+]], $[[R3]], 16
-; CHECK: ori ${{[0-9]+}}, $[[R4]], 13398
+; CHECK: lui $[[R0:[0-9]+]], 583
+; CHECK: daddiu $[[R1:[0-9]+]], $[[R0]], -30001
+; CHECK: dsll $[[R2:[0-9]+]], $[[R1]], 18
+; CHECK: daddiu $[[R3:[0-9]+]], $[[R2]], 18441
+; CHECK: dsll $[[R4:[0-9]+]], $[[R3]], 17
+; CHECK: daddiu ${{[0-9]+}}, $[[R4]], 13398
   ret i64 1311768467284833366
 }
+
+define i64 @foo10() nounwind readnone {
+entry:
+; CHECK: foo10
+; CHECK: lui $[[R0:[0-9]+]], 34661
+; CHECK: daddiu  ${{[0-9]+}}, $[[R0]], 17185
+  ret i64 -8690466096928522240
+}
+
diff --git a/test/CodeGen/Mips/mips64lea.ll b/test/CodeGen/Mips/mips64lea.ll
new file mode 100644
index 0000000..54d504f
--- /dev/null
+++ b/test/CodeGen/Mips/mips64lea.ll
@@ -0,0 +1,12 @@
+; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck %s
+
+define void @foo3() nounwind {
+entry:
+; CHECK: daddiu ${{[0-9]+}}, $sp
+  %a = alloca i32, align 4
+  call void @foo1(i32* %a) nounwind
+  ret void
+}
+
+declare void @foo1(i32*)
+
diff --git a/test/CodeGen/Mips/mips64muldiv.ll b/test/CodeGen/Mips/mips64muldiv.ll
index a89d074..fd036a2 100644
--- a/test/CodeGen/Mips/mips64muldiv.ll
+++ b/test/CodeGen/Mips/mips64muldiv.ll
@@ -8,6 +8,14 @@ entry:
   ret i64 %mul
 }
 
+define i64 @m1(i64 %a) nounwind readnone {
+entry:
+; CHECK: dmult
+; CHECK: mfhi
+  %div = sdiv i64 %a, 3
+  ret i64 %div
+}
+
 define i64 @d0(i64 %a0, i64 %a1) nounwind readnone {
 entry:
 ; CHECK: ddivu
diff --git a/test/CodeGen/Mips/mips64shift.ll b/test/CodeGen/Mips/mips64shift.ll
index cc5e508..45d1c95 100644
--- a/test/CodeGen/Mips/mips64shift.ll
+++ b/test/CodeGen/Mips/mips64shift.ll
@@ -44,21 +44,21 @@ entry:
 
 define i64 @f6(i64 %a0) nounwind readnone {
 entry:
-; CHECK: dsll32 ${{[0-9]+}}, ${{[0-9]+}}, 8
+; CHECK: dsll ${{[0-9]+}}, ${{[0-9]+}}, 40
   %shl = shl i64 %a0, 40
   ret i64 %shl
 }
 
 define i64 @f7(i64 %a0) nounwind readnone {
 entry:
-; CHECK: dsra32 ${{[0-9]+}}, ${{[0-9]+}}, 8
+; CHECK: dsra ${{[0-9]+}}, ${{[0-9]+}}, 40
   %shr = ashr i64 %a0, 40
   ret i64 %shr
 }
 
 define i64 @f8(i64 %a0) nounwind readnone {
 entry:
-; CHECK: dsrl32 ${{[0-9]+}}, ${{[0-9]+}}, 8
+; CHECK: dsrl ${{[0-9]+}}, ${{[0-9]+}}, 40
   %shr = lshr i64 %a0, 40
   ret i64 %shr
 }
@@ -94,7 +94,7 @@ entry:
 
 define i64 @f12(i64 %a0) nounwind readnone {
 entry:
-; CHECK: drotr32 ${{[0-9]+}}, ${{[0-9]+}}, 22
+; CHECK: drotr ${{[0-9]+}}, ${{[0-9]+}}, 54
   %shl = shl i64 %a0, 10
   %shr = lshr i64 %a0, 54
   %or = or i64 %shl, %shr
diff --git a/test/CodeGen/Mips/swzero.ll b/test/CodeGen/Mips/swzero.ll
new file mode 100644
index 0000000..da1e036
--- /dev/null
+++ b/test/CodeGen/Mips/swzero.ll
@@ -0,0 +1,19 @@
+; RUN: llc -march=mipsel < %s | FileCheck %s
+
+%struct.unaligned = type <{ i32 }>
+
+define void @zero_u(%struct.unaligned* nocapture %p) nounwind {
+entry:
+; CHECK: usw $zero
+  %x = getelementptr inbounds %struct.unaligned* %p, i32 0, i32 0
+  store i32 0, i32* %x, align 1
+  ret void
+}
+
+define void @zero_a(i32* nocapture %p) nounwind {
+entry:
+; CHECK: sw $zero
+  store i32 0, i32* %p, align 4
+  ret void
+}
+
diff --git a/test/CodeGen/Mips/tls.ll b/test/CodeGen/Mips/tls.ll
index 3fa852b..a3c4768 100644
--- a/test/CodeGen/Mips/tls.ll
+++ b/test/CodeGen/Mips/tls.ll
@@ -1,7 +1,8 @@
 ; RUN: llc -march=mipsel < %s | FileCheck %s -check-prefix=PIC
 ; RUN: llc -march=mipsel -relocation-model=static < %s \
 ; RUN:                             | FileCheck %s -check-prefix=STATIC
-
+; RUN: llc -march=mipsel -relocation-model=static < %s \
+; RUN:   -mips-fix-global-base-reg=false | FileCheck %s -check-prefix=STATICGP
 
 @t1 = thread_local global i32 0, align 4
 
@@ -39,6 +40,11 @@ entry:
 ; PIC:   jalr    $25
 ; PIC:   lw      $2, 0($2)
 
+; STATICGP: lui     $[[R0:[0-9]+]], %hi(__gnu_local_gp)
+; STATICGP: addiu   $[[GP:[0-9]+]], $[[R0]], %lo(__gnu_local_gp)
+; STATICGP: lw      ${{[0-9]+}}, %gottprel(t2)($[[GP]])
+; STATIC:   lui     $gp, %hi(__gnu_local_gp)
+; STATIC:   addiu   $gp, $gp, %lo(__gnu_local_gp)
 ; STATIC:   rdhwr   $3, $29
 ; STATIC:   lw      $[[R0:[0-9]+]], %gottprel(t2)($gp)
 ; STATIC:   addu    $[[R1:[0-9]+]], $3, $[[R0]]
@@ -55,7 +61,7 @@ entry:
 ; PIC:   jalr    $25
 ; PIC:   lui     $[[R0:[0-9]+]], %dtprel_hi(f3.i)
 ; PIC:   addu    $[[R1:[0-9]+]], $[[R0]], $2
-; PIC:   addiu   ${{[0-9]+}}, $[[R1]], %dtprel_lo(f3.i)
+; PIC:   lw      ${{[0-9]+}}, %dtprel_lo(f3.i)($[[R1]])
 
   %0 = load i32* @f3.i, align 4
   %inc = add nsw i32 %0, 1
diff --git a/test/CodeGen/PTX/dg.exp b/test/CodeGen/PTX/dg.exp
deleted file mode 100644
index 2c304b5..0000000
--- a/test/CodeGen/PTX/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target PTX] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/PTX/lit.local.cfg b/test/CodeGen/PTX/lit.local.cfg
new file mode 100644
index 0000000..7399089
--- /dev/null
+++ b/test/CodeGen/PTX/lit.local.cfg
@@ -0,0 +1,13 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+def getRoot(config):
+    if not config.parent:
+        return config
+    return getRoot(config.parent)
+
+root = getRoot(config)
+
+targets = set(root.targets_to_build.split())
+if not 'PTX' in targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/PowerPC/2010-02-12-saveCR.ll b/test/CodeGen/PowerPC/2010-02-12-saveCR.ll
index 3c01938..974a99a 100644
--- a/test/CodeGen/PowerPC/2010-02-12-saveCR.ll
+++ b/test/CodeGen/PowerPC/2010-02-12-saveCR.ll
@@ -6,16 +6,22 @@ target triple = "powerpc-apple-darwin9.6"
 
 define void @foo() nounwind {
 entry:
+;CHECK:  mfcr r2
 ;CHECK:  lis r3, 1
+;CHECK:  rlwinm r2, r2, 8, 0, 31
 ;CHECK:  ori r3, r3, 34524
+;CHECK:  stwx r2, r1, r3
+; Make sure that the register scavenger returns the same temporary register.
 ;CHECK:  mfcr r2
-;CHECK:  rlwinm r2, r2, 8, 0, 31
+;CHECK:  lis r3, 1
+;CHECK:  rlwinm r2, r2, 12, 0, 31
+;CHECK:  ori r3, r3, 34520
 ;CHECK:  stwx r2, r1, r3
   %x = alloca [100000 x i8]                       ; <[100000 x i8]*> [#uses=1]
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
   %x1 = bitcast [100000 x i8]* %x to i8*          ; <i8*> [#uses=1]
   call void @bar(i8* %x1) nounwind
-  call void asm sideeffect "", "~{cr2}"() nounwind
+  call void asm sideeffect "", "~{cr2},~{cr3}"() nounwind
   br label %return
 
 return:                                           ; preds = %entry
diff --git a/test/CodeGen/PowerPC/Frames-large.ll b/test/CodeGen/PowerPC/Frames-large.ll
index a2af87e..d07fea7 100644
--- a/test/CodeGen/PowerPC/Frames-large.ll
+++ b/test/CodeGen/PowerPC/Frames-large.ll
@@ -15,9 +15,9 @@ define i32* @f1() nounwind {
 
 ; PPC32-NOFP: _f1:
 ; PPC32-NOFP: 	lis r0, -1
-; PPC32-NOFP: 	addi r3, r1, 68
 ; PPC32-NOFP: 	ori r0, r0, 32704
 ; PPC32-NOFP: 	stwux r1, r1, r0
+; PPC32-NOFP: 	addi r3, r1, 68
 ; PPC32-NOFP: 	lwz r1, 0(r1)
 ; PPC32-NOFP: 	blr 
 
@@ -25,10 +25,10 @@ define i32* @f1() nounwind {
 ; PPC32-FP: _f1:
 ; PPC32-FP:	lis r0, -1
 ; PPC32-FP:	stw r31, -4(r1)
-; PPC32-FP:	mr r31, r1
 ; PPC32-FP:	ori r0, r0, 32704
-; PPC32-FP:	addi r3, r31, 64
 ; PPC32-FP:	stwux r1, r1, r0
+; PPC32-FP:	mr r31, r1
+; PPC32-FP:	addi r3, r31, 64
 ; PPC32-FP:	lwz r1, 0(r1)
 ; PPC32-FP:	lwz r31, -4(r1)
 ; PPC32-FP:	blr 
@@ -36,9 +36,9 @@ define i32* @f1() nounwind {
 
 ; PPC64-NOFP: _f1:
 ; PPC64-NOFP: 	lis r0, -1
-; PPC64-NOFP: 	addi r3, r1, 116
 ; PPC64-NOFP: 	ori r0, r0, 32656
 ; PPC64-NOFP: 	stdux r1, r1, r0
+; PPC64-NOFP: 	addi r3, r1, 116
 ; PPC64-NOFP: 	ld r1, 0(r1)
 ; PPC64-NOFP: 	blr 
 
@@ -46,10 +46,10 @@ define i32* @f1() nounwind {
 ; PPC64-FP: _f1:
 ; PPC64-FP:	lis r0, -1
 ; PPC64-FP:	std r31, -8(r1)
-; PPC64-FP:	mr r31, r1
 ; PPC64-FP:	ori r0, r0, 32640
-; PPC64-FP:	addi r3, r31, 124
 ; PPC64-FP:	stdux r1, r1, r0
+; PPC64-FP:	mr r31, r1
+; PPC64-FP:	addi r3, r31, 124
 ; PPC64-FP:	ld r1, 0(r1)
 ; PPC64-FP:	ld r31, -8(r1)
 ; PPC64-FP:	blr 
diff --git a/test/CodeGen/PowerPC/dbg.ll b/test/CodeGen/PowerPC/dbg.ll
new file mode 100644
index 0000000..e161cb0
--- /dev/null
+++ b/test/CodeGen/PowerPC/dbg.ll
@@ -0,0 +1,40 @@
+; RUN: llc < %s -break-anti-dependencies=all -march=ppc64 -mcpu=g5 | FileCheck %s
+; CHECK: main:
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define i32 @main(i32 %argc, i8** nocapture %argv) nounwind readnone {
+entry:
+  tail call void @llvm.dbg.value(metadata !{i32 %argc}, i64 0, metadata !15), !dbg !17
+  tail call void @llvm.dbg.value(metadata !{i8** %argv}, i64 0, metadata !16), !dbg !18
+  %add = add nsw i32 %argc, 1, !dbg !19
+  ret i32 %add, !dbg !19
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 720913, i32 0, i32 12, metadata !"dbg.c", metadata !"/src", metadata !"clang version 3.1", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 720942, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !13} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 720937, metadata !"dbg.c", metadata !"/src", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!8 = metadata !{metadata !9, metadata !9, metadata !10}
+!9 = metadata !{i32 720932, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!10 = metadata !{i32 720911, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
+!11 = metadata !{i32 720911, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !12} ; [ DW_TAG_pointer_type ]
+!12 = metadata !{i32 720932, null, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ]
+!13 = metadata !{metadata !14}
+!14 = metadata !{metadata !15, metadata !16}
+!15 = metadata !{i32 721153, metadata !5, metadata !"argc", metadata !6, i32 16777217, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!16 = metadata !{i32 721153, metadata !5, metadata !"argv", metadata !6, i32 33554433, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!17 = metadata !{i32 1, i32 14, metadata !5, null}
+!18 = metadata !{i32 1, i32 26, metadata !5, null}
+!19 = metadata !{i32 2, i32 3, metadata !20, null}
+!20 = metadata !{i32 720907, metadata !5, i32 1, i32 34, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+
diff --git a/test/CodeGen/PowerPC/dg.exp b/test/CodeGen/PowerPC/dg.exp
deleted file mode 100644
index 9e50b55..0000000
--- a/test/CodeGen/PowerPC/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target PowerPC] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/PowerPC/lit.local.cfg b/test/CodeGen/PowerPC/lit.local.cfg
new file mode 100644
index 0000000..5c7f267
--- /dev/null
+++ b/test/CodeGen/PowerPC/lit.local.cfg
@@ -0,0 +1,13 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+def getRoot(config):
+    if not config.parent:
+        return config
+    return getRoot(config.parent)
+
+root = getRoot(config)
+
+targets = set(root.targets_to_build.split())
+if not 'PowerPC' in targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/PowerPC/ppc32-vaarg.ll b/test/CodeGen/PowerPC/ppc32-vaarg.ll
deleted file mode 100644
index 725c106..0000000
--- a/test/CodeGen/PowerPC/ppc32-vaarg.ll
+++ /dev/null
@@ -1,160 +0,0 @@
-; RUN: llc -O0 < %s | FileCheck %s
-;ModuleID = 'test.c'
-target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32"
-target triple = "powerpc-unknown-freebsd9.0"
-
-%struct.__va_list_tag = type { i8, i8, i16, i8*, i8* }
-
-@var1 = common global i64 0, align 8
-@var2 = common global double 0.0, align 8
-@var3 = common global i32 0, align 4
-
-define void @ppcvaargtest(%struct.__va_list_tag* %ap) nounwind {
- entry:
-  %x = va_arg %struct.__va_list_tag* %ap, i64; Get from r5,r6
-; CHECK: addi 5, 4, 1
-; CHECK-NEXT: rlwinm 6, 4, 0, 31, 31
-; CHECK-NEXT: cmplwi 0, 6, 0
-; CHECK-NEXT: stw 3, -4(1)
-; CHECK-NEXT: stw 5, -8(1)
-; CHECK-NEXT: stw 4, -12(1)
-; CHECK-NEXT: bne 0, .LBB0_2
-; CHECK-NEXT: # BB#1:                                 # %entry
-; CHECK-NEXT: lwz 3, -12(1)
-; CHECK-NEXT: stw 3, -8(1)
-; CHECK-NEXT: .LBB0_2:                                # %entry
-; CHECK-NEXT: lwz 3, -8(1)
-; CHECK-NEXT: addi 4, 3, 2
-; CHECK-NEXT: lwz 5, -4(1)
-; CHECK-NEXT: lwz 6, 4(5)
-; CHECK-NEXT: lwz 7, 8(5)
-; CHECK-NEXT: stb 4, 0(5)
-; CHECK-NEXT: cmpwi 0, 3, 8
-; CHECK-NEXT: addi 4, 6, 4
-; CHECK-NEXT: mr 8, 6
-; CHECK-NEXT: stw 7, -16(1)
-; CHECK-NEXT: stw 4, -20(1)
-; CHECK-NEXT: stw 3, -24(1)
-; CHECK-NEXT: stw 8, -28(1)
-; CHECK-NEXT: stw 6, -32(1)
-; CHECK-NEXT: mfcr 0                          # cr0
-; CHECK-NEXT: stw 0, -36(1)
-; CHECK-NEXT: blt 0, .LBB0_4
-; CHECK-NEXT: # BB#3:                                 # %entry
-; CHECK-NEXT: lwz 3, -20(1)
-; CHECK-NEXT: stw 3, -28(1)
-; CHECK-NEXT: .LBB0_4:                                # %entry
-; CHECK-NEXT: lwz 3, -28(1)
-; CHECK-NEXT: lwz 4, -4(1)
-; CHECK-NEXT: stw 3, 4(4)
-  store i64 %x, i64* @var1, align 8
-; CHECK-NEXT: lwz 3, -24(1)
-; CHECK-NEXT: slwi 5, 3, 2
-; CHECK-NEXT: lwz 6, -16(1)
-; CHECK-NEXT: add 5, 6, 5
-; CHECK-NEXT: lwz 0, -36(1)
-; CHECK-NEXT: mtcrf 128, 0
-; CHECK-NEXT: stw 5, -40(1)
-; CHECK-NEXT: blt 0, .LBB0_6
-; CHECK-NEXT: # BB#5:                                 # %entry
-; CHECK-NEXT: lwz 3, -32(1)
-; CHECK-NEXT: stw 3, -40(1)
-; CHECK-NEXT: .LBB0_6:                                # %entry
-; CHECK-NEXT: lwz 3, -40(1)
-; CHECK-NEXT: lwz 4, 0(3)
-; CHECK-NEXT: lwz 3, 4(3)
-; CHECK-NEXT: lis 5, var1@ha
-; CHECK-NEXT: la 6, var1@l(5)
-; CHECK-NEXT: stw 3, 4(6)
-; CHECK-NEXT: stw 4, var1@l(5)
-; CHECK-NEXT: lwz 3, -4(1)
-  %y = va_arg %struct.__va_list_tag* %ap, double; From f1
-; CHECK-NEXT: lbz 4, 1(3)
-; CHECK-NEXT: lwz 5, 4(3)
-; CHECK-NEXT: lwz 6, 8(3)
-; CHECK-NEXT: addi 7, 4, 1
-; CHECK-NEXT: stb 7, 1(3)
-; CHECK-NEXT: cmpwi 0, 4, 8
-; CHECK-NEXT: addi 7, 5, 8
-; CHECK-NEXT: mr 8, 5
-; CHECK-NEXT: stw 5, -44(1)
-; CHECK-NEXT: stw 7, -48(1)
-; CHECK-NEXT: stw 4, -52(1)
-; CHECK-NEXT: stw 6, -56(1)
-; CHECK-NEXT: stw 8, -60(1)
-; CHECK-NEXT: mfcr 0                          # cr0
-; CHECK-NEXT: stw 0, -64(1)
-; CHECK-NEXT: blt 0, .LBB0_8
-; CHECK-NEXT: # BB#7:                                 # %entry
-; CHECK-NEXT: lwz 3, -48(1)
-; CHECK-NEXT: stw 3, -60(1)
-; CHECK-NEXT: .LBB0_8:                                # %entry
-; CHECK-NEXT: lwz 3, -60(1)
-; CHECK-NEXT: lwz 4, -4(1)
-; CHECK-NEXT: stw 3, 4(4)
-; CHECK-NEXT: lwz 3, -52(1)
-; CHECK-NEXT: slwi 5, 3, 3
-; CHECK-NEXT: lwz 6, -56(1)
-; CHECK-NEXT: add 5, 6, 5
-; CHECK-NEXT: addi 5, 5, 32
-; CHECK-NEXT: lwz 0, -64(1)
-; CHECK-NEXT: mtcrf 128, 0
-; CHECK-NEXT: stw 5, -68(1)
-; CHECK-NEXT: blt 0, .LBB0_10
-; CHECK-NEXT: # BB#9:                                 # %entry
-; CHECK-NEXT: lwz 3, -44(1)
-; CHECK-NEXT: stw 3, -68(1)
-; CHECK-NEXT: .LBB0_10:                               # %entry
-; CHECK-NEXT: lwz 3, -68(1)
-; CHECK-NEXT: lfd 0, 0(3)
-  store double %y, double* @var2, align 8
-; CHECK-NEXT: lis 3, var2@ha
-; CHECK-NEXT: stfd 0, var2@l(3)
-  %z = va_arg %struct.__va_list_tag* %ap, i32; From r7
-; CHECK-NEXT: lwz 3, -4(1)
-; CHECK-NEXT: lbz 4, 0(3)
-; CHECK-NEXT: lwz 5, 4(3)
-; CHECK-NEXT: lwz 6, 8(3)
-; CHECK-NEXT: addi 7, 4, 1
-; CHECK-NEXT: stb 7, 0(3)
-; CHECK-NEXT: cmpwi 0, 4, 8
-; CHECK-NEXT: addi 7, 5, 4
-; CHECK-NEXT: mr 8, 5
-; CHECK-NEXT: stw 4, -72(1)
-; CHECK-NEXT: stw 6, -76(1)
-; CHECK-NEXT: mfcr 0                          # cr0
-; CHECK-NEXT: stw 0, -80(1)
-; CHECK-NEXT: stw 5, -84(1)
-; CHECK-NEXT: stw 8, -88(1)
-; CHECK-NEXT: stw 7, -92(1)
-; CHECK-NEXT: blt 0, .LBB0_12
-; CHECK-NEXT: # BB#11:                                # %entry
-; CHECK-NEXT: lwz 3, -92(1)
-; CHECK-NEXT: stw 3, -88(1)
-; CHECK-NEXT: .LBB0_12:                               # %entry
-; CHECK-NEXT: lwz 3, -88(1)
-; CHECK-NEXT: lwz 4, -4(1)
-; CHECK-NEXT: stw 3, 4(4)
-; CHECK-NEXT: lwz 3, -72(1)
-; CHECK-NEXT: slwi 5, 3, 2
-; CHECK-NEXT: lwz 6, -76(1)
-; CHECK-NEXT: add 5, 6, 5
-; CHECK-NEXT: lwz 0, -80(1)
-; CHECK-NEXT: mtcrf 128, 0
-; CHECK-NEXT: stw 5, -96(1)
-; CHECK-NEXT: blt 0, .LBB0_14
-; CHECK-NEXT: # BB#13:                                # %entry
-; CHECK-NEXT: lwz 3, -84(1)
-; CHECK-NEXT: stw 3, -96(1)
-; CHECK-NEXT: .LBB0_14:                               # %entry
-; CHECK-NEXT: lwz 3, -96(1)
-; CHECK-NEXT: lwz 3, 0(3)
-  store i32 %z, i32* @var3, align 4
-; CHECK-NEXT: lis 4, var3@ha
-; CHECK-NEXT: stw 3, var3@l(4)
-; CHECK-NEXT: lwz 3, -4(1)
-  ret void
-; CHECK-NEXT: stw 3, -100(1)
-; CHECK-NEXT: blr 
-}
-
diff --git a/test/CodeGen/PowerPC/ppc64-ind-call.ll b/test/CodeGen/PowerPC/ppc64-ind-call.ll
new file mode 100644
index 0000000..d5c4d46
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc64-ind-call.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=ppc64 | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define void @test1() {
+entry:
+  %call.i75 = call zeroext i8 undef(i8* undef, i8 zeroext 10)
+  unreachable
+}
+
+; CHECK: @test1
+; CHECK: ld 11, 0(3)
+; CHECK: ld 2, 8(3)
+; CHECK: bctrl
+; CHECK: ld 2, 40(1)
+
diff --git a/test/CodeGen/PowerPC/ppc64-linux-func-size.ll b/test/CodeGen/PowerPC/ppc64-linux-func-size.ll
new file mode 100644
index 0000000..e5aa1f1
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc64-linux-func-size.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=g5 | FileCheck %s
+
+; CHECK:      .section	.opd,"aw",@progbits
+; CHECK-NEXT: test1:
+; CHECK-NEXT:	.align 3
+; CHECK-NEXT:	.quad .L.test1
+; CHECK-NEXT:	.quad .TOC.@tocbase
+; CHECK-NEXT:	.text
+; CHECK-NEXT: .L.test1:
+
+define i32 @test1(i32 %a) nounwind {
+entry:
+  ret i32 %a
+}
+
+; Until recently, binutils accepted the .size directive as:
+;  .size	test1, .Ltmp0-test1
+; however, using this directive with recent binutils will result in the error:
+;  .size expression for XXX does not evaluate to a constant
+; so we must use the label which actually tags the start of the function.
+; CHECK: .size	test1, .Ltmp0-.L.test1
diff --git a/test/CodeGen/SPARC/dg.exp b/test/CodeGen/SPARC/dg.exp
deleted file mode 100644
index 6c0a997..0000000
--- a/test/CodeGen/SPARC/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target Sparc] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/SPARC/lit.local.cfg b/test/CodeGen/SPARC/lit.local.cfg
new file mode 100644
index 0000000..ba81a16
--- /dev/null
+++ b/test/CodeGen/SPARC/lit.local.cfg
@@ -0,0 +1,13 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+def getRoot(config):
+    if not config.parent:
+        return config
+    return getRoot(config.parent)
+
+root = getRoot(config)
+
+targets = set(root.targets_to_build.split())
+if not 'Sparc' in targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/Thumb/dg.exp b/test/CodeGen/Thumb/dg.exp
deleted file mode 100644
index 3ff359a..0000000
--- a/test/CodeGen/Thumb/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target ARM] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/Thumb/large-stack.ll b/test/CodeGen/Thumb/large-stack.ll
index fbacaba..f8c438c 100644
--- a/test/CodeGen/Thumb/large-stack.ll
+++ b/test/CodeGen/Thumb/large-stack.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumb-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=thumb-apple-ios | FileCheck %s
 
 define void @test1() {
 ; CHECK: test1:
diff --git a/test/CodeGen/Thumb/lit.local.cfg b/test/CodeGen/Thumb/lit.local.cfg
new file mode 100644
index 0000000..dd6c50d
--- /dev/null
+++ b/test/CodeGen/Thumb/lit.local.cfg
@@ -0,0 +1,13 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+def getRoot(config):
+    if not config.parent:
+        return config
+    return getRoot(config.parent)
+
+root = getRoot(config)
+
+targets = set(root.targets_to_build.split())
+if not 'ARM' in targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/Thumb2/2009-07-21-ISelBug.ll b/test/CodeGen/Thumb2/2009-07-21-ISelBug.ll
index 4e1394f..4616dcf 100644
--- a/test/CodeGen/Thumb2/2009-07-21-ISelBug.ll
+++ b/test/CodeGen/Thumb2/2009-07-21-ISelBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mattr=+vfp2,+thumb2 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mattr=+vfp2,+thumb2 | FileCheck %s
 ; rdar://7076238
 
 @"\01LC" = external constant [36 x i8], align 1		; <[36 x i8]*> [#uses=1]
diff --git a/test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll b/test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll
index d2140a1..5cb266b 100644
--- a/test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll
+++ b/test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll
@@ -1,5 +1,5 @@
 ; rdar://8465407
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s
 
 %struct.buf = type opaque
 
diff --git a/test/CodeGen/Thumb2/2011-12-16-T2SizeReduceAssert.ll b/test/CodeGen/Thumb2/2011-12-16-T2SizeReduceAssert.ll
new file mode 100644
index 0000000..dadbdc5
--- /dev/null
+++ b/test/CodeGen/Thumb2/2011-12-16-T2SizeReduceAssert.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -relocation-model=pic -disable-fp-elim -mcpu=cortex-a8 
+
+%struct.LIST_NODE.0.16 = type { %struct.LIST_NODE.0.16*, i8* }
+
+define %struct.LIST_NODE.0.16* @list_AssocListPair(%struct.LIST_NODE.0.16* %List, i8* %Key) nounwind readonly {
+entry:
+  br label %bb3
+
+bb:                                               ; preds = %bb3
+  %Scan.0.idx7.val = load i8** undef, align 4
+  %.idx = getelementptr i8* %Scan.0.idx7.val, i32 4
+  %0 = bitcast i8* %.idx to i8**
+  %.idx.val = load i8** %0, align 4
+  %1 = icmp eq i8* %.idx.val, %Key
+  br i1 %1, label %bb5, label %bb2
+
+bb2:                                              ; preds = %bb
+  %Scan.0.idx8.val = load %struct.LIST_NODE.0.16** undef, align 4
+  br label %bb3
+
+bb3:                                              ; preds = %bb2, %entry
+  %Scan.0 = phi %struct.LIST_NODE.0.16* [ %List, %entry ], [ %Scan.0.idx8.val, %bb2 ]
+  %2 = icmp eq %struct.LIST_NODE.0.16* %Scan.0, null
+  br i1 %2, label %bb5, label %bb
+
+bb5:                                              ; preds = %bb3, %bb
+  ret %struct.LIST_NODE.0.16* null
+}
diff --git a/test/CodeGen/Thumb2/2012-01-13-CBNZBug.ll b/test/CodeGen/Thumb2/2012-01-13-CBNZBug.ll
new file mode 100644
index 0000000..4acdd9e
--- /dev/null
+++ b/test/CodeGen/Thumb2/2012-01-13-CBNZBug.ll
@@ -0,0 +1,103 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -relocation-model=pic -disable-fp-elim -mcpu=cortex-a8 | FileCheck %s
+; rdar://10676853
+
+%struct.Dict_node_struct = type { i8*, %struct.Word_file_struct*, %struct.Exp_struct*, %struct.Dict_node_struct*, %struct.Dict_node_struct* }
+%struct.Word_file_struct = type { [60 x i8], i32, %struct.Word_file_struct* }
+%struct.Exp_struct = type { i8, i8, i8, i8, %union.anon }
+%union.anon = type { %struct.E_list_struct* }
+%struct.E_list_struct = type { %struct.E_list_struct*, %struct.Exp_struct* }
+
+@lookup_list = external hidden unnamed_addr global %struct.Dict_node_struct*, align 4
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+
+define hidden fastcc void @rdictionary_lookup(%struct.Dict_node_struct* %dn, i8* nocapture %s) nounwind ssp {
+; CHECK: rdictionary_lookup:
+entry:
+  br label %tailrecurse
+
+tailrecurse:                                      ; preds = %if.then10, %entry
+  %dn.tr = phi %struct.Dict_node_struct* [ %dn, %entry ], [ %9, %if.then10 ]
+  %cmp = icmp eq %struct.Dict_node_struct* %dn.tr, null
+  br i1 %cmp, label %if.end11, label %if.end
+
+if.end:                                           ; preds = %tailrecurse
+  %string = getelementptr inbounds %struct.Dict_node_struct* %dn.tr, i32 0, i32 0
+  %0 = load i8** %string, align 4
+  br label %while.cond.i
+
+while.cond.i:                                     ; preds = %while.body.i, %if.end
+  %1 = phi i8* [ %s, %if.end ], [ %incdec.ptr.i, %while.body.i ]
+  %storemerge.i = phi i8* [ %0, %if.end ], [ %incdec.ptr6.i, %while.body.i ]
+  %2 = load i8* %1, align 1
+  %cmp.i = icmp eq i8 %2, 0
+  %.pre.i = load i8* %storemerge.i, align 1
+  br i1 %cmp.i, label %lor.lhs.false.i, label %land.end.i
+
+land.end.i:                                       ; preds = %while.cond.i
+  %cmp4.i = icmp eq i8 %2, %.pre.i
+  br i1 %cmp4.i, label %while.body.i, label %while.end.i
+
+while.body.i:                                     ; preds = %land.end.i
+  %incdec.ptr.i = getelementptr inbounds i8* %1, i32 1
+  %incdec.ptr6.i = getelementptr inbounds i8* %storemerge.i, i32 1
+  br label %while.cond.i
+
+while.end.i:                                      ; preds = %land.end.i
+  %cmp8.i = icmp eq i8 %2, 42
+  br i1 %cmp8.i, label %if.end3, label %lor.lhs.false.i
+
+lor.lhs.false.i:                                  ; preds = %while.end.i, %while.cond.i
+  %3 = phi i8 [ %2, %while.end.i ], [ 0, %while.cond.i ]
+  %cmp11.i = icmp eq i8 %.pre.i, 42
+  br i1 %cmp11.i, label %if.end3, label %dict_match.exit
+
+dict_match.exit:                                  ; preds = %lor.lhs.false.i
+  %cmp14.i = icmp eq i8 %3, 46
+  %conv16.i = sext i8 %3 to i32
+  %.conv16.i = select i1 %cmp14.i, i32 0, i32 %conv16.i
+  %cmp18.i = icmp eq i8 %.pre.i, 46
+  %conv22.i = sext i8 %.pre.i to i32
+  %cond24.i = select i1 %cmp18.i, i32 0, i32 %conv22.i
+  %sub.i = sub nsw i32 %.conv16.i, %cond24.i
+  %cmp1 = icmp sgt i32 %sub.i, -1
+  br i1 %cmp1, label %if.end3, label %if.then10
+
+if.end3:                                          ; preds = %dict_match.exit, %lor.lhs.false.i, %while.end.i
+; CHECK: %if.end3
+; CHECK: cmp
+; CHECK-NOT: cbnz
+  %storemerge1.i3 = phi i32 [ %sub.i, %dict_match.exit ], [ 0, %lor.lhs.false.i ], [ 0, %while.end.i ]
+  %right = getelementptr inbounds %struct.Dict_node_struct* %dn.tr, i32 0, i32 4
+  %4 = load %struct.Dict_node_struct** %right, align 4
+  tail call fastcc void @rdictionary_lookup(%struct.Dict_node_struct* %4, i8* %s)
+  %cmp4 = icmp eq i32 %storemerge1.i3, 0
+  br i1 %cmp4, label %if.then5, label %if.end8
+
+if.then5:                                         ; preds = %if.end3
+  %call6 = tail call fastcc i8* @xalloc(i32 20)
+  %5 = bitcast i8* %call6 to %struct.Dict_node_struct*
+  %6 = bitcast %struct.Dict_node_struct* %dn.tr to i8*
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %call6, i8* %6, i32 16, i32 4, i1 false)
+  %7 = load %struct.Dict_node_struct** @lookup_list, align 4
+  %right7 = getelementptr inbounds i8* %call6, i32 16
+  %8 = bitcast i8* %right7 to %struct.Dict_node_struct**
+  store %struct.Dict_node_struct* %7, %struct.Dict_node_struct** %8, align 4
+  store %struct.Dict_node_struct* %5, %struct.Dict_node_struct** @lookup_list, align 4
+  br label %if.then10
+
+if.end8:                                          ; preds = %if.end3
+  %cmp9 = icmp slt i32 %storemerge1.i3, 1
+  br i1 %cmp9, label %if.then10, label %if.end11
+
+if.then10:                                        ; preds = %if.end8, %if.then5, %dict_match.exit
+  %left = getelementptr inbounds %struct.Dict_node_struct* %dn.tr, i32 0, i32 3
+  %9 = load %struct.Dict_node_struct** %left, align 4
+  br label %tailrecurse
+
+if.end11:                                         ; preds = %if.end8, %tailrecurse
+  ret void
+}
+
+; Materializable
+declare hidden fastcc i8* @xalloc(i32) nounwind ssp
diff --git a/test/CodeGen/Thumb2/aligned-constants.ll b/test/CodeGen/Thumb2/aligned-constants.ll
new file mode 100644
index 0000000..16b3a19
--- /dev/null
+++ b/test/CodeGen/Thumb2/aligned-constants.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -mcpu=cortex-a8 | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios"
+
+; The double in the constant pool is 8-byte aligned, forcing the function
+; alignment.
+; CHECK: .align 3
+; CHECK: func
+;
+; Constant pool with 8-byte entry before 4-byte entry:
+; CHECK: .align 3
+; CHECK: LCPI
+; CHECK:	.long	2370821947
+; CHECK:	.long	1080815255
+; CHECK: LCPI
+; CHECK:	.long	1123477881
+define void @func(float* nocapture %x, double* nocapture %y) nounwind ssp {
+entry:
+  %0 = load float* %x, align 4
+  %add = fadd float %0, 0x405EDD2F20000000
+  store float %add, float* %x, align 4
+  %1 = load double* %y, align 4
+  %add1 = fadd double %1, 2.234560e+02
+  store double %add1, double* %y, align 4
+  ret void
+}
diff --git a/test/CodeGen/Thumb2/aligned-spill.ll b/test/CodeGen/Thumb2/aligned-spill.ll
new file mode 100644
index 0000000..c98ca80
--- /dev/null
+++ b/test/CodeGen/Thumb2/aligned-spill.ll
@@ -0,0 +1,95 @@
+; RUN: llc < %s -mcpu=cortex-a8 -align-neon-spills=0 | FileCheck %s
+; RUN: llc < %s -mcpu=cortex-a8 -align-neon-spills=1 | FileCheck %s --check-prefix=NEON
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios"
+
+; CHECK: f
+; This function is forced to spill a double.
+; Verify that the spill slot is properly aligned.
+;
+; The caller-saved r4 is used as a scratch register for stack realignment.
+; CHECK: push {r4, r7, lr}
+; CHECK: bic r4, r4, #7
+; CHECK: mov sp, r4
+define void @f(double* nocapture %p) nounwind ssp {
+entry:
+  %0 = load double* %p, align 4
+  tail call void asm sideeffect "", "~{d8},~{d9},~{d10},~{d11},~{d12},~{d13},~{d14},~{d15}"() nounwind
+  tail call void @g() nounwind
+  store double %0, double* %p, align 4
+  ret void
+}
+
+; NEON: f
+; NEON: push {r4, r7, lr}
+; NEON: sub.w r4, sp, #64
+; NEON: bic r4, r4, #15
+; Stack pointer must be updated before the spills.
+; NEON: mov sp, r4
+; NEON: vst1.64 {d8, d9, d10, d11}, [r4, :128]!
+; NEON: vst1.64 {d12, d13, d14, d15}, [r4, :128]
+; Stack pointer adjustment for the stack frame contents.
+; This could legally happen before the spills.
+; Since the spill slot is only 8 bytes, technically it would be fine to only
+; subtract #8 here. That would leave sp less aligned than some stack slots,
+; and would probably blow MFI's mind.
+; NEON: sub sp, #16
+; The epilog is free to use another scratch register than r4.
+; NEON: add r[[R4:[0-9]+]], sp, #16
+; NEON: vld1.64 {d8, d9, d10, d11}, [r[[R4]], :128]!
+; NEON: vld1.64 {d12, d13, d14, d15}, [r[[R4]], :128]
+; The stack pointer restore must happen after the reloads.
+; NEON: mov sp,
+; NEON: pop
+
+declare void @g()
+
+; Spill 7 d-registers.
+define void @f7(double* nocapture %p) nounwind ssp {
+entry:
+  tail call void asm sideeffect "", "~{d8},~{d9},~{d10},~{d11},~{d12},~{d13},~{d14}"() nounwind
+  ret void
+}
+
+; NEON: f7
+; NEON: push {r4, r7, lr}
+; NEON: sub.w r4, sp, #56
+; NEON: bic r4, r4, #15
+; Stack pointer must be updated before the spills.
+; NEON: mov sp, r4
+; NEON: vst1.64 {d8, d9, d10, d11}, [r4, :128]!
+; NEON: vst1.64 {d12, d13}, [r4, :128]
+; NEON: vstr d14, [r4, #16]
+; Epilog
+; NEON: vld1.64 {d8, d9, d10, d11},
+; NEON: vld1.64 {d12, d13},
+; NEON: vldr d14,
+; The stack pointer restore must happen after the reloads.
+; NEON: mov sp,
+; NEON: pop
+
+; Spill 7 d-registers, leave a hole.
+define void @f3plus4(double* nocapture %p) nounwind ssp {
+entry:
+  tail call void asm sideeffect "", "~{d8},~{d9},~{d10},~{d12},~{d13},~{d14},~{d15}"() nounwind
+  ret void
+}
+
+; Aligned spilling only works for contiguous ranges starting from d8.
+; The rest goes to the standard vpush instructions.
+; NEON: f3plus4
+; NEON: push {r4, r7, lr}
+; NEON: vpush {d12, d13, d14, d15}
+; NEON: sub.w r4, sp, #24
+; NEON: bic r4, r4, #15
+; Stack pointer must be updated before the spills.
+; NEON: mov sp, r4
+; NEON: vst1.64 {d8, d9}, [r4, :128]
+; NEON: vstr d10, [r4, #16]
+; Epilog
+; NEON: vld1.64 {d8, d9},
+; NEON: vldr d10, [{{.*}}, #16]
+; The stack pointer restore must happen after the reloads.
+; NEON: mov sp,
+; NEON: vpop {d12, d13, d14, d15}
+; NEON: pop
diff --git a/test/CodeGen/Thumb2/constant-islands.ll b/test/CodeGen/Thumb2/constant-islands.ll
new file mode 100644
index 0000000..19d2385
--- /dev/null
+++ b/test/CodeGen/Thumb2/constant-islands.ll
@@ -0,0 +1,1400 @@
+; RUN: llc < %s -march=arm   -mcpu=cortex-a8 -O0 -filetype=obj -o %t.o
+; RUN: llc < %s -march=thumb -mcpu=cortex-a8 -O0 -filetype=obj -o %t.o
+; RUN: llc < %s -march=arm   -mcpu=cortex-a8 -O2 -filetype=obj -o %t.o
+; RUN: llc < %s -march=thumb -mcpu=cortex-a8 -O2 -filetype=obj -o %t.o
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios"
+
+; This function comes from the Bullet test.  It is quite big, and exercises the
+; constant island pass a bit.  It has caused failures, including
+; <rdar://problem/10670199>
+;
+; It is unlikely that this code will continue to create the exact conditions
+; that broke the arm constant island pass in the past, but it is still useful to
+; force the pass to split basic blocks etc.
+;
+; The run lines above force the integrated assembler to be enabled so it can
+; catch any illegal displacements.  Other than that, we depend on the constant
+; island pass assertions.
+
+%class.btVector3 = type { [4 x float] }
+%class.btTransform = type { %class.btMatrix3x3, %class.btVector3 }
+%class.btMatrix3x3 = type { [3 x %class.btVector3] }
+%class.btCapsuleShape = type { %class.btConvexInternalShape, i32 }
+%class.btConvexInternalShape = type { %class.btConvexShape, %class.btVector3, %class.btVector3, float, float }
+%class.btConvexShape = type { %class.btCollisionShape }
+%class.btCollisionShape = type { i32 (...)**, i32, i8* }
+%class.RagDoll = type { i32 (...)**, %class.btDynamicsWorld*, [11 x %class.btCollisionShape*], [11 x %class.btRigidBody*], [10 x %class.btTypedConstraint*] }
+%class.btDynamicsWorld = type { %class.btCollisionWorld, void (%class.btDynamicsWorld*, float)*, void (%class.btDynamicsWorld*, float)*, i8*, %struct.btContactSolverInfo }
+%class.btCollisionWorld = type { i32 (...)**, %class.btAlignedObjectArray, %class.btDispatcher*, %struct.btDispatcherInfo, %class.btStackAlloc*, %class.btBroadphaseInterface*, %class.btIDebugDraw*, i8 }
+%class.btAlignedObjectArray = type { %class.btAlignedAllocator, i32, i32, %class.btCollisionObject**, i8 }
+%class.btAlignedAllocator = type { i8 }
+%class.btCollisionObject = type { i32 (...)**, %class.btTransform, %class.btTransform, %class.btVector3, %class.btVector3, %class.btVector3, i8, float, %struct.btBroadphaseProxy*, %class.btCollisionShape*, %class.btCollisionShape*, i32, i32, i32, i32, float, float, float, i8*, i32, float, float, float, i8, [7 x i8] }
+%struct.btBroadphaseProxy = type { i8*, i16, i16, i8*, i32, %class.btVector3, %class.btVector3 }
+%class.btDispatcher = type { i32 (...)** }
+%struct.btDispatcherInfo = type { float, i32, i32, float, i8, %class.btIDebugDraw*, i8, i8, i8, float, i8, float, %class.btStackAlloc* }
+%class.btIDebugDraw = type { i32 (...)** }
+%class.btStackAlloc = type opaque
+%class.btBroadphaseInterface = type { i32 (...)** }
+%struct.btContactSolverInfo = type { %struct.btContactSolverInfoData }
+%struct.btContactSolverInfoData = type { float, float, float, float, float, i32, float, float, float, float, float, i32, float, float, float, i32, i32 }
+%class.btRigidBody = type { %class.btCollisionObject, %class.btMatrix3x3, %class.btVector3, %class.btVector3, float, %class.btVector3, %class.btVector3, %class.btVector3, %class.btVector3, %class.btVector3, %class.btVector3, %class.btVector3, float, float, i8, float, float, float, float, float, float, %class.btMotionState*, %class.btAlignedObjectArray.22, i32, i32, i32 }
+%class.btMotionState = type { i32 (...)** }
+%class.btAlignedObjectArray.22 = type { %class.btAlignedAllocator.23, i32, i32, %class.btTypedConstraint**, i8 }
+%class.btAlignedAllocator.23 = type { i8 }
+%class.btTypedConstraint = type { i32 (...)**, %struct.btTypedObject, i32, i32, i8, %class.btRigidBody*, %class.btRigidBody*, float, float, %class.btVector3, %class.btVector3, %class.btVector3 }
+%struct.btTypedObject = type { i32 }
+%class.btHingeConstraint = type { %class.btTypedConstraint, [3 x %class.btJacobianEntry], [3 x %class.btJacobianEntry], %class.btTransform, %class.btTransform, float, float, float, float, float, float, float, float, float, float, float, float, float, i8, i8, i8, i8, i8, float }
+%class.btJacobianEntry = type { %class.btVector3, %class.btVector3, %class.btVector3, %class.btVector3, %class.btVector3, float }
+%class.btConeTwistConstraint = type { %class.btTypedConstraint, [3 x %class.btJacobianEntry], %class.btTransform, %class.btTransform, float, float, float, float, float, float, float, float, %class.btVector3, %class.btVector3, float, float, float, float, float, float, float, float, i8, i8, i8, i8, float, float, %class.btVector3, i8, i8, %class.btQuaternion, float, %class.btVector3 }
+%class.btQuaternion = type { %class.btQuadWord }
+%class.btQuadWord = type { [4 x float] }
+
+@_ZTV7RagDoll = external unnamed_addr constant [4 x i8*]
+
+declare noalias i8* @_Znwm(i32)
+
+declare i32 @__gxx_personality_sj0(...)
+
+declare void @_ZdlPv(i8*) nounwind
+
+declare %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3*, float*, float*, float*) unnamed_addr inlinehint ssp align 2
+
+declare void @_ZSt9terminatev()
+
+declare %class.btTransform* @_ZN11btTransformC1Ev(%class.btTransform*) unnamed_addr ssp align 2
+
+declare void @_ZN11btTransform11setIdentityEv(%class.btTransform*) ssp align 2
+
+declare void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform*, %class.btVector3*) nounwind inlinehint ssp align 2
+
+declare i8* @_ZN13btConvexShapenwEm(i32) inlinehint ssp align 2
+
+declare void @_ZN13btConvexShapedlEPv(i8*) inlinehint ssp align 2
+
+declare %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape*, float, float)
+
+declare %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform*) nounwind inlinehint ssp align 2
+
+define %class.RagDoll* @_ZN7RagDollC2EP15btDynamicsWorldRK9btVector3f(%class.RagDoll* %this, %class.btDynamicsWorld* %ownerWorld, %class.btVector3* %positionOffset, float %scale) unnamed_addr ssp align 2 {
+entry:
+  %retval = alloca %class.RagDoll*, align 4
+  %this.addr = alloca %class.RagDoll*, align 4
+  %ownerWorld.addr = alloca %class.btDynamicsWorld*, align 4
+  %positionOffset.addr = alloca %class.btVector3*, align 4
+  %scale.addr = alloca float, align 4
+  %exn.slot = alloca i8*
+  %ehselector.slot = alloca i32
+  %offset = alloca %class.btTransform, align 4
+  %transform = alloca %class.btTransform, align 4
+  %ref.tmp = alloca %class.btVector3, align 4
+  %ref.tmp97 = alloca %class.btVector3, align 4
+  %ref.tmp98 = alloca float, align 4
+  %ref.tmp99 = alloca float, align 4
+  %ref.tmp100 = alloca float, align 4
+  %ref.tmp102 = alloca %class.btTransform, align 4
+  %ref.tmp107 = alloca %class.btVector3, align 4
+  %ref.tmp108 = alloca %class.btVector3, align 4
+  %ref.tmp109 = alloca float, align 4
+  %ref.tmp110 = alloca float, align 4
+  %ref.tmp111 = alloca float, align 4
+  %ref.tmp113 = alloca %class.btTransform, align 4
+  %ref.tmp119 = alloca %class.btVector3, align 4
+  %ref.tmp120 = alloca %class.btVector3, align 4
+  %ref.tmp121 = alloca float, align 4
+  %ref.tmp122 = alloca float, align 4
+  %ref.tmp123 = alloca float, align 4
+  %ref.tmp125 = alloca %class.btTransform, align 4
+  %ref.tmp131 = alloca %class.btVector3, align 4
+  %ref.tmp132 = alloca %class.btVector3, align 4
+  %ref.tmp133 = alloca float, align 4
+  %ref.tmp134 = alloca float, align 4
+  %ref.tmp135 = alloca float, align 4
+  %ref.tmp137 = alloca %class.btTransform, align 4
+  %ref.tmp143 = alloca %class.btVector3, align 4
+  %ref.tmp144 = alloca %class.btVector3, align 4
+  %ref.tmp145 = alloca float, align 4
+  %ref.tmp146 = alloca float, align 4
+  %ref.tmp147 = alloca float, align 4
+  %ref.tmp149 = alloca %class.btTransform, align 4
+  %ref.tmp155 = alloca %class.btVector3, align 4
+  %ref.tmp156 = alloca %class.btVector3, align 4
+  %ref.tmp157 = alloca float, align 4
+  %ref.tmp158 = alloca float, align 4
+  %ref.tmp159 = alloca float, align 4
+  %ref.tmp161 = alloca %class.btTransform, align 4
+  %ref.tmp167 = alloca %class.btVector3, align 4
+  %ref.tmp168 = alloca %class.btVector3, align 4
+  %ref.tmp169 = alloca float, align 4
+  %ref.tmp170 = alloca float, align 4
+  %ref.tmp171 = alloca float, align 4
+  %ref.tmp173 = alloca %class.btTransform, align 4
+  %ref.tmp179 = alloca %class.btVector3, align 4
+  %ref.tmp180 = alloca %class.btVector3, align 4
+  %ref.tmp181 = alloca float, align 4
+  %ref.tmp182 = alloca float, align 4
+  %ref.tmp183 = alloca float, align 4
+  %ref.tmp186 = alloca %class.btTransform, align 4
+  %ref.tmp192 = alloca %class.btVector3, align 4
+  %ref.tmp193 = alloca %class.btVector3, align 4
+  %ref.tmp194 = alloca float, align 4
+  %ref.tmp195 = alloca float, align 4
+  %ref.tmp196 = alloca float, align 4
+  %ref.tmp199 = alloca %class.btTransform, align 4
+  %ref.tmp205 = alloca %class.btVector3, align 4
+  %ref.tmp206 = alloca %class.btVector3, align 4
+  %ref.tmp207 = alloca float, align 4
+  %ref.tmp208 = alloca float, align 4
+  %ref.tmp209 = alloca float, align 4
+  %ref.tmp212 = alloca %class.btTransform, align 4
+  %ref.tmp218 = alloca %class.btVector3, align 4
+  %ref.tmp219 = alloca %class.btVector3, align 4
+  %ref.tmp220 = alloca float, align 4
+  %ref.tmp221 = alloca float, align 4
+  %ref.tmp222 = alloca float, align 4
+  %ref.tmp225 = alloca %class.btTransform, align 4
+  %i = alloca i32, align 4
+  %hingeC = alloca %class.btHingeConstraint*, align 4
+  %coneC = alloca %class.btConeTwistConstraint*, align 4
+  %localA = alloca %class.btTransform, align 4
+  %localB = alloca %class.btTransform, align 4
+  %ref.tmp240 = alloca %class.btVector3, align 4
+  %ref.tmp241 = alloca %class.btVector3, align 4
+  %ref.tmp242 = alloca float, align 4
+  %ref.tmp243 = alloca float, align 4
+  %ref.tmp244 = alloca float, align 4
+  %ref.tmp247 = alloca %class.btVector3, align 4
+  %ref.tmp248 = alloca %class.btVector3, align 4
+  %ref.tmp249 = alloca float, align 4
+  %ref.tmp250 = alloca float, align 4
+  %ref.tmp251 = alloca float, align 4
+  %ref.tmp266 = alloca %class.btVector3, align 4
+  %ref.tmp267 = alloca %class.btVector3, align 4
+  %ref.tmp268 = alloca float, align 4
+  %ref.tmp269 = alloca float, align 4
+  %ref.tmp270 = alloca float, align 4
+  %ref.tmp273 = alloca %class.btVector3, align 4
+  %ref.tmp274 = alloca %class.btVector3, align 4
+  %ref.tmp275 = alloca float, align 4
+  %ref.tmp276 = alloca float, align 4
+  %ref.tmp277 = alloca float, align 4
+  %ref.tmp295 = alloca %class.btVector3, align 4
+  %ref.tmp296 = alloca %class.btVector3, align 4
+  %ref.tmp297 = alloca float, align 4
+  %ref.tmp298 = alloca float, align 4
+  %ref.tmp299 = alloca float, align 4
+  %ref.tmp302 = alloca %class.btVector3, align 4
+  %ref.tmp303 = alloca %class.btVector3, align 4
+  %ref.tmp304 = alloca float, align 4
+  %ref.tmp305 = alloca float, align 4
+  %ref.tmp306 = alloca float, align 4
+  %ref.tmp324 = alloca %class.btVector3, align 4
+  %ref.tmp325 = alloca %class.btVector3, align 4
+  %ref.tmp326 = alloca float, align 4
+  %ref.tmp327 = alloca float, align 4
+  %ref.tmp328 = alloca float, align 4
+  %ref.tmp331 = alloca %class.btVector3, align 4
+  %ref.tmp332 = alloca %class.btVector3, align 4
+  %ref.tmp333 = alloca float, align 4
+  %ref.tmp334 = alloca float, align 4
+  %ref.tmp335 = alloca float, align 4
+  %ref.tmp353 = alloca %class.btVector3, align 4
+  %ref.tmp354 = alloca %class.btVector3, align 4
+  %ref.tmp355 = alloca float, align 4
+  %ref.tmp356 = alloca float, align 4
+  %ref.tmp357 = alloca float, align 4
+  %ref.tmp360 = alloca %class.btVector3, align 4
+  %ref.tmp361 = alloca %class.btVector3, align 4
+  %ref.tmp362 = alloca float, align 4
+  %ref.tmp363 = alloca float, align 4
+  %ref.tmp364 = alloca float, align 4
+  %ref.tmp382 = alloca %class.btVector3, align 4
+  %ref.tmp383 = alloca %class.btVector3, align 4
+  %ref.tmp384 = alloca float, align 4
+  %ref.tmp385 = alloca float, align 4
+  %ref.tmp386 = alloca float, align 4
+  %ref.tmp389 = alloca %class.btVector3, align 4
+  %ref.tmp390 = alloca %class.btVector3, align 4
+  %ref.tmp391 = alloca float, align 4
+  %ref.tmp392 = alloca float, align 4
+  %ref.tmp393 = alloca float, align 4
+  %ref.tmp411 = alloca %class.btVector3, align 4
+  %ref.tmp412 = alloca %class.btVector3, align 4
+  %ref.tmp413 = alloca float, align 4
+  %ref.tmp414 = alloca float, align 4
+  %ref.tmp415 = alloca float, align 4
+  %ref.tmp418 = alloca %class.btVector3, align 4
+  %ref.tmp419 = alloca %class.btVector3, align 4
+  %ref.tmp420 = alloca float, align 4
+  %ref.tmp421 = alloca float, align 4
+  %ref.tmp422 = alloca float, align 4
+  %ref.tmp440 = alloca %class.btVector3, align 4
+  %ref.tmp441 = alloca %class.btVector3, align 4
+  %ref.tmp442 = alloca float, align 4
+  %ref.tmp443 = alloca float, align 4
+  %ref.tmp444 = alloca float, align 4
+  %ref.tmp447 = alloca %class.btVector3, align 4
+  %ref.tmp448 = alloca %class.btVector3, align 4
+  %ref.tmp449 = alloca float, align 4
+  %ref.tmp450 = alloca float, align 4
+  %ref.tmp451 = alloca float, align 4
+  %ref.tmp469 = alloca %class.btVector3, align 4
+  %ref.tmp470 = alloca %class.btVector3, align 4
+  %ref.tmp471 = alloca float, align 4
+  %ref.tmp472 = alloca float, align 4
+  %ref.tmp473 = alloca float, align 4
+  %ref.tmp476 = alloca %class.btVector3, align 4
+  %ref.tmp477 = alloca %class.btVector3, align 4
+  %ref.tmp478 = alloca float, align 4
+  %ref.tmp479 = alloca float, align 4
+  %ref.tmp480 = alloca float, align 4
+  %ref.tmp498 = alloca %class.btVector3, align 4
+  %ref.tmp499 = alloca %class.btVector3, align 4
+  %ref.tmp500 = alloca float, align 4
+  %ref.tmp501 = alloca float, align 4
+  %ref.tmp502 = alloca float, align 4
+  %ref.tmp505 = alloca %class.btVector3, align 4
+  %ref.tmp506 = alloca %class.btVector3, align 4
+  %ref.tmp507 = alloca float, align 4
+  %ref.tmp508 = alloca float, align 4
+  %ref.tmp509 = alloca float, align 4
+  store %class.RagDoll* %this, %class.RagDoll** %this.addr, align 4
+  store %class.btDynamicsWorld* %ownerWorld, %class.btDynamicsWorld** %ownerWorld.addr, align 4
+  store %class.btVector3* %positionOffset, %class.btVector3** %positionOffset.addr, align 4
+  store float %scale, float* %scale.addr, align 4
+  %this1 = load %class.RagDoll** %this.addr
+  store %class.RagDoll* %this1, %class.RagDoll** %retval
+  %0 = bitcast %class.RagDoll* %this1 to i8***
+  store i8** getelementptr inbounds ([4 x i8*]* @_ZTV7RagDoll, i64 0, i64 2), i8*** %0
+  %m_ownerWorld = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
+  %1 = load %class.btDynamicsWorld** %ownerWorld.addr, align 4
+  store %class.btDynamicsWorld* %1, %class.btDynamicsWorld** %m_ownerWorld, align 4
+  %call = call i8* @_ZN13btConvexShapenwEm(i32 56)
+  %2 = bitcast i8* %call to %class.btCapsuleShape*
+  %3 = load float* %scale.addr, align 4
+  %mul = fmul float 0x3FC3333340000000, %3
+  %4 = load float* %scale.addr, align 4
+  %mul2 = fmul float 0x3FC99999A0000000, %4
+  %call3 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %2, float %mul, float %mul2)
+          to label %invoke.cont unwind label %lpad
+
+invoke.cont:                                      ; preds = %entry
+  %5 = bitcast %class.btCapsuleShape* %2 to %class.btCollisionShape*
+  %m_shapes = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes, i32 0, i32 0
+  store %class.btCollisionShape* %5, %class.btCollisionShape** %arrayidx, align 4
+  %call5 = call i8* @_ZN13btConvexShapenwEm(i32 56)
+  %6 = bitcast i8* %call5 to %class.btCapsuleShape*
+  %7 = load float* %scale.addr, align 4
+  %mul6 = fmul float 0x3FC3333340000000, %7
+  %8 = load float* %scale.addr, align 4
+  %mul7 = fmul float 0x3FD1EB8520000000, %8
+  %call10 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %6, float %mul6, float %mul7)
+          to label %invoke.cont9 unwind label %lpad8
+
+invoke.cont9:                                     ; preds = %invoke.cont
+  %9 = bitcast %class.btCapsuleShape* %6 to %class.btCollisionShape*
+  %m_shapes12 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx13 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes12, i32 0, i32 1
+  store %class.btCollisionShape* %9, %class.btCollisionShape** %arrayidx13, align 4
+  %call14 = call i8* @_ZN13btConvexShapenwEm(i32 56)
+  %10 = bitcast i8* %call14 to %class.btCapsuleShape*
+  %11 = load float* %scale.addr, align 4
+  %mul15 = fmul float 0x3FB99999A0000000, %11
+  %12 = load float* %scale.addr, align 4
+  %mul16 = fmul float 0x3FA99999A0000000, %12
+  %call19 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %10, float %mul15, float %mul16)
+          to label %invoke.cont18 unwind label %lpad17
+
+invoke.cont18:                                    ; preds = %invoke.cont9
+  %13 = bitcast %class.btCapsuleShape* %10 to %class.btCollisionShape*
+  %m_shapes21 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx22 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes21, i32 0, i32 2
+  store %class.btCollisionShape* %13, %class.btCollisionShape** %arrayidx22, align 4
+  %call23 = call i8* @_ZN13btConvexShapenwEm(i32 56)
+  %14 = bitcast i8* %call23 to %class.btCapsuleShape*
+  %15 = load float* %scale.addr, align 4
+  %mul24 = fmul float 0x3FB1EB8520000000, %15
+  %16 = load float* %scale.addr, align 4
+  %mul25 = fmul float 0x3FDCCCCCC0000000, %16
+  %call28 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %14, float %mul24, float %mul25)
+          to label %invoke.cont27 unwind label %lpad26
+
+invoke.cont27:                                    ; preds = %invoke.cont18
+  %17 = bitcast %class.btCapsuleShape* %14 to %class.btCollisionShape*
+  %m_shapes30 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx31 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes30, i32 0, i32 3
+  store %class.btCollisionShape* %17, %class.btCollisionShape** %arrayidx31, align 4
+  %call32 = call i8* @_ZN13btConvexShapenwEm(i32 56)
+  %18 = bitcast i8* %call32 to %class.btCapsuleShape*
+  %19 = load float* %scale.addr, align 4
+  %mul33 = fmul float 0x3FA99999A0000000, %19
+  %20 = load float* %scale.addr, align 4
+  %mul34 = fmul float 0x3FD7AE1480000000, %20
+  %call37 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %18, float %mul33, float %mul34)
+          to label %invoke.cont36 unwind label %lpad35
+
+invoke.cont36:                                    ; preds = %invoke.cont27
+  %21 = bitcast %class.btCapsuleShape* %18 to %class.btCollisionShape*
+  %m_shapes39 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx40 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes39, i32 0, i32 4
+  store %class.btCollisionShape* %21, %class.btCollisionShape** %arrayidx40, align 4
+  %call41 = call i8* @_ZN13btConvexShapenwEm(i32 56)
+  %22 = bitcast i8* %call41 to %class.btCapsuleShape*
+  %23 = load float* %scale.addr, align 4
+  %mul42 = fmul float 0x3FB1EB8520000000, %23
+  %24 = load float* %scale.addr, align 4
+  %mul43 = fmul float 0x3FDCCCCCC0000000, %24
+  %call46 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %22, float %mul42, float %mul43)
+          to label %invoke.cont45 unwind label %lpad44
+
+invoke.cont45:                                    ; preds = %invoke.cont36
+  %25 = bitcast %class.btCapsuleShape* %22 to %class.btCollisionShape*
+  %m_shapes48 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx49 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes48, i32 0, i32 5
+  store %class.btCollisionShape* %25, %class.btCollisionShape** %arrayidx49, align 4
+  %call50 = call i8* @_ZN13btConvexShapenwEm(i32 56)
+  %26 = bitcast i8* %call50 to %class.btCapsuleShape*
+  %27 = load float* %scale.addr, align 4
+  %mul51 = fmul float 0x3FA99999A0000000, %27
+  %28 = load float* %scale.addr, align 4
+  %mul52 = fmul float 0x3FD7AE1480000000, %28
+  %call55 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %26, float %mul51, float %mul52)
+          to label %invoke.cont54 unwind label %lpad53
+
+invoke.cont54:                                    ; preds = %invoke.cont45
+  %29 = bitcast %class.btCapsuleShape* %26 to %class.btCollisionShape*
+  %m_shapes57 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx58 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes57, i32 0, i32 6
+  store %class.btCollisionShape* %29, %class.btCollisionShape** %arrayidx58, align 4
+  %call59 = call i8* @_ZN13btConvexShapenwEm(i32 56)
+  %30 = bitcast i8* %call59 to %class.btCapsuleShape*
+  %31 = load float* %scale.addr, align 4
+  %mul60 = fmul float 0x3FA99999A0000000, %31
+  %32 = load float* %scale.addr, align 4
+  %mul61 = fmul float 0x3FD51EB860000000, %32
+  %call64 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %30, float %mul60, float %mul61)
+          to label %invoke.cont63 unwind label %lpad62
+
+invoke.cont63:                                    ; preds = %invoke.cont54
+  %33 = bitcast %class.btCapsuleShape* %30 to %class.btCollisionShape*
+  %m_shapes66 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx67 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes66, i32 0, i32 7
+  store %class.btCollisionShape* %33, %class.btCollisionShape** %arrayidx67, align 4
+  %call68 = call i8* @_ZN13btConvexShapenwEm(i32 56)
+  %34 = bitcast i8* %call68 to %class.btCapsuleShape*
+  %35 = load float* %scale.addr, align 4
+  %mul69 = fmul float 0x3FA47AE140000000, %35
+  %36 = load float* %scale.addr, align 4
+  %mul70 = fmul float 2.500000e-01, %36
+  %call73 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %34, float %mul69, float %mul70)
+          to label %invoke.cont72 unwind label %lpad71
+
+invoke.cont72:                                    ; preds = %invoke.cont63
+  %37 = bitcast %class.btCapsuleShape* %34 to %class.btCollisionShape*
+  %m_shapes75 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx76 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes75, i32 0, i32 8
+  store %class.btCollisionShape* %37, %class.btCollisionShape** %arrayidx76, align 4
+  %call77 = call i8* @_ZN13btConvexShapenwEm(i32 56)
+  %38 = bitcast i8* %call77 to %class.btCapsuleShape*
+  %39 = load float* %scale.addr, align 4
+  %mul78 = fmul float 0x3FA99999A0000000, %39
+  %40 = load float* %scale.addr, align 4
+  %mul79 = fmul float 0x3FD51EB860000000, %40
+  %call82 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %38, float %mul78, float %mul79)
+          to label %invoke.cont81 unwind label %lpad80
+
+invoke.cont81:                                    ; preds = %invoke.cont72
+  %41 = bitcast %class.btCapsuleShape* %38 to %class.btCollisionShape*
+  %m_shapes84 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx85 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes84, i32 0, i32 9
+  store %class.btCollisionShape* %41, %class.btCollisionShape** %arrayidx85, align 4
+  %call86 = call i8* @_ZN13btConvexShapenwEm(i32 56)
+  %42 = bitcast i8* %call86 to %class.btCapsuleShape*
+  %43 = load float* %scale.addr, align 4
+  %mul87 = fmul float 0x3FA47AE140000000, %43
+  %44 = load float* %scale.addr, align 4
+  %mul88 = fmul float 2.500000e-01, %44
+  %call91 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %42, float %mul87, float %mul88)
+          to label %invoke.cont90 unwind label %lpad89
+
+invoke.cont90:                                    ; preds = %invoke.cont81
+  %45 = bitcast %class.btCapsuleShape* %42 to %class.btCollisionShape*
+  %m_shapes93 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx94 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes93, i32 0, i32 10
+  store %class.btCollisionShape* %45, %class.btCollisionShape** %arrayidx94, align 4
+  %call95 = call %class.btTransform* @_ZN11btTransformC1Ev(%class.btTransform* %offset)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %offset)
+  %46 = load %class.btVector3** %positionOffset.addr, align 4
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %offset, %class.btVector3* %46)
+  %call96 = call %class.btTransform* @_ZN11btTransformC1Ev(%class.btTransform* %transform)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
+  store float 0.000000e+00, float* %ref.tmp98, align 4
+  store float 1.000000e+00, float* %ref.tmp99, align 4
+  store float 0.000000e+00, float* %ref.tmp100, align 4
+  %call101 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp97, float* %ref.tmp98, float* %ref.tmp99, float* %ref.tmp100)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp, float* %scale.addr, %class.btVector3* %ref.tmp97)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %transform, %class.btVector3* %ref.tmp)
+  call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp102, %class.btTransform* %offset, %class.btTransform* %transform)
+  %m_shapes103 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx104 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes103, i32 0, i32 0
+  %47 = load %class.btCollisionShape** %arrayidx104, align 4
+  %call105 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp102, %class.btCollisionShape* %47)
+  %m_bodies = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx106 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies, i32 0, i32 0
+  store %class.btRigidBody* %call105, %class.btRigidBody** %arrayidx106, align 4
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
+  store float 0.000000e+00, float* %ref.tmp109, align 4
+  store float 0x3FF3333340000000, float* %ref.tmp110, align 4
+  store float 0.000000e+00, float* %ref.tmp111, align 4
+  %call112 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp108, float* %ref.tmp109, float* %ref.tmp110, float* %ref.tmp111)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp107, float* %scale.addr, %class.btVector3* %ref.tmp108)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %transform, %class.btVector3* %ref.tmp107)
+  call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp113, %class.btTransform* %offset, %class.btTransform* %transform)
+  %m_shapes114 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx115 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes114, i32 0, i32 1
+  %48 = load %class.btCollisionShape** %arrayidx115, align 4
+  %call116 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp113, %class.btCollisionShape* %48)
+  %m_bodies117 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx118 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies117, i32 0, i32 1
+  store %class.btRigidBody* %call116, %class.btRigidBody** %arrayidx118, align 4
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
+  store float 0.000000e+00, float* %ref.tmp121, align 4
+  store float 0x3FF99999A0000000, float* %ref.tmp122, align 4
+  store float 0.000000e+00, float* %ref.tmp123, align 4
+  %call124 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp120, float* %ref.tmp121, float* %ref.tmp122, float* %ref.tmp123)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp119, float* %scale.addr, %class.btVector3* %ref.tmp120)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %transform, %class.btVector3* %ref.tmp119)
+  call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp125, %class.btTransform* %offset, %class.btTransform* %transform)
+  %m_shapes126 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx127 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes126, i32 0, i32 2
+  %49 = load %class.btCollisionShape** %arrayidx127, align 4
+  %call128 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp125, %class.btCollisionShape* %49)
+  %m_bodies129 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx130 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies129, i32 0, i32 2
+  store %class.btRigidBody* %call128, %class.btRigidBody** %arrayidx130, align 4
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
+  store float 0xBFC70A3D80000000, float* %ref.tmp133, align 4
+  store float 0x3FE4CCCCC0000000, float* %ref.tmp134, align 4
+  store float 0.000000e+00, float* %ref.tmp135, align 4
+  %call136 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp132, float* %ref.tmp133, float* %ref.tmp134, float* %ref.tmp135)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp131, float* %scale.addr, %class.btVector3* %ref.tmp132)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %transform, %class.btVector3* %ref.tmp131)
+  call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp137, %class.btTransform* %offset, %class.btTransform* %transform)
+  %m_shapes138 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx139 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes138, i32 0, i32 3
+  %50 = load %class.btCollisionShape** %arrayidx139, align 4
+  %call140 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp137, %class.btCollisionShape* %50)
+  %m_bodies141 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx142 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies141, i32 0, i32 3
+  store %class.btRigidBody* %call140, %class.btRigidBody** %arrayidx142, align 4
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
+  store float 0xBFC70A3D80000000, float* %ref.tmp145, align 4
+  store float 0x3FC99999A0000000, float* %ref.tmp146, align 4
+  store float 0.000000e+00, float* %ref.tmp147, align 4
+  %call148 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp144, float* %ref.tmp145, float* %ref.tmp146, float* %ref.tmp147)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp143, float* %scale.addr, %class.btVector3* %ref.tmp144)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %transform, %class.btVector3* %ref.tmp143)
+  call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp149, %class.btTransform* %offset, %class.btTransform* %transform)
+  %m_shapes150 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx151 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes150, i32 0, i32 4
+  %51 = load %class.btCollisionShape** %arrayidx151, align 4
+  %call152 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp149, %class.btCollisionShape* %51)
+  %m_bodies153 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx154 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies153, i32 0, i32 4
+  store %class.btRigidBody* %call152, %class.btRigidBody** %arrayidx154, align 4
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
+  store float 0x3FC70A3D80000000, float* %ref.tmp157, align 4
+  store float 0x3FE4CCCCC0000000, float* %ref.tmp158, align 4
+  store float 0.000000e+00, float* %ref.tmp159, align 4
+  %call160 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp156, float* %ref.tmp157, float* %ref.tmp158, float* %ref.tmp159)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp155, float* %scale.addr, %class.btVector3* %ref.tmp156)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %transform, %class.btVector3* %ref.tmp155)
+  call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp161, %class.btTransform* %offset, %class.btTransform* %transform)
+  %m_shapes162 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx163 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes162, i32 0, i32 5
+  %52 = load %class.btCollisionShape** %arrayidx163, align 4
+  %call164 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp161, %class.btCollisionShape* %52)
+  %m_bodies165 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx166 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies165, i32 0, i32 5
+  store %class.btRigidBody* %call164, %class.btRigidBody** %arrayidx166, align 4
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
+  store float 0x3FC70A3D80000000, float* %ref.tmp169, align 4
+  store float 0x3FC99999A0000000, float* %ref.tmp170, align 4
+  store float 0.000000e+00, float* %ref.tmp171, align 4
+  %call172 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp168, float* %ref.tmp169, float* %ref.tmp170, float* %ref.tmp171)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp167, float* %scale.addr, %class.btVector3* %ref.tmp168)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %transform, %class.btVector3* %ref.tmp167)
+  call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp173, %class.btTransform* %offset, %class.btTransform* %transform)
+  %m_shapes174 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx175 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes174, i32 0, i32 6
+  %53 = load %class.btCollisionShape** %arrayidx175, align 4
+  %call176 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp173, %class.btCollisionShape* %53)
+  %m_bodies177 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx178 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies177, i32 0, i32 6
+  store %class.btRigidBody* %call176, %class.btRigidBody** %arrayidx178, align 4
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
+  store float 0xBFD6666660000000, float* %ref.tmp181, align 4
+  store float 0x3FF7333340000000, float* %ref.tmp182, align 4
+  store float 0.000000e+00, float* %ref.tmp183, align 4
+  %call184 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp180, float* %ref.tmp181, float* %ref.tmp182, float* %ref.tmp183)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp179, float* %scale.addr, %class.btVector3* %ref.tmp180)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %transform, %class.btVector3* %ref.tmp179)
+  %call185 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %transform)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call185, float 0.000000e+00, float 0.000000e+00, float 0x3FF921FB60000000)
+  call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp186, %class.btTransform* %offset, %class.btTransform* %transform)
+  %m_shapes187 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx188 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes187, i32 0, i32 7
+  %54 = load %class.btCollisionShape** %arrayidx188, align 4
+  %call189 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp186, %class.btCollisionShape* %54)
+  %m_bodies190 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx191 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies190, i32 0, i32 7
+  store %class.btRigidBody* %call189, %class.btRigidBody** %arrayidx191, align 4
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
+  store float 0xBFE6666660000000, float* %ref.tmp194, align 4
+  store float 0x3FF7333340000000, float* %ref.tmp195, align 4
+  store float 0.000000e+00, float* %ref.tmp196, align 4
+  %call197 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp193, float* %ref.tmp194, float* %ref.tmp195, float* %ref.tmp196)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp192, float* %scale.addr, %class.btVector3* %ref.tmp193)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %transform, %class.btVector3* %ref.tmp192)
+  %call198 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %transform)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call198, float 0.000000e+00, float 0.000000e+00, float 0x3FF921FB60000000)
+  call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp199, %class.btTransform* %offset, %class.btTransform* %transform)
+  %m_shapes200 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx201 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes200, i32 0, i32 8
+  %55 = load %class.btCollisionShape** %arrayidx201, align 4
+  %call202 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp199, %class.btCollisionShape* %55)
+  %m_bodies203 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx204 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies203, i32 0, i32 8
+  store %class.btRigidBody* %call202, %class.btRigidBody** %arrayidx204, align 4
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
+  store float 0x3FD6666660000000, float* %ref.tmp207, align 4
+  store float 0x3FF7333340000000, float* %ref.tmp208, align 4
+  store float 0.000000e+00, float* %ref.tmp209, align 4
+  %call210 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp206, float* %ref.tmp207, float* %ref.tmp208, float* %ref.tmp209)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp205, float* %scale.addr, %class.btVector3* %ref.tmp206)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %transform, %class.btVector3* %ref.tmp205)
+  %call211 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %transform)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call211, float 0.000000e+00, float 0.000000e+00, float 0xBFF921FB60000000)
+  call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp212, %class.btTransform* %offset, %class.btTransform* %transform)
+  %m_shapes213 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx214 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes213, i32 0, i32 9
+  %56 = load %class.btCollisionShape** %arrayidx214, align 4
+  %call215 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp212, %class.btCollisionShape* %56)
+  %m_bodies216 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx217 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies216, i32 0, i32 9
+  store %class.btRigidBody* %call215, %class.btRigidBody** %arrayidx217, align 4
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
+  store float 0x3FE6666660000000, float* %ref.tmp220, align 4
+  store float 0x3FF7333340000000, float* %ref.tmp221, align 4
+  store float 0.000000e+00, float* %ref.tmp222, align 4
+  %call223 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp219, float* %ref.tmp220, float* %ref.tmp221, float* %ref.tmp222)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp218, float* %scale.addr, %class.btVector3* %ref.tmp219)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %transform, %class.btVector3* %ref.tmp218)
+  %call224 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %transform)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call224, float 0.000000e+00, float 0.000000e+00, float 0xBFF921FB60000000)
+  call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp225, %class.btTransform* %offset, %class.btTransform* %transform)
+  %m_shapes226 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+  %arrayidx227 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes226, i32 0, i32 10
+  %57 = load %class.btCollisionShape** %arrayidx227, align 4
+  %call228 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp225, %class.btCollisionShape* %57)
+  %m_bodies229 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx230 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies229, i32 0, i32 10
+  store %class.btRigidBody* %call228, %class.btRigidBody** %arrayidx230, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %invoke.cont90
+  %58 = load i32* %i, align 4
+  %cmp = icmp slt i32 %58, 11
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %59 = load i32* %i, align 4
+  %m_bodies231 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx232 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies231, i32 0, i32 %59
+  %60 = load %class.btRigidBody** %arrayidx232, align 4
+  call void @_ZN11btRigidBody10setDampingEff(%class.btRigidBody* %60, float 0x3FA99999A0000000, float 0x3FEB333340000000)
+  %61 = load i32* %i, align 4
+  %m_bodies233 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx234 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies233, i32 0, i32 %61
+  %62 = load %class.btRigidBody** %arrayidx234, align 4
+  %63 = bitcast %class.btRigidBody* %62 to %class.btCollisionObject*
+  call void @_ZN17btCollisionObject19setDeactivationTimeEf(%class.btCollisionObject* %63, float 0x3FE99999A0000000)
+  %64 = load i32* %i, align 4
+  %m_bodies235 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx236 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies235, i32 0, i32 %64
+  %65 = load %class.btRigidBody** %arrayidx236, align 4
+  call void @_ZN11btRigidBody21setSleepingThresholdsEff(%class.btRigidBody* %65, float 0x3FF99999A0000000, float 2.500000e+00)
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %66 = load i32* %i, align 4
+  %inc = add nsw i32 %66, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+lpad:                                             ; preds = %entry
+  %67 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %68 = extractvalue { i8*, i32 } %67, 0
+  store i8* %68, i8** %exn.slot
+  %69 = extractvalue { i8*, i32 } %67, 1
+  store i32 %69, i32* %ehselector.slot
+  invoke void @_ZN13btConvexShapedlEPv(i8* %call)
+          to label %invoke.cont4 unwind label %terminate.lpad
+
+invoke.cont4:                                     ; preds = %lpad
+  br label %eh.resume
+
+lpad8:                                            ; preds = %invoke.cont
+  %70 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %71 = extractvalue { i8*, i32 } %70, 0
+  store i8* %71, i8** %exn.slot
+  %72 = extractvalue { i8*, i32 } %70, 1
+  store i32 %72, i32* %ehselector.slot
+  invoke void @_ZN13btConvexShapedlEPv(i8* %call5)
+          to label %invoke.cont11 unwind label %terminate.lpad
+
+invoke.cont11:                                    ; preds = %lpad8
+  br label %eh.resume
+
+lpad17:                                           ; preds = %invoke.cont9
+  %73 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %74 = extractvalue { i8*, i32 } %73, 0
+  store i8* %74, i8** %exn.slot
+  %75 = extractvalue { i8*, i32 } %73, 1
+  store i32 %75, i32* %ehselector.slot
+  invoke void @_ZN13btConvexShapedlEPv(i8* %call14)
+          to label %invoke.cont20 unwind label %terminate.lpad
+
+invoke.cont20:                                    ; preds = %lpad17
+  br label %eh.resume
+
+lpad26:                                           ; preds = %invoke.cont18
+  %76 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %77 = extractvalue { i8*, i32 } %76, 0
+  store i8* %77, i8** %exn.slot
+  %78 = extractvalue { i8*, i32 } %76, 1
+  store i32 %78, i32* %ehselector.slot
+  invoke void @_ZN13btConvexShapedlEPv(i8* %call23)
+          to label %invoke.cont29 unwind label %terminate.lpad
+
+invoke.cont29:                                    ; preds = %lpad26
+  br label %eh.resume
+
+lpad35:                                           ; preds = %invoke.cont27
+  %79 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %80 = extractvalue { i8*, i32 } %79, 0
+  store i8* %80, i8** %exn.slot
+  %81 = extractvalue { i8*, i32 } %79, 1
+  store i32 %81, i32* %ehselector.slot
+  invoke void @_ZN13btConvexShapedlEPv(i8* %call32)
+          to label %invoke.cont38 unwind label %terminate.lpad
+
+invoke.cont38:                                    ; preds = %lpad35
+  br label %eh.resume
+
+lpad44:                                           ; preds = %invoke.cont36
+  %82 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %83 = extractvalue { i8*, i32 } %82, 0
+  store i8* %83, i8** %exn.slot
+  %84 = extractvalue { i8*, i32 } %82, 1
+  store i32 %84, i32* %ehselector.slot
+  invoke void @_ZN13btConvexShapedlEPv(i8* %call41)
+          to label %invoke.cont47 unwind label %terminate.lpad
+
+invoke.cont47:                                    ; preds = %lpad44
+  br label %eh.resume
+
+lpad53:                                           ; preds = %invoke.cont45
+  %85 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %86 = extractvalue { i8*, i32 } %85, 0
+  store i8* %86, i8** %exn.slot
+  %87 = extractvalue { i8*, i32 } %85, 1
+  store i32 %87, i32* %ehselector.slot
+  invoke void @_ZN13btConvexShapedlEPv(i8* %call50)
+          to label %invoke.cont56 unwind label %terminate.lpad
+
+invoke.cont56:                                    ; preds = %lpad53
+  br label %eh.resume
+
+lpad62:                                           ; preds = %invoke.cont54
+  %88 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %89 = extractvalue { i8*, i32 } %88, 0
+  store i8* %89, i8** %exn.slot
+  %90 = extractvalue { i8*, i32 } %88, 1
+  store i32 %90, i32* %ehselector.slot
+  invoke void @_ZN13btConvexShapedlEPv(i8* %call59)
+          to label %invoke.cont65 unwind label %terminate.lpad
+
+invoke.cont65:                                    ; preds = %lpad62
+  br label %eh.resume
+
+lpad71:                                           ; preds = %invoke.cont63
+  %91 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %92 = extractvalue { i8*, i32 } %91, 0
+  store i8* %92, i8** %exn.slot
+  %93 = extractvalue { i8*, i32 } %91, 1
+  store i32 %93, i32* %ehselector.slot
+  invoke void @_ZN13btConvexShapedlEPv(i8* %call68)
+          to label %invoke.cont74 unwind label %terminate.lpad
+
+invoke.cont74:                                    ; preds = %lpad71
+  br label %eh.resume
+
+lpad80:                                           ; preds = %invoke.cont72
+  %94 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %95 = extractvalue { i8*, i32 } %94, 0
+  store i8* %95, i8** %exn.slot
+  %96 = extractvalue { i8*, i32 } %94, 1
+  store i32 %96, i32* %ehselector.slot
+  invoke void @_ZN13btConvexShapedlEPv(i8* %call77)
+          to label %invoke.cont83 unwind label %terminate.lpad
+
+invoke.cont83:                                    ; preds = %lpad80
+  br label %eh.resume
+
+lpad89:                                           ; preds = %invoke.cont81
+  %97 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %98 = extractvalue { i8*, i32 } %97, 0
+  store i8* %98, i8** %exn.slot
+  %99 = extractvalue { i8*, i32 } %97, 1
+  store i32 %99, i32* %ehselector.slot
+  invoke void @_ZN13btConvexShapedlEPv(i8* %call86)
+          to label %invoke.cont92 unwind label %terminate.lpad
+
+invoke.cont92:                                    ; preds = %lpad89
+  br label %eh.resume
+
+for.end:                                          ; preds = %for.cond
+  %call237 = call %class.btTransform* @_ZN11btTransformC1Ev(%class.btTransform* %localA)
+  %call238 = call %class.btTransform* @_ZN11btTransformC1Ev(%class.btTransform* %localB)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
+  %call239 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localA)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call239, float 0.000000e+00, float 0x3FF921FB60000000, float 0.000000e+00)
+  store float 0.000000e+00, float* %ref.tmp242, align 4
+  store float 0x3FC3333340000000, float* %ref.tmp243, align 4
+  store float 0.000000e+00, float* %ref.tmp244, align 4
+  %call245 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp241, float* %ref.tmp242, float* %ref.tmp243, float* %ref.tmp244)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp240, float* %scale.addr, %class.btVector3* %ref.tmp241)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localA, %class.btVector3* %ref.tmp240)
+  %call246 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localB)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call246, float 0.000000e+00, float 0x3FF921FB60000000, float 0.000000e+00)
+  store float 0.000000e+00, float* %ref.tmp249, align 4
+  store float 0xBFC3333340000000, float* %ref.tmp250, align 4
+  store float 0.000000e+00, float* %ref.tmp251, align 4
+  %call252 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp248, float* %ref.tmp249, float* %ref.tmp250, float* %ref.tmp251)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp247, float* %scale.addr, %class.btVector3* %ref.tmp248)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localB, %class.btVector3* %ref.tmp247)
+  %call253 = call noalias i8* @_Znwm(i32 780)
+  %100 = bitcast i8* %call253 to %class.btHingeConstraint*
+  %m_bodies254 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx255 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies254, i32 0, i32 0
+  %101 = load %class.btRigidBody** %arrayidx255, align 4
+  %m_bodies256 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx257 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies256, i32 0, i32 1
+  %102 = load %class.btRigidBody** %arrayidx257, align 4
+  %call260 = invoke %class.btHingeConstraint* @_ZN17btHingeConstraintC1ER11btRigidBodyS1_RK11btTransformS4_b(%class.btHingeConstraint* %100, %class.btRigidBody* %101, %class.btRigidBody* %102, %class.btTransform* %localA, %class.btTransform* %localB, i1 zeroext false)
+          to label %invoke.cont259 unwind label %lpad258
+
+invoke.cont259:                                   ; preds = %for.end
+  store %class.btHingeConstraint* %100, %class.btHingeConstraint** %hingeC, align 4
+  %103 = load %class.btHingeConstraint** %hingeC, align 4
+  call void @_ZN17btHingeConstraint8setLimitEfffff(%class.btHingeConstraint* %103, float 0xBFE921FB60000000, float 0x3FF921FB60000000, float 0x3FECCCCCC0000000, float 0x3FD3333340000000, float 1.000000e+00)
+  %104 = load %class.btHingeConstraint** %hingeC, align 4
+  %105 = bitcast %class.btHingeConstraint* %104 to %class.btTypedConstraint*
+  %m_joints = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx261 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints, i32 0, i32 0
+  store %class.btTypedConstraint* %105, %class.btTypedConstraint** %arrayidx261, align 4
+  %m_ownerWorld262 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
+  %106 = load %class.btDynamicsWorld** %m_ownerWorld262, align 4
+  %107 = bitcast %class.btDynamicsWorld* %106 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
+  %vtable = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %107
+  %vfn = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable, i64 10
+  %108 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn
+  %m_joints263 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx264 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints263, i32 0, i32 0
+  %109 = load %class.btTypedConstraint** %arrayidx264, align 4
+  call void %108(%class.btDynamicsWorld* %106, %class.btTypedConstraint* %109, i1 zeroext true)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
+  %call265 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localA)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call265, float 0.000000e+00, float 0.000000e+00, float 0x3FF921FB60000000)
+  store float 0.000000e+00, float* %ref.tmp268, align 4
+  store float 0x3FD3333340000000, float* %ref.tmp269, align 4
+  store float 0.000000e+00, float* %ref.tmp270, align 4
+  %call271 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp267, float* %ref.tmp268, float* %ref.tmp269, float* %ref.tmp270)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp266, float* %scale.addr, %class.btVector3* %ref.tmp267)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localA, %class.btVector3* %ref.tmp266)
+  %call272 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localB)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call272, float 0.000000e+00, float 0.000000e+00, float 0x3FF921FB60000000)
+  store float 0.000000e+00, float* %ref.tmp275, align 4
+  store float 0xBFC1EB8520000000, float* %ref.tmp276, align 4
+  store float 0.000000e+00, float* %ref.tmp277, align 4
+  %call278 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp274, float* %ref.tmp275, float* %ref.tmp276, float* %ref.tmp277)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp273, float* %scale.addr, %class.btVector3* %ref.tmp274)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localB, %class.btVector3* %ref.tmp273)
+  %call279 = call noalias i8* @_Znwm(i32 628)
+  %110 = bitcast i8* %call279 to %class.btConeTwistConstraint*
+  %m_bodies280 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx281 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies280, i32 0, i32 1
+  %111 = load %class.btRigidBody** %arrayidx281, align 4
+  %m_bodies282 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx283 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies282, i32 0, i32 2
+  %112 = load %class.btRigidBody** %arrayidx283, align 4
+  %call286 = invoke %class.btConeTwistConstraint* @_ZN21btConeTwistConstraintC1ER11btRigidBodyS1_RK11btTransformS4_(%class.btConeTwistConstraint* %110, %class.btRigidBody* %111, %class.btRigidBody* %112, %class.btTransform* %localA, %class.btTransform* %localB)
+          to label %invoke.cont285 unwind label %lpad284
+
+invoke.cont285:                                   ; preds = %invoke.cont259
+  store %class.btConeTwistConstraint* %110, %class.btConeTwistConstraint** %coneC, align 4
+  %113 = load %class.btConeTwistConstraint** %coneC, align 4
+  call void @_ZN21btConeTwistConstraint8setLimitEffffff(%class.btConeTwistConstraint* %113, float 0x3FE921FB60000000, float 0x3FE921FB60000000, float 0x3FF921FB60000000, float 1.000000e+00, float 0x3FD3333340000000, float 1.000000e+00)
+  %114 = load %class.btConeTwistConstraint** %coneC, align 4
+  %115 = bitcast %class.btConeTwistConstraint* %114 to %class.btTypedConstraint*
+  %m_joints287 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx288 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints287, i32 0, i32 1
+  store %class.btTypedConstraint* %115, %class.btTypedConstraint** %arrayidx288, align 4
+  %m_ownerWorld289 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
+  %116 = load %class.btDynamicsWorld** %m_ownerWorld289, align 4
+  %117 = bitcast %class.btDynamicsWorld* %116 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
+  %vtable290 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %117
+  %vfn291 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable290, i64 10
+  %118 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn291
+  %m_joints292 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx293 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints292, i32 0, i32 1
+  %119 = load %class.btTypedConstraint** %arrayidx293, align 4
+  call void %118(%class.btDynamicsWorld* %116, %class.btTypedConstraint* %119, i1 zeroext true)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
+  %call294 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localA)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call294, float 0.000000e+00, float 0.000000e+00, float 0xC00F6A7A20000000)
+  store float 0xBFC70A3D80000000, float* %ref.tmp297, align 4
+  store float 0xBFB99999A0000000, float* %ref.tmp298, align 4
+  store float 0.000000e+00, float* %ref.tmp299, align 4
+  %call300 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp296, float* %ref.tmp297, float* %ref.tmp298, float* %ref.tmp299)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp295, float* %scale.addr, %class.btVector3* %ref.tmp296)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localA, %class.btVector3* %ref.tmp295)
+  %call301 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localB)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call301, float 0.000000e+00, float 0.000000e+00, float 0xC00F6A7A20000000)
+  store float 0.000000e+00, float* %ref.tmp304, align 4
+  store float 0x3FCCCCCCC0000000, float* %ref.tmp305, align 4
+  store float 0.000000e+00, float* %ref.tmp306, align 4
+  %call307 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp303, float* %ref.tmp304, float* %ref.tmp305, float* %ref.tmp306)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp302, float* %scale.addr, %class.btVector3* %ref.tmp303)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localB, %class.btVector3* %ref.tmp302)
+  %call308 = call noalias i8* @_Znwm(i32 628)
+  %120 = bitcast i8* %call308 to %class.btConeTwistConstraint*
+  %m_bodies309 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx310 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies309, i32 0, i32 0
+  %121 = load %class.btRigidBody** %arrayidx310, align 4
+  %m_bodies311 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx312 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies311, i32 0, i32 3
+  %122 = load %class.btRigidBody** %arrayidx312, align 4
+  %call315 = invoke %class.btConeTwistConstraint* @_ZN21btConeTwistConstraintC1ER11btRigidBodyS1_RK11btTransformS4_(%class.btConeTwistConstraint* %120, %class.btRigidBody* %121, %class.btRigidBody* %122, %class.btTransform* %localA, %class.btTransform* %localB)
+          to label %invoke.cont314 unwind label %lpad313
+
+invoke.cont314:                                   ; preds = %invoke.cont285
+  store %class.btConeTwistConstraint* %120, %class.btConeTwistConstraint** %coneC, align 4
+  %123 = load %class.btConeTwistConstraint** %coneC, align 4
+  call void @_ZN21btConeTwistConstraint8setLimitEffffff(%class.btConeTwistConstraint* %123, float 0x3FE921FB60000000, float 0x3FE921FB60000000, float 0.000000e+00, float 1.000000e+00, float 0x3FD3333340000000, float 1.000000e+00)
+  %124 = load %class.btConeTwistConstraint** %coneC, align 4
+  %125 = bitcast %class.btConeTwistConstraint* %124 to %class.btTypedConstraint*
+  %m_joints316 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx317 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints316, i32 0, i32 2
+  store %class.btTypedConstraint* %125, %class.btTypedConstraint** %arrayidx317, align 4
+  %m_ownerWorld318 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
+  %126 = load %class.btDynamicsWorld** %m_ownerWorld318, align 4
+  %127 = bitcast %class.btDynamicsWorld* %126 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
+  %vtable319 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %127
+  %vfn320 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable319, i64 10
+  %128 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn320
+  %m_joints321 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx322 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints321, i32 0, i32 2
+  %129 = load %class.btTypedConstraint** %arrayidx322, align 4
+  call void %128(%class.btDynamicsWorld* %126, %class.btTypedConstraint* %129, i1 zeroext true)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
+  %call323 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localA)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call323, float 0.000000e+00, float 0x3FF921FB60000000, float 0.000000e+00)
+  store float 0.000000e+00, float* %ref.tmp326, align 4
+  store float 0xBFCCCCCCC0000000, float* %ref.tmp327, align 4
+  store float 0.000000e+00, float* %ref.tmp328, align 4
+  %call329 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp325, float* %ref.tmp326, float* %ref.tmp327, float* %ref.tmp328)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp324, float* %scale.addr, %class.btVector3* %ref.tmp325)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localA, %class.btVector3* %ref.tmp324)
+  %call330 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localB)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call330, float 0.000000e+00, float 0x3FF921FB60000000, float 0.000000e+00)
+  store float 0.000000e+00, float* %ref.tmp333, align 4
+  store float 0x3FC7AE1480000000, float* %ref.tmp334, align 4
+  store float 0.000000e+00, float* %ref.tmp335, align 4
+  %call336 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp332, float* %ref.tmp333, float* %ref.tmp334, float* %ref.tmp335)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp331, float* %scale.addr, %class.btVector3* %ref.tmp332)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localB, %class.btVector3* %ref.tmp331)
+  %call337 = call noalias i8* @_Znwm(i32 780)
+  %130 = bitcast i8* %call337 to %class.btHingeConstraint*
+  %m_bodies338 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx339 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies338, i32 0, i32 3
+  %131 = load %class.btRigidBody** %arrayidx339, align 4
+  %m_bodies340 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx341 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies340, i32 0, i32 4
+  %132 = load %class.btRigidBody** %arrayidx341, align 4
+  %call344 = invoke %class.btHingeConstraint* @_ZN17btHingeConstraintC1ER11btRigidBodyS1_RK11btTransformS4_b(%class.btHingeConstraint* %130, %class.btRigidBody* %131, %class.btRigidBody* %132, %class.btTransform* %localA, %class.btTransform* %localB, i1 zeroext false)
+          to label %invoke.cont343 unwind label %lpad342
+
+invoke.cont343:                                   ; preds = %invoke.cont314
+  store %class.btHingeConstraint* %130, %class.btHingeConstraint** %hingeC, align 4
+  %133 = load %class.btHingeConstraint** %hingeC, align 4
+  call void @_ZN17btHingeConstraint8setLimitEfffff(%class.btHingeConstraint* %133, float 0.000000e+00, float 0x3FF921FB60000000, float 0x3FECCCCCC0000000, float 0x3FD3333340000000, float 1.000000e+00)
+  %134 = load %class.btHingeConstraint** %hingeC, align 4
+  %135 = bitcast %class.btHingeConstraint* %134 to %class.btTypedConstraint*
+  %m_joints345 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx346 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints345, i32 0, i32 3
+  store %class.btTypedConstraint* %135, %class.btTypedConstraint** %arrayidx346, align 4
+  %m_ownerWorld347 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
+  %136 = load %class.btDynamicsWorld** %m_ownerWorld347, align 4
+  %137 = bitcast %class.btDynamicsWorld* %136 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
+  %vtable348 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %137
+  %vfn349 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable348, i64 10
+  %138 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn349
+  %m_joints350 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx351 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints350, i32 0, i32 3
+  %139 = load %class.btTypedConstraint** %arrayidx351, align 4
+  call void %138(%class.btDynamicsWorld* %136, %class.btTypedConstraint* %139, i1 zeroext true)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
+  %call352 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localA)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call352, float 0.000000e+00, float 0.000000e+00, float 0x3FE921FB60000000)
+  store float 0x3FC70A3D80000000, float* %ref.tmp355, align 4
+  store float 0xBFB99999A0000000, float* %ref.tmp356, align 4
+  store float 0.000000e+00, float* %ref.tmp357, align 4
+  %call358 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp354, float* %ref.tmp355, float* %ref.tmp356, float* %ref.tmp357)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp353, float* %scale.addr, %class.btVector3* %ref.tmp354)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localA, %class.btVector3* %ref.tmp353)
+  %call359 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localB)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call359, float 0.000000e+00, float 0.000000e+00, float 0x3FE921FB60000000)
+  store float 0.000000e+00, float* %ref.tmp362, align 4
+  store float 0x3FCCCCCCC0000000, float* %ref.tmp363, align 4
+  store float 0.000000e+00, float* %ref.tmp364, align 4
+  %call365 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp361, float* %ref.tmp362, float* %ref.tmp363, float* %ref.tmp364)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp360, float* %scale.addr, %class.btVector3* %ref.tmp361)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localB, %class.btVector3* %ref.tmp360)
+  %call366 = call noalias i8* @_Znwm(i32 628)
+  %140 = bitcast i8* %call366 to %class.btConeTwistConstraint*
+  %m_bodies367 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx368 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies367, i32 0, i32 0
+  %141 = load %class.btRigidBody** %arrayidx368, align 4
+  %m_bodies369 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx370 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies369, i32 0, i32 5
+  %142 = load %class.btRigidBody** %arrayidx370, align 4
+  %call373 = invoke %class.btConeTwistConstraint* @_ZN21btConeTwistConstraintC1ER11btRigidBodyS1_RK11btTransformS4_(%class.btConeTwistConstraint* %140, %class.btRigidBody* %141, %class.btRigidBody* %142, %class.btTransform* %localA, %class.btTransform* %localB)
+          to label %invoke.cont372 unwind label %lpad371
+
+invoke.cont372:                                   ; preds = %invoke.cont343
+  store %class.btConeTwistConstraint* %140, %class.btConeTwistConstraint** %coneC, align 4
+  %143 = load %class.btConeTwistConstraint** %coneC, align 4
+  call void @_ZN21btConeTwistConstraint8setLimitEffffff(%class.btConeTwistConstraint* %143, float 0x3FE921FB60000000, float 0x3FE921FB60000000, float 0.000000e+00, float 1.000000e+00, float 0x3FD3333340000000, float 1.000000e+00)
+  %144 = load %class.btConeTwistConstraint** %coneC, align 4
+  %145 = bitcast %class.btConeTwistConstraint* %144 to %class.btTypedConstraint*
+  %m_joints374 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx375 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints374, i32 0, i32 4
+  store %class.btTypedConstraint* %145, %class.btTypedConstraint** %arrayidx375, align 4
+  %m_ownerWorld376 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
+  %146 = load %class.btDynamicsWorld** %m_ownerWorld376, align 4
+  %147 = bitcast %class.btDynamicsWorld* %146 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
+  %vtable377 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %147
+  %vfn378 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable377, i64 10
+  %148 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn378
+  %m_joints379 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx380 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints379, i32 0, i32 4
+  %149 = load %class.btTypedConstraint** %arrayidx380, align 4
+  call void %148(%class.btDynamicsWorld* %146, %class.btTypedConstraint* %149, i1 zeroext true)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
+  %call381 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localA)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call381, float 0.000000e+00, float 0x3FF921FB60000000, float 0.000000e+00)
+  store float 0.000000e+00, float* %ref.tmp384, align 4
+  store float 0xBFCCCCCCC0000000, float* %ref.tmp385, align 4
+  store float 0.000000e+00, float* %ref.tmp386, align 4
+  %call387 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp383, float* %ref.tmp384, float* %ref.tmp385, float* %ref.tmp386)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp382, float* %scale.addr, %class.btVector3* %ref.tmp383)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localA, %class.btVector3* %ref.tmp382)
+  %call388 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localB)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call388, float 0.000000e+00, float 0x3FF921FB60000000, float 0.000000e+00)
+  store float 0.000000e+00, float* %ref.tmp391, align 4
+  store float 0x3FC7AE1480000000, float* %ref.tmp392, align 4
+  store float 0.000000e+00, float* %ref.tmp393, align 4
+  %call394 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp390, float* %ref.tmp391, float* %ref.tmp392, float* %ref.tmp393)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp389, float* %scale.addr, %class.btVector3* %ref.tmp390)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localB, %class.btVector3* %ref.tmp389)
+  %call395 = call noalias i8* @_Znwm(i32 780)
+  %150 = bitcast i8* %call395 to %class.btHingeConstraint*
+  %m_bodies396 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx397 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies396, i32 0, i32 5
+  %151 = load %class.btRigidBody** %arrayidx397, align 4
+  %m_bodies398 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx399 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies398, i32 0, i32 6
+  %152 = load %class.btRigidBody** %arrayidx399, align 4
+  %call402 = invoke %class.btHingeConstraint* @_ZN17btHingeConstraintC1ER11btRigidBodyS1_RK11btTransformS4_b(%class.btHingeConstraint* %150, %class.btRigidBody* %151, %class.btRigidBody* %152, %class.btTransform* %localA, %class.btTransform* %localB, i1 zeroext false)
+          to label %invoke.cont401 unwind label %lpad400
+
+invoke.cont401:                                   ; preds = %invoke.cont372
+  store %class.btHingeConstraint* %150, %class.btHingeConstraint** %hingeC, align 4
+  %153 = load %class.btHingeConstraint** %hingeC, align 4
+  call void @_ZN17btHingeConstraint8setLimitEfffff(%class.btHingeConstraint* %153, float 0.000000e+00, float 0x3FF921FB60000000, float 0x3FECCCCCC0000000, float 0x3FD3333340000000, float 1.000000e+00)
+  %154 = load %class.btHingeConstraint** %hingeC, align 4
+  %155 = bitcast %class.btHingeConstraint* %154 to %class.btTypedConstraint*
+  %m_joints403 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx404 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints403, i32 0, i32 5
+  store %class.btTypedConstraint* %155, %class.btTypedConstraint** %arrayidx404, align 4
+  %m_ownerWorld405 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
+  %156 = load %class.btDynamicsWorld** %m_ownerWorld405, align 4
+  %157 = bitcast %class.btDynamicsWorld* %156 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
+  %vtable406 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %157
+  %vfn407 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable406, i64 10
+  %158 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn407
+  %m_joints408 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx409 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints408, i32 0, i32 5
+  %159 = load %class.btTypedConstraint** %arrayidx409, align 4
+  call void %158(%class.btDynamicsWorld* %156, %class.btTypedConstraint* %159, i1 zeroext true)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
+  %call410 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localA)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call410, float 0.000000e+00, float 0.000000e+00, float 0x400921FB60000000)
+  store float 0xBFC99999A0000000, float* %ref.tmp413, align 4
+  store float 0x3FC3333340000000, float* %ref.tmp414, align 4
+  store float 0.000000e+00, float* %ref.tmp415, align 4
+  %call416 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp412, float* %ref.tmp413, float* %ref.tmp414, float* %ref.tmp415)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp411, float* %scale.addr, %class.btVector3* %ref.tmp412)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localA, %class.btVector3* %ref.tmp411)
+  %call417 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localB)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call417, float 0.000000e+00, float 0.000000e+00, float 0x3FF921FB60000000)
+  store float 0.000000e+00, float* %ref.tmp420, align 4
+  store float 0xBFC70A3D80000000, float* %ref.tmp421, align 4
+  store float 0.000000e+00, float* %ref.tmp422, align 4
+  %call423 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp419, float* %ref.tmp420, float* %ref.tmp421, float* %ref.tmp422)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp418, float* %scale.addr, %class.btVector3* %ref.tmp419)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localB, %class.btVector3* %ref.tmp418)
+  %call424 = call noalias i8* @_Znwm(i32 628)
+  %160 = bitcast i8* %call424 to %class.btConeTwistConstraint*
+  %m_bodies425 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx426 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies425, i32 0, i32 1
+  %161 = load %class.btRigidBody** %arrayidx426, align 4
+  %m_bodies427 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx428 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies427, i32 0, i32 7
+  %162 = load %class.btRigidBody** %arrayidx428, align 4
+  %call431 = invoke %class.btConeTwistConstraint* @_ZN21btConeTwistConstraintC1ER11btRigidBodyS1_RK11btTransformS4_(%class.btConeTwistConstraint* %160, %class.btRigidBody* %161, %class.btRigidBody* %162, %class.btTransform* %localA, %class.btTransform* %localB)
+          to label %invoke.cont430 unwind label %lpad429
+
+invoke.cont430:                                   ; preds = %invoke.cont401
+  store %class.btConeTwistConstraint* %160, %class.btConeTwistConstraint** %coneC, align 4
+  %163 = load %class.btConeTwistConstraint** %coneC, align 4
+  call void @_ZN21btConeTwistConstraint8setLimitEffffff(%class.btConeTwistConstraint* %163, float 0x3FF921FB60000000, float 0x3FF921FB60000000, float 0.000000e+00, float 1.000000e+00, float 0x3FD3333340000000, float 1.000000e+00)
+  %164 = load %class.btConeTwistConstraint** %coneC, align 4
+  %165 = bitcast %class.btConeTwistConstraint* %164 to %class.btTypedConstraint*
+  %m_joints432 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx433 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints432, i32 0, i32 6
+  store %class.btTypedConstraint* %165, %class.btTypedConstraint** %arrayidx433, align 4
+  %m_ownerWorld434 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
+  %166 = load %class.btDynamicsWorld** %m_ownerWorld434, align 4
+  %167 = bitcast %class.btDynamicsWorld* %166 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
+  %vtable435 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %167
+  %vfn436 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable435, i64 10
+  %168 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn436
+  %m_joints437 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx438 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints437, i32 0, i32 6
+  %169 = load %class.btTypedConstraint** %arrayidx438, align 4
+  call void %168(%class.btDynamicsWorld* %166, %class.btTypedConstraint* %169, i1 zeroext true)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
+  %call439 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localA)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call439, float 0.000000e+00, float 0x3FF921FB60000000, float 0.000000e+00)
+  store float 0.000000e+00, float* %ref.tmp442, align 4
+  store float 0x3FC70A3D80000000, float* %ref.tmp443, align 4
+  store float 0.000000e+00, float* %ref.tmp444, align 4
+  %call445 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp441, float* %ref.tmp442, float* %ref.tmp443, float* %ref.tmp444)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp440, float* %scale.addr, %class.btVector3* %ref.tmp441)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localA, %class.btVector3* %ref.tmp440)
+  %call446 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localB)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call446, float 0.000000e+00, float 0x3FF921FB60000000, float 0.000000e+00)
+  store float 0.000000e+00, float* %ref.tmp449, align 4
+  store float 0xBFC1EB8520000000, float* %ref.tmp450, align 4
+  store float 0.000000e+00, float* %ref.tmp451, align 4
+  %call452 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp448, float* %ref.tmp449, float* %ref.tmp450, float* %ref.tmp451)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp447, float* %scale.addr, %class.btVector3* %ref.tmp448)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localB, %class.btVector3* %ref.tmp447)
+  %call453 = call noalias i8* @_Znwm(i32 780)
+  %170 = bitcast i8* %call453 to %class.btHingeConstraint*
+  %m_bodies454 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx455 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies454, i32 0, i32 7
+  %171 = load %class.btRigidBody** %arrayidx455, align 4
+  %m_bodies456 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx457 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies456, i32 0, i32 8
+  %172 = load %class.btRigidBody** %arrayidx457, align 4
+  %call460 = invoke %class.btHingeConstraint* @_ZN17btHingeConstraintC1ER11btRigidBodyS1_RK11btTransformS4_b(%class.btHingeConstraint* %170, %class.btRigidBody* %171, %class.btRigidBody* %172, %class.btTransform* %localA, %class.btTransform* %localB, i1 zeroext false)
+          to label %invoke.cont459 unwind label %lpad458
+
+invoke.cont459:                                   ; preds = %invoke.cont430
+  store %class.btHingeConstraint* %170, %class.btHingeConstraint** %hingeC, align 4
+  %173 = load %class.btHingeConstraint** %hingeC, align 4
+  call void @_ZN17btHingeConstraint8setLimitEfffff(%class.btHingeConstraint* %173, float 0xBFF921FB60000000, float 0.000000e+00, float 0x3FECCCCCC0000000, float 0x3FD3333340000000, float 1.000000e+00)
+  %174 = load %class.btHingeConstraint** %hingeC, align 4
+  %175 = bitcast %class.btHingeConstraint* %174 to %class.btTypedConstraint*
+  %m_joints461 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx462 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints461, i32 0, i32 7
+  store %class.btTypedConstraint* %175, %class.btTypedConstraint** %arrayidx462, align 4
+  %m_ownerWorld463 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
+  %176 = load %class.btDynamicsWorld** %m_ownerWorld463, align 4
+  %177 = bitcast %class.btDynamicsWorld* %176 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
+  %vtable464 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %177
+  %vfn465 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable464, i64 10
+  %178 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn465
+  %m_joints466 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx467 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints466, i32 0, i32 7
+  %179 = load %class.btTypedConstraint** %arrayidx467, align 4
+  call void %178(%class.btDynamicsWorld* %176, %class.btTypedConstraint* %179, i1 zeroext true)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
+  %call468 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localA)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call468, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00)
+  store float 0x3FC99999A0000000, float* %ref.tmp471, align 4
+  store float 0x3FC3333340000000, float* %ref.tmp472, align 4
+  store float 0.000000e+00, float* %ref.tmp473, align 4
+  %call474 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp470, float* %ref.tmp471, float* %ref.tmp472, float* %ref.tmp473)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp469, float* %scale.addr, %class.btVector3* %ref.tmp470)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localA, %class.btVector3* %ref.tmp469)
+  %call475 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localB)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call475, float 0.000000e+00, float 0.000000e+00, float 0x3FF921FB60000000)
+  store float 0.000000e+00, float* %ref.tmp478, align 4
+  store float 0xBFC70A3D80000000, float* %ref.tmp479, align 4
+  store float 0.000000e+00, float* %ref.tmp480, align 4
+  %call481 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp477, float* %ref.tmp478, float* %ref.tmp479, float* %ref.tmp480)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp476, float* %scale.addr, %class.btVector3* %ref.tmp477)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localB, %class.btVector3* %ref.tmp476)
+  %call482 = call noalias i8* @_Znwm(i32 628)
+  %180 = bitcast i8* %call482 to %class.btConeTwistConstraint*
+  %m_bodies483 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx484 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies483, i32 0, i32 1
+  %181 = load %class.btRigidBody** %arrayidx484, align 4
+  %m_bodies485 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx486 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies485, i32 0, i32 9
+  %182 = load %class.btRigidBody** %arrayidx486, align 4
+  %call489 = invoke %class.btConeTwistConstraint* @_ZN21btConeTwistConstraintC1ER11btRigidBodyS1_RK11btTransformS4_(%class.btConeTwistConstraint* %180, %class.btRigidBody* %181, %class.btRigidBody* %182, %class.btTransform* %localA, %class.btTransform* %localB)
+          to label %invoke.cont488 unwind label %lpad487
+
+invoke.cont488:                                   ; preds = %invoke.cont459
+  store %class.btConeTwistConstraint* %180, %class.btConeTwistConstraint** %coneC, align 4
+  %183 = load %class.btConeTwistConstraint** %coneC, align 4
+  call void @_ZN21btConeTwistConstraint8setLimitEffffff(%class.btConeTwistConstraint* %183, float 0x3FF921FB60000000, float 0x3FF921FB60000000, float 0.000000e+00, float 1.000000e+00, float 0x3FD3333340000000, float 1.000000e+00)
+  %184 = load %class.btConeTwistConstraint** %coneC, align 4
+  %185 = bitcast %class.btConeTwistConstraint* %184 to %class.btTypedConstraint*
+  %m_joints490 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx491 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints490, i32 0, i32 8
+  store %class.btTypedConstraint* %185, %class.btTypedConstraint** %arrayidx491, align 4
+  %m_ownerWorld492 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
+  %186 = load %class.btDynamicsWorld** %m_ownerWorld492, align 4
+  %187 = bitcast %class.btDynamicsWorld* %186 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
+  %vtable493 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %187
+  %vfn494 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable493, i64 10
+  %188 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn494
+  %m_joints495 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx496 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints495, i32 0, i32 8
+  %189 = load %class.btTypedConstraint** %arrayidx496, align 4
+  call void %188(%class.btDynamicsWorld* %186, %class.btTypedConstraint* %189, i1 zeroext true)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
+  call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
+  %call497 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localA)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call497, float 0.000000e+00, float 0x3FF921FB60000000, float 0.000000e+00)
+  store float 0.000000e+00, float* %ref.tmp500, align 4
+  store float 0x3FC70A3D80000000, float* %ref.tmp501, align 4
+  store float 0.000000e+00, float* %ref.tmp502, align 4
+  %call503 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp499, float* %ref.tmp500, float* %ref.tmp501, float* %ref.tmp502)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp498, float* %scale.addr, %class.btVector3* %ref.tmp499)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localA, %class.btVector3* %ref.tmp498)
+  %call504 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localB)
+  call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call504, float 0.000000e+00, float 0x3FF921FB60000000, float 0.000000e+00)
+  store float 0.000000e+00, float* %ref.tmp507, align 4
+  store float 0xBFC1EB8520000000, float* %ref.tmp508, align 4
+  store float 0.000000e+00, float* %ref.tmp509, align 4
+  %call510 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp506, float* %ref.tmp507, float* %ref.tmp508, float* %ref.tmp509)
+  call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp505, float* %scale.addr, %class.btVector3* %ref.tmp506)
+  call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localB, %class.btVector3* %ref.tmp505)
+  %call511 = call noalias i8* @_Znwm(i32 780)
+  %190 = bitcast i8* %call511 to %class.btHingeConstraint*
+  %m_bodies512 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx513 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies512, i32 0, i32 9
+  %191 = load %class.btRigidBody** %arrayidx513, align 4
+  %m_bodies514 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+  %arrayidx515 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies514, i32 0, i32 10
+  %192 = load %class.btRigidBody** %arrayidx515, align 4
+  %call518 = invoke %class.btHingeConstraint* @_ZN17btHingeConstraintC1ER11btRigidBodyS1_RK11btTransformS4_b(%class.btHingeConstraint* %190, %class.btRigidBody* %191, %class.btRigidBody* %192, %class.btTransform* %localA, %class.btTransform* %localB, i1 zeroext false)
+          to label %invoke.cont517 unwind label %lpad516
+
+invoke.cont517:                                   ; preds = %invoke.cont488
+  store %class.btHingeConstraint* %190, %class.btHingeConstraint** %hingeC, align 4
+  %193 = load %class.btHingeConstraint** %hingeC, align 4
+  call void @_ZN17btHingeConstraint8setLimitEfffff(%class.btHingeConstraint* %193, float 0xBFF921FB60000000, float 0.000000e+00, float 0x3FECCCCCC0000000, float 0x3FD3333340000000, float 1.000000e+00)
+  %194 = load %class.btHingeConstraint** %hingeC, align 4
+  %195 = bitcast %class.btHingeConstraint* %194 to %class.btTypedConstraint*
+  %m_joints519 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx520 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints519, i32 0, i32 9
+  store %class.btTypedConstraint* %195, %class.btTypedConstraint** %arrayidx520, align 4
+  %m_ownerWorld521 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
+  %196 = load %class.btDynamicsWorld** %m_ownerWorld521, align 4
+  %197 = bitcast %class.btDynamicsWorld* %196 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
+  %vtable522 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %197
+  %vfn523 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable522, i64 10
+  %198 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn523
+  %m_joints524 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+  %arrayidx525 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints524, i32 0, i32 9
+  %199 = load %class.btTypedConstraint** %arrayidx525, align 4
+  call void %198(%class.btDynamicsWorld* %196, %class.btTypedConstraint* %199, i1 zeroext true)
+  %200 = load %class.RagDoll** %retval
+  ret %class.RagDoll* %200
+
+lpad258:                                          ; preds = %for.end
+  %201 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %202 = extractvalue { i8*, i32 } %201, 0
+  store i8* %202, i8** %exn.slot
+  %203 = extractvalue { i8*, i32 } %201, 1
+  store i32 %203, i32* %ehselector.slot
+  call void @_ZdlPv(i8* %call253) nounwind
+  br label %eh.resume
+
+lpad284:                                          ; preds = %invoke.cont259
+  %204 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %205 = extractvalue { i8*, i32 } %204, 0
+  store i8* %205, i8** %exn.slot
+  %206 = extractvalue { i8*, i32 } %204, 1
+  store i32 %206, i32* %ehselector.slot
+  call void @_ZdlPv(i8* %call279) nounwind
+  br label %eh.resume
+
+lpad313:                                          ; preds = %invoke.cont285
+  %207 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %208 = extractvalue { i8*, i32 } %207, 0
+  store i8* %208, i8** %exn.slot
+  %209 = extractvalue { i8*, i32 } %207, 1
+  store i32 %209, i32* %ehselector.slot
+  call void @_ZdlPv(i8* %call308) nounwind
+  br label %eh.resume
+
+lpad342:                                          ; preds = %invoke.cont314
+  %210 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %211 = extractvalue { i8*, i32 } %210, 0
+  store i8* %211, i8** %exn.slot
+  %212 = extractvalue { i8*, i32 } %210, 1
+  store i32 %212, i32* %ehselector.slot
+  call void @_ZdlPv(i8* %call337) nounwind
+  br label %eh.resume
+
+lpad371:                                          ; preds = %invoke.cont343
+  %213 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %214 = extractvalue { i8*, i32 } %213, 0
+  store i8* %214, i8** %exn.slot
+  %215 = extractvalue { i8*, i32 } %213, 1
+  store i32 %215, i32* %ehselector.slot
+  call void @_ZdlPv(i8* %call366) nounwind
+  br label %eh.resume
+
+lpad400:                                          ; preds = %invoke.cont372
+  %216 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %217 = extractvalue { i8*, i32 } %216, 0
+  store i8* %217, i8** %exn.slot
+  %218 = extractvalue { i8*, i32 } %216, 1
+  store i32 %218, i32* %ehselector.slot
+  call void @_ZdlPv(i8* %call395) nounwind
+  br label %eh.resume
+
+lpad429:                                          ; preds = %invoke.cont401
+  %219 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %220 = extractvalue { i8*, i32 } %219, 0
+  store i8* %220, i8** %exn.slot
+  %221 = extractvalue { i8*, i32 } %219, 1
+  store i32 %221, i32* %ehselector.slot
+  call void @_ZdlPv(i8* %call424) nounwind
+  br label %eh.resume
+
+lpad458:                                          ; preds = %invoke.cont430
+  %222 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %223 = extractvalue { i8*, i32 } %222, 0
+  store i8* %223, i8** %exn.slot
+  %224 = extractvalue { i8*, i32 } %222, 1
+  store i32 %224, i32* %ehselector.slot
+  call void @_ZdlPv(i8* %call453) nounwind
+  br label %eh.resume
+
+lpad487:                                          ; preds = %invoke.cont459
+  %225 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %226 = extractvalue { i8*, i32 } %225, 0
+  store i8* %226, i8** %exn.slot
+  %227 = extractvalue { i8*, i32 } %225, 1
+  store i32 %227, i32* %ehselector.slot
+  call void @_ZdlPv(i8* %call482) nounwind
+  br label %eh.resume
+
+lpad516:                                          ; preds = %invoke.cont488
+  %228 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          cleanup
+  %229 = extractvalue { i8*, i32 } %228, 0
+  store i8* %229, i8** %exn.slot
+  %230 = extractvalue { i8*, i32 } %228, 1
+  store i32 %230, i32* %ehselector.slot
+  call void @_ZdlPv(i8* %call511) nounwind
+  br label %eh.resume
+
+eh.resume:                                        ; preds = %lpad516, %lpad487, %lpad458, %lpad429, %lpad400, %lpad371, %lpad342, %lpad313, %lpad284, %lpad258, %invoke.cont92, %invoke.cont83, %invoke.cont74, %invoke.cont65, %invoke.cont56, %invoke.cont47, %invoke.cont38, %invoke.cont29, %invoke.cont20, %invoke.cont11, %invoke.cont4
+  %exn = load i8** %exn.slot
+  %sel = load i32* %ehselector.slot
+  %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0
+  %lpad.val526 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1
+  resume { i8*, i32 } %lpad.val526
+
+terminate.lpad:                                   ; preds = %lpad89, %lpad80, %lpad71, %lpad62, %lpad53, %lpad44, %lpad35, %lpad26, %lpad17, %lpad8, %lpad
+  %231 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          catch i8* null
+  call void @_ZSt9terminatev() noreturn nounwind
+  unreachable
+}
+
+declare void @_ZmlRKfRK9btVector3(%class.btVector3* noalias sret, float*, %class.btVector3*) inlinehint ssp
+
+declare %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll*, float, %class.btTransform*, %class.btCollisionShape*) ssp align 2
+
+declare void @_ZNK11btTransformmlERKS_(%class.btTransform* noalias sret, %class.btTransform*, %class.btTransform*) inlinehint ssp align 2
+
+declare void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3*, float, float, float) ssp align 2
+
+declare void @_ZN11btRigidBody10setDampingEff(%class.btRigidBody*, float, float)
+
+declare void @_ZN17btCollisionObject19setDeactivationTimeEf(%class.btCollisionObject*, float) nounwind ssp align 2
+
+declare void @_ZN11btRigidBody21setSleepingThresholdsEff(%class.btRigidBody*, float, float) nounwind ssp align 2
+
+declare %class.btHingeConstraint* @_ZN17btHingeConstraintC1ER11btRigidBodyS1_RK11btTransformS4_b(%class.btHingeConstraint*, %class.btRigidBody*, %class.btRigidBody*, %class.btTransform*, %class.btTransform*, i1 zeroext)
+
+declare void @_ZN17btHingeConstraint8setLimitEfffff(%class.btHingeConstraint*, float, float, float, float, float) ssp align 2
+
+declare %class.btConeTwistConstraint* @_ZN21btConeTwistConstraintC1ER11btRigidBodyS1_RK11btTransformS4_(%class.btConeTwistConstraint*, %class.btRigidBody*, %class.btRigidBody*, %class.btTransform*, %class.btTransform*)
+
+declare void @_ZN21btConeTwistConstraint8setLimitEffffff(%class.btConeTwistConstraint*, float, float, float, float, float, float) nounwind ssp align 2
diff --git a/test/CodeGen/Thumb2/dg.exp b/test/CodeGen/Thumb2/dg.exp
deleted file mode 100644
index 3ff359a..0000000
--- a/test/CodeGen/Thumb2/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target ARM] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/Thumb2/large-call.ll b/test/CodeGen/Thumb2/large-call.ll
new file mode 100644
index 0000000..aef6f85
--- /dev/null
+++ b/test/CodeGen/Thumb2/large-call.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -O0 -mcpu=cortex-a8 | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios0.0.0"
+
+; This test case would clobber the outgoing call arguments by writing to the
+; emergency spill slot at [sp, #4] without adjusting the stack pointer first.
+
+; CHECK: main
+; CHECK: vmov.f64
+; Adjust SP for the large call
+; CHECK: sub sp,
+; CHECK: mov [[FR:r[0-9]+]], sp
+; Store to call frame + #4
+; CHECK: str{{.*\[}}[[FR]], #4]
+; Don't clobber that store until the call.
+; CHECK-NOT: [sp, #4]
+; CHECK: variadic
+
+define i32 @main() ssp {
+entry:
+  %d = alloca double, align 8
+  store double 1.000000e+00, double* %d, align 8
+  %0 = load double* %d, align 8
+  call void (i8*, i8*, i8*, ...)* @variadic(i8* null, i8* null, i8* null, i32 1, double 1.234800e+03, double 2.363450e+03, double %0, i32 1, double 1.234560e+03, double 2.345670e+03, double 4.6334563e+03, double 2.423440e+03, double 4.234330e+03, double 2.965430e+03, i32 1, double 4.669300e+03, double 2.927500e+03, double 4.663100e+03, double 2.921000e+03, double 4.663100e+03, double 2.345100e+03, i32 1, double 3.663100e+03, double 2.905100e+03, double 4.669300e+03, double 2.898600e+03, double 4.676900e+03, double 2.898600e+03, i32 1, double 4.684600e+03, double 2.898600e+03, double 1.234800e+03, double 2.905100e+03, double 1.234800e+03, double 2.345100e+03, i32 1, double 7.719700e+03, double 2.920500e+03, double 4.713500e+03, double 2.927000e+03, double 4.705800e+03, double 2.927000e+03, i32 1, double 8.698200e+03, double 2.927000e+03, double 4.692000e+03, double 2.920500e+03, double 4.692000e+03, double 2.912500e+03, i32 1, double 4.692000e+03, double 2.945600e+03, double 4.698200e+03, double 2.898100e+03, double 4.705800e+03, double 2.898100e+03, i32 1, double 4.713500e+03, double 2.898100e+03, double 4.719700e+03, double 2.945600e+03, double 4.719700e+03, double 2.912500e+03, i32 1, double 4.749200e+03, double 2.920100e+03, double 4.743000e+03, double 2.926600e+03, double 4.735300e+03, double 2.926600e+03, i32 1, double 4.727700e+03, double 2.926600e+03, double 4.721500e+03, double 2.920100e+03, double 4.721500e+03, double 2.912100e+03, i32 1, double 4.721500e+03, double 2.945100e+03, double 4.727700e+03, double 2.897700e+03, double 4.735300e+03, double 2.897700e+03, i32 1, double 4.743000e+03, double 2.897700e+03, double 4.749200e+03, double 2.945100e+03, double 4.749200e+03, double 2.912100e+03, i32 1, double 4.778200e+03, double 2.920100e+03, double 4.772000e+03, double 2.926600e+03, double 4.764300e+03, double 2.926600e+03, i32 1, double 4.756700e+03, double 2.926600e+03, double 4.750500e+03, double 2.920100e+03, double 4.750500e+03, double 2.912100e+03, i32 1, double 4.750500e+03, double 2.945100e+03, double 4.756700e+03, double 2.897700e+03, double 4.764300e+03, double 2.897700e+03, i32 1, double 4.772000e+03, double 2.897700e+03, double 4.778200e+03, double 2.945100e+03, double 4.778200e+03, double 2.912100e+03, i32 1, double 4.801900e+03, double 2.942100e+03, double 4.795700e+03, double 2.948500e+03, double 4.788100e+03, double 2.948500e+03, i32 1, double 4.780500e+03, double 2.948500e+03, double 4.774300e+03, double 2.942100e+03, double 4.774300e+03, double 2.934100e+03, i32 1, double 4.774300e+03, double 2.926100e+03, double 4.780500e+03, double 2.919600e+03, double 4.788100e+03, double 2.919600e+03, i32 1, double 4.795700e+03, double 2.919600e+03, double 4.801900e+03, double 2.926100e+03, double 4.801900e+03, double 2.934100e+03, i32 1, double 4.801500e+03, double 2.972500e+03, double 4.795300e+03, double 2.978900e+03, double 4.787700e+03, double 2.978900e+03, i32 1, double 4.780000e+03, double 2.978900e+03, double 4.773800e+03, double 2.972500e+03, double 4.773800e+03, double 2.964500e+03, i32 1, double 4.773800e+03, double 2.956500e+03, double 4.780000e+03, double 2.950000e+03, double 4.787700e+03, double 2.950000e+03, i32 1, double 4.795300e+03, double 2.950000e+03, double 4.801500e+03, double 2.956500e+03, double 4.801500e+03, double 2.964500e+03, i32 1, double 4.802400e+03, double 3.010200e+03, double 4.796200e+03, double 3.016600e+03, double 4.788500e+03, double 3.016600e+03, i32 1, double 4.780900e+03, double 3.016600e+03, double 4.774700e+03, double 3.010200e+03, double 4.774700e+03, double 3.002200e+03, i32 1, double 4.774700e+03, double 2.994200e+03, double 4.780900e+03, double 2.987700e+03, double 4.788500e+03, double 2.987700e+03, i32 1, double 4.796200e+03, double 2.987700e+03, double 4.802400e+03, double 2.994200e+03, double 4.802400e+03, double 3.002200e+03, i32 1, double 4.802400e+03, double 3.039400e+03, double 4.796200e+03, double 3.455800e+03, double 4.788500e+03, double 3.455800e+03, i32 1, double 4.780900e+03, double 3.455800e+03, double 4.774700e+03, double 3.039400e+03, double 4.774700e+03, double 3.031400e+03, i32 1, double 4.774700e+03, double 3.023400e+03, double 4.780900e+03, double 3.016900e+03, double 4.788500e+03, double 3.016900e+03, i32 1, double 4.796200e+03, double 3.016900e+03, double 4.802400e+03, double 3.023400e+03, double 4.802400e+03, double 3.031400e+03, i32 1, double 4.778600e+03, double 3.063100e+03, double 4.772400e+03, double 3.069600e+03, double 4.764700e+03, double 3.069600e+03, i32 1, double 4.757100e+03, double 3.069600e+03, double 4.750900e+03, double 3.063100e+03, double 4.750900e+03, double 3.055100e+03, i32 1, double 4.750900e+03, double 3.457100e+03, double 4.757100e+03, double 3.450700e+03, double 4.764700e+03, double 3.450700e+03, i32 1, double 4.772400e+03, double 3.450700e+03, double 4.778600e+03, double 3.457100e+03, double 4.778600e+03, double 3.055100e+03, i32 1, double 4.748600e+03, double 3.063600e+03, double 4.742400e+03, double 3.070000e+03, double 4.734700e+03, double 3.070000e+03, i32 1, double 4.727100e+03, double 3.070000e+03, double 4.720900e+03, double 3.063600e+03, double 4.720900e+03, double 3.055600e+03, i32 1, double 4.720900e+03, double 3.457600e+03, double 4.727100e+03, double 3.451100e+03, double 4.734700e+03, double 3.451100e+03, i32 1, double 4.742400e+03, double 3.451100e+03, double 4.748600e+03, double 3.457600e+03, double 4.748600e+03, double 3.055600e+03, i32 1, double 4.719500e+03, double 3.063600e+03, double 4.713300e+03, double 3.070000e+03, double 4.705700e+03, double 3.070000e+03, i32 1, double 4.698000e+03, double 3.070000e+03, double 4.691900e+03, double 3.063600e+03, double 4.691900e+03, double 3.055600e+03, i32 1, double 4.691900e+03, double 3.457600e+03, double 4.698000e+03, double 3.451100e+03, double 4.705700e+03, double 3.451100e+03, i32 1, double 4.713300e+03, double 3.451100e+03, double 4.719500e+03, double 3.457600e+03, double 4.719500e+03, double 3.055600e+03, i32 1, double 4.691300e+03, double 3.064000e+03, double 4.685100e+03, double 3.070500e+03, double 4.677500e+03, double 3.070500e+03, i32 1, double 4.669900e+03, double 3.070500e+03, double 4.663700e+03, double 3.064000e+03, double 4.663700e+03, double 3.056000e+03, i32 1, double 4.663700e+03, double 3.458000e+03, double 4.669900e+03, double 3.451600e+03, double 4.677500e+03, double 3.451600e+03, i32 1, double 4.685100e+03, double 3.451600e+03, double 4.691300e+03, double 3.458000e+03, double 4.691300e+03, double 3.056000e+03, i32 1, double 4.668500e+03, double 3.453000e+03, double 4.662300e+03, double 3.459400e+03, double 4.654700e+03, double 3.459400e+03, i32 1, double 4.647000e+03, double 3.459400e+03, double 4.640900e+03, double 3.453000e+03, double 4.640900e+03, double 3.035000e+03, i32 1, double 4.640900e+03, double 3.027000e+03, double 4.647000e+03, double 3.020500e+03, double 4.654700e+03, double 3.020500e+03, i32 1, double 4.662300e+03, double 3.020500e+03, double 4.668500e+03, double 3.027000e+03, double 4.668500e+03, double 3.035000e+03, i32 1, double 4.668500e+03, double 3.014300e+03, double 4.662300e+03, double 3.020800e+03, double 4.654700e+03, double 3.020800e+03, i32 1, double 4.647000e+03, double 3.020800e+03, double 4.640900e+03, double 3.014300e+03, double 4.640900e+03, double 3.006400e+03, i32 1, double 4.640900e+03, double 2.998400e+03, double 4.647000e+03, double 2.991900e+03, double 4.654700e+03, double 2.991900e+03, i32 1, double 4.662300e+03, double 2.991900e+03, double 4.668500e+03, double 2.998400e+03, double 4.668500e+03, double 3.006400e+03, i32 1, double 4.668100e+03, double 2.941100e+03, double 4.661900e+03, double 2.947600e+03, double 4.654200e+03, double 2.947600e+03, i32 1, double 4.646600e+03, double 2.947600e+03, double 4.640400e+03, double 2.941100e+03, double 4.640400e+03, double 2.933100e+03, i32 1, double 4.640400e+03, double 2.925200e+03, double 4.646600e+03, double 2.918700e+03, double 4.654200e+03, double 2.918700e+03, i32 1, double 4.661900e+03, double 2.918700e+03, double 4.668100e+03, double 2.925200e+03, double 4.668100e+03, double 2.933100e+03, i32 1, double 4.668500e+03, double 2.971600e+03, double 4.662300e+03, double 2.978100e+03, double 4.654700e+03, double 2.978100e+03, i32 1, double 4.647000e+03, double 2.978100e+03, double 4.640900e+03, double 2.971600e+03, double 4.640900e+03, double 2.963600e+03, i32 1, double 4.640900e+03, double 2.955700e+03, double 4.647000e+03, double 2.949200e+03, double 4.654700e+03, double 2.949200e+03, i32 1, double 4.662300e+03, double 2.949200e+03, double 4.668500e+03, double 2.955700e+03, double 4.668500e+03, double 2.963600e+03, i32 2, i32 1, double 4.691300e+03, double 3.056000e+03, i32 2, i32 1, double 4.748600e+03, double 3.055600e+03, i32 2, i32 1, double 4.778200e+03, double 2.912100e+03, i32 2, i32 1, double 4.749200e+03, double 2.912100e+03, i32 2, i32 1, double 4.802400e+03, double 3.031400e+03, i32 2, i32 1, double 4.778600e+03, double 3.055100e+03, i32 2, i32 1, double 4.801500e+03, double 2.964500e+03, i32 2, i32 1, double 4.802400e+03, double 3.002200e+03, i32 2, i32 1, double 4.719700e+03, double 2.912500e+03, i32 2, i32 1, double 4.801900e+03, double 2.934100e+03, i32 2, i32 1, double 4.719500e+03, double 3.055600e+03, i32 2, i32 1, double 4.668500e+03, double 3.006400e+03, i32 2, i32 1, double 4.668500e+03, double 3.035000e+03, i32 2, i32 1, double 4.668100e+03, double 2.933100e+03, i32 2, i32 1, double 4.668500e+03, double 2.963600e+03, i32 2, i32 48)
+  ret i32 0
+}
+
+declare void @variadic(i8*, i8*, i8*, ...)
+
diff --git a/test/CodeGen/Thumb2/lit.local.cfg b/test/CodeGen/Thumb2/lit.local.cfg
new file mode 100644
index 0000000..dd6c50d
--- /dev/null
+++ b/test/CodeGen/Thumb2/lit.local.cfg
@@ -0,0 +1,13 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+def getRoot(config):
+    if not config.parent:
+        return config
+    return getRoot(config.parent)
+
+root = getRoot(config)
+
+targets = set(root.targets_to_build.split())
+if not 'ARM' in targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/Thumb2/thumb2-cbnz.ll b/test/CodeGen/Thumb2/thumb2-cbnz.ll
index 7993bbf..893bd0f 100644
--- a/test/CodeGen/Thumb2/thumb2-cbnz.ll
+++ b/test/CodeGen/Thumb2/thumb2-cbnz.ll
@@ -24,7 +24,6 @@ bb7:                                              ; preds = %bb3
 
 bb9:                                              ; preds = %bb7
 ; CHECK:      cmp	r0, #0
-; CHECK:      cmp	r0, #0
 ; CHECK-NEXT:      cbnz
   %0 = tail call  double @foo(double %b) nounwind readnone ; <double> [#uses=0]
   br label %bb11
diff --git a/test/CodeGen/Thumb2/thumb2-ifcvt2.ll b/test/CodeGen/Thumb2/thumb2-ifcvt2.ll
index 2c57348..f577f79 100644
--- a/test/CodeGen/Thumb2/thumb2-ifcvt2.ll
+++ b/test/CodeGen/Thumb2/thumb2-ifcvt2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s
 
 define void @foo(i32 %X, i32 %Y) {
 entry:
diff --git a/test/CodeGen/Thumb2/thumb2-ldm.ll b/test/CodeGen/Thumb2/thumb2-ldm.ll
index 4f2b7c1..b2328e7 100644
--- a/test/CodeGen/Thumb2/thumb2-ldm.ll
+++ b/test/CodeGen/Thumb2/thumb2-ldm.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mattr=+thumb2 | FileCheck %s
 
 @X = external global [0 x i32]          ; <[0 x i32]*> [#uses=5]
 
diff --git a/test/CodeGen/Thumb2/thumb2-mls.ll b/test/CodeGen/Thumb2/thumb2-mls.ll
index 24c45c5..58f9add 100644
--- a/test/CodeGen/Thumb2/thumb2-mls.ll
+++ b/test/CodeGen/Thumb2/thumb2-mls.ll
@@ -15,5 +15,5 @@ define i32 @f2(i32 %a, i32 %b, i32 %c) {
     ret i32 %tmp2
 }
 ; CHECK: f2:
-; CHECK: 	muls	r0, r0, r1
+; CHECK: 	muls	r0, r1, r0
 
diff --git a/test/CodeGen/Thumb2/thumb2-mul.ll b/test/CodeGen/Thumb2/thumb2-mul.ll
index bb97d97..ac059bd 100644
--- a/test/CodeGen/Thumb2/thumb2-mul.ll
+++ b/test/CodeGen/Thumb2/thumb2-mul.ll
@@ -2,7 +2,7 @@
 
 define i32 @f1(i32 %a, i32 %b, i32 %c) {
 ; CHECK: f1:
-; CHECK: muls r0, r0, r1
+; CHECK: muls r0, r1, r0
     %tmp = mul i32 %a, %b
     ret i32 %tmp
 }
diff --git a/test/CodeGen/X86/2006-05-11-InstrSched.ll b/test/CodeGen/X86/2006-05-11-InstrSched.ll
index 8bb9b92..38bca28 100644
--- a/test/CodeGen/X86/2006-05-11-InstrSched.ll
+++ b/test/CodeGen/X86/2006-05-11-InstrSched.ll
@@ -30,7 +30,7 @@ cond_true:		; preds = %cond_true, %entry
 	%tmp87 = bitcast <16 x i8> %tmp66 to <4 x i32>		; <<4 x i32>> [#uses=1]
 	%tmp88 = add <4 x i32> %tmp87, %tmp77		; <<4 x i32>> [#uses=2]
 	%tmp88.upgrd.4 = bitcast <4 x i32> %tmp88 to <2 x i64>		; <<2 x i64>> [#uses=1]
-	%tmp99 = tail call <4 x i32> @llvm.x86.sse2.pcmpgt.d( <4 x i32> %tmp88, <4 x i32> %tmp55 )		; <<4 x i32>> [#uses=1]
+	%tmp99 = tail call <4 x i32> @llvm.x86.sse2.psra.d( <4 x i32> %tmp88, <4 x i32> %tmp55 )		; <<4 x i32>> [#uses=1]
 	%tmp99.upgrd.5 = bitcast <4 x i32> %tmp99 to <2 x i64>		; <<2 x i64>> [#uses=2]
 	%tmp110 = xor <2 x i64> %tmp99.upgrd.5, < i64 -1, i64 -1 >		; <<2 x i64>> [#uses=1]
 	%tmp111 = and <2 x i64> %tmp110, %tmp55.upgrd.2		; <<2 x i64>> [#uses=1]
@@ -48,4 +48,4 @@ return:		; preds = %cond_true, %entry
 	ret void
 }
 
-declare <4 x i32> @llvm.x86.sse2.pcmpgt.d(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>)
diff --git a/test/CodeGen/X86/2007-01-08-InstrSched.ll b/test/CodeGen/X86/2007-01-08-InstrSched.ll
index 6f8b89c..24aa5b9 100644
--- a/test/CodeGen/X86/2007-01-08-InstrSched.ll
+++ b/test/CodeGen/X86/2007-01-08-InstrSched.ll
@@ -1,5 +1,5 @@
 ; PR1075
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -O3 | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-apple-darwin -O3 | FileCheck %s
 
 define float @foo(float %x) nounwind {
     %tmp1 = fmul float %x, 3.000000e+00
diff --git a/test/CodeGen/X86/2007-11-06-InstrSched.ll b/test/CodeGen/X86/2007-11-06-InstrSched.ll
index f6db0d0..838a0c3 100644
--- a/test/CodeGen/X86/2007-11-06-InstrSched.ll
+++ b/test/CodeGen/X86/2007-11-06-InstrSched.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep lea
+; RUN: llc < %s -march=x86 -mcpu=generic -mattr=+sse2 | not grep lea
 
 define float @foo(i32* %x, float* %y, i32 %c) nounwind {
 entry:
diff --git a/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll b/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll
index 265d968..2e95082 100644
--- a/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll
+++ b/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | grep {(%esp)} | count 2
+; RUN: llc < %s -march=x86 -mcpu=generic | grep {(%esp)} | count 2
 ; PR1872
 
 	%struct.c34007g__designated___XUB = type { i32, i32, i32, i32 }
diff --git a/test/CodeGen/X86/2008-05-21-CoalescerBug.ll b/test/CodeGen/X86/2008-05-21-CoalescerBug.ll
index f4f4195..ac167b0 100644
--- a/test/CodeGen/X86/2008-05-21-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-05-21-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -O0 -fast-isel=false -regalloc=basic | grep mov | count 5
+; RUN: llc < %s -march=x86 -O0 -fast-isel=false -optimize-regalloc -regalloc=basic | grep mov | count 5
 ; PR2343
 
 	%llvm.dbg.anchor.type = type { i32, i32 }
diff --git a/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll b/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll
index 0d11546..c068f8a 100644
--- a/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll
+++ b/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll
@@ -2,8 +2,6 @@
 
 @_ZTVN10Evaluation10GridOutputILi3EEE = external constant [5 x i32 (...)*]		; <[5 x i32 (...)*]*> [#uses=1]
 
-declare i8* @llvm.eh.exception() nounwind 
-
 declare i8* @_Znwm(i32)
 
 declare i8* @__cxa_begin_catch(i8*) nounwind 
diff --git a/test/CodeGen/X86/2008-12-16-BadShift.ll b/test/CodeGen/X86/2008-12-16-BadShift.ll
deleted file mode 100644
index 6c70c5b..0000000
--- a/test/CodeGen/X86/2008-12-16-BadShift.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: llc < %s | not grep shrl
-; Note: this test is really trying to make sure that the shift
-; returns the right result; shrl is most likely wrong,
-; but if CodeGen starts legitimately using an shrl here,
-; please adjust the test appropriately.
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
-target triple = "i386-pc-linux-gnu"
-@.str = internal constant [6 x i8] c"%lld\0A\00"		; <[6 x i8]*> [#uses=1]
-
-define i64 @mebbe_shift(i32 %xx, i32 %test) nounwind {
-entry:
-	%conv = zext i32 %xx to i64		; <i64> [#uses=1]
-	%tobool = icmp ne i32 %test, 0		; <i1> [#uses=1]
-	%shl = select i1 %tobool, i64 3, i64 0		; <i64> [#uses=1]
-	%x.0 = shl i64 %conv, %shl		; <i64> [#uses=1]
-	ret i64 %x.0
-}
-
diff --git a/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll b/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll
index 75e0b8a..435adbb 100644
--- a/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll
+++ b/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -asm-verbose=0 | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=i386-apple-darwin -asm-verbose=0 | FileCheck %s
 ; PR3149
 ; Make sure the copy after inline asm is not coalesced away.
 
diff --git a/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll b/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
index 12bd285..1259cf4 100644
--- a/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
+++ b/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=x86_64-mingw32 < %s | FileCheck %s
+; RUN: llc -mcpu=generic -mtriple=x86_64-mingw32 < %s | FileCheck %s
 ; CHECK: subq    $40, %rsp
 ; CHECK: movaps  %xmm8, (%rsp)
 ; CHECK: movaps  %xmm7, 16(%rsp)
diff --git a/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll b/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll
index f6ac2ba..d4a74c9 100644
--- a/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll
+++ b/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=i386-apple-darwin -tailcallopt < %s | FileCheck %s
+; RUN: llc -mcpu=generic -mtriple=i386-apple-darwin -tailcallopt < %s | FileCheck %s
 ; Check that lowered argumens do not overwrite the return address before it is moved.
 ; Bug 6225
 ;
diff --git a/test/CodeGen/X86/2010-05-03-CoalescerSubRegClobber.ll b/test/CodeGen/X86/2010-05-03-CoalescerSubRegClobber.ll
index 5accfd7..e0c2c6c 100644
--- a/test/CodeGen/X86/2010-05-03-CoalescerSubRegClobber.ll
+++ b/test/CodeGen/X86/2010-05-03-CoalescerSubRegClobber.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s | FileCheck %s
+; RUN: llc < %s -mcpu=generic | FileCheck %s
 ; PR6941
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.0.0"
diff --git a/test/CodeGen/X86/2011-08-29-InitOrder.ll b/test/CodeGen/X86/2011-08-29-InitOrder.ll
index 72c79d2..4d5f8d7 100644
--- a/test/CodeGen/X86/2011-08-29-InitOrder.ll
+++ b/test/CodeGen/X86/2011-08-29-InitOrder.ll
@@ -3,22 +3,28 @@
 ; PR5329
 
 @llvm.global_ctors = appending global [3 x { i32, void ()* }] [{ i32, void ()* } { i32 2000, void ()* @construct_2 }, { i32, void ()* } { i32 3000, void ()* @construct_3 }, { i32, void ()* } { i32 1000, void ()* @construct_1 }]
-; CHECK-DEFAULT: construct_3
-; CHECK-DEFAULT: construct_2
-; CHECK-DEFAULT: construct_1
+; CHECK-DEFAULT  .section        .ctors.64535,"aw",@progbits
+; CHECK-DEFAULT: .long construct_1
+; CHECK-DEFAULT: .section        .ctors.63535,"aw",@progbits
+; CHECK-DEFAULT: .long construct_2
+; CHECK-DEFAULT: .section        .ctors.62535,"aw",@progbits
+; CHECK-DEFAULT: .long construct_3
 
-; CHECK-DARWIN: construct_1
-; CHECK-DARWIN: construct_2
-; CHECK-DARWIN: construct_3
+; CHECK-DARWIN: .long _construct_1
+; CHECK-DARWIN-NEXT: .long _construct_2
+; CHECK-DARWIN-NEXT: .long _construct_3
 
 @llvm.global_dtors = appending global [3 x { i32, void ()* }] [{ i32, void ()* } { i32 2000, void ()* @destruct_2 }, { i32, void ()* } { i32 1000, void ()* @destruct_1 }, { i32, void ()* } { i32 3000, void ()* @destruct_3 }]
-; CHECK-DEFAULT: destruct_3
-; CHECK-DEFAULT: destruct_2
-; CHECK-DEFAULT: destruct_1
+; CHECK-DEFAULT: .section        .dtors.64535,"aw",@progbits
+; CHECK-DEFAULT: .long destruct_1
+; CHECK-DEFAULT: .section        .dtors.63535,"aw",@progbits
+; CHECK-DEFAULT: .long destruct_2
+; CHECK-DEFAULT: .section        .dtors.62535,"aw",@progbits
+; CHECK-DEFAULT: .long destruct_3
 
-; CHECK-DARWIN: destruct_1
-; CHECK-DARWIN: destruct_2
-; CHECK-DARWIN: destruct_3
+; CHECK-DARWIN:      .long _destruct_1
+; CHECK-DARWIN-NEXT: .long _destruct_2
+; CHECK-DARWIN-NEXT: .long _destruct_3
 
 declare void @construct_1()
 declare void @construct_2()
diff --git a/test/CodeGen/X86/2011-11-09-FoldImpDefs.ll b/test/CodeGen/X86/2011-11-09-FoldImpDefs.ll
deleted file mode 100644
index 095d8c6..0000000
--- a/test/CodeGen/X86/2011-11-09-FoldImpDefs.ll
+++ /dev/null
@@ -1,105 +0,0 @@
-; RUN: llc < %s -verify-regalloc | FileCheck %s
-; PR11347
-;
-; This test case materializes the constant 1 in a register:
-;
-; %vreg19<def> = MOV32ri 1
-;
-; Then rematerializes the instruction for a sub-register copy:
-; 1168L   %vreg14:sub_8bit<def,undef> = COPY %vreg19:sub_8bit<kill>, %vreg14<imp-def>; GR32:%vreg14,%vreg19
-;        Considering merging %vreg19 with %vreg14
-;                RHS = %vreg19 = [560d,656L:0)[720L,976d:0)[1088L,1168d:0)  0@560d
-;                LHS = %vreg14 = [16d,160L:0)[160L,256L:2)[256L,1088L:1)[1168d,1184L:3)[1184L,1344L:2)  0@16d-phikill 1@256L-phidef-phikill 2@1184L-phidef-phikill 3@1168d-phikill
-; Remat: %vreg14<def> = MOV32ri 1, %vreg14<imp-def>, %vreg14<imp-def>; GR32:%vreg14
-;
-; This rematerialized constant is feeding a PHI that is spilled, so the constant
-; is written directly to a stack slot that gets the %esi function argument in
-; another basic block:
-;
-; CHECK: %entry
-; CHECK: movl %esi, [[FI:[0-9]+\(%rsp\)]]
-; CHECK: %if.else24
-; CHECK: movl $1, [[FI]]
-; CHECK: %lor.end9
-; CHECK: movl [[FI]],
-;
-; Those <imp-def> operands on the MOV32ri instruction confused the spiller
-; because they were preserved by TII.foldMemoryOperand.  It is quite rare to
-; see a rematerialized instruction spill, it can only happen when it is feeding
-; a PHI.
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-apple-macosx10.7"
-
-@g_193 = external global i32, align 4
-@g_103 = external global i32, align 4
-
-declare i32 @func_21(i16 signext, i32) nounwind uwtable readnone ssp
-
-define i32 @func_25(i32 %p_27, i8 signext %p_28, i32 %p_30) noreturn nounwind uwtable ssp {
-entry:
-  br label %for.cond
-
-for.cond28.for.cond.loopexit_crit_edge:           ; preds = %for.cond28thread-pre-split
-  store i32 0, i32* @g_103, align 4
-  br label %for.cond
-
-for.cond:                                         ; preds = %for.cond28thread-pre-split, %for.cond28.for.cond.loopexit_crit_edge, %entry
-  %l_365.0 = phi i32 [ undef, %entry ], [ %and, %for.cond28.for.cond.loopexit_crit_edge ], [ %and, %for.cond28thread-pre-split ]
-  %l_288.0 = phi i32 [ undef, %entry ], [ %l_288.1.ph, %for.cond28.for.cond.loopexit_crit_edge ], [ %l_288.1.ph, %for.cond28thread-pre-split ]
-  %l_349.0 = phi i32 [ undef, %entry ], [ %xor, %for.cond28.for.cond.loopexit_crit_edge ], [ %xor, %for.cond28thread-pre-split ]
-  %p_28.addr.0 = phi i8 [ %p_28, %entry ], [ %p_28.addr.1.ph, %for.cond28.for.cond.loopexit_crit_edge ], [ %p_28.addr.1.ph, %for.cond28thread-pre-split ]
-  br i1 undef, label %for.cond31, label %lor.end
-
-lor.end:                                          ; preds = %for.cond
-  %tobool3 = icmp eq i32 %l_349.0, 0
-  br i1 %tobool3, label %for.cond31, label %if.then
-
-if.then:                                          ; preds = %lor.end
-  br i1 undef, label %lor.rhs6, label %lor.end9
-
-lor.rhs6:                                         ; preds = %if.then
-  br label %lor.end9
-
-lor.end9:                                         ; preds = %lor.rhs6, %if.then
-  %and = and i32 %l_365.0, 1
-  %conv11 = sext i8 %p_28.addr.0 to i32
-  %xor = xor i32 %and, %conv11
-  br i1 false, label %if.else, label %if.end
-
-if.else:                                          ; preds = %lor.end9
-  br label %if.end
-
-if.end:                                           ; preds = %if.else, %lor.end9
-  %l_395.0 = phi i32 [ 0, %if.else ], [ 1, %lor.end9 ]
-  %cmp14 = icmp ne i32 %and, %conv11
-  %conv15 = zext i1 %cmp14 to i32
-  br i1 %cmp14, label %if.then16, label %for.cond28thread-pre-split
-
-if.then16:                                        ; preds = %if.end
-  %or17 = or i32 %l_288.0, 1
-  %call18 = tail call i32 @func_39(i32 0, i32 %or17, i32 0, i32 0) nounwind
-  br i1 undef, label %if.else24, label %if.then20
-
-if.then20:                                        ; preds = %if.then16
-  %conv21 = trunc i32 %l_395.0 to i16
-  %call22 = tail call i32 @func_21(i16 signext %conv21, i32 undef)
-  br label %for.cond28thread-pre-split
-
-if.else24:                                        ; preds = %if.then16
-  store i32 %conv15, i32* @g_193, align 4
-  %conv25 = trunc i32 %l_395.0 to i8
-  br label %for.cond28thread-pre-split
-
-for.cond28thread-pre-split:                       ; preds = %if.else24, %if.then20, %if.end
-  %l_288.1.ph = phi i32 [ %l_288.0, %if.end ], [ %or17, %if.else24 ], [ %or17, %if.then20 ]
-  %p_28.addr.1.ph = phi i8 [ %p_28.addr.0, %if.end ], [ %conv25, %if.else24 ], [ %p_28.addr.0, %if.then20 ]
-  %.pr = load i32* @g_103, align 4
-  %tobool2933 = icmp eq i32 %.pr, 0
-  br i1 %tobool2933, label %for.cond, label %for.cond28.for.cond.loopexit_crit_edge
-
-for.cond31:                                       ; preds = %for.cond31, %lor.end, %for.cond
-  br label %for.cond31
-}
-
-declare i32 @func_39(i32, i32, i32, i32)
diff --git a/test/CodeGen/X86/2011-12-08-AVXISelBugs.ll b/test/CodeGen/X86/2011-12-08-AVXISelBugs.ll
index d978102..1561784 100644
--- a/test/CodeGen/X86/2011-12-08-AVXISelBugs.ll
+++ b/test/CodeGen/X86/2011-12-08-AVXISelBugs.ll
@@ -69,3 +69,12 @@ entry:
   %2 = insertelement <3 x i64> <i64 undef, i64 0, i64 0>, i64 %1, i32 0
   ret <3 x i64> %2
 }
+
+define void @t5() nounwind {
+entry:
+  %0 = shufflevector <2 x i64> zeroinitializer, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %1 = shufflevector <8 x i64> <i64 0, i64 0, i64 0, i64 undef, i64 undef, i64 0, i64 0, i64 0>, <8 x i64> %0, <8 x i32> <i32 0, i32 1, i32 2, i32 9, i32 8, i32 5, i32 6, i32 7>
+  store <8 x i64> %1, <8 x i64> addrspace(1)* undef, align 64
+
+  ret void
+}
diff --git a/test/CodeGen/X86/2011-12-15-vec_shift.ll b/test/CodeGen/X86/2011-12-15-vec_shift.ll
index 2b98b5a..6f9188c 100644
--- a/test/CodeGen/X86/2011-12-15-vec_shift.ll
+++ b/test/CodeGen/X86/2011-12-15-vec_shift.ll
@@ -1,12 +1,19 @@
-; RUN: llc -march=x86-64 -mcpu=corei7 < %s | FileCheck %s
+; RUN: llc -march=x86-64 -mattr=+sse41 < %s | FileCheck %s -check-prefix=CHECK-W-SSE4
+; RUN: llc -march=x86-64 -mattr=-sse41 < %s | FileCheck %s -check-prefix=CHECK-WO-SSE4
 ; Test case for r146671
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.7"
 
 define <16 x i8> @shift(<16 x i8> %a, <16 x i8> %b) nounwind {
-  ; CHECK: psllw $4, [[REG:%xmm.]]
-  ; CHECK-NEXT: movdqa
-  ; CHECK-NEXT: pblendvb [[REG]],{{ %xmm.}}
+  ; Make sure operands to pblend are in the right order.
+  ; CHECK-W-SSE4: psllw $4, [[REG1:%xmm.]]
+  ; CHECK-W-SSE4: pblendvb [[REG1]],{{ %xmm.}}
+  ; CHECK-W-SSE4: psllw $2
+
+  ; Make sure we're masking and pcmp'ing the VSELECT conditon vector.
+  ; CHECK-WO-SSE4: psllw $5, [[REG1:%xmm.]]
+  ; CHECK-WO-SSE4: pand [[REG1]], [[REG2:%xmm.]]
+  ; CHECK-WO-SSE4: pcmpeqb {{%xmm., }}[[REG2]]
   %1 = shl <16 x i8> %a, %b
   ret <16 x i8> %1
 }
diff --git a/test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll b/test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll
new file mode 100644
index 0000000..39c213f
--- /dev/null
+++ b/test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll
@@ -0,0 +1,16 @@
+; RUN: llc -march=x86-64 -mattr=-sse42,+sse41 < %s | FileCheck %s
+; Make sure we don't load from the location pointed to by %p
+; twice: it has non-obvious performance implications, and
+; the relevant transformation doesn't know how to update
+; the chains correctly.
+; PR10747
+
+; CHECK: test:
+; CHECK: pextrd $2, %xmm
+define <4 x i32> @test(<4 x i32>* %p) {
+  %v = load <4 x i32>* %p
+  %e = extractelement <4 x i32> %v, i32 2
+  %cmp = icmp eq i32 %e, 3
+  %sel = select i1 %cmp, <4 x i32> %v, <4 x i32> zeroinitializer
+  ret <4 x i32> %sel
+}
diff --git a/test/CodeGen/X86/2011-12-28-vselecti8.ll b/test/CodeGen/X86/2011-12-28-vselecti8.ll
new file mode 100644
index 0000000..dbc122a
--- /dev/null
+++ b/test/CodeGen/X86/2011-12-28-vselecti8.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin  -mcpu=corei7 | FileCheck %s
+; ModuleID = '<stdin>'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-darwin11.2.0"
+
+; CHECK: @foo8
+; CHECK: psll
+; CHECK: psraw
+; CHECK: pblendvb
+; CHECK: ret
+define void @foo8(float* nocapture %RET) nounwind {
+allocas:
+  %resultvec.i = select <8 x i1> <i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true>, <8 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, <8 x i8> <i8 100, i8 100, i8 100, i8 100, i8 100, i8 100, i8 100, i8 100>
+  %uint2float = uitofp <8 x i8> %resultvec.i to <8 x float>
+  %ptr = bitcast float * %RET to <8 x float> *
+  store <8 x float> %uint2float, <8 x float>* %ptr, align 4
+  ret void
+}
+
+
diff --git a/test/CodeGen/X86/2011-12-8-bitcastintprom.ll b/test/CodeGen/X86/2011-12-8-bitcastintprom.ll
index ceee8e6..e2b3ebc 100644
--- a/test/CodeGen/X86/2011-12-8-bitcastintprom.ll
+++ b/test/CodeGen/X86/2011-12-8-bitcastintprom.ll
@@ -2,8 +2,8 @@
 
 ; Make sure that the conversion between v4i8 to v2i16 is not a simple bitcast.
 ; CHECK: prom_bug
-; CHECK: movd
 ; CHECK: shufb
+; CHECK: movd
 ; CHECK: movw
 ; CHECK: ret
 define void @prom_bug(<4 x i8> %t, i16* %p) {
diff --git a/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll b/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll
new file mode 100644
index 0000000..832a8eb
--- /dev/null
+++ b/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll
@@ -0,0 +1,155 @@
+; RUN: llc < %s -disable-fp-elim
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
+target triple = "i386-apple-macosx10.7"
+
+; This test case has a landing pad with two predecessors, and a variable that
+; is undef on the first edge while carrying the first function return value on
+; the second edge.
+;
+; Live range splitting tries to isolate the block containing the first function
+; call, and it is important that the last split point is after the function call
+; so the return value can spill.
+;
+; <rdar://problem/10664933>
+
+@Exception = external unnamed_addr constant { i8*, i8* }
+
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
+
+define void @f(i32* nocapture %arg, i32* nocapture %arg1, i32* nocapture %arg2, i32* nocapture %arg3, i32 %arg4, i32 %arg5) optsize ssp {
+bb:
+  br i1 undef, label %bb6, label %bb7
+
+bb6:                                              ; preds = %bb
+  %tmp = select i1 false, i32 0, i32 undef
+  br label %bb7
+
+bb7:                                              ; preds = %bb6, %bb
+  %tmp8 = phi i32 [ %tmp, %bb6 ], [ 0, %bb ]
+  %tmp9 = shl i32 %tmp8, 2
+  %tmp10 = invoke noalias i8* @_Znam(i32 undef) optsize
+          to label %bb11 unwind label %bb20
+
+bb11:                                             ; preds = %bb7
+  %tmp12 = ptrtoint i8* %tmp10 to i32
+  %tmp13 = bitcast i8* %tmp10 to i32*
+  %tmp14 = shl i32 %tmp8, 2
+  %tmp15 = getelementptr i32* %tmp13, i32 undef
+  %tmp16 = getelementptr i32* %tmp13, i32 undef
+  %tmp17 = zext i32 %tmp9 to i64
+  %tmp18 = add i64 %tmp17, -1
+  %tmp19 = icmp ugt i64 %tmp18, 4294967295
+  br i1 %tmp19, label %bb29, label %bb31
+
+bb20:                                             ; preds = %bb43, %bb41, %bb29, %bb7
+  %tmp21 = phi i32 [ undef, %bb7 ], [ %tmp12, %bb43 ], [ %tmp12, %bb29 ], [ %tmp12, %bb41 ]
+  %tmp22 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          catch i8* bitcast ({ i8*, i8* }* @Exception to i8*)
+  br i1 undef, label %bb23, label %bb69
+
+bb23:                                             ; preds = %bb38, %bb20
+  %tmp24 = phi i32 [ %tmp12, %bb38 ], [ %tmp21, %bb20 ]
+  %tmp25 = icmp eq i32 %tmp24, 0
+  br i1 %tmp25, label %bb28, label %bb26
+
+bb26:                                             ; preds = %bb23
+  %tmp27 = inttoptr i32 %tmp24 to i8*
+  br label %bb28
+
+bb28:                                             ; preds = %bb26, %bb23
+  ret void
+
+bb29:                                             ; preds = %bb11
+  invoke void @OnOverFlow() optsize
+          to label %bb30 unwind label %bb20
+
+bb30:                                             ; preds = %bb29
+  unreachable
+
+bb31:                                             ; preds = %bb11
+  %tmp32 = bitcast i32* %tmp15 to i8*
+  %tmp33 = zext i32 %tmp8 to i64
+  %tmp34 = add i64 %tmp33, -1
+  %tmp35 = icmp ugt i64 %tmp34, 4294967295
+  %tmp36 = icmp sgt i32 %tmp8, 0
+  %tmp37 = add i32 %tmp9, -4
+  br label %bb38
+
+bb38:                                             ; preds = %bb67, %bb31
+  %tmp39 = phi i32 [ %tmp68, %bb67 ], [ undef, %bb31 ]
+  %tmp40 = icmp sgt i32 %tmp39, undef
+  br i1 %tmp40, label %bb41, label %bb23
+
+bb41:                                             ; preds = %bb38
+  invoke void @Pjii(i32* %tmp16, i32 0, i32 %tmp8) optsize
+          to label %bb42 unwind label %bb20
+
+bb42:                                             ; preds = %bb41
+  tail call void @llvm.memset.p0i8.i32(i8* %tmp32, i8 0, i32 %tmp9, i32 1, i1 false) nounwind
+  br i1 %tmp35, label %bb43, label %bb45
+
+bb43:                                             ; preds = %bb42
+  invoke void @OnOverFlow() optsize
+          to label %bb44 unwind label %bb20
+
+bb44:                                             ; preds = %bb43
+  unreachable
+
+bb45:                                             ; preds = %bb57, %bb42
+  %tmp46 = phi i32 [ %tmp58, %bb57 ], [ 255, %bb42 ]
+  %tmp47 = icmp slt i32 undef, 0
+  br i1 %tmp47, label %bb48, label %bb59
+
+bb48:                                             ; preds = %bb45
+  tail call void @llvm.memset.p0i8.i32(i8* %tmp32, i8 0, i32 %tmp9, i32 1, i1 false) nounwind
+  br i1 %tmp36, label %bb49, label %bb57
+
+bb49:                                             ; preds = %bb49, %bb48
+  %tmp50 = phi i32 [ %tmp55, %bb49 ], [ 0, %bb48 ]
+  %tmp51 = add i32 %tmp50, undef
+  %tmp52 = add i32 %tmp50, undef
+  %tmp53 = getelementptr i32* %tmp13, i32 %tmp52
+  %tmp54 = load i32* %tmp53, align 4, !tbaa !0
+  %tmp55 = add i32 %tmp50, 1
+  %tmp56 = icmp eq i32 %tmp55, %tmp8
+  br i1 %tmp56, label %bb57, label %bb49
+
+bb57:                                             ; preds = %bb49, %bb48
+  %tmp58 = add i32 %tmp46, -1
+  br label %bb45
+
+bb59:                                             ; preds = %bb45
+  %tmp60 = ashr i32 %tmp46, 31
+  tail call void @llvm.memset.p0i8.i32(i8* null, i8 0, i32 %tmp37, i32 1, i1 false) nounwind
+  br i1 %tmp36, label %bb61, label %bb67
+
+bb61:                                             ; preds = %bb61, %bb59
+  %tmp62 = phi i32 [ %tmp65, %bb61 ], [ 0, %bb59 ]
+  %tmp63 = add i32 %tmp62, %tmp14
+  %tmp64 = getelementptr i32* %tmp13, i32 %tmp63
+  store i32 0, i32* %tmp64, align 4, !tbaa !0
+  %tmp65 = add i32 %tmp62, 1
+  %tmp66 = icmp eq i32 %tmp65, %tmp8
+  br i1 %tmp66, label %bb67, label %bb61
+
+bb67:                                             ; preds = %bb61, %bb59
+  %tmp68 = add i32 %tmp39, -1
+  br label %bb38
+
+bb69:                                             ; preds = %bb20
+  resume { i8*, i32 } %tmp22
+}
+
+declare i32 @__gxx_personality_v0(...)
+
+declare noalias i8* @_Znam(i32) optsize
+
+declare void @Pjii(i32*, i32, i32) optsize
+
+declare i32 @llvm.eh.typeid.for(i8*) nounwind readnone
+
+declare void @OnOverFlow() noreturn optsize ssp align 2
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/X86/2012-01-11-split-cv.ll b/test/CodeGen/X86/2012-01-11-split-cv.ll
new file mode 100644
index 0000000..6b90072
--- /dev/null
+++ b/test/CodeGen/X86/2012-01-11-split-cv.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86 -mcpu=corei7-avx -mattr=+avx -mtriple=i686-pc-win32 | FileCheck %s
+
+;CHECK: add18i16
+define void @add18i16(<18 x i16>* nocapture sret %ret, <18 x i16>* %bp) nounwind {
+;CHECK: vmovups
+  %b = load <18 x i16>* %bp, align 16
+  %x = add <18 x i16> zeroinitializer, %b
+  store <18 x i16> %x, <18 x i16>* %ret, align 16
+;CHECK: ret
+  ret void
+}
+
diff --git a/test/CodeGen/X86/2012-01-12-extract-sv.ll b/test/CodeGen/X86/2012-01-12-extract-sv.ll
new file mode 100644
index 0000000..fa8e80f
--- /dev/null
+++ b/test/CodeGen/X86/2012-01-12-extract-sv.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86 -mcpu=corei7-avx -mattr=+avx -mtriple=i686-pc-win32 | FileCheck %s
+
+; CHECK: endless_loop
+define void @endless_loop() {
+entry:
+  %0 = load <8 x i32> addrspace(1)* undef, align 32
+  %1 = shufflevector <8 x i32> %0, <8 x i32> undef, <16 x i32> <i32 4, i32 4, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %2 = shufflevector <16 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 undef>, <16 x i32> %1, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 17>
+  store <16 x i32> %2, <16 x i32> addrspace(1)* undef, align 64
+  ret void
+; CHECK: ret
+}
diff --git a/test/CodeGen/X86/2012-01-16-mfence-nosse-flags.ll b/test/CodeGen/X86/2012-01-16-mfence-nosse-flags.ll
new file mode 100644
index 0000000..a883d79
--- /dev/null
+++ b/test/CodeGen/X86/2012-01-16-mfence-nosse-flags.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -mtriple=i686-linux -mattr=-sse | FileCheck %s
+; PR11768
+
+@ptr = external global i8*
+
+define void @baz() nounwind ssp {
+entry:
+  %0 = load i8** @ptr, align 4
+  %cmp = icmp eq i8* %0, null
+  fence seq_cst
+  br i1 %cmp, label %if.then, label %if.else
+
+; Make sure the fence comes before the comparison, since it
+; clobbers EFLAGS.
+
+; CHECK: lock
+; CHECK-NEXT: orl {{.*}}, (%esp)
+; CHECK-NEXT: cmpl $0
+
+if.then:                                          ; preds = %entry
+  tail call void bitcast (void (...)* @foo to void ()*)() nounwind
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  tail call void bitcast (void (...)* @bar to void ()*)() nounwind
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  ret void
+}
+
+declare void @foo(...)
+
+declare void @bar(...)
diff --git a/test/CodeGen/X86/2012-01-18-vbitcast.ll b/test/CodeGen/X86/2012-01-18-vbitcast.ll
new file mode 100644
index 0000000..8a3ccc8
--- /dev/null
+++ b/test/CodeGen/X86/2012-01-18-vbitcast.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mtriple=x86_64-pc-win32 | FileCheck %s
+
+;CHECK: vcast
+define <2 x i32> @vcast(<2 x float> %a, <2 x float> %b) {
+;CHECK: pshufd
+;CHECK: pshufd
+  %af = bitcast <2 x float> %a to <2 x i32>
+  %bf = bitcast <2 x float> %b to <2 x i32>
+  %x = sub <2 x i32> %af, %bf
+;CHECK: psubq
+  ret <2 x i32> %x
+;CHECK: ret
+}
+
diff --git a/test/CodeGen/X86/2012-02-12-dagco.ll b/test/CodeGen/X86/2012-02-12-dagco.ll
new file mode 100644
index 0000000..13723a2
--- /dev/null
+++ b/test/CodeGen/X86/2012-02-12-dagco.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx
+target triple = "x86_64-unknown-linux-gnu"
+; Make sure we are not crashing on this one
+define void @dagco_crash() {
+entry:
+  %srcval.i411.i = load <4 x i64>* undef, align 1
+  %0 = extractelement <4 x i64> %srcval.i411.i, i32 3
+  %srcval.i409.i = load <2 x i64>* undef, align 1
+  %1 = extractelement <2 x i64> %srcval.i409.i, i32 0
+  %2 = insertelement <8 x i64> undef, i64 %0, i32 5
+  %3 = insertelement <8 x i64> %2, i64 %1, i32 6
+  %4 = insertelement <8 x i64> %3, i64 undef, i32 7
+  store <8 x i64> %4, <8 x i64> addrspace(1)* undef, align 64
+  unreachable
+}
+
diff --git a/test/CodeGen/X86/2012-02-14-scalar.ll b/test/CodeGen/X86/2012-02-14-scalar.ll
new file mode 100644
index 0000000..1dc076b
--- /dev/null
+++ b/test/CodeGen/X86/2012-02-14-scalar.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx
+target triple = "x86_64-unknown-linux-gnu"
+; Make sure we are not crashing on this one
+define void @autogen_28112_5000() {
+BB:
+  %S17 = icmp sgt <1 x i64> undef, undef
+  %E19 = extractelement <1 x i1> %S17, i32 0
+  br label %CF
+
+CF:                                               ; preds = %CF, %BB
+  %S23 = select i1 %E19, i8 undef, i8 undef
+  br label %CF
+}
diff --git a/test/CodeGen/X86/2012-02-20-MachineCPBug.ll b/test/CodeGen/X86/2012-02-20-MachineCPBug.ll
new file mode 100644
index 0000000..3013f16
--- /dev/null
+++ b/test/CodeGen/X86/2012-02-20-MachineCPBug.ll
@@ -0,0 +1,77 @@
+; RUN: llc < %s -mtriple=i386-apple-macosx -mattr=+sse | FileCheck %s
+; PR11940: Do not optimize away movb %al, %ch
+
+%struct.APInt = type { i64* }
+
+declare noalias i8* @calloc(i32, i32) nounwind
+
+define void @bug(%struct.APInt* noalias nocapture sret %agg.result, %struct.APInt* nocapture %this, i32 %rotateAmt) nounwind align 2 {
+entry:
+; CHECK: bug:
+  %call = tail call i8* @calloc(i32 1, i32 32)
+  %call.i = tail call i8* @calloc(i32 1, i32 32) nounwind
+  %0 = bitcast i8* %call.i to i64*
+  %rem.i = and i32 %rotateAmt, 63
+  %div.i = lshr i32 %rotateAmt, 6
+  %cmp.i = icmp eq i32 %rem.i, 0
+  br i1 %cmp.i, label %for.cond.preheader.i, label %if.end.i
+
+for.cond.preheader.i:                             ; preds = %entry
+  %sub.i = sub i32 4, %div.i
+  %cmp23.i = icmp eq i32 %div.i, 4
+  br i1 %cmp23.i, label %for.body9.lr.ph.i, label %for.body.lr.ph.i
+
+for.body.lr.ph.i:                                 ; preds = %for.cond.preheader.i
+  %pVal.i = getelementptr inbounds %struct.APInt* %this, i32 0, i32 0
+  %.pre5.i = load i64** %pVal.i, align 4
+  br label %for.body.i
+
+for.body.i:                                       ; preds = %for.body.i, %for.body.lr.ph.i
+  %i.04.i = phi i32 [ 0, %for.body.lr.ph.i ], [ %inc.i, %for.body.i ]
+  %add.i = add i32 %i.04.i, %div.i
+  %arrayidx.i = getelementptr inbounds i64* %.pre5.i, i32 %add.i
+  %1 = load i64* %arrayidx.i, align 4
+  %arrayidx3.i = getelementptr inbounds i64* %0, i32 %i.04.i
+  store i64 %1, i64* %arrayidx3.i, align 4
+  %inc.i = add i32 %i.04.i, 1
+  %cmp2.i = icmp ult i32 %inc.i, %sub.i
+  br i1 %cmp2.i, label %for.body.i, label %if.end.i
+
+if.end.i:                                         ; preds = %for.body.i, %entry
+  %cmp81.i = icmp eq i32 %div.i, 3
+  br i1 %cmp81.i, label %_ZNK5APInt4lshrEj.exit, label %for.body9.lr.ph.i
+
+for.body9.lr.ph.i:                                ; preds = %if.end.i, %for.cond.preheader.i
+  %sub58.i = sub i32 3, %div.i
+  %pVal11.i = getelementptr inbounds %struct.APInt* %this, i32 0, i32 0
+  %sh_prom.i = zext i32 %rem.i to i64
+  %sub17.i = sub i32 64, %rem.i
+  %sh_prom18.i = zext i32 %sub17.i to i64
+  %.pre.i = load i64** %pVal11.i, align 4
+  br label %for.body9.i
+
+for.body9.i:                                      ; preds = %for.body9.i, %for.body9.lr.ph.i
+; CHECK: %for.body9.i
+; CHECK: movb %al, %ch
+  %i6.02.i = phi i32 [ 0, %for.body9.lr.ph.i ], [ %inc21.i, %for.body9.i ]
+  %add10.i = add i32 %i6.02.i, %div.i
+  %arrayidx12.i = getelementptr inbounds i64* %.pre.i, i32 %add10.i
+  %2 = load i64* %arrayidx12.i, align 4
+  %shr.i = lshr i64 %2, %sh_prom.i
+  %add14.i = add i32 %add10.i, 1
+  %arrayidx16.i = getelementptr inbounds i64* %.pre.i, i32 %add14.i
+  %3 = load i64* %arrayidx16.i, align 4
+  %shl.i = shl i64 %3, %sh_prom18.i
+  %or.i = or i64 %shl.i, %shr.i
+  %arrayidx19.i = getelementptr inbounds i64* %0, i32 %i6.02.i
+  store i64 %or.i, i64* %arrayidx19.i, align 4
+  %inc21.i = add i32 %i6.02.i, 1
+  %cmp8.i = icmp ult i32 %inc21.i, %sub58.i
+  br i1 %cmp8.i, label %for.body9.i, label %_ZNK5APInt4lshrEj.exit
+
+_ZNK5APInt4lshrEj.exit:                           ; preds = %for.body9.i, %if.end.i
+  %call.i1 = tail call i8* @calloc(i32 1, i32 32) nounwind
+  %4 = getelementptr inbounds %struct.APInt* %agg.result, i32 0, i32 0
+  store i64* %0, i64** %4, align 4
+  ret void
+}
diff --git a/test/CodeGen/X86/2012-02-23-mmx-inlineasm.ll b/test/CodeGen/X86/2012-02-23-mmx-inlineasm.ll
new file mode 100644
index 0000000..a55c77b
--- /dev/null
+++ b/test/CodeGen/X86/2012-02-23-mmx-inlineasm.ll
@@ -0,0 +1,12 @@
+; RUN: llc -march=x86 -mcpu=i686 -mattr=+mmx < %s | FileCheck %s
+; <rdar://problem/10106006>
+
+define void @func() nounwind ssp {
+; CHECK:  psrlw %mm0, %mm1
+entry:
+  call void asm sideeffect "psrlw $0, %mm1", "y,~{dirflag},~{fpsr},~{flags}"(i32 8) nounwind
+  unreachable
+
+bb367:                                            ; preds = %entry                                                                                                                 
+  ret void
+}
diff --git a/test/CodeGen/X86/2012-02-29-CoalescerBug.ll b/test/CodeGen/X86/2012-02-29-CoalescerBug.ll
new file mode 100644
index 0000000..bdce853
--- /dev/null
+++ b/test/CodeGen/X86/2012-02-29-CoalescerBug.ll
@@ -0,0 +1,58 @@
+; RUN: llc -O1 <%s
+; PR12138
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
+target triple = "i386-apple-macosx10.7.0"
+
+%struct.S0 = type { i8, i32 }
+
+@d = external global [2 x [2 x %struct.S0]], align 4
+@c = external global i32, align 4
+@e = external global i32, align 4
+@b = external global i32, align 4
+@a = external global i32, align 4
+
+define void @fn2() nounwind optsize ssp {
+entry:
+  store i64 0, i64* bitcast ([2 x [2 x %struct.S0]]* @d to i64*), align 4
+  %0 = load i32* @c, align 4
+  %tobool2 = icmp eq i32 %0, 0
+  %1 = load i32* @a, align 4
+  %tobool4 = icmp eq i32 %1, 0
+  br label %for.cond
+
+for.cond:                                         ; preds = %if.end, %entry
+  %f.1.0 = phi i32 [ undef, %entry ], [ %sub, %if.end ]
+  %g.0 = phi i64 [ 0, %entry ], [ %ins, %if.end ]
+  %tobool = icmp eq i32 %f.1.0, 0
+  br i1 %tobool, label %for.end, label %for.body
+
+for.body:                                         ; preds = %for.cond
+  %2 = lshr i64 %g.0, 32
+  %conv = trunc i64 %2 to i16
+  br i1 %tobool2, label %lor.rhs, label %lor.end
+
+lor.rhs:                                          ; preds = %for.body
+  store i32 1, i32* @e, align 4
+  br label %lor.end
+
+lor.end:                                          ; preds = %lor.rhs, %for.body
+  %xor.i = xor i16 %conv, 1
+  %p1.lobit.i8 = lshr i64 %g.0, 47
+  %p1.lobit.i8.tr = trunc i64 %p1.lobit.i8 to i16
+  %p1.lobit.i = and i16 %p1.lobit.i8.tr, 1
+  %and.i = and i16 %p1.lobit.i, %xor.i
+  %3 = xor i16 %and.i, 1
+  %sub.conv.i = sub i16 %conv, %3
+  %conv3 = sext i16 %sub.conv.i to i32
+  store i32 %conv3, i32* @b, align 4
+  br i1 %tobool4, label %if.end, label %for.end
+
+if.end:                                           ; preds = %lor.end
+  %mask = and i64 %g.0, -256
+  %ins = or i64 %mask, 1
+  %sub = add nsw i32 %f.1.0, -1
+  br label %for.cond
+
+for.end:                                          ; preds = %lor.end, %for.cond
+  ret void
+}
diff --git a/test/CodeGen/X86/2012-1-10-buildvector.ll b/test/CodeGen/X86/2012-1-10-buildvector.ll
new file mode 100644
index 0000000..ff6be36
--- /dev/null
+++ b/test/CodeGen/X86/2012-1-10-buildvector.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -march=x86 -mcpu=corei7-avx -mattr=+avx -mtriple=i686-pc-win32 | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S32"
+target triple = "i686-pc-win32"
+
+;CHECK: bad_cast
+define void @bad_cast() {
+entry:
+  %vext.i = shufflevector <2 x i64> undef, <2 x i64> undef, <3 x i32> <i32 0, i32 1, i32 undef>
+  %vecinit8.i = shufflevector <3 x i64> zeroinitializer, <3 x i64> %vext.i, <3 x i32> <i32 0, i32 3, i32 4>
+  store <3 x i64> %vecinit8.i, <3 x i64>* undef, align 32
+;CHECK: ret
+  ret void
+}
+
+
+;CHECK: bad_insert
+define void @bad_insert(i32 %t) {
+entry:
+;CHECK: vpinsrd
+  %v2 = insertelement <8 x i32> zeroinitializer, i32 %t, i32 0
+  store <8 x i32> %v2, <8 x i32> addrspace(1)* undef, align 32
+;CHECK: ret
+  ret void
+}
+
diff --git a/test/CodeGen/X86/GC/dg.exp b/test/CodeGen/X86/GC/dg.exp
deleted file mode 100644
index 629a147..0000000
--- a/test/CodeGen/X86/GC/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target X86] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/X86/GC/lit.local.cfg b/test/CodeGen/X86/GC/lit.local.cfg
new file mode 100644
index 0000000..b05ed3c
--- /dev/null
+++ b/test/CodeGen/X86/GC/lit.local.cfg
@@ -0,0 +1,13 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+def getRoot(config):
+    if not config.parent:
+        return config
+    return getRoot(config.parent)
+
+root = getRoot(config)
+
+targets = set(root.targets_to_build.split())
+if not 'X86' in targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/X86/abi-isel.ll b/test/CodeGen/X86/abi-isel.ll
index 5068d29..658ccaa 100644
--- a/test/CodeGen/X86/abi-isel.ll
+++ b/test/CodeGen/X86/abi-isel.ll
@@ -1,16 +1,16 @@
-; RUN: llc < %s -asm-verbose=0 -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-32-STATIC
-; RUN: llc < %s -asm-verbose=0 -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-32-PIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-32-STATIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-32-PIC
 
-; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-64-STATIC
-; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=LINUX-64-PIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-64-STATIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=LINUX-64-PIC
 
-; RUN: llc < %s -asm-verbose=0 -mtriple=i686-apple-darwin -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=DARWIN-32-STATIC
-; RUN: llc < %s -asm-verbose=0 -mtriple=i686-apple-darwin -march=x86 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-32-DYNAMIC
-; RUN: llc < %s -asm-verbose=0 -mtriple=i686-apple-darwin -march=x86 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-32-PIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=DARWIN-32-STATIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-32-DYNAMIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-32-PIC
 
-; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=DARWIN-64-STATIC
-; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-DYNAMIC
-; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-PIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=DARWIN-64-STATIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-DYNAMIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-PIC
 
 @src = external global [131072 x i32]
 @dst = external global [131072 x i32]
diff --git a/test/CodeGen/X86/add.ll b/test/CodeGen/X86/add.ll
index 7bf527a..8e871f4 100644
--- a/test/CodeGen/X86/add.ll
+++ b/test/CodeGen/X86/add.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=x86 | FileCheck %s -check-prefix=X32
-; RUN: llc < %s -mtriple=x86_64-linux -join-physregs | FileCheck %s -check-prefix=X64
-; RUN: llc < %s -mtriple=x86_64-win32 -join-physregs | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mcpu=generic -march=x86 | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -join-physregs | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-win32 -join-physregs | FileCheck %s -check-prefix=X64
 
 ; Some of these tests depend on -join-physregs to commute instructions.
 
diff --git a/test/CodeGen/X86/apm.ll b/test/CodeGen/X86/apm.ll
index b514cf6..aaedf18 100644
--- a/test/CodeGen/X86/apm.ll
+++ b/test/CodeGen/X86/apm.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s -check-prefix=WIN64
+; RUN: llc < %s -mtriple=x86_64-linux -mattr=+sse3 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+sse3 | FileCheck %s -check-prefix=WIN64
 ; PR8573
 
 ; CHECK: foo:
diff --git a/test/CodeGen/X86/atom-lea-sp.ll b/test/CodeGen/X86/atom-lea-sp.ll
new file mode 100644
index 0000000..5942788
--- /dev/null
+++ b/test/CodeGen/X86/atom-lea-sp.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -mcpu=atom -mtriple=i686-linux  | FileCheck -check-prefix=atom %s
+; RUN: llc < %s -mcpu=core2 -mtriple=i686-linux | FileCheck %s
+
+declare void @use_arr(i8*)
+declare void @many_params(i32, i32, i32, i32, i32, i32)
+
+define void @test1() nounwind {
+; atom: test1:
+; atom: leal -1052(%esp), %esp
+; atom-NOT: sub
+; atom: call
+; atom: leal 1052(%esp), %esp
+
+; CHECK: test1:
+; CHECK: subl
+; CHECK: call
+; CHECK-NOT: lea
+  %arr = alloca [1024 x i8], align 16
+  %arr_ptr = getelementptr inbounds [1024 x i8]* %arr, i8 0, i8 0
+  call void @use_arr(i8* %arr_ptr)
+  ret void
+}
+
+define void @test2() nounwind {
+; atom: test2:
+; atom: leal -28(%esp), %esp
+; atom: call
+; atom: leal 28(%esp), %esp
+
+; CHECK: test2:
+; CHECK-NOT: lea
+  call void @many_params(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6)
+  ret void
+}
+
+define void @test3() nounwind {
+; atom: test3:
+; atom: leal -8(%esp), %esp
+; atom: leal 8(%esp), %esp
+
+; CHECK: test3:
+; CHECK-NOT: lea
+  %x = alloca i32, align 4
+  %y = alloca i32, align 4
+  store i32 0, i32* %x, align 4
+  ret void
+}
+
diff --git a/test/CodeGen/X86/atom-sched.ll b/test/CodeGen/X86/atom-sched.ll
new file mode 100644
index 0000000..2301dfc
--- /dev/null
+++ b/test/CodeGen/X86/atom-sched.ll
@@ -0,0 +1,28 @@
+; RUN: llc <%s -O2 -mcpu=atom -march=x86 -relocation-model=static | FileCheck -check-prefix=atom %s
+; RUN: llc <%s -O2 -mcpu=core2 -march=x86 -relocation-model=static | FileCheck %s
+
+@a = common global i32 0, align 4
+@b = common global i32 0, align 4
+@c = common global i32 0, align 4
+@d = common global i32 0, align 4
+@e = common global i32 0, align 4
+@f = common global i32 0, align 4
+
+define void @func() nounwind uwtable {
+; atom: imull
+; atom-NOT: movl
+; atom: imull
+; CHECK: imull
+; CHECK: movl
+; CHECK: imull
+entry:
+  %0 = load i32* @b, align 4
+  %1 = load i32* @c, align 4
+  %mul = mul nsw i32 %0, %1
+  store i32 %mul, i32* @a, align 4
+  %2 = load i32* @e, align 4
+  %3 = load i32* @f, align 4
+  %mul1 = mul nsw i32 %2, %3
+  store i32 %mul1, i32* @d, align 4
+  ret void
+}
diff --git a/test/CodeGen/X86/avx-cvt.ll b/test/CodeGen/X86/avx-cvt.ll
index 6c0bd58..d0a7fe0 100644
--- a/test/CodeGen/X86/avx-cvt.ll
+++ b/test/CodeGen/X86/avx-cvt.ll
@@ -18,7 +18,7 @@ define <4 x double> @sitofp01(<4 x i32> %a) {
   ret <4 x double> %b
 }
 
-; CHECK: vcvtpd2dqy %ymm
+; CHECK: vcvttpd2dqy %ymm
 define <4 x i32> @fptosi01(<4 x double> %a) {
   %b = fptosi <4 x double> %a to <4 x i32>
   ret <4 x i32> %b
diff --git a/test/CodeGen/X86/avx-fp2int.ll b/test/CodeGen/X86/avx-fp2int.ll
new file mode 100755
index 0000000..a3aadde
--- /dev/null
+++ b/test/CodeGen/X86/avx-fp2int.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+;; Check that FP_TO_SINT and FP_TO_UINT generate convert with truncate
+
+; CHECK: test1:
+; CHECK: vcvttpd2dqy
+; CHECK: ret
+; CHECK: test2:
+; CHECK: vcvttpd2dqy
+; CHECK: ret
+
+define <4 x i8> @test1(<4 x double> %d) {
+  %c = fptoui <4 x double> %d to <4 x i8>
+  ret <4 x i8> %c
+}
+define <4 x i8> @test2(<4 x double> %d) {
+  %c = fptosi <4 x double> %d to <4 x i8>
+  ret <4 x i8> %c
+}
diff --git a/test/CodeGen/X86/avx-intrinsics-x86.ll b/test/CodeGen/X86/avx-intrinsics-x86.ll
index f583914..616601a 100644
--- a/test/CodeGen/X86/avx-intrinsics-x86.ll
+++ b/test/CodeGen/X86/avx-intrinsics-x86.ll
@@ -369,54 +369,6 @@ define <8 x i16> @test_x86_sse2_pavg_w(<8 x i16> %a0, <8 x i16> %a1) {
 declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone
 
 
-define <16 x i8> @test_x86_sse2_pcmpeq_b(<16 x i8> %a0, <16 x i8> %a1) {
-  ; CHECK: vpcmpeqb
-  %res = call <16 x i8> @llvm.x86.sse2.pcmpeq.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
-  ret <16 x i8> %res
-}
-declare <16 x i8> @llvm.x86.sse2.pcmpeq.b(<16 x i8>, <16 x i8>) nounwind readnone
-
-
-define <4 x i32> @test_x86_sse2_pcmpeq_d(<4 x i32> %a0, <4 x i32> %a1) {
-  ; CHECK: vpcmpeqd
-  %res = call <4 x i32> @llvm.x86.sse2.pcmpeq.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
-  ret <4 x i32> %res
-}
-declare <4 x i32> @llvm.x86.sse2.pcmpeq.d(<4 x i32>, <4 x i32>) nounwind readnone
-
-
-define <8 x i16> @test_x86_sse2_pcmpeq_w(<8 x i16> %a0, <8 x i16> %a1) {
-  ; CHECK: vpcmpeqw
-  %res = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
-  ret <8 x i16> %res
-}
-declare <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16>, <8 x i16>) nounwind readnone
-
-
-define <16 x i8> @test_x86_sse2_pcmpgt_b(<16 x i8> %a0, <16 x i8> %a1) {
-  ; CHECK: vpcmpgtb
-  %res = call <16 x i8> @llvm.x86.sse2.pcmpgt.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
-  ret <16 x i8> %res
-}
-declare <16 x i8> @llvm.x86.sse2.pcmpgt.b(<16 x i8>, <16 x i8>) nounwind readnone
-
-
-define <4 x i32> @test_x86_sse2_pcmpgt_d(<4 x i32> %a0, <4 x i32> %a1) {
-  ; CHECK: vpcmpgtd
-  %res = call <4 x i32> @llvm.x86.sse2.pcmpgt.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
-  ret <4 x i32> %res
-}
-declare <4 x i32> @llvm.x86.sse2.pcmpgt.d(<4 x i32>, <4 x i32>) nounwind readnone
-
-
-define <8 x i16> @test_x86_sse2_pcmpgt_w(<8 x i16> %a0, <8 x i16> %a1) {
-  ; CHECK: vpcmpgtw
-  %res = call <8 x i16> @llvm.x86.sse2.pcmpgt.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
-  ret <8 x i16> %res
-}
-declare <8 x i16> @llvm.x86.sse2.pcmpgt.w(<8 x i16>, <8 x i16>) nounwind readnone
-
-
 define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) {
   ; CHECK: vpmaddwd
   %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1) ; <<4 x i32>> [#uses=1]
@@ -950,14 +902,6 @@ define <8 x i16> @test_x86_sse41_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
 declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i32) nounwind readnone
 
 
-define <2 x i64> @test_x86_sse41_pcmpeqq(<2 x i64> %a0, <2 x i64> %a1) {
-  ; CHECK: vpcmpeqq
-  %res = call <2 x i64> @llvm.x86.sse41.pcmpeqq(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
-  ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.sse41.pcmpeqq(<2 x i64>, <2 x i64>) nounwind readnone
-
-
 define <8 x i16> @test_x86_sse41_phminposuw(<8 x i16> %a0) {
   ; CHECK: vphminposuw
   %res = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %a0) ; <<8 x i16>> [#uses=1]
@@ -1271,14 +1215,6 @@ define <16 x i8> @test_x86_sse42_pcmpestrm128(<16 x i8> %a0, <16 x i8> %a2) {
 declare <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
 
 
-define <2 x i64> @test_x86_sse42_pcmpgtq(<2 x i64> %a0, <2 x i64> %a1) {
-  ; CHECK: vpcmpgtq
-  %res = call <2 x i64> @llvm.x86.sse42.pcmpgtq(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
-  ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.sse42.pcmpgtq(<2 x i64>, <2 x i64>) nounwind readnone
-
-
 define i32 @test_x86_sse42_pcmpistri128(<16 x i8> %a0, <16 x i8> %a1) {
   ; CHECK: vpcmpistri
   ; CHECK: movl
@@ -1830,6 +1766,74 @@ define <8 x float> @test_x86_avx_cmp_ps_256(<8 x float> %a0, <8 x float> %a1) {
   %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
   ret <8 x float> %res
 }
+
+define <8 x float> @test_x86_avx_cmp_ps_256_pseudo_op(<8 x float> %a0, <8 x float> %a1) {
+  ; CHECK: vcmpeqps
+  %a2 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 0) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpltps
+  %a3 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a2, i8 1) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpleps
+  %a4 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a3, i8 2) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpunordps
+  %a5 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a4, i8 3) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpneqps
+  %a6 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a5, i8 4) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpnltps
+  %a7 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a6, i8 5) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpnleps
+  %a8 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a7, i8 6) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpordps
+  %a9 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a8, i8 7) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpeq_uqps
+  %a10 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a9, i8 8) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpngeps
+  %a11 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a10, i8 9) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpngtps
+  %a12 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a11, i8 10) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpfalseps
+  %a13 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a12, i8 11) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpneq_oqps
+  %a14 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a13, i8 12) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpgeps
+  %a15 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a14, i8 13) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpgtps
+  %a16 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a15, i8 14) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmptrueps
+  %a17 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a16, i8 15) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpeq_osps
+  %a18 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a17, i8 16) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmplt_oqps
+  %a19 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a18, i8 17) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmple_oqps
+  %a20 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a19, i8 18) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpunord_sps
+  %a21 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a20, i8 19) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpneq_usps
+  %a22 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a21, i8 20) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpnlt_uqps
+  %a23 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a22, i8 21) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpnle_uqps
+  %a24 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a23, i8 22) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpord_sps
+  %a25 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a24, i8 23) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpeq_usps
+  %a26 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a25, i8 24) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpnge_uqps
+  %a27 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a26, i8 25) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpngt_uqps
+  %a28 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a27, i8 26) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpfalse_osps
+  %a29 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a28, i8 27) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpneq_osps
+  %a30 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a29, i8 28) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpge_oqps
+  %a31 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a30, i8 29) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmpgt_oqps
+  %a32 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a31, i8 30) ; <<8 x float>> [#uses=1]
+  ; CHECK: vcmptrue_usps
+  %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a32, i8 31) ; <<8 x float>> [#uses=1]
+  ret <8 x float> %res
+}
 declare <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
 
 
@@ -2481,4 +2485,73 @@ define void @test_x86_avx_vzeroupper() {
 }
 declare void @llvm.x86.avx.vzeroupper() nounwind
 
+; Make sure instructions with no AVX equivalents, but are associated with SSEX feature flags still work
+
+; CHECK: monitor
+define void @monitor(i8* %P, i32 %E, i32 %H) nounwind {
+entry:
+  tail call void @llvm.x86.sse3.monitor(i8* %P, i32 %E, i32 %H)
+  ret void
+}
+declare void @llvm.x86.sse3.monitor(i8*, i32, i32) nounwind
+
+; CHECK: mwait
+define void @mwait(i32 %E, i32 %H) nounwind {
+entry:
+  tail call void @llvm.x86.sse3.mwait(i32 %E, i32 %H)
+  ret void
+}
+declare void @llvm.x86.sse3.mwait(i32, i32) nounwind
+
+; CHECK: sfence
+define void @sfence() nounwind {
+entry:
+  tail call void @llvm.x86.sse.sfence()
+  ret void
+}
+declare void @llvm.x86.sse.sfence() nounwind
 
+; CHECK: lfence
+define void @lfence() nounwind {
+entry:
+  tail call void @llvm.x86.sse2.lfence()
+  ret void
+}
+declare void @llvm.x86.sse2.lfence() nounwind
+
+; CHECK: mfence
+define void @mfence() nounwind {
+entry:
+  tail call void @llvm.x86.sse2.mfence()
+  ret void
+}
+declare void @llvm.x86.sse2.mfence() nounwind
+
+; CHECK: clflush
+define void @clflush(i8* %p) nounwind {
+entry:
+  tail call void @llvm.x86.sse2.clflush(i8* %p)
+  ret void
+}
+declare void @llvm.x86.sse2.clflush(i8*) nounwind
+
+; CHECK: crc32b
+define i32 @crc32_32_8(i32 %a, i8 %b) nounwind {
+  %tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b)
+  ret i32 %tmp
+}
+declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind
+
+; CHECK: crc32w
+define i32 @crc32_32_16(i32 %a, i16 %b) nounwind {
+  %tmp = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a, i16 %b)
+  ret i32 %tmp
+}
+declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind
+
+; CHECK: crc32l
+define i32 @crc32_32_32(i32 %a, i32 %b) nounwind {
+  %tmp = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a, i32 %b)
+  ret i32 %tmp
+}
+declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind
diff --git a/test/CodeGen/X86/avx-load-store.ll b/test/CodeGen/X86/avx-load-store.ll
index 07a63ef..c9fc66a 100644
--- a/test/CodeGen/X86/avx-load-store.ll
+++ b/test/CodeGen/X86/avx-load-store.ll
@@ -25,20 +25,26 @@ declare void @dummy(<4 x double>, <8 x float>, <4 x i64>)
 
 ;;
 ;; The two tests below check that we must fold load + scalar_to_vector
-;; + ins_subvec+ zext into only a single vmovss or vmovsd
+;; + ins_subvec+ zext into only a single vmovss or vmovsd or vinsertps from memory
 
-; CHECK: vmovss (%
+; CHECK: mov00
 define <8 x float> @mov00(<8 x float> %v, float * %ptr) nounwind {
   %val = load float* %ptr
+; CHECK: vinsertps
+; CHECK: vinsertf128
   %i0 = insertelement <8 x float> zeroinitializer, float %val, i32 0
   ret <8 x float> %i0
+; CHECK: ret
 }
 
-; CHECK: vmovsd (%
+; CHECK: mov01
 define <4 x double> @mov01(<4 x double> %v, double * %ptr) nounwind {
   %val = load double* %ptr
+; CHECK: vmovlpd
+; CHECK: vinsertf128
   %i0 = insertelement <4 x double> zeroinitializer, double %val, i32 0
   ret <4 x double> %i0
+; CHECK: ret
 }
 
 ; CHECK: vmovaps  %ymm
diff --git a/test/CodeGen/X86/avx-minmax.ll b/test/CodeGen/X86/avx-minmax.ll
index f36ba7b..7c58820 100644
--- a/test/CodeGen/X86/avx-minmax.ll
+++ b/test/CodeGen/X86/avx-minmax.ll
@@ -33,7 +33,7 @@ define <4 x float> @minps(<4 x float> %x, <4 x float> %y) {
 }
 
 ; UNSAFE: vmaxpd:
-; UNSAFE: vmaxpd %ymm
+; UNSAFE: vmaxpd {{.+}}, %ymm
 define <4 x double> @vmaxpd(<4 x double> %x, <4 x double> %y) {
   %max_is_x = fcmp oge <4 x double> %x, %y
   %max = select <4 x i1> %max_is_x, <4 x double> %x, <4 x double> %y
@@ -41,7 +41,7 @@ define <4 x double> @vmaxpd(<4 x double> %x, <4 x double> %y) {
 }
 
 ; UNSAFE: vminpd:
-; UNSAFE: vminpd %ymm
+; UNSAFE: vminpd {{.+}}, %ymm
 define <4 x double> @vminpd(<4 x double> %x, <4 x double> %y) {
   %min_is_x = fcmp ole <4 x double> %x, %y
   %min = select <4 x i1> %min_is_x, <4 x double> %x, <4 x double> %y
@@ -49,7 +49,7 @@ define <4 x double> @vminpd(<4 x double> %x, <4 x double> %y) {
 }
 
 ; UNSAFE: vmaxps:
-; UNSAFE: vmaxps %ymm
+; UNSAFE: vmaxps {{.+}}, %ymm
 define <8 x float> @vmaxps(<8 x float> %x, <8 x float> %y) {
   %max_is_x = fcmp oge <8 x float> %x, %y
   %max = select <8 x i1> %max_is_x, <8 x float> %x, <8 x float> %y
@@ -57,7 +57,7 @@ define <8 x float> @vmaxps(<8 x float> %x, <8 x float> %y) {
 }
 
 ; UNSAFE: vminps:
-; UNSAFE: vminps %ymm
+; UNSAFE: vminps {{.+}}, %ymm
 define <8 x float> @vminps(<8 x float> %x, <8 x float> %y) {
   %min_is_x = fcmp ole <8 x float> %x, %y
   %min = select <8 x i1> %min_is_x, <8 x float> %x, <8 x float> %y
diff --git a/test/CodeGen/X86/avx-sext.ll b/test/CodeGen/X86/avx-sext.ll
new file mode 100755
index 0000000..3713a8c
--- /dev/null
+++ b/test/CodeGen/X86/avx-sext.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+define <8 x i32> @sext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
+;CHECK: sext_8i16_to_8i32
+;CHECK: vpmovsxwd
+
+  %B = sext <8 x i16> %A to <8 x i32>
+  ret <8 x i32>%B
+}
+
+define <4 x i64> @sext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
+;CHECK: sext_4i32_to_4i64
+;CHECK: vpmovsxdq
+
+  %B = sext <4 x i32> %A to <4 x i64>
+  ret <4 x i64>%B
+}
diff --git a/test/CodeGen/X86/avx-shuffle-x86_32.ll b/test/CodeGen/X86/avx-shuffle-x86_32.ll
new file mode 100755
index 0000000..5268ec3
--- /dev/null
+++ b/test/CodeGen/X86/avx-shuffle-x86_32.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -mtriple=i686-pc-win32 -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+define <4 x i64> @test1(<4 x i64> %a) nounwind {
+ %b = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ ret <4 x i64>%b
+ ; CHECK: test1:
+ ; CHECK: vinsertf128
+ }
diff --git a/test/CodeGen/X86/avx-shuffle.ll b/test/CodeGen/X86/avx-shuffle.ll
index e9392ae..947d79f 100644
--- a/test/CodeGen/X86/avx-shuffle.ll
+++ b/test/CodeGen/X86/avx-shuffle.ll
@@ -6,15 +6,132 @@ define <4 x float> @test1(<4 x float> %a) nounwind {
   ret <4 x float> %b
 ; CHECK: test1:
 ; CHECK: vshufps
-; CHECK: vpshufd
+; CHECK: vpermilps
 }
 
 ; rdar://10538417
 define <3 x i64> @test2(<2 x i64> %v) nounwind readnone {
 ; CHECK: test2:
-; CHECK: vxorpd
-; CHECK: vmovsd
+; CHECK: vinsertf128
   %1 = shufflevector <2 x i64> %v, <2 x i64> %v, <3 x i32> <i32 0, i32 1, i32 undef>
   %2 = shufflevector <3 x i64> zeroinitializer, <3 x i64> %1, <3 x i32> <i32 3, i32 4, i32 2>
   ret <3 x i64> %2
+; CHECK: ret
 }
+
+define <4 x i64> @test3(<4 x i64> %a, <4 x i64> %b) nounwind {
+  %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 5, i32 2, i32 undef>
+  ret <4 x i64> %c
+; CHECK: test3:
+; CHECK: vperm2f128
+; CHECK: ret
+}
+
+define <8 x float> @test4(float %a) nounwind {
+  %b = insertelement <8 x float> zeroinitializer, float %a, i32 0
+  ret <8 x float> %b
+; CHECK: test4:
+; CHECK: vinsertf128
+}
+
+; rdar://10594409
+define <8 x float> @test5(float* nocapture %f) nounwind uwtable readonly ssp {
+entry:
+  %0 = bitcast float* %f to <4 x float>*
+  %1 = load <4 x float>* %0, align 16
+; CHECK: test5
+; CHECK: vmovaps
+; CHECK-NOT: vxorps
+; CHECK-NOT: vinsertf128
+  %shuffle.i = shufflevector <4 x float> %1, <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
+  ret <8 x float> %shuffle.i
+}
+
+define <4 x double> @test6(double* nocapture %d) nounwind uwtable readonly ssp {
+entry:
+  %0 = bitcast double* %d to <2 x double>*
+  %1 = load <2 x double>* %0, align 16
+; CHECK: test6
+; CHECK: vmovaps
+; CHECK-NOT: vxorps
+; CHECK-NOT: vinsertf128
+  %shuffle.i = shufflevector <2 x double> %1, <2 x double> <double 0.000000e+00, double undef>, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
+  ret <4 x double> %shuffle.i
+}
+
+define <16 x i16> @test7(<4 x i16> %a) nounwind {
+; CHECK: test7
+  %b = shufflevector <4 x i16> %a, <4 x i16> undef, <16 x i32> <i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK: ret
+  ret <16 x i16> %b
+}
+
+; CHECK: test8
+define void @test8() {
+entry:
+  %0 = load <16 x i64> addrspace(1)* null, align 128
+  %1 = shufflevector <16 x i64> <i64 undef, i64 undef, i64 0, i64 undef, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 undef, i64 0, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i64> %0, <16 x i32> <i32 17, i32 18, i32 2, i32 undef, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 undef, i32 11, i32 undef, i32 undef, i32 undef, i32 26>
+  %2 = shufflevector <16 x i64> %1, <16 x i64> %0, <16 x i32> <i32 0, i32 1, i32 2, i32 30, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 undef, i32 11, i32 undef, i32 22, i32 20, i32 15>
+  store <16 x i64> %2, <16 x i64> addrspace(1)* undef, align 128
+; CHECK: ret
+  ret void
+}
+
+; Extract a value from a shufflevector..
+define i32 @test9(<4 x i32> %a) nounwind {
+; CHECK: test9
+; CHECK: vpextrd
+  %b = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> <i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 undef, i32 4> 
+  %r = extractelement <8 x i32> %b, i32 2
+; CHECK: ret
+  ret i32 %r
+}
+
+; Extract a value which is the result of an undef mask.
+define i32 @test10(<4 x i32> %a) nounwind {
+; CHECK: @test10
+; CHECK-NEXT: #
+; CHECK-NEXT: ret
+  %b = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> <i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %r = extractelement <8 x i32> %b, i32 2
+  ret i32 %r
+}
+
+define <4 x float> @test11(<4 x float> %a) nounwind  {
+; check: test11
+; check: vpermilps $27
+  %tmp1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ret <4 x float> %tmp1
+}
+
+define <4 x float> @test12(<4 x float>* %a) nounwind  {
+; CHECK: test12
+; CHECK: vpermilps $27, (
+  %tmp0 = load <4 x float>* %a
+  %tmp1 = shufflevector <4 x float> %tmp0, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ret <4 x float> %tmp1
+}
+
+define <4 x i32> @test13(<4 x i32> %a) nounwind  {
+; check: test13
+; check: vpshufd $27
+  %tmp1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ret <4 x i32> %tmp1
+}
+
+define <4 x i32> @test14(<4 x i32>* %a) nounwind  {
+; CHECK: test14
+; CHECK: vpshufd $27, (
+  %tmp0 = load <4 x i32>* %a
+  %tmp1 = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ret <4 x i32> %tmp1
+}
+
+; CHECK: test15
+; CHECK: vpshufd $8
+; CHECK: ret
+define <4 x i32> @test15(<2 x i32>%x) nounwind readnone {
+  %x1 = shufflevector <2 x i32> %x, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  ret <4 x i32>%x1
+}
+
diff --git a/test/CodeGen/X86/avx-splat.ll b/test/CodeGen/X86/avx-splat.ll
index f8522c2..94bcddd 100644
--- a/test/CodeGen/X86/avx-splat.ll
+++ b/test/CodeGen/X86/avx-splat.ll
@@ -32,7 +32,7 @@ entry:
   ret <4 x i64> %vecinit6.i
 }
 
-; CHECK: vshufpd $0
+; CHECK: vpermilpd $0
 ; CHECK-NEXT: vinsertf128 $1
 define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp {
 entry:
diff --git a/test/CodeGen/X86/avx-trunc.ll b/test/CodeGen/X86/avx-trunc.ll
new file mode 100755
index 0000000..d007736
--- /dev/null
+++ b/test/CodeGen/X86/avx-trunc.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+define <4 x i32> @trunc_64_32(<4 x i64> %A) nounwind uwtable readnone ssp{
+; CHECK: trunc_64_32
+; CHECK: pshufd
+  %B = trunc <4 x i64> %A to <4 x i32>
+  ret <4 x i32>%B
+}
+define <8 x i16> @trunc_32_16(<8 x i32> %A) nounwind uwtable readnone ssp{
+; CHECK: trunc_32_16
+; CHECK: pshufb
+  %B = trunc <8 x i32> %A to <8 x i16>
+  ret <8 x i16>%B
+}
+
diff --git a/test/CodeGen/X86/avx-unpack.ll b/test/CodeGen/X86/avx-unpack.ll
index fcd7bb6..20f5345 100644
--- a/test/CodeGen/X86/avx-unpack.ll
+++ b/test/CodeGen/X86/avx-unpack.ll
@@ -123,3 +123,39 @@ entry:
   %shuffle.i = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
   ret <4 x i64> %shuffle.i
 }
+
+; CHECK: vpunpckhwd
+; CHECK: vpunpckhwd
+; CHECK: vinsertf128
+define <16 x i16> @unpackhwd_undef(<16 x i16> %src1) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <16 x i16> %src1, <16 x i16> %src1, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+  ret <16 x i16> %shuffle.i
+}
+
+; CHECK: vpunpcklwd
+; CHECK: vpunpcklwd
+; CHECK: vinsertf128
+define <16 x i16> @unpacklwd_undef(<16 x i16> %src1) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <16 x i16> %src1, <16 x i16> %src1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
+  ret <16 x i16> %shuffle.i
+}
+
+; CHECK: vpunpckhbw
+; CHECK: vpunpckhbw
+; CHECK: vinsertf128
+define <32 x i8> @unpackhbw_undef(<32 x i8> %src1, <32 x i8> %src2) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <32 x i8> %src1, <32 x i8> %src1, <32 x i32> <i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
+  ret <32 x i8> %shuffle.i
+}
+
+; CHECK: vpunpcklbw
+; CHECK: vpunpcklbw
+; CHECK: vinsertf128
+define <32 x i8> @unpacklbw_undef(<32 x i8> %src1) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <32 x i8> %src1, <32 x i8> %src1, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55>
+  ret <32 x i8> %shuffle.i
+}
diff --git a/test/CodeGen/X86/avx-vbroadcast.ll b/test/CodeGen/X86/avx-vbroadcast.ll
index 8fbd02a..5bf9f4f 100644
--- a/test/CodeGen/X86/avx-vbroadcast.ll
+++ b/test/CodeGen/X86/avx-vbroadcast.ll
@@ -89,3 +89,13 @@ define <4 x i32> @H(<4 x i32> %a) {
   ret <4 x i32> %x
 }
 
+; CHECK: _I
+; CHECK-NOT: vbroadcastsd (%
+; CHECK: ret
+define <2 x double> @I(double* %ptr) nounwind uwtable readnone ssp {
+entry:
+  %q = load double* %ptr, align 4
+  %vecinit.i = insertelement <2 x double> undef, double %q, i32 0
+  %vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
+  ret <2 x double> %vecinit2.i
+}
diff --git a/test/CodeGen/X86/avx-vinsertf128.ll b/test/CodeGen/X86/avx-vinsertf128.ll
index cda1331..def2212 100644
--- a/test/CodeGen/X86/avx-vinsertf128.ll
+++ b/test/CodeGen/X86/avx-vinsertf128.ll
@@ -56,3 +56,51 @@ define <8 x i32> @DAGCombineB(<8 x i32> %v1, <8 x i32> %v2) nounwind readonly {
   %2 = add <8 x i32> %1, %v1
   ret <8 x i32> %2
 }
+
+; CHECK: insert_pd
+define <4 x double> @insert_pd(<4 x double> %a0, <2 x double> %a1) {
+; CHECK: vinsertf128
+%res = call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> %a0, <2 x double> %a1, i8 0)
+ret <4 x double> %res
+}
+
+; CHECK: insert_undef_pd
+define <4 x double> @insert_undef_pd(<4 x double> %a0, <2 x double> %a1) {
+; CHECK: vmovaps	%ymm1, %ymm0
+%res = call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> undef, <2 x double> %a1, i8 0)
+ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double>, <2 x double>, i8) nounwind readnone
+
+
+; CHECK: insert_ps
+define <8 x float> @insert_ps(<8 x float> %a0, <4 x float> %a1) {
+; CHECK: vinsertf128
+%res = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> %a0, <4 x float> %a1, i8 0)
+ret <8 x float> %res
+}
+
+; CHECK: insert_undef_ps
+define <8 x float> @insert_undef_ps(<8 x float> %a0, <4 x float> %a1) {
+; CHECK: vmovaps	%ymm1, %ymm0
+%res = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> undef, <4 x float> %a1, i8 0)
+ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float>, <4 x float>, i8) nounwind readnone
+
+
+; CHECK: insert_si
+define <8 x i32> @insert_si(<8 x i32> %a0, <4 x i32> %a1) {
+; CHECK: vinsertf128
+%res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> %a0, <4 x i32> %a1, i8 0)
+ret <8 x i32> %res
+}
+
+; CHECK: insert_undef_si
+define <8 x i32> @insert_undef_si(<8 x i32> %a0, <4 x i32> %a1) {
+; CHECK: vmovaps	%ymm1, %ymm0
+%res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> undef, <4 x i32> %a1, i8 0)
+ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32>, <4 x i32>, i8) nounwind readnone
+
diff --git a/test/CodeGen/X86/avx-vpermil.ll b/test/CodeGen/X86/avx-vpermil.ll
index 3d521e7..9707cd9 100644
--- a/test/CodeGen/X86/avx-vpermil.ll
+++ b/test/CodeGen/X86/avx-vpermil.ll
@@ -45,7 +45,7 @@ entry:
   ret <8 x float> %shuffle
 }
 
-; CHECK-NOT: vpermilps
+; CHECK: vpermilps
 define <8 x float> @funcF(<8 x float> %a) nounwind uwtable readnone ssp {
 entry:
   %shuffle = shufflevector <8 x float> %a, <8 x float> zeroinitializer, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
diff --git a/test/CodeGen/X86/avx-vshufp.ll b/test/CodeGen/X86/avx-vshufp.ll
index 0ccbc59..45883b7 100644
--- a/test/CodeGen/X86/avx-vshufp.ll
+++ b/test/CodeGen/X86/avx-vshufp.ll
@@ -7,7 +7,7 @@ entry:
   ret <8 x float> %shuffle
 }
 
-; CHECK: vshufps  $-53, (%
+; CHECK: vshufps  $-53, (%{{.*}}), %ymm
 define <8 x float> @A2(<8 x float>* %a, <8 x float>* %b) nounwind uwtable readnone ssp {
 entry:
   %a2 = load <8 x float>* %a
@@ -16,6 +16,22 @@ entry:
   ret <8 x float> %shuffle
 }
 
+; CHECK: vshufps  $-53, %ymm
+define <8 x i32> @A3(<8 x i32> %a, <8 x i32> %b) nounwind uwtable readnone ssp {
+entry:
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 8, i32 11, i32 7, i32 6, i32 12, i32 15>
+  ret <8 x i32> %shuffle
+}
+
+; CHECK: vshufps  $-53, (%{{.*}}), %ymm
+define <8 x i32> @A4(<8 x i32>* %a, <8 x i32>* %b) nounwind uwtable readnone ssp {
+entry:
+  %a2 = load <8 x i32>* %a
+  %b2 = load <8 x i32>* %b
+  %shuffle = shufflevector <8 x i32> %a2, <8 x i32> %b2, <8 x i32> <i32 3, i32 2, i32 8, i32 11, i32 7, i32 6, i32 12, i32 15>
+  ret <8 x i32> %shuffle
+}
+
 ; CHECK: vshufpd  $10, %ymm
 define <4 x double> @B(<4 x double> %a, <4 x double> %b) nounwind uwtable readnone ssp {
 entry:
@@ -23,7 +39,7 @@ entry:
   ret <4 x double> %shuffle
 }
 
-; CHECK: vshufpd  $10, (%
+; CHECK: vshufpd  $10, (%{{.*}}), %ymm
 define <4 x double> @B2(<4 x double>* %a, <4 x double>* %b) nounwind uwtable readnone ssp {
 entry:
   %a2 = load <4 x double>* %a
@@ -32,6 +48,22 @@ entry:
   ret <4 x double> %shuffle
 }
 
+; CHECK: vshufpd  $10, %ymm
+define <4 x i64> @B3(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
+entry:
+  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  ret <4 x i64> %shuffle
+}
+
+; CHECK: vshufpd  $10, (%{{.*}}), %ymm
+define <4 x i64> @B4(<4 x i64>* %a, <4 x i64>* %b) nounwind uwtable readnone ssp {
+entry:
+  %a2 = load <4 x i64>* %a
+  %b2 = load <4 x i64>* %b
+  %shuffle = shufflevector <4 x i64> %a2, <4 x i64> %b2, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  ret <4 x i64> %shuffle
+}
+
 ; CHECK: vshufps  $-53, %ymm
 define <8 x float> @C(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
 entry:
@@ -59,3 +91,67 @@ entry:
   %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 7>
   ret <4 x double> %shuffle
 }
+
+; CHECK: vshufps  $-53, %xmm
+define <4 x float> @A128(<4 x float> %a, <4 x float> %b) nounwind uwtable readnone ssp {
+entry:
+  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 3, i32 2, i32 4, i32 7>
+  ret <4 x float> %shuffle
+}
+
+; CHECK: vshufps  $-53, (%{{.*}}), %xmm
+define <4 x float> @A2128(<4 x float>* %a, <4 x float>* %b) nounwind uwtable readnone ssp {
+entry:
+  %a2 = load <4 x float>* %a
+  %b2 = load <4 x float>* %b
+  %shuffle = shufflevector <4 x float> %a2, <4 x float> %b2, <4 x i32> <i32 3, i32 2, i32 4, i32 7>
+  ret <4 x float> %shuffle
+}
+
+; CHECK: vshufps  $-53, %xmm
+define <4 x i32> @A3128(<4 x i32> %a, <4 x i32> %b) nounwind uwtable readnone ssp {
+entry:
+  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 2, i32 4, i32 7>
+  ret <4 x i32> %shuffle
+}
+
+; CHECK: vshufps  $-53, (%{{.*}}), %xmm
+define <4 x i32> @A4128(<4 x i32>* %a, <4 x i32>* %b) nounwind uwtable readnone ssp {
+entry:
+  %a2 = load <4 x i32>* %a
+  %b2 = load <4 x i32>* %b
+  %shuffle = shufflevector <4 x i32> %a2, <4 x i32> %b2, <4 x i32> <i32 3, i32 2, i32 4, i32 7>
+  ret <4 x i32> %shuffle
+}
+
+; CHECK: vshufpd  $1, %xmm
+define <2 x double> @B128(<2 x double> %a, <2 x double> %b) nounwind uwtable readnone ssp {
+entry:
+  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 2>
+  ret <2 x double> %shuffle
+}
+
+; CHECK: vshufpd  $1, (%{{.*}}), %xmm
+define <2 x double> @B2128(<2 x double>* %a, <2 x double>* %b) nounwind uwtable readnone ssp {
+entry:
+  %a2 = load <2 x double>* %a
+  %b2 = load <2 x double>* %b
+  %shuffle = shufflevector <2 x double> %a2, <2 x double> %b2, <2 x i32> <i32 1, i32 2>
+  ret <2 x double> %shuffle
+}
+
+; CHECK: vshufpd  $1, %xmm
+define <2 x i64> @B3128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
+entry:
+  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
+  ret <2 x i64> %shuffle
+}
+
+; CHECK: vshufpd  $1, (%{{.*}}), %xmm
+define <2 x i64> @B4128(<2 x i64>* %a, <2 x i64>* %b) nounwind uwtable readnone ssp {
+entry:
+  %a2 = load <2 x i64>* %a
+  %b2 = load <2 x i64>* %b
+  %shuffle = shufflevector <2 x i64> %a2, <2 x i64> %b2, <2 x i32> <i32 1, i32 2>
+  ret <2 x i64> %shuffle
+}
diff --git a/test/CodeGen/X86/avx-win64-args.ll b/test/CodeGen/X86/avx-win64-args.ll
new file mode 100755
index 0000000..85b2634
--- /dev/null
+++ b/test/CodeGen/X86/avx-win64-args.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+target triple = "x86_64-pc-win32"
+
+declare <8 x float> @foo(<8 x float>, i32)
+
+define <8 x float> @test1(<8 x float> %x, <8 x float> %y) nounwind uwtable readnone ssp {
+entry:
+; CHECK: test1
+; CHECK: leaq {{.*}}, %rcx
+; CHECK: movl {{.*}}, %edx
+; CHECK: call
+; CHECK: ret
+  %x1 = fadd  <8 x float>  %x, %y
+  %call = call  <8 x float> @foo(<8 x float> %x1, i32 1) nounwind
+  %y1 = fsub  <8 x float>  %call, %y
+  ret <8 x float> %y1
+}
+
diff --git a/test/CodeGen/X86/avx-win64.ll b/test/CodeGen/X86/avx-win64.ll
new file mode 100644
index 0000000..dc6bd59
--- /dev/null
+++ b/test/CodeGen/X86/avx-win64.ll
@@ -0,0 +1,47 @@
+; RUN: llc < %s -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+; PR11862
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-win32"
+
+; This function has live ymm registers across a win64 call.
+; The ymm6-15 registers are still call-clobbered even if xmm6-15 are callee-saved.
+; Verify that callee-saved registers are not being used.
+
+; CHECK: f___vyf
+; CHECK: pushq %rbp
+; CHECK: vmovmsk
+; CHECK: vmovaps %ymm{{.*}}(%r
+; CHECK: vmovaps %ymm{{.*}}(%r
+; CHECK: call
+; Two reloads. It's OK if these get folded.
+; CHECK: vmovaps {{.*\(%r.*}}, %ymm
+; CHECK: vmovaps {{.*\(%r.*}}, %ymm
+; CHECK: blend
+define <8 x float> @f___vyf(<8 x float> %x, <8 x i32> %__mask) nounwind readnone {
+allocas:
+  %bincmp = fcmp oeq <8 x float> %x, zeroinitializer
+  %val_to_boolvec32 = sext <8 x i1> %bincmp to <8 x i32>
+  %"~test" = xor <8 x i32> %val_to_boolvec32, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+  %"internal_mask&function_mask25" = and <8 x i32> %"~test", %__mask
+  %floatmask.i46 = bitcast <8 x i32> %"internal_mask&function_mask25" to <8 x float>
+  %v.i47 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %floatmask.i46) nounwind readnone
+  %any_mm_cmp27 = icmp eq i32 %v.i47, 0
+  br i1 %any_mm_cmp27, label %safe_if_after_false, label %safe_if_run_false
+
+safe_if_run_false:                                ; preds = %allocas
+  %binop = fadd <8 x float> %x, <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>
+  %calltmp = call <8 x float> @f___vyf(<8 x float> %binop, <8 x i32> %"internal_mask&function_mask25")
+  %binop33 = fadd <8 x float> %calltmp, %x
+  %mask_as_float.i48 = bitcast <8 x i32> %"~test" to <8 x float>
+  %blend.i52 = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %x, <8 x float> %binop33, <8 x float> %mask_as_float.i48) nounwind
+  br label %safe_if_after_false
+
+safe_if_after_false:                              ; preds = %safe_if_run_false, %allocas
+  %0 = phi <8 x float> [ %x, %allocas ], [ %blend.i52, %safe_if_run_false ]
+  ret <8 x float> %0
+}
+
+declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone
+declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8*, <8 x float>) nounwind readonly
+declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x float>, <8 x float>) nounwind
+declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
diff --git a/test/CodeGen/X86/avx-zext.ll b/test/CodeGen/X86/avx-zext.ll
new file mode 100755
index 0000000..b630e9d
--- /dev/null
+++ b/test/CodeGen/X86/avx-zext.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+define <8 x i32> @zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
+;CHECK: zext_8i16_to_8i32
+;CHECK: vpunpckhwd
+;CHECK: ret
+
+  %B = zext <8 x i16> %A to <8 x i32>
+  ret <8 x i32>%B
+}
+
+define <4 x i64> @zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
+;CHECK: zext_4i32_to_4i64
+;CHECK: vpunpckhdq
+;CHECK: ret
+
+  %B = zext <4 x i32> %A to <4 x i64>
+  ret <4 x i64>%B
+}
+
+
+define <8 x i32> @zext_8i8_to_8i32(<8 x i8> %z) {
+;CHECK: zext_8i8_to_8i32
+;CHECK: vpunpckhwd
+;CHECK: vpunpcklwd
+;CHECK: vinsertf128
+;CHECK: ret
+  %t = zext <8 x i8> %z to <8 x i32>
+  ret <8 x i32> %t
+}
diff --git a/test/CodeGen/X86/avx2-intrinsics-x86.ll b/test/CodeGen/X86/avx2-intrinsics-x86.ll
index a0f351d..1fb41c0 100644
--- a/test/CodeGen/X86/avx2-intrinsics-x86.ll
+++ b/test/CodeGen/X86/avx2-intrinsics-x86.ll
@@ -72,54 +72,6 @@ define <16 x i16> @test_x86_avx2_pavg_w(<16 x i16> %a0, <16 x i16> %a1) {
 declare <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16>, <16 x i16>) nounwind readnone
 
 
-define <32 x i8> @test_x86_avx2_pcmpeq_b(<32 x i8> %a0, <32 x i8> %a1) {
-  ; CHECK: vpcmpeqb
-  %res = call <32 x i8> @llvm.x86.avx2.pcmpeq.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
-  ret <32 x i8> %res
-}
-declare <32 x i8> @llvm.x86.avx2.pcmpeq.b(<32 x i8>, <32 x i8>) nounwind readnone
-
-
-define <8 x i32> @test_x86_avx2_pcmpeq_d(<8 x i32> %a0, <8 x i32> %a1) {
-  ; CHECK: vpcmpeqd
-  %res = call <8 x i32> @llvm.x86.avx2.pcmpeq.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
-  ret <8 x i32> %res
-}
-declare <8 x i32> @llvm.x86.avx2.pcmpeq.d(<8 x i32>, <8 x i32>) nounwind readnone
-
-
-define <16 x i16> @test_x86_avx2_pcmpeq_w(<16 x i16> %a0, <16 x i16> %a1) {
-  ; CHECK: vpcmpeqw
-  %res = call <16 x i16> @llvm.x86.avx2.pcmpeq.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
-  ret <16 x i16> %res
-}
-declare <16 x i16> @llvm.x86.avx2.pcmpeq.w(<16 x i16>, <16 x i16>) nounwind readnone
-
-
-define <32 x i8> @test_x86_avx2_pcmpgt_b(<32 x i8> %a0, <32 x i8> %a1) {
-  ; CHECK: vpcmpgtb
-  %res = call <32 x i8> @llvm.x86.avx2.pcmpgt.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
-  ret <32 x i8> %res
-}
-declare <32 x i8> @llvm.x86.avx2.pcmpgt.b(<32 x i8>, <32 x i8>) nounwind readnone
-
-
-define <8 x i32> @test_x86_avx2_pcmpgt_d(<8 x i32> %a0, <8 x i32> %a1) {
-  ; CHECK: vpcmpgtd
-  %res = call <8 x i32> @llvm.x86.avx2.pcmpgt.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
-  ret <8 x i32> %res
-}
-declare <8 x i32> @llvm.x86.avx2.pcmpgt.d(<8 x i32>, <8 x i32>) nounwind readnone
-
-
-define <16 x i16> @test_x86_avx2_pcmpgt_w(<16 x i16> %a0, <16 x i16> %a1) {
-  ; CHECK: vpcmpgtw
-  %res = call <16 x i16> @llvm.x86.avx2.pcmpgt.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
-  ret <16 x i16> %res
-}
-declare <16 x i16> @llvm.x86.avx2.pcmpgt.w(<16 x i16>, <16 x i16>) nounwind readnone
-
-
 define <8 x i32> @test_x86_avx2_pmadd_wd(<16 x i16> %a0, <16 x i16> %a1) {
   ; CHECK: vpmaddwd
   %res = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %a0, <16 x i16> %a1) ; <<8 x i32>> [#uses=1]
@@ -553,14 +505,6 @@ define <16 x i16> @test_x86_avx2_pblendw(<16 x i16> %a0, <16 x i16> %a1) {
 declare <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16>, <16 x i16>, i32) nounwind readnone
 
 
-define <4 x i64> @test_x86_avx2_pcmpeqq(<4 x i64> %a0, <4 x i64> %a1) {
-  ; CHECK: vpcmpeqq
-  %res = call <4 x i64> @llvm.x86.avx2.pcmpeq.q(<4 x i64> %a0, <4 x i64> %a1) ; <<4 x i64>> [#uses=1]
-  ret <4 x i64> %res
-}
-declare <4 x i64> @llvm.x86.avx2.pcmpeq.q(<4 x i64>, <4 x i64>) nounwind readnone
-
-
 define <32 x i8> @test_x86_avx2_pmaxsb(<32 x i8> %a0, <32 x i8> %a1) {
   ; CHECK: vpmaxsb
   %res = call <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
@@ -729,14 +673,6 @@ define <4 x i64> @test_x86_avx2_pmul.dq(<8 x i32> %a0, <8 x i32> %a1) {
 declare <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32>, <8 x i32>) nounwind readnone
 
 
-define <4 x i64> @test_x86_avx2_pcmpgtq(<4 x i64> %a0, <4 x i64> %a1) {
-  ; CHECK: vpcmpgtq
-  %res = call <4 x i64> @llvm.x86.avx2.pcmpgt.q(<4 x i64> %a0, <4 x i64> %a1) ; <<4 x i64>> [#uses=1]
-  ret <4 x i64> %res
-}
-declare <4 x i64> @llvm.x86.avx2.pcmpgt.q(<4 x i64>, <4 x i64>) nounwind readnone
-
-
 define <4 x i64> @test_x86_avx2_vbroadcasti128(i8* %a0) {
   ; CHECK: vbroadcasti128
   %res = call <4 x i64> @llvm.x86.avx2.vbroadcasti128(i8* %a0) ; <<4 x i64>> [#uses=1]
diff --git a/test/CodeGen/X86/avx2-nontemporal.ll b/test/CodeGen/X86/avx2-nontemporal.ll
new file mode 100644
index 0000000..0768aae
--- /dev/null
+++ b/test/CodeGen/X86/avx2-nontemporal.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -march=x86 -mattr=+avx2 | FileCheck %s
+
+define void @f(<8 x float> %A, i8* %B, <4 x double> %C, i32 %D, <4 x i64> %E) {
+; CHECK: vmovntps
+  %cast = bitcast i8* %B to <8 x float>*
+  %A2 = fadd <8 x float> %A, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x4200000000000000>
+  store <8 x float> %A2, <8 x float>* %cast, align 16, !nontemporal !0
+; CHECK: vmovntdq
+  %cast1 = bitcast i8* %B to <4 x i64>*
+  %E2 = add <4 x i64> %E, <i64 1, i64 2, i64 3, i64 4>
+  store <4 x i64> %E2, <4 x i64>* %cast1, align 16, !nontemporal !0
+; CHECK: vmovntpd
+  %cast2 = bitcast i8* %B to <4 x double>*
+  %C2 = fadd <4 x double> %C, <double 0x0, double 0x0, double 0x0, double 0x4200000000000000>
+  store <4 x double> %C2, <4 x double>* %cast2, align 16, !nontemporal !0
+; CHECK: movnti
+  %cast3 = bitcast i8* %B to i32*
+  store i32 %D, i32* %cast3, align 16, !nontemporal !0
+  ret void
+}
+
+!0 = metadata !{i32 1}
diff --git a/test/CodeGen/X86/avx2-palignr.ll b/test/CodeGen/X86/avx2-palignr.ll
new file mode 100644
index 0000000..53b9da3
--- /dev/null
+++ b/test/CodeGen/X86/avx2-palignr.ll
@@ -0,0 +1,57 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
+
+define <8 x i32> @test1(<8 x i32> %A, <8 x i32> %B) nounwind {
+; CHECK: test1:
+; CHECK: vpalignr $4
+  %C = shufflevector <8 x i32> %A, <8 x i32> %B, <8 x i32> <i32 1, i32 2, i32 3, i32 8, i32 5, i32 6, i32 7, i32 12>
+  ret <8 x i32> %C
+}
+
+define <8 x i32> @test2(<8 x i32> %A, <8 x i32> %B) nounwind {
+; CHECK: test2:
+; CHECK: vpalignr $4
+  %C = shufflevector <8 x i32> %A, <8 x i32> %B, <8 x i32> <i32 1, i32 2, i32 3, i32 8, i32 5, i32 6, i32 undef, i32 12>
+  ret <8 x i32> %C
+}
+
+define <8 x i32> @test3(<8 x i32> %A, <8 x i32> %B) nounwind {
+; CHECK: test3:
+; CHECK: vpalignr $4
+  %C = shufflevector <8 x i32> %A, <8 x i32> %B, <8 x i32> <i32 1, i32 undef, i32 3, i32 8, i32 5, i32 6, i32 7, i32 12>
+  ret <8 x i32> %C
+}
+;
+define <8 x i32> @test4(<8 x i32> %A, <8 x i32> %B) nounwind {
+; CHECK: test4:
+; CHECK: vpalignr $8
+  %C = shufflevector <8 x i32> %A, <8 x i32> %B, <8 x i32> <i32 10, i32 11, i32 undef, i32 1, i32 14, i32 15, i32 4, i32 5>
+  ret <8 x i32> %C
+}
+
+define <16 x i16> @test5(<16 x i16> %A, <16 x i16> %B) nounwind {
+; CHECK: test5:
+; CHECK: vpalignr $6
+  %C = shufflevector <16 x i16> %A, <16 x i16> %B, <16 x i32> <i32 3, i32 4, i32 undef, i32 6, i32 7, i32 16, i32 17, i32 18, i32 11, i32 12, i32 13, i32 undef, i32 15, i32 24, i32 25, i32 26>
+  ret <16 x i16> %C
+}
+
+define <16 x i16> @test6(<16 x i16> %A, <16 x i16> %B) nounwind {
+; CHECK: test6:
+; CHECK: vpalignr $6
+  %C = shufflevector <16 x i16> %A, <16 x i16> %B, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 11, i32 12, i32 13, i32 undef, i32 15, i32 24, i32 25, i32 26>
+  ret <16 x i16> %C
+}
+
+define <16 x i16> @test7(<16 x i16> %A, <16 x i16> %B) nounwind {
+; CHECK: test7:
+; CHECK: vpalignr $6
+  %C = shufflevector <16 x i16> %A, <16 x i16> %B, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  ret <16 x i16> %C
+}
+
+define <32 x i8> @test8(<32 x i8> %A, <32 x i8> %B) nounwind {
+; CHECK: test8:
+; CHECK: palignr $5
+  %C = shufflevector <32 x i8> %A, <32 x i8> %B, <32 x i32> <i32 5, i32 6, i32 7, i32 undef, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 33, i32 34, i32 35, i32 36, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48, i32 49, i32 50, i32 51, i32 52>
+  ret <32 x i8> %C
+}
diff --git a/test/CodeGen/X86/avx2-shift.ll b/test/CodeGen/X86/avx2-shift.ll
index b6cf54e..1f192a0 100644
--- a/test/CodeGen/X86/avx2-shift.ll
+++ b/test/CodeGen/X86/avx2-shift.ll
@@ -231,7 +231,7 @@ define <32 x i8> @sra_v32i8_7(<32 x i8> %A) nounwind {
   %B = ashr <32 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
   ret <32 x i8> %B
 ; CHECK: sra_v32i8_7:
-; CHECK: vxorps
+; CHECK: vpxor
 ; CHECK: vpcmpgtb
 ; CHECK: ret
 }
diff --git a/test/CodeGen/X86/avx2-unpack.ll b/test/CodeGen/X86/avx2-unpack.ll
index aa97308..6d17443 100644
--- a/test/CodeGen/X86/avx2-unpack.ll
+++ b/test/CodeGen/X86/avx2-unpack.ll
@@ -55,3 +55,32 @@ entry:
   %shuffle.i = shufflevector <32 x i8> %src1, <32 x i8> %src2, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55>
   ret <32 x i8> %shuffle.i
 }
+
+; CHECK: vpunpckhdq
+define <8 x i32> @unpackhidq1_undef(<8 x i32> %src1) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <8 x i32> %src1, <8 x i32> %src1, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
+  ret <8 x i32> %shuffle.i
+}
+
+; CHECK: vpunpckhqdq
+define <4 x i64> @unpackhiqdq1_undef(<4 x i64> %src1) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <4 x i64> %src1, <4 x i64> %src1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+  ret <4 x i64> %shuffle.i
+}
+
+; CHECK: vpunpckhwd
+define <16 x i16> @unpackhwd_undef(<16 x i16> %src1) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <16 x i16> %src1, <16 x i16> %src1, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+  ret <16 x i16> %shuffle.i
+}
+
+; CHECK: vpunpcklwd
+define <16 x i16> @unpacklwd_undef(<16 x i16> %src1) nounwind uwtable readnone ssp {
+entry:
+  %shuffle.i = shufflevector <16 x i16> %src1, <16 x i16> %src1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
+  ret <16 x i16> %shuffle.i
+}
+
diff --git a/test/CodeGen/X86/avx2-vbroadcast.ll b/test/CodeGen/X86/avx2-vbroadcast.ll
index 142be33..fbabb15 100644
--- a/test/CodeGen/X86/avx2-vbroadcast.ll
+++ b/test/CodeGen/X86/avx2-vbroadcast.ll
@@ -140,3 +140,13 @@ entry:
   %q3 = insertelement <4 x i64> %q2, i64 %q, i32 3
   ret <4 x i64> %q3
 }
+
+; make sure that we still don't support broadcast double into 128-bit vector
+; this used to crash
+define <2 x double> @I(double* %ptr) nounwind uwtable readnone ssp {
+entry:
+  %q = load double* %ptr, align 4
+  %vecinit.i = insertelement <2 x double> undef, double %q, i32 0
+  %vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
+  ret <2 x double> %vecinit2.i
+}
diff --git a/test/CodeGen/X86/blend-msb.ll b/test/CodeGen/X86/blend-msb.ll
new file mode 100644
index 0000000..3a10c70
--- /dev/null
+++ b/test/CodeGen/X86/blend-msb.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -promote-elements -mattr=+sse41 | FileCheck %s
+
+
+; In this test we check that sign-extend of the mask bit is performed by
+; shifting the needed bit to the MSB, and not using shl+sra.
+
+;CHECK: vsel_float
+;CHECK: pslld
+;CHECK-NEXT: blendvps
+;CHECK: ret
+define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
+  %vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x float> %v1, <4 x float> %v2
+  ret <4 x float> %vsel
+}
+
+;CHECK: vsel_4xi8
+;CHECK: pslld
+;CHECK-NEXT: blendvps
+;CHECK: ret
+define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) {
+  %vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i8> %v1, <4 x i8> %v2
+  ret <4 x i8> %vsel
+}
+
+
+; We do not have native support for v8i16 blends and we have to use the
+; blendvb instruction or a sequence of NAND/OR/AND. Make sure that we do not r
+; reduce the mask in this case.
+;CHECK: vsel_8xi16
+;CHECK: psllw
+;CHECK: psraw
+;CHECK: pblendvb
+;CHECK: ret
+define <8 x i16> @vsel_8xi16(<8 x i16> %v1, <8 x i16> %v2) {
+  %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i16> %v1, <8 x i16> %v2
+  ret <8 x i16> %vsel
+}
diff --git a/test/CodeGen/X86/bmi.ll b/test/CodeGen/X86/bmi.ll
index cde9b48..43c47c0 100644
--- a/test/CodeGen/X86/bmi.ll
+++ b/test/CodeGen/X86/bmi.ll
@@ -1,40 +1,65 @@
 ; RUN: llc < %s -march=x86-64 -mattr=+bmi,+bmi2 | FileCheck %s
 
-define i32 @t1(i32 %x) nounwind  {
-       %tmp = tail call i32 @llvm.cttz.i32( i32 %x, i1 false )
-       ret i32 %tmp
+declare i8 @llvm.cttz.i8(i8, i1) nounwind readnone
+declare i16 @llvm.cttz.i16(i16, i1) nounwind readnone
+declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone
+declare i64 @llvm.cttz.i64(i64, i1) nounwind readnone
+
+define i8 @t1(i8 %x) nounwind  {
+  %tmp = tail call i8 @llvm.cttz.i8( i8 %x, i1 false )
+  ret i8 %tmp
 ; CHECK: t1:
 ; CHECK: tzcntl
 }
 
-declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone
-
 define i16 @t2(i16 %x) nounwind  {
-       %tmp = tail call i16 @llvm.cttz.i16( i16 %x, i1 false )
-       ret i16 %tmp
+  %tmp = tail call i16 @llvm.cttz.i16( i16 %x, i1 false )
+  ret i16 %tmp
 ; CHECK: t2:
 ; CHECK: tzcntw
 }
 
-declare i16 @llvm.cttz.i16(i16, i1) nounwind readnone
-
-define i64 @t3(i64 %x) nounwind  {
-       %tmp = tail call i64 @llvm.cttz.i64( i64 %x, i1 false )
-       ret i64 %tmp
+define i32 @t3(i32 %x) nounwind  {
+  %tmp = tail call i32 @llvm.cttz.i32( i32 %x, i1 false )
+  ret i32 %tmp
 ; CHECK: t3:
+; CHECK: tzcntl
+}
+
+define i64 @t4(i64 %x) nounwind  {
+  %tmp = tail call i64 @llvm.cttz.i64( i64 %x, i1 false )
+  ret i64 %tmp
+; CHECK: t4:
 ; CHECK: tzcntq
 }
 
-declare i64 @llvm.cttz.i64(i64, i1) nounwind readnone
+define i8 @t5(i8 %x) nounwind  {
+  %tmp = tail call i8 @llvm.cttz.i8( i8 %x, i1 true )
+  ret i8 %tmp
+; CHECK: t5:
+; CHECK: tzcntl
+}
 
-define i8 @t4(i8 %x) nounwind  {
-       %tmp = tail call i8 @llvm.cttz.i8( i8 %x, i1 false )
-       ret i8 %tmp
-; CHECK: t4:
+define i16 @t6(i16 %x) nounwind  {
+  %tmp = tail call i16 @llvm.cttz.i16( i16 %x, i1 true )
+  ret i16 %tmp
+; CHECK: t6:
 ; CHECK: tzcntw
 }
 
-declare i8 @llvm.cttz.i8(i8, i1) nounwind readnone
+define i32 @t7(i32 %x) nounwind  {
+  %tmp = tail call i32 @llvm.cttz.i32( i32 %x, i1 true )
+  ret i32 %tmp
+; CHECK: t7:
+; CHECK: tzcntl
+}
+
+define i64 @t8(i64 %x) nounwind  {
+  %tmp = tail call i64 @llvm.cttz.i64( i64 %x, i1 true )
+  ret i64 %tmp
+; CHECK: t8:
+; CHECK: tzcntq
+}
 
 define i32 @andn32(i32 %x, i32 %y) nounwind readnone {
   %tmp1 = xor i32 %x, -1
diff --git a/test/CodeGen/X86/brcond.ll b/test/CodeGen/X86/brcond.ll
index 5cdc100..44670c8 100644
--- a/test/CodeGen/X86/brcond.ll
+++ b/test/CodeGen/X86/brcond.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=core2 | FileCheck %s
+; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=penryn | FileCheck %s
+
 ; rdar://7475489
 
 define i32 @test1(i32 %a, i32 %b) nounwind ssp {
@@ -106,3 +107,4 @@ bb2:                                              ; preds = %entry, %bb1
   %.0 = fptrunc double %.0.in to float            ; <float> [#uses=1]
   ret float %.0
 }
+
diff --git a/test/CodeGen/X86/byval6.ll b/test/CodeGen/X86/byval6.ll
index b060369..2d39901 100644
--- a/test/CodeGen/X86/byval6.ll
+++ b/test/CodeGen/X86/byval6.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | grep add | not grep 16
+; RUN: llc < %s -mcpu=generic -march=x86 | grep add | not grep 16
 
 	%struct.W = type { x86_fp80, x86_fp80 }
 @B = global %struct.W { x86_fp80 0xK4001A000000000000000, x86_fp80 0xK4001C000000000000000 }, align 32
diff --git a/test/CodeGen/X86/cfstring.ll b/test/CodeGen/X86/cfstring.ll
new file mode 100644
index 0000000..7420ce7
--- /dev/null
+++ b/test/CodeGen/X86/cfstring.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+; <rdar://problem/10564621>
+
+%0 = type opaque
+%struct.NSConstantString = type { i32*, i32, i8*, i32 }
+
+; Make sure that the string ends up the the correct section.
+
+; CHECK:        .section __TEXT,__cstring
+; CHECK-NEXT: l_.str3:
+
+; CHECK:        .section  __DATA,__cfstring
+; CHECK-NEXT:   .align  4
+; CHECK-NEXT: L__unnamed_cfstring_4:
+; CHECK-NEXT:   .quad  ___CFConstantStringClassReference
+; CHECK-NEXT:   .long  1992
+; CHECK-NEXT:   .space  4
+; CHECK-NEXT:   .quad  l_.str3
+; CHECK-NEXT:   .long  0
+; CHECK-NEXT:   .space  4
+
+@isLogVisible = global i8 0, align 1
+@__CFConstantStringClassReference = external global [0 x i32]
+@.str3 = linker_private unnamed_addr constant [1 x i8] zeroinitializer, align 1
+@_unnamed_cfstring_4 = private constant %struct.NSConstantString { i32* getelementptr inbounds ([0 x i32]* @__CFConstantStringClassReference, i32 0, i32 0), i32 1992, i8* getelementptr inbounds ([1 x i8]* @.str3, i32 0, i32 0), i32 0 }, section "__DATA,__cfstring"
+@null.array = weak_odr constant [1 x i8] zeroinitializer, align 1
+
+define linkonce_odr void @bar() nounwind ssp align 2 {
+entry:
+  %stack = alloca i8*, align 4
+  %call = call %0* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to %0* (i8*, i8*, %0*)*)(i8* null, i8* null, %0* bitcast (%struct.NSConstantString* @_unnamed_cfstring_4 to %0*))
+  store i8* getelementptr inbounds ([1 x i8]* @null.array, i32 0, i32 0), i8** %stack, align 4
+  ret void
+}
+
+declare i8* @objc_msgSend(i8*, i8*, ...) nonlazybind
diff --git a/test/CodeGen/X86/clz.ll b/test/CodeGen/X86/clz.ll
index 9b26efd..763079f 100644
--- a/test/CodeGen/X86/clz.ll
+++ b/test/CodeGen/X86/clz.ll
@@ -1,48 +1,141 @@
-; RUN: llc < %s -march=x86 -mcpu=yonah | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=yonah | FileCheck %s
 
-define i32 @t1(i32 %x) nounwind  {
-	%tmp = tail call i32 @llvm.ctlz.i32( i32 %x, i1 true )
-	ret i32 %tmp
-; CHECK: t1:
-; CHECK: bsrl
-; CHECK: cmov
+declare i8 @llvm.cttz.i8(i8, i1)
+declare i16 @llvm.cttz.i16(i16, i1)
+declare i32 @llvm.cttz.i32(i32, i1)
+declare i64 @llvm.cttz.i64(i64, i1)
+declare i8 @llvm.ctlz.i8(i8, i1)
+declare i16 @llvm.ctlz.i16(i16, i1)
+declare i32 @llvm.ctlz.i32(i32, i1)
+declare i64 @llvm.ctlz.i64(i64, i1)
+
+define i8 @cttz_i8(i8 %x)  {
+  %tmp = call i8 @llvm.cttz.i8( i8 %x, i1 true )
+  ret i8 %tmp
+; CHECK: cttz_i8:
+; CHECK: bsfl
+; CHECK-NOT: cmov
+; CHECK: ret
 }
 
-declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone 
+define i16 @cttz_i16(i16 %x)  {
+  %tmp = call i16 @llvm.cttz.i16( i16 %x, i1 true )
+  ret i16 %tmp
+; CHECK: cttz_i16:
+; CHECK: bsfw
+; CHECK-NOT: cmov
+; CHECK: ret
+}
 
-define i32 @t2(i32 %x) nounwind  {
-	%tmp = tail call i32 @llvm.cttz.i32( i32 %x, i1 true )
-	ret i32 %tmp
-; CHECK: t2:
+define i32 @cttz_i32(i32 %x)  {
+  %tmp = call i32 @llvm.cttz.i32( i32 %x, i1 true )
+  ret i32 %tmp
+; CHECK: cttz_i32:
 ; CHECK: bsfl
-; CHECK: cmov
+; CHECK-NOT: cmov
+; CHECK: ret
+}
+
+define i64 @cttz_i64(i64 %x)  {
+  %tmp = call i64 @llvm.cttz.i64( i64 %x, i1 true )
+  ret i64 %tmp
+; CHECK: cttz_i64:
+; CHECK: bsfq
+; CHECK-NOT: cmov
+; CHECK: ret
 }
 
-declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone 
+define i8 @ctlz_i8(i8 %x) {
+entry:
+  %tmp2 = call i8 @llvm.ctlz.i8( i8 %x, i1 true )
+  ret i8 %tmp2
+; CHECK: ctlz_i8:
+; CHECK: bsrl
+; CHECK-NOT: cmov
+; CHECK: xorl $7,
+; CHECK: ret
+}
 
-define i16 @t3(i16 %x, i16 %y) nounwind  {
+define i16 @ctlz_i16(i16 %x) {
 entry:
-        %tmp1 = add i16 %x, %y
-	%tmp2 = tail call i16 @llvm.ctlz.i16( i16 %tmp1, i1 true )		; <i16> [#uses=1]
-	ret i16 %tmp2
-; CHECK: t3:
+  %tmp2 = call i16 @llvm.ctlz.i16( i16 %x, i1 true )
+  ret i16 %tmp2
+; CHECK: ctlz_i16:
 ; CHECK: bsrw
-; CHECK: cmov
+; CHECK-NOT: cmov
+; CHECK: xorl $15,
+; CHECK: ret
+}
+
+define i32 @ctlz_i32(i32 %x) {
+  %tmp = call i32 @llvm.ctlz.i32( i32 %x, i1 true )
+  ret i32 %tmp
+; CHECK: ctlz_i32:
+; CHECK: bsrl
+; CHECK-NOT: cmov
+; CHECK: xorl $31,
+; CHECK: ret
+}
+
+define i64 @ctlz_i64(i64 %x) {
+  %tmp = call i64 @llvm.ctlz.i64( i64 %x, i1 true )
+  ret i64 %tmp
+; CHECK: ctlz_i64:
+; CHECK: bsrq
+; CHECK-NOT: cmov
+; CHECK: xorq $63,
+; CHECK: ret
 }
 
-declare i16 @llvm.ctlz.i16(i16, i1) nounwind readnone 
+define i32 @ctlz_i32_cmov(i32 %n) {
+entry:
+; Generate a cmov to handle zero inputs when necessary.
+; CHECK: ctlz_i32_cmov:
+; CHECK: bsrl
+; CHECK: cmov
+; CHECK: xorl $31,
+; CHECK: ret
+  %tmp1 = call i32 @llvm.ctlz.i32(i32 %n, i1 false)
+  ret i32 %tmp1
+}
 
+define i32 @ctlz_i32_fold_cmov(i32 %n) {
+entry:
 ; Don't generate the cmovne when the source is known non-zero (and bsr would
 ; not set ZF).
 ; rdar://9490949
-
-define i32 @t4(i32 %n) nounwind {
-entry:
-; CHECK: t4:
+; CHECK: ctlz_i32_fold_cmov:
 ; CHECK: bsrl
 ; CHECK-NOT: cmov
+; CHECK: xorl $31,
 ; CHECK: ret
   %or = or i32 %n, 1
-  %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %or, i1 true)
+  %tmp1 = call i32 @llvm.ctlz.i32(i32 %or, i1 false)
   ret i32 %tmp1
 }
+
+define i32 @ctlz_bsr(i32 %n) {
+entry:
+; Don't generate any xors when a 'ctlz' intrinsic is actually used to compute
+; the most significant bit, which is what 'bsr' does natively.
+; CHECK: ctlz_bsr:
+; CHECK: bsrl
+; CHECK-NOT: xorl
+; CHECK: ret
+  %ctlz = call i32 @llvm.ctlz.i32(i32 %n, i1 true)
+  %bsr = xor i32 %ctlz, 31
+  ret i32 %bsr
+}
+
+define i32 @ctlz_bsr_cmov(i32 %n) {
+entry:
+; Same as ctlz_bsr, but ensure this happens even when there is a potential
+; zero.
+; CHECK: ctlz_bsr_cmov:
+; CHECK: bsrl
+; CHECK-NOT: xorl
+; CHECK: ret
+  %ctlz = call i32 @llvm.ctlz.i32(i32 %n, i1 false)
+  %bsr = xor i32 %ctlz, 31
+  ret i32 %bsr
+}
diff --git a/test/CodeGen/X86/cmpxchg16b.ll b/test/CodeGen/X86/cmpxchg16b.ll
index ba1c4ef..edbd0bc 100644
--- a/test/CodeGen/X86/cmpxchg16b.ll
+++ b/test/CodeGen/X86/cmpxchg16b.ll
@@ -3,7 +3,7 @@
 ; Basic 128-bit cmpxchg
 define void @t1(i128* nocapture %p) nounwind ssp {
 entry:
-; CHECK movl	$1, %ebx
+; CHECK: movl	$1, %ebx
 ; CHECK: lock
 ; CHECK-NEXT: cmpxchg16b
   %r = cmpxchg i128* %p, i128 0, i128 1 seq_cst
diff --git a/test/CodeGen/X86/dbg-merge-loc-entry.ll b/test/CodeGen/X86/dbg-merge-loc-entry.ll
index afe1729..c35935f 100644
--- a/test/CodeGen/X86/dbg-merge-loc-entry.ll
+++ b/test/CodeGen/X86/dbg-merge-loc-entry.ll
@@ -10,7 +10,7 @@ target triple = "x86_64-apple-darwin8"
 ;CHECK-NEXT:    .short  Lset
 ;CHECK-NEXT: Ltmp
 ;CHECK-NEXT:	.byte	85                      ## DW_OP_reg5
-;CHECK-NEXT: Ltmp7
+;CHECK-NEXT: Ltmp5
 ;CHECK-NEXT:	.quad	0
 ;CHECK-NEXT:	.quad	0
 
diff --git a/test/CodeGen/X86/dec-eflags-lower.ll b/test/CodeGen/X86/dec-eflags-lower.ll
index 458160a..190819f 100644
--- a/test/CodeGen/X86/dec-eflags-lower.ll
+++ b/test/CodeGen/X86/dec-eflags-lower.ll
@@ -2,6 +2,7 @@
 
 %struct.obj = type { i64 }
 
+; CHECK: _Z7releaseP3obj
 define void @_Z7releaseP3obj(%struct.obj* nocapture %o) nounwind uwtable ssp {
 entry:
 ; CHECK: decq	(%{{rdi|rcx}})
@@ -22,8 +23,45 @@ return:                                           ; preds = %entry, %if.end
   ret void
 }
 
+@c = common global i64 0, align 8
+@a = common global i32 0, align 4
+@.str = private unnamed_addr constant [5 x i8] c"%ld\0A\00", align 1
+@b = common global i32 0, align 4
+
+; CHECK: test
+define i32 @test() nounwind uwtable ssp {
+entry:
+; CHECK: decq
+; CHECK-NOT: decq
+%0 = load i64* @c, align 8, !tbaa !0
+%dec.i = add nsw i64 %0, -1
+store i64 %dec.i, i64* @c, align 8, !tbaa !0
+%tobool.i = icmp ne i64 %dec.i, 0
+%lor.ext.i = zext i1 %tobool.i to i32
+store i32 %lor.ext.i, i32* @a, align 4, !tbaa !3
+%call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i64 0, i64 0), i64 %dec.i) nounwind
+ret i32 0
+}
+
+; CHECK: test2
+define i32 @test2() nounwind uwtable ssp {
+entry:
+; CHECK-NOT: decq ({{.*}})
+%0 = load i64* @c, align 8, !tbaa !0
+%dec.i = add nsw i64 %0, -1
+store i64 %dec.i, i64* @c, align 8, !tbaa !0
+%tobool.i = icmp ne i64 %0, 0
+%lor.ext.i = zext i1 %tobool.i to i32
+store i32 %lor.ext.i, i32* @a, align 4, !tbaa !3
+%call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i64 0, i64 0), i64 %dec.i) nounwind
+ret i32 0
+}
+
+declare i32 @printf(i8* nocapture, ...) nounwind
+
 declare void @free(i8* nocapture) nounwind
 
 !0 = metadata !{metadata !"long", metadata !1}
 !1 = metadata !{metadata !"omnipotent char", metadata !2}
 !2 = metadata !{metadata !"Simple C/C++ TBAA", null}
+!3 = metadata !{metadata !"int", metadata !1}
diff --git a/test/CodeGen/X86/dg.exp b/test/CodeGen/X86/dg.exp
deleted file mode 100644
index 629a147..0000000
--- a/test/CodeGen/X86/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target X86] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/X86/divide-by-constant.ll b/test/CodeGen/X86/divide-by-constant.ll
index 87c1be5..e577ecb 100644
--- a/test/CodeGen/X86/divide-by-constant.ll
+++ b/test/CodeGen/X86/divide-by-constant.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i686-pc-linux-gnu -asm-verbose=0 | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=i686-pc-linux-gnu -asm-verbose=0 | FileCheck %s
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
 target triple = "i686-pc-linux-gnu"
 
diff --git a/test/CodeGen/X86/empty-functions.ll b/test/CodeGen/X86/empty-functions.ll
index 874c53a..ac5174d 100644
--- a/test/CodeGen/X86/empty-functions.ll
+++ b/test/CodeGen/X86/empty-functions.ll
@@ -6,14 +6,11 @@ entry:
   unreachable
 }
 ; CHECK-NO-FP:     _func:
-; CHECK-NO-FP-NEXT: :
 ; CHECK-NO-FP-NEXT: .cfi_startproc
 ; CHECK-NO-FP:     nop
-; CHECK-NO-FP-NEXT: :
 ; CHECK-NO-FP-NEXT: .cfi_endproc
 
 ; CHECK-FP:      _func:
-; CHECK-FP-NEXT: :
 ; CHECK-FP-NEXT: .cfi_startproc
 ; CHECK-FP-NEXT: :
 ; CHECK-FP-NEXT: pushq %rbp
@@ -25,5 +22,4 @@ entry:
 ; CHECK-FP-NEXT: :
 ; CHECK-FP-NEXT: .cfi_def_cfa_register %rbp
 ; CHECK-FP-NEXT: nop
-; CHECK-FP-NEXT: :
 ; CHECK-FP-NEXT: .cfi_endproc
diff --git a/test/CodeGen/X86/epilogue.ll b/test/CodeGen/X86/epilogue.ll
index 52dcb61..0f16a64 100644
--- a/test/CodeGen/X86/epilogue.ll
+++ b/test/CodeGen/X86/epilogue.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86 | not grep lea
-; RUN: llc < %s -march=x86 | grep {movl	%ebp}
+; RUN: llc < %s -mcpu=generic -march=x86 | not grep lea
+; RUN: llc < %s -mcpu=generic -march=x86 | grep {movl	%ebp}
 
 declare void @bar(<2 x i64>* %n)
 
diff --git a/test/CodeGen/X86/fast-cc-merge-stack-adj.ll b/test/CodeGen/X86/fast-cc-merge-stack-adj.ll
index e151821..e4982f0 100644
--- a/test/CodeGen/X86/fast-cc-merge-stack-adj.ll
+++ b/test/CodeGen/X86/fast-cc-merge-stack-adj.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
+; RUN: llc < %s -mcpu=generic -march=x86 -x86-asm-syntax=intel | \
 ; RUN:   grep {add	ESP, 8}
 
 target triple = "i686-pc-linux-gnu"
diff --git a/test/CodeGen/X86/fast-isel-bc.ll b/test/CodeGen/X86/fast-isel-bc.ll
index 193e436..8ac15cd 100644
--- a/test/CodeGen/X86/fast-isel-bc.ll
+++ b/test/CodeGen/X86/fast-isel-bc.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O0 -regalloc=basic -march=x86-64 -mattr=+mmx,+sse2 | FileCheck %s
+; RUN: llc < %s -O0 -march=x86-64 -mattr=+mmx,+sse2 | FileCheck %s
 ; PR4684
 
 target datalayout =
diff --git a/test/CodeGen/X86/fast-isel-x86.ll b/test/CodeGen/X86/fast-isel-x86.ll
index 19972f7..b9598bb 100644
--- a/test/CodeGen/X86/fast-isel-x86.ll
+++ b/test/CodeGen/X86/fast-isel-x86.ll
@@ -1,4 +1,4 @@
-; RUN: llc -fast-isel -O0 -mtriple=i386-apple-darwin10 -relocation-model=pic < %s | FileCheck %s
+; RUN: llc -fast-isel -O0 -mcpu=generic -mtriple=i386-apple-darwin10 -relocation-model=pic < %s | FileCheck %s
 
 ; This should use flds to set the return value.
 ; CHECK: test0:
diff --git a/test/CodeGen/X86/fast-isel.ll b/test/CodeGen/X86/fast-isel.ll
index 8391860..c88d529 100644
--- a/test/CodeGen/X86/fast-isel.ll
+++ b/test/CodeGen/X86/fast-isel.ll
@@ -99,7 +99,6 @@ define void @load_store_i1(i1* %p, i1* %q) nounwind {
   ret void
 }
 
-
 @crash_test1x = external global <2 x i32>, align 8
 
 define void @crash_test1() nounwind ssp {
@@ -108,3 +107,13 @@ define void @crash_test1() nounwind ssp {
   ret void
 }
 
+declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
+
+define i64* @life() nounwind {
+  %a1 = alloca i64*, align 8
+  %a2 = bitcast i64** %a1 to i8*
+  call void @llvm.lifetime.start(i64 -1, i8* %a2) nounwind      
+  %a3 = load i64** %a1, align 8
+  ret i64* %a3
+}
+
diff --git a/test/CodeGen/X86/fltused.ll b/test/CodeGen/X86/fltused.ll
index 2ffcb96..81511a3 100644
--- a/test/CodeGen/X86/fltused.ll
+++ b/test/CodeGen/X86/fltused.ll
@@ -4,6 +4,8 @@
 
 ; RUN: llc < %s -mtriple i686-pc-win32 | FileCheck %s --check-prefix WIN32
 ; RUN: llc < %s -mtriple x86_64-pc-win32 | FileCheck %s --check-prefix WIN64
+; RUN: llc < %s -O0 -mtriple i686-pc-win32 | FileCheck %s --check-prefix WIN32
+; RUN: llc < %s -O0 -mtriple x86_64-pc-win32 | FileCheck %s --check-prefix WIN64
 
 @.str = private constant [4 x i8] c"%f\0A\00"
 
diff --git a/test/CodeGen/X86/fltused_function_pointer.ll b/test/CodeGen/X86/fltused_function_pointer.ll
new file mode 100644
index 0000000..cfe484a
--- /dev/null
+++ b/test/CodeGen/X86/fltused_function_pointer.ll
@@ -0,0 +1,19 @@
+; The purpose of this test to to verify that the fltused symbol is emitted when
+; any function is called with floating point arguments on Windows. And that it
+; is not emitted otherwise.
+
+; RUN: llc < %s -mtriple i686-pc-win32 | FileCheck %s --check-prefix WIN32
+; RUN: llc < %s -mtriple x86_64-pc-win32 | FileCheck %s --check-prefix WIN64
+; RUN: llc < %s -O0 -mtriple i686-pc-win32 | FileCheck %s --check-prefix WIN32
+; RUN: llc < %s -O0 -mtriple x86_64-pc-win32 | FileCheck %s --check-prefix WIN64
+
+@.str = private constant [4 x i8] c"%f\0A\00"
+
+define i32 @foo(i32 (i8*, ...)* %f) nounwind {
+entry:
+  %call = tail call i32 (i8*, ...)* %f(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), double 1.000000e+000) nounwind
+  ret i32 0
+}
+
+; WIN32: .globl __fltused
+; WIN64: .globl _fltused
diff --git a/test/CodeGen/X86/fma4-intrinsics-x86_64.ll b/test/CodeGen/X86/fma4-intrinsics-x86_64.ll
index bd94c13..5ed03ef 100644
--- a/test/CodeGen/X86/fma4-intrinsics-x86_64.ll
+++ b/test/CodeGen/X86/fma4-intrinsics-x86_64.ll
@@ -6,6 +6,20 @@ define < 4 x float > @test_x86_fma4_vfmadd_ss(< 4 x float > %a0, < 4 x float > %
   %res = call < 4 x float > @llvm.x86.fma4.vfmadd.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
   ret < 4 x float > %res
 }
+define < 4 x float > @test_x86_fma4_vfmadd_ss_load(< 4 x float > %a0, < 4 x float > %a1, float* %a2) {
+  ; CHECK: vfmaddss (%{{.*}})
+  %x = load float *%a2
+  %y = insertelement <4 x float> undef, float %x, i32 0
+  %res = call < 4 x float > @llvm.x86.fma4.vfmadd.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %y) ; <i64> [#uses=1]
+  ret < 4 x float > %res
+}
+define < 4 x float > @test_x86_fma4_vfmadd_ss_load2(< 4 x float > %a0, float* %a1, < 4 x float > %a2) {
+  ; CHECK: vfmaddss %{{.*}}, (%{{.*}})
+  %x = load float *%a1
+  %y = insertelement <4 x float> undef, float %x, i32 0
+  %res = call < 4 x float > @llvm.x86.fma4.vfmadd.ss(< 4 x float > %a0, < 4 x float > %y, < 4 x float > %a2) ; <i64> [#uses=1]
+  ret < 4 x float > %res
+}
 declare < 4 x float > @llvm.x86.fma4.vfmadd.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
 
 define < 2 x double > @test_x86_fma4_vfmadd_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
@@ -13,6 +27,20 @@ define < 2 x double > @test_x86_fma4_vfmadd_sd(< 2 x double > %a0, < 2 x double
   %res = call < 2 x double > @llvm.x86.fma4.vfmadd.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
   ret < 2 x double > %res
 }
+define < 2 x double > @test_x86_fma4_vfmadd_sd_load(< 2 x double > %a0, < 2 x double > %a1, double* %a2) {
+  ; CHECK: vfmaddsd (%{{.*}})
+  %x = load double *%a2
+  %y = insertelement <2 x double> undef, double %x, i32 0
+  %res = call < 2 x double > @llvm.x86.fma4.vfmadd.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %y) ; <i64> [#uses=1]
+  ret < 2 x double > %res
+}
+define < 2 x double > @test_x86_fma4_vfmadd_sd_load2(< 2 x double > %a0, double* %a1, < 2 x double > %a2) {
+  ; CHECK: vfmaddsd %{{.*}}, (%{{.*}})
+  %x = load double *%a1
+  %y = insertelement <2 x double> undef, double %x, i32 0
+  %res = call < 2 x double > @llvm.x86.fma4.vfmadd.sd(< 2 x double > %a0, < 2 x double > %y, < 2 x double > %a2) ; <i64> [#uses=1]
+  ret < 2 x double > %res
+}
 declare < 2 x double > @llvm.x86.fma4.vfmadd.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
 
 define < 4 x float > @test_x86_fma4_vfmadd_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
@@ -20,6 +48,18 @@ define < 4 x float > @test_x86_fma4_vfmadd_ps(< 4 x float > %a0, < 4 x float > %
   %res = call < 4 x float > @llvm.x86.fma4.vfmadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
   ret < 4 x float > %res
 }
+define < 4 x float > @test_x86_fma4_vfmadd_ps_load(< 4 x float > %a0, < 4 x float > %a1, < 4 x float >* %a2) {
+  ; CHECK: vfmaddps (%{{.*}})
+  %x = load <4 x float>* %a2
+  %res = call < 4 x float > @llvm.x86.fma4.vfmadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %x) ; <i64> [#uses=1]
+  ret < 4 x float > %res
+}
+define < 4 x float > @test_x86_fma4_vfmadd_ps_load2(< 4 x float > %a0, < 4 x float >* %a1, < 4 x float > %a2) {
+  ; CHECK: vfmaddps %{{.*}}, (%{{.*}})
+  %x = load <4 x float>* %a1
+  %res = call < 4 x float > @llvm.x86.fma4.vfmadd.ps(< 4 x float > %a0, < 4 x float > %x, < 4 x float > %a2) ; <i64> [#uses=1]
+  ret < 4 x float > %res
+}
 declare < 4 x float > @llvm.x86.fma4.vfmadd.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
 
 define < 2 x double > @test_x86_fma4_vfmadd_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
@@ -27,6 +67,18 @@ define < 2 x double > @test_x86_fma4_vfmadd_pd(< 2 x double > %a0, < 2 x double
   %res = call < 2 x double > @llvm.x86.fma4.vfmadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
   ret < 2 x double > %res
 }
+define < 2 x double > @test_x86_fma4_vfmadd_pd_load(< 2 x double > %a0, < 2 x double > %a1, < 2 x double >* %a2) {
+  ; CHECK: vfmaddpd (%{{.*}})
+  %x = load <2 x double>* %a2
+  %res = call < 2 x double > @llvm.x86.fma4.vfmadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %x) ; <i64> [#uses=1]
+  ret < 2 x double > %res
+}
+define < 2 x double > @test_x86_fma4_vfmadd_pd_load2(< 2 x double > %a0, < 2 x double >* %a1, < 2 x double > %a2) {
+  ; CHECK: vfmaddpd %{{.*}}, (%{{.*}})
+  %x = load <2 x double>* %a1
+  %res = call < 2 x double > @llvm.x86.fma4.vfmadd.pd(< 2 x double > %a0, < 2 x double > %x, < 2 x double > %a2) ; <i64> [#uses=1]
+  ret < 2 x double > %res
+}
 declare < 2 x double > @llvm.x86.fma4.vfmadd.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
 
 define < 8 x float > @test_x86_fma4_vfmadd_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
diff --git a/test/CodeGen/X86/fold-and-shift.ll b/test/CodeGen/X86/fold-and-shift.ll
index 9f79f77..93baa0e 100644
--- a/test/CodeGen/X86/fold-and-shift.ll
+++ b/test/CodeGen/X86/fold-and-shift.ll
@@ -1,21 +1,77 @@
-; RUN: llc < %s -march=x86 | not grep and
+; RUN: llc < %s -march=x86 | FileCheck %s
 
 define i32 @t1(i8* %X, i32 %i) {
+; CHECK: t1:
+; CHECK-NOT: and
+; CHECK: movzbl
+; CHECK: movl (%{{...}},%{{...}},4),
+; CHECK: ret
+
 entry:
-	%tmp2 = shl i32 %i, 2		; <i32> [#uses=1]
-	%tmp4 = and i32 %tmp2, 1020		; <i32> [#uses=1]
-	%tmp7 = getelementptr i8* %X, i32 %tmp4		; <i8*> [#uses=1]
-	%tmp78 = bitcast i8* %tmp7 to i32*		; <i32*> [#uses=1]
-	%tmp9 = load i32* %tmp78, align 4		; <i32> [#uses=1]
-	ret i32 %tmp9
+  %tmp2 = shl i32 %i, 2
+  %tmp4 = and i32 %tmp2, 1020
+  %tmp7 = getelementptr i8* %X, i32 %tmp4
+  %tmp78 = bitcast i8* %tmp7 to i32*
+  %tmp9 = load i32* %tmp78
+  ret i32 %tmp9
 }
 
 define i32 @t2(i16* %X, i32 %i) {
+; CHECK: t2:
+; CHECK-NOT: and
+; CHECK: movzwl
+; CHECK: movl (%{{...}},%{{...}},4),
+; CHECK: ret
+
+entry:
+  %tmp2 = shl i32 %i, 1
+  %tmp4 = and i32 %tmp2, 131070
+  %tmp7 = getelementptr i16* %X, i32 %tmp4
+  %tmp78 = bitcast i16* %tmp7 to i32*
+  %tmp9 = load i32* %tmp78
+  ret i32 %tmp9
+}
+
+define i32 @t3(i16* %i.ptr, i32* %arr) {
+; This case is tricky. The lshr followed by a gep will produce a lshr followed
+; by an and to remove the low bits. This can be simplified by doing the lshr by
+; a greater constant and using the addressing mode to scale the result back up.
+; To make matters worse, because of the two-phase zext of %i and their reuse in
+; the function, the DAG can get confusing trying to re-use both of them and
+; prevent easy analysis of the mask in order to match this.
+; CHECK: t3:
+; CHECK-NOT: and
+; CHECK: shrl
+; CHECK: addl (%{{...}},%{{...}},4),
+; CHECK: ret
+
+entry:
+  %i = load i16* %i.ptr
+  %i.zext = zext i16 %i to i32
+  %index = lshr i32 %i.zext, 11
+  %val.ptr = getelementptr inbounds i32* %arr, i32 %index
+  %val = load i32* %val.ptr
+  %sum = add i32 %val, %i.zext
+  ret i32 %sum
+}
+
+define i32 @t4(i16* %i.ptr, i32* %arr) {
+; A version of @t3 that has more zero extends and more re-use of intermediate
+; values. This exercise slightly different bits of canonicalization.
+; CHECK: t4:
+; CHECK-NOT: and
+; CHECK: shrl
+; CHECK: addl (%{{...}},%{{...}},4),
+; CHECK: ret
+
 entry:
-	%tmp2 = shl i32 %i, 1		; <i32> [#uses=1]
-	%tmp4 = and i32 %tmp2, 131070		; <i32> [#uses=1]
-	%tmp7 = getelementptr i16* %X, i32 %tmp4		; <i16*> [#uses=1]
-	%tmp78 = bitcast i16* %tmp7 to i32*		; <i32*> [#uses=1]
-	%tmp9 = load i32* %tmp78, align 4		; <i32> [#uses=1]
-	ret i32 %tmp9
+  %i = load i16* %i.ptr
+  %i.zext = zext i16 %i to i32
+  %index = lshr i32 %i.zext, 11
+  %index.zext = zext i32 %index to i64
+  %val.ptr = getelementptr inbounds i32* %arr, i64 %index.zext
+  %val = load i32* %val.ptr
+  %sum.1 = add i32 %val, %i.zext
+  %sum.2 = add i32 %sum.1, %index
+  ret i32 %sum.2
 }
diff --git a/test/CodeGen/X86/fold-load.ll b/test/CodeGen/X86/fold-load.ll
index 5525af2..e03cb7e 100644
--- a/test/CodeGen/X86/fold-load.ll
+++ b/test/CodeGen/X86/fold-load.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | FileCheck %s
+; RUN: llc < %s -mcpu=generic -march=x86 | FileCheck %s
 	%struct._obstack_chunk = type { i8*, %struct._obstack_chunk*, [4 x i8] }
 	%struct.obstack = type { i32, %struct._obstack_chunk*, i8*, i8*, i8*, i32, i32, %struct._obstack_chunk* (...)*, void (...)*, i8*, i8 }
 @stmt_obstack = external global %struct.obstack		; <%struct.obstack*> [#uses=1]
diff --git a/test/CodeGen/X86/fold-pcmpeqd-2.ll b/test/CodeGen/X86/fold-pcmpeqd-2.ll
index a7b3332..9cf4607 100644
--- a/test/CodeGen/X86/fold-pcmpeqd-2.ll
+++ b/test/CodeGen/X86/fold-pcmpeqd-2.ll
@@ -8,10 +8,7 @@
 ; RAGreedy defeats the test by splitting live ranges.
 
 ; Constant pool all-ones vector:
-; CHECK: .long 4294967295
-; CHECK-NEXT: .long 4294967295
-; CHECK-NEXT: .long 4294967295
-; CHECK-NEXT: .long 4294967295
+; CHECK: .space 16,255
 
 ; No pcmpeqd instructions, everybody uses the constant pool.
 ; CHECK: program_1:
diff --git a/test/CodeGen/X86/gcc_except_table.ll b/test/CodeGen/X86/gcc_except_table.ll
new file mode 100644
index 0000000..d89e9dc
--- /dev/null
+++ b/test/CodeGen/X86/gcc_except_table.ll
@@ -0,0 +1,27 @@
+; RUN: llc -mtriple x86_64-apple-darwin %s -o - | FileCheck %s
+@_ZTIi = external constant i8*
+
+define i32 @main() uwtable optsize ssp {
+entry:
+  invoke void @_Z1fv() optsize
+          to label %try.cont unwind label %lpad
+
+lpad:
+  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          catch i8* bitcast (i8** @_ZTIi to i8*)
+  br label %eh.resume
+
+try.cont:
+  ret i32 0
+
+eh.resume:
+  resume { i8*, i32 } %0
+}
+
+declare void @_Z1fv() optsize
+
+declare i32 @__gxx_personality_v0(...)
+
+; CHECK: Leh_func_end0:
+; CHECK: GCC_except_table0
+; CHECK: = Leh_func_end0-
diff --git a/test/CodeGen/X86/inline-asm-fpstack.ll b/test/CodeGen/X86/inline-asm-fpstack.ll
index c9a1c1c..2249618 100644
--- a/test/CodeGen/X86/inline-asm-fpstack.ll
+++ b/test/CodeGen/X86/inline-asm-fpstack.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=i386-apple-darwin | FileCheck %s
 
 ; There should be no stack manipulations between the inline asm and ret.
 ; CHECK: test1
diff --git a/test/CodeGen/X86/inline-asm-q-regs.ll b/test/CodeGen/X86/inline-asm-q-regs.ll
index 617bd39..fca68ba 100644
--- a/test/CodeGen/X86/inline-asm-q-regs.ll
+++ b/test/CodeGen/X86/inline-asm-q-regs.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64
+; RUN: llc < %s -march=x86-64 -mattr=+avx
 ; rdar://7066579
 
 	%0 = type { i64, i64, i64, i64, i64 }		; type %0
@@ -27,3 +27,11 @@ entry:
   %0 = tail call { i8, i8, i8, i8, i8 } asm "foo $1, $2, $3, $4, $1\0Axchgb ${0:b}, ${0:h}", "=q,={ax},={bx},={cx},={dx},0,1,2,3,4,~{dirflag},~{fpsr},~{flags}"(i8 %val, i8 %a, i8 %b, i8 %c, i8 %d) nounwind
   ret void
 }
+
+; rdar://10614894
+define <8 x float> @test5(<8 x float> %a, <8 x float> %b) nounwind {
+entry:
+  %0 = tail call <8 x float> asm "vperm2f128 $3, $2, $1, $0", "=x,x,x,i,~{dirflag},~{fpsr},~{flags}"(<8 x float> %a, <8 x float> %b, i32 16) nounwind
+  ret <8 x float> %0
+}
+
diff --git a/test/CodeGen/X86/inline-asm-tied.ll b/test/CodeGen/X86/inline-asm-tied.ll
index de6500d..91576fb 100644
--- a/test/CodeGen/X86/inline-asm-tied.ll
+++ b/test/CodeGen/X86/inline-asm-tied.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin9 -O0 -regalloc=basic | FileCheck %s
+; RUN: llc < %s -mtriple=i386-apple-darwin9 -O0 -optimize-regalloc -regalloc=basic | FileCheck %s
 ; rdar://6992609
 
 ; CHECK: movl [[EDX:%e..]], 4(%esp)
diff --git a/test/CodeGen/X86/iv-users-in-other-loops.ll b/test/CodeGen/X86/iv-users-in-other-loops.ll
index 7f2bd75..e51e61d 100644
--- a/test/CodeGen/X86/iv-users-in-other-loops.ll
+++ b/test/CodeGen/X86/iv-users-in-other-loops.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -enable-lsr-nested -o %t
+; RUN: llc < %s -mcpu=generic -march=x86-64 -enable-lsr-nested -o %t
 ; RUN: not grep inc %t
 ; RUN: grep dec %t | count 2
 ; RUN: grep addq %t | count 12
diff --git a/test/CodeGen/X86/jump_sign.ll b/test/CodeGen/X86/jump_sign.ll
index 5e8e162..dbd133c 100644
--- a/test/CodeGen/X86/jump_sign.ll
+++ b/test/CodeGen/X86/jump_sign.ll
@@ -1,7 +1,9 @@
-; RUN: llc < %s -march=x86 | grep jns
+; RUN: llc < %s -march=x86 -mcpu=pentiumpro | FileCheck %s
 
 define i32 @f(i32 %X) {
 entry:
+; CHECK: f:
+; CHECK: jns
 	%tmp1 = add i32 %X, 1		; <i32> [#uses=1]
 	%tmp = icmp slt i32 %tmp1, 0		; <i1> [#uses=1]
 	br i1 %tmp, label %cond_true, label %cond_next
@@ -18,3 +20,15 @@ cond_next:		; preds = %cond_true, %entry
 declare i32 @bar(...)
 
 declare i32 @baz(...)
+
+; rdar://10633221
+define i32 @g(i32 %a, i32 %b) nounwind {
+entry:
+; CHECK: g:
+; CHECK-NOT: test
+; CHECK: cmovs
+  %sub = sub nsw i32 %a, %b
+  %cmp = icmp sgt i32 %sub, 0
+  %cond = select i1 %cmp, i32 %sub, i32 0
+  ret i32 %cond
+}
diff --git a/test/CodeGen/X86/legalize-shift-64.ll b/test/CodeGen/X86/legalize-shift-64.ll
new file mode 100644
index 0000000..2026472
--- /dev/null
+++ b/test/CodeGen/X86/legalize-shift-64.ll
@@ -0,0 +1,56 @@
+; RUN: llc -march=x86 < %s | FileCheck %s
+
+define i64 @test1(i32 %xx, i32 %test) nounwind {
+  %conv = zext i32 %xx to i64
+  %and = and i32 %test, 7
+  %sh_prom = zext i32 %and to i64
+  %shl = shl i64 %conv, %sh_prom
+  ret i64 %shl
+; CHECK: test1:
+; CHECK: shll	%cl, %eax
+; CHECK: shrl	%edx
+; CHECK: xorb	$31
+; CHECK: shrl	%cl, %edx
+}
+
+define i64 @test2(i64 %xx, i32 %test) nounwind {
+  %and = and i32 %test, 7
+  %sh_prom = zext i32 %and to i64
+  %shl = shl i64 %xx, %sh_prom
+  ret i64 %shl
+; CHECK: test2:
+; CHECK: shll	%cl, %esi
+; CHECK: shrl	%edx
+; CHECK: xorb	$31
+; CHECK: shrl	%cl, %edx
+; CHECK: orl	%esi, %edx
+; CHECK: shll	%cl, %eax
+}
+
+define i64 @test3(i64 %xx, i32 %test) nounwind {
+  %and = and i32 %test, 7
+  %sh_prom = zext i32 %and to i64
+  %shr = lshr i64 %xx, %sh_prom
+  ret i64 %shr
+; CHECK: test3:
+; CHECK: shrl	%cl, %esi
+; CHECK: leal	(%edx,%edx), %eax
+; CHECK: xorb	$31, %cl
+; CHECK: shll	%cl, %eax
+; CHECK: orl	%esi, %eax
+; CHECK: shrl	%cl, %edx
+}
+
+define i64 @test4(i64 %xx, i32 %test) nounwind {
+  %and = and i32 %test, 7
+  %sh_prom = zext i32 %and to i64
+  %shr = ashr i64 %xx, %sh_prom
+  ret i64 %shr
+; CHECK: test4:
+; CHECK: shrl	%cl, %esi
+; CHECK: leal	(%edx,%edx), %eax
+; CHECK: xorb	$31, %cl
+; CHECK: shll	%cl, %eax
+; CHECK: orl	%esi, %eax
+; CHECK: sarl	%cl, %edx
+}
diff --git a/test/CodeGen/X86/lit.local.cfg b/test/CodeGen/X86/lit.local.cfg
new file mode 100644
index 0000000..b05ed3c
--- /dev/null
+++ b/test/CodeGen/X86/lit.local.cfg
@@ -0,0 +1,13 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+def getRoot(config):
+    if not config.parent:
+        return config
+    return getRoot(config.parent)
+
+root = getRoot(config)
+
+targets = set(root.targets_to_build.split())
+if not 'X86' in targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/X86/log2_not_readnone.ll b/test/CodeGen/X86/log2_not_readnone.ll
new file mode 100644
index 0000000..5620835
--- /dev/null
+++ b/test/CodeGen/X86/log2_not_readnone.ll
@@ -0,0 +1,15 @@
+; RUN: llc -mtriple=i386-linux-gnueabi %s -o - | FileCheck %s
+
+; Log2 and exp2 are string-matched to intrinsics. If they are not declared
+; readnone, they can't be changed to intrinsics (because they can change errno).
+
+declare double @log2(double)
+declare double @exp2(double)
+
+define void @f() {
+       ; CHECK: calll log2
+       %1 = call double @log2(double 0.000000e+00)
+       ; CHECK: calll exp2
+       %2 = call double @exp2(double 0.000000e+00)
+       ret void
+}
diff --git a/test/CodeGen/X86/lzcnt.ll b/test/CodeGen/X86/lzcnt.ll
index adfc38b..2faa24a 100644
--- a/test/CodeGen/X86/lzcnt.ll
+++ b/test/CodeGen/X86/lzcnt.ll
@@ -1,14 +1,17 @@
 ; RUN: llc < %s -march=x86-64 -mattr=+lzcnt | FileCheck %s
 
-define i32 @t1(i32 %x) nounwind  {
-	%tmp = tail call i32 @llvm.ctlz.i32( i32 %x, i1 false )
-	ret i32 %tmp
+declare i8 @llvm.ctlz.i8(i8, i1) nounwind readnone
+declare i16 @llvm.ctlz.i16(i16, i1) nounwind readnone
+declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
+declare i64 @llvm.ctlz.i64(i64, i1) nounwind readnone
+
+define i8 @t1(i8 %x) nounwind  {
+	%tmp = tail call i8 @llvm.ctlz.i8( i8 %x, i1 false )
+	ret i8 %tmp
 ; CHECK: t1:
 ; CHECK: lzcntl
 }
 
-declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
-
 define i16 @t2(i16 %x) nounwind  {
 	%tmp = tail call i16 @llvm.ctlz.i16( i16 %x, i1 false )
 	ret i16 %tmp
@@ -16,23 +19,44 @@ define i16 @t2(i16 %x) nounwind  {
 ; CHECK: lzcntw
 }
 
-declare i16 @llvm.ctlz.i16(i16, i1) nounwind readnone
+define i32 @t3(i32 %x) nounwind  {
+	%tmp = tail call i32 @llvm.ctlz.i32( i32 %x, i1 false )
+	ret i32 %tmp
+; CHECK: t3:
+; CHECK: lzcntl
+}
 
-define i64 @t3(i64 %x) nounwind  {
+define i64 @t4(i64 %x) nounwind  {
 	%tmp = tail call i64 @llvm.ctlz.i64( i64 %x, i1 false )
 	ret i64 %tmp
-; CHECK: t3:
+; CHECK: t4:
 ; CHECK: lzcntq
 }
 
-declare i64 @llvm.ctlz.i64(i64, i1) nounwind readnone
-
-define i8 @t4(i8 %x) nounwind  {
-	%tmp = tail call i8 @llvm.ctlz.i8( i8 %x, i1 false )
+define i8 @t5(i8 %x) nounwind  {
+	%tmp = tail call i8 @llvm.ctlz.i8( i8 %x, i1 true )
 	ret i8 %tmp
-; CHECK: t4:
+; CHECK: t5:
+; CHECK: lzcntl
+}
+
+define i16 @t6(i16 %x) nounwind  {
+	%tmp = tail call i16 @llvm.ctlz.i16( i16 %x, i1 true )
+	ret i16 %tmp
+; CHECK: t6:
 ; CHECK: lzcntw
 }
 
-declare i8 @llvm.ctlz.i8(i8, i1) nounwind readnone
+define i32 @t7(i32 %x) nounwind  {
+	%tmp = tail call i32 @llvm.ctlz.i32( i32 %x, i1 true )
+	ret i32 %tmp
+; CHECK: t7:
+; CHECK: lzcntl
+}
 
+define i64 @t8(i64 %x) nounwind  {
+	%tmp = tail call i64 @llvm.ctlz.i64( i64 %x, i1 true )
+	ret i64 %tmp
+; CHECK: t8:
+; CHECK: lzcntq
+}
diff --git a/test/CodeGen/X86/machine-cp.ll b/test/CodeGen/X86/machine-cp.ll
new file mode 100644
index 0000000..54fa01c
--- /dev/null
+++ b/test/CodeGen/X86/machine-cp.ll
@@ -0,0 +1,36 @@
+; RUN: llc -mtriple=x86_64-apple-macosx -mcpu=nocona < %s | FileCheck %s
+
+; After tail duplication, two copies in an early exit BB can be cancelled out.
+; rdar://10640363
+define i32 @t1(i32 %a, i32 %b) nounwind  {
+entry:
+; CHECK: t1:
+; CHECK: jne
+  %cmp1 = icmp eq i32 %b, 0
+  br i1 %cmp1, label %while.end, label %while.body
+
+; CHECK: BB
+; CHECK-NOT: mov
+; CHECK: ret
+
+while.body:                                       ; preds = %entry, %while.body
+  %a.addr.03 = phi i32 [ %b.addr.02, %while.body ], [ %a, %entry ]
+  %b.addr.02 = phi i32 [ %rem, %while.body ], [ %b, %entry ]
+  %rem = srem i32 %a.addr.03, %b.addr.02
+  %cmp = icmp eq i32 %rem, 0
+  br i1 %cmp, label %while.end, label %while.body
+
+while.end:                                        ; preds = %while.body, %entry
+  %a.addr.0.lcssa = phi i32 [ %a, %entry ], [ %b.addr.02, %while.body ]
+  ret i32 %a.addr.0.lcssa
+}
+
+; Two movdqa (from phi-elimination) in the entry BB cancels out.
+; rdar://10428165
+define <8 x i16> @t2(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
+entry:
+; CHECK: t2:
+; CHECK-NOT: movdqa
+  %tmp8 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 undef, i32 undef, i32 7, i32 2, i32 8, i32 undef, i32 undef , i32 undef >
+  ret <8 x i16> %tmp8
+}
diff --git a/test/CodeGen/X86/machine-cse.ll b/test/CodeGen/X86/machine-cse.ll
index d819fc8..a757cde 100644
--- a/test/CodeGen/X86/machine-cse.ll
+++ b/test/CodeGen/X86/machine-cse.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=x86_64-apple-darwin < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-apple-macosx < %s | FileCheck %s
 ; rdar://7610418
 
 %ptr = type { i8* }
@@ -77,3 +77,25 @@ bb.nph743.us:                                     ; preds = %for.body53.us, %if.
 sw.bb307:                                         ; preds = %sw.bb, %entry
   ret void
 }
+
+; CSE physical register defining instruction across MBB boundary.
+; rdar://10660865
+define i32 @cross_mbb_phys_cse(i32 %a, i32 %b) nounwind ssp {
+entry:
+; CHECK: cross_mbb_phys_cse:
+; CHECK: cmpl
+; CHECK: ja
+  %cmp = icmp ugt i32 %a, %b
+  br i1 %cmp, label %return, label %if.end
+
+if.end:                                           ; preds = %entry
+; CHECK-NOT: cmpl
+; CHECK: sbbl
+  %cmp1 = icmp ult i32 %a, %b
+  %. = sext i1 %cmp1 to i32
+  br label %return
+
+return:                                           ; preds = %if.end, %entry
+  %retval.0 = phi i32 [ 1, %entry ], [ %., %if.end ]
+  ret i32 %retval.0
+}
diff --git a/test/CodeGen/X86/masked-iv-safe.ll b/test/CodeGen/X86/masked-iv-safe.ll
index 3a4acb8..a7b036e 100644
--- a/test/CodeGen/X86/masked-iv-safe.ll
+++ b/test/CodeGen/X86/masked-iv-safe.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 > %t
+; RUN: llc < %s -mcpu=generic -march=x86-64 > %t
 ; RUN: not grep and %t
 ; RUN: not grep movz %t
 ; RUN: not grep sar %t
diff --git a/test/CodeGen/X86/mcinst-avx-lowering.ll b/test/CodeGen/X86/mcinst-avx-lowering.ll
new file mode 100644
index 0000000..41f96e8
--- /dev/null
+++ b/test/CodeGen/X86/mcinst-avx-lowering.ll
@@ -0,0 +1,19 @@
+; RUN: llc -mtriple=x86_64-apple-macosx10 -mattr=avx -show-mc-encoding < %s | FileCheck %s
+
+define i64 @t1(double %d_ivar) nounwind uwtable ssp {
+entry:
+; CHECK: t1
+  %0 = bitcast double %d_ivar to i64
+; CHECK: vmovd
+; CHECK: encoding: [0xc4,0xe1,0xf9,0x7e,0xc0]
+  ret i64 %0
+}
+
+define double @t2(i64 %d_ivar) nounwind uwtable ssp {
+entry:
+; CHECK: t2
+  %0 = bitcast i64 %d_ivar to double
+; CHECK: vmovd
+; CHECK: encoding: [0xc4,0xe1,0xf9,0x6e,0xc7]
+  ret double %0
+}
diff --git a/test/CodeGen/X86/memcpy.ll b/test/CodeGen/X86/memcpy.ll
index f43b0bf..86c6862 100644
--- a/test/CodeGen/X86/memcpy.ll
+++ b/test/CodeGen/X86/memcpy.ll
@@ -79,3 +79,16 @@ entry:
 ; LINUX movq
 }
 
+
+@.str = private unnamed_addr constant [30 x i8] c"\00aaaaaaaaaaaaaaaaaaaaaaaaaaaa\00", align 1
+
+define void @test5(i8* nocapture %C) nounwind uwtable ssp {
+entry:
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([30 x i8]* @.str, i64 0, i64 0), i64 16, i32 1, i1 false)
+  ret void
+
+; DARWIN: movabsq	$7016996765293437281
+; DARWIN: movabsq	$7016996765293437184
+}
+
+
diff --git a/test/CodeGen/X86/mmx-builtins.ll b/test/CodeGen/X86/mmx-builtins.ll
index 3ac0e4e..8b7200d 100644
--- a/test/CodeGen/X86/mmx-builtins.ll
+++ b/test/CodeGen/X86/mmx-builtins.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -march=x86 -mattr=+mmx,+ssse3 | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+avx | FileCheck %s
 
 declare x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx, x86_mmx) nounwind readnone
 
diff --git a/test/CodeGen/X86/negate-add-zero.ll b/test/CodeGen/X86/negate-add-zero.ll
index c3f412e..92850f2 100644
--- a/test/CodeGen/X86/negate-add-zero.ll
+++ b/test/CodeGen/X86/negate-add-zero.ll
@@ -486,10 +486,6 @@ declare void @_ZN7CDSListIP9HingeNodeEC1Eii(%"struct.CDSList<HingeNode*>"*, i32,
 
 declare i8* @_Znwm(i32)
 
-declare i8* @llvm.eh.exception() nounwind
-
-declare i32 @llvm.eh.selector.i32(i8*, i8*, ...) nounwind
-
 declare i32 @llvm.eh.typeid.for.i32(i8*) nounwind
 
 declare void @_ZdlPv(i8*) nounwind
diff --git a/test/CodeGen/X86/null-streamer.ll b/test/CodeGen/X86/null-streamer.ll
new file mode 100644
index 0000000..7c0e82f
--- /dev/null
+++ b/test/CodeGen/X86/null-streamer.ll
@@ -0,0 +1,11 @@
+; Check the MCNullStreamer operates correctly, at least on a minimal test case.
+;
+; RUN: llc -filetype=null -o %t -march=x86 %s
+
+define void @f0()  {
+  ret void
+}
+
+define void @f1() {
+  ret void
+}
diff --git a/test/CodeGen/X86/objc-gc-module-flags.ll b/test/CodeGen/X86/objc-gc-module-flags.ll
new file mode 100644
index 0000000..8cb2c03
--- /dev/null
+++ b/test/CodeGen/X86/objc-gc-module-flags.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+
+; CHECK:        .section  __DATA,__objc_imageinfo,regular,no_dead_strip
+; CHECK-NEXT: L_OBJC_IMAGE_INFO:
+; CHECK-NEXT:   .long  0
+; CHECK-NEXT:   .long  2
+
+!llvm.module.flags = !{!0, !1, !2, !3}
+
+!0 = metadata !{i32 1, metadata !"Objective-C Version", i32 2}
+!1 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0}
+!2 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
+!3 = metadata !{i32 1, metadata !"Objective-C Garbage Collection", i32 2}
diff --git a/test/CodeGen/X86/object-size.ll b/test/CodeGen/X86/object-size.ll
index 082d20c..8f1eabd 100644
--- a/test/CodeGen/X86/object-size.ll
+++ b/test/CodeGen/X86/object-size.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O0 -regalloc=basic < %s -march=x86-64 | FileCheck %s -check-prefix=X64
+; RUN: llc -O0 < %s -march=x86-64 | FileCheck %s -check-prefix=X64
 
 ; ModuleID = 'ts.c'
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
diff --git a/test/CodeGen/X86/odr_comdat.ll b/test/CodeGen/X86/odr_comdat.ll
new file mode 100644
index 0000000..547334c
--- /dev/null
+++ b/test/CodeGen/X86/odr_comdat.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s -check-prefix=X86LINUX
+
+; Checking that a comdat group gets generated correctly for a static member 
+; of instantiated C++ templates.
+; see http://sourcery.mentor.com/public/cxx-abi/abi.html#vague-itemplate
+; section 5.2.6 Instantiated templates
+; "Any static member data object is emitted in a COMDAT identified by its mangled 
+;  name, in any object file with a reference to its name symbol."
+
+; Case 1: variable is not explicitly initialized, and ends up in a .bss section
+; X86LINUX:   .section        .bss._ZN1CIiE1iE,"aGw",@nobits,_ZN1CIiE1iE,comdat
+@_ZN1CIiE1iE = weak_odr global i32 0, align 4
+
+; Case 2: variable is explicitly initialized, and ends up in a .data section
+; X86LINUX:   .section        .data._ZN1CIiE1jE,"aGw",@progbits,_ZN1CIiE1jE,comdat
+@_ZN1CIiE1jE = weak_odr global i32 12, align 4
diff --git a/test/CodeGen/X86/optimize-max-3.ll b/test/CodeGen/X86/optimize-max-3.ll
index e42aa9d..d092916 100644
--- a/test/CodeGen/X86/optimize-max-3.ll
+++ b/test/CodeGen/X86/optimize-max-3.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=x86_64-linux -asm-verbose=false | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-win32 -asm-verbose=false | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -asm-verbose=false | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-win32 -asm-verbose=false | FileCheck %s
 
 ; LSR's OptimizeMax should eliminate the select (max).
 
diff --git a/test/CodeGen/X86/peep-test-3.ll b/test/CodeGen/X86/peep-test-3.ll
index 528c4bc..a379980 100644
--- a/test/CodeGen/X86/peep-test-3.ll
+++ b/test/CodeGen/X86/peep-test-3.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -post-RA-scheduler=false | FileCheck %s
+; RUN: llc < %s -mcpu=generic -march=x86 -post-RA-scheduler=false | FileCheck %s
 ; rdar://7226797
 
 ; LLVM should omit the testl and use the flags result from the orl.
diff --git a/test/CodeGen/X86/personality_size.ll b/test/CodeGen/X86/personality_size.ll
new file mode 100644
index 0000000..30a5d39
--- /dev/null
+++ b/test/CodeGen/X86/personality_size.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -relocation-model=pic -disable-cfi -mtriple=x86_64-pc-solaris2.11 -disable-cgp-branch-opts | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -relocation-model=pic -disable-cfi -mtriple=i386-pc-solaris2.11 -disable-cgp-branch-opts | FileCheck %s -check-prefix=X32
+; PR1632
+
+define void @_Z1fv() {
+entry:
+  invoke void @_Z1gv()
+          to label %return unwind label %unwind
+
+unwind:                                           ; preds = %entry
+  %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+            cleanup
+  ret void
+
+return:                                           ; preds = %eh_then, %entry
+  ret void
+}
+
+declare void @_Z1gv()
+
+declare i32 @__gxx_personality_v0(...)
+
+; X64:      .size	DW.ref.__gxx_personality_v0, 8
+; X64:      .quad	__gxx_personality_v0
+
+; X32:      .size	DW.ref.__gxx_personality_v0, 4
+; X32:      .long	__gxx_personality_v0
+
diff --git a/test/CodeGen/X86/pic.ll b/test/CodeGen/X86/pic.ll
index fb60ac2..fc06309 100644
--- a/test/CodeGen/X86/pic.ll
+++ b/test/CodeGen/X86/pic.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i686-pc-linux-gnu -relocation-model=pic -asm-verbose=false -post-RA-scheduler=false | FileCheck %s -check-prefix=LINUX
+; RUN: llc < %s -mcpu=generic -mtriple=i686-pc-linux-gnu -relocation-model=pic -asm-verbose=false -post-RA-scheduler=false | FileCheck %s -check-prefix=LINUX
 
 @ptr = external global i32* 
 @dst = external global i32 
diff --git a/test/CodeGen/X86/pr11202.ll b/test/CodeGen/X86/pr11202.ll
index 2b26a69..13070d1 100644
--- a/test/CodeGen/X86/pr11202.ll
+++ b/test/CodeGen/X86/pr11202.ll
@@ -15,5 +15,5 @@ l2:                                               ; preds = %l1
   br label %l1
 }
 
-; CHECK: .Ltmp1:                                 # Address of block that was removed by CodeGen
-; CHECK: .quad	.Ltmp1
+; CHECK: .Ltmp0:                                 # Address of block that was removed by CodeGen
+; CHECK: .quad	.Ltmp0
diff --git a/test/CodeGen/X86/promote.ll b/test/CodeGen/X86/promote.ll
index b8964f2..8b30dc7 100644
--- a/test/CodeGen/X86/promote.ll
+++ b/test/CodeGen/X86/promote.ll
@@ -29,3 +29,14 @@ entry:
   ret i32 0
 ; CHECK: ret
 }
+
+; CHECK: bitcast_widen
+define <2 x float> @bitcast_widen(<4 x i32> %in) nounwind readnone {
+entry:
+; CHECK-NOT: pshufd
+ %x = shufflevector <4 x i32> %in, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+ %y = bitcast <2 x i32> %x to <2 x float>
+ ret <2 x float> %y
+; CHECK: ret
+}
+
diff --git a/test/CodeGen/X86/red-zone.ll b/test/CodeGen/X86/red-zone.ll
index d936971..d99a7a4 100644
--- a/test/CodeGen/X86/red-zone.ll
+++ b/test/CodeGen/X86/red-zone.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux | FileCheck %s
 
 ; First without noredzone.
 ; CHECK: f0:
diff --git a/test/CodeGen/X86/red-zone2.ll b/test/CodeGen/X86/red-zone2.ll
index 9557d17..f092163 100644
--- a/test/CodeGen/X86/red-zone2.ll
+++ b/test/CodeGen/X86/red-zone2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 > %t
+; RUN: llc < %s -mcpu=generic -march=x86-64 > %t
 ; RUN: grep subq %t | count 1
 ; RUN: grep addq %t | count 1
 
diff --git a/test/CodeGen/X86/reghinting.ll b/test/CodeGen/X86/reghinting.ll
index 87f65ed..6759115 100644
--- a/test/CodeGen/X86/reghinting.ll
+++ b/test/CodeGen/X86/reghinting.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-apple-macosx | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-apple-macosx | FileCheck %s
 ; PR10221
 
 ;; The registers %x and %y must both spill across the finit call.
diff --git a/test/CodeGen/X86/segmented-stacks-dynamic.ll b/test/CodeGen/X86/segmented-stacks-dynamic.ll
new file mode 100644
index 0000000..5ce08aa
--- /dev/null
+++ b/test/CodeGen/X86/segmented-stacks-dynamic.ll
@@ -0,0 +1,64 @@
+; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux  -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -segmented-stacks -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -segmented-stacks -filetype=obj
+
+; Just to prevent the alloca from being optimized away
+declare void @dummy_use(i32*, i32)
+
+define i32 @test_basic(i32 %l) {
+        %mem = alloca i32, i32 %l
+        call void @dummy_use (i32* %mem, i32 %l)
+        %terminate = icmp eq i32 %l, 0
+        br i1 %terminate, label %true, label %false
+
+true:
+        ret i32 0
+
+false:
+        %newlen = sub i32 %l, 1
+        %retvalue = call i32 @test_basic(i32 %newlen)
+        ret i32 %retvalue
+
+; X32:      test_basic:
+
+; X32:      cmpl %gs:48, %esp
+; X32-NEXT: ja      .LBB0_2
+
+; X32:      pushl $4
+; X32-NEXT: pushl $12
+; X32-NEXT: calll __morestack
+; X32-NEXT: ret
+
+; X32:      movl %esp, %eax
+; X32-NEXT: subl %ecx, %eax
+; X32-NEXT: cmpl %eax, %gs:48
+
+; X32:      movl %eax, %esp
+
+; X32:      subl $12, %esp
+; X32-NEXT: pushl %ecx
+; X32-NEXT: calll __morestack_allocate_stack_space
+; X32-NEXT: addl $16, %esp
+
+; X64:      test_basic:
+
+; X64:      cmpq %fs:112, %rsp
+; X64-NEXT: ja      .LBB0_2
+
+; X64:      movabsq $24, %r10
+; X64-NEXT: movabsq $0, %r11
+; X64-NEXT: callq __morestack
+; X64-NEXT: ret
+
+; X64:      movq %rsp, %rdi
+; X64-NEXT: subq %rax, %rdi
+; X64-NEXT: cmpq %rdi, %fs:112
+
+; X64:      movq %rdi, %rsp
+
+; X64:      movq %rax, %rdi
+; X64-NEXT: callq __morestack_allocate_stack_space
+; X64-NEXT: movq %rax, %rdi
+
+}
diff --git a/test/CodeGen/X86/segmented-stacks.ll b/test/CodeGen/X86/segmented-stacks.ll
index 4f529c1..5407b87 100644
--- a/test/CodeGen/X86/segmented-stacks.ll
+++ b/test/CodeGen/X86/segmented-stacks.ll
@@ -1,61 +1,97 @@
-; RUN: llc < %s -mtriple=i686-linux -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X32
-; RUN: llc < %s -mtriple=x86_64-linux  -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X32-Linux
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux  -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X64-Linux
+; RUN: llc < %s -mcpu=generic -mtriple=i686-darwin -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X32-Darwin
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-darwin -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X64-Darwin
+; RUN: llc < %s -mcpu=generic -mtriple=i686-mingw32 -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X32-MinGW
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-freebsd -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X64-FreeBSD
+
+; We used to crash with filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -segmented-stacks -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -segmented-stacks -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=i686-darwin -segmented-stacks -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-darwin -segmented-stacks -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=i686-mingw32 -segmented-stacks -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-freebsd -segmented-stacks -filetype=obj
+
+; RUN: not llc < %s -mcpu=generic -mtriple=x86_64-solaris -segmented-stacks 2> %t.log
+; RUN: FileCheck %s -input-file=%t.log -check-prefix=X64-Solaris
+; RUN: not llc < %s -mcpu=generic -mtriple=x86_64-mingw32 -segmented-stacks 2> %t.log
+; RUN: FileCheck %s -input-file=%t.log -check-prefix=X64-MinGW
+; RUN: not llc < %s -mcpu=generic -mtriple=i686-freebsd -segmented-stacks 2> %t.log
+; RUN: FileCheck %s -input-file=%t.log -check-prefix=X32-FreeBSD
+
+; X64-Solaris: Segmented stacks not supported on this platform
+; X64-MinGW: Segmented stacks not supported on this platform
+; X32-FreeBSD: Segmented stacks not supported on FreeBSD i386
 
 ; Just to prevent the alloca from being optimized away
 declare void @dummy_use(i32*, i32)
 
-define i32 @test_basic(i32 %l) {
-        %mem = alloca i32, i32 %l
-        call void @dummy_use (i32* %mem, i32 %l)
-        %terminate = icmp eq i32 %l, 0
-        br i1 %terminate, label %true, label %false
+define void @test_basic() {
+        %mem = alloca i32, i32 10
+        call void @dummy_use (i32* %mem, i32 10)
+	ret void
+
+; X32-Linux:       test_basic:
+
+; X32-Linux:       cmpl %gs:48, %esp
+; X32-Linux-NEXT:  ja      .LBB0_2
 
-true:
-        ret i32 0
+; X32-Linux:       pushl $0
+; X32-Linux-NEXT:  pushl $60
+; X32-Linux-NEXT:  calll __morestack
+; X32-Linux-NEXT:  ret
 
-false:
-        %newlen = sub i32 %l, 1
-        %retvalue = call i32 @test_basic(i32 %newlen)
-        ret i32 %retvalue
+; X64-Linux:       test_basic:
 
-; X32:      test_basic:
+; X64-Linux:       cmpq %fs:112, %rsp
+; X64-Linux-NEXT:  ja      .LBB0_2
 
-; X32:      cmpl %gs:48, %esp
+; X64-Linux:       movabsq $40, %r10
+; X64-Linux-NEXT:  movabsq $0, %r11
+; X64-Linux-NEXT:  callq __morestack
+; X64-Linux-NEXT:  ret
 
-; X32:      pushl $4
-; X32-NEXT: pushl $12
-; X32-NEXT: calll __morestack
-; X32-NEXT: ret 
+; X32-Darwin:      test_basic:
 
-; X32:      movl %esp, %eax
-; X32-NEXT: subl %ecx, %eax
-; X32-NEXT: cmpl %eax, %gs:48
+; X32-Darwin:      movl $432, %ecx
+; X32-Darwin-NEXT: cmpl %gs:(%ecx), %esp
+; X32-Darwin-NEXT: ja      LBB0_2
 
-; X32:      movl %eax, %esp
+; X32-Darwin:      pushl $0
+; X32-Darwin-NEXT: pushl $60
+; X32-Darwin-NEXT: calll ___morestack
+; X32-Darwin-NEXT: ret
 
-; X32:      subl $12, %esp
-; X32-NEXT: pushl %ecx
-; X32-NEXT: calll __morestack_allocate_stack_space
-; X32-NEXT: addl $16, %esp
+; X64-Darwin:      test_basic:
 
-; X64:      test_basic:
+; X64-Darwin:      cmpq %gs:816, %rsp
+; X64-Darwin-NEXT: ja      LBB0_2
 
-; X64:      cmpq %fs:112, %rsp
+; X64-Darwin:      movabsq $40, %r10
+; X64-Darwin-NEXT: movabsq $0, %r11
+; X64-Darwin-NEXT: callq ___morestack
+; X64-Darwin-NEXT: ret
 
-; X64:      movabsq $24, %r10
-; X64-NEXT: movabsq $0, %r11
-; X64-NEXT: callq __morestack
-; X64-NEXT: ret
+; X32-MinGW:       test_basic:
 
-; X64:      movq %rsp, %rdi
-; X64-NEXT: subq %rax, %rdi
-; X64-NEXT: cmpq %rdi, %fs:112
+; X32-MinGW:       cmpl %fs:20, %esp
+; X32-MinGW-NEXT:  ja      LBB0_2
 
-; X64:      movq %rdi, %rsp
+; X32-MinGW:       pushl $0
+; X32-MinGW-NEXT:  pushl $48
+; X32-MinGW-NEXT:  calll ___morestack
+; X32-MinGW-NEXT:  ret
 
-; X64:      movq %rax, %rdi
-; X64-NEXT: callq __morestack_allocate_stack_space
-; X64-NEXT: movq %rax, %rdi
+; X64-FreeBSD:       test_basic:
+
+; X64-FreeBSD:       cmpq %fs:24, %rsp
+; X64-FreeBSD-NEXT:  ja      .LBB0_2
+
+; X64-FreeBSD:       movabsq $40, %r10
+; X64-FreeBSD-NEXT:  movabsq $0, %r11
+; X64-FreeBSD-NEXT:  callq __morestack
+; X64-FreeBSD-NEXT:  ret
 
 }
 
@@ -64,21 +100,60 @@ define i32 @test_nested(i32 * nest %closure, i32 %other) {
        %result = add i32 %other, %addend
        ret i32 %result
 
-; X32:      cmpl %gs:48, %esp
+; X32-Linux:       cmpl %gs:48, %esp
+; X32-Linux-NEXT:  ja      .LBB1_2
+
+; X32-Linux:       pushl $4
+; X32-Linux-NEXT:  pushl $0
+; X32-Linux-NEXT:  calll __morestack
+; X32-Linux-NEXT:  ret
+
+; X64-Linux:       cmpq %fs:112, %rsp
+; X64-Linux-NEXT:  ja      .LBB1_2
+
+; X64-Linux:       movq %r10, %rax
+; X64-Linux-NEXT:  movabsq $0, %r10
+; X64-Linux-NEXT:  movabsq $0, %r11
+; X64-Linux-NEXT:  callq __morestack
+; X64-Linux-NEXT:  ret
+; X64-Linux-NEXT:  movq %rax, %r10
 
-; X32:      pushl $4
-; X32-NEXT: pushl $0
-; X32-NEXT: calll __morestack
-; X32-NEXT: ret
+; X32-Darwin:      movl $432, %edx
+; X32-Darwin-NEXT: cmpl %gs:(%edx), %esp
+; X32-Darwin-NEXT: ja      LBB1_2
 
-; X64:      cmpq %fs:112, %rsp
+; X32-Darwin:      pushl $4
+; X32-Darwin-NEXT: pushl $0
+; X32-Darwin-NEXT: calll ___morestack
+; X32-Darwin-NEXT: ret
 
-; X64:      movq %r10, %rax
-; X64-NEXT: movabsq $0, %r10
-; X64-NEXT: movabsq $0, %r11
-; X64-NEXT: callq __morestack
-; X64-NEXT: ret
-; X64-NEXT: movq %rax, %r10
+; X64-Darwin:      cmpq %gs:816, %rsp
+; X64-Darwin-NEXT: ja      LBB1_2
+
+; X64-Darwin:      movq %r10, %rax
+; X64-Darwin-NEXT: movabsq $0, %r10
+; X64-Darwin-NEXT: movabsq $0, %r11
+; X64-Darwin-NEXT: callq ___morestack
+; X64-Darwin-NEXT: ret
+; X64-Darwin-NEXT: movq %rax, %r10
+
+; X32-MinGW:       cmpl %fs:20, %esp
+; X32-MinGW-NEXT:  ja      LBB1_2
+
+; X32-MinGW:       pushl $4
+; X32-MinGW-NEXT:  pushl $0
+; X32-MinGW-NEXT:  calll ___morestack
+; X32-MinGW-NEXT:  ret
+
+; X64-FreeBSD:       cmpq %fs:24, %rsp
+; X64-FreeBSD-NEXT:  ja      .LBB1_2
+
+; X64-FreeBSD:       movq %r10, %rax
+; X64-FreeBSD-NEXT:  movabsq $0, %r10
+; X64-FreeBSD-NEXT:  movabsq $0, %r11
+; X64-FreeBSD-NEXT:  callq __morestack
+; X64-FreeBSD-NEXT:  ret
+; X64-FreeBSD-NEXT:  movq %rax, %r10
 
 }
 
@@ -87,20 +162,224 @@ define void @test_large() {
         call void @dummy_use (i32* %mem, i32 0)
         ret void
 
-; X32:      leal -40012(%esp), %ecx
-; X32-NEXT: cmpl %gs:48, %ecx
+; X32-Linux:       leal -40012(%esp), %ecx
+; X32-Linux-NEXT:  cmpl %gs:48, %ecx
+; X32-Linux-NEXT:  ja      .LBB2_2
+
+; X32-Linux:       pushl $0
+; X32-Linux-NEXT:  pushl $40012
+; X32-Linux-NEXT:  calll __morestack
+; X32-Linux-NEXT:  ret
+
+; X64-Linux:       leaq -40008(%rsp), %r11
+; X64-Linux-NEXT:  cmpq %fs:112, %r11
+; X64-Linux-NEXT:  ja      .LBB2_2
+
+; X64-Linux:       movabsq $40008, %r10
+; X64-Linux-NEXT:  movabsq $0, %r11
+; X64-Linux-NEXT:  callq __morestack
+; X64-Linux-NEXT:  ret
+
+; X32-Darwin:      leal -40012(%esp), %ecx
+; X32-Darwin-NEXT: movl $432, %eax
+; X32-Darwin-NEXT: cmpl %gs:(%eax), %ecx
+; X32-Darwin-NEXT: ja      LBB2_2
+
+; X32-Darwin:      pushl $0
+; X32-Darwin-NEXT: pushl $40012
+; X32-Darwin-NEXT: calll ___morestack
+; X32-Darwin-NEXT: ret
+
+; X64-Darwin:      leaq -40008(%rsp), %r11
+; X64-Darwin-NEXT: cmpq %gs:816, %r11
+; X64-Darwin-NEXT: ja      LBB2_2
+
+; X64-Darwin:      movabsq $40008, %r10
+; X64-Darwin-NEXT: movabsq $0, %r11
+; X64-Darwin-NEXT: callq ___morestack
+; X64-Darwin-NEXT: ret
+
+; X32-MinGW:       leal -40008(%esp), %ecx
+; X32-MinGW-NEXT:  cmpl %fs:20, %ecx
+; X32-MinGW-NEXT:  ja      LBB2_2
+
+; X32-MinGW:       pushl $0
+; X32-MinGW-NEXT:  pushl $40008
+; X32-MinGW-NEXT:  calll ___morestack
+; X32-MinGW-NEXT:  ret
+
+; X64-FreeBSD:       leaq -40008(%rsp), %r11
+; X64-FreeBSD-NEXT:  cmpq %fs:24, %r11
+; X64-FreeBSD-NEXT:  ja      .LBB2_2
+
+; X64-FreeBSD:       movabsq $40008, %r10
+; X64-FreeBSD-NEXT:  movabsq $0, %r11
+; X64-FreeBSD-NEXT:  callq __morestack
+; X64-FreeBSD-NEXT:  ret
+
+}
+
+define fastcc void @test_fastcc() {
+        %mem = alloca i32, i32 10
+        call void @dummy_use (i32* %mem, i32 10)
+        ret void
+
+; X32-Linux:       test_fastcc:
+
+; X32-Linux:       cmpl %gs:48, %esp
+; X32-Linux-NEXT:  ja      .LBB3_2
+
+; X32-Linux:       pushl $0
+; X32-Linux-NEXT:  pushl $60
+; X32-Linux-NEXT:  calll __morestack
+; X32-Linux-NEXT:  ret
+
+; X64-Linux:       test_fastcc:
+
+; X64-Linux:       cmpq %fs:112, %rsp
+; X64-Linux-NEXT:  ja      .LBB3_2
+
+; X64-Linux:       movabsq $40, %r10
+; X64-Linux-NEXT:  movabsq $0, %r11
+; X64-Linux-NEXT:  callq __morestack
+; X64-Linux-NEXT:  ret
+
+; X32-Darwin:      test_fastcc:
+
+; X32-Darwin:      movl $432, %eax
+; X32-Darwin-NEXT: cmpl %gs:(%eax), %esp
+; X32-Darwin-NEXT: ja      LBB3_2
+
+; X32-Darwin:      pushl $0
+; X32-Darwin-NEXT: pushl $60
+; X32-Darwin-NEXT: calll ___morestack
+; X32-Darwin-NEXT: ret
+
+; X64-Darwin:      test_fastcc:
+
+; X64-Darwin:      cmpq %gs:816, %rsp
+; X64-Darwin-NEXT: ja      LBB3_2
+
+; X64-Darwin:      movabsq $40, %r10
+; X64-Darwin-NEXT: movabsq $0, %r11
+; X64-Darwin-NEXT: callq ___morestack
+; X64-Darwin-NEXT: ret
+
+; X32-MinGW:       test_fastcc:
+
+; X32-MinGW:       cmpl %fs:20, %esp
+; X32-MinGW-NEXT:  ja      LBB3_2
+
+; X32-MinGW:       pushl $0
+; X32-MinGW-NEXT:  pushl $48
+; X32-MinGW-NEXT:  calll ___morestack
+; X32-MinGW-NEXT:  ret
+
+; X64-FreeBSD:       test_fastcc:
+
+; X64-FreeBSD:       cmpq %fs:24, %rsp
+; X64-FreeBSD-NEXT:  ja      .LBB3_2
+
+; X64-FreeBSD:       movabsq $40, %r10
+; X64-FreeBSD-NEXT:  movabsq $0, %r11
+; X64-FreeBSD-NEXT:  callq __morestack
+; X64-FreeBSD-NEXT:  ret
+
+}
+
+define fastcc void @test_fastcc_large() {
+        %mem = alloca i32, i32 10000
+        call void @dummy_use (i32* %mem, i32 0)
+        ret void
+
+; X32-Linux:       test_fastcc_large:
+
+; X32-Linux:       leal -40012(%esp), %eax
+; X32-Linux-NEXT:  cmpl %gs:48, %eax
+; X32-Linux-NEXT:  ja      .LBB4_2
+
+; X32-Linux:       pushl $0
+; X32-Linux-NEXT:  pushl $40012
+; X32-Linux-NEXT:  calll __morestack
+; X32-Linux-NEXT:  ret
+
+; X64-Linux:       test_fastcc_large:
+
+; X64-Linux:       leaq -40008(%rsp), %r11
+; X64-Linux-NEXT:  cmpq %fs:112, %r11
+; X64-Linux-NEXT:  ja      .LBB4_2
+
+; X64-Linux:       movabsq $40008, %r10
+; X64-Linux-NEXT:  movabsq $0, %r11
+; X64-Linux-NEXT:  callq __morestack
+; X64-Linux-NEXT:  ret
+
+; X32-Darwin:      test_fastcc_large:
+
+; X32-Darwin:      leal -40012(%esp), %eax
+; X32-Darwin-NEXT: movl $432, %ecx
+; X32-Darwin-NEXT: cmpl %gs:(%ecx), %eax
+; X32-Darwin-NEXT: ja      LBB4_2
+
+; X32-Darwin:      pushl $0
+; X32-Darwin-NEXT: pushl $40012
+; X32-Darwin-NEXT: calll ___morestack
+; X32-Darwin-NEXT: ret
+
+; X64-Darwin:      test_fastcc_large:
+
+; X64-Darwin:      leaq -40008(%rsp), %r11
+; X64-Darwin-NEXT: cmpq %gs:816, %r11
+; X64-Darwin-NEXT: ja      LBB4_2
+
+; X64-Darwin:      movabsq $40008, %r10
+; X64-Darwin-NEXT: movabsq $0, %r11
+; X64-Darwin-NEXT: callq ___morestack
+; X64-Darwin-NEXT: ret
+
+; X32-MinGW:       test_fastcc_large:
+
+; X32-MinGW:       leal -40008(%esp), %eax
+; X32-MinGW-NEXT:  cmpl %fs:20, %eax
+; X32-MinGW-NEXT:  ja      LBB4_2
+
+; X32-MinGW:       pushl $0
+; X32-MinGW-NEXT:  pushl $40008
+; X32-MinGW-NEXT:  calll ___morestack
+; X32-MinGW-NEXT:  ret
+
+; X64-FreeBSD:       test_fastcc_large:
+
+; X64-FreeBSD:       leaq -40008(%rsp), %r11
+; X64-FreeBSD-NEXT:  cmpq %fs:24, %r11
+; X64-FreeBSD-NEXT:  ja      .LBB4_2
+
+; X64-FreeBSD:       movabsq $40008, %r10
+; X64-FreeBSD-NEXT:  movabsq $0, %r11
+; X64-FreeBSD-NEXT:  callq __morestack
+; X64-FreeBSD-NEXT:  ret
+
+}
+
+define fastcc void @test_fastcc_large_with_ecx_arg(i32 %a) {
+        %mem = alloca i32, i32 10000
+        call void @dummy_use (i32* %mem, i32 %a)
+        ret void
+
+; This is testing that the Mac implementation preserves ecx
 
-; X32:      pushl $0
-; X32-NEXT: pushl $40012
-; X32-NEXT: calll __morestack
-; X32-NEXT: ret
+; X32-Darwin:      test_fastcc_large_with_ecx_arg:
 
-; X64:      leaq -40008(%rsp), %r11
-; X64-NEXT: cmpq %fs:112, %r11
+; X32-Darwin:      leal -40012(%esp), %eax
+; X32-Darwin-NEXT: pushl %ecx
+; X32-Darwin-NEXT: movl $432, %ecx
+; X32-Darwin-NEXT: cmpl %gs:(%ecx), %eax
+; X32-Darwin-NEXT: popl %ecx
+; X32-Darwin-NEXT: ja      LBB5_2
 
-; X64:      movabsq $40008, %r10
-; X64-NEXT: movabsq $0, %r11
-; X64-NEXT: callq __morestack
-; X64-NEXT: ret
+; X32-Darwin:      pushl $0
+; X32-Darwin-NEXT: pushl $40012
+; X32-Darwin-NEXT: calll ___morestack
+; X32-Darwin-NEXT: ret
 
 }
diff --git a/test/CodeGen/X86/shift-and.ll b/test/CodeGen/X86/shift-and.ll
index fd278c2..b747cc5 100644
--- a/test/CodeGen/X86/shift-and.ll
+++ b/test/CodeGen/X86/shift-and.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86    | grep and | count 1
+; RUN: llc < %s -march=x86    | grep and | count 2
 ; RUN: llc < %s -march=x86-64 | not grep and 
 
 define i32 @t1(i32 %t, i32 %val) nounwind {
@@ -7,9 +7,15 @@ define i32 @t1(i32 %t, i32 %val) nounwind {
        ret i32 %res
 }
 
+define i32 @t2(i32 %t, i32 %val) nounwind {
+       %shamt = and i32 %t, 63
+       %res = shl i32 %val, %shamt
+       ret i32 %res
+}
+
 @X = internal global i16 0
 
-define void @t2(i16 %t) nounwind {
+define void @t3(i16 %t) nounwind {
        %shamt = and i16 %t, 31
        %tmp = load i16* @X
        %tmp1 = ashr i16 %tmp, %shamt
@@ -17,8 +23,14 @@ define void @t2(i16 %t) nounwind {
        ret void
 }
 
-define i64 @t3(i64 %t, i64 %val) nounwind {
+define i64 @t4(i64 %t, i64 %val) nounwind {
        %shamt = and i64 %t, 63
        %res = lshr i64 %val, %shamt
        ret i64 %res
 }
+
+define i64 @t5(i64 %t, i64 %val) nounwind {
+       %shamt = and i64 %t, 191
+       %res = lshr i64 %val, %shamt
+       ret i64 %res
+}
diff --git a/test/CodeGen/X86/shift-combine.ll b/test/CodeGen/X86/shift-combine.ll
index e443ac1..51f8303 100644
--- a/test/CodeGen/X86/shift-combine.ll
+++ b/test/CodeGen/X86/shift-combine.ll
@@ -1,15 +1,19 @@
-; RUN: llc < %s | not grep shrl
+; RUN: llc -march=x86 < %s | FileCheck %s
 
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-target triple = "i686-apple-darwin8"
-@array = weak global [4 x i32] zeroinitializer		; <[4 x i32]*> [#uses=1]
+@array = weak global [4 x i32] zeroinitializer
+
+define i32 @test_lshr_and(i32 %x) {
+; CHECK: test_lshr_and:
+; CHECK-NOT: shrl
+; CHECK: andl $12,
+; CHECK: movl {{.*}}array{{.*}},
+; CHECK: ret
 
-define i32 @foo(i32 %x) {
 entry:
-	%tmp2 = lshr i32 %x, 2		; <i32> [#uses=1]
-	%tmp3 = and i32 %tmp2, 3		; <i32> [#uses=1]
-	%tmp4 = getelementptr [4 x i32]* @array, i32 0, i32 %tmp3		; <i32*> [#uses=1]
-	%tmp5 = load i32* %tmp4, align 4		; <i32> [#uses=1]
-	ret i32 %tmp5
+  %tmp2 = lshr i32 %x, 2
+  %tmp3 = and i32 %tmp2, 3
+  %tmp4 = getelementptr [4 x i32]* @array, i32 0, i32 %tmp3
+  %tmp5 = load i32* %tmp4, align 4
+  ret i32 %tmp5
 }
 
diff --git a/test/CodeGen/X86/shift-folding.ll b/test/CodeGen/X86/shift-folding.ll
index d9c3061..3ea6011 100644
--- a/test/CodeGen/X86/shift-folding.ll
+++ b/test/CodeGen/X86/shift-folding.ll
@@ -1,28 +1,70 @@
-; RUN: llc < %s -march=x86 | \
-; RUN:   grep {s\[ah\]\[rl\]l} | count 1
-
-define i32* @test1(i32* %P, i32 %X) nounwind {
-        %Y = lshr i32 %X, 2             ; <i32> [#uses=1]
-        %gep.upgrd.1 = zext i32 %Y to i64               ; <i64> [#uses=1]
-        %P2 = getelementptr i32* %P, i64 %gep.upgrd.1           ; <i32*> [#uses=1]
-        ret i32* %P2
+; RUN: llc < %s -march=x86 | FileCheck %s
+
+define i32* @test1(i32* %P, i32 %X) {
+; CHECK: test1:
+; CHECK-NOT: shrl
+; CHECK-NOT: shll
+; CHECK: ret
+
+entry:
+  %Y = lshr i32 %X, 2
+  %gep.upgrd.1 = zext i32 %Y to i64
+  %P2 = getelementptr i32* %P, i64 %gep.upgrd.1
+  ret i32* %P2
 }
 
-define i32* @test2(i32* %P, i32 %X) nounwind {
-        %Y = shl i32 %X, 2              ; <i32> [#uses=1]
-        %gep.upgrd.2 = zext i32 %Y to i64               ; <i64> [#uses=1]
-        %P2 = getelementptr i32* %P, i64 %gep.upgrd.2           ; <i32*> [#uses=1]
-        ret i32* %P2
+define i32* @test2(i32* %P, i32 %X) {
+; CHECK: test2:
+; CHECK: shll $4
+; CHECK-NOT: shll
+; CHECK: ret
+
+entry:
+  %Y = shl i32 %X, 2
+  %gep.upgrd.2 = zext i32 %Y to i64
+  %P2 = getelementptr i32* %P, i64 %gep.upgrd.2
+  ret i32* %P2
 }
 
-define i32* @test3(i32* %P, i32 %X) nounwind {
-        %Y = ashr i32 %X, 2             ; <i32> [#uses=1]
-        %P2 = getelementptr i32* %P, i32 %Y             ; <i32*> [#uses=1]
-        ret i32* %P2
+define i32* @test3(i32* %P, i32 %X) {
+; CHECK: test3:
+; CHECK-NOT: shrl
+; CHECK-NOT: shll
+; CHECK: ret
+
+entry:
+  %Y = ashr i32 %X, 2
+  %P2 = getelementptr i32* %P, i32 %Y
+  ret i32* %P2
 }
 
-define fastcc i32 @test4(i32* %d) nounwind {
+define fastcc i32 @test4(i32* %d) {
+; CHECK: test4:
+; CHECK-NOT: shrl
+; CHECK: ret
+
+entry:
   %tmp4 = load i32* %d
   %tmp512 = lshr i32 %tmp4, 24
   ret i32 %tmp512
 }
+
+define i64 @test5(i16 %i, i32* %arr) {
+; Ensure that we don't fold away shifts which have multiple uses, as they are
+; just re-introduced for the second use.
+; CHECK: test5:
+; CHECK-NOT: shrl
+; CHECK: shrl $11
+; CHECK-NOT: shrl
+; CHECK: ret
+
+entry:
+  %i.zext = zext i16 %i to i32
+  %index = lshr i32 %i.zext, 11
+  %index.zext = zext i32 %index to i64
+  %val.ptr = getelementptr inbounds i32* %arr, i64 %index.zext
+  %val = load i32* %val.ptr
+  %val.zext = zext i32 %val to i64
+  %sum = add i64 %val.zext, %index.zext
+  ret i64 %sum
+}
diff --git a/test/CodeGen/X86/shl-i64.ll b/test/CodeGen/X86/shl-i64.ll
new file mode 100644
index 0000000..f00058a
--- /dev/null
+++ b/test/CodeGen/X86/shl-i64.ll
@@ -0,0 +1,20 @@
+; RUN: llc -march=x86 -mattr=+sse2 < %s | FileCheck %s
+
+; Make sure that we don't generate an illegal i64 extract after LegalizeType.
+; CHECK: shll
+
+
+define void @test_cl(<4 x i64>*  %dst, <4 x i64>* %src, i32 %idx) {
+entry:
+  %arrayidx = getelementptr inbounds <4 x i64> * %src, i32 %idx
+  %0 = load <4 x i64> * %arrayidx, align 32
+  %arrayidx1 = getelementptr inbounds <4 x i64> * %dst, i32 %idx
+  %1 = load <4 x i64> * %arrayidx1, align 32
+  %2 = extractelement <4 x i64> %1, i32 0
+  %and = and i64 %2, 63
+  %3 = insertelement <4 x i64> undef, i64 %and, i32 0    
+  %splat = shufflevector <4 x i64> %3, <4 x i64> undef, <4 x i32> zeroinitializer
+  %shl = shl <4 x i64> %0, %splat
+  store <4 x i64> %shl, <4 x i64> * %arrayidx1, align 32
+  ret void
+}
diff --git a/test/CodeGen/X86/sibcall-5.ll b/test/CodeGen/X86/sibcall-5.ll
index 9d74121..937817e 100644
--- a/test/CodeGen/X86/sibcall-5.ll
+++ b/test/CodeGen/X86/sibcall-5.ll
@@ -1,5 +1,6 @@
 ; RUN: llc < %s -mtriple=i386-apple-darwin8 -mattr=+sse2  | FileCheck %s --check-prefix=X32
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=-sse3 | FileCheck %s --check-prefix=X64_BAD
 
 ; Sibcall optimization of expanded libcalls.
 ; rdar://8707777
@@ -29,3 +30,31 @@ entry:
 declare float @sinf(float) nounwind readonly
 
 declare double @sin(double) nounwind readonly
+
+; rdar://10930395
+%0 = type opaque
+
+@"\01L_OBJC_SELECTOR_REFERENCES_2" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+
+define hidden { double, double } @foo2(%0* %self, i8* nocapture %_cmd) uwtable optsize ssp {
+; X64_BAD: foo
+; X64_BAD: call
+; X64_BAD: call
+; X64_BAD: call
+  %1 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_2", align 8, !invariant.load !0
+  %2 = bitcast %0* %self to i8*
+  %3 = tail call { double, double } bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to { double, double } (i8*, i8*)*)(i8* %2, i8* %1) optsize
+  %4 = extractvalue { double, double } %3, 0
+  %5 = extractvalue { double, double } %3, 1
+  %6 = tail call double @floor(double %4) optsize
+  %7 = tail call double @floor(double %5) optsize
+  %insert.i.i = insertvalue { double, double } undef, double %6, 0
+  %insert5.i.i = insertvalue { double, double } %insert.i.i, double %7, 1
+  ret { double, double } %insert5.i.i
+}
+
+declare i8* @objc_msgSend(i8*, i8*, ...)
+
+declare double @floor(double) optsize
+
+!0 = metadata !{}
diff --git a/test/CodeGen/X86/sret.ll b/test/CodeGen/X86/sret.ll
deleted file mode 100644
index b945530..0000000
--- a/test/CodeGen/X86/sret.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; RUN: llc < %s -march=x86 | grep ret | grep 4
-
-	%struct.foo = type { [4 x i32] }
-
-define void @bar(%struct.foo* noalias sret %agg.result) nounwind  {
-entry:
-	%tmp1 = getelementptr %struct.foo* %agg.result, i32 0, i32 0
-	%tmp3 = getelementptr [4 x i32]* %tmp1, i32 0, i32 0
-	store i32 1, i32* %tmp3, align 8
-        ret void
-}
-
-@dst = external global i32
-
-define void @foo() nounwind {
-	%memtmp = alloca %struct.foo, align 4
-        call void @bar( %struct.foo* sret %memtmp ) nounwind
-        %tmp4 = getelementptr %struct.foo* %memtmp, i32 0, i32 0
-	%tmp5 = getelementptr [4 x i32]* %tmp4, i32 0, i32 0
-        %tmp6 = load i32* %tmp5
-        store i32 %tmp6, i32* @dst
-        ret void
-}
diff --git a/test/CodeGen/X86/stack-align2.ll b/test/CodeGen/X86/stack-align2.ll
new file mode 100644
index 0000000..18cce72
--- /dev/null
+++ b/test/CodeGen/X86/stack-align2.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -mcpu=generic -mtriple=i386-linux | FileCheck %s -check-prefix=LINUX-I386
+; RUN: llc < %s -mcpu=generic -mtriple=i386-netbsd | FileCheck %s -check-prefix=NETBSD-I386
+; RUN: llc < %s -mcpu=generic -mtriple=i686-apple-darwin8 | FileCheck %s -check-prefix=DARWIN-I386
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux | FileCheck %s -check-prefix=LINUX-X86_64
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-netbsd | FileCheck %s -check-prefix=NETBSD-X86_64
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-apple-darwin8 | FileCheck %s -check-prefix=DARWIN-X86_64
+
+define i32 @test() nounwind {
+entry:
+  call void @test2()
+  ret i32 0
+
+; LINUX-I386:     subl	$12, %esp
+; DARWIN-I386:    subl	$12, %esp
+; NETBSD-I386-NOT: subl	{{.*}}, %esp
+
+; LINUX-X86_64:      pushq %{{.*}}
+; LINUX-X86_64-NOT:  subq	{{.*}}, %rsp
+; DARWIN-X86_64:     pushq %{{.*}}
+; DARWIN-X86_64-NOT: subq	{{.*}}, %rsp
+; NETBSD-X86_64:     pushq %{{.*}}
+; NETBSD-X86_64-NOT: subq	{{.*}}, %rsp
+}
+
+declare void @test2()
diff --git a/test/CodeGen/X86/stride-reuse.ll b/test/CodeGen/X86/stride-reuse.ll
index 1251a24..81de22c 100644
--- a/test/CodeGen/X86/stride-reuse.ll
+++ b/test/CodeGen/X86/stride-reuse.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86            | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; RUN: llc < %s -mcpu=generic -march=x86            | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux | FileCheck %s
 ; CHECK-NOT:     lea
 
 @B = external global [1000 x float], align 32
diff --git a/test/CodeGen/X86/sub-with-overflow.ll b/test/CodeGen/X86/sub-with-overflow.ll
index 4522e91..749b5db 100644
--- a/test/CodeGen/X86/sub-with-overflow.ll
+++ b/test/CodeGen/X86/sub-with-overflow.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | FileCheck %s
+; RUN: llc < %s -mtriple=i686-linux | FileCheck %s
 
 @ok = internal constant [4 x i8] c"%d\0A\00"
 @no = internal constant [4 x i8] c"no\0A\00"
diff --git a/test/CodeGen/X86/tail-dup-addr.ll b/test/CodeGen/X86/tail-dup-addr.ll
index c5a105c..c68a8c6 100644
--- a/test/CodeGen/X86/tail-dup-addr.ll
+++ b/test/CodeGen/X86/tail-dup-addr.ll
@@ -2,8 +2,8 @@
 
 ; Test that we don't drop a block that has its address taken.
 
+; CHECK: Ltmp0:                                  ## Block address taken
 ; CHECK: Ltmp1:                                  ## Block address taken
-; CHECK: Ltmp2:                                  ## Block address taken
 
 @a = common global i32 0, align 4
 @p = common global i8* null, align 8
diff --git a/test/CodeGen/X86/tailcall-disable.ll b/test/CodeGen/X86/tailcall-disable.ll
new file mode 100644
index 0000000..b628f5e
--- /dev/null
+++ b/test/CodeGen/X86/tailcall-disable.ll
@@ -0,0 +1,40 @@
+; RUN: llc -disable-tail-calls < %s | FileCheck --check-prefix=CALL %s
+; RUN: llc < %s | FileCheck --check-prefix=JMP %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @helper() nounwind {
+entry:
+  ret i32 7
+}
+
+define i32 @test1() nounwind {
+entry:
+  %call = tail call i32 @helper()
+  ret i32 %call
+}
+
+; CALL: test1:
+; CALL-NOT: ret
+; CALL: callq helper
+; CALL: ret
+
+; JMP: test1:
+; JMP-NOT: ret
+; JMP: jmp helper # TAILCALL
+
+define i32 @test2() nounwind {
+entry:
+  %call = tail call i32 @test2()
+  ret i32 %call
+}
+
+; CALL: test2:
+; CALL-NOT: ret
+; CALL: callq test2
+; CALL: ret
+
+; JMP: test2:
+; JMP-NOT: ret
+; JMP: jmp test2 # TAILCALL
diff --git a/test/CodeGen/X86/tailcallbyval64.ll b/test/CodeGen/X86/tailcallbyval64.ll
index 7ecf379..7621602 100644
--- a/test/CodeGen/X86/tailcallbyval64.ll
+++ b/test/CodeGen/X86/tailcallbyval64.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-linux  -tailcallopt  | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux  -tailcallopt  | FileCheck %s
 
 ; FIXME: Win64 does not support byval.
 
diff --git a/test/CodeGen/X86/tailcallstack64.ll b/test/CodeGen/X86/tailcallstack64.ll
index c18c7aa..bff5f99 100644
--- a/test/CodeGen/X86/tailcallstack64.ll
+++ b/test/CodeGen/X86/tailcallstack64.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -tailcallopt -mtriple=x86_64-linux -post-RA-scheduler=true | FileCheck %s
-; RUN: llc < %s -tailcallopt -mtriple=x86_64-win32 -post-RA-scheduler=true | FileCheck %s
+; RUN: llc < %s -tailcallopt -mcpu=generic -mtriple=x86_64-linux -post-RA-scheduler=true | FileCheck %s
+; RUN: llc < %s -tailcallopt -mcpu=generic -mtriple=x86_64-win32 -post-RA-scheduler=true | FileCheck %s
 
 ; FIXME: Redundant unused stack allocation could be eliminated.
 ; CHECK: subq  ${{24|72|80}}, %rsp
diff --git a/test/CodeGen/X86/thiscall-struct-return.ll b/test/CodeGen/X86/thiscall-struct-return.ll
new file mode 100644
index 0000000..a7be483
--- /dev/null
+++ b/test/CodeGen/X86/thiscall-struct-return.ll
@@ -0,0 +1,47 @@
+; RUN: llc < %s -mtriple=i386-PC-Win32 | FileCheck %s
+
+%class.C = type { i8 }
+%struct.S = type { i32 }
+%struct.M = type { i32, i32 }
+
+declare void @_ZN1CC1Ev(%class.C* %this) unnamed_addr nounwind align 2
+declare x86_thiscallcc void @_ZNK1C5SmallEv(%struct.S* noalias sret %agg.result, %class.C* %this) nounwind align 2
+declare x86_thiscallcc void @_ZNK1C6MediumEv(%struct.M* noalias sret %agg.result, %class.C* %this) nounwind align 2
+
+define void @testv() nounwind {
+; CHECK: testv:
+; CHECK: leal
+; CHECK-NEXT: movl	%esi, (%esp)
+; CHECK-NEXT: calll _ZN1CC1Ev
+; CHECK: leal 8(%esp), %eax
+; CHECK-NEXT: movl %esi, %ecx
+; CHECK-NEXT: calll _ZNK1C5SmallEv
+entry:
+  %c = alloca %class.C, align 1
+  %tmp = alloca %struct.S, align 4
+  call void @_ZN1CC1Ev(%class.C* %c)
+  ; This call should put the return structure as a pointer
+  ; into EAX instead of returning directly in EAX.  The this
+  ; pointer should go into ECX
+  call x86_thiscallcc void @_ZNK1C5SmallEv(%struct.S* sret %tmp, %class.C* %c)
+  ret void
+}
+
+define void @test2v() nounwind {
+; CHECK: test2v:
+; CHECK: leal
+; CHECK-NEXT: movl	%esi, (%esp)
+; CHECK-NEXT: calll _ZN1CC1Ev
+; CHECK: leal 8(%esp), %eax
+; CHECK-NEXT: movl %esi, %ecx
+; CHECK-NEXT: calll _ZNK1C6MediumEv
+entry:
+  %c = alloca %class.C, align 1
+  %tmp = alloca %struct.M, align 4
+  call void @_ZN1CC1Ev(%class.C* %c)
+  ; This call should put the return structure as a pointer
+  ; into EAX instead of returning directly in EAX/EDX.  The this
+  ; pointer should go into ECX
+  call x86_thiscallcc void @_ZNK1C6MediumEv(%struct.M* sret %tmp, %class.C* %c)
+  ret void
+}
diff --git a/test/CodeGen/X86/tls1.ll b/test/CodeGen/X86/tls1.ll
index 0cae5c4..f39658e 100644
--- a/test/CodeGen/X86/tls1.ll
+++ b/test/CodeGen/X86/tls1.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movl	%gs:i@NTPOFF, %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movl	%fs:i@TPOFF, %eax} %t2
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu | FileCheck -check-prefix=X32_LINUX %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu | FileCheck -check-prefix=X64_LINUX %s
+; RUN: llc < %s -march=x86 -mtriple=x86-pc-win32 | FileCheck -check-prefix=X32_WIN %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-pc-win32 | FileCheck -check-prefix=X64_WIN %s
 
 @i = thread_local global i32 15
 
@@ -10,3 +10,11 @@ entry:
 	%tmp1 = load i32* @i
 	ret i32 %tmp1
 }
+; X32_LINUX: movl %gs:i@NTPOFF, %eax
+; X64_LINUX: movl %fs:i@TPOFF, %eax
+; X32_WIN: movl __tls_index, %eax
+; X32_WIN: movl %fs:__tls_array, %ecx
+; X32_WIN: movl _i@SECREL(%eax), %eax
+; X64_WIN: movl _tls_index(%rip), %eax
+; X64_WIN: movabsq $i@SECREL, %rcx
+
diff --git a/test/CodeGen/X86/tls11.ll b/test/CodeGen/X86/tls11.ll
index 514a168..cc14826 100644
--- a/test/CodeGen/X86/tls11.ll
+++ b/test/CodeGen/X86/tls11.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movzwl	%gs:i@NTPOFF, %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movzwl	%fs:i@TPOFF, %eax} %t2
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu | FileCheck -check-prefix=X32_LINUX %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu | FileCheck -check-prefix=X64_LINUX %s
+; RUN: llc < %s -march=x86 -mtriple=x86-pc-win32 | FileCheck -check-prefix=X32_WIN %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-pc-win32 | FileCheck -check-prefix=X64_WIN %s
 
 @i = thread_local global i16 15
 
@@ -10,3 +10,12 @@ entry:
 	%tmp1 = load i16* @i
 	ret i16 %tmp1
 }
+; X32_LINUX: movzwl %gs:i@NTPOFF, %eax
+; X64_LINUX: movzwl %fs:i@TPOFF, %eax
+; X32_WIN: movl __tls_index, %eax
+; X32_WIN: movl %fs:__tls_array, %ecx
+; X32_WIN: movzwl _i@SECREL(%eax), %eax
+; X64_WIN: movl _tls_index(%rip), %eax
+; X64_WIN: movq %gs:88, %rcx
+; X64_WIN: movabsq $i@SECREL, %rcx
+; X64_WIN: movzwl (%rax,%rcx), %eax
diff --git a/test/CodeGen/X86/tls12.ll b/test/CodeGen/X86/tls12.ll
index c29f6ad..3da789e 100644
--- a/test/CodeGen/X86/tls12.ll
+++ b/test/CodeGen/X86/tls12.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movb	%gs:i@NTPOFF, %al} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movb	%fs:i@TPOFF, %al} %t2
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu | FileCheck -check-prefix=X32_LINUX %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu | FileCheck -check-prefix=X64_LINUX %s
+; RUN: llc < %s -march=x86 -mtriple=x86-pc-win32 | FileCheck -check-prefix=X32_WIN %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-pc-win32 | FileCheck -check-prefix=X64_WIN %s
 
 @i = thread_local global i8 15
 
@@ -10,3 +10,12 @@ entry:
 	%tmp1 = load i8* @i
 	ret i8 %tmp1
 }
+; X32_LINUX: movb %gs:i@NTPOFF, %al
+; X64_LINUX: movb %fs:i@TPOFF, %al
+; X32_WIN: movl __tls_index, %eax
+; X32_WIN: movl %fs:__tls_array, %ecx
+; X32_WIN: movb _i@SECREL(%eax), %al
+; X64_WIN: movl _tls_index(%rip), %eax
+; X64_WIN: movq %gs:88, %rcx
+; X64_WIN: movabsq $i@SECREL, %rcx
+; X64_WIN: movb (%rax,%rcx), %al
diff --git a/test/CodeGen/X86/tls13.ll b/test/CodeGen/X86/tls13.ll
index 08778ec..0f6a98a 100644
--- a/test/CodeGen/X86/tls13.ll
+++ b/test/CodeGen/X86/tls13.ll
@@ -1,9 +1,7 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movswl	%gs:i@NTPOFF, %eax} %t
-; RUN: grep {movzwl	%gs:j@NTPOFF, %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movswl	%fs:i@TPOFF, %edi} %t2
-; RUN: grep {movzwl	%fs:j@TPOFF, %edi} %t2
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu | FileCheck -check-prefix=X32_LINUX %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu | FileCheck -check-prefix=X64_LINUX %s
+; RUN: llc < %s -march=x86 -mtriple=x86-pc-win32 | FileCheck -check-prefix=X32_WIN %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-pc-win32 | FileCheck -check-prefix=X64_WIN %s
 
 @i = thread_local global i16 0
 @j = thread_local global i16 0
@@ -22,3 +20,14 @@ entry:
 declare void @g(i32)
 
 declare void @h(i32)
+
+; X32_LINUX: movswl %gs:i@NTPOFF, %eax
+; X32_LINUX: movzwl %gs:j@NTPOFF, %eax
+; X64_LINUX: movswl %fs:i@TPOFF, %edi
+; X64_LINUX: movzwl %fs:j@TPOFF, %edi
+; X32_WIN: movswl _i@SECREL(%esi), %eax
+; X32_WIN: movzwl _j@SECREL(%esi), %eax
+; X64_WIN: movabsq $i@SECREL, %rax
+; X64_WIN: movswl (%rsi,%rax), %ecx
+; X64_WIN: movabsq $j@SECREL, %rax
+; X64_WIN: movzwl (%rsi,%rax), %ecx
diff --git a/test/CodeGen/X86/tls14.ll b/test/CodeGen/X86/tls14.ll
index 88426dd..6462571 100644
--- a/test/CodeGen/X86/tls14.ll
+++ b/test/CodeGen/X86/tls14.ll
@@ -1,9 +1,7 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movsbl	%gs:i@NTPOFF, %eax} %t
-; RUN: grep {movzbl	%gs:j@NTPOFF, %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movsbl	%fs:i@TPOFF, %edi} %t2
-; RUN: grep {movzbl	%fs:j@TPOFF, %edi} %t2
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu | FileCheck -check-prefix=X32_LINUX %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu | FileCheck -check-prefix=X64_LINUX %s
+; RUN: llc < %s -march=x86 -mtriple=x86-pc-win32 | FileCheck -check-prefix=X32_WIN %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-pc-win32 | FileCheck -check-prefix=X64_WIN %s
 
 @i = thread_local global i8 0
 @j = thread_local global i8 0
@@ -22,3 +20,14 @@ entry:
 declare void @g(i32)
 
 declare void @h(i32)
+
+; X32_LINUX: movsbl %gs:i@NTPOFF, %eax
+; X32_LINUX: movzbl %gs:j@NTPOFF, %eax
+; X64_LINUX: movsbl %fs:i@TPOFF, %edi
+; X64_LINUX: movzbl %fs:j@TPOFF, %edi
+; X32_WIN: movsbl _i@SECREL(%esi), %eax
+; X32_WIN: movzbl _j@SECREL(%esi), %eax
+; X64_WIN: movabsq $i@SECREL, %rax
+; X64_WIN: movsbl (%rsi,%rax), %ecx
+; X64_WIN: movabsq $j@SECREL, %rax
+; X64_WIN: movzbl (%rsi,%rax), %ecx
diff --git a/test/CodeGen/X86/tls2.ll b/test/CodeGen/X86/tls2.ll
index 5a94296..e882f53 100644
--- a/test/CodeGen/X86/tls2.ll
+++ b/test/CodeGen/X86/tls2.ll
@@ -1,9 +1,7 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movl	%gs:0, %eax} %t
-; RUN: grep {leal	i@NTPOFF(%eax), %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movq	%fs:0, %rax} %t2
-; RUN: grep {leaq	i@TPOFF(%rax), %rax} %t2
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu | FileCheck -check-prefix=X32_LINUX %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu | FileCheck -check-prefix=X64_LINUX %s
+; RUN: llc < %s -march=x86 -mtriple=x86-pc-win32 | FileCheck -check-prefix=X32_WIN %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-pc-win32 | FileCheck -check-prefix=X64_WIN %s
 
 @i = thread_local global i32 15
 
@@ -11,3 +9,13 @@ define i32* @f() {
 entry:
 	ret i32* @i
 }
+; X32_LINUX: movl %gs:0, %eax
+; X32_LINUX: leal i@NTPOFF(%eax), %eax
+; X64_LINUX: movq %fs:0, %rax
+; X64_LINUX: leaq i@TPOFF(%rax), %rax
+; X32_WIN: movl __tls_index, %eax
+; X32_WIN: movl %fs:__tls_array, %ecx
+; X32_WIN: leal _i@SECREL(%eax), %eax
+; X64_WIN: movl _tls_index(%rip), %eax
+; X64_WIN: movq %gs:88, %rcx
+; X64_WIN: addq $i@SECREL, %rax
diff --git a/test/CodeGen/X86/tls3.ll b/test/CodeGen/X86/tls3.ll
index 7327cc4..ee3f28f 100644
--- a/test/CodeGen/X86/tls3.ll
+++ b/test/CodeGen/X86/tls3.ll
@@ -1,9 +1,7 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movl	i@INDNTPOFF, %eax} %t
-; RUN: grep {movl	%gs:(%eax), %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movq	i@GOTTPOFF(%rip), %rax} %t2
-; RUN: grep {movl	%fs:(%rax), %eax} %t2
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu | FileCheck -check-prefix=X32_LINUX %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu | FileCheck -check-prefix=X64_LINUX %s
+; RUN: llc < %s -march=x86 -mtriple=x86-pc-win32 | FileCheck -check-prefix=X32_WIN %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-pc-win32 | FileCheck -check-prefix=X64_WIN %s
 
 @i = external thread_local global i32		; <i32*> [#uses=2]
 
@@ -12,3 +10,12 @@ entry:
 	%tmp1 = load i32* @i		; <i32> [#uses=1]
 	ret i32 %tmp1
 }
+; X32_LINUX: movl i@INDNTPOFF, %eax
+; X32_LINUX: movl %gs:(%eax), %eax
+; X64_LINUX: movq i@GOTTPOFF(%rip), %rax
+; X64_LINUX: movl %fs:(%rax), %eax
+; X32_WIN: movl __tls_index, %eax
+; X32_WIN: movl %fs:__tls_array, %ecx
+; X32_WIN: movl _i@SECREL(%eax), %eax
+; X64_WIN: movl _tls_index(%rip), %eax
+; X64_WIN: movabsq $i@SECREL, %rcx
diff --git a/test/CodeGen/X86/tls4.ll b/test/CodeGen/X86/tls4.ll
index d2e40e3..2b53ec5 100644
--- a/test/CodeGen/X86/tls4.ll
+++ b/test/CodeGen/X86/tls4.ll
@@ -1,9 +1,7 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movl	%gs:0, %eax} %t
-; RUN: grep {addl	i@INDNTPOFF, %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movq	%fs:0, %rax} %t2
-; RUN: grep {addq	i@GOTTPOFF(%rip), %rax} %t2
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu | FileCheck -check-prefix=X32_LINUX %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu | FileCheck -check-prefix=X64_LINUX %s
+; RUN: llc < %s -march=x86 -mtriple=x86-pc-win32 | FileCheck -check-prefix=X32_WIN %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-pc-win32 | FileCheck -check-prefix=X64_WIN %s
 
 @i = external thread_local global i32		; <i32*> [#uses=2]
 
@@ -11,3 +9,13 @@ define i32* @f() {
 entry:
 	ret i32* @i
 }
+; X32_LINUX: movl %gs:0, %eax
+; X32_LINUX: addl i@INDNTPOFF, %eax
+; X64_LINUX: movq %fs:0, %rax
+; X64_LINUX: addq i@GOTTPOFF(%rip), %rax
+; X32_WIN: movl __tls_index, %eax
+; X32_WIN: movl %fs:__tls_array, %ecx
+; X32_WIN: leal _i@SECREL(%eax), %eax
+; X64_WIN: movl _tls_index(%rip), %eax
+; X64_WIN: movq %gs:88, %rcx
+; X64_WIN: addq $i@SECREL, %rax
diff --git a/test/CodeGen/X86/tls5.ll b/test/CodeGen/X86/tls5.ll
index 4d2cc02..3cc6dab 100644
--- a/test/CodeGen/X86/tls5.ll
+++ b/test/CodeGen/X86/tls5.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movl	%gs:i@NTPOFF, %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movl	%fs:i@TPOFF, %eax} %t2
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu | FileCheck -check-prefix=X32_LINUX %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu | FileCheck -check-prefix=X64_LINUX %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-pc-win32 | FileCheck -check-prefix=X64_WIN %s
+; RUN: llc < %s -march=x86 -mtriple=x86-pc-win32 | FileCheck -check-prefix=X32_WIN %s
 
 @i = internal thread_local global i32 15
 
@@ -10,3 +10,10 @@ entry:
 	%tmp1 = load i32* @i
 	ret i32 %tmp1
 }
+; X32_LINUX: movl %gs:i@NTPOFF, %eax
+; X64_LINUX: movl %fs:i@TPOFF, %eax
+; X32_WIN: movl __tls_index, %eax
+; X32_WIN: movl %fs:__tls_array, %ecx
+; X32_WIN: movl _i@SECREL(%eax), %eax
+; X64_WIN: movl _tls_index(%rip), %eax
+; X64_WIN: movabsq $i@SECREL, %rcx
diff --git a/test/CodeGen/X86/tls6.ll b/test/CodeGen/X86/tls6.ll
index 505106e..c98ad7c 100644
--- a/test/CodeGen/X86/tls6.ll
+++ b/test/CodeGen/X86/tls6.ll
@@ -1,9 +1,7 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movl	%gs:0, %eax} %t
-; RUN: grep {leal	i@NTPOFF(%eax), %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movq	%fs:0, %rax} %t2
-; RUN: grep {leaq	i@TPOFF(%rax), %rax} %t2
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu | FileCheck -check-prefix=X32_LINUX %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu | FileCheck -check-prefix=X64_LINUX %s
+; RUN: llc < %s -march=x86 -mtriple=x86-pc-win32 | FileCheck -check-prefix=X32_WIN %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-pc-win32 | FileCheck -check-prefix=X64_WIN %s
 
 @i = internal thread_local global i32 15
 
@@ -11,3 +9,13 @@ define i32* @f() {
 entry:
 	ret i32* @i
 }
+; X32_LINUX: movl %gs:0, %eax
+; X32_LINUX: leal i@NTPOFF(%eax), %eax
+; X64_LINUX: movq %fs:0, %rax
+; X64_LINUX: leaq i@TPOFF(%rax), %rax
+; X32_WIN: movl __tls_index, %eax
+; X32_WIN: movl %fs:__tls_array, %ecx
+; X32_WIN: leal _i@SECREL(%eax), %eax
+; X64_WIN: movl _tls_index(%rip), %eax
+; X64_WIN: movq %gs:88, %rcx
+; X64_WIN: addq $i@SECREL, %rax
diff --git a/test/CodeGen/X86/twoaddr-lea.ll b/test/CodeGen/X86/twoaddr-lea.ll
index b7fe039..9d58019 100644
--- a/test/CodeGen/X86/twoaddr-lea.ll
+++ b/test/CodeGen/X86/twoaddr-lea.ll
@@ -5,7 +5,7 @@
 ;; allocator turns the shift into an LEA.  This also occurs for ADD.
 
 ; Check that the shift gets turned into an LEA.
-; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-apple-darwin | FileCheck %s
 
 @G = external global i32
 
diff --git a/test/CodeGen/X86/uint64-to-float.ll b/test/CodeGen/X86/uint64-to-float.ll
index 1dbbdcf..e853e77 100644
--- a/test/CodeGen/X86/uint64-to-float.ll
+++ b/test/CodeGen/X86/uint64-to-float.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 | FileCheck %s
+; RUN: llc < %s -mcpu=generic -march=x86-64 | FileCheck %s
 ; Verify that we are using the efficient uitofp --> sitofp lowering illustrated
 ; by the compiler_rt implementation of __floatundisf.
 ; <rdar://problem/8493982>
@@ -6,37 +6,12 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.0.0"
 
-; FIXME: This test could generate this code:
-;
-; ## BB#0:                                ## %entry
-; 	testq	%rdi, %rdi
-; 	jns	LBB0_2
-; ## BB#1:
-; 	movq	%rdi, %rax
-; 	shrq	%rax
-; 	andq	$1, %rdi
-; 	orq	%rax, %rdi
-; 	cvtsi2ssq	%rdi, %xmm0
-; 	addss	%xmm0, %xmm0
-; 	ret
-; LBB0_2:                                 ## %entry
-; 	cvtsi2ssq	%rdi, %xmm0
-; 	ret
-;
-; The blocks come from lowering:
-;
-;   %vreg7<def> = CMOV_FR32 %vreg6<kill>, %vreg5<kill>, 15, %EFLAGS<imp-use>; FR32:%vreg7,%vreg6,%vreg5
-;
-; If the instruction had an EFLAGS<kill> flag, it wouldn't need to mark EFLAGS
-; as live-in on the new blocks, and machine sinking would be able to sink
-; everything below the test.
-
-; CHECK: shrq
-; CHECK: andq
-; CHECK-NEXT: orq
 ; CHECK: testq %rdi, %rdi
 ; CHECK-NEXT: jns LBB0_2
-; CHECK: cvtsi2ss
+; CHECK: shrq
+; CHECK-NEXT: andq
+; CHECK-NEXT: orq
+; CHECK-NEXT: cvtsi2ss
 ; CHECK: LBB0_2
 ; CHECK-NEXT: cvtsi2ss
 define float @test(i64 %a) {
diff --git a/test/CodeGen/X86/unreachable-stack-protector.ll b/test/CodeGen/X86/unreachable-stack-protector.ll
index eeebcee..b066297 100644
--- a/test/CodeGen/X86/unreachable-stack-protector.ll
+++ b/test/CodeGen/X86/unreachable-stack-protector.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s | FileCheck %s
+; RUN: llc < %s -disable-cgp-delete-dead-blocks | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.0.0"
 
diff --git a/test/CodeGen/X86/v-binop-widen.ll b/test/CodeGen/X86/v-binop-widen.ll
index 3bee700..8655c6c 100644
--- a/test/CodeGen/X86/v-binop-widen.ll
+++ b/test/CodeGen/X86/v-binop-widen.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=x86 -mattr=+sse < %s | FileCheck %s
+; RUN: llc -mcpu=generic -march=x86 -mattr=+sse < %s | FileCheck %s
 ; CHECK: divss
 ; CHECK: divps
 ; CHECK: divps
diff --git a/test/CodeGen/X86/vec_call.ll b/test/CodeGen/X86/vec_call.ll
index b3efc7b..f2fc7e7 100644
--- a/test/CodeGen/X86/vec_call.ll
+++ b/test/CodeGen/X86/vec_call.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin8 | \
+; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin8 | \
 ; RUN:   grep {subl.*60}
-; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin8 | \
+; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin8 | \
 ; RUN:   grep {movaps.*32}
 
 
diff --git a/test/CodeGen/X86/vec_fpext.ll b/test/CodeGen/X86/vec_fpext.ll
new file mode 100644
index 0000000..05b263e
--- /dev/null
+++ b/test/CodeGen/X86/vec_fpext.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=x86 -mattr=+sse41,-avx | FileCheck %s
+
+; PR11674
+define void @fpext_frommem(<2 x float>* %in, <2 x double>* %out) {
+entry:
+; TODO: We should be able to generate cvtps2pd for the load.
+; For now, just check that we generate something sane.
+; CHECK: cvtss2sd
+; CHECK: cvtss2sd
+  %0 = load <2 x float>* %in, align 8
+  %1 = fpext <2 x float> %0 to <2 x double>
+  store <2 x double> %1, <2 x double>* %out, align 1
+  ret void
+}
diff --git a/test/CodeGen/X86/vector-gep.ll b/test/CodeGen/X86/vector-gep.ll
index d032eda..3476e36 100644
--- a/test/CodeGen/X86/vector-gep.ll
+++ b/test/CodeGen/X86/vector-gep.ll
@@ -1,27 +1,27 @@
 ; RUN: llc < %s -march=x86 -mcpu=corei7-avx | FileCheck %s
 ; RUN: opt -instsimplify %s -disable-output
 
-;CHECK: AGEP0
+;CHECK: AGEP0:
 define <4 x i32*> @AGEP0(i32* %ptr) nounwind {
 entry:
   %vecinit.i = insertelement <4 x i32*> undef, i32* %ptr, i32 0
   %vecinit2.i = insertelement <4 x i32*> %vecinit.i, i32* %ptr, i32 1
   %vecinit4.i = insertelement <4 x i32*> %vecinit2.i, i32* %ptr, i32 2
   %vecinit6.i = insertelement <4 x i32*> %vecinit4.i, i32* %ptr, i32 3
-;CHECK: pslld
+;CHECK: pslld $2
 ;CHECK: padd
   %A2 = getelementptr <4 x i32*> %vecinit6.i, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
-;CHECK: pslld
+;CHECK: pslld $2
 ;CHECK: padd
   %A3 = getelementptr <4 x i32*> %A2, <4 x i32> <i32 10, i32 14, i32 19, i32 233>
   ret <4 x i32*> %A3
 ;CHECK: ret
 }
 
-;CHECK: AGEP1
+;CHECK: AGEP1:
 define i32 @AGEP1(<4 x i32*> %param) nounwind {
 entry:
-;CHECK: pslld
+;CHECK: pslld $2
 ;CHECK: padd
   %A2 = getelementptr <4 x i32*> %param, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
   %k = extractelement <4 x i32*> %A2, i32 3
@@ -30,10 +30,10 @@ entry:
 ;CHECK: ret
 }
 
-;CHECK: AGEP2
+;CHECK: AGEP2:
 define i32 @AGEP2(<4 x i32*> %param, <4 x i32> %off) nounwind {
 entry:
-;CHECK: pslld
+;CHECK: pslld $2
 ;CHECK: padd
   %A2 = getelementptr <4 x i32*> %param, <4 x i32> %off
   %k = extractelement <4 x i32*> %A2, i32 3
@@ -42,10 +42,10 @@ entry:
 ;CHECK: ret
 }
 
-;CHECK: AGEP3
+;CHECK: AGEP3:
 define <4 x i32*> @AGEP3(<4 x i32*> %param, <4 x i32> %off) nounwind {
 entry:
-;CHECK: pslld
+;CHECK: pslld $2
 ;CHECK: padd
   %A2 = getelementptr <4 x i32*> %param, <4 x i32> %off
   %v = alloca i32
@@ -54,24 +54,35 @@ entry:
 ;CHECK: ret
 }
 
-;CHECK: AGEP4
-define <4 x i8*> @AGEP4(<4 x i8*> %param, <4 x i32> %off) nounwind {
+;CHECK: AGEP4:
+define <4 x i16*> @AGEP4(<4 x i16*> %param, <4 x i32> %off) nounwind {
 entry:
-;CHECK: pslld
+; Multiply offset by two (add it to itself).
 ;CHECK: padd
-  %A = getelementptr <4 x i8*> %param, <4 x i32> %off
-  ret <4 x i8*> %A
+; add the base to the offset
+;CHECK: padd
+  %A = getelementptr <4 x i16*> %param, <4 x i32> %off
+  ret <4 x i16*> %A
 ;CHECK: ret
 }
 
-;CHECK: AGEP5
+;CHECK: AGEP5:
 define <4 x i8*> @AGEP5(<4 x i8*> %param, <4 x i8> %off) nounwind {
 entry:
-;CHECK: pslld
-;CHECK: padd
+;CHECK: paddd
   %A = getelementptr <4 x i8*> %param, <4 x i8> %off
   ret <4 x i8*> %A
 ;CHECK: ret
 }
 
 
+; The size of each element is 1 byte. No need to multiply by element size.
+;CHECK: AGEP6:
+define <4 x i8*> @AGEP6(<4 x i8*> %param, <4 x i32> %off) nounwind {
+entry:
+;CHECK-NOT: pslld
+  %A = getelementptr <4 x i8*> %param, <4 x i32> %off
+  ret <4 x i8*> %A
+;CHECK: ret
+}
+
diff --git a/test/CodeGen/X86/widen_arith-1.ll b/test/CodeGen/X86/widen_arith-1.ll
index 85367e8..661cde8 100644
--- a/test/CodeGen/X86/widen_arith-1.ll
+++ b/test/CodeGen/X86/widen_arith-1.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 |  FileCheck %s
+; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+sse42 |  FileCheck %s
 
 define void @update(<3 x i8>* %dst, <3 x i8>* %src, i32 %n) nounwind {
 entry:
diff --git a/test/CodeGen/X86/widen_arith-3.ll b/test/CodeGen/X86/widen_arith-3.ll
index b959ce8..f55b184 100644
--- a/test/CodeGen/X86/widen_arith-3.ll
+++ b/test/CodeGen/X86/widen_arith-3.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 -post-RA-scheduler=true | FileCheck %s
+; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+sse42 -post-RA-scheduler=true | FileCheck %s
 ; CHECK: incl
 ; CHECK: incl
 ; CHECK: incl
diff --git a/test/CodeGen/X86/widen_load-2.ll b/test/CodeGen/X86/widen_load-2.ll
index 29689dd..79aa000 100644
--- a/test/CodeGen/X86/widen_load-2.ll
+++ b/test/CodeGen/X86/widen_load-2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -o - -march=x86-64 -mattr=+sse42 | FileCheck %s
+; RUN: llc < %s -o - -mcpu=generic -march=x86-64 -mattr=+sse42 | FileCheck %s
 
 ; Test based on pr5626 to load/store
 ;
diff --git a/test/CodeGen/X86/widen_shuffle-1.ll b/test/CodeGen/X86/widen_shuffle-1.ll
index 24608d0..7bebb27 100644
--- a/test/CodeGen/X86/widen_shuffle-1.ll
+++ b/test/CodeGen/X86/widen_shuffle-1.ll
@@ -10,6 +10,7 @@ entry:
 	%val = fadd <3 x float> %x, %src2
 	store <3 x float> %val, <3 x float>* %dst.addr
 	ret void
+; CHECK: ret
 }
 
 
@@ -23,6 +24,7 @@ entry:
 	%val = fadd <3 x float> %x, %src2
 	store <3 x float> %val, <3 x float>* %dst.addr
 	ret void
+; CHECK: ret
 }
 
 ; Example of when widening a v3float operation causes the DAG to replace a node
@@ -31,7 +33,7 @@ entry:
 define void @shuf3(<4 x float> %tmp10, <4 x float> %vecinit15, <4 x float>* %dst) nounwind {
 entry:
 ; CHECK: shuf3:
-; CHECK: pshufd
+; CHECK: shufps
   %shuffle.i.i.i12 = shufflevector <4 x float> %tmp10, <4 x float> %vecinit15, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
   %tmp25.i.i = shufflevector <4 x float> %shuffle.i.i.i12, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2> 
   %tmp1.i.i = shufflevector <3 x float> %tmp25.i.i, <3 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -45,6 +47,7 @@ entry:
   %shuffle.i.i.i21 = shufflevector <4 x float> %tmp2.i18, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
   store <4 x float> %shuffle.i.i.i21, <4 x float>* %dst
   ret void
+; CHECK: ret
 }
 
 ; PR10421: make sure we correctly handle extreme widening with CONCAT_VECTORS
@@ -53,6 +56,7 @@ define <8 x i8> @shuf4(<4 x i8> %a, <4 x i8> %b) nounwind readnone {
 ; CHECK-NOT: punpckldq
   %vshuf = shufflevector <4 x i8> %a, <4 x i8> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   ret <8 x i8> %vshuf
+; CHECK: ret
 }
 
 ; PR11389: another CONCAT_VECTORS case
@@ -61,4 +65,5 @@ define void @shuf5(<8 x i8>* %p) nounwind {
   %v = shufflevector <2 x i8> <i8 4, i8 33>, <2 x i8> undef, <8 x i32> <i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
   store <8 x i8> %v, <8 x i8>* %p, align 8
   ret void
+; CHECK: ret
 }
diff --git a/test/CodeGen/X86/win32_sret.ll b/test/CodeGen/X86/win32_sret.ll
new file mode 100644
index 0000000..878c6db
--- /dev/null
+++ b/test/CodeGen/X86/win32_sret.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN_X32
+; RUN: llc < %s -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X32
+; RUN: llc < %s -mtriple=i386-pc-linux | FileCheck %s -check-prefix=LINUX
+; RUN: llc < %s -O0 -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN_X32
+; RUN: llc < %s -O0 -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X32
+; RUN: llc < %s -O0 -mtriple=i386-pc-linux | FileCheck %s -check-prefix=LINUX
+
+; The SysV ABI used by most Unixes and Mingw on x86 specifies that an sret pointer
+; is callee-cleanup. However, in MSVC's cdecl calling convention, sret pointer
+; arguments are caller-cleanup like normal arguments.
+
+define void @sret1(i8* sret) nounwind {
+entry:
+; WIN_X32:    {{ret$}}
+; MINGW_X32:  ret $4
+; LINUX:      ret $4
+  ret void
+}
+
+define void @sret2(i32* sret %x, i32 %y) nounwind {
+entry:
+; WIN_X32:    {{ret$}}
+; MINGW_X32:  ret $4
+; LINUX:      ret $4
+  store i32 %y, i32* %x
+  ret void
+}
+
diff --git a/test/CodeGen/X86/win64_alloca_dynalloca.ll b/test/CodeGen/X86/win64_alloca_dynalloca.ll
index e39d007..a961c6a 100644
--- a/test/CodeGen/X86/win64_alloca_dynalloca.ll
+++ b/test/CodeGen/X86/win64_alloca_dynalloca.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -join-physregs -mtriple=x86_64-mingw32     | FileCheck %s -check-prefix=M64
-; RUN: llc < %s -join-physregs -mtriple=x86_64-win32       | FileCheck %s -check-prefix=W64
-; RUN: llc < %s -join-physregs -mtriple=x86_64-win32-macho | FileCheck %s -check-prefix=EFI
+; RUN: llc < %s -join-physregs -mcpu=generic -mtriple=x86_64-mingw32     | FileCheck %s -check-prefix=M64
+; RUN: llc < %s -join-physregs -mcpu=generic -mtriple=x86_64-win32       | FileCheck %s -check-prefix=W64
+; RUN: llc < %s -join-physregs -mcpu=generic -mtriple=x86_64-win32-macho | FileCheck %s -check-prefix=EFI
 ; PR8777
 ; PR8778
 
diff --git a/test/CodeGen/X86/win64_vararg.ll b/test/CodeGen/X86/win64_vararg.ll
index efe8bca..52bc509 100644
--- a/test/CodeGen/X86/win64_vararg.ll
+++ b/test/CodeGen/X86/win64_vararg.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-pc-win32 | FileCheck %s
 
 ; Verify that the var arg parameters which are passed in registers are stored
 ; in home stack slots allocated by the caller and that AP is correctly
diff --git a/test/CodeGen/X86/win_ftol2.ll b/test/CodeGen/X86/win_ftol2.ll
new file mode 100644
index 0000000..596b426
--- /dev/null
+++ b/test/CodeGen/X86/win_ftol2.ll
@@ -0,0 +1,144 @@
+; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=FTOL
+; RUN: llc < %s -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=COMPILERRT
+; RUN: llc < %s -mtriple=i686-pc-linux | FileCheck %s -check-prefix=COMPILERRT
+; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s -check-prefix=COMPILERRT
+; RUN: llc < %s -mtriple=x86_64-pc-mingw32 | FileCheck %s -check-prefix=COMPILERRT
+; RUN: llc < %s -mtriple=x86_64-pc-linux | FileCheck %s -check-prefix=COMPILERRT
+; RUN: llc < %s -mattr=-sse -O0 -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=FTOL_2
+
+; Win32 targets use the MSVCRT _ftol2 runtime function for fptoui to i64. This
+; function has a nonstandard calling convention: the input value is expected on
+; the x87 stack instead of the callstack. The input value is popped by the
+; callee. Mingw32 uses normal cdecl compiler-rt functions.
+
+define i64 @double_ui64(double %x) nounwind {
+entry:
+; COMPILERRT: @double_ui64
+; COMPILERRT-NOT: calll __ftol2
+; FTOL: @double_ui64
+; FTOL: fldl
+; FTOL: calll __ftol2
+; FTOL-NOT: fstp
+  %0 = fptoui double %x to i64
+  ret i64 %0
+}
+
+define i64 @float_ui64(float %x) nounwind {
+entry:
+; COMPILERRT: @float_ui64
+; COMPILERRT-NOT: calll __ftol2
+; FTOL: @float_ui64
+; FTOL: flds
+; FTOL: calll __ftol2
+; FTOL-NOT: fstp
+  %0 = fptoui float %x to i64
+  ret i64 %0
+}
+
+define i64 @double_ui64_2(double %x, double %y, double %z) nounwind {
+; COMPILERRT: @double_ui64_2
+; FTOL: @double_ui64_2
+; FTOL_2: @double_ui64_2
+;; stack is empty
+; FTOL_2: fldl
+;; stack is %z
+; FTOL_2: fldl
+;; stack is %y %z
+; FTOL_2: fldl
+;; stack is %x %y %z
+; FTOL_2: fdiv %st(0), %st(1)
+;; stack is %x %1 %z
+; FTOL_2: fsubp %st(2)
+;; stack is %1 %2
+; FTOL_2: fxch
+; FTOL_2-NOT: fld
+; FTOL_2-NOT: fst
+;; stack is %2 %1
+; FTOL_2: calll __ftol2
+; FTOL_2-NOT: fxch
+; FTOL_2-NOT: fld
+; FTOL_2-NOT: fst
+; FTOL_2: calll __ftol2
+;; stack is empty
+
+  %1 = fdiv double %x, %y
+  %2 = fsub double %x, %z
+  %3 = fptoui double %1 to i64
+  %4 = fptoui double %2 to i64
+  %5 = sub i64 %3, %4
+  ret i64 %5
+}
+
+define i64 @double_ui64_3(double %x, double %y, double %z) nounwind {
+; COMPILERRT: @double_ui64_3
+; FTOL: @double_ui64_3
+; FTOL_2: @double_ui64_3
+;; stack is empty
+; FTOL_2: fldl
+;; stack is %z
+; FTOL_2: fldl
+;; stack is %y %z
+; FTOL_2: fldl
+;; stack is %x %y %z
+; FTOL_2: fdiv %st(0), %st(1)
+;; stack is %x %1 %z
+; FTOL_2: fsubp %st(2)
+;; stack is %1 %2
+; FTOL_2-NOT: fxch
+; FTOL_2-NOT: fld
+; FTOL_2-NOT: fst
+;; stack is %1 %2 (still)
+; FTOL_2: calll __ftol2
+; FTOL_2-NOT: fxch
+; FTOL_2-NOT: fld
+; FTOL_2-NOT: fst
+; FTOL_2: calll __ftol2
+;; stack is empty
+
+  %1 = fdiv double %x, %y
+  %2 = fsub double %x, %z
+  %3 = fptoui double %1 to i64
+  %4 = fptoui double %2 to i64
+  %5 = sub i64 %4, %3
+  ret i64 %5
+}
+
+define {double, i64} @double_ui64_4(double %x, double %y) nounwind {
+; COMPILERRT: @double_ui64_4
+; FTOL: @double_ui64_4
+; FTOL_2: @double_ui64_4
+;; stack is empty
+; FTOL_2: fldl
+;; stack is %y
+; FTOL_2: fldl
+;; stack is %x %y
+; FTOL_2: fxch
+;; stack is %y %x
+; FTOL_2: calll __ftol2
+;; stack is %x
+; FTOL_2: fld %st(0)
+;; stack is %x %x
+; FTOL_2: calll __ftol2
+;; stack is %x
+
+  %1 = fptoui double %x to i64
+  %2 = fptoui double %y to i64
+  %3 = sub i64 %1, %2
+  %4 = insertvalue {double, i64} undef, double %x, 0
+  %5 = insertvalue {double, i64} %4, i64 %3, 1
+  ret {double, i64} %5
+}
+
+define i32 @double_ui32_5(double %X) {
+; FTOL: @double_ui32_5
+; FTOL: calll __ftol2
+  %tmp.1 = fptoui double %X to i32
+  ret i32 %tmp.1
+}
+
+define i64 @double_ui64_5(double %X) {
+; FTOL: @double_ui64_5
+; FTOL: calll __ftol2
+  %tmp.1 = fptoui double %X to i64
+  ret i64 %tmp.1
+}
diff --git a/test/CodeGen/X86/xop-intrinsics-x86_64.ll b/test/CodeGen/X86/xop-intrinsics-x86_64.ll
new file mode 100644
index 0000000..a2521b0
--- /dev/null
+++ b/test/CodeGen/X86/xop-intrinsics-x86_64.ll
@@ -0,0 +1,969 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mattr=+avx,+fma4,+xop | FileCheck %s
+
+define <2 x double> @test_int_x86_xop_vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
+  ; CHECK: vpermil2pd
+  %res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 1) ;  [#uses=1]
+  ret <2 x double> %res
+}
+define <2 x double> @test_int_x86_xop_vpermil2pd_mr(<2 x double> %a0, <2 x double>* %a1, <2 x double> %a2) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vpermil2pd
+  %vec = load <2 x double>* %a1
+  %res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %vec, <2 x double> %a2, i8 1) ;  [#uses=1]
+  ret <2 x double> %res
+}
+define <2 x double> @test_int_x86_xop_vpermil2pd_rm(<2 x double> %a0, <2 x double> %a1, <2 x double>* %a2) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vpermil2pd
+  %vec = load <2 x double>* %a2
+  %res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %vec, i8 1) ;  [#uses=1]
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
+
+define <4 x double> @test_int_x86_xop_vpermil2pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
+  ; CHECK: vpermil2pd
+  ; CHECK: ymm
+  %res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 2) ;
+  ret <4 x double> %res
+}
+define <4 x double> @test_int_x86_xop_vpermil2pd_256_mr(<4 x double> %a0, <4 x double>* %a1, <4 x double> %a2) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vpermil2pd
+  ; CHECK: ymm
+  %vec = load <4 x double>* %a1
+  %res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %vec, <4 x double> %a2, i8 2) ;
+  ret <4 x double> %res
+}
+define <4 x double> @test_int_x86_xop_vpermil2pd_256_rm(<4 x double> %a0, <4 x double> %a1, <4 x double>* %a2) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vpermil2pd
+  ; CHECK: ymm
+  %vec = load <4 x double>* %a2
+  %res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %vec, i8 2) ;
+  ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone
+
+define <4 x float> @test_int_x86_xop_vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
+  ; CHECK: vpermil2ps
+  %res = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 3) ;
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
+
+define <8 x float> @test_int_x86_xop_vpermil2ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
+  ; CHECK: vpermil2ps
+  ; CHECK: ymm
+  %res = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 4) ;
+  ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcmov(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) {
+  ; CHECK: vpcmov
+  %res = call <2 x i64> @llvm.x86.xop.vpcmov(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcmov(<2 x i64>, <2 x i64>, <2 x i64>) nounwind readnone
+
+define <4 x i64> @test_int_x86_xop_vpcmov_256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) {
+  ; CHECK: vpcmov
+  ; CHECK: ymm
+  %res = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) ;
+  ret <4 x i64> %res
+}
+define <4 x i64> @test_int_x86_xop_vpcmov_256_mr(<4 x i64> %a0, <4 x i64>* %a1, <4 x i64> %a2) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vpcmov
+  ; CHECK: ymm
+  %vec = load <4 x i64>* %a1
+  %res = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %vec, <4 x i64> %a2) ;
+  ret <4 x i64> %res
+}
+define <4 x i64> @test_int_x86_xop_vpcmov_256_rm(<4 x i64> %a0, <4 x i64> %a1, <4 x i64>* %a2) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vpcmov
+  ; CHECK: ymm
+ %vec = load <4 x i64>* %a2
+ %res = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %vec) ;
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64>, <4 x i64>, <4 x i64>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomeqb(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK:vpcomb
+  %res = call <16 x i8> @llvm.x86.xop.vpcomeqb(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+define <16 x i8> @test_int_x86_xop_vpcomeqb_mem(<16 x i8> %a0, <16 x i8>* %a1) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK:vpcomb
+  %vec = load <16 x i8>* %a1
+  %res = call <16 x i8> @llvm.x86.xop.vpcomeqb(<16 x i8> %a0, <16 x i8> %vec) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomeqb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomeqw(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomeqw(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomeqw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomeqd(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomd
+  %res = call <4 x i32> @llvm.x86.xop.vpcomeqd(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomeqd(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomeqq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomeqq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomeqq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomequb(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpcomub
+  %res = call <16 x i8> @llvm.x86.xop.vpcomequb(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomequb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomequd(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomud
+  %res = call <4 x i32> @llvm.x86.xop.vpcomequd(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomequd(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomequq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomuq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomequq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomequq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomequw(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomuw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomequw(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomequw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomfalseb(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpcomb
+  %res = call <16 x i8> @llvm.x86.xop.vpcomfalseb(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomfalseb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomfalsed(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomd
+  %res = call <4 x i32> @llvm.x86.xop.vpcomfalsed(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomfalsed(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomfalseq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomfalseq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomfalseq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomfalseub(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpcomub
+  %res = call <16 x i8> @llvm.x86.xop.vpcomfalseub(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomfalseub(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomfalseud(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomud
+  %res = call <4 x i32> @llvm.x86.xop.vpcomfalseud(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomfalseud(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomfalseuq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomuq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomfalseuq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomfalseuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomfalseuw(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomuw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomfalseuw(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomfalseuw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomfalsew(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomfalsew(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomfalsew(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomgeb(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpcomb
+  %res = call <16 x i8> @llvm.x86.xop.vpcomgeb(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomgeb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomged(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomd
+  %res = call <4 x i32> @llvm.x86.xop.vpcomged(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomged(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomgeq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomgeq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomgeq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomgeub(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpcomub
+  %res = call <16 x i8> @llvm.x86.xop.vpcomgeub(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomgeub(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomgeud(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomud
+  %res = call <4 x i32> @llvm.x86.xop.vpcomgeud(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomgeud(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomgeuq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomuq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomgeuq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomgeuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomgeuw(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomuw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomgeuw(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomgeuw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomgew(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomgew(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomgew(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomgtb(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpcomb
+  %res = call <16 x i8> @llvm.x86.xop.vpcomgtb(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomgtb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomgtd(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomd
+  %res = call <4 x i32> @llvm.x86.xop.vpcomgtd(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomgtd(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomgtq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomgtq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomgtq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomgtub(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpcomub
+  %res = call <16 x i8> @llvm.x86.xop.vpcomgtub(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomgtub(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomgtud(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomud
+  %res = call <4 x i32> @llvm.x86.xop.vpcomgtud(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomgtud(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomgtuq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomuq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomgtuq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomgtuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomgtuw(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomuw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomgtuw(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomgtuw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomgtw(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomgtw(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomgtw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomleb(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpcomb
+  %res = call <16 x i8> @llvm.x86.xop.vpcomleb(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomleb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomled(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomd
+  %res = call <4 x i32> @llvm.x86.xop.vpcomled(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomled(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomleq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomleq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomleq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomleub(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpcomub
+  %res = call <16 x i8> @llvm.x86.xop.vpcomleub(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomleub(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomleud(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomud
+  %res = call <4 x i32> @llvm.x86.xop.vpcomleud(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomleud(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomleuq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomuq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomleuq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomleuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomleuw(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomuw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomleuw(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomleuw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomlew(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomlew(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomlew(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomltb(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpcomb
+  %res = call <16 x i8> @llvm.x86.xop.vpcomltb(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomltb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomltd(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomd
+  %res = call <4 x i32> @llvm.x86.xop.vpcomltd(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomltd(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomltq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomltq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomltq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomltub(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpcomub
+  %res = call <16 x i8> @llvm.x86.xop.vpcomltub(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomltub(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomltud(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomud
+  %res = call <4 x i32> @llvm.x86.xop.vpcomltud(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomltud(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomltuq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomuq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomltuq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomltuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomltuw(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomuw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomltuw(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomltuw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomltw(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomltw(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomltw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomneb(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpcomb
+  %res = call <16 x i8> @llvm.x86.xop.vpcomneb(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomneb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomned(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomd
+  %res = call <4 x i32> @llvm.x86.xop.vpcomned(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomned(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomneq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomneq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomneq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomneub(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpcomub
+  %res = call <16 x i8> @llvm.x86.xop.vpcomneub(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomneub(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomneud(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomud
+  %res = call <4 x i32> @llvm.x86.xop.vpcomneud(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomneud(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomneuq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomuq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomneuq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomneuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomneuw(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomuw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomneuw(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomneuw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomnew(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomnew(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomnew(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomtrueb(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpcomb
+  %res = call <16 x i8> @llvm.x86.xop.vpcomtrueb(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomtrueb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomtrued(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomd
+  %res = call <4 x i32> @llvm.x86.xop.vpcomtrued(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomtrued(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomtrueq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomtrueq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomtrueq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomtrueub(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpcomub
+  %res = call <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomtrueud(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpcomud
+  %res = call <4 x i32> @llvm.x86.xop.vpcomtrueud(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomtrueud(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomtrueuq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpcomuq
+  %res = call <2 x i64> @llvm.x86.xop.vpcomtrueuq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomtrueuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomtrueuw(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomuw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomtrueuw(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomtrueuw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomtruew(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpcomw
+  %res = call <8 x i16> @llvm.x86.xop.vpcomtruew(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomtruew(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vphaddbd(<16 x i8> %a0) {
+  ; CHECK: vphaddbd
+  %res = call <4 x i32> @llvm.x86.xop.vphaddbd(<16 x i8> %a0) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vphaddbd(<16 x i8>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vphaddbq(<16 x i8> %a0) {
+  ; CHECK: vphaddbq
+  %res = call <2 x i64> @llvm.x86.xop.vphaddbq(<16 x i8> %a0) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vphaddbq(<16 x i8>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vphaddbw(<16 x i8> %a0) {
+  ; CHECK: vphaddbw
+  %res = call <8 x i16> @llvm.x86.xop.vphaddbw(<16 x i8> %a0) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vphaddbw(<16 x i8>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vphadddq(<4 x i32> %a0) {
+  ; CHECK: vphadddq
+  %res = call <2 x i64> @llvm.x86.xop.vphadddq(<4 x i32> %a0) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vphadddq(<4 x i32>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vphaddubd(<16 x i8> %a0) {
+  ; CHECK: vphaddubd
+  %res = call <4 x i32> @llvm.x86.xop.vphaddubd(<16 x i8> %a0) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vphaddubd(<16 x i8>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vphaddubq(<16 x i8> %a0) {
+  ; CHECK: vphaddubq
+  %res = call <2 x i64> @llvm.x86.xop.vphaddubq(<16 x i8> %a0) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vphaddubq(<16 x i8>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vphaddubw(<16 x i8> %a0) {
+  ; CHECK: vphaddubw
+  %res = call <8 x i16> @llvm.x86.xop.vphaddubw(<16 x i8> %a0) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vphaddubw(<16 x i8>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vphaddudq(<4 x i32> %a0) {
+  ; CHECK: vphaddudq
+  %res = call <2 x i64> @llvm.x86.xop.vphaddudq(<4 x i32> %a0) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vphaddudq(<4 x i32>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vphadduwd(<8 x i16> %a0) {
+  ; CHECK: vphadduwd
+  %res = call <4 x i32> @llvm.x86.xop.vphadduwd(<8 x i16> %a0) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vphadduwd(<8 x i16>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vphadduwq(<8 x i16> %a0) {
+  ; CHECK: vphadduwq
+  %res = call <2 x i64> @llvm.x86.xop.vphadduwq(<8 x i16> %a0) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vphadduwq(<8 x i16>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vphaddwd(<8 x i16> %a0) {
+  ; CHECK: vphaddwd
+  %res = call <4 x i32> @llvm.x86.xop.vphaddwd(<8 x i16> %a0) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vphaddwd(<8 x i16>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vphaddwq(<8 x i16> %a0) {
+  ; CHECK: vphaddwq
+  %res = call <2 x i64> @llvm.x86.xop.vphaddwq(<8 x i16> %a0) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vphaddwq(<8 x i16>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vphsubbw(<16 x i8> %a0) {
+  ; CHECK: vphsubbw
+  %res = call <8 x i16> @llvm.x86.xop.vphsubbw(<16 x i8> %a0) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vphsubbw(<16 x i8>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vphsubdq(<4 x i32> %a0) {
+  ; CHECK: vphsubdq
+  %res = call <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32> %a0) ;
+  ret <2 x i64> %res
+}
+define <2 x i64> @test_int_x86_xop_vphsubdq_mem(<4 x i32>* %a0) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vphsubdq
+  %vec = load <4 x i32>* %a0
+  %res = call <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32> %vec) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vphsubwd(<8 x i16> %a0) {
+  ; CHECK: vphsubwd
+  %res = call <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16> %a0) ;
+  ret <4 x i32> %res
+}
+define <4 x i32> @test_int_x86_xop_vphsubwd_mem(<8 x i16>* %a0) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vphsubwd
+  %vec = load <8 x i16>* %a0
+  %res = call <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16> %vec) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpmacsdd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) {
+  ; CHECK: vpmacsdd
+  %res = call <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpmacsdqh(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) {
+  ; CHECK: vpmacsdqh
+  %res = call <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpmacsdql(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) {
+  ; CHECK: vpmacsdql
+  %res = call <2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpmacssdd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) {
+  ; CHECK: vpmacssdd
+  %res = call <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpmacssdqh(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) {
+  ; CHECK: vpmacssdqh
+  %res = call <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpmacssdql(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) {
+  ; CHECK: vpmacssdql
+  %res = call <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpmacsswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) {
+  ; CHECK: vpmacsswd
+  %res = call <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpmacssww(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) {
+  ; CHECK: vpmacssww
+  %res = call <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpmacswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) {
+  ; CHECK: vpmacswd
+  %res = call <4 x i32> @llvm.x86.xop.vpmacswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpmacswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpmacsww(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) {
+  ; CHECK: vpmacsww
+  %res = call <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpmadcsswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) {
+  ; CHECK: vpmadcsswd
+  %res = call <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpmadcswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) {
+  ; CHECK: vpmadcswd
+  %res = call <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) ;
+  ret <4 x i32> %res
+}
+define <4 x i32> @test_int_x86_xop_vpmadcswd_mem(<8 x i16> %a0, <8 x i16>* %a1, <4 x i32> %a2) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vpmadcswd
+  %vec = load <8 x i16>* %a1
+  %res = call <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16> %a0, <8 x i16> %vec, <4 x i32> %a2) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) {
+  ; CHECK: vpperm
+  %res = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) ;
+  ret <16 x i8> %res
+}
+define <16 x i8> @test_int_x86_xop_vpperm_rm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8>* %a2) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vpperm
+  %vec = load <16 x i8>* %a2
+  %res = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %vec) ;
+  ret <16 x i8> %res
+}
+define <16 x i8> @test_int_x86_xop_vpperm_mr(<16 x i8> %a0, <16 x i8>* %a1, <16 x i8> %a2) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vpperm
+  %vec = load <16 x i8>* %a1
+  %res = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %vec, <16 x i8> %a2) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpperm(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vprotb(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vprotb
+  %res = call <16 x i8> @llvm.x86.xop.vprotb(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vprotb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vprotd(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vprotd
+  %res = call <4 x i32> @llvm.x86.xop.vprotd(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vprotd(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vprotq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vprotq
+  %res = call <2 x i64> @llvm.x86.xop.vprotq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vprotq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vprotw(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vprotw
+  %res = call <8 x i16> @llvm.x86.xop.vprotw(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vprotw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpshab(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpshab
+  %res = call <16 x i8> @llvm.x86.xop.vpshab(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpshab(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpshad(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpshad
+  %res = call <4 x i32> @llvm.x86.xop.vpshad(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpshad(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpshaq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpshaq
+  %res = call <2 x i64> @llvm.x86.xop.vpshaq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpshaq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpshaw(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpshaw
+  %res = call <8 x i16> @llvm.x86.xop.vpshaw(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpshaw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpshlb(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: vpshlb
+  %res = call <16 x i8> @llvm.x86.xop.vpshlb(<16 x i8> %a0, <16 x i8> %a1) ;
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpshlb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpshld(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpshld
+  %res = call <4 x i32> @llvm.x86.xop.vpshld(<4 x i32> %a0, <4 x i32> %a1) ;
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpshld(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpshlq(<2 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK: vpshlq
+  %res = call <2 x i64> @llvm.x86.xop.vpshlq(<2 x i64> %a0, <2 x i64> %a1) ;
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpshlq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpshlw(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: vpshlw
+  %res = call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %a0, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+define <8 x i16> @test_int_x86_xop_vpshlw_rm(<8 x i16> %a0, <8 x i16>* %a1) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vpshlw
+  %vec = load <8 x i16>* %a1
+  %res = call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %a0, <8 x i16> %vec) ;
+  ret <8 x i16> %res
+}
+define <8 x i16> @test_int_x86_xop_vpshlw_mr(<8 x i16>* %a0, <8 x i16> %a1) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vpshlw
+  %vec = load <8 x i16>* %a0
+  %res = call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %vec, <8 x i16> %a1) ;
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <4 x float> @test_int_x86_xop_vfrcz_ss(<4 x float> %a0, <4 x float> %a1) {
+  ; CHECK-NOT: mov
+  ; CHECK: vfrczss
+  %res = call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %a0, <4 x float> %a1) ;
+  ret <4 x float> %res
+}
+define <4 x float> @test_int_x86_xop_vfrcz_ss_mem(<4 x float> %a0, float* %a1) {
+  ; CHECK-NOT: mov
+  ; CHECK: vfrczss
+  %elem = load float* %a1
+  %vec = insertelement <4 x float> undef, float %elem, i32 0
+  %res = call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %a0, <4 x float> %vec) ;
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float>, <4 x float>) nounwind readnone
+
+define <2 x double> @test_int_x86_xop_vfrcz_sd(<2 x double> %a0, <2 x double> %a1) {
+  ; CHECK-NOT: mov
+  ; CHECK: vfrczsd
+  %res = call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %a0, <2 x double> %a1) ;
+  ret <2 x double> %res
+}
+define <2 x double> @test_int_x86_xop_vfrcz_sd_mem(<2 x double> %a0, double* %a1) {
+  ; CHECK-NOT: mov
+  ; CHECK: vfrczsd
+  %elem = load double* %a1
+  %vec = insertelement <2 x double> undef, double %elem, i32 0
+  %res = call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %a0, <2 x double> %vec) ;
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double>, <2 x double>) nounwind readnone
+
+define <2 x double> @test_int_x86_xop_vfrcz_pd(<2 x double> %a0) {
+  ; CHECK: vfrczpd
+  %res = call <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double> %a0) ;
+  ret <2 x double> %res
+}
+define <2 x double> @test_int_x86_xop_vfrcz_pd_mem(<2 x double>* %a0) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vfrczpd
+  %vec = load <2 x double>* %a0
+  %res = call <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double> %vec) ;
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double>) nounwind readnone
+
+define <4 x double> @test_int_x86_xop_vfrcz_pd_256(<4 x double> %a0) {
+  ; CHECK: vfrczpd
+  ; CHECK: ymm
+  %res = call <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double> %a0) ;
+  ret <4 x double> %res
+}
+define <4 x double> @test_int_x86_xop_vfrcz_pd_256_mem(<4 x double>* %a0) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vfrczpd
+  ; CHECK: ymm
+  %vec = load <4 x double>* %a0
+  %res = call <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double> %vec) ;
+  ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double>) nounwind readnone
+
+define <4 x float> @test_int_x86_xop_vfrcz_ps(<4 x float> %a0) {
+  ; CHECK: vfrczps
+  %res = call <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float> %a0) ;
+  ret <4 x float> %res
+}
+define <4 x float> @test_int_x86_xop_vfrcz_ps_mem(<4 x float>* %a0) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vfrczps
+  %vec = load <4 x float>* %a0
+  %res = call <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float> %vec) ;
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float>) nounwind readnone
+
+define <8 x float> @test_int_x86_xop_vfrcz_ps_256(<8 x float> %a0) {
+  ; CHECK: vfrczps
+  ; CHECK: ymm
+  %res = call <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float> %a0) ;
+  ret <8 x float> %res
+}
+define <8 x float> @test_int_x86_xop_vfrcz_ps_256_mem(<8 x float>* %a0) {
+  ; CHECK-NOT: vmovaps
+  ; CHECK: vfrczps
+  ; CHECK: ymm
+  %vec = load <8 x float>* %a0
+  %res = call <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float> %vec) ;
+  ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float>) nounwind readnone
+
diff --git a/test/CodeGen/X86/zext-fold.ll b/test/CodeGen/X86/zext-fold.ll
index b3f5cdb..ff93c68 100644
--- a/test/CodeGen/X86/zext-fold.ll
+++ b/test/CodeGen/X86/zext-fold.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | FileCheck %s
+; RUN: llc < %s -mcpu=generic -march=x86 | FileCheck %s
 
 ;; Simple case
 define i32 @test1(i8 %x) nounwind readnone {
@@ -34,7 +34,7 @@ define void @test3(i8 %x) nounwind readnone {
   ret void
 }
 ; CHECK: test3
-; CHECK: movzbl 16(%esp), [[REGISTER:%e[a-z]{2}]]
+; CHECK: movzbl {{[0-9]+}}(%esp), [[REGISTER:%e[a-z]{2}]]
 ; CHECK-NEXT: movl [[REGISTER]], 4(%esp)
 ; CHECK-NEXT: andl $224, [[REGISTER]]
 ; CHECK-NEXT: movl [[REGISTER]], (%esp)
diff --git a/test/CodeGen/XCore/2011-08-01-DynamicAllocBug.ll b/test/CodeGen/XCore/2011-08-01-DynamicAllocBug.ll
index 7d6d7ba..84e21e4 100644
--- a/test/CodeGen/XCore/2011-08-01-DynamicAllocBug.ll
+++ b/test/CodeGen/XCore/2011-08-01-DynamicAllocBug.ll
@@ -16,5 +16,5 @@ allocas:
 ; CHECK: f:
 ; CHECK: ldaw [[REGISTER:r[0-9]+]], {{r[0-9]+}}[-r1]
 ; CHECK: set sp, [[REGISTER]]
-; CHECK extsp 1
-; CHECK bl g
+; CHECK: extsp 1
+; CHECK: bl g
diff --git a/test/CodeGen/XCore/dg.exp b/test/CodeGen/XCore/dg.exp
deleted file mode 100644
index 7110eab..0000000
--- a/test/CodeGen/XCore/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target XCore] } {
-  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/XCore/lit.local.cfg b/test/CodeGen/XCore/lit.local.cfg
new file mode 100644
index 0000000..c697912
--- /dev/null
+++ b/test/CodeGen/XCore/lit.local.cfg
@@ -0,0 +1,13 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+def getRoot(config):
+    if not config.parent:
+        return config
+    return getRoot(config.parent)
+
+root = getRoot(config)
+
+targets = set(root.targets_to_build.split())
+if not 'XCore' in targets:
+    config.unsupported = True
+