207 files changed, 2459 insertions, 2659 deletions
diff --git a/test/CodeGen/ARM/2011-03-23-PeepholeBug.ll b/test/CodeGen/ARM/2011-03-23-PeepholeBug.ll
index 7c9af6f..0fe88bd 100644
--- a/test/CodeGen/ARM/2011-03-23-PeepholeBug.ll
+++ b/test/CodeGen/ARM/2011-03-23-PeepholeBug.ll
@@ -26,7 +26,7 @@ bb2:                                              ; preds = %bb1, %entry
 ; CHECK: bb2
 ; CHECK: subs [[REG:r[0-9]+]], #1
 ; CHECK: cmp [[REG]], #0
-; CHECK: bgt
+; CHECK: ble
   %indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %entry ]
   %tries.0 = sub i32 2147483647, %indvar
   %tmp1 = icmp sgt i32 %tries.0, 0
diff --git a/test/CodeGen/ARM/2012-03-13-DAGCombineBug.ll b/test/CodeGen/ARM/2012-03-13-DAGCombineBug.ll
index 6d596df..6206cd7 100644
--- a/test/CodeGen/ARM/2012-03-13-DAGCombineBug.ll
+++ b/test/CodeGen/ARM/2012-03-13-DAGCombineBug.ll
@@ -6,8 +6,7 @@
 ; (i32 extload $addr+c*sizeof(i16)
 define void @test_hi_short3(<3 x i16> * nocapture %srcA, <2 x i16> * nocapture %dst) nounwind {
 entry:
-; CHECK: ldrh [[REG:r[0-9]+]]
-; CHECK: strh [[REG]]
+; CHECK: vst1.32
   %0 = load <3 x i16> * %srcA, align 8
   %1 = shufflevector <3 x i16> %0, <3 x i16> undef, <2 x i32> <i32 2, i32 undef>
   store <2 x i16> %1, <2 x i16> * %dst, align 4
diff --git a/test/CodeGen/ARM/2012-03-26-FoldImmBug.ll b/test/CodeGen/ARM/2012-03-26-FoldImmBug.ll
new file mode 100644
index 0000000..0ff4f51
--- /dev/null
+++ b/test/CodeGen/ARM/2012-03-26-FoldImmBug.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 | FileCheck %s
+
+; ARM has a peephole optimization which looks for a def / use pair. The def
+; produces a 32-bit immediate which is consumed by the use. It tries to 
+; fold the immediate by breaking it into two parts and fold them into the
+; immmediate fields of two uses. e.g
+;        movw    r2, #40885
+;        movt    r3, #46540
+;        add     r0, r0, r3
+; =>
+;        add.w   r0, r0, #3019898880
+;        add.w   r0, r0, #30146560
+;
+; However, this transformation is incorrect if the user produces a flag. e.g.
+;        movw    r2, #40885
+;        movt    r3, #46540
+;        adds    r0, r0, r3
+; =>
+;        add.w   r0, r0, #3019898880
+;        adds.w  r0, r0, #30146560
+; Note the adds.w may not set the carry flag even if the original sequence
+; would.
+;
+; rdar://11116189
+define i64 @t(i64 %aInput) nounwind {
+; CHECK: t:
+; CHECK: movs [[REG:(r[0-9]+)]], #0
+; CHECK: movt [[REG]], #46540
+; CHECK: adds r{{[0-9]+}}, r{{[0-9]+}}, [[REG]]
+  %1 = mul i64 %aInput, 1000000
+  %2 = add i64 %1, -7952618389194932224
+  ret i64 %2
+}
diff --git a/test/CodeGen/ARM/2012-04-02-TwoAddrInstrCrash.ll b/test/CodeGen/ARM/2012-04-02-TwoAddrInstrCrash.ll
new file mode 100644
index 0000000..33ad187
--- /dev/null
+++ b/test/CodeGen/ARM/2012-04-02-TwoAddrInstrCrash.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s
+; PR11861
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv7-none-linux-gnueabi"
+
+define arm_aapcs_vfpcc void @foo() nounwind align 2 {
+  br i1 undef, label %5, label %1
+
+; <label>:1                                       ; preds = %0
+  %2 = shufflevector <1 x i64> zeroinitializer, <1 x i64> undef, <2 x i32> <i32 0, i32 1>
+  %3 = bitcast <2 x i64> %2 to <4 x float>
+  store <4 x float> zeroinitializer, <4 x float>* undef, align 16, !tbaa !0
+  store <4 x float> zeroinitializer, <4 x float>* undef, align 16, !tbaa !0
+  store <4 x float> %3, <4 x float>* undef, align 16, !tbaa !0
+  %4 = insertelement <4 x float> %3, float 8.000000e+00, i32 2
+  store <4 x float> %4, <4 x float>* undef, align 16, !tbaa !0
+  unreachable
+
+; <label>:5                                       ; preds = %0
+  ret void
+}
+
+!0 = metadata !{metadata !"omnipotent char", metadata !1}
+!1 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/ARM/2012-04-10-DAGCombine.ll b/test/CodeGen/ARM/2012-04-10-DAGCombine.ll
new file mode 100644
index 0000000..6f50f27
--- /dev/null
+++ b/test/CodeGen/ARM/2012-04-10-DAGCombine.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 -enable-unsafe-fp-math
+;target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+;target triple = "armv7-none-linux-gnueabi"
+
+define arm_aapcs_vfpcc void @foo(<4 x float> %arg) nounwind align 2 {
+bb4:
+  %tmp = extractelement <2 x float> undef, i32 0
+  br i1 undef, label %bb18, label %bb5
+
+bb5:                                              ; preds = %bb4
+  %tmp6 = fadd float %tmp, -1.500000e+01
+  %tmp7 = fdiv float %tmp6, 2.000000e+01
+  %tmp8 = fadd float %tmp7, 1.000000e+00
+  %tmp9 = fdiv float 1.000000e+00, %tmp8
+  %tmp10 = fsub float 1.000000e+00, %tmp9
+  %tmp11 = fmul float %tmp10, 1.000000e+01
+  %tmp12 = fadd float %tmp11, 1.500000e+01
+  %tmp13 = fdiv float %tmp12, %tmp
+  %tmp14 = insertelement <2 x float> undef, float %tmp13, i32 0
+  %tmp15 = shufflevector <2 x float> %tmp14, <2 x float> undef, <4 x i32> zeroinitializer
+  %tmp16 = fmul <4 x float> zeroinitializer, %tmp15
+  %tmp17 = fadd <4 x float> %tmp16, %arg
+  store <4 x float> %tmp17, <4 x float>* undef, align 8, !tbaa !0
+  br label %bb18
+
+bb18:                                             ; preds = %bb5, %bb4
+  ret void
+}
+
+!0 = metadata !{metadata !"omnipotent char", metadata !1}
+!1 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/ARM/call-tc.ll b/test/CodeGen/ARM/call-tc.ll
index be3e105..94edff5 100644
--- a/test/CodeGen/ARM/call-tc.ll
+++ b/test/CodeGen/ARM/call-tc.ll
@@ -96,3 +96,70 @@ bb:
   tail call void @foo() nounwind
   ret void
 }
+
+; Make sure codegenprep is duplicating ret instructions to enable tail calls.
+; rdar://11140249
+define i32 @t8(i32 %x) nounwind ssp {
+entry:
+; CHECKT2D: t8:
+; CHECKT2D-NOT: push
+; CHECKT2D-NOT
+  %and = and i32 %x, 1
+  %tobool = icmp eq i32 %and, 0
+  br i1 %tobool, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+; CHECKT2D: bne.w _a
+  %call = tail call i32 @a(i32 %x) nounwind
+  br label %return
+
+if.end:                                           ; preds = %entry
+  %and1 = and i32 %x, 2
+  %tobool2 = icmp eq i32 %and1, 0
+  br i1 %tobool2, label %if.end5, label %if.then3
+
+if.then3:                                         ; preds = %if.end
+; CHECKT2D: bne.w _b
+  %call4 = tail call i32 @b(i32 %x) nounwind
+  br label %return
+
+if.end5:                                          ; preds = %if.end
+; CHECKT2D: b.w _c
+  %call6 = tail call i32 @c(i32 %x) nounwind
+  br label %return
+
+return:                                           ; preds = %if.end5, %if.then3, %if.then
+  %retval.0 = phi i32 [ %call, %if.then ], [ %call4, %if.then3 ], [ %call6, %if.end5 ]
+  ret i32 %retval.0
+}
+
+declare i32 @a(i32)
+
+declare i32 @b(i32)
+
+declare i32 @c(i32)
+
+; PR12419
+; rdar://11195178
+; Use the correct input chain for the tailcall node or else the call to
+; _ZN9MutexLockD1Ev would be lost.
+%class.MutexLock = type { i8 }
+
+@x = external global i32, align 4
+
+define i32 @t9() nounwind {
+; CHECKT2D: t9:
+; CHECKT2D: blx __ZN9MutexLockC1Ev
+; CHECKT2D: blx __ZN9MutexLockD1Ev
+; CHECKT2D: b.w ___divsi3
+  %lock = alloca %class.MutexLock, align 1
+  %1 = call %class.MutexLock* @_ZN9MutexLockC1Ev(%class.MutexLock* %lock)
+  %2 = load i32* @x, align 4
+  %3 = sdiv i32 1000, %2
+  %4 = call %class.MutexLock* @_ZN9MutexLockD1Ev(%class.MutexLock* %lock)
+  ret i32 %3
+}
+
+declare %class.MutexLock* @_ZN9MutexLockC1Ev(%class.MutexLock*) unnamed_addr nounwind align 2
+
+declare %class.MutexLock* @_ZN9MutexLockD1Ev(%class.MutexLock*) unnamed_addr nounwind align 2
diff --git a/test/CodeGen/ARM/commute-movcc.ll b/test/CodeGen/ARM/commute-movcc.ll
new file mode 100644
index 0000000..7316452
--- /dev/null
+++ b/test/CodeGen/ARM/commute-movcc.ll
@@ -0,0 +1,67 @@
+; RUN: llc -mtriple=thumbv7-apple-ios -disable-code-place < %s | FileCheck %s
+; RUN: llc -mtriple=armv7-apple-ios   -disable-code-place < %s | FileCheck %s
+
+; LLVM IR optimizers canonicalize icmp+select this way.
+; Make sure that TwoAddressInstructionPass can commute the corresponding
+; MOVCC instructions to avoid excessive copies in one of the if blocks.
+;
+; CHECK: %if.then
+; CHECK-NOT: mov
+; CHECK: movlo
+; CHECK: movlo
+; CHECK-NOT: mov
+
+; CHECK: %if.else
+; CHECK-NOT: mov
+; CHECK: movls
+; CHECK: movls
+; CHECK-NOT: mov
+
+; This is really an LSR test: Make sure that cmp is using the incremented
+; induction variable.
+; CHECK: %if.end8
+; CHECK: add{{(s|\.w)?}} [[IV:r[0-9]+]], {{.*}}#1
+; CHECK: cmp [[IV]], #
+
+define i32 @f(i32* nocapture %a, i32 %Pref) nounwind ssp {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %if.end8
+  %i.012 = phi i32 [ 0, %entry ], [ %inc, %if.end8 ]
+  %BestCost.011 = phi i32 [ -1, %entry ], [ %BestCost.1, %if.end8 ]
+  %BestIdx.010 = phi i32 [ 0, %entry ], [ %BestIdx.1, %if.end8 ]
+  %arrayidx = getelementptr inbounds i32* %a, i32 %i.012
+  %0 = load i32* %arrayidx, align 4, !tbaa !0
+  %mul = mul i32 %0, %0
+  %sub = add nsw i32 %i.012, -5
+  %cmp2 = icmp eq i32 %sub, %Pref
+  br i1 %cmp2, label %if.else, label %if.then
+
+if.then:                                          ; preds = %for.body
+  %cmp3 = icmp ult i32 %mul, %BestCost.011
+  %i.0.BestIdx.0 = select i1 %cmp3, i32 %i.012, i32 %BestIdx.010
+  %mul.BestCost.0 = select i1 %cmp3, i32 %mul, i32 %BestCost.011
+  br label %if.end8
+
+if.else:                                          ; preds = %for.body
+  %cmp5 = icmp ugt i32 %mul, %BestCost.011
+  %BestIdx.0.i.0 = select i1 %cmp5, i32 %BestIdx.010, i32 %i.012
+  %BestCost.0.mul = select i1 %cmp5, i32 %BestCost.011, i32 %mul
+  br label %if.end8
+
+if.end8:                                          ; preds = %if.else, %if.then
+  %BestIdx.1 = phi i32 [ %i.0.BestIdx.0, %if.then ], [ %BestIdx.0.i.0, %if.else ]
+  %BestCost.1 = phi i32 [ %mul.BestCost.0, %if.then ], [ %BestCost.0.mul, %if.else ]
+  store i32 %mul, i32* %arrayidx, align 4, !tbaa !0
+  %inc = add i32 %i.012, 1
+  %cmp = icmp eq i32 %inc, 11
+  br i1 %cmp, label %for.end, label %for.body
+
+for.end:                                          ; preds = %if.end8
+  ret i32 %BestIdx.1
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/ARM/dagcombine-anyexttozeroext.ll b/test/CodeGen/ARM/dagcombine-anyexttozeroext.ll
new file mode 100644
index 0000000..18f57ea
--- /dev/null
+++ b/test/CodeGen/ARM/dagcombine-anyexttozeroext.ll
@@ -0,0 +1,30 @@
+; RUN: llc -mtriple armv7 %s -o - | FileCheck %s
+
+; CHECK: f:
+define float @f(<4 x i16>* nocapture %in) {
+  ; CHECK: vldr
+  ; CHECK: vmovl.u16
+  ; CHECK-NOT: vand
+  %1 = load <4 x i16>* %in
+  ; CHECK: vcvt.f32.u32
+  %2 = uitofp <4 x i16> %1 to <4 x float>
+  %3 = extractelement <4 x float> %2, i32 0
+  %4 = extractelement <4 x float> %2, i32 1
+  %5 = extractelement <4 x float> %2, i32 2
+
+  ; CHECK: vadd.f32
+  %6 = fadd float %3, %4
+  %7 = fadd float %6, %5
+
+  ret float %7
+}
+
+define float @g(<4 x i16>* nocapture %in) {
+  ; CHECK: vldr
+  %1 = load <4 x i16>* %in
+  ; CHECK-NOT: uxth
+  %2 = extractelement <4 x i16> %1, i32 0
+  ; CHECK: vcvt.f32.u32
+  %3 = uitofp i16 %2 to float
+  ret float %3
+}
diff --git a/test/CodeGen/ARM/fast-isel-br-const.ll b/test/CodeGen/ARM/fast-isel-br-const.ll
index 625adc2..7c532d5 100644
--- a/test/CodeGen/ARM/fast-isel-br-const.ll
+++ b/test/CodeGen/ARM/fast-isel-br-const.ll
@@ -5,7 +5,7 @@ define i32 @t1(i32 %a, i32 %b) nounwind uwtable ssp {
 entry:
 ; THUMB: t1:
 ; ARM: t1:
-
+  %x = add i32 %a, %b  
   br i1 1, label %if.then, label %if.else
 ; THUMB-NOT: b LBB0_1
 ; ARM-NOT:  b LBB0_1
@@ -24,6 +24,7 @@ if.then2:                                         ; preds = %if.else
   br label %if.end6
 
 if.else3:                                         ; preds = %if.else
+  %y = sub i32 %a, %b
   br i1 1, label %if.then5, label %if.end
 ; THUMB-NOT: b LBB0_5
 ; ARM-NOT:  b LBB0_5
diff --git a/test/CodeGen/ARM/fast-isel.ll b/test/CodeGen/ARM/fast-isel.ll
index 905543a..417e2d9 100644
--- a/test/CodeGen/ARM/fast-isel.ll
+++ b/test/CodeGen/ARM/fast-isel.ll
@@ -217,3 +217,12 @@ entry:
 ; THUMB: vcmpe.f32 s0, #0
   ret i1 %4
 }
+
+; ARM: @urem_fold
+; THUMB: @urem_fold
+; ARM: and r0, r0, #31
+; THUMB: and r0, r0, #31
+define i32 @urem_fold(i32 %a) nounwind {
+  %rem = urem i32 %a, 32
+  ret i32 %rem
+}
diff --git a/test/CodeGen/ARM/fcopysign.ll b/test/CodeGen/ARM/fcopysign.ll
index 87115cc..27fa2b0 100644
--- a/test/CodeGen/ARM/fcopysign.ll
+++ b/test/CodeGen/ARM/fcopysign.ll
@@ -40,26 +40,10 @@ entry:
   ret double %1
 }
 
-; rdar://9059537
-define i32 @test4() ssp {
-entry:
-; SOFT: test4:
-; SOFT: vmov.f64 [[REG4:(d[0-9]+)]], #1.000000e+00
-; This S-reg must be the first sub-reg of the last D-reg on vbsl.
-; SOFT: vcvt.f32.f64 {{s1?[02468]}}, [[REG4]]
-; SOFT: vshr.u64 [[REG4]], [[REG4]], #32
-; SOFT: vmov.i32 [[REG5:(d[0-9]+)]], #0x80000000
-; SOFT: vbsl [[REG5]], [[REG4]], {{d[0-9]+}}
-  %call80 = tail call double @copysign(double 1.000000e+00, double undef)
-  %conv81 = fptrunc double %call80 to float
-  %tmp88 = bitcast float %conv81 to i32
-  ret i32 %tmp88
-}
-
 ; rdar://9287902
-define float @test5() nounwind {
+define float @test4() nounwind {
 entry:
-; SOFT: test5:
+; SOFT: test4:
 ; SOFT: vmov [[REG7:(d[0-9]+)]], r0, r1
 ; SOFT: vmov.i32 [[REG6:(d[0-9]+)]], #0x80000000
 ; SOFT: vshr.u64 [[REG7]], [[REG7]], #32
diff --git a/test/CodeGen/ARM/fusedMAC.ll b/test/CodeGen/ARM/fusedMAC.ll
index 40e8bb2..802d1b8 100644
--- a/test/CodeGen/ARM/fusedMAC.ll
+++ b/test/CodeGen/ARM/fusedMAC.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon,+vfp4 | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-eabi -mattr=+neon,+vfp4 | FileCheck %s
 ; Check generated fused MAC and MLS.
 
 define double @fusedMACTest1(double %d1, double %d2, double %d3) {
@@ -98,3 +98,88 @@ define <4 x float> @fusedMACTest12(<4 x float> %a, <4 x float> %b) {
   %sub = fsub <4 x float> %a, %mul
   ret <4 x float> %sub
 }
+
+define float @test_fma_f32(float %a, float %b, float %c) nounwind readnone ssp {
+entry:
+; CHECK: test_fma_f32
+; CHECK: vfma.f32
+  %tmp1 = tail call float @llvm.fma.f32(float %a, float %b, float %c) nounwind readnone
+  ret float %tmp1
+}
+
+define double @test_fma_f64(double %a, double %b, double %c) nounwind readnone ssp {
+entry:
+; CHECK: test_fma_f64
+; CHECK: vfma.f64
+  %tmp1 = tail call double @llvm.fma.f64(double %a, double %b, double %c) nounwind readnone
+  ret double %tmp1
+}
+
+define <2 x float> @test_fma_v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind readnone ssp {
+entry:
+; CHECK: test_fma_v2f32
+; CHECK: vfma.f32
+  %tmp1 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind
+  ret <2 x float> %tmp1
+}
+
+define double @test_fms_f64(double %a, double %b, double %c) nounwind readnone ssp {
+entry:
+; CHECK: test_fms_f64
+; CHECK: vfms.f64
+  %tmp1 = fsub double -0.0, %a
+  %tmp2 = tail call double @llvm.fma.f64(double %tmp1, double %b, double %c) nounwind readnone
+  ret double %tmp2
+}
+
+define double @test_fms_f64_2(double %a, double %b, double %c) nounwind readnone ssp {
+entry:
+; CHECK: test_fms_f64_2
+; CHECK: vfms.f64
+  %tmp1 = fsub double -0.0, %b
+  %tmp2 = tail call double @llvm.fma.f64(double %a, double %tmp1, double %c) nounwind readnone
+  %tmp3 = fsub double -0.0, %tmp2
+  ret double %tmp3
+}
+
+define double @test_fnms_f64(double %a, double %b, double %c) nounwind readnone ssp {
+entry:
+; CHECK: test_fnms_f64
+; CHECK: vfnms.f64
+  %tmp1 = fsub double -0.0, %a
+  %tmp2 = tail call double @llvm.fma.f64(double %tmp1, double %b, double %c) nounwind readnone
+  %tmp3 = fsub double -0.0, %tmp2
+  ret double %tmp3
+}
+
+define double @test_fnms_f64_2(double %a, double %b, double %c) nounwind readnone ssp {
+entry:
+; CHECK: test_fnms_f64_2
+; CHECK: vfnms.f64
+  %tmp1 = fsub double -0.0, %b
+  %tmp2 = tail call double @llvm.fma.f64(double %a, double %tmp1, double %c) nounwind readnone
+  ret double %tmp2
+}
+
+define double @test_fnma_f64(double %a, double %b, double %c) nounwind readnone ssp {
+entry:
+; CHECK: test_fnma_f64
+; CHECK: vfnma.f64
+  %tmp1 = tail call double @llvm.fma.f64(double %a, double %b, double %c) nounwind readnone
+  %tmp2 = fsub double -0.0, %tmp1
+  ret double %tmp2
+}
+
+define double @test_fnma_f64_2(double %a, double %b, double %c) nounwind readnone ssp {
+entry:
+; CHECK: test_fnma_f64_2
+; CHECK: vfnma.f64
+  %tmp1 = fsub double -0.0, %a
+  %tmp2 = fsub double -0.0, %c
+  %tmp3 = tail call double @llvm.fma.f64(double %tmp1, double %b, double %tmp2) nounwind readnone
+  ret double %tmp3
+}
+
+declare float @llvm.fma.f32(float, float, float) nounwind readnone
+declare double @llvm.fma.f64(double, double, double) nounwind readnone
+declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/ldr_post.ll b/test/CodeGen/ARM/ldr_post.ll
index 97a48e1..8ddf025 100644
--- a/test/CodeGen/ARM/ldr_post.ll
+++ b/test/CodeGen/ARM/ldr_post.ll
@@ -1,7 +1,9 @@
-; RUN: llc < %s -march=arm | \
-; RUN:   grep {ldr.*\\\[.*\],} | count 1
+; RUN: llc < %s -march=arm | FileCheck %s
 
-define i32 @test(i32 %a, i32 %b, i32 %c) {
+; CHECK: test1:
+; CHECK: ldr {{.*, \[.*]}}, -r2
+; CHECK-NOT: ldr
+define i32 @test1(i32 %a, i32 %b, i32 %c) {
         %tmp1 = mul i32 %a, %b          ; <i32> [#uses=2]
         %tmp2 = inttoptr i32 %tmp1 to i32*              ; <i32*> [#uses=1]
         %tmp3 = load i32* %tmp2         ; <i32> [#uses=1]
@@ -10,3 +12,14 @@ define i32 @test(i32 %a, i32 %b, i32 %c) {
         ret i32 %tmp5
 }
 
+; CHECK: test2:
+; CHECK: ldr {{.*, \[.*\]}}, #-16
+; CHECK-NOT: ldr
+define i32 @test2(i32 %a, i32 %b) {
+        %tmp1 = mul i32 %a, %b          ; <i32> [#uses=2]
+        %tmp2 = inttoptr i32 %tmp1 to i32*              ; <i32*> [#uses=1]
+        %tmp3 = load i32* %tmp2         ; <i32> [#uses=1]
+        %tmp4 = sub i32 %tmp1, 16               ; <i32> [#uses=1]
+        %tmp5 = mul i32 %tmp4, %tmp3            ; <i32> [#uses=1]
+        ret i32 %tmp5
+}
diff --git a/test/CodeGen/ARM/ldr_pre.ll b/test/CodeGen/ARM/ldr_pre.ll
index 7c44284..e904e5f 100644
--- a/test/CodeGen/ARM/ldr_pre.ll
+++ b/test/CodeGen/ARM/ldr_pre.ll
@@ -1,6 +1,8 @@
-; RUN: llc < %s -march=arm | \
-; RUN:   grep {ldr.*\\!} | count 2
+; RUN: llc < %s -march=arm | FileCheck %s
 
+; CHECK: test1:
+; CHECK: ldr {{.*!}}
+; CHECK-NOT: ldr
 define i32* @test1(i32* %X, i32* %dest) {
         %Y = getelementptr i32* %X, i32 4               ; <i32*> [#uses=2]
         %A = load i32* %Y               ; <i32> [#uses=1]
@@ -8,6 +10,9 @@ define i32* @test1(i32* %X, i32* %dest) {
         ret i32* %Y
 }
 
+; CHECK: test2:
+; CHECK: ldr {{.*!}}
+; CHECK-NOT: ldr
 define i32 @test2(i32 %a, i32 %b, i32 %c) {
         %tmp1 = sub i32 %a, %b          ; <i32> [#uses=2]
         %tmp2 = inttoptr i32 %tmp1 to i32*              ; <i32*> [#uses=1]
@@ -16,4 +21,3 @@ define i32 @test2(i32 %a, i32 %b, i32 %c) {
         %tmp5 = add i32 %tmp4, %tmp3            ; <i32> [#uses=1]
         ret i32 %tmp5
 }
-
diff --git a/test/CodeGen/ARM/ldrd.ll b/test/CodeGen/ARM/ldrd.ll
index a588bc3..3f8fd75 100644
--- a/test/CodeGen/ARM/ldrd.ll
+++ b/test/CodeGen/ARM/ldrd.ll
@@ -44,8 +44,7 @@ entry:
 ; BASIC: str
 ; GREEDY: @f
 ; GREEDY: %bb
-; GREEDY: ldr
-; GREEDY: ldr
+; GREEDY: ldrd
 ; GREEDY: str
 define void @f(i32* nocapture %a, i32* nocapture %b, i32 %n) nounwind {
 entry:
diff --git a/test/CodeGen/ARM/lit.local.cfg b/test/CodeGen/ARM/lit.local.cfg
index dd6c50d..cb77b09 100644
--- a/test/CodeGen/ARM/lit.local.cfg
+++ b/test/CodeGen/ARM/lit.local.cfg
@@ -1,13 +1,6 @@
 config.suffixes = ['.ll', '.c', '.cpp']
 
-def getRoot(config):
-    if not config.parent:
-        return config
-    return getRoot(config.parent)
-
-root = getRoot(config)
-
-targets = set(root.targets_to_build.split())
+targets = set(config.root.targets_to_build.split())
 if not 'ARM' in targets:
     config.unsupported = True
 
diff --git a/test/CodeGen/ARM/lsr-icmp-imm.ll b/test/CodeGen/ARM/lsr-icmp-imm.ll
new file mode 100644
index 0000000..5283f57
--- /dev/null
+++ b/test/CodeGen/ARM/lsr-icmp-imm.ll
@@ -0,0 +1,33 @@
+; RUN: llc -mtriple=thumbv7-apple-ios -disable-code-place < %s | FileCheck %s
+; RUN: llc -mtriple=armv7-apple-ios   -disable-code-place < %s | FileCheck %s
+
+; LSR should compare against the post-incremented induction variable.
+; In this case, the immediate value is -2 which requires a cmn instruction.
+;
+; CHECK: f:
+; CHECK: %for.body
+; CHECK: sub{{.*}}[[IV:r[0-9]+]], #2
+; CHECK: cmn{{.*}}[[IV]], #2
+; CHECK: bne
+define i32 @f(i32* nocapture %a, i32 %i) nounwind readonly ssp {
+entry:
+  %cmp3 = icmp eq i32 %i, -2
+  br i1 %cmp3, label %for.end, label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %bi.06 = phi i32 [ %i.addr.0.bi.0, %for.body ], [ 0, %entry ]
+  %i.addr.05 = phi i32 [ %sub, %for.body ], [ %i, %entry ]
+  %b.04 = phi i32 [ %.b.0, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32* %a, i32 %i.addr.05
+  %0 = load i32* %arrayidx, align 4
+  %cmp1 = icmp sgt i32 %0, %b.04
+  %.b.0 = select i1 %cmp1, i32 %0, i32 %b.04
+  %i.addr.0.bi.0 = select i1 %cmp1, i32 %i.addr.05, i32 %bi.06
+  %sub = add nsw i32 %i.addr.05, -2
+  %cmp = icmp eq i32 %i.addr.05, 0
+  br i1 %cmp, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  %bi.0.lcssa = phi i32 [ 0, %entry ], [ %i.addr.0.bi.0, %for.body ]
+  ret i32 %bi.0.lcssa
+}
diff --git a/test/CodeGen/ARM/lsr-on-unrolled-loops.ll b/test/CodeGen/ARM/lsr-on-unrolled-loops.ll
deleted file mode 100644
index ea5ae8f..0000000
--- a/test/CodeGen/ARM/lsr-on-unrolled-loops.ll
+++ /dev/null
@@ -1,640 +0,0 @@
-; RUN: llc -mtriple=thumbv7-apple-darwin10 -mcpu=cortex-a8  -enable-lsr-nested < %s | FileCheck %s
-
-; LSR should recognize that this is an unrolled loop which can use
-; constant offset addressing, so that each of the following stores
-; uses the same register.
-
-; CHECK: vstr s{{.*}}, [{{(r[0-9]+)|(lr)}}, #32]
-; CHECK: vstr s{{.*}}, [{{(r[0-9]+)|(lr)}}, #64]
-; CHECK: vstr s{{.*}}, [{{(r[0-9]+)|(lr)}}, #96]
-
-; We can also save a register in the outer loop, but that requires
-; performing LSR on the outer loop.
-
-target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
-
-%0 = type { %1*, %3*, %6*, i8*, i32, i32, %8*, i32, i32, i32, i32, i32, i32, i32, double, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8**, i32, i32, i32, i32, i32, [64 x i32]*, [4 x %9*], [4 x %10*], [4 x %10*], i32, %11*, i32, i32, [16 x i8], [16 x i8], [16 x i8], i32, i32, i8, i8, i8, i16, i16, i32, i8, i32, %12*, i32, i32, i32, i32, i8*, i32, [4 x %11*], i32, i32, i32, [10 x i32], i32, i32, i32, i32, i32, %13*, %14*, %15*, %16*, %17*, %18*, %19*, %20*, %21*, %22*, %23* }
-%1 = type { void (%2*)*, void (%2*, i32)*, void (%2*)*, void (%2*, i8*)*, void (%2*)*, i32, %7, i32, i32, i8**, i32, i8**, i32, i32 }
-%2 = type { %1*, %3*, %6*, i8*, i32, i32 }
-%3 = type { i8* (%2*, i32, i32)*, i8* (%2*, i32, i32)*, i8** (%2*, i32, i32, i32)*, [64 x i16]** (%2*, i32, i32, i32)*, %4* (%2*, i32, i32, i32, i32, i32)*, %5* (%2*, i32, i32, i32, i32, i32)*, void (%2*)*, i8** (%2*, %4*, i32, i32, i32)*, [64 x i16]** (%2*, %5*, i32, i32, i32)*, void (%2*, i32)*, void (%2*)*, i32, i32 }
-%4 = type opaque
-%5 = type opaque
-%6 = type { void (%2*)*, i32, i32, i32, i32 }
-%7 = type { [8 x i32], [12 x i32] }
-%8 = type { i8*, i32, void (%0*)*, i32 (%0*)*, void (%0*, i32)*, i32 (%0*, i32)*, void (%0*)* }
-%9 = type { [64 x i16], i32 }
-%10 = type { [17 x i8], [256 x i8], i32 }
-%11 = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %9*, i8* }
-%12 = type { %12*, i8, i32, i32, i8* }
-%13 = type { void (%0*)*, void (%0*)*, i32 }
-%14 = type { void (%0*, i32)*, void (%0*, i8**, i32*, i32)* }
-%15 = type { void (%0*)*, i32 (%0*)*, void (%0*)*, i32 (%0*, i8***)*, %5** }
-%16 = type { void (%0*, i32)*, void (%0*, i8***, i32*, i32, i8**, i32*, i32)* }
-%17 = type { i32 (%0*)*, void (%0*)*, void (%0*)*, void (%0*)*, i32, i32 }
-%18 = type { void (%0*)*, i32 (%0*)*, i32 (%0*)*, i32, i32, i32, i32 }
-%19 = type { void (%0*)*, i32 (%0*, [64 x i16]**)*, i32 }
-%20 = type { void (%0*)*, [10 x void (%0*, %11*, i16*, i8**, i32)*] }
-%21 = type { void (%0*)*, void (%0*, i8***, i32*, i32, i8**, i32*, i32)*, i32 }
-%22 = type { void (%0*)*, void (%0*, i8***, i32, i8**, i32)* }
-%23 = type { void (%0*, i32)*, void (%0*, i8**, i8**, i32)*, void (%0*)*, void (%0*)* }
-
-define void @test(%0* nocapture %a0, %11* nocapture %a1, i16* nocapture %a2, i8** nocapture %a3, i32 %a4) nounwind {
-bb:
-  %t = alloca [64 x float], align 4
-  %t5 = getelementptr inbounds %0* %a0, i32 0, i32 65
-  %t6 = load i8** %t5, align 4
-  %t7 = getelementptr inbounds %11* %a1, i32 0, i32 20
-  %t8 = load i8** %t7, align 4
-  br label %bb9
-
-bb9:
-  %t10 = phi i32 [ 0, %bb ], [ %t157, %bb156 ]
-  %t11 = add i32 %t10, 8
-  %t12 = getelementptr [64 x float]* %t, i32 0, i32 %t11
-  %t13 = add i32 %t10, 16
-  %t14 = getelementptr [64 x float]* %t, i32 0, i32 %t13
-  %t15 = add i32 %t10, 24
-  %t16 = getelementptr [64 x float]* %t, i32 0, i32 %t15
-  %t17 = add i32 %t10, 32
-  %t18 = getelementptr [64 x float]* %t, i32 0, i32 %t17
-  %t19 = add i32 %t10, 40
-  %t20 = getelementptr [64 x float]* %t, i32 0, i32 %t19
-  %t21 = add i32 %t10, 48
-  %t22 = getelementptr [64 x float]* %t, i32 0, i32 %t21
-  %t23 = add i32 %t10, 56
-  %t24 = getelementptr [64 x float]* %t, i32 0, i32 %t23
-  %t25 = getelementptr [64 x float]* %t, i32 0, i32 %t10
-  %t26 = shl i32 %t10, 5
-  %t27 = or i32 %t26, 8
-  %t28 = getelementptr i8* %t8, i32 %t27
-  %t29 = bitcast i8* %t28 to float*
-  %t30 = or i32 %t26, 16
-  %t31 = getelementptr i8* %t8, i32 %t30
-  %t32 = bitcast i8* %t31 to float*
-  %t33 = or i32 %t26, 24
-  %t34 = getelementptr i8* %t8, i32 %t33
-  %t35 = bitcast i8* %t34 to float*
-  %t36 = or i32 %t26, 4
-  %t37 = getelementptr i8* %t8, i32 %t36
-  %t38 = bitcast i8* %t37 to float*
-  %t39 = or i32 %t26, 12
-  %t40 = getelementptr i8* %t8, i32 %t39
-  %t41 = bitcast i8* %t40 to float*
-  %t42 = or i32 %t26, 20
-  %t43 = getelementptr i8* %t8, i32 %t42
-  %t44 = bitcast i8* %t43 to float*
-  %t45 = or i32 %t26, 28
-  %t46 = getelementptr i8* %t8, i32 %t45
-  %t47 = bitcast i8* %t46 to float*
-  %t48 = getelementptr i8* %t8, i32 %t26
-  %t49 = bitcast i8* %t48 to float*
-  %t50 = shl i32 %t10, 3
-  %t51 = or i32 %t50, 1
-  %t52 = getelementptr i16* %a2, i32 %t51
-  %t53 = or i32 %t50, 2
-  %t54 = getelementptr i16* %a2, i32 %t53
-  %t55 = or i32 %t50, 3
-  %t56 = getelementptr i16* %a2, i32 %t55
-  %t57 = or i32 %t50, 4
-  %t58 = getelementptr i16* %a2, i32 %t57
-  %t59 = or i32 %t50, 5
-  %t60 = getelementptr i16* %a2, i32 %t59
-  %t61 = or i32 %t50, 6
-  %t62 = getelementptr i16* %a2, i32 %t61
-  %t63 = or i32 %t50, 7
-  %t64 = getelementptr i16* %a2, i32 %t63
-  %t65 = getelementptr i16* %a2, i32 %t50
-  %t66 = load i16* %t52, align 2
-  %t67 = icmp eq i16 %t66, 0
-  %t68 = load i16* %t54, align 2
-  %t69 = icmp eq i16 %t68, 0
-  %t70 = and i1 %t67, %t69
-  br i1 %t70, label %bb71, label %bb91
-
-bb71:
-  %t72 = load i16* %t56, align 2
-  %t73 = icmp eq i16 %t72, 0
-  br i1 %t73, label %bb74, label %bb91
-
-bb74:
-  %t75 = load i16* %t58, align 2
-  %t76 = icmp eq i16 %t75, 0
-  br i1 %t76, label %bb77, label %bb91
-
-bb77:
-  %t78 = load i16* %t60, align 2
-  %t79 = icmp eq i16 %t78, 0
-  br i1 %t79, label %bb80, label %bb91
-
-bb80:
-  %t81 = load i16* %t62, align 2
-  %t82 = icmp eq i16 %t81, 0
-  br i1 %t82, label %bb83, label %bb91
-
-bb83:
-  %t84 = load i16* %t64, align 2
-  %t85 = icmp eq i16 %t84, 0
-  br i1 %t85, label %bb86, label %bb91
-
-bb86:
-  %t87 = load i16* %t65, align 2
-  %t88 = sitofp i16 %t87 to float
-  %t89 = load float* %t49, align 4
-  %t90 = fmul float %t88, %t89
-  store float %t90, float* %t25, align 4
-  store float %t90, float* %t12, align 4
-  store float %t90, float* %t14, align 4
-  store float %t90, float* %t16, align 4
-  store float %t90, float* %t18, align 4
-  store float %t90, float* %t20, align 4
-  store float %t90, float* %t22, align 4
-  store float %t90, float* %t24, align 4
-  br label %bb156
-
-bb91:
-  %t92 = load i16* %t65, align 2
-  %t93 = sitofp i16 %t92 to float
-  %t94 = load float* %t49, align 4
-  %t95 = fmul float %t93, %t94
-  %t96 = sitofp i16 %t68 to float
-  %t97 = load float* %t29, align 4
-  %t98 = fmul float %t96, %t97
-  %t99 = load i16* %t58, align 2
-  %t100 = sitofp i16 %t99 to float
-  %t101 = load float* %t32, align 4
-  %t102 = fmul float %t100, %t101
-  %t103 = load i16* %t62, align 2
-  %t104 = sitofp i16 %t103 to float
-  %t105 = load float* %t35, align 4
-  %t106 = fmul float %t104, %t105
-  %t107 = fadd float %t95, %t102
-  %t108 = fsub float %t95, %t102
-  %t109 = fadd float %t98, %t106
-  %t110 = fsub float %t98, %t106
-  %t111 = fmul float %t110, 0x3FF6A09E60000000
-  %t112 = fsub float %t111, %t109
-  %t113 = fadd float %t107, %t109
-  %t114 = fsub float %t107, %t109
-  %t115 = fadd float %t108, %t112
-  %t116 = fsub float %t108, %t112
-  %t117 = sitofp i16 %t66 to float
-  %t118 = load float* %t38, align 4
-  %t119 = fmul float %t117, %t118
-  %t120 = load i16* %t56, align 2
-  %t121 = sitofp i16 %t120 to float
-  %t122 = load float* %t41, align 4
-  %t123 = fmul float %t121, %t122
-  %t124 = load i16* %t60, align 2
-  %t125 = sitofp i16 %t124 to float
-  %t126 = load float* %t44, align 4
-  %t127 = fmul float %t125, %t126
-  %t128 = load i16* %t64, align 2
-  %t129 = sitofp i16 %t128 to float
-  %t130 = load float* %t47, align 4
-  %t131 = fmul float %t129, %t130
-  %t132 = fadd float %t127, %t123
-  %t133 = fsub float %t127, %t123
-  %t134 = fadd float %t119, %t131
-  %t135 = fsub float %t119, %t131
-  %t136 = fadd float %t134, %t132
-  %t137 = fsub float %t134, %t132
-  %t138 = fmul float %t137, 0x3FF6A09E60000000
-  %t139 = fadd float %t133, %t135
-  %t140 = fmul float %t139, 0x3FFD906BC0000000
-  %t141 = fmul float %t135, 0x3FF1517A80000000
-  %t142 = fsub float %t141, %t140
-  %t143 = fmul float %t133, 0xC004E7AEA0000000
-  %t144 = fadd float %t143, %t140
-  %t145 = fsub float %t144, %t136
-  %t146 = fsub float %t138, %t145
-  %t147 = fadd float %t142, %t146
-  %t148 = fadd float %t113, %t136
-  store float %t148, float* %t25, align 4
-  %t149 = fsub float %t113, %t136
-  store float %t149, float* %t24, align 4
-  %t150 = fadd float %t115, %t145
-  store float %t150, float* %t12, align 4
-  %t151 = fsub float %t115, %t145
-  store float %t151, float* %t22, align 4
-  %t152 = fadd float %t116, %t146
-  store float %t152, float* %t14, align 4
-  %t153 = fsub float %t116, %t146
-  store float %t153, float* %t20, align 4
-  %t154 = fadd float %t114, %t147
-  store float %t154, float* %t18, align 4
-  %t155 = fsub float %t114, %t147
-  store float %t155, float* %t16, align 4
-  br label %bb156
-
-bb156:
-  %t157 = add i32 %t10, 1
-  %t158 = icmp eq i32 %t157, 8
-  br i1 %t158, label %bb159, label %bb9
-
-bb159:
-  %t160 = add i32 %a4, 7
-  %t161 = add i32 %a4, 1
-  %t162 = add i32 %a4, 6
-  %t163 = add i32 %a4, 2
-  %t164 = add i32 %a4, 5
-  %t165 = add i32 %a4, 4
-  %t166 = add i32 %a4, 3
-  br label %bb167
-
-bb167:
-  %t168 = phi i32 [ 0, %bb159 ], [ %t293, %bb167 ]
-  %t169 = getelementptr i8** %a3, i32 %t168
-  %t170 = shl i32 %t168, 3
-  %t171 = or i32 %t170, 4
-  %t172 = getelementptr [64 x float]* %t, i32 0, i32 %t171
-  %t173 = or i32 %t170, 2
-  %t174 = getelementptr [64 x float]* %t, i32 0, i32 %t173
-  %t175 = or i32 %t170, 6
-  %t176 = getelementptr [64 x float]* %t, i32 0, i32 %t175
-  %t177 = or i32 %t170, 5
-  %t178 = getelementptr [64 x float]* %t, i32 0, i32 %t177
-  %t179 = or i32 %t170, 3
-  %t180 = getelementptr [64 x float]* %t, i32 0, i32 %t179
-  %t181 = or i32 %t170, 1
-  %t182 = getelementptr [64 x float]* %t, i32 0, i32 %t181
-  %t183 = or i32 %t170, 7
-  %t184 = getelementptr [64 x float]* %t, i32 0, i32 %t183
-  %t185 = getelementptr [64 x float]* %t, i32 0, i32 %t170
-  %t186 = load i8** %t169, align 4
-  %t187 = getelementptr inbounds i8* %t186, i32 %a4
-  %t188 = load float* %t185, align 4
-  %t189 = load float* %t172, align 4
-  %t190 = fadd float %t188, %t189
-  %t191 = fsub float %t188, %t189
-  %t192 = load float* %t174, align 4
-  %t193 = load float* %t176, align 4
-  %t194 = fadd float %t192, %t193
-  %t195 = fsub float %t192, %t193
-  %t196 = fmul float %t195, 0x3FF6A09E60000000
-  %t197 = fsub float %t196, %t194
-  %t198 = fadd float %t190, %t194
-  %t199 = fsub float %t190, %t194
-  %t200 = fadd float %t191, %t197
-  %t201 = fsub float %t191, %t197
-  %t202 = load float* %t178, align 4
-  %t203 = load float* %t180, align 4
-  %t204 = fadd float %t202, %t203
-  %t205 = fsub float %t202, %t203
-  %t206 = load float* %t182, align 4
-  %t207 = load float* %t184, align 4
-  %t208 = fadd float %t206, %t207
-  %t209 = fsub float %t206, %t207
-  %t210 = fadd float %t208, %t204
-  %t211 = fsub float %t208, %t204
-  %t212 = fmul float %t211, 0x3FF6A09E60000000
-  %t213 = fadd float %t205, %t209
-  %t214 = fmul float %t213, 0x3FFD906BC0000000
-  %t215 = fmul float %t209, 0x3FF1517A80000000
-  %t216 = fsub float %t215, %t214
-  %t217 = fmul float %t205, 0xC004E7AEA0000000
-  %t218 = fadd float %t217, %t214
-  %t219 = fsub float %t218, %t210
-  %t220 = fsub float %t212, %t219
-  %t221 = fadd float %t216, %t220
-  %t222 = fadd float %t198, %t210
-  %t223 = fptosi float %t222 to i32
-  %t224 = add nsw i32 %t223, 4
-  %t225 = lshr i32 %t224, 3
-  %t226 = and i32 %t225, 1023
-  %t227 = add i32 %t226, 128
-  %t228 = getelementptr inbounds i8* %t6, i32 %t227
-  %t229 = load i8* %t228, align 1
-  store i8 %t229, i8* %t187, align 1
-  %t230 = fsub float %t198, %t210
-  %t231 = fptosi float %t230 to i32
-  %t232 = add nsw i32 %t231, 4
-  %t233 = lshr i32 %t232, 3
-  %t234 = and i32 %t233, 1023
-  %t235 = add i32 %t234, 128
-  %t236 = getelementptr inbounds i8* %t6, i32 %t235
-  %t237 = load i8* %t236, align 1
-  %t238 = getelementptr inbounds i8* %t186, i32 %t160
-  store i8 %t237, i8* %t238, align 1
-  %t239 = fadd float %t200, %t219
-  %t240 = fptosi float %t239 to i32
-  %t241 = add nsw i32 %t240, 4
-  %t242 = lshr i32 %t241, 3
-  %t243 = and i32 %t242, 1023
-  %t244 = add i32 %t243, 128
-  %t245 = getelementptr inbounds i8* %t6, i32 %t244
-  %t246 = load i8* %t245, align 1
-  %t247 = getelementptr inbounds i8* %t186, i32 %t161
-  store i8 %t246, i8* %t247, align 1
-  %t248 = fsub float %t200, %t219
-  %t249 = fptosi float %t248 to i32
-  %t250 = add nsw i32 %t249, 4
-  %t251 = lshr i32 %t250, 3
-  %t252 = and i32 %t251, 1023
-  %t253 = add i32 %t252, 128
-  %t254 = getelementptr inbounds i8* %t6, i32 %t253
-  %t255 = load i8* %t254, align 1
-  %t256 = getelementptr inbounds i8* %t186, i32 %t162
-  store i8 %t255, i8* %t256, align 1
-  %t257 = fadd float %t201, %t220
-  %t258 = fptosi float %t257 to i32
-  %t259 = add nsw i32 %t258, 4
-  %t260 = lshr i32 %t259, 3
-  %t261 = and i32 %t260, 1023
-  %t262 = add i32 %t261, 128
-  %t263 = getelementptr inbounds i8* %t6, i32 %t262
-  %t264 = load i8* %t263, align 1
-  %t265 = getelementptr inbounds i8* %t186, i32 %t163
-  store i8 %t264, i8* %t265, align 1
-  %t266 = fsub float %t201, %t220
-  %t267 = fptosi float %t266 to i32
-  %t268 = add nsw i32 %t267, 4
-  %t269 = lshr i32 %t268, 3
-  %t270 = and i32 %t269, 1023
-  %t271 = add i32 %t270, 128
-  %t272 = getelementptr inbounds i8* %t6, i32 %t271
-  %t273 = load i8* %t272, align 1
-  %t274 = getelementptr inbounds i8* %t186, i32 %t164
-  store i8 %t273, i8* %t274, align 1
-  %t275 = fadd float %t199, %t221
-  %t276 = fptosi float %t275 to i32
-  %t277 = add nsw i32 %t276, 4
-  %t278 = lshr i32 %t277, 3
-  %t279 = and i32 %t278, 1023
-  %t280 = add i32 %t279, 128
-  %t281 = getelementptr inbounds i8* %t6, i32 %t280
-  %t282 = load i8* %t281, align 1
-  %t283 = getelementptr inbounds i8* %t186, i32 %t165
-  store i8 %t282, i8* %t283, align 1
-  %t284 = fsub float %t199, %t221
-  %t285 = fptosi float %t284 to i32
-  %t286 = add nsw i32 %t285, 4
-  %t287 = lshr i32 %t286, 3
-  %t288 = and i32 %t287, 1023
-  %t289 = add i32 %t288, 128
-  %t290 = getelementptr inbounds i8* %t6, i32 %t289
-  %t291 = load i8* %t290, align 1
-  %t292 = getelementptr inbounds i8* %t186, i32 %t166
-  store i8 %t291, i8* %t292, align 1
-  %t293 = add nsw i32 %t168, 1
-  %t294 = icmp eq i32 %t293, 8
-  br i1 %t294, label %bb295, label %bb167
-
-bb295:
-  ret void
-}
-
-%struct.ct_data_s = type { %union.anon, %union.anon }
-%struct.gz_header = type { i32, i32, i32, i32, i8*, i32, i32, i8*, i32, i8*, i32, i32, i32 }
-%struct.internal_state = type { %struct.z_stream*, i32, i8*, i32, i8*, i32, i32, %struct.gz_header*, i32, i8, i32, i32, i32, i32, i8*, i32, i16*, i16*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [573 x %struct.ct_data_s], [61 x %struct.ct_data_s], [39 x %struct.ct_data_s], %struct.tree_desc_s, %struct.tree_desc_s, %struct.tree_desc_s, [16 x i16], [573 x i32], i32, i32, [573 x i8], i8*, i32, i32, i16*, i32, i32, i32, i32, i16, i32 }
-%struct.static_tree_desc = type { i32 }
-%struct.tree_desc_s = type { %struct.ct_data_s*, i32, %struct.static_tree_desc* }
-%struct.z_stream = type { i8*, i32, i32, i8*, i32, i32, i8*, %struct.internal_state*, i8* (i8*, i32, i32)*, void (i8*, i8*)*, i8*, i32, i32, i32 }
-%union.anon = type { i16 }
-
-define i32 @longest_match(%struct.internal_state* %s, i32 %cur_match) nounwind optsize {
-entry:
-  %0 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 31 ; <i32*> [#uses=1]
-  %1 = load i32* %0, align 4                      ; <i32> [#uses=2]
-  %2 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 14 ; <i8**> [#uses=1]
-  %3 = load i8** %2, align 4                      ; <i8*> [#uses=27]
-  %4 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 27 ; <i32*> [#uses=1]
-  %5 = load i32* %4, align 4                      ; <i32> [#uses=17]
-  %6 = getelementptr inbounds i8* %3, i32 %5      ; <i8*> [#uses=1]
-  %7 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 30 ; <i32*> [#uses=1]
-  %8 = load i32* %7, align 4                      ; <i32> [#uses=4]
-  %9 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 36 ; <i32*> [#uses=1]
-  %10 = load i32* %9, align 4                     ; <i32> [#uses=2]
-  %11 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 11 ; <i32*> [#uses=1]
-  %12 = load i32* %11, align 4                    ; <i32> [#uses=2]
-  %13 = add i32 %12, -262                         ; <i32> [#uses=1]
-  %14 = icmp ugt i32 %5, %13                      ; <i1> [#uses=1]
-  br i1 %14, label %bb, label %bb2
-
-bb:                                               ; preds = %entry
-  %15 = add i32 %5, 262                           ; <i32> [#uses=1]
-  %16 = sub i32 %15, %12                          ; <i32> [#uses=1]
-  br label %bb2
-
-bb2:                                              ; preds = %bb, %entry
-  %iftmp.48.0 = phi i32 [ %16, %bb ], [ 0, %entry ] ; <i32> [#uses=1]
-  %17 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 16 ; <i16**> [#uses=1]
-  %18 = load i16** %17, align 4                   ; <i16*> [#uses=1]
-  %19 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 13 ; <i32*> [#uses=1]
-  %20 = load i32* %19, align 4                    ; <i32> [#uses=1]
-  %.sum = add i32 %5, 258                         ; <i32> [#uses=2]
-  %21 = getelementptr inbounds i8* %3, i32 %.sum  ; <i8*> [#uses=1]
-  %22 = add nsw i32 %5, -1                        ; <i32> [#uses=1]
-  %.sum30 = add i32 %22, %8                       ; <i32> [#uses=1]
-  %23 = getelementptr inbounds i8* %3, i32 %.sum30 ; <i8*> [#uses=1]
-  %24 = load i8* %23, align 1                     ; <i8> [#uses=1]
-  %.sum31 = add i32 %8, %5                        ; <i32> [#uses=1]
-  %25 = getelementptr inbounds i8* %3, i32 %.sum31 ; <i8*> [#uses=1]
-  %26 = load i8* %25, align 1                     ; <i8> [#uses=1]
-  %27 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 35 ; <i32*> [#uses=1]
-  %28 = load i32* %27, align 4                    ; <i32> [#uses=1]
-  %29 = lshr i32 %1, 2                            ; <i32> [#uses=1]
-  %30 = icmp ult i32 %8, %28                      ; <i1> [#uses=1]
-  %. = select i1 %30, i32 %1, i32 %29             ; <i32> [#uses=1]
-  %31 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 29 ; <i32*> [#uses=1]
-  %32 = load i32* %31, align 4                    ; <i32> [#uses=4]
-  %33 = icmp ugt i32 %10, %32                     ; <i1> [#uses=1]
-  %nice_match.0.ph = select i1 %33, i32 %32, i32 %10 ; <i32> [#uses=1]
-  %34 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 28 ; <i32*> [#uses=1]
-  %35 = ptrtoint i8* %21 to i32                   ; <i32> [#uses=1]
-  %36 = add nsw i32 %5, 257                       ; <i32> [#uses=1]
-  %tmp81 = add i32 %., -1                         ; <i32> [#uses=1]
-  br label %bb6
-
-bb6:                                              ; preds = %bb24, %bb2
-  %indvar78 = phi i32 [ 0, %bb2 ], [ %indvar.next79, %bb24 ] ; <i32> [#uses=2]
-  %best_len.2 = phi i32 [ %8, %bb2 ], [ %best_len.0, %bb24 ] ; <i32> [#uses=8]
-  %scan_end1.1 = phi i8 [ %24, %bb2 ], [ %scan_end1.0, %bb24 ] ; <i8> [#uses=6]
-  %cur_match_addr.0 = phi i32 [ %cur_match, %bb2 ], [ %90, %bb24 ] ; <i32> [#uses=14]
-  %scan_end.1 = phi i8 [ %26, %bb2 ], [ %scan_end.0, %bb24 ] ; <i8> [#uses=6]
-  %37 = getelementptr inbounds i8* %3, i32 %cur_match_addr.0 ; <i8*> [#uses=1]
-  %.sum32 = add i32 %cur_match_addr.0, %best_len.2 ; <i32> [#uses=1]
-  %38 = getelementptr inbounds i8* %3, i32 %.sum32 ; <i8*> [#uses=1]
-  %39 = load i8* %38, align 1                     ; <i8> [#uses=1]
-  %40 = icmp eq i8 %39, %scan_end.1               ; <i1> [#uses=1]
-  br i1 %40, label %bb7, label %bb23
-
-bb7:                                              ; preds = %bb6
-  %41 = add nsw i32 %best_len.2, -1               ; <i32> [#uses=1]
-  %.sum33 = add i32 %41, %cur_match_addr.0        ; <i32> [#uses=1]
-  %42 = getelementptr inbounds i8* %3, i32 %.sum33 ; <i8*> [#uses=1]
-  %43 = load i8* %42, align 1                     ; <i8> [#uses=1]
-  %44 = icmp eq i8 %43, %scan_end1.1              ; <i1> [#uses=1]
-  br i1 %44, label %bb8, label %bb23
-
-bb8:                                              ; preds = %bb7
-  %45 = load i8* %37, align 1                     ; <i8> [#uses=1]
-  %46 = load i8* %6, align 1                      ; <i8> [#uses=1]
-  %47 = icmp eq i8 %45, %46                       ; <i1> [#uses=1]
-  br i1 %47, label %bb9, label %bb23
-
-bb9:                                              ; preds = %bb8
-  %.sum34 = add i32 %cur_match_addr.0, 1          ; <i32> [#uses=1]
-  %48 = getelementptr inbounds i8* %3, i32 %.sum34 ; <i8*> [#uses=1]
-  %49 = load i8* %48, align 1                     ; <i8> [#uses=1]
-  %.sum88 = add i32 %5, 1                         ; <i32> [#uses=1]
-  %50 = getelementptr inbounds i8* %3, i32 %.sum88 ; <i8*> [#uses=1]
-  %51 = load i8* %50, align 1                     ; <i8> [#uses=1]
-  %52 = icmp eq i8 %49, %51                       ; <i1> [#uses=1]
-  br i1 %52, label %bb10, label %bb23
-
-bb10:                                             ; preds = %bb9
-  %tmp39 = add i32 %cur_match_addr.0, 10          ; <i32> [#uses=1]
-  %tmp41 = add i32 %cur_match_addr.0, 9           ; <i32> [#uses=1]
-  %tmp44 = add i32 %cur_match_addr.0, 8           ; <i32> [#uses=1]
-  %tmp47 = add i32 %cur_match_addr.0, 7           ; <i32> [#uses=1]
-  %tmp50 = add i32 %cur_match_addr.0, 6           ; <i32> [#uses=1]
-  %tmp53 = add i32 %cur_match_addr.0, 5           ; <i32> [#uses=1]
-  %tmp56 = add i32 %cur_match_addr.0, 4           ; <i32> [#uses=1]
-  %tmp59 = add i32 %cur_match_addr.0, 3           ; <i32> [#uses=1]
-  br label %bb11
-
-bb11:                                             ; preds = %bb18, %bb10
-  %indvar = phi i32 [ %indvar.next, %bb18 ], [ 0, %bb10 ] ; <i32> [#uses=2]
-  %tmp = shl i32 %indvar, 3                       ; <i32> [#uses=16]
-  %tmp40 = add i32 %tmp39, %tmp                   ; <i32> [#uses=1]
-  %scevgep = getelementptr i8* %3, i32 %tmp40     ; <i8*> [#uses=1]
-  %tmp42 = add i32 %tmp41, %tmp                   ; <i32> [#uses=1]
-  %scevgep43 = getelementptr i8* %3, i32 %tmp42   ; <i8*> [#uses=1]
-  %tmp45 = add i32 %tmp44, %tmp                   ; <i32> [#uses=1]
-  %scevgep46 = getelementptr i8* %3, i32 %tmp45   ; <i8*> [#uses=1]
-  %tmp48 = add i32 %tmp47, %tmp                   ; <i32> [#uses=1]
-  %scevgep49 = getelementptr i8* %3, i32 %tmp48   ; <i8*> [#uses=1]
-  %tmp51 = add i32 %tmp50, %tmp                   ; <i32> [#uses=1]
-  %scevgep52 = getelementptr i8* %3, i32 %tmp51   ; <i8*> [#uses=1]
-  %tmp54 = add i32 %tmp53, %tmp                   ; <i32> [#uses=1]
-  %scevgep55 = getelementptr i8* %3, i32 %tmp54   ; <i8*> [#uses=1]
-  %tmp60 = add i32 %tmp59, %tmp                   ; <i32> [#uses=1]
-  %scevgep61 = getelementptr i8* %3, i32 %tmp60   ; <i8*> [#uses=1]
-  %tmp62 = add i32 %tmp, 10                       ; <i32> [#uses=1]
-  %.sum89 = add i32 %5, %tmp62                    ; <i32> [#uses=2]
-  %scevgep63 = getelementptr i8* %3, i32 %.sum89  ; <i8*> [#uses=2]
-  %tmp64 = add i32 %tmp, 9                        ; <i32> [#uses=1]
-  %.sum90 = add i32 %5, %tmp64                    ; <i32> [#uses=1]
-  %scevgep65 = getelementptr i8* %3, i32 %.sum90  ; <i8*> [#uses=2]
-  %tmp66 = add i32 %tmp, 8                        ; <i32> [#uses=1]
-  %.sum91 = add i32 %5, %tmp66                    ; <i32> [#uses=1]
-  %scevgep67 = getelementptr i8* %3, i32 %.sum91  ; <i8*> [#uses=2]
-  %tmp6883 = or i32 %tmp, 7                       ; <i32> [#uses=1]
-  %.sum92 = add i32 %5, %tmp6883                  ; <i32> [#uses=1]
-  %scevgep69 = getelementptr i8* %3, i32 %.sum92  ; <i8*> [#uses=2]
-  %tmp7084 = or i32 %tmp, 6                       ; <i32> [#uses=1]
-  %.sum93 = add i32 %5, %tmp7084                  ; <i32> [#uses=1]
-  %scevgep71 = getelementptr i8* %3, i32 %.sum93  ; <i8*> [#uses=2]
-  %tmp7285 = or i32 %tmp, 5                       ; <i32> [#uses=1]
-  %.sum94 = add i32 %5, %tmp7285                  ; <i32> [#uses=1]
-  %scevgep73 = getelementptr i8* %3, i32 %.sum94  ; <i8*> [#uses=2]
-  %tmp7486 = or i32 %tmp, 4                       ; <i32> [#uses=1]
-  %.sum95 = add i32 %5, %tmp7486                  ; <i32> [#uses=1]
-  %scevgep75 = getelementptr i8* %3, i32 %.sum95  ; <i8*> [#uses=2]
-  %tmp7687 = or i32 %tmp, 3                       ; <i32> [#uses=1]
-  %.sum96 = add i32 %5, %tmp7687                  ; <i32> [#uses=1]
-  %scevgep77 = getelementptr i8* %3, i32 %.sum96  ; <i8*> [#uses=2]
-  %53 = load i8* %scevgep77, align 1              ; <i8> [#uses=1]
-  %54 = load i8* %scevgep61, align 1              ; <i8> [#uses=1]
-  %55 = icmp eq i8 %53, %54                       ; <i1> [#uses=1]
-  br i1 %55, label %bb12, label %bb20
-
-bb12:                                             ; preds = %bb11
-  %tmp57 = add i32 %tmp56, %tmp                   ; <i32> [#uses=1]
-  %scevgep58 = getelementptr i8* %3, i32 %tmp57   ; <i8*> [#uses=1]
-  %56 = load i8* %scevgep75, align 1              ; <i8> [#uses=1]
-  %57 = load i8* %scevgep58, align 1              ; <i8> [#uses=1]
-  %58 = icmp eq i8 %56, %57                       ; <i1> [#uses=1]
-  br i1 %58, label %bb13, label %bb20
-
-bb13:                                             ; preds = %bb12
-  %59 = load i8* %scevgep73, align 1              ; <i8> [#uses=1]
-  %60 = load i8* %scevgep55, align 1              ; <i8> [#uses=1]
-  %61 = icmp eq i8 %59, %60                       ; <i1> [#uses=1]
-  br i1 %61, label %bb14, label %bb20
-
-bb14:                                             ; preds = %bb13
-  %62 = load i8* %scevgep71, align 1              ; <i8> [#uses=1]
-  %63 = load i8* %scevgep52, align 1              ; <i8> [#uses=1]
-  %64 = icmp eq i8 %62, %63                       ; <i1> [#uses=1]
-  br i1 %64, label %bb15, label %bb20
-
-bb15:                                             ; preds = %bb14
-  %65 = load i8* %scevgep69, align 1              ; <i8> [#uses=1]
-  %66 = load i8* %scevgep49, align 1              ; <i8> [#uses=1]
-  %67 = icmp eq i8 %65, %66                       ; <i1> [#uses=1]
-  br i1 %67, label %bb16, label %bb20
-
-bb16:                                             ; preds = %bb15
-  %68 = load i8* %scevgep67, align 1              ; <i8> [#uses=1]
-  %69 = load i8* %scevgep46, align 1              ; <i8> [#uses=1]
-  %70 = icmp eq i8 %68, %69                       ; <i1> [#uses=1]
-  br i1 %70, label %bb17, label %bb20
-
-bb17:                                             ; preds = %bb16
-  %71 = load i8* %scevgep65, align 1              ; <i8> [#uses=1]
-  %72 = load i8* %scevgep43, align 1              ; <i8> [#uses=1]
-  %73 = icmp eq i8 %71, %72                       ; <i1> [#uses=1]
-  br i1 %73, label %bb18, label %bb20
-
-bb18:                                             ; preds = %bb17
-  %74 = load i8* %scevgep63, align 1              ; <i8> [#uses=1]
-  %75 = load i8* %scevgep, align 1                ; <i8> [#uses=1]
-  %76 = icmp eq i8 %74, %75                       ; <i1> [#uses=1]
-  %77 = icmp slt i32 %.sum89, %.sum               ; <i1> [#uses=1]
-  %or.cond = and i1 %76, %77                      ; <i1> [#uses=1]
-  %indvar.next = add i32 %indvar, 1               ; <i32> [#uses=1]
-  br i1 %or.cond, label %bb11, label %bb20
-
-bb20:                                             ; preds = %bb18, %bb17, %bb16, %bb15, %bb14, %bb13, %bb12, %bb11
-  %scan.3 = phi i8* [ %scevgep77, %bb11 ], [ %scevgep75, %bb12 ], [ %scevgep73, %bb13 ], [ %scevgep71, %bb14 ], [ %scevgep69, %bb15 ], [ %scevgep67, %bb16 ], [ %scevgep65, %bb17 ], [ %scevgep63, %bb18 ] ; <i8*> [#uses=1]
-  %78 = ptrtoint i8* %scan.3 to i32               ; <i32> [#uses=1]
-  %79 = sub nsw i32 %78, %35                      ; <i32> [#uses=2]
-  %80 = add i32 %79, 258                          ; <i32> [#uses=5]
-  %81 = icmp sgt i32 %80, %best_len.2             ; <i1> [#uses=1]
-  br i1 %81, label %bb21, label %bb23
-
-bb21:                                             ; preds = %bb20
-  store i32 %cur_match_addr.0, i32* %34, align 4
-  %82 = icmp slt i32 %80, %nice_match.0.ph        ; <i1> [#uses=1]
-  br i1 %82, label %bb22, label %bb25
-
-bb22:                                             ; preds = %bb21
-  %.sum37 = add i32 %36, %79                      ; <i32> [#uses=1]
-  %83 = getelementptr inbounds i8* %3, i32 %.sum37 ; <i8*> [#uses=1]
-  %84 = load i8* %83, align 1                     ; <i8> [#uses=1]
-  %.sum38 = add i32 %80, %5                       ; <i32> [#uses=1]
-  %85 = getelementptr inbounds i8* %3, i32 %.sum38 ; <i8*> [#uses=1]
-  %86 = load i8* %85, align 1                     ; <i8> [#uses=1]
-  br label %bb23
-
-bb23:                                             ; preds = %bb22, %bb20, %bb9, %bb8, %bb7, %bb6
-  %best_len.0 = phi i32 [ %best_len.2, %bb6 ], [ %best_len.2, %bb7 ], [ %best_len.2, %bb8 ], [ %best_len.2, %bb9 ], [ %80, %bb22 ], [ %best_len.2, %bb20 ] ; <i32> [#uses=3]
-  %scan_end1.0 = phi i8 [ %scan_end1.1, %bb6 ], [ %scan_end1.1, %bb7 ], [ %scan_end1.1, %bb8 ], [ %scan_end1.1, %bb9 ], [ %84, %bb22 ], [ %scan_end1.1, %bb20 ] ; <i8> [#uses=1]
-  %scan_end.0 = phi i8 [ %scan_end.1, %bb6 ], [ %scan_end.1, %bb7 ], [ %scan_end.1, %bb8 ], [ %scan_end.1, %bb9 ], [ %86, %bb22 ], [ %scan_end.1, %bb20 ] ; <i8> [#uses=1]
-  %87 = and i32 %cur_match_addr.0, %20            ; <i32> [#uses=1]
-  %88 = getelementptr inbounds i16* %18, i32 %87  ; <i16*> [#uses=1]
-  %89 = load i16* %88, align 2                    ; <i16> [#uses=1]
-  %90 = zext i16 %89 to i32                       ; <i32> [#uses=2]
-  %91 = icmp ugt i32 %90, %iftmp.48.0             ; <i1> [#uses=1]
-  br i1 %91, label %bb24, label %bb25
-
-bb24:                                             ; preds = %bb23
-
-; LSR should use count-down iteration to avoid requiring the trip count
-; in a register.
-
-;      CHECK: @ %bb24
-; CHECK: subs{{.*}} {{(r[0-9]+)|(lr)}}, #1
-; CHECK: bne.w
-
-  %92 = icmp eq i32 %tmp81, %indvar78             ; <i1> [#uses=1]
-  %indvar.next79 = add i32 %indvar78, 1           ; <i32> [#uses=1]
-  br i1 %92, label %bb25, label %bb6
-
-bb25:                                             ; preds = %bb24, %bb23, %bb21
-  %best_len.1 = phi i32 [ %best_len.0, %bb23 ], [ %best_len.0, %bb24 ], [ %80, %bb21 ] ; <i32> [#uses=2]
-  %93 = icmp ugt i32 %best_len.1, %32             ; <i1> [#uses=1]
-  %merge = select i1 %93, i32 %32, i32 %best_len.1 ; <i32> [#uses=1]
-  ret i32 %merge
-}
diff --git a/test/CodeGen/ARM/machine-cse-cmp.ll b/test/CodeGen/ARM/machine-cse-cmp.ll
index f566974..3ac7d77 100644
--- a/test/CodeGen/ARM/machine-cse-cmp.ll
+++ b/test/CodeGen/ARM/machine-cse-cmp.ll
@@ -10,7 +10,7 @@ entry:
 ; CHECK: cmp
 ; CHECK: moveq
 ; CHECK-NOT: cmp
-; CHECK: moveq
+; CHECK: mov{{eq|ne}}
     %tmp1 = icmp eq i32 %cond1, 0
     %tmp2 = select i1 %tmp1, i32 %x1, i32 %x2
     %tmp3 = select i1 %tmp1, i32 %x2, i32 %x3
diff --git a/test/CodeGen/ARM/opt-shuff-tstore.ll b/test/CodeGen/ARM/opt-shuff-tstore.ll
new file mode 100644
index 0000000..b4da552
--- /dev/null
+++ b/test/CodeGen/ARM/opt-shuff-tstore.ll
@@ -0,0 +1,19 @@
+; RUN: llc -mcpu=cortex-a9 -mtriple=arm-linux-unknown -promote-elements -mattr=+neon < %s | FileCheck %s
+
+; CHECK: func_4_8
+; CHECK: vst1.32
+; CHECK-NEXT: bx lr
+define void @func_4_8(<4 x i8> %param, <4 x i8>* %p) {
+  %r = add <4 x i8> %param, <i8 1, i8 2, i8 3, i8 4>
+  store <4 x i8> %r, <4 x i8>* %p
+  ret void
+}
+
+; CHECK: func_2_16
+; CHECK: vst1.32
+; CHECK-NEXT: bx lr
+define void @func_2_16(<2 x i16> %param, <2 x i16>* %p) {
+  %r = add <2 x i16> %param, <i16 1, i16 2>
+  store <2 x i16> %r, <2 x i16>* %p
+  ret void
+}
diff --git a/test/CodeGen/ARM/reg_asc_order.ll b/test/CodeGen/ARM/reg_asc_order.ll
new file mode 100644
index 0000000..d1d0ee5
--- /dev/null
+++ b/test/CodeGen/ARM/reg_asc_order.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
+; Check that memcpy gets lowered to ldm/stm, at least in this very smple case.
+
+%struct.Foo = type { i32, i32, i32, i32 }
+
+define void @_Z10CopyStructP3FooS0_(%struct.Foo* nocapture %a, %struct.Foo* nocapture %b) nounwind {
+entry:
+;CHECK: ldm
+;CHECK: stm
+  %0 = bitcast %struct.Foo* %a to i8*
+  %1 = bitcast %struct.Foo* %b to i8*
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 16, i32 4, i1 false)
+  ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/CodeGen/ARM/reg_sequence.ll b/test/CodeGen/ARM/reg_sequence.ll
index 78b4e7e..05794e4 100644
--- a/test/CodeGen/ARM/reg_sequence.ll
+++ b/test/CodeGen/ARM/reg_sequence.ll
@@ -273,7 +273,7 @@ define arm_aapcs_vfpcc i32 @t10() nounwind {
 entry:
 ; CHECK: t10:
 ; CHECK: vmov.i32 q[[Q0:[0-9]+]], #0x3f000000
-; CHECK: vmul.f32 q8, q8, d0[0]
+; CHECK: vmul.f32 q8, q8, d[[DREG:[0-1]+]]
 ; CHECK: vadd.f32 q8, q8, q8
   %0 = shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
   %1 = insertelement <4 x float> %0, float undef, i32 1 ; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/ARM/select-imm.ll b/test/CodeGen/ARM/select-imm.ll
index e927b39..c9ac66a 100644
--- a/test/CodeGen/ARM/select-imm.ll
+++ b/test/CodeGen/ARM/select-imm.ll
@@ -64,7 +64,7 @@ define i32 @t4(i32 %a, i32 %b, i32 %x) nounwind {
 entry:
 ; ARM: t4:
 ; ARM: ldr
-; ARM: movlt
+; ARM: mov{{lt|ge}}
 
 ; ARMT2: t4:
 ; ARMT2: movwlt [[R0:r[0-9]+]], #65365
diff --git a/test/CodeGen/ARM/select.ll b/test/CodeGen/ARM/select.ll
index f1bd7ee..3e07da8 100644
--- a/test/CodeGen/ARM/select.ll
+++ b/test/CodeGen/ARM/select.ll
@@ -76,12 +76,12 @@ define double @f7(double %a, double %b) {
 ; block generated, odds are good that we have close to the ideal code for this:
 ;
 ; CHECK-NEON:      _f8:
-; CHECK-NEON:      adr     r2, LCPI7_0
-; CHECK-NEON-NEXT: movw    r3, #1123
-; CHECK-NEON-NEXT: adds    r1, r2, #4
-; CHECK-NEON-NEXT: cmp     r0, r3
+; CHECK-NEON:      adr     [[R2:r[0-9]+]], LCPI7_0
+; CHECK-NEON-NEXT: movw    [[R3:r[0-9]+]], #1123
+; CHECK-NEON-NEXT: adds    {{r.*}}, [[R2]], #4
+; CHECK-NEON-NEXT: cmp     r0, [[R3]]
 ; CHECK-NEON-NEXT: it      ne
-; CHECK-NEON-NEXT: movne   r1, r2
+; CHECK-NEON-NEXT: movne   {{r.*}}, [[R2]]
 ; CHECK-NEON-NEXT: ldr
 ; CHECK-NEON:      bx
 
diff --git a/test/CodeGen/ARM/tail-opts.ll b/test/CodeGen/ARM/tail-opts.ll
index 3dc77e2..220b0f1 100644
--- a/test/CodeGen/ARM/tail-opts.ll
+++ b/test/CodeGen/ARM/tail-opts.ll
@@ -16,11 +16,11 @@ declare i8* @choose(i8*, i8*)
 
 ; CHECK: tail_duplicate_me:
 ; CHECK:      qux
-; CHECK:      qux
 ; CHECK:      movw r{{[0-9]+}}, :lower16:_GHJK
 ; CHECK:      movt r{{[0-9]+}}, :upper16:_GHJK
 ; CHECK:      str r
 ; CHECK-NEXT: bx r
+; CHECK:      qux
 ; CHECK:      movw r{{[0-9]+}}, :lower16:_GHJK
 ; CHECK:      movt r{{[0-9]+}}, :upper16:_GHJK
 ; CHECK:      str r
diff --git a/test/CodeGen/ARM/vdiv_combine.ll b/test/CodeGen/ARM/vdiv_combine.ll
index 1387393..7fddbed 100644
--- a/test/CodeGen/ARM/vdiv_combine.ll
+++ b/test/CodeGen/ARM/vdiv_combine.ll
@@ -8,7 +8,7 @@ declare void @foo_int32x4_t(<4 x i32>)
 
 ; Test signed conversion.
 ; CHECK: t1
-; CHECK-NOT: vdiv
+; CHECK-NOT: {{vdiv|vmul}}
 define void @t1() nounwind {
 entry:
   %tmp = load i32* @iin, align 4, !tbaa !3
@@ -24,7 +24,7 @@ declare void @foo_float32x2_t(<2 x float>)
 
 ; Test unsigned conversion.
 ; CHECK: t2
-; CHECK-NOT: vdiv
+; CHECK-NOT: {{vdiv|vmul}}
 define void @t2() nounwind {
 entry:
   %tmp = load i32* @uin, align 4, !tbaa !3
@@ -38,7 +38,7 @@ entry:
 
 ; Test which should not fold due to non-power of 2.
 ; CHECK: t3
-; CHECK: vdiv
+; CHECK: {{vdiv|vmul}}
 define void @t3() nounwind {
 entry:
   %tmp = load i32* @iin, align 4, !tbaa !3
@@ -52,7 +52,7 @@ entry:
 
 ; Test which should not fold due to power of 2 out of range.
 ; CHECK: t4
-; CHECK: vdiv
+; CHECK: {{vdiv|vmul}}
 define void @t4() nounwind {
 entry:
   %tmp = load i32* @iin, align 4, !tbaa !3
@@ -66,7 +66,7 @@ entry:
 
 ; Test case where const is max power of 2 (i.e., 2^32).
 ; CHECK: t5
-; CHECK-NOT: vdiv
+; CHECK-NOT: {{vdiv|vmul}}
 define void @t5() nounwind {
 entry:
   %tmp = load i32* @iin, align 4, !tbaa !3
@@ -80,7 +80,7 @@ entry:
 
 ; Test quadword.
 ; CHECK: t6
-; CHECK-NOT: vdiv
+; CHECK-NOT: {{vdiv|vmul}}
 define void @t6() nounwind {
 entry:
   %tmp = load i32* @iin, align 4, !tbaa !3
diff --git a/test/CodeGen/ARM/vector-extend-narrow.ll b/test/CodeGen/ARM/vector-extend-narrow.ll
new file mode 100644
index 0000000..1ec36da
--- /dev/null
+++ b/test/CodeGen/ARM/vector-extend-narrow.ll
@@ -0,0 +1,60 @@
+; RUN: llc -mtriple armv7 %s -o - | FileCheck %s
+
+; CHECK: f:
+define float @f(<4 x i16>* nocapture %in) {
+  ; CHECK: vldr
+  ; CHECK: vmovl.u16
+  %1 = load <4 x i16>* %in
+  ; CHECK: vcvt.f32.u32
+  %2 = uitofp <4 x i16> %1 to <4 x float>
+  %3 = extractelement <4 x float> %2, i32 0
+  %4 = extractelement <4 x float> %2, i32 1
+  %5 = extractelement <4 x float> %2, i32 2
+
+  ; CHECK: vadd.f32
+  %6 = fadd float %3, %4
+  %7 = fadd float %6, %5
+
+  ret float %7
+}
+
+; CHECK: g:
+define float @g(<4 x i8>* nocapture %in) {
+  ; CHECK: vldr
+  ; CHECK: vmovl.u8
+  ; CHECK: vmovl.u16
+  %1 = load <4 x i8>* %in
+  ; CHECK: vcvt.f32.u32
+  %2 = uitofp <4 x i8> %1 to <4 x float>
+  %3 = extractelement <4 x float> %2, i32 0
+  %4 = extractelement <4 x float> %2, i32 1
+  %5 = extractelement <4 x float> %2, i32 2
+
+  ; CHECK: vadd.f32
+  %6 = fadd float %3, %4
+  %7 = fadd float %6, %5
+
+  ret float %7
+}
+
+; CHECK: h:
+define <4 x i8> @h(<4 x float> %v) {
+  ; CHECK: vcvt.{{[us]}}32.f32
+  ; CHECK: vmovn.i32
+  %1 = fptoui <4 x float> %v to <4 x i8>
+  ret <4 x i8> %1
+}
+
+; CHECK: i:
+define <4 x i8> @i(<4 x i8>* %x) {
+  ; CHECK: vldr
+  ; CHECK: vmovl.s8
+  ; CHECK: vmovl.s16
+  ; CHECK: vrecpe
+  ; CHECK: vrecps
+  ; CHECK: vmul
+  ; CHECK: vmovn
+  %1 = load <4 x i8>* %x, align 4
+  %2 = sdiv <4 x i8> zeroinitializer, %1
+  ret <4 x i8> %2
+}
diff --git a/test/CodeGen/ARM/vrev.ll b/test/CodeGen/ARM/vrev.ll
index e154334..122ec03 100644
--- a/test/CodeGen/ARM/vrev.ll
+++ b/test/CodeGen/ARM/vrev.ll
@@ -149,12 +149,10 @@ define void @test_with_vcombine(<4 x float>* %v) nounwind {
 }
 
 ; The type <2 x i16> is legalized to <2 x i32> and need to be trunc-stored
-; to <2 x i16> when stored to memory. Currently ARM scalarizes these stores.
-; See PR 11158
+; to <2 x i16> when stored to memory.
 define void @test_vrev64(<4 x i16>* nocapture %source, <2 x i16>* nocapture %dst) nounwind ssp {
 ; CHECK: test_vrev64:
-; CHECK: vst1.16
-; CHECK: vst1.16
+; CHECK: vst1.32
 entry:
   %0 = bitcast <4 x i16>* %source to <8 x i16>*
   %tmp2 = load <8 x i16>* %0, align 4
diff --git a/test/CodeGen/ARM/widen-vmovs.ll b/test/CodeGen/ARM/widen-vmovs.ll
index 1f5113e..679e3f4 100644
--- a/test/CodeGen/ARM/widen-vmovs.ll
+++ b/test/CodeGen/ARM/widen-vmovs.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -widen-vmovs -mcpu=cortex-a8 -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -widen-vmovs -mcpu=cortex-a8 -verify-machineinstrs -disable-code-place | FileCheck %s
 target triple = "thumbv7-apple-ios"
 
-; The 0.0 constant is loaded from the constant pool and kept in a register.
+; The 1.0e+10 constant is loaded from the constant pool and kept in a register.
 ; CHECK: %entry
 ; CHECK: vldr s
 ; The float loop variable is initialized with a vmovs from the constant register.
@@ -10,6 +10,7 @@ target triple = "thumbv7-apple-ios"
 ; CHECK: , [[DN]]
 ; CHECK: %for.body.i
 ; CHECK: vadd.f32 [[DL]], [[DL]], [[DN]]
+; CHECK: %rInnerproduct.exit
 ;
 ; This test is verifying:
 ; - The VMOVS widening is happening.
@@ -24,8 +25,8 @@ for.body4:
   br label %for.body.i
 
 for.body.i:
-  %tmp3.i = phi float [ 0.000000e+00, %for.body4 ], [ %add.i, %for.body.i ]
-  %add.i = fadd float %tmp3.i, 0.000000e+00
+  %tmp3.i = phi float [ 1.000000e+10, %for.body4 ], [ %add.i, %for.body.i ]
+  %add.i = fadd float %tmp3.i, 1.000000e+10
   %exitcond.i = icmp eq i32 undef, 41
   br i1 %exitcond.i, label %rInnerproduct.exit, label %for.body.i
 
diff --git a/test/CodeGen/CBackend/2002-05-16-NameCollide.ll b/test/CodeGen/CBackend/2002-05-16-NameCollide.ll
deleted file mode 100644
index 0b06041..0000000
--- a/test/CodeGen/CBackend/2002-05-16-NameCollide.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: llc < %s -march=c
-
-; Make sure that global variables do not collide if they have the same name,
-; but different types.
-
-@X = global i32 5               ; <i32*> [#uses=0]
-@X.upgrd.1 = global i64 7               ; <i64*> [#uses=0]
-
diff --git a/test/CodeGen/CBackend/2002-05-21-MissingReturn.ll b/test/CodeGen/CBackend/2002-05-21-MissingReturn.ll
deleted file mode 100644
index a9f54e4..0000000
--- a/test/CodeGen/CBackend/2002-05-21-MissingReturn.ll
+++ /dev/null
@@ -1,20 +0,0 @@
-; RUN: llc < %s -march=c
-
-; This case was emitting code that looked like this:
-; ...
-;   llvm_BB1:       /* no statement here */
-; }
-; 
-; Which the Sun C compiler rejected, so now we are sure to put a return 
-; instruction in there if the basic block is otherwise empty.
-;
-define void @test() {
-        br label %BB1
-
-BB2:            ; preds = %BB2
-        br label %BB2
-
-BB1:            ; preds = %0
-        ret void
-}
-
diff --git a/test/CodeGen/CBackend/2002-08-19-ConstPointerRef.ll b/test/CodeGen/CBackend/2002-08-19-ConstPointerRef.ll
deleted file mode 100644
index 2afb1a0..0000000
--- a/test/CodeGen/CBackend/2002-08-19-ConstPointerRef.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: llc < %s -march=c
-
-; Test const pointer refs & forward references
-
-@t3 = global i32* @t1           ; <i32**> [#uses=0]
-@t1 = global i32 4              ; <i32*> [#uses=1]
-
diff --git a/test/CodeGen/CBackend/2002-08-19-ConstantExpr.ll b/test/CodeGen/CBackend/2002-08-19-ConstantExpr.ll
deleted file mode 100644
index b71cf07..0000000
--- a/test/CodeGen/CBackend/2002-08-19-ConstantExpr.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: llc < %s -march=c
-
-global i32* bitcast (float* @2 to i32*)   ;; Forward numeric reference
-global float* @2                       ;; Duplicate forward numeric reference
-global float 0.0
-
-@array = constant [2 x i32] [ i32 12, i32 52 ]
-@arrayPtr = global i32* getelementptr ([2 x i32]* @array, i64 0, i64 0)
diff --git a/test/CodeGen/CBackend/2002-08-19-DataPointer.ll b/test/CodeGen/CBackend/2002-08-19-DataPointer.ll
deleted file mode 100644
index b5a1f0b..0000000
--- a/test/CodeGen/CBackend/2002-08-19-DataPointer.ll
+++ /dev/null
@@ -1,4 +0,0 @@
-; RUN: llc < %s -march=c
-
-@sptr1 = global [11 x i8]* @somestr         ;; Forward ref to a constant
-@somestr = constant [11 x i8] c"hello world"
diff --git a/test/CodeGen/CBackend/2002-08-19-FunctionPointer.ll b/test/CodeGen/CBackend/2002-08-19-FunctionPointer.ll
deleted file mode 100644
index 10b9fe2..0000000
--- a/test/CodeGen/CBackend/2002-08-19-FunctionPointer.ll
+++ /dev/null
@@ -1,5 +0,0 @@
-; RUN: llc < %s -march=c
-
-@fptr = global void ()* @f       ;; Forward ref method defn
-declare void @f()               ;; External method
-
diff --git a/test/CodeGen/CBackend/2002-08-19-HardConstantExpr.ll b/test/CodeGen/CBackend/2002-08-19-HardConstantExpr.ll
deleted file mode 100644
index 0827423..0000000
--- a/test/CodeGen/CBackend/2002-08-19-HardConstantExpr.ll
+++ /dev/null
@@ -1,5 +0,0 @@
-; RUN: llc < %s -march=c
-
-@array = constant [2 x i32] [ i32 12, i32 52 ]          ; <[2 x i32]*> [#uses=1]
-@arrayPtr = global i32* getelementptr ([2 x i32]* @array, i64 0, i64 0)         ; <i32**> [#uses=0]
-
diff --git a/test/CodeGen/CBackend/2002-08-20-UnnamedArgument.ll b/test/CodeGen/CBackend/2002-08-20-UnnamedArgument.ll
deleted file mode 100644
index 59aafd5..0000000
--- a/test/CodeGen/CBackend/2002-08-20-UnnamedArgument.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: llc < %s -march=c
-
-; The C Writer bombs on this testcase because it tries the print the prototype
-; for the test function, which tries to print the argument name.  The function
-; has not been incorporated into the slot calculator, so after it does the name
-; lookup, it tries a slot calculator lookup, which fails.
-
-define i32 @test(i32) {
-        ret i32 0
-}
diff --git a/test/CodeGen/CBackend/2002-08-26-IndirectCallTest.ll b/test/CodeGen/CBackend/2002-08-26-IndirectCallTest.ll
deleted file mode 100644
index 6c4d629..0000000
--- a/test/CodeGen/CBackend/2002-08-26-IndirectCallTest.ll
+++ /dev/null
@@ -1,17 +0,0 @@
-; RUN: llc < %s -march=c
-
-; Indirect function call test... found by Joel & Brian
-;
-
-@taskArray = external global i32*               ; <i32**> [#uses=1]
-
-define void @test(i32 %X) {
-        %Y = add i32 %X, -1             ; <i32> [#uses=1]
-        %cast100 = sext i32 %Y to i64           ; <i64> [#uses=1]
-        %gep100 = getelementptr i32** @taskArray, i64 %cast100          ; <i32**> [#uses=1]
-        %fooPtr = load i32** %gep100            ; <i32*> [#uses=1]
-        %cast101 = bitcast i32* %fooPtr to void (i32)*          ; <void (i32)*> [#uses=1]
-        call void %cast101( i32 1000 )
-        ret void
-}
-
diff --git a/test/CodeGen/CBackend/2002-08-30-StructureOrderingTest.ll b/test/CodeGen/CBackend/2002-08-30-StructureOrderingTest.ll
deleted file mode 100644
index 1187a37..0000000
--- a/test/CodeGen/CBackend/2002-08-30-StructureOrderingTest.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: llc < %s -march=c
-
-; This testcase fails because the C backend does not arrange to output the 
-; contents of a structure type before it outputs the structure type itself.
-
-@Y = external global { { i32 } }                ; <{ { i32 } }*> [#uses=0]
-@X = external global { float }          ; <{ float }*> [#uses=0]
-
diff --git a/test/CodeGen/CBackend/2002-09-20-ArrayTypeFailure.ll b/test/CodeGen/CBackend/2002-09-20-ArrayTypeFailure.ll
deleted file mode 100644
index 021adb9..0000000
--- a/test/CodeGen/CBackend/2002-09-20-ArrayTypeFailure.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: llc < %s -march=c
-
-define void @test() {
-        %X = alloca [4 x i32]           ; <[4 x i32]*> [#uses=0]
-        ret void
-}
-
diff --git a/test/CodeGen/CBackend/2002-09-20-VarArgPrototypes.ll b/test/CodeGen/CBackend/2002-09-20-VarArgPrototypes.ll
deleted file mode 100644
index e915cd2..0000000
--- a/test/CodeGen/CBackend/2002-09-20-VarArgPrototypes.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llc < %s -march=c
-
-
-declare void @foo(...)
-
-
diff --git a/test/CodeGen/CBackend/2002-10-16-External.ll b/test/CodeGen/CBackend/2002-10-16-External.ll
deleted file mode 100644
index 2cdd15c..0000000
--- a/test/CodeGen/CBackend/2002-10-16-External.ll
+++ /dev/null
@@ -1,4 +0,0 @@
-; RUN: llc < %s -march=c
-
-@bob = external global i32              ; <i32*> [#uses=0]
-
diff --git a/test/CodeGen/CBackend/2002-11-06-PrintEscaped.ll b/test/CodeGen/CBackend/2002-11-06-PrintEscaped.ll
deleted file mode 100644
index 82d594f..0000000
--- a/test/CodeGen/CBackend/2002-11-06-PrintEscaped.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; RUN: llc < %s -march=c
-
-@testString = internal constant [18 x i8] c"Escaped newline\5Cn\00"             ; <[18 x i8]*> [#uses=1]
-
-declare i32 @printf(i8*, ...)
-
-define i32 @main() {
-        call i32 (i8*, ...)* @printf( i8* getelementptr ([18 x i8]* @testString, i64 0, i64 0) )                ; <i32>:1 [#uses=0]
-        ret i32 0
-}
-
diff --git a/test/CodeGen/CBackend/2003-05-12-IntegerSizeWarning.ll b/test/CodeGen/CBackend/2003-05-12-IntegerSizeWarning.ll
deleted file mode 100644
index 92d582d..0000000
--- a/test/CodeGen/CBackend/2003-05-12-IntegerSizeWarning.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: llc < %s -march=c
-
-; Apparently this constant was unsigned in ISO C 90, but not in C 99.
-
-define i32 @foo() {
-        ret i32 -2147483648
-}
-
diff --git a/test/CodeGen/CBackend/2003-05-13-VarArgFunction.ll b/test/CodeGen/CBackend/2003-05-13-VarArgFunction.ll
deleted file mode 100644
index a42dc27..0000000
--- a/test/CodeGen/CBackend/2003-05-13-VarArgFunction.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; RUN: llc < %s -march=c
-
-; This testcase breaks the C backend, because gcc doesn't like (...) functions
-; with no arguments at all.
-
-define void @test(i64 %Ptr) {
-        %P = inttoptr i64 %Ptr to void (...)*           ; <void (...)*> [#uses=1]
-        call void (...)* %P( i64 %Ptr )
-        ret void
-}
-
diff --git a/test/CodeGen/CBackend/2003-05-31-MissingStructName.ll b/test/CodeGen/CBackend/2003-05-31-MissingStructName.ll
deleted file mode 100644
index 19c7840..0000000
--- a/test/CodeGen/CBackend/2003-05-31-MissingStructName.ll
+++ /dev/null
@@ -1,5 +0,0 @@
-; RUN: llc < %s -march=c
-
-; The C backend was dying when there was no typename for a struct type!
-
-declare i32 @test(i32, { [32 x i32] }*)
diff --git a/test/CodeGen/CBackend/2003-06-01-NullPointerType.ll b/test/CodeGen/CBackend/2003-06-01-NullPointerType.ll
deleted file mode 100644
index 048e045..0000000
--- a/test/CodeGen/CBackend/2003-06-01-NullPointerType.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llc < %s -march=c
-
-%X = type { i32, float }
-
-define void @test() {
-        getelementptr %X* null, i64 0, i32 1            ; <float*>:1 [#uses=0]
-        ret void
-}
-
diff --git a/test/CodeGen/CBackend/2003-06-11-HexConstant.ll b/test/CodeGen/CBackend/2003-06-11-HexConstant.ll
deleted file mode 100644
index 6197b30..0000000
--- a/test/CodeGen/CBackend/2003-06-11-HexConstant.ll
+++ /dev/null
@@ -1,4 +0,0 @@
-; RUN: llc < %s -march=c
-
-; Make sure hex constant does not continue into a valid hexadecimal letter/number
-@version = global [3 x i8] c"\001\00"
diff --git a/test/CodeGen/CBackend/2003-06-11-LiteralStringProblem.ll b/test/CodeGen/CBackend/2003-06-11-LiteralStringProblem.ll
deleted file mode 100644
index f6177ea..0000000
--- a/test/CodeGen/CBackend/2003-06-11-LiteralStringProblem.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: llc < %s -march=c
-
-@version = global [3 x i8] c"1\00\00"
diff --git a/test/CodeGen/CBackend/2003-06-28-InvokeSupport.ll b/test/CodeGen/CBackend/2003-06-28-InvokeSupport.ll
deleted file mode 100644
index f0b1bbc..0000000
--- a/test/CodeGen/CBackend/2003-06-28-InvokeSupport.ll
+++ /dev/null
@@ -1,17 +0,0 @@
-; RUN: llc < %s -march=c
-
-declare i32 @callee(i32, i32)
-
-define i32 @test(i32 %X) {
-; <label>:0
-        %A = invoke i32 @callee( i32 %X, i32 5 )
-                        to label %Ok unwind label %Threw                ; <i32> [#uses=1]
-
-Ok:             ; preds = %Threw, %0
-        %B = phi i32 [ %A, %0 ], [ -1, %Threw ]         ; <i32> [#uses=1]
-        ret i32 %B
-
-Threw:          ; preds = %0
-        br label %Ok
-}
-
diff --git a/test/CodeGen/CBackend/2003-06-28-LinkOnceGlobalVars.ll b/test/CodeGen/CBackend/2003-06-28-LinkOnceGlobalVars.ll
deleted file mode 100644
index 4bd1da2..0000000
--- a/test/CodeGen/CBackend/2003-06-28-LinkOnceGlobalVars.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: llc < %s -march=c | grep common | grep X
-
-@X = linkonce global i32 5
diff --git a/test/CodeGen/CBackend/2003-10-12-NANGlobalInits.ll b/test/CodeGen/CBackend/2003-10-12-NANGlobalInits.ll
deleted file mode 100644
index 0fbb3fe..0000000
--- a/test/CodeGen/CBackend/2003-10-12-NANGlobalInits.ll
+++ /dev/null
@@ -1,5 +0,0 @@
-; RUN: llc < %s -march=c
-
-; This is a non-normal FP value: it's a nan.
-@NAN = global { float } { float 0x7FF8000000000000 }            ; <{ float }*> [#uses=0]
-@NANs = global { float } { float 0x7FFC000000000000 }           ; <{ float }*> [#uses=0]
diff --git a/test/CodeGen/CBackend/2003-10-23-UnusedType.ll b/test/CodeGen/CBackend/2003-10-23-UnusedType.ll
deleted file mode 100644
index 9195634..0000000
--- a/test/CodeGen/CBackend/2003-10-23-UnusedType.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: llc < %s -march=c
-
-%A = type { i32, i8*, { i32, i32, i32, i32, i32, i32, i32, i32 }*, i16 }
-
-define void @test(%A*) {
-        ret void
-}
-
diff --git a/test/CodeGen/CBackend/2003-10-28-CastToPtrToStruct.ll b/test/CodeGen/CBackend/2003-10-28-CastToPtrToStruct.ll
deleted file mode 100644
index b4389ff..0000000
--- a/test/CodeGen/CBackend/2003-10-28-CastToPtrToStruct.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llc < %s -march=c
-
-; reduced from DOOM.
-        %union._XEvent = type { i32 }
-@.X_event_9 = global %union._XEvent zeroinitializer             ; <%union._XEvent*> [#uses=1]
-
-define void @I_InitGraphics() {
-shortcirc_next.3:
-        %tmp.319 = load i32* getelementptr ({ i32, i32 }* bitcast (%union._XEvent* @.X_event_9 to { i32, i32 }*), i64 0, i32 1)               ; <i32> [#uses=0]
-        ret void
-}
-
diff --git a/test/CodeGen/CBackend/2003-11-21-ConstantShiftExpr.ll b/test/CodeGen/CBackend/2003-11-21-ConstantShiftExpr.ll
deleted file mode 100644
index 6a26291..0000000
--- a/test/CodeGen/CBackend/2003-11-21-ConstantShiftExpr.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: llc < %s -march=c
-@y = weak global i8 0           ; <i8*> [#uses=1]
-
-define i32 @testcaseshr() {
-entry:
-        ret i32 lshr (i32 ptrtoint (i8* @y to i32), i32 4)
-}
-
-define i32 @testcaseshl() {
-entry:
-        ret i32 shl (i32 ptrtoint (i8* @y to i32), i32 4)
-}
-
diff --git a/test/CodeGen/CBackend/2004-02-13-FrameReturnAddress.ll b/test/CodeGen/CBackend/2004-02-13-FrameReturnAddress.ll
deleted file mode 100644
index 142fbd8..0000000
--- a/test/CodeGen/CBackend/2004-02-13-FrameReturnAddress.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -march=c | grep builtin_return_address
-
-declare i8* @llvm.returnaddress(i32)
-
-declare i8* @llvm.frameaddress(i32)
-
-define i8* @test1() {
-        %X = call i8* @llvm.returnaddress( i32 0 )              ; <i8*> [#uses=1]
-        ret i8* %X
-}
-
-define i8* @test2() {
-        %X = call i8* @llvm.frameaddress( i32 0 )               ; <i8*> [#uses=1]
-        ret i8* %X
-}
-
diff --git a/test/CodeGen/CBackend/2004-02-15-PreexistingExternals.ll b/test/CodeGen/CBackend/2004-02-15-PreexistingExternals.ll
deleted file mode 100644
index d1c6861..0000000
--- a/test/CodeGen/CBackend/2004-02-15-PreexistingExternals.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; The intrinsic lowering pass was lowering intrinsics like llvm.memcpy to 
-; explicitly specified prototypes, inserting a new function if the old one
-; didn't exist.  This caused there to be two external memcpy functions in 
-; this testcase for example, which caused the CBE to mangle one, screwing
-; everything up.  :(  Test that this does not happen anymore.
-;
-; RUN: llc < %s -march=c | not grep _memcpy
-
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
-
-declare float* @memcpy(i32*, i32, i32)
-
-define i32 @test(i8* %A, i8* %B, i32* %C) {
-        call float* @memcpy( i32* %C, i32 4, i32 17 )           ; <float*>:1 [#uses=0]
-        call void @llvm.memcpy.i32( i8* %A, i8* %B, i32 123, i32 14 )
-        ret i32 7
-}
-
diff --git a/test/CodeGen/CBackend/2004-02-26-FPNotPrintableConstants.ll b/test/CodeGen/CBackend/2004-02-26-FPNotPrintableConstants.ll
deleted file mode 100644
index 6fceb08..0000000
--- a/test/CodeGen/CBackend/2004-02-26-FPNotPrintableConstants.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; This is a non-normal FP value
-; RUN: llc < %s -march=c | grep FPConstant | grep static
-
-define float @func() {
-        ret float 0xFFF0000000000000
-}
-
-define double @func2() {
-        ret double 0xFF20000000000000
-}
-
diff --git a/test/CodeGen/CBackend/2004-02-26-LinkOnceFunctions.ll b/test/CodeGen/CBackend/2004-02-26-LinkOnceFunctions.ll
deleted file mode 100644
index cf59634..0000000
--- a/test/CodeGen/CBackend/2004-02-26-LinkOnceFunctions.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llc < %s -march=c | grep func1 | grep WEAK
-
-define linkonce i32 @func1() {
-        ret i32 5
-}
-
diff --git a/test/CodeGen/CBackend/2004-08-09-va-end-null.ll b/test/CodeGen/CBackend/2004-08-09-va-end-null.ll
deleted file mode 100644
index 3ee23d1..0000000
--- a/test/CodeGen/CBackend/2004-08-09-va-end-null.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: llc < %s -march=c
-
-declare void @llvm.va_end(i8*)
-
-define void @test() {
-        %va.upgrd.1 = bitcast i8* null to i8*           ; <i8*> [#uses=1]
-        call void @llvm.va_end( i8* %va.upgrd.1 )
-        ret void
-}
-
diff --git a/test/CodeGen/CBackend/2004-11-13-FunctionPointerCast.ll b/test/CodeGen/CBackend/2004-11-13-FunctionPointerCast.ll
deleted file mode 100644
index af8f441..0000000
--- a/test/CodeGen/CBackend/2004-11-13-FunctionPointerCast.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; The CBE should not emit code that casts the function pointer.  This causes
-; GCC to get testy and insert trap instructions instead of doing the right
-; thing. :(
-; RUN: llc < %s -march=c
-
-declare void @external(i8*)
-
-define i32 @test(i32* %X) {
-        %RV = call i32 bitcast (void (i8*)* @external to i32 (i32*)*)( i32* %X )                ; <i32> [#uses=1]
-        ret i32 %RV
-}
-
diff --git a/test/CodeGen/CBackend/2004-12-03-ExternStatics.ll b/test/CodeGen/CBackend/2004-12-03-ExternStatics.ll
deleted file mode 100644
index 78e9bac..0000000
--- a/test/CodeGen/CBackend/2004-12-03-ExternStatics.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: llc < %s -march=c | not grep extern.*msg
-; PR472
-
-@msg = internal global [6 x i8] c"hello\00"             ; <[6 x i8]*> [#uses=1]
-
-define i8* @foo() {
-entry:
-        ret i8* getelementptr ([6 x i8]* @msg, i32 0, i32 0)
-}
-
diff --git a/test/CodeGen/CBackend/2004-12-28-LogicalConstantExprs.ll b/test/CodeGen/CBackend/2004-12-28-LogicalConstantExprs.ll
deleted file mode 100644
index 57a9adc..0000000
--- a/test/CodeGen/CBackend/2004-12-28-LogicalConstantExprs.ll
+++ /dev/null
@@ -1,5 +0,0 @@
-; RUN: llc < %s -march=c
-
-define i32 @foo() {
-        ret i32 and (i32 123456, i32 ptrtoint (i32 ()* @foo to i32))
-}
diff --git a/test/CodeGen/CBackend/2005-07-14-NegationToMinusMinus.ll b/test/CodeGen/CBackend/2005-07-14-NegationToMinusMinus.ll
deleted file mode 100644
index 808b8f9..0000000
--- a/test/CodeGen/CBackend/2005-07-14-NegationToMinusMinus.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: llc < %s -march=c | not grep -- --65535
-; PR596
-
-target datalayout = "e-p:32:32"
-target triple = "i686-pc-linux-gnu"
-
-declare void @func(i32)
-
-define void @funcb() {
-entry:
-        %tmp.1 = sub i32 0, -65535              ; <i32> [#uses=1]
-        call void @func( i32 %tmp.1 )
-        br label %return
-
-return:         ; preds = %entry
-        ret void
-}
-
diff --git a/test/CodeGen/CBackend/2005-08-23-Fmod.ll b/test/CodeGen/CBackend/2005-08-23-Fmod.ll
deleted file mode 100644
index 6e650eb..0000000
--- a/test/CodeGen/CBackend/2005-08-23-Fmod.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: llc < %s -march=c | grep fmod
-
-define double @test(double %A, double %B) {
-        %C = frem double %A, %B         ; <double> [#uses=1]
-        ret double %C
-}
-
diff --git a/test/CodeGen/CBackend/2006-12-11-Float-Bitcast.ll b/test/CodeGen/CBackend/2006-12-11-Float-Bitcast.ll
deleted file mode 100644
index c9df800..0000000
--- a/test/CodeGen/CBackend/2006-12-11-Float-Bitcast.ll
+++ /dev/null
@@ -1,49 +0,0 @@
-; RUN: llc < %s -march=c | \
-; RUN:   grep __BITCAST | count 14
-
-define i32 @test1(float %F) {
-        %X = bitcast float %F to i32            ; <i32> [#uses=1]
-        ret i32 %X
-}
-
-define float @test2(i32 %I) {
-        %X = bitcast i32 %I to float            ; <float> [#uses=1]
-        ret float %X
-}
-
-define i64 @test3(double %D) {
-        %X = bitcast double %D to i64           ; <i64> [#uses=1]
-        ret i64 %X
-}
-
-define double @test4(i64 %L) {
-        %X = bitcast i64 %L to double           ; <double> [#uses=1]
-        ret double %X
-}
-
-define double @test5(double %D) {
-        %X = bitcast double %D to double                ; <double> [#uses=1]
-        %Y = fadd double %X, 2.000000e+00                ; <double> [#uses=1]
-        %Z = bitcast double %Y to i64           ; <i64> [#uses=1]
-        %res = bitcast i64 %Z to double         ; <double> [#uses=1]
-        ret double %res
-}
-
-define float @test6(float %F) {
-        %X = bitcast float %F to float          ; <float> [#uses=1]
-        %Y = fadd float %X, 2.000000e+00         ; <float> [#uses=1]
-        %Z = bitcast float %Y to i32            ; <i32> [#uses=1]
-        %res = bitcast i32 %Z to float          ; <float> [#uses=1]
-        ret float %res
-}
-
-define i32 @main(i32 %argc, i8** %argv) {
-        %a = call i32 @test1( float 0x400921FB40000000 )                ; <i32> [#uses=2]
-        %b = call float @test2( i32 %a )                ; <float> [#uses=0]
-        %c = call i64 @test3( double 0x400921FB4D12D84A )               ; <i64> [#uses=1]
-        %d = call double @test4( i64 %c )               ; <double> [#uses=0]
-        %e = call double @test5( double 7.000000e+00 )          ; <double> [#uses=0]
-        %f = call float @test6( float 7.000000e+00 )            ; <float> [#uses=0]
-        ret i32 %a
-}
-
diff --git a/test/CodeGen/CBackend/2007-01-08-ParamAttr-ICmp.ll b/test/CodeGen/CBackend/2007-01-08-ParamAttr-ICmp.ll
deleted file mode 100644
index da36e78..0000000
--- a/test/CodeGen/CBackend/2007-01-08-ParamAttr-ICmp.ll
+++ /dev/null
@@ -1,26 +0,0 @@
-; For PR1099
-; RUN: llc < %s -march=c | grep {(llvm_cbe_tmp2 == llvm_cbe_b_2e_0_2e_0_2e_val)}
-
-target datalayout = "e-p:32:32"
-target triple = "i686-apple-darwin8"
-        %struct.Connector = type { i16, i16, i8, i8, %struct.Connector*, i8* }
-
-
-define i1 @prune_match_entry_2E_ce(%struct.Connector* %a, i16 %b.0.0.val) {
-newFuncRoot:
-        br label %entry.ce
-
-cond_next.exitStub:             ; preds = %entry.ce
-        ret i1 true
-
-entry.return_crit_edge.exitStub:                ; preds = %entry.ce
-        ret i1 false
-
-entry.ce:               ; preds = %newFuncRoot
-        %tmp1 = getelementptr %struct.Connector* %a, i32 0, i32 0                ; <i16*> [#uses=1]
-        %tmp2 = load i16* %tmp1           ; <i16> [#uses=1]
-        %tmp3 = icmp eq i16 %tmp2, %b.0.0.val             ; <i1> [#uses=1]
-        br i1 %tmp3, label %cond_next.exitStub, label %entry.return_crit_edge.exitStub
-}
-
-
diff --git a/test/CodeGen/CBackend/2007-01-17-StackSaveNRestore.ll b/test/CodeGen/CBackend/2007-01-17-StackSaveNRestore.ll
deleted file mode 100644
index 4f699b7..0000000
--- a/test/CodeGen/CBackend/2007-01-17-StackSaveNRestore.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llc < %s -march=c | grep __builtin_stack_save
-; RUN: llc < %s -march=c | grep __builtin_stack_restore
-; PR1028
-
-declare i8* @llvm.stacksave()
-declare void @llvm.stackrestore(i8*)
-
-define i8* @test() {
-    %s = call i8* @llvm.stacksave()
-    call void @llvm.stackrestore(i8* %s)
-    ret i8* %s
-}
diff --git a/test/CodeGen/CBackend/2007-02-05-memset.ll b/test/CodeGen/CBackend/2007-02-05-memset.ll
deleted file mode 100644
index 7d508e4..0000000
--- a/test/CodeGen/CBackend/2007-02-05-memset.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: llc < %s -march=c
-; PR1181
-target datalayout = "e-p:64:64"
-target triple = "x86_64-apple-darwin8"
-
-
-declare void @llvm.memset.i64(i8*, i8, i64, i32)
-
-define fastcc void @InitUser_data_unregistered() {
-entry:
-        tail call void @llvm.memset.i64( i8* null, i8 0, i64 65496, i32 1 )
-        ret void
-}
diff --git a/test/CodeGen/CBackend/2007-02-23-NameConflicts.ll b/test/CodeGen/CBackend/2007-02-23-NameConflicts.ll
deleted file mode 100644
index 7e1ff2a..0000000
--- a/test/CodeGen/CBackend/2007-02-23-NameConflicts.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; PR1164
-; RUN: llc < %s -march=c | grep {llvm_cbe_A = \\*llvm_cbe_G;}
-; RUN: llc < %s -march=c | grep {llvm_cbe_B = \\*(&ltmp_0_1);}
-; RUN: llc < %s -march=c | grep {return (((unsigned int )(((unsigned int )llvm_cbe_A) + ((unsigned int )llvm_cbe_B))));}
-
-@G = global i32 123
-@ltmp_0_1 = global i32 123
-
-define i32 @test(i32 *%G) {
-        %A = load i32* %G
-        %B = load i32* @ltmp_0_1
-        %C = add i32 %A, %B
-        ret i32 %C
-}
diff --git a/test/CodeGen/CBackend/2007-07-11-PackedStruct.ll b/test/CodeGen/CBackend/2007-07-11-PackedStruct.ll
deleted file mode 100644
index c8bfdd6..0000000
--- a/test/CodeGen/CBackend/2007-07-11-PackedStruct.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llc < %s -march=c | grep {packed}
-
-	%struct.p = type <{ i16 }>
-
-define i32 @main() {
-entry:
-        %t = alloca %struct.p, align 2
-	ret i32 5
-}
diff --git a/test/CodeGen/CBackend/2008-02-01-UnalignedLoadStore.ll b/test/CodeGen/CBackend/2008-02-01-UnalignedLoadStore.ll
deleted file mode 100644
index e6eeba3..0000000
--- a/test/CodeGen/CBackend/2008-02-01-UnalignedLoadStore.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: llc < %s -march=c | \
-; RUN:          grep {struct __attribute__ ((packed, aligned(} | count 4
-
-define void @test(i32* %P) {
-        %X = load i32* %P, align 1
-        store i32 %X, i32* %P, align 1
-        ret void
-}
-
-define void @test2(i32* %P) {
-        %X = load volatile i32* %P, align 2
-        store volatile i32 %X, i32* %P, align 2
-        ret void
-}
-
diff --git a/test/CodeGen/CBackend/2008-05-31-BoolOverflow.ll b/test/CodeGen/CBackend/2008-05-31-BoolOverflow.ll
deleted file mode 100644
index e9fa552..0000000
--- a/test/CodeGen/CBackend/2008-05-31-BoolOverflow.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: llc < %s -march=c | grep {llvm_cbe_t.*&1}
-define i32 @test(i32 %r) {
-  %s = icmp eq i32 %r, 0
-  %t = add i1 %s, %s
-  %u = zext i1 %t to i32
-  br i1 %t, label %A, label %B
-A:
-
-  ret i32 %u
-B:
-
-  %v = select i1 %t, i32 %r, i32 %u
-  ret i32 %v
-}
diff --git a/test/CodeGen/CBackend/2008-10-21-PPCLongDoubleConstant.ll b/test/CodeGen/CBackend/2008-10-21-PPCLongDoubleConstant.ll
deleted file mode 100644
index b72b573..0000000
--- a/test/CodeGen/CBackend/2008-10-21-PPCLongDoubleConstant.ll
+++ /dev/null
@@ -1,29 +0,0 @@
-; RUN: llc < %s -march=c
-; PR2907
-target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
-target triple = "powerpc-apple-darwin9.5"
-	%"struct.Point<0>" = type { %"struct.Tensor<1,0>" }
-	%"struct.QGauss2<1>" = type { %"struct.Quadrature<0>" }
-	%"struct.Quadrature<0>" = type { %struct.Subscriptor, i32, %"struct.std::vector<Point<0>,std::allocator<Point<0> > >", %"struct.std::vector<double,std::allocator<double> >" }
-	%struct.Subscriptor = type { i32 (...)**, i32, %"struct.std::type_info"* }
-	%"struct.Tensor<1,0>" = type { [1 x double] }
-	%"struct.std::_Vector_base<Point<0>,std::allocator<Point<0> > >" = type { %"struct.std::_Vector_base<Point<0>,std::allocator<Point<0> > >::_Vector_impl" }
-	%"struct.std::_Vector_base<Point<0>,std::allocator<Point<0> > >::_Vector_impl" = type { %"struct.Point<0>"*, %"struct.Point<0>"*, %"struct.Point<0>"* }
-	%"struct.std::_Vector_base<double,std::allocator<double> >" = type { %"struct.std::_Vector_base<double,std::allocator<double> >::_Vector_impl" }
-	%"struct.std::_Vector_base<double,std::allocator<double> >::_Vector_impl" = type { double*, double*, double* }
-	%"struct.std::type_info" = type { i32 (...)**, i8* }
-	%"struct.std::vector<Point<0>,std::allocator<Point<0> > >" = type { %"struct.std::_Vector_base<Point<0>,std::allocator<Point<0> > >" }
-	%"struct.std::vector<double,std::allocator<double> >" = type { %"struct.std::_Vector_base<double,std::allocator<double> >" }
-
-define fastcc void @_ZN6QGaussILi1EEC1Ej(%"struct.QGauss2<1>"* %this, i32 %n) {
-entry:
-	br label %bb4
-
-bb4:		; preds = %bb5.split, %bb4, %entry
-	%0 = fcmp ogt ppc_fp128 0xM00000000000000000000000000000000, select (i1 fcmp olt (ppc_fp128 fpext (double 0x3C447AE147AE147B to ppc_fp128), ppc_fp128 fmul (ppc_fp128 0xM00000000000000010000000000000000, ppc_fp128 0xM40140000000000000000000000000000)), ppc_fp128 fmul (ppc_fp128 0xM00000000000000010000000000000000, ppc_fp128 0xM40140000000000000000000000000000), ppc_fp128 fpext (double 0x3C447AE147AE147B to ppc_fp128))		; <i1> [#uses=1]
-	br i1 %0, label %bb4, label %bb5.split
-
-bb5.split:		; preds = %bb4
-	%1 = getelementptr double* null, i32 0		; <double*> [#uses=0]
-	br label %bb4
-}
diff --git a/test/CodeGen/CBackend/2011-06-08-addWithOverflow.ll b/test/CodeGen/CBackend/2011-06-08-addWithOverflow.ll
deleted file mode 100644
index 0ae480d..0000000
--- a/test/CodeGen/CBackend/2011-06-08-addWithOverflow.ll
+++ /dev/null
@@ -1,35 +0,0 @@
-; RUN: llc < %s -march=c
-; Check that uadd and sadd with overflow are handled by C Backend.
-
-%0 = type { i32, i1 }        ; type %0
-
-define i1 @func1(i32 zeroext %v1, i32 zeroext %v2) nounwind {
-entry:
-    %t = call %0 @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2)     ; <%0> [#uses=1]
-    %obit = extractvalue %0 %t, 1       ; <i1> [#uses=1]
-    br i1 %obit, label %carry, label %normal
-
-normal:     ; preds = %entry
-    ret i1 true
-
-carry:      ; preds = %entry
-    ret i1 false
-}
-
-define i1 @func2(i32 signext %v1, i32 signext %v2) nounwind {
-entry:
-    %t = call %0 @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)     ; <%0> [#uses=1]
-    %obit = extractvalue %0 %t, 1       ; <i1> [#uses=1]
-    br i1 %obit, label %carry, label %normal
-
-normal:     ; preds = %entry
-    ret i1 true
-
-carry:      ; preds = %entry
-    ret i1 false
-}
-
-declare %0 @llvm.sadd.with.overflow.i32(i32, i32) nounwind
-
-declare %0 @llvm.uadd.with.overflow.i32(i32, i32) nounwind
-
diff --git a/test/CodeGen/CBackend/X86/2008-06-04-IndirectMem.ll b/test/CodeGen/CBackend/X86/2008-06-04-IndirectMem.ll
deleted file mode 100644
index 054a3ca..0000000
--- a/test/CodeGen/CBackend/X86/2008-06-04-IndirectMem.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llc < %s -march=c | grep {"m"(llvm_cbe_newcw))}
-; PR2407
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
-target triple = "i386-pc-linux-gnu"
-
-define void @foo() {
-  %newcw = alloca i16             ; <i16*> [#uses=2]
-  call void asm sideeffect "fldcw $0", "*m,~{dirflag},~{fpsr},~{flags}"( i16*
-%newcw ) nounwind 
-  ret void
-}
diff --git a/test/CodeGen/CBackend/X86/lit.local.cfg b/test/CodeGen/CBackend/X86/lit.local.cfg
deleted file mode 100644
index 037d8c3..0000000
--- a/test/CodeGen/CBackend/X86/lit.local.cfg
+++ /dev/null
@@ -1,13 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
-
-def getRoot(config):
-    if not config.parent:
-        return config
-    return getRoot(config.parent)
-
-root = getRoot(config)
-
-targets = set(root.targets_to_build.split())
-if not 'CBackend' in targets or not 'X86' in targets:
-    config.unsupported = True
-
diff --git a/test/CodeGen/CBackend/fneg.ll b/test/CodeGen/CBackend/fneg.ll
deleted file mode 100644
index 7dec3d9..0000000
--- a/test/CodeGen/CBackend/fneg.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: llc < %s -march=c
-
-define void @func() nounwind {
-  entry:
-  %0 = fsub double -0.0, undef
-  ret void
-}
diff --git a/test/CodeGen/CBackend/lit.local.cfg b/test/CodeGen/CBackend/lit.local.cfg
deleted file mode 100644
index 0dce170..0000000
--- a/test/CodeGen/CBackend/lit.local.cfg
+++ /dev/null
@@ -1,13 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
-
-def getRoot(config):
-    if not config.parent:
-        return config
-    return getRoot(config.parent)
-
-root = getRoot(config)
-
-targets = set(root.targets_to_build.split())
-if not 'CBackend' in targets:
-    config.unsupported = True
-
diff --git a/test/CodeGen/CBackend/pr2408.ll b/test/CodeGen/CBackend/pr2408.ll
deleted file mode 100644
index bf8477b..0000000
--- a/test/CodeGen/CBackend/pr2408.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llc < %s -march=c | grep {\\* ((unsigned int )}
-; PR2408
-
-define i32 @a(i32 %a) {
-entry:
-        %shr = ashr i32 %a, 0           ; <i32> [#uses=1]
-        %shr2 = ashr i32 2, 0           ; <i32> [#uses=1]
-        %mul = mul i32 %shr, %shr2              ; <i32> [#uses=1]
-        %shr4 = ashr i32 2, 0           ; <i32> [#uses=1]
-        %div = sdiv i32 %mul, %shr4             ; <i32> [#uses=1]
-        ret i32 %div
-}
diff --git a/test/CodeGen/CBackend/vectors.ll b/test/CodeGen/CBackend/vectors.ll
deleted file mode 100644
index b7b7677..0000000
--- a/test/CodeGen/CBackend/vectors.ll
+++ /dev/null
@@ -1,37 +0,0 @@
-; RUN: llc < %s -march=c
-@.str15 = external global [2 x i8]
-
-define <4 x i32> @foo(<4 x i32> %a, i32 %b) {
-  %c = insertelement <4 x i32> %a, i32 1, i32 %b
-  
-  ret <4 x i32> %c
-}
-
-define i32 @test2(<4 x i32> %a, i32 %b) {
-  %c = extractelement <4 x i32> %a, i32 1
-  
-  ret i32 %c
-}
-
-define <4 x float> @test3(<4 x float> %Y) {
-	%Z = fadd <4 x float> %Y, %Y
-	%X = shufflevector <4 x float> zeroinitializer, <4 x float> %Z, <4 x i32> < i32 0, i32 5, i32 6, i32 7 >
-	ret <4 x float> %X
-}
-
-define void @test4() {
-	%x = alloca <4 x float>
-	%tmp3.i16 = getelementptr <4 x float>* %x, i32 0, i32 0
-	store float 1.0, float* %tmp3.i16
-	ret void
-}
-
-define i32* @test5({i32, i32} * %P) {
-	%x = getelementptr {i32, i32} * %P, i32 0, i32 1
-	ret i32* %x
-}
-
-define i8* @test6() {
-  ret i8* getelementptr ([2 x i8]* @.str15, i32 0, i32 0) 
-}
-
diff --git a/test/CodeGen/CPP/lit.local.cfg b/test/CodeGen/CPP/lit.local.cfg
index 96596d8..4d4b4a4 100644
--- a/test/CodeGen/CPP/lit.local.cfg
+++ b/test/CodeGen/CPP/lit.local.cfg
@@ -1,13 +1,6 @@
 config.suffixes = ['.ll', '.c', '.cpp']
 
-def getRoot(config):
-    if not config.parent:
-        return config
-    return getRoot(config.parent)
-
-root = getRoot(config)
-
-targets = set(root.targets_to_build.split())
+targets = set(config.root.targets_to_build.split())
 if not 'CppBackend' in targets:
     config.unsupported = True
 
diff --git a/test/CodeGen/CellSPU/2009-01-01-BrCond.ll b/test/CodeGen/CellSPU/2009-01-01-BrCond.ll
index 58e3190..3542231 100644
--- a/test/CodeGen/CellSPU/2009-01-01-BrCond.ll
+++ b/test/CodeGen/CellSPU/2009-01-01-BrCond.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=cellspu -o - | grep brnz
+; RUN: llc < %s -march=cellspu -o - | grep brz
 ; PR3274
 
 target datalayout = "E-p:32:32:128-i1:8:128-i8:8:128-i16:16:128-i32:32:128-i64:32:128-f32:32:128-f64:64:128-v64:64:64-v128:128:128-a0:0:128-s0:128:128"
diff --git a/test/CodeGen/CellSPU/lit.local.cfg b/test/CodeGen/CellSPU/lit.local.cfg
index 6ae0972..ea00867 100644
--- a/test/CodeGen/CellSPU/lit.local.cfg
+++ b/test/CodeGen/CellSPU/lit.local.cfg
@@ -1,13 +1,6 @@
 config.suffixes = ['.ll', '.c', '.cpp']
 
-def getRoot(config):
-    if not config.parent:
-        return config
-    return getRoot(config.parent)
-
-root = getRoot(config)
-
-targets = set(root.targets_to_build.split())
+targets = set(config.root.targets_to_build.split())
 if not 'CellSPU' in targets:
     config.unsupported = True
 
diff --git a/test/CodeGen/Generic/pr12507.ll b/test/CodeGen/Generic/pr12507.ll
new file mode 100644
index 0000000..c793358
--- /dev/null
+++ b/test/CodeGen/Generic/pr12507.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s
+
+@c = external global i32, align 4
+
+define void @foo(i160 %x) {
+entry:
+  %cmp.i = icmp ne i160 %x, 340282366920938463463374607431768211456
+  %conv.i = zext i1 %cmp.i to i32
+  %tobool.i = icmp eq i32 %conv.i, 0
+  br i1 %tobool.i, label %if.then.i, label %fn1.exit
+
+if.then.i:
+  store i32 0, i32* @c, align 4
+  br label %fn1.exit
+
+fn1.exit:
+  ret void
+}
diff --git a/test/CodeGen/Generic/select.ll b/test/CodeGen/Generic/select.ll
index 63052c1..77636eb 100644
--- a/test/CodeGen/Generic/select.ll
+++ b/test/CodeGen/Generic/select.ll
@@ -185,3 +185,11 @@ define i32 @checkFoldGEP(%Domain* %D, i64 %idx) {
         ret i32 %reg820
 }
 
+; Test case for scalarising a 1 element vselect
+;
+define <1 x i32> @checkScalariseVSELECT(<1 x i32> %a, <1 x i32> %b) {
+        %cond = icmp uge <1 x i32> %a, %b
+        %s = select <1 x i1> %cond, <1 x i32> %a, <1 x i32> %b
+        ret <1 x i32> %s
+}
+
diff --git a/test/CodeGen/Hexagon/args.ll b/test/CodeGen/Hexagon/args.ll
index e9ac8b6..69002e0 100644
--- a/test/CodeGen/Hexagon/args.ll
+++ b/test/CodeGen/Hexagon/args.ll
@@ -1,4 +1,5 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv4 -disable-dfa-sched < %s | FileCheck %s
+; RUN: true
+; DISABLED: llc -march=hexagon -mcpu=hexagonv4 -disable-dfa-sched < %s | FileCheck %s
 ; CHECK: r[[T0:[0-9]+]] = #7
 ; CHECK: memw(r29 + #0) = r[[T0]]
 ; CHECK: r0 = #1
diff --git a/test/CodeGen/Hexagon/combine.ll b/test/CodeGen/Hexagon/combine.ll
index 7219985..36abd74 100644
--- a/test/CodeGen/Hexagon/combine.ll
+++ b/test/CodeGen/Hexagon/combine.ll
@@ -1,4 +1,5 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: true
+; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
 ; CHECK: combine(r{{[0-9]+}}, r{{[0-9]+}})
 
 @j = external global i32
diff --git a/test/CodeGen/Hexagon/double.ll b/test/CodeGen/Hexagon/double.ll
index c3b6f37..04c2ec1 100644
--- a/test/CodeGen/Hexagon/double.ll
+++ b/test/CodeGen/Hexagon/double.ll
@@ -1,4 +1,5 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: true
+; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
 ; CHECK: __hexagon_adddf3
 ; CHECK: __hexagon_subdf3
 
diff --git a/test/CodeGen/Hexagon/float.ll b/test/CodeGen/Hexagon/float.ll
index bec9f58..51acf2e 100644
--- a/test/CodeGen/Hexagon/float.ll
+++ b/test/CodeGen/Hexagon/float.ll
@@ -1,4 +1,5 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: true
+; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
 ; CHECK: __hexagon_addsf3
 ; CHECK: __hexagon_subsf3
 
diff --git a/test/CodeGen/Hexagon/frame.ll b/test/CodeGen/Hexagon/frame.ll
index dc87c73..c0a9fda 100644
--- a/test/CodeGen/Hexagon/frame.ll
+++ b/test/CodeGen/Hexagon/frame.ll
@@ -1,4 +1,5 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: true
+; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
 
 @num = external global i32
 @acc = external global i32
diff --git a/test/CodeGen/Hexagon/lit.local.cfg b/test/CodeGen/Hexagon/lit.local.cfg
index ea12f68..24324b2 100644
--- a/test/CodeGen/Hexagon/lit.local.cfg
+++ b/test/CodeGen/Hexagon/lit.local.cfg
@@ -1,13 +1,6 @@
 config.suffixes = ['.ll', '.c', '.cpp']
 
-def getRoot(config):
-    if not config.parent:
-        return config
-    return getRoot(config.parent)
-
-root = getRoot(config)
-
-targets = set(root.targets_to_build.split())
+targets = set(config.root.targets_to_build.split())
 if not 'Hexagon' in targets:
     config.unsupported = True
 
diff --git a/test/CodeGen/Hexagon/mpy.ll b/test/CodeGen/Hexagon/mpy.ll
index d5c5ae3..afd6fc6 100644
--- a/test/CodeGen/Hexagon/mpy.ll
+++ b/test/CodeGen/Hexagon/mpy.ll
@@ -1,4 +1,5 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: true
+; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
 ; CHECK: += mpyi
 
 define void @foo(i32 %acc, i32 %num, i32 %num2) nounwind {
diff --git a/test/CodeGen/Hexagon/static.ll b/test/CodeGen/Hexagon/static.ll
index 1105096..c63a3ba 100644
--- a/test/CodeGen/Hexagon/static.ll
+++ b/test/CodeGen/Hexagon/static.ll
@@ -1,4 +1,5 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv4 -disable-dfa-sched < %s | FileCheck %s
+; RUN: true
+; DISABLED: llc -march=hexagon -mcpu=hexagonv4 -disable-dfa-sched < %s | FileCheck %s
 
 @num = external global i32
 @acc = external global i32
diff --git a/test/CodeGen/Hexagon/struct_args.ll b/test/CodeGen/Hexagon/struct_args.ll
index cc409db..2c962d0 100644
--- a/test/CodeGen/Hexagon/struct_args.ll
+++ b/test/CodeGen/Hexagon/struct_args.ll
@@ -1,4 +1,5 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: true
+; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
 ; CHECK: r1:0 = or(r{{[0-9]}}:{{[0-9]}}, r{{[0-9]}}:{{[0-9]}})
 
 %struct.small = type { i32, i32 }
diff --git a/test/CodeGen/Hexagon/struct_args_large.ll b/test/CodeGen/Hexagon/struct_args_large.ll
index af099cd..69de4f6 100644
--- a/test/CodeGen/Hexagon/struct_args_large.ll
+++ b/test/CodeGen/Hexagon/struct_args_large.ll
@@ -1,4 +1,5 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: true
+; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
 ; CHECK: r[[T0:[0-9]+]] = CONST32(#s2)
 ; CHECK: r[[T1:[0-9]+]] = memw(r[[T0]] + #0)
 ; CHECK: memw(r29 + #0) = r[[T1]]
diff --git a/test/CodeGen/Hexagon/vaddh.ll b/test/CodeGen/Hexagon/vaddh.ll
index 01d2041..788e474 100644
--- a/test/CodeGen/Hexagon/vaddh.ll
+++ b/test/CodeGen/Hexagon/vaddh.ll
@@ -1,4 +1,5 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: true
+; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
 ; CHECK: vaddh(r{{[0-9]+}}, r{{[0-9]+}})
 
 @j = external global i32
diff --git a/test/CodeGen/MBlaze/lit.local.cfg b/test/CodeGen/MBlaze/lit.local.cfg
index e43df89..e236200 100644
--- a/test/CodeGen/MBlaze/lit.local.cfg
+++ b/test/CodeGen/MBlaze/lit.local.cfg
@@ -1,13 +1,6 @@
 config.suffixes = ['.ll', '.c', '.cpp']
 
-def getRoot(config):
-    if not config.parent:
-        return config
-    return getRoot(config.parent)
-
-root = getRoot(config)
-
-targets = set(root.targets_to_build.split())
+targets = set(config.root.targets_to_build.split())
 if not 'MBlaze' in targets:
     config.unsupported = True
 
diff --git a/test/CodeGen/MSP430/lit.local.cfg b/test/CodeGen/MSP430/lit.local.cfg
index b9b654d..972732e 100644
--- a/test/CodeGen/MSP430/lit.local.cfg
+++ b/test/CodeGen/MSP430/lit.local.cfg
@@ -1,13 +1,6 @@
 config.suffixes = ['.ll', '.c', '.cpp']
 
-def getRoot(config):
-    if not config.parent:
-        return config
-    return getRoot(config.parent)
-
-root = getRoot(config)
-
-targets = set(root.targets_to_build.split())
+targets = set(config.root.targets_to_build.split())
 if not 'MSP430' in targets:
     config.unsupported = True
 
diff --git a/test/CodeGen/Mips/2009-11-16-CstPoolLoad.ll b/test/CodeGen/Mips/2009-11-16-CstPoolLoad.ll
index b8d6826..2b2ee0f 100644
--- a/test/CodeGen/Mips/2009-11-16-CstPoolLoad.ll
+++ b/test/CodeGen/Mips/2009-11-16-CstPoolLoad.ll
@@ -1,10 +1,23 @@
-; RUN: llc < %s | FileCheck %s
-target datalayout = "E-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-n32"
-target triple = "mips-unknown-linux"
+; RUN: llc -march=mipsel -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-O32
+; RUN: llc -march=mipsel -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-O32
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n32 -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-N32
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n32 -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-N32
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-N64
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-N64
 
 define float @h() nounwind readnone {
 entry:
-; CHECK: lw $2, %got($CPI0_0)($gp)
-; CHECK: lwc1 $f0, %lo($CPI0_0)($2)
+; PIC-O32: lw  $[[R0:[0-9]+]], %got($CPI0_0)
+; PIC-O32: lwc1 $f0, %lo($CPI0_0)($[[R0]])
+; STATIC-O32: lui  $[[R0:[0-9]+]], %hi($CPI0_0)
+; STATIC-O32: lwc1 $f0, %lo($CPI0_0)($[[R0]])
+; PIC-N32: lw  $[[R0:[0-9]+]], %got_page($CPI0_0)
+; PIC-N32: lwc1 $f0, %got_ofst($CPI0_0)($[[R0]])
+; STATIC-N32: lui  $[[R0:[0-9]+]], %hi($CPI0_0)
+; STATIC-N32: lwc1 $f0, %lo($CPI0_0)($[[R0]])
+; PIC-N64: ld  $[[R0:[0-9]+]], %got_page($CPI0_0)
+; PIC-N64: lwc1 $f0, %got_ofst($CPI0_0)($[[R0]])
+; STATIC-N64: ld  $[[R0:[0-9]+]], %got_page($CPI0_0)
+; STATIC-N64: lwc1 $f0, %got_ofst($CPI0_0)($[[R0]])
   ret float 0x400B333340000000
 }
diff --git a/test/CodeGen/Mips/2010-07-20-Switch.ll b/test/CodeGen/Mips/2010-07-20-Switch.ll
index 785a416..aaf6767 100644
--- a/test/CodeGen/Mips/2010-07-20-Switch.ll
+++ b/test/CodeGen/Mips/2010-07-20-Switch.ll
@@ -13,9 +13,13 @@ entry:
 ; PIC-O32: lw $[[R0:[0-9]+]], %got($JTI0_0)
 ; PIC-O32: addiu ${{[0-9]+}}, $[[R0]], %lo($JTI0_0)
 ; PIC-O32: sll ${{[0-9]+}}, ${{[0-9]+}}, 2
+; PIC-O32: addu $[[R1:[0-9]+]], ${{[0-9]+}}, $gp
+; PIC-O32: jr  $[[R1]]
 ; PIC-N64: ld $[[R0:[0-9]+]], %got_page($JTI0_0)
 ; PIC-N64: daddiu ${{[0-9]+}}, $[[R0]], %got_ofst($JTI0_0)
 ; PIC-N64: dsll ${{[0-9]+}}, ${{[0-9]+}}, 3
+; PIC-N64: daddu $[[R1:[0-9]+]], ${{[0-9]+}}, $gp
+; PIC-N64: jr  $[[R1]]
   switch i32 %0, label %bb4 [
     i32 0, label %bb5
     i32 1, label %bb1
diff --git a/test/CodeGen/Mips/analyzebranch.ll b/test/CodeGen/Mips/analyzebranch.ll
index 8f0bdf2..bc5bcc3 100644
--- a/test/CodeGen/Mips/analyzebranch.ll
+++ b/test/CodeGen/Mips/analyzebranch.ll
@@ -26,9 +26,9 @@ return:                                           ; preds = %if.else, %if.end6
 
 define void @f1(float %f) nounwind {
 entry:
-; CHECK: bc1t $BB1_2
+; CHECK: bc1f $BB1_1
 ; CHECK: nop
-; CHECK: # BB#1:      
+; CHECK: # BB#2:
   %cmp = fcmp une float %f, 0.000000e+00
   br i1 %cmp, label %if.then, label %if.end
 
diff --git a/test/CodeGen/Mips/blockaddr.ll b/test/CodeGen/Mips/blockaddr.ll
index 6de6b77..7de7fa6 100644
--- a/test/CodeGen/Mips/blockaddr.ll
+++ b/test/CodeGen/Mips/blockaddr.ll
@@ -1,5 +1,9 @@
-; RUN: llc -march=mipsel -relocation-model=pic < %s | FileCheck %s -check-prefix=CHECK-PIC
-; RUN: llc -march=mipsel -relocation-model=static < %s | FileCheck %s -check-prefix=CHECK-STATIC
+; RUN: llc -march=mipsel -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-O32
+; RUN: llc -march=mipsel -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-O32
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n32 -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-N32
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n32 -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-N32
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-N64
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-N64
 
 @reg = common global i8* null, align 4
 
@@ -8,14 +12,30 @@ entry:
   ret i8* %x
 }
 
-; CHECK-PIC: lw  $[[R0:[0-9]+]], %got($tmp[[T0:[0-9]+]])($gp)
-; CHECK-PIC: addiu ${{[0-9]+}}, $[[R0]], %lo($tmp[[T0]])
-; CHECK-PIC: lw  $[[R1:[0-9]+]], %got($tmp[[T1:[0-9]+]])($gp)
-; CHECK-PIC: addiu ${{[0-9]+}}, $[[R1]], %lo($tmp[[T1]])
-; CHECK-STATIC: lui  $[[R2:[0-9]+]], %hi($tmp[[T0:[0-9]+]])
-; CHECK-STATIC: addiu ${{[0-9]+}}, $[[R2]], %lo($tmp[[T0]])
-; CHECK-STATIC: lui   $[[R3:[0-9]+]], %hi($tmp[[T1:[0-9]+]])
-; CHECK-STATIC: addiu ${{[0-9]+}}, $[[R3]], %lo($tmp[[T1]])
+; PIC-O32: lw  $[[R0:[0-9]+]], %got($tmp[[T0:[0-9]+]])
+; PIC-O32: addiu ${{[0-9]+}}, $[[R0]], %lo($tmp[[T0]])
+; PIC-O32: lw  $[[R1:[0-9]+]], %got($tmp[[T1:[0-9]+]])
+; PIC-O32: addiu ${{[0-9]+}}, $[[R1]], %lo($tmp[[T1]])
+; STATIC-O32: lui  $[[R2:[0-9]+]], %hi($tmp[[T2:[0-9]+]])
+; STATIC-O32: addiu ${{[0-9]+}}, $[[R2]], %lo($tmp[[T2]])
+; STATIC-O32: lui   $[[R3:[0-9]+]], %hi($tmp[[T3:[0-9]+]])
+; STATIC-O32: addiu ${{[0-9]+}}, $[[R3]], %lo($tmp[[T3]])
+; PIC-N32: lw  $[[R0:[0-9]+]], %got_page($tmp[[T0:[0-9]+]])
+; PIC-N32: addiu ${{[0-9]+}}, $[[R0]], %got_ofst($tmp[[T0]])
+; PIC-N32: lw  $[[R1:[0-9]+]], %got_page($tmp[[T1:[0-9]+]])
+; PIC-N32: addiu ${{[0-9]+}}, $[[R1]], %got_ofst($tmp[[T1]])
+; STATIC-N32: lui  $[[R2:[0-9]+]], %hi($tmp[[T2:[0-9]+]])
+; STATIC-N32: addiu ${{[0-9]+}}, $[[R2]], %lo($tmp[[T2]])
+; STATIC-N32: lui   $[[R3:[0-9]+]], %hi($tmp[[T3:[0-9]+]])
+; STATIC-N32: addiu ${{[0-9]+}}, $[[R3]], %lo($tmp[[T3]])
+; PIC-N64: ld  $[[R0:[0-9]+]], %got_page($tmp[[T0:[0-9]+]])
+; PIC-N64: daddiu ${{[0-9]+}}, $[[R0]], %got_ofst($tmp[[T0]])
+; PIC-N64: ld  $[[R1:[0-9]+]], %got_page($tmp[[T1:[0-9]+]])
+; PIC-N64: daddiu ${{[0-9]+}}, $[[R1]], %got_ofst($tmp[[T1]])
+; STATIC-N64: ld  $[[R2:[0-9]+]], %got_page($tmp[[T2:[0-9]+]])
+; STATIC-N64: daddiu ${{[0-9]+}}, $[[R2]], %got_ofst($tmp[[T2]])
+; STATIC-N64: ld  $[[R3:[0-9]+]], %got_page($tmp[[T3:[0-9]+]])
+; STATIC-N64: daddiu ${{[0-9]+}}, $[[R3]], %got_ofst($tmp[[T3]])
 define void @f() nounwind {
 entry:
   %call = tail call i8* @dummy(i8* blockaddress(@f, %baz))
diff --git a/test/CodeGen/Mips/cprestore.ll b/test/CodeGen/Mips/cprestore.ll
index a275c8b..57d022f 100644
--- a/test/CodeGen/Mips/cprestore.ll
+++ b/test/CodeGen/Mips/cprestore.ll
@@ -1,7 +1,9 @@
 ; RUN: llc -march=mipsel < %s | FileCheck %s
 
 ; CHECK: .set macro
+; CHECK: .set at
 ; CHECK-NEXT: .cprestore
+; CHECK: .set noat
 ; CHECK-NEXT: .set nomacro
 
 %struct.S = type { [16384 x i32] }
diff --git a/test/CodeGen/Mips/eh.ll b/test/CodeGen/Mips/eh.ll
index c3facdb..2e2f9a4 100644
--- a/test/CodeGen/Mips/eh.ll
+++ b/test/CodeGen/Mips/eh.ll
@@ -26,7 +26,7 @@ entry:
 lpad:                                             ; preds = %entry
 ; CHECK-EL:  # %lpad
 ; CHECK-EL:  lw  $gp
-; CHECK-EL:  beq $5
+; CHECK-EL:  bne $5
 
   %exn.val = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
            catch i8* bitcast (i8** @_ZTId to i8*)
diff --git a/test/CodeGen/Mips/fabs.ll b/test/CodeGen/Mips/fabs.ll
new file mode 100644
index 0000000..b296ab3
--- /dev/null
+++ b/test/CodeGen/Mips/fabs.ll
@@ -0,0 +1,52 @@
+; RUN: llc  < %s -march=mipsel -mcpu=mips32 | FileCheck %s -check-prefix=32
+; RUN: llc  < %s -march=mipsel -mcpu=mips32r2 | FileCheck %s -check-prefix=32R2
+; RUN: llc  < %s -march=mips64el -mcpu=mips64 -mattr=n64 | FileCheck %s -check-prefix=64
+; RUN: llc  < %s -march=mips64el -mcpu=mips64r2 -mattr=n64 | FileCheck %s -check-prefix=64R2
+; RUN: llc  < %s -march=mipsel -mcpu=mips32 -enable-no-nans-fp-math | FileCheck %s -check-prefix=NO-NAN
+
+define float @foo0(float %a) nounwind readnone {
+entry:
+
+; 32: lui  $[[T0:[0-9]+]], 32767
+; 32: ori  $[[MSK0:[0-9]+]], $[[T0]], 65535
+; 32: and  $[[AND:[0-9]+]], ${{[0-9]+}}, $[[MSK0]]
+; 32: mtc1 $[[AND]], $f0
+
+; 32R2: ins  $[[INS:[0-9]+]], $zero, 31, 1
+; 32R2: mtc1 $[[INS]], $f0
+
+; NO-NAN: abs.s
+
+  %call = tail call float @fabsf(float %a) nounwind readnone
+  ret float %call
+}
+
+declare float @fabsf(float) nounwind readnone
+
+define double @foo1(double %a) nounwind readnone {
+entry:
+
+; 32: lui  $[[T0:[0-9]+]], 32767
+; 32: ori  $[[MSK0:[0-9]+]], $[[T0]], 65535
+; 32: and  $[[AND:[0-9]+]], ${{[0-9]+}}, $[[MSK0]]
+; 32: mtc1 $[[AND]], $f1
+
+; 32R2: ins  $[[INS:[0-9]+]], $zero, 31, 1
+; 32R2: mtc1 $[[INS]], $f1
+
+; 64: daddiu  $[[T0:[0-9]+]], $zero, 1
+; 64: dsll    $[[T1:[0-9]+]], ${{[0-9]+}}, 63
+; 64: daddiu  $[[MSK0:[0-9]+]], $[[T1]], -1
+; 64: and     $[[AND:[0-9]+]], ${{[0-9]+}}, $[[MSK0]]
+; 64: dmtc1   $[[AND]], $f0
+
+; 64R2: dins  $[[INS:[0-9]+]], $zero, 63, 1
+; 64R2: dmtc1 $[[INS]], $f0
+
+; NO-NAN: abs.d
+
+  %call = tail call double @fabs(double %a) nounwind readnone
+  ret double %call
+}
+
+declare double @fabs(double) nounwind readnone
diff --git a/test/CodeGen/Mips/fcopysign-f32-f64.ll b/test/CodeGen/Mips/fcopysign-f32-f64.ll
new file mode 100644
index 0000000..b36473d
--- /dev/null
+++ b/test/CodeGen/Mips/fcopysign-f32-f64.ll
@@ -0,0 +1,50 @@
+; RUN: llc  < %s -march=mips64el -mcpu=mips64 -mattr=n64 | FileCheck %s -check-prefix=64
+; RUN: llc  < %s -march=mips64el -mcpu=mips64r2 -mattr=n64 | FileCheck %s -check-prefix=64R2
+
+declare double @copysign(double, double) nounwind readnone
+
+declare float @copysignf(float, float) nounwind readnone
+
+define float @func2(float %d, double %f) nounwind readnone {
+entry:
+; 64: func2
+; 64: lui  $[[T0:[0-9]+]], 32767
+; 64: ori  $[[MSK0:[0-9]+]], $[[T0]], 65535
+; 64: and  $[[AND0:[0-9]+]], ${{[0-9]+}}, $[[MSK0]]
+; 64: dsrl ${{[0-9]+}}, ${{[0-9]+}}, 63
+; 64: sll  $[[SLL:[0-9]+]], ${{[0-9]+}}, 31
+; 64: or   $[[OR:[0-9]+]], $[[AND0]], $[[SLL]]
+; 64: mtc1 $[[OR]], $f0
+
+; 64R2: dext ${{[0-9]+}}, ${{[0-9]+}}, 63, 1
+; 64R2: ins  $[[INS:[0-9]+]], ${{[0-9]+}}, 31, 1
+; 64R2: mtc1 $[[INS]], $f0
+
+  %add = fadd float %d, 1.000000e+00
+  %conv = fptrunc double %f to float
+  %call = tail call float @copysignf(float %add, float %conv) nounwind readnone
+  ret float %call
+}
+
+define double @func3(double %d, float %f) nounwind readnone {
+entry:
+
+; 64: daddiu $[[T0:[0-9]+]], $zero, 1
+; 64: dsll   $[[T1:[0-9]+]], $[[T0]], 63
+; 64: daddiu $[[MSK0:[0-9]+]], $[[T1]], -1
+; 64: and    $[[AND0:[0-9]+]], ${{[0-9]+}}, $[[MSK0]]
+; 64: srl    ${{[0-9]+}}, ${{[0-9]+}}, 31
+; 64: dsll   $[[DSLL:[0-9]+]], ${{[0-9]+}}, 63
+; 64: or     $[[OR:[0-9]+]], $[[AND0]], $[[DSLL]]
+; 64: dmtc1  $[[OR]], $f0
+
+; 64R2: ext   ${{[0-9]+}}, ${{[0-9]+}}, 31, 1
+; 64R2: dins  $[[INS:[0-9]+]], ${{[0-9]+}}, 63, 1
+; 64R2: dmtc1 $[[INS]], $f0
+
+  %add = fadd double %d, 1.000000e+00
+  %conv = fpext float %f to double
+  %call = tail call double @copysign(double %add, double %conv) nounwind readnone
+  ret double %call
+}
+
diff --git a/test/CodeGen/Mips/fcopysign.ll b/test/CodeGen/Mips/fcopysign.ll
index e494fe2..1c57eca 100644
--- a/test/CodeGen/Mips/fcopysign.ll
+++ b/test/CodeGen/Mips/fcopysign.ll
@@ -1,40 +1,35 @@
-; RUN: llc  < %s -march=mipsel | FileCheck %s -check-prefix=MIPS32-EL
-; RUN: llc  < %s -march=mips | FileCheck %s -check-prefix=MIPS32-EB
-; RUN: llc  < %s -march=mips64el -mcpu=mips64 -mattr=n64 | FileCheck %s -check-prefix=MIPS64
+; RUN: llc  < %s -march=mipsel -mcpu=mips32 | FileCheck %s -check-prefix=32
+; RUN: llc  < %s -march=mipsel -mcpu=mips32r2 | FileCheck %s -check-prefix=32R2
+; RUN: llc  < %s -march=mips64el -mcpu=mips64 -mattr=n64 | FileCheck %s -check-prefix=64
+; RUN: llc  < %s -march=mips64el -mcpu=mips64r2 -mattr=n64 | FileCheck %s -check-prefix=64R2
 
 define double @func0(double %d0, double %d1) nounwind readnone {
 entry:
-; MIPS32-EL: func0:
-; MIPS32-EL: mfc1 $[[HI0:[0-9]+]], $f15
-; MIPS32-EL: lui $[[MSK1:[0-9]+]], 32768
-; MIPS32-EL: and $[[AND1:[0-9]+]], $[[HI0]], $[[MSK1]]
-; MIPS32-EL: lui $[[T0:[0-9]+]], 32767
-; MIPS32-EL: ori $[[MSK0:[0-9]+]], $[[T0]], 65535
-; MIPS32-EL: mfc1 $[[HI1:[0-9]+]], $f13
-; MIPS32-EL: and $[[AND0:[0-9]+]], $[[HI1]], $[[MSK0]]
-; MIPS32-EL: or  $[[OR:[0-9]+]], $[[AND0]], $[[AND1]]
-; MIPS32-EL: mfc1 $[[LO0:[0-9]+]], $f12
-; MIPS32-EL: mtc1 $[[LO0]], $f0
-; MIPS32-EL: mtc1 $[[OR]], $f1
 ;
-; MIPS32-EB: mfc1 $[[HI1:[0-9]+]], $f14
-; MIPS32-EB: lui $[[MSK1:[0-9]+]], 32768
-; MIPS32-EB: and $[[AND1:[0-9]+]], $[[HI1]], $[[MSK1]]
-; MIPS32-EB: lui $[[T0:[0-9]+]], 32767
-; MIPS32-EB: ori $[[MSK0:[0-9]+]], $[[T0]], 65535
-; MIPS32-EB: mfc1 $[[HI0:[0-9]+]], $f12
-; MIPS32-EB: and $[[AND0:[0-9]+]], $[[HI0]], $[[MSK0]]
-; MIPS32-EB: or  $[[OR:[0-9]+]], $[[AND0]], $[[AND1]]
-; MIPS32-EB: mfc1 $[[LO0:[0-9]+]], $f13
-; MIPS32-EB: mtc1 $[[OR]], $f0
-; MIPS32-EB: mtc1 $[[LO0]], $f1
-
-; MIPS64: dmfc1 $[[R0:[0-9]+]], $f13
-; MIPS64: and $[[R1:[0-9]+]], $[[R0]], ${{[0-9]+}}
-; MIPS64: dmfc1 $[[R2:[0-9]+]], $f12
-; MIPS64: and $[[R3:[0-9]+]], $[[R2]], ${{[0-9]+}}
-; MIPS64: or  $[[R4:[0-9]+]], $[[R3]], $[[R1]]
-; MIPS64: dmtc1 $[[R4]], $f0
+; 32: lui  $[[MSK1:[0-9]+]], 32768
+; 32: and  $[[AND1:[0-9]+]], ${{[0-9]+}}, $[[MSK1]]
+; 32: lui  $[[T0:[0-9]+]], 32767
+; 32: ori  $[[MSK0:[0-9]+]], $[[T0]], 65535
+; 32: and  $[[AND0:[0-9]+]], ${{[0-9]+}}, $[[MSK0]]
+; 32: or   $[[OR:[0-9]+]], $[[AND0]], $[[AND1]]
+; 32: mtc1 $[[OR]], $f1
+
+; 32R2: ext  $[[EXT:[0-9]+]], ${{[0-9]+}}, 31, 1
+; 32R2: ins  $[[INS:[0-9]+]], $[[EXT]], 31, 1
+; 32R2: mtc1 $[[INS]], $f1
+
+; 64: daddiu $[[T0:[0-9]+]], $zero, 1
+; 64: dsll   $[[MSK1:[0-9]+]], $[[T0]], 63
+; 64: and    $[[AND1:[0-9]+]], ${{[0-9]+}}, $[[MSK1]]
+; 64: daddiu $[[MSK0:[0-9]+]], $[[MSK1]], -1
+; 64: and    $[[AND0:[0-9]+]], ${{[0-9]+}}, $[[MSK0]]
+; 64: or     $[[OR:[0-9]+]], $[[AND0]], $[[AND1]]
+; 64: dmtc1  $[[OR]], $f0
+
+; 64R2: dext  $[[EXT:[0-9]+]], ${{[0-9]+}}, 63, 1
+; 64R2: dins  $[[INS:[0-9]+]], $[[EXT]], 63, 1
+; 64R2: dmtc1 $[[INS]], $f0
+
   %call = tail call double @copysign(double %d0, double %d1) nounwind readnone
   ret double %call
 }
@@ -43,18 +38,22 @@ declare double @copysign(double, double) nounwind readnone
 
 define float @func1(float %f0, float %f1) nounwind readnone {
 entry:
-; MIPS32-EL: func1:
-; MIPS32-EL: mfc1 $[[ARG1:[0-9]+]], $f14
-; MIPS32-EL: lui $[[MSK1:[0-9]+]], 32768
-; MIPS32-EL: and $[[T3:[0-9]+]], $[[ARG1]], $[[MSK1]]
-; MIPS32-EL: lui $[[T0:[0-9]+]], 32767
-; MIPS32-EL: ori $[[MSK0:[0-9]+]], $[[T0]], 65535
-; MIPS32-EL: mfc1 $[[ARG0:[0-9]+]], $f12
-; MIPS32-EL: and $[[T2:[0-9]+]], $[[ARG0]], $[[MSK0]]
-; MIPS32-EL: or  $[[T4:[0-9]+]], $[[T2]], $[[T3]]
-; MIPS32-EL: mtc1 $[[T4]], $f0
+
+; 32: lui  $[[MSK1:[0-9]+]], 32768
+; 32: and  $[[AND1:[0-9]+]], ${{[0-9]+}}, $[[MSK1]]
+; 32: lui  $[[T0:[0-9]+]], 32767
+; 32: ori  $[[MSK0:[0-9]+]], $[[T0]], 65535
+; 32: and  $[[AND0:[0-9]+]], ${{[0-9]+}}, $[[MSK0]]
+; 32: or   $[[OR:[0-9]+]], $[[AND0]], $[[AND1]]
+; 32: mtc1 $[[OR]], $f0
+
+; 32R2: ext  $[[EXT:[0-9]+]], ${{[0-9]+}}, 31, 1
+; 32R2: ins  $[[INS:[0-9]+]], $[[EXT]], 31, 1
+; 32R2: mtc1 $[[INS]], $f0
+
   %call = tail call float @copysignf(float %f0, float %f1) nounwind readnone
   ret float %call
 }
 
 declare float @copysignf(float, float) nounwind readnone
+
diff --git a/test/CodeGen/Mips/fneg.ll b/test/CodeGen/Mips/fneg.ll
new file mode 100644
index 0000000..b322abd
--- /dev/null
+++ b/test/CodeGen/Mips/fneg.ll
@@ -0,0 +1,17 @@
+; RUN: llc  < %s -march=mipsel -mcpu=mips32 | FileCheck %s 
+
+define float @foo0(i32 %a, float %d) nounwind readnone {
+entry:
+; CHECK-NOT: neg.s
+  %sub = fsub float -0.000000e+00, %d
+  ret float %sub
+}
+
+define double @foo1(i32 %a, double %d) nounwind readnone {
+entry:
+; CHECK:     foo1
+; CHECK-NOT: neg.d
+; CHECK:     jr
+  %sub = fsub double -0.000000e+00, %d
+  ret double %sub
+}
diff --git a/test/CodeGen/Mips/fpbr.ll b/test/CodeGen/Mips/fpbr.ll
index 0a6478b..a136557 100644
--- a/test/CodeGen/Mips/fpbr.ll
+++ b/test/CodeGen/Mips/fpbr.ll
@@ -45,7 +45,7 @@ if.end:                                           ; preds = %if.else, %if.then
 define void @func2(float %f2, float %f3) nounwind {
 entry:
 ; CHECK: c.ole.s
-; CHECK: bc1f
+; CHECK: bc1t
   %cmp = fcmp ugt float %f2, %f3
   br i1 %cmp, label %if.else, label %if.then
 
@@ -102,7 +102,7 @@ if.end:                                           ; preds = %if.else, %if.then
 define void @func5(double %f2, double %f3) nounwind {
 entry:
 ; CHECK: c.ole.d
-; CHECK: bc1f
+; CHECK: bc1t
   %cmp = fcmp ugt double %f2, %f3
   br i1 %cmp, label %if.else, label %if.then
 
diff --git a/test/CodeGen/Mips/frem.ll b/test/CodeGen/Mips/frem.ll
new file mode 100644
index 0000000..be222b2
--- /dev/null
+++ b/test/CodeGen/Mips/frem.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=mipsel 
+
+define float @fmods(float %x, float %y) {
+entry:
+  %r = frem float %x, %y
+  ret float %r
+}
+
+define double @fmodd(double %x, double %y) {
+entry:
+  %r = frem double %x, %y
+  ret double %r
+}
diff --git a/test/CodeGen/Mips/global-address.ll b/test/CodeGen/Mips/global-address.ll
new file mode 100644
index 0000000..0d49a74
--- /dev/null
+++ b/test/CodeGen/Mips/global-address.ll
@@ -0,0 +1,46 @@
+; RUN: llc -march=mipsel -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-O32
+; RUN: llc -march=mipsel -relocation-model=static -mtriple=mipsel-linux-gnu < %s | FileCheck %s -check-prefix=STATIC-O32
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n32 -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-N32
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n32 -relocation-model=static  -mtriple=mipsel-linux-gnu < %s | FileCheck %s -check-prefix=STATIC-N32
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-N64
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-N64
+
+@s1 = internal unnamed_addr global i32 8, align 4
+@g1 = external global i32
+
+define void @foo() nounwind {
+entry:
+; PIC-O32: lw  $[[R0:[0-9]+]], %got(s1)
+; PIC-O32: lw  ${{[0-9]+}}, %lo(s1)($[[R0]])
+; PIC-O32: lw  ${{[0-9]+}}, %got(g1)
+; STATIC-O32: lui $[[R1:[0-9]+]], %hi(s1)
+; STATIC-O32: lw  ${{[0-9]+}}, %lo(s1)($[[R1]])
+; STATIC-O32: lui $[[R2:[0-9]+]], %hi(g1)
+; STATIC-O32: lw  ${{[0-9]+}}, %lo(g1)($[[R2]])
+
+; PIC-N32: lw  $[[R0:[0-9]+]], %got_page(s1)
+; PIC-N32: lw  ${{[0-9]+}}, %got_ofst(s1)($[[R0]])
+; PIC-N32: lw  ${{[0-9]+}}, %got_disp(g1)
+; STATIC-N32: lui $[[R1:[0-9]+]], %hi(s1)
+; STATIC-N32: lw  ${{[0-9]+}}, %lo(s1)($[[R1]])
+; STATIC-N32: lui $[[R2:[0-9]+]], %hi(g1)
+; STATIC-N32: lw  ${{[0-9]+}}, %lo(g1)($[[R2]])
+
+; PIC-N64: ld  $[[R0:[0-9]+]], %got_page(s1)
+; PIC-N64: lw  ${{[0-9]+}}, %got_ofst(s1)($[[R0]])
+; PIC-N64: ld  ${{[0-9]+}}, %got_disp(g1)
+; STATIC-N64: ld  $[[R1:[0-9]+]], %got_page(s1)
+; STATIC-N64: lw  ${{[0-9]+}}, %got_ofst(s1)($[[R1]])
+; STATIC-N64: ld  ${{[0-9]+}}, %got_disp(g1)
+
+  %0 = load i32* @s1, align 4
+  tail call void @foo1(i32 %0) nounwind
+  %1 = load i32* @g1, align 4
+  store i32 %1, i32* @s1, align 4
+  %add = add nsw i32 %1, 2
+  store i32 %add, i32* @g1, align 4
+  ret void
+}
+
+declare void @foo1(i32)
+
diff --git a/test/CodeGen/Mips/lit.local.cfg b/test/CodeGen/Mips/lit.local.cfg
index e1cd73a..0587d32 100644
--- a/test/CodeGen/Mips/lit.local.cfg
+++ b/test/CodeGen/Mips/lit.local.cfg
@@ -1,13 +1,6 @@
 config.suffixes = ['.ll', '.c', '.cpp']
 
-def getRoot(config):
-    if not config.parent:
-        return config
-    return getRoot(config.parent)
-
-root = getRoot(config)
-
-targets = set(root.targets_to_build.split())
+targets = set(config.root.targets_to_build.split())
 if not 'Mips' in targets:
     config.unsupported = True
 
diff --git a/test/CodeGen/Mips/mips64fpldst.ll b/test/CodeGen/Mips/mips64fpldst.ll
index abeff09..24647b2 100644
--- a/test/CodeGen/Mips/mips64fpldst.ll
+++ b/test/CodeGen/Mips/mips64fpldst.ll
@@ -12,7 +12,7 @@ entry:
 ; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(f0)
 ; CHECK-N64: lwc1 $f{{[0-9]+}}, 0($[[R0]]) 
 ; CHECK-N32: funcfl1
-; CHECK-N32: lw $[[R0:[0-9]+]], %got(f0)
+; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(f0)
 ; CHECK-N32: lwc1 $f{{[0-9]+}}, 0($[[R0]]) 
   %0 = load float* @f0, align 4
   ret float %0
@@ -24,7 +24,7 @@ entry:
 ; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(d0)
 ; CHECK-N64: ldc1 $f{{[0-9]+}}, 0($[[R0]]) 
 ; CHECK-N32: funcfl2
-; CHECK-N32: lw $[[R0:[0-9]+]], %got(d0)
+; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(d0)
 ; CHECK-N32: ldc1 $f{{[0-9]+}}, 0($[[R0]]) 
   %0 = load double* @d0, align 8 
   ret double %0
@@ -36,7 +36,7 @@ entry:
 ; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(f0)
 ; CHECK-N64: swc1 $f{{[0-9]+}}, 0($[[R0]]) 
 ; CHECK-N32: funcfs1
-; CHECK-N32: lw $[[R0:[0-9]+]], %got(f0)
+; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(f0)
 ; CHECK-N32: swc1 $f{{[0-9]+}}, 0($[[R0]]) 
   %0 = load float* @f1, align 4 
   store float %0, float* @f0, align 4 
@@ -49,7 +49,7 @@ entry:
 ; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(d0)
 ; CHECK-N64: sdc1 $f{{[0-9]+}}, 0($[[R0]]) 
 ; CHECK-N32: funcfs2
-; CHECK-N32: lw $[[R0:[0-9]+]], %got(d0)
+; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(d0)
 ; CHECK-N32: sdc1 $f{{[0-9]+}}, 0($[[R0]]) 
   %0 = load double* @d1, align 8 
   store double %0, double* @d0, align 8 
diff --git a/test/CodeGen/Mips/mips64intldst.ll b/test/CodeGen/Mips/mips64intldst.ll
index af3a2f8..0e310a8 100644
--- a/test/CodeGen/Mips/mips64intldst.ll
+++ b/test/CodeGen/Mips/mips64intldst.ll
@@ -16,7 +16,7 @@ entry:
 ; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(c)
 ; CHECK-N64: lb ${{[0-9]+}}, 0($[[R0]])
 ; CHECK-N32: func1
-; CHECK-N32: lw $[[R0:[0-9]+]], %got(c)
+; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(c)
 ; CHECK-N32: lb ${{[0-9]+}}, 0($[[R0]])
   %0 = load i8* @c, align 4
   %conv = sext i8 %0 to i64
@@ -29,7 +29,7 @@ entry:
 ; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(s)
 ; CHECK-N64: lh ${{[0-9]+}}, 0($[[R0]])
 ; CHECK-N32: func2
-; CHECK-N32: lw $[[R0:[0-9]+]], %got(s)
+; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(s)
 ; CHECK-N32: lh ${{[0-9]+}}, 0($[[R0]])
   %0 = load i16* @s, align 4
   %conv = sext i16 %0 to i64
@@ -42,7 +42,7 @@ entry:
 ; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(i)
 ; CHECK-N64: lw ${{[0-9]+}}, 0($[[R0]])
 ; CHECK-N32: func3
-; CHECK-N32: lw $[[R0:[0-9]+]], %got(i)
+; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(i)
 ; CHECK-N32: lw ${{[0-9]+}}, 0($[[R0]])
   %0 = load i32* @i, align 4
   %conv = sext i32 %0 to i64
@@ -55,7 +55,7 @@ entry:
 ; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(l)
 ; CHECK-N64: ld ${{[0-9]+}}, 0($[[R0]])
 ; CHECK-N32: func4
-; CHECK-N32: lw $[[R0:[0-9]+]], %got(l)
+; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(l)
 ; CHECK-N32: ld ${{[0-9]+}}, 0($[[R0]])
   %0 = load i64* @l, align 8
   ret i64 %0
@@ -67,7 +67,7 @@ entry:
 ; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(uc)
 ; CHECK-N64: lbu ${{[0-9]+}}, 0($[[R0]])
 ; CHECK-N32: ufunc1
-; CHECK-N32: lw $[[R0:[0-9]+]], %got(uc)
+; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(uc)
 ; CHECK-N32: lbu ${{[0-9]+}}, 0($[[R0]])
   %0 = load i8* @uc, align 4
   %conv = zext i8 %0 to i64
@@ -80,7 +80,7 @@ entry:
 ; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(us)
 ; CHECK-N64: lhu ${{[0-9]+}}, 0($[[R0]])
 ; CHECK-N32: ufunc2
-; CHECK-N32: lw $[[R0:[0-9]+]], %got(us)
+; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(us)
 ; CHECK-N32: lhu ${{[0-9]+}}, 0($[[R0]])
   %0 = load i16* @us, align 4
   %conv = zext i16 %0 to i64
@@ -93,7 +93,7 @@ entry:
 ; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(ui)
 ; CHECK-N64: lwu ${{[0-9]+}}, 0($[[R0]])
 ; CHECK-N32: ufunc3
-; CHECK-N32: lw $[[R0:[0-9]+]], %got(ui)
+; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(ui)
 ; CHECK-N32: lwu ${{[0-9]+}}, 0($[[R0]])
   %0 = load i32* @ui, align 4
   %conv = zext i32 %0 to i64
@@ -106,7 +106,7 @@ entry:
 ; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(c)
 ; CHECK-N64: sb ${{[0-9]+}}, 0($[[R0]])
 ; CHECK-N32: sfunc1
-; CHECK-N32: lw $[[R0:[0-9]+]], %got(c)
+; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(c)
 ; CHECK-N32: sb ${{[0-9]+}}, 0($[[R0]])
   %0 = load i64* @l1, align 8
   %conv = trunc i64 %0 to i8
@@ -120,7 +120,7 @@ entry:
 ; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(s)
 ; CHECK-N64: sh ${{[0-9]+}}, 0($[[R0]])
 ; CHECK-N32: sfunc2
-; CHECK-N32: lw $[[R0:[0-9]+]], %got(s)
+; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(s)
 ; CHECK-N32: sh ${{[0-9]+}}, 0($[[R0]])
   %0 = load i64* @l1, align 8
   %conv = trunc i64 %0 to i16
@@ -134,7 +134,7 @@ entry:
 ; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(i)
 ; CHECK-N64: sw ${{[0-9]+}}, 0($[[R0]])
 ; CHECK-N32: sfunc3
-; CHECK-N32: lw $[[R0:[0-9]+]], %got(i)
+; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(i)
 ; CHECK-N32: sw ${{[0-9]+}}, 0($[[R0]])
   %0 = load i64* @l1, align 8
   %conv = trunc i64 %0 to i32
@@ -148,7 +148,7 @@ entry:
 ; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(l)
 ; CHECK-N64: sd ${{[0-9]+}}, 0($[[R0]])
 ; CHECK-N32: sfunc4
-; CHECK-N32: lw $[[R0:[0-9]+]], %got(l)
+; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(l)
 ; CHECK-N32: sd ${{[0-9]+}}, 0($[[R0]])
   %0 = load i64* @l1, align 8
   store i64 %0, i64* @l, align 8
diff --git a/test/CodeGen/PTX/lit.local.cfg b/test/CodeGen/PTX/lit.local.cfg
index 7399089..e748f7f 100644
--- a/test/CodeGen/PTX/lit.local.cfg
+++ b/test/CodeGen/PTX/lit.local.cfg
@@ -1,13 +1,6 @@
 config.suffixes = ['.ll', '.c', '.cpp']
 
-def getRoot(config):
-    if not config.parent:
-        return config
-    return getRoot(config.parent)
-
-root = getRoot(config)
-
-targets = set(root.targets_to_build.split())
+targets = set(config.root.targets_to_build.split())
 if not 'PTX' in targets:
     config.unsupported = True
 
diff --git a/test/CodeGen/PowerPC/a2-fp-basic.ll b/test/CodeGen/PowerPC/a2-fp-basic.ll
new file mode 100644
index 0000000..932ad7a
--- /dev/null
+++ b/test/CodeGen/PowerPC/a2-fp-basic.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s -march=ppc64 -mcpu=a2 | FileCheck %s
+
+%0 = type { double, double }
+
+define void @maybe_an_fma(%0* sret %agg.result, %0* byval %a, %0* byval %b, %0* byval %c) nounwind {
+entry:
+  %a.realp = getelementptr inbounds %0* %a, i32 0, i32 0
+  %a.real = load double* %a.realp
+  %a.imagp = getelementptr inbounds %0* %a, i32 0, i32 1
+  %a.imag = load double* %a.imagp
+  %b.realp = getelementptr inbounds %0* %b, i32 0, i32 0
+  %b.real = load double* %b.realp
+  %b.imagp = getelementptr inbounds %0* %b, i32 0, i32 1
+  %b.imag = load double* %b.imagp
+  %mul.rl = fmul double %a.real, %b.real
+  %mul.rr = fmul double %a.imag, %b.imag
+  %mul.r = fsub double %mul.rl, %mul.rr
+  %mul.il = fmul double %a.imag, %b.real
+  %mul.ir = fmul double %a.real, %b.imag
+  %mul.i = fadd double %mul.il, %mul.ir
+  %c.realp = getelementptr inbounds %0* %c, i32 0, i32 0
+  %c.real = load double* %c.realp
+  %c.imagp = getelementptr inbounds %0* %c, i32 0, i32 1
+  %c.imag = load double* %c.imagp
+  %add.r = fadd double %mul.r, %c.real
+  %add.i = fadd double %mul.i, %c.imag
+  %real = getelementptr inbounds %0* %agg.result, i32 0, i32 0
+  %imag = getelementptr inbounds %0* %agg.result, i32 0, i32 1
+  store double %add.r, double* %real
+  store double %add.i, double* %imag
+  ret void
+; CHECK: fmadd
+}
diff --git a/test/CodeGen/PowerPC/lit.local.cfg b/test/CodeGen/PowerPC/lit.local.cfg
index 5c7f267..4019eca 100644
--- a/test/CodeGen/PowerPC/lit.local.cfg
+++ b/test/CodeGen/PowerPC/lit.local.cfg
@@ -1,13 +1,6 @@
 config.suffixes = ['.ll', '.c', '.cpp']
 
-def getRoot(config):
-    if not config.parent:
-        return config
-    return getRoot(config.parent)
-
-root = getRoot(config)
-
-targets = set(root.targets_to_build.split())
+targets = set(config.root.targets_to_build.split())
 if not 'PowerPC' in targets:
     config.unsupported = True
 
diff --git a/test/CodeGen/PowerPC/ppc-vaarg-agg.ll b/test/CodeGen/PowerPC/ppc-vaarg-agg.ll
new file mode 100644
index 0000000..d5ea044
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc-vaarg-agg.ll
@@ -0,0 +1,46 @@
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32"
+target triple = "powerpc-montavista-linux-gnuspe"
+; RUN: llc < %s -march=ppc32 | FileCheck %s
+
+%struct.__va_list_tag.0.9.18.23.32.41.48.55.62.67.72.77.82.87.90.93.96.101.105 = type { i8, i8, i16, i8*, i8* }
+
+define fastcc void @test1(%struct.__va_list_tag.0.9.18.23.32.41.48.55.62.67.72.77.82.87.90.93.96.101.105* %args) {
+entry:
+  br i1 undef, label %repeat, label %maxlen_reached
+
+repeat:                                           ; preds = %entry
+  switch i32 undef, label %sw.bb323 [
+    i32 77, label %sw.bb72
+    i32 111, label %sw.bb309
+    i32 80, label %sw.bb313
+    i32 117, label %sw.bb326
+    i32 88, label %sw.bb321
+  ]
+
+sw.bb72:                                          ; preds = %repeat
+  unreachable
+
+sw.bb309:                                         ; preds = %repeat
+  unreachable
+
+sw.bb313:                                         ; preds = %repeat
+  unreachable
+
+sw.bb321:                                         ; preds = %repeat
+  unreachable
+
+sw.bb323:                                         ; preds = %repeat
+  %0 = va_arg %struct.__va_list_tag.0.9.18.23.32.41.48.55.62.67.72.77.82.87.90.93.96.101.105* %args, i32
+  unreachable
+
+sw.bb326:                                         ; preds = %repeat
+  unreachable
+
+maxlen_reached:                                   ; preds = %entry
+  ret void
+}
+
+; If the SD nodes are not cleaup up correctly, then this can fail to compile
+; with an error like:  Cannot select: ch = setlt [ID=6]
+; CHECK: @test1
+
diff --git a/test/CodeGen/PowerPC/ppc64-prefetch.ll b/test/CodeGen/PowerPC/ppc64-prefetch.ll
new file mode 100644
index 0000000..b2f3709
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc64-prefetch.ll
@@ -0,0 +1,15 @@
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+; RUN: llc < %s | FileCheck %s
+
+define void @test1(i8* %a, ...) nounwind {
+entry:
+  call void @llvm.prefetch(i8* %a, i32 0, i32 3, i32 1)
+  ret void
+}
+
+declare void @llvm.prefetch(i8*, i32, i32, i32)
+
+; CHECK: @test1
+; CHECK: dcbt
+
diff --git a/test/CodeGen/PowerPC/ppc64-vaarg-int.ll b/test/CodeGen/PowerPC/ppc64-vaarg-int.ll
new file mode 100644
index 0000000..5a63b01
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc64-vaarg-int.ll
@@ -0,0 +1,20 @@
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+; RUN: llc < %s | FileCheck %s
+
+define i32 @intvaarg(i32 %a, ...) nounwind {
+entry:
+  %va = alloca i8*, align 8
+  %va1 = bitcast i8** %va to i8*
+  call void @llvm.va_start(i8* %va1)
+  %0 = va_arg i8** %va, i32
+  %sub = sub nsw i32 %a, %0
+  ret i32 %sub
+}
+
+declare void @llvm.va_start(i8*) nounwind
+
+; CHECK: @intvaarg
+; Make sure that the va pointer is incremented by 8 (not 4).
+; CHECK: addi{{.*}}, 8
+
diff --git a/test/CodeGen/SPARC/lit.local.cfg b/test/CodeGen/SPARC/lit.local.cfg
index ba81a16..786fee9 100644
--- a/test/CodeGen/SPARC/lit.local.cfg
+++ b/test/CodeGen/SPARC/lit.local.cfg
@@ -1,13 +1,6 @@
 config.suffixes = ['.ll', '.c', '.cpp']
 
-def getRoot(config):
-    if not config.parent:
-        return config
-    return getRoot(config.parent)
-
-root = getRoot(config)
-
-targets = set(root.targets_to_build.split())
+targets = set(config.root.targets_to_build.split())
 if not 'Sparc' in targets:
     config.unsupported = True
 
diff --git a/test/CodeGen/Thumb/lit.local.cfg b/test/CodeGen/Thumb/lit.local.cfg
index dd6c50d..cb77b09 100644
--- a/test/CodeGen/Thumb/lit.local.cfg
+++ b/test/CodeGen/Thumb/lit.local.cfg
@@ -1,13 +1,6 @@
 config.suffixes = ['.ll', '.c', '.cpp']
 
-def getRoot(config):
-    if not config.parent:
-        return config
-    return getRoot(config.parent)
-
-root = getRoot(config)
-
-targets = set(root.targets_to_build.split())
+targets = set(config.root.targets_to_build.split())
 if not 'ARM' in targets:
     config.unsupported = True
 
diff --git a/test/CodeGen/Thumb2/crash.ll b/test/CodeGen/Thumb2/crash.ll
index d8b51ec..cb4d080 100644
--- a/test/CodeGen/Thumb2/crash.ll
+++ b/test/CodeGen/Thumb2/crash.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -verify-machineinstrs
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
 target triple = "thumbv7-apple-darwin10"
 
@@ -47,3 +47,32 @@ bb2:                                              ; preds = %bb
   tail call void @llvm.arm.neon.vst4.v4i32(i8* bitcast ([16 x i32]* @dbuf to i8*), <4 x i32> %2, <4 x i32> %3, <4 x i32> %4, <4 x i32> %5, i32 1) nounwind
   ret i32 0
 }
+
+; PR12389
+; Make sure the DPair register class can spill.
+define void @pr12389(i8* %p) nounwind ssp {
+entry:
+  %vld1 = tail call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %p, i32 1)
+  tail call void asm sideeffect "", "~{q0},~{q1},~{q2},~{q3},~{q4},~{q5},~{q6},~{q7},~{q8},~{q9},~{q10},~{q11},~{q12},~{q13},~{q14},~{q15}"() nounwind
+  tail call void @llvm.arm.neon.vst1.v4f32(i8* %p, <4 x float> %vld1, i32 1)
+  ret void
+}
+
+declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly
+
+declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind
+
+; <rdar://problem/11101911>
+; When an strd is expanded into two str instructions, make sure the first str
+; doesn't kill the base register. This can happen if the base register is the
+; same as the data register.
+%class = type { i8*, %class*, i32 }
+define void @f11101911(%class* %this, i32 %num) ssp align 2 {
+entry:
+  %p1 = getelementptr inbounds %class* %this, i32 0, i32 1
+  %p2 = getelementptr inbounds %class* %this, i32 0, i32 2
+  tail call void asm sideeffect "", "~{r1},~{r3},~{r5},~{r11},~{r13}"() nounwind
+  store %class* %this, %class** %p1, align 4
+  store i32 %num, i32* %p2, align 4
+  ret void
+}
diff --git a/test/CodeGen/Thumb2/lit.local.cfg b/test/CodeGen/Thumb2/lit.local.cfg
index dd6c50d..cb77b09 100644
--- a/test/CodeGen/Thumb2/lit.local.cfg
+++ b/test/CodeGen/Thumb2/lit.local.cfg
@@ -1,13 +1,6 @@
 config.suffixes = ['.ll', '.c', '.cpp']
 
-def getRoot(config):
-    if not config.parent:
-        return config
-    return getRoot(config.parent)
-
-root = getRoot(config)
-
-targets = set(root.targets_to_build.split())
+targets = set(config.root.targets_to_build.split())
 if not 'ARM' in targets:
     config.unsupported = True
 
diff --git a/test/CodeGen/Thumb2/lsr-deficiency.ll b/test/CodeGen/Thumb2/lsr-deficiency.ll
index 9ff114e..9aaa821 100644
--- a/test/CodeGen/Thumb2/lsr-deficiency.ll
+++ b/test/CodeGen/Thumb2/lsr-deficiency.ll
@@ -3,11 +3,6 @@
 
 ; This now reduces to a single induction variable.
 
-; TODO: It still gets a GPR shuffle at the end of the loop
-; This is because something in instruction selection has decided
-; that comparing the pre-incremented value with zero is better
-; than comparing the post-incremented value with -4.
-
 @G = external global i32                          ; <i32*> [#uses=2]
 @array = external global i32*                     ; <i32**> [#uses=1]
 
@@ -20,9 +15,9 @@ entry:
 
 bb:                                               ; preds = %bb, %entry
 ; CHECK: LBB0_1:
-; CHECK: cmp [[R2:r[0-9]+]], #0
-; CHECK: sub{{(.w)?}} [[REGISTER:(r[0-9]+)|(lr)]], [[R2]], #1
-; CHECK: mov [[R2]], [[REGISTER]]
+; CHECK: subs [[R2:r[0-9]+]], #1
+; CHECK: cmp.w [[R2]], #-1
+; CHECK: bne LBB0_1
 
   %0 = phi i32 [ %.pre, %entry ], [ %3, %bb ]     ; <i32> [#uses=1]
   %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2]
diff --git a/test/CodeGen/Thumb2/thumb2-branch.ll b/test/CodeGen/Thumb2/thumb2-branch.ll
index 27d8e8f..f1c097c 100644
--- a/test/CodeGen/Thumb2/thumb2-branch.ll
+++ b/test/CodeGen/Thumb2/thumb2-branch.ll
@@ -58,8 +58,8 @@ define i32 @f4(i32 %a, i32 %b, i32* %v) {
 entry:
 ; CHECK: f4:
 ; CHECK: blo LBB
-        %tmp = icmp ult i32 %a, %b              ; <i1> [#uses=1]
-        br i1 %tmp, label %return, label %cond_true
+        %tmp = icmp uge i32 %a, %b              ; <i1> [#uses=1]
+        br i1 %tmp, label %cond_true, label %return
 
 cond_true:              ; preds = %entry
         fence seq_cst
diff --git a/test/CodeGen/Thumb2/thumb2-ifcvt2.ll b/test/CodeGen/Thumb2/thumb2-ifcvt2.ll
index f577f79..5aa9a73 100644
--- a/test/CodeGen/Thumb2/thumb2-ifcvt2.ll
+++ b/test/CodeGen/Thumb2/thumb2-ifcvt2.ll
@@ -29,13 +29,13 @@ declare i32 @bar(...)
 define fastcc i32 @CountTree(%struct.quad_struct* %tree) {
 entry:
 ; CHECK: CountTree:
-; CHECK: it eq
-; CHECK: cmpeq
-; CHECK: bne
-; CHECK: cmp
 ; CHECK: itt eq
 ; CHECK: moveq
 ; CHECK: popeq
+; CHECK: bne
+; CHECK: cmp
+; CHECK: it eq
+; CHECK: cmpeq
 	br label %tailrecurse
 
 tailrecurse:		; preds = %bb, %entry
@@ -83,7 +83,7 @@ define fastcc void @t2() nounwind {
 entry:
 ; CHECK: t2:
 ; CHECK: cmp r0, #0
-; CHECK: beq
+; CHECK: %growMapping.exit
 	br i1 undef, label %bb.i.i3, label %growMapping.exit
 
 bb.i.i3:		; preds = %entry
diff --git a/test/CodeGen/Thumb2/thumb2-jtb.ll b/test/CodeGen/Thumb2/thumb2-jtb.ll
index f5a56e5..7e1655f 100644
--- a/test/CodeGen/Thumb2/thumb2-jtb.ll
+++ b/test/CodeGen/Thumb2/thumb2-jtb.ll
@@ -3,11 +3,19 @@
 ; Do not use tbb / tbh if any destination is before the jumptable.
 ; rdar://7102917
 
-define i16 @main__getopt_internal_2E_exit_2E_ce(i32) nounwind {
+define i16 @main__getopt_internal_2E_exit_2E_ce(i32, i1 %b) nounwind {
+entry:
+  br i1 %b, label %codeRepl127.exitStub, label %newFuncRoot
+
 newFuncRoot:
 	br label %_getopt_internal.exit.ce
 
 codeRepl127.exitStub:		; preds = %_getopt_internal.exit.ce
+  ; Add an explicit edge back to before the jump table to ensure this block
+  ; is placed first.
+  br i1 %b, label %newFuncRoot, label %codeRepl127.exitStub.exit
+
+codeRepl127.exitStub.exit:
 	ret i16 0
 
 parse_options.exit.loopexit.exitStub:		; preds = %_getopt_internal.exit.ce
diff --git a/test/CodeGen/X86/2006-10-19-SwitchUnnecessaryBranching.ll b/test/CodeGen/X86/2006-10-19-SwitchUnnecessaryBranching.ll
index 88e8b4a..d583e59 100644
--- a/test/CodeGen/X86/2006-10-19-SwitchUnnecessaryBranching.ll
+++ b/test/CodeGen/X86/2006-10-19-SwitchUnnecessaryBranching.ll
@@ -6,8 +6,8 @@
 define i32 @test(i32 %argc, i8** %argv) nounwind {
 entry:
 ; CHECK: cmpl	$2
-; CHECK-NEXT: je
-; CHECK-NEXT: %entry
+; CHECK-NEXT: jne
+; CHECK-NEXT: %bb2
 
 	switch i32 %argc, label %UnifiedReturnBlock [
 		 i32 1, label %bb
diff --git a/test/CodeGen/X86/2008-05-01-InvalidOrdCompare.ll b/test/CodeGen/X86/2008-05-01-InvalidOrdCompare.ll
index a708224..4160b20 100644
--- a/test/CodeGen/X86/2008-05-01-InvalidOrdCompare.ll
+++ b/test/CodeGen/X86/2008-05-01-InvalidOrdCompare.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -enable-unsafe-fp-math -march=x86 | grep jnp
+; RUN: llc < %s -enable-unsafe-fp-math -march=x86 | grep jp
 ; rdar://5902801
 
 declare void @test2()
diff --git a/test/CodeGen/X86/2010-06-28-DbgEntryPC.ll b/test/CodeGen/X86/2010-06-28-DbgEntryPC.ll
deleted file mode 100644
index 628790c..0000000
--- a/test/CodeGen/X86/2010-06-28-DbgEntryPC.ll
+++ /dev/null
@@ -1,108 +0,0 @@
-; RUN: llc -O2 -mtriple=i386-apple-darwin <%s | FileCheck %s
-; Use DW_FORM_addr for DW_AT_entry_pc.
-; Radar 8094785
-
-; CHECK:	.byte	17                      ## DW_TAG_compile_unit
-; CHECK-NEXT:	.byte	1                       ## DW_CHILDREN_yes
-; CHECK-NEXT:	.byte	37                      ## DW_AT_producer
-; CHECK-NEXT:	.byte	14                      ## DW_FORM_strp
-; CHECK-NEXT:	.byte	19                      ## DW_AT_language
-; CHECK-NEXT:	.byte	5                       ## DW_FORM_data2
-; CHECK-NEXT:	.byte	3                       ## DW_AT_name
-; CHECK-NEXT:	.byte	14                      ## DW_FORM_strp
-; CHECK-NEXT:	.byte	82                      ## DW_AT_entry_pc
-; CHECK-NEXT:	.byte	1                       ## DW_FORM_addr
-; CHECK-NEXT:	.byte	16                      ## DW_AT_stmt_list
-; CHECK-NEXT:	.byte	6                       ## DW_FORM_data4
-; CHECK-NEXT:	.byte	27                      ## DW_AT_comp_dir
-; CHECK-NEXT:	.byte	14                      ## DW_FORM_strp
-; CHECK-NEXT:	.ascii	"\341\177"              ## DW_AT_APPLE_optimized
-
-%struct.a = type { i32, %struct.a* }
-
-@ret = common global i32 0                        ; <i32*> [#uses=2]
-
-define void @foo(i32 %x) nounwind noinline ssp {
-entry:
-  tail call void @llvm.dbg.value(metadata !{i32 %x}, i64 0, metadata !21), !dbg !28
-  store i32 %x, i32* @ret, align 4, !dbg !29
-  ret void, !dbg !31
-}
-
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
-
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-
-define i8* @bar(%struct.a* %b) nounwind noinline ssp {
-entry:
-  tail call void @llvm.dbg.value(metadata !{%struct.a* %b}, i64 0, metadata !22), !dbg !32
-  %0 = getelementptr inbounds %struct.a* %b, i64 0, i32 0, !dbg !33 ; <i32*> [#uses=1]
-  %1 = load i32* %0, align 8, !dbg !33            ; <i32> [#uses=1]
-  tail call void @foo(i32 %1) nounwind noinline ssp, !dbg !33
-  %2 = bitcast %struct.a* %b to i8*, !dbg !35     ; <i8*> [#uses=1]
-  ret i8* %2, !dbg !35
-}
-
-define i32 @main(i32 %argc, i8** nocapture %argv) nounwind ssp {
-entry:
-  %e = alloca %struct.a, align 8                  ; <%struct.a*> [#uses=4]
-  call void @llvm.dbg.value(metadata !{i32 %argc}, i64 0, metadata !23), !dbg !36
-  call void @llvm.dbg.value(metadata !{i8** %argv}, i64 0, metadata !24), !dbg !36
-  call void @llvm.dbg.declare(metadata !{%struct.a* %e}, metadata !25), !dbg !37
-  %0 = getelementptr inbounds %struct.a* %e, i64 0, i32 0, !dbg !38 ; <i32*> [#uses=1]
-  store i32 4, i32* %0, align 8, !dbg !38
-  %1 = getelementptr inbounds %struct.a* %e, i64 0, i32 1, !dbg !39 ; <%struct.a**> [#uses=1]
-  store %struct.a* %e, %struct.a** %1, align 8, !dbg !39
-  %2 = call i8* @bar(%struct.a* %e) nounwind noinline ssp, !dbg !40 ; <i8*> [#uses=0]
-  %3 = load i32* @ret, align 4, !dbg !41          ; <i32> [#uses=1]
-  ret i32 %3, !dbg !41
-}
-
-!llvm.dbg.sp = !{!0, !6, !15}
-!llvm.dbg.lv.foo = !{!21}
-!llvm.dbg.lv.bar = !{!22}
-!llvm.dbg.lv.main = !{!23, !24, !25}
-!llvm.dbg.gv = !{!27}
-
-!0 = metadata !{i32 524334, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 34, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, void (i32)* @foo} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 524329, metadata !"2010-06-28-DbgEntryPC.c", metadata !"/Users/yash/clean/llvm/test/FrontendC", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 524305, i32 0, i32 1, metadata !"2010-06-28-DbgEntryPC.c", metadata !"/Users/yash/clean/llvm/test/FrontendC", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!4 = metadata !{null, metadata !5}
-!5 = metadata !{i32 524324, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 524334, i32 0, metadata !1, metadata !"bar", metadata !"bar", metadata !"bar", metadata !1, i32 38, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i8* (%struct.a*)* @bar} ; [ DW_TAG_subprogram ]
-!7 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!8 = metadata !{metadata !9, metadata !10}
-!9 = metadata !{i32 524303, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
-!10 = metadata !{i32 524303, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
-!11 = metadata !{i32 524307, metadata !1, metadata !"a", metadata !1, i32 23, i64 128, i64 64, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_structure_type ]
-!12 = metadata !{metadata !13, metadata !14}
-!13 = metadata !{i32 524301, metadata !11, metadata !"c", metadata !1, i32 24, i64 32, i64 32, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
-!14 = metadata !{i32 524301, metadata !11, metadata !"d", metadata !1, i32 25, i64 64, i64 64, i64 64, i32 0, metadata !10} ; [ DW_TAG_member ]
-!15 = metadata !{i32 524334, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"main", metadata !1, i32 43, metadata !16, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 (i32, i8**)* @main} ; [ DW_TAG_subprogram ]
-!16 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !17, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!17 = metadata !{metadata !5, metadata !5, metadata !18}
-!18 = metadata !{i32 524303, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !19} ; [ DW_TAG_pointer_type ]
-!19 = metadata !{i32 524303, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !20} ; [ DW_TAG_pointer_type ]
-!20 = metadata !{i32 524324, metadata !1, metadata !"char", metadata !1, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
-!21 = metadata !{i32 524545, metadata !0, metadata !"x", metadata !1, i32 33, metadata !5} ; [ DW_TAG_arg_variable ]
-!22 = metadata !{i32 524545, metadata !6, metadata !"b", metadata !1, i32 38, metadata !10} ; [ DW_TAG_arg_variable ]
-!23 = metadata !{i32 524545, metadata !15, metadata !"argc", metadata !1, i32 43, metadata !5} ; [ DW_TAG_arg_variable ]
-!24 = metadata !{i32 524545, metadata !15, metadata !"argv", metadata !1, i32 43, metadata !18} ; [ DW_TAG_arg_variable ]
-!25 = metadata !{i32 524544, metadata !26, metadata !"e", metadata !1, i32 44, metadata !11} ; [ DW_TAG_auto_variable ]
-!26 = metadata !{i32 524299, metadata !15, i32 43, i32 0} ; [ DW_TAG_lexical_block ]
-!27 = metadata !{i32 524340, i32 0, metadata !1, metadata !"ret", metadata !"ret", metadata !"", metadata !1, i32 28, metadata !5, i1 false, i1 true, i32* @ret} ; [ DW_TAG_variable ]
-!28 = metadata !{i32 33, i32 0, metadata !0, null}
-!29 = metadata !{i32 35, i32 0, metadata !30, null}
-!30 = metadata !{i32 524299, metadata !0, i32 34, i32 0} ; [ DW_TAG_lexical_block ]
-!31 = metadata !{i32 36, i32 0, metadata !30, null}
-!32 = metadata !{i32 38, i32 0, metadata !6, null}
-!33 = metadata !{i32 39, i32 0, metadata !34, null}
-!34 = metadata !{i32 524299, metadata !6, i32 38, i32 0} ; [ DW_TAG_lexical_block ]
-!35 = metadata !{i32 40, i32 0, metadata !34, null}
-!36 = metadata !{i32 43, i32 0, metadata !15, null}
-!37 = metadata !{i32 44, i32 0, metadata !26, null}
-!38 = metadata !{i32 45, i32 0, metadata !26, null}
-!39 = metadata !{i32 46, i32 0, metadata !26, null}
-!40 = metadata !{i32 48, i32 0, metadata !26, null}
-!41 = metadata !{i32 49, i32 0, metadata !26, null}
diff --git a/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll b/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll
index 1919d2e..12a8274 100644
--- a/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll
+++ b/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll
@@ -17,7 +17,7 @@ entry:
 
 ; CHECK: andl	$150
 ; CHECK-NEXT: testb
-; CHECK-NEXT: jg
+; CHECK-NEXT: jle
 
 entry.if.end_crit_edge:                           ; preds = %entry
   %tmp4.pre = load i32* @g_38                     ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/2010-11-18-SelectOfExtload.ll b/test/CodeGen/X86/2010-11-18-SelectOfExtload.ll
index a1074b6..6d54c7e 100644
--- a/test/CodeGen/X86/2010-11-18-SelectOfExtload.ll
+++ b/test/CodeGen/X86/2010-11-18-SelectOfExtload.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+cmov | FileCheck %s
 ; Both values were being zero extended.
 @u = external global i8
 @s = external global i8
diff --git a/test/CodeGen/X86/2011-09-14-valcoalesce.ll b/test/CodeGen/X86/2011-09-14-valcoalesce.ll
index 1068d1b..a5ec614 100644
--- a/test/CodeGen/X86/2011-09-14-valcoalesce.ll
+++ b/test/CodeGen/X86/2011-09-14-valcoalesce.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | FileCheck %s
+; RUN: llc < %s -march=x86 -disable-code-place | FileCheck %s
 ;
 ; Test RegistersDefinedFromSameValue. We have multiple copies of the same vreg:
 ; while.body85.i:
diff --git a/test/CodeGen/X86/2011-10-27-tstore.ll b/test/CodeGen/X86/2011-10-27-tstore.ll
index 016e02c..6e83f67 100644
--- a/test/CodeGen/X86/2011-10-27-tstore.ll
+++ b/test/CodeGen/X86/2011-10-27-tstore.ll
@@ -3,14 +3,14 @@
 target triple = "x86_64-unknown-linux-gnu"
 
 ;CHECK: ltstore
-;CHECK: pshufd
-;CHECK: pshufd
+;CHECK: movq
+;CHECK: movq
 ;CHECK: ret
-define void @ltstore() {
+define void @ltstore(<4 x i32>* %pA, <2 x i32>* %pB) {
 entry:
-  %in = load <4 x i32>* undef
+  %in = load <4 x i32>* %pA
   %j = shufflevector <4 x i32> %in, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
-  store <2 x i32> %j, <2 x i32>* undef
+  store <2 x i32> %j, <2 x i32>* %pB
   ret void
 }
 
diff --git a/test/CodeGen/X86/2012-02-20-MachineCPBug.ll b/test/CodeGen/X86/2012-02-20-MachineCPBug.ll
index 3013f16..557d49d 100644
--- a/test/CodeGen/X86/2012-02-20-MachineCPBug.ll
+++ b/test/CodeGen/X86/2012-02-20-MachineCPBug.ll
@@ -52,7 +52,8 @@ for.body9.lr.ph.i:                                ; preds = %if.end.i, %for.cond
 
 for.body9.i:                                      ; preds = %for.body9.i, %for.body9.lr.ph.i
 ; CHECK: %for.body9.i
-; CHECK: movb %al, %ch
+; CHECK: movb
+; CHECK: shrdl
   %i6.02.i = phi i32 [ 0, %for.body9.lr.ph.i ], [ %inc21.i, %for.body9.i ]
   %add10.i = add i32 %i6.02.i, %div.i
   %arrayidx12.i = getelementptr inbounds i64* %.pre.i, i32 %add10.i
diff --git a/test/CodeGen/X86/2012-03-26-PostRALICMBug.ll b/test/CodeGen/X86/2012-03-26-PostRALICMBug.ll
new file mode 100644
index 0000000..101ecca
--- /dev/null
+++ b/test/CodeGen/X86/2012-03-26-PostRALICMBug.ll
@@ -0,0 +1,59 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -stats |& \
+; RUN:   not grep {Number of machine instructions hoisted out of loops post regalloc}
+
+; rdar://11095580
+
+%struct.ref_s = type { %union.color_sample, i16, i16 }
+%union.color_sample = type { i64 }
+
+@table = external global [3891 x i64]
+
+declare i32 @foo()
+
+define i32 @zarray(%struct.ref_s* nocapture %op) nounwind ssp {
+entry:
+  %call = tail call i32 @foo()
+  %tmp = ashr i32 %call, 31
+  %0 = and i32 %tmp, 1396
+  %index9 = add i32 %0, 2397
+  indirectbr i8* undef, [label %return, label %if.end]
+
+if.end:                                           ; preds = %entry
+  %size5 = getelementptr inbounds %struct.ref_s* %op, i64 0, i32 2
+  %tmp6 = load i16* %size5, align 2
+  %tobool1 = icmp eq i16 %tmp6, 0
+  %1 = select i1 %tobool1, i32 1396, i32 -1910
+  %index10 = add i32 %index9, %1
+  indirectbr i8* undef, [label %return, label %while.body.lr.ph]
+
+while.body.lr.ph:                                 ; preds = %if.end
+  %refs = bitcast %struct.ref_s* %op to %struct.ref_s**
+  %tmp9 = load %struct.ref_s** %refs, align 8
+  %tmp4 = zext i16 %tmp6 to i64
+  %index13 = add i32 %index10, 1658
+  %2 = sext i32 %index13 to i64
+  %3 = getelementptr [3891 x i64]* @table, i64 0, i64 %2
+  %blockaddress14 = load i64* %3, align 8
+  %4 = inttoptr i64 %blockaddress14 to i8*
+  indirectbr i8* %4, [label %while.body]
+
+while.body:                                       ; preds = %while.body, %while.body.lr.ph
+  %index7 = phi i32 [ %index15, %while.body ], [ %index13, %while.body.lr.ph ]
+  %indvar = phi i64 [ %indvar.next, %while.body ], [ 0, %while.body.lr.ph ]
+  %type_attrs = getelementptr %struct.ref_s* %tmp9, i64 %indvar, i32 1
+  store i16 32, i16* %type_attrs, align 2
+  %indvar.next = add i64 %indvar, 1
+  %exitcond5 = icmp eq i64 %indvar.next, %tmp4
+  %tmp7 = select i1 %exitcond5, i32 1648, i32 0
+  %index15 = add i32 %index7, %tmp7
+  %tmp8 = select i1 %exitcond5, i64 13, i64 0
+  %5 = sext i32 %index15 to i64
+  %6 = getelementptr [3891 x i64]* @table, i64 0, i64 %5
+  %blockaddress16 = load i64* %6, align 8
+  %7 = inttoptr i64 %blockaddress16 to i8*
+  indirectbr i8* %7, [label %return, label %while.body]
+
+return:                                           ; preds = %while.body, %if.end, %entry
+  %retval.0 = phi i32 [ %call, %entry ], [ 0, %if.end ], [ 0, %while.body ]
+  ret i32 %retval.0
+}
diff --git a/test/CodeGen/X86/2012-04-09-TwoAddrPassBug.ll b/test/CodeGen/X86/2012-04-09-TwoAddrPassBug.ll
new file mode 100644
index 0000000..2d90165
--- /dev/null
+++ b/test/CodeGen/X86/2012-04-09-TwoAddrPassBug.ll
@@ -0,0 +1,34 @@
+; RUN: llc -O1 -verify-coalescing < %s
+; PR12495
+target datalayout =
+"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.7.0"
+
+define void @func(i8* nocapture) nounwind uwtable ssp align 2 {
+  br i1 undef, label %4, label %2
+
+; <label>:2                                       ; preds = %1                  
+  %3 = tail call double @foo() nounwind
+  br label %4
+
+; <label>:4                                       ; preds = %2, %1              
+  %5 = phi double [ %3, %2 ], [ 0.000000e+00, %1 ]
+  %6 = fsub double %5, undef
+  %7 = fcmp olt double %6, 0.000000e+00
+  %8 = select i1 %7, double 0.000000e+00, double %6
+  %9 = fcmp olt double undef, 0.000000e+00
+  %10 = fcmp olt double %8, undef
+  %11 = or i1 %9, %10
+  br i1 %11, label %12, label %14
+
+; <label>:12                                      ; preds = %4                  
+  %13 = tail call double @fmod(double %8, double 0.000000e+00) nounwind
+  unreachable
+
+; <label>:14                                      ; preds = %4                  
+  ret void
+}
+
+declare double @foo()
+
+declare double @fmod(double, double)
diff --git a/test/CodeGen/X86/GC/lit.local.cfg b/test/CodeGen/X86/GC/lit.local.cfg
index b05ed3c..a8ad0f1 100644
--- a/test/CodeGen/X86/GC/lit.local.cfg
+++ b/test/CodeGen/X86/GC/lit.local.cfg
@@ -1,13 +1,6 @@
 config.suffixes = ['.ll', '.c', '.cpp']
 
-def getRoot(config):
-    if not config.parent:
-        return config
-    return getRoot(config.parent)
-
-root = getRoot(config)
-
-targets = set(root.targets_to_build.split())
+targets = set(config.root.targets_to_build.split())
 if not 'X86' in targets:
     config.unsupported = True
 
diff --git a/test/CodeGen/X86/SwizzleShuff.ll b/test/CodeGen/X86/SwizzleShuff.ll
new file mode 100644
index 0000000..100817a
--- /dev/null
+++ b/test/CodeGen/X86/SwizzleShuff.ll
@@ -0,0 +1,68 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+; Check that we perform a scalar XOR on i32.
+
+; CHECK: pull_bitcast
+; CHECK: xorl
+; CHECK: ret
+define void @pull_bitcast (<4 x i8>* %pA, <4 x i8>* %pB) {
+  %A = load <4 x i8>* %pA
+  %B = load <4 x i8>* %pB
+  %C = xor <4 x i8> %A, %B
+  store <4 x i8> %C, <4 x i8>* %pA
+  ret void
+}
+
+; CHECK: multi_use_swizzle
+; CHECK: mov
+; CHECK-NEXT: shuf
+; CHECK-NEXT: shuf
+; CHECK-NEXT: shuf
+; CHECK-NEXT: xor
+; CHECK-NEXT: ret
+define <4 x i32> @multi_use_swizzle (<4 x i32>* %pA, <4 x i32>* %pB) {
+  %A = load <4 x i32>* %pA
+  %B = load <4 x i32>* %pB
+  %S = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 1, i32 5, i32 6>
+  %S1 = shufflevector <4 x i32> %S, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 2>
+  %S2 = shufflevector <4 x i32> %S, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 2>
+  %R = xor <4 x i32> %S1, %S2
+  ret <4 x i32> %R
+}
+
+; CHECK: pull_bitcast2
+; CHECK: xorl
+; CHECK: ret
+define <4 x i8> @pull_bitcast2 (<4 x i8>* %pA, <4 x i8>* %pB, <4 x i8>* %pC) {
+  %A = load <4 x i8>* %pA
+  store <4 x i8> %A, <4 x i8>* %pC
+  %B = load <4 x i8>* %pB
+  %C = xor <4 x i8> %A, %B
+  store <4 x i8> %C, <4 x i8>* %pA
+  ret <4 x i8> %C
+}
+
+
+
+; CHECK: reverse_1
+; CHECK-NOT: shuf
+; CHECK: ret
+define <4 x i32> @reverse_1 (<4 x i32>* %pA, <4 x i32>* %pB) {
+  %A = load <4 x i32>* %pA
+  %B = load <4 x i32>* %pB
+  %S = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+  %S1 = shufflevector <4 x i32> %S, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+  ret <4 x i32> %S1
+}
+
+
+; CHECK: no_reverse_shuff
+; CHECK: shuf
+; CHECK: ret
+define <4 x i32> @no_reverse_shuff (<4 x i32>* %pA, <4 x i32>* %pB) {
+  %A = load <4 x i32>* %pA
+  %B = load <4 x i32>* %pB
+  %S = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+  %S1 = shufflevector <4 x i32> %S, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 3, i32 2>
+  ret <4 x i32> %S1
+}
diff --git a/test/CodeGen/X86/atom-sched.ll b/test/CodeGen/X86/atom-sched.ll
index 2301dfc..4dd9a9e 100644
--- a/test/CodeGen/X86/atom-sched.ll
+++ b/test/CodeGen/X86/atom-sched.ll
@@ -1,5 +1,9 @@
+; XFAIL: *
 ; RUN: llc <%s -O2 -mcpu=atom -march=x86 -relocation-model=static | FileCheck -check-prefix=atom %s
 ; RUN: llc <%s -O2 -mcpu=core2 -march=x86 -relocation-model=static | FileCheck %s
+;
+; FIXME: Atom's scheduler is temporarily disabled.
+; XFAIL: *
 
 @a = common global i32 0, align 4
 @b = common global i32 0, align 4
diff --git a/test/CodeGen/X86/atomic_op.ll b/test/CodeGen/X86/atomic_op.ll
index 972dab2..7c5abe2 100644
--- a/test/CodeGen/X86/atomic_op.ll
+++ b/test/CodeGen/X86/atomic_op.ll
@@ -13,6 +13,7 @@ entry:
 	%xort = alloca i32		; <i32*> [#uses=2]
 	%old = alloca i32		; <i32*> [#uses=18]
 	%temp = alloca i32		; <i32*> [#uses=2]
+	%temp64 = alloca i64
 	store i32 %argc, i32* %argc.addr
 	store i8** %argv, i8*** %argv.addr
 	store i32 0, i32* %val1
@@ -106,6 +107,25 @@ entry:
         ; CHECK: cmpxchgl
   %17 = cmpxchg i32* %val2, i32 1976, i32 1 monotonic
 	store i32 %17, i32* %old
+        ; CHECK: movl	$1401, %[[R17mask:[a-z]*]]
+        ; CHECK: movl	[[R17atomic:.*]], %eax
+        ; CHECK: movl	%eax, %[[R17newval:[a-z]*]]
+        ; CHECK: andl	%[[R17mask]], %[[R17newval]]
+        ; CHECK: notl	%[[R17newval]]
+        ; CHECK: lock
+        ; CHECK: cmpxchgl	%[[R17newval]], [[R17atomic]]
+        ; CHECK: jne
+        ; CHECK: movl	%eax,
+  %18 = atomicrmw nand i32* %val2, i32 1401 monotonic
+  store i32 %18, i32* %old
+        ; CHECK: andl
+        ; CHECK: andl
+        ; CHECK: notl
+        ; CHECK: notl
+        ; CHECK: lock
+        ; CHECK: cmpxchg8b
+  %19 = atomicrmw nand i64* %temp64, i64 17361641481138401520 monotonic
+  store i64 %19, i64* %temp64
 	ret void
 }
 
diff --git a/test/CodeGen/X86/avx-intrinsics-x86.ll b/test/CodeGen/X86/avx-intrinsics-x86.ll
index 616601a..b334932 100644
--- a/test/CodeGen/X86/avx-intrinsics-x86.ll
+++ b/test/CodeGen/X86/avx-intrinsics-x86.ll
@@ -1078,33 +1078,33 @@ define <2 x i64> @test_x86_sse41_pmuldq(<4 x i32> %a0, <4 x i32> %a1) {
 declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone
 
 
-define i32 @test_x86_sse41_ptestc(<4 x float> %a0, <4 x float> %a1) {
+define i32 @test_x86_sse41_ptestc(<2 x i64> %a0, <2 x i64> %a1) {
   ; CHECK: vptest 
   ; CHECK: sbbl
-  %res = call i32 @llvm.x86.sse41.ptestc(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
+  %res = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1]
   ret i32 %res
 }
-declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>) nounwind readnone
+declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone
 
 
-define i32 @test_x86_sse41_ptestnzc(<4 x float> %a0, <4 x float> %a1) {
+define i32 @test_x86_sse41_ptestnzc(<2 x i64> %a0, <2 x i64> %a1) {
   ; CHECK: vptest 
   ; CHECK: seta
   ; CHECK: movzbl
-  %res = call i32 @llvm.x86.sse41.ptestnzc(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
+  %res = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1]
   ret i32 %res
 }
-declare i32 @llvm.x86.sse41.ptestnzc(<4 x float>, <4 x float>) nounwind readnone
+declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone
 
 
-define i32 @test_x86_sse41_ptestz(<4 x float> %a0, <4 x float> %a1) {
+define i32 @test_x86_sse41_ptestz(<2 x i64> %a0, <2 x i64> %a1) {
   ; CHECK: vptest 
   ; CHECK: sete
   ; CHECK: movzbl
-  %res = call i32 @llvm.x86.sse41.ptestz(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
+  %res = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1]
   ret i32 %res
 }
-declare i32 @llvm.x86.sse41.ptestz(<4 x float>, <4 x float>) nounwind readnone
+declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone
 
 
 define <2 x double> @test_x86_sse41_round_pd(<2 x double> %a0) {
diff --git a/test/CodeGen/X86/avx-shuffle.ll b/test/CodeGen/X86/avx-shuffle.ll
index 54f01e9..16c447b 100644
--- a/test/CodeGen/X86/avx-shuffle.ll
+++ b/test/CodeGen/X86/avx-shuffle.ll
@@ -147,3 +147,58 @@ entry:
   %shuffle.i = shufflevector <4 x float> %1, <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
   ret <8 x float> %shuffle.i
 }
+
+; PR12413
+; CHECK: vpshufb
+; CHECK: vpshufb
+; CHECK: vpshufb
+; CHECK: vpshufb
+define <32 x i8> @shuf(<32 x i8> %inval1, <32 x i8> %inval2) {
+entry:
+ %0 = shufflevector <32 x i8> %inval1, <32 x i8> %inval2, <32 x i32> <i32 0,
+i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32
+22, i32 24, i32 26, i32 28, i32 30, i32 32, i32 34, i32 36, i32 38, i32 40, i32
+42, i32 44, i32 46, i32 48, i32 50, i32 52, i32 54, i32 56, i32 58, i32 60, i32
+62>
+ ret <32 x i8> %0
+}
+
+; CHECK: blend1
+; CHECK: vblendps
+; CHECK: ret
+define <4 x i32> @blend1(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline {
+  %t = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+  ret <4 x i32> %t
+}
+
+; CHECK: blend2
+; CHECK: vblendps
+; CHECK: ret
+define <4 x i32> @blend2(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline {
+  %t = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  ret <4 x i32> %t
+}
+
+; CHECK: blend2a
+; CHECK: vblendps
+; CHECK: ret
+define <4 x float> @blend2a(<4 x float> %a, <4 x float> %b) nounwind alwaysinline {
+  %t = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  ret <4 x float> %t
+}
+
+; CHECK: blend3
+; CHECK-NOT: vblendps
+; CHECK: ret
+define <4 x i32> @blend3(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline {
+  %t = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 2, i32 7>
+  ret <4 x i32> %t
+}
+
+; CHECK: blend4
+; CHECK: vblendpd
+; CHECK: ret
+define <4 x i64> @blend4(<4 x i64> %a, <4 x i64> %b) nounwind alwaysinline {
+  %t = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+  ret <4 x i64> %t
+}
diff --git a/test/CodeGen/X86/avx-vbroadcast.ll b/test/CodeGen/X86/avx-vbroadcast.ll
index 5bf9f4f..148ae73 100644
--- a/test/CodeGen/X86/avx-vbroadcast.ll
+++ b/test/CodeGen/X86/avx-vbroadcast.ll
@@ -47,7 +47,7 @@ entry:
 ;;;; 128-bit versions
 
 ; CHECK: vbroadcastss (%
-define <4 x float> @E(float* %ptr) nounwind uwtable readnone ssp {
+define <4 x float> @e(float* %ptr) nounwind uwtable readnone ssp {
 entry:
   %q = load float* %ptr, align 4
   %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
@@ -57,6 +57,19 @@ entry:
   ret <4 x float> %vecinit6.i
 }
 
+
+; CHECK: _e2
+; CHECK-NOT: vbroadcastss
+; CHECK: ret
+define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp {
+    %vecinit.i = insertelement <4 x float> undef, float      0xbf80000000000000, i32 0
+  %vecinit2.i = insertelement <4 x float> %vecinit.i, float  0xbf80000000000000, i32 1
+  %vecinit4.i = insertelement <4 x float> %vecinit2.i, float 0xbf80000000000000, i32 2
+  %vecinit6.i = insertelement <4 x float> %vecinit4.i, float 0xbf80000000000000, i32 3
+  ret <4 x float> %vecinit6.i
+}
+
+
 ; CHECK: vbroadcastss (%
 define <4 x i32> @F(i32* %ptr) nounwind uwtable readnone ssp {
 entry:
@@ -71,7 +84,7 @@ entry:
 ; Unsupported vbroadcasts
 
 ; CHECK: _G
-; CHECK-NOT: vbroadcastsd (%
+; CHECK-NOT: broadcast (%
 ; CHECK: ret
 define <2 x i64> @G(i64* %ptr) nounwind uwtable readnone ssp {
 entry:
@@ -82,7 +95,7 @@ entry:
 }
 
 ; CHECK: _H
-; CHECK-NOT: vbroadcastss
+; CHECK-NOT: broadcast
 ; CHECK: ret
 define <4 x i32> @H(<4 x i32> %a) {
   %x = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
@@ -90,7 +103,7 @@ define <4 x i32> @H(<4 x i32> %a) {
 }
 
 ; CHECK: _I
-; CHECK-NOT: vbroadcastsd (%
+; CHECK-NOT: broadcast (%
 ; CHECK: ret
 define <2 x double> @I(double* %ptr) nounwind uwtable readnone ssp {
 entry:
diff --git a/test/CodeGen/X86/avx-vpermil.ll b/test/CodeGen/X86/avx-vpermil.ll
index 9707cd9..cb904b9 100644
--- a/test/CodeGen/X86/avx-vpermil.ll
+++ b/test/CodeGen/X86/avx-vpermil.ll
@@ -45,7 +45,8 @@ entry:
   ret <8 x float> %shuffle
 }
 
-; CHECK: vpermilps
+; CHECK: palignr
+; CHECK: palignr
 define <8 x float> @funcF(<8 x float> %a) nounwind uwtable readnone ssp {
 entry:
   %shuffle = shufflevector <8 x float> %a, <8 x float> zeroinitializer, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
diff --git a/test/CodeGen/X86/avx2-intrinsics-x86.ll b/test/CodeGen/X86/avx2-intrinsics-x86.ll
index 1fb41c0..3f27a02 100644
--- a/test/CodeGen/X86/avx2-intrinsics-x86.ll
+++ b/test/CodeGen/X86/avx2-intrinsics-x86.ll
@@ -800,22 +800,6 @@ define <8 x float> @test_x86_avx2_permps(<8 x float> %a0, <8 x float> %a1) {
 declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x float>) nounwind readonly
 
 
-define <4 x i64> @test_x86_avx2_permq(<4 x i64> %a0) {
-  ; CHECK: vpermq
-  %res = call <4 x i64> @llvm.x86.avx2.permq(<4 x i64> %a0, i8 7) ; <<4 x i64>> [#uses=1]
-  ret <4 x i64> %res
-}
-declare <4 x i64> @llvm.x86.avx2.permq(<4 x i64>, i8) nounwind readonly
-
-
-define <4 x double> @test_x86_avx2_permpd(<4 x double> %a0) {
-  ; CHECK: vpermpd
-  %res = call <4 x double> @llvm.x86.avx2.permpd(<4 x double> %a0, i8 7) ; <<4 x double>> [#uses=1]
-  ret <4 x double> %res
-}
-declare <4 x double> @llvm.x86.avx2.permpd(<4 x double>, i8) nounwind readonly
-
-
 define <4 x i64> @test_x86_avx2_vperm2i128(<4 x i64> %a0, <4 x i64> %a1) {
   ; CHECK: vperm2i128
   %res = call <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64> %a0, <4 x i64> %a1, i8 1) ; <<4 x i64>> [#uses=1]
diff --git a/test/CodeGen/X86/avx2-vbroadcast.ll b/test/CodeGen/X86/avx2-vbroadcast.ll
index fbabb15..1a78414 100644
--- a/test/CodeGen/X86/avx2-vbroadcast.ll
+++ b/test/CodeGen/X86/avx2-vbroadcast.ll
@@ -150,3 +150,38 @@ entry:
   %vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
   ret <2 x double> %vecinit2.i
 }
+
+; CHECK: V111
+; CHECK: vpbroadcastd
+; CHECK: ret
+define <8 x i32> @V111(<8 x i32> %in) nounwind uwtable readnone ssp {
+entry:
+  %g = add <8 x i32> %in, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  ret <8 x i32> %g
+}
+
+; CHECK: _e2
+; CHECK: vbroadcastss
+; CHECK: ret
+define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp {
+  %vecinit.i = insertelement <4 x float> undef, float        0xbf80000000000000, i32 0
+  %vecinit2.i = insertelement <4 x float> %vecinit.i, float  0xbf80000000000000, i32 1
+  %vecinit4.i = insertelement <4 x float> %vecinit2.i, float 0xbf80000000000000, i32 2
+  %vecinit6.i = insertelement <4 x float> %vecinit4.i, float 0xbf80000000000000, i32 3
+  ret <4 x float> %vecinit6.i
+}
+
+; CHECK: _e4
+; CHECK-NOT: broadcast
+; CHECK: ret
+define <8 x i8> @_e4(i8* %ptr) nounwind uwtable readnone ssp {
+  %vecinit0.i = insertelement <8 x i8> undef, i8       52, i32 0
+  %vecinit1.i = insertelement <8 x i8> %vecinit0.i, i8 52, i32 1
+  %vecinit2.i = insertelement <8 x i8> %vecinit1.i, i8 52, i32 2
+  %vecinit3.i = insertelement <8 x i8> %vecinit2.i, i8 52, i32 3
+  %vecinit4.i = insertelement <8 x i8> %vecinit3.i, i8 52, i32 3
+  %vecinit5.i = insertelement <8 x i8> %vecinit4.i, i8 52, i32 3
+  %vecinit6.i = insertelement <8 x i8> %vecinit5.i, i8 52, i32 3
+  %vecinit7.i = insertelement <8 x i8> %vecinit6.i, i8 52, i32 3
+  ret <8 x i8> %vecinit7.i
+}
diff --git a/test/CodeGen/X86/avx2-vperm.ll b/test/CodeGen/X86/avx2-vperm.ll
new file mode 100755
index 0000000..d576d0e
--- /dev/null
+++ b/test/CodeGen/X86/avx2-vperm.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
+
+define <8 x i32> @perm_cl_int_8x32(<8 x i32> %A) nounwind readnone {
+entry:
+; CHECK: perm_cl_int_8x32
+; CHECK: vpermd
+  %B = shufflevector <8 x i32> %A, <8 x i32> undef, <8 x i32> <i32 0, i32 7, i32 2, i32 1, i32 2, i32 7, i32 6, i32 0>
+  ret <8 x i32> %B
+}
+
+
+define <8 x float> @perm_cl_fp_8x32(<8 x float> %A) nounwind readnone {
+entry:
+; CHECK: perm_cl_fp_8x32
+; CHECK: vpermps
+  %B = shufflevector <8 x float> %A, <8 x float> undef, <8 x i32> <i32 undef, i32 7, i32 2, i32 undef, i32 4, i32 undef, i32 1, i32 6>
+  ret <8 x float> %B
+}
+
+define <4 x i64> @perm_cl_int_4x64(<4 x i64> %A) nounwind readnone {
+entry:
+; CHECK: perm_cl_int_4x64
+; CHECK: vpermq
+  %B = shufflevector <4 x i64> %A, <4 x i64> undef, <4 x i32> <i32 0, i32 3, i32 2, i32 1>
+  ret <4 x i64> %B
+}
+
+define <4 x double> @perm_cl_fp_4x64(<4 x double> %A) nounwind readnone {
+entry:
+; CHECK: perm_cl_fp_4x64
+; CHECK: vpermpd
+  %B = shufflevector <4 x double> %A, <4 x double> undef, <4 x i32> <i32 0, i32 3, i32 2, i32 1>
+  ret <4 x double> %B
+}
diff --git a/test/CodeGen/X86/block-placement.ll b/test/CodeGen/X86/block-placement.ll
index 167d522..fc7b638 100644
--- a/test/CodeGen/X86/block-placement.ll
+++ b/test/CodeGen/X86/block-placement.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=i686-linux -enable-block-placement < %s | FileCheck %s
+; RUN: llc -mtriple=i686-linux < %s | FileCheck %s
 
 declare void @error(i32 %i, i32 %a, i32 %b)
 
@@ -76,11 +76,11 @@ define i32 @test_loop_cold_blocks(i32 %i, i32* %a) {
 ; Check that we sink cold loop blocks after the hot loop body.
 ; CHECK: test_loop_cold_blocks:
 ; CHECK: %entry
+; CHECK: %unlikely1
+; CHECK: %unlikely2
 ; CHECK: %body1
 ; CHECK: %body2
 ; CHECK: %body3
-; CHECK: %unlikely1
-; CHECK: %unlikely2
 ; CHECK: %exit
 
 entry:
@@ -122,14 +122,14 @@ define i32 @test_loop_early_exits(i32 %i, i32* %a) {
 ; Check that we sink early exit blocks out of loop bodies.
 ; CHECK: test_loop_early_exits:
 ; CHECK: %entry
+; CHECK: %body1
 ; CHECK: %body2
 ; CHECK: %body3
 ; CHECK: %body4
-; CHECK: %body1
+; CHECK: %exit
 ; CHECK: %bail1
 ; CHECK: %bail2
 ; CHECK: %bail3
-; CHECK: %exit
 
 entry:
   br label %body1
@@ -199,6 +199,36 @@ exit:
   ret i32 %base
 }
 
+define i32 @test_no_loop_rotate(i32 %i, i32* %a) {
+; Check that we don't try to rotate a loop which is already laid out with
+; fallthrough opportunities into the top and out of the bottom.
+; CHECK: test_no_loop_rotate:
+; CHECK: %entry
+; CHECK: %body0
+; CHECK: %body1
+; CHECK: %exit
+
+entry:
+  br label %body0
+
+body0:
+  %iv = phi i32 [ 0, %entry ], [ %next, %body1 ]
+  %base = phi i32 [ 0, %entry ], [ %sum, %body1 ]
+  %arrayidx = getelementptr inbounds i32* %a, i32 %iv
+  %0 = load i32* %arrayidx
+  %sum = add nsw i32 %0, %base
+  %bailcond1 = icmp eq i32 %sum, 42
+  br i1 %bailcond1, label %exit, label %body1
+
+body1:
+  %next = add i32 %iv, 1
+  %exitcond = icmp eq i32 %next, %i
+  br i1 %exitcond, label %exit, label %body0
+
+exit:
+  ret i32 %base
+}
+
 define void @test_loop_rotate_reversed_blocks() {
 ; This test case (greatly reduced from an Olden bencmark) ensures that the loop
 ; rotate implementation doesn't assume that loops are laid out in a particular
@@ -348,7 +378,6 @@ define void @unnatural_cfg2() {
 ; CHECK: %entry
 ; CHECK: %loop.body1
 ; CHECK: %loop.body2
-; CHECK: %loop.header
 ; CHECK: %loop.body3
 ; CHECK: %loop.inner1.begin
 ; The end block is folded with %loop.body3...
@@ -356,6 +385,7 @@ define void @unnatural_cfg2() {
 ; CHECK: %loop.body4
 ; CHECK: %loop.inner2.begin
 ; The loop.inner2.end block is folded
+; CHECK: %loop.header
 ; CHECK: %bail
 
 entry:
@@ -928,3 +958,126 @@ entry:
 exit:
   ret void
 }
+
+define void @benchmark_heapsort(i32 %n, double* nocapture %ra) {
+; This test case comes from the heapsort benchmark, and exemplifies several
+; important aspects to block placement in the presence of loops:
+; 1) Loop rotation needs to *ensure* that the desired exiting edge can be
+;    a fallthrough.
+; 2) The exiting edge from the loop which is rotated to be laid out at the
+;    bottom of the loop needs to be exiting into the nearest enclosing loop (to
+;    which there is an exit). Otherwise, we force that enclosing loop into
+;    strange layouts that are siginificantly less efficient, often times maing
+;    it discontiguous.
+;
+; CHECK: @benchmark_heapsort
+; CHECK: %entry
+; First rotated loop top.
+; CHECK: .align
+; CHECK: %while.end
+; CHECK: %for.cond
+; CHECK: %if.then
+; CHECK: %if.else
+; CHECK: %if.end10
+; Second rotated loop top
+; CHECK: .align
+; CHECK: %if.then24
+; CHECK: %while.cond.outer
+; Third rotated loop top
+; CHECK: .align
+; CHECK: %while.cond
+; CHECK: %while.body
+; CHECK: %land.lhs.true
+; CHECK: %if.then19
+; CHECK: %if.then19
+; CHECK: %if.then8
+; CHECK: ret
+
+entry:
+  %shr = ashr i32 %n, 1
+  %add = add nsw i32 %shr, 1
+  %arrayidx3 = getelementptr inbounds double* %ra, i64 1
+  br label %for.cond
+
+for.cond:
+  %ir.0 = phi i32 [ %n, %entry ], [ %ir.1, %while.end ]
+  %l.0 = phi i32 [ %add, %entry ], [ %l.1, %while.end ]
+  %cmp = icmp sgt i32 %l.0, 1
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  %dec = add nsw i32 %l.0, -1
+  %idxprom = sext i32 %dec to i64
+  %arrayidx = getelementptr inbounds double* %ra, i64 %idxprom
+  %0 = load double* %arrayidx, align 8
+  br label %if.end10
+
+if.else:
+  %idxprom1 = sext i32 %ir.0 to i64
+  %arrayidx2 = getelementptr inbounds double* %ra, i64 %idxprom1
+  %1 = load double* %arrayidx2, align 8
+  %2 = load double* %arrayidx3, align 8
+  store double %2, double* %arrayidx2, align 8
+  %dec6 = add nsw i32 %ir.0, -1
+  %cmp7 = icmp eq i32 %dec6, 1
+  br i1 %cmp7, label %if.then8, label %if.end10
+
+if.then8:
+  store double %1, double* %arrayidx3, align 8
+  ret void
+
+if.end10:
+  %ir.1 = phi i32 [ %ir.0, %if.then ], [ %dec6, %if.else ]
+  %l.1 = phi i32 [ %dec, %if.then ], [ %l.0, %if.else ]
+  %rra.0 = phi double [ %0, %if.then ], [ %1, %if.else ]
+  %add31 = add nsw i32 %ir.1, 1
+  br label %while.cond.outer
+
+while.cond.outer:
+  %j.0.ph.in = phi i32 [ %l.1, %if.end10 ], [ %j.1, %if.then24 ]
+  %j.0.ph = shl i32 %j.0.ph.in, 1
+  br label %while.cond
+
+while.cond:
+  %j.0 = phi i32 [ %add31, %if.end20 ], [ %j.0.ph, %while.cond.outer ]
+  %cmp11 = icmp sgt i32 %j.0, %ir.1
+  br i1 %cmp11, label %while.end, label %while.body
+
+while.body:
+  %cmp12 = icmp slt i32 %j.0, %ir.1
+  br i1 %cmp12, label %land.lhs.true, label %if.end20
+
+land.lhs.true:
+  %idxprom13 = sext i32 %j.0 to i64
+  %arrayidx14 = getelementptr inbounds double* %ra, i64 %idxprom13
+  %3 = load double* %arrayidx14, align 8
+  %add15 = add nsw i32 %j.0, 1
+  %idxprom16 = sext i32 %add15 to i64
+  %arrayidx17 = getelementptr inbounds double* %ra, i64 %idxprom16
+  %4 = load double* %arrayidx17, align 8
+  %cmp18 = fcmp olt double %3, %4
+  br i1 %cmp18, label %if.then19, label %if.end20
+
+if.then19:
+  br label %if.end20
+
+if.end20:
+  %j.1 = phi i32 [ %add15, %if.then19 ], [ %j.0, %land.lhs.true ], [ %j.0, %while.body ]
+  %idxprom21 = sext i32 %j.1 to i64
+  %arrayidx22 = getelementptr inbounds double* %ra, i64 %idxprom21
+  %5 = load double* %arrayidx22, align 8
+  %cmp23 = fcmp olt double %rra.0, %5
+  br i1 %cmp23, label %if.then24, label %while.cond
+
+if.then24:
+  %idxprom27 = sext i32 %j.0.ph.in to i64
+  %arrayidx28 = getelementptr inbounds double* %ra, i64 %idxprom27
+  store double %5, double* %arrayidx28, align 8
+  br label %while.cond.outer
+
+while.end:
+  %idxprom33 = sext i32 %j.0.ph.in to i64
+  %arrayidx34 = getelementptr inbounds double* %ra, i64 %idxprom33
+  store double %rra.0, double* %arrayidx34, align 8
+  br label %for.cond
+}
diff --git a/test/CodeGen/X86/br-fold.ll b/test/CodeGen/X86/br-fold.ll
index 8af3bd1..2c37194 100644
--- a/test/CodeGen/X86/br-fold.ll
+++ b/test/CodeGen/X86/br-fold.ll
@@ -1,7 +1,7 @@
 ; RUN: llc -march=x86-64 < %s | FileCheck %s
 
 ; CHECK: orq
-; CHECK-NEXT: jne
+; CHECK-NEXT: LBB0_1
 
 @_ZN11xercesc_2_513SchemaSymbols21fgURI_SCHEMAFORSCHEMAE = external constant [33 x i16], align 32 ; <[33 x i16]*> [#uses=1]
 @_ZN11xercesc_2_56XMLUni16fgNotationStringE = external constant [9 x i16], align 16 ; <[9 x i16]*> [#uses=1]
diff --git a/test/CodeGen/X86/call-push.ll b/test/CodeGen/X86/call-push.ll
index 8cca10c..e69f8c1 100644
--- a/test/CodeGen/X86/call-push.ll
+++ b/test/CodeGen/X86/call-push.ll
@@ -7,8 +7,8 @@ define i32 @decode_byte(%struct.decode_t* %decode) nounwind {
 ; CHECK: decode_byte:
 ; CHECK: pushl
 ; CHECK: popl
-; CHECK: popl
 ; CHECK: jmp
+; CHECK: popl
 entry:
         %tmp2 = getelementptr %struct.decode_t* %decode, i32 0, i32 4           ; <i16*> [#uses=1]
         %tmp23 = bitcast i16* %tmp2 to i32*             ; <i32*> [#uses=1]
diff --git a/test/CodeGen/X86/change-compare-stride-0.ll b/test/CodeGen/X86/change-compare-stride-0.ll
deleted file mode 100644
index 439f7b0..0000000
--- a/test/CodeGen/X86/change-compare-stride-0.ll
+++ /dev/null
@@ -1,83 +0,0 @@
-; RUN: llc < %s -march=x86 -enable-lsr-nested | FileCheck %s
-;
-; Nested LSR is required to optimize this case.
-; We do not expect to see this form of IR without -enable-iv-rewrite.
-
-define void @borf(i8* nocapture %in, i8* nocapture %out) nounwind {
-; CHECK: borf:
-; CHECK-NOT: inc
-; CHECK-NOT: leal 1(
-; CHECK-NOT: leal -1(
-; CHECK: decl
-; CHECK-NEXT: cmpl $-478
-; CHECK: ret
-
-bb4.thread:
-	br label %bb2.outer
-
-bb2.outer:		; preds = %bb4, %bb4.thread
-	%indvar18 = phi i32 [ 0, %bb4.thread ], [ %indvar.next28, %bb4 ]		; <i32> [#uses=3]
-	%tmp34 = mul i32 %indvar18, 65535		; <i32> [#uses=1]
-	%i.0.reg2mem.0.ph = add i32 %tmp34, 639		; <i32> [#uses=1]
-	%0 = and i32 %i.0.reg2mem.0.ph, 65535		; <i32> [#uses=1]
-	%1 = mul i32 %0, 480		; <i32> [#uses=1]
-	%tmp20 = mul i32 %indvar18, -478		; <i32> [#uses=1]
-	br label %bb2
-
-bb2:		; preds = %bb2, %bb2.outer
-	%indvar = phi i32 [ 0, %bb2.outer ], [ %indvar.next, %bb2 ]		; <i32> [#uses=3]
-	%ctg2 = getelementptr i8* %out, i32 %tmp20		; <i8*> [#uses=1]
-	%tmp21 = ptrtoint i8* %ctg2 to i32		; <i32> [#uses=1]
-	%tmp23 = sub i32 %tmp21, %indvar		; <i32> [#uses=1]
-	%out_addr.0.reg2mem.0 = inttoptr i32 %tmp23 to i8*		; <i8*> [#uses=1]
-	%tmp25 = mul i32 %indvar, 65535		; <i32> [#uses=1]
-	%j.0.reg2mem.0 = add i32 %tmp25, 479		; <i32> [#uses=1]
-	%2 = and i32 %j.0.reg2mem.0, 65535		; <i32> [#uses=1]
-	%3 = add i32 %1, %2		; <i32> [#uses=9]
-	%4 = add i32 %3, -481		; <i32> [#uses=1]
-	%5 = getelementptr i8* %in, i32 %4		; <i8*> [#uses=1]
-	%6 = load i8* %5, align 1		; <i8> [#uses=1]
-	%7 = add i32 %3, -480		; <i32> [#uses=1]
-	%8 = getelementptr i8* %in, i32 %7		; <i8*> [#uses=1]
-	%9 = load i8* %8, align 1		; <i8> [#uses=1]
-	%10 = add i32 %3, -479		; <i32> [#uses=1]
-	%11 = getelementptr i8* %in, i32 %10		; <i8*> [#uses=1]
-	%12 = load i8* %11, align 1		; <i8> [#uses=1]
-	%13 = add i32 %3, -1		; <i32> [#uses=1]
-	%14 = getelementptr i8* %in, i32 %13		; <i8*> [#uses=1]
-	%15 = load i8* %14, align 1		; <i8> [#uses=1]
-	%16 = getelementptr i8* %in, i32 %3		; <i8*> [#uses=1]
-	%17 = load i8* %16, align 1		; <i8> [#uses=1]
-	%18 = add i32 %3, 1		; <i32> [#uses=1]
-	%19 = getelementptr i8* %in, i32 %18		; <i8*> [#uses=1]
-	%20 = load i8* %19, align 1		; <i8> [#uses=1]
-	%21 = add i32 %3, 481		; <i32> [#uses=1]
-	%22 = getelementptr i8* %in, i32 %21		; <i8*> [#uses=1]
-	%23 = load i8* %22, align 1		; <i8> [#uses=1]
-	%24 = add i32 %3, 480		; <i32> [#uses=1]
-	%25 = getelementptr i8* %in, i32 %24		; <i8*> [#uses=1]
-	%26 = load i8* %25, align 1		; <i8> [#uses=1]
-	%27 = add i32 %3, 479		; <i32> [#uses=1]
-	%28 = getelementptr i8* %in, i32 %27		; <i8*> [#uses=1]
-	%29 = load i8* %28, align 1		; <i8> [#uses=1]
-	%30 = add i8 %9, %6		; <i8> [#uses=1]
-	%31 = add i8 %30, %12		; <i8> [#uses=1]
-	%32 = add i8 %31, %15		; <i8> [#uses=1]
-	%33 = add i8 %32, %17		; <i8> [#uses=1]
-	%34 = add i8 %33, %20		; <i8> [#uses=1]
-	%35 = add i8 %34, %23		; <i8> [#uses=1]
-	%36 = add i8 %35, %26		; <i8> [#uses=1]
-	%37 = add i8 %36, %29		; <i8> [#uses=1]
-	store i8 %37, i8* %out_addr.0.reg2mem.0, align 1
-	%indvar.next = add i32 %indvar, 1		; <i32> [#uses=2]
-	%exitcond = icmp eq i32 %indvar.next, 478		; <i1> [#uses=1]
-	br i1 %exitcond, label %bb4, label %bb2
-
-bb4:		; preds = %bb2
-	%indvar.next28 = add i32 %indvar18, 1		; <i32> [#uses=2]
-	%exitcond29 = icmp eq i32 %indvar.next28, 638		; <i1> [#uses=1]
-	br i1 %exitcond29, label %return, label %bb2.outer
-
-return:		; preds = %bb4
-	ret void
-}
diff --git a/test/CodeGen/X86/dbg-declare.ll b/test/CodeGen/X86/dbg-declare.ll
new file mode 100644
index 0000000..5d4cedc
--- /dev/null
+++ b/test/CodeGen/X86/dbg-declare.ll
@@ -0,0 +1,57 @@
+; RUN: llc < %s -O0 -mtriple x86_64-apple-darwin
+; <rdar://problem/11134152>
+
+define i32 @foo(i32* %x) nounwind uwtable ssp {
+entry:
+  %x.addr = alloca i32*, align 8
+  %saved_stack = alloca i8*
+  %cleanup.dest.slot = alloca i32
+  store i32* %x, i32** %x.addr, align 8
+  call void @llvm.dbg.declare(metadata !{i32** %x.addr}, metadata !14), !dbg !15
+  %0 = load i32** %x.addr, align 8, !dbg !16
+  %1 = load i32* %0, align 4, !dbg !16
+  %2 = zext i32 %1 to i64, !dbg !16
+  %3 = call i8* @llvm.stacksave(), !dbg !16
+  store i8* %3, i8** %saved_stack, !dbg !16
+  %vla = alloca i8, i64 %2, align 16, !dbg !16
+  call void @llvm.dbg.declare(metadata !{i8* %vla}, metadata !18), !dbg !23
+  store i32 1, i32* %cleanup.dest.slot
+  %4 = load i8** %saved_stack, !dbg !24
+  call void @llvm.stackrestore(i8* %4), !dbg !24
+  ret i32 0, !dbg !25
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+declare i8* @llvm.stacksave() nounwind
+
+declare void @llvm.stackrestore(i8*) nounwind
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"20020104-2.c", metadata !"/Volumes/Sandbox/llvm", metadata !"clang version 3.1 (trunk 153698)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"foo", metadata !"foo", metadata !"", metadata !6, i32 6, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32*)* @foo, null, null, metadata !12} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 786473, metadata !"20020104-2.c", metadata !"/Volumes/Sandbox/llvm", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!8 = metadata !{metadata !9, metadata !10}
+!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!10 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
+!11 = metadata !{i32 786470, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !9} ; [ DW_TAG_const_type ]
+!12 = metadata !{metadata !13}
+!13 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
+!14 = metadata !{i32 786689, metadata !5, metadata !"x", metadata !6, i32 16777221, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!15 = metadata !{i32 5, i32 21, metadata !5, null}
+!16 = metadata !{i32 7, i32 13, metadata !17, null}
+!17 = metadata !{i32 786443, metadata !5, i32 6, i32 1, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!18 = metadata !{i32 786688, metadata !17, metadata !"a", metadata !6, i32 7, metadata !19, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
+!19 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 0, i64 8, i32 0, i32 0, metadata !20, metadata !21, i32 0, i32 0} ; [ DW_TAG_array_type ]
+!20 = metadata !{i32 786468, null, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!21 = metadata !{metadata !22}
+!22 = metadata !{i32 786465, i64 1, i64 0}        ; [ DW_TAG_subrange_type ]
+!23 = metadata !{i32 7, i32 8, metadata !17, null}
+!24 = metadata !{i32 9, i32 1, metadata !17, null}
+!25 = metadata !{i32 8, i32 3, metadata !17, null}
diff --git a/test/CodeGen/X86/dec-eflags-lower.ll b/test/CodeGen/X86/dec-eflags-lower.ll
deleted file mode 100644
index 190819f..0000000
--- a/test/CodeGen/X86/dec-eflags-lower.ll
+++ /dev/null
@@ -1,67 +0,0 @@
-; RUN: llc < %s -march=x86-64 | FileCheck %s
-
-%struct.obj = type { i64 }
-
-; CHECK: _Z7releaseP3obj
-define void @_Z7releaseP3obj(%struct.obj* nocapture %o) nounwind uwtable ssp {
-entry:
-; CHECK: decq	(%{{rdi|rcx}})
-; CHECK-NEXT: je
-  %refcnt = getelementptr inbounds %struct.obj* %o, i64 0, i32 0
-  %0 = load i64* %refcnt, align 8, !tbaa !0
-  %dec = add i64 %0, -1
-  store i64 %dec, i64* %refcnt, align 8, !tbaa !0
-  %tobool = icmp eq i64 %dec, 0
-  br i1 %tobool, label %if.end, label %return
-
-if.end:                                           ; preds = %entry
-  %1 = bitcast %struct.obj* %o to i8*
-  tail call void @free(i8* %1)
-  br label %return
-
-return:                                           ; preds = %entry, %if.end
-  ret void
-}
-
-@c = common global i64 0, align 8
-@a = common global i32 0, align 4
-@.str = private unnamed_addr constant [5 x i8] c"%ld\0A\00", align 1
-@b = common global i32 0, align 4
-
-; CHECK: test
-define i32 @test() nounwind uwtable ssp {
-entry:
-; CHECK: decq
-; CHECK-NOT: decq
-%0 = load i64* @c, align 8, !tbaa !0
-%dec.i = add nsw i64 %0, -1
-store i64 %dec.i, i64* @c, align 8, !tbaa !0
-%tobool.i = icmp ne i64 %dec.i, 0
-%lor.ext.i = zext i1 %tobool.i to i32
-store i32 %lor.ext.i, i32* @a, align 4, !tbaa !3
-%call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i64 0, i64 0), i64 %dec.i) nounwind
-ret i32 0
-}
-
-; CHECK: test2
-define i32 @test2() nounwind uwtable ssp {
-entry:
-; CHECK-NOT: decq ({{.*}})
-%0 = load i64* @c, align 8, !tbaa !0
-%dec.i = add nsw i64 %0, -1
-store i64 %dec.i, i64* @c, align 8, !tbaa !0
-%tobool.i = icmp ne i64 %0, 0
-%lor.ext.i = zext i1 %tobool.i to i32
-store i32 %lor.ext.i, i32* @a, align 4, !tbaa !3
-%call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i64 0, i64 0), i64 %dec.i) nounwind
-ret i32 0
-}
-
-declare i32 @printf(i8* nocapture, ...) nounwind
-
-declare void @free(i8* nocapture) nounwind
-
-!0 = metadata !{metadata !"long", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
-!3 = metadata !{metadata !"int", metadata !1}
diff --git a/test/CodeGen/X86/fdiv.ll b/test/CodeGen/X86/fdiv.ll
new file mode 100644
index 0000000..0749682
--- /dev/null
+++ b/test/CodeGen/X86/fdiv.ll
@@ -0,0 +1,41 @@
+; RUN: llc < %s -march=x86-64 -enable-unsafe-fp-math | FileCheck %s
+
+define double @exact(double %x) {
+; Exact division by a constant converted to multiplication.
+; CHECK: @exact
+; CHECK: mulsd
+  %div = fdiv double %x, 2.0
+  ret double %div
+}
+
+define double @inexact(double %x) {
+; Inexact division by a constant converted to multiplication.
+; CHECK: @inexact
+; CHECK: mulsd
+  %div = fdiv double %x, 0x41DFFFFFFFC00000 
+  ret double %div
+}
+
+define double @funky(double %x) {
+; No conversion to multiplication if too funky.
+; CHECK: @funky
+; CHECK: divsd
+  %div = fdiv double %x, 0.0
+  ret double %div
+}
+
+define double @denormal1(double %x) {
+; Don't generate multiplication by a denormal.
+; CHECK: @denormal1
+; CHECK: divsd
+  %div = fdiv double %x, 0x7FD0000000000001
+  ret double %div
+}
+
+define double @denormal2(double %x) {
+; Don't generate multiplication by a denormal.
+; CHECK: @denormal
+; CHECK: divsd
+  %div = fdiv double %x, 0x7FEFFFFFFFFFFFFF
+  ret double %div
+}
diff --git a/test/CodeGen/X86/iv-users-in-other-loops.ll b/test/CodeGen/X86/iv-users-in-other-loops.ll
deleted file mode 100644
index e51e61d..0000000
--- a/test/CodeGen/X86/iv-users-in-other-loops.ll
+++ /dev/null
@@ -1,299 +0,0 @@
-; RUN: llc < %s -mcpu=generic -march=x86-64 -enable-lsr-nested -o %t
-; RUN: not grep inc %t
-; RUN: grep dec %t | count 2
-; RUN: grep addq %t | count 12
-; RUN: not grep addb %t
-; RUN: not grep leal %t
-; RUN: not grep movq %t
-
-; IV users in each of the loops from other loops shouldn't cause LSR
-; to insert new induction variables. Previously it would create a
-; flood of new induction variables.
-; Also, the loop reversal should kick in once.
-;
-; In this example, performing LSR on the entire loop nest,
-; as opposed to only the inner loop can further reduce induction variables,
-; and their related instructions and registers.
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-target triple = "x86_64-unknown-linux-gnu"
-
-define void @foo(float* %A, i32 %IA, float* %B, i32 %IB, float* nocapture %C, i32 %N) nounwind {
-entry:
-      %0 = xor i32 %IA, 1		; <i32> [#uses=1]
-      %1 = xor i32 %IB, 1		; <i32> [#uses=1]
-      %2 = or i32 %1, %0		; <i32> [#uses=1]
-      %3 = icmp eq i32 %2, 0		; <i1> [#uses=1]
-      br i1 %3, label %bb2, label %bb13
-
-bb:		; preds = %bb3
-      %4 = load float* %A_addr.0, align 4		; <float> [#uses=1]
-      %5 = load float* %B_addr.0, align 4		; <float> [#uses=1]
-      %6 = fmul float %4, %5		; <float> [#uses=1]
-      %7 = fadd float %6, %Sum0.0		; <float> [#uses=1]
-      %indvar.next154 = add i64 %B_addr.0.rec, 1		; <i64> [#uses=1]
-      br label %bb2
-
-bb2:		; preds = %entry, %bb
-      %B_addr.0.rec = phi i64 [ %indvar.next154, %bb ], [ 0, %entry ]		; <i64> [#uses=14]
-      %Sum0.0 = phi float [ %7, %bb ], [ 0.000000e+00, %entry ]		; <float> [#uses=5]
-      %indvar146 = trunc i64 %B_addr.0.rec to i32		; <i32> [#uses=1]
-      %N_addr.0 = sub i32 %N, %indvar146		; <i32> [#uses=6]
-      %A_addr.0 = getelementptr float* %A, i64 %B_addr.0.rec		; <float*> [#uses=4]
-      %B_addr.0 = getelementptr float* %B, i64 %B_addr.0.rec		; <float*> [#uses=4]
-      %8 = icmp sgt i32 %N_addr.0, 0		; <i1> [#uses=1]
-      br i1 %8, label %bb3, label %bb4
-
-bb3:		; preds = %bb2
-      %9 = ptrtoint float* %A_addr.0 to i64		; <i64> [#uses=1]
-      %10 = and i64 %9, 15		; <i64> [#uses=1]
-      %11 = icmp eq i64 %10, 0		; <i1> [#uses=1]
-      br i1 %11, label %bb4, label %bb
-
-bb4:		; preds = %bb3, %bb2
-      %12 = ptrtoint float* %B_addr.0 to i64		; <i64> [#uses=1]
-      %13 = and i64 %12, 15		; <i64> [#uses=1]
-      %14 = icmp eq i64 %13, 0		; <i1> [#uses=1]
-      %15 = icmp sgt i32 %N_addr.0, 15		; <i1> [#uses=2]
-      br i1 %14, label %bb6.preheader, label %bb10.preheader
-
-bb10.preheader:		; preds = %bb4
-      br i1 %15, label %bb9, label %bb12.loopexit
-
-bb6.preheader:		; preds = %bb4
-      br i1 %15, label %bb5, label %bb8.loopexit
-
-bb5:		; preds = %bb5, %bb6.preheader
-      %indvar143 = phi i64 [ 0, %bb6.preheader ], [ %indvar.next144, %bb5 ]		; <i64> [#uses=3]
-      %vSum0.072 = phi <4 x float> [ zeroinitializer, %bb6.preheader ], [ %21, %bb5 ]		; <<4 x float>> [#uses=1]
-	%vSum1.070 = phi <4 x float> [ zeroinitializer, %bb6.preheader ], [ %29, %bb5 ]		; <<4 x float>> [#uses=1]
-	%vSum2.069 = phi <4 x float> [ zeroinitializer, %bb6.preheader ], [ %37, %bb5 ]		; <<4 x float>> [#uses=1]
-	%vSum3.067 = phi <4 x float> [ zeroinitializer, %bb6.preheader ], [ %45, %bb5 ]		; <<4 x float>> [#uses=1]
-	%indvar145 = trunc i64 %indvar143 to i32		; <i32> [#uses=1]
-	%tmp150 = mul i32 %indvar145, -16		; <i32> [#uses=1]
-	%N_addr.268 = add i32 %tmp150, %N_addr.0		; <i32> [#uses=1]
-	%A_addr.273.rec = shl i64 %indvar143, 4		; <i64> [#uses=5]
-	%B_addr.0.sum180 = add i64 %B_addr.0.rec, %A_addr.273.rec		; <i64> [#uses=2]
-	%B_addr.271 = getelementptr float* %B, i64 %B_addr.0.sum180		; <float*> [#uses=1]
-	%A_addr.273 = getelementptr float* %A, i64 %B_addr.0.sum180		; <float*> [#uses=1]
-	tail call void asm sideeffect ";# foo", "~{dirflag},~{fpsr},~{flags}"() nounwind
-	%16 = bitcast float* %A_addr.273 to <4 x float>*		; <<4 x float>*> [#uses=1]
-	%17 = load <4 x float>* %16, align 16		; <<4 x float>> [#uses=1]
-	%18 = bitcast float* %B_addr.271 to <4 x float>*		; <<4 x float>*> [#uses=1]
-	%19 = load <4 x float>* %18, align 16		; <<4 x float>> [#uses=1]
-	%20 = fmul <4 x float> %17, %19		; <<4 x float>> [#uses=1]
-	%21 = fadd <4 x float> %20, %vSum0.072		; <<4 x float>> [#uses=2]
-	%A_addr.273.sum163 = or i64 %A_addr.273.rec, 4		; <i64> [#uses=1]
-	%A_addr.0.sum175 = add i64 %B_addr.0.rec, %A_addr.273.sum163		; <i64> [#uses=2]
-	%22 = getelementptr float* %A, i64 %A_addr.0.sum175		; <float*> [#uses=1]
-	%23 = bitcast float* %22 to <4 x float>*		; <<4 x float>*> [#uses=1]
-	%24 = load <4 x float>* %23, align 16		; <<4 x float>> [#uses=1]
-	%25 = getelementptr float* %B, i64 %A_addr.0.sum175		; <float*> [#uses=1]
-	%26 = bitcast float* %25 to <4 x float>*		; <<4 x float>*> [#uses=1]
-	%27 = load <4 x float>* %26, align 16		; <<4 x float>> [#uses=1]
-	%28 = fmul <4 x float> %24, %27		; <<4 x float>> [#uses=1]
-	%29 = fadd <4 x float> %28, %vSum1.070		; <<4 x float>> [#uses=2]
-	%A_addr.273.sum161 = or i64 %A_addr.273.rec, 8		; <i64> [#uses=1]
-	%A_addr.0.sum174 = add i64 %B_addr.0.rec, %A_addr.273.sum161		; <i64> [#uses=2]
-	%30 = getelementptr float* %A, i64 %A_addr.0.sum174		; <float*> [#uses=1]
-	%31 = bitcast float* %30 to <4 x float>*		; <<4 x float>*> [#uses=1]
-	%32 = load <4 x float>* %31, align 16		; <<4 x float>> [#uses=1]
-	%33 = getelementptr float* %B, i64 %A_addr.0.sum174		; <float*> [#uses=1]
-	%34 = bitcast float* %33 to <4 x float>*		; <<4 x float>*> [#uses=1]
-	%35 = load <4 x float>* %34, align 16		; <<4 x float>> [#uses=1]
-	%36 = fmul <4 x float> %32, %35		; <<4 x float>> [#uses=1]
-	%37 = fadd <4 x float> %36, %vSum2.069		; <<4 x float>> [#uses=2]
-	%A_addr.273.sum159 = or i64 %A_addr.273.rec, 12		; <i64> [#uses=1]
-	%A_addr.0.sum173 = add i64 %B_addr.0.rec, %A_addr.273.sum159		; <i64> [#uses=2]
-	%38 = getelementptr float* %A, i64 %A_addr.0.sum173		; <float*> [#uses=1]
-	%39 = bitcast float* %38 to <4 x float>*		; <<4 x float>*> [#uses=1]
-	%40 = load <4 x float>* %39, align 16		; <<4 x float>> [#uses=1]
-	%41 = getelementptr float* %B, i64 %A_addr.0.sum173		; <float*> [#uses=1]
-	%42 = bitcast float* %41 to <4 x float>*		; <<4 x float>*> [#uses=1]
-	%43 = load <4 x float>* %42, align 16		; <<4 x float>> [#uses=1]
-	%44 = fmul <4 x float> %40, %43		; <<4 x float>> [#uses=1]
-	%45 = fadd <4 x float> %44, %vSum3.067		; <<4 x float>> [#uses=2]
-	%.rec83 = add i64 %A_addr.273.rec, 16		; <i64> [#uses=1]
-	%A_addr.0.sum172 = add i64 %B_addr.0.rec, %.rec83		; <i64> [#uses=2]
-	%46 = getelementptr float* %A, i64 %A_addr.0.sum172		; <float*> [#uses=1]
-	%47 = getelementptr float* %B, i64 %A_addr.0.sum172		; <float*> [#uses=1]
-	%48 = add i32 %N_addr.268, -16		; <i32> [#uses=2]
-	%49 = icmp sgt i32 %48, 15		; <i1> [#uses=1]
-	%indvar.next144 = add i64 %indvar143, 1		; <i64> [#uses=1]
-	br i1 %49, label %bb5, label %bb8.loopexit
-
-bb7:		; preds = %bb7, %bb8.loopexit
-	%indvar130 = phi i64 [ 0, %bb8.loopexit ], [ %indvar.next131, %bb7 ]		; <i64> [#uses=3]
-	%vSum0.260 = phi <4 x float> [ %vSum0.0.lcssa, %bb8.loopexit ], [ %55, %bb7 ]		; <<4 x float>> [#uses=1]
-	%indvar132 = trunc i64 %indvar130 to i32		; <i32> [#uses=1]
-	%tmp133 = mul i32 %indvar132, -4		; <i32> [#uses=1]
-	%N_addr.358 = add i32 %tmp133, %N_addr.2.lcssa		; <i32> [#uses=1]
-	%A_addr.361.rec = shl i64 %indvar130, 2		; <i64> [#uses=3]
-	%B_addr.359 = getelementptr float* %B_addr.2.lcssa, i64 %A_addr.361.rec		; <float*> [#uses=1]
-	%A_addr.361 = getelementptr float* %A_addr.2.lcssa, i64 %A_addr.361.rec		; <float*> [#uses=1]
-	%50 = bitcast float* %A_addr.361 to <4 x float>*		; <<4 x float>*> [#uses=1]
-	%51 = load <4 x float>* %50, align 16		; <<4 x float>> [#uses=1]
-	%52 = bitcast float* %B_addr.359 to <4 x float>*		; <<4 x float>*> [#uses=1]
-	%53 = load <4 x float>* %52, align 16		; <<4 x float>> [#uses=1]
-	%54 = fmul <4 x float> %51, %53		; <<4 x float>> [#uses=1]
-	%55 = fadd <4 x float> %54, %vSum0.260		; <<4 x float>> [#uses=2]
-	%.rec85 = add i64 %A_addr.361.rec, 4		; <i64> [#uses=2]
-	%56 = getelementptr float* %A_addr.2.lcssa, i64 %.rec85		; <float*> [#uses=1]
-	%57 = getelementptr float* %B_addr.2.lcssa, i64 %.rec85		; <float*> [#uses=1]
-	%58 = add i32 %N_addr.358, -4		; <i32> [#uses=2]
-	%59 = icmp sgt i32 %58, 3		; <i1> [#uses=1]
-	%indvar.next131 = add i64 %indvar130, 1		; <i64> [#uses=1]
-	br i1 %59, label %bb7, label %bb13
-
-bb8.loopexit:		; preds = %bb5, %bb6.preheader
-	%A_addr.2.lcssa = phi float* [ %A_addr.0, %bb6.preheader ], [ %46, %bb5 ]		; <float*> [#uses=3]
-	%vSum0.0.lcssa = phi <4 x float> [ zeroinitializer, %bb6.preheader ], [ %21, %bb5 ]		; <<4 x float>> [#uses=2]
-	%B_addr.2.lcssa = phi float* [ %B_addr.0, %bb6.preheader ], [ %47, %bb5 ]		; <float*> [#uses=3]
-	%vSum1.0.lcssa = phi <4 x float> [ zeroinitializer, %bb6.preheader ], [ %29, %bb5 ]		; <<4 x float>> [#uses=2]
-	%vSum2.0.lcssa = phi <4 x float> [ zeroinitializer, %bb6.preheader ], [ %37, %bb5 ]		; <<4 x float>> [#uses=2]
-	%N_addr.2.lcssa = phi i32 [ %N_addr.0, %bb6.preheader ], [ %48, %bb5 ]		; <i32> [#uses=3]
-	%vSum3.0.lcssa = phi <4 x float> [ zeroinitializer, %bb6.preheader ], [ %45, %bb5 ]		; <<4 x float>> [#uses=2]
-	%60 = icmp sgt i32 %N_addr.2.lcssa, 3		; <i1> [#uses=1]
-	br i1 %60, label %bb7, label %bb13
-
-bb9:		; preds = %bb9, %bb10.preheader
-	%indvar106 = phi i64 [ 0, %bb10.preheader ], [ %indvar.next107, %bb9 ]		; <i64> [#uses=3]
-	%vSum0.339 = phi <4 x float> [ zeroinitializer, %bb10.preheader ], [ %75, %bb9 ]		; <<4 x float>> [#uses=1]
-	%vSum1.237 = phi <4 x float> [ zeroinitializer, %bb10.preheader ], [ %80, %bb9 ]		; <<4 x float>> [#uses=1]
-	%vSum2.236 = phi <4 x float> [ zeroinitializer, %bb10.preheader ], [ %85, %bb9 ]		; <<4 x float>> [#uses=1]
-	%vSum3.234 = phi <4 x float> [ zeroinitializer, %bb10.preheader ], [ %90, %bb9 ]		; <<4 x float>> [#uses=1]
-	%indvar108 = trunc i64 %indvar106 to i32		; <i32> [#uses=1]
-	%tmp113 = mul i32 %indvar108, -16		; <i32> [#uses=1]
-	%N_addr.435 = add i32 %tmp113, %N_addr.0		; <i32> [#uses=1]
-	%A_addr.440.rec = shl i64 %indvar106, 4		; <i64> [#uses=5]
-	%B_addr.0.sum = add i64 %B_addr.0.rec, %A_addr.440.rec		; <i64> [#uses=2]
-	%B_addr.438 = getelementptr float* %B, i64 %B_addr.0.sum		; <float*> [#uses=1]
-	%A_addr.440 = getelementptr float* %A, i64 %B_addr.0.sum		; <float*> [#uses=1]
-	%61 = bitcast float* %B_addr.438 to <4 x float>*		; <i8*> [#uses=1]
-	%62 = load <4 x float>* %61, align 1
-	%B_addr.438.sum169 = or i64 %A_addr.440.rec, 4		; <i64> [#uses=1]
-	%B_addr.0.sum187 = add i64 %B_addr.0.rec, %B_addr.438.sum169		; <i64> [#uses=2]
-	%63 = getelementptr float* %B, i64 %B_addr.0.sum187		; <float*> [#uses=1]
-	%64 = bitcast float* %63 to <4 x float>*		; <i8*> [#uses=1]
-	%65 = load <4 x float>* %64, align 1
-	%B_addr.438.sum168 = or i64 %A_addr.440.rec, 8		; <i64> [#uses=1]
-	%B_addr.0.sum186 = add i64 %B_addr.0.rec, %B_addr.438.sum168		; <i64> [#uses=2]
-	%66 = getelementptr float* %B, i64 %B_addr.0.sum186		; <float*> [#uses=1]
-	%67 = bitcast float* %66 to <4 x float>*		; <i8*> [#uses=1]
-	%68 = load <4 x float>* %67, align 1
-	%B_addr.438.sum167 = or i64 %A_addr.440.rec, 12		; <i64> [#uses=1]
-	%B_addr.0.sum185 = add i64 %B_addr.0.rec, %B_addr.438.sum167		; <i64> [#uses=2]
-	%69 = getelementptr float* %B, i64 %B_addr.0.sum185		; <float*> [#uses=1]
-	%70 = bitcast float* %69 to <4 x float>*		; <i8*> [#uses=1]
-	%71 = load <4 x float>* %70, align 1
-	%72 = bitcast float* %A_addr.440 to <4 x float>*		; <<4 x float>*> [#uses=1]
-	%73 = load <4 x float>* %72, align 16		; <<4 x float>> [#uses=1]
-	%74 = fmul <4 x float> %73, %62		; <<4 x float>> [#uses=1]
-	%75 = fadd <4 x float> %74, %vSum0.339		; <<4 x float>> [#uses=2]
-	%76 = getelementptr float* %A, i64 %B_addr.0.sum187		; <float*> [#uses=1]
-	%77 = bitcast float* %76 to <4 x float>*		; <<4 x float>*> [#uses=1]
-	%78 = load <4 x float>* %77, align 16		; <<4 x float>> [#uses=1]
-	%79 = fmul <4 x float> %78, %65		; <<4 x float>> [#uses=1]
-	%80 = fadd <4 x float> %79, %vSum1.237		; <<4 x float>> [#uses=2]
-	%81 = getelementptr float* %A, i64 %B_addr.0.sum186		; <float*> [#uses=1]
-	%82 = bitcast float* %81 to <4 x float>*		; <<4 x float>*> [#uses=1]
-	%83 = load <4 x float>* %82, align 16		; <<4 x float>> [#uses=1]
-	%84 = fmul <4 x float> %83, %68		; <<4 x float>> [#uses=1]
-	%85 = fadd <4 x float> %84, %vSum2.236		; <<4 x float>> [#uses=2]
-	%86 = getelementptr float* %A, i64 %B_addr.0.sum185		; <float*> [#uses=1]
-	%87 = bitcast float* %86 to <4 x float>*		; <<4 x float>*> [#uses=1]
-	%88 = load <4 x float>* %87, align 16		; <<4 x float>> [#uses=1]
-	%89 = fmul <4 x float> %88, %71		; <<4 x float>> [#uses=1]
-	%90 = fadd <4 x float> %89, %vSum3.234		; <<4 x float>> [#uses=2]
-	%.rec89 = add i64 %A_addr.440.rec, 16		; <i64> [#uses=1]
-	%A_addr.0.sum170 = add i64 %B_addr.0.rec, %.rec89		; <i64> [#uses=2]
-	%91 = getelementptr float* %A, i64 %A_addr.0.sum170		; <float*> [#uses=1]
-	%92 = getelementptr float* %B, i64 %A_addr.0.sum170		; <float*> [#uses=1]
-	%93 = add i32 %N_addr.435, -16		; <i32> [#uses=2]
-	%94 = icmp sgt i32 %93, 15		; <i1> [#uses=1]
-	%indvar.next107 = add i64 %indvar106, 1		; <i64> [#uses=1]
-	br i1 %94, label %bb9, label %bb12.loopexit
-
-bb11:		; preds = %bb11, %bb12.loopexit
-	%indvar = phi i64 [ 0, %bb12.loopexit ], [ %indvar.next, %bb11 ]		; <i64> [#uses=3]
-	%vSum0.428 = phi <4 x float> [ %vSum0.3.lcssa, %bb12.loopexit ], [ %100, %bb11 ]		; <<4 x float>> [#uses=1]
-	%indvar96 = trunc i64 %indvar to i32		; <i32> [#uses=1]
-	%tmp = mul i32 %indvar96, -4		; <i32> [#uses=1]
-	%N_addr.526 = add i32 %tmp, %N_addr.4.lcssa		; <i32> [#uses=1]
-	%A_addr.529.rec = shl i64 %indvar, 2		; <i64> [#uses=3]
-	%B_addr.527 = getelementptr float* %B_addr.4.lcssa, i64 %A_addr.529.rec		; <float*> [#uses=1]
-	%A_addr.529 = getelementptr float* %A_addr.4.lcssa, i64 %A_addr.529.rec		; <float*> [#uses=1]
-	%95 = bitcast float* %B_addr.527 to <4 x float>*		; <i8*> [#uses=1]
-	%96 = load <4 x float>* %95, align 1
-	%97 = bitcast float* %A_addr.529 to <4 x float>*		; <<4 x float>*> [#uses=1]
-	%98 = load <4 x float>* %97, align 16		; <<4 x float>> [#uses=1]
-	%99 = fmul <4 x float> %98, %96		; <<4 x float>> [#uses=1]
-	%100 = fadd <4 x float> %99, %vSum0.428		; <<4 x float>> [#uses=2]
-	%.rec91 = add i64 %A_addr.529.rec, 4		; <i64> [#uses=2]
-	%101 = getelementptr float* %A_addr.4.lcssa, i64 %.rec91		; <float*> [#uses=1]
-	%102 = getelementptr float* %B_addr.4.lcssa, i64 %.rec91		; <float*> [#uses=1]
-	%103 = add i32 %N_addr.526, -4		; <i32> [#uses=2]
-	%104 = icmp sgt i32 %103, 3		; <i1> [#uses=1]
-	%indvar.next = add i64 %indvar, 1		; <i64> [#uses=1]
-	br i1 %104, label %bb11, label %bb13
-
-bb12.loopexit:		; preds = %bb9, %bb10.preheader
-	%A_addr.4.lcssa = phi float* [ %A_addr.0, %bb10.preheader ], [ %91, %bb9 ]		; <float*> [#uses=3]
-	%vSum0.3.lcssa = phi <4 x float> [ zeroinitializer, %bb10.preheader ], [ %75, %bb9 ]		; <<4 x float>> [#uses=2]
-	%B_addr.4.lcssa = phi float* [ %B_addr.0, %bb10.preheader ], [ %92, %bb9 ]		; <float*> [#uses=3]
-	%vSum1.2.lcssa = phi <4 x float> [ zeroinitializer, %bb10.preheader ], [ %80, %bb9 ]		; <<4 x float>> [#uses=2]
-	%vSum2.2.lcssa = phi <4 x float> [ zeroinitializer, %bb10.preheader ], [ %85, %bb9 ]		; <<4 x float>> [#uses=2]
-	%N_addr.4.lcssa = phi i32 [ %N_addr.0, %bb10.preheader ], [ %93, %bb9 ]		; <i32> [#uses=3]
-	%vSum3.2.lcssa = phi <4 x float> [ zeroinitializer, %bb10.preheader ], [ %90, %bb9 ]		; <<4 x float>> [#uses=2]
-	%105 = icmp sgt i32 %N_addr.4.lcssa, 3		; <i1> [#uses=1]
-	br i1 %105, label %bb11, label %bb13
-
-bb13:		; preds = %bb12.loopexit, %bb11, %bb8.loopexit, %bb7, %entry
-	%Sum0.1 = phi float [ 0.000000e+00, %entry ], [ %Sum0.0, %bb7 ], [ %Sum0.0, %bb8.loopexit ], [ %Sum0.0, %bb11 ], [ %Sum0.0, %bb12.loopexit ]		; <float> [#uses=1]
-	%vSum3.1 = phi <4 x float> [ zeroinitializer, %entry ], [ %vSum3.0.lcssa, %bb7 ], [ %vSum3.0.lcssa, %bb8.loopexit ], [ %vSum3.2.lcssa, %bb11 ], [ %vSum3.2.lcssa, %bb12.loopexit ]		; <<4 x float>> [#uses=1]
-	%N_addr.1 = phi i32 [ %N, %entry ], [ %N_addr.2.lcssa, %bb8.loopexit ], [ %58, %bb7 ], [ %N_addr.4.lcssa, %bb12.loopexit ], [ %103, %bb11 ]		; <i32> [#uses=2]
-	%vSum2.1 = phi <4 x float> [ zeroinitializer, %entry ], [ %vSum2.0.lcssa, %bb7 ], [ %vSum2.0.lcssa, %bb8.loopexit ], [ %vSum2.2.lcssa, %bb11 ], [ %vSum2.2.lcssa, %bb12.loopexit ]		; <<4 x float>> [#uses=1]
-	%vSum1.1 = phi <4 x float> [ zeroinitializer, %entry ], [ %vSum1.0.lcssa, %bb7 ], [ %vSum1.0.lcssa, %bb8.loopexit ], [ %vSum1.2.lcssa, %bb11 ], [ %vSum1.2.lcssa, %bb12.loopexit ]		; <<4 x float>> [#uses=1]
-	%B_addr.1 = phi float* [ %B, %entry ], [ %B_addr.2.lcssa, %bb8.loopexit ], [ %57, %bb7 ], [ %B_addr.4.lcssa, %bb12.loopexit ], [ %102, %bb11 ]		; <float*> [#uses=1]
-	%vSum0.1 = phi <4 x float> [ zeroinitializer, %entry ], [ %vSum0.0.lcssa, %bb8.loopexit ], [ %55, %bb7 ], [ %vSum0.3.lcssa, %bb12.loopexit ], [ %100, %bb11 ]		; <<4 x float>> [#uses=1]
-	%A_addr.1 = phi float* [ %A, %entry ], [ %A_addr.2.lcssa, %bb8.loopexit ], [ %56, %bb7 ], [ %A_addr.4.lcssa, %bb12.loopexit ], [ %101, %bb11 ]		; <float*> [#uses=1]
-	%106 = fadd <4 x float> %vSum0.1, %vSum2.1		; <<4 x float>> [#uses=1]
-	%107 = fadd <4 x float> %vSum1.1, %vSum3.1		; <<4 x float>> [#uses=1]
-	%108 = fadd <4 x float> %106, %107		; <<4 x float>> [#uses=4]
-	%tmp23 = extractelement <4 x float> %108, i32 0		; <float> [#uses=1]
-	%tmp21 = extractelement <4 x float> %108, i32 1		; <float> [#uses=1]
-	%109 = fadd float %tmp23, %tmp21		; <float> [#uses=1]
-	%tmp19 = extractelement <4 x float> %108, i32 2		; <float> [#uses=1]
-	%tmp17 = extractelement <4 x float> %108, i32 3		; <float> [#uses=1]
-	%110 = fadd float %tmp19, %tmp17		; <float> [#uses=1]
-	%111 = fadd float %109, %110		; <float> [#uses=1]
-	%Sum0.254 = fadd float %111, %Sum0.1		; <float> [#uses=2]
-	%112 = icmp sgt i32 %N_addr.1, 0		; <i1> [#uses=1]
-	br i1 %112, label %bb.nph56, label %bb16
-
-bb.nph56:		; preds = %bb13
-	%tmp. = zext i32 %N_addr.1 to i64		; <i64> [#uses=1]
-	br label %bb14
-
-bb14:		; preds = %bb14, %bb.nph56
-	%indvar117 = phi i64 [ 0, %bb.nph56 ], [ %indvar.next118, %bb14 ]		; <i64> [#uses=3]
-	%Sum0.255 = phi float [ %Sum0.254, %bb.nph56 ], [ %Sum0.2, %bb14 ]		; <float> [#uses=1]
-	%tmp.122 = sext i32 %IB to i64		; <i64> [#uses=1]
-	%B_addr.652.rec = mul i64 %indvar117, %tmp.122		; <i64> [#uses=1]
-	%tmp.124 = sext i32 %IA to i64		; <i64> [#uses=1]
-	%A_addr.653.rec = mul i64 %indvar117, %tmp.124		; <i64> [#uses=1]
-	%B_addr.652 = getelementptr float* %B_addr.1, i64 %B_addr.652.rec		; <float*> [#uses=1]
-	%A_addr.653 = getelementptr float* %A_addr.1, i64 %A_addr.653.rec		; <float*> [#uses=1]
-	%113 = load float* %A_addr.653, align 4		; <float> [#uses=1]
-	%114 = load float* %B_addr.652, align 4		; <float> [#uses=1]
-	%115 = fmul float %113, %114		; <float> [#uses=1]
-	%Sum0.2 = fadd float %115, %Sum0.255		; <float> [#uses=2]
-	%indvar.next118 = add i64 %indvar117, 1		; <i64> [#uses=2]
-	%exitcond = icmp eq i64 %indvar.next118, %tmp.		; <i1> [#uses=1]
-	br i1 %exitcond, label %bb16, label %bb14
-
-bb16:		; preds = %bb14, %bb13
-	%Sum0.2.lcssa = phi float [ %Sum0.254, %bb13 ], [ %Sum0.2, %bb14 ]		; <float> [#uses=1]
-	store float %Sum0.2.lcssa, float* %C, align 4
-	ret void
-}
diff --git a/test/CodeGen/X86/licm-dominance.ll b/test/CodeGen/X86/licm-dominance.ll
index 8a0958d..019f8a3 100644
--- a/test/CodeGen/X86/licm-dominance.ll
+++ b/test/CodeGen/X86/licm-dominance.ll
@@ -1,7 +1,7 @@
-; RUN: llc -asm-verbose=false < %s | FileCheck %s
+; RUN: llc -asm-verbose=true < %s | FileCheck %s
 
 ; MachineLICM should check dominance before hoisting instructions.
-; CHECK:	jne	LBB0_3
+; CHECK: ## in Loop:
 ; CHECK-NEXT:	xorb	%al, %al
 ; CHECK-NEXT:	testb	%al, %al
 
diff --git a/test/CodeGen/X86/lit.local.cfg b/test/CodeGen/X86/lit.local.cfg
index b05ed3c..a8ad0f1 100644
--- a/test/CodeGen/X86/lit.local.cfg
+++ b/test/CodeGen/X86/lit.local.cfg
@@ -1,13 +1,6 @@
 config.suffixes = ['.ll', '.c', '.cpp']
 
-def getRoot(config):
-    if not config.parent:
-        return config
-    return getRoot(config.parent)
-
-root = getRoot(config)
-
-targets = set(root.targets_to_build.split())
+targets = set(config.root.targets_to_build.split())
 if not 'X86' in targets:
     config.unsupported = True
 
diff --git a/test/CodeGen/X86/loop-blocks.ll b/test/CodeGen/X86/loop-blocks.ll
index faba630..d14102f 100644
--- a/test/CodeGen/X86/loop-blocks.ll
+++ b/test/CodeGen/X86/loop-blocks.ll
@@ -41,6 +41,7 @@ done:
 ; CHECK-NEXT:   align
 ; CHECK-NEXT: .LBB1_4:
 ; CHECK-NEXT:   callq bar99
+; CHECK-NEXT:   align
 ; CHECK-NEXT: .LBB1_1:
 ; CHECK-NEXT:   callq body
 
@@ -75,19 +76,21 @@ exit:
 ; CHECK: yet_more_involved:
 ;      CHECK:   jmp .LBB2_1
 ; CHECK-NEXT:   align
-; CHECK-NEXT: .LBB2_4:
-; CHECK-NEXT:   callq bar99
+; CHECK-NEXT: .LBB2_5:
+; CHECK-NEXT:   callq block_a_true_func
+; CHECK-NEXT:   callq block_a_merge_func
+; CHECK-NEXT:   align
+; CHECK-NEXT: .LBB2_1:
+; CHECK-NEXT:   callq body
+;
+; LBB2_4
+;      CHECK:   callq bar99
 ; CHECK-NEXT:   callq get
 ; CHECK-NEXT:   cmpl $2999, %eax
 ; CHECK-NEXT:   jle .LBB2_5
 ; CHECK-NEXT:   callq block_a_false_func
 ; CHECK-NEXT:   callq block_a_merge_func
 ; CHECK-NEXT:   jmp .LBB2_1
-; CHECK-NEXT: .LBB2_5:
-; CHECK-NEXT:   callq block_a_true_func
-; CHECK-NEXT:   callq block_a_merge_func
-; CHECK-NEXT: .LBB2_1:
-; CHECK-NEXT:   callq body
 
 define void @yet_more_involved() nounwind {
 entry:
@@ -136,17 +139,22 @@ exit:
 ; CHECK-NEXT:   align
 ; CHECK-NEXT: .LBB3_7:
 ; CHECK-NEXT:   callq   bar100
-; CHECK-NEXT:   jmp     .LBB3_1
-; CHECK-NEXT: .LBB3_8:
+; CHECK-NEXT:   align
+; CHECK-NEXT: .LBB3_1:
+; CHECK-NEXT:   callq   loop_header
+;      CHECK:   jl .LBB3_7
+;      CHECK:   jge .LBB3_3
 ; CHECK-NEXT:   callq   bar101
 ; CHECK-NEXT:   jmp     .LBB3_1
-; CHECK-NEXT: .LBB3_9:
+; CHECK-NEXT: .LBB3_3:
+;      CHECK:   jge .LBB3_4
 ; CHECK-NEXT:   callq   bar102
 ; CHECK-NEXT:   jmp     .LBB3_1
-; CHECK-NEXT: .LBB3_5:
+; CHECK-NEXT: .LBB3_4:
+;      CHECK:   jl .LBB3_6
 ; CHECK-NEXT:   callq   loop_latch
-; CHECK-NEXT: .LBB3_1:
-; CHECK-NEXT:   callq   loop_header
+; CHECK-NEXT:   jmp     .LBB3_1
+; CHECK-NEXT: .LBB3_6:
 
 define void @cfg_islands() nounwind {
 entry:
diff --git a/test/CodeGen/X86/loop-strength-reduce3.ll b/test/CodeGen/X86/loop-strength-reduce3.ll
deleted file mode 100644
index d6c265f..0000000
--- a/test/CodeGen/X86/loop-strength-reduce3.ll
+++ /dev/null
@@ -1,37 +0,0 @@
-; RUN: llc < %s -march=x86 -enable-lsr-nested | grep cmp | grep 240
-; RUN: llc < %s -march=x86 -enable-lsr-nested | grep inc | count 1
-
-define i32 @foo(i32 %A, i32 %B, i32 %C, i32 %D) nounwind {
-entry:
-	%tmp2955 = icmp sgt i32 %C, 0		; <i1> [#uses=1]
-	br i1 %tmp2955, label %bb26.outer.us, label %bb40.split
-
-bb26.outer.us:		; preds = %bb26.bb32_crit_edge.us, %entry
-	%i.044.0.ph.us = phi i32 [ 0, %entry ], [ %indvar.next57, %bb26.bb32_crit_edge.us ]		; <i32> [#uses=2]
-	%k.1.ph.us = phi i32 [ 0, %entry ], [ %k.0.us, %bb26.bb32_crit_edge.us ]		; <i32> [#uses=1]
-	%tmp3.us = mul i32 %i.044.0.ph.us, 6		; <i32> [#uses=1]
-	br label %bb1.us
-
-bb1.us:		; preds = %bb1.us, %bb26.outer.us
-	%j.053.us = phi i32 [ 0, %bb26.outer.us ], [ %tmp25.us, %bb1.us ]		; <i32> [#uses=2]
-	%k.154.us = phi i32 [ %k.1.ph.us, %bb26.outer.us ], [ %k.0.us, %bb1.us ]		; <i32> [#uses=1]
-	%tmp5.us = add i32 %tmp3.us, %j.053.us		; <i32> [#uses=1]
-	%tmp7.us = shl i32 %D, %tmp5.us		; <i32> [#uses=2]
-	%tmp9.us = icmp eq i32 %tmp7.us, %B		; <i1> [#uses=1]
-	%tmp910.us = zext i1 %tmp9.us to i32		; <i32> [#uses=1]
-	%tmp12.us = and i32 %tmp7.us, %A		; <i32> [#uses=1]
-	%tmp19.us = and i32 %tmp12.us, %tmp910.us		; <i32> [#uses=1]
-	%k.0.us = add i32 %tmp19.us, %k.154.us		; <i32> [#uses=3]
-	%tmp25.us = add i32 %j.053.us, 1		; <i32> [#uses=2]
-	%tmp29.us = icmp slt i32 %tmp25.us, %C		; <i1> [#uses=1]
-	br i1 %tmp29.us, label %bb1.us, label %bb26.bb32_crit_edge.us
-
-bb26.bb32_crit_edge.us:		; preds = %bb1.us
-	%indvar.next57 = add i32 %i.044.0.ph.us, 1		; <i32> [#uses=2]
-	%exitcond = icmp eq i32 %indvar.next57, 40		; <i1> [#uses=1]
-	br i1 %exitcond, label %bb40.split, label %bb26.outer.us
-
-bb40.split:		; preds = %bb26.bb32_crit_edge.us, %entry
-	%k.1.lcssa.lcssa.us-lcssa = phi i32 [ %k.0.us, %bb26.bb32_crit_edge.us ], [ 0, %entry ]		; <i32> [#uses=1]
-	ret i32 %k.1.lcssa.lcssa.us-lcssa
-}
diff --git a/test/CodeGen/X86/lsr-loop-exit-cond.ll b/test/CodeGen/X86/lsr-loop-exit-cond.ll
index bdf09df..ebda9f2 100644
--- a/test/CodeGen/X86/lsr-loop-exit-cond.ll
+++ b/test/CodeGen/X86/lsr-loop-exit-cond.ll
@@ -1,5 +1,6 @@
 ; RUN: llc -mtriple=x86_64-darwin < %s | FileCheck %s
 
+; CHECK: t:
 ; CHECK: decq
 ; CHECK-NEXT: movl (
 ; CHECK-NEXT: jne
@@ -136,3 +137,44 @@ bb2:		; preds = %bb
 	store i8 %92, i8* %93, align 1
 	ret void
 }
+
+; Check that DAGCombiner doesn't mess up the IV update when the exiting value
+; is equal to the stride.
+; It must not fold (cmp (add iv, 1), 1) --> (cmp iv, 0).
+
+; CHECK: f:
+; CHECK: %for.body
+; CHECK: incl [[IV:%e..]]
+; CHECK: cmpl $1, [[IV]]
+; CHECK: jne
+; CHECK: ret
+
+define i32 @f(i32 %i, i32* nocapture %a) nounwind uwtable readonly ssp {
+entry:
+  %cmp4 = icmp eq i32 %i, 1
+  br i1 %cmp4, label %for.end, label %for.body.lr.ph
+
+for.body.lr.ph:                                   ; preds = %entry
+  %0 = sext i32 %i to i64
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %indvars.iv = phi i64 [ %0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
+  %bi.06 = phi i32 [ 0, %for.body.lr.ph ], [ %i.addr.0.bi.0, %for.body ]
+  %b.05 = phi i32 [ 0, %for.body.lr.ph ], [ %.b.0, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %1 = load i32* %arrayidx, align 4
+  %cmp1 = icmp ugt i32 %1, %b.05
+  %.b.0 = select i1 %cmp1, i32 %1, i32 %b.05
+  %2 = trunc i64 %indvars.iv to i32
+  %i.addr.0.bi.0 = select i1 %cmp1, i32 %2, i32 %bi.06
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  %bi.0.lcssa = phi i32 [ 0, %entry ], [ %i.addr.0.bi.0, %for.body ]
+  ret i32 %bi.0.lcssa
+}
+
diff --git a/test/CodeGen/X86/machine-cp.ll b/test/CodeGen/X86/machine-cp.ll
index 54fa01c..8e97b99 100644
--- a/test/CodeGen/X86/machine-cp.ll
+++ b/test/CodeGen/X86/machine-cp.ll
@@ -5,11 +5,11 @@
 define i32 @t1(i32 %a, i32 %b) nounwind  {
 entry:
 ; CHECK: t1:
-; CHECK: jne
+; CHECK: je [[LABEL:.*BB.*]]
   %cmp1 = icmp eq i32 %b, 0
   br i1 %cmp1, label %while.end, label %while.body
 
-; CHECK: BB
+; CHECK: [[LABEL]]:
 ; CHECK-NOT: mov
 ; CHECK: ret
 
diff --git a/test/CodeGen/X86/misched-new.ll b/test/CodeGen/X86/misched-new.ll
index f3c2af8..8f2f6f7 100644
--- a/test/CodeGen/X86/misched-new.ll
+++ b/test/CodeGen/X86/misched-new.ll
@@ -1,8 +1,10 @@
 ; RUN: llc -march=x86-64 -mcpu=core2 -enable-misched -misched=shuffle -misched-bottomup < %s
-; XFAIL: *
-; ...should pass. See PR12324: misched bringup
+; REQUIRES: asserts
 ;
 ; Interesting MachineScheduler cases.
+;
+; FIXME: There should be an assert in the coalescer that we're not rematting
+; "not-quite-dead" copies, but that breaks a lot of tests <rdar://problem/11148682>.
 
 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
 
diff --git a/test/CodeGen/X86/postra-licm.ll b/test/CodeGen/X86/postra-licm.ll
index 48c48ae..01d6cbe 100644
--- a/test/CodeGen/X86/postra-licm.ll
+++ b/test/CodeGen/X86/postra-licm.ll
@@ -70,8 +70,8 @@ bb26.preheader:                                   ; preds = %imix_test.exit
 bb23:                                             ; preds = %imix_test.exit
   unreachable
 ; Verify that there are no loads inside the loop.
-; X86-32: %bb26.preheader
 ; X86-32: .align 4
+; X86-32: %bb28
 ; X86-32-NOT: (%esp),
 ; X86-32-NOT: (%ebp),
 ; X86-32: jmp
diff --git a/test/CodeGen/X86/pr12360.ll b/test/CodeGen/X86/pr12360.ll
new file mode 100644
index 0000000..f29e50e
--- /dev/null
+++ b/test/CodeGen/X86/pr12360.ll
@@ -0,0 +1,46 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+
+define zeroext i1 @f1(i8* %x) {
+; CHECK: f1:
+; CHECK: movb	(%rdi), %al
+; CHECK-NEXT: ret
+
+entry:
+  %0 = load i8* %x, align 1, !range !0
+  %tobool = trunc i8 %0 to i1
+  ret i1 %tobool
+}
+
+define zeroext i1 @f2(i8* %x) {
+; CHECK: f2:
+; CHECK: movb	(%rdi), %al
+; CHECK-NEXT: ret
+
+entry:
+  %0 = load i8* %x, align 1, !range !0
+  %tobool = icmp ne i8 %0, 0
+  ret i1 %tobool
+}
+
+!0 = metadata !{i8 0, i8 2}
+
+
+; check that we don't build a "trunc" from i1 to i1, which would assert.
+define zeroext i1 @f3(i1 %x) {
+; CHECK: f3:
+
+entry:
+  %tobool = icmp ne i1 %x, 0
+  ret i1 %tobool
+}
+
+; check that we don't build a trunc when other bits are needed
+define zeroext i1 @f4(i32 %x) {
+; CHECK: f4:
+; CHECK: and
+
+entry:
+  %y = and i32 %x, 32768
+  %z = icmp ne i32 %y, 0
+  ret i1 %z
+}
diff --git a/test/CodeGen/X86/pr2659.ll b/test/CodeGen/X86/pr2659.ll
index 5dab5c9..8003588 100644
--- a/test/CodeGen/X86/pr2659.ll
+++ b/test/CodeGen/X86/pr2659.ll
@@ -18,11 +18,12 @@ forcond.preheader:              ; preds = %entry
 ; CHECK-NOT: xorl
 ; CHECK-NOT: movl
 ; CHECK-NOT: LBB
-; CHECK: jne
+; CHECK: je
 
 ; There should be no moves required in the for loop body.
 ; CHECK: %forbody
 ; CHECK-NOT: mov
+; CHECK: jbe
 
 ifthen:         ; preds = %entry
   ret i32 0
diff --git a/test/CodeGen/X86/pr3495.ll b/test/CodeGen/X86/pr3495.ll
deleted file mode 100644
index 1ce3b49..0000000
--- a/test/CodeGen/X86/pr3495.ll
+++ /dev/null
@@ -1,79 +0,0 @@
-; RUN: llc < %s -march=x86 -stats -enable-lsr-nested |& grep {Number of spill slots allocated} | grep 1
-; PR3495
-;
-; Note: this should not spill at all with either good LSR or good regalloc.
-
-target triple = "i386-pc-linux-gnu"
-@x = external global [8 x i32], align 32		; <[8 x i32]*> [#uses=1]
-@rows = external global [8 x i32], align 32		; <[8 x i32]*> [#uses=2]
-@up = external global [15 x i32], align 32		; <[15 x i32]*> [#uses=2]
-@down = external global [15 x i32], align 32		; <[15 x i32]*> [#uses=1]
-
-define i32 @queens(i32 %c) nounwind {
-entry:
-	%tmp91 = add i32 %c, 1		; <i32> [#uses=3]
-	%tmp135 = getelementptr [8 x i32]* @x, i32 0, i32 %tmp91		; <i32*> [#uses=1]
-	br label %bb
-
-bb:		; preds = %bb569, %entry
-	%r25.0.reg2mem.0 = phi i32 [ 0, %entry ], [ %indvar.next715, %bb569 ]		; <i32> [#uses=4]
-	%tmp27 = getelementptr [8 x i32]* @rows, i32 0, i32 %r25.0.reg2mem.0		; <i32*> [#uses=1]
-	%tmp28 = load i32* %tmp27, align 4		; <i32> [#uses=1]
-	%tmp29 = icmp eq i32 %tmp28, 0		; <i1> [#uses=1]
-	br i1 %tmp29, label %bb569, label %bb31
-
-bb31:		; preds = %bb
-	%tmp35 = sub i32 %r25.0.reg2mem.0, 0		; <i32> [#uses=1]
-	%tmp36 = getelementptr [15 x i32]* @up, i32 0, i32 %tmp35		; <i32*> [#uses=1]
-	%tmp37 = load i32* %tmp36, align 4		; <i32> [#uses=1]
-	%tmp38 = icmp eq i32 %tmp37, 0		; <i1> [#uses=1]
-	br i1 %tmp38, label %bb569, label %bb41
-
-bb41:		; preds = %bb31
-	%tmp54 = sub i32 %r25.0.reg2mem.0, %c		; <i32> [#uses=1]
-	%tmp55 = add i32 %tmp54, 7		; <i32> [#uses=1]
-	%tmp62 = getelementptr [15 x i32]* @up, i32 0, i32 %tmp55		; <i32*> [#uses=2]
-	store i32 0, i32* %tmp62, align 4
-	br label %bb92
-
-bb92:		; preds = %bb545, %bb41
-	%r20.0.reg2mem.0 = phi i32 [ 0, %bb41 ], [ %indvar.next711, %bb545 ]		; <i32> [#uses=5]
-	%tmp94 = getelementptr [8 x i32]* @rows, i32 0, i32 %r20.0.reg2mem.0		; <i32*> [#uses=1]
-	%tmp95 = load i32* %tmp94, align 4		; <i32> [#uses=0]
-	%tmp112 = add i32 %r20.0.reg2mem.0, %tmp91		; <i32> [#uses=1]
-	%tmp113 = getelementptr [15 x i32]* @down, i32 0, i32 %tmp112		; <i32*> [#uses=2]
-	%tmp114 = load i32* %tmp113, align 4		; <i32> [#uses=1]
-	%tmp115 = icmp eq i32 %tmp114, 0		; <i1> [#uses=1]
-	br i1 %tmp115, label %bb545, label %bb118
-
-bb118:		; preds = %bb92
-	%tmp122 = sub i32 %r20.0.reg2mem.0, %tmp91		; <i32> [#uses=0]
-	store i32 0, i32* %tmp113, align 4
-	store i32 %r20.0.reg2mem.0, i32* %tmp135, align 4
-	br label %bb142
-
-bb142:		; preds = %bb142, %bb118
-	%k18.0.reg2mem.0 = phi i32 [ 0, %bb118 ], [ %indvar.next709, %bb142 ]		; <i32> [#uses=1]
-	%indvar.next709 = add i32 %k18.0.reg2mem.0, 1		; <i32> [#uses=2]
-	%exitcond710 = icmp eq i32 %indvar.next709, 8		; <i1> [#uses=1]
-	br i1 %exitcond710, label %bb155, label %bb142
-
-bb155:		; preds = %bb142
-	%tmp156 = tail call i32 @putchar(i32 10) nounwind		; <i32> [#uses=0]
-	br label %bb545
-
-bb545:		; preds = %bb155, %bb92
-	%indvar.next711 = add i32 %r20.0.reg2mem.0, 1		; <i32> [#uses=2]
-	%exitcond712 = icmp eq i32 %indvar.next711, 8		; <i1> [#uses=1]
-	br i1 %exitcond712, label %bb553, label %bb92
-
-bb553:		; preds = %bb545
-	store i32 1, i32* %tmp62, align 4
-	br label %bb569
-
-bb569:		; preds = %bb553, %bb31, %bb
-	%indvar.next715 = add i32 %r25.0.reg2mem.0, 1		; <i32> [#uses=1]
-	br label %bb
-}
-
-declare i32 @putchar(i32)
diff --git a/test/CodeGen/X86/rd-mod-wr-eflags.ll b/test/CodeGen/X86/rd-mod-wr-eflags.ll
new file mode 100644
index 0000000..faca3d7
--- /dev/null
+++ b/test/CodeGen/X86/rd-mod-wr-eflags.ll
@@ -0,0 +1,179 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+
+%struct.obj = type { i64 }
+
+; CHECK: _Z7releaseP3obj
+define void @_Z7releaseP3obj(%struct.obj* nocapture %o) nounwind uwtable ssp {
+entry:
+; CHECK: decq	(%{{rdi|rcx}})
+; CHECK-NEXT: je
+  %refcnt = getelementptr inbounds %struct.obj* %o, i64 0, i32 0
+  %0 = load i64* %refcnt, align 8, !tbaa !0
+  %dec = add i64 %0, -1
+  store i64 %dec, i64* %refcnt, align 8, !tbaa !0
+  %tobool = icmp eq i64 %dec, 0
+  br i1 %tobool, label %if.end, label %return
+
+if.end:                                           ; preds = %entry
+  %1 = bitcast %struct.obj* %o to i8*
+  tail call void @free(i8* %1)
+  br label %return
+
+return:                                           ; preds = %entry, %if.end
+  ret void
+}
+
+@c = common global i64 0, align 8
+@a = common global i32 0, align 4
+@.str = private unnamed_addr constant [5 x i8] c"%ld\0A\00", align 1
+@b = common global i32 0, align 4
+
+; CHECK: test
+define i32 @test() nounwind uwtable ssp {
+entry:
+; CHECK: decq
+; CHECK-NOT: decq
+%0 = load i64* @c, align 8, !tbaa !0
+%dec.i = add nsw i64 %0, -1
+store i64 %dec.i, i64* @c, align 8, !tbaa !0
+%tobool.i = icmp ne i64 %dec.i, 0
+%lor.ext.i = zext i1 %tobool.i to i32
+store i32 %lor.ext.i, i32* @a, align 4, !tbaa !3
+%call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i64 0, i64 0), i64 %dec.i) nounwind
+ret i32 0
+}
+
+; CHECK: test2
+define i32 @test2() nounwind uwtable ssp {
+entry:
+; CHECK-NOT: decq ({{.*}})
+%0 = load i64* @c, align 8, !tbaa !0
+%dec.i = add nsw i64 %0, -1
+store i64 %dec.i, i64* @c, align 8, !tbaa !0
+%tobool.i = icmp ne i64 %0, 0
+%lor.ext.i = zext i1 %tobool.i to i32
+store i32 %lor.ext.i, i32* @a, align 4, !tbaa !3
+%call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i64 0, i64 0), i64 %dec.i) nounwind
+ret i32 0
+}
+
+declare i32 @printf(i8* nocapture, ...) nounwind
+
+declare void @free(i8* nocapture) nounwind
+
+!0 = metadata !{metadata !"long", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
+!3 = metadata !{metadata !"int", metadata !1}
+
+%struct.obj2 = type { i64, i32, i16, i8 }
+
+declare void @other(%struct.obj2* ) nounwind;
+
+; CHECK: example_dec
+define void @example_dec(%struct.obj2* %o) nounwind uwtable ssp {
+; 64 bit dec
+entry:
+  %s64 = getelementptr inbounds %struct.obj2* %o, i64 0, i32 0
+; CHECK-NOT: load 
+  %0 = load i64* %s64, align 8
+; CHECK: decq ({{.*}})
+  %dec = add i64 %0, -1
+  store i64 %dec, i64* %s64, align 8
+  %tobool = icmp eq i64 %dec, 0
+  br i1 %tobool, label %if.end, label %return
+
+; 32 bit dec
+if.end:
+  %s32 = getelementptr inbounds %struct.obj2* %o, i64 0, i32 1
+; CHECK-NOT: load 
+  %1 = load i32* %s32, align 4
+; CHECK: decl {{[0-9][0-9]*}}({{.*}})
+  %dec1 = add i32 %1, -1
+  store i32 %dec1, i32* %s32, align 4
+  %tobool2 = icmp eq i32 %dec1, 0
+  br i1 %tobool2, label %if.end1, label %return
+
+; 16 bit dec
+if.end1:
+  %s16 = getelementptr inbounds %struct.obj2* %o, i64 0, i32 2
+; CHECK-NOT: load 
+  %2 = load i16* %s16, align 2
+; CHECK: decw {{[0-9][0-9]*}}({{.*}})
+  %dec2 = add i16 %2, -1
+  store i16 %dec2, i16* %s16, align 2
+  %tobool3 = icmp eq i16 %dec2, 0
+  br i1 %tobool3, label %if.end2, label %return
+
+; 8 bit dec
+if.end2:
+  %s8 = getelementptr inbounds %struct.obj2* %o, i64 0, i32 3
+; CHECK-NOT: load 
+  %3 = load i8* %s8
+; CHECK: decb {{[0-9][0-9]*}}({{.*}})
+  %dec3 = add i8 %3, -1
+  store i8 %dec3, i8* %s8
+  %tobool4 = icmp eq i8 %dec3, 0
+  br i1 %tobool4, label %if.end4, label %return
+
+if.end4:
+  tail call void @other(%struct.obj2* %o) nounwind
+  br label %return
+
+return:                                           ; preds = %if.end4, %if.end, %entry                                                                                                                                                                               
+  ret void
+}
+
+; CHECK: example_inc
+define void @example_inc(%struct.obj2* %o) nounwind uwtable ssp {
+; 64 bit inc
+entry:
+  %s64 = getelementptr inbounds %struct.obj2* %o, i64 0, i32 0
+; CHECK-NOT: load 
+  %0 = load i64* %s64, align 8
+; CHECK: incq ({{.*}})
+  %inc = add i64 %0, 1
+  store i64 %inc, i64* %s64, align 8
+  %tobool = icmp eq i64 %inc, 0
+  br i1 %tobool, label %if.end, label %return
+
+; 32 bit inc
+if.end:
+  %s32 = getelementptr inbounds %struct.obj2* %o, i64 0, i32 1
+; CHECK-NOT: load 
+  %1 = load i32* %s32, align 4
+; CHECK: incl {{[0-9][0-9]*}}({{.*}})
+  %inc1 = add i32 %1, 1
+  store i32 %inc1, i32* %s32, align 4
+  %tobool2 = icmp eq i32 %inc1, 0
+  br i1 %tobool2, label %if.end1, label %return
+
+; 16 bit inc
+if.end1:
+  %s16 = getelementptr inbounds %struct.obj2* %o, i64 0, i32 2
+; CHECK-NOT: load 
+  %2 = load i16* %s16, align 2
+; CHECK: incw {{[0-9][0-9]*}}({{.*}})
+  %inc2 = add i16 %2, 1
+  store i16 %inc2, i16* %s16, align 2
+  %tobool3 = icmp eq i16 %inc2, 0
+  br i1 %tobool3, label %if.end2, label %return
+
+; 8 bit inc
+if.end2:
+  %s8 = getelementptr inbounds %struct.obj2* %o, i64 0, i32 3
+; CHECK-NOT: load 
+  %3 = load i8* %s8
+; CHECK: incb {{[0-9][0-9]*}}({{.*}})
+  %inc3 = add i8 %3, 1
+  store i8 %inc3, i8* %s8
+  %tobool4 = icmp eq i8 %inc3, 0
+  br i1 %tobool4, label %if.end4, label %return
+
+if.end4:
+  tail call void @other(%struct.obj2* %o) nounwind
+  br label %return
+
+return:
+  ret void
+}
diff --git a/test/CodeGen/X86/select.ll b/test/CodeGen/X86/select.ll
index ce04e07..f465a4f 100644
--- a/test/CodeGen/X86/select.ll
+++ b/test/CodeGen/X86/select.ll
@@ -75,9 +75,9 @@ define void @test6(i32 %C, <4 x float>* %A, <4 x float>* %B) nounwind {
 ; Verify that the fmul gets sunk into the one part of the diamond where it is
 ; needed.
 ; CHECK: test6:
-; CHECK: jne
-; CHECK: mulps
+; CHECK: je
 ; CHECK: ret
+; CHECK: mulps
 ; CHECK: ret
 }
 
diff --git a/test/CodeGen/X86/sibcall.ll b/test/CodeGen/X86/sibcall.ll
index a9a5420..2af3559 100644
--- a/test/CodeGen/X86/sibcall.ll
+++ b/test/CodeGen/X86/sibcall.ll
@@ -147,7 +147,7 @@ define i32 @t11(i32 %x, i32 %y, i32 %z.0, i32 %z.1, i32 %z.2) nounwind ssp {
 
 ; 32: t11:
 ; 32-NOT: subl ${{[0-9]+}}, %esp
-; 32: jne
+; 32: je
 ; 32-NOT: movl
 ; 32-NOT: addl ${{[0-9]+}}, %esp
 ; 32: jmp {{_?}}foo5
diff --git a/test/CodeGen/X86/sink-hoist.ll b/test/CodeGen/X86/sink-hoist.ll
index e13a817..7957eb8 100644
--- a/test/CodeGen/X86/sink-hoist.ll
+++ b/test/CodeGen/X86/sink-hoist.ll
@@ -7,8 +7,9 @@
 
 ; CHECK: foo:
 ; CHECK-NEXT: testb $1, %dil
-; CHECK-NEXT: je
+; CHECK-NEXT: jne
 ; CHECK-NEXT: divsd
+; CHECK-NEXT: movaps
 ; CHECK-NEXT: ret
 ; CHECK:      divsd
 
@@ -25,10 +26,10 @@ define double @foo(double %x, double %y, i1 %c) nounwind {
 
 ; CHECK: split:
 ; CHECK-NEXT: testb $1, %dil
-; CHECK-NEXT: je
-; CHECK-NEXT: divsd
+; CHECK-NEXT: jne
+; CHECK-NEXT: movaps
 ; CHECK-NEXT: ret
-; CHECK:      movaps
+; CHECK:      divsd
 ; CHECK-NEXT: ret
 define double @split(double %x, double %y, i1 %c) nounwind {
   %a = fdiv double %x, 3.2
diff --git a/test/CodeGen/X86/smul-with-overflow.ll b/test/CodeGen/X86/smul-with-overflow.ll
index 7c2e247..7ac3840 100644
--- a/test/CodeGen/X86/smul-with-overflow.ll
+++ b/test/CodeGen/X86/smul-with-overflow.ll
@@ -19,7 +19,7 @@ overflow:
   ret i1 false
 ; CHECK: test1:
 ; CHECK: imull
-; CHECK-NEXT: jo
+; CHECK-NEXT: jno
 }
 
 define i1 @test2(i32 %v1, i32 %v2) nounwind {
diff --git a/test/CodeGen/X86/sse41-blend.ll b/test/CodeGen/X86/sse41-blend.ll
index 78604a0..1a1017d 100644
--- a/test/CodeGen/X86/sse41-blend.ll
+++ b/test/CodeGen/X86/sse41-blend.ll
@@ -80,3 +80,11 @@ define <2 x double> @B(<2 x double> %x, <2 x double> %y) {
   ret <2 x double> %min
 }
 
+; CHECK: float_crash
+define void @float_crash() nounwind {
+entry:
+  %merge205vector_func.i = select <4 x i1> undef, <4 x double> undef, <4 x double> undef
+  %extract214vector_func.i = extractelement <4 x double> %merge205vector_func.i, i32 0
+  store double %extract214vector_func.i, double addrspace(1)* undef, align 8
+  ret void
+}
diff --git a/test/CodeGen/X86/sse41.ll b/test/CodeGen/X86/sse41.ll
index 2ac4cb4..54264b1 100644
--- a/test/CodeGen/X86/sse41.ll
+++ b/test/CodeGen/X86/sse41.ll
@@ -183,8 +183,8 @@ define <4 x float> @insertps_3(<4 x float> %t1, <4 x float> %t2) nounwind {
 ; X64:    insertps  $0, %xmm1, %xmm0        
 }
 
-define i32 @ptestz_1(<4 x float> %t1, <4 x float> %t2) nounwind {
-        %tmp1 = call i32 @llvm.x86.sse41.ptestz(<4 x float> %t1, <4 x float> %t2) nounwind readnone
+define i32 @ptestz_1(<2 x i64> %t1, <2 x i64> %t2) nounwind {
+        %tmp1 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %t1, <2 x i64> %t2) nounwind readnone
         ret i32 %tmp1
 ; X32: _ptestz_1:
 ; X32:    ptest 	%xmm1, %xmm0
@@ -195,8 +195,8 @@ define i32 @ptestz_1(<4 x float> %t1, <4 x float> %t2) nounwind {
 ; X64:    sete	%al
 }
 
-define i32 @ptestz_2(<4 x float> %t1, <4 x float> %t2) nounwind {
-        %tmp1 = call i32 @llvm.x86.sse41.ptestc(<4 x float> %t1, <4 x float> %t2) nounwind readnone
+define i32 @ptestz_2(<2 x i64> %t1, <2 x i64> %t2) nounwind {
+        %tmp1 = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %t1, <2 x i64> %t2) nounwind readnone
         ret i32 %tmp1
 ; X32: _ptestz_2:
 ; X32:    ptest 	%xmm1, %xmm0
@@ -207,8 +207,8 @@ define i32 @ptestz_2(<4 x float> %t1, <4 x float> %t2) nounwind {
 ; X64:    sbbl	%eax
 }
 
-define i32 @ptestz_3(<4 x float> %t1, <4 x float> %t2) nounwind {
-        %tmp1 = call i32 @llvm.x86.sse41.ptestnzc(<4 x float> %t1, <4 x float> %t2) nounwind readnone
+define i32 @ptestz_3(<2 x i64> %t1, <2 x i64> %t2) nounwind {
+        %tmp1 = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %t1, <2 x i64> %t2) nounwind readnone
         ret i32 %tmp1
 ; X32: _ptestz_3:
 ; X32:    ptest 	%xmm1, %xmm0
@@ -220,9 +220,9 @@ define i32 @ptestz_3(<4 x float> %t1, <4 x float> %t2) nounwind {
 }
 
 
-declare i32 @llvm.x86.sse41.ptestz(<4 x float>, <4 x float>) nounwind readnone
-declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>) nounwind readnone
-declare i32 @llvm.x86.sse41.ptestnzc(<4 x float>, <4 x float>) nounwind readnone
+declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone
+declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone
+declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone
 
 ; This used to compile to insertps $0  + insertps $16.  insertps $0 is always
 ; pointless.
diff --git a/test/CodeGen/X86/store_op_load_fold2.ll b/test/CodeGen/X86/store_op_load_fold2.ll
index 1168622..8313166 100644
--- a/test/CodeGen/X86/store_op_load_fold2.ll
+++ b/test/CodeGen/X86/store_op_load_fold2.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -mtriple=i686-linux -x86-asm-syntax=intel | FileCheck %s
+; RUN: llc < %s -mtriple=i686-linux -x86-asm-syntax=att | FileCheck %s -check-prefix=ATT
+; RUN: llc < %s -mtriple=i686-linux -x86-asm-syntax=intel | FileCheck %s -check-prefix=INTEL
 
 target datalayout = "e-p:32:32"
         %struct.Macroblock = type { i32, i32, i32, i32, i32, [8 x i32], %struct.Macroblock*, %struct.Macroblock*, i32, [2 x [4 x [4 x [2 x i32]]]], [16 x i8], [16 x i8], i32, i64, [4 x i32], [4 x i32], i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i16, double, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
@@ -16,9 +17,14 @@ cond_true2732.preheader:                ; preds = %entry
         store i64 %tmp2676.us.us, i64* %tmp2666
         ret i32 0
 
-; CHECK: 	and	{{E..}}, DWORD PTR [360]
-; CHECK:	and	DWORD PTR [356], {{E..}}
-; CHECK:	mov	DWORD PTR [360], {{E..}}
+; INTEL: 	and	{{E..}}, DWORD PTR [360]
+; INTEL:	and	DWORD PTR [356], {{E..}}
+; FIXME:	mov	DWORD PTR [360], {{E..}}
+; The above line comes out as 'mov 360, EAX', but when the register is ECX it works?
+
+; ATT: 	andl	360, %{{e..}}
+; ATT:	andl	%{{e..}}, 356
+; ATT:	movl	%{{e..}}, 360
 
 }
 
diff --git a/test/CodeGen/X86/sub-with-overflow.ll b/test/CodeGen/X86/sub-with-overflow.ll
index 749b5db..db8313c 100644
--- a/test/CodeGen/X86/sub-with-overflow.ll
+++ b/test/CodeGen/X86/sub-with-overflow.ll
@@ -20,7 +20,7 @@ overflow:
 
 ; CHECK: func1:
 ; CHECK: subl 20(%esp)
-; CHECK-NEXT: jo
+; CHECK-NEXT: jno
 }
 
 define i1 @func2(i32 %v1, i32 %v2) nounwind {
@@ -40,7 +40,7 @@ carry:
 
 ; CHECK: func2:
 ; CHECK: subl 20(%esp)
-; CHECK-NEXT: jb
+; CHECK-NEXT: jae
 }
 
 declare i32 @printf(i8*, ...) nounwind
diff --git a/test/CodeGen/X86/switch-bt.ll b/test/CodeGen/X86/switch-bt.ll
index 8e39342..58a5c03 100644
--- a/test/CodeGen/X86/switch-bt.ll
+++ b/test/CodeGen/X86/switch-bt.ll
@@ -5,11 +5,11 @@
 
 ;      CHECK: movabsq $2305843009482129440, %r
 ; CHECK-NEXT: btq %rax, %r
-; CHECK-NEXT: jb
-; CHECK-NEXT: movl  $671088640, %e
+; CHECK-NEXT: jae
+;     CHECK: movl  $671088640, %e
 ; CHECK-NEXT: btq %rax, %r
-; CHECK-NEXT: jb
-; CHECK-NEXT: testq %rax, %r
+; CHECK-NEXT: jae
+;      CHECK: testq %rax, %r
 ; CHECK-NEXT: j
 
 define void @test(i8* %l) nounwind {
@@ -60,7 +60,7 @@ define void @test2(i32 %x) nounwind ssp {
 ; CHECK-NEXT: movl $91
 ; CHECK-NOT: movl
 ; CHECK-NEXT: btl
-; CHECK-NEXT: jb
+; CHECK-NEXT: jae
 entry:
   switch i32 %x, label %if.end [
     i32 6, label %if.then
@@ -85,7 +85,7 @@ define void @test3(i32 %x) nounwind {
 ; CHECK: cmpl $5
 ; CHECK: ja
 ; CHECK: cmpl $4
-; CHECK: jne
+; CHECK: je
   switch i32 %x, label %if.end [
     i32 0, label %if.then
     i32 1, label %if.then
diff --git a/test/CodeGen/X86/tail-opts.ll b/test/CodeGen/X86/tail-opts.ll
index f1b9f20..6e20af5 100644
--- a/test/CodeGen/X86/tail-opts.ll
+++ b/test/CodeGen/X86/tail-opts.ll
@@ -113,15 +113,16 @@ altret:
 ; CHECK-NEXT:   jbe .LBB2_3
 ; CHECK-NEXT:   ucomiss %xmm{{[0-2]}}, %xmm{{[0-2]}}
 ; CHECK-NEXT:   ja .LBB2_4
-; CHECK-NEXT: .LBB2_2:
-; CHECK-NEXT:   movb $1, %al
-; CHECK-NEXT:   ret
+; CHECK-NEXT:   jmp .LBB2_2
 ; CHECK-NEXT: .LBB2_3:
 ; CHECK-NEXT:   ucomiss %xmm{{[0-2]}}, %xmm{{[0-2]}}
 ; CHECK-NEXT:   jbe .LBB2_2
 ; CHECK-NEXT: .LBB2_4:
 ; CHECK-NEXT:   xorb %al, %al
 ; CHECK-NEXT:   ret
+; CHECK-NEXT: .LBB2_2:
+; CHECK-NEXT:   movb $1, %al
+; CHECK-NEXT:   ret
 
 define i1 @dont_merge_oddly(float* %result) nounwind {
 entry:
@@ -336,10 +337,10 @@ return:
 
 ; CHECK: two:
 ; CHECK-NOT: XYZ
+; CHECK: ret
 ; CHECK: movl $0, XYZ(%rip)
 ; CHECK: movl $1, XYZ(%rip)
 ; CHECK-NOT: XYZ
-; CHECK: ret
 
 define void @two() nounwind optsize {
 entry:
diff --git a/test/CodeGen/X86/tls-pie.ll b/test/CodeGen/X86/tls-pie.ll
new file mode 100644
index 0000000..e2e58a54
--- /dev/null
+++ b/test/CodeGen/X86/tls-pie.ll
@@ -0,0 +1,63 @@
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -relocation-model=pic -enable-pie \
+; RUN:   | FileCheck -check-prefix=X32 %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu -relocation-model=pic -enable-pie \
+; RUN:   | FileCheck -check-prefix=X64 %s
+
+@i = thread_local global i32 15
+@i2 = external thread_local global i32
+
+define i32 @f1() {
+; X32: f1:
+; X32:      movl %gs:i@NTPOFF, %eax
+; X32-NEXT: ret
+; X64: f1:
+; X64:      movl %fs:i@TPOFF, %eax
+; X64-NEXT: ret
+
+entry:
+	%tmp1 = load i32* @i
+	ret i32 %tmp1
+}
+
+define i32* @f2() {
+; X32: f2:
+; X32:      movl %gs:0, %eax
+; X32-NEXT: leal i@NTPOFF(%eax), %eax
+; X32-NEXT: ret
+; X64: f2:
+; X64:      movq %fs:0, %rax
+; X64-NEXT: leaq i@TPOFF(%rax), %rax
+; X64-NEXT: ret
+
+entry:
+	ret i32* @i
+}
+
+define i32 @f3() {
+; X32: f3:
+; X32:      movl i2@INDNTPOFF, %eax
+; X32-NEXT: movl %gs:(%eax), %eax
+; X32-NEXT: ret
+; X64: f3:
+; X64:      movq i2@GOTTPOFF(%rip), %rax
+; X64-NEXT: movl %fs:(%rax), %eax
+; X64-NEXT: ret
+
+entry:
+	%tmp1 = load i32* @i2
+	ret i32 %tmp1
+}
+
+define i32* @f4() {
+; X32: f4:
+; X32:      movl %gs:0, %eax
+; X32-NEXT: addl i2@INDNTPOFF, %eax
+; X32-NEXT: ret
+; X64: f4:
+; X64:      movq %fs:0, %rax
+; X64-NEXT: addq i2@GOTTPOFF(%rip), %rax
+; X64-NEXT: ret
+
+entry:
+	ret i32* @i2
+}
diff --git a/test/CodeGen/X86/tls.ll b/test/CodeGen/X86/tls.ll
new file mode 100644
index 0000000..e8a79bf
--- /dev/null
+++ b/test/CodeGen/X86/tls.ll
@@ -0,0 +1,329 @@
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu | FileCheck -check-prefix=X32_LINUX %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu | FileCheck -check-prefix=X64_LINUX %s
+; RUN: llc < %s -march=x86 -mtriple=x86-pc-win32 | FileCheck -check-prefix=X32_WIN %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-pc-win32 | FileCheck -check-prefix=X64_WIN %s
+
+@i1 = thread_local global i32 15
+@i2 = external thread_local global i32
+@i3 = internal thread_local global i32 15
+@i4 = hidden thread_local global i32 15
+@i5 = external hidden thread_local global i32
+@s1 = thread_local global i16 15
+@b1 = thread_local global i8 0
+
+define i32 @f1() {
+; X32_LINUX: f1:
+; X32_LINUX:      movl %gs:i1@NTPOFF, %eax
+; X32_LINUX-NEXT: ret
+; X64_LINUX: f1:
+; X64_LINUX:      movl %fs:i1@TPOFF, %eax
+; X64_LINUX-NEXT: ret
+; X32_WIN: f1:
+; X32_WIN:      movl __tls_index, %eax
+; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X32_WIN-NEXT: movl _i1@SECREL(%eax), %eax
+; X32_WIN-NEXT: ret
+; X64_WIN: f1:
+; X64_WIN:      movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: movl i1@SECREL(%rax), %eax
+; X64_WIN-NEXT: ret
+
+entry:
+	%tmp1 = load i32* @i1
+	ret i32 %tmp1
+}
+
+define i32* @f2() {
+; X32_LINUX: f2:
+; X32_LINUX:      movl %gs:0, %eax
+; X32_LINUX-NEXT: leal i1@NTPOFF(%eax), %eax
+; X32_LINUX-NEXT: ret
+; X64_LINUX: f2:
+; X64_LINUX:      movq %fs:0, %rax
+; X64_LINUX-NEXT: leaq i1@TPOFF(%rax), %rax
+; X64_LINUX-NEXT: ret
+; X32_WIN: f2:
+; X32_WIN:      movl __tls_index, %eax
+; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X32_WIN-NEXT: leal _i1@SECREL(%eax), %eax
+; X32_WIN-NEXT: ret
+; X64_WIN: f2:
+; X64_WIN:      movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: leaq i1@SECREL(%rax), %rax
+; X64_WIN-NEXT: ret
+
+entry:
+	ret i32* @i1
+}
+
+define i32 @f3() nounwind {
+; X32_LINUX: f3:
+; X32_LINUX:      movl i2@INDNTPOFF, %eax
+; X32_LINUX-NEXT: movl %gs:(%eax), %eax
+; X32_LINUX-NEXT: ret
+; X64_LINUX: f3:
+; X64_LINUX:      movq i2@GOTTPOFF(%rip), %rax
+; X64_LINUX-NEXT: movl %fs:(%rax), %eax
+; X64_LINUX-NEXT: ret
+; X32_WIN: f3:
+; X32_WIN:      movl __tls_index, %eax
+; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X32_WIN-NEXT: movl _i2@SECREL(%eax), %eax
+; X32_WIN-NEXT: ret
+; X64_WIN: f3:
+; X64_WIN:      movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: movl i2@SECREL(%rax), %eax
+; X64_WIN-NEXT: ret
+
+entry:
+	%tmp1 = load i32* @i2
+	ret i32 %tmp1
+}
+
+define i32* @f4() {
+; X32_LINUX: f4:
+; X32_LINUX:      movl %gs:0, %eax
+; X32_LINUX-NEXT: addl i2@INDNTPOFF, %eax
+; X32_LINUX-NEXT: ret
+; X64_LINUX: f4:
+; X64_LINUX:      movq %fs:0, %rax
+; X64_LINUX-NEXT: addq i2@GOTTPOFF(%rip), %rax
+; X64_LINUX-NEXT: ret
+; X32_WIN: f4:
+; X32_WIN:      movl __tls_index, %eax
+; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X32_WIN-NEXT: leal _i2@SECREL(%eax), %eax
+; X32_WIN-NEXT: ret
+; X64_WIN: f4:
+; X64_WIN:      movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: leaq i2@SECREL(%rax), %rax
+; X64_WIN-NEXT: ret
+
+entry:
+	ret i32* @i2
+}
+
+define i32 @f5() nounwind {
+; X32_LINUX: f5:
+; X32_LINUX:      movl %gs:i3@NTPOFF, %eax
+; X32_LINUX-NEXT: ret
+; X64_LINUX: f5:
+; X64_LINUX:      movl %fs:i3@TPOFF, %eax
+; X64_LINUX-NEXT: ret
+; X32_WIN: f5:
+; X32_WIN:      movl __tls_index, %eax
+; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X32_WIN-NEXT: movl _i3@SECREL(%eax), %eax
+; X32_WIN-NEXT: ret
+; X64_WIN: f5:
+; X64_WIN:      movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: movl i3@SECREL(%rax), %eax
+; X64_WIN-NEXT: ret
+
+entry:
+	%tmp1 = load i32* @i3
+	ret i32 %tmp1
+}
+
+define i32* @f6() {
+; X32_LINUX: f6:
+; X32_LINUX:      movl %gs:0, %eax
+; X32_LINUX-NEXT: leal i3@NTPOFF(%eax), %eax
+; X32_LINUX-NEXT: ret
+; X64_LINUX: f6:
+; X64_LINUX:      movq %fs:0, %rax
+; X64_LINUX-NEXT: leaq i3@TPOFF(%rax), %rax
+; X64_LINUX-NEXT: ret
+; X32_WIN: f6:
+; X32_WIN:      movl __tls_index, %eax
+; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X32_WIN-NEXT: leal _i3@SECREL(%eax), %eax
+; X32_WIN-NEXT: ret
+; X64_WIN: f6:
+; X64_WIN:      movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: leaq i3@SECREL(%rax), %rax
+; X64_WIN-NEXT: ret
+
+entry:
+	ret i32* @i3
+}
+
+define i32 @f7() {
+; X32_LINUX: f7:
+; X32_LINUX:      movl %gs:i4@NTPOFF, %eax
+; X32_LINUX-NEXT: ret
+; X64_LINUX: f7:
+; X64_LINUX:      movl %fs:i4@TPOFF, %eax
+; X64_LINUX-NEXT: ret
+
+entry:
+	%tmp1 = load i32* @i4
+	ret i32 %tmp1
+}
+
+define i32* @f8() {
+; X32_LINUX: f8:
+; X32_LINUX:      movl %gs:0, %eax
+; X32_LINUX-NEXT: leal i4@NTPOFF(%eax), %eax
+; X32_LINUX-NEXT: ret
+; X64_LINUX: f8:
+; X64_LINUX:      movq %fs:0, %rax
+; X64_LINUX-NEXT: leaq i4@TPOFF(%rax), %rax
+; X64_LINUX-NEXT: ret
+
+entry:
+	ret i32* @i4
+}
+
+define i32 @f9() {
+; X32_LINUX: f9:
+; X32_LINUX:      movl %gs:i5@NTPOFF, %eax
+; X32_LINUX-NEXT: ret
+; X64_LINUX: f9:
+; X64_LINUX:      movl %fs:i5@TPOFF, %eax
+; X64_LINUX-NEXT: ret
+
+entry:
+	%tmp1 = load i32* @i5
+	ret i32 %tmp1
+}
+
+define i32* @f10() {
+; X32_LINUX: f10:
+; X32_LINUX:      movl %gs:0, %eax
+; X32_LINUX-NEXT: leal i5@NTPOFF(%eax), %eax
+; X32_LINUX-NEXT: ret
+; X64_LINUX: f10:
+; X64_LINUX:      movq %fs:0, %rax
+; X64_LINUX-NEXT: leaq i5@TPOFF(%rax), %rax
+; X64_LINUX-NEXT: ret
+
+entry:
+	ret i32* @i5
+}
+
+define i16 @f11() {
+; X32_LINUX: f11:
+; X32_LINUX:      movzwl %gs:s1@NTPOFF, %eax
+; Why is this kill line here, but no where else?
+; X32_LINUX-NEXT: # kill
+; X32_LINUX-NEXT: ret
+; X64_LINUX: f11:
+; X64_LINUX:      movzwl %fs:s1@TPOFF, %eax
+; X64_LINUX-NEXT: # kill
+; X64_LINUX-NEXT: ret
+; X32_WIN: f11:
+; X32_WIN:      movl __tls_index, %eax
+; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X32_WIN-NEXT: movzwl _s1@SECREL(%eax), %eax
+; X32_WIN-NEXT: # kill
+; X32_WIN-NEXT: ret
+; X64_WIN: f11:
+; X64_WIN:      movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: movzwl s1@SECREL(%rax), %eax
+; X64_WIN-NEXT: # kill
+; X64_WIN-NEXT: ret
+
+entry:
+	%tmp1 = load i16* @s1
+	ret i16 %tmp1
+}
+
+define i32 @f12() {
+; X32_LINUX: f12:
+; X32_LINUX:      movswl %gs:s1@NTPOFF, %eax
+; X32_LINUX-NEXT: ret
+; X64_LINUX: f12:
+; X64_LINUX:      movswl %fs:s1@TPOFF, %eax
+; X64_LINUX-NEXT: ret
+; X32_WIN: f12:
+; X32_WIN:      movl __tls_index, %eax
+; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X32_WIN-NEXT: movswl _s1@SECREL(%eax), %eax
+; X32_WIN-NEXT: ret
+; X64_WIN: f12:
+; X64_WIN:      movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: movswl s1@SECREL(%rax), %eax
+; X64_WIN-NEXT: ret
+
+entry:
+	%tmp1 = load i16* @s1
+  %tmp2 = sext i16 %tmp1 to i32
+	ret i32 %tmp2
+}
+
+define i8 @f13() {
+; X32_LINUX: f13:
+; X32_LINUX:      movb %gs:b1@NTPOFF, %al
+; X32_LINUX-NEXT: ret
+; X64_LINUX: f13:
+; X64_LINUX:      movb %fs:b1@TPOFF, %al
+; X64_LINUX-NEXT: ret
+; X32_WIN: f13:
+; X32_WIN:      movl __tls_index, %eax
+; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X32_WIN-NEXT: movb _b1@SECREL(%eax), %al
+; X32_WIN-NEXT: ret
+; X64_WIN: f13:
+; X64_WIN:      movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: movb b1@SECREL(%rax), %al
+; X64_WIN-NEXT: ret
+
+entry:
+	%tmp1 = load i8* @b1
+	ret i8 %tmp1
+}
+
+define i32 @f14() {
+; X32_LINUX: f14:
+; X32_LINUX:      movsbl %gs:b1@NTPOFF, %eax
+; X32_LINUX-NEXT: ret
+; X64_LINUX: f14:
+; X64_LINUX:      movsbl %fs:b1@TPOFF, %eax
+; X64_LINUX-NEXT: ret
+; X32_WIN: f14:
+; X32_WIN:      movl __tls_index, %eax
+; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X32_WIN-NEXT: movsbl _b1@SECREL(%eax), %eax
+; X32_WIN-NEXT: ret
+; X64_WIN: f14:
+; X64_WIN:      movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: movsbl b1@SECREL(%rax), %eax
+; X64_WIN-NEXT: ret
+
+entry:
+	%tmp1 = load i8* @b1
+  %tmp2 = sext i8 %tmp1 to i32
+	ret i32 %tmp2
+}
+
diff --git a/test/CodeGen/X86/tls1.ll b/test/CodeGen/X86/tls1.ll
deleted file mode 100644
index f39658e..0000000
--- a/test/CodeGen/X86/tls1.ll
+++ /dev/null
@@ -1,20 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu | FileCheck -check-prefix=X32_LINUX %s
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu | FileCheck -check-prefix=X64_LINUX %s
-; RUN: llc < %s -march=x86 -mtriple=x86-pc-win32 | FileCheck -check-prefix=X32_WIN %s
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-pc-win32 | FileCheck -check-prefix=X64_WIN %s
-
-@i = thread_local global i32 15
-
-define i32 @f() nounwind {
-entry:
-	%tmp1 = load i32* @i
-	ret i32 %tmp1
-}
-; X32_LINUX: movl %gs:i@NTPOFF, %eax
-; X64_LINUX: movl %fs:i@TPOFF, %eax
-; X32_WIN: movl __tls_index, %eax
-; X32_WIN: movl %fs:__tls_array, %ecx
-; X32_WIN: movl _i@SECREL(%eax), %eax
-; X64_WIN: movl _tls_index(%rip), %eax
-; X64_WIN: movabsq $i@SECREL, %rcx
-
diff --git a/test/CodeGen/X86/tls10.ll b/test/CodeGen/X86/tls10.ll
deleted file mode 100644
index fb61596..0000000
--- a/test/CodeGen/X86/tls10.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movl	%gs:0, %eax} %t
-; RUN: grep {leal	i@NTPOFF(%eax), %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movq	%fs:0, %rax} %t2
-; RUN: grep {leaq	i@TPOFF(%rax), %rax} %t2
-
-@i = external hidden thread_local global i32
-
-define i32* @f() {
-entry:
-	ret i32* @i
-}
diff --git a/test/CodeGen/X86/tls11.ll b/test/CodeGen/X86/tls11.ll
deleted file mode 100644
index cc14826..0000000
--- a/test/CodeGen/X86/tls11.ll
+++ /dev/null
@@ -1,21 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu | FileCheck -check-prefix=X32_LINUX %s
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu | FileCheck -check-prefix=X64_LINUX %s
-; RUN: llc < %s -march=x86 -mtriple=x86-pc-win32 | FileCheck -check-prefix=X32_WIN %s
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-pc-win32 | FileCheck -check-prefix=X64_WIN %s
-
-@i = thread_local global i16 15
-
-define i16 @f() {
-entry:
-	%tmp1 = load i16* @i
-	ret i16 %tmp1
-}
-; X32_LINUX: movzwl %gs:i@NTPOFF, %eax
-; X64_LINUX: movzwl %fs:i@TPOFF, %eax
-; X32_WIN: movl __tls_index, %eax
-; X32_WIN: movl %fs:__tls_array, %ecx
-; X32_WIN: movzwl _i@SECREL(%eax), %eax
-; X64_WIN: movl _tls_index(%rip), %eax
-; X64_WIN: movq %gs:88, %rcx
-; X64_WIN: movabsq $i@SECREL, %rcx
-; X64_WIN: movzwl (%rax,%rcx), %eax
diff --git a/test/CodeGen/X86/tls12.ll b/test/CodeGen/X86/tls12.ll
deleted file mode 100644
index 3da789e..0000000
--- a/test/CodeGen/X86/tls12.ll
+++ /dev/null
@@ -1,21 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu | FileCheck -check-prefix=X32_LINUX %s
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu | FileCheck -check-prefix=X64_LINUX %s
-; RUN: llc < %s -march=x86 -mtriple=x86-pc-win32 | FileCheck -check-prefix=X32_WIN %s
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-pc-win32 | FileCheck -check-prefix=X64_WIN %s
-
-@i = thread_local global i8 15
-
-define i8 @f() {
-entry:
-	%tmp1 = load i8* @i
-	ret i8 %tmp1
-}
-; X32_LINUX: movb %gs:i@NTPOFF, %al
-; X64_LINUX: movb %fs:i@TPOFF, %al
-; X32_WIN: movl __tls_index, %eax
-; X32_WIN: movl %fs:__tls_array, %ecx
-; X32_WIN: movb _i@SECREL(%eax), %al
-; X64_WIN: movl _tls_index(%rip), %eax
-; X64_WIN: movq %gs:88, %rcx
-; X64_WIN: movabsq $i@SECREL, %rcx
-; X64_WIN: movb (%rax,%rcx), %al
diff --git a/test/CodeGen/X86/tls13.ll b/test/CodeGen/X86/tls13.ll
deleted file mode 100644
index 0f6a98a..0000000
--- a/test/CodeGen/X86/tls13.ll
+++ /dev/null
@@ -1,33 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu | FileCheck -check-prefix=X32_LINUX %s
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu | FileCheck -check-prefix=X64_LINUX %s
-; RUN: llc < %s -march=x86 -mtriple=x86-pc-win32 | FileCheck -check-prefix=X32_WIN %s
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-pc-win32 | FileCheck -check-prefix=X64_WIN %s
-
-@i = thread_local global i16 0
-@j = thread_local global i16 0
-
-define void @f() nounwind optsize {
-entry:
-        %0 = load i16* @i, align 2
-        %1 = sext i16 %0 to i32
-        tail call void @g(i32 %1) nounwind
-        %2 = load i16* @j, align 2
-        %3 = zext i16 %2 to i32
-        tail call void @h(i32 %3) nounwind
-        ret void
-}
-
-declare void @g(i32)
-
-declare void @h(i32)
-
-; X32_LINUX: movswl %gs:i@NTPOFF, %eax
-; X32_LINUX: movzwl %gs:j@NTPOFF, %eax
-; X64_LINUX: movswl %fs:i@TPOFF, %edi
-; X64_LINUX: movzwl %fs:j@TPOFF, %edi
-; X32_WIN: movswl _i@SECREL(%esi), %eax
-; X32_WIN: movzwl _j@SECREL(%esi), %eax
-; X64_WIN: movabsq $i@SECREL, %rax
-; X64_WIN: movswl (%rsi,%rax), %ecx
-; X64_WIN: movabsq $j@SECREL, %rax
-; X64_WIN: movzwl (%rsi,%rax), %ecx
diff --git a/test/CodeGen/X86/tls14.ll b/test/CodeGen/X86/tls14.ll
deleted file mode 100644
index 6462571..0000000
--- a/test/CodeGen/X86/tls14.ll
+++ /dev/null
@@ -1,33 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu | FileCheck -check-prefix=X32_LINUX %s
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu | FileCheck -check-prefix=X64_LINUX %s
-; RUN: llc < %s -march=x86 -mtriple=x86-pc-win32 | FileCheck -check-prefix=X32_WIN %s
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-pc-win32 | FileCheck -check-prefix=X64_WIN %s
-
-@i = thread_local global i8 0
-@j = thread_local global i8 0
-
-define void @f() nounwind optsize {
-entry:
-        %0 = load i8* @i, align 2
-        %1 = sext i8 %0 to i32
-        tail call void @g(i32 %1) nounwind
-        %2 = load i8* @j, align 2
-        %3 = zext i8 %2 to i32
-        tail call void @h(i32 %3) nounwind
-        ret void
-}
-
-declare void @g(i32)
-
-declare void @h(i32)
-
-; X32_LINUX: movsbl %gs:i@NTPOFF, %eax
-; X32_LINUX: movzbl %gs:j@NTPOFF, %eax
-; X64_LINUX: movsbl %fs:i@TPOFF, %edi
-; X64_LINUX: movzbl %fs:j@TPOFF, %edi
-; X32_WIN: movsbl _i@SECREL(%esi), %eax
-; X32_WIN: movzbl _j@SECREL(%esi), %eax
-; X64_WIN: movabsq $i@SECREL, %rax
-; X64_WIN: movsbl (%rsi,%rax), %ecx
-; X64_WIN: movabsq $j@SECREL, %rax
-; X64_WIN: movzbl (%rsi,%rax), %ecx
diff --git a/test/CodeGen/X86/tls15.ll b/test/CodeGen/X86/tls15.ll
deleted file mode 100644
index 7abf070..0000000
--- a/test/CodeGen/X86/tls15.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movl	%gs:0, %eax} %t | count 1
-; RUN: grep {leal	i@NTPOFF(%eax), %ecx} %t
-; RUN: grep {leal	j@NTPOFF(%eax), %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movq	%fs:0, %rax} %t2 | count 1
-; RUN: grep {leaq	i@TPOFF(%rax), %rcx} %t2
-; RUN: grep {leaq	j@TPOFF(%rax), %rax} %t2
-
-@i = thread_local global i32 0
-@j = thread_local global i32 0
-
-define void @f(i32** %a, i32** %b) {
-entry:
-	store i32* @i, i32** %a, align 8
-	store i32* @j, i32** %b, align 8
-	ret void
-}
diff --git a/test/CodeGen/X86/tls2.ll b/test/CodeGen/X86/tls2.ll
deleted file mode 100644
index e882f53..0000000
--- a/test/CodeGen/X86/tls2.ll
+++ /dev/null
@@ -1,21 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu | FileCheck -check-prefix=X32_LINUX %s
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu | FileCheck -check-prefix=X64_LINUX %s
-; RUN: llc < %s -march=x86 -mtriple=x86-pc-win32 | FileCheck -check-prefix=X32_WIN %s
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-pc-win32 | FileCheck -check-prefix=X64_WIN %s
-
-@i = thread_local global i32 15
-
-define i32* @f() {
-entry:
-	ret i32* @i
-}
-; X32_LINUX: movl %gs:0, %eax
-; X32_LINUX: leal i@NTPOFF(%eax), %eax
-; X64_LINUX: movq %fs:0, %rax
-; X64_LINUX: leaq i@TPOFF(%rax), %rax
-; X32_WIN: movl __tls_index, %eax
-; X32_WIN: movl %fs:__tls_array, %ecx
-; X32_WIN: leal _i@SECREL(%eax), %eax
-; X64_WIN: movl _tls_index(%rip), %eax
-; X64_WIN: movq %gs:88, %rcx
-; X64_WIN: addq $i@SECREL, %rax
diff --git a/test/CodeGen/X86/tls3.ll b/test/CodeGen/X86/tls3.ll
deleted file mode 100644
index ee3f28f..0000000
--- a/test/CodeGen/X86/tls3.ll
+++ /dev/null
@@ -1,21 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu | FileCheck -check-prefix=X32_LINUX %s
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu | FileCheck -check-prefix=X64_LINUX %s
-; RUN: llc < %s -march=x86 -mtriple=x86-pc-win32 | FileCheck -check-prefix=X32_WIN %s
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-pc-win32 | FileCheck -check-prefix=X64_WIN %s
-
-@i = external thread_local global i32		; <i32*> [#uses=2]
-
-define i32 @f() nounwind {
-entry:
-	%tmp1 = load i32* @i		; <i32> [#uses=1]
-	ret i32 %tmp1
-}
-; X32_LINUX: movl i@INDNTPOFF, %eax
-; X32_LINUX: movl %gs:(%eax), %eax
-; X64_LINUX: movq i@GOTTPOFF(%rip), %rax
-; X64_LINUX: movl %fs:(%rax), %eax
-; X32_WIN: movl __tls_index, %eax
-; X32_WIN: movl %fs:__tls_array, %ecx
-; X32_WIN: movl _i@SECREL(%eax), %eax
-; X64_WIN: movl _tls_index(%rip), %eax
-; X64_WIN: movabsq $i@SECREL, %rcx
diff --git a/test/CodeGen/X86/tls4.ll b/test/CodeGen/X86/tls4.ll
deleted file mode 100644
index 2b53ec5..0000000
--- a/test/CodeGen/X86/tls4.ll
+++ /dev/null
@@ -1,21 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu | FileCheck -check-prefix=X32_LINUX %s
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu | FileCheck -check-prefix=X64_LINUX %s
-; RUN: llc < %s -march=x86 -mtriple=x86-pc-win32 | FileCheck -check-prefix=X32_WIN %s
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-pc-win32 | FileCheck -check-prefix=X64_WIN %s
-
-@i = external thread_local global i32		; <i32*> [#uses=2]
-
-define i32* @f() {
-entry:
-	ret i32* @i
-}
-; X32_LINUX: movl %gs:0, %eax
-; X32_LINUX: addl i@INDNTPOFF, %eax
-; X64_LINUX: movq %fs:0, %rax
-; X64_LINUX: addq i@GOTTPOFF(%rip), %rax
-; X32_WIN: movl __tls_index, %eax
-; X32_WIN: movl %fs:__tls_array, %ecx
-; X32_WIN: leal _i@SECREL(%eax), %eax
-; X64_WIN: movl _tls_index(%rip), %eax
-; X64_WIN: movq %gs:88, %rcx
-; X64_WIN: addq $i@SECREL, %rax
diff --git a/test/CodeGen/X86/tls5.ll b/test/CodeGen/X86/tls5.ll
deleted file mode 100644
index 3cc6dab..0000000
--- a/test/CodeGen/X86/tls5.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu | FileCheck -check-prefix=X32_LINUX %s
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu | FileCheck -check-prefix=X64_LINUX %s
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-pc-win32 | FileCheck -check-prefix=X64_WIN %s
-; RUN: llc < %s -march=x86 -mtriple=x86-pc-win32 | FileCheck -check-prefix=X32_WIN %s
-
-@i = internal thread_local global i32 15
-
-define i32 @f() {
-entry:
-	%tmp1 = load i32* @i
-	ret i32 %tmp1
-}
-; X32_LINUX: movl %gs:i@NTPOFF, %eax
-; X64_LINUX: movl %fs:i@TPOFF, %eax
-; X32_WIN: movl __tls_index, %eax
-; X32_WIN: movl %fs:__tls_array, %ecx
-; X32_WIN: movl _i@SECREL(%eax), %eax
-; X64_WIN: movl _tls_index(%rip), %eax
-; X64_WIN: movabsq $i@SECREL, %rcx
diff --git a/test/CodeGen/X86/tls6.ll b/test/CodeGen/X86/tls6.ll
deleted file mode 100644
index c98ad7c..0000000
--- a/test/CodeGen/X86/tls6.ll
+++ /dev/null
@@ -1,21 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu | FileCheck -check-prefix=X32_LINUX %s
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu | FileCheck -check-prefix=X64_LINUX %s
-; RUN: llc < %s -march=x86 -mtriple=x86-pc-win32 | FileCheck -check-prefix=X32_WIN %s
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-pc-win32 | FileCheck -check-prefix=X64_WIN %s
-
-@i = internal thread_local global i32 15
-
-define i32* @f() {
-entry:
-	ret i32* @i
-}
-; X32_LINUX: movl %gs:0, %eax
-; X32_LINUX: leal i@NTPOFF(%eax), %eax
-; X64_LINUX: movq %fs:0, %rax
-; X64_LINUX: leaq i@TPOFF(%rax), %rax
-; X32_WIN: movl __tls_index, %eax
-; X32_WIN: movl %fs:__tls_array, %ecx
-; X32_WIN: leal _i@SECREL(%eax), %eax
-; X64_WIN: movl _tls_index(%rip), %eax
-; X64_WIN: movq %gs:88, %rcx
-; X64_WIN: addq $i@SECREL, %rax
diff --git a/test/CodeGen/X86/tls7.ll b/test/CodeGen/X86/tls7.ll
deleted file mode 100644
index e9116e7..0000000
--- a/test/CodeGen/X86/tls7.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movl	%gs:i@NTPOFF, %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movl	%fs:i@TPOFF, %eax} %t2
-
-@i = hidden thread_local global i32 15
-
-define i32 @f() {
-entry:
-	%tmp1 = load i32* @i
-	ret i32 %tmp1
-}
diff --git a/test/CodeGen/X86/tls8.ll b/test/CodeGen/X86/tls8.ll
deleted file mode 100644
index 375af94..0000000
--- a/test/CodeGen/X86/tls8.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movl	%gs:0, %eax} %t
-; RUN: grep {leal	i@NTPOFF(%eax), %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movq	%fs:0, %rax} %t2
-; RUN: grep {leaq	i@TPOFF(%rax), %rax} %t2
-
-@i = hidden thread_local global i32 15
-
-define i32* @f() {
-entry:
-	ret i32* @i
-}
diff --git a/test/CodeGen/X86/tls9.ll b/test/CodeGen/X86/tls9.ll
deleted file mode 100644
index 7d08df8..0000000
--- a/test/CodeGen/X86/tls9.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movl	%gs:i@NTPOFF, %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movl	%fs:i@TPOFF, %eax} %t2
-
-@i = external hidden thread_local global i32
-
-define i32 @f() nounwind {
-entry:
-	%tmp1 = load i32* @i
-	ret i32 %tmp1
-}
diff --git a/test/CodeGen/X86/uint64-to-float.ll b/test/CodeGen/X86/uint64-to-float.ll
index e853e77..ca764e7 100644
--- a/test/CodeGen/X86/uint64-to-float.ll
+++ b/test/CodeGen/X86/uint64-to-float.ll
@@ -7,13 +7,14 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 target triple = "x86_64-apple-darwin10.0.0"
 
 ; CHECK: testq %rdi, %rdi
-; CHECK-NEXT: jns LBB0_2
+; CHECK-NEXT: js LBB0_1
+; CHECK: cvtsi2ss
+; CHECK-NEXT: ret
+; CHECK: LBB0_1
 ; CHECK: shrq
 ; CHECK-NEXT: andq
 ; CHECK-NEXT: orq
 ; CHECK-NEXT: cvtsi2ss
-; CHECK: LBB0_2
-; CHECK-NEXT: cvtsi2ss
 define float @test(i64 %a) {
 entry:
   %b = uitofp i64 %a to float
diff --git a/test/CodeGen/X86/utf16-cfstrings.ll b/test/CodeGen/X86/utf16-cfstrings.ll
new file mode 100644
index 0000000..af76a33
--- /dev/null
+++ b/test/CodeGen/X86/utf16-cfstrings.ll
@@ -0,0 +1,35 @@
+; RUN: llc < %s -mtriple x86_64-apple-macosx10 | FileCheck %s
+; <rdar://problem/10655949>
+
+%0 = type opaque
+%struct.NSConstantString = type { i32*, i32, i8*, i64 }
+
+@__CFConstantStringClassReference = external global [0 x i32]
+@.str = internal unnamed_addr constant [5 x i16] [i16 252, i16 98, i16 101, i16 114, i16 0], align 2
+@_unnamed_cfstring_ = private constant %struct.NSConstantString { i32* getelementptr inbounds ([0 x i32]* @__CFConstantStringClassReference, i32 0, i32 0), i32 2000, i8* bitcast ([5 x i16]* @.str to i8*), i64 4 }, section "__DATA,__cfstring"
+
+; CHECK:         .section      __TEXT,__ustring
+; CHECK-NEXT:    .align        1
+; CHECK-NEXT: _.str:
+; CHECK-NEXT:    .short  252     ## 0xfc
+; CHECK-NEXT:    .short  98      ## 0x62
+; CHECK-NEXT:    .short  101     ## 0x65
+; CHECK-NEXT:    .short  114     ## 0x72
+; CHECK-NEXT:    .short  0       ## 0x0
+
+define i32 @main() uwtable ssp {
+entry:
+  %retval = alloca i32, align 4
+  store i32 0, i32* %retval
+  call void (%0*, ...)* @NSLog(%0* bitcast (%struct.NSConstantString* @_unnamed_cfstring_ to %0*))
+  ret i32 0
+}
+
+declare void @NSLog(%0*, ...)
+
+!llvm.module.flags = !{!0, !1, !2, !3}
+
+!0 = metadata !{i32 1, metadata !"Objective-C Version", i32 2}
+!1 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0}
+!2 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
+!3 = metadata !{i32 4, metadata !"Objective-C Garbage Collection", i32 0}
diff --git a/test/CodeGen/X86/utf8.ll b/test/CodeGen/X86/utf8.ll
new file mode 100644
index 0000000..67bc5ae
--- /dev/null
+++ b/test/CodeGen/X86/utf8.ll
@@ -0,0 +1,4 @@
+; RUN: llc < %s -march=x86 | FileCheck %s
+
+; CHECK: iΔ
+@"i\CE\94" = common global i32 0, align 4
diff --git a/test/CodeGen/X86/vec_compare-2.ll b/test/CodeGen/X86/vec_compare-2.ll
index 946b126..91777f7 100644
--- a/test/CodeGen/X86/vec_compare-2.ll
+++ b/test/CodeGen/X86/vec_compare-2.ll
@@ -10,8 +10,10 @@ define void @blackDespeckle_wrapper(i8** %args_list, i64* %gtid, i64 %xend) {
 entry:
 ; CHECK: cfi_def_cfa_offset
 ; CHECK-NOT: set
-; CHECK: pcmpgt
-; CHECK: blendvps
+; CHECK: movzwl
+; CHECK: movzwl
+; CHECK: pshufd
+; CHECK: pshufb
   %shr.i = ashr <4 x i32> zeroinitializer, <i32 3, i32 3, i32 3, i32 3> ; <<4 x i32>> [#uses=1]
   %cmp318.i = sext <4 x i1> zeroinitializer to <4 x i32> ; <<4 x i32>> [#uses=1]
   %sub322.i = sub <4 x i32> %shr.i, zeroinitializer ; <<4 x i32>> [#uses=1]
diff --git a/test/CodeGen/X86/vec_shuffle-20.ll b/test/CodeGen/X86/vec_shuffle-20.ll
index fc06b95..976cd18 100644
--- a/test/CodeGen/X86/vec_shuffle-20.ll
+++ b/test/CodeGen/X86/vec_shuffle-20.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -o /dev/null -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 3
+; RUN: llc < %s -o /dev/null -march=x86 -mcpu=corei7 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 3
 
 define <4 x float> @func(<4 x float> %fp0, <4 x float> %fp1) nounwind  {
 entry:
diff --git a/test/CodeGen/X86/vec_shuffle-37.ll b/test/CodeGen/X86/vec_shuffle-37.ll
index 0608398..430aa04 100644
--- a/test/CodeGen/X86/vec_shuffle-37.ll
+++ b/test/CodeGen/X86/vec_shuffle-37.ll
@@ -4,10 +4,10 @@
 
 define <4 x i32> @t00(<4 x i32>* %a0) nounwind ssp {
 entry:
-; CHECK: movaps  ({{%rdi|%rcx}}), %xmm0
-; CHECK: movaps  %xmm0, %xmm1
-; CHECK-NEXT: movss   %xmm2, %xmm1
-; CHECK-NEXT: shufps  $36, %xmm1, %xmm0
+; CHECK: movaps  ({{%rdi|%rcx}}), %[[XMM0:xmm[0-9]+]]
+; CHECK: movaps  %[[XMM0]], %[[XMM1:xmm[0-9]+]]
+; CHECK-NEXT: movss   %xmm{{[0-9]+}}, %[[XMM1]]
+; CHECK-NEXT: shufps  $36, %[[XMM1]], %[[XMM0]]
   %0 = load <4 x i32>* undef, align 16
   %1 = load <4 x i32>* %a0, align 16
   %2 = shufflevector <4 x i32> %1, <4 x i32> %0, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
diff --git a/test/CodeGen/X86/xor-icmp.ll b/test/CodeGen/X86/xor-icmp.ll
index 34875ed..fd1b006 100644
--- a/test/CodeGen/X86/xor-icmp.ll
+++ b/test/CodeGen/X86/xor-icmp.ll
@@ -9,13 +9,13 @@ entry:
 ; X32-NOT: andb
 ; X32-NOT: shrb
 ; X32:     testb $64
-; X32:     jne
+; X32:     je
 
 ; X64:     t:
 ; X64-NOT: setne
 ; X64:     xorl
 ; X64:     testb $64
-; X64:     jne
+; X64:     je
   %0 = and i32 %a, 16384
   %1 = icmp ne i32 %0, 0
   %2 = and i32 %b, 16384
@@ -43,7 +43,7 @@ define i32 @t2(i32 %x, i32 %y) nounwind ssp {
 ; X32: cmpl
 ; X32: sete
 ; X32-NOT: xor
-; X32: jne
+; X32: je
 
 ; X64: t2:
 ; X64: testl
@@ -51,7 +51,7 @@ define i32 @t2(i32 %x, i32 %y) nounwind ssp {
 ; X64: testl
 ; X64: sete
 ; X64-NOT: xor
-; X64: jne
+; X64: je
 entry:
   %0 = icmp eq i32 %x, 0                          ; <i1> [#uses=1]
   %1 = icmp eq i32 %y, 0                          ; <i1> [#uses=1]
diff --git a/test/CodeGen/XCore/ashr.ll b/test/CodeGen/XCore/ashr.ll
index 4514fdb..03b6b1f 100644
--- a/test/CodeGen/XCore/ashr.ll
+++ b/test/CodeGen/XCore/ashr.ll
@@ -30,7 +30,7 @@ not_less:
 }
 ; CHECK: f1:
 ; CHECK-NEXT: ashr r0, r0, 32
-; CHECK-NEXT: bf r0
+; CHECK-NEXT: bt r0
 
 define i32 @f2(i32 %a) {
         %1 = icmp sge i32 %a, 0
@@ -51,9 +51,9 @@ define i32 @f3(i32 %a) {
 }
 ; CHECK: f3:
 ; CHECK-NEXT: ashr r0, r0, 32
-; CHECK-NEXT: bf r0
-; CHECK-NEXT: ldc r0, 10
-; CHECK: ldc r0, 17
+; CHECK-NEXT: bt r0
+; CHECK-NEXT: ldc r0, 17
+; CHECK: ldc r0, 10
 
 define i32 @f4(i32 %a) {
         %1 = icmp sge i32 %a, 0
@@ -62,9 +62,9 @@ define i32 @f4(i32 %a) {
 }
 ; CHECK: f4:
 ; CHECK-NEXT: ashr r0, r0, 32
-; CHECK-NEXT: bf r0
-; CHECK-NEXT: ldc r0, 17
-; CHECK: ldc r0, 10
+; CHECK-NEXT: bt r0
+; CHECK-NEXT: ldc r0, 10
+; CHECK: ldc r0, 17
 
 define i32 @f5(i32 %a) {
         %1 = icmp sge i32 %a, 0
diff --git a/test/CodeGen/XCore/ladd_lsub_combine.ll b/test/CodeGen/XCore/ladd_lsub_combine.ll
index a693ee2..cd89966 100644
--- a/test/CodeGen/XCore/ladd_lsub_combine.ll
+++ b/test/CodeGen/XCore/ladd_lsub_combine.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore | FileCheck %s
+; RUN: llc -march=xcore < %s | FileCheck %s
 
 ; Only needs one ladd
 define i64 @f1(i32 %x, i32 %y) nounwind {
diff --git a/test/CodeGen/XCore/lit.local.cfg b/test/CodeGen/XCore/lit.local.cfg
index c697912..f8726af 100644
--- a/test/CodeGen/XCore/lit.local.cfg
+++ b/test/CodeGen/XCore/lit.local.cfg
@@ -1,13 +1,6 @@
 config.suffixes = ['.ll', '.c', '.cpp']
 
-def getRoot(config):
-    if not config.parent:
-        return config
-    return getRoot(config.parent)
-
-root = getRoot(config)
-
-targets = set(root.targets_to_build.split())
+targets = set(config.root.targets_to_build.split())
 if not 'XCore' in targets:
     config.unsupported = True