author     Logan Chien <loganchien@google.com>   2011-11-25 08:46:08 +0800
committer  Logan Chien <loganchien@google.com>   2011-11-25 13:43:44 +0800
commit     1429059dc0129c1cec938c29d6fce89e14293241 (patch)
tree       ec4c55e5138c7eb9ad0313d4af895ad0c56d0978 /test
parent     1035c3e84815607b4f8994cab03ae62cc8519a63 (diff)
parent     705f2431a086bbe662bca0035938e774378de3ec (diff)
Merge with LLVM upstream r145126 (Nov 25th 2011)
Change-Id: I30d08ae004a4c3c74092ad2537ab30cce4280e1d
Diffstat (limited to 'test')
54 files changed, 2030 insertions, 202 deletions
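All of the tests touched below share one lit/FileCheck pattern: a RUN comment pipes the file through llc, FileCheck matches the emitted assembly against CHECK comments, and --check-prefix keeps per-target expectations apart within a single file. As a minimal sketch of that pattern (the function and its checks here are illustrative, not part of this commit):

; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB

define i32 @sample(i32 %a, i32 %b) nounwind {
entry:
; ARM: sample
; THUMB: sample
; Both targets are expected to select a single add for this.
; ARM: add
; THUMB: add
  %sum = add i32 %a, %b
  ret i32 %sum
}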
diff --git a/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll b/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll new file mode 100644 index 0000000..dbb634d --- /dev/null +++ b/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll @@ -0,0 +1,65 @@ +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB + +%struct.A = type { i32, [2 x [2 x i32]], i8, [3 x [3 x [3 x i32]]] } +%struct.B = type { i32, [2 x [2 x [2 x %struct.A]]] } + +@arr = common global [2 x [2 x [2 x [2 x [2 x i32]]]]] zeroinitializer, align 4 +@A = common global [3 x [3 x %struct.A]] zeroinitializer, align 4 +@B = common global [2 x [2 x [2 x %struct.B]]] zeroinitializer, align 4 + +define i32* @t1() nounwind { +entry: +; ARM: t1 +; THUMB: t1 + %addr = alloca i32*, align 4 + store i32* getelementptr inbounds ([2 x [2 x [2 x [2 x [2 x i32]]]]]* @arr, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1), i32** %addr, align 4 +; ARM: add r0, r0, #124 +; THUMB: adds r0, #124 + %0 = load i32** %addr, align 4 + ret i32* %0 +} + +define i32* @t2() nounwind { +entry: +; ARM: t2 +; THUMB: t2 + %addr = alloca i32*, align 4 + store i32* getelementptr inbounds ([3 x [3 x %struct.A]]* @A, i32 0, i32 2, i32 2, i32 3, i32 1, i32 2, i32 2), i32** %addr, align 4 +; ARM: movw r1, #1148 +; ARM: add r0, r0, r1 +; THUMB: addw r0, r0, #1148 + %0 = load i32** %addr, align 4 + ret i32* %0 +} + +define i32* @t3() nounwind { +entry: +; ARM: t3 +; THUMB: t3 + %addr = alloca i32*, align 4 + store i32* getelementptr inbounds ([3 x [3 x %struct.A]]* @A, i32 0, i32 0, i32 1, i32 1, i32 0, i32 1), i32** %addr, align 4 +; ARM: add r0, r0, #140 +; THUMB: adds r0, #140 + %0 = load i32** %addr, align 4 + ret i32* %0 +} + +define i32* @t4() nounwind { +entry: +; ARM: t4 +; THUMB: t4 + %addr = alloca i32*, align 4 + store i32* getelementptr inbounds ([2 x [2 x [2 x %struct.B]]]* @B, i32 0, i32 0, i32 0, i32 1, i32 1, i32 0, i32 0, i32 1, i32 3, i32 1, i32 2, i32 1), i32** %addr, align 4 +; ARM-NOT: movw r{{[0-9]}}, #1060 +; ARM-NOT: add r{{[0-9]}}, r{{[0-9]}}, #4 +; ARM-NOT: add r{{[0-9]}}, r{{[0-9]}}, #132 +; ARM-NOT: add r{{[0-9]}}, r{{[0-9]}}, #24 +; ARM-NOT: add r{{[0-9]}}, r{{[0-9]}}, #36 +; ARM-NOT: add r{{[0-9]}}, r{{[0-9]}}, #24 +; ARM-NOT: add r{{[0-9]}}, r{{[0-9]}}, #4 +; ARM: movw r{{[0-9]}}, #1284 +; THUMB: addw r{{[0-9]}}, r{{[0-9]}}, #1284 + %0 = load i32** %addr, align 4 + ret i32* %0 +} diff --git a/test/CodeGen/ARM/vmov.ll b/test/CodeGen/ARM/vmov.ll index ab56e5b..0396a41 100644 --- a/test/CodeGen/ARM/vmov.ll +++ b/test/CodeGen/ARM/vmov.ll @@ -353,3 +353,21 @@ define void @noTruncStore(<4 x i32>* %a, <4 x i16>* %b) nounwind { store <4 x i16> %tmp2, <4 x i16>* %b, align 8 ret void } + +; Use vmov.f32 to materialize f32 immediate splats +; rdar://10437054 +define void @v_mov_v2f32(<2 x float>* nocapture %p) nounwind { +entry: +;CHECK: v_mov_v2f32: +;CHECK: vmov.f32 d{{.*}}, #-1.600000e+01 + store <2 x float> <float -1.600000e+01, float -1.600000e+01>, <2 x float>* %p, align 4 + ret void +} + +define void @v_mov_v4f32(<4 x float>* nocapture %p) nounwind { +entry: +;CHECK: v_mov_v4f32: +;CHECK: vmov.f32 q{{.*}}, #3.100000e+01 + store <4 x float> <float 3.100000e+01, float 3.100000e+01, float 3.100000e+01, float 3.100000e+01>, <4 x float>* %p, align 4 + ret void +} diff --git a/test/CodeGen/PowerPC/ppc32-vaarg.ll b/test/CodeGen/PowerPC/ppc32-vaarg.ll index 393800b..725c106 
100644 --- a/test/CodeGen/PowerPC/ppc32-vaarg.ll +++ b/test/CodeGen/PowerPC/ppc32-vaarg.ll @@ -12,10 +12,9 @@ target triple = "powerpc-unknown-freebsd9.0" define void @ppcvaargtest(%struct.__va_list_tag* %ap) nounwind { entry: %x = va_arg %struct.__va_list_tag* %ap, i64; Get from r5,r6 -; CHECK: lbz 4, 0(3) -; CHECK-NEXT: rlwinm 5, 4, 0, 31, 31 -; CHECK-NEXT: cmplwi 0, 5, 0 -; CHECK-NEXT: addi 5, 4, 1 +; CHECK: addi 5, 4, 1 +; CHECK-NEXT: rlwinm 6, 4, 0, 31, 31 +; CHECK-NEXT: cmplwi 0, 6, 0 ; CHECK-NEXT: stw 3, -4(1) ; CHECK-NEXT: stw 5, -8(1) ; CHECK-NEXT: stw 4, -12(1) @@ -25,138 +24,137 @@ define void @ppcvaargtest(%struct.__va_list_tag* %ap) nounwind { ; CHECK-NEXT: stw 3, -8(1) ; CHECK-NEXT: .LBB0_2: # %entry ; CHECK-NEXT: lwz 3, -8(1) -; CHECK-NEXT: slwi 4, 3, 2 +; CHECK-NEXT: addi 4, 3, 2 ; CHECK-NEXT: lwz 5, -4(1) ; CHECK-NEXT: lwz 6, 4(5) ; CHECK-NEXT: lwz 7, 8(5) -; CHECK-NEXT: add 4, 7, 4 +; CHECK-NEXT: stb 4, 0(5) ; CHECK-NEXT: cmpwi 0, 3, 8 +; CHECK-NEXT: addi 4, 6, 4 +; CHECK-NEXT: mr 8, 6 +; CHECK-NEXT: stw 7, -16(1) +; CHECK-NEXT: stw 4, -20(1) +; CHECK-NEXT: stw 3, -24(1) +; CHECK-NEXT: stw 8, -28(1) +; CHECK-NEXT: stw 6, -32(1) ; CHECK-NEXT: mfcr 0 # cr0 -; CHECK-NEXT: stw 0, -16(1) -; CHECK-NEXT: stw 3, -20(1) -; CHECK-NEXT: stw 4, -24(1) -; CHECK-NEXT: stw 6, -28(1) +; CHECK-NEXT: stw 0, -36(1) ; CHECK-NEXT: blt 0, .LBB0_4 ; CHECK-NEXT: # BB#3: # %entry -; CHECK-NEXT: lwz 3, -28(1) -; CHECK-NEXT: stw 3, -24(1) +; CHECK-NEXT: lwz 3, -20(1) +; CHECK-NEXT: stw 3, -28(1) ; CHECK-NEXT: .LBB0_4: # %entry +; CHECK-NEXT: lwz 3, -28(1) +; CHECK-NEXT: lwz 4, -4(1) +; CHECK-NEXT: stw 3, 4(4) + store i64 %x, i64* @var1, align 8 ; CHECK-NEXT: lwz 3, -24(1) -; CHECK-NEXT: lwz 4, -28(1) -; CHECK-NEXT: addi 5, 4, 4 -; CHECK-NEXT: lwz 0, -16(1) +; CHECK-NEXT: slwi 5, 3, 2 +; CHECK-NEXT: lwz 6, -16(1) +; CHECK-NEXT: add 5, 6, 5 +; CHECK-NEXT: lwz 0, -36(1) ; CHECK-NEXT: mtcrf 128, 0 -; CHECK-NEXT: stw 4, -32(1) -; CHECK-NEXT: stw 5, -36(1) -; CHECK-NEXT: stw 3, -40(1) +; CHECK-NEXT: stw 5, -40(1) ; CHECK-NEXT: blt 0, .LBB0_6 ; CHECK-NEXT: # BB#5: # %entry -; CHECK-NEXT: lwz 3, -36(1) -; CHECK-NEXT: stw 3, -32(1) -; CHECK-NEXT: .LBB0_6: # %entry ; CHECK-NEXT: lwz 3, -32(1) -; CHECK-NEXT: lwz 4, -20(1) -; CHECK-NEXT: addi 5, 4, 2 -; CHECK-NEXT: lwz 6, -4(1) -; CHECK-NEXT: stb 5, 0(6) -; CHECK-NEXT: stw 3, 4(6) - store i64 %x, i64* @var1, align 8 +; CHECK-NEXT: stw 3, -40(1) +; CHECK-NEXT: .LBB0_6: # %entry ; CHECK-NEXT: lwz 3, -40(1) -; CHECK-NEXT: lwz 5, 0(3) -; CHECK-NEXT: lwz 7, 4(3) -; CHECK-NEXT: lis 8, var1@ha -; CHECK-NEXT: la 9, var1@l(8) -; CHECK-NEXT: stw 7, 4(9) -; CHECK-NEXT: stw 5, var1@l(8) +; CHECK-NEXT: lwz 4, 0(3) +; CHECK-NEXT: lwz 3, 4(3) +; CHECK-NEXT: lis 5, var1@ha +; CHECK-NEXT: la 6, var1@l(5) +; CHECK-NEXT: stw 3, 4(6) +; CHECK-NEXT: stw 4, var1@l(5) +; CHECK-NEXT: lwz 3, -4(1) %y = va_arg %struct.__va_list_tag* %ap, double; From f1 -; CHECK-NEXT: lbz 5, 1(6) -; CHECK-NEXT: lwz 7, 4(6) -; CHECK-NEXT: lwz 8, 8(6) -; CHECK-NEXT: slwi 9, 5, 3 -; CHECK-NEXT: add 8, 8, 9 -; CHECK-NEXT: cmpwi 0, 5, 8 -; CHECK-NEXT: addi 9, 7, 8 -; CHECK-NEXT: mr 10, 7 -; CHECK-NEXT: stw 9, -44(1) +; CHECK-NEXT: lbz 4, 1(3) +; CHECK-NEXT: lwz 5, 4(3) +; CHECK-NEXT: lwz 6, 8(3) +; CHECK-NEXT: addi 7, 4, 1 +; CHECK-NEXT: stb 7, 1(3) +; CHECK-NEXT: cmpwi 0, 4, 8 +; CHECK-NEXT: addi 7, 5, 8 +; CHECK-NEXT: mr 8, 5 +; CHECK-NEXT: stw 5, -44(1) ; CHECK-NEXT: stw 7, -48(1) +; CHECK-NEXT: stw 4, -52(1) +; CHECK-NEXT: stw 6, -56(1) +; CHECK-NEXT: stw 8, -60(1) ; CHECK-NEXT: mfcr 0 # cr0 -; 
CHECK-NEXT: stw 0, -52(1) -; CHECK-NEXT: stw 5, -56(1) -; CHECK-NEXT: stw 10, -60(1) -; CHECK-NEXT: stw 8, -64(1) +; CHECK-NEXT: stw 0, -64(1) ; CHECK-NEXT: blt 0, .LBB0_8 ; CHECK-NEXT: # BB#7: # %entry -; CHECK-NEXT: lwz 3, -44(1) +; CHECK-NEXT: lwz 3, -48(1) ; CHECK-NEXT: stw 3, -60(1) ; CHECK-NEXT: .LBB0_8: # %entry ; CHECK-NEXT: lwz 3, -60(1) -; CHECK-NEXT: lwz 4, -64(1) -; CHECK-NEXT: addi 4, 4, 32 -; CHECK-NEXT: lwz 0, -52(1) +; CHECK-NEXT: lwz 4, -4(1) +; CHECK-NEXT: stw 3, 4(4) +; CHECK-NEXT: lwz 3, -52(1) +; CHECK-NEXT: slwi 5, 3, 3 +; CHECK-NEXT: lwz 6, -56(1) +; CHECK-NEXT: add 5, 6, 5 +; CHECK-NEXT: addi 5, 5, 32 +; CHECK-NEXT: lwz 0, -64(1) ; CHECK-NEXT: mtcrf 128, 0 -; CHECK-NEXT: stw 4, -68(1) -; CHECK-NEXT: stw 3, -72(1) +; CHECK-NEXT: stw 5, -68(1) ; CHECK-NEXT: blt 0, .LBB0_10 ; CHECK-NEXT: # BB#9: # %entry -; CHECK-NEXT: lwz 3, -48(1) +; CHECK-NEXT: lwz 3, -44(1) ; CHECK-NEXT: stw 3, -68(1) ; CHECK-NEXT: .LBB0_10: # %entry ; CHECK-NEXT: lwz 3, -68(1) -; CHECK-NEXT: lwz 4, -56(1) -; CHECK-NEXT: addi 5, 4, 1 -; CHECK-NEXT: lwz 6, -4(1) -; CHECK-NEXT: stb 5, 1(6) -; CHECK-NEXT: lwz 5, -72(1) -; CHECK-NEXT: stw 5, 4(6) ; CHECK-NEXT: lfd 0, 0(3) store double %y, double* @var2, align 8 ; CHECK-NEXT: lis 3, var2@ha ; CHECK-NEXT: stfd 0, var2@l(3) %z = va_arg %struct.__va_list_tag* %ap, i32; From r7 -; CHECK-NEXT: lbz 3, 0(6) -; CHECK-NEXT: lwz 5, 4(6) -; CHECK-NEXT: lwz 7, 8(6) -; CHECK-NEXT: slwi 8, 3, 2 -; CHECK-NEXT: add 7, 7, 8 -; CHECK-NEXT: cmpwi 0, 3, 8 -; CHECK-NEXT: addi 8, 5, 4 -; CHECK-NEXT: mr 9, 5 -; CHECK-NEXT: stw 3, -76(1) -; CHECK-NEXT: stw 7, -80(1) -; CHECK-NEXT: stw 8, -84(1) -; CHECK-NEXT: stw 5, -88(1) -; CHECK-NEXT: stw 9, -92(1) +; CHECK-NEXT: lwz 3, -4(1) +; CHECK-NEXT: lbz 4, 0(3) +; CHECK-NEXT: lwz 5, 4(3) +; CHECK-NEXT: lwz 6, 8(3) +; CHECK-NEXT: addi 7, 4, 1 +; CHECK-NEXT: stb 7, 0(3) +; CHECK-NEXT: cmpwi 0, 4, 8 +; CHECK-NEXT: addi 7, 5, 4 +; CHECK-NEXT: mr 8, 5 +; CHECK-NEXT: stw 4, -72(1) +; CHECK-NEXT: stw 6, -76(1) ; CHECK-NEXT: mfcr 0 # cr0 -; CHECK-NEXT: stw 0, -96(1) +; CHECK-NEXT: stw 0, -80(1) +; CHECK-NEXT: stw 5, -84(1) +; CHECK-NEXT: stw 8, -88(1) +; CHECK-NEXT: stw 7, -92(1) ; CHECK-NEXT: blt 0, .LBB0_12 ; CHECK-NEXT: # BB#11: # %entry -; CHECK-NEXT: lwz 3, -84(1) -; CHECK-NEXT: stw 3, -92(1) -; CHECK-NEXT: .LBB0_12: # %entry ; CHECK-NEXT: lwz 3, -92(1) -; CHECK-NEXT: lwz 4, -80(1) -; CHECK-NEXT: lwz 0, -96(1) +; CHECK-NEXT: stw 3, -88(1) +; CHECK-NEXT: .LBB0_12: # %entry +; CHECK-NEXT: lwz 3, -88(1) +; CHECK-NEXT: lwz 4, -4(1) +; CHECK-NEXT: stw 3, 4(4) +; CHECK-NEXT: lwz 3, -72(1) +; CHECK-NEXT: slwi 5, 3, 2 +; CHECK-NEXT: lwz 6, -76(1) +; CHECK-NEXT: add 5, 6, 5 +; CHECK-NEXT: lwz 0, -80(1) ; CHECK-NEXT: mtcrf 128, 0 -; CHECK-NEXT: stw 3, -100(1) -; CHECK-NEXT: stw 4, -104(1) +; CHECK-NEXT: stw 5, -96(1) ; CHECK-NEXT: blt 0, .LBB0_14 ; CHECK-NEXT: # BB#13: # %entry -; CHECK-NEXT: lwz 3, -88(1) -; CHECK-NEXT: stw 3, -104(1) +; CHECK-NEXT: lwz 3, -84(1) +; CHECK-NEXT: stw 3, -96(1) ; CHECK-NEXT: .LBB0_14: # %entry -; CHECK-NEXT: lwz 3, -104(1) -; CHECK-NEXT: lwz 4, -76(1) -; CHECK-NEXT: addi 5, 4, 1 -; CHECK-NEXT: lwz 6, -4(1) -; CHECK-NEXT: stb 5, 0(6) -; CHECK-NEXT: lwz 5, -100(1) -; CHECK-NEXT: stw 5, 4(6) +; CHECK-NEXT: lwz 3, -96(1) ; CHECK-NEXT: lwz 3, 0(3) store i32 %z, i32* @var3, align 4 -; CHECK-NEXT: lis 5, var3@ha -; CHECK-NEXT: stw 3, var3@l(5) +; CHECK-NEXT: lis 4, var3@ha +; CHECK-NEXT: stw 3, var3@l(4) +; CHECK-NEXT: lwz 3, -4(1) ret void -; CHECK-NEXT: stw 6, -108(1) +; CHECK-NEXT: stw 3, -100(1) ; CHECK-NEXT: blr } 
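The ppc32-vaarg.ll update above is pure CHECK-line maintenance: instruction selection now schedules the va_arg expansion differently, so every expected register number and spill slot moved while the behavior under test is unchanged. The construct being exercised is va_arg against the SVR4 va_list; a stripped-down sketch of that IR shape, in the same 3.0-era syntax (the struct layout is assumed from the SVR4 PPC32 ABI, and the function is illustrative):

%struct.__va_list_tag = type { i8, i8, i16, i8*, i8* }

define i32 @next_int(%struct.__va_list_tag* %ap) nounwind {
entry:
; va_arg reads the gpr index from the va_list, fetches the argument from
; the register-save area (r3-r10) or the overflow area, then bumps the index.
  %v = va_arg %struct.__va_list_tag* %ap, i32
  ret i32 %v
}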
diff --git a/test/CodeGen/Thumb2/machine-licm.ll b/test/CodeGen/Thumb2/machine-licm.ll index 46937fc..4e16c9a 100644 --- a/test/CodeGen/Thumb2/machine-licm.ll +++ b/test/CodeGen/Thumb2/machine-licm.ll @@ -51,12 +51,11 @@ return: ; preds = %bb, %entry define void @t2(i8* %ptr1, i8* %ptr2) nounwind { entry: ; CHECK: t2: -; CHECK: mov.w [[R3:r[0-9]+]], #1065353216 -; CHECK: vdup.32 q{{.*}}, [[R3]] +; CHECK: vmov.f32 q{{.*}}, #1.000000e+00 br i1 undef, label %bb1, label %bb2 bb1: -; CHECK-NEXT: %bb1 +; CHECK: %bb1 %indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %entry ] %tmp1 = shl i32 %indvar, 2 %gep1 = getelementptr i8* %ptr1, i32 %tmp1 diff --git a/test/CodeGen/X86/2011-11-22-AVX2-Domains.ll b/test/CodeGen/X86/2011-11-22-AVX2-Domains.ll new file mode 100644 index 0000000..8174109 --- /dev/null +++ b/test/CodeGen/X86/2011-11-22-AVX2-Domains.ll @@ -0,0 +1,99 @@ +; RUN: llc < %s -mcpu=corei7-avx -mattr=+avx | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-darwin11" + +; This test would create a vpand %ymm instruction that is only legal in AVX2. +; CHECK-NOT: vpand %ymm + +declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone + +define void @ShadeTile() nounwind { +allocas: + br i1 undef, label %if_then, label %if_else + +if_then: ; preds = %allocas + unreachable + +if_else: ; preds = %allocas + br i1 undef, label %for_loop156.lr.ph, label %if_exit + +for_loop156.lr.ph: ; preds = %if_else + %val_6.i21244 = load i16* undef, align 2 + %0 = insertelement <8 x i16> undef, i16 %val_6.i21244, i32 6 + %val_7.i21248 = load i16* undef, align 2 + %1 = insertelement <8 x i16> %0, i16 %val_7.i21248, i32 7 + %uint2uint32.i20206 = zext <8 x i16> %1 to <8 x i32> + %bitop5.i20208 = and <8 x i32> %uint2uint32.i20206, <i32 31744, i32 31744, i32 31744, i32 31744, i32 31744, i32 31744, i32 31744, i32 31744> + %bitop8.i20209 = and <8 x i32> %uint2uint32.i20206, <i32 1023, i32 1023, i32 1023, i32 1023, i32 1023, i32 1023, i32 1023, i32 1023> + %bitop12.i20211 = lshr <8 x i32> %bitop5.i20208, <i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10> + %binop13.i20212 = add <8 x i32> %bitop12.i20211, <i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112> + %bitop15.i20213 = shl <8 x i32> %binop13.i20212, <i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23> + %bitop17.i20214 = shl <8 x i32> %bitop8.i20209, <i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13> + %bitop20.i20215 = or <8 x i32> undef, %bitop15.i20213 + %bitop22.i20216 = or <8 x i32> %bitop20.i20215, %bitop17.i20214 + %int_to_float_bitcast.i.i.i20217 = bitcast <8 x i32> %bitop22.i20216 to <8 x float> + %binop401 = fmul <8 x float> undef, <float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00> + %binop402 = fadd <8 x float> %binop401, <float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00> + %binop403 = fmul <8 x float> zeroinitializer, %binop402 + %binop406 = fmul <8 x float> %int_to_float_bitcast.i.i.i20217, <float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00> + %binop407 = fadd <8 x float> %binop406, <float 
-2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00> + %binop408 = fmul <8 x float> zeroinitializer, %binop407 + %binop411 = fsub <8 x float> <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>, undef + %val_4.i21290 = load i16* undef, align 2 + %2 = insertelement <8 x i16> undef, i16 %val_4.i21290, i32 4 + %val_5.i21294 = load i16* undef, align 2 + %3 = insertelement <8 x i16> %2, i16 %val_5.i21294, i32 5 + %val_6.i21298 = load i16* undef, align 2 + %4 = insertelement <8 x i16> %3, i16 %val_6.i21298, i32 6 + %ptr_7.i21301 = inttoptr i64 undef to i16* + %val_7.i21302 = load i16* %ptr_7.i21301, align 2 + %5 = insertelement <8 x i16> %4, i16 %val_7.i21302, i32 7 + %uint2uint32.i20218 = zext <8 x i16> %5 to <8 x i32> + %structelement561 = load i8** undef, align 8 + %ptr2int563 = ptrtoint i8* %structelement561 to i64 + %smear.ptr_smear7571 = insertelement <8 x i64> undef, i64 %ptr2int563, i32 7 + %new_ptr582 = add <8 x i64> %smear.ptr_smear7571, zeroinitializer + %val_5.i21509 = load i8* null, align 1 + %6 = insertelement <8 x i8> undef, i8 %val_5.i21509, i32 5 + %7 = insertelement <8 x i8> %6, i8 undef, i32 6 + %iptr_7.i21515 = extractelement <8 x i64> %new_ptr582, i32 7 + %ptr_7.i21516 = inttoptr i64 %iptr_7.i21515 to i8* + %val_7.i21517 = load i8* %ptr_7.i21516, align 1 + %8 = insertelement <8 x i8> %7, i8 %val_7.i21517, i32 7 + %uint2float.i20245 = uitofp <8 x i8> %8 to <8 x float> + %binop.i20246 = fmul <8 x float> %uint2float.i20245, <float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000> + br i1 undef, label %for_loop594.lr.ph, label %for_exit595 + +if_exit: ; preds = %if_else + ret void + +for_loop594.lr.ph: ; preds = %for_loop156.lr.ph + %bitop8.i20221 = and <8 x i32> %uint2uint32.i20218, <i32 1023, i32 1023, i32 1023, i32 1023, i32 1023, i32 1023, i32 1023, i32 1023> + br i1 undef, label %cif_test_all730, label %cif_mask_mixed1552 + +for_exit595: ; preds = %for_loop156.lr.ph + unreachable + +cif_test_all730: ; preds = %for_loop594.lr.ph + %binop11.i20545 = fmul <8 x float> %binop408, zeroinitializer + %binop12.i20546 = fadd <8 x float> undef, %binop11.i20545 + %binop15.i20547 = fmul <8 x float> %binop411, undef + %binop16.i20548 = fadd <8 x float> %binop12.i20546, %binop15.i20547 + %bincmp774 = fcmp ogt <8 x float> %binop16.i20548, zeroinitializer + %val_to_boolvec32775 = sext <8 x i1> %bincmp774 to <8 x i32> + %floatmask.i20549 = bitcast <8 x i32> %val_to_boolvec32775 to <8 x float> + %v.i20550 = tail call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %floatmask.i20549) nounwind readnone + %cond = icmp eq i32 %v.i20550, 255 + br i1 %cond, label %cif_test_all794, label %cif_test_mixed + +cif_test_all794: ; preds = %cif_test_all730 + %binop.i20572 = fmul <8 x float> %binop403, undef + unreachable + +cif_test_mixed: ; preds = %cif_test_all730 + %binop1207 = fmul <8 x float> %binop.i20246, undef + unreachable + +cif_mask_mixed1552: ; preds = %for_loop594.lr.ph + unreachable +} diff --git a/test/CodeGen/X86/avx-intrinsics-x86.ll b/test/CodeGen/X86/avx-intrinsics-x86.ll index 3fa1d95..df12b71 100644 --- a/test/CodeGen/X86/avx-intrinsics-x86.ll +++ b/test/CodeGen/X86/avx-intrinsics-x86.ll @@ -2021,7 +2021,9 @@ declare <32 x i8> 
@llvm.x86.avx.ldu.dq.256(i8*) nounwind readonly define <32 x i8> @test_x86_avx_loadu_dq_256(i8* %a0) { ; CHECK: vmovdqu - %res = call <32 x i8> @llvm.x86.avx.loadu.dq.256(i8* %a0) ; <<32 x i8>> [#uses=1] + %a1 = call <32 x i8> @llvm.x86.avx.loadu.dq.256(i8* %a0) ; <<32 x i8>> [#uses=1] + ; add operation forces the execution domain. + %res = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> ret <32 x i8> %res } declare <32 x i8> @llvm.x86.avx.loadu.dq.256(i8*) nounwind readonly @@ -2029,7 +2031,9 @@ declare <32 x i8> @llvm.x86.avx.loadu.dq.256(i8*) nounwind readonly define <4 x double> @test_x86_avx_loadu_pd_256(i8* %a0) { ; CHECK: vmovupd - %res = call <4 x double> @llvm.x86.avx.loadu.pd.256(i8* %a0) ; <<4 x double>> [#uses=1] + %a1 = call <4 x double> @llvm.x86.avx.loadu.pd.256(i8* %a0) ; <<4 x double>> [#uses=1] + ; add operation forces the execution domain. + %res = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0> ret <4 x double> %res } declare <4 x double> @llvm.x86.avx.loadu.pd.256(i8*) nounwind readonly @@ -2157,7 +2161,9 @@ declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone define void @test_x86_avx_movnt_dq_256(i8* %a0, <4 x i64> %a1) { ; CHECK: vmovntdq - call void @llvm.x86.avx.movnt.dq.256(i8* %a0, <4 x i64> %a1) + ; add operation forces the execution domain. + %a2 = add <4 x i64> %a1, <i64 1, i64 1, i64 1, i64 1> + call void @llvm.x86.avx.movnt.dq.256(i8* %a0, <4 x i64> %a2) ret void } declare void @llvm.x86.avx.movnt.dq.256(i8*, <4 x i64>) nounwind @@ -2165,7 +2171,8 @@ declare void @llvm.x86.avx.movnt.dq.256(i8*, <4 x i64>) nounwind define void @test_x86_avx_movnt_pd_256(i8* %a0, <4 x double> %a1) { ; CHECK: vmovntpd - call void @llvm.x86.avx.movnt.pd.256(i8* %a0, <4 x double> %a1) + %a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0> + call void @llvm.x86.avx.movnt.pd.256(i8* %a0, <4 x double> %a2) ret void } declare void @llvm.x86.avx.movnt.pd.256(i8*, <4 x double>) nounwind @@ -2258,7 +2265,9 @@ declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) { ; CHECK: vmovdqu - call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a1) + ; add operation forces the execution domain. + %a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a2) ret void } declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind @@ -2266,7 +2275,9 @@ declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind define void @test_x86_avx_storeu_pd_256(i8* %a0, <4 x double> %a1) { ; CHECK: vmovupd - call void @llvm.x86.avx.storeu.pd.256(i8* %a0, <4 x double> %a1) + ; add operation forces the execution domain. 
+ %a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0> + call void @llvm.x86.avx.storeu.pd.256(i8* %a0, <4 x double> %a2) ret void } declare void @llvm.x86.avx.storeu.pd.256(i8*, <4 x double>) nounwind diff --git a/test/CodeGen/X86/avx-logic.ll b/test/CodeGen/X86/avx-logic.ll index cd37135..115cefb 100644 --- a/test/CodeGen/X86/avx-logic.ll +++ b/test/CodeGen/X86/avx-logic.ll @@ -7,7 +7,9 @@ entry: %1 = bitcast <4 x double> %y to <4 x i64> %and.i = and <4 x i64> %0, %1 %2 = bitcast <4 x i64> %and.i to <4 x double> - ret <4 x double> %2 + ; add forces execution domain + %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0> + ret <4 x double> %3 } ; CHECK: vandpd LCP{{.*}}(%rip) @@ -16,7 +18,9 @@ entry: %0 = bitcast <4 x double> %y to <4 x i64> %and.i = and <4 x i64> %0, <i64 4616752568008179712, i64 4614838538166547251, i64 4612361558371493478, i64 4608083138725491507> %1 = bitcast <4 x i64> %and.i to <4 x double> - ret <4 x double> %1 + ; add forces execution domain + %2 = fadd <4 x double> %1, <double 0x0, double 0x0, double 0x0, double 0x0> + ret <4 x double> %2 } ; CHECK: vandps @@ -45,7 +49,9 @@ entry: %1 = bitcast <4 x double> %y to <4 x i64> %xor.i = xor <4 x i64> %0, %1 %2 = bitcast <4 x i64> %xor.i to <4 x double> - ret <4 x double> %2 + ; add forces execution domain + %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0> + ret <4 x double> %3 } ; CHECK: vxorpd LCP{{.*}}(%rip) @@ -54,7 +60,9 @@ entry: %0 = bitcast <4 x double> %y to <4 x i64> %xor.i = xor <4 x i64> %0, <i64 4616752568008179712, i64 4614838538166547251, i64 4612361558371493478, i64 4608083138725491507> %1 = bitcast <4 x i64> %xor.i to <4 x double> - ret <4 x double> %1 + ; add forces execution domain + %2 = fadd <4 x double> %1, <double 0x0, double 0x0, double 0x0, double 0x0> + ret <4 x double> %2 } ; CHECK: vxorps @@ -83,7 +91,9 @@ entry: %1 = bitcast <4 x double> %y to <4 x i64> %or.i = or <4 x i64> %0, %1 %2 = bitcast <4 x i64> %or.i to <4 x double> - ret <4 x double> %2 + ; add forces execution domain + %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0> + ret <4 x double> %3 } ; CHECK: vorpd LCP{{.*}}(%rip) @@ -92,7 +102,9 @@ entry: %0 = bitcast <4 x double> %y to <4 x i64> %or.i = or <4 x i64> %0, <i64 4616752568008179712, i64 4614838538166547251, i64 4612361558371493478, i64 4608083138725491507> %1 = bitcast <4 x i64> %or.i to <4 x double> - ret <4 x double> %1 + ; add forces execution domain + %2 = fadd <4 x double> %1, <double 0x0, double 0x0, double 0x0, double 0x0> + ret <4 x double> %2 } ; CHECK: vorps @@ -122,7 +134,9 @@ entry: %1 = bitcast <4 x double> %y to <4 x i64> %and.i = and <4 x i64> %1, %neg.i %2 = bitcast <4 x i64> %and.i to <4 x double> - ret <4 x double> %2 + ; add forces execution domain + %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0> + ret <4 x double> %3 } ; CHECK: vandnpd (% @@ -134,7 +148,9 @@ entry: %1 = bitcast <4 x double> %tmp2 to <4 x i64> %and.i = and <4 x i64> %1, %neg.i %2 = bitcast <4 x i64> %and.i to <4 x double> - ret <4 x double> %2 + ; add forces execution domain + %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0> + ret <4 x double> %3 } ; CHECK: vandnps diff --git a/test/CodeGen/X86/avx-shift.ll b/test/CodeGen/X86/avx-shift.ll index a33423d..681747b 100644 --- a/test/CodeGen/X86/avx-shift.ll +++ b/test/CodeGen/X86/avx-shift.ll @@ -112,3 +112,27 @@ define <8 x i32> @vshift08(<8 x i32> %a) nounwind { ret <8 x i32> %bitop } +;;; 
Uses shifts for sign extension +; CHECK: _sext_v16i16 +; CHECK: vpsllw +; CHECK: vpsraw +; CHECK: vpsllw +; CHECK: vpsraw +; CHECK: vinsertf128 +define <16 x i16> @sext_v16i16(<16 x i16> %a) nounwind { + %b = trunc <16 x i16> %a to <16 x i8> + %c = sext <16 x i8> %b to <16 x i16> + ret <16 x i16> %c +} + +; CHECK: _sext_v8i32 +; CHECK: vpslld +; CHECK: vpsrad +; CHECK: vpslld +; CHECK: vpsrad +; CHECK: vinsertf128 +define <8 x i32> @sext_v8i32(<8 x i32> %a) nounwind { + %b = trunc <8 x i32> %a to <8 x i16> + %c = sext <8 x i16> %b to <8 x i32> + ret <8 x i32> %c +} diff --git a/test/CodeGen/X86/avx-unpack.ll b/test/CodeGen/X86/avx-unpack.ll index d420101..fcd7bb6 100644 --- a/test/CodeGen/X86/avx-unpack.ll +++ b/test/CodeGen/X86/avx-unpack.ll @@ -67,6 +67,15 @@ entry: ret <8 x i32> %shuffle.i } +; CHECK: vunpckhps (% +define <8 x i32> @unpackhips2(<8 x i32>* %src1, <8 x i32>* %src2) nounwind uwtable readnone ssp { +entry: + %a = load <8 x i32>* %src1 + %b = load <8 x i32>* %src2 + %shuffle.i = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> + ret <8 x i32> %shuffle.i +} + ; CHECK: vunpckhpd define <4 x i64> @unpackhipd1(<4 x i64> %src1, <4 x i64> %src2) nounwind uwtable readnone ssp { entry: @@ -74,6 +83,15 @@ entry: ret <4 x i64> %shuffle.i } +; CHECK: vunpckhpd (% +define <4 x i64> @unpackhipd2(<4 x i64>* %src1, <4 x i64>* %src2) nounwind uwtable readnone ssp { +entry: + %a = load <4 x i64>* %src1 + %b = load <4 x i64>* %src2 + %shuffle.i = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7> + ret <4 x i64> %shuffle.i +} + ; CHECK: vunpcklps define <8 x i32> @unpacklops1(<8 x i32> %src1, <8 x i32> %src2) nounwind uwtable readnone ssp { entry: @@ -81,9 +99,27 @@ entry: ret <8 x i32> %shuffle.i } +; CHECK: vunpcklps (% +define <8 x i32> @unpacklops2(<8 x i32>* %src1, <8 x i32>* %src2) nounwind uwtable readnone ssp { +entry: + %a = load <8 x i32>* %src1 + %b = load <8 x i32>* %src2 + %shuffle.i = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> + ret <8 x i32> %shuffle.i +} + ; CHECK: vunpcklpd define <4 x i64> @unpacklopd1(<4 x i64> %src1, <4 x i64> %src2) nounwind uwtable readnone ssp { entry: %shuffle.i = shufflevector <4 x i64> %src1, <4 x i64> %src2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> ret <4 x i64> %shuffle.i } + +; CHECK: vunpcklpd (% +define <4 x i64> @unpacklopd2(<4 x i64>* %src1, <4 x i64>* %src2) nounwind uwtable readnone ssp { +entry: + %a = load <4 x i64>* %src1 + %b = load <4 x i64>* %src2 + %shuffle.i = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6> + ret <4 x i64> %shuffle.i +} diff --git a/test/CodeGen/X86/avx-vbroadcast.ll b/test/CodeGen/X86/avx-vbroadcast.ll index 89b4188..8fbd02a 100644 --- a/test/CodeGen/X86/avx-vbroadcast.ll +++ b/test/CodeGen/X86/avx-vbroadcast.ll @@ -1,7 +1,4 @@ ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s -; XFAIL: * - -; xfail this file for now because of PR8156, when it gets solved merge this with avx-splat.ll ; CHECK: vbroadcastsd (% define <4 x i64> @A(i64* %ptr) nounwind uwtable readnone ssp { diff --git a/test/CodeGen/X86/avx-vshufp.ll b/test/CodeGen/X86/avx-vshufp.ll index f06548d..7ec3a44 100644 --- a/test/CodeGen/X86/avx-vshufp.ll +++ b/test/CodeGen/X86/avx-vshufp.ll @@ -27,3 +27,17 @@ entry: %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 undef> ret <4 x 
double> %shuffle } + +; CHECK: vshufps $-55, %ymm +define <8 x float> @E(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp { +entry: + %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 9, i32 10, i32 0, i32 3, i32 13, i32 14, i32 4, i32 7> + ret <8 x float> %shuffle +} + +; CHECK: vshufpd $8, %ymm +define <4 x double> @F(<4 x double> %a, <4 x double> %b) nounwind uwtable readnone ssp { +entry: + %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 7> + ret <4 x double> %shuffle +} diff --git a/test/CodeGen/X86/avx2-logic.ll b/test/CodeGen/X86/avx2-logic.ll index f1c294c..13ebaa6 100644 --- a/test/CodeGen/X86/avx2-logic.ll +++ b/test/CodeGen/X86/avx2-logic.ll @@ -53,3 +53,44 @@ define <32 x i8> @vpblendvb(<32 x i8> %x, <32 x i8> %y) { %min = select <32 x i1> %min_is_x, <32 x i8> %x, <32 x i8> %y ret <32 x i8> %min } + +define <8 x i32> @signd(<8 x i32> %a, <8 x i32> %b) nounwind { +entry: +; CHECK: signd: +; CHECK: psignd +; CHECK-NOT: sub +; CHECK: ret + %b.lobit = ashr <8 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> + %sub = sub nsw <8 x i32> zeroinitializer, %a + %0 = xor <8 x i32> %b.lobit, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1> + %1 = and <8 x i32> %a, %0 + %2 = and <8 x i32> %b.lobit, %sub + %cond = or <8 x i32> %1, %2 + ret <8 x i32> %cond +} + +define <8 x i32> @blendvb(<8 x i32> %b, <8 x i32> %a, <8 x i32> %c) nounwind { +entry: +; CHECK: blendvb: +; CHECK: pblendvb +; CHECK: ret + %b.lobit = ashr <8 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> + %sub = sub nsw <8 x i32> zeroinitializer, %a + %0 = xor <8 x i32> %b.lobit, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1> + %1 = and <8 x i32> %c, %0 + %2 = and <8 x i32> %a, %b.lobit + %cond = or <8 x i32> %1, %2 + ret <8 x i32> %cond +} + +define <8 x i32> @allOnes() nounwind { +; CHECK: vpcmpeqd +; CHECK-NOT: vinsert + ret <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1> +} + +define <16 x i16> @allOnes2() nounwind { +; CHECK: vpcmpeqd +; CHECK-NOT: vinsert + ret <16 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> +} diff --git a/test/CodeGen/X86/avx2-shift.ll b/test/CodeGen/X86/avx2-shift.ll index f759361..b6cf54e 100644 --- a/test/CodeGen/X86/avx2-shift.ll +++ b/test/CodeGen/X86/avx2-shift.ll @@ -58,14 +58,14 @@ define <4 x i64> @variable_srl3(<4 x i64> %x, <4 x i64> %y) { } ; CHECK: variable_sra0 -; CHECK: psravd +; CHECK: vpsravd ; CHECK: ret define <4 x i32> @variable_sra0(<4 x i32> %x, <4 x i32> %y) { %k = ashr <4 x i32> %x, %y ret <4 x i32> %k } ; CHECK: variable_sra1 -; CHECK: psravd +; CHECK: vpsravd ; CHECK: ret define <8 x i32> @variable_sra1(<8 x i32> %x, <8 x i32> %y) { %k = ashr <8 x i32> %x, %y @@ -127,7 +127,7 @@ define <16 x i16> @vshift07(<16 x i16> %a) nounwind readnone { } ; CHECK: variable_sra0_load -; CHECK: psravd (% +; CHECK: vpsravd (% ; CHECK: ret define <4 x i32> @variable_sra0_load(<4 x i32> %x, <4 x i32>* %y) { %y1 = load <4 x i32>* %y @@ -136,7 +136,7 @@ define <4 x i32> @variable_sra0_load(<4 x i32> %x, <4 x i32>* %y) { } ; CHECK: variable_sra1_load -; CHECK: psravd (% +; CHECK: vpsravd (% ; CHECK: ret define <8 x i32> @variable_sra1_load(<8 x i32> %x, <8 x i32>* %y) { %y1 = load <8 x i32>* %y @@ -145,7 +145,7 @@ define <8 x i32> @variable_sra1_load(<8 x i32> %x, <8 x i32>* %y) { } ; CHECK: variable_shl0_load -; 
CHECK: psllvd (% +; CHECK: vpsllvd (% ; CHECK: ret define <4 x i32> @variable_shl0_load(<4 x i32> %x, <4 x i32>* %y) { %y1 = load <4 x i32>* %y @@ -153,7 +153,7 @@ define <4 x i32> @variable_shl0_load(<4 x i32> %x, <4 x i32>* %y) { ret <4 x i32> %k } ; CHECK: variable_shl1_load -; CHECK: psllvd (% +; CHECK: vpsllvd (% ; CHECK: ret define <8 x i32> @variable_shl1_load(<8 x i32> %x, <8 x i32>* %y) { %y1 = load <8 x i32>* %y @@ -161,7 +161,7 @@ define <8 x i32> @variable_shl1_load(<8 x i32> %x, <8 x i32>* %y) { ret <8 x i32> %k } ; CHECK: variable_shl2_load -; CHECK: psllvq (% +; CHECK: vpsllvq (% ; CHECK: ret define <2 x i64> @variable_shl2_load(<2 x i64> %x, <2 x i64>* %y) { %y1 = load <2 x i64>* %y @@ -169,7 +169,7 @@ define <2 x i64> @variable_shl2_load(<2 x i64> %x, <2 x i64>* %y) { ret <2 x i64> %k } ; CHECK: variable_shl3_load -; CHECK: psllvq (% +; CHECK: vpsllvq (% ; CHECK: ret define <4 x i64> @variable_shl3_load(<4 x i64> %x, <4 x i64>* %y) { %y1 = load <4 x i64>* %y @@ -177,7 +177,7 @@ define <4 x i64> @variable_shl3_load(<4 x i64> %x, <4 x i64>* %y) { ret <4 x i64> %k } ; CHECK: variable_srl0_load -; CHECK: psrlvd (% +; CHECK: vpsrlvd (% ; CHECK: ret define <4 x i32> @variable_srl0_load(<4 x i32> %x, <4 x i32>* %y) { %y1 = load <4 x i32>* %y @@ -185,7 +185,7 @@ define <4 x i32> @variable_srl0_load(<4 x i32> %x, <4 x i32>* %y) { ret <4 x i32> %k } ; CHECK: variable_srl1_load -; CHECK: psrlvd (% +; CHECK: vpsrlvd (% ; CHECK: ret define <8 x i32> @variable_srl1_load(<8 x i32> %x, <8 x i32>* %y) { %y1 = load <8 x i32>* %y @@ -193,7 +193,7 @@ define <8 x i32> @variable_srl1_load(<8 x i32> %x, <8 x i32>* %y) { ret <8 x i32> %k } ; CHECK: variable_srl2_load -; CHECK: psrlvq (% +; CHECK: vpsrlvq (% ; CHECK: ret define <2 x i64> @variable_srl2_load(<2 x i64> %x, <2 x i64>* %y) { %y1 = load <2 x i64>* %y @@ -201,10 +201,68 @@ define <2 x i64> @variable_srl2_load(<2 x i64> %x, <2 x i64>* %y) { ret <2 x i64> %k } ; CHECK: variable_srl3_load -; CHECK: psrlvq (% +; CHECK: vpsrlvq (% ; CHECK: ret define <4 x i64> @variable_srl3_load(<4 x i64> %x, <4 x i64>* %y) { %y1 = load <4 x i64>* %y %k = lshr <4 x i64> %x, %y1 ret <4 x i64> %k } + +define <32 x i8> @shl9(<32 x i8> %A) nounwind { + %B = shl <32 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> + ret <32 x i8> %B +; CHECK: shl9: +; CHECK: vpsllw $3 +; CHECK: vpand +; CHECK: ret +} + +define <32 x i8> @shr9(<32 x i8> %A) nounwind { + %B = lshr <32 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> + ret <32 x i8> %B +; CHECK: shr9: +; CHECK: vpsrlw $3 +; CHECK: vpand +; CHECK: ret +} + +define <32 x i8> @sra_v32i8_7(<32 x i8> %A) nounwind { + %B = ashr <32 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7> + ret <32 x i8> %B +; CHECK: sra_v32i8_7: +; CHECK: vxorps +; CHECK: vpcmpgtb +; CHECK: ret +} + +define <32 x i8> @sra_v32i8(<32 x i8> %A) nounwind { + %B = ashr <32 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> + ret <32 x i8> %B +; 
CHECK: sra_v32i8: +; CHECK: vpsrlw $3 +; CHECK: vpand +; CHECK: vpxor +; CHECK: vpsubb +; CHECK: ret +} + +; CHECK: _sext_v16i16 +; CHECK: vpsllw +; CHECK: vpsraw +; CHECK-NOT: vinsertf128 +define <16 x i16> @sext_v16i16(<16 x i16> %a) nounwind { + %b = trunc <16 x i16> %a to <16 x i8> + %c = sext <16 x i8> %b to <16 x i16> + ret <16 x i16> %c +} + +; CHECK: _sext_v8i32 +; CHECK: vpslld +; CHECK: vpsrad +; CHECK-NOT: vinsertf128 +define <8 x i32> @sext_v8i32(<8 x i32> %a) nounwind { + %b = trunc <8 x i32> %a to <8 x i16> + %c = sext <8 x i16> %b to <8 x i32> + ret <8 x i32> %c +} diff --git a/test/CodeGen/X86/avx2-unpack.ll b/test/CodeGen/X86/avx2-unpack.ll new file mode 100644 index 0000000..aa97308 --- /dev/null +++ b/test/CodeGen/X86/avx2-unpack.ll @@ -0,0 +1,57 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s + +; CHECK: vpunpckhdq +define <8 x i32> @unpackhidq1(<8 x i32> %src1, <8 x i32> %src2) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <8 x i32> %src1, <8 x i32> %src2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> + ret <8 x i32> %shuffle.i +} + +; CHECK: vpunpckhqdq +define <4 x i64> @unpackhiqdq1(<4 x i64> %src1, <4 x i64> %src2) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <4 x i64> %src1, <4 x i64> %src2, <4 x i32> <i32 1, i32 5, i32 3, i32 7> + ret <4 x i64> %shuffle.i +} + +; CHECK: vpunpckldq +define <8 x i32> @unpacklodq1(<8 x i32> %src1, <8 x i32> %src2) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <8 x i32> %src1, <8 x i32> %src2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> + ret <8 x i32> %shuffle.i +} + +; CHECK: vpunpcklqdq +define <4 x i64> @unpacklqdq1(<4 x i64> %src1, <4 x i64> %src2) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <4 x i64> %src1, <4 x i64> %src2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> + ret <4 x i64> %shuffle.i +} + +; CHECK: vpunpckhwd +define <16 x i16> @unpackhwd(<16 x i16> %src1, <16 x i16> %src2) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <16 x i16> %src1, <16 x i16> %src2, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31> + ret <16 x i16> %shuffle.i +} + +; CHECK: vpunpcklwd +define <16 x i16> @unpacklwd(<16 x i16> %src1, <16 x i16> %src2) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <16 x i16> %src1, <16 x i16> %src2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27> + ret <16 x i16> %shuffle.i +} + +; CHECK: vpunpckhbw +define <32 x i8> @unpackhbw(<32 x i8> %src1, <32 x i8> %src2) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <32 x i8> %src1, <32 x i8> %src2, <32 x i32> <i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63> + ret <32 x i8> %shuffle.i +} + +; CHECK: vpunpcklbw +define <32 x i8> @unpacklbw(<32 x i8> %src1, <32 x i8> %src2) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <32 x i8> %src1, <32 x i8> %src2, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 
19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55> + ret <32 x i8> %shuffle.i +} diff --git a/test/CodeGen/X86/avx2-vbroadcast.ll b/test/CodeGen/X86/avx2-vbroadcast.ll new file mode 100644 index 0000000..142be33 --- /dev/null +++ b/test/CodeGen/X86/avx2-vbroadcast.ll @@ -0,0 +1,142 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s + +; CHECK: vpbroadcastb (% +define <16 x i8> @BB16(i8* %ptr) nounwind uwtable readnone ssp { +entry: + %q = load i8* %ptr, align 4 + %q0 = insertelement <16 x i8> undef, i8 %q, i32 0 + %q1 = insertelement <16 x i8> %q0, i8 %q, i32 1 + %q2 = insertelement <16 x i8> %q1, i8 %q, i32 2 + %q3 = insertelement <16 x i8> %q2, i8 %q, i32 3 + %q4 = insertelement <16 x i8> %q3, i8 %q, i32 4 + %q5 = insertelement <16 x i8> %q4, i8 %q, i32 5 + %q6 = insertelement <16 x i8> %q5, i8 %q, i32 6 + %q7 = insertelement <16 x i8> %q6, i8 %q, i32 7 + %q8 = insertelement <16 x i8> %q7, i8 %q, i32 8 + %q9 = insertelement <16 x i8> %q8, i8 %q, i32 9 + %qa = insertelement <16 x i8> %q9, i8 %q, i32 10 + %qb = insertelement <16 x i8> %qa, i8 %q, i32 11 + %qc = insertelement <16 x i8> %qb, i8 %q, i32 12 + %qd = insertelement <16 x i8> %qc, i8 %q, i32 13 + %qe = insertelement <16 x i8> %qd, i8 %q, i32 14 + %qf = insertelement <16 x i8> %qe, i8 %q, i32 15 + ret <16 x i8> %qf +} +; CHECK: vpbroadcastb (% +define <32 x i8> @BB32(i8* %ptr) nounwind uwtable readnone ssp { +entry: + %q = load i8* %ptr, align 4 + %q0 = insertelement <32 x i8> undef, i8 %q, i32 0 + %q1 = insertelement <32 x i8> %q0, i8 %q, i32 1 + %q2 = insertelement <32 x i8> %q1, i8 %q, i32 2 + %q3 = insertelement <32 x i8> %q2, i8 %q, i32 3 + %q4 = insertelement <32 x i8> %q3, i8 %q, i32 4 + %q5 = insertelement <32 x i8> %q4, i8 %q, i32 5 + %q6 = insertelement <32 x i8> %q5, i8 %q, i32 6 + %q7 = insertelement <32 x i8> %q6, i8 %q, i32 7 + %q8 = insertelement <32 x i8> %q7, i8 %q, i32 8 + %q9 = insertelement <32 x i8> %q8, i8 %q, i32 9 + %qa = insertelement <32 x i8> %q9, i8 %q, i32 10 + %qb = insertelement <32 x i8> %qa, i8 %q, i32 11 + %qc = insertelement <32 x i8> %qb, i8 %q, i32 12 + %qd = insertelement <32 x i8> %qc, i8 %q, i32 13 + %qe = insertelement <32 x i8> %qd, i8 %q, i32 14 + %qf = insertelement <32 x i8> %qe, i8 %q, i32 15 + + %q20 = insertelement <32 x i8> %qf, i8 %q, i32 16 + %q21 = insertelement <32 x i8> %q20, i8 %q, i32 17 + %q22 = insertelement <32 x i8> %q21, i8 %q, i32 18 + %q23 = insertelement <32 x i8> %q22, i8 %q, i32 19 + %q24 = insertelement <32 x i8> %q23, i8 %q, i32 20 + %q25 = insertelement <32 x i8> %q24, i8 %q, i32 21 + %q26 = insertelement <32 x i8> %q25, i8 %q, i32 22 + %q27 = insertelement <32 x i8> %q26, i8 %q, i32 23 + %q28 = insertelement <32 x i8> %q27, i8 %q, i32 24 + %q29 = insertelement <32 x i8> %q28, i8 %q, i32 25 + %q2a = insertelement <32 x i8> %q29, i8 %q, i32 26 + %q2b = insertelement <32 x i8> %q2a, i8 %q, i32 27 + %q2c = insertelement <32 x i8> %q2b, i8 %q, i32 28 + %q2d = insertelement <32 x i8> %q2c, i8 %q, i32 29 + %q2e = insertelement <32 x i8> %q2d, i8 %q, i32 30 + %q2f = insertelement <32 x i8> %q2e, i8 %q, i32 31 + ret <32 x i8> %q2f +} +; CHECK: vpbroadcastw (% + +define <8 x i16> @W16(i16* %ptr) nounwind uwtable readnone ssp { +entry: + %q = load i16* %ptr, align 4 + %q0 = insertelement <8 x i16> undef, i16 %q, i32 0 + %q1 = insertelement <8 x i16> %q0, i16 %q, i32 1 + %q2 = insertelement <8 x i16> %q1, i16 %q, i32 2 + %q3 = insertelement <8 x i16> %q2, i16 %q, i32 3 + %q4 = insertelement <8 x i16> %q3, i16 
%q, i32 4 + %q5 = insertelement <8 x i16> %q4, i16 %q, i32 5 + %q6 = insertelement <8 x i16> %q5, i16 %q, i32 6 + %q7 = insertelement <8 x i16> %q6, i16 %q, i32 7 + ret <8 x i16> %q7 +} +; CHECK: vpbroadcastw (% +define <16 x i16> @WW16(i16* %ptr) nounwind uwtable readnone ssp { +entry: + %q = load i16* %ptr, align 4 + %q0 = insertelement <16 x i16> undef, i16 %q, i32 0 + %q1 = insertelement <16 x i16> %q0, i16 %q, i32 1 + %q2 = insertelement <16 x i16> %q1, i16 %q, i32 2 + %q3 = insertelement <16 x i16> %q2, i16 %q, i32 3 + %q4 = insertelement <16 x i16> %q3, i16 %q, i32 4 + %q5 = insertelement <16 x i16> %q4, i16 %q, i32 5 + %q6 = insertelement <16 x i16> %q5, i16 %q, i32 6 + %q7 = insertelement <16 x i16> %q6, i16 %q, i32 7 + %q8 = insertelement <16 x i16> %q7, i16 %q, i32 8 + %q9 = insertelement <16 x i16> %q8, i16 %q, i32 9 + %qa = insertelement <16 x i16> %q9, i16 %q, i32 10 + %qb = insertelement <16 x i16> %qa, i16 %q, i32 11 + %qc = insertelement <16 x i16> %qb, i16 %q, i32 12 + %qd = insertelement <16 x i16> %qc, i16 %q, i32 13 + %qe = insertelement <16 x i16> %qd, i16 %q, i32 14 + %qf = insertelement <16 x i16> %qe, i16 %q, i32 15 + ret <16 x i16> %qf +} +; CHECK: vpbroadcastd (% +define <4 x i32> @D32(i32* %ptr) nounwind uwtable readnone ssp { +entry: + %q = load i32* %ptr, align 4 + %q0 = insertelement <4 x i32> undef, i32 %q, i32 0 + %q1 = insertelement <4 x i32> %q0, i32 %q, i32 1 + %q2 = insertelement <4 x i32> %q1, i32 %q, i32 2 + %q3 = insertelement <4 x i32> %q2, i32 %q, i32 3 + ret <4 x i32> %q3 +} +; CHECK: vpbroadcastd (% +define <8 x i32> @DD32(i32* %ptr) nounwind uwtable readnone ssp { +entry: + %q = load i32* %ptr, align 4 + %q0 = insertelement <8 x i32> undef, i32 %q, i32 0 + %q1 = insertelement <8 x i32> %q0, i32 %q, i32 1 + %q2 = insertelement <8 x i32> %q1, i32 %q, i32 2 + %q3 = insertelement <8 x i32> %q2, i32 %q, i32 3 + %q4 = insertelement <8 x i32> %q3, i32 %q, i32 4 + %q5 = insertelement <8 x i32> %q4, i32 %q, i32 5 + %q6 = insertelement <8 x i32> %q5, i32 %q, i32 6 + %q7 = insertelement <8 x i32> %q6, i32 %q, i32 7 + ret <8 x i32> %q7 +} +; CHECK: vpbroadcastq (% +define <2 x i64> @Q64(i64* %ptr) nounwind uwtable readnone ssp { +entry: + %q = load i64* %ptr, align 4 + %q0 = insertelement <2 x i64> undef, i64 %q, i32 0 + %q1 = insertelement <2 x i64> %q0, i64 %q, i32 1 + ret <2 x i64> %q1 +} +; CHECK: vpbroadcastq (% +define <4 x i64> @QQ64(i64* %ptr) nounwind uwtable readnone ssp { +entry: + %q = load i64* %ptr, align 4 + %q0 = insertelement <4 x i64> undef, i64 %q, i32 0 + %q1 = insertelement <4 x i64> %q0, i64 %q, i32 1 + %q2 = insertelement <4 x i64> %q1, i64 %q, i32 2 + %q3 = insertelement <4 x i64> %q2, i64 %q, i32 3 + ret <4 x i64> %q3 +} diff --git a/test/CodeGen/X86/block-placement.ll b/test/CodeGen/X86/block-placement.ll index e41d52c..f87d1a6 100644 --- a/test/CodeGen/X86/block-placement.ll +++ b/test/CodeGen/X86/block-placement.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=x86 -enable-block-placement < %s | FileCheck %s +; RUN: llc -mtriple=i686-linux -enable-block-placement < %s | FileCheck %s declare void @error(i32 %i, i32 %a, i32 %b) @@ -241,8 +241,8 @@ define void @unnatural_cfg1() { ; CHECK: unnatural_cfg1 ; CHECK: %entry ; CHECK: %loop.body1 -; CHECK: %loop.body3 ; CHECK: %loop.body2 +; CHECK: %loop.body3 entry: br label %loop.header @@ -272,6 +272,77 @@ loop.body5: br label %loop.body3 } +define void @unnatural_cfg2() { +; Test that we can handle a loop with a nested natural loop *and* an unnatural +; loop. 
This was reduced from a crash on block placement when run over +; single-source GCC. +; CHECK: unnatural_cfg2 +; CHECK: %entry +; CHECK: %loop.header +; CHECK: %loop.body1 +; CHECK: %loop.body2 +; CHECK: %loop.body3 +; CHECK: %loop.inner1.begin +; The end block is folded with %loop.body3... +; CHECK-NOT: %loop.inner1.end +; CHECK: %loop.body4 +; CHECK: %loop.inner2.begin +; The loop.inner2.end block is folded +; CHECK: %bail + +entry: + br label %loop.header + +loop.header: + %comp0 = icmp eq i32* undef, null + br i1 %comp0, label %bail, label %loop.body1 + +loop.body1: + %val0 = load i32** undef, align 4 + br i1 undef, label %loop.body2, label %loop.inner1.begin + +loop.body2: + br i1 undef, label %loop.body4, label %loop.body3 + +loop.body3: + %ptr1 = getelementptr inbounds i32* %val0, i32 0 + %castptr1 = bitcast i32* %ptr1 to i32** + %val1 = load i32** %castptr1, align 4 + br label %loop.inner1.begin + +loop.inner1.begin: + %valphi = phi i32* [ %val2, %loop.inner1.end ], [ %val1, %loop.body3 ], [ %val0, %loop.body1 ] + %castval = bitcast i32* %valphi to i32* + %comp1 = icmp eq i32 undef, 48 + br i1 %comp1, label %loop.inner1.end, label %loop.body4 + +loop.inner1.end: + %ptr2 = getelementptr inbounds i32* %valphi, i32 0 + %castptr2 = bitcast i32* %ptr2 to i32** + %val2 = load i32** %castptr2, align 4 + br label %loop.inner1.begin + +loop.body4.dead: + br label %loop.body4 + +loop.body4: + %comp2 = icmp ult i32 undef, 3 + br i1 %comp2, label %loop.inner2.begin, label %loop.end + +loop.inner2.begin: + br i1 false, label %loop.end, label %loop.inner2.end + +loop.inner2.end: + %comp3 = icmp eq i32 undef, 1769472 + br i1 %comp3, label %loop.end, label %loop.inner2.begin + +loop.end: + br label %loop.header + +bail: + unreachable +} + define i32 @problematic_switch() { ; This function's CFG caused overlow in the machine branch probability ; calculation, triggering asserts. Make sure we don't crash on it. @@ -322,3 +393,470 @@ exit: %merge = phi i32 [ 3, %step ], [ 6, %entry ] ret i32 %merge } + +define void @fpcmp_unanalyzable_branch(i1 %cond) { +; This function's CFG contains an unanalyzable branch that is likely to be +; split due to having a different high-probability predecessor. +; CHECK: fpcmp_unanalyzable_branch +; CHECK: %entry +; CHECK: %exit +; CHECK-NOT: %if.then +; CHECK-NOT: %if.end +; CHECK-NOT: jne +; CHECK-NOT: jnp +; CHECK: jne +; CHECK-NEXT: jnp +; CHECK-NEXT: %if.then + +entry: +; Note that this branch must be strongly biased toward +; 'entry.if.then_crit_edge' to ensure that we would try to form a chain for +; 'entry' -> 'entry.if.then_crit_edge' -> 'if.then'. It is the last edge in that +; chain which would violate the unanalyzable branch in 'exit', but we won't even +; try this trick unless 'if.then' is believed to almost always be reached from +; 'entry.if.then_crit_edge'. 
+ br i1 %cond, label %entry.if.then_crit_edge, label %lor.lhs.false, !prof !1 + +entry.if.then_crit_edge: + %.pre14 = load i8* undef, align 1, !tbaa !0 + br label %if.then + +lor.lhs.false: + br i1 undef, label %if.end, label %exit + +exit: + %cmp.i = fcmp une double 0.000000e+00, undef + br i1 %cmp.i, label %if.then, label %if.end + +if.then: + %0 = phi i8 [ %.pre14, %entry.if.then_crit_edge ], [ undef, %exit ] + %1 = and i8 %0, 1 + store i8 %1, i8* undef, align 4, !tbaa !0 + br label %if.end + +if.end: + ret void +} + +!1 = metadata !{metadata !"branch_weights", i32 1000, i32 1} + +declare i32 @f() +declare i32 @g() +declare i32 @h(i32 %x) + +define i32 @test_global_cfg_break_profitability() { +; Check that our metrics for the profitability of a CFG break are global rather +; than local. A successor may be very hot, but if the current block isn't, it +; doesn't matter. Within this test the 'then' block is slightly warmer than the +; 'else' block, but not nearly enough to merit merging it with the exit block +; even though the probability of 'then' branching to the 'exit' block is very +; high. +; CHECK: test_global_cfg_break_profitability +; CHECK: calll {{_?}}f +; CHECK: calll {{_?}}g +; CHECK: calll {{_?}}h +; CHECK: ret + +entry: + br i1 undef, label %then, label %else, !prof !2 + +then: + %then.result = call i32 @f() + br label %exit + +else: + %else.result = call i32 @g() + br label %exit + +exit: + %result = phi i32 [ %then.result, %then ], [ %else.result, %else ] + %result2 = call i32 @h(i32 %result) + ret i32 %result +} + +!2 = metadata !{metadata !"branch_weights", i32 3, i32 1} + +declare i32 @__gxx_personality_v0(...) + +define void @test_eh_lpad_successor() { +; Some times the landing pad ends up as the first successor of an invoke block. +; When this happens, a strange result used to fall out of updateTerminators: we +; didn't correctly locate the fallthrough successor, assuming blindly that the +; first one was the fallthrough successor. As a result, we would add an +; erroneous jump to the landing pad thinking *that* was the default successor. +; CHECK: test_eh_lpad_successor +; CHECK: %entry +; CHECK-NOT: jmp +; CHECK: %loop + +entry: + invoke i32 @f() to label %preheader unwind label %lpad + +preheader: + br label %loop + +lpad: + %lpad.val = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + cleanup + resume { i8*, i32 } %lpad.val + +loop: + br label %loop +} + +declare void @fake_throw() noreturn + +define void @test_eh_throw() { +; For blocks containing a 'throw' (or similar functionality), we have +; a no-return invoke. In this case, only EH successors will exist, and +; fallthrough simply won't occur. Make sure we don't crash trying to update +; terminators for such constructs. +; +; CHECK: test_eh_throw +; CHECK: %entry +; CHECK: %cleanup + +entry: + invoke void @fake_throw() to label %continue unwind label %cleanup + +continue: + unreachable + +cleanup: + %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + cleanup + unreachable +} + +define void @test_unnatural_cfg_backwards_inner_loop() { +; Test that when we encounter an unnatural CFG structure after having formed +; a chain for an inner loop which happened to be laid out backwards we don't +; attempt to merge onto the wrong end of the inner loop just because we find it +; first. This was reduced from a crasher in GCC's single source. 
+; +; CHECK: test_unnatural_cfg_backwards_inner_loop +; CHECK: %entry +; CHECK: %body +; CHECK: %loop1 +; CHECK: %loop2b +; CHECK: %loop2a + +entry: + br i1 undef, label %loop2a, label %body + +body: + br label %loop2a + +loop1: + %next.load = load i32** undef + br i1 %comp.a, label %loop2a, label %loop2b + +loop2a: + %var = phi i32* [ null, %entry ], [ null, %body ], [ %next.phi, %loop1 ] + %next.var = phi i32* [ null, %entry ], [ undef, %body ], [ %next.load, %loop1 ] + %comp.a = icmp eq i32* %var, null + br label %loop3 + +loop2b: + %gep = getelementptr inbounds i32* %var.phi, i32 0 + %next.ptr = bitcast i32* %gep to i32** + store i32* %next.phi, i32** %next.ptr + br label %loop3 + +loop3: + %var.phi = phi i32* [ %next.phi, %loop2b ], [ %var, %loop2a ] + %next.phi = phi i32* [ %next.load, %loop2b ], [ %next.var, %loop2a ] + br label %loop1 +} + +define void @unanalyzable_branch_to_loop_header() { +; Ensure that we can handle unanalyzable branches into loop headers. We +; pre-form chains for unanalyzable branches, and will find the tail end of that +; at the start of the loop. This function uses floating point comparison +; fallthrough because that happens to always produce unanalyzable branches on +; x86. +; +; CHECK: unanalyzable_branch_to_loop_header +; CHECK: %entry +; CHECK: %loop +; CHECK: %exit + +entry: + %cmp = fcmp une double 0.000000e+00, undef + br i1 %cmp, label %loop, label %exit + +loop: + %cond = icmp eq i8 undef, 42 + br i1 %cond, label %exit, label %loop + +exit: + ret void +} + +define void @unanalyzable_branch_to_best_succ(i1 %cond) { +; Ensure that we can handle unanalyzable branches where the destination block +; gets selected as the optimal sucessor to merge. +; +; CHECK: unanalyzable_branch_to_best_succ +; CHECK: %entry +; CHECK: %foo +; CHECK: %bar +; CHECK: %exit + +entry: + ; Bias this branch toward bar to ensure we form that chain. + br i1 %cond, label %bar, label %foo, !prof !1 + +foo: + %cmp = fcmp une double 0.000000e+00, undef + br i1 %cmp, label %bar, label %exit + +bar: + call i32 @f() + br label %exit + +exit: + ret void +} + +define void @unanalyzable_branch_to_free_block(float %x) { +; Ensure that we can handle unanalyzable branches where the destination block +; gets selected as the best free block in the CFG. +; +; CHECK: unanalyzable_branch_to_free_block +; CHECK: %entry +; CHECK: %a +; CHECK: %b +; CHECK: %c +; CHECK: %exit + +entry: + br i1 undef, label %a, label %b + +a: + call i32 @f() + br label %c + +b: + %cmp = fcmp une float %x, undef + br i1 %cmp, label %c, label %exit + +c: + call i32 @g() + br label %exit + +exit: + ret void +} + +define void @many_unanalyzable_branches() { +; Ensure that we don't crash as we're building up many unanalyzable branches, +; blocks, and loops. 
+; +; CHECK: many_unanalyzable_branches +; CHECK: %entry +; CHECK: %exit + +entry: + br label %0 + + %val0 = volatile load float* undef + %cmp0 = fcmp une float %val0, undef + br i1 %cmp0, label %1, label %0 + %val1 = volatile load float* undef + %cmp1 = fcmp une float %val1, undef + br i1 %cmp1, label %2, label %1 + %val2 = volatile load float* undef + %cmp2 = fcmp une float %val2, undef + br i1 %cmp2, label %3, label %2 + %val3 = volatile load float* undef + %cmp3 = fcmp une float %val3, undef + br i1 %cmp3, label %4, label %3 + %val4 = volatile load float* undef + %cmp4 = fcmp une float %val4, undef + br i1 %cmp4, label %5, label %4 + %val5 = volatile load float* undef + %cmp5 = fcmp une float %val5, undef + br i1 %cmp5, label %6, label %5 + %val6 = volatile load float* undef + %cmp6 = fcmp une float %val6, undef + br i1 %cmp6, label %7, label %6 + %val7 = volatile load float* undef + %cmp7 = fcmp une float %val7, undef + br i1 %cmp7, label %8, label %7 + %val8 = volatile load float* undef + %cmp8 = fcmp une float %val8, undef + br i1 %cmp8, label %9, label %8 + %val9 = volatile load float* undef + %cmp9 = fcmp une float %val9, undef + br i1 %cmp9, label %10, label %9 + %val10 = volatile load float* undef + %cmp10 = fcmp une float %val10, undef + br i1 %cmp10, label %11, label %10 + %val11 = volatile load float* undef + %cmp11 = fcmp une float %val11, undef + br i1 %cmp11, label %12, label %11 + %val12 = volatile load float* undef + %cmp12 = fcmp une float %val12, undef + br i1 %cmp12, label %13, label %12 + %val13 = volatile load float* undef + %cmp13 = fcmp une float %val13, undef + br i1 %cmp13, label %14, label %13 + %val14 = volatile load float* undef + %cmp14 = fcmp une float %val14, undef + br i1 %cmp14, label %15, label %14 + %val15 = volatile load float* undef + %cmp15 = fcmp une float %val15, undef + br i1 %cmp15, label %16, label %15 + %val16 = volatile load float* undef + %cmp16 = fcmp une float %val16, undef + br i1 %cmp16, label %17, label %16 + %val17 = volatile load float* undef + %cmp17 = fcmp une float %val17, undef + br i1 %cmp17, label %18, label %17 + %val18 = volatile load float* undef + %cmp18 = fcmp une float %val18, undef + br i1 %cmp18, label %19, label %18 + %val19 = volatile load float* undef + %cmp19 = fcmp une float %val19, undef + br i1 %cmp19, label %20, label %19 + %val20 = volatile load float* undef + %cmp20 = fcmp une float %val20, undef + br i1 %cmp20, label %21, label %20 + %val21 = volatile load float* undef + %cmp21 = fcmp une float %val21, undef + br i1 %cmp21, label %22, label %21 + %val22 = volatile load float* undef + %cmp22 = fcmp une float %val22, undef + br i1 %cmp22, label %23, label %22 + %val23 = volatile load float* undef + %cmp23 = fcmp une float %val23, undef + br i1 %cmp23, label %24, label %23 + %val24 = volatile load float* undef + %cmp24 = fcmp une float %val24, undef + br i1 %cmp24, label %25, label %24 + %val25 = volatile load float* undef + %cmp25 = fcmp une float %val25, undef + br i1 %cmp25, label %26, label %25 + %val26 = volatile load float* undef + %cmp26 = fcmp une float %val26, undef + br i1 %cmp26, label %27, label %26 + %val27 = volatile load float* undef + %cmp27 = fcmp une float %val27, undef + br i1 %cmp27, label %28, label %27 + %val28 = volatile load float* undef + %cmp28 = fcmp une float %val28, undef + br i1 %cmp28, label %29, label %28 + %val29 = volatile load float* undef + %cmp29 = fcmp une float %val29, undef + br i1 %cmp29, label %30, label %29 + %val30 = volatile load float* undef + %cmp30 = fcmp une float 
%val30, undef + br i1 %cmp30, label %31, label %30 + %val31 = volatile load float* undef + %cmp31 = fcmp une float %val31, undef + br i1 %cmp31, label %32, label %31 + %val32 = volatile load float* undef + %cmp32 = fcmp une float %val32, undef + br i1 %cmp32, label %33, label %32 + %val33 = volatile load float* undef + %cmp33 = fcmp une float %val33, undef + br i1 %cmp33, label %34, label %33 + %val34 = volatile load float* undef + %cmp34 = fcmp une float %val34, undef + br i1 %cmp34, label %35, label %34 + %val35 = volatile load float* undef + %cmp35 = fcmp une float %val35, undef + br i1 %cmp35, label %36, label %35 + %val36 = volatile load float* undef + %cmp36 = fcmp une float %val36, undef + br i1 %cmp36, label %37, label %36 + %val37 = volatile load float* undef + %cmp37 = fcmp une float %val37, undef + br i1 %cmp37, label %38, label %37 + %val38 = volatile load float* undef + %cmp38 = fcmp une float %val38, undef + br i1 %cmp38, label %39, label %38 + %val39 = volatile load float* undef + %cmp39 = fcmp une float %val39, undef + br i1 %cmp39, label %40, label %39 + %val40 = volatile load float* undef + %cmp40 = fcmp une float %val40, undef + br i1 %cmp40, label %41, label %40 + %val41 = volatile load float* undef + %cmp41 = fcmp une float %val41, undef + br i1 %cmp41, label %42, label %41 + %val42 = volatile load float* undef + %cmp42 = fcmp une float %val42, undef + br i1 %cmp42, label %43, label %42 + %val43 = volatile load float* undef + %cmp43 = fcmp une float %val43, undef + br i1 %cmp43, label %44, label %43 + %val44 = volatile load float* undef + %cmp44 = fcmp une float %val44, undef + br i1 %cmp44, label %45, label %44 + %val45 = volatile load float* undef + %cmp45 = fcmp une float %val45, undef + br i1 %cmp45, label %46, label %45 + %val46 = volatile load float* undef + %cmp46 = fcmp une float %val46, undef + br i1 %cmp46, label %47, label %46 + %val47 = volatile load float* undef + %cmp47 = fcmp une float %val47, undef + br i1 %cmp47, label %48, label %47 + %val48 = volatile load float* undef + %cmp48 = fcmp une float %val48, undef + br i1 %cmp48, label %49, label %48 + %val49 = volatile load float* undef + %cmp49 = fcmp une float %val49, undef + br i1 %cmp49, label %50, label %49 + %val50 = volatile load float* undef + %cmp50 = fcmp une float %val50, undef + br i1 %cmp50, label %51, label %50 + %val51 = volatile load float* undef + %cmp51 = fcmp une float %val51, undef + br i1 %cmp51, label %52, label %51 + %val52 = volatile load float* undef + %cmp52 = fcmp une float %val52, undef + br i1 %cmp52, label %53, label %52 + %val53 = volatile load float* undef + %cmp53 = fcmp une float %val53, undef + br i1 %cmp53, label %54, label %53 + %val54 = volatile load float* undef + %cmp54 = fcmp une float %val54, undef + br i1 %cmp54, label %55, label %54 + %val55 = volatile load float* undef + %cmp55 = fcmp une float %val55, undef + br i1 %cmp55, label %56, label %55 + %val56 = volatile load float* undef + %cmp56 = fcmp une float %val56, undef + br i1 %cmp56, label %57, label %56 + %val57 = volatile load float* undef + %cmp57 = fcmp une float %val57, undef + br i1 %cmp57, label %58, label %57 + %val58 = volatile load float* undef + %cmp58 = fcmp une float %val58, undef + br i1 %cmp58, label %59, label %58 + %val59 = volatile load float* undef + %cmp59 = fcmp une float %val59, undef + br i1 %cmp59, label %60, label %59 + %val60 = volatile load float* undef + %cmp60 = fcmp une float %val60, undef + br i1 %cmp60, label %61, label %60 + %val61 = volatile load float* undef + %cmp61 = fcmp 
une float %val61, undef + br i1 %cmp61, label %62, label %61 + %val62 = volatile load float* undef + %cmp62 = fcmp une float %val62, undef + br i1 %cmp62, label %63, label %62 + %val63 = volatile load float* undef + %cmp63 = fcmp une float %val63, undef + br i1 %cmp63, label %64, label %63 + %val64 = volatile load float* undef + %cmp64 = fcmp une float %val64, undef + br i1 %cmp64, label %65, label %64 + + br label %exit +exit: + ret void +} diff --git a/test/CodeGen/X86/btq.ll b/test/CodeGen/X86/btq.ll new file mode 100644 index 0000000..9c137a7 --- /dev/null +++ b/test/CodeGen/X86/btq.ll @@ -0,0 +1,35 @@ +; RUN: llc < %s -march=x86-64 | FileCheck %s + +declare void @bar() + +define void @test1(i64 %foo) nounwind { + %and = and i64 %foo, 4294967296 + %tobool = icmp eq i64 %and, 0 + br i1 %tobool, label %if.end, label %if.then + +; CHECK: test1: +; CHECK: btq $32 + +if.then: + tail call void @bar() nounwind + br label %if.end + +if.end: + ret void +} + +define void @test2(i64 %foo) nounwind { + %and = and i64 %foo, 2147483648 + %tobool = icmp eq i64 %and, 0 + br i1 %tobool, label %if.end, label %if.then + +; CHECK: test2: +; CHECK: testl $-2147483648 + +if.then: + tail call void @bar() nounwind + br label %if.end + +if.end: + ret void +} diff --git a/test/CodeGen/X86/dbg-subrange.ll b/test/CodeGen/X86/dbg-subrange.ll new file mode 100644 index 0000000..788910c --- /dev/null +++ b/test/CodeGen/X86/dbg-subrange.ll @@ -0,0 +1,37 @@ +; RUN: llc -O0 < %s | FileCheck %s +; Radar 10464995 +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.7.2" + +@s = common global [4294967296 x i8] zeroinitializer, align 16 +;CHECK: .long 4294967295 + +define void @bar() nounwind uwtable ssp { +entry: + store i8 97, i8* getelementptr inbounds ([4294967296 x i8]* @s, i32 0, i64 0), align 1, !dbg !18 + ret void, !dbg !20 +} + +!llvm.dbg.cu = !{!0} + +!0 = metadata !{i32 720913, i32 0, i32 12, metadata !"small.c", metadata !"/private/tmp", metadata !"clang version 3.1 (trunk 144833)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !11} ; [ DW_TAG_compile_unit ] +!1 = metadata !{metadata !2} +!2 = metadata !{i32 0} +!3 = metadata !{metadata !4} +!4 = metadata !{metadata !5} +!5 = metadata !{i32 720942, i32 0, metadata !6, metadata !"bar", metadata !"bar", metadata !"", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void ()* @bar, null, null, metadata !9} ; [ DW_TAG_subprogram ] +!6 = metadata !{i32 720937, metadata !"small.c", metadata !"/private/tmp", null} ; [ DW_TAG_file_type ] +!7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!8 = metadata !{null} +!9 = metadata !{metadata !10} +!10 = metadata !{i32 720932} ; [ DW_TAG_base_type ] +!11 = metadata !{metadata !12} +!12 = metadata !{metadata !13} +!13 = metadata !{i32 720948, i32 0, null, metadata !"s", metadata !"s", metadata !"", metadata !6, i32 2, metadata !14, i32 0, i32 1, [4294967296 x i8]* @s} ; [ DW_TAG_variable ] +!14 = metadata !{i32 720897, null, metadata !"", null, i32 0, i64 34359738368, i64 8, i32 0, i32 0, metadata !15, metadata !16, i32 0, i32 0} ; [ DW_TAG_array_type ] +!15 = metadata !{i32 720932, null, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] +!16 = 
metadata !{metadata !17} +!17 = metadata !{i32 720929, i64 0, i64 4294967295} ; [ DW_TAG_subrange_type ] +!18 = metadata !{i32 5, i32 3, metadata !19, null} +!19 = metadata !{i32 720907, metadata !5, i32 4, i32 1, metadata !6, i32 0} ; [ DW_TAG_lexical_block ] +!20 = metadata !{i32 6, i32 1, metadata !19, null} diff --git a/test/CodeGen/X86/dec-eflags-lower.ll b/test/CodeGen/X86/dec-eflags-lower.ll new file mode 100644 index 0000000..458160a --- /dev/null +++ b/test/CodeGen/X86/dec-eflags-lower.ll @@ -0,0 +1,29 @@ +; RUN: llc < %s -march=x86-64 | FileCheck %s + +%struct.obj = type { i64 } + +define void @_Z7releaseP3obj(%struct.obj* nocapture %o) nounwind uwtable ssp { +entry: +; CHECK: decq (%{{rdi|rcx}}) +; CHECK-NEXT: je + %refcnt = getelementptr inbounds %struct.obj* %o, i64 0, i32 0 + %0 = load i64* %refcnt, align 8, !tbaa !0 + %dec = add i64 %0, -1 + store i64 %dec, i64* %refcnt, align 8, !tbaa !0 + %tobool = icmp eq i64 %dec, 0 + br i1 %tobool, label %if.end, label %return + +if.end: ; preds = %entry + %1 = bitcast %struct.obj* %o to i8* + tail call void @free(i8* %1) + br label %return + +return: ; preds = %entry, %if.end + ret void +} + +declare void @free(i8* nocapture) nounwind + +!0 = metadata !{metadata !"long", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/test/CodeGen/X86/fast-isel-gep.ll b/test/CodeGen/X86/fast-isel-gep.ll index 91d1f5d..f0375f8 100644 --- a/test/CodeGen/X86/fast-isel-gep.ll +++ b/test/CodeGen/X86/fast-isel-gep.ll @@ -82,9 +82,8 @@ define i64 @test5(i8* %A, i32 %I, i64 %B) nounwind { ret i64 %v11 ; X64: test5: ; X64: movslq %e[[A1]], %rax -; X64-NEXT: movq (%r[[A0]],%rax), %rax -; X64-NEXT: addq %{{rdx|r8}}, %rax -; X64-NEXT: ret +; X64-NEXT: (%r[[A0]],%rax), +; X64: ret } ; PR9500, rdar://9156159 - Don't do non-local address mode folding, diff --git a/test/CodeGen/X86/fast-isel-x86-64.ll b/test/CodeGen/X86/fast-isel-x86-64.ll index 6a5a102..377fd11 100644 --- a/test/CodeGen/X86/fast-isel-x86-64.ll +++ b/test/CodeGen/X86/fast-isel-x86-64.ll @@ -82,7 +82,7 @@ entry: ret i64 %mul ; CHECK: test6: -; CHECK: leaq (,%rdi,8), %rax +; CHECK: shlq $3, %rdi } define i32 @test7(i32 %x) nounwind ssp { @@ -90,7 +90,7 @@ entry: %mul = mul nsw i32 %x, 8 ret i32 %mul ; CHECK: test7: -; CHECK: leal (,%rdi,8), %eax +; CHECK: shll $3, %edi } diff --git a/test/CodeGen/X86/phaddsub.ll b/test/CodeGen/X86/phaddsub.ll new file mode 100644 index 0000000..62d85f7 --- /dev/null +++ b/test/CodeGen/X86/phaddsub.ll @@ -0,0 +1,170 @@ +; RUN: llc < %s -march=x86-64 -mattr=+ssse3,-avx | FileCheck %s -check-prefix=SSSE3 +; RUN: llc < %s -march=x86-64 -mattr=-ssse3,+avx | FileCheck %s -check-prefix=AVX + +; SSSE3: phaddw1: +; SSSE3-NOT: vphaddw +; SSSE3: phaddw +; AVX: phaddw1: +; AVX: vphaddw +define <8 x i16> @phaddw1(<8 x i16> %x, <8 x i16> %y) { + %a = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> + %b = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> + %r = add <8 x i16> %a, %b + ret <8 x i16> %r +} + +; SSSE3: phaddw2: +; SSSE3-NOT: vphaddw +; SSSE3: phaddw +; AVX: phaddw2: +; AVX: vphaddw +define <8 x i16> @phaddw2(<8 x i16> %x, <8 x i16> %y) { + %a = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 1, i32 2, i32 5, i32 6, i32 9, i32 10, i32 13, i32 14> + %b = shufflevector <8 x i16> %y, <8 x i16> %x, <8 x i32> <i32 8, i32 11, i32 12, i32 15, i32 0, i32 3, 
i32 4, i32 7> + %r = add <8 x i16> %a, %b + ret <8 x i16> %r +} + +; SSSE3: phaddd1: +; SSSE3-NOT: vphaddd +; SSSE3: phaddd +; AVX: phaddd1: +; AVX: vphaddd +define <4 x i32> @phaddd1(<4 x i32> %x, <4 x i32> %y) { + %a = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + %b = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 1, i32 3, i32 5, i32 7> + %r = add <4 x i32> %a, %b + ret <4 x i32> %r +} + +; SSSE3: phaddd2: +; SSSE3-NOT: vphaddd +; SSSE3: phaddd +; AVX: phaddd2: +; AVX: vphaddd +define <4 x i32> @phaddd2(<4 x i32> %x, <4 x i32> %y) { + %a = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 1, i32 2, i32 5, i32 6> + %b = shufflevector <4 x i32> %y, <4 x i32> %x, <4 x i32> <i32 4, i32 7, i32 0, i32 3> + %r = add <4 x i32> %a, %b + ret <4 x i32> %r +} + +; SSSE3: phaddd3: +; SSSE3-NOT: vphaddd +; SSSE3: phaddd +; AVX: phaddd3: +; AVX: vphaddd +define <4 x i32> @phaddd3(<4 x i32> %x) { + %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 undef, i32 2, i32 4, i32 6> + %b = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 undef, i32 3, i32 5, i32 7> + %r = add <4 x i32> %a, %b + ret <4 x i32> %r +} + +; SSSE3: phaddd4: +; SSSE3-NOT: vphaddd +; SSSE3: phaddd +; AVX: phaddd4: +; AVX: vphaddd +define <4 x i32> @phaddd4(<4 x i32> %x) { + %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef> + %b = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef> + %r = add <4 x i32> %a, %b + ret <4 x i32> %r +} + +; SSSE3: phaddd5: +; SSSE3-NOT: vphaddd +; SSSE3: phaddd +; AVX: phaddd5: +; AVX: vphaddd +define <4 x i32> @phaddd5(<4 x i32> %x) { + %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 0, i32 3, i32 undef, i32 undef> + %b = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 1, i32 2, i32 undef, i32 undef> + %r = add <4 x i32> %a, %b + ret <4 x i32> %r +} + +; SSSE3: phaddd6: +; SSSE3-NOT: vphaddd +; SSSE3: phaddd +; AVX: phaddd6: +; AVX: vphaddd +define <4 x i32> @phaddd6(<4 x i32> %x) { + %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef> + %b = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> + %r = add <4 x i32> %a, %b + ret <4 x i32> %r +} + +; SSSE3: phaddd7: +; SSSE3-NOT: vphaddd +; SSSE3: phaddd +; AVX: phaddd7: +; AVX: vphaddd +define <4 x i32> @phaddd7(<4 x i32> %x) { + %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 undef, i32 3, i32 undef, i32 undef> + %b = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 undef, i32 2, i32 undef, i32 undef> + %r = add <4 x i32> %a, %b + ret <4 x i32> %r +} + +; SSSE3: phsubw1: +; SSSE3-NOT: vphsubw +; SSSE3: phsubw +; AVX: phsubw1: +; AVX: vphsubw +define <8 x i16> @phsubw1(<8 x i16> %x, <8 x i16> %y) { + %a = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> + %b = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> + %r = sub <8 x i16> %a, %b + ret <8 x i16> %r +} + +; SSSE3: phsubd1: +; SSSE3-NOT: vphsubd +; SSSE3: phsubd +; AVX: phsubd1: +; AVX: vphsubd +define <4 x i32> @phsubd1(<4 x i32> %x, <4 x i32> %y) { + %a = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + %b = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 1, i32 3, i32 5, i32 7> + %r = sub <4 x i32> %a, %b + ret <4 x i32> %r 
+} + +; SSSE3: phsubd2: +; SSSE3-NOT: vphsubd +; SSSE3: phsubd +; AVX: phsubd2: +; AVX: vphsubd +define <4 x i32> @phsubd2(<4 x i32> %x) { + %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 undef, i32 2, i32 4, i32 6> + %b = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 undef, i32 3, i32 5, i32 7> + %r = sub <4 x i32> %a, %b + ret <4 x i32> %r +} + +; SSSE3: phsubd3: +; SSSE3-NOT: vphsubd +; SSSE3: phsubd +; AVX: phsubd3: +; AVX: vphsubd +define <4 x i32> @phsubd3(<4 x i32> %x) { + %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef> + %b = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef> + %r = sub <4 x i32> %a, %b + ret <4 x i32> %r +} + +; SSSE3: phsubd4: +; SSSE3-NOT: vphsubd +; SSSE3: phsubd +; AVX: phsubd4: +; AVX: vphsubd +define <4 x i32> @phsubd4(<4 x i32> %x) { + %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef> + %b = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> + %r = sub <4 x i32> %a, %b + ret <4 x i32> %r +} diff --git a/test/CodeGen/X86/pr11202.ll b/test/CodeGen/X86/pr11202.ll new file mode 100644 index 0000000..2b26a69 --- /dev/null +++ b/test/CodeGen/X86/pr11202.ll @@ -0,0 +1,19 @@ +; RUN: llc < %s -mtriple=x86_64-pc-linux | FileCheck %s + +@bb = constant [1 x i8*] [i8* blockaddress(@main, %l2)] + +define void @main() { +entry: + br label %l1 + +l1: ; preds = %l2, %entry + %a = zext i1 false to i32 + br label %l2 + +l2: ; preds = %l1 + %b = zext i1 false to i32 + br label %l1 +} + +; CHECK: .Ltmp1: # Address of block that was removed by CodeGen +; CHECK: .quad .Ltmp1 diff --git a/test/CodeGen/X86/pr11415.ll b/test/CodeGen/X86/pr11415.ll new file mode 100644 index 0000000..e1fa032 --- /dev/null +++ b/test/CodeGen/X86/pr11415.ll @@ -0,0 +1,23 @@ +; RUN: llc -mtriple=x86_64-pc-linux %s -o - -regalloc=fast | FileCheck %s + +; We used to consider the early clobber in the second asm statement as +; defining %0 before it was read. 
This caused us to omit the +; movq -8(%rsp), %rdx + +; CHECK: #APP +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: movq %rcx, %rax +; CHECK-NEXT: movq %rax, -8(%rsp) +; CHECK-NEXT: movq -8(%rsp), %rdx +; CHECK-NEXT: #APP +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: movq %rdx, -8(%rsp) +; CHECK-NEXT: ret + +define i64 @foo() { +entry: + %0 = tail call i64 asm "", "={cx}"() nounwind + %1 = tail call i64 asm "", "=&r,0,r,~{rax}"(i64 %0, i64 %0) nounwind + ret i64 %1 +} diff --git a/test/CodeGen/X86/splat-scalar-load.ll b/test/CodeGen/X86/splat-scalar-load.ll index 2b13029..81a072f 100644 --- a/test/CodeGen/X86/splat-scalar-load.ll +++ b/test/CodeGen/X86/splat-scalar-load.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 | FileCheck %s ; rdar://7434544 -define <2 x i64> @t2() nounwind ssp { +define <2 x i64> @t2() nounwind { entry: ; CHECK: t2: ; CHECK: pshufd $85, (%esp), %xmm0 diff --git a/test/CodeGen/X86/vec_extract-sse4.ll b/test/CodeGen/X86/vec_extract-sse4.ll index f487654..42d7f27 100644 --- a/test/CodeGen/X86/vec_extract-sse4.ll +++ b/test/CodeGen/X86/vec_extract-sse4.ll @@ -2,7 +2,7 @@ ; RUN: not grep extractps %t ; RUN: not grep pextrd %t ; RUN: not grep pshufd %t -; RUN: grep movss %t | count 2 +; RUN: not grep movss %t define void @t1(float* %R, <4 x float>* %P1) nounwind { %X = load <4 x float>* %P1 diff --git a/test/CodeGen/X86/vec_shuffle-39.ll b/test/CodeGen/X86/vec_shuffle-39.ll index 64508b5..55531e3 100644 --- a/test/CodeGen/X86/vec_shuffle-39.ll +++ b/test/CodeGen/X86/vec_shuffle-39.ll @@ -67,3 +67,20 @@ bb: %tmp7 = shufflevector <2 x double> %tmp5, <2 x double> %tmp6, <2 x i32> <i32 2, i32 1> ret <2 x double> %tmp7 } + +; rdar://10450317 +define <2 x i64> @t4() nounwind readonly { +bb: +; CHECK: t4: +; CHECK: punpcklqdq %xmm0, %xmm1 +; CHECK: movq (%rax), %xmm0 +; CHECK: movsd %xmm1, %xmm0 + %tmp0 = load i128* null, align 1 + %tmp1 = load <2 x i32>* undef, align 8 + %tmp2 = bitcast i128 %tmp0 to <16 x i8> + %tmp3 = bitcast <2 x i32> %tmp1 to i64 + %tmp4 = insertelement <2 x i64> undef, i64 %tmp3, i32 0 + %tmp5 = bitcast <16 x i8> %tmp2 to <2 x i64> + %tmp6 = shufflevector <2 x i64> %tmp4, <2 x i64> %tmp5, <2 x i32> <i32 2, i32 1> + ret <2 x i64> %tmp6 +} diff --git a/test/CodeGen/X86/widen_shuffle-1.ll b/test/CodeGen/X86/widen_shuffle-1.ll index 2df3b6a..24608d0 100644 --- a/test/CodeGen/X86/widen_shuffle-1.ll +++ b/test/CodeGen/X86/widen_shuffle-1.ll @@ -54,3 +54,11 @@ define <8 x i8> @shuf4(<4 x i8> %a, <4 x i8> %b) nounwind readnone { %vshuf = shufflevector <4 x i8> %a, <4 x i8> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ret <8 x i8> %vshuf } + +; PR11389: another CONCAT_VECTORS case +define void @shuf5(<8 x i8>* %p) nounwind { +; CHECK: shuf5: + %v = shufflevector <2 x i8> <i8 4, i8 33>, <2 x i8> undef, <8 x i32> <i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + store <8 x i8> %v, <8 x i8>* %p, align 8 + ret void +} diff --git a/test/Instrumentation/AddressSanitizer/bug_11395.ll b/test/Instrumentation/AddressSanitizer/bug_11395.ll new file mode 100644 index 0000000..c53c385 --- /dev/null +++ b/test/Instrumentation/AddressSanitizer/bug_11395.ll @@ -0,0 +1,71 @@ +; RUN: opt < %s -asan -S | llc -o /dev/null +; The bug manifests as a reg alloc failure: +; error: ran out of registers during register allocation +; ModuleID = 'z.o' +target datalayout = 
"e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128" +target triple = "i386-unknown-linux-gnu" + +%struct.DSPContext = type { void (i16*, i8*, i32)*, void (i16*, i8*, i8*, i32)*, void (i16*, i8*, i32)*, void (i16*, i8*, i32)*, void (i16*, i8*, i32)*, void (i8*, i16*, i32)*, void (i8*, i16*, i32)*, i32 (i16*)*, void (i8*, i8*, i32, i32, i32, i32, i32)*, void (i8*, i8*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)*, void (i16*)*, void (i16*)*, i32 (i8*, i32)*, i32 (i8*, i32)*, [6 x i32 (i8*, i8*, i8*, i32, i32)*], [6 x i32 (i8*, i8*, i8*, i32, i32)*], [6 x i32 (i8*, i8*, i8*, i32, i32)*], [6 x i32 (i8*, i8*, i8*, i32, i32)*], [6 x i32 (i8*, i8*, i8*, i32, i32)*], [6 x i32 (i8*, i8*, i8*, i32, i32)*], [6 x i32 (i8*, i8*, i8*, i32, i32)*], [6 x i32 (i8*, i8*, i8*, i32, i32)*], [6 x i32 (i8*, i8*, i8*, i32, i32)*], [6 x i32 (i8*, i8*, i8*, i32, i32)*], [6 x i32 (i8*, i8*, i8*, i32, i32)*], [6 x i32 (i8*, i8*, i8*, i32, i32)*], [6 x i32 (i8*, i8*, i8*, i32, i32)*], [6 x i32 (i8*, i8*, i8*, i32, i32)*], [6 x i32 (i8*, i8*, i8*, i32, i32)*], [6 x i32 (i8*, i8*, i8*, i32, i32)*], [6 x i32 (i8*, i8*, i8*, i32, i32)*], [6 x i32 (i8*, i8*, i8*, i32, i32)*], [6 x i32 (i8*, i8*, i8*, i32, i32)*], [6 x i32 (i8*, i8*, i8*, i32, i32)*], i32 (i8*, i16*, i32)*, [4 x [4 x void (i8*, i8*, i32, i32)*]], [4 x [4 x void (i8*, i8*, i32, i32)*]], [4 x [4 x void (i8*, i8*, i32, i32)*]], [4 x [4 x void (i8*, i8*, i32, i32)*]], [2 x void (i8*, i8*, i8*, i32, i32)*], [11 x void (i8*, i8*, i32, i32, i32)*], [11 x void (i8*, i8*, i32, i32, i32)*], [2 x [16 x void (i8*, i8*, i32)*]], [2 x [16 x void (i8*, i8*, i32)*]], [2 x [16 x void (i8*, i8*, i32)*]], [2 x [16 x void (i8*, i8*, i32)*]], [8 x void (i8*, i8*, i32)*], [3 x void (i8*, i8*, i32, i32, i32, i32)*], [3 x void (i8*, i8*, i32, i32, i32, i32)*], [3 x void (i8*, i8*, i32, i32, i32, i32)*], [3 x void (i8*, i8*, i32, i32, i32, i32)*], [4 x [16 x void (i8*, i8*, i32)*]], [4 x [16 x void (i8*, i8*, i32)*]], [4 x [16 x void (i8*, i8*, i32)*]], [4 x [16 x void (i8*, i8*, i32)*]], [10 x void (i8*, i32, i32, i32, i32)*], [10 x void (i8*, i8*, i32, i32, i32, i32, i32)*], [2 x [16 x void (i8*, i8*, i32)*]], [2 x [16 x void (i8*, i8*, i32)*]], void (i8*, i32, i32, i32, i32, i32, i32)*, void (i8*, i32, i32, i32, i32, i32, i32)*, void (i8*, i32, i32, i32, i32, i32, i32)*, void (i8*, i32, i32, i32, i32, i32, i32)*, void (i8*, i16*, i32)*, [2 x [4 x i32 (i8*, i8*, i8*, i32, i32)*]], void (i8*, i8*, i32)*, void (i8*, i8*, i8*, i32)*, void (i8*, i8*, i8*, i32)*, void (i8*, i8*, i8*, i32, i32*, i32*)*, void (i8*, i8*, i8*, i32, i32*, i32*)*, i32 (i8*, i8*, i32, i32)*, void (i8*, i8*, i32, i32*, i32*, i32*)*, void (i8*, i8*, i8*, i32, i32)*, void (i32*, i32*, i32)*, void (i8*, i32, i32, i32, i8*)*, void (i8*, i32, i32, i32, i8*)*, void (i8*, i32, i32, i32)*, void (i8*, i32, i32, i32)*, void (i8*, i32, i32, i32, i8*)*, void (i8*, i32, i32, i32, i8*)*, void (i8*, i32, i32, i32)*, void (i8*, i32, i32, i32)*, void ([4 x [4 x i16]]*, i8*, [40 x i8]*, [40 x [2 x i16]]*, i32, i32, i32, i32, i32, i32)*, void (i8*, i32, i32)*, void (i8*, i32, i32)*, void (i8*, i32)*, void (i8*, i32, i32)*, void (i8*, i32, i32)*, void (i8*, i32, i32*)*, void (i8*, i32, i32*)*, void (i8*, i8*, i32, i16*, i16*)*, void (float*, float*, i32)*, void ([256 x float]*, [2 x float]*, i32, i32, i32)*, void (i32*, i32, i32, double*)*, void (float*, float*, i32)*, void (float*, float*, float*, i32)*, void (float*, float*, 
float*, float*, i32)*, void (float*, float*, float*, float*, float, i32)*, void (float*, i32*, float, i32)*, void (float*, float*, float, float, i32)*, void (float*, float*, float, i32)*, [2 x void (float*, float*, float**, float, i32)*], [2 x void (float*, float**, float, i32)*], float (float*, float*, i32)*, void (float*, float*, i32)*, void (i16*, float*, i32)*, void (i16*, float**, i32, i32)*, void (i16*)*, void (i16*)*, void (i16*)*, void (i8*, i32, i16*)*, void (i8*, i32, i16*)*, [64 x i8], i32, i32 (i16*, i16*, i16*, i32)*, void (i16*, i16*, i32)*, void (i8*, i32, i32, i32, i32)*, void (i8*, i16*, i32)*, void (i8*, i16*, i32)*, void (i8*, i16*, i32)*, void (i8*, i16*, i32)*, void ([4 x i16]*)*, void (i8*, i32*, i16*, i32, i8*)*, void (i8*, i32*, i16*, i32, i8*)*, void (i8**, i32*, i16*, i32, i8*)*, void (i8*, i32*, i16*, i32, i8*)*, void (i16*, i16*, i16*, i16*, i16*, i16*, i32)*, void (i16*, i32)*, void (i8*, i32, i8**, i32, i32, i32, i32, i32, %struct.slice_buffer_s*, i32, i8*)*, void (i8*, i32, i32)*, [4 x void (i8*, i32, i8*, i32, i32, i32)*], void (i32*, i32*, i32, i32, i32, i32, i32, i32*)*, void (i16*)*, void (i8*, i32, i16*)*, void (i8*, i32, i16*)*, void (i8*, i32, i16*)*, void (i8*, i32, i16*)*, void (i8*, i32, i16*)*, void (i8*, i32, i16*)*, void (i8*, i32, i16*)*, void (i8*, i32)*, void (i8*, i32)*, void (i8*, i32, i32)*, void (i8*, i32, i32)*, void (i8*, i32, i32)*, void (i8*, i32, i32)*, void (i8*, i32, i32)*, void (i8*, i32, i32)*, [16 x void (i8*, i8*, i32, i32)*], [16 x void (i8*, i8*, i32, i32)*], [12 x void (i8*, i8*, i32)*], void (i8*, i8*, i32, i32*, i32*, i32)*, void (i16*, i16*, i32)*, void (i16*, i16*, i32)*, i32 (i16*, i16*, i32, i32)*, [4 x [16 x void (i8*, i8*, i32)*]], [4 x [16 x void (i8*, i8*, i32)*]], [4 x [16 x void (i8*, i8*, i32)*]], [4 x [16 x void (i8*, i8*, i32)*]], [3 x void (i8*, i8*, i32, i32, i32, i32)*], [3 x void (i8*, i8*, i32, i32, i32, i32)*] } +%struct.slice_buffer_s = type opaque +%struct.AVCodecContext = type { %struct.AVClass*, i32, i32, i32, i32, i32, i8*, i32, %struct.AVRational, i32, i32, i32, i32, i32, void (%struct.AVCodecContext*, %struct.AVFrame*, i32*, i32, i32, i32)*, i32, i32, i32, i32, i32, i32, i32, float, float, i32, i32, i32, i32, float, i32, i32, i32, %struct.AVCodec*, i8*, i32, void (%struct.AVCodecContext*, i8*, i32, i32)*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, [32 x i8], i32, i32, i32, i32, i32, i32, i32, float, i32, i32 (%struct.AVCodecContext*, %struct.AVFrame*)*, void (%struct.AVCodecContext*, %struct.AVFrame*)*, i32, i32, i32, i32, i8*, i8*, float, float, i32, %struct.RcOverride*, i32, i8*, i32, i32, i32, float, float, float, float, i32, float, float, float, float, float, i32, i32, i32*, i32, i32, i32, i32, %struct.AVRational, %struct.AVFrame*, i32, i32, [4 x i64], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 (%struct.AVCodecContext*, i32*)*, i32, i32, i32, i32, i32, i32, i8*, i32, i32, i32, i32, i32, i32, i16*, i16*, i32, i32, i32, i32, %struct.AVPaletteControl*, i32, i32 (%struct.AVCodecContext*, %struct.AVFrame*)*, i32, i32, i32, i32, i32, i32, i32, i32 (%struct.AVCodecContext*, i32 (%struct.AVCodecContext*, i8*)*, i8*, i32*, i32, i32)*, i8*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64, i32, float, i64, i32, i64, i64, float, float, %struct.AVHWAccel*, i32, i8*, i32, i32, i32, 
i32, i32, i32 (%struct.AVCodecContext*, i32 (%struct.AVCodecContext*, i8*, i32, i32)*, i8*, i32*, i32)*, i32, i32, i32, i32, i32, i32, i8*, float, float, float, float, i32, i32, i32, float, float, float, i32, i32, i32, i32, [4 x i32], i8*, i32, i32, i32, i32 } +%struct.AVClass = type { i8*, i8* (i8*)*, %struct.AVOption* } +%struct.AVOption = type opaque +%struct.AVRational = type { i32, i32 } +%struct.AVFrame = type { [4 x i8*], [4 x i32], [4 x i8*], i32, i32, i64, i32, i32, i32, i32, i32, i8*, i32, i8*, [2 x [2 x i16]*], i32*, i8, i8*, [4 x i64], i32, i32, i32, i32, i32, %struct.AVPanScan*, i32, i32, i16*, [2 x i8*], i64, i8* } +%struct.AVPanScan = type { i32, i32, i32, [3 x [2 x i16]] } +%struct.AVCodec = type { i8*, i32, i32, i32, i32 (%struct.AVCodecContext*)*, i32 (%struct.AVCodecContext*, i8*, i32, i8*)*, i32 (%struct.AVCodecContext*)*, i32 (%struct.AVCodecContext*, i8*, i32*, %struct.AVPacket*)*, i32, %struct.AVCodec*, void (%struct.AVCodecContext*)*, %struct.AVRational*, i32*, i8*, i32*, i32*, i64* } +%struct.AVPacket = type { i64, i64, i8*, i32, i32, i32, i32, void (%struct.AVPacket*)*, i8*, i64, i64 } +%struct.RcOverride = type { i32, i32, i32, float } +%struct.AVPaletteControl = type { i32, [256 x i32] } +%struct.AVHWAccel = type { i8*, i32, i32, i32, i32, %struct.AVHWAccel*, i32 (%struct.AVCodecContext*, i8*, i32)*, i32 (%struct.AVCodecContext*, i8*, i32)*, i32 (%struct.AVCodecContext*)*, i32 } + +@firtable = internal unnamed_addr constant [9 x i8*] [i8* @ff_mlp_firorder_0, i8* @ff_mlp_firorder_1, i8* @ff_mlp_firorder_2, i8* @ff_mlp_firorder_3, i8* @ff_mlp_firorder_4, i8* @ff_mlp_firorder_5, i8* @ff_mlp_firorder_6, i8* @ff_mlp_firorder_7, i8* @ff_mlp_firorder_8], align 4 +@iirtable = internal unnamed_addr constant [5 x i8*] [i8* @ff_mlp_iirorder_0, i8* @ff_mlp_iirorder_1, i8* @ff_mlp_iirorder_2, i8* @ff_mlp_iirorder_3, i8* @ff_mlp_iirorder_4], align 4 +@ff_mlp_iirorder_0 = external global i8 +@ff_mlp_iirorder_1 = external global i8 +@ff_mlp_iirorder_2 = external global i8 +@ff_mlp_iirorder_3 = external global i8 +@ff_mlp_iirorder_4 = external global i8 +@ff_mlp_firorder_0 = external global i8 +@ff_mlp_firorder_1 = external global i8 +@ff_mlp_firorder_2 = external global i8 +@ff_mlp_firorder_3 = external global i8 +@ff_mlp_firorder_4 = external global i8 +@ff_mlp_firorder_5 = external global i8 +@ff_mlp_firorder_6 = external global i8 +@ff_mlp_firorder_7 = external global i8 +@ff_mlp_firorder_8 = external global i8 + +define void @ff_mlp_init_x86(%struct.DSPContext* nocapture %c, %struct.AVCodecContext* nocapture %avctx) nounwind { +entry: + %mlp_filter_channel = getelementptr inbounds %struct.DSPContext* %c, i32 0, i32 131 + store void (i32*, i32*, i32, i32, i32, i32, i32, i32*)* @mlp_filter_channel_x86, void (i32*, i32*, i32, i32, i32, i32, i32, i32*)** %mlp_filter_channel, align 4, !tbaa !0 + ret void +} + +define internal void @mlp_filter_channel_x86(i32* %state, i32* %coeff, i32 %firorder, i32 %iirorder, i32 %filter_shift, i32 %mask, i32 %blocksize, i32* %sample_buffer) nounwind { +entry: + %filter_shift.addr = alloca i32, align 4 + %mask.addr = alloca i32, align 4 + %blocksize.addr = alloca i32, align 4 + %firjump = alloca i8*, align 4 + %iirjump = alloca i8*, align 4 + store i32 %filter_shift, i32* %filter_shift.addr, align 4, !tbaa !3 + store i32 %mask, i32* %mask.addr, align 4, !tbaa !3 + %arrayidx = getelementptr inbounds [9 x i8*]* @firtable, i32 0, i32 %firorder + %0 = load i8** %arrayidx, align 4, !tbaa !0 + store i8* %0, i8** %firjump, align 4, !tbaa !0 + 
%arrayidx1 = getelementptr inbounds [5 x i8*]* @iirtable, i32 0, i32 %iirorder + %1 = load i8** %arrayidx1, align 4, !tbaa !0 + store i8* %1, i8** %iirjump, align 4, !tbaa !0 + %sub = sub nsw i32 0, %blocksize + store i32 %sub, i32* %blocksize.addr, align 4, !tbaa !3 + %2 = call { i32*, i32*, i32* } asm sideeffect "1: \0A\09xor %esi, %esi\0A\09xor %ecx, %ecx\0A\09jmp *$5 \0A\09ff_mlp_firorder_8: \0A\09mov 0x1c+0($0), %eax\0A\09imull 0x1c+0($1) \0A\09add %eax , %esi\0A\09adc %edx , %ecx\0A\09ff_mlp_firorder_7: \0A\09mov 0x18+0($0), %eax\0A\09imull 0x18+0($1) \0A\09add %eax , %esi\0A\09adc %edx , %ecx\0A\09ff_mlp_firorder_6: \0A\09mov 0x14+0($0), %eax\0A\09imull 0x14+0($1) \0A\09add %eax , %esi\0A\09adc %edx , %ecx\0A\09ff_mlp_firorder_5: \0A\09mov 0x10+0($0), %eax\0A\09imull 0x10+0($1) \0A\09add %eax , %esi\0A\09adc %edx , %ecx\0A\09ff_mlp_firorder_4: \0A\09mov 0x0c+0($0), %eax\0A\09imull 0x0c+0($1) \0A\09add %eax , %esi\0A\09adc %edx , %ecx\0A\09ff_mlp_firorder_3: \0A\09mov 0x08+0($0), %eax\0A\09imull 0x08+0($1) \0A\09add %eax , %esi\0A\09adc %edx , %ecx\0A\09ff_mlp_firorder_2: \0A\09mov 0x04+0($0), %eax\0A\09imull 0x04+0($1) \0A\09add %eax , %esi\0A\09adc %edx , %ecx\0A\09ff_mlp_firorder_1: \0A\09mov 0x00+0($0), %eax\0A\09imull 0x00+0($1) \0A\09add %eax , %esi\0A\09adc %edx , %ecx\0A\09ff_mlp_firorder_0:\0A\09jmp *$6 \0A\09ff_mlp_iirorder_4: \0A\09mov 0x0c+4*(8 + (40 * 4))($0), %eax\0A\09imull 0x0c+4* 8($1) \0A\09add %eax , %esi\0A\09adc %edx , %ecx\0A\09ff_mlp_iirorder_3: \0A\09mov 0x08+4*(8 + (40 * 4))($0), %eax\0A\09imull 0x08+4* 8($1) \0A\09add %eax , %esi\0A\09adc %edx , %ecx\0A\09ff_mlp_iirorder_2: \0A\09mov 0x04+4*(8 + (40 * 4))($0), %eax\0A\09imull 0x04+4* 8($1) \0A\09add %eax , %esi\0A\09adc %edx , %ecx\0A\09ff_mlp_iirorder_1: \0A\09mov 0x00+4*(8 + (40 * 4))($0), %eax\0A\09imull 0x00+4* 8($1) \0A\09add %eax , %esi\0A\09adc %edx , %ecx\0A\09ff_mlp_iirorder_0:\0A\09mov %ecx, %edx\0A\09mov %esi, %eax\0A\09movzbl $7 , %ecx\0A\09shrd %cl, %edx, %eax\0A\09mov %eax ,%edx \0A\09add ($2) ,%eax \0A\09and $4 ,%eax \0A\09sub $$4 , $0 \0A\09mov %eax, ($0) \0A\09mov %eax, ($2) \0A\09add $$4* 8 , $2 \0A\09sub %edx ,%eax \0A\09mov %eax,4*(8 + (40 * 4))($0) \0A\09incl $3 \0A\09js 1b \0A\09", "=r,=r,=r,=*m,*m,*m,*m,*m,0,1,2,*m,~{eax},~{edx},~{esi},~{ecx},~{dirflag},~{fpsr},~{flags}"(i32* %blocksize.addr, i32* %mask.addr, i8** %firjump, i8** %iirjump, i32* %filter_shift.addr, i32* %state, i32* %coeff, i32* %sample_buffer, i32* %blocksize.addr) nounwind, !srcloc !4 + ret void +} + +!0 = metadata !{metadata !"any pointer", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA", null} +!3 = metadata !{metadata !"int", metadata !1} +!4 = metadata !{i32 156132, i32 156164, i32 156205, i32 156238, i32 156282, i32 156332, i32 156370, i32 156408, i32 156447, i32 156486, i32 156536, i32 156574, i32 156612, i32 156651, i32 156690, i32 156740, i32 156778, i32 156816, i32 156855, i32 156894, i32 156944, i32 156982, i32 157020, i32 157059, i32 157098, i32 157148, i32 157186, i32 157224, i32 157263, i32 157302, i32 157352, i32 157390, i32 157428, i32 157467, i32 157506, i32 157556, i32 157594, i32 157632, i32 157671, i32 157710, i32 157760, i32 157798, i32 157836, i32 157875, i32 157914, i32 157952, i32 157996, i32 158046, i32 158099, i32 158140, i32 158179, i32 158218, i32 158268, i32 158321, i32 158362, i32 158401, i32 158440, i32 158490, i32 158543, i32 158584, i32 158623, i32 158662, i32 158712, i32 158765, i32 158806, i32 158845, i32 158884, 
i32 158922, i32 158963, i32 158996, i32 159029, i32 159062, i32 159109, i32 159154, i32 159199, i32 159243, i32 159286, i32 159329, i32 159375, i32 159422, i32 159478, i32 159522, i32 159566} diff --git a/test/Instrumentation/AddressSanitizer/dg.exp b/test/Instrumentation/AddressSanitizer/dg.exp new file mode 100644 index 0000000..f200589 --- /dev/null +++ b/test/Instrumentation/AddressSanitizer/dg.exp @@ -0,0 +1,3 @@ +load_lib llvm.exp + +RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]] diff --git a/test/Instrumentation/AddressSanitizer/do-not-touch-odr-global.ll b/test/Instrumentation/AddressSanitizer/do-not-touch-odr-global.ll new file mode 100644 index 0000000..1687877 --- /dev/null +++ b/test/Instrumentation/AddressSanitizer/do-not-touch-odr-global.ll @@ -0,0 +1,6 @@ +; RUN: opt < %s -asan -S | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" +; no action should be taken for these globals +@v1 = linkonce_odr constant i8 1 +; CHECK-NOT: __asan_register_globals diff --git a/test/Instrumentation/AddressSanitizer/do-not-touch-threadlocal.ll b/test/Instrumentation/AddressSanitizer/do-not-touch-threadlocal.ll new file mode 100644 index 0000000..89644d4 --- /dev/null +++ b/test/Instrumentation/AddressSanitizer/do-not-touch-threadlocal.ll @@ -0,0 +1,6 @@ +; RUN: opt < %s -asan -S | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" +; no action should be taken for thread locals +@xxx = thread_local global i32 0, align 4 +; CHECK-NOT: __asan_register_globals diff --git a/test/Instrumentation/AddressSanitizer/test64.ll b/test/Instrumentation/AddressSanitizer/test64.ll new file mode 100644 index 0000000..e26fb3d --- /dev/null +++ b/test/Instrumentation/AddressSanitizer/test64.ll @@ -0,0 +1,14 @@ +; RUN: opt < %s -asan -S | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" +define i32 @read_4_bytes(i32* %a) { +entry: + %tmp1 = load i32* %a, align 4 + ret i32 %tmp1 +} +; CHECK: @read_4_bytes +; CHECK-NOT: ret +; CHECK: lshr {{.*}} 3 +; Check for ASAN's Offset for 64-bit (2^44) +; CHECK-NEXT: 17592186044416 +; CHECK: ret diff --git a/test/MC/ARM/basic-arm-instructions.s b/test/MC/ARM/basic-arm-instructions.s index 22ad3cd..133967b 100644 --- a/test/MC/ARM/basic-arm-instructions.s +++ b/test/MC/ARM/basic-arm-instructions.s @@ -133,9 +133,9 @@ Lforward: adr r2, #-3 @ CHECK: Lback: -@ CHECK: adr r2, Lback @ encoding: [0bAAAAAAA0,0x20'A',0x0f'A',0b1110001A] +@ CHECK: adr r2, Lback @ encoding: [A,0x20'A',0x0f'A',0xe2'A'] @ CHECK: @ fixup A - offset: 0, value: Lback, kind: fixup_arm_adr_pcrel_12 -@ CHECK: adr r3, Lforward @ encoding: [0bAAAAAAA0,0x30'A',0x0f'A',0b1110001A] +@ CHECK: adr r3, Lforward @ encoding: [A,0x30'A',0x0f'A',0xe2'A'] @ CHECK: @ fixup A - offset: 0, value: Lforward, kind: fixup_arm_adr_pcrel_12 @ CHECK: Lforward: @ CHECK: adr r2, #3 @ encoding: [0x03,0x20,0x8f,0xe2] @@ -262,10 +262,12 @@ Lforward: asr r2, r4, #32 asr r2, r4, #2 asr r2, r4, #0 + asr r4, #2 @ CHECK: asr r2, r4, #32 @ encoding: [0x44,0x20,0xa0,0xe1] @ CHECK: asr r2, r4, #2 @ encoding: 
[0x44,0x21,0xa0,0xe1]
@ CHECK: mov r2, r4 @ encoding: [0x04,0x20,0xa0,0xe1]
+@ CHECK: asr r4, r4, #2 @ encoding: [0x44,0x41,0xa0,0xe1]
@------------------------------------------------------------------------------
@@ -794,10 +796,12 @@ Lforward:
lsl r2, r4, #31
lsl r2, r4, #1
lsl r2, r4, #0
+ lsl r4, #1
@ CHECK: lsl r2, r4, #31 @ encoding: [0x84,0x2f,0xa0,0xe1]
@ CHECK: lsl r2, r4, #1 @ encoding: [0x84,0x20,0xa0,0xe1]
@ CHECK: mov r2, r4 @ encoding: [0x04,0x20,0xa0,0xe1]
+@ CHECK: lsl r4, r4, #1 @ encoding: [0x84,0x40,0xa0,0xe1]
@------------------------------------------------------------------------------
@@ -806,10 +810,12 @@ Lforward:
lsr r2, r4, #32
lsr r2, r4, #2
lsr r2, r4, #0
+ lsr r4, #2
@ CHECK: lsr r2, r4, #32 @ encoding: [0x24,0x20,0xa0,0xe1]
@ CHECK: lsr r2, r4, #2 @ encoding: [0x24,0x21,0xa0,0xe1]
@ CHECK: mov r2, r4 @ encoding: [0x04,0x20,0xa0,0xe1]
+@ CHECK: lsr r4, r4, #2 @ encoding: [0x24,0x41,0xa0,0xe1]
@------------------------------------------------------------------------------
@@ -1001,11 +1007,13 @@ Lforward:
muls r5, r6, r7
mulgt r5, r6, r7
mulsle r5, r6, r7
+ mul r11, r5
@ CHECK: mul r5, r6, r7 @ encoding: [0x96,0x07,0x05,0xe0]
@ CHECK: muls r5, r6, r7 @ encoding: [0x96,0x07,0x15,0xe0]
@ CHECK: mulgt r5, r6, r7 @ encoding: [0x96,0x07,0x05,0xc0]
@ CHECK: mulsle r5, r6, r7 @ encoding: [0x96,0x07,0x15,0xd0]
+@ CHECK: mul r11, r11, r5 @ encoding: [0x9b,0x05,0x0b,0xe0]
@------------------------------------------------------------------------------
@@ -1344,10 +1352,12 @@ Lforward:
ror r2, r4, #31
ror r2, r4, #1
ror r2, r4, #0
+ ror r4, #1
@ CHECK: ror r2, r4, #31 @ encoding: [0xe4,0x2f,0xa0,0xe1]
@ CHECK: ror r2, r4, #1 @ encoding: [0xe4,0x20,0xa0,0xe1]
@ CHECK: mov r2, r4 @ encoding: [0x04,0x20,0xa0,0xe1]
+@ CHECK: ror r4, r4, #1 @ encoding: [0xe4,0x40,0xa0,0xe1]
@------------------------------------------------------------------------------
diff --git a/test/MC/ARM/basic-thumb2-instructions.s b/test/MC/ARM/basic-thumb2-instructions.s
index 74b0681..0dbde19 100644
--- a/test/MC/ARM/basic-thumb2-instructions.s
+++ b/test/MC/ARM/basic-thumb2-instructions.s
@@ -1228,12 +1228,18 @@ _func:
mul r3, r4, r6
it eq
muleq r3, r4, r5
+ it le
+ mulle r4, r4, r8
+ mul r6, r5
@ CHECK: muls r3, r4, r3 @ encoding: [0x63,0x43]
@ CHECK: mul r3, r4, r3 @ encoding: [0x04,0xfb,0x03,0xf3]
@ CHECK: mul r3, r4, r6 @ encoding: [0x04,0xfb,0x06,0xf3]
@ CHECK: it eq @ encoding: [0x08,0xbf]
@ CHECK: muleq r3, r4, r5 @ encoding: [0x04,0xfb,0x05,0xf3]
+@ CHECK: it le @ encoding: [0xd8,0xbf]
+@ CHECK: mulle r4, r4, r8 @ encoding: [0x04,0xfb,0x08,0xf4]
+@ CHECK: mul r6, r6, r5 @ encoding: [0x06,0xfb,0x05,0xf6]
@------------------------------------------------------------------------------
diff --git a/test/MC/ARM/neon-shuffle-encoding.s b/test/MC/ARM/neon-shuffle-encoding.s
index ce7eb66..ed209f7 100644
--- a/test/MC/ARM/neon-shuffle-encoding.s
+++ b/test/MC/ARM/neon-shuffle-encoding.s
@@ -44,3 +44,63 @@ vzip.16 q9, q8
@ CHECK: vzip.32 q9, q8 @ encoding: [0xe0,0x21,0xfa,0xf3]
vzip.32 q9, q8
+
+
+@ VTRN alternate size suffixes
+
+ vtrn.8 d3, d9
+ vtrn.i8 d3, d9
+ vtrn.u8 d3, d9
+ vtrn.p8 d3, d9
+ vtrn.16 d3, d9
+ vtrn.i16 d3, d9
+ vtrn.u16 d3, d9
+ vtrn.p16 d3, d9
+ vtrn.32 d3, d9
+ vtrn.i32 d3, d9
+ vtrn.u32 d3, d9
+ vtrn.f32 d3, d9
+ vtrn.f d3, d9
+
+ vtrn.8 q14, q6
+ vtrn.i8 q14, q6
+ vtrn.u8 q14, q6
+ vtrn.p8 q14, q6
+ vtrn.16 q14, q6
+ vtrn.i16 q14, q6
+ vtrn.u16 q14, q6
+ vtrn.p16 q14, q6
+ vtrn.32 q14, q6
+ vtrn.i32 q14, q6
+ vtrn.u32 q14, q6
+ vtrn.f32 q14, q6
+ vtrn.f q14, q6
+
+@ CHECK: vtrn.8 d3, d9 @ encoding: [0x89,0x30,0xb2,0xf3]
+@ CHECK: vtrn.8 d3, d9 @ encoding: [0x89,0x30,0xb2,0xf3]
+@ CHECK: vtrn.8 d3, d9 @ encoding: [0x89,0x30,0xb2,0xf3]
+@ CHECK: vtrn.8 d3, d9 @ encoding: [0x89,0x30,0xb2,0xf3]
+@ CHECK: vtrn.16 d3, d9 @ encoding: [0x89,0x30,0xb6,0xf3]
+@ CHECK: vtrn.16 d3, d9 @ encoding: [0x89,0x30,0xb6,0xf3]
+@ CHECK: vtrn.16 d3, d9 @ encoding: [0x89,0x30,0xb6,0xf3]
+@ CHECK: vtrn.16 d3, d9 @ encoding: [0x89,0x30,0xb6,0xf3]
+@ CHECK: vtrn.32 d3, d9 @ encoding: [0x89,0x30,0xba,0xf3]
+@ CHECK: vtrn.32 d3, d9 @ encoding: [0x89,0x30,0xba,0xf3]
+@ CHECK: vtrn.32 d3, d9 @ encoding: [0x89,0x30,0xba,0xf3]
+@ CHECK: vtrn.32 d3, d9 @ encoding: [0x89,0x30,0xba,0xf3]
+@ CHECK: vtrn.32 d3, d9 @ encoding: [0x89,0x30,0xba,0xf3]
+
+@ CHECK: vtrn.8 q14, q6 @ encoding: [0xcc,0xc0,0xf2,0xf3]
+@ CHECK: vtrn.8 q14, q6 @ encoding: [0xcc,0xc0,0xf2,0xf3]
+@ CHECK: vtrn.8 q14, q6 @ encoding: [0xcc,0xc0,0xf2,0xf3]
+@ CHECK: vtrn.8 q14, q6 @ encoding: [0xcc,0xc0,0xf2,0xf3]
+@ CHECK: vtrn.16 q14, q6 @ encoding: [0xcc,0xc0,0xf6,0xf3]
+@ CHECK: vtrn.16 q14, q6 @ encoding: [0xcc,0xc0,0xf6,0xf3]
+@ CHECK: vtrn.16 q14, q6 @ encoding: [0xcc,0xc0,0xf6,0xf3]
+@ CHECK: vtrn.16 q14, q6 @ encoding: [0xcc,0xc0,0xf6,0xf3]
+@ CHECK: vtrn.32 q14, q6 @ encoding: [0xcc,0xc0,0xfa,0xf3]
+@ CHECK: vtrn.32 q14, q6 @ encoding: [0xcc,0xc0,0xfa,0xf3]
+@ CHECK: vtrn.32 q14, q6 @ encoding: [0xcc,0xc0,0xfa,0xf3]
+@ CHECK: vtrn.32 q14, q6 @ encoding: [0xcc,0xc0,0xfa,0xf3]
+@ CHECK: vtrn.32 q14, q6 @ encoding: [0xcc,0xc0,0xfa,0xf3]
+
diff --git a/test/MC/ARM/neon-vld-encoding.s b/test/MC/ARM/neon-vld-encoding.s
index 03a3cea..503b1ec 100644
--- a/test/MC/ARM/neon-vld-encoding.s
+++ b/test/MC/ARM/neon-vld-encoding.s
@@ -251,3 +251,11 @@
@ CHECK: vld1.8 {d2, d3, d4}, [r2] @ encoding: [0x0f,0x26,0x22,0xf4]
@ CHECK: vld1.32 {d2, d3, d4}, [r2] @ encoding: [0x8f,0x26,0x22,0xf4]
@ CHECK: vld1.64 {d2, d3, d4}, [r2] @ encoding: [0xcf,0x26,0x22,0xf4]
+
+
+@ Register lists can use the range syntax, just like VLDM
+ vld1.f64 {d2-d5}, [r2,:128]!
+ vld1.f64 {d2,d3,d4,d5}, [r2,:128]!
+
+@ CHECK: vld1.64 {d2, d3, d4, d5}, [r2, :128]! @ encoding: [0xed,0x22,0x22,0xf4]
+@ CHECK: vld1.64 {d2, d3, d4, d5}, [r2, :128]! @ encoding: [0xed,0x22,0x22,0xf4]
diff --git a/test/MC/ARM/nop-armv6t2-padding.s b/test/MC/ARM/nop-armv6t2-padding.s
index 0e25718..c38ad2d 100644
--- a/test/MC/ARM/nop-armv6t2-padding.s
+++ b/test/MC/ARM/nop-armv6t2-padding.s
@@ -7,4 +7,4 @@ x:
.align 4
add r0, r1, r2
-@ CHECK: ('_section_data', '020081e0 007820e3 007820e3 007820e3 020081e0')
+@ CHECK: ('_section_data', '020081e0 00f020e3 00f020e3 00f020e3 020081e0')
diff --git a/test/MC/ARM/prefetch.ll b/test/MC/ARM/prefetch.ll
deleted file mode 100644
index e77fdb1..0000000
--- a/test/MC/ARM/prefetch.ll
+++ /dev/null
@@ -1,58 +0,0 @@
-; RUN: llc < %s -mtriple=armv7-apple-darwin -mattr=+v7,+mp -show-mc-encoding | FileCheck %s -check-prefix=ARM
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+v7 -show-mc-encoding | FileCheck %s -check-prefix=T2
-; rdar://8924681
-
-define void @t1(i8* %ptr) nounwind {
-entry:
-; ARM: t1:
-; ARM: pldw [r0] @ encoding: [0x00,0xf0,0x90,0xf5]
-; ARM: pld [r0] @ encoding: [0x00,0xf0,0xd0,0xf5]
-
-; T2: t1:
-; T2: pld [r0] @ encoding: [0x90,0xf8,0x00,0xf0]
- tail call void @llvm.prefetch( i8* %ptr, i32 1, i32 3 )
- tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 3 )
- ret void
-}
-
-define void @t2(i8* %ptr) nounwind {
-entry:
-; ARM: t2:
-; ARM: pld [r0, #1023] @ encoding: [0xff,0xf3,0xd0,0xf5]
-
-; T2: t2:
-; T2: pld [r0, #1023] @ encoding: [0x90,0xf8,0xff,0xf3]
- %tmp = getelementptr i8* %ptr, i32 1023
- tail call void @llvm.prefetch( i8* %tmp, i32 0, i32 3 )
- ret void
-}
-
-define void @t3(i32 %base, i32 %offset) nounwind {
-entry:
-; ARM: t3:
-; ARM: pld [r0, r1, lsr #2] @ encoding: [0x21,0xf1,0xd0,0xf7]
-
-; T2: t3:
-; T2: pld [r0, r1] @ encoding: [0x10,0xf8,0x01,0xf0]
- %tmp1 = lshr i32 %offset, 2
- %tmp2 = add i32 %base, %tmp1
- %tmp3 = inttoptr i32 %tmp2 to i8*
- tail call void @llvm.prefetch( i8* %tmp3, i32 0, i32 3 )
- ret void
-}
-
-define void @t4(i32 %base, i32 %offset) nounwind {
-entry:
-; ARM: t4:
-; ARM: pld [r0, r1, lsl #2] @ encoding: [0x01,0xf1,0xd0,0xf7]
-
-; T2: t4:
-; T2: pld [r0, r1, lsl #2] @ encoding: [0x10,0xf8,0x21,0xf0]
- %tmp1 = shl i32 %offset, 2
- %tmp2 = add i32 %base, %tmp1
- %tmp3 = inttoptr i32 %tmp2 to i8*
- tail call void @llvm.prefetch( i8* %tmp3, i32 0, i32 3 )
- ret void
-}
-
-declare void @llvm.prefetch(i8*, i32, i32) nounwind
diff --git a/test/MC/ARM/simple-fp-encoding.s b/test/MC/ARM/simple-fp-encoding.s
index 5c81c78..58abe88 100644
--- a/test/MC/ARM/simple-fp-encoding.s
+++ b/test/MC/ARM/simple-fp-encoding.s
@@ -21,9 +21,15 @@
@ CHECK: vmul.f64 d16, d17, d16 @ encoding: [0xa0,0x0b,0x61,0xee]
vmul.f64 d16, d17, d16
+@ CHECK: vmul.f64 d20, d20, d17 @ encoding: [0xa1,0x4b,0x64,0xee]
+ vmul.f64 d20, d17
+
@ CHECK: vmul.f32 s0, s1, s0 @ encoding: [0x80,0x0a,0x20,0xee]
vmul.f32 s0, s1, s0
+@ CHECK: vmul.f32 s11, s11, s21 @ encoding: [0xaa,0x5a,0x65,0xee]
+ vmul.f32 s11, s21
+
@ CHECK: vnmul.f64 d16, d17, d16 @ encoding: [0xe0,0x0b,0x61,0xee]
vnmul.f64 d16, d17, d16
@@ -127,6 +133,16 @@
vmovne s0, r0
vmoveq s0, r1
+ vmov.f32 r1, s2
+ vmov.f32 s4, r3
+ vmov.f64 r1, r5, d2
+ vmov.f64 d4, r3, r9
+
+@ CHECK: vmov r1, s2 @ encoding: [0x10,0x1a,0x11,0xee]
+@ CHECK: vmov s4, r3 @ encoding: [0x10,0x3a,0x02,0xee]
+@ CHECK: vmov r1, r5, d2 @ encoding: [0x12,0x1b,0x55,0xec]
+@ CHECK: vmov d4, r3, r9 @ encoding: [0x14,0x3b,0x49,0xec]
+
@ CHECK: vmrs r0, fpscr @ encoding: [0x10,0x0a,0xf1,0xee]
vmrs r0, fpscr
@ CHECK: vmrs r0, fpexc @ encoding: [0x10,0x0a,0xf8,0xee]
diff --git a/test/MC/Disassembler/ARM/neon.txt b/test/MC/Disassembler/ARM/neon.txt
index 73bcd37..e4346ec 100644
--- a/test/MC/Disassembler/ARM/neon.txt
+++ b/test/MC/Disassembler/ARM/neon.txt
@@ -1863,3 +1863,9 @@
# CHECK: vld1.32 {d22, d23, d24, d25}, [pc, :64]!
0x9d 0xaa 0x41 0xf4
# CHECK: vst1.32 {d26, d27}, [r1, :64]!
+
+0x10 0x0f 0x83 0xf2
+0x50 0x0f 0x83 0xf2
+# CHECK: vmov.f32 d0, #1.600000e+01
+# CHECK: vmov.f32 q0, #1.600000e+01
+
diff --git a/test/MC/X86/x86-64.s b/test/MC/X86/x86-64.s
index 6c836fc..4ec579a 100644
--- a/test/MC/X86/x86-64.s
+++ b/test/MC/X86/x86-64.s
@@ -459,6 +459,7 @@ cwtl // CHECK: cwtl
cbw // CHECK: cbtw
cwd // CHECK: cwtd
cdq // CHECK: cltd
+cqo // CHECK: cqto
// rdar://8456378 and PR7557 - fstsw
fstsw %ax
diff --git a/test/Makefile b/test/Makefile
index 62b0973..1bf2874 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -121,11 +121,6 @@ DSYMUTIL=dsymutil
else
DSYMUTIL=true
endif
-ifdef TargetCommonOpts
-BUGPOINT_TOPTS="-gcc-tool-args $(TargetCommonOpts)"
-else
-BUGPOINT_TOPTS=""
-endif
ifneq ($(OCAMLOPT),)
CC_FOR_OCAMLOPT := $(shell $(OCAMLOPT) -config | grep native_c_compiler | sed -e 's/native_c_compiler: //')
diff --git a/test/Transforms/ConstProp/bswap.ll b/test/Transforms/ConstProp/bswap.ll
index 9fce309..a68fdcd 100644
--- a/test/Transforms/ConstProp/bswap.ll
+++ b/test/Transforms/ConstProp/bswap.ll
@@ -1,6 +1,6 @@
; bswap should be constant folded when it is passed a constant argument
-; RUN: opt < %s -constprop -S | not grep call
+; RUN: opt < %s -constprop -S | FileCheck %s
declare i16 @llvm.bswap.i16(i16)
@@ -8,18 +8,34 @@ declare i32 @llvm.bswap.i32(i32)
declare i64 @llvm.bswap.i64(i64)
+declare i80 @llvm.bswap.i80(i80)
+
+; CHECK: define i16 @W
define i16 @W() {
+ ; CHECK: ret i16 256
 %Z = call i16 @llvm.bswap.i16( i16 1 ) ; <i16> [#uses=1]
 ret i16 %Z
}
+; CHECK: define i32 @X
define i32 @X() {
+ ; CHECK: ret i32 16777216
 %Z = call i32 @llvm.bswap.i32( i32 1 ) ; <i32> [#uses=1]
 ret i32 %Z
}
+; CHECK: define i64 @Y
define i64 @Y() {
+ ; CHECK: ret i64 72057594037927936
 %Z = call i64 @llvm.bswap.i64( i64 1 ) ; <i64> [#uses=1]
 ret i64 %Z
}
+; CHECK: define i80 @Z
+define i80 @Z() {
+ ; CHECK: ret i80 -450681596205739728166896
+ ; 0xA0908070605040302010
+ %Z = call i80 @llvm.bswap.i80( i80 76151636403560493650080 )
+ ; 0x102030405060708090A0
+ ret i80 %Z
+}
diff --git a/test/Transforms/DeadStoreElimination/pr11390.ll b/test/Transforms/DeadStoreElimination/pr11390.ll
new file mode 100644
index 0000000..2ce6eea
--- /dev/null
+++ b/test/Transforms/DeadStoreElimination/pr11390.ll
@@ -0,0 +1,38 @@
+; RUN: opt -basicaa -dse -S -o - %s | FileCheck %s
+; PR11390
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define fastcc void @cat_domain(i8* nocapture %name, i8* nocapture %domain, i8**
+nocapture %s) nounwind uwtable {
+entry:
+ %call = tail call i64 @strlen(i8* %name) nounwind readonly
+ %call1 = tail call i64 @strlen(i8* %domain) nounwind readonly
+ %add = add i64 %call, 1
+ %add2 = add i64 %add, %call1
+ %add3 = add i64 %add2, 1
+ %call4 = tail call noalias i8* @malloc(i64 %add3) nounwind
+ store i8* %call4, i8** %s, align 8
+ %tobool = icmp eq i8* %call4, null
+ br i1 %tobool, label %return, label %if.end
+
+if.end: ; preds = %entry
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %call4, i8* %name, i64 %call, i32 1, i1 false)
+ %arrayidx = getelementptr inbounds i8* %call4, i64 %call
+ store i8 46, i8* %arrayidx, align 1
+; CHECK: store i8 46
+ %add.ptr5 = getelementptr inbounds i8* %call4, i64 %add
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %add.ptr5, i8* %domain, i64 %call1, i32 1, i1 false)
+ %arrayidx8 = getelementptr inbounds i8* %call4, i64 %add2
+ store i8 0, i8* %arrayidx8, align 1
+ br label %return
+
+return: ; preds = %if.end, %entry
+ ret void
+}
+
+declare i64 @strlen(i8* nocapture) nounwind readonly
+
+declare noalias i8* @malloc(i64) nounwind
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
diff --git a/test/Transforms/GVN/rle.ll b/test/Transforms/GVN/rle.ll
index 47ccd85..9e08004 100644
--- a/test/Transforms/GVN/rle.ll
+++ b/test/Transforms/GVN/rle.ll
@@ -26,6 +26,15 @@ define i8 @crash0({i32, i32} %A, {i32, i32}* %P) {
ret i8 %Y
}
+
+;; No PR filed, crashed in CaptureTracker.
+declare void @helper()
+define void @crash1() {
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* undef, i8* undef, i64 undef, i32 1, i1 false) nounwind
+ %tmp = load i8* bitcast (void ()* @helper to i8*)
+ %x = icmp eq i8 %tmp, 15
+ ret void
+}
+
;;===----------------------------------------------------------------------===;;
;; Store -> Load and Load -> Load forwarding where src and dst are different
diff --git a/test/Transforms/IndVarSimplify/2011-09-10-widen-nsw.ll b/test/Transforms/IndVarSimplify/2011-09-10-widen-nsw.ll
index 77354f7..a8b706e 100644
--- a/test/Transforms/IndVarSimplify/2011-09-10-widen-nsw.ll
+++ b/test/Transforms/IndVarSimplify/2011-09-10-widen-nsw.ll
@@ -3,6 +3,9 @@
; add219 should be extended to i64 because it is nsw, even though its
; sext cannot be hoisted outside the loop.
+; FIXME: GetExtendedOperandRecurrence has problems with the nsw bit on add exprs
+; XFAIL: *
+
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
define void @test() nounwind {
diff --git a/test/Transforms/IndVarSimplify/2011-11-15-multiexit.ll b/test/Transforms/IndVarSimplify/2011-11-15-multiexit.ll
new file mode 100644
index 0000000..c74d04e
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/2011-11-15-multiexit.ll
@@ -0,0 +1,40 @@
+; RUN: opt < %s -indvars -S | FileCheck %s
+;
+; Prior to the fix for PR11375, indvars would replace %firstIV with a
+; loop-invariant gep computed in the preheader. This was incorrect
+; because it was based on the minimum "ExitNotTaken" count. If the
+; final loop test is skipped (odd number of elements) then the early
+; exit would be taken and the loop invariant value would be incorrect.
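+;
+; Editorial sketch, hedged (function and variable names invented here, not
+; part of the original test): the IR below corresponds roughly to
+;
+;   int find_last(int *first, int *last) {
+;     int *p = first;
+;     for (;;) {
+;       if (p + 1 == last)   /* early exit: odd number of elements */
+;         return *p;         /* must see the value of p from THIS exit */
+;       p += 2;
+;       if (p == last)       /* main exit: even number of elements */
+;         break;
+;     }
+;     return *first;
+;   }
+;
+; A replacement based on the minimum exit count is only valid on the exit
+; that produced that count, so %firstIV must not be rewritten with a
+; preheader-computed pointer here.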
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-darwin"
+
+; CHECK: if.end:
+; CHECK: phi i32* [ %first.lcssa, %early.exit ]
+define i32 @test(i32* %first, i32* %last) uwtable ssp {
+entry:
+ br i1 undef, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ br i1 undef, label %if.end, label %do.body
+
+do.body: ; preds = %if.else, %if.then
+ %firstIV = phi i32* [ %incdec.ptr2, %if.else ], [ %first, %if.then ]
+ %incdec.ptr1 = getelementptr inbounds i32* %firstIV, i64 1
+ %cmp1 = icmp eq i32* %incdec.ptr1, %last
+ br i1 %cmp1, label %early.exit, label %if.else
+
+if.else: ; preds = %do.body
+ %incdec.ptr2 = getelementptr inbounds i32* %firstIV, i64 2
+ %cmp2 = icmp eq i32* %incdec.ptr2, %last
+ br i1 %cmp2, label %if.end, label %do.body
+
+early.exit:
+ %first.lcssa = phi i32* [ %firstIV, %do.body ]
+ br label %if.end
+
+if.end:
+ %tmp = phi i32* [ %first.lcssa, %early.exit ], [ %first, %if.then ], [ %first, %entry ], [ undef, %if.else ]
+ %val = load i32* %tmp
+ ret i32 %val
+}
diff --git a/test/Transforms/IndVarSimplify/2011-11-17-selfphi.ll b/test/Transforms/IndVarSimplify/2011-11-17-selfphi.ll
new file mode 100644
index 0000000..ccf2595
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/2011-11-17-selfphi.ll
@@ -0,0 +1,29 @@
+; RUN: opt < %s -indvars -S | FileCheck %s
+; PR11350: Check that SimplifyIndvar handles a cycle of useless self-phis.
+
+; CHECK: @test
+; CHECK-NOT: lcssa = phi
+define void @test() nounwind {
+entry:
+ br label %for.cond.preheader
+
+for.cond.preheader: ; preds = %entry
+ br label %for.cond.outer
+
+for.cond.outer: ; preds = %for.cond.preheader, %for.end
+ %p_41.addr.0.ph = phi i32 [ %p_41.addr.1.lcssa, %for.end ], [ 1, %for.cond.preheader ]
+ br label %for.cond
+
+for.cond:
+ br i1 true, label %for.end, label %for.ph
+
+for.ph: ; preds = %for.cond4.preheader
+ br label %for.end
+
+for.end:
+ %p_41.addr.1.lcssa = phi i32 [ undef, %for.ph ], [ %p_41.addr.0.ph, %for.cond ]
+ %p_68.lobit.i = lshr i32 %p_41.addr.1.lcssa, 31
+ %cmp7 = icmp eq i32 %p_41.addr.1.lcssa, 0
+ %conv8 = zext i1 %cmp7 to i32
+ br label %for.cond.outer
+}
diff --git a/test/Transforms/IndVarSimplify/loop_evaluate10.ll b/test/Transforms/IndVarSimplify/loop_evaluate10.ll
index 269478a..c3619f6 100644
--- a/test/Transforms/IndVarSimplify/loop_evaluate10.ll
+++ b/test/Transforms/IndVarSimplify/loop_evaluate10.ll
@@ -1,8 +1,14 @@
; RUN: opt < %s -indvars -S \
; RUN: | grep {%b.1 = phi i32 \\\[ 2, %bb \\\], \\\[ 1, %bb2 \\\]}
-
+;
; This loop has multiple exits, and the value of %b1 depends on which
; exit is taken. Indvars should correctly compute the exit values.
+;
+; XFAIL: *
+; Indvars does not currently replace loop invariant values unless all
+; loop exits have the same exit value. We could handle some cases,
+; such as this, by making getSCEVAtScope() sensitive to a particular
+; loop exit. See PR11388.
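+;
+; Editorial sketch, hedged (not from the test itself): the multiple-exit
+; pattern is roughly
+;
+;   int b = 1;
+;   for (;;) {
+;     if (cond1()) break;  /* exit A: b may still be 1 */
+;     b = 2;
+;     if (cond2()) break;  /* exit B: b is 2 */
+;   }
+;   use(b);                /* value depends on which exit was taken */
+;
+; so a single loop-invariant exit value for %b.1 only exists if every exit
+; agrees; a per-exit getSCEVAtScope() query could recover the rest.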
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" target triple = "x86_64-pc-linux-gnu" diff --git a/test/Transforms/IndVarSimplify/loop_evaluate9.ll b/test/Transforms/IndVarSimplify/loop_evaluate9.ll index 8184a73..9f3bcaf 100644 --- a/test/Transforms/IndVarSimplify/loop_evaluate9.ll +++ b/test/Transforms/IndVarSimplify/loop_evaluate9.ll @@ -2,8 +2,13 @@ ; RUN: grep {\[%\]tmp7 = icmp eq i8 -28, -28} %t ; RUN: grep {\[%\]tmp8 = icmp eq i8 63, 63} %t ; PR4477 - ; Indvars should compute the exit values in loop. +; +; XFAIL: * +; Indvars does not currently replace loop invariant values unless all +; loop exits have the same exit value. We could handle some cases, +; such as this, by making getSCEVAtScope() sensitive to a particular +; loop exit. See PR11388. target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" target triple = "i386-pc-linux-gnu" diff --git a/test/Transforms/InstSimplify/2011-11-23-MaskedBitsCrash.ll b/test/Transforms/InstSimplify/2011-11-23-MaskedBitsCrash.ll new file mode 100644 index 0000000..6166536 --- /dev/null +++ b/test/Transforms/InstSimplify/2011-11-23-MaskedBitsCrash.ll @@ -0,0 +1,17 @@ +; RUN: opt < %s -instsimplify + +; The mul can be proved to always overflow (turning a negative value +; into a positive one) and thus results in undefined behaviour. At +; the same time we were deducing from the nsw flag that that mul could +; be assumed to have a negative value (since if not it has an undefined +; value, which can be taken to be negative). We were reporting the mul +; as being both positive and negative, firing an assertion! +define i1 @test1(i32 %a) { +entry: + %0 = or i32 %a, 1 + %1 = shl i32 %0, 31 + %2 = mul nsw i32 %1, 4 + %3 = and i32 %2, -4 + %4 = icmp ne i32 %3, 0 + ret i1 %4 +} diff --git a/test/Transforms/LoopUnroll/unloop.ll b/test/Transforms/LoopUnroll/unloop.ll index 217c8ce..5a9cacd 100644 --- a/test/Transforms/LoopUnroll/unloop.ll +++ b/test/Transforms/LoopUnroll/unloop.ll @@ -427,3 +427,44 @@ if.end2413: ; preds = %defchar return: ; preds = %sw.bb304 ret void } + +; PR11335: the most deeply nested block should be removed from the outer loop. 
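+;
+; Editorial sketch, hedged (names invented, not part of the original test):
+; the nest below has roughly this C shape, and each enclosing loop's block
+; list also contains the blocks of its subloops:
+;
+;   while (a()) {            /* outer loop: lbl_616 */
+;     while (b()) {          /* middle loops: for.cond / for.cond1 */
+;       do { } while (c());  /* innermost loop: for.cond.i */
+;     }
+;   }
+;
+; When unrolling deletes a subloop, its blocks must be dropped from every
+; ancestor loop's block list as well, which is what this test pins down.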
+; CHECK: @removeSubloopBlocks2
+; CHECK: for.cond3:
+; CHECK-NOT: br
+; CHECK: ret void
+define void @removeSubloopBlocks2() nounwind {
+entry:
+ %tobool.i = icmp ne i32 undef, 0
+ br label %lbl_616
+
+lbl_616.loopexit: ; preds = %for.cond
+ br label %lbl_616
+
+lbl_616: ; preds = %lbl_616.loopexit, %entry
+ br label %for.cond
+
+for.cond: ; preds = %for.cond3, %lbl_616
+ br i1 false, label %for.cond1.preheader, label %lbl_616.loopexit
+
+for.cond1.preheader: ; preds = %for.cond
+ br label %for.cond1
+
+for.cond1.loopexit: ; preds = %for.cond.i
+ br label %for.cond1
+
+for.cond1: ; preds = %for.cond1.loopexit, %for.cond1.preheader
+ br i1 false, label %for.body2, label %for.cond3
+
+for.body2: ; preds = %for.cond1
+ br label %for.cond.i
+
+for.cond.i: ; preds = %for.cond.i, %for.body2
+ br i1 %tobool.i, label %for.cond.i, label %for.cond1.loopexit
+
+for.cond3: ; preds = %for.cond1
+ br i1 false, label %for.cond, label %if.end
+
+if.end: ; preds = %for.cond3
+ ret void
+}
diff --git a/test/Transforms/SimplifyLibCalls/osx-names.ll b/test/Transforms/SimplifyLibCalls/osx-names.ll
new file mode 100644
index 0000000..e321d1d
--- /dev/null
+++ b/test/Transforms/SimplifyLibCalls/osx-names.ll
@@ -0,0 +1,30 @@
+; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
+; <rdar://problem/9815881>
+; On OSX x86-32, fwrite and fputs aren't called fwrite and fputs.
+; Make sure we use the correct names.
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
+target triple = "i386-apple-macosx10.7.2"
+
+%struct.__sFILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
+%struct.__sbuf = type { i8*, i32 }
+%struct.__sFILEX = type opaque
+
+@.str = private unnamed_addr constant [13 x i8] c"Hello world\0A\00", align 1
+@.str2 = private unnamed_addr constant [3 x i8] c"%s\00", align 1
+
+define void @test1(%struct.__sFILE* %stream) nounwind {
+; CHECK: define void @test1
+; CHECK: call i32 @"fwrite$UNIX2003"
+ %call = tail call i32 (%struct.__sFILE*, i8*, ...)* @fprintf(%struct.__sFILE* %stream, i8* getelementptr inbounds ([13 x i8]* @.str, i32 0, i32 0)) nounwind
+ ret void
+}
+
+define void @test2(%struct.__sFILE* %stream, i8* %str) nounwind ssp {
+; CHECK: define void @test2
+; CHECK: call i32 @"fputs$UNIX2003"
+ %call = tail call i32 (%struct.__sFILE*, i8*, ...)* @fprintf(%struct.__sFILE* %stream, i8* getelementptr inbounds ([3 x i8]* @.str2, i32 0, i32 0), i8* %str) nounwind
+ ret void
+}
+
+declare i32 @fprintf(%struct.__sFILE*, i8*, ...) nounwind
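+
+; Editorial note, hedged (not part of the commit): -simplify-libcalls
+; rewrites the constant-format fprintf calls above roughly as this C
+; would suggest:
+;
+;   fprintf(stream, "Hello world\n");  /* -> fwrite("Hello world\n", 12, 1, stream) */
+;   fprintf(stream, "%s", str);        /* -> fputs(str, stream) */
+;
+; The CHECK lines then pin down the Darwin i386 quirk: the emitted callees
+; must be the "$UNIX2003"-suffixed symbols, e.g. @"fwrite$UNIX2003",
+; rather than plain @fwrite / @fputs.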