commit    c6a4f5e819217e1e12c458aed8e7b122e23a3a58 (patch)
tree      81b7dd2bb4370a392f31d332a566c903b5744764 /test/CodeGen/PowerPC
parent    19c6fbb3e8aaf74093afa08013134b61fa08f245 (diff)
author    Stephen Hines <srhines@google.com>  2014-07-21 00:45:20 -0700
committer Stephen Hines <srhines@google.com>  2014-07-21 00:45:20 -0700
Update LLVM for rebase to r212749.

Includes a cherry-pick of:
  r212948 - fixes a small issue with atomic calls

Change-Id: Ib97bd980b59f18142a69506400911a6009d9df18
Diffstat (limited to 'test/CodeGen/PowerPC')
 -rw-r--r--  test/CodeGen/PowerPC/Atomics-32.ll                48
 -rw-r--r--  test/CodeGen/PowerPC/Frames-alloca.ll              8
 -rw-r--r--  test/CodeGen/PowerPC/Frames-large.ll              16
 -rw-r--r--  test/CodeGen/PowerPC/Frames-small.ll              16
 -rw-r--r--  test/CodeGen/PowerPC/atomic-1.ll                   3
 -rw-r--r--  test/CodeGen/PowerPC/atomic-2.ll                   3
 -rw-r--r--  test/CodeGen/PowerPC/early-ret2.ll                 2
 -rw-r--r--  test/CodeGen/PowerPC/fast-isel-conversion-p5.ll   23
 -rw-r--r--  test/CodeGen/PowerPC/fast-isel-conversion.ll     104
 -rw-r--r--  test/CodeGen/PowerPC/func-addr.ll                 17
 -rw-r--r--  test/CodeGen/PowerPC/hello-reloc.s                18
 -rw-r--r--  test/CodeGen/PowerPC/lit.local.cfg                 3
 -rw-r--r--  test/CodeGen/PowerPC/ppc64-altivec-abi.ll         25
 -rw-r--r--  test/CodeGen/PowerPC/ppc64-byval-align.ll         56
 -rw-r--r--  test/CodeGen/PowerPC/ppc64-calls.ll               12
 -rw-r--r--  test/CodeGen/PowerPC/ppc64-smallarg.ll            59
 -rw-r--r--  test/CodeGen/PowerPC/ppc64le-smallarg.ll          59
 -rw-r--r--  test/CodeGen/PowerPC/ppcf128-endian.ll           154
 -rw-r--r--  test/CodeGen/PowerPC/resolvefi-basereg.ll        362
 -rw-r--r--  test/CodeGen/PowerPC/svr4-redzone.ll               2
 -rw-r--r--  test/CodeGen/PowerPC/vec_cmp.ll                    8
 -rw-r--r--  test/CodeGen/PowerPC/vec_misaligned.ll            10
 -rw-r--r--  test/CodeGen/PowerPC/vec_mul.ll                   17
 -rw-r--r--  test/CodeGen/PowerPC/vec_shuffle_le.ll           191
 -rw-r--r--  test/CodeGen/PowerPC/vperm-instcombine.ll         17
 -rw-r--r--  test/CodeGen/PowerPC/vperm-lowering.ll            66
 26 files changed, 1217 insertions, 82 deletions
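
The bulk of the churn in the atomics tests below comes from the IR change in
this LLVM drop that made cmpxchg return a { value, success } pair instead of
the bare loaded value. A minimal sketch of the pattern the updated tests
adopt (the names %p, %cmp and %new are illustrative, not from any test):

 ; Old form: the instruction yielded only the value read from memory.
 ;   %old = cmpxchg i32* %p, i32 %cmp, i32 %new monotonic monotonic
 ; New form: the result is a pair, so the loaded value (and, where a test
 ; needs it, the success flag) is extracted explicitly.
 %pair = cmpxchg i32* %p, i32 %cmp, i32 %new monotonic monotonic
 %old  = extractvalue { i32, i1 } %pair, 0   ; value that was in %p
 %ok   = extractvalue { i32, i1 } %pair, 1   ; true iff the swap succeeded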
diff --git a/test/CodeGen/PowerPC/Atomics-32.ll b/test/CodeGen/PowerPC/Atomics-32.ll
index b5c03e2..b7f23b1 100644
--- a/test/CodeGen/PowerPC/Atomics-32.ll
+++ b/test/CodeGen/PowerPC/Atomics-32.ll
@@ -529,63 +529,73 @@ define void @test_compare_and_swap() nounwind {
entry:
%0 = load i8* @uc, align 1
%1 = load i8* @sc, align 1
- %2 = cmpxchg i8* @sc, i8 %0, i8 %1 monotonic monotonic
+ %pair2 = cmpxchg i8* @sc, i8 %0, i8 %1 monotonic monotonic
+ %2 = extractvalue { i8, i1 } %pair2, 0
store i8 %2, i8* @sc, align 1
%3 = load i8* @uc, align 1
%4 = load i8* @sc, align 1
- %5 = cmpxchg i8* @uc, i8 %3, i8 %4 monotonic monotonic
+ %pair5 = cmpxchg i8* @uc, i8 %3, i8 %4 monotonic monotonic
+ %5 = extractvalue { i8, i1 } %pair5, 0
store i8 %5, i8* @uc, align 1
%6 = load i8* @uc, align 1
%7 = zext i8 %6 to i16
%8 = load i8* @sc, align 1
%9 = sext i8 %8 to i16
%10 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
- %11 = cmpxchg i16* %10, i16 %7, i16 %9 monotonic monotonic
+ %pair11 = cmpxchg i16* %10, i16 %7, i16 %9 monotonic monotonic
+ %11 = extractvalue { i16, i1 } %pair11, 0
store i16 %11, i16* @ss, align 2
%12 = load i8* @uc, align 1
%13 = zext i8 %12 to i16
%14 = load i8* @sc, align 1
%15 = sext i8 %14 to i16
%16 = bitcast i8* bitcast (i16* @us to i8*) to i16*
- %17 = cmpxchg i16* %16, i16 %13, i16 %15 monotonic monotonic
+ %pair17 = cmpxchg i16* %16, i16 %13, i16 %15 monotonic monotonic
+ %17 = extractvalue { i16, i1 } %pair17, 0
store i16 %17, i16* @us, align 2
%18 = load i8* @uc, align 1
%19 = zext i8 %18 to i32
%20 = load i8* @sc, align 1
%21 = sext i8 %20 to i32
%22 = bitcast i8* bitcast (i32* @si to i8*) to i32*
- %23 = cmpxchg i32* %22, i32 %19, i32 %21 monotonic monotonic
+ %pair23 = cmpxchg i32* %22, i32 %19, i32 %21 monotonic monotonic
+ %23 = extractvalue { i32, i1 } %pair23, 0
store i32 %23, i32* @si, align 4
%24 = load i8* @uc, align 1
%25 = zext i8 %24 to i32
%26 = load i8* @sc, align 1
%27 = sext i8 %26 to i32
%28 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
- %29 = cmpxchg i32* %28, i32 %25, i32 %27 monotonic monotonic
+ %pair29 = cmpxchg i32* %28, i32 %25, i32 %27 monotonic monotonic
+ %29 = extractvalue { i32, i1 } %pair29, 0
store i32 %29, i32* @ui, align 4
%30 = load i8* @uc, align 1
%31 = zext i8 %30 to i32
%32 = load i8* @sc, align 1
%33 = sext i8 %32 to i32
%34 = bitcast i8* bitcast (i32* @sl to i8*) to i32*
- %35 = cmpxchg i32* %34, i32 %31, i32 %33 monotonic monotonic
+ %pair35 = cmpxchg i32* %34, i32 %31, i32 %33 monotonic monotonic
+ %35 = extractvalue { i32, i1 } %pair35, 0
store i32 %35, i32* @sl, align 4
%36 = load i8* @uc, align 1
%37 = zext i8 %36 to i32
%38 = load i8* @sc, align 1
%39 = sext i8 %38 to i32
%40 = bitcast i8* bitcast (i32* @ul to i8*) to i32*
- %41 = cmpxchg i32* %40, i32 %37, i32 %39 monotonic monotonic
+ %pair41 = cmpxchg i32* %40, i32 %37, i32 %39 monotonic monotonic
+ %41 = extractvalue { i32, i1 } %pair41, 0
store i32 %41, i32* @ul, align 4
%42 = load i8* @uc, align 1
%43 = load i8* @sc, align 1
- %44 = cmpxchg i8* @sc, i8 %42, i8 %43 monotonic monotonic
+ %pair44 = cmpxchg i8* @sc, i8 %42, i8 %43 monotonic monotonic
+ %44 = extractvalue { i8, i1 } %pair44, 0
%45 = icmp eq i8 %44, %42
%46 = zext i1 %45 to i32
store i32 %46, i32* @ui, align 4
%47 = load i8* @uc, align 1
%48 = load i8* @sc, align 1
- %49 = cmpxchg i8* @uc, i8 %47, i8 %48 monotonic monotonic
+ %pair49 = cmpxchg i8* @uc, i8 %47, i8 %48 monotonic monotonic
+ %49 = extractvalue { i8, i1 } %pair49, 0
%50 = icmp eq i8 %49, %47
%51 = zext i1 %50 to i32
store i32 %51, i32* @ui, align 4
@@ -594,7 +604,8 @@ entry:
%54 = load i8* @sc, align 1
%55 = sext i8 %54 to i16
%56 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
- %57 = cmpxchg i16* %56, i16 %53, i16 %55 monotonic monotonic
+ %pair57 = cmpxchg i16* %56, i16 %53, i16 %55 monotonic monotonic
+ %57 = extractvalue { i16, i1 } %pair57, 0
%58 = icmp eq i16 %57, %53
%59 = zext i1 %58 to i32
store i32 %59, i32* @ui, align 4
@@ -603,7 +614,8 @@ entry:
%62 = load i8* @sc, align 1
%63 = sext i8 %62 to i16
%64 = bitcast i8* bitcast (i16* @us to i8*) to i16*
- %65 = cmpxchg i16* %64, i16 %61, i16 %63 monotonic monotonic
+ %pair65 = cmpxchg i16* %64, i16 %61, i16 %63 monotonic monotonic
+ %65 = extractvalue { i16, i1 } %pair65, 0
%66 = icmp eq i16 %65, %61
%67 = zext i1 %66 to i32
store i32 %67, i32* @ui, align 4
@@ -612,7 +624,8 @@ entry:
%70 = load i8* @sc, align 1
%71 = sext i8 %70 to i32
%72 = bitcast i8* bitcast (i32* @si to i8*) to i32*
- %73 = cmpxchg i32* %72, i32 %69, i32 %71 monotonic monotonic
+ %pair73 = cmpxchg i32* %72, i32 %69, i32 %71 monotonic monotonic
+ %73 = extractvalue { i32, i1 } %pair73, 0
%74 = icmp eq i32 %73, %69
%75 = zext i1 %74 to i32
store i32 %75, i32* @ui, align 4
@@ -621,7 +634,8 @@ entry:
%78 = load i8* @sc, align 1
%79 = sext i8 %78 to i32
%80 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
- %81 = cmpxchg i32* %80, i32 %77, i32 %79 monotonic monotonic
+ %pair81 = cmpxchg i32* %80, i32 %77, i32 %79 monotonic monotonic
+ %81 = extractvalue { i32, i1 } %pair81, 0
%82 = icmp eq i32 %81, %77
%83 = zext i1 %82 to i32
store i32 %83, i32* @ui, align 4
@@ -630,7 +644,8 @@ entry:
%86 = load i8* @sc, align 1
%87 = sext i8 %86 to i32
%88 = bitcast i8* bitcast (i32* @sl to i8*) to i32*
- %89 = cmpxchg i32* %88, i32 %85, i32 %87 monotonic monotonic
+ %pair89 = cmpxchg i32* %88, i32 %85, i32 %87 monotonic monotonic
+ %89 = extractvalue { i32, i1 } %pair89, 0
%90 = icmp eq i32 %89, %85
%91 = zext i1 %90 to i32
store i32 %91, i32* @ui, align 4
@@ -639,7 +654,8 @@ entry:
%94 = load i8* @sc, align 1
%95 = sext i8 %94 to i32
%96 = bitcast i8* bitcast (i32* @ul to i8*) to i32*
- %97 = cmpxchg i32* %96, i32 %93, i32 %95 monotonic monotonic
+ %pair97 = cmpxchg i32* %96, i32 %93, i32 %95 monotonic monotonic
+ %97 = extractvalue { i32, i1 } %pair97, 0
%98 = icmp eq i32 %97, %93
%99 = zext i1 %98 to i32
store i32 %99, i32* @ui, align 4
diff --git a/test/CodeGen/PowerPC/Frames-alloca.ll b/test/CodeGen/PowerPC/Frames-alloca.ll
index 4588bc0..c701fef 100644
--- a/test/CodeGen/PowerPC/Frames-alloca.ll
+++ b/test/CodeGen/PowerPC/Frames-alloca.ll
@@ -12,15 +12,15 @@
; CHECK-PPC32-NOFP: stw r31, -4(r1)
; CHECK-PPC32-NOFP: lwz r1, 0(r1)
; CHECK-PPC32-NOFP: lwz r31, -4(r1)
-; CHECK-PPC32-RS: stwu r1, -80(r1)
-; CHECK-PPC32-RS-NOFP: stwu r1, -80(r1)
+; CHECK-PPC32-RS: stwu r1, -48(r1)
+; CHECK-PPC32-RS-NOFP: stwu r1, -48(r1)
; CHECK-PPC64: std r31, -8(r1)
-; CHECK-PPC64: stdu r1, -128(r1)
+; CHECK-PPC64: stdu r1, -64(r1)
; CHECK-PPC64: ld r1, 0(r1)
; CHECK-PPC64: ld r31, -8(r1)
; CHECK-PPC64-NOFP: std r31, -8(r1)
-; CHECK-PPC64-NOFP: stdu r1, -128(r1)
+; CHECK-PPC64-NOFP: stdu r1, -64(r1)
; CHECK-PPC64-NOFP: ld r1, 0(r1)
; CHECK-PPC64-NOFP: ld r31, -8(r1)
diff --git a/test/CodeGen/PowerPC/Frames-large.ll b/test/CodeGen/PowerPC/Frames-large.ll
index d07fea7..0ccea42 100644
--- a/test/CodeGen/PowerPC/Frames-large.ll
+++ b/test/CodeGen/PowerPC/Frames-large.ll
@@ -15,9 +15,9 @@ define i32* @f1() nounwind {
; PPC32-NOFP: _f1:
; PPC32-NOFP: lis r0, -1
-; PPC32-NOFP: ori r0, r0, 32704
+; PPC32-NOFP: ori r0, r0, 32736
; PPC32-NOFP: stwux r1, r1, r0
-; PPC32-NOFP: addi r3, r1, 68
+; PPC32-NOFP: addi r3, r1, 36
; PPC32-NOFP: lwz r1, 0(r1)
; PPC32-NOFP: blr
@@ -25,10 +25,10 @@ define i32* @f1() nounwind {
; PPC32-FP: _f1:
; PPC32-FP: lis r0, -1
; PPC32-FP: stw r31, -4(r1)
-; PPC32-FP: ori r0, r0, 32704
+; PPC32-FP: ori r0, r0, 32736
; PPC32-FP: stwux r1, r1, r0
; PPC32-FP: mr r31, r1
-; PPC32-FP: addi r3, r31, 64
+; PPC32-FP: addi r3, r31, 32
; PPC32-FP: lwz r1, 0(r1)
; PPC32-FP: lwz r31, -4(r1)
; PPC32-FP: blr
@@ -36,9 +36,9 @@ define i32* @f1() nounwind {
; PPC64-NOFP: _f1:
; PPC64-NOFP: lis r0, -1
-; PPC64-NOFP: ori r0, r0, 32656
+; PPC64-NOFP: ori r0, r0, 32720
; PPC64-NOFP: stdux r1, r1, r0
-; PPC64-NOFP: addi r3, r1, 116
+; PPC64-NOFP: addi r3, r1, 52
; PPC64-NOFP: ld r1, 0(r1)
; PPC64-NOFP: blr
@@ -46,10 +46,10 @@ define i32* @f1() nounwind {
; PPC64-FP: _f1:
; PPC64-FP: lis r0, -1
; PPC64-FP: std r31, -8(r1)
-; PPC64-FP: ori r0, r0, 32640
+; PPC64-FP: ori r0, r0, 32704
; PPC64-FP: stdux r1, r1, r0
; PPC64-FP: mr r31, r1
-; PPC64-FP: addi r3, r31, 124
+; PPC64-FP: addi r3, r31, 60
; PPC64-FP: ld r1, 0(r1)
; PPC64-FP: ld r31, -8(r1)
; PPC64-FP: blr
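
For frames too large for a 16-bit immediate, the allocation amount is built
with a lis/ori pair; decoding the updated PPC32-NOFP values shows where the
new numbers come from (a worked reading, not part of the test):

 ; lis r0, -1          ; r0 = 0xFFFF0000 = -65536
 ; ori r0, r0, 32736   ; r0 = 0xFFFF7FE0 = -65536 + 32736 = -32800
 ; stwux r1, r1, r0    ; push a 32800-byte frame (the old ori 32704 gave -32832)

The 32-byte shrink matches the Frames-alloca update above (stwu -80 becoming
-48) and the Frames-small grep values that follow.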
diff --git a/test/CodeGen/PowerPC/Frames-small.ll b/test/CodeGen/PowerPC/Frames-small.ll
index 0f6bd10..28c1a5b 100644
--- a/test/CodeGen/PowerPC/Frames-small.ll
+++ b/test/CodeGen/PowerPC/Frames-small.ll
@@ -1,25 +1,25 @@
; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -o %t1
; RUN: not grep "stw r31, -4(r1)" %t1
-; RUN: grep "stwu r1, -16448(r1)" %t1
-; RUN: grep "addi r1, r1, 16448" %t1
+; RUN: grep "stwu r1, -16416(r1)" %t1
+; RUN: grep "addi r1, r1, 16416" %t1
; RUN: llc < %s -march=ppc32 | \
; RUN: not grep "lwz r31, -4(r1)"
; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim \
; RUN: -o %t2
; RUN: grep "stw r31, -4(r1)" %t2
-; RUN: grep "stwu r1, -16448(r1)" %t2
-; RUN: grep "addi r1, r1, 16448" %t2
+; RUN: grep "stwu r1, -16416(r1)" %t2
+; RUN: grep "addi r1, r1, 16416" %t2
; RUN: grep "lwz r31, -4(r1)" %t2
; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin8 -o %t3
; RUN: not grep "std r31, -8(r1)" %t3
-; RUN: grep "stdu r1, -16496(r1)" %t3
-; RUN: grep "addi r1, r1, 16496" %t3
+; RUN: grep "stdu r1, -16432(r1)" %t3
+; RUN: grep "addi r1, r1, 16432" %t3
; RUN: not grep "ld r31, -8(r1)" %t3
; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim \
; RUN: -o %t4
; RUN: grep "std r31, -8(r1)" %t4
-; RUN: grep "stdu r1, -16512(r1)" %t4
-; RUN: grep "addi r1, r1, 16512" %t4
+; RUN: grep "stdu r1, -16448(r1)" %t4
+; RUN: grep "addi r1, r1, 16448" %t4
; RUN: grep "ld r31, -8(r1)" %t4
define i32* @f1() {
diff --git a/test/CodeGen/PowerPC/atomic-1.ll b/test/CodeGen/PowerPC/atomic-1.ll
index 083df47..997a016 100644
--- a/test/CodeGen/PowerPC/atomic-1.ll
+++ b/test/CodeGen/PowerPC/atomic-1.ll
@@ -11,7 +11,8 @@ define i32 @exchange_and_add(i32* %mem, i32 %val) nounwind {
define i32 @exchange_and_cmp(i32* %mem) nounwind {
; CHECK-LABEL: exchange_and_cmp:
; CHECK: lwarx
- %tmp = cmpxchg i32* %mem, i32 0, i32 1 monotonic monotonic
+ %tmppair = cmpxchg i32* %mem, i32 0, i32 1 monotonic monotonic
+ %tmp = extractvalue { i32, i1 } %tmppair, 0
; CHECK: stwcx.
; CHECK: stwcx.
ret i32 %tmp
diff --git a/test/CodeGen/PowerPC/atomic-2.ll b/test/CodeGen/PowerPC/atomic-2.ll
index 261335e..843250f 100644
--- a/test/CodeGen/PowerPC/atomic-2.ll
+++ b/test/CodeGen/PowerPC/atomic-2.ll
@@ -11,7 +11,8 @@ define i64 @exchange_and_add(i64* %mem, i64 %val) nounwind {
define i64 @exchange_and_cmp(i64* %mem) nounwind {
; CHECK-LABEL: exchange_and_cmp:
; CHECK: ldarx
- %tmp = cmpxchg i64* %mem, i64 0, i64 1 monotonic monotonic
+ %tmppair = cmpxchg i64* %mem, i64 0, i64 1 monotonic monotonic
+ %tmp = extractvalue { i64, i1 } %tmppair, 0
; CHECK: stdcx.
; CHECK: stdcx.
ret i64 %tmp
diff --git a/test/CodeGen/PowerPC/early-ret2.ll b/test/CodeGen/PowerPC/early-ret2.ll
index a8e456f..1784777 100644
--- a/test/CodeGen/PowerPC/early-ret2.ll
+++ b/test/CodeGen/PowerPC/early-ret2.ll
@@ -11,7 +11,7 @@ while.body.lr.ph: ; preds = %entry
br i1 undef, label %while.end, label %while.body
while.body: ; preds = %while.body, %while.body.lr.ph
- br i1 false, label %while.end, label %while.body, !llvm.vectorizer.already_vectorized !0
+ br i1 false, label %while.end, label %while.body, !llvm.loop.vectorize.already_vectorized !0
while.end: ; preds = %while.body, %while.body.lr.ph, %entry
ret void
diff --git a/test/CodeGen/PowerPC/fast-isel-conversion-p5.ll b/test/CodeGen/PowerPC/fast-isel-conversion-p5.ll
index db0d8ed..ac41e8c 100644
--- a/test/CodeGen/PowerPC/fast-isel-conversion-p5.ll
+++ b/test/CodeGen/PowerPC/fast-isel-conversion-p5.ll
@@ -116,18 +116,6 @@ entry:
ret void
}
-define void @fptoui_float_i64(float %a) nounwind ssp {
-entry:
-; ELF64: fptoui_float_i64
- %b.addr = alloca i64, align 4
- %conv = fptoui float %a to i64
-; ELF64: fctiduz
-; ELF64: stfd
-; ELF64: ld
- store i64 %conv, i64* %b.addr, align 4
- ret void
-}
-
define void @fptoui_double_i32(double %a) nounwind ssp {
entry:
; ELF64: fptoui_double_i32
@@ -140,14 +128,3 @@ entry:
ret void
}
-define void @fptoui_double_i64(double %a) nounwind ssp {
-entry:
-; ELF64: fptoui_double_i64
- %b.addr = alloca i64, align 8
- %conv = fptoui double %a to i64
-; ELF64: fctiduz
-; ELF64: stfd
-; ELF64: ld
- store i64 %conv, i64* %b.addr, align 8
- ret void
-}
diff --git a/test/CodeGen/PowerPC/fast-isel-conversion.ll b/test/CodeGen/PowerPC/fast-isel-conversion.ll
index a31c312..5e00675 100644
--- a/test/CodeGen/PowerPC/fast-isel-conversion.ll
+++ b/test/CodeGen/PowerPC/fast-isel-conversion.ll
@@ -1,15 +1,24 @@
; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
+; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=970 | FileCheck %s --check-prefix=PPC970
+
+;; Tests for 970 don't use -fast-isel-abort because we intentionally punt
+;; to SelectionDAG in some cases.
; Test sitofp
define void @sitofp_single_i64(i64 %a, float %b) nounwind ssp {
entry:
; ELF64: sitofp_single_i64
+; PPC970: sitofp_single_i64
%b.addr = alloca float, align 4
%conv = sitofp i64 %a to float
; ELF64: std
; ELF64: lfd
; ELF64: fcfids
+; PPC970: std
+; PPC970: lfd
+; PPC970: fcfid
+; PPC970: frsp
store float %conv, float* %b.addr, align 4
ret void
}
@@ -17,11 +26,16 @@ entry:
define void @sitofp_single_i32(i32 %a, float %b) nounwind ssp {
entry:
; ELF64: sitofp_single_i32
+; PPC970: sitofp_single_i32
%b.addr = alloca float, align 4
%conv = sitofp i32 %a to float
; ELF64: std
; ELF64: lfiwax
; ELF64: fcfids
+; PPC970: std
+; PPC970: lfd
+; PPC970: fcfid
+; PPC970: frsp
store float %conv, float* %b.addr, align 4
ret void
}
@@ -29,12 +43,18 @@ entry:
define void @sitofp_single_i16(i16 %a, float %b) nounwind ssp {
entry:
; ELF64: sitofp_single_i16
+; PPC970: sitofp_single_i16
%b.addr = alloca float, align 4
%conv = sitofp i16 %a to float
; ELF64: extsh
; ELF64: std
; ELF64: lfd
; ELF64: fcfids
+; PPC970: extsh
+; PPC970: std
+; PPC970: lfd
+; PPC970: fcfid
+; PPC970: frsp
store float %conv, float* %b.addr, align 4
ret void
}
@@ -42,12 +62,18 @@ entry:
define void @sitofp_single_i8(i8 %a) nounwind ssp {
entry:
; ELF64: sitofp_single_i8
+; PPC970: sitofp_single_i8
%b.addr = alloca float, align 4
%conv = sitofp i8 %a to float
; ELF64: extsb
; ELF64: std
; ELF64: lfd
; ELF64: fcfids
+; PPC970: extsb
+; PPC970: std
+; PPC970: lfd
+; PPC970: fcfid
+; PPC970: frsp
store float %conv, float* %b.addr, align 4
ret void
}
@@ -55,11 +81,15 @@ entry:
define void @sitofp_double_i32(i32 %a, double %b) nounwind ssp {
entry:
; ELF64: sitofp_double_i32
+; PPC970: sitofp_double_i32
%b.addr = alloca double, align 8
%conv = sitofp i32 %a to double
; ELF64: std
; ELF64: lfiwax
; ELF64: fcfid
+; PPC970: std
+; PPC970: lfd
+; PPC970: fcfid
store double %conv, double* %b.addr, align 8
ret void
}
@@ -67,11 +97,15 @@ entry:
define void @sitofp_double_i64(i64 %a, double %b) nounwind ssp {
entry:
; ELF64: sitofp_double_i64
+; PPC970: sitofp_double_i64
%b.addr = alloca double, align 8
%conv = sitofp i64 %a to double
; ELF64: std
; ELF64: lfd
; ELF64: fcfid
+; PPC970: std
+; PPC970: lfd
+; PPC970: fcfid
store double %conv, double* %b.addr, align 8
ret void
}
@@ -79,12 +113,17 @@ entry:
define void @sitofp_double_i16(i16 %a, double %b) nounwind ssp {
entry:
; ELF64: sitofp_double_i16
+; PPC970: sitofp_double_i16
%b.addr = alloca double, align 8
%conv = sitofp i16 %a to double
; ELF64: extsh
; ELF64: std
; ELF64: lfd
; ELF64: fcfid
+; PPC970: extsh
+; PPC970: std
+; PPC970: lfd
+; PPC970: fcfid
store double %conv, double* %b.addr, align 8
ret void
}
@@ -92,12 +131,17 @@ entry:
define void @sitofp_double_i8(i8 %a, double %b) nounwind ssp {
entry:
; ELF64: sitofp_double_i8
+; PPC970: sitofp_double_i8
%b.addr = alloca double, align 8
%conv = sitofp i8 %a to double
; ELF64: extsb
; ELF64: std
; ELF64: lfd
; ELF64: fcfid
+; PPC970: extsb
+; PPC970: std
+; PPC970: lfd
+; PPC970: fcfid
store double %conv, double* %b.addr, align 8
ret void
}
@@ -107,11 +151,13 @@ entry:
define void @uitofp_single_i64(i64 %a, float %b) nounwind ssp {
entry:
; ELF64: uitofp_single_i64
+; PPC970: uitofp_single_i64
%b.addr = alloca float, align 4
%conv = uitofp i64 %a to float
; ELF64: std
; ELF64: lfd
; ELF64: fcfidus
+; PPC970-NOT: fcfidus
store float %conv, float* %b.addr, align 4
ret void
}
@@ -119,11 +165,14 @@ entry:
define void @uitofp_single_i32(i32 %a, float %b) nounwind ssp {
entry:
; ELF64: uitofp_single_i32
+; PPC970: uitofp_single_i32
%b.addr = alloca float, align 4
%conv = uitofp i32 %a to float
; ELF64: std
; ELF64: lfiwzx
; ELF64: fcfidus
+; PPC970-NOT: lfiwzx
+; PPC970-NOT: fcfidus
store float %conv, float* %b.addr, align 4
ret void
}
@@ -131,12 +180,18 @@ entry:
define void @uitofp_single_i16(i16 %a, float %b) nounwind ssp {
entry:
; ELF64: uitofp_single_i16
+; PPC970: uitofp_single_i16
%b.addr = alloca float, align 4
%conv = uitofp i16 %a to float
; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48
; ELF64: std
; ELF64: lfd
; ELF64: fcfidus
+; PPC970: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 16, 31
+; PPC970: std
+; PPC970: lfd
+; PPC970: fcfid
+; PPC970: frsp
store float %conv, float* %b.addr, align 4
ret void
}
@@ -144,12 +199,18 @@ entry:
define void @uitofp_single_i8(i8 %a) nounwind ssp {
entry:
; ELF64: uitofp_single_i8
+; PPC970: uitofp_single_i8
%b.addr = alloca float, align 4
%conv = uitofp i8 %a to float
; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 56
; ELF64: std
; ELF64: lfd
; ELF64: fcfidus
+; PPC970: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 24, 31
+; PPC970: std
+; PPC970: lfd
+; PPC970: fcfid
+; PPC970: frsp
store float %conv, float* %b.addr, align 4
ret void
}
@@ -157,11 +218,13 @@ entry:
define void @uitofp_double_i64(i64 %a, double %b) nounwind ssp {
entry:
; ELF64: uitofp_double_i64
+; PPC970: uitofp_double_i64
%b.addr = alloca double, align 8
%conv = uitofp i64 %a to double
; ELF64: std
; ELF64: lfd
; ELF64: fcfidu
+; PPC970-NOT: fcfidu
store double %conv, double* %b.addr, align 8
ret void
}
@@ -169,11 +232,14 @@ entry:
define void @uitofp_double_i32(i32 %a, double %b) nounwind ssp {
entry:
; ELF64: uitofp_double_i32
+; PPC970: uitofp_double_i32
%b.addr = alloca double, align 8
%conv = uitofp i32 %a to double
; ELF64: std
; ELF64: lfiwzx
; ELF64: fcfidu
+; PPC970-NOT: lfiwzx
+; PPC970-NOT: fcfidu
store double %conv, double* %b.addr, align 8
ret void
}
@@ -181,12 +247,17 @@ entry:
define void @uitofp_double_i16(i16 %a, double %b) nounwind ssp {
entry:
; ELF64: uitofp_double_i16
+; PPC970: uitofp_double_i16
%b.addr = alloca double, align 8
%conv = uitofp i16 %a to double
; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48
; ELF64: std
; ELF64: lfd
; ELF64: fcfidu
+; PPC970: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 16, 31
+; PPC970: std
+; PPC970: lfd
+; PPC970: fcfid
store double %conv, double* %b.addr, align 8
ret void
}
@@ -194,12 +265,17 @@ entry:
define void @uitofp_double_i8(i8 %a, double %b) nounwind ssp {
entry:
; ELF64: uitofp_double_i8
+; PPC970: uitofp_double_i8
%b.addr = alloca double, align 8
%conv = uitofp i8 %a to double
; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 56
; ELF64: std
; ELF64: lfd
; ELF64: fcfidu
+; PPC970: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 24, 31
+; PPC970: std
+; PPC970: lfd
+; PPC970: fcfid
store double %conv, double* %b.addr, align 8
ret void
}
@@ -209,11 +285,15 @@ entry:
define void @fptosi_float_i32(float %a) nounwind ssp {
entry:
; ELF64: fptosi_float_i32
+; PPC970: fptosi_float_i32
%b.addr = alloca i32, align 4
%conv = fptosi float %a to i32
; ELF64: fctiwz
; ELF64: stfd
; ELF64: lwa
+; PPC970: fctiwz
+; PPC970: stfd
+; PPC970: lwa
store i32 %conv, i32* %b.addr, align 4
ret void
}
@@ -221,11 +301,15 @@ entry:
define void @fptosi_float_i64(float %a) nounwind ssp {
entry:
; ELF64: fptosi_float_i64
+; PPC970: fptosi_float_i64
%b.addr = alloca i64, align 4
%conv = fptosi float %a to i64
; ELF64: fctidz
; ELF64: stfd
; ELF64: ld
+; PPC970: fctidz
+; PPC970: stfd
+; PPC970: ld
store i64 %conv, i64* %b.addr, align 4
ret void
}
@@ -233,11 +317,15 @@ entry:
define void @fptosi_double_i32(double %a) nounwind ssp {
entry:
; ELF64: fptosi_double_i32
+; PPC970: fptosi_double_i32
%b.addr = alloca i32, align 8
%conv = fptosi double %a to i32
; ELF64: fctiwz
; ELF64: stfd
; ELF64: lwa
+; PPC970: fctiwz
+; PPC970: stfd
+; PPC970: lwa
store i32 %conv, i32* %b.addr, align 8
ret void
}
@@ -245,11 +333,15 @@ entry:
define void @fptosi_double_i64(double %a) nounwind ssp {
entry:
; ELF64: fptosi_double_i64
+; PPC970: fptosi_double_i64
%b.addr = alloca i64, align 8
%conv = fptosi double %a to i64
; ELF64: fctidz
; ELF64: stfd
; ELF64: ld
+; PPC970: fctidz
+; PPC970: stfd
+; PPC970: ld
store i64 %conv, i64* %b.addr, align 8
ret void
}
@@ -259,11 +351,15 @@ entry:
define void @fptoui_float_i32(float %a) nounwind ssp {
entry:
; ELF64: fptoui_float_i32
+; PPC970: fptoui_float_i32
%b.addr = alloca i32, align 4
%conv = fptoui float %a to i32
; ELF64: fctiwuz
; ELF64: stfd
; ELF64: lwz
+; PPC970: fctidz
+; PPC970: stfd
+; PPC970: lwz
store i32 %conv, i32* %b.addr, align 4
ret void
}
@@ -271,11 +367,13 @@ entry:
define void @fptoui_float_i64(float %a) nounwind ssp {
entry:
; ELF64: fptoui_float_i64
+; PPC970: fptoui_float_i64
%b.addr = alloca i64, align 4
%conv = fptoui float %a to i64
; ELF64: fctiduz
; ELF64: stfd
; ELF64: ld
+; PPC970-NOT: fctiduz
store i64 %conv, i64* %b.addr, align 4
ret void
}
@@ -283,11 +381,15 @@ entry:
define void @fptoui_double_i32(double %a) nounwind ssp {
entry:
; ELF64: fptoui_double_i32
+; PPC970: fptoui_double_i32
%b.addr = alloca i32, align 8
%conv = fptoui double %a to i32
; ELF64: fctiwuz
; ELF64: stfd
; ELF64: lwz
+; PPC970: fctidz
+; PPC970: stfd
+; PPC970: lwz
store i32 %conv, i32* %b.addr, align 8
ret void
}
@@ -295,11 +397,13 @@ entry:
define void @fptoui_double_i64(double %a) nounwind ssp {
entry:
; ELF64: fptoui_double_i64
+; PPC970: fptoui_double_i64
%b.addr = alloca i64, align 8
%conv = fptoui double %a to i64
; ELF64: fctiduz
; ELF64: stfd
; ELF64: ld
+; PPC970-NOT: fctiduz
store i64 %conv, i64* %b.addr, align 8
ret void
}
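
The reason the PPC970 RUN line above omits -fast-isel-abort: the unsigned
FP conversion instructions that fast-isel selects on pwr7 are ISA 2.06
additions that the 970 lacks, so those cases deliberately fall back to
SelectionDAG, which -fast-isel-abort would turn into a hard error. A sketch
of the gap, as implied by the CHECK lines:

 ; pwr7 (ISA 2.06) has direct unsigned converts, e.g. for uitofp i64 -> float:
 ;   fcfidus 1, 1        ; convert unsigned doubleword, single precision
 ; the 970 predates them; only the signed convert plus a round exists:
 ;   fcfid 1, 1          ; convert signed doubleword to double
 ;   frsp  1, 1          ; round the result to single precision
 ; hence the PPC970-NOT checks on fcfidus, fcfidu and fctiduz.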
diff --git a/test/CodeGen/PowerPC/func-addr.ll b/test/CodeGen/PowerPC/func-addr.ll
new file mode 100644
index 0000000..4533c62
--- /dev/null
+++ b/test/CodeGen/PowerPC/func-addr.ll
@@ -0,0 +1,17 @@
+; RUN: llc -mtriple powerpc64-linux < %s | FileCheck %s
+; RUN: llc -O0 -mtriple powerpc64-linux < %s | FileCheck %s
+
+define void @foo() {
+ ret void
+}
+declare i32 @bar(i8*)
+
+; CHECK-LABEL: {{^}}zed:
+; CHECK: addis 3, 2, foo@toc@ha
+; CHECK-NEXT: addi 3, 3, foo@toc@l
+; CHECK-NEXT: bl bar
+
+define void @zed() {
+ call i32 @bar(i8* bitcast (void ()* @foo to i8*))
+ ret void
+}
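
The CHECK lines above encode the usual 64-bit SVR4 address materialization:
a global's address is formed relative to the TOC pointer in r2 with a
high-adjusted/low relocation pair. A reading of the sequence (under the
ELFv1 ABI of the time, @foo here resolves to foo's function descriptor):

 ; addis 3, 2, foo@toc@ha   ; r3 = r2 + (high 16 bits of the TOC offset,
 ;                          ;      adjusted for the sign of the low half)
 ; addi  3, 3, foo@toc@l    ; r3 = r2 + full 32-bit TOC offset = address of foo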
diff --git a/test/CodeGen/PowerPC/hello-reloc.s b/test/CodeGen/PowerPC/hello-reloc.s
index 1e3fb8f..97dfbb5 100644
--- a/test/CodeGen/PowerPC/hello-reloc.s
+++ b/test/CodeGen/PowerPC/hello-reloc.s
@@ -62,17 +62,17 @@ L_.str: ; @.str
; DARWIN-G4-DUMP:AddressSize: 32bit
; DARWIN-G4-DUMP:Relocations [
; DARWIN-G4-DUMP: Section __text {
-; DARWIN-G4-DUMP: 0x34 1 2 0 PPC_RELOC_BR24 0 -
-; DARWIN-G4-DUMP: 0x30 0 2 n/a PPC_RELOC_LO16_SECTDIFF 1 _main
-; DARWIN-G4-DUMP: 0x0 0 2 n/a PPC_RELOC_PAIR 1 _main
-; DARWIN-G4-DUMP: 0x2C 0 2 n/a PPC_RELOC_HA16_SECTDIFF 1 _main
-; DARWIN-G4-DUMP: 0x60 0 2 n/a PPC_RELOC_PAIR 1 _main
+; DARWIN-G4-DUMP: 0x34 1 2 0 PPC_RELOC_BR24 0 0x3
+; DARWIN-G4-DUMP: 0x30 0 2 n/a PPC_RELOC_LO16_SECTDIFF 1 0x74
+; DARWIN-G4-DUMP: 0x0 0 2 n/a PPC_RELOC_PAIR 1 0x14
+; DARWIN-G4-DUMP: 0x2C 0 2 n/a PPC_RELOC_HA16_SECTDIFF 1 0x74
+; DARWIN-G4-DUMP: 0x60 0 2 n/a PPC_RELOC_PAIR 1 0x14
; DARWIN-G4-DUMP: }
; DARWIN-G4-DUMP: Section __picsymbolstub1 {
-; DARWIN-G4-DUMP: 0x14 0 2 n/a PPC_RELOC_LO16_SECTDIFF 1 _main
-; DARWIN-G4-DUMP: 0x0 0 2 n/a PPC_RELOC_PAIR 1 _main
-; DARWIN-G4-DUMP: 0xC 0 2 n/a PPC_RELOC_HA16_SECTDIFF 1 _main
-; DARWIN-G4-DUMP: 0x18 0 2 n/a PPC_RELOC_PAIR 1 _main
+; DARWIN-G4-DUMP: 0x14 0 2 n/a PPC_RELOC_LO16_SECTDIFF 1 0x70
+; DARWIN-G4-DUMP: 0x0 0 2 n/a PPC_RELOC_PAIR 1 0x58
+; DARWIN-G4-DUMP: 0xC 0 2 n/a PPC_RELOC_HA16_SECTDIFF 1 0x70
+; DARWIN-G4-DUMP: 0x18 0 2 n/a PPC_RELOC_PAIR 1 0x58
; DARWIN-G4-DUMP: }
; DARWIN-G4-DUMP: Section __la_symbol_ptr {
; DARWIN-G4-DUMP: 0x0 0 2 1 PPC_RELOC_VANILLA 0 dyld_stub_binding_helper
diff --git a/test/CodeGen/PowerPC/lit.local.cfg b/test/CodeGen/PowerPC/lit.local.cfg
index 2e46300..5d33887 100644
--- a/test/CodeGen/PowerPC/lit.local.cfg
+++ b/test/CodeGen/PowerPC/lit.local.cfg
@@ -1,4 +1,3 @@
-targets = set(config.root.targets_to_build.split())
-if not 'PowerPC' in targets:
+if not 'PowerPC' in config.root.targets:
config.unsupported = True
diff --git a/test/CodeGen/PowerPC/ppc64-altivec-abi.ll b/test/CodeGen/PowerPC/ppc64-altivec-abi.ll
new file mode 100644
index 0000000..0bed329
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc64-altivec-abi.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=ppc64 -mattr=+altivec | FileCheck %s
+
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Verify that in the 64-bit Linux ABI, vector arguments take up space
+; in the parameter save area.
+
+define i64 @callee(i64 %a, <4 x i32> %b, i64 %c, <4 x i32> %d, i64 %e) {
+entry:
+ ret i64 %e
+}
+; CHECK-LABEL: callee:
+; CHECK: ld 3, 112(1)
+
+define void @caller(i64 %x, <4 x i32> %y) {
+entry:
+ tail call void @test(i64 %x, <4 x i32> %y, i64 %x, <4 x i32> %y, i64 %x)
+ ret void
+}
+; CHECK-LABEL: caller:
+; CHECK: std 3, 112(1)
+
+declare void @test(i64, <4 x i32>, i64, <4 x i32>, i64)
+
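
The 112 in both CHECK lines falls out of the parameter save area layout
(starting at offset 48 from the incoming r1 under 64-bit SVR4) applied to
@callee's argument list; a worked layout, not part of the test:

 ;   i64 %a        ->  48
 ;   <4 x i32> %b  ->  64   (56 rounded up: vector slots are 16-byte aligned)
 ;   i64 %c        ->  80
 ;   <4 x i32> %d  ->  96
 ;   i64 %e        -> 112   ; hence "ld 3, 112(1)" / "std 3, 112(1)"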
diff --git a/test/CodeGen/PowerPC/ppc64-byval-align.ll b/test/CodeGen/PowerPC/ppc64-byval-align.ll
new file mode 100644
index 0000000..0e73cf2
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc64-byval-align.ll
@@ -0,0 +1,56 @@
+; RUN: llc -O1 < %s -march=ppc64 -mcpu=pwr7 | FileCheck %s
+
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%struct.test = type { i64, [8 x i8] }
+%struct.pad = type { [8 x i64] }
+
+@gt = common global %struct.test zeroinitializer, align 16
+@gp = common global %struct.pad zeroinitializer, align 8
+
+define signext i32 @callee1(i32 signext %x, %struct.test* byval align 16 nocapture readnone %y, i32 signext %z) {
+entry:
+ ret i32 %z
+}
+; CHECK-LABEL: @callee1
+; CHECK: mr 3, 7
+; CHECK: blr
+
+declare signext i32 @test1(i32 signext, %struct.test* byval align 16, i32 signext)
+define void @caller1(i32 signext %z) {
+entry:
+ %call = tail call signext i32 @test1(i32 signext 0, %struct.test* byval align 16 @gt, i32 signext %z)
+ ret void
+}
+; CHECK-LABEL: @caller1
+; CHECK: mr [[REG:[0-9]+]], 3
+; CHECK: mr 7, [[REG]]
+; CHECK: bl test1
+
+define i64 @callee2(%struct.pad* byval nocapture readnone %x, i32 signext %y, %struct.test* byval align 16 nocapture readonly %z) {
+entry:
+ %x1 = getelementptr inbounds %struct.test* %z, i64 0, i32 0
+ %0 = load i64* %x1, align 16
+ ret i64 %0
+}
+; CHECK-LABEL: @callee2
+; CHECK: ld [[REG:[0-9]+]], 128(1)
+; CHECK: mr 3, [[REG]]
+; CHECK: blr
+
+declare i64 @test2(%struct.pad* byval, i32 signext, %struct.test* byval align 16)
+define void @caller2(i64 %z) {
+entry:
+ %tmp = alloca %struct.test, align 16
+ %.compoundliteral.sroa.0.0..sroa_idx = getelementptr inbounds %struct.test* %tmp, i64 0, i32 0
+ store i64 %z, i64* %.compoundliteral.sroa.0.0..sroa_idx, align 16
+ %call = call i64 @test2(%struct.pad* byval @gp, i32 signext 0, %struct.test* byval align 16 %tmp)
+ ret void
+}
+; CHECK-LABEL: @caller2
+; CHECK: std 3, [[OFF:[0-9]+]](1)
+; CHECK: ld [[REG:[0-9]+]], [[OFF]](1)
+; CHECK: std [[REG]], 128(1)
+; CHECK: bl test2
+
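
Likewise, the 128(1) offsets here follow from 16-byte alignment of the byval
slot within the parameter save area; a worked layout for @callee2, under the
same 64-bit SVR4 assumptions:

 ;   %struct.pad byval (64 bytes)     ->  48..111
 ;   i32 signext %y                   -> 112  (one doubleword slot)
 ;   %struct.test byval align 16 %z   -> 128  (120 rounded up to 16)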
diff --git a/test/CodeGen/PowerPC/ppc64-calls.ll b/test/CodeGen/PowerPC/ppc64-calls.ll
index 1f3bb71..31794be 100644
--- a/test/CodeGen/PowerPC/ppc64-calls.ll
+++ b/test/CodeGen/PowerPC/ppc64-calls.ll
@@ -42,12 +42,18 @@ define void @test_indirect(void ()* nocapture %fp) nounwind {
ret void
}
-; Absolute vales should be have the TOC restore 'nop'
+; Absolute values must use the regular indirect call sequence
+; The main purpose of this test is to ensure that BLA is not
+; used on 64-bit SVR4 (as e.g. on Darwin).
define void @test_abs() nounwind {
; CHECK-LABEL: test_abs:
tail call void inttoptr (i64 1024 to void ()*)() nounwind
-; CHECK: bla 1024
-; CHECK-NEXT: nop
+; CHECK: ld [[FP:[0-9]+]], 1024(0)
+; CHECK: ld 11, 1040(0)
+; CHECK: ld 2, 1032(0)
+; CHECK-NEXT: mtctr [[FP]]
+; CHECK-NEXT: bctrl
+; CHECK-NEXT: ld 2, 40(1)
ret void
}
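
The replacement sequence reflects ELFv1 function descriptors: an indirect
call loads the entry point, the callee's TOC pointer and the environment
pointer from three consecutive doublewords at the function's address, then
reloads the caller's TOC from its save slot afterwards. Annotated (base
register 0 in a D-form address means a literal zero, so these are absolute
loads):

 ; ld [[FP]], 1024(0)   ; descriptor word 0: code entry point
 ; ld 2,  1032(0)       ; descriptor word 1: callee's TOC pointer
 ; ld 11, 1040(0)       ; descriptor word 2: environment pointer
 ; mtctr [[FP]]         ; branch through CTR ...
 ; bctrl
 ; ld 2, 40(1)          ; ... and restore the caller's TOC afterwards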
diff --git a/test/CodeGen/PowerPC/ppc64-smallarg.ll b/test/CodeGen/PowerPC/ppc64-smallarg.ll
new file mode 100644
index 0000000..0d5b078
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc64-smallarg.ll
@@ -0,0 +1,59 @@
+; Verify that small structures and float arguments are passed in the
+; least significant part of a stack slot doubleword.
+
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%struct.large_arg = type { [8 x i64] }
+%struct.small_arg = type { i16, i8 }
+
+@gl = common global %struct.large_arg zeroinitializer, align 8
+@gs = common global %struct.small_arg zeroinitializer, align 2
+@gf = common global float 0.000000e+00, align 4
+
+define void @callee1(%struct.small_arg* noalias nocapture sret %agg.result, %struct.large_arg* byval nocapture readnone %pad, %struct.small_arg* byval nocapture readonly %x) {
+entry:
+ %0 = bitcast %struct.small_arg* %x to i32*
+ %1 = bitcast %struct.small_arg* %agg.result to i32*
+ %2 = load i32* %0, align 2
+ store i32 %2, i32* %1, align 2
+ ret void
+}
+; CHECK: @callee1
+; CHECK: lwz {{[0-9]+}}, 124(1)
+; CHECK: blr
+
+define void @caller1() {
+entry:
+ %tmp = alloca %struct.small_arg, align 2
+ call void @test1(%struct.small_arg* sret %tmp, %struct.large_arg* byval @gl, %struct.small_arg* byval @gs)
+ ret void
+}
+; CHECK: @caller1
+; CHECK: stw {{[0-9]+}}, 124(1)
+; CHECK: bl test1
+
+declare void @test1(%struct.small_arg* sret, %struct.large_arg* byval, %struct.small_arg* byval)
+
+define float @callee2(float %pad1, float %pad2, float %pad3, float %pad4, float %pad5, float %pad6, float %pad7, float %pad8, float %pad9, float %pad10, float %pad11, float %pad12, float %pad13, float %x) {
+entry:
+ ret float %x
+}
+; CHECK: @callee2
+; CHECK: lfs {{[0-9]+}}, 156(1)
+; CHECK: blr
+
+define void @caller2() {
+entry:
+ %0 = load float* @gf, align 4
+ %call = tail call float @test2(float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float %0)
+ ret void
+}
+; CHECK: @caller2
+; CHECK: stfs {{[0-9]+}}, 156(1)
+; CHECK: bl test2
+
+declare float @test2(float, float, float, float, float, float, float, float, float, float, float, float, float, float)
+
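
The odd-looking offsets are the point of this test: each argument lands in
its doubleword slot right-justified, so on big-endian the meaningful bytes
sit in the high-address half. A worked reading (the little-endian twin of
this file, next, checks 120 and 152 for the same slots):

 ;   lwz/stw ..., 124(1)   ; small struct's slot is 120 (48 + 8 for the sret
 ;                         ; pointer + 64 for the byval pad); its 4 bytes
 ;                         ; start at 120+4
 ;   lfs/stfs ..., 156(1)  ; %x is the 14th float: slot 48 + 13*8 = 152,
 ;                         ; float at 152+4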
diff --git a/test/CodeGen/PowerPC/ppc64le-smallarg.ll b/test/CodeGen/PowerPC/ppc64le-smallarg.ll
new file mode 100644
index 0000000..fcb1e92
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc64le-smallarg.ll
@@ -0,0 +1,59 @@
+; Verify that small structures and float arguments are passed in the
+; least significant part of a stack slot doubleword.
+
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-n32:64"
+target triple = "powerpc64le-unknown-linux-gnu"
+
+%struct.large_arg = type { [8 x i64] }
+%struct.small_arg = type { i16, i8 }
+
+@gl = common global %struct.large_arg zeroinitializer, align 8
+@gs = common global %struct.small_arg zeroinitializer, align 2
+@gf = common global float 0.000000e+00, align 4
+
+define void @callee1(%struct.small_arg* noalias nocapture sret %agg.result, %struct.large_arg* byval nocapture readnone %pad, %struct.small_arg* byval nocapture readonly %x) {
+entry:
+ %0 = bitcast %struct.small_arg* %x to i32*
+ %1 = bitcast %struct.small_arg* %agg.result to i32*
+ %2 = load i32* %0, align 2
+ store i32 %2, i32* %1, align 2
+ ret void
+}
+; CHECK: @callee1
+; CHECK: lwz {{[0-9]+}}, 120(1)
+; CHECK: blr
+
+define void @caller1() {
+entry:
+ %tmp = alloca %struct.small_arg, align 2
+ call void @test1(%struct.small_arg* sret %tmp, %struct.large_arg* byval @gl, %struct.small_arg* byval @gs)
+ ret void
+}
+; CHECK: @caller1
+; CHECK: stw {{[0-9]+}}, 120(1)
+; CHECK: bl test1
+
+declare void @test1(%struct.small_arg* sret, %struct.large_arg* byval, %struct.small_arg* byval)
+
+define float @callee2(float %pad1, float %pad2, float %pad3, float %pad4, float %pad5, float %pad6, float %pad7, float %pad8, float %pad9, float %pad10, float %pad11, float %pad12, float %pad13, float %x) {
+entry:
+ ret float %x
+}
+; CHECK: @callee2
+; CHECK: lfs {{[0-9]+}}, 152(1)
+; CHECK: blr
+
+define void @caller2() {
+entry:
+ %0 = load float* @gf, align 4
+ %call = tail call float @test2(float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float %0)
+ ret void
+}
+; CHECK: @caller2
+; CHECK: stfs {{[0-9]+}}, 152(1)
+; CHECK: bl test2
+
+declare float @test2(float, float, float, float, float, float, float, float, float, float, float, float, float, float)
+
diff --git a/test/CodeGen/PowerPC/ppcf128-endian.ll b/test/CodeGen/PowerPC/ppcf128-endian.ll
new file mode 100644
index 0000000..2a5f13a
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppcf128-endian.ll
@@ -0,0 +1,154 @@
+; RUN: llc -mcpu=pwr7 -mattr=+altivec < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-n32:64"
+target triple = "powerpc64le-unknown-linux-gnu"
+
+@g = common global ppc_fp128 0xM00000000000000000000000000000000, align 16
+
+define void @callee(ppc_fp128 %x) {
+entry:
+ %x.addr = alloca ppc_fp128, align 16
+ store ppc_fp128 %x, ppc_fp128* %x.addr, align 16
+ %0 = load ppc_fp128* %x.addr, align 16
+ store ppc_fp128 %0, ppc_fp128* @g, align 16
+ ret void
+}
+; CHECK: @callee
+; CHECK: ld [[REG:[0-9]+]], .LC
+; CHECK: stfd 2, 8([[REG]])
+; CHECK: stfd 1, 0([[REG]])
+; CHECK: blr
+
+define void @caller() {
+entry:
+ %0 = load ppc_fp128* @g, align 16
+ call void @test(ppc_fp128 %0)
+ ret void
+}
+; CHECK: @caller
+; CHECK: ld [[REG:[0-9]+]], .LC
+; CHECK: lfd 2, 8([[REG]])
+; CHECK: lfd 1, 0([[REG]])
+; CHECK: bl test
+
+declare void @test(ppc_fp128)
+
+define void @caller_const() {
+entry:
+ call void @test(ppc_fp128 0xM3FF00000000000000000000000000000)
+ ret void
+}
+; CHECK: .LCPI[[LC:[0-9]+]]_0:
+; CHECK: .long 1065353216
+; CHECK: .LCPI[[LC]]_1:
+; CHECK: .long 0
+; CHECK: @caller_const
+; CHECK: addi [[REG0:[0-9]+]], {{[0-9]+}}, .LCPI[[LC]]_0
+; CHECK: addi [[REG1:[0-9]+]], {{[0-9]+}}, .LCPI[[LC]]_1
+; CHECK: lfs 1, 0([[REG0]])
+; CHECK: lfs 2, 0([[REG1]])
+; CHECK: bl test
+
+define ppc_fp128 @result() {
+entry:
+ %0 = load ppc_fp128* @g, align 16
+ ret ppc_fp128 %0
+}
+; CHECK: @result
+; CHECK: ld [[REG:[0-9]+]], .LC
+; CHECK: lfd 1, 0([[REG]])
+; CHECK: lfd 2, 8([[REG]])
+; CHECK: blr
+
+define void @use_result() {
+entry:
+ %call = tail call ppc_fp128 @test_result() #3
+ store ppc_fp128 %call, ppc_fp128* @g, align 16
+ ret void
+}
+; CHECK: @use_result
+; CHECK: bl test_result
+; CHECK: ld [[REG:[0-9]+]], .LC
+; CHECK: stfd 2, 8([[REG]])
+; CHECK: stfd 1, 0([[REG]])
+; CHECK: blr
+
+declare ppc_fp128 @test_result()
+
+define void @caller_result() {
+entry:
+ %call = tail call ppc_fp128 @test_result()
+ tail call void @test(ppc_fp128 %call)
+ ret void
+}
+; CHECK: @caller_result
+; CHECK: bl test_result
+; CHECK-NEXT: nop
+; CHECK-NEXT: bl test
+; CHECK-NEXT: nop
+
+define i128 @convert_from(ppc_fp128 %x) {
+entry:
+ %0 = bitcast ppc_fp128 %x to i128
+ ret i128 %0
+}
+; CHECK: @convert_from
+; CHECK: stfd 1, [[OFF1:.*]](1)
+; CHECK: stfd 2, [[OFF2:.*]](1)
+; CHECK: ld 3, [[OFF1]](1)
+; CHECK: ld 4, [[OFF2]](1)
+; CHECK: blr
+
+define ppc_fp128 @convert_to(i128 %x) {
+entry:
+ %0 = bitcast i128 %x to ppc_fp128
+ ret ppc_fp128 %0
+}
+; CHECK: @convert_to
+; CHECK: std 3, [[OFF1:.*]](1)
+; CHECK: std 4, [[OFF2:.*]](1)
+; CHECK: lfd 1, [[OFF1]](1)
+; CHECK: lfd 2, [[OFF2]](1)
+; CHECK: blr
+
+define ppc_fp128 @convert_to2(i128 %x) {
+entry:
+ %shl = shl i128 %x, 1
+ %0 = bitcast i128 %shl to ppc_fp128
+ ret ppc_fp128 %0
+}
+
+; CHECK: @convert_to
+; CHECK: std 3, [[OFF1:.*]](1)
+; CHECK: std 4, [[OFF2:.*]](1)
+; CHECK: lfd 1, [[OFF1]](1)
+; CHECK: lfd 2, [[OFF2]](1)
+; CHECK: blr
+
+define double @convert_vector(<4 x i32> %x) {
+entry:
+ %cast = bitcast <4 x i32> %x to ppc_fp128
+ %conv = fptrunc ppc_fp128 %cast to double
+ ret double %conv
+}
+; CHECK: @convert_vector
+; CHECK: addi [[REG:[0-9]+]], 1, [[OFF:.*]]
+; CHECK: stvx 2, 0, [[REG]]
+; CHECK: lfd 1, [[OFF]](1)
+; CHECK: blr
+
+declare void @llvm.va_start(i8*)
+
+define double @vararg(i32 %a, ...) {
+entry:
+ %va = alloca i8*, align 8
+ %va1 = bitcast i8** %va to i8*
+ call void @llvm.va_start(i8* %va1)
+ %arg = va_arg i8** %va, ppc_fp128
+ %conv = fptrunc ppc_fp128 %arg to double
+ ret double %conv
+}
+; CHECK: @vararg
+; CHECK: lfd 1, 0({{[0-9]+}})
+; CHECK: blr
+
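
Throughout this file a ppc_fp128 travels as two doubles in f1/f2, and the
CHECK pairs pin f1 to offset 0 and f2 to offset 8 in memory, i.e. register
order matches memory order on little-endian as well. In sketch form, for
the stores in @callee:

 ; stfd 1, 0([[REG]])   ; first half of the ppc_fp128 -> lower address
 ; stfd 2, 8([[REG]])   ; second half                 -> higher address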
diff --git a/test/CodeGen/PowerPC/resolvefi-basereg.ll b/test/CodeGen/PowerPC/resolvefi-basereg.ll
new file mode 100644
index 0000000..62c2d13
--- /dev/null
+++ b/test/CodeGen/PowerPC/resolvefi-basereg.ll
@@ -0,0 +1,362 @@
+; RUN: llc -O0 -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 < %s | FileCheck %s
+
+; Due to a bug in resolveFrameIndex we ended up with invalid addresses
+; containing a base register 0. Verify that this no longer happens.
+; CHECK-NOT: (0)
+
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%struct.Info = type { i32, i32, i8*, i8*, i8*, [32 x i8*], i64, [32 x i64], i64, i64, i64, [32 x i64] }
+%struct.S1998 = type { [2 x i32*], i64, i64, double, i16, i32, [29 x %struct.anon], i16, i8, i32, [8 x i8] }
+%struct.anon = type { [16 x double], i32, i16, i32, [3 x i8], [6 x i8], [4 x i32], i8 }
+
+@info = global %struct.Info zeroinitializer, align 8
+@fails = global i32 0, align 4
+@intarray = global [256 x i32] zeroinitializer, align 4
+@s1998 = global %struct.S1998 zeroinitializer, align 16
+@a1998 = external global [5 x %struct.S1998]
+
+define void @test1998() {
+entry:
+ %i = alloca i32, align 4
+ %j = alloca i32, align 4
+ %tmp = alloca i32, align 4
+ %agg.tmp = alloca %struct.S1998, align 16
+ %agg.tmp111 = alloca %struct.S1998, align 16
+ %agg.tmp112 = alloca %struct.S1998, align 16
+ %agg.tmp113 = alloca %struct.S1998, align 16
+ %agg.tmp114 = alloca %struct.S1998, align 16
+ %agg.tmp115 = alloca %struct.S1998, align 16
+ %agg.tmp116 = alloca %struct.S1998, align 16
+ %agg.tmp117 = alloca %struct.S1998, align 16
+ %agg.tmp118 = alloca %struct.S1998, align 16
+ %agg.tmp119 = alloca %struct.S1998, align 16
+ call void @llvm.memset.p0i8.i64(i8* bitcast (%struct.S1998* @s1998 to i8*), i8 0, i64 5168, i32 16, i1 false)
+ call void @llvm.memset.p0i8.i64(i8* bitcast ([5 x %struct.S1998]* @a1998 to i8*), i8 0, i64 25840, i32 16, i1 false)
+ call void @llvm.memset.p0i8.i64(i8* bitcast (%struct.Info* @info to i8*), i8 0, i64 832, i32 8, i1 false)
+ store i8* bitcast (%struct.S1998* @s1998 to i8*), i8** getelementptr inbounds (%struct.Info* @info, i32 0, i32 2), align 8
+ store i8* bitcast ([5 x %struct.S1998]* @a1998 to i8*), i8** getelementptr inbounds (%struct.Info* @info, i32 0, i32 3), align 8
+ store i8* bitcast (%struct.S1998* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 3) to i8*), i8** getelementptr inbounds (%struct.Info* @info, i32 0, i32 4), align 8
+ store i64 5168, i64* getelementptr inbounds (%struct.Info* @info, i32 0, i32 6), align 8
+ store i64 16, i64* getelementptr inbounds (%struct.Info* @info, i32 0, i32 8), align 8
+ store i64 16, i64* getelementptr inbounds (%struct.Info* @info, i32 0, i32 9), align 8
+ store i64 16, i64* getelementptr inbounds (%struct.Info* @info, i32 0, i32 10), align 8
+ %0 = load i64* getelementptr inbounds (%struct.Info* @info, i32 0, i32 8), align 8
+ %sub = sub i64 %0, 1
+ %and = and i64 ptrtoint (%struct.S1998* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 3) to i64), %sub
+ %tobool = icmp ne i64 %and, 0
+ br i1 %tobool, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ %1 = load i32* @fails, align 4
+ %inc = add nsw i32 %1, 1
+ store i32 %inc, i32* @fails, align 4
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ store i32 0, i32* %i, align 4
+ store i32 0, i32* %j, align 4
+ %2 = load i32* %i, align 4
+ %idxprom = sext i32 %2 to i64
+ %arrayidx = getelementptr inbounds [32 x i8*]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom
+ store i8* bitcast (i32** getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 0, i64 1) to i8*), i8** %arrayidx, align 8
+ %3 = load i32* %i, align 4
+ %idxprom1 = sext i32 %3 to i64
+ %arrayidx2 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom1
+ store i64 8, i64* %arrayidx2, align 8
+ %4 = load i32* %i, align 4
+ %idxprom3 = sext i32 %4 to i64
+ %arrayidx4 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom3
+ store i64 8, i64* %arrayidx4, align 8
+ store i32* getelementptr inbounds ([256 x i32]* @intarray, i32 0, i64 190), i32** getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 0, i64 1), align 8
+ store i32* getelementptr inbounds ([256 x i32]* @intarray, i32 0, i64 241), i32** getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 0, i64 1), align 8
+ %5 = load i32* %i, align 4
+ %inc5 = add nsw i32 %5, 1
+ store i32 %inc5, i32* %i, align 4
+ %6 = load i32* %i, align 4
+ %idxprom6 = sext i32 %6 to i64
+ %arrayidx7 = getelementptr inbounds [32 x i8*]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom6
+ store i8* bitcast (i64* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 1) to i8*), i8** %arrayidx7, align 8
+ %7 = load i32* %i, align 4
+ %idxprom8 = sext i32 %7 to i64
+ %arrayidx9 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom8
+ store i64 8, i64* %arrayidx9, align 8
+ %8 = load i32* %i, align 4
+ %idxprom10 = sext i32 %8 to i64
+ %arrayidx11 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom10
+ store i64 8, i64* %arrayidx11, align 8
+ store i64 -3866974208859106459, i64* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 1), align 8
+ store i64 -185376695371304091, i64* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 1), align 8
+ %9 = load i32* %i, align 4
+ %inc12 = add nsw i32 %9, 1
+ store i32 %inc12, i32* %i, align 4
+ %10 = load i32* %i, align 4
+ %idxprom13 = sext i32 %10 to i64
+ %arrayidx14 = getelementptr inbounds [32 x i8*]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom13
+ store i8* bitcast (i64* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 2) to i8*), i8** %arrayidx14, align 8
+ %11 = load i32* %i, align 4
+ %idxprom15 = sext i32 %11 to i64
+ %arrayidx16 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom15
+ store i64 8, i64* %arrayidx16, align 8
+ %12 = load i32* %i, align 4
+ %idxprom17 = sext i32 %12 to i64
+ %arrayidx18 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom17
+ store i64 8, i64* %arrayidx18, align 8
+ store i64 -963638028680427187, i64* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 2), align 8
+ store i64 7510542175772455554, i64* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 2), align 8
+ %13 = load i32* %i, align 4
+ %inc19 = add nsw i32 %13, 1
+ store i32 %inc19, i32* %i, align 4
+ %14 = load i32* %i, align 4
+ %idxprom20 = sext i32 %14 to i64
+ %arrayidx21 = getelementptr inbounds [32 x i8*]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom20
+ store i8* bitcast (double* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 3) to i8*), i8** %arrayidx21, align 8
+ %15 = load i32* %i, align 4
+ %idxprom22 = sext i32 %15 to i64
+ %arrayidx23 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom22
+ store i64 8, i64* %arrayidx23, align 8
+ %16 = load i32* %i, align 4
+ %idxprom24 = sext i32 %16 to i64
+ %arrayidx25 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom24
+ store i64 16, i64* %arrayidx25, align 8
+ store double 0xC0F8783300000000, double* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 3), align 16
+ store double 0xC10DF3CCC0000000, double* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 3), align 16
+ %17 = load i32* %i, align 4
+ %inc26 = add nsw i32 %17, 1
+ store i32 %inc26, i32* %i, align 4
+ %18 = load i32* %i, align 4
+ %idxprom27 = sext i32 %18 to i64
+ %arrayidx28 = getelementptr inbounds [32 x i8*]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom27
+ store i8* bitcast (i16* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 4) to i8*), i8** %arrayidx28, align 8
+ %19 = load i32* %i, align 4
+ %idxprom29 = sext i32 %19 to i64
+ %arrayidx30 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom29
+ store i64 2, i64* %arrayidx30, align 8
+ %20 = load i32* %i, align 4
+ %idxprom31 = sext i32 %20 to i64
+ %arrayidx32 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom31
+ store i64 2, i64* %arrayidx32, align 8
+ store i16 -15897, i16* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 4), align 2
+ store i16 30935, i16* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 4), align 2
+ %21 = load i32* %i, align 4
+ %inc33 = add nsw i32 %21, 1
+ store i32 %inc33, i32* %i, align 4
+ store i32 -419541644, i32* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 5), align 4
+ store i32 2125926812, i32* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 5), align 4
+ %22 = load i32* %j, align 4
+ %inc34 = add nsw i32 %22, 1
+ store i32 %inc34, i32* %j, align 4
+ %23 = load i32* %i, align 4
+ %idxprom35 = sext i32 %23 to i64
+ %arrayidx36 = getelementptr inbounds [32 x i8*]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom35
+ store i8* bitcast (double* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 0, i64 0) to i8*), i8** %arrayidx36, align 8
+ %24 = load i32* %i, align 4
+ %idxprom37 = sext i32 %24 to i64
+ %arrayidx38 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom37
+ store i64 8, i64* %arrayidx38, align 8
+ %25 = load i32* %i, align 4
+ %idxprom39 = sext i32 %25 to i64
+ %arrayidx40 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom39
+ store i64 8, i64* %arrayidx40, align 8
+ store double 0xC0FC765780000000, double* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 0, i64 0), align 8
+ store double 0xC1025CD7A0000000, double* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 0, i64 0), align 8
+ %26 = load i32* %i, align 4
+ %inc41 = add nsw i32 %26, 1
+ store i32 %inc41, i32* %i, align 4
+ %bf.load = load i32* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 1), align 8
+ %bf.clear = and i32 %bf.load, 7
+ %bf.set = or i32 %bf.clear, 16
+ store i32 %bf.set, i32* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 1), align 8
+ %bf.load42 = load i32* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 1), align 8
+ %bf.clear43 = and i32 %bf.load42, 7
+ %bf.set44 = or i32 %bf.clear43, 24
+ store i32 %bf.set44, i32* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 1), align 8
+ %27 = load i32* %j, align 4
+ %inc45 = add nsw i32 %27, 1
+ store i32 %inc45, i32* %j, align 4
+ %bf.load46 = load i16* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 2), align 4
+ %bf.clear47 = and i16 %bf.load46, 127
+ store i16 %bf.clear47, i16* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 2), align 4
+ %bf.load48 = load i16* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 2), align 4
+ %bf.clear49 = and i16 %bf.load48, 127
+ store i16 %bf.clear49, i16* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 2), align 4
+ %28 = load i32* %j, align 4
+ %inc50 = add nsw i32 %28, 1
+ store i32 %inc50, i32* %j, align 4
+ %bf.load51 = load i32* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 3), align 8
+ %bf.clear52 = and i32 %bf.load51, 63
+ store i32 %bf.clear52, i32* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 3), align 8
+ %bf.load53 = load i32* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 3), align 8
+ %bf.clear54 = and i32 %bf.load53, 63
+ %bf.set55 = or i32 %bf.clear54, 64
+ store i32 %bf.set55, i32* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 3), align 8
+ %29 = load i32* %j, align 4
+ %inc56 = add nsw i32 %29, 1
+ store i32 %inc56, i32* %j, align 4
+ %bf.load57 = load i24* bitcast ([3 x i8]* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 4) to i24*), align 4
+ %bf.clear58 = and i24 %bf.load57, 63
+ store i24 %bf.clear58, i24* bitcast ([3 x i8]* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 4) to i24*), align 4
+ %bf.load59 = load i24* bitcast ([3 x i8]* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 4) to i24*), align 4
+ %bf.clear60 = and i24 %bf.load59, 63
+ store i24 %bf.clear60, i24* bitcast ([3 x i8]* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 4) to i24*), align 4
+ %30 = load i32* %j, align 4
+ %inc61 = add nsw i32 %30, 1
+ store i32 %inc61, i32* %j, align 4
+ %31 = load i32* %i, align 4
+ %idxprom62 = sext i32 %31 to i64
+ %arrayidx63 = getelementptr inbounds [32 x i8*]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom62
+ store i8* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 5, i64 5), i8** %arrayidx63, align 8
+ %32 = load i32* %i, align 4
+ %idxprom64 = sext i32 %32 to i64
+ %arrayidx65 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom64
+ store i64 1, i64* %arrayidx65, align 8
+ %33 = load i32* %i, align 4
+ %idxprom66 = sext i32 %33 to i64
+ %arrayidx67 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom66
+ store i64 1, i64* %arrayidx67, align 8
+ store i8 -83, i8* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 5, i64 5), align 1
+ store i8 -67, i8* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 5, i64 5), align 1
+ %34 = load i32* %i, align 4
+ %inc68 = add nsw i32 %34, 1
+ store i32 %inc68, i32* %i, align 4
+ %35 = load i32* %i, align 4
+ %idxprom69 = sext i32 %35 to i64
+ %arrayidx70 = getelementptr inbounds [32 x i8*]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom69
+ store i8* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 5, i64 1), i8** %arrayidx70, align 8
+ %36 = load i32* %i, align 4
+ %idxprom71 = sext i32 %36 to i64
+ %arrayidx72 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom71
+ store i64 1, i64* %arrayidx72, align 8
+ %37 = load i32* %i, align 4
+ %idxprom73 = sext i32 %37 to i64
+ %arrayidx74 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom73
+ store i64 1, i64* %arrayidx74, align 8
+ store i8 34, i8* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 5, i64 1), align 1
+ store i8 64, i8* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 5, i64 1), align 1
+ %38 = load i32* %i, align 4
+ %inc75 = add nsw i32 %38, 1
+ store i32 %inc75, i32* %i, align 4
+ %39 = load i32* %i, align 4
+ %idxprom76 = sext i32 %39 to i64
+ %arrayidx77 = getelementptr inbounds [32 x i8*]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom76
+ store i8* bitcast (i32* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 6, i64 3) to i8*), i8** %arrayidx77, align 8
+ %40 = load i32* %i, align 4
+ %idxprom78 = sext i32 %40 to i64
+ %arrayidx79 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom78
+ store i64 4, i64* %arrayidx79, align 8
+ %41 = load i32* %i, align 4
+ %idxprom80 = sext i32 %41 to i64
+ %arrayidx81 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom80
+ store i64 4, i64* %arrayidx81, align 8
+ store i32 -3, i32* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 6, i64 3), align 4
+ store i32 -3, i32* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 6, i64 3), align 4
+ %42 = load i32* %i, align 4
+ %inc82 = add nsw i32 %42, 1
+ store i32 %inc82, i32* %i, align 4
+ %43 = load i32* %i, align 4
+ %idxprom83 = sext i32 %43 to i64
+ %arrayidx84 = getelementptr inbounds [32 x i8*]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom83
+ store i8* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 7), i8** %arrayidx84, align 8
+ %44 = load i32* %i, align 4
+ %idxprom85 = sext i32 %44 to i64
+ %arrayidx86 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom85
+ store i64 1, i64* %arrayidx86, align 8
+ %45 = load i32* %i, align 4
+ %idxprom87 = sext i32 %45 to i64
+ %arrayidx88 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom87
+ store i64 1, i64* %arrayidx88, align 8
+ store i8 106, i8* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 7), align 1
+ store i8 -102, i8* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 7), align 1
+ %46 = load i32* %i, align 4
+ %inc89 = add nsw i32 %46, 1
+ store i32 %inc89, i32* %i, align 4
+ %47 = load i32* %i, align 4
+ %idxprom90 = sext i32 %47 to i64
+ %arrayidx91 = getelementptr inbounds [32 x i8*]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom90
+ store i8* bitcast (i16* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 7) to i8*), i8** %arrayidx91, align 8
+ %48 = load i32* %i, align 4
+ %idxprom92 = sext i32 %48 to i64
+ %arrayidx93 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom92
+ store i64 2, i64* %arrayidx93, align 8
+ %49 = load i32* %i, align 4
+ %idxprom94 = sext i32 %49 to i64
+ %arrayidx95 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom94
+ store i64 2, i64* %arrayidx95, align 8
+ store i16 29665, i16* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 7), align 2
+ store i16 7107, i16* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 7), align 2
+ %50 = load i32* %i, align 4
+ %inc96 = add nsw i32 %50, 1
+ store i32 %inc96, i32* %i, align 4
+ %51 = load i32* %i, align 4
+ %idxprom97 = sext i32 %51 to i64
+ %arrayidx98 = getelementptr inbounds [32 x i8*]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom97
+ store i8* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 8), i8** %arrayidx98, align 8
+ %52 = load i32* %i, align 4
+ %idxprom99 = sext i32 %52 to i64
+ %arrayidx100 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom99
+ store i64 1, i64* %arrayidx100, align 8
+ %53 = load i32* %i, align 4
+ %idxprom101 = sext i32 %53 to i64
+ %arrayidx102 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom101
+ store i64 1, i64* %arrayidx102, align 8
+ store i8 52, i8* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 8), align 1
+ store i8 -86, i8* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 8), align 1
+ %54 = load i32* %i, align 4
+ %inc103 = add nsw i32 %54, 1
+ store i32 %inc103, i32* %i, align 4
+ %55 = load i32* %i, align 4
+ %idxprom104 = sext i32 %55 to i64
+ %arrayidx105 = getelementptr inbounds [32 x i8*]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom104
+ store i8* bitcast (i32* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 9) to i8*), i8** %arrayidx105, align 8
+ %56 = load i32* %i, align 4
+ %idxprom106 = sext i32 %56 to i64
+ %arrayidx107 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom106
+ store i64 4, i64* %arrayidx107, align 8
+ %57 = load i32* %i, align 4
+ %idxprom108 = sext i32 %57 to i64
+ %arrayidx109 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom108
+ store i64 4, i64* %arrayidx109, align 8
+ store i32 -54118453, i32* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 9), align 4
+ store i32 1668755823, i32* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 9), align 4
+ %58 = load i32* %i, align 4
+ %inc110 = add nsw i32 %58, 1
+ store i32 %inc110, i32* %i, align 4
+ store i32 %inc110, i32* %tmp
+ %59 = load i32* %tmp
+ %60 = load i32* %i, align 4
+ store i32 %60, i32* getelementptr inbounds (%struct.Info* @info, i32 0, i32 0), align 4
+ %61 = load i32* %j, align 4
+ store i32 %61, i32* getelementptr inbounds (%struct.Info* @info, i32 0, i32 1), align 4
+ %62 = bitcast %struct.S1998* %agg.tmp111 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %62, i8* bitcast (%struct.S1998* @s1998 to i8*), i64 5168, i32 16, i1 false)
+ %63 = bitcast %struct.S1998* %agg.tmp112 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %63, i8* bitcast (%struct.S1998* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2) to i8*), i64 5168, i32 16, i1 false)
+ call void @check1998(%struct.S1998* sret %agg.tmp, %struct.S1998* byval align 16 %agg.tmp111, %struct.S1998* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 1), %struct.S1998* byval align 16 %agg.tmp112)
+ call void @checkx1998(%struct.S1998* byval align 16 %agg.tmp)
+ %64 = bitcast %struct.S1998* %agg.tmp113 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %64, i8* bitcast (%struct.S1998* @s1998 to i8*), i64 5168, i32 16, i1 false)
+ %65 = bitcast %struct.S1998* %agg.tmp114 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %65, i8* bitcast (%struct.S1998* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2) to i8*), i64 5168, i32 16, i1 false)
+ %66 = bitcast %struct.S1998* %agg.tmp115 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %66, i8* bitcast (%struct.S1998* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2) to i8*), i64 5168, i32 16, i1 false)
+ call void (i32, ...)* @check1998va(i32 signext 1, double 1.000000e+00, %struct.S1998* byval align 16 %agg.tmp113, i64 2, %struct.S1998* byval align 16 %agg.tmp114, %struct.S1998* byval align 16 %agg.tmp115)
+ %67 = bitcast %struct.S1998* %agg.tmp116 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %67, i8* bitcast (%struct.S1998* @s1998 to i8*), i64 5168, i32 16, i1 false)
+ %68 = bitcast %struct.S1998* %agg.tmp117 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %68, i8* bitcast (%struct.S1998* @s1998 to i8*), i64 5168, i32 16, i1 false)
+ %69 = bitcast %struct.S1998* %agg.tmp118 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %69, i8* bitcast (%struct.S1998* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2) to i8*), i64 5168, i32 16, i1 false)
+ %70 = bitcast %struct.S1998* %agg.tmp119 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %70, i8* bitcast (%struct.S1998* @s1998 to i8*), i64 5168, i32 16, i1 false)
+ call void (i32, ...)* @check1998va(i32 signext 2, %struct.S1998* byval align 16 %agg.tmp116, %struct.S1998* byval align 16 %agg.tmp117, ppc_fp128 0xM40000000000000000000000000000000, %struct.S1998* byval align 16 %agg.tmp118, %struct.S1998* byval align 16 %agg.tmp119)
+ ret void
+}
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1)
+
+declare void @check1998(%struct.S1998* sret, %struct.S1998* byval align 16, %struct.S1998*, %struct.S1998* byval align 16)
+declare void @check1998va(i32 signext, ...)
+declare void @checkx1998(%struct.S1998* byval align 16 %arg)
+
diff --git a/test/CodeGen/PowerPC/svr4-redzone.ll b/test/CodeGen/PowerPC/svr4-redzone.ll
index 7c51b67..bee3ac3 100644
--- a/test/CodeGen/PowerPC/svr4-redzone.ll
+++ b/test/CodeGen/PowerPC/svr4-redzone.ll
@@ -36,4 +36,4 @@ entry:
; PPC32: stwu 1, -240(1)
; PPC64-LABEL: bigstack:
-; PPC64: stdu 1, -352(1)
+; PPC64: stdu 1, -288(1)
diff --git a/test/CodeGen/PowerPC/vec_cmp.ll b/test/CodeGen/PowerPC/vec_cmp.ll
index 4bce8c8..2733089 100644
--- a/test/CodeGen/PowerPC/vec_cmp.ll
+++ b/test/CodeGen/PowerPC/vec_cmp.ll
@@ -36,7 +36,7 @@ define <8 x i8> @v8si8_cmp(<8 x i8> %x, <8 x i8> %y) nounwind readnone {
; CHECK: vcmpequh {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
-; Adicional tests for v16i8 since it is a altivec native type
+; Additional tests for v16i8 since it is an altivec native type
define <16 x i8> @v16si8_cmp_eq(<16 x i8> %x, <16 x i8> %y) nounwind readnone {
%cmp = icmp eq <16 x i8> %x, %y
@@ -165,7 +165,7 @@ define <4 x i16> @v4si16_cmp(<4 x i16> %x, <4 x i16> %y) nounwind readnone {
; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
-; Adicional tests for v8i16 since it is an altivec native type
+; Additional tests for v8i16 since it is an altivec native type
define <8 x i16> @v8si16_cmp_eq(<8 x i16> %x, <8 x i16> %y) nounwind readnone {
entry:
@@ -298,7 +298,7 @@ define <2 x i32> @v2si32_cmp(<2 x i32> %x, <2 x i32> %y) nounwind readnone {
; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
-; Adicional tests for v4si32 since it is an altivec native type
+; Additional tests for v4si32 since it is an altivec native type
define <4 x i32> @v4si32_cmp_eq(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
entry:
@@ -449,7 +449,7 @@ entry:
; CHECK: vcmpeqfp {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
-; Adicional tests for v4f32 since it is a altivec native type
+; Additional tests for v4f32 since it is an altivec native type
define <4 x float> @v4f32_cmp_eq(<4 x float> %x, <4 x float> %y) nounwind readnone {
entry:
diff --git a/test/CodeGen/PowerPC/vec_misaligned.ll b/test/CodeGen/PowerPC/vec_misaligned.ll
index d7ed64a..304a84d 100644
--- a/test/CodeGen/PowerPC/vec_misaligned.ll
+++ b/test/CodeGen/PowerPC/vec_misaligned.ll
@@ -1,4 +1,6 @@
-; RUN: llc < %s -march=ppc32 -mcpu=g5
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mattr=+altivec | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mattr=+altivec | FileCheck %s -check-prefix=CHECK-LE
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
target triple = "powerpc-apple-darwin8"
@@ -8,6 +10,8 @@ target triple = "powerpc-apple-darwin8"
define void @foo(i32 %x, ...) {
entry:
+; CHECK: foo:
+; CHECK-LE: foo:
%x_addr = alloca i32 ; <i32*> [#uses=1]
%ap = alloca i8* ; <i8**> [#uses=3]
%ap.0 = alloca i8* ; <i8**> [#uses=3]
@@ -27,6 +31,10 @@ entry:
%tmp8 = getelementptr %struct.u16qi* %tmp6, i32 0, i32 0 ; <<16 x i8>*> [#uses=1]
%tmp9 = getelementptr %struct.u16qi* %tmp7, i32 0, i32 0 ; <<16 x i8>*> [#uses=1]
%tmp10 = load <16 x i8>* %tmp9, align 4 ; <<16 x i8>> [#uses=1]
+; CHECK: lvsl
+; CHECK: vperm
+; CHECK-LE: lvsr
+; CHECK-LE: vperm
store <16 x i8> %tmp10, <16 x i8>* %tmp8, align 4
br label %return
diff --git a/test/CodeGen/PowerPC/vec_mul.ll b/test/CodeGen/PowerPC/vec_mul.ll
index c376751..8a44815 100644
--- a/test/CodeGen/PowerPC/vec_mul.ll
+++ b/test/CodeGen/PowerPC/vec_mul.ll
@@ -1,4 +1,6 @@
; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu -march=ppc32 -mattr=+altivec | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -march=ppc64 -mattr=+altivec | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -march=ppc64 -mattr=+altivec | FileCheck %s -check-prefix=CHECK-LE
define <4 x i32> @test_v4i32(<4 x i32>* %X, <4 x i32>* %Y) {
%tmp = load <4 x i32>* %X ; <<4 x i32>> [#uses=1]
@@ -9,6 +11,9 @@ define <4 x i32> @test_v4i32(<4 x i32>* %X, <4 x i32>* %Y) {
; CHECK-LABEL: test_v4i32:
; CHECK: vmsumuhm
; CHECK-NOT: mullw
+; CHECK-LE-LABEL: test_v4i32:
+; CHECK-LE: vmsumuhm
+; CHECK-LE-NOT: mullw
define <8 x i16> @test_v8i16(<8 x i16>* %X, <8 x i16>* %Y) {
%tmp = load <8 x i16>* %X ; <<8 x i16>> [#uses=1]
@@ -19,6 +24,9 @@ define <8 x i16> @test_v8i16(<8 x i16>* %X, <8 x i16>* %Y) {
; CHECK-LABEL: test_v8i16:
; CHECK: vmladduhm
; CHECK-NOT: mullw
+; CHECK-LE-LABEL: test_v8i16:
+; CHECK-LE: vmladduhm
+; CHECK-LE-NOT: mullw
define <16 x i8> @test_v16i8(<16 x i8>* %X, <16 x i8>* %Y) {
%tmp = load <16 x i8>* %X ; <<16 x i8>> [#uses=1]
@@ -30,6 +38,11 @@ define <16 x i8> @test_v16i8(<16 x i8>* %X, <16 x i8>* %Y) {
; CHECK: vmuloub
; CHECK: vmuleub
; CHECK-NOT: mullw
+; CHECK-LE-LABEL: test_v16i8:
+; CHECK-LE: vmuloub [[REG1:[0-9]+]]
+; CHECK-LE: vmuleub [[REG2:[0-9]+]]
+; CHECK-LE: vperm {{[0-9]+}}, [[REG2]], [[REG1]]
+; CHECK-LE-NOT: mullw
define <4 x float> @test_float(<4 x float>* %X, <4 x float>* %Y) {
%tmp = load <4 x float>* %X
@@ -44,3 +57,7 @@ define <4 x float> @test_float(<4 x float>* %X, <4 x float>* %Y) {
; CHECK: vspltisw [[ZNEG:[0-9]+]], -1
; CHECK: vslw {{[0-9]+}}, [[ZNEG]], [[ZNEG]]
; CHECK: vmaddfp
+; CHECK-LE-LABEL: test_float:
+; CHECK-LE: vspltisw [[ZNEG:[0-9]+]], -1
+; CHECK-LE: vslw {{[0-9]+}}, [[ZNEG]], [[ZNEG]]
+; CHECK-LE: vmaddfp
diff --git a/test/CodeGen/PowerPC/vec_shuffle_le.ll b/test/CodeGen/PowerPC/vec_shuffle_le.ll
new file mode 100644
index 0000000..635721c
--- /dev/null
+++ b/test/CodeGen/PowerPC/vec_shuffle_le.ll
@@ -0,0 +1,191 @@
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mattr=+altivec | FileCheck %s
+
+define void @VPKUHUM_xy(<16 x i8>* %A, <16 x i8>* %B) {
+entry:
+; CHECK: VPKUHUM_xy:
+ %tmp = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+; CHECK: vpkuhum
+ store <16 x i8> %tmp3, <16 x i8>* %A
+ ret void
+}
+
+define void @VPKUHUM_xx(<16 x i8>* %A) {
+entry:
+; CHECK: VPKUHUM_xx:
+ %tmp = load <16 x i8>* %A
+ %tmp2 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+; CHECK: vpkuhum
+ store <16 x i8> %tmp2, <16 x i8>* %A
+ ret void
+}
+
+define void @VPKUWUM_xy(<16 x i8>* %A, <16 x i8>* %B) {
+entry:
+; CHECK: VPKUWUM_xy:
+ %tmp = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9, i32 12, i32 13, i32 16, i32 17, i32 20, i32 21, i32 24, i32 25, i32 28, i32 29>
+; CHECK: vpkuwum
+ store <16 x i8> %tmp3, <16 x i8>* %A
+ ret void
+}
+
+define void @VPKUWUM_xx(<16 x i8>* %A) {
+entry:
+; CHECK: VPKUWUM_xx:
+ %tmp = load <16 x i8>* %A
+ %tmp2 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp, <16 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9, i32 12, i32 13, i32 0, i32 1, i32 4, i32 5, i32 8, i32 9, i32 12, i32 13>
+; CHECK: vpkuwum
+ store <16 x i8> %tmp2, <16 x i8>* %A
+ ret void
+}
+
+define void @VMRGLB_xy(<16 x i8>* %A, <16 x i8>* %B) {
+entry:
+; CHECK: VMRGLB_xy:
+ %tmp = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+; CHECK: vmrglb
+ store <16 x i8> %tmp3, <16 x i8>* %A
+ ret void
+}
+
+define void @VMRGLB_xx(<16 x i8>* %A) {
+entry:
+; CHECK: VMRGLB_xx:
+ %tmp = load <16 x i8>* %A
+ %tmp2 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
+; CHECK: vmrglb
+ store <16 x i8> %tmp2, <16 x i8>* %A
+ ret void
+}
+
+define void @VMRGHB_xy(<16 x i8>* %A, <16 x i8>* %B) {
+entry:
+; CHECK: VMRGHB_xy:
+ %tmp = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp2, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+; CHECK: vmrghb
+ store <16 x i8> %tmp3, <16 x i8>* %A
+ ret void
+}
+
+define void @VMRGHB_xx(<16 x i8>* %A) {
+entry:
+; CHECK: VMRGHB_xx:
+ %tmp = load <16 x i8>* %A
+ %tmp2 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp, <16 x i32> <i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15>
+; CHECK: vmrghb
+ store <16 x i8> %tmp2, <16 x i8>* %A
+ ret void
+}
+
+define void @VMRGLH_xy(<16 x i8>* %A, <16 x i8>* %B) {
+entry:
+; CHECK: VMRGLH_xy:
+ %tmp = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 2, i32 3, i32 18, i32 19, i32 4, i32 5, i32 20, i32 21, i32 6, i32 7, i32 22, i32 23>
+; CHECK: vmrglh
+ store <16 x i8> %tmp3, <16 x i8>* %A
+ ret void
+}
+
+define void @VMRGLH_xx(<16 x i8>* %A) {
+entry:
+; CHECK: VMRGLH_xx:
+ %tmp = load <16 x i8>* %A
+ %tmp2 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 4, i32 5, i32 4, i32 5, i32 6, i32 7, i32 6, i32 7>
+; CHECK: vmrglh
+ store <16 x i8> %tmp2, <16 x i8>* %A
+ ret void
+}
+
+define void @VMRGHH_xy(<16 x i8>* %A, <16 x i8>* %B) {
+entry:
+; CHECK: VMRGHH_xy:
+ %tmp = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp2, <16 x i32> <i32 8, i32 9, i32 24, i32 25, i32 10, i32 11, i32 26, i32 27, i32 12, i32 13, i32 28, i32 29, i32 14, i32 15, i32 30, i32 31>
+; CHECK: vmrghh
+ store <16 x i8> %tmp3, <16 x i8>* %A
+ ret void
+}
+
+define void @VMRGHH_xx(<16 x i8>* %A) {
+entry:
+; CHECK: VMRGHH_xx:
+ %tmp = load <16 x i8>* %A
+ %tmp2 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp, <16 x i32> <i32 8, i32 9, i32 8, i32 9, i32 10, i32 11, i32 10, i32 11, i32 12, i32 13, i32 12, i32 13, i32 14, i32 15, i32 14, i32 15>
+; CHECK: vmrghh
+ store <16 x i8> %tmp2, <16 x i8>* %A
+ ret void
+}
+
+define void @VMRGLW_xy(<16 x i8>* %A, <16 x i8>* %B) {
+entry:
+; CHECK: VMRGLW_xy:
+ %tmp = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23>
+; CHECK: vmrglw
+ store <16 x i8> %tmp3, <16 x i8>* %A
+ ret void
+}
+
+define void @VMRGLW_xx(<16 x i8>* %A) {
+entry:
+; CHECK: VMRGLW_xx:
+ %tmp = load <16 x i8>* %A
+ %tmp2 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+; CHECK: vmrglw
+ store <16 x i8> %tmp2, <16 x i8>* %A
+ ret void
+}
+
+define void @VMRGHW_xy(<16 x i8>* %A, <16 x i8>* %B) {
+entry:
+; CHECK: VMRGHW_xy:
+ %tmp = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp2, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 24, i32 25, i32 26, i32 27, i32 12, i32 13, i32 14, i32 15, i32 28, i32 29, i32 30, i32 31>
+; CHECK: vmrghw
+ store <16 x i8> %tmp3, <16 x i8>* %A
+ ret void
+}
+
+define void @VMRGHW_xx(<16 x i8>* %A) {
+entry:
+; CHECK: VMRGHW_xx:
+ %tmp = load <16 x i8>* %A
+ %tmp2 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15>
+; CHECK: vmrghw
+ store <16 x i8> %tmp2, <16 x i8>* %A
+ ret void
+}
+
+define void @VSLDOI_xy(<16 x i8>* %A, <16 x i8>* %B) {
+entry:
+; CHECK: VSLDOI_xy:
+ %tmp = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp2, <16 x i32> <i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4>
+; CHECK: vsldoi
+ store <16 x i8> %tmp3, <16 x i8>* %A
+ ret void
+}
+
+define void @VSLDOI_xx(<16 x i8>* %A) {
+entry:
+; CHECK: VSLDOI_xx:
+ %tmp = load <16 x i8>* %A
+ %tmp2 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4>
+; CHECK: vsldoi
+ store <16 x i8> %tmp2, <16 x i8>* %A
+ ret void
+}
+
diff --git a/test/CodeGen/PowerPC/vperm-instcombine.ll b/test/CodeGen/PowerPC/vperm-instcombine.ll
new file mode 100644
index 0000000..d9084c8
--- /dev/null
+++ b/test/CodeGen/PowerPC/vperm-instcombine.ll
@@ -0,0 +1,17 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64le-unknown-linux-gnu"
+
+define <16 x i8> @foo() nounwind ssp {
+; CHECK: @foo
+;; Arguments are {0,1,...,15},{16,17,...,31},{30,28,26,...,0}
+ %1 = call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> <i32 50462976, i32 117835012, i32 185207048, i32 252579084>, <4 x i32> <i32 319951120, i32 387323156, i32 454695192, i32 522067228>, <16 x i8> <i8 30, i8 28, i8 26, i8 24, i8 22, i8 20, i8 18, i8 16, i8 14, i8 12, i8 10, i8 8, i8 6, i8 4, i8 2, i8 0>)
+ %2 = bitcast <4 x i32> %1 to <16 x i8>
+ ret <16 x i8> %2
+;; Revised arguments are {16,17,...,31},{0,1,...,15},{1,3,5,...,31}
+;; optimized into the following:
+; CHECK: ret <16 x i8> <i8 17, i8 19, i8 21, i8 23, i8 25, i8 27, i8 29, i8 31, i8 1, i8 3, i8 5, i8 7, i8 9, i8 11, i8 13, i8 15>
+}
+
+declare <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32>, <4 x i32>, <16 x i8>)
diff --git a/test/CodeGen/PowerPC/vperm-lowering.ll b/test/CodeGen/PowerPC/vperm-lowering.ll
new file mode 100644
index 0000000..d55d26c
--- /dev/null
+++ b/test/CodeGen/PowerPC/vperm-lowering.ll
@@ -0,0 +1,66 @@
+; RUN: llc -O0 -fast-isel=false -mcpu=ppc64 < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64le-unknown-linux-gnu"
+
+define <16 x i8> @foo() nounwind ssp {
+ %1 = shufflevector <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, <16 x i32> <i32 0, i32 5, i32 10, i32 15, i32 20, i32 25, i32 30, i32 3, i32 8, i32 13, i32 18, i32 23, i32 28, i32 1, i32 6, i32 11>
+ ret <16 x i8> %1
+}
+
+; CHECK: .LCPI0_0:
+; CHECK: .byte 31
+; CHECK: .byte 26
+; CHECK: .byte 21
+; CHECK: .byte 16
+; CHECK: .byte 11
+; CHECK: .byte 6
+; CHECK: .byte 1
+; CHECK: .byte 28
+; CHECK: .byte 23
+; CHECK: .byte 18
+; CHECK: .byte 13
+; CHECK: .byte 8
+; CHECK: .byte 3
+; CHECK: .byte 30
+; CHECK: .byte 25
+; CHECK: .byte 20
+; CHECK: .LCPI0_1:
+; CHECK: .byte 0
+; CHECK: .byte 1
+; CHECK: .byte 2
+; CHECK: .byte 3
+; CHECK: .byte 4
+; CHECK: .byte 5
+; CHECK: .byte 6
+; CHECK: .byte 7
+; CHECK: .byte 8
+; CHECK: .byte 9
+; CHECK: .byte 10
+; CHECK: .byte 11
+; CHECK: .byte 12
+; CHECK: .byte 13
+; CHECK: .byte 14
+; CHECK: .byte 15
+; CHECK: .LCPI0_2:
+; CHECK: .byte 16
+; CHECK: .byte 17
+; CHECK: .byte 18
+; CHECK: .byte 19
+; CHECK: .byte 20
+; CHECK: .byte 21
+; CHECK: .byte 22
+; CHECK: .byte 23
+; CHECK: .byte 24
+; CHECK: .byte 25
+; CHECK: .byte 26
+; CHECK: .byte 27
+; CHECK: .byte 28
+; CHECK: .byte 29
+; CHECK: .byte 30
+; CHECK: .byte 31
+; CHECK: foo:
+; CHECK: addis [[REG1:[0-9]+]], 2, .LCPI0_2@toc@ha
+; CHECK: addi [[REG2:[0-9]+]], [[REG1]], .LCPI0_2@toc@l
+; CHECK: lvx [[REG3:[0-9]+]], 0, [[REG2]]
+; CHECK: vperm {{[0-9]+}}, [[REG3]], {{[0-9]+}}, {{[0-9]+}}