aboutsummaryrefslogtreecommitdiffstats
path: root/test/CodeGen/PowerPC
diff options
context:
space:
mode:
Diffstat (limited to 'test/CodeGen/PowerPC')
-rw-r--r--test/CodeGen/PowerPC/2004-11-29-ShrCrash.ll6
-rw-r--r--test/CodeGen/PowerPC/2004-11-30-shift-crash.ll6
-rw-r--r--test/CodeGen/PowerPC/2004-11-30-shr-var-crash.ll7
-rw-r--r--test/CodeGen/PowerPC/2004-12-12-ZeroSizeCommon.ll3
-rw-r--r--test/CodeGen/PowerPC/2005-01-14-SetSelectCrash.ll8
-rw-r--r--test/CodeGen/PowerPC/2005-01-14-UndefLong.ll3
-rw-r--r--test/CodeGen/PowerPC/2005-08-12-rlwimi-crash.ll12
-rw-r--r--test/CodeGen/PowerPC/2005-09-02-LegalizeDuplicatesCalls.ll10
-rw-r--r--test/CodeGen/PowerPC/2005-10-08-ArithmeticRotate.ll9
-rw-r--r--test/CodeGen/PowerPC/2005-11-30-vastart-crash.ll17
-rw-r--r--test/CodeGen/PowerPC/2006-01-11-darwin-fp-argument.ll12
-rw-r--r--test/CodeGen/PowerPC/2006-01-20-ShiftPartsCrash.ll17
-rw-r--r--test/CodeGen/PowerPC/2006-04-01-FloatDoubleExtend.ll7
-rw-r--r--test/CodeGen/PowerPC/2006-04-05-splat-ish.ll10
-rw-r--r--test/CodeGen/PowerPC/2006-04-19-vmaddfp-crash.ll72
-rw-r--r--test/CodeGen/PowerPC/2006-05-12-rlwimi-crash.ll60
-rw-r--r--test/CodeGen/PowerPC/2006-07-07-ComputeMaskedBits.ll16
-rw-r--r--test/CodeGen/PowerPC/2006-07-19-stwbrx-crash.ll10
-rw-r--r--test/CodeGen/PowerPC/2006-08-11-RetVector.ll8
-rw-r--r--test/CodeGen/PowerPC/2006-08-15-SelectionCrash.ll38
-rw-r--r--test/CodeGen/PowerPC/2006-09-28-shift_64.ll27
-rw-r--r--test/CodeGen/PowerPC/2006-10-11-combiner-aa-regression.ll26
-rw-r--r--test/CodeGen/PowerPC/2006-10-13-Miscompile.ll18
-rw-r--r--test/CodeGen/PowerPC/2006-10-17-brcc-miscompile.ll24
-rw-r--r--test/CodeGen/PowerPC/2006-10-17-ppc64-alloca.ll6
-rw-r--r--test/CodeGen/PowerPC/2006-11-10-DAGCombineMiscompile.ll14
-rw-r--r--test/CodeGen/PowerPC/2006-11-29-AltivecFPSplat.ll10
-rw-r--r--test/CodeGen/PowerPC/2006-12-07-LargeAlloca.ll26
-rw-r--r--test/CodeGen/PowerPC/2006-12-07-SelectCrash.ll27
-rw-r--r--test/CodeGen/PowerPC/2007-01-04-ArgExtension.ll10
-rw-r--r--test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll27
-rw-r--r--test/CodeGen/PowerPC/2007-01-29-lbrx-asm.ll7
-rw-r--r--test/CodeGen/PowerPC/2007-01-31-InlineAsmAddrMode.ll24
-rw-r--r--test/CodeGen/PowerPC/2007-02-16-AlignPacked.ll4
-rw-r--r--test/CodeGen/PowerPC/2007-02-16-InlineAsmNConstraint.ll11
-rw-r--r--test/CodeGen/PowerPC/2007-02-23-lr-saved-twice.ll14
-rw-r--r--test/CodeGen/PowerPC/2007-03-24-cntlzd.ll11
-rw-r--r--test/CodeGen/PowerPC/2007-03-30-SpillerCrash.ll1801
-rw-r--r--test/CodeGen/PowerPC/2007-04-24-InlineAsm-I-Modifier.ll15
-rw-r--r--test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll27
-rw-r--r--test/CodeGen/PowerPC/2007-05-03-InlineAsm-S-Constraint.ll12
-rw-r--r--test/CodeGen/PowerPC/2007-05-14-InlineAsmSelectCrash.ll25
-rw-r--r--test/CodeGen/PowerPC/2007-05-22-tailmerge-3.ll68
-rw-r--r--test/CodeGen/PowerPC/2007-05-30-dagcombine-miscomp.ll14
-rw-r--r--test/CodeGen/PowerPC/2007-06-28-BCCISelBug.ll85
-rw-r--r--test/CodeGen/PowerPC/Frames-align.ll16
-rw-r--r--test/CodeGen/PowerPC/Frames-alloca.ll55
-rw-r--r--test/CodeGen/PowerPC/Frames-large.ll79
-rw-r--r--test/CodeGen/PowerPC/Frames-leaf.ll40
-rw-r--r--test/CodeGen/PowerPC/Frames-small.ll34
-rw-r--r--test/CodeGen/PowerPC/LargeAbsoluteAddr.ll17
-rw-r--r--test/CodeGen/PowerPC/addc.ll27
-rw-r--r--test/CodeGen/PowerPC/addi-reassoc.ll20
-rw-r--r--test/CodeGen/PowerPC/align.ll12
-rw-r--r--test/CodeGen/PowerPC/and-branch.ll18
-rw-r--r--test/CodeGen/PowerPC/and-elim.ll18
-rw-r--r--test/CodeGen/PowerPC/and-imm.ll12
-rw-r--r--test/CodeGen/PowerPC/and_add.ll12
-rw-r--r--test/CodeGen/PowerPC/and_sext.ll28
-rw-r--r--test/CodeGen/PowerPC/and_sra.ll26
-rw-r--r--test/CodeGen/PowerPC/big-endian-actual-args.ll9
-rw-r--r--test/CodeGen/PowerPC/big-endian-call-result.ll13
-rw-r--r--test/CodeGen/PowerPC/big-endian-formal-args.ll15
-rw-r--r--test/CodeGen/PowerPC/branch-opt.ll93
-rw-r--r--test/CodeGen/PowerPC/bswap-load-store.ll44
-rw-r--r--test/CodeGen/PowerPC/buildvec_canonicalize.ll27
-rw-r--r--test/CodeGen/PowerPC/calls.ll31
-rw-r--r--test/CodeGen/PowerPC/cmp-cmp.ll15
-rw-r--r--test/CodeGen/PowerPC/compare-duplicate.ll11
-rw-r--r--test/CodeGen/PowerPC/compare-simm.ll14
-rw-r--r--test/CodeGen/PowerPC/constants.ll54
-rw-r--r--test/CodeGen/PowerPC/cttz.ll12
-rw-r--r--test/CodeGen/PowerPC/darwin-labels.ll8
-rw-r--r--test/CodeGen/PowerPC/dg.exp5
-rw-r--r--test/CodeGen/PowerPC/div-2.ll29
-rw-r--r--test/CodeGen/PowerPC/eqv-andc-orc-nor.ll94
-rw-r--r--test/CodeGen/PowerPC/extsh.ll7
-rw-r--r--test/CodeGen/PowerPC/fma.ll47
-rw-r--r--test/CodeGen/PowerPC/fnabs.ll11
-rw-r--r--test/CodeGen/PowerPC/fneg.ll12
-rw-r--r--test/CodeGen/PowerPC/fnegsel.ll8
-rw-r--r--test/CodeGen/PowerPC/fold-li.ll14
-rw-r--r--test/CodeGen/PowerPC/fp-branch.ll20
-rw-r--r--test/CodeGen/PowerPC/fp-int-fp.ll26
-rw-r--r--test/CodeGen/PowerPC/fp_to_uint.ll9
-rw-r--r--test/CodeGen/PowerPC/fpcopy.ll6
-rw-r--r--test/CodeGen/PowerPC/fsqrt.ll21
-rw-r--r--test/CodeGen/PowerPC/hello.ll12
-rw-r--r--test/CodeGen/PowerPC/i64_fp.ll25
-rw-r--r--test/CodeGen/PowerPC/iabs.ll15
-rw-r--r--test/CodeGen/PowerPC/inlineasm-copy.ll13
-rw-r--r--test/CodeGen/PowerPC/inverted-bool-compares.ll10
-rw-r--r--test/CodeGen/PowerPC/ispositive.ll10
-rw-r--r--test/CodeGen/PowerPC/lha.ll7
-rw-r--r--test/CodeGen/PowerPC/load-constant-addr.ll9
-rw-r--r--test/CodeGen/PowerPC/long-compare.ll9
-rw-r--r--test/CodeGen/PowerPC/mem-rr-addr-mode.ll17
-rw-r--r--test/CodeGen/PowerPC/mem_update.ll68
-rw-r--r--test/CodeGen/PowerPC/mul-neg-power-2.ll8
-rw-r--r--test/CodeGen/PowerPC/mulhs.ll18
-rw-r--r--test/CodeGen/PowerPC/neg.ll6
-rw-r--r--test/CodeGen/PowerPC/or-addressing-mode.ll22
-rw-r--r--test/CodeGen/PowerPC/reg-coalesce-simple.ll11
-rw-r--r--test/CodeGen/PowerPC/rlwimi-commute.ll26
-rw-r--r--test/CodeGen/PowerPC/rlwimi.ll72
-rw-r--r--test/CodeGen/PowerPC/rlwimi2.ll31
-rw-r--r--test/CodeGen/PowerPC/rlwimi3.ll26
-rw-r--r--test/CodeGen/PowerPC/rlwinm.ll64
-rw-r--r--test/CodeGen/PowerPC/rlwinm2.ll28
-rw-r--r--test/CodeGen/PowerPC/rotl-2.ll38
-rw-r--r--test/CodeGen/PowerPC/rotl.ll37
-rw-r--r--test/CodeGen/PowerPC/select_lt0.ll51
-rw-r--r--test/CodeGen/PowerPC/setcc_no_zext.ll8
-rw-r--r--test/CodeGen/PowerPC/seteq-0.ll9
-rw-r--r--test/CodeGen/PowerPC/shl_elim.ll11
-rw-r--r--test/CodeGen/PowerPC/shl_sext.ll17
-rw-r--r--test/CodeGen/PowerPC/sign_ext_inreg1.ll12
-rw-r--r--test/CodeGen/PowerPC/small-arguments.ll52
-rw-r--r--test/CodeGen/PowerPC/stfiwx.ll26
-rw-r--r--test/CodeGen/PowerPC/store-load-fwd.ll6
-rw-r--r--test/CodeGen/PowerPC/subc.ll26
-rw-r--r--test/CodeGen/PowerPC/unsafe-math.ll10
-rw-r--r--test/CodeGen/PowerPC/vcmp-fold.ll21
-rw-r--r--test/CodeGen/PowerPC/vec_br_cmp.ll23
-rw-r--r--test/CodeGen/PowerPC/vec_call.ll11
-rw-r--r--test/CodeGen/PowerPC/vec_constants.ll47
-rw-r--r--test/CodeGen/PowerPC/vec_mul.ll24
-rw-r--r--test/CodeGen/PowerPC/vec_perf_shuffle.ll42
-rw-r--r--test/CodeGen/PowerPC/vec_shuffle.ll506
-rw-r--r--test/CodeGen/PowerPC/vec_spat.ll73
-rw-r--r--test/CodeGen/PowerPC/vec_vrsave.ll14
-rw-r--r--test/CodeGen/PowerPC/vec_zero.ll8
-rw-r--r--test/CodeGen/PowerPC/vector-identity-shuffle.ll16
-rw-r--r--test/CodeGen/PowerPC/vector.ll157
134 files changed, 5485 insertions, 0 deletions
diff --git a/test/CodeGen/PowerPC/2004-11-29-ShrCrash.ll b/test/CodeGen/PowerPC/2004-11-29-ShrCrash.ll
new file mode 100644
index 0000000..e2a00d1
--- /dev/null
+++ b/test/CodeGen/PowerPC/2004-11-29-ShrCrash.ll
@@ -0,0 +1,6 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32
+
+void %main() {
+ %tr1 = shr uint 1, ubyte 0
+ ret void
+}
diff --git a/test/CodeGen/PowerPC/2004-11-30-shift-crash.ll b/test/CodeGen/PowerPC/2004-11-30-shift-crash.ll
new file mode 100644
index 0000000..4603bdb
--- /dev/null
+++ b/test/CodeGen/PowerPC/2004-11-30-shift-crash.ll
@@ -0,0 +1,6 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32
+
+void %main() {
+ %tr4 = shl ulong 1, ubyte 0 ; <ulong> [#uses=0]
+ ret void
+}
diff --git a/test/CodeGen/PowerPC/2004-11-30-shr-var-crash.ll b/test/CodeGen/PowerPC/2004-11-30-shr-var-crash.ll
new file mode 100644
index 0000000..8f54c78
--- /dev/null
+++ b/test/CodeGen/PowerPC/2004-11-30-shr-var-crash.ll
@@ -0,0 +1,7 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32
+
+void %main() {
+ %shamt = add ubyte 0, 1 ; <ubyte> [#uses=1]
+ %tr2 = shr long 1, ubyte %shamt ; <long> [#uses=0]
+ ret void
+}
diff --git a/test/CodeGen/PowerPC/2004-12-12-ZeroSizeCommon.ll b/test/CodeGen/PowerPC/2004-12-12-ZeroSizeCommon.ll
new file mode 100644
index 0000000..87f6005
--- /dev/null
+++ b/test/CodeGen/PowerPC/2004-12-12-ZeroSizeCommon.ll
@@ -0,0 +1,3 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | not grep .comm.*X,0
+
+%X = linkonce global {} {}
diff --git a/test/CodeGen/PowerPC/2005-01-14-SetSelectCrash.ll b/test/CodeGen/PowerPC/2005-01-14-SetSelectCrash.ll
new file mode 100644
index 0000000..5dc4b28
--- /dev/null
+++ b/test/CodeGen/PowerPC/2005-01-14-SetSelectCrash.ll
@@ -0,0 +1,8 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32
+
+int %main() {
+ %setle = setle long 1, 0
+ %select = select bool true, bool %setle, bool true
+ ret int 0
+}
+
diff --git a/test/CodeGen/PowerPC/2005-01-14-UndefLong.ll b/test/CodeGen/PowerPC/2005-01-14-UndefLong.ll
new file mode 100644
index 0000000..a4121c5
--- /dev/null
+++ b/test/CodeGen/PowerPC/2005-01-14-UndefLong.ll
@@ -0,0 +1,3 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32
+
+long %test() { ret long undef }
diff --git a/test/CodeGen/PowerPC/2005-08-12-rlwimi-crash.ll b/test/CodeGen/PowerPC/2005-08-12-rlwimi-crash.ll
new file mode 100644
index 0000000..ef0137f
--- /dev/null
+++ b/test/CodeGen/PowerPC/2005-08-12-rlwimi-crash.ll
@@ -0,0 +1,12 @@
+; this should not crash the ppc backend
+
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32
+
+uint %test( int %j.0.0.i) {
+ %tmp.85.i = and int %j.0.0.i, 7
+ %tmp.161278.i = cast int %tmp.85.i to uint
+ %tmp.5.i77.i = shr uint %tmp.161278.i, ubyte 3
+ ret uint %tmp.5.i77.i
+}
+
+
diff --git a/test/CodeGen/PowerPC/2005-09-02-LegalizeDuplicatesCalls.ll b/test/CodeGen/PowerPC/2005-09-02-LegalizeDuplicatesCalls.ll
new file mode 100644
index 0000000..7bb1317
--- /dev/null
+++ b/test/CodeGen/PowerPC/2005-09-02-LegalizeDuplicatesCalls.ll
@@ -0,0 +1,10 @@
+; This function should have exactly one call to fixdfdi, no more!
+
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mattr=-64bit | \
+; RUN: grep {bl .*fixdfdi} | wc -l | grep 1
+
+double %test2(double %tmp.7705) {
+ %mem_tmp.2.0.in = cast double %tmp.7705 to long ; <long> [#uses=1]
+ %mem_tmp.2.0 = cast long %mem_tmp.2.0.in to double
+ ret double %mem_tmp.2.0
+}
diff --git a/test/CodeGen/PowerPC/2005-10-08-ArithmeticRotate.ll b/test/CodeGen/PowerPC/2005-10-08-ArithmeticRotate.ll
new file mode 100644
index 0000000..edbdc4a
--- /dev/null
+++ b/test/CodeGen/PowerPC/2005-10-08-ArithmeticRotate.ll
@@ -0,0 +1,9 @@
+; This was erroneously being turned into an rlwinm instruction.
+; The sign bit does matter in this case.
+
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | grep srawi
+int %test(int %X) {
+ %Y = and int %X, -2
+ %Z = shr int %Y, ubyte 11
+ ret int %Z
+}
diff --git a/test/CodeGen/PowerPC/2005-11-30-vastart-crash.ll b/test/CodeGen/PowerPC/2005-11-30-vastart-crash.ll
new file mode 100644
index 0000000..4264e9e
--- /dev/null
+++ b/test/CodeGen/PowerPC/2005-11-30-vastart-crash.ll
@@ -0,0 +1,17 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc
+target endian = big
+target pointersize = 32
+target triple = "powerpc-apple-darwin8.2.0"
+implementation ; Functions:
+
+void %bar(int %G, int %E, int %F, int %A, int %B, int %C, int %D, sbyte* %fmt, ...) {
+ %ap = alloca sbyte* ; <sbyte**> [#uses=2]
+ call void %llvm.va_start( sbyte** %ap )
+ %tmp.1 = load sbyte** %ap ; <sbyte*> [#uses=1]
+ %tmp.0 = call double %foo( sbyte* %tmp.1 ) ; <double> [#uses=0]
+ ret void
+}
+
+declare void %llvm.va_start(sbyte**)
+
+declare double %foo(sbyte*)
diff --git a/test/CodeGen/PowerPC/2006-01-11-darwin-fp-argument.ll b/test/CodeGen/PowerPC/2006-01-11-darwin-fp-argument.ll
new file mode 100644
index 0000000..c90ef0a
--- /dev/null
+++ b/test/CodeGen/PowerPC/2006-01-11-darwin-fp-argument.ll
@@ -0,0 +1,12 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc | not grep {, f1}
+
+target endian = big
+target pointersize = 32
+target triple = "powerpc-apple-darwin8.2.0"
+
+; Dead argument should reserve an FP register.
+double %bar(double %DEAD, double %X, double %Y) {
+ %tmp.2 = add double %X, %Y
+ ret double %tmp.2
+}
+
diff --git a/test/CodeGen/PowerPC/2006-01-20-ShiftPartsCrash.ll b/test/CodeGen/PowerPC/2006-01-20-ShiftPartsCrash.ll
new file mode 100644
index 0000000..7700459
--- /dev/null
+++ b/test/CodeGen/PowerPC/2006-01-20-ShiftPartsCrash.ll
@@ -0,0 +1,17 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc
+
+void %iterative_hash_host_wide_int() {
+ %zero = alloca int ; <int*> [#uses=2]
+ %b = alloca uint ; <uint*> [#uses=1]
+ store int 0, int* %zero
+ %tmp = load int* %zero ; <int> [#uses=1]
+ %tmp5 = cast int %tmp to uint ; <uint> [#uses=1]
+ %tmp6.u = add uint %tmp5, 32 ; <uint> [#uses=1]
+ %tmp6 = cast uint %tmp6.u to int ; <int> [#uses=1]
+ %tmp7 = load long* null ; <long> [#uses=1]
+ %tmp6 = cast int %tmp6 to ubyte ; <ubyte> [#uses=1]
+ %tmp8 = shr long %tmp7, ubyte %tmp6 ; <long> [#uses=1]
+ %tmp8 = cast long %tmp8 to uint ; <uint> [#uses=1]
+ store uint %tmp8, uint* %b
+ unreachable
+}
diff --git a/test/CodeGen/PowerPC/2006-04-01-FloatDoubleExtend.ll b/test/CodeGen/PowerPC/2006-04-01-FloatDoubleExtend.ll
new file mode 100644
index 0000000..dcf599b
--- /dev/null
+++ b/test/CodeGen/PowerPC/2006-04-01-FloatDoubleExtend.ll
@@ -0,0 +1,7 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32
+
+double %CalcSpeed(float %tmp127) {
+ %tmp145 = cast float %tmp127 to double ; <double> [#uses=1]
+ %tmp150 = call double asm "frsqrte $0,$1", "=f,f"( double %tmp145 ) ; <double> [#uses=0]
+ ret double %tmp150
+}
diff --git a/test/CodeGen/PowerPC/2006-04-05-splat-ish.ll b/test/CodeGen/PowerPC/2006-04-05-splat-ish.ll
new file mode 100644
index 0000000..b4facea
--- /dev/null
+++ b/test/CodeGen/PowerPC/2006-04-05-splat-ish.ll
@@ -0,0 +1,10 @@
+; RUN: llvm-upgrade < %s | llvm-as | \
+; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -mcpu=g5 | \
+; RUN: grep {vspltish v.*, 10}
+
+void %test(<8 x short>* %P) {
+ %tmp = load <8 x short>* %P ; <<8 x short>> [#uses=1]
+ %tmp1 = add <8 x short> %tmp, < short 10, short 10, short 10, short 10, short 10, short 10, short 10, short 10 > ; <<8 x short>> [#uses=1]
+ store <8 x short> %tmp1, <8 x short>* %P
+ ret void
+}
diff --git a/test/CodeGen/PowerPC/2006-04-19-vmaddfp-crash.ll b/test/CodeGen/PowerPC/2006-04-19-vmaddfp-crash.ll
new file mode 100644
index 0000000..59f7ed4
--- /dev/null
+++ b/test/CodeGen/PowerPC/2006-04-19-vmaddfp-crash.ll
@@ -0,0 +1,72 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5
+
+void %test(sbyte* %stack) {
+entry:
+ %tmp9 = seteq int 0, 0 ; <bool> [#uses=1]
+ %tmp30 = seteq uint 0, 0 ; <bool> [#uses=1]
+ br bool %tmp30, label %cond_next54, label %cond_true31
+
+cond_true860: ; preds = %bb855
+ %tmp879 = tail call <4 x float> %llvm.ppc.altivec.vmaddfp( <4 x float> zeroinitializer, <4 x float> zeroinitializer, <4 x float> zeroinitializer ) ; <<4 x float>> [#uses=1]
+ %tmp880 = cast <4 x float> %tmp879 to <4 x int> ; <<4 x int>> [#uses=2]
+ %tmp883 = shufflevector <4 x int> %tmp880, <4 x int> undef, <4 x uint> < uint 1, uint 1, uint 1, uint 1 > ; <<4 x int>> [#uses=1]
+ %tmp883 = cast <4 x int> %tmp883 to <4 x float> ; <<4 x float>> [#uses=1]
+ %tmp885 = shufflevector <4 x int> %tmp880, <4 x int> undef, <4 x uint> < uint 2, uint 2, uint 2, uint 2 > ; <<4 x int>> [#uses=1]
+ %tmp885 = cast <4 x int> %tmp885 to <4 x float> ; <<4 x float>> [#uses=1]
+ br label %cond_next905
+
+cond_true31: ; preds = %entry
+ ret void
+
+cond_next54: ; preds = %entry
+ br bool %tmp9, label %cond_false385, label %bb279
+
+bb279: ; preds = %cond_next54
+ ret void
+
+cond_false385: ; preds = %cond_next54
+ %tmp388 = seteq uint 0, 0 ; <bool> [#uses=1]
+ br bool %tmp388, label %cond_next463, label %cond_true389
+
+cond_true389: ; preds = %cond_false385
+ ret void
+
+cond_next463: ; preds = %cond_false385
+ %tmp1208107 = setgt sbyte* null, %stack ; <bool> [#uses=1]
+ br bool %tmp1208107, label %cond_true1209.preheader, label %bb1212
+
+cond_true498: ; preds = %cond_true1209.preheader
+ ret void
+
+cond_true519: ; preds = %cond_true1209.preheader
+ %bothcond = or bool false, false ; <bool> [#uses=1]
+ br bool %bothcond, label %bb855, label %bb980
+
+cond_false548: ; preds = %cond_true1209.preheader
+ ret void
+
+bb855: ; preds = %cond_true519
+ %tmp859 = seteq int 0, 0 ; <bool> [#uses=1]
+ br bool %tmp859, label %cond_true860, label %cond_next905
+
+cond_next905: ; preds = %bb855, %cond_true860
+ %vfpw2.4 = phi <4 x float> [ %tmp885, %cond_true860 ], [ undef, %bb855 ] ; <<4 x float>> [#uses=0]
+ %vfpw1.4 = phi <4 x float> [ %tmp883, %cond_true860 ], [ undef, %bb855 ] ; <<4 x float>> [#uses=0]
+ %tmp930 = cast <4 x float> zeroinitializer to <4 x int> ; <<4 x int>> [#uses=0]
+ ret void
+
+bb980: ; preds = %cond_true519
+ ret void
+
+cond_true1209.preheader: ; preds = %cond_next463
+ %tmp496 = and uint 0, 12288 ; <uint> [#uses=1]
+ switch uint %tmp496, label %cond_false548 [
+ uint 0, label %cond_true498
+ uint 4096, label %cond_true519
+ ]
+
+bb1212: ; preds = %cond_next463
+ ret void
+}
+
+declare <4 x float> %llvm.ppc.altivec.vmaddfp(<4 x float>, <4 x float>, <4 x float>)
diff --git a/test/CodeGen/PowerPC/2006-05-12-rlwimi-crash.ll b/test/CodeGen/PowerPC/2006-05-12-rlwimi-crash.ll
new file mode 100644
index 0000000..6c34cd7
--- /dev/null
+++ b/test/CodeGen/PowerPC/2006-05-12-rlwimi-crash.ll
@@ -0,0 +1,60 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32
+
+ %struct.attr_desc = type { sbyte*, %struct.attr_desc*, %struct.attr_value*, %struct.attr_value*, uint }
+ %struct.attr_value = type { %struct.rtx_def*, %struct.attr_value*, %struct.insn_ent*, int, int }
+ %struct.insn_def = type { %struct.insn_def*, %struct.rtx_def*, int, int, int, int, int }
+ %struct.insn_ent = type { %struct.insn_ent*, %struct.insn_def* }
+ %struct.rtx_def = type { ushort, ubyte, ubyte, %struct.u }
+ %struct.u = type { [1 x long] }
+
+implementation ; Functions:
+
+void %find_attr() {
+entry:
+ %tmp26 = seteq %struct.attr_desc* null, null ; <bool> [#uses=1]
+ br bool %tmp26, label %bb30, label %cond_true27
+
+cond_true27: ; preds = %entry
+ ret void
+
+bb30: ; preds = %entry
+ %tmp67 = seteq %struct.attr_desc* null, null ; <bool> [#uses=1]
+ br bool %tmp67, label %cond_next92, label %cond_true68
+
+cond_true68: ; preds = %bb30
+ ret void
+
+cond_next92: ; preds = %bb30
+ %tmp173 = getelementptr %struct.attr_desc* null, int 0, uint 4 ; <uint*> [#uses=2]
+ %tmp174 = load uint* %tmp173 ; <uint> [#uses=1]
+ %tmp177 = and uint %tmp174, 4294967287 ; <uint> [#uses=1]
+ store uint %tmp177, uint* %tmp173
+ %tmp180 = getelementptr %struct.attr_desc* null, int 0, uint 4 ; <uint*> [#uses=1]
+ %tmp181 = load uint* %tmp180 ; <uint> [#uses=1]
+ %tmp185 = getelementptr %struct.attr_desc* null, int 0, uint 4 ; <uint*> [#uses=2]
+ %tmp186 = load uint* %tmp185 ; <uint> [#uses=1]
+ %tmp183187 = shl uint %tmp181, ubyte 1 ; <uint> [#uses=1]
+ %tmp188 = and uint %tmp183187, 16 ; <uint> [#uses=1]
+ %tmp190 = and uint %tmp186, 4294967279 ; <uint> [#uses=1]
+ %tmp191 = or uint %tmp190, %tmp188 ; <uint> [#uses=1]
+ store uint %tmp191, uint* %tmp185
+ %tmp193 = getelementptr %struct.attr_desc* null, int 0, uint 4 ; <uint*> [#uses=1]
+ %tmp194 = load uint* %tmp193 ; <uint> [#uses=1]
+ %tmp198 = getelementptr %struct.attr_desc* null, int 0, uint 4 ; <uint*> [#uses=2]
+ %tmp199 = load uint* %tmp198 ; <uint> [#uses=1]
+ %tmp196200 = shl uint %tmp194, ubyte 2 ; <uint> [#uses=1]
+ %tmp201 = and uint %tmp196200, 64 ; <uint> [#uses=1]
+ %tmp203 = and uint %tmp199, 4294967231 ; <uint> [#uses=1]
+ %tmp204 = or uint %tmp203, %tmp201 ; <uint> [#uses=1]
+ store uint %tmp204, uint* %tmp198
+ %tmp206 = getelementptr %struct.attr_desc* null, int 0, uint 4 ; <uint*> [#uses=1]
+ %tmp207 = load uint* %tmp206 ; <uint> [#uses=1]
+ %tmp211 = getelementptr %struct.attr_desc* null, int 0, uint 4 ; <uint*> [#uses=2]
+ %tmp212 = load uint* %tmp211 ; <uint> [#uses=1]
+ %tmp209213 = shl uint %tmp207, ubyte 1 ; <uint> [#uses=1]
+ %tmp214 = and uint %tmp209213, 128 ; <uint> [#uses=1]
+ %tmp216 = and uint %tmp212, 4294967167 ; <uint> [#uses=1]
+ %tmp217 = or uint %tmp216, %tmp214 ; <uint> [#uses=1]
+ store uint %tmp217, uint* %tmp211
+ ret void
+}
diff --git a/test/CodeGen/PowerPC/2006-07-07-ComputeMaskedBits.ll b/test/CodeGen/PowerPC/2006-07-07-ComputeMaskedBits.ll
new file mode 100644
index 0000000..1026072
--- /dev/null
+++ b/test/CodeGen/PowerPC/2006-07-07-ComputeMaskedBits.ll
@@ -0,0 +1,16 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc -mtriple=powerpc64-apple-darwin | grep extsw | wc -l | grep 2
+
+%lens = external global ubyte*
+%vals = external global int*
+
+int %test(int %i) {
+ %tmp = load ubyte** %lens
+ %tmp1 = getelementptr ubyte* %tmp, int %i
+ %tmp = load ubyte* %tmp1
+ %tmp2 = cast ubyte %tmp to int
+ %tmp3 = load int** %vals
+ %tmp5 = sub int 1, %tmp2
+ %tmp6 = getelementptr int* %tmp3, int %tmp5
+ %tmp7 = load int* %tmp6
+ ret int %tmp7
+}
diff --git a/test/CodeGen/PowerPC/2006-07-19-stwbrx-crash.ll b/test/CodeGen/PowerPC/2006-07-19-stwbrx-crash.ll
new file mode 100644
index 0000000..d71ba5a
--- /dev/null
+++ b/test/CodeGen/PowerPC/2006-07-19-stwbrx-crash.ll
@@ -0,0 +1,10 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32
+
+void %img2buf(int %symbol_size_in_bytes, ushort* %ui16) {
+ %tmp93 = load ushort* null ; <ushort> [#uses=1]
+ %tmp99 = call ushort %llvm.bswap.i16( ushort %tmp93 ) ; <ushort> [#uses=1]
+ store ushort %tmp99, ushort* %ui16
+ ret void
+}
+
+declare ushort %llvm.bswap.i16(ushort)
diff --git a/test/CodeGen/PowerPC/2006-08-11-RetVector.ll b/test/CodeGen/PowerPC/2006-08-11-RetVector.ll
new file mode 100644
index 0000000..cf0cd2c
--- /dev/null
+++ b/test/CodeGen/PowerPC/2006-08-11-RetVector.ll
@@ -0,0 +1,8 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 | grep vsldoi
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 | not grep vor
+
+<4 x float> %func(<4 x float> %fp0, <4 x float> %fp1) {
+ %tmp76 = shufflevector <4 x float> %fp0, <4 x float> %fp1, <4 x uint> < uint 0, uint 1, uint 2, uint 7 > ; <<4 x float>> [#uses=1]
+ ret <4 x float> %tmp76
+}
+
diff --git a/test/CodeGen/PowerPC/2006-08-15-SelectionCrash.ll b/test/CodeGen/PowerPC/2006-08-15-SelectionCrash.ll
new file mode 100644
index 0000000..287a79d
--- /dev/null
+++ b/test/CodeGen/PowerPC/2006-08-15-SelectionCrash.ll
@@ -0,0 +1,38 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc
+
+ %struct..0anon = type { int }
+ %struct.rtx_def = type { ushort, ubyte, ubyte, [1 x %struct..0anon] }
+
+implementation ; Functions:
+
+fastcc void %immed_double_const(int %i0, int %i1) {
+entry:
+ %tmp1 = load uint* null ; <uint> [#uses=1]
+ switch uint %tmp1, label %bb103 [
+ uint 1, label %bb
+ uint 3, label %bb
+ ]
+
+bb: ; preds = %entry, %entry
+ %tmp14 = setgt int 0, 31 ; <bool> [#uses=1]
+ br bool %tmp14, label %cond_next77, label %cond_next17
+
+cond_next17: ; preds = %bb
+ ret void
+
+cond_next77: ; preds = %bb
+ %tmp79.not = setne int %i1, 0 ; <bool> [#uses=1]
+ %tmp84 = setlt int %i0, 0 ; <bool> [#uses=2]
+ %bothcond1 = or bool %tmp79.not, %tmp84 ; <bool> [#uses=1]
+ br bool %bothcond1, label %bb88, label %bb99
+
+bb88: ; preds = %cond_next77
+ %bothcond2 = and bool false, %tmp84 ; <bool> [#uses=0]
+ ret void
+
+bb99: ; preds = %cond_next77
+ ret void
+
+bb103: ; preds = %entry
+ ret void
+}
diff --git a/test/CodeGen/PowerPC/2006-09-28-shift_64.ll b/test/CodeGen/PowerPC/2006-09-28-shift_64.ll
new file mode 100644
index 0000000..58d1f26
--- /dev/null
+++ b/test/CodeGen/PowerPC/2006-09-28-shift_64.ll
@@ -0,0 +1,27 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc64
+target endian = big
+target pointersize = 64
+target triple = "powerpc64-apple-darwin8"
+
+implementation ; Functions:
+
+void %glArrayElement_CompExec() {
+entry:
+ %tmp3 = and ulong 0, 18446744073701163007 ; <ulong> [#uses=1]
+ br label %cond_true24
+
+cond_false: ; preds = %cond_true24
+ ret void
+
+cond_true24: ; preds = %cond_true24, %entry
+ %indvar.ph = phi uint [ 0, %entry ], [ %indvar.next, %cond_true24 ] ; <uint> [#uses=1]
+ %indvar = add uint 0, %indvar.ph ; <uint> [#uses=2]
+ %code.0 = cast uint %indvar to ubyte ; <ubyte> [#uses=1]
+ %tmp5 = add ubyte %code.0, 16 ; <ubyte> [#uses=1]
+ %tmp7 = shr ulong %tmp3, ubyte %tmp5 ; <ulong> [#uses=1]
+ %tmp7 = cast ulong %tmp7 to int ; <int> [#uses=1]
+ %tmp8 = and int %tmp7, 1 ; <int> [#uses=1]
+ %tmp8 = seteq int %tmp8, 0 ; <bool> [#uses=1]
+ %indvar.next = add uint %indvar, 1 ; <uint> [#uses=1]
+ br bool %tmp8, label %cond_false, label %cond_true24
+}
diff --git a/test/CodeGen/PowerPC/2006-10-11-combiner-aa-regression.ll b/test/CodeGen/PowerPC/2006-10-11-combiner-aa-regression.ll
new file mode 100644
index 0000000..992e52a
--- /dev/null
+++ b/test/CodeGen/PowerPC/2006-10-11-combiner-aa-regression.ll
@@ -0,0 +1,26 @@
+; RUN: llvm-upgrade < %s | llvm-as | \
+; RUN: llc -march=ppc32 -combiner-alias-analysis | grep f5
+
+target endian = big
+target pointersize = 32
+target triple = "powerpc-apple-darwin8.2.0"
+ %struct.Point = type { double, double, double }
+
+implementation ; Functions:
+
+void %offset(%struct.Point* %pt, double %x, double %y, double %z) {
+entry:
+ %tmp = getelementptr %struct.Point* %pt, int 0, uint 0 ; <double*> [#uses=2]
+ %tmp = load double* %tmp ; <double> [#uses=1]
+ %tmp2 = add double %tmp, %x ; <double> [#uses=1]
+ store double %tmp2, double* %tmp
+ %tmp6 = getelementptr %struct.Point* %pt, int 0, uint 1 ; <double*> [#uses=2]
+ %tmp7 = load double* %tmp6 ; <double> [#uses=1]
+ %tmp9 = add double %tmp7, %y ; <double> [#uses=1]
+ store double %tmp9, double* %tmp6
+ %tmp13 = getelementptr %struct.Point* %pt, int 0, uint 2 ; <double*> [#uses=2]
+ %tmp14 = load double* %tmp13 ; <double> [#uses=1]
+ %tmp16 = add double %tmp14, %z ; <double> [#uses=1]
+ store double %tmp16, double* %tmp13
+ ret void
+}
diff --git a/test/CodeGen/PowerPC/2006-10-13-Miscompile.ll b/test/CodeGen/PowerPC/2006-10-13-Miscompile.ll
new file mode 100644
index 0000000..95b5312
--- /dev/null
+++ b/test/CodeGen/PowerPC/2006-10-13-Miscompile.ll
@@ -0,0 +1,18 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | not grep IMPLICIT_DEF
+
+void %foo(long %X) {
+entry:
+ %tmp1 = and long %X, 3 ; <long> [#uses=1]
+ %tmp = setgt long %tmp1, 2 ; <bool> [#uses=1]
+ br bool %tmp, label %UnifiedReturnBlock, label %cond_true
+
+cond_true: ; preds = %entry
+ %tmp = tail call int (...)* %bar( ) ; <int> [#uses=0]
+ ret void
+
+UnifiedReturnBlock: ; preds = %entry
+ ret void
+}
+
+declare int %bar(...)
+
diff --git a/test/CodeGen/PowerPC/2006-10-17-brcc-miscompile.ll b/test/CodeGen/PowerPC/2006-10-17-brcc-miscompile.ll
new file mode 100644
index 0000000..397ada7
--- /dev/null
+++ b/test/CodeGen/PowerPC/2006-10-17-brcc-miscompile.ll
@@ -0,0 +1,24 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | grep xor
+
+target endian = big
+target pointersize = 32
+target triple = "powerpc-apple-darwin8.7.0"
+
+implementation ; Functions:
+
+void %foo(int %X) {
+entry:
+ %tmp1 = and int %X, 3 ; <int> [#uses=1]
+ %tmp2 = xor int %tmp1, 1
+ %tmp = seteq int %tmp2, 0 ; <bool> [#uses=1]
+ br bool %tmp, label %UnifiedReturnBlock, label %cond_true
+
+cond_true: ; preds = %entry
+ tail call int (...)* %bar( ) ; <int> [#uses=0]
+ ret void
+
+UnifiedReturnBlock: ; preds = %entry
+ ret void
+}
+
+declare int %bar(...)
diff --git a/test/CodeGen/PowerPC/2006-10-17-ppc64-alloca.ll b/test/CodeGen/PowerPC/2006-10-17-ppc64-alloca.ll
new file mode 100644
index 0000000..c981c26
--- /dev/null
+++ b/test/CodeGen/PowerPC/2006-10-17-ppc64-alloca.ll
@@ -0,0 +1,6 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc64
+
+int * %foo(uint %n) {
+ %A = alloca int, uint %n
+ ret int* %A
+}
diff --git a/test/CodeGen/PowerPC/2006-11-10-DAGCombineMiscompile.ll b/test/CodeGen/PowerPC/2006-11-10-DAGCombineMiscompile.ll
new file mode 100644
index 0000000..a5476eb
--- /dev/null
+++ b/test/CodeGen/PowerPC/2006-11-10-DAGCombineMiscompile.ll
@@ -0,0 +1,14 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | grep rlwimi
+
+void %test(short %div.0.i.i.i.i, int %L_num.0.i.i.i.i, int %tmp1.i.i206.i.i, short* %P) {
+ %X = shl short %div.0.i.i.i.i, ubyte 1 ; <short> [#uses=1]
+ %tmp28.i.i.i.i = shl int %L_num.0.i.i.i.i, ubyte 1 ; <int> [#uses=2]
+ %tmp31.i.i.i.i = setlt int %tmp28.i.i.i.i, %tmp1.i.i206.i.i ; <bool> [#uses=2]
+
+ %tmp31.i.i.i.i = cast bool %tmp31.i.i.i.i to short ; <short> [#uses=1]
+ %tmp371.i.i.i.i1 = or short %tmp31.i.i.i.i, %X ; <short> [#uses=1]
+ %div.0.be.i.i.i.i = xor short %tmp371.i.i.i.i1, 1 ; <short> [#uses=1]
+ store short %div.0.be.i.i.i.i, short* %P
+ ret void
+}
+
diff --git a/test/CodeGen/PowerPC/2006-11-29-AltivecFPSplat.ll b/test/CodeGen/PowerPC/2006-11-29-AltivecFPSplat.ll
new file mode 100644
index 0000000..0411eb5
--- /dev/null
+++ b/test/CodeGen/PowerPC/2006-11-29-AltivecFPSplat.ll
@@ -0,0 +1,10 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5
+
+void %glgRunProcessor15() {
+ %tmp26355.i = shufflevector <4 x float> zeroinitializer, <4 x float> < float 0x379FFFE000000000, float 0x379FFFE000000000, float 0x379FFFE000000000, float 0x379FFFE000000000 >, <4 x uint> < uint 0, uint 1, uint 2, uint 7 > ; <<4 x float>> [#uses=1]
+ %tmp3030030304.i = cast <4 x float> %tmp26355.i to <8 x short> ; <<8 x short>> [#uses=1]
+ %tmp30305.i = shufflevector <8 x short> zeroinitializer, <8 x short> %tmp3030030304.i, <8 x uint> < uint 1, uint 3, uint 5, uint 7, uint 9, uint 11, uint 13, uint 15 > ; <<8 x short>> [#uses=1]
+ %tmp30305.i = cast <8 x short> %tmp30305.i to <4 x int> ; <<4 x int>> [#uses=1]
+ store <4 x int> %tmp30305.i, <4 x int>* null
+ ret void
+}
diff --git a/test/CodeGen/PowerPC/2006-12-07-LargeAlloca.ll b/test/CodeGen/PowerPC/2006-12-07-LargeAlloca.ll
new file mode 100644
index 0000000..f6103e5
--- /dev/null
+++ b/test/CodeGen/PowerPC/2006-12-07-LargeAlloca.ll
@@ -0,0 +1,26 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc64
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32
+; RUN: llvm-upgrade < %s | llvm-as | llc
+
+void %bitap() {
+entry:
+ %RMask.i = alloca [256 x uint], align 16 ; <[256 x uint]*> [#uses=1]
+ %buffer = alloca [147456 x sbyte], align 16 ; <[147456 x sbyte]*> [#uses=0]
+ br bool false, label %bb19, label %bb.preheader
+
+bb.preheader: ; preds = %entry
+ ret void
+
+bb19: ; preds = %entry
+ br bool false, label %bb12.i, label %cond_next39
+
+bb12.i: ; preds = %bb12.i, %bb19
+ %i.0.i = phi uint [ %tmp11.i, %bb12.i ], [ 0, %bb19 ] ; <uint> [#uses=2]
+ %tmp9.i = getelementptr [256 x uint]* %RMask.i, int 0, uint %i.0.i ; <uint*> [#uses=1]
+ store uint 0, uint* %tmp9.i
+ %tmp11.i = add uint %i.0.i, 1 ; <uint> [#uses=1]
+ br label %bb12.i
+
+cond_next39: ; preds = %bb19
+ ret void
+}
diff --git a/test/CodeGen/PowerPC/2006-12-07-SelectCrash.ll b/test/CodeGen/PowerPC/2006-12-07-SelectCrash.ll
new file mode 100644
index 0000000..6fa410e
--- /dev/null
+++ b/test/CodeGen/PowerPC/2006-12-07-SelectCrash.ll
@@ -0,0 +1,27 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc64
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32
+; RUN: llvm-upgrade < %s | llvm-as | llc
+
+%qsz.b = external global bool ; <bool*> [#uses=1]
+
+implementation ; Functions:
+
+fastcc void %qst() {
+entry:
+ br bool true, label %cond_next71, label %cond_true
+
+cond_true: ; preds = %entry
+ ret void
+
+cond_next71: ; preds = %entry
+ %tmp73.b = load bool* %qsz.b ; <bool> [#uses=1]
+ %ii.4.ph = select bool %tmp73.b, ulong 4, ulong 0 ; <ulong> [#uses=1]
+ br label %bb139
+
+bb82: ; preds = %bb139
+ ret void
+
+bb139: ; preds = %bb139, %cond_next71
+ %exitcond89 = seteq ulong 0, %ii.4.ph ; <bool> [#uses=1]
+ br bool %exitcond89, label %bb82, label %bb139
+}
diff --git a/test/CodeGen/PowerPC/2007-01-04-ArgExtension.ll b/test/CodeGen/PowerPC/2007-01-04-ArgExtension.ll
new file mode 100644
index 0000000..19fedf9
--- /dev/null
+++ b/test/CodeGen/PowerPC/2007-01-04-ArgExtension.ll
@@ -0,0 +1,10 @@
+; RUN: llvm-as < %s | llc -march=ppc32 | grep extsb
+; RUN: llvm-as < %s | llc -march=ppc32 | grep extsh
+
+define i32 @p1(i8 %c, i16 %s) {
+entry:
+ %tmp = sext i8 %c to i32 ; <i32> [#uses=1]
+ %tmp1 = sext i16 %s to i32 ; <i32> [#uses=1]
+ %tmp2 = add i32 %tmp1, %tmp ; <i32> [#uses=1]
+ ret i32 %tmp2
+}
diff --git a/test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll b/test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll
new file mode 100644
index 0000000..d9374ed
--- /dev/null
+++ b/test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll
@@ -0,0 +1,27 @@
+; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
+; RUN: grep cntlzw
+
+define i32 @foo() {
+entry:
+ %retval = alloca i32, align 4 ; <i32*> [#uses=2]
+ %temp = alloca i32, align 4 ; <i32*> [#uses=2]
+ %ctz_x = alloca i32, align 4 ; <i32*> [#uses=3]
+ %ctz_c = alloca i32, align 4 ; <i32*> [#uses=2]
+ "alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ store i32 61440, i32* %ctz_x
+ %tmp = load i32* %ctz_x ; <i32> [#uses=1]
+ %tmp1 = sub i32 0, %tmp ; <i32> [#uses=1]
+ %tmp2 = load i32* %ctz_x ; <i32> [#uses=1]
+ %tmp3 = and i32 %tmp1, %tmp2 ; <i32> [#uses=1]
+ %tmp4 = call i32 asm "$(cntlz$|cntlzw$) $0,$1", "=r,r,~{dirflag},~{fpsr},~{flags}"( i32 %tmp3 ) ; <i32> [#uses=1]
+ store i32 %tmp4, i32* %ctz_c
+ %tmp5 = load i32* %ctz_c ; <i32> [#uses=1]
+ store i32 %tmp5, i32* %temp
+ %tmp6 = load i32* %temp ; <i32> [#uses=1]
+ store i32 %tmp6, i32* %retval
+ br label %return
+
+return: ; preds = %entry
+ %retval2 = load i32* %retval ; <i32> [#uses=1]
+ ret i32 %retval2
+}
diff --git a/test/CodeGen/PowerPC/2007-01-29-lbrx-asm.ll b/test/CodeGen/PowerPC/2007-01-29-lbrx-asm.ll
new file mode 100644
index 0000000..f2c951e
--- /dev/null
+++ b/test/CodeGen/PowerPC/2007-01-29-lbrx-asm.ll
@@ -0,0 +1,7 @@
+; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llvm-as < %s | llc -march=ppc64
+
+define i16 @test(i8* %d1, i16* %d2) {
+ %tmp237 = call i16 asm "lhbrx $0, $2, $1", "=r,r,bO,m"( i8* %d1, i32 0, i16* %d2 ) ; <i16> [#uses=1]
+ ret i16 %tmp237
+}
diff --git a/test/CodeGen/PowerPC/2007-01-31-InlineAsmAddrMode.ll b/test/CodeGen/PowerPC/2007-01-31-InlineAsmAddrMode.ll
new file mode 100644
index 0000000..d476462
--- /dev/null
+++ b/test/CodeGen/PowerPC/2007-01-31-InlineAsmAddrMode.ll
@@ -0,0 +1,24 @@
+; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llvm-as < %s | llc -march=ppc64
+
+; Test two things: 1) that a frameidx can be rewritten in an inline asm
+; 2) that inline asms can handle reg+imm addr modes.
+
+ %struct.A = type { i32, i32 }
+
+
+define void @test1() {
+entry:
+ %Out = alloca %struct.A, align 4 ; <%struct.A*> [#uses=1]
+ %tmp2 = getelementptr %struct.A* %Out, i32 0, i32 1
+ %tmp5 = call i32 asm "lwbrx $0, $1", "=r,m"(i32* %tmp2 )
+ ret void
+}
+
+define void @test2() {
+entry:
+ %Out = alloca %struct.A, align 4 ; <%struct.A*> [#uses=1]
+ %tmp2 = getelementptr %struct.A* %Out, i32 0, i32 0 ; <i32*> [#uses=1]
+ %tmp5 = call i32 asm "lwbrx $0, $2, $1", "=r,r,bO,m"( i8* null, i32 0, i32* %tmp2 ) ; <i32> [#uses=0]
+ ret void
+}
diff --git a/test/CodeGen/PowerPC/2007-02-16-AlignPacked.ll b/test/CodeGen/PowerPC/2007-02-16-AlignPacked.ll
new file mode 100644
index 0000000..97f6a01
--- /dev/null
+++ b/test/CodeGen/PowerPC/2007-02-16-AlignPacked.ll
@@ -0,0 +1,4 @@
+; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin8.8.0 | \
+; RUN: grep align.*3
+
+@X = global <{i32, i32}> <{ i32 1, i32 123 }>
diff --git a/test/CodeGen/PowerPC/2007-02-16-InlineAsmNConstraint.ll b/test/CodeGen/PowerPC/2007-02-16-InlineAsmNConstraint.ll
new file mode 100644
index 0000000..5a3d3b5
--- /dev/null
+++ b/test/CodeGen/PowerPC/2007-02-16-InlineAsmNConstraint.ll
@@ -0,0 +1,11 @@
+; RUN: llvm-as < %s | llc
+
+target datalayout = "E-p:32:32"
+target triple = "powerpc-apple-darwin8.8.0"
+
+
+define void @blargh() {
+entry:
+ %tmp4 = call i32 asm "rlwimi $0,$2,$3,$4,$5", "=r,0,r,n,n,n"( i32 0, i32 0, i32 0, i32 24, i32 31 ) ; <i32> [#uses=0]
+ unreachable
+}
diff --git a/test/CodeGen/PowerPC/2007-02-23-lr-saved-twice.ll b/test/CodeGen/PowerPC/2007-02-23-lr-saved-twice.ll
new file mode 100644
index 0000000..3a7d393
--- /dev/null
+++ b/test/CodeGen/PowerPC/2007-02-23-lr-saved-twice.ll
@@ -0,0 +1,14 @@
+; RUN: llvm-as < %s | llc | grep mflr | wc -l | grep 1
+
+target datalayout = "e-p:32:32"
+target triple = "powerpc-apple-darwin8"
+@str = internal constant [18 x i8] c"hello world!, %d\0A\00" ; <[18 x i8]*> [#uses=1]
+
+
+define i32 @main() {
+entry:
+ %tmp = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([18 x i8]* @str, i32 0, i32 0) ) ; <i32> [#uses=0]
+ ret i32 0
+}
+
+declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/PowerPC/2007-03-24-cntlzd.ll b/test/CodeGen/PowerPC/2007-03-24-cntlzd.ll
new file mode 100644
index 0000000..1ea6174
--- /dev/null
+++ b/test/CodeGen/PowerPC/2007-03-24-cntlzd.ll
@@ -0,0 +1,11 @@
+; RUN: llvm-as < %s | llc -march=ppc64 -mcpu=g5 | grep cntlzd
+
+define i32 @_ZNK4llvm5APInt17countLeadingZerosEv(i64 *%t) {
+ %tmp19 = load i64* %t
+ %tmp23 = tail call i32 @llvm.ctlz.i64( i64 %tmp19 ) ; <i64> [#uses=1]
+ %tmp89 = add i32 %tmp23, -64 ; <i32> [#uses=1]
+ %tmp90 = add i32 %tmp89, 0 ; <i32> [#uses=1]
+ ret i32 %tmp90
+}
+
+declare i32 @llvm.ctlz.i64(i64)
diff --git a/test/CodeGen/PowerPC/2007-03-30-SpillerCrash.ll b/test/CodeGen/PowerPC/2007-03-30-SpillerCrash.ll
new file mode 100644
index 0000000..04ca3bb
--- /dev/null
+++ b/test/CodeGen/PowerPC/2007-03-30-SpillerCrash.ll
@@ -0,0 +1,1801 @@
+; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5
+
+define void @test(<4 x float>*, { { i16, i16, i32 } }*) {
+xOperationInitMasks.exit:
+ %.sub7896 = getelementptr [4 x <4 x i32>]* null, i32 0, i32 0 ; <<4 x i32>*> [#uses=24]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 175, i32 3 ; <<4 x float>*>:2 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 174, i32 2 ; <<4 x float>*>:3 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 174, i32 3 ; <<4 x float>*>:4 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 173, i32 1 ; <<4 x float>*>:5 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 173, i32 2 ; <<4 x float>*>:6 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 173, i32 3 ; <<4 x float>*>:7 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 172, i32 1 ; <<4 x float>*>:8 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 172, i32 2 ; <<4 x float>*>:9 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 172, i32 3 ; <<4 x float>*>:10 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 171, i32 1 ; <<4 x float>*>:11 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 171, i32 2 ; <<4 x float>*>:12 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 171, i32 3 ; <<4 x float>*>:13 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 170, i32 1 ; <<4 x float>*>:14 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 170, i32 2 ; <<4 x float>*>:15 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 170, i32 3 ; <<4 x float>*>:16 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 169, i32 1 ; <<4 x float>*>:17 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 169, i32 2 ; <<4 x float>*>:18 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 169, i32 3 ; <<4 x float>*>:19 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 168, i32 1 ; <<4 x float>*>:20 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 168, i32 2 ; <<4 x float>*>:21 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 168, i32 3 ; <<4 x float>*>:22 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 167, i32 1 ; <<4 x float>*>:23 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 167, i32 2 ; <<4 x float>*>:24 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 167, i32 3 ; <<4 x float>*>:25 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 166, i32 1 ; <<4 x float>*>:26 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 166, i32 2 ; <<4 x float>*>:27 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 166, i32 3 ; <<4 x float>*>:28 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 165, i32 1 ; <<4 x float>*>:29 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 165, i32 2 ; <<4 x float>*>:30 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 165, i32 3 ; <<4 x float>*>:31 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 164, i32 1 ; <<4 x float>*>:32 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 164, i32 2 ; <<4 x float>*>:33 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 164, i32 3 ; <<4 x float>*>:34 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 163, i32 1 ; <<4 x float>*>:35 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 163, i32 2 ; <<4 x float>*>:36 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 163, i32 3 ; <<4 x float>*>:37 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 162, i32 1 ; <<4 x float>*>:38 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 162, i32 2 ; <<4 x float>*>:39 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 162, i32 3 ; <<4 x float>*>:40 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 161, i32 1 ; <<4 x float>*>:41 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 161, i32 2 ; <<4 x float>*>:42 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 161, i32 3 ; <<4 x float>*>:43 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 160, i32 1 ; <<4 x float>*>:44 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 160, i32 2 ; <<4 x float>*>:45 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 160, i32 3 ; <<4 x float>*>:46 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 159, i32 1 ; <<4 x float>*>:47 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 159, i32 2 ; <<4 x float>*>:48 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 159, i32 3 ; <<4 x float>*>:49 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 158, i32 1 ; <<4 x float>*>:50 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 158, i32 2 ; <<4 x float>*>:51 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 158, i32 3 ; <<4 x float>*>:52 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 157, i32 1 ; <<4 x float>*>:53 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 157, i32 2 ; <<4 x float>*>:54 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 157, i32 3 ; <<4 x float>*>:55 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 156, i32 1 ; <<4 x float>*>:56 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 156, i32 2 ; <<4 x float>*>:57 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 156, i32 3 ; <<4 x float>*>:58 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 155, i32 1 ; <<4 x float>*>:59 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 155, i32 2 ; <<4 x float>*>:60 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 155, i32 3 ; <<4 x float>*>:61 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 154, i32 1 ; <<4 x float>*>:62 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 154, i32 2 ; <<4 x float>*>:63 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 154, i32 3 ; <<4 x float>*>:64 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 153, i32 1 ; <<4 x float>*>:65 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 153, i32 2 ; <<4 x float>*>:66 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 153, i32 3 ; <<4 x float>*>:67 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 152, i32 1 ; <<4 x float>*>:68 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 152, i32 2 ; <<4 x float>*>:69 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 152, i32 3 ; <<4 x float>*>:70 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 151, i32 1 ; <<4 x float>*>:71 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 151, i32 2 ; <<4 x float>*>:72 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 151, i32 3 ; <<4 x float>*>:73 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 150, i32 1 ; <<4 x float>*>:74 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 150, i32 2 ; <<4 x float>*>:75 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 150, i32 3 ; <<4 x float>*>:76 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 149, i32 1 ; <<4 x float>*>:77 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 149, i32 2 ; <<4 x float>*>:78 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 149, i32 3 ; <<4 x float>*>:79 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 148, i32 1 ; <<4 x float>*>:80 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 148, i32 2 ; <<4 x float>*>:81 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 148, i32 3 ; <<4 x float>*>:82 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 147, i32 1 ; <<4 x float>*>:83 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 147, i32 2 ; <<4 x float>*>:84 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 147, i32 3 ; <<4 x float>*>:85 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 146, i32 1 ; <<4 x float>*>:86 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 146, i32 2 ; <<4 x float>*>:87 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 146, i32 3 ; <<4 x float>*>:88 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 145, i32 1 ; <<4 x float>*>:89 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 145, i32 2 ; <<4 x float>*>:90 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 145, i32 3 ; <<4 x float>*>:91 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 144, i32 1 ; <<4 x float>*>:92 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 144, i32 2 ; <<4 x float>*>:93 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 144, i32 3 ; <<4 x float>*>:94 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 143, i32 1 ; <<4 x float>*>:95 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 143, i32 2 ; <<4 x float>*>:96 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 143, i32 3 ; <<4 x float>*>:97 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 142, i32 1 ; <<4 x float>*>:98 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 142, i32 2 ; <<4 x float>*>:99 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 142, i32 3 ; <<4 x float>*>:100 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 141, i32 1 ; <<4 x float>*>:101 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 141, i32 2 ; <<4 x float>*>:102 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 141, i32 3 ; <<4 x float>*>:103 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 140, i32 1 ; <<4 x float>*>:104 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 140, i32 2 ; <<4 x float>*>:105 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 140, i32 3 ; <<4 x float>*>:106 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 139, i32 1 ; <<4 x float>*>:107 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 139, i32 2 ; <<4 x float>*>:108 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 139, i32 3 ; <<4 x float>*>:109 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 138, i32 1 ; <<4 x float>*>:110 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 138, i32 2 ; <<4 x float>*>:111 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 138, i32 3 ; <<4 x float>*>:112 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 137, i32 1 ; <<4 x float>*>:113 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 137, i32 2 ; <<4 x float>*>:114 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 137, i32 3 ; <<4 x float>*>:115 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 136, i32 1 ; <<4 x float>*>:116 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 136, i32 2 ; <<4 x float>*>:117 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 136, i32 3 ; <<4 x float>*>:118 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 135, i32 1 ; <<4 x float>*>:119 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 135, i32 2 ; <<4 x float>*>:120 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 135, i32 3 ; <<4 x float>*>:121 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 134, i32 1 ; <<4 x float>*>:122 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 134, i32 2 ; <<4 x float>*>:123 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 134, i32 3 ; <<4 x float>*>:124 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 133, i32 1 ; <<4 x float>*>:125 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 133, i32 2 ; <<4 x float>*>:126 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 133, i32 3 ; <<4 x float>*>:127 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 132, i32 1 ; <<4 x float>*>:128 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 132, i32 2 ; <<4 x float>*>:129 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 132, i32 3 ; <<4 x float>*>:130 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 131, i32 1 ; <<4 x float>*>:131 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 131, i32 2 ; <<4 x float>*>:132 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 131, i32 3 ; <<4 x float>*>:133 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 130, i32 1 ; <<4 x float>*>:134 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 130, i32 2 ; <<4 x float>*>:135 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 130, i32 3 ; <<4 x float>*>:136 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 129, i32 1 ; <<4 x float>*>:137 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 129, i32 2 ; <<4 x float>*>:138 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 129, i32 3 ; <<4 x float>*>:139 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 128, i32 1 ; <<4 x float>*>:140 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 128, i32 2 ; <<4 x float>*>:141 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 128, i32 3 ; <<4 x float>*>:142 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 127, i32 1 ; <<4 x float>*>:143 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 127, i32 2 ; <<4 x float>*>:144 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 127, i32 3 ; <<4 x float>*>:145 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 126, i32 1 ; <<4 x float>*>:146 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 126, i32 2 ; <<4 x float>*>:147 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 126, i32 3 ; <<4 x float>*>:148 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 125, i32 1 ; <<4 x float>*>:149 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 125, i32 2 ; <<4 x float>*>:150 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 125, i32 3 ; <<4 x float>*>:151 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 124, i32 1 ; <<4 x float>*>:152 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 124, i32 2 ; <<4 x float>*>:153 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 124, i32 3 ; <<4 x float>*>:154 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 123, i32 1 ; <<4 x float>*>:155 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 123, i32 2 ; <<4 x float>*>:156 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 123, i32 3 ; <<4 x float>*>:157 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 122, i32 1 ; <<4 x float>*>:158 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 122, i32 2 ; <<4 x float>*>:159 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 122, i32 3 ; <<4 x float>*>:160 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 121, i32 1 ; <<4 x float>*>:161 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 121, i32 2 ; <<4 x float>*>:162 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 121, i32 3 ; <<4 x float>*>:163 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 120, i32 1 ; <<4 x float>*>:164 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 120, i32 2 ; <<4 x float>*>:165 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 120, i32 3 ; <<4 x float>*>:166 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 119, i32 1 ; <<4 x float>*>:167 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 119, i32 2 ; <<4 x float>*>:168 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 119, i32 3 ; <<4 x float>*>:169 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 118, i32 1 ; <<4 x float>*>:170 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 118, i32 2 ; <<4 x float>*>:171 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 118, i32 3 ; <<4 x float>*>:172 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 117, i32 1 ; <<4 x float>*>:173 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 117, i32 2 ; <<4 x float>*>:174 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 117, i32 3 ; <<4 x float>*>:175 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 116, i32 1 ; <<4 x float>*>:176 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 116, i32 2 ; <<4 x float>*>:177 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 116, i32 3 ; <<4 x float>*>:178 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 115, i32 1 ; <<4 x float>*>:179 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 115, i32 2 ; <<4 x float>*>:180 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 115, i32 3 ; <<4 x float>*>:181 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 114, i32 1 ; <<4 x float>*>:182 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 114, i32 2 ; <<4 x float>*>:183 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 114, i32 3 ; <<4 x float>*>:184 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 113, i32 1 ; <<4 x float>*>:185 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 113, i32 2 ; <<4 x float>*>:186 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 113, i32 3 ; <<4 x float>*>:187 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 112, i32 1 ; <<4 x float>*>:188 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 112, i32 2 ; <<4 x float>*>:189 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 112, i32 3 ; <<4 x float>*>:190 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 111, i32 1 ; <<4 x float>*>:191 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 111, i32 2 ; <<4 x float>*>:192 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 111, i32 3 ; <<4 x float>*>:193 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 110, i32 1 ; <<4 x float>*>:194 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 110, i32 2 ; <<4 x float>*>:195 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 110, i32 3 ; <<4 x float>*>:196 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 109, i32 1 ; <<4 x float>*>:197 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 109, i32 2 ; <<4 x float>*>:198 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 109, i32 3 ; <<4 x float>*>:199 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 108, i32 1 ; <<4 x float>*>:200 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 108, i32 2 ; <<4 x float>*>:201 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 108, i32 3 ; <<4 x float>*>:202 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 107, i32 1 ; <<4 x float>*>:203 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 107, i32 2 ; <<4 x float>*>:204 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 107, i32 3 ; <<4 x float>*>:205 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 106, i32 1 ; <<4 x float>*>:206 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 106, i32 2 ; <<4 x float>*>:207 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 106, i32 3 ; <<4 x float>*>:208 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 105, i32 1 ; <<4 x float>*>:209 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 105, i32 2 ; <<4 x float>*>:210 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 105, i32 3 ; <<4 x float>*>:211 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 104, i32 1 ; <<4 x float>*>:212 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 104, i32 2 ; <<4 x float>*>:213 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 104, i32 3 ; <<4 x float>*>:214 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 103, i32 1 ; <<4 x float>*>:215 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 103, i32 2 ; <<4 x float>*>:216 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 103, i32 3 ; <<4 x float>*>:217 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 102, i32 1 ; <<4 x float>*>:218 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 102, i32 2 ; <<4 x float>*>:219 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 102, i32 3 ; <<4 x float>*>:220 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 101, i32 1 ; <<4 x float>*>:221 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 101, i32 2 ; <<4 x float>*>:222 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 101, i32 3 ; <<4 x float>*>:223 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 100, i32 1 ; <<4 x float>*>:224 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 100, i32 2 ; <<4 x float>*>:225 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 100, i32 3 ; <<4 x float>*>:226 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 99, i32 1 ; <<4 x float>*>:227 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 99, i32 2 ; <<4 x float>*>:228 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 99, i32 3 ; <<4 x float>*>:229 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 98, i32 1 ; <<4 x float>*>:230 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 98, i32 2 ; <<4 x float>*>:231 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 98, i32 3 ; <<4 x float>*>:232 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 97, i32 1 ; <<4 x float>*>:233 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 97, i32 2 ; <<4 x float>*>:234 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 97, i32 3 ; <<4 x float>*>:235 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 96, i32 1 ; <<4 x float>*>:236 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 96, i32 2 ; <<4 x float>*>:237 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 96, i32 3 ; <<4 x float>*>:238 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 95, i32 1 ; <<4 x float>*>:239 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 95, i32 2 ; <<4 x float>*>:240 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 95, i32 3 ; <<4 x float>*>:241 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 94, i32 1 ; <<4 x float>*>:242 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 94, i32 2 ; <<4 x float>*>:243 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 94, i32 3 ; <<4 x float>*>:244 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 93, i32 1 ; <<4 x float>*>:245 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 93, i32 2 ; <<4 x float>*>:246 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 93, i32 3 ; <<4 x float>*>:247 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 92, i32 1 ; <<4 x float>*>:248 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 92, i32 2 ; <<4 x float>*>:249 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 92, i32 3 ; <<4 x float>*>:250 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 91, i32 1 ; <<4 x float>*>:251 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 91, i32 2 ; <<4 x float>*>:252 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 91, i32 3 ; <<4 x float>*>:253 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 90, i32 1 ; <<4 x float>*>:254 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 90, i32 2 ; <<4 x float>*>:255 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 90, i32 3 ; <<4 x float>*>:256 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 89, i32 1 ; <<4 x float>*>:257 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 89, i32 2 ; <<4 x float>*>:258 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 89, i32 3 ; <<4 x float>*>:259 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 88, i32 1 ; <<4 x float>*>:260 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 88, i32 2 ; <<4 x float>*>:261 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 88, i32 3 ; <<4 x float>*>:262 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 87, i32 1 ; <<4 x float>*>:263 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 87, i32 2 ; <<4 x float>*>:264 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 87, i32 3 ; <<4 x float>*>:265 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 86, i32 1 ; <<4 x float>*>:266 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 86, i32 2 ; <<4 x float>*>:267 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 86, i32 3 ; <<4 x float>*>:268 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 85, i32 1 ; <<4 x float>*>:269 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 85, i32 2 ; <<4 x float>*>:270 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 85, i32 3 ; <<4 x float>*>:271 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 84, i32 1 ; <<4 x float>*>:272 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 84, i32 2 ; <<4 x float>*>:273 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 84, i32 3 ; <<4 x float>*>:274 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 83, i32 1 ; <<4 x float>*>:275 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 83, i32 2 ; <<4 x float>*>:276 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 83, i32 3 ; <<4 x float>*>:277 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 82, i32 1 ; <<4 x float>*>:278 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 82, i32 2 ; <<4 x float>*>:279 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 82, i32 3 ; <<4 x float>*>:280 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 81, i32 1 ; <<4 x float>*>:281 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 81, i32 2 ; <<4 x float>*>:282 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 81, i32 3 ; <<4 x float>*>:283 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 80, i32 1 ; <<4 x float>*>:284 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 80, i32 2 ; <<4 x float>*>:285 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 80, i32 3 ; <<4 x float>*>:286 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 79, i32 1 ; <<4 x float>*>:287 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 79, i32 2 ; <<4 x float>*>:288 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 79, i32 3 ; <<4 x float>*>:289 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 78, i32 1 ; <<4 x float>*>:290 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 78, i32 2 ; <<4 x float>*>:291 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 78, i32 3 ; <<4 x float>*>:292 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 77, i32 1 ; <<4 x float>*>:293 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 77, i32 2 ; <<4 x float>*>:294 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 77, i32 3 ; <<4 x float>*>:295 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 76, i32 1 ; <<4 x float>*>:296 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 76, i32 2 ; <<4 x float>*>:297 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 76, i32 3 ; <<4 x float>*>:298 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 75, i32 1 ; <<4 x float>*>:299 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 75, i32 2 ; <<4 x float>*>:300 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 75, i32 3 ; <<4 x float>*>:301 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 74, i32 1 ; <<4 x float>*>:302 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 74, i32 2 ; <<4 x float>*>:303 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 74, i32 3 ; <<4 x float>*>:304 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 73, i32 1 ; <<4 x float>*>:305 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 73, i32 2 ; <<4 x float>*>:306 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 73, i32 3 ; <<4 x float>*>:307 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 72, i32 1 ; <<4 x float>*>:308 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 72, i32 2 ; <<4 x float>*>:309 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 72, i32 3 ; <<4 x float>*>:310 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 71, i32 1 ; <<4 x float>*>:311 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 71, i32 2 ; <<4 x float>*>:312 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 71, i32 3 ; <<4 x float>*>:313 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 70, i32 1 ; <<4 x float>*>:314 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 70, i32 2 ; <<4 x float>*>:315 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 70, i32 3 ; <<4 x float>*>:316 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 69, i32 1 ; <<4 x float>*>:317 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 69, i32 2 ; <<4 x float>*>:318 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 69, i32 3 ; <<4 x float>*>:319 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 68, i32 1 ; <<4 x float>*>:320 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 68, i32 2 ; <<4 x float>*>:321 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 68, i32 3 ; <<4 x float>*>:322 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 67, i32 1 ; <<4 x float>*>:323 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 67, i32 2 ; <<4 x float>*>:324 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 67, i32 3 ; <<4 x float>*>:325 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 66, i32 1 ; <<4 x float>*>:326 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 66, i32 2 ; <<4 x float>*>:327 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 66, i32 3 ; <<4 x float>*>:328 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 65, i32 1 ; <<4 x float>*>:329 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 65, i32 2 ; <<4 x float>*>:330 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 65, i32 3 ; <<4 x float>*>:331 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 64, i32 1 ; <<4 x float>*>:332 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 64, i32 2 ; <<4 x float>*>:333 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 64, i32 3 ; <<4 x float>*>:334 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 63, i32 1 ; <<4 x float>*>:335 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 63, i32 2 ; <<4 x float>*>:336 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 63, i32 3 ; <<4 x float>*>:337 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 62, i32 1 ; <<4 x float>*>:338 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 62, i32 2 ; <<4 x float>*>:339 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 62, i32 3 ; <<4 x float>*>:340 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 61, i32 1 ; <<4 x float>*>:341 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 61, i32 2 ; <<4 x float>*>:342 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 61, i32 3 ; <<4 x float>*>:343 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 60, i32 1 ; <<4 x float>*>:344 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 60, i32 2 ; <<4 x float>*>:345 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 60, i32 3 ; <<4 x float>*>:346 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 59, i32 1 ; <<4 x float>*>:347 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 59, i32 2 ; <<4 x float>*>:348 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 59, i32 3 ; <<4 x float>*>:349 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 58, i32 1 ; <<4 x float>*>:350 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 58, i32 2 ; <<4 x float>*>:351 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 58, i32 3 ; <<4 x float>*>:352 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 57, i32 1 ; <<4 x float>*>:353 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 57, i32 2 ; <<4 x float>*>:354 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 57, i32 3 ; <<4 x float>*>:355 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 56, i32 1 ; <<4 x float>*>:356 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 56, i32 2 ; <<4 x float>*>:357 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 56, i32 3 ; <<4 x float>*>:358 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 55, i32 1 ; <<4 x float>*>:359 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 55, i32 2 ; <<4 x float>*>:360 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 55, i32 3 ; <<4 x float>*>:361 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 54, i32 1 ; <<4 x float>*>:362 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 54, i32 2 ; <<4 x float>*>:363 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 54, i32 3 ; <<4 x float>*>:364 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 53, i32 1 ; <<4 x float>*>:365 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 53, i32 2 ; <<4 x float>*>:366 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 53, i32 3 ; <<4 x float>*>:367 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 52, i32 1 ; <<4 x float>*>:368 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 52, i32 2 ; <<4 x float>*>:369 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 52, i32 3 ; <<4 x float>*>:370 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 51, i32 1 ; <<4 x float>*>:371 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 51, i32 2 ; <<4 x float>*>:372 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 51, i32 3 ; <<4 x float>*>:373 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 50, i32 1 ; <<4 x float>*>:374 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 50, i32 2 ; <<4 x float>*>:375 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 50, i32 3 ; <<4 x float>*>:376 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 49, i32 1 ; <<4 x float>*>:377 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 49, i32 2 ; <<4 x float>*>:378 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 49, i32 3 ; <<4 x float>*>:379 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 48, i32 1 ; <<4 x float>*>:380 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 48, i32 2 ; <<4 x float>*>:381 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 48, i32 3 ; <<4 x float>*>:382 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 47, i32 1 ; <<4 x float>*>:383 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 47, i32 2 ; <<4 x float>*>:384 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 47, i32 3 ; <<4 x float>*>:385 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 46, i32 1 ; <<4 x float>*>:386 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 46, i32 2 ; <<4 x float>*>:387 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 46, i32 3 ; <<4 x float>*>:388 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 45, i32 1 ; <<4 x float>*>:389 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 45, i32 2 ; <<4 x float>*>:390 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 45, i32 3 ; <<4 x float>*>:391 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 44, i32 1 ; <<4 x float>*>:392 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 44, i32 2 ; <<4 x float>*>:393 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 44, i32 3 ; <<4 x float>*>:394 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 43, i32 1 ; <<4 x float>*>:395 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 43, i32 2 ; <<4 x float>*>:396 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 43, i32 3 ; <<4 x float>*>:397 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 42, i32 1 ; <<4 x float>*>:398 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 42, i32 2 ; <<4 x float>*>:399 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 42, i32 3 ; <<4 x float>*>:400 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 41, i32 1 ; <<4 x float>*>:401 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 41, i32 2 ; <<4 x float>*>:402 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 41, i32 3 ; <<4 x float>*>:403 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 40, i32 1 ; <<4 x float>*>:404 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 40, i32 2 ; <<4 x float>*>:405 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 40, i32 3 ; <<4 x float>*>:406 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 39, i32 1 ; <<4 x float>*>:407 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 39, i32 2 ; <<4 x float>*>:408 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 39, i32 3 ; <<4 x float>*>:409 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 38, i32 1 ; <<4 x float>*>:410 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 38, i32 2 ; <<4 x float>*>:411 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 38, i32 3 ; <<4 x float>*>:412 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 37, i32 1 ; <<4 x float>*>:413 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 37, i32 2 ; <<4 x float>*>:414 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 37, i32 3 ; <<4 x float>*>:415 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 36, i32 1 ; <<4 x float>*>:416 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 36, i32 2 ; <<4 x float>*>:417 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 36, i32 3 ; <<4 x float>*>:418 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 35, i32 1 ; <<4 x float>*>:419 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 35, i32 2 ; <<4 x float>*>:420 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 35, i32 3 ; <<4 x float>*>:421 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 34, i32 1 ; <<4 x float>*>:422 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 34, i32 2 ; <<4 x float>*>:423 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 34, i32 3 ; <<4 x float>*>:424 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 33, i32 1 ; <<4 x float>*>:425 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 33, i32 2 ; <<4 x float>*>:426 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 33, i32 3 ; <<4 x float>*>:427 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 32, i32 1 ; <<4 x float>*>:428 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 32, i32 2 ; <<4 x float>*>:429 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 32, i32 3 ; <<4 x float>*>:430 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 31, i32 1 ; <<4 x float>*>:431 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 31, i32 2 ; <<4 x float>*>:432 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 31, i32 3 ; <<4 x float>*>:433 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 30, i32 1 ; <<4 x float>*>:434 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 30, i32 2 ; <<4 x float>*>:435 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 30, i32 3 ; <<4 x float>*>:436 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 29, i32 1 ; <<4 x float>*>:437 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 29, i32 2 ; <<4 x float>*>:438 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 29, i32 3 ; <<4 x float>*>:439 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 28, i32 1 ; <<4 x float>*>:440 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 28, i32 2 ; <<4 x float>*>:441 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 28, i32 3 ; <<4 x float>*>:442 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 27, i32 1 ; <<4 x float>*>:443 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 27, i32 2 ; <<4 x float>*>:444 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 27, i32 3 ; <<4 x float>*>:445 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 26, i32 1 ; <<4 x float>*>:446 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 26, i32 2 ; <<4 x float>*>:447 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 26, i32 3 ; <<4 x float>*>:448 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 25, i32 1 ; <<4 x float>*>:449 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 25, i32 2 ; <<4 x float>*>:450 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 25, i32 3 ; <<4 x float>*>:451 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 24, i32 1 ; <<4 x float>*>:452 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 24, i32 2 ; <<4 x float>*>:453 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 24, i32 3 ; <<4 x float>*>:454 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 23, i32 1 ; <<4 x float>*>:455 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 23, i32 2 ; <<4 x float>*>:456 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 23, i32 3 ; <<4 x float>*>:457 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 22, i32 1 ; <<4 x float>*>:458 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 22, i32 2 ; <<4 x float>*>:459 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 22, i32 3 ; <<4 x float>*>:460 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 21, i32 1 ; <<4 x float>*>:461 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 21, i32 2 ; <<4 x float>*>:462 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 21, i32 3 ; <<4 x float>*>:463 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 20, i32 1 ; <<4 x float>*>:464 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 20, i32 2 ; <<4 x float>*>:465 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 20, i32 3 ; <<4 x float>*>:466 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 19, i32 1 ; <<4 x float>*>:467 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 19, i32 2 ; <<4 x float>*>:468 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 19, i32 3 ; <<4 x float>*>:469 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 18, i32 1 ; <<4 x float>*>:470 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 18, i32 2 ; <<4 x float>*>:471 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 18, i32 3 ; <<4 x float>*>:472 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 17, i32 1 ; <<4 x float>*>:473 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 17, i32 2 ; <<4 x float>*>:474 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 17, i32 3 ; <<4 x float>*>:475 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 16, i32 1 ; <<4 x float>*>:476 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 16, i32 2 ; <<4 x float>*>:477 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 16, i32 3 ; <<4 x float>*>:478 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 15, i32 1 ; <<4 x float>*>:479 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 15, i32 2 ; <<4 x float>*>:480 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 15, i32 3 ; <<4 x float>*>:481 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 14, i32 1 ; <<4 x float>*>:482 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 14, i32 2 ; <<4 x float>*>:483 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 14, i32 3 ; <<4 x float>*>:484 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 1 ; <<4 x float>*>:485 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 2 ; <<4 x float>*>:486 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 3 ; <<4 x float>*>:487 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 3, i32 1 ; <<4 x float>*>:488 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 3, i32 2 ; <<4 x float>*>:489 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 3, i32 3 ; <<4 x float>*>:490 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 180, i32 1 ; <<4 x float>*>:491 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 180, i32 2 ; <<4 x float>*>:492 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 180, i32 3 ; <<4 x float>*>:493 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 181, i32 1 ; <<4 x float>*>:494 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 181, i32 2 ; <<4 x float>*>:495 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 181, i32 3 ; <<4 x float>*>:496 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 182, i32 1 ; <<4 x float>*>:497 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 182, i32 2 ; <<4 x float>*>:498 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 182, i32 3 ; <<4 x float>*>:499 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 183, i32 1 ; <<4 x float>*>:500 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 183, i32 2 ; <<4 x float>*>:501 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 183, i32 3 ; <<4 x float>*>:502 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 184, i32 1 ; <<4 x float>*>:503 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 184, i32 2 ; <<4 x float>*>:504 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 184, i32 3 ; <<4 x float>*>:505 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 185, i32 1 ; <<4 x float>*>:506 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 185, i32 2 ; <<4 x float>*>:507 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 185, i32 3 ; <<4 x float>*>:508 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 190, i32 1 ; <<4 x float>*>:509 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 190, i32 2 ; <<4 x float>*>:510 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 190, i32 3 ; <<4 x float>*>:511 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 9, i32 1 ; <<4 x float>*>:512 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 9, i32 2 ; <<4 x float>*>:513 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 9, i32 3 ; <<4 x float>*>:514 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 10, i32 1 ; <<4 x float>*>:515 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 10, i32 2 ; <<4 x float>*>:516 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 10, i32 3 ; <<4 x float>*>:517 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 11, i32 1 ; <<4 x float>*>:518 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 11, i32 2 ; <<4 x float>*>:519 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 11, i32 3 ; <<4 x float>*>:520 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 12, i32 1 ; <<4 x float>*>:521 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 12, i32 2 ; <<4 x float>*>:522 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 12, i32 3 ; <<4 x float>*>:523 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 13, i32 1 ; <<4 x float>*>:524 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 13, i32 2 ; <<4 x float>*>:525 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 13, i32 3 ; <<4 x float>*>:526 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 1 ; <<4 x float>*>:527 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 2 ; <<4 x float>*>:528 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 3 ; <<4 x float>*>:529 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 1 ; <<4 x float>*>:530 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 2 ; <<4 x float>*>:531 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 3 ; <<4 x float>*>:532 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 2, i32 1 ; <<4 x float>*>:533 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 2, i32 2 ; <<4 x float>*>:534 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 2, i32 3 ; <<4 x float>*>:535 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 6, i32 1 ; <<4 x float>*>:536 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 6, i32 2 ; <<4 x float>*>:537 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 6, i32 3 ; <<4 x float>*>:538 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 7, i32 1 ; <<4 x float>*>:539 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 7, i32 2 ; <<4 x float>*>:540 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 7, i32 3 ; <<4 x float>*>:541 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 1 ; <<4 x float>*>:542 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 2 ; <<4 x float>*>:543 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 3 ; <<4 x float>*>:544 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 187, i32 1 ; <<4 x float>*>:545 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 187, i32 2 ; <<4 x float>*>:546 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 187, i32 3 ; <<4 x float>*>:547 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 8, i32 1 ; <<4 x float>*>:548 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 8, i32 2 ; <<4 x float>*>:549 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 8, i32 3 ; <<4 x float>*>:550 [#uses=0]
+ load <4 x float>* null ; <<4 x float>>:551 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 188, i32 1 ; <<4 x float>*>:552 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 188, i32 2 ; <<4 x float>*>:553 [#uses=1]
+ load <4 x float>* %553 ; <<4 x float>>:554 [#uses=1]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 188, i32 3 ; <<4 x float>*>:555 [#uses=0]
+ shufflevector <4 x float> %554, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:556 [#uses=1]
+ call <4 x i32> @llvm.ppc.altivec.vcmpgtfp( <4 x float> zeroinitializer, <4 x float> %556 ) ; <<4 x i32>>:557 [#uses=0]
+ bitcast <4 x i32> zeroinitializer to <4 x float> ; <<4 x float>>:558 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 0 ; <<4 x float>*>:559 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 2 ; <<4 x float>*>:560 [#uses=1]
+ store <4 x float> zeroinitializer, <4 x float>* %560
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 3 ; <<4 x float>*>:561 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 1 ; <<4 x float>*>:562 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 2 ; <<4 x float>*>:563 [#uses=0]
+ load <4 x i32>* %.sub7896 ; <<4 x i32>>:564 [#uses=0]
+ shufflevector <4 x float> zeroinitializer, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:565 [#uses=1]
+ store <4 x float> %565, <4 x float>* null
+ icmp eq i32 0, 0 ; <i1>:566 [#uses=1]
+ br i1 %566, label %.critedge, label %xPIF.exit
+
+.critedge: ; preds = %xOperationInitMasks.exit
+ getelementptr [4 x <4 x i32>]* null, i32 0, i32 3 ; <<4 x i32>*>:567 [#uses=0]
+ and <4 x i32> zeroinitializer, zeroinitializer ; <<4 x i32>>:568 [#uses=0]
+ or <4 x i32> zeroinitializer, zeroinitializer ; <<4 x i32>>:569 [#uses=0]
+ icmp eq i32 0, 0 ; <i1>:570 [#uses=1]
+ br i1 %570, label %.critedge7898, label %xPBRK.exit
+
+.critedge7898: ; preds = %.critedge
+ br label %xPIF.exit
+
+xPIF.exit: ; preds = %.critedge7898, %xOperationInitMasks.exit
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 188, i32 1 ; <<4 x float>*>:571 [#uses=0]
+ load <4 x float>* null ; <<4 x float>>:572 [#uses=0]
+ shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:573 [#uses=0]
+ icmp eq i32 0, 0 ; <i1>:574 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 3, i32 1 ; <<4 x float>*>:575 [#uses=0]
+ load <4 x float>* %0 ; <<4 x float>>:576 [#uses=0]
+ call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:577 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 2, i32 0 ; <<4 x float>*>:578 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 2, i32 1 ; <<4 x float>*>:579 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 2, i32 2 ; <<4 x float>*>:580 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 2, i32 3 ; <<4 x float>*>:581 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 3 ; <<4 x float>*>:582 [#uses=0]
+ load <4 x float>* null ; <<4 x float>>:583 [#uses=1]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 1 ; <<4 x float>*>:584 [#uses=1]
+ load <4 x float>* %584 ; <<4 x float>>:585 [#uses=1]
+ load <4 x float>* null ; <<4 x float>>:586 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 3 ; <<4 x float>*>:587 [#uses=1]
+ load <4 x float>* %587 ; <<4 x float>>:588 [#uses=1]
+ shufflevector <4 x float> %583, <4 x float> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > ; <<4 x float>>:589 [#uses=1]
+ shufflevector <4 x float> %585, <4 x float> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > ; <<4 x float>>:590 [#uses=1]
+ shufflevector <4 x float> %588, <4 x float> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > ; <<4 x float>>:591 [#uses=1]
+ mul <4 x float> zeroinitializer, %589 ; <<4 x float>>:592 [#uses=0]
+ mul <4 x float> zeroinitializer, %590 ; <<4 x float>>:593 [#uses=0]
+ mul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:594 [#uses=1]
+ mul <4 x float> zeroinitializer, %591 ; <<4 x float>>:595 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 0 ; <<4 x float>*>:596 [#uses=2]
+ load <4 x float>* %596 ; <<4 x float>>:597 [#uses=0]
+ store <4 x float> zeroinitializer, <4 x float>* %596
+ load <4 x float>* null ; <<4 x float>>:598 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 2 ; <<4 x float>*>:599 [#uses=0]
+ shufflevector <4 x float> %594, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 1, i32 2, i32 7 > ; <<4 x float>>:600 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 3 ; <<4 x float>*>:601 [#uses=2]
+ load <4 x float>* %601 ; <<4 x float>>:602 [#uses=0]
+ store <4 x float> zeroinitializer, <4 x float>* %601
+ load <4 x float>* null ; <<4 x float>>:603 [#uses=0]
+ load <4 x float>* null ; <<4 x float>>:604 [#uses=1]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 2 ; <<4 x float>*>:605 [#uses=1]
+ load <4 x float>* %605 ; <<4 x float>>:606 [#uses=1]
+ sub <4 x float> zeroinitializer, %604 ; <<4 x float>>:607 [#uses=2]
+ sub <4 x float> zeroinitializer, %606 ; <<4 x float>>:608 [#uses=2]
+ call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:609 [#uses=0]
+ br i1 false, label %617, label %610
+
+; <label>:610 ; preds = %xPIF.exit
+ load <4 x float>* null ; <<4 x float>>:611 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 1 ; <<4 x float>*>:612 [#uses=2]
+ load <4 x float>* %612 ; <<4 x float>>:613 [#uses=1]
+ shufflevector <4 x float> %607, <4 x float> %613, <4 x i32> < i32 0, i32 1, i32 2, i32 7 > ; <<4 x float>>:614 [#uses=1]
+ store <4 x float> %614, <4 x float>* %612
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 3 ; <<4 x float>*>:615 [#uses=2]
+ load <4 x float>* %615 ; <<4 x float>>:616 [#uses=0]
+ store <4 x float> zeroinitializer, <4 x float>* %615
+ br label %xST.exit400
+
+; <label>:617 ; preds = %xPIF.exit
+ call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:618 [#uses=0]
+ shufflevector <4 x i32> zeroinitializer, <4 x i32> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 > ; <<4 x i32>>:619 [#uses=1]
+ call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> %619, <4 x i32> zeroinitializer ) ; <i32>:620 [#uses=1]
+ icmp eq i32 %620, 0 ; <i1>:621 [#uses=1]
+ br i1 %621, label %625, label %622
+
+; <label>:622 ; preds = %617
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 1 ; <<4 x float>*>:623 [#uses=0]
+ shufflevector <4 x float> %607, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 1, i32 2, i32 7 > ; <<4 x float>>:624 [#uses=0]
+ br label %625
+
+; <label>:625 ; preds = %622, %617
+ load <4 x i32>* %.sub7896 ; <<4 x i32>>:626 [#uses=0]
+ call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:627 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 2 ; <<4 x float>*>:628 [#uses=1]
+ load <4 x float>* %628 ; <<4 x float>>:629 [#uses=0]
+ load <4 x i32>* %.sub7896 ; <<4 x i32>>:630 [#uses=0]
+ call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:631 [#uses=1]
+ icmp eq i32 %631, 0 ; <i1>:632 [#uses=1]
+ br i1 %632, label %xST.exit400, label %633
+
+; <label>:633 ; preds = %625
+ load <4 x float>* null ; <<4 x float>>:634 [#uses=1]
+ shufflevector <4 x float> zeroinitializer, <4 x float> %634, <4 x i32> < i32 0, i32 1, i32 2, i32 7 > ; <<4 x float>>:635 [#uses=1]
+ store <4 x float> %635, <4 x float>* null
+ br label %xST.exit400
+
+xST.exit400: ; preds = %633, %625, %610
+ %.17218 = phi <4 x float> [ zeroinitializer, %610 ], [ %608, %633 ], [ %608, %625 ] ; <<4 x float>> [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 0 ; <<4 x float>*>:636 [#uses=1]
+ load <4 x float>* %636 ; <<4 x float>>:637 [#uses=0]
+ load <4 x float>* null ; <<4 x float>>:638 [#uses=2]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 2 ; <<4 x float>*>:639 [#uses=0]
+ load <4 x float>* null ; <<4 x float>>:640 [#uses=2]
+ mul <4 x float> %638, %638 ; <<4 x float>>:641 [#uses=1]
+ mul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:642 [#uses=0]
+ mul <4 x float> %640, %640 ; <<4 x float>>:643 [#uses=2]
+ shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 > ; <<4 x float>>:644 [#uses=0]
+ shufflevector <4 x float> %643, <4 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 > ; <<4 x float>>:645 [#uses=1]
+ add <4 x float> %645, %643 ; <<4 x float>>:646 [#uses=0]
+ shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> < i32 2, i32 2, i32 2, i32 2 > ; <<4 x float>>:647 [#uses=1]
+ shufflevector <4 x float> %641, <4 x float> undef, <4 x i32> < i32 2, i32 2, i32 2, i32 2 > ; <<4 x float>>:648 [#uses=1]
+ add <4 x float> zeroinitializer, %647 ; <<4 x float>>:649 [#uses=2]
+ add <4 x float> zeroinitializer, %648 ; <<4 x float>>:650 [#uses=0]
+ add <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:651 [#uses=2]
+ call <4 x float> @llvm.ppc.altivec.vrsqrtefp( <4 x float> %649 ) ; <<4 x float>>:652 [#uses=1]
+ mul <4 x float> %652, %649 ; <<4 x float>>:653 [#uses=1]
+ call <4 x float> @llvm.ppc.altivec.vrsqrtefp( <4 x float> %651 ) ; <<4 x float>>:654 [#uses=1]
+ mul <4 x float> %654, %651 ; <<4 x float>>:655 [#uses=0]
+ icmp eq i32 0, 0 ; <i1>:656 [#uses=1]
+ br i1 %656, label %665, label %657
+
+; <label>:657 ; preds = %xST.exit400
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 0 ; <<4 x float>*>:658 [#uses=0]
+ shufflevector <4 x float> %653, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:659 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 1 ; <<4 x float>*>:660 [#uses=1]
+ load <4 x float>* %660 ; <<4 x float>>:661 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 2 ; <<4 x float>*>:662 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 3 ; <<4 x float>*>:663 [#uses=0]
+ shufflevector <4 x float> zeroinitializer, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:664 [#uses=0]
+ br label %xST.exit402
+
+; <label>:665 ; preds = %xST.exit400
+ call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:666 [#uses=0]
+ br i1 false, label %669, label %667
+
+; <label>:667 ; preds = %665
+ load <4 x float>* null ; <<4 x float>>:668 [#uses=0]
+ br label %669
+
+; <label>:669 ; preds = %667, %665
+ call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:670 [#uses=0]
+ br label %xST.exit402
+
+xST.exit402: ; preds = %669, %657
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 0 ; <<4 x float>*>:671 [#uses=0]
+ load <4 x float>* null ; <<4 x float>>:672 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 2 ; <<4 x float>*>:673 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 1 ; <<4 x float>*>:674 [#uses=1]
+ load <4 x float>* %674 ; <<4 x float>>:675 [#uses=1]
+ load <4 x float>* null ; <<4 x float>>:676 [#uses=0]
+ shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:677 [#uses=1]
+ shufflevector <4 x float> %675, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:678 [#uses=1]
+ mul <4 x float> zeroinitializer, %677 ; <<4 x float>>:679 [#uses=0]
+ mul <4 x float> zeroinitializer, %678 ; <<4 x float>>:680 [#uses=0]
+ mul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:681 [#uses=1]
+ icmp eq i32 0, 0 ; <i1>:682 [#uses=1]
+ br i1 %682, label %689, label %683
+
+; <label>:683 ; preds = %xST.exit402
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 1 ; <<4 x float>*>:684 [#uses=1]
+ load <4 x float>* %684 ; <<4 x float>>:685 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 2 ; <<4 x float>*>:686 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 3 ; <<4 x float>*>:687 [#uses=0]
+ shufflevector <4 x float> %681, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:688 [#uses=0]
+ br label %xST.exit405
+
+; <label>:689 ; preds = %xST.exit402
+ shufflevector <4 x i32> zeroinitializer, <4 x i32> undef, <4 x i32> zeroinitializer ; <<4 x i32>>:690 [#uses=0]
+ load <4 x i32>* %.sub7896 ; <<4 x i32>>:691 [#uses=1]
+ shufflevector <4 x i32> %691, <4 x i32> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > ; <<4 x i32>>:692 [#uses=1]
+ call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> %692, <4 x i32> zeroinitializer ) ; <i32>:693 [#uses=1]
+ icmp eq i32 %693, 0 ; <i1>:694 [#uses=0]
+ br label %xST.exit405
+
+xST.exit405: ; preds = %689, %683
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 3 ; <<4 x float>*>:695 [#uses=0]
+ shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:696 [#uses=0]
+ shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:697 [#uses=0]
+ load <4 x float>* null ; <<4 x float>>:698 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 2 ; <<4 x float>*>:699 [#uses=0]
+ shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:700 [#uses=1]
+ add <4 x float> zeroinitializer, %700 ; <<4 x float>>:701 [#uses=0]
+ load <4 x i32>* %.sub7896 ; <<4 x i32>>:702 [#uses=1]
+ call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> %702, <4 x i32> zeroinitializer ) ; <i32>:703 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 1 ; <<4 x float>*>:704 [#uses=2]
+ load <4 x float>* %704 ; <<4 x float>>:705 [#uses=0]
+ store <4 x float> zeroinitializer, <4 x float>* %704
+ load <4 x float>* null ; <<4 x float>>:706 [#uses=0]
+ store <4 x float> zeroinitializer, <4 x float>* null
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 3 ; <<4 x float>*>:707 [#uses=2]
+ load <4 x float>* %707 ; <<4 x float>>:708 [#uses=0]
+ store <4 x float> zeroinitializer, <4 x float>* %707
+ load <4 x float>* null ; <<4 x float>>:709 [#uses=0]
+ load <4 x float>* null ; <<4 x float>>:710 [#uses=0]
+ load <4 x float>* null ; <<4 x float>>:711 [#uses=1]
+ shufflevector <4 x float> %711, <4 x float> undef, <4 x i32> < i32 2, i32 2, i32 2, i32 2 > ; <<4 x float>>:712 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 1 ; <<4 x float>*>:713 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 2 ; <<4 x float>*>:714 [#uses=1]
+ load <4 x float>* %714 ; <<4 x float>>:715 [#uses=0]
+ shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:716 [#uses=0]
+ mul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:717 [#uses=1]
+ load <4 x i32>* %.sub7896 ; <<4 x i32>>:718 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 0 ; <<4 x float>*>:719 [#uses=1]
+ store <4 x float> zeroinitializer, <4 x float>* %719
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 1 ; <<4 x float>*>:720 [#uses=1]
+ shufflevector <4 x float> %717, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:721 [#uses=1]
+ store <4 x float> %721, <4 x float>* %720
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 2 ; <<4 x float>*>:722 [#uses=1]
+ load <4 x float>* %722 ; <<4 x float>>:723 [#uses=1]
+ shufflevector <4 x float> zeroinitializer, <4 x float> %723, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:724 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 3 ; <<4 x float>*>:725 [#uses=1]
+ store <4 x float> zeroinitializer, <4 x float>* %725
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 2 ; <<4 x float>*>:726 [#uses=1]
+ load <4 x float>* %726 ; <<4 x float>>:727 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 3 ; <<4 x float>*>:728 [#uses=1]
+ load <4 x float>* %728 ; <<4 x float>>:729 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 0 ; <<4 x float>*>:730 [#uses=1]
+ load <4 x float>* %730 ; <<4 x float>>:731 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 1 ; <<4 x float>*>:732 [#uses=1]
+ load <4 x float>* %732 ; <<4 x float>>:733 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 3 ; <<4 x float>*>:734 [#uses=0]
+ shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:735 [#uses=1]
+ mul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:736 [#uses=1]
+ mul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:737 [#uses=1]
+ mul <4 x float> zeroinitializer, %735 ; <<4 x float>>:738 [#uses=1]
+ mul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:739 [#uses=1]
+ call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:740 [#uses=1]
+ icmp eq i32 %740, 0 ; <i1>:741 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 0 ; <<4 x float>*>:742 [#uses=2]
+ load <4 x float>* %742 ; <<4 x float>>:743 [#uses=1]
+ shufflevector <4 x float> %736, <4 x float> %743, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:744 [#uses=1]
+ store <4 x float> %744, <4 x float>* %742
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 1 ; <<4 x float>*>:745 [#uses=1]
+ load <4 x float>* %745 ; <<4 x float>>:746 [#uses=1]
+ shufflevector <4 x float> %737, <4 x float> %746, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:747 [#uses=0]
+ shufflevector <4 x float> %738, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:748 [#uses=1]
+ store <4 x float> %748, <4 x float>* null
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 3 ; <<4 x float>*>:749 [#uses=1]
+ load <4 x float>* %749 ; <<4 x float>>:750 [#uses=1]
+ shufflevector <4 x float> %739, <4 x float> %750, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:751 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 0 ; <<4 x float>*>:752 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 1 ; <<4 x float>*>:753 [#uses=1]
+ load <4 x float>* %753 ; <<4 x float>>:754 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 2 ; <<4 x float>*>:755 [#uses=0]
+ load <4 x float>* null ; <<4 x float>>:756 [#uses=1]
+ shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:757 [#uses=1]
+ shufflevector <4 x float> %756, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:758 [#uses=1]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 2 ; <<4 x float>*>:759 [#uses=1]
+ load <4 x float>* %759 ; <<4 x float>>:760 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 3 ; <<4 x float>*>:761 [#uses=0]
+ shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:762 [#uses=0]
+ shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:763 [#uses=1]
+ add <4 x float> %757, zeroinitializer ; <<4 x float>>:764 [#uses=0]
+ add <4 x float> %758, %763 ; <<4 x float>>:765 [#uses=0]
+ mul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:766 [#uses=1]
+ br i1 false, label %773, label %767
+
+; <label>:767 ; preds = %xST.exit405
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 1 ; <<4 x float>*>:768 [#uses=0]
+ load <4 x float>* null ; <<4 x float>>:769 [#uses=1]
+ shufflevector <4 x float> zeroinitializer, <4 x float> %769, <4 x i32> < i32 0, i32 1, i32 2, i32 7 > ; <<4 x float>>:770 [#uses=1]
+ store <4 x float> %770, <4 x float>* null
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 3 ; <<4 x float>*>:771 [#uses=1]
+ load <4 x float>* %771 ; <<4 x float>>:772 [#uses=0]
+ br label %xST.exit422
+
+; <label>:773 ; preds = %xST.exit405
+ br label %xST.exit422
+
+xST.exit422: ; preds = %773, %767
+ %.07267 = phi <4 x float> [ %766, %767 ], [ undef, %773 ] ; <<4 x float>> [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 3 ; <<4 x float>*>:774 [#uses=0]
+ mul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:775 [#uses=0]
+ icmp eq i32 0, 0 ; <i1>:776 [#uses=1]
+ br i1 %776, label %780, label %777
+
+; <label>:777 ; preds = %xST.exit422
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 2 ; <<4 x float>*>:778 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 3 ; <<4 x float>*>:779 [#uses=0]
+ br label %xST.exit431
+
+; <label>:780 ; preds = %xST.exit422
+ load <4 x i32>* %.sub7896 ; <<4 x i32>>:781 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 2 ; <<4 x float>*>:782 [#uses=2]
+ load <4 x float>* %782 ; <<4 x float>>:783 [#uses=0]
+ store <4 x float> zeroinitializer, <4 x float>* %782
+ load <4 x i32>* %.sub7896 ; <<4 x i32>>:784 [#uses=1]
+ shufflevector <4 x i32> %784, <4 x i32> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > ; <<4 x i32>>:785 [#uses=0]
+ icmp eq i32 0, 0 ; <i1>:786 [#uses=0]
+ br label %xST.exit431
+
+xST.exit431: ; preds = %780, %777
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 2 ; <<4 x float>*>:787 [#uses=0]
+ load <4 x float>* null ; <<4 x float>>:788 [#uses=0]
+ load <4 x i32>* %.sub7896 ; <<4 x i32>>:789 [#uses=2]
+ call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> %789, <4 x i32> zeroinitializer ) ; <i32>:790 [#uses=1]
+ icmp eq i32 %790, 0 ; <i1>:791 [#uses=0]
+ shufflevector <4 x i32> %789, <4 x i32> undef, <4 x i32> zeroinitializer ; <<4 x i32>>:792 [#uses=1]
+ call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> %792, <4 x i32> zeroinitializer ) ; <i32>:793 [#uses=1]
+ icmp eq i32 %793, 0 ; <i1>:794 [#uses=1]
+ br i1 %794, label %797, label %795
+
+; <label>:795 ; preds = %xST.exit431
+ load <4 x float>* null ; <<4 x float>>:796 [#uses=0]
+ store <4 x float> zeroinitializer, <4 x float>* null
+ br label %797
+
+; <label>:797 ; preds = %795, %xST.exit431
+ %.07332 = phi <4 x float> [ zeroinitializer, %795 ], [ undef, %xST.exit431 ] ; <<4 x float>> [#uses=0]
+ shufflevector <4 x i32> zeroinitializer, <4 x i32> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 > ; <<4 x i32>>:798 [#uses=0]
+ br i1 false, label %xST.exit434, label %799
+
+; <label>:799 ; preds = %797
+ load <4 x float>* null ; <<4 x float>>:800 [#uses=0]
+ store <4 x float> zeroinitializer, <4 x float>* null
+ br label %xST.exit434
+
+xST.exit434: ; preds = %799, %797
+ load <4 x i32>* %.sub7896 ; <<4 x i32>>:801 [#uses=1]
+ shufflevector <4 x i32> %801, <4 x i32> undef, <4 x i32> < i32 2, i32 2, i32 2, i32 2 > ; <<4 x i32>>:802 [#uses=0]
+ shufflevector <4 x i32> zeroinitializer, <4 x i32> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > ; <<4 x i32>>:803 [#uses=0]
+ icmp eq i32 0, 0 ; <i1>:804 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 0 ; <<4 x float>*>:805 [#uses=1]
+ load <4 x float>* %805 ; <<4 x float>>:806 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 1 ; <<4 x float>*>:807 [#uses=1]
+ load <4 x float>* %807 ; <<4 x float>>:808 [#uses=0]
+ load <4 x float>* null ; <<4 x float>>:809 [#uses=0]
+ load <4 x float>* null ; <<4 x float>>:810 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 0 ; <<4 x float>*>:811 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 2 ; <<4 x float>*>:812 [#uses=1]
+ load <4 x float>* %812 ; <<4 x float>>:813 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 3 ; <<4 x float>*>:814 [#uses=1]
+ load <4 x float>* %814 ; <<4 x float>>:815 [#uses=0]
+ shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:816 [#uses=0]
+ unreachable
+
+xPBRK.exit: ; preds = %.critedge
+ store <4 x i32> < i32 -1, i32 -1, i32 -1, i32 -1 >, <4 x i32>* %.sub7896
+ store <4 x i32> zeroinitializer, <4 x i32>* null
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 1 ; <<4 x float>*>:817 [#uses=1]
+ load <4 x float>* %817 ; <<4 x float>>:818 [#uses=1]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 2 ; <<4 x float>*>:819 [#uses=1]
+ load <4 x float>* %819 ; <<4 x float>>:820 [#uses=1]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 3 ; <<4 x float>*>:821 [#uses=1]
+ load <4 x float>* %821 ; <<4 x float>>:822 [#uses=1]
+ shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:823 [#uses=1]
+ shufflevector <4 x float> %818, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:824 [#uses=1]
+ shufflevector <4 x float> %820, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:825 [#uses=1]
+ shufflevector <4 x float> %822, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:826 [#uses=1]
+ shufflevector <4 x float> %823, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:827 [#uses=0]
+ shufflevector <4 x float> %824, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:828 [#uses=1]
+ store <4 x float> %828, <4 x float>* null
+ load <4 x float>* null ; <<4 x float>>:829 [#uses=1]
+ shufflevector <4 x float> %825, <4 x float> %829, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:830 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 3 ; <<4 x float>*>:831 [#uses=2]
+ load <4 x float>* %831 ; <<4 x float>>:832 [#uses=1]
+ shufflevector <4 x float> %826, <4 x float> %832, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:833 [#uses=1]
+ store <4 x float> %833, <4 x float>* %831
+ br label %xLS.exit449
+
+xLS.exit449: ; preds = %1215, %xPBRK.exit
+ %.27464 = phi <4 x float> [ undef, %xPBRK.exit ], [ %.17463, %1215 ] ; <<4 x float>> [#uses=2]
+ %.27469 = phi <4 x float> [ undef, %xPBRK.exit ], [ %.17468, %1215 ] ; <<4 x float>> [#uses=2]
+ %.27474 = phi <4 x float> [ undef, %xPBRK.exit ], [ zeroinitializer, %1215 ] ; <<4 x float>> [#uses=1]
+ %.17482 = phi <4 x float> [ undef, %xPBRK.exit ], [ zeroinitializer, %1215 ] ; <<4 x float>> [#uses=0]
+ %.17486 = phi <4 x float> [ undef, %xPBRK.exit ], [ zeroinitializer, %1215 ] ; <<4 x float>> [#uses=0]
+ %.17490 = phi <4 x float> [ undef, %xPBRK.exit ], [ %.07489, %1215 ] ; <<4 x float>> [#uses=2]
+ %.17494 = phi <4 x float> [ undef, %xPBRK.exit ], [ zeroinitializer, %1215 ] ; <<4 x float>> [#uses=0]
+ %.27504 = phi <4 x float> [ undef, %xPBRK.exit ], [ zeroinitializer, %1215 ] ; <<4 x float>> [#uses=0]
+ %.17513 = phi <4 x float> [ undef, %xPBRK.exit ], [ zeroinitializer, %1215 ] ; <<4 x float>> [#uses=0]
+ %.17517 = phi <4 x float> [ undef, %xPBRK.exit ], [ zeroinitializer, %1215 ] ; <<4 x float>> [#uses=0]
+ %.17552 = phi <4 x float> [ undef, %xPBRK.exit ], [ %.07551, %1215 ] ; <<4 x float>> [#uses=2]
+ %.17556 = phi <4 x float> [ undef, %xPBRK.exit ], [ %.07555, %1215 ] ; <<4 x float>> [#uses=2]
+ %.17560 = phi <4 x float> [ undef, %xPBRK.exit ], [ zeroinitializer, %1215 ] ; <<4 x float>> [#uses=0]
+ %.17583 = phi <4 x float> [ undef, %xPBRK.exit ], [ %.07582, %1215 ] ; <<4 x float>> [#uses=2]
+ %.17591 = phi <4 x float> [ undef, %xPBRK.exit ], [ %.07590, %1215 ] ; <<4 x float>> [#uses=2]
+ %.17599 = phi <4 x float> [ undef, %xPBRK.exit ], [ zeroinitializer, %1215 ] ; <<4 x float>> [#uses=0]
+ %.17618 = phi <4 x float> [ undef, %xPBRK.exit ], [ %.07617, %1215 ] ; <<4 x float>> [#uses=2]
+ %.17622 = phi <4 x float> [ undef, %xPBRK.exit ], [ %.07621, %1215 ] ; <<4 x float>> [#uses=2]
+ %.17626 = phi <4 x float> [ undef, %xPBRK.exit ], [ zeroinitializer, %1215 ] ; <<4 x float>> [#uses=0]
+ %.17653 = phi <4 x float> [ undef, %xPBRK.exit ], [ %.07652, %1215 ] ; <<4 x float>> [#uses=2]
+ %.17657 = phi <4 x float> [ undef, %xPBRK.exit ], [ %.07656, %1215 ] ; <<4 x float>> [#uses=2]
+ %.17661 = phi <4 x float> [ undef, %xPBRK.exit ], [ %.07660, %1215 ] ; <<4 x float>> [#uses=2]
+ %.17665 = phi <4 x float> [ undef, %xPBRK.exit ], [ %.07664, %1215 ] ; <<4 x float>> [#uses=2]
+ %.17723 = phi <4 x float> [ undef, %xPBRK.exit ], [ %.07722, %1215 ] ; <<4 x float>> [#uses=2]
+ %.17727 = phi <4 x float> [ undef, %xPBRK.exit ], [ %.07726, %1215 ] ; <<4 x float>> [#uses=2]
+ %.17731 = phi <4 x float> [ undef, %xPBRK.exit ], [ %.07730, %1215 ] ; <<4 x float>> [#uses=2]
+ %.17735 = phi <4 x float> [ undef, %xPBRK.exit ], [ %.07734, %1215 ] ; <<4 x float>> [#uses=2]
+ %.17770 = phi <4 x float> [ undef, %xPBRK.exit ], [ %.07769, %1215 ] ; <<4 x float>> [#uses=2]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 0 ; <<4 x float>*>:834 [#uses=0]
+ load <4 x float>* null ; <<4 x float>>:835 [#uses=1]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 2 ; <<4 x float>*>:836 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 3 ; <<4 x float>*>:837 [#uses=0]
+ shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:838 [#uses=0]
+ shufflevector <4 x float> %835, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:839 [#uses=1]
+ getelementptr <4 x float>* null, i32 878 ; <<4 x float>*>:840 [#uses=1]
+ load <4 x float>* %840 ; <<4 x float>>:841 [#uses=0]
+ call <4 x float> @llvm.ppc.altivec.vcfsx( <4 x i32> zeroinitializer, i32 0 ) ; <<4 x float>>:842 [#uses=1]
+ shufflevector <4 x float> %842, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:843 [#uses=2]
+ call <4 x i32> @llvm.ppc.altivec.vcmpgtfp( <4 x float> %843, <4 x float> %839 ) ; <<4 x i32>>:844 [#uses=1]
+ bitcast <4 x i32> %844 to <4 x float> ; <<4 x float>>:845 [#uses=1]
+ call <4 x i32> @llvm.ppc.altivec.vcmpgtfp( <4 x float> %843, <4 x float> zeroinitializer ) ; <<4 x i32>>:846 [#uses=0]
+ bitcast <4 x i32> zeroinitializer to <4 x float> ; <<4 x float>>:847 [#uses=1]
+ icmp eq i32 0, 0 ; <i1>:848 [#uses=1]
+ br i1 %848, label %854, label %849
+
+; <label>:849 ; preds = %xLS.exit449
+ shufflevector <4 x float> zeroinitializer, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:850 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 1 ; <<4 x float>*>:851 [#uses=1]
+ store <4 x float> zeroinitializer, <4 x float>* %851
+ shufflevector <4 x float> zeroinitializer, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:852 [#uses=1]
+ store <4 x float> %852, <4 x float>* null
+ shufflevector <4 x float> %847, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:853 [#uses=0]
+ br label %xST.exit451
+
+; <label>:854 ; preds = %xLS.exit449
+ call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:855 [#uses=0]
+ br i1 false, label %859, label %856
+
+; <label>:856 ; preds = %854
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 0 ; <<4 x float>*>:857 [#uses=2]
+ load <4 x float>* %857 ; <<4 x float>>:858 [#uses=0]
+ store <4 x float> zeroinitializer, <4 x float>* %857
+ br label %859
+
+; <label>:859 ; preds = %856, %854
+ call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:860 [#uses=0]
+ br i1 false, label %864, label %861
+
+; <label>:861 ; preds = %859
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 1 ; <<4 x float>*>:862 [#uses=1]
+ shufflevector <4 x float> %845, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:863 [#uses=1]
+ store <4 x float> %863, <4 x float>* %862
+ br label %864
+
+; <label>:864 ; preds = %861, %859
+ load <4 x i32>* %.sub7896 ; <<4 x i32>>:865 [#uses=1]
+ shufflevector <4 x i32> %865, <4 x i32> undef, <4 x i32> < i32 2, i32 2, i32 2, i32 2 > ; <<4 x i32>>:866 [#uses=0]
+ br i1 false, label %868, label %867
+
+; <label>:867 ; preds = %864
+ store <4 x float> zeroinitializer, <4 x float>* null
+ br label %868
+
+; <label>:868 ; preds = %867, %864
+ shufflevector <4 x i32> zeroinitializer, <4 x i32> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > ; <<4 x i32>>:869 [#uses=0]
+ br label %xST.exit451
+
+xST.exit451: ; preds = %868, %849
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 0 ; <<4 x float>*>:870 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 1 ; <<4 x float>*>:871 [#uses=0]
+ load <4 x float>* null ; <<4 x float>>:872 [#uses=0]
+ shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:873 [#uses=1]
+ bitcast <4 x float> zeroinitializer to <4 x i32> ; <<4 x i32>>:874 [#uses=1]
+ xor <4 x i32> %874, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>>:875 [#uses=0]
+ bitcast <4 x float> %873 to <4 x i32> ; <<4 x i32>>:876 [#uses=1]
+ xor <4 x i32> %876, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>>:877 [#uses=0]
+ bitcast <4 x float> zeroinitializer to <4 x i32> ; <<4 x i32>>:878 [#uses=1]
+ xor <4 x i32> %878, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>>:879 [#uses=1]
+ bitcast <4 x i32> %879 to <4 x float> ; <<4 x float>>:880 [#uses=0]
+ load <4 x i32>* %.sub7896 ; <<4 x i32>>:881 [#uses=1]
+ icmp eq i32 0, 0 ; <i1>:882 [#uses=1]
+ br i1 %882, label %888, label %883
+
+; <label>:883 ; preds = %xST.exit451
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 0 ; <<4 x float>*>:884 [#uses=1]
+ store <4 x float> zeroinitializer, <4 x float>* %884
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 1 ; <<4 x float>*>:885 [#uses=0]
+ shufflevector <4 x float> zeroinitializer, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:886 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 3 ; <<4 x float>*>:887 [#uses=0]
+ br label %xST.exit453
+
+; <label>:888 ; preds = %xST.exit451
+ shufflevector <4 x i32> %881, <4 x i32> undef, <4 x i32> zeroinitializer ; <<4 x i32>>:889 [#uses=0]
+ call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:890 [#uses=0]
+ br i1 false, label %894, label %891
+
+; <label>:891 ; preds = %888
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 1 ; <<4 x float>*>:892 [#uses=1]
+ shufflevector <4 x float> zeroinitializer, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:893 [#uses=1]
+ store <4 x float> %893, <4 x float>* %892
+ br label %894
+
+; <label>:894 ; preds = %891, %888
+ call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:895 [#uses=1]
+ icmp eq i32 %895, 0 ; <i1>:896 [#uses=1]
+ br i1 %896, label %898, label %897
+
+; <label>:897 ; preds = %894
+ br label %898
+
+; <label>:898 ; preds = %897, %894
+ load <4 x i32>* %.sub7896 ; <<4 x i32>>:899 [#uses=0]
+ br i1 false, label %xST.exit453, label %900
+
+; <label>:900 ; preds = %898
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 3 ; <<4 x float>*>:901 [#uses=1]
+ load <4 x float>* %901 ; <<4 x float>>:902 [#uses=1]
+ shufflevector <4 x float> zeroinitializer, <4 x float> %902, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:903 [#uses=0]
+ br label %xST.exit453
+
+xST.exit453: ; preds = %900, %898, %883
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 1 ; <<4 x float>*>:904 [#uses=0]
+ load <4 x float>* null ; <<4 x float>>:905 [#uses=1]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 3 ; <<4 x float>*>:906 [#uses=0]
+ shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:907 [#uses=1]
+ shufflevector <4 x float> %905, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:908 [#uses=1]
+ bitcast <4 x float> zeroinitializer to <4 x i32> ; <<4 x i32>>:909 [#uses=0]
+ bitcast <4 x float> %908 to <4 x i32> ; <<4 x i32>>:910 [#uses=0]
+ bitcast <4 x float> %907 to <4 x i32> ; <<4 x i32>>:911 [#uses=0]
+ bitcast <4 x float> zeroinitializer to <4 x i32> ; <<4 x i32>>:912 [#uses=0]
+ load <4 x i32>* %.sub7896 ; <<4 x i32>>:913 [#uses=0]
+ call i32 @llvm.ppc.altivec.vcmpequw.p( i32 2, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:914 [#uses=0]
+ br i1 false, label %915, label %xPIF.exit455
+
+; <label>:915 ; preds = %xST.exit453
+ load <4 x i32>* %.sub7896 ; <<4 x i32>>:916 [#uses=0]
+ getelementptr [4 x <4 x i32>]* null, i32 0, i32 3 ; <<4 x i32>*>:917 [#uses=1]
+ store <4 x i32> zeroinitializer, <4 x i32>* %917
+ load <4 x i32>* %.sub7896 ; <<4 x i32>>:918 [#uses=1]
+ and <4 x i32> %918, zeroinitializer ; <<4 x i32>>:919 [#uses=0]
+ br label %.critedge7899
+
+.critedge7899: ; preds = %.critedge7899, %915
+ or <4 x i32> zeroinitializer, zeroinitializer ; <<4 x i32>>:920 [#uses=1]
+ br i1 false, label %.critedge7899, label %xPBRK.exit456
+
+xPBRK.exit456: ; preds = %.critedge7899
+ call i32 @llvm.ppc.altivec.vcmpequw.p( i32 2, <4 x i32> %920, <4 x i32> zeroinitializer ) ; <i32>:921 [#uses=0]
+ unreachable
+
+xPIF.exit455: ; preds = %xST.exit453
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 0 ; <<4 x float>*>:922 [#uses=1]
+ load <4 x float>* %922 ; <<4 x float>>:923 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 1 ; <<4 x float>*>:924 [#uses=1]
+ load <4 x float>* %924 ; <<4 x float>>:925 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 2 ; <<4 x float>*>:926 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 3 ; <<4 x float>*>:927 [#uses=0]
+ shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:928 [#uses=0]
+ bitcast { { i16, i16, i32 } }* %1 to <4 x float>* ; <<4 x float>*>:929 [#uses=0]
+ bitcast <4 x float> zeroinitializer to <4 x i32> ; <<4 x i32>>:930 [#uses=0]
+ load <4 x i32>* %.sub7896 ; <<4 x i32>>:931 [#uses=0]
+ icmp eq i32 0, 0 ; <i1>:932 [#uses=1]
+ br i1 %932, label %934, label %933
+
+; <label>:933 ; preds = %xPIF.exit455
+ store <4 x float> zeroinitializer, <4 x float>* null
+ br label %934
+
+; <label>:934 ; preds = %933, %xPIF.exit455
+ shufflevector <4 x i32> zeroinitializer, <4 x i32> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 > ; <<4 x i32>>:935 [#uses=0]
+ icmp eq i32 0, 0 ; <i1>:936 [#uses=1]
+ br i1 %936, label %xST.exit459, label %937
+
+; <label>:937 ; preds = %934
+ br label %xST.exit459
+
+xST.exit459: ; preds = %937, %934
+ shufflevector <4 x i32> zeroinitializer, <4 x i32> undef, <4 x i32> < i32 2, i32 2, i32 2, i32 2 > ; <<4 x i32>>:938 [#uses=1]
+ call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> %938, <4 x i32> zeroinitializer ) ; <i32>:939 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 188, i32 2 ; <<4 x float>*>:940 [#uses=1]
+ store <4 x float> zeroinitializer, <4 x float>* %940
+ load <4 x float>* null ; <<4 x float>>:941 [#uses=1]
+ shufflevector <4 x float> zeroinitializer, <4 x float> %941, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:942 [#uses=1]
+ store <4 x float> %942, <4 x float>* null
+ shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:943 [#uses=0]
+ load <4 x i32>* %.sub7896 ; <<4 x i32>>:944 [#uses=0]
+ call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:945 [#uses=0]
+ br i1 false, label %947, label %946
+
+; <label>:946 ; preds = %xST.exit459
+ br label %947
+
+; <label>:947 ; preds = %946, %xST.exit459
+ shufflevector <4 x i32> zeroinitializer, <4 x i32> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 > ; <<4 x i32>>:948 [#uses=0]
+ icmp eq i32 0, 0 ; <i1>:949 [#uses=1]
+ br i1 %949, label %952, label %950
+
+; <label>:950 ; preds = %947
+ shufflevector <4 x i32> zeroinitializer, <4 x i32> undef, <4 x i32> zeroinitializer ; <<4 x i32>>:951 [#uses=1]
+ call void @llvm.ppc.altivec.stvewx( <4 x i32> %951, i8* null )
+ br label %952
+
+; <label>:952 ; preds = %950, %947
+ br i1 false, label %955, label %953
+
+; <label>:953 ; preds = %952
+ getelementptr [4 x <4 x i32>]* null, i32 0, i32 2 ; <<4 x i32>*>:954 [#uses=0]
+ br label %955
+
+; <label>:955 ; preds = %953, %952
+ shufflevector <4 x i32> zeroinitializer, <4 x i32> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > ; <<4 x i32>>:956 [#uses=0]
+ icmp eq i32 0, 0 ; <i1>:957 [#uses=1]
+ br i1 %957, label %xStoreDestAddressWithMask.exit461, label %958
+
+; <label>:958 ; preds = %955
+ shufflevector <4 x i32> zeroinitializer, <4 x i32> undef, <4 x i32> zeroinitializer ; <<4 x i32>>:959 [#uses=1]
+ call void @llvm.ppc.altivec.stvewx( <4 x i32> %959, i8* null )
+ br label %xStoreDestAddressWithMask.exit461
+
+xStoreDestAddressWithMask.exit461: ; preds = %958, %955
+ load <4 x float>* %0 ; <<4 x float>>:960 [#uses=0]
+ call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:961 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 3, i32 0 ; <<4 x float>*>:962 [#uses=0]
+ br i1 false, label %968, label %xST.exit463
+
+xST.exit463: ; preds = %xStoreDestAddressWithMask.exit461
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 3, i32 1 ; <<4 x float>*>:963 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 3, i32 2 ; <<4 x float>*>:964 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 3, i32 3 ; <<4 x float>*>:965 [#uses=0]
+ load <4 x float>* %0 ; <<4 x float>>:966 [#uses=3]
+ call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:967 [#uses=0]
+ br i1 false, label %972, label %969
+
+; <label>:968 ; preds = %xStoreDestAddressWithMask.exit461
+ unreachable
+
+; <label>:969 ; preds = %xST.exit463
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 2, i32 1 ; <<4 x float>*>:970 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 2, i32 2 ; <<4 x float>*>:971 [#uses=1]
+ store <4 x float> %966, <4 x float>* %971
+ store <4 x float> %966, <4 x float>* null
+ br label %xST.exit465
+
+; <label>:972 ; preds = %xST.exit463
+ call <4 x i32> @llvm.ppc.altivec.vsel( <4 x i32> zeroinitializer, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <<4 x i32>>:973 [#uses=0]
+ store <4 x float> zeroinitializer, <4 x float>* null
+ store <4 x float> zeroinitializer, <4 x float>* null
+ load <4 x float>* null ; <<4 x float>>:974 [#uses=0]
+ bitcast <4 x float> %966 to <4 x i32> ; <<4 x i32>>:975 [#uses=1]
+ call <4 x i32> @llvm.ppc.altivec.vsel( <4 x i32> zeroinitializer, <4 x i32> %975, <4 x i32> zeroinitializer ) ; <<4 x i32>>:976 [#uses=1]
+ bitcast <4 x i32> %976 to <4 x float> ; <<4 x float>>:977 [#uses=1]
+ store <4 x float> %977, <4 x float>* null
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 2, i32 3 ; <<4 x float>*>:978 [#uses=0]
+ bitcast <4 x float> zeroinitializer to <4 x i32> ; <<4 x i32>>:979 [#uses=1]
+ call <4 x i32> @llvm.ppc.altivec.vsel( <4 x i32> %979, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <<4 x i32>>:980 [#uses=1]
+ bitcast <4 x i32> %980 to <4 x float> ; <<4 x float>>:981 [#uses=0]
+ br label %xST.exit465
+
+xST.exit465: ; preds = %972, %969
+ load <4 x float>* %0 ; <<4 x float>>:982 [#uses=3]
+ icmp eq i32 0, 0 ; <i1>:983 [#uses=1]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 0 ; <<4 x float>*>:984 [#uses=1]
+ br i1 %983, label %989, label %985
+
+; <label>:985 ; preds = %xST.exit465
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 1 ; <<4 x float>*>:986 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 2 ; <<4 x float>*>:987 [#uses=1]
+ store <4 x float> %982, <4 x float>* %987
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 3 ; <<4 x float>*>:988 [#uses=0]
+ br label %xST.exit467
+
+; <label>:989 ; preds = %xST.exit465
+ bitcast <4 x float> %982 to <4 x i32> ; <<4 x i32>>:990 [#uses=0]
+ shufflevector <4 x i32> zeroinitializer, <4 x i32> undef, <4 x i32> zeroinitializer ; <<4 x i32>>:991 [#uses=0]
+ store <4 x float> zeroinitializer, <4 x float>* %984
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 1 ; <<4 x float>*>:992 [#uses=0]
+ load <4 x i32>* %.sub7896 ; <<4 x i32>>:993 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 2 ; <<4 x float>*>:994 [#uses=0]
+ bitcast <4 x i32> zeroinitializer to <4 x float> ; <<4 x float>>:995 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 3 ; <<4 x float>*>:996 [#uses=0]
+ bitcast <4 x float> zeroinitializer to <4 x i32> ; <<4 x i32>>:997 [#uses=1]
+ bitcast <4 x float> %982 to <4 x i32> ; <<4 x i32>>:998 [#uses=1]
+ shufflevector <4 x i32> zeroinitializer, <4 x i32> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > ; <<4 x i32>>:999 [#uses=1]
+ call <4 x i32> @llvm.ppc.altivec.vsel( <4 x i32> %997, <4 x i32> %998, <4 x i32> %999 ) ; <<4 x i32>>:1000 [#uses=1]
+ bitcast <4 x i32> %1000 to <4 x float> ; <<4 x float>>:1001 [#uses=0]
+ br label %xST.exit467
+
+xST.exit467: ; preds = %989, %985
+ load <4 x float>* %0 ; <<4 x float>>:1002 [#uses=5]
+ load <4 x i32>* %.sub7896 ; <<4 x i32>>:1003 [#uses=2]
+ call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> %1003, <4 x i32> zeroinitializer ) ; <i32>:1004 [#uses=0]
+ br i1 false, label %1011, label %1005
+
+; <label>:1005 ; preds = %xST.exit467
+ load <4 x float>* null ; <<4 x float>>:1006 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 1 ; <<4 x float>*>:1007 [#uses=1]
+ load <4 x float>* %1007 ; <<4 x float>>:1008 [#uses=0]
+ load <4 x float>* null ; <<4 x float>>:1009 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 3 ; <<4 x float>*>:1010 [#uses=0]
+ br label %xST.exit469
+
+; <label>:1011 ; preds = %xST.exit467
+ shufflevector <4 x i32> %1003, <4 x i32> undef, <4 x i32> zeroinitializer ; <<4 x i32>>:1012 [#uses=0]
+ icmp eq i32 0, 0 ; <i1>:1013 [#uses=1]
+ br i1 %1013, label %1015, label %1014
+
+; <label>:1014 ; preds = %1011
+ br label %1015
+
+; <label>:1015 ; preds = %1014, %1011
+ %.07472 = phi <4 x float> [ %1002, %1014 ], [ %.27474, %1011 ] ; <<4 x float>> [#uses=0]
+ call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:1016 [#uses=1]
+ icmp eq i32 %1016, 0 ; <i1>:1017 [#uses=1]
+ br i1 %1017, label %1021, label %1018
+
+; <label>:1018 ; preds = %1015
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 1 ; <<4 x float>*>:1019 [#uses=0]
+ shufflevector <4 x float> %1002, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 1, i32 2, i32 7 > ; <<4 x float>>:1020 [#uses=0]
+ br label %1021
+
+; <label>:1021 ; preds = %1018, %1015
+ %.07467 = phi <4 x float> [ %1002, %1018 ], [ %.27469, %1015 ] ; <<4 x float>> [#uses=2]
+ icmp eq i32 0, 0 ; <i1>:1022 [#uses=1]
+ br i1 %1022, label %1025, label %1023
+
+; <label>:1023 ; preds = %1021
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 2 ; <<4 x float>*>:1024 [#uses=1]
+ store <4 x float> zeroinitializer, <4 x float>* %1024
+ br label %1025
+
+; <label>:1025 ; preds = %1023, %1021
+ %.07462 = phi <4 x float> [ %1002, %1023 ], [ %.27464, %1021 ] ; <<4 x float>> [#uses=2]
+ icmp eq i32 0, 0 ; <i1>:1026 [#uses=1]
+ br i1 %1026, label %xST.exit469, label %1027
+
+; <label>:1027 ; preds = %1025
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 3 ; <<4 x float>*>:1028 [#uses=0]
+ br label %xST.exit469
+
+xST.exit469: ; preds = %1027, %1025, %1005
+ %.17463 = phi <4 x float> [ %.27464, %1005 ], [ %.07462, %1027 ], [ %.07462, %1025 ] ; <<4 x float>> [#uses=1]
+ %.17468 = phi <4 x float> [ %.27469, %1005 ], [ %.07467, %1027 ], [ %.07467, %1025 ] ; <<4 x float>> [#uses=1]
+ %.07489 = phi <4 x float> [ %1002, %1005 ], [ %.17490, %1027 ], [ %.17490, %1025 ] ; <<4 x float>> [#uses=1]
+ load <4 x float>* null ; <<4 x float>>:1029 [#uses=0]
+ load <4 x float>* null ; <<4 x float>>:1030 [#uses=0]
+ sub <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:1031 [#uses=1]
+ br i1 false, label %1037, label %1032
+
+; <label>:1032 ; preds = %xST.exit469
+ load <4 x float>* null ; <<4 x float>>:1033 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 2 ; <<4 x float>*>:1034 [#uses=1]
+ load <4 x float>* %1034 ; <<4 x float>>:1035 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 3 ; <<4 x float>*>:1036 [#uses=0]
+ br label %xST.exit472
+
+; <label>:1037 ; preds = %xST.exit469
+ icmp eq i32 0, 0 ; <i1>:1038 [#uses=1]
+ br i1 %1038, label %1040, label %1039
+
+; <label>:1039 ; preds = %1037
+ br label %1040
+
+; <label>:1040 ; preds = %1039, %1037
+ %.07507 = phi <4 x float> [ zeroinitializer, %1039 ], [ zeroinitializer, %1037 ] ; <<4 x float>> [#uses=0]
+ icmp eq i32 0, 0 ; <i1>:1041 [#uses=1]
+ br i1 %1041, label %1045, label %1042
+
+; <label>:1042 ; preds = %1040
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 1 ; <<4 x float>*>:1043 [#uses=1]
+ load <4 x float>* %1043 ; <<4 x float>>:1044 [#uses=0]
+ br label %1045
+
+; <label>:1045 ; preds = %1042, %1040
+ br i1 false, label %1048, label %1046
+
+; <label>:1046 ; preds = %1045
+ shufflevector <4 x float> %1031, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 1, i32 2, i32 7 > ; <<4 x float>>:1047 [#uses=0]
+ br label %1048
+
+; <label>:1048 ; preds = %1046, %1045
+ icmp eq i32 0, 0 ; <i1>:1049 [#uses=1]
+ br i1 %1049, label %xST.exit472, label %1050
+
+; <label>:1050 ; preds = %1048
+ br label %xST.exit472
+
+xST.exit472: ; preds = %1050, %1048, %1032
+ br i1 false, label %1052, label %1051
+
+; <label>:1051 ; preds = %xST.exit472
+ br label %xST.exit474
+
+; <label>:1052 ; preds = %xST.exit472
+ br i1 false, label %1054, label %1053
+
+; <label>:1053 ; preds = %1052
+ br label %1054
+
+; <label>:1054 ; preds = %1053, %1052
+ br i1 false, label %1056, label %1055
+
+; <label>:1055 ; preds = %1054
+ br label %1056
+
+; <label>:1056 ; preds = %1055, %1054
+ br i1 false, label %1058, label %1057
+
+; <label>:1057 ; preds = %1056
+ br label %1058
+
+; <label>:1058 ; preds = %1057, %1056
+ br i1 false, label %xST.exit474, label %1059
+
+; <label>:1059 ; preds = %1058
+ br label %xST.exit474
+
+xST.exit474: ; preds = %1059, %1058, %1051
+ load <4 x float>* null ; <<4 x float>>:1060 [#uses=1]
+ mul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:1061 [#uses=1]
+ mul <4 x float> %1060, zeroinitializer ; <<4 x float>>:1062 [#uses=2]
+ br i1 false, label %1065, label %1063
+
+; <label>:1063 ; preds = %xST.exit474
+ shufflevector <4 x float> %1062, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 1, i32 2, i32 7 > ; <<4 x float>>:1064 [#uses=1]
+ store <4 x float> %1064, <4 x float>* null
+ br label %xST.exit476
+
+; <label>:1065 ; preds = %xST.exit474
+ br i1 false, label %1067, label %1066
+
+; <label>:1066 ; preds = %1065
+ br label %1067
+
+; <label>:1067 ; preds = %1066, %1065
+ shufflevector <4 x i32> zeroinitializer, <4 x i32> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 > ; <<4 x i32>>:1068 [#uses=0]
+ br i1 false, label %1070, label %1069
+
+; <label>:1069 ; preds = %1067
+ br label %1070
+
+; <label>:1070 ; preds = %1069, %1067
+ br i1 false, label %1072, label %1071
+
+; <label>:1071 ; preds = %1070
+ br label %1072
+
+; <label>:1072 ; preds = %1071, %1070
+ br i1 false, label %xST.exit476, label %1073
+
+; <label>:1073 ; preds = %1072
+ br label %xST.exit476
+
+xST.exit476: ; preds = %1073, %1072, %1063
+ %.07551 = phi <4 x float> [ %1062, %1063 ], [ %.17552, %1073 ], [ %.17552, %1072 ] ; <<4 x float>> [#uses=1]
+ %.07555 = phi <4 x float> [ %1061, %1063 ], [ %.17556, %1073 ], [ %.17556, %1072 ] ; <<4 x float>> [#uses=1]
+ br i1 false, label %1075, label %1074
+
+; <label>:1074 ; preds = %xST.exit476
+ br label %xST.exit479
+
+; <label>:1075 ; preds = %xST.exit476
+ br i1 false, label %1077, label %1076
+
+; <label>:1076 ; preds = %1075
+ br label %1077
+
+; <label>:1077 ; preds = %1076, %1075
+ br i1 false, label %1079, label %1078
+
+; <label>:1078 ; preds = %1077
+ br label %1079
+
+; <label>:1079 ; preds = %1078, %1077
+ br i1 false, label %1081, label %1080
+
+; <label>:1080 ; preds = %1079
+ br label %1081
+
+; <label>:1081 ; preds = %1080, %1079
+ br i1 false, label %xST.exit479, label %1082
+
+; <label>:1082 ; preds = %1081
+ br label %xST.exit479
+
+xST.exit479: ; preds = %1082, %1081, %1074
+ br i1 false, label %1084, label %1083
+
+; <label>:1083 ; preds = %xST.exit479
+ br label %xST.exit482
+
+; <label>:1084 ; preds = %xST.exit479
+ br i1 false, label %1086, label %1085
+
+; <label>:1085 ; preds = %1084
+ br label %1086
+
+; <label>:1086 ; preds = %1085, %1084
+ br i1 false, label %1088, label %1087
+
+; <label>:1087 ; preds = %1086
+ br label %1088
+
+; <label>:1088 ; preds = %1087, %1086
+ br i1 false, label %1090, label %1089
+
+; <label>:1089 ; preds = %1088
+ br label %1090
+
+; <label>:1090 ; preds = %1089, %1088
+ br i1 false, label %xST.exit482, label %1091
+
+; <label>:1091 ; preds = %1090
+ br label %xST.exit482
+
+xST.exit482: ; preds = %1091, %1090, %1083
+ br i1 false, label %1093, label %1092
+
+; <label>:1092 ; preds = %xST.exit482
+ br label %xST.exit486
+
+; <label>:1093 ; preds = %xST.exit482
+ br i1 false, label %1095, label %1094
+
+; <label>:1094 ; preds = %1093
+ br label %1095
+
+; <label>:1095 ; preds = %1094, %1093
+ br i1 false, label %1097, label %1096
+
+; <label>:1096 ; preds = %1095
+ br label %1097
+
+; <label>:1097 ; preds = %1096, %1095
+ br i1 false, label %1099, label %1098
+
+; <label>:1098 ; preds = %1097
+ br label %1099
+
+; <label>:1099 ; preds = %1098, %1097
+ br i1 false, label %xST.exit486, label %1100
+
+; <label>:1100 ; preds = %1099
+ br label %xST.exit486
+
+xST.exit486: ; preds = %1100, %1099, %1092
+ br i1 false, label %1102, label %1101
+
+; <label>:1101 ; preds = %xST.exit486
+ br label %xST.exit489
+
+; <label>:1102 ; preds = %xST.exit486
+ br i1 false, label %1104, label %1103
+
+; <label>:1103 ; preds = %1102
+ br label %1104
+
+; <label>:1104 ; preds = %1103, %1102
+ br i1 false, label %1106, label %1105
+
+; <label>:1105 ; preds = %1104
+ br label %1106
+
+; <label>:1106 ; preds = %1105, %1104
+ br i1 false, label %1108, label %1107
+
+; <label>:1107 ; preds = %1106
+ br label %1108
+
+; <label>:1108 ; preds = %1107, %1106
+ br i1 false, label %xST.exit489, label %1109
+
+; <label>:1109 ; preds = %1108
+ br label %xST.exit489
+
+xST.exit489: ; preds = %1109, %1108, %1101
+ br i1 false, label %1111, label %1110
+
+; <label>:1110 ; preds = %xST.exit489
+ br label %xST.exit492
+
+; <label>:1111 ; preds = %xST.exit489
+ br i1 false, label %1113, label %1112
+
+; <label>:1112 ; preds = %1111
+ br label %1113
+
+; <label>:1113 ; preds = %1112, %1111
+ br i1 false, label %1115, label %1114
+
+; <label>:1114 ; preds = %1113
+ br label %1115
+
+; <label>:1115 ; preds = %1114, %1113
+ br i1 false, label %1117, label %1116
+
+; <label>:1116 ; preds = %1115
+ br label %1117
+
+; <label>:1117 ; preds = %1116, %1115
+ br i1 false, label %xST.exit492, label %1118
+
+; <label>:1118 ; preds = %1117
+ br label %xST.exit492
+
+xST.exit492: ; preds = %1118, %1117, %1110
+ load <4 x float>* null ; <<4 x float>>:1119 [#uses=1]
+ mul <4 x float> %1119, zeroinitializer ; <<4 x float>>:1120 [#uses=1]
+ mul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:1121 [#uses=1]
+ br i1 false, label %1123, label %1122
+
+; <label>:1122 ; preds = %xST.exit492
+ br label %xST.exit495
+
+; <label>:1123 ; preds = %xST.exit492
+ br i1 false, label %1125, label %1124
+
+; <label>:1124 ; preds = %1123
+ br label %1125
+
+; <label>:1125 ; preds = %1124, %1123
+ br i1 false, label %1127, label %1126
+
+; <label>:1126 ; preds = %1125
+ br label %1127
+
+; <label>:1127 ; preds = %1126, %1125
+ br i1 false, label %1129, label %1128
+
+; <label>:1128 ; preds = %1127
+ br label %1129
+
+; <label>:1129 ; preds = %1128, %1127
+ br i1 false, label %xST.exit495, label %1130
+
+; <label>:1130 ; preds = %1129
+ br label %xST.exit495
+
+xST.exit495: ; preds = %1130, %1129, %1122
+ %.07582 = phi <4 x float> [ %1121, %1122 ], [ %.17583, %1130 ], [ %.17583, %1129 ] ; <<4 x float>> [#uses=1]
+ %.07590 = phi <4 x float> [ %1120, %1122 ], [ %.17591, %1130 ], [ %.17591, %1129 ] ; <<4 x float>> [#uses=1]
+ load <4 x float>* null ; <<4 x float>>:1131 [#uses=1]
+ add <4 x float> %1131, zeroinitializer ; <<4 x float>>:1132 [#uses=1]
+ add <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:1133 [#uses=1]
+ br i1 false, label %1135, label %1134
+
+; <label>:1134 ; preds = %xST.exit495
+ br label %xST.exit498
+
+; <label>:1135 ; preds = %xST.exit495
+ br i1 false, label %1137, label %1136
+
+; <label>:1136 ; preds = %1135
+ br label %1137
+
+; <label>:1137 ; preds = %1136, %1135
+ br i1 false, label %1139, label %1138
+
+; <label>:1138 ; preds = %1137
+ br label %1139
+
+; <label>:1139 ; preds = %1138, %1137
+ br i1 false, label %1141, label %1140
+
+; <label>:1140 ; preds = %1139
+ br label %1141
+
+; <label>:1141 ; preds = %1140, %1139
+ br i1 false, label %xST.exit498, label %1142
+
+; <label>:1142 ; preds = %1141
+ br label %xST.exit498
+
+xST.exit498: ; preds = %1142, %1141, %1134
+ %.07617 = phi <4 x float> [ %1133, %1134 ], [ %.17618, %1142 ], [ %.17618, %1141 ] ; <<4 x float>> [#uses=1]
+ %.07621 = phi <4 x float> [ %1132, %1134 ], [ %.17622, %1142 ], [ %.17622, %1141 ] ; <<4 x float>> [#uses=1]
+ load <4 x float>* null ; <<4 x float>>:1143 [#uses=1]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 2 ; <<4 x float>*>:1144 [#uses=1]
+ load <4 x float>* %1144 ; <<4 x float>>:1145 [#uses=1]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 3 ; <<4 x float>*>:1146 [#uses=1]
+ load <4 x float>* %1146 ; <<4 x float>>:1147 [#uses=1]
+ shufflevector <4 x float> %1143, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:1148 [#uses=1]
+ shufflevector <4 x float> %1145, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:1149 [#uses=1]
+ shufflevector <4 x float> %1147, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:1150 [#uses=1]
+ mul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:1151 [#uses=1]
+ mul <4 x float> zeroinitializer, %1148 ; <<4 x float>>:1152 [#uses=1]
+ mul <4 x float> zeroinitializer, %1149 ; <<4 x float>>:1153 [#uses=1]
+ mul <4 x float> zeroinitializer, %1150 ; <<4 x float>>:1154 [#uses=1]
+ br i1 false, label %1156, label %1155
+
+; <label>:1155 ; preds = %xST.exit498
+ br label %xST.exit501
+
+; <label>:1156 ; preds = %xST.exit498
+ br i1 false, label %1158, label %1157
+
+; <label>:1157 ; preds = %1156
+ br label %1158
+
+; <label>:1158 ; preds = %1157, %1156
+ br i1 false, label %1160, label %1159
+
+; <label>:1159 ; preds = %1158
+ br label %1160
+
+; <label>:1160 ; preds = %1159, %1158
+ br i1 false, label %1162, label %1161
+
+; <label>:1161 ; preds = %1160
+ br label %1162
+
+; <label>:1162 ; preds = %1161, %1160
+ br i1 false, label %xST.exit501, label %1163
+
+; <label>:1163 ; preds = %1162
+ br label %xST.exit501
+
+xST.exit501: ; preds = %1163, %1162, %1155
+ %.07652 = phi <4 x float> [ %1154, %1155 ], [ %.17653, %1163 ], [ %.17653, %1162 ] ; <<4 x float>> [#uses=1]
+ %.07656 = phi <4 x float> [ %1153, %1155 ], [ %.17657, %1163 ], [ %.17657, %1162 ] ; <<4 x float>> [#uses=1]
+ %.07660 = phi <4 x float> [ %1152, %1155 ], [ %.17661, %1163 ], [ %.17661, %1162 ] ; <<4 x float>> [#uses=1]
+ %.07664 = phi <4 x float> [ %1151, %1155 ], [ %.17665, %1163 ], [ %.17665, %1162 ] ; <<4 x float>> [#uses=1]
+ load <4 x float>* null ; <<4 x float>>:1164 [#uses=1]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 2 ; <<4 x float>*>:1165 [#uses=1]
+ load <4 x float>* %1165 ; <<4 x float>>:1166 [#uses=1]
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 3 ; <<4 x float>*>:1167 [#uses=1]
+ load <4 x float>* %1167 ; <<4 x float>>:1168 [#uses=1]
+ add <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:1169 [#uses=1]
+ add <4 x float> zeroinitializer, %1164 ; <<4 x float>>:1170 [#uses=1]
+ add <4 x float> zeroinitializer, %1166 ; <<4 x float>>:1171 [#uses=1]
+ add <4 x float> zeroinitializer, %1168 ; <<4 x float>>:1172 [#uses=1]
+ br i1 false, label %1174, label %1173
+
+; <label>:1173 ; preds = %xST.exit501
+ br label %xST.exit504
+
+; <label>:1174 ; preds = %xST.exit501
+ br i1 false, label %1176, label %1175
+
+; <label>:1175 ; preds = %1174
+ br label %1176
+
+; <label>:1176 ; preds = %1175, %1174
+ br i1 false, label %1178, label %1177
+
+; <label>:1177 ; preds = %1176
+ br label %1178
+
+; <label>:1178 ; preds = %1177, %1176
+ br i1 false, label %1180, label %1179
+
+; <label>:1179 ; preds = %1178
+ br label %1180
+
+; <label>:1180 ; preds = %1179, %1178
+ br i1 false, label %xST.exit504, label %1181
+
+; <label>:1181 ; preds = %1180
+ br label %xST.exit504
+
+xST.exit504: ; preds = %1181, %1180, %1173
+ %.07722 = phi <4 x float> [ %1172, %1173 ], [ %.17723, %1181 ], [ %.17723, %1180 ] ; <<4 x float>> [#uses=1]
+ %.07726 = phi <4 x float> [ %1171, %1173 ], [ %.17727, %1181 ], [ %.17727, %1180 ] ; <<4 x float>> [#uses=1]
+ %.07730 = phi <4 x float> [ %1170, %1173 ], [ %.17731, %1181 ], [ %.17731, %1180 ] ; <<4 x float>> [#uses=1]
+ %.07734 = phi <4 x float> [ %1169, %1173 ], [ %.17735, %1181 ], [ %.17735, %1180 ] ; <<4 x float>> [#uses=1]
+ add <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:1182 [#uses=1]
+ br i1 false, label %1184, label %1183
+
+; <label>:1183 ; preds = %xST.exit504
+ br label %xST.exit507
+
+; <label>:1184 ; preds = %xST.exit504
+ br i1 false, label %1186, label %1185
+
+; <label>:1185 ; preds = %1184
+ br label %1186
+
+; <label>:1186 ; preds = %1185, %1184
+ br i1 false, label %1188, label %1187
+
+; <label>:1187 ; preds = %1186
+ store <4 x float> zeroinitializer, <4 x float>* null
+ br label %1188
+
+; <label>:1188 ; preds = %1187, %1186
+ load <4 x i32>* %.sub7896 ; <<4 x i32>>:1189 [#uses=1]
+ shufflevector <4 x i32> %1189, <4 x i32> undef, <4 x i32> < i32 2, i32 2, i32 2, i32 2 > ; <<4 x i32>>:1190 [#uses=1]
+ call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> %1190, <4 x i32> zeroinitializer ) ; <i32>:1191 [#uses=1]
+ icmp eq i32 %1191, 0 ; <i1>:1192 [#uses=1]
+ br i1 %1192, label %1196, label %1193
+
+; <label>:1193 ; preds = %1188
+ load <4 x float>* null ; <<4 x float>>:1194 [#uses=1]
+ shufflevector <4 x float> zeroinitializer, <4 x float> %1194, <4 x i32> < i32 0, i32 1, i32 2, i32 7 > ; <<4 x float>>:1195 [#uses=1]
+ store <4 x float> %1195, <4 x float>* null
+ br label %1196
+
+; <label>:1196 ; preds = %1193, %1188
+ %.07742 = phi <4 x float> [ zeroinitializer, %1193 ], [ zeroinitializer, %1188 ] ; <<4 x float>> [#uses=0]
+ load <4 x i32>* %.sub7896 ; <<4 x i32>>:1197 [#uses=1]
+ shufflevector <4 x i32> %1197, <4 x i32> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > ; <<4 x i32>>:1198 [#uses=1]
+ call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> %1198, <4 x i32> zeroinitializer ) ; <i32>:1199 [#uses=1]
+ icmp eq i32 %1199, 0 ; <i1>:1200 [#uses=1]
+ br i1 %1200, label %xST.exit507, label %1201
+
+; <label>:1201 ; preds = %1196
+ store <4 x float> zeroinitializer, <4 x float>* null
+ br label %xST.exit507
+
+xST.exit507: ; preds = %1201, %1196, %1183
+ %.07769 = phi <4 x float> [ %1182, %1183 ], [ %.17770, %1201 ], [ %.17770, %1196 ] ; <<4 x float>> [#uses=1]
+ call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:1202 [#uses=1]
+ icmp eq i32 %1202, 0 ; <i1>:1203 [#uses=1]
+ br i1 %1203, label %1207, label %1204
+
+; <label>:1204 ; preds = %xST.exit507
+ load <4 x float>* null ; <<4 x float>>:1205 [#uses=1]
+ shufflevector <4 x float> zeroinitializer, <4 x float> %1205, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:1206 [#uses=1]
+ store <4 x float> %1206, <4 x float>* null
+ br label %1207
+
+; <label>:1207 ; preds = %1204, %xST.exit507
+ load <4 x i32>* %.sub7896 ; <<4 x i32>>:1208 [#uses=1]
+ shufflevector <4 x i32> %1208, <4 x i32> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 > ; <<4 x i32>>:1209 [#uses=1]
+ call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> %1209, <4 x i32> zeroinitializer ) ; <i32>:1210 [#uses=1]
+ icmp eq i32 %1210, 0 ; <i1>:1211 [#uses=1]
+ br i1 %1211, label %1215, label %1212
+
+; <label>:1212 ; preds = %1207
+ load <4 x float>* null ; <<4 x float>>:1213 [#uses=1]
+ shufflevector <4 x float> zeroinitializer, <4 x float> %1213, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:1214 [#uses=1]
+ store <4 x float> %1214, <4 x float>* null
+ br label %1215
+
+; <label>:1215 ; preds = %1212, %1207
+ store <4 x float> zeroinitializer, <4 x float>* null
+ br label %xLS.exit449
+}
+
+declare <4 x i32> @llvm.ppc.altivec.vsel(<4 x i32>, <4 x i32>, <4 x i32>)
+
+declare void @llvm.ppc.altivec.stvewx(<4 x i32>, i8*)
+
+declare <4 x float> @llvm.ppc.altivec.vrsqrtefp(<4 x float>)
+
+declare <4 x float> @llvm.ppc.altivec.vcfsx(<4 x i32>, i32)
+
+declare i32 @llvm.ppc.altivec.vcmpequw.p(i32, <4 x i32>, <4 x i32>)
+
+declare <4 x i32> @llvm.ppc.altivec.vcmpgtfp(<4 x float>, <4 x float>)
diff --git a/test/CodeGen/PowerPC/2007-04-24-InlineAsm-I-Modifier.ll b/test/CodeGen/PowerPC/2007-04-24-InlineAsm-I-Modifier.ll
new file mode 100644
index 0000000..8405703
--- /dev/null
+++ b/test/CodeGen/PowerPC/2007-04-24-InlineAsm-I-Modifier.ll
@@ -0,0 +1,15 @@
+; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin8.8.0 | grep {foo r3, r4}
+; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin8.8.0 | grep {bar r3, r}
+
+; PR1351
+
+define i32 @test1(i32 %Y, i32 %X) {
+ %tmp1 = tail call i32 asm "foo${1:I} $0, $1", "=r,rI"( i32 %X )
+ ret i32 %tmp1
+}
+
+;; TODO: We'd actually prefer this to be 'bari r3, 47', but 'bar r3, rN' is also ok.
+define i32 @test2(i32 %Y, i32 %X) {
+ %tmp1 = tail call i32 asm "bar${1:I} $0, $1", "=r,rI"( i32 47 )
+ ret i32 %tmp1
+}
diff --git a/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll b/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll
new file mode 100644
index 0000000..f43b87c
--- /dev/null
+++ b/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll
@@ -0,0 +1,27 @@
+; RUN: llvm-as < %s | llc | grep {subfc r2,r5,r4}
+; RUN: llvm-as < %s | llc | grep {subfze r4,r3}
+
+; PR1357
+
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
+target triple = "powerpc-apple-darwin8.8.0"
+
+;long long test(int A, int B, int C) {
+; unsigned X, Y;
+; __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2"
+; : "=r" (X), "=&r" (Y)
+; : "r" (A), "rI" (B), "r" (C));
+; return ((long long)Y << 32) | X;
+;}
+
+define i64 @test(i32 %A, i32 %B, i32 %C) {
+entry:
+ %Y = alloca i32, align 4 ; <i32*> [#uses=2]
+ %tmp4 = call i32 asm "subf${3:I}c $1,$4,$3\0A\09subfze $0,$2", "=r,=*&r,r,rI,r"( i32* %Y, i32 %A, i32 %B, i32 %C ) ; <i32> [#uses=1]
+ %tmp5 = load i32* %Y ; <i32> [#uses=1]
+ %tmp56 = zext i32 %tmp5 to i64 ; <i64> [#uses=1]
+ %tmp7 = shl i64 %tmp56, 32 ; <i64> [#uses=1]
+ %tmp89 = zext i32 %tmp4 to i64 ; <i64> [#uses=1]
+ %tmp10 = or i64 %tmp7, %tmp89 ; <i64> [#uses=1]
+ ret i64 %tmp10
+}
diff --git a/test/CodeGen/PowerPC/2007-05-03-InlineAsm-S-Constraint.ll b/test/CodeGen/PowerPC/2007-05-03-InlineAsm-S-Constraint.ll
new file mode 100644
index 0000000..989a751
--- /dev/null
+++ b/test/CodeGen/PowerPC/2007-05-03-InlineAsm-S-Constraint.ll
@@ -0,0 +1,12 @@
+; RUN: llvm-as < %s | llc
+; PR1382
+
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
+target triple = "powerpc-apple-darwin8.8.0"
+@x = global [2 x i32] [ i32 1, i32 2 ] ; <[2 x i32]*> [#uses=1]
+
+define void @foo() {
+entry:
+ tail call void asm sideeffect "$0 $1", "s,i"( i8* bitcast (i32* getelementptr ([2 x i32]* @x, i32 0, i32 1) to i8*), i8* bitcast (i32* getelementptr ([2 x i32]* @x, i32 0, i32 1) to i8*) )
+ ret void
+}
diff --git a/test/CodeGen/PowerPC/2007-05-14-InlineAsmSelectCrash.ll b/test/CodeGen/PowerPC/2007-05-14-InlineAsmSelectCrash.ll
new file mode 100644
index 0000000..b64de68
--- /dev/null
+++ b/test/CodeGen/PowerPC/2007-05-14-InlineAsmSelectCrash.ll
@@ -0,0 +1,25 @@
+; RUN: llvm-as < %s | llc -march=ppc32
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
+target triple = "powerpc-apple-darwin8.8.0"
+ %struct..0anon = type { i32 }
+ %struct.A = type { %struct.anon }
+ %struct.anon = type <{ }>
+
+define void @bork(%struct.A* %In0P) {
+entry:
+ %tmp56 = bitcast %struct.A* %In0P to float* ; <float*> [#uses=1]
+ br label %bb
+
+bb: ; preds = %bb, %entry
+ %i.035.0 = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2]
+ %tmp8 = getelementptr float* %tmp56, i32 %i.035.0 ; <float*> [#uses=2]
+ %tmp101112 = bitcast float* %tmp8 to i8* ; <i8*> [#uses=1]
+ %tmp1617 = bitcast float* %tmp8 to i32* ; <i32*> [#uses=1]
+ %tmp21 = tail call i32 asm "lwbrx $0, $2, $1", "=r,r,bO,*m"( i8* %tmp101112, i32 0, i32* %tmp1617 ) ; <i32> [#uses=0]
+ %indvar.next = add i32 %i.035.0, 1 ; <i32> [#uses=2]
+ %exitcond = icmp eq i32 %indvar.next, 4 ; <i1> [#uses=1]
+ br i1 %exitcond, label %return, label %bb
+
+return: ; preds = %bb
+ ret void
+}
diff --git a/test/CodeGen/PowerPC/2007-05-22-tailmerge-3.ll b/test/CodeGen/PowerPC/2007-05-22-tailmerge-3.ll
new file mode 100644
index 0000000..0aebeb9
--- /dev/null
+++ b/test/CodeGen/PowerPC/2007-05-22-tailmerge-3.ll
@@ -0,0 +1,68 @@
+; RUN: llvm-as < %s | llc -march=ppc32 | grep bl.*baz | wc -l | grep 2
+; RUN: llvm-as < %s | llc -march=ppc32 | grep bl.*quux | wc -l | grep 2
+; RUN: llvm-as < %s | llc -march=ppc32 -enable-tail-merge | grep bl.*baz | wc -l | grep 1
+; RUN: llvm-as < %s | llc -march=ppc32 -enable-tail-merge=1 | grep bl.*quux | wc -l | grep 1
+; Check that tail merging is not the default on ppc, and that -enable-tail-merge works.
+
+; ModuleID = 'tail.c'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
+target triple = "i686-apple-darwin8"
+
+define i32 @f(i32 %i, i32 %q) {
+entry:
+ %i_addr = alloca i32 ; <i32*> [#uses=2]
+ %q_addr = alloca i32 ; <i32*> [#uses=2]
+ %retval = alloca i32, align 4 ; <i32*> [#uses=1]
+ "alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ store i32 %i, i32* %i_addr
+ store i32 %q, i32* %q_addr
+ %tmp = load i32* %i_addr ; <i32> [#uses=1]
+ %tmp1 = icmp ne i32 %tmp, 0 ; <i1> [#uses=1]
+ %tmp12 = zext i1 %tmp1 to i8 ; <i8> [#uses=1]
+ %toBool = icmp ne i8 %tmp12, 0 ; <i1> [#uses=1]
+ br i1 %toBool, label %cond_true, label %cond_false
+
+cond_true: ; preds = %entry
+ %tmp3 = call i32 (...)* @bar( ) ; <i32> [#uses=0]
+ %tmp4 = call i32 (...)* @baz( i32 5, i32 6 ) ; <i32> [#uses=0]
+ %tmp7 = load i32* %q_addr ; <i32> [#uses=1]
+ %tmp8 = icmp ne i32 %tmp7, 0 ; <i1> [#uses=1]
+ %tmp89 = zext i1 %tmp8 to i8 ; <i8> [#uses=1]
+ %toBool10 = icmp ne i8 %tmp89, 0 ; <i1> [#uses=1]
+ br i1 %toBool10, label %cond_true11, label %cond_false15
+
+cond_false: ; preds = %entry
+ %tmp5 = call i32 (...)* @foo( ) ; <i32> [#uses=0]
+ %tmp6 = call i32 (...)* @baz( i32 5, i32 6 ) ; <i32> [#uses=0]
+ %tmp27 = load i32* %q_addr ; <i32> [#uses=1]
+ %tmp28 = icmp ne i32 %tmp27, 0 ; <i1> [#uses=1]
+ %tmp289 = zext i1 %tmp28 to i8 ; <i8> [#uses=1]
+ %toBool210 = icmp ne i8 %tmp289, 0 ; <i1> [#uses=1]
+ br i1 %toBool210, label %cond_true11, label %cond_false15
+
+cond_true11: ; preds = %cond_next
+ %tmp13 = call i32 (...)* @foo( ) ; <i32> [#uses=0]
+ %tmp14 = call i32 (...)* @quux( i32 3, i32 4 ) ; <i32> [#uses=0]
+ br label %cond_next18
+
+cond_false15: ; preds = %cond_next
+ %tmp16 = call i32 (...)* @bar( ) ; <i32> [#uses=0]
+ %tmp17 = call i32 (...)* @quux( i32 3, i32 4 ) ; <i32> [#uses=0]
+ br label %cond_next18
+
+cond_next18: ; preds = %cond_false15, %cond_true11
+ %tmp19 = call i32 (...)* @bar( ) ; <i32> [#uses=0]
+ br label %return
+
+return: ; preds = %cond_next18
+ %retval20 = load i32* %retval ; <i32> [#uses=1]
+ ret i32 %retval20
+}
+
+declare i32 @bar(...)
+
+declare i32 @baz(...)
+
+declare i32 @foo(...)
+
+declare i32 @quux(...)
diff --git a/test/CodeGen/PowerPC/2007-05-30-dagcombine-miscomp.ll b/test/CodeGen/PowerPC/2007-05-30-dagcombine-miscomp.ll
new file mode 100644
index 0000000..0ea76c7
--- /dev/null
+++ b/test/CodeGen/PowerPC/2007-05-30-dagcombine-miscomp.ll
@@ -0,0 +1,14 @@
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
+target triple = "powerpc-apple-darwin8.8.0"
+
+; RUN: llvm-as < %s | llc -march=ppc32 | grep {rlwinm r3, r3, 23, 30, 30}
+; PR1473
+
+define i8 @foo(i16 zext %a) zext {
+ %tmp2 = lshr i16 %a, 10 ; <i16> [#uses=1]
+ %tmp23 = trunc i16 %tmp2 to i8 ; <i8> [#uses=1]
+ %tmp4 = shl i8 %tmp23, 1 ; <i8> [#uses=1]
+ %tmp5 = and i8 %tmp4, 2 ; <i8> [#uses=1]
+ ret i8 %tmp5
+}
+
diff --git a/test/CodeGen/PowerPC/2007-06-28-BCCISelBug.ll b/test/CodeGen/PowerPC/2007-06-28-BCCISelBug.ll
new file mode 100644
index 0000000..58260ec
--- /dev/null
+++ b/test/CodeGen/PowerPC/2007-06-28-BCCISelBug.ll
@@ -0,0 +1,85 @@
+; RUN: llvm-as < %s | llc -march=ppc32 -mattr=+altivec
+
+ %struct.XATest = type { float, i16, i8, i8 }
+ %struct.XArrayRange = type { i8, i8, i8, i8 }
+ %struct.XBlendMode = type { i16, i16, i16, i16, %struct.GIC4, i16, i16, i8, i8, i8, i8 }
+ %struct.XClearC = type { double, %struct.GIC4, %struct.GIC4, float, i32 }
+ %struct.XClipPlane = type { i32, [6 x %struct.GIC4] }
+ %struct.XCBuffer = type { i16, i16, [8 x i16] }
+ %struct.XCMatrix = type { [16 x float]*, %struct.XICSS }
+ %struct.XConvolution = type { %struct.GIC4, %struct.XICSS, i16, i16, float*, i32, i32 }
+ %struct.XDepthTest = type { i16, i16, i8, i8, i8, i8, double, double }
+ %struct.XFixedFunctionProgram = type { %struct.PPSToken* }
+ %struct.XFogMode = type { %struct.GIC4, float, float, float, float, float, i16, i16, i16, i8, i8 }
+ %struct.XFramebufferAttachment = type { i32, i32, i32, i32 }
+ %struct.XHintMode = type { i16, i16, i16, i16, i16, i16, i16, i16, i16, i16 }
+ %struct.XHistogram = type { %struct.XFramebufferAttachment*, i32, i16, i8, i8 }
+ %struct.XICSS = type { %struct.GTCoord2, %struct.GTCoord2, %struct.GTCoord2, %struct.GTCoord2 }
+ %struct.XISubset = type { %struct.XConvolution, %struct.XConvolution, %struct.XConvolution, %struct.XCMatrix, %struct.XMinmax, %struct.XHistogram, %struct.XICSS, %struct.XICSS, %struct.XICSS, %struct.XICSS, i32 }
+ %struct.XLight = type { %struct.GIC4, %struct.GIC4, %struct.GIC4, %struct.GIC4, %struct.XPointLineLimits, float, float, float, float, float, %struct.XPointLineLimits, float, float, float, float, float }
+ %struct.XLightModel = type { %struct.GIC4, [8 x %struct.XLight], [2 x %struct.XMaterial], i32, i16, i16, i16, i8, i8, i8, i8, i8, i8 }
+ %struct.XLightProduct = type { %struct.GIC4, %struct.GIC4, %struct.GIC4 }
+ %struct.XLineMode = type { float, i32, i16, i16, i8, i8, i8, i8 }
+ %struct.XLogicOp = type { i16, i8, i8 }
+ %struct.XMaskMode = type { i32, [3 x i32], i8, i8, i8, i8, i8, i8, i8, i8 }
+ %struct.XMaterial = type { %struct.GIC4, %struct.GIC4, %struct.GIC4, %struct.GIC4, float, float, float, float, [8 x %struct.XLightProduct], %struct.GIC4, [6 x i32], [2 x i32] }
+ %struct.XMinmax = type { %struct.XMinmaxTable*, i16, i8, i8 }
+ %struct.XMinmaxTable = type { %struct.GIC4, %struct.GIC4 }
+ %struct.XMipmaplevel = type { [4 x i32], [4 x i32], [4 x float], [4 x i32], i32, i32, float*, i8*, i16, i16, i16, i16, [2 x float] }
+ %struct.XMultisample = type { float, i8, i8, i8, i8, i8, i8, i8, i8 }
+ %struct.XPipelineProgramState = type { i8, i8, i8, i8, %struct.GIC4* }
+ %struct.XPMap = type { i32*, float*, float*, float*, float*, float*, float*, float*, float*, i32*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
+ %struct.XPMode = type { float, float, %struct.XPStore, %struct.XPTransfer, %struct.XPMap, %struct.XISubset, i32, i32 }
+ %struct.XPPack = type { i32, i32, i32, i32, i32, i32, i32, i32, i8, i8, i8, i8 }
+ %struct.XPStore = type { %struct.XPPack, %struct.XPPack }
+ %struct.XPTransfer = type { float, float, float, float, float, float, float, float, float, float, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float }
+ %struct.XPointLineLimits = type { float, float, float }
+ %struct.XPointMode = type { float, float, float, float, %struct.XPointLineLimits, float, i8, i8, i8, i8, i16, i16, i32, i16, i16 }
+ %struct.XPGMode = type { [128 x i8], float, float, i16, i16, i16, i16, i8, i8, i8, i8, i8, i8, i8, i8 }
+ %struct.XRegisterCCs = type { i8, i8, i8, i8, i32, [2 x %struct.GIC4], [8 x %struct.XRegisterCCsPerStageState], %struct.XRegisterCCsFinalStageState }
+ %struct.XRegisterCCsFinalStageState = type { i8, i8, i8, i8, [7 x %struct.XRegisterCCsPerVariableState] }
+ %struct.XRegisterCCsPerPortionState = type { [4 x %struct.XRegisterCCsPerVariableState], i8, i8, i8, i8, i16, i16, i16, i16, i16, i16 }
+ %struct.XRegisterCCsPerStageState = type { [2 x %struct.XRegisterCCsPerPortionState], [2 x %struct.GIC4] }
+ %struct.XRegisterCCsPerVariableState = type { i16, i16, i16, i16 }
+ %struct.XScissorTest = type { %struct.XFramebufferAttachment, i8, i8, i8, i8 }
+ %struct.XState = type { i16, i16, i16, i16, i32, i32, [256 x %struct.GIC4], [128 x %struct.GIC4], %struct.XViewport, %struct.XXF, %struct.XLightModel, %struct.XATest, %struct.XBlendMode, %struct.XClearC, %struct.XCBuffer, %struct.XDepthTest, %struct.XArrayRange, %struct.XFogMode, %struct.XHintMode, %struct.XLineMode, %struct.XLogicOp, %struct.XMaskMode, %struct.XPMode, %struct.XPointMode, %struct.XPGMode, %struct.XScissorTest, i32, %struct.XStencilTest, [16 x %struct.XTMode], %struct.XArrayRange, [8 x %struct.XTCoordGen], %struct.XClipPlane, %struct.XMultisample, %struct.XRegisterCCs, %struct.XArrayRange, %struct.XArrayRange, [3 x %struct.XPipelineProgramState], %struct.XXFFeedback, i32*, %struct.XFixedFunctionProgram, [3 x i32] }
+ %struct.XStencilTest = type { [3 x { i32, i32, i16, i16, i16, i16 }], i32, [4 x i8] }
+ %struct.XTCoordGen = type { { i16, i16, %struct.GIC4, %struct.GIC4 }, { i16, i16, %struct.GIC4, %struct.GIC4 }, { i16, i16, %struct.GIC4, %struct.GIC4 }, { i16, i16, %struct.GIC4, %struct.GIC4 }, i8, i8, i8, i8 }
+ %struct.XTGeomState = type { i16, i16, i16, i16, i16, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, [6 x i16], [6 x i16] }
+ %struct.XTLevel = type { i32, i32, i16, i16, i16, i8, i8, i16, i16, i16, i16, i8* }
+ %struct.XTMode = type { %struct.GIC4, i32, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, float, float, float, i16, i16, i16, i16, i16, i16, [4 x i16], i8, i8, i8, i8, [3 x float], [4 x float], float, float }
+ %struct.XTParamState = type { i16, i16, i16, i16, i16, i16, %struct.GIC4, float, float, float, float, i16, i16, i16, i16, float, i16, i8, i8, i32, i8* }
+ %struct.XTRec = type { %struct.XTState*, float, float, float, float, %struct.XMipmaplevel*, %struct.XMipmaplevel*, i32, i32, i32, i32, i32, i32, i32, [2 x %struct.PPSToken] }
+ %struct.XTState = type { i16, i8, i8, i16, i16, float, i32, %struct.GISWRSurface*, %struct.XTParamState, %struct.XTGeomState, %struct.XTLevel, [6 x [15 x %struct.XTLevel]] }
+ %struct.XXF = type { [24 x [16 x float]], [24 x [16 x float]], [16 x float], float, float, float, float, float, i8, i8, i8, i8, i32, i32, i32, i16, i16, i8, i8, i8, i8, i32 }
+ %struct.XXFFeedback = type { i8, i8, i8, i8, [16 x i32], [16 x i32] }
+ %struct.XViewport = type { float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, double, double, i32, i32, i32, i32, float, float, float, float }
+ %struct.GIC4 = type { float, float, float, float }
+ %struct.GISWRSurface = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, i8*, i8*, [4 x i8*], i32 }
+ %struct.GTCoord2 = type { float, float }
+ %struct.GVMFPContext = type { float, i32, i32, i32, float, [3 x float] }
+ %struct.GVMFPStack = type { [8 x i8*], i8*, i8*, i32, i32, { <4 x float> }, { <4 x float> }, <4 x i32> }
+ %struct.GVMFGAttrib = type { <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, [8 x <4 x float>] }
+ %struct.GVMTs = type { [16 x %struct.XTRec*] }
+ %struct.PPSToken = type { { i16, i16, i32 } }
+ %struct._GVMConstants = type { <4 x i32>, <4 x i32>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, float, float, float, float, float, float, float, float, float, float, float, float, [256 x float], [528 x i8] }
+
+declare <4 x i32> @llvm.ppc.altivec.lvewx(i8*)
+
+declare i32 @llvm.ppc.altivec.vcmpequw.p(i32, <4 x i32>, <4 x i32>)
+
+define void @test(%struct.XState* %gldst, <4 x float>* %prgrm, <4 x float>** %buffs, %struct._GVMConstants* %cnstn, %struct.PPSToken* %pstrm, %struct.GVMFPContext* %vmctx, %struct.GVMTs* %txtrs, %struct.GVMFPStack* %fpstk, %struct.GVMFGAttrib* %start, %struct.GVMFGAttrib* %deriv, i32 %fragx, i32 %fragy) {
+bb58.i:
+ %tmp3405.i = getelementptr %struct.XTRec* null, i32 0, i32 1 ; <float*> [#uses=1]
+ %tmp34053406.i = bitcast float* %tmp3405.i to i8* ; <i8*> [#uses=1]
+ %tmp3407.i = call <4 x i32> @llvm.ppc.altivec.lvewx( i8* %tmp34053406.i ) ; <<4 x i32>> [#uses=0]
+ %tmp4146.i = call i32 @llvm.ppc.altivec.vcmpequw.p( i32 3, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32> [#uses=1]
+ %tmp4147.i = icmp eq i32 %tmp4146.i, 0 ; <i1> [#uses=1]
+ br i1 %tmp4147.i, label %bb8799.i, label %bb4150.i
+
+bb4150.i: ; preds = %bb58.i
+ br label %bb8799.i
+
+bb8799.i: ; preds = %bb4150.i, %bb58.i
+ ret void
+}
diff --git a/test/CodeGen/PowerPC/Frames-align.ll b/test/CodeGen/PowerPC/Frames-align.ll
new file mode 100644
index 0000000..a7c02cc
--- /dev/null
+++ b/test/CodeGen/PowerPC/Frames-align.ll
@@ -0,0 +1,16 @@
+; RUN: llvm-upgrade < %s | llvm-as | \
+; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
+; RUN: grep {rlwinm r0, r1, 0, 22, 31}
+; RUN: llvm-upgrade < %s | llvm-as | \
+; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
+; RUN: grep {subfic r0, r0, -16448}
+; RUN: llvm-upgrade < %s | llvm-as | \
+; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 | \
+; RUN: grep {rldicl r0, r1, 0, 54}
+
+implementation
+
+int* %f1() {
+ %tmp = alloca int, uint 4095, align 1024
+ ret int* %tmp
+}
diff --git a/test/CodeGen/PowerPC/Frames-alloca.ll b/test/CodeGen/PowerPC/Frames-alloca.ll
new file mode 100644
index 0000000..205cf9a
--- /dev/null
+++ b/test/CodeGen/PowerPC/Frames-alloca.ll
@@ -0,0 +1,55 @@
+; RUN: llvm-upgrade < %s | llvm-as | \
+; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
+; RUN: grep {stw r31, 20(r1)}
+; RUN: llvm-upgrade < %s | llvm-as | \
+; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
+; RUN: grep {stwu r1, -64(r1)}
+; RUN: llvm-upgrade < %s | llvm-as | \
+; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | grep {lwz r1, 0(r1)}
+; RUN: llvm-upgrade < %s | llvm-as | \
+; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
+; RUN: grep {lwz r31, 20(r1)}
+; RUN: llvm-upgrade < %s | llvm-as | \
+; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
+; RUN: grep {stw r31, 20(r1)}
+; RUN: llvm-upgrade < %s | llvm-as | \
+; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
+; RUN: grep {stwu r1, -64(r1)}
+; RUN: llvm-upgrade < %s | llvm-as | \
+; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
+; RUN: grep {lwz r1, 0(r1)}
+; RUN: llvm-upgrade < %s | llvm-as | \
+; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
+; RUN: grep {lwz r31, 20(r1)}
+; RUN: llvm-upgrade < %s | llvm-as | \
+; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 | \
+; RUN: grep {std r31, 40(r1)}
+; RUN: llvm-upgrade < %s | llvm-as | \
+; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 | \
+; RUN: grep {stdu r1, -112(r1)}
+; RUN: llvm-upgrade < %s | llvm-as | \
+; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 | \
+; RUN: grep {ld r1, 0(r1)}
+; RUN: llvm-upgrade < %s | llvm-as | \
+; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 | \
+; RUN: grep {ld r31, 40(r1)}
+; RUN: llvm-upgrade < %s | llvm-as | \
+; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
+; RUN: grep {std r31, 40(r1)}
+; RUN: llvm-upgrade < %s | llvm-as | \
+; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
+; RUN: grep {stdu r1, -112(r1)}
+; RUN: llvm-upgrade < %s | llvm-as | \
+; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
+; RUN: grep {ld r1, 0(r1)}
+; RUN: llvm-upgrade < %s | llvm-as | \
+; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
+; RUN: grep {ld r31, 40(r1)}
+
+
+implementation
+
+int* %f1(uint %n) {
+ %tmp = alloca int, uint %n
+ ret int* %tmp
+}
diff --git a/test/CodeGen/PowerPC/Frames-large.ll b/test/CodeGen/PowerPC/Frames-large.ll
new file mode 100644
index 0000000..1f58fe0
--- /dev/null
+++ b/test/CodeGen/PowerPC/Frames-large.ll
@@ -0,0 +1,79 @@
+; RUN: llvm-upgrade < %s | llvm-as | \
+; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
+; RUN: not grep {stw r31, 20(r1)}
+; RUN: llvm-upgrade < %s | llvm-as | \
+; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | grep {lis r0, -1}
+; RUN: llvm-upgrade < %s | llvm-as | \
+; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
+; RUN: grep {ori r0, r0, 32704}
+; RUN: llvm-upgrade < %s | llvm-as | \
+; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
+; RUN: grep {stwux r1, r1, r0}
+; RUN: llvm-upgrade < %s | llvm-as | \
+; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
+; RUN: grep {lwz r1, 0(r1)}
+; RUN: llvm-upgrade < %s | llvm-as | \
+; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
+; RUN: not grep {lwz r31, 20(r1)}
+; RUN: llvm-upgrade < %s | llvm-as | \
+; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
+; RUN: grep {stw r31, 20(r1)}
+; RUN: llvm-upgrade < %s | llvm-as | \
+; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
+; RUN: grep {lis r0, -1}
+; RUN: llvm-upgrade < %s | llvm-as | \
+; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
+; RUN: grep {ori r0, r0, 32704}
+; RUN: llvm-upgrade < %s | llvm-as | \
+; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
+; RUN: grep {stwux r1, r1, r0}
+; RUN: llvm-upgrade < %s | llvm-as | \
+; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
+; RUN: grep {lwz r1, 0(r1)}
+; RUN: llvm-upgrade < %s | llvm-as | \
+; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
+; RUN: grep {lwz r31, 20(r1)}
+; RUN: llvm-upgrade < %s | llvm-as | \
+; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 | \
+; RUN: not grep {std r31, 40(r1)}
+; RUN: llvm-upgrade < %s | llvm-as | \
+; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 | \
+; RUN: grep {lis r0, -1}
+; RUN: llvm-upgrade < %s | llvm-as | \
+; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 | \
+; RUN: grep {ori r0, r0, 32656}
+; RUN: llvm-upgrade < %s | llvm-as | \
+; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 | \
+; RUN: grep {stdux r1, r1, r0}
+; RUN: llvm-upgrade < %s | llvm-as | \
+; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 | \
+; RUN: grep {ld r1, 0(r1)}
+; RUN: llvm-upgrade < %s | llvm-as | \
+; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 | \
+; RUN: not grep {ld r31, 40(r1)}
+; RUN: llvm-upgrade < %s | llvm-as | \
+; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
+; RUN: grep {std r31, 40(r1)}
+; RUN: llvm-upgrade < %s | llvm-as | \
+; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
+; RUN: grep {lis r0, -1}
+; RUN: llvm-upgrade < %s | llvm-as | \
+; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
+; RUN: grep {ori r0, r0, 32656}
+; RUN: llvm-upgrade < %s | llvm-as | \
+; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
+; RUN: grep {stdux r1, r1, r0}
+; RUN: llvm-upgrade < %s | llvm-as | \
+; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
+; RUN: grep {ld r1, 0(r1)}
+; RUN: llvm-upgrade < %s | llvm-as | \
+; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \
+; RUN: grep {ld r31, 40(r1)}
+
+
+implementation
+
+int* %f1() {
+ %tmp = alloca int, uint 8191
+ ret int* %tmp
+}
diff --git a/test/CodeGen/PowerPC/Frames-leaf.ll b/test/CodeGen/PowerPC/Frames-leaf.ll
new file mode 100644
index 0000000..9de1bde
--- /dev/null
+++ b/test/CodeGen/PowerPC/Frames-leaf.ll
@@ -0,0 +1,40 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | \
+; RUN: not grep {stw r31, 20(r1)}
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | \
+; RUN: not grep {stwu r1, -.*(r1)}
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | \
+; RUN: not grep {addi r1, r1, }
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | \
+; RUN: not grep {lwz r31, 20(r1)}
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -disable-fp-elim | \
+; RUN: not grep {stw r31, 20(r1)}
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -disable-fp-elim | \
+; RUN: not grep {stwu r1, -.*(r1)}
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -disable-fp-elim | \
+; RUN: not grep {addi r1, r1, }
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -disable-fp-elim | \
+; RUN: not grep {lwz r31, 20(r1)}
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc64 | \
+; RUN: not grep {std r31, 40(r1)}
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc64 | \
+; RUN: not grep {stdu r1, -.*(r1)}
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc64 | \
+; RUN: not grep {addi r1, r1, }
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc64 | \
+; RUN: not grep {ld r31, 40(r1)}
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc64 -disable-fp-elim | \
+; RUN: not grep {stw r31, 40(r1)}
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc64 -disable-fp-elim | \
+; RUN: not grep {stdu r1, -.*(r1)}
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc64 -disable-fp-elim | \
+; RUN: not grep {addi r1, r1, }
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc64 -disable-fp-elim | \
+; RUN: not grep {ld r31, 40(r1)}
+
+
+implementation
+
+int* %f1() {
+ %tmp = alloca int, uint 2
+ ret int* %tmp
+}
diff --git a/test/CodeGen/PowerPC/Frames-small.ll b/test/CodeGen/PowerPC/Frames-small.ll
new file mode 100644
index 0000000..549083a
--- /dev/null
+++ b/test/CodeGen/PowerPC/Frames-small.ll
@@ -0,0 +1,34 @@
+; RUN: llvm-upgrade < %s | llvm-as | \
+; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -o %t1 -f
; RUN: not grep {stw r31, 20(r1)} %t1
+; RUN: grep {stwu r1, -16448(r1)} %t1
+; RUN: grep {addi r1, r1, 16448} %t1
+; RUN: llvm-upgrade < %s | llvm-as | \
; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | not grep {lwz r31, 20(r1)}
+; RUN: llvm-upgrade < %s | llvm-as | \
+; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim \
+; RUN: -o %t2 -f
+; RUN: grep {stw r31, 20(r1)} %t2
+; RUN: grep {stwu r1, -16448(r1)} %t2
+; RUN: grep {addi r1, r1, 16448} %t2
+; RUN: grep {lwz r31, 20(r1)} %t2
+; RUN: llvm-upgrade < %s | llvm-as | \
+; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 -o %t3 -f
+; RUN: not grep {std r31, 40(r1)} %t3
+; RUN: grep {stdu r1, -16496(r1)} %t3
+; RUN: grep {addi r1, r1, 16496} %t3
+; RUN: not grep {ld r31, 40(r1)} %t3
+; RUN: llvm-upgrade < %s | llvm-as | \
+; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim \
+; RUN: -o %t4 -f
+; RUN: grep {std r31, 40(r1)} %t4
+; RUN: grep {stdu r1, -16496(r1)} %t4
+; RUN: grep {addi r1, r1, 16496} %t4
+; RUN: grep {ld r31, 40(r1)} %t4
+
+implementation
+
+int* %f1() {
+ %tmp = alloca int, uint 4095
+ ret int* %tmp
+}
diff --git a/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll b/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll
new file mode 100644
index 0000000..1705379
--- /dev/null
+++ b/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll
@@ -0,0 +1,17 @@
+; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin | \
+; RUN: grep {stw r3, 32751}
+; RUN: llvm-as < %s | llc -march=ppc64 -mtriple=powerpc-apple-darwin | \
+; RUN: grep {stw r3, 32751}
+; RUN: llvm-as < %s | llc -march=ppc64 -mtriple=powerpc-apple-darwin | \
+; RUN: grep {std r2, 9024}
+
+define void @test() {
+ store i32 0, i32* inttoptr (i64 48725999 to i32*)
+ ret void
+}
+
+define void @test2() {
+ store i64 0, i64* inttoptr (i64 74560 to i64*)
+ ret void
+}
+
diff --git a/test/CodeGen/PowerPC/addc.ll b/test/CodeGen/PowerPC/addc.ll
new file mode 100644
index 0000000..b268389
--- /dev/null
+++ b/test/CodeGen/PowerPC/addc.ll
@@ -0,0 +1,27 @@
+; All of these should be codegen'd without loading immediates
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -o %t -f
+; RUN: grep addc %t | wc -l | grep 1
+; RUN: grep adde %t | wc -l | grep 1
+; RUN: grep addze %t | wc -l | grep 1
+; RUN: grep addme %t | wc -l | grep 1
+; RUN: grep addic %t | wc -l | grep 2
+
+implementation ; Functions:
+
+long %add_ll(long %a, long %b) {
+entry:
+ %tmp.2 = add long %b, %a ; <long> [#uses=1]
+ ret long %tmp.2
+}
+
+long %add_l_5(long %a) {
+entry:
+ %tmp.1 = add long %a, 5 ; <long> [#uses=1]
+ ret long %tmp.1
+}
+
+long %add_l_m5(long %a) {
+entry:
+ %tmp.1 = add long %a, -5 ; <long> [#uses=1]
+ ret long %tmp.1
+}
diff --git a/test/CodeGen/PowerPC/addi-reassoc.ll b/test/CodeGen/PowerPC/addi-reassoc.ll
new file mode 100644
index 0000000..753f628
--- /dev/null
+++ b/test/CodeGen/PowerPC/addi-reassoc.ll
@@ -0,0 +1,20 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | not grep addi
+
+ %struct.X = type { [5 x sbyte] }
+implementation ; Functions:
+
+int %test1([4 x int]* %P, int %i) {
+ %tmp.2 = add int %i, 2 ; <int> [#uses=1]
+ %tmp.4 = getelementptr [4 x int]* %P, int %tmp.2, int 1
+ %tmp.5 = load int* %tmp.4
+ ret int %tmp.5
+}
+
+int %test2(%struct.X* %P, int %i) {
+ %tmp.2 = add int %i, 2
+ %tmp.5 = getelementptr %struct.X* %P, int %tmp.2, uint 0, int 1
+ %tmp.6 = load sbyte* %tmp.5
+ %tmp.7 = cast sbyte %tmp.6 to int
+ ret int %tmp.7
+}
+
diff --git a/test/CodeGen/PowerPC/align.ll b/test/CodeGen/PowerPC/align.ll
new file mode 100644
index 0000000..caf4a5d
--- /dev/null
+++ b/test/CodeGen/PowerPC/align.ll
@@ -0,0 +1,12 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | \
+; RUN: grep align.4 | wc -l | grep 1
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | \
+; RUN: grep align.2 | wc -l | grep 1
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | \
+; RUN: grep align.3 | wc -l | grep 1
+
+
+%A = global <4 x uint> < uint 10, uint 20, uint 30, uint 40 >
+%B = global float 1.000000e+02
+%C = global double 2.000000e+03
+
diff --git a/test/CodeGen/PowerPC/and-branch.ll b/test/CodeGen/PowerPC/and-branch.ll
new file mode 100644
index 0000000..4b0e7fa
--- /dev/null
+++ b/test/CodeGen/PowerPC/and-branch.ll
@@ -0,0 +1,18 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | not grep mfcr
+
+void %foo(int %X, int %Y, int %Z) {
+entry:
+ %tmp = seteq int %X, 0 ; <bool> [#uses=1]
+ %tmp3 = setlt int %Y, 5 ; <bool> [#uses=1]
+ %tmp4 = and bool %tmp3, %tmp ; <bool> [#uses=1]
+ br bool %tmp4, label %cond_true, label %UnifiedReturnBlock
+
+cond_true: ; preds = %entry
+ %tmp5 = tail call int (...)* %bar( ) ; <int> [#uses=0]
+ ret void
+
+UnifiedReturnBlock: ; preds = %entry
+ ret void
+}
+
+declare int %bar(...)
diff --git a/test/CodeGen/PowerPC/and-elim.ll b/test/CodeGen/PowerPC/and-elim.ll
new file mode 100644
index 0000000..f85b3d8
--- /dev/null
+++ b/test/CodeGen/PowerPC/and-elim.ll
@@ -0,0 +1,18 @@
+; RUN: llvm-as < %s | llc -march=ppc32 | not grep rlwin
+
+define void @test(i8* %P) {
+ %W = load i8* %P
+ %X = shl i8 %W, 1
+ %Y = add i8 %X, 2
+ %Z = and i8 %Y, 254 ; dead and
+ store i8 %Z, i8* %P
+ ret void
+}
+
+define i16 @test2(i16 zext %crc) zext {
+ ; No and's should be needed for the i16s here.
+ %tmp.1 = lshr i16 %crc, 1
+ %tmp.7 = xor i16 %tmp.1, 40961
+ ret i16 %tmp.7
+}
+
diff --git a/test/CodeGen/PowerPC/and-imm.ll b/test/CodeGen/PowerPC/and-imm.ll
new file mode 100644
index 0000000..b1d9fcb
--- /dev/null
+++ b/test/CodeGen/PowerPC/and-imm.ll
@@ -0,0 +1,12 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | not grep {ori\\|lis}
+
+int %test(int %X) {
+ %Y = and int %X, 32769 ; andi. r3, r3, 32769
+ ret int %Y
+}
+
+int %test2(int %X) {
+ %Y = and int %X, -2147418112 ; andis. r3, r3, 32769
+ ret int %Y
+}
+
diff --git a/test/CodeGen/PowerPC/and_add.ll b/test/CodeGen/PowerPC/and_add.ll
new file mode 100644
index 0000000..1f6428a
--- /dev/null
+++ b/test/CodeGen/PowerPC/and_add.ll
@@ -0,0 +1,12 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -o %t -f
+; RUN: grep slwi %t
+; RUN: not grep addi %t
+; RUN: not grep rlwinm %t
+
+int %test(int %A) {
+ %B = mul int %A, 8 ;; shift
+ %C = add int %B, 7 ;; dead, no demanded bits.
+ %D = and int %C, -8 ;; dead once add is gone.
+ ret int %D
+}
+
diff --git a/test/CodeGen/PowerPC/and_sext.ll b/test/CodeGen/PowerPC/and_sext.ll
new file mode 100644
index 0000000..ac27798
--- /dev/null
+++ b/test/CodeGen/PowerPC/and_sext.ll
@@ -0,0 +1,28 @@
+; These tests should not contain a sign extend.
+; RUN: llvm-as < %s | llc -march=ppc32 | not grep extsh
+; RUN: llvm-as < %s | llc -march=ppc32 | not grep extsb
+
+define i32 @test1(i32 %mode.0.i.0) {
+ %tmp.79 = trunc i32 %mode.0.i.0 to i16
+ %tmp.80 = sext i16 %tmp.79 to i32
+ %tmp.81 = and i32 %tmp.80, 24
+ ret i32 %tmp.81
+}
+
+define i16 @test2(i16 sext %X, i16 sext %x) sext {
+ %tmp = sext i16 %X to i32
+ %tmp1 = sext i16 %x to i32
+ %tmp2 = add i32 %tmp, %tmp1
+ %tmp4 = ashr i32 %tmp2, 1
+ %tmp5 = trunc i32 %tmp4 to i16
+ %tmp45 = sext i16 %tmp5 to i32
+ %retval = trunc i32 %tmp45 to i16
+ ret i16 %retval
+}
+
+define i16 @test3(i32 zext %X) sext {
+ %tmp1 = lshr i32 %X, 16
+ %tmp2 = trunc i32 %tmp1 to i16
+ ret i16 %tmp2
+}
+
diff --git a/test/CodeGen/PowerPC/and_sra.ll b/test/CodeGen/PowerPC/and_sra.ll
new file mode 100644
index 0000000..abfa9f1
--- /dev/null
+++ b/test/CodeGen/PowerPC/and_sra.ll
@@ -0,0 +1,26 @@
+; None of these functions should contain algebraic right shifts
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | not grep srawi
+
+int %test1(uint %mode.0.i.0) {
+ %tmp.79 = cast uint %mode.0.i.0 to int ; <sbyte> [#uses=1]
+ %tmp.80 = shr int %tmp.79, ubyte 15 ; <int> [#uses=1]
+ %tmp.81 = and int %tmp.80, 24 ; <int> [#uses=1]
+ ret int %tmp.81
+}
+
+int %test2(uint %mode.0.i.0) {
+ %tmp.79 = cast uint %mode.0.i.0 to int ; <sbyte> [#uses=1]
+ %tmp.80 = shr int %tmp.79, ubyte 15 ; <int> [#uses=1]
+ %tmp.81 = shr uint %mode.0.i.0, ubyte 16
+ %tmp.82 = cast uint %tmp.81 to int
+ %tmp.83 = and int %tmp.80, %tmp.82 ; <int> [#uses=1]
+ ret int %tmp.83
+}
+
+uint %test3(int %specbits.6.1) {
+ %tmp.2540 = shr int %specbits.6.1, ubyte 11 ; <int> [#uses=1]
+ %tmp.2541 = cast int %tmp.2540 to uint ; <uint> [#uses=1]
+ %tmp.2542 = shl uint %tmp.2541, ubyte 13 ; <uint> [#uses=1]
+ %tmp.2543 = and uint %tmp.2542, 8192 ; <uint> [#uses=1]
+ ret uint %tmp.2543
+}
diff --git a/test/CodeGen/PowerPC/big-endian-actual-args.ll b/test/CodeGen/PowerPC/big-endian-actual-args.ll
new file mode 100644
index 0000000..d239357
--- /dev/null
+++ b/test/CodeGen/PowerPC/big-endian-actual-args.ll
@@ -0,0 +1,9 @@
+; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
+; RUN: grep {addc 4, 4, 6}
+; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
+; RUN: grep {adde 3, 3, 5}
+
+define i64 @foo(i64 %x, i64 %y) {
+ %z = add i64 %x, %y
+ ret i64 %z
+}
diff --git a/test/CodeGen/PowerPC/big-endian-call-result.ll b/test/CodeGen/PowerPC/big-endian-call-result.ll
new file mode 100644
index 0000000..ab136f6
--- /dev/null
+++ b/test/CodeGen/PowerPC/big-endian-call-result.ll
@@ -0,0 +1,13 @@
+; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
+; RUN: grep {addic 4, 4, 1}
+; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
+; RUN: grep {addze 3, 3}
+
+declare i64 @foo()
+
+define i64 @bar()
+{
+ %t = call i64 @foo()
+ %s = add i64 %t, 1
+ ret i64 %s
+}
diff --git a/test/CodeGen/PowerPC/big-endian-formal-args.ll b/test/CodeGen/PowerPC/big-endian-formal-args.ll
new file mode 100644
index 0000000..08589f4
--- /dev/null
+++ b/test/CodeGen/PowerPC/big-endian-formal-args.ll
@@ -0,0 +1,15 @@
+; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
+; RUN: grep {li 6, 3}
+; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
+; RUN: grep {li 4, 2}
+; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
+; RUN: grep {li 3, 0}
+; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \
+; RUN: grep {mr 5, 3}
+
+declare void @bar(i64 %x, i64 %y)
+
+define void @foo() {
+ call void @bar(i64 2, i64 3)
+ ret void
+}
diff --git a/test/CodeGen/PowerPC/branch-opt.ll b/test/CodeGen/PowerPC/branch-opt.ll
new file mode 100644
index 0000000..ab550a3
--- /dev/null
+++ b/test/CodeGen/PowerPC/branch-opt.ll
@@ -0,0 +1,93 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | \
+; RUN: grep {b LBB.*} | wc -l | grep 4
+
+target endian = big
+target pointersize = 32
+target triple = "powerpc-apple-darwin8.7.0"
+
+implementation ; Functions:
+
+void %foo(int %W, int %X, int %Y, int %Z) {
+entry:
+ %X = cast int %X to uint ; <uint> [#uses=1]
+ %Y = cast int %Y to uint ; <uint> [#uses=1]
+ %Z = cast int %Z to uint ; <uint> [#uses=1]
+ %W = cast int %W to uint ; <uint> [#uses=1]
+ %tmp1 = and int %W, 1 ; <int> [#uses=1]
+ %tmp1 = seteq int %tmp1, 0 ; <bool> [#uses=1]
+ br bool %tmp1, label %cond_false, label %bb5
+
+bb: ; preds = %bb5, %bb
+ %indvar77 = phi uint [ %indvar.next78, %bb ], [ 0, %bb5 ] ; <uint> [#uses=1]
+ %tmp2 = tail call int (...)* %bar( ) ; <int> [#uses=0]
+ %indvar.next78 = add uint %indvar77, 1 ; <uint> [#uses=2]
+ %exitcond79 = seteq uint %indvar.next78, %X ; <bool> [#uses=1]
+ br bool %exitcond79, label %cond_next48, label %bb
+
+bb5: ; preds = %entry
+ %tmp = seteq int %X, 0 ; <bool> [#uses=1]
+ br bool %tmp, label %cond_next48, label %bb
+
+cond_false: ; preds = %entry
+ %tmp10 = and int %W, 2 ; <int> [#uses=1]
+ %tmp10 = seteq int %tmp10, 0 ; <bool> [#uses=1]
+ br bool %tmp10, label %cond_false20, label %bb16
+
+bb12: ; preds = %bb16, %bb12
+ %indvar72 = phi uint [ %indvar.next73, %bb12 ], [ 0, %bb16 ] ; <uint> [#uses=1]
+ %tmp13 = tail call int (...)* %bar( ) ; <int> [#uses=0]
+ %indvar.next73 = add uint %indvar72, 1 ; <uint> [#uses=2]
+ %exitcond74 = seteq uint %indvar.next73, %Y ; <bool> [#uses=1]
+ br bool %exitcond74, label %cond_next48, label %bb12
+
+bb16: ; preds = %cond_false
+ %tmp18 = seteq int %Y, 0 ; <bool> [#uses=1]
+ br bool %tmp18, label %cond_next48, label %bb12
+
+cond_false20: ; preds = %cond_false
+ %tmp23 = and int %W, 4 ; <int> [#uses=1]
+ %tmp23 = seteq int %tmp23, 0 ; <bool> [#uses=1]
+ br bool %tmp23, label %cond_false33, label %bb29
+
+bb25: ; preds = %bb29, %bb25
+ %indvar67 = phi uint [ %indvar.next68, %bb25 ], [ 0, %bb29 ] ; <uint> [#uses=1]
+ %tmp26 = tail call int (...)* %bar( ) ; <int> [#uses=0]
+ %indvar.next68 = add uint %indvar67, 1 ; <uint> [#uses=2]
+ %exitcond69 = seteq uint %indvar.next68, %Z ; <bool> [#uses=1]
+ br bool %exitcond69, label %cond_next48, label %bb25
+
+bb29: ; preds = %cond_false20
+ %tmp31 = seteq int %Z, 0 ; <bool> [#uses=1]
+ br bool %tmp31, label %cond_next48, label %bb25
+
+cond_false33: ; preds = %cond_false20
+ %tmp36 = and int %W, 8 ; <int> [#uses=1]
+ %tmp36 = seteq int %tmp36, 0 ; <bool> [#uses=1]
+ br bool %tmp36, label %cond_next48, label %bb42
+
+bb38: ; preds = %bb42
+ %tmp39 = tail call int (...)* %bar( ) ; <int> [#uses=0]
+ %indvar.next = add uint %indvar, 1 ; <uint> [#uses=1]
+ br label %bb42
+
+bb42: ; preds = %cond_false33, %bb38
+ %indvar = phi uint [ %indvar.next, %bb38 ], [ 0, %cond_false33 ] ; <uint> [#uses=3]
+ %indvar = cast uint %indvar to int ; <int> [#uses=1]
+ %W_addr.0 = sub int %W, %indvar ; <int> [#uses=1]
+ %exitcond = seteq uint %indvar, %W ; <bool> [#uses=1]
+ br bool %exitcond, label %cond_next48, label %bb38
+
+cond_next48: ; preds = %bb, %bb12, %bb25, %bb42, %cond_false33, %bb29, %bb16, %bb5
+ %W_addr.1 = phi int [ %W, %bb5 ], [ %W, %bb16 ], [ %W, %bb29 ], [ %W, %cond_false33 ], [ %W_addr.0, %bb42 ], [ %W, %bb25 ], [ %W, %bb12 ], [ %W, %bb ] ; <int> [#uses=1]
+ %tmp50 = seteq int %W_addr.1, 0 ; <bool> [#uses=1]
+ br bool %tmp50, label %UnifiedReturnBlock, label %cond_true51
+
+cond_true51: ; preds = %cond_next48
+ %tmp52 = tail call int (...)* %bar( ) ; <int> [#uses=0]
+ ret void
+
+UnifiedReturnBlock: ; preds = %cond_next48
+ ret void
+}
+
+declare int %bar(...)
diff --git a/test/CodeGen/PowerPC/bswap-load-store.ll b/test/CodeGen/PowerPC/bswap-load-store.ll
new file mode 100644
index 0000000..0c4a117
--- /dev/null
+++ b/test/CodeGen/PowerPC/bswap-load-store.ll
@@ -0,0 +1,44 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | \
+; RUN: grep {stwbrx\\|lwbrx\\|sthbrx\\|lhbrx} | wc -l | grep 4
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | not grep rlwinm
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | not grep rlwimi
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc64 | \
+; RUN: grep {stwbrx\\|lwbrx\\|sthbrx\\|lhbrx} | wc -l | grep 4
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc64 | not grep rlwinm
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc64 | not grep rlwimi
+
+void %STWBRX(uint %i, sbyte* %ptr, int %off) {
+ %tmp1 = getelementptr sbyte* %ptr, int %off
+ %tmp1 = cast sbyte* %tmp1 to uint*
+ %tmp13 = tail call uint %llvm.bswap.i32(uint %i)
+ store uint %tmp13, uint* %tmp1
+ ret void
+}
+
+uint %LWBRX(sbyte* %ptr, int %off) {
+ %tmp1 = getelementptr sbyte* %ptr, int %off
+ %tmp1 = cast sbyte* %tmp1 to uint*
+ %tmp = load uint* %tmp1
+ %tmp14 = tail call uint %llvm.bswap.i32( uint %tmp )
+ ret uint %tmp14
+}
+
+void %STHBRX(ushort %s, sbyte* %ptr, int %off) {
+ %tmp1 = getelementptr sbyte* %ptr, int %off
+ %tmp1 = cast sbyte* %tmp1 to ushort*
+ %tmp5 = call ushort %llvm.bswap.i16( ushort %s )
+ store ushort %tmp5, ushort* %tmp1
+ ret void
+}
+
+ushort %LHBRX(sbyte* %ptr, int %off) {
+ %tmp1 = getelementptr sbyte* %ptr, int %off
+ %tmp1 = cast sbyte* %tmp1 to ushort*
+ %tmp = load ushort* %tmp1
+ %tmp6 = call ushort %llvm.bswap.i16(ushort %tmp)
+ ret ushort %tmp6
+}
+
+declare uint %llvm.bswap.i32(uint)
+
+declare ushort %llvm.bswap.i16(ushort)
diff --git a/test/CodeGen/PowerPC/buildvec_canonicalize.ll b/test/CodeGen/PowerPC/buildvec_canonicalize.ll
new file mode 100644
index 0000000..54cbdae
--- /dev/null
+++ b/test/CodeGen/PowerPC/buildvec_canonicalize.ll
@@ -0,0 +1,27 @@
+; There should be exactly one vxor here.
+; RUN: llvm-upgrade < %s | llvm-as | \
+; RUN: llc -march=ppc32 -mcpu=g5 --enable-unsafe-fp-math | \
+; RUN: grep vxor | wc -l | grep 1
+
+; There should be exactly one vsplti here.
+; RUN: llvm-upgrade < %s | llvm-as | \
+; RUN: llc -march=ppc32 -mcpu=g5 --enable-unsafe-fp-math | \
+; RUN: grep vsplti | wc -l | grep 1
+
+
+void %VXOR(<4 x float>* %P1, <4 x int>* %P2, <4 x float>* %P3) {
+ %tmp = load <4 x float>* %P3
+ %tmp3 = load <4 x float>* %P1
+ %tmp4 = mul <4 x float> %tmp, %tmp3
+ store <4 x float> %tmp4, <4 x float>* %P3
+ store <4 x float> zeroinitializer, <4 x float>* %P1
+ store <4 x int> zeroinitializer, <4 x int>* %P2
+ ret void
+}
+
+void %VSPLTI(<4 x int>* %P2, <8 x short>* %P3) {
+ store <4 x int> cast (<16 x sbyte> < sbyte -1, sbyte -1, sbyte -1, sbyte -1, sbyte -1, sbyte -1, sbyte -1, sbyte -1, sbyte -1, sbyte -1, sbyte -1, sbyte -1, sbyte -1, sbyte -1, sbyte -1, sbyte -1 > to <4 x int>), <4 x int>* %P2
+ store <8 x short> < short -1, short -1, short -1, short -1, short -1, short -1, short -1, short -1 >, <8 x short>* %P3
+ ret void
+}
+
diff --git a/test/CodeGen/PowerPC/calls.ll b/test/CodeGen/PowerPC/calls.ll
new file mode 100644
index 0000000..f2a6003
--- /dev/null
+++ b/test/CodeGen/PowerPC/calls.ll
@@ -0,0 +1,31 @@
+; Test various forms of calls.
+
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | \
+; RUN: grep {bl } | wc -l | grep 2
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | \
+; RUN: grep {bctrl} | wc -l | grep 1
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | \
+; RUN: grep {bla } | wc -l | grep 1
+
+declare void %foo()
+
+void %test_direct() {
+ call void %foo()
+ ret void
+}
+
+void %test_extsym(sbyte *%P) {
+ free sbyte* %P
+ ret void
+}
+
+void %test_indirect(void()* %fp) {
+ call void %fp()
+ ret void
+}
+
+void %test_abs() {
+ %fp = cast int 400 to void()*
+ call void %fp()
+ ret void
+}
diff --git a/test/CodeGen/PowerPC/cmp-cmp.ll b/test/CodeGen/PowerPC/cmp-cmp.ll
new file mode 100644
index 0000000..6dbe484
--- /dev/null
+++ b/test/CodeGen/PowerPC/cmp-cmp.ll
@@ -0,0 +1,15 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | not grep mfcr
+
+void %test(long %X) {
+ %tmp1 = and long %X, 3 ; <long> [#uses=1]
+ %tmp = setgt long %tmp1, 2 ; <bool> [#uses=1]
+ br bool %tmp, label %UnifiedReturnBlock, label %cond_true
+
+cond_true: ; preds = %entry
+ tail call void %test(long 0)
+ ret void
+
+UnifiedReturnBlock: ; preds = %entry
+ ret void
+}
+
diff --git a/test/CodeGen/PowerPC/compare-duplicate.ll b/test/CodeGen/PowerPC/compare-duplicate.ll
new file mode 100644
index 0000000..df2dfdc
--- /dev/null
+++ b/test/CodeGen/PowerPC/compare-duplicate.ll
@@ -0,0 +1,11 @@
+; RUN: llvm-as < %s | llc -mtriple=powerpc-apple-darwin8 | not grep slwi
+
+define i32 @test(i32 %A, i32 %B) {
+ %C = sub i32 %B, %A
+ %D = icmp eq i32 %C, %A
+ br i1 %D, label %T, label %F
+T:
+ ret i32 19123
+F:
+ ret i32 %C
+}
diff --git a/test/CodeGen/PowerPC/compare-simm.ll b/test/CodeGen/PowerPC/compare-simm.ll
new file mode 100644
index 0000000..b0ef2d3f
--- /dev/null
+++ b/test/CodeGen/PowerPC/compare-simm.ll
@@ -0,0 +1,14 @@
+; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
+; RUN: grep {cmpwi cr0, r3, -1}
+
+define i32 @test(i32 %x) {
+ %c = icmp eq i32 %x, -1
+ br i1 %c, label %T, label %F
+T:
+ %A = call i32 @test(i32 123)
+ %B = add i32 %A, 43
+ ret i32 %B
+F:
+ %G = add i32 %x, 1234
+ ret i32 %G
+}
diff --git a/test/CodeGen/PowerPC/constants.ll b/test/CodeGen/PowerPC/constants.ll
new file mode 100644
index 0000000..4689a62
--- /dev/null
+++ b/test/CodeGen/PowerPC/constants.ll
@@ -0,0 +1,54 @@
+; All of these routines should perform optimal loads of constants.
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | \
+; RUN: grep lis | wc -l | grep 5
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | \
+; RUN: grep ori | wc -l | grep 3
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | \
+; RUN: grep {li } | wc -l | grep 4
+
+implementation ; Functions:
+
+int %f1() {
+entry:
+ ret int 1
+}
+
+int %f2() {
+entry:
+ ret int -1
+}
+
+int %f3() {
+entry:
+ ret int 0
+}
+
+int %f4() {
+entry:
+ ret int 32767
+}
+
+int %f5() {
+entry:
+ ret int 65535
+}
+
+int %f6() {
+entry:
+ ret int 65536
+}
+
+int %f7() {
+entry:
+ ret int 131071
+}
+
+int %f8() {
+entry:
+ ret int 2147483647
+}
+
+int %f9() {
+entry:
+ ret int -2147483648
+}
diff --git a/test/CodeGen/PowerPC/cttz.ll b/test/CodeGen/PowerPC/cttz.ll
new file mode 100644
index 0000000..3751d66
--- /dev/null
+++ b/test/CodeGen/PowerPC/cttz.ll
@@ -0,0 +1,12 @@
+; Make sure this testcase does not use ctpop
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | grep -i cntlzw
+
+declare uint %llvm.cttz.i32(uint)
+
+implementation ; Functions:
+
+uint %bar(uint %x) {
+entry:
+ %tmp.1 = call uint %llvm.cttz.i32( uint %x )
+ ret uint %tmp.1
+}
diff --git a/test/CodeGen/PowerPC/darwin-labels.ll b/test/CodeGen/PowerPC/darwin-labels.ll
new file mode 100644
index 0000000..c8bf47c
--- /dev/null
+++ b/test/CodeGen/PowerPC/darwin-labels.ll
@@ -0,0 +1,8 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc | grep {foo bar":}
+
+target endian = big
+target pointersize = 32
+target triple = "powerpc-apple-darwin8.2.0"
+
+"foo bar" = global int 4
+
diff --git a/test/CodeGen/PowerPC/dg.exp b/test/CodeGen/PowerPC/dg.exp
new file mode 100644
index 0000000..22b60bc
--- /dev/null
+++ b/test/CodeGen/PowerPC/dg.exp
@@ -0,0 +1,5 @@
+load_lib llvm.exp
+
+if { [llvm_supports_target PowerPC] } {
+ RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,llx,c,cpp,tr}]]
+}
diff --git a/test/CodeGen/PowerPC/div-2.ll b/test/CodeGen/PowerPC/div-2.ll
new file mode 100644
index 0000000..a3cd73c
--- /dev/null
+++ b/test/CodeGen/PowerPC/div-2.ll
@@ -0,0 +1,29 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | not grep srawi
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | grep blr
+
+int %test1(int %X) {
+ %Y = and int %X, 15
+ %Z = div int %Y, 4
+ ret int %Z
+}
+
+int %test2(int %W) {
+ %X = and int %W, 15
+ %Y = sub int 16, %X
+ %Z = div int %Y, 4
+ ret int %Z
+}
+
+int %test3(int %W) {
+ %X = and int %W, 15
+ %Y = sub int 15, %X
+ %Z = div int %Y, 4
+ ret int %Z
+}
+
+int %test4(int %W) {
+ %X = and int %W, 2
+ %Y = sub int 5, %X
+ %Z = div int %Y, 2
+ ret int %Z
+}
diff --git a/test/CodeGen/PowerPC/eqv-andc-orc-nor.ll b/test/CodeGen/PowerPC/eqv-andc-orc-nor.ll
new file mode 100644
index 0000000..359824c
--- /dev/null
+++ b/test/CodeGen/PowerPC/eqv-andc-orc-nor.ll
@@ -0,0 +1,94 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | \
+; RUN: grep eqv | wc -l | grep 3
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 | \
+; RUN: grep andc | wc -l | grep 3
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | \
+; RUN: grep orc | wc -l | grep 2
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 | \
+; RUN: grep nor | wc -l | grep 3
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | \
+; RUN: grep nand | wc -l | grep 1
+
+int %EQV1(int %X, int %Y) {
+ %A = xor int %X, %Y
+ %B = xor int %A, -1
+ ret int %B
+}
+
+int %EQV2(int %X, int %Y) {
+ %A = xor int %X, -1
+ %B = xor int %A, %Y
+ ret int %B
+}
+
+int %EQV3(int %X, int %Y) {
+ %A = xor int %X, -1
+ %B = xor int %Y, %A
+ ret int %B
+}
+
+int %ANDC1(int %X, int %Y) {
+ %A = xor int %Y, -1
+ %B = and int %X, %A
+ ret int %B
+}
+
+int %ANDC2(int %X, int %Y) {
+ %A = xor int %X, -1
+ %B = and int %A, %Y
+ ret int %B
+}
+
+int %ORC1(int %X, int %Y) {
+ %A = xor int %Y, -1
+ %B = or int %X, %A
+ ret int %B
+}
+
+int %ORC2(int %X, int %Y) {
+ %A = xor int %X, -1
+ %B = or int %A, %Y
+ ret int %B
+}
+
+int %NOR1(int %X) {
+ %Y = xor int %X, -1
+ ret int %Y
+}
+
+int %NOR2(int %X, int %Y) {
+ %Z = or int %X, %Y
+ %R = xor int %Z, -1
+ ret int %R
+}
+
+int %NAND1(int %X, int %Y) {
+ %Z = and int %X, %Y
+ %W = xor int %Z, -1
+ ret int %W
+}
+
+void %VNOR(<4 x float>* %P, <4 x float>* %Q) {
+ %tmp = load <4 x float>* %P
+ %tmp = cast <4 x float> %tmp to <4 x int>
+ %tmp2 = load <4 x float>* %Q
+ %tmp2 = cast <4 x float> %tmp2 to <4 x int>
+ %tmp3 = or <4 x int> %tmp, %tmp2
+ %tmp4 = xor <4 x int> %tmp3, < int -1, int -1, int -1, int -1 >
+ %tmp4 = cast <4 x int> %tmp4 to <4 x float>
+ store <4 x float> %tmp4, <4 x float>* %P
+ ret void
+}
+
+void %VANDC(<4 x float>* %P, <4 x float>* %Q) {
+ %tmp = load <4 x float>* %P
+ %tmp = cast <4 x float> %tmp to <4 x int>
+ %tmp2 = load <4 x float>* %Q
+ %tmp2 = cast <4 x float> %tmp2 to <4 x int>
+ %tmp4 = xor <4 x int> %tmp2, < int -1, int -1, int -1, int -1 >
+ %tmp3 = and <4 x int> %tmp, %tmp4
+ %tmp4 = cast <4 x int> %tmp3 to <4 x float>
+ store <4 x float> %tmp4, <4 x float>* %P
+ ret void
+}
+
diff --git a/test/CodeGen/PowerPC/extsh.ll b/test/CodeGen/PowerPC/extsh.ll
new file mode 100644
index 0000000..0f4f512
--- /dev/null
+++ b/test/CodeGen/PowerPC/extsh.ll
@@ -0,0 +1,7 @@
+; This should turn into a single extsh
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | grep extsh | wc -l | grep 1
+int %test(int %X) {
+ %tmp.81 = shl int %X, ubyte 16 ; <int> [#uses=1]
+ %tmp.82 = shr int %tmp.81, ubyte 16 ; <int> [#uses=1]
+ ret int %tmp.82
+}
diff --git a/test/CodeGen/PowerPC/fma.ll b/test/CodeGen/PowerPC/fma.ll
new file mode 100644
index 0000000..da2790b
--- /dev/null
+++ b/test/CodeGen/PowerPC/fma.ll
@@ -0,0 +1,47 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | \
+; RUN: egrep {fn?madd|fn?msub} | wc -l | grep 8
+
+double %test_FMADD1(double %A, double %B, double %C) {
+ %D = mul double %A, %B
+ %E = add double %D, %C
+ ret double %E
+}
+double %test_FMADD2(double %A, double %B, double %C) {
+ %D = mul double %A, %B
+ %E = add double %D, %C
+ ret double %E
+}
+double %test_FMSUB(double %A, double %B, double %C) {
+ %D = mul double %A, %B
+ %E = sub double %D, %C
+ ret double %E
+}
+double %test_FNMADD1(double %A, double %B, double %C) {
+ %D = mul double %A, %B
+ %E = add double %D, %C
+ %F = sub double -0.0, %E
+ ret double %F
+}
+double %test_FNMADD2(double %A, double %B, double %C) {
+ %D = mul double %A, %B
+ %E = add double %C, %D
+ %F = sub double -0.0, %E
+ ret double %F
+}
+double %test_FNMSUB1(double %A, double %B, double %C) {
+ %D = mul double %A, %B
+ %E = sub double %C, %D
+ ret double %E
+}
+double %test_FNMSUB2(double %A, double %B, double %C) {
+ %D = mul double %A, %B
+ %E = sub double %D, %C
+ %F = sub double -0.0, %E
+ ret double %F
+}
+float %test_FNMSUBS(float %A, float %B, float %C) {
+ %D = mul float %A, %B
+ %E = sub float %D, %C
+ %F = sub float -0.0, %E
+ ret float %F
+}
diff --git a/test/CodeGen/PowerPC/fnabs.ll b/test/CodeGen/PowerPC/fnabs.ll
new file mode 100644
index 0000000..5d0ef5f
--- /dev/null
+++ b/test/CodeGen/PowerPC/fnabs.ll
@@ -0,0 +1,11 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | grep fnabs
+
+declare double %fabs(double)
+
+implementation
+
+double %test(double %X) {
+ %Y = call double %fabs(double %X)
+ %Z = sub double -0.0, %Y
+ ret double %Z
+}
diff --git a/test/CodeGen/PowerPC/fneg.ll b/test/CodeGen/PowerPC/fneg.ll
new file mode 100644
index 0000000..a4f49f7
--- /dev/null
+++ b/test/CodeGen/PowerPC/fneg.ll
@@ -0,0 +1,12 @@
+; RUN: llvm-as < %s | llc -march=ppc32 | not grep fneg
+
+define double @test1(double %a, double %b, double %c, double %d) {
+entry:
+ %tmp2 = sub double -0.000000e+00, %c ; <double> [#uses=1]
+ %tmp4 = mul double %tmp2, %d ; <double> [#uses=1]
+ %tmp7 = mul double %a, %b ; <double> [#uses=1]
+ %tmp9 = sub double %tmp7, %tmp4 ; <double> [#uses=1]
+ ret double %tmp9
+}
+
+
diff --git a/test/CodeGen/PowerPC/fnegsel.ll b/test/CodeGen/PowerPC/fnegsel.ll
new file mode 100644
index 0000000..b1b0645
--- /dev/null
+++ b/test/CodeGen/PowerPC/fnegsel.ll
@@ -0,0 +1,8 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | not grep fneg
+
+double %test_FNEG_sel(double %A, double %B, double %C) {
+ %D = sub double -0.0, %A
+ %Cond = setgt double %D, -0.0
+ %E = select bool %Cond, double %B, double %C
+ ret double %E
+}
diff --git a/test/CodeGen/PowerPC/fold-li.ll b/test/CodeGen/PowerPC/fold-li.ll
new file mode 100644
index 0000000..66a900f
--- /dev/null
+++ b/test/CodeGen/PowerPC/fold-li.ll
@@ -0,0 +1,14 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | \
+; RUN: grep -v align | not grep li
+
+;; Test that immediates are folded into these instructions correctly.
+
+int %ADD(int %X) {
+ %Y = add int %X, 65537
+ ret int %Y
+}
+
+int %SUB(int %X) {
+ %Y = sub int %X, 65537
+ ret int %Y
+}
diff --git a/test/CodeGen/PowerPC/fp-branch.ll b/test/CodeGen/PowerPC/fp-branch.ll
new file mode 100644
index 0000000..1a371ed
--- /dev/null
+++ b/test/CodeGen/PowerPC/fp-branch.ll
@@ -0,0 +1,20 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | grep fcmp | wc -l | grep 1
+
+declare bool %llvm.isunordered.f64(double, double)
+
+bool %intcoord_cond_next55(double %tmp48.reload) {
+newFuncRoot:
+ br label %cond_next55
+
+bb72.exitStub: ; preds = %cond_next55
+ ret bool true
+
+cond_next62.exitStub: ; preds = %cond_next55
+ ret bool false
+
+cond_next55: ; preds = %newFuncRoot
+ %tmp57 = setge double %tmp48.reload, 1.000000e+00 ; <bool> [#uses=1]
+ %tmp58 = tail call bool %llvm.isunordered.f64( double %tmp48.reload, double 1.000000e+00 ) ; <bool> [#uses=1]
+ %tmp59 = or bool %tmp57, %tmp58 ; <bool> [#uses=1]
+ br bool %tmp59, label %bb72.exitStub, label %cond_next62.exitStub
+}
diff --git a/test/CodeGen/PowerPC/fp-int-fp.ll b/test/CodeGen/PowerPC/fp-int-fp.ll
new file mode 100644
index 0000000..63ebc49
--- /dev/null
+++ b/test/CodeGen/PowerPC/fp-int-fp.ll
@@ -0,0 +1,26 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 | not grep r1
+
+double %test1(double %X) {
+ %Y = cast double %X to long
+ %Z = cast long %Y to double
+ ret double %Z
+}
+
+float %test2(double %X) {
+ %Y = cast double %X to long
+ %Z = cast long %Y to float
+ ret float %Z
+}
+
+double %test3(float %X) {
+ %Y = cast float %X to long
+ %Z = cast long %Y to double
+ ret double %Z
+}
+
+float %test4(float %X) {
+ %Y = cast float %X to long
+ %Z = cast long %Y to float
+ ret float %Z
+}
+
diff --git a/test/CodeGen/PowerPC/fp_to_uint.ll b/test/CodeGen/PowerPC/fp_to_uint.ll
new file mode 100644
index 0000000..83468a4
--- /dev/null
+++ b/test/CodeGen/PowerPC/fp_to_uint.ll
@@ -0,0 +1,9 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | grep fctiwz | wc -l | grep 1
+
+implementation
+
+ushort %foo(float %a) {
+entry:
+ %tmp.1 = cast float %a to ushort
+ ret ushort %tmp.1
+}
diff --git a/test/CodeGen/PowerPC/fpcopy.ll b/test/CodeGen/PowerPC/fpcopy.ll
new file mode 100644
index 0000000..ce86da8
--- /dev/null
+++ b/test/CodeGen/PowerPC/fpcopy.ll
@@ -0,0 +1,6 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | not grep fmr
+
+double %test(float %F) {
+ %F = cast float %F to double
+ ret double %F
+}
diff --git a/test/CodeGen/PowerPC/fsqrt.ll b/test/CodeGen/PowerPC/fsqrt.ll
new file mode 100644
index 0000000..809077b
--- /dev/null
+++ b/test/CodeGen/PowerPC/fsqrt.ll
@@ -0,0 +1,21 @@
+; fsqrt should be generated when the fsqrt feature is enabled, but not
+; otherwise.
+
+; RUN: llvm-upgrade < %s | llvm-as | \
+; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -mattr=+fsqrt | \
+; RUN: grep {fsqrt f1, f1}
+; RUN: llvm-upgrade < %s | llvm-as | \
+; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -mcpu=g5 | \
+; RUN: grep {fsqrt f1, f1}
+; RUN: llvm-upgrade < %s | llvm-as | \
+; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -mattr=-fsqrt | \
+; RUN: not grep {fsqrt f1, f1}
+; RUN: llvm-upgrade < %s | llvm-as | \
+; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -mcpu=g4 | \
+; RUN: not grep {fsqrt f1, f1}
+
+declare double %llvm.sqrt.f64(double)
+double %X(double %Y) {
+ %Z = call double %llvm.sqrt.f64(double %Y)
+ ret double %Z
+}
diff --git a/test/CodeGen/PowerPC/hello.ll b/test/CodeGen/PowerPC/hello.ll
new file mode 100644
index 0000000..1d7275f
--- /dev/null
+++ b/test/CodeGen/PowerPC/hello.ll
@@ -0,0 +1,12 @@
+; RUN: llvm-as < %s | llc -march=ppc32
+; RUN: llvm-as < %s | llc -march=ppc64
+; PR1399
+
+@.str = internal constant [13 x i8] c"Hello World!\00"
+
+define i32 @main() {
+ %tmp2 = tail call i32 @puts( i8* getelementptr ([13 x i8]* @.str, i32 0, i64 0) )
+ ret i32 0
+}
+
+declare i32 @puts(i8*)
diff --git a/test/CodeGen/PowerPC/i64_fp.ll b/test/CodeGen/PowerPC/i64_fp.ll
new file mode 100644
index 0000000..8720327
--- /dev/null
+++ b/test/CodeGen/PowerPC/i64_fp.ll
@@ -0,0 +1,25 @@
+; fcfid and fctid should be generated when the 64bit feature is enabled, but not
+; otherwise.
+
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mattr=+64bit | \
+; RUN: grep fcfid
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mattr=+64bit | \
+; RUN: grep fctidz
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 | \
+; RUN: grep fcfid
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 | \
+; RUN: grep fctidz
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mattr=-64bit | \
+; RUN: not grep fcfid
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mattr=-64bit | \
+; RUN: not grep fctidz
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g4 | \
+; RUN: not grep fcfid
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g4 | \
+; RUN: not grep fctidz
+
+double %X(double %Y) {
+ %A = cast double %Y to long
+ %B = cast long %A to double
+ ret double %B
+}
diff --git a/test/CodeGen/PowerPC/iabs.ll b/test/CodeGen/PowerPC/iabs.ll
new file mode 100644
index 0000000..677b41b
--- /dev/null
+++ b/test/CodeGen/PowerPC/iabs.ll
@@ -0,0 +1,15 @@
+; RUN: llvm-as < %s | llc -march=ppc32 -stats |& \
+; RUN: grep {4 .*Number of machine instrs printed}
+
+;; Integer absolute value, should produce something as good as:
+;; srawi r2, r3, 31
+;; add r3, r3, r2
+;; xor r3, r3, r2
+;; blr
+define i32 @test(i32 %a) {
+ %tmp1neg = sub i32 0, %a
+ %b = icmp sgt i32 %a, -1
+ %abs = select i1 %b, i32 %a, i32 %tmp1neg
+ ret i32 %abs
+}
+
diff --git a/test/CodeGen/PowerPC/inlineasm-copy.ll b/test/CodeGen/PowerPC/inlineasm-copy.ll
new file mode 100644
index 0000000..34594d2
--- /dev/null
+++ b/test/CodeGen/PowerPC/inlineasm-copy.ll
@@ -0,0 +1,13 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | not grep mr
+
+int %test(int %Y, int %X) {
+entry:
+ %tmp = tail call int asm "foo $0", "=r"( ) ; <int> [#uses=1]
+ ret int %tmp
+}
+
+int %test2(int %Y, int %X) {
+entry:
+ %tmp1 = tail call int asm "foo $0, $1", "=r,r"( int %X ) ; <int> [#uses=1]
+ ret int %tmp1
+}
diff --git a/test/CodeGen/PowerPC/inverted-bool-compares.ll b/test/CodeGen/PowerPC/inverted-bool-compares.ll
new file mode 100644
index 0000000..fbbf6a5
--- /dev/null
+++ b/test/CodeGen/PowerPC/inverted-bool-compares.ll
@@ -0,0 +1,10 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | not grep xori
+
+int %test(bool %B, int* %P) {
+ br bool %B, label %T, label %F
+T:
+ store int 123, int* %P
+ ret int 0
+F:
+ret int 17
+}
diff --git a/test/CodeGen/PowerPC/ispositive.ll b/test/CodeGen/PowerPC/ispositive.ll
new file mode 100644
index 0000000..192d738
--- /dev/null
+++ b/test/CodeGen/PowerPC/ispositive.ll
@@ -0,0 +1,10 @@
+; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
+; RUN: grep {srwi r3, r3, 31}
+
+define i32 @test1(i32 %X) {
+entry:
+ icmp slt i32 %X, 0 ; <i1>:0 [#uses=1]
+ zext i1 %0 to i32 ; <i32>:1 [#uses=1]
+ ret i32 %1
+}
+
diff --git a/test/CodeGen/PowerPC/lha.ll b/test/CodeGen/PowerPC/lha.ll
new file mode 100644
index 0000000..cc35e8a
--- /dev/null
+++ b/test/CodeGen/PowerPC/lha.ll
@@ -0,0 +1,7 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | grep lha
+
+uint %test(short* %a) {
+ %tmp.1 = load short* %a
+ %tmp.2 = cast short %tmp.1 to uint
+ ret uint %tmp.2
+}
diff --git a/test/CodeGen/PowerPC/load-constant-addr.ll b/test/CodeGen/PowerPC/load-constant-addr.ll
new file mode 100644
index 0000000..65ec782
--- /dev/null
+++ b/test/CodeGen/PowerPC/load-constant-addr.ll
@@ -0,0 +1,9 @@
+; Should fold the ori into the lfs.
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | grep lfs
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | not grep ori
+
+float %test() {
+ %tmp.i = load float* cast (uint 186018016 to float*)
+ ret float %tmp.i
+}
+
diff --git a/test/CodeGen/PowerPC/long-compare.ll b/test/CodeGen/PowerPC/long-compare.ll
new file mode 100644
index 0000000..7b90725
--- /dev/null
+++ b/test/CodeGen/PowerPC/long-compare.ll
@@ -0,0 +1,9 @@
+; RUN: llvm-as < %s | llc -march=ppc32 | grep cntlzw
+; RUN: llvm-as < %s | llc -march=ppc32 | not grep xori
+; RUN: llvm-as < %s | llc -march=ppc32 | not grep {li }
+; RUN: llvm-as < %s | llc -march=ppc32 | not grep {mr }
+
+define i1 @test(i64 %x) {
+ %tmp = icmp ult i64 %x, 4294967296
+ ret i1 %tmp
+}
diff --git a/test/CodeGen/PowerPC/mem-rr-addr-mode.ll b/test/CodeGen/PowerPC/mem-rr-addr-mode.ll
new file mode 100644
index 0000000..8aa7aa2
--- /dev/null
+++ b/test/CodeGen/PowerPC/mem-rr-addr-mode.ll
@@ -0,0 +1,17 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 | grep li.*16
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 | not grep addi
+
+; Codegen lvx (R+16) as t = li 16, lvx t,R
+; This shares the 16 between the two loads.
+
+void %func(<4 x float>* %a, <4 x float>* %b) {
+ %tmp1 = getelementptr <4 x float>* %b, int 1
+ %tmp = load <4 x float>* %tmp1
+ %tmp3 = getelementptr <4 x float>* %a, int 1
+ %tmp4 = load <4 x float>* %tmp3
+ %tmp5 = mul <4 x float> %tmp, %tmp4
+ %tmp8 = load <4 x float>* %b
+ %tmp9 = add <4 x float> %tmp5, %tmp8
+ store <4 x float> %tmp9, <4 x float>* %a
+ ret void
+}
diff --git a/test/CodeGen/PowerPC/mem_update.ll b/test/CodeGen/PowerPC/mem_update.ll
new file mode 100644
index 0000000..4d3ebe9
--- /dev/null
+++ b/test/CodeGen/PowerPC/mem_update.ll
@@ -0,0 +1,68 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -enable-ppc-preinc | \
+; RUN: not grep addi
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc64 -enable-ppc-preinc | \
+; RUN: not grep addi
+%Glob = global ulong 4
+
+int *%test0(int *%X, int *%dest) {
+ %Y = getelementptr int* %X, int 4
+ %A = load int* %Y
+ store int %A, int* %dest
+ ret int* %Y
+}
+
+int *%test1(int *%X, int *%dest) {
+ %Y = getelementptr int* %X, int 4
+ %A = load int* %Y
+ store int %A, int* %dest
+ ret int* %Y
+}
+
+short *%test2(short *%X, int *%dest) {
+ %Y = getelementptr short* %X, int 4
+ %A = load short* %Y
+ %B = cast short %A to int
+ store int %B, int* %dest
+ ret short* %Y
+}
+
+ushort *%test3(ushort *%X, int *%dest) {
+ %Y = getelementptr ushort* %X, int 4
+ %A = load ushort* %Y
+ %B = cast ushort %A to int
+ store int %B, int* %dest
+ ret ushort* %Y
+}
+
+short *%test3a(short *%X, long *%dest) {
+ %Y = getelementptr short* %X, int 4
+ %A = load short* %Y
+ %B = cast short %A to long
+ store long %B, long* %dest
+ ret short* %Y
+}
+
+long *%test4(long *%X, long *%dest) {
+ %Y = getelementptr long* %X, int 4
+ %A = load long* %Y
+ store long %A, long* %dest
+ ret long* %Y
+}
+
+ushort *%test5(ushort *%X) {
+ %Y = getelementptr ushort* %X, int 4
+ store ushort 7, ushort* %Y
+ ret ushort* %Y
+}
+
+ulong *%test6(ulong *%X, ulong %A) {
+ %Y = getelementptr ulong* %X, int 4
+ store ulong %A, ulong* %Y
+ ret ulong* %Y
+}
+
+ulong *%test7(ulong *%X, ulong %A) {
+ store ulong %A, ulong* %Glob
+ ret ulong *%Glob
+}
+
diff --git a/test/CodeGen/PowerPC/mul-neg-power-2.ll b/test/CodeGen/PowerPC/mul-neg-power-2.ll
new file mode 100644
index 0000000..b9be1cc
--- /dev/null
+++ b/test/CodeGen/PowerPC/mul-neg-power-2.ll
@@ -0,0 +1,8 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | not grep mul
+
+int %test1(int %a) {
+ %tmp.1 = mul int %a, -2 ; <int> [#uses=1]
+ %tmp.2 = add int %tmp.1, 63 ; <int> [#uses=1]
+ ret int %tmp.2
+}
+
diff --git a/test/CodeGen/PowerPC/mulhs.ll b/test/CodeGen/PowerPC/mulhs.ll
new file mode 100644
index 0000000..967905d
--- /dev/null
+++ b/test/CodeGen/PowerPC/mulhs.ll
@@ -0,0 +1,18 @@
+; All of these ands and shifts should be folded into rlwimi's
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -o %t -f
+; RUN: not grep mulhwu %t
+; RUN: not grep srawi %t
+; RUN: not grep add %t
+; RUN: grep mulhw %t | wc -l | grep 1
+
+implementation ; Functions:
+
+int %mulhs(int %a, int %b) {
+entry:
+ %tmp.1 = cast int %a to ulong ; <ulong> [#uses=1]
+ %tmp.3 = cast int %b to ulong ; <ulong> [#uses=1]
+ %tmp.4 = mul ulong %tmp.3, %tmp.1 ; <ulong> [#uses=1]
+ %tmp.6 = shr ulong %tmp.4, ubyte 32 ; <ulong> [#uses=1]
+ %tmp.7 = cast ulong %tmp.6 to int ; <int> [#uses=1]
+ ret int %tmp.7
+}
diff --git a/test/CodeGen/PowerPC/neg.ll b/test/CodeGen/PowerPC/neg.ll
new file mode 100644
index 0000000..7119f6c
--- /dev/null
+++ b/test/CodeGen/PowerPC/neg.ll
@@ -0,0 +1,6 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | grep neg
+
+int %test(int %X) {
+ %Y = sub int 0, %X
+ ret int %Y
+}
diff --git a/test/CodeGen/PowerPC/or-addressing-mode.ll b/test/CodeGen/PowerPC/or-addressing-mode.ll
new file mode 100644
index 0000000..e448140
--- /dev/null
+++ b/test/CodeGen/PowerPC/or-addressing-mode.ll
@@ -0,0 +1,22 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc -mtriple=powerpc-apple-darwin8 | not grep ori
+; RUN: llvm-upgrade < %s | llvm-as | llc -mtriple=powerpc-apple-darwin8 | not grep rlwimi
+
+int %test1(sbyte* %P) { ;; or -> lwzx
+ %tmp.2.i = cast sbyte* %P to uint
+ %tmp.4.i = and uint %tmp.2.i, 4294901760
+ %tmp.10.i = shr uint %tmp.2.i, ubyte 5
+ %tmp.11.i = and uint %tmp.10.i, 2040
+ %tmp.13.i = or uint %tmp.11.i, %tmp.4.i
+ %tmp.14.i = cast uint %tmp.13.i to int*
+ %tmp.3 = load int* %tmp.14.i
+ ret int %tmp.3
+}
+
+int %test2(int %P) { ;; or -> lwz
+ %tmp.2 = shl int %P, ubyte 4
+ %tmp.3 = or int %tmp.2, 2
+ %tmp.4 = cast int %tmp.3 to int*
+ %tmp.5 = load int* %tmp.4
+ ret int %tmp.5
+}
+
diff --git a/test/CodeGen/PowerPC/reg-coalesce-simple.ll b/test/CodeGen/PowerPC/reg-coalesce-simple.ll
new file mode 100644
index 0000000..60e9458
--- /dev/null
+++ b/test/CodeGen/PowerPC/reg-coalesce-simple.ll
@@ -0,0 +1,11 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | not grep or
+
+%struct.foo = type { int, int, [0 x ubyte] }
+int %test(%struct.foo* %X) {
+ %tmp1 = getelementptr %struct.foo* %X, int 0, uint 2, int 100
+ %tmp = load ubyte* %tmp1 ; <ubyte> [#uses=1]
+ %tmp2 = cast ubyte %tmp to int ; <int> [#uses=1]
+ ret int %tmp2}
+
+
+
diff --git a/test/CodeGen/PowerPC/rlwimi-commute.ll b/test/CodeGen/PowerPC/rlwimi-commute.ll
new file mode 100644
index 0000000..8e6b1d6
--- /dev/null
+++ b/test/CodeGen/PowerPC/rlwimi-commute.ll
@@ -0,0 +1,26 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | grep rlwimi
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | not grep {or }
+
+; Make sure there is no register-register copies here.
+
+void %test1(int *%A, int *%B, int *%D, int* %E) {
+ %A = load int* %A
+ %B = load int* %B
+ %X = and int %A, 15
+ %Y = and int %B, -16
+ %Z = or int %X, %Y
+ store int %Z, int* %D
+ store int %A, int* %E
+ ret void
+}
+
+void %test2(int *%A, int *%B, int *%D, int* %E) {
+ %A = load int* %A
+ %B = load int* %B
+ %X = and int %A, 15
+ %Y = and int %B, -16
+ %Z = or int %X, %Y
+ store int %Z, int* %D
+ store int %B, int* %E
+ ret void
+}
diff --git a/test/CodeGen/PowerPC/rlwimi.ll b/test/CodeGen/PowerPC/rlwimi.ll
new file mode 100644
index 0000000..92afcf9
--- /dev/null
+++ b/test/CodeGen/PowerPC/rlwimi.ll
@@ -0,0 +1,72 @@
+; All of these ands and shifts should be folded into rlwimi's
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | not grep and
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | grep rlwimi | wc -l | grep 8
+
+implementation ; Functions:
+
+int %test1(int %x, int %y) {
+entry:
+ %tmp.3 = shl int %x, ubyte 16 ; <int> [#uses=1]
+ %tmp.7 = and int %y, 65535 ; <int> [#uses=1]
+ %tmp.9 = or int %tmp.7, %tmp.3 ; <int> [#uses=1]
+ ret int %tmp.9
+}
+
+int %test2(int %x, int %y) {
+entry:
+ %tmp.7 = and int %x, 65535 ; <int> [#uses=1]
+ %tmp.3 = shl int %y, ubyte 16 ; <int> [#uses=1]
+ %tmp.9 = or int %tmp.7, %tmp.3 ; <int> [#uses=1]
+ ret int %tmp.9
+}
+
+uint %test3(uint %x, uint %y) {
+entry:
+ %tmp.3 = shr uint %x, ubyte 16 ; <uint> [#uses=1]
+ %tmp.6 = and uint %y, 4294901760 ; <uint> [#uses=1]
+ %tmp.7 = or uint %tmp.6, %tmp.3 ; <uint> [#uses=1]
+ ret uint %tmp.7
+}
+
+uint %test4(uint %x, uint %y) {
+entry:
+ %tmp.6 = and uint %x, 4294901760 ; <uint> [#uses=1]
+ %tmp.3 = shr uint %y, ubyte 16 ; <uint> [#uses=1]
+ %tmp.7 = or uint %tmp.6, %tmp.3 ; <uint> [#uses=1]
+ ret uint %tmp.7
+}
+
+int %test5(int %x, int %y) {
+entry:
+ %tmp.3 = shl int %x, ubyte 1 ; <int> [#uses=1]
+ %tmp.4 = and int %tmp.3, -65536 ; <int> [#uses=1]
+ %tmp.7 = and int %y, 65535 ; <int> [#uses=1]
+ %tmp.9 = or int %tmp.4, %tmp.7 ; <int> [#uses=1]
+ ret int %tmp.9
+}
+
+int %test6(int %x, int %y) {
+entry:
+ %tmp.7 = and int %x, 65535 ; <int> [#uses=1]
+ %tmp.3 = shl int %y, ubyte 1 ; <int> [#uses=1]
+ %tmp.4 = and int %tmp.3, -65536 ; <int> [#uses=1]
+ %tmp.9 = or int %tmp.4, %tmp.7 ; <int> [#uses=1]
+ ret int %tmp.9
+}
+
+int %test7(int %x, int %y) {
+entry:
+ %tmp.2 = and int %x, -65536 ; <int> [#uses=1]
+ %tmp.5 = and int %y, 65535 ; <int> [#uses=1]
+ %tmp.7 = or int %tmp.5, %tmp.2 ; <int> [#uses=1]
+ ret int %tmp.7
+}
+
+uint %test8(uint %bar) {
+entry:
+ %tmp.3 = shl uint %bar, ubyte 1 ; <uint> [#uses=1]
+ %tmp.4 = and uint %tmp.3, 2 ; <uint> [#uses=1]
+ %tmp.6 = and uint %bar, 4294967293 ; <uint> [#uses=1]
+ %tmp.7 = or uint %tmp.4, %tmp.6 ; <uint> [#uses=1]
+ ret uint %tmp.7
+}
diff --git a/test/CodeGen/PowerPC/rlwimi2.ll b/test/CodeGen/PowerPC/rlwimi2.ll
new file mode 100644
index 0000000..c264d2e
--- /dev/null
+++ b/test/CodeGen/PowerPC/rlwimi2.ll
@@ -0,0 +1,31 @@
+; All of these ands and shifts should be folded into rlwimi's
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -o %t -f
+; RUN: grep rlwimi %t | wc -l | grep 3
+; RUN: grep srwi %t | wc -l | grep 1
+; RUN: not grep slwi %t
+
+implementation ; Functions:
+
+ushort %test1(uint %srcA, uint %srcB, uint %alpha) {
+entry:
+ %tmp.1 = shl uint %srcA, ubyte 15 ; <uint> [#uses=1]
+ %tmp.4 = and uint %tmp.1, 32505856 ; <uint> [#uses=1]
+ %tmp.6 = and uint %srcA, 31775 ; <uint> [#uses=1]
+ %tmp.7 = or uint %tmp.4, %tmp.6 ; <uint> [#uses=1]
+ %tmp.9 = shl uint %srcB, ubyte 15 ; <uint> [#uses=1]
+ %tmp.12 = and uint %tmp.9, 32505856 ; <uint> [#uses=1]
+ %tmp.14 = and uint %srcB, 31775 ; <uint> [#uses=1]
+ %tmp.15 = or uint %tmp.12, %tmp.14 ; <uint> [#uses=1]
+ %tmp.18 = mul uint %tmp.7, %alpha ; <uint> [#uses=1]
+ %tmp.20 = sub uint 32, %alpha ; <uint> [#uses=1]
+ %tmp.22 = mul uint %tmp.15, %tmp.20 ; <uint> [#uses=1]
+ %tmp.23 = add uint %tmp.22, %tmp.18 ; <uint> [#uses=2]
+ %tmp.27 = shr uint %tmp.23, ubyte 5 ; <uint> [#uses=1]
+ %tmp.28 = cast uint %tmp.27 to ushort ; <ushort> [#uses=1]
+ %tmp.29 = and ushort %tmp.28, 31775 ; <ushort> [#uses=1]
+ %tmp.33 = shr uint %tmp.23, ubyte 20 ; <uint> [#uses=1]
+ %tmp.34 = cast uint %tmp.33 to ushort ; <ushort> [#uses=1]
+ %tmp.35 = and ushort %tmp.34, 992 ; <ushort> [#uses=1]
+ %tmp.36 = or ushort %tmp.29, %tmp.35 ; <ushort> [#uses=1]
+ ret ushort %tmp.36
+}
diff --git a/test/CodeGen/PowerPC/rlwimi3.ll b/test/CodeGen/PowerPC/rlwimi3.ll
new file mode 100644
index 0000000..b313ef9
--- /dev/null
+++ b/test/CodeGen/PowerPC/rlwimi3.ll
@@ -0,0 +1,26 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -stats |& \
+; RUN: grep {Number of machine instrs printed} | grep 12
+
+ushort %Trans16Bit(uint %srcA, uint %srcB, uint %alpha) {
+ %tmp1 = shl uint %srcA, ubyte 15 ; <uint> [#uses=1]
+ %tmp2 = and uint %tmp1, 32505856 ; <uint> [#uses=1]
+ %tmp4 = and uint %srcA, 31775 ; <uint> [#uses=1]
+ %tmp5 = or uint %tmp2, %tmp4 ; <uint> [#uses=1]
+ %tmp7 = shl uint %srcB, ubyte 15 ; <uint> [#uses=1]
+ %tmp8 = and uint %tmp7, 32505856 ; <uint> [#uses=1]
+ %tmp10 = and uint %srcB, 31775 ; <uint> [#uses=1]
+ %tmp11 = or uint %tmp8, %tmp10 ; <uint> [#uses=1]
+ %tmp14 = mul uint %tmp5, %alpha ; <uint> [#uses=1]
+ %tmp16 = sub uint 32, %alpha ; <uint> [#uses=1]
+ %tmp18 = mul uint %tmp11, %tmp16 ; <uint> [#uses=1]
+ %tmp19 = add uint %tmp18, %tmp14 ; <uint> [#uses=2]
+ %tmp21 = shr uint %tmp19, ubyte 5 ; <uint> [#uses=1]
+ %tmp21 = cast uint %tmp21 to ushort ; <ushort> [#uses=1]
+ %tmp = and ushort %tmp21, 31775 ; <ushort> [#uses=1]
+ %tmp23 = shr uint %tmp19, ubyte 20 ; <uint> [#uses=1]
+ %tmp23 = cast uint %tmp23 to ushort ; <ushort> [#uses=1]
+ %tmp24 = and ushort %tmp23, 992 ; <ushort> [#uses=1]
+ %tmp25 = or ushort %tmp, %tmp24 ; <ushort> [#uses=1]
+ ret ushort %tmp25
+}
+
diff --git a/test/CodeGen/PowerPC/rlwinm.ll b/test/CodeGen/PowerPC/rlwinm.ll
new file mode 100644
index 0000000..32e8f26
--- /dev/null
+++ b/test/CodeGen/PowerPC/rlwinm.ll
@@ -0,0 +1,64 @@
+; All of these ands and shifts should be folded into rlwimi's
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -o %t -f
+; RUN: not grep and %t
+; RUN: not grep srawi %t
+; RUN: not grep srwi %t
+; RUN: not grep slwi %t
+; RUN: grep rlwinm %t | wc -l | grep 8
+
+implementation ; Functions:
+
+int %test1(int %a) {
+entry:
+ %tmp.1 = and int %a, 268431360 ; <int> [#uses=1]
+ ret int %tmp.1
+}
+
+int %test2(int %a) {
+entry:
+ %tmp.1 = and int %a, -268435441 ; <int> [#uses=1]
+ ret int %tmp.1
+}
+
+int %test3(int %a) {
+entry:
+ %tmp.2 = shr int %a, ubyte 8 ; <int> [#uses=1]
+ %tmp.3 = and int %tmp.2, 255 ; <int> [#uses=1]
+ ret int %tmp.3
+}
+
+uint %test4(uint %a) {
+entry:
+ %tmp.3 = shr uint %a, ubyte 8 ; <uint> [#uses=1]
+ %tmp.4 = and uint %tmp.3, 255 ; <uint> [#uses=1]
+ ret uint %tmp.4
+}
+
+int %test5(int %a) {
+entry:
+ %tmp.2 = shl int %a, ubyte 8 ; <int> [#uses=1]
+ %tmp.3 = and int %tmp.2, -8388608 ; <int> [#uses=1]
+ ret int %tmp.3
+}
+
+int %test6(int %a) {
+entry:
+ %tmp.1 = and int %a, 65280 ; <int> [#uses=1]
+ %tmp.2 = shr int %tmp.1, ubyte 8 ; <uint> [#uses=1]
+ ret int %tmp.2
+}
+
+uint %test7(uint %a) {
+entry:
+ %tmp.1 = and uint %a, 65280 ; <uint> [#uses=1]
+ %tmp.2 = shr uint %tmp.1, ubyte 8 ; <uint> [#uses=1]
+ ret uint %tmp.2
+}
+
+int %test8(int %a) {
+entry:
+ %tmp.1 = and int %a, 16711680 ; <int> [#uses=1]
+ %tmp.2 = shl int %tmp.1, ubyte 8 ; <int> [#uses=1]
+ ret int %tmp.2
+}
+
diff --git a/test/CodeGen/PowerPC/rlwinm2.ll b/test/CodeGen/PowerPC/rlwinm2.ll
new file mode 100644
index 0000000..78127f1
--- /dev/null
+++ b/test/CodeGen/PowerPC/rlwinm2.ll
@@ -0,0 +1,28 @@
+; All of these ands and shifts should be folded into rlw[i]nm instructions
+; RUN: llvm-as < %s | llc -march=ppc32 -o %t -f
+; RUN: not grep and %t
+; RUN: not grep srawi %t
+; RUN: not grep srwi %t
+; RUN: not grep slwi %t
+; RUN: grep rlwnm %t | wc -l | grep 1
+; RUN: grep rlwinm %t | wc -l | grep 1
+
+define i32 @test1(i32 %X, i32 %Y) {
+entry:
+ %tmp = trunc i32 %Y to i8 ; <i8> [#uses=2]
+ %tmp1 = shl i32 %X, %Y ; <i32> [#uses=1]
+ %tmp2 = sub i32 32, %Y ; <i8> [#uses=1]
+ %tmp3 = lshr i32 %X, %tmp2 ; <i32> [#uses=1]
+ %tmp4 = or i32 %tmp1, %tmp3 ; <i32> [#uses=1]
+ %tmp6 = and i32 %tmp4, 127 ; <i32> [#uses=1]
+ ret i32 %tmp6
+}
+
+define i32 @test2(i32 %X) {
+entry:
+ %tmp1 = lshr i32 %X, 27 ; <i32> [#uses=1]
+ %tmp2 = shl i32 %X, 5 ; <i32> [#uses=1]
+ %tmp2.masked = and i32 %tmp2, 96 ; <i32> [#uses=1]
+ %tmp5 = or i32 %tmp1, %tmp2.masked ; <i32> [#uses=1]
+ ret i32 %tmp5
+}
diff --git a/test/CodeGen/PowerPC/rotl-2.ll b/test/CodeGen/PowerPC/rotl-2.ll
new file mode 100644
index 0000000..523b5e4
--- /dev/null
+++ b/test/CodeGen/PowerPC/rotl-2.ll
@@ -0,0 +1,38 @@
+; RUN: llvm-as < %s | llc -march=ppc32 | grep rlwinm | wc -l | grep 4
+; RUN: llvm-as < %s | llc -march=ppc32 | grep rlwnm | wc -l | grep 2
+; RUN: llvm-as < %s | llc -march=ppc32 | not grep or
+
+define i32 @rotl32(i32 %A, i8 %Amt) {
+ %shift.upgrd.1 = zext i8 %Amt to i32 ; <i32> [#uses=1]
+ %B = shl i32 %A, %shift.upgrd.1 ; <i32> [#uses=1]
+ %Amt2 = sub i8 32, %Amt ; <i8> [#uses=1]
+ %shift.upgrd.2 = zext i8 %Amt2 to i32 ; <i32> [#uses=1]
+ %C = lshr i32 %A, %shift.upgrd.2 ; <i32> [#uses=1]
+ %D = or i32 %B, %C ; <i32> [#uses=1]
+ ret i32 %D
+}
+
+define i32 @rotr32(i32 %A, i8 %Amt) {
+ %shift.upgrd.3 = zext i8 %Amt to i32 ; <i32> [#uses=1]
+ %B = lshr i32 %A, %shift.upgrd.3 ; <i32> [#uses=1]
+ %Amt2 = sub i8 32, %Amt ; <i8> [#uses=1]
+ %shift.upgrd.4 = zext i8 %Amt2 to i32 ; <i32> [#uses=1]
+ %C = shl i32 %A, %shift.upgrd.4 ; <i32> [#uses=1]
+ %D = or i32 %B, %C ; <i32> [#uses=1]
+ ret i32 %D
+}
+
+define i32 @rotli32(i32 %A) {
+ %B = shl i32 %A, 5 ; <i32> [#uses=1]
+ %C = lshr i32 %A, 27 ; <i32> [#uses=1]
+ %D = or i32 %B, %C ; <i32> [#uses=1]
+ ret i32 %D
+}
+
+define i32 @rotri32(i32 %A) {
+ %B = lshr i32 %A, 5 ; <i32> [#uses=1]
+ %C = shl i32 %A, 27 ; <i32> [#uses=1]
+ %D = or i32 %B, %C ; <i32> [#uses=1]
+ ret i32 %D
+}
+
diff --git a/test/CodeGen/PowerPC/rotl.ll b/test/CodeGen/PowerPC/rotl.ll
new file mode 100644
index 0000000..aa033cf
--- /dev/null
+++ b/test/CodeGen/PowerPC/rotl.ll
@@ -0,0 +1,37 @@
+; RUN: llvm-as < %s | llc -march=ppc32 | grep rlwnm | wc -l | grep 2
+; RUN: llvm-as < %s | llc -march=ppc32 | grep rlwinm | wc -l | grep 2
+
+define i32 @rotlw(i32 %x, i32 %sh) {
+entry:
+ %tmp.7 = sub i32 32, %sh ; <i32> [#uses=1]
+ %tmp.10 = lshr i32 %x, %tmp.7 ; <i32> [#uses=2]
+ %tmp.4 = shl i32 %x, %sh ; <i32> [#uses=1]
+ %tmp.12 = or i32 %tmp.10, %tmp.4 ; <i32> [#uses=1]
+ ret i32 %tmp.12
+}
+
+define i32 @rotrw(i32 %x, i32 %sh) {
+entry:
+ %tmp.3 = trunc i32 %sh to i8 ; <i8> [#uses=1]
+ %tmp.4 = lshr i32 %x, %sh ; <i32> [#uses=2]
+ %tmp.7 = sub i32 32, %sh ; <i32> [#uses=1]
+ %tmp.10 = shl i32 %x, %tmp.7 ; <i32> [#uses=1]
+ %tmp.12 = or i32 %tmp.4, %tmp.10 ; <i32> [#uses=1]
+ ret i32 %tmp.12
+}
+
+define i32 @rotlwi(i32 %x) {
+entry:
+ %tmp.7 = lshr i32 %x, 27 ; <i32> [#uses=2]
+ %tmp.3 = shl i32 %x, 5 ; <i32> [#uses=1]
+ %tmp.9 = or i32 %tmp.3, %tmp.7 ; <i32> [#uses=1]
+ ret i32 %tmp.9
+}
+
+define i32 @rotrwi(i32 %x) {
+entry:
+ %tmp.3 = lshr i32 %x, 5 ; <i32> [#uses=2]
+ %tmp.7 = shl i32 %x, 27 ; <i32> [#uses=1]
+ %tmp.9 = or i32 %tmp.3, %tmp.7 ; <i32> [#uses=1]
+ ret i32 %tmp.9
+}
diff --git a/test/CodeGen/PowerPC/select_lt0.ll b/test/CodeGen/PowerPC/select_lt0.ll
new file mode 100644
index 0000000..bb5213f
--- /dev/null
+++ b/test/CodeGen/PowerPC/select_lt0.ll
@@ -0,0 +1,51 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | not grep cmp
+
+int %seli32_1(int %a) {
+entry:
+ %tmp.1 = setlt int %a, 0
+ %retval = select bool %tmp.1, int 5, int 0
+ ret int %retval
+}
+
+int %seli32_2(int %a, int %b) {
+entry:
+ %tmp.1 = setlt int %a, 0
+ %retval = select bool %tmp.1, int %b, int 0
+ ret int %retval
+}
+
+int %seli32_3(int %a, short %b) {
+entry:
+ %tmp.2 = cast short %b to int
+ %tmp.1 = setlt int %a, 0
+ %retval = select bool %tmp.1, int %tmp.2, int 0
+ ret int %retval
+}
+
+int %seli32_4(int %a, ushort %b) {
+entry:
+ %tmp.2 = cast ushort %b to int
+ %tmp.1 = setlt int %a, 0
+ %retval = select bool %tmp.1, int %tmp.2, int 0
+ ret int %retval
+}
+
+short %seli16_1(short %a) {
+entry:
+ %tmp.1 = setlt short %a, 0
+ %retval = select bool %tmp.1, short 7, short 0
+ ret short %retval
+}
+
+short %seli16_2(int %a, short %b) {
+ %tmp.1 = setlt int %a, 0
+ %retval = select bool %tmp.1, short %b, short 0
+ ret short %retval
+}
+
+int %seli32_a_a(int %a) {
+ %tmp = setlt int %a, 1
+ %min = select bool %tmp, int %a, int 0
+ ret int %min
+}
+
diff --git a/test/CodeGen/PowerPC/setcc_no_zext.ll b/test/CodeGen/PowerPC/setcc_no_zext.ll
new file mode 100644
index 0000000..00e9bf0
--- /dev/null
+++ b/test/CodeGen/PowerPC/setcc_no_zext.ll
@@ -0,0 +1,8 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | not grep rlwinm
+
+int %setcc_one_or_zero(int* %a) {
+entry:
+ %tmp.1 = setne int* %a, null
+ %inc.1 = cast bool %tmp.1 to int
+ ret int %inc.1
+}
diff --git a/test/CodeGen/PowerPC/seteq-0.ll b/test/CodeGen/PowerPC/seteq-0.ll
new file mode 100644
index 0000000..a574100
--- /dev/null
+++ b/test/CodeGen/PowerPC/seteq-0.ll
@@ -0,0 +1,9 @@
+; RUN: llvm-upgrade < %s | llvm-as | \
+; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
+; RUN: grep {srwi r., r., 5}
+
+int %eq0(int %a) {
+ %tmp.1 = seteq int %a, 0 ; <bool> [#uses=1]
+ %tmp.2 = cast bool %tmp.1 to int ; <int> [#uses=1]
+ ret int %tmp.2
+}
diff --git a/test/CodeGen/PowerPC/shl_elim.ll b/test/CodeGen/PowerPC/shl_elim.ll
new file mode 100644
index 0000000..3dc4772
--- /dev/null
+++ b/test/CodeGen/PowerPC/shl_elim.ll
@@ -0,0 +1,11 @@
+; RUN: llvm-as < %s | llc -march=ppc32 | not grep slwi
+
+define i32 @test1(i64 %a) {
+ %tmp29 = lshr i64 %a, 24 ; <i64> [#uses=1]
+ %tmp23 = trunc i64 %tmp29 to i32 ; <i32> [#uses=1]
+ %tmp410 = lshr i32 %tmp23, 9 ; <i32> [#uses=1]
+ %tmp45 = trunc i32 %tmp410 to i16 ; <i16> [#uses=1]
+ %tmp456 = sext i16 %tmp45 to i32 ; <i32> [#uses=1]
+ ret i32 %tmp456
+}
+
diff --git a/test/CodeGen/PowerPC/shl_sext.ll b/test/CodeGen/PowerPC/shl_sext.ll
new file mode 100644
index 0000000..af18338
--- /dev/null
+++ b/test/CodeGen/PowerPC/shl_sext.ll
@@ -0,0 +1,17 @@
+; This test should not contain a sign extend
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | not grep extsb
+
+int %test(uint %mode.0.i.0) {
+ %tmp.79 = cast uint %mode.0.i.0 to sbyte ; <sbyte> [#uses=1]
+ %tmp.80 = cast sbyte %tmp.79 to int ; <int> [#uses=1]
+ %tmp.81 = shl int %tmp.80, ubyte 24 ; <int> [#uses=1]
+ ret int %tmp.81
+}
+
+int %test2(uint %mode.0.i.0) {
+ %tmp.79 = cast uint %mode.0.i.0 to sbyte ; <sbyte> [#uses=1]
+ %tmp.80 = cast sbyte %tmp.79 to int ; <int> [#uses=1]
+ %tmp.81 = shl int %tmp.80, ubyte 16 ; <int> [#uses=1]
+ %tmp.82 = and int %tmp.81, 16711680
+ ret int %tmp.82
+}
diff --git a/test/CodeGen/PowerPC/sign_ext_inreg1.ll b/test/CodeGen/PowerPC/sign_ext_inreg1.ll
new file mode 100644
index 0000000..0e67f77
--- /dev/null
+++ b/test/CodeGen/PowerPC/sign_ext_inreg1.ll
@@ -0,0 +1,12 @@
+; RUN: llvm-as < %s | llc -march=ppc32 | grep srwi
+; RUN: llvm-as < %s | llc -march=ppc32 | not grep rlwimi
+
+define i32 @baz(i64 %a) {
+ %tmp29 = lshr i64 %a, 24 ; <i64> [#uses=1]
+ %tmp23 = trunc i64 %tmp29 to i32 ; <i32> [#uses=1]
+ %tmp410 = lshr i32 %tmp23, 9 ; <i32> [#uses=1]
+ %tmp45 = trunc i32 %tmp410 to i16 ; <i16> [#uses=1]
+ %tmp456 = sext i16 %tmp45 to i32 ; <i32> [#uses=1]
+ ret i32 %tmp456
+}
+
diff --git a/test/CodeGen/PowerPC/small-arguments.ll b/test/CodeGen/PowerPC/small-arguments.ll
new file mode 100644
index 0000000..e512047
--- /dev/null
+++ b/test/CodeGen/PowerPC/small-arguments.ll
@@ -0,0 +1,52 @@
+; RUN: llvm-as < %s | llc -march=ppc32 | not grep {extsh\\|rlwinm}
+
+declare i16 @foo() sext
+
+define i32 @test1(i16 sext %X) {
+ %Y = sext i16 %X to i32 ;; dead
+ ret i32 %Y
+}
+
+define i32 @test2(i16 zext %X) {
+ %Y = sext i16 %X to i32
+ %Z = and i32 %Y, 65535 ;; dead
+ ret i32 %Z
+}
+
+define void @test3() {
+ %tmp.0 = call i16 @foo() sext ;; no extsh!
+ %tmp.1 = icmp slt i16 %tmp.0, 1234
+ br i1 %tmp.1, label %then, label %UnifiedReturnBlock
+
+then:
+ call i32 @test1(i16 0 sext)
+ ret void
+UnifiedReturnBlock:
+ ret void
+}
+
+define i32 @test4(i16* %P) {
+ %tmp.1 = load i16* %P
+ %tmp.2 = zext i16 %tmp.1 to i32
+ %tmp.3 = and i32 %tmp.2, 255
+ ret i32 %tmp.3
+}
+
+define i32 @test5(i16* %P) {
+ %tmp.1 = load i16* %P
+ %tmp.2 = bitcast i16 %tmp.1 to i16
+ %tmp.3 = zext i16 %tmp.2 to i32
+ %tmp.4 = and i32 %tmp.3, 255
+ ret i32 %tmp.4
+}
+
+define i32 @test6(i32* %P) {
+ %tmp.1 = load i32* %P
+ %tmp.2 = and i32 %tmp.1, 255
+ ret i32 %tmp.2
+}
+
+define i16 @test7(float %a) zext {
+ %tmp.1 = fptoui float %a to i16
+ ret i16 %tmp.1
+}
diff --git a/test/CodeGen/PowerPC/stfiwx.ll b/test/CodeGen/PowerPC/stfiwx.ll
new file mode 100644
index 0000000..2eebc07
--- /dev/null
+++ b/test/CodeGen/PowerPC/stfiwx.ll
@@ -0,0 +1,26 @@
+; RUN: llvm-upgrade < %s | llvm-as | \
+; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -mattr=stfiwx -o %t1 -f
+; RUN: grep stfiwx %t1
+; RUN: not grep r1 %t1
+; RUN: llvm-upgrade < %s | llvm-as | \
+; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -mattr=-stfiwx \
+; RUN: -o %t2 -f
+; RUN: not grep stfiwx %t2
+; RUN: grep r1 %t2
+
+void %test(float %a, int* %b) {
+ %tmp.2 = cast float %a to int
+ store int %tmp.2, int* %b
+ ret void
+}
+
+void %test2(float %a, int* %b, int %i) {
+ %tmp.2 = getelementptr int* %b, int 1
+ %tmp.5 = getelementptr int* %b, int %i
+ %tmp.7 = cast float %a to int
+ store int %tmp.7, int* %tmp.5
+ store int %tmp.7, int* %tmp.2
+ store int %tmp.7, int* %b
+ ret void
+}
+
diff --git a/test/CodeGen/PowerPC/store-load-fwd.ll b/test/CodeGen/PowerPC/store-load-fwd.ll
new file mode 100644
index 0000000..761fb5a
--- /dev/null
+++ b/test/CodeGen/PowerPC/store-load-fwd.ll
@@ -0,0 +1,6 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | not grep lwz
+int %test(int* %P) {
+ store int 1, int* %P
+ %V = load int* %P
+ ret int %V
+}
diff --git a/test/CodeGen/PowerPC/subc.ll b/test/CodeGen/PowerPC/subc.ll
new file mode 100644
index 0000000..3624791
--- /dev/null
+++ b/test/CodeGen/PowerPC/subc.ll
@@ -0,0 +1,26 @@
+; All of these should be codegen'd without loading immediates
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -o %t -f
+; RUN: grep subfc %t | wc -l | grep 1
+; RUN: grep subfe %t | wc -l | grep 1
+; RUN: grep subfze %t | wc -l | grep 1
+; RUN: grep subfme %t | wc -l | grep 1
+; RUN: grep subfic %t | wc -l | grep 2
+implementation ; Functions:
+
+long %sub_ll(long %a, long %b) {
+entry:
+ %tmp.2 = sub long %a, %b ; <long> [#uses=1]
+ ret long %tmp.2
+}
+
+long %sub_l_5(long %a) {
+entry:
+ %tmp.1 = sub long 5, %a ; <long> [#uses=1]
+ ret long %tmp.1
+}
+
+long %sub_l_m5(long %a) {
+entry:
+ %tmp.1 = sub long -5, %a ; <long> [#uses=1]
+ ret long %tmp.1
+}
diff --git a/test/CodeGen/PowerPC/unsafe-math.ll b/test/CodeGen/PowerPC/unsafe-math.ll
new file mode 100644
index 0000000..770dcb6
--- /dev/null
+++ b/test/CodeGen/PowerPC/unsafe-math.ll
@@ -0,0 +1,10 @@
+; RUN: llvm-as < %s | llc -march=ppc32 | grep fmul | wc -l | grep 2
+; RUN: llvm-as < %s | llc -march=ppc32 -enable-unsafe-fp-math | \
+; RUN: grep fmul | wc -l | grep 1
+
+define double @foo(double %X) {
+ %tmp1 = mul double %X, 1.23
+ %tmp2 = mul double %tmp1, 4.124
+ ret double %tmp2
+}
+
diff --git a/test/CodeGen/PowerPC/vcmp-fold.ll b/test/CodeGen/PowerPC/vcmp-fold.ll
new file mode 100644
index 0000000..6ae41a9
--- /dev/null
+++ b/test/CodeGen/PowerPC/vcmp-fold.ll
@@ -0,0 +1,21 @@
+; This should fold the "vcmpbfp." and "vcmpbfp" instructions into a single
+; "vcmpbfp.".
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 | grep vcmpbfp | wc -l | grep 1
+
+void %test(<4 x float>* %x, <4 x float>* %y, int* %P) {
+entry:
+ %tmp = load <4 x float>* %x ; <<4 x float>> [#uses=1]
+ %tmp2 = load <4 x float>* %y ; <<4 x float>> [#uses=1]
+ %tmp = call int %llvm.ppc.altivec.vcmpbfp.p( int 1, <4 x float> %tmp, <4 x float> %tmp2 ) ; <int> [#uses=1]
+ %tmp4 = load <4 x float>* %x ; <<4 x float>> [#uses=1]
+ %tmp6 = load <4 x float>* %y ; <<4 x float>> [#uses=1]
+ %tmp = call <4 x int> %llvm.ppc.altivec.vcmpbfp( <4 x float> %tmp4, <4 x float> %tmp6 ) ; <<4 x int>> [#uses=1]
+ %tmp7 = cast <4 x int> %tmp to <4 x float> ; <<4 x float>> [#uses=1]
+ store <4 x float> %tmp7, <4 x float>* %x
+ store int %tmp, int* %P
+ ret void
+}
+
+declare int %llvm.ppc.altivec.vcmpbfp.p(int, <4 x float>, <4 x float>)
+
+declare <4 x int> %llvm.ppc.altivec.vcmpbfp(<4 x float>, <4 x float>)
diff --git a/test/CodeGen/PowerPC/vec_br_cmp.ll b/test/CodeGen/PowerPC/vec_br_cmp.ll
new file mode 100644
index 0000000..bc60bae
--- /dev/null
+++ b/test/CodeGen/PowerPC/vec_br_cmp.ll
@@ -0,0 +1,23 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 -o %t -f
+; RUN: grep vcmpeqfp. %t
+; RUN: not grep mfcr %t
+
+; A predicate compare used immediately by a branch should not generate an mfcr.
+
+void %test(<4 x float>* %A, <4 x float>* %B) {
+ %tmp = load <4 x float>* %A
+ %tmp3 = load <4 x float>* %B
+ %tmp = tail call int %llvm.ppc.altivec.vcmpeqfp.p( int 1, <4 x float> %tmp, <4 x float> %tmp3 )
+ %tmp = seteq int %tmp, 0
+ br bool %tmp, label %cond_true, label %UnifiedReturnBlock
+
+cond_true:
+ store <4 x float> zeroinitializer, <4 x float>* %B
+ ret void
+
+UnifiedReturnBlock:
+ ret void
+}
+
+declare int %llvm.ppc.altivec.vcmpeqfp.p(int, <4 x float>, <4 x float>)
+
diff --git a/test/CodeGen/PowerPC/vec_call.ll b/test/CodeGen/PowerPC/vec_call.ll
new file mode 100644
index 0000000..b2b91fe
--- /dev/null
+++ b/test/CodeGen/PowerPC/vec_call.ll
@@ -0,0 +1,11 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5
+
+<4 x int> %test_arg(<4 x int> %A, <4 x int> %B) {
+ %C = add <4 x int> %A, %B
+ ret <4 x int> %C
+}
+
+<4 x int> %foo() {
+ %X = call <4 x int> %test_arg(<4 x int> zeroinitializer, <4 x int> zeroinitializer)
+ ret <4 x int> %X
+}
diff --git a/test/CodeGen/PowerPC/vec_constants.ll b/test/CodeGen/PowerPC/vec_constants.ll
new file mode 100644
index 0000000..507d2d9
--- /dev/null
+++ b/test/CodeGen/PowerPC/vec_constants.ll
@@ -0,0 +1,47 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 | not grep CPI
+
+
+; Tests spltw(0x80000000) and spltw(0x7FFFFFFF).
+void %test1(<4 x int>* %P1, <4 x int>* %P2, <4 x float>* %P3) {
+ %tmp = load <4 x int>* %P1
+ %tmp4 = and <4 x int> %tmp, < int -2147483648, int -2147483648, int -2147483648, int -2147483648 >
+ store <4 x int> %tmp4, <4 x int>* %P1
+ %tmp7 = load <4 x int>* %P2
+ %tmp9 = and <4 x int> %tmp7, < int 2147483647, int 2147483647, int 2147483647, int 2147483647 >
+ store <4 x int> %tmp9, <4 x int>* %P2
+ %tmp = load <4 x float>* %P3
+ %tmp11 = cast <4 x float> %tmp to <4 x int>
+ %tmp12 = and <4 x int> %tmp11, < int 2147483647, int 2147483647, int 2147483647, int 2147483647 >
+ %tmp13 = cast <4 x int> %tmp12 to <4 x float>
+ store <4 x float> %tmp13, <4 x float>* %P3
+ ret void
+}
+
+<4 x int> %test_30() {
+ ret <4 x int> <int 30, int 30, int 30, int 30>
+}
+
+<4 x int> %test_29() {
+ ret <4 x int> <int 29, int 29, int 29, int 29>
+}
+
+<8 x short> %test_n30() {
+ ret <8 x short> <short -30, short -30, short -30, short -30,
+ short -30, short -30, short -30, short -30>
+}
+
+<16 x sbyte> %test_n104() {
+ ret <16 x sbyte> <sbyte -104, sbyte -104, sbyte -104, sbyte -104,
+ sbyte -104, sbyte -104, sbyte -104, sbyte -104,
+ sbyte -104, sbyte -104, sbyte -104, sbyte -104,
+ sbyte -104, sbyte -104, sbyte -104, sbyte -104>
+}
+
+<4 x int> %test_vsldoi() {
+ ret <4 x int> <int 512, int 512, int 512, int 512>
+}
+
+<4 x int> %test_rol() {
+ ret <4 x int> <int -11534337, int -11534337, int -11534337, int -11534337>
+}
+
diff --git a/test/CodeGen/PowerPC/vec_mul.ll b/test/CodeGen/PowerPC/vec_mul.ll
new file mode 100644
index 0000000..eea1def
--- /dev/null
+++ b/test/CodeGen/PowerPC/vec_mul.ll
@@ -0,0 +1,24 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 | not grep mullw
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 | grep vmsumuhm
+
+<4 x int> %test_v4i32(<4 x int>* %X, <4 x int>* %Y) {
+ %tmp = load <4 x int>* %X
+ %tmp2 = load <4 x int>* %Y
+ %tmp3 = mul <4 x int> %tmp, %tmp2
+ ret <4 x int> %tmp3
+}
+
+<8 x short> %test_v8i16(<8 x short>* %X, <8 x short>* %Y) {
+ %tmp = load <8 x short>* %X
+ %tmp2 = load <8 x short>* %Y
+ %tmp3 = mul <8 x short> %tmp, %tmp2
+ ret <8 x short> %tmp3
+}
+
+<16 x sbyte> %test_v16i8(<16 x sbyte>* %X, <16 x sbyte>* %Y) {
+ %tmp = load <16 x sbyte>* %X
+ %tmp2 = load <16 x sbyte>* %Y
+ %tmp3 = mul <16 x sbyte> %tmp, %tmp2
+ ret <16 x sbyte> %tmp3
+}
+
diff --git a/test/CodeGen/PowerPC/vec_perf_shuffle.ll b/test/CodeGen/PowerPC/vec_perf_shuffle.ll
new file mode 100644
index 0000000..6177b5f
--- /dev/null
+++ b/test/CodeGen/PowerPC/vec_perf_shuffle.ll
@@ -0,0 +1,42 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 | not grep vperm
+
+<4 x float> %test_uu72(<4 x float> *%P1, <4 x float> *%P2) {
+ %V1 = load <4 x float> *%P1
+ %V2 = load <4 x float> *%P2
+ ; vmrglw + vsldoi
+ %V3 = shufflevector <4 x float> %V1, <4 x float> %V2,
+ <4 x uint> <uint undef, uint undef, uint 7, uint 2>
+ ret <4 x float> %V3
+}
+
+<4 x float> %test_30u5(<4 x float> *%P1, <4 x float> *%P2) {
+ %V1 = load <4 x float> *%P1
+ %V2 = load <4 x float> *%P2
+ %V3 = shufflevector <4 x float> %V1, <4 x float> %V2,
+ <4 x uint> <uint 3, uint 0, uint undef, uint 5>
+ ret <4 x float> %V3
+}
+
+<4 x float> %test_3u73(<4 x float> *%P1, <4 x float> *%P2) {
+ %V1 = load <4 x float> *%P1
+ %V2 = load <4 x float> *%P2
+ %V3 = shufflevector <4 x float> %V1, <4 x float> %V2,
+ <4 x uint> <uint 3, uint undef, uint 7, uint 3>
+ ret <4 x float> %V3
+}
+
+<4 x float> %test_3774(<4 x float> *%P1, <4 x float> *%P2) {
+ %V1 = load <4 x float> *%P1
+ %V2 = load <4 x float> *%P2
+ %V3 = shufflevector <4 x float> %V1, <4 x float> %V2,
+ <4 x uint> <uint 3, uint 7, uint 7, uint 4>
+ ret <4 x float> %V3
+}
+
+<4 x float> %test_4450(<4 x float> *%P1, <4 x float> *%P2) {
+ %V1 = load <4 x float> *%P1
+ %V2 = load <4 x float> *%P2
+ %V3 = shufflevector <4 x float> %V1, <4 x float> %V2,
+ <4 x uint> <uint 4, uint 4, uint 5, uint 0>
+ ret <4 x float> %V3
+}
diff --git a/test/CodeGen/PowerPC/vec_shuffle.ll b/test/CodeGen/PowerPC/vec_shuffle.ll
new file mode 100644
index 0000000..ba856ee
--- /dev/null
+++ b/test/CodeGen/PowerPC/vec_shuffle.ll
@@ -0,0 +1,506 @@
+; RUN: llvm-upgrade < %s | llvm-as | opt -instcombine | \
+; RUN: llc -march=ppc32 -mcpu=g5 | not grep vperm
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 > %t
+; RUN: grep vsldoi %t | wc -l | grep 2
+; RUN: grep vmrgh %t | wc -l | grep 7
+; RUN: grep vmrgl %t | wc -l | grep 6
+; RUN: grep vpkuhum %t | wc -l | grep 1
+; RUN: grep vpkuwum %t | wc -l | grep 1
+
+void %VSLDOI_xy(<8 x short>* %A, <8 x short>* %B) {
+entry:
+ %tmp = load <8 x short>* %A ; <<8 x short>> [#uses=1]
+ %tmp2 = load <8 x short>* %B ; <<8 x short>> [#uses=1]
+ %tmp = cast <8 x short> %tmp to <16 x sbyte> ; <<16 x sbyte>> [#uses=11]
+ %tmp2 = cast <8 x short> %tmp2 to <16 x sbyte> ; <<16 x sbyte>> [#uses=5]
+ %tmp = extractelement <16 x sbyte> %tmp, uint 5 ; <sbyte> [#uses=1]
+ %tmp3 = extractelement <16 x sbyte> %tmp, uint 6 ; <sbyte> [#uses=1]
+ %tmp4 = extractelement <16 x sbyte> %tmp, uint 7 ; <sbyte> [#uses=1]
+ %tmp5 = extractelement <16 x sbyte> %tmp, uint 8 ; <sbyte> [#uses=1]
+ %tmp6 = extractelement <16 x sbyte> %tmp, uint 9 ; <sbyte> [#uses=1]
+ %tmp7 = extractelement <16 x sbyte> %tmp, uint 10 ; <sbyte> [#uses=1]
+ %tmp8 = extractelement <16 x sbyte> %tmp, uint 11 ; <sbyte> [#uses=1]
+ %tmp9 = extractelement <16 x sbyte> %tmp, uint 12 ; <sbyte> [#uses=1]
+ %tmp10 = extractelement <16 x sbyte> %tmp, uint 13 ; <sbyte> [#uses=1]
+ %tmp11 = extractelement <16 x sbyte> %tmp, uint 14 ; <sbyte> [#uses=1]
+ %tmp12 = extractelement <16 x sbyte> %tmp, uint 15 ; <sbyte> [#uses=1]
+ %tmp13 = extractelement <16 x sbyte> %tmp2, uint 0 ; <sbyte> [#uses=1]
+ %tmp14 = extractelement <16 x sbyte> %tmp2, uint 1 ; <sbyte> [#uses=1]
+ %tmp15 = extractelement <16 x sbyte> %tmp2, uint 2 ; <sbyte> [#uses=1]
+ %tmp16 = extractelement <16 x sbyte> %tmp2, uint 3 ; <sbyte> [#uses=1]
+ %tmp17 = extractelement <16 x sbyte> %tmp2, uint 4 ; <sbyte> [#uses=1]
+ %tmp18 = insertelement <16 x sbyte> undef, sbyte %tmp, uint 0 ; <<16 x sbyte>> [#uses=1]
+ %tmp19 = insertelement <16 x sbyte> %tmp18, sbyte %tmp3, uint 1 ; <<16 x sbyte>> [#uses=1]
+ %tmp20 = insertelement <16 x sbyte> %tmp19, sbyte %tmp4, uint 2 ; <<16 x sbyte>> [#uses=1]
+ %tmp21 = insertelement <16 x sbyte> %tmp20, sbyte %tmp5, uint 3 ; <<16 x sbyte>> [#uses=1]
+ %tmp22 = insertelement <16 x sbyte> %tmp21, sbyte %tmp6, uint 4 ; <<16 x sbyte>> [#uses=1]
+ %tmp23 = insertelement <16 x sbyte> %tmp22, sbyte %tmp7, uint 5 ; <<16 x sbyte>> [#uses=1]
+ %tmp24 = insertelement <16 x sbyte> %tmp23, sbyte %tmp8, uint 6 ; <<16 x sbyte>> [#uses=1]
+ %tmp25 = insertelement <16 x sbyte> %tmp24, sbyte %tmp9, uint 7 ; <<16 x sbyte>> [#uses=1]
+ %tmp26 = insertelement <16 x sbyte> %tmp25, sbyte %tmp10, uint 8 ; <<16 x sbyte>> [#uses=1]
+ %tmp27 = insertelement <16 x sbyte> %tmp26, sbyte %tmp11, uint 9 ; <<16 x sbyte>> [#uses=1]
+ %tmp28 = insertelement <16 x sbyte> %tmp27, sbyte %tmp12, uint 10 ; <<16 x sbyte>> [#uses=1]
+ %tmp29 = insertelement <16 x sbyte> %tmp28, sbyte %tmp13, uint 11 ; <<16 x sbyte>> [#uses=1]
+ %tmp30 = insertelement <16 x sbyte> %tmp29, sbyte %tmp14, uint 12 ; <<16 x sbyte>> [#uses=1]
+ %tmp31 = insertelement <16 x sbyte> %tmp30, sbyte %tmp15, uint 13 ; <<16 x sbyte>> [#uses=1]
+ %tmp32 = insertelement <16 x sbyte> %tmp31, sbyte %tmp16, uint 14 ; <<16 x sbyte>> [#uses=1]
+ %tmp33 = insertelement <16 x sbyte> %tmp32, sbyte %tmp17, uint 15 ; <<16 x sbyte>> [#uses=1]
+ %tmp33 = cast <16 x sbyte> %tmp33 to <8 x short> ; <<8 x short>> [#uses=1]
+ store <8 x short> %tmp33, <8 x short>* %A
+ ret void
+}
+
+void %VSLDOI_xx(<8 x short>* %A, <8 x short>* %B) {
+ %tmp = load <8 x short>* %A ; <<8 x short>> [#uses=1]
+ %tmp2 = load <8 x short>* %A ; <<8 x short>> [#uses=1]
+ %tmp = cast <8 x short> %tmp to <16 x sbyte> ; <<16 x sbyte>> [#uses=11]
+ %tmp2 = cast <8 x short> %tmp2 to <16 x sbyte> ; <<16 x sbyte>> [#uses=5]
+ %tmp = extractelement <16 x sbyte> %tmp, uint 5 ; <sbyte> [#uses=1]
+ %tmp3 = extractelement <16 x sbyte> %tmp, uint 6 ; <sbyte> [#uses=1]
+ %tmp4 = extractelement <16 x sbyte> %tmp, uint 7 ; <sbyte> [#uses=1]
+ %tmp5 = extractelement <16 x sbyte> %tmp, uint 8 ; <sbyte> [#uses=1]
+ %tmp6 = extractelement <16 x sbyte> %tmp, uint 9 ; <sbyte> [#uses=1]
+ %tmp7 = extractelement <16 x sbyte> %tmp, uint 10 ; <sbyte> [#uses=1]
+ %tmp8 = extractelement <16 x sbyte> %tmp, uint 11 ; <sbyte> [#uses=1]
+ %tmp9 = extractelement <16 x sbyte> %tmp, uint 12 ; <sbyte> [#uses=1]
+ %tmp10 = extractelement <16 x sbyte> %tmp, uint 13 ; <sbyte> [#uses=1]
+ %tmp11 = extractelement <16 x sbyte> %tmp, uint 14 ; <sbyte> [#uses=1]
+ %tmp12 = extractelement <16 x sbyte> %tmp, uint 15 ; <sbyte> [#uses=1]
+ %tmp13 = extractelement <16 x sbyte> %tmp2, uint 0 ; <sbyte> [#uses=1]
+ %tmp14 = extractelement <16 x sbyte> %tmp2, uint 1 ; <sbyte> [#uses=1]
+ %tmp15 = extractelement <16 x sbyte> %tmp2, uint 2 ; <sbyte> [#uses=1]
+ %tmp16 = extractelement <16 x sbyte> %tmp2, uint 3 ; <sbyte> [#uses=1]
+ %tmp17 = extractelement <16 x sbyte> %tmp2, uint 4 ; <sbyte> [#uses=1]
+ %tmp18 = insertelement <16 x sbyte> undef, sbyte %tmp, uint 0 ; <<16 x sbyte>> [#uses=1]
+ %tmp19 = insertelement <16 x sbyte> %tmp18, sbyte %tmp3, uint 1 ; <<16 x sbyte>> [#uses=1]
+ %tmp20 = insertelement <16 x sbyte> %tmp19, sbyte %tmp4, uint 2 ; <<16 x sbyte>> [#uses=1]
+ %tmp21 = insertelement <16 x sbyte> %tmp20, sbyte %tmp5, uint 3 ; <<16 x sbyte>> [#uses=1]
+ %tmp22 = insertelement <16 x sbyte> %tmp21, sbyte %tmp6, uint 4 ; <<16 x sbyte>> [#uses=1]
+ %tmp23 = insertelement <16 x sbyte> %tmp22, sbyte %tmp7, uint 5 ; <<16 x sbyte>> [#uses=1]
+ %tmp24 = insertelement <16 x sbyte> %tmp23, sbyte %tmp8, uint 6 ; <<16 x sbyte>> [#uses=1]
+ %tmp25 = insertelement <16 x sbyte> %tmp24, sbyte %tmp9, uint 7 ; <<16 x sbyte>> [#uses=1]
+ %tmp26 = insertelement <16 x sbyte> %tmp25, sbyte %tmp10, uint 8 ; <<16 x sbyte>> [#uses=1]
+ %tmp27 = insertelement <16 x sbyte> %tmp26, sbyte %tmp11, uint 9 ; <<16 x sbyte>> [#uses=1]
+ %tmp28 = insertelement <16 x sbyte> %tmp27, sbyte %tmp12, uint 10 ; <<16 x sbyte>> [#uses=1]
+ %tmp29 = insertelement <16 x sbyte> %tmp28, sbyte %tmp13, uint 11 ; <<16 x sbyte>> [#uses=1]
+ %tmp30 = insertelement <16 x sbyte> %tmp29, sbyte %tmp14, uint 12 ; <<16 x sbyte>> [#uses=1]
+ %tmp31 = insertelement <16 x sbyte> %tmp30, sbyte %tmp15, uint 13 ; <<16 x sbyte>> [#uses=1]
+ %tmp32 = insertelement <16 x sbyte> %tmp31, sbyte %tmp16, uint 14 ; <<16 x sbyte>> [#uses=1]
+ %tmp33 = insertelement <16 x sbyte> %tmp32, sbyte %tmp17, uint 15 ; <<16 x sbyte>> [#uses=1]
+ %tmp33 = cast <16 x sbyte> %tmp33 to <8 x short> ; <<8 x short>> [#uses=1]
+ store <8 x short> %tmp33, <8 x short>* %A
+ ret void
+}
+
+void %VPERM_promote(<8 x short>* %A, <8 x short>* %B) {
+entry:
+ %tmp = load <8 x short>* %A ; <<8 x short>> [#uses=1]
+ %tmp = cast <8 x short> %tmp to <4 x int> ; <<4 x int>> [#uses=1]
+ %tmp2 = load <8 x short>* %B ; <<8 x short>> [#uses=1]
+ %tmp2 = cast <8 x short> %tmp2 to <4 x int> ; <<4 x int>> [#uses=1]
+ %tmp3 = call <4 x int> %llvm.ppc.altivec.vperm( <4 x int> %tmp, <4 x int> %tmp2, <16 x sbyte> < sbyte 14, sbyte 14, sbyte 14, sbyte 14, sbyte 14, sbyte 14, sbyte 14, sbyte 14, sbyte 14, sbyte 14, sbyte 14, sbyte 14, sbyte 14, sbyte 14, sbyte 14, sbyte 14 > ) ; <<4 x int>> [#uses=1]
+ %tmp3 = cast <4 x int> %tmp3 to <8 x short> ; <<8 x short>> [#uses=1]
+ store <8 x short> %tmp3, <8 x short>* %A
+ ret void
+}
+
+declare <4 x int> %llvm.ppc.altivec.vperm(<4 x int>, <4 x int>, <16 x sbyte>)
+
+
+void %tb_l(<16 x sbyte>* %A, <16 x sbyte>* %B) {
+entry:
+ %tmp = load <16 x sbyte>* %A ; <<16 x sbyte>> [#uses=8]
+ %tmp2 = load <16 x sbyte>* %B ; <<16 x sbyte>> [#uses=8]
+ %tmp = extractelement <16 x sbyte> %tmp, uint 8 ; <sbyte> [#uses=1]
+ %tmp3 = extractelement <16 x sbyte> %tmp2, uint 8 ; <sbyte> [#uses=1]
+ %tmp4 = extractelement <16 x sbyte> %tmp, uint 9 ; <sbyte> [#uses=1]
+ %tmp5 = extractelement <16 x sbyte> %tmp2, uint 9 ; <sbyte> [#uses=1]
+ %tmp6 = extractelement <16 x sbyte> %tmp, uint 10 ; <sbyte> [#uses=1]
+ %tmp7 = extractelement <16 x sbyte> %tmp2, uint 10 ; <sbyte> [#uses=1]
+ %tmp8 = extractelement <16 x sbyte> %tmp, uint 11 ; <sbyte> [#uses=1]
+ %tmp9 = extractelement <16 x sbyte> %tmp2, uint 11 ; <sbyte> [#uses=1]
+ %tmp10 = extractelement <16 x sbyte> %tmp, uint 12 ; <sbyte> [#uses=1]
+ %tmp11 = extractelement <16 x sbyte> %tmp2, uint 12 ; <sbyte> [#uses=1]
+ %tmp12 = extractelement <16 x sbyte> %tmp, uint 13 ; <sbyte> [#uses=1]
+ %tmp13 = extractelement <16 x sbyte> %tmp2, uint 13 ; <sbyte> [#uses=1]
+ %tmp14 = extractelement <16 x sbyte> %tmp, uint 14 ; <sbyte> [#uses=1]
+ %tmp15 = extractelement <16 x sbyte> %tmp2, uint 14 ; <sbyte> [#uses=1]
+ %tmp16 = extractelement <16 x sbyte> %tmp, uint 15 ; <sbyte> [#uses=1]
+ %tmp17 = extractelement <16 x sbyte> %tmp2, uint 15 ; <sbyte> [#uses=1]
+ %tmp18 = insertelement <16 x sbyte> undef, sbyte %tmp, uint 0 ; <<16 x sbyte>> [#uses=1]
+ %tmp19 = insertelement <16 x sbyte> %tmp18, sbyte %tmp3, uint 1 ; <<16 x sbyte>> [#uses=1]
+ %tmp20 = insertelement <16 x sbyte> %tmp19, sbyte %tmp4, uint 2 ; <<16 x sbyte>> [#uses=1]
+ %tmp21 = insertelement <16 x sbyte> %tmp20, sbyte %tmp5, uint 3 ; <<16 x sbyte>> [#uses=1]
+ %tmp22 = insertelement <16 x sbyte> %tmp21, sbyte %tmp6, uint 4 ; <<16 x sbyte>> [#uses=1]
+ %tmp23 = insertelement <16 x sbyte> %tmp22, sbyte %tmp7, uint 5 ; <<16 x sbyte>> [#uses=1]
+ %tmp24 = insertelement <16 x sbyte> %tmp23, sbyte %tmp8, uint 6 ; <<16 x sbyte>> [#uses=1]
+ %tmp25 = insertelement <16 x sbyte> %tmp24, sbyte %tmp9, uint 7 ; <<16 x sbyte>> [#uses=1]
+ %tmp26 = insertelement <16 x sbyte> %tmp25, sbyte %tmp10, uint 8 ; <<16 x sbyte>> [#uses=1]
+ %tmp27 = insertelement <16 x sbyte> %tmp26, sbyte %tmp11, uint 9 ; <<16 x sbyte>> [#uses=1]
+ %tmp28 = insertelement <16 x sbyte> %tmp27, sbyte %tmp12, uint 10 ; <<16 x sbyte>> [#uses=1]
+ %tmp29 = insertelement <16 x sbyte> %tmp28, sbyte %tmp13, uint 11 ; <<16 x sbyte>> [#uses=1]
+ %tmp30 = insertelement <16 x sbyte> %tmp29, sbyte %tmp14, uint 12 ; <<16 x sbyte>> [#uses=1]
+ %tmp31 = insertelement <16 x sbyte> %tmp30, sbyte %tmp15, uint 13 ; <<16 x sbyte>> [#uses=1]
+ %tmp32 = insertelement <16 x sbyte> %tmp31, sbyte %tmp16, uint 14 ; <<16 x sbyte>> [#uses=1]
+ %tmp33 = insertelement <16 x sbyte> %tmp32, sbyte %tmp17, uint 15 ; <<16 x sbyte>> [#uses=1]
+ store <16 x sbyte> %tmp33, <16 x sbyte>* %A
+ ret void
+}
+
+void %th_l(<8 x short>* %A, <8 x short>* %B) {
+entry:
+ %tmp = load <8 x short>* %A ; <<8 x short>> [#uses=4]
+ %tmp2 = load <8 x short>* %B ; <<8 x short>> [#uses=4]
+ %tmp = extractelement <8 x short> %tmp, uint 4 ; <short> [#uses=1]
+ %tmp3 = extractelement <8 x short> %tmp2, uint 4 ; <short> [#uses=1]
+ %tmp4 = extractelement <8 x short> %tmp, uint 5 ; <short> [#uses=1]
+ %tmp5 = extractelement <8 x short> %tmp2, uint 5 ; <short> [#uses=1]
+ %tmp6 = extractelement <8 x short> %tmp, uint 6 ; <short> [#uses=1]
+ %tmp7 = extractelement <8 x short> %tmp2, uint 6 ; <short> [#uses=1]
+ %tmp8 = extractelement <8 x short> %tmp, uint 7 ; <short> [#uses=1]
+ %tmp9 = extractelement <8 x short> %tmp2, uint 7 ; <short> [#uses=1]
+ %tmp10 = insertelement <8 x short> undef, short %tmp, uint 0 ; <<8 x short>> [#uses=1]
+ %tmp11 = insertelement <8 x short> %tmp10, short %tmp3, uint 1 ; <<8 x short>> [#uses=1]
+ %tmp12 = insertelement <8 x short> %tmp11, short %tmp4, uint 2 ; <<8 x short>> [#uses=1]
+ %tmp13 = insertelement <8 x short> %tmp12, short %tmp5, uint 3 ; <<8 x short>> [#uses=1]
+ %tmp14 = insertelement <8 x short> %tmp13, short %tmp6, uint 4 ; <<8 x short>> [#uses=1]
+ %tmp15 = insertelement <8 x short> %tmp14, short %tmp7, uint 5 ; <<8 x short>> [#uses=1]
+ %tmp16 = insertelement <8 x short> %tmp15, short %tmp8, uint 6 ; <<8 x short>> [#uses=1]
+ %tmp17 = insertelement <8 x short> %tmp16, short %tmp9, uint 7 ; <<8 x short>> [#uses=1]
+ store <8 x short> %tmp17, <8 x short>* %A
+ ret void
+}
+
+void %tw_l(<4 x int>* %A, <4 x int>* %B) {
+entry:
+ %tmp = load <4 x int>* %A ; <<4 x int>> [#uses=2]
+ %tmp2 = load <4 x int>* %B ; <<4 x int>> [#uses=2]
+ %tmp = extractelement <4 x int> %tmp, uint 2 ; <int> [#uses=1]
+ %tmp3 = extractelement <4 x int> %tmp2, uint 2 ; <int> [#uses=1]
+ %tmp4 = extractelement <4 x int> %tmp, uint 3 ; <int> [#uses=1]
+ %tmp5 = extractelement <4 x int> %tmp2, uint 3 ; <int> [#uses=1]
+ %tmp6 = insertelement <4 x int> undef, int %tmp, uint 0 ; <<4 x int>> [#uses=1]
+ %tmp7 = insertelement <4 x int> %tmp6, int %tmp3, uint 1 ; <<4 x int>> [#uses=1]
+ %tmp8 = insertelement <4 x int> %tmp7, int %tmp4, uint 2 ; <<4 x int>> [#uses=1]
+ %tmp9 = insertelement <4 x int> %tmp8, int %tmp5, uint 3 ; <<4 x int>> [#uses=1]
+ store <4 x int> %tmp9, <4 x int>* %A
+ ret void
+}
+
+void %tb_h(<16 x sbyte>* %A, <16 x sbyte>* %B) {
+entry:
+ %tmp = load <16 x sbyte>* %A ; <<16 x sbyte>> [#uses=8]
+ %tmp2 = load <16 x sbyte>* %B ; <<16 x sbyte>> [#uses=8]
+ %tmp = extractelement <16 x sbyte> %tmp, uint 0 ; <sbyte> [#uses=1]
+ %tmp3 = extractelement <16 x sbyte> %tmp2, uint 0 ; <sbyte> [#uses=1]
+ %tmp4 = extractelement <16 x sbyte> %tmp, uint 1 ; <sbyte> [#uses=1]
+ %tmp5 = extractelement <16 x sbyte> %tmp2, uint 1 ; <sbyte> [#uses=1]
+ %tmp6 = extractelement <16 x sbyte> %tmp, uint 2 ; <sbyte> [#uses=1]
+ %tmp7 = extractelement <16 x sbyte> %tmp2, uint 2 ; <sbyte> [#uses=1]
+ %tmp8 = extractelement <16 x sbyte> %tmp, uint 3 ; <sbyte> [#uses=1]
+ %tmp9 = extractelement <16 x sbyte> %tmp2, uint 3 ; <sbyte> [#uses=1]
+ %tmp10 = extractelement <16 x sbyte> %tmp, uint 4 ; <sbyte> [#uses=1]
+ %tmp11 = extractelement <16 x sbyte> %tmp2, uint 4 ; <sbyte> [#uses=1]
+ %tmp12 = extractelement <16 x sbyte> %tmp, uint 5 ; <sbyte> [#uses=1]
+ %tmp13 = extractelement <16 x sbyte> %tmp2, uint 5 ; <sbyte> [#uses=1]
+ %tmp14 = extractelement <16 x sbyte> %tmp, uint 6 ; <sbyte> [#uses=1]
+ %tmp15 = extractelement <16 x sbyte> %tmp2, uint 6 ; <sbyte> [#uses=1]
+ %tmp16 = extractelement <16 x sbyte> %tmp, uint 7 ; <sbyte> [#uses=1]
+ %tmp17 = extractelement <16 x sbyte> %tmp2, uint 7 ; <sbyte> [#uses=1]
+ %tmp18 = insertelement <16 x sbyte> undef, sbyte %tmp, uint 0 ; <<16 x sbyte>> [#uses=1]
+ %tmp19 = insertelement <16 x sbyte> %tmp18, sbyte %tmp3, uint 1 ; <<16 x sbyte>> [#uses=1]
+ %tmp20 = insertelement <16 x sbyte> %tmp19, sbyte %tmp4, uint 2 ; <<16 x sbyte>> [#uses=1]
+ %tmp21 = insertelement <16 x sbyte> %tmp20, sbyte %tmp5, uint 3 ; <<16 x sbyte>> [#uses=1]
+ %tmp22 = insertelement <16 x sbyte> %tmp21, sbyte %tmp6, uint 4 ; <<16 x sbyte>> [#uses=1]
+ %tmp23 = insertelement <16 x sbyte> %tmp22, sbyte %tmp7, uint 5 ; <<16 x sbyte>> [#uses=1]
+ %tmp24 = insertelement <16 x sbyte> %tmp23, sbyte %tmp8, uint 6 ; <<16 x sbyte>> [#uses=1]
+ %tmp25 = insertelement <16 x sbyte> %tmp24, sbyte %tmp9, uint 7 ; <<16 x sbyte>> [#uses=1]
+ %tmp26 = insertelement <16 x sbyte> %tmp25, sbyte %tmp10, uint 8 ; <<16 x sbyte>> [#uses=1]
+ %tmp27 = insertelement <16 x sbyte> %tmp26, sbyte %tmp11, uint 9 ; <<16 x sbyte>> [#uses=1]
+ %tmp28 = insertelement <16 x sbyte> %tmp27, sbyte %tmp12, uint 10 ; <<16 x sbyte>> [#uses=1]
+ %tmp29 = insertelement <16 x sbyte> %tmp28, sbyte %tmp13, uint 11 ; <<16 x sbyte>> [#uses=1]
+ %tmp30 = insertelement <16 x sbyte> %tmp29, sbyte %tmp14, uint 12 ; <<16 x sbyte>> [#uses=1]
+ %tmp31 = insertelement <16 x sbyte> %tmp30, sbyte %tmp15, uint 13 ; <<16 x sbyte>> [#uses=1]
+ %tmp32 = insertelement <16 x sbyte> %tmp31, sbyte %tmp16, uint 14 ; <<16 x sbyte>> [#uses=1]
+ %tmp33 = insertelement <16 x sbyte> %tmp32, sbyte %tmp17, uint 15 ; <<16 x sbyte>> [#uses=1]
+ store <16 x sbyte> %tmp33, <16 x sbyte>* %A
+ ret void
+}
+
+void %th_h(<8 x short>* %A, <8 x short>* %B) {
+entry:
+ %tmp = load <8 x short>* %A ; <<8 x short>> [#uses=4]
+ %tmp2 = load <8 x short>* %B ; <<8 x short>> [#uses=4]
+ %tmp = extractelement <8 x short> %tmp, uint 0 ; <short> [#uses=1]
+ %tmp3 = extractelement <8 x short> %tmp2, uint 0 ; <short> [#uses=1]
+ %tmp4 = extractelement <8 x short> %tmp, uint 1 ; <short> [#uses=1]
+ %tmp5 = extractelement <8 x short> %tmp2, uint 1 ; <short> [#uses=1]
+ %tmp6 = extractelement <8 x short> %tmp, uint 2 ; <short> [#uses=1]
+ %tmp7 = extractelement <8 x short> %tmp2, uint 2 ; <short> [#uses=1]
+ %tmp8 = extractelement <8 x short> %tmp, uint 3 ; <short> [#uses=1]
+ %tmp9 = extractelement <8 x short> %tmp2, uint 3 ; <short> [#uses=1]
+ %tmp10 = insertelement <8 x short> undef, short %tmp, uint 0 ; <<8 x short>> [#uses=1]
+ %tmp11 = insertelement <8 x short> %tmp10, short %tmp3, uint 1 ; <<8 x short>> [#uses=1]
+ %tmp12 = insertelement <8 x short> %tmp11, short %tmp4, uint 2 ; <<8 x short>> [#uses=1]
+ %tmp13 = insertelement <8 x short> %tmp12, short %tmp5, uint 3 ; <<8 x short>> [#uses=1]
+ %tmp14 = insertelement <8 x short> %tmp13, short %tmp6, uint 4 ; <<8 x short>> [#uses=1]
+ %tmp15 = insertelement <8 x short> %tmp14, short %tmp7, uint 5 ; <<8 x short>> [#uses=1]
+ %tmp16 = insertelement <8 x short> %tmp15, short %tmp8, uint 6 ; <<8 x short>> [#uses=1]
+ %tmp17 = insertelement <8 x short> %tmp16, short %tmp9, uint 7 ; <<8 x short>> [#uses=1]
+ store <8 x short> %tmp17, <8 x short>* %A
+ ret void
+}
+
+void %tw_h(<4 x int>* %A, <4 x int>* %B) {
+entry:
+ %tmp = load <4 x int>* %A ; <<4 x int>> [#uses=2]
+ %tmp2 = load <4 x int>* %B ; <<4 x int>> [#uses=2]
+ %tmp = extractelement <4 x int> %tmp2, uint 0 ; <int> [#uses=1]
+ %tmp3 = extractelement <4 x int> %tmp, uint 0 ; <int> [#uses=1]
+ %tmp4 = extractelement <4 x int> %tmp2, uint 1 ; <int> [#uses=1]
+ %tmp5 = extractelement <4 x int> %tmp, uint 1 ; <int> [#uses=1]
+ %tmp6 = insertelement <4 x int> undef, int %tmp, uint 0 ; <<4 x int>> [#uses=1]
+ %tmp7 = insertelement <4 x int> %tmp6, int %tmp3, uint 1 ; <<4 x int>> [#uses=1]
+ %tmp8 = insertelement <4 x int> %tmp7, int %tmp4, uint 2 ; <<4 x int>> [#uses=1]
+ %tmp9 = insertelement <4 x int> %tmp8, int %tmp5, uint 3 ; <<4 x int>> [#uses=1]
+ store <4 x int> %tmp9, <4 x int>* %A
+ ret void
+}
+
+void %tw_h_flop(<4 x int>* %A, <4 x int>* %B) {
+ %tmp = load <4 x int>* %A ; <<4 x int>> [#uses=2]
+ %tmp2 = load <4 x int>* %B ; <<4 x int>> [#uses=2]
+ %tmp = extractelement <4 x int> %tmp, uint 0 ; <int> [#uses=1]
+ %tmp3 = extractelement <4 x int> %tmp2, uint 0 ; <int> [#uses=1]
+ %tmp4 = extractelement <4 x int> %tmp, uint 1 ; <int> [#uses=1]
+ %tmp5 = extractelement <4 x int> %tmp2, uint 1 ; <int> [#uses=1]
+ %tmp6 = insertelement <4 x int> undef, int %tmp, uint 0 ; <<4 x int>> [#uses=1]
+ %tmp7 = insertelement <4 x int> %tmp6, int %tmp3, uint 1 ; <<4 x int>> [#uses=1]
+ %tmp8 = insertelement <4 x int> %tmp7, int %tmp4, uint 2 ; <<4 x int>> [#uses=1]
+ %tmp9 = insertelement <4 x int> %tmp8, int %tmp5, uint 3 ; <<4 x int>> [#uses=1]
+ store <4 x int> %tmp9, <4 x int>* %A
+ ret void
+}
+
+
+void %VMRG_UNARY_tb_l(<16 x sbyte>* %A, <16 x sbyte>* %B) {
+entry:
+ %tmp = load <16 x sbyte>* %A ; <<16 x sbyte>> [#uses=16]
+ %tmp = extractelement <16 x sbyte> %tmp, uint 8 ; <sbyte> [#uses=1]
+ %tmp3 = extractelement <16 x sbyte> %tmp, uint 8 ; <sbyte> [#uses=1]
+ %tmp4 = extractelement <16 x sbyte> %tmp, uint 9 ; <sbyte> [#uses=1]
+ %tmp5 = extractelement <16 x sbyte> %tmp, uint 9 ; <sbyte> [#uses=1]
+ %tmp6 = extractelement <16 x sbyte> %tmp, uint 10 ; <sbyte> [#uses=1]
+ %tmp7 = extractelement <16 x sbyte> %tmp, uint 10 ; <sbyte> [#uses=1]
+ %tmp8 = extractelement <16 x sbyte> %tmp, uint 11 ; <sbyte> [#uses=1]
+ %tmp9 = extractelement <16 x sbyte> %tmp, uint 11 ; <sbyte> [#uses=1]
+ %tmp10 = extractelement <16 x sbyte> %tmp, uint 12 ; <sbyte> [#uses=1]
+ %tmp11 = extractelement <16 x sbyte> %tmp, uint 12 ; <sbyte> [#uses=1]
+ %tmp12 = extractelement <16 x sbyte> %tmp, uint 13 ; <sbyte> [#uses=1]
+ %tmp13 = extractelement <16 x sbyte> %tmp, uint 13 ; <sbyte> [#uses=1]
+ %tmp14 = extractelement <16 x sbyte> %tmp, uint 14 ; <sbyte> [#uses=1]
+ %tmp15 = extractelement <16 x sbyte> %tmp, uint 14 ; <sbyte> [#uses=1]
+ %tmp16 = extractelement <16 x sbyte> %tmp, uint 15 ; <sbyte> [#uses=1]
+ %tmp17 = extractelement <16 x sbyte> %tmp, uint 15 ; <sbyte> [#uses=1]
+ %tmp18 = insertelement <16 x sbyte> undef, sbyte %tmp, uint 0 ; <<16 x sbyte>> [#uses=1]
+ %tmp19 = insertelement <16 x sbyte> %tmp18, sbyte %tmp3, uint 1 ; <<16 x sbyte>> [#uses=1]
+ %tmp20 = insertelement <16 x sbyte> %tmp19, sbyte %tmp4, uint 2 ; <<16 x sbyte>> [#uses=1]
+ %tmp21 = insertelement <16 x sbyte> %tmp20, sbyte %tmp5, uint 3 ; <<16 x sbyte>> [#uses=1]
+ %tmp22 = insertelement <16 x sbyte> %tmp21, sbyte %tmp6, uint 4 ; <<16 x sbyte>> [#uses=1]
+ %tmp23 = insertelement <16 x sbyte> %tmp22, sbyte %tmp7, uint 5 ; <<16 x sbyte>> [#uses=1]
+ %tmp24 = insertelement <16 x sbyte> %tmp23, sbyte %tmp8, uint 6 ; <<16 x sbyte>> [#uses=1]
+ %tmp25 = insertelement <16 x sbyte> %tmp24, sbyte %tmp9, uint 7 ; <<16 x sbyte>> [#uses=1]
+ %tmp26 = insertelement <16 x sbyte> %tmp25, sbyte %tmp10, uint 8 ; <<16 x sbyte>> [#uses=1]
+ %tmp27 = insertelement <16 x sbyte> %tmp26, sbyte %tmp11, uint 9 ; <<16 x sbyte>> [#uses=1]
+ %tmp28 = insertelement <16 x sbyte> %tmp27, sbyte %tmp12, uint 10 ; <<16 x sbyte>> [#uses=1]
+ %tmp29 = insertelement <16 x sbyte> %tmp28, sbyte %tmp13, uint 11 ; <<16 x sbyte>> [#uses=1]
+ %tmp30 = insertelement <16 x sbyte> %tmp29, sbyte %tmp14, uint 12 ; <<16 x sbyte>> [#uses=1]
+ %tmp31 = insertelement <16 x sbyte> %tmp30, sbyte %tmp15, uint 13 ; <<16 x sbyte>> [#uses=1]
+ %tmp32 = insertelement <16 x sbyte> %tmp31, sbyte %tmp16, uint 14 ; <<16 x sbyte>> [#uses=1]
+ %tmp33 = insertelement <16 x sbyte> %tmp32, sbyte %tmp17, uint 15 ; <<16 x sbyte>> [#uses=1]
+ store <16 x sbyte> %tmp33, <16 x sbyte>* %A
+ ret void
+}
+
+void %VMRG_UNARY_th_l(<8 x short>* %A, <8 x short>* %B) {
+entry:
+ %tmp = load <8 x short>* %A ; <<8 x short>> [#uses=8]
+ %tmp = extractelement <8 x short> %tmp, uint 4 ; <short> [#uses=1]
+ %tmp3 = extractelement <8 x short> %tmp, uint 4 ; <short> [#uses=1]
+ %tmp4 = extractelement <8 x short> %tmp, uint 5 ; <short> [#uses=1]
+ %tmp5 = extractelement <8 x short> %tmp, uint 5 ; <short> [#uses=1]
+ %tmp6 = extractelement <8 x short> %tmp, uint 6 ; <short> [#uses=1]
+ %tmp7 = extractelement <8 x short> %tmp, uint 6 ; <short> [#uses=1]
+ %tmp8 = extractelement <8 x short> %tmp, uint 7 ; <short> [#uses=1]
+ %tmp9 = extractelement <8 x short> %tmp, uint 7 ; <short> [#uses=1]
+ %tmp10 = insertelement <8 x short> undef, short %tmp, uint 0 ; <<8 x short>> [#uses=1]
+ %tmp11 = insertelement <8 x short> %tmp10, short %tmp3, uint 1 ; <<8 x short>> [#uses=1]
+ %tmp12 = insertelement <8 x short> %tmp11, short %tmp4, uint 2 ; <<8 x short>> [#uses=1]
+ %tmp13 = insertelement <8 x short> %tmp12, short %tmp5, uint 3 ; <<8 x short>> [#uses=1]
+ %tmp14 = insertelement <8 x short> %tmp13, short %tmp6, uint 4 ; <<8 x short>> [#uses=1]
+ %tmp15 = insertelement <8 x short> %tmp14, short %tmp7, uint 5 ; <<8 x short>> [#uses=1]
+ %tmp16 = insertelement <8 x short> %tmp15, short %tmp8, uint 6 ; <<8 x short>> [#uses=1]
+ %tmp17 = insertelement <8 x short> %tmp16, short %tmp9, uint 7 ; <<8 x short>> [#uses=1]
+ store <8 x short> %tmp17, <8 x short>* %A
+ ret void
+}
+
+void %VMRG_UNARY_tw_l(<4 x int>* %A, <4 x int>* %B) {
+entry:
+ %tmp = load <4 x int>* %A ; <<4 x int>> [#uses=4]
+ %tmp = extractelement <4 x int> %tmp, uint 2 ; <int> [#uses=1]
+ %tmp3 = extractelement <4 x int> %tmp, uint 2 ; <int> [#uses=1]
+ %tmp4 = extractelement <4 x int> %tmp, uint 3 ; <int> [#uses=1]
+ %tmp5 = extractelement <4 x int> %tmp, uint 3 ; <int> [#uses=1]
+ %tmp6 = insertelement <4 x int> undef, int %tmp, uint 0 ; <<4 x int>> [#uses=1]
+ %tmp7 = insertelement <4 x int> %tmp6, int %tmp3, uint 1 ; <<4 x int>> [#uses=1]
+ %tmp8 = insertelement <4 x int> %tmp7, int %tmp4, uint 2 ; <<4 x int>> [#uses=1]
+ %tmp9 = insertelement <4 x int> %tmp8, int %tmp5, uint 3 ; <<4 x int>> [#uses=1]
+ store <4 x int> %tmp9, <4 x int>* %A
+ ret void
+}
+
+void %VMRG_UNARY_tb_h(<16 x sbyte>* %A, <16 x sbyte>* %B) {
+entry:
+ %tmp = load <16 x sbyte>* %A ; <<16 x sbyte>> [#uses=16]
+ %tmp = extractelement <16 x sbyte> %tmp, uint 0 ; <sbyte> [#uses=1]
+ %tmp3 = extractelement <16 x sbyte> %tmp, uint 0 ; <sbyte> [#uses=1]
+ %tmp4 = extractelement <16 x sbyte> %tmp, uint 1 ; <sbyte> [#uses=1]
+ %tmp5 = extractelement <16 x sbyte> %tmp, uint 1 ; <sbyte> [#uses=1]
+ %tmp6 = extractelement <16 x sbyte> %tmp, uint 2 ; <sbyte> [#uses=1]
+ %tmp7 = extractelement <16 x sbyte> %tmp, uint 2 ; <sbyte> [#uses=1]
+ %tmp8 = extractelement <16 x sbyte> %tmp, uint 3 ; <sbyte> [#uses=1]
+ %tmp9 = extractelement <16 x sbyte> %tmp, uint 3 ; <sbyte> [#uses=1]
+ %tmp10 = extractelement <16 x sbyte> %tmp, uint 4 ; <sbyte> [#uses=1]
+ %tmp11 = extractelement <16 x sbyte> %tmp, uint 4 ; <sbyte> [#uses=1]
+ %tmp12 = extractelement <16 x sbyte> %tmp, uint 5 ; <sbyte> [#uses=1]
+ %tmp13 = extractelement <16 x sbyte> %tmp, uint 5 ; <sbyte> [#uses=1]
+ %tmp14 = extractelement <16 x sbyte> %tmp, uint 6 ; <sbyte> [#uses=1]
+ %tmp15 = extractelement <16 x sbyte> %tmp, uint 6 ; <sbyte> [#uses=1]
+ %tmp16 = extractelement <16 x sbyte> %tmp, uint 7 ; <sbyte> [#uses=1]
+ %tmp17 = extractelement <16 x sbyte> %tmp, uint 7 ; <sbyte> [#uses=1]
+ %tmp18 = insertelement <16 x sbyte> undef, sbyte %tmp, uint 0 ; <<16 x sbyte>> [#uses=1]
+ %tmp19 = insertelement <16 x sbyte> %tmp18, sbyte %tmp3, uint 1 ; <<16 x sbyte>> [#uses=1]
+ %tmp20 = insertelement <16 x sbyte> %tmp19, sbyte %tmp4, uint 2 ; <<16 x sbyte>> [#uses=1]
+ %tmp21 = insertelement <16 x sbyte> %tmp20, sbyte %tmp5, uint 3 ; <<16 x sbyte>> [#uses=1]
+ %tmp22 = insertelement <16 x sbyte> %tmp21, sbyte %tmp6, uint 4 ; <<16 x sbyte>> [#uses=1]
+ %tmp23 = insertelement <16 x sbyte> %tmp22, sbyte %tmp7, uint 5 ; <<16 x sbyte>> [#uses=1]
+ %tmp24 = insertelement <16 x sbyte> %tmp23, sbyte %tmp8, uint 6 ; <<16 x sbyte>> [#uses=1]
+ %tmp25 = insertelement <16 x sbyte> %tmp24, sbyte %tmp9, uint 7 ; <<16 x sbyte>> [#uses=1]
+ %tmp26 = insertelement <16 x sbyte> %tmp25, sbyte %tmp10, uint 8 ; <<16 x sbyte>> [#uses=1]
+ %tmp27 = insertelement <16 x sbyte> %tmp26, sbyte %tmp11, uint 9 ; <<16 x sbyte>> [#uses=1]
+ %tmp28 = insertelement <16 x sbyte> %tmp27, sbyte %tmp12, uint 10 ; <<16 x sbyte>> [#uses=1]
+ %tmp29 = insertelement <16 x sbyte> %tmp28, sbyte %tmp13, uint 11 ; <<16 x sbyte>> [#uses=1]
+ %tmp30 = insertelement <16 x sbyte> %tmp29, sbyte %tmp14, uint 12 ; <<16 x sbyte>> [#uses=1]
+ %tmp31 = insertelement <16 x sbyte> %tmp30, sbyte %tmp15, uint 13 ; <<16 x sbyte>> [#uses=1]
+ %tmp32 = insertelement <16 x sbyte> %tmp31, sbyte %tmp16, uint 14 ; <<16 x sbyte>> [#uses=1]
+ %tmp33 = insertelement <16 x sbyte> %tmp32, sbyte %tmp17, uint 15 ; <<16 x sbyte>> [#uses=1]
+ store <16 x sbyte> %tmp33, <16 x sbyte>* %A
+ ret void
+}
+
+void %VMRG_UNARY_th_h(<8 x short>* %A, <8 x short>* %B) {
+entry:
+ %tmp = load <8 x short>* %A ; <<8 x short>> [#uses=8]
+ %tmp = extractelement <8 x short> %tmp, uint 0 ; <short> [#uses=1]
+ %tmp3 = extractelement <8 x short> %tmp, uint 0 ; <short> [#uses=1]
+ %tmp4 = extractelement <8 x short> %tmp, uint 1 ; <short> [#uses=1]
+ %tmp5 = extractelement <8 x short> %tmp, uint 1 ; <short> [#uses=1]
+ %tmp6 = extractelement <8 x short> %tmp, uint 2 ; <short> [#uses=1]
+ %tmp7 = extractelement <8 x short> %tmp, uint 2 ; <short> [#uses=1]
+ %tmp8 = extractelement <8 x short> %tmp, uint 3 ; <short> [#uses=1]
+ %tmp9 = extractelement <8 x short> %tmp, uint 3 ; <short> [#uses=1]
+ %tmp10 = insertelement <8 x short> undef, short %tmp, uint 0 ; <<8 x short>> [#uses=1]
+ %tmp11 = insertelement <8 x short> %tmp10, short %tmp3, uint 1 ; <<8 x short>> [#uses=1]
+ %tmp12 = insertelement <8 x short> %tmp11, short %tmp4, uint 2 ; <<8 x short>> [#uses=1]
+ %tmp13 = insertelement <8 x short> %tmp12, short %tmp5, uint 3 ; <<8 x short>> [#uses=1]
+ %tmp14 = insertelement <8 x short> %tmp13, short %tmp6, uint 4 ; <<8 x short>> [#uses=1]
+ %tmp15 = insertelement <8 x short> %tmp14, short %tmp7, uint 5 ; <<8 x short>> [#uses=1]
+ %tmp16 = insertelement <8 x short> %tmp15, short %tmp8, uint 6 ; <<8 x short>> [#uses=1]
+ %tmp17 = insertelement <8 x short> %tmp16, short %tmp9, uint 7 ; <<8 x short>> [#uses=1]
+ store <8 x short> %tmp17, <8 x short>* %A
+ ret void
+}
+
+void %VMRG_UNARY_tw_h(<4 x int>* %A, <4 x int>* %B) {
+entry:
+ %tmp = load <4 x int>* %A ; <<4 x int>> [#uses=4]
+ %tmp = extractelement <4 x int> %tmp, uint 0 ; <int> [#uses=1]
+ %tmp3 = extractelement <4 x int> %tmp, uint 0 ; <int> [#uses=1]
+ %tmp4 = extractelement <4 x int> %tmp, uint 1 ; <int> [#uses=1]
+ %tmp5 = extractelement <4 x int> %tmp, uint 1 ; <int> [#uses=1]
+ %tmp6 = insertelement <4 x int> undef, int %tmp, uint 0 ; <<4 x int>> [#uses=1]
+ %tmp7 = insertelement <4 x int> %tmp6, int %tmp3, uint 1 ; <<4 x int>> [#uses=1]
+ %tmp8 = insertelement <4 x int> %tmp7, int %tmp4, uint 2 ; <<4 x int>> [#uses=1]
+ %tmp9 = insertelement <4 x int> %tmp8, int %tmp5, uint 3 ; <<4 x int>> [#uses=1]
+ store <4 x int> %tmp9, <4 x int>* %A
+ ret void
+}
+
+void %VPCKUHUM_unary(<8 x short>* %A, <8 x short>* %B) {
+entry:
+ %tmp = load <8 x short>* %A ; <<8 x short>> [#uses=2]
+ %tmp = cast <8 x short> %tmp to <16 x sbyte> ; <<16 x sbyte>> [#uses=8]
+ %tmp3 = cast <8 x short> %tmp to <16 x sbyte> ; <<16 x sbyte>> [#uses=8]
+ %tmp = extractelement <16 x sbyte> %tmp, uint 1 ; <sbyte> [#uses=1]
+ %tmp4 = extractelement <16 x sbyte> %tmp, uint 3 ; <sbyte> [#uses=1]
+ %tmp5 = extractelement <16 x sbyte> %tmp, uint 5 ; <sbyte> [#uses=1]
+ %tmp6 = extractelement <16 x sbyte> %tmp, uint 7 ; <sbyte> [#uses=1]
+ %tmp7 = extractelement <16 x sbyte> %tmp, uint 9 ; <sbyte> [#uses=1]
+ %tmp8 = extractelement <16 x sbyte> %tmp, uint 11 ; <sbyte> [#uses=1]
+ %tmp9 = extractelement <16 x sbyte> %tmp, uint 13 ; <sbyte> [#uses=1]
+ %tmp10 = extractelement <16 x sbyte> %tmp, uint 15 ; <sbyte> [#uses=1]
+ %tmp11 = extractelement <16 x sbyte> %tmp3, uint 1 ; <sbyte> [#uses=1]
+ %tmp12 = extractelement <16 x sbyte> %tmp3, uint 3 ; <sbyte> [#uses=1]
+ %tmp13 = extractelement <16 x sbyte> %tmp3, uint 5 ; <sbyte> [#uses=1]
+ %tmp14 = extractelement <16 x sbyte> %tmp3, uint 7 ; <sbyte> [#uses=1]
+ %tmp15 = extractelement <16 x sbyte> %tmp3, uint 9 ; <sbyte> [#uses=1]
+ %tmp16 = extractelement <16 x sbyte> %tmp3, uint 11 ; <sbyte> [#uses=1]
+ %tmp17 = extractelement <16 x sbyte> %tmp3, uint 13 ; <sbyte> [#uses=1]
+ %tmp18 = extractelement <16 x sbyte> %tmp3, uint 15 ; <sbyte> [#uses=1]
+ %tmp19 = insertelement <16 x sbyte> undef, sbyte %tmp, uint 0 ; <<16 x sbyte>> [#uses=1]
+ %tmp20 = insertelement <16 x sbyte> %tmp19, sbyte %tmp4, uint 1 ; <<16 x sbyte>> [#uses=1]
+ %tmp21 = insertelement <16 x sbyte> %tmp20, sbyte %tmp5, uint 2 ; <<16 x sbyte>> [#uses=1]
+ %tmp22 = insertelement <16 x sbyte> %tmp21, sbyte %tmp6, uint 3 ; <<16 x sbyte>> [#uses=1]
+ %tmp23 = insertelement <16 x sbyte> %tmp22, sbyte %tmp7, uint 4 ; <<16 x sbyte>> [#uses=1]
+ %tmp24 = insertelement <16 x sbyte> %tmp23, sbyte %tmp8, uint 5 ; <<16 x sbyte>> [#uses=1]
+ %tmp25 = insertelement <16 x sbyte> %tmp24, sbyte %tmp9, uint 6 ; <<16 x sbyte>> [#uses=1]
+ %tmp26 = insertelement <16 x sbyte> %tmp25, sbyte %tmp10, uint 7 ; <<16 x sbyte>> [#uses=1]
+ %tmp27 = insertelement <16 x sbyte> %tmp26, sbyte %tmp11, uint 8 ; <<16 x sbyte>> [#uses=1]
+ %tmp28 = insertelement <16 x sbyte> %tmp27, sbyte %tmp12, uint 9 ; <<16 x sbyte>> [#uses=1]
+ %tmp29 = insertelement <16 x sbyte> %tmp28, sbyte %tmp13, uint 10 ; <<16 x sbyte>> [#uses=1]
+ %tmp30 = insertelement <16 x sbyte> %tmp29, sbyte %tmp14, uint 11 ; <<16 x sbyte>> [#uses=1]
+ %tmp31 = insertelement <16 x sbyte> %tmp30, sbyte %tmp15, uint 12 ; <<16 x sbyte>> [#uses=1]
+ %tmp32 = insertelement <16 x sbyte> %tmp31, sbyte %tmp16, uint 13 ; <<16 x sbyte>> [#uses=1]
+ %tmp33 = insertelement <16 x sbyte> %tmp32, sbyte %tmp17, uint 14 ; <<16 x sbyte>> [#uses=1]
+ %tmp34 = insertelement <16 x sbyte> %tmp33, sbyte %tmp18, uint 15 ; <<16 x sbyte>> [#uses=1]
+ %tmp34 = cast <16 x sbyte> %tmp34 to <8 x short> ; <<8 x short>> [#uses=1]
+ store <8 x short> %tmp34, <8 x short>* %A
+ ret void
+}
+
+void %VPCKUWUM_unary(<4 x int>* %A, <4 x int>* %B) {
+entry:
+ %tmp = load <4 x int>* %A ; <<4 x int>> [#uses=2]
+ %tmp = cast <4 x int> %tmp to <8 x short> ; <<8 x short>> [#uses=4]
+ %tmp3 = cast <4 x int> %tmp to <8 x short> ; <<8 x short>> [#uses=4]
+ %tmp = extractelement <8 x short> %tmp, uint 1 ; <short> [#uses=1]
+ %tmp4 = extractelement <8 x short> %tmp, uint 3 ; <short> [#uses=1]
+ %tmp5 = extractelement <8 x short> %tmp, uint 5 ; <short> [#uses=1]
+ %tmp6 = extractelement <8 x short> %tmp, uint 7 ; <short> [#uses=1]
+ %tmp7 = extractelement <8 x short> %tmp3, uint 1 ; <short> [#uses=1]
+ %tmp8 = extractelement <8 x short> %tmp3, uint 3 ; <short> [#uses=1]
+ %tmp9 = extractelement <8 x short> %tmp3, uint 5 ; <short> [#uses=1]
+ %tmp10 = extractelement <8 x short> %tmp3, uint 7 ; <short> [#uses=1]
+ %tmp11 = insertelement <8 x short> undef, short %tmp, uint 0 ; <<8 x short>> [#uses=1]
+ %tmp12 = insertelement <8 x short> %tmp11, short %tmp4, uint 1 ; <<8 x short>> [#uses=1]
+ %tmp13 = insertelement <8 x short> %tmp12, short %tmp5, uint 2 ; <<8 x short>> [#uses=1]
+ %tmp14 = insertelement <8 x short> %tmp13, short %tmp6, uint 3 ; <<8 x short>> [#uses=1]
+ %tmp15 = insertelement <8 x short> %tmp14, short %tmp7, uint 4 ; <<8 x short>> [#uses=1]
+ %tmp16 = insertelement <8 x short> %tmp15, short %tmp8, uint 5 ; <<8 x short>> [#uses=1]
+ %tmp17 = insertelement <8 x short> %tmp16, short %tmp9, uint 6 ; <<8 x short>> [#uses=1]
+ %tmp18 = insertelement <8 x short> %tmp17, short %tmp10, uint 7 ; <<8 x short>> [#uses=1]
+ %tmp18 = cast <8 x short> %tmp18 to <4 x int> ; <<4 x int>> [#uses=1]
+ store <4 x int> %tmp18, <4 x int>* %A
+ ret void
+}
diff --git a/test/CodeGen/PowerPC/vec_spat.ll b/test/CodeGen/PowerPC/vec_spat.ll
new file mode 100644
index 0000000..15e2950
--- /dev/null
+++ b/test/CodeGen/PowerPC/vec_spat.ll
@@ -0,0 +1,73 @@
+; Test that vectors are scalarized/lowered correctly.
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g3 | \
+; RUN: grep stfs | wc -l | grep 4
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 -o %t -f
+; RUN: grep vspltw %t | wc -l | grep 2
+; RUN: grep vsplti %t | wc -l | grep 3
+; RUN: grep vsplth %t | wc -l | grep 1
+
+%f4 = type <4 x float>
+%i4 = type <4 x int>
+
+implementation
+
+void %splat(%f4* %P, %f4* %Q, float %X) {
+ %tmp = insertelement %f4 undef, float %X, uint 0
+ %tmp2 = insertelement %f4 %tmp, float %X, uint 1
+ %tmp4 = insertelement %f4 %tmp2, float %X, uint 2
+ %tmp6 = insertelement %f4 %tmp4, float %X, uint 3
+ %q = load %f4* %Q
+ %R = add %f4 %q, %tmp6
+ store %f4 %R, %f4* %P
+ ret void
+}
+
+void %splat_i4(%i4* %P, %i4* %Q, int %X) {
+ %tmp = insertelement %i4 undef, int %X, uint 0
+ %tmp2 = insertelement %i4 %tmp, int %X, uint 1
+ %tmp4 = insertelement %i4 %tmp2, int %X, uint 2
+ %tmp6 = insertelement %i4 %tmp4, int %X, uint 3
+ %q = load %i4* %Q
+ %R = add %i4 %q, %tmp6
+ store %i4 %R, %i4* %P
+ ret void
+}
+
+void %splat_imm_i32(%i4* %P, %i4* %Q, int %X) {
+ %q = load %i4* %Q
+ %R = add %i4 %q, <int -1, int -1, int -1, int -1>
+ store %i4 %R, %i4* %P
+ ret void
+}
+
+void %splat_imm_i16(%i4* %P, %i4* %Q, int %X) {
+ %q = load %i4* %Q
+ %R = add %i4 %q, <int 65537, int 65537, int 65537, int 65537>
+ store %i4 %R, %i4* %P
+ ret void
+}
+
+void %splat_h(short %tmp, <16 x ubyte>* %dst) {
+ %tmp = insertelement <8 x short> undef, short %tmp, uint 0
+ %tmp72 = insertelement <8 x short> %tmp, short %tmp, uint 1
+ %tmp73 = insertelement <8 x short> %tmp72, short %tmp, uint 2
+ %tmp74 = insertelement <8 x short> %tmp73, short %tmp, uint 3
+ %tmp75 = insertelement <8 x short> %tmp74, short %tmp, uint 4
+ %tmp76 = insertelement <8 x short> %tmp75, short %tmp, uint 5
+ %tmp77 = insertelement <8 x short> %tmp76, short %tmp, uint 6
+ %tmp78 = insertelement <8 x short> %tmp77, short %tmp, uint 7
+ %tmp78 = cast <8 x short> %tmp78 to <16 x ubyte>
+ store <16 x ubyte> %tmp78, <16 x ubyte>* %dst
+ ret void
+}
+
+void %spltish(<16 x ubyte>* %A, <16 x ubyte>* %B) {
+ ; Gets converted to 16 x ubyte
+ %tmp = load <16 x ubyte>* %B
+ %tmp.s = cast <16 x ubyte> %tmp to <16 x sbyte>
+ %tmp4 = sub <16 x sbyte> %tmp.s, cast (<8 x short> < short 15, short 15, short 15, short 15, short 15, short 15, short 15, short 15 > to <16 x sbyte>)
+ %tmp4.u = cast <16 x sbyte> %tmp4 to <16 x ubyte>
+ store <16 x ubyte> %tmp4.u, <16 x ubyte>* %A
+ ret void
+}
+
diff --git a/test/CodeGen/PowerPC/vec_vrsave.ll b/test/CodeGen/PowerPC/vec_vrsave.ll
new file mode 100644
index 0000000..63e3eba
--- /dev/null
+++ b/test/CodeGen/PowerPC/vec_vrsave.ll
@@ -0,0 +1,14 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 -o %t -f
+; RUN: grep vrlw %t
+; RUN: not grep spr %t
+; RUN: not grep vrsave %t
+
+<4 x int> %test_rol() {
+ ret <4 x int> < int -11534337, int -11534337, int -11534337, int -11534337 >
+}
+
+<4 x int> %test_arg(<4 x int> %A, <4 x int> %B) {
+ %C = add <4 x int> %A, %B
+ ret <4 x int> %C
+}
+
diff --git a/test/CodeGen/PowerPC/vec_zero.ll b/test/CodeGen/PowerPC/vec_zero.ll
new file mode 100644
index 0000000..c845c0e
--- /dev/null
+++ b/test/CodeGen/PowerPC/vec_zero.ll
@@ -0,0 +1,8 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 | grep vxor
+
+void %foo(<4 x float> *%P) {
+ %T = load <4 x float> * %P
+ %S = add <4 x float> zeroinitializer, %T
+ store <4 x float> %S, <4 x float>* %P
+ ret void
+}
diff --git a/test/CodeGen/PowerPC/vector-identity-shuffle.ll b/test/CodeGen/PowerPC/vector-identity-shuffle.ll
new file mode 100644
index 0000000..af5cc02
--- /dev/null
+++ b/test/CodeGen/PowerPC/vector-identity-shuffle.ll
@@ -0,0 +1,16 @@
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 | grep test:
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 | not grep vperm
+
+void %test(<4 x float> *%tmp2.i) {
+ %tmp2.i = load <4x float>* %tmp2.i
+ %xFloat0.48 = extractelement <4 x float> %tmp2.i, uint 0 ; <float> [#uses=1]
+ %inFloat0.49 = insertelement <4 x float> undef, float %xFloat0.48, uint 0 ; <<4 x float>> [#uses=1]
+ %xFloat1.50 = extractelement <4 x float> %tmp2.i, uint 1 ; <float> [#uses=1]
+ %inFloat1.52 = insertelement <4 x float> %inFloat0.49, float %xFloat1.50, uint 1 ; <<4 x float>> [#uses=1]
+ %xFloat2.53 = extractelement <4 x float> %tmp2.i, uint 2 ; <float> [#uses=1]
+ %inFloat2.55 = insertelement <4 x float> %inFloat1.52, float %xFloat2.53, uint 2 ; <<4 x float>> [#uses=1]
+ %xFloat3.56 = extractelement <4 x float> %tmp2.i, uint 3 ; <float> [#uses=1]
+ %inFloat3.58 = insertelement <4 x float> %inFloat2.55, float %xFloat3.56, uint 3 ; <<4 x float>> [#uses=4]
+ store <4 x float> %inFloat3.58, <4x float>* %tmp2.i
+ ret void
+}
diff --git a/test/CodeGen/PowerPC/vector.ll b/test/CodeGen/PowerPC/vector.ll
new file mode 100644
index 0000000..f8dbbb0
--- /dev/null
+++ b/test/CodeGen/PowerPC/vector.ll
@@ -0,0 +1,157 @@
+; Test that vectors are scalarized/lowered correctly.
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g3
+
+%f1 = type <1 x float>
+%f2 = type <2 x float>
+%f4 = type <4 x float>
+%i4 = type <4 x int>
+%f8 = type <8 x float>
+%d8 = type <8 x double>
+
+implementation
+
+;;; TEST HANDLING OF VARIOUS VECTOR SIZES
+
+void %test_f1(%f1 *%P, %f1* %Q, %f1 *%S) {
+ %p = load %f1 *%P
+ %q = load %f1* %Q
+ %R = add %f1 %p, %q
+ store %f1 %R, %f1 *%S
+ ret void
+}
+
+void %test_f2(%f2 *%P, %f2* %Q, %f2 *%S) {
+ %p = load %f2* %P
+ %q = load %f2* %Q
+ %R = add %f2 %p, %q
+ store %f2 %R, %f2 *%S
+ ret void
+}
+
+void %test_f4(%f4 *%P, %f4* %Q, %f4 *%S) {
+ %p = load %f4* %P
+ %q = load %f4* %Q
+ %R = add %f4 %p, %q
+ store %f4 %R, %f4 *%S
+ ret void
+}
+
+void %test_f8(%f8 *%P, %f8* %Q, %f8 *%S) {
+ %p = load %f8* %P
+ %q = load %f8* %Q
+ %R = add %f8 %p, %q
+ store %f8 %R, %f8 *%S
+ ret void
+}
+
+void %test_fmul(%f8 *%P, %f8* %Q, %f8 *%S) {
+ %p = load %f8* %P
+ %q = load %f8* %Q
+ %R = mul %f8 %p, %q
+ store %f8 %R, %f8 *%S
+ ret void
+}
+
+void %test_div(%f8 *%P, %f8* %Q, %f8 *%S) {
+ %p = load %f8* %P
+ %q = load %f8* %Q
+ %R = div %f8 %p, %q
+ store %f8 %R, %f8 *%S
+ ret void
+}
+
+;;; TEST VECTOR CONSTRUCTS
+
+void %test_cst(%f4 *%P, %f4 *%S) {
+ %p = load %f4* %P
+ %R = add %f4 %p, <float 0.1, float 1.0, float 2.0, float 4.5>
+ store %f4 %R, %f4 *%S
+ ret void
+}
+
+void %test_zero(%f4 *%P, %f4 *%S) {
+ %p = load %f4* %P
+ %R = add %f4 %p, zeroinitializer
+ store %f4 %R, %f4 *%S
+ ret void
+}
+
+void %test_undef(%f4 *%P, %f4 *%S) {
+ %p = load %f4* %P
+ %R = add %f4 %p, undef
+ store %f4 %R, %f4 *%S
+ ret void
+}
+
+void %test_constant_insert(%f4 *%S) {
+ %R = insertelement %f4 zeroinitializer, float 10.0, uint 0
+ store %f4 %R, %f4 *%S
+ ret void
+}
+
+void %test_variable_buildvector(float %F, %f4 *%S) {
+ %R = insertelement %f4 zeroinitializer, float %F, uint 0
+ store %f4 %R, %f4 *%S
+ ret void
+}
+
+void %test_scalar_to_vector(float %F, %f4 *%S) {
+ %R = insertelement %f4 undef, float %F, uint 0 ;; R = scalar_to_vector F
+ store %f4 %R, %f4 *%S
+ ret void
+}
+
+float %test_extract_elt(%f8 *%P) {
+ %p = load %f8* %P
+ %R = extractelement %f8 %p, uint 3
+ ret float %R
+}
+
+double %test_extract_elt2(%d8 *%P) {
+ %p = load %d8* %P
+ %R = extractelement %d8 %p, uint 3
+ ret double %R
+}
+
+void %test_cast_1(<4 x float>* %b, <4 x int>* %a) {
+ %tmp = load <4 x float>* %b
+ %tmp2 = add <4 x float> %tmp, <float 1.0, float 2.0, float 3.0, float 4.0>
+ %tmp3 = cast <4 x float> %tmp2 to <4 x int>
+ %tmp4 = add <4 x int> %tmp3, <int 1, int 2, int 3, int 4>
+ store <4 x int> %tmp4, <4 x int>* %a
+ ret void
+}
+
+void %test_cast_2(<8 x float>* %a, <8 x int>* %b) {
+ %T = load <8 x float>* %a
+ %T2 = cast <8 x float> %T to <8 x int>
+ store <8 x int> %T2, <8 x int>* %b
+ ret void
+}
+
+
+;;; TEST IMPORTANT IDIOMS
+
+void %splat(%f4* %P, %f4* %Q, float %X) {
+ %tmp = insertelement %f4 undef, float %X, uint 0
+ %tmp2 = insertelement %f4 %tmp, float %X, uint 1
+ %tmp4 = insertelement %f4 %tmp2, float %X, uint 2
+ %tmp6 = insertelement %f4 %tmp4, float %X, uint 3
+ %q = load %f4* %Q
+ %R = add %f4 %q, %tmp6
+ store %f4 %R, %f4* %P
+ ret void
+}
+
+void %splat_i4(%i4* %P, %i4* %Q, int %X) {
+ %tmp = insertelement %i4 undef, int %X, uint 0
+ %tmp2 = insertelement %i4 %tmp, int %X, uint 1
+ %tmp4 = insertelement %i4 %tmp2, int %X, uint 2
+ %tmp6 = insertelement %i4 %tmp4, int %X, uint 3
+ %q = load %i4* %Q
+ %R = add %i4 %q, %tmp6
+ store %i4 %R, %i4* %P
+ ret void
+}
+