diff options
Diffstat (limited to 'test/CodeGen/PowerPC')
134 files changed, 5485 insertions, 0 deletions
diff --git a/test/CodeGen/PowerPC/2004-11-29-ShrCrash.ll b/test/CodeGen/PowerPC/2004-11-29-ShrCrash.ll new file mode 100644 index 0000000..e2a00d1 --- /dev/null +++ b/test/CodeGen/PowerPC/2004-11-29-ShrCrash.ll @@ -0,0 +1,6 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 + +void %main() { + %tr1 = shr uint 1, ubyte 0 + ret void +} diff --git a/test/CodeGen/PowerPC/2004-11-30-shift-crash.ll b/test/CodeGen/PowerPC/2004-11-30-shift-crash.ll new file mode 100644 index 0000000..4603bdb --- /dev/null +++ b/test/CodeGen/PowerPC/2004-11-30-shift-crash.ll @@ -0,0 +1,6 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 + +void %main() { + %tr4 = shl ulong 1, ubyte 0 ; <ulong> [#uses=0] + ret void +} diff --git a/test/CodeGen/PowerPC/2004-11-30-shr-var-crash.ll b/test/CodeGen/PowerPC/2004-11-30-shr-var-crash.ll new file mode 100644 index 0000000..8f54c78 --- /dev/null +++ b/test/CodeGen/PowerPC/2004-11-30-shr-var-crash.ll @@ -0,0 +1,7 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 + +void %main() { + %shamt = add ubyte 0, 1 ; <ubyte> [#uses=1] + %tr2 = shr long 1, ubyte %shamt ; <long> [#uses=0] + ret void +} diff --git a/test/CodeGen/PowerPC/2004-12-12-ZeroSizeCommon.ll b/test/CodeGen/PowerPC/2004-12-12-ZeroSizeCommon.ll new file mode 100644 index 0000000..87f6005 --- /dev/null +++ b/test/CodeGen/PowerPC/2004-12-12-ZeroSizeCommon.ll @@ -0,0 +1,3 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | not grep .comm.*X,0 + +%X = linkonce global {} {} diff --git a/test/CodeGen/PowerPC/2005-01-14-SetSelectCrash.ll b/test/CodeGen/PowerPC/2005-01-14-SetSelectCrash.ll new file mode 100644 index 0000000..5dc4b28 --- /dev/null +++ b/test/CodeGen/PowerPC/2005-01-14-SetSelectCrash.ll @@ -0,0 +1,8 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 + +int %main() { + %setle = setle long 1, 0 + %select = select bool true, bool %setle, bool true + ret int 0 +} + diff --git a/test/CodeGen/PowerPC/2005-01-14-UndefLong.ll 
b/test/CodeGen/PowerPC/2005-01-14-UndefLong.ll new file mode 100644 index 0000000..a4121c5 --- /dev/null +++ b/test/CodeGen/PowerPC/2005-01-14-UndefLong.ll @@ -0,0 +1,3 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 + +long %test() { ret long undef } diff --git a/test/CodeGen/PowerPC/2005-08-12-rlwimi-crash.ll b/test/CodeGen/PowerPC/2005-08-12-rlwimi-crash.ll new file mode 100644 index 0000000..ef0137f --- /dev/null +++ b/test/CodeGen/PowerPC/2005-08-12-rlwimi-crash.ll @@ -0,0 +1,12 @@ +; this should not crash the ppc backend + +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 + +uint %test( int %j.0.0.i) { + %tmp.85.i = and int %j.0.0.i, 7 + %tmp.161278.i = cast int %tmp.85.i to uint + %tmp.5.i77.i = shr uint %tmp.161278.i, ubyte 3 + ret uint %tmp.5.i77.i +} + + diff --git a/test/CodeGen/PowerPC/2005-09-02-LegalizeDuplicatesCalls.ll b/test/CodeGen/PowerPC/2005-09-02-LegalizeDuplicatesCalls.ll new file mode 100644 index 0000000..7bb1317 --- /dev/null +++ b/test/CodeGen/PowerPC/2005-09-02-LegalizeDuplicatesCalls.ll @@ -0,0 +1,10 @@ +; This function should have exactly one call to fixdfdi, no more! + +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mattr=-64bit | \ +; RUN: grep {bl .*fixdfdi} | wc -l | grep 1 + +double %test2(double %tmp.7705) { + %mem_tmp.2.0.in = cast double %tmp.7705 to long ; <long> [#uses=1] + %mem_tmp.2.0 = cast long %mem_tmp.2.0.in to double + ret double %mem_tmp.2.0 +} diff --git a/test/CodeGen/PowerPC/2005-10-08-ArithmeticRotate.ll b/test/CodeGen/PowerPC/2005-10-08-ArithmeticRotate.ll new file mode 100644 index 0000000..edbdc4a --- /dev/null +++ b/test/CodeGen/PowerPC/2005-10-08-ArithmeticRotate.ll @@ -0,0 +1,9 @@ +; This was erroneously being turned into an rlwinm instruction. +; The sign bit does matter in this case. 
+ +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | grep srawi +int %test(int %X) { + %Y = and int %X, -2 + %Z = shr int %Y, ubyte 11 + ret int %Z +} diff --git a/test/CodeGen/PowerPC/2005-11-30-vastart-crash.ll b/test/CodeGen/PowerPC/2005-11-30-vastart-crash.ll new file mode 100644 index 0000000..4264e9e --- /dev/null +++ b/test/CodeGen/PowerPC/2005-11-30-vastart-crash.ll @@ -0,0 +1,17 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc +target endian = big +target pointersize = 32 +target triple = "powerpc-apple-darwin8.2.0" +implementation ; Functions: + +void %bar(int %G, int %E, int %F, int %A, int %B, int %C, int %D, sbyte* %fmt, ...) { + %ap = alloca sbyte* ; <sbyte**> [#uses=2] + call void %llvm.va_start( sbyte** %ap ) + %tmp.1 = load sbyte** %ap ; <sbyte*> [#uses=1] + %tmp.0 = call double %foo( sbyte* %tmp.1 ) ; <double> [#uses=0] + ret void +} + +declare void %llvm.va_start(sbyte**) + +declare double %foo(sbyte*) diff --git a/test/CodeGen/PowerPC/2006-01-11-darwin-fp-argument.ll b/test/CodeGen/PowerPC/2006-01-11-darwin-fp-argument.ll new file mode 100644 index 0000000..c90ef0a --- /dev/null +++ b/test/CodeGen/PowerPC/2006-01-11-darwin-fp-argument.ll @@ -0,0 +1,12 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc | not grep {, f1} + +target endian = big +target pointersize = 32 +target triple = "powerpc-apple-darwin8.2.0" + +; Dead argument should reserve an FP register. 
+double %bar(double %DEAD, double %X, double %Y) { + %tmp.2 = add double %X, %Y + ret double %tmp.2 +} + diff --git a/test/CodeGen/PowerPC/2006-01-20-ShiftPartsCrash.ll b/test/CodeGen/PowerPC/2006-01-20-ShiftPartsCrash.ll new file mode 100644 index 0000000..7700459 --- /dev/null +++ b/test/CodeGen/PowerPC/2006-01-20-ShiftPartsCrash.ll @@ -0,0 +1,17 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc + +void %iterative_hash_host_wide_int() { + %zero = alloca int ; <int*> [#uses=2] + %b = alloca uint ; <uint*> [#uses=1] + store int 0, int* %zero + %tmp = load int* %zero ; <int> [#uses=1] + %tmp5 = cast int %tmp to uint ; <uint> [#uses=1] + %tmp6.u = add uint %tmp5, 32 ; <uint> [#uses=1] + %tmp6 = cast uint %tmp6.u to int ; <int> [#uses=1] + %tmp7 = load long* null ; <long> [#uses=1] + %tmp6 = cast int %tmp6 to ubyte ; <ubyte> [#uses=1] + %tmp8 = shr long %tmp7, ubyte %tmp6 ; <long> [#uses=1] + %tmp8 = cast long %tmp8 to uint ; <uint> [#uses=1] + store uint %tmp8, uint* %b + unreachable +} diff --git a/test/CodeGen/PowerPC/2006-04-01-FloatDoubleExtend.ll b/test/CodeGen/PowerPC/2006-04-01-FloatDoubleExtend.ll new file mode 100644 index 0000000..dcf599b --- /dev/null +++ b/test/CodeGen/PowerPC/2006-04-01-FloatDoubleExtend.ll @@ -0,0 +1,7 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 + +double %CalcSpeed(float %tmp127) { + %tmp145 = cast float %tmp127 to double ; <double> [#uses=1] + %tmp150 = call double asm "frsqrte $0,$1", "=f,f"( double %tmp145 ) ; <double> [#uses=0] + ret double %tmp150 +} diff --git a/test/CodeGen/PowerPC/2006-04-05-splat-ish.ll b/test/CodeGen/PowerPC/2006-04-05-splat-ish.ll new file mode 100644 index 0000000..b4facea --- /dev/null +++ b/test/CodeGen/PowerPC/2006-04-05-splat-ish.ll @@ -0,0 +1,10 @@ +; RUN: llvm-upgrade < %s | llvm-as | \ +; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -mcpu=g5 | \ +; RUN: grep {vspltish v.*, 10} + +void %test(<8 x short>* %P) { + %tmp = load <8 x short>* %P ; <<8 x short>> [#uses=1] + %tmp1 = add <8 x 
short> %tmp, < short 10, short 10, short 10, short 10, short 10, short 10, short 10, short 10 > ; <<8 x short>> [#uses=1] + store <8 x short> %tmp1, <8 x short>* %P + ret void +} diff --git a/test/CodeGen/PowerPC/2006-04-19-vmaddfp-crash.ll b/test/CodeGen/PowerPC/2006-04-19-vmaddfp-crash.ll new file mode 100644 index 0000000..59f7ed4 --- /dev/null +++ b/test/CodeGen/PowerPC/2006-04-19-vmaddfp-crash.ll @@ -0,0 +1,72 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 + +void %test(sbyte* %stack) { +entry: + %tmp9 = seteq int 0, 0 ; <bool> [#uses=1] + %tmp30 = seteq uint 0, 0 ; <bool> [#uses=1] + br bool %tmp30, label %cond_next54, label %cond_true31 + +cond_true860: ; preds = %bb855 + %tmp879 = tail call <4 x float> %llvm.ppc.altivec.vmaddfp( <4 x float> zeroinitializer, <4 x float> zeroinitializer, <4 x float> zeroinitializer ) ; <<4 x float>> [#uses=1] + %tmp880 = cast <4 x float> %tmp879 to <4 x int> ; <<4 x int>> [#uses=2] + %tmp883 = shufflevector <4 x int> %tmp880, <4 x int> undef, <4 x uint> < uint 1, uint 1, uint 1, uint 1 > ; <<4 x int>> [#uses=1] + %tmp883 = cast <4 x int> %tmp883 to <4 x float> ; <<4 x float>> [#uses=1] + %tmp885 = shufflevector <4 x int> %tmp880, <4 x int> undef, <4 x uint> < uint 2, uint 2, uint 2, uint 2 > ; <<4 x int>> [#uses=1] + %tmp885 = cast <4 x int> %tmp885 to <4 x float> ; <<4 x float>> [#uses=1] + br label %cond_next905 + +cond_true31: ; preds = %entry + ret void + +cond_next54: ; preds = %entry + br bool %tmp9, label %cond_false385, label %bb279 + +bb279: ; preds = %cond_next54 + ret void + +cond_false385: ; preds = %cond_next54 + %tmp388 = seteq uint 0, 0 ; <bool> [#uses=1] + br bool %tmp388, label %cond_next463, label %cond_true389 + +cond_true389: ; preds = %cond_false385 + ret void + +cond_next463: ; preds = %cond_false385 + %tmp1208107 = setgt sbyte* null, %stack ; <bool> [#uses=1] + br bool %tmp1208107, label %cond_true1209.preheader, label %bb1212 + +cond_true498: ; preds = %cond_true1209.preheader + ret 
void + +cond_true519: ; preds = %cond_true1209.preheader + %bothcond = or bool false, false ; <bool> [#uses=1] + br bool %bothcond, label %bb855, label %bb980 + +cond_false548: ; preds = %cond_true1209.preheader + ret void + +bb855: ; preds = %cond_true519 + %tmp859 = seteq int 0, 0 ; <bool> [#uses=1] + br bool %tmp859, label %cond_true860, label %cond_next905 + +cond_next905: ; preds = %bb855, %cond_true860 + %vfpw2.4 = phi <4 x float> [ %tmp885, %cond_true860 ], [ undef, %bb855 ] ; <<4 x float>> [#uses=0] + %vfpw1.4 = phi <4 x float> [ %tmp883, %cond_true860 ], [ undef, %bb855 ] ; <<4 x float>> [#uses=0] + %tmp930 = cast <4 x float> zeroinitializer to <4 x int> ; <<4 x int>> [#uses=0] + ret void + +bb980: ; preds = %cond_true519 + ret void + +cond_true1209.preheader: ; preds = %cond_next463 + %tmp496 = and uint 0, 12288 ; <uint> [#uses=1] + switch uint %tmp496, label %cond_false548 [ + uint 0, label %cond_true498 + uint 4096, label %cond_true519 + ] + +bb1212: ; preds = %cond_next463 + ret void +} + +declare <4 x float> %llvm.ppc.altivec.vmaddfp(<4 x float>, <4 x float>, <4 x float>) diff --git a/test/CodeGen/PowerPC/2006-05-12-rlwimi-crash.ll b/test/CodeGen/PowerPC/2006-05-12-rlwimi-crash.ll new file mode 100644 index 0000000..6c34cd7 --- /dev/null +++ b/test/CodeGen/PowerPC/2006-05-12-rlwimi-crash.ll @@ -0,0 +1,60 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 + + %struct.attr_desc = type { sbyte*, %struct.attr_desc*, %struct.attr_value*, %struct.attr_value*, uint } + %struct.attr_value = type { %struct.rtx_def*, %struct.attr_value*, %struct.insn_ent*, int, int } + %struct.insn_def = type { %struct.insn_def*, %struct.rtx_def*, int, int, int, int, int } + %struct.insn_ent = type { %struct.insn_ent*, %struct.insn_def* } + %struct.rtx_def = type { ushort, ubyte, ubyte, %struct.u } + %struct.u = type { [1 x long] } + +implementation ; Functions: + +void %find_attr() { +entry: + %tmp26 = seteq %struct.attr_desc* null, null ; <bool> [#uses=1] + br bool 
%tmp26, label %bb30, label %cond_true27 + +cond_true27: ; preds = %entry + ret void + +bb30: ; preds = %entry + %tmp67 = seteq %struct.attr_desc* null, null ; <bool> [#uses=1] + br bool %tmp67, label %cond_next92, label %cond_true68 + +cond_true68: ; preds = %bb30 + ret void + +cond_next92: ; preds = %bb30 + %tmp173 = getelementptr %struct.attr_desc* null, int 0, uint 4 ; <uint*> [#uses=2] + %tmp174 = load uint* %tmp173 ; <uint> [#uses=1] + %tmp177 = and uint %tmp174, 4294967287 ; <uint> [#uses=1] + store uint %tmp177, uint* %tmp173 + %tmp180 = getelementptr %struct.attr_desc* null, int 0, uint 4 ; <uint*> [#uses=1] + %tmp181 = load uint* %tmp180 ; <uint> [#uses=1] + %tmp185 = getelementptr %struct.attr_desc* null, int 0, uint 4 ; <uint*> [#uses=2] + %tmp186 = load uint* %tmp185 ; <uint> [#uses=1] + %tmp183187 = shl uint %tmp181, ubyte 1 ; <uint> [#uses=1] + %tmp188 = and uint %tmp183187, 16 ; <uint> [#uses=1] + %tmp190 = and uint %tmp186, 4294967279 ; <uint> [#uses=1] + %tmp191 = or uint %tmp190, %tmp188 ; <uint> [#uses=1] + store uint %tmp191, uint* %tmp185 + %tmp193 = getelementptr %struct.attr_desc* null, int 0, uint 4 ; <uint*> [#uses=1] + %tmp194 = load uint* %tmp193 ; <uint> [#uses=1] + %tmp198 = getelementptr %struct.attr_desc* null, int 0, uint 4 ; <uint*> [#uses=2] + %tmp199 = load uint* %tmp198 ; <uint> [#uses=1] + %tmp196200 = shl uint %tmp194, ubyte 2 ; <uint> [#uses=1] + %tmp201 = and uint %tmp196200, 64 ; <uint> [#uses=1] + %tmp203 = and uint %tmp199, 4294967231 ; <uint> [#uses=1] + %tmp204 = or uint %tmp203, %tmp201 ; <uint> [#uses=1] + store uint %tmp204, uint* %tmp198 + %tmp206 = getelementptr %struct.attr_desc* null, int 0, uint 4 ; <uint*> [#uses=1] + %tmp207 = load uint* %tmp206 ; <uint> [#uses=1] + %tmp211 = getelementptr %struct.attr_desc* null, int 0, uint 4 ; <uint*> [#uses=2] + %tmp212 = load uint* %tmp211 ; <uint> [#uses=1] + %tmp209213 = shl uint %tmp207, ubyte 1 ; <uint> [#uses=1] + %tmp214 = and uint %tmp209213, 128 ; <uint> [#uses=1] 
+ %tmp216 = and uint %tmp212, 4294967167 ; <uint> [#uses=1] + %tmp217 = or uint %tmp216, %tmp214 ; <uint> [#uses=1] + store uint %tmp217, uint* %tmp211 + ret void +} diff --git a/test/CodeGen/PowerPC/2006-07-07-ComputeMaskedBits.ll b/test/CodeGen/PowerPC/2006-07-07-ComputeMaskedBits.ll new file mode 100644 index 0000000..1026072 --- /dev/null +++ b/test/CodeGen/PowerPC/2006-07-07-ComputeMaskedBits.ll @@ -0,0 +1,16 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc -mtriple=powerpc64-apple-darwin | grep extsw | wc -l | grep 2 + +%lens = external global ubyte* +%vals = external global int* + +int %test(int %i) { + %tmp = load ubyte** %lens + %tmp1 = getelementptr ubyte* %tmp, int %i + %tmp = load ubyte* %tmp1 + %tmp2 = cast ubyte %tmp to int + %tmp3 = load int** %vals + %tmp5 = sub int 1, %tmp2 + %tmp6 = getelementptr int* %tmp3, int %tmp5 + %tmp7 = load int* %tmp6 + ret int %tmp7 +} diff --git a/test/CodeGen/PowerPC/2006-07-19-stwbrx-crash.ll b/test/CodeGen/PowerPC/2006-07-19-stwbrx-crash.ll new file mode 100644 index 0000000..d71ba5a --- /dev/null +++ b/test/CodeGen/PowerPC/2006-07-19-stwbrx-crash.ll @@ -0,0 +1,10 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 + +void %img2buf(int %symbol_size_in_bytes, ushort* %ui16) { + %tmp93 = load ushort* null ; <ushort> [#uses=1] + %tmp99 = call ushort %llvm.bswap.i16( ushort %tmp93 ) ; <ushort> [#uses=1] + store ushort %tmp99, ushort* %ui16 + ret void +} + +declare ushort %llvm.bswap.i16(ushort) diff --git a/test/CodeGen/PowerPC/2006-08-11-RetVector.ll b/test/CodeGen/PowerPC/2006-08-11-RetVector.ll new file mode 100644 index 0000000..cf0cd2c --- /dev/null +++ b/test/CodeGen/PowerPC/2006-08-11-RetVector.ll @@ -0,0 +1,8 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 | grep vsldoi +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 | not grep vor + +<4 x float> %func(<4 x float> %fp0, <4 x float> %fp1) { + %tmp76 = shufflevector <4 x float> %fp0, <4 x float> %fp1, <4 x uint> < uint 0, uint 
1, uint 2, uint 7 > ; <<4 x float>> [#uses=1] + ret <4 x float> %tmp76 +} + diff --git a/test/CodeGen/PowerPC/2006-08-15-SelectionCrash.ll b/test/CodeGen/PowerPC/2006-08-15-SelectionCrash.ll new file mode 100644 index 0000000..287a79d --- /dev/null +++ b/test/CodeGen/PowerPC/2006-08-15-SelectionCrash.ll @@ -0,0 +1,38 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc + + %struct..0anon = type { int } + %struct.rtx_def = type { ushort, ubyte, ubyte, [1 x %struct..0anon] } + +implementation ; Functions: + +fastcc void %immed_double_const(int %i0, int %i1) { +entry: + %tmp1 = load uint* null ; <uint> [#uses=1] + switch uint %tmp1, label %bb103 [ + uint 1, label %bb + uint 3, label %bb + ] + +bb: ; preds = %entry, %entry + %tmp14 = setgt int 0, 31 ; <bool> [#uses=1] + br bool %tmp14, label %cond_next77, label %cond_next17 + +cond_next17: ; preds = %bb + ret void + +cond_next77: ; preds = %bb + %tmp79.not = setne int %i1, 0 ; <bool> [#uses=1] + %tmp84 = setlt int %i0, 0 ; <bool> [#uses=2] + %bothcond1 = or bool %tmp79.not, %tmp84 ; <bool> [#uses=1] + br bool %bothcond1, label %bb88, label %bb99 + +bb88: ; preds = %cond_next77 + %bothcond2 = and bool false, %tmp84 ; <bool> [#uses=0] + ret void + +bb99: ; preds = %cond_next77 + ret void + +bb103: ; preds = %entry + ret void +} diff --git a/test/CodeGen/PowerPC/2006-09-28-shift_64.ll b/test/CodeGen/PowerPC/2006-09-28-shift_64.ll new file mode 100644 index 0000000..58d1f26 --- /dev/null +++ b/test/CodeGen/PowerPC/2006-09-28-shift_64.ll @@ -0,0 +1,27 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc64 +target endian = big +target pointersize = 64 +target triple = "powerpc64-apple-darwin8" + +implementation ; Functions: + +void %glArrayElement_CompExec() { +entry: + %tmp3 = and ulong 0, 18446744073701163007 ; <ulong> [#uses=1] + br label %cond_true24 + +cond_false: ; preds = %cond_true24 + ret void + +cond_true24: ; preds = %cond_true24, %entry + %indvar.ph = phi uint [ 0, %entry ], [ %indvar.next, %cond_true24 ] ; <uint> 
[#uses=1] + %indvar = add uint 0, %indvar.ph ; <uint> [#uses=2] + %code.0 = cast uint %indvar to ubyte ; <ubyte> [#uses=1] + %tmp5 = add ubyte %code.0, 16 ; <ubyte> [#uses=1] + %tmp7 = shr ulong %tmp3, ubyte %tmp5 ; <ulong> [#uses=1] + %tmp7 = cast ulong %tmp7 to int ; <int> [#uses=1] + %tmp8 = and int %tmp7, 1 ; <int> [#uses=1] + %tmp8 = seteq int %tmp8, 0 ; <bool> [#uses=1] + %indvar.next = add uint %indvar, 1 ; <uint> [#uses=1] + br bool %tmp8, label %cond_false, label %cond_true24 +} diff --git a/test/CodeGen/PowerPC/2006-10-11-combiner-aa-regression.ll b/test/CodeGen/PowerPC/2006-10-11-combiner-aa-regression.ll new file mode 100644 index 0000000..992e52a --- /dev/null +++ b/test/CodeGen/PowerPC/2006-10-11-combiner-aa-regression.ll @@ -0,0 +1,26 @@ +; RUN: llvm-upgrade < %s | llvm-as | \ +; RUN: llc -march=ppc32 -combiner-alias-analysis | grep f5 + +target endian = big +target pointersize = 32 +target triple = "powerpc-apple-darwin8.2.0" + %struct.Point = type { double, double, double } + +implementation ; Functions: + +void %offset(%struct.Point* %pt, double %x, double %y, double %z) { +entry: + %tmp = getelementptr %struct.Point* %pt, int 0, uint 0 ; <double*> [#uses=2] + %tmp = load double* %tmp ; <double> [#uses=1] + %tmp2 = add double %tmp, %x ; <double> [#uses=1] + store double %tmp2, double* %tmp + %tmp6 = getelementptr %struct.Point* %pt, int 0, uint 1 ; <double*> [#uses=2] + %tmp7 = load double* %tmp6 ; <double> [#uses=1] + %tmp9 = add double %tmp7, %y ; <double> [#uses=1] + store double %tmp9, double* %tmp6 + %tmp13 = getelementptr %struct.Point* %pt, int 0, uint 2 ; <double*> [#uses=2] + %tmp14 = load double* %tmp13 ; <double> [#uses=1] + %tmp16 = add double %tmp14, %z ; <double> [#uses=1] + store double %tmp16, double* %tmp13 + ret void +} diff --git a/test/CodeGen/PowerPC/2006-10-13-Miscompile.ll b/test/CodeGen/PowerPC/2006-10-13-Miscompile.ll new file mode 100644 index 0000000..95b5312 --- /dev/null +++ 
b/test/CodeGen/PowerPC/2006-10-13-Miscompile.ll @@ -0,0 +1,18 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | not grep IMPLICIT_DEF + +void %foo(long %X) { +entry: + %tmp1 = and long %X, 3 ; <long> [#uses=1] + %tmp = setgt long %tmp1, 2 ; <bool> [#uses=1] + br bool %tmp, label %UnifiedReturnBlock, label %cond_true + +cond_true: ; preds = %entry + %tmp = tail call int (...)* %bar( ) ; <int> [#uses=0] + ret void + +UnifiedReturnBlock: ; preds = %entry + ret void +} + +declare int %bar(...) + diff --git a/test/CodeGen/PowerPC/2006-10-17-brcc-miscompile.ll b/test/CodeGen/PowerPC/2006-10-17-brcc-miscompile.ll new file mode 100644 index 0000000..397ada7 --- /dev/null +++ b/test/CodeGen/PowerPC/2006-10-17-brcc-miscompile.ll @@ -0,0 +1,24 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | grep xor + +target endian = big +target pointersize = 32 +target triple = "powerpc-apple-darwin8.7.0" + +implementation ; Functions: + +void %foo(int %X) { +entry: + %tmp1 = and int %X, 3 ; <int> [#uses=1] + %tmp2 = xor int %tmp1, 1 + %tmp = seteq int %tmp2, 0 ; <bool> [#uses=1] + br bool %tmp, label %UnifiedReturnBlock, label %cond_true + +cond_true: ; preds = %entry + tail call int (...)* %bar( ) ; <int> [#uses=0] + ret void + +UnifiedReturnBlock: ; preds = %entry + ret void +} + +declare int %bar(...) 
diff --git a/test/CodeGen/PowerPC/2006-10-17-ppc64-alloca.ll b/test/CodeGen/PowerPC/2006-10-17-ppc64-alloca.ll new file mode 100644 index 0000000..c981c26 --- /dev/null +++ b/test/CodeGen/PowerPC/2006-10-17-ppc64-alloca.ll @@ -0,0 +1,6 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc64 + +int * %foo(uint %n) { + %A = alloca int, uint %n + ret int* %A +} diff --git a/test/CodeGen/PowerPC/2006-11-10-DAGCombineMiscompile.ll b/test/CodeGen/PowerPC/2006-11-10-DAGCombineMiscompile.ll new file mode 100644 index 0000000..a5476eb --- /dev/null +++ b/test/CodeGen/PowerPC/2006-11-10-DAGCombineMiscompile.ll @@ -0,0 +1,14 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | grep rlwimi + +void %test(short %div.0.i.i.i.i, int %L_num.0.i.i.i.i, int %tmp1.i.i206.i.i, short* %P) { + %X = shl short %div.0.i.i.i.i, ubyte 1 ; <short> [#uses=1] + %tmp28.i.i.i.i = shl int %L_num.0.i.i.i.i, ubyte 1 ; <int> [#uses=2] + %tmp31.i.i.i.i = setlt int %tmp28.i.i.i.i, %tmp1.i.i206.i.i ; <bool> [#uses=2] + + %tmp31.i.i.i.i = cast bool %tmp31.i.i.i.i to short ; <short> [#uses=1] + %tmp371.i.i.i.i1 = or short %tmp31.i.i.i.i, %X ; <short> [#uses=1] + %div.0.be.i.i.i.i = xor short %tmp371.i.i.i.i1, 1 ; <short> [#uses=1] + store short %div.0.be.i.i.i.i, short* %P + ret void +} + diff --git a/test/CodeGen/PowerPC/2006-11-29-AltivecFPSplat.ll b/test/CodeGen/PowerPC/2006-11-29-AltivecFPSplat.ll new file mode 100644 index 0000000..0411eb5 --- /dev/null +++ b/test/CodeGen/PowerPC/2006-11-29-AltivecFPSplat.ll @@ -0,0 +1,10 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 + +void %glgRunProcessor15() { + %tmp26355.i = shufflevector <4 x float> zeroinitializer, <4 x float> < float 0x379FFFE000000000, float 0x379FFFE000000000, float 0x379FFFE000000000, float 0x379FFFE000000000 >, <4 x uint> < uint 0, uint 1, uint 2, uint 7 > ; <<4 x float>> [#uses=1] + %tmp3030030304.i = cast <4 x float> %tmp26355.i to <8 x short> ; <<8 x short>> [#uses=1] + %tmp30305.i = shufflevector <8 x 
short> zeroinitializer, <8 x short> %tmp3030030304.i, <8 x uint> < uint 1, uint 3, uint 5, uint 7, uint 9, uint 11, uint 13, uint 15 > ; <<8 x short>> [#uses=1] + %tmp30305.i = cast <8 x short> %tmp30305.i to <4 x int> ; <<4 x int>> [#uses=1] + store <4 x int> %tmp30305.i, <4 x int>* null + ret void +} diff --git a/test/CodeGen/PowerPC/2006-12-07-LargeAlloca.ll b/test/CodeGen/PowerPC/2006-12-07-LargeAlloca.ll new file mode 100644 index 0000000..f6103e5 --- /dev/null +++ b/test/CodeGen/PowerPC/2006-12-07-LargeAlloca.ll @@ -0,0 +1,26 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc64 +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 +; RUN: llvm-upgrade < %s | llvm-as | llc + +void %bitap() { +entry: + %RMask.i = alloca [256 x uint], align 16 ; <[256 x uint]*> [#uses=1] + %buffer = alloca [147456 x sbyte], align 16 ; <[147456 x sbyte]*> [#uses=0] + br bool false, label %bb19, label %bb.preheader + +bb.preheader: ; preds = %entry + ret void + +bb19: ; preds = %entry + br bool false, label %bb12.i, label %cond_next39 + +bb12.i: ; preds = %bb12.i, %bb19 + %i.0.i = phi uint [ %tmp11.i, %bb12.i ], [ 0, %bb19 ] ; <uint> [#uses=2] + %tmp9.i = getelementptr [256 x uint]* %RMask.i, int 0, uint %i.0.i ; <uint*> [#uses=1] + store uint 0, uint* %tmp9.i + %tmp11.i = add uint %i.0.i, 1 ; <uint> [#uses=1] + br label %bb12.i + +cond_next39: ; preds = %bb19 + ret void +} diff --git a/test/CodeGen/PowerPC/2006-12-07-SelectCrash.ll b/test/CodeGen/PowerPC/2006-12-07-SelectCrash.ll new file mode 100644 index 0000000..6fa410e --- /dev/null +++ b/test/CodeGen/PowerPC/2006-12-07-SelectCrash.ll @@ -0,0 +1,27 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc64 +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 +; RUN: llvm-upgrade < %s | llvm-as | llc + +%qsz.b = external global bool ; <bool*> [#uses=1] + +implementation ; Functions: + +fastcc void %qst() { +entry: + br bool true, label %cond_next71, label %cond_true + +cond_true: ; preds = %entry + ret void + 
+cond_next71: ; preds = %entry + %tmp73.b = load bool* %qsz.b ; <bool> [#uses=1] + %ii.4.ph = select bool %tmp73.b, ulong 4, ulong 0 ; <ulong> [#uses=1] + br label %bb139 + +bb82: ; preds = %bb139 + ret void + +bb139: ; preds = %bb139, %cond_next71 + %exitcond89 = seteq ulong 0, %ii.4.ph ; <bool> [#uses=1] + br bool %exitcond89, label %bb82, label %bb139 +} diff --git a/test/CodeGen/PowerPC/2007-01-04-ArgExtension.ll b/test/CodeGen/PowerPC/2007-01-04-ArgExtension.ll new file mode 100644 index 0000000..19fedf9 --- /dev/null +++ b/test/CodeGen/PowerPC/2007-01-04-ArgExtension.ll @@ -0,0 +1,10 @@ +; RUN: llvm-as < %s | llc -march=ppc32 | grep extsb +; RUN: llvm-as < %s | llc -march=ppc32 | grep extsh + +define i32 @p1(i8 %c, i16 %s) { +entry: + %tmp = sext i8 %c to i32 ; <i32> [#uses=1] + %tmp1 = sext i16 %s to i32 ; <i32> [#uses=1] + %tmp2 = add i32 %tmp1, %tmp ; <i32> [#uses=1] + ret i32 %tmp2 +} diff --git a/test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll b/test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll new file mode 100644 index 0000000..d9374ed --- /dev/null +++ b/test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll @@ -0,0 +1,27 @@ +; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \ +; RUN: grep cntlzw + +define i32 @foo() { +entry: + %retval = alloca i32, align 4 ; <i32*> [#uses=2] + %temp = alloca i32, align 4 ; <i32*> [#uses=2] + %ctz_x = alloca i32, align 4 ; <i32*> [#uses=3] + %ctz_c = alloca i32, align 4 ; <i32*> [#uses=2] + "alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + store i32 61440, i32* %ctz_x + %tmp = load i32* %ctz_x ; <i32> [#uses=1] + %tmp1 = sub i32 0, %tmp ; <i32> [#uses=1] + %tmp2 = load i32* %ctz_x ; <i32> [#uses=1] + %tmp3 = and i32 %tmp1, %tmp2 ; <i32> [#uses=1] + %tmp4 = call i32 asm "$(cntlz$|cntlzw$) $0,$1", "=r,r,~{dirflag},~{fpsr},~{flags}"( i32 %tmp3 ) ; <i32> [#uses=1] + store i32 %tmp4, i32* %ctz_c + %tmp5 = load i32* %ctz_c ; <i32> [#uses=1] + store i32 %tmp5, i32* %temp + %tmp6 = load i32* %temp ; <i32> 
[#uses=1] + store i32 %tmp6, i32* %retval + br label %return + +return: ; preds = %entry + %retval2 = load i32* %retval ; <i32> [#uses=1] + ret i32 %retval2 +} diff --git a/test/CodeGen/PowerPC/2007-01-29-lbrx-asm.ll b/test/CodeGen/PowerPC/2007-01-29-lbrx-asm.ll new file mode 100644 index 0000000..f2c951e --- /dev/null +++ b/test/CodeGen/PowerPC/2007-01-29-lbrx-asm.ll @@ -0,0 +1,7 @@ +; RUN: llvm-as < %s | llc -march=ppc32 +; RUN: llvm-as < %s | llc -march=ppc64 + +define i16 @test(i8* %d1, i16* %d2) { + %tmp237 = call i16 asm "lhbrx $0, $2, $1", "=r,r,bO,m"( i8* %d1, i32 0, i16* %d2 ) ; <i16> [#uses=1] + ret i16 %tmp237 +} diff --git a/test/CodeGen/PowerPC/2007-01-31-InlineAsmAddrMode.ll b/test/CodeGen/PowerPC/2007-01-31-InlineAsmAddrMode.ll new file mode 100644 index 0000000..d476462 --- /dev/null +++ b/test/CodeGen/PowerPC/2007-01-31-InlineAsmAddrMode.ll @@ -0,0 +1,24 @@ +; RUN: llvm-as < %s | llc -march=ppc32 +; RUN: llvm-as < %s | llc -march=ppc64 + +; Test two things: 1) that a frameidx can be rewritten in an inline asm +; 2) that inline asms can handle reg+imm addr modes. 
+ + %struct.A = type { i32, i32 } + + +define void @test1() { +entry: + %Out = alloca %struct.A, align 4 ; <%struct.A*> [#uses=1] + %tmp2 = getelementptr %struct.A* %Out, i32 0, i32 1 + %tmp5 = call i32 asm "lwbrx $0, $1", "=r,m"(i32* %tmp2 ) + ret void +} + +define void @test2() { +entry: + %Out = alloca %struct.A, align 4 ; <%struct.A*> [#uses=1] + %tmp2 = getelementptr %struct.A* %Out, i32 0, i32 0 ; <i32*> [#uses=1] + %tmp5 = call i32 asm "lwbrx $0, $2, $1", "=r,r,bO,m"( i8* null, i32 0, i32* %tmp2 ) ; <i32> [#uses=0] + ret void +} diff --git a/test/CodeGen/PowerPC/2007-02-16-AlignPacked.ll b/test/CodeGen/PowerPC/2007-02-16-AlignPacked.ll new file mode 100644 index 0000000..97f6a01 --- /dev/null +++ b/test/CodeGen/PowerPC/2007-02-16-AlignPacked.ll @@ -0,0 +1,4 @@ +; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin8.8.0 | \ +; RUN: grep align.*3 + +@X = global <{i32, i32}> <{ i32 1, i32 123 }> diff --git a/test/CodeGen/PowerPC/2007-02-16-InlineAsmNConstraint.ll b/test/CodeGen/PowerPC/2007-02-16-InlineAsmNConstraint.ll new file mode 100644 index 0000000..5a3d3b5 --- /dev/null +++ b/test/CodeGen/PowerPC/2007-02-16-InlineAsmNConstraint.ll @@ -0,0 +1,11 @@ +; RUN: llvm-as < %s | llc + +target datalayout = "E-p:32:32" +target triple = "powerpc-apple-darwin8.8.0" + + +define void @blargh() { +entry: + %tmp4 = call i32 asm "rlwimi $0,$2,$3,$4,$5", "=r,0,r,n,n,n"( i32 0, i32 0, i32 0, i32 24, i32 31 ) ; <i32> [#uses=0] + unreachable +} diff --git a/test/CodeGen/PowerPC/2007-02-23-lr-saved-twice.ll b/test/CodeGen/PowerPC/2007-02-23-lr-saved-twice.ll new file mode 100644 index 0000000..3a7d393 --- /dev/null +++ b/test/CodeGen/PowerPC/2007-02-23-lr-saved-twice.ll @@ -0,0 +1,14 @@ +; RUN: llvm-as < %s | llc | grep mflr | wc -l | grep 1 + +target datalayout = "e-p:32:32" +target triple = "powerpc-apple-darwin8" +@str = internal constant [18 x i8] c"hello world!, %d\0A\00" ; <[18 x i8]*> [#uses=1] + + +define i32 @main() { +entry: + %tmp = tail call i32 
(i8*, ...)* @printf( i8* getelementptr ([18 x i8]* @str, i32 0, i32 0) ) ; <i32> [#uses=0] + ret i32 0 +} + +declare i32 @printf(i8*, ...) diff --git a/test/CodeGen/PowerPC/2007-03-24-cntlzd.ll b/test/CodeGen/PowerPC/2007-03-24-cntlzd.ll new file mode 100644 index 0000000..1ea6174 --- /dev/null +++ b/test/CodeGen/PowerPC/2007-03-24-cntlzd.ll @@ -0,0 +1,11 @@ +; RUN: llvm-as < %s | llc -march=ppc64 -mcpu=g5 | grep cntlzd + +define i32 @_ZNK4llvm5APInt17countLeadingZerosEv(i64 *%t) { + %tmp19 = load i64* %t + %tmp23 = tail call i32 @llvm.ctlz.i64( i64 %tmp19 ) ; <i64> [#uses=1] + %tmp89 = add i32 %tmp23, -64 ; <i32> [#uses=1] + %tmp90 = add i32 %tmp89, 0 ; <i32> [#uses=1] + ret i32 %tmp90 +} + +declare i32 @llvm.ctlz.i64(i64) diff --git a/test/CodeGen/PowerPC/2007-03-30-SpillerCrash.ll b/test/CodeGen/PowerPC/2007-03-30-SpillerCrash.ll new file mode 100644 index 0000000..04ca3bb --- /dev/null +++ b/test/CodeGen/PowerPC/2007-03-30-SpillerCrash.ll @@ -0,0 +1,1801 @@ +; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5 + +define void @test(<4 x float>*, { { i16, i16, i32 } }*) { +xOperationInitMasks.exit: + %.sub7896 = getelementptr [4 x <4 x i32>]* null, i32 0, i32 0 ; <<4 x i32>*> [#uses=24] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 175, i32 3 ; <<4 x float>*>:2 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 174, i32 2 ; <<4 x float>*>:3 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 174, i32 3 ; <<4 x float>*>:4 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 173, i32 1 ; <<4 x float>*>:5 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 173, i32 2 ; <<4 x float>*>:6 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 173, i32 3 ; <<4 x float>*>:7 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 172, i32 1 ; <<4 x float>*>:8 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 172, i32 2 ; <<4 x float>*>:9 
[#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 172, i32 3 ; <<4 x float>*>:10 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 171, i32 1 ; <<4 x float>*>:11 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 171, i32 2 ; <<4 x float>*>:12 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 171, i32 3 ; <<4 x float>*>:13 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 170, i32 1 ; <<4 x float>*>:14 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 170, i32 2 ; <<4 x float>*>:15 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 170, i32 3 ; <<4 x float>*>:16 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 169, i32 1 ; <<4 x float>*>:17 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 169, i32 2 ; <<4 x float>*>:18 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 169, i32 3 ; <<4 x float>*>:19 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 168, i32 1 ; <<4 x float>*>:20 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 168, i32 2 ; <<4 x float>*>:21 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 168, i32 3 ; <<4 x float>*>:22 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 167, i32 1 ; <<4 x float>*>:23 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 167, i32 2 ; <<4 x float>*>:24 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 167, i32 3 ; <<4 x float>*>:25 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 166, i32 1 ; <<4 x float>*>:26 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 166, i32 2 ; <<4 x float>*>:27 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 166, i32 3 ; <<4 x float>*>:28 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 165, i32 1 
; <<4 x float>*>:29 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 165, i32 2 ; <<4 x float>*>:30 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 165, i32 3 ; <<4 x float>*>:31 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 164, i32 1 ; <<4 x float>*>:32 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 164, i32 2 ; <<4 x float>*>:33 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 164, i32 3 ; <<4 x float>*>:34 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 163, i32 1 ; <<4 x float>*>:35 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 163, i32 2 ; <<4 x float>*>:36 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 163, i32 3 ; <<4 x float>*>:37 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 162, i32 1 ; <<4 x float>*>:38 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 162, i32 2 ; <<4 x float>*>:39 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 162, i32 3 ; <<4 x float>*>:40 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 161, i32 1 ; <<4 x float>*>:41 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 161, i32 2 ; <<4 x float>*>:42 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 161, i32 3 ; <<4 x float>*>:43 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 160, i32 1 ; <<4 x float>*>:44 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 160, i32 2 ; <<4 x float>*>:45 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 160, i32 3 ; <<4 x float>*>:46 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 159, i32 1 ; <<4 x float>*>:47 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 159, i32 2 ; <<4 x float>*>:48 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, 
i32 0, i32 159, i32 3 ; <<4 x float>*>:49 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 158, i32 1 ; <<4 x float>*>:50 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 158, i32 2 ; <<4 x float>*>:51 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 158, i32 3 ; <<4 x float>*>:52 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 157, i32 1 ; <<4 x float>*>:53 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 157, i32 2 ; <<4 x float>*>:54 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 157, i32 3 ; <<4 x float>*>:55 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 156, i32 1 ; <<4 x float>*>:56 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 156, i32 2 ; <<4 x float>*>:57 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 156, i32 3 ; <<4 x float>*>:58 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 155, i32 1 ; <<4 x float>*>:59 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 155, i32 2 ; <<4 x float>*>:60 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 155, i32 3 ; <<4 x float>*>:61 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 154, i32 1 ; <<4 x float>*>:62 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 154, i32 2 ; <<4 x float>*>:63 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 154, i32 3 ; <<4 x float>*>:64 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 153, i32 1 ; <<4 x float>*>:65 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 153, i32 2 ; <<4 x float>*>:66 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 153, i32 3 ; <<4 x float>*>:67 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 152, i32 1 ; <<4 x float>*>:68 [#uses=0] + getelementptr [193 x [4 x 
<4 x float>]]* null, i32 0, i32 152, i32 2 ; <<4 x float>*>:69 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 152, i32 3 ; <<4 x float>*>:70 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 151, i32 1 ; <<4 x float>*>:71 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 151, i32 2 ; <<4 x float>*>:72 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 151, i32 3 ; <<4 x float>*>:73 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 150, i32 1 ; <<4 x float>*>:74 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 150, i32 2 ; <<4 x float>*>:75 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 150, i32 3 ; <<4 x float>*>:76 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 149, i32 1 ; <<4 x float>*>:77 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 149, i32 2 ; <<4 x float>*>:78 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 149, i32 3 ; <<4 x float>*>:79 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 148, i32 1 ; <<4 x float>*>:80 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 148, i32 2 ; <<4 x float>*>:81 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 148, i32 3 ; <<4 x float>*>:82 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 147, i32 1 ; <<4 x float>*>:83 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 147, i32 2 ; <<4 x float>*>:84 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 147, i32 3 ; <<4 x float>*>:85 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 146, i32 1 ; <<4 x float>*>:86 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 146, i32 2 ; <<4 x float>*>:87 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 146, i32 3 ; <<4 x float>*>:88 [#uses=0] + 
getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 145, i32 1 ; <<4 x float>*>:89 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 145, i32 2 ; <<4 x float>*>:90 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 145, i32 3 ; <<4 x float>*>:91 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 144, i32 1 ; <<4 x float>*>:92 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 144, i32 2 ; <<4 x float>*>:93 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 144, i32 3 ; <<4 x float>*>:94 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 143, i32 1 ; <<4 x float>*>:95 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 143, i32 2 ; <<4 x float>*>:96 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 143, i32 3 ; <<4 x float>*>:97 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 142, i32 1 ; <<4 x float>*>:98 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 142, i32 2 ; <<4 x float>*>:99 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 142, i32 3 ; <<4 x float>*>:100 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 141, i32 1 ; <<4 x float>*>:101 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 141, i32 2 ; <<4 x float>*>:102 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 141, i32 3 ; <<4 x float>*>:103 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 140, i32 1 ; <<4 x float>*>:104 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 140, i32 2 ; <<4 x float>*>:105 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 140, i32 3 ; <<4 x float>*>:106 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 139, i32 1 ; <<4 x float>*>:107 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 139, i32 2 ; 
<<4 x float>*>:108 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 139, i32 3 ; <<4 x float>*>:109 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 138, i32 1 ; <<4 x float>*>:110 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 138, i32 2 ; <<4 x float>*>:111 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 138, i32 3 ; <<4 x float>*>:112 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 137, i32 1 ; <<4 x float>*>:113 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 137, i32 2 ; <<4 x float>*>:114 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 137, i32 3 ; <<4 x float>*>:115 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 136, i32 1 ; <<4 x float>*>:116 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 136, i32 2 ; <<4 x float>*>:117 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 136, i32 3 ; <<4 x float>*>:118 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 135, i32 1 ; <<4 x float>*>:119 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 135, i32 2 ; <<4 x float>*>:120 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 135, i32 3 ; <<4 x float>*>:121 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 134, i32 1 ; <<4 x float>*>:122 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 134, i32 2 ; <<4 x float>*>:123 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 134, i32 3 ; <<4 x float>*>:124 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 133, i32 1 ; <<4 x float>*>:125 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 133, i32 2 ; <<4 x float>*>:126 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 133, i32 3 ; <<4 x float>*>:127 [#uses=0] + getelementptr [193 x [4 x <4 x 
float>]]* null, i32 0, i32 132, i32 1 ; <<4 x float>*>:128 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 132, i32 2 ; <<4 x float>*>:129 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 132, i32 3 ; <<4 x float>*>:130 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 131, i32 1 ; <<4 x float>*>:131 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 131, i32 2 ; <<4 x float>*>:132 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 131, i32 3 ; <<4 x float>*>:133 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 130, i32 1 ; <<4 x float>*>:134 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 130, i32 2 ; <<4 x float>*>:135 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 130, i32 3 ; <<4 x float>*>:136 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 129, i32 1 ; <<4 x float>*>:137 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 129, i32 2 ; <<4 x float>*>:138 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 129, i32 3 ; <<4 x float>*>:139 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 128, i32 1 ; <<4 x float>*>:140 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 128, i32 2 ; <<4 x float>*>:141 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 128, i32 3 ; <<4 x float>*>:142 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 127, i32 1 ; <<4 x float>*>:143 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 127, i32 2 ; <<4 x float>*>:144 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 127, i32 3 ; <<4 x float>*>:145 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 126, i32 1 ; <<4 x float>*>:146 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 126, i32 2 ; <<4 x float>*>:147 
[#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 126, i32 3 ; <<4 x float>*>:148 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 125, i32 1 ; <<4 x float>*>:149 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 125, i32 2 ; <<4 x float>*>:150 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 125, i32 3 ; <<4 x float>*>:151 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 124, i32 1 ; <<4 x float>*>:152 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 124, i32 2 ; <<4 x float>*>:153 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 124, i32 3 ; <<4 x float>*>:154 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 123, i32 1 ; <<4 x float>*>:155 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 123, i32 2 ; <<4 x float>*>:156 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 123, i32 3 ; <<4 x float>*>:157 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 122, i32 1 ; <<4 x float>*>:158 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 122, i32 2 ; <<4 x float>*>:159 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 122, i32 3 ; <<4 x float>*>:160 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 121, i32 1 ; <<4 x float>*>:161 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 121, i32 2 ; <<4 x float>*>:162 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 121, i32 3 ; <<4 x float>*>:163 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 120, i32 1 ; <<4 x float>*>:164 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 120, i32 2 ; <<4 x float>*>:165 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 120, i32 3 ; <<4 x float>*>:166 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, 
i32 0, i32 119, i32 1 ; <<4 x float>*>:167 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 119, i32 2 ; <<4 x float>*>:168 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 119, i32 3 ; <<4 x float>*>:169 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 118, i32 1 ; <<4 x float>*>:170 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 118, i32 2 ; <<4 x float>*>:171 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 118, i32 3 ; <<4 x float>*>:172 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 117, i32 1 ; <<4 x float>*>:173 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 117, i32 2 ; <<4 x float>*>:174 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 117, i32 3 ; <<4 x float>*>:175 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 116, i32 1 ; <<4 x float>*>:176 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 116, i32 2 ; <<4 x float>*>:177 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 116, i32 3 ; <<4 x float>*>:178 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 115, i32 1 ; <<4 x float>*>:179 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 115, i32 2 ; <<4 x float>*>:180 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 115, i32 3 ; <<4 x float>*>:181 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 114, i32 1 ; <<4 x float>*>:182 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 114, i32 2 ; <<4 x float>*>:183 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 114, i32 3 ; <<4 x float>*>:184 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 113, i32 1 ; <<4 x float>*>:185 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 113, i32 2 ; <<4 x float>*>:186 [#uses=0] + 
getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 113, i32 3 ; <<4 x float>*>:187 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 112, i32 1 ; <<4 x float>*>:188 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 112, i32 2 ; <<4 x float>*>:189 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 112, i32 3 ; <<4 x float>*>:190 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 111, i32 1 ; <<4 x float>*>:191 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 111, i32 2 ; <<4 x float>*>:192 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 111, i32 3 ; <<4 x float>*>:193 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 110, i32 1 ; <<4 x float>*>:194 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 110, i32 2 ; <<4 x float>*>:195 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 110, i32 3 ; <<4 x float>*>:196 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 109, i32 1 ; <<4 x float>*>:197 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 109, i32 2 ; <<4 x float>*>:198 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 109, i32 3 ; <<4 x float>*>:199 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 108, i32 1 ; <<4 x float>*>:200 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 108, i32 2 ; <<4 x float>*>:201 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 108, i32 3 ; <<4 x float>*>:202 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 107, i32 1 ; <<4 x float>*>:203 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 107, i32 2 ; <<4 x float>*>:204 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 107, i32 3 ; <<4 x float>*>:205 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 
106, i32 1 ; <<4 x float>*>:206 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 106, i32 2 ; <<4 x float>*>:207 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 106, i32 3 ; <<4 x float>*>:208 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 105, i32 1 ; <<4 x float>*>:209 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 105, i32 2 ; <<4 x float>*>:210 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 105, i32 3 ; <<4 x float>*>:211 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 104, i32 1 ; <<4 x float>*>:212 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 104, i32 2 ; <<4 x float>*>:213 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 104, i32 3 ; <<4 x float>*>:214 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 103, i32 1 ; <<4 x float>*>:215 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 103, i32 2 ; <<4 x float>*>:216 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 103, i32 3 ; <<4 x float>*>:217 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 102, i32 1 ; <<4 x float>*>:218 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 102, i32 2 ; <<4 x float>*>:219 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 102, i32 3 ; <<4 x float>*>:220 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 101, i32 1 ; <<4 x float>*>:221 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 101, i32 2 ; <<4 x float>*>:222 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 101, i32 3 ; <<4 x float>*>:223 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 100, i32 1 ; <<4 x float>*>:224 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 100, i32 2 ; <<4 x float>*>:225 [#uses=0] + getelementptr 
[193 x [4 x <4 x float>]]* null, i32 0, i32 100, i32 3 ; <<4 x float>*>:226 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 99, i32 1 ; <<4 x float>*>:227 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 99, i32 2 ; <<4 x float>*>:228 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 99, i32 3 ; <<4 x float>*>:229 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 98, i32 1 ; <<4 x float>*>:230 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 98, i32 2 ; <<4 x float>*>:231 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 98, i32 3 ; <<4 x float>*>:232 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 97, i32 1 ; <<4 x float>*>:233 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 97, i32 2 ; <<4 x float>*>:234 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 97, i32 3 ; <<4 x float>*>:235 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 96, i32 1 ; <<4 x float>*>:236 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 96, i32 2 ; <<4 x float>*>:237 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 96, i32 3 ; <<4 x float>*>:238 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 95, i32 1 ; <<4 x float>*>:239 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 95, i32 2 ; <<4 x float>*>:240 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 95, i32 3 ; <<4 x float>*>:241 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 94, i32 1 ; <<4 x float>*>:242 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 94, i32 2 ; <<4 x float>*>:243 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 94, i32 3 ; <<4 x float>*>:244 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 93, i32 1 ; <<4 x float>*>:245 
[#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 93, i32 2 ; <<4 x float>*>:246 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 93, i32 3 ; <<4 x float>*>:247 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 92, i32 1 ; <<4 x float>*>:248 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 92, i32 2 ; <<4 x float>*>:249 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 92, i32 3 ; <<4 x float>*>:250 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 91, i32 1 ; <<4 x float>*>:251 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 91, i32 2 ; <<4 x float>*>:252 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 91, i32 3 ; <<4 x float>*>:253 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 90, i32 1 ; <<4 x float>*>:254 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 90, i32 2 ; <<4 x float>*>:255 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 90, i32 3 ; <<4 x float>*>:256 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 89, i32 1 ; <<4 x float>*>:257 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 89, i32 2 ; <<4 x float>*>:258 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 89, i32 3 ; <<4 x float>*>:259 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 88, i32 1 ; <<4 x float>*>:260 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 88, i32 2 ; <<4 x float>*>:261 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 88, i32 3 ; <<4 x float>*>:262 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 87, i32 1 ; <<4 x float>*>:263 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 87, i32 2 ; <<4 x float>*>:264 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 87, i32 3 
; <<4 x float>*>:265 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 86, i32 1 ; <<4 x float>*>:266 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 86, i32 2 ; <<4 x float>*>:267 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 86, i32 3 ; <<4 x float>*>:268 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 85, i32 1 ; <<4 x float>*>:269 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 85, i32 2 ; <<4 x float>*>:270 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 85, i32 3 ; <<4 x float>*>:271 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 84, i32 1 ; <<4 x float>*>:272 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 84, i32 2 ; <<4 x float>*>:273 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 84, i32 3 ; <<4 x float>*>:274 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 83, i32 1 ; <<4 x float>*>:275 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 83, i32 2 ; <<4 x float>*>:276 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 83, i32 3 ; <<4 x float>*>:277 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 82, i32 1 ; <<4 x float>*>:278 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 82, i32 2 ; <<4 x float>*>:279 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 82, i32 3 ; <<4 x float>*>:280 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 81, i32 1 ; <<4 x float>*>:281 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 81, i32 2 ; <<4 x float>*>:282 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 81, i32 3 ; <<4 x float>*>:283 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 80, i32 1 ; <<4 x float>*>:284 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, 
i32 0, i32 80, i32 2 ; <<4 x float>*>:285 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 80, i32 3 ; <<4 x float>*>:286 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 79, i32 1 ; <<4 x float>*>:287 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 79, i32 2 ; <<4 x float>*>:288 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 79, i32 3 ; <<4 x float>*>:289 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 78, i32 1 ; <<4 x float>*>:290 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 78, i32 2 ; <<4 x float>*>:291 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 78, i32 3 ; <<4 x float>*>:292 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 77, i32 1 ; <<4 x float>*>:293 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 77, i32 2 ; <<4 x float>*>:294 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 77, i32 3 ; <<4 x float>*>:295 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 76, i32 1 ; <<4 x float>*>:296 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 76, i32 2 ; <<4 x float>*>:297 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 76, i32 3 ; <<4 x float>*>:298 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 75, i32 1 ; <<4 x float>*>:299 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 75, i32 2 ; <<4 x float>*>:300 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 75, i32 3 ; <<4 x float>*>:301 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 74, i32 1 ; <<4 x float>*>:302 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 74, i32 2 ; <<4 x float>*>:303 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 74, i32 3 ; <<4 x float>*>:304 [#uses=0] + getelementptr [193 x [4 x 
<4 x float>]]* null, i32 0, i32 73, i32 1 ; <<4 x float>*>:305 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 73, i32 2 ; <<4 x float>*>:306 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 73, i32 3 ; <<4 x float>*>:307 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 72, i32 1 ; <<4 x float>*>:308 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 72, i32 2 ; <<4 x float>*>:309 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 72, i32 3 ; <<4 x float>*>:310 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 71, i32 1 ; <<4 x float>*>:311 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 71, i32 2 ; <<4 x float>*>:312 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 71, i32 3 ; <<4 x float>*>:313 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 70, i32 1 ; <<4 x float>*>:314 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 70, i32 2 ; <<4 x float>*>:315 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 70, i32 3 ; <<4 x float>*>:316 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 69, i32 1 ; <<4 x float>*>:317 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 69, i32 2 ; <<4 x float>*>:318 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 69, i32 3 ; <<4 x float>*>:319 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 68, i32 1 ; <<4 x float>*>:320 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 68, i32 2 ; <<4 x float>*>:321 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 68, i32 3 ; <<4 x float>*>:322 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 67, i32 1 ; <<4 x float>*>:323 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 67, i32 2 ; <<4 x float>*>:324 [#uses=0] + 
getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 67, i32 3 ; <<4 x float>*>:325 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 66, i32 1 ; <<4 x float>*>:326 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 66, i32 2 ; <<4 x float>*>:327 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 66, i32 3 ; <<4 x float>*>:328 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 65, i32 1 ; <<4 x float>*>:329 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 65, i32 2 ; <<4 x float>*>:330 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 65, i32 3 ; <<4 x float>*>:331 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 64, i32 1 ; <<4 x float>*>:332 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 64, i32 2 ; <<4 x float>*>:333 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 64, i32 3 ; <<4 x float>*>:334 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 63, i32 1 ; <<4 x float>*>:335 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 63, i32 2 ; <<4 x float>*>:336 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 63, i32 3 ; <<4 x float>*>:337 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 62, i32 1 ; <<4 x float>*>:338 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 62, i32 2 ; <<4 x float>*>:339 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 62, i32 3 ; <<4 x float>*>:340 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 61, i32 1 ; <<4 x float>*>:341 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 61, i32 2 ; <<4 x float>*>:342 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 61, i32 3 ; <<4 x float>*>:343 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 60, i32 1 ; <<4 x 
float>*>:344 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 60, i32 2 ; <<4 x float>*>:345 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 60, i32 3 ; <<4 x float>*>:346 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 59, i32 1 ; <<4 x float>*>:347 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 59, i32 2 ; <<4 x float>*>:348 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 59, i32 3 ; <<4 x float>*>:349 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 58, i32 1 ; <<4 x float>*>:350 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 58, i32 2 ; <<4 x float>*>:351 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 58, i32 3 ; <<4 x float>*>:352 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 57, i32 1 ; <<4 x float>*>:353 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 57, i32 2 ; <<4 x float>*>:354 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 57, i32 3 ; <<4 x float>*>:355 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 56, i32 1 ; <<4 x float>*>:356 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 56, i32 2 ; <<4 x float>*>:357 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 56, i32 3 ; <<4 x float>*>:358 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 55, i32 1 ; <<4 x float>*>:359 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 55, i32 2 ; <<4 x float>*>:360 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 55, i32 3 ; <<4 x float>*>:361 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 54, i32 1 ; <<4 x float>*>:362 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 54, i32 2 ; <<4 x float>*>:363 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, 
i32 54, i32 3 ; <<4 x float>*>:364 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 53, i32 1 ; <<4 x float>*>:365 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 53, i32 2 ; <<4 x float>*>:366 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 53, i32 3 ; <<4 x float>*>:367 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 52, i32 1 ; <<4 x float>*>:368 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 52, i32 2 ; <<4 x float>*>:369 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 52, i32 3 ; <<4 x float>*>:370 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 51, i32 1 ; <<4 x float>*>:371 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 51, i32 2 ; <<4 x float>*>:372 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 51, i32 3 ; <<4 x float>*>:373 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 50, i32 1 ; <<4 x float>*>:374 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 50, i32 2 ; <<4 x float>*>:375 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 50, i32 3 ; <<4 x float>*>:376 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 49, i32 1 ; <<4 x float>*>:377 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 49, i32 2 ; <<4 x float>*>:378 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 49, i32 3 ; <<4 x float>*>:379 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 48, i32 1 ; <<4 x float>*>:380 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 48, i32 2 ; <<4 x float>*>:381 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 48, i32 3 ; <<4 x float>*>:382 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 47, i32 1 ; <<4 x float>*>:383 [#uses=0] + getelementptr [193 x [4 x <4 x 
float>]]* null, i32 0, i32 47, i32 2 ; <<4 x float>*>:384 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 47, i32 3 ; <<4 x float>*>:385 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 46, i32 1 ; <<4 x float>*>:386 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 46, i32 2 ; <<4 x float>*>:387 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 46, i32 3 ; <<4 x float>*>:388 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 45, i32 1 ; <<4 x float>*>:389 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 45, i32 2 ; <<4 x float>*>:390 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 45, i32 3 ; <<4 x float>*>:391 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 44, i32 1 ; <<4 x float>*>:392 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 44, i32 2 ; <<4 x float>*>:393 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 44, i32 3 ; <<4 x float>*>:394 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 43, i32 1 ; <<4 x float>*>:395 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 43, i32 2 ; <<4 x float>*>:396 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 43, i32 3 ; <<4 x float>*>:397 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 42, i32 1 ; <<4 x float>*>:398 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 42, i32 2 ; <<4 x float>*>:399 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 42, i32 3 ; <<4 x float>*>:400 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 41, i32 1 ; <<4 x float>*>:401 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 41, i32 2 ; <<4 x float>*>:402 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 41, i32 3 ; <<4 x float>*>:403 [#uses=0] + 
getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 40, i32 1 ; <<4 x float>*>:404 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 40, i32 2 ; <<4 x float>*>:405 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 40, i32 3 ; <<4 x float>*>:406 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 39, i32 1 ; <<4 x float>*>:407 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 39, i32 2 ; <<4 x float>*>:408 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 39, i32 3 ; <<4 x float>*>:409 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 38, i32 1 ; <<4 x float>*>:410 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 38, i32 2 ; <<4 x float>*>:411 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 38, i32 3 ; <<4 x float>*>:412 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 37, i32 1 ; <<4 x float>*>:413 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 37, i32 2 ; <<4 x float>*>:414 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 37, i32 3 ; <<4 x float>*>:415 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 36, i32 1 ; <<4 x float>*>:416 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 36, i32 2 ; <<4 x float>*>:417 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 36, i32 3 ; <<4 x float>*>:418 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 35, i32 1 ; <<4 x float>*>:419 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 35, i32 2 ; <<4 x float>*>:420 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 35, i32 3 ; <<4 x float>*>:421 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 34, i32 1 ; <<4 x float>*>:422 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 34, i32 2 ; <<4 x 
float>*>:423 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 34, i32 3 ; <<4 x float>*>:424 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 33, i32 1 ; <<4 x float>*>:425 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 33, i32 2 ; <<4 x float>*>:426 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 33, i32 3 ; <<4 x float>*>:427 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 32, i32 1 ; <<4 x float>*>:428 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 32, i32 2 ; <<4 x float>*>:429 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 32, i32 3 ; <<4 x float>*>:430 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 31, i32 1 ; <<4 x float>*>:431 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 31, i32 2 ; <<4 x float>*>:432 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 31, i32 3 ; <<4 x float>*>:433 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 30, i32 1 ; <<4 x float>*>:434 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 30, i32 2 ; <<4 x float>*>:435 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 30, i32 3 ; <<4 x float>*>:436 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 29, i32 1 ; <<4 x float>*>:437 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 29, i32 2 ; <<4 x float>*>:438 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 29, i32 3 ; <<4 x float>*>:439 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 28, i32 1 ; <<4 x float>*>:440 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 28, i32 2 ; <<4 x float>*>:441 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 28, i32 3 ; <<4 x float>*>:442 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, 
i32 27, i32 1 ; <<4 x float>*>:443 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 27, i32 2 ; <<4 x float>*>:444 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 27, i32 3 ; <<4 x float>*>:445 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 26, i32 1 ; <<4 x float>*>:446 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 26, i32 2 ; <<4 x float>*>:447 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 26, i32 3 ; <<4 x float>*>:448 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 25, i32 1 ; <<4 x float>*>:449 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 25, i32 2 ; <<4 x float>*>:450 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 25, i32 3 ; <<4 x float>*>:451 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 24, i32 1 ; <<4 x float>*>:452 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 24, i32 2 ; <<4 x float>*>:453 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 24, i32 3 ; <<4 x float>*>:454 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 23, i32 1 ; <<4 x float>*>:455 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 23, i32 2 ; <<4 x float>*>:456 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 23, i32 3 ; <<4 x float>*>:457 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 22, i32 1 ; <<4 x float>*>:458 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 22, i32 2 ; <<4 x float>*>:459 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 22, i32 3 ; <<4 x float>*>:460 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 21, i32 1 ; <<4 x float>*>:461 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 21, i32 2 ; <<4 x float>*>:462 [#uses=0] + getelementptr [193 x [4 x <4 x 
float>]]* null, i32 0, i32 21, i32 3 ; <<4 x float>*>:463 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 20, i32 1 ; <<4 x float>*>:464 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 20, i32 2 ; <<4 x float>*>:465 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 20, i32 3 ; <<4 x float>*>:466 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 19, i32 1 ; <<4 x float>*>:467 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 19, i32 2 ; <<4 x float>*>:468 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 19, i32 3 ; <<4 x float>*>:469 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 18, i32 1 ; <<4 x float>*>:470 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 18, i32 2 ; <<4 x float>*>:471 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 18, i32 3 ; <<4 x float>*>:472 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 17, i32 1 ; <<4 x float>*>:473 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 17, i32 2 ; <<4 x float>*>:474 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 17, i32 3 ; <<4 x float>*>:475 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 16, i32 1 ; <<4 x float>*>:476 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 16, i32 2 ; <<4 x float>*>:477 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 16, i32 3 ; <<4 x float>*>:478 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 15, i32 1 ; <<4 x float>*>:479 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 15, i32 2 ; <<4 x float>*>:480 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 15, i32 3 ; <<4 x float>*>:481 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 14, i32 1 ; <<4 x float>*>:482 [#uses=0] + 
getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 14, i32 2 ; <<4 x float>*>:483 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 14, i32 3 ; <<4 x float>*>:484 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 1 ; <<4 x float>*>:485 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 2 ; <<4 x float>*>:486 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 3 ; <<4 x float>*>:487 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 3, i32 1 ; <<4 x float>*>:488 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 3, i32 2 ; <<4 x float>*>:489 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 3, i32 3 ; <<4 x float>*>:490 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 180, i32 1 ; <<4 x float>*>:491 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 180, i32 2 ; <<4 x float>*>:492 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 180, i32 3 ; <<4 x float>*>:493 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 181, i32 1 ; <<4 x float>*>:494 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 181, i32 2 ; <<4 x float>*>:495 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 181, i32 3 ; <<4 x float>*>:496 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 182, i32 1 ; <<4 x float>*>:497 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 182, i32 2 ; <<4 x float>*>:498 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 182, i32 3 ; <<4 x float>*>:499 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 183, i32 1 ; <<4 x float>*>:500 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 183, i32 2 ; <<4 x float>*>:501 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 183, i32 3 ; <<4 
x float>*>:502 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 184, i32 1 ; <<4 x float>*>:503 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 184, i32 2 ; <<4 x float>*>:504 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 184, i32 3 ; <<4 x float>*>:505 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 185, i32 1 ; <<4 x float>*>:506 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 185, i32 2 ; <<4 x float>*>:507 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 185, i32 3 ; <<4 x float>*>:508 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 190, i32 1 ; <<4 x float>*>:509 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 190, i32 2 ; <<4 x float>*>:510 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 190, i32 3 ; <<4 x float>*>:511 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 9, i32 1 ; <<4 x float>*>:512 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 9, i32 2 ; <<4 x float>*>:513 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 9, i32 3 ; <<4 x float>*>:514 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 10, i32 1 ; <<4 x float>*>:515 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 10, i32 2 ; <<4 x float>*>:516 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 10, i32 3 ; <<4 x float>*>:517 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 11, i32 1 ; <<4 x float>*>:518 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 11, i32 2 ; <<4 x float>*>:519 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 11, i32 3 ; <<4 x float>*>:520 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 12, i32 1 ; <<4 x float>*>:521 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, 
i32 0, i32 12, i32 2 ; <<4 x float>*>:522 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 12, i32 3 ; <<4 x float>*>:523 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 13, i32 1 ; <<4 x float>*>:524 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 13, i32 2 ; <<4 x float>*>:525 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 13, i32 3 ; <<4 x float>*>:526 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 1 ; <<4 x float>*>:527 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 2 ; <<4 x float>*>:528 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 3 ; <<4 x float>*>:529 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 1 ; <<4 x float>*>:530 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 2 ; <<4 x float>*>:531 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 3 ; <<4 x float>*>:532 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 2, i32 1 ; <<4 x float>*>:533 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 2, i32 2 ; <<4 x float>*>:534 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 2, i32 3 ; <<4 x float>*>:535 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 6, i32 1 ; <<4 x float>*>:536 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 6, i32 2 ; <<4 x float>*>:537 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 6, i32 3 ; <<4 x float>*>:538 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 7, i32 1 ; <<4 x float>*>:539 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 7, i32 2 ; <<4 x float>*>:540 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 7, i32 3 ; <<4 x float>*>:541 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* 
null, i32 0, i32 186, i32 1 ; <<4 x float>*>:542 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 2 ; <<4 x float>*>:543 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 3 ; <<4 x float>*>:544 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 187, i32 1 ; <<4 x float>*>:545 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 187, i32 2 ; <<4 x float>*>:546 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 187, i32 3 ; <<4 x float>*>:547 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 8, i32 1 ; <<4 x float>*>:548 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 8, i32 2 ; <<4 x float>*>:549 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 8, i32 3 ; <<4 x float>*>:550 [#uses=0] + load <4 x float>* null ; <<4 x float>>:551 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 188, i32 1 ; <<4 x float>*>:552 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 188, i32 2 ; <<4 x float>*>:553 [#uses=1] + load <4 x float>* %553 ; <<4 x float>>:554 [#uses=1] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 188, i32 3 ; <<4 x float>*>:555 [#uses=0] + shufflevector <4 x float> %554, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:556 [#uses=1] + call <4 x i32> @llvm.ppc.altivec.vcmpgtfp( <4 x float> zeroinitializer, <4 x float> %556 ) ; <<4 x i32>>:557 [#uses=0] + bitcast <4 x i32> zeroinitializer to <4 x float> ; <<4 x float>>:558 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 0 ; <<4 x float>*>:559 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 2 ; <<4 x float>*>:560 [#uses=1] + store <4 x float> zeroinitializer, <4 x float>* %560 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 3 ; <<4 x float>*>:561 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* 
null, i32 0, i32 189, i32 1 ; <<4 x float>*>:562 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 2 ; <<4 x float>*>:563 [#uses=0] + load <4 x i32>* %.sub7896 ; <<4 x i32>>:564 [#uses=0] + shufflevector <4 x float> zeroinitializer, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:565 [#uses=1] + store <4 x float> %565, <4 x float>* null + icmp eq i32 0, 0 ; <i1>:566 [#uses=1] + br i1 %566, label %.critedge, label %xPIF.exit + +.critedge: ; preds = %xOperationInitMasks.exit + getelementptr [4 x <4 x i32>]* null, i32 0, i32 3 ; <<4 x i32>*>:567 [#uses=0] + and <4 x i32> zeroinitializer, zeroinitializer ; <<4 x i32>>:568 [#uses=0] + or <4 x i32> zeroinitializer, zeroinitializer ; <<4 x i32>>:569 [#uses=0] + icmp eq i32 0, 0 ; <i1>:570 [#uses=1] + br i1 %570, label %.critedge7898, label %xPBRK.exit + +.critedge7898: ; preds = %.critedge + br label %xPIF.exit + +xPIF.exit: ; preds = %.critedge7898, %xOperationInitMasks.exit + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 188, i32 1 ; <<4 x float>*>:571 [#uses=0] + load <4 x float>* null ; <<4 x float>>:572 [#uses=0] + shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:573 [#uses=0] + icmp eq i32 0, 0 ; <i1>:574 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 3, i32 1 ; <<4 x float>*>:575 [#uses=0] + load <4 x float>* %0 ; <<4 x float>>:576 [#uses=0] + call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:577 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 2, i32 0 ; <<4 x float>*>:578 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 2, i32 1 ; <<4 x float>*>:579 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 2, i32 2 ; <<4 x float>*>:580 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 2, i32 3 ; <<4 x float>*>:581 [#uses=0] + 
getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 3 ; <<4 x float>*>:582 [#uses=0] + load <4 x float>* null ; <<4 x float>>:583 [#uses=1] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 1 ; <<4 x float>*>:584 [#uses=1] + load <4 x float>* %584 ; <<4 x float>>:585 [#uses=1] + load <4 x float>* null ; <<4 x float>>:586 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 3 ; <<4 x float>*>:587 [#uses=1] + load <4 x float>* %587 ; <<4 x float>>:588 [#uses=1] + shufflevector <4 x float> %583, <4 x float> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > ; <<4 x float>>:589 [#uses=1] + shufflevector <4 x float> %585, <4 x float> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > ; <<4 x float>>:590 [#uses=1] + shufflevector <4 x float> %588, <4 x float> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > ; <<4 x float>>:591 [#uses=1] + mul <4 x float> zeroinitializer, %589 ; <<4 x float>>:592 [#uses=0] + mul <4 x float> zeroinitializer, %590 ; <<4 x float>>:593 [#uses=0] + mul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:594 [#uses=1] + mul <4 x float> zeroinitializer, %591 ; <<4 x float>>:595 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 0 ; <<4 x float>*>:596 [#uses=2] + load <4 x float>* %596 ; <<4 x float>>:597 [#uses=0] + store <4 x float> zeroinitializer, <4 x float>* %596 + load <4 x float>* null ; <<4 x float>>:598 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 2 ; <<4 x float>*>:599 [#uses=0] + shufflevector <4 x float> %594, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 1, i32 2, i32 7 > ; <<4 x float>>:600 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 3 ; <<4 x float>*>:601 [#uses=2] + load <4 x float>* %601 ; <<4 x float>>:602 [#uses=0] + store <4 x float> zeroinitializer, <4 x float>* %601 + load <4 x float>* null ; <<4 x float>>:603 [#uses=0] + load <4 x float>* null ; <<4 x float>>:604 [#uses=1] 
+ getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 2 ; <<4 x float>*>:605 [#uses=1] + load <4 x float>* %605 ; <<4 x float>>:606 [#uses=1] + sub <4 x float> zeroinitializer, %604 ; <<4 x float>>:607 [#uses=2] + sub <4 x float> zeroinitializer, %606 ; <<4 x float>>:608 [#uses=2] + call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:609 [#uses=0] + br i1 false, label %617, label %610 + +; <label>:610 ; preds = %xPIF.exit + load <4 x float>* null ; <<4 x float>>:611 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 1 ; <<4 x float>*>:612 [#uses=2] + load <4 x float>* %612 ; <<4 x float>>:613 [#uses=1] + shufflevector <4 x float> %607, <4 x float> %613, <4 x i32> < i32 0, i32 1, i32 2, i32 7 > ; <<4 x float>>:614 [#uses=1] + store <4 x float> %614, <4 x float>* %612 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 3 ; <<4 x float>*>:615 [#uses=2] + load <4 x float>* %615 ; <<4 x float>>:616 [#uses=0] + store <4 x float> zeroinitializer, <4 x float>* %615 + br label %xST.exit400 + +; <label>:617 ; preds = %xPIF.exit + call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:618 [#uses=0] + shufflevector <4 x i32> zeroinitializer, <4 x i32> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 > ; <<4 x i32>>:619 [#uses=1] + call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> %619, <4 x i32> zeroinitializer ) ; <i32>:620 [#uses=1] + icmp eq i32 %620, 0 ; <i1>:621 [#uses=1] + br i1 %621, label %625, label %622 + +; <label>:622 ; preds = %617 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 1 ; <<4 x float>*>:623 [#uses=0] + shufflevector <4 x float> %607, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 1, i32 2, i32 7 > ; <<4 x float>>:624 [#uses=0] + br label %625 + +; <label>:625 ; preds = %622, %617 + load <4 x i32>* %.sub7896 ; <<4 x i32>>:626 [#uses=0] + call i32 
@llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:627 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 2 ; <<4 x float>*>:628 [#uses=1] + load <4 x float>* %628 ; <<4 x float>>:629 [#uses=0] + load <4 x i32>* %.sub7896 ; <<4 x i32>>:630 [#uses=0] + call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:631 [#uses=1] + icmp eq i32 %631, 0 ; <i1>:632 [#uses=1] + br i1 %632, label %xST.exit400, label %633 + +; <label>:633 ; preds = %625 + load <4 x float>* null ; <<4 x float>>:634 [#uses=1] + shufflevector <4 x float> zeroinitializer, <4 x float> %634, <4 x i32> < i32 0, i32 1, i32 2, i32 7 > ; <<4 x float>>:635 [#uses=1] + store <4 x float> %635, <4 x float>* null + br label %xST.exit400 + +xST.exit400: ; preds = %633, %625, %610 + %.17218 = phi <4 x float> [ zeroinitializer, %610 ], [ %608, %633 ], [ %608, %625 ] ; <<4 x float>> [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 0 ; <<4 x float>*>:636 [#uses=1] + load <4 x float>* %636 ; <<4 x float>>:637 [#uses=0] + load <4 x float>* null ; <<4 x float>>:638 [#uses=2] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 2 ; <<4 x float>*>:639 [#uses=0] + load <4 x float>* null ; <<4 x float>>:640 [#uses=2] + mul <4 x float> %638, %638 ; <<4 x float>>:641 [#uses=1] + mul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:642 [#uses=0] + mul <4 x float> %640, %640 ; <<4 x float>>:643 [#uses=2] + shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 > ; <<4 x float>>:644 [#uses=0] + shufflevector <4 x float> %643, <4 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 > ; <<4 x float>>:645 [#uses=1] + add <4 x float> %645, %643 ; <<4 x float>>:646 [#uses=0] + shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> < i32 2, i32 2, i32 2, i32 2 > ; <<4 x float>>:647 [#uses=1] 
+ shufflevector <4 x float> %641, <4 x float> undef, <4 x i32> < i32 2, i32 2, i32 2, i32 2 > ; <<4 x float>>:648 [#uses=1] + add <4 x float> zeroinitializer, %647 ; <<4 x float>>:649 [#uses=2] + add <4 x float> zeroinitializer, %648 ; <<4 x float>>:650 [#uses=0] + add <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:651 [#uses=2] + call <4 x float> @llvm.ppc.altivec.vrsqrtefp( <4 x float> %649 ) ; <<4 x float>>:652 [#uses=1] + mul <4 x float> %652, %649 ; <<4 x float>>:653 [#uses=1] + call <4 x float> @llvm.ppc.altivec.vrsqrtefp( <4 x float> %651 ) ; <<4 x float>>:654 [#uses=1] + mul <4 x float> %654, %651 ; <<4 x float>>:655 [#uses=0] + icmp eq i32 0, 0 ; <i1>:656 [#uses=1] + br i1 %656, label %665, label %657 + +; <label>:657 ; preds = %xST.exit400 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 0 ; <<4 x float>*>:658 [#uses=0] + shufflevector <4 x float> %653, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:659 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 1 ; <<4 x float>*>:660 [#uses=1] + load <4 x float>* %660 ; <<4 x float>>:661 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 2 ; <<4 x float>*>:662 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 3 ; <<4 x float>*>:663 [#uses=0] + shufflevector <4 x float> zeroinitializer, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:664 [#uses=0] + br label %xST.exit402 + +; <label>:665 ; preds = %xST.exit400 + call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:666 [#uses=0] + br i1 false, label %669, label %667 + +; <label>:667 ; preds = %665 + load <4 x float>* null ; <<4 x float>>:668 [#uses=0] + br label %669 + +; <label>:669 ; preds = %667, %665 + call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:670 
[#uses=0] + br label %xST.exit402 + +xST.exit402: ; preds = %669, %657 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 0 ; <<4 x float>*>:671 [#uses=0] + load <4 x float>* null ; <<4 x float>>:672 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 2 ; <<4 x float>*>:673 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 1 ; <<4 x float>*>:674 [#uses=1] + load <4 x float>* %674 ; <<4 x float>>:675 [#uses=1] + load <4 x float>* null ; <<4 x float>>:676 [#uses=0] + shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:677 [#uses=1] + shufflevector <4 x float> %675, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:678 [#uses=1] + mul <4 x float> zeroinitializer, %677 ; <<4 x float>>:679 [#uses=0] + mul <4 x float> zeroinitializer, %678 ; <<4 x float>>:680 [#uses=0] + mul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:681 [#uses=1] + icmp eq i32 0, 0 ; <i1>:682 [#uses=1] + br i1 %682, label %689, label %683 + +; <label>:683 ; preds = %xST.exit402 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 1 ; <<4 x float>*>:684 [#uses=1] + load <4 x float>* %684 ; <<4 x float>>:685 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 2 ; <<4 x float>*>:686 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 3 ; <<4 x float>*>:687 [#uses=0] + shufflevector <4 x float> %681, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:688 [#uses=0] + br label %xST.exit405 + +; <label>:689 ; preds = %xST.exit402 + shufflevector <4 x i32> zeroinitializer, <4 x i32> undef, <4 x i32> zeroinitializer ; <<4 x i32>>:690 [#uses=0] + load <4 x i32>* %.sub7896 ; <<4 x i32>>:691 [#uses=1] + shufflevector <4 x i32> %691, <4 x i32> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > ; <<4 x i32>>:692 [#uses=1] + call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x 
i32> %692, <4 x i32> zeroinitializer ) ; <i32>:693 [#uses=1] + icmp eq i32 %693, 0 ; <i1>:694 [#uses=0] + br label %xST.exit405 + +xST.exit405: ; preds = %689, %683 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 3 ; <<4 x float>*>:695 [#uses=0] + shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:696 [#uses=0] + shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:697 [#uses=0] + load <4 x float>* null ; <<4 x float>>:698 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 2 ; <<4 x float>*>:699 [#uses=0] + shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:700 [#uses=1] + add <4 x float> zeroinitializer, %700 ; <<4 x float>>:701 [#uses=0] + load <4 x i32>* %.sub7896 ; <<4 x i32>>:702 [#uses=1] + call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> %702, <4 x i32> zeroinitializer ) ; <i32>:703 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 1 ; <<4 x float>*>:704 [#uses=2] + load <4 x float>* %704 ; <<4 x float>>:705 [#uses=0] + store <4 x float> zeroinitializer, <4 x float>* %704 + load <4 x float>* null ; <<4 x float>>:706 [#uses=0] + store <4 x float> zeroinitializer, <4 x float>* null + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 3 ; <<4 x float>*>:707 [#uses=2] + load <4 x float>* %707 ; <<4 x float>>:708 [#uses=0] + store <4 x float> zeroinitializer, <4 x float>* %707 + load <4 x float>* null ; <<4 x float>>:709 [#uses=0] + load <4 x float>* null ; <<4 x float>>:710 [#uses=0] + load <4 x float>* null ; <<4 x float>>:711 [#uses=1] + shufflevector <4 x float> %711, <4 x float> undef, <4 x i32> < i32 2, i32 2, i32 2, i32 2 > ; <<4 x float>>:712 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 1 ; <<4 x float>*>:713 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 
4, i32 2 ; <<4 x float>*>:714 [#uses=1] + load <4 x float>* %714 ; <<4 x float>>:715 [#uses=0] + shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:716 [#uses=0] + mul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:717 [#uses=1] + load <4 x i32>* %.sub7896 ; <<4 x i32>>:718 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 0 ; <<4 x float>*>:719 [#uses=1] + store <4 x float> zeroinitializer, <4 x float>* %719 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 1 ; <<4 x float>*>:720 [#uses=1] + shufflevector <4 x float> %717, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:721 [#uses=1] + store <4 x float> %721, <4 x float>* %720 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 2 ; <<4 x float>*>:722 [#uses=1] + load <4 x float>* %722 ; <<4 x float>>:723 [#uses=1] + shufflevector <4 x float> zeroinitializer, <4 x float> %723, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:724 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 3 ; <<4 x float>*>:725 [#uses=1] + store <4 x float> zeroinitializer, <4 x float>* %725 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 2 ; <<4 x float>*>:726 [#uses=1] + load <4 x float>* %726 ; <<4 x float>>:727 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 3 ; <<4 x float>*>:728 [#uses=1] + load <4 x float>* %728 ; <<4 x float>>:729 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 0 ; <<4 x float>*>:730 [#uses=1] + load <4 x float>* %730 ; <<4 x float>>:731 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 1 ; <<4 x float>*>:732 [#uses=1] + load <4 x float>* %732 ; <<4 x float>>:733 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 3 ; <<4 x float>*>:734 [#uses=0] + shufflevector <4 x float> zeroinitializer, <4 x 
float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:735 [#uses=1] + mul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:736 [#uses=1] + mul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:737 [#uses=1] + mul <4 x float> zeroinitializer, %735 ; <<4 x float>>:738 [#uses=1] + mul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:739 [#uses=1] + call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:740 [#uses=1] + icmp eq i32 %740, 0 ; <i1>:741 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 0 ; <<4 x float>*>:742 [#uses=2] + load <4 x float>* %742 ; <<4 x float>>:743 [#uses=1] + shufflevector <4 x float> %736, <4 x float> %743, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:744 [#uses=1] + store <4 x float> %744, <4 x float>* %742 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 1 ; <<4 x float>*>:745 [#uses=1] + load <4 x float>* %745 ; <<4 x float>>:746 [#uses=1] + shufflevector <4 x float> %737, <4 x float> %746, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:747 [#uses=0] + shufflevector <4 x float> %738, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:748 [#uses=1] + store <4 x float> %748, <4 x float>* null + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 3 ; <<4 x float>*>:749 [#uses=1] + load <4 x float>* %749 ; <<4 x float>>:750 [#uses=1] + shufflevector <4 x float> %739, <4 x float> %750, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:751 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 0 ; <<4 x float>*>:752 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 1 ; <<4 x float>*>:753 [#uses=1] + load <4 x float>* %753 ; <<4 x float>>:754 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 2 ; <<4 x float>*>:755 [#uses=0] + load <4 x float>* null ; <<4 x 
float>>:756 [#uses=1] + shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:757 [#uses=1] + shufflevector <4 x float> %756, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:758 [#uses=1] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 2 ; <<4 x float>*>:759 [#uses=1] + load <4 x float>* %759 ; <<4 x float>>:760 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 3 ; <<4 x float>*>:761 [#uses=0] + shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:762 [#uses=0] + shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:763 [#uses=1] + add <4 x float> %757, zeroinitializer ; <<4 x float>>:764 [#uses=0] + add <4 x float> %758, %763 ; <<4 x float>>:765 [#uses=0] + mul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:766 [#uses=1] + br i1 false, label %773, label %767 + +; <label>:767 ; preds = %xST.exit405 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 1 ; <<4 x float>*>:768 [#uses=0] + load <4 x float>* null ; <<4 x float>>:769 [#uses=1] + shufflevector <4 x float> zeroinitializer, <4 x float> %769, <4 x i32> < i32 0, i32 1, i32 2, i32 7 > ; <<4 x float>>:770 [#uses=1] + store <4 x float> %770, <4 x float>* null + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 3 ; <<4 x float>*>:771 [#uses=1] + load <4 x float>* %771 ; <<4 x float>>:772 [#uses=0] + br label %xST.exit422 + +; <label>:773 ; preds = %xST.exit405 + br label %xST.exit422 + +xST.exit422: ; preds = %773, %767 + %.07267 = phi <4 x float> [ %766, %767 ], [ undef, %773 ] ; <<4 x float>> [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 3 ; <<4 x float>*>:774 [#uses=0] + mul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:775 [#uses=0] + icmp eq i32 0, 0 ; <i1>:776 [#uses=1] + br i1 %776, label %780, label %777 + +; 
<label>:777 ; preds = %xST.exit422 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 2 ; <<4 x float>*>:778 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 3 ; <<4 x float>*>:779 [#uses=0] + br label %xST.exit431 + +; <label>:780 ; preds = %xST.exit422 + load <4 x i32>* %.sub7896 ; <<4 x i32>>:781 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 2 ; <<4 x float>*>:782 [#uses=2] + load <4 x float>* %782 ; <<4 x float>>:783 [#uses=0] + store <4 x float> zeroinitializer, <4 x float>* %782 + load <4 x i32>* %.sub7896 ; <<4 x i32>>:784 [#uses=1] + shufflevector <4 x i32> %784, <4 x i32> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > ; <<4 x i32>>:785 [#uses=0] + icmp eq i32 0, 0 ; <i1>:786 [#uses=0] + br label %xST.exit431 + +xST.exit431: ; preds = %780, %777 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 2 ; <<4 x float>*>:787 [#uses=0] + load <4 x float>* null ; <<4 x float>>:788 [#uses=0] + load <4 x i32>* %.sub7896 ; <<4 x i32>>:789 [#uses=2] + call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> %789, <4 x i32> zeroinitializer ) ; <i32>:790 [#uses=1] + icmp eq i32 %790, 0 ; <i1>:791 [#uses=0] + shufflevector <4 x i32> %789, <4 x i32> undef, <4 x i32> zeroinitializer ; <<4 x i32>>:792 [#uses=1] + call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> %792, <4 x i32> zeroinitializer ) ; <i32>:793 [#uses=1] + icmp eq i32 %793, 0 ; <i1>:794 [#uses=1] + br i1 %794, label %797, label %795 + +; <label>:795 ; preds = %xST.exit431 + load <4 x float>* null ; <<4 x float>>:796 [#uses=0] + store <4 x float> zeroinitializer, <4 x float>* null + br label %797 + +; <label>:797 ; preds = %795, %xST.exit431 + %.07332 = phi <4 x float> [ zeroinitializer, %795 ], [ undef, %xST.exit431 ] ; <<4 x float>> [#uses=0] + shufflevector <4 x i32> zeroinitializer, <4 x i32> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 > ; <<4 x i32>>:798 [#uses=0] + br i1 false, label %xST.exit434, label %799 + 
+; <label>:799 ; preds = %797 + load <4 x float>* null ; <<4 x float>>:800 [#uses=0] + store <4 x float> zeroinitializer, <4 x float>* null + br label %xST.exit434 + +xST.exit434: ; preds = %799, %797 + load <4 x i32>* %.sub7896 ; <<4 x i32>>:801 [#uses=1] + shufflevector <4 x i32> %801, <4 x i32> undef, <4 x i32> < i32 2, i32 2, i32 2, i32 2 > ; <<4 x i32>>:802 [#uses=0] + shufflevector <4 x i32> zeroinitializer, <4 x i32> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > ; <<4 x i32>>:803 [#uses=0] + icmp eq i32 0, 0 ; <i1>:804 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 0 ; <<4 x float>*>:805 [#uses=1] + load <4 x float>* %805 ; <<4 x float>>:806 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 1 ; <<4 x float>*>:807 [#uses=1] + load <4 x float>* %807 ; <<4 x float>>:808 [#uses=0] + load <4 x float>* null ; <<4 x float>>:809 [#uses=0] + load <4 x float>* null ; <<4 x float>>:810 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 0 ; <<4 x float>*>:811 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 2 ; <<4 x float>*>:812 [#uses=1] + load <4 x float>* %812 ; <<4 x float>>:813 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 3 ; <<4 x float>*>:814 [#uses=1] + load <4 x float>* %814 ; <<4 x float>>:815 [#uses=0] + shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:816 [#uses=0] + unreachable + +xPBRK.exit: ; preds = %.critedge + store <4 x i32> < i32 -1, i32 -1, i32 -1, i32 -1 >, <4 x i32>* %.sub7896 + store <4 x i32> zeroinitializer, <4 x i32>* null + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 1 ; <<4 x float>*>:817 [#uses=1] + load <4 x float>* %817 ; <<4 x float>>:818 [#uses=1] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 2 ; <<4 x float>*>:819 [#uses=1] + load <4 x float>* %819 ; <<4 x float>>:820 [#uses=1] + getelementptr 
[193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 3 ; <<4 x float>*>:821 [#uses=1] + load <4 x float>* %821 ; <<4 x float>>:822 [#uses=1] + shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:823 [#uses=1] + shufflevector <4 x float> %818, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:824 [#uses=1] + shufflevector <4 x float> %820, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:825 [#uses=1] + shufflevector <4 x float> %822, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:826 [#uses=1] + shufflevector <4 x float> %823, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:827 [#uses=0] + shufflevector <4 x float> %824, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:828 [#uses=1] + store <4 x float> %828, <4 x float>* null + load <4 x float>* null ; <<4 x float>>:829 [#uses=1] + shufflevector <4 x float> %825, <4 x float> %829, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:830 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 3 ; <<4 x float>*>:831 [#uses=2] + load <4 x float>* %831 ; <<4 x float>>:832 [#uses=1] + shufflevector <4 x float> %826, <4 x float> %832, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:833 [#uses=1] + store <4 x float> %833, <4 x float>* %831 + br label %xLS.exit449 + +xLS.exit449: ; preds = %1215, %xPBRK.exit + %.27464 = phi <4 x float> [ undef, %xPBRK.exit ], [ %.17463, %1215 ] ; <<4 x float>> [#uses=2] + %.27469 = phi <4 x float> [ undef, %xPBRK.exit ], [ %.17468, %1215 ] ; <<4 x float>> [#uses=2] + %.27474 = phi <4 x float> [ undef, %xPBRK.exit ], [ zeroinitializer, %1215 ] ; <<4 x float>> [#uses=1] + %.17482 = phi <4 x float> [ undef, %xPBRK.exit ], [ zeroinitializer, %1215 ] ; <<4 x float>> [#uses=0] + %.17486 = phi <4 x float> [ undef, %xPBRK.exit ], [ zeroinitializer, %1215 ] ; <<4 x float>> [#uses=0] + %.17490 = phi <4 
x float> [ undef, %xPBRK.exit ], [ %.07489, %1215 ] ; <<4 x float>> [#uses=2] + %.17494 = phi <4 x float> [ undef, %xPBRK.exit ], [ zeroinitializer, %1215 ] ; <<4 x float>> [#uses=0] + %.27504 = phi <4 x float> [ undef, %xPBRK.exit ], [ zeroinitializer, %1215 ] ; <<4 x float>> [#uses=0] + %.17513 = phi <4 x float> [ undef, %xPBRK.exit ], [ zeroinitializer, %1215 ] ; <<4 x float>> [#uses=0] + %.17517 = phi <4 x float> [ undef, %xPBRK.exit ], [ zeroinitializer, %1215 ] ; <<4 x float>> [#uses=0] + %.17552 = phi <4 x float> [ undef, %xPBRK.exit ], [ %.07551, %1215 ] ; <<4 x float>> [#uses=2] + %.17556 = phi <4 x float> [ undef, %xPBRK.exit ], [ %.07555, %1215 ] ; <<4 x float>> [#uses=2] + %.17560 = phi <4 x float> [ undef, %xPBRK.exit ], [ zeroinitializer, %1215 ] ; <<4 x float>> [#uses=0] + %.17583 = phi <4 x float> [ undef, %xPBRK.exit ], [ %.07582, %1215 ] ; <<4 x float>> [#uses=2] + %.17591 = phi <4 x float> [ undef, %xPBRK.exit ], [ %.07590, %1215 ] ; <<4 x float>> [#uses=2] + %.17599 = phi <4 x float> [ undef, %xPBRK.exit ], [ zeroinitializer, %1215 ] ; <<4 x float>> [#uses=0] + %.17618 = phi <4 x float> [ undef, %xPBRK.exit ], [ %.07617, %1215 ] ; <<4 x float>> [#uses=2] + %.17622 = phi <4 x float> [ undef, %xPBRK.exit ], [ %.07621, %1215 ] ; <<4 x float>> [#uses=2] + %.17626 = phi <4 x float> [ undef, %xPBRK.exit ], [ zeroinitializer, %1215 ] ; <<4 x float>> [#uses=0] + %.17653 = phi <4 x float> [ undef, %xPBRK.exit ], [ %.07652, %1215 ] ; <<4 x float>> [#uses=2] + %.17657 = phi <4 x float> [ undef, %xPBRK.exit ], [ %.07656, %1215 ] ; <<4 x float>> [#uses=2] + %.17661 = phi <4 x float> [ undef, %xPBRK.exit ], [ %.07660, %1215 ] ; <<4 x float>> [#uses=2] + %.17665 = phi <4 x float> [ undef, %xPBRK.exit ], [ %.07664, %1215 ] ; <<4 x float>> [#uses=2] + %.17723 = phi <4 x float> [ undef, %xPBRK.exit ], [ %.07722, %1215 ] ; <<4 x float>> [#uses=2] + %.17727 = phi <4 x float> [ undef, %xPBRK.exit ], [ %.07726, %1215 ] ; <<4 x float>> [#uses=2] + %.17731 = phi <4 x 
float> [ undef, %xPBRK.exit ], [ %.07730, %1215 ] ; <<4 x float>> [#uses=2] + %.17735 = phi <4 x float> [ undef, %xPBRK.exit ], [ %.07734, %1215 ] ; <<4 x float>> [#uses=2] + %.17770 = phi <4 x float> [ undef, %xPBRK.exit ], [ %.07769, %1215 ] ; <<4 x float>> [#uses=2] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 0 ; <<4 x float>*>:834 [#uses=0] + load <4 x float>* null ; <<4 x float>>:835 [#uses=1] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 2 ; <<4 x float>*>:836 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 3 ; <<4 x float>*>:837 [#uses=0] + shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:838 [#uses=0] + shufflevector <4 x float> %835, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:839 [#uses=1] + getelementptr <4 x float>* null, i32 878 ; <<4 x float>*>:840 [#uses=1] + load <4 x float>* %840 ; <<4 x float>>:841 [#uses=0] + call <4 x float> @llvm.ppc.altivec.vcfsx( <4 x i32> zeroinitializer, i32 0 ) ; <<4 x float>>:842 [#uses=1] + shufflevector <4 x float> %842, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:843 [#uses=2] + call <4 x i32> @llvm.ppc.altivec.vcmpgtfp( <4 x float> %843, <4 x float> %839 ) ; <<4 x i32>>:844 [#uses=1] + bitcast <4 x i32> %844 to <4 x float> ; <<4 x float>>:845 [#uses=1] + call <4 x i32> @llvm.ppc.altivec.vcmpgtfp( <4 x float> %843, <4 x float> zeroinitializer ) ; <<4 x i32>>:846 [#uses=0] + bitcast <4 x i32> zeroinitializer to <4 x float> ; <<4 x float>>:847 [#uses=1] + icmp eq i32 0, 0 ; <i1>:848 [#uses=1] + br i1 %848, label %854, label %849 + +; <label>:849 ; preds = %xLS.exit449 + shufflevector <4 x float> zeroinitializer, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:850 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 1 ; <<4 x float>*>:851 [#uses=1] + store <4 x float> zeroinitializer, <4 x 
float>* %851 + shufflevector <4 x float> zeroinitializer, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:852 [#uses=1] + store <4 x float> %852, <4 x float>* null + shufflevector <4 x float> %847, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:853 [#uses=0] + br label %xST.exit451 + +; <label>:854 ; preds = %xLS.exit449 + call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:855 [#uses=0] + br i1 false, label %859, label %856 + +; <label>:856 ; preds = %854 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 0 ; <<4 x float>*>:857 [#uses=2] + load <4 x float>* %857 ; <<4 x float>>:858 [#uses=0] + store <4 x float> zeroinitializer, <4 x float>* %857 + br label %859 + +; <label>:859 ; preds = %856, %854 + call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:860 [#uses=0] + br i1 false, label %864, label %861 + +; <label>:861 ; preds = %859 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 1 ; <<4 x float>*>:862 [#uses=1] + shufflevector <4 x float> %845, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:863 [#uses=1] + store <4 x float> %863, <4 x float>* %862 + br label %864 + +; <label>:864 ; preds = %861, %859 + load <4 x i32>* %.sub7896 ; <<4 x i32>>:865 [#uses=1] + shufflevector <4 x i32> %865, <4 x i32> undef, <4 x i32> < i32 2, i32 2, i32 2, i32 2 > ; <<4 x i32>>:866 [#uses=0] + br i1 false, label %868, label %867 + +; <label>:867 ; preds = %864 + store <4 x float> zeroinitializer, <4 x float>* null + br label %868 + +; <label>:868 ; preds = %867, %864 + shufflevector <4 x i32> zeroinitializer, <4 x i32> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > ; <<4 x i32>>:869 [#uses=0] + br label %xST.exit451 + +xST.exit451: ; preds = %868, %849 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, 
i32 0 ; <<4 x float>*>:870 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 1 ; <<4 x float>*>:871 [#uses=0] + load <4 x float>* null ; <<4 x float>>:872 [#uses=0] + shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:873 [#uses=1] + bitcast <4 x float> zeroinitializer to <4 x i32> ; <<4 x i32>>:874 [#uses=1] + xor <4 x i32> %874, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>>:875 [#uses=0] + bitcast <4 x float> %873 to <4 x i32> ; <<4 x i32>>:876 [#uses=1] + xor <4 x i32> %876, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>>:877 [#uses=0] + bitcast <4 x float> zeroinitializer to <4 x i32> ; <<4 x i32>>:878 [#uses=1] + xor <4 x i32> %878, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>>:879 [#uses=1] + bitcast <4 x i32> %879 to <4 x float> ; <<4 x float>>:880 [#uses=0] + load <4 x i32>* %.sub7896 ; <<4 x i32>>:881 [#uses=1] + icmp eq i32 0, 0 ; <i1>:882 [#uses=1] + br i1 %882, label %888, label %883 + +; <label>:883 ; preds = %xST.exit451 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 0 ; <<4 x float>*>:884 [#uses=1] + store <4 x float> zeroinitializer, <4 x float>* %884 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 1 ; <<4 x float>*>:885 [#uses=0] + shufflevector <4 x float> zeroinitializer, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:886 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 3 ; <<4 x float>*>:887 [#uses=0] + br label %xST.exit453 + +; <label>:888 ; preds = %xST.exit451 + shufflevector <4 x i32> %881, <4 x i32> undef, <4 x i32> zeroinitializer ; <<4 x i32>>:889 [#uses=0] + call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:890 [#uses=0] + br i1 false, label %894, label %891 + +; <label>:891 ; preds = %888 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 1 ; <<4 x float>*>:892 
[#uses=1] + shufflevector <4 x float> zeroinitializer, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:893 [#uses=1] + store <4 x float> %893, <4 x float>* %892 + br label %894 + +; <label>:894 ; preds = %891, %888 + call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:895 [#uses=1] + icmp eq i32 %895, 0 ; <i1>:896 [#uses=1] + br i1 %896, label %898, label %897 + +; <label>:897 ; preds = %894 + br label %898 + +; <label>:898 ; preds = %897, %894 + load <4 x i32>* %.sub7896 ; <<4 x i32>>:899 [#uses=0] + br i1 false, label %xST.exit453, label %900 + +; <label>:900 ; preds = %898 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 3 ; <<4 x float>*>:901 [#uses=1] + load <4 x float>* %901 ; <<4 x float>>:902 [#uses=1] + shufflevector <4 x float> zeroinitializer, <4 x float> %902, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:903 [#uses=0] + br label %xST.exit453 + +xST.exit453: ; preds = %900, %898, %883 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 1 ; <<4 x float>*>:904 [#uses=0] + load <4 x float>* null ; <<4 x float>>:905 [#uses=1] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 3 ; <<4 x float>*>:906 [#uses=0] + shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:907 [#uses=1] + shufflevector <4 x float> %905, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:908 [#uses=1] + bitcast <4 x float> zeroinitializer to <4 x i32> ; <<4 x i32>>:909 [#uses=0] + bitcast <4 x float> %908 to <4 x i32> ; <<4 x i32>>:910 [#uses=0] + bitcast <4 x float> %907 to <4 x i32> ; <<4 x i32>>:911 [#uses=0] + bitcast <4 x float> zeroinitializer to <4 x i32> ; <<4 x i32>>:912 [#uses=0] + load <4 x i32>* %.sub7896 ; <<4 x i32>>:913 [#uses=0] + call i32 @llvm.ppc.altivec.vcmpequw.p( i32 2, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:914 [#uses=0] 
+ br i1 false, label %915, label %xPIF.exit455 + +; <label>:915 ; preds = %xST.exit453 + load <4 x i32>* %.sub7896 ; <<4 x i32>>:916 [#uses=0] + getelementptr [4 x <4 x i32>]* null, i32 0, i32 3 ; <<4 x i32>*>:917 [#uses=1] + store <4 x i32> zeroinitializer, <4 x i32>* %917 + load <4 x i32>* %.sub7896 ; <<4 x i32>>:918 [#uses=1] + and <4 x i32> %918, zeroinitializer ; <<4 x i32>>:919 [#uses=0] + br label %.critedge7899 + +.critedge7899: ; preds = %.critedge7899, %915 + or <4 x i32> zeroinitializer, zeroinitializer ; <<4 x i32>>:920 [#uses=1] + br i1 false, label %.critedge7899, label %xPBRK.exit456 + +xPBRK.exit456: ; preds = %.critedge7899 + call i32 @llvm.ppc.altivec.vcmpequw.p( i32 2, <4 x i32> %920, <4 x i32> zeroinitializer ) ; <i32>:921 [#uses=0] + unreachable + +xPIF.exit455: ; preds = %xST.exit453 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 0 ; <<4 x float>*>:922 [#uses=1] + load <4 x float>* %922 ; <<4 x float>>:923 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 1 ; <<4 x float>*>:924 [#uses=1] + load <4 x float>* %924 ; <<4 x float>>:925 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 2 ; <<4 x float>*>:926 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 3 ; <<4 x float>*>:927 [#uses=0] + shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:928 [#uses=0] + bitcast { { i16, i16, i32 } }* %1 to <4 x float>* ; <<4 x float>*>:929 [#uses=0] + bitcast <4 x float> zeroinitializer to <4 x i32> ; <<4 x i32>>:930 [#uses=0] + load <4 x i32>* %.sub7896 ; <<4 x i32>>:931 [#uses=0] + icmp eq i32 0, 0 ; <i1>:932 [#uses=1] + br i1 %932, label %934, label %933 + +; <label>:933 ; preds = %xPIF.exit455 + store <4 x float> zeroinitializer, <4 x float>* null + br label %934 + +; <label>:934 ; preds = %933, %xPIF.exit455 + shufflevector <4 x i32> zeroinitializer, <4 x i32> undef, <4 x i32> < i32 1, i32 1, i32 
1, i32 1 > ; <<4 x i32>>:935 [#uses=0] + icmp eq i32 0, 0 ; <i1>:936 [#uses=1] + br i1 %936, label %xST.exit459, label %937 + +; <label>:937 ; preds = %934 + br label %xST.exit459 + +xST.exit459: ; preds = %937, %934 + shufflevector <4 x i32> zeroinitializer, <4 x i32> undef, <4 x i32> < i32 2, i32 2, i32 2, i32 2 > ; <<4 x i32>>:938 [#uses=1] + call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> %938, <4 x i32> zeroinitializer ) ; <i32>:939 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 188, i32 2 ; <<4 x float>*>:940 [#uses=1] + store <4 x float> zeroinitializer, <4 x float>* %940 + load <4 x float>* null ; <<4 x float>>:941 [#uses=1] + shufflevector <4 x float> zeroinitializer, <4 x float> %941, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:942 [#uses=1] + store <4 x float> %942, <4 x float>* null + shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:943 [#uses=0] + load <4 x i32>* %.sub7896 ; <<4 x i32>>:944 [#uses=0] + call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:945 [#uses=0] + br i1 false, label %947, label %946 + +; <label>:946 ; preds = %xST.exit459 + br label %947 + +; <label>:947 ; preds = %946, %xST.exit459 + shufflevector <4 x i32> zeroinitializer, <4 x i32> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 > ; <<4 x i32>>:948 [#uses=0] + icmp eq i32 0, 0 ; <i1>:949 [#uses=1] + br i1 %949, label %952, label %950 + +; <label>:950 ; preds = %947 + shufflevector <4 x i32> zeroinitializer, <4 x i32> undef, <4 x i32> zeroinitializer ; <<4 x i32>>:951 [#uses=1] + call void @llvm.ppc.altivec.stvewx( <4 x i32> %951, i8* null ) + br label %952 + +; <label>:952 ; preds = %950, %947 + br i1 false, label %955, label %953 + +; <label>:953 ; preds = %952 + getelementptr [4 x <4 x i32>]* null, i32 0, i32 2 ; <<4 x i32>*>:954 [#uses=0] + br label %955 + +; <label>:955 ; preds = %953, %952 + shufflevector <4 x i32> 
zeroinitializer, <4 x i32> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > ; <<4 x i32>>:956 [#uses=0] + icmp eq i32 0, 0 ; <i1>:957 [#uses=1] + br i1 %957, label %xStoreDestAddressWithMask.exit461, label %958 + +; <label>:958 ; preds = %955 + shufflevector <4 x i32> zeroinitializer, <4 x i32> undef, <4 x i32> zeroinitializer ; <<4 x i32>>:959 [#uses=1] + call void @llvm.ppc.altivec.stvewx( <4 x i32> %959, i8* null ) + br label %xStoreDestAddressWithMask.exit461 + +xStoreDestAddressWithMask.exit461: ; preds = %958, %955 + load <4 x float>* %0 ; <<4 x float>>:960 [#uses=0] + call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:961 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 3, i32 0 ; <<4 x float>*>:962 [#uses=0] + br i1 false, label %968, label %xST.exit463 + +xST.exit463: ; preds = %xStoreDestAddressWithMask.exit461 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 3, i32 1 ; <<4 x float>*>:963 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 3, i32 2 ; <<4 x float>*>:964 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 3, i32 3 ; <<4 x float>*>:965 [#uses=0] + load <4 x float>* %0 ; <<4 x float>>:966 [#uses=3] + call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:967 [#uses=0] + br i1 false, label %972, label %969 + +; <label>:968 ; preds = %xStoreDestAddressWithMask.exit461 + unreachable + +; <label>:969 ; preds = %xST.exit463 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 2, i32 1 ; <<4 x float>*>:970 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 2, i32 2 ; <<4 x float>*>:971 [#uses=1] + store <4 x float> %966, <4 x float>* %971 + store <4 x float> %966, <4 x float>* null + br label %xST.exit465 + +; <label>:972 ; preds = %xST.exit463 + call <4 x i32> @llvm.ppc.altivec.vsel( <4 x i32> zeroinitializer, <4 x i32> zeroinitializer, <4 x i32> 
zeroinitializer ) ; <<4 x i32>>:973 [#uses=0] + store <4 x float> zeroinitializer, <4 x float>* null + store <4 x float> zeroinitializer, <4 x float>* null + load <4 x float>* null ; <<4 x float>>:974 [#uses=0] + bitcast <4 x float> %966 to <4 x i32> ; <<4 x i32>>:975 [#uses=1] + call <4 x i32> @llvm.ppc.altivec.vsel( <4 x i32> zeroinitializer, <4 x i32> %975, <4 x i32> zeroinitializer ) ; <<4 x i32>>:976 [#uses=1] + bitcast <4 x i32> %976 to <4 x float> ; <<4 x float>>:977 [#uses=1] + store <4 x float> %977, <4 x float>* null + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 2, i32 3 ; <<4 x float>*>:978 [#uses=0] + bitcast <4 x float> zeroinitializer to <4 x i32> ; <<4 x i32>>:979 [#uses=1] + call <4 x i32> @llvm.ppc.altivec.vsel( <4 x i32> %979, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <<4 x i32>>:980 [#uses=1] + bitcast <4 x i32> %980 to <4 x float> ; <<4 x float>>:981 [#uses=0] + br label %xST.exit465 + +xST.exit465: ; preds = %972, %969 + load <4 x float>* %0 ; <<4 x float>>:982 [#uses=3] + icmp eq i32 0, 0 ; <i1>:983 [#uses=1] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 0 ; <<4 x float>*>:984 [#uses=1] + br i1 %983, label %989, label %985 + +; <label>:985 ; preds = %xST.exit465 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 1 ; <<4 x float>*>:986 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 2 ; <<4 x float>*>:987 [#uses=1] + store <4 x float> %982, <4 x float>* %987 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 3 ; <<4 x float>*>:988 [#uses=0] + br label %xST.exit467 + +; <label>:989 ; preds = %xST.exit465 + bitcast <4 x float> %982 to <4 x i32> ; <<4 x i32>>:990 [#uses=0] + shufflevector <4 x i32> zeroinitializer, <4 x i32> undef, <4 x i32> zeroinitializer ; <<4 x i32>>:991 [#uses=0] + store <4 x float> zeroinitializer, <4 x float>* %984 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 1 ; <<4 x float>*>:992 [#uses=0] + 
load <4 x i32>* %.sub7896 ; <<4 x i32>>:993 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 2 ; <<4 x float>*>:994 [#uses=0] + bitcast <4 x i32> zeroinitializer to <4 x float> ; <<4 x float>>:995 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 3 ; <<4 x float>*>:996 [#uses=0] + bitcast <4 x float> zeroinitializer to <4 x i32> ; <<4 x i32>>:997 [#uses=1] + bitcast <4 x float> %982 to <4 x i32> ; <<4 x i32>>:998 [#uses=1] + shufflevector <4 x i32> zeroinitializer, <4 x i32> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > ; <<4 x i32>>:999 [#uses=1] + call <4 x i32> @llvm.ppc.altivec.vsel( <4 x i32> %997, <4 x i32> %998, <4 x i32> %999 ) ; <<4 x i32>>:1000 [#uses=1] + bitcast <4 x i32> %1000 to <4 x float> ; <<4 x float>>:1001 [#uses=0] + br label %xST.exit467 + +xST.exit467: ; preds = %989, %985 + load <4 x float>* %0 ; <<4 x float>>:1002 [#uses=5] + load <4 x i32>* %.sub7896 ; <<4 x i32>>:1003 [#uses=2] + call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> %1003, <4 x i32> zeroinitializer ) ; <i32>:1004 [#uses=0] + br i1 false, label %1011, label %1005 + +; <label>:1005 ; preds = %xST.exit467 + load <4 x float>* null ; <<4 x float>>:1006 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 1 ; <<4 x float>*>:1007 [#uses=1] + load <4 x float>* %1007 ; <<4 x float>>:1008 [#uses=0] + load <4 x float>* null ; <<4 x float>>:1009 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 3 ; <<4 x float>*>:1010 [#uses=0] + br label %xST.exit469 + +; <label>:1011 ; preds = %xST.exit467 + shufflevector <4 x i32> %1003, <4 x i32> undef, <4 x i32> zeroinitializer ; <<4 x i32>>:1012 [#uses=0] + icmp eq i32 0, 0 ; <i1>:1013 [#uses=1] + br i1 %1013, label %1015, label %1014 + +; <label>:1014 ; preds = %1011 + br label %1015 + +; <label>:1015 ; preds = %1014, %1011 + %.07472 = phi <4 x float> [ %1002, %1014 ], [ %.27474, %1011 ] ; <<4 x float>> [#uses=0] + call i32 
@llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:1016 [#uses=1] + icmp eq i32 %1016, 0 ; <i1>:1017 [#uses=1] + br i1 %1017, label %1021, label %1018 + +; <label>:1018 ; preds = %1015 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 1 ; <<4 x float>*>:1019 [#uses=0] + shufflevector <4 x float> %1002, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 1, i32 2, i32 7 > ; <<4 x float>>:1020 [#uses=0] + br label %1021 + +; <label>:1021 ; preds = %1018, %1015 + %.07467 = phi <4 x float> [ %1002, %1018 ], [ %.27469, %1015 ] ; <<4 x float>> [#uses=2] + icmp eq i32 0, 0 ; <i1>:1022 [#uses=1] + br i1 %1022, label %1025, label %1023 + +; <label>:1023 ; preds = %1021 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 2 ; <<4 x float>*>:1024 [#uses=1] + store <4 x float> zeroinitializer, <4 x float>* %1024 + br label %1025 + +; <label>:1025 ; preds = %1023, %1021 + %.07462 = phi <4 x float> [ %1002, %1023 ], [ %.27464, %1021 ] ; <<4 x float>> [#uses=2] + icmp eq i32 0, 0 ; <i1>:1026 [#uses=1] + br i1 %1026, label %xST.exit469, label %1027 + +; <label>:1027 ; preds = %1025 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 3 ; <<4 x float>*>:1028 [#uses=0] + br label %xST.exit469 + +xST.exit469: ; preds = %1027, %1025, %1005 + %.17463 = phi <4 x float> [ %.27464, %1005 ], [ %.07462, %1027 ], [ %.07462, %1025 ] ; <<4 x float>> [#uses=1] + %.17468 = phi <4 x float> [ %.27469, %1005 ], [ %.07467, %1027 ], [ %.07467, %1025 ] ; <<4 x float>> [#uses=1] + %.07489 = phi <4 x float> [ %1002, %1005 ], [ %.17490, %1027 ], [ %.17490, %1025 ] ; <<4 x float>> [#uses=1] + load <4 x float>* null ; <<4 x float>>:1029 [#uses=0] + load <4 x float>* null ; <<4 x float>>:1030 [#uses=0] + sub <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:1031 [#uses=1] + br i1 false, label %1037, label %1032 + +; <label>:1032 ; preds = %xST.exit469 + load <4 x float>* null ; <<4 x float>>:1033 
[#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 2 ; <<4 x float>*>:1034 [#uses=1] + load <4 x float>* %1034 ; <<4 x float>>:1035 [#uses=0] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 3 ; <<4 x float>*>:1036 [#uses=0] + br label %xST.exit472 + +; <label>:1037 ; preds = %xST.exit469 + icmp eq i32 0, 0 ; <i1>:1038 [#uses=1] + br i1 %1038, label %1040, label %1039 + +; <label>:1039 ; preds = %1037 + br label %1040 + +; <label>:1040 ; preds = %1039, %1037 + %.07507 = phi <4 x float> [ zeroinitializer, %1039 ], [ zeroinitializer, %1037 ] ; <<4 x float>> [#uses=0] + icmp eq i32 0, 0 ; <i1>:1041 [#uses=1] + br i1 %1041, label %1045, label %1042 + +; <label>:1042 ; preds = %1040 + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 1 ; <<4 x float>*>:1043 [#uses=1] + load <4 x float>* %1043 ; <<4 x float>>:1044 [#uses=0] + br label %1045 + +; <label>:1045 ; preds = %1042, %1040 + br i1 false, label %1048, label %1046 + +; <label>:1046 ; preds = %1045 + shufflevector <4 x float> %1031, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 1, i32 2, i32 7 > ; <<4 x float>>:1047 [#uses=0] + br label %1048 + +; <label>:1048 ; preds = %1046, %1045 + icmp eq i32 0, 0 ; <i1>:1049 [#uses=1] + br i1 %1049, label %xST.exit472, label %1050 + +; <label>:1050 ; preds = %1048 + br label %xST.exit472 + +xST.exit472: ; preds = %1050, %1048, %1032 + br i1 false, label %1052, label %1051 + +; <label>:1051 ; preds = %xST.exit472 + br label %xST.exit474 + +; <label>:1052 ; preds = %xST.exit472 + br i1 false, label %1054, label %1053 + +; <label>:1053 ; preds = %1052 + br label %1054 + +; <label>:1054 ; preds = %1053, %1052 + br i1 false, label %1056, label %1055 + +; <label>:1055 ; preds = %1054 + br label %1056 + +; <label>:1056 ; preds = %1055, %1054 + br i1 false, label %1058, label %1057 + +; <label>:1057 ; preds = %1056 + br label %1058 + +; <label>:1058 ; preds = %1057, %1056 + br i1 false, label %xST.exit474, label %1059 + 
+; <label>:1059 ; preds = %1058 + br label %xST.exit474 + +xST.exit474: ; preds = %1059, %1058, %1051 + load <4 x float>* null ; <<4 x float>>:1060 [#uses=1] + mul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:1061 [#uses=1] + mul <4 x float> %1060, zeroinitializer ; <<4 x float>>:1062 [#uses=2] + br i1 false, label %1065, label %1063 + +; <label>:1063 ; preds = %xST.exit474 + shufflevector <4 x float> %1062, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 1, i32 2, i32 7 > ; <<4 x float>>:1064 [#uses=1] + store <4 x float> %1064, <4 x float>* null + br label %xST.exit476 + +; <label>:1065 ; preds = %xST.exit474 + br i1 false, label %1067, label %1066 + +; <label>:1066 ; preds = %1065 + br label %1067 + +; <label>:1067 ; preds = %1066, %1065 + shufflevector <4 x i32> zeroinitializer, <4 x i32> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 > ; <<4 x i32>>:1068 [#uses=0] + br i1 false, label %1070, label %1069 + +; <label>:1069 ; preds = %1067 + br label %1070 + +; <label>:1070 ; preds = %1069, %1067 + br i1 false, label %1072, label %1071 + +; <label>:1071 ; preds = %1070 + br label %1072 + +; <label>:1072 ; preds = %1071, %1070 + br i1 false, label %xST.exit476, label %1073 + +; <label>:1073 ; preds = %1072 + br label %xST.exit476 + +xST.exit476: ; preds = %1073, %1072, %1063 + %.07551 = phi <4 x float> [ %1062, %1063 ], [ %.17552, %1073 ], [ %.17552, %1072 ] ; <<4 x float>> [#uses=1] + %.07555 = phi <4 x float> [ %1061, %1063 ], [ %.17556, %1073 ], [ %.17556, %1072 ] ; <<4 x float>> [#uses=1] + br i1 false, label %1075, label %1074 + +; <label>:1074 ; preds = %xST.exit476 + br label %xST.exit479 + +; <label>:1075 ; preds = %xST.exit476 + br i1 false, label %1077, label %1076 + +; <label>:1076 ; preds = %1075 + br label %1077 + +; <label>:1077 ; preds = %1076, %1075 + br i1 false, label %1079, label %1078 + +; <label>:1078 ; preds = %1077 + br label %1079 + +; <label>:1079 ; preds = %1078, %1077 + br i1 false, label %1081, label %1080 + +; 
<label>:1080 ; preds = %1079 + br label %1081 + +; <label>:1081 ; preds = %1080, %1079 + br i1 false, label %xST.exit479, label %1082 + +; <label>:1082 ; preds = %1081 + br label %xST.exit479 + +xST.exit479: ; preds = %1082, %1081, %1074 + br i1 false, label %1084, label %1083 + +; <label>:1083 ; preds = %xST.exit479 + br label %xST.exit482 + +; <label>:1084 ; preds = %xST.exit479 + br i1 false, label %1086, label %1085 + +; <label>:1085 ; preds = %1084 + br label %1086 + +; <label>:1086 ; preds = %1085, %1084 + br i1 false, label %1088, label %1087 + +; <label>:1087 ; preds = %1086 + br label %1088 + +; <label>:1088 ; preds = %1087, %1086 + br i1 false, label %1090, label %1089 + +; <label>:1089 ; preds = %1088 + br label %1090 + +; <label>:1090 ; preds = %1089, %1088 + br i1 false, label %xST.exit482, label %1091 + +; <label>:1091 ; preds = %1090 + br label %xST.exit482 + +xST.exit482: ; preds = %1091, %1090, %1083 + br i1 false, label %1093, label %1092 + +; <label>:1092 ; preds = %xST.exit482 + br label %xST.exit486 + +; <label>:1093 ; preds = %xST.exit482 + br i1 false, label %1095, label %1094 + +; <label>:1094 ; preds = %1093 + br label %1095 + +; <label>:1095 ; preds = %1094, %1093 + br i1 false, label %1097, label %1096 + +; <label>:1096 ; preds = %1095 + br label %1097 + +; <label>:1097 ; preds = %1096, %1095 + br i1 false, label %1099, label %1098 + +; <label>:1098 ; preds = %1097 + br label %1099 + +; <label>:1099 ; preds = %1098, %1097 + br i1 false, label %xST.exit486, label %1100 + +; <label>:1100 ; preds = %1099 + br label %xST.exit486 + +xST.exit486: ; preds = %1100, %1099, %1092 + br i1 false, label %1102, label %1101 + +; <label>:1101 ; preds = %xST.exit486 + br label %xST.exit489 + +; <label>:1102 ; preds = %xST.exit486 + br i1 false, label %1104, label %1103 + +; <label>:1103 ; preds = %1102 + br label %1104 + +; <label>:1104 ; preds = %1103, %1102 + br i1 false, label %1106, label %1105 + +; <label>:1105 ; preds = %1104 + br label %1106 + +; 
<label>:1106 ; preds = %1105, %1104 + br i1 false, label %1108, label %1107 + +; <label>:1107 ; preds = %1106 + br label %1108 + +; <label>:1108 ; preds = %1107, %1106 + br i1 false, label %xST.exit489, label %1109 + +; <label>:1109 ; preds = %1108 + br label %xST.exit489 + +xST.exit489: ; preds = %1109, %1108, %1101 + br i1 false, label %1111, label %1110 + +; <label>:1110 ; preds = %xST.exit489 + br label %xST.exit492 + +; <label>:1111 ; preds = %xST.exit489 + br i1 false, label %1113, label %1112 + +; <label>:1112 ; preds = %1111 + br label %1113 + +; <label>:1113 ; preds = %1112, %1111 + br i1 false, label %1115, label %1114 + +; <label>:1114 ; preds = %1113 + br label %1115 + +; <label>:1115 ; preds = %1114, %1113 + br i1 false, label %1117, label %1116 + +; <label>:1116 ; preds = %1115 + br label %1117 + +; <label>:1117 ; preds = %1116, %1115 + br i1 false, label %xST.exit492, label %1118 + +; <label>:1118 ; preds = %1117 + br label %xST.exit492 + +xST.exit492: ; preds = %1118, %1117, %1110 + load <4 x float>* null ; <<4 x float>>:1119 [#uses=1] + mul <4 x float> %1119, zeroinitializer ; <<4 x float>>:1120 [#uses=1] + mul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:1121 [#uses=1] + br i1 false, label %1123, label %1122 + +; <label>:1122 ; preds = %xST.exit492 + br label %xST.exit495 + +; <label>:1123 ; preds = %xST.exit492 + br i1 false, label %1125, label %1124 + +; <label>:1124 ; preds = %1123 + br label %1125 + +; <label>:1125 ; preds = %1124, %1123 + br i1 false, label %1127, label %1126 + +; <label>:1126 ; preds = %1125 + br label %1127 + +; <label>:1127 ; preds = %1126, %1125 + br i1 false, label %1129, label %1128 + +; <label>:1128 ; preds = %1127 + br label %1129 + +; <label>:1129 ; preds = %1128, %1127 + br i1 false, label %xST.exit495, label %1130 + +; <label>:1130 ; preds = %1129 + br label %xST.exit495 + +xST.exit495: ; preds = %1130, %1129, %1122 + %.07582 = phi <4 x float> [ %1121, %1122 ], [ %.17583, %1130 ], [ %.17583, %1129 ] 
; <<4 x float>> [#uses=1] + %.07590 = phi <4 x float> [ %1120, %1122 ], [ %.17591, %1130 ], [ %.17591, %1129 ] ; <<4 x float>> [#uses=1] + load <4 x float>* null ; <<4 x float>>:1131 [#uses=1] + add <4 x float> %1131, zeroinitializer ; <<4 x float>>:1132 [#uses=1] + add <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:1133 [#uses=1] + br i1 false, label %1135, label %1134 + +; <label>:1134 ; preds = %xST.exit495 + br label %xST.exit498 + +; <label>:1135 ; preds = %xST.exit495 + br i1 false, label %1137, label %1136 + +; <label>:1136 ; preds = %1135 + br label %1137 + +; <label>:1137 ; preds = %1136, %1135 + br i1 false, label %1139, label %1138 + +; <label>:1138 ; preds = %1137 + br label %1139 + +; <label>:1139 ; preds = %1138, %1137 + br i1 false, label %1141, label %1140 + +; <label>:1140 ; preds = %1139 + br label %1141 + +; <label>:1141 ; preds = %1140, %1139 + br i1 false, label %xST.exit498, label %1142 + +; <label>:1142 ; preds = %1141 + br label %xST.exit498 + +xST.exit498: ; preds = %1142, %1141, %1134 + %.07617 = phi <4 x float> [ %1133, %1134 ], [ %.17618, %1142 ], [ %.17618, %1141 ] ; <<4 x float>> [#uses=1] + %.07621 = phi <4 x float> [ %1132, %1134 ], [ %.17622, %1142 ], [ %.17622, %1141 ] ; <<4 x float>> [#uses=1] + load <4 x float>* null ; <<4 x float>>:1143 [#uses=1] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 2 ; <<4 x float>*>:1144 [#uses=1] + load <4 x float>* %1144 ; <<4 x float>>:1145 [#uses=1] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 3 ; <<4 x float>*>:1146 [#uses=1] + load <4 x float>* %1146 ; <<4 x float>>:1147 [#uses=1] + shufflevector <4 x float> %1143, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:1148 [#uses=1] + shufflevector <4 x float> %1145, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:1149 [#uses=1] + shufflevector <4 x float> %1147, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:1150 [#uses=1] + mul <4 x float> 
zeroinitializer, zeroinitializer ; <<4 x float>>:1151 [#uses=1] + mul <4 x float> zeroinitializer, %1148 ; <<4 x float>>:1152 [#uses=1] + mul <4 x float> zeroinitializer, %1149 ; <<4 x float>>:1153 [#uses=1] + mul <4 x float> zeroinitializer, %1150 ; <<4 x float>>:1154 [#uses=1] + br i1 false, label %1156, label %1155 + +; <label>:1155 ; preds = %xST.exit498 + br label %xST.exit501 + +; <label>:1156 ; preds = %xST.exit498 + br i1 false, label %1158, label %1157 + +; <label>:1157 ; preds = %1156 + br label %1158 + +; <label>:1158 ; preds = %1157, %1156 + br i1 false, label %1160, label %1159 + +; <label>:1159 ; preds = %1158 + br label %1160 + +; <label>:1160 ; preds = %1159, %1158 + br i1 false, label %1162, label %1161 + +; <label>:1161 ; preds = %1160 + br label %1162 + +; <label>:1162 ; preds = %1161, %1160 + br i1 false, label %xST.exit501, label %1163 + +; <label>:1163 ; preds = %1162 + br label %xST.exit501 + +xST.exit501: ; preds = %1163, %1162, %1155 + %.07652 = phi <4 x float> [ %1154, %1155 ], [ %.17653, %1163 ], [ %.17653, %1162 ] ; <<4 x float>> [#uses=1] + %.07656 = phi <4 x float> [ %1153, %1155 ], [ %.17657, %1163 ], [ %.17657, %1162 ] ; <<4 x float>> [#uses=1] + %.07660 = phi <4 x float> [ %1152, %1155 ], [ %.17661, %1163 ], [ %.17661, %1162 ] ; <<4 x float>> [#uses=1] + %.07664 = phi <4 x float> [ %1151, %1155 ], [ %.17665, %1163 ], [ %.17665, %1162 ] ; <<4 x float>> [#uses=1] + load <4 x float>* null ; <<4 x float>>:1164 [#uses=1] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 2 ; <<4 x float>*>:1165 [#uses=1] + load <4 x float>* %1165 ; <<4 x float>>:1166 [#uses=1] + getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 3 ; <<4 x float>*>:1167 [#uses=1] + load <4 x float>* %1167 ; <<4 x float>>:1168 [#uses=1] + add <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:1169 [#uses=1] + add <4 x float> zeroinitializer, %1164 ; <<4 x float>>:1170 [#uses=1] + add <4 x float> zeroinitializer, %1166 ; <<4 x 
float>>:1171 [#uses=1] + add <4 x float> zeroinitializer, %1168 ; <<4 x float>>:1172 [#uses=1] + br i1 false, label %1174, label %1173 + +; <label>:1173 ; preds = %xST.exit501 + br label %xST.exit504 + +; <label>:1174 ; preds = %xST.exit501 + br i1 false, label %1176, label %1175 + +; <label>:1175 ; preds = %1174 + br label %1176 + +; <label>:1176 ; preds = %1175, %1174 + br i1 false, label %1178, label %1177 + +; <label>:1177 ; preds = %1176 + br label %1178 + +; <label>:1178 ; preds = %1177, %1176 + br i1 false, label %1180, label %1179 + +; <label>:1179 ; preds = %1178 + br label %1180 + +; <label>:1180 ; preds = %1179, %1178 + br i1 false, label %xST.exit504, label %1181 + +; <label>:1181 ; preds = %1180 + br label %xST.exit504 + +xST.exit504: ; preds = %1181, %1180, %1173 + %.07722 = phi <4 x float> [ %1172, %1173 ], [ %.17723, %1181 ], [ %.17723, %1180 ] ; <<4 x float>> [#uses=1] + %.07726 = phi <4 x float> [ %1171, %1173 ], [ %.17727, %1181 ], [ %.17727, %1180 ] ; <<4 x float>> [#uses=1] + %.07730 = phi <4 x float> [ %1170, %1173 ], [ %.17731, %1181 ], [ %.17731, %1180 ] ; <<4 x float>> [#uses=1] + %.07734 = phi <4 x float> [ %1169, %1173 ], [ %.17735, %1181 ], [ %.17735, %1180 ] ; <<4 x float>> [#uses=1] + add <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:1182 [#uses=1] + br i1 false, label %1184, label %1183 + +; <label>:1183 ; preds = %xST.exit504 + br label %xST.exit507 + +; <label>:1184 ; preds = %xST.exit504 + br i1 false, label %1186, label %1185 + +; <label>:1185 ; preds = %1184 + br label %1186 + +; <label>:1186 ; preds = %1185, %1184 + br i1 false, label %1188, label %1187 + +; <label>:1187 ; preds = %1186 + store <4 x float> zeroinitializer, <4 x float>* null + br label %1188 + +; <label>:1188 ; preds = %1187, %1186 + load <4 x i32>* %.sub7896 ; <<4 x i32>>:1189 [#uses=1] + shufflevector <4 x i32> %1189, <4 x i32> undef, <4 x i32> < i32 2, i32 2, i32 2, i32 2 > ; <<4 x i32>>:1190 [#uses=1] + call i32 @llvm.ppc.altivec.vcmpequw.p( 
i32 0, <4 x i32> %1190, <4 x i32> zeroinitializer ) ; <i32>:1191 [#uses=1] + icmp eq i32 %1191, 0 ; <i1>:1192 [#uses=1] + br i1 %1192, label %1196, label %1193 + +; <label>:1193 ; preds = %1188 + load <4 x float>* null ; <<4 x float>>:1194 [#uses=1] + shufflevector <4 x float> zeroinitializer, <4 x float> %1194, <4 x i32> < i32 0, i32 1, i32 2, i32 7 > ; <<4 x float>>:1195 [#uses=1] + store <4 x float> %1195, <4 x float>* null + br label %1196 + +; <label>:1196 ; preds = %1193, %1188 + %.07742 = phi <4 x float> [ zeroinitializer, %1193 ], [ zeroinitializer, %1188 ] ; <<4 x float>> [#uses=0] + load <4 x i32>* %.sub7896 ; <<4 x i32>>:1197 [#uses=1] + shufflevector <4 x i32> %1197, <4 x i32> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > ; <<4 x i32>>:1198 [#uses=1] + call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> %1198, <4 x i32> zeroinitializer ) ; <i32>:1199 [#uses=1] + icmp eq i32 %1199, 0 ; <i1>:1200 [#uses=1] + br i1 %1200, label %xST.exit507, label %1201 + +; <label>:1201 ; preds = %1196 + store <4 x float> zeroinitializer, <4 x float>* null + br label %xST.exit507 + +xST.exit507: ; preds = %1201, %1196, %1183 + %.07769 = phi <4 x float> [ %1182, %1183 ], [ %.17770, %1201 ], [ %.17770, %1196 ] ; <<4 x float>> [#uses=1] + call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:1202 [#uses=1] + icmp eq i32 %1202, 0 ; <i1>:1203 [#uses=1] + br i1 %1203, label %1207, label %1204 + +; <label>:1204 ; preds = %xST.exit507 + load <4 x float>* null ; <<4 x float>>:1205 [#uses=1] + shufflevector <4 x float> zeroinitializer, <4 x float> %1205, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:1206 [#uses=1] + store <4 x float> %1206, <4 x float>* null + br label %1207 + +; <label>:1207 ; preds = %1204, %xST.exit507 + load <4 x i32>* %.sub7896 ; <<4 x i32>>:1208 [#uses=1] + shufflevector <4 x i32> %1208, <4 x i32> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 > ; <<4 x i32>>:1209 [#uses=1] + call i32 
@llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> %1209, <4 x i32> zeroinitializer ) ; <i32>:1210 [#uses=1] + icmp eq i32 %1210, 0 ; <i1>:1211 [#uses=1] + br i1 %1211, label %1215, label %1212 + +; <label>:1212 ; preds = %1207 + load <4 x float>* null ; <<4 x float>>:1213 [#uses=1] + shufflevector <4 x float> zeroinitializer, <4 x float> %1213, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:1214 [#uses=1] + store <4 x float> %1214, <4 x float>* null + br label %1215 + +; <label>:1215 ; preds = %1212, %1207 + store <4 x float> zeroinitializer, <4 x float>* null + br label %xLS.exit449 +} + +declare <4 x i32> @llvm.ppc.altivec.vsel(<4 x i32>, <4 x i32>, <4 x i32>) + +declare void @llvm.ppc.altivec.stvewx(<4 x i32>, i8*) + +declare <4 x float> @llvm.ppc.altivec.vrsqrtefp(<4 x float>) + +declare <4 x float> @llvm.ppc.altivec.vcfsx(<4 x i32>, i32) + +declare i32 @llvm.ppc.altivec.vcmpequw.p(i32, <4 x i32>, <4 x i32>) + +declare <4 x i32> @llvm.ppc.altivec.vcmpgtfp(<4 x float>, <4 x float>) diff --git a/test/CodeGen/PowerPC/2007-04-24-InlineAsm-I-Modifier.ll b/test/CodeGen/PowerPC/2007-04-24-InlineAsm-I-Modifier.ll new file mode 100644 index 0000000..8405703 --- /dev/null +++ b/test/CodeGen/PowerPC/2007-04-24-InlineAsm-I-Modifier.ll @@ -0,0 +1,15 @@ +; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin8.8.0 | grep {foo r3, r4} +; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin8.8.0 | grep {bar r3, r} + +; PR1351 + +define i32 @test1(i32 %Y, i32 %X) { + %tmp1 = tail call i32 asm "foo${1:I} $0, $1", "=r,rI"( i32 %X ) + ret i32 %tmp1 +} + +;; TODO: We'd actually prefer this to be 'bari r3, 47', but 'bar r3, rN' is also ok. 
+define i32 @test2(i32 %Y, i32 %X) { + %tmp1 = tail call i32 asm "bar${1:I} $0, $1", "=r,rI"( i32 47 ) + ret i32 %tmp1 +} diff --git a/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll b/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll new file mode 100644 index 0000000..f43b87c --- /dev/null +++ b/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll @@ -0,0 +1,27 @@ +; RUN: llvm-as < %s | llc | grep {subfc r2,r5,r4} +; RUN: llvm-as < %s | llc | grep {subfze r4,r3} + +; PR1357 + +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" +target triple = "powerpc-apple-darwin8.8.0" + +;long long test(int A, int B, int C) { +; unsigned X, Y; +; __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2" +; : "=r" (X), "=&r" (Y) +; : "r" (A), "rI" (B), "r" (C)); +; return ((long long)Y << 32) | X; +;} + +define i64 @test(i32 %A, i32 %B, i32 %C) { +entry: + %Y = alloca i32, align 4 ; <i32*> [#uses=2] + %tmp4 = call i32 asm "subf${3:I}c $1,$4,$3\0A\09subfze $0,$2", "=r,=*&r,r,rI,r"( i32* %Y, i32 %A, i32 %B, i32 %C ) ; <i32> [#uses=1] + %tmp5 = load i32* %Y ; <i32> [#uses=1] + %tmp56 = zext i32 %tmp5 to i64 ; <i64> [#uses=1] + %tmp7 = shl i64 %tmp56, 32 ; <i64> [#uses=1] + %tmp89 = zext i32 %tmp4 to i64 ; <i64> [#uses=1] + %tmp10 = or i64 %tmp7, %tmp89 ; <i64> [#uses=1] + ret i64 %tmp10 +} diff --git a/test/CodeGen/PowerPC/2007-05-03-InlineAsm-S-Constraint.ll b/test/CodeGen/PowerPC/2007-05-03-InlineAsm-S-Constraint.ll new file mode 100644 index 0000000..989a751 --- /dev/null +++ b/test/CodeGen/PowerPC/2007-05-03-InlineAsm-S-Constraint.ll @@ -0,0 +1,12 @@ +; RUN: llvm-as < %s | llc +; PR1382 + +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" +target triple = "powerpc-apple-darwin8.8.0" +@x = global [2 x i32] [ i32 1, i32 2 ] ; <[2 x i32]*> [#uses=1] + +define void @foo() { +entry: + tail call void asm sideeffect "$0 
$1", "s,i"( i8* bitcast (i32* getelementptr ([2 x i32]* @x, i32 0, i32 1) to i8*), i8* bitcast (i32* getelementptr ([2 x i32]* @x, i32 0, i32 1) to i8*) ) + ret void +} diff --git a/test/CodeGen/PowerPC/2007-05-14-InlineAsmSelectCrash.ll b/test/CodeGen/PowerPC/2007-05-14-InlineAsmSelectCrash.ll new file mode 100644 index 0000000..b64de68 --- /dev/null +++ b/test/CodeGen/PowerPC/2007-05-14-InlineAsmSelectCrash.ll @@ -0,0 +1,25 @@ +; RUN: llvm-as < %s | llc -march=ppc32 +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" +target triple = "powerpc-apple-darwin8.8.0" + %struct..0anon = type { i32 } + %struct.A = type { %struct.anon } + %struct.anon = type <{ }> + +define void @bork(%struct.A* %In0P) { +entry: + %tmp56 = bitcast %struct.A* %In0P to float* ; <float*> [#uses=1] + br label %bb + +bb: ; preds = %bb, %entry + %i.035.0 = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2] + %tmp8 = getelementptr float* %tmp56, i32 %i.035.0 ; <float*> [#uses=2] + %tmp101112 = bitcast float* %tmp8 to i8* ; <i8*> [#uses=1] + %tmp1617 = bitcast float* %tmp8 to i32* ; <i32*> [#uses=1] + %tmp21 = tail call i32 asm "lwbrx $0, $2, $1", "=r,r,bO,*m"( i8* %tmp101112, i32 0, i32* %tmp1617 ) ; <i32> [#uses=0] + %indvar.next = add i32 %i.035.0, 1 ; <i32> [#uses=2] + %exitcond = icmp eq i32 %indvar.next, 4 ; <i1> [#uses=1] + br i1 %exitcond, label %return, label %bb + +return: ; preds = %bb + ret void +} diff --git a/test/CodeGen/PowerPC/2007-05-22-tailmerge-3.ll b/test/CodeGen/PowerPC/2007-05-22-tailmerge-3.ll new file mode 100644 index 0000000..0aebeb9 --- /dev/null +++ b/test/CodeGen/PowerPC/2007-05-22-tailmerge-3.ll @@ -0,0 +1,68 @@ +; RUN: llvm-as < %s | llc -march=ppc32 | grep bl.*baz | wc -l | grep 2 +; RUN: llvm-as < %s | llc -march=ppc32 | grep bl.*quux | wc -l | grep 2 +; RUN: llvm-as < %s | llc -march=ppc32 -enable-tail-merge | grep bl.*baz | wc -l | grep 1 +; RUN: llvm-as < %s | llc 
-march=ppc32 -enable-tail-merge=1 | grep bl.*quux | wc -l | grep 1 +; Check that tail merging is not the default on ppc, and that -enable-tail-merge works. + +; ModuleID = 'tail.c' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" +target triple = "i686-apple-darwin8" + +define i32 @f(i32 %i, i32 %q) { +entry: + %i_addr = alloca i32 ; <i32*> [#uses=2] + %q_addr = alloca i32 ; <i32*> [#uses=2] + %retval = alloca i32, align 4 ; <i32*> [#uses=1] + "alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + store i32 %i, i32* %i_addr + store i32 %q, i32* %q_addr + %tmp = load i32* %i_addr ; <i32> [#uses=1] + %tmp1 = icmp ne i32 %tmp, 0 ; <i1> [#uses=1] + %tmp12 = zext i1 %tmp1 to i8 ; <i8> [#uses=1] + %toBool = icmp ne i8 %tmp12, 0 ; <i1> [#uses=1] + br i1 %toBool, label %cond_true, label %cond_false + +cond_true: ; preds = %entry + %tmp3 = call i32 (...)* @bar( ) ; <i32> [#uses=0] + %tmp4 = call i32 (...)* @baz( i32 5, i32 6 ) ; <i32> [#uses=0] + %tmp7 = load i32* %q_addr ; <i32> [#uses=1] + %tmp8 = icmp ne i32 %tmp7, 0 ; <i1> [#uses=1] + %tmp89 = zext i1 %tmp8 to i8 ; <i8> [#uses=1] + %toBool10 = icmp ne i8 %tmp89, 0 ; <i1> [#uses=1] + br i1 %toBool10, label %cond_true11, label %cond_false15 + +cond_false: ; preds = %entry + %tmp5 = call i32 (...)* @foo( ) ; <i32> [#uses=0] + %tmp6 = call i32 (...)* @baz( i32 5, i32 6 ) ; <i32> [#uses=0] + %tmp27 = load i32* %q_addr ; <i32> [#uses=1] + %tmp28 = icmp ne i32 %tmp27, 0 ; <i1> [#uses=1] + %tmp289 = zext i1 %tmp28 to i8 ; <i8> [#uses=1] + %toBool210 = icmp ne i8 %tmp289, 0 ; <i1> [#uses=1] + br i1 %toBool210, label %cond_true11, label %cond_false15 + +cond_true11: ; preds = %cond_next + %tmp13 = call i32 (...)* @foo( ) ; <i32> [#uses=0] + %tmp14 = call i32 (...)* @quux( i32 3, i32 4 ) ; <i32> [#uses=0] + br label %cond_next18 + +cond_false15: ; preds = %cond_next + %tmp16 = call i32 (...)* @bar( ) ; <i32> [#uses=0] + %tmp17 = call i32 (...)* 
@quux( i32 3, i32 4 ) ; <i32> [#uses=0] + br label %cond_next18 + +cond_next18: ; preds = %cond_false15, %cond_true11 + %tmp19 = call i32 (...)* @bar( ) ; <i32> [#uses=0] + br label %return + +return: ; preds = %cond_next18 + %retval20 = load i32* %retval ; <i32> [#uses=1] + ret i32 %retval20 +} + +declare i32 @bar(...) + +declare i32 @baz(...) + +declare i32 @foo(...) + +declare i32 @quux(...) diff --git a/test/CodeGen/PowerPC/2007-05-30-dagcombine-miscomp.ll b/test/CodeGen/PowerPC/2007-05-30-dagcombine-miscomp.ll new file mode 100644 index 0000000..0ea76c7 --- /dev/null +++ b/test/CodeGen/PowerPC/2007-05-30-dagcombine-miscomp.ll @@ -0,0 +1,14 @@ +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" +target triple = "powerpc-apple-darwin8.8.0" + +; RUN: llvm-as < %s | llc -march=ppc32 | grep {rlwinm r3, r3, 23, 30, 30} +; PR1473 + +define i8 @foo(i16 zext %a) zext { + %tmp2 = lshr i16 %a, 10 ; <i16> [#uses=1] + %tmp23 = trunc i16 %tmp2 to i8 ; <i8> [#uses=1] + %tmp4 = shl i8 %tmp23, 1 ; <i8> [#uses=1] + %tmp5 = and i8 %tmp4, 2 ; <i8> [#uses=1] + ret i8 %tmp5 +} + diff --git a/test/CodeGen/PowerPC/2007-06-28-BCCISelBug.ll b/test/CodeGen/PowerPC/2007-06-28-BCCISelBug.ll new file mode 100644 index 0000000..58260ec --- /dev/null +++ b/test/CodeGen/PowerPC/2007-06-28-BCCISelBug.ll @@ -0,0 +1,85 @@ +; RUN: llvm-as < %s | llc -march=ppc32 -mattr=+altivec + + %struct.XATest = type { float, i16, i8, i8 } + %struct.XArrayRange = type { i8, i8, i8, i8 } + %struct.XBlendMode = type { i16, i16, i16, i16, %struct.GIC4, i16, i16, i8, i8, i8, i8 } + %struct.XClearC = type { double, %struct.GIC4, %struct.GIC4, float, i32 } + %struct.XClipPlane = type { i32, [6 x %struct.GIC4] } + %struct.XCBuffer = type { i16, i16, [8 x i16] } + %struct.XCMatrix = type { [16 x float]*, %struct.XICSS } + %struct.XConvolution = type { %struct.GIC4, %struct.XICSS, i16, i16, float*, i32, i32 } + %struct.XDepthTest = type { 
i16, i16, i8, i8, i8, i8, double, double } + %struct.XFixedFunctionProgram = type { %struct.PPSToken* } + %struct.XFogMode = type { %struct.GIC4, float, float, float, float, float, i16, i16, i16, i8, i8 } + %struct.XFramebufferAttachment = type { i32, i32, i32, i32 } + %struct.XHintMode = type { i16, i16, i16, i16, i16, i16, i16, i16, i16, i16 } + %struct.XHistogram = type { %struct.XFramebufferAttachment*, i32, i16, i8, i8 } + %struct.XICSS = type { %struct.GTCoord2, %struct.GTCoord2, %struct.GTCoord2, %struct.GTCoord2 } + %struct.XISubset = type { %struct.XConvolution, %struct.XConvolution, %struct.XConvolution, %struct.XCMatrix, %struct.XMinmax, %struct.XHistogram, %struct.XICSS, %struct.XICSS, %struct.XICSS, %struct.XICSS, i32 } + %struct.XLight = type { %struct.GIC4, %struct.GIC4, %struct.GIC4, %struct.GIC4, %struct.XPointLineLimits, float, float, float, float, float, %struct.XPointLineLimits, float, float, float, float, float } + %struct.XLightModel = type { %struct.GIC4, [8 x %struct.XLight], [2 x %struct.XMaterial], i32, i16, i16, i16, i8, i8, i8, i8, i8, i8 } + %struct.XLightProduct = type { %struct.GIC4, %struct.GIC4, %struct.GIC4 } + %struct.XLineMode = type { float, i32, i16, i16, i8, i8, i8, i8 } + %struct.XLogicOp = type { i16, i8, i8 } + %struct.XMaskMode = type { i32, [3 x i32], i8, i8, i8, i8, i8, i8, i8, i8 } + %struct.XMaterial = type { %struct.GIC4, %struct.GIC4, %struct.GIC4, %struct.GIC4, float, float, float, float, [8 x %struct.XLightProduct], %struct.GIC4, [6 x i32], [2 x i32] } + %struct.XMinmax = type { %struct.XMinmaxTable*, i16, i8, i8 } + %struct.XMinmaxTable = type { %struct.GIC4, %struct.GIC4 } + %struct.XMipmaplevel = type { [4 x i32], [4 x i32], [4 x float], [4 x i32], i32, i32, float*, i8*, i16, i16, i16, i16, [2 x float] } + %struct.XMultisample = type { float, i8, i8, i8, i8, i8, i8, i8, i8 } + %struct.XPipelineProgramState = type { i8, i8, i8, i8, %struct.GIC4* } + %struct.XPMap = type { i32*, float*, float*, float*, float*, 
float*, float*, float*, float*, i32*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } + %struct.XPMode = type { float, float, %struct.XPStore, %struct.XPTransfer, %struct.XPMap, %struct.XISubset, i32, i32 } + %struct.XPPack = type { i32, i32, i32, i32, i32, i32, i32, i32, i8, i8, i8, i8 } + %struct.XPStore = type { %struct.XPPack, %struct.XPPack } + %struct.XPTransfer = type { float, float, float, float, float, float, float, float, float, float, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float } + %struct.XPointLineLimits = type { float, float, float } + %struct.XPointMode = type { float, float, float, float, %struct.XPointLineLimits, float, i8, i8, i8, i8, i16, i16, i32, i16, i16 } + %struct.XPGMode = type { [128 x i8], float, float, i16, i16, i16, i16, i8, i8, i8, i8, i8, i8, i8, i8 } + %struct.XRegisterCCs = type { i8, i8, i8, i8, i32, [2 x %struct.GIC4], [8 x %struct.XRegisterCCsPerStageState], %struct.XRegisterCCsFinalStageState } + %struct.XRegisterCCsFinalStageState = type { i8, i8, i8, i8, [7 x %struct.XRegisterCCsPerVariableState] } + %struct.XRegisterCCsPerPortionState = type { [4 x %struct.XRegisterCCsPerVariableState], i8, i8, i8, i8, i16, i16, i16, i16, i16, i16 } + %struct.XRegisterCCsPerStageState = type { [2 x %struct.XRegisterCCsPerPortionState], [2 x %struct.GIC4] } + %struct.XRegisterCCsPerVariableState = type { i16, i16, i16, i16 } + %struct.XScissorTest = type { %struct.XFramebufferAttachment, i8, i8, i8, i8 } + %struct.XState = type { i16, i16, i16, i16, i32, i32, [256 x %struct.GIC4], [128 x %struct.GIC4], %struct.XViewport, %struct.XXF, %struct.XLightModel, %struct.XATest, %struct.XBlendMode, %struct.XClearC, %struct.XCBuffer, %struct.XDepthTest, %struct.XArrayRange, %struct.XFogMode, %struct.XHintMode, %struct.XLineMode, %struct.XLogicOp, %struct.XMaskMode, %struct.XPMode, %struct.XPointMode, %struct.XPGMode, %struct.XScissorTest, i32, %struct.XStencilTest, [16 x %struct.XTMode], 
%struct.XArrayRange, [8 x %struct.XTCoordGen], %struct.XClipPlane, %struct.XMultisample, %struct.XRegisterCCs, %struct.XArrayRange, %struct.XArrayRange, [3 x %struct.XPipelineProgramState], %struct.XXFFeedback, i32*, %struct.XFixedFunctionProgram, [3 x i32] } + %struct.XStencilTest = type { [3 x { i32, i32, i16, i16, i16, i16 }], i32, [4 x i8] } + %struct.XTCoordGen = type { { i16, i16, %struct.GIC4, %struct.GIC4 }, { i16, i16, %struct.GIC4, %struct.GIC4 }, { i16, i16, %struct.GIC4, %struct.GIC4 }, { i16, i16, %struct.GIC4, %struct.GIC4 }, i8, i8, i8, i8 } + %struct.XTGeomState = type { i16, i16, i16, i16, i16, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, [6 x i16], [6 x i16] } + %struct.XTLevel = type { i32, i32, i16, i16, i16, i8, i8, i16, i16, i16, i16, i8* } + %struct.XTMode = type { %struct.GIC4, i32, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, float, float, float, i16, i16, i16, i16, i16, i16, [4 x i16], i8, i8, i8, i8, [3 x float], [4 x float], float, float } + %struct.XTParamState = type { i16, i16, i16, i16, i16, i16, %struct.GIC4, float, float, float, float, i16, i16, i16, i16, float, i16, i8, i8, i32, i8* } + %struct.XTRec = type { %struct.XTState*, float, float, float, float, %struct.XMipmaplevel*, %struct.XMipmaplevel*, i32, i32, i32, i32, i32, i32, i32, [2 x %struct.PPSToken] } + %struct.XTState = type { i16, i8, i8, i16, i16, float, i32, %struct.GISWRSurface*, %struct.XTParamState, %struct.XTGeomState, %struct.XTLevel, [6 x [15 x %struct.XTLevel]] } + %struct.XXF = type { [24 x [16 x float]], [24 x [16 x float]], [16 x float], float, float, float, float, float, i8, i8, i8, i8, i32, i32, i32, i16, i16, i8, i8, i8, i8, i32 } + %struct.XXFFeedback = type { i8, i8, i8, i8, [16 x i32], [16 x i32] } + %struct.XViewport = type { float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, double, double, i32, i32, i32, i32, 
float, float, float, float } + %struct.GIC4 = type { float, float, float, float } + %struct.GISWRSurface = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, i8*, i8*, [4 x i8*], i32 } + %struct.GTCoord2 = type { float, float } + %struct.GVMFPContext = type { float, i32, i32, i32, float, [3 x float] } + %struct.GVMFPStack = type { [8 x i8*], i8*, i8*, i32, i32, { <4 x float> }, { <4 x float> }, <4 x i32> } + %struct.GVMFGAttrib = type { <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, [8 x <4 x float>] } + %struct.GVMTs = type { [16 x %struct.XTRec*] } + %struct.PPSToken = type { { i16, i16, i32 } } + %struct._GVMConstants = type { <4 x i32>, <4 x i32>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, float, float, float, float, float, float, float, float, float, float, float, float, [256 x float], [528 x i8] } + +declare <4 x i32> @llvm.ppc.altivec.lvewx(i8*) + +declare i32 @llvm.ppc.altivec.vcmpequw.p(i32, <4 x i32>, <4 x i32>) + +define void @test(%struct.XState* %gldst, <4 x float>* %prgrm, <4 x float>** %buffs, %struct._GVMConstants* %cnstn, %struct.PPSToken* %pstrm, %struct.GVMFPContext* %vmctx, %struct.GVMTs* %txtrs, %struct.GVMFPStack* %fpstk, %struct.GVMFGAttrib* %start, %struct.GVMFGAttrib* %deriv, i32 %fragx, i32 %fragy) { +bb58.i: + %tmp3405.i = getelementptr %struct.XTRec* null, i32 0, i32 1 ; <float*> [#uses=1] + %tmp34053406.i = bitcast float* %tmp3405.i to i8* ; <i8*> [#uses=1] + %tmp3407.i = call <4 x i32> @llvm.ppc.altivec.lvewx( i8* %tmp34053406.i ) ; <<4 x i32>> [#uses=0] + %tmp4146.i = call i32 @llvm.ppc.altivec.vcmpequw.p( i32 3, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32> [#uses=1] + %tmp4147.i = icmp eq i32 %tmp4146.i, 0 ; <i1> [#uses=1] + br i1 %tmp4147.i, label %bb8799.i, label %bb4150.i + +bb4150.i: ; preds = %bb58.i + br label %bb8799.i + +bb8799.i: ; preds = %bb4150.i, %bb58.i + ret void +} diff --git 
a/test/CodeGen/PowerPC/Frames-align.ll b/test/CodeGen/PowerPC/Frames-align.ll new file mode 100644 index 0000000..a7c02cc --- /dev/null +++ b/test/CodeGen/PowerPC/Frames-align.ll @@ -0,0 +1,16 @@ +; RUN: llvm-upgrade < %s | llvm-as | \ +; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \ +; RUN: grep {rlwinm r0, r1, 0, 22, 31} +; RUN: llvm-upgrade < %s | llvm-as | \ +; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \ +; RUN: grep {subfic r0, r0, -16448} +; RUN: llvm-upgrade < %s | llvm-as | \ +; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 | \ +; RUN: grep {rldicl r0, r1, 0, 54} + +implementation + +int* %f1() { + %tmp = alloca int, uint 4095, align 1024 + ret int* %tmp +} diff --git a/test/CodeGen/PowerPC/Frames-alloca.ll b/test/CodeGen/PowerPC/Frames-alloca.ll new file mode 100644 index 0000000..205cf9a --- /dev/null +++ b/test/CodeGen/PowerPC/Frames-alloca.ll @@ -0,0 +1,55 @@ +; RUN: llvm-upgrade < %s | llvm-as | \ +; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \ +; RUN: grep {stw r31, 20(r1)} +; RUN: llvm-upgrade < %s | llvm-as | \ +; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \ +; RUN: grep {stwu r1, -64(r1)} +; RUN: llvm-upgrade < %s | llvm-as | \ +; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | grep {lwz r1, 0(r1)} +; RUN: llvm-upgrade < %s | llvm-as | \ +; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \ +; RUN: grep {lwz r31, 20(r1)} +; RUN: llvm-upgrade < %s | llvm-as | \ +; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \ +; RUN: grep {stw r31, 20(r1)} +; RUN: llvm-upgrade < %s | llvm-as | \ +; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \ +; RUN: grep {stwu r1, -64(r1)} +; RUN: llvm-upgrade < %s | llvm-as | \ +; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \ +; RUN: grep {lwz r1, 0(r1)} +; RUN: llvm-upgrade < %s | llvm-as | \ +; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \ +; RUN: 
grep {lwz r31, 20(r1)} +; RUN: llvm-upgrade < %s | llvm-as | \ +; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 | \ +; RUN: grep {std r31, 40(r1)} +; RUN: llvm-upgrade < %s | llvm-as | \ +; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 | \ +; RUN: grep {stdu r1, -112(r1)} +; RUN: llvm-upgrade < %s | llvm-as | \ +; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 | \ +; RUN: grep {ld r1, 0(r1)} +; RUN: llvm-upgrade < %s | llvm-as | \ +; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 | \ +; RUN: grep {ld r31, 40(r1)} +; RUN: llvm-upgrade < %s | llvm-as | \ +; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \ +; RUN: grep {std r31, 40(r1)} +; RUN: llvm-upgrade < %s | llvm-as | \ +; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \ +; RUN: grep {stdu r1, -112(r1)} +; RUN: llvm-upgrade < %s | llvm-as | \ +; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \ +; RUN: grep {ld r1, 0(r1)} +; RUN: llvm-upgrade < %s | llvm-as | \ +; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \ +; RUN: grep {ld r31, 40(r1)} + + +implementation + +int* %f1(uint %n) { + %tmp = alloca int, uint %n + ret int* %tmp +} diff --git a/test/CodeGen/PowerPC/Frames-large.ll b/test/CodeGen/PowerPC/Frames-large.ll new file mode 100644 index 0000000..1f58fe0 --- /dev/null +++ b/test/CodeGen/PowerPC/Frames-large.ll @@ -0,0 +1,79 @@ +; RUN: llvm-upgrade < %s | llvm-as | \ +; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \ +; RUN: not grep {stw r31, 20(r1)} +; RUN: llvm-upgrade < %s | llvm-as | \ +; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | grep {lis r0, -1} +; RUN: llvm-upgrade < %s | llvm-as | \ +; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \ +; RUN: grep {ori r0, r0, 32704} +; RUN: llvm-upgrade < %s | llvm-as | \ +; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \ +; RUN: grep {stwux r1, r1, r0} +; RUN: llvm-upgrade < %s | llvm-as | \ +; RUN: 
llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \ +; RUN: grep {lwz r1, 0(r1)} +; RUN: llvm-upgrade < %s | llvm-as | \ +; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \ +; RUN: not grep {lwz r31, 20(r1)} +; RUN: llvm-upgrade < %s | llvm-as | \ +; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \ +; RUN: grep {stw r31, 20(r1)} +; RUN: llvm-upgrade < %s | llvm-as | \ +; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \ +; RUN: grep {lis r0, -1} +; RUN: llvm-upgrade < %s | llvm-as | \ +; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \ +; RUN: grep {ori r0, r0, 32704} +; RUN: llvm-upgrade < %s | llvm-as | \ +; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \ +; RUN: grep {stwux r1, r1, r0} +; RUN: llvm-upgrade < %s | llvm-as | \ +; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \ +; RUN: grep {lwz r1, 0(r1)} +; RUN: llvm-upgrade < %s | llvm-as | \ +; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \ +; RUN: grep {lwz r31, 20(r1)} +; RUN: llvm-upgrade < %s | llvm-as | \ +; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 | \ +; RUN: not grep {std r31, 40(r1)} +; RUN: llvm-upgrade < %s | llvm-as | \ +; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 | \ +; RUN: grep {lis r0, -1} +; RUN: llvm-upgrade < %s | llvm-as | \ +; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 | \ +; RUN: grep {ori r0, r0, 32656} +; RUN: llvm-upgrade < %s | llvm-as | \ +; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 | \ +; RUN: grep {stdux r1, r1, r0} +; RUN: llvm-upgrade < %s | llvm-as | \ +; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 | \ +; RUN: grep {ld r1, 0(r1)} +; RUN: llvm-upgrade < %s | llvm-as | \ +; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 | \ +; RUN: not grep {ld r31, 40(r1)} +; RUN: llvm-upgrade < %s | llvm-as | \ +; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 
-disable-fp-elim | \ +; RUN: grep {std r31, 40(r1)} +; RUN: llvm-upgrade < %s | llvm-as | \ +; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \ +; RUN: grep {lis r0, -1} +; RUN: llvm-upgrade < %s | llvm-as | \ +; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \ +; RUN: grep {ori r0, r0, 32656} +; RUN: llvm-upgrade < %s | llvm-as | \ +; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \ +; RUN: grep {stdux r1, r1, r0} +; RUN: llvm-upgrade < %s | llvm-as | \ +; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \ +; RUN: grep {ld r1, 0(r1)} +; RUN: llvm-upgrade < %s | llvm-as | \ +; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | \ +; RUN: grep {ld r31, 40(r1)} + + +implementation + +int* %f1() { + %tmp = alloca int, uint 8191 + ret int* %tmp +} diff --git a/test/CodeGen/PowerPC/Frames-leaf.ll b/test/CodeGen/PowerPC/Frames-leaf.ll new file mode 100644 index 0000000..9de1bde --- /dev/null +++ b/test/CodeGen/PowerPC/Frames-leaf.ll @@ -0,0 +1,40 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | \ +; RUN: not grep {stw r31, 20(r1)} +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | \ +; RUN: not grep {stwu r1, -.*(r1)} +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | \ +; RUN: not grep {addi r1, r1, } +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | \ +; RUN: not grep {lwz r31, 20(r1)} +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -disable-fp-elim | \ +; RUN: not grep {stw r31, 20(r1)} +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -disable-fp-elim | \ +; RUN: not grep {stwu r1, -.*(r1)} +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -disable-fp-elim | \ +; RUN: not grep {addi r1, r1, } +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -disable-fp-elim | \ +; RUN: not grep {lwz r31, 20(r1)} +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc64 | \ +; RUN: not grep {std r31, 40(r1)} +; RUN: 
llvm-upgrade < %s | llvm-as | llc -march=ppc64 | \ +; RUN: not grep {stdu r1, -.*(r1)} +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc64 | \ +; RUN: not grep {addi r1, r1, } +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc64 | \ +; RUN: not grep {ld r31, 40(r1)} +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc64 -disable-fp-elim | \ +; RUN: not grep {stw r31, 40(r1)} +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc64 -disable-fp-elim | \ +; RUN: not grep {stdu r1, -.*(r1)} +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc64 -disable-fp-elim | \ +; RUN: not grep {addi r1, r1, } +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc64 -disable-fp-elim | \ +; RUN: not grep {ld r31, 40(r1)} + + +implementation + +int* %f1() { + %tmp = alloca int, uint 2 + ret int* %tmp +} diff --git a/test/CodeGen/PowerPC/Frames-small.ll b/test/CodeGen/PowerPC/Frames-small.ll new file mode 100644 index 0000000..549083a --- /dev/null +++ b/test/CodeGen/PowerPC/Frames-small.ll @@ -0,0 +1,34 @@ +; RUN: llvm-upgrade < %s | llvm-as | \ +; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -o %t1 -f +; RUN: not grep {stw r31, 20(r1)} %t1 +; RUN: grep {stwu r1, -16448(r1)} %t1 +; RUN: grep {addi r1, r1, 16448} %t1 +; RUN: llvm-upgrade < %s | llvm-as | \ +; RUN: not grep {lwz r31, 20(r1)} +; RUN: llvm-upgrade < %s | llvm-as | \ +; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim \ +; RUN: -o %t2 -f +; RUN: grep {stw r31, 20(r1)} %t2 +; RUN: grep {stwu r1, -16448(r1)} %t2 +; RUN: grep {addi r1, r1, 16448} %t2 +; RUN: grep {lwz r31, 20(r1)} %t2 +; RUN: llvm-upgrade < %s | llvm-as | \ +; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 -o %t3 -f +; RUN: not grep {std r31, 40(r1)} %t3 +; RUN: grep {stdu r1, -16496(r1)} %t3 +; RUN: grep {addi r1, r1, 16496} %t3 +; RUN: not grep {ld r31, 40(r1)} %t3 +; RUN: llvm-upgrade < %s | llvm-as | \ +; RUN: llc -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim \ +; RUN: -o %t4 -f +; RUN: grep {std r31, 
40(r1)} %t4 +; RUN: grep {stdu r1, -16496(r1)} %t4 +; RUN: grep {addi r1, r1, 16496} %t4 +; RUN: grep {ld r31, 40(r1)} %t4 + +implementation + +int* %f1() { + %tmp = alloca int, uint 4095 + ret int* %tmp +} diff --git a/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll b/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll new file mode 100644 index 0000000..1705379 --- /dev/null +++ b/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll @@ -0,0 +1,17 @@ +; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin | \ +; RUN: grep {stw r3, 32751} +; RUN: llvm-as < %s | llc -march=ppc64 -mtriple=powerpc-apple-darwin | \ +; RUN: grep {stw r3, 32751} +; RUN: llvm-as < %s | llc -march=ppc64 -mtriple=powerpc-apple-darwin | \ +; RUN: grep {std r2, 9024} + +define void @test() { + store i32 0, i32* inttoptr (i64 48725999 to i32*) + ret void +} + +define void @test2() { + store i64 0, i64* inttoptr (i64 74560 to i64*) + ret void +} + diff --git a/test/CodeGen/PowerPC/addc.ll b/test/CodeGen/PowerPC/addc.ll new file mode 100644 index 0000000..b268389 --- /dev/null +++ b/test/CodeGen/PowerPC/addc.ll @@ -0,0 +1,27 @@ +; All of these should be codegen'd without loading immediates +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -o %t -f +; RUN: grep addc %t | wc -l | grep 1 +; RUN: grep adde %t | wc -l | grep 1 +; RUN: grep addze %t | wc -l | grep 1 +; RUN: grep addme %t | wc -l | grep 1 +; RUN: grep addic %t | wc -l | grep 2 + +implementation ; Functions: + +long %add_ll(long %a, long %b) { +entry: + %tmp.2 = add long %b, %a ; <long> [#uses=1] + ret long %tmp.2 +} + +long %add_l_5(long %a) { +entry: + %tmp.1 = add long %a, 5 ; <long> [#uses=1] + ret long %tmp.1 +} + +long %add_l_m5(long %a) { +entry: + %tmp.1 = add long %a, -5 ; <long> [#uses=1] + ret long %tmp.1 +} diff --git a/test/CodeGen/PowerPC/addi-reassoc.ll b/test/CodeGen/PowerPC/addi-reassoc.ll new file mode 100644 index 0000000..753f628 --- /dev/null +++ b/test/CodeGen/PowerPC/addi-reassoc.ll @@ -0,0 +1,20 @@ +; RUN: llvm-upgrade < 
%s | llvm-as | llc -march=ppc32 | not grep addi + + %struct.X = type { [5 x sbyte] } +implementation ; Functions: + +int %test1([4 x int]* %P, int %i) { + %tmp.2 = add int %i, 2 ; <int> [#uses=1] + %tmp.4 = getelementptr [4 x int]* %P, int %tmp.2, int 1 + %tmp.5 = load int* %tmp.4 + ret int %tmp.5 +} + +int %test2(%struct.X* %P, int %i) { + %tmp.2 = add int %i, 2 + %tmp.5 = getelementptr %struct.X* %P, int %tmp.2, uint 0, int 1 + %tmp.6 = load sbyte* %tmp.5 + %tmp.7 = cast sbyte %tmp.6 to int + ret int %tmp.7 +} + diff --git a/test/CodeGen/PowerPC/align.ll b/test/CodeGen/PowerPC/align.ll new file mode 100644 index 0000000..caf4a5d --- /dev/null +++ b/test/CodeGen/PowerPC/align.ll @@ -0,0 +1,12 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | \ +; RUN: grep align.4 | wc -l | grep 1 +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | \ +; RUN: grep align.2 | wc -l | grep 1 +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | \ +; RUN: grep align.3 | wc -l | grep 1 + + +%A = global <4 x uint> < uint 10, uint 20, uint 30, uint 40 > +%B = global float 1.000000e+02 +%C = global double 2.000000e+03 + diff --git a/test/CodeGen/PowerPC/and-branch.ll b/test/CodeGen/PowerPC/and-branch.ll new file mode 100644 index 0000000..4b0e7fa --- /dev/null +++ b/test/CodeGen/PowerPC/and-branch.ll @@ -0,0 +1,18 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | not grep mfcr + +void %foo(int %X, int %Y, int %Z) { +entry: + %tmp = seteq int %X, 0 ; <bool> [#uses=1] + %tmp3 = setlt int %Y, 5 ; <bool> [#uses=1] + %tmp4 = and bool %tmp3, %tmp ; <bool> [#uses=1] + br bool %tmp4, label %cond_true, label %UnifiedReturnBlock + +cond_true: ; preds = %entry + %tmp5 = tail call int (...)* %bar( ) ; <int> [#uses=0] + ret void + +UnifiedReturnBlock: ; preds = %entry + ret void +} + +declare int %bar(...) 
diff --git a/test/CodeGen/PowerPC/and-elim.ll b/test/CodeGen/PowerPC/and-elim.ll new file mode 100644 index 0000000..f85b3d8 --- /dev/null +++ b/test/CodeGen/PowerPC/and-elim.ll @@ -0,0 +1,18 @@ +; RUN: llvm-as < %s | llc -march=ppc32 | not grep rlwin + +define void @test(i8* %P) { + %W = load i8* %P + %X = shl i8 %W, 1 + %Y = add i8 %X, 2 + %Z = and i8 %Y, 254 ; dead and + store i8 %Z, i8* %P + ret void +} + +define i16 @test2(i16 zext %crc) zext { + ; No and's should be needed for the i16s here. + %tmp.1 = lshr i16 %crc, 1 + %tmp.7 = xor i16 %tmp.1, 40961 + ret i16 %tmp.7 +} + diff --git a/test/CodeGen/PowerPC/and-imm.ll b/test/CodeGen/PowerPC/and-imm.ll new file mode 100644 index 0000000..b1d9fcb --- /dev/null +++ b/test/CodeGen/PowerPC/and-imm.ll @@ -0,0 +1,12 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | not grep {ori\\|lis} + +int %test(int %X) { + %Y = and int %X, 32769 ; andi. r3, r3, 32769 + ret int %Y +} + +int %test2(int %X) { + %Y = and int %X, -2147418112 ; andis. r3, r3, 32769 + ret int %Y +} + diff --git a/test/CodeGen/PowerPC/and_add.ll b/test/CodeGen/PowerPC/and_add.ll new file mode 100644 index 0000000..1f6428a --- /dev/null +++ b/test/CodeGen/PowerPC/and_add.ll @@ -0,0 +1,12 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -o %t -f +; RUN: grep slwi %t +; RUN: not grep addi %t +; RUN: not grep rlwinm %t + +int %test(int %A) { + %B = mul int %A, 8 ;; shift + %C = add int %B, 7 ;; dead, no demanded bits. + %D = and int %C, -8 ;; dead once add is gone. + ret int %D +} + diff --git a/test/CodeGen/PowerPC/and_sext.ll b/test/CodeGen/PowerPC/and_sext.ll new file mode 100644 index 0000000..ac27798 --- /dev/null +++ b/test/CodeGen/PowerPC/and_sext.ll @@ -0,0 +1,28 @@ +; These tests should not contain a sign extend. 
+; RUN: llvm-as < %s | llc -march=ppc32 | not grep extsh +; RUN: llvm-as < %s | llc -march=ppc32 | not grep extsb + +define i32 @test1(i32 %mode.0.i.0) { + %tmp.79 = trunc i32 %mode.0.i.0 to i16 + %tmp.80 = sext i16 %tmp.79 to i32 + %tmp.81 = and i32 %tmp.80, 24 + ret i32 %tmp.81 +} + +define i16 @test2(i16 sext %X, i16 sext %x) sext { + %tmp = sext i16 %X to i32 + %tmp1 = sext i16 %x to i32 + %tmp2 = add i32 %tmp, %tmp1 + %tmp4 = ashr i32 %tmp2, 1 + %tmp5 = trunc i32 %tmp4 to i16 + %tmp45 = sext i16 %tmp5 to i32 + %retval = trunc i32 %tmp45 to i16 + ret i16 %retval +} + +define i16 @test3(i32 zext %X) sext { + %tmp1 = lshr i32 %X, 16 + %tmp2 = trunc i32 %tmp1 to i16 + ret i16 %tmp2 +} + diff --git a/test/CodeGen/PowerPC/and_sra.ll b/test/CodeGen/PowerPC/and_sra.ll new file mode 100644 index 0000000..abfa9f1 --- /dev/null +++ b/test/CodeGen/PowerPC/and_sra.ll @@ -0,0 +1,26 @@ +; Neither of these functions should contain algebraic right shifts +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | not grep srawi + +int %test1(uint %mode.0.i.0) { + %tmp.79 = cast uint %mode.0.i.0 to int ; <sbyte> [#uses=1] + %tmp.80 = shr int %tmp.79, ubyte 15 ; <int> [#uses=1] + %tmp.81 = and int %tmp.80, 24 ; <int> [#uses=1] + ret int %tmp.81 +} + +int %test2(uint %mode.0.i.0) { + %tmp.79 = cast uint %mode.0.i.0 to int ; <sbyte> [#uses=1] + %tmp.80 = shr int %tmp.79, ubyte 15 ; <int> [#uses=1] + %tmp.81 = shr uint %mode.0.i.0, ubyte 16 + %tmp.82 = cast uint %tmp.81 to int + %tmp.83 = and int %tmp.80, %tmp.82 ; <int> [#uses=1] + ret int %tmp.83 +} + +uint %test3(int %specbits.6.1) { + %tmp.2540 = shr int %specbits.6.1, ubyte 11 ; <int> [#uses=1] + %tmp.2541 = cast int %tmp.2540 to uint ; <uint> [#uses=1] + %tmp.2542 = shl uint %tmp.2541, ubyte 13 ; <uint> [#uses=1] + %tmp.2543 = and uint %tmp.2542, 8192 ; <uint> [#uses=1] + ret uint %tmp.2543 +} diff --git a/test/CodeGen/PowerPC/big-endian-actual-args.ll b/test/CodeGen/PowerPC/big-endian-actual-args.ll new file mode 100644 index 
0000000..d239357 --- /dev/null +++ b/test/CodeGen/PowerPC/big-endian-actual-args.ll @@ -0,0 +1,9 @@ +; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \ +; RUN: grep {addc 4, 4, 6} +; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \ +; RUN: grep {adde 3, 3, 5} + +define i64 @foo(i64 %x, i64 %y) { + %z = add i64 %x, %y + ret i64 %z +} diff --git a/test/CodeGen/PowerPC/big-endian-call-result.ll b/test/CodeGen/PowerPC/big-endian-call-result.ll new file mode 100644 index 0000000..ab136f6 --- /dev/null +++ b/test/CodeGen/PowerPC/big-endian-call-result.ll @@ -0,0 +1,13 @@ +; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \ +; RUN: grep {addic 4, 4, 1} +; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \ +; RUN: grep {addze 3, 3} + +declare i64 @foo() + +define i64 @bar() +{ + %t = call i64 @foo() + %s = add i64 %t, 1 + ret i64 %s +} diff --git a/test/CodeGen/PowerPC/big-endian-formal-args.ll b/test/CodeGen/PowerPC/big-endian-formal-args.ll new file mode 100644 index 0000000..08589f4 --- /dev/null +++ b/test/CodeGen/PowerPC/big-endian-formal-args.ll @@ -0,0 +1,15 @@ +; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \ +; RUN: grep {li 6, 3} +; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \ +; RUN: grep {li 4, 2} +; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \ +; RUN: grep {li 3, 0} +; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \ +; RUN: grep {mr 5, 3} + +declare void @bar(i64 %x, i64 %y) + +define void @foo() { + call void @bar(i64 2, i64 3) + ret void +} diff --git a/test/CodeGen/PowerPC/branch-opt.ll b/test/CodeGen/PowerPC/branch-opt.ll new file mode 100644 index 0000000..ab550a3 --- /dev/null +++ b/test/CodeGen/PowerPC/branch-opt.ll @@ -0,0 +1,93 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | \ +; RUN: grep {b LBB.*} | wc -l | grep 4 + 
+target endian = big +target pointersize = 32 +target triple = "powerpc-apple-darwin8.7.0" + +implementation ; Functions: + +void %foo(int %W, int %X, int %Y, int %Z) { +entry: + %X = cast int %X to uint ; <uint> [#uses=1] + %Y = cast int %Y to uint ; <uint> [#uses=1] + %Z = cast int %Z to uint ; <uint> [#uses=1] + %W = cast int %W to uint ; <uint> [#uses=1] + %tmp1 = and int %W, 1 ; <int> [#uses=1] + %tmp1 = seteq int %tmp1, 0 ; <bool> [#uses=1] + br bool %tmp1, label %cond_false, label %bb5 + +bb: ; preds = %bb5, %bb + %indvar77 = phi uint [ %indvar.next78, %bb ], [ 0, %bb5 ] ; <uint> [#uses=1] + %tmp2 = tail call int (...)* %bar( ) ; <int> [#uses=0] + %indvar.next78 = add uint %indvar77, 1 ; <uint> [#uses=2] + %exitcond79 = seteq uint %indvar.next78, %X ; <bool> [#uses=1] + br bool %exitcond79, label %cond_next48, label %bb + +bb5: ; preds = %entry + %tmp = seteq int %X, 0 ; <bool> [#uses=1] + br bool %tmp, label %cond_next48, label %bb + +cond_false: ; preds = %entry + %tmp10 = and int %W, 2 ; <int> [#uses=1] + %tmp10 = seteq int %tmp10, 0 ; <bool> [#uses=1] + br bool %tmp10, label %cond_false20, label %bb16 + +bb12: ; preds = %bb16, %bb12 + %indvar72 = phi uint [ %indvar.next73, %bb12 ], [ 0, %bb16 ] ; <uint> [#uses=1] + %tmp13 = tail call int (...)* %bar( ) ; <int> [#uses=0] + %indvar.next73 = add uint %indvar72, 1 ; <uint> [#uses=2] + %exitcond74 = seteq uint %indvar.next73, %Y ; <bool> [#uses=1] + br bool %exitcond74, label %cond_next48, label %bb12 + +bb16: ; preds = %cond_false + %tmp18 = seteq int %Y, 0 ; <bool> [#uses=1] + br bool %tmp18, label %cond_next48, label %bb12 + +cond_false20: ; preds = %cond_false + %tmp23 = and int %W, 4 ; <int> [#uses=1] + %tmp23 = seteq int %tmp23, 0 ; <bool> [#uses=1] + br bool %tmp23, label %cond_false33, label %bb29 + +bb25: ; preds = %bb29, %bb25 + %indvar67 = phi uint [ %indvar.next68, %bb25 ], [ 0, %bb29 ] ; <uint> [#uses=1] + %tmp26 = tail call int (...)* %bar( ) ; <int> [#uses=0] + %indvar.next68 = add uint 
%indvar67, 1 ; <uint> [#uses=2] + %exitcond69 = seteq uint %indvar.next68, %Z ; <bool> [#uses=1] + br bool %exitcond69, label %cond_next48, label %bb25 + +bb29: ; preds = %cond_false20 + %tmp31 = seteq int %Z, 0 ; <bool> [#uses=1] + br bool %tmp31, label %cond_next48, label %bb25 + +cond_false33: ; preds = %cond_false20 + %tmp36 = and int %W, 8 ; <int> [#uses=1] + %tmp36 = seteq int %tmp36, 0 ; <bool> [#uses=1] + br bool %tmp36, label %cond_next48, label %bb42 + +bb38: ; preds = %bb42 + %tmp39 = tail call int (...)* %bar( ) ; <int> [#uses=0] + %indvar.next = add uint %indvar, 1 ; <uint> [#uses=1] + br label %bb42 + +bb42: ; preds = %cond_false33, %bb38 + %indvar = phi uint [ %indvar.next, %bb38 ], [ 0, %cond_false33 ] ; <uint> [#uses=3] + %indvar = cast uint %indvar to int ; <int> [#uses=1] + %W_addr.0 = sub int %W, %indvar ; <int> [#uses=1] + %exitcond = seteq uint %indvar, %W ; <bool> [#uses=1] + br bool %exitcond, label %cond_next48, label %bb38 + +cond_next48: ; preds = %bb, %bb12, %bb25, %bb42, %cond_false33, %bb29, %bb16, %bb5 + %W_addr.1 = phi int [ %W, %bb5 ], [ %W, %bb16 ], [ %W, %bb29 ], [ %W, %cond_false33 ], [ %W_addr.0, %bb42 ], [ %W, %bb25 ], [ %W, %bb12 ], [ %W, %bb ] ; <int> [#uses=1] + %tmp50 = seteq int %W_addr.1, 0 ; <bool> [#uses=1] + br bool %tmp50, label %UnifiedReturnBlock, label %cond_true51 + +cond_true51: ; preds = %cond_next48 + %tmp52 = tail call int (...)* %bar( ) ; <int> [#uses=0] + ret void + +UnifiedReturnBlock: ; preds = %cond_next48 + ret void +} + +declare int %bar(...) 
diff --git a/test/CodeGen/PowerPC/bswap-load-store.ll b/test/CodeGen/PowerPC/bswap-load-store.ll new file mode 100644 index 0000000..0c4a117 --- /dev/null +++ b/test/CodeGen/PowerPC/bswap-load-store.ll @@ -0,0 +1,44 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | \ +; RUN: grep {stwbrx\\|lwbrx\\|sthbrx\\|lhbrx} | wc -l | grep 4 +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | not grep rlwinm +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | not grep rlwimi +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc64 | \ +; RUN: grep {stwbrx\\|lwbrx\\|sthbrx\\|lhbrx} | wc -l | grep 4 +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc64 | not grep rlwinm +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc64 | not grep rlwimi + +void %STWBRX(uint %i, sbyte* %ptr, int %off) { + %tmp1 = getelementptr sbyte* %ptr, int %off + %tmp1 = cast sbyte* %tmp1 to uint* + %tmp13 = tail call uint %llvm.bswap.i32(uint %i) + store uint %tmp13, uint* %tmp1 + ret void +} + +uint %LWBRX(sbyte* %ptr, int %off) { + %tmp1 = getelementptr sbyte* %ptr, int %off + %tmp1 = cast sbyte* %tmp1 to uint* + %tmp = load uint* %tmp1 + %tmp14 = tail call uint %llvm.bswap.i32( uint %tmp ) + ret uint %tmp14 +} + +void %STHBRX(ushort %s, sbyte* %ptr, int %off) { + %tmp1 = getelementptr sbyte* %ptr, int %off + %tmp1 = cast sbyte* %tmp1 to ushort* + %tmp5 = call ushort %llvm.bswap.i16( ushort %s ) + store ushort %tmp5, ushort* %tmp1 + ret void +} + +ushort %LHBRX(sbyte* %ptr, int %off) { + %tmp1 = getelementptr sbyte* %ptr, int %off + %tmp1 = cast sbyte* %tmp1 to ushort* + %tmp = load ushort* %tmp1 + %tmp6 = call ushort %llvm.bswap.i16(ushort %tmp) + ret ushort %tmp6 +} + +declare uint %llvm.bswap.i32(uint) + +declare ushort %llvm.bswap.i16(ushort) diff --git a/test/CodeGen/PowerPC/buildvec_canonicalize.ll b/test/CodeGen/PowerPC/buildvec_canonicalize.ll new file mode 100644 index 0000000..54cbdae --- /dev/null +++ b/test/CodeGen/PowerPC/buildvec_canonicalize.ll @@ -0,0 +1,27 @@ +; 
There should be exactly one vxor here. +; RUN: llvm-upgrade < %s | llvm-as | \ +; RUN: llc -march=ppc32 -mcpu=g5 --enable-unsafe-fp-math | \ +; RUN: grep vxor | wc -l | grep 1 + +; There should be exactly one vsplti here. +; RUN: llvm-upgrade < %s | llvm-as | \ +; RUN: llc -march=ppc32 -mcpu=g5 --enable-unsafe-fp-math | \ +; RUN: grep vsplti | wc -l | grep 1 + + +void %VXOR(<4 x float>* %P1, <4 x int>* %P2, <4 x float>* %P3) { + %tmp = load <4 x float>* %P3 + %tmp3 = load <4 x float>* %P1 + %tmp4 = mul <4 x float> %tmp, %tmp3 + store <4 x float> %tmp4, <4 x float>* %P3 + store <4 x float> zeroinitializer, <4 x float>* %P1 + store <4 x int> zeroinitializer, <4 x int>* %P2 + ret void +} + +void %VSPLTI(<4 x int>* %P2, <8 x short>* %P3) { + store <4 x int> cast (<16 x sbyte> < sbyte -1, sbyte -1, sbyte -1, sbyte -1, sbyte -1, sbyte -1, sbyte -1, sbyte -1, sbyte -1, sbyte -1, sbyte -1, sbyte -1, sbyte -1, sbyte -1, sbyte -1, sbyte -1 > to <4 x int>), <4 x int>* %P2 + store <8 x short> < short -1, short -1, short -1, short -1, short -1, short -1, short -1, short -1 >, <8 x short>* %P3 + ret void +} + diff --git a/test/CodeGen/PowerPC/calls.ll b/test/CodeGen/PowerPC/calls.ll new file mode 100644 index 0000000..f2a6003 --- /dev/null +++ b/test/CodeGen/PowerPC/calls.ll @@ -0,0 +1,31 @@ +; Test various forms of calls. 
+ +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | \ +; RUN: grep {bl } | wc -l | grep 2 +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | \ +; RUN: grep {bctrl} | wc -l | grep 1 +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | \ +; RUN: grep {bla } | wc -l | grep 1 + +declare void %foo() + +void %test_direct() { + call void %foo() + ret void +} + +void %test_extsym(sbyte *%P) { + free sbyte* %P + ret void +} + +void %test_indirect(void()* %fp) { + call void %fp() + ret void +} + +void %test_abs() { + %fp = cast int 400 to void()* + call void %fp() + ret void +} diff --git a/test/CodeGen/PowerPC/cmp-cmp.ll b/test/CodeGen/PowerPC/cmp-cmp.ll new file mode 100644 index 0000000..6dbe484 --- /dev/null +++ b/test/CodeGen/PowerPC/cmp-cmp.ll @@ -0,0 +1,15 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | not grep mfcr + +void %test(long %X) { + %tmp1 = and long %X, 3 ; <long> [#uses=1] + %tmp = setgt long %tmp1, 2 ; <bool> [#uses=1] + br bool %tmp, label %UnifiedReturnBlock, label %cond_true + +cond_true: ; preds = %entry + tail call void %test(long 0) + ret void + +UnifiedReturnBlock: ; preds = %entry + ret void +} + diff --git a/test/CodeGen/PowerPC/compare-duplicate.ll b/test/CodeGen/PowerPC/compare-duplicate.ll new file mode 100644 index 0000000..df2dfdc --- /dev/null +++ b/test/CodeGen/PowerPC/compare-duplicate.ll @@ -0,0 +1,11 @@ +; RUN: llvm-as < %s | llc -mtriple=powerpc-apple-darwin8 | not grep slwi + +define i32 @test(i32 %A, i32 %B) { + %C = sub i32 %B, %A + %D = icmp eq i32 %C, %A + br i1 %D, label %T, label %F +T: + ret i32 19123 +F: + ret i32 %C +} diff --git a/test/CodeGen/PowerPC/compare-simm.ll b/test/CodeGen/PowerPC/compare-simm.ll new file mode 100644 index 0000000..b0ef2d3f --- /dev/null +++ b/test/CodeGen/PowerPC/compare-simm.ll @@ -0,0 +1,14 @@ +; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \ +; RUN: grep {cmpwi cr0, r3, -1} + +define i32 @test(i32 %x) { + %c = icmp eq i32 %x, -1 + br i1 %c, 
label %T, label %F +T: + %A = call i32 @test(i32 123) + %B = add i32 %A, 43 + ret i32 %B +F: + %G = add i32 %x, 1234 + ret i32 %G +} diff --git a/test/CodeGen/PowerPC/constants.ll b/test/CodeGen/PowerPC/constants.ll new file mode 100644 index 0000000..4689a62 --- /dev/null +++ b/test/CodeGen/PowerPC/constants.ll @@ -0,0 +1,54 @@ +; All of these routines should be perform optimal load of constants. +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | \ +; RUN: grep lis | wc -l | grep 5 +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | \ +; RUN: grep ori | wc -l | grep 3 +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | \ +; RUN: grep {li } | wc -l | grep 4 + +implementation ; Functions: + +int %f1() { +entry: + ret int 1 +} + +int %f2() { +entry: + ret int -1 +} + +int %f3() { +entry: + ret int 0 +} + +int %f4() { +entry: + ret int 32767 +} + +int %f5() { +entry: + ret int 65535 +} + +int %f6() { +entry: + ret int 65536 +} + +int %f7() { +entry: + ret int 131071 +} + +int %f8() { +entry: + ret int 2147483647 +} + +int %f9() { +entry: + ret int -2147483648 +} diff --git a/test/CodeGen/PowerPC/cttz.ll b/test/CodeGen/PowerPC/cttz.ll new file mode 100644 index 0000000..3751d66 --- /dev/null +++ b/test/CodeGen/PowerPC/cttz.ll @@ -0,0 +1,12 @@ +; Make sure this testcase does not use ctpop +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | grep -i cntlzw + +declare uint %llvm.cttz.i32(uint) + +implementation ; Functions: + +uint %bar(uint %x) { +entry: + %tmp.1 = call uint %llvm.cttz.i32( uint %x ) + ret uint %tmp.1 +} diff --git a/test/CodeGen/PowerPC/darwin-labels.ll b/test/CodeGen/PowerPC/darwin-labels.ll new file mode 100644 index 0000000..c8bf47c --- /dev/null +++ b/test/CodeGen/PowerPC/darwin-labels.ll @@ -0,0 +1,8 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc | grep {foo bar":} + +target endian = big +target pointersize = 32 +target triple = "powerpc-apple-darwin8.2.0" + +"foo bar" = global int 4 + diff --git a/test/CodeGen/PowerPC/dg.exp 
b/test/CodeGen/PowerPC/dg.exp new file mode 100644 index 0000000..22b60bc --- /dev/null +++ b/test/CodeGen/PowerPC/dg.exp @@ -0,0 +1,5 @@ +load_lib llvm.exp + +if { [llvm_supports_target PowerPC] } { + RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,llx,c,cpp,tr}]] +} diff --git a/test/CodeGen/PowerPC/div-2.ll b/test/CodeGen/PowerPC/div-2.ll new file mode 100644 index 0000000..a3cd73c --- /dev/null +++ b/test/CodeGen/PowerPC/div-2.ll @@ -0,0 +1,29 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | not grep srawi +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | grep blr + +int %test1(int %X) { + %Y = and int %X, 15 + %Z = div int %Y, 4 + ret int %Z +} + +int %test2(int %W) { + %X = and int %W, 15 + %Y = sub int 16, %X + %Z = div int %Y, 4 + ret int %Z +} + +int %test3(int %W) { + %X = and int %W, 15 + %Y = sub int 15, %X + %Z = div int %Y, 4 + ret int %Z +} + +int %test4(int %W) { + %X = and int %W, 2 + %Y = sub int 5, %X + %Z = div int %Y, 2 + ret int %Z +} diff --git a/test/CodeGen/PowerPC/eqv-andc-orc-nor.ll b/test/CodeGen/PowerPC/eqv-andc-orc-nor.ll new file mode 100644 index 0000000..359824c --- /dev/null +++ b/test/CodeGen/PowerPC/eqv-andc-orc-nor.ll @@ -0,0 +1,94 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | \ +; RUN: grep eqv | wc -l | grep 3 +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 | \ +; RUN: grep andc | wc -l | grep 3 +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | \ +; RUN: grep orc | wc -l | grep 2 +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 | \ +; RUN: grep nor | wc -l | grep 3 +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | \ +; RUN: grep nand | wc -l | grep 1 + +int %EQV1(int %X, int %Y) { + %A = xor int %X, %Y + %B = xor int %A, -1 + ret int %B +} + +int %EQV2(int %X, int %Y) { + %A = xor int %X, -1 + %B = xor int %A, %Y + ret int %B +} + +int %EQV3(int %X, int %Y) { + %A = xor int %X, -1 + %B = xor int %Y, %A + ret int %B +} + +int %ANDC1(int %X, 
int %Y) { + %A = xor int %Y, -1 + %B = and int %X, %A + ret int %B +} + +int %ANDC2(int %X, int %Y) { + %A = xor int %X, -1 + %B = and int %A, %Y + ret int %B +} + +int %ORC1(int %X, int %Y) { + %A = xor int %Y, -1 + %B = or int %X, %A + ret int %B +} + +int %ORC2(int %X, int %Y) { + %A = xor int %X, -1 + %B = or int %A, %Y + ret int %B +} + +int %NOR1(int %X) { + %Y = xor int %X, -1 + ret int %Y +} + +int %NOR2(int %X, int %Y) { + %Z = or int %X, %Y + %R = xor int %Z, -1 + ret int %R +} + +int %NAND1(int %X, int %Y) { + %Z = and int %X, %Y + %W = xor int %Z, -1 + ret int %W +} + +void %VNOR(<4 x float>* %P, <4 x float>* %Q) { + %tmp = load <4 x float>* %P + %tmp = cast <4 x float> %tmp to <4 x int> + %tmp2 = load <4 x float>* %Q + %tmp2 = cast <4 x float> %tmp2 to <4 x int> + %tmp3 = or <4 x int> %tmp, %tmp2 + %tmp4 = xor <4 x int> %tmp3, < int -1, int -1, int -1, int -1 > + %tmp4 = cast <4 x int> %tmp4 to <4 x float> + store <4 x float> %tmp4, <4 x float>* %P + ret void +} + +void %VANDC(<4 x float>* %P, <4 x float>* %Q) { + %tmp = load <4 x float>* %P + %tmp = cast <4 x float> %tmp to <4 x int> + %tmp2 = load <4 x float>* %Q + %tmp2 = cast <4 x float> %tmp2 to <4 x int> + %tmp4 = xor <4 x int> %tmp2, < int -1, int -1, int -1, int -1 > + %tmp3 = and <4 x int> %tmp, %tmp4 + %tmp4 = cast <4 x int> %tmp3 to <4 x float> + store <4 x float> %tmp4, <4 x float>* %P + ret void +} + diff --git a/test/CodeGen/PowerPC/extsh.ll b/test/CodeGen/PowerPC/extsh.ll new file mode 100644 index 0000000..0f4f512 --- /dev/null +++ b/test/CodeGen/PowerPC/extsh.ll @@ -0,0 +1,7 @@ +; This should turn into a single extsh +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | grep extsh | wc -l | grep 1 +int %test(int %X) { + %tmp.81 = shl int %X, ubyte 16 ; <int> [#uses=1] + %tmp.82 = shr int %tmp.81, ubyte 16 ; <int> [#uses=1] + ret int %tmp.82 +} diff --git a/test/CodeGen/PowerPC/fma.ll b/test/CodeGen/PowerPC/fma.ll new file mode 100644 index 0000000..da2790b --- /dev/null +++ 
b/test/CodeGen/PowerPC/fma.ll @@ -0,0 +1,47 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | \ +; RUN: egrep {fn?madd|fn?msub} | wc -l | grep 8 + +double %test_FMADD1(double %A, double %B, double %C) { + %D = mul double %A, %B + %E = add double %D, %C + ret double %E +} +double %test_FMADD2(double %A, double %B, double %C) { + %D = mul double %A, %B + %E = add double %D, %C + ret double %E +} +double %test_FMSUB(double %A, double %B, double %C) { + %D = mul double %A, %B + %E = sub double %D, %C + ret double %E +} +double %test_FNMADD1(double %A, double %B, double %C) { + %D = mul double %A, %B + %E = add double %D, %C + %F = sub double -0.0, %E + ret double %F +} +double %test_FNMADD2(double %A, double %B, double %C) { + %D = mul double %A, %B + %E = add double %C, %D + %F = sub double -0.0, %E + ret double %F +} +double %test_FNMSUB1(double %A, double %B, double %C) { + %D = mul double %A, %B + %E = sub double %C, %D + ret double %E +} +double %test_FNMSUB2(double %A, double %B, double %C) { + %D = mul double %A, %B + %E = sub double %D, %C + %F = sub double -0.0, %E + ret double %F +} +float %test_FNMSUBS(float %A, float %B, float %C) { + %D = mul float %A, %B + %E = sub float %D, %C + %F = sub float -0.0, %E + ret float %F +} diff --git a/test/CodeGen/PowerPC/fnabs.ll b/test/CodeGen/PowerPC/fnabs.ll new file mode 100644 index 0000000..5d0ef5f --- /dev/null +++ b/test/CodeGen/PowerPC/fnabs.ll @@ -0,0 +1,11 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | grep fnabs + +declare double %fabs(double) + +implementation + +double %test(double %X) { + %Y = call double %fabs(double %X) + %Z = sub double -0.0, %Y + ret double %Z +} diff --git a/test/CodeGen/PowerPC/fneg.ll b/test/CodeGen/PowerPC/fneg.ll new file mode 100644 index 0000000..a4f49f7 --- /dev/null +++ b/test/CodeGen/PowerPC/fneg.ll @@ -0,0 +1,12 @@ +; RUN: llvm-as < %s | llc -march=ppc32 | not grep fneg + +define double @test1(double %a, double %b, double %c, double %d) { +entry: + 
%tmp2 = sub double -0.000000e+00, %c ; <double> [#uses=1] + %tmp4 = mul double %tmp2, %d ; <double> [#uses=1] + %tmp7 = mul double %a, %b ; <double> [#uses=1] + %tmp9 = sub double %tmp7, %tmp4 ; <double> [#uses=1] + ret double %tmp9 +} + + diff --git a/test/CodeGen/PowerPC/fnegsel.ll b/test/CodeGen/PowerPC/fnegsel.ll new file mode 100644 index 0000000..b1b0645 --- /dev/null +++ b/test/CodeGen/PowerPC/fnegsel.ll @@ -0,0 +1,8 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | not grep fneg + +double %test_FNEG_sel(double %A, double %B, double %C) { + %D = sub double -0.0, %A + %Cond = setgt double %D, -0.0 + %E = select bool %Cond, double %B, double %C + ret double %E +} diff --git a/test/CodeGen/PowerPC/fold-li.ll b/test/CodeGen/PowerPC/fold-li.ll new file mode 100644 index 0000000..66a900f --- /dev/null +++ b/test/CodeGen/PowerPC/fold-li.ll @@ -0,0 +1,14 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | \ +; RUN: grep -v align | not grep li + +;; Test that immediates are folded into these instructions correctly. 
+ +int %ADD(int %X) { + %Y = add int %X, 65537 + ret int %Y +} + +int %SUB(int %X) { + %Y = sub int %X, 65537 + ret int %Y +} diff --git a/test/CodeGen/PowerPC/fp-branch.ll b/test/CodeGen/PowerPC/fp-branch.ll new file mode 100644 index 0000000..1a371ed --- /dev/null +++ b/test/CodeGen/PowerPC/fp-branch.ll @@ -0,0 +1,20 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | grep fcmp | wc -l | grep 1 + +declare bool %llvm.isunordered.f64(double, double) + +bool %intcoord_cond_next55(double %tmp48.reload) { +newFuncRoot: + br label %cond_next55 + +bb72.exitStub: ; preds = %cond_next55 + ret bool true + +cond_next62.exitStub: ; preds = %cond_next55 + ret bool false + +cond_next55: ; preds = %newFuncRoot + %tmp57 = setge double %tmp48.reload, 1.000000e+00 ; <bool> [#uses=1] + %tmp58 = tail call bool %llvm.isunordered.f64( double %tmp48.reload, double 1.000000e+00 ) ; <bool> [#uses=1] + %tmp59 = or bool %tmp57, %tmp58 ; <bool> [#uses=1] + br bool %tmp59, label %bb72.exitStub, label %cond_next62.exitStub +} diff --git a/test/CodeGen/PowerPC/fp-int-fp.ll b/test/CodeGen/PowerPC/fp-int-fp.ll new file mode 100644 index 0000000..63ebc49 --- /dev/null +++ b/test/CodeGen/PowerPC/fp-int-fp.ll @@ -0,0 +1,26 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 | not grep r1 + +double %test1(double %X) { + %Y = cast double %X to long + %Z = cast long %Y to double + ret double %Z +} + +float %test2(double %X) { + %Y = cast double %X to long + %Z = cast long %Y to float + ret float %Z +} + +double %test3(float %X) { + %Y = cast float %X to long + %Z = cast long %Y to double + ret double %Z +} + +float %test4(float %X) { + %Y = cast float %X to long + %Z = cast long %Y to float + ret float %Z +} + diff --git a/test/CodeGen/PowerPC/fp_to_uint.ll b/test/CodeGen/PowerPC/fp_to_uint.ll new file mode 100644 index 0000000..83468a4 --- /dev/null +++ b/test/CodeGen/PowerPC/fp_to_uint.ll @@ -0,0 +1,9 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | grep fctiwz | wc 
-l | grep 1 + +implementation + +ushort %foo(float %a) { +entry: + %tmp.1 = cast float %a to ushort + ret ushort %tmp.1 +} diff --git a/test/CodeGen/PowerPC/fpcopy.ll b/test/CodeGen/PowerPC/fpcopy.ll new file mode 100644 index 0000000..ce86da8 --- /dev/null +++ b/test/CodeGen/PowerPC/fpcopy.ll @@ -0,0 +1,6 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | not grep fmr + +double %test(float %F) { + %F = cast float %F to double + ret double %F +} diff --git a/test/CodeGen/PowerPC/fsqrt.ll b/test/CodeGen/PowerPC/fsqrt.ll new file mode 100644 index 0000000..809077b --- /dev/null +++ b/test/CodeGen/PowerPC/fsqrt.ll @@ -0,0 +1,21 @@ +; fsqrt should be generated when the fsqrt feature is enabled, but not +; otherwise. + +; RUN: llvm-upgrade < %s | llvm-as | \ +; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -mattr=+fsqrt | \ +; RUN: grep {fsqrt f1, f1} +; RUN: llvm-upgrade < %s | llvm-as | \ +; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -mcpu=g5 | \ +; RUN: grep {fsqrt f1, f1} +; RUN: llvm-upgrade < %s | llvm-as | \ +; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -mattr=-fsqrt | \ +; RUN: not grep {fsqrt f1, f1} +; RUN: llvm-upgrade < %s | llvm-as | \ +; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -mcpu=g4 | \ +; RUN: not grep {fsqrt f1, f1} + +declare double %llvm.sqrt.f64(double) +double %X(double %Y) { + %Z = call double %llvm.sqrt.f64(double %Y) + ret double %Z +} diff --git a/test/CodeGen/PowerPC/hello.ll b/test/CodeGen/PowerPC/hello.ll new file mode 100644 index 0000000..1d7275f --- /dev/null +++ b/test/CodeGen/PowerPC/hello.ll @@ -0,0 +1,12 @@ +; RUN: llvm-as < %s | llc -march=ppc32 +; RUN: llvm-as < %s | llc -march=ppc64 +; PR1399 + +@.str = internal constant [13 x i8] c"Hello World!\00" + +define i32 @main() { + %tmp2 = tail call i32 @puts( i8* getelementptr ([13 x i8]* @.str, i32 0, i64 0) ) + ret i32 0 +} + +declare i32 @puts(i8*) diff --git a/test/CodeGen/PowerPC/i64_fp.ll b/test/CodeGen/PowerPC/i64_fp.ll new file 
mode 100644 index 0000000..8720327 --- /dev/null +++ b/test/CodeGen/PowerPC/i64_fp.ll @@ -0,0 +1,25 @@ +; fcfid and fctid should be generated when the 64bit feature is enabled, but not +; otherwise. + +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mattr=+64bit | \ +; RUN: grep fcfid +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mattr=+64bit | \ +; RUN: grep fctidz +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 | \ +; RUN: grep fcfid +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 | \ +; RUN: grep fctidz +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mattr=-64bit | \ +; RUN: not grep fcfid +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mattr=-64bit | \ +; RUN: not grep fctidz +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g4 | \ +; RUN: not grep fcfid +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g4 | \ +; RUN: not grep fctidz + +double %X(double %Y) { + %A = cast double %Y to long + %B = cast long %A to double + ret double %B +} diff --git a/test/CodeGen/PowerPC/iabs.ll b/test/CodeGen/PowerPC/iabs.ll new file mode 100644 index 0000000..677b41b --- /dev/null +++ b/test/CodeGen/PowerPC/iabs.ll @@ -0,0 +1,15 @@ +; RUN: llvm-as < %s | llc -march=ppc32 -stats |& \ +; RUN: grep {4 .*Number of machine instrs printed} + +;; Integer absolute value, should produce something as good as: +;; srawi r2, r3, 31 +;; add r3, r3, r2 +;; xor r3, r3, r2 +;; blr +define i32 @test(i32 %a) { + %tmp1neg = sub i32 0, %a + %b = icmp sgt i32 %a, -1 + %abs = select i1 %b, i32 %a, i32 %tmp1neg + ret i32 %abs +} + diff --git a/test/CodeGen/PowerPC/inlineasm-copy.ll b/test/CodeGen/PowerPC/inlineasm-copy.ll new file mode 100644 index 0000000..34594d2 --- /dev/null +++ b/test/CodeGen/PowerPC/inlineasm-copy.ll @@ -0,0 +1,13 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | not grep mr + +int %test(int %Y, int %X) { +entry: + %tmp = tail call int asm "foo $0", "=r"( ) ; <int> [#uses=1] + ret 
int %tmp +} + +int %test2(int %Y, int %X) { +entry: + %tmp1 = tail call int asm "foo $0, $1", "=r,r"( int %X ) ; <int> [#uses=1] + ret int %tmp1 +} diff --git a/test/CodeGen/PowerPC/inverted-bool-compares.ll b/test/CodeGen/PowerPC/inverted-bool-compares.ll new file mode 100644 index 0000000..fbbf6a5 --- /dev/null +++ b/test/CodeGen/PowerPC/inverted-bool-compares.ll @@ -0,0 +1,10 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | not grep xori + +int %test(bool %B, int* %P) { + br bool %B, label %T, label %F +T: + store int 123, int* %P + ret int 0 +F: +ret int 17 +} diff --git a/test/CodeGen/PowerPC/ispositive.ll b/test/CodeGen/PowerPC/ispositive.ll new file mode 100644 index 0000000..192d738 --- /dev/null +++ b/test/CodeGen/PowerPC/ispositive.ll @@ -0,0 +1,10 @@ +; RUN: llvm-as < %s | llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \ +; RUN: grep {srwi r3, r3, 31} + +define i32 @test1(i32 %X) { +entry: + icmp slt i32 %X, 0 ; <i1>:0 [#uses=1] + zext i1 %0 to i32 ; <i32>:1 [#uses=1] + ret i32 %1 +} + diff --git a/test/CodeGen/PowerPC/lha.ll b/test/CodeGen/PowerPC/lha.ll new file mode 100644 index 0000000..cc35e8a --- /dev/null +++ b/test/CodeGen/PowerPC/lha.ll @@ -0,0 +1,7 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | grep lha + +uint %test(short* %a) { + %tmp.1 = load short* %a + %tmp.2 = cast short %tmp.1 to uint + ret uint %tmp.2 +} diff --git a/test/CodeGen/PowerPC/load-constant-addr.ll b/test/CodeGen/PowerPC/load-constant-addr.ll new file mode 100644 index 0000000..65ec782 --- /dev/null +++ b/test/CodeGen/PowerPC/load-constant-addr.ll @@ -0,0 +1,9 @@ +; Should fold the ori into the lfs. 
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | grep lfs +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | not grep ori + +float %test() { + %tmp.i = load float* cast (uint 186018016 to float*) + ret float %tmp.i +} + diff --git a/test/CodeGen/PowerPC/long-compare.ll b/test/CodeGen/PowerPC/long-compare.ll new file mode 100644 index 0000000..7b90725 --- /dev/null +++ b/test/CodeGen/PowerPC/long-compare.ll @@ -0,0 +1,9 @@ +; RUN: llvm-as < %s | llc -march=ppc32 | grep cntlzw +; RUN: llvm-as < %s | llc -march=ppc32 | not grep xori +; RUN: llvm-as < %s | llc -march=ppc32 | not grep {li } +; RUN: llvm-as < %s | llc -march=ppc32 | not grep {mr } + +define i1 @test(i64 %x) { + %tmp = icmp ult i64 %x, 4294967296 + ret i1 %tmp +} diff --git a/test/CodeGen/PowerPC/mem-rr-addr-mode.ll b/test/CodeGen/PowerPC/mem-rr-addr-mode.ll new file mode 100644 index 0000000..8aa7aa2 --- /dev/null +++ b/test/CodeGen/PowerPC/mem-rr-addr-mode.ll @@ -0,0 +1,17 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 | grep li.*16 +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 | not grep addi + +; Codegen lvx (R+16) as t = li 16, lvx t,R +; This shares the 16 between the two loads. 
+ +void %func(<4 x float>* %a, <4 x float>* %b) { + %tmp1 = getelementptr <4 x float>* %b, int 1 + %tmp = load <4 x float>* %tmp1 + %tmp3 = getelementptr <4 x float>* %a, int 1 + %tmp4 = load <4 x float>* %tmp3 + %tmp5 = mul <4 x float> %tmp, %tmp4 + %tmp8 = load <4 x float>* %b + %tmp9 = add <4 x float> %tmp5, %tmp8 + store <4 x float> %tmp9, <4 x float>* %a + ret void +} diff --git a/test/CodeGen/PowerPC/mem_update.ll b/test/CodeGen/PowerPC/mem_update.ll new file mode 100644 index 0000000..4d3ebe9 --- /dev/null +++ b/test/CodeGen/PowerPC/mem_update.ll @@ -0,0 +1,68 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -enable-ppc-preinc | \ +; RUN: not grep addi +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc64 -enable-ppc-preinc | \ +; RUN: not grep addi +%Glob = global ulong 4 + +int *%test0(int *%X, int *%dest) { + %Y = getelementptr int* %X, int 4 + %A = load int* %Y + store int %A, int* %dest + ret int* %Y +} + +int *%test1(int *%X, int *%dest) { + %Y = getelementptr int* %X, int 4 + %A = load int* %Y + store int %A, int* %dest + ret int* %Y +} + +short *%test2(short *%X, int *%dest) { + %Y = getelementptr short* %X, int 4 + %A = load short* %Y + %B = cast short %A to int + store int %B, int* %dest + ret short* %Y +} + +ushort *%test3(ushort *%X, int *%dest) { + %Y = getelementptr ushort* %X, int 4 + %A = load ushort* %Y + %B = cast ushort %A to int + store int %B, int* %dest + ret ushort* %Y +} + +short *%test3a(short *%X, long *%dest) { + %Y = getelementptr short* %X, int 4 + %A = load short* %Y + %B = cast short %A to long + store long %B, long* %dest + ret short* %Y +} + +long *%test4(long *%X, long *%dest) { + %Y = getelementptr long* %X, int 4 + %A = load long* %Y + store long %A, long* %dest + ret long* %Y +} + +ushort *%test5(ushort *%X) { + %Y = getelementptr ushort* %X, int 4 + store ushort 7, ushort* %Y + ret ushort* %Y +} + +ulong *%test6(ulong *%X, ulong %A) { + %Y = getelementptr ulong* %X, int 4 + store ulong %A, ulong* %Y + ret 
ulong* %Y +} + +ulong *%test7(ulong *%X, ulong %A) { + store ulong %A, ulong* %Glob + ret ulong *%Glob +} + diff --git a/test/CodeGen/PowerPC/mul-neg-power-2.ll b/test/CodeGen/PowerPC/mul-neg-power-2.ll new file mode 100644 index 0000000..b9be1cc --- /dev/null +++ b/test/CodeGen/PowerPC/mul-neg-power-2.ll @@ -0,0 +1,8 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | not grep mul + +int %test1(int %a) { + %tmp.1 = mul int %a, -2 ; <int> [#uses=1] + %tmp.2 = add int %tmp.1, 63 ; <int> [#uses=1] + ret int %tmp.2 +} + diff --git a/test/CodeGen/PowerPC/mulhs.ll b/test/CodeGen/PowerPC/mulhs.ll new file mode 100644 index 0000000..967905d --- /dev/null +++ b/test/CodeGen/PowerPC/mulhs.ll @@ -0,0 +1,18 @@ +; All of these ands and shifts should be folded into rlwimi's +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -o %t -f +; RUN: not grep mulhwu %t +; RUN: not grep srawi %t +; RUN: not grep add %t +; RUN: grep mulhw %t | wc -l | grep 1 + +implementation ; Functions: + +int %mulhs(int %a, int %b) { +entry: + %tmp.1 = cast int %a to ulong ; <ulong> [#uses=1] + %tmp.3 = cast int %b to ulong ; <ulong> [#uses=1] + %tmp.4 = mul ulong %tmp.3, %tmp.1 ; <ulong> [#uses=1] + %tmp.6 = shr ulong %tmp.4, ubyte 32 ; <ulong> [#uses=1] + %tmp.7 = cast ulong %tmp.6 to int ; <int> [#uses=1] + ret int %tmp.7 +} diff --git a/test/CodeGen/PowerPC/neg.ll b/test/CodeGen/PowerPC/neg.ll new file mode 100644 index 0000000..7119f6c --- /dev/null +++ b/test/CodeGen/PowerPC/neg.ll @@ -0,0 +1,6 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | grep neg + +int %test(int %X) { + %Y = sub int 0, %X + ret int %Y +} diff --git a/test/CodeGen/PowerPC/or-addressing-mode.ll b/test/CodeGen/PowerPC/or-addressing-mode.ll new file mode 100644 index 0000000..e448140 --- /dev/null +++ b/test/CodeGen/PowerPC/or-addressing-mode.ll @@ -0,0 +1,22 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc -mtriple=powerpc-apple-darwin8 | not grep ori +; RUN: llvm-upgrade < %s | llvm-as | llc 
-mtriple=powerpc-apple-darwin8 | not grep rlwimi + +int %test1(sbyte* %P) { ;; or -> lwzx + %tmp.2.i = cast sbyte* %P to uint + %tmp.4.i = and uint %tmp.2.i, 4294901760 + %tmp.10.i = shr uint %tmp.2.i, ubyte 5 + %tmp.11.i = and uint %tmp.10.i, 2040 + %tmp.13.i = or uint %tmp.11.i, %tmp.4.i + %tmp.14.i = cast uint %tmp.13.i to int* + %tmp.3 = load int* %tmp.14.i + ret int %tmp.3 +} + +int %test2(int %P) { ;; or -> lwz + %tmp.2 = shl int %P, ubyte 4 + %tmp.3 = or int %tmp.2, 2 + %tmp.4 = cast int %tmp.3 to int* + %tmp.5 = load int* %tmp.4 + ret int %tmp.5 +} + diff --git a/test/CodeGen/PowerPC/reg-coalesce-simple.ll b/test/CodeGen/PowerPC/reg-coalesce-simple.ll new file mode 100644 index 0000000..60e9458 --- /dev/null +++ b/test/CodeGen/PowerPC/reg-coalesce-simple.ll @@ -0,0 +1,11 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | not grep or + +%struct.foo = type { int, int, [0 x ubyte] } +int %test(%struct.foo* %X) { + %tmp1 = getelementptr %struct.foo* %X, int 0, uint 2, int 100 + %tmp = load ubyte* %tmp1 ; <ubyte> [#uses=1] + %tmp2 = cast ubyte %tmp to int ; <int> [#uses=1] + ret int %tmp2} + + + diff --git a/test/CodeGen/PowerPC/rlwimi-commute.ll b/test/CodeGen/PowerPC/rlwimi-commute.ll new file mode 100644 index 0000000..8e6b1d6 --- /dev/null +++ b/test/CodeGen/PowerPC/rlwimi-commute.ll @@ -0,0 +1,26 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | grep rlwimi +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | not grep {or } + +; Make sure there is no register-register copies here. 
+ +void %test1(int *%A, int *%B, int *%D, int* %E) { + %A = load int* %A + %B = load int* %B + %X = and int %A, 15 + %Y = and int %B, -16 + %Z = or int %X, %Y + store int %Z, int* %D + store int %A, int* %E + ret void +} + +void %test2(int *%A, int *%B, int *%D, int* %E) { + %A = load int* %A + %B = load int* %B + %X = and int %A, 15 + %Y = and int %B, -16 + %Z = or int %X, %Y + store int %Z, int* %D + store int %B, int* %E + ret void +} diff --git a/test/CodeGen/PowerPC/rlwimi.ll b/test/CodeGen/PowerPC/rlwimi.ll new file mode 100644 index 0000000..92afcf9 --- /dev/null +++ b/test/CodeGen/PowerPC/rlwimi.ll @@ -0,0 +1,72 @@ +; All of these ands and shifts should be folded into rlwimi's +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | not grep and +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | grep rlwimi | wc -l | grep 8 + +implementation ; Functions: + +int %test1(int %x, int %y) { +entry: + %tmp.3 = shl int %x, ubyte 16 ; <int> [#uses=1] + %tmp.7 = and int %y, 65535 ; <int> [#uses=1] + %tmp.9 = or int %tmp.7, %tmp.3 ; <int> [#uses=1] + ret int %tmp.9 +} + +int %test2(int %x, int %y) { +entry: + %tmp.7 = and int %x, 65535 ; <int> [#uses=1] + %tmp.3 = shl int %y, ubyte 16 ; <int> [#uses=1] + %tmp.9 = or int %tmp.7, %tmp.3 ; <int> [#uses=1] + ret int %tmp.9 +} + +uint %test3(uint %x, uint %y) { +entry: + %tmp.3 = shr uint %x, ubyte 16 ; <uint> [#uses=1] + %tmp.6 = and uint %y, 4294901760 ; <uint> [#uses=1] + %tmp.7 = or uint %tmp.6, %tmp.3 ; <uint> [#uses=1] + ret uint %tmp.7 +} + +uint %test4(uint %x, uint %y) { +entry: + %tmp.6 = and uint %x, 4294901760 ; <uint> [#uses=1] + %tmp.3 = shr uint %y, ubyte 16 ; <uint> [#uses=1] + %tmp.7 = or uint %tmp.6, %tmp.3 ; <uint> [#uses=1] + ret uint %tmp.7 +} + +int %test5(int %x, int %y) { +entry: + %tmp.3 = shl int %x, ubyte 1 ; <int> [#uses=1] + %tmp.4 = and int %tmp.3, -65536 ; <int> [#uses=1] + %tmp.7 = and int %y, 65535 ; <int> [#uses=1] + %tmp.9 = or int %tmp.4, %tmp.7 ; <int> [#uses=1] + ret int %tmp.9 
+} + +int %test6(int %x, int %y) { +entry: + %tmp.7 = and int %x, 65535 ; <int> [#uses=1] + %tmp.3 = shl int %y, ubyte 1 ; <int> [#uses=1] + %tmp.4 = and int %tmp.3, -65536 ; <int> [#uses=1] + %tmp.9 = or int %tmp.4, %tmp.7 ; <int> [#uses=1] + ret int %tmp.9 +} + +int %test7(int %x, int %y) { +entry: + %tmp.2 = and int %x, -65536 ; <int> [#uses=1] + %tmp.5 = and int %y, 65535 ; <int> [#uses=1] + %tmp.7 = or int %tmp.5, %tmp.2 ; <int> [#uses=1] + ret int %tmp.7 +} + +uint %test8(uint %bar) { +entry: + %tmp.3 = shl uint %bar, ubyte 1 ; <uint> [#uses=1] + %tmp.4 = and uint %tmp.3, 2 ; <uint> [#uses=1] + %tmp.6 = and uint %bar, 4294967293 ; <uint> [#uses=1] + %tmp.7 = or uint %tmp.4, %tmp.6 ; <uint> [#uses=1] + ret uint %tmp.7 +} diff --git a/test/CodeGen/PowerPC/rlwimi2.ll b/test/CodeGen/PowerPC/rlwimi2.ll new file mode 100644 index 0000000..c264d2e --- /dev/null +++ b/test/CodeGen/PowerPC/rlwimi2.ll @@ -0,0 +1,31 @@ +; All of these ands and shifts should be folded into rlwimi's +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -o %t -f +; RUN: grep rlwimi %t | wc -l | grep 3 +; RUN: grep srwi %t | wc -l | grep 1 +; RUN: not grep slwi %t + +implementation ; Functions: + +ushort %test1(uint %srcA, uint %srcB, uint %alpha) { +entry: + %tmp.1 = shl uint %srcA, ubyte 15 ; <uint> [#uses=1] + %tmp.4 = and uint %tmp.1, 32505856 ; <uint> [#uses=1] + %tmp.6 = and uint %srcA, 31775 ; <uint> [#uses=1] + %tmp.7 = or uint %tmp.4, %tmp.6 ; <uint> [#uses=1] + %tmp.9 = shl uint %srcB, ubyte 15 ; <uint> [#uses=1] + %tmp.12 = and uint %tmp.9, 32505856 ; <uint> [#uses=1] + %tmp.14 = and uint %srcB, 31775 ; <uint> [#uses=1] + %tmp.15 = or uint %tmp.12, %tmp.14 ; <uint> [#uses=1] + %tmp.18 = mul uint %tmp.7, %alpha ; <uint> [#uses=1] + %tmp.20 = sub uint 32, %alpha ; <uint> [#uses=1] + %tmp.22 = mul uint %tmp.15, %tmp.20 ; <uint> [#uses=1] + %tmp.23 = add uint %tmp.22, %tmp.18 ; <uint> [#uses=2] + %tmp.27 = shr uint %tmp.23, ubyte 5 ; <uint> [#uses=1] + %tmp.28 = cast uint %tmp.27 to 
ushort ; <ushort> [#uses=1] + %tmp.29 = and ushort %tmp.28, 31775 ; <ushort> [#uses=1] + %tmp.33 = shr uint %tmp.23, ubyte 20 ; <uint> [#uses=1] + %tmp.34 = cast uint %tmp.33 to ushort ; <ushort> [#uses=1] + %tmp.35 = and ushort %tmp.34, 992 ; <ushort> [#uses=1] + %tmp.36 = or ushort %tmp.29, %tmp.35 ; <ushort> [#uses=1] + ret ushort %tmp.36 +} diff --git a/test/CodeGen/PowerPC/rlwimi3.ll b/test/CodeGen/PowerPC/rlwimi3.ll new file mode 100644 index 0000000..b313ef9 --- /dev/null +++ b/test/CodeGen/PowerPC/rlwimi3.ll @@ -0,0 +1,26 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -stats |& \ +; RUN: grep {Number of machine instrs printed} | grep 12 + +ushort %Trans16Bit(uint %srcA, uint %srcB, uint %alpha) { + %tmp1 = shl uint %srcA, ubyte 15 ; <uint> [#uses=1] + %tmp2 = and uint %tmp1, 32505856 ; <uint> [#uses=1] + %tmp4 = and uint %srcA, 31775 ; <uint> [#uses=1] + %tmp5 = or uint %tmp2, %tmp4 ; <uint> [#uses=1] + %tmp7 = shl uint %srcB, ubyte 15 ; <uint> [#uses=1] + %tmp8 = and uint %tmp7, 32505856 ; <uint> [#uses=1] + %tmp10 = and uint %srcB, 31775 ; <uint> [#uses=1] + %tmp11 = or uint %tmp8, %tmp10 ; <uint> [#uses=1] + %tmp14 = mul uint %tmp5, %alpha ; <uint> [#uses=1] + %tmp16 = sub uint 32, %alpha ; <uint> [#uses=1] + %tmp18 = mul uint %tmp11, %tmp16 ; <uint> [#uses=1] + %tmp19 = add uint %tmp18, %tmp14 ; <uint> [#uses=2] + %tmp21 = shr uint %tmp19, ubyte 5 ; <uint> [#uses=1] + %tmp21 = cast uint %tmp21 to ushort ; <ushort> [#uses=1] + %tmp = and ushort %tmp21, 31775 ; <ushort> [#uses=1] + %tmp23 = shr uint %tmp19, ubyte 20 ; <uint> [#uses=1] + %tmp23 = cast uint %tmp23 to ushort ; <ushort> [#uses=1] + %tmp24 = and ushort %tmp23, 992 ; <ushort> [#uses=1] + %tmp25 = or ushort %tmp, %tmp24 ; <ushort> [#uses=1] + ret ushort %tmp25 +} + diff --git a/test/CodeGen/PowerPC/rlwinm.ll b/test/CodeGen/PowerPC/rlwinm.ll new file mode 100644 index 0000000..32e8f26 --- /dev/null +++ b/test/CodeGen/PowerPC/rlwinm.ll @@ -0,0 +1,64 @@ +; All of these ands and shifts 
should be folded into rlwimi's +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -o %t -f +; RUN: not grep and %t +; RUN: not grep srawi %t +; RUN: not grep srwi %t +; RUN: not grep slwi %t +; RUN: grep rlwinm %t | wc -l | grep 8 + +implementation ; Functions: + +int %test1(int %a) { +entry: + %tmp.1 = and int %a, 268431360 ; <int> [#uses=1] + ret int %tmp.1 +} + +int %test2(int %a) { +entry: + %tmp.1 = and int %a, -268435441 ; <int> [#uses=1] + ret int %tmp.1 +} + +int %test3(int %a) { +entry: + %tmp.2 = shr int %a, ubyte 8 ; <int> [#uses=1] + %tmp.3 = and int %tmp.2, 255 ; <int> [#uses=1] + ret int %tmp.3 +} + +uint %test4(uint %a) { +entry: + %tmp.3 = shr uint %a, ubyte 8 ; <uint> [#uses=1] + %tmp.4 = and uint %tmp.3, 255 ; <uint> [#uses=1] + ret uint %tmp.4 +} + +int %test5(int %a) { +entry: + %tmp.2 = shl int %a, ubyte 8 ; <int> [#uses=1] + %tmp.3 = and int %tmp.2, -8388608 ; <int> [#uses=1] + ret int %tmp.3 +} + +int %test6(int %a) { +entry: + %tmp.1 = and int %a, 65280 ; <int> [#uses=1] + %tmp.2 = shr int %tmp.1, ubyte 8 ; <uint> [#uses=1] + ret int %tmp.2 +} + +uint %test7(uint %a) { +entry: + %tmp.1 = and uint %a, 65280 ; <uint> [#uses=1] + %tmp.2 = shr uint %tmp.1, ubyte 8 ; <uint> [#uses=1] + ret uint %tmp.2 +} + +int %test8(int %a) { +entry: + %tmp.1 = and int %a, 16711680 ; <int> [#uses=1] + %tmp.2 = shl int %tmp.1, ubyte 8 ; <int> [#uses=1] + ret int %tmp.2 +} + diff --git a/test/CodeGen/PowerPC/rlwinm2.ll b/test/CodeGen/PowerPC/rlwinm2.ll new file mode 100644 index 0000000..78127f1 --- /dev/null +++ b/test/CodeGen/PowerPC/rlwinm2.ll @@ -0,0 +1,28 @@ +; All of these ands and shifts should be folded into rlw[i]nm instructions +; RUN: llvm-as < %s | llc -march=ppc32 -o %t -f +; RUN: not grep and %t +; RUN: not grep srawi %t +; RUN: not grep srwi %t +; RUN: not grep slwi %t +; RUN: grep rlwnm %t | wc -l | grep 1 +; RUN: grep rlwinm %t | wc -l | grep 1 + +define i32 @test1(i32 %X, i32 %Y) { +entry: + %tmp = trunc i32 %Y to i8 ; <i8> [#uses=2] + %tmp1 
= shl i32 %X, %Y ; <i32> [#uses=1] + %tmp2 = sub i32 32, %Y ; <i8> [#uses=1] + %tmp3 = lshr i32 %X, %tmp2 ; <i32> [#uses=1] + %tmp4 = or i32 %tmp1, %tmp3 ; <i32> [#uses=1] + %tmp6 = and i32 %tmp4, 127 ; <i32> [#uses=1] + ret i32 %tmp6 +} + +define i32 @test2(i32 %X) { +entry: + %tmp1 = lshr i32 %X, 27 ; <i32> [#uses=1] + %tmp2 = shl i32 %X, 5 ; <i32> [#uses=1] + %tmp2.masked = and i32 %tmp2, 96 ; <i32> [#uses=1] + %tmp5 = or i32 %tmp1, %tmp2.masked ; <i32> [#uses=1] + ret i32 %tmp5 +} diff --git a/test/CodeGen/PowerPC/rotl-2.ll b/test/CodeGen/PowerPC/rotl-2.ll new file mode 100644 index 0000000..523b5e4 --- /dev/null +++ b/test/CodeGen/PowerPC/rotl-2.ll @@ -0,0 +1,38 @@ +; RUN: llvm-as < %s | llc -march=ppc32 | grep rlwinm | wc -l | grep 4 +; RUN: llvm-as < %s | llc -march=ppc32 | grep rlwnm | wc -l | grep 2 +; RUN: llvm-as < %s | llc -march=ppc32 | not grep or + +define i32 @rotl32(i32 %A, i8 %Amt) { + %shift.upgrd.1 = zext i8 %Amt to i32 ; <i32> [#uses=1] + %B = shl i32 %A, %shift.upgrd.1 ; <i32> [#uses=1] + %Amt2 = sub i8 32, %Amt ; <i8> [#uses=1] + %shift.upgrd.2 = zext i8 %Amt2 to i32 ; <i32> [#uses=1] + %C = lshr i32 %A, %shift.upgrd.2 ; <i32> [#uses=1] + %D = or i32 %B, %C ; <i32> [#uses=1] + ret i32 %D +} + +define i32 @rotr32(i32 %A, i8 %Amt) { + %shift.upgrd.3 = zext i8 %Amt to i32 ; <i32> [#uses=1] + %B = lshr i32 %A, %shift.upgrd.3 ; <i32> [#uses=1] + %Amt2 = sub i8 32, %Amt ; <i8> [#uses=1] + %shift.upgrd.4 = zext i8 %Amt2 to i32 ; <i32> [#uses=1] + %C = shl i32 %A, %shift.upgrd.4 ; <i32> [#uses=1] + %D = or i32 %B, %C ; <i32> [#uses=1] + ret i32 %D +} + +define i32 @rotli32(i32 %A) { + %B = shl i32 %A, 5 ; <i32> [#uses=1] + %C = lshr i32 %A, 27 ; <i32> [#uses=1] + %D = or i32 %B, %C ; <i32> [#uses=1] + ret i32 %D +} + +define i32 @rotri32(i32 %A) { + %B = lshr i32 %A, 5 ; <i32> [#uses=1] + %C = shl i32 %A, 27 ; <i32> [#uses=1] + %D = or i32 %B, %C ; <i32> [#uses=1] + ret i32 %D +} + diff --git a/test/CodeGen/PowerPC/rotl.ll 
b/test/CodeGen/PowerPC/rotl.ll new file mode 100644 index 0000000..aa033cf --- /dev/null +++ b/test/CodeGen/PowerPC/rotl.ll @@ -0,0 +1,37 @@ +; RUN: llvm-as < %s | llc -march=ppc32 | grep rlwnm | wc -l | grep 2 +; RUN: llvm-as < %s | llc -march=ppc32 | grep rlwinm | wc -l | grep 2 + +define i32 @rotlw(i32 %x, i32 %sh) { +entry: + %tmp.7 = sub i32 32, %sh ; <i32> [#uses=1] + %tmp.10 = lshr i32 %x, %tmp.7 ; <i32> [#uses=2] + %tmp.4 = shl i32 %x, %sh ; <i32> [#uses=1] + %tmp.12 = or i32 %tmp.10, %tmp.4 ; <i32> [#uses=1] + ret i32 %tmp.12 +} + +define i32 @rotrw(i32 %x, i32 %sh) { +entry: + %tmp.3 = trunc i32 %sh to i8 ; <i8> [#uses=1] + %tmp.4 = lshr i32 %x, %sh ; <i32> [#uses=2] + %tmp.7 = sub i32 32, %sh ; <i32> [#uses=1] + %tmp.10 = shl i32 %x, %tmp.7 ; <i32> [#uses=1] + %tmp.12 = or i32 %tmp.4, %tmp.10 ; <i32> [#uses=1] + ret i32 %tmp.12 +} + +define i32 @rotlwi(i32 %x) { +entry: + %tmp.7 = lshr i32 %x, 27 ; <i32> [#uses=2] + %tmp.3 = shl i32 %x, 5 ; <i32> [#uses=1] + %tmp.9 = or i32 %tmp.3, %tmp.7 ; <i32> [#uses=1] + ret i32 %tmp.9 +} + +define i32 @rotrwi(i32 %x) { +entry: + %tmp.3 = lshr i32 %x, 5 ; <i32> [#uses=2] + %tmp.7 = shl i32 %x, 27 ; <i32> [#uses=1] + %tmp.9 = or i32 %tmp.3, %tmp.7 ; <i32> [#uses=1] + ret i32 %tmp.9 +} diff --git a/test/CodeGen/PowerPC/select_lt0.ll b/test/CodeGen/PowerPC/select_lt0.ll new file mode 100644 index 0000000..bb5213f --- /dev/null +++ b/test/CodeGen/PowerPC/select_lt0.ll @@ -0,0 +1,51 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | not grep cmp + +int %seli32_1(int %a) { +entry: + %tmp.1 = setlt int %a, 0 + %retval = select bool %tmp.1, int 5, int 0 + ret int %retval +} + +int %seli32_2(int %a, int %b) { +entry: + %tmp.1 = setlt int %a, 0 + %retval = select bool %tmp.1, int %b, int 0 + ret int %retval +} + +int %seli32_3(int %a, short %b) { +entry: + %tmp.2 = cast short %b to int + %tmp.1 = setlt int %a, 0 + %retval = select bool %tmp.1, int %tmp.2, int 0 + ret int %retval +} + +int %seli32_4(int %a, ushort %b) 
{ +entry: + %tmp.2 = cast ushort %b to int + %tmp.1 = setlt int %a, 0 + %retval = select bool %tmp.1, int %tmp.2, int 0 + ret int %retval +} + +short %seli16_1(short %a) { +entry: + %tmp.1 = setlt short %a, 0 + %retval = select bool %tmp.1, short 7, short 0 + ret short %retval +} + +short %seli16_2(int %a, short %b) { + %tmp.1 = setlt int %a, 0 + %retval = select bool %tmp.1, short %b, short 0 + ret short %retval +} + +int %seli32_a_a(int %a) { + %tmp = setlt int %a, 1 + %min = select bool %tmp, int %a, int 0 + ret int %min +} + diff --git a/test/CodeGen/PowerPC/setcc_no_zext.ll b/test/CodeGen/PowerPC/setcc_no_zext.ll new file mode 100644 index 0000000..00e9bf0 --- /dev/null +++ b/test/CodeGen/PowerPC/setcc_no_zext.ll @@ -0,0 +1,8 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | not grep rlwinm + +int %setcc_one_or_zero(int* %a) { +entry: + %tmp.1 = setne int* %a, null + %inc.1 = cast bool %tmp.1 to int + ret int %inc.1 +} diff --git a/test/CodeGen/PowerPC/seteq-0.ll b/test/CodeGen/PowerPC/seteq-0.ll new file mode 100644 index 0000000..a574100 --- /dev/null +++ b/test/CodeGen/PowerPC/seteq-0.ll @@ -0,0 +1,9 @@ +; RUN: llvm-upgrade < %s | llvm-as | \ +; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 | \ +; RUN: grep {srwi r., r., 5} + +int %eq0(int %a) { + %tmp.1 = seteq int %a, 0 ; <bool> [#uses=1] + %tmp.2 = cast bool %tmp.1 to int ; <int> [#uses=1] + ret int %tmp.2 +} diff --git a/test/CodeGen/PowerPC/shl_elim.ll b/test/CodeGen/PowerPC/shl_elim.ll new file mode 100644 index 0000000..3dc4772 --- /dev/null +++ b/test/CodeGen/PowerPC/shl_elim.ll @@ -0,0 +1,11 @@ +; RUN: llvm-as < %s | llc -march=ppc32 | not grep slwi + +define i32 @test1(i64 %a) { + %tmp29 = lshr i64 %a, 24 ; <i64> [#uses=1] + %tmp23 = trunc i64 %tmp29 to i32 ; <i32> [#uses=1] + %tmp410 = lshr i32 %tmp23, 9 ; <i32> [#uses=1] + %tmp45 = trunc i32 %tmp410 to i16 ; <i16> [#uses=1] + %tmp456 = sext i16 %tmp45 to i32 ; <i32> [#uses=1] + ret i32 %tmp456 +} + diff --git 
a/test/CodeGen/PowerPC/shl_sext.ll b/test/CodeGen/PowerPC/shl_sext.ll new file mode 100644 index 0000000..af18338 --- /dev/null +++ b/test/CodeGen/PowerPC/shl_sext.ll @@ -0,0 +1,17 @@ +; This test should not contain a sign extend +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | not grep extsb + +int %test(uint %mode.0.i.0) { + %tmp.79 = cast uint %mode.0.i.0 to sbyte ; <sbyte> [#uses=1] + %tmp.80 = cast sbyte %tmp.79 to int ; <int> [#uses=1] + %tmp.81 = shl int %tmp.80, ubyte 24 ; <int> [#uses=1] + ret int %tmp.81 +} + +int %test2(uint %mode.0.i.0) { + %tmp.79 = cast uint %mode.0.i.0 to sbyte ; <sbyte> [#uses=1] + %tmp.80 = cast sbyte %tmp.79 to int ; <int> [#uses=1] + %tmp.81 = shl int %tmp.80, ubyte 16 ; <int> [#uses=1] + %tmp.82 = and int %tmp.81, 16711680 + ret int %tmp.82 +} diff --git a/test/CodeGen/PowerPC/sign_ext_inreg1.ll b/test/CodeGen/PowerPC/sign_ext_inreg1.ll new file mode 100644 index 0000000..0e67f77 --- /dev/null +++ b/test/CodeGen/PowerPC/sign_ext_inreg1.ll @@ -0,0 +1,12 @@ +; RUN: llvm-as < %s | llc -march=ppc32 | grep srwi +; RUN: llvm-as < %s | llc -march=ppc32 | not grep rlwimi + +define i32 @baz(i64 %a) { + %tmp29 = lshr i64 %a, 24 ; <i64> [#uses=1] + %tmp23 = trunc i64 %tmp29 to i32 ; <i32> [#uses=1] + %tmp410 = lshr i32 %tmp23, 9 ; <i32> [#uses=1] + %tmp45 = trunc i32 %tmp410 to i16 ; <i16> [#uses=1] + %tmp456 = sext i16 %tmp45 to i32 ; <i32> [#uses=1] + ret i32 %tmp456 +} + diff --git a/test/CodeGen/PowerPC/small-arguments.ll b/test/CodeGen/PowerPC/small-arguments.ll new file mode 100644 index 0000000..e512047 --- /dev/null +++ b/test/CodeGen/PowerPC/small-arguments.ll @@ -0,0 +1,52 @@ +; RUN: llvm-as < %s | llc -march=ppc32 | not grep {extsh\\|rlwinm} + +declare i16 @foo() sext + +define i32 @test1(i16 sext %X) { + %Y = sext i16 %X to i32 ;; dead + ret i32 %Y +} + +define i32 @test2(i16 zext %X) { + %Y = sext i16 %X to i32 + %Z = and i32 %Y, 65535 ;; dead + ret i32 %Z +} + +define void @test3() { + %tmp.0 = call i16 @foo() sext ;; 
no extsh! + %tmp.1 = icmp slt i16 %tmp.0, 1234 + br i1 %tmp.1, label %then, label %UnifiedReturnBlock + +then: + call i32 @test1(i16 0 sext) + ret void +UnifiedReturnBlock: + ret void +} + +define i32 @test4(i16* %P) { + %tmp.1 = load i16* %P + %tmp.2 = zext i16 %tmp.1 to i32 + %tmp.3 = and i32 %tmp.2, 255 + ret i32 %tmp.3 +} + +define i32 @test5(i16* %P) { + %tmp.1 = load i16* %P + %tmp.2 = bitcast i16 %tmp.1 to i16 + %tmp.3 = zext i16 %tmp.2 to i32 + %tmp.4 = and i32 %tmp.3, 255 + ret i32 %tmp.4 +} + +define i32 @test6(i32* %P) { + %tmp.1 = load i32* %P + %tmp.2 = and i32 %tmp.1, 255 + ret i32 %tmp.2 +} + +define i16 @test7(float %a) zext { + %tmp.1 = fptoui float %a to i16 + ret i16 %tmp.1 +} diff --git a/test/CodeGen/PowerPC/stfiwx.ll b/test/CodeGen/PowerPC/stfiwx.ll new file mode 100644 index 0000000..2eebc07 --- /dev/null +++ b/test/CodeGen/PowerPC/stfiwx.ll @@ -0,0 +1,26 @@ +; RUN: llvm-upgrade < %s | llvm-as | \ +; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -mattr=stfiwx -o %t1 -f +; RUN: grep stfiwx %t1 +; RUN: not grep r1 %t1 +; RUN: llvm-upgrade < %s | llvm-as | \ +; RUN: llc -march=ppc32 -mtriple=powerpc-apple-darwin8 -mattr=-stfiwx \ +; RUN: -o %t2 -f +; RUN: not grep stfiwx %t2 +; RUN: grep r1 %t2 + +void %test(float %a, int* %b) { + %tmp.2 = cast float %a to int + store int %tmp.2, int* %b + ret void +} + +void %test2(float %a, int* %b, int %i) { + %tmp.2 = getelementptr int* %b, int 1 + %tmp.5 = getelementptr int* %b, int %i + %tmp.7 = cast float %a to int + store int %tmp.7, int* %tmp.5 + store int %tmp.7, int* %tmp.2 + store int %tmp.7, int* %b + ret void +} + diff --git a/test/CodeGen/PowerPC/store-load-fwd.ll b/test/CodeGen/PowerPC/store-load-fwd.ll new file mode 100644 index 0000000..761fb5a --- /dev/null +++ b/test/CodeGen/PowerPC/store-load-fwd.ll @@ -0,0 +1,6 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 | not grep lwz +int %test(int* %P) { + store int 1, int* %P + %V = load int* %P + ret int %V +} diff --git 
a/test/CodeGen/PowerPC/subc.ll b/test/CodeGen/PowerPC/subc.ll new file mode 100644 index 0000000..3624791 --- /dev/null +++ b/test/CodeGen/PowerPC/subc.ll @@ -0,0 +1,26 @@ +; All of these should be codegen'd without loading immediates +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -o %t -f +; RUN: grep subfc %t | wc -l | grep 1 +; RUN: grep subfe %t | wc -l | grep 1 +; RUN: grep subfze %t | wc -l | grep 1 +; RUN: grep subfme %t | wc -l | grep 1 +; RUN: grep subfic %t | wc -l | grep 2 +implementation ; Functions: + +long %sub_ll(long %a, long %b) { +entry: + %tmp.2 = sub long %a, %b ; <long> [#uses=1] + ret long %tmp.2 +} + +long %sub_l_5(long %a) { +entry: + %tmp.1 = sub long 5, %a ; <long> [#uses=1] + ret long %tmp.1 +} + +long %sub_l_m5(long %a) { +entry: + %tmp.1 = sub long -5, %a ; <long> [#uses=1] + ret long %tmp.1 +} diff --git a/test/CodeGen/PowerPC/unsafe-math.ll b/test/CodeGen/PowerPC/unsafe-math.ll new file mode 100644 index 0000000..770dcb6 --- /dev/null +++ b/test/CodeGen/PowerPC/unsafe-math.ll @@ -0,0 +1,10 @@ +; RUN: llvm-as < %s | llc -march=ppc32 | grep fmul | wc -l | grep 2 +; RUN: llvm-as < %s | llc -march=ppc32 -enable-unsafe-fp-math | \ +; RUN: grep fmul | wc -l | grep 1 + +define double @foo(double %X) { + %tmp1 = mul double %X, 1.23 + %tmp2 = mul double %tmp1, 4.124 + ret double %tmp2 +} + diff --git a/test/CodeGen/PowerPC/vcmp-fold.ll b/test/CodeGen/PowerPC/vcmp-fold.ll new file mode 100644 index 0000000..6ae41a9 --- /dev/null +++ b/test/CodeGen/PowerPC/vcmp-fold.ll @@ -0,0 +1,21 @@ +; This should fold the "vcmpbfp." and "vcmpbfp" instructions into a single +; "vcmpbfp.". 
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 | grep vcmpbfp | wc -l | grep 1 + +void %test(<4 x float>* %x, <4 x float>* %y, int* %P) { +entry: + %tmp = load <4 x float>* %x ; <<4 x float>> [#uses=1] + %tmp2 = load <4 x float>* %y ; <<4 x float>> [#uses=1] + %tmp = call int %llvm.ppc.altivec.vcmpbfp.p( int 1, <4 x float> %tmp, <4 x float> %tmp2 ) ; <int> [#uses=1] + %tmp4 = load <4 x float>* %x ; <<4 x float>> [#uses=1] + %tmp6 = load <4 x float>* %y ; <<4 x float>> [#uses=1] + %tmp = call <4 x int> %llvm.ppc.altivec.vcmpbfp( <4 x float> %tmp4, <4 x float> %tmp6 ) ; <<4 x int>> [#uses=1] + %tmp7 = cast <4 x int> %tmp to <4 x float> ; <<4 x float>> [#uses=1] + store <4 x float> %tmp7, <4 x float>* %x + store int %tmp, int* %P + ret void +} + +declare int %llvm.ppc.altivec.vcmpbfp.p(int, <4 x float>, <4 x float>) + +declare <4 x int> %llvm.ppc.altivec.vcmpbfp(<4 x float>, <4 x float>) diff --git a/test/CodeGen/PowerPC/vec_br_cmp.ll b/test/CodeGen/PowerPC/vec_br_cmp.ll new file mode 100644 index 0000000..bc60bae --- /dev/null +++ b/test/CodeGen/PowerPC/vec_br_cmp.ll @@ -0,0 +1,23 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 -o %t -f +; RUN: grep vcmpeqfp. %t +; RUN: not grep mfcr %t + +; A predicate compare used immediately by a branch should not generate an mfcr. 
+ +void %test(<4 x float>* %A, <4 x float>* %B) { + %tmp = load <4 x float>* %A + %tmp3 = load <4 x float>* %B + %tmp = tail call int %llvm.ppc.altivec.vcmpeqfp.p( int 1, <4 x float> %tmp, <4 x float> %tmp3 ) + %tmp = seteq int %tmp, 0 + br bool %tmp, label %cond_true, label %UnifiedReturnBlock + +cond_true: + store <4 x float> zeroinitializer, <4 x float>* %B + ret void + +UnifiedReturnBlock: + ret void +} + +declare int %llvm.ppc.altivec.vcmpeqfp.p(int, <4 x float>, <4 x float>) + diff --git a/test/CodeGen/PowerPC/vec_call.ll b/test/CodeGen/PowerPC/vec_call.ll new file mode 100644 index 0000000..b2b91fe --- /dev/null +++ b/test/CodeGen/PowerPC/vec_call.ll @@ -0,0 +1,11 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 + +<4 x int> %test_arg(<4 x int> %A, <4 x int> %B) { + %C = add <4 x int> %A, %B + ret <4 x int> %C +} + +<4 x int> %foo() { + %X = call <4 x int> %test_arg(<4 x int> zeroinitializer, <4 x int> zeroinitializer) + ret <4 x int> %X +} diff --git a/test/CodeGen/PowerPC/vec_constants.ll b/test/CodeGen/PowerPC/vec_constants.ll new file mode 100644 index 0000000..507d2d9 --- /dev/null +++ b/test/CodeGen/PowerPC/vec_constants.ll @@ -0,0 +1,47 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 | not grep CPI + + +; Tests spltw(0x80000000) and spltw(0x7FFFFFFF). 
+void %test1(<4 x int>* %P1, <4 x int>* %P2, <4 x float>* %P3) { + %tmp = load <4 x int>* %P1 + %tmp4 = and <4 x int> %tmp, < int -2147483648, int -2147483648, int -2147483648, int -2147483648 > + store <4 x int> %tmp4, <4 x int>* %P1 + %tmp7 = load <4 x int>* %P2 + %tmp9 = and <4 x int> %tmp7, < int 2147483647, int 2147483647, int 2147483647, int 2147483647 > + store <4 x int> %tmp9, <4 x int>* %P2 + %tmp = load <4 x float>* %P3 + %tmp11 = cast <4 x float> %tmp to <4 x int> + %tmp12 = and <4 x int> %tmp11, < int 2147483647, int 2147483647, int 2147483647, int 2147483647 > + %tmp13 = cast <4 x int> %tmp12 to <4 x float> + store <4 x float> %tmp13, <4 x float>* %P3 + ret void +} + +<4 x int> %test_30() { + ret <4 x int> <int 30, int 30, int 30, int 30> +} + +<4 x int> %test_29() { + ret <4 x int> <int 29, int 29, int 29, int 29> +} + +<8 x short> %test_n30() { + ret <8 x short> <short -30, short -30, short -30, short -30, + short -30, short -30, short -30, short -30> +} + +<16 x sbyte> %test_n104() { + ret <16 x sbyte> <sbyte -104, sbyte -104, sbyte -104, sbyte -104, + sbyte -104, sbyte -104, sbyte -104, sbyte -104, + sbyte -104, sbyte -104, sbyte -104, sbyte -104, + sbyte -104, sbyte -104, sbyte -104, sbyte -104> +} + +<4 x int> %test_vsldoi() { + ret <4 x int> <int 512, int 512, int 512, int 512> +} + +<4 x int> %test_rol() { + ret <4 x int> <int -11534337, int -11534337, int -11534337, int -11534337> +} + diff --git a/test/CodeGen/PowerPC/vec_mul.ll b/test/CodeGen/PowerPC/vec_mul.ll new file mode 100644 index 0000000..eea1def --- /dev/null +++ b/test/CodeGen/PowerPC/vec_mul.ll @@ -0,0 +1,24 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 | not grep mullw +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 | grep vmsumuhm + +<4 x int> %test_v4i32(<4 x int>* %X, <4 x int>* %Y) { + %tmp = load <4 x int>* %X + %tmp2 = load <4 x int>* %Y + %tmp3 = mul <4 x int> %tmp, %tmp2 + ret <4 x int> %tmp3 +} + +<8 x short> %test_v8i16(<8 x short>* 
%X, <8 x short>* %Y) { + %tmp = load <8 x short>* %X + %tmp2 = load <8 x short>* %Y + %tmp3 = mul <8 x short> %tmp, %tmp2 + ret <8 x short> %tmp3 +} + +<16 x sbyte> %test_v16i8(<16 x sbyte>* %X, <16 x sbyte>* %Y) { + %tmp = load <16 x sbyte>* %X + %tmp2 = load <16 x sbyte>* %Y + %tmp3 = mul <16 x sbyte> %tmp, %tmp2 + ret <16 x sbyte> %tmp3 +} + diff --git a/test/CodeGen/PowerPC/vec_perf_shuffle.ll b/test/CodeGen/PowerPC/vec_perf_shuffle.ll new file mode 100644 index 0000000..6177b5f --- /dev/null +++ b/test/CodeGen/PowerPC/vec_perf_shuffle.ll @@ -0,0 +1,42 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 | not grep vperm + +<4 x float> %test_uu72(<4 x float> *%P1, <4 x float> *%P2) { + %V1 = load <4 x float> *%P1 + %V2 = load <4 x float> *%P2 + ; vmrglw + vsldoi + %V3 = shufflevector <4 x float> %V1, <4 x float> %V2, + <4 x uint> <uint undef, uint undef, uint 7, uint 2> + ret <4 x float> %V3 +} + +<4 x float> %test_30u5(<4 x float> *%P1, <4 x float> *%P2) { + %V1 = load <4 x float> *%P1 + %V2 = load <4 x float> *%P2 + %V3 = shufflevector <4 x float> %V1, <4 x float> %V2, + <4 x uint> <uint 3, uint 0, uint undef, uint 5> + ret <4 x float> %V3 +} + +<4 x float> %test_3u73(<4 x float> *%P1, <4 x float> *%P2) { + %V1 = load <4 x float> *%P1 + %V2 = load <4 x float> *%P2 + %V3 = shufflevector <4 x float> %V1, <4 x float> %V2, + <4 x uint> <uint 3, uint undef, uint 7, uint 3> + ret <4 x float> %V3 +} + +<4 x float> %test_3774(<4 x float> *%P1, <4 x float> *%P2) { + %V1 = load <4 x float> *%P1 + %V2 = load <4 x float> *%P2 + %V3 = shufflevector <4 x float> %V1, <4 x float> %V2, + <4 x uint> <uint 3, uint 7, uint 7, uint 4> + ret <4 x float> %V3 +} + +<4 x float> %test_4450(<4 x float> *%P1, <4 x float> *%P2) { + %V1 = load <4 x float> *%P1 + %V2 = load <4 x float> *%P2 + %V3 = shufflevector <4 x float> %V1, <4 x float> %V2, + <4 x uint> <uint 4, uint 4, uint 5, uint 0> + ret <4 x float> %V3 +} diff --git a/test/CodeGen/PowerPC/vec_shuffle.ll 
b/test/CodeGen/PowerPC/vec_shuffle.ll new file mode 100644 index 0000000..ba856ee --- /dev/null +++ b/test/CodeGen/PowerPC/vec_shuffle.ll @@ -0,0 +1,506 @@ +; RUN: llvm-upgrade < %s | llvm-as | opt -instcombine | \ +; RUN: llc -march=ppc32 -mcpu=g5 | not grep vperm +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 > %t +; RUN: grep vsldoi %t | wc -l | grep 2 +; RUN: grep vmrgh %t | wc -l | grep 7 +; RUN: grep vmrgl %t | wc -l | grep 6 +; RUN: grep vpkuhum %t | wc -l | grep 1 +; RUN: grep vpkuwum %t | wc -l | grep 1 + +void %VSLDOI_xy(<8 x short>* %A, <8 x short>* %B) { +entry: + %tmp = load <8 x short>* %A ; <<8 x short>> [#uses=1] + %tmp2 = load <8 x short>* %B ; <<8 x short>> [#uses=1] + %tmp = cast <8 x short> %tmp to <16 x sbyte> ; <<16 x sbyte>> [#uses=11] + %tmp2 = cast <8 x short> %tmp2 to <16 x sbyte> ; <<16 x sbyte>> [#uses=5] + %tmp = extractelement <16 x sbyte> %tmp, uint 5 ; <sbyte> [#uses=1] + %tmp3 = extractelement <16 x sbyte> %tmp, uint 6 ; <sbyte> [#uses=1] + %tmp4 = extractelement <16 x sbyte> %tmp, uint 7 ; <sbyte> [#uses=1] + %tmp5 = extractelement <16 x sbyte> %tmp, uint 8 ; <sbyte> [#uses=1] + %tmp6 = extractelement <16 x sbyte> %tmp, uint 9 ; <sbyte> [#uses=1] + %tmp7 = extractelement <16 x sbyte> %tmp, uint 10 ; <sbyte> [#uses=1] + %tmp8 = extractelement <16 x sbyte> %tmp, uint 11 ; <sbyte> [#uses=1] + %tmp9 = extractelement <16 x sbyte> %tmp, uint 12 ; <sbyte> [#uses=1] + %tmp10 = extractelement <16 x sbyte> %tmp, uint 13 ; <sbyte> [#uses=1] + %tmp11 = extractelement <16 x sbyte> %tmp, uint 14 ; <sbyte> [#uses=1] + %tmp12 = extractelement <16 x sbyte> %tmp, uint 15 ; <sbyte> [#uses=1] + %tmp13 = extractelement <16 x sbyte> %tmp2, uint 0 ; <sbyte> [#uses=1] + %tmp14 = extractelement <16 x sbyte> %tmp2, uint 1 ; <sbyte> [#uses=1] + %tmp15 = extractelement <16 x sbyte> %tmp2, uint 2 ; <sbyte> [#uses=1] + %tmp16 = extractelement <16 x sbyte> %tmp2, uint 3 ; <sbyte> [#uses=1] + %tmp17 = extractelement <16 x sbyte> %tmp2, uint 4 ; 
<sbyte> [#uses=1] + %tmp18 = insertelement <16 x sbyte> undef, sbyte %tmp, uint 0 ; <<16 x sbyte>> [#uses=1] + %tmp19 = insertelement <16 x sbyte> %tmp18, sbyte %tmp3, uint 1 ; <<16 x sbyte>> [#uses=1] + %tmp20 = insertelement <16 x sbyte> %tmp19, sbyte %tmp4, uint 2 ; <<16 x sbyte>> [#uses=1] + %tmp21 = insertelement <16 x sbyte> %tmp20, sbyte %tmp5, uint 3 ; <<16 x sbyte>> [#uses=1] + %tmp22 = insertelement <16 x sbyte> %tmp21, sbyte %tmp6, uint 4 ; <<16 x sbyte>> [#uses=1] + %tmp23 = insertelement <16 x sbyte> %tmp22, sbyte %tmp7, uint 5 ; <<16 x sbyte>> [#uses=1] + %tmp24 = insertelement <16 x sbyte> %tmp23, sbyte %tmp8, uint 6 ; <<16 x sbyte>> [#uses=1] + %tmp25 = insertelement <16 x sbyte> %tmp24, sbyte %tmp9, uint 7 ; <<16 x sbyte>> [#uses=1] + %tmp26 = insertelement <16 x sbyte> %tmp25, sbyte %tmp10, uint 8 ; <<16 x sbyte>> [#uses=1] + %tmp27 = insertelement <16 x sbyte> %tmp26, sbyte %tmp11, uint 9 ; <<16 x sbyte>> [#uses=1] + %tmp28 = insertelement <16 x sbyte> %tmp27, sbyte %tmp12, uint 10 ; <<16 x sbyte>> [#uses=1] + %tmp29 = insertelement <16 x sbyte> %tmp28, sbyte %tmp13, uint 11 ; <<16 x sbyte>> [#uses=1] + %tmp30 = insertelement <16 x sbyte> %tmp29, sbyte %tmp14, uint 12 ; <<16 x sbyte>> [#uses=1] + %tmp31 = insertelement <16 x sbyte> %tmp30, sbyte %tmp15, uint 13 ; <<16 x sbyte>> [#uses=1] + %tmp32 = insertelement <16 x sbyte> %tmp31, sbyte %tmp16, uint 14 ; <<16 x sbyte>> [#uses=1] + %tmp33 = insertelement <16 x sbyte> %tmp32, sbyte %tmp17, uint 15 ; <<16 x sbyte>> [#uses=1] + %tmp33 = cast <16 x sbyte> %tmp33 to <8 x short> ; <<8 x short>> [#uses=1] + store <8 x short> %tmp33, <8 x short>* %A + ret void +} + +void %VSLDOI_xx(<8 x short>* %A, <8 x short>* %B) { + %tmp = load <8 x short>* %A ; <<8 x short>> [#uses=1] + %tmp2 = load <8 x short>* %A ; <<8 x short>> [#uses=1] + %tmp = cast <8 x short> %tmp to <16 x sbyte> ; <<16 x sbyte>> [#uses=11] + %tmp2 = cast <8 x short> %tmp2 to <16 x sbyte> ; <<16 x sbyte>> [#uses=5] + %tmp = extractelement <16 
x sbyte> %tmp, uint 5 ; <sbyte> [#uses=1] + %tmp3 = extractelement <16 x sbyte> %tmp, uint 6 ; <sbyte> [#uses=1] + %tmp4 = extractelement <16 x sbyte> %tmp, uint 7 ; <sbyte> [#uses=1] + %tmp5 = extractelement <16 x sbyte> %tmp, uint 8 ; <sbyte> [#uses=1] + %tmp6 = extractelement <16 x sbyte> %tmp, uint 9 ; <sbyte> [#uses=1] + %tmp7 = extractelement <16 x sbyte> %tmp, uint 10 ; <sbyte> [#uses=1] + %tmp8 = extractelement <16 x sbyte> %tmp, uint 11 ; <sbyte> [#uses=1] + %tmp9 = extractelement <16 x sbyte> %tmp, uint 12 ; <sbyte> [#uses=1] + %tmp10 = extractelement <16 x sbyte> %tmp, uint 13 ; <sbyte> [#uses=1] + %tmp11 = extractelement <16 x sbyte> %tmp, uint 14 ; <sbyte> [#uses=1] + %tmp12 = extractelement <16 x sbyte> %tmp, uint 15 ; <sbyte> [#uses=1] + %tmp13 = extractelement <16 x sbyte> %tmp2, uint 0 ; <sbyte> [#uses=1] + %tmp14 = extractelement <16 x sbyte> %tmp2, uint 1 ; <sbyte> [#uses=1] + %tmp15 = extractelement <16 x sbyte> %tmp2, uint 2 ; <sbyte> [#uses=1] + %tmp16 = extractelement <16 x sbyte> %tmp2, uint 3 ; <sbyte> [#uses=1] + %tmp17 = extractelement <16 x sbyte> %tmp2, uint 4 ; <sbyte> [#uses=1] + %tmp18 = insertelement <16 x sbyte> undef, sbyte %tmp, uint 0 ; <<16 x sbyte>> [#uses=1] + %tmp19 = insertelement <16 x sbyte> %tmp18, sbyte %tmp3, uint 1 ; <<16 x sbyte>> [#uses=1] + %tmp20 = insertelement <16 x sbyte> %tmp19, sbyte %tmp4, uint 2 ; <<16 x sbyte>> [#uses=1] + %tmp21 = insertelement <16 x sbyte> %tmp20, sbyte %tmp5, uint 3 ; <<16 x sbyte>> [#uses=1] + %tmp22 = insertelement <16 x sbyte> %tmp21, sbyte %tmp6, uint 4 ; <<16 x sbyte>> [#uses=1] + %tmp23 = insertelement <16 x sbyte> %tmp22, sbyte %tmp7, uint 5 ; <<16 x sbyte>> [#uses=1] + %tmp24 = insertelement <16 x sbyte> %tmp23, sbyte %tmp8, uint 6 ; <<16 x sbyte>> [#uses=1] + %tmp25 = insertelement <16 x sbyte> %tmp24, sbyte %tmp9, uint 7 ; <<16 x sbyte>> [#uses=1] + %tmp26 = insertelement <16 x sbyte> %tmp25, sbyte %tmp10, uint 8 ; <<16 x sbyte>> [#uses=1] + %tmp27 = insertelement <16 x sbyte> 
%tmp26, sbyte %tmp11, uint 9 ; <<16 x sbyte>> [#uses=1] + %tmp28 = insertelement <16 x sbyte> %tmp27, sbyte %tmp12, uint 10 ; <<16 x sbyte>> [#uses=1] + %tmp29 = insertelement <16 x sbyte> %tmp28, sbyte %tmp13, uint 11 ; <<16 x sbyte>> [#uses=1] + %tmp30 = insertelement <16 x sbyte> %tmp29, sbyte %tmp14, uint 12 ; <<16 x sbyte>> [#uses=1] + %tmp31 = insertelement <16 x sbyte> %tmp30, sbyte %tmp15, uint 13 ; <<16 x sbyte>> [#uses=1] + %tmp32 = insertelement <16 x sbyte> %tmp31, sbyte %tmp16, uint 14 ; <<16 x sbyte>> [#uses=1] + %tmp33 = insertelement <16 x sbyte> %tmp32, sbyte %tmp17, uint 15 ; <<16 x sbyte>> [#uses=1] + %tmp33 = cast <16 x sbyte> %tmp33 to <8 x short> ; <<8 x short>> [#uses=1] + store <8 x short> %tmp33, <8 x short>* %A + ret void +} + +void %VPERM_promote(<8 x short>* %A, <8 x short>* %B) { +entry: + %tmp = load <8 x short>* %A ; <<8 x short>> [#uses=1] + %tmp = cast <8 x short> %tmp to <4 x int> ; <<4 x int>> [#uses=1] + %tmp2 = load <8 x short>* %B ; <<8 x short>> [#uses=1] + %tmp2 = cast <8 x short> %tmp2 to <4 x int> ; <<4 x int>> [#uses=1] + %tmp3 = call <4 x int> %llvm.ppc.altivec.vperm( <4 x int> %tmp, <4 x int> %tmp2, <16 x sbyte> < sbyte 14, sbyte 14, sbyte 14, sbyte 14, sbyte 14, sbyte 14, sbyte 14, sbyte 14, sbyte 14, sbyte 14, sbyte 14, sbyte 14, sbyte 14, sbyte 14, sbyte 14, sbyte 14 > ) ; <<4 x int>> [#uses=1] + %tmp3 = cast <4 x int> %tmp3 to <8 x short> ; <<8 x short>> [#uses=1] + store <8 x short> %tmp3, <8 x short>* %A + ret void +} + +declare <4 x int> %llvm.ppc.altivec.vperm(<4 x int>, <4 x int>, <16 x sbyte>) + + +void %tb_l(<16 x sbyte>* %A, <16 x sbyte>* %B) { +entry: + %tmp = load <16 x sbyte>* %A ; <<16 x sbyte>> [#uses=8] + %tmp2 = load <16 x sbyte>* %B ; <<16 x sbyte>> [#uses=8] + %tmp = extractelement <16 x sbyte> %tmp, uint 8 ; <sbyte> [#uses=1] + %tmp3 = extractelement <16 x sbyte> %tmp2, uint 8 ; <sbyte> [#uses=1] + %tmp4 = extractelement <16 x sbyte> %tmp, uint 9 ; <sbyte> [#uses=1] + %tmp5 = extractelement <16 x 
sbyte> %tmp2, uint 9 ; <sbyte> [#uses=1] + %tmp6 = extractelement <16 x sbyte> %tmp, uint 10 ; <sbyte> [#uses=1] + %tmp7 = extractelement <16 x sbyte> %tmp2, uint 10 ; <sbyte> [#uses=1] + %tmp8 = extractelement <16 x sbyte> %tmp, uint 11 ; <sbyte> [#uses=1] + %tmp9 = extractelement <16 x sbyte> %tmp2, uint 11 ; <sbyte> [#uses=1] + %tmp10 = extractelement <16 x sbyte> %tmp, uint 12 ; <sbyte> [#uses=1] + %tmp11 = extractelement <16 x sbyte> %tmp2, uint 12 ; <sbyte> [#uses=1] + %tmp12 = extractelement <16 x sbyte> %tmp, uint 13 ; <sbyte> [#uses=1] + %tmp13 = extractelement <16 x sbyte> %tmp2, uint 13 ; <sbyte> [#uses=1] + %tmp14 = extractelement <16 x sbyte> %tmp, uint 14 ; <sbyte> [#uses=1] + %tmp15 = extractelement <16 x sbyte> %tmp2, uint 14 ; <sbyte> [#uses=1] + %tmp16 = extractelement <16 x sbyte> %tmp, uint 15 ; <sbyte> [#uses=1] + %tmp17 = extractelement <16 x sbyte> %tmp2, uint 15 ; <sbyte> [#uses=1] + %tmp18 = insertelement <16 x sbyte> undef, sbyte %tmp, uint 0 ; <<16 x sbyte>> [#uses=1] + %tmp19 = insertelement <16 x sbyte> %tmp18, sbyte %tmp3, uint 1 ; <<16 x sbyte>> [#uses=1] + %tmp20 = insertelement <16 x sbyte> %tmp19, sbyte %tmp4, uint 2 ; <<16 x sbyte>> [#uses=1] + %tmp21 = insertelement <16 x sbyte> %tmp20, sbyte %tmp5, uint 3 ; <<16 x sbyte>> [#uses=1] + %tmp22 = insertelement <16 x sbyte> %tmp21, sbyte %tmp6, uint 4 ; <<16 x sbyte>> [#uses=1] + %tmp23 = insertelement <16 x sbyte> %tmp22, sbyte %tmp7, uint 5 ; <<16 x sbyte>> [#uses=1] + %tmp24 = insertelement <16 x sbyte> %tmp23, sbyte %tmp8, uint 6 ; <<16 x sbyte>> [#uses=1] + %tmp25 = insertelement <16 x sbyte> %tmp24, sbyte %tmp9, uint 7 ; <<16 x sbyte>> [#uses=1] + %tmp26 = insertelement <16 x sbyte> %tmp25, sbyte %tmp10, uint 8 ; <<16 x sbyte>> [#uses=1] + %tmp27 = insertelement <16 x sbyte> %tmp26, sbyte %tmp11, uint 9 ; <<16 x sbyte>> [#uses=1] + %tmp28 = insertelement <16 x sbyte> %tmp27, sbyte %tmp12, uint 10 ; <<16 x sbyte>> [#uses=1] + %tmp29 = insertelement <16 x sbyte> %tmp28, sbyte 
%tmp13, uint 11 ; <<16 x sbyte>> [#uses=1] + %tmp30 = insertelement <16 x sbyte> %tmp29, sbyte %tmp14, uint 12 ; <<16 x sbyte>> [#uses=1] + %tmp31 = insertelement <16 x sbyte> %tmp30, sbyte %tmp15, uint 13 ; <<16 x sbyte>> [#uses=1] + %tmp32 = insertelement <16 x sbyte> %tmp31, sbyte %tmp16, uint 14 ; <<16 x sbyte>> [#uses=1] + %tmp33 = insertelement <16 x sbyte> %tmp32, sbyte %tmp17, uint 15 ; <<16 x sbyte>> [#uses=1] + store <16 x sbyte> %tmp33, <16 x sbyte>* %A + ret void +} + +void %th_l(<8 x short>* %A, <8 x short>* %B) { +entry: + %tmp = load <8 x short>* %A ; <<8 x short>> [#uses=4] + %tmp2 = load <8 x short>* %B ; <<8 x short>> [#uses=4] + %tmp = extractelement <8 x short> %tmp, uint 4 ; <short> [#uses=1] + %tmp3 = extractelement <8 x short> %tmp2, uint 4 ; <short> [#uses=1] + %tmp4 = extractelement <8 x short> %tmp, uint 5 ; <short> [#uses=1] + %tmp5 = extractelement <8 x short> %tmp2, uint 5 ; <short> [#uses=1] + %tmp6 = extractelement <8 x short> %tmp, uint 6 ; <short> [#uses=1] + %tmp7 = extractelement <8 x short> %tmp2, uint 6 ; <short> [#uses=1] + %tmp8 = extractelement <8 x short> %tmp, uint 7 ; <short> [#uses=1] + %tmp9 = extractelement <8 x short> %tmp2, uint 7 ; <short> [#uses=1] + %tmp10 = insertelement <8 x short> undef, short %tmp, uint 0 ; <<8 x short>> [#uses=1] + %tmp11 = insertelement <8 x short> %tmp10, short %tmp3, uint 1 ; <<8 x short>> [#uses=1] + %tmp12 = insertelement <8 x short> %tmp11, short %tmp4, uint 2 ; <<8 x short>> [#uses=1] + %tmp13 = insertelement <8 x short> %tmp12, short %tmp5, uint 3 ; <<8 x short>> [#uses=1] + %tmp14 = insertelement <8 x short> %tmp13, short %tmp6, uint 4 ; <<8 x short>> [#uses=1] + %tmp15 = insertelement <8 x short> %tmp14, short %tmp7, uint 5 ; <<8 x short>> [#uses=1] + %tmp16 = insertelement <8 x short> %tmp15, short %tmp8, uint 6 ; <<8 x short>> [#uses=1] + %tmp17 = insertelement <8 x short> %tmp16, short %tmp9, uint 7 ; <<8 x short>> [#uses=1] + store <8 x short> %tmp17, <8 x short>* %A + ret void 
+} + +void %tw_l(<4 x int>* %A, <4 x int>* %B) { +entry: + %tmp = load <4 x int>* %A ; <<4 x int>> [#uses=2] + %tmp2 = load <4 x int>* %B ; <<4 x int>> [#uses=2] + %tmp = extractelement <4 x int> %tmp, uint 2 ; <int> [#uses=1] + %tmp3 = extractelement <4 x int> %tmp2, uint 2 ; <int> [#uses=1] + %tmp4 = extractelement <4 x int> %tmp, uint 3 ; <int> [#uses=1] + %tmp5 = extractelement <4 x int> %tmp2, uint 3 ; <int> [#uses=1] + %tmp6 = insertelement <4 x int> undef, int %tmp, uint 0 ; <<4 x int>> [#uses=1] + %tmp7 = insertelement <4 x int> %tmp6, int %tmp3, uint 1 ; <<4 x int>> [#uses=1] + %tmp8 = insertelement <4 x int> %tmp7, int %tmp4, uint 2 ; <<4 x int>> [#uses=1] + %tmp9 = insertelement <4 x int> %tmp8, int %tmp5, uint 3 ; <<4 x int>> [#uses=1] + store <4 x int> %tmp9, <4 x int>* %A + ret void +} + +void %tb_h(<16 x sbyte>* %A, <16 x sbyte>* %B) { +entry: + %tmp = load <16 x sbyte>* %A ; <<16 x sbyte>> [#uses=8] + %tmp2 = load <16 x sbyte>* %B ; <<16 x sbyte>> [#uses=8] + %tmp = extractelement <16 x sbyte> %tmp, uint 0 ; <sbyte> [#uses=1] + %tmp3 = extractelement <16 x sbyte> %tmp2, uint 0 ; <sbyte> [#uses=1] + %tmp4 = extractelement <16 x sbyte> %tmp, uint 1 ; <sbyte> [#uses=1] + %tmp5 = extractelement <16 x sbyte> %tmp2, uint 1 ; <sbyte> [#uses=1] + %tmp6 = extractelement <16 x sbyte> %tmp, uint 2 ; <sbyte> [#uses=1] + %tmp7 = extractelement <16 x sbyte> %tmp2, uint 2 ; <sbyte> [#uses=1] + %tmp8 = extractelement <16 x sbyte> %tmp, uint 3 ; <sbyte> [#uses=1] + %tmp9 = extractelement <16 x sbyte> %tmp2, uint 3 ; <sbyte> [#uses=1] + %tmp10 = extractelement <16 x sbyte> %tmp, uint 4 ; <sbyte> [#uses=1] + %tmp11 = extractelement <16 x sbyte> %tmp2, uint 4 ; <sbyte> [#uses=1] + %tmp12 = extractelement <16 x sbyte> %tmp, uint 5 ; <sbyte> [#uses=1] + %tmp13 = extractelement <16 x sbyte> %tmp2, uint 5 ; <sbyte> [#uses=1] + %tmp14 = extractelement <16 x sbyte> %tmp, uint 6 ; <sbyte> [#uses=1] + %tmp15 = extractelement <16 x sbyte> %tmp2, uint 6 ; <sbyte> [#uses=1] + 
%tmp16 = extractelement <16 x sbyte> %tmp, uint 7 ; <sbyte> [#uses=1] + %tmp17 = extractelement <16 x sbyte> %tmp2, uint 7 ; <sbyte> [#uses=1] + %tmp18 = insertelement <16 x sbyte> undef, sbyte %tmp, uint 0 ; <<16 x sbyte>> [#uses=1] + %tmp19 = insertelement <16 x sbyte> %tmp18, sbyte %tmp3, uint 1 ; <<16 x sbyte>> [#uses=1] + %tmp20 = insertelement <16 x sbyte> %tmp19, sbyte %tmp4, uint 2 ; <<16 x sbyte>> [#uses=1] + %tmp21 = insertelement <16 x sbyte> %tmp20, sbyte %tmp5, uint 3 ; <<16 x sbyte>> [#uses=1] + %tmp22 = insertelement <16 x sbyte> %tmp21, sbyte %tmp6, uint 4 ; <<16 x sbyte>> [#uses=1] + %tmp23 = insertelement <16 x sbyte> %tmp22, sbyte %tmp7, uint 5 ; <<16 x sbyte>> [#uses=1] + %tmp24 = insertelement <16 x sbyte> %tmp23, sbyte %tmp8, uint 6 ; <<16 x sbyte>> [#uses=1] + %tmp25 = insertelement <16 x sbyte> %tmp24, sbyte %tmp9, uint 7 ; <<16 x sbyte>> [#uses=1] + %tmp26 = insertelement <16 x sbyte> %tmp25, sbyte %tmp10, uint 8 ; <<16 x sbyte>> [#uses=1] + %tmp27 = insertelement <16 x sbyte> %tmp26, sbyte %tmp11, uint 9 ; <<16 x sbyte>> [#uses=1] + %tmp28 = insertelement <16 x sbyte> %tmp27, sbyte %tmp12, uint 10 ; <<16 x sbyte>> [#uses=1] + %tmp29 = insertelement <16 x sbyte> %tmp28, sbyte %tmp13, uint 11 ; <<16 x sbyte>> [#uses=1] + %tmp30 = insertelement <16 x sbyte> %tmp29, sbyte %tmp14, uint 12 ; <<16 x sbyte>> [#uses=1] + %tmp31 = insertelement <16 x sbyte> %tmp30, sbyte %tmp15, uint 13 ; <<16 x sbyte>> [#uses=1] + %tmp32 = insertelement <16 x sbyte> %tmp31, sbyte %tmp16, uint 14 ; <<16 x sbyte>> [#uses=1] + %tmp33 = insertelement <16 x sbyte> %tmp32, sbyte %tmp17, uint 15 ; <<16 x sbyte>> [#uses=1] + store <16 x sbyte> %tmp33, <16 x sbyte>* %A + ret void +} + +void %th_h(<8 x short>* %A, <8 x short>* %B) { +entry: + %tmp = load <8 x short>* %A ; <<8 x short>> [#uses=4] + %tmp2 = load <8 x short>* %B ; <<8 x short>> [#uses=4] + %tmp = extractelement <8 x short> %tmp, uint 0 ; <short> [#uses=1] + %tmp3 = extractelement <8 x short> %tmp2, uint 0 ; 
<short> [#uses=1] + %tmp4 = extractelement <8 x short> %tmp, uint 1 ; <short> [#uses=1] + %tmp5 = extractelement <8 x short> %tmp2, uint 1 ; <short> [#uses=1] + %tmp6 = extractelement <8 x short> %tmp, uint 2 ; <short> [#uses=1] + %tmp7 = extractelement <8 x short> %tmp2, uint 2 ; <short> [#uses=1] + %tmp8 = extractelement <8 x short> %tmp, uint 3 ; <short> [#uses=1] + %tmp9 = extractelement <8 x short> %tmp2, uint 3 ; <short> [#uses=1] + %tmp10 = insertelement <8 x short> undef, short %tmp, uint 0 ; <<8 x short>> [#uses=1] + %tmp11 = insertelement <8 x short> %tmp10, short %tmp3, uint 1 ; <<8 x short>> [#uses=1] + %tmp12 = insertelement <8 x short> %tmp11, short %tmp4, uint 2 ; <<8 x short>> [#uses=1] + %tmp13 = insertelement <8 x short> %tmp12, short %tmp5, uint 3 ; <<8 x short>> [#uses=1] + %tmp14 = insertelement <8 x short> %tmp13, short %tmp6, uint 4 ; <<8 x short>> [#uses=1] + %tmp15 = insertelement <8 x short> %tmp14, short %tmp7, uint 5 ; <<8 x short>> [#uses=1] + %tmp16 = insertelement <8 x short> %tmp15, short %tmp8, uint 6 ; <<8 x short>> [#uses=1] + %tmp17 = insertelement <8 x short> %tmp16, short %tmp9, uint 7 ; <<8 x short>> [#uses=1] + store <8 x short> %tmp17, <8 x short>* %A + ret void +} + +void %tw_h(<4 x int>* %A, <4 x int>* %B) { +entry: + %tmp = load <4 x int>* %A ; <<4 x int>> [#uses=2] + %tmp2 = load <4 x int>* %B ; <<4 x int>> [#uses=2] + %tmp = extractelement <4 x int> %tmp2, uint 0 ; <int> [#uses=1] + %tmp3 = extractelement <4 x int> %tmp, uint 0 ; <int> [#uses=1] + %tmp4 = extractelement <4 x int> %tmp2, uint 1 ; <int> [#uses=1] + %tmp5 = extractelement <4 x int> %tmp, uint 1 ; <int> [#uses=1] + %tmp6 = insertelement <4 x int> undef, int %tmp, uint 0 ; <<4 x int>> [#uses=1] + %tmp7 = insertelement <4 x int> %tmp6, int %tmp3, uint 1 ; <<4 x int>> [#uses=1] + %tmp8 = insertelement <4 x int> %tmp7, int %tmp4, uint 2 ; <<4 x int>> [#uses=1] + %tmp9 = insertelement <4 x int> %tmp8, int %tmp5, uint 3 ; <<4 x int>> [#uses=1] + store <4 x int> 
%tmp9, <4 x int>* %A + ret void +} + +void %tw_h_flop(<4 x int>* %A, <4 x int>* %B) { + %tmp = load <4 x int>* %A ; <<4 x int>> [#uses=2] + %tmp2 = load <4 x int>* %B ; <<4 x int>> [#uses=2] + %tmp = extractelement <4 x int> %tmp, uint 0 ; <int> [#uses=1] + %tmp3 = extractelement <4 x int> %tmp2, uint 0 ; <int> [#uses=1] + %tmp4 = extractelement <4 x int> %tmp, uint 1 ; <int> [#uses=1] + %tmp5 = extractelement <4 x int> %tmp2, uint 1 ; <int> [#uses=1] + %tmp6 = insertelement <4 x int> undef, int %tmp, uint 0 ; <<4 x int>> [#uses=1] + %tmp7 = insertelement <4 x int> %tmp6, int %tmp3, uint 1 ; <<4 x int>> [#uses=1] + %tmp8 = insertelement <4 x int> %tmp7, int %tmp4, uint 2 ; <<4 x int>> [#uses=1] + %tmp9 = insertelement <4 x int> %tmp8, int %tmp5, uint 3 ; <<4 x int>> [#uses=1] + store <4 x int> %tmp9, <4 x int>* %A + ret void +} + + +void %VMRG_UNARY_tb_l(<16 x sbyte>* %A, <16 x sbyte>* %B) { +entry: + %tmp = load <16 x sbyte>* %A ; <<16 x sbyte>> [#uses=16] + %tmp = extractelement <16 x sbyte> %tmp, uint 8 ; <sbyte> [#uses=1] + %tmp3 = extractelement <16 x sbyte> %tmp, uint 8 ; <sbyte> [#uses=1] + %tmp4 = extractelement <16 x sbyte> %tmp, uint 9 ; <sbyte> [#uses=1] + %tmp5 = extractelement <16 x sbyte> %tmp, uint 9 ; <sbyte> [#uses=1] + %tmp6 = extractelement <16 x sbyte> %tmp, uint 10 ; <sbyte> [#uses=1] + %tmp7 = extractelement <16 x sbyte> %tmp, uint 10 ; <sbyte> [#uses=1] + %tmp8 = extractelement <16 x sbyte> %tmp, uint 11 ; <sbyte> [#uses=1] + %tmp9 = extractelement <16 x sbyte> %tmp, uint 11 ; <sbyte> [#uses=1] + %tmp10 = extractelement <16 x sbyte> %tmp, uint 12 ; <sbyte> [#uses=1] + %tmp11 = extractelement <16 x sbyte> %tmp, uint 12 ; <sbyte> [#uses=1] + %tmp12 = extractelement <16 x sbyte> %tmp, uint 13 ; <sbyte> [#uses=1] + %tmp13 = extractelement <16 x sbyte> %tmp, uint 13 ; <sbyte> [#uses=1] + %tmp14 = extractelement <16 x sbyte> %tmp, uint 14 ; <sbyte> [#uses=1] + %tmp15 = extractelement <16 x sbyte> %tmp, uint 14 ; <sbyte> [#uses=1] + %tmp16 = 
extractelement <16 x sbyte> %tmp, uint 15 ; <sbyte> [#uses=1] + %tmp17 = extractelement <16 x sbyte> %tmp, uint 15 ; <sbyte> [#uses=1] + %tmp18 = insertelement <16 x sbyte> undef, sbyte %tmp, uint 0 ; <<16 x sbyte>> [#uses=1] + %tmp19 = insertelement <16 x sbyte> %tmp18, sbyte %tmp3, uint 1 ; <<16 x sbyte>> [#uses=1] + %tmp20 = insertelement <16 x sbyte> %tmp19, sbyte %tmp4, uint 2 ; <<16 x sbyte>> [#uses=1] + %tmp21 = insertelement <16 x sbyte> %tmp20, sbyte %tmp5, uint 3 ; <<16 x sbyte>> [#uses=1] + %tmp22 = insertelement <16 x sbyte> %tmp21, sbyte %tmp6, uint 4 ; <<16 x sbyte>> [#uses=1] + %tmp23 = insertelement <16 x sbyte> %tmp22, sbyte %tmp7, uint 5 ; <<16 x sbyte>> [#uses=1] + %tmp24 = insertelement <16 x sbyte> %tmp23, sbyte %tmp8, uint 6 ; <<16 x sbyte>> [#uses=1] + %tmp25 = insertelement <16 x sbyte> %tmp24, sbyte %tmp9, uint 7 ; <<16 x sbyte>> [#uses=1] + %tmp26 = insertelement <16 x sbyte> %tmp25, sbyte %tmp10, uint 8 ; <<16 x sbyte>> [#uses=1] + %tmp27 = insertelement <16 x sbyte> %tmp26, sbyte %tmp11, uint 9 ; <<16 x sbyte>> [#uses=1] + %tmp28 = insertelement <16 x sbyte> %tmp27, sbyte %tmp12, uint 10 ; <<16 x sbyte>> [#uses=1] + %tmp29 = insertelement <16 x sbyte> %tmp28, sbyte %tmp13, uint 11 ; <<16 x sbyte>> [#uses=1] + %tmp30 = insertelement <16 x sbyte> %tmp29, sbyte %tmp14, uint 12 ; <<16 x sbyte>> [#uses=1] + %tmp31 = insertelement <16 x sbyte> %tmp30, sbyte %tmp15, uint 13 ; <<16 x sbyte>> [#uses=1] + %tmp32 = insertelement <16 x sbyte> %tmp31, sbyte %tmp16, uint 14 ; <<16 x sbyte>> [#uses=1] + %tmp33 = insertelement <16 x sbyte> %tmp32, sbyte %tmp17, uint 15 ; <<16 x sbyte>> [#uses=1] + store <16 x sbyte> %tmp33, <16 x sbyte>* %A + ret void +} + +void %VMRG_UNARY_th_l(<8 x short>* %A, <8 x short>* %B) { +entry: + %tmp = load <8 x short>* %A ; <<8 x short>> [#uses=8] + %tmp = extractelement <8 x short> %tmp, uint 4 ; <short> [#uses=1] + %tmp3 = extractelement <8 x short> %tmp, uint 4 ; <short> [#uses=1] + %tmp4 = extractelement <8 x short> 
%tmp, uint 5 ; <short> [#uses=1] + %tmp5 = extractelement <8 x short> %tmp, uint 5 ; <short> [#uses=1] + %tmp6 = extractelement <8 x short> %tmp, uint 6 ; <short> [#uses=1] + %tmp7 = extractelement <8 x short> %tmp, uint 6 ; <short> [#uses=1] + %tmp8 = extractelement <8 x short> %tmp, uint 7 ; <short> [#uses=1] + %tmp9 = extractelement <8 x short> %tmp, uint 7 ; <short> [#uses=1] + %tmp10 = insertelement <8 x short> undef, short %tmp, uint 0 ; <<8 x short>> [#uses=1] + %tmp11 = insertelement <8 x short> %tmp10, short %tmp3, uint 1 ; <<8 x short>> [#uses=1] + %tmp12 = insertelement <8 x short> %tmp11, short %tmp4, uint 2 ; <<8 x short>> [#uses=1] + %tmp13 = insertelement <8 x short> %tmp12, short %tmp5, uint 3 ; <<8 x short>> [#uses=1] + %tmp14 = insertelement <8 x short> %tmp13, short %tmp6, uint 4 ; <<8 x short>> [#uses=1] + %tmp15 = insertelement <8 x short> %tmp14, short %tmp7, uint 5 ; <<8 x short>> [#uses=1] + %tmp16 = insertelement <8 x short> %tmp15, short %tmp8, uint 6 ; <<8 x short>> [#uses=1] + %tmp17 = insertelement <8 x short> %tmp16, short %tmp9, uint 7 ; <<8 x short>> [#uses=1] + store <8 x short> %tmp17, <8 x short>* %A + ret void +} + +void %VMRG_UNARY_tw_l(<4 x int>* %A, <4 x int>* %B) { +entry: + %tmp = load <4 x int>* %A ; <<4 x int>> [#uses=4] + %tmp = extractelement <4 x int> %tmp, uint 2 ; <int> [#uses=1] + %tmp3 = extractelement <4 x int> %tmp, uint 2 ; <int> [#uses=1] + %tmp4 = extractelement <4 x int> %tmp, uint 3 ; <int> [#uses=1] + %tmp5 = extractelement <4 x int> %tmp, uint 3 ; <int> [#uses=1] + %tmp6 = insertelement <4 x int> undef, int %tmp, uint 0 ; <<4 x int>> [#uses=1] + %tmp7 = insertelement <4 x int> %tmp6, int %tmp3, uint 1 ; <<4 x int>> [#uses=1] + %tmp8 = insertelement <4 x int> %tmp7, int %tmp4, uint 2 ; <<4 x int>> [#uses=1] + %tmp9 = insertelement <4 x int> %tmp8, int %tmp5, uint 3 ; <<4 x int>> [#uses=1] + store <4 x int> %tmp9, <4 x int>* %A + ret void +} + +void %VMRG_UNARY_tb_h(<16 x sbyte>* %A, <16 x sbyte>* %B) { 
+entry: + %tmp = load <16 x sbyte>* %A ; <<16 x sbyte>> [#uses=16] + %tmp = extractelement <16 x sbyte> %tmp, uint 0 ; <sbyte> [#uses=1] + %tmp3 = extractelement <16 x sbyte> %tmp, uint 0 ; <sbyte> [#uses=1] + %tmp4 = extractelement <16 x sbyte> %tmp, uint 1 ; <sbyte> [#uses=1] + %tmp5 = extractelement <16 x sbyte> %tmp, uint 1 ; <sbyte> [#uses=1] + %tmp6 = extractelement <16 x sbyte> %tmp, uint 2 ; <sbyte> [#uses=1] + %tmp7 = extractelement <16 x sbyte> %tmp, uint 2 ; <sbyte> [#uses=1] + %tmp8 = extractelement <16 x sbyte> %tmp, uint 3 ; <sbyte> [#uses=1] + %tmp9 = extractelement <16 x sbyte> %tmp, uint 3 ; <sbyte> [#uses=1] + %tmp10 = extractelement <16 x sbyte> %tmp, uint 4 ; <sbyte> [#uses=1] + %tmp11 = extractelement <16 x sbyte> %tmp, uint 4 ; <sbyte> [#uses=1] + %tmp12 = extractelement <16 x sbyte> %tmp, uint 5 ; <sbyte> [#uses=1] + %tmp13 = extractelement <16 x sbyte> %tmp, uint 5 ; <sbyte> [#uses=1] + %tmp14 = extractelement <16 x sbyte> %tmp, uint 6 ; <sbyte> [#uses=1] + %tmp15 = extractelement <16 x sbyte> %tmp, uint 6 ; <sbyte> [#uses=1] + %tmp16 = extractelement <16 x sbyte> %tmp, uint 7 ; <sbyte> [#uses=1] + %tmp17 = extractelement <16 x sbyte> %tmp, uint 7 ; <sbyte> [#uses=1] + %tmp18 = insertelement <16 x sbyte> undef, sbyte %tmp, uint 0 ; <<16 x sbyte>> [#uses=1] + %tmp19 = insertelement <16 x sbyte> %tmp18, sbyte %tmp3, uint 1 ; <<16 x sbyte>> [#uses=1] + %tmp20 = insertelement <16 x sbyte> %tmp19, sbyte %tmp4, uint 2 ; <<16 x sbyte>> [#uses=1] + %tmp21 = insertelement <16 x sbyte> %tmp20, sbyte %tmp5, uint 3 ; <<16 x sbyte>> [#uses=1] + %tmp22 = insertelement <16 x sbyte> %tmp21, sbyte %tmp6, uint 4 ; <<16 x sbyte>> [#uses=1] + %tmp23 = insertelement <16 x sbyte> %tmp22, sbyte %tmp7, uint 5 ; <<16 x sbyte>> [#uses=1] + %tmp24 = insertelement <16 x sbyte> %tmp23, sbyte %tmp8, uint 6 ; <<16 x sbyte>> [#uses=1] + %tmp25 = insertelement <16 x sbyte> %tmp24, sbyte %tmp9, uint 7 ; <<16 x sbyte>> [#uses=1] + %tmp26 = insertelement <16 x sbyte> %tmp25, 
sbyte %tmp10, uint 8 ; <<16 x sbyte>> [#uses=1] + %tmp27 = insertelement <16 x sbyte> %tmp26, sbyte %tmp11, uint 9 ; <<16 x sbyte>> [#uses=1] + %tmp28 = insertelement <16 x sbyte> %tmp27, sbyte %tmp12, uint 10 ; <<16 x sbyte>> [#uses=1] + %tmp29 = insertelement <16 x sbyte> %tmp28, sbyte %tmp13, uint 11 ; <<16 x sbyte>> [#uses=1] + %tmp30 = insertelement <16 x sbyte> %tmp29, sbyte %tmp14, uint 12 ; <<16 x sbyte>> [#uses=1] + %tmp31 = insertelement <16 x sbyte> %tmp30, sbyte %tmp15, uint 13 ; <<16 x sbyte>> [#uses=1] + %tmp32 = insertelement <16 x sbyte> %tmp31, sbyte %tmp16, uint 14 ; <<16 x sbyte>> [#uses=1] + %tmp33 = insertelement <16 x sbyte> %tmp32, sbyte %tmp17, uint 15 ; <<16 x sbyte>> [#uses=1] + store <16 x sbyte> %tmp33, <16 x sbyte>* %A + ret void +} + +void %VMRG_UNARY_th_h(<8 x short>* %A, <8 x short>* %B) { +entry: + %tmp = load <8 x short>* %A ; <<8 x short>> [#uses=8] + %tmp = extractelement <8 x short> %tmp, uint 0 ; <short> [#uses=1] + %tmp3 = extractelement <8 x short> %tmp, uint 0 ; <short> [#uses=1] + %tmp4 = extractelement <8 x short> %tmp, uint 1 ; <short> [#uses=1] + %tmp5 = extractelement <8 x short> %tmp, uint 1 ; <short> [#uses=1] + %tmp6 = extractelement <8 x short> %tmp, uint 2 ; <short> [#uses=1] + %tmp7 = extractelement <8 x short> %tmp, uint 2 ; <short> [#uses=1] + %tmp8 = extractelement <8 x short> %tmp, uint 3 ; <short> [#uses=1] + %tmp9 = extractelement <8 x short> %tmp, uint 3 ; <short> [#uses=1] + %tmp10 = insertelement <8 x short> undef, short %tmp, uint 0 ; <<8 x short>> [#uses=1] + %tmp11 = insertelement <8 x short> %tmp10, short %tmp3, uint 1 ; <<8 x short>> [#uses=1] + %tmp12 = insertelement <8 x short> %tmp11, short %tmp4, uint 2 ; <<8 x short>> [#uses=1] + %tmp13 = insertelement <8 x short> %tmp12, short %tmp5, uint 3 ; <<8 x short>> [#uses=1] + %tmp14 = insertelement <8 x short> %tmp13, short %tmp6, uint 4 ; <<8 x short>> [#uses=1] + %tmp15 = insertelement <8 x short> %tmp14, short %tmp7, uint 5 ; <<8 x short>> [#uses=1] 
+ %tmp16 = insertelement <8 x short> %tmp15, short %tmp8, uint 6 ; <<8 x short>> [#uses=1] + %tmp17 = insertelement <8 x short> %tmp16, short %tmp9, uint 7 ; <<8 x short>> [#uses=1] + store <8 x short> %tmp17, <8 x short>* %A + ret void +} + +void %VMRG_UNARY_tw_h(<4 x int>* %A, <4 x int>* %B) { +entry: + %tmp = load <4 x int>* %A ; <<4 x int>> [#uses=4] + %tmp = extractelement <4 x int> %tmp, uint 0 ; <int> [#uses=1] + %tmp3 = extractelement <4 x int> %tmp, uint 0 ; <int> [#uses=1] + %tmp4 = extractelement <4 x int> %tmp, uint 1 ; <int> [#uses=1] + %tmp5 = extractelement <4 x int> %tmp, uint 1 ; <int> [#uses=1] + %tmp6 = insertelement <4 x int> undef, int %tmp, uint 0 ; <<4 x int>> [#uses=1] + %tmp7 = insertelement <4 x int> %tmp6, int %tmp3, uint 1 ; <<4 x int>> [#uses=1] + %tmp8 = insertelement <4 x int> %tmp7, int %tmp4, uint 2 ; <<4 x int>> [#uses=1] + %tmp9 = insertelement <4 x int> %tmp8, int %tmp5, uint 3 ; <<4 x int>> [#uses=1] + store <4 x int> %tmp9, <4 x int>* %A + ret void +} + +void %VPCKUHUM_unary(<8 x short>* %A, <8 x short>* %B) { +entry: + %tmp = load <8 x short>* %A ; <<8 x short>> [#uses=2] + %tmp = cast <8 x short> %tmp to <16 x sbyte> ; <<16 x sbyte>> [#uses=8] + %tmp3 = cast <8 x short> %tmp to <16 x sbyte> ; <<16 x sbyte>> [#uses=8] + %tmp = extractelement <16 x sbyte> %tmp, uint 1 ; <sbyte> [#uses=1] + %tmp4 = extractelement <16 x sbyte> %tmp, uint 3 ; <sbyte> [#uses=1] + %tmp5 = extractelement <16 x sbyte> %tmp, uint 5 ; <sbyte> [#uses=1] + %tmp6 = extractelement <16 x sbyte> %tmp, uint 7 ; <sbyte> [#uses=1] + %tmp7 = extractelement <16 x sbyte> %tmp, uint 9 ; <sbyte> [#uses=1] + %tmp8 = extractelement <16 x sbyte> %tmp, uint 11 ; <sbyte> [#uses=1] + %tmp9 = extractelement <16 x sbyte> %tmp, uint 13 ; <sbyte> [#uses=1] + %tmp10 = extractelement <16 x sbyte> %tmp, uint 15 ; <sbyte> [#uses=1] + %tmp11 = extractelement <16 x sbyte> %tmp3, uint 1 ; <sbyte> [#uses=1] + %tmp12 = extractelement <16 x sbyte> %tmp3, uint 3 ; <sbyte> [#uses=1] + 
%tmp13 = extractelement <16 x sbyte> %tmp3, uint 5 ; <sbyte> [#uses=1] + %tmp14 = extractelement <16 x sbyte> %tmp3, uint 7 ; <sbyte> [#uses=1] + %tmp15 = extractelement <16 x sbyte> %tmp3, uint 9 ; <sbyte> [#uses=1] + %tmp16 = extractelement <16 x sbyte> %tmp3, uint 11 ; <sbyte> [#uses=1] + %tmp17 = extractelement <16 x sbyte> %tmp3, uint 13 ; <sbyte> [#uses=1] + %tmp18 = extractelement <16 x sbyte> %tmp3, uint 15 ; <sbyte> [#uses=1] + %tmp19 = insertelement <16 x sbyte> undef, sbyte %tmp, uint 0 ; <<16 x sbyte>> [#uses=1] + %tmp20 = insertelement <16 x sbyte> %tmp19, sbyte %tmp4, uint 1 ; <<16 x sbyte>> [#uses=1] + %tmp21 = insertelement <16 x sbyte> %tmp20, sbyte %tmp5, uint 2 ; <<16 x sbyte>> [#uses=1] + %tmp22 = insertelement <16 x sbyte> %tmp21, sbyte %tmp6, uint 3 ; <<16 x sbyte>> [#uses=1] + %tmp23 = insertelement <16 x sbyte> %tmp22, sbyte %tmp7, uint 4 ; <<16 x sbyte>> [#uses=1] + %tmp24 = insertelement <16 x sbyte> %tmp23, sbyte %tmp8, uint 5 ; <<16 x sbyte>> [#uses=1] + %tmp25 = insertelement <16 x sbyte> %tmp24, sbyte %tmp9, uint 6 ; <<16 x sbyte>> [#uses=1] + %tmp26 = insertelement <16 x sbyte> %tmp25, sbyte %tmp10, uint 7 ; <<16 x sbyte>> [#uses=1] + %tmp27 = insertelement <16 x sbyte> %tmp26, sbyte %tmp11, uint 8 ; <<16 x sbyte>> [#uses=1] + %tmp28 = insertelement <16 x sbyte> %tmp27, sbyte %tmp12, uint 9 ; <<16 x sbyte>> [#uses=1] + %tmp29 = insertelement <16 x sbyte> %tmp28, sbyte %tmp13, uint 10 ; <<16 x sbyte>> [#uses=1] + %tmp30 = insertelement <16 x sbyte> %tmp29, sbyte %tmp14, uint 11 ; <<16 x sbyte>> [#uses=1] + %tmp31 = insertelement <16 x sbyte> %tmp30, sbyte %tmp15, uint 12 ; <<16 x sbyte>> [#uses=1] + %tmp32 = insertelement <16 x sbyte> %tmp31, sbyte %tmp16, uint 13 ; <<16 x sbyte>> [#uses=1] + %tmp33 = insertelement <16 x sbyte> %tmp32, sbyte %tmp17, uint 14 ; <<16 x sbyte>> [#uses=1] + %tmp34 = insertelement <16 x sbyte> %tmp33, sbyte %tmp18, uint 15 ; <<16 x sbyte>> [#uses=1] + %tmp34 = cast <16 x sbyte> %tmp34 to <8 x short> ; <<8 x 
short>> [#uses=1] + store <8 x short> %tmp34, <8 x short>* %A + ret void +} + +void %VPCKUWUM_unary(<4 x int>* %A, <4 x int>* %B) { +entry: + %tmp = load <4 x int>* %A ; <<4 x int>> [#uses=2] + %tmp = cast <4 x int> %tmp to <8 x short> ; <<8 x short>> [#uses=4] + %tmp3 = cast <4 x int> %tmp to <8 x short> ; <<8 x short>> [#uses=4] + %tmp = extractelement <8 x short> %tmp, uint 1 ; <short> [#uses=1] + %tmp4 = extractelement <8 x short> %tmp, uint 3 ; <short> [#uses=1] + %tmp5 = extractelement <8 x short> %tmp, uint 5 ; <short> [#uses=1] + %tmp6 = extractelement <8 x short> %tmp, uint 7 ; <short> [#uses=1] + %tmp7 = extractelement <8 x short> %tmp3, uint 1 ; <short> [#uses=1] + %tmp8 = extractelement <8 x short> %tmp3, uint 3 ; <short> [#uses=1] + %tmp9 = extractelement <8 x short> %tmp3, uint 5 ; <short> [#uses=1] + %tmp10 = extractelement <8 x short> %tmp3, uint 7 ; <short> [#uses=1] + %tmp11 = insertelement <8 x short> undef, short %tmp, uint 0 ; <<8 x short>> [#uses=1] + %tmp12 = insertelement <8 x short> %tmp11, short %tmp4, uint 1 ; <<8 x short>> [#uses=1] + %tmp13 = insertelement <8 x short> %tmp12, short %tmp5, uint 2 ; <<8 x short>> [#uses=1] + %tmp14 = insertelement <8 x short> %tmp13, short %tmp6, uint 3 ; <<8 x short>> [#uses=1] + %tmp15 = insertelement <8 x short> %tmp14, short %tmp7, uint 4 ; <<8 x short>> [#uses=1] + %tmp16 = insertelement <8 x short> %tmp15, short %tmp8, uint 5 ; <<8 x short>> [#uses=1] + %tmp17 = insertelement <8 x short> %tmp16, short %tmp9, uint 6 ; <<8 x short>> [#uses=1] + %tmp18 = insertelement <8 x short> %tmp17, short %tmp10, uint 7 ; <<8 x short>> [#uses=1] + %tmp18 = cast <8 x short> %tmp18 to <4 x int> ; <<4 x int>> [#uses=1] + store <4 x int> %tmp18, <4 x int>* %A + ret void +} diff --git a/test/CodeGen/PowerPC/vec_spat.ll b/test/CodeGen/PowerPC/vec_spat.ll new file mode 100644 index 0000000..15e2950 --- /dev/null +++ b/test/CodeGen/PowerPC/vec_spat.ll @@ -0,0 +1,73 @@ +; Test that vectors are scalarized/lowered correctly. 
+; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g3 | \ +; RUN: grep stfs | wc -l | grep 4 +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 -o %t -f +; RUN: grep vspltw %t | wc -l | grep 2 +; RUN: grep vsplti %t | wc -l | grep 3 +; RUN: grep vsplth %t | wc -l | grep 1 + +%f4 = type <4 x float> +%i4 = type <4 x int> + +implementation + +void %splat(%f4* %P, %f4* %Q, float %X) { + %tmp = insertelement %f4 undef, float %X, uint 0 + %tmp2 = insertelement %f4 %tmp, float %X, uint 1 + %tmp4 = insertelement %f4 %tmp2, float %X, uint 2 + %tmp6 = insertelement %f4 %tmp4, float %X, uint 3 + %q = load %f4* %Q + %R = add %f4 %q, %tmp6 + store %f4 %R, %f4* %P + ret void +} + +void %splat_i4(%i4* %P, %i4* %Q, int %X) { + %tmp = insertelement %i4 undef, int %X, uint 0 + %tmp2 = insertelement %i4 %tmp, int %X, uint 1 + %tmp4 = insertelement %i4 %tmp2, int %X, uint 2 + %tmp6 = insertelement %i4 %tmp4, int %X, uint 3 + %q = load %i4* %Q + %R = add %i4 %q, %tmp6 + store %i4 %R, %i4* %P + ret void +} + +void %splat_imm_i32(%i4* %P, %i4* %Q, int %X) { + %q = load %i4* %Q + %R = add %i4 %q, <int -1, int -1, int -1, int -1> + store %i4 %R, %i4* %P + ret void +} + +void %splat_imm_i16(%i4* %P, %i4* %Q, int %X) { + %q = load %i4* %Q + %R = add %i4 %q, <int 65537, int 65537, int 65537, int 65537> + store %i4 %R, %i4* %P + ret void +} + +void %splat_h(short %tmp, <16 x ubyte>* %dst) { + %tmp = insertelement <8 x short> undef, short %tmp, uint 0 + %tmp72 = insertelement <8 x short> %tmp, short %tmp, uint 1 + %tmp73 = insertelement <8 x short> %tmp72, short %tmp, uint 2 + %tmp74 = insertelement <8 x short> %tmp73, short %tmp, uint 3 + %tmp75 = insertelement <8 x short> %tmp74, short %tmp, uint 4 + %tmp76 = insertelement <8 x short> %tmp75, short %tmp, uint 5 + %tmp77 = insertelement <8 x short> %tmp76, short %tmp, uint 6 + %tmp78 = insertelement <8 x short> %tmp77, short %tmp, uint 7 + %tmp78 = cast <8 x short> %tmp78 to <16 x ubyte> + store <16 x ubyte> %tmp78, <16 
x ubyte>* %dst + ret void +} + +void %spltish(<16 x ubyte>* %A, <16 x ubyte>* %B) { + ; Gets converted to 16 x ubyte + %tmp = load <16 x ubyte>* %B + %tmp.s = cast <16 x ubyte> %tmp to <16 x sbyte> + %tmp4 = sub <16 x sbyte> %tmp.s, cast (<8 x short> < short 15, short 15, short 15, short 15, short 15, short 15, short 15, short 15 > to <16 x sbyte>) + %tmp4.u = cast <16 x sbyte> %tmp4 to <16 x ubyte> + store <16 x ubyte> %tmp4.u, <16 x ubyte>* %A + ret void +} + diff --git a/test/CodeGen/PowerPC/vec_vrsave.ll b/test/CodeGen/PowerPC/vec_vrsave.ll new file mode 100644 index 0000000..63e3eba --- /dev/null +++ b/test/CodeGen/PowerPC/vec_vrsave.ll @@ -0,0 +1,14 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 -o %t -f +; RUN: grep vrlw %t +; RUN: not grep spr %t +; RUN: not grep vrsave %t + +<4 x int> %test_rol() { + ret <4 x int> < int -11534337, int -11534337, int -11534337, int -11534337 > +} + +<4 x int> %test_arg(<4 x int> %A, <4 x int> %B) { + %C = add <4 x int> %A, %B + ret <4 x int> %C +} + diff --git a/test/CodeGen/PowerPC/vec_zero.ll b/test/CodeGen/PowerPC/vec_zero.ll new file mode 100644 index 0000000..c845c0e --- /dev/null +++ b/test/CodeGen/PowerPC/vec_zero.ll @@ -0,0 +1,8 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 | grep vxor + +void %foo(<4 x float> *%P) { + %T = load <4 x float> * %P + %S = add <4 x float> zeroinitializer, %T + store <4 x float> %S, <4 x float>* %P + ret void +} diff --git a/test/CodeGen/PowerPC/vector-identity-shuffle.ll b/test/CodeGen/PowerPC/vector-identity-shuffle.ll new file mode 100644 index 0000000..af5cc02 --- /dev/null +++ b/test/CodeGen/PowerPC/vector-identity-shuffle.ll @@ -0,0 +1,16 @@ +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 | grep test: +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 | not grep vperm + +void %test(<4 x float> *%tmp2.i) { + %tmp2.i = load <4x float>* %tmp2.i + %xFloat0.48 = extractelement <4 x float> %tmp2.i, uint 0 ; <float> 
[#uses=1] + %inFloat0.49 = insertelement <4 x float> undef, float %xFloat0.48, uint 0 ; <<4 x float>> [#uses=1] + %xFloat1.50 = extractelement <4 x float> %tmp2.i, uint 1 ; <float> [#uses=1] + %inFloat1.52 = insertelement <4 x float> %inFloat0.49, float %xFloat1.50, uint 1 ; <<4 x float>> [#uses=1] + %xFloat2.53 = extractelement <4 x float> %tmp2.i, uint 2 ; <float> [#uses=1] + %inFloat2.55 = insertelement <4 x float> %inFloat1.52, float %xFloat2.53, uint 2 ; <<4 x float>> [#uses=1] + %xFloat3.56 = extractelement <4 x float> %tmp2.i, uint 3 ; <float> [#uses=1] + %inFloat3.58 = insertelement <4 x float> %inFloat2.55, float %xFloat3.56, uint 3 ; <<4 x float>> [#uses=4] + store <4 x float> %inFloat3.58, <4x float>* %tmp2.i + ret void +} diff --git a/test/CodeGen/PowerPC/vector.ll b/test/CodeGen/PowerPC/vector.ll new file mode 100644 index 0000000..f8dbbb0 --- /dev/null +++ b/test/CodeGen/PowerPC/vector.ll @@ -0,0 +1,157 @@ +; Test that vectors are scalarized/lowered correctly. +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g5 +; RUN: llvm-upgrade < %s | llvm-as | llc -march=ppc32 -mcpu=g3 + +%f1 = type <1 x float> +%f2 = type <2 x float> +%f4 = type <4 x float> +%i4 = type <4 x int> +%f8 = type <8 x float> +%d8 = type <8 x double> + +implementation + +;;; TEST HANDLING OF VARIOUS VECTOR SIZES + +void %test_f1(%f1 *%P, %f1* %Q, %f1 *%S) { + %p = load %f1 *%P + %q = load %f1* %Q + %R = add %f1 %p, %q + store %f1 %R, %f1 *%S + ret void +} + +void %test_f2(%f2 *%P, %f2* %Q, %f2 *%S) { + %p = load %f2* %P + %q = load %f2* %Q + %R = add %f2 %p, %q + store %f2 %R, %f2 *%S + ret void +} + +void %test_f4(%f4 *%P, %f4* %Q, %f4 *%S) { + %p = load %f4* %P + %q = load %f4* %Q + %R = add %f4 %p, %q + store %f4 %R, %f4 *%S + ret void +} + +void %test_f8(%f8 *%P, %f8* %Q, %f8 *%S) { + %p = load %f8* %P + %q = load %f8* %Q + %R = add %f8 %p, %q + store %f8 %R, %f8 *%S + ret void +} + +void %test_fmul(%f8 *%P, %f8* %Q, %f8 *%S) { + %p = load %f8* %P + %q = load %f8* %Q + 
%R = mul %f8 %p, %q + store %f8 %R, %f8 *%S + ret void +} + +void %test_div(%f8 *%P, %f8* %Q, %f8 *%S) { + %p = load %f8* %P + %q = load %f8* %Q + %R = div %f8 %p, %q + store %f8 %R, %f8 *%S + ret void +} + +;;; TEST VECTOR CONSTRUCTS + +void %test_cst(%f4 *%P, %f4 *%S) { + %p = load %f4* %P + %R = add %f4 %p, <float 0.1, float 1.0, float 2.0, float 4.5> + store %f4 %R, %f4 *%S + ret void +} + +void %test_zero(%f4 *%P, %f4 *%S) { + %p = load %f4* %P + %R = add %f4 %p, zeroinitializer + store %f4 %R, %f4 *%S + ret void +} + +void %test_undef(%f4 *%P, %f4 *%S) { + %p = load %f4* %P + %R = add %f4 %p, undef + store %f4 %R, %f4 *%S + ret void +} + +void %test_constant_insert(%f4 *%S) { + %R = insertelement %f4 zeroinitializer, float 10.0, uint 0 + store %f4 %R, %f4 *%S + ret void +} + +void %test_variable_buildvector(float %F, %f4 *%S) { + %R = insertelement %f4 zeroinitializer, float %F, uint 0 + store %f4 %R, %f4 *%S + ret void +} + +void %test_scalar_to_vector(float %F, %f4 *%S) { + %R = insertelement %f4 undef, float %F, uint 0 ;; R = scalar_to_vector F + store %f4 %R, %f4 *%S + ret void +} + +float %test_extract_elt(%f8 *%P) { + %p = load %f8* %P + %R = extractelement %f8 %p, uint 3 + ret float %R +} + +double %test_extract_elt2(%d8 *%P) { + %p = load %d8* %P + %R = extractelement %d8 %p, uint 3 + ret double %R +} + +void %test_cast_1(<4 x float>* %b, <4 x int>* %a) { + %tmp = load <4 x float>* %b + %tmp2 = add <4 x float> %tmp, <float 1.0, float 2.0, float 3.0, float 4.0> + %tmp3 = cast <4 x float> %tmp2 to <4 x int> + %tmp4 = add <4 x int> %tmp3, <int 1, int 2, int 3, int 4> + store <4 x int> %tmp4, <4 x int>* %a + ret void +} + +void %test_cast_2(<8 x float>* %a, <8 x int>* %b) { + %T = load <8 x float>* %a + %T2 = cast <8 x float> %T to <8 x int> + store <8 x int> %T2, <8 x int>* %b + ret void +} + + +;;; TEST IMPORTANT IDIOMS + +void %splat(%f4* %P, %f4* %Q, float %X) { + %tmp = insertelement %f4 undef, float %X, uint 0 + %tmp2 = insertelement %f4 %tmp, float 
%X, uint 1 + %tmp4 = insertelement %f4 %tmp2, float %X, uint 2 + %tmp6 = insertelement %f4 %tmp4, float %X, uint 3 + %q = load %f4* %Q + %R = add %f4 %q, %tmp6 + store %f4 %R, %f4* %P + ret void +} + +void %splat_i4(%i4* %P, %i4* %Q, int %X) { + %tmp = insertelement %i4 undef, int %X, uint 0 + %tmp2 = insertelement %i4 %tmp, int %X, uint 1 + %tmp4 = insertelement %i4 %tmp2, int %X, uint 2 + %tmp6 = insertelement %i4 %tmp4, int %X, uint 3 + %q = load %i4* %Q + %R = add %i4 %q, %tmp6 + store %i4 %R, %i4* %P + ret void +} + |