diff options
Diffstat (limited to 'test')
23 files changed, 467 insertions, 0 deletions
diff --git a/test/CodeGen/CellSPU/and_ops.ll b/test/CodeGen/CellSPU/and_ops.ll index f23355e..6858dba 100644 --- a/test/CodeGen/CellSPU/and_ops.ll +++ b/test/CodeGen/CellSPU/and_ops.ll @@ -4,6 +4,8 @@ ; RUN: grep andi %t1.s | count 36 ; RUN: grep andhi %t1.s | count 30 ; RUN: grep andbi %t1.s | count 4 +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" ; AND instruction generation: define <4 x i32> @and_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) { diff --git a/test/CodeGen/CellSPU/call_indirect.ll b/test/CodeGen/CellSPU/call_indirect.ll new file mode 100644 index 0000000..7aa8abc --- /dev/null +++ b/test/CodeGen/CellSPU/call_indirect.ll @@ -0,0 +1,29 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s +; RUN: grep bisl %t1.s | count 6 && +; RUN: grep ila %t1.s | count 1 && +; RUN: grep rotqbyi %t1.s | count 4 && +; RUN: grep lqa %t1.s | count 4 && +; RUN: grep lqd %t1.s | count 6 && +; RUN: grep dispatch_tab %t1.s | count 10 +; ModuleID = 'call_indirect.bc' +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128" +target triple = "spu-unknown-elf" + +@dispatch_tab = global [6 x void (i32, float)*] zeroinitializer, align 16 + +define void @dispatcher(i32 %i_arg, float %f_arg) { +entry: + %tmp2 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 0), align 16 + tail call void %tmp2( i32 %i_arg, float %f_arg ) + %tmp2.1 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 1), align 4 + tail call void %tmp2.1( i32 %i_arg, float %f_arg ) + %tmp2.2 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 2), align 4 + tail call void %tmp2.2( i32 %i_arg, float %f_arg ) + %tmp2.3 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 3), align 4 + tail call void %tmp2.3( i32 %i_arg, float %f_arg ) + %tmp2.4 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 4), align 4 + tail call void %tmp2.4( i32 %i_arg, float %f_arg ) + %tmp2.5 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 5), align 4 + tail call void %tmp2.5( i32 %i_arg, float %f_arg ) + ret void +} diff --git a/test/CodeGen/CellSPU/ctpop.ll b/test/CodeGen/CellSPU/ctpop.ll index 3e2bc64..406a20a 100644 --- a/test/CodeGen/CellSPU/ctpop.ll +++ b/test/CodeGen/CellSPU/ctpop.ll @@ -3,6 +3,8 @@ ; RUN: grep andi %t1.s | count 3 && ; RUN: grep rotmi %t1.s | count 2 && ; RUN: grep rothmi %t1.s | count 1 +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" declare i32 @llvm.ctpop.i8(i8) declare i32 @llvm.ctpop.i16(i16) diff --git a/test/CodeGen/CellSPU/dp_farith.ll b/test/CodeGen/CellSPU/dp_farith.ll index 58c56e1..5cdb33e 100644 --- a/test/CodeGen/CellSPU/dp_farith.ll +++ b/test/CodeGen/CellSPU/dp_farith.ll @@ -7,6 +7,8 @@ ; RUN: grep dfnms %t1.s | count 4 ; ; This file includes double precision floating point arithmetic instructions +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" define double @fadd(double %arg1, double %arg2) { %A = add double %arg1, %arg2 diff --git a/test/CodeGen/CellSPU/eqv.ll b/test/CodeGen/CellSPU/eqv.ll index a4d6dbb..0f02180 100644 --- a/test/CodeGen/CellSPU/eqv.ll +++ b/test/CodeGen/CellSPU/eqv.ll @@ -10,6 +10,8 @@ ; Alternatively, a ^ ~b, which the compiler will also match. ; ModuleID = 'eqv.bc' +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" define <4 x i32> @equiv_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) { %A = and <4 x i32> %arg1, %arg2 ; <<4 x i32>> [#uses=1] diff --git a/test/CodeGen/CellSPU/extract_elt.ll b/test/CodeGen/CellSPU/extract_elt.ll index ab485a8..f9cc32e 100644 --- a/test/CodeGen/CellSPU/extract_elt.ll +++ b/test/CodeGen/CellSPU/extract_elt.ll @@ -5,6 +5,8 @@ ; RUN: grep lqx %t2.s | count 27 && ; RUN: grep space %t1.s | count 8 && ; RUN: grep byte %t1.s | count 424 +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" define i32 @i32_extract_0(<4 x i32> %v) { entry: diff --git a/test/CodeGen/CellSPU/fcmp.ll b/test/CodeGen/CellSPU/fcmp.ll index 8ae97e6..f4406d6 100644 --- a/test/CodeGen/CellSPU/fcmp.ll +++ b/test/CodeGen/CellSPU/fcmp.ll @@ -3,6 +3,8 @@ ; RUN: grep fcmeq %t1.s | count 1 ; ; This file includes standard floating point arithmetic instructions +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" declare double @fabs(double) declare float @fabsf(float) diff --git a/test/CodeGen/CellSPU/fdiv.ll b/test/CodeGen/CellSPU/fdiv.ll index d55b12b..a107bbe 100644 --- a/test/CodeGen/CellSPU/fdiv.ll +++ b/test/CodeGen/CellSPU/fdiv.ll @@ -6,6 +6,8 @@ ; RUN: grep fnms %t1.s | count 2 ; ; This file includes standard floating point arithmetic instructions +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" define float @fdiv32(float %arg1, float %arg2) { %A = fdiv float %arg1, %arg2 diff --git a/test/CodeGen/CellSPU/fneg-fabs.ll b/test/CodeGen/CellSPU/fneg-fabs.ll index 1abdcf6..a183483 100644 --- a/test/CodeGen/CellSPU/fneg-fabs.ll +++ b/test/CodeGen/CellSPU/fneg-fabs.ll @@ -4,6 +4,8 @@ ; RUN: grep xor %t1.s | count 4 && ; RUN: grep and %t1.s | count 5 && ; RUN: grep andbi %t1.s | count 3 +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" define double @fneg_dp(double %X) { %Y = sub double -0.000000e+00, %X diff --git a/test/CodeGen/CellSPU/immed16.ll b/test/CodeGen/CellSPU/immed16.ll index 19cabc4..603ec05 100644 --- a/test/CodeGen/CellSPU/immed16.ll +++ b/test/CodeGen/CellSPU/immed16.ll @@ -1,5 +1,7 @@ ; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s ; RUN: grep "ilh" %t1.s | count 5 +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" define i16 @test_1() { %x = alloca i16, align 16 diff --git a/test/CodeGen/CellSPU/immed32.ll b/test/CodeGen/CellSPU/immed32.ll index 6a5a361..4bf5bbd 100644 --- a/test/CodeGen/CellSPU/immed32.ll +++ b/test/CodeGen/CellSPU/immed32.ll @@ -12,6 +12,8 @@ ; RUN: grep 49077 %t1.s | count 1 && ; RUN: grep 1267 %t1.s | count 2 && ; RUN: grep 16309 %t1.s | count 1 +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" define i32 @test_1() { ret i32 4784128 ;; ILHU via pattern (0x49000) diff --git a/test/CodeGen/CellSPU/immed64.ll b/test/CodeGen/CellSPU/immed64.ll index c4eec8b..4d388b1 100644 --- a/test/CodeGen/CellSPU/immed64.ll +++ b/test/CodeGen/CellSPU/immed64.ll @@ -11,6 +11,9 @@ ; RUN: grep 128 %t1.s | count 30 && ; RUN: grep 224 %t1.s | count 2 +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + ; 1311768467750121234 => 0x 12345678 abcdef12 (4660,22136/43981,61202) ; 18446744073709551591 => 0x ffffffff ffffffe7 (-25) ; 18446744073708516742 => 0x ffffffff fff03586 (-1034874) diff --git a/test/CodeGen/CellSPU/int2fp.ll b/test/CodeGen/CellSPU/int2fp.ll index 95a4984..b4cfea8 100644 --- a/test/CodeGen/CellSPU/int2fp.ll +++ b/test/CodeGen/CellSPU/int2fp.ll @@ -7,6 +7,9 @@ ; RUN: grep andi %t1.s | count 1 && ; RUN: grep ila %t1.s | count 1 +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + define float @sitofp_i32(i32 %arg1) { %A = sitofp i32 %arg1 to float ; <float> [#uses=1] ret float %A diff --git a/test/CodeGen/CellSPU/intrinsics_branch.ll b/test/CodeGen/CellSPU/intrinsics_branch.ll new file mode 100644 index 0000000..5051cd5 --- /dev/null +++ b/test/CodeGen/CellSPU/intrinsics_branch.ll @@ -0,0 +1,150 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s +; RUN: grep ceq %t1.s | count 30 && +; RUN: grep ceqb %t1.s | count 10 && +; RUN: grep ceqhi %t1.s | count 5 && +; RUN: grep ceqi %t1.s | count 5 && +; RUN: grep cgt %t1.s | count 30 && +; RUN: grep cgtb %t1.s | count 10 && +; RUN: grep cgthi %t1.s | count 5 && +; RUN: grep cgti %t1.s | count 5 +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +declare <4 x i32> @llvm.spu.si.shli(<4 x i32>, i8) + +declare <4 x i32> @llvm.spu.si.ceq(<4 x i32>, <4 x i32>) +declare <16 x i8> @llvm.spu.si.ceqb(<16 x i8>, <16 x i8>) +declare <8 x i16> @llvm.spu.si.ceqh(<8 x i16>, <8 x i16>) +declare <4 x i32> @llvm.spu.si.ceqi(<4 x i32>, i16) +declare <8 x i16> @llvm.spu.si.ceqhi(<8 x i16>, i16) +declare <16 x i8> @llvm.spu.si.ceqbi(<16 x i8>, i8) + +declare <4 x i32> @llvm.spu.si.cgt(<4 x i32>, <4 x i32>) +declare <16 x i8> @llvm.spu.si.cgtb(<16 x i8>, <16 x i8>) +declare <8 x i16> @llvm.spu.si.cgth(<8 x i16>, <8 x i16>) +declare <4 x i32> @llvm.spu.si.cgti(<4 x i32>, i16) +declare <8 x i16> @llvm.spu.si.cgthi(<8 x i16>, i16) +declare <16 x i8> @llvm.spu.si.cgtbi(<16 x i8>, i8) + +declare <4 x i32> @llvm.spu.si.clgt(<4 x i32>, <4 x i32>) +declare <16 x i8> @llvm.spu.si.clgtb(<16 x i8>, <16 x i8>) +declare <8 x i16> @llvm.spu.si.clgth(<8 x i16>, <8 x i16>) +declare <4 x i32> @llvm.spu.si.clgti(<4 x i32>, i16) +declare <8 x i16> @llvm.spu.si.clgthi(<8 x i16>, i16) +declare <16 x i8> @llvm.spu.si.clgtbi(<16 x i8>, i8) + + + +define <4 x i32> @test(<4 x i32> %A) { + call <4 x i32> @llvm.spu.si.shli(<4 x i32> %A, i8 3) + %Y = bitcast <4 x i32> %1 to <4 x i32> + ret <4 x i32> %Y +} + +define <4 x i32> @ceqtest(<4 x i32> %A, <4 x i32> %B) { + call <4 x i32> @llvm.spu.si.ceq(<4 x i32> %A, <4 x i32> %B) + %Y = bitcast <4 x i32> %1 to <4 x i32> + ret <4 x i32> %Y +} + +define <8 x i16> @ceqhtest(<8 x i16> %A, <8 x i16> %B) { + call <8 x i16> @llvm.spu.si.ceqh(<8 x i16> %A, <8 x i16> %B) + %Y = bitcast <8 x i16> %1 to <8 x i16> + ret <8 x i16> %Y +} + +define <16 x i8> @ceqbtest(<16 x i8> %A, <16 x i8> %B) { + call <16 x i8> @llvm.spu.si.ceqb(<16 x i8> %A, <16 x i8> %B) + %Y = bitcast <16 x i8> %1 to <16 x i8> + ret <16 x i8> %Y +} + +define <4 x i32> @ceqitest(<4 x i32> %A) { + call <4 x i32> @llvm.spu.si.ceqi(<4 x i32> %A, i16 65) + %Y = bitcast <4 x i32> %1 to <4 x i32> + ret <4 x i32> %Y +} + +define <8 x i16> @ceqhitest(<8 x i16> %A) { + call <8 x i16> @llvm.spu.si.ceqhi(<8 x i16> %A, i16 65) + %Y = bitcast <8 x i16> %1 to <8 x i16> + ret <8 x i16> %Y +} + +define <16 x i8> @ceqbitest(<16 x i8> %A) { + call <16 x i8> @llvm.spu.si.ceqbi(<16 x i8> %A, i8 65) + %Y = bitcast <16 x i8> %1 to <16 x i8> + ret <16 x i8> %Y +} + +define <4 x i32> @cgttest(<4 x i32> %A, <4 x i32> %B) { + call <4 x i32> @llvm.spu.si.cgt(<4 x i32> %A, <4 x i32> %B) + %Y = bitcast <4 x i32> %1 to <4 x i32> + ret <4 x i32> %Y +} + +define <8 x i16> @cgthtest(<8 x i16> %A, <8 x i16> %B) { + call <8 x i16> @llvm.spu.si.cgth(<8 x i16> %A, <8 x i16> %B) + %Y = bitcast <8 x i16> %1 to <8 x i16> + ret <8 x i16> %Y +} + +define <16 x i8> @cgtbtest(<16 x i8> %A, <16 x i8> %B) { + call <16 x i8> @llvm.spu.si.cgtb(<16 x i8> %A, <16 x i8> %B) + %Y = bitcast <16 x i8> %1 to <16 x i8> + ret <16 x i8> %Y +} + +define <4 x i32> @cgtitest(<4 x i32> %A) { + call <4 x i32> @llvm.spu.si.cgti(<4 x i32> %A, i16 65) + %Y = bitcast <4 x i32> %1 to <4 x i32> + ret <4 x i32> %Y +} + +define <8 x i16> @cgthitest(<8 x i16> %A) { + call <8 x i16> @llvm.spu.si.cgthi(<8 x i16> %A, i16 65) + %Y = bitcast <8 x i16> %1 to <8 x i16> + ret <8 x i16> %Y +} + +define <16 x i8> @cgtbitest(<16 x i8> %A) { + call <16 x i8> @llvm.spu.si.cgtbi(<16 x i8> %A, i8 65) + %Y = bitcast <16 x i8> %1 to <16 x i8> + ret <16 x i8> %Y +} + +define <4 x i32> @clgttest(<4 x i32> %A, <4 x i32> %B) { + call <4 x i32> @llvm.spu.si.clgt(<4 x i32> %A, <4 x i32> %B) + %Y = bitcast <4 x i32> %1 to <4 x i32> + ret <4 x i32> %Y +} + +define <8 x i16> @clgthtest(<8 x i16> %A, <8 x i16> %B) { + call <8 x i16> @llvm.spu.si.clgth(<8 x i16> %A, <8 x i16> %B) + %Y = bitcast <8 x i16> %1 to <8 x i16> + ret <8 x i16> %Y +} + +define <16 x i8> @clgtbtest(<16 x i8> %A, <16 x i8> %B) { + call <16 x i8> @llvm.spu.si.clgtb(<16 x i8> %A, <16 x i8> %B) + %Y = bitcast <16 x i8> %1 to <16 x i8> + ret <16 x i8> %Y +} + +define <4 x i32> @clgtitest(<4 x i32> %A) { + call <4 x i32> @llvm.spu.si.clgti(<4 x i32> %A, i16 65) + %Y = bitcast <4 x i32> %1 to <4 x i32> + ret <4 x i32> %Y +} + +define <8 x i16> @clgthitest(<8 x i16> %A) { + call <8 x i16> @llvm.spu.si.clgthi(<8 x i16> %A, i16 65) + %Y = bitcast <8 x i16> %1 to <8 x i16> + ret <8 x i16> %Y +} + +define <16 x i8> @clgtbitest(<16 x i8> %A) { + call <16 x i8> @llvm.spu.si.clgtbi(<16 x i8> %A, i8 65) + %Y = bitcast <16 x i8> %1 to <16 x i8> + ret <16 x i8> %Y +} diff --git a/test/CodeGen/CellSPU/intrinsics_float.ll b/test/CodeGen/CellSPU/intrinsics_float.ll new file mode 100644 index 0000000..f5a192a --- /dev/null +++ b/test/CodeGen/CellSPU/intrinsics_float.ll @@ -0,0 +1,94 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s +; RUN: grep fa %t1.s | count 5 && +; RUN: grep fs %t1.s | count 5 && +; RUN: grep fm %t1.s | count 15 && +; RUN: grep fceq %t1.s | count 5 && +; RUN: grep fcmeq %t1.s | count 5 && +; RUN: grep fcgt %t1.s | count 5 && +; RUN: grep fcmgt %t1.s | count 5 && +; RUN: grep fma %t1.s | count 5 && +; RUN: grep fnms %t1.s | count 5 && +; RUN: grep fms %t1.s | count 5 +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +declare <4 x i32> @llvm.spu.si.shli(<4 x i32>, i8) + +declare <4 x float> @llvm.spu.si.fa(<4 x float>, <4 x float>) +declare <4 x float> @llvm.spu.si.fs(<4 x float>, <4 x float>) +declare <4 x float> @llvm.spu.si.fm(<4 x float>, <4 x float>) + +declare <4 x float> @llvm.spu.si.fceq(<4 x float>, <4 x float>) +declare <4 x float> @llvm.spu.si.fcmeq(<4 x float>, <4 x float>) +declare <4 x float> @llvm.spu.si.fcgt(<4 x float>, <4 x float>) +declare <4 x float> @llvm.spu.si.fcmgt(<4 x float>, <4 x float>) + +declare <4 x float> @llvm.spu.si.fma(<4 x float>, <4 x float>, <4 x float>) +declare <4 x float> @llvm.spu.si.fnms(<4 x float>, <4 x float>, <4 x float>) +declare <4 x float> @llvm.spu.si.fms(<4 x float>, <4 x float>, <4 x float>) + +define <4 x i32> @test(<4 x i32> %A) { + call <4 x i32> @llvm.spu.si.shli(<4 x i32> %A, i8 3) + %Y = bitcast <4 x i32> %1 to <4 x i32> + ret <4 x i32> %Y +} + +define <4 x float> @fatest(<4 x float> %A, <4 x float> %B) { + call <4 x float> @llvm.spu.si.fa(<4 x float> %A, <4 x float> %B) + %Y = bitcast <4 x float> %1 to <4 x float> + ret <4 x float> %Y +} + +define <4 x float> @fstest(<4 x float> %A, <4 x float> %B) { + call <4 x float> @llvm.spu.si.fs(<4 x float> %A, <4 x float> %B) + %Y = bitcast <4 x float> %1 to <4 x float> + ret <4 x float> %Y +} + +define <4 x float> @fmtest(<4 x float> %A, <4 x float> %B) { + call <4 x float> @llvm.spu.si.fm(<4 x float> %A, <4 x float> %B) + %Y = bitcast <4 x float> %1 to <4 x float> + ret <4 x float> %Y +} + +define <4 x float> @fceqtest(<4 x float> %A, <4 x float> %B) { + call <4 x float> @llvm.spu.si.fceq(<4 x float> %A, <4 x float> %B) + %Y = bitcast <4 x float> %1 to <4 x float> + ret <4 x float> %Y +} + +define <4 x float> @fcmeqtest(<4 x float> %A, <4 x float> %B) { + call <4 x float> @llvm.spu.si.fcmeq(<4 x float> %A, <4 x float> %B) + %Y = bitcast <4 x float> %1 to <4 x float> + ret <4 x float> %Y +} + +define <4 x float> @fcgttest(<4 x float> %A, <4 x float> %B) { + call <4 x float> @llvm.spu.si.fcgt(<4 x float> %A, <4 x float> %B) + %Y = bitcast <4 x float> %1 to <4 x float> + ret <4 x float> %Y +} + +define <4 x float> @fcmgttest(<4 x float> %A, <4 x float> %B) { + call <4 x float> @llvm.spu.si.fcmgt(<4 x float> %A, <4 x float> %B) + %Y = bitcast <4 x float> %1 to <4 x float> + ret <4 x float> %Y +} + +define <4 x float> @fmatest(<4 x float> %A, <4 x float> %B, <4 x float> %C) { + call <4 x float> @llvm.spu.si.fma(<4 x float> %A, <4 x float> %B, <4 x float> %C) + %Y = bitcast <4 x float> %1 to <4 x float> + ret <4 x float> %Y +} + +define <4 x float> @fnmstest(<4 x float> %A, <4 x float> %B, <4 x float> %C) { + call <4 x float> @llvm.spu.si.fnms(<4 x float> %A, <4 x float> %B, <4 x float> %C) + %Y = bitcast <4 x float> %1 to <4 x float> + ret <4 x float> %Y +} + +define <4 x float> @fmstest(<4 x float> %A, <4 x float> %B, <4 x float> %C) { + call <4 x float> @llvm.spu.si.fms(<4 x float> %A, <4 x float> %B, <4 x float> %C) + %Y = bitcast <4 x float> %1 to <4 x float> + ret <4 x float> %Y +}
\ No newline at end of file diff --git a/test/CodeGen/CellSPU/intrinsics_logical.ll b/test/CodeGen/CellSPU/intrinsics_logical.ll new file mode 100644 index 0000000..e43558c --- /dev/null +++ b/test/CodeGen/CellSPU/intrinsics_logical.ll @@ -0,0 +1,49 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s +; RUN: grep and %t1.s | count 20 && +; RUN: grep andc %t1.s | count 5 +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +declare <4 x i32> @llvm.spu.si.and(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.spu.si.andc(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.spu.si.andi(<4 x i32>, i16) +declare <8 x i16> @llvm.spu.si.andhi(<8 x i16>, i16) +declare <16 x i8> @llvm.spu.si.andbi(<16 x i8>, i8) + +declare <4 x i32> @llvm.spu.si.or(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.spu.si.orc(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.spu.si.ori(<4 x i32>, i16) +declare <8 x i16> @llvm.spu.si.orhi(<8 x i16>, i16) +declare <16 x i8> @llvm.spu.si.orbi(<16 x i8>, i8) + +declare <4 x i32> @llvm.spu.si.xor(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.spu.si.xori(<4 x i32>, i16) +declare <8 x i16> @llvm.spu.si.xorhi(<8 x i16>, i16) +declare <16 x i8> @llvm.spu.si.xorbi(<16 x i8>, i8) + +declare <4 x i32> @llvm.spu.si.nand(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.spu.si.nor(<4 x i32>, <4 x i32>) + +define <4 x i32> @andtest(<4 x i32> %A, <4 x i32> %B) { + call <4 x i32> @llvm.spu.si.and(<4 x i32> %A, <4 x i32> %B) + %Y = bitcast <4 x i32> %1 to <4 x i32> + ret <4 x i32> %Y +} + +define <4 x i32> @andctest(<4 x i32> %A, <4 x i32> %B) { + call <4 x i32> @llvm.spu.si.andc(<4 x i32> %A, <4 x i32> %B) + %Y = bitcast <4 x i32> %1 to <4 x i32> + ret <4 x i32> %Y +} + +define <4 x i32> @anditest(<4 x i32> %A) { + call <4 x i32> @llvm.spu.si.andi(<4 x i32> %A, i16 65) + %Y = bitcast <4 x i32> %1 to <4 x i32> + ret <4 x i32> %Y +} + +define <8 x i16> @andhitest(<8 x i16> %A) { + call <8 x i16> @llvm.spu.si.andhi(<8 x i16> %A, i16 65) + %Y = bitcast <8 x i16> %1 to <8 x i16> + ret <8 x i16> %Y +} diff --git a/test/CodeGen/CellSPU/nand.ll b/test/CodeGen/CellSPU/nand.ll index 091f4b2..841a3ec 100644 --- a/test/CodeGen/CellSPU/nand.ll +++ b/test/CodeGen/CellSPU/nand.ll @@ -3,6 +3,8 @@ ; RUN: grep and %t1.s | count 94 ; RUN: grep xsbh %t1.s | count 2 ; RUN: grep xshw %t1.s | count 4 +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" define <4 x i32> @nand_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) { %A = and <4 x i32> %arg2, %arg1 ; <<4 x i32>> [#uses=1] diff --git a/test/CodeGen/CellSPU/or_ops.ll b/test/CodeGen/CellSPU/or_ops.ll index 6c46b41..91e3e21 100644 --- a/test/CodeGen/CellSPU/or_ops.ll +++ b/test/CodeGen/CellSPU/or_ops.ll @@ -4,6 +4,8 @@ ; RUN: grep ori %t1.s | count 30 ; RUN: grep orhi %t1.s | count 30 ; RUN: grep orbi %t1.s | count 15 +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" ; OR instruction generation: define <4 x i32> @or_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) { diff --git a/test/CodeGen/CellSPU/rotate_ops.ll b/test/CodeGen/CellSPU/rotate_ops.ll index 6983c18..0386838 100644 --- a/test/CodeGen/CellSPU/rotate_ops.ll +++ b/test/CodeGen/CellSPU/rotate_ops.ll @@ -8,6 +8,8 @@ ; RUN grep rothi.*,.3 %t1.s | count 1 ; RUN: grep andhi %t1.s | count 4 ; RUN: grep shlhi %t1.s | count 4 +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" ; Vector rotates are not currently supported in gcc or llvm assembly. These are ; not tested. diff --git a/test/CodeGen/CellSPU/select_bits.ll b/test/CodeGen/CellSPU/select_bits.ll index 3cbb7a0..b1600bf 100644 --- a/test/CodeGen/CellSPU/select_bits.ll +++ b/test/CodeGen/CellSPU/select_bits.ll @@ -3,6 +3,8 @@ ; RUN: grep and %t1.s | count 2 ; RUN: grep xsbh %t1.s | count 1 ; RUN: grep xshw %t1.s | count 2 +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" define <16 x i8> @selb_v16i8_1(<16 x i8> %arg1, <16 x i8> %arg2, <16 x i8> %arg3) { %A = xor <16 x i8> %arg3, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, diff --git a/test/CodeGen/CellSPU/shift_ops.ll b/test/CodeGen/CellSPU/shift_ops.ll index 162ca16..4256d91 100644 --- a/test/CodeGen/CellSPU/shift_ops.ll +++ b/test/CodeGen/CellSPU/shift_ops.ll @@ -5,6 +5,8 @@ ; RUN: grep shli %t1.s | count 51 ; RUN: grep xshw %t1.s | count 5 ; RUN: grep and %t1.s | count 5 +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" ; Vector shifts are not currently supported in gcc or llvm assembly. These are ; not tested. diff --git a/test/CodeGen/CellSPU/sp_farith.ll b/test/CodeGen/CellSPU/sp_farith.ll index c7e7199..473e9a3 100644 --- a/test/CodeGen/CellSPU/sp_farith.ll +++ b/test/CodeGen/CellSPU/sp_farith.ll @@ -8,6 +8,8 @@ ; ; This file includes standard floating point arithmetic instructions ; NOTE fdiv is tested separately since it is a compound operation +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" define float @fp_add(float %arg1, float %arg2) { %A = add float %arg1, %arg2 ; <float> [#uses=1] diff --git a/test/CodeGen/CellSPU/struct_1.ll b/test/CodeGen/CellSPU/struct_1.ll new file mode 100644 index 0000000..1159b55 --- /dev/null +++ b/test/CodeGen/CellSPU/struct_1.ll @@ -0,0 +1,107 @@ +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s +; RUN: grep lqa %t1.s | count 10 && +; RUN: grep lqd %t1.s | count 2 && +; RUN: grep rotqbyi %t1.s | count 5 && +; RUN: grep xshw %t1.s | count 1 && +; RUN: grep andi %t1.s | count 4 && +; RUN: grep cbd %t1.s | count 3 && +; RUN: grep chd %t1.s | count 1 && +; RUN: grep cwd %t1.s | count 1 && +; RUN: grep shufb %t1.s | count 5 && +; RUN: grep stqa %t1.s | count 5 +; ModuleID = 'struct_1.bc' +target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" +target triple = "spu" + +; struct hackstate { +; unsigned char c1; // offset 0 (rotate left by 13 bytes to byte 3) +; unsigned char c2; // offset 1 (rotate left by 14 bytes to byte 3) +; unsigned char c3; // offset 2 (rotate left by 15 bytes to byte 3) +; int i1; // offset 4 (rotate left by 4 bytes to byte 0) +; short s1; // offset 8 (rotate left by 6 bytes to byte 2) +; int i2; // offset 12 [ignored] +; unsigned char c4; // offset 16 [ignored] +; unsigned char c5; // offset 17 [ignored] +; unsigned char c6; // offset 18 [ignored] +; unsigned char c7; // offset 19 (no rotate, in preferred slot) +; int i3; // offset 20 [ignored] +; int i4; // offset 24 [ignored] +; int i5; // offset 28 [ignored] +; int i6; // offset 32 (no rotate, in preferred slot) +; } +%struct.hackstate = type { i8, i8, i8, i32, i16, i32, i8, i8, i8, i8, i32, i32, i32, i32 } + +; struct hackstate state = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } +@state = global %struct.hackstate zeroinitializer, align 16 + +define i8 @get_hackstate_c1() zeroext { +entry: + %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 0), align 16 + ret i8 %tmp2 +} + +define i8 @get_hackstate_c2() zeroext { +entry: + %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 1), align 16 + ret i8 %tmp2 +} + +define i8 @get_hackstate_c3() zeroext { +entry: + %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 2), align 16 + ret i8 %tmp2 +} + +define i32 @get_hackstate_i1() { +entry: + %tmp2 = load i32* getelementptr (%struct.hackstate* @state, i32 0, i32 3), align 16 + ret i32 %tmp2 +} + +define i16 @get_hackstate_s1() signext { +entry: + %tmp2 = load i16* getelementptr (%struct.hackstate* @state, i32 0, i32 4), align 16 + ret i16 %tmp2 +} + +define i8 @get_hackstate_c7() zeroext { +entry: + %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 9), align 16 + ret i8 %tmp2 +} + +define i32 @get_hackstate_i6() zeroext { +entry: + %tmp2 = load i32* getelementptr (%struct.hackstate* @state, i32 0, i32 13), align 16 + ret i32 %tmp2 +} + +define void @set_hackstate_c1(i8 zeroext %c) { +entry: + store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 0), align 16 + ret void +} + +define void @set_hackstate_c2(i8 zeroext %c) { +entry: + store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 1), align 16 + ret void +} + +define void @set_hackstate_c3(i8 zeroext %c) { +entry: + store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 2), align 16 + ret void +} + +define void @set_hackstate_i1(i32 %i) { +entry: + store i32 %i, i32* getelementptr (%struct.hackstate* @state, i32 0, i32 3), align 16 + ret void +} + +define void @set_hackstate_s1(i16 signext %s) { +entry: + store i16 %s, i16* getelementptr (%struct.hackstate* @state, i32 0, i32 4), align 16 + ret void +} |