Diffstat (limited to 'test')
-rw-r--r--  test/CodeGen/CellSPU/and_ops.ll            |   2
-rw-r--r--  test/CodeGen/CellSPU/call_indirect.ll      |  29
-rw-r--r--  test/CodeGen/CellSPU/ctpop.ll              |   2
-rw-r--r--  test/CodeGen/CellSPU/dp_farith.ll          |   2
-rw-r--r--  test/CodeGen/CellSPU/eqv.ll                |   2
-rw-r--r--  test/CodeGen/CellSPU/extract_elt.ll        |   2
-rw-r--r--  test/CodeGen/CellSPU/fcmp.ll               |   2
-rw-r--r--  test/CodeGen/CellSPU/fdiv.ll               |   2
-rw-r--r--  test/CodeGen/CellSPU/fneg-fabs.ll          |   2
-rw-r--r--  test/CodeGen/CellSPU/immed16.ll            |   2
-rw-r--r--  test/CodeGen/CellSPU/immed32.ll            |   2
-rw-r--r--  test/CodeGen/CellSPU/immed64.ll            |   3
-rw-r--r--  test/CodeGen/CellSPU/int2fp.ll             |   3
-rw-r--r--  test/CodeGen/CellSPU/intrinsics_branch.ll  | 150
-rw-r--r--  test/CodeGen/CellSPU/intrinsics_float.ll   |  94
-rw-r--r--  test/CodeGen/CellSPU/intrinsics_logical.ll |  49
-rw-r--r--  test/CodeGen/CellSPU/nand.ll               |   2
-rw-r--r--  test/CodeGen/CellSPU/or_ops.ll             |   2
-rw-r--r--  test/CodeGen/CellSPU/rotate_ops.ll         |   2
-rw-r--r--  test/CodeGen/CellSPU/select_bits.ll        |   2
-rw-r--r--  test/CodeGen/CellSPU/shift_ops.ll          |   2
-rw-r--r--  test/CodeGen/CellSPU/sp_farith.ll          |   2
-rw-r--r--  test/CodeGen/CellSPU/struct_1.ll           | 107
23 files changed, 467 insertions(+), 0 deletions(-)
diff --git a/test/CodeGen/CellSPU/and_ops.ll b/test/CodeGen/CellSPU/and_ops.ll
index f23355e..6858dba 100644
--- a/test/CodeGen/CellSPU/and_ops.ll
+++ b/test/CodeGen/CellSPU/and_ops.ll
@@ -4,6 +4,8 @@
; RUN: grep andi %t1.s | count 36
; RUN: grep andhi %t1.s | count 30
; RUN: grep andbi %t1.s | count 4
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
; AND instruction generation:
define <4 x i32> @and_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) {
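The RUN lines in these tests are commands for the LLVM test driver, which substitutes %s with the test file and %t1.s with a temporary output path. Expanded by hand, the and_ops.ll checks amount to roughly the following shell sequence (a sketch; grep -c stands in for the harness's count utility, and the file names are illustrative):

    llvm-as -o - and_ops.ll | llc -march=cellspu > t1.s
    grep -c andi  t1.s    # must print 36
    grep -c andhi t1.s    # must print 30
    grep -c andbi t1.s    # must print 4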
diff --git a/test/CodeGen/CellSPU/call_indirect.ll b/test/CodeGen/CellSPU/call_indirect.ll
new file mode 100644
index 0000000..7aa8abc
--- /dev/null
+++ b/test/CodeGen/CellSPU/call_indirect.ll
@@ -0,0 +1,29 @@
+; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: grep bisl %t1.s | count 6 &&
+; RUN: grep ila %t1.s | count 1 &&
+; RUN: grep rotqbyi %t1.s | count 4 &&
+; RUN: grep lqa %t1.s | count 4 &&
+; RUN: grep lqd %t1.s | count 6 &&
+; RUN: grep dispatch_tab %t1.s | count 10
+; ModuleID = 'call_indirect.bc'
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128"
+target triple = "spu-unknown-elf"
+
+@dispatch_tab = global [6 x void (i32, float)*] zeroinitializer, align 16
+
+define void @dispatcher(i32 %i_arg, float %f_arg) {
+entry:
+ %tmp2 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 0), align 16
+ tail call void %tmp2( i32 %i_arg, float %f_arg )
+ %tmp2.1 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 1), align 4
+ tail call void %tmp2.1( i32 %i_arg, float %f_arg )
+ %tmp2.2 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 2), align 4
+ tail call void %tmp2.2( i32 %i_arg, float %f_arg )
+ %tmp2.3 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 3), align 4
+ tail call void %tmp2.3( i32 %i_arg, float %f_arg )
+ %tmp2.4 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 4), align 4
+ tail call void %tmp2.4( i32 %i_arg, float %f_arg )
+ %tmp2.5 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 5), align 4
+ tail call void %tmp2.5( i32 %i_arg, float %f_arg )
+ ret void
+}
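Each of the six indirect calls in @dispatcher is expected to lower to a load of the function pointer followed by SPU's bisl (branch indirect and set link), which is what the bisl, lqa/lqd, and rotqbyi greps above count. For the second table slot, the sequence is roughly (a hedged sketch; register numbers and scheduling are illustrative):

    lqa     $2, dispatch_tab    # load the 16-byte quadword holding pointers 0-3
    rotqbyi $2, $2, 4           # rotate pointer 1 into the preferred word slot
    bisl    $0, $2              # indirect branch; return address saved in $0

Slots 0 and 4 already sit in the preferred slot of their quadwords, so only four of the six calls need a rotate, matching the rotqbyi count of 4.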
diff --git a/test/CodeGen/CellSPU/ctpop.ll b/test/CodeGen/CellSPU/ctpop.ll
index 3e2bc64..406a20a 100644
--- a/test/CodeGen/CellSPU/ctpop.ll
+++ b/test/CodeGen/CellSPU/ctpop.ll
@@ -3,6 +3,8 @@
; RUN: grep andi %t1.s | count 3 &&
; RUN: grep rotmi %t1.s | count 2 &&
; RUN: grep rothmi %t1.s | count 1
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
declare i32 @llvm.ctpop.i8(i8)
declare i32 @llvm.ctpop.i16(i16)
diff --git a/test/CodeGen/CellSPU/dp_farith.ll b/test/CodeGen/CellSPU/dp_farith.ll
index 58c56e1..5cdb33e 100644
--- a/test/CodeGen/CellSPU/dp_farith.ll
+++ b/test/CodeGen/CellSPU/dp_farith.ll
@@ -7,6 +7,8 @@
; RUN: grep dfnms %t1.s | count 4
;
; This file includes double precision floating point arithmetic instructions
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
define double @fadd(double %arg1, double %arg2) {
%A = add double %arg1, %arg2
diff --git a/test/CodeGen/CellSPU/eqv.ll b/test/CodeGen/CellSPU/eqv.ll
index a4d6dbb..0f02180 100644
--- a/test/CodeGen/CellSPU/eqv.ll
+++ b/test/CodeGen/CellSPU/eqv.ll
@@ -10,6 +10,8 @@
; Alternatively, a ^ ~b, which the compiler will also match.
; ModuleID = 'eqv.bc'
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
define <4 x i32> @equiv_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) {
%A = and <4 x i32> %arg1, %arg2 ; <<4 x i32>> [#uses=1]
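The alternative the comment mentions, a ^ ~b, computes the same bitwise equivalence, since a ^ ~b = ~(a ^ b). In IR that form would read (a sketch, not part of the commit):

    %notb = xor <4 x i32> %arg2, < i32 -1, i32 -1, i32 -1, i32 -1 >
    %eqv  = xor <4 x i32> %arg1, %notb    ; a ^ ~b, matched to SPU eqv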
diff --git a/test/CodeGen/CellSPU/extract_elt.ll b/test/CodeGen/CellSPU/extract_elt.ll
index ab485a8..f9cc32e 100644
--- a/test/CodeGen/CellSPU/extract_elt.ll
+++ b/test/CodeGen/CellSPU/extract_elt.ll
@@ -5,6 +5,8 @@
; RUN: grep lqx %t2.s | count 27 &&
; RUN: grep space %t1.s | count 8 &&
; RUN: grep byte %t1.s | count 424
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
define i32 @i32_extract_0(<4 x i32> %v) {
entry:
diff --git a/test/CodeGen/CellSPU/fcmp.ll b/test/CodeGen/CellSPU/fcmp.ll
index 8ae97e6..f4406d6 100644
--- a/test/CodeGen/CellSPU/fcmp.ll
+++ b/test/CodeGen/CellSPU/fcmp.ll
@@ -3,6 +3,8 @@
; RUN: grep fcmeq %t1.s | count 1
;
; This file includes standard floating point arithmetic instructions
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
declare double @fabs(double)
declare float @fabsf(float)
diff --git a/test/CodeGen/CellSPU/fdiv.ll b/test/CodeGen/CellSPU/fdiv.ll
index d55b12b..a107bbe 100644
--- a/test/CodeGen/CellSPU/fdiv.ll
+++ b/test/CodeGen/CellSPU/fdiv.ll
@@ -6,6 +6,8 @@
; RUN: grep fnms %t1.s | count 2
;
; This file includes standard floating point arithmetic instructions
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
define float @fdiv32(float %arg1, float %arg2) {
%A = fdiv float %arg1, %arg2
diff --git a/test/CodeGen/CellSPU/fneg-fabs.ll b/test/CodeGen/CellSPU/fneg-fabs.ll
index 1abdcf6..a183483 100644
--- a/test/CodeGen/CellSPU/fneg-fabs.ll
+++ b/test/CodeGen/CellSPU/fneg-fabs.ll
@@ -4,6 +4,8 @@
; RUN: grep xor %t1.s | count 4 &&
; RUN: grep and %t1.s | count 5 &&
; RUN: grep andbi %t1.s | count 3
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
define double @fneg_dp(double %X) {
%Y = sub double -0.000000e+00, %X
diff --git a/test/CodeGen/CellSPU/immed16.ll b/test/CodeGen/CellSPU/immed16.ll
index 19cabc4..603ec05 100644
--- a/test/CodeGen/CellSPU/immed16.ll
+++ b/test/CodeGen/CellSPU/immed16.ll
@@ -1,5 +1,7 @@
; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
; RUN: grep "ilh" %t1.s | count 5
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
define i16 @test_1() {
%x = alloca i16, align 16
diff --git a/test/CodeGen/CellSPU/immed32.ll b/test/CodeGen/CellSPU/immed32.ll
index 6a5a361..4bf5bbd 100644
--- a/test/CodeGen/CellSPU/immed32.ll
+++ b/test/CodeGen/CellSPU/immed32.ll
@@ -12,6 +12,8 @@
; RUN: grep 49077 %t1.s | count 1 &&
; RUN: grep 1267 %t1.s | count 2 &&
; RUN: grep 16309 %t1.s | count 1
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
define i32 @test_1() {
ret i32 4784128 ;; ILHU via pattern (0x49000)
diff --git a/test/CodeGen/CellSPU/immed64.ll b/test/CodeGen/CellSPU/immed64.ll
index c4eec8b..4d388b1 100644
--- a/test/CodeGen/CellSPU/immed64.ll
+++ b/test/CodeGen/CellSPU/immed64.ll
@@ -11,6 +11,9 @@
; RUN: grep 128 %t1.s | count 30 &&
; RUN: grep 224 %t1.s | count 2
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
+
; 1311768467750121234 => 0x 12345678 abcdef12 (4660,22136/43981,61202)
; 18446744073709551591 => 0x ffffffff ffffffe7 (-25)
; 18446744073708516742 => 0x ffffffff fff03586 (-1034874)
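The parenthesized numbers in the comments above are the four 16-bit halfwords of each 64-bit constant; for 0x12345678abcdef12 they are 4660 = 0x1234, 22136 = 0x5678, 43981 = 0xabcd, and 61202 = 0xef12. Each 32-bit half can then be materialized with an ilhu/iohl pair, roughly (a sketch; the register number is illustrative):

    ilhu $2, 4660     # load 0x1234 into the upper halfword
    iohl $2, 22136    # OR 0x5678 into the lower halfword, yielding 0x12345678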
diff --git a/test/CodeGen/CellSPU/int2fp.ll b/test/CodeGen/CellSPU/int2fp.ll
index 95a4984..b4cfea8 100644
--- a/test/CodeGen/CellSPU/int2fp.ll
+++ b/test/CodeGen/CellSPU/int2fp.ll
@@ -7,6 +7,9 @@
; RUN: grep andi %t1.s | count 1 &&
; RUN: grep ila %t1.s | count 1
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
+
define float @sitofp_i32(i32 %arg1) {
%A = sitofp i32 %arg1 to float ; <float> [#uses=1]
ret float %A
diff --git a/test/CodeGen/CellSPU/intrinsics_branch.ll b/test/CodeGen/CellSPU/intrinsics_branch.ll
new file mode 100644
index 0000000..5051cd5
--- /dev/null
+++ b/test/CodeGen/CellSPU/intrinsics_branch.ll
@@ -0,0 +1,150 @@
+; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: grep ceq %t1.s | count 30 &&
+; RUN: grep ceqb %t1.s | count 10 &&
+; RUN: grep ceqhi %t1.s | count 5 &&
+; RUN: grep ceqi %t1.s | count 5 &&
+; RUN: grep cgt %t1.s | count 30 &&
+; RUN: grep cgtb %t1.s | count 10 &&
+; RUN: grep cgthi %t1.s | count 5 &&
+; RUN: grep cgti %t1.s | count 5
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
+
+declare <4 x i32> @llvm.spu.si.shli(<4 x i32>, i8)
+
+declare <4 x i32> @llvm.spu.si.ceq(<4 x i32>, <4 x i32>)
+declare <16 x i8> @llvm.spu.si.ceqb(<16 x i8>, <16 x i8>)
+declare <8 x i16> @llvm.spu.si.ceqh(<8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.spu.si.ceqi(<4 x i32>, i16)
+declare <8 x i16> @llvm.spu.si.ceqhi(<8 x i16>, i16)
+declare <16 x i8> @llvm.spu.si.ceqbi(<16 x i8>, i8)
+
+declare <4 x i32> @llvm.spu.si.cgt(<4 x i32>, <4 x i32>)
+declare <16 x i8> @llvm.spu.si.cgtb(<16 x i8>, <16 x i8>)
+declare <8 x i16> @llvm.spu.si.cgth(<8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.spu.si.cgti(<4 x i32>, i16)
+declare <8 x i16> @llvm.spu.si.cgthi(<8 x i16>, i16)
+declare <16 x i8> @llvm.spu.si.cgtbi(<16 x i8>, i8)
+
+declare <4 x i32> @llvm.spu.si.clgt(<4 x i32>, <4 x i32>)
+declare <16 x i8> @llvm.spu.si.clgtb(<16 x i8>, <16 x i8>)
+declare <8 x i16> @llvm.spu.si.clgth(<8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.spu.si.clgti(<4 x i32>, i16)
+declare <8 x i16> @llvm.spu.si.clgthi(<8 x i16>, i16)
+declare <16 x i8> @llvm.spu.si.clgtbi(<16 x i8>, i8)
+
+
+
+define <4 x i32> @test(<4 x i32> %A) {
+ call <4 x i32> @llvm.spu.si.shli(<4 x i32> %A, i8 3)
+ %Y = bitcast <4 x i32> %1 to <4 x i32>
+ ret <4 x i32> %Y
+}
+
+define <4 x i32> @ceqtest(<4 x i32> %A, <4 x i32> %B) {
+ call <4 x i32> @llvm.spu.si.ceq(<4 x i32> %A, <4 x i32> %B)
+ %Y = bitcast <4 x i32> %1 to <4 x i32>
+ ret <4 x i32> %Y
+}
+
+define <8 x i16> @ceqhtest(<8 x i16> %A, <8 x i16> %B) {
+ call <8 x i16> @llvm.spu.si.ceqh(<8 x i16> %A, <8 x i16> %B)
+ %Y = bitcast <8 x i16> %1 to <8 x i16>
+ ret <8 x i16> %Y
+}
+
+define <16 x i8> @ceqbtest(<16 x i8> %A, <16 x i8> %B) {
+ call <16 x i8> @llvm.spu.si.ceqb(<16 x i8> %A, <16 x i8> %B)
+ %Y = bitcast <16 x i8> %1 to <16 x i8>
+ ret <16 x i8> %Y
+}
+
+define <4 x i32> @ceqitest(<4 x i32> %A) {
+ call <4 x i32> @llvm.spu.si.ceqi(<4 x i32> %A, i16 65)
+ %Y = bitcast <4 x i32> %1 to <4 x i32>
+ ret <4 x i32> %Y
+}
+
+define <8 x i16> @ceqhitest(<8 x i16> %A) {
+ call <8 x i16> @llvm.spu.si.ceqhi(<8 x i16> %A, i16 65)
+ %Y = bitcast <8 x i16> %1 to <8 x i16>
+ ret <8 x i16> %Y
+}
+
+define <16 x i8> @ceqbitest(<16 x i8> %A) {
+ call <16 x i8> @llvm.spu.si.ceqbi(<16 x i8> %A, i8 65)
+ %Y = bitcast <16 x i8> %1 to <16 x i8>
+ ret <16 x i8> %Y
+}
+
+define <4 x i32> @cgttest(<4 x i32> %A, <4 x i32> %B) {
+ call <4 x i32> @llvm.spu.si.cgt(<4 x i32> %A, <4 x i32> %B)
+ %Y = bitcast <4 x i32> %1 to <4 x i32>
+ ret <4 x i32> %Y
+}
+
+define <8 x i16> @cgthtest(<8 x i16> %A, <8 x i16> %B) {
+ call <8 x i16> @llvm.spu.si.cgth(<8 x i16> %A, <8 x i16> %B)
+ %Y = bitcast <8 x i16> %1 to <8 x i16>
+ ret <8 x i16> %Y
+}
+
+define <16 x i8> @cgtbtest(<16 x i8> %A, <16 x i8> %B) {
+ call <16 x i8> @llvm.spu.si.cgtb(<16 x i8> %A, <16 x i8> %B)
+ %Y = bitcast <16 x i8> %1 to <16 x i8>
+ ret <16 x i8> %Y
+}
+
+define <4 x i32> @cgtitest(<4 x i32> %A) {
+ call <4 x i32> @llvm.spu.si.cgti(<4 x i32> %A, i16 65)
+ %Y = bitcast <4 x i32> %1 to <4 x i32>
+ ret <4 x i32> %Y
+}
+
+define <8 x i16> @cgthitest(<8 x i16> %A) {
+ call <8 x i16> @llvm.spu.si.cgthi(<8 x i16> %A, i16 65)
+ %Y = bitcast <8 x i16> %1 to <8 x i16>
+ ret <8 x i16> %Y
+}
+
+define <16 x i8> @cgtbitest(<16 x i8> %A) {
+ call <16 x i8> @llvm.spu.si.cgtbi(<16 x i8> %A, i8 65)
+ %Y = bitcast <16 x i8> %1 to <16 x i8>
+ ret <16 x i8> %Y
+}
+
+define <4 x i32> @clgttest(<4 x i32> %A, <4 x i32> %B) {
+ call <4 x i32> @llvm.spu.si.clgt(<4 x i32> %A, <4 x i32> %B)
+ %Y = bitcast <4 x i32> %1 to <4 x i32>
+ ret <4 x i32> %Y
+}
+
+define <8 x i16> @clgthtest(<8 x i16> %A, <8 x i16> %B) {
+ call <8 x i16> @llvm.spu.si.clgth(<8 x i16> %A, <8 x i16> %B)
+ %Y = bitcast <8 x i16> %1 to <8 x i16>
+ ret <8 x i16> %Y
+}
+
+define <16 x i8> @clgtbtest(<16 x i8> %A, <16 x i8> %B) {
+ call <16 x i8> @llvm.spu.si.clgtb(<16 x i8> %A, <16 x i8> %B)
+ %Y = bitcast <16 x i8> %1 to <16 x i8>
+ ret <16 x i8> %Y
+}
+
+define <4 x i32> @clgtitest(<4 x i32> %A) {
+ call <4 x i32> @llvm.spu.si.clgti(<4 x i32> %A, i16 65)
+ %Y = bitcast <4 x i32> %1 to <4 x i32>
+ ret <4 x i32> %Y
+}
+
+define <8 x i16> @clgthitest(<8 x i16> %A) {
+ call <8 x i16> @llvm.spu.si.clgthi(<8 x i16> %A, i16 65)
+ %Y = bitcast <8 x i16> %1 to <8 x i16>
+ ret <8 x i16> %Y
+}
+
+define <16 x i8> @clgtbitest(<16 x i8> %A) {
+ call <16 x i8> @llvm.spu.si.clgtbi(<16 x i8> %A, i8 65)
+ %Y = bitcast <16 x i8> %1 to <16 x i8>
+ ret <16 x i8> %Y
+}
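A note on the pattern used throughout this file: none of the call results is bound to a name, but a non-void instruction result still receives the next unnamed SSA number. The unnamed entry block typically takes %0, so each function's first call defines %1, and the same-type bitcast that follows merely gives that value a stable name. A more conventional spelling of ceqtest would be (a sketch, not part of the commit):

    define <4 x i32> @ceqtest(<4 x i32> %A, <4 x i32> %B) {
        %r = call <4 x i32> @llvm.spu.si.ceq(<4 x i32> %A, <4 x i32> %B)
        ret <4 x i32> %r
    }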
diff --git a/test/CodeGen/CellSPU/intrinsics_float.ll b/test/CodeGen/CellSPU/intrinsics_float.ll
new file mode 100644
index 0000000..f5a192a
--- /dev/null
+++ b/test/CodeGen/CellSPU/intrinsics_float.ll
@@ -0,0 +1,94 @@
+; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: grep fa %t1.s | count 5 &&
+; RUN: grep fs %t1.s | count 5 &&
+; RUN: grep fm %t1.s | count 15 &&
+; RUN: grep fceq %t1.s | count 5 &&
+; RUN: grep fcmeq %t1.s | count 5 &&
+; RUN: grep fcgt %t1.s | count 5 &&
+; RUN: grep fcmgt %t1.s | count 5 &&
+; RUN: grep fma %t1.s | count 5 &&
+; RUN: grep fnms %t1.s | count 5 &&
+; RUN: grep fms %t1.s | count 5
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
+
+declare <4 x i32> @llvm.spu.si.shli(<4 x i32>, i8)
+
+declare <4 x float> @llvm.spu.si.fa(<4 x float>, <4 x float>)
+declare <4 x float> @llvm.spu.si.fs(<4 x float>, <4 x float>)
+declare <4 x float> @llvm.spu.si.fm(<4 x float>, <4 x float>)
+
+declare <4 x float> @llvm.spu.si.fceq(<4 x float>, <4 x float>)
+declare <4 x float> @llvm.spu.si.fcmeq(<4 x float>, <4 x float>)
+declare <4 x float> @llvm.spu.si.fcgt(<4 x float>, <4 x float>)
+declare <4 x float> @llvm.spu.si.fcmgt(<4 x float>, <4 x float>)
+
+declare <4 x float> @llvm.spu.si.fma(<4 x float>, <4 x float>, <4 x float>)
+declare <4 x float> @llvm.spu.si.fnms(<4 x float>, <4 x float>, <4 x float>)
+declare <4 x float> @llvm.spu.si.fms(<4 x float>, <4 x float>, <4 x float>)
+
+define <4 x i32> @test(<4 x i32> %A) {
+ call <4 x i32> @llvm.spu.si.shli(<4 x i32> %A, i8 3)
+ %Y = bitcast <4 x i32> %1 to <4 x i32>
+ ret <4 x i32> %Y
+}
+
+define <4 x float> @fatest(<4 x float> %A, <4 x float> %B) {
+ call <4 x float> @llvm.spu.si.fa(<4 x float> %A, <4 x float> %B)
+ %Y = bitcast <4 x float> %1 to <4 x float>
+ ret <4 x float> %Y
+}
+
+define <4 x float> @fstest(<4 x float> %A, <4 x float> %B) {
+ call <4 x float> @llvm.spu.si.fs(<4 x float> %A, <4 x float> %B)
+ %Y = bitcast <4 x float> %1 to <4 x float>
+ ret <4 x float> %Y
+}
+
+define <4 x float> @fmtest(<4 x float> %A, <4 x float> %B) {
+ call <4 x float> @llvm.spu.si.fm(<4 x float> %A, <4 x float> %B)
+ %Y = bitcast <4 x float> %1 to <4 x float>
+ ret <4 x float> %Y
+}
+
+define <4 x float> @fceqtest(<4 x float> %A, <4 x float> %B) {
+ call <4 x float> @llvm.spu.si.fceq(<4 x float> %A, <4 x float> %B)
+ %Y = bitcast <4 x float> %1 to <4 x float>
+ ret <4 x float> %Y
+}
+
+define <4 x float> @fcmeqtest(<4 x float> %A, <4 x float> %B) {
+ call <4 x float> @llvm.spu.si.fcmeq(<4 x float> %A, <4 x float> %B)
+ %Y = bitcast <4 x float> %1 to <4 x float>
+ ret <4 x float> %Y
+}
+
+define <4 x float> @fcgttest(<4 x float> %A, <4 x float> %B) {
+ call <4 x float> @llvm.spu.si.fcgt(<4 x float> %A, <4 x float> %B)
+ %Y = bitcast <4 x float> %1 to <4 x float>
+ ret <4 x float> %Y
+}
+
+define <4 x float> @fcmgttest(<4 x float> %A, <4 x float> %B) {
+ call <4 x float> @llvm.spu.si.fcmgt(<4 x float> %A, <4 x float> %B)
+ %Y = bitcast <4 x float> %1 to <4 x float>
+ ret <4 x float> %Y
+}
+
+define <4 x float> @fmatest(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
+ call <4 x float> @llvm.spu.si.fma(<4 x float> %A, <4 x float> %B, <4 x float> %C)
+ %Y = bitcast <4 x float> %1 to <4 x float>
+ ret <4 x float> %Y
+}
+
+define <4 x float> @fnmstest(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
+ call <4 x float> @llvm.spu.si.fnms(<4 x float> %A, <4 x float> %B, <4 x float> %C)
+ %Y = bitcast <4 x float> %1 to <4 x float>
+ ret <4 x float> %Y
+}
+
+define <4 x float> @fmstest(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
+ call <4 x float> @llvm.spu.si.fms(<4 x float> %A, <4 x float> %B, <4 x float> %C)
+ %Y = bitcast <4 x float> %1 to <4 x float>
+ ret <4 x float> %Y
+}
\ No newline at end of file
diff --git a/test/CodeGen/CellSPU/intrinsics_logical.ll b/test/CodeGen/CellSPU/intrinsics_logical.ll
new file mode 100644
index 0000000..e43558c
--- /dev/null
+++ b/test/CodeGen/CellSPU/intrinsics_logical.ll
@@ -0,0 +1,49 @@
+; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: grep and %t1.s | count 20 &&
+; RUN: grep andc %t1.s | count 5
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
+
+declare <4 x i32> @llvm.spu.si.and(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.spu.si.andc(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.spu.si.andi(<4 x i32>, i16)
+declare <8 x i16> @llvm.spu.si.andhi(<8 x i16>, i16)
+declare <16 x i8> @llvm.spu.si.andbi(<16 x i8>, i8)
+
+declare <4 x i32> @llvm.spu.si.or(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.spu.si.orc(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.spu.si.ori(<4 x i32>, i16)
+declare <8 x i16> @llvm.spu.si.orhi(<8 x i16>, i16)
+declare <16 x i8> @llvm.spu.si.orbi(<16 x i8>, i8)
+
+declare <4 x i32> @llvm.spu.si.xor(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.spu.si.xori(<4 x i32>, i16)
+declare <8 x i16> @llvm.spu.si.xorhi(<8 x i16>, i16)
+declare <16 x i8> @llvm.spu.si.xorbi(<16 x i8>, i8)
+
+declare <4 x i32> @llvm.spu.si.nand(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.spu.si.nor(<4 x i32>, <4 x i32>)
+
+define <4 x i32> @andtest(<4 x i32> %A, <4 x i32> %B) {
+ call <4 x i32> @llvm.spu.si.and(<4 x i32> %A, <4 x i32> %B)
+ %Y = bitcast <4 x i32> %1 to <4 x i32>
+ ret <4 x i32> %Y
+}
+
+define <4 x i32> @andctest(<4 x i32> %A, <4 x i32> %B) {
+ call <4 x i32> @llvm.spu.si.andc(<4 x i32> %A, <4 x i32> %B)
+ %Y = bitcast <4 x i32> %1 to <4 x i32>
+ ret <4 x i32> %Y
+}
+
+define <4 x i32> @anditest(<4 x i32> %A) {
+ call <4 x i32> @llvm.spu.si.andi(<4 x i32> %A, i16 65)
+ %Y = bitcast <4 x i32> %1 to <4 x i32>
+ ret <4 x i32> %Y
+}
+
+define <8 x i16> @andhitest(<8 x i16> %A) {
+ call <8 x i16> @llvm.spu.si.andhi(<8 x i16> %A, i16 65)
+ %Y = bitcast <8 x i16> %1 to <8 x i16>
+ ret <8 x i16> %Y
+}
diff --git a/test/CodeGen/CellSPU/nand.ll b/test/CodeGen/CellSPU/nand.ll
index 091f4b2..841a3ec 100644
--- a/test/CodeGen/CellSPU/nand.ll
+++ b/test/CodeGen/CellSPU/nand.ll
@@ -3,6 +3,8 @@
; RUN: grep and %t1.s | count 94
; RUN: grep xsbh %t1.s | count 2
; RUN: grep xshw %t1.s | count 4
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
define <4 x i32> @nand_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) {
%A = and <4 x i32> %arg2, %arg1 ; <<4 x i32>> [#uses=1]
diff --git a/test/CodeGen/CellSPU/or_ops.ll b/test/CodeGen/CellSPU/or_ops.ll
index 6c46b41..91e3e21 100644
--- a/test/CodeGen/CellSPU/or_ops.ll
+++ b/test/CodeGen/CellSPU/or_ops.ll
@@ -4,6 +4,8 @@
; RUN: grep ori %t1.s | count 30
; RUN: grep orhi %t1.s | count 30
; RUN: grep orbi %t1.s | count 15
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
; OR instruction generation:
define <4 x i32> @or_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) {
diff --git a/test/CodeGen/CellSPU/rotate_ops.ll b/test/CodeGen/CellSPU/rotate_ops.ll
index 6983c18..0386838 100644
--- a/test/CodeGen/CellSPU/rotate_ops.ll
+++ b/test/CodeGen/CellSPU/rotate_ops.ll
@@ -8,6 +8,8 @@
; RUN grep rothi.*,.3 %t1.s | count 1
; RUN: grep andhi %t1.s | count 4
; RUN: grep shlhi %t1.s | count 4
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
; Vector rotates are not currently supported in gcc or llvm assembly. These are
; not tested.
diff --git a/test/CodeGen/CellSPU/select_bits.ll b/test/CodeGen/CellSPU/select_bits.ll
index 3cbb7a0..b1600bf 100644
--- a/test/CodeGen/CellSPU/select_bits.ll
+++ b/test/CodeGen/CellSPU/select_bits.ll
@@ -3,6 +3,8 @@
; RUN: grep and %t1.s | count 2
; RUN: grep xsbh %t1.s | count 1
; RUN: grep xshw %t1.s | count 2
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
define <16 x i8> @selb_v16i8_1(<16 x i8> %arg1, <16 x i8> %arg2, <16 x i8> %arg3) {
%A = xor <16 x i8> %arg3, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
diff --git a/test/CodeGen/CellSPU/shift_ops.ll b/test/CodeGen/CellSPU/shift_ops.ll
index 162ca16..4256d91 100644
--- a/test/CodeGen/CellSPU/shift_ops.ll
+++ b/test/CodeGen/CellSPU/shift_ops.ll
@@ -5,6 +5,8 @@
; RUN: grep shli %t1.s | count 51
; RUN: grep xshw %t1.s | count 5
; RUN: grep and %t1.s | count 5
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
; Vector shifts are not currently supported in gcc or llvm assembly. These are
; not tested.
diff --git a/test/CodeGen/CellSPU/sp_farith.ll b/test/CodeGen/CellSPU/sp_farith.ll
index c7e7199..473e9a3 100644
--- a/test/CodeGen/CellSPU/sp_farith.ll
+++ b/test/CodeGen/CellSPU/sp_farith.ll
@@ -8,6 +8,8 @@
;
; This file includes standard floating point arithmetic instructions
; NOTE fdiv is tested separately since it is a compound operation
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
define float @fp_add(float %arg1, float %arg2) {
%A = add float %arg1, %arg2 ; <float> [#uses=1]
diff --git a/test/CodeGen/CellSPU/struct_1.ll b/test/CodeGen/CellSPU/struct_1.ll
new file mode 100644
index 0000000..1159b55
--- /dev/null
+++ b/test/CodeGen/CellSPU/struct_1.ll
@@ -0,0 +1,107 @@
+; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: grep lqa %t1.s | count 10 &&
+; RUN: grep lqd %t1.s | count 2 &&
+; RUN: grep rotqbyi %t1.s | count 5 &&
+; RUN: grep xshw %t1.s | count 1 &&
+; RUN: grep andi %t1.s | count 4 &&
+; RUN: grep cbd %t1.s | count 3 &&
+; RUN: grep chd %t1.s | count 1 &&
+; RUN: grep cwd %t1.s | count 1 &&
+; RUN: grep shufb %t1.s | count 5 &&
+; RUN: grep stqa %t1.s | count 5
+; ModuleID = 'struct_1.bc'
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
+
+; struct hackstate {
+; unsigned char c1; // offset 0 (rotate left by 13 bytes to byte 3)
+; unsigned char c2; // offset 1 (rotate left by 14 bytes to byte 3)
+; unsigned char c3; // offset 2 (rotate left by 15 bytes to byte 3)
+; int i1; // offset 4 (rotate left by 4 bytes to byte 0)
+; short s1; // offset 8 (rotate left by 6 bytes to byte 2)
+; int i2; // offset 12 [ignored]
+; unsigned char c4; // offset 16 [ignored]
+; unsigned char c5; // offset 17 [ignored]
+; unsigned char c6; // offset 18 [ignored]
+; unsigned char c7; // offset 19 (no rotate, in preferred slot)
+; int i3; // offset 20 [ignored]
+; int i4; // offset 24 [ignored]
+; int i5; // offset 28 [ignored]
+; int i6; // offset 32 (no rotate, in preferred slot)
+; }
+%struct.hackstate = type { i8, i8, i8, i32, i16, i32, i8, i8, i8, i8, i32, i32, i32, i32 }
+
+; struct hackstate state = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+@state = global %struct.hackstate zeroinitializer, align 16
+
+define i8 @get_hackstate_c1() zeroext {
+entry:
+ %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 0), align 16
+ ret i8 %tmp2
+}
+
+define i8 @get_hackstate_c2() zeroext {
+entry:
+ %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 1), align 16
+ ret i8 %tmp2
+}
+
+define i8 @get_hackstate_c3() zeroext {
+entry:
+ %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 2), align 16
+ ret i8 %tmp2
+}
+
+define i32 @get_hackstate_i1() {
+entry:
+ %tmp2 = load i32* getelementptr (%struct.hackstate* @state, i32 0, i32 3), align 16
+ ret i32 %tmp2
+}
+
+define i16 @get_hackstate_s1() signext {
+entry:
+ %tmp2 = load i16* getelementptr (%struct.hackstate* @state, i32 0, i32 4), align 16
+ ret i16 %tmp2
+}
+
+define i8 @get_hackstate_c7() zeroext {
+entry:
+ %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 9), align 16
+ ret i8 %tmp2
+}
+
+define i32 @get_hackstate_i6() zeroext {
+entry:
+ %tmp2 = load i32* getelementptr (%struct.hackstate* @state, i32 0, i32 13), align 16
+ ret i32 %tmp2
+}
+
+define void @set_hackstate_c1(i8 zeroext %c) {
+entry:
+ store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 0), align 16
+ ret void
+}
+
+define void @set_hackstate_c2(i8 zeroext %c) {
+entry:
+ store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 1), align 16
+ ret void
+}
+
+define void @set_hackstate_c3(i8 zeroext %c) {
+entry:
+ store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 2), align 16
+ ret void
+}
+
+define void @set_hackstate_i1(i32 %i) {
+entry:
+ store i32 %i, i32* getelementptr (%struct.hackstate* @state, i32 0, i32 3), align 16
+ ret void
+}
+
+define void @set_hackstate_s1(i16 signext %s) {
+entry:
+ store i16 %s, i16* getelementptr (%struct.hackstate* @state, i32 0, i32 4), align 16
+ ret void
+}