aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorScott Michel <scottm@aero.org>2008-06-02 22:18:03 +0000
committerScott Michel <scottm@aero.org>2008-06-02 22:18:03 +0000
commit8bf61e8c2a39bcf070c39848fea83eda57851ebb (patch)
treea73a80f3763205767300ee4a008edd8cc8b17dbf
parent193c2358507b151ffbca2f0bb6fbfba55b60bdbd (diff)
downloadexternal_llvm-8bf61e8c2a39bcf070c39848fea83eda57851ebb.zip
external_llvm-8bf61e8c2a39bcf070c39848fea83eda57851ebb.tar.gz
external_llvm-8bf61e8c2a39bcf070c39848fea83eda57851ebb.tar.bz2
Add necessary 64-bit support so that gcc frontend compiles (mostly). Current
issue is operand promotion for setcc/select... but looks like the fundamental stuff is implemented for CellSPU. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@51884 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeDAG.cpp11
-rw-r--r--lib/Target/CellSPU/SPUISelLowering.cpp156
-rw-r--r--lib/Target/CellSPU/SPUISelLowering.h7
-rw-r--r--lib/Target/CellSPU/SPUInstrInfo.td298
-rw-r--r--lib/Target/CellSPU/SPUNodes.td41
-rw-r--r--lib/Target/CellSPU/SPUOperands.td4
-rw-r--r--test/CodeGen/CellSPU/immed64.ll24
7 files changed, 401 insertions, 140 deletions
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index a6268e7..3ec4181 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -4441,11 +4441,20 @@ SDOperand SelectionDAGLegalize::PromoteOp(SDOperand Op) {
AddLegalizedOperand(Op.getValue(1), LegalizeOp(Result.getValue(1)));
break;
}
- case ISD::SELECT:
+ case ISD::SELECT: {
Tmp2 = PromoteOp(Node->getOperand(1)); // Legalize the op0
Tmp3 = PromoteOp(Node->getOperand(2)); // Legalize the op1
+
+ unsigned VT2 = Tmp2.getValueType();
+ assert(VT2 == Tmp3.getValueType()
+ && "PromoteOp: Operands 2 and 3 ValueTypes don't match");
+ // Ensure tha NVT is the same as the operands' value types, because we
+ // cannot assume that TLI.getSetCCValueType() is constant.
+ if (NVT != VT2)
+ NVT = VT2;
Result = DAG.getNode(ISD::SELECT, NVT, Node->getOperand(0), Tmp2, Tmp3);
break;
+ }
case ISD::SELECT_CC:
Tmp2 = PromoteOp(Node->getOperand(2)); // True
Tmp3 = PromoteOp(Node->getOperand(3)); // False
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
index ad797dd..0a736d7 100644
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -219,8 +219,10 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
// Custom lower i32 multiplications
setOperationAction(ISD::MUL, MVT::i32, Custom);
- // Need to custom handle (some) common i8 math ops
+ // Need to custom handle (some) common i8, i64 math ops
+ setOperationAction(ISD::ADD, MVT::i64, Custom);
setOperationAction(ISD::SUB, MVT::i8, Custom);
+ setOperationAction(ISD::SUB, MVT::i64, Custom);
setOperationAction(ISD::MUL, MVT::i8, Custom);
// SPU does not have BSWAP. It does have i32 support CTLZ.
@@ -238,7 +240,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setOperationAction(ISD::CTLZ , MVT::i32, Legal);
- // SPU has a version of select that implements (a&~c)|(b|c), just like
+ // SPU has a version of select that implements (a&~c)|(b&c), just like
// select ought to work:
setOperationAction(ISD::SELECT, MVT::i1, Promote);
setOperationAction(ISD::SELECT, MVT::i8, Legal);
@@ -427,8 +429,14 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
"SPUISD::ROTBYTES_LEFT_CHAINED";
- node_names[(unsigned) SPUISD::FSMBI] = "SPUISD::FSMBI";
+ node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
+ "SPUISD::ROTBYTES_LEFT_BITS";
+ node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
+ node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED";
+ node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE";
+ node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED";
+ node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE";
node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
@@ -1706,33 +1714,33 @@ static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
}
for (int i = 0; i < 4; ++i) {
+ uint64_t val = 0;
for (int j = 0; j < 4; ++j) {
SDOperand V;
bool process_upper, process_lower;
- uint64_t val = 0;
-
+ val <<= 8;
process_upper = (upper_special && (i & 1) == 0);
process_lower = (lower_special && (i & 1) == 1);
if (process_upper || process_lower) {
if ((process_upper && upper == 0)
|| (process_lower && lower == 0))
- val = 0x80;
+ val |= 0x80;
else if ((process_upper && upper == 0xffffffff)
|| (process_lower && lower == 0xffffffff))
- val = 0xc0;
+ val |= 0xc0;
else if ((process_upper && upper == 0x80000000)
|| (process_lower && lower == 0x80000000))
- val = (j == 0 ? 0xe0 : 0x80);
+ val |= (j == 0 ? 0xe0 : 0x80);
} else
- val = i * 4 + j + ((i & 1) * 16);
-
- ShufBytes.push_back(DAG.getConstant(val, MVT::i8));
+ val |= i * 4 + j + ((i & 1) * 16);
}
+
+ ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
}
return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
- DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
+ DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
&ShufBytes[0], ShufBytes.size()));
}
}
@@ -1904,7 +1912,7 @@ static SDOperand LowerVectorMUL(SDOperand Op, SelectionDAG &DAG) {
// b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
// c) Use SELB to select upper and lower halves from the intermediate results
//
- // NOTE: We really want to move the FSMBI to earlier to actually get the
+ // NOTE: We really want to move the SELECT_MASK to earlier to actually get the
// dual-issue. This code does manage to do this, even if it's a little on
// the wacky side
case MVT::v8i16: {
@@ -1918,7 +1926,7 @@ static SDOperand LowerVectorMUL(SDOperand Op, SelectionDAG &DAG) {
SDOperand FSMBOp =
DAG.getCopyToReg(Chain, FSMBIreg,
- DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
+ DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
DAG.getConstant(0xcccc, MVT::i16)));
SDOperand HHProd =
@@ -1962,7 +1970,7 @@ static SDOperand LowerVectorMUL(SDOperand Op, SelectionDAG &DAG) {
DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
- SDOperand FSMBmask = DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
+ SDOperand FSMBmask = DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
DAG.getConstant(0x2222, MVT::i16));
SDOperand LoProdParts =
@@ -2293,6 +2301,64 @@ static SDOperand LowerI64Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc)
DAG.getConstant(4, MVT::i32))));
}
+ case ISD::ADD: {
+ // Turn operands into vectors to satisfy type checking (shufb works on
+ // vectors)
+ SDOperand Op0 =
+ DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
+ SDOperand Op1 =
+ DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
+ SmallVector<SDOperand, 16> ShufBytes;
+
+ // Create the shuffle mask for "rotating" the borrow up one register slot
+ // once the borrow is generated.
+ ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
+ ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
+ ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
+ ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
+
+ SDOperand CarryGen =
+ DAG.getNode(SPUISD::CARRY_GENERATE, MVT::v2i64, Op0, Op1);
+ SDOperand ShiftedCarry =
+ DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
+ CarryGen, CarryGen,
+ DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
+ &ShufBytes[0], ShufBytes.size()));
+
+ return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
+ DAG.getNode(SPUISD::ADD_EXTENDED, MVT::v2i64,
+ Op0, Op1, ShiftedCarry));
+ }
+
+ case ISD::SUB: {
+ // Turn operands into vectors to satisfy type checking (shufb works on
+ // vectors)
+ SDOperand Op0 =
+ DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
+ SDOperand Op1 =
+ DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
+ SmallVector<SDOperand, 16> ShufBytes;
+
+ // Create the shuffle mask for "rotating" the borrow up one register slot
+ // once the borrow is generated.
+ ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
+ ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
+ ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
+ ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
+
+ SDOperand BorrowGen =
+ DAG.getNode(SPUISD::BORROW_GENERATE, MVT::v2i64, Op0, Op1);
+ SDOperand ShiftedBorrow =
+ DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
+ BorrowGen, BorrowGen,
+ DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
+ &ShufBytes[0], ShufBytes.size()));
+
+ return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
+ DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64,
+ Op0, Op1, ShiftedBorrow));
+ }
+
case ISD::SHL: {
SDOperand ShiftAmt = Op.getOperand(1);
unsigned ShiftAmtVT = unsigned(ShiftAmt.getValueType());
@@ -2301,7 +2367,7 @@ static SDOperand LowerI64Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc)
DAG.getNode(SPUISD::SELB, VecVT,
Op0Vec,
DAG.getConstant(0, VecVT),
- DAG.getNode(SPUISD::FSMBI, VecVT,
+ DAG.getNode(SPUISD::SELECT_MASK, VecVT,
DAG.getConstant(0xff00ULL, MVT::i16)));
SDOperand ShiftAmtBytes =
DAG.getNode(ISD::SRL, ShiftAmtVT,
@@ -2337,6 +2403,43 @@ static SDOperand LowerI64Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc)
Op0, ShiftAmtBytes),
ShiftAmtBits);
}
+
+ case ISD::SRA: {
+ // Promote Op0 to vector
+ SDOperand Op0 =
+ DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
+ SDOperand ShiftAmt = Op.getOperand(1);
+ unsigned ShiftVT = ShiftAmt.getValueType();
+
+ // Negate variable shift amounts
+ if (!isa<ConstantSDNode>(ShiftAmt)) {
+ ShiftAmt = DAG.getNode(ISD::SUB, ShiftVT,
+ DAG.getConstant(0, ShiftVT), ShiftAmt);
+ }
+
+ SDOperand UpperHalfSign =
+ DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i32,
+ DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
+ DAG.getNode(SPUISD::VEC_SRA, MVT::v2i64,
+ Op0, DAG.getConstant(31, MVT::i32))));
+ SDOperand UpperHalfSignMask =
+ DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64, UpperHalfSign);
+ SDOperand UpperLowerMask =
+ DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64,
+ DAG.getConstant(0xff00, MVT::i16));
+ SDOperand UpperLowerSelect =
+ DAG.getNode(SPUISD::SELB, MVT::v2i64,
+ UpperHalfSignMask, Op0, UpperLowerMask);
+ SDOperand RotateLeftBytes =
+ DAG.getNode(SPUISD::ROTBYTES_LEFT_BITS, MVT::v2i64,
+ UpperLowerSelect, ShiftAmt);
+ SDOperand RotateLeftBits =
+ DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v2i64,
+ RotateLeftBytes, ShiftAmt);
+
+ return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
+ RotateLeftBits);
+ }
}
return SDOperand();
@@ -2567,17 +2670,19 @@ SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG)
case ISD::ZERO_EXTEND:
case ISD::SIGN_EXTEND:
case ISD::ANY_EXTEND:
+ case ISD::ADD:
case ISD::SUB:
case ISD::ROTR:
case ISD::ROTL:
case ISD::SRL:
case ISD::SHL:
- case ISD::SRA:
+ case ISD::SRA: {
if (VT == MVT::i8)
return LowerI8Math(Op, DAG, Opc);
else if (VT == MVT::i64)
return LowerI64Math(Op, DAG, Opc);
break;
+ }
// Vector-related lowering.
case ISD::BUILD_VECTOR:
@@ -2641,9 +2746,7 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
case ISD::ADD: {
SDOperand Op1 = N->getOperand(1);
- if ((Op1.getOpcode() == ISD::Constant
- || Op1.getOpcode() == ISD::TargetConstant)
- && Op0.getOpcode() == SPUISD::IndirectAddr) {
+ if (isa<ConstantSDNode>(Op1) && Op0.getOpcode() == SPUISD::IndirectAddr) {
SDOperand Op01 = Op0.getOperand(1);
if (Op01.getOpcode() == ISD::Constant
|| Op01.getOpcode() == ISD::TargetConstant) {
@@ -2662,8 +2765,7 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
return DAG.getNode(SPUISD::IndirectAddr, Op0.getValueType(),
Op0.getOperand(0), combinedConst);
}
- } else if ((Op0.getOpcode() == ISD::Constant
- || Op0.getOpcode() == ISD::TargetConstant)
+ } else if (isa<ConstantSDNode>(Op0)
&& Op1.getOpcode() == SPUISD::IndirectAddr) {
SDOperand Op11 = Op1.getOperand(1);
if (Op11.getOpcode() == ISD::Constant
@@ -2899,11 +3001,11 @@ SPUTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
case SPUISD::ROTBYTES_RIGHT_S:
case SPUISD::ROTBYTES_LEFT:
case SPUISD::ROTBYTES_LEFT_CHAINED:
- case FSMBI:
- case SELB:
- case FPInterp:
- case FPRecipEst:
- case SEXT32TO64:
+ case SPUISD::SELECT_MASK:
+ case SPUISD::SELB:
+ case SPUISD::FPInterp:
+ case SPUISD::FPRecipEst:
+ case SPUISD::SEXT32TO64:
#endif
}
}
diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h
index 3c73aa5..5632ee3 100644
--- a/lib/Target/CellSPU/SPUISelLowering.h
+++ b/lib/Target/CellSPU/SPUISelLowering.h
@@ -62,8 +62,13 @@ namespace llvm {
ROTBYTES_RIGHT_S, ///< Vector rotate right, by bytes, sign fill
ROTBYTES_LEFT, ///< Rotate bytes (loads -> ROTQBYI)
ROTBYTES_LEFT_CHAINED, ///< Rotate bytes (loads -> ROTQBYI), with chain
- FSMBI, ///< Form Select Mask for Bytes, Immediate
+ ROTBYTES_LEFT_BITS, ///< Rotate bytes left by bit shift count
+ SELECT_MASK, ///< Select Mask (FSM, FSMB, FSMH, FSMBI)
SELB, ///< Select bits -> (b & mask) | (a & ~mask)
+ ADD_EXTENDED, ///< Add extended, with carry
+ CARRY_GENERATE, ///< Carry generate for ADD_EXTENDED
+ SUB_EXTENDED, ///< Subtract extended, with borrow
+ BORROW_GENERATE, ///< Borrow generate for SUB_EXTENDED
FPInterp, ///< Floating point interpolate
FPRecipEst, ///< Floating point reciprocal estimate
SEXT32TO64, ///< Sign-extended 32-bit const -> 64-bits
diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td
index 8e1933c..d00fe71 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.td
+++ b/lib/Target/CellSPU/SPUInstrInfo.td
@@ -469,7 +469,7 @@ class FSMBIVec<ValueType vectype>:
RI16Form<0b101001100, (outs VECREG:$rT), (ins u16imm:$val),
"fsmbi\t$rT, $val",
SelectOp,
- [(set (vectype VECREG:$rT), (SPUfsmbi (i16 immU16:$val)))]>;
+ [(set (vectype VECREG:$rT), (SPUselmask (i16 immU16:$val)))]>;
multiclass FormSelectMaskBytesImm
{
@@ -485,21 +485,37 @@ defm FSMBI : FormSelectMaskBytesImm;
def FSMB:
RRForm_1<0b01101101100, (outs VECREG:$rT), (ins R16C:$rA),
"fsmb\t$rT, $rA", SelectOp,
- [(set (v16i8 VECREG:$rT), (SPUfsmbi R16C:$rA))]>;
+ [(set (v16i8 VECREG:$rT), (SPUselmask R16C:$rA))]>;
// fsmh: Form select mask for halfwords. N.B., Input operand, $rA, is
// only 8-bits wide (even though it's input as 16-bits here)
def FSMH:
RRForm_1<0b10101101100, (outs VECREG:$rT), (ins R16C:$rA),
"fsmh\t$rT, $rA", SelectOp,
- [(set (v8i16 VECREG:$rT), (SPUfsmbi R16C:$rA))]>;
+ [(set (v8i16 VECREG:$rT), (SPUselmask R16C:$rA))]>;
// fsm: Form select mask for words. Like the other fsm* instructions,
// only the lower 4 bits of $rA are significant.
-def FSM:
- RRForm_1<0b00101101100, (outs VECREG:$rT), (ins R16C:$rA),
- "fsm\t$rT, $rA", SelectOp,
- [(set (v4i32 VECREG:$rT), (SPUfsmbi R16C:$rA))]>;
+class FSMInst<ValueType vectype, RegisterClass rclass>:
+ RRForm_1<0b00101101100, (outs VECREG:$rT), (ins rclass:$rA),
+ "fsm\t$rT, $rA",
+ SelectOp,
+ [(set (vectype VECREG:$rT), (SPUselmask rclass:$rA))]>;
+
+multiclass FormSelectMaskWord {
+ def r32 : FSMInst<v4i32, R32C>;
+ def r16 : FSMInst<v4i32, R16C>;
+}
+
+defm FSM : FormSelectMaskWord;
+
+// Special case when used for i64 math operations
+multiclass FormSelectMaskWord64 {
+ def r32 : FSMInst<v2i64, R32C>;
+ def r16 : FSMInst<v2i64, R16C>;
+}
+
+defm FSM64 : FormSelectMaskWord64;
//===----------------------------------------------------------------------===//
// Integer and Logical Operations:
@@ -545,7 +561,7 @@ def Ar32:
def Ar8:
RRForm<0b00000011000, (outs R8C:$rT), (ins R8C:$rA, R8C:$rB),
"a\t$rT, $rA, $rB", IntegerOp,
- [(set R8C:$rT, (add R8C:$rA, R8C:$rB))]>;
+ [/* no pattern */]>;
def AIvec:
RI10Form<0b00111000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
@@ -600,42 +616,125 @@ def SFIr32 : RI10Form<0b00110000, (outs R32C:$rT),
[(set R32C:$rT, (sub i32ImmSExt10:$val, R32C:$rA))]>;
// ADDX: only available in vector form, doesn't match a pattern.
-def ADDXvec:
- RRForm<0b00000010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB,
- VECREG:$rCarry),
- "addx\t$rT, $rA, $rB", IntegerOp,
- []>,
+class ADDXInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b00000010110, OOL, IOL,
+ "addx\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class ADDXVecInst<ValueType vectype>:
+ ADDXInst<(outs VECREG:$rT),
+ (ins VECREG:$rA, VECREG:$rB, VECREG:$rCarry),
+ [(set (vectype VECREG:$rT),
+ (SPUaddx (vectype VECREG:$rA), (vectype VECREG:$rB),
+ (vectype VECREG:$rCarry)))]>,
RegConstraint<"$rCarry = $rT">,
NoEncode<"$rCarry">;
-// CG: only available in vector form, doesn't match a pattern.
-def CGvec:
- RRForm<0b01000011000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB,
- VECREG:$rCarry),
- "cg\t$rT, $rA, $rB", IntegerOp,
- []>,
+class ADDXRegInst<RegisterClass rclass>:
+ ADDXInst<(outs rclass:$rT),
+ (ins rclass:$rA, rclass:$rB, rclass:$rCarry),
+ [(set rclass:$rT,
+ (SPUaddx rclass:$rA, rclass:$rB, rclass:$rCarry))]>,
RegConstraint<"$rCarry = $rT">,
NoEncode<"$rCarry">;
-// SFX: only available in vector form, doesn't match a pattern
-def SFXvec:
- RRForm<0b10000010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB,
- VECREG:$rCarry),
- "sfx\t$rT, $rA, $rB", IntegerOp,
- []>,
+multiclass AddExtended {
+ def v2i64 : ADDXVecInst<v2i64>;
+ def v4i32 : ADDXVecInst<v4i32>;
+ def r64 : ADDXRegInst<R64C>;
+ def r32 : ADDXRegInst<R32C>;
+}
+
+defm ADDX : AddExtended;
+
+// CG: Generate carry for add
+class CGInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b01000011000, OOL, IOL,
+ "cg\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class CGVecInst<ValueType vectype>:
+ CGInst<(outs VECREG:$rT),
+ (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT),
+ (SPUcarry_gen (vectype VECREG:$rA), (vectype VECREG:$rB)))]>;
+
+class CGRegInst<RegisterClass rclass>:
+ CGInst<(outs rclass:$rT),
+ (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT,
+ (SPUcarry_gen rclass:$rA, rclass:$rB))]>;
+
+multiclass CarryGenerate {
+ def v2i64 : CGVecInst<v2i64>;
+ def v4i32 : CGVecInst<v4i32>;
+ def r64 : CGRegInst<R64C>;
+ def r32 : CGRegInst<R32C>;
+}
+
+defm CG : CarryGenerate;
+
+// SFX: Subract from, extended. This is used in conjunction with BG to subtract
+// with carry (borrow, in this case)
+class SFXInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10000010110, OOL, IOL,
+ "sfx\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class SFXVecInst<ValueType vectype>:
+ SFXInst<(outs VECREG:$rT),
+ (ins VECREG:$rA, VECREG:$rB, VECREG:$rCarry),
+ [(set (vectype VECREG:$rT),
+ (SPUsubx (vectype VECREG:$rA), (vectype VECREG:$rB),
+ (vectype VECREG:$rCarry)))]>,
RegConstraint<"$rCarry = $rT">,
NoEncode<"$rCarry">;
-// BG: only available in vector form, doesn't match a pattern.
-def BGvec:
- RRForm<0b01000010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB,
- VECREG:$rCarry),
- "bg\t$rT, $rA, $rB", IntegerOp,
- []>,
+class SFXRegInst<RegisterClass rclass>:
+ SFXInst<(outs rclass:$rT),
+ (ins rclass:$rA, rclass:$rB, rclass:$rCarry),
+ [(set rclass:$rT,
+ (SPUsubx rclass:$rA, rclass:$rB, rclass:$rCarry))]>,
RegConstraint<"$rCarry = $rT">,
NoEncode<"$rCarry">;
-// BGX: only available in vector form, doesn't match a pattern.
+multiclass SubtractExtended {
+ def v2i64 : SFXVecInst<v2i64>;
+ def v4i32 : SFXVecInst<v4i32>;
+ def r64 : SFXRegInst<R64C>;
+ def r32 : SFXRegInst<R32C>;
+}
+
+defm SFX : SubtractExtended;
+
+// BG: only available in vector form, doesn't match a pattern.
+class BGInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b01000010000, OOL, IOL,
+ "bg\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class BGVecInst<ValueType vectype>:
+ BGInst<(outs VECREG:$rT),
+ (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT),
+ (SPUborrow_gen (vectype VECREG:$rA), (vectype VECREG:$rB)))]>;
+
+class BGRegInst<RegisterClass rclass>:
+ BGInst<(outs rclass:$rT),
+ (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT,
+ (SPUborrow_gen rclass:$rA, rclass:$rB))]>;
+
+multiclass BorrowGenerate {
+ def v4i32 : BGVecInst<v4i32>;
+ def v2i64 : BGVecInst<v2i64>;
+ def r64 : BGRegInst<R64C>;
+ def r32 : BGRegInst<R32C>;
+}
+
+defm BG : BorrowGenerate;
+
+// BGX: Borrow generate, extended.
def BGXvec:
RRForm<0b11000010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB,
VECREG:$rCarry),
@@ -817,17 +916,17 @@ def CLZr32:
def CNTBv16i8:
RRForm_1<0b00101101010, (outs VECREG:$rT), (ins VECREG:$rA),
"cntb\t$rT, $rA", IntegerOp,
- [(set (v16i8 VECREG:$rT), (SPUcntb_v16i8 (v16i8 VECREG:$rA)))]>;
+ [(set (v16i8 VECREG:$rT), (SPUcntb (v16i8 VECREG:$rA)))]>;
def CNTBv8i16 :
RRForm_1<0b00101101010, (outs VECREG:$rT), (ins VECREG:$rA),
"cntb\t$rT, $rA", IntegerOp,
- [(set (v8i16 VECREG:$rT), (SPUcntb_v8i16 (v8i16 VECREG:$rA)))]>;
+ [(set (v8i16 VECREG:$rT), (SPUcntb (v8i16 VECREG:$rA)))]>;
def CNTBv4i32 :
RRForm_1<0b00101101010, (outs VECREG:$rT), (ins VECREG:$rA),
"cntb\t$rT, $rA", IntegerOp,
- [(set (v4i32 VECREG:$rT), (SPUcntb_v4i32 (v4i32 VECREG:$rA)))]>;
+ [(set (v4i32 VECREG:$rT), (SPUcntb (v4i32 VECREG:$rA)))]>;
// gbb: Gather all low order bits from each byte in $rA into a single 16-bit
// quantity stored into $rT
@@ -869,31 +968,38 @@ def SUMB:
[]>;
// Sign extension operations:
-def XSBHvec:
- RRForm_1<0b01101101010, (outs VECREG:$rDst), (ins VECREG:$rSrc),
- "xsbh\t$rDst, $rSrc", IntegerOp,
- [(set (v8i16 VECREG:$rDst), (sext (v16i8 VECREG:$rSrc)))]>;
+class XSBHInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b01101101010, OOL, IOL,
+ "xsbh\t$rDst, $rSrc",
+ IntegerOp, pattern>;
+
+class XSBHVecInst<ValueType vectype>:
+ XSBHInst<(outs VECREG:$rDst), (ins VECREG:$rSrc),
+ [(set (v8i16 VECREG:$rDst), (sext (vectype VECREG:$rSrc)))]>;
+
+class XSBHRegInst<RegisterClass rclass>:
+ XSBHInst<(outs rclass:$rDst), (ins rclass:$rSrc),
+ [(set rclass:$rDst, (sext_inreg rclass:$rSrc, i8))]>;
-// Ordinary form for XSBH
-def XSBHr16:
- RRForm_1<0b01101101010, (outs R16C:$rDst), (ins R16C:$rSrc),
- "xsbh\t$rDst, $rSrc", IntegerOp,
- [(set R16C:$rDst, (sext_inreg R16C:$rSrc, i8))]>;
+multiclass ExtendByteHalfword {
+ def v16i8: XSBHVecInst<v8i16>;
+ def r16: XSBHRegInst<R16C>;
+ // 32-bit form for XSBH: used to sign extend 8-bit quantities to 16-bit
+ // quantities to 32-bit quantities via a 32-bit register (see the sext 8->32
+ // pattern below). Intentionally doesn't match a pattern because we want the
+ // sext 8->32 pattern to do the work for us, namely because we need the extra
+ // XSHWr32.
+ def r32: XSBHRegInst<R32C>;
+}
+
+defm XSBH : ExtendByteHalfword;
+
+// Sign-extend, but take an 8-bit register to a 16-bit register (not done as
+// sext_inreg)
def XSBHr8:
- RRForm_1<0b01101101010, (outs R16C:$rDst), (ins R8C:$rSrc),
- "xsbh\t$rDst, $rSrc", IntegerOp,
- [(set R16C:$rDst, (sext R8C:$rSrc))]>;
-
-// 32-bit form for XSBH: used to sign extend 8-bit quantities to 16-bit
-// quantities to 32-bit quantities via a 32-bit register (see the sext 8->32
-// pattern below). Intentionally doesn't match a pattern because we want the
-// sext 8->32 pattern to do the work for us, namely because we need the extra
-// XSHWr32.
-def XSBHr32:
- RRForm_1<0b01101101010, (outs R32C:$rDst), (ins R32C:$rSrc),
- "xsbh\t$rDst, $rSrc", IntegerOp,
- [(set R32C:$rDst, (sext_inreg R32C:$rSrc, i8))]>;
+ XSBHInst<(outs R16C:$rDst), (ins R8C:$rSrc),
+ [(set R16C:$rDst, (sext R8C:$rSrc))]>;
// Sign extend halfwords to words:
def XSHWvec:
@@ -1658,9 +1764,9 @@ class SHUFBVecInst<ValueType vectype>:
// It's this pattern that's probably the most useful, since SPUISelLowering
// methods create a v16i8 vector for $rC:
-class SHUFBVecPat1<ValueType vectype, SPUInstr inst>:
+class SHUFBVecPat1<ValueType vectype, ValueType masktype, SPUInstr inst>:
Pat<(SPUshuffle (vectype VECREG:$rA), (vectype VECREG:$rB),
- (v16i8 VECREG:$rC)),
+ (masktype VECREG:$rC)),
(inst VECREG:$rA, VECREG:$rB, VECREG:$rC)>;
multiclass ShuffleBytes
@@ -1676,11 +1782,19 @@ multiclass ShuffleBytes
defm SHUFB : ShuffleBytes;
-def : SHUFBVecPat1<v8i16, SHUFBv16i8>;
-def : SHUFBVecPat1<v4i32, SHUFBv16i8>;
-def : SHUFBVecPat1<v2i64, SHUFBv16i8>;
-def : SHUFBVecPat1<v4f32, SHUFBv16i8>;
-def : SHUFBVecPat1<v2f64, SHUFBv16i8>;
+// Shuffle mask is a v16i8 vector
+def : SHUFBVecPat1<v8i16, v16i8, SHUFBv16i8>;
+def : SHUFBVecPat1<v4i32, v16i8, SHUFBv16i8>;
+def : SHUFBVecPat1<v2i64, v16i8, SHUFBv16i8>;
+def : SHUFBVecPat1<v4f32, v16i8, SHUFBv16i8>;
+def : SHUFBVecPat1<v2f64, v16i8, SHUFBv16i8>;
+
+// Shuffle mask is a v4i32 vector:
+def : SHUFBVecPat1<v8i16, v4i32, SHUFBv4i32>;
+def : SHUFBVecPat1<v4i32, v4i32, SHUFBv4i32>;
+def : SHUFBVecPat1<v2i64, v4i32, SHUFBv4i32>;
+def : SHUFBVecPat1<v4f32, v4i32, SHUFBv4i32>;
+def : SHUFBVecPat1<v2f64, v4i32, SHUFBv4i32>;
//===----------------------------------------------------------------------===//
// Shift and rotate group:
@@ -2079,10 +2193,24 @@ def : Pat<(SPUrotbytes_left_chained (v2i64 VECREG:$rA), (i16 uimm7:$val)),
(ROTQBYIv2i64 VECREG:$rA, uimm7:$val)>;
// See ROTQBY note above.
-def ROTQBYBIvec:
- RI7Form<0b00110011100, (outs VECREG:$rT), (ins VECREG:$rA, u7imm:$val),
- "rotqbybi\t$rT, $rA, $val", RotateShift,
- [/* intrinsic */]>;
+class ROTQBYBIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b00110011100, OOL, IOL,
+ "rotqbybi\t$rT, $rA, $shift",
+ RotateShift, pattern>;
+
+class ROTQBYBIVecInst<ValueType vectype, RegisterClass rclass>:
+ ROTQBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, rclass:$shift),
+ [(set (vectype VECREG:$rT),
+ (SPUrotbytes_left_bits (vectype VECREG:$rA), rclass:$shift))]>;
+
+multiclass RotateQuadByBytesByBitshift {
+ def v16i8_r32: ROTQBYBIVecInst<v16i8, R32C>;
+ def v8i16_r32: ROTQBYBIVecInst<v8i16, R32C>;
+ def v4i32_r32: ROTQBYBIVecInst<v4i32, R32C>;
+ def v2i64_r32: ROTQBYBIVecInst<v2i64, R32C>;
+}
+
+defm ROTQBYBI : RotateQuadByBytesByBitshift;
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// See ROTQBY note above.
@@ -2358,7 +2486,6 @@ multiclass RotateQuadBytesImm
defm ROTQMBYI : RotateQuadBytesImm;
-
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// Rotate right and mask by bit count
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
@@ -2545,25 +2672,28 @@ def : Pat<(sra R32C:$rA, R8C:$rB),
(ROTMAr32 R32C:$rA,
(SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>;
-def ROTMAIv4i32:
- RRForm<0b01011110000, (outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val),
- "rotmai\t$rT, $rA, $val", RotateShift,
- [(set (v4i32 VECREG:$rT),
- (SPUvec_sra VECREG:$rA, (i32 uimm7:$val)))]>;
+class ROTMAIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b01011110000, OOL, IOL,
+ "rotmai\t$rT, $rA, $val",
+ RotateShift, pattern>;
-def : Pat<(SPUvec_sra VECREG:$rA, (i16 uimm7:$val)),
- (ROTMAIv4i32 VECREG:$rA, uimm7:$val)>;
+class ROTMAIVecInst<ValueType vectype, Operand intop, ValueType inttype>:
+ ROTMAIInst<(outs VECREG:$rT), (ins VECREG:$rA, intop:$val),
+ [(set (vectype VECREG:$rT),
+ (SPUvec_sra VECREG:$rA, (inttype uimm7:$val)))]>;
-def ROTMAIr32:
- RRForm<0b01011110000, (outs R32C:$rT), (ins R32C:$rA, rotNeg7imm:$val),
- "rotmai\t$rT, $rA, $val", RotateShift,
- [(set R32C:$rT, (sra R32C:$rA, (i32 uimm7:$val)))]>;
+class ROTMAIRegInst<RegisterClass rclass, Operand intop, ValueType inttype>:
+ ROTMAIInst<(outs rclass:$rT), (ins rclass:$rA, intop:$val),
+ [(set rclass:$rT, (sra rclass:$rA, (inttype uimm7:$val)))]>;
-def : Pat<(sra R32C:$rA, (i16 uimm7:$val)),
- (ROTMAIr32 R32C:$rA, uimm7:$val)>;
+multiclass RotateMaskAlgebraicImm {
+ def v2i64_i32 : ROTMAIVecInst<v2i64, rotNeg7imm, i32>;
+ def v4i32_i32 : ROTMAIVecInst<v4i32, rotNeg7imm, i32>;
+ def r64_i32 : ROTMAIRegInst<R64C, rotNeg7imm, i32>;
+ def r32_i32 : ROTMAIRegInst<R32C, rotNeg7imm, i32>;
+}
-def : Pat<(sra R32C:$rA, (i8 uimm7:$val)),
- (ROTMAIr32 R32C:$rA, uimm7:$val)>;
+defm ROTMAI : RotateMaskAlgebraicImm;
//===----------------------------------------------------------------------===//
// Branch and conditionals:
diff --git a/lib/Target/CellSPU/SPUNodes.td b/lib/Target/CellSPU/SPUNodes.td
index 00d0f94..63b852f 100644
--- a/lib/Target/CellSPU/SPUNodes.td
+++ b/lib/Target/CellSPU/SPUNodes.td
@@ -36,29 +36,25 @@ def SDT_SPUshuffle : SDTypeProfile<1, 3, [
]>;
// Unary, binary v16i8 operator type constraints:
-def SPUv16i8_unop: SDTypeProfile<1, 1, [
- SDTCisVT<0, v16i8>, SDTCisSameAs<0, 1>]>;
-
def SPUv16i8_binop: SDTypeProfile<1, 2, [
SDTCisVT<0, v16i8>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>;
// Binary v8i16 operator type constraints:
-def SPUv8i16_unop: SDTypeProfile<1, 1, [
- SDTCisVT<0, v8i16>, SDTCisSameAs<0, 1>]>;
-
def SPUv8i16_binop: SDTypeProfile<1, 2, [
SDTCisVT<0, v8i16>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>;
// Binary v4i32 operator type constraints:
-def SPUv4i32_unop: SDTypeProfile<1, 1, [
- SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>]>;
-
def SPUv4i32_binop: SDTypeProfile<1, 2, [
SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>;
-// FSMBI type constraints: There are several variations for the various
+// Trinary operators, e.g., addx, carry generate
+def SPUIntTrinaryOp : SDTypeProfile<1, 3, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisInt<0>
+]>;
+
+// SELECT_MASK type constraints: There are several variations for the various
// vector types (this avoids having to bit_convert all over the place.)
-def SPUfsmbi_type: SDTypeProfile<1, 1, [
+def SPUselmask_type: SDTypeProfile<1, 1, [
SDTCisInt<1>
]>;
@@ -74,10 +70,16 @@ def SPUvecshift_type: SDTypeProfile<1, 2, [
// Synthetic/pseudo-instructions
//===----------------------------------------------------------------------===//
+/// Add extended, carry generate:
+def SPUaddx : SDNode<"SPUISD::ADD_EXTENDED", SPUIntTrinaryOp, []>;
+def SPUcarry_gen : SDNode<"SPUISD::CARRY_GENERATE", SDTIntBinOp, []>;
+
+// Subtract extended, borrow generate
+def SPUsubx : SDNode<"SPUISD::SUB_EXTENDED", SPUIntTrinaryOp, []>;
+def SPUborrow_gen : SDNode<"SPUISD::BORROW_GENERATE", SDTIntBinOp, []>;
+
// SPU CNTB:
-def SPUcntb_v16i8: SDNode<"SPUISD::CNTB", SPUv16i8_unop, []>;
-def SPUcntb_v8i16: SDNode<"SPUISD::CNTB", SPUv8i16_unop, []>;
-def SPUcntb_v4i32: SDNode<"SPUISD::CNTB", SPUv4i32_unop, []>;
+def SPUcntb : SDNode<"SPUISD::CNTB", SDTIntUnaryOp>;
// SPU vector shuffle node, matched by the SPUISD::SHUFB enum (see
// SPUISelLowering.h):
@@ -122,14 +124,23 @@ def SPUrotquad_rz_bits: SDNode<"SPUISD::ROTQUAD_RZ_BITS",
def SPUrotbytes_right_sfill: SDNode<"SPUISD::ROTBYTES_RIGHT_S",
SPUvecshift_type, []>;
+// Vector rotate left, bits shifted out of the left are rotated in on the right
def SPUrotbytes_left: SDNode<"SPUISD::ROTBYTES_LEFT",
SPUvecshift_type, []>;
+// Same as above, but the node also has a chain associated (used in loads and
+// stores)
def SPUrotbytes_left_chained : SDNode<"SPUISD::ROTBYTES_LEFT_CHAINED",
SPUvecshift_type, [SDNPHasChain]>;
+// Vector rotate left by bytes, but the count is given in bits and the SPU
+// internally converts it to bytes (saves an instruction to mask off lower
+// three bits)
+def SPUrotbytes_left_bits : SDNode<"SPUISD::ROTBYTES_LEFT_BITS",
+ SPUvecshift_type>;
+
// SPU form select mask for bytes, immediate
-def SPUfsmbi: SDNode<"SPUISD::FSMBI", SPUfsmbi_type, []>;
+def SPUselmask: SDNode<"SPUISD::SELECT_MASK", SPUselmask_type, []>;
// SPU select bits instruction
def SPUselb: SDNode<"SPUISD::SELB", SPUselb_type, []>;
diff --git a/lib/Target/CellSPU/SPUOperands.td b/lib/Target/CellSPU/SPUOperands.td
index aae79b5..d17faac 100644
--- a/lib/Target/CellSPU/SPUOperands.td
+++ b/lib/Target/CellSPU/SPUOperands.td
@@ -559,6 +559,10 @@ def rotNeg7imm_i16 : Operand<i16> {
let PrintMethod = "printROTNeg7Imm";
}
+def rotNeg7imm_i8 : Operand<i8> {
+ let PrintMethod = "printROTNeg7Imm";
+}
+
def target : Operand<OtherVT> {
let PrintMethod = "printBranchOperand";
}
diff --git a/test/CodeGen/CellSPU/immed64.ll b/test/CodeGen/CellSPU/immed64.ll
index 7ef3d76..d933222 100644
--- a/test/CodeGen/CellSPU/immed64.ll
+++ b/test/CodeGen/CellSPU/immed64.ll
@@ -1,16 +1,16 @@
; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
-; RUN: grep lqa %t1.s | count 13
-; RUN: grep il %t1.s | count 22
-; RUN: grep shufb %t1.s | count 13
-; RUN: grep 65520 %t1.s | count 1
-; RUN: grep 43981 %t1.s | count 1
-; RUN: grep 13702 %t1.s | count 1
-; RUN: grep 81 %t1.s | count 2
-; RUN: grep 28225 %t1.s | count 1
-; RUN: grep 30720 %t1.s | count 1
-; RUN: grep 192 %t1.s | count 32
-; RUN: grep 128 %t1.s | count 30
-; RUN: grep 224 %t1.s | count 2
+; RUN: grep lqa %t1.s | count 13
+; RUN: grep il %t1.s | count 22
+; RUN: grep shufb %t1.s | count 13
+; RUN: grep 65520 %t1.s | count 1
+; RUN: grep 43981 %t1.s | count 1
+; RUN: grep 13702 %t1.s | count 1
+; RUN: grep 28225 %t1.s | count 1
+; RUN: grep 30720 %t1.s | count 1
+; RUN: grep 3233857728 %t1.s | count 8
+; RUN: grep 2155905152 %t1.s | count 6
+; RUN: grep 66051 %t1.s | count 7
+; RUN: grep 471670303 %t1.s | count 11
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
target triple = "spu"