aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--lib/Target/CellSPU/SPUISelLowering.cpp101
-rw-r--r--lib/Target/CellSPU/SPUInstrInfo.td199
-rw-r--r--lib/Target/CellSPU/SPUNodes.td30
3 files changed, 174 insertions, 156 deletions
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
index e975d0d..0822181 100644
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -124,6 +124,10 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
setLoadExtAction(ISD::SEXTLOAD, VT, Custom);
+ // SMUL_LOHI, UMUL_LOHI are not legal for Cell:
+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+
for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
MVT StoreVT = (MVT::SimpleValueType) stype;
setTruncStoreAction(VT, StoreVT, Expand);
@@ -207,7 +211,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
// Custom lower i8, i32 and i64 multiplications
setOperationAction(ISD::MUL, MVT::i8, Custom);
- setOperationAction(ISD::MUL, MVT::i32, Custom);
+ setOperationAction(ISD::MUL, MVT::i32, Legal);
setOperationAction(ISD::MUL, MVT::i64, Expand); // libcall
// Need to custom handle (some) common i8, i64 math ops
@@ -239,8 +243,8 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setOperationAction(ISD::SETCC, MVT::i8, Legal);
setOperationAction(ISD::SETCC, MVT::i16, Legal);
- setOperationAction(ISD::SETCC, MVT::i32, Custom);
- setOperationAction(ISD::SETCC, MVT::i64, Custom);
+ setOperationAction(ISD::SETCC, MVT::i32, Legal);
+ setOperationAction(ISD::SETCC, MVT::i64, Legal);
// Zero extension and sign extension for i64 have to be
// custom legalized
@@ -289,9 +293,9 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
++sctype) {
MVT VT = (MVT::SimpleValueType)sctype;
- setOperationAction(ISD::GlobalAddress, VT, Custom);
- setOperationAction(ISD::ConstantPool, VT, Custom);
- setOperationAction(ISD::JumpTable, VT, Custom);
+ setOperationAction(ISD::GlobalAddress, VT, Custom);
+ setOperationAction(ISD::ConstantPool, VT, Custom);
+ setOperationAction(ISD::JumpTable, VT, Custom);
}
// RET must be custom lowered, to meet ABI requirements
@@ -362,12 +366,15 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
}
- setOperationAction(ISD::MUL, MVT::v16i8, Custom);
setOperationAction(ISD::AND, MVT::v16i8, Custom);
setOperationAction(ISD::OR, MVT::v16i8, Custom);
setOperationAction(ISD::XOR, MVT::v16i8, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
+ // FIXME: v4i32 MUL is marked Legal only temporarily, until all vector
+ // multiplications are moved into SPUInstrInfo.td:
+ setOperationAction(ISD::MUL, MVT::v4i32, Legal);
+
setShiftAmountType(MVT::i32);
setBooleanContents(ZeroOrNegativeOneBooleanContent);
@@ -402,7 +409,7 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
- node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PROMOTE_SCALAR";
+ node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
@@ -467,9 +474,9 @@ MVT SPUTargetLowering::getSetCCResultType(const SDValue &Op) const {
emitted, e.g. for MVT::f32 extending load to MVT::f64:
\verbatim
-%1 v16i8,ch = load
+%1 v16i8,ch = load
%2 v16i8,ch = rotate %1
-%3 v4f8, ch = bitconvert %2
+%3 v4f32, ch = bitconvert %2
%4 f32 = vec2perfslot %3
%5 f64 = fp_extend %4
\endverbatim
@@ -902,7 +909,7 @@ LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
assert((FP != 0) &&
"LowerConstantFP: Node is not ConstantFPSDNode");
-
+
uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
SDValue T = DAG.getConstant(dbits, MVT::i64);
SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T);
@@ -936,7 +943,7 @@ LowerBRCOND(SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) {
return DAG.getNode(ISD::BRCOND, Op.getValueType(),
Op.getOperand(0), Cond, Op.getOperand(2));
}
-
+
return SDValue(); // Unchanged
}
@@ -1197,9 +1204,18 @@ LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
// address pairs:
Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
}
- } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
- Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
- else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
+ } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ MVT CalleeVT = Callee.getValueType();
+ SDValue Zero = DAG.getConstant(0, PtrVT);
+ SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
+ Callee.getValueType());
+
+ if (!ST->usingLargeMem()) {
+ Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, ExtSym, Zero);
+ } else {
+ Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, ExtSym, Zero);
+ }
+ } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
// If this is an absolute destination address that appears to be a legal
// local store address, use the munged value.
Callee = SDValue(Dest, 0);
@@ -1831,7 +1847,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
} else if (rotate) {
int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8;
-
+
return DAG.getNode(SPUISD::ROTBYTES_LEFT, V1.getValueType(),
V1, DAG.getConstant(rotamt, MVT::i16));
} else {
@@ -1915,17 +1931,8 @@ static SDValue LowerVectorMUL(SDValue Op, SelectionDAG &DAG) {
abort();
/*NOTREACHED*/
- case MVT::v4i32: {
- SDValue rA = Op.getOperand(0);
- SDValue rB = Op.getOperand(1);
- SDValue HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
- SDValue HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
- SDValue LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
- SDValue Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
-
- return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
- break;
- }
+ case MVT::v4i32:
+ break;
// Multiply two v8i16 vectors (pipeline friendly version):
// a) multiply lower halves, mask off upper 16-bit of 32-bit product
@@ -2271,7 +2278,7 @@ static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
SDValue result =
DAG.getNode(SPUISD::SHUFB, VT,
DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
- VecOp,
+ VecOp,
DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, ShufMask));
return result;
@@ -2630,32 +2637,6 @@ LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
return Op;
}
-//! Lower i32 multiplication
-static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG, MVT VT,
- unsigned Opc) {
- switch (VT.getSimpleVT()) {
- default:
- cerr << "CellSPU: Unknown LowerMUL value type, got "
- << Op.getValueType().getMVTString()
- << "\n";
- abort();
- /*NOTREACHED*/
-
- case MVT::i32: {
- SDValue rA = Op.getOperand(0);
- SDValue rB = Op.getOperand(1);
-
- return DAG.getNode(ISD::ADD, MVT::i32,
- DAG.getNode(ISD::ADD, MVT::i32,
- DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
- DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
- DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
- }
- }
-
- return SDValue();
-}
-
//! Custom lowering for CTPOP (count population)
/*!
Custom lowering code that counts the number ones in the input
@@ -2951,8 +2932,6 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
return LowerVectorMUL(Op, DAG);
else if (VT == MVT::i8)
return LowerI8Math(Op, DAG, Opc, *this);
- else
- return LowerMUL(Op, DAG, VT, Opc);
case ISD::FDIV:
if (VT == MVT::f32 || VT == MVT::v4f32)
@@ -3030,7 +3009,7 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
|| Op1.getOpcode() == SPUISD::IndirectAddr) {
// Normalize the operands to reduce repeated code
SDValue IndirectArg = Op0, AddArg = Op1;
-
+
if (Op1.getOpcode() == SPUISD::IndirectAddr) {
IndirectArg = Op1;
AddArg = Op0;
@@ -3160,9 +3139,9 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::SIGN_EXTEND: {
- // (SPUpromote_scalar (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
+ // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
// <arg>
- // but only if the SPUpromote_scalar and <arg> types match.
+ // but only if the SPUprefslot2vec and <arg> types match.
SDValue Op00 = Op0.getOperand(0);
if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
SDValue Op000 = Op00.getOperand(0);
@@ -3173,7 +3152,7 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
break;
}
case SPUISD::VEC2PREFSLOT: {
- // (SPUpromote_scalar (SPUvec2prefslot <arg>)) ->
+ // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
// <arg>
Result = Op0.getOperand(0);
break;
@@ -3329,7 +3308,7 @@ SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
}
}
}
-
+
// LowerAsmOperandForConstraint
void
SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td
index 08d7676..1abbc0a 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.td
+++ b/lib/Target/CellSPU/SPUInstrInfo.td
@@ -585,23 +585,29 @@ def AHIr16:
"ahi\t$rT, $rA, $val", IntegerOp,
[(set R16C:$rT, (add R16C:$rA, v8i16SExt10Imm:$val))]>;
-def Avec:
- RRForm<0b00000011000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "a\t$rT, $rA, $rB", IntegerOp,
- [(set (v4i32 VECREG:$rT), (add (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
-
-def : Pat<(add (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)),
- (Avec VECREG:$rA, VECREG:$rB)>;
-
-def Ar32:
- RRForm<0b00000011000, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
- "a\t$rT, $rA, $rB", IntegerOp,
- [(set R32C:$rT, (add R32C:$rA, R32C:$rB))]>;
-
-def Ar8:
- RRForm<0b00000011000, (outs R8C:$rT), (ins R8C:$rA, R8C:$rB),
- "a\t$rT, $rA, $rB", IntegerOp,
- [/* no pattern */]>;
+class AInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b00000011000, OOL, IOL,
+ "a\t$rT, $rA, $rB", IntegerOp,
+ pattern>;
+
+class AVecInst<ValueType vectype>:
+ AInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT), (add (vectype VECREG:$rA),
+ (vectype VECREG:$rB)))]>;
+
+class ARegInst<RegisterClass rclass>:
+ AInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (add rclass:$rA, rclass:$rB))]>;
+
+multiclass AddInstruction {
+ def v4i32: AVecInst<v4i32>;
+ def v16i8: AVecInst<v16i8>;
+
+ def r32: ARegInst<R32C>;
+ def r8: AInst<(outs R8C:$rT), (ins R8C:$rA, R8C:$rB), [/* no pattern */]>;
+}
+
+defm A : AddInstruction;
def AIvec:
RI10Form<0b00111000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
@@ -789,96 +795,109 @@ def BGXvec:
def MPYv8i16:
RRForm<0b00100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
"mpy\t$rT, $rA, $rB", IntegerMulDiv,
- [(set (v8i16 VECREG:$rT), (SPUmpy_v8i16 (v8i16 VECREG:$rA),
- (v8i16 VECREG:$rB)))]>;
+ [(set (v8i16 VECREG:$rT), (SPUmpy_vec (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
def MPYr16:
RRForm<0b00100011110, (outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
"mpy\t$rT, $rA, $rB", IntegerMulDiv,
[(set R16C:$rT, (mul R16C:$rA, R16C:$rB))]>;
+// Unsigned 16-bit multiply:
+
+class MPYUInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b00110011110, OOL, IOL,
+ "mpyu\t$rT, $rA, $rB", IntegerMulDiv,
+ pattern>;
+
def MPYUv4i32:
- RRForm<0b00110011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "mpyu\t$rT, $rA, $rB", IntegerMulDiv,
- [(set (v4i32 VECREG:$rT),
- (SPUmpyu_v4i32 (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+ MPYUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v4i32 VECREG:$rT),
+ (SPUmpyu_vec (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
def MPYUr16:
- RRForm<0b00110011110, (outs R32C:$rT), (ins R16C:$rA, R16C:$rB),
- "mpyu\t$rT, $rA, $rB", IntegerMulDiv,
- [(set R32C:$rT, (mul (zext R16C:$rA),
- (zext R16C:$rB)))]>;
+ MPYUInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB),
+ [(set R32C:$rT, (mul (zext R16C:$rA), (zext R16C:$rB)))]>;
def MPYUr32:
- RRForm<0b00110011110, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
- "mpyu\t$rT, $rA, $rB", IntegerMulDiv,
- [(set R32C:$rT, (SPUmpyu_i32 R32C:$rA, R32C:$rB))]>;
+ MPYUInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
+ [(set R32C:$rT, (SPUmpyu_int R32C:$rA, R32C:$rB))]>;
-// mpyi: multiply 16 x s10imm -> 32 result (custom lowering for 32 bit result,
-// this only produces the lower 16 bits)
-def MPYIvec:
- RI10Form<0b00101110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+// mpyi: multiply 16 x s10imm -> 32 result.
+
+class MPYIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI10Form<0b00101110, OOL, IOL,
"mpyi\t$rT, $rA, $val", IntegerMulDiv,
- [(set (v8i16 VECREG:$rT), (mul (v8i16 VECREG:$rA), v8i16SExt10Imm:$val))]>;
+ pattern>;
+
+def MPYIvec:
+ MPYIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (v8i16 VECREG:$rT),
+ (mul (v8i16 VECREG:$rA), v8i16SExt10Imm:$val))]>;
def MPYIr16:
- RI10Form<0b00101110, (outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
- "mpyi\t$rT, $rA, $val", IntegerMulDiv,
- [(set R16C:$rT, (mul R16C:$rA, i16ImmSExt10:$val))]>;
+ MPYIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
+ [(set R16C:$rT, (mul R16C:$rA, i16ImmSExt10:$val))]>;
// mpyui: same issues as other multiplies, plus, this doesn't match a
// pattern... but may be used during target DAG selection or lowering
+
+class MPYUIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI10Form<0b10101110, OOL, IOL,
+ "mpyui\t$rT, $rA, $val", IntegerMulDiv,
+ pattern>;
+
def MPYUIvec:
- RI10Form<0b10101110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- "mpyui\t$rT, $rA, $val", IntegerMulDiv,
- []>;
+ MPYUIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ []>;
def MPYUIr16:
- RI10Form<0b10101110, (outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
- "mpyui\t$rT, $rA, $val", IntegerMulDiv,
- []>;
+ MPYUIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
+ []>;
// mpya: 16 x 16 + 16 -> 32 bit result
+class MPYAInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRRForm<0b0011, OOL, IOL,
+ "mpya\t$rT, $rA, $rB, $rC", IntegerMulDiv,
+ pattern>;
+
def MPYAvec:
- RRRForm<0b0011, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
- "mpya\t$rT, $rA, $rB, $rC", IntegerMulDiv,
- [(set (v4i32 VECREG:$rT), (add (v4i32 (bitconvert (mul (v8i16 VECREG:$rA),
- (v8i16 VECREG:$rB)))),
- (v4i32 VECREG:$rC)))]>;
+ MPYAInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ [(set (v4i32 VECREG:$rT),
+ (add (v4i32 (bitconvert (mul (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))),
+ (v4i32 VECREG:$rC)))]>;
def MPYAr32:
- RRRForm<0b0011, (outs R32C:$rT), (ins R16C:$rA, R16C:$rB, R32C:$rC),
- "mpya\t$rT, $rA, $rB, $rC", IntegerMulDiv,
- [(set R32C:$rT, (add (sext (mul R16C:$rA, R16C:$rB)),
- R32C:$rC))]>;
-
-def : Pat<(add (mul (sext R16C:$rA), (sext R16C:$rB)), R32C:$rC),
- (MPYAr32 R16C:$rA, R16C:$rB, R32C:$rC)>;
+ MPYAInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB, R32C:$rC),
+ [(set R32C:$rT, (add (sext (mul R16C:$rA, R16C:$rB)),
+ R32C:$rC))]>;
+
+def MPYAr32_sext:
+ MPYAInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB, R32C:$rC),
+ [(set R32C:$rT, (add (mul (sext R16C:$rA), (sext R16C:$rB)),
+ R32C:$rC))]>;
def MPYAr32_sextinreg:
- RRRForm<0b0011, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB, R32C:$rC),
- "mpya\t$rT, $rA, $rB, $rC", IntegerMulDiv,
- [(set R32C:$rT, (add (mul (sext_inreg R32C:$rA, i16),
- (sext_inreg R32C:$rB, i16)),
- R32C:$rC))]>;
-
-//def MPYAr32:
-// RRRForm<0b0011, (outs R32C:$rT), (ins R16C:$rA, R16C:$rB, R32C:$rC),
-// "mpya\t$rT, $rA, $rB, $rC", IntegerMulDiv,
-// [(set R32C:$rT, (add (sext (mul R16C:$rA, R16C:$rB)),
-// R32C:$rC))]>;
+ MPYAInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB, R32C:$rC),
+ [(set R32C:$rT, (add (mul (sext_inreg R32C:$rA, i16),
+ (sext_inreg R32C:$rB, i16)),
+ R32C:$rC))]>;
// mpyh: multiply high, used to synthesize 32-bit multiplies
+class MPYHInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10100011110, OOL, IOL,
+ "mpyh\t$rT, $rA, $rB", IntegerMulDiv,
+ pattern>;
+
def MPYHv4i32:
- RRForm<0b10100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "mpyh\t$rT, $rA, $rB", IntegerMulDiv,
- [(set (v4i32 VECREG:$rT),
- (SPUmpyh_v4i32 (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+ MPYHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v4i32 VECREG:$rT),
+ (SPUmpyh_vec (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
def MPYHr32:
- RRForm<0b10100011110, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
- "mpyh\t$rT, $rA, $rB", IntegerMulDiv,
- [(set R32C:$rT, (SPUmpyh_i32 R32C:$rA, R32C:$rB))]>;
+ MPYHInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
+ [(set R32C:$rT, (SPUmpyh_int R32C:$rA, R32C:$rB))]>;
// mpys: multiply high and shift right (returns the top half of
// a 16-bit multiply, sign extended to 32 bits.)
@@ -898,7 +917,7 @@ def MPYHHv8i16:
RRForm<0b01100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
"mpyhh\t$rT, $rA, $rB", IntegerMulDiv,
[(set (v8i16 VECREG:$rT),
- (SPUmpyhh_v8i16 (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)))]>;
+ (SPUmpyhh_vec (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)))]>;
def MPYHHr32:
RRForm<0b01100011110, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
@@ -938,7 +957,26 @@ def MPYHHAUr32:
"mpyhhau\t$rT, $rA, $rB", IntegerMulDiv,
[]>;
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// v4i32, i32 multiply instruction sequence:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+def MPYv4i32:
+ Pat<(mul (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)),
+ (Av4i32
+ (Av4i32 (MPYHv4i32 VECREG:$rA, VECREG:$rB),
+ (MPYHv4i32 VECREG:$rB, VECREG:$rA)),
+ (MPYUv4i32 VECREG:$rA, VECREG:$rB))>;
+
+def MPYi32:
+ Pat<(mul R32C:$rA, R32C:$rB),
+ (Ar32
+ (Ar32 (MPYHr32 R32C:$rA, R32C:$rB),
+ (MPYHr32 R32C:$rB, R32C:$rA)),
+ (MPYUr32 R32C:$rA, R32C:$rB))>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// clz: Count leading zeroes
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
class CLZInst<dag OOL, dag IOL, list<dag> pattern>:
RRForm_1<0b10100101010, OOL, IOL, "clz\t$rT, $rA",
IntegerOp, pattern>;
@@ -1803,8 +1841,8 @@ class SELBVecCondInst<ValueType vectype>:
class SELBRegInst<RegisterClass rclass>:
SELBInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB, rclass:$rC),
[(set rclass:$rT,
- (or (and rclass:$rA, rclass:$rC),
- (and rclass:$rB, (not rclass:$rC))))]>;
+ (or (and rclass:$rB, rclass:$rC),
+ (and rclass:$rA, (not rclass:$rC))))]>;
class SELBRegCondInst<RegisterClass rcond, RegisterClass rclass>:
SELBInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB, rcond:$rC),
@@ -3442,6 +3480,13 @@ let isCall = 1,
BIForm<0b10010101100, "bisl\t$$lr, $func", [(SPUcall R32C:$func)]>;
}
+// Support calls to external symbols:
+def : Pat<(SPUcall (SPUpcrel texternalsym:$func, 0)),
+ (BRSL texternalsym:$func)>;
+
+def : Pat<(SPUcall (SPUaform texternalsym:$func, 0)),
+ (BRASL texternalsym:$func)>;
+
// Unconditional branches:
let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in {
def BR :
diff --git a/lib/Target/CellSPU/SPUNodes.td b/lib/Target/CellSPU/SPUNodes.td
index b22c6b5..5cf229e 100644
--- a/lib/Target/CellSPU/SPUNodes.td
+++ b/lib/Target/CellSPU/SPUNodes.td
@@ -35,17 +35,12 @@ def SDT_SPUshuffle : SDTypeProfile<1, 3, [
SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>
]>;
-// Unary, binary v16i8 operator type constraints:
-def SPUv16i8_binop: SDTypeProfile<1, 2, [
- SDTCisVT<0, v16i8>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>;
+// Vector binary operator type constraints: the result and both operands share
+// one type (still needs a further constraint that operand 0 is a vector):
-// Binary v8i16 operator type constraints:
-def SPUv8i16_binop: SDTypeProfile<1, 2, [
- SDTCisVT<0, v8i16>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>;
-
-// Binary v4i32 operator type constraints:
-def SPUv4i32_binop: SDTypeProfile<1, 2, [
- SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>;
+def SPUVecBinop: SDTypeProfile<1, 2, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>
+]>;
// Trinary operators, e.g., addx, carry generate
def SPUIntTrinaryOp : SDTypeProfile<1, 3, [
@@ -93,23 +88,22 @@ def SPUcntb : SDNode<"SPUISD::CNTB", SDTIntUnaryOp>;
def SPUshuffle: SDNode<"SPUISD::SHUFB", SDT_SPUshuffle, []>;
// SPU 16-bit multiply
-def SPUmpy_v16i8: SDNode<"SPUISD::MPY", SPUv16i8_binop, []>;
-def SPUmpy_v8i16: SDNode<"SPUISD::MPY", SPUv8i16_binop, []>;
-def SPUmpy_v4i32: SDNode<"SPUISD::MPY", SPUv4i32_binop, []>;
+def SPUmpy_vec: SDNode<"SPUISD::MPY", SPUVecBinop, []>;
// SPU multiply unsigned, used in instruction lowering for v4i32
// multiplies:
-def SPUmpyu_v4i32: SDNode<"SPUISD::MPYU", SPUv4i32_binop, []>;
-def SPUmpyu_i32: SDNode<"SPUISD::MPYU", SDTIntBinOp, []>;
+def SPUmpyu_vec: SDNode<"SPUISD::MPYU", SPUVecBinop, []>;
+def SPUmpyu_int: SDNode<"SPUISD::MPYU", SDTIntBinOp, []>;
// SPU 16-bit multiply high x low, shift result 16-bits
// Used to compute intermediate products for 32-bit multiplies
-def SPUmpyh_v4i32: SDNode<"SPUISD::MPYH", SPUv4i32_binop, []>;
-def SPUmpyh_i32: SDNode<"SPUISD::MPYH", SDTIntBinOp, []>;
+def SPUmpyh_vec: SDNode<"SPUISD::MPYH", SPUVecBinop, []>;
+def SPUmpyh_int: SDNode<"SPUISD::MPYH", SDTIntBinOp, []>;
// SPU 16-bit multiply high x high, 32-bit product
// Used to compute intermediate products for 16-bit multiplies
-def SPUmpyhh_v8i16: SDNode<"SPUISD::MPYHH", SPUv8i16_binop, []>;
+def SPUmpyhh_vec: SDNode<"SPUISD::MPYHH", SPUVecBinop, []>;
+def SPUmpyhh_int: SDNode<"SPUISD::MPYHH", SDTIntBinOp, []>;
// Shift left quadword by bits and bytes
def SPUshlquad_l_bits: SDNode<"SPUISD::SHLQUAD_L_BITS", SPUvecshift_type, []>;