 lib/Target/CellSPU/SPU64InstrInfo.td                  | 143
 lib/Target/CellSPU/SPUISelDAGToDAG.cpp                | 112
 lib/Target/CellSPU/SPUISelLowering.cpp                | 172
 lib/Target/CellSPU/SPUISelLowering.h                  |  25
 lib/Target/CellSPU/SPUInstrInfo.td                    |  40
 lib/Target/CellSPU/SPUMathInstr.td                    |   2
 lib/Target/CellSPU/SPUNodes.td                        |  24
 test/CodeGen/CellSPU/i64ops.ll                        |  17
 test/CodeGen/CellSPU/useful-harnesses/i64operations.c |  32
 9 files changed, 373 insertions(+), 194 deletions(-)
diff --git a/lib/Target/CellSPU/SPU64InstrInfo.td b/lib/Target/CellSPU/SPU64InstrInfo.td
index 74c0eca..cb8b48b 100644
--- a/lib/Target/CellSPU/SPU64InstrInfo.td
+++ b/lib/Target/CellSPU/SPU64InstrInfo.td
@@ -2,7 +2,6 @@
//
// Cell SPU 64-bit operations
//
-// Primary author: Scott Michel (scottm@aero.org)
//===----------------------------------------------------------------------===//
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
@@ -240,3 +239,145 @@ def : Pat<(setge (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
// i64 setult:
def : I64SETCCNegCond<setlt, I64GEr64>;
def : I64SELECTNegCond<setlt, I64GEr64>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// v2i64, i64 add
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class v2i64_add_cg<dag lhs, dag rhs>:
+ CodeFrag<(CGv4i32 lhs, rhs)>;
+
+class v2i64_add_1<dag lhs, dag rhs, dag cg, dag cg_mask>:
+ CodeFrag<(ADDXv4i32 lhs, rhs, (SHUFBv4i32 cg, cg, cg_mask))>;
+
+class v2i64_add<dag lhs, dag rhs, dag cg_mask>:
+ v2i64_add_1<lhs, rhs, v2i64_add_cg<lhs, rhs>.Fragment, cg_mask>;
+
+def : Pat<(SPUadd64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
+ (ORi64_v2i64 v2i64_add<(ORv2i64_i64 R64C:$rA),
+ (ORv2i64_i64 R64C:$rB),
+ (v4i32 VECREG:$rCGmask)>.Fragment)>;
+
+def : Pat<(SPUadd64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
+ (v4i32 VECREG:$rCGmask)),
+ v2i64_add<(v2i64 VECREG:$rA),
+ (v2i64 VECREG:$rB),
+ (v4i32 VECREG:$rCGmask)>.Fragment>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// v2i64, i64 subtraction
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class v2i64_sub_bg<dag lhs, dag rhs>: CodeFrag<(BGv4i32 lhs, rhs)>;
+
+class v2i64_sub<dag lhs, dag rhs, dag bg, dag bg_mask>:
+ CodeFrag<(SFXv4i32 lhs, rhs, (SHUFBv4i32 bg, bg, bg_mask))>;
+
+def : Pat<(SPUsub64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
+ (ORi64_v2i64 v2i64_sub<(ORv2i64_i64 R64C:$rA),
+ (ORv2i64_i64 R64C:$rB),
+ v2i64_sub_bg<(ORv2i64_i64 R64C:$rA),
+ (ORv2i64_i64 R64C:$rB)>.Fragment,
+ (v4i32 VECREG:$rCGmask)>.Fragment)>;
+
+def : Pat<(SPUsub64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
+ (v4i32 VECREG:$rCGmask)),
+ v2i64_sub<(v2i64 VECREG:$rA),
+ (v2i64 VECREG:$rB),
+ v2i64_sub_bg<(v2i64 VECREG:$rA),
+ (v2i64 VECREG:$rB)>.Fragment,
+ (v4i32 VECREG:$rCGmask)>.Fragment>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// v2i64, i64 multiply
+//
+// Note: i64 multiply is simply the vector->scalar conversion of the
+// full-on v2i64 multiply, since the entire vector has to be manipulated
+// anyway.
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class v2i64_mul_ahi64<dag rA> :
+ CodeFrag<(SELBv4i32 rA, (ILv4i32 0), (FSMBIv4i32 0x0f0f))>;
+
+class v2i64_mul_bhi64<dag rB> :
+ CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0x0f0f))>;
+
+class v2i64_mul_alo64<dag rB> :
+ CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0xf0f0))>;
+
+class v2i64_mul_blo64<dag rB> :
+ CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0xf0f0))>;
+
+class v2i64_mul_ashlq2<dag rA>:
+ CodeFrag<(SHLQBYIv4i32 rA, 0x2)>;
+
+class v2i64_mul_ashlq4<dag rA>:
+ CodeFrag<(SHLQBYIv4i32 rA, 0x4)>;
+
+class v2i64_mul_bshlq2<dag rB> :
+ CodeFrag<(SHLQBYIv4i32 rB, 0x2)>;
+
+class v2i64_mul_bshlq4<dag rB> :
+ CodeFrag<(SHLQBYIv4i32 rB, 0x4)>;
+
+class v2i64_highprod<dag rA, dag rB>:
+ CodeFrag<(Av4i32
+ (Av4i32
+ (MPYUv4i32 v2i64_mul_bshlq4<rB>.Fragment, // a1 x b3
+ v2i64_mul_ahi64<rA>.Fragment),
+ (MPYHv4i32 v2i64_mul_ahi64<rA>.Fragment, // a0 x b3
+ v2i64_mul_bshlq4<rB>.Fragment)),
+ (Av4i32
+ (MPYHv4i32 v2i64_mul_bhi64<rB>.Fragment,
+ v2i64_mul_ashlq4<rA>.Fragment),
+ (Av4i32
+ (MPYHv4i32 v2i64_mul_ashlq4<rA>.Fragment,
+ v2i64_mul_bhi64<rB>.Fragment),
+ (Av4i32
+ (MPYUv4i32 v2i64_mul_ashlq4<rA>.Fragment,
+ v2i64_mul_bhi64<rB>.Fragment),
+ (Av4i32
+ (MPYHv4i32 v2i64_mul_ashlq2<rA>.Fragment,
+ v2i64_mul_bshlq2<rB>.Fragment),
+ (MPYUv4i32 v2i64_mul_ashlq2<rA>.Fragment,
+ v2i64_mul_bshlq2<rB>.Fragment))))))>;
+
+class v2i64_mul_a3_b3<dag rA, dag rB>:
+ CodeFrag<(MPYUv4i32 v2i64_mul_alo64<rA>.Fragment,
+ v2i64_mul_blo64<rB>.Fragment)>;
+
+class v2i64_mul_a2_b3<dag rA, dag rB>:
+ CodeFrag<(SELBv4i32 (SHLQBYIv4i32
+ (MPYHHUv4i32 v2i64_mul_alo64<rA>.Fragment,
+ v2i64_mul_bshlq2<rB>.Fragment), 0x2),
+ (ILv4i32 0),
+ (FSMBIv4i32 0xc3c3))>;
+
+class v2i64_mul_a3_b2<dag rA, dag rB>:
+ CodeFrag<(SELBv4i32 (SHLQBYIv4i32
+ (MPYHHUv4i32 v2i64_mul_blo64<rB>.Fragment,
+ v2i64_mul_ashlq2<rA>.Fragment), 0x2),
+ (ILv4i32 0),
+ (FSMBIv4i32 0xc3c3))>;
+
+class v2i64_lowsum<dag rA, dag rB, dag rCGmask>:
+ v2i64_add<v2i64_add<v2i64_mul_a3_b3<rA, rB>.Fragment,
+ v2i64_mul_a2_b3<rA, rB>.Fragment, rCGmask>.Fragment,
+ v2i64_mul_a3_b2<rA, rB>.Fragment, rCGmask>;
+
+class v2i64_mul<dag rA, dag rB, dag rCGmask>:
+ v2i64_add<v2i64_lowsum<rA, rB, rCGmask>.Fragment,
+ (SELBv4i32 v2i64_highprod<rA, rB>.Fragment,
+ (ILv4i32 0),
+ (FSMBIv4i32 0x0f0f)),
+ rCGmask>;
+
+def : Pat<(SPUmul64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
+ (ORi64_v2i64 v2i64_mul<(ORv2i64_i64 R64C:$rA),
+ (ORv2i64_i64 R64C:$rB),
+ (v4i32 VECREG:$rCGmask)>.Fragment)>;
+
+def : Pat<(SPUmul64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
+ (v4i32 VECREG:$rCGmask)),
+ v2i64_mul<(v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
+ (v4i32 VECREG:$rCGmask)>.Fragment>;
diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
index 76b2284..1f00bac 100644
--- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
+++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
@@ -18,11 +18,13 @@
#include "SPUHazardRecognizers.h"
#include "SPUFrameInfo.h"
#include "SPURegisterNames.h"
+#include "SPUTargetMachine.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Constants.h"
@@ -254,6 +256,26 @@ public:
return CurDAG->getTargetConstant(Imm, SPUtli.getPointerTy());
}
+ SDNode *emitBuildVector(SDValue build_vec) {
+ std::vector<Constant*> CV;
+
+ for (size_t i = 0; i < build_vec.getNumOperands(); ++i) {
+ ConstantSDNode *V = dyn_cast<ConstantSDNode>(build_vec.getOperand(i));
+ CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue()));
+ }
+
+ Constant *CP = ConstantVector::get(CV);
+ SDValue CPIdx = CurDAG->getConstantPool(CP, SPUtli.getPointerTy());
+ unsigned Alignment = 1 << cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+ SDValue CGPoolOffset =
+ SPU::LowerConstantPool(CPIdx, *CurDAG,
+ SPUtli.getSPUTargetMachine());
+ return SelectCode(CurDAG->getLoad(build_vec.getValueType(),
+ CurDAG->getEntryNode(), CGPoolOffset,
+ PseudoSourceValue::getConstantPool(), 0,
+ false, Alignment));
+ }
+
/// Select - Convert the specified operand from a target-independent to a
/// target-specific node if it hasn't already been changed.
SDNode *Select(SDValue Op);
@@ -647,22 +669,82 @@ SPUDAGToDAGISel::Select(SDValue Op) {
TFI, Imm0), 0);
n_ops = 2;
}
- } else if (Opc == ISD::ZERO_EXTEND) {
- // (zero_extend:i16 (and:i8 <arg>, <const>))
- const SDValue &Op1 = N->getOperand(0);
-
- if (Op.getValueType() == MVT::i16 && Op1.getValueType() == MVT::i8) {
- if (Op1.getOpcode() == ISD::AND) {
- // Fold this into a single ANDHI. This is often seen in expansions of i1
- // to i8, then i8 to i16 in logical/branching operations.
- DEBUG(cerr << "CellSPU: Coalescing (zero_extend:i16 (and:i8 "
- "<arg>, <const>))\n");
- NewOpc = SPU::ANDHIi8i16;
- Ops[0] = Op1.getOperand(0);
- Ops[1] = Op1.getOperand(1);
- n_ops = 2;
- }
+ } else if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND)
+ && OpVT == MVT::i64) {
+ SDValue Op0 = Op.getOperand(0);
+ MVT Op0VT = Op0.getValueType();
+ MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
+ MVT OpVecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));
+ SDValue shufMask;
+
+ switch (Op0VT.getSimpleVT()) {
+ default:
+ cerr << "CellSPU Select: Unhandled zero/any extend MVT\n";
+ abort();
+ /*NOTREACHED*/
+ break;
+ case MVT::i32:
+ shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, MVT::v4i32,
+ CurDAG->getConstant(0x80808080, MVT::i32),
+ CurDAG->getConstant(0x00010203, MVT::i32),
+ CurDAG->getConstant(0x80808080, MVT::i32),
+ CurDAG->getConstant(0x08090a0b, MVT::i32));
+ break;
+
+ case MVT::i16:
+ shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, MVT::v4i32,
+ CurDAG->getConstant(0x80808080, MVT::i32),
+ CurDAG->getConstant(0x80800203, MVT::i32),
+ CurDAG->getConstant(0x80808080, MVT::i32),
+ CurDAG->getConstant(0x80800a0b, MVT::i32));
+ break;
+
+ case MVT::i8:
+ shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, MVT::v4i32,
+ CurDAG->getConstant(0x80808080, MVT::i32),
+ CurDAG->getConstant(0x80808003, MVT::i32),
+ CurDAG->getConstant(0x80808080, MVT::i32),
+ CurDAG->getConstant(0x8080800b, MVT::i32));
+ break;
}
+
+ SDNode *shufMaskLoad = emitBuildVector(shufMask);
+ SDNode *PromoteScalar =
+ SelectCode(CurDAG->getNode(SPUISD::PREFSLOT2VEC, Op0VecVT, Op0));
+
+ SDValue zextShuffle =
+ CurDAG->getNode(SPUISD::SHUFB, OpVecVT,
+ SDValue(PromoteScalar, 0),
+ SDValue(PromoteScalar, 0),
+ SDValue(shufMaskLoad, 0));
+
+ // N.B.: BIT_CONVERT replaces and updates the zextShuffle node, so we
+ // re-use it in the VEC2PREFSLOT selection without needing to explicitly
+ // call SelectCode (it's already done for us.)
+ SelectCode(CurDAG->getNode(ISD::BIT_CONVERT, OpVecVT, zextShuffle));
+ return SelectCode(CurDAG->getNode(SPUISD::VEC2PREFSLOT, OpVT,
+ zextShuffle));
+ } else if (Opc == ISD::ADD && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
+ SDNode *CGLoad =
+ emitBuildVector(SPU::getCarryGenerateShufMask(*CurDAG));
+
+ return SelectCode(CurDAG->getNode(SPUISD::ADD64_MARKER, OpVT,
+ Op.getOperand(0), Op.getOperand(1),
+ SDValue(CGLoad, 0)));
+ } else if (Opc == ISD::SUB && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
+ SDNode *CGLoad =
+ emitBuildVector(SPU::getBorrowGenerateShufMask(*CurDAG));
+
+ return SelectCode(CurDAG->getNode(SPUISD::SUB64_MARKER, OpVT,
+ Op.getOperand(0), Op.getOperand(1),
+ SDValue(CGLoad, 0)));
+ } else if (Opc == ISD::MUL && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
+ SDNode *CGLoad =
+ emitBuildVector(SPU::getCarryGenerateShufMask(*CurDAG));
+
+ return SelectCode(CurDAG->getNode(SPUISD::MUL64_MARKER, OpVT,
+ Op.getOperand(0), Op.getOperand(1),
+ SDValue(CGLoad, 0)));
} else if (Opc == ISD::SHL) {
if (OpVT == MVT::i64) {
return SelectSHLi64(Op, OpVT);
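The shuffle masks built in this hunk rely on shufb's control-byte encoding (per the SPU
ISA): bytes 0x00-0x1f select from the 32-byte concatenation of the two source registers,
0x80-0xbf produce a constant 0x00, and 0xc0-0xdf produce 0xff. Read that way, the
i32->i64 mask {0x80808080, 0x00010203, ...} means "four zero bytes, then bytes 0-3 of
the source" per doubleword slot. A small model of those semantics, as I read the ISA:

    #include <cstdint>
    #include <cstring>

    // Rough model of SPU shufb control bytes:
    //   0b10xxxxxx -> 0x00, 0b110xxxxx -> 0xff, 0b111xxxxx -> 0x80,
    //   otherwise the low 5 bits index into the concatenation a||b.
    void shufb_model(const uint8_t a[16], const uint8_t b[16],
                     const uint8_t mask[16], uint8_t out[16]) {
      uint8_t cat[32];
      std::memcpy(cat, a, 16);
      std::memcpy(cat + 16, b, 16);
      for (int i = 0; i < 16; ++i) {
        uint8_t c = mask[i];
        if ((c & 0xc0) == 0x80)      out[i] = 0x00;
        else if ((c & 0xe0) == 0xc0) out[i] = 0xff;
        else if ((c & 0xe0) == 0xe0) out[i] = 0x80;
        else                         out[i] = cat[c & 0x1f];
      }
    }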
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
index 9dd9855..92bd928 100644
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -78,6 +78,7 @@ namespace {
return retval;
}
+
}
SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
@@ -208,13 +209,13 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
// Custom lower i8, i32 and i64 multiplications
setOperationAction(ISD::MUL, MVT::i8, Custom);
setOperationAction(ISD::MUL, MVT::i32, Legal);
- setOperationAction(ISD::MUL, MVT::i64, Expand); // libcall
+ setOperationAction(ISD::MUL, MVT::i64, Legal);
// Need to custom handle (some) common i8, i64 math ops
setOperationAction(ISD::ADD, MVT::i8, Custom);
- setOperationAction(ISD::ADD, MVT::i64, Custom);
+ setOperationAction(ISD::ADD, MVT::i64, Legal);
setOperationAction(ISD::SUB, MVT::i8, Custom);
- setOperationAction(ISD::SUB, MVT::i64, Custom);
+ setOperationAction(ISD::SUB, MVT::i64, Legal);
// SPU does not have BSWAP. It does have i32 support CTLZ.
// CTPOP has to be custom lowered.
@@ -243,11 +244,6 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setOperationAction(ISD::SETCC, MVT::i32, Legal);
setOperationAction(ISD::SETCC, MVT::i64, Legal);
- // Zero extension and sign extension for i64 have to be
- // custom legalized
- setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
- setOperationAction(ISD::ANY_EXTEND, MVT::i64, Custom);
-
// Custom lower i128 -> i64 truncates
setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
@@ -416,10 +412,9 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
- node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED";
- node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE";
- node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED";
- node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE";
+ node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
+ node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
+ node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
}
std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
@@ -778,8 +773,8 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
return SDValue();
}
-/// Generate the address of a constant pool entry.
-static SDValue
+//! Generate the address of a constant pool entry.
+SDValue
LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
MVT PtrVT = Op.getValueType();
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
@@ -805,6 +800,12 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
return SDValue();
}
+//! Alternate entry point for generating the address of a constant pool entry
+SDValue
+SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM) {
+ return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
+}
+
static SDValue
LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
MVT PtrVT = Op.getValueType();
@@ -2185,123 +2186,34 @@ static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
return SDValue();
}
-static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
-{
- MVT VT = Op.getValueType();
- MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
+//! Generate the carry-generate shuffle mask.
+SDValue SPU::getCarryGenerateShufMask(SelectionDAG &DAG) {
+  SmallVector<SDValue, 16> ShufBytes;
- SDValue Op0 = Op.getOperand(0);
+  // Create the shuffle mask for "rotating" the carry up one register slot
+  // once the carry is generated.
+  ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
+  ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
+  ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
+  ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
- switch (Opc) {
- case ISD::ZERO_EXTEND:
- case ISD::ANY_EXTEND: {
- MVT Op0VT = Op0.getValueType();
- MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
-
- SDValue PromoteScalar =
- DAG.getNode(SPUISD::PREFSLOT2VEC, Op0VecVT, Op0);
-
- // Use a shuffle to zero extend the i32 to i64 directly:
- SDValue shufMask;
-
- switch (Op0VT.getSimpleVT()) {
- default:
- cerr << "CellSPU LowerI64Math: Unhandled zero/any extend MVT\n";
- abort();
- /*NOTREACHED*/
- break;
- case MVT::i32:
- shufMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
- DAG.getConstant(0x80808080, MVT::i32),
- DAG.getConstant(0x00010203, MVT::i32),
- DAG.getConstant(0x80808080, MVT::i32),
- DAG.getConstant(0x08090a0b, MVT::i32));
- break;
-
- case MVT::i16:
- shufMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
- DAG.getConstant(0x80808080, MVT::i32),
- DAG.getConstant(0x80800203, MVT::i32),
- DAG.getConstant(0x80808080, MVT::i32),
- DAG.getConstant(0x80800a0b, MVT::i32));
- break;
-
- case MVT::i8:
- shufMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
- DAG.getConstant(0x80808080, MVT::i32),
- DAG.getConstant(0x80808003, MVT::i32),
- DAG.getConstant(0x80808080, MVT::i32),
- DAG.getConstant(0x8080800b, MVT::i32));
- break;
- }
-
- SDValue zextShuffle = DAG.getNode(SPUISD::SHUFB, Op0VecVT,
- PromoteScalar, PromoteScalar, shufMask);
-
- return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
- DAG.getNode(ISD::BIT_CONVERT, VecVT, zextShuffle));
- }
+  return DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
+                     &ShufBytes[0], ShufBytes.size());
+}
- case ISD::ADD: {
- // Turn operands into vectors to satisfy type checking (shufb works on
- // vectors)
- SDValue Op0 =
- DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(0));
- SDValue Op1 =
- DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(1));
- SmallVector<SDValue, 16> ShufBytes;
-
- // Create the shuffle mask for "rotating" the borrow up one register slot
- // once the borrow is generated.
- ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
- ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
- ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
- ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
-
- SDValue CarryGen =
- DAG.getNode(SPUISD::CARRY_GENERATE, MVT::v2i64, Op0, Op1);
- SDValue ShiftedCarry =
- DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
- CarryGen, CarryGen,
- DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
- &ShufBytes[0], ShufBytes.size()));
-
- return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64,
- DAG.getNode(SPUISD::ADD_EXTENDED, MVT::v2i64,
- Op0, Op1, ShiftedCarry));
- }
+//! Generate the borrow-generate shuffle mask
+SDValue SPU::getBorrowGenerateShufMask(SelectionDAG &DAG) {
+  SmallVector<SDValue, 16> ShufBytes;
- case ISD::SUB: {
- // Turn operands into vectors to satisfy type checking (shufb works on
- // vectors)
- SDValue Op0 =
- DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(0));
- SDValue Op1 =
- DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(1));
- SmallVector<SDValue, 16> ShufBytes;
-
- // Create the shuffle mask for "rotating" the borrow up one register slot
- // once the borrow is generated.
- ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
- ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
- ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
- ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
-
- SDValue BorrowGen =
- DAG.getNode(SPUISD::BORROW_GENERATE, MVT::v2i64, Op0, Op1);
- SDValue ShiftedBorrow =
- DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
- BorrowGen, BorrowGen,
- DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
- &ShufBytes[0], ShufBytes.size()));
-
- return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64,
- DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64,
- Op0, Op1, ShiftedBorrow));
- }
- }
+  // Create the shuffle mask for "rotating" the borrow up one register slot
+  // once the borrow is generated.
+  ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
+  ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
+  ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
+  ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
- return SDValue();
+  return DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
+                     &ShufBytes[0], ShufBytes.size());
}
//! Lower byte immediate operations for v16i8 vectors:
@@ -2576,11 +2488,6 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
case ISD::RET:
return LowerRET(Op, DAG, getTargetMachine());
-
- case ISD::ZERO_EXTEND:
- case ISD::ANY_EXTEND:
- return LowerI64Math(Op, DAG, Opc);
-
// i8, i64 math ops:
case ISD::ADD:
case ISD::SUB:
@@ -2591,8 +2498,6 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
case ISD::SRA: {
if (VT == MVT::i8)
return LowerI8Math(Op, DAG, Opc, *this);
- else if (VT == MVT::i64)
- return LowerI64Math(Op, DAG, Opc);
break;
}
@@ -2831,6 +2736,7 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
break;
}
}
+
// Otherwise, return unchanged.
#ifndef NDEBUG
if (Result.getNode()) {
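Note the asymmetry between the two mask helpers: the carry mask fills its unused slots
with 0x80 control bytes (which shufb turns into 0x00), while the borrow mask fills them
with 0xc0 (all-ones bytes), so the least-significant word of the sfx sees a borrow-in
bit of 1, i.e. "no borrow". A scalar sketch of the bg/shufb/sfx subtraction this feeds,
under my reading of the sfx semantics (t = b + ~a + (t & 1)):

    #include <cstdint>

    // Scalar model of the bg/shufb/sfx sequence for b - a: bg yields 1
    // when the low-word subtract needs no borrow; the rotated bit becomes
    // the high word's borrow-in, and the 0xff fill gives the low word a
    // borrow-in of 1 ("no borrow").
    uint64_t sub64_via_words(uint64_t b, uint64_t a) {
      uint32_t alo = (uint32_t)a,         blo = (uint32_t)b;
      uint32_t ahi = (uint32_t)(a >> 32), bhi = (uint32_t)(b >> 32);
      uint32_t noBorrow = blo >= alo;        // bg on the low words
      uint32_t lo = blo + ~alo + 1;          // sfx with borrow-in = 1
      uint32_t hi = bhi + ~ahi + noBorrow;   // sfx with the rotated bg bit
      return ((uint64_t)hi << 32) | lo;
    }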
diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h
index 24b8f82..a98a8f6 100644
--- a/lib/Target/CellSPU/SPUISelLowering.h
+++ b/lib/Target/CellSPU/SPUISelLowering.h
@@ -52,10 +52,11 @@ namespace llvm {
ROTBYTES_LEFT_BITS, ///< Rotate bytes left by bit shift count
SELECT_MASK, ///< Select Mask (FSM, FSMB, FSMH, FSMBI)
SELB, ///< Select bits -> (b & mask) | (a & ~mask)
- ADD_EXTENDED, ///< Add extended, with carry
- CARRY_GENERATE, ///< Carry generate for ADD_EXTENDED
- SUB_EXTENDED, ///< Subtract extended, with borrow
- BORROW_GENERATE, ///< Borrow generate for SUB_EXTENDED
+ // Markers: These aren't used to generate target-dependent nodes, but
+ // are used during instruction selection.
+ ADD64_MARKER, ///< i64 addition marker
+ SUB64_MARKER, ///< i64 subtraction marker
+ MUL64_MARKER, ///< i64 multiply marker
LAST_SPUISD ///< Last user-defined instruction
};
}
@@ -74,6 +75,12 @@ namespace llvm {
MVT ValueType);
SDValue get_v4i32_imm(SDNode *N, SelectionDAG &DAG);
SDValue get_v2i64_imm(SDNode *N, SelectionDAG &DAG);
+
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG,
+ const SPUTargetMachine &TM);
+
+ SDValue getBorrowGenerateShufMask(SelectionDAG &DAG);
+ SDValue getCarryGenerateShufMask(SelectionDAG &DAG);
}
class SPUTargetMachine; // forward dec'l.
@@ -86,8 +93,18 @@ namespace llvm {
SPUTargetMachine &SPUTM;
public:
+ //! The venerable constructor
+ /*!
+ This is where the CellSPU backend sets operation handling (i.e., legal,
+ custom, expand or promote.)
+ */
SPUTargetLowering(SPUTargetMachine &TM);
+ //! Get the target machine
+ SPUTargetMachine &getSPUTargetMachine() {
+ return SPUTM;
+ }
+
/// getTargetNodeName() - This method returns the name of a target specific
/// DAG node.
virtual const char *getTargetNodeName(unsigned Opcode) const;
diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td
index b995640..b639ec2 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.td
+++ b/lib/Target/CellSPU/SPUInstrInfo.td
@@ -705,17 +705,14 @@ class ADDXInst<dag OOL, dag IOL, list<dag> pattern>:
class ADDXVecInst<ValueType vectype>:
ADDXInst<(outs VECREG:$rT),
(ins VECREG:$rA, VECREG:$rB, VECREG:$rCarry),
- [(set (vectype VECREG:$rT),
- (SPUaddx (vectype VECREG:$rA), (vectype VECREG:$rB),
- (vectype VECREG:$rCarry)))]>,
+ [/* no pattern */]>,
RegConstraint<"$rCarry = $rT">,
NoEncode<"$rCarry">;
class ADDXRegInst<RegisterClass rclass>:
ADDXInst<(outs rclass:$rT),
(ins rclass:$rA, rclass:$rB, rclass:$rCarry),
- [(set rclass:$rT,
- (SPUaddx rclass:$rA, rclass:$rB, rclass:$rCarry))]>,
+ [/* no pattern */]>,
RegConstraint<"$rCarry = $rT">,
NoEncode<"$rCarry">;
@@ -737,14 +734,12 @@ class CGInst<dag OOL, dag IOL, list<dag> pattern>:
class CGVecInst<ValueType vectype>:
CGInst<(outs VECREG:$rT),
(ins VECREG:$rA, VECREG:$rB),
- [(set (vectype VECREG:$rT),
- (SPUcarry_gen (vectype VECREG:$rA), (vectype VECREG:$rB)))]>;
+ [/* no pattern */]>;
class CGRegInst<RegisterClass rclass>:
CGInst<(outs rclass:$rT),
(ins rclass:$rA, rclass:$rB),
- [(set rclass:$rT,
- (SPUcarry_gen rclass:$rA, rclass:$rB))]>;
+ [/* no pattern */]>;
multiclass CarryGenerate {
def v2i64 : CGVecInst<v2i64>;
@@ -765,17 +760,14 @@ class SFXInst<dag OOL, dag IOL, list<dag> pattern>:
class SFXVecInst<ValueType vectype>:
SFXInst<(outs VECREG:$rT),
(ins VECREG:$rA, VECREG:$rB, VECREG:$rCarry),
- [(set (vectype VECREG:$rT),
- (SPUsubx (vectype VECREG:$rA), (vectype VECREG:$rB),
- (vectype VECREG:$rCarry)))]>,
+ [/* no pattern */]>,
RegConstraint<"$rCarry = $rT">,
NoEncode<"$rCarry">;
class SFXRegInst<RegisterClass rclass>:
SFXInst<(outs rclass:$rT),
(ins rclass:$rA, rclass:$rB, rclass:$rCarry),
- [(set rclass:$rT,
- (SPUsubx rclass:$rA, rclass:$rB, rclass:$rCarry))]>,
+ [/* no pattern */]>,
RegConstraint<"$rCarry = $rT">,
NoEncode<"$rCarry">;
@@ -797,14 +789,12 @@ class BGInst<dag OOL, dag IOL, list<dag> pattern>:
class BGVecInst<ValueType vectype>:
BGInst<(outs VECREG:$rT),
(ins VECREG:$rA, VECREG:$rB),
- [(set (vectype VECREG:$rT),
- (SPUborrow_gen (vectype VECREG:$rA), (vectype VECREG:$rB)))]>;
+ [/* no pattern */]>;
class BGRegInst<RegisterClass rclass>:
BGInst<(outs rclass:$rT),
(ins rclass:$rA, rclass:$rB),
- [(set rclass:$rT,
- (SPUborrow_gen rclass:$rA, rclass:$rB))]>;
+ [/* no pattern */]>;
multiclass BorrowGenerate {
def v4i32 : BGVecInst<v4i32>;
@@ -894,7 +884,7 @@ class MPYAInst<dag OOL, dag IOL, list<dag> pattern>:
"mpya\t$rT, $rA, $rB, $rC", IntegerMulDiv,
pattern>;
-def MPYAvec:
+def MPYAv4i32:
MPYAInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
[(set (v4i32 VECREG:$rT),
(add (v4i32 (bitconvert (mul (v8i16 VECREG:$rA),
@@ -939,7 +929,7 @@ class MPYSInst<dag OOL, dag IOL>:
"mpys\t$rT, $rA, $rB", IntegerMulDiv,
[/* no pattern */]>;
-def MPYSvec:
+def MPYSv4i32:
MPYSInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
def MPYSr16:
@@ -972,14 +962,20 @@ def MPYHHAvec:
def MPYHHAr32:
MPYHHAInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>;
-// mpyhhu: Multiply high-high, unsigned
+// mpyhhu: Multiply high-high, unsigned, e.g.:
+//
+// +-------+-------+ +-------+-------+ +---------+
+// | a0 . a1 | x | b0 . b1 | = | a0 x b0 |
+// +-------+-------+ +-------+-------+ +---------+
+//
+// where a0, b0 are the upper 16 bits of the 32-bit word
class MPYHHUInst<dag OOL, dag IOL>:
RRForm<0b01110011110, OOL, IOL,
"mpyhhu\t$rT, $rA, $rB", IntegerMulDiv,
[/* no pattern */]>;
-def MPYHHUvec:
+def MPYHHUv4i32:
MPYHHUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
def MPYHHUr32:
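For reference when reading the v2i64_mul patterns, the mpy family operates on 16-bit
halfwords within each 32-bit word slot. Rough per-word models (my paraphrase of the
ISA, not patch code):

    #include <cstdint>

    // Per-32-bit-word models of the multiply primitives combined above:
    uint32_t mpyu(uint32_t a, uint32_t b) {    // lo16 x lo16, unsigned
      return (a & 0xffff) * (b & 0xffff);
    }
    uint32_t mpyh(uint32_t a, uint32_t b) {    // hi16 x lo16, shifted << 16
      return ((a >> 16) * (b & 0xffff)) << 16;
    }
    uint32_t mpyhhu(uint32_t a, uint32_t b) {  // hi16 x hi16, unsigned
      return (a >> 16) * (b >> 16);
    }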
diff --git a/lib/Target/CellSPU/SPUMathInstr.td b/lib/Target/CellSPU/SPUMathInstr.td
index 38279a0..64548fd 100644
--- a/lib/Target/CellSPU/SPUMathInstr.td
+++ b/lib/Target/CellSPU/SPUMathInstr.td
@@ -8,8 +8,6 @@
//
// Any resemblance to libsimdmath or the Cell SDK simdmath library is
// purely and completely coincidental.
-//
-// Primary author: Scott Michel (scottm@aero.org)
//===----------------------------------------------------------------------===//
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
diff --git a/lib/Target/CellSPU/SPUNodes.td b/lib/Target/CellSPU/SPUNodes.td
index cae6023..87c4115 100644
--- a/lib/Target/CellSPU/SPUNodes.td
+++ b/lib/Target/CellSPU/SPUNodes.td
@@ -61,18 +61,20 @@ def SPUselb_type: SDTypeProfile<1, 3, [
def SPUvecshift_type: SDTypeProfile<1, 2, [
SDTCisSameAs<0, 1>, SDTCisInt<2>]>;
+// "marker" type for i64 operators that need a shuffle mask
+// (i.e., uses cg or bg or another instruction that needs to
+// use shufb to get things in the right place.)
+// Op0: The result
+// Op1, 2: LHS, RHS
+// Op3: Carry-generate shuffle mask
+
+def SPUmarker_type : SDTypeProfile<1, 3, [
+ SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2> ]>;
+
//===----------------------------------------------------------------------===//
// Synthetic/pseudo-instructions
//===----------------------------------------------------------------------===//
-/// Add extended, carry generate:
-def SPUaddx : SDNode<"SPUISD::ADD_EXTENDED", SPUIntTrinaryOp, []>;
-def SPUcarry_gen : SDNode<"SPUISD::CARRY_GENERATE", SDTIntBinOp, []>;
-
-// Subtract extended, borrow generate
-def SPUsubx : SDNode<"SPUISD::SUB_EXTENDED", SPUIntTrinaryOp, []>;
-def SPUborrow_gen : SDNode<"SPUISD::BORROW_GENERATE", SDTIntBinOp, []>;
-
// SPU CNTB:
def SPUcntb : SDNode<"SPUISD::CNTB", SDTIntUnaryOp>;
@@ -127,6 +129,12 @@ def SPUaform : SDNode<"SPUISD::AFormAddr", SDTIntBinOp, []>;
// Indirect [D-Form "imm($reg)" and X-Form "$reg($reg)"] addresses
def SPUindirect : SDNode<"SPUISD::IndirectAddr", SDTIntBinOp, []>;
+// i64 markers: supplies extra operands used to generate the i64 operator
+// instruction sequences
+def SPUadd64 : SDNode<"SPUISD::ADD64_MARKER", SPUmarker_type, []>;
+def SPUsub64 : SDNode<"SPUISD::SUB64_MARKER", SPUmarker_type, []>;
+def SPUmul64 : SDNode<"SPUISD::MUL64_MARKER", SPUmarker_type, []>;
+
//===----------------------------------------------------------------------===//
// Constraints: (taken from PPCInstrInfo.td)
//===----------------------------------------------------------------------===//
diff --git a/test/CodeGen/CellSPU/i64ops.ll b/test/CodeGen/CellSPU/i64ops.ll
index d118c5f..dd67827 100644
--- a/test/CodeGen/CellSPU/i64ops.ll
+++ b/test/CodeGen/CellSPU/i64ops.ll
@@ -2,9 +2,15 @@
; RUN: grep xswd %t1.s | count 3
; RUN: grep xsbh %t1.s | count 1
; RUN: grep xshw %t1.s | count 2
-; RUN: grep shufb %t1.s | count 4
-; RUN: grep cg %t1.s | count 1
-; RUN: grep addx %t1.s | count 1
+; RUN: grep shufb %t1.s | count 7
+; RUN: grep cg %t1.s | count 4
+; RUN: grep addx %t1.s | count 4
+; RUN: grep fsmbi %t1.s | count 3
+; RUN: grep il %t1.s | count 2
+; RUN: grep mpy %t1.s | count 10
+; RUN: grep mpyh %t1.s | count 6
+; RUN: grep mpyhhu %t1.s | count 2
+; RUN: grep mpyu %t1.s | count 4
; ModuleID = 'stores.bc'
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
@@ -44,3 +50,8 @@ define i64 @add_i64(i64 %a, i64 %b) nounwind {
%1 = add i64 %a, %b
ret i64 %1
}
+
+define i64 @mul_i64(i64 %a, i64 %b) nounwind {
+ %1 = mul i64 %a, %b
+ ret i64 %1
+}
diff --git a/test/CodeGen/CellSPU/useful-harnesses/i64operations.c b/test/CodeGen/CellSPU/useful-harnesses/i64operations.c
index 7a4bf1a..b613bd8 100644
--- a/test/CodeGen/CellSPU/useful-harnesses/i64operations.c
+++ b/test/CodeGen/CellSPU/useful-harnesses/i64operations.c
@@ -7,6 +7,7 @@ int64_t tval_c = 1234567890001LL;
int64_t tval_d = 10001LL;
int64_t tval_e = 10000LL;
uint64_t tval_f = 0xffffff0750135eb9;
+int64_t tval_g = -1;
/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */
@@ -546,6 +547,12 @@ test_i64_variable_shift(const char *func_name, int64_t (*func)(int64_t, int), in
/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */
+int64_t i64_mul(int64_t a, int64_t b) {
+ return a * b;
+}
+
+/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */
+
int
main(void)
{
@@ -553,12 +560,13 @@ main(void)
const char *something_failed = " %d tests failed.\n";
const char *all_tests_passed = " All tests passed.\n";
- printf("tval_a = %20lld (0x%020llx)\n", tval_a, tval_a);
- printf("tval_b = %20lld (0x%020llx)\n", tval_b, tval_b);
- printf("tval_c = %20lld (0x%020llx)\n", tval_c, tval_c);
- printf("tval_d = %20lld (0x%020llx)\n", tval_d, tval_d);
- printf("tval_e = %20lld (0x%020llx)\n", tval_e, tval_e);
- printf("tval_f = %20llu (0x%020llx)\n", tval_f, tval_f);
+ printf("tval_a = %20lld (0x%016llx)\n", tval_a, tval_a);
+ printf("tval_b = %20lld (0x%016llx)\n", tval_b, tval_b);
+ printf("tval_c = %20lld (0x%016llx)\n", tval_c, tval_c);
+ printf("tval_d = %20lld (0x%016llx)\n", tval_d, tval_d);
+ printf("tval_e = %20lld (0x%016llx)\n", tval_e, tval_e);
+ printf("tval_f = %20llu (0x%016llx)\n", tval_f, tval_f);
+ printf("tval_g = %20llu (0x%016llx)\n", tval_g, tval_g);
printf("----------------------------------------\n");
for (i = 0; i < ARR_SIZE(int64_preds); ++i) {
@@ -649,5 +657,17 @@ main(void)
printf("----------------------------------------\n");
+ int64_t result;
+
+ result = i64_mul(tval_g, tval_g);
+ printf("%20lld * %20lld = %20lld (0x%016llx)\n", tval_g, tval_g, result, result);
+ result = i64_mul(tval_d, tval_e);
+ printf("%20lld * %20lld = %20lld (0x%016llx)\n", tval_d, tval_e, result, result);
+ /* 0xba7a664f13077c9 */
+ result = i64_mul(tval_a, tval_b);
+ printf("%20lld * %20lld = %20lld (0x%016llx)\n", tval_a, tval_b, result, result);
+
+ printf("----------------------------------------\n");
+
return 0;
}
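The harness only checks the printed products by eye. The decomposition the backend now
emits can also be sanity-checked in plain code: a hypothetical helper (not in the patch)
that computes the low 64 bits of a product from 16-bit limbs, which is exactly the set
of partial products the v2i64_mul patterns keep:

    #include <cassert>
    #include <cstdint>

    // Low 64 bits of a*b from 16-bit limbs; partial products whose shift
    // is >= 64 bits fall entirely outside the result and are dropped,
    // mirroring the patterns' selection of a_i x b_j terms.
    static uint64_t mul64_limbs(uint64_t a, uint64_t b) {
      uint64_t acc = 0;
      for (int i = 0; i < 4; ++i)
        for (int j = 0; i + j < 4; ++j) {
          uint64_t p = ((a >> (16 * i)) & 0xffff)
                     * ((b >> (16 * j)) & 0xffff);
          acc += p << (16 * (i + j));   // wraps mod 2^64, as intended
        }
      return acc;
    }

    // e.g. assert(mul64_limbs(10001ULL, 10000ULL) == 10001ULL * 10000ULL);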