aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorScott Michel <scottm@aero.org>2008-12-30 23:28:25 +0000
committerScott Michel <scottm@aero.org>2008-12-30 23:28:25 +0000
commit02d711b93e3e0d2f0dae278360abe35305913e23 (patch)
tree8e85a3e48020ea52de566e67942de5b319c180fd
parent998dee96d3ca506cf73a617c0b7fc7f0e467a127 (diff)
downloadexternal_llvm-02d711b93e3e0d2f0dae278360abe35305913e23.zip
external_llvm-02d711b93e3e0d2f0dae278360abe35305913e23.tar.gz
external_llvm-02d711b93e3e0d2f0dae278360abe35305913e23.tar.bz2
- Start moving target-dependent nodes that could be represented by an
  instruction sequence and cannot ordinarily be simplified by DAGcombine
  into the various target description files or SPUDAGToDAGISel.cpp.
  This makes some 64-bit operations legal.
- Eliminate target-dependent ISD enums.
- Update tests.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@61508 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/CellSPU/SPU.td7
-rw-r--r--lib/Target/CellSPU/SPU64InstrInfo.td44
-rw-r--r--lib/Target/CellSPU/SPUISelDAGToDAG.cpp282
-rw-r--r--lib/Target/CellSPU/SPUISelLowering.cpp387
-rw-r--r--lib/Target/CellSPU/SPUISelLowering.h24
-rw-r--r--lib/Target/CellSPU/SPUInstrInfo.cpp29
-rw-r--r--lib/Target/CellSPU/SPUInstrInfo.td278
-rw-r--r--lib/Target/CellSPU/SPUMathInstr.td99
-rw-r--r--lib/Target/CellSPU/SPUNodes.td29
-rw-r--r--lib/Target/CellSPU/SPURegisterInfo.cpp43
-rw-r--r--test/CodeGen/CellSPU/fdiv.ll6
-rw-r--r--test/CodeGen/CellSPU/i64ops.ll5
-rw-r--r--test/CodeGen/CellSPU/mul_ops.ll2
-rw-r--r--test/CodeGen/CellSPU/shift_ops.ll77
-rw-r--r--test/CodeGen/CellSPU/useful-harnesses/i64operations.c54
15 files changed, 734 insertions, 632 deletions
diff --git a/lib/Target/CellSPU/SPU.td b/lib/Target/CellSPU/SPU.td
index a5db1d9..8327fe0 100644
--- a/lib/Target/CellSPU/SPU.td
+++ b/lib/Target/CellSPU/SPU.td
@@ -15,6 +15,13 @@
//
include "llvm/Target/Target.td"
+// Holder of code fragments (you'd think this'd already be in
+// a td file somewhere... :-)
+
+class CodeFrag<dag frag> {
+ dag Fragment = frag;
+}
+
//===----------------------------------------------------------------------===//
// Register File Description
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/CellSPU/SPU64InstrInfo.td b/lib/Target/CellSPU/SPU64InstrInfo.td
index 6d679ba..4159133 100644
--- a/lib/Target/CellSPU/SPU64InstrInfo.td
+++ b/lib/Target/CellSPU/SPU64InstrInfo.td
@@ -1,8 +1,17 @@
+//====--- SPU64InstrInfo.td - Cell SPU 64-bit operations -*- tablegen -*--====//
+//
+// Cell SPU 64-bit operations
+//
+// Primary author: Scott Michel (scottm@aero.org)
+//===----------------------------------------------------------------------===//
+
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// 64-bit comparisons:
//
// 1. The instruction sequences for vector vice scalar differ by a
-// constant.
+// constant. In the scalar case, we're only interested in the
+// top two 32-bit slots, whereas we're interested in an exact
+// all-four-slot match in the vector case.
//
// 2. There are no "immediate" forms, since loading 64-bit constants
// could be a constant pool load.
@@ -10,10 +19,10 @@
// 3. i64 setcc results are i32, which are subsequently converted to a FSM
// mask when used in a select pattern.
//
-// 4. v2i64 setcc results are v4i32, which can be converted to a FSM mask
-// (TODO)
+// 4. v2i64 setcc results are v4i32, which can be converted to a FSM mask (TODO)
+// [Note: this may be moot, since gb produces v4i32 or r32.]
//
-// M00$E Kan be Pretty N@sTi!!!!! (appologies to Monty!)
+// M00$E B!tes Kan be Pretty N@sTi!!!!! (apologies to Monty!)
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// selb instruction definition for i64. Note that the selection mask is
@@ -22,17 +31,15 @@ def SELBr64_cond:
SELBInst<(outs R64C:$rT), (ins R64C:$rA, R64C:$rB, VECREG:$rC),
[/* no pattern */]>;
-class CodeFrag<dag frag> {
- dag Fragment = frag;
-}
-
-class I64SELECTNegCond<PatFrag cond, CodeFrag cmpare>:
+// select the negative condition:
+class I64SELECTNegCond<PatFrag cond, CodeFrag compare>:
Pat<(select (i32 (cond R64C:$rA, R64C:$rB)), R64C:$rTrue, R64C:$rFalse),
- (SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 cmpare.Fragment))>;
+ (SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 compare.Fragment))>;
-class I64SETCCNegCond<PatFrag cond, CodeFrag cmpare>:
+// setcc the negative condition:
+class I64SETCCNegCond<PatFrag cond, CodeFrag compare>:
Pat<(cond R64C:$rA, R64C:$rB),
- (XORIr32 cmpare.Fragment, -1)>;
+ (XORIr32 compare.Fragment, -1)>;
// The i64 seteq fragment that does the scalar->vector conversion and
// comparison:
@@ -64,14 +71,13 @@ multiclass CompareEqual64 {
defm I64EQ: CompareEqual64;
def : Pat<(seteq R64C:$rA, R64C:$rB), I64EQr64.Fragment>;
+def : Pat<(seteq (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)), I64EQv2i64.Fragment>;
-def : Pat<(seteq (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
- I64EQv2i64.Fragment>;
-
-def I64Select:
- Pat<(select R32C:$rC, R64C:$rB, R64C:$rA),
- (SELBr64_cond R64C:$rA, R64C:$rB, (FSMr32 R32C:$rC))>;
+def : Pat<(select R32C:$rC, R64C:$rB, R64C:$rA),
+ (SELBr64_cond R64C:$rA, R64C:$rB, (FSMr32 R32C:$rC))>;
+// i64 setne:
def : I64SETCCNegCond<setne, I64EQr64>;
+def : I64SELECTNegCond<setne, I64EQr64>;
-def : I64SELECTNegCond<setne, I64EQr64>; \ No newline at end of file
+// i64 setugt:
diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
index f51aba2..76b2284 100644
--- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
+++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
@@ -149,7 +149,7 @@ namespace {
}
bool
- isHighLow(const SDValue &Op)
+ isHighLow(const SDValue &Op)
{
return (Op.getOpcode() == SPUISD::IndirectAddr
&& ((Op.getOperand(0).getOpcode() == SPUISD::Hi
@@ -229,14 +229,14 @@ public:
TM(tm),
SPUtli(*tm.getTargetLowering())
{}
-
+
virtual bool runOnFunction(Function &Fn) {
// Make sure we re-emit a set of the global base reg if necessary
GlobalBaseReg = 0;
SelectionDAGISel::runOnFunction(Fn);
return true;
}
-
+
/// getI32Imm - Return a target constant with the specified value, of type
/// i32.
inline SDValue getI32Imm(uint32_t Imm) {
@@ -248,7 +248,7 @@ public:
inline SDValue getI64Imm(uint64_t Imm) {
return CurDAG->getTargetConstant(Imm, MVT::i64);
}
-
+
/// getSmallIPtrImm - Return a target constant of pointer type.
inline SDValue getSmallIPtrImm(unsigned Imm) {
return CurDAG->getTargetConstant(Imm, SPUtli.getPointerTy());
@@ -258,6 +258,15 @@ public:
/// target-specific node if it hasn't already been changed.
SDNode *Select(SDValue Op);
+ //! Emit the instruction sequence for i64 shl
+ SDNode *SelectSHLi64(SDValue &Op, MVT OpVT);
+
+ //! Emit the instruction sequence for i64 srl
+ SDNode *SelectSRLi64(SDValue &Op, MVT OpVT);
+
+ //! Emit the instruction sequence for i64 sra
+ SDNode *SelectSRAi64(SDValue &Op, MVT OpVT);
+
//! Returns true if the address N is an A-form (local store) address
bool SelectAFormAddr(SDValue Op, SDValue N, SDValue &Base,
SDValue &Index);
@@ -287,7 +296,7 @@ public:
switch (ConstraintCode) {
default: return true;
case 'm': // memory
- if (!SelectDFormAddr(Op, Op, Op0, Op1)
+ if (!SelectDFormAddr(Op, Op, Op0, Op1)
&& !SelectAFormAddr(Op, Op, Op0, Op1))
SelectXFormAddr(Op, Op, Op0, Op1);
break;
@@ -306,7 +315,7 @@ public:
#endif
break;
}
-
+
OutOps.push_back(Op0);
OutOps.push_back(Op1);
return false;
@@ -318,14 +327,14 @@ public:
virtual const char *getPassName() const {
return "Cell SPU DAG->DAG Pattern Instruction Selection";
- }
-
+ }
+
/// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
/// this target when scheduling the DAG.
virtual HazardRecognizer *CreateTargetHazardRecognizer() {
const TargetInstrInfo *II = TM.getInstrInfo();
assert(II && "No InstrInfo?");
- return new SPUHazardRecognizer(*II);
+ return new SPUHazardRecognizer(*II);
}
// Include the pieces autogenerated from the target description.
@@ -375,7 +384,7 @@ SPUDAGToDAGISel::SelectAFormAddr(SDValue Op, SDValue N, SDValue &Base,
abort();
/*NOTREACHED*/
- case SPUISD::AFormAddr:
+ case SPUISD::AFormAddr:
// Just load from memory if there's only a single use of the location,
// otherwise, this will get handled below with D-form offset addresses
if (N.hasOneUse()) {
@@ -404,7 +413,7 @@ SPUDAGToDAGISel::SelectAFormAddr(SDValue Op, SDValue N, SDValue &Base,
return false;
}
-bool
+bool
SPUDAGToDAGISel::SelectDForm2Addr(SDValue Op, SDValue N, SDValue &Disp,
SDValue &Base) {
const int minDForm2Offset = -(1 << 7);
@@ -527,7 +536,7 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDValue Op, SDValue N, SDValue &Base,
ConstantSDNode *CN = cast<ConstantSDNode>(Op0);
offset = int32_t(CN->getSExtValue());
idxOp = Op1;
- }
+ }
if (offset >= minOffset && offset <= maxOffset) {
Base = CurDAG->getTargetConstant(offset, PtrTy);
@@ -622,27 +631,20 @@ SPUDAGToDAGISel::Select(SDValue Op) {
if (N->isMachineOpcode()) {
return NULL; // Already selected.
} else if (Opc == ISD::FrameIndex) {
- // Selects to (add $sp, FI * stackSlotSize)
- int FI =
- SPUFrameInfo::FItoStackOffset(cast<FrameIndexSDNode>(N)->getIndex());
- MVT PtrVT = SPUtli.getPointerTy();
-
- // Adjust stack slot to actual offset in frame:
- if (isS10Constant(FI)) {
- DEBUG(cerr << "SPUDAGToDAGISel: Replacing FrameIndex with AIr32 $sp, "
- << FI
- << "\n");
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI, Op.getValueType());
+ SDValue Imm0 = CurDAG->getTargetConstant(0, Op.getValueType());
+
+ if (FI < 128) {
NewOpc = SPU::AIr32;
- Ops[0] = CurDAG->getRegister(SPU::R1, PtrVT);
- Ops[1] = CurDAG->getTargetConstant(FI, PtrVT);
+ Ops[0] = TFI;
+ Ops[1] = Imm0;
n_ops = 2;
} else {
- DEBUG(cerr << "SPUDAGToDAGISel: Replacing FrameIndex with Ar32 $sp, "
- << FI
- << "\n");
NewOpc = SPU::Ar32;
- Ops[0] = CurDAG->getRegister(SPU::R1, PtrVT);
- Ops[1] = CurDAG->getConstant(FI, PtrVT);
+ Ops[0] = CurDAG->getRegister(SPU::R1, Op.getValueType());
+ Ops[1] = SDValue(CurDAG->getTargetNode(SPU::ILAr32, Op.getValueType(),
+ TFI, Imm0), 0);
n_ops = 2;
}
} else if (Opc == ISD::ZERO_EXTEND) {
@@ -661,6 +663,18 @@ SPUDAGToDAGISel::Select(SDValue Op) {
n_ops = 2;
}
}
+ } else if (Opc == ISD::SHL) {
+ if (OpVT == MVT::i64) {
+ return SelectSHLi64(Op, OpVT);
+ }
+ } else if (Opc == ISD::SRL) {
+ if (OpVT == MVT::i64) {
+ return SelectSRLi64(Op, OpVT);
+ }
+ } else if (Opc == ISD::SRA) {
+ if (OpVT == MVT::i64) {
+ return SelectSRAi64(Op, OpVT);
+ }
} else if (Opc == SPUISD::LDRESULT) {
// Custom select instructions for LDRESULT
MVT VT = N->getValueType(0);
@@ -713,7 +727,7 @@ SPUDAGToDAGISel::Select(SDValue Op) {
n_ops = 2;
}
}
-
+
if (n_ops > 0) {
if (N->hasOneUse())
return CurDAG->SelectNodeTo(N, NewOpc, OpVT, Ops, n_ops);
@@ -723,7 +737,213 @@ SPUDAGToDAGISel::Select(SDValue Op) {
return SelectCode(Op);
}
-/// createPPCISelDag - This pass converts a legalized DAG into a
+/*!
+ * Emit the instruction sequence for i64 left shifts. The basic algorithm
+ * is to fill the bottom two word slots with zeros so that zeros are shifted
+ * in as the entire quadword is shifted left.
+ *
+ * \note This code could also be used to implement v2i64 shl.
+ *
+ * @param Op The shl operand
+ * @param OpVT Op's machine value type (doesn't need to be passed, but
+ * makes life easier.)
+ * @return The SDNode with the entire instruction sequence
+ */
+SDNode *
+SPUDAGToDAGISel::SelectSHLi64(SDValue &Op, MVT OpVT) {
+ SDValue Op0 = Op.getOperand(0);
+ MVT VecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));
+ SDValue ShiftAmt = Op.getOperand(1);
+ MVT ShiftAmtVT = ShiftAmt.getValueType();
+ SDNode *VecOp0, *SelMask, *ZeroFill, *Shift = 0;
+ SDValue SelMaskVal;
+
+ VecOp0 = CurDAG->getTargetNode(SPU::ORv2i64_i64, VecVT, Op0);
+ SelMaskVal = CurDAG->getTargetConstant(0xff00ULL, MVT::i16);
+ SelMask = CurDAG->getTargetNode(SPU::FSMBIv2i64, VecVT, SelMaskVal);
+ ZeroFill = CurDAG->getTargetNode(SPU::ILv2i64, VecVT,
+ CurDAG->getTargetConstant(0, OpVT));
+ VecOp0 = CurDAG->getTargetNode(SPU::SELBv2i64, VecVT,
+ SDValue(ZeroFill, 0),
+ SDValue(VecOp0, 0),
+ SDValue(SelMask, 0));
+
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(ShiftAmt)) {
+ unsigned bytes = unsigned(CN->getZExtValue()) >> 3;
+ unsigned bits = unsigned(CN->getZExtValue()) & 7;
+
+ if (bytes > 0) {
+ Shift =
+ CurDAG->getTargetNode(SPU::SHLQBYIv2i64, VecVT,
+ SDValue(VecOp0, 0),
+ CurDAG->getTargetConstant(bytes, ShiftAmtVT));
+ }
+
+ if (bits > 0) {
+ Shift =
+ CurDAG->getTargetNode(SPU::SHLQBIIv2i64, VecVT,
+ SDValue((Shift != 0 ? Shift : VecOp0), 0),
+ CurDAG->getTargetConstant(bits, ShiftAmtVT));
+ }
+ } else {
+ SDNode *Bytes =
+ CurDAG->getTargetNode(SPU::ROTMIr32, ShiftAmtVT,
+ ShiftAmt,
+ CurDAG->getTargetConstant(3, ShiftAmtVT));
+ SDNode *Bits =
+ CurDAG->getTargetNode(SPU::ANDIr32, ShiftAmtVT,
+ ShiftAmt,
+ CurDAG->getTargetConstant(7, ShiftAmtVT));
+ Shift =
+ CurDAG->getTargetNode(SPU::SHLQBYv2i64, VecVT,
+ SDValue(VecOp0, 0), SDValue(Bytes, 0));
+ Shift =
+ CurDAG->getTargetNode(SPU::SHLQBIv2i64, VecVT,
+ SDValue(Shift, 0), SDValue(Bits, 0));
+ }
+
+ return CurDAG->getTargetNode(SPU::ORi64_v2i64, OpVT, SDValue(Shift, 0));
+}
+
+/*!
+ * Emit the instruction sequence for i64 logical right shifts.
+ *
+ * @param Op The srl operand
+ * @param OpVT Op's machine value type (doesn't need to be passed, but
+ * makes life easier.)
+ * @return The SDNode with the entire instruction sequence
+ */
+SDNode *
+SPUDAGToDAGISel::SelectSRLi64(SDValue &Op, MVT OpVT) {
+ SDValue Op0 = Op.getOperand(0);
+ MVT VecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));
+ SDValue ShiftAmt = Op.getOperand(1);
+ MVT ShiftAmtVT = ShiftAmt.getValueType();
+ SDNode *VecOp0, *Shift = 0;
+
+ VecOp0 = CurDAG->getTargetNode(SPU::ORv2i64_i64, VecVT, Op0);
+
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(ShiftAmt)) {
+ unsigned bytes = unsigned(CN->getZExtValue()) >> 3;
+ unsigned bits = unsigned(CN->getZExtValue()) & 7;
+
+ if (bytes > 0) {
+ Shift =
+ CurDAG->getTargetNode(SPU::ROTQMBYIv2i64, VecVT,
+ SDValue(VecOp0, 0),
+ CurDAG->getTargetConstant(bytes, ShiftAmtVT));
+ }
+
+ if (bits > 0) {
+ Shift =
+ CurDAG->getTargetNode(SPU::ROTQMBIIv2i64, VecVT,
+ SDValue((Shift != 0 ? Shift : VecOp0), 0),
+ CurDAG->getTargetConstant(bits, ShiftAmtVT));
+ }
+ } else {
+ SDNode *Bytes =
+ CurDAG->getTargetNode(SPU::ROTMIr32, ShiftAmtVT,
+ ShiftAmt,
+ CurDAG->getTargetConstant(3, ShiftAmtVT));
+ SDNode *Bits =
+ CurDAG->getTargetNode(SPU::ANDIr32, ShiftAmtVT,
+ ShiftAmt,
+ CurDAG->getTargetConstant(7, ShiftAmtVT));
+
+ // Ensure that the shift amounts are negated!
+ Bytes = CurDAG->getTargetNode(SPU::SFIr32, ShiftAmtVT,
+ SDValue(Bytes, 0),
+ CurDAG->getTargetConstant(0, ShiftAmtVT));
+
+ Bits = CurDAG->getTargetNode(SPU::SFIr32, ShiftAmtVT,
+ SDValue(Bits, 0),
+ CurDAG->getTargetConstant(0, ShiftAmtVT));
+
+ Shift =
+ CurDAG->getTargetNode(SPU::ROTQMBYv2i64, VecVT,
+ SDValue(VecOp0, 0), SDValue(Bytes, 0));
+ Shift =
+ CurDAG->getTargetNode(SPU::ROTQMBIv2i64, VecVT,
+ SDValue(Shift, 0), SDValue(Bits, 0));
+ }
+
+ return CurDAG->getTargetNode(SPU::ORi64_v2i64, OpVT, SDValue(Shift, 0));
+}
+
+/*!
+ * Emit the instruction sequence for i64 arithmetic right shifts.
+ *
+ * @param Op The sra operand
+ * @param OpVT Op's machine value type (doesn't need to be passed, but
+ * makes life easier.)
+ * @return The SDNode with the entire instruction sequence
+ */
+SDNode *
+SPUDAGToDAGISel::SelectSRAi64(SDValue &Op, MVT OpVT) {
+ // Promote Op0 to vector
+ MVT VecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));
+ SDValue ShiftAmt = Op.getOperand(1);
+ MVT ShiftAmtVT = ShiftAmt.getValueType();
+
+ SDNode *VecOp0 =
+ CurDAG->getTargetNode(SPU::ORv2i64_i64, VecVT, Op.getOperand(0));
+
+ SDValue SignRotAmt = CurDAG->getTargetConstant(31, ShiftAmtVT);
+ SDNode *SignRot =
+ CurDAG->getTargetNode(SPU::ROTMAIv2i64_i32, MVT::v2i64,
+ SDValue(VecOp0, 0), SignRotAmt);
+ SDNode *UpperHalfSign =
+ CurDAG->getTargetNode(SPU::ORi32_v4i32, MVT::i32, SDValue(SignRot, 0));
+
+ SDNode *UpperHalfSignMask =
+ CurDAG->getTargetNode(SPU::FSM64r32, VecVT, SDValue(UpperHalfSign, 0));
+ SDNode *UpperLowerMask =
+ CurDAG->getTargetNode(SPU::FSMBIv2i64, VecVT,
+ CurDAG->getTargetConstant(0xff00ULL, MVT::i16));
+ SDNode *UpperLowerSelect =
+ CurDAG->getTargetNode(SPU::SELBv2i64, VecVT,
+ SDValue(UpperHalfSignMask, 0),
+ SDValue(VecOp0, 0),
+ SDValue(UpperLowerMask, 0));
+
+ SDNode *Shift = 0;
+
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(ShiftAmt)) {
+ unsigned bytes = unsigned(CN->getZExtValue()) >> 3;
+ unsigned bits = unsigned(CN->getZExtValue()) & 7;
+
+ if (bytes > 0) {
+ bytes = 31 - bytes;
+ Shift =
+ CurDAG->getTargetNode(SPU::ROTQBYIv2i64, VecVT,
+ SDValue(UpperLowerSelect, 0),
+ CurDAG->getTargetConstant(bytes, ShiftAmtVT));
+ }
+
+ if (bits > 0) {
+ bits = 8 - bits;
+ Shift =
+ CurDAG->getTargetNode(SPU::ROTQBIIv2i64, VecVT,
+ SDValue((Shift != 0 ? Shift : UpperLowerSelect), 0),
+ CurDAG->getTargetConstant(bits, ShiftAmtVT));
+ }
+ } else {
+ SDNode *NegShift =
+ CurDAG->getTargetNode(SPU::SFIr32, ShiftAmtVT,
+ ShiftAmt, CurDAG->getTargetConstant(0, ShiftAmtVT));
+
+ Shift =
+ CurDAG->getTargetNode(SPU::ROTQBYBIv2i64_r32, VecVT,
+ SDValue(UpperLowerSelect, 0), SDValue(NegShift, 0));
+ Shift =
+ CurDAG->getTargetNode(SPU::ROTQBIv2i64, VecVT,
+ SDValue(Shift, 0), SDValue(NegShift, 0));
+ }
+
+ return CurDAG->getTargetNode(SPU::ORi64_v2i64, OpVT, SDValue(Shift, 0));
+}
+
+/// createSPUISelDag - This pass converts a legalized DAG into a
/// SPU-specific DAG, ready for instruction scheduling.
///
FunctionPass *llvm::createSPUISelDag(SPUTargetMachine &TM) {
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
index 0822181..5ccfd14 100644
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -204,10 +204,10 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setOperationAction(ISD::SRL, MVT::i8, Custom);
setOperationAction(ISD::SRA, MVT::i8, Custom);
- // SPU needs custom lowering for shift left/right for i64
- setOperationAction(ISD::SHL, MVT::i64, Custom);
- setOperationAction(ISD::SRL, MVT::i64, Custom);
- setOperationAction(ISD::SRA, MVT::i64, Custom);
+ // Make these operations legal and handle them during instruction selection:
+ setOperationAction(ISD::SHL, MVT::i64, Legal);
+ setOperationAction(ISD::SRL, MVT::i64, Legal);
+ setOperationAction(ISD::SRA, MVT::i64, Legal);
// Custom lower i8, i32 and i64 multiplications
setOperationAction(ISD::MUL, MVT::i8, Custom);
@@ -215,6 +215,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setOperationAction(ISD::MUL, MVT::i64, Expand); // libcall
// Need to custom handle (some) common i8, i64 math ops
+ setOperationAction(ISD::ADD, MVT::i8, Custom);
setOperationAction(ISD::ADD, MVT::i64, Custom);
setOperationAction(ISD::SUB, MVT::i8, Custom);
setOperationAction(ISD::SUB, MVT::i64, Custom);
@@ -249,7 +250,6 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
// Zero extension and sign extension for i64 have to be
// custom legalized
setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
- setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
setOperationAction(ISD::ANY_EXTEND, MVT::i64, Custom);
// Custom lower i128 -> i64 truncates
@@ -262,7 +262,6 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
// FDIV on SPU requires custom lowering
- setOperationAction(ISD::FDIV, MVT::f32, Custom);
setOperationAction(ISD::FDIV, MVT::f64, Expand); // libcall
// SPU has [U|S]INT_TO_FP
@@ -340,7 +339,8 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setOperationAction(ISD::ADD , VT, Legal);
setOperationAction(ISD::SUB , VT, Legal);
// mul has to be custom lowered.
- setOperationAction(ISD::MUL , VT, Custom);
+ // TODO: v2i64 vector multiply
+ setOperationAction(ISD::MUL , VT, Legal);
setOperationAction(ISD::AND , VT, Legal);
setOperationAction(ISD::OR , VT, Legal);
@@ -354,7 +354,6 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setOperationAction(ISD::SREM, VT, Expand);
setOperationAction(ISD::UDIV, VT, Expand);
setOperationAction(ISD::UREM, VT, Expand);
- setOperationAction(ISD::FDIV, VT, Custom);
// Custom lower build_vector, constant pool spills, insert and
// extract vector elements:
@@ -371,9 +370,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setOperationAction(ISD::XOR, MVT::v16i8, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
- // FIXME: This is only temporary until I put all vector multiplications in
- // SPUInstrInfo.td:
- setOperationAction(ISD::MUL, MVT::v4i32, Legal);
+ setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
setShiftAmountType(MVT::i32);
setBooleanContents(ZeroOrNegativeOneBooleanContent);
@@ -411,10 +408,6 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
- node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
- node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
- node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
- node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
@@ -422,21 +415,12 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
- node_names[(unsigned) SPUISD::ROTQUAD_RZ_BYTES] =
- "SPUISD::ROTQUAD_RZ_BYTES";
- node_names[(unsigned) SPUISD::ROTQUAD_RZ_BITS] =
- "SPUISD::ROTQUAD_RZ_BITS";
- node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
- node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
- "SPUISD::ROTBYTES_LEFT_BITS";
node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED";
node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE";
node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED";
node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE";
- node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
- node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
}
@@ -1922,182 +1906,6 @@ static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
return SDValue();
}
-static SDValue LowerVectorMUL(SDValue Op, SelectionDAG &DAG) {
- switch (Op.getValueType().getSimpleVT()) {
- default:
- cerr << "CellSPU: Unknown vector multiplication, got "
- << Op.getValueType().getMVTString()
- << "\n";
- abort();
- /*NOTREACHED*/
-
- case MVT::v4i32:
- break;
-
- // Multiply two v8i16 vectors (pipeline friendly version):
- // a) multiply lower halves, mask off upper 16-bit of 32-bit product
- // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
- // c) Use SELB to select upper and lower halves from the intermediate results
- //
- // NOTE: We really want to move the SELECT_MASK to earlier to actually get the
- // dual-issue. This code does manage to do this, even if it's a little on
- // the wacky side
- case MVT::v8i16: {
- MachineFunction &MF = DAG.getMachineFunction();
- MachineRegisterInfo &RegInfo = MF.getRegInfo();
- SDValue Chain = Op.getOperand(0);
- SDValue rA = Op.getOperand(0);
- SDValue rB = Op.getOperand(1);
- unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
- unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
-
- SDValue FSMBOp =
- DAG.getCopyToReg(Chain, FSMBIreg,
- DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
- DAG.getConstant(0xcccc, MVT::i16)));
-
- SDValue HHProd =
- DAG.getCopyToReg(FSMBOp, HiProdReg,
- DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
-
- SDValue HHProd_v4i32 =
- DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
- DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
-
- return DAG.getNode(SPUISD::SELB, MVT::v8i16,
- DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
- DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
- DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
- HHProd_v4i32,
- DAG.getConstant(16, MVT::i16))),
- DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
- }
-
- // This M00sE is N@stI! (apologies to Monty Python)
- //
- // SPU doesn't know how to do any 8-bit multiplication, so the solution
- // is to break it all apart, sign extend, and reassemble the various
- // intermediate products.
- case MVT::v16i8: {
- SDValue rA = Op.getOperand(0);
- SDValue rB = Op.getOperand(1);
- SDValue c8 = DAG.getConstant(8, MVT::i32);
- SDValue c16 = DAG.getConstant(16, MVT::i32);
-
- SDValue LLProd =
- DAG.getNode(SPUISD::MPY, MVT::v8i16,
- DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
- DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
-
- SDValue rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
-
- SDValue rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
-
- SDValue LHProd =
- DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
- DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
-
- SDValue FSMBmask = DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
- DAG.getConstant(0x2222, MVT::i16));
-
- SDValue LoProdParts =
- DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
- DAG.getNode(SPUISD::SELB, MVT::v8i16,
- LLProd, LHProd, FSMBmask));
-
- SDValue LoProdMask = DAG.getConstant(0xffff, MVT::i32);
-
- SDValue LoProd =
- DAG.getNode(ISD::AND, MVT::v4i32,
- LoProdParts,
- DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
- LoProdMask, LoProdMask,
- LoProdMask, LoProdMask));
-
- SDValue rAH =
- DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
- DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
-
- SDValue rBH =
- DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
- DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
-
- SDValue HLProd =
- DAG.getNode(SPUISD::MPY, MVT::v8i16,
- DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
- DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
-
- SDValue HHProd_1 =
- DAG.getNode(SPUISD::MPY, MVT::v8i16,
- DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
- DAG.getNode(SPUISD::VEC_SRA,
- MVT::v4i32, rAH, c8)),
- DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
- DAG.getNode(SPUISD::VEC_SRA,
- MVT::v4i32, rBH, c8)));
-
- SDValue HHProd =
- DAG.getNode(SPUISD::SELB, MVT::v8i16,
- HLProd,
- DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
- FSMBmask);
-
- SDValue HiProd =
- DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, HHProd, c16);
-
- return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
- DAG.getNode(ISD::OR, MVT::v4i32,
- LoProd, HiProd));
- }
- }
-
- return SDValue();
-}
-
-static SDValue LowerFDIVf32(SDValue Op, SelectionDAG &DAG) {
- MachineFunction &MF = DAG.getMachineFunction();
- MachineRegisterInfo &RegInfo = MF.getRegInfo();
-
- SDValue A = Op.getOperand(0);
- SDValue B = Op.getOperand(1);
- MVT VT = Op.getValueType();
-
- unsigned VRegBR, VRegC;
-
- if (VT == MVT::f32) {
- VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
- VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
- } else {
- VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
- VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
- }
- // TODO: make sure we're feeding FPInterp the right arguments
- // Right now: fi B, frest(B)
-
- // Computes BRcpl =
- // (Floating Interpolate (FP Reciprocal Estimate B))
- SDValue BRcpl =
- DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
- DAG.getNode(SPUISD::FPInterp, VT, B,
- DAG.getNode(SPUISD::FPRecipEst, VT, B)));
-
- // Computes A * BRcpl and stores in a temporary register
- SDValue AxBRcpl =
- DAG.getCopyToReg(BRcpl, VRegC,
- DAG.getNode(ISD::FMUL, VT, A,
- DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
- // What's the Chain variable do? It's magic!
- // TODO: set Chain = Op(0).getEntryNode()
-
- return DAG.getNode(ISD::FADD, VT,
- DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
- DAG.getNode(ISD::FMUL, VT,
- DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
- DAG.getNode(ISD::FSUB, VT, A,
- DAG.getNode(ISD::FMUL, VT, B,
- DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
-}
-
static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getValueType();
SDValue N = Op.getOperand(0);
@@ -2296,18 +2104,23 @@ static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
assert(0 && "Unhandled i8 math operator");
/*NOTREACHED*/
break;
+ case ISD::ADD: {
+ // 8-bit addition: Promote the arguments up to 16-bits and truncate
+ // the result:
+ SDValue N1 = Op.getOperand(1);
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0);
+ N1 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1);
+ return DAG.getNode(ISD::TRUNCATE, MVT::i8,
+ DAG.getNode(Opc, MVT::i16, N0, N1));
+
+ }
+
case ISD::SUB: {
// 8-bit subtraction: Promote the arguments up to 16-bits and truncate
// the result:
SDValue N1 = Op.getOperand(1);
- N0 = (N0.getOpcode() != ISD::Constant
- ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
- : DAG.getConstant(cast<ConstantSDNode>(N0)->getSExtValue(),
- MVT::i16));
- N1 = (N1.getOpcode() != ISD::Constant
- ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
- : DAG.getConstant(cast<ConstantSDNode>(N1)->getSExtValue(),
- MVT::i16));
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0);
+ N1 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1);
return DAG.getNode(ISD::TRUNCATE, MVT::i8,
DAG.getNode(Opc, MVT::i16, N0, N1));
}
@@ -2397,7 +2210,6 @@ static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
switch (Opc) {
case ISD::ZERO_EXTEND:
- case ISD::SIGN_EXTEND:
case ISD::ANY_EXTEND: {
MVT Op0VT = Op0.getValueType();
MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
@@ -2410,39 +2222,16 @@ static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
SDValue PromoteScalar =
DAG.getNode(SPUISD::PREFSLOT2VEC, Op0VecVT, Op0);
- if (Opc != ISD::SIGN_EXTEND) {
- // Use a shuffle to zero extend the i32 to i64 directly:
- SDValue shufMask =
- DAG.getNode(ISD::BUILD_VECTOR, Op0VecVT,
- DAG.getConstant(0x80808080, MVT::i32),
- DAG.getConstant(0x00010203, MVT::i32),
- DAG.getConstant(0x80808080, MVT::i32),
- DAG.getConstant(0x08090a0b, MVT::i32));
- SDValue zextShuffle =
- DAG.getNode(SPUISD::SHUFB, Op0VecVT,
- PromoteScalar, PromoteScalar, shufMask);
-
- return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
- DAG.getNode(ISD::BIT_CONVERT, VecVT, zextShuffle));
- } else {
- // SPU has no "rotate quadword and replicate bit 0" (i.e. rotate/shift
- // right and propagate the sign bit) instruction.
- SDValue RotQuad =
- DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, Op0VecVT,
- PromoteScalar, DAG.getConstant(4, MVT::i32));
- SDValue SignQuad =
- DAG.getNode(SPUISD::VEC_SRA, Op0VecVT,
- PromoteScalar, DAG.getConstant(32, MVT::i32));
- SDValue SelMask =
- DAG.getNode(SPUISD::SELECT_MASK, Op0VecVT,
- DAG.getConstant(0xf0f0, MVT::i16));
- SDValue CombineQuad =
- DAG.getNode(SPUISD::SELB, Op0VecVT,
- SignQuad, RotQuad, SelMask);
-
- return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
- DAG.getNode(ISD::BIT_CONVERT, VecVT, CombineQuad));
- }
+ // Use a shuffle to zero extend the i32 to i64 directly:
+ SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, Op0VecVT,
+ DAG.getConstant(0x80808080, MVT::i32), DAG.getConstant(0x00010203,
+ MVT::i32), DAG.getConstant(0x80808080, MVT::i32), DAG.getConstant(
+ 0x08090a0b, MVT::i32));
+ SDValue zextShuffle = DAG.getNode(SPUISD::SHUFB, Op0VecVT, PromoteScalar,
+ PromoteScalar, shufMask);
+
+ return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, DAG.getNode(ISD::BIT_CONVERT,
+ VecVT, zextShuffle));
}
case ISD::ADD: {
@@ -2502,88 +2291,6 @@ static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64,
Op0, Op1, ShiftedBorrow));
}
-
- case ISD::SHL: {
- SDValue ShiftAmt = Op.getOperand(1);
- MVT ShiftAmtVT = ShiftAmt.getValueType();
- SDValue Op0Vec = DAG.getNode(SPUISD::PREFSLOT2VEC, VecVT, Op0);
- SDValue MaskLower =
- DAG.getNode(SPUISD::SELB, VecVT,
- Op0Vec,
- DAG.getConstant(0, VecVT),
- DAG.getNode(SPUISD::SELECT_MASK, VecVT,
- DAG.getConstant(0xff00ULL, MVT::i16)));
- SDValue ShiftAmtBytes =
- DAG.getNode(ISD::SRL, ShiftAmtVT,
- ShiftAmt,
- DAG.getConstant(3, ShiftAmtVT));
- SDValue ShiftAmtBits =
- DAG.getNode(ISD::AND, ShiftAmtVT,
- ShiftAmt,
- DAG.getConstant(7, ShiftAmtVT));
-
- return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
- DAG.getNode(SPUISD::SHLQUAD_L_BITS, VecVT,
- DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT,
- MaskLower, ShiftAmtBytes),
- ShiftAmtBits));
- }
-
- case ISD::SRL: {
- MVT VT = Op.getValueType();
- SDValue ShiftAmt = Op.getOperand(1);
- MVT ShiftAmtVT = ShiftAmt.getValueType();
- SDValue ShiftAmtBytes =
- DAG.getNode(ISD::SRL, ShiftAmtVT,
- ShiftAmt,
- DAG.getConstant(3, ShiftAmtVT));
- SDValue ShiftAmtBits =
- DAG.getNode(ISD::AND, ShiftAmtVT,
- ShiftAmt,
- DAG.getConstant(7, ShiftAmtVT));
-
- return DAG.getNode(SPUISD::ROTQUAD_RZ_BITS, VT,
- DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, VT,
- Op0, ShiftAmtBytes),
- ShiftAmtBits);
- }
-
- case ISD::SRA: {
- // Promote Op0 to vector
- SDValue Op0 =
- DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(0));
- SDValue ShiftAmt = Op.getOperand(1);
- MVT ShiftVT = ShiftAmt.getValueType();
-
- // Negate variable shift amounts
- if (!isa<ConstantSDNode>(ShiftAmt)) {
- ShiftAmt = DAG.getNode(ISD::SUB, ShiftVT,
- DAG.getConstant(0, ShiftVT), ShiftAmt);
- }
-
- SDValue UpperHalfSign =
- DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i32,
- DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
- DAG.getNode(SPUISD::VEC_SRA, MVT::v2i64,
- Op0, DAG.getConstant(31, MVT::i32))));
- SDValue UpperHalfSignMask =
- DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64, UpperHalfSign);
- SDValue UpperLowerMask =
- DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64,
- DAG.getConstant(0xff00, MVT::i16));
- SDValue UpperLowerSelect =
- DAG.getNode(SPUISD::SELB, MVT::v2i64,
- UpperHalfSignMask, Op0, UpperLowerMask);
- SDValue RotateLeftBytes =
- DAG.getNode(SPUISD::ROTBYTES_LEFT_BITS, MVT::v2i64,
- UpperLowerSelect, ShiftAmt);
- SDValue RotateLeftBits =
- DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v2i64,
- RotateLeftBytes, ShiftAmt);
-
- return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64,
- RotateLeftBits);
- }
}
return SDValue();
@@ -2890,10 +2597,11 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
return LowerRET(Op, DAG, getTargetMachine());
- // i8, i64 math ops:
case ISD::ZERO_EXTEND:
- case ISD::SIGN_EXTEND:
case ISD::ANY_EXTEND:
+ return LowerI64Math(Op, DAG, Opc);
+
+ // i8, i64 math ops:
case ISD::ADD:
case ISD::SUB:
case ISD::ROTR:
@@ -2928,22 +2636,9 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
// Vector and i8 multiply:
case ISD::MUL:
- if (VT.isVector())
- return LowerVectorMUL(Op, DAG);
- else if (VT == MVT::i8)
+ if (VT == MVT::i8)
return LowerI8Math(Op, DAG, Opc, *this);
- case ISD::FDIV:
- if (VT == MVT::f32 || VT == MVT::v4f32)
- return LowerFDIVf32(Op, DAG);
-#if 0
- // This is probably a libcall
- else if (Op.getValueType() == MVT::f64)
- return LowerFDIVf64(Op, DAG);
-#endif
- else
- assert(0 && "Calling FDIV on unsupported MVT");
-
case ISD::CTPOP:
return LowerCTPOP(Op, DAG);
@@ -3119,8 +2814,6 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
case SPUISD::VEC_SHL:
case SPUISD::VEC_SRL:
case SPUISD::VEC_SRA:
- case SPUISD::ROTQUAD_RZ_BYTES:
- case SPUISD::ROTQUAD_RZ_BITS:
case SPUISD::ROTBYTES_LEFT: {
SDValue Op1 = N->getOperand(1);
@@ -3268,10 +2961,6 @@ SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
}
#if 0
- case MPY:
- case MPYU:
- case MPYH:
- case MPYHH:
case SPUISD::SHLQUAD_L_BITS:
case SPUISD::SHLQUAD_L_BYTES:
case SPUISD::VEC_SHL:
@@ -3279,18 +2968,14 @@ SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
case SPUISD::VEC_SRA:
case SPUISD::VEC_ROTL:
case SPUISD::VEC_ROTR:
- case SPUISD::ROTQUAD_RZ_BYTES:
- case SPUISD::ROTQUAD_RZ_BITS:
case SPUISD::ROTBYTES_LEFT:
case SPUISD::SELECT_MASK:
case SPUISD::SELB:
- case SPUISD::FPInterp:
- case SPUISD::FPRecipEst:
case SPUISD::SEXT32TO64:
#endif
}
}
-
+
unsigned
SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
unsigned Depth) const {
diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h
index 8d2e994..0eed9b0 100644
--- a/lib/Target/CellSPU/SPUISelLowering.h
+++ b/lib/Target/CellSPU/SPUISelLowering.h
@@ -24,10 +24,10 @@ namespace llvm {
enum NodeType {
// Start the numbering where the builtin ops and target ops leave off.
FIRST_NUMBER = ISD::BUILTIN_OP_END,
-
+
// Pseudo instructions:
RET_FLAG, ///< Return with flag, matched by bi instruction
-
+
Hi, ///< High address component (upper 16)
Lo, ///< Low address component (lower 16)
PCRelAddr, ///< Program counter relative address
@@ -41,10 +41,6 @@ namespace llvm {
CNTB, ///< Count leading ones in bytes
PREFSLOT2VEC, ///< Promote scalar->vector
VEC2PREFSLOT, ///< Extract element 0
- MPY, ///< 16-bit Multiply (low parts of a 32-bit)
- MPYU, ///< Multiply Unsigned
- MPYH, ///< Multiply High
- MPYHH, ///< Multiply High-High
SHLQUAD_L_BITS, ///< Shift quad left, by bits
SHLQUAD_L_BYTES, ///< Shift quad left, by bytes
VEC_SHL, ///< Vector shift left
@@ -52,8 +48,6 @@ namespace llvm {
VEC_SRA, ///< Vector shift right (arithmetic)
VEC_ROTL, ///< Vector rotate left
VEC_ROTR, ///< Vector rotate right
- ROTQUAD_RZ_BYTES, ///< Rotate quad right, by bytes, zero fill
- ROTQUAD_RZ_BITS, ///< Rotate quad right, by bits, zero fill
ROTBYTES_LEFT, ///< Rotate bytes (loads -> ROTQBYI)
ROTBYTES_LEFT_BITS, ///< Rotate bytes left by bit shift count
SELECT_MASK, ///< Select Mask (FSM, FSMB, FSMH, FSMBI)
@@ -63,8 +57,6 @@ namespace llvm {
CARRY_GENERATE, ///< Carry generate for ADD_EXTENDED
SUB_EXTENDED, ///< Subtract extended, with borrow
BORROW_GENERATE, ///< Borrow generate for SUB_EXTENDED
- FPInterp, ///< Floating point interpolate
- FPRecipEst, ///< Floating point reciprocal estimate
SEXT32TO64, ///< Sign-extended 32-bit const -> 64-bits
LAST_SPUISD ///< Last user-defined instruction
};
@@ -87,7 +79,7 @@ namespace llvm {
}
class SPUTargetMachine; // forward dec'l.
-
+
class SPUTargetLowering :
public TargetLowering
{
@@ -97,14 +89,14 @@ namespace llvm {
public:
SPUTargetLowering(SPUTargetMachine &TM);
-
+
/// getTargetNodeName() - This method returns the name of a target specific
/// DAG node.
virtual const char *getTargetNodeName(unsigned Opcode) const;
/// getSetCCResultType - Return the ValueType for ISD::SETCC
virtual MVT getSetCCResultType(const SDValue &) const;
-
+
//! Custom lowering hooks
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG);
@@ -116,7 +108,7 @@ namespace llvm {
virtual void computeMaskedBitsForTargetNode(const SDValue Op,
const APInt &Mask,
- APInt &KnownZero,
+ APInt &KnownZero,
APInt &KnownOne,
const SelectionDAG &DAG,
unsigned Depth = 0) const;
@@ -126,12 +118,12 @@ namespace llvm {
ConstraintType getConstraintType(const std::string &ConstraintLetter) const;
- std::pair<unsigned, const TargetRegisterClass*>
+ std::pair<unsigned, const TargetRegisterClass*>
getRegForInlineAsmConstraint(const std::string &Constraint,
MVT VT) const;
void LowerAsmOperandForConstraint(SDValue Op, char ConstraintLetter,
- bool hasMemory,
+ bool hasMemory,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const;
diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp
index 37a5870..3c8165f 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.cpp
+++ b/lib/Target/CellSPU/SPUInstrInfo.cpp
@@ -82,7 +82,7 @@ SPUInstrInfo::isMoveInstr(const MachineInstr& MI,
case SPU::ORIi8i32:
case SPU::AHIvec:
case SPU::AHIr16:
- case SPU::AIvec:
+ case SPU::AIv4i32:
assert(MI.getNumOperands() == 3 &&
MI.getOperand(0).isReg() &&
MI.getOperand(1).isReg() &&
@@ -98,8 +98,7 @@ SPUInstrInfo::isMoveInstr(const MachineInstr& MI,
assert(MI.getNumOperands() == 3 &&
"wrong number of operands to AIr32");
if (MI.getOperand(0).isReg() &&
- (MI.getOperand(1).isReg() ||
- MI.getOperand(1).isFI()) &&
+ MI.getOperand(1).isReg() &&
(MI.getOperand(2).isImm() &&
MI.getOperand(2).getImm() == 0)) {
sourceReg = MI.getOperand(1).getReg();
@@ -265,7 +264,7 @@ bool SPUInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
// reg class to any other reg class containing R3. This is required because
// we instruction select bitconvert i64 -> f64 as a noop for example, so our
// types have no specific meaning.
-
+
if (DestRC == SPU::R8CRegisterClass) {
BuildMI(MBB, MI, get(SPU::ORBIr8), DestReg).addReg(SrcReg).addImm(0);
} else if (DestRC == SPU::R16CRegisterClass) {
@@ -291,7 +290,7 @@ bool SPUInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
// Attempt to copy unknown/unsupported register class!
return false;
}
-
+
return true;
}
@@ -464,7 +463,7 @@ SPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
unsigned OpNum = Ops[0];
unsigned Opc = MI->getOpcode();
MachineInstr *NewMI = 0;
-
+
if ((Opc == SPU::ORr32
|| Opc == SPU::ORv4i32)
&& MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) {
@@ -508,7 +507,7 @@ SPUInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
// Get the last instruction in the block.
MachineInstr *LastInst = I;
-
+
// If there is only one terminator instruction, process it.
if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
if (isUncondBranch(LastInst)) {
@@ -524,7 +523,7 @@ SPUInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
// Otherwise, don't know what this is.
return true;
}
-
+
// Get the instruction before it if it's a terminator.
MachineInstr *SecondLastInst = I;
@@ -532,7 +531,7 @@ SPUInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
if (SecondLastInst && I != MBB.begin() &&
isUnpredicatedTerminator(--I))
return true;
-
+
// If the block ends with a conditional and unconditional branch, handle it.
if (isCondBranch(SecondLastInst) && isUncondBranch(LastInst)) {
TBB = SecondLastInst->getOperand(1).getMBB();
@@ -541,7 +540,7 @@ SPUInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
FBB = LastInst->getOperand(0).getMBB();
return false;
}
-
+
// If the block ends with two unconditional branches, handle it. The second
// one is not executed, so remove it.
if (isUncondBranch(SecondLastInst) && isUncondBranch(LastInst)) {
@@ -554,7 +553,7 @@ SPUInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
// Otherwise, can't handle this.
return true;
}
-
+
unsigned
SPUInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator I = MBB.end();
@@ -578,16 +577,16 @@ SPUInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
I->eraseFromParent();
return 2;
}
-
+
unsigned
SPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
const SmallVectorImpl<MachineOperand> &Cond) const {
// Shouldn't be a fall through.
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
- assert((Cond.size() == 2 || Cond.size() == 0) &&
+ assert((Cond.size() == 2 || Cond.size() == 0) &&
"SPU branch conditions have two components!");
-
+
// One-way branch.
if (FBB == 0) {
if (Cond.empty()) // Unconditional branch
@@ -600,7 +599,7 @@ SPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
}
return 1;
}
-
+
// Two-way Conditional Branch.
#if 0
BuildMI(&MBB, get(SPU::BRNZ))
diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td
index 1abbc0a..751f36e 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.td
+++ b/lib/Target/CellSPU/SPUInstrInfo.td
@@ -583,7 +583,9 @@ def AHIvec:
def AHIr16:
RI10Form<0b10111000, (outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
"ahi\t$rT, $rA, $val", IntegerOp,
- [(set R16C:$rT, (add R16C:$rA, v8i16SExt10Imm:$val))]>;
+ [(set R16C:$rT, (add R16C:$rA, i16ImmSExt10:$val))]>;
+
+// v4i32, i32 add instruction:
class AInst<dag OOL, dag IOL, list<dag> pattern>:
RRForm<0b00000011000, OOL, IOL,
@@ -604,21 +606,42 @@ multiclass AddInstruction {
def v16i8: AVecInst<v16i8>;
def r32: ARegInst<R32C>;
- def r8: AInst<(outs R8C:$rT), (ins R8C:$rA, R8C:$rB), [/* no pattern */]>;
}
defm A : AddInstruction;
-def AIvec:
- RI10Form<0b00111000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- "ai\t$rT, $rA, $val", IntegerOp,
- [(set (v4i32 VECREG:$rT), (add (v4i32 VECREG:$rA),
- v4i32SExt10Imm:$val))]>;
+class AIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI10Form<0b00111000, OOL, IOL,
+ "ai\t$rT, $rA, $val", IntegerOp,
+ pattern>;
+
+class AIVecInst<ValueType vectype, PatLeaf immpred>:
+ AIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (vectype VECREG:$rT), (add (vectype VECREG:$rA), immpred:$val))]>;
+
+class AIFPVecInst<ValueType vectype, PatLeaf immpred>:
+ AIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [/* no pattern */]>;
+
+class AIRegInst<RegisterClass rclass, PatLeaf immpred>:
+ AIInst<(outs rclass:$rT), (ins rclass:$rA, s10imm_i32:$val),
+ [(set rclass:$rT, (add rclass:$rA, immpred:$val))]>;
+
+// This is used to add epsilons to floating point numbers in the f32 fdiv code:
+class AIFPInst<RegisterClass rclass, PatLeaf immpred>:
+ AIInst<(outs rclass:$rT), (ins rclass:$rA, s10imm_i32:$val),
+ [/* no pattern */]>;
-def AIr32:
- RI10Form<0b00111000, (outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val),
- "ai\t$rT, $rA, $val", IntegerOp,
- [(set R32C:$rT, (add R32C:$rA, i32ImmSExt10:$val))]>;
+multiclass AddImmediate {
+ def v4i32: AIVecInst<v4i32, v4i32SExt10Imm>;
+
+ def r32: AIRegInst<R32C, i32ImmSExt10>;
+
+ def v4f32: AIFPVecInst<v4f32, v4i32SExt10Imm>;
+ def f32: AIFPInst<R32FP, i32ImmSExt10>;
+}
+
+defm AI : AddImmediate;
def SFHvec:
RRForm<0b00010010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
@@ -795,8 +818,7 @@ def BGXvec:
def MPYv8i16:
RRForm<0b00100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
"mpy\t$rT, $rA, $rB", IntegerMulDiv,
- [(set (v8i16 VECREG:$rT), (SPUmpy_vec (v8i16 VECREG:$rA),
- (v8i16 VECREG:$rB)))]>;
+ [/* no pattern */]>;
def MPYr16:
RRForm<0b00100011110, (outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
@@ -812,8 +834,7 @@ class MPYUInst<dag OOL, dag IOL, list<dag> pattern>:
def MPYUv4i32:
MPYUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (v4i32 VECREG:$rT),
- (SPUmpyu_vec (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+ [/* no pattern */]>;
def MPYUr16:
MPYUInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB),
@@ -821,7 +842,7 @@ def MPYUr16:
def MPYUr32:
MPYUInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
- [(set R32C:$rT, (SPUmpyu_int R32C:$rA, R32C:$rB))]>;
+ [/* no pattern */]>;
// mpyi: multiply 16 x s10imm -> 32 result.
@@ -892,87 +913,78 @@ class MPYHInst<dag OOL, dag IOL, list<dag> pattern>:
def MPYHv4i32:
MPYHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (v4i32 VECREG:$rT),
- (SPUmpyh_vec (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+ [/* no pattern */]>;
def MPYHr32:
MPYHInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
- [(set R32C:$rT, (SPUmpyh_int R32C:$rA, R32C:$rB))]>;
+ [/* no pattern */]>;
// mpys: multiply high and shift right (returns the top half of
// a 16-bit multiply, sign extended to 32 bits.)
-def MPYSvec:
- RRForm<0b11100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+
+class MPYSInst<dag OOL, dag IOL>:
+ RRForm<0b11100011110, OOL, IOL,
"mpys\t$rT, $rA, $rB", IntegerMulDiv,
- []>;
+ [/* no pattern */]>;
+def MPYSvec:
+ MPYSInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
+
def MPYSr16:
- RRForm<0b11100011110, (outs R32C:$rT), (ins R16C:$rA, R16C:$rB),
- "mpys\t$rT, $rA, $rB", IntegerMulDiv,
- []>;
+ MPYSInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB)>;
// mpyhh: multiply high-high (returns the 32-bit result from multiplying
// the top 16 bits of the $rA, $rB)
+
+class MPYHHInst<dag OOL, dag IOL>:
+ RRForm<0b01100011110, OOL, IOL,
+ "mpyhh\t$rT, $rA, $rB", IntegerMulDiv,
+ [/* no pattern */]>;
+
def MPYHHv8i16:
- RRForm<0b01100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "mpyhh\t$rT, $rA, $rB", IntegerMulDiv,
- [(set (v8i16 VECREG:$rT),
- (SPUmpyhh_vec (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)))]>;
+ MPYHHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
def MPYHHr32:
- RRForm<0b01100011110, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
- "mpyhh\t$rT, $rA, $rB", IntegerMulDiv,
- []>;
+ MPYHHInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>;
// mpyhha: Multiply high-high, add to $rT:
-def MPYHHAvec:
- RRForm<0b01100010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+
+class MPYHHAInst<dag OOL, dag IOL>:
+ RRForm<0b01100010110, OOL, IOL,
"mpyhha\t$rT, $rA, $rB", IntegerMulDiv,
- []>;
+ [/* no pattern */]>;
+def MPYHHAvec:
+ MPYHHAInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
+
def MPYHHAr32:
- RRForm<0b01100010110, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
- "mpyhha\t$rT, $rA, $rB", IntegerMulDiv,
- []>;
+ MPYHHAInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>;
// mpyhhu: Multiply high-high, unsigned
-def MPYHHUvec:
- RRForm<0b01110011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+
+class MPYHHUInst<dag OOL, dag IOL>:
+ RRForm<0b01110011110, OOL, IOL,
"mpyhhu\t$rT, $rA, $rB", IntegerMulDiv,
- []>;
+ [/* no pattern */]>;
+def MPYHHUvec:
+ MPYHHUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
+
def MPYHHUr32:
- RRForm<0b01110011110, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
- "mpyhhu\t$rT, $rA, $rB", IntegerMulDiv,
- []>;
+ MPYHHUInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>;
// mpyhhau: Multiply high-high, unsigned, add to $rT:
-def MPYHHAUvec:
- RRForm<0b01110010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "mpyhhau\t$rT, $rA, $rB", IntegerMulDiv,
- []>;
-def MPYHHAUr32:
- RRForm<0b01110010110, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
+class MPYHHAUInst<dag OOL, dag IOL>:
+ RRForm<0b01110010110, OOL, IOL,
"mpyhhau\t$rT, $rA, $rB", IntegerMulDiv,
- []>;
+ [/* no pattern */]>;
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// v4i32, i32 multiply instruction sequence:
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-def MPYv4i32:
- Pat<(mul (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)),
- (Av4i32
- (Av4i32 (MPYHv4i32 VECREG:$rA, VECREG:$rB),
- (MPYHv4i32 VECREG:$rB, VECREG:$rA)),
- (MPYUv4i32 VECREG:$rA, VECREG:$rB))>;
-
-def MPYi32:
- Pat<(mul R32C:$rA, R32C:$rB),
- (Ar32
- (Ar32 (MPYHr32 R32C:$rA, R32C:$rB),
- (MPYHr32 R32C:$rB, R32C:$rA)),
- (MPYUr32 R32C:$rA, R32C:$rB))>;
+def MPYHHAUvec:
+ MPYHHAUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
+
+def MPYHHAUr32:
+ MPYHHAUInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>;
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// clz: Count leading zeroes
@@ -983,7 +995,7 @@ class CLZInst<dag OOL, dag IOL, list<dag> pattern>:
class CLZRegInst<RegisterClass rclass>:
CLZInst<(outs rclass:$rT), (ins rclass:$rA),
- [(set rclass:$rT, (ctlz rclass:$rA))]>;
+ [(set rclass:$rT, (ctlz rclass:$rA))]>;
class CLZVecInst<ValueType vectype>:
CLZInst<(outs VECREG:$rT), (ins VECREG:$rA),
@@ -1424,7 +1436,7 @@ multiclass BitwiseOr
def f64: ORInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB),
[/* no pattern */]>;
- // scalar->vector promotion:
+ // scalar->vector promotion, prefslot2vec:
def v16i8_i8: ORPromoteScalar<R8C>;
def v8i16_i16: ORPromoteScalar<R16C>;
def v4i32_i32: ORPromoteScalar<R32C>;
@@ -1432,7 +1444,7 @@ multiclass BitwiseOr
def v4f32_f32: ORPromoteScalar<R32FP>;
def v2f64_f64: ORPromoteScalar<R64FP>;
- // extract element 0:
+ // vector->scalar demotion, vec2prefslot:
def i8_v16i8: ORExtractElt<R8C>;
def i16_v8i16: ORExtractElt<R16C>;
def i32_v4i32: ORExtractElt<R32C>;
@@ -1831,6 +1843,13 @@ class SELBVecInst<ValueType vectype>:
(and (vnot (vectype VECREG:$rC)),
(vectype VECREG:$rA))))]>;
+class SELBVecVCondInst<ValueType vectype>:
+ SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ [(set (vectype VECREG:$rT),
+ (select (vectype VECREG:$rC),
+ (vectype VECREG:$rB),
+ (vectype VECREG:$rA)))]>;
+
class SELBVecCondInst<ValueType vectype>:
SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, R32C:$rC),
[(set (vectype VECREG:$rT),
@@ -1867,8 +1886,21 @@ multiclass SelectBits
def v4i32_cond: SELBVecCondInst<v4i32>;
def v2i64_cond: SELBVecCondInst<v2i64>;
+ def v16i8_vcond: SELBVecCondInst<v16i8>;
+ def v8i16_vcond: SELBVecCondInst<v8i16>;
+ def v4i32_vcond: SELBVecCondInst<v4i32>;
+ def v2i64_vcond: SELBVecCondInst<v2i64>;
+
+ def v4f32_cond:
+ SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ [(set (v4f32 VECREG:$rT),
+ (select (v4i32 VECREG:$rC),
+ (v4f32 VECREG:$rB),
+ (v4f32 VECREG:$rA)))]>;
+
// SELBr64_cond is defined further down, look for i64 comparisons
def r32_cond: SELBRegCondInst<R32C, R32C>;
+ def f32_cond: SELBRegCondInst<R32C, R32FP>;
def r16_cond: SELBRegCondInst<R16C, R16C>;
def r8_cond: SELBRegCondInst<R8C, R8C>;
}
@@ -2454,11 +2486,11 @@ class ROTQBIInst<dag OOL, dag IOL, list<dag> pattern>:
RotateShift, pattern>;
class ROTQBIVecInst<ValueType vectype>:
- ROTQBIInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ ROTQBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
[/* no pattern yet */]>;
class ROTQBIRegInst<RegisterClass rclass>:
- ROTQBIInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ ROTQBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
[/* no pattern yet */]>;
multiclass RotateQuadByBitCount
@@ -2645,9 +2677,6 @@ def : Pat<(srl R32C:$rA, (i8 imm:$val)),
// ROTQMBYvec: This is a vector form merely so that when used in an
// instruction pattern, type checking will succeed. This instruction assumes
// that the user knew to negate $rB.
-//
-// Using the SPUrotquad_rz_bytes target-specific DAG node, the patterns
-// ensure that $rB is negated.
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
class ROTQMBYInst<dag OOL, dag IOL, list<dag> pattern>:
@@ -2660,8 +2689,7 @@ class ROTQMBYVecInst<ValueType vectype>:
class ROTQMBYRegInst<RegisterClass rclass>:
ROTQMBYInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
- [(set rclass:$rT,
- (SPUrotquad_rz_bytes rclass:$rA, R32C:$rB))]>;
+ [/* no pattern */]>;
multiclass RotateQuadBytes
{
@@ -2676,32 +2704,17 @@ multiclass RotateQuadBytes
defm ROTQMBY : RotateQuadBytes;
-def : Pat<(SPUrotquad_rz_bytes (v16i8 VECREG:$rA), R32C:$rB),
- (ROTQMBYv16i8 VECREG:$rA, (SFIr32 R32C:$rB, 0))>;
-def : Pat<(SPUrotquad_rz_bytes (v8i16 VECREG:$rA), R32C:$rB),
- (ROTQMBYv8i16 VECREG:$rA, (SFIr32 R32C:$rB, 0))>;
-def : Pat<(SPUrotquad_rz_bytes (v4i32 VECREG:$rA), R32C:$rB),
- (ROTQMBYv4i32 VECREG:$rA, (SFIr32 R32C:$rB, 0))>;
-def : Pat<(SPUrotquad_rz_bytes (v2i64 VECREG:$rA), R32C:$rB),
- (ROTQMBYv2i64 VECREG:$rA, (SFIr32 R32C:$rB, 0))>;
-def : Pat<(SPUrotquad_rz_bytes GPRC:$rA, R32C:$rB),
- (ROTQMBYr128 GPRC:$rA, (SFIr32 R32C:$rB, 0))>;
-def : Pat<(SPUrotquad_rz_bytes R64C:$rA, R32C:$rB),
- (ROTQMBYr64 R64C:$rA, (SFIr32 R32C:$rB, 0))>;
-
class ROTQMBYIInst<dag OOL, dag IOL, list<dag> pattern>:
RI7Form<0b10111111100, OOL, IOL, "rotqmbyi\t$rT, $rA, $val",
RotateShift, pattern>;
class ROTQMBYIVecInst<ValueType vectype>:
ROTQMBYIInst<(outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val),
- [(set (vectype VECREG:$rT),
- (SPUrotquad_rz_bytes (vectype VECREG:$rA), (i32 uimm7:$val)))]>;
+ [/* no pattern */]>;
class ROTQMBYIRegInst<RegisterClass rclass, Operand optype, ValueType inttype, PatLeaf pred>:
ROTQMBYIInst<(outs rclass:$rT), (ins rclass:$rA, optype:$val),
- [(set rclass:$rT,
- (SPUrotquad_rz_bytes rclass:$rA, (inttype pred:$val)))]>;
+ [/* no pattern */]>;
multiclass RotateQuadBytesImm
{
@@ -2725,8 +2738,8 @@ class ROTQMBYBIInst<dag OOL, dag IOL, list<dag> pattern>:
RotateShift, pattern>;
class ROTQMBYBIVecInst<ValueType vectype>:
- ROTQMBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [/* no pattern, intrinsic? */]>;
+ ROTQMBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+ [/* no pattern, */]>;
multiclass RotateMaskQuadByBitCount
{
@@ -2768,19 +2781,6 @@ multiclass RotateMaskQuadByBits
defm ROTQMBI: RotateMaskQuadByBits;
-def : Pat<(SPUrotquad_rz_bits (v16i8 VECREG:$rA), R32C:$rB),
- (ROTQMBIv16i8 VECREG:$rA, (SFIr32 R32C:$rB, 0))>;
-def : Pat<(SPUrotquad_rz_bits (v8i16 VECREG:$rA), R32C:$rB),
- (ROTQMBIv8i16 VECREG:$rA, (SFIr32 R32C:$rB, 0))>;
-def : Pat<(SPUrotquad_rz_bits (v4i32 VECREG:$rA), R32C:$rB),
- (ROTQMBIv4i32 VECREG:$rA, (SFIr32 R32C:$rB, 0))>;
-def : Pat<(SPUrotquad_rz_bits (v2i64 VECREG:$rA), R32C:$rB),
- (ROTQMBIv2i64 VECREG:$rA, (SFIr32 R32C:$rB, 0))>;
-def : Pat<(SPUrotquad_rz_bits GPRC:$rA, R32C:$rB),
- (ROTQMBIr128 GPRC:$rA, (SFIr32 R32C:$rB, 0))>;
-def : Pat<(SPUrotquad_rz_bits R64C:$rA, R32C:$rB),
- (ROTQMBIr64 R64C:$rA, (SFIr32 R32C:$rB, 0))>;
-
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// Rotate quad and mask by bits, immediate
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
@@ -2791,13 +2791,11 @@ class ROTQMBIIInst<dag OOL, dag IOL, list<dag> pattern>:
class ROTQMBIIVecInst<ValueType vectype>:
ROTQMBIIInst<(outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val),
- [(set (vectype VECREG:$rT),
- (SPUrotquad_rz_bits (vectype VECREG:$rA), (i32 uimm7:$val)))]>;
+ [/* no pattern */]>;
class ROTQMBIIRegInst<RegisterClass rclass>:
ROTQMBIIInst<(outs rclass:$rT), (ins rclass:$rA, rotNeg7imm:$val),
- [(set rclass:$rT,
- (SPUrotquad_rz_bits rclass:$rA, (i32 uimm7:$val)))]>;
+ [/* no pattern */]>;
multiclass RotateMaskQuadByBitsImm
{
@@ -3142,6 +3140,15 @@ multiclass CmpGtrWordImm
def r32: CGTIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val),
[(set R32C:$rT, (setgt R32C:$rA, i32ImmSExt10:$val))]>;
+
+ // CGTIv4f32, CGTIf32: These are used in the f32 fdiv instruction sequence:
+ def v4f32: CGTIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (v4i32 VECREG:$rT),
+ (setgt (v4i32 (bitconvert (v4f32 VECREG:$rA))),
+ (v4i32 v4i32SExt16Imm:$val)))]>;
+
+ def f32: CGTIInst<(outs R32C:$rT), (ins R32FP:$rA, s10imm_i32:$val),
+ [/* no pattern */]>;
}
class CLGTBInst<dag OOL, dag IOL, list<dag> pattern> :
@@ -3750,62 +3757,63 @@ let isTerminator = 1, isBarrier = 1 in {
class FAInst<dag OOL, dag IOL, list<dag> pattern>:
RRForm<0b01011000100, OOL, IOL, "fa\t$rT, $rA, $rB",
- SPrecFP, pattern>;
+ SPrecFP, pattern>;
class FAVecInst<ValueType vectype>:
FAInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
[(set (vectype VECREG:$rT),
- (fadd (vectype VECREG:$rA), (vectype VECREG:$rB)))]>;
+ (fadd (vectype VECREG:$rA), (vectype VECREG:$rB)))]>;
multiclass SFPAdd
{
def v4f32: FAVecInst<v4f32>;
- def r32: FAInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
- [(set R32FP:$rT, (fadd R32FP:$rA, R32FP:$rB))]>;
+ def f32: FAInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
+ [(set R32FP:$rT, (fadd R32FP:$rA, R32FP:$rB))]>;
}
defm FA : SFPAdd;
class FSInst<dag OOL, dag IOL, list<dag> pattern>:
RRForm<0b01011000100, OOL, IOL, "fs\t$rT, $rA, $rB",
- SPrecFP, pattern>;
+ SPrecFP, pattern>;
class FSVecInst<ValueType vectype>:
FSInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (vectype VECREG:$rT),
- (fsub (vectype VECREG:$rA), (vectype VECREG:$rB)))]>;
+ [(set (vectype VECREG:$rT),
+ (fsub (vectype VECREG:$rA), (vectype VECREG:$rB)))]>;
multiclass SFPSub
{
def v4f32: FSVecInst<v4f32>;
- def r32: FSInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
- [(set R32FP:$rT, (fsub R32FP:$rA, R32FP:$rB))]>;
+ def f32: FSInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
+ [(set R32FP:$rT, (fsub R32FP:$rA, R32FP:$rB))]>;
}
defm FS : SFPSub;
// Floating point reciprocal estimate
-def FREv4f32 :
- RRForm_1<0b00011101100, (outs VECREG:$rT), (ins VECREG:$rA),
- "frest\t$rT, $rA", SPrecFP,
- [(set (v4f32 VECREG:$rT), (SPUreciprocalEst (v4f32 VECREG:$rA)))]>;
-def FREf32 :
- RRForm_1<0b00011101100, (outs R32FP:$rT), (ins R32FP:$rA),
- "frest\t$rT, $rA", SPrecFP,
- [(set R32FP:$rT, (SPUreciprocalEst R32FP:$rA))]>;
+class FRESTInst<dag OOL, dag IOL>:
+ RRForm_1<0b00110111000, OOL, IOL,
+ "frest\t$rT, $rA", SPrecFP,
+ [/* no pattern */]>;
+
+def FRESTv4f32 :
+ FRESTInst<(outs VECREG:$rT), (ins VECREG:$rA)>;
+
+def FRESTf32 :
+ FRESTInst<(outs R32FP:$rT), (ins R32FP:$rA)>;
// Floating point interpolate (used in conjunction with reciprocal estimate)
def FIv4f32 :
RRForm<0b00101011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
"fi\t$rT, $rA, $rB", SPrecFP,
- [(set (v4f32 VECREG:$rT), (SPUinterpolate (v4f32 VECREG:$rA),
- (v4f32 VECREG:$rB)))]>;
+ [/* no pattern */]>;
def FIf32 :
RRForm<0b00101011110, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
"fi\t$rT, $rA, $rB", SPrecFP,
- [(set R32FP:$rT, (SPUinterpolate R32FP:$rA, R32FP:$rB))]>;
+ [/* no pattern */]>;
//--------------------------------------------------------------------------
// Basic single precision floating point comparisons:
@@ -4445,12 +4453,14 @@ def : Pat<(SPUindirect (SPUhi tconstpool:$in, 0),
(SPUlo tconstpool:$in, 0)),
(IOHLlo (ILHUhi tconstpool:$in), tconstpool:$in)>;
+/*
def : Pat<(SPUindirect R32C:$sp, i32ImmSExt10:$imm),
(AIr32 R32C:$sp, i32ImmSExt10:$imm)>;
def : Pat<(SPUindirect R32C:$sp, imm:$imm),
(Ar32 R32C:$sp,
(IOHLr32 (ILHUr32 (HI16 imm:$imm)), (LO16 imm:$imm)))>;
+ */
def : Pat<(add (SPUhi tglobaladdr:$in, 0), (SPUlo tglobaladdr:$in, 0)),
(IOHLlo (ILHUhi tglobaladdr:$in), tglobaladdr:$in)>;
@@ -4466,5 +4476,7 @@ def : Pat<(add (SPUhi tconstpool:$in, 0), (SPUlo tconstpool:$in, 0)),
// Intrinsics:
include "CellSDKIntrinsics.td"
+// Various math operator instruction sequences
+include "SPUMathInstr.td"
// 64-bit "instructions"/support
include "SPU64InstrInfo.td"
diff --git a/lib/Target/CellSPU/SPUMathInstr.td b/lib/Target/CellSPU/SPUMathInstr.td
new file mode 100644
index 0000000..38279a0
--- /dev/null
+++ b/lib/Target/CellSPU/SPUMathInstr.td
@@ -0,0 +1,99 @@
+//======--- SPUMathInstr.td - Cell SPU math operations -*- tablegen -*--======//
+//
+// Cell SPU math operations
+//
+// This target description file contains instruction sequences for various
+// math operations, such as vector multiplies, i32 multiply, etc., for the
+// SPU's i32, i16, i8 and corresponding vector types.
+//
+// Any resemblance to libsimdmath or the Cell SDK simdmath library is
+// purely and completely coincidental.
+//
+// Primary author: Scott Michel (scottm@aero.org)
+//===----------------------------------------------------------------------===//
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// v16i8 multiply instruction sequence:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+def : Pat<(mul (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)),
+ (ORv4i32
+ (ANDv4i32
+ (SELBv4i32 (MPYv8i16 VECREG:$rA, VECREG:$rB),
+ (SHLHIv8i16 (MPYv8i16 (ROTMAHIv8i16 VECREG:$rA, 8),
+ (ROTMAHIv8i16 VECREG:$rB, 8)), 8),
+ (FSMBIv8i16 0x2222)),
+ (ILAv4i32 0x0000ffff)),
+ (SHLIv4i32
+ (SELBv4i32 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 16),
+ (ROTMAIv4i32_i32 VECREG:$rB, 16)),
+ (SHLHIv8i16 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 8),
+ (ROTMAIv4i32_i32 VECREG:$rB, 8)), 8),
+ (FSMBIv8i16 0x2222)), 16))>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// v8i16 multiply instruction sequence:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+def : Pat<(mul (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)),
+ (SELBv8i16 (MPYv8i16 VECREG:$rA, VECREG:$rB),
+ (SHLIv4i32 (MPYHHv8i16 VECREG:$rA, VECREG:$rB), 16),
+ (FSMBIv8i16 0xcccc))>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// v4i32, i32 multiply instruction sequence:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+def MPYv4i32:
+ Pat<(mul (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)),
+ (Av4i32
+ (Av4i32 (MPYHv4i32 VECREG:$rA, VECREG:$rB),
+ (MPYHv4i32 VECREG:$rB, VECREG:$rA)),
+ (MPYUv4i32 VECREG:$rA, VECREG:$rB))>;
+
+def MPYi32:
+ Pat<(mul R32C:$rA, R32C:$rB),
+ (Ar32
+ (Ar32 (MPYHr32 R32C:$rA, R32C:$rB),
+ (MPYHr32 R32C:$rB, R32C:$rA)),
+ (MPYUr32 R32C:$rA, R32C:$rB))>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// f32, v4f32 divide instruction sequence:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+// Reciprocal estimate and interpolation
+def Interpf32: CodeFrag<(FIf32 R32FP:$rB, (FRESTf32 R32FP:$rB))>;
+// Division estimate
+def DivEstf32: CodeFrag<(FMf32 R32FP:$rA, Interpf32.Fragment)>;
+// Newton-Raphson iteration
+def NRaphf32: CodeFrag<(FMAf32 (FNMSf32 DivEstf32.Fragment, R32FP:$rB, R32FP:$rA),
+ Interpf32.Fragment,
+ DivEstf32.Fragment)>;
+// Epsilon addition
+def Epsilonf32: CodeFrag<(AIf32 NRaphf32.Fragment, 1)>;
+
+def : Pat<(fdiv R32FP:$rA, R32FP:$rB),
+ (SELBf32_cond NRaphf32.Fragment,
+ Epsilonf32.Fragment,
+ (CGTIf32 (FNMSf32 R32FP:$rB, Epsilonf32.Fragment, R32FP:$rA), -1))>;
+
+// Reciprocal estimate and interpolation
+def Interpv4f32: CodeFrag<(FIv4f32 (v4f32 VECREG:$rB), (FRESTv4f32 (v4f32 VECREG:$rB)))>;
+// Division estimate
+def DivEstv4f32: CodeFrag<(FMv4f32 (v4f32 VECREG:$rA), Interpv4f32.Fragment)>;
+// Newton-Raphson iteration
+def NRaphv4f32: CodeFrag<(FMAv4f32 (FNMSv4f32 DivEstv4f32.Fragment,
+ (v4f32 VECREG:$rB),
+ (v4f32 VECREG:$rA)),
+ Interpv4f32.Fragment,
+ DivEstv4f32.Fragment)>;
+// Epsilon addition
+def Epsilonv4f32: CodeFrag<(AIv4f32 NRaphv4f32.Fragment, 1)>;
+
+def : Pat<(fdiv (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)),
+ (SELBv4f32_cond NRaphv4f32.Fragment,
+ Epsilonv4f32.Fragment,
+ (CGTIv4f32 (FNMSv4f32 (v4f32 VECREG:$rB),
+ Epsilonv4f32.Fragment,
+ (v4f32 VECREG:$rA)), -1))>;
diff --git a/lib/Target/CellSPU/SPUNodes.td b/lib/Target/CellSPU/SPUNodes.td
index 5cf229e..89a52ee 100644
--- a/lib/Target/CellSPU/SPUNodes.td
+++ b/lib/Target/CellSPU/SPUNodes.td
@@ -87,24 +87,6 @@ def SPUcntb : SDNode<"SPUISD::CNTB", SDTIntUnaryOp>;
// SPUISelLowering.h):
def SPUshuffle: SDNode<"SPUISD::SHUFB", SDT_SPUshuffle, []>;
-// SPU 16-bit multiply
-def SPUmpy_vec: SDNode<"SPUISD::MPY", SPUVecBinop, []>;
-
-// SPU multiply unsigned, used in instruction lowering for v4i32
-// multiplies:
-def SPUmpyu_vec: SDNode<"SPUISD::MPYU", SPUVecBinop, []>;
-def SPUmpyu_int: SDNode<"SPUISD::MPYU", SDTIntBinOp, []>;
-
-// SPU 16-bit multiply high x low, shift result 16-bits
-// Used to compute intermediate products for 32-bit multiplies
-def SPUmpyh_vec: SDNode<"SPUISD::MPYH", SPUVecBinop, []>;
-def SPUmpyh_int: SDNode<"SPUISD::MPYH", SDTIntBinOp, []>;
-
-// SPU 16-bit multiply high x high, 32-bit product
-// Used to compute intermediate products for 16-bit multiplies
-def SPUmpyhh_vec: SDNode<"SPUISD::MPYHH", SPUVecBinop, []>;
-def SPUmpyhh_int: SDNode<"SPUISD::MPYHH", SDTIntBinOp, []>;
-
// Shift left quadword by bits and bytes
def SPUshlquad_l_bits: SDNode<"SPUISD::SHLQUAD_L_BITS", SPUvecshift_type, []>;
def SPUshlquad_l_bytes: SDNode<"SPUISD::SHLQUAD_L_BYTES", SPUvecshift_type, []>;
@@ -117,11 +99,6 @@ def SPUvec_sra: SDNode<"SPUISD::VEC_SRA", SPUvecshift_type, []>;
def SPUvec_rotl: SDNode<"SPUISD::VEC_ROTL", SPUvecshift_type, []>;
def SPUvec_rotr: SDNode<"SPUISD::VEC_ROTR", SPUvecshift_type, []>;
-def SPUrotquad_rz_bytes: SDNode<"SPUISD::ROTQUAD_RZ_BYTES",
- SPUvecshift_type, []>;
-def SPUrotquad_rz_bits: SDNode<"SPUISD::ROTQUAD_RZ_BITS",
- SPUvecshift_type, []>;
-
// Vector rotate left, bits shifted out of the left are rotated in on the right
def SPUrotbytes_left: SDNode<"SPUISD::ROTBYTES_LEFT",
SPUvecshift_type, []>;
@@ -141,12 +118,6 @@ def SPUselb: SDNode<"SPUISD::SELB", SPUselb_type, []>;
// SPU gather bits instruction:
def SPUgatherbits: SDNode<"SPUISD::GATHER_BITS", SPUgatherbits_type, []>;
-// SPU floating point interpolate
-def SPUinterpolate : SDNode<"SPUISD::FPInterp", SDTFPBinOp, []>;
-
-// SPU floating point reciprocal estimate (used for fdiv)
-def SPUreciprocalEst: SDNode<"SPUISD::FPRecipEst", SDTFPUnaryOp, []>;
-
def SDTprefslot2vec: SDTypeProfile<1, 1, []>;
def SPUprefslot2vec: SDNode<"SPUISD::PREFSLOT2VEC", SDTprefslot2vec, []>;
diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp
index cf4089f..381522d 100644
--- a/lib/Target/CellSPU/SPURegisterInfo.cpp
+++ b/lib/Target/CellSPU/SPURegisterInfo.cpp
@@ -238,7 +238,7 @@ SPURegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const
SPU::R0, /* link register */
0 /* end */
};
-
+
return SPU_CalleeSaveRegs;
}
@@ -268,7 +268,7 @@ SPURegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const
&SPU::GPRCRegClass, /* link register */
0 /* end */
};
-
+
return SPU_CalleeSaveRegClasses;
}
@@ -339,10 +339,13 @@ SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
// Now add the frame object offset to the offset from r1.
int Offset = MFI->getObjectOffset(FrameIndex);
- // Most instructions, except for generated FrameIndex additions using AIr32,
- // have the immediate in operand 1. AIr32, in this case, has the immediate
- // in operand 2.
- unsigned OpNo = (MI.getOpcode() != SPU::AIr32 ? 1 : 2);
+ // Most instructions, except for generated FrameIndex additions using AIr32
+ // and ILAr32, have the immediate in operand 1. AIr32 and ILAr32 have the
+ // immediate in operand 2.
+ unsigned OpNo = 1;
+ if (MI.getOpcode() == SPU::AIr32 || MI.getOpcode() == SPU::ILAr32)
+ OpNo = 2;
+
MachineOperand &MO = MI.getOperand(OpNo);
// Offset is biased by $lr's slot at the bottom.
@@ -355,7 +358,7 @@ SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
if (Offset > SPUFrameInfo::maxFrameOffset()
|| Offset < SPUFrameInfo::minFrameOffset()) {
cerr << "Large stack adjustment ("
- << Offset
+ << Offset
<< ") in SPURegisterInfo::eliminateFrameIndex.";
} else {
MO.ChangeToImmediate(Offset);
@@ -371,7 +374,7 @@ SPURegisterInfo::determineFrameLayout(MachineFunction &MF) const
// Get the number of bytes to allocate from the FrameInfo
unsigned FrameSize = MFI->getStackSize();
-
+
// Get the alignments provided by the target, and the maximum alignment
// (if any) of the fixed frame objects.
unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
@@ -381,7 +384,7 @@ SPURegisterInfo::determineFrameLayout(MachineFunction &MF) const
// Get the maximum call frame size of all the calls.
unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
-
+
// If we have dynamic alloca then maxCallFrameSize needs to be aligned so
// that allocations will be aligned.
if (MFI->hasVarSizedObjects())
@@ -389,7 +392,7 @@ SPURegisterInfo::determineFrameLayout(MachineFunction &MF) const
// Update maximum call frame size.
MFI->setMaxCallFrameSize(maxCallFrameSize);
-
+
// Include call frame size in total.
FrameSize += maxCallFrameSize;
@@ -418,18 +421,18 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const
MachineBasicBlock::iterator MBBI = MBB.begin();
MachineFrameInfo *MFI = MF.getFrameInfo();
MachineModuleInfo *MMI = MFI->getMachineModuleInfo();
-
+
// Prepare for debug frame info.
bool hasDebugInfo = MMI && MMI->hasDebugInfo();
unsigned FrameLabelId = 0;
-
+
// Move MBBI back to the beginning of the function.
MBBI = MBB.begin();
-
+
// Work out frame sizes.
determineFrameLayout(MF);
int FrameSize = MFI->getStackSize();
-
+
assert((FrameSize & 0xf) == 0
&& "SPURegisterInfo::emitPrologue: FrameSize not aligned");
@@ -440,7 +443,7 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const
FrameLabelId = MMI->NextLabelID();
BuildMI(MBB, MBBI, TII.get(SPU::DBG_LABEL)).addImm(FrameLabelId);
}
-
+
// Adjust stack pointer, spilling $lr -> 16($sp) and $sp -> -FrameSize($sp)
// for the ABI
BuildMI(MBB, MBBI, TII.get(SPU::STQDr32), SPU::R0).addImm(16)
@@ -476,15 +479,15 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const
cerr << "Unhandled frame size: " << FrameSize << "\n";
abort();
}
-
+
if (hasDebugInfo) {
std::vector<MachineMove> &Moves = MMI->getFrameMoves();
-
+
// Show update of SP.
MachineLocation SPDst(MachineLocation::VirtualFP);
MachineLocation SPSrc(MachineLocation::VirtualFP, -FrameSize);
Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
-
+
// Add callee saved registers to move list.
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
@@ -495,11 +498,11 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const
MachineLocation CSSrc(Reg);
Moves.push_back(MachineMove(FrameLabelId, CSDst, CSSrc));
}
-
+
// Mark effective beginning of when frame pointer is ready.
unsigned ReadyLabelId = MMI->NextLabelID();
BuildMI(MBB, MBBI, TII.get(SPU::DBG_LABEL)).addImm(ReadyLabelId);
-
+
MachineLocation FPDst(SPU::R1);
MachineLocation FPSrc(MachineLocation::VirtualFP);
Moves.push_back(MachineMove(ReadyLabelId, FPDst, FPSrc));
diff --git a/test/CodeGen/CellSPU/fdiv.ll b/test/CodeGen/CellSPU/fdiv.ll
index 826a2fa..d121c3f 100644
--- a/test/CodeGen/CellSPU/fdiv.ll
+++ b/test/CodeGen/CellSPU/fdiv.ll
@@ -1,9 +1,11 @@
; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
; RUN: grep frest %t1.s | count 2
; RUN: grep -w fi %t1.s | count 2
-; RUN: grep fm %t1.s | count 4
+; RUN: grep -w fm %t1.s | count 2
; RUN: grep fma %t1.s | count 2
-; RUN: grep fnms %t1.s | count 2
+; RUN: grep fnms %t1.s | count 4
+; RUN: grep cgti %t1.s | count 2
+; RUN: grep selb %t1.s | count 2
;
; This file includes standard floating point arithmetic instructions
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
diff --git a/test/CodeGen/CellSPU/i64ops.ll b/test/CodeGen/CellSPU/i64ops.ll
index 5e7897b..51abd44 100644
--- a/test/CodeGen/CellSPU/i64ops.ll
+++ b/test/CodeGen/CellSPU/i64ops.ll
@@ -1,8 +1,5 @@
; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
-; RUN: grep {fsmbi.*61680} %t1.s | count 1
-; RUN: grep rotqmbyi %t1.s | count 1
-; RUN: grep rotmai %t1.s | count 1
-; RUN: grep selb %t1.s | count 1
+; RUN: grep xswd %t1.s | count 1
; RUN: grep shufb %t1.s | count 2
; RUN: grep cg %t1.s | count 1
; RUN: grep addx %t1.s | count 1
diff --git a/test/CodeGen/CellSPU/mul_ops.ll b/test/CodeGen/CellSPU/mul_ops.ll
index 843505f..085ce55 100644
--- a/test/CodeGen/CellSPU/mul_ops.ll
+++ b/test/CodeGen/CellSPU/mul_ops.ll
@@ -8,7 +8,7 @@
; RUN: grep and %t1.s | count 2
; RUN: grep selb %t1.s | count 6
; RUN: grep fsmbi %t1.s | count 4
-; RUN: grep shli %t1.s | count 2
+; RUN: grep shli %t1.s | count 4
; RUN: grep shlhi %t1.s | count 4
; RUN: grep ila %t1.s | count 2
; RUN: grep xsbh %t1.s | count 4
diff --git a/test/CodeGen/CellSPU/shift_ops.ll b/test/CodeGen/CellSPU/shift_ops.ll
index b6629ca..5b60dc1 100644
--- a/test/CodeGen/CellSPU/shift_ops.ll
+++ b/test/CodeGen/CellSPU/shift_ops.ll
@@ -1,10 +1,21 @@
; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
-; RUN: grep shlh %t1.s | count 84
-; RUN: grep shlhi %t1.s | count 51
-; RUN: grep shl %t1.s | count 168
-; RUN: grep shli %t1.s | count 51
-; RUN: grep xshw %t1.s | count 5
-; RUN: grep and %t1.s | count 5
+; RUN: grep -w shlh %t1.s | count 9
+; RUN: grep -w shlhi %t1.s | count 3
+; RUN: grep -w shl %t1.s | count 9
+; RUN: grep -w shli %t1.s | count 3
+; RUN: grep -w xshw %t1.s | count 5
+; RUN: grep -w and %t1.s | count 5
+; RUN: grep -w andi %t1.s | count 2
+; RUN: grep -w rotmi %t1.s | count 2
+; RUN: grep -w rotqmbyi %t1.s | count 1
+; RUN: grep -w rotqmbii %t1.s | count 2
+; RUN: grep -w rotqmby %t1.s | count 1
+; RUN: grep -w rotqmbi %t1.s | count 1
+; RUN: grep -w rotqbyi %t1.s | count 1
+; RUN: grep -w rotqbii %t1.s | count 2
+; RUN: grep -w rotqbybi %t1.s | count 1
+; RUN: grep -w sfi %t1.s | count 3
+
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
target triple = "spu"
@@ -210,3 +221,57 @@ define i32 @shli_i32_12(i32 zeroext %arg1) zeroext {
%A = shl i32 0, %arg1
ret i32 %A
}
+
+;; i64 shift left
+
+define i64 @shl_i64_1(i64 %arg1) {
+ %A = shl i64 %arg1, 9
+ ret i64 %A
+}
+
+define i64 @shl_i64_2(i64 %arg1) {
+ %A = shl i64 %arg1, 3
+ ret i64 %A
+}
+
+define i64 @shl_i64_3(i64 %arg1, i32 %shift) {
+ %1 = zext i32 %shift to i64
+ %2 = shl i64 %arg1, %1
+ ret i64 %2
+}
+
+;; i64 shift right logical (shift 0s in from the left)
+
+define i64 @lshr_i64_1(i64 %arg1) {
+ %1 = lshr i64 %arg1, 9
+ ret i64 %1
+}
+
+define i64 @lshr_i64_2(i64 %arg1) {
+ %1 = lshr i64 %arg1, 3
+ ret i64 %1
+}
+
+define i64 @lshr_i64_3(i64 %arg1, i32 %shift) {
+ %1 = zext i32 %shift to i64
+ %2 = lshr i64 %arg1, %1
+ ret i64 %2
+}
+
+;; i64 shift right arithmetic (shift copies of the sign bit in from the left)
+
+define i64 @ashr_i64_1(i64 %arg) {
+ %1 = ashr i64 %arg, 9
+ ret i64 %1
+}
+
+define i64 @ashr_i64_2(i64 %arg) {
+ %1 = ashr i64 %arg, 3
+ ret i64 %1
+}
+
+define i64 @ashr_i64_3(i64 %arg1, i32 %shift) {
+ %1 = zext i32 %shift to i64
+ %2 = ashr i64 %arg1, %1
+ ret i64 %2
+}
diff --git a/test/CodeGen/CellSPU/useful-harnesses/i64operations.c b/test/CodeGen/CellSPU/useful-harnesses/i64operations.c
index 7b86070..3819797 100644
--- a/test/CodeGen/CellSPU/useful-harnesses/i64operations.c
+++ b/test/CodeGen/CellSPU/useful-harnesses/i64operations.c
@@ -34,19 +34,45 @@ struct pred_s preds[] = {
{ "neq", i64_neq, i64_neq_select }
};
+uint64_t i64_shl_const(uint64_t a) {
+ return a << 10;
+}
+
+uint64_t i64_shl(uint64_t a, int amt) {
+ return a << amt;
+}
+
+uint64_t i64_srl_const(uint64_t a) {
+ return a >> 10;
+}
+
+uint64_t i64_srl(uint64_t a, int amt) {
+ return a >> amt;
+}
+
+int64_t i64_sra_const(int64_t a) {
+ return a >> 10;
+}
+
+int64_t i64_sra(int64_t a, int amt) {
+ return a >> amt;
+}
+
int main(void) {
int i;
- int64_t a = 1234567890000LL;
- int64_t b = 2345678901234LL;
- int64_t c = 1234567890001LL;
- int64_t d = 10001LL;
- int64_t e = 10000LL;
+ int64_t a = 1234567890003LL;
+ int64_t b = 2345678901235LL;
+ int64_t c = 1234567890001LL;
+ int64_t d = 10001LL;
+ int64_t e = 10000LL;
+ int64_t f = -1068103409991LL;
printf("a = %16lld (0x%016llx)\n", a, a);
printf("b = %16lld (0x%016llx)\n", b, b);
printf("c = %16lld (0x%016llx)\n", c, c);
printf("d = %16lld (0x%016llx)\n", d, d);
printf("e = %16lld (0x%016llx)\n", e, e);
+ printf("f = %16lld (0x%016llx)\n", f, f);
printf("----------------------------------------\n");
for (i = 0; i < sizeof(preds)/sizeof(preds[0]); ++i) {
@@ -64,5 +90,23 @@ int main(void) {
printf("----------------------------------------\n");
}
+ printf("a = 0x%016llx\n", a);
+ printf("i64_shl_const(a) = 0x%016llx\n", i64_shl_const(a));
+ printf("i64_shl(a) = 0x%016llx\n", i64_shl(a, 5));
+ printf("i64_srl_const(a) = 0x%016llx\n", i64_srl_const(a));
+ printf("i64_srl(a) = 0x%016llx\n", i64_srl(a, 5));
+ printf("i64_sra_const(a) = 0x%016llx\n", i64_sra_const(a));
+ printf("i64_sra(a) = 0x%016llx\n", i64_sra(a, 5));
+ printf("----------------------------------------\n");
+
+ printf("f = 0x%016llx\n", f);
+ printf("i64_shl_const(f) = 0x%016llx\n", i64_shl_const(f));
+ printf("i64_shl(f) = 0x%016llx\n", i64_shl(f, 10));
+ printf("i64_srl_const(f) = 0x%016llx\n", i64_srl_const(f));
+ printf("i64_srl(f) = 0x%016llx\n", i64_srl(f, 10));
+ printf("i64_sra_const(f) = 0x%016llx\n", i64_sra_const(f));
+ printf("i64_sra(f) = 0x%016llx\n", i64_sra(f, 10));
+ printf("----------------------------------------\n");
+
return 0;
}