author    Scott Michel <scottm@aero.org>    2009-01-26 03:31:40 +0000
committer Scott Michel <scottm@aero.org>    2009-01-26 03:31:40 +0000
commit    c9c8b2a804b2cd3d33a6a965e06a21ff93968f97 (patch)
tree      6141f9f0ec12fefbdd984667613aaf33da6068af /lib/Target
parent    5bf4b7556f025587a8d1a14bd0fb39c12fc9c170 (diff)
CellSPU:
- Rename fcmp.ll test to fcmp32.ll, start adding new double tests to
  fcmp64.ll
- Fix select_bits.ll test
- Capitulate to the DAGCombiner and move i64 constant loads to instruction
  selection (SPUISelDAGToDAG.cpp).

<rant>DAGCombiner will insert all kinds of 64-bit optimizations after
operation legalization occurs, and now we have to do most of the work that
instruction selection should be doing twice: once to determine whether a
v2i64 build_vector can be handled by SelectCode(), which then runs all of
the predicates a second time to select the necessary instructions. But,
CellSPU is a good citizen.</rant>

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@62990 91177308-0d34-0410-b5e6-96231b3b80d8
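The change hinges on a simple property: a CellSPU i64 constant is materialized
as a v2i64 splat, and the splat has a cheap IL/ILHU/ILA immediate form only
when its two 32-bit halves agree. A minimal standalone sketch of that test
(hypothetical helper name, not the in-tree SPU::get_*_imm predicates):

#include <cstdint>
#include <cstdio>

// Hypothetical standalone check, mirroring the upper == lower test in
// LowerSplat_v2i64 below: an i64 splat is a single-immediate candidate only
// when its two 32-bit halves match.
static bool splatFitsImmediate(uint64_t v) {
  uint32_t upper = uint32_t(v >> 32);
  uint32_t lower = uint32_t(v);
  return upper == lower;
}

int main() {
  printf("%d\n", splatFitsImmediate(0x1234567812345678ULL)); // 1: IL-class form
  printf("%d\n", splatFitsImmediate(0x0000000112345678ULL)); // 0: SHUFB or pool
  return 0;
}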
Diffstat (limited to 'lib/Target')
-rw-r--r--  lib/Target/CellSPU/SPU64InstrInfo.td   |   4
-rw-r--r--  lib/Target/CellSPU/SPUISelDAGToDAG.cpp | 173
-rw-r--r--  lib/Target/CellSPU/SPUISelLowering.cpp | 484
-rw-r--r--  lib/Target/CellSPU/SPUISelLowering.h   |   4
-rw-r--r--  lib/Target/CellSPU/SPUInstrInfo.cpp    |   4
-rw-r--r--  lib/Target/CellSPU/SPUInstrInfo.td     |  30
6 files changed, 468 insertions, 231 deletions
diff --git a/lib/Target/CellSPU/SPU64InstrInfo.td b/lib/Target/CellSPU/SPU64InstrInfo.td
index 3329894..06eb149 100644
--- a/lib/Target/CellSPU/SPU64InstrInfo.td
+++ b/lib/Target/CellSPU/SPU64InstrInfo.td
@@ -30,8 +30,8 @@
// selb instruction definition for i64. Note that the selection mask is
// a vector, produced by various forms of FSM:
def SELBr64_cond:
- SELBInst<(outs R64C:$rT), (ins R64C:$rA, R64C:$rB, VECREG:$rC),
- [/* no pattern */]>;
+ SELBInst<(outs R64C:$rT), (ins R64C:$rA, R64C:$rB, VECREG:$rC),
+ [/* no pattern */]>;
// The generic i64 select pattern, which assumes that the comparison result
// is in a 32-bit register that contains a select mask pattern (i.e., gather
diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
index 0fc7aec..6d7f40d 100644
--- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
+++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
@@ -254,26 +254,56 @@ public:
/// getSmallIPtrImm - Return a target constant of pointer type.
inline SDValue getSmallIPtrImm(unsigned Imm) {
return CurDAG->getTargetConstant(Imm, SPUtli.getPointerTy());
- }
+ }
SDNode *emitBuildVector(SDValue build_vec) {
+ MVT vecVT = build_vec.getValueType();
+ SDNode *bvNode = build_vec.getNode();
+ bool canBeSelected = false;
+
+ // Check to see if this vector can be represented as a CellSPU immediate
+ // constant.
+ if (vecVT == MVT::v8i16) {
+ if (SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i16).getNode() != 0) {
+ canBeSelected = true;
+ }
+ } else if (vecVT == MVT::v4i32) {
+ if ((SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i32).getNode() != 0)
+ || (SPU::get_ILHUvec_imm(bvNode, *CurDAG, MVT::i32).getNode() != 0)
+ || (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i32).getNode() != 0)
+ || (SPU::get_v4i32_imm(bvNode, *CurDAG).getNode() != 0)) {
+ canBeSelected = true;
+ }
+ } else if (vecVT == MVT::v2i64) {
+ if ((SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i64).getNode() != 0)
+ || (SPU::get_ILHUvec_imm(bvNode, *CurDAG, MVT::i64).getNode() != 0)
+ || (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i64).getNode() != 0)) {
+ canBeSelected = true;
+ }
+ }
+
+ if (canBeSelected) {
+ return Select(build_vec);
+ }
+
+    // No: need to emit a constant pool spill:
std::vector<Constant*> CV;
for (size_t i = 0; i < build_vec.getNumOperands(); ++i) {
- ConstantSDNode *V = dyn_cast<ConstantSDNode>(build_vec.getOperand(i));
- CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue()));
+ ConstantSDNode *V = dyn_cast<ConstantSDNode > (build_vec.getOperand(i));
+ CV.push_back(const_cast<ConstantInt *> (V->getConstantIntValue()));
}
Constant *CP = ConstantVector::get(CV);
SDValue CPIdx = CurDAG->getConstantPool(CP, SPUtli.getPointerTy());
- unsigned Alignment = 1 << cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+ unsigned Alignment = 1 << cast<ConstantPoolSDNode > (CPIdx)->getAlignment();
SDValue CGPoolOffset =
SPU::LowerConstantPool(CPIdx, *CurDAG,
SPUtli.getSPUTargetMachine());
return SelectCode(CurDAG->getLoad(build_vec.getValueType(),
- CurDAG->getEntryNode(), CGPoolOffset,
- PseudoSourceValue::getConstantPool(), 0,
- false, Alignment));
+ CurDAG->getEntryNode(), CGPoolOffset,
+ PseudoSourceValue::getConstantPool(), 0,
+ false, Alignment));
}
/// Select - Convert the specified operand from a target-independent to a
@@ -289,6 +319,9 @@ public:
//! Emit the instruction sequence for i64 sra
SDNode *SelectSRAi64(SDValue &Op, MVT OpVT);
+ //! Emit the necessary sequence for loading i64 constants:
+ SDNode *SelectI64Constant(SDValue &Op, MVT OpVT);
+
//! Returns true if the address N is an A-form (local store) address
bool SelectAFormAddr(SDValue Op, SDValue N, SDValue &Base,
SDValue &Index);
@@ -652,7 +685,9 @@ SPUDAGToDAGISel::Select(SDValue Op) {
if (N->isMachineOpcode()) {
return NULL; // Already selected.
- } else if (Opc == ISD::FrameIndex) {
+ }
+
+ if (Opc == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
SDValue TFI = CurDAG->getTargetFrameIndex(FI, Op.getValueType());
SDValue Imm0 = CurDAG->getTargetConstant(0, Op.getValueType());
@@ -669,6 +704,11 @@ SPUDAGToDAGISel::Select(SDValue Op) {
TFI, Imm0), 0);
n_ops = 2;
}
+ } else if (Opc == ISD::Constant && OpVT == MVT::i64) {
+ // Catch the i64 constants that end up here. Note: The backend doesn't
+ // attempt to legalize the constant (it's useless because DAGCombiner
+ // will insert 64-bit constants and we can't stop it).
+ return SelectI64Constant(Op, OpVT);
} else if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND)
&& OpVT == MVT::i64) {
SDValue Op0 = Op.getOperand(0);
@@ -745,27 +785,38 @@ SPUDAGToDAGISel::Select(SDValue Op) {
return SelectCode(CurDAG->getNode(SPUISD::MUL64_MARKER, OpVT,
Op.getOperand(0), Op.getOperand(1),
SDValue(CGLoad, 0)));
- } else if (Opc == ISD::ADD && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
- SDNode *CGLoad =
- emitBuildVector(SPU::getCarryGenerateShufMask(*CurDAG));
-
- return SelectCode(CurDAG->getNode(SPUISD::ADD64_MARKER, OpVT,
- Op.getOperand(0), Op.getOperand(1),
- SDValue(CGLoad, 0)));
- } else if (Opc == ISD::SUB && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
- SDNode *CGLoad =
- emitBuildVector(SPU::getBorrowGenerateShufMask(*CurDAG));
-
- return SelectCode(CurDAG->getNode(SPUISD::SUB64_MARKER, OpVT,
- Op.getOperand(0), Op.getOperand(1),
- SDValue(CGLoad, 0)));
- } else if (Opc == ISD::MUL && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
- SDNode *CGLoad =
- emitBuildVector(SPU::getCarryGenerateShufMask(*CurDAG));
-
- return SelectCode(CurDAG->getNode(SPUISD::MUL64_MARKER, OpVT,
- Op.getOperand(0), Op.getOperand(1),
- SDValue(CGLoad, 0)));
+ } else if (Opc == ISD::TRUNCATE) {
+ SDValue Op0 = Op.getOperand(0);
+ if ((Op0.getOpcode() == ISD::SRA || Op0.getOpcode() == ISD::SRL)
+ && OpVT == MVT::i32
+ && Op0.getValueType() == MVT::i64) {
+      // Catch (truncate:i32 ([sra|srl]:i64 arg, c)), where c >= 32, to
+ // take advantage of the fact that the upper 32 bits are in the
+ // i32 preferred slot and avoid all kinds of other shuffle gymnastics:
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
+ if (CN != 0) {
+ unsigned shift_amt = unsigned(CN->getZExtValue());
+
+ if (shift_amt >= 32) {
+ SDNode *hi32 =
+ CurDAG->getTargetNode(SPU::ORr32_r64, OpVT, Op0.getOperand(0));
+
+ shift_amt -= 32;
+ if (shift_amt > 0) {
+ // Take care of the additional shift, if present:
+ SDValue shift = CurDAG->getTargetConstant(shift_amt, MVT::i32);
+ unsigned Opc = SPU::ROTMAIr32_i32;
+
+ if (Op0.getOpcode() == ISD::SRL)
+ Opc = SPU::ROTMr32;
+
+ hi32 = CurDAG->getTargetNode(Opc, OpVT, SDValue(hi32, 0), shift);
+ }
+
+ return hi32;
+ }
+ }
+ }
} else if (Opc == ISD::SHL) {
if (OpVT == MVT::i64) {
return SelectSHLi64(Op, OpVT);
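The TRUNCATE combine above rests on a shift identity that is easy to verify in
isolation: for a shift count c >= 32, truncating the shifted i64 to i32 equals
shifting the high word by c - 32, which is why an ORr32_r64 copy plus an
optional 32-bit rotate is enough. A standalone check, independent of the DAG:

#include <cstdint>
#include <cassert>

// Identity behind the combine: for c >= 32,
//   (uint32_t)(x >> c) == (uint32_t)(x >> 32) >> (c - 32)   (SRL case)
// and the analogous equation with arithmetic shifts covers the SRA case.
int main() {
  uint64_t x = 0xDEADBEEFCAFEF00DULL;
  uint32_t hi = uint32_t(x >> 32);
  for (unsigned c = 32; c < 64; ++c) {
    assert(uint32_t(x >> c) == hi >> (c - 32));                  // SRL
    assert(int32_t(int64_t(x) >> c) == int32_t(hi) >> (c - 32)); // SRA
  }
  return 0;
}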
@@ -1046,6 +1097,70 @@ SPUDAGToDAGISel::SelectSRAi64(SDValue &Op, MVT OpVT) {
return CurDAG->getTargetNode(SPU::ORi64_v2i64, OpVT, SDValue(Shift, 0));
}
+/*!
+  Do the magic necessary to load an i64 constant
+ */
+SDNode *SPUDAGToDAGISel::SelectI64Constant(SDValue& Op, MVT OpVT) {
+ ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());
+ MVT OpVecVT = MVT::getVectorVT(OpVT, 2);
+ SDValue i64vec =
+ SPU::LowerSplat_v2i64(OpVecVT, *CurDAG, CN->getZExtValue());
+
+ // Here's where it gets interesting, because we have to parse out the
+ // subtree handed back in i64vec:
+
+ if (i64vec.getOpcode() == ISD::BIT_CONVERT) {
+ // The degenerate case where the upper and lower bits in the splat are
+ // identical:
+ SDValue Op0 = i64vec.getOperand(0);
+ ReplaceUses(i64vec, Op0);
+
+ return CurDAG->getTargetNode(SPU::ORi64_v2i64, OpVT,
+ SDValue(emitBuildVector(Op0), 0));
+ } else if (i64vec.getOpcode() == SPUISD::SHUFB) {
+ SDValue lhs = i64vec.getOperand(0);
+ SDValue rhs = i64vec.getOperand(1);
+ SDValue shufmask = i64vec.getOperand(2);
+
+ if (lhs.getOpcode() == ISD::BIT_CONVERT) {
+ ReplaceUses(lhs, lhs.getOperand(0));
+ lhs = lhs.getOperand(0);
+ }
+
+ SDNode *lhsNode = (lhs.getNode()->isMachineOpcode()
+ ? lhs.getNode()
+ : emitBuildVector(lhs));
+
+ if (rhs.getOpcode() == ISD::BIT_CONVERT) {
+ ReplaceUses(rhs, rhs.getOperand(0));
+ rhs = rhs.getOperand(0);
+ }
+
+ SDNode *rhsNode = (rhs.getNode()->isMachineOpcode()
+ ? rhs.getNode()
+ : emitBuildVector(rhs));
+
+ if (shufmask.getOpcode() == ISD::BIT_CONVERT) {
+ ReplaceUses(shufmask, shufmask.getOperand(0));
+ shufmask = shufmask.getOperand(0);
+ }
+
+ SDNode *shufMaskNode = (shufmask.getNode()->isMachineOpcode()
+ ? shufmask.getNode()
+ : emitBuildVector(shufmask));
+
+ SDNode *shufNode =
+ Select(CurDAG->getNode(SPUISD::SHUFB, OpVecVT,
+ SDValue(lhsNode, 0), SDValue(rhsNode, 0),
+ SDValue(shufMaskNode, 0)));
+
+ return CurDAG->getTargetNode(SPU::ORi64_v2i64, OpVT, SDValue(shufNode, 0));
+ } else {
+ cerr << "SPUDAGToDAGISel::SelectI64Constant: Unhandled i64vec condition\n";
+ abort();
+ }
+}
+
/// createSPUISelDag - This pass converts a legalized DAG into a
/// SPU-specific DAG, ready for instruction scheduling.
///
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
index 124f1a7..6bb76d8 100644
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -17,6 +17,7 @@
#include "SPUFrameInfo.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/VectorExtras.h"
+#include "llvm/CallingConv.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -79,6 +80,43 @@ namespace {
return retval;
}
+ //! Expand a library call into an actual call DAG node
+ /*!
+ \note
+ This code is taken from SelectionDAGLegalize, since it is not exposed as
+ part of the LLVM SelectionDAG API.
+ */
+
+ SDValue
+ ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
+ bool isSigned, SDValue &Hi, SPUTargetLowering &TLI) {
+ // The input chain to this libcall is the entry node of the function.
+ // Legalizing the call will automatically add the previous call to the
+ // dependence.
+ SDValue InChain = DAG.getEntryNode();
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
+ MVT ArgVT = Op.getOperand(i).getValueType();
+ const Type *ArgTy = ArgVT.getTypeForMVT();
+ Entry.Node = Op.getOperand(i);
+ Entry.Ty = ArgTy;
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy());
+
+ // Splice the libcall in wherever FindInputOutputChains tells us to.
+ const Type *RetTy = Op.getNode()->getValueType(0).getTypeForMVT();
+ std::pair<SDValue, SDValue> CallInfo =
+ TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
+ CallingConv::C, false, Callee, Args, DAG);
+
+ return CallInfo.first;
+ }
}
SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
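The semantics of the call this helper emits for the new FP_TO_SINT path are
just the C conversion, i.e. truncation toward zero; for f64 -> i64 the runtime
symbol is conventionally __fixdfdi, as resolved by RTLIB::getFPTOSINT. A
trivial standalone check of that semantics:

#include <cassert>

// What the emitted f64 -> i64 libcall computes: a C-style truncating
// conversion (round toward zero), exactly what a direct cast does.
int main() {
  assert((long long)(-3.9) == -3); // truncates toward zero
  assert((long long)( 2.5) ==  2);
  return 0;
}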
@@ -113,7 +151,6 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);
// SPU constant load actions are custom lowered:
- setOperationAction(ISD::Constant, MVT::i64, Custom);
setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
@@ -128,10 +165,6 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
setLoadExtAction(ISD::SEXTLOAD, VT, Custom);
- // SMUL_LOHI, UMUL_LOHI are not legal for Cell:
- setOperationAction(ISD::SMUL_LOHI, VT, Expand);
- setOperationAction(ISD::UMUL_LOHI, VT, Expand);
-
for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
MVT StoreVT = (MVT::SimpleValueType) stype;
setTruncStoreAction(VT, StoreVT, Expand);
@@ -179,16 +212,14 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setOperationAction(ISD::FCOS , MVT::f32, Expand);
setOperationAction(ISD::FREM , MVT::f32, Expand);
- // If we're enabling GP optimizations, use hardware square root
+ // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt
+ // for f32!)
setOperationAction(ISD::FSQRT, MVT::f64, Expand);
setOperationAction(ISD::FSQRT, MVT::f32, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
- // Make sure that DAGCombine doesn't insert illegal 64-bit constants
- setOperationAction(ISD::FABS, MVT::f64, Custom);
-
// SPU can do rotate right and left, so legalize it... but customize for i8
// because instructions don't exist.
@@ -254,22 +285,21 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
// Custom lower i128 -> i64 truncates
setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
- // SPU has a legal FP -> signed INT instruction
- setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
- setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
- setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
+ // SPU has a legal FP -> signed INT instruction for f32, but for f64, need
+ // to expand to a libcall, hence the custom lowering:
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
// FDIV on SPU requires custom lowering
- setOperationAction(ISD::FDIV, MVT::f64, Expand); // libcall
+ setOperationAction(ISD::FDIV, MVT::f64, Expand); // to libcall
// SPU has [U|S]INT_TO_FP
- setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
- setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
- setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
- setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
@@ -338,24 +368,23 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
MVT VT = (MVT::SimpleValueType)i;
// add/sub are legal for all supported vector VT's.
- setOperationAction(ISD::ADD , VT, Legal);
- setOperationAction(ISD::SUB , VT, Legal);
+ setOperationAction(ISD::ADD, VT, Legal);
+ setOperationAction(ISD::SUB, VT, Legal);
// mul has to be custom lowered.
- // TODO: v2i64 vector multiply
- setOperationAction(ISD::MUL , VT, Legal);
+ setOperationAction(ISD::MUL, VT, Legal);
- setOperationAction(ISD::AND , VT, Legal);
- setOperationAction(ISD::OR , VT, Legal);
- setOperationAction(ISD::XOR , VT, Legal);
- setOperationAction(ISD::LOAD , VT, Legal);
- setOperationAction(ISD::SELECT, VT, Legal);
- setOperationAction(ISD::STORE, VT, Legal);
+ setOperationAction(ISD::AND, VT, Legal);
+ setOperationAction(ISD::OR, VT, Legal);
+ setOperationAction(ISD::XOR, VT, Legal);
+ setOperationAction(ISD::LOAD, VT, Legal);
+ setOperationAction(ISD::SELECT, VT, Legal);
+ setOperationAction(ISD::STORE, VT, Legal);
// These operations need to be expanded:
- setOperationAction(ISD::SDIV, VT, Expand);
- setOperationAction(ISD::SREM, VT, Expand);
- setOperationAction(ISD::UDIV, VT, Expand);
- setOperationAction(ISD::UREM, VT, Expand);
+ setOperationAction(ISD::SDIV, VT, Expand);
+ setOperationAction(ISD::SREM, VT, Expand);
+ setOperationAction(ISD::UDIV, VT, Expand);
+ setOperationAction(ISD::UREM, VT, Expand);
// Custom lower build_vector, constant pool spills, insert and
// extract vector elements:
@@ -866,31 +895,6 @@ LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
return SDValue();
}
-//! Custom lower i64 integer constants
-/*!
- This code inserts all of the necessary juggling that needs to occur to load
- a 64-bit constant into a register.
- */
-static SDValue
-LowerConstant(SDValue Op, SelectionDAG &DAG) {
- MVT VT = Op.getValueType();
-
- if (VT == MVT::i64) {
- ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());
- SDValue T = DAG.getConstant(CN->getZExtValue(), VT);
- return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
- DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
- } else {
- cerr << "LowerConstant: unhandled constant type "
- << VT.getMVTString()
- << "\n";
- abort();
- /*NOTREACHED*/
- }
-
- return SDValue();
-}
-
//! Custom lower double precision floating point constants
static SDValue
LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
@@ -1564,7 +1568,7 @@ static bool isConstantSplat(const uint64_t Bits128[2],
//! Lower a BUILD_VECTOR instruction creatively:
SDValue
-SPU::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
+LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getValueType();
// If this is a vector of constants or undefs, get the bits. A bit in
// UndefBits is set if the corresponding element of the vector is an
@@ -1588,7 +1592,7 @@ SPU::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
abort();
/*NOTREACHED*/
case MVT::v4f32: {
- uint32_t Value32 = SplatBits;
+ uint32_t Value32 = uint32_t(SplatBits);
assert(SplatSize == 4
&& "LowerBUILD_VECTOR: Unexpected floating point vector element.");
// NOTE: pretend the constant is an integer. LLVM won't load FP constants
@@ -1598,7 +1602,7 @@ SPU::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
break;
}
case MVT::v2f64: {
- uint64_t f64val = SplatBits;
+ uint64_t f64val = uint64_t(SplatBits);
assert(SplatSize == 8
&& "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
// NOTE: pretend the constant is an integer. LLVM won't load FP constants
@@ -1638,93 +1642,99 @@ SPU::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T);
}
case MVT::v2i64: {
- uint64_t val = SplatBits;
- uint32_t upper = uint32_t(val >> 32);
- uint32_t lower = uint32_t(val);
-
- if (upper == lower) {
- // Magic constant that can be matched by IL, ILA, et. al.
- SDValue Val = DAG.getTargetConstant(val, MVT::i64);
- return DAG.getNode(ISD::BUILD_VECTOR, VT, Val, Val);
- } else {
- SDValue LO32;
- SDValue HI32;
- SmallVector<SDValue, 16> ShufBytes;
- SDValue Result;
- bool upper_special, lower_special;
-
- // NOTE: This code creates common-case shuffle masks that can be easily
- // detected as common expressions. It is not attempting to create highly
- // specialized masks to replace any and all 0's, 0xff's and 0x80's.
-
- // Detect if the upper or lower half is a special shuffle mask pattern:
- upper_special = (upper == 0||upper == 0xffffffff||upper == 0x80000000);
- lower_special = (lower == 0||lower == 0xffffffff||lower == 0x80000000);
-
- // Create lower vector if not a special pattern
- if (!lower_special) {
- SDValue LO32C = DAG.getConstant(lower, MVT::i32);
- LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
- DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
- LO32C, LO32C, LO32C, LO32C));
- }
+ return SPU::LowerSplat_v2i64(VT, DAG, SplatBits);
+ }
+ }
- // Create upper vector if not a special pattern
- if (!upper_special) {
- SDValue HI32C = DAG.getConstant(upper, MVT::i32);
- HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
- DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
- HI32C, HI32C, HI32C, HI32C));
- }
+ return SDValue();
+}
- // If either upper or lower are special, then the two input operands are
- // the same (basically, one of them is a "don't care")
- if (lower_special)
- LO32 = HI32;
- if (upper_special)
- HI32 = LO32;
- if (lower_special && upper_special) {
- // Unhappy situation... both upper and lower are special, so punt with
- // a target constant:
- SDValue Zero = DAG.getConstant(0, MVT::i32);
- HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
- Zero, Zero);
- }
+SDValue
+SPU::LowerSplat_v2i64(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal) {
+ uint32_t upper = uint32_t(SplatVal >> 32);
+ uint32_t lower = uint32_t(SplatVal);
+
+ if (upper == lower) {
+    // Magic constant that can be matched by IL, ILA, et al.
+ SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
+ return DAG.getNode(ISD::BIT_CONVERT, OpVT,
+ DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
+ Val, Val, Val, Val));
+ } else {
+ SDValue LO32;
+ SDValue HI32;
+ SmallVector<SDValue, 16> ShufBytes;
+ SDValue Result;
+ bool upper_special, lower_special;
+
+ // NOTE: This code creates common-case shuffle masks that can be easily
+ // detected as common expressions. It is not attempting to create highly
+ // specialized masks to replace any and all 0's, 0xff's and 0x80's.
+
+ // Detect if the upper or lower half is a special shuffle mask pattern:
+ upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
+ lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
+
+ // Create lower vector if not a special pattern
+ if (!lower_special) {
+ SDValue LO32C = DAG.getConstant(lower, MVT::i32);
+ LO32 = DAG.getNode(ISD::BIT_CONVERT, OpVT,
+ DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
+ LO32C, LO32C, LO32C, LO32C));
+ }
- for (int i = 0; i < 4; ++i) {
- uint64_t val = 0;
- for (int j = 0; j < 4; ++j) {
- SDValue V;
- bool process_upper, process_lower;
- val <<= 8;
- process_upper = (upper_special && (i & 1) == 0);
- process_lower = (lower_special && (i & 1) == 1);
-
- if (process_upper || process_lower) {
- if ((process_upper && upper == 0)
- || (process_lower && lower == 0))
- val |= 0x80;
- else if ((process_upper && upper == 0xffffffff)
- || (process_lower && lower == 0xffffffff))
- val |= 0xc0;
- else if ((process_upper && upper == 0x80000000)
- || (process_lower && lower == 0x80000000))
- val |= (j == 0 ? 0xe0 : 0x80);
- } else
- val |= i * 4 + j + ((i & 1) * 16);
- }
+ // Create upper vector if not a special pattern
+ if (!upper_special) {
+ SDValue HI32C = DAG.getConstant(upper, MVT::i32);
+ HI32 = DAG.getNode(ISD::BIT_CONVERT, OpVT,
+ DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
+ HI32C, HI32C, HI32C, HI32C));
+ }
- ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
+ // If either upper or lower are special, then the two input operands are
+ // the same (basically, one of them is a "don't care")
+ if (lower_special)
+ LO32 = HI32;
+ if (upper_special)
+ HI32 = LO32;
+ if (lower_special && upper_special) {
+ // Unhappy situation... both upper and lower are special, so punt with
+ // a target constant:
+ SDValue Zero = DAG.getConstant(0, MVT::i32);
+ HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
+ Zero, Zero);
+ }
+
+ for (int i = 0; i < 4; ++i) {
+ uint64_t val = 0;
+ for (int j = 0; j < 4; ++j) {
+ SDValue V;
+ bool process_upper, process_lower;
+ val <<= 8;
+ process_upper = (upper_special && (i & 1) == 0);
+ process_lower = (lower_special && (i & 1) == 1);
+
+ if (process_upper || process_lower) {
+ if ((process_upper && upper == 0)
+ || (process_lower && lower == 0))
+ val |= 0x80;
+ else if ((process_upper && upper == 0xffffffff)
+ || (process_lower && lower == 0xffffffff))
+ val |= 0xc0;
+ else if ((process_upper && upper == 0x80000000)
+ || (process_lower && lower == 0x80000000))
+ val |= (j == 0 ? 0xe0 : 0x80);
+ } else
+ val |= i * 4 + j + ((i & 1) * 16);
}
- return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
- DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
- &ShufBytes[0], ShufBytes.size()));
+ ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
}
- }
- }
- return SDValue();
+ return DAG.getNode(SPUISD::SHUFB, OpVT, HI32, LO32,
+ DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
+ &ShufBytes[0], ShufBytes.size()));
+ }
}
/// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
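To make the ShufBytes loop in LowerSplat_v2i64 concrete, here is a standalone
replay of the mask computation for upper == 0 with a non-special lower half
(so only the 0x80 "emit a zero byte" branch fires): words 0 and 2 of the
control mask force zeros, while words 1 and 3 pass the replicated lower word
through from the second SHUFB operand.

#include <cstdint>
#include <cstdio>

// Replay of the ShufBytes computation above for upper == 0 (upper_special),
// lower not special. SHUFB control byte 0x80 produces 0x00; other bytes
// select byte (i*4 + j) from operand A, or the same byte + 16 from operand B.
int main() {
  const bool upper_special = true, lower_special = false;
  for (int i = 0; i < 4; ++i) {
    uint32_t val = 0;
    for (int j = 0; j < 4; ++j) {
      val <<= 8;
      bool process_upper = upper_special && (i & 1) == 0;
      bool process_lower = lower_special && (i & 1) == 1;
      if (process_upper || process_lower)
        val |= 0x80;                       // upper == 0: emit a zero byte
      else
        val |= i * 4 + j + ((i & 1) * 16); // pass a source byte through
    }
    printf("mask word %d: 0x%08x\n", i, val);
  }
  // Prints 0x80808080, 0x14151617, 0x80808080, 0x1c1d1e1f.
  return 0;
}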
@@ -2384,81 +2394,180 @@ static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
return SDValue();
}
-//! Lower ISD::FABS
+//! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
/*!
- DAGCombine does the same basic reduction: convert the double to i64 and mask
- off the sign bit. Unfortunately, DAGCombine inserts the i64 constant, which
- CellSPU has to legalize. Hence, the custom lowering.
+ f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
+ All conversions to i64 are expanded to a libcall.
*/
-
-static SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) {
+static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
+ SPUTargetLowering &TLI) {
MVT OpVT = Op.getValueType();
- MVT IntVT(MVT::i64);
SDValue Op0 = Op.getOperand(0);
+ MVT Op0VT = Op0.getValueType();
+
+ if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
+ || OpVT == MVT::i64) {
+ // Convert f32 / f64 to i32 / i64 via libcall.
+ RTLIB::Libcall LC =
+ (Op.getOpcode() == ISD::FP_TO_SINT)
+ ? RTLIB::getFPTOSINT(Op0VT, OpVT)
+ : RTLIB::getFPTOUINT(Op0VT, OpVT);
+    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-int conversion!");
+ SDValue Dummy;
+ return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
+ }
+
+ return SDValue();
+}
- assert(OpVT == MVT::f64 && "LowerFABS: expecting MVT::f64!\n");
+//! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
+/*!
+ i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall.
+ All conversions from i64 are expanded to a libcall.
+ */
+static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
+ SPUTargetLowering &TLI) {
+ MVT OpVT = Op.getValueType();
+ SDValue Op0 = Op.getOperand(0);
+ MVT Op0VT = Op0.getValueType();
- SDValue iABS =
- DAG.getNode(ISD::AND, IntVT,
- DAG.getNode(ISD::BIT_CONVERT, IntVT, Op0),
- DAG.getConstant(~IntVT.getIntegerVTSignBit(), IntVT));
+ if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
+ || Op0VT == MVT::i64) {
+ // Convert i32, i64 to f64 via libcall:
+ RTLIB::Libcall LC =
+ (Op.getOpcode() == ISD::SINT_TO_FP)
+ ? RTLIB::getSINTTOFP(Op0VT, OpVT)
+ : RTLIB::getUINTTOFP(Op0VT, OpVT);
+    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected int-to-fp conversion!");
+ SDValue Dummy;
+ return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
+ }
- return DAG.getNode(ISD::BIT_CONVERT, MVT::f64, iABS);
+ return SDValue();
}
//! Lower ISD::SETCC
/*!
This handles MVT::f64 (double floating point) condition lowering
*/
-
static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI) {
+ CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
+ assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
+
SDValue lhs = Op.getOperand(0);
SDValue rhs = Op.getOperand(1);
- CondCodeSDNode *CC = dyn_cast<CondCodeSDNode > (Op.getOperand(2));
MVT lhsVT = lhs.getValueType();
- SDValue posNaN = DAG.getConstant(0x7ff0000000000001ULL, MVT::i64);
-
- assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
  assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::f64\n");
- switch (CC->get()) {
- case ISD::SETOEQ:
- case ISD::SETOGT:
- case ISD::SETOGE:
- case ISD::SETOLT:
- case ISD::SETOLE:
- case ISD::SETONE:
- cerr << "CellSPU ISel Select: unimplemented f64 condition\n";
- abort();
- break;
- case ISD::SETO: {
- SDValue lhsfabs = DAG.getNode(ISD::FABS, MVT::f64, lhs);
- SDValue i64lhs =
- DAG.getNode(ISD::BIT_CONVERT, MVT::i64, lhsfabs);
+ MVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
+ APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
+ MVT IntVT(MVT::i64);
- return DAG.getSetCC(MVT::i32, i64lhs, posNaN, ISD::SETLT);
- }
- case ISD::SETUO: {
- SDValue lhsfabs = DAG.getNode(ISD::FABS, MVT::f64, lhs);
- SDValue i64lhs =
- DAG.getNode(ISD::BIT_CONVERT, MVT::i64, lhsfabs);
+ // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently
+ // selected to a NOP:
+ SDValue i64lhs = DAG.getNode(ISD::BIT_CONVERT, IntVT, lhs);
+ SDValue lhsHi32 =
+ DAG.getNode(ISD::TRUNCATE, MVT::i32,
+ DAG.getNode(ISD::SRL, IntVT,
+ i64lhs, DAG.getConstant(32, MVT::i32)));
+ SDValue lhsHi32abs =
+ DAG.getNode(ISD::AND, MVT::i32,
+ lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32));
+ SDValue lhsLo32 =
+ DAG.getNode(ISD::TRUNCATE, MVT::i32, i64lhs);
+
+ // SETO and SETUO only use the lhs operand:
+ if (CC->get() == ISD::SETO) {
+ // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of
+ // SETUO
+ APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
+ return DAG.getNode(ISD::XOR, ccResultVT,
+ DAG.getSetCC(ccResultVT,
+ lhs, DAG.getConstantFP(0.0, lhsVT),
+ ISD::SETUO),
+ DAG.getConstant(ccResultAllOnes, ccResultVT));
+ } else if (CC->get() == ISD::SETUO) {
+ // Evaluates to true if Op0 is [SQ]NaN
+ return DAG.getNode(ISD::AND, ccResultVT,
+ DAG.getSetCC(ccResultVT,
+ lhsHi32abs,
+ DAG.getConstant(0x7ff00000, MVT::i32),
+ ISD::SETGE),
+ DAG.getSetCC(ccResultVT,
+ lhsLo32,
+ DAG.getConstant(0, MVT::i32),
+ ISD::SETGT));
+ }
+
+ SDValue i64rhs = DAG.getNode(ISD::BIT_CONVERT, IntVT, rhs);
+ SDValue rhsHi32 =
+ DAG.getNode(ISD::TRUNCATE, MVT::i32,
+ DAG.getNode(ISD::SRL, IntVT,
+ i64rhs, DAG.getConstant(32, MVT::i32)));
+
+ // If a value is negative, subtract from the sign magnitude constant:
+ SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);
+
+ // Convert the sign-magnitude representation into 2's complement:
+ SDValue lhsSelectMask = DAG.getNode(ISD::SRA, ccResultVT,
+ lhsHi32, DAG.getConstant(31, MVT::i32));
+ SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, IntVT, signMag2TC, i64lhs);
+ SDValue lhsSelect =
+ DAG.getNode(ISD::SELECT, IntVT,
+ lhsSelectMask, lhsSignMag2TC, i64lhs);
+
+ SDValue rhsSelectMask = DAG.getNode(ISD::SRA, ccResultVT,
+ rhsHi32, DAG.getConstant(31, MVT::i32));
+ SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, IntVT, signMag2TC, i64rhs);
+ SDValue rhsSelect =
+ DAG.getNode(ISD::SELECT, IntVT,
+ rhsSelectMask, rhsSignMag2TC, i64rhs);
+
+ unsigned compareOp;
- return DAG.getSetCC(MVT::i32, i64lhs, posNaN, ISD::SETGE);
- }
+ switch (CC->get()) {
+ case ISD::SETOEQ:
case ISD::SETUEQ:
+ compareOp = ISD::SETEQ; break;
+ case ISD::SETOGT:
case ISD::SETUGT:
+ compareOp = ISD::SETGT; break;
+ case ISD::SETOGE:
case ISD::SETUGE:
+ compareOp = ISD::SETGE; break;
+ case ISD::SETOLT:
case ISD::SETULT:
+ compareOp = ISD::SETLT; break;
+ case ISD::SETOLE:
case ISD::SETULE:
+ compareOp = ISD::SETLE; break;
case ISD::SETUNE:
+ case ISD::SETONE:
+ compareOp = ISD::SETNE; break;
default:
cerr << "CellSPU ISel Select: unimplemented f64 condition\n";
abort();
break;
}
- return SDValue();
+ SDValue result =
+ DAG.getSetCC(ccResultVT, lhsSelect, rhsSelect, (ISD::CondCode) compareOp);
+
+ if ((CC->get() & 0x8) == 0) {
+ // Ordered comparison:
+ SDValue lhsNaN = DAG.getSetCC(ccResultVT,
+ lhs, DAG.getConstantFP(0.0, MVT::f64),
+ ISD::SETO);
+ SDValue rhsNaN = DAG.getSetCC(ccResultVT,
+ rhs, DAG.getConstantFP(0.0, MVT::f64),
+ ISD::SETO);
+ SDValue ordered = DAG.getNode(ISD::AND, ccResultVT, lhsNaN, rhsNaN);
+
+ result = DAG.getNode(ISD::AND, ccResultVT, ordered, result);
+ }
+
+ return result;
}
//! Lower ISD::SELECT_CC
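The SUB-from-0x8000000000000000 selects above are the standard sign-magnitude
to two's-complement trick: after the transform, a signed integer comparison of
the bit patterns agrees with the floating-point ordering for every non-NaN
input, including -0.0 == +0.0. A standalone check of the transform:

#include <cstdint>
#include <cstring>
#include <cassert>

// Mirror of the lhsSelect/rhsSelect computation in LowerSETCC: negative
// inputs map to 0x8000000000000000 - bits (the signMag2TC operand), so a
// signed compare of the keys matches the FP ordering for non-NaN values.
static int64_t orderKey(double d) {
  int64_t bits;
  std::memcpy(&bits, &d, sizeof bits);
  return bits < 0 ? int64_t(0x8000000000000000ULL - uint64_t(bits)) : bits;
}

int main() {
  const double xs[] = { -3.5, -0.0, 0.0, 1.25, 2.0 };
  for (double a : xs)
    for (double b : xs)
      assert((a < b) == (orderKey(a) < orderKey(b)));
  return 0;
}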
@@ -2566,8 +2675,6 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
case ISD::JumpTable:
return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
- case ISD::Constant:
- return LowerConstant(Op, DAG);
case ISD::ConstantFP:
return LowerConstantFP(Op, DAG);
case ISD::FORMAL_ARGUMENTS:
@@ -2590,12 +2697,17 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
break;
}
- case ISD::FABS:
- return LowerFABS(Op, DAG);
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ return LowerFP_TO_INT(Op, DAG, *this);
+
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ return LowerINT_TO_FP(Op, DAG, *this);
// Vector-related lowering.
case ISD::BUILD_VECTOR:
- return SPU::LowerBUILD_VECTOR(Op, DAG);
+ return LowerBUILD_VECTOR(Op, DAG);
case ISD::SCALAR_TO_VECTOR:
return LowerSCALAR_TO_VECTOR(Op, DAG);
case ISD::VECTOR_SHUFFLE:
diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h
index 079f3ba..24c2803 100644
--- a/lib/Target/CellSPU/SPUISelLowering.h
+++ b/lib/Target/CellSPU/SPUISelLowering.h
@@ -61,7 +61,7 @@ namespace llvm {
};
}
- //! Utility functions specific to CellSPU-only:
+ //! Utility functions specific to CellSPU:
namespace SPU {
SDValue get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
MVT ValueType);
@@ -78,7 +78,7 @@ namespace llvm {
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG,
const SPUTargetMachine &TM);
- SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerSplat_v2i64(MVT OpVT, SelectionDAG &DAG, uint64_t splat);
SDValue getBorrowGenerateShufMask(SelectionDAG &DAG);
SDValue getCarryGenerateShufMask(SelectionDAG &DAG);
diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp
index 91d52fa..f35a42d 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.cpp
+++ b/lib/Target/CellSPU/SPUInstrInfo.cpp
@@ -155,13 +155,13 @@ SPUInstrInfo::isMoveInstr(const MachineInstr& MI,
case SPU::ORr8_r32:
case SPU::ORr32_r16:
case SPU::ORr32_r8:
- case SPU::ORr32_r64:
case SPU::ORr16_r64:
case SPU::ORr8_r64:
- case SPU::ORr64_r32:
case SPU::ORr64_r16:
case SPU::ORr64_r8:
*/
+ case SPU::ORr64_r32:
+ case SPU::ORr32_r64:
case SPU::ORf32_r32:
case SPU::ORr32_f32:
case SPU::ORf64_r64:
diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td
index 2834a1e..8db2fa7 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.td
+++ b/lib/Target/CellSPU/SPUInstrInfo.td
@@ -1259,6 +1259,9 @@ multiclass BitwiseAnd
def fabs32: ANDInst<(outs R32FP:$rT), (ins R32FP:$rA, R32C:$rB),
[/* Intentionally does not match a pattern */]>;
+ def fabs64: ANDInst<(outs R64FP:$rT), (ins R64FP:$rA, VECREG:$rB),
+ [/* Intentionally does not match a pattern */]>;
+
// Could use v4i32, but won't for clarity
def fabsvec: ANDInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
[/* Intentionally does not match a pattern */]>;
@@ -1525,17 +1528,17 @@ multiclass BitwiseOr
// Conversion from R32C to register
def r32_r16: ORCvtFormR32Reg<R16C>;
def r32_r8: ORCvtFormR32Reg<R8C>;
+*/
- // Conversion from register to R64C:
+ // Conversion to register from R64C:
def r32_r64: ORCvtFormR64Reg<R32C>;
- def r16_r64: ORCvtFormR64Reg<R16C>;
- def r8_r64: ORCvtFormR64Reg<R8C>;
+ // def r16_r64: ORCvtFormR64Reg<R16C>;
+ // def r8_r64: ORCvtFormR64Reg<R8C>;
- // Conversion from R64C to register
+ // Conversion to R64C from register
def r64_r32: ORCvtFormRegR64<R32C>;
- def r64_r16: ORCvtFormRegR64<R16C>;
- def r64_r8: ORCvtFormRegR64<R8C>;
-*/
+ // def r64_r16: ORCvtFormRegR64<R16C>;
+ // def r64_r8: ORCvtFormRegR64<R8C>;
// bitconvert patterns:
def r32_f32: ORCvtFormR32Reg<R32FP,
@@ -1910,11 +1913,11 @@ class SELBInst<dag OOL, dag IOL, list<dag> pattern>:
RRRForm<0b1000, OOL, IOL, "selb\t$rT, $rA, $rB, $rC",
IntegerOp, pattern>;
-class SELBVecInst<ValueType vectype>:
+class SELBVecInst<ValueType vectype, PatFrag vnot_frag = vnot>:
SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
[(set (vectype VECREG:$rT),
(or (and (vectype VECREG:$rC), (vectype VECREG:$rB)),
- (and (vnot (vectype VECREG:$rC)),
+ (and (vnot_frag (vectype VECREG:$rC)),
(vectype VECREG:$rA))))]>;
class SELBVecVCondInst<ValueType vectype>:
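The SELBVecInst pattern above encodes selb as a plain bitwise select: each
result bit comes from $rB where the mask bit in $rC is set, and from $rA where
it is clear. A scalar model of those semantics:

#include <cstdint>
#include <cassert>

// Scalar model of selb as written in the SELBVecInst pattern:
//   rT = (rC & rB) | (~rC & rA)
static uint32_t selb(uint32_t rA, uint32_t rB, uint32_t rC) {
  return (rC & rB) | (~rC & rA);
}

int main() {
  assert(selb(0x00000000u, 0xFFFFFFFFu, 0x0F0F0F0Fu) == 0x0F0F0F0Fu);
  assert(selb(0xAAAAAAAAu, 0x55555555u, 0xFFFF0000u) == 0x5555AAAAu);
  return 0;
}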
@@ -1947,7 +1950,7 @@ multiclass SelectBits
def v16i8: SELBVecInst<v16i8>;
def v8i16: SELBVecInst<v8i16>;
def v4i32: SELBVecInst<v4i32>;
- def v2i64: SELBVecInst<v2i64>;
+ def v2i64: SELBVecInst<v2i64, vnot_conv>;
def r128: SELBRegInst<GPRC>;
def r64: SELBRegInst<R64C>;
@@ -4321,6 +4324,13 @@ def : Pat<(fabs (v4f32 VECREG:$rA)),
(ANDfabsvec (v4f32 VECREG:$rA),
(v4f32 (ANDBIv16i8 (FSMBIv16i8 0xffff), 0x7f)))>;
+def : Pat<(fabs R64FP:$rA),
+ (ANDfabs64 R64FP:$rA, (ANDBIv16i8 (FSMBIv16i8 0xffff), 0x7f))>;
+
+def : Pat<(fabs (v2f64 VECREG:$rA)),
+ (ANDfabsvec (v2f64 VECREG:$rA),
+ (v2f64 (ANDBIv16i8 (FSMBIv16i8 0xffff), 0x7f)))>;
+
//===----------------------------------------------------------------------===//
// Hint for branch instructions:
//===----------------------------------------------------------------------===//
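The fabs patterns added above implement absolute value as a bitwise AND that
clears the IEEE sign bit (the FSMBI/ANDBI sequence is just the SPU-specific
way of materializing a mask register). A scalar model of the idea:

#include <cstdint>
#include <cstring>
#include <cassert>

// Scalar model of fabs-as-AND: clear the top (sign) bit of the IEEE-754
// double representation; the patterns above do this with a vector AND.
static double fabsBits(double d) {
  uint64_t bits;
  std::memcpy(&bits, &d, sizeof bits);
  bits &= 0x7FFFFFFFFFFFFFFFULL; // the canonical sign-bit mask
  double r;
  std::memcpy(&r, &bits, sizeof r);
  return r;
}

int main() {
  assert(fabsBits(-2.5) == 2.5);
  assert(fabsBits(1.25) == 1.25);
  return 0;
}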