aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Target
diff options
context:
space:
mode:
authorChris Lattner <sabre@nondot.org>2006-04-16 01:01:29 +0000
committerChris Lattner <sabre@nondot.org>2006-04-16 01:01:29 +0000
commitb17f1679e3ce2b40fc2141fc50ef3def4bbd00bc (patch)
treea4840118737e783969ea35cab3460f56aeeea76e /lib/Target
parent7f6cc0ccb515f5d80f922111cd45407b99439a3b (diff)
downloadexternal_llvm-b17f1679e3ce2b40fc2141fc50ef3def4bbd00bc.zip
external_llvm-b17f1679e3ce2b40fc2141fc50ef3def4bbd00bc.tar.gz
external_llvm-b17f1679e3ce2b40fc2141fc50ef3def4bbd00bc.tar.bz2
Make the BUILD_VECTOR lowering code much more aggressive w.r.t constant vectors.
Remove some done items from the todo list. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@27729 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target')
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.cpp143
-rw-r--r--lib/Target/PowerPC/README_ALTIVEC.txt30
2 files changed, 106 insertions, 67 deletions
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 826ff4e..4cd0a47 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -936,8 +936,6 @@ static SDOperand LowerSRA(SDOperand Op, SelectionDAG &DAG) {
// ISD::UNDEF value. For undefs, the corresponding VectorBits values are
// zero. Return true if this is not an array of constants, false if it is.
//
-// Note that VectorBits/UndefBits are returned in 'little endian' form, so
-// elements 0,1 go in VectorBits[0] and 2,3 go in VectorBits[1] for a v4i32.
static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
uint64_t UndefBits[2]) {
// Start with zero'd results.
@@ -948,7 +946,7 @@ static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
SDOperand OpVal = BV->getOperand(i);
unsigned PartNo = i >= e/2; // In the upper 128 bits?
- unsigned SlotNo = i & (e/2-1); // Which subpiece of the uint64_t it is.
+ unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.
uint64_t EltBits = 0;
if (OpVal.getOpcode() == ISD::UNDEF) {
@@ -974,6 +972,59 @@ static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
return false;
}
+// If this is a splat (repetition) of a value across the whole vector, return
+// the smallest size that splats it. For example, "0x01010101010101..." is a
+// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
+// SplatSize = 1 byte.
+static bool isConstantSplat(const uint64_t Bits128[2],
+ const uint64_t Undef128[2],
+ unsigned &SplatBits, unsigned &SplatUndef,
+ unsigned &SplatSize) {
+
+ // Don't let undefs prevent splats from matching. See if the top 64-bits are
+ // the same as the lower 64-bits, ignoring undefs.
+ if ((Bits128[0] & ~Undef128[1]) != (Bits128[1] & ~Undef128[0]))
+ return false; // Can't be a splat if two pieces don't match.
+
+ uint64_t Bits64 = Bits128[0] | Bits128[1];
+ uint64_t Undef64 = Undef128[0] & Undef128[1];
+
+ // Check that the top 32-bits are the same as the lower 32-bits, ignoring
+ // undefs.
+ if ((Bits64 & (~Undef64 >> 32)) != ((Bits64 >> 32) & ~Undef64))
+ return false; // Can't be a splat if two pieces don't match.
+
+ uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
+ uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
+
+ // If the top 16-bits are different than the lower 16-bits, ignoring
+ // undefs, we have an i32 splat.
+ if ((Bits32 & (~Undef32 >> 16)) != ((Bits32 >> 16) & ~Undef32)) {
+ SplatBits = Bits32;
+ SplatUndef = Undef32;
+ SplatSize = 4;
+ return true;
+ }
+
+ uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
+ uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
+
+ // If the top 8-bits are different than the lower 8-bits, ignoring
+ // undefs, we have an i16 splat.
+ if ((Bits16 & (uint16_t(~Undef16) >> 8)) != ((Bits16 >> 8) & ~Undef16)) {
+ SplatBits = Bits16;
+ SplatUndef = Undef16;
+ SplatSize = 2;
+ return true;
+ }
+
+ // Otherwise, we have an 8-bit splat.
+ SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
+ SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
+ SplatSize = 1;
+ return true;
+}
+
// If this is a case we can't handle, return null and let the default
// expansion code take care of it. If we CAN select this case, and if it
// selects to a single instruction, return Op. Otherwise, if we can codegen
@@ -989,54 +1040,52 @@ static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits))
return SDOperand(); // Not a constant vector.
- // See if this is all zeros.
- if ((VectorBits[0] | VectorBits[1]) == 0) {
- // Canonicalize all zero vectors to be v4i32.
- if (Op.getValueType() != MVT::v4i32) {
- SDOperand Z = DAG.getConstant(0, MVT::i32);
- Z = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Z, Z, Z, Z);
- Op = DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Z);
+ // If this is a splat (repetition) of a value across the whole vector, return
+ // the smallest size that splats it. For example, "0x01010101010101..." is a
+ // splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
+ // SplatSize = 1 byte.
+ unsigned SplatBits, SplatUndef, SplatSize;
+ if (isConstantSplat(VectorBits, UndefBits, SplatBits, SplatUndef, SplatSize)){
+ bool HasAnyUndefs = (UndefBits[0] | UndefBits[1]) != 0;
+
+ // First, handle single instruction cases.
+
+ // All zeros?
+ if (SplatBits == 0) {
+ // Canonicalize all zero vectors to be v4i32.
+ if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
+ SDOperand Z = DAG.getConstant(0, MVT::i32);
+ Z = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Z, Z, Z, Z);
+ Op = DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Z);
+ }
+ return Op;
}
- return Op;
- }
-
- // Check to see if this is something we can use VSPLTI* to form.
- MVT::ValueType CanonicalVT = MVT::Other;
- SDNode *CST = 0;
-
- if ((CST = PPC::get_VSPLTI_elt(Op.Val, 4, DAG).Val)) // vspltisw
- CanonicalVT = MVT::v4i32;
- else if ((CST = PPC::get_VSPLTI_elt(Op.Val, 2, DAG).Val)) // vspltish
- CanonicalVT = MVT::v8i16;
- else if ((CST = PPC::get_VSPLTI_elt(Op.Val, 1, DAG).Val)) // vspltisb
- CanonicalVT = MVT::v16i8;
-
- // If this matches one of the vsplti* patterns, force it to the canonical
- // type for the pattern.
- if (CST) {
- if (Op.getValueType() != CanonicalVT) {
- // Convert the splatted element to the right element type.
- SDOperand Elt = DAG.getNode(ISD::TRUNCATE,
- MVT::getVectorBaseType(CanonicalVT),
- SDOperand(CST, 0));
- std::vector<SDOperand> Ops(MVT::getVectorNumElements(CanonicalVT), Elt);
- SDOperand Res = DAG.getNode(ISD::BUILD_VECTOR, CanonicalVT, Ops);
- Op = DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res);
+
+ // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
+ int32_t SextVal= int32_t(SplatBits << (32-8*SplatSize)) >> (32-8*SplatSize);
+ if (SextVal >= -16 && SextVal <= 15) {
+ const MVT::ValueType VTys[] = { // canonical VT to use for each size.
+ MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
+ };
+ MVT::ValueType CanonicalVT = VTys[SplatSize-1];
+
+ // If this is a non-canonical splat for this value,
+ if (Op.getValueType() != CanonicalVT || HasAnyUndefs) {
+ SDOperand Elt = DAG.getConstant(SplatBits,
+ MVT::getVectorBaseType(CanonicalVT));
+ std::vector<SDOperand> Ops(MVT::getVectorNumElements(CanonicalVT), Elt);
+ SDOperand Res = DAG.getNode(ISD::BUILD_VECTOR, CanonicalVT, Ops);
+ Op = DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res);
+ }
+ return Op;
}
- return Op;
- }
-
- // If this is some other splat of 4-byte elements, see if we can handle it
- // in another way.
- // FIXME: Make this more undef happy and work with other widths (1,2 bytes).
- if (VectorBits[0] == VectorBits[1] &&
- unsigned(VectorBits[0]) == unsigned(VectorBits[0] >> 32)) {
- unsigned Bits = unsigned(VectorBits[0]);
+
// If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
// 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). These are important
// for fneg/fabs.
- if (Bits == 0x80000000 || Bits == 0x7FFFFFFF) {
+ if (SplatSize == 4 &&
+ SplatBits == 0x80000000 || SplatBits == (0x7FFFFFFF&~SplatUndef)) {
// Make -1 and vspltisw -1:
SDOperand OnesI = DAG.getConstant(~0U, MVT::i32);
SDOperand OnesV = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
@@ -1049,13 +1098,13 @@ static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
OnesV, OnesV);
// If this is 0x7FFF_FFFF, xor by OnesV to invert it.
- if (Bits == 0x7FFFFFFF)
+ if (SplatBits == 0x80000000)
Res = DAG.getNode(ISD::XOR, MVT::v4i32, Res, OnesV);
return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res);
}
}
-
+
return SDOperand();
}
diff --git a/lib/Target/PowerPC/README_ALTIVEC.txt b/lib/Target/PowerPC/README_ALTIVEC.txt
index 33e3e2a..3c928ad 100644
--- a/lib/Target/PowerPC/README_ALTIVEC.txt
+++ b/lib/Target/PowerPC/README_ALTIVEC.txt
@@ -40,8 +40,16 @@ a load/store/lve*x sequence.
//===----------------------------------------------------------------------===//
There are a wide range of vector constants we can generate with combinations of
-altivec instructions. Examples
- GCC does: "t=vsplti*, r = t+t" for constants it can't generate with one vsplti
+altivec instructions.
+
+Examples (these work with all widths):
+ Splat(+/- 16,18,20,22,24,28,30): t = vsplti I/2, r = t+t
+ Splat(+/- 17,19,21,23,25,29): t = vsplti +/-15, t2 = vsplti I-15, r=t + t2
+ Splat(31): t = vsplti FB, r = srl t,t
+ Splat(256): t = vsplti 1, r = vsldoi t, t, 1
+
+Lots more are listed here:
+http://www.informatik.uni-bremen.de/~hobold/AltiVec.html
This should be added to the ISD::BUILD_VECTOR case in
PPCTargetLowering::LowerOperation.
@@ -52,19 +60,6 @@ FABS/FNEG can be codegen'd with the appropriate and/xor of -0.0.
//===----------------------------------------------------------------------===//
-Codegen the constant here with something better than a constant pool load.
-
-void %test_f(<4 x float>* %P, <4 x float>* %Q, float %X) {
- %tmp = load <4 x float>* %Q
- %tmp = cast <4 x float> %tmp to <4 x int>
- %tmp1 = and <4 x int> %tmp, < int 2147483647, int 2147483647, int 2147483647, int 2147483647 >
- %tmp2 = cast <4 x int> %tmp1 to <4 x float>
- store <4 x float> %tmp2, <4 x float>* %P
- ret void
-}
-
-//===----------------------------------------------------------------------===//
-
For functions that use altivec AND have calls, we are VRSAVE'ing all call
clobbered regs.
@@ -92,11 +87,6 @@ cond code on CR6.
//===----------------------------------------------------------------------===//
-SROA should turn "vector unions" into the appropriate insert/extract element
-instructions.
-
-//===----------------------------------------------------------------------===//
-
We need a way to teach tblgen that some operands of an intrinsic are required to
be constants. The verifier should enforce this constraint.