Implement support for using modeling implicit-zero-extension on x86-64

with SUBREG_TO_REG, teach SimpleRegisterCoalescing to coalesce SUBREG_TO_REG instructions (which are similar to INSERT_SUBREG instructions), and teach the DAGCombiner to take advantage of this on targets which support it. This eliminates many redundant zero-extension operations on x86-64. This adds a new TargetLowering hook, isZExtFree. It's similar to isTruncateFree, except it only applies to actual definitions, and not no-op truncates which may not zero the high bits. Also, this adds a new optimization to SimplifyDemandedBits: transform operations like x+y into (zext (add (trunc x), (trunc y))) on targets where all the casts are no-ops. In contexts where the high part of the add is explicitly masked off, this allows the mask operation to be eliminated. Fix the DAGCombiner to avoid undoing these transformations to eliminate casts on targets where the casts are no-ops. Also, this adds a new two-address lowering heuristic. Since two-address lowering runs before coalescing, it helps to be able to look through copies when deciding whether commuting and/or three-address conversion are profitable. Also, fix a bug in LiveInterval::MergeInClobberRanges. It didn't handle the case that a clobber range extended both before and beyond an existing live range. In that case, multiple live ranges need to be added. This was exposed by the new subreg coalescing code. Remove 2008-05-06-SpillerBug.ll. It was bugpoint-reduced, and the spiller behavior it was looking for no longer occurrs with the new instruction selection. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@68576 91177308-0d34-0410-b5e6-96231b3b80d8
author: Dan Gohman <gohman@apple.com> 2009-04-08 00:15:30 +0000
committer: Dan Gohman <gohman@apple.com> 2009-04-08 00:15:30 +0000
commit: 97121ba2afb8d566ff1bf5c4e8fc5d4077940a7f (patch)
tree: b530d1fd94181f009f5d7ff1d760c88336a67def /lib/CodeGen/SelectionDAG
parent: a49a671efe24aa6e2e9e2280cf714ef97a40f177 (diff)
download: external_llvm-97121ba2afb8d566ff1bf5c4e8fc5d4077940a7f.zip
external_llvm-97121ba2afb8d566ff1bf5c4e8fc5d4077940a7f.tar.gz
external_llvm-97121ba2afb8d566ff1bf5c4e8fc5d4077940a7f.tar.bz2
2 files changed, 87 insertions, 9 deletions
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 78d5d40..e874f1b 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1666,9 +1666,11 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
   // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
   // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
   // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
-  // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y))
+  // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
   if ((N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND||
-       N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::TRUNCATE) &&
+       N0.getOpcode() == ISD::SIGN_EXTEND ||
+       (N0.getOpcode() == ISD::TRUNCATE &&
+        !TLI.isTruncateFree(N0.getOperand(0).getValueType(), VT))) &&
       N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()) {
     SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(),
                                  N0.getOperand(0).getValueType(),
@@ -3121,10 +3123,14 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
     return DAG.getZeroExtendInReg(Op, N->getDebugLoc(), N0.getValueType());
   }
 
-  // fold (zext (and (trunc x), cst)) -> (and x, cst).
+  // Fold (zext (and (trunc x), cst)) -> (and x, cst),
+  // if either of the casts is not free.
   if (N0.getOpcode() == ISD::AND &&
       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
-      N0.getOperand(1).getOpcode() == ISD::Constant) {
+      N0.getOperand(1).getOpcode() == ISD::Constant &&
+      (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
+                           N0.getValueType()) ||
+       !TLI.isZExtFree(N0.getValueType(), VT))) {
     SDValue X = N0.getOperand(0).getOperand(0);
     if (X.getValueType().bitsLT(VT)) {
       X = DAG.getNode(ISD::ANY_EXTEND, X.getDebugLoc(), VT, X);
@@ -3252,10 +3258,13 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
     return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, TruncOp);
   }
 
-  // fold (aext (and (trunc x), cst)) -> (and x, cst).
+  // Fold (aext (and (trunc x), cst)) -> (and x, cst)
+  // if the trunc is not free.
   if (N0.getOpcode() == ISD::AND &&
       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
-      N0.getOperand(1).getOpcode() == ISD::Constant) {
+      N0.getOperand(1).getOpcode() == ISD::Constant &&
+      !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
+                          N0.getValueType())) {
     SDValue X = N0.getOperand(0).getOperand(0);
     if (X.getValueType().bitsLT(VT)) {
       X = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, X);
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 79a48a6..7b83a12 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -777,6 +777,48 @@ bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op,
   return false;
 }
 
+/// ShrinkDemandedOp - Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the
+/// casts are free.  This uses isZExtFree and ZERO_EXTEND for the widening
+/// cast, but it could be generalized for targets with other types of
+/// implicit widening casts.
+bool
+TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op,
+                                                    unsigned BitWidth,
+                                                    const APInt &Demanded,
+                                                    DebugLoc dl) {
+  assert(Op.getNumOperands() == 2 &&
+         "ShrinkDemandedOp only supports binary operators!");
+  assert(Op.getNode()->getNumValues() == 1 &&
+         "ShrinkDemandedOp only supports nodes with one result!");
+
+  // Don't do this if the node has another user, which may require the
+  // full value.
+  if (!Op.getNode()->hasOneUse())
+    return false;
+
+  // Search for the smallest integer type with free casts to and from
+  // Op's type. For expedience, just check power-of-2 integer types.
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  unsigned SmallVTBits = BitWidth - Demanded.countLeadingZeros();
+  if (!isPowerOf2_32(SmallVTBits))
+    SmallVTBits = NextPowerOf2(SmallVTBits);
+  for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
+    MVT SmallVT = MVT::getIntegerVT(SmallVTBits);
+    if (TLI.isTruncateFree(Op.getValueType(), SmallVT) &&
+        TLI.isZExtFree(SmallVT, Op.getValueType())) {
+      // We found a type with free casts.
+      SDValue X = DAG.getNode(Op.getOpcode(), dl, SmallVT,
+                              DAG.getNode(ISD::TRUNCATE, dl, SmallVT,
+                                          Op.getNode()->getOperand(0)),
+                              DAG.getNode(ISD::TRUNCATE, dl, SmallVT,
+                                          Op.getNode()->getOperand(1)));
+      SDValue Z = DAG.getNode(ISD::ZERO_EXTEND, dl, Op.getValueType(), X);
+      return CombineTo(Op, Z);
+    }
+  }
+  return false;
+}
+
 /// SimplifyDemandedBits - Look at Op.  At this point, we know that only the
 /// DemandedMask bits of the result of Op are ever used downstream.  If we can
 /// use this information to simplify Op, create a new simplified DAG node and
@@ -865,7 +907,10 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
     // If the RHS is a constant, see if we can simplify it.
     if (TLO.ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask))
       return true;
-      
+    // If the operation can be done in a smaller type, do so.
+    if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+      return true;
+
     // Output known-1 bits are only known if set in both the LHS & RHS.
     KnownOne &= KnownOne2;
     // Output known-0 are known to be clear if zero in either the LHS | RHS.
@@ -896,7 +941,10 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
     // If the RHS is a constant, see if we can simplify it.
     if (TLO.ShrinkDemandedConstant(Op, NewMask))
       return true;
-          
+    // If the operation can be done in a smaller type, do so.
+    if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+      return true;
+
     // Output known-0 bits are only known if clear in both the LHS & RHS.
     KnownZero &= KnownZero2;
     // Output known-1 are known to be set if set in either the LHS | RHS.
@@ -918,7 +966,10 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
       return TLO.CombineTo(Op, Op.getOperand(0));
     if ((KnownZero2 & NewMask) == NewMask)
       return TLO.CombineTo(Op, Op.getOperand(1));
-      
+    // If the operation can be done in a smaller type, do so.
+    if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+      return true;
+
     // If all of the unknown bits are known to be zero on one side or the other
     // (but not both) turn this into an *inclusive* or.
     //    e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
@@ -1333,6 +1384,24 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
     }
 #endif
     break;
+  case ISD::ADD:
+  case ISD::MUL:
+  case ISD::SUB: {
+    // Add, Sub, and Mul don't demand any bits in positions beyond that
+    // of the highest bit demanded of them.
+    APInt LoMask = APInt::getLowBitsSet(BitWidth,
+                                        BitWidth - NewMask.countLeadingZeros());
+    if (SimplifyDemandedBits(Op.getOperand(0), LoMask, KnownZero2,
+                             KnownOne2, TLO, Depth+1))
+      return true;
+    if (SimplifyDemandedBits(Op.getOperand(1), LoMask, KnownZero2,
+                             KnownOne2, TLO, Depth+1))
+      return true;
+    // See if the operation should be performed at a smaller bit width.
+    if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+      return true;
+  }
+  // FALL THROUGH
   default:
     // Just use ComputeMaskedBits to compute output bits.
     TLO.DAG.ComputeMaskedBits(Op, NewMask, KnownZero, KnownOne, Depth);
author	Dan Gohman <gohman@apple.com>	2009-04-08 00:15:30 +0000
committer	Dan Gohman <gohman@apple.com>	2009-04-08 00:15:30 +0000
commit	97121ba2afb8d566ff1bf5c4e8fc5d4077940a7f (patch)
tree	b530d1fd94181f009f5d7ff1d760c88336a67def /lib/CodeGen/SelectionDAG
parent	a49a671efe24aa6e2e9e2280cf714ef97a40f177 (diff)
download	external_llvm-97121ba2afb8d566ff1bf5c4e8fc5d4077940a7f.zip external_llvm-97121ba2afb8d566ff1bf5c4e8fc5d4077940a7f.tar.gz external_llvm-97121ba2afb8d566ff1bf5c4e8fc5d4077940a7f.tar.bz2