aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDaniel Dunbar <daniel@zuster.org>2010-06-23 17:09:26 +0000
committerDaniel Dunbar <daniel@zuster.org>2010-06-23 17:09:26 +0000
commitcbe762b5d165c565feb98b745e93b71d208a1e36 (patch)
tree0f6f0b75c5f50a24a09c41e4224b54fc1c4c0123
parentf1ab49e83abccd9f7b885e74172419249bd5fd8a (diff)
downloadexternal_llvm-cbe762b5d165c565feb98b745e93b71d208a1e36.zip
external_llvm-cbe762b5d165c565feb98b745e93b71d208a1e36.tar.gz
external_llvm-cbe762b5d165c565feb98b745e93b71d208a1e36.tar.bz2
Revert r106263, "Fold the ShrinkDemandedOps pass into the regular DAGCombiner pass,"... it was causing both 'file' (with clang) and 176.gcc (with llvm-gcc) to be miscompiled.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@106634 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--include/llvm/CodeGen/SelectionDAGISel.h1
-rw-r--r--include/llvm/Target/TargetLowering.h6
-rw-r--r--lib/CodeGen/SelectionDAG/DAGCombiner.cpp45
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAG.cpp8
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp100
-rw-r--r--lib/CodeGen/SelectionDAG/TargetLowering.cpp8
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp6
-rw-r--r--test/CodeGen/X86/shift-folding.ll5
-rw-r--r--test/CodeGen/X86/store-narrow.ll2
9 files changed, 119 insertions, 62 deletions
diff --git a/include/llvm/CodeGen/SelectionDAGISel.h b/include/llvm/CodeGen/SelectionDAGISel.h
index f22ac90..3817580 100644
--- a/include/llvm/CodeGen/SelectionDAGISel.h
+++ b/include/llvm/CodeGen/SelectionDAGISel.h
@@ -292,6 +292,7 @@ private:
MachineBasicBlock *CodeGenAndEmitDAG(MachineBasicBlock *BB);
void LowerArguments(const BasicBlock *BB);
+ void ShrinkDemandedOps();
void ComputeLiveOutVRegInfo();
/// Create the scheduler. If a specific scheduler was specified
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index 7ba67e4..b44fa71 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -773,12 +773,14 @@ public:
SelectionDAG &DAG;
bool LegalTys;
bool LegalOps;
+ bool ShrinkOps;
SDValue Old;
SDValue New;
explicit TargetLoweringOpt(SelectionDAG &InDAG,
- bool LT, bool LO) :
- DAG(InDAG), LegalTys(LT), LegalOps(LO) {}
+ bool LT, bool LO,
+ bool Shrink = false) :
+ DAG(InDAG), LegalTys(LT), LegalOps(LO), ShrinkOps(Shrink) {}
bool LegalTypes() const { return LegalTys; }
bool LegalOperations() const { return LegalOps; }
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index a72415f..81a382b 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2030,7 +2030,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
// fold (OP (zext x), (zext y)) -> (zext (OP x, y))
// fold (OP (sext x), (sext y)) -> (sext (OP x, y))
// fold (OP (aext x), (aext y)) -> (aext (OP x, y))
- // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
+ // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y))
//
// do not sink logical op inside of a vector extend, since it may combine
// into a vsetcc.
@@ -2040,10 +2040,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
// Avoid infinite looping with PromoteIntBinOp.
(N0.getOpcode() == ISD::ANY_EXTEND &&
(!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
- (N0.getOpcode() == ISD::TRUNCATE &&
- (!TLI.isZExtFree(VT, Op0VT) ||
- !TLI.isTruncateFree(Op0VT, VT)) &&
- TLI.isTypeLegal(Op0VT))) &&
+ (N0.getOpcode() == ISD::TRUNCATE && TLI.isTypeLegal(Op0VT))) &&
!VT.isVector() &&
Op0VT == N1.getOperand(0).getValueType() &&
(!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
@@ -2430,11 +2427,6 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
if (SDNode *Rot = MatchRotate(N0, N1, N->getDebugLoc()))
return SDValue(Rot, 0);
- // Simplify the operands using demanded-bits information.
- if (!VT.isVector() &&
- SimplifyDemandedBits(SDValue(N, 0)))
- return SDValue(N, 0);
-
return SDValue();
}
@@ -3168,11 +3160,6 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
return NewSRL;
}
- // Attempt to convert a srl of a load into a narrower zero-extending load.
- SDValue NarrowLoad = ReduceLoadWidth(N);
- if (NarrowLoad.getNode())
- return NarrowLoad;
-
// Here is a common situation. We want to optimize:
//
// %a = ...
@@ -3650,7 +3637,10 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
// fold (zext (truncate x)) -> (and x, mask)
if (N0.getOpcode() == ISD::TRUNCATE &&
- (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) {
+ (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) &&
+ (!TLI.isTruncateFree(N0.getOperand(0).getValueType(),
+ N0.getValueType()) ||
+ !TLI.isZExtFree(N0.getValueType(), VT))) {
SDValue Op = N0.getOperand(0);
if (Op.getValueType().bitsLT(VT)) {
Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op);
@@ -4036,7 +4026,6 @@ SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
/// extended, also fold the extension to form a extending load.
SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
unsigned Opc = N->getOpcode();
-
ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -4053,15 +4042,6 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
if (LegalOperations && !TLI.isLoadExtLegal(ISD::SEXTLOAD, ExtVT))
return SDValue();
- } else if (Opc == ISD::SRL) {
- // Annother special-case: SRL is basically zero-extending a narrower
- // value.
- ExtType = ISD::ZEXTLOAD;
- N0 = SDValue(N, 0);
- ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
- if (!N01) return SDValue();
- ExtVT = EVT::getIntegerVT(*DAG.getContext(),
- VT.getSizeInBits() - N01->getZExtValue());
}
unsigned EVTBits = ExtVT.getSizeInBits();
@@ -4265,17 +4245,8 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
// fold (truncate (load x)) -> (smaller load x)
// fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
- if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
- SDValue Reduced = ReduceLoadWidth(N);
- if (Reduced.getNode())
- return Reduced;
- }
-
- // Simplify the operands using demanded-bits information.
- if (!VT.isVector() &&
- SimplifyDemandedBits(SDValue(N, 0)))
- return SDValue(N, 0);
-
+ if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT))
+ return ReduceLoadWidth(N);
return SDValue();
}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 4cbfaed..8197076 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2470,18 +2470,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
VT.getVectorNumElements() ==
Operand.getValueType().getVectorNumElements()) &&
"Vector element count mismatch!");
-
if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
OpOpcode == ISD::ANY_EXTEND)
// (ext (zext x)) -> (zext x) and (ext (sext x)) -> (sext x)
return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
-
- // (ext (trunx x)) -> x
- if (OpOpcode == ISD::TRUNCATE) {
- SDValue OpOp = Operand.getNode()->getOperand(0);
- if (OpOp.getValueType() == VT)
- return OpOp;
- }
break;
case ISD::TRUNCATE:
assert(VT.isInteger() && Operand.getValueType().isInteger() &&
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 3b0ef18..15c1ae5 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -372,6 +372,102 @@ public:
};
}
+/// TrivialTruncElim - Eliminate some trivial nops that can result from
+/// ShrinkDemandedOps: (trunc (ext n)) -> n.
+static bool TrivialTruncElim(SDValue Op,
+ TargetLowering::TargetLoweringOpt &TLO) {
+ SDValue N0 = Op.getOperand(0);
+ EVT VT = Op.getValueType();
+ if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
+ N0.getOpcode() == ISD::SIGN_EXTEND ||
+ N0.getOpcode() == ISD::ANY_EXTEND) &&
+ N0.getOperand(0).getValueType() == VT) {
+ return TLO.CombineTo(Op, N0.getOperand(0));
+ }
+ return false;
+}
+
+/// ShrinkDemandedOps - A late transformation pass that shrink expressions
+/// using TargetLowering::TargetLoweringOpt::ShrinkDemandedOp. It converts
+/// x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
+void SelectionDAGISel::ShrinkDemandedOps() {
+ SmallVector<SDNode*, 128> Worklist;
+ SmallPtrSet<SDNode*, 128> InWorklist;
+
+ // Add all the dag nodes to the worklist.
+ Worklist.reserve(CurDAG->allnodes_size());
+ for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
+ E = CurDAG->allnodes_end(); I != E; ++I) {
+ Worklist.push_back(I);
+ InWorklist.insert(I);
+ }
+
+ TargetLowering::TargetLoweringOpt TLO(*CurDAG, true, true, true);
+ while (!Worklist.empty()) {
+ SDNode *N = Worklist.pop_back_val();
+ InWorklist.erase(N);
+
+ if (N->use_empty() && N != CurDAG->getRoot().getNode()) {
+ // Deleting this node may make its operands dead, add them to the worklist
+ // if they aren't already there.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (InWorklist.insert(N->getOperand(i).getNode()))
+ Worklist.push_back(N->getOperand(i).getNode());
+
+ CurDAG->DeleteNode(N);
+ continue;
+ }
+
+ // Run ShrinkDemandedOp on scalar binary operations.
+ if (N->getNumValues() != 1 ||
+ !N->getValueType(0).isSimple() || !N->getValueType(0).isInteger())
+ continue;
+
+ unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits();
+ APInt Demanded = APInt::getAllOnesValue(BitWidth);
+ APInt KnownZero, KnownOne;
+ if (!TLI.SimplifyDemandedBits(SDValue(N, 0), Demanded,
+ KnownZero, KnownOne, TLO) &&
+ (N->getOpcode() != ISD::TRUNCATE ||
+ !TrivialTruncElim(SDValue(N, 0), TLO)))
+ continue;
+
+ // Revisit the node.
+ assert(!InWorklist.count(N) && "Already in worklist");
+ Worklist.push_back(N);
+ InWorklist.insert(N);
+
+ // Replace the old value with the new one.
+ DEBUG(errs() << "\nShrinkDemandedOps replacing ";
+ TLO.Old.getNode()->dump(CurDAG);
+ errs() << "\nWith: ";
+ TLO.New.getNode()->dump(CurDAG);
+ errs() << '\n');
+
+ if (InWorklist.insert(TLO.New.getNode()))
+ Worklist.push_back(TLO.New.getNode());
+
+ SDOPsWorkListRemover DeadNodes(Worklist, InWorklist);
+ CurDAG->ReplaceAllUsesOfValueWith(TLO.Old, TLO.New, &DeadNodes);
+
+ if (!TLO.Old.getNode()->use_empty()) continue;
+
+ for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands();
+ i != e; ++i) {
+ SDNode *OpNode = TLO.Old.getNode()->getOperand(i).getNode();
+ if (OpNode->hasOneUse()) {
+ // Add OpNode to the end of the list to revisit.
+ DeadNodes.RemoveFromWorklist(OpNode);
+ Worklist.push_back(OpNode);
+ InWorklist.insert(OpNode);
+ }
+ }
+
+ DeadNodes.RemoveFromWorklist(TLO.Old.getNode());
+ CurDAG->DeleteNode(TLO.Old.getNode());
+ }
+}
+
void SelectionDAGISel::ComputeLiveOutVRegInfo() {
SmallPtrSet<SDNode*, 128> VisitedNodes;
SmallVector<SDNode*, 128> Worklist;
@@ -521,8 +617,10 @@ MachineBasicBlock *SelectionDAGISel::CodeGenAndEmitDAG(MachineBasicBlock *BB) {
DEBUG(dbgs() << "Optimized legalized selection DAG:\n"; CurDAG->dump());
- if (OptLevel != CodeGenOpt::None)
+ if (OptLevel != CodeGenOpt::None) {
+ ShrinkDemandedOps();
ComputeLiveOutVRegInfo();
+ }
if (ViewISelDAGs) CurDAG->viewGraph("isel input for " + BlockName);
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index bfd689b..06f29b8 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1075,7 +1075,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (TLO.ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask))
return true;
// If the operation can be done in a smaller type, do so.
- if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+ if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
return true;
// Output known-1 bits are only known if set in both the LHS & RHS.
@@ -1109,7 +1109,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (TLO.ShrinkDemandedConstant(Op, NewMask))
return true;
// If the operation can be done in a smaller type, do so.
- if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+ if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
return true;
// Output known-0 bits are only known if clear in both the LHS & RHS.
@@ -1134,7 +1134,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if ((KnownZero2 & NewMask) == NewMask)
return TLO.CombineTo(Op, Op.getOperand(1));
// If the operation can be done in a smaller type, do so.
- if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+ if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
return true;
// If all of the unknown bits are known to be zero on one side or the other
@@ -1581,7 +1581,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
KnownOne2, TLO, Depth+1))
return true;
// See if the operation should be performed at a smaller bit width.
- if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+ if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
return true;
}
// FALL THROUGH
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index dd1c4a1..bd95c85 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -9627,10 +9627,8 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
if (ShAmt1.getOpcode() == ISD::SUB) {
SDValue Sum = ShAmt1.getOperand(0);
if (ConstantSDNode *SumC = dyn_cast<ConstantSDNode>(Sum)) {
- SDValue ShAmt1Op1 = ShAmt1.getOperand(1);
- if (ShAmt1Op1.getNode()->getOpcode() == ISD::TRUNCATE)
- ShAmt1Op1 = ShAmt1Op1.getOperand(0);
- if (SumC->getSExtValue() == Bits && ShAmt1Op1 == ShAmt0)
+ if (SumC->getSExtValue() == Bits &&
+ ShAmt1.getOperand(1) == ShAmt0)
return DAG.getNode(Opc, DL, VT,
Op0, Op1,
DAG.getNode(ISD::TRUNCATE, DL,
diff --git a/test/CodeGen/X86/shift-folding.ll b/test/CodeGen/X86/shift-folding.ll
index 48ca36c..872817f 100644
--- a/test/CodeGen/X86/shift-folding.ll
+++ b/test/CodeGen/X86/shift-folding.ll
@@ -21,8 +21,3 @@ define i32* @test3(i32* %P, i32 %X) {
ret i32* %P2
}
-define fastcc i32 @test4(i32* %d) nounwind {
- %tmp4 = load i32* %d
- %tmp512 = lshr i32 %tmp4, 24
- ret i32 %tmp512
-}
diff --git a/test/CodeGen/X86/store-narrow.ll b/test/CodeGen/X86/store-narrow.ll
index 5682e7c..b1100fa 100644
--- a/test/CodeGen/X86/store-narrow.ll
+++ b/test/CodeGen/X86/store-narrow.ll
@@ -67,7 +67,7 @@ entry:
; X64: movw %si, 2(%rdi)
; X32: test4:
-; X32: movl 8(%esp), %eax
+; X32: movzwl 8(%esp), %eax
; X32: movw %ax, 2(%{{.*}})
}