aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChristopher Lamb <christopher.lamb@gmail.com>2008-03-19 08:30:06 +0000
committerChristopher Lamb <christopher.lamb@gmail.com>2008-03-19 08:30:06 +0000
commit15cbde3cf6542ec9c120f59d5d8f3586f5f332c6 (patch)
tree323a51a31cb8a7c4e37df464dd13c8708c085f7b
parente6d5d39c072d3b16ff4183c515d7ccf198192958 (diff)
downloadexternal_llvm-15cbde3cf6542ec9c120f59d5d8f3586f5f332c6.zip
external_llvm-15cbde3cf6542ec9c120f59d5d8f3586f5f332c6.tar.gz
external_llvm-15cbde3cf6542ec9c120f59d5d8f3586f5f332c6.tar.bz2
Fix X86's isTruncateFree to not claim that truncate to i1 is free. This fixes Bill's testcase that failed for r48491.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@48542 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/CodeGen/SelectionDAG/DAGCombiner.cpp25
-rw-r--r--lib/Target/X86/README.txt44
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp4
-rw-r--r--test/CodeGen/X86/field-extract-use-trunc.ll1
4 files changed, 27 insertions, 47 deletions
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index acc00fd..f33946c 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2383,6 +2383,31 @@ SDOperand DAGCombiner::visitSRA(SDNode *N) {
DAG.getConstant(Sum, N1C->getValueType(0)));
}
}
+
+ // fold (sra (shl X, m), result_size - n)
+ // -> (sign_extend (trunc (srl X, result_size - n - m))) for
+ // result_size - n != m, where the truncate is to an n-bit integer. If
+ // truncate is free for the target, sext(trunc(srl)) is likely to result in
+ // better code.
+ if (N0.getOpcode() == ISD::SHL) {
+ // Get the two constants of the shifts: N01C = m, N1C = result_size - n.
+ const ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (N01C && N1C) {
+ // Determine if the truncate type's bitsize would correspond to
+ // an integer type for this target.
+ unsigned VTValSize = MVT::getSizeInBits(VT);
+ MVT::ValueType TruncVT = MVT::getIntegerType(VTValSize - N1C->getValue());
+ unsigned ShiftAmt = N1C->getValue() - N01C->getValue();
+
+ // If the shift wouldn't be a noop, the truncated type is an actual type,
+ // and the truncate is free, then proceed with the transform.
+ if (ShiftAmt != 0 && TLI.isTruncateFree(VT, TruncVT)) {
+ SDOperand Amt = DAG.getConstant(ShiftAmt, TLI.getShiftAmountTy());
+ SDOperand Shift = DAG.getNode(ISD::SRL, VT, N0.getOperand(0), Amt);
+ SDOperand Trunc = DAG.getNode(ISD::TRUNCATE, TruncVT, Shift);
+ return DAG.getNode(ISD::SIGN_EXTEND, N->getValueType(0), Trunc);
+ }
+ }
+ }
// Simplify, based on bits shifted out of the LHS.
if (N1C && SimplifyDemandedBits(SDOperand(N, 0)))
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index 1588b49..ceda932 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -1647,47 +1647,3 @@ The coalescer could coalesce "edx" with "eax" to avoid the movl in LBB1_2
if it commuted the addl in LBB1_1.
//===---------------------------------------------------------------------===//
-
-These two functions perform identical operations:
-
-define i32 @test(i32 %f12) {
- %tmp7.25 = lshr i32 %f12, 16
- %tmp7.26 = trunc i32 %tmp7.25 to i8
- %tmp78.2 = sext i8 %tmp7.26 to i32
- ret i32 %tmp78.2
-}
-
-define i32 @test2(i32 %f12) {
- %f11 = shl i32 %f12, 8
- %tmp7.25 = ashr i32 %f11, 24
- ret i32 %tmp7.25
-}
-
-but the first compiles into significantly better code on x86-32:
-
-_test:
- movsbl 6(%esp), %eax
- ret
-_test2:
- movl 4(%esp), %eax
- shll $8, %eax
- sarl $24, %eax
- ret
-
-and on x86-64:
-
-_test:
- shrl $16, %edi
- movsbl %dil, %eax
- ret
-_test2:
- shll $8, %edi
- movl %edi, %eax
- sarl $24, %eax
- ret
-
-I would like instcombine to canonicalize the first into the second (since it is
-shorter and doesn't involve type width changes) but the x86 backend needs to do
-the right thing with the later sequence first.
-
-//===---------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 5a05aba..93fb802 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -5662,7 +5662,7 @@ bool X86TargetLowering::isTruncateFree(const Type *Ty1, const Type *Ty2) const {
return false;
unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
- if (NumBits1 <= NumBits2)
+ if (NumBits1 <= NumBits2 || NumBits2 < 8)
return false;
return Subtarget->is64Bit() || NumBits1 < 64;
}
@@ -5673,7 +5673,7 @@ bool X86TargetLowering::isTruncateFree(MVT::ValueType VT1,
return false;
unsigned NumBits1 = MVT::getSizeInBits(VT1);
unsigned NumBits2 = MVT::getSizeInBits(VT2);
- if (NumBits1 <= NumBits2)
+ if (NumBits1 <= NumBits2 || NumBits2 < 8)
return false;
return Subtarget->is64Bit() || NumBits1 < 64;
}
diff --git a/test/CodeGen/X86/field-extract-use-trunc.ll b/test/CodeGen/X86/field-extract-use-trunc.ll
index bb46bc5..c4f9587 100644
--- a/test/CodeGen/X86/field-extract-use-trunc.ll
+++ b/test/CodeGen/X86/field-extract-use-trunc.ll
@@ -1,6 +1,5 @@
; RUN: llvm-as < %s | llc -march=x86 | grep sar | count 1
; RUN: llvm-as < %s | llc -march=x86-64 | not grep sar
-; XFAIL: *
define i32 @test(i32 %f12) {
%tmp7.25 = lshr i32 %f12, 16