diff options
author | Christopher Lamb <christopher.lamb@gmail.com> | 2008-03-19 08:30:06 +0000 |
---|---|---|
committer | Christopher Lamb <christopher.lamb@gmail.com> | 2008-03-19 08:30:06 +0000 |
commit | 15cbde3cf6542ec9c120f59d5d8f3586f5f332c6 (patch) | |
tree | 323a51a31cb8a7c4e37df464dd13c8708c085f7b | |
parent | e6d5d39c072d3b16ff4183c515d7ccf198192958 (diff) | |
download | external_llvm-15cbde3cf6542ec9c120f59d5d8f3586f5f332c6.zip external_llvm-15cbde3cf6542ec9c120f59d5d8f3586f5f332c6.tar.gz external_llvm-15cbde3cf6542ec9c120f59d5d8f3586f5f332c6.tar.bz2 |
Fix X86's isTruncateFree to not claim that truncate to i1 is free. This fixes Bill's testcase that failed for r48491.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@48542 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 25 | ||||
-rw-r--r-- | lib/Target/X86/README.txt | 44 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 4 | ||||
-rw-r--r-- | test/CodeGen/X86/field-extract-use-trunc.ll | 1 |
4 files changed, 27 insertions, 47 deletions
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index acc00fd..f33946c 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -2383,6 +2383,31 @@ SDOperand DAGCombiner::visitSRA(SDNode *N) { DAG.getConstant(Sum, N1C->getValueType(0))); } } + + // fold sra (shl X, m), result_size - n + // -> (sign_extend (trunc (shl X, result_size - n - m))) for + // result_size - n != m. If truncate is free for the target sext(shl) is + // likely to result in better code. + if (N0.getOpcode() == ISD::SHL) { + // Get the two constanst of the shifts, CN0 = m, CN = n. + const ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + if (N01C && N1C) { + // Determine if the truncate type's bitsize would correspond to + // an integer type for this target. + unsigned VTValSize = MVT::getSizeInBits(VT); + MVT::ValueType TruncVT = MVT::getIntegerType(VTValSize - N1C->getValue()); + unsigned ShiftAmt = N1C->getValue() - N01C->getValue(); + + // If the shift wouldn't be a noop, the truncated type is an actual type, + // and the truncate is free, then proceed with the transform. + if (ShiftAmt != 0 && TLI.isTruncateFree(VT, TruncVT)) { + SDOperand Amt = DAG.getConstant(ShiftAmt, TLI.getShiftAmountTy()); + SDOperand Shift = DAG.getNode(ISD::SRL, VT, N0.getOperand(0), Amt); + SDOperand Trunc = DAG.getNode(ISD::TRUNCATE, TruncVT, Shift); + return DAG.getNode(ISD::SIGN_EXTEND, N->getValueType(0), Trunc); + } + } + } // Simplify, based on bits shifted out of the LHS. if (N1C && SimplifyDemandedBits(SDOperand(N, 0))) diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt index 1588b49..ceda932 100644 --- a/lib/Target/X86/README.txt +++ b/lib/Target/X86/README.txt @@ -1647,47 +1647,3 @@ The coalescer could coalesce "edx" with "eax" to avoid the movl in LBB1_2 if it commuted the addl in LBB1_1. //===---------------------------------------------------------------------===// - -These two functions perform identical operations: - -define i32 @test(i32 %f12) { - %tmp7.25 = lshr i32 %f12, 16 - %tmp7.26 = trunc i32 %tmp7.25 to i8 - %tmp78.2 = sext i8 %tmp7.26 to i32 - ret i32 %tmp78.2 -} - -define i32 @test2(i32 %f12) { - %f11 = shl i32 %f12, 8 - %tmp7.25 = ashr i32 %f11, 24 - ret i32 %tmp7.25 -} - -but the first compiles into significantly better code on x86-32: - -_test: - movsbl 6(%esp), %eax - ret -_test2: - movl 4(%esp), %eax - shll $8, %eax - sarl $24, %eax - ret - -and on x86-64: - -_test: - shrl $16, %edi - movsbl %dil, %eax - ret -_test2: - shll $8, %edi - movl %edi, %eax - sarl $24, %eax - ret - -I would like instcombine to canonicalize the first into the second (since it is -shorter and doesn't involve type width changes) but the x86 backend needs to do -the right thing with the later sequence first. - -//===---------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 5a05aba..93fb802 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -5662,7 +5662,7 @@ bool X86TargetLowering::isTruncateFree(const Type *Ty1, const Type *Ty2) const { return false; unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); unsigned NumBits2 = Ty2->getPrimitiveSizeInBits(); - if (NumBits1 <= NumBits2) + if (NumBits1 <= NumBits2 || NumBits2 < 8) return false; return Subtarget->is64Bit() || NumBits1 < 64; } @@ -5673,7 +5673,7 @@ bool X86TargetLowering::isTruncateFree(MVT::ValueType VT1, return false; unsigned NumBits1 = MVT::getSizeInBits(VT1); unsigned NumBits2 = MVT::getSizeInBits(VT2); - if (NumBits1 <= NumBits2) + if (NumBits1 <= NumBits2 || NumBits2 < 8) return false; return Subtarget->is64Bit() || NumBits1 < 64; } diff --git a/test/CodeGen/X86/field-extract-use-trunc.ll b/test/CodeGen/X86/field-extract-use-trunc.ll index bb46bc5..c4f9587 100644 --- a/test/CodeGen/X86/field-extract-use-trunc.ll +++ b/test/CodeGen/X86/field-extract-use-trunc.ll @@ -1,6 +1,5 @@ ; RUN: llvm-as < %s | llc -march=x86 | grep sar | count 1 ; RUN: llvm-as < %s | llc -march=x86-64 | not grep sar -; XFAIL: * define i32 @test(i32 %f12) { %tmp7.25 = lshr i32 %f12, 16 |