diff options
| author | Dan Gohman <gohman@apple.com> | 2010-09-02 21:18:42 +0000 |
|---|---|---|
| committer | Dan Gohman <gohman@apple.com> | 2010-09-02 21:18:42 +0000 |
| commit | 6f8a8cd3933c07a1431b798599a1e6720bfcda28 (patch) | |
| tree | 33efa1083f81d0600127fc08d38b6aac0ff7d819 /lib | |
| parent | b8151e3f0ea922dd36032c06f7cb80826e264b82 (diff) | |
| download | external_llvm-6f8a8cd3933c07a1431b798599a1e6720bfcda28.zip external_llvm-6f8a8cd3933c07a1431b798599a1e6720bfcda28.tar.gz external_llvm-6f8a8cd3933c07a1431b798599a1e6720bfcda28.tar.bz2 | |
Don't narrow the load and store in a load+twiddle+store sequence unless
there are clearly no stores between the load and the store. This fixes
the miscompile reported as PR7833.
This disables the optimization exercised by
test/CodeGen/X86/narrow_op-2.ll, which is safe, but awkward to prove
safe. Move that test case to X86's README.txt.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@112861 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
| -rw-r--r-- | lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 3 | ||||
| -rw-r--r-- | lib/Target/X86/README.txt | 45 |
2 files changed, 47 insertions, 1 deletions
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 7ffbf8d..c9c4d91 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5798,7 +5798,8 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
     return SDValue();
 
   SDValue N0 = Value.getOperand(0);
-  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse()) {
+  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
+      Chain == SDValue(N0.getNode(), 1)) {
     LoadSDNode *LD = cast<LoadSDNode>(N0);
     if (LD->getBasePtr() != Ptr)
       return SDValue();
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index 69c6d33..a305ae6 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -1915,3 +1915,48 @@ And the following x86 code:
 It should be possible to eliminate the sign extensions.
 
 //===---------------------------------------------------------------------===//
+
+LLVM misses a load+store narrowing opportunity in this code:
+
+%struct.bf = type { i64, i16, i16, i32 }
+
+@bfi = external global %struct.bf* ; <%struct.bf**> [#uses=2]
+
+define void @t1() nounwind ssp {
+entry:
+  %0 = load %struct.bf** @bfi, align 8 ; <%struct.bf*> [#uses=1]
+  %1 = getelementptr %struct.bf* %0, i64 0, i32 1 ; <i16*> [#uses=1]
+  %2 = bitcast i16* %1 to i32* ; <i32*> [#uses=2]
+  %3 = load i32* %2, align 1 ; <i32> [#uses=1]
+  %4 = and i32 %3, -65537 ; <i32> [#uses=1]
+  store i32 %4, i32* %2, align 1
+  %5 = load %struct.bf** @bfi, align 8 ; <%struct.bf*> [#uses=1]
+  %6 = getelementptr %struct.bf* %5, i64 0, i32 1 ; <i16*> [#uses=1]
+  %7 = bitcast i16* %6 to i32* ; <i32*> [#uses=2]
+  %8 = load i32* %7, align 1 ; <i32> [#uses=1]
+  %9 = and i32 %8, -131073 ; <i32> [#uses=1]
+  store i32 %9, i32* %7, align 1
+  ret void
+}
+
+LLVM currently emits this:
+
+  movq bfi(%rip), %rax
+  andl $-65537, 8(%rax)
+  movq bfi(%rip), %rax
+  andl $-131073, 8(%rax)
+  ret
+
+It could narrow the loads and stores to emit this:
+
+  movq bfi(%rip), %rax
+  andb $-2, 10(%rax)
+  movq bfi(%rip), %rax
+  andb $-3, 10(%rax)
+  ret
+
+The trouble is that there is a TokenFactor between the store and the
+load, making it non-trivial to determine if there's anything between
+the load and the store which would prohibit narrowing.
+
+//===---------------------------------------------------------------------===//
