aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChris Lattner <sabre@nondot.org>2007-12-28 21:50:40 +0000
committerChris Lattner <sabre@nondot.org>2007-12-28 21:50:40 +0000
commit9bfcc624bad51270cc033f09a0a23bfc9cabe094 (patch)
treec72b6c2703ae641126941a4e9fb5fc663d0e8dc4
parent2910f68a5240141933483b3a5db0a703648b3d24 (diff)
downloadexternal_llvm-9bfcc624bad51270cc033f09a0a23bfc9cabe094.zip
external_llvm-9bfcc624bad51270cc033f09a0a23bfc9cabe094.tar.gz
external_llvm-9bfcc624bad51270cc033f09a0a23bfc9cabe094.tar.bz2
add a note.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@45387 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/X86/README.txt32
1 files changed, 32 insertions, 0 deletions
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index ad15b14..3bb92d1 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -1585,3 +1585,35 @@ movl $0, 124(%esp)
if the flags of the xor are dead.
//===---------------------------------------------------------------------===//
+
+This testcase misses a read/modify/write opportunity (from PR1425):
+
+void vertical_decompose97iH1(int *b0, int *b1, int *b2, int width){
+ int i;
+ for(i=0; i<width; i++)
+ b1[i] += (1*(b0[i] + b2[i])+0)>>0;
+}
+
+We compile it down to:
+
+LBB1_2: # bb
+ movl (%esi,%edi,4), %ebx
+ addl (%ecx,%edi,4), %ebx
+ addl (%edx,%edi,4), %ebx
+ movl %ebx, (%ecx,%edi,4)
+ incl %edi
+ cmpl %eax, %edi
+ jne LBB1_2 # bb
+
+The inner loop should add to the memory location (%ecx,%edi,4), saving
+a mov. Something like:
+
+ movl (%esi,%edi,4), %ebx
+ addl (%edx,%edi,4), %ebx
+ addl %ebx, (%ecx,%edi,4)
+
+Additionally, LSR should rewrite the exit condition of the loop to use
+a stride-4 IV, which would allow all the scales in the loop to go away.
+This would result in smaller code and more efficient microops.
+
+//===---------------------------------------------------------------------===//