aboutsummaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
author Chris Lattner <sabre@nondot.org> 2006-01-16 17:53:00 +0000
committer Chris Lattner <sabre@nondot.org> 2006-01-16 17:53:00 +0000
commit 1db4b4f5c4229d69ca7a3125d59cb20676795858 (patch)
tree dbab33c1cb604cb5d23a73129f7604ee3a16d2a7 /lib
parent 84b26b600d34e7317997c695afcb4f5873a05c5f (diff)
download external_llvm-1db4b4f5c4229d69ca7a3125d59cb20676795858.zip
external_llvm-1db4b4f5c4229d69ca7a3125d59cb20676795858.tar.gz
external_llvm-1db4b4f5c4229d69ca7a3125d59cb20676795858.tar.bz2
transfer some notes from my email to somewhere useful.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@25361 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- lib/Target/PowerPC/README.txt 26
-rw-r--r-- lib/Target/X86/README.txt 45
2 files changed, 71 insertions, 0 deletions
diff --git a/lib/Target/PowerPC/README.txt b/lib/Target/PowerPC/README.txt
index 84536ef..ffae611 100644
--- a/lib/Target/PowerPC/README.txt
+++ b/lib/Target/PowerPC/README.txt
@@ -225,3 +225,29 @@ struct foo { double X, Y; };
void xxx(struct foo F);
void bar() { struct foo R = { 1.0, 2.0 }; xxx(R); }
+===-------------------------------------------------------------------------===
+
+For this:
+
+int h(int i, int j, int k) {
+ return (i==0||j==0||k == 0);
+}
+
+We currently emit this:
+
+_h:
+ cntlzw r2, r3
+ cntlzw r3, r4
+ cntlzw r4, r5
+ srwi r2, r2, 5
+ srwi r3, r3, 5
+ srwi r4, r4, 5
+ or r2, r3, r2
+ or r3, r2, r4
+ blr
+
+The ctlz/shift instructions are created by the isel, so the dag combiner doesn't
+have a chance to pull the shifts through the or's (eliminating two
+instructions). SETCC nodes should be custom lowered in this case, not expanded
+by the isel.
+
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index 4869c5e..cb206f3 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -54,6 +54,10 @@ fxch -> fucomi
fucomi jl X
jg X
+Ideas:
+http://gcc.gnu.org/ml/gcc-patches/2004-11/msg02410.html
+
+
//===---------------------------------------------------------------------===//
Improvements to the multiply -> shift/add algorithm:
@@ -121,3 +125,44 @@ Model X86 EFLAGS as a real register to avoid redundant cmp / test. e.g.
setg %al
testb %al, %al # unnecessary
jne .BB7
+
+//===---------------------------------------------------------------------===//
+
+Count leading zeros and count trailing zeros:
+
+int clz(int X) { return __builtin_clz(X); }
+int ctz(int X) { return __builtin_ctz(X); }
+
+$ gcc t.c -S -o - -O3 -fomit-frame-pointer -masm=intel
+clz:
+ bsr %eax, DWORD PTR [%esp+4]
+ xor %eax, 31
+ ret
+ctz:
+ bsf %eax, DWORD PTR [%esp+4]
+ ret
+
+However, check that these are defined for an input of 0 (expected result: 32).
+Our intrinsics are; GCC's builtins aren't.
+
+//===---------------------------------------------------------------------===//
+
+Use push/pop instructions in prolog/epilog sequences instead of stores off
+ESP (certain code size win, perf win on some [which?] processors).
+
+//===---------------------------------------------------------------------===//
+
+Only use inc/neg/not instructions on processors where they are faster than
+add/sub/xor. They are slower on the P4 because they update only some of the
+processor flags.
+
+//===---------------------------------------------------------------------===//
+
+Open code rint,floor,ceil,trunc:
+http://gcc.gnu.org/ml/gcc-patches/2004-08/msg02006.html
+http://gcc.gnu.org/ml/gcc-patches/2004-08/msg02011.html
+
+//===---------------------------------------------------------------------===//
+
+Combine: a = sin(x), b = cos(x) into a,b = sincos(x).
+