aboutsummaryrefslogtreecommitdiffstats
path: root/test/Analysis/CostModel/X86/testshiftlshr.ll
diff options
context:
space:
mode:
authorArnold Schwaighofer <aschwaighofer@apple.com>2013-04-04 23:26:24 +0000
committerArnold Schwaighofer <aschwaighofer@apple.com>2013-04-04 23:26:24 +0000
commit2537f3c6597bc1b8eb14c76c8f8e7046be41c9ba (patch)
tree781143f2b27f08fe01dcfe79e732057fc6847445 /test/Analysis/CostModel/X86/testshiftlshr.ll
parent6bf4f676413b8f7d97aaff289997aab344180957 (diff)
downloadexternal_llvm-2537f3c6597bc1b8eb14c76c8f8e7046be41c9ba.zip
external_llvm-2537f3c6597bc1b8eb14c76c8f8e7046be41c9ba.tar.gz
external_llvm-2537f3c6597bc1b8eb14c76c8f8e7046be41c9ba.tar.bz2
X86 cost model: Differentiate cost for vector shifts of constants
SSE2 has efficient support for shifts by a scalar. My previous change of making shifts expensive did not take this into account, marking all shifts as expensive. This would prevent vectorization from happening where it is actually beneficial. With this change we differentiate between shifts of constants and other shifts.

radar://13576547

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@178808 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/Analysis/CostModel/X86/testshiftlshr.ll')
-rw-r--r--test/Analysis/CostModel/X86/testshiftlshr.ll287
1 file changed, 287 insertions, 0 deletions
diff --git a/test/Analysis/CostModel/X86/testshiftlshr.ll b/test/Analysis/CostModel/X86/testshiftlshr.ll
index 7d665fc..8d6ef38 100644
--- a/test/Analysis/CostModel/X86/testshiftlshr.ll
+++ b/test/Analysis/CostModel/X86/testshiftlshr.ll
@@ -241,3 +241,290 @@ entry:
ret %shifttype32i8 %0
}
+; Test shift by a constant vector.
+
+%shifttypec = type <2 x i16>
+define %shifttypec @shift2i16const(%shifttypec %a, %shifttypec %b) {
+entry:
+ ; SSE2: shift2i16const
+ ; SSE2: cost of 1 {{.*}} lshr
+ ; SSE2-CODEGEN: shift2i16const
+ ; SSE2-CODEGEN: psrlq $3
+
+ %0 = lshr %shifttypec %a , <i16 3, i16 3>
+ ret %shifttypec %0
+}
+
+%shifttypec4i16 = type <4 x i16>
+define %shifttypec4i16 @shift4i16const(%shifttypec4i16 %a, %shifttypec4i16 %b) {
+entry:
+ ; SSE2: shift4i16const
+ ; SSE2: cost of 1 {{.*}} lshr
+ ; SSE2-CODEGEN: shift4i16const
+ ; SSE2-CODEGEN: psrld $3
+
+ %0 = lshr %shifttypec4i16 %a , <i16 3, i16 3, i16 3, i16 3>
+ ret %shifttypec4i16 %0
+}
+
+%shifttypec8i16 = type <8 x i16>
+define %shifttypec8i16 @shift8i16const(%shifttypec8i16 %a, %shifttypec8i16 %b) {
+entry:
+ ; SSE2: shift8i16const
+ ; SSE2: cost of 1 {{.*}} lshr
+ ; SSE2-CODEGEN: shift8i16const
+ ; SSE2-CODEGEN: psrlw $3
+
+ %0 = lshr %shifttypec8i16 %a , <i16 3, i16 3, i16 3, i16 3,
+ i16 3, i16 3, i16 3, i16 3>
+ ret %shifttypec8i16 %0
+}
+
+%shifttypec16i16 = type <16 x i16>
+define %shifttypec16i16 @shift16i16const(%shifttypec16i16 %a,
+ %shifttypec16i16 %b) {
+entry:
+ ; SSE2: shift16i16const
+ ; SSE2: cost of 2 {{.*}} lshr
+ ; SSE2-CODEGEN: shift16i16const
+ ; SSE2-CODEGEN: psrlw $3
+
+ %0 = lshr %shifttypec16i16 %a , <i16 3, i16 3, i16 3, i16 3,
+ i16 3, i16 3, i16 3, i16 3,
+ i16 3, i16 3, i16 3, i16 3,
+ i16 3, i16 3, i16 3, i16 3>
+ ret %shifttypec16i16 %0
+}
+
+%shifttypec32i16 = type <32 x i16>
+define %shifttypec32i16 @shift32i16const(%shifttypec32i16 %a,
+ %shifttypec32i16 %b) {
+entry:
+ ; SSE2: shift32i16const
+ ; SSE2: cost of 4 {{.*}} lshr
+ ; SSE2-CODEGEN: shift32i16const
+ ; SSE2-CODEGEN: psrlw $3
+
+ %0 = lshr %shifttypec32i16 %a , <i16 3, i16 3, i16 3, i16 3,
+ i16 3, i16 3, i16 3, i16 3,
+ i16 3, i16 3, i16 3, i16 3,
+ i16 3, i16 3, i16 3, i16 3,
+ i16 3, i16 3, i16 3, i16 3,
+ i16 3, i16 3, i16 3, i16 3,
+ i16 3, i16 3, i16 3, i16 3,
+ i16 3, i16 3, i16 3, i16 3>
+ ret %shifttypec32i16 %0
+}
+
+%shifttypec2i32 = type <2 x i32>
+define %shifttypec2i32 @shift2i32c(%shifttypec2i32 %a, %shifttypec2i32 %b) {
+entry:
+ ; SSE2: shift2i32c
+ ; SSE2: cost of 1 {{.*}} lshr
+ ; SSE2-CODEGEN: shift2i32c
+ ; SSE2-CODEGEN: psrlq $3
+
+ %0 = lshr %shifttypec2i32 %a , <i32 3, i32 3>
+ ret %shifttypec2i32 %0
+}
+
+%shifttypec4i32 = type <4 x i32>
+define %shifttypec4i32 @shift4i32c(%shifttypec4i32 %a, %shifttypec4i32 %b) {
+entry:
+ ; SSE2: shift4i32c
+ ; SSE2: cost of 1 {{.*}} lshr
+ ; SSE2-CODEGEN: shift4i32c
+ ; SSE2-CODEGEN: psrld $3
+
+ %0 = lshr %shifttypec4i32 %a , <i32 3, i32 3, i32 3, i32 3>
+ ret %shifttypec4i32 %0
+}
+
+%shifttypec8i32 = type <8 x i32>
+define %shifttypec8i32 @shift8i32c(%shifttypec8i32 %a, %shifttypec8i32 %b) {
+entry:
+ ; SSE2: shift8i32c
+ ; SSE2: cost of 2 {{.*}} lshr
+ ; SSE2-CODEGEN: shift8i32c
+ ; SSE2-CODEGEN: psrld $3
+
+ %0 = lshr %shifttypec8i32 %a , <i32 3, i32 3, i32 3, i32 3,
+ i32 3, i32 3, i32 3, i32 3>
+ ret %shifttypec8i32 %0
+}
+
+%shifttypec16i32 = type <16 x i32>
+define %shifttypec16i32 @shift16i32c(%shifttypec16i32 %a, %shifttypec16i32 %b) {
+entry:
+ ; SSE2: shift16i32c
+ ; SSE2: cost of 4 {{.*}} lshr
+ ; SSE2-CODEGEN: shift16i32c
+ ; SSE2-CODEGEN: psrld $3
+
+ %0 = lshr %shifttypec16i32 %a , <i32 3, i32 3, i32 3, i32 3,
+ i32 3, i32 3, i32 3, i32 3,
+ i32 3, i32 3, i32 3, i32 3,
+ i32 3, i32 3, i32 3, i32 3>
+ ret %shifttypec16i32 %0
+}
+
+%shifttypec32i32 = type <32 x i32>
+define %shifttypec32i32 @shift32i32c(%shifttypec32i32 %a, %shifttypec32i32 %b) {
+entry:
+ ; SSE2: shift32i32c
+ ; getTypeConversion fails here and promotes this to a i64.
+ ; SSE2: cost of 256 {{.*}} lshr
+ ; SSE2-CODEGEN: shift32i32c
+ ; SSE2-CODEGEN: psrld $3
+ %0 = lshr %shifttypec32i32 %a , <i32 3, i32 3, i32 3, i32 3,
+ i32 3, i32 3, i32 3, i32 3,
+ i32 3, i32 3, i32 3, i32 3,
+ i32 3, i32 3, i32 3, i32 3,
+ i32 3, i32 3, i32 3, i32 3,
+ i32 3, i32 3, i32 3, i32 3,
+ i32 3, i32 3, i32 3, i32 3,
+ i32 3, i32 3, i32 3, i32 3>
+ ret %shifttypec32i32 %0
+}
+
+%shifttypec2i64 = type <2 x i64>
+define %shifttypec2i64 @shift2i64c(%shifttypec2i64 %a, %shifttypec2i64 %b) {
+entry:
+ ; SSE2: shift2i64c
+ ; SSE2: cost of 1 {{.*}} lshr
+ ; SSE2-CODEGEN: shift2i64c
+ ; SSE2-CODEGEN: psrlq $3
+
+ %0 = lshr %shifttypec2i64 %a , <i64 3, i64 3>
+ ret %shifttypec2i64 %0
+}
+
+%shifttypec4i64 = type <4 x i64>
+define %shifttypec4i64 @shift4i64c(%shifttypec4i64 %a, %shifttypec4i64 %b) {
+entry:
+ ; SSE2: shift4i64c
+ ; SSE2: cost of 2 {{.*}} lshr
+ ; SSE2-CODEGEN: shift4i64c
+ ; SSE2-CODEGEN: psrlq $3
+
+ %0 = lshr %shifttypec4i64 %a , <i64 3, i64 3, i64 3, i64 3>
+ ret %shifttypec4i64 %0
+}
+
+%shifttypec8i64 = type <8 x i64>
+define %shifttypec8i64 @shift8i64c(%shifttypec8i64 %a, %shifttypec8i64 %b) {
+entry:
+ ; SSE2: shift8i64c
+ ; SSE2: cost of 4 {{.*}} lshr
+ ; SSE2-CODEGEN: shift8i64c
+ ; SSE2-CODEGEN: psrlq $3
+
+ %0 = lshr %shifttypec8i64 %a , <i64 3, i64 3, i64 3, i64 3,
+ i64 3, i64 3, i64 3, i64 3>
+ ret %shifttypec8i64 %0
+}
+
+%shifttypec16i64 = type <16 x i64>
+define %shifttypec16i64 @shift16i64c(%shifttypec16i64 %a, %shifttypec16i64 %b) {
+entry:
+ ; SSE2: shift16i64c
+ ; SSE2: cost of 8 {{.*}} lshr
+ ; SSE2-CODEGEN: shift16i64c
+ ; SSE2-CODEGEN: psrlq $3
+
+ %0 = lshr %shifttypec16i64 %a , <i64 3, i64 3, i64 3, i64 3,
+ i64 3, i64 3, i64 3, i64 3,
+ i64 3, i64 3, i64 3, i64 3,
+ i64 3, i64 3, i64 3, i64 3>
+ ret %shifttypec16i64 %0
+}
+
+%shifttypec32i64 = type <32 x i64>
+define %shifttypec32i64 @shift32i64c(%shifttypec32i64 %a, %shifttypec32i64 %b) {
+entry:
+ ; SSE2: shift32i64c
+ ; SSE2: cost of 256 {{.*}} lshr
+ ; SSE2-CODEGEN: shift32i64c
+ ; SSE2-CODEGEN: psrlq $3
+
+ %0 = lshr %shifttypec32i64 %a ,<i64 3, i64 3, i64 3, i64 3,
+ i64 3, i64 3, i64 3, i64 3,
+ i64 3, i64 3, i64 3, i64 3,
+ i64 3, i64 3, i64 3, i64 3,
+ i64 3, i64 3, i64 3, i64 3,
+ i64 3, i64 3, i64 3, i64 3,
+ i64 3, i64 3, i64 3, i64 3,
+ i64 3, i64 3, i64 3, i64 3>
+ ret %shifttypec32i64 %0
+}
+
+%shifttypec2i8 = type <2 x i8>
+define %shifttypec2i8 @shift2i8c(%shifttypec2i8 %a, %shifttypec2i8 %b) {
+entry:
+ ; SSE2: shift2i8c
+ ; SSE2: cost of 1 {{.*}} lshr
+ ; SSE2-CODEGEN: shift2i8c
+ ; SSE2-CODEGEN: psrlq $3
+
+ %0 = lshr %shifttypec2i8 %a , <i8 3, i8 3>
+ ret %shifttypec2i8 %0
+}
+
+%shifttypec4i8 = type <4 x i8>
+define %shifttypec4i8 @shift4i8c(%shifttypec4i8 %a, %shifttypec4i8 %b) {
+entry:
+ ; SSE2: shift4i8c
+ ; SSE2: cost of 1 {{.*}} lshr
+ ; SSE2-CODEGEN: shift4i8c
+ ; SSE2-CODEGEN: psrld $3
+
+ %0 = lshr %shifttypec4i8 %a , <i8 3, i8 3, i8 3, i8 3>
+ ret %shifttypec4i8 %0
+}
+
+%shifttypec8i8 = type <8 x i8>
+define %shifttypec8i8 @shift8i8c(%shifttypec8i8 %a, %shifttypec8i8 %b) {
+entry:
+ ; SSE2: shift8i8c
+ ; SSE2: cost of 1 {{.*}} lshr
+ ; SSE2-CODEGEN: shift8i8c
+ ; SSE2-CODEGEN: psrlw $3
+
+ %0 = lshr %shifttypec8i8 %a , <i8 3, i8 3, i8 3, i8 3,
+ i8 3, i8 3, i8 3, i8 3>
+ ret %shifttypec8i8 %0
+}
+
+%shifttypec16i8 = type <16 x i8>
+define %shifttypec16i8 @shift16i8c(%shifttypec16i8 %a, %shifttypec16i8 %b) {
+entry:
+ ; SSE2: shift16i8c
+ ; SSE2: cost of 1 {{.*}} lshr
+ ; SSE2-CODEGEN: shift16i8c
+ ; SSE2-CODEGEN: psrlw $3
+
+ %0 = lshr %shifttypec16i8 %a , <i8 3, i8 3, i8 3, i8 3,
+ i8 3, i8 3, i8 3, i8 3,
+ i8 3, i8 3, i8 3, i8 3,
+ i8 3, i8 3, i8 3, i8 3>
+ ret %shifttypec16i8 %0
+}
+
+%shifttypec32i8 = type <32 x i8>
+define %shifttypec32i8 @shift32i8c(%shifttypec32i8 %a, %shifttypec32i8 %b) {
+entry:
+ ; SSE2: shift32i8c
+ ; SSE2: cost of 2 {{.*}} lshr
+ ; SSE2-CODEGEN: shift32i8c
+ ; SSE2-CODEGEN: psrlw $3
+
+ %0 = lshr %shifttypec32i8 %a , <i8 3, i8 3, i8 3, i8 3,
+ i8 3, i8 3, i8 3, i8 3,
+ i8 3, i8 3, i8 3, i8 3,
+ i8 3, i8 3, i8 3, i8 3,
+ i8 3, i8 3, i8 3, i8 3,
+ i8 3, i8 3, i8 3, i8 3,
+ i8 3, i8 3, i8 3, i8 3,
+ i8 3, i8 3, i8 3, i8 3>
+ ret %shifttypec32i8 %0
+}