diff options
author | Arnold Schwaighofer <aschwaighofer@apple.com> | 2013-04-03 21:46:05 +0000 |
---|---|---|
committer | Arnold Schwaighofer <aschwaighofer@apple.com> | 2013-04-03 21:46:05 +0000 |
commit | 6b6050b229976a2f53184f6d6857e6f445a869d0 (patch) | |
tree | 3f91e43d68b7aedf1ce51e9da9ff457b47f03ecb /test/Analysis/CostModel | |
parent | d787a41b118a3724d1df87dc3d38cc3fddb3a145 (diff) | |
download | external_llvm-6b6050b229976a2f53184f6d6857e6f445a869d0.zip external_llvm-6b6050b229976a2f53184f6d6857e6f445a869d0.tar.gz external_llvm-6b6050b229976a2f53184f6d6857e6f445a869d0.tar.bz2 |
X86 cost model: Vector shifts are expensive in most cases
The default logic does not correctly identify costs of casts because they are
marked as custom on x86.
For some cases, where the shift amount is a scalar we would be able to generate
better code. Unfortunately, when this is the case the value (the splat) will get
hoisted out of the loop, thereby making it invisible to ISel.
radar://13130673
radar://13537826
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@178703 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/Analysis/CostModel')
-rw-r--r-- | test/Analysis/CostModel/X86/arith.ll | 4 | ||||
-rw-r--r-- | test/Analysis/CostModel/X86/testshiftashr.ll | 243 | ||||
-rw-r--r-- | test/Analysis/CostModel/X86/testshiftlshr.ll | 243 | ||||
-rw-r--r-- | test/Analysis/CostModel/X86/testshiftshl.ll | 242 |
4 files changed, 730 insertions, 2 deletions
diff --git a/test/Analysis/CostModel/X86/arith.ll b/test/Analysis/CostModel/X86/arith.ll index 5f94441..85b4425 100644 --- a/test/Analysis/CostModel/X86/arith.ll +++ b/test/Analysis/CostModel/X86/arith.ll @@ -94,7 +94,7 @@ define void @shift() { ; AVX2: cost of 1 {{.*}} ashr %C0 = ashr <4 x i32> undef, undef ; AVX: cost of 6 {{.*}} ashr - ; AVX2: cost of 6 {{.*}} ashr + ; AVX2: cost of 20 {{.*}} ashr %C1 = ashr <2 x i64> undef, undef ret void @@ -121,7 +121,7 @@ define void @avx2shift() { ; AVX2: cost of 1 {{.*}} ashr %C0 = ashr <8 x i32> undef, undef ; AVX: cost of 12 {{.*}} ashr - ; AVX2: cost of 12 {{.*}} ashr + ; AVX2: cost of 40 {{.*}} ashr %C1 = ashr <4 x i64> undef, undef ret void diff --git a/test/Analysis/CostModel/X86/testshiftashr.ll b/test/Analysis/CostModel/X86/testshiftashr.ll new file mode 100644 index 0000000..d932b2a --- /dev/null +++ b/test/Analysis/CostModel/X86/testshiftashr.ll @@ -0,0 +1,243 @@ +; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=core2 < %s | FileCheck --check-prefix=SSE2-CODEGEN %s +; RUN: opt -mtriple=x86_64-apple-darwin -mcpu=core2 -cost-model -analyze < %s | FileCheck --check-prefix=SSE2 %s + +%shifttype = type <2 x i16> +define %shifttype @shift2i16(%shifttype %a, %shifttype %b) { +entry: + ; SSE2: shift2i16 + ; SSE2: cost of 20 {{.*}} ashr + ; SSE2-CODEGEN: shift2i16 + ; SSE2-CODEGEN: sarq %cl + + %0 = ashr %shifttype %a , %b + ret %shifttype %0 +} + +%shifttype4i16 = type <4 x i16> +define %shifttype4i16 @shift4i16(%shifttype4i16 %a, %shifttype4i16 %b) { +entry: + ; SSE2: shift4i16 + ; SSE2: cost of 40 {{.*}} ashr + ; SSE2-CODEGEN: shift4i16 + ; SSE2-CODEGEN: sarl %cl + + %0 = ashr %shifttype4i16 %a , %b + ret %shifttype4i16 %0 +} + +%shifttype8i16 = type <8 x i16> +define %shifttype8i16 @shift8i16(%shifttype8i16 %a, %shifttype8i16 %b) { +entry: + ; SSE2: shift8i16 + ; SSE2: cost of 80 {{.*}} ashr + ; SSE2-CODEGEN: shift8i16 + ; SSE2-CODEGEN: sarw %cl + + %0 = ashr %shifttype8i16 %a , %b + ret %shifttype8i16 %0 +} + +%shifttype16i16 = type <16 x i16> +define %shifttype16i16 @shift16i16(%shifttype16i16 %a, %shifttype16i16 %b) { +entry: + ; SSE2: shift16i16 + ; SSE2: cost of 160 {{.*}} ashr + ; SSE2-CODEGEN: shift16i16 + ; SSE2-CODEGEN: sarw %cl + + %0 = ashr %shifttype16i16 %a , %b + ret %shifttype16i16 %0 +} + +%shifttype32i16 = type <32 x i16> +define %shifttype32i16 @shift32i16(%shifttype32i16 %a, %shifttype32i16 %b) { +entry: + ; SSE2: shift32i16 + ; SSE2: cost of 320 {{.*}} ashr + ; SSE2-CODEGEN: shift32i16 + ; SSE2-CODEGEN: sarw %cl + + %0 = ashr %shifttype32i16 %a , %b + ret %shifttype32i16 %0 +} + +%shifttype2i32 = type <2 x i32> +define %shifttype2i32 @shift2i32(%shifttype2i32 %a, %shifttype2i32 %b) { +entry: + ; SSE2: shift2i32 + ; SSE2: cost of 20 {{.*}} ashr + ; SSE2-CODEGEN: shift2i32 + ; SSE2-CODEGEN: sarq %cl + + %0 = ashr %shifttype2i32 %a , %b + ret %shifttype2i32 %0 +} + +%shifttype4i32 = type <4 x i32> +define %shifttype4i32 @shift4i32(%shifttype4i32 %a, %shifttype4i32 %b) { +entry: + ; SSE2: shift4i32 + ; SSE2: cost of 40 {{.*}} ashr + ; SSE2-CODEGEN: shift4i32 + ; SSE2-CODEGEN: sarl %cl + + %0 = ashr %shifttype4i32 %a , %b + ret %shifttype4i32 %0 +} + +%shifttype8i32 = type <8 x i32> +define %shifttype8i32 @shift8i32(%shifttype8i32 %a, %shifttype8i32 %b) { +entry: + ; SSE2: shift8i32 + ; SSE2: cost of 80 {{.*}} ashr + ; SSE2-CODEGEN: shift8i32 + ; SSE2-CODEGEN: sarl %cl + + %0 = ashr %shifttype8i32 %a , %b + ret %shifttype8i32 %0 +} + +%shifttype16i32 = type <16 x i32> +define %shifttype16i32 @shift16i32(%shifttype16i32 %a, %shifttype16i32 %b) { +entry: + ; SSE2: shift16i32 + ; SSE2: cost of 160 {{.*}} ashr + ; SSE2-CODEGEN: shift16i32 + ; SSE2-CODEGEN: sarl %cl + + %0 = ashr %shifttype16i32 %a , %b + ret %shifttype16i32 %0 +} + +%shifttype32i32 = type <32 x i32> +define %shifttype32i32 @shift32i32(%shifttype32i32 %a, %shifttype32i32 %b) { +entry: + ; SSE2: shift32i32 + ; SSE2: cost of 256 {{.*}} ashr + ; SSE2-CODEGEN: shift32i32 + ; SSE2-CODEGEN: sarl %cl + + %0 = ashr %shifttype32i32 %a , %b + ret %shifttype32i32 %0 +} + +%shifttype2i64 = type <2 x i64> +define %shifttype2i64 @shift2i64(%shifttype2i64 %a, %shifttype2i64 %b) { +entry: + ; SSE2: shift2i64 + ; SSE2: cost of 20 {{.*}} ashr + ; SSE2-CODEGEN: shift2i64 + ; SSE2-CODEGEN: sarq %cl + + %0 = ashr %shifttype2i64 %a , %b + ret %shifttype2i64 %0 +} + +%shifttype4i64 = type <4 x i64> +define %shifttype4i64 @shift4i64(%shifttype4i64 %a, %shifttype4i64 %b) { +entry: + ; SSE2: shift4i64 + ; SSE2: cost of 40 {{.*}} ashr + ; SSE2-CODEGEN: shift4i64 + ; SSE2-CODEGEN: sarq %cl + + %0 = ashr %shifttype4i64 %a , %b + ret %shifttype4i64 %0 +} + +%shifttype8i64 = type <8 x i64> +define %shifttype8i64 @shift8i64(%shifttype8i64 %a, %shifttype8i64 %b) { +entry: + ; SSE2: shift8i64 + ; SSE2: cost of 80 {{.*}} ashr + ; SSE2-CODEGEN: shift8i64 + ; SSE2-CODEGEN: sarq %cl + + %0 = ashr %shifttype8i64 %a , %b + ret %shifttype8i64 %0 +} + +%shifttype16i64 = type <16 x i64> +define %shifttype16i64 @shift16i64(%shifttype16i64 %a, %shifttype16i64 %b) { +entry: + ; SSE2: shift16i64 + ; SSE2: cost of 160 {{.*}} ashr + ; SSE2-CODEGEN: shift16i64 + ; SSE2-CODEGEN: sarq %cl + + %0 = ashr %shifttype16i64 %a , %b + ret %shifttype16i64 %0 +} + +%shifttype32i64 = type <32 x i64> +define %shifttype32i64 @shift32i64(%shifttype32i64 %a, %shifttype32i64 %b) { +entry: + ; SSE2: shift32i64 + ; SSE2: cost of 256 {{.*}} ashr + ; SSE2-CODEGEN: shift32i64 + ; SSE2-CODEGEN: sarq %cl + + %0 = ashr %shifttype32i64 %a , %b + ret %shifttype32i64 %0 +} + +%shifttype2i8 = type <2 x i8> +define %shifttype2i8 @shift2i8(%shifttype2i8 %a, %shifttype2i8 %b) { +entry: + ; SSE2: shift2i8 + ; SSE2: cost of 20 {{.*}} ashr + ; SSE2-CODEGEN: shift2i8 + ; SSE2-CODEGEN: sarq %cl + + %0 = ashr %shifttype2i8 %a , %b + ret %shifttype2i8 %0 +} + +%shifttype4i8 = type <4 x i8> +define %shifttype4i8 @shift4i8(%shifttype4i8 %a, %shifttype4i8 %b) { +entry: + ; SSE2: shift4i8 + ; SSE2: cost of 40 {{.*}} ashr + ; SSE2-CODEGEN: shift4i8 + ; SSE2-CODEGEN: sarl %cl + + %0 = ashr %shifttype4i8 %a , %b + ret %shifttype4i8 %0 +} + +%shifttype8i8 = type <8 x i8> +define %shifttype8i8 @shift8i8(%shifttype8i8 %a, %shifttype8i8 %b) { +entry: + ; SSE2: shift8i8 + ; SSE2: cost of 80 {{.*}} ashr + ; SSE2-CODEGEN: shift8i8 + ; SSE2-CODEGEN: sarw %cl + + %0 = ashr %shifttype8i8 %a , %b + ret %shifttype8i8 %0 +} + +%shifttype16i8 = type <16 x i8> +define %shifttype16i8 @shift16i8(%shifttype16i8 %a, %shifttype16i8 %b) { +entry: + ; SSE2: shift16i8 + ; SSE2: cost of 160 {{.*}} ashr + ; SSE2-CODEGEN: shift16i8 + ; SSE2-CODEGEN: sarb %cl + + %0 = ashr %shifttype16i8 %a , %b + ret %shifttype16i8 %0 +} + +%shifttype32i8 = type <32 x i8> +define %shifttype32i8 @shift32i8(%shifttype32i8 %a, %shifttype32i8 %b) { +entry: + ; SSE2: shift32i8 + ; SSE2: cost of 320 {{.*}} ashr + ; SSE2-CODEGEN: shift32i8 + ; SSE2-CODEGEN: sarb %cl + + %0 = ashr %shifttype32i8 %a , %b + ret %shifttype32i8 %0 +} + diff --git a/test/Analysis/CostModel/X86/testshiftlshr.ll b/test/Analysis/CostModel/X86/testshiftlshr.ll new file mode 100644 index 0000000..7d665fc --- /dev/null +++ b/test/Analysis/CostModel/X86/testshiftlshr.ll @@ -0,0 +1,243 @@ +; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=core2 < %s | FileCheck --check-prefix=SSE2-CODEGEN %s +; RUN: opt -mtriple=x86_64-apple-darwin -mcpu=core2 -cost-model -analyze < %s | FileCheck --check-prefix=SSE2 %s + +%shifttype = type <2 x i16> +define %shifttype @shift2i16(%shifttype %a, %shifttype %b) { +entry: + ; SSE2: shift2i16 + ; SSE2: cost of 20 {{.*}} lshr + ; SSE2-CODEGEN: shift2i16 + ; SSE2-CODEGEN: shrq %cl + + %0 = lshr %shifttype %a , %b + ret %shifttype %0 +} + +%shifttype4i16 = type <4 x i16> +define %shifttype4i16 @shift4i16(%shifttype4i16 %a, %shifttype4i16 %b) { +entry: + ; SSE2: shift4i16 + ; SSE2: cost of 40 {{.*}} lshr + ; SSE2-CODEGEN: shift4i16 + ; SSE2-CODEGEN: shrl %cl + + %0 = lshr %shifttype4i16 %a , %b + ret %shifttype4i16 %0 +} + +%shifttype8i16 = type <8 x i16> +define %shifttype8i16 @shift8i16(%shifttype8i16 %a, %shifttype8i16 %b) { +entry: + ; SSE2: shift8i16 + ; SSE2: cost of 80 {{.*}} lshr + ; SSE2-CODEGEN: shift8i16 + ; SSE2-CODEGEN: shrl %cl + + %0 = lshr %shifttype8i16 %a , %b + ret %shifttype8i16 %0 +} + +%shifttype16i16 = type <16 x i16> +define %shifttype16i16 @shift16i16(%shifttype16i16 %a, %shifttype16i16 %b) { +entry: + ; SSE2: shift16i16 + ; SSE2: cost of 160 {{.*}} lshr + ; SSE2-CODEGEN: shift16i16 + ; SSE2-CODEGEN: shrl %cl + + %0 = lshr %shifttype16i16 %a , %b + ret %shifttype16i16 %0 +} + +%shifttype32i16 = type <32 x i16> +define %shifttype32i16 @shift32i16(%shifttype32i16 %a, %shifttype32i16 %b) { +entry: + ; SSE2: shift32i16 + ; SSE2: cost of 320 {{.*}} lshr + ; SSE2-CODEGEN: shift32i16 + ; SSE2-CODEGEN: shrl %cl + + %0 = lshr %shifttype32i16 %a , %b + ret %shifttype32i16 %0 +} + +%shifttype2i32 = type <2 x i32> +define %shifttype2i32 @shift2i32(%shifttype2i32 %a, %shifttype2i32 %b) { +entry: + ; SSE2: shift2i32 + ; SSE2: cost of 20 {{.*}} lshr + ; SSE2-CODEGEN: shift2i32 + ; SSE2-CODEGEN: shrq %cl + + %0 = lshr %shifttype2i32 %a , %b + ret %shifttype2i32 %0 +} + +%shifttype4i32 = type <4 x i32> +define %shifttype4i32 @shift4i32(%shifttype4i32 %a, %shifttype4i32 %b) { +entry: + ; SSE2: shift4i32 + ; SSE2: cost of 40 {{.*}} lshr + ; SSE2-CODEGEN: shift4i32 + ; SSE2-CODEGEN: shrl %cl + + %0 = lshr %shifttype4i32 %a , %b + ret %shifttype4i32 %0 +} + +%shifttype8i32 = type <8 x i32> +define %shifttype8i32 @shift8i32(%shifttype8i32 %a, %shifttype8i32 %b) { +entry: + ; SSE2: shift8i32 + ; SSE2: cost of 80 {{.*}} lshr + ; SSE2-CODEGEN: shift8i32 + ; SSE2-CODEGEN: shrl %cl + + %0 = lshr %shifttype8i32 %a , %b + ret %shifttype8i32 %0 +} + +%shifttype16i32 = type <16 x i32> +define %shifttype16i32 @shift16i32(%shifttype16i32 %a, %shifttype16i32 %b) { +entry: + ; SSE2: shift16i32 + ; SSE2: cost of 160 {{.*}} lshr + ; SSE2-CODEGEN: shift16i32 + ; SSE2-CODEGEN: shrl %cl + + %0 = lshr %shifttype16i32 %a , %b + ret %shifttype16i32 %0 +} + +%shifttype32i32 = type <32 x i32> +define %shifttype32i32 @shift32i32(%shifttype32i32 %a, %shifttype32i32 %b) { +entry: + ; SSE2: shift32i32 + ; SSE2: cost of 256 {{.*}} lshr + ; SSE2-CODEGEN: shift32i32 + ; SSE2-CODEGEN: shrl %cl + + %0 = lshr %shifttype32i32 %a , %b + ret %shifttype32i32 %0 +} + +%shifttype2i64 = type <2 x i64> +define %shifttype2i64 @shift2i64(%shifttype2i64 %a, %shifttype2i64 %b) { +entry: + ; SSE2: shift2i64 + ; SSE2: cost of 20 {{.*}} lshr + ; SSE2-CODEGEN: shift2i64 + ; SSE2-CODEGEN: shrq %cl + + %0 = lshr %shifttype2i64 %a , %b + ret %shifttype2i64 %0 +} + +%shifttype4i64 = type <4 x i64> +define %shifttype4i64 @shift4i64(%shifttype4i64 %a, %shifttype4i64 %b) { +entry: + ; SSE2: shift4i64 + ; SSE2: cost of 40 {{.*}} lshr + ; SSE2-CODEGEN: shift4i64 + ; SSE2-CODEGEN: shrq %cl + + %0 = lshr %shifttype4i64 %a , %b + ret %shifttype4i64 %0 +} + +%shifttype8i64 = type <8 x i64> +define %shifttype8i64 @shift8i64(%shifttype8i64 %a, %shifttype8i64 %b) { +entry: + ; SSE2: shift8i64 + ; SSE2: cost of 80 {{.*}} lshr + ; SSE2-CODEGEN: shift8i64 + ; SSE2-CODEGEN: shrq %cl + + %0 = lshr %shifttype8i64 %a , %b + ret %shifttype8i64 %0 +} + +%shifttype16i64 = type <16 x i64> +define %shifttype16i64 @shift16i64(%shifttype16i64 %a, %shifttype16i64 %b) { +entry: + ; SSE2: shift16i64 + ; SSE2: cost of 160 {{.*}} lshr + ; SSE2-CODEGEN: shift16i64 + ; SSE2-CODEGEN: shrq %cl + + %0 = lshr %shifttype16i64 %a , %b + ret %shifttype16i64 %0 +} + +%shifttype32i64 = type <32 x i64> +define %shifttype32i64 @shift32i64(%shifttype32i64 %a, %shifttype32i64 %b) { +entry: + ; SSE2: shift32i64 + ; SSE2: cost of 256 {{.*}} lshr + ; SSE2-CODEGEN: shift32i64 + ; SSE2-CODEGEN: shrq %cl + + %0 = lshr %shifttype32i64 %a , %b + ret %shifttype32i64 %0 +} + +%shifttype2i8 = type <2 x i8> +define %shifttype2i8 @shift2i8(%shifttype2i8 %a, %shifttype2i8 %b) { +entry: + ; SSE2: shift2i8 + ; SSE2: cost of 20 {{.*}} lshr + ; SSE2-CODEGEN: shift2i8 + ; SSE2-CODEGEN: shrq %cl + + %0 = lshr %shifttype2i8 %a , %b + ret %shifttype2i8 %0 +} + +%shifttype4i8 = type <4 x i8> +define %shifttype4i8 @shift4i8(%shifttype4i8 %a, %shifttype4i8 %b) { +entry: + ; SSE2: shift4i8 + ; SSE2: cost of 40 {{.*}} lshr + ; SSE2-CODEGEN: shift4i8 + ; SSE2-CODEGEN: shrl %cl + + %0 = lshr %shifttype4i8 %a , %b + ret %shifttype4i8 %0 +} + +%shifttype8i8 = type <8 x i8> +define %shifttype8i8 @shift8i8(%shifttype8i8 %a, %shifttype8i8 %b) { +entry: + ; SSE2: shift8i8 + ; SSE2: cost of 80 {{.*}} lshr + ; SSE2-CODEGEN: shift8i8 + ; SSE2-CODEGEN: shrl %cl + + %0 = lshr %shifttype8i8 %a , %b + ret %shifttype8i8 %0 +} + +%shifttype16i8 = type <16 x i8> +define %shifttype16i8 @shift16i8(%shifttype16i8 %a, %shifttype16i8 %b) { +entry: + ; SSE2: shift16i8 + ; SSE2: cost of 160 {{.*}} lshr + ; SSE2-CODEGEN: shift16i8 + ; SSE2-CODEGEN: shrb %cl + + %0 = lshr %shifttype16i8 %a , %b + ret %shifttype16i8 %0 +} + +%shifttype32i8 = type <32 x i8> +define %shifttype32i8 @shift32i8(%shifttype32i8 %a, %shifttype32i8 %b) { +entry: + ; SSE2: shift32i8 + ; SSE2: cost of 320 {{.*}} lshr + ; SSE2-CODEGEN: shift32i8 + ; SSE2-CODEGEN: shrb %cl + + %0 = lshr %shifttype32i8 %a , %b + ret %shifttype32i8 %0 +} + diff --git a/test/Analysis/CostModel/X86/testshiftshl.ll b/test/Analysis/CostModel/X86/testshiftshl.ll new file mode 100644 index 0000000..897d983 --- /dev/null +++ b/test/Analysis/CostModel/X86/testshiftshl.ll @@ -0,0 +1,242 @@ +; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=core2 < %s | FileCheck --check-prefix=SSE2-CODEGEN %s +; RUN: opt -mtriple=x86_64-apple-darwin -mcpu=core2 -cost-model -analyze < %s | FileCheck --check-prefix=SSE2 %s + +%shifttype = type <2 x i16> +define %shifttype @shift2i16(%shifttype %a, %shifttype %b) { +entry: + ; SSE2: shift2i16 + ; SSE2: cost of 20 {{.*}} shl + ; SSE2-CODEGEN: shift2i16 + ; SSE2-CODEGEN: shlq %cl + + %0 = shl %shifttype %a , %b + ret %shifttype %0 +} + +%shifttype4i16 = type <4 x i16> +define %shifttype4i16 @shift4i16(%shifttype4i16 %a, %shifttype4i16 %b) { +entry: + ; SSE2: shift4i16 + ; SSE2: cost of 10 {{.*}} shl + ; SSE2-CODEGEN: shift4i16 + ; SSE2-CODEGEN: pmuludq + + %0 = shl %shifttype4i16 %a , %b + ret %shifttype4i16 %0 +} + +%shifttype8i16 = type <8 x i16> +define %shifttype8i16 @shift8i16(%shifttype8i16 %a, %shifttype8i16 %b) { +entry: + ; SSE2: shift8i16 + ; SSE2: cost of 80 {{.*}} shl + ; SSE2-CODEGEN: shift8i16 + ; SSE2-CODEGEN: shll %cl + + %0 = shl %shifttype8i16 %a , %b + ret %shifttype8i16 %0 +} + +%shifttype16i16 = type <16 x i16> +define %shifttype16i16 @shift16i16(%shifttype16i16 %a, %shifttype16i16 %b) { +entry: + ; SSE2: shift16i16 + ; SSE2: cost of 160 {{.*}} shl + ; SSE2-CODEGEN: shift16i16 + ; SSE2-CODEGEN: shll %cl + + %0 = shl %shifttype16i16 %a , %b + ret %shifttype16i16 %0 +} + +%shifttype32i16 = type <32 x i16> +define %shifttype32i16 @shift32i16(%shifttype32i16 %a, %shifttype32i16 %b) { +entry: + ; SSE2: shift32i16 + ; SSE2: cost of 320 {{.*}} shl + ; SSE2-CODEGEN: shift32i16 + ; SSE2-CODEGEN: shll %cl + + %0 = shl %shifttype32i16 %a , %b + ret %shifttype32i16 %0 +} + +%shifttype2i32 = type <2 x i32> +define %shifttype2i32 @shift2i32(%shifttype2i32 %a, %shifttype2i32 %b) { +entry: + ; SSE2: shift2i32 + ; SSE2: cost of 20 {{.*}} shl + ; SSE2-CODEGEN: shift2i32 + ; SSE2-CODEGEN: shlq %cl + + %0 = shl %shifttype2i32 %a , %b + ret %shifttype2i32 %0 +} + +%shifttype4i32 = type <4 x i32> +define %shifttype4i32 @shift4i32(%shifttype4i32 %a, %shifttype4i32 %b) { +entry: + ; SSE2: shift4i32 + ; SSE2: cost of 10 {{.*}} shl + ; SSE2-CODEGEN: shift4i32 + ; SSE2-CODEGEN: pmuludq + + %0 = shl %shifttype4i32 %a , %b + ret %shifttype4i32 %0 +} + +%shifttype8i32 = type <8 x i32> +define %shifttype8i32 @shift8i32(%shifttype8i32 %a, %shifttype8i32 %b) { +entry: + ; SSE2: shift8i32 + ; SSE2: cost of 20 {{.*}} shl + ; SSE2-CODEGEN: shift8i32 + ; SSE2-CODEGEN: pmuludq + + %0 = shl %shifttype8i32 %a , %b + ret %shifttype8i32 %0 +} + +%shifttype16i32 = type <16 x i32> +define %shifttype16i32 @shift16i32(%shifttype16i32 %a, %shifttype16i32 %b) { +entry: + ; SSE2: shift16i32 + ; SSE2: cost of 40 {{.*}} shl + ; SSE2-CODEGEN: shift16i32 + ; SSE2-CODEGEN: pmuludq + + %0 = shl %shifttype16i32 %a , %b + ret %shifttype16i32 %0 +} + +%shifttype32i32 = type <32 x i32> +define %shifttype32i32 @shift32i32(%shifttype32i32 %a, %shifttype32i32 %b) { +entry: + ; SSE2: shift32i32 + ; SSE2: cost of 256 {{.*}} shl + ; SSE2-CODEGEN: shift32i32 + ; SSE2-CODEGEN: pmuludq + + %0 = shl %shifttype32i32 %a , %b + ret %shifttype32i32 %0 +} + +%shifttype2i64 = type <2 x i64> +define %shifttype2i64 @shift2i64(%shifttype2i64 %a, %shifttype2i64 %b) { +entry: + ; SSE2: shift2i64 + ; SSE2: cost of 20 {{.*}} shl + ; SSE2-CODEGEN: shift2i64 + ; SSE2-CODEGEN: shlq %cl + + %0 = shl %shifttype2i64 %a , %b + ret %shifttype2i64 %0 +} + +%shifttype4i64 = type <4 x i64> +define %shifttype4i64 @shift4i64(%shifttype4i64 %a, %shifttype4i64 %b) { +entry: + ; SSE2: shift4i64 + ; SSE2: cost of 40 {{.*}} shl + ; SSE2-CODEGEN: shift4i64 + ; SSE2-CODEGEN: shlq %cl + + %0 = shl %shifttype4i64 %a , %b + ret %shifttype4i64 %0 +} + +%shifttype8i64 = type <8 x i64> +define %shifttype8i64 @shift8i64(%shifttype8i64 %a, %shifttype8i64 %b) { +entry: + ; SSE2: shift8i64 + ; SSE2: cost of 80 {{.*}} shl + ; SSE2-CODEGEN: shift8i64 + ; SSE2-CODEGEN: shlq %cl + + %0 = shl %shifttype8i64 %a , %b + ret %shifttype8i64 %0 +} + +%shifttype16i64 = type <16 x i64> +define %shifttype16i64 @shift16i64(%shifttype16i64 %a, %shifttype16i64 %b) { +entry: + ; SSE2: shift16i64 + ; SSE2: cost of 160 {{.*}} shl + ; SSE2-CODEGEN: shift16i64 + ; SSE2-CODEGEN: shlq %cl + + %0 = shl %shifttype16i64 %a , %b + ret %shifttype16i64 %0 +} + +%shifttype32i64 = type <32 x i64> +define %shifttype32i64 @shift32i64(%shifttype32i64 %a, %shifttype32i64 %b) { +entry: + ; SSE2: shift32i64 + ; SSE2: cost of 256 {{.*}} shl + ; SSE2-CODEGEN: shift32i64 + ; SSE2-CODEGEN: shlq %cl + + %0 = shl %shifttype32i64 %a , %b + ret %shifttype32i64 %0 +} + +%shifttype2i8 = type <2 x i8> +define %shifttype2i8 @shift2i8(%shifttype2i8 %a, %shifttype2i8 %b) { +entry: + ; SSE2: shift2i8 + ; SSE2: cost of 20 {{.*}} shl + ; SSE2-CODEGEN: shift2i8 + ; SSE2-CODEGEN: shlq %cl + + %0 = shl %shifttype2i8 %a , %b + ret %shifttype2i8 %0 +} + +%shifttype4i8 = type <4 x i8> +define %shifttype4i8 @shift4i8(%shifttype4i8 %a, %shifttype4i8 %b) { +entry: + ; SSE2: shift4i8 + ; SSE2: cost of 10 {{.*}} shl + ; SSE2-CODEGEN: shift4i8 + ; SSE2-CODEGEN: pmuludq + + %0 = shl %shifttype4i8 %a , %b + ret %shifttype4i8 %0 +} + +%shifttype8i8 = type <8 x i8> +define %shifttype8i8 @shift8i8(%shifttype8i8 %a, %shifttype8i8 %b) { +entry: + ; SSE2: shift8i8 + ; SSE2: cost of 80 {{.*}} shl + ; SSE2-CODEGEN: shift8i8 + ; SSE2-CODEGEN: shll + + %0 = shl %shifttype8i8 %a , %b + ret %shifttype8i8 %0 +} + +%shifttype16i8 = type <16 x i8> +define %shifttype16i8 @shift16i8(%shifttype16i8 %a, %shifttype16i8 %b) { +entry: + ; SSE2: shift16i8 + ; SSE2: cost of 30 {{.*}} shl + ; SSE2-CODEGEN: shift16i8 + ; SSE2-CODEGEN: cmpeqb + + %0 = shl %shifttype16i8 %a , %b + ret %shifttype16i8 %0 +} + +%shifttype32i8 = type <32 x i8> +define %shifttype32i8 @shift32i8(%shifttype32i8 %a, %shifttype32i8 %b) { +entry: + ; SSE2: shift32i8 + ; SSE2: cost of 60 {{.*}} shl + ; SSE2-CODEGEN: shift32i8 + ; SSE2-CODEGEN: cmpeqb + + %0 = shl %shifttype32i8 %a , %b + ret %shifttype32i8 %0 +} |