Use movups to lower memcpy and memset even if it's not fast (like corei7).

The theory is it's still faster than a pair of movq / a quad of movl. This will probably hurt older chips like P4 but should run faster on current and future Intel processors.

rdar://8817010

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@122955 91177308-0d34-0410-b5e6-96231b3b80d8
author: Evan Cheng <evan.cheng@apple.com> 2011-01-06 07:58:36 +0000
committer: Evan Cheng <evan.cheng@apple.com> 2011-01-06 07:58:36 +0000
commit: 461f1fc359dff438dad25e809499845b10a3d032 (patch)
tree: 143a2a682ffdd84409d6bd1673e22630d42d565e /test/CodeGen/X86/unaligned-load.ll
parent: cce240d26bbf1c2bec9cfff4838d8d807b215586 (diff)
download: external_llvm-461f1fc359dff438dad25e809499845b10a3d032.zip
external_llvm-461f1fc359dff438dad25e809499845b10a3d032.tar.gz
external_llvm-461f1fc359dff438dad25e809499845b10a3d032.tar.bz2
1 files changed, 9 insertions, 16 deletions
diff --git a/test/CodeGen/X86/unaligned-load.ll b/test/CodeGen/X86/unaligned-load.ll
index 6a493c0..0408577 100644
--- a/test/CodeGen/X86/unaligned-load.ll
+++ b/test/CodeGen/X86/unaligned-load.ll
@@ -1,6 +1,4 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin10.0 -mcpu=core2  -relocation-model=dynamic-no-pic --asm-verbose=0   | FileCheck -check-prefix=I386 %s
-; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -mcpu=core2  -relocation-model=dynamic-no-pic --asm-verbose=0 | FileCheck -check-prefix=CORE2 %s
-; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -mcpu=corei7 -relocation-model=dynamic-no-pic --asm-verbose=0 | FileCheck -check-prefix=COREI7 %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -mcpu=core2  -relocation-model=dynamic-no-pic --asm-verbose=0 | FileCheck %s
 
 @.str1 = internal constant [31 x i8] c"DHRYSTONE PROGRAM, SOME STRING\00", align 8
 @.str3 = internal constant [31 x i8] c"DHRYSTONE PROGRAM, 2'ND STRING\00", align 8
@@ -13,13 +11,8 @@ entry:
 bb:
   %String2Loc9 = getelementptr inbounds [31 x i8]* %String2Loc, i64 0, i64 0
   call void @llvm.memcpy.i64(i8* %String2Loc9, i8* getelementptr inbounds ([31 x i8]* @.str3, i64 0, i64 0), i64 31, i32 1)
-; I386: calll {{_?}}memcpy
-
-; CORE2: movabsq
-; CORE2: movabsq
-; CORE2: movabsq
-
-; COREI7: movups _.str3
+; CHECK: movabsq $2325069237881678925, %rax
+; CHECK: movups _.str3(%rip), %xmm0
   br label %bb
 
 return:
@@ -28,9 +21,9 @@ return:
 
 declare void @llvm.memcpy.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
 
-; CORE2: .section
-; CORE2: .align  4
-; CORE2-NEXT: _.str1:
-; CORE2-NEXT: .asciz "DHRYSTONE PROGRAM, SOME STRING"
-; CORE2: .align 4
-; CORE2-NEXT: _.str3:
+; CHECK: .section
+; CHECK: .align  4
+; CHECK-NEXT: _.str1:
+; CHECK-NEXT: .asciz "DHRYSTONE PROGRAM, SOME STRING"
+; CHECK: .align 4
+; CHECK-NEXT: _.str3:
author	Evan Cheng <evan.cheng@apple.com>	2011-01-06 07:58:36 +0000
committer	Evan Cheng <evan.cheng@apple.com>	2011-01-06 07:58:36 +0000
commit	461f1fc359dff438dad25e809499845b10a3d032 (patch)
tree	143a2a682ffdd84409d6bd1673e22630d42d565e /test/CodeGen/X86/unaligned-load.ll
parent	cce240d26bbf1c2bec9cfff4838d8d807b215586 (diff)
download	external_llvm-461f1fc359dff438dad25e809499845b10a3d032.zip external_llvm-461f1fc359dff438dad25e809499845b10a3d032.tar.gz external_llvm-461f1fc359dff438dad25e809499845b10a3d032.tar.bz2