diff options
author | Evan Cheng <evan.cheng@apple.com> | 2008-09-04 22:59:58 +0000 |
---|---|---|
committer | Evan Cheng <evan.cheng@apple.com> | 2008-09-04 22:59:58 +0000 |
commit | 4a03775777785ef758cff8c0699a6bf571a1f2b9 (patch) | |
tree | a3e8d527e079840bf5627f2e68f73f901728eb1e /test/CodeGen/X86 | |
parent | deafefabae915cf20884fd829d2542f9626786e8 (diff) | |
download | external_llvm-4a03775777785ef758cff8c0699a6bf571a1f2b9.zip external_llvm-4a03775777785ef758cff8c0699a6bf571a1f2b9.tar.gz external_llvm-4a03775777785ef758cff8c0699a6bf571a1f2b9.tar.bz2 |
For whatever reason, x86 CallingConv::Fast (i.e. fastcc) was not passing scalar arguments in registers. This patch defines a new fastcc CC which is slightly different from the FastCall CC. In addition to passing integer arguments in ECX and EDX, it also specifies that doubles are passed in 8-byte slots which are 8-byte aligned (instead of 4-byte aligned). This avoids a potential performance hazard where doubles span cache-line boundaries.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@55807 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen/X86')
-rw-r--r-- | test/CodeGen/X86/2007-08-13-SpillerReuse.ll | 10 | ||||
-rw-r--r-- | test/CodeGen/X86/2008-02-22-ReMatBug.ll | 10 | ||||
-rw-r--r-- | test/CodeGen/X86/coalescer-commute3.ll | 6 | ||||
-rw-r--r-- | test/CodeGen/X86/combine-lds.ll | 5 | ||||
-rw-r--r-- | test/CodeGen/X86/fastcc.ll | 19 |
5 files changed, 34 insertions, 16 deletions
diff --git a/test/CodeGen/X86/2007-08-13-SpillerReuse.ll b/test/CodeGen/X86/2007-08-13-SpillerReuse.ll index 8cc235b..edcb823 100644 --- a/test/CodeGen/X86/2007-08-13-SpillerReuse.ll +++ b/test/CodeGen/X86/2007-08-13-SpillerReuse.ll @@ -6,9 +6,9 @@ @rtx_format = external global [116 x i8*] ; <[116 x i8*]*> [#uses=1] @rtx_length = external global [117 x i32] ; <[117 x i32]*> [#uses=1] -declare fastcc %struct.rtx_def* @fixup_memory_subreg(%struct.rtx_def*, %struct.rtx_def*, i32) +declare %struct.rtx_def* @fixup_memory_subreg(%struct.rtx_def*, %struct.rtx_def*, i32) -define fastcc %struct.rtx_def* @walk_fixup_memory_subreg(%struct.rtx_def* %x, %struct.rtx_def* %insn) { +define %struct.rtx_def* @walk_fixup_memory_subreg(%struct.rtx_def* %x, %struct.rtx_def* %insn) { entry: %tmp2 = icmp eq %struct.rtx_def* %x, null ; <i1> [#uses=1] br i1 %tmp2, label %UnifiedReturnBlock, label %cond_next @@ -30,7 +30,7 @@ cond_true13: ; preds = %cond_next br i1 %tmp22, label %cond_true25, label %cond_next32 cond_true25: ; preds = %cond_true13 - %tmp29 = tail call fastcc %struct.rtx_def* @fixup_memory_subreg( %struct.rtx_def* %x, %struct.rtx_def* %insn, i32 1 ) ; <%struct.rtx_def*> [#uses=1] + %tmp29 = tail call %struct.rtx_def* @fixup_memory_subreg( %struct.rtx_def* %x, %struct.rtx_def* %insn, i32 1 ) ; <%struct.rtx_def*> [#uses=1] ret %struct.rtx_def* %tmp29 cond_next32: ; preds = %cond_true13, %cond_next @@ -56,7 +56,7 @@ cond_true47: ; preds = %bb %tmp52 = getelementptr %struct.rtx_def* %x, i32 0, i32 3, i32 %i.01.0 ; <%struct..0anon*> [#uses=1] %tmp5354 = bitcast %struct..0anon* %tmp52 to %struct.rtx_def** ; <%struct.rtx_def**> [#uses=1] %tmp55 = load %struct.rtx_def** %tmp5354 ; <%struct.rtx_def*> [#uses=1] - %tmp58 = tail call fastcc %struct.rtx_def* @walk_fixup_memory_subreg( %struct.rtx_def* %tmp55, %struct.rtx_def* %insn ) ; <%struct.rtx_def*> [#uses=1] + %tmp58 = tail call %struct.rtx_def* @walk_fixup_memory_subreg( %struct.rtx_def* %tmp55, %struct.rtx_def* %insn ) ; 
<%struct.rtx_def*> [#uses=1] %tmp62 = getelementptr %struct.rtx_def* %x, i32 0, i32 3, i32 %i.01.0, i32 0 ; <i32*> [#uses=1] %tmp58.c = ptrtoint %struct.rtx_def* %tmp58 to i32 ; <i32> [#uses=1] store i32 %tmp58.c, i32* %tmp62 @@ -79,7 +79,7 @@ bb73: ; preds = %bb73, %bb105.preheader %tmp92 = getelementptr %struct.rtvec_def* %tmp81, i32 0, i32 1, i32 %j.019 ; <%struct..0anon*> [#uses=1] %tmp9394 = bitcast %struct..0anon* %tmp92 to %struct.rtx_def** ; <%struct.rtx_def**> [#uses=1] %tmp95 = load %struct.rtx_def** %tmp9394 ; <%struct.rtx_def*> [#uses=1] - %tmp98 = tail call fastcc %struct.rtx_def* @walk_fixup_memory_subreg( %struct.rtx_def* %tmp95, %struct.rtx_def* %insn ) ; <%struct.rtx_def*> [#uses=1] + %tmp98 = tail call %struct.rtx_def* @walk_fixup_memory_subreg( %struct.rtx_def* %tmp95, %struct.rtx_def* %insn ) ; <%struct.rtx_def*> [#uses=1] %tmp101 = getelementptr %struct.rtvec_def* %tmp81, i32 0, i32 1, i32 %j.019, i32 0 ; <i32*> [#uses=1] %tmp98.c = ptrtoint %struct.rtx_def* %tmp98 to i32 ; <i32> [#uses=1] store i32 %tmp98.c, i32* %tmp101 diff --git a/test/CodeGen/X86/2008-02-22-ReMatBug.ll b/test/CodeGen/X86/2008-02-22-ReMatBug.ll index fd97b61..f78d526 100644 --- a/test/CodeGen/X86/2008-02-22-ReMatBug.ll +++ b/test/CodeGen/X86/2008-02-22-ReMatBug.ll @@ -4,7 +4,7 @@ %struct.quad_struct = type { i32, i32, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct* } -define fastcc %struct.quad_struct* @MakeTree(i32 %size, i32 %center_x, i32 %center_y, i32 %lo_proc, i32 %hi_proc, %struct.quad_struct* %parent, i32 %ct, i32 %level) nounwind { +define %struct.quad_struct* @MakeTree(i32 %size, i32 %center_x, i32 %center_y, i32 %lo_proc, i32 %hi_proc, %struct.quad_struct* %parent, i32 %ct, i32 %level) nounwind { entry: br i1 true, label %bb43.i, label %bb.i @@ -39,12 +39,12 @@ bb34: ; preds = %CheckOutside.exit20.i %tmp15.reg2mem.0 = sdiv i32 %size, 2 ; <i32> [#uses=7] %tmp85 = sub i32 %center_y, %tmp15.reg2mem.0 ; 
<i32> [#uses=2] %tmp88 = sub i32 %center_x, %tmp15.reg2mem.0 ; <i32> [#uses=2] - %tmp92 = tail call fastcc %struct.quad_struct* @MakeTree( i32 %tmp15.reg2mem.0, i32 %tmp88, i32 %tmp85, i32 0, i32 %hi_proc, %struct.quad_struct* null, i32 2, i32 0 ) nounwind ; <%struct.quad_struct*> [#uses=0] + %tmp92 = tail call %struct.quad_struct* @MakeTree( i32 %tmp15.reg2mem.0, i32 %tmp88, i32 %tmp85, i32 0, i32 %hi_proc, %struct.quad_struct* null, i32 2, i32 0 ) nounwind ; <%struct.quad_struct*> [#uses=0] %tmp99 = add i32 0, %hi_proc ; <i32> [#uses=1] %tmp100 = sdiv i32 %tmp99, 2 ; <i32> [#uses=1] - %tmp110 = tail call fastcc %struct.quad_struct* @MakeTree( i32 %tmp15.reg2mem.0, i32 0, i32 %tmp85, i32 0, i32 %tmp100, %struct.quad_struct* null, i32 3, i32 0 ) nounwind ; <%struct.quad_struct*> [#uses=0] + %tmp110 = tail call %struct.quad_struct* @MakeTree( i32 %tmp15.reg2mem.0, i32 0, i32 %tmp85, i32 0, i32 %tmp100, %struct.quad_struct* null, i32 3, i32 0 ) nounwind ; <%struct.quad_struct*> [#uses=0] %tmp122 = add i32 %tmp15.reg2mem.0, %center_y ; <i32> [#uses=2] - %tmp129 = tail call fastcc %struct.quad_struct* @MakeTree( i32 %tmp15.reg2mem.0, i32 0, i32 %tmp122, i32 0, i32 0, %struct.quad_struct* null, i32 1, i32 0 ) nounwind ; <%struct.quad_struct*> [#uses=0] - %tmp147 = tail call fastcc %struct.quad_struct* @MakeTree( i32 %tmp15.reg2mem.0, i32 %tmp88, i32 %tmp122, i32 %lo_proc, i32 0, %struct.quad_struct* null, i32 0, i32 0 ) nounwind ; <%struct.quad_struct*> [#uses=0] + %tmp129 = tail call %struct.quad_struct* @MakeTree( i32 %tmp15.reg2mem.0, i32 0, i32 %tmp122, i32 0, i32 0, %struct.quad_struct* null, i32 1, i32 0 ) nounwind ; <%struct.quad_struct*> [#uses=0] + %tmp147 = tail call %struct.quad_struct* @MakeTree( i32 %tmp15.reg2mem.0, i32 %tmp88, i32 %tmp122, i32 %lo_proc, i32 0, %struct.quad_struct* null, i32 0, i32 0 ) nounwind ; <%struct.quad_struct*> [#uses=0] unreachable } diff --git a/test/CodeGen/X86/coalescer-commute3.ll b/test/CodeGen/X86/coalescer-commute3.ll index 
ea75637..7d4a80a 100644 --- a/test/CodeGen/X86/coalescer-commute3.ll +++ b/test/CodeGen/X86/coalescer-commute3.ll @@ -2,7 +2,7 @@ %struct.quad_struct = type { i32, i32, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct* } -define fastcc i32 @perimeter(%struct.quad_struct* %tree, i32 %size) nounwind { +define i32 @perimeter(%struct.quad_struct* %tree, i32 %size) nounwind { entry: switch i32 %size, label %UnifiedReturnBlock [ i32 2, label %bb @@ -10,8 +10,8 @@ entry: ] bb: ; preds = %entry - %tmp31 = tail call fastcc i32 @perimeter( %struct.quad_struct* null, i32 0 ) nounwind ; <i32> [#uses=1] - %tmp40 = tail call fastcc i32 @perimeter( %struct.quad_struct* null, i32 0 ) nounwind ; <i32> [#uses=1] + %tmp31 = tail call i32 @perimeter( %struct.quad_struct* null, i32 0 ) nounwind ; <i32> [#uses=1] + %tmp40 = tail call i32 @perimeter( %struct.quad_struct* null, i32 0 ) nounwind ; <i32> [#uses=1] %tmp33 = add i32 0, %tmp31 ; <i32> [#uses=1] %tmp42 = add i32 %tmp33, %tmp40 ; <i32> [#uses=1] ret i32 %tmp42 diff --git a/test/CodeGen/X86/combine-lds.ll b/test/CodeGen/X86/combine-lds.ll index 5e0ad99..a78a042 100644 --- a/test/CodeGen/X86/combine-lds.ll +++ b/test/CodeGen/X86/combine-lds.ll @@ -1,7 +1,6 @@ -; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movsd -; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep mov | count 1 +; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep fldl | count 1 -define fastcc double @doload64(i64 %x) nounwind { +define double @doload64(i64 %x) nounwind { %tmp717 = bitcast i64 %x to double ret double %tmp717 } diff --git a/test/CodeGen/X86/fastcc.ll b/test/CodeGen/X86/fastcc.ll new file mode 100644 index 0000000..13068ba --- /dev/null +++ b/test/CodeGen/X86/fastcc.ll @@ -0,0 +1,19 @@ +; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin | grep mov | grep ecx | grep 0 +; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin | grep mov | grep xmm0 | grep 16 + +@d = external 
global double ; <double*> [#uses=1] +@c = external global double ; <double*> [#uses=1] +@b = external global double ; <double*> [#uses=1] +@a = external global double ; <double*> [#uses=1] + +define i32 @foo() nounwind { +entry: + %0 = load double* @d, align 8 ; <double> [#uses=1] + %1 = load double* @c, align 8 ; <double> [#uses=1] + %2 = load double* @b, align 8 ; <double> [#uses=1] + %3 = load double* @a, align 8 ; <double> [#uses=1] + tail call fastcc void @bar( i32 0, i32 1, i32 2, double 1.000000e+00, double %3, double %2, double %1, double %0 ) nounwind + ret i32 0 +} + +declare fastcc void @bar(i32, i32, i32, double, double, double, double, double) |