Add CoalescerPair helper class.

Given a copy instruction, CoalescerPair can determine which registers to coalesce in order to eliminate the copy. It deals with all the subreg fun to determine a tuple (DstReg, SrcReg, SubIdx) such that:

- SrcReg is a virtual register that will disappear after coalescing.
- DstReg is a virtual or physical register whose live range will be extended.
- SubIdx is 0 when DstReg is a physical register.
- SrcReg can be joined with DstReg:SubIdx.

CoalescerPair::isCoalescable() determines if another copy instruction is compatible with the same tuple. This fixes some NEON miscompilations where shuffles are getting coalesced as if they were copies.

The CoalescerPair class will replace a lot of the spaghetti logic in JoinCopy later.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@105997 91177308-0d34-0410-b5e6-96231b3b80d8
author: Jakob Stoklund Olesen <stoklund@2pi.dk> 2010-06-15 16:04:21 +0000
committer: Jakob Stoklund Olesen <stoklund@2pi.dk> 2010-06-15 16:04:21 +0000
commit: 40d07bbebbe73914af28be1bdab169ce8333adca (patch)
tree: c1e7ca4b5aa99d285a603d8394c312a4762622fd /test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll
parent: f4a7bf4ec3faf80a9c890408d574a3d2e7ed1e1e (diff)
download: external_llvm-40d07bbebbe73914af28be1bdab169ce8333adca.zip
external_llvm-40d07bbebbe73914af28be1bdab169ce8333adca.tar.gz
external_llvm-40d07bbebbe73914af28be1bdab169ce8333adca.tar.bz2
1 files changed, 41 insertions, 0 deletions
diff --git a/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll b/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll
new file mode 100644
index 0000000..62c5790
--- /dev/null
+++ b/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll
@@ -0,0 +1,41 @@
+; RUN: llc < %s -O3 -relocation-model=pic -mattr=+thumb2 -mcpu=cortex-a8 | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
+target triple = "thumbv7-apple-darwin10"
+
+; This is a case where the coalescer was too eager. These two copies were
+; considered equivalent and coalescable:
+;
+; 140 %reg1038:dsub_0<def> = VMOVD %reg1047:dsub_0, pred:14, pred:%reg0
+; 148 %reg1038:dsub_1<def> = VMOVD %reg1047:dsub_0, pred:14, pred:%reg0
+;
+; Only one can be coalesced.
+
+@.str = private constant [7 x i8] c"%g %g\0A\00", align 4 ; <[7 x i8]*> [#uses=1]
+
+define arm_apcscc i32 @main(i32 %argc, i8** nocapture %Argv) nounwind {
+entry:
+  %0 = icmp eq i32 %argc, 2123                    ; <i1> [#uses=1]
+  %U.0 = select i1 %0, double 3.282190e+01, double 8.731834e+02 ; <double> [#uses=2]
+  %1 = icmp eq i32 %argc, 5123                    ; <i1> [#uses=1]
+  %V.0.ph = select i1 %1, double 7.779980e+01, double 0x409CCB9C779A6B51 ; <double> [#uses=1]
+  %2 = insertelement <2 x double> undef, double %U.0, i32 0 ; <<2 x double>> [#uses=2] -- lanes: <%U.0, undef>
+  %3 = insertelement <2 x double> %2, double %U.0, i32 1 ; <<2 x double>> [#uses=2] -- lanes: <%U.0, %U.0>
+  %4 = insertelement <2 x double> %2, double %V.0.ph, i32 1 ; <<2 x double>> [#uses=2] -- lanes: <%U.0, %V.0.ph>
+; Expect a constant pool load (vldr.64) followed by a vadd.f64 of the loaded register with itself.
+; The vmov.f64 that follows must then overwrite the loaded register, not the register holding the sum.
+; CHECK: vldr.64 [[LDR:d.]]
+; CHECK: vadd.f64 [[ADD:d.]], [[LDR]], [[LDR]]
+; CHECK: vmov.f64 [[LDR]]
+  %5 = fadd <2 x double> %3, %3                   ; <<2 x double>> [#uses=2]
+  %6 = fadd <2 x double> %4, %4                   ; <<2 x double>> [#uses=2]
+  %tmp7 = extractelement <2 x double> %5, i32 0   ; <double> [#uses=1]
+  %tmp5 = extractelement <2 x double> %5, i32 1   ; <double> [#uses=1]
+; CHECK: printf
+  %7 = tail call arm_apcscc  i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([7 x i8]* @.str, i32 0, i32 0), double %tmp7, double %tmp5) nounwind ; <i32> [#uses=0]
+  %tmp3 = extractelement <2 x double> %6, i32 0   ; <double> [#uses=1]
+  %tmp1 = extractelement <2 x double> %6, i32 1   ; <double> [#uses=1]
+  %8 = tail call arm_apcscc  i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([7 x i8]* @.str, i32 0, i32 0), double %tmp3, double %tmp1) nounwind ; <i32> [#uses=0]
+  ret i32 0
+}
+
+declare arm_apcscc i32 @printf(i8* nocapture, ...) nounwind
author	Jakob Stoklund Olesen <stoklund@2pi.dk>	2010-06-15 16:04:21 +0000
committer	Jakob Stoklund Olesen <stoklund@2pi.dk>	2010-06-15 16:04:21 +0000
commit	40d07bbebbe73914af28be1bdab169ce8333adca (patch)
tree	c1e7ca4b5aa99d285a603d8394c312a4762622fd /test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll
parent	f4a7bf4ec3faf80a9c890408d574a3d2e7ed1e1e (diff)
download	external_llvm-40d07bbebbe73914af28be1bdab169ce8333adca.zip external_llvm-40d07bbebbe73914af28be1bdab169ce8333adca.tar.gz external_llvm-40d07bbebbe73914af28be1bdab169ce8333adca.tar.bz2