X86 SSE: update rsqrtss and rcpss to use two source operands and

the first source operand is tied to the destination operand. This is to accurately model the corresponding instructions where the upper bits are unmodified. rdar://12558838 PR14221 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@167064 91177308-0d34-0410-b5e6-96231b3b80d8
author: Manman Ren <mren@apple.com> 2012-10-30 23:53:59 +0000
committer: Manman Ren <mren@apple.com> 2012-10-30 23:53:59 +0000
commit: dfd0b9b460686ca9491e49dd3647beec5e748a1a (patch)
tree: 1713bc57a1af86db5c6db4072b03a90e02bfd179 /lib/Target
parent: bbc6e671b1a902c96aff152cc524a5ee6e253907 (diff)
download: external_llvm-dfd0b9b460686ca9491e49dd3647beec5e748a1a.zip
external_llvm-dfd0b9b460686ca9491e49dd3647beec5e748a1a.tar.gz
external_llvm-dfd0b9b460686ca9491e49dd3647beec5e748a1a.tar.bz2
1 files changed, 39 insertions, 4 deletions
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 3fcc0dc..dff2d4e 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -3293,17 +3293,52 @@ defm SQRT  : sse1_fp_unop_s<0x51, "sqrt",  fsqrt, int_x86_sse_sqrt_ss,
              sse2_fp_unop_p<0x51, "sqrt",  fsqrt, SSE_SQRTS>,
              sse2_fp_unop_p_int<0x51, "sqrt", int_x86_sse2_sqrt_pd, SSE_SQRTS>;
 
+/// sse1_fp_unop_s_rw - SSE1 unops where vector form has a read-write operand.
+multiclass sse1_fp_unop_rw<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                               Intrinsic F32Int, OpndItins itins> {
+  def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
+                !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
+                [(set FR32:$dst, (OpNode FR32:$src))]>;
+  // For scalar unary operations, fold a load into the operation
+  // only in OptForSize mode. It eliminates an instruction, but it also
+  // eliminates a whole-register clobber (the load), so it introduces a
+  // partial register update condition.
+  def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
+                !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
+                [(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS,
+            Requires<[UseSSE1, OptForSize]>;
+  let Constraints = "$src1 = $dst" in {
+    def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst),
+                      (ins VR128:$src1, VR128:$src2),
+                      !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
+                      [], itins.rr>;
+    def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
+                      (ins VR128:$src1, ssmem:$src2),
+                      !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
+                      [], itins.rm>;
+  }
+}
+
 // Reciprocal approximations. Note that these typically require refinement
 // in order to obtain suitable precision.
-defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, int_x86_sse_rsqrt_ss,
-                            SSE_SQRTS>,
+defm RSQRT : sse1_fp_unop_rw<0x52, "rsqrt", X86frsqrt, int_x86_sse_rsqrt_ss,
+                             SSE_SQRTS>,
              sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_SQRTS>,
              sse1_fp_unop_p_int<0x52, "rsqrt", int_x86_sse_rsqrt_ps,
                             SSE_SQRTS>;
-defm RCP   : sse1_fp_unop_s<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss,
-                            SSE_RCPS>,
+let Predicates = [UseSSE1] in {
+  def : Pat<(int_x86_sse_rsqrt_ss VR128:$src),
+            (RSQRTSSr_Int VR128:$src, VR128:$src)>;
+}
+
+defm RCP   : sse1_fp_unop_rw<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss,
+                             SSE_RCPS>,
              sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPS>,
              sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps, SSE_RCPS>;
+let Predicates = [UseSSE1] in {
+  def : Pat<(int_x86_sse_rcp_ss VR128:$src),
+            (RCPSSr_Int VR128:$src, VR128:$src)>;
+}
 
 // There is no f64 version of the reciprocal approximation instructions.
author	Manman Ren <mren@apple.com>	2012-10-30 23:53:59 +0000
committer	Manman Ren <mren@apple.com>	2012-10-30 23:53:59 +0000
commit	dfd0b9b460686ca9491e49dd3647beec5e748a1a (patch)
tree	1713bc57a1af86db5c6db4072b03a90e02bfd179 /lib/Target
parent	bbc6e671b1a902c96aff152cc524a5ee6e253907 (diff)
download	external_llvm-dfd0b9b460686ca9491e49dd3647beec5e748a1a.zip external_llvm-dfd0b9b460686ca9491e49dd3647beec5e748a1a.tar.gz external_llvm-dfd0b9b460686ca9491e49dd3647beec5e748a1a.tar.bz2