aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Target
diff options
context:
space:
mode:
authorManman Ren <mren@apple.com>2012-10-30 23:53:59 +0000
committerManman Ren <mren@apple.com>2012-10-30 23:53:59 +0000
commitdfd0b9b460686ca9491e49dd3647beec5e748a1a (patch)
tree1713bc57a1af86db5c6db4072b03a90e02bfd179 /lib/Target
parentbbc6e671b1a902c96aff152cc524a5ee6e253907 (diff)
downloadexternal_llvm-dfd0b9b460686ca9491e49dd3647beec5e748a1a.zip
external_llvm-dfd0b9b460686ca9491e49dd3647beec5e748a1a.tar.gz
external_llvm-dfd0b9b460686ca9491e49dd3647beec5e748a1a.tar.bz2
X86 SSE: update rsqrtss and rcpss to use two source operands, with
the first source operand tied to the destination operand. This accurately models the corresponding instructions, which leave the upper bits of the destination unmodified. rdar://12558838 PR14221 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@167064 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target')
-rw-r--r--lib/Target/X86/X86InstrSSE.td43
1 files changed, 39 insertions, 4 deletions
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 3fcc0dc..dff2d4e 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -3293,17 +3293,52 @@ defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss,
sse2_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTS>,
sse2_fp_unop_p_int<0x51, "sqrt", int_x86_sse2_sqrt_pd, SSE_SQRTS>;
+/// sse1_fp_unop_rw - SSE1 unops where vector form has a read-write operand.
+/// Defines four records per instantiation: SSr and SSm (FR32 scalar forms)
+/// and SSr_Int and SSm_Int (VR128 forms with a tied first source).
+/// The F32Int parameter is not referenced inside this multiclass.
+multiclass sse1_fp_unop_rw<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ Intrinsic F32Int, OpndItins itins> {
+ def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
+ !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
+ [(set FR32:$dst, (OpNode FR32:$src))]>;
+ // For scalar unary operations, fold a load into the operation
+ // only in OptForSize mode. It eliminates an instruction, but it also
+ // eliminates a whole-register clobber (the load), so it introduces a
+ // partial register update condition.
+ def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
+ !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
+ [(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS,
+ Requires<[UseSSE1, OptForSize]>;
+ // The _Int forms take two sources; $src1 is tied to $dst, making the
+ // destination a read-write operand. Only $src2 and $dst appear in the asm
+ // string. No selection patterns are attached here (empty pattern lists).
+ let Constraints = "$src1 = $dst" in {
+ def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
+ [], itins.rr>;
+ def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, ssmem:$src2),
+ !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
+ [], itins.rm>;
+ }
+}
+
// Reciprocal approximations. Note that these typically require refinement
// in order to obtain suitable precision.
-defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, int_x86_sse_rsqrt_ss,
- SSE_SQRTS>,
+defm RSQRT : sse1_fp_unop_rw<0x52, "rsqrt", X86frsqrt, int_x86_sse_rsqrt_ss,
+ SSE_SQRTS>,
sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_SQRTS>,
sse1_fp_unop_p_int<0x52, "rsqrt", int_x86_sse_rsqrt_ps,
SSE_SQRTS>;
-defm RCP : sse1_fp_unop_s<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss,
- SSE_RCPS>,
+let Predicates = [UseSSE1] in {
+ def : Pat<(int_x86_sse_rsqrt_ss VR128:$src),
+ (RSQRTSSr_Int VR128:$src, VR128:$src)>;
+}
+
+defm RCP : sse1_fp_unop_rw<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss,
+ SSE_RCPS>,
sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPS>,
sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps, SSE_RCPS>;
+let Predicates = [UseSSE1] in {
+ def : Pat<(int_x86_sse_rcp_ss VR128:$src),
+ (RCPSSr_Int VR128:$src, VR128:$src)>;
+}
// There is no f64 version of the reciprocal approximation instructions.