diff options
author | Craig Topper <craig.topper@gmail.com> | 2013-01-02 08:00:39 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@gmail.com> | 2013-01-02 08:00:39 +0000 |
commit | 3af932322859285988a4c53551540fc0658b2116 (patch) | |
tree | 3ff182b56549d6633f5e3176a484076873133719 /lib | |
parent | 3cca7df7c76a668e3ffd0ca20e4324df897ede87 (diff) | |
download | external_llvm-3af932322859285988a4c53551540fc0658b2116.zip external_llvm-3af932322859285988a4c53551540fc0658b2116.tar.gz external_llvm-3af932322859285988a4c53551540fc0658b2116.tar.bz2 |
Merge SSE and AVX instruction definitions for scalar forms of SQRT, RSQRT, and RCP.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171356 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Target/X86/X86InstrSSE.td | 179 |
1 file changed, 97 insertions, 82 deletions
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 661023e..ec7d20b 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -2936,6 +2936,26 @@ def SSE_RCPS : OpndItins< /// sse1_fp_unop_s - SSE1 unops in scalar form. multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode, Intrinsic F32Int, OpndItins itins> { +let Predicates = [HasAVX], hasSideEffects = 0 in { + def V#NAME#SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), + (ins FR32:$src1, FR32:$src2), + !strconcat(!strconcat("v", OpcodeStr), + "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + []>, VEX_4V, VEX_LIG; + let mayLoad = 1 in { + def V#NAME#SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst), + (ins FR32:$src1,f32mem:$src2), + !strconcat(!strconcat("v", OpcodeStr), + "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + []>, VEX_4V, VEX_LIG; + def V#NAME#SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, ssmem:$src2), + !strconcat(!strconcat("v", OpcodeStr), + "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + []>, VEX_4V, VEX_LIG; + } +} + def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), [(set FR32:$dst, (OpNode FR32:$src))]>; @@ -2955,19 +2975,50 @@ multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, [(set VR128:$dst, (F32Int sse_load_f32:$src))], itins.rm>; } -/// sse1_fp_unop_s_avx - AVX SSE1 unops in scalar form. -multiclass sse1_fp_unop_s_avx<bits<8> opc, string OpcodeStr> { - def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2), - !strconcat(OpcodeStr, - "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; +/// sse1_fp_unop_s_rw - SSE1 unops where vector form has a read-write operand. 
+multiclass sse1_fp_unop_rw<bits<8> opc, string OpcodeStr, SDNode OpNode, + OpndItins itins> { +let Predicates = [HasAVX], hasSideEffects = 0 in { + def V#NAME#SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), + (ins FR32:$src1, FR32:$src2), + !strconcat(!strconcat("v", OpcodeStr), + "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + []>, VEX_4V, VEX_LIG; let mayLoad = 1 in { - def SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1,f32mem:$src2), - !strconcat(OpcodeStr, - "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; - def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, ssmem:$src2), - !strconcat(OpcodeStr, - "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; + def V#NAME#SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst), + (ins FR32:$src1,f32mem:$src2), + !strconcat(!strconcat("v", OpcodeStr), + "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + []>, VEX_4V, VEX_LIG; + def V#NAME#SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, ssmem:$src2), + !strconcat(!strconcat("v", OpcodeStr), + "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + []>, VEX_4V, VEX_LIG; + } +} + + def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), + !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), + [(set FR32:$dst, (OpNode FR32:$src))]>; + // For scalar unary operations, fold a load into the operation + // only in OptForSize mode. It eliminates an instruction, but it also + // eliminates a whole-register clobber (the load), so it introduces a + // partial register update condition. 
+ def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src), + !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), + [(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS, + Requires<[UseSSE1, OptForSize]>; + let Constraints = "$src1 = $dst" in { + def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2), + !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"), + [], itins.rr>; + let mayLoad = 1, hasSideEffects = 0 in + def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, ssmem:$src2), + !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"), + [], itins.rm>; } } @@ -3046,6 +3097,26 @@ let Predicates = [HasAVX] in { /// sse2_fp_unop_s - SSE2 unops in scalar form. multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode, Intrinsic F64Int, OpndItins itins> { +let Predicates = [HasAVX], hasSideEffects = 0 in { + def V#NAME#SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), + (ins FR64:$src1, FR64:$src2), + !strconcat(!strconcat("v", OpcodeStr), + "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + []>, VEX_4V, VEX_LIG; + let mayLoad = 1 in { + def V#NAME#SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), + (ins FR64:$src1,f64mem:$src2), + !strconcat(!strconcat("v", OpcodeStr), + "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + []>, VEX_4V, VEX_LIG; + def V#NAME#SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, sdmem:$src2), + !strconcat(!strconcat("v", OpcodeStr), + "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + []>, VEX_4V, VEX_LIG; + } +} + def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src), !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"), [(set FR64:$dst, (OpNode FR64:$src))], itins.rr>; @@ -3062,24 +3133,7 @@ multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, [(set VR128:$dst, (F64Int sse_load_f64:$src))], itins.rm>; } -/// sse2_fp_unop_s_avx - AVX SSE2 unops in scalar form. 
-let hasSideEffects = 0 in -multiclass sse2_fp_unop_s_avx<bits<8> opc, string OpcodeStr> { - def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2), - !strconcat(OpcodeStr, - "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; - let mayLoad = 1 in { - def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1,f64mem:$src2), - !strconcat(OpcodeStr, - "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; - def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, sdmem:$src2), - !strconcat(OpcodeStr, - "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; - } -} - -/// sse2_fp_unop_p_new - SSE2 unops in vector forms. +/// sse2_fp_unop_p - SSE2 unops in vector forms. multiclass sse2_fp_unop_p<bits<8> opc, string OpcodeStr, SDNode OpNode, OpndItins itins> { let Predicates = [HasAVX] in { @@ -3113,26 +3167,25 @@ let Predicates = [HasAVX] in { [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))], itins.rm>; } -defm SQRT : sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTP>, +// Square root. +defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss, + SSE_SQRTS>, + sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTP>, + sse2_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd, + SSE_SQRTS>, sse2_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTP>; -defm RSQRT : sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_SQRTP>, + +// Reciprocal approximations. Note that these typically require refinement +// in order to obtain suitable precision. 
+defm RSQRT : sse1_fp_unop_rw<0x52, "rsqrt", X86frsqrt, SSE_SQRTS>, + sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_SQRTP>, sse1_fp_unop_p_int<0x52, "rsqrt", int_x86_sse_rsqrt_ps, int_x86_avx_rsqrt_ps_256, SSE_SQRTP>; -defm RCP : sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPP>, +defm RCP : sse1_fp_unop_rw<0x53, "rcp", X86frcp, SSE_RCPS>, + sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPP>, sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps, int_x86_avx_rcp_ps_256, SSE_RCPP>; -let Predicates = [HasAVX] in { - // Square root. - defm VSQRT : sse1_fp_unop_s_avx<0x51, "vsqrt">, - sse2_fp_unop_s_avx<0x51, "vsqrt">, VEX_4V, VEX_LIG; - - // Reciprocal approximations. Note that these typically require refinement - // in order to obtain suitable precision. - defm VRSQRT : sse1_fp_unop_s_avx<0x52, "vrsqrt">, VEX_4V, VEX_LIG; - defm VRCP : sse1_fp_unop_s_avx<0x53, "vrcp">, VEX_4V, VEX_LIG; -} - def : Pat<(f32 (fsqrt FR32:$src)), (VSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>; def : Pat<(f32 (fsqrt (load addr:$src))), @@ -3186,49 +3239,11 @@ let Predicates = [HasAVX] in { (VRCPSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>; } -// Square root. -defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss, - SSE_SQRTS>, - sse2_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd, - SSE_SQRTS>; - -/// sse1_fp_unop_s_rw - SSE1 unops where vector form has a read-write operand. -multiclass sse1_fp_unop_rw<bits<8> opc, string OpcodeStr, SDNode OpNode, - OpndItins itins> { - def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), - !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), - [(set FR32:$dst, (OpNode FR32:$src))]>; - // For scalar unary operations, fold a load into the operation - // only in OptForSize mode. It eliminates an instruction, but it also - // eliminates a whole-register clobber (the load), so it introduces a - // partial register update condition. 
- def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src), - !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), - [(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS, - Requires<[UseSSE1, OptForSize]>; - let Constraints = "$src1 = $dst" in { - def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"), - [], itins.rr>; - let mayLoad = 1, hasSideEffects = 0 in - def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, ssmem:$src2), - !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"), - [], itins.rm>; - } -} - // Reciprocal approximations. Note that these typically require refinement // in order to obtain suitable precision. -defm RSQRT : sse1_fp_unop_rw<0x52, "rsqrt", X86frsqrt, SSE_SQRTS>; let Predicates = [UseSSE1] in { def : Pat<(int_x86_sse_rsqrt_ss VR128:$src), (RSQRTSSr_Int VR128:$src, VR128:$src)>; -} - -defm RCP : sse1_fp_unop_rw<0x53, "rcp", X86frcp, SSE_RCPS>; -let Predicates = [UseSSE1] in { def : Pat<(int_x86_sse_rcp_ss VR128:$src), (RCPSSr_Int VR128:$src, VR128:$src)>; } |