diff options
author | Craig Topper <craig.topper@gmail.com> | 2011-12-29 20:43:40 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@gmail.com> | 2011-12-29 20:43:40 +0000 |
commit | 1604ccfc01f1151537350c07bcbce0f9816b57c4 (patch) | |
tree | afb58bd516f0ea448018782abe0c0bf7ddcd60c1 /lib/Target | |
parent | 6f0b181bc70318f8d5d4b9bdead7fc748677fe2a (diff) | |
download | external_llvm-1604ccfc01f1151537350c07bcbce0f9816b57c4.zip external_llvm-1604ccfc01f1151537350c07bcbce0f9816b57c4.tar.gz external_llvm-1604ccfc01f1151537350c07bcbce0f9816b57c4.tar.bz2 |
Fix execution domains for PS/PD FMA3 instructions. Add SS/SD forms of FMA3 instructions.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@147353 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target')
-rw-r--r-- | lib/Target/X86/X86InstrFMA.td | 72 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrFormats.td | 2 |
2 files changed, 55 insertions, 19 deletions
diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td index e0ac33d..83429eb 100644 --- a/lib/Target/X86/X86InstrFMA.td +++ b/lib/Target/X86/X86InstrFMA.td @@ -15,7 +15,7 @@ // FMA3 - Intel 3 operand Fused Multiply-Add instructions //===----------------------------------------------------------------------===// -multiclass fma_rm<bits<8> opc, string OpcodeStr> { +multiclass fma3p_rm<bits<8> opc, string OpcodeStr> { def r : FMA3<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), @@ -34,28 +34,64 @@ multiclass fma_rm<bits<8> opc, string OpcodeStr> { []>; } -multiclass fma_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231, - string OpcodeStr, string PackTy> { - defm r132 : fma_rm<opc132, !strconcat(OpcodeStr, !strconcat("132", PackTy))>; - defm r213 : fma_rm<opc213, !strconcat(OpcodeStr, !strconcat("213", PackTy))>; - defm r231 : fma_rm<opc231, !strconcat(OpcodeStr, !strconcat("231", PackTy))>; +multiclass fma3p_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231, + string OpcodeStr, string PackTy> { + defm r132 : fma3p_rm<opc132, !strconcat(OpcodeStr, !strconcat("132", PackTy))>; + defm r213 : fma3p_rm<opc213, !strconcat(OpcodeStr, !strconcat("213", PackTy))>; + defm r231 : fma3p_rm<opc231, !strconcat(OpcodeStr, !strconcat("231", PackTy))>; } // Fused Multiply-Add -defm VFMADDPS : fma_forms<0x98, 0xA8, 0xB8, "vfmadd", "ps">; -defm VFMADDPD : fma_forms<0x98, 0xA8, 0xB8, "vfmadd", "pd">, VEX_W; -defm VFMADDSUBPS : fma_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "ps">; -defm VFMADDSUBPD : fma_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "pd">, VEX_W; -defm VFMSUBADDPS : fma_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "ps">; -defm VFMSUBADDPD : fma_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "pd">, VEX_W; -defm VFMSUBPS : fma_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps">; -defm VFMSUBPD : fma_forms<0x9A, 0xAA, 0xBA, "vfmsub", "pd">, VEX_W; +let ExeDomain = SSEPackedSingle in { + defm VFMADDPS : 
fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "ps">; + defm VFMSUBPS : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps">; + defm VFMADDSUBPS : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "ps">; + defm VFMSUBADDPS : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "ps">; +} + +let ExeDomain = SSEPackedDouble in { + defm VFMADDPD : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "pd">, VEX_W; + defm VFMSUBPD : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "pd">, VEX_W; + defm VFMADDSUBPD : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "pd">, VEX_W; + defm VFMSUBADDPD : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "pd">, VEX_W; +} // Fused Negative Multiply-Add -defm VFNMADDPS : fma_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "ps">; -defm VFNMADDPD : fma_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "pd">, VEX_W; -defm VFNMSUBPS : fma_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "ps">; -defm VFNMSUBPD : fma_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "pd">, VEX_W; +let ExeDomain = SSEPackedSingle in { + defm VFNMADDPS : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "ps">; + defm VFNMSUBPS : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "ps">; +} +let ExeDomain = SSEPackedDouble in { + defm VFNMADDPD : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "pd">, VEX_W; + defm VFNMSUBPD : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "pd">, VEX_W; +} + +multiclass fma3s_rm<bits<8> opc, string OpcodeStr, X86MemOperand x86memop> { + def r : FMA3<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + []>; + def m : FMA3<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, x86memop:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + []>; +} + +multiclass fma3s_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231, + string OpcodeStr> { + defm SSr132 : fma3s_rm<opc132, !strconcat(OpcodeStr, "132ss"), f32mem>; + defm SSr213 : fma3s_rm<opc213, !strconcat(OpcodeStr, "213ss"), f32mem>; + defm SSr231 : fma3s_rm<opc231, !strconcat(OpcodeStr, "231ss"), f32mem>; + defm 
SDr132 : fma3s_rm<opc132, !strconcat(OpcodeStr, "132sd"), f64mem>, VEX_W; + defm SDr213 : fma3s_rm<opc213, !strconcat(OpcodeStr, "213sd"), f64mem>, VEX_W; + defm SDr231 : fma3s_rm<opc231, !strconcat(OpcodeStr, "231sd"), f64mem>, VEX_W; +} + +defm VFMADD : fma3s_forms<0x99, 0xA9, 0xB9, "vfmadd">; +defm VFMSUB : fma3s_forms<0x9B, 0xAB, 0xBB, "vfmsub">; + +defm VFNMADD : fma3s_forms<0x9D, 0xAD, 0xBD, "vfnmadd">; +defm VFNMSUB : fma3s_forms<0x9F, 0xAF, 0xBF, "vfnmsub">; //===----------------------------------------------------------------------===// // FMA4 - AMD 4 operand Fused Multiply-Add instructions diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index b3a7087..5fe7527 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -504,7 +504,7 @@ class AVXCLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm, // FMA3 Instruction Templates class FMA3<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag>pattern> - : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8, + : I<o, F, outs, ins, asm, pattern>, T8, OpSize, VEX_4V, Requires<[HasFMA3]>; // FMA4 Instruction Templates |