diff options
author | Craig Topper <craig.topper@gmail.com> | 2012-01-10 06:30:56 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@gmail.com> | 2012-01-10 06:30:56 +0000 |
commit | c6d59954d8b4bc63aa8e6333d902a3fffc774189 (patch) | |
tree | f6ab9250a5ed74f11ff92bcdea53a53aa0dd37cc /lib/Target | |
parent | 97b5beb7fe7bb776654b04ae6c18af6ea15c74f7 (diff) | |
download | external_llvm-c6d59954d8b4bc63aa8e6333d902a3fffc774189.zip external_llvm-c6d59954d8b4bc63aa8e6333d902a3fffc774189.tar.gz external_llvm-c6d59954d8b4bc63aa8e6333d902a3fffc774189.tar.bz2 |
Instruction selection priority fixes to remove the XMM/XMMInt/orAVX predicates. Another commit will remove orAVX functions from X86SubTarget.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@147841 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target')
-rw-r--r-- | lib/Target/X86/X86InstrFPStack.td | 29 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrFormats.td | 25 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrInfo.td | 10 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrMMX.td | 40 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrSSE.td | 89 | ||||
-rw-r--r-- | lib/Target/X86/X86Subtarget.h | 12 |
6 files changed, 89 insertions, 116 deletions
diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td index cecd592..adfd98b 100644 --- a/lib/Target/X86/X86InstrFPStack.td +++ b/lib/Target/X86/X86InstrFPStack.td @@ -437,33 +437,26 @@ def IST_FP64m : FPI<0xDF, MRM7m, (outs), (ins i64mem:$dst), "fistp{ll}\t$dst">; } // FISTTP requires SSE3 even though it's a FPStack op. +let Predicates = [HasSSE3] in { def ISTT_Fp16m32 : FpI_<(outs), (ins i16mem:$op, RFP32:$src), OneArgFP, - [(X86fp_to_i16mem RFP32:$src, addr:$op)]>, - Requires<[HasSSE3orAVX]>; + [(X86fp_to_i16mem RFP32:$src, addr:$op)]>; def ISTT_Fp32m32 : FpI_<(outs), (ins i32mem:$op, RFP32:$src), OneArgFP, - [(X86fp_to_i32mem RFP32:$src, addr:$op)]>, - Requires<[HasSSE3orAVX]>; + [(X86fp_to_i32mem RFP32:$src, addr:$op)]>; def ISTT_Fp64m32 : FpI_<(outs), (ins i64mem:$op, RFP32:$src), OneArgFP, - [(X86fp_to_i64mem RFP32:$src, addr:$op)]>, - Requires<[HasSSE3orAVX]>; + [(X86fp_to_i64mem RFP32:$src, addr:$op)]>; def ISTT_Fp16m64 : FpI_<(outs), (ins i16mem:$op, RFP64:$src), OneArgFP, - [(X86fp_to_i16mem RFP64:$src, addr:$op)]>, - Requires<[HasSSE3orAVX]>; + [(X86fp_to_i16mem RFP64:$src, addr:$op)]>; def ISTT_Fp32m64 : FpI_<(outs), (ins i32mem:$op, RFP64:$src), OneArgFP, - [(X86fp_to_i32mem RFP64:$src, addr:$op)]>, - Requires<[HasSSE3orAVX]>; + [(X86fp_to_i32mem RFP64:$src, addr:$op)]>; def ISTT_Fp64m64 : FpI_<(outs), (ins i64mem:$op, RFP64:$src), OneArgFP, - [(X86fp_to_i64mem RFP64:$src, addr:$op)]>, - Requires<[HasSSE3orAVX]>; + [(X86fp_to_i64mem RFP64:$src, addr:$op)]>; def ISTT_Fp16m80 : FpI_<(outs), (ins i16mem:$op, RFP80:$src), OneArgFP, - [(X86fp_to_i16mem RFP80:$src, addr:$op)]>, - Requires<[HasSSE3orAVX]>; + [(X86fp_to_i16mem RFP80:$src, addr:$op)]>; def ISTT_Fp32m80 : FpI_<(outs), (ins i32mem:$op, RFP80:$src), OneArgFP, - [(X86fp_to_i32mem RFP80:$src, addr:$op)]>, - Requires<[HasSSE3orAVX]>; + [(X86fp_to_i32mem RFP80:$src, addr:$op)]>; def ISTT_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP, - [(X86fp_to_i64mem RFP80:$src, addr:$op)]>, - Requires<[HasSSE3orAVX]>; + [(X86fp_to_i64mem RFP80:$src, addr:$op)]>; +} // Predicates = [HasSSE3] let mayStore = 1 in { def ISTT_FP16m : FPI<0xDF, MRM1m, (outs), (ins i16mem:$dst), "fisttp{s}\t$dst">; diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index 76cae18..957a923 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -436,7 +436,7 @@ class SS428I<bits<8> o, Format F, dag outs, dag ins, string asm, // SS42FI - SSE 4.2 instructions with T8XD prefix. class SS42FI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, asm, pattern>, T8XD, Requires<[HasSSE42orAVX]>; + : I<o, F, outs, ins, asm, pattern>, T8XD, Requires<[HasSSE42]>; // SS42AI = SSE 4.2 instructions with TA prefix class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm, @@ -569,11 +569,6 @@ class VRPDI<bits<8> o, Format F, dag outs, dag ins, string asm, // MMXIi8 - MMX instructions with ImmT == Imm8 and TB prefix. // MMXID - MMX instructions with XD prefix. // MMXIS - MMX instructions with XS prefix. -// MMXPI - SSE 1 & 2 packed instructions for MMX with no AVX equivalents -// MMXSDIi8 - SSE2 instructions with ImmT == Imm8 and XD prefix. No AVX equiv. -// MMXSSDIi8 - SSE2 instructions with ImmT == Imm8 and XS prefix. No AVX equiv. -// MMXSS38I - SSSE3 instructions with T8 prefix for MMX registers. No AVX equiv. -// MMXSS3AI - SSSE3 instructions with TA prefix for MMX registers. No AVX equiv. class MMXI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> : I<o, F, outs, ins, asm, pattern>, TB, Requires<[HasMMX]>; @@ -595,21 +590,3 @@ class MMXID<bits<8> o, Format F, dag outs, dag ins, string asm, class MMXIS<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasMMX]>; - -class MMXPI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, - Domain d> - : I<o, F, outs, ins, asm, pattern, d> { - let Predicates = !if(hasOpSizePrefix /* OpSize */, [HasXMMInt], [HasXMM]); -} -class MMXSDIi8<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern>, XD, Requires<[HasXMMInt]>; -class MMXSSDIi8<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasXMMInt]>; -class MMXSS38I<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : I<o, F, outs, ins, asm, pattern>, T8, Requires<[HasSSSE3orAVX]>; -class MMXSS3AI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern>, TA, Requires<[HasSSSE3orAVX]>; diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 62eadcf..aeb1275 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -470,14 +470,8 @@ def HasSSSE3 : Predicate<"Subtarget->hasSSSE3()">; def HasSSE41 : Predicate<"Subtarget->hasSSE41()">; def HasSSE42 : Predicate<"Subtarget->hasSSE42()">; def HasSSE4A : Predicate<"Subtarget->hasSSE4A()">; - def HasAVX : Predicate<"Subtarget->hasAVX()">; def HasAVX2 : Predicate<"Subtarget->hasAVX2()">; -def HasXMM : Predicate<"Subtarget->hasXMM()">; -def HasXMMInt : Predicate<"Subtarget->hasXMMInt()">; -def HasSSE3orAVX : Predicate<"Subtarget->hasSSE3orAVX()">; -def HasSSSE3orAVX : Predicate<"Subtarget->hasSSSE3orAVX()">; -def HasSSE42orAVX : Predicate<"Subtarget->hasSSE42orAVX()">; def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">; def HasAES : Predicate<"Subtarget->hasAES()">; @@ -492,8 +486,8 @@ def HasFSGSBase : Predicate<"Subtarget->hasFSGSBase()">; def HasLZCNT : Predicate<"Subtarget->hasLZCNT()">; def HasBMI : Predicate<"Subtarget->hasBMI()">; def HasBMI2 : Predicate<"Subtarget->hasBMI2()">; -def FPStackf32 : Predicate<"!Subtarget->hasXMM()">; -def FPStackf64 : Predicate<"!Subtarget->hasXMMInt()">; +def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">; +def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">; def HasCmpxchg16b: Predicate<"Subtarget->hasCmpxchg16b()">; def In32BitMode : Predicate<"!Subtarget->is64Bit()">, AssemblerPredicate<"!Mode64Bit">; diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index 5bbf86a..3025a4d 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -60,14 +60,14 @@ let Constraints = "$src1 = $dst" in { /// Unary MMX instructions requiring SSSE3. multiclass SS3I_unop_rm_int_mm<bits<8> opc, string OpcodeStr, Intrinsic IntId64> { - def rr64 : MMXSS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src), - !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(set VR64:$dst, (IntId64 VR64:$src))]>; - - def rm64 : MMXSS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src), - !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(set VR64:$dst, - (IntId64 (bitconvert (memopmmx addr:$src))))]>; + def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + [(set VR64:$dst, (IntId64 VR64:$src))]>; + + def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + [(set VR64:$dst, + (IntId64 (bitconvert (memopmmx addr:$src))))]>; } /// Binary MMX instructions requiring SSSE3. @@ -75,11 +75,11 @@ let ImmT = NoImm, Constraints = "$src1 = $dst" in { multiclass SS3I_binop_rm_int_mm<bits<8> opc, string OpcodeStr, Intrinsic IntId64> { let isCommutable = 0 in - def rr64 : MMXSS38I<opc, MRMSrcReg, (outs VR64:$dst), + def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]>; - def rm64 : MMXSS38I<opc, MRMSrcMem, (outs VR64:$dst), + def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR64:$dst, @@ -90,11 +90,11 @@ multiclass SS3I_binop_rm_int_mm<bits<8> opc, string OpcodeStr, /// PALIGN MMX instructions (require SSSE3). multiclass ssse3_palign_mm<string asm, Intrinsic IntId> { - def R64irr : MMXSS3AI<0x0F, MRMSrcReg, (outs VR64:$dst), + def R64irr : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2, i8imm:$src3), !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2, (i8 imm:$src3)))]>; - def R64irm : MMXSS3AI<0x0F, MRMSrcMem, (outs VR64:$dst), + def R64irm : SS3AI<0x0F, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2, i8imm:$src3), !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set VR64:$dst, (IntId VR64:$src1, @@ -104,18 +104,18 @@ multiclass ssse3_palign_mm<string asm, Intrinsic IntId> { multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag, string asm, Domain d> { - def irr : MMXPI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, + def irr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, [(set DstRC:$dst, (Int SrcRC:$src))], d>; - def irm : MMXPI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, + def irm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, [(set DstRC:$dst, (Int (ld_frag addr:$src)))], d>; } multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag, string asm, Domain d> { - def irr : MMXPI<opc, MRMSrcReg, (outs DstRC:$dst),(ins DstRC:$src1, SrcRC:$src2), + def irr : PI<opc, MRMSrcReg, (outs DstRC:$dst),(ins DstRC:$src1, SrcRC:$src2), asm, [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))], d>; - def irm : MMXPI<opc, MRMSrcMem, (outs DstRC:$dst), + def irm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins DstRC:$src1, x86memop:$src2), asm, [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))], d>; } @@ -175,24 +175,24 @@ def MMX_MOVQ64mr : MMXI<0x7F, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src), "movq\t{$src, $dst|$dst, $src}", [(store (x86mmx VR64:$src), addr:$dst)]>; -def MMX_MOVDQ2Qrr : MMXSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), +def MMX_MOVDQ2Qrr : SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), "movdq2q\t{$src, $dst|$dst, $src}", [(set VR64:$dst, (x86mmx (bitconvert (i64 (vector_extract (v2i64 VR128:$src), (iPTR 0))))))]>; -def MMX_MOVQ2DQrr : MMXSSDIi8<0xD6, MRMSrcReg, (outs VR128:$dst), +def MMX_MOVQ2DQrr : SSDIi8<0xD6, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2i64 (scalar_to_vector (i64 (bitconvert (x86mmx VR64:$src))))))]>; let neverHasSideEffects = 1 in -def MMX_MOVQ2FR64rr: MMXSSDIi8<0xD6, MRMSrcReg, (outs FR64:$dst), +def MMX_MOVQ2FR64rr: SSDIi8<0xD6, MRMSrcReg, (outs FR64:$dst), (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}", []>; -def MMX_MOVFR642Qrr: MMXSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), +def MMX_MOVFR642Qrr: SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), (ins FR64:$src), "movdq2q\t{$src, $dst|$dst, $src}", []>; def MMX_MOVNTQmr : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src), diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 266d1c5..95185f1 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -171,7 +171,7 @@ def : Pat<(v4f64 (scalar_to_vector FR64:$src)), // Bitcasts between 128-bit vector types. Return the original type since // no instruction is needed for the conversion -let Predicates = [HasXMMInt] in { +let Predicates = [HasSSE2] in { def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>; def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>; def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>; @@ -244,9 +244,9 @@ let Predicates = [HasAVX] in { let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, isPseudo = 1 in { def FsFLD0SS : I<0, Pseudo, (outs FR32:$dst), (ins), "", - [(set FR32:$dst, fp32imm0)]>, Requires<[HasXMM]>; + [(set FR32:$dst, fp32imm0)]>, Requires<[HasSSE1]>; def FsFLD0SD : I<0, Pseudo, (outs FR64:$dst), (ins), "", - [(set FR64:$dst, fpimm0)]>, Requires<[HasXMMInt]>; + [(set FR64:$dst, fpimm0)]>, Requires<[HasSSE2]>; } //===----------------------------------------------------------------------===// @@ -1407,9 +1407,11 @@ multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, multiclass sse12_cvt_s_np<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, X86MemOperand x86memop, string asm> { +let neverHasSideEffects = 1 in { def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, []>; let mayLoad = 1 in def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, []>; +} // neverHasSideEffects = 1 } multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, @@ -1423,12 +1425,14 @@ multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, X86MemOperand x86memop, string asm> { +let neverHasSideEffects = 1 in { def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src), !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>; let mayLoad = 1 in def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins DstRC:$src1, x86memop:$src), !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>; +} // neverHasSideEffects = 1 } defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32, @@ -1459,7 +1463,7 @@ defm VCVTSI2SDL : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">, XD, defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">, XD, VEX_4V, VEX_W, VEX_LIG; -let Predicates = [HasAVX] in { +let Predicates = [HasAVX], AddedComplexity = 1 in { def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))), (VCVTSI2SSrm (f32 (IMPLICIT_DEF)), addr:$src)>; def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))), @@ -1623,26 +1627,26 @@ defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load /*dummy*/, SSEPackedSingle>, TB; /* PD SSE3 form is avaiable */ } -let Predicates = [HasSSE1] in { +let Predicates = [HasAVX] in { def : Pat<(int_x86_sse_cvtss2si VR128:$src), - (CVTSS2SIrr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; + (VCVTSS2SIrr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; def : Pat<(int_x86_sse_cvtss2si (load addr:$src)), - (CVTSS2SIrm addr:$src)>; + (VCVTSS2SIrm addr:$src)>; def : Pat<(int_x86_sse_cvtss2si64 VR128:$src), - (CVTSS2SI64rr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; + (VCVTSS2SI64rr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; def : Pat<(int_x86_sse_cvtss2si64 (load addr:$src)), - (CVTSS2SI64rm addr:$src)>; + (VCVTSS2SI64rm addr:$src)>; } -let Predicates = [HasAVX] in { +let Predicates = [HasSSE1] in { def : Pat<(int_x86_sse_cvtss2si VR128:$src), - (VCVTSS2SIrr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; + (CVTSS2SIrr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; def : Pat<(int_x86_sse_cvtss2si (load addr:$src)), - (VCVTSS2SIrm addr:$src)>; + (CVTSS2SIrm addr:$src)>; def : Pat<(int_x86_sse_cvtss2si64 VR128:$src), - (VCVTSS2SI64rr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; + (CVTSS2SI64rr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; def : Pat<(int_x86_sse_cvtss2si64 (load addr:$src)), - (VCVTSS2SI64rm addr:$src)>; + (CVTSS2SI64rm addr:$src)>; } /// SSE 2 Only @@ -1844,6 +1848,7 @@ def Int_CVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), // Convert with truncation packed single/double fp to doubleword // SSE2 packed instructions with XS prefix +let neverHasSideEffects = 1 in { def VCVTTPS2DQrr : VSSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX; let mayLoad = 1 in @@ -1854,14 +1859,7 @@ def VCVTTPS2DQYrr : VSSI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), let mayLoad = 1 in def VCVTTPS2DQYrm : VSSI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX; -def CVTTPS2DQrr : SSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvttps2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, - (int_x86_sse2_cvttps2dq VR128:$src))]>; -def CVTTPS2DQrm : SSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvttps2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, - (int_x86_sse2_cvttps2dq (memop addr:$src)))]>; +} // neverHasSideEffects = 1 def Int_VCVTTPS2DQrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vcvttps2dq\t{$src, $dst|$dst, $src}", @@ -1874,12 +1872,14 @@ def Int_VCVTTPS2DQrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), (memop addr:$src)))]>, XS, VEX, Requires<[HasAVX]>; -let Predicates = [HasSSE2] in { - def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))), - (Int_CVTDQ2PSrr VR128:$src)>; - def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))), - (CVTTPS2DQrr VR128:$src)>; -} +def CVTTPS2DQrr : SSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvttps2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_sse2_cvttps2dq VR128:$src))]>; +def CVTTPS2DQrm : SSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "cvttps2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_sse2_cvttps2dq (memop addr:$src)))]>; let Predicates = [HasAVX] in { def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))), @@ -1892,6 +1892,13 @@ let Predicates = [HasAVX] in { (VCVTTPS2DQYrr VR256:$src)>; } +let Predicates = [HasSSE2] in { + def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))), + (Int_CVTDQ2PSrr VR128:$src)>; + def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))), + (CVTTPS2DQrr VR128:$src)>; +} + def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, @@ -3145,6 +3152,7 @@ let Predicates = [HasAVX] in { sse1_fp_unop_p_int<0x53, "vrcp", int_x86_sse_rcp_ps>, VEX; } +let AddedComplexity = 1 in { def : Pat<(f32 (fsqrt FR32:$src)), (VSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>; def : Pat<(f32 (fsqrt (load addr:$src))), @@ -3167,8 +3175,9 @@ def : Pat<(f32 (X86frcp FR32:$src)), def : Pat<(f32 (X86frcp (load addr:$src))), (VRCPSSm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX, OptForSize]>; +} -let Predicates = [HasAVX] in { +let Predicates = [HasAVX], AddedComplexity = 1 in { def : Pat<(int_x86_sse_sqrt_ss VR128:$src), (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), (VSQRTSSr (f32 (IMPLICIT_DEF)), @@ -3292,11 +3301,11 @@ def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst), def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), "movnti{l}\t{$src, $dst|$dst, $src}", [(nontemporalstore (i32 GR32:$src), addr:$dst)]>, - TB, Requires<[HasXMMInt]>; + TB, Requires<[HasSSE2]>; def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), "movnti{q}\t{$src, $dst|$dst, $src}", [(nontemporalstore (i64 GR64:$src), addr:$dst)]>, - TB, Requires<[HasXMMInt]>; + TB, Requires<[HasSSE2]>; } //===----------------------------------------------------------------------===// @@ -3304,7 +3313,7 @@ def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), //===----------------------------------------------------------------------===// // Prefetch intrinsic. -let Predicates = [HasXMM] in { +let Predicates = [HasSSE1] in { def PREFETCHT0 : I<0x18, MRM1m, (outs), (ins i8mem:$src), "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))]>, TB; def PREFETCHT1 : I<0x18, MRM2m, (outs), (ins i8mem:$src), @@ -3318,7 +3327,7 @@ def PREFETCHNTA : I<0x18, MRM0m, (outs), (ins i8mem:$src), // Flush cache def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src), "clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>, - TB, Requires<[HasXMMInt]>; + TB, Requires<[HasSSE2]>; // Pause. This "instruction" is encoded as "rep; nop", so even though it // was introduced with SSE2, it's backward compatible. @@ -3326,11 +3335,11 @@ def PAUSE : I<0x90, RawFrm, (outs), (ins), "pause", []>, REP; // Load, store, and memory fence def SFENCE : I<0xAE, MRM_F8, (outs), (ins), - "sfence", [(int_x86_sse_sfence)]>, TB, Requires<[HasXMM]>; + "sfence", [(int_x86_sse_sfence)]>, TB, Requires<[HasSSE1]>; def LFENCE : I<0xAE, MRM_E8, (outs), (ins), - "lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasXMMInt]>; + "lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasSSE2]>; def MFENCE : I<0xAE, MRM_F0, (outs), (ins), - "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasXMMInt]>; + "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>; def : Pat<(X86SFence), (SFENCE)>; def : Pat<(X86LFence), (LFENCE)>; @@ -5475,18 +5484,18 @@ def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), let usesCustomInserter = 1 in { def MONITOR : PseudoI<(outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3), [(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>, - Requires<[HasSSE3orAVX]>; + Requires<[HasSSE3]>; def MWAIT : PseudoI<(outs), (ins GR32:$src1, GR32:$src2), [(int_x86_sse3_mwait GR32:$src1, GR32:$src2)]>, - Requires<[HasSSE3orAVX]>; + Requires<[HasSSE3]>; } let Uses = [EAX, ECX, EDX] in def MONITORrrr : I<0x01, MRM_C8, (outs), (ins), "monitor", []>, TB, - Requires<[HasSSE3orAVX]>; + Requires<[HasSSE3]>; let Uses = [ECX, EAX] in def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait", []>, TB, - Requires<[HasSSE3orAVX]>; + Requires<[HasSSE3]>; def : InstAlias<"mwait %eax, %ecx", (MWAITrr)>, Requires<[In32BitMode]>; def : InstAlias<"mwait %rax, %rcx", (MWAITrr)>, Requires<[In64BitMode]>; diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 10ef868..5a32240 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -173,12 +173,12 @@ public: bool hasCMov() const { return HasCMov; } bool hasMMX() const { return X86SSELevel >= MMX; } - bool hasSSE1() const { return X86SSELevel >= SSE1 && !hasAVX(); } - bool hasSSE2() const { return X86SSELevel >= SSE2 && !hasAVX(); } - bool hasSSE3() const { return X86SSELevel >= SSE3 && !hasAVX(); } - bool hasSSSE3() const { return X86SSELevel >= SSSE3 && !hasAVX(); } - bool hasSSE41() const { return X86SSELevel >= SSE41 && !hasAVX(); } - bool hasSSE42() const { return X86SSELevel >= SSE42 && !hasAVX(); } + bool hasSSE1() const { return X86SSELevel >= SSE1; } + bool hasSSE2() const { return X86SSELevel >= SSE2; } + bool hasSSE3() const { return X86SSELevel >= SSE3; } + bool hasSSSE3() const { return X86SSELevel >= SSSE3; } + bool hasSSE41() const { return X86SSELevel >= SSE41; } + bool hasSSE42() const { return X86SSELevel >= SSE42; } bool hasSSE4A() const { return HasSSE4A; } bool has3DNow() const { return X863DNowLevel >= ThreeDNow; } bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; } |