diff options
53 files changed, 1121 insertions, 591 deletions
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index dd36955..d5db45b 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -120,8 +120,16 @@ def FeatureBMI2 : SubtargetFeature<"bmi2", "HasBMI2", "true", // X86 processors supported. //===----------------------------------------------------------------------===// +include "X86Schedule.td" + +def ProcIntelAtom : SubtargetFeature<"atom", "X86ProcFamily", "IntelAtom", + "Intel Atom processors">; + class Proc<string Name, list<SubtargetFeature> Features> - : Processor<Name, NoItineraries, Features>; + : Processor<Name, GenericItineraries, Features>; + +class AtomProc<string Name, list<SubtargetFeature> Features> + : Processor<Name, AtomItineraries, Features>; def : Proc<"generic", []>; def : Proc<"i386", []>; @@ -146,8 +154,8 @@ def : Proc<"core2", [FeatureSSSE3, FeatureCMPXCHG16B, FeatureSlowBTMem]>; def : Proc<"penryn", [FeatureSSE41, FeatureCMPXCHG16B, FeatureSlowBTMem]>; -def : Proc<"atom", [FeatureSSE3, FeatureCMPXCHG16B, FeatureMOVBE, - FeatureSlowBTMem]>; +def : AtomProc<"atom", [ProcIntelAtom, FeatureSSE3, FeatureCMPXCHG16B, + FeatureMOVBE, FeatureSlowBTMem]>; // "Arrandale" along with corei3 and corei5 def : Proc<"corei7", [FeatureSSE42, FeatureCMPXCHG16B, FeatureSlowBTMem, FeatureFastUAMem, diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 68df786..658837c 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -179,8 +179,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // For 64-bit since we have so many registers use the ILP scheduler, for // 32-bit code use the register pressure specific scheduling. + // For 32 bit Atom, use Hybrid (register pressure + latency) scheduling. 
if (Subtarget->is64Bit()) setSchedulingPreference(Sched::ILP); + else if (Subtarget->isAtom()) + setSchedulingPreference(Sched::Hybrid); else setSchedulingPreference(Sched::RegPressure); setStackPointerRegisterToSaveRestore(X86StackPtr); diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td index c99c52d..7029b71 100644 --- a/lib/Target/X86/X86InstrArithmetic.td +++ b/lib/Target/X86/X86InstrArithmetic.td @@ -18,22 +18,24 @@ let neverHasSideEffects = 1 in def LEA16r : I<0x8D, MRMSrcMem, (outs GR16:$dst), (ins i32mem:$src), - "lea{w}\t{$src|$dst}, {$dst|$src}", []>, OpSize; + "lea{w}\t{$src|$dst}, {$dst|$src}", [], IIC_LEA_16>, OpSize; let isReMaterializable = 1 in def LEA32r : I<0x8D, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "lea{l}\t{$src|$dst}, {$dst|$src}", - [(set GR32:$dst, lea32addr:$src)]>, Requires<[In32BitMode]>; + [(set GR32:$dst, lea32addr:$src)], IIC_LEA>, + Requires<[In32BitMode]>; def LEA64_32r : I<0x8D, MRMSrcMem, (outs GR32:$dst), (ins lea64_32mem:$src), "lea{l}\t{$src|$dst}, {$dst|$src}", - [(set GR32:$dst, lea32addr:$src)]>, Requires<[In64BitMode]>; + [(set GR32:$dst, lea32addr:$src)], IIC_LEA>, + Requires<[In64BitMode]>; let isReMaterializable = 1 in def LEA64r : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), "lea{q}\t{$src|$dst}, {$dst|$src}", - [(set GR64:$dst, lea64addr:$src)]>; + [(set GR64:$dst, lea64addr:$src)], IIC_LEA>; @@ -56,16 +58,18 @@ def MUL8r : I<0xF6, MRM4r, (outs), (ins GR8:$src), "mul{b}\t$src", let Defs = [AX,DX,EFLAGS], Uses = [AX], neverHasSideEffects = 1 in def MUL16r : I<0xF7, MRM4r, (outs), (ins GR16:$src), "mul{w}\t$src", - []>, OpSize; // AX,DX = AX*GR16 + [], IIC_MUL16_REG>, OpSize; // AX,DX = AX*GR16 let Defs = [EAX,EDX,EFLAGS], Uses = [EAX], neverHasSideEffects = 1 in def MUL32r : I<0xF7, MRM4r, (outs), (ins GR32:$src), "mul{l}\t$src", // EAX,EDX = EAX*GR32 - [/*(set EAX, EDX, EFLAGS, (X86umul_flag EAX, GR32:$src))*/]>; + [/*(set EAX, EDX, EFLAGS, (X86umul_flag EAX, 
GR32:$src))*/], + IIC_MUL32_REG>; let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in def MUL64r : RI<0xF7, MRM4r, (outs), (ins GR64:$src), "mul{q}\t$src", // RAX,RDX = RAX*GR64 - [/*(set RAX, RDX, EFLAGS, (X86umul_flag RAX, GR64:$src))*/]>; + [/*(set RAX, RDX, EFLAGS, (X86umul_flag RAX, GR64:$src))*/], + IIC_MUL64>; let Defs = [AL,EFLAGS,AX], Uses = [AL] in def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src), @@ -74,21 +78,21 @@ def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src), // This probably ought to be moved to a def : Pat<> if the // syntax can be accepted. [(set AL, (mul AL, (loadi8 addr:$src))), - (implicit EFLAGS)]>; // AL,AH = AL*[mem8] + (implicit EFLAGS)], IIC_MUL8>; // AL,AH = AL*[mem8] let mayLoad = 1, neverHasSideEffects = 1 in { let Defs = [AX,DX,EFLAGS], Uses = [AX] in def MUL16m : I<0xF7, MRM4m, (outs), (ins i16mem:$src), "mul{w}\t$src", - []>, OpSize; // AX,DX = AX*[mem16] + [], IIC_MUL16_MEM>, OpSize; // AX,DX = AX*[mem16] let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in def MUL32m : I<0xF7, MRM4m, (outs), (ins i32mem:$src), "mul{l}\t$src", - []>; // EAX,EDX = EAX*[mem32] + [], IIC_MUL32_MEM>; // EAX,EDX = EAX*[mem32] let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in def MUL64m : RI<0xF7, MRM4m, (outs), (ins i64mem:$src), - "mul{q}\t$src", []>; // RAX,RDX = RAX*[mem64] + "mul{q}\t$src", [], IIC_MUL64>; // RAX,RDX = RAX*[mem64] } let neverHasSideEffects = 1 in { @@ -130,16 +134,19 @@ let isCommutable = 1 in { // X = IMUL Y, Z --> X = IMUL Z, Y def IMUL16rr : I<0xAF, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1,GR16:$src2), "imul{w}\t{$src2, $dst|$dst, $src2}", [(set GR16:$dst, EFLAGS, - (X86smul_flag GR16:$src1, GR16:$src2))]>, TB, OpSize; + (X86smul_flag GR16:$src1, GR16:$src2))], IIC_IMUL16_RR>, + TB, OpSize; def IMUL32rr : I<0xAF, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1,GR32:$src2), "imul{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, EFLAGS, - (X86smul_flag GR32:$src1, GR32:$src2))]>, TB; + (X86smul_flag GR32:$src1, 
GR32:$src2))], IIC_IMUL32_RR>, + TB; def IMUL64rr : RI<0xAF, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), "imul{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, EFLAGS, - (X86smul_flag GR64:$src1, GR64:$src2))]>, TB; + (X86smul_flag GR64:$src1, GR64:$src2))], IIC_IMUL64_RR>, + TB; } // Register-Memory Signed Integer Multiply @@ -147,18 +154,23 @@ def IMUL16rm : I<0xAF, MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), "imul{w}\t{$src2, $dst|$dst, $src2}", [(set GR16:$dst, EFLAGS, - (X86smul_flag GR16:$src1, (load addr:$src2)))]>, + (X86smul_flag GR16:$src1, (load addr:$src2)))], + IIC_IMUL16_RM>, TB, OpSize; def IMUL32rm : I<0xAF, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), "imul{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, EFLAGS, - (X86smul_flag GR32:$src1, (load addr:$src2)))]>, TB; + (X86smul_flag GR32:$src1, (load addr:$src2)))], + IIC_IMUL32_RM>, + TB; def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), "imul{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, EFLAGS, - (X86smul_flag GR64:$src1, (load addr:$src2)))]>, TB; + (X86smul_flag GR64:$src1, (load addr:$src2)))], + IIC_IMUL64_RM>, + TB; } // Constraints = "$src1 = $dst" } // Defs = [EFLAGS] @@ -170,33 +182,39 @@ def IMUL16rri : Ii16<0x69, MRMSrcReg, // GR16 = GR16*I16 (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR16:$dst, EFLAGS, - (X86smul_flag GR16:$src1, imm:$src2))]>, OpSize; + (X86smul_flag GR16:$src1, imm:$src2))], + IIC_IMUL16_RRI>, OpSize; def IMUL16rri8 : Ii8<0x6B, MRMSrcReg, // GR16 = GR16*I8 (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2), "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR16:$dst, EFLAGS, - (X86smul_flag GR16:$src1, i16immSExt8:$src2))]>, + (X86smul_flag GR16:$src1, i16immSExt8:$src2))], + IIC_IMUL16_RRI>, OpSize; def IMUL32rri : Ii32<0x69, MRMSrcReg, // GR32 = GR32*I32 (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2), 
"imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32:$dst, EFLAGS, - (X86smul_flag GR32:$src1, imm:$src2))]>; + (X86smul_flag GR32:$src1, imm:$src2))], + IIC_IMUL32_RRI>; def IMUL32rri8 : Ii8<0x6B, MRMSrcReg, // GR32 = GR32*I8 (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2), "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32:$dst, EFLAGS, - (X86smul_flag GR32:$src1, i32immSExt8:$src2))]>; + (X86smul_flag GR32:$src1, i32immSExt8:$src2))], + IIC_IMUL32_RRI>; def IMUL64rri32 : RIi32<0x69, MRMSrcReg, // GR64 = GR64*I32 (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2), "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR64:$dst, EFLAGS, - (X86smul_flag GR64:$src1, i64immSExt32:$src2))]>; + (X86smul_flag GR64:$src1, i64immSExt32:$src2))], + IIC_IMUL64_RRI>; def IMUL64rri8 : RIi8<0x6B, MRMSrcReg, // GR64 = GR64*I8 (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2), "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR64:$dst, EFLAGS, - (X86smul_flag GR64:$src1, i64immSExt8:$src2))]>; + (X86smul_flag GR64:$src1, i64immSExt8:$src2))], + IIC_IMUL64_RRI>; // Memory-Integer Signed Integer Multiply @@ -204,37 +222,43 @@ def IMUL16rmi : Ii16<0x69, MRMSrcMem, // GR16 = [mem16]*I16 (outs GR16:$dst), (ins i16mem:$src1, i16imm:$src2), "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR16:$dst, EFLAGS, - (X86smul_flag (load addr:$src1), imm:$src2))]>, + (X86smul_flag (load addr:$src1), imm:$src2))], + IIC_IMUL16_RMI>, OpSize; def IMUL16rmi8 : Ii8<0x6B, MRMSrcMem, // GR16 = [mem16]*I8 (outs GR16:$dst), (ins i16mem:$src1, i16i8imm :$src2), "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR16:$dst, EFLAGS, (X86smul_flag (load addr:$src1), - i16immSExt8:$src2))]>, OpSize; + i16immSExt8:$src2))], IIC_IMUL16_RMI>, + OpSize; def IMUL32rmi : Ii32<0x69, MRMSrcMem, // GR32 = [mem32]*I32 (outs GR32:$dst), (ins i32mem:$src1, i32imm:$src2), "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32:$dst, EFLAGS, - (X86smul_flag 
(load addr:$src1), imm:$src2))]>; + (X86smul_flag (load addr:$src1), imm:$src2))], + IIC_IMUL32_RMI>; def IMUL32rmi8 : Ii8<0x6B, MRMSrcMem, // GR32 = [mem32]*I8 (outs GR32:$dst), (ins i32mem:$src1, i32i8imm: $src2), "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32:$dst, EFLAGS, (X86smul_flag (load addr:$src1), - i32immSExt8:$src2))]>; + i32immSExt8:$src2))], + IIC_IMUL32_RMI>; def IMUL64rmi32 : RIi32<0x69, MRMSrcMem, // GR64 = [mem64]*I32 (outs GR64:$dst), (ins i64mem:$src1, i64i32imm:$src2), "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR64:$dst, EFLAGS, (X86smul_flag (load addr:$src1), - i64immSExt32:$src2))]>; + i64immSExt32:$src2))], + IIC_IMUL64_RMI>; def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8 (outs GR64:$dst), (ins i64mem:$src1, i64i8imm: $src2), "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR64:$dst, EFLAGS, (X86smul_flag (load addr:$src1), - i64immSExt8:$src2))]>; + i64immSExt8:$src2))], + IIC_IMUL64_RMI>; } // Defs = [EFLAGS] @@ -243,62 +267,62 @@ def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8 // unsigned division/remainder let Defs = [AL,EFLAGS,AX], Uses = [AX] in def DIV8r : I<0xF6, MRM6r, (outs), (ins GR8:$src), // AX/r8 = AL,AH - "div{b}\t$src", []>; + "div{b}\t$src", [], IIC_DIV8_REG>; let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in def DIV16r : I<0xF7, MRM6r, (outs), (ins GR16:$src), // DX:AX/r16 = AX,DX - "div{w}\t$src", []>, OpSize; + "div{w}\t$src", [], IIC_DIV16>, OpSize; let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in def DIV32r : I<0xF7, MRM6r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX - "div{l}\t$src", []>; + "div{l}\t$src", [], IIC_DIV32>; // RDX:RAX/r64 = RAX,RDX let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in def DIV64r : RI<0xF7, MRM6r, (outs), (ins GR64:$src), - "div{q}\t$src", []>; + "div{q}\t$src", [], IIC_DIV64>; let mayLoad = 1 in { let Defs = [AL,EFLAGS,AX], Uses = [AX] in def DIV8m : I<0xF6, MRM6m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH - 
"div{b}\t$src", []>; + "div{b}\t$src", [], IIC_DIV8_MEM>; let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in def DIV16m : I<0xF7, MRM6m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX - "div{w}\t$src", []>, OpSize; + "div{w}\t$src", [], IIC_DIV16>, OpSize; let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX def DIV32m : I<0xF7, MRM6m, (outs), (ins i32mem:$src), - "div{l}\t$src", []>; + "div{l}\t$src", [], IIC_DIV32>; // RDX:RAX/[mem64] = RAX,RDX let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in def DIV64m : RI<0xF7, MRM6m, (outs), (ins i64mem:$src), - "div{q}\t$src", []>; + "div{q}\t$src", [], IIC_DIV64>; } // Signed division/remainder. let Defs = [AL,EFLAGS,AX], Uses = [AX] in def IDIV8r : I<0xF6, MRM7r, (outs), (ins GR8:$src), // AX/r8 = AL,AH - "idiv{b}\t$src", []>; + "idiv{b}\t$src", [], IIC_IDIV8>; let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in def IDIV16r: I<0xF7, MRM7r, (outs), (ins GR16:$src), // DX:AX/r16 = AX,DX - "idiv{w}\t$src", []>, OpSize; + "idiv{w}\t$src", [], IIC_IDIV16>, OpSize; let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in def IDIV32r: I<0xF7, MRM7r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX - "idiv{l}\t$src", []>; + "idiv{l}\t$src", [], IIC_IDIV32>; // RDX:RAX/r64 = RAX,RDX let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in def IDIV64r: RI<0xF7, MRM7r, (outs), (ins GR64:$src), - "idiv{q}\t$src", []>; + "idiv{q}\t$src", [], IIC_IDIV64>; let mayLoad = 1 in { let Defs = [AL,EFLAGS,AX], Uses = [AX] in def IDIV8m : I<0xF6, MRM7m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH - "idiv{b}\t$src", []>; + "idiv{b}\t$src", [], IIC_IDIV8>; let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in def IDIV16m: I<0xF7, MRM7m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX - "idiv{w}\t$src", []>, OpSize; + "idiv{w}\t$src", [], IIC_IDIV16>, OpSize; let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX def IDIV32m: I<0xF7, MRM7m, (outs), (ins i32mem:$src), - "idiv{l}\t$src", []>; + "idiv{l}\t$src", [], IIC_IDIV32>; 
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in // RDX:RAX/[mem64] = RAX,RDX def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src), - "idiv{q}\t$src", []>; + "idiv{q}\t$src", [], IIC_IDIV64>; } //===----------------------------------------------------------------------===// @@ -312,35 +336,35 @@ let Constraints = "$src1 = $dst" in { def NEG8r : I<0xF6, MRM3r, (outs GR8 :$dst), (ins GR8 :$src1), "neg{b}\t$dst", [(set GR8:$dst, (ineg GR8:$src1)), - (implicit EFLAGS)]>; + (implicit EFLAGS)], IIC_UNARY_REG>; def NEG16r : I<0xF7, MRM3r, (outs GR16:$dst), (ins GR16:$src1), "neg{w}\t$dst", [(set GR16:$dst, (ineg GR16:$src1)), - (implicit EFLAGS)]>, OpSize; + (implicit EFLAGS)], IIC_UNARY_REG>, OpSize; def NEG32r : I<0xF7, MRM3r, (outs GR32:$dst), (ins GR32:$src1), "neg{l}\t$dst", [(set GR32:$dst, (ineg GR32:$src1)), - (implicit EFLAGS)]>; + (implicit EFLAGS)], IIC_UNARY_REG>; def NEG64r : RI<0xF7, MRM3r, (outs GR64:$dst), (ins GR64:$src1), "neg{q}\t$dst", [(set GR64:$dst, (ineg GR64:$src1)), - (implicit EFLAGS)]>; + (implicit EFLAGS)], IIC_UNARY_REG>; } // Constraints = "$src1 = $dst" def NEG8m : I<0xF6, MRM3m, (outs), (ins i8mem :$dst), "neg{b}\t$dst", [(store (ineg (loadi8 addr:$dst)), addr:$dst), - (implicit EFLAGS)]>; + (implicit EFLAGS)], IIC_UNARY_MEM>; def NEG16m : I<0xF7, MRM3m, (outs), (ins i16mem:$dst), "neg{w}\t$dst", [(store (ineg (loadi16 addr:$dst)), addr:$dst), - (implicit EFLAGS)]>, OpSize; + (implicit EFLAGS)], IIC_UNARY_MEM>, OpSize; def NEG32m : I<0xF7, MRM3m, (outs), (ins i32mem:$dst), "neg{l}\t$dst", [(store (ineg (loadi32 addr:$dst)), addr:$dst), - (implicit EFLAGS)]>; + (implicit EFLAGS)], IIC_UNARY_MEM>; def NEG64m : RI<0xF7, MRM3m, (outs), (ins i64mem:$dst), "neg{q}\t$dst", [(store (ineg (loadi64 addr:$dst)), addr:$dst), - (implicit EFLAGS)]>; + (implicit EFLAGS)], IIC_UNARY_MEM>; } // Defs = [EFLAGS] @@ -351,29 +375,30 @@ let Constraints = "$src1 = $dst" in { let AddedComplexity = 15 in { def NOT8r : I<0xF6, MRM2r, (outs GR8 :$dst), (ins GR8 
:$src1), "not{b}\t$dst", - [(set GR8:$dst, (not GR8:$src1))]>; + [(set GR8:$dst, (not GR8:$src1))], IIC_UNARY_REG>; def NOT16r : I<0xF7, MRM2r, (outs GR16:$dst), (ins GR16:$src1), "not{w}\t$dst", - [(set GR16:$dst, (not GR16:$src1))]>, OpSize; + [(set GR16:$dst, (not GR16:$src1))], IIC_UNARY_REG>, OpSize; def NOT32r : I<0xF7, MRM2r, (outs GR32:$dst), (ins GR32:$src1), "not{l}\t$dst", - [(set GR32:$dst, (not GR32:$src1))]>; + [(set GR32:$dst, (not GR32:$src1))], IIC_UNARY_REG>; def NOT64r : RI<0xF7, MRM2r, (outs GR64:$dst), (ins GR64:$src1), "not{q}\t$dst", - [(set GR64:$dst, (not GR64:$src1))]>; + [(set GR64:$dst, (not GR64:$src1))], IIC_UNARY_REG>; } } // Constraints = "$src1 = $dst" def NOT8m : I<0xF6, MRM2m, (outs), (ins i8mem :$dst), "not{b}\t$dst", - [(store (not (loadi8 addr:$dst)), addr:$dst)]>; + [(store (not (loadi8 addr:$dst)), addr:$dst)], IIC_UNARY_MEM>; def NOT16m : I<0xF7, MRM2m, (outs), (ins i16mem:$dst), "not{w}\t$dst", - [(store (not (loadi16 addr:$dst)), addr:$dst)]>, OpSize; + [(store (not (loadi16 addr:$dst)), addr:$dst)], IIC_UNARY_MEM>, + OpSize; def NOT32m : I<0xF7, MRM2m, (outs), (ins i32mem:$dst), "not{l}\t$dst", - [(store (not (loadi32 addr:$dst)), addr:$dst)]>; + [(store (not (loadi32 addr:$dst)), addr:$dst)], IIC_UNARY_MEM>; def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q}\t$dst", - [(store (not (loadi64 addr:$dst)), addr:$dst)]>; + [(store (not (loadi64 addr:$dst)), addr:$dst)], IIC_UNARY_MEM>; } // CodeSize // TODO: inc/dec is slow for P4, but fast for Pentium-M. @@ -382,19 +407,22 @@ let Constraints = "$src1 = $dst" in { let CodeSize = 2 in def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1), "inc{b}\t$dst", - [(set GR8:$dst, EFLAGS, (X86inc_flag GR8:$src1))]>; + [(set GR8:$dst, EFLAGS, (X86inc_flag GR8:$src1))], + IIC_UNARY_REG>; let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA. 
def INC16r : I<0x40, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1), "inc{w}\t$dst", - [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))]>, + [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))], IIC_UNARY_REG>, OpSize, Requires<[In32BitMode]>; def INC32r : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1), "inc{l}\t$dst", - [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))]>, + [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))], + IIC_UNARY_REG>, Requires<[In32BitMode]>; def INC64r : RI<0xFF, MRM0r, (outs GR64:$dst), (ins GR64:$src1), "inc{q}\t$dst", - [(set GR64:$dst, EFLAGS, (X86inc_flag GR64:$src1))]>; + [(set GR64:$dst, EFLAGS, (X86inc_flag GR64:$src1))], + IIC_UNARY_REG>; } // isConvertibleToThreeAddress = 1, CodeSize = 1 @@ -403,19 +431,23 @@ let isConvertibleToThreeAddress = 1, CodeSize = 2 in { // Can transform into LEA. def INC64_16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src1), "inc{w}\t$dst", - [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))]>, + [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))], + IIC_UNARY_REG>, OpSize, Requires<[In64BitMode]>; def INC64_32r : I<0xFF, MRM0r, (outs GR32:$dst), (ins GR32:$src1), "inc{l}\t$dst", - [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))]>, + [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))], + IIC_UNARY_REG>, Requires<[In64BitMode]>; def DEC64_16r : I<0xFF, MRM1r, (outs GR16:$dst), (ins GR16:$src1), "dec{w}\t$dst", - [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))]>, + [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))], + IIC_UNARY_REG>, OpSize, Requires<[In64BitMode]>; def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src1), "dec{l}\t$dst", - [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))]>, + [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))], + IIC_UNARY_REG>, Requires<[In64BitMode]>; } // isConvertibleToThreeAddress = 1, CodeSize = 2 @@ -424,37 +456,37 @@ def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src1), let CodeSize = 2 in { def INC8m : I<0xFE, MRM0m, 
(outs), (ins i8mem :$dst), "inc{b}\t$dst", [(store (add (loadi8 addr:$dst), 1), addr:$dst), - (implicit EFLAGS)]>; + (implicit EFLAGS)], IIC_UNARY_MEM>; def INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst", [(store (add (loadi16 addr:$dst), 1), addr:$dst), - (implicit EFLAGS)]>, + (implicit EFLAGS)], IIC_UNARY_MEM>, OpSize, Requires<[In32BitMode]>; def INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst", [(store (add (loadi32 addr:$dst), 1), addr:$dst), - (implicit EFLAGS)]>, + (implicit EFLAGS)], IIC_UNARY_MEM>, Requires<[In32BitMode]>; def INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst), "inc{q}\t$dst", [(store (add (loadi64 addr:$dst), 1), addr:$dst), - (implicit EFLAGS)]>; + (implicit EFLAGS)], IIC_UNARY_MEM>; // These are duplicates of their 32-bit counterparts. Only needed so X86 knows // how to unfold them. // FIXME: What is this for?? def INC64_16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst", [(store (add (loadi16 addr:$dst), 1), addr:$dst), - (implicit EFLAGS)]>, + (implicit EFLAGS)], IIC_UNARY_MEM>, OpSize, Requires<[In64BitMode]>; def INC64_32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst", [(store (add (loadi32 addr:$dst), 1), addr:$dst), - (implicit EFLAGS)]>, + (implicit EFLAGS)], IIC_UNARY_MEM>, Requires<[In64BitMode]>; def DEC64_16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst", [(store (add (loadi16 addr:$dst), -1), addr:$dst), - (implicit EFLAGS)]>, + (implicit EFLAGS)], IIC_UNARY_MEM>, OpSize, Requires<[In64BitMode]>; def DEC64_32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst", [(store (add (loadi32 addr:$dst), -1), addr:$dst), - (implicit EFLAGS)]>, + (implicit EFLAGS)], IIC_UNARY_MEM>, Requires<[In64BitMode]>; } // CodeSize = 2 @@ -462,18 +494,22 @@ let Constraints = "$src1 = $dst" in { let CodeSize = 2 in def DEC8r : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1), "dec{b}\t$dst", - [(set GR8:$dst, EFLAGS, (X86dec_flag GR8:$src1))]>; + [(set GR8:$dst, 
EFLAGS, (X86dec_flag GR8:$src1))], + IIC_UNARY_REG>; let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA. def DEC16r : I<0x48, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1), "dec{w}\t$dst", - [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))]>, + [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))], + IIC_UNARY_REG>, OpSize, Requires<[In32BitMode]>; def DEC32r : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1), "dec{l}\t$dst", - [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))]>, + [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))], + IIC_UNARY_REG>, Requires<[In32BitMode]>; def DEC64r : RI<0xFF, MRM1r, (outs GR64:$dst), (ins GR64:$src1), "dec{q}\t$dst", - [(set GR64:$dst, EFLAGS, (X86dec_flag GR64:$src1))]>; + [(set GR64:$dst, EFLAGS, (X86dec_flag GR64:$src1))], + IIC_UNARY_REG>; } // CodeSize = 2 } // Constraints = "$src1 = $dst" @@ -481,18 +517,18 @@ def DEC64r : RI<0xFF, MRM1r, (outs GR64:$dst), (ins GR64:$src1), "dec{q}\t$dst", let CodeSize = 2 in { def DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), "dec{b}\t$dst", [(store (add (loadi8 addr:$dst), -1), addr:$dst), - (implicit EFLAGS)]>; + (implicit EFLAGS)], IIC_UNARY_MEM>; def DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst", [(store (add (loadi16 addr:$dst), -1), addr:$dst), - (implicit EFLAGS)]>, + (implicit EFLAGS)], IIC_UNARY_MEM>, OpSize, Requires<[In32BitMode]>; def DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst", [(store (add (loadi32 addr:$dst), -1), addr:$dst), - (implicit EFLAGS)]>, + (implicit EFLAGS)], IIC_UNARY_MEM>, Requires<[In32BitMode]>; def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst", [(store (add (loadi64 addr:$dst), -1), addr:$dst), - (implicit EFLAGS)]>; + (implicit EFLAGS)], IIC_UNARY_MEM>; } // CodeSize = 2 } // Defs = [EFLAGS] @@ -588,11 +624,13 @@ def Xi64 : X86TypeInfo<i64, "q", GR64, loadi64, i64mem, /// 4. 
Infers whether the low bit of the opcode should be 0 (for i8 operations) /// or 1 (for i16,i32,i64 operations). class ITy<bits<8> opcode, Format f, X86TypeInfo typeinfo, dag outs, dag ins, - string mnemonic, string args, list<dag> pattern> + string mnemonic, string args, list<dag> pattern, + InstrItinClass itin = IIC_BIN_NONMEM> : I<{opcode{7}, opcode{6}, opcode{5}, opcode{4}, opcode{3}, opcode{2}, opcode{1}, typeinfo.HasOddOpcode }, f, outs, ins, - !strconcat(mnemonic, "{", typeinfo.InstrSuffix, "}\t", args), pattern> { + !strconcat(mnemonic, "{", typeinfo.InstrSuffix, "}\t", args), pattern, + itin> { // Infer instruction prefixes from type info. let hasOpSizePrefix = typeinfo.HasOpSizePrefix; @@ -664,7 +702,7 @@ class BinOpRM<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, dag outlist, list<dag> pattern> : ITy<opcode, MRMSrcMem, typeinfo, outlist, (ins typeinfo.RegClass:$src1, typeinfo.MemOperand:$src2), - mnemonic, "{$src2, $src1|$src1, $src2}", pattern>; + mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_MEM>; // BinOpRM_R - Instructions like "add reg, reg, [mem]". class BinOpRM_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, @@ -776,7 +814,7 @@ class BinOpMR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, list<dag> pattern> : ITy<opcode, MRMDestMem, typeinfo, (outs), (ins typeinfo.MemOperand:$dst, typeinfo.RegClass:$src), - mnemonic, "{$src, $dst|$dst, $src}", pattern>; + mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>; // BinOpMR_RMW - Instructions like "add [mem], reg". 
class BinOpMR_RMW<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, @@ -804,7 +842,7 @@ class BinOpMI<string mnemonic, X86TypeInfo typeinfo, Format f, list<dag> pattern, bits<8> opcode = 0x80> : ITy<opcode, f, typeinfo, (outs), (ins typeinfo.MemOperand:$dst, typeinfo.ImmOperand:$src), - mnemonic, "{$src, $dst|$dst, $src}", pattern> { + mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM> { let ImmT = typeinfo.ImmEncoding; } @@ -837,7 +875,7 @@ class BinOpMI8<string mnemonic, X86TypeInfo typeinfo, Format f, list<dag> pattern> : ITy<0x82, f, typeinfo, (outs), (ins typeinfo.MemOperand:$dst, typeinfo.Imm8Operand:$src), - mnemonic, "{$src, $dst|$dst, $src}", pattern> { + mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM> { let ImmT = Imm8; // Always 8-bit immediate. } @@ -1150,7 +1188,7 @@ let Defs = [EFLAGS] in { // register class is constrained to GR8_NOREX. let isPseudo = 1 in def TEST8ri_NOREX : I<0, Pseudo, (outs), (ins GR8_NOREX:$src, i8imm:$mask), - "", []>; + "", [], IIC_BIN_NONMEM>; } //===----------------------------------------------------------------------===// @@ -1160,11 +1198,12 @@ multiclass bmi_andn<string mnemonic, RegisterClass RC, X86MemOperand x86memop, PatFrag ld_frag> { def rr : I<0xF2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set RC:$dst, EFLAGS, (X86andn_flag RC:$src1, RC:$src2))]>; + [(set RC:$dst, EFLAGS, (X86andn_flag RC:$src1, RC:$src2))], + IIC_BIN_NONMEM>; def rm : I<0xF2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, EFLAGS, - (X86andn_flag RC:$src1, (ld_frag addr:$src2)))]>; + (X86andn_flag RC:$src1, (ld_frag addr:$src2)))], IIC_BIN_MEM>; } let Predicates = [HasBMI], Defs = [EFLAGS] in { diff --git a/lib/Target/X86/X86InstrCMovSetCC.td b/lib/Target/X86/X86InstrCMovSetCC.td index 3a43b22..c6d1e14 100644 --- a/lib/Target/X86/X86InstrCMovSetCC.td 
+++ b/lib/Target/X86/X86InstrCMovSetCC.td @@ -21,17 +21,20 @@ multiclass CMOV<bits<8> opc, string Mnemonic, PatLeaf CondNode> { : I<opc, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), !strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"), [(set GR16:$dst, - (X86cmov GR16:$src1, GR16:$src2, CondNode, EFLAGS))]>,TB,OpSize; + (X86cmov GR16:$src1, GR16:$src2, CondNode, EFLAGS))], + IIC_CMOV16_RR>,TB,OpSize; def #NAME#32rr : I<opc, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), !strconcat(Mnemonic, "{l}\t{$src2, $dst|$dst, $src2}"), [(set GR32:$dst, - (X86cmov GR32:$src1, GR32:$src2, CondNode, EFLAGS))]>, TB; + (X86cmov GR32:$src1, GR32:$src2, CondNode, EFLAGS))], + IIC_CMOV32_RR>, TB; def #NAME#64rr :RI<opc, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), !strconcat(Mnemonic, "{q}\t{$src2, $dst|$dst, $src2}"), [(set GR64:$dst, - (X86cmov GR64:$src1, GR64:$src2, CondNode, EFLAGS))]>, TB; + (X86cmov GR64:$src1, GR64:$src2, CondNode, EFLAGS))], + IIC_CMOV32_RR>, TB; } let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst" in { @@ -39,17 +42,18 @@ multiclass CMOV<bits<8> opc, string Mnemonic, PatLeaf CondNode> { : I<opc, MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), !strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"), [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), - CondNode, EFLAGS))]>, TB, OpSize; + CondNode, EFLAGS))], IIC_CMOV16_RM>, + TB, OpSize; def #NAME#32rm : I<opc, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), !strconcat(Mnemonic, "{l}\t{$src2, $dst|$dst, $src2}"), [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), - CondNode, EFLAGS))]>, TB; + CondNode, EFLAGS))], IIC_CMOV32_RM>, TB; def #NAME#64rm :RI<opc, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), !strconcat(Mnemonic, "{q}\t{$src2, $dst|$dst, $src2}"), [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), - CondNode, EFLAGS))]>, TB; + CondNode, EFLAGS))], IIC_CMOV32_RM>, TB; } 
// Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst" } // end multiclass @@ -78,10 +82,12 @@ multiclass SETCC<bits<8> opc, string Mnemonic, PatLeaf OpNode> { let Uses = [EFLAGS] in { def r : I<opc, MRM0r, (outs GR8:$dst), (ins), !strconcat(Mnemonic, "\t$dst"), - [(set GR8:$dst, (X86setcc OpNode, EFLAGS))]>, TB; + [(set GR8:$dst, (X86setcc OpNode, EFLAGS))], + IIC_SET_R>, TB; def m : I<opc, MRM0m, (outs), (ins i8mem:$dst), !strconcat(Mnemonic, "\t$dst"), - [(store (X86setcc OpNode, EFLAGS), addr:$dst)]>, TB; + [(store (X86setcc OpNode, EFLAGS), addr:$dst)], + IIC_SET_M>, TB; } // Uses = [EFLAGS] } diff --git a/lib/Target/X86/X86InstrControl.td b/lib/Target/X86/X86InstrControl.td index 9b167f7..31dd529 100644 --- a/lib/Target/X86/X86InstrControl.td +++ b/lib/Target/X86/X86InstrControl.td @@ -20,41 +20,42 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1, FPForm = SpecialFP in { def RET : I <0xC3, RawFrm, (outs), (ins variable_ops), "ret", - [(X86retflag 0)]>; + [(X86retflag 0)], IIC_RET>; def RETI : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops), "ret\t$amt", - [(X86retflag timm:$amt)]>; + [(X86retflag timm:$amt)], IIC_RET_IMM>; def RETIW : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops), "retw\t$amt", - []>, OpSize; + [], IIC_RET_IMM>, OpSize; def LRETL : I <0xCB, RawFrm, (outs), (ins), - "lretl", []>; + "lretl", [], IIC_RET>; def LRETQ : RI <0xCB, RawFrm, (outs), (ins), - "lretq", []>; + "lretq", [], IIC_RET>; def LRETI : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt), - "lret\t$amt", []>; + "lret\t$amt", [], IIC_RET>; def LRETIW : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt), - "lretw\t$amt", []>, OpSize; + "lretw\t$amt", [], IIC_RET>, OpSize; } // Unconditional branches. 
let isBarrier = 1, isBranch = 1, isTerminator = 1 in { def JMP_4 : Ii32PCRel<0xE9, RawFrm, (outs), (ins brtarget:$dst), - "jmp\t$dst", [(br bb:$dst)]>; + "jmp\t$dst", [(br bb:$dst)], IIC_JMP_REL>; def JMP_1 : Ii8PCRel<0xEB, RawFrm, (outs), (ins brtarget8:$dst), - "jmp\t$dst", []>; + "jmp\t$dst", [], IIC_JMP_REL>; // FIXME : Intel syntax for JMP64pcrel32 such that it is not ambiguious // with JMP_1. def JMP64pcrel32 : I<0xE9, RawFrm, (outs), (ins brtarget:$dst), - "jmpq\t$dst", []>; + "jmpq\t$dst", [], IIC_JMP_REL>; } // Conditional Branches. let isBranch = 1, isTerminator = 1, Uses = [EFLAGS] in { multiclass ICBr<bits<8> opc1, bits<8> opc4, string asm, PatFrag Cond> { - def _1 : Ii8PCRel <opc1, RawFrm, (outs), (ins brtarget8:$dst), asm, []>; + def _1 : Ii8PCRel <opc1, RawFrm, (outs), (ins brtarget8:$dst), asm, [], + IIC_Jcc>; def _4 : Ii32PCRel<opc4, RawFrm, (outs), (ins brtarget:$dst), asm, - [(X86brcond bb:$dst, Cond, EFLAGS)]>, TB; + [(X86brcond bb:$dst, Cond, EFLAGS)], IIC_Jcc>, TB; } } @@ -82,55 +83,55 @@ let isAsmParserOnly = 1, isBranch = 1, isTerminator = 1 in { // jecxz. let Uses = [CX] in def JCXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst), - "jcxz\t$dst", []>, AdSize, Requires<[In32BitMode]>; + "jcxz\t$dst", [], IIC_JCXZ>, AdSize, Requires<[In32BitMode]>; let Uses = [ECX] in def JECXZ_32 : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst), - "jecxz\t$dst", []>, Requires<[In32BitMode]>; + "jecxz\t$dst", [], IIC_JCXZ>, Requires<[In32BitMode]>; // J*CXZ instruction: 64-bit versions of this instruction for the asmparser. // In 64-bit mode, the address size prefix is jecxz and the unprefixed version // is jrcxz. 
let Uses = [ECX] in def JECXZ_64 : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst), - "jecxz\t$dst", []>, AdSize, Requires<[In64BitMode]>; + "jecxz\t$dst", [], IIC_JCXZ>, AdSize, Requires<[In64BitMode]>; let Uses = [RCX] in def JRCXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst), - "jrcxz\t$dst", []>, Requires<[In64BitMode]>; + "jrcxz\t$dst", [], IIC_JCXZ>, Requires<[In64BitMode]>; } // Indirect branches let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { def JMP32r : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst", - [(brind GR32:$dst)]>, Requires<[In32BitMode]>; + [(brind GR32:$dst)], IIC_JMP_REG>, Requires<[In32BitMode]>; def JMP32m : I<0xFF, MRM4m, (outs), (ins i32mem:$dst), "jmp{l}\t{*}$dst", - [(brind (loadi32 addr:$dst))]>, Requires<[In32BitMode]>; + [(brind (loadi32 addr:$dst))], IIC_JMP_MEM>, Requires<[In32BitMode]>; def JMP64r : I<0xFF, MRM4r, (outs), (ins GR64:$dst), "jmp{q}\t{*}$dst", - [(brind GR64:$dst)]>, Requires<[In64BitMode]>; + [(brind GR64:$dst)], IIC_JMP_REG>, Requires<[In64BitMode]>; def JMP64m : I<0xFF, MRM4m, (outs), (ins i64mem:$dst), "jmp{q}\t{*}$dst", - [(brind (loadi64 addr:$dst))]>, Requires<[In64BitMode]>; + [(brind (loadi64 addr:$dst))], IIC_JMP_MEM>, Requires<[In64BitMode]>; def FARJMP16i : Iseg16<0xEA, RawFrmImm16, (outs), (ins i16imm:$off, i16imm:$seg), - "ljmp{w}\t{$seg, $off|$off, $seg}", []>, OpSize; + "ljmp{w}\t{$seg, $off|$off, $seg}", [], IIC_JMP_FAR_PTR>, OpSize; def FARJMP32i : Iseg32<0xEA, RawFrmImm16, (outs), (ins i32imm:$off, i16imm:$seg), - "ljmp{l}\t{$seg, $off|$off, $seg}", []>; + "ljmp{l}\t{$seg, $off|$off, $seg}", [], IIC_JMP_FAR_PTR>; def FARJMP64 : RI<0xFF, MRM5m, (outs), (ins opaque80mem:$dst), - "ljmp{q}\t{*}$dst", []>; + "ljmp{q}\t{*}$dst", [], IIC_JMP_FAR_MEM>; def FARJMP16m : I<0xFF, MRM5m, (outs), (ins opaque32mem:$dst), - "ljmp{w}\t{*}$dst", []>, OpSize; + "ljmp{w}\t{*}$dst", [], IIC_JMP_FAR_MEM>, OpSize; def FARJMP32m : I<0xFF, MRM5m, (outs), (ins 
opaque48mem:$dst), - "ljmp{l}\t{*}$dst", []>; + "ljmp{l}\t{*}$dst", [], IIC_JMP_FAR_MEM>; } // Loop instructions -def LOOP : Ii8PCRel<0xE2, RawFrm, (outs), (ins brtarget8:$dst), "loop\t$dst", []>; -def LOOPE : Ii8PCRel<0xE1, RawFrm, (outs), (ins brtarget8:$dst), "loope\t$dst", []>; -def LOOPNE : Ii8PCRel<0xE0, RawFrm, (outs), (ins brtarget8:$dst), "loopne\t$dst", []>; +def LOOP : Ii8PCRel<0xE2, RawFrm, (outs), (ins brtarget8:$dst), "loop\t$dst", [], IIC_LOOP>; +def LOOPE : Ii8PCRel<0xE1, RawFrm, (outs), (ins brtarget8:$dst), "loope\t$dst", [], IIC_LOOPE>; +def LOOPNE : Ii8PCRel<0xE0, RawFrm, (outs), (ins brtarget8:$dst), "loopne\t$dst", [], IIC_LOOPNE>; //===----------------------------------------------------------------------===// // Call Instructions... @@ -147,25 +148,27 @@ let isCall = 1 in Uses = [ESP] in { def CALLpcrel32 : Ii32PCRel<0xE8, RawFrm, (outs), (ins i32imm_pcrel:$dst,variable_ops), - "call{l}\t$dst", []>, Requires<[In32BitMode]>; + "call{l}\t$dst", [], IIC_CALL_RI>, Requires<[In32BitMode]>; def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst, variable_ops), - "call{l}\t{*}$dst", [(X86call GR32:$dst)]>, + "call{l}\t{*}$dst", [(X86call GR32:$dst)], IIC_CALL_RI>, Requires<[In32BitMode]>; def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst, variable_ops), - "call{l}\t{*}$dst", [(X86call (loadi32 addr:$dst))]>, + "call{l}\t{*}$dst", [(X86call (loadi32 addr:$dst))], IIC_CALL_MEM>, Requires<[In32BitMode]>; def FARCALL16i : Iseg16<0x9A, RawFrmImm16, (outs), (ins i16imm:$off, i16imm:$seg), - "lcall{w}\t{$seg, $off|$off, $seg}", []>, OpSize; + "lcall{w}\t{$seg, $off|$off, $seg}", [], + IIC_CALL_FAR_PTR>, OpSize; def FARCALL32i : Iseg32<0x9A, RawFrmImm16, (outs), (ins i32imm:$off, i16imm:$seg), - "lcall{l}\t{$seg, $off|$off, $seg}", []>; + "lcall{l}\t{$seg, $off|$off, $seg}", [], + IIC_CALL_FAR_PTR>; def FARCALL16m : I<0xFF, MRM3m, (outs), (ins opaque32mem:$dst), - "lcall{w}\t{*}$dst", []>, OpSize; + "lcall{w}\t{*}$dst", [], IIC_CALL_FAR_MEM>, OpSize; def 
FARCALL32m : I<0xFF, MRM3m, (outs), (ins opaque48mem:$dst), - "lcall{l}\t{*}$dst", []>; + "lcall{l}\t{*}$dst", [], IIC_CALL_FAR_MEM>; // callw for 16 bit code for the assembler. let isAsmParserOnly = 1 in @@ -196,13 +199,13 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, // mcinst. def TAILJMPd : Ii32PCRel<0xE9, RawFrm, (outs), (ins i32imm_pcrel:$dst, variable_ops), - "jmp\t$dst # TAILCALL", - []>; + "jmp\t$dst # TAILCALL", + [], IIC_JMP_REL>; def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32_TC:$dst, variable_ops), - "", []>; // FIXME: Remove encoding when JIT is dead. + "", [], IIC_JMP_REG>; // FIXME: Remove encoding when JIT is dead. let mayLoad = 1 in def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem_TC:$dst, variable_ops), - "jmp{l}\t{*}$dst # TAILCALL", []>; + "jmp{l}\t{*}$dst # TAILCALL", [], IIC_JMP_MEM>; } @@ -226,17 +229,19 @@ let isCall = 1 in // the 32-bit pcrel field that we have. def CALL64pcrel32 : Ii32PCRel<0xE8, RawFrm, (outs), (ins i64i32imm_pcrel:$dst, variable_ops), - "call{q}\t$dst", []>, + "call{q}\t$dst", [], IIC_CALL_RI>, Requires<[In64BitMode, NotWin64]>; def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops), - "call{q}\t{*}$dst", [(X86call GR64:$dst)]>, + "call{q}\t{*}$dst", [(X86call GR64:$dst)], + IIC_CALL_RI>, Requires<[In64BitMode, NotWin64]>; def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst, variable_ops), - "call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))]>, + "call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))], + IIC_CALL_MEM>, Requires<[In64BitMode, NotWin64]>; def FARCALL64 : RI<0xFF, MRM3m, (outs), (ins opaque80mem:$dst), - "lcall{q}\t{*}$dst", []>; + "lcall{q}\t{*}$dst", [], IIC_CALL_FAR_MEM>; } // FIXME: We need to teach codegen about single list of call-clobbered @@ -253,15 +258,16 @@ let isCall = 1, isCodeGenOnly = 1 in Uses = [RSP] in { def WINCALL64pcrel32 : Ii32PCRel<0xE8, RawFrm, (outs), (ins i64i32imm_pcrel:$dst, variable_ops), - "call{q}\t$dst", []>, + "call{q}\t$dst", [], 
IIC_CALL_RI>, Requires<[IsWin64]>; def WINCALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops), "call{q}\t{*}$dst", - [(X86call GR64:$dst)]>, Requires<[IsWin64]>; + [(X86call GR64:$dst)], IIC_CALL_RI>, + Requires<[IsWin64]>; def WINCALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst,variable_ops), "call{q}\t{*}$dst", - [(X86call (loadi64 addr:$dst))]>, + [(X86call (loadi64 addr:$dst))], IIC_CALL_MEM>, Requires<[IsWin64]>; } @@ -272,7 +278,7 @@ let isCall = 1, isCodeGenOnly = 1 in Uses = [RSP] in { def W64ALLOCA : Ii32PCRel<0xE8, RawFrm, (outs), (ins i64i32imm_pcrel:$dst, variable_ops), - "call{q}\t$dst", []>, + "call{q}\t$dst", [], IIC_CALL_RI>, Requires<[IsWin64]>; } @@ -296,11 +302,11 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, def TAILJMPd64 : Ii32PCRel<0xE9, RawFrm, (outs), (ins i64i32imm_pcrel:$dst, variable_ops), - "jmp\t$dst # TAILCALL", []>; + "jmp\t$dst # TAILCALL", [], IIC_JMP_REL>; def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst, variable_ops), - "jmp{q}\t{*}$dst # TAILCALL", []>; + "jmp{q}\t{*}$dst # TAILCALL", [], IIC_JMP_MEM>; let mayLoad = 1 in def TAILJMPm64 : I<0xFF, MRM4m, (outs), (ins i64mem_TC:$dst, variable_ops), - "jmp{q}\t{*}$dst # TAILCALL", []>; + "jmp{q}\t{*}$dst # TAILCALL", [], IIC_JMP_MEM>; } diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index 957a923..ecb1fc8 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -123,7 +123,9 @@ class Has3DNow0F0FOpcode { bit has3DNow0F0FOpcode = 1; } class MemOp4 { bit hasMemOp4Prefix = 1; } class XOP { bit hasXOP_Prefix = 1; } class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins, - string AsmStr, Domain d = GenericDomain> + string AsmStr, + InstrItinClass itin, + Domain d = GenericDomain> : Instruction { let Namespace = "X86"; @@ -139,6 +141,8 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins, // If this is a pseudo instruction, mark it 
isCodeGenOnly. let isCodeGenOnly = !eq(!cast<string>(f), "Pseudo"); + let Itinerary = itin; + // // Attributes specific to X86 instructions... // @@ -189,51 +193,53 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins, } class PseudoI<dag oops, dag iops, list<dag> pattern> - : X86Inst<0, Pseudo, NoImm, oops, iops, ""> { + : X86Inst<0, Pseudo, NoImm, oops, iops, "", NoItinerary> { let Pattern = pattern; } class I<bits<8> o, Format f, dag outs, dag ins, string asm, - list<dag> pattern, Domain d = GenericDomain> - : X86Inst<o, f, NoImm, outs, ins, asm, d> { + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT, + Domain d = GenericDomain> + : X86Inst<o, f, NoImm, outs, ins, asm, itin, d> { let Pattern = pattern; let CodeSize = 3; } class Ii8 <bits<8> o, Format f, dag outs, dag ins, string asm, - list<dag> pattern, Domain d = GenericDomain> - : X86Inst<o, f, Imm8, outs, ins, asm, d> { + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT, + Domain d = GenericDomain> + : X86Inst<o, f, Imm8, outs, ins, asm, itin, d> { let Pattern = pattern; let CodeSize = 3; } class Ii8PCRel<bits<8> o, Format f, dag outs, dag ins, string asm, - list<dag> pattern> - : X86Inst<o, f, Imm8PCRel, outs, ins, asm> { + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : X86Inst<o, f, Imm8PCRel, outs, ins, asm, itin> { let Pattern = pattern; let CodeSize = 3; } class Ii16<bits<8> o, Format f, dag outs, dag ins, string asm, - list<dag> pattern> - : X86Inst<o, f, Imm16, outs, ins, asm> { + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : X86Inst<o, f, Imm16, outs, ins, asm, itin> { let Pattern = pattern; let CodeSize = 3; } class Ii32<bits<8> o, Format f, dag outs, dag ins, string asm, - list<dag> pattern> - : X86Inst<o, f, Imm32, outs, ins, asm> { + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : X86Inst<o, f, Imm32, outs, ins, asm, itin> { let Pattern = pattern; let CodeSize = 3; } class Ii16PCRel<bits<8> o, Format f, dag outs, dag ins, string asm, - 
list<dag> pattern> - : X86Inst<o, f, Imm16PCRel, outs, ins, asm> { + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : X86Inst<o, f, Imm16PCRel, outs, ins, asm, itin> { let Pattern = pattern; let CodeSize = 3; } class Ii32PCRel<bits<8> o, Format f, dag outs, dag ins, string asm, - list<dag> pattern> - : X86Inst<o, f, Imm32PCRel, outs, ins, asm> { + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : X86Inst<o, f, Imm32PCRel, outs, ins, asm, itin> { let Pattern = pattern; let CodeSize = 3; } @@ -244,8 +250,9 @@ class FPI<bits<8> o, Format F, dag outs, dag ins, string asm> : I<o, F, outs, ins, asm, []> {} // FpI_ - Floating Point Pseudo Instruction template. Not Predicated. -class FpI_<dag outs, dag ins, FPFormat fp, list<dag> pattern> - : X86Inst<0, Pseudo, NoImm, outs, ins, ""> { +class FpI_<dag outs, dag ins, FPFormat fp, list<dag> pattern, + InstrItinClass itin = IIC_DEFAULT> + : X86Inst<0, Pseudo, NoImm, outs, ins, "", itin> { let FPForm = fp; let Pattern = pattern; } @@ -257,20 +264,23 @@ class FpI_<dag outs, dag ins, FPFormat fp, list<dag> pattern> // Iseg32 - 16-bit segment selector, 32-bit offset class Iseg16 <bits<8> o, Format f, dag outs, dag ins, string asm, - list<dag> pattern> : X86Inst<o, f, Imm16, outs, ins, asm> { + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : X86Inst<o, f, Imm16, outs, ins, asm, itin> { let Pattern = pattern; let CodeSize = 3; } class Iseg32 <bits<8> o, Format f, dag outs, dag ins, string asm, - list<dag> pattern> : X86Inst<o, f, Imm32, outs, ins, asm> { + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : X86Inst<o, f, Imm32, outs, ins, asm, itin> { let Pattern = pattern; let CodeSize = 3; } // SI - SSE 1 & 2 scalar instructions -class SI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, asm, pattern> { +class SI<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : I<o, F, outs, ins, asm, pattern, 
itin> { let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX], !if(!eq(Prefix, 12 /* XS */), [HasSSE1], [HasSSE2])); @@ -280,8 +290,8 @@ class SI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> // SIi8 - SSE 1 & 2 scalar instructions class SIi8<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern> { + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : Ii8<o, F, outs, ins, asm, pattern, itin> { let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX], !if(!eq(Prefix, 12 /* XS */), [HasSSE1], [HasSSE2])); @@ -291,8 +301,8 @@ class SIi8<bits<8> o, Format F, dag outs, dag ins, string asm, // PI - SSE 1 & 2 packed instructions class PI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, - Domain d> - : I<o, F, outs, ins, asm, pattern, d> { + InstrItinClass itin, Domain d> + : I<o, F, outs, ins, asm, pattern, itin, d> { let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX], !if(hasOpSizePrefix /* OpSize */, [HasSSE2], [HasSSE1])); @@ -302,8 +312,8 @@ class PI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, // PIi8 - SSE 1 & 2 packed instructions with immediate class PIi8<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, Domain d> - : Ii8<o, F, outs, ins, asm, pattern, d> { + list<dag> pattern, InstrItinClass itin, Domain d> + : Ii8<o, F, outs, ins, asm, pattern, itin, d> { let Predicates = !if(hasVEX_4VPrefix /* VEX */, [HasAVX], !if(hasOpSizePrefix /* OpSize */, [HasSSE2], [HasSSE1])); @@ -319,25 +329,27 @@ class PIi8<bits<8> o, Format F, dag outs, dag ins, string asm, // VSSI - SSE1 instructions with XS prefix in AVX form. // VPSI - SSE1 instructions with TB prefix in AVX form. 
-class SSI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE1]>; +class SSI<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : I<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[HasSSE1]>; class SSIi8<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE1]>; -class PSI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, TB, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : Ii8<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[HasSSE1]>; +class PSI<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : I<o, F, outs, ins, asm, pattern, itin, SSEPackedSingle>, TB, Requires<[HasSSE1]>; class PSIi8<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern, SSEPackedSingle>, TB, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedSingle>, TB, Requires<[HasSSE1]>; class VSSI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : I<o, F, outs, ins, !strconcat("v", asm), pattern>, XS, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : I<o, F, outs, ins, !strconcat("v", asm), pattern, itin>, XS, Requires<[HasAVX]>; class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedSingle>, TB, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : I<o, F, outs, ins, !strconcat("v", asm), pattern, itin, SSEPackedSingle>, TB, Requires<[HasAVX]>; // SSE2 Instruction Templates: @@ -350,28 +362,30 @@ class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm, // VSDI - SSE2 instructions with XD 
prefix in AVX form. // VPDI - SSE2 instructions with TB and OpSize prefixes in AVX form. -class SDI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, asm, pattern>, XD, Requires<[HasSSE2]>; +class SDI<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : I<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[HasSSE2]>; class SDIi8<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern>, XD, Requires<[HasSSE2]>; + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : Ii8<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[HasSSE2]>; class SSDIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE2]>; -class PDI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, TB, OpSize, +class PDI<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, TB, OpSize, Requires<[HasSSE2]>; class PDIi8<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern, SSEPackedDouble>, TB, OpSize, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, TB, OpSize, Requires<[HasSSE2]>; class VSDI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : I<o, F, outs, ins, !strconcat("v", asm), pattern>, XD, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : I<o, F, outs, ins, !strconcat("v", asm), pattern, itin>, XD, Requires<[HasAVX]>; class VPDI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedDouble>, TB, + list<dag> pattern, InstrItinClass itin 
= IIC_DEFAULT> + : I<o, F, outs, ins, !strconcat("v", asm), pattern, itin, SSEPackedDouble>, TB, OpSize, Requires<[HasAVX]>; // SSE3 Instruction Templates: @@ -381,15 +395,16 @@ class VPDI<bits<8> o, Format F, dag outs, dag ins, string asm, // S3DI - SSE3 instructions with XD prefix. class S3SI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, XS, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : I<o, F, outs, ins, asm, pattern, itin, SSEPackedSingle>, XS, Requires<[HasSSE3]>; class S3DI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, XD, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, XD, Requires<[HasSSE3]>; -class S3I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, TB, OpSize, +class S3I<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, TB, OpSize, Requires<[HasSSE3]>; @@ -403,12 +418,12 @@ class S3I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> // classes. They need to be enabled even if AVX is enabled. 
class SS38I<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8, Requires<[HasSSSE3]>; class SS3AI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, Requires<[HasSSSE3]>; // SSE4.1 Instruction Templates: @@ -417,31 +432,31 @@ class SS3AI<bits<8> o, Format F, dag outs, dag ins, string asm, // SS41AIi8 - SSE 4.1 instructions with TA prefix and ImmT == Imm8. // class SS48I<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8, Requires<[HasSSE41]>; class SS4AIi8<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, Requires<[HasSSE41]>; // SSE4.2 Instruction Templates: // // SS428I - SSE 4.2 instructions with T8 prefix. class SS428I<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8, Requires<[HasSSE42]>; // SS42FI - SSE 4.2 instructions with T8XD prefix. 
class SS42FI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : I<o, F, outs, ins, asm, pattern>, T8XD, Requires<[HasSSE42]>; + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : I<o, F, outs, ins, asm, pattern, itin>, T8XD, Requires<[HasSSE42]>; // SS42AI = SSE 4.2 instructions with TA prefix class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, Requires<[HasSSE42]>; // AVX Instruction Templates: @@ -450,12 +465,12 @@ class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm, // AVX8I - AVX instructions with T8 and OpSize prefix. // AVXAIi8 - AVX instructions with TA, OpSize prefix and ImmT = Imm8. class AVX8I<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8, OpSize, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8, OpSize, Requires<[HasAVX]>; class AVXAIi8<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, OpSize, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, OpSize, Requires<[HasAVX]>; // AVX2 Instruction Templates: @@ -464,12 +479,12 @@ class AVXAIi8<bits<8> o, Format F, dag outs, dag ins, string asm, // AVX28I - AVX2 instructions with T8 and OpSize prefix. // AVX2AIi8 - AVX2 instructions with TA, OpSize prefix and ImmT = Imm8. 
class AVX28I<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8, OpSize, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8, OpSize, Requires<[HasAVX2]>; class AVX2AIi8<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, OpSize, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, OpSize, Requires<[HasAVX2]>; // AES Instruction Templates: @@ -477,87 +492,88 @@ class AVX2AIi8<bits<8> o, Format F, dag outs, dag ins, string asm, // AES8I // These use the same encoding as the SSE4.2 T8 and TA encodings. class AES8I<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag>pattern> - : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8, + list<dag>pattern, InstrItinClass itin = IIC_DEFAULT> + : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8, Requires<[HasSSE2, HasAES]>; class AESAI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, Requires<[HasSSE2, HasAES]>; // CLMUL Instruction Templates class CLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag>pattern> - : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, + list<dag>pattern, InstrItinClass itin = IIC_DEFAULT> + : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, OpSize, Requires<[HasSSE2, HasCLMUL]>; class AVXCLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag>pattern> - : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, + list<dag>pattern, InstrItinClass itin = IIC_DEFAULT> + : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, OpSize, VEX_4V, 
Requires<[HasAVX, HasCLMUL]>; // FMA3 Instruction Templates class FMA3<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag>pattern> - : I<o, F, outs, ins, asm, pattern>, T8, + list<dag>pattern, InstrItinClass itin = IIC_DEFAULT> + : I<o, F, outs, ins, asm, pattern, itin>, T8, OpSize, VEX_4V, Requires<[HasFMA3]>; // FMA4 Instruction Templates class FMA4<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag>pattern> - : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, + list<dag>pattern, InstrItinClass itin = IIC_DEFAULT> + : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, OpSize, VEX_4V, VEX_I8IMM, Requires<[HasFMA4]>; // XOP 2, 3 and 4 Operand Instruction Template class IXOP<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, XOP, XOP9, Requires<[HasXOP]>; // XOP 2, 3 and 4 Operand Instruction Templates with imm byte class IXOPi8<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern, SSEPackedDouble>, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, XOP, XOP8, Requires<[HasXOP]>; // XOP 5 operand instruction (VEX encoding!) class IXOP5<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag>pattern> - : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, + list<dag>pattern, InstrItinClass itin = IIC_DEFAULT> + : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, OpSize, VEX_4V, VEX_I8IMM, Requires<[HasXOP]>; // X86-64 Instruction templates... 
// -class RI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, asm, pattern>, REX_W; +class RI<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : I<o, F, outs, ins, asm, pattern, itin>, REX_W; class RIi8 <bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern>, REX_W; + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : Ii8<o, F, outs, ins, asm, pattern, itin>, REX_W; class RIi32 <bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : Ii32<o, F, outs, ins, asm, pattern>, REX_W; + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : Ii32<o, F, outs, ins, asm, pattern, itin>, REX_W; class RIi64<bits<8> o, Format f, dag outs, dag ins, string asm, - list<dag> pattern> - : X86Inst<o, f, Imm64, outs, ins, asm>, REX_W { + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : X86Inst<o, f, Imm64, outs, ins, asm, itin>, REX_W { let Pattern = pattern; let CodeSize = 3; } class RSSI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : SSI<o, F, outs, ins, asm, pattern>, REX_W; + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : SSI<o, F, outs, ins, asm, pattern, itin>, REX_W; class RSDI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : SDI<o, F, outs, ins, asm, pattern>, REX_W; + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : SDI<o, F, outs, ins, asm, pattern, itin>, REX_W; class RPDI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : PDI<o, F, outs, ins, asm, pattern>, REX_W; + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : PDI<o, F, outs, ins, asm, pattern, itin>, REX_W; class VRPDI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : VPDI<o, F, outs, ins, asm, pattern>, VEX_W; + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : VPDI<o, F, 
outs, ins, asm, pattern, itin>, VEX_W; // MMX Instruction templates // @@ -570,23 +586,23 @@ class VRPDI<bits<8> o, Format F, dag outs, dag ins, string asm, // MMXID - MMX instructions with XD prefix. // MMXIS - MMX instructions with XS prefix. class MMXI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : I<o, F, outs, ins, asm, pattern>, TB, Requires<[HasMMX]>; + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : I<o, F, outs, ins, asm, pattern, itin>, TB, Requires<[HasMMX]>; class MMXI64<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : I<o, F, outs, ins, asm, pattern>, TB, Requires<[HasMMX,In64BitMode]>; + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : I<o, F, outs, ins, asm, pattern, itin>, TB, Requires<[HasMMX,In64BitMode]>; class MMXRI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : I<o, F, outs, ins, asm, pattern>, TB, REX_W, Requires<[HasMMX]>; + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : I<o, F, outs, ins, asm, pattern, itin>, TB, REX_W, Requires<[HasMMX]>; class MMX2I<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : I<o, F, outs, ins, asm, pattern>, TB, OpSize, Requires<[HasMMX]>; + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : I<o, F, outs, ins, asm, pattern, itin>, TB, OpSize, Requires<[HasMMX]>; class MMXIi8<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern>, TB, Requires<[HasMMX]>; + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : Ii8<o, F, outs, ins, asm, pattern, itin>, TB, Requires<[HasMMX]>; class MMXID<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern>, XD, Requires<[HasMMX]>; + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : Ii8<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[HasMMX]>; class MMXIS<bits<8> o, Format F, dag outs, dag ins, string asm, - 
list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasMMX]>; + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : Ii8<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[HasMMX]>; diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index 3025a4d..79179e6 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -105,19 +105,23 @@ multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag, string asm, Domain d> { def irr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, - [(set DstRC:$dst, (Int SrcRC:$src))], d>; + [(set DstRC:$dst, (Int SrcRC:$src))], + IIC_DEFAULT, d>; def irm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, - [(set DstRC:$dst, (Int (ld_frag addr:$src)))], d>; + [(set DstRC:$dst, (Int (ld_frag addr:$src)))], + IIC_DEFAULT, d>; } multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag, string asm, Domain d> { def irr : PI<opc, MRMSrcReg, (outs DstRC:$dst),(ins DstRC:$src1, SrcRC:$src2), - asm, [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))], d>; + asm, [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))], + IIC_DEFAULT, d>; def irm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins DstRC:$src1, x86memop:$src2), asm, - [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))], d>; + [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))], + IIC_DEFAULT, d>; } //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 01b4dd6..8f6df30 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -67,13 +67,14 @@ multiclass sse12_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], d>; + [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_DEFAULT, d>; let mayLoad = 1 in def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))], d>; + [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))], + IIC_DEFAULT, d>; } /// sse12_fp_packed_logical_rm - SSE 1 & 2 packed instructions class @@ -87,12 +88,12 @@ multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - pat_rr, d>; + pat_rr, IIC_DEFAULT, d>; def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - pat_rm, d>; + pat_rm, IIC_DEFAULT, d>; } /// sse12_fp_packed_int - SSE 1 & 2 packed instructions intrinsics class @@ -106,14 +107,14 @@ multiclass sse12_fp_packed_int<bits<8> opc, string OpcodeStr, RegisterClass RC, !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (!cast<Intrinsic>( !strconcat("int_x86_", SSEVer, "_", OpcodeStr, FPSizeStr)) - RC:$src1, RC:$src2))], d>; + RC:$src1, RC:$src2))], IIC_DEFAULT, d>; def rm_Int : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1,x86memop:$src2), !if(Is2Addr, !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"), !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (!cast<Intrinsic>( !strconcat("int_x86_", SSEVer, "_", OpcodeStr, FPSizeStr)) - RC:$src1, (mem_frag addr:$src2)))], d>; + RC:$src1, (mem_frag addr:$src2)))], IIC_DEFAULT, d>; } 
//===----------------------------------------------------------------------===// @@ -737,11 +738,11 @@ multiclass sse12_mov_packed<bits<8> opc, RegisterClass RC, bit IsReMaterializable = 1> { let neverHasSideEffects = 1 in def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src), - !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], d>; + !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], IIC_DEFAULT, d>; let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable in def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), !strconcat(asm, "\t{$src, $dst|$dst, $src}"), - [(set RC:$dst, (ld_frag addr:$src))], d>; + [(set RC:$dst, (ld_frag addr:$src))], IIC_DEFAULT, d>; } defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, @@ -1003,14 +1004,14 @@ multiclass sse12_mov_hilo_packed<bits<8>opc, RegisterClass RC, [(set RC:$dst, (mov_frag RC:$src1, (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))], - SSEPackedSingle>, TB; + IIC_DEFAULT, SSEPackedSingle>, TB; def PDrm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, f64mem:$src2), !strconcat(base_opc, "d", asm_opr), [(set RC:$dst, (v2f64 (mov_frag RC:$src1, (scalar_to_vector (loadf64 addr:$src2)))))], - SSEPackedDouble>, TB, OpSize; + IIC_DEFAULT, SSEPackedDouble>, TB, OpSize; } let AddedComplexity = 20 in { @@ -1413,9 +1414,11 @@ multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag, string asm, Domain d> { def rr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, - [(set DstRC:$dst, (OpNode SrcRC:$src))], d>; + [(set DstRC:$dst, (OpNode SrcRC:$src))], + IIC_DEFAULT, d>; def rm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, - [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))], d>; + [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))], + IIC_DEFAULT, d>; } multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, @@ -2124,11 +2127,13 @@ multiclass 
sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode, PatFrag ld_frag, string OpcodeStr, Domain d> { def rr: PI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), - [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))], d>; + [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))], + IIC_DEFAULT, d>; def rm: PI<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), [(set EFLAGS, (OpNode (vt RC:$src1), - (ld_frag addr:$src2)))], d>; + (ld_frag addr:$src2)))], + IIC_DEFAULT, d>; } let Defs = [EFLAGS] in { @@ -2185,19 +2190,21 @@ multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop, let isAsmParserOnly = 1 in { def rri : PIi8<0xC2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, SSECC:$cc), asm, - [(set RC:$dst, (Int RC:$src1, RC:$src2, imm:$cc))], d>; + [(set RC:$dst, (Int RC:$src1, RC:$src2, imm:$cc))], + IIC_DEFAULT, d>; def rmi : PIi8<0xC2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, SSECC:$cc), asm, - [(set RC:$dst, (Int RC:$src1, (memop addr:$src2), imm:$cc))], d>; + [(set RC:$dst, (Int RC:$src1, (memop addr:$src2), imm:$cc))], + IIC_DEFAULT, d>; } // Accept explicit immediate argument form instead of comparison code. 
def rri_alt : PIi8<0xC2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc), - asm_alt, [], d>; + asm_alt, [], IIC_DEFAULT, d>; def rmi_alt : PIi8<0xC2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc), - asm_alt, [], d>; + asm_alt, [], IIC_DEFAULT, d>; } defm VCMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps, @@ -2272,12 +2279,14 @@ multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop, def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$src3), asm, [(set RC:$dst, (vt (shufp:$src3 - RC:$src1, (mem_frag addr:$src2))))], d>; + RC:$src1, (mem_frag addr:$src2))))], + IIC_DEFAULT, d>; let isConvertibleToThreeAddress = IsConvertibleToThreeAddress in def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$src3), asm, [(set RC:$dst, - (vt (shufp:$src3 RC:$src1, RC:$src2)))], d>; + (vt (shufp:$src3 RC:$src1, RC:$src2)))], + IIC_DEFAULT, d>; } defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32, @@ -2448,12 +2457,14 @@ multiclass sse12_unpack_interleave<bits<8> opc, PatFrag OpNode, ValueType vt, def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), asm, [(set RC:$dst, - (vt (OpNode RC:$src1, RC:$src2)))], d>; + (vt (OpNode RC:$src1, RC:$src2)))], + IIC_DEFAULT, d>; def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), asm, [(set RC:$dst, (vt (OpNode RC:$src1, - (mem_frag addr:$src2))))], d>; + (mem_frag addr:$src2))))], + IIC_DEFAULT, d>; } let AddedComplexity = 10 in { @@ -2589,9 +2600,10 @@ multiclass sse12_extr_sign_mask<RegisterClass RC, Intrinsic Int, string asm, Domain d> { def rr32 : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins RC:$src), !strconcat(asm, "\t{$src, $dst|$dst, $src}"), - [(set GR32:$dst, (Int RC:$src))], d>; + [(set GR32:$dst, (Int RC:$src))], IIC_DEFAULT, d>; def rr64 : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins RC:$src), - !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], d>, REX_W; + !strconcat(asm, 
"\t{$src, $dst|$dst, $src}"), [], + IIC_DEFAULT, d>, REX_W; } let Predicates = [HasAVX] in { @@ -2621,14 +2633,18 @@ let Predicates = [HasAVX] in { // Assembler Only def VMOVMSKPSr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), - "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, TB, VEX; + "movmskps\t{$src, $dst|$dst, $src}", [], IIC_DEFAULT, + SSEPackedSingle>, TB, VEX; def VMOVMSKPDr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), - "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, TB, + "movmskpd\t{$src, $dst|$dst, $src}", [], IIC_DEFAULT, + SSEPackedDouble>, TB, OpSize, VEX; def VMOVMSKPSYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src), - "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, TB, VEX; + "movmskps\t{$src, $dst|$dst, $src}", [], IIC_DEFAULT, + SSEPackedSingle>, TB, VEX; def VMOVMSKPDYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src), - "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, TB, + "movmskpd\t{$src, $dst|$dst, $src}", [], IIC_DEFAULT, + SSEPackedDouble>, TB, OpSize, VEX; } @@ -6395,7 +6411,7 @@ multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set RC:$dst, (IntId RC:$src1, RC:$src2, RC:$src3))], - SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM; + IIC_DEFAULT, SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM; def rm : Ii8<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, RC:$src3), @@ -6404,7 +6420,7 @@ multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr, [(set RC:$dst, (IntId RC:$src1, (bitconvert (mem_frag addr:$src2)), RC:$src3))], - SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM; + IIC_DEFAULT, SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM; } let Predicates = [HasAVX] in { diff --git a/lib/Target/X86/X86InstrShiftRotate.td b/lib/Target/X86/X86InstrShiftRotate.td index d717dd7..65dbb32 100644 --- a/lib/Target/X86/X86InstrShiftRotate.td 
+++ b/lib/Target/X86/X86InstrShiftRotate.td @@ -19,44 +19,46 @@ let Constraints = "$src1 = $dst" in { let Uses = [CL] in { def SHL8rCL : I<0xD2, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1), "shl{b}\t{%cl, $dst|$dst, CL}", - [(set GR8:$dst, (shl GR8:$src1, CL))]>; + [(set GR8:$dst, (shl GR8:$src1, CL))], IIC_SR>; def SHL16rCL : I<0xD3, MRM4r, (outs GR16:$dst), (ins GR16:$src1), "shl{w}\t{%cl, $dst|$dst, CL}", - [(set GR16:$dst, (shl GR16:$src1, CL))]>, OpSize; + [(set GR16:$dst, (shl GR16:$src1, CL))], IIC_SR>, OpSize; def SHL32rCL : I<0xD3, MRM4r, (outs GR32:$dst), (ins GR32:$src1), "shl{l}\t{%cl, $dst|$dst, CL}", - [(set GR32:$dst, (shl GR32:$src1, CL))]>; + [(set GR32:$dst, (shl GR32:$src1, CL))], IIC_SR>; def SHL64rCL : RI<0xD3, MRM4r, (outs GR64:$dst), (ins GR64:$src1), "shl{q}\t{%cl, $dst|$dst, CL}", - [(set GR64:$dst, (shl GR64:$src1, CL))]>; + [(set GR64:$dst, (shl GR64:$src1, CL))], IIC_SR>; } // Uses = [CL] def SHL8ri : Ii8<0xC0, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2), "shl{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, (shl GR8:$src1, (i8 imm:$src2)))]>; + [(set GR8:$dst, (shl GR8:$src1, (i8 imm:$src2)))], IIC_SR>; let isConvertibleToThreeAddress = 1 in { // Can transform into LEA. 
def SHL16ri : Ii8<0xC1, MRM4r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2), "shl{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (shl GR16:$src1, (i8 imm:$src2)))]>, OpSize; + [(set GR16:$dst, (shl GR16:$src1, (i8 imm:$src2)))], IIC_SR>, + OpSize; def SHL32ri : Ii8<0xC1, MRM4r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2), "shl{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (shl GR32:$src1, (i8 imm:$src2)))]>; + [(set GR32:$dst, (shl GR32:$src1, (i8 imm:$src2)))], IIC_SR>; def SHL64ri : RIi8<0xC1, MRM4r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2), "shl{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (shl GR64:$src1, (i8 imm:$src2)))]>; + [(set GR64:$dst, (shl GR64:$src1, (i8 imm:$src2)))], + IIC_SR>; // NOTE: We don't include patterns for shifts of a register by one, because // 'add reg,reg' is cheaper (and we have a Pat pattern for shift-by-one). def SHL8r1 : I<0xD0, MRM4r, (outs GR8:$dst), (ins GR8:$src1), - "shl{b}\t$dst", []>; + "shl{b}\t$dst", [], IIC_SR>; def SHL16r1 : I<0xD1, MRM4r, (outs GR16:$dst), (ins GR16:$src1), - "shl{w}\t$dst", []>, OpSize; + "shl{w}\t$dst", [], IIC_SR>, OpSize; def SHL32r1 : I<0xD1, MRM4r, (outs GR32:$dst), (ins GR32:$src1), - "shl{l}\t$dst", []>; + "shl{l}\t$dst", [], IIC_SR>; def SHL64r1 : RI<0xD1, MRM4r, (outs GR64:$dst), (ins GR64:$src1), - "shl{q}\t$dst", []>; + "shl{q}\t$dst", [], IIC_SR>; } // isConvertibleToThreeAddress = 1 } // Constraints = "$src = $dst" @@ -66,223 +68,266 @@ def SHL64r1 : RI<0xD1, MRM4r, (outs GR64:$dst), (ins GR64:$src1), let Uses = [CL] in { def SHL8mCL : I<0xD2, MRM4m, (outs), (ins i8mem :$dst), "shl{b}\t{%cl, $dst|$dst, CL}", - [(store (shl (loadi8 addr:$dst), CL), addr:$dst)]>; + [(store (shl (loadi8 addr:$dst), CL), addr:$dst)], IIC_SR>; def SHL16mCL : I<0xD3, MRM4m, (outs), (ins i16mem:$dst), "shl{w}\t{%cl, $dst|$dst, CL}", - [(store (shl (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize; + [(store (shl (loadi16 addr:$dst), CL), addr:$dst)], IIC_SR>, + OpSize; def SHL32mCL : 
I<0xD3, MRM4m, (outs), (ins i32mem:$dst), "shl{l}\t{%cl, $dst|$dst, CL}", - [(store (shl (loadi32 addr:$dst), CL), addr:$dst)]>; + [(store (shl (loadi32 addr:$dst), CL), addr:$dst)], IIC_SR>; def SHL64mCL : RI<0xD3, MRM4m, (outs), (ins i64mem:$dst), "shl{q}\t{%cl, $dst|$dst, CL}", - [(store (shl (loadi64 addr:$dst), CL), addr:$dst)]>; + [(store (shl (loadi64 addr:$dst), CL), addr:$dst)], IIC_SR>; } def SHL8mi : Ii8<0xC0, MRM4m, (outs), (ins i8mem :$dst, i8imm:$src), "shl{b}\t{$src, $dst|$dst, $src}", - [(store (shl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>; + [(store (shl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)], + IIC_SR>; def SHL16mi : Ii8<0xC1, MRM4m, (outs), (ins i16mem:$dst, i8imm:$src), "shl{w}\t{$src, $dst|$dst, $src}", - [(store (shl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>, + [(store (shl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)], + IIC_SR>, OpSize; def SHL32mi : Ii8<0xC1, MRM4m, (outs), (ins i32mem:$dst, i8imm:$src), "shl{l}\t{$src, $dst|$dst, $src}", - [(store (shl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>; + [(store (shl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)], + IIC_SR>; def SHL64mi : RIi8<0xC1, MRM4m, (outs), (ins i64mem:$dst, i8imm:$src), "shl{q}\t{$src, $dst|$dst, $src}", - [(store (shl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>; + [(store (shl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)], + IIC_SR>; // Shift by 1 def SHL8m1 : I<0xD0, MRM4m, (outs), (ins i8mem :$dst), "shl{b}\t$dst", - [(store (shl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>; + [(store (shl (loadi8 addr:$dst), (i8 1)), addr:$dst)], + IIC_SR>; def SHL16m1 : I<0xD1, MRM4m, (outs), (ins i16mem:$dst), "shl{w}\t$dst", - [(store (shl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>, + [(store (shl (loadi16 addr:$dst), (i8 1)), addr:$dst)], + IIC_SR>, OpSize; def SHL32m1 : I<0xD1, MRM4m, (outs), (ins i32mem:$dst), "shl{l}\t$dst", - [(store (shl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>; + [(store (shl (loadi32 addr:$dst), (i8 1)), 
addr:$dst)], + IIC_SR>; def SHL64m1 : RI<0xD1, MRM4m, (outs), (ins i64mem:$dst), "shl{q}\t$dst", - [(store (shl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>; + [(store (shl (loadi64 addr:$dst), (i8 1)), addr:$dst)], + IIC_SR>; let Constraints = "$src1 = $dst" in { let Uses = [CL] in { def SHR8rCL : I<0xD2, MRM5r, (outs GR8 :$dst), (ins GR8 :$src1), "shr{b}\t{%cl, $dst|$dst, CL}", - [(set GR8:$dst, (srl GR8:$src1, CL))]>; + [(set GR8:$dst, (srl GR8:$src1, CL))], IIC_SR>; def SHR16rCL : I<0xD3, MRM5r, (outs GR16:$dst), (ins GR16:$src1), "shr{w}\t{%cl, $dst|$dst, CL}", - [(set GR16:$dst, (srl GR16:$src1, CL))]>, OpSize; + [(set GR16:$dst, (srl GR16:$src1, CL))], IIC_SR>, OpSize; def SHR32rCL : I<0xD3, MRM5r, (outs GR32:$dst), (ins GR32:$src1), "shr{l}\t{%cl, $dst|$dst, CL}", - [(set GR32:$dst, (srl GR32:$src1, CL))]>; + [(set GR32:$dst, (srl GR32:$src1, CL))], IIC_SR>; def SHR64rCL : RI<0xD3, MRM5r, (outs GR64:$dst), (ins GR64:$src1), "shr{q}\t{%cl, $dst|$dst, CL}", - [(set GR64:$dst, (srl GR64:$src1, CL))]>; + [(set GR64:$dst, (srl GR64:$src1, CL))], IIC_SR>; } def SHR8ri : Ii8<0xC0, MRM5r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2), "shr{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, (srl GR8:$src1, (i8 imm:$src2)))]>; + [(set GR8:$dst, (srl GR8:$src1, (i8 imm:$src2)))], IIC_SR>; def SHR16ri : Ii8<0xC1, MRM5r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2), "shr{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (srl GR16:$src1, (i8 imm:$src2)))]>, OpSize; + [(set GR16:$dst, (srl GR16:$src1, (i8 imm:$src2)))], + IIC_SR>, OpSize; def SHR32ri : Ii8<0xC1, MRM5r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2), "shr{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (srl GR32:$src1, (i8 imm:$src2)))]>; + [(set GR32:$dst, (srl GR32:$src1, (i8 imm:$src2)))], + IIC_SR>; def SHR64ri : RIi8<0xC1, MRM5r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2), "shr{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (srl GR64:$src1, (i8 imm:$src2)))]>; + [(set GR64:$dst, (srl 
GR64:$src1, (i8 imm:$src2)))], IIC_SR>; // Shift right by 1 def SHR8r1 : I<0xD0, MRM5r, (outs GR8:$dst), (ins GR8:$src1), "shr{b}\t$dst", - [(set GR8:$dst, (srl GR8:$src1, (i8 1)))]>; + [(set GR8:$dst, (srl GR8:$src1, (i8 1)))], IIC_SR>; def SHR16r1 : I<0xD1, MRM5r, (outs GR16:$dst), (ins GR16:$src1), "shr{w}\t$dst", - [(set GR16:$dst, (srl GR16:$src1, (i8 1)))]>, OpSize; + [(set GR16:$dst, (srl GR16:$src1, (i8 1)))], IIC_SR>, OpSize; def SHR32r1 : I<0xD1, MRM5r, (outs GR32:$dst), (ins GR32:$src1), "shr{l}\t$dst", - [(set GR32:$dst, (srl GR32:$src1, (i8 1)))]>; + [(set GR32:$dst, (srl GR32:$src1, (i8 1)))], IIC_SR>; def SHR64r1 : RI<0xD1, MRM5r, (outs GR64:$dst), (ins GR64:$src1), "shr{q}\t$dst", - [(set GR64:$dst, (srl GR64:$src1, (i8 1)))]>; + [(set GR64:$dst, (srl GR64:$src1, (i8 1)))], IIC_SR>; } // Constraints = "$src = $dst" let Uses = [CL] in { def SHR8mCL : I<0xD2, MRM5m, (outs), (ins i8mem :$dst), "shr{b}\t{%cl, $dst|$dst, CL}", - [(store (srl (loadi8 addr:$dst), CL), addr:$dst)]>; + [(store (srl (loadi8 addr:$dst), CL), addr:$dst)], IIC_SR>; def SHR16mCL : I<0xD3, MRM5m, (outs), (ins i16mem:$dst), "shr{w}\t{%cl, $dst|$dst, CL}", - [(store (srl (loadi16 addr:$dst), CL), addr:$dst)]>, + [(store (srl (loadi16 addr:$dst), CL), addr:$dst)], IIC_SR>, OpSize; def SHR32mCL : I<0xD3, MRM5m, (outs), (ins i32mem:$dst), "shr{l}\t{%cl, $dst|$dst, CL}", - [(store (srl (loadi32 addr:$dst), CL), addr:$dst)]>; + [(store (srl (loadi32 addr:$dst), CL), addr:$dst)], IIC_SR>; def SHR64mCL : RI<0xD3, MRM5m, (outs), (ins i64mem:$dst), "shr{q}\t{%cl, $dst|$dst, CL}", - [(store (srl (loadi64 addr:$dst), CL), addr:$dst)]>; + [(store (srl (loadi64 addr:$dst), CL), addr:$dst)], IIC_SR>; } def SHR8mi : Ii8<0xC0, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src), "shr{b}\t{$src, $dst|$dst, $src}", - [(store (srl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>; + [(store (srl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)], + IIC_SR>; def SHR16mi : Ii8<0xC1, MRM5m, (outs), (ins 
i16mem:$dst, i8imm:$src), "shr{w}\t{$src, $dst|$dst, $src}", - [(store (srl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>, + [(store (srl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)], + IIC_SR>, OpSize; def SHR32mi : Ii8<0xC1, MRM5m, (outs), (ins i32mem:$dst, i8imm:$src), "shr{l}\t{$src, $dst|$dst, $src}", - [(store (srl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>; + [(store (srl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)], + IIC_SR>; def SHR64mi : RIi8<0xC1, MRM5m, (outs), (ins i64mem:$dst, i8imm:$src), "shr{q}\t{$src, $dst|$dst, $src}", - [(store (srl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>; + [(store (srl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)], + IIC_SR>; // Shift by 1 def SHR8m1 : I<0xD0, MRM5m, (outs), (ins i8mem :$dst), "shr{b}\t$dst", - [(store (srl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>; + [(store (srl (loadi8 addr:$dst), (i8 1)), addr:$dst)], + IIC_SR>; def SHR16m1 : I<0xD1, MRM5m, (outs), (ins i16mem:$dst), "shr{w}\t$dst", - [(store (srl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,OpSize; + [(store (srl (loadi16 addr:$dst), (i8 1)), addr:$dst)], + IIC_SR>,OpSize; def SHR32m1 : I<0xD1, MRM5m, (outs), (ins i32mem:$dst), "shr{l}\t$dst", - [(store (srl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>; + [(store (srl (loadi32 addr:$dst), (i8 1)), addr:$dst)], + IIC_SR>; def SHR64m1 : RI<0xD1, MRM5m, (outs), (ins i64mem:$dst), "shr{q}\t$dst", - [(store (srl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>; + [(store (srl (loadi64 addr:$dst), (i8 1)), addr:$dst)], + IIC_SR>; let Constraints = "$src1 = $dst" in { let Uses = [CL] in { def SAR8rCL : I<0xD2, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1), "sar{b}\t{%cl, $dst|$dst, CL}", - [(set GR8:$dst, (sra GR8:$src1, CL))]>; + [(set GR8:$dst, (sra GR8:$src1, CL))], + IIC_SR>; def SAR16rCL : I<0xD3, MRM7r, (outs GR16:$dst), (ins GR16:$src1), "sar{w}\t{%cl, $dst|$dst, CL}", - [(set GR16:$dst, (sra GR16:$src1, CL))]>, OpSize; + [(set GR16:$dst, (sra GR16:$src1, CL))], + IIC_SR>, OpSize; def 
SAR32rCL : I<0xD3, MRM7r, (outs GR32:$dst), (ins GR32:$src1), "sar{l}\t{%cl, $dst|$dst, CL}", - [(set GR32:$dst, (sra GR32:$src1, CL))]>; + [(set GR32:$dst, (sra GR32:$src1, CL))], + IIC_SR>; def SAR64rCL : RI<0xD3, MRM7r, (outs GR64:$dst), (ins GR64:$src1), "sar{q}\t{%cl, $dst|$dst, CL}", - [(set GR64:$dst, (sra GR64:$src1, CL))]>; + [(set GR64:$dst, (sra GR64:$src1, CL))], + IIC_SR>; } def SAR8ri : Ii8<0xC0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2), "sar{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, (sra GR8:$src1, (i8 imm:$src2)))]>; + [(set GR8:$dst, (sra GR8:$src1, (i8 imm:$src2)))], + IIC_SR>; def SAR16ri : Ii8<0xC1, MRM7r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2), "sar{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (sra GR16:$src1, (i8 imm:$src2)))]>, + [(set GR16:$dst, (sra GR16:$src1, (i8 imm:$src2)))], + IIC_SR>, OpSize; def SAR32ri : Ii8<0xC1, MRM7r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2), "sar{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (sra GR32:$src1, (i8 imm:$src2)))]>; + [(set GR32:$dst, (sra GR32:$src1, (i8 imm:$src2)))], + IIC_SR>; def SAR64ri : RIi8<0xC1, MRM7r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2), "sar{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (sra GR64:$src1, (i8 imm:$src2)))]>; + [(set GR64:$dst, (sra GR64:$src1, (i8 imm:$src2)))], + IIC_SR>; // Shift by 1 def SAR8r1 : I<0xD0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1), "sar{b}\t$dst", - [(set GR8:$dst, (sra GR8:$src1, (i8 1)))]>; + [(set GR8:$dst, (sra GR8:$src1, (i8 1)))], + IIC_SR>; def SAR16r1 : I<0xD1, MRM7r, (outs GR16:$dst), (ins GR16:$src1), "sar{w}\t$dst", - [(set GR16:$dst, (sra GR16:$src1, (i8 1)))]>, OpSize; + [(set GR16:$dst, (sra GR16:$src1, (i8 1)))], + IIC_SR>, OpSize; def SAR32r1 : I<0xD1, MRM7r, (outs GR32:$dst), (ins GR32:$src1), "sar{l}\t$dst", - [(set GR32:$dst, (sra GR32:$src1, (i8 1)))]>; + [(set GR32:$dst, (sra GR32:$src1, (i8 1)))], + IIC_SR>; def SAR64r1 : RI<0xD1, MRM7r, (outs GR64:$dst), (ins 
GR64:$src1), "sar{q}\t$dst", - [(set GR64:$dst, (sra GR64:$src1, (i8 1)))]>; + [(set GR64:$dst, (sra GR64:$src1, (i8 1)))], + IIC_SR>; } // Constraints = "$src = $dst" let Uses = [CL] in { def SAR8mCL : I<0xD2, MRM7m, (outs), (ins i8mem :$dst), "sar{b}\t{%cl, $dst|$dst, CL}", - [(store (sra (loadi8 addr:$dst), CL), addr:$dst)]>; + [(store (sra (loadi8 addr:$dst), CL), addr:$dst)], + IIC_SR>; def SAR16mCL : I<0xD3, MRM7m, (outs), (ins i16mem:$dst), "sar{w}\t{%cl, $dst|$dst, CL}", - [(store (sra (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize; + [(store (sra (loadi16 addr:$dst), CL), addr:$dst)], + IIC_SR>, OpSize; def SAR32mCL : I<0xD3, MRM7m, (outs), (ins i32mem:$dst), "sar{l}\t{%cl, $dst|$dst, CL}", - [(store (sra (loadi32 addr:$dst), CL), addr:$dst)]>; + [(store (sra (loadi32 addr:$dst), CL), addr:$dst)], + IIC_SR>; def SAR64mCL : RI<0xD3, MRM7m, (outs), (ins i64mem:$dst), "sar{q}\t{%cl, $dst|$dst, CL}", - [(store (sra (loadi64 addr:$dst), CL), addr:$dst)]>; + [(store (sra (loadi64 addr:$dst), CL), addr:$dst)], + IIC_SR>; } def SAR8mi : Ii8<0xC0, MRM7m, (outs), (ins i8mem :$dst, i8imm:$src), "sar{b}\t{$src, $dst|$dst, $src}", - [(store (sra (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>; + [(store (sra (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)], + IIC_SR>; def SAR16mi : Ii8<0xC1, MRM7m, (outs), (ins i16mem:$dst, i8imm:$src), "sar{w}\t{$src, $dst|$dst, $src}", - [(store (sra (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>, + [(store (sra (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)], + IIC_SR>, OpSize; def SAR32mi : Ii8<0xC1, MRM7m, (outs), (ins i32mem:$dst, i8imm:$src), "sar{l}\t{$src, $dst|$dst, $src}", - [(store (sra (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>; + [(store (sra (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)], + IIC_SR>; def SAR64mi : RIi8<0xC1, MRM7m, (outs), (ins i64mem:$dst, i8imm:$src), "sar{q}\t{$src, $dst|$dst, $src}", - [(store (sra (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>; + [(store (sra (loadi64 addr:$dst), (i8 
imm:$src)), addr:$dst)], + IIC_SR>; // Shift by 1 def SAR8m1 : I<0xD0, MRM7m, (outs), (ins i8mem :$dst), "sar{b}\t$dst", - [(store (sra (loadi8 addr:$dst), (i8 1)), addr:$dst)]>; + [(store (sra (loadi8 addr:$dst), (i8 1)), addr:$dst)], + IIC_SR>; def SAR16m1 : I<0xD1, MRM7m, (outs), (ins i16mem:$dst), "sar{w}\t$dst", - [(store (sra (loadi16 addr:$dst), (i8 1)), addr:$dst)]>, + [(store (sra (loadi16 addr:$dst), (i8 1)), addr:$dst)], + IIC_SR>, OpSize; def SAR32m1 : I<0xD1, MRM7m, (outs), (ins i32mem:$dst), "sar{l}\t$dst", - [(store (sra (loadi32 addr:$dst), (i8 1)), addr:$dst)]>; + [(store (sra (loadi32 addr:$dst), (i8 1)), addr:$dst)], + IIC_SR>; def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst), "sar{q}\t$dst", - [(store (sra (loadi64 addr:$dst), (i8 1)), addr:$dst)]>; + [(store (sra (loadi64 addr:$dst), (i8 1)), addr:$dst)], + IIC_SR>; //===----------------------------------------------------------------------===// // Rotate instructions @@ -290,125 +335,125 @@ def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst), let Constraints = "$src1 = $dst" in { def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src1), - "rcl{b}\t$dst", []>; + "rcl{b}\t$dst", [], IIC_SR>; def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt), - "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>; + "rcl{b}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>; let Uses = [CL] in def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src1), - "rcl{b}\t{%cl, $dst|$dst, CL}", []>; + "rcl{b}\t{%cl, $dst|$dst, CL}", [], IIC_SR>; def RCL16r1 : I<0xD1, MRM2r, (outs GR16:$dst), (ins GR16:$src1), - "rcl{w}\t$dst", []>, OpSize; + "rcl{w}\t$dst", [], IIC_SR>, OpSize; def RCL16ri : Ii8<0xC1, MRM2r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt), - "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; + "rcl{w}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>, OpSize; let Uses = [CL] in def RCL16rCL : I<0xD3, MRM2r, (outs GR16:$dst), (ins GR16:$src1), - "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; + 
"rcl{w}\t{%cl, $dst|$dst, CL}", [], IIC_SR>, OpSize; def RCL32r1 : I<0xD1, MRM2r, (outs GR32:$dst), (ins GR32:$src1), - "rcl{l}\t$dst", []>; + "rcl{l}\t$dst", [], IIC_SR>; def RCL32ri : Ii8<0xC1, MRM2r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$cnt), - "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>; + "rcl{l}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>; let Uses = [CL] in def RCL32rCL : I<0xD3, MRM2r, (outs GR32:$dst), (ins GR32:$src1), - "rcl{l}\t{%cl, $dst|$dst, CL}", []>; + "rcl{l}\t{%cl, $dst|$dst, CL}", [], IIC_SR>; def RCL64r1 : RI<0xD1, MRM2r, (outs GR64:$dst), (ins GR64:$src1), - "rcl{q}\t$dst", []>; + "rcl{q}\t$dst", [], IIC_SR>; def RCL64ri : RIi8<0xC1, MRM2r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$cnt), - "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>; + "rcl{q}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>; let Uses = [CL] in def RCL64rCL : RI<0xD3, MRM2r, (outs GR64:$dst), (ins GR64:$src1), - "rcl{q}\t{%cl, $dst|$dst, CL}", []>; + "rcl{q}\t{%cl, $dst|$dst, CL}", [], IIC_SR>; def RCR8r1 : I<0xD0, MRM3r, (outs GR8:$dst), (ins GR8:$src1), - "rcr{b}\t$dst", []>; + "rcr{b}\t$dst", [], IIC_SR>; def RCR8ri : Ii8<0xC0, MRM3r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt), - "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>; + "rcr{b}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>; let Uses = [CL] in def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src1), - "rcr{b}\t{%cl, $dst|$dst, CL}", []>; + "rcr{b}\t{%cl, $dst|$dst, CL}", [], IIC_SR>; def RCR16r1 : I<0xD1, MRM3r, (outs GR16:$dst), (ins GR16:$src1), - "rcr{w}\t$dst", []>, OpSize; + "rcr{w}\t$dst", [], IIC_SR>, OpSize; def RCR16ri : Ii8<0xC1, MRM3r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt), - "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; + "rcr{w}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>, OpSize; let Uses = [CL] in def RCR16rCL : I<0xD3, MRM3r, (outs GR16:$dst), (ins GR16:$src1), - "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; + "rcr{w}\t{%cl, $dst|$dst, CL}", [], IIC_SR>, OpSize; def RCR32r1 : I<0xD1, MRM3r, (outs GR32:$dst), (ins 
GR32:$src1), - "rcr{l}\t$dst", []>; + "rcr{l}\t$dst", [], IIC_SR>; def RCR32ri : Ii8<0xC1, MRM3r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$cnt), - "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>; + "rcr{l}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>; let Uses = [CL] in def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src1), - "rcr{l}\t{%cl, $dst|$dst, CL}", []>; + "rcr{l}\t{%cl, $dst|$dst, CL}", [], IIC_SR>; def RCR64r1 : RI<0xD1, MRM3r, (outs GR64:$dst), (ins GR64:$src1), - "rcr{q}\t$dst", []>; + "rcr{q}\t$dst", [], IIC_SR>; def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$cnt), - "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>; + "rcr{q}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>; let Uses = [CL] in def RCR64rCL : RI<0xD3, MRM3r, (outs GR64:$dst), (ins GR64:$src1), - "rcr{q}\t{%cl, $dst|$dst, CL}", []>; + "rcr{q}\t{%cl, $dst|$dst, CL}", [], IIC_SR>; } // Constraints = "$src = $dst" def RCL8m1 : I<0xD0, MRM2m, (outs), (ins i8mem:$dst), - "rcl{b}\t$dst", []>; + "rcl{b}\t$dst", [], IIC_SR>; def RCL8mi : Ii8<0xC0, MRM2m, (outs), (ins i8mem:$dst, i8imm:$cnt), - "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>; + "rcl{b}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>; def RCL16m1 : I<0xD1, MRM2m, (outs), (ins i16mem:$dst), - "rcl{w}\t$dst", []>, OpSize; + "rcl{w}\t$dst", [], IIC_SR>, OpSize; def RCL16mi : Ii8<0xC1, MRM2m, (outs), (ins i16mem:$dst, i8imm:$cnt), - "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; + "rcl{w}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>, OpSize; def RCL32m1 : I<0xD1, MRM2m, (outs), (ins i32mem:$dst), - "rcl{l}\t$dst", []>; + "rcl{l}\t$dst", [], IIC_SR>; def RCL32mi : Ii8<0xC1, MRM2m, (outs), (ins i32mem:$dst, i8imm:$cnt), - "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>; + "rcl{l}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>; def RCL64m1 : RI<0xD1, MRM2m, (outs), (ins i64mem:$dst), - "rcl{q}\t$dst", []>; + "rcl{q}\t$dst", [], IIC_SR>; def RCL64mi : RIi8<0xC1, MRM2m, (outs), (ins i64mem:$dst, i8imm:$cnt), - "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>; + "rcl{q}\t{$cnt, 
$dst|$dst, $cnt}", [], IIC_SR>; def RCR8m1 : I<0xD0, MRM3m, (outs), (ins i8mem:$dst), - "rcr{b}\t$dst", []>; + "rcr{b}\t$dst", [], IIC_SR>; def RCR8mi : Ii8<0xC0, MRM3m, (outs), (ins i8mem:$dst, i8imm:$cnt), - "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>; + "rcr{b}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>; def RCR16m1 : I<0xD1, MRM3m, (outs), (ins i16mem:$dst), - "rcr{w}\t$dst", []>, OpSize; + "rcr{w}\t$dst", [], IIC_SR>, OpSize; def RCR16mi : Ii8<0xC1, MRM3m, (outs), (ins i16mem:$dst, i8imm:$cnt), - "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; + "rcr{w}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>, OpSize; def RCR32m1 : I<0xD1, MRM3m, (outs), (ins i32mem:$dst), - "rcr{l}\t$dst", []>; + "rcr{l}\t$dst", [], IIC_SR>; def RCR32mi : Ii8<0xC1, MRM3m, (outs), (ins i32mem:$dst, i8imm:$cnt), - "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>; + "rcr{l}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>; def RCR64m1 : RI<0xD1, MRM3m, (outs), (ins i64mem:$dst), - "rcr{q}\t$dst", []>; + "rcr{q}\t$dst", [], IIC_SR>; def RCR64mi : RIi8<0xC1, MRM3m, (outs), (ins i64mem:$dst, i8imm:$cnt), - "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>; + "rcr{q}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>; let Uses = [CL] in { def RCL8mCL : I<0xD2, MRM2m, (outs), (ins i8mem:$dst), - "rcl{b}\t{%cl, $dst|$dst, CL}", []>; + "rcl{b}\t{%cl, $dst|$dst, CL}", [], IIC_SR>; def RCL16mCL : I<0xD3, MRM2m, (outs), (ins i16mem:$dst), - "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; + "rcl{w}\t{%cl, $dst|$dst, CL}", [], IIC_SR>, OpSize; def RCL32mCL : I<0xD3, MRM2m, (outs), (ins i32mem:$dst), - "rcl{l}\t{%cl, $dst|$dst, CL}", []>; + "rcl{l}\t{%cl, $dst|$dst, CL}", [], IIC_SR>; def RCL64mCL : RI<0xD3, MRM2m, (outs), (ins i64mem:$dst), - "rcl{q}\t{%cl, $dst|$dst, CL}", []>; + "rcl{q}\t{%cl, $dst|$dst, CL}", [], IIC_SR>; def RCR8mCL : I<0xD2, MRM3m, (outs), (ins i8mem:$dst), - "rcr{b}\t{%cl, $dst|$dst, CL}", []>; + "rcr{b}\t{%cl, $dst|$dst, CL}", [], IIC_SR>; def RCR16mCL : I<0xD3, MRM3m, (outs), (ins i16mem:$dst), - "rcr{w}\t{%cl, $dst|$dst, CL}", []>, 
OpSize; + "rcr{w}\t{%cl, $dst|$dst, CL}", [], IIC_SR>, OpSize; def RCR32mCL : I<0xD3, MRM3m, (outs), (ins i32mem:$dst), - "rcr{l}\t{%cl, $dst|$dst, CL}", []>; + "rcr{l}\t{%cl, $dst|$dst, CL}", [], IIC_SR>; def RCR64mCL : RI<0xD3, MRM3m, (outs), (ins i64mem:$dst), - "rcr{q}\t{%cl, $dst|$dst, CL}", []>; + "rcr{q}\t{%cl, $dst|$dst, CL}", [], IIC_SR>; } let Constraints = "$src1 = $dst" in { @@ -416,179 +461,217 @@ let Constraints = "$src1 = $dst" in { let Uses = [CL] in { def ROL8rCL : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1), "rol{b}\t{%cl, $dst|$dst, CL}", - [(set GR8:$dst, (rotl GR8:$src1, CL))]>; + [(set GR8:$dst, (rotl GR8:$src1, CL))], IIC_SR>; def ROL16rCL : I<0xD3, MRM0r, (outs GR16:$dst), (ins GR16:$src1), "rol{w}\t{%cl, $dst|$dst, CL}", - [(set GR16:$dst, (rotl GR16:$src1, CL))]>, OpSize; + [(set GR16:$dst, (rotl GR16:$src1, CL))], IIC_SR>, OpSize; def ROL32rCL : I<0xD3, MRM0r, (outs GR32:$dst), (ins GR32:$src1), "rol{l}\t{%cl, $dst|$dst, CL}", - [(set GR32:$dst, (rotl GR32:$src1, CL))]>; + [(set GR32:$dst, (rotl GR32:$src1, CL))], IIC_SR>; def ROL64rCL : RI<0xD3, MRM0r, (outs GR64:$dst), (ins GR64:$src1), "rol{q}\t{%cl, $dst|$dst, CL}", - [(set GR64:$dst, (rotl GR64:$src1, CL))]>; + [(set GR64:$dst, (rotl GR64:$src1, CL))], IIC_SR>; } def ROL8ri : Ii8<0xC0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2), "rol{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, (rotl GR8:$src1, (i8 imm:$src2)))]>; + [(set GR8:$dst, (rotl GR8:$src1, (i8 imm:$src2)))], IIC_SR>; def ROL16ri : Ii8<0xC1, MRM0r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2), "rol{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (rotl GR16:$src1, (i8 imm:$src2)))]>, + [(set GR16:$dst, (rotl GR16:$src1, (i8 imm:$src2)))], + IIC_SR>, OpSize; def ROL32ri : Ii8<0xC1, MRM0r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2), "rol{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (rotl GR32:$src1, (i8 imm:$src2)))]>; + [(set GR32:$dst, (rotl GR32:$src1, (i8 imm:$src2)))], + IIC_SR>; def 
ROL64ri : RIi8<0xC1, MRM0r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2), "rol{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (rotl GR64:$src1, (i8 imm:$src2)))]>; + [(set GR64:$dst, (rotl GR64:$src1, (i8 imm:$src2)))], + IIC_SR>; // Rotate by 1 def ROL8r1 : I<0xD0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1), "rol{b}\t$dst", - [(set GR8:$dst, (rotl GR8:$src1, (i8 1)))]>; + [(set GR8:$dst, (rotl GR8:$src1, (i8 1)))], + IIC_SR>; def ROL16r1 : I<0xD1, MRM0r, (outs GR16:$dst), (ins GR16:$src1), "rol{w}\t$dst", - [(set GR16:$dst, (rotl GR16:$src1, (i8 1)))]>, OpSize; + [(set GR16:$dst, (rotl GR16:$src1, (i8 1)))], + IIC_SR>, OpSize; def ROL32r1 : I<0xD1, MRM0r, (outs GR32:$dst), (ins GR32:$src1), "rol{l}\t$dst", - [(set GR32:$dst, (rotl GR32:$src1, (i8 1)))]>; + [(set GR32:$dst, (rotl GR32:$src1, (i8 1)))], + IIC_SR>; def ROL64r1 : RI<0xD1, MRM0r, (outs GR64:$dst), (ins GR64:$src1), "rol{q}\t$dst", - [(set GR64:$dst, (rotl GR64:$src1, (i8 1)))]>; + [(set GR64:$dst, (rotl GR64:$src1, (i8 1)))], + IIC_SR>; } // Constraints = "$src = $dst" let Uses = [CL] in { def ROL8mCL : I<0xD2, MRM0m, (outs), (ins i8mem :$dst), "rol{b}\t{%cl, $dst|$dst, CL}", - [(store (rotl (loadi8 addr:$dst), CL), addr:$dst)]>; + [(store (rotl (loadi8 addr:$dst), CL), addr:$dst)], + IIC_SR>; def ROL16mCL : I<0xD3, MRM0m, (outs), (ins i16mem:$dst), "rol{w}\t{%cl, $dst|$dst, CL}", - [(store (rotl (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize; + [(store (rotl (loadi16 addr:$dst), CL), addr:$dst)], + IIC_SR>, OpSize; def ROL32mCL : I<0xD3, MRM0m, (outs), (ins i32mem:$dst), "rol{l}\t{%cl, $dst|$dst, CL}", - [(store (rotl (loadi32 addr:$dst), CL), addr:$dst)]>; + [(store (rotl (loadi32 addr:$dst), CL), addr:$dst)], + IIC_SR>; def ROL64mCL : RI<0xD3, MRM0m, (outs), (ins i64mem:$dst), "rol{q}\t{%cl, $dst|$dst, %cl}", - [(store (rotl (loadi64 addr:$dst), CL), addr:$dst)]>; + [(store (rotl (loadi64 addr:$dst), CL), addr:$dst)], + IIC_SR>; } def ROL8mi : Ii8<0xC0, MRM0m, (outs), (ins i8mem :$dst, 
i8imm:$src1), "rol{b}\t{$src1, $dst|$dst, $src1}", - [(store (rotl (loadi8 addr:$dst), (i8 imm:$src1)), addr:$dst)]>; + [(store (rotl (loadi8 addr:$dst), (i8 imm:$src1)), addr:$dst)], + IIC_SR>; def ROL16mi : Ii8<0xC1, MRM0m, (outs), (ins i16mem:$dst, i8imm:$src1), "rol{w}\t{$src1, $dst|$dst, $src1}", - [(store (rotl (loadi16 addr:$dst), (i8 imm:$src1)), addr:$dst)]>, + [(store (rotl (loadi16 addr:$dst), (i8 imm:$src1)), addr:$dst)], + IIC_SR>, OpSize; def ROL32mi : Ii8<0xC1, MRM0m, (outs), (ins i32mem:$dst, i8imm:$src1), "rol{l}\t{$src1, $dst|$dst, $src1}", - [(store (rotl (loadi32 addr:$dst), (i8 imm:$src1)), addr:$dst)]>; + [(store (rotl (loadi32 addr:$dst), (i8 imm:$src1)), addr:$dst)], + IIC_SR>; def ROL64mi : RIi8<0xC1, MRM0m, (outs), (ins i64mem:$dst, i8imm:$src1), "rol{q}\t{$src1, $dst|$dst, $src1}", - [(store (rotl (loadi64 addr:$dst), (i8 imm:$src1)), addr:$dst)]>; + [(store (rotl (loadi64 addr:$dst), (i8 imm:$src1)), addr:$dst)], + IIC_SR>; // Rotate by 1 def ROL8m1 : I<0xD0, MRM0m, (outs), (ins i8mem :$dst), "rol{b}\t$dst", - [(store (rotl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>; + [(store (rotl (loadi8 addr:$dst), (i8 1)), addr:$dst)], + IIC_SR>; def ROL16m1 : I<0xD1, MRM0m, (outs), (ins i16mem:$dst), "rol{w}\t$dst", - [(store (rotl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>, + [(store (rotl (loadi16 addr:$dst), (i8 1)), addr:$dst)], + IIC_SR>, OpSize; def ROL32m1 : I<0xD1, MRM0m, (outs), (ins i32mem:$dst), "rol{l}\t$dst", - [(store (rotl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>; + [(store (rotl (loadi32 addr:$dst), (i8 1)), addr:$dst)], + IIC_SR>; def ROL64m1 : RI<0xD1, MRM0m, (outs), (ins i64mem:$dst), "rol{q}\t$dst", - [(store (rotl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>; + [(store (rotl (loadi64 addr:$dst), (i8 1)), addr:$dst)], + IIC_SR>; let Constraints = "$src1 = $dst" in { let Uses = [CL] in { def ROR8rCL : I<0xD2, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1), "ror{b}\t{%cl, $dst|$dst, CL}", - [(set GR8:$dst, (rotr GR8:$src1, CL))]>; + 
[(set GR8:$dst, (rotr GR8:$src1, CL))], IIC_SR>; def ROR16rCL : I<0xD3, MRM1r, (outs GR16:$dst), (ins GR16:$src1), "ror{w}\t{%cl, $dst|$dst, CL}", - [(set GR16:$dst, (rotr GR16:$src1, CL))]>, OpSize; + [(set GR16:$dst, (rotr GR16:$src1, CL))], IIC_SR>, OpSize; def ROR32rCL : I<0xD3, MRM1r, (outs GR32:$dst), (ins GR32:$src1), "ror{l}\t{%cl, $dst|$dst, CL}", - [(set GR32:$dst, (rotr GR32:$src1, CL))]>; + [(set GR32:$dst, (rotr GR32:$src1, CL))], IIC_SR>; def ROR64rCL : RI<0xD3, MRM1r, (outs GR64:$dst), (ins GR64:$src1), "ror{q}\t{%cl, $dst|$dst, CL}", - [(set GR64:$dst, (rotr GR64:$src1, CL))]>; + [(set GR64:$dst, (rotr GR64:$src1, CL))], IIC_SR>; } def ROR8ri : Ii8<0xC0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2), "ror{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, (rotr GR8:$src1, (i8 imm:$src2)))]>; + [(set GR8:$dst, (rotr GR8:$src1, (i8 imm:$src2)))], IIC_SR>; def ROR16ri : Ii8<0xC1, MRM1r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2), "ror{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (rotr GR16:$src1, (i8 imm:$src2)))]>, + [(set GR16:$dst, (rotr GR16:$src1, (i8 imm:$src2)))], + IIC_SR>, OpSize; def ROR32ri : Ii8<0xC1, MRM1r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2), "ror{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (rotr GR32:$src1, (i8 imm:$src2)))]>; + [(set GR32:$dst, (rotr GR32:$src1, (i8 imm:$src2)))], + IIC_SR>; def ROR64ri : RIi8<0xC1, MRM1r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2), "ror{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (rotr GR64:$src1, (i8 imm:$src2)))]>; + [(set GR64:$dst, (rotr GR64:$src1, (i8 imm:$src2)))], + IIC_SR>; // Rotate by 1 def ROR8r1 : I<0xD0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1), "ror{b}\t$dst", - [(set GR8:$dst, (rotr GR8:$src1, (i8 1)))]>; + [(set GR8:$dst, (rotr GR8:$src1, (i8 1)))], + IIC_SR>; def ROR16r1 : I<0xD1, MRM1r, (outs GR16:$dst), (ins GR16:$src1), "ror{w}\t$dst", - [(set GR16:$dst, (rotr GR16:$src1, (i8 1)))]>, OpSize; + [(set GR16:$dst, (rotr GR16:$src1, (i8 
1)))], + IIC_SR>, OpSize; def ROR32r1 : I<0xD1, MRM1r, (outs GR32:$dst), (ins GR32:$src1), "ror{l}\t$dst", - [(set GR32:$dst, (rotr GR32:$src1, (i8 1)))]>; + [(set GR32:$dst, (rotr GR32:$src1, (i8 1)))], + IIC_SR>; def ROR64r1 : RI<0xD1, MRM1r, (outs GR64:$dst), (ins GR64:$src1), "ror{q}\t$dst", - [(set GR64:$dst, (rotr GR64:$src1, (i8 1)))]>; + [(set GR64:$dst, (rotr GR64:$src1, (i8 1)))], + IIC_SR>; } // Constraints = "$src = $dst" let Uses = [CL] in { def ROR8mCL : I<0xD2, MRM1m, (outs), (ins i8mem :$dst), "ror{b}\t{%cl, $dst|$dst, CL}", - [(store (rotr (loadi8 addr:$dst), CL), addr:$dst)]>; + [(store (rotr (loadi8 addr:$dst), CL), addr:$dst)], + IIC_SR>; def ROR16mCL : I<0xD3, MRM1m, (outs), (ins i16mem:$dst), "ror{w}\t{%cl, $dst|$dst, CL}", - [(store (rotr (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize; + [(store (rotr (loadi16 addr:$dst), CL), addr:$dst)], + IIC_SR>, OpSize; def ROR32mCL : I<0xD3, MRM1m, (outs), (ins i32mem:$dst), "ror{l}\t{%cl, $dst|$dst, CL}", - [(store (rotr (loadi32 addr:$dst), CL), addr:$dst)]>; + [(store (rotr (loadi32 addr:$dst), CL), addr:$dst)], + IIC_SR>; def ROR64mCL : RI<0xD3, MRM1m, (outs), (ins i64mem:$dst), "ror{q}\t{%cl, $dst|$dst, CL}", - [(store (rotr (loadi64 addr:$dst), CL), addr:$dst)]>; + [(store (rotr (loadi64 addr:$dst), CL), addr:$dst)], + IIC_SR>; } def ROR8mi : Ii8<0xC0, MRM1m, (outs), (ins i8mem :$dst, i8imm:$src), "ror{b}\t{$src, $dst|$dst, $src}", - [(store (rotr (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>; + [(store (rotr (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)], + IIC_SR>; def ROR16mi : Ii8<0xC1, MRM1m, (outs), (ins i16mem:$dst, i8imm:$src), "ror{w}\t{$src, $dst|$dst, $src}", - [(store (rotr (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>, + [(store (rotr (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)], + IIC_SR>, OpSize; def ROR32mi : Ii8<0xC1, MRM1m, (outs), (ins i32mem:$dst, i8imm:$src), "ror{l}\t{$src, $dst|$dst, $src}", - [(store (rotr (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>; + 
[(store (rotr (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)], + IIC_SR>; def ROR64mi : RIi8<0xC1, MRM1m, (outs), (ins i64mem:$dst, i8imm:$src), "ror{q}\t{$src, $dst|$dst, $src}", - [(store (rotr (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>; + [(store (rotr (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)], + IIC_SR>; // Rotate by 1 def ROR8m1 : I<0xD0, MRM1m, (outs), (ins i8mem :$dst), "ror{b}\t$dst", - [(store (rotr (loadi8 addr:$dst), (i8 1)), addr:$dst)]>; + [(store (rotr (loadi8 addr:$dst), (i8 1)), addr:$dst)], + IIC_SR>; def ROR16m1 : I<0xD1, MRM1m, (outs), (ins i16mem:$dst), "ror{w}\t$dst", - [(store (rotr (loadi16 addr:$dst), (i8 1)), addr:$dst)]>, + [(store (rotr (loadi16 addr:$dst), (i8 1)), addr:$dst)], + IIC_SR>, OpSize; def ROR32m1 : I<0xD1, MRM1m, (outs), (ins i32mem:$dst), "ror{l}\t$dst", - [(store (rotr (loadi32 addr:$dst), (i8 1)), addr:$dst)]>; + [(store (rotr (loadi32 addr:$dst), (i8 1)), addr:$dst)], + IIC_SR>; def ROR64m1 : RI<0xD1, MRM1m, (outs), (ins i64mem:$dst), "ror{q}\t$dst", - [(store (rotr (loadi64 addr:$dst), (i8 1)), addr:$dst)]>; + [(store (rotr (loadi64 addr:$dst), (i8 1)), addr:$dst)], + IIC_SR>; //===----------------------------------------------------------------------===// @@ -601,30 +684,36 @@ let Uses = [CL] in { def SHLD16rrCL : I<0xA5, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), "shld{w}\t{%cl, $src2, $dst|$dst, $src2, CL}", - [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2, CL))]>, + [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2, CL))], + IIC_SHD16_REG_CL>, TB, OpSize; def SHRD16rrCL : I<0xAD, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, CL}", - [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2, CL))]>, + [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2, CL))], + IIC_SHD16_REG_CL>, TB, OpSize; def SHLD32rrCL : I<0xA5, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), "shld{l}\t{%cl, $src2, $dst|$dst, $src2, CL}", - [(set GR32:$dst, 
(X86shld GR32:$src1, GR32:$src2, CL))]>, TB; + [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2, CL))], + IIC_SHD32_REG_CL>, TB; def SHRD32rrCL : I<0xAD, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, CL}", - [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2, CL))]>, TB; + [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2, CL))], + IIC_SHD32_REG_CL>, TB; def SHLD64rrCL : RI<0xA5, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), "shld{q}\t{%cl, $src2, $dst|$dst, $src2, CL}", - [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2, CL))]>, + [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2, CL))], + IIC_SHD64_REG_CL>, TB; def SHRD64rrCL : RI<0xAD, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, CL}", - [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2, CL))]>, + [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2, CL))], + IIC_SHD64_REG_CL>, TB; } @@ -634,42 +723,42 @@ def SHLD16rri8 : Ii8<0xA4, MRMDestReg, (ins GR16:$src1, GR16:$src2, i8imm:$src3), "shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2, - (i8 imm:$src3)))]>, + (i8 imm:$src3)))], IIC_SHD16_REG_IM>, TB, OpSize; def SHRD16rri8 : Ii8<0xAC, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2, i8imm:$src3), "shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2, - (i8 imm:$src3)))]>, + (i8 imm:$src3)))], IIC_SHD16_REG_IM>, TB, OpSize; def SHLD32rri8 : Ii8<0xA4, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2, i8imm:$src3), "shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2, - (i8 imm:$src3)))]>, + (i8 imm:$src3)))], IIC_SHD32_REG_IM>, TB; def SHRD32rri8 : Ii8<0xAC, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2, i8imm:$src3), "shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2, - (i8 
imm:$src3)))]>, + (i8 imm:$src3)))], IIC_SHD32_REG_IM>, TB; def SHLD64rri8 : RIi8<0xA4, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2, i8imm:$src3), "shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2, - (i8 imm:$src3)))]>, + (i8 imm:$src3)))], IIC_SHD64_REG_IM>, TB; def SHRD64rri8 : RIi8<0xAC, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2, i8imm:$src3), "shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2, - (i8 imm:$src3)))]>, + (i8 imm:$src3)))], IIC_SHD64_REG_IM>, TB; } } // Constraints = "$src = $dst" @@ -678,68 +767,74 @@ let Uses = [CL] in { def SHLD16mrCL : I<0xA5, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2), "shld{w}\t{%cl, $src2, $dst|$dst, $src2, CL}", [(store (X86shld (loadi16 addr:$dst), GR16:$src2, CL), - addr:$dst)]>, TB, OpSize; + addr:$dst)], IIC_SHD16_MEM_CL>, TB, OpSize; def SHRD16mrCL : I<0xAD, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2), "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, CL}", [(store (X86shrd (loadi16 addr:$dst), GR16:$src2, CL), - addr:$dst)]>, TB, OpSize; + addr:$dst)], IIC_SHD16_MEM_CL>, TB, OpSize; def SHLD32mrCL : I<0xA5, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), "shld{l}\t{%cl, $src2, $dst|$dst, $src2, CL}", [(store (X86shld (loadi32 addr:$dst), GR32:$src2, CL), - addr:$dst)]>, TB; + addr:$dst)], IIC_SHD32_MEM_CL>, TB; def SHRD32mrCL : I<0xAD, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, CL}", [(store (X86shrd (loadi32 addr:$dst), GR32:$src2, CL), - addr:$dst)]>, TB; + addr:$dst)], IIC_SHD32_MEM_CL>, TB; def SHLD64mrCL : RI<0xA5, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), "shld{q}\t{%cl, $src2, $dst|$dst, $src2, CL}", [(store (X86shld (loadi64 addr:$dst), GR64:$src2, CL), - addr:$dst)]>, TB; + addr:$dst)], IIC_SHD64_MEM_CL>, TB; def SHRD64mrCL : RI<0xAD, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), "shrd{q}\t{%cl, $src2, $dst|$dst, 
$src2, CL}", [(store (X86shrd (loadi64 addr:$dst), GR64:$src2, CL), - addr:$dst)]>, TB; + addr:$dst)], IIC_SHD64_MEM_CL>, TB; } def SHLD16mri8 : Ii8<0xA4, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2, i8imm:$src3), "shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(store (X86shld (loadi16 addr:$dst), GR16:$src2, - (i8 imm:$src3)), addr:$dst)]>, + (i8 imm:$src3)), addr:$dst)], + IIC_SHD16_MEM_IM>, TB, OpSize; def SHRD16mri8 : Ii8<0xAC, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2, i8imm:$src3), "shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(store (X86shrd (loadi16 addr:$dst), GR16:$src2, - (i8 imm:$src3)), addr:$dst)]>, + (i8 imm:$src3)), addr:$dst)], + IIC_SHD16_MEM_IM>, TB, OpSize; def SHLD32mri8 : Ii8<0xA4, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2, i8imm:$src3), "shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(store (X86shld (loadi32 addr:$dst), GR32:$src2, - (i8 imm:$src3)), addr:$dst)]>, + (i8 imm:$src3)), addr:$dst)], + IIC_SHD32_MEM_IM>, TB; def SHRD32mri8 : Ii8<0xAC, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2, i8imm:$src3), "shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(store (X86shrd (loadi32 addr:$dst), GR32:$src2, - (i8 imm:$src3)), addr:$dst)]>, + (i8 imm:$src3)), addr:$dst)], + IIC_SHD32_MEM_IM>, TB; def SHLD64mri8 : RIi8<0xA4, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2, i8imm:$src3), "shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(store (X86shld (loadi64 addr:$dst), GR64:$src2, - (i8 imm:$src3)), addr:$dst)]>, + (i8 imm:$src3)), addr:$dst)], + IIC_SHD64_MEM_IM>, TB; def SHRD64mri8 : RIi8<0xAC, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2, i8imm:$src3), "shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(store (X86shrd (loadi64 addr:$dst), GR64:$src2, - (i8 imm:$src3)), addr:$dst)]>, + (i8 imm:$src3)), addr:$dst)], + IIC_SHD64_MEM_IM>, TB; } // Defs = [EFLAGS] diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td new file mode 100644 index 0000000..78037c6 --- /dev/null 
+++ b/lib/Target/X86/X86Schedule.td @@ -0,0 +1,115 @@ +//===- X86Schedule.td - X86 Scheduling Definitions ---------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Instruction Itinerary classes used for X86 +def IIC_DEFAULT : InstrItinClass; +def IIC_ALU_MEM : InstrItinClass; +def IIC_ALU_NONMEM : InstrItinClass; +def IIC_LEA : InstrItinClass; +def IIC_LEA_16 : InstrItinClass; +def IIC_MUL8 : InstrItinClass; +def IIC_MUL16_MEM : InstrItinClass; +def IIC_MUL16_REG : InstrItinClass; +def IIC_MUL32_MEM : InstrItinClass; +def IIC_MUL32_REG : InstrItinClass; +def IIC_MUL64 : InstrItinClass; +// imul by al, ax, eax, rax +def IIC_IMUL8 : InstrItinClass; +def IIC_IMUL16_MEM : InstrItinClass; +def IIC_IMUL16_REG : InstrItinClass; +def IIC_IMUL32_MEM : InstrItinClass; +def IIC_IMUL32_REG : InstrItinClass; +def IIC_IMUL64 : InstrItinClass; +// imul reg by reg|mem +def IIC_IMUL16_RM : InstrItinClass; +def IIC_IMUL16_RR : InstrItinClass; +def IIC_IMUL32_RM : InstrItinClass; +def IIC_IMUL32_RR : InstrItinClass; +def IIC_IMUL64_RM : InstrItinClass; +def IIC_IMUL64_RR : InstrItinClass; +// imul reg = reg/mem * imm +def IIC_IMUL16_RMI : InstrItinClass; +def IIC_IMUL16_RRI : InstrItinClass; +def IIC_IMUL32_RMI : InstrItinClass; +def IIC_IMUL32_RRI : InstrItinClass; +def IIC_IMUL64_RMI : InstrItinClass; +def IIC_IMUL64_RRI : InstrItinClass; +// div +def IIC_DIV8_MEM : InstrItinClass; +def IIC_DIV8_REG : InstrItinClass; +def IIC_DIV16 : InstrItinClass; +def IIC_DIV32 : InstrItinClass; +def IIC_DIV64 : InstrItinClass; +// idiv +def IIC_IDIV8 : InstrItinClass; +def IIC_IDIV16 : InstrItinClass; +def IIC_IDIV32 : InstrItinClass; +def IIC_IDIV64 : InstrItinClass; +// neg/not/inc/dec +def
IIC_UNARY_REG : InstrItinClass; +def IIC_UNARY_MEM : InstrItinClass; +// add/sub/and/or/xor/adc/sbc/cmp/test +def IIC_BIN_MEM : InstrItinClass; +def IIC_BIN_NONMEM : InstrItinClass; +// shift/rotate +def IIC_SR : InstrItinClass; +// shift double +def IIC_SHD16_REG_IM : InstrItinClass; +def IIC_SHD16_REG_CL : InstrItinClass; +def IIC_SHD16_MEM_IM : InstrItinClass; +def IIC_SHD16_MEM_CL : InstrItinClass; +def IIC_SHD32_REG_IM : InstrItinClass; +def IIC_SHD32_REG_CL : InstrItinClass; +def IIC_SHD32_MEM_IM : InstrItinClass; +def IIC_SHD32_MEM_CL : InstrItinClass; +def IIC_SHD64_REG_IM : InstrItinClass; +def IIC_SHD64_REG_CL : InstrItinClass; +def IIC_SHD64_MEM_IM : InstrItinClass; +def IIC_SHD64_MEM_CL : InstrItinClass; +// cmov +def IIC_CMOV16_RM : InstrItinClass; +def IIC_CMOV16_RR : InstrItinClass; +def IIC_CMOV32_RM : InstrItinClass; +def IIC_CMOV32_RR : InstrItinClass; +def IIC_CMOV64_RM : InstrItinClass; +def IIC_CMOV64_RR : InstrItinClass; +// set +def IIC_SET_R : InstrItinClass; +def IIC_SET_M : InstrItinClass; +// jmp/jcc/jcxz +def IIC_Jcc : InstrItinClass; +def IIC_JCXZ : InstrItinClass; +def IIC_JMP_REL : InstrItinClass; +def IIC_JMP_REG : InstrItinClass; +def IIC_JMP_MEM : InstrItinClass; +def IIC_JMP_FAR_MEM : InstrItinClass; +def IIC_JMP_FAR_PTR : InstrItinClass; +// loop +def IIC_LOOP : InstrItinClass; +def IIC_LOOPE : InstrItinClass; +def IIC_LOOPNE : InstrItinClass; +// call +def IIC_CALL_RI : InstrItinClass; +def IIC_CALL_MEM : InstrItinClass; +def IIC_CALL_FAR_MEM : InstrItinClass; +def IIC_CALL_FAR_PTR : InstrItinClass; +// ret +def IIC_RET : InstrItinClass; +def IIC_RET_IMM : InstrItinClass; + +//===----------------------------------------------------------------------===// +// Processor instruction itineraries. 
+ +def GenericItineraries : ProcessorItineraries<[], [], []>; + +include "X86ScheduleAtom.td" + + + diff --git a/lib/Target/X86/X86ScheduleAtom.td b/lib/Target/X86/X86ScheduleAtom.td new file mode 100644 index 0000000..a0dbf6d --- /dev/null +++ b/lib/Target/X86/X86ScheduleAtom.td @@ -0,0 +1,136 @@ +//=- X86ScheduleAtom.td - X86 Atom Scheduling Definitions -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the itinerary class data for the Intel Atom (Bonnell) +// processors. +// +//===----------------------------------------------------------------------===// + +// +// Scheduling information derived from the "Intel 64 and IA32 Architectures +// Optimization Reference Manual", Chapter 13, Section 4. +// Functional Units +// Port 0 +def Port0 : FuncUnit; // ALU: ALU0, shift/rotate, load/store + // SIMD/FP: SIMD ALU, Shuffle,SIMD/FP multiply, divide +def Port1 : FuncUnit; // ALU: ALU1, bit processing, jump, and LEA + // SIMD/FP: SIMD ALU, FP Adder + +def AtomItineraries : ProcessorItineraries< + [ Port0, Port1 ], + [], [ + // P0 only + // InstrItinData<class, [InstrStage<N, [P0]>] >, + // P0 or P1 + // InstrItinData<class, [InstrStage<N, [P0, P1]>] >, + // P0 and P1 + // InstrItinData<class, [InstrStage<N, [P0], 0>, InstrStage<N, [P1]>] >, + // + // Default is 1 cycle, port0 or port1 + InstrItinData<IIC_DEFAULT, [InstrStage<1, [Port0, Port1]>] >, + InstrItinData<IIC_ALU_MEM, [InstrStage<1, [Port0]>] >, + InstrItinData<IIC_ALU_NONMEM, [InstrStage<1, [Port0, Port1]>] >, + InstrItinData<IIC_LEA, [InstrStage<1, [Port1]>] >, + InstrItinData<IIC_LEA_16, [InstrStage<2, [Port0, Port1]>] >, + // mul + InstrItinData<IIC_MUL8, [InstrStage<7, [Port0, Port1]>] >, + InstrItinData<IIC_MUL16_MEM, [InstrStage<8, [Port0, Port1]>] >, + 
InstrItinData<IIC_MUL16_REG, [InstrStage<7, [Port0, Port1]>] >, + InstrItinData<IIC_MUL32_MEM, [InstrStage<7, [Port0, Port1]>] >, + InstrItinData<IIC_MUL32_REG, [InstrStage<6, [Port0, Port1]>] >, + InstrItinData<IIC_MUL64, [InstrStage<12, [Port0, Port1]>] >, + // imul by al, ax, eax, rax + InstrItinData<IIC_IMUL8, [InstrStage<7, [Port0, Port1]>] >, + InstrItinData<IIC_IMUL16_MEM, [InstrStage<8, [Port0, Port1]>] >, + InstrItinData<IIC_IMUL16_REG, [InstrStage<7, [Port0, Port1]>] >, + InstrItinData<IIC_IMUL32_MEM, [InstrStage<7, [Port0, Port1]>] >, + InstrItinData<IIC_IMUL32_REG, [InstrStage<6, [Port0, Port1]>] >, + InstrItinData<IIC_IMUL64, [InstrStage<12, [Port0, Port1]>] >, + // imul reg by reg|mem + InstrItinData<IIC_IMUL16_RM, [InstrStage<7, [Port0, Port1]>] >, + InstrItinData<IIC_IMUL16_RR, [InstrStage<6, [Port0, Port1]>] >, + InstrItinData<IIC_IMUL32_RM, [InstrStage<5, [Port0]>] >, + InstrItinData<IIC_IMUL32_RR, [InstrStage<5, [Port0]>] >, + InstrItinData<IIC_IMUL64_RM, [InstrStage<12, [Port0, Port1]>] >, + InstrItinData<IIC_IMUL64_RR, [InstrStage<12, [Port0, Port1]>] >, + // imul reg = reg/mem * imm + InstrItinData<IIC_IMUL16_RRI, [InstrStage<6, [Port0, Port1]>] >, + InstrItinData<IIC_IMUL32_RRI, [InstrStage<5, [Port0]>] >, + InstrItinData<IIC_IMUL64_RRI, [InstrStage<14, [Port0, Port1]>] >, + InstrItinData<IIC_IMUL16_RMI, [InstrStage<7, [Port0, Port1]>] >, + InstrItinData<IIC_IMUL32_RMI, [InstrStage<5, [Port0]>] >, + InstrItinData<IIC_IMUL64_RMI, [InstrStage<14, [Port0, Port1]>] >, + // idiv + InstrItinData<IIC_IDIV8, [InstrStage<62, [Port0, Port1]>] >, + InstrItinData<IIC_IDIV16, [InstrStage<62, [Port0, Port1]>] >, + InstrItinData<IIC_IDIV32, [InstrStage<62, [Port0, Port1]>] >, + InstrItinData<IIC_IDIV64, [InstrStage<130, [Port0, Port1]>] >, + // div + InstrItinData<IIC_DIV8_REG, [InstrStage<50, [Port0, Port1]>] >, + InstrItinData<IIC_DIV8_MEM, [InstrStage<68, [Port0, Port1]>] >, + InstrItinData<IIC_DIV16, [InstrStage<50, [Port0, Port1]>] >, + 
InstrItinData<IIC_DIV32, [InstrStage<50, [Port0, Port1]>] >, + InstrItinData<IIC_DIV64, [InstrStage<130, [Port0, Port1]>] >, + // neg/not/inc/dec + InstrItinData<IIC_UNARY_REG, [InstrStage<1, [Port0, Port1]>] >, + InstrItinData<IIC_UNARY_MEM, [InstrStage<1, [Port0]>] >, + // add/sub/and/or/xor/adc/sbc/cmp/test + InstrItinData<IIC_BIN_NONMEM, [InstrStage<1, [Port0, Port1]>] >, + InstrItinData<IIC_BIN_MEM, [InstrStage<1, [Port0]>] >, + // shift/rotate + InstrItinData<IIC_SR, [InstrStage<1, [Port0]>] >, + // shift double + InstrItinData<IIC_SHD16_REG_IM, [InstrStage<6, [Port0, Port1]>] >, + InstrItinData<IIC_SHD16_REG_CL, [InstrStage<6, [Port0, Port1]>] >, + InstrItinData<IIC_SHD16_MEM_IM, [InstrStage<6, [Port0, Port1]>] >, + InstrItinData<IIC_SHD16_MEM_CL, [InstrStage<6, [Port0, Port1]>] >, + InstrItinData<IIC_SHD32_REG_IM, [InstrStage<2, [Port0, Port1]>] >, + InstrItinData<IIC_SHD32_REG_CL, [InstrStage<2, [Port0, Port1]>] >, + InstrItinData<IIC_SHD32_MEM_IM, [InstrStage<4, [Port0, Port1]>] >, + InstrItinData<IIC_SHD32_MEM_CL, [InstrStage<4, [Port0, Port1]>] >, + InstrItinData<IIC_SHD64_REG_IM, [InstrStage<9, [Port0, Port1]>] >, + InstrItinData<IIC_SHD64_REG_CL, [InstrStage<8, [Port0, Port1]>] >, + InstrItinData<IIC_SHD64_MEM_IM, [InstrStage<9, [Port0, Port1]>] >, + InstrItinData<IIC_SHD64_MEM_CL, [InstrStage<9, [Port0, Port1]>] >, + // cmov + InstrItinData<IIC_CMOV16_RM, [InstrStage<1, [Port0]>] >, + InstrItinData<IIC_CMOV16_RR, [InstrStage<1, [Port0, Port1]>] >, + InstrItinData<IIC_CMOV32_RM, [InstrStage<1, [Port0]>] >, + InstrItinData<IIC_CMOV32_RR, [InstrStage<1, [Port0, Port1]>] >, + InstrItinData<IIC_CMOV64_RM, [InstrStage<1, [Port0]>] >, + InstrItinData<IIC_CMOV64_RR, [InstrStage<1, [Port0, Port1]>] >, + // set + InstrItinData<IIC_SET_M, [InstrStage<2, [Port0, Port1]>] >, + InstrItinData<IIC_SET_R, [InstrStage<1, [Port0, Port1]>] >, + // jcc + InstrItinData<IIC_Jcc, [InstrStage<1, [Port1]>] >, + // jcxz/jecxz/jrcxz + InstrItinData<IIC_JCXZ, [InstrStage<4, 
[Port0, Port1]>] >, + // jmp rel + InstrItinData<IIC_JMP_REL, [InstrStage<1, [Port1]>] >, + // jmp indirect + InstrItinData<IIC_JMP_REG, [InstrStage<1, [Port1]>] >, + InstrItinData<IIC_JMP_MEM, [InstrStage<2, [Port0, Port1]>] >, + // jmp far + InstrItinData<IIC_JMP_FAR_MEM, [InstrStage<32, [Port0, Port1]>] >, + InstrItinData<IIC_JMP_FAR_PTR, [InstrStage<31, [Port0, Port1]>] >, + // loop/loope/loopne + InstrItinData<IIC_LOOP, [InstrStage<18, [Port0, Port1]>] >, + InstrItinData<IIC_LOOPE, [InstrStage<8, [Port0, Port1]>] >, + InstrItinData<IIC_LOOPNE, [InstrStage<17, [Port0, Port1]>] >, + // call - all but reg/imm + InstrItinData<IIC_CALL_RI, [InstrStage<1, [Port0], 0>, InstrStage<1, [Port1]>] >, + InstrItinData<IIC_CALL_MEM, [InstrStage<15, [Port0, Port1]>] >, + InstrItinData<IIC_CALL_FAR_MEM, [InstrStage<40, [Port0, Port1]>] >, + InstrItinData<IIC_CALL_FAR_PTR, [InstrStage<39, [Port0, Port1]>] >, + //ret + InstrItinData<IIC_RET, [InstrStage<79, [Port0, Port1]>] >, + InstrItinData<IIC_RET_IMM, [InstrStage<1, [Port0], 0>, InstrStage<1, [Port1]>] > +]>; + diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 718497e..a9d95d3 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -246,6 +246,7 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { IsBTMemSlow = true; ToggleFeature(X86::FeatureSlowBTMem); } + // If it's Nehalem, unaligned memory access is fast. // FIXME: Nehalem is family 6. Also include Westmere and later processors? if (Family == 15 && Model == 26) { @@ -253,6 +254,11 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { ToggleFeature(X86::FeatureFastUAMem); } + // Set processor type. Currently only Atom is detected. 
+ if (Family == 6 && Model == 28) { + X86ProcFamily = IntelAtom; + } + unsigned MaxExtLevel; X86_MC::GetCpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX); @@ -310,6 +316,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, const std::string &FS, unsigned StackAlignOverride, bool is64Bit) : X86GenSubtargetInfo(TT, CPU, FS) + , X86ProcFamily(Others) , PICStyle(PICStyles::None) , X86SSELevel(NoMMXSSE) , X863DNowLevel(NoThreeDNow) @@ -333,14 +340,15 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, , IsUAMemFast(false) , HasVectorUAMem(false) , HasCmpxchg16b(false) + , PostRAScheduler(false) , stackAlignment(4) // FIXME: this is a known good value for Yonah. How about others? , MaxInlineSizeThreshold(128) , TargetTriple(TT) , In64BitMode(is64Bit) { // Determine default and user specified characteristics + std::string CPUName = CPU; if (!FS.empty() || !CPU.empty()) { - std::string CPUName = CPU; if (CPUName.empty()) { #if defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)\ || defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64) @@ -363,6 +371,13 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, // If feature string is not empty, parse features string. ParseSubtargetFeatures(CPUName, FullFS); } else { + if (CPUName.empty()) { +#if defined (__x86_64__) || defined(__i386__) + CPUName = sys::getHostCPUName(); +#else + CPUName = "generic"; +#endif + } // Otherwise, use CPUID to auto-detect feature set. AutoDetectSubtargetFeatures(); @@ -379,6 +394,11 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, } } + if (X86ProcFamily == IntelAtom) { + PostRAScheduler = true; + InstrItins = getInstrItineraryForCPU(CPUName); + } + // It's important to keep the MCSubtargetInfo feature bits in sync with // target data structure which is shared with MC code emitter, etc. 
if (In64BitMode) @@ -398,3 +418,12 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, isTargetSolaris() || In64BitMode) stackAlignment = 16; } + +bool X86Subtarget::enablePostRAScheduler( + CodeGenOpt::Level OptLevel, + TargetSubtargetInfo::AntiDepBreakMode& Mode, + RegClassVector& CriticalPathRCs) const { + Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL; + CriticalPathRCs.clear(); + return PostRAScheduler && OptLevel >= CodeGenOpt::Default; +} diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 94a2808..347da95 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -49,6 +49,13 @@ protected: NoThreeDNow, ThreeDNow, ThreeDNowA }; + enum X86ProcFamilyEnum { + Others, IntelAtom + }; + + /// X86ProcFamily - X86 processor family: Intel Atom, and others + X86ProcFamilyEnum X86ProcFamily; + /// PICStyle - Which PIC style to use /// PICStyles::Style PICStyle; @@ -125,6 +132,9 @@ protected: /// this is true for most x86-64 chips, but not the first AMD chips. bool HasCmpxchg16b; + /// PostRAScheduler - True if using post-register-allocation scheduler. + bool PostRAScheduler; + /// stackAlignment - The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. unsigned stackAlignment; @@ -135,6 +145,9 @@ protected: /// TargetTriple - What processor and OS we're targeting. Triple TargetTriple; + + /// Instruction itineraries for scheduling + InstrItineraryData InstrItins; private: /// In64BitMode - True if compiling for 64-bit, false for 32-bit. 
@@ -202,6 +215,8 @@ public: bool hasVectorUAMem() const { return HasVectorUAMem; } bool hasCmpxchg16b() const { return HasCmpxchg16b; } + bool isAtom() const { return X86ProcFamily == IntelAtom; } + const Triple &getTargetTriple() const { return TargetTriple; } bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); } @@ -291,6 +306,15 @@ public: /// indicating the number of scheduling cycles of backscheduling that /// should be attempted. unsigned getSpecialAddressLatency() const; + + /// enablePostRAScheduler - run for Atom optimization. + bool enablePostRAScheduler(CodeGenOpt::Level OptLevel, + TargetSubtargetInfo::AntiDepBreakMode& Mode, + RegClassVector& CriticalPathRCs) const; + + /// getInstrItins = Return the instruction itineraries based on the + /// subtarget selection. + const InstrItineraryData &getInstrItineraryData() const { return InstrItins; } }; } // End llvm namespace diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index b8002d5..88406ca 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -78,7 +78,8 @@ X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT, : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), Subtarget(TT, CPU, FS, Options.StackAlignmentOverride, is64Bit), FrameLowering(*this, Subtarget), - ELFWriterInfo(is64Bit, true) { + ELFWriterInfo(is64Bit, true), + InstrItins(Subtarget.getInstrItineraryData()){ // Determine the PICStyle based on the target selected. if (getRelocationModel() == Reloc::Static) { // Unless we're in PIC or DynamicNoPIC mode, set the PIC style to None. 
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h index 16092b8e..0e0e2ba 100644 --- a/lib/Target/X86/X86TargetMachine.h +++ b/lib/Target/X86/X86TargetMachine.h @@ -32,9 +32,10 @@ class formatted_raw_ostream; class StringRef; class X86TargetMachine : public LLVMTargetMachine { - X86Subtarget Subtarget; - X86FrameLowering FrameLowering; - X86ELFWriterInfo ELFWriterInfo; + X86Subtarget Subtarget; + X86FrameLowering FrameLowering; + X86ELFWriterInfo ELFWriterInfo; + InstrItineraryData InstrItins; public: X86TargetMachine(const Target &T, StringRef TT, @@ -65,6 +66,9 @@ public: virtual const X86ELFWriterInfo *getELFWriterInfo() const { return Subtarget.isTargetELF() ? &ELFWriterInfo : 0; } + virtual const InstrItineraryData *getInstrItineraryData() const { + return &InstrItins; + } // Set up the pass pipeline. virtual bool addInstSelector(PassManagerBase &PM); diff --git a/test/CodeGen/X86/2007-01-08-InstrSched.ll b/test/CodeGen/X86/2007-01-08-InstrSched.ll index 6f8b89c..24aa5b9 100644 --- a/test/CodeGen/X86/2007-01-08-InstrSched.ll +++ b/test/CodeGen/X86/2007-01-08-InstrSched.ll @@ -1,5 +1,5 @@ ; PR1075 -; RUN: llc < %s -mtriple=x86_64-apple-darwin -O3 | FileCheck %s +; RUN: llc < %s -mcpu=generic -mtriple=x86_64-apple-darwin -O3 | FileCheck %s define float @foo(float %x) nounwind { %tmp1 = fmul float %x, 3.000000e+00 diff --git a/test/CodeGen/X86/2007-11-06-InstrSched.ll b/test/CodeGen/X86/2007-11-06-InstrSched.ll index f6db0d0..838a0c3 100644 --- a/test/CodeGen/X86/2007-11-06-InstrSched.ll +++ b/test/CodeGen/X86/2007-11-06-InstrSched.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep lea +; RUN: llc < %s -march=x86 -mcpu=generic -mattr=+sse2 | not grep lea define float @foo(i32* %x, float* %y, i32 %c) nounwind { entry: diff --git a/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll b/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll index 265d968..2e95082 100644 --- a/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll +++ 
b/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 | grep {(%esp)} | count 2 +; RUN: llc < %s -march=x86 -mcpu=generic | grep {(%esp)} | count 2 ; PR1872 %struct.c34007g__designated___XUB = type { i32, i32, i32, i32 } diff --git a/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll b/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll index 75e0b8a..435adbb 100644 --- a/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll +++ b/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin -asm-verbose=0 | FileCheck %s +; RUN: llc < %s -mcpu=generic -mtriple=i386-apple-darwin -asm-verbose=0 | FileCheck %s ; PR3149 ; Make sure the copy after inline asm is not coalesced away. diff --git a/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll b/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll index 12bd285..1259cf4 100644 --- a/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll +++ b/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=x86_64-mingw32 < %s | FileCheck %s +; RUN: llc -mcpu=generic -mtriple=x86_64-mingw32 < %s | FileCheck %s ; CHECK: subq $40, %rsp ; CHECK: movaps %xmm8, (%rsp) ; CHECK: movaps %xmm7, 16(%rsp) diff --git a/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll b/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll index f6ac2ba..d4a74c9 100644 --- a/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll +++ b/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=i386-apple-darwin -tailcallopt < %s | FileCheck %s +; RUN: llc -mcpu=generic -mtriple=i386-apple-darwin -tailcallopt < %s | FileCheck %s ; Check that lowered argumens do not overwrite the return address before it is moved. 
; Bug 6225 ; diff --git a/test/CodeGen/X86/2010-05-03-CoalescerSubRegClobber.ll b/test/CodeGen/X86/2010-05-03-CoalescerSubRegClobber.ll index 5accfd7..e0c2c6c 100644 --- a/test/CodeGen/X86/2010-05-03-CoalescerSubRegClobber.ll +++ b/test/CodeGen/X86/2010-05-03-CoalescerSubRegClobber.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s | FileCheck %s +; RUN: llc < %s -mcpu=generic | FileCheck %s ; PR6941 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-apple-darwin10.0.0" diff --git a/test/CodeGen/X86/abi-isel.ll b/test/CodeGen/X86/abi-isel.ll index 5068d29..658ccaa 100644 --- a/test/CodeGen/X86/abi-isel.ll +++ b/test/CodeGen/X86/abi-isel.ll @@ -1,16 +1,16 @@ -; RUN: llc < %s -asm-verbose=0 -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-32-STATIC -; RUN: llc < %s -asm-verbose=0 -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-32-PIC +; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-32-STATIC +; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-32-PIC -; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-64-STATIC -; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=LINUX-64-PIC +; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-64-STATIC +; RUN: llc < %s 
-asm-verbose=0 -mcpu=generic -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=LINUX-64-PIC -; RUN: llc < %s -asm-verbose=0 -mtriple=i686-apple-darwin -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=DARWIN-32-STATIC -; RUN: llc < %s -asm-verbose=0 -mtriple=i686-apple-darwin -march=x86 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-32-DYNAMIC -; RUN: llc < %s -asm-verbose=0 -mtriple=i686-apple-darwin -march=x86 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-32-PIC +; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=DARWIN-32-STATIC +; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-32-DYNAMIC +; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-32-PIC -; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=DARWIN-64-STATIC -; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-DYNAMIC -; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-PIC +; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=DARWIN-64-STATIC +; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s 
-check-prefix=DARWIN-64-DYNAMIC +; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-PIC @src = external global [131072 x i32] @dst = external global [131072 x i32] diff --git a/test/CodeGen/X86/add.ll b/test/CodeGen/X86/add.ll index 7bf527a..8e871f4 100644 --- a/test/CodeGen/X86/add.ll +++ b/test/CodeGen/X86/add.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -march=x86 | FileCheck %s -check-prefix=X32 -; RUN: llc < %s -mtriple=x86_64-linux -join-physregs | FileCheck %s -check-prefix=X64 -; RUN: llc < %s -mtriple=x86_64-win32 -join-physregs | FileCheck %s -check-prefix=X64 +; RUN: llc < %s -mcpu=generic -march=x86 | FileCheck %s -check-prefix=X32 +; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -join-physregs | FileCheck %s -check-prefix=X64 +; RUN: llc < %s -mcpu=generic -mtriple=x86_64-win32 -join-physregs | FileCheck %s -check-prefix=X64 ; Some of these tests depend on -join-physregs to commute instructions. 
diff --git a/test/CodeGen/X86/atom-sched.ll b/test/CodeGen/X86/atom-sched.ll new file mode 100644 index 0000000..2301dfc --- /dev/null +++ b/test/CodeGen/X86/atom-sched.ll @@ -0,0 +1,28 @@ +; RUN: llc <%s -O2 -mcpu=atom -march=x86 -relocation-model=static | FileCheck -check-prefix=atom %s +; RUN: llc <%s -O2 -mcpu=core2 -march=x86 -relocation-model=static | FileCheck %s + +@a = common global i32 0, align 4 +@b = common global i32 0, align 4 +@c = common global i32 0, align 4 +@d = common global i32 0, align 4 +@e = common global i32 0, align 4 +@f = common global i32 0, align 4 + +define void @func() nounwind uwtable { +; atom: imull +; atom-NOT: movl +; atom: imull +; CHECK: imull +; CHECK: movl +; CHECK: imull +entry: + %0 = load i32* @b, align 4 + %1 = load i32* @c, align 4 + %mul = mul nsw i32 %0, %1 + store i32 %mul, i32* @a, align 4 + %2 = load i32* @e, align 4 + %3 = load i32* @f, align 4 + %mul1 = mul nsw i32 %2, %3 + store i32 %mul1, i32* @d, align 4 + ret void +} diff --git a/test/CodeGen/X86/byval6.ll b/test/CodeGen/X86/byval6.ll index b060369..2d39901 100644 --- a/test/CodeGen/X86/byval6.ll +++ b/test/CodeGen/X86/byval6.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 | grep add | not grep 16 +; RUN: llc < %s -mcpu=generic -march=x86 | grep add | not grep 16 %struct.W = type { x86_fp80, x86_fp80 } @B = global %struct.W { x86_fp80 0xK4001A000000000000000, x86_fp80 0xK4001C000000000000000 }, align 32 diff --git a/test/CodeGen/X86/divide-by-constant.ll b/test/CodeGen/X86/divide-by-constant.ll index 87c1be5..e577ecb 100644 --- a/test/CodeGen/X86/divide-by-constant.ll +++ b/test/CodeGen/X86/divide-by-constant.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=i686-pc-linux-gnu -asm-verbose=0 | FileCheck %s +; RUN: llc < %s -mcpu=generic -mtriple=i686-pc-linux-gnu -asm-verbose=0 | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" target triple = "i686-pc-linux-gnu" 
diff --git a/test/CodeGen/X86/epilogue.ll b/test/CodeGen/X86/epilogue.ll index 52dcb61..0f16a64 100644 --- a/test/CodeGen/X86/epilogue.ll +++ b/test/CodeGen/X86/epilogue.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=x86 | not grep lea -; RUN: llc < %s -march=x86 | grep {movl %ebp} +; RUN: llc < %s -mcpu=generic -march=x86 | not grep lea +; RUN: llc < %s -mcpu=generic -march=x86 | grep {movl %ebp} declare void @bar(<2 x i64>* %n) diff --git a/test/CodeGen/X86/fast-cc-merge-stack-adj.ll b/test/CodeGen/X86/fast-cc-merge-stack-adj.ll index e151821..e4982f0 100644 --- a/test/CodeGen/X86/fast-cc-merge-stack-adj.ll +++ b/test/CodeGen/X86/fast-cc-merge-stack-adj.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \ +; RUN: llc < %s -mcpu=generic -march=x86 -x86-asm-syntax=intel | \ ; RUN: grep {add ESP, 8} target triple = "i686-pc-linux-gnu" diff --git a/test/CodeGen/X86/fast-isel-x86.ll b/test/CodeGen/X86/fast-isel-x86.ll index 19972f7..b9598bb 100644 --- a/test/CodeGen/X86/fast-isel-x86.ll +++ b/test/CodeGen/X86/fast-isel-x86.ll @@ -1,4 +1,4 @@ -; RUN: llc -fast-isel -O0 -mtriple=i386-apple-darwin10 -relocation-model=pic < %s | FileCheck %s +; RUN: llc -fast-isel -O0 -mcpu=generic -mtriple=i386-apple-darwin10 -relocation-model=pic < %s | FileCheck %s ; This should use flds to set the return value. 
; CHECK: test0: diff --git a/test/CodeGen/X86/fold-load.ll b/test/CodeGen/X86/fold-load.ll index 5525af2..e03cb7e 100644 --- a/test/CodeGen/X86/fold-load.ll +++ b/test/CodeGen/X86/fold-load.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 | FileCheck %s +; RUN: llc < %s -mcpu=generic -march=x86 | FileCheck %s %struct._obstack_chunk = type { i8*, %struct._obstack_chunk*, [4 x i8] } %struct.obstack = type { i32, %struct._obstack_chunk*, i8*, i8*, i8*, i32, i32, %struct._obstack_chunk* (...)*, void (...)*, i8*, i8 } @stmt_obstack = external global %struct.obstack ; <%struct.obstack*> [#uses=1] diff --git a/test/CodeGen/X86/inline-asm-fpstack.ll b/test/CodeGen/X86/inline-asm-fpstack.ll index c9a1c1c..2249618 100644 --- a/test/CodeGen/X86/inline-asm-fpstack.ll +++ b/test/CodeGen/X86/inline-asm-fpstack.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s +; RUN: llc < %s -mcpu=generic -mtriple=i386-apple-darwin | FileCheck %s ; There should be no stack manipulations between the inline asm and ret. 
; CHECK: test1 diff --git a/test/CodeGen/X86/masked-iv-safe.ll b/test/CodeGen/X86/masked-iv-safe.ll index 3a4acb8..a7b036e 100644 --- a/test/CodeGen/X86/masked-iv-safe.ll +++ b/test/CodeGen/X86/masked-iv-safe.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86-64 > %t +; RUN: llc < %s -mcpu=generic -march=x86-64 > %t ; RUN: not grep and %t ; RUN: not grep movz %t ; RUN: not grep sar %t diff --git a/test/CodeGen/X86/optimize-max-3.ll b/test/CodeGen/X86/optimize-max-3.ll index e42aa9d..d092916 100644 --- a/test/CodeGen/X86/optimize-max-3.ll +++ b/test/CodeGen/X86/optimize-max-3.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mtriple=x86_64-linux -asm-verbose=false | FileCheck %s -; RUN: llc < %s -mtriple=x86_64-win32 -asm-verbose=false | FileCheck %s +; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -asm-verbose=false | FileCheck %s +; RUN: llc < %s -mcpu=generic -mtriple=x86_64-win32 -asm-verbose=false | FileCheck %s ; LSR's OptimizeMax should eliminate the select (max). diff --git a/test/CodeGen/X86/peep-test-3.ll b/test/CodeGen/X86/peep-test-3.ll index 528c4bc..a379980 100644 --- a/test/CodeGen/X86/peep-test-3.ll +++ b/test/CodeGen/X86/peep-test-3.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -post-RA-scheduler=false | FileCheck %s +; RUN: llc < %s -mcpu=generic -march=x86 -post-RA-scheduler=false | FileCheck %s ; rdar://7226797 ; LLVM should omit the testl and use the flags result from the orl. 
diff --git a/test/CodeGen/X86/pic.ll b/test/CodeGen/X86/pic.ll index fb60ac2..fc06309 100644 --- a/test/CodeGen/X86/pic.ll +++ b/test/CodeGen/X86/pic.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=i686-pc-linux-gnu -relocation-model=pic -asm-verbose=false -post-RA-scheduler=false | FileCheck %s -check-prefix=LINUX +; RUN: llc < %s -mcpu=generic -mtriple=i686-pc-linux-gnu -relocation-model=pic -asm-verbose=false -post-RA-scheduler=false | FileCheck %s -check-prefix=LINUX @ptr = external global i32* @dst = external global i32 diff --git a/test/CodeGen/X86/red-zone.ll b/test/CodeGen/X86/red-zone.ll index d936971..d99a7a4 100644 --- a/test/CodeGen/X86/red-zone.ll +++ b/test/CodeGen/X86/red-zone.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s +; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux | FileCheck %s ; First without noredzone. ; CHECK: f0: diff --git a/test/CodeGen/X86/red-zone2.ll b/test/CodeGen/X86/red-zone2.ll index 9557d17..f092163 100644 --- a/test/CodeGen/X86/red-zone2.ll +++ b/test/CodeGen/X86/red-zone2.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86-64 > %t +; RUN: llc < %s -mcpu=generic -march=x86-64 > %t ; RUN: grep subq %t | count 1 ; RUN: grep addq %t | count 1 diff --git a/test/CodeGen/X86/reghinting.ll b/test/CodeGen/X86/reghinting.ll index 87f65ed..6759115 100644 --- a/test/CodeGen/X86/reghinting.ll +++ b/test/CodeGen/X86/reghinting.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=x86_64-apple-macosx | FileCheck %s +; RUN: llc < %s -mcpu=generic -mtriple=x86_64-apple-macosx | FileCheck %s ; PR10221 ;; The registers %x and %y must both spill across the finit call. 
diff --git a/test/CodeGen/X86/segmented-stacks-dynamic.ll b/test/CodeGen/X86/segmented-stacks-dynamic.ll index 103551b..5ce08aa 100644 --- a/test/CodeGen/X86/segmented-stacks-dynamic.ll +++ b/test/CodeGen/X86/segmented-stacks-dynamic.ll @@ -1,7 +1,7 @@ -; RUN: llc < %s -mtriple=i686-linux -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X32 -; RUN: llc < %s -mtriple=x86_64-linux -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X64 -; RUN: llc < %s -mtriple=i686-linux -segmented-stacks -filetype=obj -; RUN: llc < %s -mtriple=x86_64-linux -segmented-stacks -filetype=obj +; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X32 +; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X64 +; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -segmented-stacks -filetype=obj +; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -segmented-stacks -filetype=obj ; Just to prevent the alloca from being optimized away declare void @dummy_use(i32*, i32) diff --git a/test/CodeGen/X86/segmented-stacks.ll b/test/CodeGen/X86/segmented-stacks.ll index 899ee88..5407b87 100644 --- a/test/CodeGen/X86/segmented-stacks.ll +++ b/test/CodeGen/X86/segmented-stacks.ll @@ -1,23 +1,23 @@ -; RUN: llc < %s -mtriple=i686-linux -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X32-Linux -; RUN: llc < %s -mtriple=x86_64-linux -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X64-Linux -; RUN: llc < %s -mtriple=i686-darwin -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X32-Darwin -; RUN: llc < %s -mtriple=x86_64-darwin -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X64-Darwin -; RUN: llc < %s -mtriple=i686-mingw32 -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X32-MinGW -; RUN: llc < %s -mtriple=x86_64-freebsd -segmented-stacks 
-verify-machineinstrs | FileCheck %s -check-prefix=X64-FreeBSD +; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X32-Linux +; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X64-Linux +; RUN: llc < %s -mcpu=generic -mtriple=i686-darwin -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X32-Darwin +; RUN: llc < %s -mcpu=generic -mtriple=x86_64-darwin -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X64-Darwin +; RUN: llc < %s -mcpu=generic -mtriple=i686-mingw32 -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X32-MinGW +; RUN: llc < %s -mcpu=generic -mtriple=x86_64-freebsd -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X64-FreeBSD ; We used to crash with filetype=obj -; RUN: llc < %s -mtriple=i686-linux -segmented-stacks -filetype=obj -; RUN: llc < %s -mtriple=x86_64-linux -segmented-stacks -filetype=obj -; RUN: llc < %s -mtriple=i686-darwin -segmented-stacks -filetype=obj -; RUN: llc < %s -mtriple=x86_64-darwin -segmented-stacks -filetype=obj -; RUN: llc < %s -mtriple=i686-mingw32 -segmented-stacks -filetype=obj -; RUN: llc < %s -mtriple=x86_64-freebsd -segmented-stacks -filetype=obj - -; RUN: not llc < %s -mtriple=x86_64-solaris -segmented-stacks 2> %t.log +; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -segmented-stacks -filetype=obj +; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -segmented-stacks -filetype=obj +; RUN: llc < %s -mcpu=generic -mtriple=i686-darwin -segmented-stacks -filetype=obj +; RUN: llc < %s -mcpu=generic -mtriple=x86_64-darwin -segmented-stacks -filetype=obj +; RUN: llc < %s -mcpu=generic -mtriple=i686-mingw32 -segmented-stacks -filetype=obj +; RUN: llc < %s -mcpu=generic -mtriple=x86_64-freebsd -segmented-stacks -filetype=obj + +; RUN: not llc < %s -mcpu=generic -mtriple=x86_64-solaris -segmented-stacks 2> %t.log ; 
RUN: FileCheck %s -input-file=%t.log -check-prefix=X64-Solaris -; RUN: not llc < %s -mtriple=x86_64-mingw32 -segmented-stacks 2> %t.log +; RUN: not llc < %s -mcpu=generic -mtriple=x86_64-mingw32 -segmented-stacks 2> %t.log ; RUN: FileCheck %s -input-file=%t.log -check-prefix=X64-MinGW -; RUN: not llc < %s -mtriple=i686-freebsd -segmented-stacks 2> %t.log +; RUN: not llc < %s -mcpu=generic -mtriple=i686-freebsd -segmented-stacks 2> %t.log ; RUN: FileCheck %s -input-file=%t.log -check-prefix=X32-FreeBSD ; X64-Solaris: Segmented stacks not supported on this platform diff --git a/test/CodeGen/X86/stack-align2.ll b/test/CodeGen/X86/stack-align2.ll index 5523c0e..18cce72 100644 --- a/test/CodeGen/X86/stack-align2.ll +++ b/test/CodeGen/X86/stack-align2.ll @@ -1,9 +1,9 @@ -; RUN: llc < %s -mtriple=i386-linux | FileCheck %s -check-prefix=LINUX-I386 -; RUN: llc < %s -mtriple=i386-netbsd | FileCheck %s -check-prefix=NETBSD-I386 -; RUN: llc < %s -mtriple=i686-apple-darwin8 | FileCheck %s -check-prefix=DARWIN-I386 -; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=LINUX-X86_64 -; RUN: llc < %s -mtriple=x86_64-netbsd | FileCheck %s -check-prefix=NETBSD-X86_64 -; RUN: llc < %s -mtriple=x86_64-apple-darwin8 | FileCheck %s -check-prefix=DARWIN-X86_64 +; RUN: llc < %s -mcpu=generic -mtriple=i386-linux | FileCheck %s -check-prefix=LINUX-I386 +; RUN: llc < %s -mcpu=generic -mtriple=i386-netbsd | FileCheck %s -check-prefix=NETBSD-I386 +; RUN: llc < %s -mcpu=generic -mtriple=i686-apple-darwin8 | FileCheck %s -check-prefix=DARWIN-I386 +; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux | FileCheck %s -check-prefix=LINUX-X86_64 +; RUN: llc < %s -mcpu=generic -mtriple=x86_64-netbsd | FileCheck %s -check-prefix=NETBSD-X86_64 +; RUN: llc < %s -mcpu=generic -mtriple=x86_64-apple-darwin8 | FileCheck %s -check-prefix=DARWIN-X86_64 define i32 @test() nounwind { entry: diff --git a/test/CodeGen/X86/tailcallbyval64.ll b/test/CodeGen/X86/tailcallbyval64.ll index 7ecf379..7621602 
100644 --- a/test/CodeGen/X86/tailcallbyval64.ll +++ b/test/CodeGen/X86/tailcallbyval64.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=x86_64-linux -tailcallopt | FileCheck %s +; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -tailcallopt | FileCheck %s ; FIXME: Win64 does not support byval. diff --git a/test/CodeGen/X86/tailcallstack64.ll b/test/CodeGen/X86/tailcallstack64.ll index c18c7aa..bff5f99 100644 --- a/test/CodeGen/X86/tailcallstack64.ll +++ b/test/CodeGen/X86/tailcallstack64.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -tailcallopt -mtriple=x86_64-linux -post-RA-scheduler=true | FileCheck %s -; RUN: llc < %s -tailcallopt -mtriple=x86_64-win32 -post-RA-scheduler=true | FileCheck %s +; RUN: llc < %s -tailcallopt -mcpu=generic -mtriple=x86_64-linux -post-RA-scheduler=true | FileCheck %s +; RUN: llc < %s -tailcallopt -mcpu=generic -mtriple=x86_64-win32 -post-RA-scheduler=true | FileCheck %s ; FIXME: Redundant unused stack allocation could be eliminated. ; CHECK: subq ${{24|72|80}}, %rsp diff --git a/test/CodeGen/X86/twoaddr-lea.ll b/test/CodeGen/X86/twoaddr-lea.ll index b7fe039..9d58019 100644 --- a/test/CodeGen/X86/twoaddr-lea.ll +++ b/test/CodeGen/X86/twoaddr-lea.ll @@ -5,7 +5,7 @@ ;; allocator turns the shift into an LEA. This also occurs for ADD. ; Check that the shift gets turned into an LEA. 
-; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s +; RUN: llc < %s -mcpu=generic -mtriple=x86_64-apple-darwin | FileCheck %s @G = external global i32 diff --git a/test/CodeGen/X86/v-binop-widen.ll b/test/CodeGen/X86/v-binop-widen.ll index 3bee700..8655c6c 100644 --- a/test/CodeGen/X86/v-binop-widen.ll +++ b/test/CodeGen/X86/v-binop-widen.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=x86 -mattr=+sse < %s | FileCheck %s +; RUN: llc -mcpu=generic -march=x86 -mattr=+sse < %s | FileCheck %s ; CHECK: divss ; CHECK: divps ; CHECK: divps diff --git a/test/CodeGen/X86/vec_call.ll b/test/CodeGen/X86/vec_call.ll index b3efc7b..f2fc7e7 100644 --- a/test/CodeGen/X86/vec_call.ll +++ b/test/CodeGen/X86/vec_call.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin8 | \ +; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin8 | \ ; RUN: grep {subl.*60} -; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin8 | \ +; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin8 | \ ; RUN: grep {movaps.*32} diff --git a/test/CodeGen/X86/widen_arith-1.ll b/test/CodeGen/X86/widen_arith-1.ll index 85367e8..661cde8 100644 --- a/test/CodeGen/X86/widen_arith-1.ll +++ b/test/CodeGen/X86/widen_arith-1.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s +; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+sse42 | FileCheck %s define void @update(<3 x i8>* %dst, <3 x i8>* %src, i32 %n) nounwind { entry: diff --git a/test/CodeGen/X86/widen_arith-3.ll b/test/CodeGen/X86/widen_arith-3.ll index b959ce8..f55b184 100644 --- a/test/CodeGen/X86/widen_arith-3.ll +++ b/test/CodeGen/X86/widen_arith-3.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -mattr=+sse42 -post-RA-scheduler=true | FileCheck %s +; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+sse42 -post-RA-scheduler=true | FileCheck %s ; CHECK: incl ; CHECK: incl ; CHECK: incl diff --git a/test/CodeGen/X86/widen_load-2.ll 
b/test/CodeGen/X86/widen_load-2.ll index 29689dd..79aa000 100644 --- a/test/CodeGen/X86/widen_load-2.ll +++ b/test/CodeGen/X86/widen_load-2.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -o - -march=x86-64 -mattr=+sse42 | FileCheck %s +; RUN: llc < %s -o - -mcpu=generic -march=x86-64 -mattr=+sse42 | FileCheck %s ; Test based on pr5626 to load/store ; diff --git a/test/CodeGen/X86/win64_alloca_dynalloca.ll b/test/CodeGen/X86/win64_alloca_dynalloca.ll index e39d007..a961c6a 100644 --- a/test/CodeGen/X86/win64_alloca_dynalloca.ll +++ b/test/CodeGen/X86/win64_alloca_dynalloca.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -join-physregs -mtriple=x86_64-mingw32 | FileCheck %s -check-prefix=M64 -; RUN: llc < %s -join-physregs -mtriple=x86_64-win32 | FileCheck %s -check-prefix=W64 -; RUN: llc < %s -join-physregs -mtriple=x86_64-win32-macho | FileCheck %s -check-prefix=EFI +; RUN: llc < %s -join-physregs -mcpu=generic -mtriple=x86_64-mingw32 | FileCheck %s -check-prefix=M64 +; RUN: llc < %s -join-physregs -mcpu=generic -mtriple=x86_64-win32 | FileCheck %s -check-prefix=W64 +; RUN: llc < %s -join-physregs -mcpu=generic -mtriple=x86_64-win32-macho | FileCheck %s -check-prefix=EFI ; PR8777 ; PR8778 diff --git a/test/CodeGen/X86/win64_vararg.ll b/test/CodeGen/X86/win64_vararg.ll index efe8bca..52bc509 100644 --- a/test/CodeGen/X86/win64_vararg.ll +++ b/test/CodeGen/X86/win64_vararg.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s +; RUN: llc < %s -mcpu=generic -mtriple=x86_64-pc-win32 | FileCheck %s ; Verify that the var arg parameters which are passed in registers are stored ; in home stack slots allocated by the caller and that AP is correctly diff --git a/test/CodeGen/X86/zext-fold.ll b/test/CodeGen/X86/zext-fold.ll index 8e4c2a5..ff93c68 100644 --- a/test/CodeGen/X86/zext-fold.ll +++ b/test/CodeGen/X86/zext-fold.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 | FileCheck %s +; RUN: llc < %s -mcpu=generic -march=x86 | FileCheck %s ;; Simple case define i32 @test1(i8 %x) 
nounwind readnone { |