diff options
Diffstat (limited to 'lib/Target/X86/X86Schedule.td')
| -rw-r--r-- | lib/Target/X86/X86Schedule.td | 42 |
1 files changed, 29 insertions, 13 deletions
diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td index da0ca7d..ceb2e05 100644 --- a/lib/Target/X86/X86Schedule.td +++ b/lib/Target/X86/X86Schedule.td @@ -42,6 +42,7 @@ multiclass X86SchedWritePair { // Arithmetic. defm WriteALU : X86SchedWritePair; // Simple integer ALU op. defm WriteIMul : X86SchedWritePair; // Integer multiplication. +def WriteIMulH : SchedWrite; // Integer multiplication, high part. defm WriteIDiv : X86SchedWritePair; // Integer division. def WriteLEA : SchedWrite; // LEA instructions can't fold loads. @@ -53,6 +54,10 @@ def WriteLoad : SchedWrite; def WriteStore : SchedWrite; def WriteMove : SchedWrite; +// Idioms that clear a register, like xorps %xmm0, %xmm0. +// These can often bypass execution ports completely. +def WriteZero : SchedWrite; + // Branches don't produce values, so they have no latency, but they still // consume resources. Indirect branches can fold loads. defm WriteJump : X86SchedWritePair; @@ -63,6 +68,10 @@ defm WriteFMul : X86SchedWritePair; // Floating point multiplication. defm WriteFDiv : X86SchedWritePair; // Floating point division. defm WriteFSqrt : X86SchedWritePair; // Floating point square root. defm WriteFRcp : X86SchedWritePair; // Floating point reciprocal. +defm WriteFMA : X86SchedWritePair; // Fused Multiply Add. + +// FMA Scheduling helper class. +class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; } // Vector integer operations. defm WriteVecALU : X86SchedWritePair; // Vector integer ALU op, no logicals. @@ -79,9 +88,14 @@ defm WriteCvtF2I : X86SchedWritePair; // Float -> Integer. defm WriteCvtI2F : X86SchedWritePair; // Integer -> Float. defm WriteCvtF2F : X86SchedWritePair; // Float -> Float size conversion. +// Catch-all for expensive system instructions. +def WriteSystem : SchedWrite; + +// Old microcoded instructions that nobody uses. 
+def WriteMicrocoded : SchedWrite; + //===----------------------------------------------------------------------===// // Instruction Itinerary classes used for X86 -def IIC_DEFAULT : InstrItinClass; def IIC_ALU_MEM : InstrItinClass; def IIC_ALU_NONMEM : InstrItinClass; def IIC_LEA : InstrItinClass; @@ -253,10 +267,14 @@ def IIC_SSE_PINSRW : InstrItinClass; def IIC_SSE_PABS_RR : InstrItinClass; def IIC_SSE_PABS_RM : InstrItinClass; -def IIC_SSE_SQRTP_RR : InstrItinClass; -def IIC_SSE_SQRTP_RM : InstrItinClass; -def IIC_SSE_SQRTS_RR : InstrItinClass; -def IIC_SSE_SQRTS_RM : InstrItinClass; +def IIC_SSE_SQRTPS_RR : InstrItinClass; +def IIC_SSE_SQRTPS_RM : InstrItinClass; +def IIC_SSE_SQRTSS_RR : InstrItinClass; +def IIC_SSE_SQRTSS_RM : InstrItinClass; +def IIC_SSE_SQRTPD_RR : InstrItinClass; +def IIC_SSE_SQRTPD_RM : InstrItinClass; +def IIC_SSE_SQRTSD_RR : InstrItinClass; +def IIC_SSE_SQRTSD_RM : InstrItinClass; def IIC_SSE_RCPP_RR : InstrItinClass; def IIC_SSE_RCPP_RM : InstrItinClass; @@ -533,8 +551,9 @@ def IIC_NOP : InstrItinClass; // Resources beyond the decoder operate on micro-ops and are buffered // so adjacent micro-ops don't directly compete. // -// MinLatency=0 indicates that RAW dependencies can be decoded in the -// same cycle. +// MicroOpBufferSize > 1 indicates that RAW dependencies can be +// decoded in the same cycle. The value 32 is a reasonably arbitrary +// number of in-flight instructions. // // HighLatency=10 is optimistic. X86InstrInfo::isHighLatencyDef // indicates high latency opcodes. Alternatively, InstrItinData @@ -542,17 +561,14 @@ def IIC_NOP : InstrItinClass; // latencies. Since these latencies are not used for pipeline hazards, // they do not need to be exact. // -// ILPWindow=10 is an arbitrary threshold that approximates cycles of -// latency hidden by instruction buffers. The actual value is not very -// important but should be zero for inorder and nonzero for OOO processors. 
-// // The GenericModel contains no instruction itineraries. def GenericModel : SchedMachineModel { let IssueWidth = 4; - let MinLatency = 0; + let MicroOpBufferSize = 32; let LoadLatency = 4; let HighLatency = 10; - let ILPWindow = 10; } include "X86ScheduleAtom.td" +include "X86SchedSandyBridge.td" +include "X86SchedHaswell.td" |
