diff options
Diffstat (limited to 'lib/Target')
-rw-r--r-- | lib/Target/X86/X86.td | 6 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 4 | ||||
-rw-r--r-- | lib/Target/X86/X86Subtarget.cpp | 1 | ||||
-rw-r--r-- | lib/Target/X86/X86Subtarget.h | 5 |
4 files changed, 15 insertions, 1 deletions
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 18e6b7c..d078a7b 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -120,6 +120,9 @@ def FeatureBMI2 : SubtargetFeature<"bmi2", "HasBMI2", "true", "Support BMI2 instructions">; def FeatureLeaForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true", "Use LEA for adjusting the stack pointer">; +def FeatureSlowDivide : SubtargetFeature<"idiv-to-divb", + "HasSlowDivide", "true", + "Use small divide for positive values less than 256">; //===----------------------------------------------------------------------===// // X86 processors supported. @@ -160,7 +163,8 @@ def : Proc<"core2", [FeatureSSSE3, FeatureCMPXCHG16B, def : Proc<"penryn", [FeatureSSE41, FeatureCMPXCHG16B, FeatureSlowBTMem]>; def : AtomProc<"atom", [ProcIntelAtom, FeatureSSE3, FeatureCMPXCHG16B, - FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP]>; + FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP, + FeatureSlowDivide]>; // "Arrandale" along with corei3 and corei5 def : Proc<"corei7", [FeatureSSE42, FeatureCMPXCHG16B, FeatureSlowBTMem, FeatureFastUAMem, diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 9f487a6..67ad99d 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -182,6 +182,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setSchedulingPreference(Sched::RegPressure); setStackPointerRegisterToSaveRestore(X86StackPtr); + // Bypass i32 with i8 on Atom when compiling with O2 + if (Subtarget->hasSlowDivide() && TM.getOptLevel() >= CodeGenOpt::Default) + addBypassSlowDivType(Type::getInt32Ty(getGlobalContext()), Type::getInt8Ty(getGlobalContext())); + if (Subtarget->isTargetWindows() && !Subtarget->isTargetCygMing()) { // Setup Windows compiler runtime calls. setLibcallName(RTLIB::SDIV_I64, "_alldiv"); diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 9087852..0d7b664 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -346,6 +346,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, , HasVectorUAMem(false) , HasCmpxchg16b(false) , UseLeaForSP(false) + , HasSlowDivide(false) , PostRAScheduler(false) , stackAlignment(4) // FIXME: this is a known good value for Yonah. How about others? diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 33608bb..dde7e24 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -136,6 +136,10 @@ protected: /// the stack pointer. This is an optimization for Intel Atom processors. bool UseLeaForSP; + /// HasSlowDivide - True if smaller divides are significantly faster than + /// full divides and should be used when possible. + bool HasSlowDivide; + /// PostRAScheduler - True if using post-register-allocation scheduler. bool PostRAScheduler; @@ -221,6 +225,7 @@ public: bool hasVectorUAMem() const { return HasVectorUAMem; } bool hasCmpxchg16b() const { return HasCmpxchg16b; } bool useLeaForSP() const { return UseLeaForSP; } + bool hasSlowDivide() const { return HasSlowDivide; } bool isAtom() const { return X86ProcFamily == IntelAtom; } |