aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--lib/Target/ARM/ARMSubtarget.cpp84
-rw-r--r--lib/Target/ARM/ARMSubtarget.h4
-rw-r--r--lib/Target/X86/X86Subtarget.cpp72
-rw-r--r--lib/Target/X86/X86Subtarget.h4
-rw-r--r--test/CodeGen/X86/subtarget-feature-change.ll66
5 files changed, 156 insertions, 74 deletions
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index c33bb9d..e11314d 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -45,51 +45,55 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
const std::string &FS)
: ARMGenSubtargetInfo(TT, CPU, FS)
, ARMProcFamily(Others)
- , HasV4TOps(false)
- , HasV5TOps(false)
- , HasV5TEOps(false)
- , HasV6Ops(false)
- , HasV6T2Ops(false)
- , HasV7Ops(false)
- , HasVFPv2(false)
- , HasVFPv3(false)
- , HasVFPv4(false)
- , HasNEON(false)
- , UseNEONForSinglePrecisionFP(false)
- , UseMulOps(UseFusedMulOps)
- , SlowFPVMLx(false)
- , HasVMLxForwarding(false)
- , SlowFPBrcc(false)
- , InThumbMode(false)
- , HasThumb2(false)
- , IsMClass(false)
- , NoARM(false)
- , PostRAScheduler(false)
- , IsR9Reserved(ReserveR9)
- , UseMovt(false)
- , SupportsTailCall(false)
- , HasFP16(false)
- , HasD16(false)
- , HasHardwareDivide(false)
- , HasHardwareDivideInARM(false)
- , HasT2ExtractPack(false)
- , HasDataBarrier(false)
- , Pref32BitThumb(false)
- , AvoidCPSRPartialUpdate(false)
- , AvoidMOVsShifterOperand(false)
- , HasRAS(false)
- , HasMPExtension(false)
- , FPOnlySP(false)
- , AllowsUnalignedMem(false)
- , Thumb2DSP(false)
- , UseNaClTrap(false)
, stackAlignment(4)
, CPUString(CPU)
, TargetTriple(TT)
, TargetABI(ARM_ABI_APCS) {
+ initializeEnvironment();
resetSubtargetFeatures(CPU, FS);
}
+void ARMSubtarget::initializeEnvironment() {
+ HasV4TOps = false;
+ HasV5TOps = false;
+ HasV5TEOps = false;
+ HasV6Ops = false;
+ HasV6T2Ops = false;
+ HasV7Ops = false;
+ HasVFPv2 = false;
+ HasVFPv3 = false;
+ HasVFPv4 = false;
+ HasNEON = false;
+ UseNEONForSinglePrecisionFP = false;
+ UseMulOps = UseFusedMulOps;
+ SlowFPVMLx = false;
+ HasVMLxForwarding = false;
+ SlowFPBrcc = false;
+ InThumbMode = false;
+ HasThumb2 = false;
+ IsMClass = false;
+ NoARM = false;
+ PostRAScheduler = false;
+ IsR9Reserved = ReserveR9;
+ UseMovt = false;
+ SupportsTailCall = false;
+ HasFP16 = false;
+ HasD16 = false;
+ HasHardwareDivide = false;
+ HasHardwareDivideInARM = false;
+ HasT2ExtractPack = false;
+ HasDataBarrier = false;
+ Pref32BitThumb = false;
+ AvoidCPSRPartialUpdate = false;
+ AvoidMOVsShifterOperand = false;
+ HasRAS = false;
+ HasMPExtension = false;
+ FPOnlySP = false;
+ AllowsUnalignedMem = false;
+ Thumb2DSP = false;
+ UseNaClTrap = false;
+}
+
void ARMSubtarget::resetSubtargetFeatures(const MachineFunction *MF) {
AttributeSet FnAttrs = MF->getFunction()->getAttributes();
Attribute CPUAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex,
@@ -100,8 +104,10 @@ void ARMSubtarget::resetSubtargetFeatures(const MachineFunction *MF) {
!CPUAttr.hasAttribute(Attribute::None) ?CPUAttr.getValueAsString() : "";
std::string FS =
!FSAttr.hasAttribute(Attribute::None) ? FSAttr.getValueAsString() : "";
- if (!FS.empty())
+ if (!FS.empty()) {
+ initializeEnvironment();
resetSubtargetFeatures(CPU, FS);
+ }
}
void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 87834b8..f47555c 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -204,8 +204,10 @@ protected:
/// \brief Reset the features for the X86 target.
virtual void resetSubtargetFeatures(const MachineFunction *MF);
+private:
+ void initializeEnvironment();
void resetSubtargetFeatures(StringRef CPU, StringRef FS);
-
+public:
void computeIssueWidth();
bool hasV4TOps() const { return HasV4TOps; }
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 58e0d06..6391acf 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -336,8 +336,10 @@ void X86Subtarget::resetSubtargetFeatures(const MachineFunction *MF) {
!CPUAttr.hasAttribute(Attribute::None) ?CPUAttr.getValueAsString() : "";
std::string FS =
!FSAttr.hasAttribute(Attribute::None) ? FSAttr.getValueAsString() : "";
- if (!FS.empty())
+ if (!FS.empty()) {
+ initializeEnvironment();
resetSubtargetFeatures(CPU, FS);
+ }
}
void X86Subtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
@@ -417,46 +419,50 @@ void X86Subtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
stackAlignment = 16;
}
+void X86Subtarget::initializeEnvironment() {
+ PICStyle = PICStyles::None;
+ X86SSELevel = NoMMXSSE;
+ X863DNowLevel = NoThreeDNow;
+ HasCMov = false;
+ HasX86_64 = false;
+ HasPOPCNT = false;
+ HasSSE4A = false;
+ HasAES = false;
+ HasPCLMUL = false;
+ HasFMA = false;
+ HasFMA4 = false;
+ HasXOP = false;
+ HasMOVBE = false;
+ HasRDRAND = false;
+ HasF16C = false;
+ HasFSGSBase = false;
+ HasLZCNT = false;
+ HasBMI = false;
+ HasBMI2 = false;
+ HasRTM = false;
+ HasADX = false;
+ IsBTMemSlow = false;
+ IsUAMemFast = false;
+ HasVectorUAMem = false;
+ HasCmpxchg16b = false;
+ UseLeaForSP = false;
+ HasSlowDivide = false;
+ PostRAScheduler = false;
+ PadShortFunctions = false;
+ stackAlignment = 4;
+ // FIXME: this is a known good value for Yonah. How about others?
+ MaxInlineSizeThreshold = 128;
+}
+
X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
const std::string &FS,
unsigned StackAlignOverride, bool is64Bit)
: X86GenSubtargetInfo(TT, CPU, FS)
, X86ProcFamily(Others)
- , PICStyle(PICStyles::None)
- , X86SSELevel(NoMMXSSE)
- , X863DNowLevel(NoThreeDNow)
- , HasCMov(false)
- , HasX86_64(false)
- , HasPOPCNT(false)
- , HasSSE4A(false)
- , HasAES(false)
- , HasPCLMUL(false)
- , HasFMA(false)
- , HasFMA4(false)
- , HasXOP(false)
- , HasMOVBE(false)
- , HasRDRAND(false)
- , HasF16C(false)
- , HasFSGSBase(false)
- , HasLZCNT(false)
- , HasBMI(false)
- , HasBMI2(false)
- , HasRTM(false)
- , HasADX(false)
- , IsBTMemSlow(false)
- , IsUAMemFast(false)
- , HasVectorUAMem(false)
- , HasCmpxchg16b(false)
- , UseLeaForSP(false)
- , HasSlowDivide(false)
- , PostRAScheduler(false)
- , PadShortFunctions(false)
- , stackAlignment(4)
- // FIXME: this is a known good value for Yonah. How about others?
- , MaxInlineSizeThreshold(128)
, TargetTriple(TT)
, StackAlignOverride(StackAlignOverride)
, In64BitMode(is64Bit) {
+ initializeEnvironment();
resetSubtargetFeatures(CPU, FS);
}
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index d1c7067..e97da4b 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -201,8 +201,10 @@ public:
/// \brief Reset the features for the X86 target.
virtual void resetSubtargetFeatures(const MachineFunction *MF);
+private:
+ void initializeEnvironment();
void resetSubtargetFeatures(StringRef CPU, StringRef FS);
-
+public:
/// Is this x86_64? (disregarding specific ABI / programming model)
bool is64Bit() const {
return In64BitMode;
diff --git a/test/CodeGen/X86/subtarget-feature-change.ll b/test/CodeGen/X86/subtarget-feature-change.ll
new file mode 100644
index 0000000..cd67729
--- /dev/null
+++ b/test/CodeGen/X86/subtarget-feature-change.ll
@@ -0,0 +1,66 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+
+; This should not generate SSE instructions:
+;
+; CHECK: without.sse:
+; CHECK: flds
+; CHECK: fmuls
+; CHECK: fstps
+define void @without.sse(float* nocapture %a, float* nocapture %b, float* nocapture %c, i32 %n) #0 {
+entry:
+ %cmp9 = icmp sgt i32 %n, 0
+ br i1 %cmp9, label %for.body, label %for.end
+
+for.body:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds float* %b, i64 %indvars.iv
+ %0 = load float* %arrayidx, align 4, !tbaa !0
+ %arrayidx2 = getelementptr inbounds float* %c, i64 %indvars.iv
+ %1 = load float* %arrayidx2, align 4, !tbaa !0
+ %mul = fmul float %0, %1
+ %arrayidx4 = getelementptr inbounds float* %a, i64 %indvars.iv
+ store float %mul, float* %arrayidx4, align 4, !tbaa !0
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
+
+; This should generate SSE instructions:
+;
+; CHECK: with.sse
+; CHECK: movss
+; CHECK: mulss
+; CHECK: movss
+define void @with.sse(float* nocapture %a, float* nocapture %b, float* nocapture %c, i32 %n) #1 {
+entry:
+ %cmp9 = icmp sgt i32 %n, 0
+ br i1 %cmp9, label %for.body, label %for.end
+
+for.body:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds float* %b, i64 %indvars.iv
+ %0 = load float* %arrayidx, align 4, !tbaa !0
+ %arrayidx2 = getelementptr inbounds float* %c, i64 %indvars.iv
+ %1 = load float* %arrayidx2, align 4, !tbaa !0
+ %mul = fmul float %0, %1
+ %arrayidx4 = getelementptr inbounds float* %a, i64 %indvars.iv
+ store float %mul, float* %arrayidx4, align 4, !tbaa !0
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
+
+attributes #0 = { nounwind optsize ssp uwtable "target-cpu"="core2" "target-features"="-sse4a,-avx2,-xop,-fma4,-bmi2,-3dnow,-3dnowa,-pclmul,-sse,-avx,-sse41,-ssse3,+mmx,-rtm,-sse42,-lzcnt,-f16c,-popcnt,-bmi,-aes,-fma,-rdrand,-sse2,-sse3" }
+attributes #1 = { nounwind optsize ssp uwtable "target-cpu"="core2" "target-features"="-sse4a,-avx2,-xop,-fma4,-bmi2,-3dnow,-3dnowa,-pclmul,+sse,-avx,-sse41,+ssse3,+mmx,-rtm,-sse42,-lzcnt,-f16c,-popcnt,-bmi,-aes,-fma,-rdrand,+sse2,+sse3" }
+
+!0 = metadata !{metadata !"float", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}