aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/arm/include/asm/assembler.h5
-rw-r--r--arch/arm/mm/cache-v7.S6
-rw-r--r--arch/powerpc/kernel/perf_event.c8
-rw-r--r--arch/x86/include/asm/i387.h284
-rw-r--r--arch/x86/include/asm/processor.h1
-rw-r--r--arch/x86/include/asm/thread_info.h2
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c44
-rw-r--r--arch/x86/kernel/process_32.c25
-rw-r--r--arch/x86/kernel/process_64.c29
-rw-r--r--arch/x86/kernel/traps.c41
-rw-r--r--arch/x86/kernel/xsave.c12
-rw-r--r--arch/x86/kvm/vmx.c2
12 files changed, 322 insertions, 137 deletions
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 65c3f24..4e25f18 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -137,6 +137,11 @@
disable_irq
.endm
+ .macro save_and_disable_irqs_notrace, oldcpsr
+ mrs \oldcpsr, cpsr
+ disable_irq_notrace
+ .endm
+
/*
* Restore interrupt state previously stored in a register. We don't
* guarantee that this will preserve the flags.
diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S
index 3593119..1ed1fd3 100644
--- a/arch/arm/mm/cache-v7.S
+++ b/arch/arm/mm/cache-v7.S
@@ -54,9 +54,15 @@ loop1:
and r1, r1, #7 @ mask of the bits for current cache only
cmp r1, #2 @ see what cache we have at this level
blt skip @ skip if no cache, or just i-cache
+#ifdef CONFIG_PREEMPT
+ save_and_disable_irqs_notrace r9 @ make cssr&csidr read atomic
+#endif
mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr
isb @ isb to sych the new cssr&csidr
mrc p15, 1, r1, c0, c0, 0 @ read the new csidr
+#ifdef CONFIG_PREEMPT
+ restore_irqs_notrace r9
+#endif
and r2, r1, #7 @ extract the length of the cache lines
add r2, r2, #4 @ add 4 (line length offset)
ldr r4, =0x3ff
diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c
index 822f630..5793c4b 100644
--- a/arch/powerpc/kernel/perf_event.c
+++ b/arch/powerpc/kernel/perf_event.c
@@ -865,6 +865,7 @@ static void power_pmu_start(struct perf_event *event, int ef_flags)
{
unsigned long flags;
s64 left;
+ unsigned long val;
if (!event->hw.idx || !event->hw.sample_period)
return;
@@ -880,7 +881,12 @@ static void power_pmu_start(struct perf_event *event, int ef_flags)
event->hw.state = 0;
left = local64_read(&event->hw.period_left);
- write_pmc(event->hw.idx, left);
+
+ val = 0;
+ if (left < 0x80000000L)
+ val = 0x80000000L - left;
+
+ write_pmc(event->hw.idx, val);
perf_event_update_userpage(event);
perf_pmu_enable(event->pmu);
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index c9e09ea..a850b4d 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -29,8 +29,8 @@ extern unsigned int sig_xstate_size;
extern void fpu_init(void);
extern void mxcsr_feature_mask_init(void);
extern int init_fpu(struct task_struct *child);
-extern asmlinkage void math_state_restore(void);
-extern void __math_state_restore(void);
+extern void __math_state_restore(struct task_struct *);
+extern void math_state_restore(void);
extern int dump_fpu(struct pt_regs *, struct user_i387_struct *);
extern user_regset_active_fn fpregs_active, xfpregs_active;
@@ -212,19 +212,11 @@ static inline void fpu_fxsave(struct fpu *fpu)
#endif /* CONFIG_X86_64 */
-/* We need a safe address that is cheap to find and that is already
- in L1 during context switch. The best choices are unfortunately
- different for UP and SMP */
-#ifdef CONFIG_SMP
-#define safe_address (__per_cpu_offset[0])
-#else
-#define safe_address (kstat_cpu(0).cpustat.user)
-#endif
-
/*
- * These must be called with preempt disabled
+ * These must be called with preempt disabled. Returns
+ * 'true' if the FPU state is still intact.
*/
-static inline void fpu_save_init(struct fpu *fpu)
+static inline int fpu_save_init(struct fpu *fpu)
{
if (use_xsave()) {
fpu_xsave(fpu);
@@ -233,33 +225,33 @@ static inline void fpu_save_init(struct fpu *fpu)
* xsave header may indicate the init state of the FP.
*/
if (!(fpu->state->xsave.xsave_hdr.xstate_bv & XSTATE_FP))
- return;
+ return 1;
} else if (use_fxsr()) {
fpu_fxsave(fpu);
} else {
asm volatile("fnsave %[fx]; fwait"
: [fx] "=m" (fpu->state->fsave));
- return;
+ return 0;
}
- if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES))
+ /*
+ * If exceptions are pending, we need to clear them so
+ * that we don't randomly get exceptions later.
+ *
+ * FIXME! Is this perhaps only true for the old-style
+ * irq13 case? Maybe we could leave the x87 state
+ * intact otherwise?
+ */
+ if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES)) {
asm volatile("fnclex");
-
- /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
- is pending. Clear the x87 state here by setting it to fixed
- values. safe_address is a random variable that should be in L1 */
- alternative_input(
- ASM_NOP8 ASM_NOP2,
- "emms\n\t" /* clear stack tags */
- "fildl %P[addr]", /* set F?P to defined value */
- X86_FEATURE_FXSAVE_LEAK,
- [addr] "m" (safe_address));
+ return 0;
+ }
+ return 1;
}
-static inline void __save_init_fpu(struct task_struct *tsk)
+static inline int __save_init_fpu(struct task_struct *tsk)
{
- fpu_save_init(&tsk->thread.fpu);
- task_thread_info(tsk)->status &= ~TS_USEDFPU;
+ return fpu_save_init(&tsk->thread.fpu);
}
static inline int fpu_fxrstor_checking(struct fpu *fpu)
@@ -281,39 +273,185 @@ static inline int restore_fpu_checking(struct task_struct *tsk)
}
/*
- * Signal frame handlers...
+ * Software FPU state helpers. Careful: these need to
+ * be preemption protection *and* they need to be
+ * properly paired with the CR0.TS changes!
*/
-extern int save_i387_xstate(void __user *buf);
-extern int restore_i387_xstate(void __user *buf);
+static inline int __thread_has_fpu(struct task_struct *tsk)
+{
+ return tsk->thread.has_fpu;
+}
-static inline void __unlazy_fpu(struct task_struct *tsk)
+/* Must be paired with an 'stts' after! */
+static inline void __thread_clear_has_fpu(struct task_struct *tsk)
{
- if (task_thread_info(tsk)->status & TS_USEDFPU) {
- __save_init_fpu(tsk);
- stts();
- } else
- tsk->fpu_counter = 0;
+ tsk->thread.has_fpu = 0;
+}
+
+/* Must be paired with a 'clts' before! */
+static inline void __thread_set_has_fpu(struct task_struct *tsk)
+{
+ tsk->thread.has_fpu = 1;
}
+/*
+ * Encapsulate the CR0.TS handling together with the
+ * software flag.
+ *
+ * These generally need preemption protection to work,
+ * do try to avoid using these on their own.
+ */
+static inline void __thread_fpu_end(struct task_struct *tsk)
+{
+ __thread_clear_has_fpu(tsk);
+ stts();
+}
+
+static inline void __thread_fpu_begin(struct task_struct *tsk)
+{
+ clts();
+ __thread_set_has_fpu(tsk);
+}
+
+/*
+ * FPU state switching for scheduling.
+ *
+ * This is a two-stage process:
+ *
+ * - switch_fpu_prepare() saves the old state and
+ * sets the new state of the CR0.TS bit. This is
+ * done within the context of the old process.
+ *
+ * - switch_fpu_finish() restores the new state as
+ * necessary.
+ */
+typedef struct { int preload; } fpu_switch_t;
+
+/*
+ * FIXME! We could do a totally lazy restore, but we need to
+ * add a per-cpu "this was the task that last touched the FPU
+ * on this CPU" variable, and the task needs to have a "I last
+ * touched the FPU on this CPU" and check them.
+ *
+ * We don't do that yet, so "fpu_lazy_restore()" always returns
+ * false, but some day..
+ */
+#define fpu_lazy_restore(tsk) (0)
+#define fpu_lazy_state_intact(tsk) do { } while (0)
+
+static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new)
+{
+ fpu_switch_t fpu;
+
+ fpu.preload = tsk_used_math(new) && new->fpu_counter > 5;
+ if (__thread_has_fpu(old)) {
+ if (__save_init_fpu(old))
+ fpu_lazy_state_intact(old);
+ __thread_clear_has_fpu(old);
+ old->fpu_counter++;
+
+ /* Don't change CR0.TS if we just switch! */
+ if (fpu.preload) {
+ __thread_set_has_fpu(new);
+ prefetch(new->thread.fpu.state);
+ } else
+ stts();
+ } else {
+ old->fpu_counter = 0;
+ if (fpu.preload) {
+ if (fpu_lazy_restore(new))
+ fpu.preload = 0;
+ else
+ prefetch(new->thread.fpu.state);
+ __thread_fpu_begin(new);
+ }
+ }
+ return fpu;
+}
+
+/*
+ * By the time this gets called, we've already cleared CR0.TS and
+ * given the process the FPU if we are going to preload the FPU
+ * state - all we need to do is to conditionally restore the register
+ * state itself.
+ */
+static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu)
+{
+ if (fpu.preload)
+ __math_state_restore(new);
+}
+
+/*
+ * Signal frame handlers...
+ */
+extern int save_i387_xstate(void __user *buf);
+extern int restore_i387_xstate(void __user *buf);
+
static inline void __clear_fpu(struct task_struct *tsk)
{
- if (task_thread_info(tsk)->status & TS_USEDFPU) {
+ if (__thread_has_fpu(tsk)) {
/* Ignore delayed exceptions from user space */
asm volatile("1: fwait\n"
"2:\n"
_ASM_EXTABLE(1b, 2b));
- task_thread_info(tsk)->status &= ~TS_USEDFPU;
- stts();
+ __thread_fpu_end(tsk);
}
}
+/*
+ * Were we in an interrupt that interrupted kernel mode?
+ *
+ * We can do a kernel_fpu_begin/end() pair *ONLY* if that
+ * pair does nothing at all: the thread must not have fpu (so
+ * that we don't try to save the FPU state), and TS must
+ * be set (so that the clts/stts pair does nothing that is
+ * visible in the interrupted kernel thread).
+ */
+static inline bool interrupted_kernel_fpu_idle(void)
+{
+ return !__thread_has_fpu(current) &&
+ (read_cr0() & X86_CR0_TS);
+}
+
+/*
+ * Were we in user mode (or vm86 mode) when we were
+ * interrupted?
+ *
+ * Doing kernel_fpu_begin/end() is ok if we are running
+ * in an interrupt context from user mode - we'll just
+ * save the FPU state as required.
+ */
+static inline bool interrupted_user_mode(void)
+{
+ struct pt_regs *regs = get_irq_regs();
+ return regs && user_mode_vm(regs);
+}
+
+/*
+ * Can we use the FPU in kernel mode with the
+ * whole "kernel_fpu_begin/end()" sequence?
+ *
+ * It's always ok in process context (ie "not interrupt")
+ * but it is sometimes ok even from an irq.
+ */
+static inline bool irq_fpu_usable(void)
+{
+ return !in_interrupt() ||
+ interrupted_user_mode() ||
+ interrupted_kernel_fpu_idle();
+}
+
static inline void kernel_fpu_begin(void)
{
- struct thread_info *me = current_thread_info();
+ struct task_struct *me = current;
+
+ WARN_ON_ONCE(!irq_fpu_usable());
preempt_disable();
- if (me->status & TS_USEDFPU)
- __save_init_fpu(me->task);
- else
+ if (__thread_has_fpu(me)) {
+ __save_init_fpu(me);
+ __thread_clear_has_fpu(me);
+ /* We do 'stts()' in kernel_fpu_end() */
+ } else
clts();
}
@@ -323,14 +461,6 @@ static inline void kernel_fpu_end(void)
preempt_enable();
}
-static inline bool irq_fpu_usable(void)
-{
- struct pt_regs *regs;
-
- return !in_interrupt() || !(regs = get_irq_regs()) || \
- user_mode(regs) || (read_cr0() & X86_CR0_TS);
-}
-
/*
* Some instructions like VIA's padlock instructions generate a spurious
* DNA fault but don't modify SSE registers. And these instructions
@@ -363,20 +493,64 @@ static inline void irq_ts_restore(int TS_state)
}
/*
+ * The question "does this thread have fpu access?"
+ * is slightly racy, since preemption could come in
+ * and revoke it immediately after the test.
+ *
+ * However, even in that very unlikely scenario,
+ * we can just assume we have FPU access - typically
+ * to save the FP state - we'll just take a #NM
+ * fault and get the FPU access back.
+ *
+ * The actual user_fpu_begin/end() functions
+ * need to be preemption-safe, though.
+ *
+ * NOTE! user_fpu_end() must be used only after you
+ * have saved the FP state, and user_fpu_begin() must
+ * be used only immediately before restoring it.
+ * These functions do not do any save/restore on
+ * their own.
+ */
+static inline int user_has_fpu(void)
+{
+ return __thread_has_fpu(current);
+}
+
+static inline void user_fpu_end(void)
+{
+ preempt_disable();
+ __thread_fpu_end(current);
+ preempt_enable();
+}
+
+static inline void user_fpu_begin(void)
+{
+ preempt_disable();
+ if (!user_has_fpu())
+ __thread_fpu_begin(current);
+ preempt_enable();
+}
+
+/*
* These disable preemption on their own and are safe
*/
static inline void save_init_fpu(struct task_struct *tsk)
{
+ WARN_ON_ONCE(!__thread_has_fpu(tsk));
preempt_disable();
__save_init_fpu(tsk);
- stts();
+ __thread_fpu_end(tsk);
preempt_enable();
}
static inline void unlazy_fpu(struct task_struct *tsk)
{
preempt_disable();
- __unlazy_fpu(tsk);
+ if (__thread_has_fpu(tsk)) {
+ __save_init_fpu(tsk);
+ __thread_fpu_end(tsk);
+ } else
+ tsk->fpu_counter = 0;
preempt_enable();
}
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 2193715..5d9c61d 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -454,6 +454,7 @@ struct thread_struct {
unsigned long trap_no;
unsigned long error_code;
/* floating point and extended processor state */
+ unsigned long has_fpu;
struct fpu fpu;
#ifdef CONFIG_X86_32
/* Virtual 86 mode info */
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 1f2e61e..278d3d5 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -242,8 +242,6 @@ static inline struct thread_info *current_thread_info(void)
* ever touches our thread-synchronous status, so we don't
* have to worry about atomic accesses.
*/
-#define TS_USEDFPU 0x0001 /* FPU was used by this task
- this quantum (SMP) */
#define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/
#define TS_POLLING 0x0004 /* idle task polling need_resched,
skip sending interrupt */
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index c105c53..fde4428 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -330,8 +330,7 @@ static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3)
l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
}
-static void __cpuinit amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf,
- int index)
+static void __cpuinit amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index)
{
static struct amd_l3_cache *__cpuinitdata l3_caches;
int node;
@@ -748,14 +747,16 @@ static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info);
#define CPUID4_INFO_IDX(x, y) (&((per_cpu(ici_cpuid4_info, x))[y]))
#ifdef CONFIG_SMP
-static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
+
+static int __cpuinit cache_shared_amd_cpu_map_setup(unsigned int cpu, int index)
{
- struct _cpuid4_info *this_leaf, *sibling_leaf;
- unsigned long num_threads_sharing;
- int index_msb, i, sibling;
+ struct _cpuid4_info *this_leaf;
+ int ret, i, sibling;
struct cpuinfo_x86 *c = &cpu_data(cpu);
- if ((index == 3) && (c->x86_vendor == X86_VENDOR_AMD)) {
+ ret = 0;
+ if (index == 3) {
+ ret = 1;
for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
if (!per_cpu(ici_cpuid4_info, i))
continue;
@@ -766,8 +767,35 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
set_bit(sibling, this_leaf->shared_cpu_map);
}
}
- return;
+ } else if ((c->x86 == 0x15) && ((index == 1) || (index == 2))) {
+ ret = 1;
+ for_each_cpu(i, cpu_sibling_mask(cpu)) {
+ if (!per_cpu(ici_cpuid4_info, i))
+ continue;
+ this_leaf = CPUID4_INFO_IDX(i, index);
+ for_each_cpu(sibling, cpu_sibling_mask(cpu)) {
+ if (!cpu_online(sibling))
+ continue;
+ set_bit(sibling, this_leaf->shared_cpu_map);
+ }
+ }
}
+
+ return ret;
+}
+
+static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
+{
+ struct _cpuid4_info *this_leaf, *sibling_leaf;
+ unsigned long num_threads_sharing;
+ int index_msb, i;
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
+
+ if (c->x86_vendor == X86_VENDOR_AMD) {
+ if (cache_shared_amd_cpu_map_setup(cpu, index))
+ return;
+ }
+
this_leaf = CPUID4_INFO_IDX(cpu, index);
num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing;
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index a3d0dc5..fcdb1b3 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -293,22 +293,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
*next = &next_p->thread;
int cpu = smp_processor_id();
struct tss_struct *tss = &per_cpu(init_tss, cpu);
- bool preload_fpu;
+ fpu_switch_t fpu;
/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
- /*
- * If the task has used fpu the last 5 timeslices, just do a full
- * restore of the math state immediately to avoid the trap; the
- * chances of needing FPU soon are obviously high now
- */
- preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
-
- __unlazy_fpu(prev_p);
-
- /* we're going to use this soon, after a few expensive things */
- if (preload_fpu)
- prefetch(next->fpu.state);
+ fpu = switch_fpu_prepare(prev_p, next_p);
/*
* Reload esp0.
@@ -348,11 +337,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
__switch_to_xtra(prev_p, next_p, tss);
- /* If we're going to preload the fpu context, make sure clts
- is run while we're batching the cpu state updates. */
- if (preload_fpu)
- clts();
-
/*
* Leave lazy mode, flushing any hypercalls made here.
* This must be done before restoring TLS segments so
@@ -362,15 +346,14 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
*/
arch_end_context_switch(next_p);
- if (preload_fpu)
- __math_state_restore();
-
/*
* Restore %gs if needed (which is common)
*/
if (prev->gs | next->gs)
lazy_load_gs(next->gs);
+ switch_fpu_finish(next_p, fpu);
+
percpu_write(current_task, next_p);
return prev_p;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 63c8aed..eeb5004 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -363,18 +363,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
int cpu = smp_processor_id();
struct tss_struct *tss = &per_cpu(init_tss, cpu);
unsigned fsindex, gsindex;
- bool preload_fpu;
+ fpu_switch_t fpu;
- /*
- * If the task has used fpu the last 5 timeslices, just do a full
- * restore of the math state immediately to avoid the trap; the
- * chances of needing FPU soon are obviously high now
- */
- preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
-
- /* we're going to use this soon, after a few expensive things */
- if (preload_fpu)
- prefetch(next->fpu.state);
+ fpu = switch_fpu_prepare(prev_p, next_p);
/*
* Reload esp0, LDT and the page table pointer:
@@ -404,13 +395,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
load_TLS(next, cpu);
- /* Must be after DS reload */
- __unlazy_fpu(prev_p);
-
- /* Make sure cpu is ready for new context */
- if (preload_fpu)
- clts();
-
/*
* Leave lazy mode, flushing any hypercalls made here.
* This must be done before restoring TLS segments so
@@ -451,6 +435,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
prev->gsindex = gsindex;
+ switch_fpu_finish(next_p, fpu);
+
/*
* Switch the PDA and FPU contexts.
*/
@@ -469,13 +455,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
__switch_to_xtra(prev_p, next_p, tss);
- /*
- * Preload the FPU context, now that we've determined that the
- * task is likely to be using it.
- */
- if (preload_fpu)
- __math_state_restore();
-
return prev_p;
}
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index b9b6716..1b26e01 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -717,25 +717,34 @@ asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void)
}
/*
- * __math_state_restore assumes that cr0.TS is already clear and the
- * fpu state is all ready for use. Used during context switch.
+ * This gets called with the process already owning the
+ * FPU state, and with CR0.TS cleared. It just needs to
+ * restore the FPU register state.
*/
-void __math_state_restore(void)
+void __math_state_restore(struct task_struct *tsk)
{
- struct thread_info *thread = current_thread_info();
- struct task_struct *tsk = thread->task;
+ /* We need a safe address that is cheap to find and that is already
+ in L1. We've just brought in "tsk->thread.has_fpu", so use that */
+#define safe_address (tsk->thread.has_fpu)
+
+ /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
+ is pending. Clear the x87 state here by setting it to fixed
+ values. safe_address is a random variable that should be in L1 */
+ alternative_input(
+ ASM_NOP8 ASM_NOP2,
+ "emms\n\t" /* clear stack tags */
+ "fildl %P[addr]", /* set F?P to defined value */
+ X86_FEATURE_FXSAVE_LEAK,
+ [addr] "m" (safe_address));
/*
* Paranoid restore. send a SIGSEGV if we fail to restore the state.
*/
if (unlikely(restore_fpu_checking(tsk))) {
- stts();
+ __thread_fpu_end(tsk);
force_sig(SIGSEGV, tsk);
return;
}
-
- thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */
- tsk->fpu_counter++;
}
/*
@@ -745,13 +754,12 @@ void __math_state_restore(void)
* Careful.. There are problems with IBM-designed IRQ13 behaviour.
* Don't touch unless you *really* know how it works.
*
- * Must be called with kernel preemption disabled (in this case,
- * local interrupts are disabled at the call-site in entry.S).
+ * Must be called with kernel preemption disabled (eg with local
+ * local interrupts as in the case of do_device_not_available).
*/
-asmlinkage void math_state_restore(void)
+void math_state_restore(void)
{
- struct thread_info *thread = current_thread_info();
- struct task_struct *tsk = thread->task;
+ struct task_struct *tsk = current;
if (!tsk_used_math(tsk)) {
local_irq_enable();
@@ -768,9 +776,10 @@ asmlinkage void math_state_restore(void)
local_irq_disable();
}
- clts(); /* Allow maths ops (or we recurse) */
+ __thread_fpu_begin(tsk);
+ __math_state_restore(tsk);
- __math_state_restore();
+ tsk->fpu_counter++;
}
EXPORT_SYMBOL_GPL(math_state_restore);
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index a391134..7110911 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -47,7 +47,7 @@ void __sanitize_i387_state(struct task_struct *tsk)
if (!fx)
return;
- BUG_ON(task_thread_info(tsk)->status & TS_USEDFPU);
+ BUG_ON(__thread_has_fpu(tsk));
xstate_bv = tsk->thread.fpu.state->xsave.xsave_hdr.xstate_bv;
@@ -168,7 +168,7 @@ int save_i387_xstate(void __user *buf)
if (!used_math())
return 0;
- if (task_thread_info(tsk)->status & TS_USEDFPU) {
+ if (user_has_fpu()) {
if (use_xsave())
err = xsave_user(buf);
else
@@ -176,8 +176,7 @@ int save_i387_xstate(void __user *buf)
if (err)
return err;
- task_thread_info(tsk)->status &= ~TS_USEDFPU;
- stts();
+ user_fpu_end();
} else {
sanitize_i387_state(tsk);
if (__copy_to_user(buf, &tsk->thread.fpu.state->fxsave,
@@ -292,10 +291,7 @@ int restore_i387_xstate(void __user *buf)
return err;
}
- if (!(task_thread_info(current)->status & TS_USEDFPU)) {
- clts();
- task_thread_info(current)->status |= TS_USEDFPU;
- }
+ user_fpu_begin();
if (use_xsave())
err = restore_user_xstate(buf);
else
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index d48ec60..2ad060a 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -948,7 +948,7 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
#ifdef CONFIG_X86_64
wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
#endif
- if (current_thread_info()->status & TS_USEDFPU)
+ if (__thread_has_fpu(current))
clts();
load_gdt(&__get_cpu_var(host_gdt));
}