aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/include/asm/amd_nb.h2
-rw-r--r--arch/x86/include/asm/apic.h2
-rw-r--r--arch/x86/include/asm/i387.h284
-rw-r--r--arch/x86/include/asm/kvm_emulate.h16
-rw-r--r--arch/x86/include/asm/processor.h1
-rw-r--r--arch/x86/include/asm/thread_info.h2
-rw-r--r--arch/x86/include/asm/timer.h19
-rw-r--r--arch/x86/include/asm/uv/uv_bau.h1
-rw-r--r--arch/x86/include/asm/uv/uv_hub.h39
-rw-r--r--arch/x86/kernel/amd_iommu.c2
-rw-r--r--arch/x86/kernel/amd_iommu_init.c24
-rw-r--r--arch/x86/kernel/amd_nb.c31
-rw-r--r--arch/x86/kernel/apic/bigsmp_32.c20
-rw-r--r--arch/x86/kernel/apic/probe_32.c10
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c14
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c44
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_ds.c6
-rw-r--r--arch/x86/kernel/entry_32.S17
-rw-r--r--arch/x86/kernel/hpet.c21
-rw-r--r--arch/x86/kernel/kgdb.c60
-rw-r--r--arch/x86/kernel/kprobes.c4
-rw-r--r--arch/x86/kernel/microcode_amd.c24
-rw-r--r--arch/x86/kernel/mpparse.c2
-rw-r--r--arch/x86/kernel/process_32.c25
-rw-r--r--arch/x86/kernel/process_64.c29
-rw-r--r--arch/x86/kernel/reboot.c13
-rw-r--r--arch/x86/kernel/tls.c4
-rw-r--r--arch/x86/kernel/traps.c41
-rw-r--r--arch/x86/kernel/tsc.c13
-rw-r--r--arch/x86/kernel/vm86_32.c2
-rw-r--r--arch/x86/kernel/xsave.c12
-rw-r--r--arch/x86/kvm/emulate.c51
-rw-r--r--arch/x86/kvm/vmx.c2
-rw-r--r--arch/x86/kvm/x86.c23
-rw-r--r--arch/x86/lib/delay.c4
-rw-r--r--arch/x86/mm/gup.c12
-rw-r--r--arch/x86/mm/highmem_32.c2
-rw-r--r--arch/x86/mm/mmap.c4
-rw-r--r--arch/x86/mm/srat.c4
-rw-r--r--arch/x86/net/bpf_jit_comp.c50
-rw-r--r--arch/x86/oprofile/init.c7
-rw-r--r--arch/x86/pci/Makefile3
-rw-r--r--arch/x86/pci/acpi.c28
-rw-r--r--arch/x86/pci/amd_bus.c42
-rw-r--r--arch/x86/pci/xen.c2
-rw-r--r--arch/x86/platform/mrst/mrst.c24
-rw-r--r--arch/x86/platform/uv/tlb_uv.c30
-rw-r--r--arch/x86/xen/enlighten.c3
-rw-r--r--arch/x86/xen/setup.c18
49 files changed, 798 insertions, 295 deletions
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index 67f87f2..78a1eff 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -1,6 +1,7 @@
#ifndef _ASM_X86_AMD_NB_H
#define _ASM_X86_AMD_NB_H
+#include <linux/ioport.h>
#include <linux/pci.h>
struct amd_nb_bus_dev_range {
@@ -13,6 +14,7 @@ extern const struct pci_device_id amd_nb_misc_ids[];
extern const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[];
extern bool early_is_amd_nb(u32 value);
+extern struct resource *amd_get_mmconfig_range(struct resource *res);
extern int amd_cache_northbridges(void);
extern void amd_flush_garts(void);
extern int amd_numa_init(void);
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 4a0b7c7..244ac77 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -495,7 +495,7 @@ static inline void default_wait_for_init_deassert(atomic_t *deassert)
return;
}
-extern struct apic *generic_bigsmp_probe(void);
+extern void generic_bigsmp_probe(void);
#ifdef CONFIG_X86_LOCAL_APIC
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index c9e09ea..a850b4d 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -29,8 +29,8 @@ extern unsigned int sig_xstate_size;
extern void fpu_init(void);
extern void mxcsr_feature_mask_init(void);
extern int init_fpu(struct task_struct *child);
-extern asmlinkage void math_state_restore(void);
-extern void __math_state_restore(void);
+extern void __math_state_restore(struct task_struct *);
+extern void math_state_restore(void);
extern int dump_fpu(struct pt_regs *, struct user_i387_struct *);
extern user_regset_active_fn fpregs_active, xfpregs_active;
@@ -212,19 +212,11 @@ static inline void fpu_fxsave(struct fpu *fpu)
#endif /* CONFIG_X86_64 */
-/* We need a safe address that is cheap to find and that is already
- in L1 during context switch. The best choices are unfortunately
- different for UP and SMP */
-#ifdef CONFIG_SMP
-#define safe_address (__per_cpu_offset[0])
-#else
-#define safe_address (kstat_cpu(0).cpustat.user)
-#endif
-
/*
- * These must be called with preempt disabled
+ * These must be called with preempt disabled. Returns
+ * 'true' if the FPU state is still intact.
*/
-static inline void fpu_save_init(struct fpu *fpu)
+static inline int fpu_save_init(struct fpu *fpu)
{
if (use_xsave()) {
fpu_xsave(fpu);
@@ -233,33 +225,33 @@ static inline void fpu_save_init(struct fpu *fpu)
* xsave header may indicate the init state of the FP.
*/
if (!(fpu->state->xsave.xsave_hdr.xstate_bv & XSTATE_FP))
- return;
+ return 1;
} else if (use_fxsr()) {
fpu_fxsave(fpu);
} else {
asm volatile("fnsave %[fx]; fwait"
: [fx] "=m" (fpu->state->fsave));
- return;
+ return 0;
}
- if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES))
+ /*
+ * If exceptions are pending, we need to clear them so
+ * that we don't randomly get exceptions later.
+ *
+ * FIXME! Is this perhaps only true for the old-style
+ * irq13 case? Maybe we could leave the x87 state
+ * intact otherwise?
+ */
+ if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES)) {
asm volatile("fnclex");
-
- /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
- is pending. Clear the x87 state here by setting it to fixed
- values. safe_address is a random variable that should be in L1 */
- alternative_input(
- ASM_NOP8 ASM_NOP2,
- "emms\n\t" /* clear stack tags */
- "fildl %P[addr]", /* set F?P to defined value */
- X86_FEATURE_FXSAVE_LEAK,
- [addr] "m" (safe_address));
+ return 0;
+ }
+ return 1;
}
-static inline void __save_init_fpu(struct task_struct *tsk)
+static inline int __save_init_fpu(struct task_struct *tsk)
{
- fpu_save_init(&tsk->thread.fpu);
- task_thread_info(tsk)->status &= ~TS_USEDFPU;
+ return fpu_save_init(&tsk->thread.fpu);
}
static inline int fpu_fxrstor_checking(struct fpu *fpu)
@@ -281,39 +273,185 @@ static inline int restore_fpu_checking(struct task_struct *tsk)
}
/*
- * Signal frame handlers...
+ * Software FPU state helpers. Careful: these need to
+ * be preemption protected *and* they need to be
+ * properly paired with the CR0.TS changes!
*/
-extern int save_i387_xstate(void __user *buf);
-extern int restore_i387_xstate(void __user *buf);
+static inline int __thread_has_fpu(struct task_struct *tsk)
+{
+ return tsk->thread.has_fpu;
+}
-static inline void __unlazy_fpu(struct task_struct *tsk)
+/* Must be paired with an 'stts' after! */
+static inline void __thread_clear_has_fpu(struct task_struct *tsk)
{
- if (task_thread_info(tsk)->status & TS_USEDFPU) {
- __save_init_fpu(tsk);
- stts();
- } else
- tsk->fpu_counter = 0;
+ tsk->thread.has_fpu = 0;
+}
+
+/* Must be paired with a 'clts' before! */
+static inline void __thread_set_has_fpu(struct task_struct *tsk)
+{
+ tsk->thread.has_fpu = 1;
}
+/*
+ * Encapsulate the CR0.TS handling together with the
+ * software flag.
+ *
+ * These generally need preemption protection to work;
+ * do try to avoid using these on their own.
+ */
+static inline void __thread_fpu_end(struct task_struct *tsk)
+{
+ __thread_clear_has_fpu(tsk);
+ stts();
+}
+
+static inline void __thread_fpu_begin(struct task_struct *tsk)
+{
+ clts();
+ __thread_set_has_fpu(tsk);
+}
+
+/*
+ * FPU state switching for scheduling.
+ *
+ * This is a two-stage process:
+ *
+ * - switch_fpu_prepare() saves the old state and
+ * sets the new state of the CR0.TS bit. This is
+ * done within the context of the old process.
+ *
+ * - switch_fpu_finish() restores the new state as
+ * necessary.
+ */
+typedef struct { int preload; } fpu_switch_t;
+
+/*
+ * FIXME! We could do a totally lazy restore, but we need to
+ * add a per-cpu "this was the task that last touched the FPU
+ * on this CPU" variable, and the task needs to have a "I last
+ * touched the FPU on this CPU" and check them.
+ *
+ * We don't do that yet, so "fpu_lazy_restore()" always returns
+ * false, but some day..
+ */
+#define fpu_lazy_restore(tsk) (0)
+#define fpu_lazy_state_intact(tsk) do { } while (0)
+
+static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new)
+{
+ fpu_switch_t fpu;
+
+ fpu.preload = tsk_used_math(new) && new->fpu_counter > 5;
+ if (__thread_has_fpu(old)) {
+ if (__save_init_fpu(old))
+ fpu_lazy_state_intact(old);
+ __thread_clear_has_fpu(old);
+ old->fpu_counter++;
+
+ /* Don't change CR0.TS if we just switch! */
+ if (fpu.preload) {
+ __thread_set_has_fpu(new);
+ prefetch(new->thread.fpu.state);
+ } else
+ stts();
+ } else {
+ old->fpu_counter = 0;
+ if (fpu.preload) {
+ if (fpu_lazy_restore(new))
+ fpu.preload = 0;
+ else
+ prefetch(new->thread.fpu.state);
+ __thread_fpu_begin(new);
+ }
+ }
+ return fpu;
+}
+
+/*
+ * By the time this gets called, we've already cleared CR0.TS and
+ * given the process the FPU if we are going to preload the FPU
+ * state - all we need to do is to conditionally restore the register
+ * state itself.
+ */
+static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu)
+{
+ if (fpu.preload)
+ __math_state_restore(new);
+}
+
+/*
+ * Signal frame handlers...
+ */
+extern int save_i387_xstate(void __user *buf);
+extern int restore_i387_xstate(void __user *buf);
+
static inline void __clear_fpu(struct task_struct *tsk)
{
- if (task_thread_info(tsk)->status & TS_USEDFPU) {
+ if (__thread_has_fpu(tsk)) {
/* Ignore delayed exceptions from user space */
asm volatile("1: fwait\n"
"2:\n"
_ASM_EXTABLE(1b, 2b));
- task_thread_info(tsk)->status &= ~TS_USEDFPU;
- stts();
+ __thread_fpu_end(tsk);
}
}
+/*
+ * Were we in an interrupt that interrupted kernel mode?
+ *
+ * We can do a kernel_fpu_begin/end() pair *ONLY* if that
+ * pair does nothing at all: the thread must not have fpu (so
+ * that we don't try to save the FPU state), and TS must
+ * be set (so that the clts/stts pair does nothing that is
+ * visible in the interrupted kernel thread).
+ */
+static inline bool interrupted_kernel_fpu_idle(void)
+{
+ return !__thread_has_fpu(current) &&
+ (read_cr0() & X86_CR0_TS);
+}
+
+/*
+ * Were we in user mode (or vm86 mode) when we were
+ * interrupted?
+ *
+ * Doing kernel_fpu_begin/end() is ok if we are running
+ * in an interrupt context from user mode - we'll just
+ * save the FPU state as required.
+ */
+static inline bool interrupted_user_mode(void)
+{
+ struct pt_regs *regs = get_irq_regs();
+ return regs && user_mode_vm(regs);
+}
+
+/*
+ * Can we use the FPU in kernel mode with the
+ * whole "kernel_fpu_begin/end()" sequence?
+ *
+ * It's always ok in process context (ie "not interrupt")
+ * but it is sometimes ok even from an irq.
+ */
+static inline bool irq_fpu_usable(void)
+{
+ return !in_interrupt() ||
+ interrupted_user_mode() ||
+ interrupted_kernel_fpu_idle();
+}
+
static inline void kernel_fpu_begin(void)
{
- struct thread_info *me = current_thread_info();
+ struct task_struct *me = current;
+
+ WARN_ON_ONCE(!irq_fpu_usable());
preempt_disable();
- if (me->status & TS_USEDFPU)
- __save_init_fpu(me->task);
- else
+ if (__thread_has_fpu(me)) {
+ __save_init_fpu(me);
+ __thread_clear_has_fpu(me);
+ /* We do 'stts()' in kernel_fpu_end() */
+ } else
clts();
}
@@ -323,14 +461,6 @@ static inline void kernel_fpu_end(void)
preempt_enable();
}
-static inline bool irq_fpu_usable(void)
-{
- struct pt_regs *regs;
-
- return !in_interrupt() || !(regs = get_irq_regs()) || \
- user_mode(regs) || (read_cr0() & X86_CR0_TS);
-}
-
/*
* Some instructions like VIA's padlock instructions generate a spurious
* DNA fault but don't modify SSE registers. And these instructions
@@ -363,20 +493,64 @@ static inline void irq_ts_restore(int TS_state)
}
/*
+ * The question "does this thread have fpu access?"
+ * is slightly racy, since preemption could come in
+ * and revoke it immediately after the test.
+ *
+ * However, even in that very unlikely scenario,
+ * we can just assume we have FPU access - typically
+ * to save the FP state - we'll just take a #NM
+ * fault and get the FPU access back.
+ *
+ * The actual user_fpu_begin/end() functions
+ * need to be preemption-safe, though.
+ *
+ * NOTE! user_fpu_end() must be used only after you
+ * have saved the FP state, and user_fpu_begin() must
+ * be used only immediately before restoring it.
+ * These functions do not do any save/restore on
+ * their own.
+ */
+static inline int user_has_fpu(void)
+{
+ return __thread_has_fpu(current);
+}
+
+static inline void user_fpu_end(void)
+{
+ preempt_disable();
+ __thread_fpu_end(current);
+ preempt_enable();
+}
+
+static inline void user_fpu_begin(void)
+{
+ preempt_disable();
+ if (!user_has_fpu())
+ __thread_fpu_begin(current);
+ preempt_enable();
+}
+
+/*
* These disable preemption on their own and are safe
*/
static inline void save_init_fpu(struct task_struct *tsk)
{
+ WARN_ON_ONCE(!__thread_has_fpu(tsk));
preempt_disable();
__save_init_fpu(tsk);
- stts();
+ __thread_fpu_end(tsk);
preempt_enable();
}
static inline void unlazy_fpu(struct task_struct *tsk)
{
preempt_disable();
- __unlazy_fpu(tsk);
+ if (__thread_has_fpu(tsk)) {
+ __save_init_fpu(tsk);
+ __thread_fpu_end(tsk);
+ } else
+ tsk->fpu_counter = 0;
preempt_enable();
}
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 0049211..0ab6a4d 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -189,6 +189,9 @@ struct x86_emulate_ops {
int (*intercept)(struct x86_emulate_ctxt *ctxt,
struct x86_instruction_info *info,
enum x86_intercept_stage stage);
+
+ bool (*get_cpuid)(struct x86_emulate_ctxt *ctxt,
+ u32 *eax, u32 *ebx, u32 *ecx, u32 *edx);
};
typedef u32 __attribute__((vector_size(16))) sse128_t;
@@ -298,6 +301,19 @@ struct x86_emulate_ctxt {
#define X86EMUL_MODE_PROT (X86EMUL_MODE_PROT16|X86EMUL_MODE_PROT32| \
X86EMUL_MODE_PROT64)
+/* CPUID vendors */
+#define X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx 0x68747541
+#define X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx 0x444d4163
+#define X86EMUL_CPUID_VENDOR_AuthenticAMD_edx 0x69746e65
+
+#define X86EMUL_CPUID_VENDOR_AMDisbetterI_ebx 0x69444d41
+#define X86EMUL_CPUID_VENDOR_AMDisbetterI_ecx 0x21726574
+#define X86EMUL_CPUID_VENDOR_AMDisbetterI_edx 0x74656273
+
+#define X86EMUL_CPUID_VENDOR_GenuineIntel_ebx 0x756e6547
+#define X86EMUL_CPUID_VENDOR_GenuineIntel_ecx 0x6c65746e
+#define X86EMUL_CPUID_VENDOR_GenuineIntel_edx 0x49656e69
+
enum x86_intercept_stage {
X86_ICTP_NONE = 0, /* Allow zero-init to not match anything */
X86_ICPT_PRE_EXCEPT,
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 2193715..5d9c61d 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -454,6 +454,7 @@ struct thread_struct {
unsigned long trap_no;
unsigned long error_code;
/* floating point and extended processor state */
+ unsigned long has_fpu;
struct fpu fpu;
#ifdef CONFIG_X86_32
/* Virtual 86 mode info */
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 1f2e61e..278d3d5 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -242,8 +242,6 @@ static inline struct thread_info *current_thread_info(void)
* ever touches our thread-synchronous status, so we don't
* have to worry about atomic accesses.
*/
-#define TS_USEDFPU 0x0001 /* FPU was used by this task
- this quantum (SMP) */
#define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/
#define TS_POLLING 0x0004 /* idle task polling need_resched,
skip sending interrupt */
diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h
index fa7b917..34baa0e 100644
--- a/arch/x86/include/asm/timer.h
+++ b/arch/x86/include/asm/timer.h
@@ -32,6 +32,22 @@ extern int no_timer_check;
* (mathieu.desnoyers@polymtl.ca)
*
* -johnstul@us.ibm.com "math is hard, lets go shopping!"
+ *
+ * In:
+ *
+ * ns = cycles * cyc2ns_scale / SC
+ *
+ * Although we may still have enough bits to store the value of ns,
+ * in some cases, we may not have enough bits to store cycles * cyc2ns_scale,
+ * leading to an incorrect result.
+ *
+ * To avoid this, we can decompose 'cycles' into quotient and remainder
+ * of division by SC. Then,
+ *
+ * ns = (quot * SC + rem) * cyc2ns_scale / SC
+ * = quot * cyc2ns_scale + (rem * cyc2ns_scale) / SC
+ *
+ * - sqazi@google.com
*/
DECLARE_PER_CPU(unsigned long, cyc2ns);
@@ -43,7 +59,8 @@ static inline unsigned long long __cycles_2_ns(unsigned long long cyc)
{
int cpu = smp_processor_id();
unsigned long long ns = per_cpu(cyc2ns_offset, cpu);
- ns += cyc * per_cpu(cyc2ns, cpu) >> CYC2NS_SCALE_FACTOR;
+ ns += mult_frac(cyc, per_cpu(cyc2ns, cpu),
+ (1UL << CYC2NS_SCALE_FACTOR));
return ns;
}
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index a291c40..5d62d65 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -55,6 +55,7 @@
#define UV_BAU_TUNABLES_DIR "sgi_uv"
#define UV_BAU_TUNABLES_FILE "bau_tunables"
#define WHITESPACE " \t\n"
+#define uv_mmask ((1UL << uv_hub_info->m_val) - 1)
#define uv_physnodeaddr(x) ((__pa((unsigned long)(x)) & uv_mmask))
#define cpubit_isset(cpu, bau_local_cpumask) \
test_bit((cpu), (bau_local_cpumask).bits)
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index f26544a..21f7385 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -46,6 +46,13 @@
* PNODE - the low N bits of the GNODE. The PNODE is the most useful variant
* of the nasid for socket usage.
*
+ * GPA - (global physical address) a socket physical address converted
+ * so that it can be used by the GRU as a global address. Socket
+ * physical addresses 1) need additional NASID (node) bits added
+ * to the high end of the address, and 2) must be unaliased if the
+ * partition does not have a physical address 0. In addition, on
+ * UV2 rev 1, GPAs need the gnode left shifted to bits 39 or 40.
+ *
*
* NumaLink Global Physical Address Format:
* +--------------------------------+---------------------+
@@ -141,6 +148,8 @@ struct uv_hub_info_s {
unsigned int gnode_extra;
unsigned char hub_revision;
unsigned char apic_pnode_shift;
+ unsigned char m_shift;
+ unsigned char n_lshift;
unsigned long gnode_upper;
unsigned long lowmem_remap_top;
unsigned long lowmem_remap_base;
@@ -177,6 +186,16 @@ static inline int is_uv2_hub(void)
return uv_hub_info->hub_revision >= UV2_HUB_REVISION_BASE;
}
+static inline int is_uv2_1_hub(void)
+{
+ return uv_hub_info->hub_revision == UV2_HUB_REVISION_BASE;
+}
+
+static inline int is_uv2_2_hub(void)
+{
+ return uv_hub_info->hub_revision == UV2_HUB_REVISION_BASE + 1;
+}
+
union uvh_apicid {
unsigned long v;
struct uvh_apicid_s {
@@ -276,7 +295,10 @@ static inline unsigned long uv_soc_phys_ram_to_gpa(unsigned long paddr)
{
if (paddr < uv_hub_info->lowmem_remap_top)
paddr |= uv_hub_info->lowmem_remap_base;
- return paddr | uv_hub_info->gnode_upper;
+ paddr |= uv_hub_info->gnode_upper;
+ paddr = ((paddr << uv_hub_info->m_shift) >> uv_hub_info->m_shift) |
+ ((paddr >> uv_hub_info->m_val) << uv_hub_info->n_lshift);
+ return paddr;
}
@@ -296,20 +318,23 @@ uv_gpa_in_mmr_space(unsigned long gpa)
/* UV global physical address --> socket phys RAM */
static inline unsigned long uv_gpa_to_soc_phys_ram(unsigned long gpa)
{
- unsigned long paddr = gpa & uv_hub_info->gpa_mask;
+ unsigned long paddr;
unsigned long remap_base = uv_hub_info->lowmem_remap_base;
unsigned long remap_top = uv_hub_info->lowmem_remap_top;
+ gpa = ((gpa << uv_hub_info->m_shift) >> uv_hub_info->m_shift) |
+ ((gpa >> uv_hub_info->n_lshift) << uv_hub_info->m_val);
+ paddr = gpa & uv_hub_info->gpa_mask;
if (paddr >= remap_base && paddr < remap_base + remap_top)
paddr -= remap_base;
return paddr;
}
-/* gnode -> pnode */
+/* gpa -> gnode */
static inline unsigned long uv_gpa_to_gnode(unsigned long gpa)
{
- return gpa >> uv_hub_info->m_val;
+ return gpa >> uv_hub_info->n_lshift;
}
/* gpa -> pnode */
@@ -320,6 +345,12 @@ static inline int uv_gpa_to_pnode(unsigned long gpa)
return uv_gpa_to_gnode(gpa) & n_mask;
}
+/* gpa -> node offset */
+static inline unsigned long uv_gpa_to_offset(unsigned long gpa)
+{
+ return (gpa << uv_hub_info->m_shift) >> uv_hub_info->m_shift;
+}
+
/* pnode, offset --> socket virtual */
static inline void *uv_pnode_offset_to_vaddr(int pnode, unsigned long offset)
{
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index d3d9d50..bfd75ff 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -1203,7 +1203,7 @@ static int alloc_new_range(struct dma_ops_domain *dma_dom,
if (!pte || !IOMMU_PTE_PRESENT(*pte))
continue;
- dma_ops_reserve_addresses(dma_dom, i << PAGE_SHIFT, 1);
+ dma_ops_reserve_addresses(dma_dom, i >> PAGE_SHIFT, 1);
}
update_domain(&dma_dom->domain);
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index bfc8453..33df6e8 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -1031,8 +1031,9 @@ static int iommu_setup_msi(struct amd_iommu *iommu)
{
int r;
- if (pci_enable_msi(iommu->dev))
- return 1;
+ r = pci_enable_msi(iommu->dev);
+ if (r)
+ return r;
r = request_threaded_irq(iommu->dev->irq,
amd_iommu_int_handler,
@@ -1042,24 +1043,33 @@ static int iommu_setup_msi(struct amd_iommu *iommu)
if (r) {
pci_disable_msi(iommu->dev);
- return 1;
+ return r;
}
iommu->int_enabled = true;
- iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
return 0;
}
static int iommu_init_msi(struct amd_iommu *iommu)
{
+ int ret;
+
if (iommu->int_enabled)
- return 0;
+ goto enable_faults;
if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSI))
- return iommu_setup_msi(iommu);
+ ret = iommu_setup_msi(iommu);
+ else
+ ret = -ENODEV;
- return 1;
+ if (ret)
+ return ret;
+
+enable_faults:
+ iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
+
+ return 0;
}
/****************************************************************************
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 4c39baa..bae1efe 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -119,6 +119,37 @@ bool __init early_is_amd_nb(u32 device)
return false;
}
+struct resource *amd_get_mmconfig_range(struct resource *res)
+{
+ u32 address;
+ u64 base, msr;
+ unsigned segn_busn_bits;
+
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
+ return NULL;
+
+ /* assume all cpus from fam10h have mmconfig */
+ if (boot_cpu_data.x86 < 0x10)
+ return NULL;
+
+ address = MSR_FAM10H_MMIO_CONF_BASE;
+ rdmsrl(address, msr);
+
+ /* mmconfig is not enabled */
+ if (!(msr & FAM10H_MMIO_CONF_ENABLE))
+ return NULL;
+
+ base = msr & (FAM10H_MMIO_CONF_BASE_MASK<<FAM10H_MMIO_CONF_BASE_SHIFT);
+
+ segn_busn_bits = (msr >> FAM10H_MMIO_CONF_BUSRANGE_SHIFT) &
+ FAM10H_MMIO_CONF_BUSRANGE_MASK;
+
+ res->flags = IORESOURCE_MEM;
+ res->start = base;
+ res->end = base + (1ULL<<(segn_busn_bits + 20)) - 1;
+ return res;
+}
+
int amd_get_subcaches(int cpu)
{
struct pci_dev *link = node_to_amd_nb(amd_get_nb_id(cpu))->link;
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index efd737e..521bead 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -255,12 +255,24 @@ static struct apic apic_bigsmp = {
.x86_32_early_logical_apicid = bigsmp_early_logical_apicid,
};
-struct apic * __init generic_bigsmp_probe(void)
+void __init generic_bigsmp_probe(void)
{
- if (probe_bigsmp())
- return &apic_bigsmp;
+ unsigned int cpu;
- return NULL;
+ if (!probe_bigsmp())
+ return;
+
+ apic = &apic_bigsmp;
+
+ for_each_possible_cpu(cpu) {
+ if (early_per_cpu(x86_cpu_to_logical_apicid,
+ cpu) == BAD_APICID)
+ continue;
+ early_per_cpu(x86_cpu_to_logical_apicid, cpu) =
+ bigsmp_early_logical_apicid(cpu);
+ }
+
+ pr_info("Overriding APIC driver with %s\n", apic_bigsmp.name);
}
apic_driver(apic_bigsmp);
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index b5254ad..0787bb3 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -200,14 +200,8 @@ void __init default_setup_apic_routing(void)
* - we find more than 8 CPUs in acpi LAPIC listing with xAPIC support
*/
- if (!cmdline_apic && apic == &apic_default) {
- struct apic *bigsmp = generic_bigsmp_probe();
- if (bigsmp) {
- apic = bigsmp;
- printk(KERN_INFO "Overriding APIC driver with %s\n",
- apic->name);
- }
- }
+ if (!cmdline_apic && apic == &apic_default)
+ generic_bigsmp_probe();
#endif
if (apic->setup_apic_routing)
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 34b1859..874c208 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -779,7 +779,12 @@ void __init uv_system_init(void)
for(i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++)
uv_possible_blades +=
hweight64(uv_read_local_mmr( UVH_NODE_PRESENT_TABLE + i * 8));
- printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades());
+
+ /* uv_num_possible_blades() is really the hub count */
+ printk(KERN_INFO "UV: Found %d blades, %d hubs\n",
+ is_uv1_hub() ? uv_num_possible_blades() :
+ (uv_num_possible_blades() + 1) / 2,
+ uv_num_possible_blades());
bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades();
uv_blade_info = kzalloc(bytes, GFP_KERNEL);
@@ -832,6 +837,10 @@ void __init uv_system_init(void)
uv_cpu_hub_info(cpu)->apic_pnode_shift = uvh_apicid.s.pnode_shift;
uv_cpu_hub_info(cpu)->hub_revision = uv_hub_info->hub_revision;
+ uv_cpu_hub_info(cpu)->m_shift = 64 - m_val;
+ uv_cpu_hub_info(cpu)->n_lshift = is_uv2_1_hub() ?
+ (m_val == 40 ? 40 : 39) : m_val;
+
pnode = uv_apicid_to_pnode(apicid);
blade = boot_pnode_to_blade(pnode);
lcpu = uv_blade_info[blade].nr_possible_cpus;
@@ -862,8 +871,7 @@ void __init uv_system_init(void)
if (uv_node_to_blade[nid] >= 0)
continue;
paddr = node_start_pfn(nid) << PAGE_SHIFT;
- paddr = uv_soc_phys_ram_to_gpa(paddr);
- pnode = (paddr >> m_val) & pnode_mask;
+ pnode = uv_gpa_to_pnode(uv_soc_phys_ram_to_gpa(paddr));
blade = boot_pnode_to_blade(pnode);
uv_node_to_blade[nid] = blade;
}
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index c105c53..fde4428 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -330,8 +330,7 @@ static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3)
l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
}
-static void __cpuinit amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf,
- int index)
+static void __cpuinit amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index)
{
static struct amd_l3_cache *__cpuinitdata l3_caches;
int node;
@@ -748,14 +747,16 @@ static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info);
#define CPUID4_INFO_IDX(x, y) (&((per_cpu(ici_cpuid4_info, x))[y]))
#ifdef CONFIG_SMP
-static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
+
+static int __cpuinit cache_shared_amd_cpu_map_setup(unsigned int cpu, int index)
{
- struct _cpuid4_info *this_leaf, *sibling_leaf;
- unsigned long num_threads_sharing;
- int index_msb, i, sibling;
+ struct _cpuid4_info *this_leaf;
+ int ret, i, sibling;
struct cpuinfo_x86 *c = &cpu_data(cpu);
- if ((index == 3) && (c->x86_vendor == X86_VENDOR_AMD)) {
+ ret = 0;
+ if (index == 3) {
+ ret = 1;
for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
if (!per_cpu(ici_cpuid4_info, i))
continue;
@@ -766,8 +767,35 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
set_bit(sibling, this_leaf->shared_cpu_map);
}
}
- return;
+ } else if ((c->x86 == 0x15) && ((index == 1) || (index == 2))) {
+ ret = 1;
+ for_each_cpu(i, cpu_sibling_mask(cpu)) {
+ if (!per_cpu(ici_cpuid4_info, i))
+ continue;
+ this_leaf = CPUID4_INFO_IDX(i, index);
+ for_each_cpu(sibling, cpu_sibling_mask(cpu)) {
+ if (!cpu_online(sibling))
+ continue;
+ set_bit(sibling, this_leaf->shared_cpu_map);
+ }
+ }
}
+
+ return ret;
+}
+
+static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
+{
+ struct _cpuid4_info *this_leaf, *sibling_leaf;
+ unsigned long num_threads_sharing;
+ int index_msb, i;
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
+
+ if (c->x86_vendor == X86_VENDOR_AMD) {
+ if (cache_shared_amd_cpu_map_setup(cpu, index))
+ return;
+ }
+
this_leaf = CPUID4_INFO_IDX(cpu, index);
num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index bab491b..d812fe2 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -508,6 +508,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
unsigned long from = cpuc->lbr_entries[0].from;
unsigned long old_to, to = cpuc->lbr_entries[0].to;
unsigned long ip = regs->ip;
+ int is_64bit = 0;
/*
* We don't need to fixup if the PEBS assist is fault like
@@ -559,7 +560,10 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
} else
kaddr = (void *)to;
- kernel_insn_init(&insn, kaddr);
+#ifdef CONFIG_X86_64
+ is_64bit = kernel_ip(to) || !test_thread_flag(TIF_IA32);
+#endif
+ insn_init(&insn, kaddr, is_64bit);
insn_get_length(&insn);
to += insn.length;
} while (to < ip);
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 5c1a9197..edb3d46 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -98,12 +98,6 @@
#endif
.endm
-#ifdef CONFIG_VM86
-#define resume_userspace_sig check_userspace
-#else
-#define resume_userspace_sig resume_userspace
-#endif
-
/*
* User gs save/restore
*
@@ -327,10 +321,19 @@ ret_from_exception:
preempt_stop(CLBR_ANY)
ret_from_intr:
GET_THREAD_INFO(%ebp)
-check_userspace:
+resume_userspace_sig:
+#ifdef CONFIG_VM86
movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS
movb PT_CS(%esp), %al
andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax
+#else
+ /*
+ * We can be coming here from a syscall done in the kernel space,
+ * e.g. a failed kernel_execve().
+ */
+ movl PT_CS(%esp), %eax
+ andl $SEGMENT_RPL_MASK, %eax
+#endif
cmpl $USER_RPL, %eax
jb resume_kernel # not returning to v8086 or userspace
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 6781765..aa083d3 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -1054,6 +1054,14 @@ int hpet_rtc_timer_init(void)
}
EXPORT_SYMBOL_GPL(hpet_rtc_timer_init);
+static void hpet_disable_rtc_channel(void)
+{
+ unsigned long cfg;
+ cfg = hpet_readl(HPET_T1_CFG);
+ cfg &= ~HPET_TN_ENABLE;
+ hpet_writel(cfg, HPET_T1_CFG);
+}
+
/*
* The functions below are called from rtc driver.
* Return 0 if HPET is not being used.
@@ -1065,6 +1073,9 @@ int hpet_mask_rtc_irq_bit(unsigned long bit_mask)
return 0;
hpet_rtc_flags &= ~bit_mask;
+ if (unlikely(!hpet_rtc_flags))
+ hpet_disable_rtc_channel();
+
return 1;
}
EXPORT_SYMBOL_GPL(hpet_mask_rtc_irq_bit);
@@ -1130,15 +1141,11 @@ EXPORT_SYMBOL_GPL(hpet_rtc_dropped_irq);
static void hpet_rtc_timer_reinit(void)
{
- unsigned int cfg, delta;
+ unsigned int delta;
int lost_ints = -1;
- if (unlikely(!hpet_rtc_flags)) {
- cfg = hpet_readl(HPET_T1_CFG);
- cfg &= ~HPET_TN_ENABLE;
- hpet_writel(cfg, HPET_T1_CFG);
- return;
- }
+ if (unlikely(!hpet_rtc_flags))
+ hpet_disable_rtc_channel();
if (!(hpet_rtc_flags & RTC_PIE) || hpet_pie_limit)
delta = hpet_default_delta;
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 5f9ecff..fc1f48d 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -43,6 +43,8 @@
#include <linux/smp.h>
#include <linux/nmi.h>
#include <linux/hw_breakpoint.h>
+#include <linux/uaccess.h>
+#include <linux/memory.h>
#include <asm/debugreg.h>
#include <asm/apicdef.h>
@@ -710,6 +712,64 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long ip)
regs->ip = ip;
}
+int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
+{
+ int err;
+ char opc[BREAK_INSTR_SIZE];
+
+ bpt->type = BP_BREAKPOINT;
+ err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr,
+ BREAK_INSTR_SIZE);
+ if (err)
+ return err;
+ err = probe_kernel_write((char *)bpt->bpt_addr,
+ arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE);
+#ifdef CONFIG_DEBUG_RODATA
+ if (!err)
+ return err;
+ /*
+ * It is safe to call text_poke() because normal kernel execution
+ * is stopped on all cores, so long as the text_mutex is not locked.
+ */
+ if (mutex_is_locked(&text_mutex))
+ return -EBUSY;
+ text_poke((void *)bpt->bpt_addr, arch_kgdb_ops.gdb_bpt_instr,
+ BREAK_INSTR_SIZE);
+ err = probe_kernel_read(opc, (char *)bpt->bpt_addr, BREAK_INSTR_SIZE);
+ if (err)
+ return err;
+ if (memcmp(opc, arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE))
+ return -EINVAL;
+ bpt->type = BP_POKE_BREAKPOINT;
+#endif /* CONFIG_DEBUG_RODATA */
+ return err;
+}
+
+int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt)
+{
+#ifdef CONFIG_DEBUG_RODATA
+ int err;
+ char opc[BREAK_INSTR_SIZE];
+
+ if (bpt->type != BP_POKE_BREAKPOINT)
+ goto knl_write;
+ /*
+ * It is safe to call text_poke() because normal kernel execution
+ * is stopped on all cores, so long as the text_mutex is not locked.
+ */
+ if (mutex_is_locked(&text_mutex))
+ goto knl_write;
+ text_poke((void *)bpt->bpt_addr, bpt->saved_instr, BREAK_INSTR_SIZE);
+ err = probe_kernel_read(opc, (char *)bpt->bpt_addr, BREAK_INSTR_SIZE);
+ if (err || memcmp(opc, bpt->saved_instr, BREAK_INSTR_SIZE))
+ goto knl_write;
+ return err;
+knl_write:
+#endif /* CONFIG_DEBUG_RODATA */
+ return probe_kernel_write((char *)bpt->bpt_addr,
+ (char *)bpt->saved_instr, BREAK_INSTR_SIZE);
+}
+
struct kgdb_arch arch_kgdb_ops = {
/* Breakpoint instruction: */
.gdb_bpt_instr = { 0xcc },
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index f1a6244..794bc95 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -75,8 +75,10 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
/*
* Undefined/reserved opcodes, conditional jump, Opcode Extension
* Groups, and some special opcodes can not boost.
+ * This is non-const to keep gcc from statically optimizing it out, as
+ * variable_test_bit makes gcc think only *(unsigned long*) is used.
*/
-static const u32 twobyte_is_boostable[256 / 32] = {
+static u32 twobyte_is_boostable[256 / 32] = {
/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
/* ---------------------------------------------- */
W(0x00, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0) | /* 00 */
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index c561038..b727450 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -298,13 +298,33 @@ free_table:
return state;
}
+/*
+ * AMD microcode firmware naming convention: for families before 15h the
+ * firmware is in the legacy file:
+ *
+ * amd-ucode/microcode_amd.bin
+ *
+ * This legacy file is always smaller than 2K in size.
+ *
+ * Starting at family 15h they are in family specific firmware files:
+ *
+ * amd-ucode/microcode_amd_fam15h.bin
+ * amd-ucode/microcode_amd_fam16h.bin
+ * ...
+ *
+ * These might be larger than 2K.
+ */
static enum ucode_state request_microcode_amd(int cpu, struct device *device)
{
- const char *fw_name = "amd-ucode/microcode_amd.bin";
+ char fw_name[36] = "amd-ucode/microcode_amd.bin";
const struct firmware *fw;
enum ucode_state ret = UCODE_NFOUND;
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
+
+ if (c->x86 >= 0x15)
+ snprintf(fw_name, sizeof(fw_name), "amd-ucode/microcode_amd_fam%.2xh.bin", c->x86);
- if (request_firmware(&fw, fw_name, device)) {
+ if (request_firmware(&fw, (const char *)fw_name, device)) {
pr_err("failed to load file %s\n", fw_name);
goto out;
}
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 9103b89..0741b062 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -95,8 +95,8 @@ static void __init MP_bus_info(struct mpc_bus *m)
}
#endif
+ set_bit(m->busid, mp_bus_not_pci);
if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA) - 1) == 0) {
- set_bit(m->busid, mp_bus_not_pci);
#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
mp_bus_id_to_type[m->busid] = MP_BUS_ISA;
#endif
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index a3d0dc5..fcdb1b3 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -293,22 +293,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
*next = &next_p->thread;
int cpu = smp_processor_id();
struct tss_struct *tss = &per_cpu(init_tss, cpu);
- bool preload_fpu;
+ fpu_switch_t fpu;
/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
- /*
- * If the task has used fpu the last 5 timeslices, just do a full
- * restore of the math state immediately to avoid the trap; the
- * chances of needing FPU soon are obviously high now
- */
- preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
-
- __unlazy_fpu(prev_p);
-
- /* we're going to use this soon, after a few expensive things */
- if (preload_fpu)
- prefetch(next->fpu.state);
+ fpu = switch_fpu_prepare(prev_p, next_p);
/*
* Reload esp0.
@@ -348,11 +337,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
__switch_to_xtra(prev_p, next_p, tss);
- /* If we're going to preload the fpu context, make sure clts
- is run while we're batching the cpu state updates. */
- if (preload_fpu)
- clts();
-
/*
* Leave lazy mode, flushing any hypercalls made here.
* This must be done before restoring TLS segments so
@@ -362,15 +346,14 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
*/
arch_end_context_switch(next_p);
- if (preload_fpu)
- __math_state_restore();
-
/*
* Restore %gs if needed (which is common)
*/
if (prev->gs | next->gs)
lazy_load_gs(next->gs);
+ switch_fpu_finish(next_p, fpu);
+
percpu_write(current_task, next_p);
return prev_p;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 63c8aed..eeb5004 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -363,18 +363,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
int cpu = smp_processor_id();
struct tss_struct *tss = &per_cpu(init_tss, cpu);
unsigned fsindex, gsindex;
- bool preload_fpu;
+ fpu_switch_t fpu;
- /*
- * If the task has used fpu the last 5 timeslices, just do a full
- * restore of the math state immediately to avoid the trap; the
- * chances of needing FPU soon are obviously high now
- */
- preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
-
- /* we're going to use this soon, after a few expensive things */
- if (preload_fpu)
- prefetch(next->fpu.state);
+ fpu = switch_fpu_prepare(prev_p, next_p);
/*
* Reload esp0, LDT and the page table pointer:
@@ -404,13 +395,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
load_TLS(next, cpu);
- /* Must be after DS reload */
- __unlazy_fpu(prev_p);
-
- /* Make sure cpu is ready for new context */
- if (preload_fpu)
- clts();
-
/*
* Leave lazy mode, flushing any hypercalls made here.
* This must be done before restoring TLS segments so
@@ -451,6 +435,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
prev->gsindex = gsindex;
+ switch_fpu_finish(next_p, fpu);
+
/*
* Switch the PDA and FPU contexts.
*/
@@ -469,13 +455,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
__switch_to_xtra(prev_p, next_p, tss);
- /*
- * Preload the FPU context, now that we've determined that the
- * task is likely to be using it.
- */
- if (preload_fpu)
- __math_state_restore();
-
return prev_p;
}
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 9242436..d4a705f 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -124,7 +124,7 @@ __setup("reboot=", reboot_setup);
*/
/*
- * Some machines require the "reboot=b" commandline option,
+ * Some machines require the "reboot=b" or "reboot=k" commandline options,
* this quirk makes that automatic.
*/
static int __init set_bios_reboot(const struct dmi_system_id *d)
@@ -136,6 +136,15 @@ static int __init set_bios_reboot(const struct dmi_system_id *d)
return 0;
}
+static int __init set_kbd_reboot(const struct dmi_system_id *d)
+{
+ if (reboot_type != BOOT_KBD) {
+ reboot_type = BOOT_KBD;
+ printk(KERN_INFO "%s series board detected. Selecting KBD-method for reboot.\n", d->ident);
+ }
+ return 0;
+}
+
static struct dmi_system_id __initdata reboot_dmi_table[] = {
{ /* Handle problems with rebooting on Dell E520's */
.callback = set_bios_reboot,
@@ -295,7 +304,7 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
},
},
{ /* Handle reboot issue on Acer Aspire one */
- .callback = set_bios_reboot,
+ .callback = set_kbd_reboot,
.ident = "Acer Aspire One A110",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c
index 6bb7b85..bcfec2d 100644
--- a/arch/x86/kernel/tls.c
+++ b/arch/x86/kernel/tls.c
@@ -163,7 +163,7 @@ int regset_tls_get(struct task_struct *target, const struct user_regset *regset,
{
const struct desc_struct *tls;
- if (pos > GDT_ENTRY_TLS_ENTRIES * sizeof(struct user_desc) ||
+ if (pos >= GDT_ENTRY_TLS_ENTRIES * sizeof(struct user_desc) ||
(pos % sizeof(struct user_desc)) != 0 ||
(count % sizeof(struct user_desc)) != 0)
return -EINVAL;
@@ -198,7 +198,7 @@ int regset_tls_set(struct task_struct *target, const struct user_regset *regset,
struct user_desc infobuf[GDT_ENTRY_TLS_ENTRIES];
const struct user_desc *info;
- if (pos > GDT_ENTRY_TLS_ENTRIES * sizeof(struct user_desc) ||
+ if (pos >= GDT_ENTRY_TLS_ENTRIES * sizeof(struct user_desc) ||
(pos % sizeof(struct user_desc)) != 0 ||
(count % sizeof(struct user_desc)) != 0)
return -EINVAL;
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index b9b6716..1b26e01 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -717,25 +717,34 @@ asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void)
}
/*
- * __math_state_restore assumes that cr0.TS is already clear and the
- * fpu state is all ready for use. Used during context switch.
+ * This gets called with the process already owning the
+ * FPU state, and with CR0.TS cleared. It just needs to
+ * restore the FPU register state.
*/
-void __math_state_restore(void)
+void __math_state_restore(struct task_struct *tsk)
{
- struct thread_info *thread = current_thread_info();
- struct task_struct *tsk = thread->task;
+ /* We need a safe address that is cheap to find and that is already
+ in L1. We've just brought in "tsk->thread.has_fpu", so use that */
+#define safe_address (tsk->thread.has_fpu)
+
+ /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
+ is pending. Clear the x87 state here by setting it to fixed
+ values. safe_address is a random variable that should be in L1 */
+ alternative_input(
+ ASM_NOP8 ASM_NOP2,
+ "emms\n\t" /* clear stack tags */
+ "fildl %P[addr]", /* set F?P to defined value */
+ X86_FEATURE_FXSAVE_LEAK,
+ [addr] "m" (safe_address));
/*
* Paranoid restore. send a SIGSEGV if we fail to restore the state.
*/
if (unlikely(restore_fpu_checking(tsk))) {
- stts();
+ __thread_fpu_end(tsk);
force_sig(SIGSEGV, tsk);
return;
}
-
- thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */
- tsk->fpu_counter++;
}
/*
@@ -745,13 +754,12 @@ void __math_state_restore(void)
* Careful.. There are problems with IBM-designed IRQ13 behaviour.
* Don't touch unless you *really* know how it works.
*
- * Must be called with kernel preemption disabled (in this case,
- * local interrupts are disabled at the call-site in entry.S).
+ * Must be called with kernel preemption disabled (eg with local
+ * interrupts disabled, as in the case of do_device_not_available).
*/
-asmlinkage void math_state_restore(void)
+void math_state_restore(void)
{
- struct thread_info *thread = current_thread_info();
- struct task_struct *tsk = thread->task;
+ struct task_struct *tsk = current;
if (!tsk_used_math(tsk)) {
local_irq_enable();
@@ -768,9 +776,10 @@ asmlinkage void math_state_restore(void)
local_irq_disable();
}
- clts(); /* Allow maths ops (or we recurse) */
+ __thread_fpu_begin(tsk);
+ __math_state_restore(tsk);
- __math_state_restore();
+ tsk->fpu_counter++;
}
EXPORT_SYMBOL_GPL(math_state_restore);
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 6cc6922..4406c03 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -623,7 +623,8 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
if (cpu_khz) {
*scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz;
- *offset = ns_now - (tsc_now * *scale >> CYC2NS_SCALE_FACTOR);
+ *offset = ns_now - mult_frac(tsc_now, *scale,
+ (1UL << CYC2NS_SCALE_FACTOR));
}
sched_clock_idle_wakeup_event(0);
@@ -956,6 +957,16 @@ static int __init init_tsc_clocksource(void)
clocksource_tsc.rating = 0;
clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
}
+
+ /*
+ * Trust the results of the earlier calibration on systems
+ * exporting a reliable TSC.
+ */
+ if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) {
+ clocksource_register_khz(&clocksource_tsc, tsc_khz);
+ return 0;
+ }
+
schedule_delayed_work(&tsc_irqwork, 0);
return 0;
}
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 863f875..04b8726 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -172,6 +172,7 @@ static void mark_screen_rdonly(struct mm_struct *mm)
spinlock_t *ptl;
int i;
+ down_write(&mm->mmap_sem);
pgd = pgd_offset(mm, 0xA0000);
if (pgd_none_or_clear_bad(pgd))
goto out;
@@ -190,6 +191,7 @@ static void mark_screen_rdonly(struct mm_struct *mm)
}
pte_unmap_unlock(pte, ptl);
out:
+ up_write(&mm->mmap_sem);
flush_tlb();
}
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index a391134..7110911 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -47,7 +47,7 @@ void __sanitize_i387_state(struct task_struct *tsk)
if (!fx)
return;
- BUG_ON(task_thread_info(tsk)->status & TS_USEDFPU);
+ BUG_ON(__thread_has_fpu(tsk));
xstate_bv = tsk->thread.fpu.state->xsave.xsave_hdr.xstate_bv;
@@ -168,7 +168,7 @@ int save_i387_xstate(void __user *buf)
if (!used_math())
return 0;
- if (task_thread_info(tsk)->status & TS_USEDFPU) {
+ if (user_has_fpu()) {
if (use_xsave())
err = xsave_user(buf);
else
@@ -176,8 +176,7 @@ int save_i387_xstate(void __user *buf)
if (err)
return err;
- task_thread_info(tsk)->status &= ~TS_USEDFPU;
- stts();
+ user_fpu_end();
} else {
sanitize_i387_state(tsk);
if (__copy_to_user(buf, &tsk->thread.fpu.state->fxsave,
@@ -292,10 +291,7 @@ int restore_i387_xstate(void __user *buf)
return err;
}
- if (!(task_thread_info(current)->status & TS_USEDFPU)) {
- clts();
- task_thread_info(current)->status |= TS_USEDFPU;
- }
+ user_fpu_begin();
if (use_xsave())
err = restore_user_xstate(buf);
else
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index adc9867..3e7d913 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -1901,6 +1901,51 @@ setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
ss->p = 1;
}
+static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt)
+{
+ struct x86_emulate_ops *ops = ctxt->ops;
+ u32 eax, ebx, ecx, edx;
+
+ /*
+ * syscall should always be enabled in longmode - so only become
+ * vendor specific (cpuid) if other modes are active...
+ */
+ if (ctxt->mode == X86EMUL_MODE_PROT64)
+ return true;
+
+ eax = 0x00000000;
+ ecx = 0x00000000;
+ if (ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx)) {
+ /*
+ * Intel ("GenuineIntel")
+ * remark: Intel CPUs only support "syscall" in 64bit
+ * longmode. Also a 64bit guest with a
+ * 32bit compat-app running will #UD !! While this
+ * behaviour can be fixed (by emulating) into AMD
+ * response - CPUs of AMD can't behave like Intel.
+ */
+ if (ebx == X86EMUL_CPUID_VENDOR_GenuineIntel_ebx &&
+ ecx == X86EMUL_CPUID_VENDOR_GenuineIntel_ecx &&
+ edx == X86EMUL_CPUID_VENDOR_GenuineIntel_edx)
+ return false;
+
+ /* AMD ("AuthenticAMD") */
+ if (ebx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx &&
+ ecx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx &&
+ edx == X86EMUL_CPUID_VENDOR_AuthenticAMD_edx)
+ return true;
+
+ /* AMD ("AMDisbetter!") */
+ if (ebx == X86EMUL_CPUID_VENDOR_AMDisbetterI_ebx &&
+ ecx == X86EMUL_CPUID_VENDOR_AMDisbetterI_ecx &&
+ edx == X86EMUL_CPUID_VENDOR_AMDisbetterI_edx)
+ return true;
+ }
+
+ /* default: (not Intel, not AMD), apply Intel's stricter rules... */
+ return false;
+}
+
static int
emulate_syscall(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
{
@@ -1915,9 +1960,15 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
ctxt->mode == X86EMUL_MODE_VM86)
return emulate_ud(ctxt);
+ if (!(em_syscall_is_enabled(ctxt)))
+ return emulate_ud(ctxt);
+
ops->get_msr(ctxt, MSR_EFER, &efer);
setup_syscalls_segments(ctxt, ops, &cs, &ss);
+ if (!(efer & EFER_SCE))
+ return emulate_ud(ctxt);
+
ops->get_msr(ctxt, MSR_STAR, &msr_data);
msr_data >>= 32;
cs_sel = (u16)(msr_data & 0xfffc);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index d48ec60..2ad060a 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -948,7 +948,7 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
#ifdef CONFIG_X86_64
wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
#endif
- if (current_thread_info()->status & TS_USEDFPU)
+ if (__thread_has_fpu(current))
clts();
load_gdt(&__get_cpu_var(host_gdt));
}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 77c9d86..fbb0936 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4407,6 +4407,28 @@ static int emulator_intercept(struct x86_emulate_ctxt *ctxt,
return kvm_x86_ops->check_intercept(emul_to_vcpu(ctxt), info, stage);
}
+static bool emulator_get_cpuid(struct x86_emulate_ctxt *ctxt,
+ u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
+{
+ struct kvm_cpuid_entry2 *cpuid = NULL;
+
+ if (eax && ecx)
+ cpuid = kvm_find_cpuid_entry(emul_to_vcpu(ctxt),
+ *eax, *ecx);
+
+ if (cpuid) {
+ *eax = cpuid->eax;
+ *ecx = cpuid->ecx;
+ if (ebx)
+ *ebx = cpuid->ebx;
+ if (edx)
+ *edx = cpuid->edx;
+ return true;
+ }
+
+ return false;
+}
+
static struct x86_emulate_ops emulate_ops = {
.read_std = kvm_read_guest_virt_system,
.write_std = kvm_write_guest_virt_system,
@@ -4437,6 +4459,7 @@ static struct x86_emulate_ops emulate_ops = {
.get_fpu = emulator_get_fpu,
.put_fpu = emulator_put_fpu,
.intercept = emulator_intercept,
+ .get_cpuid = emulator_get_cpuid,
};
static void cache_all_regs(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
index fc45ba8..e395693 100644
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -48,9 +48,9 @@ static void delay_loop(unsigned long loops)
}
/* TSC based delay: */
-static void delay_tsc(unsigned long loops)
+static void delay_tsc(unsigned long __loops)
{
- unsigned long bclock, now;
+ u32 bclock, now, loops = __loops;
int cpu;
preempt_disable();
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index dbe34b9..dd74e46 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -108,16 +108,6 @@ static inline void get_head_page_multiple(struct page *page, int nr)
SetPageReferenced(page);
}
-static inline void get_huge_page_tail(struct page *page)
-{
- /*
- * __split_huge_page_refcount() cannot run
- * from under us.
- */
- VM_BUG_ON(atomic_read(&page->_count) < 0);
- atomic_inc(&page->_count);
-}
-
static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr,
unsigned long end, int write, struct page **pages, int *nr)
{
@@ -211,6 +201,8 @@ static noinline int gup_huge_pud(pud_t pud, unsigned long addr,
do {
VM_BUG_ON(compound_head(page) != head);
pages[*nr] = page;
+ if (PageTail(page))
+ get_huge_page_tail(page);
(*nr)++;
page++;
refs++;
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
index b499626..f4f29b1 100644
--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -45,6 +45,7 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot)
vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
BUG_ON(!pte_none(*(kmap_pte-idx)));
set_pte(kmap_pte-idx, mk_pte(page, prot));
+ arch_flush_lazy_mmu_mode();
return (void *)vaddr;
}
@@ -88,6 +89,7 @@ void __kunmap_atomic(void *kvaddr)
*/
kpte_clear_flush(kmap_pte-idx, vaddr);
kmap_atomic_idx_pop();
+ arch_flush_lazy_mmu_mode();
}
#ifdef CONFIG_DEBUG_HIGHMEM
else {
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 1dab519..f927429 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -87,9 +87,9 @@ static unsigned long mmap_rnd(void)
*/
if (current->flags & PF_RANDOMIZE) {
if (mmap_is_ia32())
- rnd = (long)get_random_int() % (1<<8);
+ rnd = get_random_int() % (1<<8);
else
- rnd = (long)(get_random_int() % (1<<28));
+ rnd = get_random_int() % (1<<28);
}
return rnd << PAGE_SHIFT;
}
diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c
index 81dbfde..7efd0c6 100644
--- a/arch/x86/mm/srat.c
+++ b/arch/x86/mm/srat.c
@@ -104,6 +104,8 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0)
return;
pxm = pa->proximity_domain_lo;
+ if (acpi_srat_revision >= 2)
+ pxm |= *((unsigned int*)pa->proximity_domain_hi) << 8;
node = setup_node(pxm);
if (node < 0) {
printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
@@ -155,6 +157,8 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
start = ma->base_address;
end = start + ma->length;
pxm = ma->proximity_domain;
+ if (acpi_srat_revision <= 1)
+ pxm &= 0xff;
node = setup_node(pxm);
if (node < 0) {
printk(KERN_ERR "SRAT: Too many proximity domains.\n");
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index bfab3fa..5a5b6e4 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -151,17 +151,18 @@ void bpf_jit_compile(struct sk_filter *fp)
cleanup_addr = proglen; /* epilogue address */
for (pass = 0; pass < 10; pass++) {
+ u8 seen_or_pass0 = (pass == 0) ? (SEEN_XREG | SEEN_DATAREF | SEEN_MEM) : seen;
/* no prologue/epilogue for trivial filters (RET something) */
proglen = 0;
prog = temp;
- if (seen) {
+ if (seen_or_pass0) {
EMIT4(0x55, 0x48, 0x89, 0xe5); /* push %rbp; mov %rsp,%rbp */
EMIT4(0x48, 0x83, 0xec, 96); /* subq $96,%rsp */
/* note : must save %rbx in case bpf_error is hit */
- if (seen & (SEEN_XREG | SEEN_DATAREF))
+ if (seen_or_pass0 & (SEEN_XREG | SEEN_DATAREF))
EMIT4(0x48, 0x89, 0x5d, 0xf8); /* mov %rbx, -8(%rbp) */
- if (seen & SEEN_XREG)
+ if (seen_or_pass0 & SEEN_XREG)
CLEAR_X(); /* make sure we dont leek kernel memory */
/*
@@ -170,7 +171,7 @@ void bpf_jit_compile(struct sk_filter *fp)
* r9 = skb->len - skb->data_len
* r8 = skb->data
*/
- if (seen & SEEN_DATAREF) {
+ if (seen_or_pass0 & SEEN_DATAREF) {
if (offsetof(struct sk_buff, len) <= 127)
/* mov off8(%rdi),%r9d */
EMIT4(0x44, 0x8b, 0x4f, offsetof(struct sk_buff, len));
@@ -260,9 +261,14 @@ void bpf_jit_compile(struct sk_filter *fp)
case BPF_S_ALU_DIV_X: /* A /= X; */
seen |= SEEN_XREG;
EMIT2(0x85, 0xdb); /* test %ebx,%ebx */
- if (pc_ret0 != -1)
- EMIT_COND_JMP(X86_JE, addrs[pc_ret0] - (addrs[i] - 4));
- else {
+ if (pc_ret0 > 0) {
+ /* addrs[pc_ret0 - 1] is start address of target
+ * (addrs[i] - 4) is the address following this jmp
+ * ("xor %edx,%edx; div %ebx" being 4 bytes long)
+ */
+ EMIT_COND_JMP(X86_JE, addrs[pc_ret0 - 1] -
+ (addrs[i] - 4));
+ } else {
EMIT_COND_JMP(X86_JNE, 2 + 5);
CLEAR_A();
EMIT1_off32(0xe9, cleanup_addr - (addrs[i] - 4)); /* jmp .+off32 */
@@ -283,7 +289,7 @@ void bpf_jit_compile(struct sk_filter *fp)
EMIT2(0x24, K & 0xFF); /* and imm8,%al */
} else if (K >= 0xFFFF0000) {
EMIT2(0x66, 0x25); /* and imm16,%ax */
- EMIT2(K, 2);
+ EMIT(K, 2);
} else {
EMIT1_off32(0x25, K); /* and imm32,%eax */
}
@@ -335,12 +341,12 @@ void bpf_jit_compile(struct sk_filter *fp)
}
/* fallinto */
case BPF_S_RET_A:
- if (seen) {
+ if (seen_or_pass0) {
if (i != flen - 1) {
EMIT_JMP(cleanup_addr - addrs[i]);
break;
}
- if (seen & SEEN_XREG)
+ if (seen_or_pass0 & SEEN_XREG)
EMIT4(0x48, 0x8b, 0x5d, 0xf8); /* mov -8(%rbp),%rbx */
EMIT1(0xc9); /* leaveq */
}
@@ -469,8 +475,10 @@ void bpf_jit_compile(struct sk_filter *fp)
case BPF_S_LD_W_ABS:
func = sk_load_word;
common_load: seen |= SEEN_DATAREF;
- if ((int)K < 0)
+ if ((int)K < 0) {
+ /* Abort the JIT because __load_pointer() is needed. */
goto out;
+ }
t_offset = func - (image + addrs[i]);
EMIT1_off32(0xbe, K); /* mov imm32,%esi */
EMIT1_off32(0xe8, t_offset); /* call */
@@ -483,13 +491,8 @@ common_load: seen |= SEEN_DATAREF;
goto common_load;
case BPF_S_LDX_B_MSH:
if ((int)K < 0) {
- if (pc_ret0 != -1) {
- EMIT_JMP(addrs[pc_ret0] - addrs[i]);
- break;
- }
- CLEAR_A();
- EMIT_JMP(cleanup_addr - addrs[i]);
- break;
+ /* Abort the JIT because __load_pointer() is needed. */
+ goto out;
}
seen |= SEEN_DATAREF | SEEN_XREG;
t_offset = sk_load_byte_msh - (image + addrs[i]);
@@ -568,8 +571,8 @@ cond_branch: f_offset = addrs[i + filter[i].jf] - addrs[i];
break;
}
if (filter[i].jt != 0) {
- if (filter[i].jf)
- t_offset += is_near(f_offset) ? 2 : 6;
+ if (filter[i].jf && f_offset)
+ t_offset += is_near(f_offset) ? 2 : 5;
EMIT_COND_JMP(t_op, t_offset);
if (filter[i].jf)
EMIT_JMP(f_offset);
@@ -599,13 +602,14 @@ cond_branch: f_offset = addrs[i + filter[i].jf] - addrs[i];
* use it to give the cleanup instruction(s) addr
*/
cleanup_addr = proglen - 1; /* ret */
- if (seen)
+ if (seen_or_pass0)
cleanup_addr -= 1; /* leaveq */
- if (seen & SEEN_XREG)
+ if (seen_or_pass0 & SEEN_XREG)
cleanup_addr -= 4; /* mov -8(%rbp),%rbx */
if (image) {
- WARN_ON(proglen != oldproglen);
+ if (proglen != oldproglen)
+ pr_err("bpb_jit_compile proglen=%u != oldproglen=%u\n", proglen, oldproglen);
break;
}
if (proglen == oldproglen) {
diff --git a/arch/x86/oprofile/init.c b/arch/x86/oprofile/init.c
index cdfe4c5..f148cf6 100644
--- a/arch/x86/oprofile/init.c
+++ b/arch/x86/oprofile/init.c
@@ -21,6 +21,7 @@ extern int op_nmi_timer_init(struct oprofile_operations *ops);
extern void op_nmi_exit(void);
extern void x86_backtrace(struct pt_regs * const regs, unsigned int depth);
+static int nmi_timer;
int __init oprofile_arch_init(struct oprofile_operations *ops)
{
@@ -31,8 +32,9 @@ int __init oprofile_arch_init(struct oprofile_operations *ops)
#ifdef CONFIG_X86_LOCAL_APIC
ret = op_nmi_init(ops);
#endif
+ nmi_timer = (ret != 0);
#ifdef CONFIG_X86_IO_APIC
- if (ret < 0)
+ if (nmi_timer)
ret = op_nmi_timer_init(ops);
#endif
ops->backtrace = x86_backtrace;
@@ -44,6 +46,7 @@ int __init oprofile_arch_init(struct oprofile_operations *ops)
void oprofile_arch_exit(void)
{
#ifdef CONFIG_X86_LOCAL_APIC
- op_nmi_exit();
+ if (!nmi_timer)
+ op_nmi_exit();
#endif
}
diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile
index 6b8759f..d24d3da 100644
--- a/arch/x86/pci/Makefile
+++ b/arch/x86/pci/Makefile
@@ -18,8 +18,9 @@ obj-$(CONFIG_X86_NUMAQ) += numaq_32.o
obj-$(CONFIG_X86_MRST) += mrst.o
obj-y += common.o early.o
-obj-y += amd_bus.o bus_numa.o
+obj-y += bus_numa.o
+obj-$(CONFIG_AMD_NB) += amd_bus.o
obj-$(CONFIG_PCI_CNB20LE_QUIRK) += broadcom_bus.o
ifeq ($(CONFIG_PCI_DEBUG),y)
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 50b3f14..0473a8f 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -54,6 +54,16 @@ static const struct dmi_system_id pci_use_crs_table[] __initconst = {
DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."),
},
},
+ /* https://bugzilla.kernel.org/show_bug.cgi?id=42619 */
+ {
+ .callback = set_use_crs,
+ .ident = "MSI MS-7253",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "MICRO-STAR INTERNATIONAL CO., LTD"),
+ DMI_MATCH(DMI_BOARD_NAME, "MS-7253"),
+ DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies, LTD"),
+ },
+ },
{}
};
@@ -149,7 +159,7 @@ setup_resource(struct acpi_resource *acpi_res, void *data)
struct acpi_resource_address64 addr;
acpi_status status;
unsigned long flags;
- u64 start, end;
+ u64 start, orig_end, end;
status = resource_to_addr(acpi_res, &addr);
if (!ACPI_SUCCESS(status))
@@ -165,7 +175,21 @@ setup_resource(struct acpi_resource *acpi_res, void *data)
return AE_OK;
start = addr.minimum + addr.translation_offset;
- end = addr.maximum + addr.translation_offset;
+ orig_end = end = addr.maximum + addr.translation_offset;
+
+ /* Exclude non-addressable range or non-addressable portion of range */
+ end = min(end, (u64)iomem_resource.end);
+ if (end <= start) {
+ dev_info(&info->bridge->dev,
+ "host bridge window [%#llx-%#llx] "
+ "(ignored, not CPU addressable)\n", start, orig_end);
+ return AE_OK;
+ } else if (orig_end != end) {
+ dev_info(&info->bridge->dev,
+ "host bridge window [%#llx-%#llx] "
+ "([%#llx-%#llx] ignored, not CPU addressable)\n",
+ start, orig_end, end + 1, orig_end);
+ }
res = &info->res[info->res_num];
res->name = info->name;
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c
index 026e493..385a940 100644
--- a/arch/x86/pci/amd_bus.c
+++ b/arch/x86/pci/amd_bus.c
@@ -30,34 +30,6 @@ static struct pci_hostbridge_probe pci_probes[] __initdata = {
{ 0, 0x18, PCI_VENDOR_ID_AMD, 0x1300 },
};
-static u64 __initdata fam10h_mmconf_start;
-static u64 __initdata fam10h_mmconf_end;
-static void __init get_pci_mmcfg_amd_fam10h_range(void)
-{
- u32 address;
- u64 base, msr;
- unsigned segn_busn_bits;
-
- /* assume all cpus from fam10h have mmconf */
- if (boot_cpu_data.x86 < 0x10)
- return;
-
- address = MSR_FAM10H_MMIO_CONF_BASE;
- rdmsrl(address, msr);
-
- /* mmconfig is not enable */
- if (!(msr & FAM10H_MMIO_CONF_ENABLE))
- return;
-
- base = msr & (FAM10H_MMIO_CONF_BASE_MASK<<FAM10H_MMIO_CONF_BASE_SHIFT);
-
- segn_busn_bits = (msr >> FAM10H_MMIO_CONF_BUSRANGE_SHIFT) &
- FAM10H_MMIO_CONF_BUSRANGE_MASK;
-
- fam10h_mmconf_start = base;
- fam10h_mmconf_end = base + (1ULL<<(segn_busn_bits + 20)) - 1;
-}
-
#define RANGE_NUM 16
/**
@@ -85,6 +57,9 @@ static int __init early_fill_mp_bus_info(void)
u64 val;
u32 address;
bool found;
+ struct resource fam10h_mmconf_res, *fam10h_mmconf;
+ u64 fam10h_mmconf_start;
+ u64 fam10h_mmconf_end;
if (!early_pci_allowed())
return -1;
@@ -211,12 +186,17 @@ static int __init early_fill_mp_bus_info(void)
subtract_range(range, RANGE_NUM, 0, end);
/* get mmconfig */
- get_pci_mmcfg_amd_fam10h_range();
+ fam10h_mmconf = amd_get_mmconfig_range(&fam10h_mmconf_res);
/* need to take out mmconf range */
- if (fam10h_mmconf_end) {
- printk(KERN_DEBUG "Fam 10h mmconf [%llx, %llx]\n", fam10h_mmconf_start, fam10h_mmconf_end);
+ if (fam10h_mmconf) {
+ printk(KERN_DEBUG "Fam 10h mmconf %pR\n", fam10h_mmconf);
+ fam10h_mmconf_start = fam10h_mmconf->start;
+ fam10h_mmconf_end = fam10h_mmconf->end;
subtract_range(range, RANGE_NUM, fam10h_mmconf_start,
fam10h_mmconf_end + 1);
+ } else {
+ fam10h_mmconf_start = 0;
+ fam10h_mmconf_end = 0;
}
/* mmio resource */
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index f567965..6e96e65 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -308,7 +308,7 @@ int __init pci_xen_init(void)
int __init pci_xen_hvm_init(void)
{
- if (!xen_feature(XENFEAT_hvm_pirqs))
+ if (!xen_have_vector_callback || !xen_feature(XENFEAT_hvm_pirqs))
return 0;
#ifdef CONFIG_ACPI
diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c
index 7000e74..fe73276 100644
--- a/arch/x86/platform/mrst/mrst.c
+++ b/arch/x86/platform/mrst/mrst.c
@@ -678,36 +678,40 @@ static int __init sfi_parse_devs(struct sfi_table_header *table)
pentry = (struct sfi_device_table_entry *)sb->pentry;
for (i = 0; i < num; i++, pentry++) {
- if (pentry->irq != (u8)0xff) { /* native RTE case */
+ int irq = pentry->irq;
+
+ if (irq != (u8)0xff) { /* native RTE case */
/* these SPI2 devices are not exposed to system as PCI
* devices, but they have separate RTE entry in IOAPIC
* so we have to enable them one by one here
*/
- ioapic = mp_find_ioapic(pentry->irq);
+ ioapic = mp_find_ioapic(irq);
irq_attr.ioapic = ioapic;
- irq_attr.ioapic_pin = pentry->irq;
+ irq_attr.ioapic_pin = irq;
irq_attr.trigger = 1;
irq_attr.polarity = 1;
- io_apic_set_pci_routing(NULL, pentry->irq, &irq_attr);
- }
+ io_apic_set_pci_routing(NULL, irq, &irq_attr);
+ } else
+ irq = 0; /* No irq */
+
switch (pentry->type) {
case SFI_DEV_TYPE_IPC:
/* ID as IRQ is a hack that will go away */
- pdev = platform_device_alloc(pentry->name, pentry->irq);
+ pdev = platform_device_alloc(pentry->name, irq);
if (pdev == NULL) {
pr_err("out of memory for SFI platform device '%s'.\n",
pentry->name);
continue;
}
- install_irq_resource(pdev, pentry->irq);
+ install_irq_resource(pdev, irq);
pr_debug("info[%2d]: IPC bus, name = %16.16s, "
- "irq = 0x%2x\n", i, pentry->name, pentry->irq);
+ "irq = 0x%2x\n", i, pentry->name, irq);
sfi_handle_ipc_dev(pdev);
break;
case SFI_DEV_TYPE_SPI:
memset(&spi_info, 0, sizeof(spi_info));
strncpy(spi_info.modalias, pentry->name, SFI_NAME_LEN);
- spi_info.irq = pentry->irq;
+ spi_info.irq = irq;
spi_info.bus_num = pentry->host_num;
spi_info.chip_select = pentry->addr;
spi_info.max_speed_hz = pentry->max_freq;
@@ -724,7 +728,7 @@ static int __init sfi_parse_devs(struct sfi_table_header *table)
memset(&i2c_info, 0, sizeof(i2c_info));
bus = pentry->host_num;
strncpy(i2c_info.type, pentry->name, SFI_NAME_LEN);
- i2c_info.irq = pentry->irq;
+ i2c_info.irq = irq;
i2c_info.addr = pentry->addr;
pr_debug("info[%2d]: I2C bus = %d, name = %16.16s, "
"irq = 0x%2x, addr = 0x%x\n", i, bus,
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index 68e467f..edf435b 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -115,9 +115,6 @@ early_param("nobau", setup_nobau);
/* base pnode in this partition */
static int uv_base_pnode __read_mostly;
-/* position of pnode (which is nasid>>1): */
-static int uv_nshift __read_mostly;
-static unsigned long uv_mmask __read_mostly;
static DEFINE_PER_CPU(struct ptc_stats, ptcstats);
static DEFINE_PER_CPU(struct bau_control, bau_control);
@@ -1426,7 +1423,7 @@ static void activation_descriptor_init(int node, int pnode, int base_pnode)
{
int i;
int cpu;
- unsigned long pa;
+ unsigned long gpa;
unsigned long m;
unsigned long n;
size_t dsize;
@@ -1442,9 +1439,9 @@ static void activation_descriptor_init(int node, int pnode, int base_pnode)
bau_desc = kmalloc_node(dsize, GFP_KERNEL, node);
BUG_ON(!bau_desc);
- pa = uv_gpa(bau_desc); /* need the real nasid*/
- n = pa >> uv_nshift;
- m = pa & uv_mmask;
+ gpa = uv_gpa(bau_desc);
+ n = uv_gpa_to_gnode(gpa);
+ m = uv_gpa_to_offset(gpa);
/* the 14-bit pnode */
write_mmr_descriptor_base(pnode, (n << UV_DESC_PSHIFT | m));
@@ -1516,9 +1513,9 @@ static void pq_init(int node, int pnode)
bcp->queue_last = pqp + (DEST_Q_SIZE - 1);
}
/*
- * need the pnode of where the memory was really allocated
+ * need the gnode of where the memory was really allocated
*/
- pn = uv_gpa(pqp) >> uv_nshift;
+ pn = uv_gpa_to_gnode(uv_gpa(pqp));
first = uv_physnodeaddr(pqp);
pn_first = ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) | first;
last = uv_physnodeaddr(pqp + (DEST_Q_SIZE - 1));
@@ -1578,14 +1575,14 @@ static int calculate_destination_timeout(void)
ts_ns = base * mult1 * mult2;
ret = ts_ns / 1000;
} else {
- /* 4 bits 0/1 for 10/80us, 3 bits of multiplier */
- mmr_image = uv_read_local_mmr(UVH_AGING_PRESCALE_SEL);
+ /* 4 bits 0/1 for 10/80us base, 3 bits of multiplier */
+ mmr_image = uv_read_local_mmr(UVH_LB_BAU_MISC_CONTROL);
mmr_image = (mmr_image & UV_SA_MASK) >> UV_SA_SHFT;
if (mmr_image & (1L << UV2_ACK_UNITS_SHFT))
- mult1 = 80;
+ base = 80;
else
- mult1 = 10;
- base = mmr_image & UV2_ACK_MASK;
+ base = 10;
+ mult1 = mmr_image & UV2_ACK_MASK;
ret = mult1 * base;
}
return ret;
@@ -1812,8 +1809,6 @@ static int __init uv_bau_init(void)
zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cur_cpu));
}
- uv_nshift = uv_hub_info->m_val;
- uv_mmask = (1UL << uv_hub_info->m_val) - 1;
nuvhubs = uv_num_possible_blades();
spin_lock_init(&disable_lock);
congested_cycles = usec_2_cycles(congested_respns_us);
@@ -1825,6 +1820,8 @@ static int __init uv_bau_init(void)
uv_base_pnode = uv_blade_to_pnode(uvhub);
}
+ enable_timeouts();
+
if (init_per_cpu(nuvhubs, uv_base_pnode)) {
nobau = 1;
return 0;
@@ -1835,7 +1832,6 @@ static int __init uv_bau_init(void)
if (uv_blade_nr_possible_cpus(uvhub))
init_uvhub(uvhub, vector, uv_base_pnode);
- enable_timeouts();
alloc_intr_gate(vector, uv_bau_message_intr1);
for_each_possible_blade(uvhub) {
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 67d69f1..0fb662a 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1337,7 +1337,7 @@ static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self,
int cpu = (long)hcpu;
switch (action) {
case CPU_UP_PREPARE:
- per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
+ xen_vcpu_setup(cpu);
if (xen_have_vector_callback)
xen_init_lock_cpu(cpu);
break;
@@ -1367,7 +1367,6 @@ static void __init xen_hvm_guest_init(void)
xen_hvm_smp_init();
register_cpu_notifier(&xen_hvm_cpu_notifier);
xen_unplug_emulated_devices();
- have_vcpu_info_placement = 0;
x86_init.irqs.intr_init = xen_init_IRQ;
xen_hvm_init_time_ops();
xen_hvm_init_mmu_ops();
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index acea42e..f8dcda4 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -192,9 +192,21 @@ static unsigned long __init xen_get_max_pages(void)
domid_t domid = DOMID_SELF;
int ret;
- ret = HYPERVISOR_memory_op(XENMEM_maximum_reservation, &domid);
- if (ret > 0)
- max_pages = ret;
+ /*
+ * For the initial domain we use the maximum reservation as
+ * the maximum page.
+ *
+ * For guest domains the current maximum reservation reflects
+ * the current maximum rather than the static maximum. In this
+ * case the e820 map provided to us will cover the static
+ * maximum region.
+ */
+ if (xen_initial_domain()) {
+ ret = HYPERVISOR_memory_op(XENMEM_maximum_reservation, &domid);
+ if (ret > 0)
+ max_pages = ret;
+ }
+
return min(max_pages, MAX_DOMAIN_PAGES);
}