diff options
Diffstat (limited to 'arch/x86')
49 files changed, 798 insertions, 295 deletions
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index 67f87f2..78a1eff 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h @@ -1,6 +1,7 @@ #ifndef _ASM_X86_AMD_NB_H #define _ASM_X86_AMD_NB_H +#include <linux/ioport.h> #include <linux/pci.h> struct amd_nb_bus_dev_range { @@ -13,6 +14,7 @@ extern const struct pci_device_id amd_nb_misc_ids[]; extern const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[]; extern bool early_is_amd_nb(u32 value); +extern struct resource *amd_get_mmconfig_range(struct resource *res); extern int amd_cache_northbridges(void); extern void amd_flush_garts(void); extern int amd_numa_init(void); diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 4a0b7c7..244ac77 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -495,7 +495,7 @@ static inline void default_wait_for_init_deassert(atomic_t *deassert) return; } -extern struct apic *generic_bigsmp_probe(void); +extern void generic_bigsmp_probe(void); #ifdef CONFIG_X86_LOCAL_APIC diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index c9e09ea..a850b4d 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -29,8 +29,8 @@ extern unsigned int sig_xstate_size; extern void fpu_init(void); extern void mxcsr_feature_mask_init(void); extern int init_fpu(struct task_struct *child); -extern asmlinkage void math_state_restore(void); -extern void __math_state_restore(void); +extern void __math_state_restore(struct task_struct *); +extern void math_state_restore(void); extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); extern user_regset_active_fn fpregs_active, xfpregs_active; @@ -212,19 +212,11 @@ static inline void fpu_fxsave(struct fpu *fpu) #endif /* CONFIG_X86_64 */ -/* We need a safe address that is cheap to find and that is already - in L1 during context switch. The best choices are unfortunately - different for UP and SMP */ -#ifdef CONFIG_SMP -#define safe_address (__per_cpu_offset[0]) -#else -#define safe_address (kstat_cpu(0).cpustat.user) -#endif - /* - * These must be called with preempt disabled + * These must be called with preempt disabled. Returns + * 'true' if the FPU state is still intact. */ -static inline void fpu_save_init(struct fpu *fpu) +static inline int fpu_save_init(struct fpu *fpu) { if (use_xsave()) { fpu_xsave(fpu); @@ -233,33 +225,33 @@ static inline void fpu_save_init(struct fpu *fpu) * xsave header may indicate the init state of the FP. */ if (!(fpu->state->xsave.xsave_hdr.xstate_bv & XSTATE_FP)) - return; + return 1; } else if (use_fxsr()) { fpu_fxsave(fpu); } else { asm volatile("fnsave %[fx]; fwait" : [fx] "=m" (fpu->state->fsave)); - return; + return 0; } - if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES)) + /* + * If exceptions are pending, we need to clear them so + * that we don't randomly get exceptions later. + * + * FIXME! Is this perhaps only true for the old-style + * irq13 case? Maybe we could leave the x87 state + * intact otherwise? + */ + if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES)) { asm volatile("fnclex"); - - /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception - is pending. Clear the x87 state here by setting it to fixed - values. safe_address is a random variable that should be in L1 */ - alternative_input( - ASM_NOP8 ASM_NOP2, - "emms\n\t" /* clear stack tags */ - "fildl %P[addr]", /* set F?P to defined value */ - X86_FEATURE_FXSAVE_LEAK, - [addr] "m" (safe_address)); + return 0; + } + return 1; } -static inline void __save_init_fpu(struct task_struct *tsk) +static inline int __save_init_fpu(struct task_struct *tsk) { - fpu_save_init(&tsk->thread.fpu); - task_thread_info(tsk)->status &= ~TS_USEDFPU; + return fpu_save_init(&tsk->thread.fpu); } static inline int fpu_fxrstor_checking(struct fpu *fpu) @@ -281,39 +273,185 @@ static inline int restore_fpu_checking(struct task_struct *tsk) } /* - * Signal frame handlers... + * Software FPU state helpers. Careful: these need to + * be preemption protection *and* they need to be + * properly paired with the CR0.TS changes! */ -extern int save_i387_xstate(void __user *buf); -extern int restore_i387_xstate(void __user *buf); +static inline int __thread_has_fpu(struct task_struct *tsk) +{ + return tsk->thread.has_fpu; +} -static inline void __unlazy_fpu(struct task_struct *tsk) +/* Must be paired with an 'stts' after! */ +static inline void __thread_clear_has_fpu(struct task_struct *tsk) { - if (task_thread_info(tsk)->status & TS_USEDFPU) { - __save_init_fpu(tsk); - stts(); - } else - tsk->fpu_counter = 0; + tsk->thread.has_fpu = 0; +} + +/* Must be paired with a 'clts' before! */ +static inline void __thread_set_has_fpu(struct task_struct *tsk) +{ + tsk->thread.has_fpu = 1; } +/* + * Encapsulate the CR0.TS handling together with the + * software flag. + * + * These generally need preemption protection to work, + * do try to avoid using these on their own. + */ +static inline void __thread_fpu_end(struct task_struct *tsk) +{ + __thread_clear_has_fpu(tsk); + stts(); +} + +static inline void __thread_fpu_begin(struct task_struct *tsk) +{ + clts(); + __thread_set_has_fpu(tsk); +} + +/* + * FPU state switching for scheduling. + * + * This is a two-stage process: + * + * - switch_fpu_prepare() saves the old state and + * sets the new state of the CR0.TS bit. This is + * done within the context of the old process. + * + * - switch_fpu_finish() restores the new state as + * necessary. + */ +typedef struct { int preload; } fpu_switch_t; + +/* + * FIXME! We could do a totally lazy restore, but we need to + * add a per-cpu "this was the task that last touched the FPU + * on this CPU" variable, and the task needs to have a "I last + * touched the FPU on this CPU" and check them. + * + * We don't do that yet, so "fpu_lazy_restore()" always returns + * false, but some day.. + */ +#define fpu_lazy_restore(tsk) (0) +#define fpu_lazy_state_intact(tsk) do { } while (0) + +static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new) +{ + fpu_switch_t fpu; + + fpu.preload = tsk_used_math(new) && new->fpu_counter > 5; + if (__thread_has_fpu(old)) { + if (__save_init_fpu(old)) + fpu_lazy_state_intact(old); + __thread_clear_has_fpu(old); + old->fpu_counter++; + + /* Don't change CR0.TS if we just switch! */ + if (fpu.preload) { + __thread_set_has_fpu(new); + prefetch(new->thread.fpu.state); + } else + stts(); + } else { + old->fpu_counter = 0; + if (fpu.preload) { + if (fpu_lazy_restore(new)) + fpu.preload = 0; + else + prefetch(new->thread.fpu.state); + __thread_fpu_begin(new); + } + } + return fpu; +} + +/* + * By the time this gets called, we've already cleared CR0.TS and + * given the process the FPU if we are going to preload the FPU + * state - all we need to do is to conditionally restore the register + * state itself. + */ +static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu) +{ + if (fpu.preload) + __math_state_restore(new); +} + +/* + * Signal frame handlers... + */ +extern int save_i387_xstate(void __user *buf); +extern int restore_i387_xstate(void __user *buf); + static inline void __clear_fpu(struct task_struct *tsk) { - if (task_thread_info(tsk)->status & TS_USEDFPU) { + if (__thread_has_fpu(tsk)) { /* Ignore delayed exceptions from user space */ asm volatile("1: fwait\n" "2:\n" _ASM_EXTABLE(1b, 2b)); - task_thread_info(tsk)->status &= ~TS_USEDFPU; - stts(); + __thread_fpu_end(tsk); } } +/* + * Were we in an interrupt that interrupted kernel mode? + * + * We can do a kernel_fpu_begin/end() pair *ONLY* if that + * pair does nothing at all: the thread must not have fpu (so + * that we don't try to save the FPU state), and TS must + * be set (so that the clts/stts pair does nothing that is + * visible in the interrupted kernel thread). + */ +static inline bool interrupted_kernel_fpu_idle(void) +{ + return !__thread_has_fpu(current) && + (read_cr0() & X86_CR0_TS); +} + +/* + * Were we in user mode (or vm86 mode) when we were + * interrupted? + * + * Doing kernel_fpu_begin/end() is ok if we are running + * in an interrupt context from user mode - we'll just + * save the FPU state as required. + */ +static inline bool interrupted_user_mode(void) +{ + struct pt_regs *regs = get_irq_regs(); + return regs && user_mode_vm(regs); +} + +/* + * Can we use the FPU in kernel mode with the + * whole "kernel_fpu_begin/end()" sequence? + * + * It's always ok in process context (ie "not interrupt") + * but it is sometimes ok even from an irq. + */ +static inline bool irq_fpu_usable(void) +{ + return !in_interrupt() || + interrupted_user_mode() || + interrupted_kernel_fpu_idle(); +} + static inline void kernel_fpu_begin(void) { - struct thread_info *me = current_thread_info(); + struct task_struct *me = current; + + WARN_ON_ONCE(!irq_fpu_usable()); preempt_disable(); - if (me->status & TS_USEDFPU) - __save_init_fpu(me->task); - else + if (__thread_has_fpu(me)) { + __save_init_fpu(me); + __thread_clear_has_fpu(me); + /* We do 'stts()' in kernel_fpu_end() */ + } else clts(); } @@ -323,14 +461,6 @@ static inline void kernel_fpu_end(void) preempt_enable(); } -static inline bool irq_fpu_usable(void) -{ - struct pt_regs *regs; - - return !in_interrupt() || !(regs = get_irq_regs()) || \ - user_mode(regs) || (read_cr0() & X86_CR0_TS); -} - /* * Some instructions like VIA's padlock instructions generate a spurious * DNA fault but don't modify SSE registers. And these instructions @@ -363,20 +493,64 @@ static inline void irq_ts_restore(int TS_state) } /* + * The question "does this thread have fpu access?" + * is slightly racy, since preemption could come in + * and revoke it immediately after the test. + * + * However, even in that very unlikely scenario, + * we can just assume we have FPU access - typically + * to save the FP state - we'll just take a #NM + * fault and get the FPU access back. + * + * The actual user_fpu_begin/end() functions + * need to be preemption-safe, though. + * + * NOTE! user_fpu_end() must be used only after you + * have saved the FP state, and user_fpu_begin() must + * be used only immediately before restoring it. + * These functions do not do any save/restore on + * their own. + */ +static inline int user_has_fpu(void) +{ + return __thread_has_fpu(current); +} + +static inline void user_fpu_end(void) +{ + preempt_disable(); + __thread_fpu_end(current); + preempt_enable(); +} + +static inline void user_fpu_begin(void) +{ + preempt_disable(); + if (!user_has_fpu()) + __thread_fpu_begin(current); + preempt_enable(); +} + +/* * These disable preemption on their own and are safe */ static inline void save_init_fpu(struct task_struct *tsk) { + WARN_ON_ONCE(!__thread_has_fpu(tsk)); preempt_disable(); __save_init_fpu(tsk); - stts(); + __thread_fpu_end(tsk); preempt_enable(); } static inline void unlazy_fpu(struct task_struct *tsk) { preempt_disable(); - __unlazy_fpu(tsk); + if (__thread_has_fpu(tsk)) { + __save_init_fpu(tsk); + __thread_fpu_end(tsk); + } else + tsk->fpu_counter = 0; preempt_enable(); } diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 0049211..0ab6a4d 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -189,6 +189,9 @@ struct x86_emulate_ops { int (*intercept)(struct x86_emulate_ctxt *ctxt, struct x86_instruction_info *info, enum x86_intercept_stage stage); + + bool (*get_cpuid)(struct x86_emulate_ctxt *ctxt, + u32 *eax, u32 *ebx, u32 *ecx, u32 *edx); }; typedef u32 __attribute__((vector_size(16))) sse128_t; @@ -298,6 +301,19 @@ struct x86_emulate_ctxt { #define X86EMUL_MODE_PROT (X86EMUL_MODE_PROT16|X86EMUL_MODE_PROT32| \ X86EMUL_MODE_PROT64) +/* CPUID vendors */ +#define X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx 0x68747541 +#define X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx 0x444d4163 +#define X86EMUL_CPUID_VENDOR_AuthenticAMD_edx 0x69746e65 + +#define X86EMUL_CPUID_VENDOR_AMDisbetterI_ebx 0x69444d41 +#define X86EMUL_CPUID_VENDOR_AMDisbetterI_ecx 0x21726574 +#define X86EMUL_CPUID_VENDOR_AMDisbetterI_edx 0x74656273 + +#define X86EMUL_CPUID_VENDOR_GenuineIntel_ebx 0x756e6547 +#define X86EMUL_CPUID_VENDOR_GenuineIntel_ecx 0x6c65746e +#define X86EMUL_CPUID_VENDOR_GenuineIntel_edx 0x49656e69 + enum x86_intercept_stage { X86_ICTP_NONE = 0, /* Allow zero-init to not match anything */ X86_ICPT_PRE_EXCEPT, diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 2193715..5d9c61d 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -454,6 +454,7 @@ struct thread_struct { unsigned long trap_no; unsigned long error_code; /* floating point and extended processor state */ + unsigned long has_fpu; struct fpu fpu; #ifdef CONFIG_X86_32 /* Virtual 86 mode info */ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 1f2e61e..278d3d5 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -242,8 +242,6 @@ static inline struct thread_info *current_thread_info(void) * ever touches our thread-synchronous status, so we don't * have to worry about atomic accesses. */ -#define TS_USEDFPU 0x0001 /* FPU was used by this task - this quantum (SMP) */ #define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/ #define TS_POLLING 0x0004 /* idle task polling need_resched, skip sending interrupt */ diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h index fa7b917..34baa0e 100644 --- a/arch/x86/include/asm/timer.h +++ b/arch/x86/include/asm/timer.h @@ -32,6 +32,22 @@ extern int no_timer_check; * (mathieu.desnoyers@polymtl.ca) * * -johnstul@us.ibm.com "math is hard, lets go shopping!" + * + * In: + * + * ns = cycles * cyc2ns_scale / SC + * + * Although we may still have enough bits to store the value of ns, + * in some cases, we may not have enough bits to store cycles * cyc2ns_scale, + * leading to an incorrect result. + * + * To avoid this, we can decompose 'cycles' into quotient and remainder + * of division by SC. Then, + * + * ns = (quot * SC + rem) * cyc2ns_scale / SC + * = quot * cyc2ns_scale + (rem * cyc2ns_scale) / SC + * + * - sqazi@google.com */ DECLARE_PER_CPU(unsigned long, cyc2ns); @@ -43,7 +59,8 @@ static inline unsigned long long __cycles_2_ns(unsigned long long cyc) { int cpu = smp_processor_id(); unsigned long long ns = per_cpu(cyc2ns_offset, cpu); - ns += cyc * per_cpu(cyc2ns, cpu) >> CYC2NS_SCALE_FACTOR; + ns += mult_frac(cyc, per_cpu(cyc2ns, cpu), + (1UL << CYC2NS_SCALE_FACTOR)); return ns; } diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index a291c40..5d62d65 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h @@ -55,6 +55,7 @@ #define UV_BAU_TUNABLES_DIR "sgi_uv" #define UV_BAU_TUNABLES_FILE "bau_tunables" #define WHITESPACE " \t\n" +#define uv_mmask ((1UL << uv_hub_info->m_val) - 1) #define uv_physnodeaddr(x) ((__pa((unsigned long)(x)) & uv_mmask)) #define cpubit_isset(cpu, bau_local_cpumask) \ test_bit((cpu), (bau_local_cpumask).bits) diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index f26544a..21f7385 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -46,6 +46,13 @@ * PNODE - the low N bits of the GNODE. The PNODE is the most useful variant * of the nasid for socket usage. * + * GPA - (global physical address) a socket physical address converted + * so that it can be used by the GRU as a global address. Socket + * physical addresses 1) need additional NASID (node) bits added + * to the high end of the address, and 2) unaliased if the + * partition does not have a physical address 0. In addition, on + * UV2 rev 1, GPAs need the gnode left shifted to bits 39 or 40. + * * * NumaLink Global Physical Address Format: * +--------------------------------+---------------------+ @@ -141,6 +148,8 @@ struct uv_hub_info_s { unsigned int gnode_extra; unsigned char hub_revision; unsigned char apic_pnode_shift; + unsigned char m_shift; + unsigned char n_lshift; unsigned long gnode_upper; unsigned long lowmem_remap_top; unsigned long lowmem_remap_base; @@ -177,6 +186,16 @@ static inline int is_uv2_hub(void) return uv_hub_info->hub_revision >= UV2_HUB_REVISION_BASE; } +static inline int is_uv2_1_hub(void) +{ + return uv_hub_info->hub_revision == UV2_HUB_REVISION_BASE; +} + +static inline int is_uv2_2_hub(void) +{ + return uv_hub_info->hub_revision == UV2_HUB_REVISION_BASE + 1; +} + union uvh_apicid { unsigned long v; struct uvh_apicid_s { @@ -276,7 +295,10 @@ static inline unsigned long uv_soc_phys_ram_to_gpa(unsigned long paddr) { if (paddr < uv_hub_info->lowmem_remap_top) paddr |= uv_hub_info->lowmem_remap_base; - return paddr | uv_hub_info->gnode_upper; + paddr |= uv_hub_info->gnode_upper; + paddr = ((paddr << uv_hub_info->m_shift) >> uv_hub_info->m_shift) | + ((paddr >> uv_hub_info->m_val) << uv_hub_info->n_lshift); + return paddr; } @@ -296,20 +318,23 @@ uv_gpa_in_mmr_space(unsigned long gpa) /* UV global physical address --> socket phys RAM */ static inline unsigned long uv_gpa_to_soc_phys_ram(unsigned long gpa) { - unsigned long paddr = gpa & uv_hub_info->gpa_mask; + unsigned long paddr; unsigned long remap_base = uv_hub_info->lowmem_remap_base; unsigned long remap_top = uv_hub_info->lowmem_remap_top; + gpa = ((gpa << uv_hub_info->m_shift) >> uv_hub_info->m_shift) | + ((gpa >> uv_hub_info->n_lshift) << uv_hub_info->m_val); + paddr = gpa & uv_hub_info->gpa_mask; if (paddr >= remap_base && paddr < remap_base + remap_top) paddr -= remap_base; return paddr; } -/* gnode -> pnode */ +/* gpa -> pnode */ static inline unsigned long uv_gpa_to_gnode(unsigned long gpa) { - return gpa >> uv_hub_info->m_val; + return gpa >> uv_hub_info->n_lshift; } /* gpa -> pnode */ @@ -320,6 +345,12 @@ static inline int uv_gpa_to_pnode(unsigned long gpa) return uv_gpa_to_gnode(gpa) & n_mask; } +/* gpa -> node offset*/ +static inline unsigned long uv_gpa_to_offset(unsigned long gpa) +{ + return (gpa << uv_hub_info->m_shift) >> uv_hub_info->m_shift; +} + /* pnode, offset --> socket virtual */ static inline void *uv_pnode_offset_to_vaddr(int pnode, unsigned long offset) { diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index d3d9d50..bfd75ff 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1203,7 +1203,7 @@ static int alloc_new_range(struct dma_ops_domain *dma_dom, if (!pte || !IOMMU_PTE_PRESENT(*pte)) continue; - dma_ops_reserve_addresses(dma_dom, i << PAGE_SHIFT, 1); + dma_ops_reserve_addresses(dma_dom, i >> PAGE_SHIFT, 1); } update_domain(&dma_dom->domain); diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index bfc8453..33df6e8 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -1031,8 +1031,9 @@ static int iommu_setup_msi(struct amd_iommu *iommu) { int r; - if (pci_enable_msi(iommu->dev)) - return 1; + r = pci_enable_msi(iommu->dev); + if (r) + return r; r = request_threaded_irq(iommu->dev->irq, amd_iommu_int_handler, @@ -1042,24 +1043,33 @@ static int iommu_setup_msi(struct amd_iommu *iommu) if (r) { pci_disable_msi(iommu->dev); - return 1; + return r; } iommu->int_enabled = true; - iommu_feature_enable(iommu, CONTROL_EVT_INT_EN); return 0; } static int iommu_init_msi(struct amd_iommu *iommu) { + int ret; + if (iommu->int_enabled) - return 0; + goto enable_faults; if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSI)) - return iommu_setup_msi(iommu); + ret = iommu_setup_msi(iommu); + else + ret = -ENODEV; - return 1; + if (ret) + return ret; + +enable_faults: + iommu_feature_enable(iommu, CONTROL_EVT_INT_EN); + + return 0; } /**************************************************************************** diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 4c39baa..bae1efe 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -119,6 +119,37 @@ bool __init early_is_amd_nb(u32 device) return false; } +struct resource *amd_get_mmconfig_range(struct resource *res) +{ + u32 address; + u64 base, msr; + unsigned segn_busn_bits; + + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) + return NULL; + + /* assume all cpus from fam10h have mmconfig */ + if (boot_cpu_data.x86 < 0x10) + return NULL; + + address = MSR_FAM10H_MMIO_CONF_BASE; + rdmsrl(address, msr); + + /* mmconfig is not enabled */ + if (!(msr & FAM10H_MMIO_CONF_ENABLE)) + return NULL; + + base = msr & (FAM10H_MMIO_CONF_BASE_MASK<<FAM10H_MMIO_CONF_BASE_SHIFT); + + segn_busn_bits = (msr >> FAM10H_MMIO_CONF_BUSRANGE_SHIFT) & + FAM10H_MMIO_CONF_BUSRANGE_MASK; + + res->flags = IORESOURCE_MEM; + res->start = base; + res->end = base + (1ULL<<(segn_busn_bits + 20)) - 1; + return res; +} + int amd_get_subcaches(int cpu) { struct pci_dev *link = node_to_amd_nb(amd_get_nb_id(cpu))->link; diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index efd737e..521bead 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -255,12 +255,24 @@ static struct apic apic_bigsmp = { .x86_32_early_logical_apicid = bigsmp_early_logical_apicid, }; -struct apic * __init generic_bigsmp_probe(void) +void __init generic_bigsmp_probe(void) { - if (probe_bigsmp()) - return &apic_bigsmp; + unsigned int cpu; - return NULL; + if (!probe_bigsmp()) + return; + + apic = &apic_bigsmp; + + for_each_possible_cpu(cpu) { + if (early_per_cpu(x86_cpu_to_logical_apicid, + cpu) == BAD_APICID) + continue; + early_per_cpu(x86_cpu_to_logical_apicid, cpu) = + bigsmp_early_logical_apicid(cpu); + } + + pr_info("Overriding APIC driver with %s\n", apic_bigsmp.name); } apic_driver(apic_bigsmp); diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index b5254ad..0787bb3 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -200,14 +200,8 @@ void __init default_setup_apic_routing(void) * - we find more than 8 CPUs in acpi LAPIC listing with xAPIC support */ - if (!cmdline_apic && apic == &apic_default) { - struct apic *bigsmp = generic_bigsmp_probe(); - if (bigsmp) { - apic = bigsmp; - printk(KERN_INFO "Overriding APIC driver with %s\n", - apic->name); - } - } + if (!cmdline_apic && apic == &apic_default) + generic_bigsmp_probe(); #endif if (apic->setup_apic_routing) diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 34b1859..874c208 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -779,7 +779,12 @@ void __init uv_system_init(void) for(i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++) uv_possible_blades += hweight64(uv_read_local_mmr( UVH_NODE_PRESENT_TABLE + i * 8)); - printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades()); + + /* uv_num_possible_blades() is really the hub count */ + printk(KERN_INFO "UV: Found %d blades, %d hubs\n", + is_uv1_hub() ? uv_num_possible_blades() : + (uv_num_possible_blades() + 1) / 2, + uv_num_possible_blades()); bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades(); uv_blade_info = kzalloc(bytes, GFP_KERNEL); @@ -832,6 +837,10 @@ void __init uv_system_init(void) uv_cpu_hub_info(cpu)->apic_pnode_shift = uvh_apicid.s.pnode_shift; uv_cpu_hub_info(cpu)->hub_revision = uv_hub_info->hub_revision; + uv_cpu_hub_info(cpu)->m_shift = 64 - m_val; + uv_cpu_hub_info(cpu)->n_lshift = is_uv2_1_hub() ? + (m_val == 40 ? 40 : 39) : m_val; + pnode = uv_apicid_to_pnode(apicid); blade = boot_pnode_to_blade(pnode); lcpu = uv_blade_info[blade].nr_possible_cpus; @@ -862,8 +871,7 @@ void __init uv_system_init(void) if (uv_node_to_blade[nid] >= 0) continue; paddr = node_start_pfn(nid) << PAGE_SHIFT; - paddr = uv_soc_phys_ram_to_gpa(paddr); - pnode = (paddr >> m_val) & pnode_mask; + pnode = uv_gpa_to_pnode(uv_soc_phys_ram_to_gpa(paddr)); blade = boot_pnode_to_blade(pnode); uv_node_to_blade[nid] = blade; } diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index c105c53..fde4428 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -330,8 +330,7 @@ static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3) l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1; } -static void __cpuinit amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, - int index) +static void __cpuinit amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index) { static struct amd_l3_cache *__cpuinitdata l3_caches; int node; @@ -748,14 +747,16 @@ static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info); #define CPUID4_INFO_IDX(x, y) (&((per_cpu(ici_cpuid4_info, x))[y])) #ifdef CONFIG_SMP -static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) + +static int __cpuinit cache_shared_amd_cpu_map_setup(unsigned int cpu, int index) { - struct _cpuid4_info *this_leaf, *sibling_leaf; - unsigned long num_threads_sharing; - int index_msb, i, sibling; + struct _cpuid4_info *this_leaf; + int ret, i, sibling; struct cpuinfo_x86 *c = &cpu_data(cpu); - if ((index == 3) && (c->x86_vendor == X86_VENDOR_AMD)) { + ret = 0; + if (index == 3) { + ret = 1; for_each_cpu(i, cpu_llc_shared_mask(cpu)) { if (!per_cpu(ici_cpuid4_info, i)) continue; @@ -766,8 +767,35 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) set_bit(sibling, this_leaf->shared_cpu_map); } } - return; + } else if ((c->x86 == 0x15) && ((index == 1) || (index == 2))) { + ret = 1; + for_each_cpu(i, cpu_sibling_mask(cpu)) { + if (!per_cpu(ici_cpuid4_info, i)) + continue; + this_leaf = CPUID4_INFO_IDX(i, index); + for_each_cpu(sibling, cpu_sibling_mask(cpu)) { + if (!cpu_online(sibling)) + continue; + set_bit(sibling, this_leaf->shared_cpu_map); + } + } } + + return ret; +} + +static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) +{ + struct _cpuid4_info *this_leaf, *sibling_leaf; + unsigned long num_threads_sharing; + int index_msb, i; + struct cpuinfo_x86 *c = &cpu_data(cpu); + + if (c->x86_vendor == X86_VENDOR_AMD) { + if (cache_shared_amd_cpu_map_setup(cpu, index)) + return; + } + this_leaf = CPUID4_INFO_IDX(cpu, index); num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing; diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index bab491b..d812fe2 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -508,6 +508,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) unsigned long from = cpuc->lbr_entries[0].from; unsigned long old_to, to = cpuc->lbr_entries[0].to; unsigned long ip = regs->ip; + int is_64bit = 0; /* * We don't need to fixup if the PEBS assist is fault like @@ -559,7 +560,10 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) } else kaddr = (void *)to; - kernel_insn_init(&insn, kaddr); +#ifdef CONFIG_X86_64 + is_64bit = kernel_ip(to) || !test_thread_flag(TIF_IA32); +#endif + insn_init(&insn, kaddr, is_64bit); insn_get_length(&insn); to += insn.length; } while (to < ip); diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 5c1a9197..edb3d46 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -98,12 +98,6 @@ #endif .endm -#ifdef CONFIG_VM86 -#define resume_userspace_sig check_userspace -#else -#define resume_userspace_sig resume_userspace -#endif - /* * User gs save/restore * @@ -327,10 +321,19 @@ ret_from_exception: preempt_stop(CLBR_ANY) ret_from_intr: GET_THREAD_INFO(%ebp) -check_userspace: +resume_userspace_sig: +#ifdef CONFIG_VM86 movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS movb PT_CS(%esp), %al andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax +#else + /* + * We can be coming here from a syscall done in the kernel space, + * e.g. a failed kernel_execve(). + */ + movl PT_CS(%esp), %eax + andl $SEGMENT_RPL_MASK, %eax +#endif cmpl $USER_RPL, %eax jb resume_kernel # not returning to v8086 or userspace diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 6781765..aa083d3 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -1054,6 +1054,14 @@ int hpet_rtc_timer_init(void) } EXPORT_SYMBOL_GPL(hpet_rtc_timer_init); +static void hpet_disable_rtc_channel(void) +{ + unsigned long cfg; + cfg = hpet_readl(HPET_T1_CFG); + cfg &= ~HPET_TN_ENABLE; + hpet_writel(cfg, HPET_T1_CFG); +} + /* * The functions below are called from rtc driver. * Return 0 if HPET is not being used. @@ -1065,6 +1073,9 @@ int hpet_mask_rtc_irq_bit(unsigned long bit_mask) return 0; hpet_rtc_flags &= ~bit_mask; + if (unlikely(!hpet_rtc_flags)) + hpet_disable_rtc_channel(); + return 1; } EXPORT_SYMBOL_GPL(hpet_mask_rtc_irq_bit); @@ -1130,15 +1141,11 @@ EXPORT_SYMBOL_GPL(hpet_rtc_dropped_irq); static void hpet_rtc_timer_reinit(void) { - unsigned int cfg, delta; + unsigned int delta; int lost_ints = -1; - if (unlikely(!hpet_rtc_flags)) { - cfg = hpet_readl(HPET_T1_CFG); - cfg &= ~HPET_TN_ENABLE; - hpet_writel(cfg, HPET_T1_CFG); - return; - } + if (unlikely(!hpet_rtc_flags)) + hpet_disable_rtc_channel(); if (!(hpet_rtc_flags & RTC_PIE) || hpet_pie_limit) delta = hpet_default_delta; diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 5f9ecff..fc1f48d 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -43,6 +43,8 @@ #include <linux/smp.h> #include <linux/nmi.h> #include <linux/hw_breakpoint.h> +#include <linux/uaccess.h> +#include <linux/memory.h> #include <asm/debugreg.h> #include <asm/apicdef.h> @@ -710,6 +712,64 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long ip) regs->ip = ip; } +int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) +{ + int err; + char opc[BREAK_INSTR_SIZE]; + + bpt->type = BP_BREAKPOINT; + err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr, + BREAK_INSTR_SIZE); + if (err) + return err; + err = probe_kernel_write((char *)bpt->bpt_addr, + arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE); +#ifdef CONFIG_DEBUG_RODATA + if (!err) + return err; + /* + * It is safe to call text_poke() because normal kernel execution + * is stopped on all cores, so long as the text_mutex is not locked. + */ + if (mutex_is_locked(&text_mutex)) + return -EBUSY; + text_poke((void *)bpt->bpt_addr, arch_kgdb_ops.gdb_bpt_instr, + BREAK_INSTR_SIZE); + err = probe_kernel_read(opc, (char *)bpt->bpt_addr, BREAK_INSTR_SIZE); + if (err) + return err; + if (memcmp(opc, arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE)) + return -EINVAL; + bpt->type = BP_POKE_BREAKPOINT; +#endif /* CONFIG_DEBUG_RODATA */ + return err; +} + +int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt) +{ +#ifdef CONFIG_DEBUG_RODATA + int err; + char opc[BREAK_INSTR_SIZE]; + + if (bpt->type != BP_POKE_BREAKPOINT) + goto knl_write; + /* + * It is safe to call text_poke() because normal kernel execution + * is stopped on all cores, so long as the text_mutex is not locked. + */ + if (mutex_is_locked(&text_mutex)) + goto knl_write; + text_poke((void *)bpt->bpt_addr, bpt->saved_instr, BREAK_INSTR_SIZE); + err = probe_kernel_read(opc, (char *)bpt->bpt_addr, BREAK_INSTR_SIZE); + if (err || memcmp(opc, bpt->saved_instr, BREAK_INSTR_SIZE)) + goto knl_write; + return err; +knl_write: +#endif /* CONFIG_DEBUG_RODATA */ + return probe_kernel_write((char *)bpt->bpt_addr, + (char *)bpt->saved_instr, BREAK_INSTR_SIZE); +} + struct kgdb_arch arch_kgdb_ops = { /* Breakpoint instruction: */ .gdb_bpt_instr = { 0xcc }, diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index f1a6244..794bc95 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -75,8 +75,10 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); /* * Undefined/reserved opcodes, conditional jump, Opcode Extension * Groups, and some special opcodes can not boost. + * This is non-const to keep gcc from statically optimizing it out, as + * variable_test_bit makes gcc think only *(unsigned long*) is used. */ -static const u32 twobyte_is_boostable[256 / 32] = { +static u32 twobyte_is_boostable[256 / 32] = { /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ /* ---------------------------------------------- */ W(0x00, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0) | /* 00 */ diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index c561038..b727450 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -298,13 +298,33 @@ free_table: return state; } +/* + * AMD microcode firmware naming convention, up to family 15h they are in + * the legacy file: + * + * amd-ucode/microcode_amd.bin + * + * This legacy file is always smaller than 2K in size. + * + * Starting at family 15h they are in family specific firmware files: + * + * amd-ucode/microcode_amd_fam15h.bin + * amd-ucode/microcode_amd_fam16h.bin + * ... + * + * These might be larger than 2K. + */ static enum ucode_state request_microcode_amd(int cpu, struct device *device) { - const char *fw_name = "amd-ucode/microcode_amd.bin"; + char fw_name[36] = "amd-ucode/microcode_amd.bin"; const struct firmware *fw; enum ucode_state ret = UCODE_NFOUND; + struct cpuinfo_x86 *c = &cpu_data(cpu); + + if (c->x86 >= 0x15) + snprintf(fw_name, sizeof(fw_name), "amd-ucode/microcode_amd_fam%.2xh.bin", c->x86); - if (request_firmware(&fw, fw_name, device)) { + if (request_firmware(&fw, (const char *)fw_name, device)) { pr_err("failed to load file %s\n", fw_name); goto out; } diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 9103b89..0741b062 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -95,8 +95,8 @@ static void __init MP_bus_info(struct mpc_bus *m) } #endif + set_bit(m->busid, mp_bus_not_pci); if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA) - 1) == 0) { - set_bit(m->busid, mp_bus_not_pci); #if defined(CONFIG_EISA) || defined(CONFIG_MCA) mp_bus_id_to_type[m->busid] = MP_BUS_ISA; #endif diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index a3d0dc5..fcdb1b3 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -293,22 +293,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) *next = &next_p->thread; int cpu = smp_processor_id(); struct tss_struct *tss = &per_cpu(init_tss, cpu); - bool preload_fpu; + fpu_switch_t fpu; /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ - /* - * If the task has used fpu the last 5 timeslices, just do a full - * restore of the math state immediately to avoid the trap; the - * chances of needing FPU soon are obviously high now - */ - preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5; - - __unlazy_fpu(prev_p); - - /* we're going to use this soon, after a few expensive things */ - if (preload_fpu) - prefetch(next->fpu.state); + fpu = switch_fpu_prepare(prev_p, next_p); /* * Reload esp0. @@ -348,11 +337,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT)) __switch_to_xtra(prev_p, next_p, tss); - /* If we're going to preload the fpu context, make sure clts - is run while we're batching the cpu state updates. */ - if (preload_fpu) - clts(); - /* * Leave lazy mode, flushing any hypercalls made here. * This must be done before restoring TLS segments so @@ -362,15 +346,14 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) */ arch_end_context_switch(next_p); - if (preload_fpu) - __math_state_restore(); - /* * Restore %gs if needed (which is common) */ if (prev->gs | next->gs) lazy_load_gs(next->gs); + switch_fpu_finish(next_p, fpu); + percpu_write(current_task, next_p); return prev_p; diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 63c8aed..eeb5004 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -363,18 +363,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) int cpu = smp_processor_id(); struct tss_struct *tss = &per_cpu(init_tss, cpu); unsigned fsindex, gsindex; - bool preload_fpu; + fpu_switch_t fpu; - /* - * If the task has used fpu the last 5 timeslices, just do a full - * restore of the math state immediately to avoid the trap; the - * chances of needing FPU soon are obviously high now - */ - preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5; - - /* we're going to use this soon, after a few expensive things */ - if (preload_fpu) - prefetch(next->fpu.state); + fpu = switch_fpu_prepare(prev_p, next_p); /* * Reload esp0, LDT and the page table pointer: @@ -404,13 +395,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) load_TLS(next, cpu); - /* Must be after DS reload */ - __unlazy_fpu(prev_p); - - /* Make sure cpu is ready for new context */ - if (preload_fpu) - clts(); - /* * Leave lazy mode, flushing any hypercalls made here. * This must be done before restoring TLS segments so @@ -451,6 +435,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) wrmsrl(MSR_KERNEL_GS_BASE, next->gs); prev->gsindex = gsindex; + switch_fpu_finish(next_p, fpu); + /* * Switch the PDA and FPU contexts. */ @@ -469,13 +455,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV)) __switch_to_xtra(prev_p, next_p, tss); - /* - * Preload the FPU context, now that we've determined that the - * task is likely to be using it. - */ - if (preload_fpu) - __math_state_restore(); - return prev_p; } diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 9242436..d4a705f 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -124,7 +124,7 @@ __setup("reboot=", reboot_setup); */ /* - * Some machines require the "reboot=b" commandline option, + * Some machines require the "reboot=b" or "reboot=k" commandline options, * this quirk makes that automatic. */ static int __init set_bios_reboot(const struct dmi_system_id *d) @@ -136,6 +136,15 @@ static int __init set_bios_reboot(const struct dmi_system_id *d) return 0; } +static int __init set_kbd_reboot(const struct dmi_system_id *d) +{ + if (reboot_type != BOOT_KBD) { + reboot_type = BOOT_KBD; + printk(KERN_INFO "%s series board detected. Selecting KBD-method for reboot.\n", d->ident); + } + return 0; +} + static struct dmi_system_id __initdata reboot_dmi_table[] = { { /* Handle problems with rebooting on Dell E520's */ .callback = set_bios_reboot, @@ -295,7 +304,7 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { }, }, { /* Handle reboot issue on Acer Aspire one */ - .callback = set_bios_reboot, + .callback = set_kbd_reboot, .ident = "Acer Aspire One A110", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Acer"), diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c index 6bb7b85..bcfec2d 100644 --- a/arch/x86/kernel/tls.c +++ b/arch/x86/kernel/tls.c @@ -163,7 +163,7 @@ int regset_tls_get(struct task_struct *target, const struct user_regset *regset, { const struct desc_struct *tls; - if (pos > GDT_ENTRY_TLS_ENTRIES * sizeof(struct user_desc) || + if (pos >= GDT_ENTRY_TLS_ENTRIES * sizeof(struct user_desc) || (pos % sizeof(struct user_desc)) != 0 || (count % sizeof(struct user_desc)) != 0) return -EINVAL; @@ -198,7 +198,7 @@ int regset_tls_set(struct task_struct *target, const struct user_regset *regset, struct user_desc infobuf[GDT_ENTRY_TLS_ENTRIES]; const struct user_desc *info; - if (pos > GDT_ENTRY_TLS_ENTRIES * sizeof(struct user_desc) || + if (pos >= GDT_ENTRY_TLS_ENTRIES * sizeof(struct user_desc) || (pos % sizeof(struct user_desc)) != 0 || (count % sizeof(struct user_desc)) != 0) return -EINVAL; diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index b9b6716..1b26e01 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -717,25 +717,34 @@ asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void) } /* - * __math_state_restore assumes that cr0.TS is already clear and the - * fpu state is all ready for use. Used during context switch. + * This gets called with the process already owning the + * FPU state, and with CR0.TS cleared. It just needs to + * restore the FPU register state. */ -void __math_state_restore(void) +void __math_state_restore(struct task_struct *tsk) { - struct thread_info *thread = current_thread_info(); - struct task_struct *tsk = thread->task; + /* We need a safe address that is cheap to find and that is already + in L1. We've just brought in "tsk->thread.has_fpu", so use that */ +#define safe_address (tsk->thread.has_fpu) + + /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception + is pending. Clear the x87 state here by setting it to fixed + values. safe_address is a random variable that should be in L1 */ + alternative_input( + ASM_NOP8 ASM_NOP2, + "emms\n\t" /* clear stack tags */ + "fildl %P[addr]", /* set F?P to defined value */ + X86_FEATURE_FXSAVE_LEAK, + [addr] "m" (safe_address)); /* * Paranoid restore. send a SIGSEGV if we fail to restore the state. */ if (unlikely(restore_fpu_checking(tsk))) { - stts(); + __thread_fpu_end(tsk); force_sig(SIGSEGV, tsk); return; } - - thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */ - tsk->fpu_counter++; } /* @@ -745,13 +754,12 @@ void __math_state_restore(void) * Careful.. There are problems with IBM-designed IRQ13 behaviour. * Don't touch unless you *really* know how it works. * - * Must be called with kernel preemption disabled (in this case, - * local interrupts are disabled at the call-site in entry.S). + * Must be called with kernel preemption disabled (eg with local + * local interrupts as in the case of do_device_not_available). */ -asmlinkage void math_state_restore(void) +void math_state_restore(void) { - struct thread_info *thread = current_thread_info(); - struct task_struct *tsk = thread->task; + struct task_struct *tsk = current; if (!tsk_used_math(tsk)) { local_irq_enable(); @@ -768,9 +776,10 @@ asmlinkage void math_state_restore(void) local_irq_disable(); } - clts(); /* Allow maths ops (or we recurse) */ + __thread_fpu_begin(tsk); + __math_state_restore(tsk); - __math_state_restore(); + tsk->fpu_counter++; } EXPORT_SYMBOL_GPL(math_state_restore); diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 6cc6922..4406c03 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -623,7 +623,8 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) if (cpu_khz) { *scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz; - *offset = ns_now - (tsc_now * *scale >> CYC2NS_SCALE_FACTOR); + *offset = ns_now - mult_frac(tsc_now, *scale, + (1UL << CYC2NS_SCALE_FACTOR)); } sched_clock_idle_wakeup_event(0); @@ -956,6 +957,16 @@ static int __init init_tsc_clocksource(void) clocksource_tsc.rating = 0; clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS; } + + /* + * Trust the results of the earlier calibration on systems + * exporting a reliable TSC. + */ + if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) { + clocksource_register_khz(&clocksource_tsc, tsc_khz); + return 0; + } + schedule_delayed_work(&tsc_irqwork, 0); return 0; } diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 863f875..04b8726 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -172,6 +172,7 @@ static void mark_screen_rdonly(struct mm_struct *mm) spinlock_t *ptl; int i; + down_write(&mm->mmap_sem); pgd = pgd_offset(mm, 0xA0000); if (pgd_none_or_clear_bad(pgd)) goto out; @@ -190,6 +191,7 @@ static void mark_screen_rdonly(struct mm_struct *mm) } pte_unmap_unlock(pte, ptl); out: + up_write(&mm->mmap_sem); flush_tlb(); } diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index a391134..7110911 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -47,7 +47,7 @@ void __sanitize_i387_state(struct task_struct *tsk) if (!fx) return; - BUG_ON(task_thread_info(tsk)->status & TS_USEDFPU); + BUG_ON(__thread_has_fpu(tsk)); xstate_bv = tsk->thread.fpu.state->xsave.xsave_hdr.xstate_bv; @@ -168,7 +168,7 @@ int save_i387_xstate(void __user *buf) if (!used_math()) return 0; - if (task_thread_info(tsk)->status & TS_USEDFPU) { + if (user_has_fpu()) { if (use_xsave()) err = xsave_user(buf); else @@ -176,8 +176,7 @@ int save_i387_xstate(void __user *buf) if (err) return err; - task_thread_info(tsk)->status &= ~TS_USEDFPU; - stts(); + user_fpu_end(); } else { sanitize_i387_state(tsk); if (__copy_to_user(buf, &tsk->thread.fpu.state->fxsave, @@ -292,10 +291,7 @@ int restore_i387_xstate(void __user *buf) return err; } - if (!(task_thread_info(current)->status & TS_USEDFPU)) { - clts(); - task_thread_info(current)->status |= TS_USEDFPU; - } + user_fpu_begin(); if (use_xsave()) err = restore_user_xstate(buf); else diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index adc9867..3e7d913 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1901,6 +1901,51 @@ setup_syscalls_segments(struct x86_emulate_ctxt *ctxt, ss->p = 1; } +static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt) +{ + struct x86_emulate_ops *ops = ctxt->ops; + u32 eax, ebx, ecx, edx; + + /* + * syscall should always be enabled in longmode - so only become + * vendor specific (cpuid) if other modes are active... + */ + if (ctxt->mode == X86EMUL_MODE_PROT64) + return true; + + eax = 0x00000000; + ecx = 0x00000000; + if (ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx)) { + /* + * Intel ("GenuineIntel") + * remark: Intel CPUs only support "syscall" in 64bit + * longmode. Also an 64bit guest with a + * 32bit compat-app running will #UD !! While this + * behaviour can be fixed (by emulating) into AMD + * response - CPUs of AMD can't behave like Intel. + */ + if (ebx == X86EMUL_CPUID_VENDOR_GenuineIntel_ebx && + ecx == X86EMUL_CPUID_VENDOR_GenuineIntel_ecx && + edx == X86EMUL_CPUID_VENDOR_GenuineIntel_edx) + return false; + + /* AMD ("AuthenticAMD") */ + if (ebx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx && + ecx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx && + edx == X86EMUL_CPUID_VENDOR_AuthenticAMD_edx) + return true; + + /* AMD ("AMDisbetter!") */ + if (ebx == X86EMUL_CPUID_VENDOR_AMDisbetterI_ebx && + ecx == X86EMUL_CPUID_VENDOR_AMDisbetterI_ecx && + edx == X86EMUL_CPUID_VENDOR_AMDisbetterI_edx) + return true; + } + + /* default: (not Intel, not AMD), apply Intel's stricter rules... */ + return false; +} + static int emulate_syscall(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) { @@ -1915,9 +1960,15 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) ctxt->mode == X86EMUL_MODE_VM86) return emulate_ud(ctxt); + if (!(em_syscall_is_enabled(ctxt))) + return emulate_ud(ctxt); + ops->get_msr(ctxt, MSR_EFER, &efer); setup_syscalls_segments(ctxt, ops, &cs, &ss); + if (!(efer & EFER_SCE)) + return emulate_ud(ctxt); + ops->get_msr(ctxt, MSR_STAR, &msr_data); msr_data >>= 32; cs_sel = (u16)(msr_data & 0xfffc); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index d48ec60..2ad060a 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -948,7 +948,7 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx) #ifdef CONFIG_X86_64 wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); #endif - if (current_thread_info()->status & TS_USEDFPU) + if (__thread_has_fpu(current)) clts(); load_gdt(&__get_cpu_var(host_gdt)); } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 77c9d86..fbb0936 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4407,6 +4407,28 @@ static int emulator_intercept(struct x86_emulate_ctxt *ctxt, return kvm_x86_ops->check_intercept(emul_to_vcpu(ctxt), info, stage); } +static bool emulator_get_cpuid(struct x86_emulate_ctxt *ctxt, + u32 *eax, u32 *ebx, u32 *ecx, u32 *edx) +{ + struct kvm_cpuid_entry2 *cpuid = NULL; + + if (eax && ecx) + cpuid = kvm_find_cpuid_entry(emul_to_vcpu(ctxt), + *eax, *ecx); + + if (cpuid) { + *eax = cpuid->eax; + *ecx = cpuid->ecx; + if (ebx) + *ebx = cpuid->ebx; + if (edx) + *edx = cpuid->edx; + return true; + } + + return false; +} + static struct x86_emulate_ops emulate_ops = { .read_std = kvm_read_guest_virt_system, .write_std = kvm_write_guest_virt_system, @@ -4437,6 +4459,7 @@ static struct x86_emulate_ops emulate_ops = { .get_fpu = emulator_get_fpu, .put_fpu = emulator_put_fpu, .intercept = emulator_intercept, + .get_cpuid = emulator_get_cpuid, }; static void cache_all_regs(struct kvm_vcpu *vcpu) diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c index fc45ba8..e395693 100644 --- a/arch/x86/lib/delay.c +++ b/arch/x86/lib/delay.c @@ -48,9 +48,9 @@ static void delay_loop(unsigned long loops) } /* TSC based delay: */ -static void delay_tsc(unsigned long loops) +static void delay_tsc(unsigned long __loops) { - unsigned long bclock, now; + u32 bclock, now, loops = __loops; int cpu; preempt_disable(); diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index dbe34b9..dd74e46 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -108,16 +108,6 @@ static inline void get_head_page_multiple(struct page *page, int nr) SetPageReferenced(page); } -static inline void get_huge_page_tail(struct page *page) -{ - /* - * __split_huge_page_refcount() cannot run - * from under us. - */ - VM_BUG_ON(atomic_read(&page->_count) < 0); - atomic_inc(&page->_count); -} - static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { @@ -211,6 +201,8 @@ static noinline int gup_huge_pud(pud_t pud, unsigned long addr, do { VM_BUG_ON(compound_head(page) != head); pages[*nr] = page; + if (PageTail(page)) + get_huge_page_tail(page); (*nr)++; page++; refs++; diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c index b499626..f4f29b1 100644 --- a/arch/x86/mm/highmem_32.c +++ b/arch/x86/mm/highmem_32.c @@ -45,6 +45,7 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot) vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); BUG_ON(!pte_none(*(kmap_pte-idx))); set_pte(kmap_pte-idx, mk_pte(page, prot)); + arch_flush_lazy_mmu_mode(); return (void *)vaddr; } @@ -88,6 +89,7 @@ void __kunmap_atomic(void *kvaddr) */ kpte_clear_flush(kmap_pte-idx, vaddr); kmap_atomic_idx_pop(); + arch_flush_lazy_mmu_mode(); } #ifdef CONFIG_DEBUG_HIGHMEM else { diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index 1dab519..f927429 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -87,9 +87,9 @@ static unsigned long mmap_rnd(void) */ if (current->flags & PF_RANDOMIZE) { if (mmap_is_ia32()) - rnd = (long)get_random_int() % (1<<8); + rnd = get_random_int() % (1<<8); else - rnd = (long)(get_random_int() % (1<<28)); + rnd = get_random_int() % (1<<28); } return rnd << PAGE_SHIFT; } diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c index 81dbfde..7efd0c6 100644 --- a/arch/x86/mm/srat.c +++ b/arch/x86/mm/srat.c @@ -104,6 +104,8 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0) return; pxm = pa->proximity_domain_lo; + if (acpi_srat_revision >= 2) + pxm |= *((unsigned int*)pa->proximity_domain_hi) << 8; node = setup_node(pxm); if (node < 0) { printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm); @@ -155,6 +157,8 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) start = ma->base_address; end = start + ma->length; pxm = ma->proximity_domain; + if (acpi_srat_revision <= 1) + pxm &= 0xff; node = setup_node(pxm); if (node < 0) { printk(KERN_ERR "SRAT: Too many proximity domains.\n"); diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index bfab3fa..5a5b6e4 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -151,17 +151,18 @@ void bpf_jit_compile(struct sk_filter *fp) cleanup_addr = proglen; /* epilogue address */ for (pass = 0; pass < 10; pass++) { + u8 seen_or_pass0 = (pass == 0) ? (SEEN_XREG | SEEN_DATAREF | SEEN_MEM) : seen; /* no prologue/epilogue for trivial filters (RET something) */ proglen = 0; prog = temp; - if (seen) { + if (seen_or_pass0) { EMIT4(0x55, 0x48, 0x89, 0xe5); /* push %rbp; mov %rsp,%rbp */ EMIT4(0x48, 0x83, 0xec, 96); /* subq $96,%rsp */ /* note : must save %rbx in case bpf_error is hit */ - if (seen & (SEEN_XREG | SEEN_DATAREF)) + if (seen_or_pass0 & (SEEN_XREG | SEEN_DATAREF)) EMIT4(0x48, 0x89, 0x5d, 0xf8); /* mov %rbx, -8(%rbp) */ - if (seen & SEEN_XREG) + if (seen_or_pass0 & SEEN_XREG) CLEAR_X(); /* make sure we dont leek kernel memory */ /* @@ -170,7 +171,7 @@ void bpf_jit_compile(struct sk_filter *fp) * r9 = skb->len - skb->data_len * r8 = skb->data */ - if (seen & SEEN_DATAREF) { + if (seen_or_pass0 & SEEN_DATAREF) { if (offsetof(struct sk_buff, len) <= 127) /* mov off8(%rdi),%r9d */ EMIT4(0x44, 0x8b, 0x4f, offsetof(struct sk_buff, len)); @@ -260,9 +261,14 @@ void bpf_jit_compile(struct sk_filter *fp) case BPF_S_ALU_DIV_X: /* A /= X; */ seen |= SEEN_XREG; EMIT2(0x85, 0xdb); /* test %ebx,%ebx */ - if (pc_ret0 != -1) - EMIT_COND_JMP(X86_JE, addrs[pc_ret0] - (addrs[i] - 4)); - else { + if (pc_ret0 > 0) { + /* addrs[pc_ret0 - 1] is start address of target + * (addrs[i] - 4) is the address following this jmp + * ("xor %edx,%edx; div %ebx" being 4 bytes long) + */ + EMIT_COND_JMP(X86_JE, addrs[pc_ret0 - 1] - + (addrs[i] - 4)); + } else { EMIT_COND_JMP(X86_JNE, 2 + 5); CLEAR_A(); EMIT1_off32(0xe9, cleanup_addr - (addrs[i] - 4)); /* jmp .+off32 */ @@ -283,7 +289,7 @@ void bpf_jit_compile(struct sk_filter *fp) EMIT2(0x24, K & 0xFF); /* and imm8,%al */ } else if (K >= 0xFFFF0000) { EMIT2(0x66, 0x25); /* and imm16,%ax */ - EMIT2(K, 2); + EMIT(K, 2); } else { EMIT1_off32(0x25, K); /* and imm32,%eax */ } @@ -335,12 +341,12 @@ void bpf_jit_compile(struct sk_filter *fp) } /* fallinto */ case BPF_S_RET_A: - if (seen) { + if (seen_or_pass0) { if (i != flen - 1) { EMIT_JMP(cleanup_addr - addrs[i]); break; } - if (seen & SEEN_XREG) + if (seen_or_pass0 & SEEN_XREG) EMIT4(0x48, 0x8b, 0x5d, 0xf8); /* mov -8(%rbp),%rbx */ EMIT1(0xc9); /* leaveq */ } @@ -469,8 +475,10 @@ void bpf_jit_compile(struct sk_filter *fp) case BPF_S_LD_W_ABS: func = sk_load_word; common_load: seen |= SEEN_DATAREF; - if ((int)K < 0) + if ((int)K < 0) { + /* Abort the JIT because __load_pointer() is needed. */ goto out; + } t_offset = func - (image + addrs[i]); EMIT1_off32(0xbe, K); /* mov imm32,%esi */ EMIT1_off32(0xe8, t_offset); /* call */ @@ -483,13 +491,8 @@ common_load: seen |= SEEN_DATAREF; goto common_load; case BPF_S_LDX_B_MSH: if ((int)K < 0) { - if (pc_ret0 != -1) { - EMIT_JMP(addrs[pc_ret0] - addrs[i]); - break; - } - CLEAR_A(); - EMIT_JMP(cleanup_addr - addrs[i]); - break; + /* Abort the JIT because __load_pointer() is needed. */ + goto out; } seen |= SEEN_DATAREF | SEEN_XREG; t_offset = sk_load_byte_msh - (image + addrs[i]); @@ -568,8 +571,8 @@ cond_branch: f_offset = addrs[i + filter[i].jf] - addrs[i]; break; } if (filter[i].jt != 0) { - if (filter[i].jf) - t_offset += is_near(f_offset) ? 2 : 6; + if (filter[i].jf && f_offset) + t_offset += is_near(f_offset) ? 2 : 5; EMIT_COND_JMP(t_op, t_offset); if (filter[i].jf) EMIT_JMP(f_offset); @@ -599,13 +602,14 @@ cond_branch: f_offset = addrs[i + filter[i].jf] - addrs[i]; * use it to give the cleanup instruction(s) addr */ cleanup_addr = proglen - 1; /* ret */ - if (seen) + if (seen_or_pass0) cleanup_addr -= 1; /* leaveq */ - if (seen & SEEN_XREG) + if (seen_or_pass0 & SEEN_XREG) cleanup_addr -= 4; /* mov -8(%rbp),%rbx */ if (image) { - WARN_ON(proglen != oldproglen); + if (proglen != oldproglen) + pr_err("bpb_jit_compile proglen=%u != oldproglen=%u\n", proglen, oldproglen); break; } if (proglen == oldproglen) { diff --git a/arch/x86/oprofile/init.c b/arch/x86/oprofile/init.c index cdfe4c5..f148cf6 100644 --- a/arch/x86/oprofile/init.c +++ b/arch/x86/oprofile/init.c @@ -21,6 +21,7 @@ extern int op_nmi_timer_init(struct oprofile_operations *ops); extern void op_nmi_exit(void); extern void x86_backtrace(struct pt_regs * const regs, unsigned int depth); +static int nmi_timer; int __init oprofile_arch_init(struct oprofile_operations *ops) { @@ -31,8 +32,9 @@ int __init oprofile_arch_init(struct oprofile_operations *ops) #ifdef CONFIG_X86_LOCAL_APIC ret = op_nmi_init(ops); #endif + nmi_timer = (ret != 0); #ifdef CONFIG_X86_IO_APIC - if (ret < 0) + if (nmi_timer) ret = op_nmi_timer_init(ops); #endif ops->backtrace = x86_backtrace; @@ -44,6 +46,7 @@ int __init oprofile_arch_init(struct oprofile_operations *ops) void oprofile_arch_exit(void) { #ifdef CONFIG_X86_LOCAL_APIC - op_nmi_exit(); + if (!nmi_timer) + op_nmi_exit(); #endif } diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile index 6b8759f..d24d3da 100644 --- a/arch/x86/pci/Makefile +++ b/arch/x86/pci/Makefile @@ -18,8 +18,9 @@ obj-$(CONFIG_X86_NUMAQ) += numaq_32.o obj-$(CONFIG_X86_MRST) += mrst.o obj-y += common.o early.o -obj-y += amd_bus.o bus_numa.o +obj-y += bus_numa.o +obj-$(CONFIG_AMD_NB) += amd_bus.o obj-$(CONFIG_PCI_CNB20LE_QUIRK) += broadcom_bus.o ifeq ($(CONFIG_PCI_DEBUG),y) diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 50b3f14..0473a8f 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -54,6 +54,16 @@ static const struct dmi_system_id pci_use_crs_table[] __initconst = { DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."), }, }, + /* https://bugzilla.kernel.org/show_bug.cgi?id=42619 */ + { + .callback = set_use_crs, + .ident = "MSI MS-7253", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "MICRO-STAR INTERNATIONAL CO., LTD"), + DMI_MATCH(DMI_BOARD_NAME, "MS-7253"), + DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies, LTD"), + }, + }, {} }; @@ -149,7 +159,7 @@ setup_resource(struct acpi_resource *acpi_res, void *data) struct acpi_resource_address64 addr; acpi_status status; unsigned long flags; - u64 start, end; + u64 start, orig_end, end; status = resource_to_addr(acpi_res, &addr); if (!ACPI_SUCCESS(status)) @@ -165,7 +175,21 @@ setup_resource(struct acpi_resource *acpi_res, void *data) return AE_OK; start = addr.minimum + addr.translation_offset; - end = addr.maximum + addr.translation_offset; + orig_end = end = addr.maximum + addr.translation_offset; + + /* Exclude non-addressable range or non-addressable portion of range */ + end = min(end, (u64)iomem_resource.end); + if (end <= start) { + dev_info(&info->bridge->dev, + "host bridge window [%#llx-%#llx] " + "(ignored, not CPU addressable)\n", start, orig_end); + return AE_OK; + } else if (orig_end != end) { + dev_info(&info->bridge->dev, + "host bridge window [%#llx-%#llx] " + "([%#llx-%#llx] ignored, not CPU addressable)\n", + start, orig_end, end + 1, orig_end); + } res = &info->res[info->res_num]; res->name = info->name; diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c index 026e493..385a940 100644 --- a/arch/x86/pci/amd_bus.c +++ b/arch/x86/pci/amd_bus.c @@ -30,34 +30,6 @@ static struct pci_hostbridge_probe pci_probes[] __initdata = { { 0, 0x18, PCI_VENDOR_ID_AMD, 0x1300 }, }; -static u64 __initdata fam10h_mmconf_start; -static u64 __initdata fam10h_mmconf_end; -static void __init get_pci_mmcfg_amd_fam10h_range(void) -{ - u32 address; - u64 base, msr; - unsigned segn_busn_bits; - - /* assume all cpus from fam10h have mmconf */ - if (boot_cpu_data.x86 < 0x10) - return; - - address = MSR_FAM10H_MMIO_CONF_BASE; - rdmsrl(address, msr); - - /* mmconfig is not enable */ - if (!(msr & FAM10H_MMIO_CONF_ENABLE)) - return; - - base = msr & (FAM10H_MMIO_CONF_BASE_MASK<<FAM10H_MMIO_CONF_BASE_SHIFT); - - segn_busn_bits = (msr >> FAM10H_MMIO_CONF_BUSRANGE_SHIFT) & - FAM10H_MMIO_CONF_BUSRANGE_MASK; - - fam10h_mmconf_start = base; - fam10h_mmconf_end = base + (1ULL<<(segn_busn_bits + 20)) - 1; -} - #define RANGE_NUM 16 /** @@ -85,6 +57,9 @@ static int __init early_fill_mp_bus_info(void) u64 val; u32 address; bool found; + struct resource fam10h_mmconf_res, *fam10h_mmconf; + u64 fam10h_mmconf_start; + u64 fam10h_mmconf_end; if (!early_pci_allowed()) return -1; @@ -211,12 +186,17 @@ static int __init early_fill_mp_bus_info(void) subtract_range(range, RANGE_NUM, 0, end); /* get mmconfig */ - get_pci_mmcfg_amd_fam10h_range(); + fam10h_mmconf = amd_get_mmconfig_range(&fam10h_mmconf_res); /* need to take out mmconf range */ - if (fam10h_mmconf_end) { - printk(KERN_DEBUG "Fam 10h mmconf [%llx, %llx]\n", fam10h_mmconf_start, fam10h_mmconf_end); + if (fam10h_mmconf) { + printk(KERN_DEBUG "Fam 10h mmconf %pR\n", fam10h_mmconf); + fam10h_mmconf_start = fam10h_mmconf->start; + fam10h_mmconf_end = fam10h_mmconf->end; subtract_range(range, RANGE_NUM, fam10h_mmconf_start, fam10h_mmconf_end + 1); + } else { + fam10h_mmconf_start = 0; + fam10h_mmconf_end = 0; } /* mmio resource */ diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index f567965..6e96e65 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -308,7 +308,7 @@ int __init pci_xen_init(void) int __init pci_xen_hvm_init(void) { - if (!xen_feature(XENFEAT_hvm_pirqs)) + if (!xen_have_vector_callback || !xen_feature(XENFEAT_hvm_pirqs)) return 0; #ifdef CONFIG_ACPI diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c index 7000e74..fe73276 100644 --- a/arch/x86/platform/mrst/mrst.c +++ b/arch/x86/platform/mrst/mrst.c @@ -678,36 +678,40 @@ static int __init sfi_parse_devs(struct sfi_table_header *table) pentry = (struct sfi_device_table_entry *)sb->pentry; for (i = 0; i < num; i++, pentry++) { - if (pentry->irq != (u8)0xff) { /* native RTE case */ + int irq = pentry->irq; + + if (irq != (u8)0xff) { /* native RTE case */ /* these SPI2 devices are not exposed to system as PCI * devices, but they have separate RTE entry in IOAPIC * so we have to enable them one by one here */ - ioapic = mp_find_ioapic(pentry->irq); + ioapic = mp_find_ioapic(irq); irq_attr.ioapic = ioapic; - irq_attr.ioapic_pin = pentry->irq; + irq_attr.ioapic_pin = irq; irq_attr.trigger = 1; irq_attr.polarity = 1; - io_apic_set_pci_routing(NULL, pentry->irq, &irq_attr); - } + io_apic_set_pci_routing(NULL, irq, &irq_attr); + } else + irq = 0; /* No irq */ + switch (pentry->type) { case SFI_DEV_TYPE_IPC: /* ID as IRQ is a hack that will go away */ - pdev = platform_device_alloc(pentry->name, pentry->irq); + pdev = platform_device_alloc(pentry->name, irq); if (pdev == NULL) { pr_err("out of memory for SFI platform device '%s'.\n", pentry->name); continue; } - install_irq_resource(pdev, pentry->irq); + install_irq_resource(pdev, irq); pr_debug("info[%2d]: IPC bus, name = %16.16s, " - "irq = 0x%2x\n", i, pentry->name, pentry->irq); + "irq = 0x%2x\n", i, pentry->name, irq); sfi_handle_ipc_dev(pdev); break; case SFI_DEV_TYPE_SPI: memset(&spi_info, 0, sizeof(spi_info)); strncpy(spi_info.modalias, pentry->name, SFI_NAME_LEN); - spi_info.irq = pentry->irq; + spi_info.irq = irq; spi_info.bus_num = pentry->host_num; spi_info.chip_select = pentry->addr; spi_info.max_speed_hz = pentry->max_freq; @@ -724,7 +728,7 @@ static int __init sfi_parse_devs(struct sfi_table_header *table) memset(&i2c_info, 0, sizeof(i2c_info)); bus = pentry->host_num; strncpy(i2c_info.type, pentry->name, SFI_NAME_LEN); - i2c_info.irq = pentry->irq; + i2c_info.irq = irq; i2c_info.addr = pentry->addr; pr_debug("info[%2d]: I2C bus = %d, name = %16.16s, " "irq = 0x%2x, addr = 0x%x\n", i, bus, diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index 68e467f..edf435b 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -115,9 +115,6 @@ early_param("nobau", setup_nobau); /* base pnode in this partition */ static int uv_base_pnode __read_mostly; -/* position of pnode (which is nasid>>1): */ -static int uv_nshift __read_mostly; -static unsigned long uv_mmask __read_mostly; static DEFINE_PER_CPU(struct ptc_stats, ptcstats); static DEFINE_PER_CPU(struct bau_control, bau_control); @@ -1426,7 +1423,7 @@ static void activation_descriptor_init(int node, int pnode, int base_pnode) { int i; int cpu; - unsigned long pa; + unsigned long gpa; unsigned long m; unsigned long n; size_t dsize; @@ -1442,9 +1439,9 @@ static void activation_descriptor_init(int node, int pnode, int base_pnode) bau_desc = kmalloc_node(dsize, GFP_KERNEL, node); BUG_ON(!bau_desc); - pa = uv_gpa(bau_desc); /* need the real nasid*/ - n = pa >> uv_nshift; - m = pa & uv_mmask; + gpa = uv_gpa(bau_desc); + n = uv_gpa_to_gnode(gpa); + m = uv_gpa_to_offset(gpa); /* the 14-bit pnode */ write_mmr_descriptor_base(pnode, (n << UV_DESC_PSHIFT | m)); @@ -1516,9 +1513,9 @@ static void pq_init(int node, int pnode) bcp->queue_last = pqp + (DEST_Q_SIZE - 1); } /* - * need the pnode of where the memory was really allocated + * need the gnode of where the memory was really allocated */ - pn = uv_gpa(pqp) >> uv_nshift; + pn = uv_gpa_to_gnode(uv_gpa(pqp)); first = uv_physnodeaddr(pqp); pn_first = ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) | first; last = uv_physnodeaddr(pqp + (DEST_Q_SIZE - 1)); @@ -1578,14 +1575,14 @@ static int calculate_destination_timeout(void) ts_ns = base * mult1 * mult2; ret = ts_ns / 1000; } else { - /* 4 bits 0/1 for 10/80us, 3 bits of multiplier */ - mmr_image = uv_read_local_mmr(UVH_AGING_PRESCALE_SEL); + /* 4 bits 0/1 for 10/80us base, 3 bits of multiplier */ + mmr_image = uv_read_local_mmr(UVH_LB_BAU_MISC_CONTROL); mmr_image = (mmr_image & UV_SA_MASK) >> UV_SA_SHFT; if (mmr_image & (1L << UV2_ACK_UNITS_SHFT)) - mult1 = 80; + base = 80; else - mult1 = 10; - base = mmr_image & UV2_ACK_MASK; + base = 10; + mult1 = mmr_image & UV2_ACK_MASK; ret = mult1 * base; } return ret; @@ -1812,8 +1809,6 @@ static int __init uv_bau_init(void) zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cur_cpu)); } - uv_nshift = uv_hub_info->m_val; - uv_mmask = (1UL << uv_hub_info->m_val) - 1; nuvhubs = uv_num_possible_blades(); spin_lock_init(&disable_lock); congested_cycles = usec_2_cycles(congested_respns_us); @@ -1825,6 +1820,8 @@ static int __init uv_bau_init(void) uv_base_pnode = uv_blade_to_pnode(uvhub); } + enable_timeouts(); + if (init_per_cpu(nuvhubs, uv_base_pnode)) { nobau = 1; return 0; @@ -1835,7 +1832,6 @@ static int __init uv_bau_init(void) if (uv_blade_nr_possible_cpus(uvhub)) init_uvhub(uvhub, vector, uv_base_pnode); - enable_timeouts(); alloc_intr_gate(vector, uv_bau_message_intr1); for_each_possible_blade(uvhub) { diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 67d69f1..0fb662a 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1337,7 +1337,7 @@ static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self, int cpu = (long)hcpu; switch (action) { case CPU_UP_PREPARE: - per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; + xen_vcpu_setup(cpu); if (xen_have_vector_callback) xen_init_lock_cpu(cpu); break; @@ -1367,7 +1367,6 @@ static void __init xen_hvm_guest_init(void) xen_hvm_smp_init(); register_cpu_notifier(&xen_hvm_cpu_notifier); xen_unplug_emulated_devices(); - have_vcpu_info_placement = 0; x86_init.irqs.intr_init = xen_init_IRQ; xen_hvm_init_time_ops(); xen_hvm_init_mmu_ops(); diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index acea42e..f8dcda4 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -192,9 +192,21 @@ static unsigned long __init xen_get_max_pages(void) domid_t domid = DOMID_SELF; int ret; - ret = HYPERVISOR_memory_op(XENMEM_maximum_reservation, &domid); - if (ret > 0) - max_pages = ret; + /* + * For the initial domain we use the maximum reservation as + * the maximum page. + * + * For guest domains the current maximum reservation reflects + * the current maximum rather than the static maximum. In this + * case the e820 map provided to us will cover the static + * maximum region. + */ + if (xen_initial_domain()) { + ret = HYPERVISOR_memory_op(XENMEM_maximum_reservation, &domid); + if (ret > 0) + max_pages = ret; + } + return min(max_pages, MAX_DOMAIN_PAGES); } |