From fb87ec382f9d95469df494bdee9db922594f5cd4 Mon Sep 17 00:00:00 2001
From: Dave Jones
Date: Wed, 19 Jan 2011 20:20:56 -0500
Subject: x86: Update CPU cache attributes table descriptors

Update to latest definitions in:
http://www.intel.com/Assets/PDF/appnote/241618.pdf

[ Note, this update of the doc has removed some old values which we
  have listed. I think until we have clarification that they were
  never used in production, they should be left there. ]

Signed-off-by: Dave Jones
Cc: Arjan van de Ven
Cc: "H. Peter Anvin"
LKML-Reference: <20110120012055.GA15985@redhat.com>
Signed-off-by: Ingo Molnar
---
 arch/x86/kernel/cpu/intel_cacheinfo.c | 3 +++
 1 file changed, 3 insertions(+)
(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 7283e98..ec2c19a 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -45,6 +45,7 @@ static const struct _cache_table __cpuinitconst cache_table[] =
 	{ 0x0a, LVL_1_DATA, 8 },	/* 2 way set assoc, 32 byte line size */
 	{ 0x0c, LVL_1_DATA, 16 },	/* 4-way set assoc, 32 byte line size */
 	{ 0x0d, LVL_1_DATA, 16 },	/* 4-way set assoc, 64 byte line size */
+	{ 0x0e, LVL_1_DATA, 24 },	/* 6-way set assoc, 64 byte line size */
 	{ 0x21, LVL_2, 256 },	/* 8-way set assoc, 64 byte line size */
 	{ 0x22, LVL_3, 512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
 	{ 0x23, LVL_3, MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
@@ -66,6 +67,7 @@ static const struct _cache_table __cpuinitconst cache_table[] =
 	{ 0x45, LVL_2, MB(2) },	/* 4-way set assoc, 32 byte line size */
 	{ 0x46, LVL_3, MB(4) },	/* 4-way set assoc, 64 byte line size */
 	{ 0x47, LVL_3, MB(8) },	/* 8-way set assoc, 64 byte line size */
+	{ 0x48, LVL_2, MB(3) },	/* 12-way set assoc, 64 byte line size */
 	{ 0x49, LVL_3, MB(4) },	/* 16-way set assoc, 64 byte line size */
 	{ 0x4a, LVL_3, MB(6) },	/* 12-way set assoc, 64 byte line size */
 	{ 0x4b, LVL_3, MB(8) },	/* 16-way set assoc, 64 byte line size */
@@ -87,6 +89,7 @@ static const struct _cache_table __cpuinitconst cache_table[] =
 	{ 0x7c, LVL_2, MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
 	{ 0x7d, LVL_2, MB(2) },	/* 8-way set assoc, 64 byte line size */
 	{ 0x7f, LVL_2, 512 },	/* 2-way set assoc, 64 byte line size */
+	{ 0x80, LVL_2, 512 },	/* 8-way set assoc, 64 byte line size */
 	{ 0x82, LVL_2, 256 },	/* 8-way set assoc, 32 byte line size */
 	{ 0x83, LVL_2, 512 },	/* 8-way set assoc, 32 byte line size */
 	{ 0x84, LVL_2, MB(1) },	/* 8-way set assoc, 32 byte line size */
--
cgit v1.1
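[ For context: each one-byte value in the table above is a CPUID leaf-2
  cache descriptor. A minimal user-space sketch of how such descriptors
  are enumerated — this helper is illustrative only and is not the
  kernel's code, which does its table walk next to cache_table[] in
  intel_cacheinfo.c: ]

	#include <stdio.h>
	#include <cpuid.h>	/* GCC/Clang builtin wrapper, x86 only */

	/* CPUID leaf 2 packs one-byte cache descriptors into EAX..EDX. */
	int main(void)
	{
		unsigned int regs[4];
		int i, j;

		if (!__get_cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]))
			return 1;
		regs[0] &= 0xffffff00;	/* AL holds a repeat count, not a descriptor */

		for (i = 0; i < 4; i++) {
			if (regs[i] & 0x80000000u)	/* bit 31 set: register reserved */
				continue;
			for (j = 0; j < 4; j++) {
				unsigned char desc = (regs[i] >> (8 * j)) & 0xff;

				if (desc)	/* would be looked up in cache_table[] */
					printf("descriptor 0x%02x\n", desc);
			}
		}
		return 0;
	}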
From cc67ba6352ecbf6891083a18f8c78fa639ebd274 Mon Sep 17 00:00:00 2001
From: Akinobu Mita
Date: Thu, 20 Jan 2011 20:32:14 +0900
Subject: x86: Use asm-generic/cacheflush.h

The implementation of the cache flushing interfaces on x86 is identical
to the default implementation in asm-generic.

Signed-off-by: Akinobu Mita
Cc: "H. Peter Anvin"
Cc: arnd@arndb.de
LKML-Reference: <1295523136-4277-2-git-send-email-akinobu.mita@gmail.com>
Signed-off-by: Ingo Molnar
---
 arch/x86/include/asm/cacheflush.h | 42 +--------------------------------------
 1 file changed, 1 insertion(+), 41 deletions(-)
(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h
index 63e35ec..62f0844 100644
--- a/arch/x86/include/asm/cacheflush.h
+++ b/arch/x86/include/asm/cacheflush.h
@@ -1,48 +1,8 @@
 #ifndef _ASM_X86_CACHEFLUSH_H
 #define _ASM_X86_CACHEFLUSH_H

-/* Keep includes the same across arches. */
-#include
-
 /* Caches aren't brain-dead on the intel. */
-static inline void flush_cache_all(void) { }
-static inline void flush_cache_mm(struct mm_struct *mm) { }
-static inline void flush_cache_dup_mm(struct mm_struct *mm) { }
-static inline void flush_cache_range(struct vm_area_struct *vma,
-				     unsigned long start, unsigned long end) { }
-static inline void flush_cache_page(struct vm_area_struct *vma,
-				    unsigned long vmaddr, unsigned long pfn) { }
-#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0
-static inline void flush_dcache_page(struct page *page) { }
-static inline void flush_dcache_mmap_lock(struct address_space *mapping) { }
-static inline void flush_dcache_mmap_unlock(struct address_space *mapping) { }
-static inline void flush_icache_range(unsigned long start,
-				      unsigned long end) { }
-static inline void flush_icache_page(struct vm_area_struct *vma,
-				     struct page *page) { }
-static inline void flush_icache_user_range(struct vm_area_struct *vma,
-					   struct page *page,
-					   unsigned long addr,
-					   unsigned long len) { }
-static inline void flush_cache_vmap(unsigned long start, unsigned long end) { }
-static inline void flush_cache_vunmap(unsigned long start,
-				      unsigned long end) { }
-
-static inline void copy_to_user_page(struct vm_area_struct *vma,
-				     struct page *page, unsigned long vaddr,
-				     void *dst, const void *src,
-				     unsigned long len)
-{
-	memcpy(dst, src, len);
-}
-
-static inline void copy_from_user_page(struct vm_area_struct *vma,
-				       struct page *page, unsigned long vaddr,
-				       void *dst, const void *src,
-				       unsigned long len)
-{
-	memcpy(dst, src, len);
-}
+#include <asm-generic/cacheflush.h>

 #ifdef CONFIG_X86_PAT
 /*
--
cgit v1.1

From f21bbec9ffcaf73e99938209e52b757d93f9133b Mon Sep 17 00:00:00 2001
From: Fenghua Yu
Date: Thu, 20 Jan 2011 20:12:40 -0800
Subject: x86, mcheck, therm_throt.c: Export symbol platform_thermal_notify
 to allow coretemp to handle intr

In therm_throt.c, commit 9e76a97efd31a08cb19d0ba12013b8fb4ad3e474
doesn't export the symbol platform_thermal_notify. Other drivers
(e.g. drivers/hwmon/coretemp.c) cannot find the symbol
platform_thermal_notify when defining a threshold interrupt handler.

Please apply this patch to allow the threshold interrupt handler in
coretemp.

Signed-off-by: Fenghua Yu
Cc: R Durgadoss
Cc: khali@linux-fr.org
Cc: lm-sensors@lm-sensors.org
Cc: Guenter Roeck
LKML-Reference: <20110121041239.GB26954@linux-os.sc.intel.com>
Signed-off-by: Ingo Molnar
---
 arch/x86/kernel/cpu/mcheck/therm_throt.c | 1 +
 1 file changed, 1 insertion(+)
(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index e12246f..6f8c5e9 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -59,6 +59,7 @@ struct thermal_state {

 /* Callback to handle core threshold interrupts */
 int (*platform_thermal_notify)(__u64 msr_val);
+EXPORT_SYMBOL(platform_thermal_notify);

 static DEFINE_PER_CPU(struct thermal_state, thermal_state);
--
cgit v1.1
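[ Once the symbol is exported, a module such as coretemp can install its
  handler through this pointer. A hedged sketch — the handler is
  invented, and the header that declares platform_thermal_notify
  (asm/mce.h here) is an assumption: ]

	#include <linux/module.h>
	#include <asm/mce.h>	/* assumed to declare platform_thermal_notify */

	/* hypothetical threshold interrupt handler for a hwmon driver */
	static int demo_thermal_notify(__u64 msr_val)
	{
		pr_info("core thermal threshold crossed, status msr: %llx\n",
			msr_val);
		return 0;
	}

	static int __init demo_init(void)
	{
		platform_thermal_notify = demo_thermal_notify;	/* hook */
		return 0;
	}

	static void __exit demo_exit(void)
	{
		platform_thermal_notify = NULL;	/* unhook before unload */
	}

	module_init(demo_init);
	module_exit(demo_exit);
	MODULE_LICENSE("GPL");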
From 93789b32dbf355e70f18b17a82e8661677a7f7fb Mon Sep 17 00:00:00 2001
From: Borislav Petkov
Date: Thu, 20 Jan 2011 15:42:52 +0100
Subject: x86, hotplug: Fix powersavings with offlined cores on AMD

ea53069231f9317062910d6e772cca4ce93de8c8 made a CPU use monitor/mwait
when offline. This is not the optimal choice for AMD with respect to
power savings and we'd prefer our cores to halt (i.e. enter C1)
instead. For this, the same selection whether to use monitor/mwait has
to be used as when we select the idle routine for the machine.

With this patch, offlining cores 1-5 on an X6 machine allows core0 to
boost again.

[ hpa: putting this in urgent since it is a (power) regression fix ]

Reported-by: Andreas Herrmann
Cc: stable@kernel.org # 37.x
Cc: H. Peter Anvin
Cc: Arjan van de Ven
Cc: Len Brown
Cc: Venkatesh Pallipadi
Cc: Peter Zijlstra
Signed-off-by: Borislav Petkov
LKML-Reference: <1295534572-10730-1-git-send-email-bp@amd64.org>
Signed-off-by: H. Peter Anvin
---
 arch/x86/include/asm/cpu.h | 1 +
 arch/x86/kernel/process.c  | 3 ++-
 arch/x86/kernel/smpboot.c  | 3 ++-
 3 files changed, 5 insertions(+), 2 deletions(-)
(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h
index 4fab24d..6e6e755 100644
--- a/arch/x86/include/asm/cpu.h
+++ b/arch/x86/include/asm/cpu.h
@@ -32,5 +32,6 @@ extern void arch_unregister_cpu(int);

 DECLARE_PER_CPU(int, cpu_state);

+int __cpuinit mwait_usable(const struct cpuinfo_x86 *);
 #endif /* _ASM_X86_CPU_H */

diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index d8286ed..e764fc0 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -14,6 +14,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -505,7 +506,7 @@ static void poll_idle(void)
 #define MWAIT_ECX_EXTENDED_INFO		0x01
 #define MWAIT_EDX_C1			0xf0

-static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
+int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
 {
 	u32 eax, ebx, ecx, edx;

diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 763df77..0cbe8c0 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1402,8 +1402,9 @@ static inline void mwait_play_dead(void)
 	unsigned int highest_subcstate = 0;
 	int i;
 	void *mwait_ptr;
+	struct cpuinfo_x86 *c = __this_cpu_ptr(&cpu_info);

-	if (!cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_MWAIT))
+	if (!(cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)))
 		return;
 	if (!cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLSH))
 		return;
--
cgit v1.1

From 8969691343354bdd80eff5405a0f879edbf013d6 Mon Sep 17 00:00:00 2001
From: matthieu castet
Date: Sun, 23 Jan 2011 15:45:52 +0100
Subject: x86: Fix jump label with RO/NX module protection crash

If we use jump labels in module init code, they are marked as removed
in the __jump_table section after init is done. But we have already
applied RO permissions on the module, so we can't modify a read-only
section (crash in remove_jump_label_module_init).

Make the __jump_table section rw.
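[ For reference: in the .pushsection flag string, "a" makes a section
  allocatable (SHF_ALLOC, loaded into memory) and "w" makes it writable
  (SHF_WRITE). A minimal stand-alone sketch of the difference, with
  invented section names: ]

	/* two otherwise identical entries; only the "aw" one may be patched live */
	#define PUSH_ENTRY(sec, flags)				\
		asm(".pushsection " sec ", \"" flags "\"\n\t"	\
		    ".long 0\n\t"				\
		    ".popsection")

	void section_flags_demo(void)
	{
		PUSH_ENTRY(".demo_ro", "a");	/* SHF_ALLOC only: ends up read-only
						   once the module is protected */
		PUSH_ENTRY(".demo_rw", "aw");	/* SHF_ALLOC|SHF_WRITE: stays
						   writable for runtime patching */
	}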
Signed-off-by: Matthieu CASTET
Cc: Xiaotian Feng
Cc: Jason Baron
Cc: Steven Rostedt
Cc: Frederic Weisbecker
Cc: Peter Zijlstra
Cc: Arjan van de Ven
Cc: Siarhei Liakh
Cc: Xuxian Jiang
Cc: James Morris
Cc: Rusty Russell
Cc: Dave Jones
Cc: Kees Cook
Cc: Linus Torvalds
Cc: Andrew Morton
LKML-Reference: <4D3C3F20.7030203@free.fr>
Signed-off-by: Ingo Molnar
---
 arch/x86/include/asm/jump_label.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index f52d42e..574dbc2 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -14,7 +14,7 @@
 do {							\
 	asm goto("1:"					\
 		JUMP_LABEL_INITIAL_NOP			\
-		".pushsection __jump_table, \"a\" \n\t"	\
+		".pushsection __jump_table, \"aw\" \n\t"\
 		_ASM_PTR "1b, %l[" #label "], %c0 \n\t"	\
 		".popsection \n\t"			\
 		: : "i" (key) : : label);		\
--
cgit v1.1

From 2e5aa6824d9e0248d734573dad8858a2cc279cfe Mon Sep 17 00:00:00 2001
From: Jesper Juhl
Date: Mon, 24 Jan 2011 22:41:11 +0100
Subject: x86-64: Don't use pointer to out-of-scope variable in dump_trace()

In arch/x86/kernel/dumpstack_64.c::dump_trace() we have this code:

 ...
	if (!stack) {
		unsigned long dummy;

		stack = &dummy;
		if (task && task != current)
			stack = (unsigned long *)task->thread.sp;
	}

	bp = stack_frame(task, regs);
	/*
	 * Print function call entries in all stacks, starting at the
	 * current stack address. If the stacks consist of nested
	 * exceptions
	 */
	tinfo = task_thread_info(task);
	for (;;) {
		char *id;
		unsigned long *estack_end;

		estack_end = in_exception_stack(cpu, (unsigned long)stack,
						&used, &id);
 ...

You'll notice that we assign to 'stack' the address of the variable
'dummy' which is only in scope inside the 'if (!stack)'. So when we
later access stack (at the end of the above, and assuming we did not
take the 'if (task && task != current)' branch) we'll be using the
address of a variable that is no longer in scope.

I believe this patch is the proper fix, but I freely admit that I'm
not 100% certain.

Signed-off-by: Jesper Juhl
LKML-Reference:
Signed-off-by: H. Peter Anvin
---
 arch/x86/kernel/dumpstack_64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'arch/x86')

diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 6410133..a6b6fcf 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -149,13 +149,13 @@ void dump_trace(struct task_struct *task,
 	unsigned used = 0;
 	struct thread_info *tinfo;
 	int graph = 0;
+	unsigned long dummy;
 	unsigned long bp;

 	if (!task)
 		task = current;

 	if (!stack) {
-		unsigned long dummy;
 		stack = &dummy;
 		if (task && task != current)
 			stack = (unsigned long *)task->thread.sp;
--
cgit v1.1
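[ The pattern fixed above, reduced to a stand-alone sketch: once the
  block that declares 'dummy' exits, the pointer is dangling, and
  reading through it is undefined behavior even though the stack slot
  often still holds the old value — which is why the bug stays latent: ]

	#include <stdio.h>

	int main(void)
	{
		int *p = NULL;

		if (!p) {
			int dummy = 42;

			p = &dummy;	/* fine while 'dummy' is in scope... */
		}
		/* ...but 'dummy' is dead here: dereferencing p is undefined
		 * behavior, even if the value 42 frequently survives */
		printf("%d\n", *p);
		return 0;
	}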
From cacf061c5e42a040200463afccd9178ace680322 Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli
Date: Tue, 25 Jan 2011 15:07:09 -0800
Subject: thp: fix PARAVIRT x86 32bit noPAE

This fixes TRANSPARENT_HUGEPAGE=y with PARAVIRT=y and HIGHMEM64=n.

The #ifdef that this patch removes was erratically introduced to fix a
build error for noPAE (where pmd.pmd doesn't exist). So then the kernel
built, but it failed at runtime because set_pmd_at was a noop. This
will correct it by enabling set_pmd_at for noPAE mode too.

Signed-off-by: Andrea Arcangeli
Reported-by: werner
Reported-by: Minchan Kim
Tested-by: Minchan Kim
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/x86/include/asm/paravirt.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)
(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 2071a8b..ebbc4d8 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -558,13 +558,12 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
 static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
 			      pmd_t *pmdp, pmd_t pmd)
 {
-#if PAGETABLE_LEVELS >= 3
 	if (sizeof(pmdval_t) > sizeof(long))
 		/* 5 arg words */
 		pv_mmu_ops.set_pmd_at(mm, addr, pmdp, pmd);
 	else
-		PVOP_VCALL4(pv_mmu_ops.set_pmd_at, mm, addr, pmdp, pmd.pmd);
-#endif
+		PVOP_VCALL4(pv_mmu_ops.set_pmd_at, mm, addr, pmdp,
+			    native_pmd_val(pmd));
 }
 #endif
--
cgit v1.1

From 9a57c3e487d25f69715705dfeef6eb9e4d666ad7 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Mon, 24 Jan 2011 17:13:53 -0800
Subject: x86: Remove left over system_64.h

Left-over from the x86 merge ...

Signed-off-by: Yinghai Lu
LKML-Reference: <4D3E23D1.7010405@kernel.org>
Signed-off-by: Ingo Molnar
---
 arch/x86/include/asm/system_64.h | 22 ----------------------
 1 file changed, 22 deletions(-)
 delete mode 100644 arch/x86/include/asm/system_64.h
(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/system_64.h b/arch/x86/include/asm/system_64.h
deleted file mode 100644
index 1159e09..0000000
--- a/arch/x86/include/asm/system_64.h
+++ /dev/null
@@ -1,22 +0,0 @@
-#ifndef _ASM_X86_SYSTEM_64_H
-#define _ASM_X86_SYSTEM_64_H
-
-#include
-#include
-
-
-static inline unsigned long read_cr8(void)
-{
-	unsigned long cr8;
-	asm volatile("movq %%cr8,%0" : "=r" (cr8));
-	return cr8;
-}
-
-static inline void write_cr8(unsigned long val)
-{
-	asm volatile("movq %0,%%cr8" :: "r" (val) : "memory");
-}
-
-#include
-
-#endif /* _ASM_X86_SYSTEM_64_H */
--
cgit v1.1

From 889a7a6a5d5e64063effd40056bdc7b8fb336bd1 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Tue, 25 Jan 2011 17:31:54 +0100
Subject: percpu, x86: Fix percpu_xchg_op()

These recent percpu commits:

  2485b6464cf8: x86,percpu: Move out of place 64 bit ops into X86_64 section
  8270137a0d50: cpuops: Use cmpxchg for xchg to avoid lock semantics

caused this 'perf top' crash:

 Kernel panic - not syncing: Fatal exception in interrupt
 Pid: 0, comm: swapper Tainted: G D 2.6.38-rc2-00181-gef71723 #413
 Call Trace:
  [] ? panic
  [] ? kmsg_dump
  [] ? kmsg_dump
  [] ? oops_end
  [] ? no_context
  [] ? __bad_area_nosemaphore
  [] ? perf_output_begin
  [] ? bad_area_nosemaphore
  [] ? do_page_fault
  [] ? __task_pid_nr_ns
  [] ? perf_event_tid
  [] ? __perf_event_header__init_id
  [] ? validate_chain
  [] ? perf_output_sample
  [] ? trace_hardirqs_off
  [] ? page_fault
  [] ? irq_work_run
  [] ? update_process_times
  [] ? tick_sched_timer
  [] ? tick_sched_timer
  [] ? __run_hrtimer
  [] ? hrtimer_interrupt
  [] ? account_system_vtime
  [] ? smp_apic_timer_interrupt
  [] ? apic_timer_interrupt
 ...
Looking at assembly code, I found:

	list = this_cpu_xchg(irq_work_list, NULL);

gives this wrong code (gcc-4.1.2 cross compiler):

 ffffffff810bc45e:
	mov    %gs:0xead0,%rax
	cmpxchg %rax,%gs:0xead0
	jne    ffffffff810bc45e
	test   %rax,%rax
	je     ffffffff810bc4aa

Tell gcc we dirty the eax/rax register in percpu_xchg_op(), so the
compiler must use another register to store pxo_new__.

We also don't need to reload the percpu value after a jump, since a
'failed' cmpxchg already updated eax/rax.

The wrong generated code was:

	xor     %rax,%rax   /* load 0 into %rax */
 1:	mov     %gs:0xead0,%rax
	cmpxchg %rax,%gs:0xead0
	jne     1b
	test    %rax,%rax

After the patch:

	xor     %rdx,%rdx   /* load 0 into %rdx */
	mov     %gs:0xead0,%rax
 1:	cmpxchg %rdx,%gs:0xead0
	jne     1b
	test    %rax,%rax

Signed-off-by: Eric Dumazet
Cc: Arnaldo Carvalho de Melo
Cc: Linus Torvalds
Cc: Frederic Weisbecker
Cc: Steven Rostedt
Cc: Peter Zijlstra
Cc: Christoph Lameter
Cc: Tejun Heo
LKML-Reference: <1295973114.3588.312.camel@edumazet-laptop>
Signed-off-by: Ingo Molnar
---
 arch/x86/include/asm/percpu.h | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)
(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 3788f46..7e17295 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -273,34 +273,34 @@ do {									\
 	typeof(var) pxo_new__ = (nval);					\
 	switch (sizeof(var)) {						\
 	case 1:								\
-		asm("\n1:mov "__percpu_arg(1)",%%al"			\
-		    "\n\tcmpxchgb %2, "__percpu_arg(1)			\
+		asm("\n\tmov "__percpu_arg(1)",%%al"			\
+		    "\n1:\tcmpxchgb %2, "__percpu_arg(1)		\
 		    "\n\tjnz 1b"					\
-		    : "=a" (pxo_ret__), "+m" (var)			\
+		    : "=&a" (pxo_ret__), "+m" (var)			\
 		    : "q" (pxo_new__)					\
 		    : "memory");					\
 		break;							\
 	case 2:								\
-		asm("\n1:mov "__percpu_arg(1)",%%ax"			\
-		    "\n\tcmpxchgw %2, "__percpu_arg(1)			\
+		asm("\n\tmov "__percpu_arg(1)",%%ax"			\
+		    "\n1:\tcmpxchgw %2, "__percpu_arg(1)		\
 		    "\n\tjnz 1b"					\
-		    : "=a" (pxo_ret__), "+m" (var)			\
+		    : "=&a" (pxo_ret__), "+m" (var)			\
 		    : "r" (pxo_new__)					\
 		    : "memory");					\
 		break;							\
 	case 4:								\
-		asm("\n1:mov "__percpu_arg(1)",%%eax"			\
-		    "\n\tcmpxchgl %2, "__percpu_arg(1)			\
+		asm("\n\tmov "__percpu_arg(1)",%%eax"			\
+		    "\n1:\tcmpxchgl %2, "__percpu_arg(1)		\
 		    "\n\tjnz 1b"					\
-		    : "=a" (pxo_ret__), "+m" (var)			\
+		    : "=&a" (pxo_ret__), "+m" (var)			\
 		    : "r" (pxo_new__)					\
 		    : "memory");					\
 		break;							\
 	case 8:								\
-		asm("\n1:mov "__percpu_arg(1)",%%rax"			\
-		    "\n\tcmpxchgq %2, "__percpu_arg(1)			\
+		asm("\n\tmov "__percpu_arg(1)",%%rax"			\
+		    "\n1:\tcmpxchgq %2, "__percpu_arg(1)		\
 		    "\n\tjnz 1b"					\
-		    : "=a" (pxo_ret__), "+m" (var)			\
+		    : "=&a" (pxo_ret__), "+m" (var)			\
 		    : "r" (pxo_new__)					\
 		    : "memory");					\
 		break;							\
--
cgit v1.1
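[ The shape of the fix above, as a plain-C analogue: a compare-and-swap
  primitive writes the freshly observed memory value back into 'old' on
  failure — just as cmpxchg leaves it in eax/rax — so the loop must not
  reload it, and 'old' and 'new' must not share a register: ]

	#include <stdatomic.h>

	/* xchg emulated with a cmpxchg loop, mirroring percpu_xchg_op() */
	static long xchg_via_cas(_Atomic long *var, long new)
	{
		long old = atomic_load(var);	/* load once, before the loop */

		/* on failure, 'old' is refreshed with the current value of
		 * *var by the primitive itself - no explicit reload needed */
		while (!atomic_compare_exchange_weak(var, &old, new))
			;
		return old;
	}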
From cf04d120d9413de581437cf9a29f138ec1178f65 Mon Sep 17 00:00:00 2001
From: Stefan Bader
Date: Thu, 27 Jan 2011 10:03:14 -0500
Subject: xen/p2m: Mark INVALID_P2M_ENTRY the mfn_list past max_pfn.

In case the mfn_list does not have enough entries to fill a p2m page,
we do not want the entries from max_pfn up to the boundary to be
filled with unknown values. Hence set them to INVALID_P2M_ENTRY.

Signed-off-by: Konrad Rzeszutek Wilk
---
 arch/x86/xen/p2m.c | 18 ++++++------------
 1 file changed, 6 insertions(+), 12 deletions(-)
(limited to 'arch/x86')

diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index ddc81a0..fd12d7c 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -241,21 +241,15 @@ void __init xen_build_dynamic_phys_to_machine(void)
 		 * As long as the mfn_list has enough entries to completely
 		 * fill a p2m page, pointing into the array is ok. But if
 		 * not the entries beyond the last pfn will be undefined.
-		 * And guessing that the 'what-ever-there-is' does not take it
-		 * too kindly when changing it to invalid markers, a new page
-		 * is allocated, initialized and filled with the valid part.
 		 */
 		if (unlikely(pfn + P2M_PER_PAGE > max_pfn)) {
 			unsigned long p2midx;
-			unsigned long *p2m = extend_brk(PAGE_SIZE, PAGE_SIZE);
-			p2m_init(p2m);
-
-			for (p2midx = 0; pfn + p2midx < max_pfn; p2midx++) {
-				p2m[p2midx] = mfn_list[pfn + p2midx];
-			}
-			p2m_top[topidx][mididx] = p2m;
-		} else
-			p2m_top[topidx][mididx] = &mfn_list[pfn];
+
+			p2midx = max_pfn % P2M_PER_PAGE;
+			for ( ; p2midx < P2M_PER_PAGE; p2midx++)
+				mfn_list[pfn + p2midx] = INVALID_P2M_ENTRY;
+		}
+		p2m_top[topidx][mididx] = &mfn_list[pfn];
 	}

 	m2p_override_init();
--
cgit v1.1

From 7cb31b752c71e0bd405c1139e1907c3335877dff Mon Sep 17 00:00:00 2001
From: Stefano Stabellini
Date: Thu, 27 Jan 2011 10:13:25 -0500
Subject: xen/e820: Guard against E820_RAM not having page-aligned size or start.

Under Dell Inspiron 1525, and Intel SandyBridge SDP's the BIOS e820
RAM is not page-aligned:

 [    0.000000] Xen: 0000000000100000 - 00000000df66d800 (usable)

We were not handling that and ended up setting up a pagetable that
included up to df66e000 with the disastrous effect that when

	memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t));

tried to clear the page it would crash at the 2K mark.

Initially reported by Michael Young @
http://lists.xensource.com/archives/html/xen-devel/2011-01/msg00108.html

The fix is to page-align the size and also take into consideration the
start of the E820 (in case that is not page-aligned either). This
fixes the bootup failure on those affected machines.

This patch is a rework of Michael A Young's initial patch and
considers the case where the start is not page-aligned.

Reported-by: Michael A Young
Signed-off-by: Konrad Rzeszutek Wilk
Signed-off-by: Michael A Young
---
 arch/x86/xen/setup.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)
(limited to 'arch/x86')

diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index b5a7f92..75bdf2a 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -179,8 +179,13 @@ char * __init xen_memory_setup(void)
 	e820.nr_map = 0;
 	xen_extra_mem_start = mem_end;
 	for (i = 0; i < memmap.nr_entries; i++) {
-		unsigned long long end = map[i].addr + map[i].size;
+		unsigned long long end;

+		/* Guard against non-page aligned E820 entries. */
+		if (map[i].type == E820_RAM)
+			map[i].size -= (map[i].size + map[i].addr) % PAGE_SIZE;
+
+		end = map[i].addr + map[i].size;
 		if (map[i].type == E820_RAM && end > mem_end) {
 			/* RAM off the end - may be partially included */
 			u64 delta = min(map[i].size, end - mem_end);
--
cgit v1.1
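[ A worked example of the trimming expression in the patch above,
  assuming PAGE_SIZE = 0x1000: for the reported entry, addr = 0x100000
  and end = 0xdf66d800, so (size + addr) % PAGE_SIZE = 0x800 and the
  region is trimmed to end at 0xdf66d000, the last page boundary. As a
  self-checking sketch: ]

	#include <assert.h>
	#include <stdint.h>

	#define PAGE_SIZE 0x1000ULL

	int main(void)
	{
		/* values from the Dell Inspiron / SandyBridge SDP report */
		uint64_t addr = 0x100000ULL;
		uint64_t size = 0xdf66d800ULL - addr;

		/* trim so that addr + size lands on a page boundary, even
		 * when addr itself is not page-aligned */
		size -= (size + addr) % PAGE_SIZE;

		assert((addr + size) % PAGE_SIZE == 0);
		assert(addr + size == 0xdf66d000ULL);
		return 0;
	}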
From 23febeddbe67e5160929f7c48f7bfe83c2eecb99 Mon Sep 17 00:00:00 2001
From: Stefano Stabellini
Date: Wed, 26 Jan 2011 17:07:27 +0000
Subject: xen/setup: Route halt operations to safe_halt pvop.

With this patch, the cpuidle driver does not load and does not issue
the mwait operations. Instead the hypervisor is doing them (b/c we
call the safe_halt pvop). This fixes quite a lot of bootup issues
wherein the user had to force interrupts for the bootup to continue.

Details are discussed in:
http://lists.xensource.com/archives/html/xen-devel/2011-01/msg00535.html

[v2: Wrote the commit description]

Reported-by: Daniel De Graaf
Tested-by: Daniel De Graaf
Signed-off-by: Konrad Rzeszutek Wilk
---
 arch/x86/xen/setup.c | 1 +
 1 file changed, 1 insertion(+)
(limited to 'arch/x86')

diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 75bdf2a..a8a66a5 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -355,6 +355,7 @@ void __init xen_arch_setup(void)
 	boot_cpu_data.hlt_works_ok = 1;
 #endif
 	pm_idle = default_idle;
+	boot_option_idle_override = IDLE_HALT;

 	fiddle_vdso();
 }
--
cgit v1.1

From d038b12c6d773a4b9f69ca5243773bf6314f7ee9 Mon Sep 17 00:00:00 2001
From: Stephane Eranian
Date: Tue, 25 Jan 2011 17:32:01 +0200
Subject: perf: Fix Pentium4 raw event validation

This patch fixes some issues with raw event validation on Pentium 4
(Netburst) based processors.

As I was testing libpfm4 Netburst support, I ran into two problems in
the p4_validate_raw_event() function:

 - the shared field must be checked ONLY when HT is on
 - the binding to the ESCR register was missing

The second item was causing raw events to not be encoded correctly
compared to generic PMU events.

With this patch, I can now pass Netburst events to libpfm4 examples
and get meaningful results:

 $ task -e global_power_events:running:u noploop 1
 noploop for 1 seconds
 3,206,304,898 global_power_events:running

Signed-off-by: Stephane Eranian
Acked-by: Cyrill Gorcunov
Cc: peterz@infradead.org
Cc: paulus@samba.org
Cc: davem@davemloft.net
Cc: fweisbec@gmail.com
Cc: perfmon2-devel@lists.sf.net
Cc: eranian@gmail.com
Cc: robert.richter@amd.com
Cc: acme@redhat.com
Cc: gorcunov@gmail.com
Cc: ming.m.lin@intel.com
LKML-Reference: <4d3efb2f.1252d80a.1a80.ffffc83f@mx.google.com>
Signed-off-by: Ingo Molnar
---
 arch/x86/kernel/cpu/perf_event_p4.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)
(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index e56b9bf..f7a0993 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -682,7 +682,7 @@ static int p4_validate_raw_event(struct perf_event *event)
 	 * if an event is shared accross the logical threads
 	 * the user needs special permissions to be able to use it
 	 */
-	if (p4_event_bind_map[v].shared) {
+	if (p4_ht_active() && p4_event_bind_map[v].shared) {
 		if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
 			return -EACCES;
 	}
@@ -727,7 +727,8 @@ static int p4_hw_config(struct perf_event *event)
 		event->hw.config = p4_set_ht_bit(event->hw.config);

 	if (event->attr.type == PERF_TYPE_RAW) {
-
+		struct p4_event_bind *bind;
+		unsigned int esel;
 		/*
 		 * Clear bits we reserve to be managed by kernel itself
 		 * and never allowed from a user space
@@ -743,6 +744,13 @@ static int p4_hw_config(struct perf_event *event)
 		 * bits since we keep additional info here (for cache events and etc)
 		 */
 		event->hw.config |= event->attr.config;
+		bind = p4_config_get_bind(event->attr.config);
+		if (!bind) {
+			rc = -EINVAL;
+			goto out;
+		}
+		esel = P4_OPCODE_ESEL(bind->opcode);
+		event->hw.config |= p4_config_pack_cccr(P4_CCCR_ESEL(esel));
 	}

 	rc = x86_setup_perfctr(event);
--
cgit v1.1

From f12d3d04e8f6223276abb068c5d72852174b8c31 Mon Sep 17 00:00:00 2001
From: Matthieu CASTET
Date: Thu, 20 Jan 2011 21:11:45 +0100
Subject: x86, nx: Don't force pages RW when setting NX bits

Xen wants page table pages read-only. But the initial page tables
(from head_*.S) live in .data or .bss.
That was broken by 64edc8ed5ffae999d8d413ba006850e9e34166cb. There is
absolutely no reason to force these pages RW after they have already
been marked RO.

Signed-off-by: Matthieu CASTET
Tested-by: Konrad Rzeszutek Wilk
Signed-off-by: H. Peter Anvin
---
 arch/x86/mm/pageattr.c | 8 --------
 1 file changed, 8 deletions(-)
(limited to 'arch/x86')

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 8b830ca..d343b3c 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -256,7 +256,6 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
 				   unsigned long pfn)
 {
 	pgprot_t forbidden = __pgprot(0);
-	pgprot_t required = __pgprot(0);

 	/*
 	 * The BIOS area between 640k and 1Mb needs to be executable for
@@ -282,12 +281,6 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
 	if (within(pfn, __pa((unsigned long)__start_rodata) >> PAGE_SHIFT,
 		   __pa((unsigned long)__end_rodata) >> PAGE_SHIFT))
 		pgprot_val(forbidden) |= _PAGE_RW;
-	/*
-	 * .data and .bss should always be writable.
-	 */
-	if (within(address, (unsigned long)_sdata, (unsigned long)_edata) ||
-	    within(address, (unsigned long)__bss_start, (unsigned long)__bss_stop))
-		pgprot_val(required) |= _PAGE_RW;

 #if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA)
 	/*
@@ -327,7 +320,6 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
 #endif

 	prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden));
-	prot = __pgprot(pgprot_val(prot) | pgprot_val(required));

 	return prot;
 }
--
cgit v1.1

From f7448548a9f32db38f243ccd4271617758ddfe2c Mon Sep 17 00:00:00 2001
From: Suresh Siddha
Date: Wed, 2 Feb 2011 17:02:55 -0800
Subject: x86, mtrr: Avoid MTRR reprogramming on BP during boot on UP platforms

Markus Kohn ran into a hard hang regression on an acer aspire 1310,
when acpi is enabled. git bisect showed the following commit as the
bad one that introduced the boot regression.

	commit d0af9eed5aa91b6b7b5049cae69e5ea956fd85c3
	Author: Suresh Siddha
	Date:   Wed Aug 19 18:05:36 2009 -0700

	    x86, pat/mtrr: Rendezvous all the cpus for MTRR/PAT init

Because of the UP configuration of that platform,
native_smp_prepare_cpus() bailed out (in smp_sanity_check()) before
doing the set_mtrr_aps_delayed_init().

Further down the boot path, native_smp_cpus_done() will call the
delayed MTRR initialization for the AP's (mtrr_aps_init()) with
mtrr_aps_delayed_init not set. This resulted in the boot processor
reprogramming its MTRR's to the values seen during the start of the
OS boot. While this is not needed ideally, this shouldn't have caused
any side-effects. This is because the reprogramming of MTRR's
(set_mtrr_state() that gets called via set_mtrr()) will check if the
live register contents are different from what is being asked to
write and will do the actual write only if they are different.

BP's mtrr state is read during the start of the OS boot and typically
nothing would have changed when we ask to reprogram it on BP again
because of the above scenario on an UP platform. So on a normal UP
platform no reprogramming of BP MTRR MSR's happens and all is well.

However, on this platform, the BIOS seems to be modifying the fixed
mtrr range registers between the start of OS boot and when we double
check the live registers for reprogramming BP MTRR registers. And as
the live registers are modified, we end up reprogramming the MTRR's
to the state seen during the start of the OS boot.

During ACPI initialization, something in the BIOS (probably an SMI
handler?)
doesn't like this and results in a hard lockup.

We didn't see this boot hang issue on this platform before the commit
d0af9eed5aa91b6b7b5049cae69e5ea956fd85c3, because only the AP's (if
any) will program its MTRR's to the value that BP had at the start of
the OS boot.

Fix this issue by checking mtrr_aps_delayed_init before continuing
further in the mtrr_aps_init(). Now, only AP's (if any) will program
its MTRR's to the BP values during boot.

Addresses https://bugzilla.novell.com/show_bug.cgi?id=623393

[ By the way, this behavior of the BIOS modifying MTRR's after the
  start of the OS boot is not common and the kernel is not prepared to
  handle this situation well. Irrespective of this issue, during
  suspend/resume, the linux kernel will try to reprogram the BP's MTRR
  values to the values seen during the start of the OS boot. So
  suspend/resume might be already broken on this platform for all
  linux kernel versions. ]

Reported-and-bisected-by: Markus Kohn
Tested-by: Markus Kohn
Signed-off-by: Suresh Siddha
Cc: Thomas Renninger
Cc: Rafael Wysocki
Cc: Venkatesh Pallipadi
Cc: stable@kernel.org # [v2.6.32+]
LKML-Reference: <1296694975.4418.402.camel@sbsiddha-MOBL3.sc.intel.com>
Signed-off-by: Ingo Molnar
---
 arch/x86/kernel/cpu/mtrr/main.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)
(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 01c0f3e..bebabec 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -793,13 +793,21 @@ void set_mtrr_aps_delayed_init(void)
 }

 /*
- * MTRR initialization for all AP's
+ * Delayed MTRR initialization for all AP's
  */
 void mtrr_aps_init(void)
 {
 	if (!use_intel())
 		return;

+	/*
+	 * Check if someone has requested the delay of AP MTRR initialization,
+	 * by doing set_mtrr_aps_delayed_init(), prior to this point. If not,
+	 * then we are done.
+	 */
+	if (!mtrr_aps_delayed_init)
+		return;
+
 	set_mtrr(~0U, 0, 0, 0);
 	mtrr_aps_delayed_init = false;
 }
--
cgit v1.1

From 831d52bc153971b70e64eccfbed2b232394f22f8 Mon Sep 17 00:00:00 2001
From: Suresh Siddha
Date: Thu, 3 Feb 2011 12:20:04 -0800
Subject: x86, mm: avoid possible bogus tlb entries by clearing prev
 mm_cpumask after switching mm

Clearing the cpu in prev's mm_cpumask early will avoid the flush tlb
IPI's while the cr3 is still pointing to the prev mm. And this window
can lead to the possibility of bogus TLB fills resulting in strange
failures. One such problematic scenario is mentioned below.

 T1. CPU-1 is context switching from mm1 to mm2 context and got a NMI
     etc between the point of clearing the cpu from the mm_cpumask(mm1)
     and before reloading the cr3 with the new mm2.

 T2. CPU-2 is tearing down a specific vma for mm1 and will proceed with
     flushing the TLB for mm1. It doesn't send the flush TLB to CPU-1
     as it doesn't see that cpu listed in the mm_cpumask(mm1).

 T3. After the TLB flush is complete, CPU-2 goes ahead and frees the
     page-table pages associated with the removed vma mapping.

 T4. CPU-2 now allocates those freed page-table pages for something
     else.

 T5. As the CR3 and TLB caches for mm1 is still active on CPU-1, CPU-1
     can potentially speculate and walk through the page-table caches
     and can insert new TLB entries. As the page-table pages are
     already freed and being used on CPU-2, this page walk can
     potentially insert a bogus global TLB entry depending on the
     (random) contents of the page that is being used on CPU-2.

 T6.
     This bogus TLB entry being global will be active across future
     CR3 changes and can result in weird memory corruption etc.

To avoid this issue, for the prev mm that is handing over the cpu to
another mm, clear the cpu from the mm_cpumask(prev) after the cr3 is
changed.

Marking it for -stable, though we haven't seen any reported failure
that can be attributed to this.

Signed-off-by: Suresh Siddha
Acked-by: Ingo Molnar
Cc: stable@kernel.org [v2.6.32+]
Signed-off-by: Linus Torvalds
---
 arch/x86/include/asm/mmu_context.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)
(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 4a2d4e0..8b5393e 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -36,8 +36,6 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 	unsigned cpu = smp_processor_id();

 	if (likely(prev != next)) {
-		/* stop flush ipis for the previous mm */
-		cpumask_clear_cpu(cpu, mm_cpumask(prev));
 #ifdef CONFIG_SMP
 		percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
 		percpu_write(cpu_tlbstate.active_mm, next);
@@ -47,6 +45,9 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 		/* Re-load page tables */
 		load_cr3(next->pgd);

+		/* stop flush ipis for the previous mm */
+		cpumask_clear_cpu(cpu, mm_cpumask(prev));
+
 		/*
 		 * load the LDT, if the LDT is different:
 		 */
--
cgit v1.1
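[ The ordering constraint above, reduced to a sketch of switch_mm() —
  everything except the two operations that must not be swapped is
  elided: the CPU has to stay visible in mm_cpumask(prev) until CR3 no
  longer points at prev's page tables, so concurrent flushers cannot
  skip it while stale translations are still reachable: ]

	/* sketch only: the real switch_mm() is in asm/mmu_context.h */
	static inline void switch_mm_sketch(struct mm_struct *prev,
					    struct mm_struct *next,
					    unsigned int cpu)
	{
		cpumask_set_cpu(cpu, mm_cpumask(next));

		load_cr3(next->pgd);	/* stop using prev's page tables... */

		/* ...and only then let TLB-flush IPIs skip this CPU */
		cpumask_clear_cpu(cpu, mm_cpumask(prev));
	}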
From 11d4c3f9b671720e80353dd7e433ff2bf65e9500 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin"
Date: Fri, 4 Feb 2011 16:14:11 -0800
Subject: x86-32: Make sure the stack is set up before we use it

Since checkin ebba638ae723d8a8fc2f7abce5ec18b688b791d7 we call
verify_cpu even in 32-bit mode. Unfortunately, calling a function
means using the stack, and the stack pointer was not initialized in
the 32-bit setup code! This code initializes the stack pointer, and
simplifies the interface slightly since it is easier to rely on just
a pointer value rather than a descriptor; we need to have different
values for the segment register anyway.

This retains start_stack as a virtual address, even though a physical
address would be more convenient for 32 bits; the 64-bit code wants
the other way around...

Reported-by: Matthieu Castet
LKML-Reference: <4D41E86D.8060205@free.fr>
Tested-by: Kees Cook
Signed-off-by: H. Peter Anvin
---
 arch/x86/include/asm/smp.h   |  5 +----
 arch/x86/kernel/acpi/sleep.c |  2 +-
 arch/x86/kernel/head_32.S    | 30 +++++++++++++-----------------
 arch/x86/kernel/smpboot.c    |  4 ++--
 4 files changed, 17 insertions(+), 24 deletions(-)
(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 4c2f63c..1f46951 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -40,10 +40,7 @@ DECLARE_EARLY_PER_CPU(u16, x86_cpu_to_apicid);
 DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid);

 /* Static state in head.S used to set up a CPU */
-extern struct {
-	void *sp;
-	unsigned short ss;
-} stack_start;
+extern unsigned long stack_start; /* Initial stack pointer address */

 struct smp_ops {
 	void (*smp_prepare_boot_cpu)(void);

diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 69fd72a..4d9ebba 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -100,7 +100,7 @@ int acpi_save_state_mem(void)
 #else /* CONFIG_64BIT */
 	header->trampoline_segment = setup_trampoline() >> 4;
 #ifdef CONFIG_SMP
-	stack_start.sp = temp_stack + sizeof(temp_stack);
+	stack_start = (unsigned long)temp_stack + sizeof(temp_stack);
 	early_gdt_descr.address =
 			(unsigned long)get_cpu_gdt_table(smp_processor_id());
 	initial_gs = per_cpu_offset(smp_processor_id());

diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index fc293dc..767d6c4 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -85,6 +85,8 @@ RESERVE_BRK(pagetables, INIT_MAP_SIZE)
  */
 __HEAD
 ENTRY(startup_32)
+	movl pa(stack_start),%ecx
+
 	/* test KEEP_SEGMENTS flag to see if the bootloader is asking
 		us to not reload segments */
 	testb $(1<<6), BP_loadflags(%esi)
@@ -99,7 +101,9 @@ ENTRY(startup_32)
 	movl %eax,%es
 	movl %eax,%fs
 	movl %eax,%gs
+	movl %eax,%ss
 2:
+	leal -__PAGE_OFFSET(%ecx),%esp

 /*
  * Clear BSS first so that there are no surprises...
@@ -145,8 +149,6 @@ ENTRY(startup_32)
  * _brk_end is set up to point to the first "safe" location.
  * Mappings are created both at virtual address 0 (identity mapping)
  * and PAGE_OFFSET for up to _end.
- *
- * Note that the stack is not yet set up!
  */
 #ifdef CONFIG_X86_PAE
@@ -282,6 +284,9 @@ ENTRY(startup_32_smp)
 	movl %eax,%es
 	movl %eax,%fs
 	movl %eax,%gs
+	movl pa(stack_start),%ecx
+	movl %eax,%ss
+	leal -__PAGE_OFFSET(%ecx),%esp
 #endif /* CONFIG_SMP */
 default_entry:
@@ -347,8 +352,8 @@ default_entry:
 	movl %eax,%cr0		/* ..and set paging (PG) bit */
 	ljmp $__BOOT_CS,$1f	/* Clear prefetch and normalize %eip */
 1:
-	/* Set up the stack pointer */
-	lss stack_start,%esp
+	/* Shift the stack pointer to a virtual address */
+	addl $__PAGE_OFFSET, %esp

 /*
  * Initialize eflags. Some BIOS's leave bits like NT set.
  * This would
@@ -360,9 +365,7 @@ default_entry:

 #ifdef CONFIG_SMP
 	cmpb $0, ready
-	jz 1f			/* Initial CPU cleans BSS */
-	jmp checkCPUtype
-1:
+	jnz checkCPUtype
 #endif /* CONFIG_SMP */

 /*
@@ -470,14 +473,7 @@ is386:	movl $2,%ecx		# set MP
 	cld			# gcc2 wants the direction flag cleared at all times
 	pushl $0		# fake return address for unwinder
-#ifdef CONFIG_SMP
-	movb ready, %cl
 	movb $1, ready
-	cmpb $0,%cl		# the first CPU calls start_kernel
-	je   1f
-	movl (stack_start), %esp
-1:
-#endif /* CONFIG_SMP */
 	jmp *(initial_code)

 /*
@@ -670,15 +666,15 @@ ENTRY(initial_page_table)
 #endif

 .data
+.balign 4
 ENTRY(stack_start)
 	.long init_thread_union+THREAD_SIZE
-	.long __BOOT_DS
-
-ready:	.byte 0

 early_recursion_flag:
 	.long 0

+ready:	.byte 0
+
 int_msg:
 	.asciz "Unknown interrupt or fault at: %p %p %p\n"

diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 0cbe8c0..03273b6 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -638,7 +638,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
 	 * target processor state.
 	 */
 	startup_ipi_hook(phys_apicid, (unsigned long) start_secondary,
-			 (unsigned long)stack_start.sp);
+			 stack_start);

 	/*
 	 * Run STARTUP IPI loop.
@@ -785,7 +785,7 @@ do_rest:
 #endif
 	early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
 	initial_code = (unsigned long)start_secondary;
-	stack_start.sp = (void *) c_idle.idle->thread.sp;
+	stack_start  = c_idle.idle->thread.sp;

 	/* start_ip had better be page-aligned! */
 	start_ip = setup_trampoline();
--
cgit v1.1

From d344e38b2c151ca5e5e39f562017127e93912528 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin"
Date: Sun, 6 Feb 2011 21:16:09 -0800
Subject: x86, nx: Mark the ACPI resume trampoline code as +x

We reserve lowmem for the things that need it, like the ACPI wakeup
code, way early to guarantee availability. This happens before we set
up the proper pagetables, so set_memory_x() has no effect.

Until we have a better solution, use an initcall to mark the wakeup
code executable.

Originally-by: Matthieu Castet
Signed-off-by: H. Peter Anvin
Cc: Matthias Hopf
Cc: rjw@sisk.pl
Cc: Suresh Siddha
LKML-Reference: <4D4F8019.2090104@zytor.com>
Signed-off-by: Ingo Molnar
---
 arch/x86/kernel/acpi/sleep.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)
(limited to 'arch/x86')

diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 4d9ebba..68d1537 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -12,10 +12,8 @@
 #include
 #include
 #include
-
-#ifdef CONFIG_X86_32
 #include
-#endif
+#include

 #include "realmode/wakeup.h"
 #include "sleep.h"
@@ -149,6 +147,15 @@ void __init acpi_reserve_wakeup_memory(void)
 	memblock_x86_reserve_range(mem, mem + WAKEUP_SIZE, "ACPI WAKEUP");
 }

+int __init acpi_configure_wakeup_memory(void)
+{
+	if (acpi_realmode)
+		set_memory_x(acpi_realmode, WAKEUP_SIZE >> PAGE_SHIFT);
+
+	return 0;
+}
+arch_initcall(acpi_configure_wakeup_memory);
+

 static int __init acpi_sleep_setup(char *str)
 {
--
cgit v1.1

From 893a5ab6ee7d51b231ed45aa844f8088642cb6bf Mon Sep 17 00:00:00 2001
From: Joerg Roedel
Date: Fri, 14 Jan 2011 16:45:01 +0100
Subject: KVM: SVM: Make sure KERNEL_GS_BASE is valid when loading gs_index

The gs_index loading code uses the swapgs instruction to switch to the
user gs_base temporarily. This is unsafe in a lightweight exit path in
KVM on AMD, because the KERNEL_GS_BASE MSR is switched lazily.
An NMI happening in the critical path of load_gs_index may then use the
wrong GS_BASE value, leading to unpredictable behavior, e.g. a
triple-fault.

This patch fixes the issue by making sure that load_gs_index is called
only with a valid KERNEL_GS_BASE value loaded in KVM.

Signed-off-by: Joerg Roedel
Signed-off-by: Avi Kivity
---
 arch/x86/kvm/svm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'arch/x86')

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 25bd1bc..54ce246 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1150,8 +1150,8 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu)
 	kvm_load_ldt(svm->host.ldt);
 #ifdef CONFIG_X86_64
 	loadsegment(fs, svm->host.fs);
-	load_gs_index(svm->host.gs);
 	wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gs);
+	load_gs_index(svm->host.gs);
 #else
 	loadsegment(gs, svm->host.gs);
 #endif
--
cgit v1.1

From 2fb270f3212a1e6a73f86f76c85caee93aae4386 Mon Sep 17 00:00:00 2001
From: Jan Beulich
Date: Wed, 9 Feb 2011 08:21:02 +0000
Subject: x86: Fix section mismatch in LAPIC initialization

Additionally, doing things conditionally upon smp_processor_id() being
zero is generally a bad idea, as this means CPU 0 cannot be offlined
and brought back online later again.

While there may be other places where this is done, I think adding
more of those should be avoided so that some day SMP can really become
"symmetrical".

Signed-off-by: Jan Beulich
Cc: Cyrill Gorcunov
LKML-Reference: <4D525C7E0200007800030EE1@vpn.id2.novell.com>
Signed-off-by: Ingo Molnar
---
 arch/x86/include/asm/apic.h |  1 +
 arch/x86/kernel/apic/apic.c |  9 +++++++--
 arch/x86/kernel/smpboot.c   |  4 ++--
 3 files changed, 10 insertions(+), 4 deletions(-)
(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 5e3969c..3c89694 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -233,6 +233,7 @@ extern void sync_Arb_IDs(void);
 extern void init_bsp_APIC(void);
 extern void setup_local_APIC(void);
 extern void end_local_APIC_setup(void);
+extern void bsp_end_local_APIC_setup(void);
 extern void init_apic_mappings(void);
 void register_lapic_address(unsigned long address);
 extern void setup_boot_APIC_clock(void);

diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 06c196d..76b96d7 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1381,12 +1381,17 @@ void __cpuinit end_local_APIC_setup(void)
 #endif

 	apic_pm_activate();
+}
+
+void __init bsp_end_local_APIC_setup(void)
+{
+	end_local_APIC_setup();

 	/*
 	 * Now that local APIC setup is completed for BP, configure the fault
 	 * handling for interrupt remapping.
 	 */
-	if (!smp_processor_id() && intr_remapping_enabled)
+	if (intr_remapping_enabled)
 		enable_drhd_fault_handling();
 }
@@ -1756,7 +1761,7 @@ int __init APIC_init_uniprocessor(void)
 		enable_IO_APIC();
 #endif

-	end_local_APIC_setup();
+	bsp_end_local_APIC_setup();

 #ifdef CONFIG_X86_IO_APIC
 	if (smp_found_config && !skip_ioapic_setup && nr_ioapics)

diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 03273b6..08776a9 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1060,7 +1060,7 @@ static int __init smp_sanity_check(unsigned max_cpus)

 		connect_bsp_APIC();
 		setup_local_APIC();
-		end_local_APIC_setup();
+		bsp_end_local_APIC_setup();
 		return -1;
 	}
@@ -1137,7 +1137,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
 	if (!skip_ioapic_setup && nr_ioapics)
 		enable_IO_APIC();

-	end_local_APIC_setup();
+	bsp_end_local_APIC_setup();

 	map_cpu_to_logical_apicid();
--
cgit v1.1

From d91309f69b7bdb64aeb30106fde8d18c5dd354b5 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Fri, 11 Feb 2011 22:07:46 +0100
Subject: x86: Fix text_poke_smp_batch() deadlock

Fix this deadlock - we are already holding the mutex:

 =======================================================
 [ INFO: possible circular locking dependency detected ]
 2.6.38-rc4-test+ #1
 -------------------------------------------------------
 bash/1850 is trying to acquire lock:
  (text_mutex){+.+.+.}, at: [] return_to_handler+0x0/0x2f

 but task is already holding lock:
  (smp_alt){+.+...}, at: [] return_to_handler+0x0/0x2f

 which lock already depends on the new lock.

 the existing dependency chain (in reverse order) is:

 -> #2 (smp_alt){+.+...}:
      [] lock_acquire+0xcd/0xf8
      [] __mutex_lock_common+0x4c/0x339
      [] mutex_lock_nested+0x3e/0x43
      [] alternatives_smp_switch+0x77/0x1d8
      [] do_boot_cpu+0xd7/0x762
      [] native_cpu_up+0xe6/0x16a
      [] _cpu_up+0x9d/0xee
      [] cpu_up+0xd3/0xe7
      [] kernel_init+0xe8/0x20a
      [] kernel_thread_helper+0x4/0x10

 -> #1 (cpu_hotplug.lock){+.+.+.}:
      [] lock_acquire+0xcd/0xf8
      [] __mutex_lock_common+0x4c/0x339
      [] mutex_lock_nested+0x3e/0x43
      [] get_online_cpus+0x41/0x55
      [] stop_machine+0x1e/0x3e
      [] text_poke_smp_batch+0x3a/0x3c
      [] arch_optimize_kprobes+0x10d/0x11c
      [] kprobe_optimizer+0x152/0x222
      [] process_one_work+0x1d3/0x335
      [] worker_thread+0x104/0x1a4
      [] kthread+0x9d/0xa5
      [] kernel_thread_helper+0x4/0x10

 -> #0 (text_mutex){+.+.+.}:

 other info that might help us debug this:

 6 locks held by bash/1850:
  #0: (&buffer->mutex){+.+.+.}, at: [] return_to_handler+0x0/0x2f
  #1: (s_active#75){.+.+.+}, at: [] return_to_handler+0x0/0x2f
  #2: (x86_cpu_hotplug_driver_mutex){+.+.+.}, at: [] return_to_handler+0x0/0x2f
  #3: (cpu_add_remove_lock){+.+.+.}, at: [] return_to_handler+0x0/0x2f
  #4: (cpu_hotplug.lock){+.+.+.}, at: [] return_to_handler+0x0/0x2f
  #5: (smp_alt){+.+...}, at: [] return_to_handler+0x0/0x2f

 stack backtrace:
 Pid: 1850, comm: bash Not tainted 2.6.38-rc4-test+ #1
 Call Trace:
  [] print_circular_bug+0xa8/0xb7
  [] mutex_lock_nested+0x3e/0x43
  [] alternatives_smp_unlock+0x3d/0x93
  [] alternatives_smp_switch+0x198/0x1d8
  [] native_cpu_die+0x65/0x95
  [] _cpu_down+0x13e/0x202
  [] sysfs_write_file+0x108/0x144
  [] vfs_write+0xac/0xff
  [] sys_write+0x4a/0x6e
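[ The relationship that closes the cycle, roughly as kernel/stop_machine.c
  implemented it at the time: stop_machine() is __stop_machine() wrapped
  in get_online_cpus(), which takes cpu_hotplug.lock — the very lock
  already held on the CPU-offline path above, so that path must call the
  raw variant: ]

	int stop_machine(int (*fn)(void *), void *data,
			 const struct cpumask *cpus)
	{
		int ret;

		/* takes cpu_hotplug.lock: deadlocks if the caller
		 * already holds it, as _cpu_down() does here */
		get_online_cpus();
		ret = __stop_machine(fn, data, cpus);
		put_online_cpus();

		return ret;
	}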
Reported-by: Steven Rostedt
Tested-by: Steven Rostedt
Signed-off-by: Peter Zijlstra
Cc: mathieu.desnoyers@efficios.com
Cc: rusty@rustcorp.com.au
Cc: ananth@in.ibm.com
Cc: masami.hiramatsu.pt@hitachi.com
Cc: fweisbec@gmail.com
Cc: jbeulich@novell.com
Cc: jbaron@redhat.com
Cc: mhiramat@redhat.com
LKML-Reference: <1297458466.5226.93.camel@laptop>
Signed-off-by: Ingo Molnar
---
 arch/x86/kernel/alternative.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'arch/x86')

diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 1236085..7038b95 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -671,7 +671,7 @@ void __kprobes text_poke_smp_batch(struct text_poke_param *params, int n)

 	atomic_set(&stop_machine_first, 1);
 	wrote_text = 0;
-	stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
+	__stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
 }

 #if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
--
cgit v1.1

From 5117348dea5e3ecbb785cfa1271386fb49332b41 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Sat, 12 Feb 2011 11:51:03 +0100
Subject: x86: Readd missing irq_to_desc() in fixup_irq()

commit a3c08e5d(x86: Convert irq_chip access to new functions)
accidentally zapped

	desc = irq_to_desc(irq);

in the vector loop. So we lock some random irq descriptor.

Add it back.

Signed-off-by: Thomas Gleixner
Cc: # .37
---
 arch/x86/kernel/irq.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
(limited to 'arch/x86')

diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 52945da..387b6a0 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -367,7 +367,8 @@ void fixup_irqs(void)
 		if (irr & (1 << (vector % 32))) {
 			irq = __this_cpu_read(vector_irq[vector]);

-			data = irq_get_irq_data(irq);
+			desc = irq_to_desc(irq);
+			data = &desc->irq_data;
 			raw_spin_lock(&desc->lock);
 			if (data->chip->irq_retrigger)
 				data->chip->irq_retrigger(data);
--
cgit v1.1

From 1c9d16e35911090dee3f9313e6af13af623d66ee Mon Sep 17 00:00:00 2001
From: Borislav Petkov
Date: Fri, 11 Feb 2011 18:17:54 +0100
Subject: x86: Fix mwait_usable section mismatch

We use it in non-__cpuinit code now too, so drop the marker.

Signed-off-by: Borislav Petkov
LKML-Reference: <20110211171754.GA21047@aftab>
Signed-off-by: Ingo Molnar
---
 arch/x86/include/asm/cpu.h | 2 +-
 arch/x86/kernel/process.c  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)
(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h
index 6e6e755..4564c8e 100644
--- a/arch/x86/include/asm/cpu.h
+++ b/arch/x86/include/asm/cpu.h
@@ -32,6 +32,6 @@ extern void arch_unregister_cpu(int);

 DECLARE_PER_CPU(int, cpu_state);

-int __cpuinit mwait_usable(const struct cpuinfo_x86 *);
+int mwait_usable(const struct cpuinfo_x86 *);

 #endif /* _ASM_X86_CPU_H */

diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index e764fc0..3c189e9 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -506,7 +506,7 @@ static void poll_idle(void)
 #define MWAIT_ECX_EXTENDED_INFO		0x01
 #define MWAIT_EDX_C1			0xf0

-int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
+int mwait_usable(const struct cpuinfo_x86 *c)
 {
 	u32 eax, ebx, ecx, edx;
--
cgit v1.1

From 678301ecadec24ff77ab310eebf8a32ccddb1850 Mon Sep 17 00:00:00 2001
From: Paul Bolle
Date: Mon, 14 Feb 2011 22:52:38 +0100
Subject: x86, ioapic: Don't warn about non-existing IOAPICs if we have none

mp_find_ioapic() prints errors like:

	ERROR: Unable to locate IOAPIC for GSI 13

if it can't find the IOAPIC that manages that specific GSI. I see
errors like that at every boot of a laptop that apparently doesn't
have any IOAPICs.

But if there are no IOAPICs it doesn't seem to be an error that none
can be found. A solution that gets rid of this message is to return
directly if nr_ioapics (still) is zero.
(But keep returning -1 in that case, so nothing breaks from this
change.)

The call chain that generates this error is:

 pnpacpi_allocated_resource()
   case ACPI_RESOURCE_TYPE_IRQ:
     pnpacpi_parse_allocated_irqresource()
       acpi_get_override_irq()
         mp_find_ioapic()

Signed-off-by: Paul Bolle
Signed-off-by: Ingo Molnar
---
 arch/x86/kernel/apic/io_apic.c | 3 +++
 1 file changed, 3 insertions(+)
(limited to 'arch/x86')

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 697dc34..ca9e2a35 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -4002,6 +4002,9 @@ int mp_find_ioapic(u32 gsi)
 {
 	int i = 0;

+	if (nr_ioapics == 0)
+		return -1;
+
 	/* Find the IOAPIC that manages this GSI. */
 	for (i = 0; i < nr_ioapics; i++) {
 		if ((gsi >= mp_gsi_routing[i].gsi_base)
--
cgit v1.1

From 84e383b322e5348db03be54ff64cc6da87003717 Mon Sep 17 00:00:00 2001
From: Naga Chumbalkar
Date: Mon, 14 Feb 2011 22:47:17 +0000
Subject: x86, dmi, debug: Log board name (when present) in dmesg/oops output

The "Type 2" SMBIOS record that contains Board Name is not strictly
required and may be absent in the SMBIOS on some platforms.

( Please note that Type 2 is not listed in Table 3 in Sec 6.2
  ("Required Structures and Data") of the SMBIOS v2.7 Specification. )

Use the Manufacturer Name (aka System Vendor) name. Print Board Name
only when it is present.

Before the fix:
 (i) dmesg output: DMI: /ProLiant DL380 G6, BIOS P62 01/29/2011
 (ii) oops output: Pid: 2170, comm: bash Not tainted 2.6.38-rc4+ #3 /ProLiant DL380 G6

After the fix:
 (i) dmesg output: DMI: HP ProLiant DL380 G6, BIOS P62 01/29/2011
 (ii) oops output: Pid: 2278, comm: bash Not tainted 2.6.38-rc4+ #4 HP ProLiant DL380 G6

Signed-off-by: Naga Chumbalkar
Reviewed-by: Bjorn Helgaas
Cc: # .3x - good for debugging, please apply as far back as it applies cleanly
LKML-Reference: <20110214224423.2182.13929.sendpatchset@nchumbalkar.americas.hpqcorp.net>
Signed-off-by: Ingo Molnar
---
 arch/x86/kernel/process.c | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)
(limited to 'arch/x86')

diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 3c189e9..ff45541 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -92,21 +92,31 @@ void show_regs(struct pt_regs *regs)

 void show_regs_common(void)
 {
-	const char *board, *product;
+	const char *vendor, *product, *board;

-	board = dmi_get_system_info(DMI_BOARD_NAME);
-	if (!board)
-		board = "";
+	vendor = dmi_get_system_info(DMI_SYS_VENDOR);
+	if (!vendor)
+		vendor = "";
 	product = dmi_get_system_info(DMI_PRODUCT_NAME);
 	if (!product)
 		product = "";

+	/* Board Name is optional */
+	board = dmi_get_system_info(DMI_BOARD_NAME);
+
 	printk(KERN_CONT "\n");
-	printk(KERN_DEFAULT "Pid: %d, comm: %.20s %s %s %.*s %s/%s\n",
+	printk(KERN_DEFAULT "Pid: %d, comm: %.20s %s %s %.*s",
 		current->pid, current->comm, print_tainted(),
 		init_utsname()->release,
 		(int)strcspn(init_utsname()->version, " "),
-		init_utsname()->version, board, product);
+		init_utsname()->version);
+	printk(KERN_CONT " ");
+	printk(KERN_CONT "%s %s", vendor, product);
+	if (board) {
+		printk(KERN_CONT "/");
+		printk(KERN_CONT "%s", board);
+	}
+	printk(KERN_CONT "\n");
 }

 void flush_thread(void)
--
cgit v1.1

From 7d44ec193d95416d1342cdd86392a1eeb7461186 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov
Date: Wed, 16 Feb 2011 14:08:02 +0300
Subject: perf, x86: P4 PMU: Fix spurious NMI messages

Several people have reported spurious unknown NMI messages on some P4
CPUs.
This patch fixes it by checking for an overflow (negative counter
values) directly, instead of relying on the P4_CCCR_OVF bit.

Reported-by: George Spelvin
Reported-by: Meelis Roos
Reported-by: Don Zickus
Reported-by: Dave Airlie
Signed-off-by: Cyrill Gorcunov
Cc: Lin Ming
Cc: Don Zickus
Cc: Peter Zijlstra
LKML-Reference:
Signed-off-by: Ingo Molnar
---
 arch/x86/include/asm/perf_event_p4.h |  1 +
 arch/x86/kernel/cpu/perf_event_p4.c  | 11 ++++++++---
 2 files changed, 9 insertions(+), 3 deletions(-)
(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h
index e2f6a99..cc29086 100644
--- a/arch/x86/include/asm/perf_event_p4.h
+++ b/arch/x86/include/asm/perf_event_p4.h
@@ -22,6 +22,7 @@

 #define ARCH_P4_CNTRVAL_BITS	(40)
 #define ARCH_P4_CNTRVAL_MASK	((1ULL << ARCH_P4_CNTRVAL_BITS) - 1)
+#define ARCH_P4_UNFLAGGED_BIT	((1ULL) << (ARCH_P4_CNTRVAL_BITS - 1))

 #define P4_ESCR_EVENT_MASK	0x7e000000U
 #define P4_ESCR_EVENT_SHIFT	25

diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index f7a0993..ff751a9 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -770,9 +770,14 @@ static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc)
 		return 1;
 	}

-	/* it might be unflagged overflow */
-	rdmsrl(hwc->event_base + hwc->idx, v);
-	if (!(v & ARCH_P4_CNTRVAL_MASK))
+	/*
+	 * In some circumstances the overflow might issue an NMI but did
+	 * not set P4_CCCR_OVF bit. Because a counter holds a negative value
+	 * we simply check for high bit being set, if it's cleared it means
+	 * the counter has reached zero value and continued counting before
+	 * real NMI signal was received:
+	 */
+	if (!(v & ARCH_P4_UNFLAGGED_BIT))
 		return 1;

 	return 0;
--
cgit v1.1

From 14796fca2bd22acc73dd0887248d003b0f441d08 Mon Sep 17 00:00:00 2001
From: Len Brown
Date: Tue, 18 Jan 2011 20:48:27 -0500
Subject: intel_idle: disable NHM/WSM HW C-state auto-demotion

Hardware C-state auto-demotion is a mechanism where the HW overrides
the OS C-state request, instead demoting to a shallower state, which
is less expensive, but saves less power.

Modern Linux should generally get exactly the states it requests. In
particular, when a CPU is taken off-line, it must not be demoted, else
it can prevent the entire package from reaching deep C-states.

https://bugzilla.kernel.org/show_bug.cgi?id=25252

Signed-off-by: Len Brown
---
 arch/x86/include/asm/msr-index.h | 4 ++++
 1 file changed, 4 insertions(+)
(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 4d0dfa0..b75eeab 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -36,6 +36,10 @@
 #define MSR_IA32_PERFCTR1		0x000000c2
 #define MSR_FSB_FREQ			0x000000cd

+#define MSR_NHM_SNB_PKG_CST_CFG_CTL	0x000000e2
+#define NHM_C3_AUTO_DEMOTE		(1UL << 25)
+#define NHM_C1_AUTO_DEMOTE		(1UL << 26)
+
 #define MSR_MTRRcap			0x000000fe
 #define MSR_IA32_BBL_CR_CTL		0x00000119
--
cgit v1.1
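[ How a driver consumes the definitions above — a sketch in the style
  of intel_idle's auto-demotion disable path; treat the flow as
  illustrative rather than a verbatim copy of the driver: ]

	#include <asm/msr.h>

	/* chosen per CPU model, e.g. NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE */
	static unsigned long long auto_demotion_disable_flags;

	/* clear the auto-demotion enable bits so the OS keeps control of
	 * C-state selection; run on each CPU, e.g. via on_each_cpu() */
	static void auto_demotion_disable(void *dummy)
	{
		unsigned long long msr_bits;

		rdmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr_bits);
		msr_bits &= ~auto_demotion_disable_flags;
		wrmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr_bits);
	}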
From bfb53ccf1c734b1907df7189eef4c08489827951 Mon Sep 17 00:00:00 2001
From: Len Brown
Date: Wed, 16 Feb 2011 01:32:48 -0500
Subject: intel_idle: disable Atom/Lincroft HW C-state auto-demotion

Just as we had to disable auto-demotion for NHM/WSM, we need to do the
same for Atom (the Lincroft version). In particular, auto-demotion
will prevent Lincroft from entering the S0i3 idle power saving state.

https://bugzilla.kernel.org/show_bug.cgi?id=25252

Signed-off-by: Len Brown
---
 arch/x86/include/asm/msr-index.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index b75eeab..43a18c7 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -39,6 +39,7 @@
 #define MSR_NHM_SNB_PKG_CST_CFG_CTL	0x000000e2
 #define NHM_C3_AUTO_DEMOTE		(1UL << 25)
 #define NHM_C1_AUTO_DEMOTE		(1UL << 26)
+#define ATM_LNC_C6_AUTO_DEMOTE		(1UL << 25)
 
 #define MSR_MTRRcap			0x000000fe
 #define MSR_IA32_BBL_CR_CTL		0x00000119
-- cgit v1.1

From e19e074b1525d11a66c8e3386fec7db248ad3005 Mon Sep 17 00:00:00 2001
From: Kushal Koolwal
Date: Sat, 19 Feb 2011 13:56:03 -0800
Subject: x86: Fix reboot problem on VersaLogic Menlow boards

VersaLogic Menlow based boards hang on reboot unless reboot=bios is
used. Add a quirk to reboot through the BIOS. Tested on at least four
boards.

Signed-off-by: Kushal Koolwal
LKML-Reference: <1298152563-21594-1-git-send-email-kushalkoolwal@gmail.com>
Signed-off-by: Ingo Molnar
---
 arch/x86/kernel/reboot.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index fc7aae1..715037c 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -285,6 +285,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
 			DMI_MATCH(DMI_BOARD_NAME, "P4S800"),
 		},
 	},
+	{	/* Handle problems with rebooting on VersaLogic Menlow boards */
+		.callback = set_bios_reboot,
+		.ident = "VersaLogic Menlow based board",
+		.matches = {
+			DMI_MATCH(DMI_BOARD_VENDOR, "VersaLogic Corporation"),
+			DMI_MATCH(DMI_BOARD_NAME, "VersaLogic Menlow board"),
+		},
+	},
 	{ }
 };
-- cgit v1.1

From 2c46d2aec054e61a33feac8c3992218eabdcc22a Mon Sep 17 00:00:00 2001
From: Joerg Roedel
Date: Wed, 9 Feb 2011 18:29:39 +0100
Subject: KVM: SVM: Advance instruction pointer in dr_intercept

The dr_intercept function uses the new decode-assists CPU feature when
it is available. This code path did not advance the guest RIP, causing
the guest to dead-loop over mov-dr instructions. Fix it by skipping
the emulated instruction.

Signed-off-by: Joerg Roedel
Signed-off-by: Avi Kivity
---
 arch/x86/kvm/svm.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 54ce246..63fec15 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2777,6 +2777,8 @@ static int dr_interception(struct vcpu_svm *svm)
 		kvm_register_write(&svm->vcpu, reg, val);
 	}
 
+	skip_emulated_instruction(&svm->vcpu);
+
 	return 1;
 }
-- cgit v1.1

From 7b62dbec908a29f448047099bedb5c64c9cb8808 Mon Sep 17 00:00:00 2001
From: Jacob Pan
Date: Wed, 23 Feb 2011 16:07:26 -0800
Subject: x86/mrst: Fix apb timer rating when lapic timer is used

We need to adjust the clockevent device rating on the structure that
is actually registered with the clockevent system, not on the
temporary template structure. Without this fix, the APB timer's rating
ends up higher than the LAPIC timer's, so it can not be released later
for use as the broadcast timer.

Signed-off-by: Jacob Pan
Cc: Arjan van de Ven
Cc: Alan Cox
Cc: Thomas Gleixner
Cc: John Stultz
LKML-Reference: <1298506046-439-1-git-send-email-jacob.jun.pan@linux.intel.com>
Signed-off-by: Ingo Molnar
---
 arch/x86/kernel/apb_timer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c
index 51ef31a..51d4e16 100644
--- a/arch/x86/kernel/apb_timer.c
+++ b/arch/x86/kernel/apb_timer.c
@@ -284,7 +284,7 @@ static int __init apbt_clockevent_register(void)
 	memcpy(&adev->evt, &apbt_clockevent, sizeof(struct clock_event_device));
 
 	if (mrst_timer_options == MRST_TIMER_LAPIC_APBT) {
-		apbt_clockevent.rating = APBT_CLOCKEVENT_RATING - 100;
+		adev->evt.rating = APBT_CLOCKEVENT_RATING - 100;
 		global_clock_event = &adev->evt;
 		printk(KERN_DEBUG "%s clockevent registered as global\n",
 		       global_clock_event->name);
-- cgit v1.1
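The bug class here is worth spelling out: once a template structure has
been memcpy()'d, later writes to the template never reach the copy. A
stand-alone sketch (hypothetical struct and rating values):

    #include <stdio.h>
    #include <string.h>

    struct clock_event { int rating; };

    int main(void)
    {
    	struct clock_event tmpl = { .rating = 200 };	/* like apbt_clockevent */
    	struct clock_event dev;				/* like adev->evt */

    	memcpy(&dev, &tmpl, sizeof(dev));

    	tmpl.rating = 100;	/* old code: edits the template after the copy */
    	printf("dev.rating = %d\n", dev.rating);	/* still 200 - the bug */

    	dev.rating = 100;	/* the fix: edit the copy that gets registered */
    	printf("dev.rating = %d\n", dev.rating);	/* 100 */
    	return 0;
    }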
From 7f74f8f28a2bd9db9404f7d364e2097a0c42cc12 Mon Sep 17 00:00:00 2001
From: Andreas Herrmann
Date: Thu, 24 Feb 2011 15:53:46 +0100
Subject: x86 quirk: Fix polarity for IRQ0 pin2 override on SB800 systems

On some SB800 systems the BIOS wrongly specifies the polarity of
IOAPIC pin2 as low active. On such systems this caused hangs after
resume from S3 when the HPET was used in one-shot mode, because a
timer interrupt was missed (the HPET signal is high active).

For more details see:

  http://marc.info/?l=linux-kernel&m=129623757413868

Tested-by: Manoj Iyer
Tested-by: Andre Przywara
Signed-off-by: Andreas Herrmann
Cc: Borislav Petkov
Cc: stable@kernel.org # 37.x, 32.x
LKML-Reference: <20110224145346.GD3658@alberich.amd.com>
Signed-off-by: Ingo Molnar
---
 arch/x86/include/asm/acpi.h    |  1 +
 arch/x86/kernel/acpi/boot.c    | 14 ++++++++++----
 arch/x86/kernel/early-quirks.c | 16 +++++++---------
 3 files changed, 18 insertions(+), 13 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 211ca3f..4ea15ca 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -88,6 +88,7 @@ extern int acpi_disabled;
 extern int acpi_pci_disabled;
 extern int acpi_skip_timer_override;
 extern int acpi_use_timer_override;
+extern int acpi_fix_pin2_polarity;
 extern u8 acpi_sci_flags;
 extern int acpi_sci_override_gsi;
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index b3a7113..3e6e2d6 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -72,6 +72,7 @@ u8 acpi_sci_flags __initdata;
 int acpi_sci_override_gsi __initdata;
 int acpi_skip_timer_override __initdata;
 int acpi_use_timer_override __initdata;
+int acpi_fix_pin2_polarity __initdata;
 
 #ifdef CONFIG_X86_LOCAL_APIC
 static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
@@ -415,10 +416,15 @@ acpi_parse_int_src_ovr(struct acpi_subtable_header * header,
 		return 0;
 	}
 
-	if (acpi_skip_timer_override &&
-	    intsrc->source_irq == 0 && intsrc->global_irq == 2) {
-		printk(PREFIX "BIOS IRQ0 pin2 override ignored.\n");
-		return 0;
+	if (intsrc->source_irq == 0 && intsrc->global_irq == 2) {
+		if (acpi_skip_timer_override) {
+			printk(PREFIX "BIOS IRQ0 pin2 override ignored.\n");
+			return 0;
+		}
+		if (acpi_fix_pin2_polarity && (intsrc->inti_flags & ACPI_MADT_POLARITY_MASK)) {
+			intsrc->inti_flags &= ~ACPI_MADT_POLARITY_MASK;
+			printk(PREFIX "BIOS IRQ0 pin2 override: forcing polarity to high active.\n");
+		}
 	}
 
 	mp_override_legacy_irq(intsrc->source_irq,
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index 76b8cd9..9efbdcc 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -143,15 +143,10 @@ static void __init ati_bugs(int num, int slot, int func)
 
 static u32 __init ati_sbx00_rev(int num, int slot, int func)
 {
-	u32 old, d;
+	u32 d;
 
-	d = read_pci_config(num, slot, func, 0x70);
-	old = d;
-	d &= ~(1<<8);
-	write_pci_config(num, slot, func, 0x70, d);
 	d = read_pci_config(num, slot, func, 0x8);
 	d &= 0xff;
-	write_pci_config(num, slot, func, 0x70, old);
 
 	return d;
 }
@@ -160,13 +155,16 @@ static void __init ati_bugs_contd(int num, int slot, int func)
 {
 	u32 d, rev;
 
-	if (acpi_use_timer_override)
-		return;
-
 	rev = ati_sbx00_rev(num, slot, func);
+	if (rev >= 0x40)
+		acpi_fix_pin2_polarity = 1;
+
 	if (rev > 0x13)
 		return;
 
+	if (acpi_use_timer_override)
+		return;
+
 	/* check for IRQ0 interrupt swap */
 	d = read_pci_config(num, slot, func, 0x64);
 	if (!(d & (1<<14)))
-- cgit v1.1

From 299c56966a72b9109d47c71a6db52097098703dd Mon Sep 17 00:00:00 2001
From: Don Zickus
Date: Mon, 7 Feb 2011 23:25:00 -0500
Subject: x86: Use u32 instead of long to set reset vector back to 0

A customer of ours complained that setting the reset vector back to 0
trashed other data and hung their box. They noticed that when only 4
bytes were set to 0 instead of 8, everything worked correctly.

Matthew pointed out:

|
| We're supposed to be resetting trampoline_phys_low and
| trampoline_phys_high here, which are two 16-bit values.
| Writing 64 bits is definitely going to overwrite space
| that we're not supposed to be touching.
|

So limit the area modified to a u32.

Signed-off-by: Don Zickus
Acked-by: Matthew Garrett
Cc:
LKML-Reference: <1297139100-424-1-git-send-email-dzickus@redhat.com>
Signed-off-by: Ingo Molnar
---
 arch/x86/include/asm/smpboot_hooks.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/smpboot_hooks.h b/arch/x86/include/asm/smpboot_hooks.h
index 6c22bf3..725b778 100644
--- a/arch/x86/include/asm/smpboot_hooks.h
+++ b/arch/x86/include/asm/smpboot_hooks.h
@@ -34,7 +34,7 @@ static inline void smpboot_restore_warm_reset_vector(void)
 	 */
 	CMOS_WRITE(0, 0xf);
 
-	*((volatile long *)phys_to_virt(apic->trampoline_phys_low)) = 0;
+	*((volatile u32 *)phys_to_virt(apic->trampoline_phys_low)) = 0;
 }
 
 static inline void __init smpboot_setup_io_apic(void)
-- cgit v1.1
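To see why the width matters: trampoline_phys_low/high are two 16-bit
fields, so the reset vector occupies 4 bytes, and a 'long' store (8 bytes
on x86-64) wipes whatever lives in the next 4. A stand-alone sketch of the
overwrite (hypothetical layout; x86-style type punning assumed):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
    	/* 4 bytes of reset vector, then 4 bytes owned by something else */
    	uint64_t storage;
    	uint8_t *mem = (uint8_t *)&storage;

    	memset(mem, 0xaa, 8);
    	*(volatile long *)mem = 0;		/* old code: clobbers mem[4..7] too */
    	printf("long store: mem[4] = %#x\n", mem[4]);	/* 0 - trashed */

    	memset(mem, 0xaa, 8);
    	*(volatile uint32_t *)mem = 0;		/* fixed code: exactly the two u16 fields */
    	printf("u32 store:  mem[4] = %#x\n", mem[4]);	/* 0xaa - intact */
    	return 0;
    }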
From ac818314499b707a97690d5ee835e6ba40a407c1 Mon Sep 17 00:00:00 2001
From: Neil Brown
Date: Wed, 24 Nov 2010 11:28:01 +1100
Subject: [CPUFREQ] Missing "unregister_cpu_notifier" in powernow-k8.c

It appears that when powernow-k8 finds that "No compatible ACPI _PSS
objects found" and suggests "Try again with latest BIOS", it fails the
module load but does not unregister the cpu_notifier that was
registered in powernowk8_init.

This ends up leaving freed memory on the cpu notifier list for some
other poor module (e.g. md/raid5) to come along and trip over.

The following might be a partial fix, but I suspect there is probably
other clean-up that is needed.

( https://bugzilla.novell.com/show_bug.cgi?id=655215 has full dmesg
  traces. )

Signed-off-by: Dave Jones
Signed-off-by: Neil Brown
---
 arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 35c7e65..302963f 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -1537,6 +1537,7 @@ static struct notifier_block cpb_nb = {
 static int __cpuinit powernowk8_init(void)
 {
 	unsigned int i, supported_cpus = 0, cpu;
+	int rv;
 
 	for_each_online_cpu(i) {
 		int rc;
@@ -1574,7 +1575,13 @@ static int __cpuinit powernowk8_init(void)
 			(cpb_enabled ? "on" : "off"));
 	}
 
-	return cpufreq_register_driver(&cpufreq_amd64_driver);
+	rv = cpufreq_register_driver(&cpufreq_amd64_driver);
+	if (rv < 0 && boot_cpu_has(X86_FEATURE_CPB)) {
+		unregister_cpu_notifier(&cpb_nb);
+		msrs_free(msrs);
+		msrs = NULL;
+	}
+	return rv;
 }
 
 /* driver entry point for term */
-- cgit v1.1

From a536b126f211bdf9a0eecce0d403a26900d2106c Mon Sep 17 00:00:00 2001
From: Dave Jones
Date: Tue, 23 Nov 2010 21:29:31 -0500
Subject: [CPUFREQ] Fix another notifier leak in powernow-k8.

Do the notifier registration later, so we don't have to worry about
freeing it if we fail the msr allocation.

Signed-off-by: Dave Jones
---
 arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 302963f..c567dec 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -1556,14 +1556,14 @@ static int __cpuinit powernowk8_init(void)
 
 	cpb_capable = true;
 
-	register_cpu_notifier(&cpb_nb);
-
 	msrs = msrs_alloc();
 	if (!msrs) {
 		printk(KERN_ERR "%s: Error allocating msrs!\n", __func__);
 		return -ENOMEM;
 	}
 
+	register_cpu_notifier(&cpb_nb);
+
 	rdmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs);
 
 	for_each_cpu(cpu, cpu_online_mask) {
-- cgit v1.1
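Taken together, the two patches converge on the usual init-ordering rule:
do fallible allocations first and publish globally visible callbacks last,
so every failure exit only has to unwind what was actually set up.
Schematically, the resulting shape of powernowk8_init() (a sketch assembled
from the two hunks above, not a verbatim quote of the final file):

    	msrs = msrs_alloc();			/* fallible step first ... */
    	if (!msrs)
    		return -ENOMEM;			/* ... nothing to unwind yet */

    	register_cpu_notifier(&cpb_nb);		/* visible side effect last */

    	rv = cpufreq_register_driver(&cpufreq_amd64_driver);
    	if (rv < 0 && boot_cpu_has(X86_FEATURE_CPB)) {
    		unregister_cpu_notifier(&cpb_nb);	/* unwind in reverse order */
    		msrs_free(msrs);
    		msrs = NULL;
    	}
    	return rv;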
From 853cee26e2a0c5f97386beca4c67b11c3cd85b8e Mon Sep 17 00:00:00 2001
From: Naga Chumbalkar
Date: Tue, 15 Feb 2011 17:44:11 +0000
Subject: [CPUFREQ] p4-clockmod: print EST-capable warning message only once

Print the message only once. I see it 16 times on a 2P box with 16
logical CPUs.

Signed-off-by: Naga Chumbalkar
---
 arch/x86/kernel/cpu/cpufreq/p4-clockmod.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
index bd1cac7..52c9364 100644
--- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
@@ -158,9 +158,9 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c)
 {
 	if (c->x86 == 0x06) {
 		if (cpu_has(c, X86_FEATURE_EST))
-			printk(KERN_WARNING PFX "Warning: EST-capable CPU "
-			       "detected. The acpi-cpufreq module offers "
-			       "voltage scaling in addition of frequency "
+			printk_once(KERN_WARNING PFX "Warning: EST-capable "
+			       "CPU detected. The acpi-cpufreq module offers "
+			       "voltage scaling in addition to frequency "
 			       "scaling. You should use that instead of "
 			       "p4-clockmod, if possible.\n");
 		switch (c->x86_model) {
-- cgit v1.1
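printk_once() achieves the once-only behaviour with a function-local
static flag; conceptually the macro expands to something like this (a
simplified sketch, not the exact kernel definition):

    #define printk_once(fmt, ...)			\
    ({							\
    	static bool __print_once;			\
    							\
    	if (!__print_once) {				\
    		__print_once = true;			\
    		printk(fmt, ##__VA_ARGS__);		\
    	}						\
    })

Each call site gets its own flag, which is why a message reached from 16
CPUs through the same call site now appears exactly once.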
From 60cba5a57b8affe98ea9f2bac147be0fb253d5f4 Mon Sep 17 00:00:00 2001
From: Andres Salomon
Date: Thu, 24 Feb 2011 20:06:31 -0800
Subject: x86: OLPC: have prom_early_alloc BUG rather than return NULL

...similar to what sparc's prom_early_alloc does.

Signed-off-by: Andres Salomon
Signed-off-by: Grant Likely
---
 arch/x86/platform/olpc/olpc_dt.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/platform/olpc/olpc_dt.c b/arch/x86/platform/olpc/olpc_dt.c
index dab8746..044bda5 100644
--- a/arch/x86/platform/olpc/olpc_dt.c
+++ b/arch/x86/platform/olpc/olpc_dt.c
@@ -140,8 +140,7 @@ void * __init prom_early_alloc(unsigned long size)
 	 * wasted bootmem) and hand off chunks of it to callers.
 	 */
 	res = alloc_bootmem(chunk_size);
-	if (!res)
-		return NULL;
+	BUG_ON(!res);
 	prom_early_allocated += chunk_size;
 	memset(res, 0, chunk_size);
 	free_mem = chunk_size;
-- cgit v1.1
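For context, prom_early_alloc() is a simple bump allocator: grab a
sizeable bootmem chunk, then hand out pieces of it until it runs dry.
Note also that alloc_bootmem() (the non-_nopanic variant) panics on
failure anyway, so the removed NULL check could never fire; BUG_ON()
documents that expectation. A stand-alone sketch of the scheme, with
malloc() standing in for alloc_bootmem() and assuming a request never
exceeds the chunk size:

    #include <assert.h>
    #include <stdlib.h>
    #include <string.h>

    #define CHUNK_SIZE 4096

    static unsigned char *mem;	/* current chunk */
    static size_t free_mem;	/* bytes left in it */

    static void *prom_early_alloc(size_t size)
    {
    	void *res;

    	if (free_mem < size) {
    		res = malloc(CHUNK_SIZE);	/* alloc_bootmem() in the kernel */
    		assert(res != NULL);		/* BUG_ON(!res) in the kernel */
    		memset(res, 0, CHUNK_SIZE);
    		mem = res;
    		free_mem = CHUNK_SIZE;
    	}

    	/* hand out the next piece of the current chunk */
    	res = mem;
    	mem += size;
    	free_mem -= size;
    	return res;
    }

    int main(void)
    {
    	char *a = prom_early_alloc(16);
    	char *b = prom_early_alloc(16);
    	return (b - a == 16) ? 0 : 1;	/* consecutive pieces of one chunk */
    }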