From faee9a5dc9d8399cc3b1b8e18b6d7ff7b17f1af1 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 26 Jun 2006 13:56:10 +0200 Subject: [PATCH] i386/x86-64: Use new official CPUID to get APICID/core split on AMD platforms Previously the apicid<->coreid split was computed based on the max number of cores. Now use a new CPUID AMD defined for that. On most systems right now it should be 0 and the old method will be used. Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/i386/kernel/cpu/amd.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c index 786d1a5..b9c93d9 100644 --- a/arch/i386/kernel/cpu/amd.c +++ b/arch/i386/kernel/cpu/amd.c @@ -224,15 +224,17 @@ static void __init init_amd(struct cpuinfo_x86 *c) #ifdef CONFIG_X86_HT /* - * On a AMD dual core setup the lower bits of the APIC id - * distingush the cores. Assumes number of cores is a power - * of two. + * On a AMD multi core setup the lower bits of the APIC id + * distingush the cores. */ if (c->x86_max_cores > 1) { int cpu = smp_processor_id(); - unsigned bits = 0; - while ((1 << bits) < c->x86_max_cores) - bits++; + unsigned bits = (cpuid_ecx(0x80000008) >> 12) & 0xf; + + if (bits == 0) { + while ((1 << bits) < c->x86_max_cores) + bits++; + } cpu_core_id[cpu] = phys_proc_id[cpu] & ((1<>= bits; printk(KERN_INFO "CPU %d(%d) -> Core %d\n", -- cgit v1.1 From 240cd6a80642da528bfa382ec2ae4e3cb8991ea7 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 26 Jun 2006 13:56:13 +0200 Subject: [PATCH] i386/x86-64: Emulate CPUID4 on AMD Intel systems report the cache level data from CPUID 4 in sysfs. Add a CPUID 4 emulation for AMD CPUs to report the same information for them. This allows programs to read this information in a uniform way. The AMD way to report this is less flexible so some assumptions are hardcoded (e.g. no L3) Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/i386/kernel/cpu/amd.c | 2 + arch/i386/kernel/cpu/intel_cacheinfo.c | 113 +++++++++++++++++++++++++++++---- 2 files changed, 102 insertions(+), 13 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c index b9c93d9..fd0457c 100644 --- a/arch/i386/kernel/cpu/amd.c +++ b/arch/i386/kernel/cpu/amd.c @@ -242,6 +242,8 @@ static void __init init_amd(struct cpuinfo_x86 *c) } #endif + if (cpuid_eax(0x80000000) >= 0x80000006) + num_cache_leaves = 3; } static unsigned int amd_size_cache(struct cpuinfo_x86 * c, unsigned int size) diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c index c8547a6..6c37b4f 100644 --- a/arch/i386/kernel/cpu/intel_cacheinfo.c +++ b/arch/i386/kernel/cpu/intel_cacheinfo.c @@ -4,6 +4,7 @@ * Changes: * Venkatesh Pallipadi : Adding cache identification through cpuid(4) * Ashok Raj : Work with CPU hotplug infrastructure. + * Andi Kleen : CPUID4 emulation on AMD. */ #include @@ -130,25 +131,111 @@ struct _cpuid4_info { cpumask_t shared_cpu_map; }; -static unsigned short num_cache_leaves; +unsigned short num_cache_leaves; + +/* AMD doesn't have CPUID4. Emulate it here to report the same + information to the user. This makes some assumptions about the machine: + No L3, L2 not shared, no SMT etc. that is currently true on AMD CPUs. + + In theory the TLBs could be reported as fake type (they are in "dummy"). 
+ Maybe later */ +union l1_cache { + struct { + unsigned line_size : 8; + unsigned lines_per_tag : 8; + unsigned assoc : 8; + unsigned size_in_kb : 8; + }; + unsigned val; +}; + +union l2_cache { + struct { + unsigned line_size : 8; + unsigned lines_per_tag : 4; + unsigned assoc : 4; + unsigned size_in_kb : 16; + }; + unsigned val; +}; + +static unsigned short assocs[] = { + [1] = 1, [2] = 2, [4] = 4, [6] = 8, + [8] = 16, + [0xf] = 0xffff // ?? + }; +static unsigned char levels[] = { 1, 1, 2 }; +static unsigned char types[] = { 1, 2, 3 }; + +static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, + union _cpuid4_leaf_ebx *ebx, + union _cpuid4_leaf_ecx *ecx) +{ + unsigned dummy; + unsigned line_size, lines_per_tag, assoc, size_in_kb; + union l1_cache l1i, l1d; + union l2_cache l2; + + eax->full = 0; + ebx->full = 0; + ecx->full = 0; + + cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val); + cpuid(0x80000006, &dummy, &dummy, &l2.val, &dummy); + + if (leaf > 2 || !l1d.val || !l1i.val || !l2.val) + return; + + eax->split.is_self_initializing = 1; + eax->split.type = types[leaf]; + eax->split.level = levels[leaf]; + eax->split.num_threads_sharing = 0; + eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1; + + if (leaf <= 1) { + union l1_cache *l1 = leaf == 0 ? &l1d : &l1i; + assoc = l1->assoc; + line_size = l1->line_size; + lines_per_tag = l1->lines_per_tag; + size_in_kb = l1->size_in_kb; + } else { + assoc = l2.assoc; + line_size = l2.line_size; + lines_per_tag = l2.lines_per_tag; + /* cpu_data has errata corrections for K7 applied */ + size_in_kb = current_cpu_data.x86_cache_size; + } + + if (assoc == 0xf) + eax->split.is_fully_associative = 1; + ebx->split.coherency_line_size = line_size - 1; + ebx->split.ways_of_associativity = assocs[assoc] - 1; + ebx->split.physical_line_partition = lines_per_tag - 1; + ecx->split.number_of_sets = (size_in_kb * 1024) / line_size / + (ebx->split.ways_of_associativity + 1) - 1; +} static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) { - unsigned int eax, ebx, ecx, edx; - union _cpuid4_leaf_eax cache_eax; + union _cpuid4_leaf_eax eax; + union _cpuid4_leaf_ebx ebx; + union _cpuid4_leaf_ecx ecx; + unsigned edx; - cpuid_count(4, index, &eax, &ebx, &ecx, &edx); - cache_eax.full = eax; - if (cache_eax.split.type == CACHE_TYPE_NULL) + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) + amd_cpuid4(index, &eax, &ebx, &ecx); + else + cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); + if (eax.split.type == CACHE_TYPE_NULL) return -EIO; /* better error ? */ - this_leaf->eax.full = eax; - this_leaf->ebx.full = ebx; - this_leaf->ecx.full = ecx; - this_leaf->size = (this_leaf->ecx.split.number_of_sets + 1) * - (this_leaf->ebx.split.coherency_line_size + 1) * - (this_leaf->ebx.split.physical_line_partition + 1) * - (this_leaf->ebx.split.ways_of_associativity + 1); + this_leaf->eax = eax; + this_leaf->ebx = ebx; + this_leaf->ecx = ecx; + this_leaf->size = (ecx.split.number_of_sets + 1) * + (ebx.split.coherency_line_size + 1) * + (ebx.split.physical_line_partition + 1) * + (ebx.split.ways_of_associativity + 1); return 0; } -- cgit v1.1 From d167a51877e94dda73dd656c51f363502309f713 Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Mon, 26 Jun 2006 13:56:16 +0200 Subject: [PATCH] x86_64: x86_64 version of the smp alternative patch. Changes are largely identical to the i386 version: * alternative #define are moved to the new alternative.h file. 
* one new elf section with pointers to the lock prefixes which can be nop'ed out for non-smp. * two new elf sections simliar to the "classic" alternatives to replace SMP code with simpler UP code. * fixup headers to use alternative.h instead of defining their own LOCK / LOCK_PREFIX macros. The patch reuses the i386 version of the alternatives code to avoid code duplication. The code in alternatives.c was shuffled around a bit to reduce the number of #ifdefs needed. It also got some tweaks needed for x86_64 (vsyscall page handling) and new features (noreplacement option which was x86_64 only up to now). Debug printk's are changed from compile-time to runtime. Loosely based on a early version from Bastian Blank Signed-off-by: Gerd Hoffmann Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/i386/kernel/alternative.c | 118 +++++++++++++++++++++++++++++------------ 1 file changed, 85 insertions(+), 33 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/alternative.c b/arch/i386/kernel/alternative.c index 5cbd6f9..50eb0e0 100644 --- a/arch/i386/kernel/alternative.c +++ b/arch/i386/kernel/alternative.c @@ -4,27 +4,41 @@ #include #include -#define DEBUG 0 -#if DEBUG -# define DPRINTK(fmt, args...) printk(fmt, args) -#else -# define DPRINTK(fmt, args...) -#endif +static int no_replacement = 0; +static int smp_alt_once = 0; +static int debug_alternative = 0; + +static int __init noreplacement_setup(char *s) +{ + no_replacement = 1; + return 1; +} +static int __init bootonly(char *str) +{ + smp_alt_once = 1; + return 1; +} +static int __init debug_alt(char *str) +{ + debug_alternative = 1; + return 1; +} +__setup("noreplacement", noreplacement_setup); +__setup("smp-alt-boot", bootonly); +__setup("debug-alternative", debug_alt); + +#define DPRINTK(fmt, args...) if (debug_alternative) \ + printk(KERN_DEBUG fmt, args) + +#ifdef GENERIC_NOP1 /* Use inline assembly to define this because the nops are defined as inline assembly strings in the include files and we cannot get them easily into strings. 
*/ asm("\t.data\nintelnops: " GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6 GENERIC_NOP7 GENERIC_NOP8); -asm("\t.data\nk8nops: " - K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6 - K8_NOP7 K8_NOP8); -asm("\t.data\nk7nops: " - K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6 - K7_NOP7 K7_NOP8); - -extern unsigned char intelnops[], k8nops[], k7nops[]; +extern unsigned char intelnops[]; static unsigned char *intel_nops[ASM_NOP_MAX+1] = { NULL, intelnops, @@ -36,6 +50,13 @@ static unsigned char *intel_nops[ASM_NOP_MAX+1] = { intelnops + 1 + 2 + 3 + 4 + 5 + 6, intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7, }; +#endif + +#ifdef K8_NOP1 +asm("\t.data\nk8nops: " + K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6 + K8_NOP7 K8_NOP8); +extern unsigned char k8nops[]; static unsigned char *k8_nops[ASM_NOP_MAX+1] = { NULL, k8nops, @@ -47,6 +68,13 @@ static unsigned char *k8_nops[ASM_NOP_MAX+1] = { k8nops + 1 + 2 + 3 + 4 + 5 + 6, k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, }; +#endif + +#ifdef K7_NOP1 +asm("\t.data\nk7nops: " + K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6 + K7_NOP7 K7_NOP8); +extern unsigned char k7nops[]; static unsigned char *k7_nops[ASM_NOP_MAX+1] = { NULL, k7nops, @@ -58,6 +86,18 @@ static unsigned char *k7_nops[ASM_NOP_MAX+1] = { k7nops + 1 + 2 + 3 + 4 + 5 + 6, k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, }; +#endif + +#ifdef CONFIG_X86_64 + +extern char __vsyscall_0; +static inline unsigned char** find_nop_table(void) +{ + return k8_nops; +} + +#else /* CONFIG_X86_64 */ + static struct nop { int cpuid; unsigned char **noptable; @@ -67,14 +107,6 @@ static struct nop { { -1, NULL } }; - -extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; -extern struct alt_instr __smp_alt_instructions[], __smp_alt_instructions_end[]; -extern u8 *__smp_locks[], *__smp_locks_end[]; - -extern u8 __smp_alt_begin[], __smp_alt_end[]; - - static unsigned char** find_nop_table(void) { unsigned char **noptable = intel_nops; @@ -89,6 +121,14 @@ static unsigned char** find_nop_table(void) return noptable; } +#endif /* CONFIG_X86_64 */ + +extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; +extern struct alt_instr __smp_alt_instructions[], __smp_alt_instructions_end[]; +extern u8 *__smp_locks[], *__smp_locks_end[]; + +extern u8 __smp_alt_begin[], __smp_alt_end[]; + /* Replace instructions with better alternatives for this CPU type. This runs before SMP is initialized to avoid SMP problems with self modifying code. This implies that assymetric systems where @@ -99,6 +139,7 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end) { unsigned char **noptable = find_nop_table(); struct alt_instr *a; + u8 *instr; int diff, i, k; DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end); @@ -106,7 +147,16 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end) BUG_ON(a->replacementlen > a->instrlen); if (!boot_cpu_has(a->cpuid)) continue; - memcpy(a->instr, a->replacement, a->replacementlen); + instr = a->instr; +#ifdef CONFIG_X86_64 + /* vsyscall code is not mapped yet. resolve it manually. 
*/ + if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END) { + instr = __va(instr - (u8*)VSYSCALL_START + (u8*)__pa_symbol(&__vsyscall_0)); + DPRINTK("%s: vsyscall fixup: %p => %p\n", + __FUNCTION__, a->instr, instr); + } +#endif + memcpy(instr, a->replacement, a->replacementlen); diff = a->instrlen - a->replacementlen; /* Pad the rest with nops */ for (i = a->replacementlen; diff > 0; diff -= k, i += k) { @@ -186,14 +236,6 @@ struct smp_alt_module { static LIST_HEAD(smp_alt_modules); static DEFINE_SPINLOCK(smp_alt); -static int smp_alt_once = 0; -static int __init bootonly(char *str) -{ - smp_alt_once = 1; - return 1; -} -__setup("smp-alt-boot", bootonly); - void alternatives_smp_module_add(struct module *mod, char *name, void *locks, void *locks_end, void *text, void *text_end) @@ -201,6 +243,9 @@ void alternatives_smp_module_add(struct module *mod, char *name, struct smp_alt_module *smp; unsigned long flags; + if (no_replacement) + return; + if (smp_alt_once) { if (boot_cpu_has(X86_FEATURE_UP)) alternatives_smp_unlock(locks, locks_end, @@ -235,7 +280,7 @@ void alternatives_smp_module_del(struct module *mod) struct smp_alt_module *item; unsigned long flags; - if (smp_alt_once) + if (no_replacement || smp_alt_once) return; spin_lock_irqsave(&smp_alt, flags); @@ -256,7 +301,7 @@ void alternatives_smp_switch(int smp) struct smp_alt_module *mod; unsigned long flags; - if (smp_alt_once) + if (no_replacement || smp_alt_once) return; BUG_ON(!smp && (num_online_cpus() > 1)); @@ -285,6 +330,13 @@ void alternatives_smp_switch(int smp) void __init alternative_instructions(void) { + if (no_replacement) { + printk(KERN_INFO "(SMP-)alternatives turned off\n"); + free_init_pages("SMP alternatives", + (unsigned long)__smp_alt_begin, + (unsigned long)__smp_alt_end); + return; + } apply_alternatives(__alt_instructions, __alt_instructions_end); /* switch to patch-once-at-boottime-only mode and free the -- cgit v1.1 From a32073bffc656ca4bde6002b6cf7c1a8e0e22712 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 26 Jun 2006 13:56:40 +0200 Subject: [PATCH] x86_64: Clean and enhance up K8 northbridge access code - Factor out the duplicated access/cache code into a single file * Shared between i386/x86-64. - Share flush code between AGP and IOMMU * Fix a bug: AGP didn't wait for end of flush before - Drop 8 northbridges limit and allocate dynamically - Add lock to serialize AGP and IOMMU GART flushes - Add PCI ID for next AMD northbridge - Random related cleanups The old K8 NUMA discovery code is unchanged. New systems should all use SRAT for this. Cc: "Navin Boppuri" Cc: Dave Jones Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/i386/Kconfig | 4 ++++ arch/i386/kernel/Makefile | 4 ++++ 2 files changed, 8 insertions(+) (limited to 'arch/i386') diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index 1596101..2206bf6 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -1054,6 +1054,10 @@ config SCx200 This support is also available as a module. If compiled as a module, it will be called scx200. 
+config K8_NB + def_bool y + depends on AGP_AMD64 + source "drivers/pcmcia/Kconfig" source "drivers/pci/hotplug/Kconfig" diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile index 96fb8a0..28b14d6 100644 --- a/arch/i386/kernel/Makefile +++ b/arch/i386/kernel/Makefile @@ -37,6 +37,7 @@ obj-$(CONFIG_EFI) += efi.o efi_stub.o obj-$(CONFIG_DOUBLEFAULT) += doublefault.o obj-$(CONFIG_VM86) += vm86.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o +obj-$(CONFIG_K8_NB) += k8.o EXTRA_AFLAGS := -traditional @@ -76,3 +77,6 @@ SYSCFLAGS_vsyscall-syms.o = -r $(obj)/vsyscall-syms.o: $(src)/vsyscall.lds \ $(obj)/vsyscall-sysenter.o $(obj)/vsyscall-note.o FORCE $(call if_changed,syscall) + +k8-y += ../../x86_64/kernel/k8.o + -- cgit v1.1 From 0a1ad60d7a7eb433095bc1b2c8b475f3f278f61d Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 26 Jun 2006 13:56:43 +0200 Subject: [PATCH] x86_64: serialize assign_irq_vector() use of static variables Since assign_irq_vector() can be called at runtime, its access of static variables should be protected by a lock. Signed-off-by: Jan Beulich Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/i386/kernel/io_apic.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index a62df3e..43ffdd0 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -50,6 +50,7 @@ atomic_t irq_mis_count; static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; static DEFINE_SPINLOCK(ioapic_lock); +static DEFINE_SPINLOCK(vector_lock); int timer_over_8254 __initdata = 1; @@ -1161,10 +1162,16 @@ u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 }; int assign_irq_vector(int irq) { static int current_vector = FIRST_DEVICE_VECTOR, offset = 0; + int vector; + + BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS); + + spin_lock(&vector_lock); - BUG_ON(irq >= NR_IRQ_VECTORS); - if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) + if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) { + spin_unlock(&vector_lock); return IO_APIC_VECTOR(irq); + } next: current_vector += 8; if (current_vector == SYSCALL_VECTOR) @@ -1172,16 +1179,21 @@ next: if (current_vector >= FIRST_SYSTEM_VECTOR) { offset++; - if (!(offset%8)) + if (!(offset%8)) { + spin_unlock(&vector_lock); return -ENOSPC; + } current_vector = FIRST_DEVICE_VECTOR + offset; } - vector_irq[current_vector] = irq; + vector = current_vector; + vector_irq[vector] = irq; if (irq != AUTO_ASSIGN) - IO_APIC_VECTOR(irq) = current_vector; + IO_APIC_VECTOR(irq) = vector; + + spin_unlock(&vector_lock); - return current_vector; + return vector; } static struct hw_interrupt_type ioapic_level_type; -- cgit v1.1 From 6ebcc00e95fa78218a048a88d8d5ee491aa7d6d0 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 26 Jun 2006 13:56:46 +0200 Subject: [PATCH] i386/x86-64: simplify ioapic_register_intr() Simplify (remove duplication of) code in ioapic_register_intr(). 
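For reference, the body that the hunk below converges on: the two duplicated branches
differed only in whether irq_desc[] is indexed by the vector or by the irq, so computing
that index once leaves a single level/edge decision. This is the code from the hunk
itself, quoted here outside the +/- noise, not a separate implementation:

	unsigned idx = use_pci_vector() && !platform_legacy_irq(irq) ? vector : irq;

	if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
			trigger == IOAPIC_LEVEL)
		irq_desc[idx].handler = &ioapic_level_type;	/* level-triggered */
	else
		irq_desc[idx].handler = &ioapic_edge_type;	/* edge-triggered */
	set_intr_gate(vector, interrupt[idx]);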
Signed-off-by: Jan Beulich Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/i386/kernel/io_apic.c | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index 43ffdd0..5a1a541 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -1205,21 +1205,14 @@ static struct hw_interrupt_type ioapic_edge_type; static inline void ioapic_register_intr(int irq, int vector, unsigned long trigger) { - if (use_pci_vector() && !platform_legacy_irq(irq)) { - if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || - trigger == IOAPIC_LEVEL) - irq_desc[vector].handler = &ioapic_level_type; - else - irq_desc[vector].handler = &ioapic_edge_type; - set_intr_gate(vector, interrupt[vector]); - } else { - if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || - trigger == IOAPIC_LEVEL) - irq_desc[irq].handler = &ioapic_level_type; - else - irq_desc[irq].handler = &ioapic_edge_type; - set_intr_gate(vector, interrupt[irq]); - } + unsigned idx = use_pci_vector() && !platform_legacy_irq(irq) ? vector : irq; + + if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || + trigger == IOAPIC_LEVEL) + irq_desc[idx].handler = &ioapic_level_type; + else + irq_desc[idx].handler = &ioapic_edge_type; + set_intr_gate(vector, interrupt[idx]); } static void __init setup_IO_APIC_irqs(void) -- cgit v1.1 From 3e4ff115740c28dea463561aa1405a3c0de0d2d0 Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Mon, 26 Jun 2006 13:57:01 +0200 Subject: [PATCH] x86_64: nmi watchdog header cleanup Misc header cleanup for nmi watchdog. Signed-off-by: Don Zickus Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/i386/kernel/apic.c | 1 + arch/i386/kernel/io_apic.c | 1 + arch/i386/kernel/nmi.c | 7 +------ arch/i386/kernel/smpboot.c | 1 + arch/i386/oprofile/op_model_athlon.c | 1 + arch/i386/oprofile/op_model_p4.c | 1 + arch/i386/oprofile/op_model_ppro.c | 1 + 7 files changed, 7 insertions(+), 6 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c index 5ab59c1..5f197e1 100644 --- a/arch/i386/kernel/apic.c +++ b/arch/i386/kernel/apic.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index 5a1a541..61317f4 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -38,6 +38,7 @@ #include #include #include +#include #include diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index d43b498..bd38754 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c @@ -14,20 +14,15 @@ */ #include -#include #include -#include -#include #include -#include -#include #include #include #include #include +#include #include -#include #include #include "mach_traps.h" diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index bd0ca5c..bce5470 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -52,6 +52,7 @@ #include #include #include +#include #include #include diff --git a/arch/i386/oprofile/op_model_athlon.c b/arch/i386/oprofile/op_model_athlon.c index 3ad9a72..693bdea 100644 --- a/arch/i386/oprofile/op_model_athlon.c +++ b/arch/i386/oprofile/op_model_athlon.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "op_x86_model.h" #include "op_counter.h" diff --git a/arch/i386/oprofile/op_model_p4.c b/arch/i386/oprofile/op_model_p4.c index ac8a066..7c61d35 100644 --- 
a/arch/i386/oprofile/op_model_p4.c +++ b/arch/i386/oprofile/op_model_p4.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "op_x86_model.h" #include "op_counter.h" diff --git a/arch/i386/oprofile/op_model_ppro.c b/arch/i386/oprofile/op_model_ppro.c index d719015..5c3ab4b 100644 --- a/arch/i386/oprofile/op_model_ppro.c +++ b/arch/i386/oprofile/op_model_ppro.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "op_x86_model.h" #include "op_counter.h" -- cgit v1.1 From 26a3c49cec96ffb9cfcc30dfa0cd05ccc25dcb3a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 26 Jun 2006 13:57:16 +0200 Subject: [PATCH] x86_64: fix vector_lock deadlock in io_apic.c Fix a potential deadlock scenario introduced by io_apic.c's new vector_lock on i386 and x86_64. Found by the locking correctness validator. The patch was boot-tested on x86. For details of the deadlock scenario, see the validator output: ====================================================== [ BUG: hard-safe -> hard-unsafe lock order detected! ] ------------------------------------------------------ idle/1 [HC0[0]:SC0[0]:HE0:SE1] is trying to acquire: (msi_lock){....}, at: [] startup_msi_irq_wo_maskbit+0x10/0x35 and this task is already holding: (&irq_desc[i].lock){++..}, at: [] probe_irq_on+0x36/0x107 which would create a new lock dependency: (&irq_desc[i].lock){++..} -> (msi_lock){....} but this new dependency connects a hard-irq-safe lock: (&irq_desc[i].lock){++..} ... which became hard-irq-safe at: [] lockdep_acquire+0x68/0x84 [] _spin_lock+0x21/0x2f [] __do_IRQ+0x3d/0x113 [] do_IRQ+0x8c/0xad to a hard-irq-unsafe lock: (vector_lock){--..} ... which became hard-irq-unsafe at: ... [] lockdep_acquire+0x68/0x84 [] _spin_lock+0x21/0x2f [] assign_irq_vector+0x34/0xc8 [] setup_IO_APIC+0x45a/0xcff [] smp_prepare_cpus+0x5ea/0x8aa [] init+0x32/0x2cb [] kernel_thread_helper+0x5/0xb which could potentially lead to deadlocks! other info that might help us debug this: 3 locks held by idle/1: #0: (port_mutex){--..}, at: [] uart_add_one_port+0x61/0x289 #1: (&state->mutex){--..}, at: [] uart_add_one_port+0x73/0x289 #2: (&irq_desc[i].lock){++..}, at: [] probe_irq_on+0x36/0x107 the hard-irq-safe lock's dependencies: -> (&irq_desc[i].lock){++..} ops: 9861 { initial-use at: [] lockdep_acquire+0x68/0x84 [] _spin_lock_irqsave+0x2a/0x3a [] setup_irq+0x9b/0x14d [] time_init_hook+0xf/0x11 [] time_init+0x44/0x46 [] start_kernel+0x191/0x38f [] 0xc0100210 in-hardirq-W at: [] lockdep_acquire+0x68/0x84 [] _spin_lock+0x21/0x2f [] __do_IRQ+0x3d/0x113 [] do_IRQ+0x8c/0xad in-softirq-W at: [] lockdep_acquire+0x68/0x84 [] _spin_lock+0x21/0x2f [] __do_IRQ+0x3d/0x113 [] do_IRQ+0x8c/0xad } ... key at: [] irq_desc_lock_type+0x0/0x20 -> (i8259A_lock){++..} ops: 5149 { initial-use at: [] lockdep_acquire+0x68/0x84 [] _spin_lock_irqsave+0x2a/0x3a [] init_8259A+0x11/0x8f [] init_ISA_irqs+0x12/0x4d [] pre_intr_init_hook+0x8/0xa [] init_IRQ+0xe/0x65 [] start_kernel+0x178/0x38f [] 0xc0100210 in-hardirq-W at: [] lockdep_acquire+0x68/0x84 [] _spin_lock_irqsave+0x2a/0x3a [] mask_and_ack_8259A+0x1b/0xcc [] __do_IRQ+0x4f/0x113 [] do_IRQ+0x8c/0xad in-softirq-W at: [] lockdep_acquire+0x68/0x84 [] _spin_lock_irqsave+0x2a/0x3a [] mask_and_ack_8259A+0x1b/0xcc [] __do_IRQ+0x4f/0x113 [] do_IRQ+0x8c/0xad } ... key at: [] i8259A_lock+0x14/0x40 ... 
acquired at: [] lockdep_acquire+0x68/0x84 [] _spin_lock_irqsave+0x2a/0x3a [] enable_8259A_irq+0x10/0x47 [] startup_8259A_irq+0x8/0xc [] setup_irq+0xe4/0x14d [] time_init_hook+0xf/0x11 [] time_init+0x44/0x46 [] start_kernel+0x191/0x38f [] 0xc0100210 -> (ioapic_lock){+...} ops: 122 { initial-use at: [] lockdep_acquire+0x68/0x84 [] _spin_lock_irqsave+0x2a/0x3a [] io_apic_get_version+0x16/0x55 [] mp_register_ioapic+0xc6/0x127 [] acpi_parse_ioapic+0x2d/0x39 [] acpi_table_parse_madt_family+0xb4/0x100 [] acpi_table_parse_madt+0x16/0x18 [] acpi_boot_init+0x132/0x251 [] setup_arch+0xd36/0xe37 [] start_kernel+0x66/0x38f [] 0xc0100210 in-hardirq-W at: [] lockdep_acquire+0x68/0x84 [] _spin_lock_irqsave+0x2a/0x3a [] mask_IO_APIC_irq+0x11/0x31 [] ack_edge_ioapic_vector+0x31/0x41 [] __do_IRQ+0x4f/0x113 [] do_IRQ+0x8c/0xad } ... key at: [] ioapic_lock+0x14/0x3c -> (i8259A_lock){++..} ops: 5149 { initial-use at: [] lockdep_acquire+0x68/0x84 [] _spin_lock_irqsave+0x2a/0x3a [] init_8259A+0x11/0x8f [] init_ISA_irqs+0x12/0x4d [] pre_intr_init_hook+0x8/0xa [] init_IRQ+0xe/0x65 [] start_kernel+0x178/0x38f [] 0xc0100210 in-hardirq-W at: [] lockdep_acquire+0x68/0x84 [] _spin_lock_irqsave+0x2a/0x3a [] mask_and_ack_8259A+0x1b/0xcc [] __do_IRQ+0x4f/0x113 [] do_IRQ+0x8c/0xad in-softirq-W at: [] lockdep_acquire+0x68/0x84 [] _spin_lock_irqsave+0x2a/0x3a [] mask_and_ack_8259A+0x1b/0xcc [] __do_IRQ+0x4f/0x113 [] do_IRQ+0x8c/0xad } ... key at: [] i8259A_lock+0x14/0x40 ... acquired at: [] lockdep_acquire+0x68/0x84 [] _spin_lock_irqsave+0x2a/0x3a [] disable_8259A_irq+0x10/0x47 [] startup_edge_ioapic_vector+0x31/0x58 [] setup_irq+0xe4/0x14d [] request_irq+0xda/0xf9 [] rtc_init+0x6a/0x1a7 [] init+0x14a/0x2cb [] kernel_thread_helper+0x5/0xb ... acquired at: [] lockdep_acquire+0x68/0x84 [] _spin_lock_irqsave+0x2a/0x3a [] mask_IO_APIC_irq+0x11/0x31 [] ack_edge_ioapic_vector+0x31/0x41 [] __do_IRQ+0x4f/0x113 [] do_IRQ+0x8c/0xad the hard-irq-unsafe lock's dependencies: -> (vector_lock){--..} ops: 31 { initial-use at: [] lockdep_acquire+0x68/0x84 [] _spin_lock+0x21/0x2f [] assign_irq_vector+0x34/0xc8 [] setup_IO_APIC+0x45a/0xcff [] smp_prepare_cpus+0x5ea/0x8aa [] init+0x32/0x2cb [] kernel_thread_helper+0x5/0xb softirq-on-W at: [] lockdep_acquire+0x68/0x84 [] _spin_lock+0x21/0x2f [] assign_irq_vector+0x34/0xc8 [] setup_IO_APIC+0x45a/0xcff [] smp_prepare_cpus+0x5ea/0x8aa [] init+0x32/0x2cb [] kernel_thread_helper+0x5/0xb hardirq-on-W at: [] lockdep_acquire+0x68/0x84 [] _spin_lock+0x21/0x2f [] assign_irq_vector+0x34/0xc8 [] setup_IO_APIC+0x45a/0xcff [] smp_prepare_cpus+0x5ea/0x8aa [] init+0x32/0x2cb [] kernel_thread_helper+0x5/0xb } ... 
key at: [] vector_lock+0x14/0x3c stack backtrace: [] show_trace+0xd/0xf [] dump_stack+0x17/0x19 [] check_usage+0x1f6/0x203 [] __lockdep_acquire+0x8c2/0xaa5 [] lockdep_acquire+0x68/0x84 [] _spin_lock_irqsave+0x2a/0x3a [] startup_msi_irq_wo_maskbit+0x10/0x35 [] probe_irq_on+0x44/0x107 [] serial8250_config_port+0x84b/0x986 [] uart_add_one_port+0x105/0x289 [] serial8250_init+0xc3/0x10a [] init+0x14a/0x2cb [] kernel_thread_helper+0x5/0xb Signed-off-by: Ingo Molnar Cc: Jan Beulich Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/i386/kernel/io_apic.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index 61317f4..72ae414 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -1163,14 +1163,15 @@ u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 }; int assign_irq_vector(int irq) { static int current_vector = FIRST_DEVICE_VECTOR, offset = 0; + unsigned long flags; int vector; BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS); - spin_lock(&vector_lock); + spin_lock_irqsave(&vector_lock, flags); if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) { - spin_unlock(&vector_lock); + spin_unlock_irqrestore(&vector_lock, flags); return IO_APIC_VECTOR(irq); } next: @@ -1181,7 +1182,7 @@ next: if (current_vector >= FIRST_SYSTEM_VECTOR) { offset++; if (!(offset%8)) { - spin_unlock(&vector_lock); + spin_unlock_irqrestore(&vector_lock, flags); return -ENOSPC; } current_vector = FIRST_DEVICE_VECTOR + offset; @@ -1192,7 +1193,7 @@ next: if (irq != AUTO_ASSIGN) IO_APIC_VECTOR(irq) = vector; - spin_unlock(&vector_lock); + spin_unlock_irqrestore(&vector_lock, flags); return vector; } -- cgit v1.1 From 176a2718f408ce92788b29127050b04dfd6e4f68 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 26 Jun 2006 13:57:41 +0200 Subject: [PATCH] i386: reliable stack trace support (i386) These are the i386-specific pieces to enable reliable stack traces. This is going to be even more useful once CFI annotations get added to he assembly code, namely to entry.S. 
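The heart of the change is in traps.c (hunk below): show_trace_log_lvl() now tries the
CONFIG_STACK_UNWIND frame-info unwinder first, seeded from pt_regs when available and
otherwise from the running or blocked task, and only falls back to the old EBP scan when
no frame info can be set up. A minimal sketch of the resulting walk, lifted from the
show_trace_unwind() hunk (comments are mine):

	/* ask the unwinder for one frame at a time; stop on failure
	 * or once the trace crosses into user mode */
	while (unwind(info) == 0 && UNW_PC(info)) {
		printed = print_addr_and_symbol(UNW_PC(info), log_lvl, printed);
		if (arch_unw_user_mode(info))
			break;
	}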
Signed-off-by: Jan Beulich Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/i386/kernel/entry.S | 29 ++++++++++++++++++++++++ arch/i386/kernel/process.c | 2 +- arch/i386/kernel/traps.c | 50 +++++++++++++++++++++++++++++++++--------- arch/i386/kernel/vmlinux.lds.S | 9 ++++++++ 4 files changed, 79 insertions(+), 11 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index cfc683f..e802f3c 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -663,6 +663,35 @@ ENTRY(spurious_interrupt_bug) pushl $do_spurious_interrupt_bug jmp error_code +#ifdef CONFIG_STACK_UNWIND +ENTRY(arch_unwind_init_running) + movl 4(%esp), %edx + movl (%esp), %ecx + leal 4(%esp), %eax + movl %ebx, EBX(%edx) + xorl %ebx, %ebx + movl %ebx, ECX(%edx) + movl %ebx, EDX(%edx) + movl %esi, ESI(%edx) + movl %edi, EDI(%edx) + movl %ebp, EBP(%edx) + movl %ebx, EAX(%edx) + movl $__USER_DS, DS(%edx) + movl $__USER_DS, ES(%edx) + movl %ebx, ORIG_EAX(%edx) + movl %ecx, EIP(%edx) + movl 12(%esp), %ecx + movl $__KERNEL_CS, CS(%edx) + movl %ebx, EFLAGS(%edx) + movl %eax, OLDESP(%edx) + movl 8(%esp), %eax + movl %ecx, 8(%esp) + movl EBX(%edx), %ebx + movl $__KERNEL_DS, OLDSS(%edx) + jmpl *%eax +ENDPROC(arch_unwind_init_running) +#endif + .section .rodata,"a" #include "syscall_table.S" diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index 6259afe..525432e 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -312,7 +312,7 @@ void show_regs(struct pt_regs * regs) cr3 = read_cr3(); cr4 = read_cr4_safe(); printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4); - show_trace(NULL, ®s->esp); + show_trace(NULL, regs, ®s->esp); } /* diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index dcc1447..2865846 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -28,6 +28,7 @@ #include #include #include +#include #ifdef CONFIG_EISA #include @@ -47,7 +48,7 @@ #include #include #include - +#include #include #include #include @@ -170,14 +171,43 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo, return ebp; } -static void show_trace_log_lvl(struct task_struct *task, +static asmlinkage void show_trace_unwind(struct unwind_frame_info *info, void *log_lvl) +{ + int printed = 0; /* nr of entries already printed on current line */ + + while (unwind(info) == 0 && UNW_PC(info)) { + printed = print_addr_and_symbol(UNW_PC(info), log_lvl, printed); + if (arch_unw_user_mode(info)) + break; + } + if (printed) + printk("\n"); +} + +static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, unsigned long *stack, char *log_lvl) { unsigned long ebp; + struct unwind_frame_info info; if (!task) task = current; + if (regs) { + if (unwind_init_frame_info(&info, task, regs) == 0) { + show_trace_unwind(&info, log_lvl); + return; + } + } else if (task == current) { + if (unwind_init_running(&info, show_trace_unwind, log_lvl) == 0) + return; + } else { + if (unwind_init_blocked(&info, task) == 0) { + show_trace_unwind(&info, log_lvl); + return; + } + } + if (task == current) { /* Grab ebp right from our regs */ asm ("movl %%ebp, %0" : "=r" (ebp) : ); @@ -198,13 +228,13 @@ static void show_trace_log_lvl(struct task_struct *task, } } -void show_trace(struct task_struct *task, unsigned long * stack) +void show_trace(struct task_struct *task, struct pt_regs *regs, unsigned long * stack) { - show_trace_log_lvl(task, stack, ""); + show_trace_log_lvl(task, 
regs, stack, ""); } -static void show_stack_log_lvl(struct task_struct *task, unsigned long *esp, - char *log_lvl) +static void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, + unsigned long *esp, char *log_lvl) { unsigned long *stack; int i; @@ -225,13 +255,13 @@ static void show_stack_log_lvl(struct task_struct *task, unsigned long *esp, printk("%08lx ", *stack++); } printk("\n%sCall Trace:\n", log_lvl); - show_trace_log_lvl(task, esp, log_lvl); + show_trace_log_lvl(task, regs, esp, log_lvl); } void show_stack(struct task_struct *task, unsigned long *esp) { printk(" "); - show_stack_log_lvl(task, esp, ""); + show_stack_log_lvl(task, NULL, esp, ""); } /* @@ -241,7 +271,7 @@ void dump_stack(void) { unsigned long stack; - show_trace(current, &stack); + show_trace(current, NULL, &stack); } EXPORT_SYMBOL(dump_stack); @@ -285,7 +315,7 @@ void show_registers(struct pt_regs *regs) u8 __user *eip; printk("\n" KERN_EMERG "Stack: "); - show_stack_log_lvl(NULL, (unsigned long *)esp, KERN_EMERG); + show_stack_log_lvl(NULL, regs, (unsigned long *)esp, KERN_EMERG); printk(KERN_EMERG "Code: "); diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S index 7512f39..2d4f138 100644 --- a/arch/i386/kernel/vmlinux.lds.S +++ b/arch/i386/kernel/vmlinux.lds.S @@ -71,6 +71,15 @@ SECTIONS .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { *(.data.read_mostly) } _edata = .; /* End of data section */ +#ifdef CONFIG_STACK_UNWIND + . = ALIGN(4); + .eh_frame : AT(ADDR(.eh_frame) - LOAD_OFFSET) { + __start_unwind = .; + *(.eh_frame) + __end_unwind = .; + } +#endif + . = ALIGN(THREAD_SIZE); /* init_task */ .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { *(.data.init_task) -- cgit v1.1 From fe7cacc1c25e286872b878c5d46880b620cd1e2d Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 26 Jun 2006 13:57:44 +0200 Subject: [PATCH] i386: reliable stack trace support i386 entry.S To increase the usefulness of reliable stack unwinding, this adds CFI unwind annotations to many low-level i386 routines. 
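The pattern is mechanical and repeats throughout the hunk: every push and pop is paired
with a CFA adjustment, and saves of registers are recorded so the unwinder can locate
them later, while the RING0_*_FRAME macros establish the starting CFA for the different
kernel entry frames. A representative fragment, with the CFI macros as in the patch and
explanatory comments added here:

	pushl %eax
	CFI_ADJUST_CFA_OFFSET 4		# %esp moved down by 4; keep the CFA offset in sync
	CFI_REL_OFFSET eax, 0		# record that %eax is saved at the new stack top
	# ... routine body ...
	popl %eax
	CFI_ADJUST_CFA_OFFSET -4	# stack shrank again
	CFI_RESTORE eax			# %eax is live in its register once more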
Signed-off-by: Jan Beulich Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/i386/kernel/entry.S | 234 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 223 insertions(+), 11 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index e802f3c..e6e4506 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -48,6 +48,7 @@ #include #include #include +#include #include "irq_vectors.h" #define nr_syscalls ((syscall_table_size)/4) @@ -85,31 +86,67 @@ VM_MASK = 0x00020000 #define SAVE_ALL \ cld; \ pushl %es; \ + CFI_ADJUST_CFA_OFFSET 4;\ + /*CFI_REL_OFFSET es, 0;*/\ pushl %ds; \ + CFI_ADJUST_CFA_OFFSET 4;\ + /*CFI_REL_OFFSET ds, 0;*/\ pushl %eax; \ + CFI_ADJUST_CFA_OFFSET 4;\ + CFI_REL_OFFSET eax, 0;\ pushl %ebp; \ + CFI_ADJUST_CFA_OFFSET 4;\ + CFI_REL_OFFSET ebp, 0;\ pushl %edi; \ + CFI_ADJUST_CFA_OFFSET 4;\ + CFI_REL_OFFSET edi, 0;\ pushl %esi; \ + CFI_ADJUST_CFA_OFFSET 4;\ + CFI_REL_OFFSET esi, 0;\ pushl %edx; \ + CFI_ADJUST_CFA_OFFSET 4;\ + CFI_REL_OFFSET edx, 0;\ pushl %ecx; \ + CFI_ADJUST_CFA_OFFSET 4;\ + CFI_REL_OFFSET ecx, 0;\ pushl %ebx; \ + CFI_ADJUST_CFA_OFFSET 4;\ + CFI_REL_OFFSET ebx, 0;\ movl $(__USER_DS), %edx; \ movl %edx, %ds; \ movl %edx, %es; #define RESTORE_INT_REGS \ popl %ebx; \ + CFI_ADJUST_CFA_OFFSET -4;\ + CFI_RESTORE ebx;\ popl %ecx; \ + CFI_ADJUST_CFA_OFFSET -4;\ + CFI_RESTORE ecx;\ popl %edx; \ + CFI_ADJUST_CFA_OFFSET -4;\ + CFI_RESTORE edx;\ popl %esi; \ + CFI_ADJUST_CFA_OFFSET -4;\ + CFI_RESTORE esi;\ popl %edi; \ + CFI_ADJUST_CFA_OFFSET -4;\ + CFI_RESTORE edi;\ popl %ebp; \ - popl %eax + CFI_ADJUST_CFA_OFFSET -4;\ + CFI_RESTORE ebp;\ + popl %eax; \ + CFI_ADJUST_CFA_OFFSET -4;\ + CFI_RESTORE eax #define RESTORE_REGS \ RESTORE_INT_REGS; \ 1: popl %ds; \ + CFI_ADJUST_CFA_OFFSET -4;\ + /*CFI_RESTORE ds;*/\ 2: popl %es; \ + CFI_ADJUST_CFA_OFFSET -4;\ + /*CFI_RESTORE es;*/\ .section .fixup,"ax"; \ 3: movl $0,(%esp); \ jmp 1b; \ @@ -122,13 +159,43 @@ VM_MASK = 0x00020000 .long 2b,4b; \ .previous +#define RING0_INT_FRAME \ + CFI_STARTPROC simple;\ + CFI_DEF_CFA esp, 3*4;\ + /*CFI_OFFSET cs, -2*4;*/\ + CFI_OFFSET eip, -3*4 + +#define RING0_EC_FRAME \ + CFI_STARTPROC simple;\ + CFI_DEF_CFA esp, 4*4;\ + /*CFI_OFFSET cs, -2*4;*/\ + CFI_OFFSET eip, -3*4 + +#define RING0_PTREGS_FRAME \ + CFI_STARTPROC simple;\ + CFI_DEF_CFA esp, OLDESP-EBX;\ + /*CFI_OFFSET cs, CS-OLDESP;*/\ + CFI_OFFSET eip, EIP-OLDESP;\ + /*CFI_OFFSET es, ES-OLDESP;*/\ + /*CFI_OFFSET ds, DS-OLDESP;*/\ + CFI_OFFSET eax, EAX-OLDESP;\ + CFI_OFFSET ebp, EBP-OLDESP;\ + CFI_OFFSET edi, EDI-OLDESP;\ + CFI_OFFSET esi, ESI-OLDESP;\ + CFI_OFFSET edx, EDX-OLDESP;\ + CFI_OFFSET ecx, ECX-OLDESP;\ + CFI_OFFSET ebx, EBX-OLDESP ENTRY(ret_from_fork) + CFI_STARTPROC pushl %eax + CFI_ADJUST_CFA_OFFSET -4 call schedule_tail GET_THREAD_INFO(%ebp) popl %eax + CFI_ADJUST_CFA_OFFSET -4 jmp syscall_exit + CFI_ENDPROC /* * Return to user mode is not as complex as all this looks, @@ -139,6 +206,7 @@ ENTRY(ret_from_fork) # userspace resumption stub bypassing syscall exit tracing ALIGN + RING0_PTREGS_FRAME ret_from_exception: preempt_stop ret_from_intr: @@ -171,20 +239,33 @@ need_resched: call preempt_schedule_irq jmp need_resched #endif + CFI_ENDPROC /* SYSENTER_RETURN points to after the "sysenter" instruction in the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. 
*/ # sysenter call handler stub ENTRY(sysenter_entry) + CFI_STARTPROC simple + CFI_DEF_CFA esp, 0 + CFI_REGISTER esp, ebp movl TSS_sysenter_esp0(%esp),%esp sysenter_past_esp: sti pushl $(__USER_DS) + CFI_ADJUST_CFA_OFFSET 4 + /*CFI_REL_OFFSET ss, 0*/ pushl %ebp + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET esp, 0 pushfl + CFI_ADJUST_CFA_OFFSET 4 pushl $(__USER_CS) + CFI_ADJUST_CFA_OFFSET 4 + /*CFI_REL_OFFSET cs, 0*/ pushl $SYSENTER_RETURN + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET eip, 0 /* * Load the potential sixth argument from user stack. @@ -199,6 +280,7 @@ sysenter_past_esp: .previous pushl %eax + CFI_ADJUST_CFA_OFFSET 4 SAVE_ALL GET_THREAD_INFO(%ebp) @@ -219,11 +301,14 @@ sysenter_past_esp: xorl %ebp,%ebp sti sysexit + CFI_ENDPROC # system call handler stub ENTRY(system_call) + RING0_INT_FRAME # can't unwind into user space anyway pushl %eax # save orig_eax + CFI_ADJUST_CFA_OFFSET 4 SAVE_ALL GET_THREAD_INFO(%ebp) testl $TF_MASK,EFLAGS(%esp) @@ -256,10 +341,12 @@ restore_all: movb CS(%esp), %al andl $(VM_MASK | (4 << 8) | 3), %eax cmpl $((4 << 8) | 3), %eax + CFI_REMEMBER_STATE je ldt_ss # returning to user-space with LDT SS restore_nocheck: RESTORE_REGS addl $4, %esp + CFI_ADJUST_CFA_OFFSET -4 1: iret .section .fixup,"ax" iret_exc: @@ -273,6 +360,7 @@ iret_exc: .long 1b,iret_exc .previous + CFI_RESTORE_STATE ldt_ss: larl OLDSS(%esp), %eax jnz restore_nocheck @@ -285,11 +373,13 @@ ldt_ss: * CPUs, which we can try to work around to make * dosemu and wine happy. */ subl $8, %esp # reserve space for switch16 pointer + CFI_ADJUST_CFA_OFFSET 8 cli movl %esp, %eax /* Set up the 16bit stack frame with switch32 pointer on top, * and a switch16 pointer on top of the current frame. */ call setup_x86_bogus_stack + CFI_ADJUST_CFA_OFFSET -8 # frame has moved RESTORE_REGS lss 20+4(%esp), %esp # switch to 16bit stack 1: iret @@ -297,9 +387,11 @@ ldt_ss: .align 4 .long 1b,iret_exc .previous + CFI_ENDPROC # perform work that needs to be done immediately before resumption ALIGN + RING0_PTREGS_FRAME # can't unwind into user space anyway work_pending: testb $_TIF_NEED_RESCHED, %cl jz work_notifysig @@ -329,8 +421,10 @@ work_notifysig: # deal with pending signals and work_notifysig_v86: #ifdef CONFIG_VM86 pushl %ecx # save ti_flags for do_notify_resume + CFI_ADJUST_CFA_OFFSET 4 call save_v86_state # %eax contains pt_regs pointer popl %ecx + CFI_ADJUST_CFA_OFFSET -4 movl %eax, %esp xorl %edx, %edx call do_notify_resume @@ -363,19 +457,21 @@ syscall_exit_work: movl $1, %edx call do_syscall_trace jmp resume_userspace + CFI_ENDPROC - ALIGN + RING0_INT_FRAME # can't unwind into user space anyway syscall_fault: pushl %eax # save orig_eax + CFI_ADJUST_CFA_OFFSET 4 SAVE_ALL GET_THREAD_INFO(%ebp) movl $-EFAULT,EAX(%esp) jmp resume_userspace - ALIGN syscall_badsys: movl $-ENOSYS,EAX(%esp) jmp resume_userspace + CFI_ENDPROC #define FIXUP_ESPFIX_STACK \ movl %esp, %eax; \ @@ -387,16 +483,21 @@ syscall_badsys: movl %eax, %esp; #define UNWIND_ESPFIX_STACK \ pushl %eax; \ + CFI_ADJUST_CFA_OFFSET 4; \ movl %ss, %eax; \ /* see if on 16bit stack */ \ cmpw $__ESPFIX_SS, %ax; \ - jne 28f; \ - movl $__KERNEL_DS, %edx; \ - movl %edx, %ds; \ - movl %edx, %es; \ + je 28f; \ +27: popl %eax; \ + CFI_ADJUST_CFA_OFFSET -4; \ +.section .fixup,"ax"; \ +28: movl $__KERNEL_DS, %eax; \ + movl %eax, %ds; \ + movl %eax, %es; \ /* switch to 32bit stack */ \ - FIXUP_ESPFIX_STACK \ -28: popl %eax; + FIXUP_ESPFIX_STACK; \ + jmp 27b; \ +.previous /* * Build the entry stubs and pointer table with @@ -408,9 +509,14 @@ ENTRY(interrupt) vector=0 
ENTRY(irq_entries_start) + RING0_INT_FRAME .rept NR_IRQS ALIGN + .if vector + CFI_ADJUST_CFA_OFFSET -4 + .endif 1: pushl $vector-256 + CFI_ADJUST_CFA_OFFSET 4 jmp common_interrupt .data .long 1b @@ -424,60 +530,99 @@ common_interrupt: movl %esp,%eax call do_IRQ jmp ret_from_intr + CFI_ENDPROC #define BUILD_INTERRUPT(name, nr) \ ENTRY(name) \ + RING0_INT_FRAME; \ pushl $nr-256; \ - SAVE_ALL \ + CFI_ADJUST_CFA_OFFSET 4; \ + SAVE_ALL; \ movl %esp,%eax; \ call smp_/**/name; \ - jmp ret_from_intr; + jmp ret_from_intr; \ + CFI_ENDPROC /* The include is where all of the SMP etc. interrupts come from */ #include "entry_arch.h" ENTRY(divide_error) + RING0_INT_FRAME pushl $0 # no error code + CFI_ADJUST_CFA_OFFSET 4 pushl $do_divide_error + CFI_ADJUST_CFA_OFFSET 4 ALIGN error_code: pushl %ds + CFI_ADJUST_CFA_OFFSET 4 + /*CFI_REL_OFFSET ds, 0*/ pushl %eax + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET eax, 0 xorl %eax, %eax pushl %ebp + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ebp, 0 pushl %edi + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET edi, 0 pushl %esi + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET esi, 0 pushl %edx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET edx, 0 decl %eax # eax = -1 pushl %ecx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ecx, 0 pushl %ebx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ebx, 0 cld pushl %es + CFI_ADJUST_CFA_OFFSET 4 + /*CFI_REL_OFFSET es, 0*/ UNWIND_ESPFIX_STACK popl %ecx + CFI_ADJUST_CFA_OFFSET -4 + /*CFI_REGISTER es, ecx*/ movl ES(%esp), %edi # get the function address movl ORIG_EAX(%esp), %edx # get the error code movl %eax, ORIG_EAX(%esp) movl %ecx, ES(%esp) + /*CFI_REL_OFFSET es, ES*/ movl $(__USER_DS), %ecx movl %ecx, %ds movl %ecx, %es movl %esp,%eax # pt_regs pointer call *%edi jmp ret_from_exception + CFI_ENDPROC ENTRY(coprocessor_error) + RING0_INT_FRAME pushl $0 + CFI_ADJUST_CFA_OFFSET 4 pushl $do_coprocessor_error + CFI_ADJUST_CFA_OFFSET 4 jmp error_code + CFI_ENDPROC ENTRY(simd_coprocessor_error) + RING0_INT_FRAME pushl $0 + CFI_ADJUST_CFA_OFFSET 4 pushl $do_simd_coprocessor_error + CFI_ADJUST_CFA_OFFSET 4 jmp error_code + CFI_ENDPROC ENTRY(device_not_available) + RING0_INT_FRAME pushl $-1 # mark this as an int + CFI_ADJUST_CFA_OFFSET 4 SAVE_ALL movl %cr0, %eax testl $0x4, %eax # EM (math emulation bit) @@ -487,9 +632,12 @@ ENTRY(device_not_available) jmp ret_from_exception device_not_available_emulate: pushl $0 # temporary storage for ORIG_EIP + CFI_ADJUST_CFA_OFFSET 4 call math_emulate addl $4, %esp + CFI_ADJUST_CFA_OFFSET -4 jmp ret_from_exception + CFI_ENDPROC /* * Debug traps and NMI can happen at the one SYSENTER instruction @@ -514,16 +662,19 @@ label: \ pushl $sysenter_past_esp KPROBE_ENTRY(debug) + RING0_INT_FRAME cmpl $sysenter_entry,(%esp) jne debug_stack_correct FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn) debug_stack_correct: pushl $-1 # mark this as an int + CFI_ADJUST_CFA_OFFSET 4 SAVE_ALL xorl %edx,%edx # error code 0 movl %esp,%eax # pt_regs pointer call do_debug jmp ret_from_exception + CFI_ENDPROC .previous .text /* * NMI is doubly nasty. It can happen _while_ we're handling @@ -534,14 +685,18 @@ debug_stack_correct: * fault happened on the sysenter path. */ ENTRY(nmi) + RING0_INT_FRAME pushl %eax + CFI_ADJUST_CFA_OFFSET 4 movl %ss, %eax cmpw $__ESPFIX_SS, %ax popl %eax + CFI_ADJUST_CFA_OFFSET -4 je nmi_16bit_stack cmpl $sysenter_entry,(%esp) je nmi_stack_fixup pushl %eax + CFI_ADJUST_CFA_OFFSET 4 movl %esp,%eax /* Do not access memory above the end of our stack page, * it might not exist. 
@@ -549,16 +704,19 @@ ENTRY(nmi) andl $(THREAD_SIZE-1),%eax cmpl $(THREAD_SIZE-20),%eax popl %eax + CFI_ADJUST_CFA_OFFSET -4 jae nmi_stack_correct cmpl $sysenter_entry,12(%esp) je nmi_debug_stack_check nmi_stack_correct: pushl %eax + CFI_ADJUST_CFA_OFFSET 4 SAVE_ALL xorl %edx,%edx # zero error code movl %esp,%eax # pt_regs pointer call do_nmi jmp restore_all + CFI_ENDPROC nmi_stack_fixup: FIX_STACK(12,nmi_stack_correct, 1) @@ -574,97 +732,150 @@ nmi_debug_stack_check: jmp nmi_stack_correct nmi_16bit_stack: + RING0_INT_FRAME /* create the pointer to lss back */ pushl %ss + CFI_ADJUST_CFA_OFFSET 4 pushl %esp + CFI_ADJUST_CFA_OFFSET 4 movzwl %sp, %esp addw $4, (%esp) /* copy the iret frame of 12 bytes */ .rept 3 pushl 16(%esp) + CFI_ADJUST_CFA_OFFSET 4 .endr pushl %eax + CFI_ADJUST_CFA_OFFSET 4 SAVE_ALL FIXUP_ESPFIX_STACK # %eax == %esp + CFI_ADJUST_CFA_OFFSET -20 # the frame has now moved xorl %edx,%edx # zero error code call do_nmi RESTORE_REGS lss 12+4(%esp), %esp # back to 16bit stack 1: iret + CFI_ENDPROC .section __ex_table,"a" .align 4 .long 1b,iret_exc .previous KPROBE_ENTRY(int3) + RING0_INT_FRAME pushl $-1 # mark this as an int + CFI_ADJUST_CFA_OFFSET 4 SAVE_ALL xorl %edx,%edx # zero error code movl %esp,%eax # pt_regs pointer call do_int3 jmp ret_from_exception + CFI_ENDPROC .previous .text ENTRY(overflow) + RING0_INT_FRAME pushl $0 + CFI_ADJUST_CFA_OFFSET 4 pushl $do_overflow + CFI_ADJUST_CFA_OFFSET 4 jmp error_code + CFI_ENDPROC ENTRY(bounds) + RING0_INT_FRAME pushl $0 + CFI_ADJUST_CFA_OFFSET 4 pushl $do_bounds + CFI_ADJUST_CFA_OFFSET 4 jmp error_code + CFI_ENDPROC ENTRY(invalid_op) + RING0_INT_FRAME pushl $0 + CFI_ADJUST_CFA_OFFSET 4 pushl $do_invalid_op + CFI_ADJUST_CFA_OFFSET 4 jmp error_code + CFI_ENDPROC ENTRY(coprocessor_segment_overrun) + RING0_INT_FRAME pushl $0 + CFI_ADJUST_CFA_OFFSET 4 pushl $do_coprocessor_segment_overrun + CFI_ADJUST_CFA_OFFSET 4 jmp error_code + CFI_ENDPROC ENTRY(invalid_TSS) + RING0_EC_FRAME pushl $do_invalid_TSS + CFI_ADJUST_CFA_OFFSET 4 jmp error_code + CFI_ENDPROC ENTRY(segment_not_present) + RING0_EC_FRAME pushl $do_segment_not_present + CFI_ADJUST_CFA_OFFSET 4 jmp error_code + CFI_ENDPROC ENTRY(stack_segment) + RING0_EC_FRAME pushl $do_stack_segment + CFI_ADJUST_CFA_OFFSET 4 jmp error_code + CFI_ENDPROC KPROBE_ENTRY(general_protection) + RING0_EC_FRAME pushl $do_general_protection + CFI_ADJUST_CFA_OFFSET 4 jmp error_code + CFI_ENDPROC .previous .text ENTRY(alignment_check) + RING0_EC_FRAME pushl $do_alignment_check + CFI_ADJUST_CFA_OFFSET 4 jmp error_code + CFI_ENDPROC KPROBE_ENTRY(page_fault) + RING0_EC_FRAME pushl $do_page_fault + CFI_ADJUST_CFA_OFFSET 4 jmp error_code + CFI_ENDPROC .previous .text #ifdef CONFIG_X86_MCE ENTRY(machine_check) + RING0_INT_FRAME pushl $0 + CFI_ADJUST_CFA_OFFSET 4 pushl machine_check_vector + CFI_ADJUST_CFA_OFFSET 4 jmp error_code + CFI_ENDPROC #endif ENTRY(spurious_interrupt_bug) + RING0_INT_FRAME pushl $0 + CFI_ADJUST_CFA_OFFSET 4 pushl $do_spurious_interrupt_bug + CFI_ADJUST_CFA_OFFSET 4 jmp error_code + CFI_ENDPROC #ifdef CONFIG_STACK_UNWIND ENTRY(arch_unwind_init_running) + CFI_STARTPROC movl 4(%esp), %edx movl (%esp), %ecx leal 4(%esp), %eax @@ -689,6 +900,7 @@ ENTRY(arch_unwind_init_running) movl EBX(%edx), %ebx movl $__KERNEL_DS, OLDSS(%edx) jmpl *%eax + CFI_ENDPROC ENDPROC(arch_unwind_init_running) #endif -- cgit v1.1 From c33bd9aac0597eeedaaa01ea5aafe456894b2f2b Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 26 Jun 2006 13:57:47 +0200 Subject: [PATCH] i386/x86-64: fall back to old-style call trace 
if no unwinding If no unwinding is possible at all for a certain exception instance, fall back to the old style call trace instead of not showing any trace at all. Also, allow setting the stack trace mode at the command line. Signed-off-by: Jan Beulich Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/i386/kernel/traps.c | 46 +++++++++++++++++++++++++++++++++------------- 1 file changed, 33 insertions(+), 13 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 2865846..7846409 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -93,6 +93,7 @@ asmlinkage void spurious_interrupt_bug(void); asmlinkage void machine_check(void); static int kstack_depth_to_print = 24; +static int call_trace = 1; ATOMIC_NOTIFIER_HEAD(i386die_chain); int register_die_notifier(struct notifier_block *nb) @@ -171,40 +172,47 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo, return ebp; } -static asmlinkage void show_trace_unwind(struct unwind_frame_info *info, void *log_lvl) +static asmlinkage int show_trace_unwind(struct unwind_frame_info *info, void *log_lvl) { + int n = 0; int printed = 0; /* nr of entries already printed on current line */ while (unwind(info) == 0 && UNW_PC(info)) { + ++n; printed = print_addr_and_symbol(UNW_PC(info), log_lvl, printed); if (arch_unw_user_mode(info)) break; } if (printed) printk("\n"); + return n; } static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, unsigned long *stack, char *log_lvl) { unsigned long ebp; - struct unwind_frame_info info; if (!task) task = current; - if (regs) { - if (unwind_init_frame_info(&info, task, regs) == 0) { - show_trace_unwind(&info, log_lvl); - return; + if (call_trace >= 0) { + int unw_ret = 0; + struct unwind_frame_info info; + + if (regs) { + if (unwind_init_frame_info(&info, task, regs) == 0) + unw_ret = show_trace_unwind(&info, log_lvl); + } else if (task == current) + unw_ret = unwind_init_running(&info, show_trace_unwind, log_lvl); + else { + if (unwind_init_blocked(&info, task) == 0) + unw_ret = show_trace_unwind(&info, log_lvl); } - } else if (task == current) { - if (unwind_init_running(&info, show_trace_unwind, log_lvl) == 0) - return; - } else { - if (unwind_init_blocked(&info, task) == 0) { - show_trace_unwind(&info, log_lvl); - return; + if (unw_ret > 0) { + if (call_trace > 0) + return; + printk("%sLegacy call trace:\n", log_lvl); } } @@ -1245,3 +1253,15 @@ static int __init kstack_setup(char *s) return 1; } __setup("kstack=", kstack_setup); + +static int __init call_trace_setup(char *s) +{ + if (strcmp(s, "old") == 0) + call_trace = -1; + else if (strcmp(s, "both") == 0) + call_trace = 0; + else if (strcmp(s, "new") == 0) + call_trace = 1; + return 1; +} +__setup("call_trace=", call_trace_setup); -- cgit v1.1 From b79c4df714ff190f7149d3d5a86278bdc25a90ec Mon Sep 17 00:00:00 2001 From: Carl-Daniel Hailfinger Date: Mon, 26 Jun 2006 13:57:53 +0200 Subject: [PATCH] i386/x86-64: Consolidate arch/{i386,x86_64}/boot/compressed/misc.c Clean up arch/{i386,x86_64}/boot/compressed/misc.c a bit to reduce their differences. Should have zero effect on code generation. 
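For reference, the declarations the i386 copy moves to (taken from the hunk below): the
private memset/memcpy helpers drop the size_t/__const spelling in favour of plain
"unsigned" lengths, and the input buffer becomes unsigned char, which is what lets the
two architectures share the same text:

	static void *memset(void *s, int c, unsigned n);
	static void *memcpy(void *dest, const void *src, unsigned n);

	extern unsigned char input_data[];
	extern int input_len;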
Signed-off-by: Carl-Daniel Hailfinger Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/i386/boot/compressed/misc.c | 32 ++++++++++++-------------------- 1 file changed, 12 insertions(+), 20 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/boot/compressed/misc.c b/arch/i386/boot/compressed/misc.c index f19f3a7..b2ccd54 100644 --- a/arch/i386/boot/compressed/misc.c +++ b/arch/i386/boot/compressed/misc.c @@ -24,14 +24,6 @@ #undef memset #undef memcpy - -/* - * Why do we do this? Don't ask me.. - * - * Incomprehensible are the ways of bootloaders. - */ -static void* memset(void *, int, size_t); -static void* memcpy(void *, __const void *, size_t); #define memzero(s, n) memset ((s), 0, (n)) typedef unsigned char uch; @@ -93,7 +85,7 @@ static unsigned char *real_mode; /* Pointer to real-mode data */ #endif #define RM_SCREEN_INFO (*(struct screen_info *)(real_mode+0)) -extern char input_data[]; +extern unsigned char input_data[]; extern int input_len; static long bytes_out = 0; @@ -103,6 +95,9 @@ static unsigned long output_ptr = 0; static void *malloc(int size); static void free(void *where); +static void *memset(void *s, int c, unsigned n); +static void *memcpy(void *dest, const void *src, unsigned n); + static void putstr(const char *); extern int end; @@ -205,7 +200,7 @@ static void putstr(const char *s) outb_p(0xff & (pos >> 1), vidport+1); } -static void* memset(void* s, int c, size_t n) +static void* memset(void* s, int c, unsigned n) { int i; char *ss = (char*)s; @@ -214,14 +209,13 @@ static void* memset(void* s, int c, size_t n) return s; } -static void* memcpy(void* __dest, __const void* __src, - size_t __n) +static void* memcpy(void* dest, const void* src, unsigned n) { int i; - char *d = (char *)__dest, *s = (char *)__src; + char *d = (char *)dest, *s = (char *)src; - for (i=0;i<__n;i++) d[i] = s[i]; - return __dest; + for (i=0;i RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < 1024) error("Less than 2MB of memory"); #endif - output_data = (char *)__PHYSICAL_START; /* Normally Points to 1M */ + output_data = (unsigned char *)__PHYSICAL_START; /* Normally Points to 1M */ free_mem_end_ptr = (long)real_mode; } @@ -324,11 +318,9 @@ static void setup_output_buffer_if_we_run_high(struct moveparams *mv) #ifdef STANDARD_MEMORY_BIOS_CALL if (RM_EXT_MEM_K < (3*1024)) error("Less than 4MB of memory"); #else - if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < - (3*1024)) - error("Less than 4MB of memory"); + if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < (3*1024)) error("Less than 4MB of memory"); #endif - mv->low_buffer_start = output_data = (char *)LOW_BUFFER_START; + mv->low_buffer_start = output_data = (unsigned char *)LOW_BUFFER_START; low_buffer_end = ((unsigned int)real_mode > LOW_BUFFER_MAX ? LOW_BUFFER_MAX : (unsigned int)real_mode) & ~0xfff; low_buffer_size = low_buffer_end - LOW_BUFFER_START; -- cgit v1.1 From bf660254e0d430e5d991e432d5dad8d6714828f6 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 26 Jun 2006 13:58:41 +0200 Subject: [PATCH] i386/x86-64: Fix isoimage when syslinux is in /usr/share It's like this on SUSE systems. 
Cc: hpa@zytor.com Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/i386/boot/Makefile | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/boot/Makefile b/arch/i386/boot/Makefile index 33e5547..e979466 100644 --- a/arch/i386/boot/Makefile +++ b/arch/i386/boot/Makefile @@ -109,8 +109,13 @@ fdimage288: $(BOOTIMAGE) $(obj)/mtools.conf isoimage: $(BOOTIMAGE) -rm -rf $(obj)/isoimage mkdir $(obj)/isoimage - cp `echo /usr/lib*/syslinux/isolinux.bin | awk '{ print $1; }'` \ - $(obj)/isoimage + for i in lib lib64 share end ; do \ + if [ -f /usr/$$i/syslinux/isolinux.bin ] ; then \ + cp /usr/$$i/syslinux/isolinux.bin $(obj)/isoimage ; \ + break ; \ + fi ; \ + if [ $$i = end ] ; then exit 1 ; fi ; \ + done cp $(BOOTIMAGE) $(obj)/isoimage/linux echo '$(image_cmdline)' > $(obj)/isoimage/isolinux.cfg if [ -f '$(FDINITRD)' ] ; then \ -- cgit v1.1 From c38bfdc85aae0c6d1458269c0e063c2f4a116711 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 26 Jun 2006 13:58:44 +0200 Subject: [PATCH] x86_64: Move VM86 config into arch/i386/Kconfig Architecture specific configs like this have no business at all in init/Kconfig. This prevents it from being set on x86-64 Pointed out by H.Peter Anvin Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/i386/Kconfig | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch/i386') diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index 2206bf6..0f8da3b 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -324,6 +324,15 @@ config X86_MCE_P4THERMAL Enabling this feature will cause a message to be printed when the P4 enters thermal throttling. +config VM86 + default y + bool "Enable VM86 support" if EMBEDDED + help + This option is required by programs like DOSEMU to run 16-bit legacy + code on X86 processors. It also may be needed by software like + XFree86 to initialize some video cards via BIOS. Disabling this + option saves about 6k. + config TOSHIBA tristate "Toshiba Laptop support" ---help--- -- cgit v1.1 From 495ab9c045e1b0e5c82951b762257fe1c9d81564 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 26 Jun 2006 13:59:11 +0200 Subject: [PATCH] i386/x86-64/ia64: Move polling flag into thread_info_status During some profiling I noticed that default_idle causes a lot of memory traffic. I think that is caused by the atomic operations to clear/set the polling flag in thread_info. There is actually no reason to make this atomic - only the idle thread does it to itself, other CPUs only read it. So I moved it into ti->status. Converted i386/x86-64/ia64 for now because that was the easiest way to fix ACPI which also manipulates these flags in its idle function. 
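The gist of the conversion, condensed from the apm.c and process.c hunks below (a
comparison fragment only, not a separate implementation):

	/* old: atomic bit operations on thread_info->flags (locked RMW) */
	clear_thread_flag(TIF_POLLING_NRFLAG);
	set_thread_flag(TIF_POLLING_NRFLAG);

	/* new: plain updates of thread_info->status; only the owning idle
	 * thread ever writes the flag, remote CPUs merely read it */
	current_thread_info()->status &= ~TS_POLLING;
	current_thread_info()->status |= TS_POLLING;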
Cc: Nick Piggin Cc: Tony Luck Cc: Len Brown Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/i386/kernel/apm.c | 6 +++--- arch/i386/kernel/process.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c index 9e819eb..7c5729d 100644 --- a/arch/i386/kernel/apm.c +++ b/arch/i386/kernel/apm.c @@ -764,9 +764,9 @@ static int apm_do_idle(void) int idled = 0; int polling; - polling = test_thread_flag(TIF_POLLING_NRFLAG); + polling = !!(current_thread_info()->status & TS_POLLING); if (polling) { - clear_thread_flag(TIF_POLLING_NRFLAG); + current_thread_info()->status &= ~TS_POLLING; smp_mb__after_clear_bit(); } if (!need_resched()) { @@ -774,7 +774,7 @@ static int apm_do_idle(void) ret = apm_bios_call_simple(APM_FUNC_IDLE, 0, 0, &eax); } if (polling) - set_thread_flag(TIF_POLLING_NRFLAG); + current_thread_info()->status |= TS_POLLING; if (!idled) return 0; diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index 525432e..6946b06 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -102,7 +102,7 @@ void default_idle(void) local_irq_enable(); if (!hlt_counter && boot_cpu_data.hlt_works_ok) { - clear_thread_flag(TIF_POLLING_NRFLAG); + current_thread_info()->status &= ~TS_POLLING; smp_mb__after_clear_bit(); while (!need_resched()) { local_irq_disable(); @@ -111,7 +111,7 @@ void default_idle(void) else local_irq_enable(); } - set_thread_flag(TIF_POLLING_NRFLAG); + current_thread_info()->status |= TS_POLLING; } else { while (!need_resched()) cpu_relax(); @@ -174,7 +174,7 @@ void cpu_idle(void) { int cpu = smp_processor_id(); - set_thread_flag(TIF_POLLING_NRFLAG); + current_thread_info()->status |= TS_POLLING; /* endless idle loop with no priority at all */ while (1) { -- cgit v1.1 From bdbdaa791fb5b97ba5b3124c7593ffe308e2afef Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 26 Jun 2006 13:59:23 +0200 Subject: [PATCH] i386/x86-64: adjust /proc/interrupts column headings With (significantly) more than 10 CPUs online, the column headings drifted off the positions of the column contents with growing CPU numbers. Signed-off-by: Jan Beulich Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/i386/kernel/irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c index 49ce4c3..061533e 100644 --- a/arch/i386/kernel/irq.c +++ b/arch/i386/kernel/irq.c @@ -227,7 +227,7 @@ int show_interrupts(struct seq_file *p, void *v) if (i == 0) { seq_printf(p, " "); for_each_online_cpu(j) - seq_printf(p, "CPU%d ",j); + seq_printf(p, "CPU%-8d",j); seq_putc(p, '\n'); } -- cgit v1.1 From 45486f81c9aa07218b73a38cbcf62ffa66e99088 Mon Sep 17 00:00:00 2001 From: Keith Owens Date: Mon, 26 Jun 2006 13:59:41 +0200 Subject: [PATCH] x86_64: Standardize i386/x86_64 handling of NMI_VECTOR x86_64 and i386 behave inconsistently when sending an IPI on vector 2 (NMI_VECTOR). Make both behave the same, so IPI 2 is sent as NMI. The crash code was abusing send_IPI_allbutself() by passing a code instead of a vector, it only worked because crash knew about the internal code of send_IPI_allbutself(). Change crash to use NMI_VECTOR instead, and remove the comment about how crash was abusing the function. This patch is a pre-requisite for fixing the problem where sending an IPI as NMI would reboot some Dell Xeon systems. I cannot fix that problem while crash continus to abuse send_IPI_allbutself(). 
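A minimal sketch of the vector-to-delivery-mode mapping that the smp.c hunk below adds to __prepare_ICR() (the APIC constants are hard-coded here only so the example stands alone): NMI_VECTOR selects NMI delivery mode, where the vector field is ignored by the hardware, and every other vector is sent as an ordinary fixed-vector IPI, so callers such as the crash code can pass NMI_VECTOR instead of a raw APIC_DM_NMI delivery-mode value.

    /* Constants mirror the usual local-APIC definitions but are
     * duplicated here purely for illustration. */
    #define NMI_VECTOR              0x02
    #define APIC_DEST_LOGICAL       0x00800
    #define APIC_DM_FIXED           0x00000
    #define APIC_DM_NMI             0x00400

    static unsigned int prepare_icr_sketch(unsigned int shortcut, int vector)
    {
            unsigned int icr = shortcut | APIC_DEST_LOGICAL;

            if (vector == NMI_VECTOR)
                    icr |= APIC_DM_NMI;             /* vector bits ignored for NMI */
            else
                    icr |= APIC_DM_FIXED | vector;  /* normal fixed-vector IPI */
            return icr;
    }
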
It also removes the inconsistency between i386 and x86_64 for NMI_VECTOR. That will simplify all the RAS code that needs to bring all the cpus to a clean stop, even when one or more cpus are spinning disabled. Signed-off-by: Keith Owens Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/i386/kernel/crash.c | 7 +------ arch/i386/kernel/smp.c | 12 +++++++++++- 2 files changed, 12 insertions(+), 7 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/crash.c b/arch/i386/kernel/crash.c index 21dc1bb..0c88d3e 100644 --- a/arch/i386/kernel/crash.c +++ b/arch/i386/kernel/crash.c @@ -120,14 +120,9 @@ static int crash_nmi_callback(struct pt_regs *regs, int cpu) return 1; } -/* - * By using the NMI code instead of a vector we just sneak thru the - * word generator coming out with just what we want. AND it does - * not matter if clustered_apic_mode is set or not. - */ static void smp_send_nmi_allbutself(void) { - send_IPI_allbutself(APIC_DM_NMI); + send_IPI_allbutself(NMI_VECTOR); } static void nmi_shootdown_cpus(void) diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c index d134e96..c10789d 100644 --- a/arch/i386/kernel/smp.c +++ b/arch/i386/kernel/smp.c @@ -114,7 +114,17 @@ DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) ____cacheline_aligned = { &init_m static inline int __prepare_ICR (unsigned int shortcut, int vector) { - return APIC_DM_FIXED | shortcut | vector | APIC_DEST_LOGICAL; + unsigned int icr = shortcut | APIC_DEST_LOGICAL; + + switch (vector) { + default: + icr |= APIC_DM_FIXED | vector; + break; + case NMI_VECTOR: + icr |= APIC_DM_NMI; + break; + } + return icr; } static inline int __prepare_ICR2 (unsigned int mask) -- cgit v1.1 From 704fc59e1d056de80beaf30174bc8e0b1682efbb Mon Sep 17 00:00:00 2001 From: "Siddha, Suresh B" Date: Mon, 26 Jun 2006 13:59:53 +0200 Subject: [PATCH] x86_64: fix apic error on bootup Appended patch fixes the "APIC error on CPUX: 00(40)" observed during bootup. From SDM Vol-3A "Valid Interrupt Vectors" section: "When an illegal vector value (0-15) is written to an LVT entry and the delivery mode is Fixed, the APIC may signal an illegal vector error, with out regard to whether the mask bit is set or whether an interrupt is actually seen on input." Signed-off-by: Suresh Siddha Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/i386/kernel/apic.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c index 5f197e1..7ce0949 100644 --- a/arch/i386/kernel/apic.c +++ b/arch/i386/kernel/apic.c @@ -157,7 +157,7 @@ void clear_local_APIC(void) maxlvt = get_maxlvt(); /* - * Masking an LVT entry on a P6 can trigger a local APIC error + * Masking an LVT entry can trigger a local APIC error * if the vector is zero. Mask LVTERR first to prevent this. */ if (maxlvt >= 3) { @@ -1118,7 +1118,18 @@ void disable_APIC_timer(void) unsigned long v; v = apic_read(APIC_LVTT); - apic_write_around(APIC_LVTT, v | APIC_LVT_MASKED); + /* + * When an illegal vector value (0-15) is written to an LVT + * entry and delivery mode is Fixed, the APIC may signal an + * illegal vector error, with out regard to whether the mask + * bit is set or whether an interrupt is actually seen on input. + * + * Boot sequence might call this function when the LVTT has + * '0' vector value. So make sure vector field is set to + * valid value. 
+ */ + v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); + apic_write_around(APIC_LVTT, v); } } -- cgit v1.1 From 0080e667550db5ae8c9318181500c413b99ff164 Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi Date: Mon, 26 Jun 2006 13:59:59 +0200 Subject: [PATCH] x86_64: i386/x86-64 Add nmi watchdog support for new Intel CPUs Intel now has support for Architectural Performance Monitoring Counters ( Refer to IA-32 Intel Architecture Software Developer's Manual http://www.intel.com/design/pentium4/manuals/253669.htm ). This feature is present starting from Intel Core Duo and Intel Core Solo processors. What this means is, the performance monitoring counters and some performance monitoring events are now defined in an architectural way (using cpuid). And there will be no need to check for family/model etc for these architectural events. Below is the patch to use this performance counters in nmi watchdog driver. Patch handles both i386 and x86-64 kernels. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/i386/kernel/cpu/intel.c | 6 ++++ arch/i386/kernel/nmi.c | 65 +++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 70 insertions(+), 1 deletion(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c index 5386b29..10afc64 100644 --- a/arch/i386/kernel/cpu/intel.c +++ b/arch/i386/kernel/cpu/intel.c @@ -122,6 +122,12 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) select_idle_routine(c); l2 = init_intel_cacheinfo(c); + if (c->cpuid_level > 9 ) { + unsigned eax = cpuid_eax(10); + /* Check for version and the number of counters */ + if ((eax & 0xff) && (((eax>>8) & 0xff) > 1)) + set_bit(X86_FEATURE_ARCH_PERFMON, c->x86_capability); + } /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */ if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index bd38754..a76e931 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c @@ -24,6 +24,7 @@ #include #include +#include #include "mach_traps.h" @@ -95,6 +96,9 @@ int nmi_active; (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \ P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE) +#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL +#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK + #ifdef CONFIG_SMP /* The performance counters used by NMI_LOCAL_APIC don't trigger when * the CPU is idle. To make sure the NMI watchdog really ticks on all @@ -207,6 +211,8 @@ static int __init setup_nmi_watchdog(char *str) __setup("nmi_watchdog=", setup_nmi_watchdog); +static void disable_intel_arch_watchdog(void); + static void disable_lapic_nmi_watchdog(void) { if (nmi_active <= 0) @@ -216,6 +222,10 @@ static void disable_lapic_nmi_watchdog(void) wrmsr(MSR_K7_EVNTSEL0, 0, 0); break; case X86_VENDOR_INTEL: + if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { + disable_intel_arch_watchdog(); + break; + } switch (boot_cpu_data.x86) { case 6: if (boot_cpu_data.x86_model > 0xd) @@ -444,6 +454,53 @@ static int setup_p4_watchdog(void) return 1; } +static void disable_intel_arch_watchdog(void) +{ + unsigned ebx; + + /* + * Check whether the Architectural PerfMon supports + * Unhalted Core Cycles Event or not. + * NOTE: Corresponding bit = 0 in ebp indicates event present. 
+ */ + ebx = cpuid_ebx(10); + if (!(ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) + wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, 0, 0); +} + +static int setup_intel_arch_watchdog(void) +{ + unsigned int evntsel; + unsigned ebx; + + /* + * Check whether the Architectural PerfMon supports + * Unhalted Core Cycles Event or not. + * NOTE: Corresponding bit = 0 in ebp indicates event present. + */ + ebx = cpuid_ebx(10); + if ((ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) + return 0; + + nmi_perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0; + + clear_msr_range(MSR_ARCH_PERFMON_EVENTSEL0, 2); + clear_msr_range(MSR_ARCH_PERFMON_PERFCTR0, 2); + + evntsel = ARCH_PERFMON_EVENTSEL_INT + | ARCH_PERFMON_EVENTSEL_OS + | ARCH_PERFMON_EVENTSEL_USR + | ARCH_PERFMON_NMI_EVENT_SEL + | ARCH_PERFMON_NMI_EVENT_UMASK; + + wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0); + write_watchdog_counter("INTEL_ARCH_PERFCTR0"); + apic_write(APIC_LVTPC, APIC_DM_NMI); + evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; + wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0); + return 1; +} + void setup_apic_nmi_watchdog (void) { switch (boot_cpu_data.x86_vendor) { @@ -453,6 +510,11 @@ void setup_apic_nmi_watchdog (void) setup_k7_watchdog(); break; case X86_VENDOR_INTEL: + if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { + if (!setup_intel_arch_watchdog()) + return; + break; + } switch (boot_cpu_data.x86) { case 6: if (boot_cpu_data.x86_model > 0xd) @@ -556,7 +618,8 @@ void nmi_watchdog_tick (struct pt_regs * regs) wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0); apic_write(APIC_LVTPC, APIC_DM_NMI); } - else if (nmi_perfctr_msr == MSR_P6_PERFCTR0) { + else if (nmi_perfctr_msr == MSR_P6_PERFCTR0 || + nmi_perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) { /* Only P6 based Pentium M need to re-unmask * the apic vector but it doesn't hurt * other P6 variant */ -- cgit v1.1
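For reference, here is a stand-alone sketch of how the CPUID leaf 10 (0xA) data used by the two hunks above is interpreted; the structure and helper names are invented for the example, and the bit layout is the architectural-perfmon one the changelog points to: EAX[7:0] carries the perfmon version, EAX[15:8] the number of general-purpose counters, and bit 0 of EBX is set when the Unhalted Core Cycles event is not available.

    struct arch_perfmon_sketch {
            unsigned int version;           /* EAX[7:0], 0 means unsupported   */
            unsigned int num_counters;      /* EAX[15:8], GP counters per core */
            int core_cycles_available;      /* EBX bit 0 clear => available    */
    };

    /* GCC-style inline asm; 32-bit PIC builds may need extra care with %ebx. */
    static void cpuid_leaf_a(unsigned int *eax, unsigned int *ebx)
    {
            unsigned int a = 10, b, c = 0, d;

            __asm__ __volatile__("cpuid"
                                 : "=a" (a), "=b" (b), "=c" (c), "=d" (d)
                                 : "0" (a), "2" (c));
            *eax = a;
            *ebx = b;
    }

    static int detect_arch_perfmon(struct arch_perfmon_sketch *pm)
    {
            unsigned int eax, ebx;

            cpuid_leaf_a(&eax, &ebx);
            pm->version               = eax & 0xff;
            pm->num_counters          = (eax >> 8) & 0xff;
            pm->core_cycles_available = !(ebx & 1);

            /* same criterion as the init_intel() hunk: non-zero version
             * and more than one counter before claiming ARCH_PERFMON   */
            return pm->version != 0 && pm->num_counters > 1;
    }

The nmi.c checks above then look only at the EBX bit, skipping watchdog setup when the Unhalted Core Cycles event is reported as absent.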