aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86_64
diff options
context:
space:
mode:
authorJack F Vogel <jfv@bluesong.net>2005-05-01 08:58:48 -0700
committerLinus Torvalds <torvalds@ppc970.osdl.org>2005-05-01 08:58:48 -0700
commit67701ae9767534534d3710664037dfde2cc04935 (patch)
tree6adb8d33585f8eee20794827c79e40991aeeaee5 /arch/x86_64
parentfd51f666fa591294bd7462447512666e61c56ea0 (diff)
downloadkernel_samsung_smdk4412-67701ae9767534534d3710664037dfde2cc04935.zip
kernel_samsung_smdk4412-67701ae9767534534d3710664037dfde2cc04935.tar.gz
kernel_samsung_smdk4412-67701ae9767534534d3710664037dfde2cc04935.tar.bz2
[PATCH] check nmi watchdog is broken
A bug against an xSeries system showed up recently noting that the check_nmi_watchdog() test was failing. I have been investigating it and discovered that, in both i386 and x86_64, the recent change to the routine to use the cpu_callin_map has uncovered a problem. Prior to that change, on an SMP box, the test was trivially passing because all CPUs were found to not yet be online; now that they are discovered via the callin_map, it goes on to test the counters, which have not yet begun to increment, so it announces that a CPU is stuck and bails out. On all the systems I have access to test, the announcement of failure is also bogus... by the time you can log in and check /proc/interrupts, the NMI count is happily incrementing on all CPUs. It's just that the test is being done too early. I have tried moving the call to the test around a bit, and it was always too early. I finally hit on this proposed solution: it delays the routine via a late_initcall(), which seems like the right solution to me. Signed-off-by: Adrian Bunk <bunk@stusta.de> Cc: Andi Kleen <ak@muc.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'arch/x86_64')
-rw-r--r--arch/x86_64/kernel/io_apic.c2
-rw-r--r--arch/x86_64/kernel/nmi.c9
2 files changed, 7 insertions, 4 deletions
diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index 29a2572..60be586 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -1607,7 +1607,6 @@ static inline void check_timer(void)
disable_8259A_irq(0);
setup_nmi();
enable_8259A_irq(0);
- check_nmi_watchdog();
}
return;
}
@@ -1627,7 +1626,6 @@ static inline void check_timer(void)
nmi_watchdog_default();
if (nmi_watchdog == NMI_IO_APIC) {
setup_nmi();
- check_nmi_watchdog();
}
return;
}
diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c
index e00d4ad..61de0b3 100644
--- a/arch/x86_64/kernel/nmi.c
+++ b/arch/x86_64/kernel/nmi.c
@@ -112,17 +112,20 @@ static __init int cpu_has_lapic(void)
}
}
-int __init check_nmi_watchdog (void)
+static int __init check_nmi_watchdog (void)
{
int counts[NR_CPUS];
int cpu;
+ if (nmi_watchdog == NMI_NONE)
+ return 0;
+
if (nmi_watchdog == NMI_LOCAL_APIC && !cpu_has_lapic()) {
nmi_watchdog = NMI_NONE;
return -1;
}
- printk(KERN_INFO "testing NMI watchdog ... ");
+ printk(KERN_INFO "Testing NMI watchdog ... ");
for (cpu = 0; cpu < NR_CPUS; cpu++)
counts[cpu] = cpu_pda[cpu].__nmi_count;
@@ -148,6 +151,8 @@ int __init check_nmi_watchdog (void)
return 0;
}
+/* Have this called later during boot so counters are updating */
+late_initcall(check_nmi_watchdog);
int __init setup_nmi_watchdog(char *str)
{