aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2008-07-08 11:59:23 +0200
committerIngo Molnar <mingo@elte.hu>2008-07-08 11:59:23 +0200
commit2b4fa851b2f06fdb04cac808b57324f5e51e1578 (patch)
tree97db3ad5adda7683923630982f68b8b52c86e790 /arch/x86/kernel
parent3de352bbd86f890dd0c5e1c09a6a1b0b29e0f8ce (diff)
parent46f68e1c6b04a04772e828ff3bcd07ed708805c2 (diff)
downloadkernel_samsung_crespo-2b4fa851b2f06fdb04cac808b57324f5e51e1578.zip
kernel_samsung_crespo-2b4fa851b2f06fdb04cac808b57324f5e51e1578.tar.gz
kernel_samsung_crespo-2b4fa851b2f06fdb04cac808b57324f5e51e1578.tar.bz2
Merge branch 'x86/numa' into x86/devel
Conflicts: arch/x86/Kconfig arch/x86/kernel/e820.c arch/x86/kernel/efi_64.c arch/x86/kernel/mpparse.c arch/x86/kernel/setup.c arch/x86/kernel/setup_32.c arch/x86/mm/init_64.c include/asm-x86/proto.h Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/apic_32.c9
-rw-r--r--arch/x86/kernel/apic_64.c11
-rw-r--r--arch/x86/kernel/head64.c22
-rw-r--r--arch/x86/kernel/nmi_64.c4
-rw-r--r--arch/x86/kernel/setup.c297
-rw-r--r--arch/x86/kernel/setup64.c8
-rw-r--r--arch/x86/kernel/setup_32.c24
-rw-r--r--arch/x86/kernel/setup_64.c9
-rw-r--r--arch/x86/kernel/smpboot.c81
9 files changed, 352 insertions, 113 deletions
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index 570c362..84ce106 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -52,9 +52,6 @@
unsigned long mp_lapic_addr;
-DEFINE_PER_CPU(u16, x86_bios_cpu_apicid) = BAD_APICID;
-EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
-
/*
* Knob to control our willingness to enable the local APIC.
*
@@ -1546,9 +1543,9 @@ void __cpuinit generic_processor_info(int apicid, int version)
}
#ifdef CONFIG_SMP
/* are we being called early in kernel startup? */
- if (x86_cpu_to_apicid_early_ptr) {
- u16 *cpu_to_apicid = x86_cpu_to_apicid_early_ptr;
- u16 *bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr;
+ if (early_per_cpu_ptr(x86_cpu_to_apicid)) {
+ u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
+ u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
cpu_to_apicid[cpu] = apicid;
bios_cpu_apicid[cpu] = apicid;
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index d7406aa..e494809 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -90,9 +90,6 @@ static unsigned long apic_phys;
unsigned long mp_lapic_addr;
-DEFINE_PER_CPU(u16, x86_bios_cpu_apicid) = BAD_APICID;
-EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
-
unsigned int __cpuinitdata maxcpus = NR_CPUS;
/*
* Get the LAPIC version
@@ -1075,9 +1072,9 @@ void __cpuinit generic_processor_info(int apicid, int version)
max_physical_apicid = apicid;
/* are we being called early in kernel startup? */
- if (x86_cpu_to_apicid_early_ptr) {
- u16 *cpu_to_apicid = x86_cpu_to_apicid_early_ptr;
- u16 *bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr;
+ if (early_per_cpu_ptr(x86_cpu_to_apicid)) {
+ u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
+ u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
cpu_to_apicid[cpu] = apicid;
bios_cpu_apicid[cpu] = apicid;
@@ -1253,7 +1250,7 @@ __cpuinit int apic_is_clustered_box(void)
if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box())
return 0;
- bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr;
+ bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
for (i = 0; i < NR_CPUS; i++) {
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 5fbed45..c970929 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -25,6 +25,20 @@
#include <asm/e820.h>
#include <asm/bios_ebda.h>
+/* boot cpu pda */
+static struct x8664_pda _boot_cpu_pda __read_mostly;
+
+#ifdef CONFIG_SMP
+/*
+ * We install an empty cpu_pda pointer table to indicate to early users
+ * (numa_set_node) that the cpu_pda pointer table for cpus other than
+ * the boot cpu is not yet setup.
+ */
+static struct x8664_pda *__cpu_pda[NR_CPUS] __initdata;
+#else
+static struct x8664_pda *__cpu_pda[NR_CPUS] __read_mostly;
+#endif
+
static void __init zap_identity_mappings(void)
{
pgd_t *pgd = pgd_offset_k(0UL);
@@ -88,10 +102,12 @@ void __init x86_64_start_kernel(char * real_mode_data)
early_printk("Kernel alive\n");
- for (i = 0; i < NR_CPUS; i++)
- cpu_pda(i) = &boot_cpu_pda[i];
-
+ _cpu_pda = __cpu_pda;
+ cpu_pda(0) = &_boot_cpu_pda;
pda_init(0);
+
+ early_printk("Kernel really alive\n");
+
copy_bootdata(__va(real_mode_data));
reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS");
diff --git a/arch/x86/kernel/nmi_64.c b/arch/x86/kernel/nmi_64.c
index 0060e44..d62f3b6 100644
--- a/arch/x86/kernel/nmi_64.c
+++ b/arch/x86/kernel/nmi_64.c
@@ -90,7 +90,7 @@ int __init check_nmi_watchdog(void)
if (!atomic_read(&nmi_active))
return 0;
- prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
+ prev_nmi_count = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL);
if (!prev_nmi_count)
goto error;
@@ -101,7 +101,7 @@ int __init check_nmi_watchdog(void)
smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0);
#endif
- for (cpu = 0; cpu < NR_CPUS; cpu++)
+ for (cpu = 0; cpu < nr_cpu_ids; cpu++)
prev_nmi_count[cpu] = cpu_pda(cpu)->__nmi_count;
local_irq_enable();
mdelay((20*1000)/nmi_hz); // wait 20 ticks
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 5b0de38..ebb0a2b 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -20,13 +20,34 @@ unsigned int boot_cpu_physical_apicid = -1U;
unsigned int max_physical_apicid;
EXPORT_SYMBOL(boot_cpu_physical_apicid);
-DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID;
-EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid);
-
/* Bitmask of physically existing CPUs */
physid_mask_t phys_cpu_present_map;
#endif
+/* map cpu index to physical APIC ID */
+DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID);
+DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID);
+EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
+EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
+
+#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
+#define X86_64_NUMA 1
+
+/* map cpu index to node index */
+DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
+EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
+
+/* which logical CPUs are on which nodes */
+cpumask_t *node_to_cpumask_map;
+EXPORT_SYMBOL(node_to_cpumask_map);
+
+/* setup node_to_cpumask_map */
+static void __init setup_node_to_cpumask_map(void);
+
+#else
+static inline void setup_node_to_cpumask_map(void) { }
+#endif
+
#if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_X86_SMP)
/*
* Copy data used in early init routines from the initial arrays to the
@@ -38,20 +59,21 @@ static void __init setup_per_cpu_maps(void)
int cpu;
for_each_possible_cpu(cpu) {
- per_cpu(x86_cpu_to_apicid, cpu) = x86_cpu_to_apicid_init[cpu];
+ per_cpu(x86_cpu_to_apicid, cpu) =
+ early_per_cpu_map(x86_cpu_to_apicid, cpu);
per_cpu(x86_bios_cpu_apicid, cpu) =
- x86_bios_cpu_apicid_init[cpu];
-#ifdef CONFIG_NUMA
+ early_per_cpu_map(x86_bios_cpu_apicid, cpu);
+#ifdef X86_64_NUMA
per_cpu(x86_cpu_to_node_map, cpu) =
- x86_cpu_to_node_map_init[cpu];
+ early_per_cpu_map(x86_cpu_to_node_map, cpu);
#endif
}
/* indicate the early static arrays will soon be gone */
- x86_cpu_to_apicid_early_ptr = NULL;
- x86_bios_cpu_apicid_early_ptr = NULL;
-#ifdef CONFIG_NUMA
- x86_cpu_to_node_map_early_ptr = NULL;
+ early_per_cpu_ptr(x86_cpu_to_apicid) = NULL;
+ early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL;
+#ifdef X86_64_NUMA
+ early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
#endif
}
@@ -80,6 +102,50 @@ static inline void setup_cpumask_of_cpu(void) { }
*/
unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(__per_cpu_offset);
+static inline void setup_cpu_pda_map(void) { }
+
+#elif !defined(CONFIG_SMP)
+static inline void setup_cpu_pda_map(void) { }
+
+#else /* CONFIG_SMP && CONFIG_X86_64 */
+
+/*
+ * Allocate cpu_pda pointer table and array via alloc_bootmem.
+ */
+static void __init setup_cpu_pda_map(void)
+{
+ char *pda;
+ struct x8664_pda **new_cpu_pda;
+ unsigned long size;
+ int cpu;
+
+ size = roundup(sizeof(struct x8664_pda), cache_line_size());
+
+ /* allocate cpu_pda array and pointer table */
+ {
+ unsigned long tsize = nr_cpu_ids * sizeof(void *);
+ unsigned long asize = size * (nr_cpu_ids - 1);
+
+ tsize = roundup(tsize, cache_line_size());
+ new_cpu_pda = alloc_bootmem(tsize + asize);
+ pda = (char *)new_cpu_pda + tsize;
+ }
+
+ /* initialize pointer table to static pda's */
+ for_each_possible_cpu(cpu) {
+ if (cpu == 0) {
+ /* leave boot cpu pda in place */
+ new_cpu_pda[0] = cpu_pda(0);
+ continue;
+ }
+ new_cpu_pda[cpu] = (struct x8664_pda *)pda;
+ new_cpu_pda[cpu]->in_bootmem = 1;
+ pda += size;
+ }
+
+ /* point to new pointer table */
+ _cpu_pda = new_cpu_pda;
+}
#endif
/*
@@ -89,50 +155,52 @@ EXPORT_SYMBOL(__per_cpu_offset);
*/
void __init setup_per_cpu_areas(void)
{
- int i, highest_cpu = 0;
- unsigned long size;
+ ssize_t size = PERCPU_ENOUGH_ROOM;
+ char *ptr;
+ int cpu;
#ifdef CONFIG_HOTPLUG_CPU
prefill_possible_map();
+#else
+ nr_cpu_ids = num_processors;
#endif
+ /* Setup cpu_pda map */
+ setup_cpu_pda_map();
+
/* Copy section for each CPU (we discard the original) */
size = PERCPU_ENOUGH_ROOM;
- printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n",
+ printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n",
size);
- for_each_possible_cpu(i) {
- char *ptr;
+ for_each_possible_cpu(cpu) {
#ifndef CONFIG_NEED_MULTIPLE_NODES
ptr = alloc_bootmem_pages(size);
#else
- int node = early_cpu_to_node(i);
+ int node = early_cpu_to_node(cpu);
if (!node_online(node) || !NODE_DATA(node)) {
ptr = alloc_bootmem_pages(size);
printk(KERN_INFO
- "cpu %d has no node or node-local memory\n", i);
+ "cpu %d has no node %d or node-local memory\n",
+ cpu, node);
}
else
ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
#endif
- if (!ptr)
- panic("Cannot allocate cpu data for CPU %d\n", i);
-#ifdef CONFIG_X86_64
- cpu_pda(i)->data_offset = ptr - __per_cpu_start;
-#else
- __per_cpu_offset[i] = ptr - __per_cpu_start;
-#endif
+ per_cpu_offset(cpu) = ptr - __per_cpu_start;
memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
- highest_cpu = i;
}
- nr_cpu_ids = highest_cpu + 1;
- printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d\n", NR_CPUS, nr_cpu_ids);
+ printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n",
+ NR_CPUS, nr_cpu_ids, nr_node_ids);
/* Setup percpu data maps */
setup_per_cpu_maps();
+ /* Setup node to cpumask map */
+ setup_node_to_cpumask_map();
+
/* Setup cpumask_of_cpu map */
setup_cpumask_of_cpu();
}
@@ -163,3 +231,176 @@ void __init parse_setup_data(void)
early_iounmap(data, PAGE_SIZE);
}
}
+
+#ifdef X86_64_NUMA
+
+/*
+ * Allocate node_to_cpumask_map based on number of available nodes
+ * Requires node_possible_map to be valid.
+ *
+ * Note: node_to_cpumask() is not valid until after this is done.
+ */
+static void __init setup_node_to_cpumask_map(void)
+{
+ unsigned int node, num = 0;
+ cpumask_t *map;
+
+ /* setup nr_node_ids if not done yet */
+ if (nr_node_ids == MAX_NUMNODES) {
+ for_each_node_mask(node, node_possible_map)
+ num = node;
+ nr_node_ids = num + 1;
+ }
+
+ /* allocate the map */
+ map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t));
+
+ Dprintk(KERN_DEBUG "Node to cpumask map at %p for %d nodes\n",
+ map, nr_node_ids);
+
+ /* node_to_cpumask() will now work */
+ node_to_cpumask_map = map;
+}
+
+void __cpuinit numa_set_node(int cpu, int node)
+{
+ int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
+
+ if (cpu_pda(cpu) && node != NUMA_NO_NODE)
+ cpu_pda(cpu)->nodenumber = node;
+
+ if (cpu_to_node_map)
+ cpu_to_node_map[cpu] = node;
+
+ else if (per_cpu_offset(cpu))
+ per_cpu(x86_cpu_to_node_map, cpu) = node;
+
+ else
+ Dprintk(KERN_INFO "Setting node for non-present cpu %d\n", cpu);
+}
+
+void __cpuinit numa_clear_node(int cpu)
+{
+ numa_set_node(cpu, NUMA_NO_NODE);
+}
+
+#ifndef CONFIG_DEBUG_PER_CPU_MAPS
+
+void __cpuinit numa_add_cpu(int cpu)
+{
+ cpu_set(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
+}
+
+void __cpuinit numa_remove_cpu(int cpu)
+{
+ cpu_clear(cpu, node_to_cpumask_map[cpu_to_node(cpu)]);
+}
+
+#else /* CONFIG_DEBUG_PER_CPU_MAPS */
+
+/*
+ * --------- debug versions of the numa functions ---------
+ */
+static void __cpuinit numa_set_cpumask(int cpu, int enable)
+{
+ int node = cpu_to_node(cpu);
+ cpumask_t *mask;
+ char buf[64];
+
+ if (node_to_cpumask_map == NULL) {
+ printk(KERN_ERR "node_to_cpumask_map NULL\n");
+ dump_stack();
+ return;
+ }
+
+ mask = &node_to_cpumask_map[node];
+ if (enable)
+ cpu_set(cpu, *mask);
+ else
+ cpu_clear(cpu, *mask);
+
+ cpulist_scnprintf(buf, sizeof(buf), *mask);
+ printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
+ enable? "numa_add_cpu":"numa_remove_cpu", cpu, node, buf);
+ }
+
+void __cpuinit numa_add_cpu(int cpu)
+{
+ numa_set_cpumask(cpu, 1);
+}
+
+void __cpuinit numa_remove_cpu(int cpu)
+{
+ numa_set_cpumask(cpu, 0);
+}
+
+int cpu_to_node(int cpu)
+{
+ if (early_per_cpu_ptr(x86_cpu_to_node_map)) {
+ printk(KERN_WARNING
+ "cpu_to_node(%d): usage too early!\n", cpu);
+ dump_stack();
+ return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
+ }
+ return per_cpu(x86_cpu_to_node_map, cpu);
+}
+EXPORT_SYMBOL(cpu_to_node);
+
+/*
+ * Same function as cpu_to_node() but used if called before the
+ * per_cpu areas are setup.
+ */
+int early_cpu_to_node(int cpu)
+{
+ if (early_per_cpu_ptr(x86_cpu_to_node_map))
+ return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
+
+ if (!per_cpu_offset(cpu)) {
+ printk(KERN_WARNING
+ "early_cpu_to_node(%d): no per_cpu area!\n", cpu);
+ dump_stack();
+ return NUMA_NO_NODE;
+ }
+ return per_cpu(x86_cpu_to_node_map, cpu);
+}
+
+/*
+ * Returns a pointer to the bitmask of CPUs on Node 'node'.
+ */
+cpumask_t *_node_to_cpumask_ptr(int node)
+{
+ if (node_to_cpumask_map == NULL) {
+ printk(KERN_WARNING
+ "_node_to_cpumask_ptr(%d): no node_to_cpumask_map!\n",
+ node);
+ dump_stack();
+ return &cpu_online_map;
+ }
+ BUG_ON(node >= nr_node_ids);
+ return &node_to_cpumask_map[node];
+}
+EXPORT_SYMBOL(_node_to_cpumask_ptr);
+
+/*
+ * Returns a bitmask of CPUs on Node 'node'.
+ */
+cpumask_t node_to_cpumask(int node)
+{
+ if (node_to_cpumask_map == NULL) {
+ printk(KERN_WARNING
+ "node_to_cpumask(%d): no node_to_cpumask_map!\n", node);
+ dump_stack();
+ return cpu_online_map;
+ }
+ BUG_ON(node >= nr_node_ids);
+ return node_to_cpumask_map[node];
+}
+EXPORT_SYMBOL(node_to_cpumask);
+
+/*
+ * --------- end of debug versions of the numa functions ---------
+ */
+
+#endif /* CONFIG_DEBUG_PER_CPU_MAPS */
+
+#endif /* X86_64_NUMA */
diff --git a/arch/x86/kernel/setup64.c b/arch/x86/kernel/setup64.c
index aee0e82..631ea6c 100644
--- a/arch/x86/kernel/setup64.c
+++ b/arch/x86/kernel/setup64.c
@@ -12,6 +12,7 @@
#include <linux/bitops.h>
#include <linux/module.h>
#include <linux/kgdb.h>
+#include <linux/topology.h>
#include <asm/pda.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
@@ -34,9 +35,8 @@ struct boot_params boot_params;
cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
-struct x8664_pda *_cpu_pda[NR_CPUS] __read_mostly;
+struct x8664_pda **_cpu_pda __read_mostly;
EXPORT_SYMBOL(_cpu_pda);
-struct x8664_pda boot_cpu_pda[NR_CPUS] __cacheline_aligned;
struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
@@ -114,8 +114,10 @@ void pda_init(int cpu)
__get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
if (!pda->irqstackptr)
panic("cannot allocate irqstack for cpu %d", cpu);
- }
+ if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE)
+ pda->nodenumber = cpu_to_node(cpu);
+ }
pda->irqstackptr += IRQSTACKSIZE-64;
}
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 7e06ecd..a9b19ad 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -659,18 +659,6 @@ static void set_mca_bus(int x)
static void set_mca_bus(int x) { }
#endif
-#ifdef CONFIG_NUMA
-/*
- * In the golden day, when everything among i386 and x86_64 will be
- * integrated, this will not live here
- */
-void *x86_cpu_to_node_map_early_ptr;
-int x86_cpu_to_node_map_init[NR_CPUS] = {
- [0 ... NR_CPUS-1] = NUMA_NO_NODE
-};
-DEFINE_PER_CPU(int, x86_cpu_to_node_map) = NUMA_NO_NODE;
-#endif
-
static void probe_roms(void);
/*
@@ -866,18 +854,6 @@ void __init setup_arch(char **cmdline_p)
paravirt_post_allocator_init();
-#ifdef CONFIG_X86_SMP
- /*
- * setup to use the early static init tables during kernel startup
- * X86_SMP will exclude sub-arches that don't deal well with it.
- */
- x86_cpu_to_apicid_early_ptr = (void *)x86_cpu_to_apicid_init;
- x86_bios_cpu_apicid_early_ptr = (void *)x86_bios_cpu_apicid_init;
-#ifdef CONFIG_NUMA
- x86_cpu_to_node_map_early_ptr = (void *)x86_cpu_to_node_map_init;
-#endif
-#endif
-
#ifdef CONFIG_X86_GENERICARCH
generic_apic_probe();
#endif
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 9a87113..16ef53a 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -376,15 +376,6 @@ void __init setup_arch(char **cmdline_p)
kvmclock_init();
#endif
-#ifdef CONFIG_SMP
- /* setup to use the early static init tables during kernel startup */
- x86_cpu_to_apicid_early_ptr = (void *)x86_cpu_to_apicid_init;
- x86_bios_cpu_apicid_early_ptr = (void *)x86_bios_cpu_apicid_init;
-#ifdef CONFIG_NUMA
- x86_cpu_to_node_map_early_ptr = (void *)x86_cpu_to_node_map_init;
-#endif
-#endif
-
#ifdef CONFIG_ACPI
/*
* Initialize the ACPI boot-time table parser (gets the RSDP and SDT).
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 6be701f..ae0a7a2 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -67,22 +67,6 @@
#include <mach_wakecpu.h>
#include <smpboot_hooks.h>
-/*
- * FIXME: For x86_64, those are defined in other files. But moving them here,
- * would make the setup areas dependent on smp, which is a loss. When we
- * integrate apic between arches, we can probably do a better job, but
- * right now, they'll stay here -- glommer
- */
-
-/* which logical CPU number maps to which CPU (physical APIC ID) */
-u16 x86_cpu_to_apicid_init[NR_CPUS] __initdata =
- { [0 ... NR_CPUS-1] = BAD_APICID };
-void *x86_cpu_to_apicid_early_ptr;
-
-u16 x86_bios_cpu_apicid_init[NR_CPUS] __initdata
- = { [0 ... NR_CPUS-1] = BAD_APICID };
-void *x86_bios_cpu_apicid_early_ptr;
-
#ifdef CONFIG_X86_32
u8 apicid_2_node[MAX_APICID];
static int low_mappings;
@@ -814,6 +798,45 @@ static void __cpuinit do_fork_idle(struct work_struct *work)
complete(&c_idle->done);
}
+#ifdef CONFIG_X86_64
+/*
+ * Allocate node local memory for the AP pda.
+ *
+ * Must be called after the _cpu_pda pointer table is initialized.
+ */
+static int __cpuinit get_local_pda(int cpu)
+{
+ struct x8664_pda *oldpda, *newpda;
+ unsigned long size = sizeof(struct x8664_pda);
+ int node = cpu_to_node(cpu);
+
+ if (cpu_pda(cpu) && !cpu_pda(cpu)->in_bootmem)
+ return 0;
+
+ oldpda = cpu_pda(cpu);
+ newpda = kmalloc_node(size, GFP_ATOMIC, node);
+ if (!newpda) {
+ printk(KERN_ERR "Could not allocate node local PDA "
+ "for CPU %d on node %d\n", cpu, node);
+
+ if (oldpda)
+ return 0; /* have a usable pda */
+ else
+ return -1;
+ }
+
+ if (oldpda) {
+ memcpy(newpda, oldpda, size);
+ if (!after_bootmem)
+ free_bootmem((unsigned long)oldpda, size);
+ }
+
+ newpda->in_bootmem = 0;
+ cpu_pda(cpu) = newpda;
+ return 0;
+}
+#endif /* CONFIG_X86_64 */
+
static int __cpuinit do_boot_cpu(int apicid, int cpu)
/*
* NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
@@ -839,19 +862,11 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
}
/* Allocate node local memory for AP pdas */
- if (cpu_pda(cpu) == &boot_cpu_pda[cpu]) {
- struct x8664_pda *newpda, *pda;
- int node = cpu_to_node(cpu);
- pda = cpu_pda(cpu);
- newpda = kmalloc_node(sizeof(struct x8664_pda), GFP_ATOMIC,
- node);
- if (newpda) {
- memcpy(newpda, pda, sizeof(struct x8664_pda));
- cpu_pda(cpu) = newpda;
- } else
- printk(KERN_ERR
- "Could not allocate node local PDA for CPU %d on node %d\n",
- cpu, node);
+ if (cpu > 0) {
+ boot_error = get_local_pda(cpu);
+ if (boot_error)
+ goto restore_state;
+ /* if can't get pda memory, can't start cpu */
}
#endif
@@ -970,11 +985,13 @@ do_rest:
}
}
+restore_state:
+
if (boot_error) {
/* Try to put things back the way they were before ... */
unmap_cpu_to_logical_apicid(cpu);
#ifdef CONFIG_X86_64
- clear_node_cpumask(cpu); /* was set by numa_add_cpu */
+ numa_remove_cpu(cpu); /* was set by numa_add_cpu */
#endif
cpu_clear(cpu, cpu_callout_map); /* was set by do_boot_cpu() */
cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
@@ -1347,6 +1364,8 @@ __init void prefill_possible_map(void)
for (i = 0; i < possible; i++)
cpu_set(i, cpu_possible_map);
+
+ nr_cpu_ids = possible;
}
static void __ref remove_cpu_from_maps(int cpu)
@@ -1357,7 +1376,7 @@ static void __ref remove_cpu_from_maps(int cpu)
cpu_clear(cpu, cpu_callin_map);
/* was set by cpu_init() */
clear_bit(cpu, (unsigned long *)&cpu_initialized);
- clear_node_cpumask(cpu);
+ numa_remove_cpu(cpu);
#endif
}