From 6400c8a382ded237df5dc1605f0d06e7939ff4da Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 18 Nov 2011 10:55:35 -0800 Subject: percpu: fix chunk range calculation commit a855b84c3d8c73220d4d3cd392a7bee7c83de70e upstream. Percpu allocator recorded the cpus which map to the first and last units in pcpu_first/last_unit_cpu respectively and used them to determine the address range of a chunk - e.g. it assumed that the first unit has the lowest address in a chunk while the last unit has the highest address. This simply isn't true. Groups in a chunk can have arbitrary positive or negative offsets from the previous one and there is no guarantee that the first unit occupies the lowest offset while the last one the highest. Fix it by actually comparing unit offsets to determine cpus occupying the lowest and highest offsets. Also, rename pcu_first/last_unit_cpu to pcpu_low/high_unit_cpu to avoid confusion. The chunk address range is used to flush cache on vmalloc area map/unmap and decide whether a given address is in the first chunk by per_cpu_ptr_to_phys() and the bug was discovered by invalid per_cpu_ptr_to_phys() translation for crash_note. Kudos to Dave Young for tracking down the problem. Signed-off-by: Tejun Heo Reported-by: WANG Cong Reported-by: Dave Young Tested-by: Dave Young LKML-Reference: <4EC21F67.10905@redhat.com> Signed-off-by: Thomas Renninger Signed-off-by: Greg Kroah-Hartman --- mm/percpu.c | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) (limited to 'mm/percpu.c') diff --git a/mm/percpu.c b/mm/percpu.c index bf80e55..93b5a7c 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -116,9 +116,9 @@ static int pcpu_atom_size __read_mostly; static int pcpu_nr_slots __read_mostly; static size_t pcpu_chunk_struct_size __read_mostly; -/* cpus with the lowest and highest unit numbers */ -static unsigned int pcpu_first_unit_cpu __read_mostly; -static unsigned int pcpu_last_unit_cpu __read_mostly; +/* cpus with the lowest and highest unit addresses */ +static unsigned int pcpu_low_unit_cpu __read_mostly; +static unsigned int pcpu_high_unit_cpu __read_mostly; /* the address of the first chunk which starts with the kernel static area */ void *pcpu_base_addr __read_mostly; @@ -984,19 +984,19 @@ phys_addr_t per_cpu_ptr_to_phys(void *addr) { void __percpu *base = __addr_to_pcpu_ptr(pcpu_base_addr); bool in_first_chunk = false; - unsigned long first_start, first_end; + unsigned long first_low, first_high; unsigned int cpu; /* - * The following test on first_start/end isn't strictly + * The following test on unit_low/high isn't strictly * necessary but will speed up lookups of addresses which * aren't in the first chunk. */ - first_start = pcpu_chunk_addr(pcpu_first_chunk, pcpu_first_unit_cpu, 0); - first_end = pcpu_chunk_addr(pcpu_first_chunk, pcpu_last_unit_cpu, - pcpu_unit_pages); - if ((unsigned long)addr >= first_start && - (unsigned long)addr < first_end) { + first_low = pcpu_chunk_addr(pcpu_first_chunk, pcpu_low_unit_cpu, 0); + first_high = pcpu_chunk_addr(pcpu_first_chunk, pcpu_high_unit_cpu, + pcpu_unit_pages); + if ((unsigned long)addr >= first_low && + (unsigned long)addr < first_high) { for_each_possible_cpu(cpu) { void *start = per_cpu_ptr(base, cpu); @@ -1233,7 +1233,9 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, for (cpu = 0; cpu < nr_cpu_ids; cpu++) unit_map[cpu] = UINT_MAX; - pcpu_first_unit_cpu = NR_CPUS; + + pcpu_low_unit_cpu = NR_CPUS; + pcpu_high_unit_cpu = NR_CPUS; for (group = 0, unit = 0; group < ai->nr_groups; group++, unit += i) { const struct pcpu_group_info *gi = &ai->groups[group]; @@ -1253,9 +1255,13 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, unit_map[cpu] = unit + i; unit_off[cpu] = gi->base_offset + i * ai->unit_size; - if (pcpu_first_unit_cpu == NR_CPUS) - pcpu_first_unit_cpu = cpu; - pcpu_last_unit_cpu = cpu; + /* determine low/high unit_cpu */ + if (pcpu_low_unit_cpu == NR_CPUS || + unit_off[cpu] < unit_off[pcpu_low_unit_cpu]) + pcpu_low_unit_cpu = cpu; + if (pcpu_high_unit_cpu == NR_CPUS || + unit_off[cpu] > unit_off[pcpu_high_unit_cpu]) + pcpu_high_unit_cpu = cpu; } } pcpu_nr_units = unit; -- cgit v1.1 From d051424f29eac6b4c173365a3ba5b9bb76870ce6 Mon Sep 17 00:00:00 2001 From: Eugene Surovegin Date: Thu, 15 Dec 2011 11:25:59 -0800 Subject: percpu: fix per_cpu_ptr_to_phys() handling of non-page-aligned addresses commit 9f57bd4d6dc69a4e3bf43044fa00fcd24dd363e3 upstream. per_cpu_ptr_to_phys() incorrectly rounds up its result for non-kmalloc case to the page boundary, which is bogus for any non-page-aligned address. This affects the only in-tree user of this function - sysfs handler for per-cpu 'crash_notes' physical address. The trouble is that the crash_notes per-cpu variable is not page-aligned: crash_notes = 0xc08e8ed4 PER-CPU OFFSET VALUES: CPU 0: 3711f000 CPU 1: 37129000 CPU 2: 37133000 CPU 3: 3713d000 So, the per-cpu addresses are: crash_notes on CPU 0: f7a07ed4 => phys 36b57ed4 crash_notes on CPU 1: f7a11ed4 => phys 36b4ded4 crash_notes on CPU 2: f7a1bed4 => phys 36b43ed4 crash_notes on CPU 3: f7a25ed4 => phys 36b39ed4 However, /sys/devices/system/cpu/cpu*/crash_notes says: /sys/devices/system/cpu/cpu0/crash_notes: 36b57000 /sys/devices/system/cpu/cpu1/crash_notes: 36b4d000 /sys/devices/system/cpu/cpu2/crash_notes: 36b43000 /sys/devices/system/cpu/cpu3/crash_notes: 36b39000 As you can see, all values are rounded down to a page boundary. Consequently, this is where kexec sets up the NOTE segments, and thus where the secondary kernel is looking for them. However, when the first kernel crashes, it saves the notes to the unaligned addresses, where they are not found. Fix it by adding offset_in_page() to the translated page address. -tj: Combined Eugene's and Petr's commit messages. Signed-off-by: Eugene Surovegin Signed-off-by: Tejun Heo Reported-by: Petr Tesarik Signed-off-by: Greg Kroah-Hartman --- mm/percpu.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'mm/percpu.c') diff --git a/mm/percpu.c b/mm/percpu.c index 93b5a7c..0ae7a09 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1011,9 +1011,11 @@ phys_addr_t per_cpu_ptr_to_phys(void *addr) if (!is_vmalloc_addr(addr)) return __pa(addr); else - return page_to_phys(vmalloc_to_page(addr)); + return page_to_phys(vmalloc_to_page(addr)) + + offset_in_page(addr); } else - return page_to_phys(pcpu_addr_to_page(addr)); + return page_to_phys(pcpu_addr_to_page(addr)) + + offset_in_page(addr); } /** -- cgit v1.1