From dd17c8f72993f9461e9c19250e3f155d6d99df22 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 29 Oct 2009 22:34:15 +0900 Subject: percpu: remove per_cpu__ prefix. Now that the return from alloc_percpu is compatible with the address of per-cpu vars, it makes sense to hand around the address of per-cpu variables. To make this sane, we remove the per_cpu__ prefix we used created to stop people accidentally using these vars directly. Now we have sparse, we can use that (next patch). tj: * Updated to convert stuff which were missed by or added after the original patch. * Kill per_cpu_var() macro. Signed-off-by: Rusty Russell Signed-off-by: Tejun Heo Reviewed-by: Christoph Lameter --- arch/blackfin/mach-common/entry.S | 4 ++-- arch/cris/arch-v10/kernel/entry.S | 2 +- arch/cris/arch-v32/mm/mmu.S | 2 +- arch/ia64/include/asm/percpu.h | 4 ++-- arch/ia64/kernel/ia64_ksyms.c | 4 ++-- arch/ia64/mm/discontig.c | 2 +- arch/microblaze/include/asm/entry.h | 2 +- arch/parisc/lib/fixup.S | 8 +++---- arch/powerpc/platforms/pseries/hvCall.S | 2 +- arch/sparc/kernel/nmi.c | 6 +++--- arch/sparc/kernel/rtrap_64.S | 8 +++---- arch/x86/include/asm/percpu.h | 37 +++++++++++++++------------------ arch/x86/include/asm/system.h | 8 +++---- arch/x86/kernel/apic/nmi.c | 6 +++--- arch/x86/kernel/head_32.S | 6 +++--- arch/x86/kernel/vmlinux.lds.S | 4 ++-- arch/x86/xen/xen-asm_32.S | 4 ++-- include/asm-generic/percpu.h | 12 +++++------ include/linux/percpu-defs.h | 18 ++++++---------- include/linux/percpu.h | 5 ++--- include/linux/vmstat.h | 8 +++---- kernel/rcutorture.c | 8 +++---- kernel/trace/trace.c | 6 +++--- kernel/trace/trace_functions_graph.c | 4 ++-- 24 files changed, 80 insertions(+), 90 deletions(-) diff --git a/arch/blackfin/mach-common/entry.S b/arch/blackfin/mach-common/entry.S index 1e7cac2..a3ea7e9 100644 --- a/arch/blackfin/mach-common/entry.S +++ b/arch/blackfin/mach-common/entry.S @@ -835,8 +835,8 @@ ENDPROC(_resume) ENTRY(_ret_from_exception) #ifdef CONFIG_IPIPE - p2.l = _per_cpu__ipipe_percpu_domain; - p2.h = _per_cpu__ipipe_percpu_domain; + p2.l = _ipipe_percpu_domain; + p2.h = _ipipe_percpu_domain; r0.l = _ipipe_root; r0.h = _ipipe_root; r2 = [p2]; diff --git a/arch/cris/arch-v10/kernel/entry.S b/arch/cris/arch-v10/kernel/entry.S index 2c18d08..c52bef3 100644 --- a/arch/cris/arch-v10/kernel/entry.S +++ b/arch/cris/arch-v10/kernel/entry.S @@ -358,7 +358,7 @@ mmu_bus_fault: 1: btstq 12, $r1 ; Refill? bpl 2f lsrq 24, $r1 ; Get PGD index (bit 24-31) - move.d [per_cpu__current_pgd], $r0 ; PGD for the current process + move.d [current_pgd], $r0 ; PGD for the current process move.d [$r0+$r1.d], $r0 ; Get PMD beq 2f nop diff --git a/arch/cris/arch-v32/mm/mmu.S b/arch/cris/arch-v32/mm/mmu.S index 2238d15..f125d91 100644 --- a/arch/cris/arch-v32/mm/mmu.S +++ b/arch/cris/arch-v32/mm/mmu.S @@ -115,7 +115,7 @@ #ifdef CONFIG_SMP move $s7, $acr ; PGD #else - move.d per_cpu__current_pgd, $acr ; PGD + move.d current_pgd, $acr ; PGD #endif ; Look up PMD in PGD lsrq 24, $r0 ; Get PMD index into PGD (bit 24-31) diff --git a/arch/ia64/include/asm/percpu.h b/arch/ia64/include/asm/percpu.h index 30cf465..f7c00a5 100644 --- a/arch/ia64/include/asm/percpu.h +++ b/arch/ia64/include/asm/percpu.h @@ -9,7 +9,7 @@ #define PERCPU_ENOUGH_ROOM PERCPU_PAGE_SIZE #ifdef __ASSEMBLY__ -# define THIS_CPU(var) (per_cpu__##var) /* use this to mark accesses to per-CPU variables... */ +# define THIS_CPU(var) (var) /* use this to mark accesses to per-CPU variables... 
*/ #else /* !__ASSEMBLY__ */ @@ -39,7 +39,7 @@ extern void *per_cpu_init(void); * On the positive side, using __ia64_per_cpu_var() instead of __get_cpu_var() is slightly * more efficient. */ -#define __ia64_per_cpu_var(var) per_cpu__##var +#define __ia64_per_cpu_var(var) var #include diff --git a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c index 461b999..7f4a0ed 100644 --- a/arch/ia64/kernel/ia64_ksyms.c +++ b/arch/ia64/kernel/ia64_ksyms.c @@ -30,9 +30,9 @@ EXPORT_SYMBOL(max_low_pfn); /* defined by bootmem.c, but not exported by generic #endif #include -EXPORT_SYMBOL(per_cpu__ia64_cpu_info); +EXPORT_SYMBOL(ia64_cpu_info); #ifdef CONFIG_SMP -EXPORT_SYMBOL(per_cpu__local_per_cpu_offset); +EXPORT_SYMBOL(local_per_cpu_offset); #endif #include diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index 19c4b21..8d586d1 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -459,7 +459,7 @@ static void __init initialize_pernode_data(void) cpu = 0; node = node_cpuid[cpu].nid; cpu0_cpu_info = (struct cpuinfo_ia64 *)(__phys_per_cpu_start + - ((char *)&per_cpu__ia64_cpu_info - __per_cpu_start)); + ((char *)&ia64_cpu_info - __per_cpu_start)); cpu0_cpu_info->node_data = mem_data[node].node_data; } #endif /* CONFIG_SMP */ diff --git a/arch/microblaze/include/asm/entry.h b/arch/microblaze/include/asm/entry.h index 61abbd2..ec89f2a 100644 --- a/arch/microblaze/include/asm/entry.h +++ b/arch/microblaze/include/asm/entry.h @@ -21,7 +21,7 @@ * places */ -#define PER_CPU(var) per_cpu__##var +#define PER_CPU(var) var # ifndef __ASSEMBLY__ DECLARE_PER_CPU(unsigned int, KSP); /* Saved kernel stack pointer */ diff --git a/arch/parisc/lib/fixup.S b/arch/parisc/lib/fixup.S index d172d42..f8c45cc 100644 --- a/arch/parisc/lib/fixup.S +++ b/arch/parisc/lib/fixup.S @@ -36,8 +36,8 @@ #endif /* t2 = &__per_cpu_offset[smp_processor_id()]; */ LDREGX \t2(\t1),\t2 - addil LT%per_cpu__exception_data,%r27 - LDREG RT%per_cpu__exception_data(%r1),\t1 + addil LT%exception_data,%r27 + LDREG RT%exception_data(%r1),\t1 /* t1 = &__get_cpu_var(exception_data) */ add,l \t1,\t2,\t1 /* t1 = t1->fault_ip */ @@ -46,8 +46,8 @@ #else .macro get_fault_ip t1 t2 /* t1 = &__get_cpu_var(exception_data) */ - addil LT%per_cpu__exception_data,%r27 - LDREG RT%per_cpu__exception_data(%r1),\t2 + addil LT%exception_data,%r27 + LDREG RT%exception_data(%r1),\t2 /* t1 = t2->fault_ip */ LDREG EXCDATA_IP(\t2), \t1 .endm diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S index c1427b3..580f789 100644 --- a/arch/powerpc/platforms/pseries/hvCall.S +++ b/arch/powerpc/platforms/pseries/hvCall.S @@ -55,7 +55,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_PURR); \ /* calculate address of stat structure r4 = opcode */ \ srdi r4,r4,2; /* index into array */ \ mulli r4,r4,HCALL_STAT_SIZE; \ - LOAD_REG_ADDR(r7, per_cpu__hcall_stats); \ + LOAD_REG_ADDR(r7, hcall_stats); \ add r4,r4,r7; \ ld r7,PACA_DATA_OFFSET(r13); /* per cpu offset */ \ add r4,r4,r7; \ diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c index f30f4a1..2ad288f 100644 --- a/arch/sparc/kernel/nmi.c +++ b/arch/sparc/kernel/nmi.c @@ -112,13 +112,13 @@ notrace __kprobes void perfctr_irq(int irq, struct pt_regs *regs) touched = 1; } if (!touched && __get_cpu_var(last_irq_sum) == sum) { - __this_cpu_inc(per_cpu_var(alert_counter)); - if (__this_cpu_read(per_cpu_var(alert_counter)) == 30 * nmi_hz) + __this_cpu_inc(alert_counter); + if (__this_cpu_read(alert_counter) == 30 * nmi_hz) die_nmi("BUG: NMI Watchdog detected 
LOCKUP", regs, panic_on_timeout); } else { __get_cpu_var(last_irq_sum) = sum; - __this_cpu_write(per_cpu_var(alert_counter), 0); + __this_cpu_write(alert_counter, 0); } if (__get_cpu_var(wd_enabled)) { write_pic(picl_value(nmi_hz)); diff --git a/arch/sparc/kernel/rtrap_64.S b/arch/sparc/kernel/rtrap_64.S index fd3cee4..1ddec40 100644 --- a/arch/sparc/kernel/rtrap_64.S +++ b/arch/sparc/kernel/rtrap_64.S @@ -149,11 +149,11 @@ rtrap_nmi: ldx [%sp + PTREGS_OFF + PT_V9_TSTATE], %l1 rtrap_irq: rtrap: #ifndef CONFIG_SMP - sethi %hi(per_cpu____cpu_data), %l0 - lduw [%l0 + %lo(per_cpu____cpu_data)], %l1 + sethi %hi(__cpu_data), %l0 + lduw [%l0 + %lo(__cpu_data)], %l1 #else - sethi %hi(per_cpu____cpu_data), %l0 - or %l0, %lo(per_cpu____cpu_data), %l0 + sethi %hi(__cpu_data), %l0 + or %l0, %lo(__cpu_data), %l0 lduw [%l0 + %g5], %l1 #endif cmp %l1, 0 diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 0c44196..4c170cc 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -25,19 +25,18 @@ */ #ifdef CONFIG_SMP #define PER_CPU(var, reg) \ - __percpu_mov_op %__percpu_seg:per_cpu__this_cpu_off, reg; \ - lea per_cpu__##var(reg), reg -#define PER_CPU_VAR(var) %__percpu_seg:per_cpu__##var + __percpu_mov_op %__percpu_seg:this_cpu_off, reg; \ + lea var(reg), reg +#define PER_CPU_VAR(var) %__percpu_seg:var #else /* ! SMP */ -#define PER_CPU(var, reg) \ - __percpu_mov_op $per_cpu__##var, reg -#define PER_CPU_VAR(var) per_cpu__##var +#define PER_CPU(var, reg) __percpu_mov_op $var, reg +#define PER_CPU_VAR(var) var #endif /* SMP */ #ifdef CONFIG_X86_64_SMP #define INIT_PER_CPU_VAR(var) init_per_cpu__##var #else -#define INIT_PER_CPU_VAR(var) per_cpu__##var +#define INIT_PER_CPU_VAR(var) var #endif #else /* ...!ASSEMBLY */ @@ -60,12 +59,12 @@ * There also must be an entry in vmlinux_64.lds.S */ #define DECLARE_INIT_PER_CPU(var) \ - extern typeof(per_cpu_var(var)) init_per_cpu_var(var) + extern typeof(var) init_per_cpu_var(var) #ifdef CONFIG_X86_64_SMP #define init_per_cpu_var(var) init_per_cpu__##var #else -#define init_per_cpu_var(var) per_cpu_var(var) +#define init_per_cpu_var(var) var #endif /* For arch-specific code, we can use direct single-insn ops (they @@ -142,16 +141,14 @@ do { \ * per-thread variables implemented as per-cpu variables and thus * stable for the duration of the respective task. 
*/ -#define percpu_read(var) percpu_from_op("mov", per_cpu__##var, \ - "m" (per_cpu__##var)) -#define percpu_read_stable(var) percpu_from_op("mov", per_cpu__##var, \ - "p" (&per_cpu__##var)) -#define percpu_write(var, val) percpu_to_op("mov", per_cpu__##var, val) -#define percpu_add(var, val) percpu_to_op("add", per_cpu__##var, val) -#define percpu_sub(var, val) percpu_to_op("sub", per_cpu__##var, val) -#define percpu_and(var, val) percpu_to_op("and", per_cpu__##var, val) -#define percpu_or(var, val) percpu_to_op("or", per_cpu__##var, val) -#define percpu_xor(var, val) percpu_to_op("xor", per_cpu__##var, val) +#define percpu_read(var) percpu_from_op("mov", var, "m" (var)) +#define percpu_read_stable(var) percpu_from_op("mov", var, "p" (&(var))) +#define percpu_write(var, val) percpu_to_op("mov", var, val) +#define percpu_add(var, val) percpu_to_op("add", var, val) +#define percpu_sub(var, val) percpu_to_op("sub", var, val) +#define percpu_and(var, val) percpu_to_op("and", var, val) +#define percpu_or(var, val) percpu_to_op("or", var, val) +#define percpu_xor(var, val) percpu_to_op("xor", var, val) #define __this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) #define __this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) @@ -236,7 +233,7 @@ do { \ ({ \ int old__; \ asm volatile("btr %2,"__percpu_arg(1)"\n\tsbbl %0,%0" \ - : "=r" (old__), "+m" (per_cpu__##var) \ + : "=r" (old__), "+m" (var) \ : "dIr" (bit)); \ old__; \ }) diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index f08f973..de10c19 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -31,7 +31,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, "movl %P[task_canary](%[next]), %%ebx\n\t" \ "movl %%ebx, "__percpu_arg([stack_canary])"\n\t" #define __switch_canary_oparam \ - , [stack_canary] "=m" (per_cpu_var(stack_canary.canary)) + , [stack_canary] "=m" (stack_canary.canary) #define __switch_canary_iparam \ , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) #else /* CC_STACKPROTECTOR */ @@ -113,7 +113,7 @@ do { \ "movq %P[task_canary](%%rsi),%%r8\n\t" \ "movq %%r8,"__percpu_arg([gs_canary])"\n\t" #define __switch_canary_oparam \ - , [gs_canary] "=m" (per_cpu_var(irq_stack_union.stack_canary)) + , [gs_canary] "=m" (irq_stack_union.stack_canary) #define __switch_canary_iparam \ , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) #else /* CC_STACKPROTECTOR */ @@ -134,7 +134,7 @@ do { \ __switch_canary \ "movq %P[thread_info](%%rsi),%%r8\n\t" \ "movq %%rax,%%rdi\n\t" \ - "testl %[_tif_fork],%P[ti_flags](%%r8)\n\t" \ + "testl %[_tif_fork],%P[ti_flags](%%r8)\n\t" \ "jnz ret_from_fork\n\t" \ RESTORE_CONTEXT \ : "=a" (last) \ @@ -144,7 +144,7 @@ do { \ [ti_flags] "i" (offsetof(struct thread_info, flags)), \ [_tif_fork] "i" (_TIF_FORK), \ [thread_info] "i" (offsetof(struct task_struct, stack)), \ - [current_task] "m" (per_cpu_var(current_task)) \ + [current_task] "m" (current_task) \ __switch_canary_iparam \ : "memory", "cc" __EXTRA_CLOBBER) #endif diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c index e631cc4..4540437 100644 --- a/arch/x86/kernel/apic/nmi.c +++ b/arch/x86/kernel/apic/nmi.c @@ -437,8 +437,8 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) * Ayiee, looks like this CPU is stuck ... * wait a few IRQs (5 seconds) before doing the oops ... 
*/ - __this_cpu_inc(per_cpu_var(alert_counter)); - if (__this_cpu_read(per_cpu_var(alert_counter)) == 5 * nmi_hz) + __this_cpu_inc(alert_counter); + if (__this_cpu_read(alert_counter) == 5 * nmi_hz) /* * die_nmi will return ONLY if NOTIFY_STOP happens.. */ @@ -446,7 +446,7 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) regs, panic_on_timeout); } else { __get_cpu_var(last_irq_sum) = sum; - __this_cpu_write(per_cpu_var(alert_counter), 0); + __this_cpu_write(alert_counter, 0); } /* see if the nmi watchdog went off */ diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 050c278..fd39eaf 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -438,8 +438,8 @@ is386: movl $2,%ecx # set MP */ cmpb $0,ready jne 1f - movl $per_cpu__gdt_page,%eax - movl $per_cpu__stack_canary,%ecx + movl $gdt_page,%eax + movl $stack_canary,%ecx movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax) shrl $16, %ecx movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax) @@ -702,7 +702,7 @@ idt_descr: .word 0 # 32 bit align gdt_desc.address ENTRY(early_gdt_descr) .word GDT_ENTRIES*8-1 - .long per_cpu__gdt_page /* Overwritten for secondary CPUs */ + .long gdt_page /* Overwritten for secondary CPUs */ /* * The boot_gdt must mirror the equivalent in setup.S and is diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 92929fb..ecb9271 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -312,7 +312,7 @@ SECTIONS * Per-cpu symbols which need to be offset from __per_cpu_load * for the boot processor. */ -#define INIT_PER_CPU(x) init_per_cpu__##x = per_cpu__##x + __per_cpu_load +#define INIT_PER_CPU(x) init_per_cpu__##x = x + __per_cpu_load INIT_PER_CPU(gdt_page); INIT_PER_CPU(irq_stack_union); @@ -323,7 +323,7 @@ INIT_PER_CPU(irq_stack_union); "kernel image bigger than KERNEL_IMAGE_SIZE"); #ifdef CONFIG_SMP -. = ASSERT((per_cpu__irq_stack_union == 0), +. = ASSERT((irq_stack_union == 0), "irq_stack_union is not at start of per-cpu area"); #endif diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S index 88e15de..22a2093 100644 --- a/arch/x86/xen/xen-asm_32.S +++ b/arch/x86/xen/xen-asm_32.S @@ -90,9 +90,9 @@ ENTRY(xen_iret) GET_THREAD_INFO(%eax) movl TI_cpu(%eax), %eax movl __per_cpu_offset(,%eax,4), %eax - mov per_cpu__xen_vcpu(%eax), %eax + mov xen_vcpu(%eax), %eax #else - movl per_cpu__xen_vcpu, %eax + movl xen_vcpu, %eax #endif /* check IF state we're restoring */ diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index 8087b90..ca6f049 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -50,11 +50,11 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; * offset. */ #define per_cpu(var, cpu) \ - (*SHIFT_PERCPU_PTR(&per_cpu_var(var), per_cpu_offset(cpu))) + (*SHIFT_PERCPU_PTR(&(var), per_cpu_offset(cpu))) #define __get_cpu_var(var) \ - (*SHIFT_PERCPU_PTR(&per_cpu_var(var), my_cpu_offset)) + (*SHIFT_PERCPU_PTR(&(var), my_cpu_offset)) #define __raw_get_cpu_var(var) \ - (*SHIFT_PERCPU_PTR(&per_cpu_var(var), __my_cpu_offset)) + (*SHIFT_PERCPU_PTR(&(var), __my_cpu_offset)) #define this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, my_cpu_offset) #define __this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, __my_cpu_offset) @@ -66,9 +66,9 @@ extern void setup_per_cpu_areas(void); #else /* ! 
SMP */ -#define per_cpu(var, cpu) (*((void)(cpu), &per_cpu_var(var))) -#define __get_cpu_var(var) per_cpu_var(var) -#define __raw_get_cpu_var(var) per_cpu_var(var) +#define per_cpu(var, cpu) (*((void)(cpu), &(var))) +#define __get_cpu_var(var) (var) +#define __raw_get_cpu_var(var) (var) #define this_cpu_ptr(ptr) per_cpu_ptr(ptr, 0) #define __this_cpu_ptr(ptr) this_cpu_ptr(ptr) diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index 5a5d6ce..ee99f6c 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -2,12 +2,6 @@ #define _LINUX_PERCPU_DEFS_H /* - * Determine the real variable name from the name visible in the - * kernel sources. - */ -#define per_cpu_var(var) per_cpu__##var - -/* * Base implementations of per-CPU variable declarations and definitions, where * the section in which the variable is to be placed is provided by the * 'sec' argument. This may be used to affect the parameters governing the @@ -56,24 +50,24 @@ */ #define DECLARE_PER_CPU_SECTION(type, name, sec) \ extern __PCPU_DUMMY_ATTRS char __pcpu_scope_##name; \ - extern __PCPU_ATTRS(sec) __typeof__(type) per_cpu__##name + extern __PCPU_ATTRS(sec) __typeof__(type) name #define DEFINE_PER_CPU_SECTION(type, name, sec) \ __PCPU_DUMMY_ATTRS char __pcpu_scope_##name; \ extern __PCPU_DUMMY_ATTRS char __pcpu_unique_##name; \ __PCPU_DUMMY_ATTRS char __pcpu_unique_##name; \ __PCPU_ATTRS(sec) PER_CPU_DEF_ATTRIBUTES __weak \ - __typeof__(type) per_cpu__##name + __typeof__(type) name #else /* * Normal declaration and definition macros. */ #define DECLARE_PER_CPU_SECTION(type, name, sec) \ - extern __PCPU_ATTRS(sec) __typeof__(type) per_cpu__##name + extern __PCPU_ATTRS(sec) __typeof__(type) name #define DEFINE_PER_CPU_SECTION(type, name, sec) \ __PCPU_ATTRS(sec) PER_CPU_DEF_ATTRIBUTES \ - __typeof__(type) per_cpu__##name + __typeof__(type) name #endif /* @@ -137,8 +131,8 @@ /* * Intermodule exports for per-CPU variables. */ -#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var) -#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var) +#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(var) +#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(var) #endif /* _LINUX_PERCPU_DEFS_H */ diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 522f421..e12410e 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -182,7 +182,7 @@ static inline void *pcpu_lpage_remapped(void *kaddr) #ifndef percpu_read # define percpu_read(var) \ ({ \ - typeof(per_cpu_var(var)) __tmp_var__; \ + typeof(var) __tmp_var__; \ __tmp_var__ = get_cpu_var(var); \ put_cpu_var(var); \ __tmp_var__; \ @@ -253,8 +253,7 @@ do { \ /* * Optimized manipulation for memory allocated through the per cpu - * allocator or for addresses of per cpu variables (can be determined - * using per_cpu_var(xx). + * allocator or for addresses of per cpu variables. * * These operation guarantee exclusivity of access for other operations * on the *same* processor. 
The assumption is that per cpu data is only diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index d858897..3e489fd 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -76,22 +76,22 @@ DECLARE_PER_CPU(struct vm_event_state, vm_event_states); static inline void __count_vm_event(enum vm_event_item item) { - __this_cpu_inc(per_cpu_var(vm_event_states).event[item]); + __this_cpu_inc(vm_event_states.event[item]); } static inline void count_vm_event(enum vm_event_item item) { - this_cpu_inc(per_cpu_var(vm_event_states).event[item]); + this_cpu_inc(vm_event_states.event[item]); } static inline void __count_vm_events(enum vm_event_item item, long delta) { - __this_cpu_add(per_cpu_var(vm_event_states).event[item], delta); + __this_cpu_add(vm_event_states.event[item], delta); } static inline void count_vm_events(enum vm_event_item item, long delta) { - this_cpu_add(per_cpu_var(vm_event_states).event[item], delta); + this_cpu_add(vm_event_states.event[item], delta); } extern void all_vm_events(unsigned long *); diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 178967b..e339ab3 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -731,13 +731,13 @@ static void rcu_torture_timer(unsigned long unused) /* Should not happen, but... */ pipe_count = RCU_TORTURE_PIPE_LEN; } - __this_cpu_inc(per_cpu_var(rcu_torture_count)[pipe_count]); + __this_cpu_inc(rcu_torture_count[pipe_count]); completed = cur_ops->completed() - completed; if (completed > RCU_TORTURE_PIPE_LEN) { /* Should not happen, but... */ completed = RCU_TORTURE_PIPE_LEN; } - __this_cpu_inc(per_cpu_var(rcu_torture_batch)[completed]); + __this_cpu_inc(rcu_torture_batch[completed]); preempt_enable(); cur_ops->readunlock(idx); } @@ -786,13 +786,13 @@ rcu_torture_reader(void *arg) /* Should not happen, but... */ pipe_count = RCU_TORTURE_PIPE_LEN; } - __this_cpu_inc(per_cpu_var(rcu_torture_count)[pipe_count]); + __this_cpu_inc(rcu_torture_count[pipe_count]); completed = cur_ops->completed() - completed; if (completed > RCU_TORTURE_PIPE_LEN) { /* Should not happen, but... 
*/ completed = RCU_TORTURE_PIPE_LEN; } - __this_cpu_inc(per_cpu_var(rcu_torture_batch)[completed]); + __this_cpu_inc(rcu_torture_batch[completed]); preempt_enable(); cur_ops->readunlock(idx); schedule(); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 85a5ed7..b808177 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -91,12 +91,12 @@ DEFINE_PER_CPU(int, ftrace_cpu_disabled); static inline void ftrace_disable_cpu(void) { preempt_disable(); - __this_cpu_inc(per_cpu_var(ftrace_cpu_disabled)); + __this_cpu_inc(ftrace_cpu_disabled); } static inline void ftrace_enable_cpu(void) { - __this_cpu_dec(per_cpu_var(ftrace_cpu_disabled)); + __this_cpu_dec(ftrace_cpu_disabled); preempt_enable(); } @@ -1085,7 +1085,7 @@ trace_function(struct trace_array *tr, struct ftrace_entry *entry; /* If we are reading the ring buffer, don't trace */ - if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled)))) + if (unlikely(__this_cpu_read(ftrace_cpu_disabled))) return; event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry), diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 90a6daa..8614e32 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -176,7 +176,7 @@ static int __trace_graph_entry(struct trace_array *tr, struct ring_buffer *buffer = tr->buffer; struct ftrace_graph_ent_entry *entry; - if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled)))) + if (unlikely(__this_cpu_read(ftrace_cpu_disabled))) return 0; event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT, @@ -240,7 +240,7 @@ static void __trace_graph_return(struct trace_array *tr, struct ring_buffer *buffer = tr->buffer; struct ftrace_graph_ret_entry *entry; - if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled)))) + if (unlikely(__this_cpu_read(ftrace_cpu_disabled))) return; event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET, -- cgit v1.1 From f7b64fe806029e0a0454df132eec3c5ab576102c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 29 Oct 2009 22:34:15 +0900 Subject: percpu: make access macros universal Now that per_cpu__ prefix is gone, there's no distinction between static and dynamic percpu variables. Make get_cpu_var() take dynamic percpu variables and ensure that all macros have parentheses around the parameter evaluation and evaluate the variable parameter only once such that any expression which evaluates to percpu address can be used safely. Signed-off-by: Tejun Heo --- include/linux/percpu.h | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/include/linux/percpu.h b/include/linux/percpu.h index e12410e..f965f83 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -27,10 +27,13 @@ * we force a syntax error here if it isn't. 
*/ #define get_cpu_var(var) (*({ \ - extern int simple_identifier_##var(void); \ preempt_disable(); \ &__get_cpu_var(var); })) -#define put_cpu_var(var) preempt_enable() + +#define put_cpu_var(var) do { \ + (void)(var); \ + preempt_enable(); \ +} while (0) #ifdef CONFIG_SMP @@ -182,17 +185,19 @@ static inline void *pcpu_lpage_remapped(void *kaddr) #ifndef percpu_read # define percpu_read(var) \ ({ \ - typeof(var) __tmp_var__; \ - __tmp_var__ = get_cpu_var(var); \ - put_cpu_var(var); \ - __tmp_var__; \ + typeof(var) *pr_ptr__ = &(var); \ + typeof(var) pr_ret__; \ + pr_ret__ = get_cpu_var(*pr_ptr__); \ + put_cpu_var(*pr_ptr__); \ + pr_ret__; \ }) #endif #define __percpu_generic_to_op(var, val, op) \ do { \ - get_cpu_var(var) op val; \ - put_cpu_var(var); \ + typeof(var) *pgto_ptr__ = &(var); \ + get_cpu_var(*pgto_ptr__) op val; \ + put_cpu_var(*pgto_ptr__); \ } while (0) #ifndef percpu_write @@ -304,7 +309,7 @@ do { \ #define _this_cpu_generic_to_op(pcp, val, op) \ do { \ preempt_disable(); \ - *__this_cpu_ptr(&pcp) op val; \ + *__this_cpu_ptr(&(pcp)) op val; \ preempt_enable(); \ } while (0) -- cgit v1.1 From e0fdb0e050eae331046385643618f12452aa7e73 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 29 Oct 2009 22:34:15 +0900 Subject: percpu: add __percpu for sparse. We have to make __kernel "__attribute__((address_space(0)))" so we can cast to it. tj: * put_cpu_var() update. * Annotations added to dynamic allocator interface. Signed-off-by: Rusty Russell Cc: Al Viro Signed-off-by: Tejun Heo --- include/asm-generic/percpu.h | 4 +++- include/linux/compiler.h | 4 +++- include/linux/percpu-defs.h | 2 +- include/linux/percpu.h | 18 +++++++++++------- 4 files changed, 18 insertions(+), 10 deletions(-) diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index ca6f049..fded453 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -41,7 +41,9 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; * Only S390 provides its own means of moving the pointer. */ #ifndef SHIFT_PERCPU_PTR -#define SHIFT_PERCPU_PTR(__p, __offset) RELOC_HIDE((__p), (__offset)) +/* Weird cast keeps both GCC and sparse happy. */ +#define SHIFT_PERCPU_PTR(__p, __offset) \ + RELOC_HIDE((typeof(*(__p)) __kernel __force *)(__p), (__offset)) #endif /* diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 04fb513..abba804 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -5,7 +5,7 @@ #ifdef __CHECKER__ # define __user __attribute__((noderef, address_space(1))) -# define __kernel /* default address space */ +# define __kernel __attribute__((address_space(0))) # define __safe __attribute__((safe)) # define __force __attribute__((force)) # define __nocast __attribute__((nocast)) @@ -15,6 +15,7 @@ # define __acquire(x) __context__(x,1) # define __release(x) __context__(x,-1) # define __cond_lock(x,c) ((c) ? ({ __acquire(x); 1; }) : 0) +# define __percpu __attribute__((noderef, address_space(3))) extern void __chk_user_ptr(const volatile void __user *); extern void __chk_io_ptr(const volatile void __iomem *); #else @@ -32,6 +33,7 @@ extern void __chk_io_ptr(const volatile void __iomem *); # define __acquire(x) (void)0 # define __release(x) (void)0 # define __cond_lock(x,c) (c) +# define __percpu #endif #ifdef __KERNEL__ diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index ee99f6c..0fa0cb5 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -12,7 +12,7 @@ * that section. 
*/ #define __PCPU_ATTRS(sec) \ - __attribute__((section(PER_CPU_BASE_SECTION sec))) \ + __percpu __attribute__((section(PER_CPU_BASE_SECTION sec))) \ PER_CPU_ATTRIBUTES #define __PCPU_DUMMY_ATTRS \ diff --git a/include/linux/percpu.h b/include/linux/percpu.h index f965f83..2c0d31a 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -30,8 +30,12 @@ preempt_disable(); \ &__get_cpu_var(var); })) +/* + * The weird & is necessary because sparse considers (void)(var) to be + * a direct dereference of percpu variable (var). + */ #define put_cpu_var(var) do { \ - (void)(var); \ + (void)&(var); \ preempt_enable(); \ } while (0) @@ -130,9 +134,9 @@ extern int __init pcpu_page_first_chunk(size_t reserved_size, */ #define per_cpu_ptr(ptr, cpu) SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu))) -extern void *__alloc_reserved_percpu(size_t size, size_t align); -extern void *__alloc_percpu(size_t size, size_t align); -extern void free_percpu(void *__pdata); +extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align); +extern void __percpu *__alloc_percpu(size_t size, size_t align); +extern void free_percpu(void __percpu *__pdata); #ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA extern void __init setup_per_cpu_areas(void); @@ -142,7 +146,7 @@ extern void __init setup_per_cpu_areas(void); #define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); }) -static inline void *__alloc_percpu(size_t size, size_t align) +static inline void __percpu *__alloc_percpu(size_t size, size_t align) { /* * Can't easily make larger alignment work with kmalloc. WARN @@ -153,7 +157,7 @@ static inline void *__alloc_percpu(size_t size, size_t align) return kzalloc(size, GFP_KERNEL); } -static inline void free_percpu(void *p) +static inline void free_percpu(void __percpu *p) { kfree(p); } @@ -168,7 +172,7 @@ static inline void *pcpu_lpage_remapped(void *kaddr) #endif /* CONFIG_SMP */ #define alloc_percpu(type) \ - (typeof(type) *)__alloc_percpu(sizeof(type), __alignof__(type)) + (typeof(type) __percpu *)__alloc_percpu(sizeof(type), __alignof__(type)) /* * Optional methods for optimized non-lvalue per-cpu variable access. -- cgit v1.1 From 545695fb41da117928ab946067a42d9e15fd009d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 29 Oct 2009 22:34:15 +0900 Subject: percpu: make accessors check for percpu pointer in sparse The previous patch made sparse warn about percpu variables being used directly without going through percpu accessors. This patch implements the other half - checking whether non percpu variable is passed into percpu accessors. Signed-off-by: Tejun Heo Cc: Rusty Russell Cc: Al Viro --- include/asm-generic/percpu.h | 6 ++++-- include/linux/percpu-defs.h | 20 ++++++++++++++++++-- include/linux/percpu.h | 2 ++ 3 files changed, 24 insertions(+), 4 deletions(-) diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index fded453..04f91c2 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -42,8 +42,10 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; */ #ifndef SHIFT_PERCPU_PTR /* Weird cast keeps both GCC and sparse happy. 
*/ -#define SHIFT_PERCPU_PTR(__p, __offset) \ - RELOC_HIDE((typeof(*(__p)) __kernel __force *)(__p), (__offset)) +#define SHIFT_PERCPU_PTR(__p, __offset) ({ \ + __verify_pcpu_ptr((__p)); \ + RELOC_HIDE((typeof(*(__p)) __kernel __force *)(__p), (__offset)); \ +}) #endif /* diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index 0fa0cb5..1fa36eb 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -19,6 +19,16 @@ __attribute__((section(".discard"), unused)) /* + * Macro which verifies @ptr is a percpu pointer without evaluating + * @ptr. This is to be used in percpu accessors to verify that the + * input parameter is a percpu pointer. + */ +#define __verify_pcpu_ptr(ptr) do { \ + void __percpu *__vpp_verify = (typeof(ptr))NULL; \ + (void)__vpp_verify; \ +} while (0) + +/* * s390 and alpha modules require percpu variables to be defined as * weak to force the compiler to generate GOT based external * references for them. This is necessary because percpu sections @@ -129,10 +139,16 @@ __aligned(PAGE_SIZE) /* - * Intermodule exports for per-CPU variables. + * Intermodule exports for per-CPU variables. sparse forgets about + * address space across EXPORT_SYMBOL(), change EXPORT_SYMBOL() to + * noop if __CHECKER__. */ +#ifndef __CHECKER__ #define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(var) #define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(var) - +#else +#define EXPORT_PER_CPU_SYMBOL(var) +#define EXPORT_PER_CPU_SYMBOL_GPL(var) +#endif #endif /* _LINUX_PERCPU_DEFS_H */ diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 2c0d31a..42878f0 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -237,6 +237,7 @@ extern void __bad_size_call_parameter(void); #define __pcpu_size_call_return(stem, variable) \ ({ typeof(variable) pscr_ret__; \ + __verify_pcpu_ptr(&(variable)); \ switch(sizeof(variable)) { \ case 1: pscr_ret__ = stem##1(variable);break; \ case 2: pscr_ret__ = stem##2(variable);break; \ @@ -250,6 +251,7 @@ extern void __bad_size_call_parameter(void); #define __pcpu_size_call(stem, variable, ...) \ do { \ + __verify_pcpu_ptr(&(variable)); \ switch(sizeof(variable)) { \ case 1: stem##1(variable, __VA_ARGS__);break; \ case 2: stem##2(variable, __VA_ARGS__);break; \ -- cgit v1.1 From 85438592f179c126ad4cb9a280046d4f0a501e6d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 18 Nov 2009 17:53:21 +0900 Subject: percpu: remove compile warnings caused by __verify_pcpu_ptr() If percpu pointer is const, __verify_pcpu_ptr() triggers warnings like the following. drivers/net/loopback.c: In function 'loopback_get_stats': drivers/net/loopback.c:109: warning: initialization discards qualifiers from pointer target type Fix it by adding const to the verification target pointer used in __verify_pcpu_ptr(). Signed-off-by: Tejun Heo Reported-by: Stephen Rothwell --- include/linux/percpu-defs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index 1fa36eb..68567c0 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -24,7 +24,7 @@ * input parameter is a percpu pointer. 
*/ #define __verify_pcpu_ptr(ptr) do { \ - void __percpu *__vpp_verify = (typeof(ptr))NULL; \ + const void __percpu *__vpp_verify = (typeof(ptr))NULL; \ (void)__vpp_verify; \ } while (0) -- cgit v1.1 From 22b737f4c75197372d64afc6ed1bccd58c00e549 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Tue, 1 Dec 2009 23:28:10 +0900 Subject: percpu: refactor the code in pcpu_[de]populate_chunk() Using break statement at the end of a for loop is confusing, refactor it by replacing the for loop. Signed-off-by: WANG Cong Signed-off-by: Tejun Heo --- mm/percpu.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/mm/percpu.c b/mm/percpu.c index e2e80fc..77c6f79 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -886,11 +886,10 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size) int rs, re; /* quick path, check whether it's empty already */ - pcpu_for_each_unpop_region(chunk, rs, re, page_start, page_end) { - if (rs == page_start && re == page_end) - return; - break; - } + rs = page_start; + pcpu_next_unpop(chunk, &rs, &re, page_end); + if (rs == page_start && re == page_end) + return; /* immutable chunks can't be depopulated */ WARN_ON(chunk->immutable); @@ -941,11 +940,10 @@ static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size) int rs, re, rc; /* quick path, check whether all pages are already there */ - pcpu_for_each_pop_region(chunk, rs, re, page_start, page_end) { - if (rs == page_start && re == page_end) - goto clear; - break; - } + rs = page_start; + pcpu_next_pop(chunk, &rs, &re, page_end); + if (rs == page_start && re == page_end) + goto clear; /* need to allocate and map pages, this chunk can't be immutable */ WARN_ON(chunk->immutable); -- cgit v1.1 From 38b7827fcdd660f591d645bd3ae6644456a4773c Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 5 Jan 2010 15:34:49 +0900 Subject: local_t: Remove cpu_local_xx macros These macros have not been used for awhile now. Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- arch/alpha/include/asm/local.h | 17 ----------------- arch/m32r/include/asm/local.h | 25 ------------------------- arch/mips/include/asm/local.h | 25 ------------------------- arch/powerpc/include/asm/local.h | 25 ------------------------- arch/x86/include/asm/local.h | 37 ------------------------------------- include/asm-generic/local.h | 19 ------------------- 6 files changed, 148 deletions(-) diff --git a/arch/alpha/include/asm/local.h b/arch/alpha/include/asm/local.h index 6ad3ea6..b9e3e33 100644 --- a/arch/alpha/include/asm/local.h +++ b/arch/alpha/include/asm/local.h @@ -98,21 +98,4 @@ static __inline__ long local_sub_return(long i, local_t * l) #define __local_add(i,l) ((l)->a.counter+=(i)) #define __local_sub(i,l) ((l)->a.counter-=(i)) -/* Use these for per-cpu local_t variables: on some archs they are - * much more efficient than these naive implementations. Note they take - * a variable, not an address. 
- */ -#define cpu_local_read(l) local_read(&__get_cpu_var(l)) -#define cpu_local_set(l, i) local_set(&__get_cpu_var(l), (i)) - -#define cpu_local_inc(l) local_inc(&__get_cpu_var(l)) -#define cpu_local_dec(l) local_dec(&__get_cpu_var(l)) -#define cpu_local_add(i, l) local_add((i), &__get_cpu_var(l)) -#define cpu_local_sub(i, l) local_sub((i), &__get_cpu_var(l)) - -#define __cpu_local_inc(l) __local_inc(&__get_cpu_var(l)) -#define __cpu_local_dec(l) __local_dec(&__get_cpu_var(l)) -#define __cpu_local_add(i, l) __local_add((i), &__get_cpu_var(l)) -#define __cpu_local_sub(i, l) __local_sub((i), &__get_cpu_var(l)) - #endif /* _ALPHA_LOCAL_H */ diff --git a/arch/m32r/include/asm/local.h b/arch/m32r/include/asm/local.h index 22256d1..734bca8 100644 --- a/arch/m32r/include/asm/local.h +++ b/arch/m32r/include/asm/local.h @@ -338,29 +338,4 @@ static inline void local_set_mask(unsigned long mask, local_t *addr) * a variable, not an address. */ -/* Need to disable preemption for the cpu local counters otherwise we could - still access a variable of a previous CPU in a non local way. */ -#define cpu_local_wrap_v(l) \ - ({ local_t res__; \ - preempt_disable(); \ - res__ = (l); \ - preempt_enable(); \ - res__; }) -#define cpu_local_wrap(l) \ - ({ preempt_disable(); \ - l; \ - preempt_enable(); }) \ - -#define cpu_local_read(l) cpu_local_wrap_v(local_read(&__get_cpu_var(l))) -#define cpu_local_set(l, i) cpu_local_wrap(local_set(&__get_cpu_var(l), (i))) -#define cpu_local_inc(l) cpu_local_wrap(local_inc(&__get_cpu_var(l))) -#define cpu_local_dec(l) cpu_local_wrap(local_dec(&__get_cpu_var(l))) -#define cpu_local_add(i, l) cpu_local_wrap(local_add((i), &__get_cpu_var(l))) -#define cpu_local_sub(i, l) cpu_local_wrap(local_sub((i), &__get_cpu_var(l))) - -#define __cpu_local_inc(l) cpu_local_inc(l) -#define __cpu_local_dec(l) cpu_local_dec(l) -#define __cpu_local_add(i, l) cpu_local_add((i), (l)) -#define __cpu_local_sub(i, l) cpu_local_sub((i), (l)) - #endif /* __M32R_LOCAL_H */ diff --git a/arch/mips/include/asm/local.h b/arch/mips/include/asm/local.h index 361f4f1..bdcdef0 100644 --- a/arch/mips/include/asm/local.h +++ b/arch/mips/include/asm/local.h @@ -193,29 +193,4 @@ static __inline__ long local_sub_return(long i, local_t * l) #define __local_add(i, l) ((l)->a.counter+=(i)) #define __local_sub(i, l) ((l)->a.counter-=(i)) -/* Need to disable preemption for the cpu local counters otherwise we could - still access a variable of a previous CPU in a non atomic way. 
*/ -#define cpu_local_wrap_v(l) \ - ({ local_t res__; \ - preempt_disable(); \ - res__ = (l); \ - preempt_enable(); \ - res__; }) -#define cpu_local_wrap(l) \ - ({ preempt_disable(); \ - l; \ - preempt_enable(); }) \ - -#define cpu_local_read(l) cpu_local_wrap_v(local_read(&__get_cpu_var(l))) -#define cpu_local_set(l, i) cpu_local_wrap(local_set(&__get_cpu_var(l), (i))) -#define cpu_local_inc(l) cpu_local_wrap(local_inc(&__get_cpu_var(l))) -#define cpu_local_dec(l) cpu_local_wrap(local_dec(&__get_cpu_var(l))) -#define cpu_local_add(i, l) cpu_local_wrap(local_add((i), &__get_cpu_var(l))) -#define cpu_local_sub(i, l) cpu_local_wrap(local_sub((i), &__get_cpu_var(l))) - -#define __cpu_local_inc(l) cpu_local_inc(l) -#define __cpu_local_dec(l) cpu_local_dec(l) -#define __cpu_local_add(i, l) cpu_local_add((i), (l)) -#define __cpu_local_sub(i, l) cpu_local_sub((i), (l)) - #endif /* _ARCH_MIPS_LOCAL_H */ diff --git a/arch/powerpc/include/asm/local.h b/arch/powerpc/include/asm/local.h index 84b457a..227753d 100644 --- a/arch/powerpc/include/asm/local.h +++ b/arch/powerpc/include/asm/local.h @@ -172,29 +172,4 @@ static __inline__ long local_dec_if_positive(local_t *l) #define __local_add(i,l) ((l)->a.counter+=(i)) #define __local_sub(i,l) ((l)->a.counter-=(i)) -/* Need to disable preemption for the cpu local counters otherwise we could - still access a variable of a previous CPU in a non atomic way. */ -#define cpu_local_wrap_v(l) \ - ({ local_t res__; \ - preempt_disable(); \ - res__ = (l); \ - preempt_enable(); \ - res__; }) -#define cpu_local_wrap(l) \ - ({ preempt_disable(); \ - l; \ - preempt_enable(); }) \ - -#define cpu_local_read(l) cpu_local_wrap_v(local_read(&__get_cpu_var(l))) -#define cpu_local_set(l, i) cpu_local_wrap(local_set(&__get_cpu_var(l), (i))) -#define cpu_local_inc(l) cpu_local_wrap(local_inc(&__get_cpu_var(l))) -#define cpu_local_dec(l) cpu_local_wrap(local_dec(&__get_cpu_var(l))) -#define cpu_local_add(i, l) cpu_local_wrap(local_add((i), &__get_cpu_var(l))) -#define cpu_local_sub(i, l) cpu_local_wrap(local_sub((i), &__get_cpu_var(l))) - -#define __cpu_local_inc(l) cpu_local_inc(l) -#define __cpu_local_dec(l) cpu_local_dec(l) -#define __cpu_local_add(i, l) cpu_local_add((i), (l)) -#define __cpu_local_sub(i, l) cpu_local_sub((i), (l)) - #endif /* _ARCH_POWERPC_LOCAL_H */ diff --git a/arch/x86/include/asm/local.h b/arch/x86/include/asm/local.h index 47b9b6f..2e99724 100644 --- a/arch/x86/include/asm/local.h +++ b/arch/x86/include/asm/local.h @@ -195,41 +195,4 @@ static inline long local_sub_return(long i, local_t *l) #define __local_add(i, l) local_add((i), (l)) #define __local_sub(i, l) local_sub((i), (l)) -/* Use these for per-cpu local_t variables: on some archs they are - * much more efficient than these naive implementations. Note they take - * a variable, not an address. - * - * X86_64: This could be done better if we moved the per cpu data directly - * after GS. - */ - -/* Need to disable preemption for the cpu local counters otherwise we could - still access a variable of a previous CPU in a non atomic way. 
*/ -#define cpu_local_wrap_v(l) \ -({ \ - local_t res__; \ - preempt_disable(); \ - res__ = (l); \ - preempt_enable(); \ - res__; \ -}) -#define cpu_local_wrap(l) \ -({ \ - preempt_disable(); \ - (l); \ - preempt_enable(); \ -}) \ - -#define cpu_local_read(l) cpu_local_wrap_v(local_read(&__get_cpu_var((l)))) -#define cpu_local_set(l, i) cpu_local_wrap(local_set(&__get_cpu_var((l)), (i))) -#define cpu_local_inc(l) cpu_local_wrap(local_inc(&__get_cpu_var((l)))) -#define cpu_local_dec(l) cpu_local_wrap(local_dec(&__get_cpu_var((l)))) -#define cpu_local_add(i, l) cpu_local_wrap(local_add((i), &__get_cpu_var((l)))) -#define cpu_local_sub(i, l) cpu_local_wrap(local_sub((i), &__get_cpu_var((l)))) - -#define __cpu_local_inc(l) cpu_local_inc((l)) -#define __cpu_local_dec(l) cpu_local_dec((l)) -#define __cpu_local_add(i, l) cpu_local_add((i), (l)) -#define __cpu_local_sub(i, l) cpu_local_sub((i), (l)) - #endif /* _ASM_X86_LOCAL_H */ diff --git a/include/asm-generic/local.h b/include/asm-generic/local.h index fc21844..c8a5d68 100644 --- a/include/asm-generic/local.h +++ b/include/asm-generic/local.h @@ -52,23 +52,4 @@ typedef struct #define __local_add(i,l) local_set((l), local_read(l) + (i)) #define __local_sub(i,l) local_set((l), local_read(l) - (i)) -/* Use these for per-cpu local_t variables: on some archs they are - * much more efficient than these naive implementations. Note they take - * a variable (eg. mystruct.foo), not an address. - */ -#define cpu_local_read(l) local_read(&__get_cpu_var(l)) -#define cpu_local_set(l, i) local_set(&__get_cpu_var(l), (i)) -#define cpu_local_inc(l) local_inc(&__get_cpu_var(l)) -#define cpu_local_dec(l) local_dec(&__get_cpu_var(l)) -#define cpu_local_add(i, l) local_add((i), &__get_cpu_var(l)) -#define cpu_local_sub(i, l) local_sub((i), &__get_cpu_var(l)) - -/* Non-atomic increments, ie. preemption disabled and won't be touched - * in interrupt, etc. Some archs can optimize this case well. - */ -#define __cpu_local_inc(l) __local_inc(&__get_cpu_var(l)) -#define __cpu_local_dec(l) __local_dec(&__get_cpu_var(l)) -#define __cpu_local_add(i, l) __local_add((i), &__get_cpu_var(l)) -#define __cpu_local_sub(i, l) __local_sub((i), &__get_cpu_var(l)) - #endif /* _ASM_GENERIC_LOCAL_H */ -- cgit v1.1 From e1783a240f491fb233f04edc042e16b18a7a79ba Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 5 Jan 2010 15:34:50 +0900 Subject: module: Use this_cpu_xx to dynamically allocate counters Use cpu ops to deal with the per cpu data instead of a local_t. Reduces memory requirements, cache footprint and decreases cycle counts. The this_cpu_xx operations are also used for !SMP mode. Otherwise we could not drop the use of __module_ref_addr() which would make per cpu data handling complicated. this_cpu_xx operations have their own fallback for !SMP. V8-V9: - Leave include asm/module.h since ringbuffer.c depends on it. Nothing else does though. Another patch will deal with that. - Remove spurious free. Signed-off-by: Christoph Lameter Acked-by: Rusty Russell Signed-off-by: Tejun Heo --- include/linux/module.h | 36 ++++++++++++++---------------------- kernel/module.c | 29 +++++++++++++++-------------- 2 files changed, 29 insertions(+), 36 deletions(-) diff --git a/include/linux/module.h b/include/linux/module.h index 6cb1a3c..2302f09 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -363,11 +364,9 @@ struct module /* Destruction function. 
*/ void (*exit)(void); -#ifdef CONFIG_SMP - char *refptr; -#else - local_t ref; -#endif + struct module_ref { + int count; + } *refptr; #endif #ifdef CONFIG_CONSTRUCTORS @@ -454,25 +453,16 @@ void __symbol_put(const char *symbol); #define symbol_put(x) __symbol_put(MODULE_SYMBOL_PREFIX #x) void symbol_put_addr(void *addr); -static inline local_t *__module_ref_addr(struct module *mod, int cpu) -{ -#ifdef CONFIG_SMP - return (local_t *) (mod->refptr + per_cpu_offset(cpu)); -#else - return &mod->ref; -#endif -} - /* Sometimes we know we already have a refcount, and it's easier not to handle the error case (which only happens with rmmod --wait). */ static inline void __module_get(struct module *module) { if (module) { - unsigned int cpu = get_cpu(); - local_inc(__module_ref_addr(module, cpu)); + preempt_disable(); + __this_cpu_inc(module->refptr->count); trace_module_get(module, _THIS_IP_, - local_read(__module_ref_addr(module, cpu))); - put_cpu(); + __this_cpu_read(module->refptr->count)); + preempt_enable(); } } @@ -481,15 +471,17 @@ static inline int try_module_get(struct module *module) int ret = 1; if (module) { - unsigned int cpu = get_cpu(); + preempt_disable(); + if (likely(module_is_live(module))) { - local_inc(__module_ref_addr(module, cpu)); + __this_cpu_inc(module->refptr->count); trace_module_get(module, _THIS_IP_, - local_read(__module_ref_addr(module, cpu))); + __this_cpu_read(module->refptr->count)); } else ret = 0; - put_cpu(); + + preempt_enable(); } return ret; } diff --git a/kernel/module.c b/kernel/module.c index e96b8ed..9bf2280 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -474,9 +474,10 @@ static void module_unload_init(struct module *mod) INIT_LIST_HEAD(&mod->modules_which_use_me); for_each_possible_cpu(cpu) - local_set(__module_ref_addr(mod, cpu), 0); + per_cpu_ptr(mod->refptr, cpu)->count = 0; + /* Hold reference count during initialization. */ - local_set(__module_ref_addr(mod, raw_smp_processor_id()), 1); + __this_cpu_write(mod->refptr->count, 1); /* Backwards compatibility macros put refcount during init. */ mod->waiter = current; } @@ -619,7 +620,7 @@ unsigned int module_refcount(struct module *mod) int cpu; for_each_possible_cpu(cpu) - total += local_read(__module_ref_addr(mod, cpu)); + total += per_cpu_ptr(mod->refptr, cpu)->count; return total; } EXPORT_SYMBOL(module_refcount); @@ -796,14 +797,15 @@ static struct module_attribute refcnt = { void module_put(struct module *module) { if (module) { - unsigned int cpu = get_cpu(); - local_dec(__module_ref_addr(module, cpu)); + preempt_disable(); + __this_cpu_dec(module->refptr->count); + trace_module_put(module, _RET_IP_, - local_read(__module_ref_addr(module, cpu))); + __this_cpu_read(module->refptr->count)); /* Maybe they're waiting for us to drop reference? 
*/ if (unlikely(!module_is_live(module))) wake_up_process(module->waiter); - put_cpu(); + preempt_enable(); } } EXPORT_SYMBOL(module_put); @@ -1394,9 +1396,9 @@ static void free_module(struct module *mod) kfree(mod->args); if (mod->percpu) percpu_modfree(mod->percpu); -#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP) +#if defined(CONFIG_MODULE_UNLOAD) if (mod->refptr) - percpu_modfree(mod->refptr); + free_percpu(mod->refptr); #endif /* Free lock-classes: */ lockdep_free_key_range(mod->module_core, mod->core_size); @@ -2159,9 +2161,8 @@ static noinline struct module *load_module(void __user *umod, mod = (void *)sechdrs[modindex].sh_addr; kmemleak_load_module(mod, hdr, sechdrs, secstrings); -#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP) - mod->refptr = percpu_modalloc(sizeof(local_t), __alignof__(local_t), - mod->name); +#if defined(CONFIG_MODULE_UNLOAD) + mod->refptr = alloc_percpu(struct module_ref); if (!mod->refptr) { err = -ENOMEM; goto free_init; @@ -2393,8 +2394,8 @@ static noinline struct module *load_module(void __user *umod, kobject_put(&mod->mkobj.kobj); free_unload: module_unload_free(mod); -#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP) - percpu_modfree(mod->refptr); +#if defined(CONFIG_MODULE_UNLOAD) + free_percpu(mod->refptr); free_init: #endif module_free(mod, mod->module_init); -- cgit v1.1 From 79615760f380ec86cd58204744e774c33fab9211 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 5 Jan 2010 15:34:50 +0900 Subject: local_t: Move local.h include to ringbuffer.c and ring_buffer_benchmark.c ringbuffer*.c are the last users of local.h. Remove the include from modules.h and add it to ringbuffer files. Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- include/linux/module.h | 1 - kernel/trace/ring_buffer.c | 1 + kernel/trace/ring_buffer_benchmark.c | 1 + 3 files changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/module.h b/include/linux/module.h index 2302f09..7e74ae0 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -17,7 +17,6 @@ #include #include -#include #include #include diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 2326b04..eb6c898 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -20,6 +20,7 @@ #include #include +#include #include "trace.h" /* diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c index b2477ca..df74c79 100644 --- a/kernel/trace/ring_buffer_benchmark.c +++ b/kernel/trace/ring_buffer_benchmark.c @@ -8,6 +8,7 @@ #include #include #include +#include struct rb_page { u64 ts; -- cgit v1.1 From 5917dae83cb02dfe74c9167b79e86e6d65183fa3 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 5 Jan 2010 15:34:50 +0900 Subject: percpu, x86: Generic inc / dec percpu instructions Optimize code generated for percpu access by checking for increment and decrements. tj: fix incorrect usage of __builtin_constant_p() and restructure percpu_add_op() macro. Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- arch/x86/include/asm/percpu.h | 86 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 72 insertions(+), 14 deletions(-) diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 4c170cc..66a272d 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -103,6 +103,64 @@ do { \ } \ } while (0) +/* + * Generate a percpu add to memory instruction and optimize code + * if a one is added or subtracted. 
+ */ +#define percpu_add_op(var, val) \ +do { \ + typedef typeof(var) pao_T__; \ + const int pao_ID__ = (__builtin_constant_p(val) && \ + ((val) == 1 || (val) == -1)) ? (val) : 0; \ + if (0) { \ + pao_T__ pao_tmp__; \ + pao_tmp__ = (val); \ + } \ + switch (sizeof(var)) { \ + case 1: \ + if (pao_ID__ == 1) \ + asm("incb "__percpu_arg(0) : "+m" (var)); \ + else if (pao_ID__ == -1) \ + asm("decb "__percpu_arg(0) : "+m" (var)); \ + else \ + asm("addb %1, "__percpu_arg(0) \ + : "+m" (var) \ + : "qi" ((pao_T__)(val))); \ + break; \ + case 2: \ + if (pao_ID__ == 1) \ + asm("incw "__percpu_arg(0) : "+m" (var)); \ + else if (pao_ID__ == -1) \ + asm("decw "__percpu_arg(0) : "+m" (var)); \ + else \ + asm("addw %1, "__percpu_arg(0) \ + : "+m" (var) \ + : "ri" ((pao_T__)(val))); \ + break; \ + case 4: \ + if (pao_ID__ == 1) \ + asm("incl "__percpu_arg(0) : "+m" (var)); \ + else if (pao_ID__ == -1) \ + asm("decl "__percpu_arg(0) : "+m" (var)); \ + else \ + asm("addl %1, "__percpu_arg(0) \ + : "+m" (var) \ + : "ri" ((pao_T__)(val))); \ + break; \ + case 8: \ + if (pao_ID__ == 1) \ + asm("incq "__percpu_arg(0) : "+m" (var)); \ + else if (pao_ID__ == -1) \ + asm("decq "__percpu_arg(0) : "+m" (var)); \ + else \ + asm("addq %1, "__percpu_arg(0) \ + : "+m" (var) \ + : "re" ((pao_T__)(val))); \ + break; \ + default: __bad_percpu_size(); \ + } \ +} while (0) + #define percpu_from_op(op, var, constraint) \ ({ \ typeof(var) pfo_ret__; \ @@ -144,8 +202,8 @@ do { \ #define percpu_read(var) percpu_from_op("mov", var, "m" (var)) #define percpu_read_stable(var) percpu_from_op("mov", var, "p" (&(var))) #define percpu_write(var, val) percpu_to_op("mov", var, val) -#define percpu_add(var, val) percpu_to_op("add", var, val) -#define percpu_sub(var, val) percpu_to_op("sub", var, val) +#define percpu_add(var, val) percpu_add_op(var, val) +#define percpu_sub(var, val) percpu_add_op(var, -(val)) #define percpu_and(var, val) percpu_to_op("and", var, val) #define percpu_or(var, val) percpu_to_op("or", var, val) #define percpu_xor(var, val) percpu_to_op("xor", var, val) @@ -157,9 +215,9 @@ do { \ #define __this_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val) #define __this_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val) #define __this_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val) -#define __this_cpu_add_1(pcp, val) percpu_to_op("add", (pcp), val) -#define __this_cpu_add_2(pcp, val) percpu_to_op("add", (pcp), val) -#define __this_cpu_add_4(pcp, val) percpu_to_op("add", (pcp), val) +#define __this_cpu_add_1(pcp, val) percpu_add_op((pcp), val) +#define __this_cpu_add_2(pcp, val) percpu_add_op((pcp), val) +#define __this_cpu_add_4(pcp, val) percpu_add_op((pcp), val) #define __this_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val) #define __this_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val) #define __this_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val) @@ -176,9 +234,9 @@ do { \ #define this_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val) #define this_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val) #define this_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val) -#define this_cpu_add_1(pcp, val) percpu_to_op("add", (pcp), val) -#define this_cpu_add_2(pcp, val) percpu_to_op("add", (pcp), val) -#define this_cpu_add_4(pcp, val) percpu_to_op("add", (pcp), val) +#define this_cpu_add_1(pcp, val) percpu_add_op((pcp), val) +#define this_cpu_add_2(pcp, val) percpu_add_op((pcp), val) +#define this_cpu_add_4(pcp, val) percpu_add_op((pcp), val) #define this_cpu_and_1(pcp, val) percpu_to_op("and", 
(pcp), val) #define this_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val) #define this_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val) @@ -189,9 +247,9 @@ do { \ #define this_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val) #define this_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val) -#define irqsafe_cpu_add_1(pcp, val) percpu_to_op("add", (pcp), val) -#define irqsafe_cpu_add_2(pcp, val) percpu_to_op("add", (pcp), val) -#define irqsafe_cpu_add_4(pcp, val) percpu_to_op("add", (pcp), val) +#define irqsafe_cpu_add_1(pcp, val) percpu_add_op((pcp), val) +#define irqsafe_cpu_add_2(pcp, val) percpu_add_op((pcp), val) +#define irqsafe_cpu_add_4(pcp, val) percpu_add_op((pcp), val) #define irqsafe_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val) #define irqsafe_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val) #define irqsafe_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val) @@ -209,19 +267,19 @@ do { \ #ifdef CONFIG_X86_64 #define __this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) #define __this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val) -#define __this_cpu_add_8(pcp, val) percpu_to_op("add", (pcp), val) +#define __this_cpu_add_8(pcp, val) percpu_add_op((pcp), val) #define __this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) #define __this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) #define __this_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) #define this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) #define this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val) -#define this_cpu_add_8(pcp, val) percpu_to_op("add", (pcp), val) +#define this_cpu_add_8(pcp, val) percpu_add_op((pcp), val) #define this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) #define this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) #define this_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) -#define irqsafe_cpu_add_8(pcp, val) percpu_to_op("add", (pcp), val) +#define irqsafe_cpu_add_8(pcp, val) percpu_add_op((pcp), val) #define irqsafe_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) #define irqsafe_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) #define irqsafe_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) -- cgit v1.1 From 99dcc3e5a94ed491fbef402831d8c0bbb267f995 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 5 Jan 2010 15:34:51 +0900 Subject: this_cpu: Page allocator conversion Use the per cpu allocator functionality to avoid per cpu arrays in struct zone. This drastically reduces the size of struct zone for systems with large amounts of processors and allows placement of critical variables of struct zone in one cacheline even on very large systems. Another effect is that the pagesets of one processor are placed near one another. If multiple pagesets from different zones fit into one cacheline then additional cacheline fetches can be avoided on the hot paths when allocating memory from multiple zones. Bootstrap becomes simpler if we use the same scheme for UP, SMP, NUMA. #ifdefs are reduced and we can drop the zone_pcp macro. Hotplug handling is also simplified since cpu alloc can bring up and shut down cpu areas for a specific cpu as a whole. So there is no need to allocate or free individual pagesets. V7-V8: - Explain chicken egg dilemmna with percpu allocator. V4-V5: - Fix up cases where per_cpu_ptr is called before irq disable - Integrate the bootstrap logic that was separate before. tj: Build failure in pageset_cpuup_callback() due to missing ret variable fixed. 
Reviewed-by: Mel Gorman Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- include/linux/mm.h | 4 - include/linux/mmzone.h | 12 +-- mm/page_alloc.c | 202 +++++++++++++++++-------------------------------- mm/vmstat.c | 14 ++-- 4 files changed, 81 insertions(+), 151 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 2265f28..554fa39 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1079,11 +1079,7 @@ extern void si_meminfo(struct sysinfo * val); extern void si_meminfo_node(struct sysinfo *val, int nid); extern int after_bootmem; -#ifdef CONFIG_NUMA extern void setup_per_cpu_pageset(void); -#else -static inline void setup_per_cpu_pageset(void) {} -#endif extern void zone_pcp_update(struct zone *zone); diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 30fe668..7874201 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -184,13 +184,7 @@ struct per_cpu_pageset { s8 stat_threshold; s8 vm_stat_diff[NR_VM_ZONE_STAT_ITEMS]; #endif -} ____cacheline_aligned_in_smp; - -#ifdef CONFIG_NUMA -#define zone_pcp(__z, __cpu) ((__z)->pageset[(__cpu)]) -#else -#define zone_pcp(__z, __cpu) (&(__z)->pageset[(__cpu)]) -#endif +}; #endif /* !__GENERATING_BOUNDS.H */ @@ -306,10 +300,8 @@ struct zone { */ unsigned long min_unmapped_pages; unsigned long min_slab_pages; - struct per_cpu_pageset *pageset[NR_CPUS]; -#else - struct per_cpu_pageset pageset[NR_CPUS]; #endif + struct per_cpu_pageset *pageset; /* * free areas of different sizes */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 4e9f5cc..6849e87 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1008,10 +1008,10 @@ static void drain_pages(unsigned int cpu) struct per_cpu_pageset *pset; struct per_cpu_pages *pcp; - pset = zone_pcp(zone, cpu); + local_irq_save(flags); + pset = per_cpu_ptr(zone->pageset, cpu); pcp = &pset->pcp; - local_irq_save(flags); free_pcppages_bulk(zone, pcp->count, pcp); pcp->count = 0; local_irq_restore(flags); @@ -1095,7 +1095,6 @@ static void free_hot_cold_page(struct page *page, int cold) arch_free_page(page, 0); kernel_map_pages(page, 1, 0); - pcp = &zone_pcp(zone, get_cpu())->pcp; migratetype = get_pageblock_migratetype(page); set_page_private(page, migratetype); local_irq_save(flags); @@ -1118,6 +1117,7 @@ static void free_hot_cold_page(struct page *page, int cold) migratetype = MIGRATE_MOVABLE; } + pcp = &this_cpu_ptr(zone->pageset)->pcp; if (cold) list_add_tail(&page->lru, &pcp->lists[migratetype]); else @@ -1130,7 +1130,6 @@ static void free_hot_cold_page(struct page *page, int cold) out: local_irq_restore(flags); - put_cpu(); } void free_hot_page(struct page *page) @@ -1180,17 +1179,15 @@ struct page *buffered_rmqueue(struct zone *preferred_zone, unsigned long flags; struct page *page; int cold = !!(gfp_flags & __GFP_COLD); - int cpu; again: - cpu = get_cpu(); if (likely(order == 0)) { struct per_cpu_pages *pcp; struct list_head *list; - pcp = &zone_pcp(zone, cpu)->pcp; - list = &pcp->lists[migratetype]; local_irq_save(flags); + pcp = &this_cpu_ptr(zone->pageset)->pcp; + list = &pcp->lists[migratetype]; if (list_empty(list)) { pcp->count += rmqueue_bulk(zone, 0, pcp->batch, list, @@ -1231,7 +1228,6 @@ again: __count_zone_vm_events(PGALLOC, zone, 1 << order); zone_statistics(preferred_zone, zone); local_irq_restore(flags); - put_cpu(); VM_BUG_ON(bad_range(zone, page)); if (prep_new_page(page, order, gfp_flags)) @@ -1240,7 +1236,6 @@ again: failed: local_irq_restore(flags); - put_cpu(); return NULL; } @@ -2179,7 +2174,7 @@ void 
show_free_areas(void) for_each_online_cpu(cpu) { struct per_cpu_pageset *pageset; - pageset = zone_pcp(zone, cpu); + pageset = per_cpu_ptr(zone->pageset, cpu); printk("CPU %4d: hi:%5d, btch:%4d usd:%4d\n", cpu, pageset->pcp.high, @@ -2744,10 +2739,29 @@ static void build_zonelist_cache(pg_data_t *pgdat) #endif /* CONFIG_NUMA */ +/* + * Boot pageset table. One per cpu which is going to be used for all + * zones and all nodes. The parameters will be set in such a way + * that an item put on a list will immediately be handed over to + * the buddy list. This is safe since pageset manipulation is done + * with interrupts disabled. + * + * The boot_pagesets must be kept even after bootup is complete for + * unused processors and/or zones. They do play a role for bootstrapping + * hotplugged processors. + * + * zoneinfo_show() and maybe other functions do + * not check if the processor is online before following the pageset pointer. + * Other parts of the kernel may not check if the zone is available. + */ +static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch); +static DEFINE_PER_CPU(struct per_cpu_pageset, boot_pageset); /* return values int ....just for stop_machine() */ static int __build_all_zonelists(void *dummy) { int nid; + int cpu; #ifdef CONFIG_NUMA memset(node_load, 0, sizeof(node_load)); @@ -2758,6 +2772,23 @@ static int __build_all_zonelists(void *dummy) build_zonelists(pgdat); build_zonelist_cache(pgdat); } + + /* + * Initialize the boot_pagesets that are going to be used + * for bootstrapping processors. The real pagesets for + * each zone will be allocated later when the per cpu + * allocator is available. + * + * boot_pagesets are used also for bootstrapping offline + * cpus if the system is already booted because the pagesets + * are needed to initialize allocators on a specific cpu too. + * F.e. the percpu allocator needs the page allocator which + * needs the percpu allocator in order to allocate its pagesets + * (a chicken-egg dilemma). + */ + for_each_possible_cpu(cpu) + setup_pageset(&per_cpu(boot_pageset, cpu), 0); + return 0; } @@ -3095,121 +3126,33 @@ static void setup_pagelist_highmark(struct per_cpu_pageset *p, pcp->batch = PAGE_SHIFT * 8; } - -#ifdef CONFIG_NUMA -/* - * Boot pageset table. One per cpu which is going to be used for all - * zones and all nodes. The parameters will be set in such a way - * that an item put on a list will immediately be handed over to - * the buddy list. This is safe since pageset manipulation is done - * with interrupts disabled. - * - * Some NUMA counter updates may also be caught by the boot pagesets. - * - * The boot_pagesets must be kept even after bootup is complete for - * unused processors and/or zones. They do play a role for bootstrapping - * hotplugged processors. - * - * zoneinfo_show() and maybe other functions do - * not check if the processor is online before following the pageset pointer. - * Other parts of the kernel may not check if the zone is available. - */ -static struct per_cpu_pageset boot_pageset[NR_CPUS]; - /* - * Dynamically allocate memory for the - * per cpu pageset array in struct zone. + * Allocate per cpu pagesets and initialize them. + * Before this call only boot pagesets were available. + * Boot pagesets will no longer be used by this processor + * after setup_per_cpu_pageset().
*/ -static int __cpuinit process_zones(int cpu) +void __init setup_per_cpu_pageset(void) { - struct zone *zone, *dzone; - int node = cpu_to_node(cpu); - - node_set_state(node, N_CPU); /* this node has a cpu */ + struct zone *zone; + int cpu; for_each_populated_zone(zone) { - zone_pcp(zone, cpu) = kmalloc_node(sizeof(struct per_cpu_pageset), - GFP_KERNEL, node); - if (!zone_pcp(zone, cpu)) - goto bad; - - setup_pageset(zone_pcp(zone, cpu), zone_batchsize(zone)); - - if (percpu_pagelist_fraction) - setup_pagelist_highmark(zone_pcp(zone, cpu), - (zone->present_pages / percpu_pagelist_fraction)); - } - - return 0; -bad: - for_each_zone(dzone) { - if (!populated_zone(dzone)) - continue; - if (dzone == zone) - break; - kfree(zone_pcp(dzone, cpu)); - zone_pcp(dzone, cpu) = &boot_pageset[cpu]; - } - return -ENOMEM; -} + zone->pageset = alloc_percpu(struct per_cpu_pageset); -static inline void free_zone_pagesets(int cpu) -{ - struct zone *zone; - - for_each_zone(zone) { - struct per_cpu_pageset *pset = zone_pcp(zone, cpu); + for_each_possible_cpu(cpu) { + struct per_cpu_pageset *pcp = per_cpu_ptr(zone->pageset, cpu); - /* Free per_cpu_pageset if it is slab allocated */ - if (pset != &boot_pageset[cpu]) - kfree(pset); - zone_pcp(zone, cpu) = &boot_pageset[cpu]; - } -} + setup_pageset(pcp, zone_batchsize(zone)); -static int __cpuinit pageset_cpuup_callback(struct notifier_block *nfb, - unsigned long action, - void *hcpu) -{ - int cpu = (long)hcpu; - int ret = NOTIFY_OK; - - switch (action) { - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: - if (process_zones(cpu)) - ret = NOTIFY_BAD; - break; - case CPU_UP_CANCELED: - case CPU_UP_CANCELED_FROZEN: - case CPU_DEAD: - case CPU_DEAD_FROZEN: - free_zone_pagesets(cpu); - break; - default: - break; + if (percpu_pagelist_fraction) + setup_pagelist_highmark(pcp, + (zone->present_pages / + percpu_pagelist_fraction)); + } } - return ret; } -static struct notifier_block __cpuinitdata pageset_notifier = - { &pageset_cpuup_callback, NULL, 0 }; - -void __init setup_per_cpu_pageset(void) -{ - int err; - - /* Initialize per_cpu_pageset for cpu 0. - * A cpuup callback will do this for every cpu - * as it comes online - */ - err = process_zones(smp_processor_id()); - BUG_ON(err); - register_cpu_notifier(&pageset_notifier); -} - -#endif - static noinline __init_refok int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages) { @@ -3263,7 +3206,7 @@ static int __zone_pcp_update(void *data) struct per_cpu_pageset *pset; struct per_cpu_pages *pcp; - pset = zone_pcp(zone, cpu); + pset = per_cpu_ptr(zone->pageset, cpu); pcp = &pset->pcp; local_irq_save(flags); @@ -3281,21 +3224,17 @@ void zone_pcp_update(struct zone *zone) static __meminit void zone_pcp_init(struct zone *zone) { - int cpu; - unsigned long batch = zone_batchsize(zone); + /* + * per cpu subsystem is not up at this point. The following code + * relies on the ability of the linker to provide the + * offset of a (static) per cpu variable into the per cpu area. + */ + zone->pageset = &boot_pageset; - for (cpu = 0; cpu < NR_CPUS; cpu++) { -#ifdef CONFIG_NUMA - /* Early boot. 
Slab allocator not functional yet */ - zone_pcp(zone, cpu) = &boot_pageset[cpu]; - setup_pageset(&boot_pageset[cpu],0); -#else - setup_pageset(zone_pcp(zone,cpu), batch); -#endif - } if (zone->present_pages) - printk(KERN_DEBUG " %s zone: %lu pages, LIFO batch:%lu\n", - zone->name, zone->present_pages, batch); + printk(KERN_DEBUG " %s zone: %lu pages, LIFO batch:%u\n", + zone->name, zone->present_pages, + zone_batchsize(zone)); } __meminit int init_currently_empty_zone(struct zone *zone, @@ -4809,10 +4748,11 @@ int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write, if (!write || (ret == -EINVAL)) return ret; for_each_populated_zone(zone) { - for_each_online_cpu(cpu) { + for_each_possible_cpu(cpu) { unsigned long high; high = zone->present_pages / percpu_pagelist_fraction; - setup_pagelist_highmark(zone_pcp(zone, cpu), high); + setup_pagelist_highmark( + per_cpu_ptr(zone->pageset, cpu), high); } } return 0; diff --git a/mm/vmstat.c b/mm/vmstat.c index 6051fba..1ba0bb7 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -139,7 +139,8 @@ static void refresh_zone_stat_thresholds(void) threshold = calculate_threshold(zone); for_each_online_cpu(cpu) - zone_pcp(zone, cpu)->stat_threshold = threshold; + per_cpu_ptr(zone->pageset, cpu)->stat_threshold + = threshold; } } @@ -149,7 +150,8 @@ static void refresh_zone_stat_thresholds(void) void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item, int delta) { - struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id()); + struct per_cpu_pageset *pcp = this_cpu_ptr(zone->pageset); + s8 *p = pcp->vm_stat_diff + item; long x; @@ -202,7 +204,7 @@ EXPORT_SYMBOL(mod_zone_page_state); */ void __inc_zone_state(struct zone *zone, enum zone_stat_item item) { - struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id()); + struct per_cpu_pageset *pcp = this_cpu_ptr(zone->pageset); s8 *p = pcp->vm_stat_diff + item; (*p)++; @@ -223,7 +225,7 @@ EXPORT_SYMBOL(__inc_zone_page_state); void __dec_zone_state(struct zone *zone, enum zone_stat_item item) { - struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id()); + struct per_cpu_pageset *pcp = this_cpu_ptr(zone->pageset); s8 *p = pcp->vm_stat_diff + item; (*p)--; @@ -300,7 +302,7 @@ void refresh_cpu_vm_stats(int cpu) for_each_populated_zone(zone) { struct per_cpu_pageset *p; - p = zone_pcp(zone, cpu); + p = per_cpu_ptr(zone->pageset, cpu); for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) if (p->vm_stat_diff[i]) { @@ -741,7 +743,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, for_each_online_cpu(i) { struct per_cpu_pageset *pageset; - pageset = zone_pcp(zone, i); + pageset = per_cpu_ptr(zone->pageset, i); seq_printf(m, "\n cpu: %i" "\n count: %i" -- cgit v1.1 From ad596925eaf9a48ed61bc9210088828f1f8e0552 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 5 Jan 2010 15:34:51 +0900 Subject: this_cpu: Remove pageset_notifier Remove the pageset notifier since it only marks that a processor exists on a specific node. Move that code into the vmstat notifier. 
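The move is mechanical; for orientation, the CPU_ONLINE branch of the vmstat hotplug callback ends up as sketched below (matching the hunk that follows, surrounding kernel context assumed):

	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
		start_cpu_timer(cpu);
		node_set_state(cpu_to_node(cpu), N_CPU);	/* previously done by the
							   pageset notifier */
		break;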
Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- mm/vmstat.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/vmstat.c b/mm/vmstat.c index 1ba0bb7..fc5aa18 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -908,6 +908,7 @@ static int __cpuinit vmstat_cpuup_callback(struct notifier_block *nfb, case CPU_ONLINE: case CPU_ONLINE_FROZEN: start_cpu_timer(cpu); + node_set_state(cpu_to_node(cpu), N_CPU); break; case CPU_DOWN_PREPARE: case CPU_DOWN_PREPARE_FROZEN: -- cgit v1.1 From dbfc196a3cc1a2514ad0737a82f764de23bd65e6 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 4 Jan 2010 16:34:46 -0600 Subject: local_t: Remove leftover local.h Somehow the local.h was not removed when taking out the local_t usage during the 2.6.32 merge. CC: David Miller Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- arch/sparc/kernel/nmi.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c index 2ad288f..9b9f5b4 100644 --- a/arch/sparc/kernel/nmi.c +++ b/arch/sparc/kernel/nmi.c @@ -21,7 +21,6 @@ #include #include -#include #include /* We don't have a real NMI on sparc64, but we can fake one -- cgit v1.1 From 43cf38eb5cea91245502df3fcee4dbfc1c74dd1c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 2 Feb 2010 14:38:57 +0900 Subject: percpu: add __percpu sparse annotations to core kernel subsystems Add __percpu sparse annotations to core subsystems. These annotations are to make sparse consider percpu variables to be in a different address space and warn if accessed without going through percpu accessors. This patch doesn't affect normal builds. Signed-off-by: Tejun Heo Reviewed-by: Christoph Lameter Acked-by: Paul E. McKenney Cc: Jens Axboe Cc: linux-mm@kvack.org Cc: Rusty Russell Cc: Dipankar Sarma Cc: Peter Zijlstra Cc: Andrew Morton Cc: Eric Biederman --- include/linux/blktrace_api.h | 4 ++-- include/linux/genhd.h | 2 +- include/linux/kexec.h | 2 +- include/linux/mmzone.h | 2 +- include/linux/module.h | 2 +- include/linux/percpu_counter.h | 2 +- include/linux/srcu.h | 2 +- kernel/kexec.c | 2 +- kernel/sched.c | 4 ++-- kernel/stop_machine.c | 2 +- mm/percpu.c | 18 ++++++++++-------- 11 files changed, 22 insertions(+), 20 deletions(-) diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 3b73b99..416bf62 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -150,8 +150,8 @@ struct blk_user_trace_setup { struct blk_trace { int trace_state; struct rchan *rchan; - unsigned long *sequence; - unsigned char *msg_data; + unsigned long __percpu *sequence; + unsigned char __percpu *msg_data; u16 act_mask; u64 start_lba; u64 end_lba; diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 9717081..56b5051 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -101,7 +101,7 @@ struct hd_struct { unsigned long stamp; int in_flight[2]; #ifdef CONFIG_SMP - struct disk_stats *dkstats; + struct disk_stats __percpu *dkstats; #else struct disk_stats dkstats; #endif diff --git a/include/linux/kexec.h b/include/linux/kexec.h index c356b69..03e8e8d 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -199,7 +199,7 @@ extern struct kimage *kexec_crash_image; */ extern struct resource crashk_res; typedef u32 note_buf_t[KEXEC_NOTE_BYTES/4]; -extern note_buf_t *crash_notes; +extern note_buf_t __percpu *crash_notes; extern u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4]; extern size_t vmcoreinfo_size; extern size_t vmcoreinfo_max_size; diff --git a/include/linux/mmzone.h 
b/include/linux/mmzone.h index 7874201..41acd4b 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -301,7 +301,7 @@ struct zone { unsigned long min_unmapped_pages; unsigned long min_slab_pages; #endif - struct per_cpu_pageset *pageset; + struct per_cpu_pageset __percpu *pageset; /* * free areas of different sizes */ diff --git a/include/linux/module.h b/include/linux/module.h index 7e74ae0..dd618eb 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -365,7 +365,7 @@ struct module struct module_ref { int count; - } *refptr; + } __percpu *refptr; #endif #ifdef CONFIG_CONSTRUCTORS diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index a7684a5..9bd103c 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -21,7 +21,7 @@ struct percpu_counter { #ifdef CONFIG_HOTPLUG_CPU struct list_head list; /* All percpu_counters are on a list */ #endif - s32 *counters; + s32 __percpu *counters; }; extern int percpu_counter_batch; diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 4765d97..41eedcc 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -33,7 +33,7 @@ struct srcu_struct_array { struct srcu_struct { int completed; - struct srcu_struct_array *per_cpu_ref; + struct srcu_struct_array __percpu *per_cpu_ref; struct mutex mutex; }; diff --git a/kernel/kexec.c b/kernel/kexec.c index ef077fb..87ebe8a 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -41,7 +41,7 @@ #include /* Per cpu memory for storing cpu states in case of system crash. */ -note_buf_t* crash_notes; +note_buf_t __percpu *crash_notes; /* vmcoreinfo stuff */ static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES]; diff --git a/kernel/sched.c b/kernel/sched.c index 3a8fb30..978edfd 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1566,7 +1566,7 @@ static unsigned long cpu_avg_load_per_task(int cpu) #ifdef CONFIG_FAIR_GROUP_SCHED -static __read_mostly unsigned long *update_shares_data; +static __read_mostly unsigned long __percpu *update_shares_data; static void __set_se_shares(struct sched_entity *se, unsigned long shares); @@ -10683,7 +10683,7 @@ struct cgroup_subsys cpu_cgroup_subsys = { struct cpuacct { struct cgroup_subsys_state css; /* cpuusage holds pointer to a u64-type object on every cpu */ - u64 *cpuusage; + u64 __percpu *cpuusage; struct percpu_counter cpustat[CPUACCT_STAT_NSTATS]; struct cpuacct *parent; }; diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 912823e..9bb9fb1 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -45,7 +45,7 @@ static int refcount; static struct workqueue_struct *stop_machine_wq; static struct stop_machine_data active, idle; static const struct cpumask *active_cpus; -static void *stop_machine_work; +static void __percpu *stop_machine_work; static void set_state(enum stopmachine_state newstate) { diff --git a/mm/percpu.c b/mm/percpu.c index b336638..768419d 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -80,13 +80,15 @@ /* default addr <-> pcpu_ptr mapping, override in asm/percpu.h if necessary */ #ifndef __addr_to_pcpu_ptr #define __addr_to_pcpu_ptr(addr) \ - (void *)((unsigned long)(addr) - (unsigned long)pcpu_base_addr \ - + (unsigned long)__per_cpu_start) + (void __percpu *)((unsigned long)(addr) - \ + (unsigned long)pcpu_base_addr + \ + (unsigned long)__per_cpu_start) #endif #ifndef __pcpu_ptr_to_addr #define __pcpu_ptr_to_addr(ptr) \ - (void *)((unsigned long)(ptr) + (unsigned long)pcpu_base_addr \ - - (unsigned long)__per_cpu_start) + (void __force 
*)((unsigned long)(ptr) + \ + (unsigned long)pcpu_base_addr - \ + (unsigned long)__per_cpu_start) #endif struct pcpu_chunk { @@ -1065,7 +1067,7 @@ static struct pcpu_chunk *alloc_pcpu_chunk(void) * RETURNS: * Percpu pointer to the allocated area on success, NULL on failure. */ -static void *pcpu_alloc(size_t size, size_t align, bool reserved) +static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved) { static int warn_limit = 10; struct pcpu_chunk *chunk; @@ -1194,7 +1196,7 @@ fail_unlock_mutex: * RETURNS: * Percpu pointer to the allocated area on success, NULL on failure. */ -void *__alloc_percpu(size_t size, size_t align) +void __percpu *__alloc_percpu(size_t size, size_t align) { return pcpu_alloc(size, align, false); } @@ -1215,7 +1217,7 @@ EXPORT_SYMBOL_GPL(__alloc_percpu); * RETURNS: * Percpu pointer to the allocated area on success, NULL on failure. */ -void *__alloc_reserved_percpu(size_t size, size_t align) +void __percpu *__alloc_reserved_percpu(size_t size, size_t align) { return pcpu_alloc(size, align, true); } @@ -1267,7 +1269,7 @@ static void pcpu_reclaim(struct work_struct *work) * CONTEXT: * Can be called from atomic context. */ -void free_percpu(void *ptr) +void free_percpu(void __percpu *ptr) { void *addr; struct pcpu_chunk *chunk; -- cgit v1.1 From 003cb608a2533d0927a83bc4e07e46d7a622eda9 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 2 Feb 2010 14:39:01 +0900 Subject: percpu: add __percpu sparse annotations to fs Add __percpu sparse annotations to fs. These annotations are to make sparse consider percpu variables to be in a different address space and warn if accessed without going through percpu accessors. This patch doesn't affect normal builds. Signed-off-by: Tejun Heo Cc: "Theodore Ts'o" Cc: Trond Myklebust Cc: Alex Elder Cc: Christoph Hellwig Cc: Alexander Viro --- fs/ext4/ext4.h | 2 +- fs/nfs/iostat.h | 4 ++-- fs/xfs/xfs_mount.h | 2 +- include/linux/mount.h | 2 +- include/linux/nfs_fs_sb.h | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 874d169..4cedc91 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1014,7 +1014,7 @@ struct ext4_sb_info { atomic_t s_lock_busy; /* locality groups */ - struct ext4_locality_group *s_locality_groups; + struct ext4_locality_group __percpu *s_locality_groups; /* for write statistics */ unsigned long s_sectors_written_start; diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h index 46d779a..1d8d5c8 100644 --- a/fs/nfs/iostat.h +++ b/fs/nfs/iostat.h @@ -57,12 +57,12 @@ static inline void nfs_add_fscache_stats(struct inode *inode, } #endif -static inline struct nfs_iostats *nfs_alloc_iostats(void) +static inline struct nfs_iostats __percpu *nfs_alloc_iostats(void) { return alloc_percpu(struct nfs_iostats); } -static inline void nfs_free_iostats(struct nfs_iostats *stats) +static inline void nfs_free_iostats(struct nfs_iostats __percpu *stats) { if (stats != NULL) free_percpu(stats); diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 1df7e45..24c8887 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -243,7 +243,7 @@ typedef struct xfs_mount { struct xfs_qmops *m_qm_ops; /* vector of XQM ops */ atomic_t m_active_trans; /* number trans frozen */ #ifdef HAVE_PERCPU_SB - xfs_icsb_cnts_t *m_sb_cnts; /* per-cpu superblock counters */ + xfs_icsb_cnts_t __percpu *m_sb_cnts; /* per-cpu superblock counters */ unsigned long m_icsb_counters; /* disabled per-cpu counters */ struct notifier_block m_icsb_notifier; /* hotplug cpu notifier */ struct mutex 
m_icsb_mutex; /* balancer sync lock */ diff --git a/include/linux/mount.h b/include/linux/mount.h index 5d52753..b5f43a3 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -66,7 +66,7 @@ struct vfsmount { int mnt_pinned; int mnt_ghosts; #ifdef CONFIG_SMP - int *mnt_writers; + int __percpu *mnt_writers; #else int mnt_writers; #endif diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 34fc6be..6a2e44f 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -105,7 +105,7 @@ struct nfs_server { struct rpc_clnt * client; /* RPC client handle */ struct rpc_clnt * client_acl; /* ACL RPC client handle */ struct nlm_host *nlm_host; /* NLM client handle */ - struct nfs_iostats * io_stats; /* I/O statistics */ + struct nfs_iostats __percpu *io_stats; /* I/O statistics */ struct backing_dev_info backing_dev_info; atomic_long_t writeback; /* number of writeback pages */ int flags; /* various flags */ -- cgit v1.1 From a29d8b8e2d811a24bbe49215a0f0c536b72ebc18 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 2 Feb 2010 14:39:15 +0900 Subject: percpu: add __percpu sparse annotations to what's left Add __percpu sparse annotations to places which didn't make it in one of the previous patches. All conversions are trivial. These annotations are to make sparse consider percpu variables to be in a different address space and warn if accessed without going through percpu accessors. This patch doesn't affect normal builds. Signed-off-by: Tejun Heo Acked-by: Borislav Petkov Cc: Dan Williams Cc: Huang Ying Cc: Len Brown Cc: Neil Brown --- crypto/cryptd.c | 2 +- drivers/acpi/processor_perflib.c | 2 +- drivers/dma/dmaengine.c | 2 +- drivers/edac/amd64_edac.c | 2 +- drivers/md/raid5.c | 2 +- drivers/md/raid5.h | 2 +- include/acpi/processor.h | 2 +- include/linux/dmaengine.h | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/crypto/cryptd.c b/crypto/cryptd.c index 704c141..ef71318 100644 --- a/crypto/cryptd.c +++ b/crypto/cryptd.c @@ -31,7 +31,7 @@ struct cryptd_cpu_queue { }; struct cryptd_queue { - struct cryptd_cpu_queue *cpu_queue; + struct cryptd_cpu_queue __percpu *cpu_queue; }; struct cryptd_instance_ctx { diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c index 2cabadc..8c6a649 100644 --- a/drivers/acpi/processor_perflib.c +++ b/drivers/acpi/processor_perflib.c @@ -557,7 +557,7 @@ end: } int acpi_processor_preregister_performance( - struct acpi_processor_performance *performance) + struct acpi_processor_performance __percpu *performance) { int count, count_target; int retval = 0; diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index 6f51a0a..4eadd98 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -284,7 +284,7 @@ struct dma_chan_tbl_ent { /** * channel_table - percpu lookup table for memory-to-memory offload providers */ -static struct dma_chan_tbl_ent *channel_table[DMA_TX_TYPE_END]; +static struct dma_chan_tbl_ent __percpu *channel_table[DMA_TX_TYPE_END]; static int __init dma_channel_table_init(void) { diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 000dc67..7b36c88 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -13,7 +13,7 @@ module_param(report_gart_errors, int, 0644); static int ecc_enable_override; module_param(ecc_enable_override, int, 0644); -static struct msr *msrs; +static struct msr __percpu *msrs; /* Lookup table for all possible MC control instances */ struct amd64_pvt; diff --git a/drivers/md/raid5.c
b/drivers/md/raid5.c index e84204e..77cb3ab 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -4680,7 +4680,7 @@ static int raid5_alloc_percpu(raid5_conf_t *conf) { unsigned long cpu; struct page *spare_page; - struct raid5_percpu *allcpus; + struct raid5_percpu __percpu *allcpus; void *scribble; int err; diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index dd70835..0f86f5e 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -405,7 +405,7 @@ struct raid5_private_data { * lists and performing address * conversions */ - } *percpu; + } __percpu *percpu; size_t scribble_len; /* size of scribble region must be * associated with conf to handle * cpu hotplug while reshaping diff --git a/include/acpi/processor.h b/include/acpi/processor.h index 0ea5ef4..477544f 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -238,7 +238,7 @@ struct acpi_processor_errata { extern int acpi_processor_preregister_performance(struct acpi_processor_performance - *performance); + __percpu *performance); extern int acpi_processor_register_performance(struct acpi_processor_performance *performance, unsigned int cpu); diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 7878498..21fd9b7 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -162,7 +162,7 @@ struct dma_chan { struct dma_chan_dev *dev; struct list_head device_node; - struct dma_chan_percpu *local; + struct dma_chan_percpu __percpu *local; int client_count; int table_count; void *private; -- cgit v1.1
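Taken together, these annotation patches establish one usage pattern; below is a minimal sketch with a hypothetical foo_stats structure (not from the patches above). Under __CHECKER__ the __percpu attribute places the pointer in a separate sparse address space, and it compiles away in normal builds:

	#include <linux/percpu.h>

	struct foo_stats {
		unsigned long hits;
	};

	static struct foo_stats __percpu *stats;	/* hypothetical example */

	static int foo_init(void)
	{
		stats = alloc_percpu(struct foo_stats);	/* returns a __percpu pointer */
		if (!stats)
			return -ENOMEM;
		this_cpu_inc(stats->hits);	/* accessors strip the address space;
						   dereferencing stats directly would
						   draw a sparse warning */
		return 0;
	}

	static void foo_exit(void)
	{
		free_percpu(stats);	/* takes void __percpu * after this series */
	}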