diff options
author | Tejun Heo <tj@kernel.org> | 2011-05-02 14:08:43 +0200 |
---|---|---|
committer | Tejun Heo <tj@kernel.org> | 2011-05-02 14:08:47 +0200 |
commit | aff364860aa105b2deacc6f21ec8ef524460e3fc (patch) | |
tree | 18409ebe16b25b141598da9b6386d69416c06afa /arch | |
parent | c7a7b814c9dca9ee01b38e63b4a46de87156d3b6 (diff) | |
parent | 993ba1585cbb03fab012e41d1a5d24330a283b31 (diff) | |
download | kernel_samsung_aries-aff364860aa105b2deacc6f21ec8ef524460e3fc.zip kernel_samsung_aries-aff364860aa105b2deacc6f21ec8ef524460e3fc.tar.gz kernel_samsung_aries-aff364860aa105b2deacc6f21ec8ef524460e3fc.tar.bz2 |
Merge branch 'x86/numa' into x86-mm
Merge reason: Pick up x86-32 remap allocator cleanup changes - 14
commits, 3fe14ab541^..993ba1585c.
3fe14ab541: x86-32, numa: Fix failure condition check in alloc_remap()
993ba1585c: x86-32, numa: Update remap allocator comments
Scheduled NUMA init 32/64bit unification changes depend on them.
Signed-off-by: Tejun Heo <tj@kernel.org>
Diffstat (limited to 'arch')
44 files changed, 220 insertions, 417 deletions
diff --git a/arch/microblaze/include/asm/unistd.h b/arch/microblaze/include/asm/unistd.h index d770b00..30edd61 100644 --- a/arch/microblaze/include/asm/unistd.h +++ b/arch/microblaze/include/asm/unistd.h @@ -386,8 +386,12 @@ #define __NR_fanotify_init 368 #define __NR_fanotify_mark 369 #define __NR_prlimit64 370 +#define __NR_name_to_handle_at 371 +#define __NR_open_by_handle_at 372 +#define __NR_clock_adjtime 373 +#define __NR_syncfs 374 -#define __NR_syscalls 371 +#define __NR_syscalls 375 #ifdef __KERNEL__ #ifndef __ASSEMBLY__ diff --git a/arch/microblaze/kernel/Makefile b/arch/microblaze/kernel/Makefile index f0cb5c2..494b63b 100644 --- a/arch/microblaze/kernel/Makefile +++ b/arch/microblaze/kernel/Makefile @@ -10,6 +10,7 @@ CFLAGS_REMOVE_early_printk.o = -pg CFLAGS_REMOVE_selfmod.o = -pg CFLAGS_REMOVE_heartbeat.o = -pg CFLAGS_REMOVE_ftrace.o = -pg +CFLAGS_REMOVE_process.o = -pg endif extra-y := head.o vmlinux.lds diff --git a/arch/microblaze/kernel/ftrace.c b/arch/microblaze/kernel/ftrace.c index 515feb4..357d56a 100644 --- a/arch/microblaze/kernel/ftrace.c +++ b/arch/microblaze/kernel/ftrace.c @@ -51,6 +51,9 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) : "r" (parent), "r" (return_hooker) ); + flush_dcache_range((u32)parent, (u32)parent + 4); + flush_icache_range((u32)parent, (u32)parent + 4); + if (unlikely(faulted)) { ftrace_graph_stop(); WARN_ON(1); @@ -95,6 +98,9 @@ static int ftrace_modify_code(unsigned long addr, unsigned int value) if (unlikely(faulted)) return -EFAULT; + flush_dcache_range(addr, addr + 4); + flush_icache_range(addr, addr + 4); + return 0; } @@ -195,8 +201,6 @@ int ftrace_update_ftrace_func(ftrace_func_t func) ret += ftrace_modify_code((unsigned long)&ftrace_caller, MICROBLAZE_NOP); - /* All changes are done - lets do caches consistent */ - flush_icache(); return ret; } @@ -210,7 +214,6 @@ int ftrace_enable_ftrace_graph_caller(void) old_jump = *(unsigned int *)ip; /* save jump over instruction */ ret = ftrace_modify_code(ip, MICROBLAZE_NOP); - flush_icache(); pr_debug("%s: Replace instruction: 0x%x\n", __func__, old_jump); return ret; @@ -222,7 +225,6 @@ int ftrace_disable_ftrace_graph_caller(void) unsigned long ip = (unsigned long)(&ftrace_call_graph); ret = ftrace_modify_code(ip, old_jump); - flush_icache(); pr_debug("%s\n", __func__); return ret; diff --git a/arch/microblaze/kernel/intc.c b/arch/microblaze/kernel/intc.c index 5ba7e16..c88f066 100644 --- a/arch/microblaze/kernel/intc.c +++ b/arch/microblaze/kernel/intc.c @@ -158,11 +158,11 @@ void __init init_IRQ(void) for (i = 0; i < nr_irq; ++i) { if (intr_type & (0x00000001 << i)) { irq_set_chip_and_handler_name(i, &intc_dev, - handle_edge_irq, intc_dev.name); + handle_edge_irq, "edge"); irq_clear_status_flags(i, IRQ_LEVEL); } else { irq_set_chip_and_handler_name(i, &intc_dev, - handle_level_irq, intc_dev.name); + handle_level_irq, "level"); irq_set_status_flags(i, IRQ_LEVEL); } } diff --git a/arch/microblaze/kernel/syscall_table.S b/arch/microblaze/kernel/syscall_table.S index e88a930..85cea81 100644 --- a/arch/microblaze/kernel/syscall_table.S +++ b/arch/microblaze/kernel/syscall_table.S @@ -375,3 +375,7 @@ ENTRY(sys_call_table) .long sys_fanotify_init .long sys_fanotify_mark .long sys_prlimit64 /* 370 */ + .long sys_name_to_handle_at + .long sys_open_by_handle_at + .long sys_clock_adjtime + .long sys_syncfs diff --git a/arch/microblaze/lib/Makefile b/arch/microblaze/lib/Makefile index f1fcbff..10c320a 100644 --- a/arch/microblaze/lib/Makefile +++ b/arch/microblaze/lib/Makefile @@ -2,6 +2,12 @@ # Makefile # +ifdef CONFIG_FUNCTION_TRACER +CFLAGS_REMOVE_ashldi3.o = -pg +CFLAGS_REMOVE_ashrdi3.o = -pg +CFLAGS_REMOVE_lshrdi3.o = -pg +endif + lib-y := memset.o ifeq ($(CONFIG_OPT_LIB_ASM),y) diff --git a/arch/powerpc/boot/dts/p1020rdb.dts b/arch/powerpc/boot/dts/p1020rdb.dts index 22f64b6..e0668f8 100644 --- a/arch/powerpc/boot/dts/p1020rdb.dts +++ b/arch/powerpc/boot/dts/p1020rdb.dts @@ -1,7 +1,7 @@ /* * P1020 RDB Device Tree Source * - * Copyright 2009 Freescale Semiconductor Inc. + * Copyright 2009-2011 Freescale Semiconductor Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -553,7 +553,7 @@ reg = <0 0xffe09000 0 0x1000>; bus-range = <0 255>; ranges = <0x2000000 0x0 0xa0000000 0 0xa0000000 0x0 0x20000000 - 0x1000000 0x0 0x00000000 0 0xffc30000 0x0 0x10000>; + 0x1000000 0x0 0x00000000 0 0xffc10000 0x0 0x10000>; clock-frequency = <33333333>; interrupt-parent = <&mpic>; interrupts = <16 2>; @@ -580,8 +580,8 @@ #address-cells = <3>; reg = <0 0xffe0a000 0 0x1000>; bus-range = <0 255>; - ranges = <0x2000000 0x0 0xc0000000 0 0xc0000000 0x0 0x20000000 - 0x1000000 0x0 0x00000000 0 0xffc20000 0x0 0x10000>; + ranges = <0x2000000 0x0 0x80000000 0 0x80000000 0x0 0x20000000 + 0x1000000 0x0 0x00000000 0 0xffc00000 0x0 0x10000>; clock-frequency = <33333333>; interrupt-parent = <&mpic>; interrupts = <16 2>; @@ -590,8 +590,8 @@ #size-cells = <2>; #address-cells = <3>; device_type = "pci"; - ranges = <0x2000000 0x0 0xc0000000 - 0x2000000 0x0 0xc0000000 + ranges = <0x2000000 0x0 0x80000000 + 0x2000000 0x0 0x80000000 0x0 0x20000000 0x1000000 0x0 0x0 diff --git a/arch/powerpc/boot/dts/p2020rdb.dts b/arch/powerpc/boot/dts/p2020rdb.dts index da4cb0d..e2d48fd 100644 --- a/arch/powerpc/boot/dts/p2020rdb.dts +++ b/arch/powerpc/boot/dts/p2020rdb.dts @@ -1,7 +1,7 @@ /* * P2020 RDB Device Tree Source * - * Copyright 2009 Freescale Semiconductor Inc. + * Copyright 2009-2011 Freescale Semiconductor Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -537,7 +537,7 @@ reg = <0 0xffe09000 0 0x1000>; bus-range = <0 255>; ranges = <0x2000000 0x0 0xa0000000 0 0xa0000000 0x0 0x20000000 - 0x1000000 0x0 0x00000000 0 0xffc30000 0x0 0x10000>; + 0x1000000 0x0 0x00000000 0 0xffc10000 0x0 0x10000>; clock-frequency = <33333333>; interrupt-parent = <&mpic>; interrupts = <25 2>; @@ -564,8 +564,8 @@ #address-cells = <3>; reg = <0 0xffe0a000 0 0x1000>; bus-range = <0 255>; - ranges = <0x2000000 0x0 0xc0000000 0 0xc0000000 0x0 0x20000000 - 0x1000000 0x0 0x00000000 0 0xffc20000 0x0 0x10000>; + ranges = <0x2000000 0x0 0x80000000 0 0x80000000 0x0 0x20000000 + 0x1000000 0x0 0x00000000 0 0xffc00000 0x0 0x10000>; clock-frequency = <33333333>; interrupt-parent = <&mpic>; interrupts = <26 2>; @@ -574,8 +574,8 @@ #size-cells = <2>; #address-cells = <3>; device_type = "pci"; - ranges = <0x2000000 0x0 0xc0000000 - 0x2000000 0x0 0xc0000000 + ranges = <0x2000000 0x0 0x80000000 + 0x2000000 0x0 0x80000000 0x0 0x20000000 0x1000000 0x0 0x0 diff --git a/arch/powerpc/boot/dts/p2020rdb_camp_core0.dts b/arch/powerpc/boot/dts/p2020rdb_camp_core0.dts index 0fe93d0..b69c3a5 100644 --- a/arch/powerpc/boot/dts/p2020rdb_camp_core0.dts +++ b/arch/powerpc/boot/dts/p2020rdb_camp_core0.dts @@ -6,7 +6,7 @@ * This dts file allows core0 to have memory, l2, i2c, spi, gpio, dma1, usb, * eth1, eth2, sdhc, crypto, global-util, pci0. * - * Copyright 2009 Freescale Semiconductor Inc. + * Copyright 2009-2011 Freescale Semiconductor Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -342,7 +342,7 @@ reg = <0 0xffe09000 0 0x1000>; bus-range = <0 255>; ranges = <0x2000000 0x0 0xa0000000 0 0xa0000000 0x0 0x20000000 - 0x1000000 0x0 0x00000000 0 0xffc30000 0x0 0x10000>; + 0x1000000 0x0 0x00000000 0 0xffc10000 0x0 0x10000>; clock-frequency = <33333333>; interrupt-parent = <&mpic>; interrupts = <25 2>; diff --git a/arch/powerpc/boot/dts/p2020rdb_camp_core1.dts b/arch/powerpc/boot/dts/p2020rdb_camp_core1.dts index e95a512..7a31d46c 100644 --- a/arch/powerpc/boot/dts/p2020rdb_camp_core1.dts +++ b/arch/powerpc/boot/dts/p2020rdb_camp_core1.dts @@ -7,7 +7,7 @@ * * Please note to add "-b 1" for core1's dts compiling. * - * Copyright 2009 Freescale Semiconductor Inc. + * Copyright 2009-2011 Freescale Semiconductor Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -162,8 +162,8 @@ #address-cells = <3>; reg = <0 0xffe0a000 0 0x1000>; bus-range = <0 255>; - ranges = <0x2000000 0x0 0xc0000000 0 0xc0000000 0x0 0x20000000 - 0x1000000 0x0 0x00000000 0 0xffc20000 0x0 0x10000>; + ranges = <0x2000000 0x0 0x80000000 0 0x80000000 0x0 0x20000000 + 0x1000000 0x0 0x00000000 0 0xffc00000 0x0 0x10000>; clock-frequency = <33333333>; interrupt-parent = <&mpic>; interrupts = <26 2>; @@ -172,8 +172,8 @@ #size-cells = <2>; #address-cells = <3>; device_type = "pci"; - ranges = <0x2000000 0x0 0xc0000000 - 0x2000000 0x0 0xc0000000 + ranges = <0x2000000 0x0 0x80000000 + 0x2000000 0x0 0x80000000 0x0 0x20000000 0x1000000 0x0 0x0 diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index 3d569e2..3d3d416 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -163,7 +163,7 @@ static void crash_kexec_prepare_cpus(int cpu) } /* wait for all the CPUs to hit real mode but timeout if they don't come in */ -#ifdef CONFIG_PPC_STD_MMU_64 +#if defined(CONFIG_PPC_STD_MMU_64) && defined(CONFIG_SMP) static void crash_kexec_wait_realmode(int cpu) { unsigned int msecs; @@ -188,6 +188,8 @@ static void crash_kexec_wait_realmode(int cpu) } mb(); } +#else +static inline void crash_kexec_wait_realmode(int cpu) {} #endif /* @@ -344,9 +346,7 @@ void default_machine_crash_shutdown(struct pt_regs *regs) crash_save_cpu(regs, crashing_cpu); crash_kexec_prepare_cpus(crashing_cpu); cpu_set(crashing_cpu, cpus_in_crash); -#if defined(CONFIG_PPC_STD_MMU_64) && defined(CONFIG_SMP) crash_kexec_wait_realmode(crashing_cpu); -#endif machine_kexec_mask_interrupts(); diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index f4adf89..10f0aad 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -203,7 +203,7 @@ void __init free_unused_pacas(void) { int new_size; - new_size = PAGE_ALIGN(sizeof(struct paca_struct) * num_possible_cpus()); + new_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpu_ids); if (new_size >= paca_size) return; diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 9d4882a..21f30cb 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -509,6 +509,9 @@ void __init smp_setup_cpu_maps(void) */ cpu_init_thread_core_maps(nthreads); + /* Now that possible cpus are set, set nr_cpu_ids for later use */ + nr_cpu_ids = find_last_bit(cpumask_bits(cpu_possible_mask),NR_CPUS) + 1; + free_unused_pacas(); } #endif /* CONFIG_SMP */ diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S index 8526bd9..2228151 100644 --- a/arch/powerpc/mm/tlb_low_64e.S +++ b/arch/powerpc/mm/tlb_low_64e.S @@ -431,7 +431,7 @@ virt_page_table_tlb_miss_fault: * The thing is, we know that in normal circumstances, this is * always called as a second level tlb miss for SW load or as a first * level TLB miss for HW load, so we should be able to peek at the - * relevant informations in the first exception frame in the PACA. + * relevant information in the first exception frame in the PACA. * * However, we do need to double check that, because we may just hit * a stray kernel pointer or a userland attack trying to hit those diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index 2057682..f7b0772 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -46,7 +46,7 @@ config PPC_OF_BOOT_TRAMPOLINE help Support from booting from Open Firmware or yaboot using an Open Firmware client interface. This enables the kernel to - communicate with open firmware to retrieve system informations + communicate with open firmware to retrieve system information such as the device tree. In case of doubt, say Y diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index c319d04..0007241 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -378,7 +378,7 @@ static int __init pSeries_init_panel(void) return 0; } -arch_initcall(pSeries_init_panel); +machine_arch_initcall(pseries, pSeries_init_panel); static int pseries_set_dabr(unsigned long dabr) { diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c index d6479f9..a509c52 100644 --- a/arch/powerpc/platforms/pseries/smp.c +++ b/arch/powerpc/platforms/pseries/smp.c @@ -112,10 +112,10 @@ static inline int __devinit smp_startup_cpu(unsigned int lcpu) /* Fixup atomic count: it exited inside IRQ handler. */ task_thread_info(paca[lcpu].__current)->preempt_count = 0; - +#ifdef CONFIG_HOTPLUG_CPU if (get_cpu_current_state(lcpu) == CPU_STATE_INACTIVE) goto out; - +#endif /* * If the RTAS start-cpu token does not exist then presume the * cpu is already spinning. @@ -130,7 +130,9 @@ static inline int __devinit smp_startup_cpu(unsigned int lcpu) return 0; } +#ifdef CONFIG_HOTPLUG_CPU out: +#endif return 1; } @@ -144,16 +146,15 @@ static void __devinit smp_xics_setup_cpu(int cpu) vpa_init(cpu); cpumask_clear_cpu(cpu, of_spin_mask); +#ifdef CONFIG_HOTPLUG_CPU set_cpu_current_state(cpu, CPU_STATE_ONLINE); set_default_offline_state(cpu); - +#endif } #endif /* CONFIG_XICS */ static void __devinit smp_pSeries_kick_cpu(int nr) { - long rc; - unsigned long hcpuid; BUG_ON(nr < 0 || nr >= NR_CPUS); if (!smp_startup_cpu(nr)) @@ -165,16 +166,20 @@ static void __devinit smp_pSeries_kick_cpu(int nr) * the processor will continue on to secondary_start */ paca[nr].cpu_start = 1; - +#ifdef CONFIG_HOTPLUG_CPU set_preferred_offline_state(nr, CPU_STATE_ONLINE); if (get_cpu_current_state(nr) == CPU_STATE_INACTIVE) { + long rc; + unsigned long hcpuid; + hcpuid = get_hard_smp_processor_id(nr); rc = plpar_hcall_norets(H_PROD, hcpuid); if (rc != H_SUCCESS) printk(KERN_ERR "Error: Prod to wake up processor %d " "Ret= %ld\n", nr, rc); } +#endif } static int smp_pSeries_cpu_bootable(unsigned int nr) diff --git a/arch/unicore32/Makefile b/arch/unicore32/Makefile index e08d6d3..76a8bee 100644 --- a/arch/unicore32/Makefile +++ b/arch/unicore32/Makefile @@ -48,7 +48,7 @@ ASM_GENERIC_HEADERS += bitsperlong.h bug.h bugs.h ASM_GENERIC_HEADERS += cputime.h current.h ASM_GENERIC_HEADERS += device.h div64.h ASM_GENERIC_HEADERS += emergency-restart.h errno.h -ASM_GENERIC_HEADERS += fb.h fcntl.h ftrace.h +ASM_GENERIC_HEADERS += fb.h fcntl.h ftrace.h futex.h ASM_GENERIC_HEADERS += hardirq.h hw_irq.h ASM_GENERIC_HEADERS += ioctl.h ioctls.h ipcbuf.h irq_regs.h ASM_GENERIC_HEADERS += kdebug.h kmap_types.h diff --git a/arch/unicore32/include/asm/futex.h b/arch/unicore32/include/asm/futex.h deleted file mode 100644 index 07dea61..0000000 --- a/arch/unicore32/include/asm/futex.h +++ /dev/null @@ -1,143 +0,0 @@ -/* - * linux/arch/unicore32/include/asm/futex.h - * - * Code specific to PKUnity SoC and UniCore ISA - * - * Copyright (C) 2001-2010 GUAN Xue-tao - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef __UNICORE_FUTEX_H__ -#define __UNICORE_FUTEX_H__ - -#ifdef __KERNEL__ - -#include <linux/futex.h> -#include <linux/preempt.h> -#include <linux/uaccess.h> -#include <linux/errno.h> - -#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \ - __asm__ __volatile__( \ - "1: ldw.u %1, [%2]\n" \ - " " insn "\n" \ - "2: stw.u %0, [%2]\n" \ - " mov %0, #0\n" \ - "3:\n" \ - " .pushsection __ex_table,\"a\"\n" \ - " .align 3\n" \ - " .long 1b, 4f, 2b, 4f\n" \ - " .popsection\n" \ - " .pushsection .fixup,\"ax\"\n" \ - "4: mov %0, %4\n" \ - " b 3b\n" \ - " .popsection" \ - : "=&r" (ret), "=&r" (oldval) \ - : "r" (uaddr), "r" (oparg), "Ir" (-EFAULT) \ - : "cc", "memory") - -static inline int -futex_atomic_op_inuser(int encoded_op, int __user *uaddr) -{ - int op = (encoded_op >> 28) & 7; - int cmp = (encoded_op >> 24) & 15; - int oparg = (encoded_op << 8) >> 20; - int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, ret; - - if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) - oparg = 1 << oparg; - - if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) - return -EFAULT; - - pagefault_disable(); /* implies preempt_disable() */ - - switch (op) { - case FUTEX_OP_SET: - __futex_atomic_op("mov %0, %3", ret, oldval, uaddr, oparg); - break; - case FUTEX_OP_ADD: - __futex_atomic_op("add %0, %1, %3", ret, oldval, uaddr, oparg); - break; - case FUTEX_OP_OR: - __futex_atomic_op("or %0, %1, %3", ret, oldval, uaddr, oparg); - break; - case FUTEX_OP_ANDN: - __futex_atomic_op("and %0, %1, %3", - ret, oldval, uaddr, ~oparg); - break; - case FUTEX_OP_XOR: - __futex_atomic_op("xor %0, %1, %3", ret, oldval, uaddr, oparg); - break; - default: - ret = -ENOSYS; - } - - pagefault_enable(); /* subsumes preempt_enable() */ - - if (!ret) { - switch (cmp) { - case FUTEX_OP_CMP_EQ: - ret = (oldval == cmparg); - break; - case FUTEX_OP_CMP_NE: - ret = (oldval != cmparg); - break; - case FUTEX_OP_CMP_LT: - ret = (oldval < cmparg); - break; - case FUTEX_OP_CMP_GE: - ret = (oldval >= cmparg); - break; - case FUTEX_OP_CMP_LE: - ret = (oldval <= cmparg); - break; - case FUTEX_OP_CMP_GT: - ret = (oldval > cmparg); - break; - default: - ret = -ENOSYS; - } - } - return ret; -} - -static inline int -futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) -{ - int val; - - if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) - return -EFAULT; - - pagefault_disable(); /* implies preempt_disable() */ - - __asm__ __volatile__("@futex_atomic_cmpxchg_inatomic\n" - "1: ldw.u %0, [%3]\n" - " cmpxor.a %0, %1\n" - " bne 3f\n" - "2: stw.u %2, [%3]\n" - "3:\n" - " .pushsection __ex_table,\"a\"\n" - " .align 3\n" - " .long 1b, 4f, 2b, 4f\n" - " .popsection\n" - " .pushsection .fixup,\"ax\"\n" - "4: mov %0, %4\n" - " b 3b\n" - " .popsection" - : "=&r" (val) - : "r" (oldval), "r" (newval), "r" (uaddr), "Ir" (-EFAULT) - : "cc", "memory"); - - pagefault_enable(); /* subsumes preempt_enable() */ - - return val; -} - -#endif /* __KERNEL__ */ -#endif /* __UNICORE_FUTEX_H__ */ diff --git a/arch/unicore32/include/mach/PKUnity.h b/arch/unicore32/include/mach/PKUnity.h index a18bdc3..8040d57 100644 --- a/arch/unicore32/include/mach/PKUnity.h +++ b/arch/unicore32/include/mach/PKUnity.h @@ -24,16 +24,6 @@ #define PKUNITY_MMIO_BASE 0x80000000 /* 0x80000000 - 0xFFFFFFFF 2GB */ /* - * PKUNITY Memory Map Addresses: 0x0D000000 - 0x0EFFFFFF (32MB) - * 0x0D000000 - 0x0DFFFFFF 16MB: for UVC - * 0x0E000000 - 0x0EFFFFFF 16MB: for UNIGFX - */ -#define PKUNITY_UVC_MMAP_BASE 0x0D000000 -#define PKUNITY_UVC_MMAP_SIZE 0x01000000 /* 16MB */ -#define PKUNITY_UNIGFX_MMAP_BASE 0x0E000000 -#define PKUNITY_UNIGFX_MMAP_SIZE 0x01000000 /* 16MB */ - -/* * PKUNITY System Bus Addresses (PCI): 0x80000000 - 0xBFFFFFFF (1GB) * 0x80000000 - 0x8000000B 12B PCI Configuration regs * 0x80010000 - 0x80010250 592B PCI Bridge Base diff --git a/arch/unicore32/include/mach/memory.h b/arch/unicore32/include/mach/memory.h index 0bf21c9..4be72c2 100644 --- a/arch/unicore32/include/mach/memory.h +++ b/arch/unicore32/include/mach/memory.h @@ -50,7 +50,6 @@ void puv3_pci_adjust_zones(unsigned long *size, unsigned long *holes); /* kuser area */ #define KUSER_VECPAGE_BASE (KUSER_BASE + UL(0x3fff0000)) -#define KUSER_UNIGFX_BASE (PAGE_OFFSET + PKUNITY_UNIGFX_MMAP_BASE) /* kuser_vecpage (0xbfff0000) is ro, and vectors page (0xffff0000) is rw */ #define kuser_vecpage_to_vectors(x) ((x) - (KUSER_VECPAGE_BASE) \ + (VECTORS_BASE)) diff --git a/arch/unicore32/kernel/puv3-core.c b/arch/unicore32/kernel/puv3-core.c index 8b1b6be..1a505a7 100644 --- a/arch/unicore32/kernel/puv3-core.c +++ b/arch/unicore32/kernel/puv3-core.c @@ -99,11 +99,6 @@ static struct resource puv3_unigfx_resources[] = { .end = io_v2p(PKUNITY_UNIGFX_BASE) + 0xfff, .flags = IORESOURCE_MEM, }, - [1] = { - .start = PKUNITY_UNIGFX_MMAP_BASE, - .end = PKUNITY_UNIGFX_MMAP_BASE + PKUNITY_UNIGFX_MMAP_SIZE, - .flags = IORESOURCE_MEM, - }, }; static struct resource puv3_rtc_resources[] = { diff --git a/arch/unicore32/kernel/rtc.c b/arch/unicore32/kernel/rtc.c index c5f0682..8cad70b 100644 --- a/arch/unicore32/kernel/rtc.c +++ b/arch/unicore32/kernel/rtc.c @@ -88,11 +88,6 @@ static int puv3_rtc_setpie(struct device *dev, int enabled) return 0; } -static int puv3_rtc_setfreq(struct device *dev, int freq) -{ - return 0; -} - /* Time read/write */ static int puv3_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm) @@ -214,8 +209,6 @@ static const struct rtc_class_ops puv3_rtcops = { .set_time = puv3_rtc_settime, .read_alarm = puv3_rtc_getalarm, .set_alarm = puv3_rtc_setalarm, - .irq_set_freq = puv3_rtc_setfreq, - .irq_set_state = puv3_rtc_setpie, .proc = puv3_rtc_proc, }; @@ -294,8 +287,6 @@ static int puv3_rtc_probe(struct platform_device *pdev) puv3_rtc_enable(pdev, 1); - puv3_rtc_setfreq(&pdev->dev, 1); - /* register RTC and exit */ rtc = rtc_device_register("pkunity", &pdev->dev, &puv3_rtcops, diff --git a/arch/unicore32/kernel/setup.c b/arch/unicore32/kernel/setup.c index 1e175a8..471b6bc 100644 --- a/arch/unicore32/kernel/setup.c +++ b/arch/unicore32/kernel/setup.c @@ -64,12 +64,6 @@ static char default_command_line[COMMAND_LINE_SIZE] __initdata = CONFIG_CMDLINE; */ static struct resource mem_res[] = { { - .name = "Video RAM", - .start = 0, - .end = 0, - .flags = IORESOURCE_MEM - }, - { .name = "Kernel text", .start = 0, .end = 0, @@ -83,9 +77,8 @@ static struct resource mem_res[] = { } }; -#define video_ram mem_res[0] -#define kernel_code mem_res[1] -#define kernel_data mem_res[2] +#define kernel_code mem_res[0] +#define kernel_data mem_res[1] /* * These functions re-use the assembly code in head.S, which @@ -224,10 +217,6 @@ request_standard_resources(struct meminfo *mi) kernel_data.end <= res->end) request_resource(res, &kernel_data); } - - video_ram.start = PKUNITY_UNIGFX_MMAP_BASE; - video_ram.end = PKUNITY_UNIGFX_MMAP_BASE + PKUNITY_UNIGFX_MMAP_SIZE; - request_resource(&iomem_resource, &video_ram); } static void (*init_machine)(void) __initdata; diff --git a/arch/unicore32/kernel/traps.c b/arch/unicore32/kernel/traps.c index 25abbb1..254e36f 100644 --- a/arch/unicore32/kernel/traps.c +++ b/arch/unicore32/kernel/traps.c @@ -22,7 +22,6 @@ #include <linux/delay.h> #include <linux/hardirq.h> #include <linux/init.h> -#include <linux/uaccess.h> #include <linux/atomic.h> #include <linux/unistd.h> diff --git a/arch/unicore32/kernel/vmlinux.lds.S b/arch/unicore32/kernel/vmlinux.lds.S index 0b4eb89..9bf7f7a 100644 --- a/arch/unicore32/kernel/vmlinux.lds.S +++ b/arch/unicore32/kernel/vmlinux.lds.S @@ -14,6 +14,7 @@ #include <asm/thread_info.h> #include <asm/memory.h> #include <asm/page.h> +#include <asm/cache.h> OUTPUT_ARCH(unicore32) ENTRY(stext) @@ -29,7 +30,7 @@ SECTIONS HEAD_TEXT_SECTION INIT_TEXT_SECTION(PAGE_SIZE) INIT_DATA_SECTION(16) - PERCPU(PAGE_SIZE) + PERCPU(L1_CACHE_BYTES, PAGE_SIZE) __init_end = .; _stext = .; @@ -45,10 +46,10 @@ SECTIONS _sdata = .; RO_DATA_SECTION(PAGE_SIZE) - RW_DATA_SECTION(32, PAGE_SIZE, THREAD_SIZE) + RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE) _edata = .; - EXCEPTION_TABLE(32) + EXCEPTION_TABLE(L1_CACHE_BYTES) NOTES BSS_SECTION(0, 0, 0) diff --git a/arch/unicore32/mm/mmu.c b/arch/unicore32/mm/mmu.c index 7bf3d58..db2d334 100644 --- a/arch/unicore32/mm/mmu.c +++ b/arch/unicore32/mm/mmu.c @@ -338,15 +338,6 @@ void __init uc32_mm_memblock_reserve(void) * and can only be in node 0. */ memblock_reserve(__pa(swapper_pg_dir), PTRS_PER_PGD * sizeof(pgd_t)); - -#ifdef CONFIG_PUV3_UNIGFX - /* - * These should likewise go elsewhere. They pre-reserve the - * screen/video memory region at the 48M~64M of main system memory. - */ - memblock_reserve(PKUNITY_UNIGFX_MMAP_BASE, PKUNITY_UNIGFX_MMAP_SIZE); - memblock_reserve(PKUNITY_UVC_MMAP_BASE, PKUNITY_UVC_MMAP_SIZE); -#endif } /* @@ -371,17 +362,6 @@ static void __init devicemaps_init(void) pmd_clear(pmd_off_k(addr)); /* - * Create a mapping for UniGFX VRAM - */ -#ifdef CONFIG_PUV3_UNIGFX - map.pfn = __phys_to_pfn(PKUNITY_UNIGFX_MMAP_BASE); - map.virtual = KUSER_UNIGFX_BASE; - map.length = PKUNITY_UNIGFX_MMAP_SIZE; - map.type = MT_KUSER; - create_mapping(&map); -#endif - - /* * Create a mapping for the machine vectors at the high-vectors * location (0xffff0000). If we aren't using high-vectors, also * create a mapping at the low-vectors virtual address. diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index a279d98..2b7d573 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -2,7 +2,6 @@ #define _ASM_X86_APIC_H #include <linux/cpumask.h> -#include <linux/delay.h> #include <linux/pm.h> #include <asm/alternative.h> diff --git a/arch/x86/include/asm/dma.h b/arch/x86/include/asm/dma.h index 97b6d81..057099e 100644 --- a/arch/x86/include/asm/dma.h +++ b/arch/x86/include/asm/dma.h @@ -10,7 +10,6 @@ #include <linux/spinlock.h> /* And spinlocks */ #include <asm/io.h> /* need byte IO */ -#include <linux/delay.h> #ifdef HAVE_REALLY_SLOW_DMA_CONTROLLER #define dma_outb outb_p diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 910a708..8dba769 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -95,7 +95,6 @@ extern void setup_node_to_cpumask_map(void); #ifdef CONFIG_X86_32 extern unsigned long node_start_pfn[]; extern unsigned long node_end_pfn[]; -extern unsigned long node_remap_size[]; #define node_has_online_mem(nid) (node_start_pfn[nid] != node_end_pfn[nid]) # define SD_CACHE_NICE_TRIES 1 diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 6801959..4c39baa 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -21,7 +21,7 @@ const struct pci_device_id amd_nb_misc_ids[] = { EXPORT_SYMBOL(amd_nb_misc_ids); static struct pci_device_id amd_nb_link_ids[] = { - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_LINK) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F4) }, {} }; diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c index c4e557a..5260fe9 100644 --- a/arch/x86/kernel/apic/hw_nmi.c +++ b/arch/x86/kernel/apic/hw_nmi.c @@ -16,6 +16,7 @@ #include <linux/kprobes.h> #include <linux/nmi.h> #include <linux/module.h> +#include <linux/delay.h> #ifdef CONFIG_HARDLOCKUP_DETECTOR u64 hw_nmi_get_sample_period(void) diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c index 6273eee..0aced70 100644 --- a/arch/x86/kernel/apic/numaq_32.c +++ b/arch/x86/kernel/apic/numaq_32.c @@ -93,10 +93,6 @@ static inline void numaq_register_node(int node, struct sys_cfg_data *scd) node_end_pfn[node]); memory_present(node, node_start_pfn[node], node_end_pfn[node]); - - node_remap_size[node] = node_memmap_size_bytes(node, - node_start_pfn[node], - node_end_pfn[node]); } /* diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 3c28928..33b10a0 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -23,6 +23,8 @@ #include <linux/io.h> #include <linux/pci.h> #include <linux/kdebug.h> +#include <linux/delay.h> +#include <linux/crash_dump.h> #include <asm/uv/uv_mmrs.h> #include <asm/uv/uv_hub.h> @@ -34,6 +36,7 @@ #include <asm/ipi.h> #include <asm/smp.h> #include <asm/x86_init.h> +#include <asm/emergency-restart.h> DEFINE_PER_CPU(int, x2apic_extra_bits); @@ -810,4 +813,11 @@ void __init uv_system_init(void) /* register Legacy VGA I/O redirection handler */ pci_register_set_vga_state(uv_set_vga_state); + + /* + * For a kdump kernel the reset must be BOOT_ACPI, not BOOT_EFI, as + * EFI is not enabled in the kdump kernel. + */ + if (is_kdump_kernel()) + reboot_type = BOOT_ACPI; } diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 5a05ef6..3385ea2 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1626,7 +1626,7 @@ out: static unsigned int mce_poll(struct file *file, poll_table *wait) { poll_wait(file, &mce_wait, wait); - if (rcu_dereference_check_mce(mcelog.next)) + if (rcu_access_index(mcelog.next)) return POLLIN | POLLRDNORM; if (!mce_apei_read_done && apei_check_mce()) return POLLIN | POLLRDNORM; diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 307dfbb..929739a 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -293,14 +293,24 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ /* * HACK! - * We use this same function to initialize the mtrrs on boot. - * The state of the boot cpu's mtrrs has been saved, and we want - * to replicate across all the APs. - * If we're doing that @reg is set to something special... + * + * We use this same function to initialize the mtrrs during boot, + * resume, runtime cpu online and on an explicit request to set a + * specific MTRR. + * + * During boot or suspend, the state of the boot cpu's mtrrs has been + * saved, and we want to replicate that across all the cpus that come + * online (either at the end of boot or resume or during a runtime cpu + * online). If we're doing that, @reg is set to something special and on + * this cpu we still do mtrr_if->set_all(). During boot/resume, this + * is unnecessary if at this point we are still on the cpu that started + * the boot/resume sequence. But there is no guarantee that we are still + * on the same cpu. So we do mtrr_if->set_all() on this cpu aswell to be + * sure that we are in sync with everyone else. */ if (reg != ~0U) mtrr_if->set(reg, base, size, type); - else if (!mtrr_aps_delayed_init) + else mtrr_if->set_all(); /* Wait for the others */ diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 948a31e..1cb0b9f 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -8,6 +8,7 @@ #include <linux/seq_file.h> #include <linux/smp.h> #include <linux/ftrace.h> +#include <linux/delay.h> #include <asm/apic.h> #include <asm/io_apic.h> diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index 5ed0ab5..f924280 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -550,6 +550,7 @@ static void __exit microcode_exit(void) microcode_dev_exit(); unregister_hotcpu_notifier(&mc_cpu_notifier); + unregister_syscore_ops(&mc_syscore_ops); get_online_cpus(); mutex_lock(µcode_mutex); diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index d3ce37e..08c44b0 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -6,6 +6,7 @@ #include <linux/dmi.h> #include <linux/sched.h> #include <linux/tboot.h> +#include <linux/delay.h> #include <acpi/reboot.h> #include <asm/io.h> #include <asm/apic.h> diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index bde3906..c757c0a 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c @@ -104,13 +104,9 @@ extern unsigned long highend_pfn, highstart_pfn; #define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE) -unsigned long node_remap_size[MAX_NUMNODES]; static void *node_remap_start_vaddr[MAX_NUMNODES]; void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); -static unsigned long kva_start_pfn; -static unsigned long kva_pages; - int __cpuinit numa_cpu_node(int cpu) { return apic->x86_32_numa_cpu_node(cpu); @@ -129,7 +125,6 @@ int __init get_memcfg_numa_flat(void) node_end_pfn[0] = max_pfn; memblock_x86_register_active_regions(0, 0, max_pfn); memory_present(0, 0, max_pfn); - node_remap_size[0] = node_memmap_size_bytes(0, 0, max_pfn); /* Indicate there is one node available. */ nodes_clear(node_online_map); @@ -164,9 +159,8 @@ static void __init allocate_pgdat(int nid) { char buf[16]; - if (node_has_online_mem(nid) && node_remap_start_vaddr[nid]) - NODE_DATA(nid) = (pg_data_t *)node_remap_start_vaddr[nid]; - else { + NODE_DATA(nid) = alloc_remap(nid, ALIGN(sizeof(pg_data_t), PAGE_SIZE)); + if (!NODE_DATA(nid)) { unsigned long pgdat_phys; pgdat_phys = memblock_find_in_range(min_low_pfn<<PAGE_SHIFT, max_pfn_mapped<<PAGE_SHIFT, @@ -182,25 +176,38 @@ static void __init allocate_pgdat(int nid) } /* - * In the DISCONTIGMEM and SPARSEMEM memory model, a portion of the kernel - * virtual address space (KVA) is reserved and portions of nodes are mapped - * using it. This is to allow node-local memory to be allocated for - * structures that would normally require ZONE_NORMAL. The memory is - * allocated with alloc_remap() and callers should be prepared to allocate - * from the bootmem allocator instead. + * Remap memory allocator */ static unsigned long node_remap_start_pfn[MAX_NUMNODES]; static void *node_remap_end_vaddr[MAX_NUMNODES]; static void *node_remap_alloc_vaddr[MAX_NUMNODES]; -static unsigned long node_remap_offset[MAX_NUMNODES]; +/** + * alloc_remap - Allocate remapped memory + * @nid: NUMA node to allocate memory from + * @size: The size of allocation + * + * Allocate @size bytes from the remap area of NUMA node @nid. The + * size of the remap area is predetermined by init_alloc_remap() and + * only the callers considered there should call this function. For + * more info, please read the comment on top of init_alloc_remap(). + * + * The caller must be ready to handle allocation failure from this + * function and fall back to regular memory allocator in such cases. + * + * CONTEXT: + * Single CPU early boot context. + * + * RETURNS: + * Pointer to the allocated memory on success, %NULL on failure. + */ void *alloc_remap(int nid, unsigned long size) { void *allocation = node_remap_alloc_vaddr[nid]; size = ALIGN(size, L1_CACHE_BYTES); - if (!allocation || (allocation + size) >= node_remap_end_vaddr[nid]) + if (!allocation || (allocation + size) > node_remap_end_vaddr[nid]) return NULL; node_remap_alloc_vaddr[nid] += size; @@ -209,26 +216,6 @@ void *alloc_remap(int nid, unsigned long size) return allocation; } -static void __init remap_numa_kva(void) -{ - void *vaddr; - unsigned long pfn; - int node; - - for_each_online_node(node) { - printk(KERN_DEBUG "remap_numa_kva: node %d\n", node); - for (pfn=0; pfn < node_remap_size[node]; pfn += PTRS_PER_PTE) { - vaddr = node_remap_start_vaddr[node]+(pfn<<PAGE_SHIFT); - printk(KERN_DEBUG "remap_numa_kva: %08lx to pfn %08lx\n", - (unsigned long)vaddr, - node_remap_start_pfn[node] + pfn); - set_pmd_pfn((ulong) vaddr, - node_remap_start_pfn[node] + pfn, - PAGE_KERNEL_LARGE); - } - } -} - #ifdef CONFIG_HIBERNATION /** * resume_map_numa_kva - add KVA mapping to the temporary page tables created @@ -240,15 +227,16 @@ void resume_map_numa_kva(pgd_t *pgd_base) int node; for_each_online_node(node) { - unsigned long start_va, start_pfn, size, pfn; + unsigned long start_va, start_pfn, nr_pages, pfn; start_va = (unsigned long)node_remap_start_vaddr[node]; start_pfn = node_remap_start_pfn[node]; - size = node_remap_size[node]; + nr_pages = (node_remap_end_vaddr[node] - + node_remap_start_vaddr[node]) >> PAGE_SHIFT; printk(KERN_DEBUG "%s: node %d\n", __func__, node); - for (pfn = 0; pfn < size; pfn += PTRS_PER_PTE) { + for (pfn = 0; pfn < nr_pages; pfn += PTRS_PER_PTE) { unsigned long vaddr = start_va + (pfn << PAGE_SHIFT); pgd_t *pgd = pgd_base + pgd_index(vaddr); pud_t *pud = pud_offset(pgd, vaddr); @@ -264,132 +252,102 @@ void resume_map_numa_kva(pgd_t *pgd_base) } #endif -static __init unsigned long calculate_numa_remap_pages(void) +/** + * init_alloc_remap - Initialize remap allocator for a NUMA node + * @nid: NUMA node to initizlie remap allocator for + * + * NUMA nodes may end up without any lowmem. As allocating pgdat and + * memmap on a different node with lowmem is inefficient, a special + * remap allocator is implemented which can be used by alloc_remap(). + * + * For each node, the amount of memory which will be necessary for + * pgdat and memmap is calculated and two memory areas of the size are + * allocated - one in the node and the other in lowmem; then, the area + * in the node is remapped to the lowmem area. + * + * As pgdat and memmap must be allocated in lowmem anyway, this + * doesn't waste lowmem address space; however, the actual lowmem + * which gets remapped over is wasted. The amount shouldn't be + * problematic on machines this feature will be used. + * + * Initialization failure isn't fatal. alloc_remap() is used + * opportunistically and the callers will fall back to other memory + * allocation mechanisms on failure. + */ +static __init void init_alloc_remap(int nid) { - int nid; - unsigned long size, reserve_pages = 0; + unsigned long size, pfn; + u64 node_pa, remap_pa; + void *remap_va; - for_each_online_node(nid) { - u64 node_kva_target; - u64 node_kva_final; - - /* - * The acpi/srat node info can show hot-add memroy zones - * where memory could be added but not currently present. - */ - printk(KERN_DEBUG "node %d pfn: [%lx - %lx]\n", - nid, node_start_pfn[nid], node_end_pfn[nid]); - if (node_start_pfn[nid] > max_pfn) - continue; - if (!node_end_pfn[nid]) - continue; - if (node_end_pfn[nid] > max_pfn) - node_end_pfn[nid] = max_pfn; - - /* ensure the remap includes space for the pgdat. */ - size = node_remap_size[nid] + sizeof(pg_data_t); - - /* convert size to large (pmd size) pages, rounding up */ - size = (size + LARGE_PAGE_BYTES - 1) / LARGE_PAGE_BYTES; - /* now the roundup is correct, convert to PAGE_SIZE pages */ - size = size * PTRS_PER_PTE; - - node_kva_target = round_down(node_end_pfn[nid] - size, - PTRS_PER_PTE); - node_kva_target <<= PAGE_SHIFT; - do { - node_kva_final = memblock_find_in_range(node_kva_target, - ((u64)node_end_pfn[nid])<<PAGE_SHIFT, - ((u64)size)<<PAGE_SHIFT, - LARGE_PAGE_BYTES); - node_kva_target -= LARGE_PAGE_BYTES; - } while (node_kva_final == MEMBLOCK_ERROR && - (node_kva_target>>PAGE_SHIFT) > (node_start_pfn[nid])); - - if (node_kva_final == MEMBLOCK_ERROR) - panic("Can not get kva ram\n"); - - node_remap_size[nid] = size; - node_remap_offset[nid] = reserve_pages; - reserve_pages += size; - printk(KERN_DEBUG "Reserving %ld pages of KVA for lmem_map of" - " node %d at %llx\n", - size, nid, node_kva_final>>PAGE_SHIFT); - - /* - * prevent kva address below max_low_pfn want it on system - * with less memory later. - * layout will be: KVA address , KVA RAM - * - * we are supposed to only record the one less then max_low_pfn - * but we could have some hole in high memory, and it will only - * check page_is_ram(pfn) && !page_is_reserved_early(pfn) to decide - * to use it as free. - * So memblock_x86_reserve_range here, hope we don't run out of that array - */ - memblock_x86_reserve_range(node_kva_final, - node_kva_final+(((u64)size)<<PAGE_SHIFT), - "KVA RAM"); - - node_remap_start_pfn[nid] = node_kva_final>>PAGE_SHIFT; - } - printk(KERN_INFO "Reserving total of %lx pages for numa KVA remap\n", - reserve_pages); - return reserve_pages; -} + /* + * The acpi/srat node info can show hot-add memroy zones where + * memory could be added but not currently present. + */ + printk(KERN_DEBUG "node %d pfn: [%lx - %lx]\n", + nid, node_start_pfn[nid], node_end_pfn[nid]); + if (node_start_pfn[nid] > max_pfn) + return; + if (!node_end_pfn[nid]) + return; + if (node_end_pfn[nid] > max_pfn) + node_end_pfn[nid] = max_pfn; -static void init_remap_allocator(int nid) -{ - node_remap_start_vaddr[nid] = pfn_to_kaddr( - kva_start_pfn + node_remap_offset[nid]); - node_remap_end_vaddr[nid] = node_remap_start_vaddr[nid] + - (node_remap_size[nid] * PAGE_SIZE); - node_remap_alloc_vaddr[nid] = node_remap_start_vaddr[nid] + - ALIGN(sizeof(pg_data_t), PAGE_SIZE); - - printk(KERN_DEBUG "node %d will remap to vaddr %08lx - %08lx\n", nid, - (ulong) node_remap_start_vaddr[nid], - (ulong) node_remap_end_vaddr[nid]); + /* calculate the necessary space aligned to large page size */ + size = node_memmap_size_bytes(nid, node_start_pfn[nid], + min(node_end_pfn[nid], max_pfn)); + size += ALIGN(sizeof(pg_data_t), PAGE_SIZE); + size = ALIGN(size, LARGE_PAGE_BYTES); + + /* allocate node memory and the lowmem remap area */ + node_pa = memblock_find_in_range(node_start_pfn[nid] << PAGE_SHIFT, + (u64)node_end_pfn[nid] << PAGE_SHIFT, + size, LARGE_PAGE_BYTES); + if (node_pa == MEMBLOCK_ERROR) { + pr_warning("remap_alloc: failed to allocate %lu bytes for node %d\n", + size, nid); + return; + } + memblock_x86_reserve_range(node_pa, node_pa + size, "KVA RAM"); + + remap_pa = memblock_find_in_range(min_low_pfn << PAGE_SHIFT, + max_low_pfn << PAGE_SHIFT, + size, LARGE_PAGE_BYTES); + if (remap_pa == MEMBLOCK_ERROR) { + pr_warning("remap_alloc: failed to allocate %lu bytes remap area for node %d\n", + size, nid); + memblock_x86_free_range(node_pa, node_pa + size); + return; + } + memblock_x86_reserve_range(remap_pa, remap_pa + size, "KVA PG"); + remap_va = phys_to_virt(remap_pa); + + /* perform actual remap */ + for (pfn = 0; pfn < size >> PAGE_SHIFT; pfn += PTRS_PER_PTE) + set_pmd_pfn((unsigned long)remap_va + (pfn << PAGE_SHIFT), + (node_pa >> PAGE_SHIFT) + pfn, + PAGE_KERNEL_LARGE); + + /* initialize remap allocator parameters */ + node_remap_start_pfn[nid] = node_pa >> PAGE_SHIFT; + node_remap_start_vaddr[nid] = remap_va; + node_remap_end_vaddr[nid] = remap_va + size; + node_remap_alloc_vaddr[nid] = remap_va; + + printk(KERN_DEBUG "remap_alloc: node %d [%08llx-%08llx) -> [%p-%p)\n", + nid, node_pa, node_pa + size, remap_va, remap_va + size); } void __init initmem_init(void) { int nid; - long kva_target_pfn; - - /* - * When mapping a NUMA machine we allocate the node_mem_map arrays - * from node local memory. They are then mapped directly into KVA - * between zone normal and vmalloc space. Calculate the size of - * this space and use it to adjust the boundary between ZONE_NORMAL - * and ZONE_HIGHMEM. - */ get_memcfg_numa(); numa_init_array(); - kva_pages = roundup(calculate_numa_remap_pages(), PTRS_PER_PTE); - - kva_target_pfn = round_down(max_low_pfn - kva_pages, PTRS_PER_PTE); - do { - kva_start_pfn = memblock_find_in_range(kva_target_pfn<<PAGE_SHIFT, - max_low_pfn<<PAGE_SHIFT, - kva_pages<<PAGE_SHIFT, - PTRS_PER_PTE<<PAGE_SHIFT) >> PAGE_SHIFT; - kva_target_pfn -= PTRS_PER_PTE; - } while (kva_start_pfn == MEMBLOCK_ERROR && kva_target_pfn > min_low_pfn); - - if (kva_start_pfn == MEMBLOCK_ERROR) - panic("Can not get kva space\n"); - - printk(KERN_INFO "kva_start_pfn ~ %lx max_low_pfn ~ %lx\n", - kva_start_pfn, max_low_pfn); - printk(KERN_INFO "max_pfn = %lx\n", max_pfn); + for_each_online_node(nid) + init_alloc_remap(nid); - /* avoid clash with initrd */ - memblock_x86_reserve_range(kva_start_pfn<<PAGE_SHIFT, - (kva_start_pfn + kva_pages)<<PAGE_SHIFT, - "KVA PG"); #ifdef CONFIG_HIGHMEM highstart_pfn = highend_pfn = max_pfn; if (max_pfn > max_low_pfn) @@ -409,12 +367,8 @@ void __init initmem_init(void) printk(KERN_DEBUG "Low memory ends at vaddr %08lx\n", (ulong) pfn_to_kaddr(max_low_pfn)); - for_each_online_node(nid) { - init_remap_allocator(nid); - + for_each_online_node(nid) allocate_pgdat(nid); - } - remap_numa_kva(); printk(KERN_DEBUG "High memory starts at vaddr %08lx\n", (ulong) pfn_to_kaddr(highstart_pfn)); diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c index 48651c6..1b9e82c 100644 --- a/arch/x86/mm/srat_32.c +++ b/arch/x86/mm/srat_32.c @@ -276,7 +276,6 @@ int __init get_memcfg_from_srat(void) unsigned long end = min(node_end_pfn[nid], max_pfn); memory_present(nid, start, end); - node_remap_size[nid] = node_memmap_size_bytes(nid, start, end); } return 1; out_fail: diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 8dace18..cf97500 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -49,6 +49,10 @@ u64 op_x86_get_ctrl(struct op_x86_model_spec const *model, val |= counter_config->user ? ARCH_PERFMON_EVENTSEL_USR : 0; val |= counter_config->kernel ? ARCH_PERFMON_EVENTSEL_OS : 0; val |= (counter_config->unit_mask & 0xFF) << 8; + counter_config->extra &= (ARCH_PERFMON_EVENTSEL_INV | + ARCH_PERFMON_EVENTSEL_EDGE | + ARCH_PERFMON_EVENTSEL_CMASK); + val |= counter_config->extra; event &= model->event_mask ? model->event_mask : 0xFF; val |= event & 0xFF; val |= (event & 0x0F00) << 24; @@ -440,6 +444,7 @@ static int nmi_create_files(struct super_block *sb, struct dentry *root) oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask); oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel); oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user); + oprofilefs_create_ulong(sb, dir, "extra", &counter_config[i].extra); } return 0; diff --git a/arch/x86/oprofile/op_counter.h b/arch/x86/oprofile/op_counter.h index e28398d..0b7b7b1 100644 --- a/arch/x86/oprofile/op_counter.h +++ b/arch/x86/oprofile/op_counter.h @@ -22,6 +22,7 @@ struct op_counter_config { unsigned long kernel; unsigned long user; unsigned long unit_mask; + unsigned long extra; }; extern struct op_counter_config counter_config[]; diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index a7b38d3..7cb6424 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -11,6 +11,7 @@ #include <linux/debugfs.h> #include <linux/kernel.h> #include <linux/slab.h> +#include <linux/delay.h> #include <asm/mmu_context.h> #include <asm/uv/uv.h> |