diff options
Diffstat (limited to 'arch')
101 files changed, 876 insertions, 486 deletions
diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h index e756d04..b15162f 100644 --- a/arch/alpha/include/asm/atomic.h +++ b/arch/alpha/include/asm/atomic.h @@ -14,8 +14,8 @@ */ -#define ATOMIC_INIT(i) ( (atomic_t) { (i) } ) -#define ATOMIC64_INIT(i) ( (atomic64_t) { (i) } ) +#define ATOMIC_INIT(i) { (i) } +#define ATOMIC64_INIT(i) { (i) } #define atomic_read(v) (*(volatile int *)&(v)->counter) #define atomic64_read(v) (*(volatile long *)&(v)->counter) diff --git a/arch/alpha/include/asm/socket.h b/arch/alpha/include/asm/socket.h index 06edfef..3eeb47c 100644 --- a/arch/alpha/include/asm/socket.h +++ b/arch/alpha/include/asm/socket.h @@ -69,9 +69,11 @@ #define SO_RXQ_OVFL 40 +#ifdef __KERNEL__ /* O_NONBLOCK clashes with the bits used for socket types. Therefore we * have to define SOCK_NONBLOCK to a different value here. */ #define SOCK_NONBLOCK 0x40000000 +#endif /* __KERNEL__ */ #endif /* _ASM_SOCKET_H */ diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index f69529e..b83dcf4 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1235,6 +1235,42 @@ config ARM_ERRATA_754327 This workaround defines cpu_relax() as smp_mb(), preventing correctly written polling loops from denying visibility of updates to memory. +config ARM_ERRATA_764369 + bool "ARM errata: Data cache line maintenance operation by MVA may not succeed" + depends on CPU_V7 && SMP + help + This option enables the workaround for erratum 764369 + affecting Cortex-A9 MPCore with two or more processors (all + current revisions). Under certain timing circumstances, a data + cache line maintenance operation by MVA targeting an Inner + Shareable memory region may fail to proceed up to either the + Point of Coherency or to the Point of Unification of the + system. This workaround adds a DSB instruction before the + relevant cache maintenance functions and sets a specific bit + in the diagnostic control register of the SCU. + +config PL310_ERRATA_769419 + bool "PL310 errata: no automatic Store Buffer drain" + depends on CACHE_L2X0 + help + On revisions of the PL310 prior to r3p2, the Store Buffer does + not automatically drain. This can cause normal, non-cacheable + writes to be retained when the memory system is idle, leading + to suboptimal I/O performance for drivers using coherent DMA. + This option adds a write barrier to the cpu_idle loop so that, + on systems with an outer cache, the store buffer is drained + explicitly. + +config ARM_ERRATA_775420 + bool "ARM errata: A data cache maintenance operation which aborts, might lead to deadlock" + depends on CPU_V7 + help + This option enables the workaround for the 775420 Cortex-A9 (r2p2, + r2p6,r2p8,r2p10,r3p0) erratum. In case a date cache maintenance + operation aborts with MMU exception, it might cause the processor + to deadlock. This workaround puts DSB before executing ISB if + an abort may occur on cache maintenance. + endmenu source "arch/arm/common/Kconfig" @@ -1299,32 +1335,6 @@ source "drivers/pci/Kconfig" source "drivers/pcmcia/Kconfig" -config ARM_ERRATA_764369 - bool "ARM errata: Data cache line maintenance operation by MVA may not succeed" - depends on CPU_V7 && SMP - help - This option enables the workaround for erratum 764369 - affecting Cortex-A9 MPCore with two or more processors (all - current revisions). Under certain timing circumstances, a data - cache line maintenance operation by MVA targeting an Inner - Shareable memory region may fail to proceed up to either the - Point of Coherency or to the Point of Unification of the - system. This workaround adds a DSB instruction before the - relevant cache maintenance functions and sets a specific bit - in the diagnostic control register of the SCU. - -config PL310_ERRATA_769419 - bool "PL310 errata: no automatic Store Buffer drain" - depends on CACHE_L2X0 - help - On revisions of the PL310 prior to r3p2, the Store Buffer does - not automatically drain. This can cause normal, non-cacheable - writes to be retained when the memory system is idle, leading - to suboptimal I/O performance for drivers using coherent DMA. - This option adds a write barrier to the cpu_idle loop so that, - on systems with an outer cache, the store buffer is drained - explicitly. - endmenu menu "Kernel Features" @@ -1886,6 +1896,7 @@ source "drivers/cpufreq/Kconfig" config CPU_FREQ_IMX tristate "CPUfreq driver for i.MX CPUs" depends on ARCH_MXC && CPU_FREQ + select CPU_FREQ_TABLE help This enables the CPUfreq driver for i.MX CPUs. diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index e58603b..caddb9d 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -539,6 +539,7 @@ __armv7_mmu_cache_on: mcrne p15, 0, r0, c8, c7, 0 @ flush I,D TLBs #endif mrc p15, 0, r0, c1, c0, 0 @ read control reg + bic r0, r0, #1 << 28 @ clear SCTLR.TRE orr r0, r0, #0x5000 @ I-cache enable, RR cache replacement orr r0, r0, #0x003c @ write buffer #ifdef CONFIG_MMU diff --git a/arch/arm/configs/mxs_defconfig b/arch/arm/configs/mxs_defconfig index 2bf2243..166d6aa 100644 --- a/arch/arm/configs/mxs_defconfig +++ b/arch/arm/configs/mxs_defconfig @@ -29,7 +29,6 @@ CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y CONFIG_PREEMPT_VOLUNTARY=y CONFIG_AEABI=y -CONFIG_DEFAULT_MMAP_MIN_ADDR=65536 CONFIG_AUTO_ZRELADDR=y CONFIG_FPE_NWFPE=y CONFIG_NET=y diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index 1252a26..42dec04 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -215,7 +215,9 @@ static inline void vivt_flush_cache_mm(struct mm_struct *mm) static inline void vivt_flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) + struct mm_struct *mm = vma->vm_mm; + + if (!mm || cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm))) __cpuc_flush_user_range(start & PAGE_MASK, PAGE_ALIGN(end), vma->vm_flags); } @@ -223,7 +225,9 @@ vivt_flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned static inline void vivt_flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr, unsigned long pfn) { - if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) { + struct mm_struct *mm = vma->vm_mm; + + if (!mm || cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm))) { unsigned long addr = user_addr & PAGE_MASK; __cpuc_flush_user_range(addr, addr + PAGE_SIZE, vma->vm_flags); } diff --git a/arch/arm/include/asm/mutex.h b/arch/arm/include/asm/mutex.h index 93226cf..b1479fd 100644 --- a/arch/arm/include/asm/mutex.h +++ b/arch/arm/include/asm/mutex.h @@ -7,121 +7,10 @@ */ #ifndef _ASM_MUTEX_H #define _ASM_MUTEX_H - -#if __LINUX_ARM_ARCH__ < 6 -/* On pre-ARMv6 hardware the swp based implementation is the most efficient. */ -# include <asm-generic/mutex-xchg.h> -#else - /* - * Attempting to lock a mutex on ARMv6+ can be done with a bastardized - * atomic decrement (it is not a reliable atomic decrement but it satisfies - * the defined semantics for our purpose, while being smaller and faster - * than a real atomic decrement or atomic swap. The idea is to attempt - * decrementing the lock value only once. If once decremented it isn't zero, - * or if its store-back fails due to a dispute on the exclusive store, we - * simply bail out immediately through the slow path where the lock will be - * reattempted until it succeeds. + * On pre-ARMv6 hardware this results in a swp-based implementation, + * which is the most efficient. For ARMv6+, we emit a pair of exclusive + * accesses instead. */ -static inline void -__mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *)) -{ - int __ex_flag, __res; - - __asm__ ( - - "ldrex %0, [%2] \n\t" - "sub %0, %0, #1 \n\t" - "strex %1, %0, [%2] " - - : "=&r" (__res), "=&r" (__ex_flag) - : "r" (&(count)->counter) - : "cc","memory" ); - - __res |= __ex_flag; - if (unlikely(__res != 0)) - fail_fn(count); -} - -static inline int -__mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *)) -{ - int __ex_flag, __res; - - __asm__ ( - - "ldrex %0, [%2] \n\t" - "sub %0, %0, #1 \n\t" - "strex %1, %0, [%2] " - - : "=&r" (__res), "=&r" (__ex_flag) - : "r" (&(count)->counter) - : "cc","memory" ); - - __res |= __ex_flag; - if (unlikely(__res != 0)) - __res = fail_fn(count); - return __res; -} - -/* - * Same trick is used for the unlock fast path. However the original value, - * rather than the result, is used to test for success in order to have - * better generated assembly. - */ -static inline void -__mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *)) -{ - int __ex_flag, __res, __orig; - - __asm__ ( - - "ldrex %0, [%3] \n\t" - "add %1, %0, #1 \n\t" - "strex %2, %1, [%3] " - - : "=&r" (__orig), "=&r" (__res), "=&r" (__ex_flag) - : "r" (&(count)->counter) - : "cc","memory" ); - - __orig |= __ex_flag; - if (unlikely(__orig != 0)) - fail_fn(count); -} - -/* - * If the unlock was done on a contended lock, or if the unlock simply fails - * then the mutex remains locked. - */ -#define __mutex_slowpath_needs_to_unlock() 1 - -/* - * For __mutex_fastpath_trylock we use another construct which could be - * described as a "single value cmpxchg". - * - * This provides the needed trylock semantics like cmpxchg would, but it is - * lighter and less generic than a true cmpxchg implementation. - */ -static inline int -__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) -{ - int __ex_flag, __res, __orig; - - __asm__ ( - - "1: ldrex %0, [%3] \n\t" - "subs %1, %0, #1 \n\t" - "strexeq %2, %1, [%3] \n\t" - "movlt %0, #0 \n\t" - "cmpeq %2, #0 \n\t" - "bgt 1b " - - : "=&r" (__orig), "=&r" (__res), "=&r" (__ex_flag) - : "r" (&count->counter) - : "cc", "memory" ); - - return __orig; -} - -#endif +#include <asm-generic/mutex-xchg.h> #endif diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index 5750704..6afd081 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -360,6 +360,18 @@ static inline pte_t *pmd_page_vaddr(pmd_t pmd) #define set_pte_ext(ptep,pte,ext) cpu_set_pte_ext(ptep,pte,ext) #define pte_clear(mm,addr,ptep) set_pte_ext(ptep, __pte(0), 0) +#define pte_none(pte) (!pte_val(pte)) +#define pte_present(pte) (pte_val(pte) & L_PTE_PRESENT) +#define pte_write(pte) (!(pte_val(pte) & L_PTE_RDONLY)) +#define pte_dirty(pte) (pte_val(pte) & L_PTE_DIRTY) +#define pte_young(pte) (pte_val(pte) & L_PTE_YOUNG) +#define pte_exec(pte) (!(pte_val(pte) & L_PTE_XN)) +#define pte_special(pte) (0) + +#define pte_present_user(pte) \ + ((pte_val(pte) & (L_PTE_PRESENT | L_PTE_USER)) == \ + (L_PTE_PRESENT | L_PTE_USER)) + #if __LINUX_ARM_ARCH__ < 6 static inline void __sync_icache_dcache(pte_t pteval) { @@ -371,25 +383,15 @@ extern void __sync_icache_dcache(pte_t pteval); static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pteval) { - if (addr >= TASK_SIZE) - set_pte_ext(ptep, pteval, 0); - else { + unsigned long ext = 0; + + if (addr < TASK_SIZE && pte_present_user(pteval)) { __sync_icache_dcache(pteval); - set_pte_ext(ptep, pteval, PTE_EXT_NG); + ext |= PTE_EXT_NG; } -} -#define pte_none(pte) (!pte_val(pte)) -#define pte_present(pte) (pte_val(pte) & L_PTE_PRESENT) -#define pte_write(pte) (!(pte_val(pte) & L_PTE_RDONLY)) -#define pte_dirty(pte) (pte_val(pte) & L_PTE_DIRTY) -#define pte_young(pte) (pte_val(pte) & L_PTE_YOUNG) -#define pte_exec(pte) (!(pte_val(pte) & L_PTE_XN)) -#define pte_special(pte) (0) - -#define pte_present_user(pte) \ - ((pte_val(pte) & (L_PTE_PRESENT | L_PTE_USER)) == \ - (L_PTE_PRESENT | L_PTE_USER)) + set_pte_ext(ptep, pteval, ext); +} #define PTE_BIT_FUNC(fn,op) \ static inline pte_t pte_##fn(pte_t pte) { pte_val(pte) op; return pte; } @@ -416,13 +418,13 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) * * 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 - * <--------------- offset --------------------> <- type --> 0 0 0 + * <--------------- offset ----------------------> < type -> 0 0 0 * - * This gives us up to 63 swap files and 32GB per swap file. Note that + * This gives us up to 31 swap files and 64GB per swap file. Note that * the offset field is always non-zero. */ #define __SWP_TYPE_SHIFT 3 -#define __SWP_TYPE_BITS 6 +#define __SWP_TYPE_BITS 5 #define __SWP_TYPE_MASK ((1 << __SWP_TYPE_BITS) - 1) #define __SWP_OFFSET_SHIFT (__SWP_TYPE_BITS + __SWP_TYPE_SHIFT) diff --git a/arch/arm/include/asm/vfpmacros.h b/arch/arm/include/asm/vfpmacros.h index 3d5fc41..bf53047 100644 --- a/arch/arm/include/asm/vfpmacros.h +++ b/arch/arm/include/asm/vfpmacros.h @@ -28,7 +28,7 @@ ldr \tmp, =elf_hwcap @ may not have MVFR regs ldr \tmp, [\tmp, #0] tst \tmp, #HWCAP_VFPv3D16 - ldceq p11, cr0, [\base],#32*4 @ FLDMIAD \base!, {d16-d31} + ldceql p11, cr0, [\base],#32*4 @ FLDMIAD \base!, {d16-d31} addne \base, \base, #32*4 @ step over unused register space #else VFPFMRX \tmp, MVFR0 @ Media and VFP Feature Register 0 @@ -52,7 +52,7 @@ ldr \tmp, =elf_hwcap @ may not have MVFR regs ldr \tmp, [\tmp, #0] tst \tmp, #HWCAP_VFPv3D16 - stceq p11, cr0, [\base],#32*4 @ FSTMIAD \base!, {d16-d31} + stceql p11, cr0, [\base],#32*4 @ FSTMIAD \base!, {d16-d31} addne \base, \base, #32*4 @ step over unused register space #else VFPFMRX \tmp, MVFR0 @ Media and VFP Feature Register 0 diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 9739bb8..e895f97 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -278,20 +278,26 @@ static void __cpuinit smp_store_cpu_info(unsigned int cpuid) asmlinkage void __cpuinit secondary_start_kernel(void) { struct mm_struct *mm = &init_mm; - unsigned int cpu = smp_processor_id(); + unsigned int cpu; - printk("CPU%u: Booted secondary processor\n", cpu); + /* + * The identity mapping is uncached (strongly ordered), so + * switch away from it before attempting any exclusive accesses. + */ + cpu_switch_mm(mm->pgd, mm); + enter_lazy_tlb(mm, current); + local_flush_tlb_all(); /* * All kernel threads share the same mm context; grab a * reference and switch to it. */ + cpu = smp_processor_id(); atomic_inc(&mm->mm_count); current->active_mm = mm; cpumask_set_cpu(cpu, mm_cpumask(mm)); - cpu_switch_mm(mm->pgd, mm); - enter_lazy_tlb(mm, current); - local_flush_tlb_all(); + + printk("CPU%u: Booted secondary processor\n", cpu); cpu_init(); preempt_disable(); @@ -450,9 +456,7 @@ static DEFINE_PER_CPU(struct clock_event_device, percpu_clockevent); static void ipi_timer(void) { struct clock_event_device *evt = &__get_cpu_var(percpu_clockevent); - irq_enter(); evt->event_handler(evt); - irq_exit(); } #ifdef CONFIG_LOCAL_TIMERS @@ -463,7 +467,9 @@ asmlinkage void __exception_irq_entry do_local_timer(struct pt_regs *regs) if (local_timer_ack()) { __inc_irq_stat(cpu, local_timer_irqs); + irq_enter(); ipi_timer(); + irq_exit(); } set_irq_regs(old_regs); @@ -625,7 +631,9 @@ asmlinkage void __exception_irq_entry do_IPI(int ipinr, struct pt_regs *regs) switch (ipinr) { case IPI_TIMER: + irq_enter(); ipi_timer(); + irq_exit(); break; case IPI_RESCHEDULE: @@ -633,15 +641,21 @@ asmlinkage void __exception_irq_entry do_IPI(int ipinr, struct pt_regs *regs) break; case IPI_CALL_FUNC: + irq_enter(); generic_smp_call_function_interrupt(); + irq_exit(); break; case IPI_CALL_FUNC_SINGLE: + irq_enter(); generic_smp_call_function_single_interrupt(); + irq_exit(); break; case IPI_CPU_STOP: + irq_enter(); ipi_cpu_stop(cpu); + irq_exit(); break; case IPI_CPU_BACKTRACE: diff --git a/arch/arm/kernel/sys_arm.c b/arch/arm/kernel/sys_arm.c index 62e7c61..0264ab4 100644 --- a/arch/arm/kernel/sys_arm.c +++ b/arch/arm/kernel/sys_arm.c @@ -115,7 +115,7 @@ int kernel_execve(const char *filename, "Ir" (THREAD_START_SP - sizeof(regs)), "r" (®s), "Ir" (sizeof(regs)) - : "r0", "r1", "r2", "r3", "ip", "lr", "memory"); + : "r0", "r1", "r2", "r3", "r8", "r9", "ip", "lr", "memory"); out: return ret; diff --git a/arch/arm/mach-at91/at91rm9200_devices.c b/arch/arm/mach-at91/at91rm9200_devices.c index 7227755..871a818 100644 --- a/arch/arm/mach-at91/at91rm9200_devices.c +++ b/arch/arm/mach-at91/at91rm9200_devices.c @@ -454,7 +454,7 @@ static struct i2c_gpio_platform_data pdata = { static struct platform_device at91rm9200_twi_device = { .name = "i2c-gpio", - .id = -1, + .id = 0, .dev.platform_data = &pdata, }; diff --git a/arch/arm/mach-at91/at91sam9260_devices.c b/arch/arm/mach-at91/at91sam9260_devices.c index 39f81f4..89a8414 100644 --- a/arch/arm/mach-at91/at91sam9260_devices.c +++ b/arch/arm/mach-at91/at91sam9260_devices.c @@ -459,7 +459,7 @@ static struct i2c_gpio_platform_data pdata = { static struct platform_device at91sam9260_twi_device = { .name = "i2c-gpio", - .id = -1, + .id = 0, .dev.platform_data = &pdata, }; diff --git a/arch/arm/mach-at91/at91sam9261_devices.c b/arch/arm/mach-at91/at91sam9261_devices.c index 5004bf0..5d43cf4 100644 --- a/arch/arm/mach-at91/at91sam9261_devices.c +++ b/arch/arm/mach-at91/at91sam9261_devices.c @@ -276,7 +276,7 @@ static struct i2c_gpio_platform_data pdata = { static struct platform_device at91sam9261_twi_device = { .name = "i2c-gpio", - .id = -1, + .id = 0, .dev.platform_data = &pdata, }; diff --git a/arch/arm/mach-at91/at91sam9263_devices.c b/arch/arm/mach-at91/at91sam9263_devices.c index a050f41..2bbd163 100644 --- a/arch/arm/mach-at91/at91sam9263_devices.c +++ b/arch/arm/mach-at91/at91sam9263_devices.c @@ -534,7 +534,7 @@ static struct i2c_gpio_platform_data pdata = { static struct platform_device at91sam9263_twi_device = { .name = "i2c-gpio", - .id = -1, + .id = 0, .dev.platform_data = &pdata, }; diff --git a/arch/arm/mach-at91/at91sam9rl_devices.c b/arch/arm/mach-at91/at91sam9rl_devices.c index aacb19d..659870e 100644 --- a/arch/arm/mach-at91/at91sam9rl_devices.c +++ b/arch/arm/mach-at91/at91sam9rl_devices.c @@ -319,7 +319,7 @@ static struct i2c_gpio_platform_data pdata = { static struct platform_device at91sam9rl_twi_device = { .name = "i2c-gpio", - .id = -1, + .id = 0, .dev.platform_data = &pdata, }; diff --git a/arch/arm/mach-imx/mach-mx21ads.c b/arch/arm/mach-imx/mach-mx21ads.c index 74ac889..a37fe02 100644 --- a/arch/arm/mach-imx/mach-mx21ads.c +++ b/arch/arm/mach-imx/mach-mx21ads.c @@ -32,7 +32,7 @@ * Memory-mapped I/O on MX21ADS base board */ #define MX21ADS_MMIO_BASE_ADDR 0xf5000000 -#define MX21ADS_MMIO_SIZE SZ_16M +#define MX21ADS_MMIO_SIZE 0xc00000 #define MX21ADS_REG_ADDR(offset) (void __force __iomem *) \ (MX21ADS_MMIO_BASE_ADDR + (offset)) diff --git a/arch/arm/mach-omap2/opp.c b/arch/arm/mach-omap2/opp.c index ab8b35b..0627494 100644 --- a/arch/arm/mach-omap2/opp.c +++ b/arch/arm/mach-omap2/opp.c @@ -53,7 +53,7 @@ int __init omap_init_opp_table(struct omap_opp_def *opp_def, omap_table_init = 1; /* Lets now register with OPP library */ - for (i = 0; i < opp_def_size; i++) { + for (i = 0; i < opp_def_size; i++, opp_def++) { struct omap_hwmod *oh; struct device *dev; @@ -86,7 +86,6 @@ int __init omap_init_opp_table(struct omap_opp_def *opp_def, __func__, opp_def->freq, opp_def->hwmod_name, i, r); } - opp_def++; } return 0; diff --git a/arch/arm/mach-orion5x/mpp.h b/arch/arm/mach-orion5x/mpp.h index eac6897..db70e79 100644 --- a/arch/arm/mach-orion5x/mpp.h +++ b/arch/arm/mach-orion5x/mpp.h @@ -65,8 +65,8 @@ #define MPP8_GIGE MPP(8, 0x1, 0, 0, 1, 1, 1) #define MPP9_UNUSED MPP(9, 0x0, 0, 0, 1, 1, 1) -#define MPP9_GPIO MPP(9, 0x0, 0, 0, 1, 1, 1) -#define MPP9_GIGE MPP(9, 0x1, 1, 1, 1, 1, 1) +#define MPP9_GPIO MPP(9, 0x0, 1, 1, 1, 1, 1) +#define MPP9_GIGE MPP(9, 0x1, 0, 0, 1, 1, 1) #define MPP10_UNUSED MPP(10, 0x0, 0, 0, 1, 1, 1) #define MPP10_GPIO MPP(10, 0x0, 1, 1, 1, 1, 1) diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S index 1ed1fd3..428b243 100644 --- a/arch/arm/mm/cache-v7.S +++ b/arch/arm/mm/cache-v7.S @@ -211,6 +211,9 @@ ENTRY(v7_coherent_user_range) * isn't mapped, just try the next page. */ 9001: +#ifdef CONFIG_ARM_ERRATA_775420 + dsb +#endif mov r12, r12, lsr #12 mov r12, r12, lsl #12 add r12, r12, #4096 diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index bc0e1d8..8799eae 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -266,7 +266,9 @@ good_area: return fault; check_stack: - if (vma->vm_flags & VM_GROWSDOWN && !expand_stack(vma, addr)) + /* Don't allow expansion below FIRST_USER_ADDRESS */ + if (vma->vm_flags & VM_GROWSDOWN && + addr >= FIRST_USER_ADDRESS && !expand_stack(vma, addr)) goto good_area; out: return fault; diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c index 1a8d4aa..8fda9f7 100644 --- a/arch/arm/mm/flush.c +++ b/arch/arm/mm/flush.c @@ -236,8 +236,6 @@ void __sync_icache_dcache(pte_t pteval) struct page *page; struct address_space *mapping; - if (!pte_present_user(pteval)) - return; if (cache_is_vipt_nonaliasing() && !pte_exec(pteval)) /* only flush non-aliasing VIPT caches for exec mappings */ return; diff --git a/arch/arm/mm/tlb-v7.S b/arch/arm/mm/tlb-v7.S index 53cd5b4..d7d0f7f 100644 --- a/arch/arm/mm/tlb-v7.S +++ b/arch/arm/mm/tlb-v7.S @@ -39,10 +39,18 @@ ENTRY(v7wbi_flush_user_tlb_range) mov r0, r0, lsr #PAGE_SHIFT @ align address mov r1, r1, lsr #PAGE_SHIFT asid r3, r3 @ mask ASID +#ifdef CONFIG_ARM_ERRATA_720789 + ALT_SMP(W(mov) r3, #0 ) + ALT_UP(W(nop) ) +#endif orr r0, r3, r0, lsl #PAGE_SHIFT @ Create initial MVA mov r1, r1, lsl #PAGE_SHIFT 1: +#ifdef CONFIG_ARM_ERRATA_720789 + ALT_SMP(mcr p15, 0, r0, c8, c3, 3) @ TLB invalidate U MVA all ASID (shareable) +#else ALT_SMP(mcr p15, 0, r0, c8, c3, 1) @ TLB invalidate U MVA (shareable) +#endif ALT_UP(mcr p15, 0, r0, c8, c7, 1) @ TLB invalidate U MVA add r0, r0, #PAGE_SZ @@ -70,7 +78,11 @@ ENTRY(v7wbi_flush_kern_tlb_range) mov r0, r0, lsl #PAGE_SHIFT mov r1, r1, lsl #PAGE_SHIFT 1: +#ifdef CONFIG_ARM_ERRATA_720789 + ALT_SMP(mcr p15, 0, r0, c8, c3, 3) @ TLB invalidate U MVA all ASID (shareable) +#else ALT_SMP(mcr p15, 0, r0, c8, c3, 1) @ TLB invalidate U MVA (shareable) +#endif ALT_UP(mcr p15, 0, r0, c8, c7, 1) @ TLB invalidate U MVA add r0, r0, #PAGE_SZ cmp r0, r1 diff --git a/arch/arm/plat-s3c24xx/dma.c b/arch/arm/plat-s3c24xx/dma.c index 0719f49..9f422ba 100644 --- a/arch/arm/plat-s3c24xx/dma.c +++ b/arch/arm/plat-s3c24xx/dma.c @@ -431,7 +431,7 @@ s3c2410_dma_canload(struct s3c2410_dma_chan *chan) * when necessary. */ -int s3c2410_dma_enqueue(unsigned int channel, void *id, +int s3c2410_dma_enqueue(enum dma_ch channel, void *id, dma_addr_t data, int size) { struct s3c2410_dma_chan *chan = s3c_dma_lookup_channel(channel); diff --git a/arch/arm/plat-samsung/adc.c b/arch/arm/plat-samsung/adc.c index e8f2be2..df14954 100644 --- a/arch/arm/plat-samsung/adc.c +++ b/arch/arm/plat-samsung/adc.c @@ -143,11 +143,13 @@ int s3c_adc_start(struct s3c_adc_client *client, return -EINVAL; } - if (client->is_ts && adc->ts_pend) - return -EAGAIN; - spin_lock_irqsave(&adc->lock, flags); + if (client->is_ts && adc->ts_pend) { + spin_unlock_irqrestore(&adc->lock, flags); + return -EAGAIN; + } + client->channel = channel; client->nr_samples = nr_samples; diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h index 4468814..6fcc9a0 100644 --- a/arch/ia64/include/asm/atomic.h +++ b/arch/ia64/include/asm/atomic.h @@ -18,8 +18,8 @@ #include <asm/system.h> -#define ATOMIC_INIT(i) ((atomic_t) { (i) }) -#define ATOMIC64_INIT(i) ((atomic64_t) { (i) }) +#define ATOMIC_INIT(i) { (i) } +#define ATOMIC64_INIT(i) { (i) } #define atomic_read(v) (*(volatile int *)&(v)->counter) #define atomic64_read(v) (*(volatile long *)&(v)->counter) diff --git a/arch/ia64/include/asm/unistd.h b/arch/ia64/include/asm/unistd.h index 7c928da..d8de182 100644 --- a/arch/ia64/include/asm/unistd.h +++ b/arch/ia64/include/asm/unistd.h @@ -321,11 +321,12 @@ #define __NR_syncfs 1329 #define __NR_setns 1330 #define __NR_sendmmsg 1331 +#define __NR_accept4 1334 #ifdef __KERNEL__ -#define NR_syscalls 308 /* length of syscall table */ +#define NR_syscalls 311 /* length of syscall table */ /* * The following defines stop scripts/checksyscalls.sh from complaining about diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index 97dd2ab..df477f8 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -1777,6 +1777,9 @@ sys_call_table: data8 sys_syncfs data8 sys_setns // 1330 data8 sys_sendmmsg + data8 sys_ni_syscall /* process_vm_readv */ + data8 sys_ni_syscall /* process_vm_writev */ + data8 sys_accept4 .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls #endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */ diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c index 782c3a35..3540c5e 100644 --- a/arch/ia64/kernel/irq_ia64.c +++ b/arch/ia64/kernel/irq_ia64.c @@ -23,7 +23,6 @@ #include <linux/ioport.h> #include <linux/kernel_stat.h> #include <linux/ptrace.h> -#include <linux/random.h> /* for rand_initialize_irq() */ #include <linux/signal.h> #include <linux/smp.h> #include <linux/threads.h> diff --git a/arch/m68k/include/asm/entry_mm.h b/arch/m68k/include/asm/entry_mm.h index 73b8c8f..bdace4b 100644 --- a/arch/m68k/include/asm/entry_mm.h +++ b/arch/m68k/include/asm/entry_mm.h @@ -35,8 +35,8 @@ /* the following macro is used when enabling interrupts */ #if defined(MACH_ATARI_ONLY) - /* block out HSYNC on the atari */ -#define ALLOWINT (~0x400) + /* block out HSYNC = ipl 2 on the atari */ +#define ALLOWINT (~0x500) #define MAX_NOINT_IPL 3 #else /* portable version */ diff --git a/arch/m68k/kernel/sys_m68k.c b/arch/m68k/kernel/sys_m68k.c index 8623f8d..9a5932e 100644 --- a/arch/m68k/kernel/sys_m68k.c +++ b/arch/m68k/kernel/sys_m68k.c @@ -479,9 +479,13 @@ sys_atomic_cmpxchg_32(unsigned long newval, int oldval, int d3, int d4, int d5, goto bad_access; } - mem_value = *mem; + /* + * No need to check for EFAULT; we know that the page is + * present and writable. + */ + __get_user(mem_value, mem); if (mem_value == oldval) - *mem = newval; + __put_user(newval, mem); pte_unmap_unlock(pte, ptl); up_read(&mm->mmap_sem); diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h index 97f8bf6..adda036 100644 --- a/arch/mips/include/asm/thread_info.h +++ b/arch/mips/include/asm/thread_info.h @@ -60,6 +60,8 @@ struct thread_info { register struct thread_info *__current_thread_info __asm__("$28"); #define current_thread_info() __current_thread_info +#endif /* !__ASSEMBLY__ */ + /* thread information allocation */ #if defined(CONFIG_PAGE_SIZE_4KB) && defined(CONFIG_32BIT) #define THREAD_SIZE_ORDER (1) @@ -97,8 +99,6 @@ register struct thread_info *__current_thread_info __asm__("$28"); #define free_thread_info(info) kfree(info) -#endif /* !__ASSEMBLY__ */ - #define PREEMPT_ACTIVE 0x10000000 /* diff --git a/arch/mips/kernel/kgdb.c b/arch/mips/kernel/kgdb.c index f4546e9..23817a6 100644 --- a/arch/mips/kernel/kgdb.c +++ b/arch/mips/kernel/kgdb.c @@ -283,6 +283,15 @@ static int kgdb_mips_notify(struct notifier_block *self, unsigned long cmd, struct pt_regs *regs = args->regs; int trap = (regs->cp0_cause & 0x7c) >> 2; +#ifdef CONFIG_KPROBES + /* + * Return immediately if the kprobes fault notifier has set + * DIE_PAGE_FAULT. + */ + if (cmd == DIE_PAGE_FAULT) + return NOTIFY_DONE; +#endif /* CONFIG_KPROBES */ + /* Userspace events, ignore. */ if (user_mode(regs)) return NOTIFY_DONE; diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S index a81176f..be281c6 100644 --- a/arch/mips/kernel/vmlinux.lds.S +++ b/arch/mips/kernel/vmlinux.lds.S @@ -1,5 +1,6 @@ #include <asm/asm-offsets.h> #include <asm/page.h> +#include <asm/thread_info.h> #include <asm-generic/vmlinux.lds.h> #undef mips @@ -73,7 +74,7 @@ SECTIONS .data : { /* Data */ . = . + DATAOFFSET; /* for CONFIG_MAPPED_KERNEL */ - INIT_TASK_DATA(PAGE_SIZE) + INIT_TASK_DATA(THREAD_SIZE) NOSAVE_DATA CACHELINE_ALIGNED_DATA(1 << CONFIG_MIPS_L1_CACHE_SHIFT) READ_MOSTLY_DATA(1 << CONFIG_MIPS_L1_CACHE_SHIFT) diff --git a/arch/mn10300/Makefile b/arch/mn10300/Makefile index 7120282..3eb4a52 100644 --- a/arch/mn10300/Makefile +++ b/arch/mn10300/Makefile @@ -26,7 +26,7 @@ CHECKFLAGS += PROCESSOR := unset UNIT := unset -KBUILD_CFLAGS += -mam33 -mmem-funcs -DCPU=AM33 +KBUILD_CFLAGS += -mam33 -DCPU=AM33 $(call cc-option,-mmem-funcs,) KBUILD_AFLAGS += -mam33 -DCPU=AM33 ifeq ($(CONFIG_MN10300_CURRENT_IN_E2),y) diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h index 26fd114..3706cf0 100644 --- a/arch/parisc/include/asm/atomic.h +++ b/arch/parisc/include/asm/atomic.h @@ -248,7 +248,7 @@ static __inline__ int atomic_add_unless(atomic_t *v, int a, int u) #define atomic_sub_and_test(i,v) (atomic_sub_return((i),(v)) == 0) -#define ATOMIC_INIT(i) ((atomic_t) { (i) }) +#define ATOMIC_INIT(i) { (i) } #define smp_mb__before_atomic_dec() smp_mb() #define smp_mb__after_atomic_dec() smp_mb() @@ -257,7 +257,7 @@ static __inline__ int atomic_add_unless(atomic_t *v, int a, int u) #ifdef CONFIG_64BIT -#define ATOMIC64_INIT(i) ((atomic64_t) { (i) }) +#define ATOMIC64_INIT(i) { (i) } static __inline__ s64 __atomic64_add_return(s64 i, atomic64_t *v) diff --git a/arch/parisc/include/asm/prefetch.h b/arch/parisc/include/asm/prefetch.h index c5edc60..1ee7c82 100644 --- a/arch/parisc/include/asm/prefetch.h +++ b/arch/parisc/include/asm/prefetch.h @@ -21,7 +21,12 @@ #define ARCH_HAS_PREFETCH static inline void prefetch(const void *addr) { - __asm__("ldw 0(%0), %%r0" : : "r" (addr)); + __asm__( +#ifndef CONFIG_PA20 + /* Need to avoid prefetch of NULL on PA7300LC */ + " extrw,u,= %0,31,32,%%r0\n" +#endif + " ldw 0(%0), %%r0" : : "r" (addr)); } /* LDD is a PA2.0 addition. */ diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index 6f05944..07ef351 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -552,7 +552,7 @@ * entry (identifying the physical page) and %r23 up with * the from tlb entry (or nothing if only a to entry---for * clear_user_page_asm) */ - .macro do_alias spc,tmp,tmp1,va,pte,prot,fault + .macro do_alias spc,tmp,tmp1,va,pte,prot,fault,patype cmpib,COND(<>),n 0,\spc,\fault ldil L%(TMPALIAS_MAP_START),\tmp #if defined(CONFIG_64BIT) && (TMPALIAS_MAP_START >= 0x80000000) @@ -581,7 +581,15 @@ */ cmpiclr,= 0x01,\tmp,%r0 ldi (_PAGE_DIRTY|_PAGE_READ|_PAGE_WRITE),\prot +.ifc \patype,20 depd,z \prot,8,7,\prot +.else +.ifc \patype,11 + depw,z \prot,8,7,\prot +.else + .error "undefined PA type to do_alias" +.endif +.endif /* * OK, it is in the temp alias region, check whether "from" or "to". * Check "subtle" note in pacache.S re: r23/r26. @@ -1185,7 +1193,7 @@ dtlb_miss_20w: nop dtlb_check_alias_20w: - do_alias spc,t0,t1,va,pte,prot,dtlb_fault + do_alias spc,t0,t1,va,pte,prot,dtlb_fault,20 idtlbt pte,prot @@ -1209,7 +1217,7 @@ nadtlb_miss_20w: nop nadtlb_check_alias_20w: - do_alias spc,t0,t1,va,pte,prot,nadtlb_emulate + do_alias spc,t0,t1,va,pte,prot,nadtlb_emulate,20 idtlbt pte,prot @@ -1241,7 +1249,7 @@ dtlb_miss_11: nop dtlb_check_alias_11: - do_alias spc,t0,t1,va,pte,prot,dtlb_fault + do_alias spc,t0,t1,va,pte,prot,dtlb_fault,11 idtlba pte,(va) idtlbp prot,(va) @@ -1273,7 +1281,7 @@ nadtlb_miss_11: nop nadtlb_check_alias_11: - do_alias spc,t0,t1,va,pte,prot,nadtlb_emulate + do_alias spc,t0,t1,va,pte,prot,nadtlb_emulate,11 idtlba pte,(va) idtlbp prot,(va) @@ -1300,7 +1308,7 @@ dtlb_miss_20: nop dtlb_check_alias_20: - do_alias spc,t0,t1,va,pte,prot,dtlb_fault + do_alias spc,t0,t1,va,pte,prot,dtlb_fault,20 idtlbt pte,prot @@ -1326,7 +1334,7 @@ nadtlb_miss_20: nop nadtlb_check_alias_20: - do_alias spc,t0,t1,va,pte,prot,nadtlb_emulate + do_alias spc,t0,t1,va,pte,prot,nadtlb_emulate,20 idtlbt pte,prot @@ -1453,7 +1461,7 @@ naitlb_miss_20w: nop naitlb_check_alias_20w: - do_alias spc,t0,t1,va,pte,prot,naitlb_fault + do_alias spc,t0,t1,va,pte,prot,naitlb_fault,20 iitlbt pte,prot @@ -1507,7 +1515,7 @@ naitlb_miss_11: nop naitlb_check_alias_11: - do_alias spc,t0,t1,va,pte,prot,itlb_fault + do_alias spc,t0,t1,va,pte,prot,itlb_fault,11 iitlba pte,(%sr0, va) iitlbp prot,(%sr0, va) @@ -1553,7 +1561,7 @@ naitlb_miss_20: nop naitlb_check_alias_20: - do_alias spc,t0,t1,va,pte,prot,naitlb_fault + do_alias spc,t0,t1,va,pte,prot,naitlb_fault,20 iitlbt pte,prot diff --git a/arch/parisc/kernel/pacache.S b/arch/parisc/kernel/pacache.S index 93ff3d9..5d7218a 100644 --- a/arch/parisc/kernel/pacache.S +++ b/arch/parisc/kernel/pacache.S @@ -692,7 +692,7 @@ ENTRY(flush_icache_page_asm) /* Purge any old translation */ - pitlb (%sr0,%r28) + pitlb (%sr4,%r28) ldil L%icache_stride, %r1 ldw R%icache_stride(%r1), %r1 @@ -706,27 +706,29 @@ ENTRY(flush_icache_page_asm) sub %r25, %r1, %r25 -1: fic,m %r1(%r28) - fic,m %r1(%r28) - fic,m %r1(%r28) - fic,m %r1(%r28) - fic,m %r1(%r28) - fic,m %r1(%r28) - fic,m %r1(%r28) - fic,m %r1(%r28) - fic,m %r1(%r28) - fic,m %r1(%r28) - fic,m %r1(%r28) - fic,m %r1(%r28) - fic,m %r1(%r28) - fic,m %r1(%r28) - fic,m %r1(%r28) + /* fic only has the type 26 form on PA1.1, requiring an + * explicit space specification, so use %sr4 */ +1: fic,m %r1(%sr4,%r28) + fic,m %r1(%sr4,%r28) + fic,m %r1(%sr4,%r28) + fic,m %r1(%sr4,%r28) + fic,m %r1(%sr4,%r28) + fic,m %r1(%sr4,%r28) + fic,m %r1(%sr4,%r28) + fic,m %r1(%sr4,%r28) + fic,m %r1(%sr4,%r28) + fic,m %r1(%sr4,%r28) + fic,m %r1(%sr4,%r28) + fic,m %r1(%sr4,%r28) + fic,m %r1(%sr4,%r28) + fic,m %r1(%sr4,%r28) + fic,m %r1(%sr4,%r28) cmpb,COND(<<) %r28, %r25,1b - fic,m %r1(%r28) + fic,m %r1(%sr4,%r28) sync bv %r0(%r2) - pitlb (%sr0,%r25) + pitlb (%sr4,%r25) .exit .procend diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S index fa6f2b8..64a9998 100644 --- a/arch/parisc/kernel/vmlinux.lds.S +++ b/arch/parisc/kernel/vmlinux.lds.S @@ -50,8 +50,10 @@ SECTIONS . = KERNEL_BINARY_TEXT_START; _text = .; /* Text and read-only data */ - .text ALIGN(16) : { + .head ALIGN(16) : { HEAD_TEXT + } = 0 + .text ALIGN(16) : { TEXT_TEXT SCHED_TEXT LOCK_TEXT @@ -65,7 +67,7 @@ SECTIONS *(.fixup) *(.lock.text) /* out-of-line lock text */ *(.gnu.warning) - } = 0 + } /* End of text section */ _etext = .; diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h index 1cf20bd..33a3580 100644 --- a/arch/powerpc/include/asm/cputime.h +++ b/arch/powerpc/include/asm/cputime.h @@ -126,11 +126,11 @@ static inline u64 cputime64_to_jiffies64(const cputime_t ct) /* * Convert cputime <-> microseconds */ -extern u64 __cputime_msec_factor; +extern u64 __cputime_usec_factor; static inline unsigned long cputime_to_usecs(const cputime_t ct) { - return mulhdu(ct, __cputime_msec_factor) * USEC_PER_MSEC; + return mulhdu(ct, __cputime_usec_factor); } static inline cputime_t usecs_to_cputime(const unsigned long us) @@ -143,7 +143,7 @@ static inline cputime_t usecs_to_cputime(const unsigned long us) sec = us / 1000000; if (ct) { ct *= tb_ticks_per_sec; - do_div(ct, 1000); + do_div(ct, 1000000); } if (sec) ct += (cputime_t) sec * tb_ticks_per_sec; diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index c5cae0d..764e99c 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1000,7 +1000,8 @@ /* Macros for setting and retrieving special purpose registers */ #ifndef __ASSEMBLY__ #define mfmsr() ({unsigned long rval; \ - asm volatile("mfmsr %0" : "=r" (rval)); rval;}) + asm volatile("mfmsr %0" : "=r" (rval) : \ + : "memory"); rval;}) #ifdef CONFIG_PPC_BOOK3S_64 #define __mtmsrd(v, l) asm volatile("mtmsrd %0," __stringify(l) \ : : "r" (v) : "memory") diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 36e1c8a..4c12a751 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -75,6 +75,7 @@ int main(void) DEFINE(SIGSEGV, SIGSEGV); DEFINE(NMI_MASK, NMI_MASK); DEFINE(THREAD_DSCR, offsetof(struct thread_struct, dscr)); + DEFINE(THREAD_DSCR_INHERIT, offsetof(struct thread_struct, dscr_inherit)); #else DEFINE(THREAD_INFO, offsetof(struct task_struct, stack)); #endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index d834425..654fc53 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -380,6 +380,12 @@ _GLOBAL(ret_from_fork) li r3,0 b syscall_exit + .section ".toc","aw" +DSCR_DEFAULT: + .tc dscr_default[TC],dscr_default + + .section ".text" + /* * This routine switches between two different tasks. The process * state of one is saved on its kernel stack. Then the state @@ -519,9 +525,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT) mr r1,r8 /* start using new stack pointer */ std r7,PACAKSAVE(r13) - ld r6,_CCR(r1) - mtcrf 0xFF,r6 - #ifdef CONFIG_ALTIVEC BEGIN_FTR_SECTION ld r0,THREAD_VRSAVE(r4) @@ -530,14 +533,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_PPC64 BEGIN_FTR_SECTION + lwz r6,THREAD_DSCR_INHERIT(r4) + ld r7,DSCR_DEFAULT@toc(2) ld r0,THREAD_DSCR(r4) - cmpd r0,r25 - beq 1f + cmpwi r6,0 + bne 1f + ld r0,0(r7) +1: cmpd r0,r25 + beq 2f mtspr SPRN_DSCR,r0 -1: +2: END_FTR_SECTION_IFSET(CPU_FTR_DSCR) #endif + ld r6,_CCR(r1) + mtcrf 0xFF,r6 + /* r3-r13 are destroyed -- Cort */ REST_8GPRS(14, r1) REST_10GPRS(22, r1) diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index bf99cfa..6324008 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -245,9 +245,9 @@ __ftrace_make_nop(struct module *mod, /* * On PPC32 the trampoline looks like: - * 0x3d, 0x60, 0x00, 0x00 lis r11,sym@ha - * 0x39, 0x6b, 0x00, 0x00 addi r11,r11,sym@l - * 0x7d, 0x69, 0x03, 0xa6 mtctr r11 + * 0x3d, 0x80, 0x00, 0x00 lis r12,sym@ha + * 0x39, 0x8c, 0x00, 0x00 addi r12,r12,sym@l + * 0x7d, 0x89, 0x03, 0xa6 mtctr r12 * 0x4e, 0x80, 0x04, 0x20 bctr */ @@ -262,9 +262,9 @@ __ftrace_make_nop(struct module *mod, pr_devel(" %08x %08x ", jmp[0], jmp[1]); /* verify that this is what we expect it to be */ - if (((jmp[0] & 0xffff0000) != 0x3d600000) || - ((jmp[1] & 0xffff0000) != 0x396b0000) || - (jmp[2] != 0x7d6903a6) || + if (((jmp[0] & 0xffff0000) != 0x3d800000) || + ((jmp[1] & 0xffff0000) != 0x398c0000) || + (jmp[2] != 0x7d8903a6) || (jmp[3] != 0x4e800420)) { printk(KERN_ERR "Not a trampoline\n"); return -EINVAL; diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c index f832773..449a7e0 100644 --- a/arch/powerpc/kernel/module_32.c +++ b/arch/powerpc/kernel/module_32.c @@ -187,8 +187,8 @@ int apply_relocate(Elf32_Shdr *sechdrs, static inline int entry_matches(struct ppc_plt_entry *entry, Elf32_Addr val) { - if (entry->jump[0] == 0x3d600000 + ((val + 0x8000) >> 16) - && entry->jump[1] == 0x396b0000 + (val & 0xffff)) + if (entry->jump[0] == 0x3d800000 + ((val + 0x8000) >> 16) + && entry->jump[1] == 0x398c0000 + (val & 0xffff)) return 1; return 0; } @@ -215,10 +215,9 @@ static uint32_t do_plt_call(void *location, entry++; } - /* Stolen from Paul Mackerras as well... */ - entry->jump[0] = 0x3d600000+((val+0x8000)>>16); /* lis r11,sym@ha */ - entry->jump[1] = 0x396b0000 + (val&0xffff); /* addi r11,r11,sym@l*/ - entry->jump[2] = 0x7d6903a6; /* mtctr r11 */ + entry->jump[0] = 0x3d800000+((val+0x8000)>>16); /* lis r12,sym@ha */ + entry->jump[1] = 0x398c0000 + (val&0xffff); /* addi r12,r12,sym@l*/ + entry->jump[2] = 0x7d8903a6; /* mtctr r12 */ entry->jump[3] = 0x4e800420; /* bctr */ DEBUGP("Initialized plt for 0x%x at %p\n", val, entry); diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 91e52df..5596397 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -794,16 +794,8 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, #endif /* CONFIG_PPC_STD_MMU_64 */ #ifdef CONFIG_PPC64 if (cpu_has_feature(CPU_FTR_DSCR)) { - if (current->thread.dscr_inherit) { - p->thread.dscr_inherit = 1; - p->thread.dscr = current->thread.dscr; - } else if (0 != dscr_default) { - p->thread.dscr_inherit = 1; - p->thread.dscr = dscr_default; - } else { - p->thread.dscr_inherit = 0; - p->thread.dscr = 0; - } + p->thread.dscr_inherit = current->thread.dscr_inherit; + p->thread.dscr = current->thread.dscr; } #endif diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 2de304a..1becd7b 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -168,13 +168,13 @@ EXPORT_SYMBOL_GPL(ppc_tb_freq); #ifdef CONFIG_VIRT_CPU_ACCOUNTING /* * Factors for converting from cputime_t (timebase ticks) to - * jiffies, milliseconds, seconds, and clock_t (1/USER_HZ seconds). + * jiffies, microseconds, seconds, and clock_t (1/USER_HZ seconds). * These are all stored as 0.64 fixed-point binary fractions. */ u64 __cputime_jiffies_factor; EXPORT_SYMBOL(__cputime_jiffies_factor); -u64 __cputime_msec_factor; -EXPORT_SYMBOL(__cputime_msec_factor); +u64 __cputime_usec_factor; +EXPORT_SYMBOL(__cputime_usec_factor); u64 __cputime_sec_factor; EXPORT_SYMBOL(__cputime_sec_factor); u64 __cputime_clockt_factor; @@ -192,8 +192,8 @@ static void calc_cputime_factors(void) div128_by_32(HZ, 0, tb_ticks_per_sec, &res); __cputime_jiffies_factor = res.result_low; - div128_by_32(1000, 0, tb_ticks_per_sec, &res); - __cputime_msec_factor = res.result_low; + div128_by_32(1000000, 0, tb_ticks_per_sec, &res); + __cputime_usec_factor = res.result_low; div128_by_32(1, 0, tb_ticks_per_sec, &res); __cputime_sec_factor = res.result_low; div128_by_32(USER_HZ, 0, tb_ticks_per_sec, &res); diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 42541bb..ace1784 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -975,7 +975,7 @@ static int cpu_cmd(void) /* print cpus waiting or in xmon */ printf("cpus stopped:"); count = 0; - for (cpu = 0; cpu < NR_CPUS; ++cpu) { + for_each_possible_cpu(cpu) { if (cpumask_test_cpu(cpu, &cpus_in_xmon)) { if (count == 0) printf(" %x", cpu); diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index 53acaa8..f98af03 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -631,7 +631,6 @@ asmlinkage unsigned long old32_mmap(struct mmap_arg_struct_emu31 __user *arg) return -EFAULT; if (a.offset & ~PAGE_MASK) return -EINVAL; - a.addr = (unsigned long) compat_ptr(a.addr); return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset >> PAGE_SHIFT); } @@ -642,7 +641,6 @@ asmlinkage long sys32_mmap2(struct mmap_arg_struct_emu31 __user *arg) if (copy_from_user(&a, arg, sizeof(a))) return -EFAULT; - a.addr = (unsigned long) compat_ptr(a.addr); return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset); } diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index d814f79..6903d44 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -567,6 +567,7 @@ static void pfault_interrupt(unsigned int ext_int_code, tsk->thread.pfault_wait = 0; list_del(&tsk->thread.list); wake_up_process(tsk); + put_task_struct(tsk); } else { /* Completion interrupt was faster than initial * interrupt. Set pfault_wait to -1 so the initial @@ -576,14 +577,22 @@ static void pfault_interrupt(unsigned int ext_int_code, put_task_struct(tsk); } else { /* signal bit not set -> a real page is missing. */ - if (tsk->thread.pfault_wait == -1) { + if (tsk->thread.pfault_wait == 1) { + /* Already on the list with a reference: put to sleep */ + set_task_state(tsk, TASK_UNINTERRUPTIBLE); + set_tsk_need_resched(tsk); + } else if (tsk->thread.pfault_wait == -1) { /* Completion interrupt was faster than the initial * interrupt (pfault_wait == -1). Set pfault_wait * back to zero and exit. */ tsk->thread.pfault_wait = 0; } else { /* Initial interrupt arrived before completion - * interrupt. Let the task sleep. */ + * interrupt. Let the task sleep. + * An extra task reference is needed since a different + * cpu may set the task state to TASK_RUNNING again + * before the scheduler is reached. */ + get_task_struct(tsk); tsk->thread.pfault_wait = 1; list_add(&tsk->thread.list, &pfault_list); set_task_state(tsk, TASK_UNINTERRUPTIBLE); @@ -608,6 +617,7 @@ static int __cpuinit pfault_cpu_notify(struct notifier_block *self, list_del(&thread->list); tsk = container_of(thread, struct task_struct, thread); wake_up_process(tsk); + put_task_struct(tsk); } spin_unlock_irq(&pfault_lock); break; diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 2e79419..9e70257 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -590,6 +590,9 @@ config SYSVIPC_COMPAT depends on COMPAT && SYSVIPC default y +config KEYS_COMPAT + def_bool y if COMPAT && KEYS + endmenu source "net/Kconfig" diff --git a/arch/sparc/kernel/central.c b/arch/sparc/kernel/central.c index 7eef3f7..f5ddc0b 100644 --- a/arch/sparc/kernel/central.c +++ b/arch/sparc/kernel/central.c @@ -268,4 +268,4 @@ static int __init sunfire_init(void) return 0; } -subsys_initcall(sunfire_init); +fs_initcall(sunfire_init); diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 6860d40..904ed63 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -513,11 +513,13 @@ static u64 nop_for_index(int idx) static inline void sparc_pmu_enable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx) { - u64 val, mask = mask_for_index(idx); + u64 enc, val, mask = mask_for_index(idx); + + enc = perf_event_get_enc(cpuc->events[idx]); val = cpuc->pcr; val &= ~mask; - val |= hwc->config; + val |= event_encoding(enc, idx); cpuc->pcr = val; pcr_ops->write(cpuc->pcr); @@ -1380,8 +1382,6 @@ static void perf_callchain_user_64(struct perf_callchain_entry *entry, { unsigned long ufp; - perf_callchain_store(entry, regs->tpc); - ufp = regs->u_regs[UREG_I6] + STACK_BIAS; do { struct sparc_stackf *usf, sf; @@ -1402,8 +1402,6 @@ static void perf_callchain_user_32(struct perf_callchain_entry *entry, { unsigned long ufp; - perf_callchain_store(entry, regs->tpc); - ufp = regs->u_regs[UREG_I6] & 0xffffffffUL; do { struct sparc_stackf32 *usf, sf; @@ -1422,6 +1420,11 @@ static void perf_callchain_user_32(struct perf_callchain_entry *entry, void perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) { + perf_callchain_store(entry, regs->tpc); + + if (!current->mm) + return; + flushw_user(); if (test_thread_flag(TIF_32BIT)) perf_callchain_user_32(entry, regs); diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index 908b47a..10c9b36 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -519,12 +519,12 @@ SYSCALL_DEFINE1(sparc64_personality, unsigned long, personality) { int ret; - if (current->personality == PER_LINUX32 && - personality == PER_LINUX) - personality = PER_LINUX32; + if (personality(current->personality) == PER_LINUX32 && + personality(personality) == PER_LINUX) + personality |= PER_LINUX32; ret = sys_personality(personality); - if (ret == PER_LINUX32) - ret = PER_LINUX; + if (personality(ret) == PER_LINUX32) + ret &= ~PER_LINUX32; return ret; } diff --git a/arch/sparc/kernel/syscalls.S b/arch/sparc/kernel/syscalls.S index 1d7e274..7f5f65d 100644 --- a/arch/sparc/kernel/syscalls.S +++ b/arch/sparc/kernel/syscalls.S @@ -212,24 +212,20 @@ linux_sparc_syscall: 3: stx %o0, [%sp + PTREGS_OFF + PT_V9_I0] ret_sys_call: ldx [%sp + PTREGS_OFF + PT_V9_TSTATE], %g3 - ldx [%sp + PTREGS_OFF + PT_V9_TNPC], %l1 ! pc = npc sra %o0, 0, %o0 mov %ulo(TSTATE_XCARRY | TSTATE_ICARRY), %g2 sllx %g2, 32, %g2 - /* Check if force_successful_syscall_return() - * was invoked. - */ - ldub [%g6 + TI_SYS_NOERROR], %l2 - brnz,a,pn %l2, 80f - stb %g0, [%g6 + TI_SYS_NOERROR] - cmp %o0, -ERESTART_RESTARTBLOCK bgeu,pn %xcc, 1f - andcc %l0, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT|_TIF_SYSCALL_TRACEPOINT), %l6 -80: + andcc %l0, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT|_TIF_SYSCALL_TRACEPOINT), %g0 + ldx [%sp + PTREGS_OFF + PT_V9_TNPC], %l1 ! pc = npc + +2: + stb %g0, [%g6 + TI_SYS_NOERROR] /* System call success, clear Carry condition code. */ andn %g3, %g2, %g3 +3: stx %g3, [%sp + PTREGS_OFF + PT_V9_TSTATE] bne,pn %icc, linux_syscall_trace2 add %l1, 0x4, %l2 ! npc = npc+4 @@ -238,20 +234,20 @@ ret_sys_call: stx %l2, [%sp + PTREGS_OFF + PT_V9_TNPC] 1: + /* Check if force_successful_syscall_return() + * was invoked. + */ + ldub [%g6 + TI_SYS_NOERROR], %l2 + brnz,pn %l2, 2b + ldx [%sp + PTREGS_OFF + PT_V9_TNPC], %l1 ! pc = npc /* System call failure, set Carry condition code. * Also, get abs(errno) to return to the process. */ - andcc %l0, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT|_TIF_SYSCALL_TRACEPOINT), %l6 sub %g0, %o0, %o0 - or %g3, %g2, %g3 stx %o0, [%sp + PTREGS_OFF + PT_V9_I0] - stx %g3, [%sp + PTREGS_OFF + PT_V9_TSTATE] - bne,pn %icc, linux_syscall_trace2 - add %l1, 0x4, %l2 ! npc = npc+4 - stx %l1, [%sp + PTREGS_OFF + PT_V9_TPC] + ba,pt %xcc, 3b + or %g3, %g2, %g3 - b,pt %xcc, rtrap - stx %l2, [%sp + PTREGS_OFF + PT_V9_TNPC] linux_syscall_trace2: call syscall_trace_leave add %sp, PTREGS_OFF, %o0 diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S index f566518..248fb67 100644 --- a/arch/sparc/kernel/systbls_64.S +++ b/arch/sparc/kernel/systbls_64.S @@ -74,7 +74,7 @@ sys_call_table32: .word sys_timer_delete, compat_sys_timer_create, sys_ni_syscall, compat_sys_io_setup, sys_io_destroy /*270*/ .word sys32_io_submit, sys_io_cancel, compat_sys_io_getevents, sys32_mq_open, sys_mq_unlink .word compat_sys_mq_timedsend, compat_sys_mq_timedreceive, compat_sys_mq_notify, compat_sys_mq_getsetattr, compat_sys_waitid -/*280*/ .word sys32_tee, sys_add_key, sys_request_key, sys_keyctl, compat_sys_openat +/*280*/ .word sys32_tee, sys_add_key, sys_request_key, compat_sys_keyctl, compat_sys_openat .word sys_mkdirat, sys_mknodat, sys_fchownat, compat_sys_futimesat, compat_sys_fstatat64 /*290*/ .word sys_unlinkat, sys_renameat, sys_linkat, sys_symlinkat, sys_readlinkat .word sys_fchmodat, sys_faccessat, compat_sys_pselect6, compat_sys_ppoll, sys_unshare diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 8e073d8..6ff4d78 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -2118,6 +2118,9 @@ EXPORT_SYMBOL(_PAGE_CACHE); #ifdef CONFIG_SPARSEMEM_VMEMMAP unsigned long vmemmap_table[VMEMMAP_SIZE]; +static long __meminitdata addr_start, addr_end; +static int __meminitdata node_start; + int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node) { unsigned long vstart = (unsigned long) start; @@ -2148,15 +2151,30 @@ int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node) *vmem_pp = pte_base | __pa(block); - printk(KERN_INFO "[%p-%p] page_structs=%lu " - "node=%d entry=%lu/%lu\n", start, block, nr, - node, - addr >> VMEMMAP_CHUNK_SHIFT, - VMEMMAP_SIZE); + /* check to see if we have contiguous blocks */ + if (addr_end != addr || node_start != node) { + if (addr_start) + printk(KERN_DEBUG " [%lx-%lx] on node %d\n", + addr_start, addr_end-1, node_start); + addr_start = addr; + node_start = node; + } + addr_end = addr + VMEMMAP_CHUNK; } } return 0; } + +void __meminit vmemmap_populate_print_last(void) +{ + if (addr_start) { + printk(KERN_DEBUG " [%lx-%lx] on node %d\n", + addr_start, addr_end-1, node_start); + addr_start = 0; + addr_end = 0; + node_start = 0; + } +} #endif /* CONFIG_SPARSEMEM_VMEMMAP */ static void prot_init_common(unsigned long page_none, diff --git a/arch/sparc/mm/ultra.S b/arch/sparc/mm/ultra.S index b57a594..874162a 100644 --- a/arch/sparc/mm/ultra.S +++ b/arch/sparc/mm/ultra.S @@ -495,11 +495,11 @@ xcall_fetch_glob_regs: stx %o7, [%g1 + GR_SNAP_O7] stx %i7, [%g1 + GR_SNAP_I7] /* Don't try this at home kids... */ - rdpr %cwp, %g2 - sub %g2, 1, %g7 + rdpr %cwp, %g3 + sub %g3, 1, %g7 wrpr %g7, %cwp mov %i7, %g7 - wrpr %g2, %cwp + wrpr %g3, %cwp stx %g7, [%g1 + GR_SNAP_RPC] sethi %hi(trap_block), %g7 or %g7, %lo(trap_block), %g7 diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index 0249b8b..532a2a4 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig @@ -11,6 +11,7 @@ config TILE select GENERIC_IRQ_PROBE select GENERIC_PENDING_IRQ if SMP select GENERIC_IRQ_SHOW + select HAVE_SYSCALL_WRAPPERS if TILEGX select SYS_HYPERVISOR # FIXME: investigate whether we need/want these options. diff --git a/arch/tile/Makefile b/arch/tile/Makefile index 17acce7..04c637c 100644 --- a/arch/tile/Makefile +++ b/arch/tile/Makefile @@ -26,6 +26,10 @@ $(error Set TILERA_ROOT or CROSS_COMPILE when building $(ARCH) on $(HOST_ARCH)) endif endif +# The tile compiler may emit .eh_frame information for backtracing. +# In kernel modules, this causes load failures due to unsupported relocations. +KBUILD_CFLAGS += -fno-asynchronous-unwind-tables + ifneq ($(CONFIG_DEBUG_EXTRA_FLAGS),"") KBUILD_CFLAGS += $(CONFIG_DEBUG_EXTRA_FLAGS) endif diff --git a/arch/tile/include/asm/bitops.h b/arch/tile/include/asm/bitops.h index 16f1fa5..bd186c4 100644 --- a/arch/tile/include/asm/bitops.h +++ b/arch/tile/include/asm/bitops.h @@ -77,6 +77,11 @@ static inline int ffs(int x) return __builtin_ffs(x); } +static inline int fls64(__u64 w) +{ + return (sizeof(__u64) * 8) - __builtin_clzll(w); +} + /** * fls - find last set bit in word * @x: the word to search @@ -90,12 +95,7 @@ static inline int ffs(int x) */ static inline int fls(int x) { - return (sizeof(int) * 8) - __builtin_clz(x); -} - -static inline int fls64(__u64 w) -{ - return (sizeof(__u64) * 8) - __builtin_clzll(w); + return fls64((unsigned int) x); } static inline unsigned int __arch_hweight32(unsigned int w) diff --git a/arch/tile/kernel/compat_signal.c b/arch/tile/kernel/compat_signal.c index a7869ad..41459d8 100644 --- a/arch/tile/kernel/compat_signal.c +++ b/arch/tile/kernel/compat_signal.c @@ -406,19 +406,17 @@ int compat_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, * Set up registers for signal handler. * Registers that we don't modify keep the value they had from * user-space at the time we took the signal. + * We always pass siginfo and mcontext, regardless of SA_SIGINFO, + * since some things rely on this (e.g. glibc's debug/segfault.c). */ regs->pc = ptr_to_compat_reg(ka->sa.sa_handler); regs->ex1 = PL_ICS_EX1(USER_PL, 1); /* set crit sec in handler */ regs->sp = ptr_to_compat_reg(frame); regs->lr = restorer; regs->regs[0] = (unsigned long) usig; - - if (ka->sa.sa_flags & SA_SIGINFO) { - /* Need extra arguments, so mark to restore caller-saves. */ - regs->regs[1] = ptr_to_compat_reg(&frame->info); - regs->regs[2] = ptr_to_compat_reg(&frame->uc); - regs->flags |= PT_FLAGS_CALLER_SAVES; - } + regs->regs[1] = ptr_to_compat_reg(&frame->info); + regs->regs[2] = ptr_to_compat_reg(&frame->uc); + regs->flags |= PT_FLAGS_CALLER_SAVES; /* * Notify any tracer that was single-stepping it. diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h index 41474fb..aa365c5 100644 --- a/arch/um/include/asm/pgtable.h +++ b/arch/um/include/asm/pgtable.h @@ -271,6 +271,12 @@ static inline void set_pte(pte_t *pteptr, pte_t pteval) } #define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval) +#define __HAVE_ARCH_PTE_SAME +static inline int pte_same(pte_t pte_a, pte_t pte_b) +{ + return !((pte_val(pte_a) ^ pte_val(pte_b)) & ~_PAGE_NEWPAGE); +} + /* * Conversion functions: convert a page and protection to a page entry, * and a page entry and page directory to the page they refer to. @@ -346,11 +352,11 @@ extern pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr); #define update_mmu_cache(vma,address,ptep) do ; while (0) /* Encode and de-code a swap entry */ -#define __swp_type(x) (((x).val >> 4) & 0x3f) +#define __swp_type(x) (((x).val >> 5) & 0x1f) #define __swp_offset(x) ((x).val >> 11) #define __swp_entry(type, offset) \ - ((swp_entry_t) { ((type) << 4) | ((offset) << 11) }) + ((swp_entry_t) { ((type) << 5) | ((offset) << 11) }) #define __pte_to_swp_entry(pte) \ ((swp_entry_t) { pte_val(pte_mkuptodate(pte)) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 37357a5..a0e9bda 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1451,6 +1451,15 @@ config ARCH_USES_PG_UNCACHED def_bool y depends on X86_PAT +config ARCH_RANDOM + def_bool y + prompt "x86 architectural random number generator" if EXPERT + ---help--- + Enable the x86 architectural RDRAND instruction + (Intel Bull Mountain technology) to generate random numbers. + If supported, this is a high bandwidth, cryptographically + secure hardware random number generator. + config EFI bool "EFI runtime service support" depends on ACPI diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index be6d9e3..3470624 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -2460,10 +2460,12 @@ ENTRY(aesni_cbc_dec) pxor IN3, STATE4 movaps IN4, IV #else - pxor (INP), STATE2 - pxor 0x10(INP), STATE3 pxor IN1, STATE4 movaps IN2, IV + movups (INP), IN1 + pxor IN1, STATE2 + movups 0x10(INP), IN2 + pxor IN2, STATE3 #endif movups STATE1, (OUTP) movups STATE2, 0x10(OUTP) diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h new file mode 100644 index 0000000..0d9ec77 --- /dev/null +++ b/arch/x86/include/asm/archrandom.h @@ -0,0 +1,75 @@ +/* + * This file is part of the Linux kernel. + * + * Copyright (c) 2011, Intel Corporation + * Authors: Fenghua Yu <fenghua.yu@intel.com>, + * H. Peter Anvin <hpa@linux.intel.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + */ + +#ifndef ASM_X86_ARCHRANDOM_H +#define ASM_X86_ARCHRANDOM_H + +#include <asm/processor.h> +#include <asm/cpufeature.h> +#include <asm/alternative.h> +#include <asm/nops.h> + +#define RDRAND_RETRY_LOOPS 10 + +#define RDRAND_INT ".byte 0x0f,0xc7,0xf0" +#ifdef CONFIG_X86_64 +# define RDRAND_LONG ".byte 0x48,0x0f,0xc7,0xf0" +#else +# define RDRAND_LONG RDRAND_INT +#endif + +#ifdef CONFIG_ARCH_RANDOM + +#define GET_RANDOM(name, type, rdrand, nop) \ +static inline int name(type *v) \ +{ \ + int ok; \ + alternative_io("movl $0, %0\n\t" \ + nop, \ + "\n1: " rdrand "\n\t" \ + "jc 2f\n\t" \ + "decl %0\n\t" \ + "jnz 1b\n\t" \ + "2:", \ + X86_FEATURE_RDRAND, \ + ASM_OUTPUT2("=r" (ok), "=a" (*v)), \ + "0" (RDRAND_RETRY_LOOPS)); \ + return ok; \ +} + +#ifdef CONFIG_X86_64 + +GET_RANDOM(arch_get_random_long, unsigned long, RDRAND_LONG, ASM_NOP5); +GET_RANDOM(arch_get_random_int, unsigned int, RDRAND_INT, ASM_NOP4); + +#else + +GET_RANDOM(arch_get_random_long, unsigned long, RDRAND_LONG, ASM_NOP3); +GET_RANDOM(arch_get_random_int, unsigned int, RDRAND_INT, ASM_NOP3); + +#endif /* CONFIG_X86_64 */ + +#endif /* CONFIG_ARCH_RANDOM */ + +extern void x86_init_rdrand(struct cpuinfo_x86 *c); + +#endif /* ASM_X86_ARCHRANDOM_H */ diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 71cc380..c5d941f 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -173,7 +173,7 @@ #define X86_FEATURE_XSAVEOPT (7*32+ 4) /* Optimized Xsave */ #define X86_FEATURE_PLN (7*32+ 5) /* Intel Power Limit Notification */ #define X86_FEATURE_PTS (7*32+ 6) /* Intel Package Thermal Status */ -#define X86_FEATURE_DTS (7*32+ 7) /* Digital Thermal Sensor */ +#define X86_FEATURE_DTHERM (7*32+ 7) /* Digital Thermal Sensor */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */ diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index effff47..43876f1 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -31,6 +31,56 @@ static inline void native_set_pte(pte_t *ptep, pte_t pte) ptep->pte_low = pte.pte_low; } +#define pmd_read_atomic pmd_read_atomic +/* + * pte_offset_map_lock on 32bit PAE kernels was reading the pmd_t with + * a "*pmdp" dereference done by gcc. Problem is, in certain places + * where pte_offset_map_lock is called, concurrent page faults are + * allowed, if the mmap_sem is hold for reading. An example is mincore + * vs page faults vs MADV_DONTNEED. On the page fault side + * pmd_populate rightfully does a set_64bit, but if we're reading the + * pmd_t with a "*pmdp" on the mincore side, a SMP race can happen + * because gcc will not read the 64bit of the pmd atomically. To fix + * this all places running pmd_offset_map_lock() while holding the + * mmap_sem in read mode, shall read the pmdp pointer using this + * function to know if the pmd is null nor not, and in turn to know if + * they can run pmd_offset_map_lock or pmd_trans_huge or other pmd + * operations. + * + * Without THP if the mmap_sem is hold for reading, the + * pmd can only transition from null to not null while pmd_read_atomic runs. + * So there's no need of literally reading it atomically. + * + * With THP if the mmap_sem is hold for reading, the pmd can become + * THP or null or point to a pte (and in turn become "stable") at any + * time under pmd_read_atomic, so it's mandatory to read it atomically + * with cmpxchg8b. + */ +#ifndef CONFIG_TRANSPARENT_HUGEPAGE +static inline pmd_t pmd_read_atomic(pmd_t *pmdp) +{ + pmdval_t ret; + u32 *tmp = (u32 *)pmdp; + + ret = (pmdval_t) (*tmp); + if (ret) { + /* + * If the low part is null, we must not read the high part + * or we can end up with a partial pmd. + */ + smp_rmb(); + ret |= ((pmdval_t)*(tmp + 1)) << 32; + } + + return (pmd_t) { ret }; +} +#else /* CONFIG_TRANSPARENT_HUGEPAGE */ +static inline pmd_t pmd_read_atomic(pmd_t *pmdp) +{ + return (pmd_t) { atomic64_read((atomic64_t *)pmdp) }; +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte) { set_64bit((unsigned long long *)(ptep), native_pte_val(pte)); diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 18601c8..884507e 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -146,8 +146,7 @@ static inline unsigned long pmd_pfn(pmd_t pmd) static inline int pmd_large(pmd_t pte) { - return (pmd_flags(pte) & (_PAGE_PSE | _PAGE_PRESENT)) == - (_PAGE_PSE | _PAGE_PRESENT); + return pmd_flags(pte) & _PAGE_PSE; } #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -415,7 +414,13 @@ static inline int pte_hidden(pte_t pte) static inline int pmd_present(pmd_t pmd) { - return pmd_flags(pmd) & _PAGE_PRESENT; + /* + * Checking for _PAGE_PSE is needed too because + * split_huge_page will temporarily clear the present bit (but + * the _PAGE_PSE flag will remain set at all times while the + * _PAGE_PRESENT bit is clear). + */ + return pmd_flags(pmd) & (_PAGE_PRESENT | _PAGE_PROTNONE | _PAGE_PSE); } static inline int pmd_none(pmd_t pmd) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 5d9c61d..e5f7248 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -99,7 +99,6 @@ struct cpuinfo_x86 { u16 apicid; u16 initial_apicid; u16 x86_clflush_size; -#ifdef CONFIG_SMP /* number of cores as seen by the OS: */ u16 booted_cores; /* Physical processor id: */ @@ -110,7 +109,6 @@ struct cpuinfo_x86 { u8 compute_unit_id; /* Index into per_cpu list: */ u16 cpu_index; -#endif } __attribute__((__aligned__(SMP_CACHE_BYTES))); #define X86_VENDOR_INTEL 0 diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index c2ff2a1..f0d89d9 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -93,10 +93,6 @@ do { \ "memory"); \ } while (0) -/* - * disable hlt during certain critical i/o operations - */ -#define HAVE_DISABLE_HLT #else /* frame pointer must be last for get_wchan */ @@ -392,9 +388,6 @@ static inline void clflush(volatile void *__p) #define nop() asm volatile ("nop") -void disable_hlt(void); -void enable_hlt(void); - void cpu_idle_wait(void); extern unsigned long arch_align_stack(unsigned long sp); diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 4558f0d..479d03c 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -416,12 +416,14 @@ acpi_parse_int_src_ovr(struct acpi_subtable_header * header, return 0; } - if (intsrc->source_irq == 0 && intsrc->global_irq == 2) { + if (intsrc->source_irq == 0) { if (acpi_skip_timer_override) { - printk(PREFIX "BIOS IRQ0 pin2 override ignored.\n"); + printk(PREFIX "BIOS IRQ0 override ignored.\n"); return 0; } - if (acpi_fix_pin2_polarity && (intsrc->inti_flags & ACPI_MADT_POLARITY_MASK)) { + + if ((intsrc->global_irq == 2) && acpi_fix_pin2_polarity + && (intsrc->inti_flags & ACPI_MADT_POLARITY_MASK)) { intsrc->inti_flags &= ~ACPI_MADT_POLARITY_MASK; printk(PREFIX "BIOS IRQ0 pin2 override: forcing polarity to high active.\n"); } @@ -1327,17 +1329,12 @@ static int __init dmi_disable_acpi(const struct dmi_system_id *d) } /* - * Force ignoring BIOS IRQ0 pin2 override + * Force ignoring BIOS IRQ0 override */ static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d) { - /* - * The ati_ixp4x0_rev() early PCI quirk should have set - * the acpi_skip_timer_override flag already: - */ if (!acpi_skip_timer_override) { - WARN(1, KERN_ERR "ati_ixp4x0 quirk not complete.\n"); - pr_notice("%s detected: Ignoring BIOS IRQ0 pin2 override\n", + pr_notice("%s detected: Ignoring BIOS IRQ0 override\n", d->ident); acpi_skip_timer_override = 1; } @@ -1431,7 +1428,7 @@ static struct dmi_system_id __initdata acpi_dmi_table_late[] = { * is enabled. This input is incorrectly designated the * ISA IRQ 0 via an interrupt source override even though * it is wired to the output of the master 8259A and INTIN0 - * is not connected at all. Force ignoring BIOS IRQ0 pin2 + * is not connected at all. Force ignoring BIOS IRQ0 * override in that cases. */ { @@ -1466,6 +1463,14 @@ static struct dmi_system_id __initdata acpi_dmi_table_late[] = { DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq 6715b"), }, }, + { + .callback = dmi_ignore_irq0_timer_override, + .ident = "FUJITSU SIEMENS", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"), + DMI_MATCH(DMI_PRODUCT_NAME, "AMILO PRO V2030"), + }, + }, {} }; diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index a81f2d5..4c734e6 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -161,7 +161,7 @@ static const unsigned char * const k7_nops[ASM_NOP_MAX+2] = #endif #ifdef P6_NOP1 -static const unsigned char __initconst_or_module p6nops[] = +static const unsigned char p6nops[] = { P6_NOP1, P6_NOP2, @@ -220,7 +220,7 @@ void __init arch_init_ideal_nops(void) ideal_nops = intel_nops; #endif } - + break; default: #ifdef CONFIG_X86_64 ideal_nops = k8_nops; diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index bae1efe..be16854 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -154,16 +154,14 @@ int amd_get_subcaches(int cpu) { struct pci_dev *link = node_to_amd_nb(amd_get_nb_id(cpu))->link; unsigned int mask; - int cuid = 0; + int cuid; if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) return 0; pci_read_config_dword(link, 0x1d4, &mask); -#ifdef CONFIG_SMP cuid = cpu_data(cpu).compute_unit_id; -#endif return (mask >> (4 * cuid)) & 0xf; } @@ -172,7 +170,7 @@ int amd_set_subcaches(int cpu, int mask) static unsigned int reset, ban; struct amd_northbridge *nb = node_to_amd_nb(amd_get_nb_id(cpu)); unsigned int reg; - int cuid = 0; + int cuid; if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING) || mask > 0xf) return -EINVAL; @@ -190,9 +188,7 @@ int amd_set_subcaches(int cpu, int mask) pci_write_config_dword(nb->misc, 0x1b8, reg & ~0x180000); } -#ifdef CONFIG_SMP cuid = cpu_data(cpu).compute_unit_id; -#endif mask <<= 4 * cuid; mask |= (0xf ^ (1 << cuid)) << 26; diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 6042981..0e3a82a 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -15,6 +15,7 @@ CFLAGS_common.o := $(nostackp) obj-y := intel_cacheinfo.o scattered.o topology.o obj-y += proc.o capflags.o powerflags.o common.o obj-y += vmware.o hypervisor.o sched.o mshyperv.o +obj-y += rdrand.o obj-$(CONFIG_X86_32) += bugs.o obj-$(CONFIG_X86_64) += bugs_64.o diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index b13ed39..8115040 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -146,7 +146,6 @@ static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c) static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c) { -#ifdef CONFIG_SMP /* calling is from identify_secondary_cpu() ? */ if (!c->cpu_index) return; @@ -190,7 +189,6 @@ static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c) valid_k7: ; -#endif } static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 22a073d..1579ab9 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -15,6 +15,7 @@ #include <asm/stackprotector.h> #include <asm/perf_event.h> #include <asm/mmu_context.h> +#include <asm/archrandom.h> #include <asm/hypervisor.h> #include <asm/processor.h> #include <asm/sections.h> @@ -675,9 +676,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) if (this_cpu->c_early_init) this_cpu->c_early_init(c); -#ifdef CONFIG_SMP c->cpu_index = 0; -#endif filter_cpuid_features(c, false); setup_smep(c); @@ -760,10 +759,7 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c) c->apicid = c->initial_apicid; # endif #endif - -#ifdef CONFIG_X86_HT c->phys_proc_id = c->initial_apicid; -#endif } setup_smep(c); @@ -857,6 +853,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) #endif init_hypervisor(c); + x86_init_rdrand(c); /* * Clear/Set all flags overriden by options, need do it diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index ed6086e..e0dc000 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -179,7 +179,6 @@ static void __cpuinit trap_init_f00f_bug(void) static void __cpuinit intel_smp_check(struct cpuinfo_x86 *c) { -#ifdef CONFIG_SMP /* calling is from identify_secondary_cpu() ? */ if (!c->cpu_index) return; @@ -196,7 +195,6 @@ static void __cpuinit intel_smp_check(struct cpuinfo_x86 *c) WARN_ONCE(1, "WARNING: SMP operation may be unreliable" "with B stepping processors.\n"); } -#endif } static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c index 1e8d66c..362190b 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-severity.c +++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c @@ -101,15 +101,19 @@ static struct severity { }; /* - * If the EIPV bit is set, it means the saved IP is the - * instruction which caused the MCE. + * If mcgstatus indicated that ip/cs on the stack were + * no good, then "m->cs" will be zero and we will have + * to assume the worst case (IN_KERNEL) as we actually + * have no idea what we were executing when the machine + * check hit. + * If we do have a good "m->cs" (or a faked one in the + * case we were executing in VM86 mode) we can use it to + * distinguish an exception taken in user from from one + * taken in the kernel. */ static int error_context(struct mce *m) { - if (m->mcgstatus & MCG_STATUS_EIPV) - return (m->ip && (m->cs & 3) == 3) ? IN_USER : IN_KERNEL; - /* Unknown, assume kernel */ - return IN_KERNEL; + return ((m->cs & 3) == 3) ? IN_USER : IN_KERNEL; } int mce_severity(struct mce *a, int tolerant, char **msg) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index ff1ae9b..1396edf 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -122,9 +122,7 @@ void mce_setup(struct mce *m) m->time = get_seconds(); m->cpuvendor = boot_cpu_data.x86_vendor; m->cpuid = cpuid_eax(1); -#ifdef CONFIG_SMP m->socketid = cpu_data(m->extcpu).phys_proc_id; -#endif m->apicid = cpu_data(m->extcpu).initial_apicid; rdmsrl(MSR_IA32_MCG_CAP, m->mcgcap); } @@ -453,6 +451,13 @@ static inline void mce_get_rip(struct mce *m, struct pt_regs *regs) if (regs && (m->mcgstatus & (MCG_STATUS_RIPV|MCG_STATUS_EIPV))) { m->ip = regs->ip; m->cs = regs->cs; + /* + * When in VM86 mode make the cs look like ring 3 + * always. This is a lie, but it's better than passing + * the additional vm86 bit around everywhere. + */ + if (v8086_mode(regs)) + m->cs |= 3; } else { m->ip = 0; m->cs = 0; @@ -990,6 +995,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) */ add_taint(TAINT_MACHINE_CHECK); + mce_get_rip(&m, regs); severity = mce_severity(&m, tolerant, NULL); /* @@ -1028,7 +1034,6 @@ void do_machine_check(struct pt_regs *regs, long error_code) if (severity == MCE_AO_SEVERITY && mce_usable_address(&m)) mce_ring_add(m.addr >> PAGE_SHIFT); - mce_get_rip(&m, regs); mce_log(&m); if (severity > worst) { diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index bb0adad..b97aa72 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -52,6 +52,7 @@ struct threshold_block { unsigned int cpu; u32 address; u16 interrupt_enable; + bool interrupt_capable; u16 threshold_limit; struct kobject kobj; struct list_head miscj; @@ -64,11 +65,9 @@ struct threshold_bank { }; static DEFINE_PER_CPU(struct threshold_bank * [NR_BANKS], threshold_banks); -#ifdef CONFIG_SMP static unsigned char shared_bank[NR_BANKS] = { 0, 0, 0, 0, 1 }; -#endif static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */ @@ -86,6 +85,21 @@ struct thresh_restart { u16 old_limit; }; +static bool lvt_interrupt_supported(unsigned int bank, u32 msr_high_bits) +{ + /* + * bank 4 supports APIC LVT interrupts implicitly since forever. + */ + if (bank == 4) + return true; + + /* + * IntP: interrupt present; if this bit is set, the thresholding + * bank can generate APIC LVT interrupts + */ + return msr_high_bits & BIT(28); +} + static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi) { int msr = (hi & MASK_LVTOFF_HI) >> 20; @@ -107,8 +121,10 @@ static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi) return 1; }; -/* must be called with correct cpu affinity */ -/* Called via smp_call_function_single() */ +/* + * Called via smp_call_function_single(), must be called with correct + * cpu affinity. + */ static void threshold_restart_bank(void *_tr) { struct thresh_restart *tr = _tr; @@ -131,6 +147,12 @@ static void threshold_restart_bank(void *_tr) (new_count & THRESHOLD_MAX); } + /* clear IntType */ + hi &= ~MASK_INT_TYPE_HI; + + if (!tr->b->interrupt_capable) + goto done; + if (tr->set_lvt_off) { if (lvt_off_valid(tr->b, tr->lvt_off, lo, hi)) { /* set new lvt offset */ @@ -139,9 +161,10 @@ static void threshold_restart_bank(void *_tr) } } - tr->b->interrupt_enable ? - (hi = (hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) : - (hi &= ~MASK_INT_TYPE_HI); + if (tr->b->interrupt_enable) + hi |= INT_TYPE_APIC; + + done: hi |= MASK_COUNT_EN_HI; wrmsr(tr->b->address, lo, hi); @@ -202,18 +225,21 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) if (!block) per_cpu(bank_map, cpu) |= (1 << bank); -#ifdef CONFIG_SMP + if (shared_bank[bank] && c->cpu_core_id) break; -#endif - offset = setup_APIC_mce(offset, - (high & MASK_LVTOFF_HI) >> 20); memset(&b, 0, sizeof(b)); - b.cpu = cpu; - b.bank = bank; - b.block = block; - b.address = address; + b.cpu = cpu; + b.bank = bank; + b.block = block; + b.address = address; + b.interrupt_capable = lvt_interrupt_supported(bank, high); + + if (b.interrupt_capable) { + int new = (high & MASK_LVTOFF_HI) >> 20; + offset = setup_APIC_mce(offset, new); + } mce_threshold_block_init(&b, offset); mce_threshold_vector = amd_threshold_interrupt; @@ -313,6 +339,9 @@ store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size) struct thresh_restart tr; unsigned long new; + if (!b->interrupt_capable) + return -EINVAL; + if (strict_strtoul(buf, 0, &new) < 0) return -EINVAL; @@ -471,6 +500,7 @@ static __cpuinit int allocate_threshold_blocks(unsigned int cpu, b->cpu = cpu; b->address = address; b->interrupt_enable = 0; + b->interrupt_capable = lvt_interrupt_supported(bank, high); b->threshold_limit = THRESHOLD_MAX; INIT_LIST_HEAD(&b->miscj); diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index fe29c1d..4b50c96 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -437,6 +437,7 @@ static __initconst const struct x86_pmu amd_pmu = { * 0x023 DE PERF_CTL[2:0] * 0x02D LS PERF_CTL[3] * 0x02E LS PERF_CTL[3,0] + * 0x031 LS PERF_CTL[2:0] (**) * 0x043 CU PERF_CTL[2:0] * 0x045 CU PERF_CTL[2:0] * 0x046 CU PERF_CTL[2:0] @@ -450,10 +451,12 @@ static __initconst const struct x86_pmu amd_pmu = { * 0x0DD LS PERF_CTL[5:0] * 0x0DE LS PERF_CTL[5:0] * 0x0DF LS PERF_CTL[5:0] + * 0x1C0 EX PERF_CTL[5:3] * 0x1D6 EX PERF_CTL[5:0] * 0x1D8 EX PERF_CTL[5:0] * - * (*) depending on the umask all FPU counters may be used + * (*) depending on the umask all FPU counters may be used + * (**) only one unitmask enabled at a time */ static struct event_constraint amd_f15_PMC0 = EVENT_CONSTRAINT(0, 0x01, 0); @@ -503,6 +506,12 @@ amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *ev return &amd_f15_PMC3; case 0x02E: return &amd_f15_PMC30; + case 0x031: + if (hweight_long(hwc->config & ARCH_PERFMON_EVENTSEL_UMASK) <= 1) + return &amd_f15_PMC20; + return &emptyconstraint; + case 0x1C0: + return &amd_f15_PMC53; default: return &amd_f15_PMC50; } diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index 62ac8cb..72c365a 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -64,12 +64,10 @@ static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c) static int show_cpuinfo(struct seq_file *m, void *v) { struct cpuinfo_x86 *c = v; - unsigned int cpu = 0; + unsigned int cpu; int i; -#ifdef CONFIG_SMP cpu = c->cpu_index; -#endif seq_printf(m, "processor\t: %u\n" "vendor_id\t: %s\n" "cpu family\t: %d\n" diff --git a/arch/x86/kernel/cpu/rdrand.c b/arch/x86/kernel/cpu/rdrand.c new file mode 100644 index 0000000..feca286 --- /dev/null +++ b/arch/x86/kernel/cpu/rdrand.c @@ -0,0 +1,73 @@ +/* + * This file is part of the Linux kernel. + * + * Copyright (c) 2011, Intel Corporation + * Authors: Fenghua Yu <fenghua.yu@intel.com>, + * H. Peter Anvin <hpa@linux.intel.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + */ + +#include <asm/processor.h> +#include <asm/archrandom.h> +#include <asm/sections.h> + +static int __init x86_rdrand_setup(char *s) +{ + setup_clear_cpu_cap(X86_FEATURE_RDRAND); + return 1; +} +__setup("nordrand", x86_rdrand_setup); + +/* We can't use arch_get_random_long() here since alternatives haven't run */ +static inline int rdrand_long(unsigned long *v) +{ + int ok; + asm volatile("1: " RDRAND_LONG "\n\t" + "jc 2f\n\t" + "decl %0\n\t" + "jnz 1b\n\t" + "2:" + : "=r" (ok), "=a" (*v) + : "0" (RDRAND_RETRY_LOOPS)); + return ok; +} + +/* + * Force a reseed cycle; we are architecturally guaranteed a reseed + * after no more than 512 128-bit chunks of random data. This also + * acts as a test of the CPU capability. + */ +#define RESEED_LOOP ((512*128)/sizeof(unsigned long)) + +void __cpuinit x86_init_rdrand(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_ARCH_RANDOM + unsigned long tmp; + int i, count, ok; + + if (!cpu_has(c, X86_FEATURE_RDRAND)) + return; /* Nothing to do */ + + for (count = i = 0; i < RESEED_LOOP; i++) { + ok = rdrand_long(&tmp); + if (ok) + count++; + } + + if (count != RESEED_LOOP) + clear_cpu_cap(c, X86_FEATURE_RDRAND); +#endif +} diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index c7f64e6..ea6106c 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -31,7 +31,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) const struct cpuid_bit *cb; static const struct cpuid_bit __cpuinitconst cpuid_bits[] = { - { X86_FEATURE_DTS, CR_EAX, 0, 0x00000006, 0 }, + { X86_FEATURE_DTHERM, CR_EAX, 0, 0x00000006, 0 }, { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006, 0 }, { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006, 0 }, { X86_FEATURE_PLN, CR_EAX, 4, 0x00000006, 0 }, diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index edb3d46..268b40d 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1029,7 +1029,7 @@ ENTRY(xen_sysenter_target) ENTRY(xen_hypervisor_callback) CFI_STARTPROC - pushl_cfi $0 + pushl_cfi $-1 /* orig_ax = -1 => not a system call */ SAVE_ALL TRACE_IRQS_OFF @@ -1071,14 +1071,16 @@ ENTRY(xen_failsafe_callback) 2: mov 8(%esp),%es 3: mov 12(%esp),%fs 4: mov 16(%esp),%gs + /* EAX == 0 => Category 1 (Bad segment) + EAX != 0 => Category 2 (Bad IRET) */ testl %eax,%eax popl_cfi %eax lea 16(%esp),%esp CFI_ADJUST_CFA_OFFSET -16 jz 5f addl $16,%esp - jmp iret_exc # EAX != 0 => Category 2 (Bad IRET) -5: pushl_cfi $0 # EAX == 0 => Category 1 (Bad segment) + jmp iret_exc +5: pushl_cfi $-1 /* orig_ax = -1 => not a system call */ SAVE_ALL jmp ret_from_exception CFI_ENDPROC diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 8a445a0..dd4dba4 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1308,7 +1308,7 @@ ENTRY(xen_failsafe_callback) CFI_RESTORE r11 addq $0x30,%rsp CFI_ADJUST_CFA_OFFSET -0x30 - pushq_cfi $0 + pushq_cfi $-1 /* orig_ax = -1 => not a system call */ SAVE_ALL jmp error_exit CFI_ENDPROC diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index f924280..c4e2465 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -297,20 +297,31 @@ static ssize_t reload_store(struct sys_device *dev, const char *buf, size_t size) { unsigned long val; - int cpu = dev->id; - int ret = 0; - char *end; + int cpu; + ssize_t ret = 0, tmp_ret; - val = simple_strtoul(buf, &end, 0); - if (end == buf) + /* allow reload only from the BSP */ + if (boot_cpu_data.cpu_index != dev->id) return -EINVAL; - if (val == 1) { - get_online_cpus(); - if (cpu_online(cpu)) - ret = reload_for_cpu(cpu); - put_online_cpus(); + ret = kstrtoul(buf, 0, &val); + if (ret) + return ret; + + if (val != 1) + return size; + + get_online_cpus(); + for_each_online_cpu(cpu) { + tmp_ret = reload_for_cpu(cpu); + if (tmp_ret != 0) + pr_warn("Error reloading microcode on CPU %d\n", cpu); + + /* save retval of the first encountered reload error */ + if (!ret) + ret = tmp_ret; } + put_online_cpus(); if (!ret) ret = size; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index e1ba8cb..4272502 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -341,34 +341,10 @@ void (*pm_idle)(void); EXPORT_SYMBOL(pm_idle); #endif -#ifdef CONFIG_X86_32 -/* - * This halt magic was a workaround for ancient floppy DMA - * wreckage. It should be safe to remove. - */ -static int hlt_counter; -void disable_hlt(void) -{ - hlt_counter++; -} -EXPORT_SYMBOL(disable_hlt); - -void enable_hlt(void) -{ - hlt_counter--; -} -EXPORT_SYMBOL(enable_hlt); - -static inline int hlt_use_halt(void) -{ - return (!hlt_counter && boot_cpu_data.hlt_works_ok); -} -#else static inline int hlt_use_halt(void) { return 1; } -#endif /* * We use this if we don't have any better diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index d4a705f..89d6877 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -452,6 +452,14 @@ static struct dmi_system_id __initdata pci_reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E6420"), }, }, + { /* Handle problems with rebooting on the Precision M6600. */ + .callback = set_pci_reboot, + .ident = "Dell OptiPlex 990", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Precision M6600"), + }, + }, { } }; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index afaf384..af19a61 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -937,8 +937,21 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_X86_64 if (max_pfn > max_low_pfn) { - max_pfn_mapped = init_memory_mapping(1UL<<32, - max_pfn<<PAGE_SHIFT); + int i; + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + + if (ei->addr + ei->size <= 1UL << 32) + continue; + + if (ei->type == E820_RESERVED) + continue; + + max_pfn_mapped = init_memory_mapping( + ei->addr < 1UL << 32 ? 1UL << 32 : ei->addr, + ei->addr + ei->size); + } + /* can we preseve max_low_pfn ?*/ max_low_pfn = max_pfn; } diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 71f4727..5a98aa2 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -185,10 +185,22 @@ void __init setup_per_cpu_areas(void) #endif rc = -EINVAL; if (pcpu_chosen_fc != PCPU_FC_PAGE) { - const size_t atom_size = cpu_has_pse ? PMD_SIZE : PAGE_SIZE; const size_t dyn_size = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE - PERCPU_FIRST_CHUNK_RESERVE; + size_t atom_size; + /* + * On 64bit, use PMD_SIZE for atom_size so that embedded + * percpu areas are aligned to PMD. This, in the future, + * can also allow using PMD mappings in vmalloc area. Use + * PAGE_SIZE on 32bit as vmalloc space is highly contended + * and large vmalloc area allocs can easily fail. + */ +#ifdef CONFIG_X86_64 + atom_size = PMD_SIZE; +#else + atom_size = PAGE_SIZE; +#endif rc = pcpu_embed_first_chunk(PERCPU_FIRST_CHUNK_RESERVE, dyn_size, atom_size, pcpu_cpu_distance, diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index f581a18..df7d12c 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -56,9 +56,16 @@ static int vma_shareable(struct vm_area_struct *vma, unsigned long addr) } /* - * search for a shareable pmd page for hugetlb. + * Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc() + * and returns the corresponding pte. While this is not necessary for the + * !shared pmd case because we can allocate the pmd later as well, it makes the + * code much cleaner. pmd allocation is essential for the shared case because + * pud has to be populated inside the same i_mmap_mutex section - otherwise + * racing tasks could either miss the sharing (see huge_pte_offset) or select a + * bad pmd for sharing. */ -static void huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) +static pte_t * +huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) { struct vm_area_struct *vma = find_vma(mm, addr); struct address_space *mapping = vma->vm_file->f_mapping; @@ -68,9 +75,10 @@ static void huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) struct vm_area_struct *svma; unsigned long saddr; pte_t *spte = NULL; + pte_t *pte; if (!vma_shareable(vma, addr)) - return; + return (pte_t *)pmd_alloc(mm, pud, addr); mutex_lock(&mapping->i_mmap_mutex); vma_prio_tree_foreach(svma, &iter, &mapping->i_mmap, idx, idx) { @@ -97,7 +105,9 @@ static void huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) put_page(virt_to_page(spte)); spin_unlock(&mm->page_table_lock); out: + pte = (pte_t *)pmd_alloc(mm, pud, addr); mutex_unlock(&mapping->i_mmap_mutex); + return pte; } /* @@ -142,8 +152,9 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, } else { BUG_ON(sz != PMD_SIZE); if (pud_none(*pud)) - huge_pmd_share(mm, addr, pud); - pte = (pte_t *) pmd_alloc(mm, pud, addr); + pte = huge_pmd_share(mm, addr, pud); + else + pte = (pte_t *)pmd_alloc(mm, pud, addr); } } BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte)); diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 87488b9..c22c423 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -28,36 +28,50 @@ int direct_gbpages #endif ; -static void __init find_early_table_space(unsigned long end, int use_pse, - int use_gbpages) +struct map_range { + unsigned long start; + unsigned long end; + unsigned page_size_mask; +}; + +/* + * First calculate space needed for kernel direct mapping page tables to cover + * mr[0].start to mr[nr_range - 1].end, while accounting for possible 2M and 1GB + * pages. Then find enough contiguous space for those page tables. + */ +static void __init find_early_table_space(struct map_range *mr, int nr_range) { - unsigned long puds, pmds, ptes, tables, start = 0, good_end = end; + int i; + unsigned long puds = 0, pmds = 0, ptes = 0, tables; + unsigned long start = 0, good_end; phys_addr_t base; - puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; - tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); + for (i = 0; i < nr_range; i++) { + unsigned long range, extra; - if (use_gbpages) { - unsigned long extra; + range = mr[i].end - mr[i].start; + puds += (range + PUD_SIZE - 1) >> PUD_SHIFT; - extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT); - pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT; - } else - pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; - - tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE); + if (mr[i].page_size_mask & (1 << PG_LEVEL_1G)) { + extra = range - ((range >> PUD_SHIFT) << PUD_SHIFT); + pmds += (extra + PMD_SIZE - 1) >> PMD_SHIFT; + } else { + pmds += (range + PMD_SIZE - 1) >> PMD_SHIFT; + } - if (use_pse) { - unsigned long extra; - - extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT); + if (mr[i].page_size_mask & (1 << PG_LEVEL_2M)) { + extra = range - ((range >> PMD_SHIFT) << PMD_SHIFT); #ifdef CONFIG_X86_32 - extra += PMD_SIZE; + extra += PMD_SIZE; #endif - ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; - } else - ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; + ptes += (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; + } else { + ptes += (range + PAGE_SIZE - 1) >> PAGE_SHIFT; + } + } + tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); + tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE); tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE); #ifdef CONFIG_X86_32 @@ -74,8 +88,9 @@ static void __init find_early_table_space(unsigned long end, int use_pse, pgt_buf_end = pgt_buf_start; pgt_buf_top = pgt_buf_start + (tables >> PAGE_SHIFT); - printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n", - end, pgt_buf_start << PAGE_SHIFT, pgt_buf_top << PAGE_SHIFT); + printk(KERN_DEBUG "kernel direct mapping tables up to %#lx @ [mem %#010lx-%#010lx]\n", + mr[nr_range - 1].end - 1, pgt_buf_start << PAGE_SHIFT, + (pgt_buf_top << PAGE_SHIFT) - 1); } void __init native_pagetable_reserve(u64 start, u64 end) @@ -83,12 +98,6 @@ void __init native_pagetable_reserve(u64 start, u64 end) memblock_x86_reserve_range(start, end, "PGTABLE"); } -struct map_range { - unsigned long start; - unsigned long end; - unsigned page_size_mask; -}; - #ifdef CONFIG_X86_32 #define NR_RANGE_MR 3 #else /* CONFIG_X86_64 */ @@ -260,7 +269,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, * nodes are discovered. */ if (!after_bootmem) - find_early_table_space(end, use_pse, use_gbpages); + find_early_table_space(mr, nr_range); for (i = 0; i < nr_range; i++) ret = kernel_physical_mapping_init(mr[i].start, mr[i].end, diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 68894fd..a00c588 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -55,7 +55,7 @@ u64 op_x86_get_ctrl(struct op_x86_model_spec const *model, val |= counter_config->extra; event &= model->event_mask ? model->event_mask : 0xFF; val |= event & 0xFF; - val |= (event & 0x0F00) << 24; + val |= (u64)(event & 0x0F00) << 24; return val; } diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index 6dd8955..0951b81 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -521,3 +521,20 @@ static void sb600_disable_hpet_bar(struct pci_dev *dev) } } DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_ATI, 0x4385, sb600_disable_hpet_bar); + +/* + * Twinhead H12Y needs us to block out a region otherwise we map devices + * there and any access kills the box. + * + * See: https://bugzilla.kernel.org/show_bug.cgi?id=10231 + * + * Match off the LPC and svid/sdid (older kernels lose the bridge subvendor) + */ +static void __devinit twinhead_reserve_killing_zone(struct pci_dev *dev) +{ + if (dev->subsystem_vendor == 0x14FF && dev->subsystem_device == 0xA003) { + pr_info("Reserving memory on Twinhead H12Y\n"); + request_mem_region(0xFFB00000, 0x100000, "twinhead"); + } +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x27B9, twinhead_reserve_killing_zone); diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 0fb662a..9f808af 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -62,6 +62,7 @@ #include <asm/reboot.h> #include <asm/stackprotector.h> #include <asm/hypervisor.h> +#include <asm/pci_x86.h> #include "xen-ops.h" #include "mmu.h" @@ -197,6 +198,9 @@ static void __init xen_banner(void) xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : ""); } +#define CPUID_THERM_POWER_LEAF 6 +#define APERFMPERF_PRESENT 0 + static __read_mostly unsigned int cpuid_leaf1_edx_mask = ~0; static __read_mostly unsigned int cpuid_leaf1_ecx_mask = ~0; @@ -217,6 +221,11 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx, maskedx = cpuid_leaf1_edx_mask; break; + case CPUID_THERM_POWER_LEAF: + /* Disabling APERFMPERF for kernel usage */ + maskecx = ~(1 << APERFMPERF_PRESENT); + break; + case 0xb: /* Suppress extended topology stuff */ maskebx = 0; @@ -794,7 +803,16 @@ static void xen_write_cr4(unsigned long cr4) native_write_cr4(cr4); } - +#ifdef CONFIG_X86_64 +static inline unsigned long xen_read_cr8(void) +{ + return 0; +} +static inline void xen_write_cr8(unsigned long val) +{ + BUG_ON(val); +} +#endif static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) { int ret; @@ -959,6 +977,11 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = { .read_cr4_safe = native_read_cr4_safe, .write_cr4 = xen_write_cr4, +#ifdef CONFIG_X86_64 + .read_cr8 = xen_read_cr8, + .write_cr8 = xen_write_cr8, +#endif + .wbinvd = native_wbinvd, .read_msr = native_read_msr_safe, @@ -966,6 +989,8 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = { .read_tsc = native_read_tsc, .read_pmc = native_read_pmc, + .read_tscp = native_read_tscp, + .iret = xen_iret, .irq_enable_sysexit = xen_sysexit, #ifdef CONFIG_X86_64 @@ -1259,8 +1284,10 @@ asmlinkage void __init xen_start_kernel(void) /* Make sure ACS will be enabled */ pci_request_acs(); } - - +#ifdef CONFIG_PCI + /* PCI BIOS service won't work from a PV guest. */ + pci_probe &= ~PCI_PROBE_BIOS; +#endif xen_raw_console_write("about to get started...\n"); xen_setup_runstate_info(0); diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 5f76c0a..d957dce 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -320,8 +320,13 @@ static pteval_t pte_mfn_to_pfn(pteval_t val) { if (val & _PAGE_PRESENT) { unsigned long mfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT; + unsigned long pfn = mfn_to_pfn(mfn); + pteval_t flags = val & PTE_FLAGS_MASK; - val = ((pteval_t)mfn_to_pfn(mfn) << PAGE_SHIFT) | flags; + if (unlikely(pfn == ~0)) + val = flags & ~_PAGE_PRESENT; + else + val = ((pteval_t)pfn << PAGE_SHIFT) | flags; } return val; diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 58efeb9..2f7847d 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -683,6 +683,7 @@ int m2p_add_override(unsigned long mfn, struct page *page, bool clear_pte) unsigned long uninitialized_var(address); unsigned level; pte_t *ptep = NULL; + int ret = 0; pfn = page_to_pfn(page); if (!PageHighMem(page)) { @@ -706,6 +707,24 @@ int m2p_add_override(unsigned long mfn, struct page *page, bool clear_pte) list_add(&page->lru, &m2p_overrides[mfn_hash(mfn)]); spin_unlock_irqrestore(&m2p_override_lock, flags); + /* p2m(m2p(mfn)) == mfn: the mfn is already present somewhere in + * this domain. Set the FOREIGN_FRAME_BIT in the p2m for the other + * pfn so that the following mfn_to_pfn(mfn) calls will return the + * pfn from the m2p_override (the backend pfn) instead. + * We need to do this because the pages shared by the frontend + * (xen-blkfront) can be already locked (lock_page, called by + * do_read_cache_page); when the userspace backend tries to use them + * with direct_IO, mfn_to_pfn returns the pfn of the frontend, so + * do_blockdev_direct_IO is going to try to lock the same pages + * again resulting in a deadlock. + * As a side effect get_user_pages_fast might not be safe on the + * frontend pages while they are being shared with the backend, + * because mfn_to_pfn (that ends up being called by GUPF) will + * return the backend pfn rather than the frontend pfn. */ + ret = __get_user(pfn, &machine_to_phys_mapping[mfn]); + if (ret == 0 && get_phys_to_machine(pfn) == mfn) + set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)); + return 0; } EXPORT_SYMBOL_GPL(m2p_add_override); @@ -717,6 +736,7 @@ int m2p_remove_override(struct page *page, bool clear_pte) unsigned long uninitialized_var(address); unsigned level; pte_t *ptep = NULL; + int ret = 0; pfn = page_to_pfn(page); mfn = get_phys_to_machine(pfn); @@ -743,6 +763,22 @@ int m2p_remove_override(struct page *page, bool clear_pte) /* No tlb flush necessary because the caller already * left the pte unmapped. */ + /* p2m(m2p(mfn)) == FOREIGN_FRAME(mfn): the mfn is already present + * somewhere in this domain, even before being added to the + * m2p_override (see comment above in m2p_add_override). + * If there are no other entries in the m2p_override corresponding + * to this mfn, then remove the FOREIGN_FRAME_BIT from the p2m for + * the original pfn (the one shared by the frontend): the backend + * cannot do any IO on this page anymore because it has been + * unshared. Removing the FOREIGN_FRAME_BIT from the p2m entry of + * the original pfn causes mfn_to_pfn(mfn) to return the frontend + * pfn again. */ + mfn &= ~FOREIGN_FRAME_BIT; + ret = __get_user(pfn, &machine_to_phys_mapping[mfn]); + if (ret == 0 && get_phys_to_machine(pfn) == FOREIGN_FRAME(mfn) && + m2p_find_override(mfn) == NULL) + set_phys_to_machine(pfn, mfn); + return 0; } EXPORT_SYMBOL_GPL(m2p_remove_override); diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index f8dcda4..5669564 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -15,6 +15,7 @@ #include <asm/e820.h> #include <asm/setup.h> #include <asm/acpi.h> +#include <asm/numa.h> #include <asm/xen/hypervisor.h> #include <asm/xen/hypercall.h> @@ -463,4 +464,7 @@ void __init xen_arch_setup(void) boot_option_idle_override = IDLE_HALT; fiddle_vdso(); +#ifdef CONFIG_NUMA + numa_off = 1; +#endif } |