diff options
Diffstat (limited to 'arch')
32 files changed, 321 insertions, 78 deletions
diff --git a/arch/arm/mach-pxa/include/mach/smemc.h b/arch/arm/mach-pxa/include/mach/smemc.h index 654adc9..301bf0e 100644 --- a/arch/arm/mach-pxa/include/mach/smemc.h +++ b/arch/arm/mach-pxa/include/mach/smemc.h @@ -37,6 +37,7 @@ #define CSADRCFG1 (SMEMC_VIRT + 0x84) /* Address Configuration Register for CS1 */ #define CSADRCFG2 (SMEMC_VIRT + 0x88) /* Address Configuration Register for CS2 */ #define CSADRCFG3 (SMEMC_VIRT + 0x8C) /* Address Configuration Register for CS3 */ +#define CSMSADRCFG (SMEMC_VIRT + 0xA0) /* Chip Select Configuration Register */ /* * More handy macros for PCMCIA diff --git a/arch/arm/mach-pxa/smemc.c b/arch/arm/mach-pxa/smemc.c index 7992305..f38aa89 100644 --- a/arch/arm/mach-pxa/smemc.c +++ b/arch/arm/mach-pxa/smemc.c @@ -40,6 +40,8 @@ static void pxa3xx_smemc_resume(void) __raw_writel(csadrcfg[1], CSADRCFG1); __raw_writel(csadrcfg[2], CSADRCFG2); __raw_writel(csadrcfg[3], CSADRCFG3); + /* CSMSADRCFG wakes up in its default state (0), so we need to set it */ + __raw_writel(0x2, CSMSADRCFG); } static struct syscore_ops smemc_syscore_ops = { @@ -49,8 +51,19 @@ static struct syscore_ops smemc_syscore_ops = { static int __init smemc_init(void) { - if (cpu_is_pxa3xx()) + if (cpu_is_pxa3xx()) { + /* + * The only documentation we have on the + * Chip Select Configuration Register (CSMSADRCFG) is that + * it must be programmed to 0x2. + * Moreover, in the bit definitions, the second bit + * (CSMSADRCFG[1]) is called "SETALWAYS". + * Other bits are reserved in this register. + */ + __raw_writel(0x2, CSMSADRCFG); + register_syscore_ops(&smemc_syscore_ops); + } return 0; } diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 1a765c8..f00076c 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -479,25 +479,27 @@ static void dma_cache_maint_page(struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, void (*op)(const void *, size_t, int)) { + unsigned long pfn; + size_t left = size; + + pfn = page_to_pfn(page) + offset / PAGE_SIZE; + offset %= PAGE_SIZE; + /* * A single sg entry may refer to multiple physically contiguous * pages. But we still need to process highmem pages individually. * If highmem is not configured then the bulk of this loop gets * optimized out. */ - size_t left = size; do { size_t len = left; void *vaddr; + page = pfn_to_page(pfn); + if (PageHighMem(page)) { - if (len + offset > PAGE_SIZE) { - if (offset >= PAGE_SIZE) { - page += offset / PAGE_SIZE; - offset %= PAGE_SIZE; - } + if (len + offset > PAGE_SIZE) len = PAGE_SIZE - offset; - } vaddr = kmap_high_get(page); if (vaddr) { vaddr += offset; @@ -514,7 +516,7 @@ static void dma_cache_maint_page(struct page *page, unsigned long offset, op(vaddr, len, dir); } offset = 0; - page++; + pfn++; left -= len; } while (left); } diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h index 22dadeb..9d35a3e 100644 --- a/arch/parisc/include/asm/pgtable.h +++ b/arch/parisc/include/asm/pgtable.h @@ -12,11 +12,10 @@ #include <linux/bitops.h> #include <linux/spinlock.h> +#include <linux/mm_types.h> #include <asm/processor.h> #include <asm/cache.h> -struct vm_area_struct; - /* * kern_addr_valid(ADDR) tests if ADDR is pointing to valid kernel * memory. For the return value to be meaningful, ADDR must be >= @@ -40,7 +39,14 @@ struct vm_area_struct; do{ \ *(pteptr) = (pteval); \ } while(0) -#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval) + +extern void purge_tlb_entries(struct mm_struct *, unsigned long); + +#define set_pte_at(mm, addr, ptep, pteval) \ + do { \ + set_pte(ptep, pteval); \ + purge_tlb_entries(mm, addr); \ + } while (0) #endif /* !__ASSEMBLY__ */ @@ -464,6 +470,7 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, old = pte_val(*ptep); new = pte_val(pte_wrprotect(__pte (old))); } while (cmpxchg((unsigned long *) ptep, old, new) != old); + purge_tlb_entries(mm, addr); #else pte_t old_pte = *ptep; set_pte_at(mm, addr, ptep, pte_wrprotect(old_pte)); diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 83335f3..5241698 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -421,6 +421,24 @@ void kunmap_parisc(void *addr) EXPORT_SYMBOL(kunmap_parisc); #endif +void purge_tlb_entries(struct mm_struct *mm, unsigned long addr) +{ + unsigned long flags; + + /* Note: purge_tlb_entries can be called at startup with + no context. */ + + /* Disable preemption while we play with %sr1. */ + preempt_disable(); + mtsp(mm->context, 1); + purge_tlb_start(flags); + pdtlb(addr); + pitlb(addr); + purge_tlb_end(flags); + preempt_enable(); +} +EXPORT_SYMBOL(purge_tlb_entries); + void __flush_tlb_range(unsigned long sid, unsigned long start, unsigned long end) { diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index ba50409..e8befef 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -425,7 +425,7 @@ _STATIC(__after_prom_start) tovirt(r6,r6) /* on booke, we already run at PAGE_OFFSET */ #endif -#ifdef CONFIG_CRASH_DUMP +#ifdef CONFIG_RELOCATABLE /* * Check if the kernel has to be running as relocatable kernel based on the * variable __run_at_load, if it is set the kernel is treated as relocatable diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c index 583af70..cac9d2c 100644 --- a/arch/powerpc/kernel/machine_kexec_64.c +++ b/arch/powerpc/kernel/machine_kexec_64.c @@ -163,6 +163,8 @@ static int kexec_all_irq_disabled = 0; static void kexec_smp_down(void *arg) { local_irq_disable(); + hard_irq_disable(); + mb(); /* make sure our irqs are disabled before we say they are */ get_paca()->kexec_state = KEXEC_STATE_IRQS_OFF; while(kexec_all_irq_disabled == 0) @@ -245,6 +247,8 @@ static void kexec_prepare_cpus(void) wake_offline_cpus(); smp_call_function(kexec_smp_down, NULL, /* wait */0); local_irq_disable(); + hard_irq_disable(); + mb(); /* make sure IRQs are disabled before we say they are */ get_paca()->kexec_state = KEXEC_STATE_IRQS_OFF; @@ -282,6 +286,7 @@ static void kexec_prepare_cpus(void) if (ppc_md.kexec_cpu_down) ppc_md.kexec_cpu_down(0, 0); local_irq_disable(); + hard_irq_disable(); } #endif /* SMP */ diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 1becd7b..818d809 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -859,13 +859,8 @@ void update_vsyscall(struct timespec *wall_time, struct timespec *wtm, void update_vsyscall_tz(void) { - /* Make userspace gettimeofday spin until we're done. */ - ++vdso_data->tb_update_count; - smp_mb(); vdso_data->tz_minuteswest = sys_tz.tz_minuteswest; vdso_data->tz_dsttime = sys_tz.tz_dsttime; - smp_mb(); - ++vdso_data->tb_update_count; } static void __init clocksource_init(void) diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c index 549bb2c..ded8a1a 100644 --- a/arch/powerpc/kvm/44x_emulate.c +++ b/arch/powerpc/kvm/44x_emulate.c @@ -79,6 +79,7 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, run->dcr.dcrn = dcrn; run->dcr.data = 0; run->dcr.is_write = 0; + vcpu->arch.dcr_is_write = 0; vcpu->arch.io_gpr = rt; vcpu->arch.dcr_needed = 1; kvmppc_account_exit(vcpu, DCR_EXITS); @@ -100,6 +101,7 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, run->dcr.dcrn = dcrn; run->dcr.data = kvmppc_get_gpr(vcpu, rs); run->dcr.is_write = 1; + vcpu->arch.dcr_is_write = 1; vcpu->arch.dcr_needed = 1; kvmppc_account_exit(vcpu, DCR_EXITS); emulated = EMULATE_DO_DCR; diff --git a/arch/powerpc/platforms/embedded6xx/wii.c b/arch/powerpc/platforms/embedded6xx/wii.c index 1b5dc1a..daf793b 100644 --- a/arch/powerpc/platforms/embedded6xx/wii.c +++ b/arch/powerpc/platforms/embedded6xx/wii.c @@ -85,9 +85,11 @@ void __init wii_memory_fixups(void) wii_hole_start = p[0].base + p[0].size; wii_hole_size = p[1].base - wii_hole_start; - pr_info("MEM1: <%08llx %08llx>\n", p[0].base, p[0].size); + pr_info("MEM1: <%08llx %08llx>\n", + (unsigned long long) p[0].base, (unsigned long long) p[0].size); pr_info("HOLE: <%08lx %08lx>\n", wii_hole_start, wii_hole_size); - pr_info("MEM2: <%08llx %08llx>\n", p[1].base, p[1].size); + pr_info("MEM2: <%08llx %08llx>\n", + (unsigned long long) p[1].base, (unsigned long long) p[1].size); p[0].size += wii_hole_size + p[1].size; diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index 88829a4..a4b8f60 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -126,4 +126,32 @@ static inline unsigned long long get_clock_monotonic(void) return get_clock_xt() - sched_clock_base_cc; } +/** + * tod_to_ns - convert a TOD format value to nanoseconds + * @todval: to be converted TOD format value + * Returns: number of nanoseconds that correspond to the TOD format value + * + * Converting a 64 Bit TOD format value to nanoseconds means that the value + * must be divided by 4.096. In order to achieve that we multiply with 125 + * and divide by 512: + * + * ns = (todval * 125) >> 9; + * + * In order to avoid an overflow with the multiplication we can rewrite this. + * With a split todval == 2^32 * th + tl (th upper 32 bits, tl lower 32 bits) + * we end up with + * + * ns = ((2^32 * th + tl) * 125 ) >> 9; + * -> ns = (2^23 * th * 125) + ((tl * 125) >> 9); + * + */ +static inline unsigned long long tod_to_ns(unsigned long long todval) +{ + unsigned long long ns; + + ns = ((todval >> 32) << 23) * 125; + ns += ((todval & 0xffffffff) * 125) >> 9; + return ns; +} + #endif diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index dff9330..943ea0e 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -63,7 +63,7 @@ static DEFINE_PER_CPU(struct clock_event_device, comparators); */ unsigned long long notrace __kprobes sched_clock(void) { - return (get_clock_monotonic() * 125) >> 9; + return tod_to_ns(get_clock_monotonic()); } /* diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 35c21bf..a3db4c8 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -358,7 +358,7 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) return 0; } - sltime = ((vcpu->arch.sie_block->ckc - now)*125)>>9; + sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now); hrtimer_start(&vcpu->arch.ckc_timer, ktime_set (0, sltime) , HRTIMER_MODE_REL); VCPU_EVENT(vcpu, 5, "enabled wait via clock comparator: %llx ns", sltime); diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 2ada634..f9804b7 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -584,6 +584,14 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) } else prefix = 0; + /* + * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy + * copying in vcpu load/put. Lets update our copies before we save + * it into the save area + */ + save_fp_regs(&vcpu->arch.guest_fpregs); + save_access_regs(vcpu->arch.guest_acrs); + if (__guestcopy(vcpu, addr + offsetof(struct save_area, fp_regs), vcpu->arch.guest_fpregs.fprs, 128, prefix)) return -EFAULT; diff --git a/arch/sh/include/asm/elf.h b/arch/sh/include/asm/elf.h index f38112b..978b7fd 100644 --- a/arch/sh/include/asm/elf.h +++ b/arch/sh/include/asm/elf.h @@ -202,9 +202,9 @@ extern void __kernel_vsyscall; if (vdso_enabled) \ NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_BASE); \ else \ - NEW_AUX_ENT(AT_IGNORE, 0); + NEW_AUX_ENT(AT_IGNORE, 0) #else -#define VSYSCALL_AUX_ENT +#define VSYSCALL_AUX_ENT NEW_AUX_ENT(AT_IGNORE, 0) #endif /* CONFIG_VSYSCALL */ #ifdef CONFIG_SH_FPU diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index c1870dd..26af1e3 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -208,7 +208,7 @@ sysexit_from_sys_call: testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10) jnz ia32_ret_from_sys_call TRACE_IRQS_ON - sti + ENABLE_INTERRUPTS(CLBR_NONE) movl %eax,%esi /* second arg, syscall return value */ cmpl $0,%eax /* is it < 0? */ setl %al /* 1 if so, 0 if not */ @@ -218,7 +218,7 @@ sysexit_from_sys_call: GET_THREAD_INFO(%r10) movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall return value */ movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi - cli + DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF testl %edi,TI_flags(%r10) jz \exit diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h index ffa037f..a6a6414 100644 --- a/arch/x86/include/asm/mmzone_32.h +++ b/arch/x86/include/asm/mmzone_32.h @@ -14,12 +14,6 @@ extern struct pglist_data *node_data[]; #include <asm/numaq.h> -extern void resume_map_numa_kva(pgd_t *pgd); - -#else /* !CONFIG_NUMA */ - -static inline void resume_map_numa_kva(pgd_t *pgd) {} - #endif /* CONFIG_NUMA */ #ifdef CONFIG_DISCONTIGMEM diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 884507e..6be9909 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -142,6 +142,11 @@ static inline unsigned long pmd_pfn(pmd_t pmd) return (pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT; } +static inline unsigned long pud_pfn(pud_t pud) +{ + return (pud_val(pud) & PTE_PFN_MASK) >> PAGE_SHIFT; +} + #define pte_page(pte) pfn_to_page(pte_pfn(pte)) static inline int pmd_large(pmd_t pte) diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 0310da6..1d44903 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -1,6 +1,7 @@ #ifndef _ASM_X86_TRAPS_H #define _ASM_X86_TRAPS_H +#include <linux/kprobes.h> #include <asm/debugreg.h> #include <asm/siginfo.h> /* TRAP_TRACE, ... */ @@ -87,4 +88,29 @@ asmlinkage void smp_thermal_interrupt(void); asmlinkage void mce_threshold_interrupt(void); #endif +/* Interrupts/Exceptions */ +enum { + X86_TRAP_DE = 0, /* 0, Divide-by-zero */ + X86_TRAP_DB, /* 1, Debug */ + X86_TRAP_NMI, /* 2, Non-maskable Interrupt */ + X86_TRAP_BP, /* 3, Breakpoint */ + X86_TRAP_OF, /* 4, Overflow */ + X86_TRAP_BR, /* 5, Bound Range Exceeded */ + X86_TRAP_UD, /* 6, Invalid Opcode */ + X86_TRAP_NM, /* 7, Device Not Available */ + X86_TRAP_DF, /* 8, Double Fault */ + X86_TRAP_OLD_MF, /* 9, Coprocessor Segment Overrun */ + X86_TRAP_TS, /* 10, Invalid TSS */ + X86_TRAP_NP, /* 11, Segment Not Present */ + X86_TRAP_SS, /* 12, Stack Segment Fault */ + X86_TRAP_GP, /* 13, General Protection Fault */ + X86_TRAP_PF, /* 14, Page Fault */ + X86_TRAP_SPURIOUS, /* 15, Spurious Interrupt */ + X86_TRAP_MF, /* 16, x87 Floating-Point Exception */ + X86_TRAP_AC, /* 17, Alignment Check */ + X86_TRAP_MC, /* 18, Machine Check */ + X86_TRAP_XF, /* 19, SIMD Floating-Point Exception */ + X86_TRAP_IRET = 32, /* 32, IRET Exception */ +}; + #endif /* _ASM_X86_TRAPS_H */ diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 33df6e8..d86aa3f 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -1363,6 +1363,7 @@ static struct syscore_ops amd_iommu_syscore_ops = { */ static int __init amd_iommu_init(void) { + struct amd_iommu *iommu; int i, ret = 0; /* @@ -1411,9 +1412,6 @@ static int __init amd_iommu_init(void) if (amd_iommu_pd_alloc_bitmap == NULL) goto free; - /* init the device table */ - init_device_table(); - /* * let all alias entries point to itself */ @@ -1463,6 +1461,12 @@ static int __init amd_iommu_init(void) if (ret) goto free_disable; + /* init the device table */ + init_device_table(); + + for_each_iommu(iommu) + iommu_flush_all_caches(iommu); + amd_iommu_init_api(); amd_iommu_init_notifier(); diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index f5373df..db4f704 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -20,12 +20,19 @@ static int set_x2apic_phys_mode(char *arg) } early_param("x2apic_phys", set_x2apic_phys_mode); +static bool x2apic_fadt_phys(void) +{ + if ((acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID) && + (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) { + printk(KERN_DEBUG "System requires x2apic physical mode\n"); + return true; + } + return false; +} + static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { - if (x2apic_phys) - return x2apic_enabled(); - else - return 0; + return x2apic_enabled() && (x2apic_phys || x2apic_fadt_phys()); } static void @@ -108,7 +115,7 @@ static void init_x2apic_ldr(void) static int x2apic_phys_probe(void) { - if (x2apic_mode && x2apic_phys) + if (x2apic_mode && (x2apic_phys || x2apic_fadt_phys())) return 1; return apic == &apic_x2apic_phys; diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 3f4b6da..a93741d 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -568,6 +568,20 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) } } + /* + * The way access filter has a performance penalty on some workloads. + * Disable it on the affected CPUs. + */ + if ((c->x86 == 0x15) && + (c->x86_model >= 0x02) && (c->x86_model < 0x20)) { + u64 val; + + if (!rdmsrl_safe(0xc0011021, &val) && !(val & 0x1E)) { + val |= 0x1E; + checking_wrmsrl(0xc0011021, val); + } + } + cpu_detect_cache_sizes(c); /* Multi core CPU? */ diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 268b40d..2df1252 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1078,7 +1078,6 @@ ENTRY(xen_failsafe_callback) lea 16(%esp),%esp CFI_ADJUST_CFA_OFFSET -16 jz 5f - addl $16,%esp jmp iret_exc 5: pushl_cfi $-1 /* orig_ax = -1 => not a system call */ SAVE_ALL diff --git a/arch/x86/kernel/head.c b/arch/x86/kernel/head.c index af0699b..f6c4674 100644 --- a/arch/x86/kernel/head.c +++ b/arch/x86/kernel/head.c @@ -5,8 +5,6 @@ #include <asm/setup.h> #include <asm/bios_ebda.h> -#define BIOS_LOWMEM_KILOBYTES 0x413 - /* * The BIOS places the EBDA/XBDA at the top of conventional * memory, and usually decreases the reported amount of @@ -16,17 +14,30 @@ * chipset: reserve a page before VGA to prevent PCI prefetch * into it (errata #56). Usually the page is reserved anyways, * unless you have no PS/2 mouse plugged in. + * + * This functions is deliberately very conservative. Losing + * memory in the bottom megabyte is rarely a problem, as long + * as we have enough memory to install the trampoline. Using + * memory that is in use by the BIOS or by some DMA device + * the BIOS didn't shut down *is* a big problem. */ + +#define BIOS_LOWMEM_KILOBYTES 0x413 +#define LOWMEM_CAP 0x9f000U /* Absolute maximum */ +#define INSANE_CUTOFF 0x20000U /* Less than this = insane */ + void __init reserve_ebda_region(void) { unsigned int lowmem, ebda_addr; - /* To determine the position of the EBDA and the */ - /* end of conventional memory, we need to look at */ - /* the BIOS data area. In a paravirtual environment */ - /* that area is absent. We'll just have to assume */ - /* that the paravirt case can handle memory setup */ - /* correctly, without our help. */ + /* + * To determine the position of the EBDA and the + * end of conventional memory, we need to look at + * the BIOS data area. In a paravirtual environment + * that area is absent. We'll just have to assume + * that the paravirt case can handle memory setup + * correctly, without our help. + */ if (paravirt_enabled()) return; @@ -37,19 +48,23 @@ void __init reserve_ebda_region(void) /* start of EBDA area */ ebda_addr = get_bios_ebda(); - /* Fixup: bios puts an EBDA in the top 64K segment */ - /* of conventional memory, but does not adjust lowmem. */ - if ((lowmem - ebda_addr) <= 0x10000) - lowmem = ebda_addr; + /* + * Note: some old Dells seem to need 4k EBDA without + * reporting so, so just consider the memory above 0x9f000 + * to be off limits (bugzilla 2990). + */ + + /* If the EBDA address is below 128K, assume it is bogus */ + if (ebda_addr < INSANE_CUTOFF) + ebda_addr = LOWMEM_CAP; - /* Fixup: bios does not report an EBDA at all. */ - /* Some old Dells seem to need 4k anyhow (bugzilla 2990) */ - if ((ebda_addr == 0) && (lowmem >= 0x9f000)) - lowmem = 0x9f000; + /* If lowmem is less than 128K, assume it is bogus */ + if (lowmem < INSANE_CUTOFF) + lowmem = LOWMEM_CAP; - /* Paranoia: should never happen, but... */ - if ((lowmem == 0) || (lowmem >= 0x100000)) - lowmem = 0x9f000; + /* Use the lower of the lowmem and EBDA markers as the cutoff */ + lowmem = min(lowmem, ebda_addr); + lowmem = min(lowmem, LOWMEM_CAP); /* Absolute cap */ /* reserve all memory between lowmem and the 1MB mark */ memblock_x86_reserve_range(lowmem, 0x100000, "* BIOS reserved"); diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 12fcbe2..f7d1a64 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -175,6 +175,9 @@ static int msr_open(struct inode *inode, struct file *file) unsigned int cpu; struct cpuinfo_x86 *c; + if (!capable(CAP_SYS_RAWIO)) + return -EPERM; + cpu = iminor(file->f_path.dentry->d_inode); if (cpu >= nr_cpu_ids || !cpu_online(cpu)) return -ENXIO; /* No such CPU */ diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index af19a61..6c4e9ff 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -631,6 +631,83 @@ static __init void reserve_ibft_region(void) static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10; +static bool __init snb_gfx_workaround_needed(void) +{ +#ifdef CONFIG_PCI + int i; + u16 vendor, devid; + static const u16 snb_ids[] = { + 0x0102, + 0x0112, + 0x0122, + 0x0106, + 0x0116, + 0x0126, + 0x010a, + }; + + /* Assume no if something weird is going on with PCI */ + if (!early_pci_allowed()) + return false; + + vendor = read_pci_config_16(0, 2, 0, PCI_VENDOR_ID); + if (vendor != 0x8086) + return false; + + devid = read_pci_config_16(0, 2, 0, PCI_DEVICE_ID); + for (i = 0; i < ARRAY_SIZE(snb_ids); i++) + if (devid == snb_ids[i]) + return true; +#endif + + return false; +} + +/* + * Sandy Bridge graphics has trouble with certain ranges, exclude + * them from allocation. + */ +static void __init trim_snb_memory(void) +{ + static const unsigned long bad_pages[] = { + 0x20050000, + 0x20110000, + 0x20130000, + 0x20138000, + 0x40004000, + }; + int i; + + if (!snb_gfx_workaround_needed()) + return; + + printk(KERN_DEBUG "reserving inaccessible SNB gfx pages\n"); + + /* + * Reserve all memory below the 1 MB mark that has not + * already been reserved. + */ + memblock_reserve(0, 1<<20); + + for (i = 0; i < ARRAY_SIZE(bad_pages); i++) { + if (memblock_reserve(bad_pages[i], PAGE_SIZE)) + printk(KERN_WARNING "failed to reserve 0x%08lx\n", + bad_pages[i]); + } +} + +/* + * Here we put platform-specific memory range workarounds, i.e. + * memory known to be corrupt or otherwise in need to be reserved on + * specific platforms. + * + * If this gets used more widely it could use a real dispatch mechanism. + */ +static void __init trim_platform_memory_ranges(void) +{ + trim_snb_memory(); +} + static void __init trim_bios_range(void) { /* @@ -651,6 +728,7 @@ static void __init trim_bios_range(void) * take them out. */ e820_remove_range(BIOS_BEGIN, BIOS_END - BIOS_BEGIN, E820_RAM, 1); + sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); } @@ -929,6 +1007,8 @@ void __init setup_arch(char **cmdline_p) setup_trampolines(); + trim_platform_memory_ranges(); + init_gbpages(); /* max_pfn_mapped is updated here */ diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 2dbf6bf..3b2ad91 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -720,12 +720,15 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, if (is_errata100(regs, address)) return; - if (unlikely(show_unhandled_signals)) + /* Kernel addresses are always protection faults: */ + if (address >= TASK_SIZE) + error_code |= PF_PROT; + + if (likely(show_unhandled_signals)) show_signal_msg(regs, error_code, address, tsk); - /* Kernel addresses are always protection faults: */ tsk->thread.cr2 = address; - tsk->thread.error_code = error_code | (address >= TASK_SIZE); + tsk->thread.error_code = error_code; tsk->thread.trap_no = 14; force_sig_info_fault(SIGSEGV, si_code, address, tsk, 0); diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index bbaaa00..44b93da 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -831,6 +831,9 @@ int kern_addr_valid(unsigned long addr) if (pud_none(*pud)) return 0; + if (pud_large(*pud)) + return pfn_valid(pud_pfn(*pud)); + pmd = pmd_offset(pud, addr); if (pmd_none(*pmd)) return 0; diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index ac3aa54..0fba86d 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -38,7 +38,7 @@ #include <asm/cacheflush.h> #include <asm/fixmap.h> -static pgd_t save_pgd __initdata; +static pgd_t *save_pgd __initdata; static unsigned long efi_flags __initdata; static void __init early_code_mapping_set_exec(int executable) @@ -61,12 +61,20 @@ static void __init early_code_mapping_set_exec(int executable) void __init efi_call_phys_prelog(void) { unsigned long vaddress; + int pgd; + int n_pgds; early_code_mapping_set_exec(1); local_irq_save(efi_flags); - vaddress = (unsigned long)__va(0x0UL); - save_pgd = *pgd_offset_k(0x0UL); - set_pgd(pgd_offset_k(0x0UL), *pgd_offset_k(vaddress)); + + n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT), PGDIR_SIZE); + save_pgd = kmalloc(n_pgds * sizeof(pgd_t), GFP_KERNEL); + + for (pgd = 0; pgd < n_pgds; pgd++) { + save_pgd[pgd] = *pgd_offset_k(pgd * PGDIR_SIZE); + vaddress = (unsigned long)__va(pgd * PGDIR_SIZE); + set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), *pgd_offset_k(vaddress)); + } __flush_tlb_all(); } @@ -75,7 +83,11 @@ void __init efi_call_phys_epilog(void) /* * After the lock is released, the original page table is restored. */ - set_pgd(pgd_offset_k(0x0UL), save_pgd); + int pgd; + int n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE); + for (pgd = 0; pgd < n_pgds; pgd++) + set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), save_pgd[pgd]); + kfree(save_pgd); __flush_tlb_all(); local_irq_restore(efi_flags); early_code_mapping_set_exec(0); diff --git a/arch/x86/power/hibernate_32.c b/arch/x86/power/hibernate_32.c index 3769079..a09ecb9 100644 --- a/arch/x86/power/hibernate_32.c +++ b/arch/x86/power/hibernate_32.c @@ -130,8 +130,6 @@ static int resume_physical_mapping_init(pgd_t *pgd_base) } } - resume_map_numa_kva(pgd_base); - return 0; } diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index cc9b1e1..d99537f 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -313,7 +313,6 @@ static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl) if (per_cpu(lock_spinners, cpu) == xl) { ADD_STATS(released_slow_kicked, 1); xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR); - break; } } } diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S index b040b0e..7328f71 100644 --- a/arch/x86/xen/xen-asm_32.S +++ b/arch/x86/xen/xen-asm_32.S @@ -88,11 +88,11 @@ ENTRY(xen_iret) */ #ifdef CONFIG_SMP GET_THREAD_INFO(%eax) - movl TI_cpu(%eax), %eax - movl __per_cpu_offset(,%eax,4), %eax - mov xen_vcpu(%eax), %eax + movl %ss:TI_cpu(%eax), %eax + movl %ss:__per_cpu_offset(,%eax,4), %eax + mov %ss:xen_vcpu(%eax), %eax #else - movl xen_vcpu, %eax + movl %ss:xen_vcpu, %eax #endif /* check IF state we're restoring */ @@ -105,11 +105,11 @@ ENTRY(xen_iret) * resuming the code, so we don't have to be worried about * being preempted to another CPU. */ - setz XEN_vcpu_info_mask(%eax) + setz %ss:XEN_vcpu_info_mask(%eax) xen_iret_start_crit: /* check for unmasked and pending */ - cmpw $0x0001, XEN_vcpu_info_pending(%eax) + cmpw $0x0001, %ss:XEN_vcpu_info_pending(%eax) /* * If there's something pending, mask events again so we can @@ -117,7 +117,7 @@ xen_iret_start_crit: * touch XEN_vcpu_info_mask. */ jne 1f - movb $1, XEN_vcpu_info_mask(%eax) + movb $1, %ss:XEN_vcpu_info_mask(%eax) 1: popl %eax |