Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/ia32/ia32entry.S          |  4
-rw-r--r--  arch/x86/include/asm/mmzone_32.h   |  6
-rw-r--r--  arch/x86/include/asm/pgtable.h     |  5
-rw-r--r--  arch/x86/include/asm/traps.h       | 26
-rw-r--r--  arch/x86/kernel/amd_iommu_init.c   | 10
-rw-r--r--  arch/x86/kernel/apic/x2apic_phys.c | 17
-rw-r--r--  arch/x86/kernel/cpu/amd.c          | 14
-rw-r--r--  arch/x86/kernel/entry_32.S         |  1
-rw-r--r--  arch/x86/kernel/head.c             | 53
-rw-r--r--  arch/x86/kernel/msr.c              |  3
-rw-r--r--  arch/x86/kernel/setup.c            | 80
-rw-r--r--  arch/x86/mm/fault.c                |  9
-rw-r--r--  arch/x86/mm/init_64.c              |  3
-rw-r--r--  arch/x86/platform/efi/efi_64.c     | 22
-rw-r--r--  arch/x86/power/hibernate_32.c      |  2
-rw-r--r--  arch/x86/xen/spinlock.c            |  1
-rw-r--r--  arch/x86/xen/xen-asm_32.S          | 14
17 files changed, 216 insertions(+), 54 deletions(-)
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index c1870dd..26af1e3 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -208,7 +208,7 @@ sysexit_from_sys_call:
 	testl	$(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10)
 	jnz	ia32_ret_from_sys_call
 	TRACE_IRQS_ON
-	sti
+	ENABLE_INTERRUPTS(CLBR_NONE)
 	movl	%eax,%esi		/* second arg, syscall return value */
 	cmpl	$0,%eax			/* is it < 0? */
 	setl	%al			/* 1 if so, 0 if not */
@@ -218,7 +218,7 @@ sysexit_from_sys_call:
 	GET_THREAD_INFO(%r10)
 	movl	RAX-ARGOFFSET(%rsp),%eax	/* reload syscall return value */
 	movl	$(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
-	cli
+	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
 	testl	%edi,TI_flags(%r10)
 	jz	\exit
diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h
index ffa037f..a6a6414 100644
--- a/arch/x86/include/asm/mmzone_32.h
+++ b/arch/x86/include/asm/mmzone_32.h
@@ -14,12 +14,6 @@ extern struct pglist_data *node_data[];
 
 #include <asm/numaq.h>
 
-extern void resume_map_numa_kva(pgd_t *pgd);
-
-#else /* !CONFIG_NUMA */
-
-static inline void resume_map_numa_kva(pgd_t *pgd) {}
-
 #endif /* CONFIG_NUMA */
 
 #ifdef CONFIG_DISCONTIGMEM
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 884507e..6be9909 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -142,6 +142,11 @@ static inline unsigned long pmd_pfn(pmd_t pmd)
 	return (pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT;
 }
 
+static inline unsigned long pud_pfn(pud_t pud)
+{
+	return (pud_val(pud) & PTE_PFN_MASK) >> PAGE_SHIFT;
+}
+
 #define pte_page(pte)	pfn_to_page(pte_pfn(pte))
 
 static inline int pmd_large(pmd_t pte)
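The new pud_pfn() mirrors pmd_pfn() one level up the paging hierarchy, which lets a page-table walk stop at a 1 GiB mapping; the init_64.c hunk further down uses it for exactly that in kern_addr_valid(). A minimal sketch of the pattern (walk_to_pfn() is a hypothetical helper; the pgd/pud accessors are the stock ones):

	/*
	 * Hypothetical helper: resolve a kernel virtual address to a PFN,
	 * stopping at a 1 GiB pud mapping, as kern_addr_valid() does below.
	 */
	static unsigned long walk_to_pfn(unsigned long addr)
	{
		pgd_t *pgd = pgd_offset_k(addr);
		pud_t *pud;

		if (pgd_none(*pgd))
			return 0;

		pud = pud_offset(pgd, addr);
		if (pud_none(*pud))
			return 0;

		if (pud_large(*pud))	/* 1 GiB page: base PFN plus offset */
			return pud_pfn(*pud) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);

		/* ...otherwise descend to the pmd/pte levels... */
		return 0;
	}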
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 0310da6..1d44903 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -1,6 +1,7 @@
 #ifndef _ASM_X86_TRAPS_H
 #define _ASM_X86_TRAPS_H
 
+#include <linux/kprobes.h>
 #include <asm/debugreg.h>
 #include <asm/siginfo.h>		/* TRAP_TRACE, ... */
 
@@ -87,4 +88,29 @@ asmlinkage void smp_thermal_interrupt(void);
 asmlinkage void mce_threshold_interrupt(void);
 #endif
 
+/* Interrupts/Exceptions */
+enum {
+	X86_TRAP_DE = 0,	/*  0, Divide-by-zero */
+	X86_TRAP_DB,		/*  1, Debug */
+	X86_TRAP_NMI,		/*  2, Non-maskable Interrupt */
+	X86_TRAP_BP,		/*  3, Breakpoint */
+	X86_TRAP_OF,		/*  4, Overflow */
+	X86_TRAP_BR,		/*  5, Bound Range Exceeded */
+	X86_TRAP_UD,		/*  6, Invalid Opcode */
+	X86_TRAP_NM,		/*  7, Device Not Available */
+	X86_TRAP_DF,		/*  8, Double Fault */
+	X86_TRAP_OLD_MF,	/*  9, Coprocessor Segment Overrun */
+	X86_TRAP_TS,		/* 10, Invalid TSS */
+	X86_TRAP_NP,		/* 11, Segment Not Present */
+	X86_TRAP_SS,		/* 12, Stack Segment Fault */
+	X86_TRAP_GP,		/* 13, General Protection Fault */
+	X86_TRAP_PF,		/* 14, Page Fault */
+	X86_TRAP_SPURIOUS,	/* 15, Spurious Interrupt */
+	X86_TRAP_MF,		/* 16, x87 Floating-Point Exception */
+	X86_TRAP_AC,		/* 17, Alignment Check */
+	X86_TRAP_MC,		/* 18, Machine Check */
+	X86_TRAP_XF,		/* 19, SIMD Floating-Point Exception */
+	X86_TRAP_IRET = 32,	/* 32, IRET Exception */
+};
+
 #endif /* _ASM_X86_TRAPS_H */
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 33df6e8..d86aa3f 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -1363,6 +1363,7 @@ static struct syscore_ops amd_iommu_syscore_ops = {
  */
 static int __init amd_iommu_init(void)
 {
+	struct amd_iommu *iommu;
 	int i, ret = 0;
 
 	/*
@@ -1411,9 +1412,6 @@ static int __init amd_iommu_init(void)
 	if (amd_iommu_pd_alloc_bitmap == NULL)
 		goto free;
 
-	/* init the device table */
-	init_device_table();
-
 	/*
 	 * let all alias entries point to itself
 	 */
@@ -1463,6 +1461,12 @@ static int __init amd_iommu_init(void)
 	if (ret)
 		goto free_disable;
 
+	/* init the device table */
+	init_device_table();
+
+	for_each_iommu(iommu)
+		iommu_flush_all_caches(iommu);
+
 	amd_iommu_init_api();
 
 	amd_iommu_init_notifier();
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index f5373df..db4f704 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -20,12 +20,19 @@ static int set_x2apic_phys_mode(char *arg)
 }
 early_param("x2apic_phys", set_x2apic_phys_mode);
 
+static bool x2apic_fadt_phys(void)
+{
+	if ((acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID) &&
+	    (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) {
+		printk(KERN_DEBUG "System requires x2apic physical mode\n");
+		return true;
+	}
+	return false;
+}
+
 static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 {
-	if (x2apic_phys)
-		return x2apic_enabled();
-	else
-		return 0;
+	return x2apic_enabled() && (x2apic_phys || x2apic_fadt_phys());
 }
 
 static void
@@ -108,7 +115,7 @@ static void init_x2apic_ldr(void)
 
 static int x2apic_phys_probe(void)
 {
-	if (x2apic_mode && x2apic_phys)
+	if (x2apic_mode && (x2apic_phys || x2apic_fadt_phys()))
 		return 1;
 
 	return apic == &apic_x2apic_phys;
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 3f4b6da..a93741d 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -568,6 +568,20 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 		}
 	}
 
+	/*
+	 * The way access filter has a performance penalty on some workloads.
+	 * Disable it on the affected CPUs.
+	 */
+	if ((c->x86 == 0x15) &&
+	    (c->x86_model >= 0x02) && (c->x86_model < 0x20)) {
+		u64 val;
+
+		if (!rdmsrl_safe(0xc0011021, &val) && !(val & 0x1E)) {
+			val |= 0x1E;
+			checking_wrmsrl(0xc0011021, val);
+		}
+	}
+
 	cpu_detect_cache_sizes(c);
 
 	/* Multi core CPU? */
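The amd.c hunk turns off AMD's "way access filter" on family 0x15, models 0x02..0x1f (Piledriver) by setting bits 1-4 (mask 0x1E) of MSR 0xC001_1021, since the filter costs performance on some workloads. Whether the workaround took effect can be checked from userspace through the msr driver; a sketch (hypothetical test program; note that after the msr.c hunk below, opening the device needs CAP_SYS_RAWIO):

	/*
	 * Sketch: read MSR 0xc0011021 on CPU 0 via the msr driver and report
	 * whether the way access filter bits (mask 0x1E) are set.
	 */
	#include <fcntl.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		uint64_t val;
		int fd = open("/dev/cpu/0/msr", O_RDONLY);

		if (fd < 0) {
			perror("open");		/* needs CAP_SYS_RAWIO */
			return 1;
		}
		/* the msr driver maps the file offset to the MSR index */
		if (pread(fd, &val, sizeof(val), 0xc0011021) != sizeof(val)) {
			perror("pread");
			return 1;
		}
		printf("way access filter %s\n",
		       (val & 0x1E) == 0x1E ? "disabled" : "enabled");
		close(fd);
		return 0;
	}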
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 268b40d..2df1252 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1078,7 +1078,6 @@ ENTRY(xen_failsafe_callback)
 	lea 16(%esp),%esp
 	CFI_ADJUST_CFA_OFFSET -16
 	jz 5f
-	addl $16,%esp
 	jmp iret_exc
 5:	pushl_cfi $-1 /* orig_ax = -1 => not a system call */
 	SAVE_ALL
diff --git a/arch/x86/kernel/head.c b/arch/x86/kernel/head.c
index af0699b..f6c4674 100644
--- a/arch/x86/kernel/head.c
+++ b/arch/x86/kernel/head.c
@@ -5,8 +5,6 @@
 #include <asm/setup.h>
 #include <asm/bios_ebda.h>
 
-#define BIOS_LOWMEM_KILOBYTES 0x413
-
 /*
  * The BIOS places the EBDA/XBDA at the top of conventional
  * memory, and usually decreases the reported amount of
@@ -16,17 +14,30 @@
  * chipset: reserve a page before VGA to prevent PCI prefetch
  * into it (errata #56). Usually the page is reserved anyways,
  * unless you have no PS/2 mouse plugged in.
+ *
+ * This function is deliberately very conservative.  Losing
+ * memory in the bottom megabyte is rarely a problem, as long
+ * as we have enough memory to install the trampoline.  Using
+ * memory that is in use by the BIOS or by some DMA device
+ * the BIOS didn't shut down *is* a big problem.
  */
+
+#define BIOS_LOWMEM_KILOBYTES	0x413
+#define LOWMEM_CAP		0x9f000U	/* Absolute maximum */
+#define INSANE_CUTOFF		0x20000U	/* Less than this = insane */
+
 void __init reserve_ebda_region(void)
 {
 	unsigned int lowmem, ebda_addr;
 
-	/* To determine the position of the EBDA and the */
-	/* end of conventional memory, we need to look at */
-	/* the BIOS data area. In a paravirtual environment */
-	/* that area is absent. We'll just have to assume */
-	/* that the paravirt case can handle memory setup */
-	/* correctly, without our help. */
+	/*
+	 * To determine the position of the EBDA and the
+	 * end of conventional memory, we need to look at
+	 * the BIOS data area. In a paravirtual environment
+	 * that area is absent. We'll just have to assume
+	 * that the paravirt case can handle memory setup
+	 * correctly, without our help.
+	 */
 	if (paravirt_enabled())
 		return;
 
@@ -37,19 +48,23 @@ void __init reserve_ebda_region(void)
 	/* start of EBDA area */
 	ebda_addr = get_bios_ebda();
 
-	/* Fixup: bios puts an EBDA in the top 64K segment */
-	/* of conventional memory, but does not adjust lowmem. */
-	if ((lowmem - ebda_addr) <= 0x10000)
-		lowmem = ebda_addr;
+	/*
+	 * Note: some old Dells seem to need 4k EBDA without
+	 * reporting so, so just consider the memory above 0x9f000
+	 * to be off limits (bugzilla 2990).
+	 */
+
+	/* If the EBDA address is below 128K, assume it is bogus */
+	if (ebda_addr < INSANE_CUTOFF)
+		ebda_addr = LOWMEM_CAP;
 
-	/* Fixup: bios does not report an EBDA at all. */
-	/* Some old Dells seem to need 4k anyhow (bugzilla 2990) */
-	if ((ebda_addr == 0) && (lowmem >= 0x9f000))
-		lowmem = 0x9f000;
+	/* If lowmem is less than 128K, assume it is bogus */
+	if (lowmem < INSANE_CUTOFF)
+		lowmem = LOWMEM_CAP;
 
-	/* Paranoia: should never happen, but... */
-	if ((lowmem == 0) || (lowmem >= 0x100000))
-		lowmem = 0x9f000;
+	/* Use the lower of the lowmem and EBDA markers as the cutoff */
+	lowmem = min(lowmem, ebda_addr);
+	lowmem = min(lowmem, LOWMEM_CAP);	/* Absolute cap */
 
 	/* reserve all memory between lowmem and the 1MB mark */
 	memblock_x86_reserve_range(lowmem, 0x100000, "* BIOS reserved");
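Stripped of the BIOS data-area reads, the rewritten reserve_ebda_region() is pure clamping arithmetic: distrust any marker below 128K, take the lower of the lowmem and EBDA markers, and never go above 0x9f000. A standalone model of that logic (ebda_cutoff() is a hypothetical helper; values in bytes):

	/*
	 * Sketch of the new cutoff logic with the BIOS reads factored out.
	 * lowmem is the conventional-memory size from the BDA word at 0x413
	 * (kilobytes << 10); ebda_addr is the EBDA segment << 4.
	 */
	static unsigned int ebda_cutoff(unsigned int lowmem, unsigned int ebda_addr)
	{
		const unsigned int lowmem_cap   = 0x9f000U;	/* absolute max */
		const unsigned int insane_cutoff = 0x20000U;	/* < 128K = bogus */

		if (ebda_addr < insane_cutoff)
			ebda_addr = lowmem_cap;
		if (lowmem < insane_cutoff)
			lowmem = lowmem_cap;

		lowmem = lowmem < ebda_addr ? lowmem : ebda_addr;
		return lowmem < lowmem_cap ? lowmem : lowmem_cap;
	}

	/* e.g. ebda_cutoff(0x9fc00, 0x9f800) == 0x9f000 on a typical BIOS */

Everything from the returned cutoff up to the 1 MB mark is then handed to memblock as "BIOS reserved".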
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 12fcbe2..f7d1a64 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -175,6 +175,9 @@ static int msr_open(struct inode *inode, struct file *file)
 	unsigned int cpu;
 	struct cpuinfo_x86 *c;
 
+	if (!capable(CAP_SYS_RAWIO))
+		return -EPERM;
+
 	cpu = iminor(file->f_path.dentry->d_inode);
 	if (cpu >= nr_cpu_ids || !cpu_online(cpu))
 		return -ENXIO;	/* No such CPU */
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index af19a61..6c4e9ff 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -631,6 +631,83 @@ static __init void reserve_ibft_region(void)
 
 static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10;
 
+static bool __init snb_gfx_workaround_needed(void)
+{
+#ifdef CONFIG_PCI
+	int i;
+	u16 vendor, devid;
+	static const u16 snb_ids[] = {
+		0x0102,
+		0x0112,
+		0x0122,
+		0x0106,
+		0x0116,
+		0x0126,
+		0x010a,
+	};
+
+	/* Assume no if something weird is going on with PCI */
+	if (!early_pci_allowed())
+		return false;
+
+	vendor = read_pci_config_16(0, 2, 0, PCI_VENDOR_ID);
+	if (vendor != 0x8086)
+		return false;
+
+	devid = read_pci_config_16(0, 2, 0, PCI_DEVICE_ID);
+	for (i = 0; i < ARRAY_SIZE(snb_ids); i++)
+		if (devid == snb_ids[i])
+			return true;
+#endif
+
+	return false;
+}
+
+/*
+ * Sandy Bridge graphics has trouble with certain ranges, exclude
+ * them from allocation.
+ */
+static void __init trim_snb_memory(void)
+{
+	static const unsigned long bad_pages[] = {
+		0x20050000,
+		0x20110000,
+		0x20130000,
+		0x20138000,
+		0x40004000,
+	};
+	int i;
+
+	if (!snb_gfx_workaround_needed())
+		return;
+
+	printk(KERN_DEBUG "reserving inaccessible SNB gfx pages\n");
+
+	/*
+	 * Reserve all memory below the 1 MB mark that has not
+	 * already been reserved.
+	 */
+	memblock_reserve(0, 1<<20);
+
+	for (i = 0; i < ARRAY_SIZE(bad_pages); i++) {
+		if (memblock_reserve(bad_pages[i], PAGE_SIZE))
+			printk(KERN_WARNING "failed to reserve 0x%08lx\n",
+			       bad_pages[i]);
+	}
+}
+
+/*
+ * Here we put platform-specific memory range workarounds, i.e.
+ * memory known to be corrupt or otherwise in need to be reserved on
+ * specific platforms.
+ *
+ * If this gets used more widely it could use a real dispatch mechanism.
+ */
+static void __init trim_platform_memory_ranges(void)
+{
+	trim_snb_memory();
+}
+
 static void __init trim_bios_range(void)
 {
 	/*
@@ -651,6 +728,7 @@ static void __init trim_bios_range(void)
 	 * take them out.
 	 */
 	e820_remove_range(BIOS_BEGIN, BIOS_END - BIOS_BEGIN, E820_RAM, 1);
+
 	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
 }
 
@@ -929,6 +1007,8 @@ void __init setup_arch(char **cmdline_p)
 
 	setup_trampolines();
 
+	trim_platform_memory_ranges();
+
 	init_gbpages();
 
 	/* max_pfn_mapped is updated here */
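snb_gfx_workaround_needed() runs long before PCI enumeration, hence the raw read_pci_config_16() of bus 0, device 2, function 0. On a running system the same device-id check can be made against sysfs; a sketch (assuming the integrated GPU sits at the usual 0000:00:02.0 address):

	/*
	 * Sketch: check from userspace whether the IGD at 00:02.0 is one of
	 * the Sandy Bridge ids the kernel works around.
	 */
	#include <stdio.h>

	int main(void)
	{
		static const unsigned int snb_ids[] = {
			0x0102, 0x0112, 0x0122, 0x0106, 0x0116, 0x0126, 0x010a,
		};
		unsigned int id = 0;
		size_t i;
		FILE *f = fopen("/sys/bus/pci/devices/0000:00:02.0/device", "r");

		if (!f || fscanf(f, "%x", &id) != 1)
			return 1;
		fclose(f);

		for (i = 0; i < sizeof(snb_ids) / sizeof(snb_ids[0]); i++) {
			if (id == snb_ids[i]) {
				printf("affected SNB gfx: 0x%04x\n", id);
				return 0;
			}
		}
		printf("not affected: 0x%04x\n", id);
		return 0;
	}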
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 2dbf6bf..3b2ad91 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -720,12 +720,15 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
 		if (is_errata100(regs, address))
 			return;
 
-		if (unlikely(show_unhandled_signals))
+		/* Kernel addresses are always protection faults: */
+		if (address >= TASK_SIZE)
+			error_code |= PF_PROT;
+
+		if (likely(show_unhandled_signals))
 			show_signal_msg(regs, error_code, address, tsk);
 
-		/* Kernel addresses are always protection faults: */
 		tsk->thread.cr2		= address;
-		tsk->thread.error_code	= error_code | (address >= TASK_SIZE);
+		tsk->thread.error_code	= error_code;
 		tsk->thread.trap_no	= 14;
 
 		force_sig_info_fault(SIGSEGV, si_code, address, tsk, 0);
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index bbaaa00..44b93da 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -831,6 +831,9 @@ int kern_addr_valid(unsigned long addr)
 	if (pud_none(*pud))
 		return 0;
 
+	if (pud_large(*pud))
+		return pfn_valid(pud_pfn(*pud));
+
 	pmd = pmd_offset(pud, addr);
 	if (pmd_none(*pmd))
 		return 0;
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index ac3aa54..0fba86d 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -38,7 +38,7 @@
 #include <asm/cacheflush.h>
 #include <asm/fixmap.h>
 
-static pgd_t save_pgd __initdata;
+static pgd_t *save_pgd __initdata;
 static unsigned long efi_flags __initdata;
 
 static void __init early_code_mapping_set_exec(int executable)
@@ -61,12 +61,20 @@ static void __init early_code_mapping_set_exec(int executable)
 void __init efi_call_phys_prelog(void)
 {
 	unsigned long vaddress;
+	int pgd;
+	int n_pgds;
 
 	early_code_mapping_set_exec(1);
 	local_irq_save(efi_flags);
-	vaddress = (unsigned long)__va(0x0UL);
-	save_pgd = *pgd_offset_k(0x0UL);
-	set_pgd(pgd_offset_k(0x0UL), *pgd_offset_k(vaddress));
+
+	n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT), PGDIR_SIZE);
+	save_pgd = kmalloc(n_pgds * sizeof(pgd_t), GFP_KERNEL);
+
+	for (pgd = 0; pgd < n_pgds; pgd++) {
+		save_pgd[pgd] = *pgd_offset_k(pgd * PGDIR_SIZE);
+		vaddress = (unsigned long)__va(pgd * PGDIR_SIZE);
+		set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), *pgd_offset_k(vaddress));
+	}
 	__flush_tlb_all();
 }
 
@@ -75,7 +83,11 @@ void __init efi_call_phys_epilog(void)
 	/*
 	 * After the lock is released, the original page table is restored.
 	 */
-	set_pgd(pgd_offset_k(0x0UL), save_pgd);
+	int pgd;
+	int n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT), PGDIR_SIZE);
+	for (pgd = 0; pgd < n_pgds; pgd++)
+		set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), save_pgd[pgd]);
+	kfree(save_pgd);
 	__flush_tlb_all();
 	local_irq_restore(efi_flags);
 	early_code_mapping_set_exec(0);
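efi_call_phys_prelog() now mirrors the kernel's direct mapping over the low virtual addresses for every pgd entry covering physical RAM, instead of only slot 0, so EFI runtime calls made via 1:1 addresses can reach all of memory. The sizing arithmetic is small; DIV_ROUND_UP is the kernel's usual rounding-up division, and one pgd entry on x86-64 spans 512 GiB:

	/* From <linux/kernel.h>: */
	#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

	/*
	 * x86-64: PGDIR_SHIFT == 39, so PGDIR_SIZE == 1UL << 39 == 512 GiB.
	 * With, say, 16 GiB of RAM (max_pfn << PAGE_SHIFT == 0x400000000):
	 *
	 *	n_pgds = DIV_ROUND_UP(0x400000000UL, 1UL << 39) == 1
	 *
	 * so the loop usually saves and rewrites a single pgd entry; a second
	 * one is only touched on machines with more than 512 GiB of RAM.
	 */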
diff --git a/arch/x86/power/hibernate_32.c b/arch/x86/power/hibernate_32.c
index 3769079..a09ecb9 100644
--- a/arch/x86/power/hibernate_32.c
+++ b/arch/x86/power/hibernate_32.c
@@ -130,8 +130,6 @@ static int resume_physical_mapping_init(pgd_t *pgd_base)
 		}
 	}
 
-	resume_map_numa_kva(pgd_base);
-
 	return 0;
 }
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index cc9b1e1..d99537f 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -313,7 +313,6 @@ static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl)
 		if (per_cpu(lock_spinners, cpu) == xl) {
 			ADD_STATS(released_slow_kicked, 1);
 			xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
-			break;
 		}
 	}
 }
diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S
index b040b0e..7328f71 100644
--- a/arch/x86/xen/xen-asm_32.S
+++ b/arch/x86/xen/xen-asm_32.S
@@ -88,11 +88,11 @@ ENTRY(xen_iret)
 	 */
 #ifdef CONFIG_SMP
 	GET_THREAD_INFO(%eax)
-	movl TI_cpu(%eax), %eax
-	movl __per_cpu_offset(,%eax,4), %eax
-	mov xen_vcpu(%eax), %eax
+	movl %ss:TI_cpu(%eax), %eax
+	movl %ss:__per_cpu_offset(,%eax,4), %eax
+	mov %ss:xen_vcpu(%eax), %eax
 #else
-	movl xen_vcpu, %eax
+	movl %ss:xen_vcpu, %eax
 #endif
 
 	/* check IF state we're restoring */
@@ -105,11 +105,11 @@ ENTRY(xen_iret)
 	 * resuming the code, so we don't have to be worried about
 	 * being preempted to another CPU.
 	 */
-	setz XEN_vcpu_info_mask(%eax)
+	setz %ss:XEN_vcpu_info_mask(%eax)
 xen_iret_start_crit:
 
 	/* check for unmasked and pending */
-	cmpw $0x0001, XEN_vcpu_info_pending(%eax)
+	cmpw $0x0001, %ss:XEN_vcpu_info_pending(%eax)
 
 	/*
 	 * If there's something pending, mask events again so we can
@@ -117,7 +117,7 @@ xen_iret_start_crit:
 	 * touch XEN_vcpu_info_mask.
	 */
 	jne 1f
-	movb $1, XEN_vcpu_info_mask(%eax)
+	movb $1, %ss:XEN_vcpu_info_mask(%eax)
 
 1:	popl %eax
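The %ss: segment overrides in xen_iret close a subtle hole: on this 32-bit exit path the user's segment registers may already have been restored, so the default %ds base cannot be trusted to reach kernel data, while %ss still holds the kernel stack segment until the final iret (that rationale is a reading of the patch, not a quote from its changelog). The idiom, in the file's own AT&T syntax:

	movl	xen_vcpu, %eax		/* implicit %ds: base - may be the user's */
	movl	%ss:xen_vcpu, %eax	/* %ss: base - kernel segment, still valid */

The spinlock.c change is similar in spirit: dropping the break makes xen_spin_unlock_slow() kick every vCPU recorded against the lock rather than only the first match, which avoids a lost wakeup if a waiter races with the unlocker while registering itself in lock_spinners.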