aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/ia32/ia32entry.S4
-rw-r--r--arch/x86/include/asm/mmzone_32.h6
-rw-r--r--arch/x86/include/asm/pgtable.h5
-rw-r--r--arch/x86/include/asm/traps.h26
-rw-r--r--arch/x86/kernel/amd_iommu_init.c10
-rw-r--r--arch/x86/kernel/apic/x2apic_phys.c17
-rw-r--r--arch/x86/kernel/cpu/amd.c14
-rw-r--r--arch/x86/kernel/entry_32.S1
-rw-r--r--arch/x86/kernel/head.c53
-rw-r--r--arch/x86/kernel/msr.c3
-rw-r--r--arch/x86/kernel/setup.c80
-rw-r--r--arch/x86/mm/fault.c9
-rw-r--r--arch/x86/mm/init_64.c3
-rw-r--r--arch/x86/platform/efi/efi_64.c22
-rw-r--r--arch/x86/power/hibernate_32.c2
-rw-r--r--arch/x86/xen/spinlock.c1
-rw-r--r--arch/x86/xen/xen-asm_32.S14
17 files changed, 216 insertions, 54 deletions
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index c1870dd..26af1e3 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -208,7 +208,7 @@ sysexit_from_sys_call:
testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10)
jnz ia32_ret_from_sys_call
TRACE_IRQS_ON
- sti
+ ENABLE_INTERRUPTS(CLBR_NONE)
movl %eax,%esi /* second arg, syscall return value */
cmpl $0,%eax /* is it < 0? */
setl %al /* 1 if so, 0 if not */
@@ -218,7 +218,7 @@ sysexit_from_sys_call:
GET_THREAD_INFO(%r10)
movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall return value */
movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
- cli
+ DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
testl %edi,TI_flags(%r10)
jz \exit
diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h
index ffa037f..a6a6414 100644
--- a/arch/x86/include/asm/mmzone_32.h
+++ b/arch/x86/include/asm/mmzone_32.h
@@ -14,12 +14,6 @@ extern struct pglist_data *node_data[];
#include <asm/numaq.h>
-extern void resume_map_numa_kva(pgd_t *pgd);
-
-#else /* !CONFIG_NUMA */
-
-static inline void resume_map_numa_kva(pgd_t *pgd) {}
-
#endif /* CONFIG_NUMA */
#ifdef CONFIG_DISCONTIGMEM
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 884507e..6be9909 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -142,6 +142,11 @@ static inline unsigned long pmd_pfn(pmd_t pmd)
return (pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT;
}
+static inline unsigned long pud_pfn(pud_t pud)
+{
+ return (pud_val(pud) & PTE_PFN_MASK) >> PAGE_SHIFT;
+}
+
#define pte_page(pte) pfn_to_page(pte_pfn(pte))
static inline int pmd_large(pmd_t pte)
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 0310da6..1d44903 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -1,6 +1,7 @@
#ifndef _ASM_X86_TRAPS_H
#define _ASM_X86_TRAPS_H
+#include <linux/kprobes.h>
#include <asm/debugreg.h>
#include <asm/siginfo.h> /* TRAP_TRACE, ... */
@@ -87,4 +88,29 @@ asmlinkage void smp_thermal_interrupt(void);
asmlinkage void mce_threshold_interrupt(void);
#endif
+/* Interrupts/Exceptions */
+enum {
+ X86_TRAP_DE = 0, /* 0, Divide-by-zero */
+ X86_TRAP_DB, /* 1, Debug */
+ X86_TRAP_NMI, /* 2, Non-maskable Interrupt */
+ X86_TRAP_BP, /* 3, Breakpoint */
+ X86_TRAP_OF, /* 4, Overflow */
+ X86_TRAP_BR, /* 5, Bound Range Exceeded */
+ X86_TRAP_UD, /* 6, Invalid Opcode */
+ X86_TRAP_NM, /* 7, Device Not Available */
+ X86_TRAP_DF, /* 8, Double Fault */
+ X86_TRAP_OLD_MF, /* 9, Coprocessor Segment Overrun */
+ X86_TRAP_TS, /* 10, Invalid TSS */
+ X86_TRAP_NP, /* 11, Segment Not Present */
+ X86_TRAP_SS, /* 12, Stack Segment Fault */
+ X86_TRAP_GP, /* 13, General Protection Fault */
+ X86_TRAP_PF, /* 14, Page Fault */
+ X86_TRAP_SPURIOUS, /* 15, Spurious Interrupt */
+ X86_TRAP_MF, /* 16, x87 Floating-Point Exception */
+ X86_TRAP_AC, /* 17, Alignment Check */
+ X86_TRAP_MC, /* 18, Machine Check */
+ X86_TRAP_XF, /* 19, SIMD Floating-Point Exception */
+ X86_TRAP_IRET = 32, /* 32, IRET Exception */
+};
+
#endif /* _ASM_X86_TRAPS_H */
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 33df6e8..d86aa3f 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -1363,6 +1363,7 @@ static struct syscore_ops amd_iommu_syscore_ops = {
*/
static int __init amd_iommu_init(void)
{
+ struct amd_iommu *iommu;
int i, ret = 0;
/*
@@ -1411,9 +1412,6 @@ static int __init amd_iommu_init(void)
if (amd_iommu_pd_alloc_bitmap == NULL)
goto free;
- /* init the device table */
- init_device_table();
-
/*
* let all alias entries point to itself
*/
@@ -1463,6 +1461,12 @@ static int __init amd_iommu_init(void)
if (ret)
goto free_disable;
+ /* init the device table */
+ init_device_table();
+
+ for_each_iommu(iommu)
+ iommu_flush_all_caches(iommu);
+
amd_iommu_init_api();
amd_iommu_init_notifier();
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index f5373df..db4f704 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -20,12 +20,19 @@ static int set_x2apic_phys_mode(char *arg)
}
early_param("x2apic_phys", set_x2apic_phys_mode);
+static bool x2apic_fadt_phys(void)
+{
+ if ((acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID) &&
+ (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) {
+ printk(KERN_DEBUG "System requires x2apic physical mode\n");
+ return true;
+ }
+ return false;
+}
+
static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
{
- if (x2apic_phys)
- return x2apic_enabled();
- else
- return 0;
+ return x2apic_enabled() && (x2apic_phys || x2apic_fadt_phys());
}
static void
@@ -108,7 +115,7 @@ static void init_x2apic_ldr(void)
static int x2apic_phys_probe(void)
{
- if (x2apic_mode && x2apic_phys)
+ if (x2apic_mode && (x2apic_phys || x2apic_fadt_phys()))
return 1;
return apic == &apic_x2apic_phys;
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 3f4b6da..a93741d 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -568,6 +568,20 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
}
}
+ /*
+ * The way access filter has a performance penalty on some workloads.
+ * Disable it on the affected CPUs.
+ */
+ if ((c->x86 == 0x15) &&
+ (c->x86_model >= 0x02) && (c->x86_model < 0x20)) {
+ u64 val;
+
+ if (!rdmsrl_safe(0xc0011021, &val) && !(val & 0x1E)) {
+ val |= 0x1E;
+ checking_wrmsrl(0xc0011021, val);
+ }
+ }
+
cpu_detect_cache_sizes(c);
/* Multi core CPU? */
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 268b40d..2df1252 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1078,7 +1078,6 @@ ENTRY(xen_failsafe_callback)
lea 16(%esp),%esp
CFI_ADJUST_CFA_OFFSET -16
jz 5f
- addl $16,%esp
jmp iret_exc
5: pushl_cfi $-1 /* orig_ax = -1 => not a system call */
SAVE_ALL
diff --git a/arch/x86/kernel/head.c b/arch/x86/kernel/head.c
index af0699b..f6c4674 100644
--- a/arch/x86/kernel/head.c
+++ b/arch/x86/kernel/head.c
@@ -5,8 +5,6 @@
#include <asm/setup.h>
#include <asm/bios_ebda.h>
-#define BIOS_LOWMEM_KILOBYTES 0x413
-
/*
* The BIOS places the EBDA/XBDA at the top of conventional
* memory, and usually decreases the reported amount of
@@ -16,17 +14,30 @@
* chipset: reserve a page before VGA to prevent PCI prefetch
* into it (errata #56). Usually the page is reserved anyways,
* unless you have no PS/2 mouse plugged in.
+ *
+ * This functions is deliberately very conservative. Losing
+ * memory in the bottom megabyte is rarely a problem, as long
+ * as we have enough memory to install the trampoline. Using
+ * memory that is in use by the BIOS or by some DMA device
+ * the BIOS didn't shut down *is* a big problem.
*/
+
+#define BIOS_LOWMEM_KILOBYTES 0x413
+#define LOWMEM_CAP 0x9f000U /* Absolute maximum */
+#define INSANE_CUTOFF 0x20000U /* Less than this = insane */
+
void __init reserve_ebda_region(void)
{
unsigned int lowmem, ebda_addr;
- /* To determine the position of the EBDA and the */
- /* end of conventional memory, we need to look at */
- /* the BIOS data area. In a paravirtual environment */
- /* that area is absent. We'll just have to assume */
- /* that the paravirt case can handle memory setup */
- /* correctly, without our help. */
+ /*
+ * To determine the position of the EBDA and the
+ * end of conventional memory, we need to look at
+ * the BIOS data area. In a paravirtual environment
+ * that area is absent. We'll just have to assume
+ * that the paravirt case can handle memory setup
+ * correctly, without our help.
+ */
if (paravirt_enabled())
return;
@@ -37,19 +48,23 @@ void __init reserve_ebda_region(void)
/* start of EBDA area */
ebda_addr = get_bios_ebda();
- /* Fixup: bios puts an EBDA in the top 64K segment */
- /* of conventional memory, but does not adjust lowmem. */
- if ((lowmem - ebda_addr) <= 0x10000)
- lowmem = ebda_addr;
+ /*
+ * Note: some old Dells seem to need 4k EBDA without
+ * reporting so, so just consider the memory above 0x9f000
+ * to be off limits (bugzilla 2990).
+ */
+
+ /* If the EBDA address is below 128K, assume it is bogus */
+ if (ebda_addr < INSANE_CUTOFF)
+ ebda_addr = LOWMEM_CAP;
- /* Fixup: bios does not report an EBDA at all. */
- /* Some old Dells seem to need 4k anyhow (bugzilla 2990) */
- if ((ebda_addr == 0) && (lowmem >= 0x9f000))
- lowmem = 0x9f000;
+ /* If lowmem is less than 128K, assume it is bogus */
+ if (lowmem < INSANE_CUTOFF)
+ lowmem = LOWMEM_CAP;
- /* Paranoia: should never happen, but... */
- if ((lowmem == 0) || (lowmem >= 0x100000))
- lowmem = 0x9f000;
+ /* Use the lower of the lowmem and EBDA markers as the cutoff */
+ lowmem = min(lowmem, ebda_addr);
+ lowmem = min(lowmem, LOWMEM_CAP); /* Absolute cap */
/* reserve all memory between lowmem and the 1MB mark */
memblock_x86_reserve_range(lowmem, 0x100000, "* BIOS reserved");
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 12fcbe2..f7d1a64 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -175,6 +175,9 @@ static int msr_open(struct inode *inode, struct file *file)
unsigned int cpu;
struct cpuinfo_x86 *c;
+ if (!capable(CAP_SYS_RAWIO))
+ return -EPERM;
+
cpu = iminor(file->f_path.dentry->d_inode);
if (cpu >= nr_cpu_ids || !cpu_online(cpu))
return -ENXIO; /* No such CPU */
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index af19a61..6c4e9ff 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -631,6 +631,83 @@ static __init void reserve_ibft_region(void)
static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10;
+static bool __init snb_gfx_workaround_needed(void)
+{
+#ifdef CONFIG_PCI
+ int i;
+ u16 vendor, devid;
+ static const u16 snb_ids[] = {
+ 0x0102,
+ 0x0112,
+ 0x0122,
+ 0x0106,
+ 0x0116,
+ 0x0126,
+ 0x010a,
+ };
+
+ /* Assume no if something weird is going on with PCI */
+ if (!early_pci_allowed())
+ return false;
+
+ vendor = read_pci_config_16(0, 2, 0, PCI_VENDOR_ID);
+ if (vendor != 0x8086)
+ return false;
+
+ devid = read_pci_config_16(0, 2, 0, PCI_DEVICE_ID);
+ for (i = 0; i < ARRAY_SIZE(snb_ids); i++)
+ if (devid == snb_ids[i])
+ return true;
+#endif
+
+ return false;
+}
+
+/*
+ * Sandy Bridge graphics has trouble with certain ranges, exclude
+ * them from allocation.
+ */
+static void __init trim_snb_memory(void)
+{
+ static const unsigned long bad_pages[] = {
+ 0x20050000,
+ 0x20110000,
+ 0x20130000,
+ 0x20138000,
+ 0x40004000,
+ };
+ int i;
+
+ if (!snb_gfx_workaround_needed())
+ return;
+
+ printk(KERN_DEBUG "reserving inaccessible SNB gfx pages\n");
+
+ /*
+ * Reserve all memory below the 1 MB mark that has not
+ * already been reserved.
+ */
+ memblock_reserve(0, 1<<20);
+
+ for (i = 0; i < ARRAY_SIZE(bad_pages); i++) {
+ if (memblock_reserve(bad_pages[i], PAGE_SIZE))
+ printk(KERN_WARNING "failed to reserve 0x%08lx\n",
+ bad_pages[i]);
+ }
+}
+
+/*
+ * Here we put platform-specific memory range workarounds, i.e.
+ * memory known to be corrupt or otherwise in need to be reserved on
+ * specific platforms.
+ *
+ * If this gets used more widely it could use a real dispatch mechanism.
+ */
+static void __init trim_platform_memory_ranges(void)
+{
+ trim_snb_memory();
+}
+
static void __init trim_bios_range(void)
{
/*
@@ -651,6 +728,7 @@ static void __init trim_bios_range(void)
* take them out.
*/
e820_remove_range(BIOS_BEGIN, BIOS_END - BIOS_BEGIN, E820_RAM, 1);
+
sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
}
@@ -929,6 +1007,8 @@ void __init setup_arch(char **cmdline_p)
setup_trampolines();
+ trim_platform_memory_ranges();
+
init_gbpages();
/* max_pfn_mapped is updated here */
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 2dbf6bf..3b2ad91 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -720,12 +720,15 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
if (is_errata100(regs, address))
return;
- if (unlikely(show_unhandled_signals))
+ /* Kernel addresses are always protection faults: */
+ if (address >= TASK_SIZE)
+ error_code |= PF_PROT;
+
+ if (likely(show_unhandled_signals))
show_signal_msg(regs, error_code, address, tsk);
- /* Kernel addresses are always protection faults: */
tsk->thread.cr2 = address;
- tsk->thread.error_code = error_code | (address >= TASK_SIZE);
+ tsk->thread.error_code = error_code;
tsk->thread.trap_no = 14;
force_sig_info_fault(SIGSEGV, si_code, address, tsk, 0);
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index bbaaa00..44b93da 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -831,6 +831,9 @@ int kern_addr_valid(unsigned long addr)
if (pud_none(*pud))
return 0;
+ if (pud_large(*pud))
+ return pfn_valid(pud_pfn(*pud));
+
pmd = pmd_offset(pud, addr);
if (pmd_none(*pmd))
return 0;
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index ac3aa54..0fba86d 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -38,7 +38,7 @@
#include <asm/cacheflush.h>
#include <asm/fixmap.h>
-static pgd_t save_pgd __initdata;
+static pgd_t *save_pgd __initdata;
static unsigned long efi_flags __initdata;
static void __init early_code_mapping_set_exec(int executable)
@@ -61,12 +61,20 @@ static void __init early_code_mapping_set_exec(int executable)
void __init efi_call_phys_prelog(void)
{
unsigned long vaddress;
+ int pgd;
+ int n_pgds;
early_code_mapping_set_exec(1);
local_irq_save(efi_flags);
- vaddress = (unsigned long)__va(0x0UL);
- save_pgd = *pgd_offset_k(0x0UL);
- set_pgd(pgd_offset_k(0x0UL), *pgd_offset_k(vaddress));
+
+ n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT), PGDIR_SIZE);
+ save_pgd = kmalloc(n_pgds * sizeof(pgd_t), GFP_KERNEL);
+
+ for (pgd = 0; pgd < n_pgds; pgd++) {
+ save_pgd[pgd] = *pgd_offset_k(pgd * PGDIR_SIZE);
+ vaddress = (unsigned long)__va(pgd * PGDIR_SIZE);
+ set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), *pgd_offset_k(vaddress));
+ }
__flush_tlb_all();
}
@@ -75,7 +83,11 @@ void __init efi_call_phys_epilog(void)
/*
* After the lock is released, the original page table is restored.
*/
- set_pgd(pgd_offset_k(0x0UL), save_pgd);
+ int pgd;
+ int n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE);
+ for (pgd = 0; pgd < n_pgds; pgd++)
+ set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), save_pgd[pgd]);
+ kfree(save_pgd);
__flush_tlb_all();
local_irq_restore(efi_flags);
early_code_mapping_set_exec(0);
diff --git a/arch/x86/power/hibernate_32.c b/arch/x86/power/hibernate_32.c
index 3769079..a09ecb9 100644
--- a/arch/x86/power/hibernate_32.c
+++ b/arch/x86/power/hibernate_32.c
@@ -130,8 +130,6 @@ static int resume_physical_mapping_init(pgd_t *pgd_base)
}
}
- resume_map_numa_kva(pgd_base);
-
return 0;
}
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index cc9b1e1..d99537f 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -313,7 +313,6 @@ static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl)
if (per_cpu(lock_spinners, cpu) == xl) {
ADD_STATS(released_slow_kicked, 1);
xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
- break;
}
}
}
diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S
index b040b0e..7328f71 100644
--- a/arch/x86/xen/xen-asm_32.S
+++ b/arch/x86/xen/xen-asm_32.S
@@ -88,11 +88,11 @@ ENTRY(xen_iret)
*/
#ifdef CONFIG_SMP
GET_THREAD_INFO(%eax)
- movl TI_cpu(%eax), %eax
- movl __per_cpu_offset(,%eax,4), %eax
- mov xen_vcpu(%eax), %eax
+ movl %ss:TI_cpu(%eax), %eax
+ movl %ss:__per_cpu_offset(,%eax,4), %eax
+ mov %ss:xen_vcpu(%eax), %eax
#else
- movl xen_vcpu, %eax
+ movl %ss:xen_vcpu, %eax
#endif
/* check IF state we're restoring */
@@ -105,11 +105,11 @@ ENTRY(xen_iret)
* resuming the code, so we don't have to be worried about
* being preempted to another CPU.
*/
- setz XEN_vcpu_info_mask(%eax)
+ setz %ss:XEN_vcpu_info_mask(%eax)
xen_iret_start_crit:
/* check for unmasked and pending */
- cmpw $0x0001, XEN_vcpu_info_pending(%eax)
+ cmpw $0x0001, %ss:XEN_vcpu_info_pending(%eax)
/*
* If there's something pending, mask events again so we can
@@ -117,7 +117,7 @@ xen_iret_start_crit:
* touch XEN_vcpu_info_mask.
*/
jne 1f
- movb $1, XEN_vcpu_info_mask(%eax)
+ movb $1, %ss:XEN_vcpu_info_mask(%eax)
1: popl %eax