diff options
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/include/asm/efi.h | 5 | ||||
-rw-r--r-- | arch/x86/include/asm/irqflags.h | 8 | ||||
-rw-r--r-- | arch/x86/include/asm/uv/uv_hub.h | 9 | ||||
-rw-r--r-- | arch/x86/kernel/apic/io_apic.c | 3 | ||||
-rw-r--r-- | arch/x86/kernel/apic/x2apic_cluster.c | 10 | ||||
-rw-r--r-- | arch/x86/kernel/apic/x2apic_phys.c | 10 | ||||
-rw-r--r-- | arch/x86/kernel/apic/x2apic_uv_x.c | 38 | ||||
-rw-r--r-- | arch/x86/kernel/apm_32.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/efi.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/efi_64.c | 6 | ||||
-rw-r--r-- | arch/x86/kernel/head_32.S | 6 | ||||
-rw-r--r-- | arch/x86/kernel/reboot.c | 42 | ||||
-rw-r--r-- | arch/x86/kernel/vmi_32.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/vmlinux.lds.S | 16 | ||||
-rw-r--r-- | arch/x86/kvm/i8254.c | 3 | ||||
-rw-r--r-- | arch/x86/kvm/mmu.c | 48 | ||||
-rw-r--r-- | arch/x86/kvm/svm.c | 6 | ||||
-rw-r--r-- | arch/x86/kvm/vmx.c | 6 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 44 | ||||
-rw-r--r-- | arch/x86/lib/msr.c | 26 | ||||
-rw-r--r-- | arch/x86/mm/pageattr.c | 39 | ||||
-rw-r--r-- | arch/x86/mm/pgtable.c | 1 |
22 files changed, 225 insertions, 109 deletions
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index edc90f2..8406ed7 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -33,7 +33,7 @@ extern unsigned long asmlinkage efi_call_phys(void *, ...); #define efi_call_virt6(f, a1, a2, a3, a4, a5, a6) \ efi_call_virt(f, a1, a2, a3, a4, a5, a6) -#define efi_ioremap(addr, size) ioremap_cache(addr, size) +#define efi_ioremap(addr, size, type) ioremap_cache(addr, size) #else /* !CONFIG_X86_32 */ @@ -84,7 +84,8 @@ extern u64 efi_call6(void *fp, u64 arg1, u64 arg2, u64 arg3, efi_call6((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6)) -extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size); +extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size, + u32 type); #endif /* CONFIG_X86_32 */ diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index 2bdab21..c6ccbe7 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -12,9 +12,15 @@ static inline unsigned long native_save_fl(void) { unsigned long flags; + /* + * Note: this needs to be "=r" not "=rm", because we have the + * stack offset from what gcc expects at the time the "pop" is + * executed, and so a memory reference with respect to the stack + * would end up using the wrong address. + */ asm volatile("# __raw_save_flags\n\t" "pushf ; pop %0" - : "=g" (flags) + : "=r" (flags) : /* no input */ : "memory"); diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index 341070f..77a6850 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -175,7 +175,7 @@ DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); #define UV_GLOBAL_MMR32_PNODE_BITS(p) ((p) << (UV_GLOBAL_MMR32_PNODE_SHIFT)) #define UV_GLOBAL_MMR64_PNODE_BITS(p) \ - ((unsigned long)(UV_PNODE_TO_GNODE(p)) << UV_GLOBAL_MMR64_PNODE_SHIFT) + (((unsigned long)(p)) << UV_GLOBAL_MMR64_PNODE_SHIFT) #define UV_APIC_PNODE_SHIFT 6 @@ -327,6 +327,7 @@ struct uv_blade_info { unsigned short nr_possible_cpus; unsigned short nr_online_cpus; unsigned short pnode; + short memory_nid; }; extern struct uv_blade_info *uv_blade_info; extern short *uv_node_to_blade; @@ -363,6 +364,12 @@ static inline int uv_blade_to_pnode(int bid) return uv_blade_info[bid].pnode; } +/* Nid of memory node on blade. -1 if no blade-local memory */ +static inline int uv_blade_to_memory_nid(int bid) +{ + return uv_blade_info[bid].memory_nid; +} + /* Determine the number of possible cpus on a blade */ static inline int uv_blade_nr_possible_cpus(int bid) { diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 2284a48..d2ed6c5 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3793,6 +3793,9 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, mmr_pnode = uv_blade_to_pnode(mmr_blade); uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); + if (cfg->move_in_progress) + send_cleanup_vector(cfg); + return irq; } diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 8e4cbb2..a5371ec 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -17,11 +17,13 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) return x2apic_enabled(); } -/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ - +/* + * need to use more than cpu 0, because we need more vectors when + * MSI-X are used. + */ static const struct cpumask *x2apic_target_cpus(void) { - return cpumask_of(0); + return cpu_online_mask; } /* @@ -170,7 +172,7 @@ static unsigned long set_apic_id(unsigned int id) static int x2apic_cluster_phys_pkg_id(int initial_apicid, int index_msb) { - return current_cpu_data.initial_apicid >> index_msb; + return initial_apicid >> index_msb; } static void x2apic_send_IPI_self(int vector) diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index a284359..a8989aa 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -27,11 +27,13 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) return 0; } -/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ - +/* + * need to use more than cpu 0, because we need more vectors when + * MSI-X are used. + */ static const struct cpumask *x2apic_target_cpus(void) { - return cpumask_of(0); + return cpu_online_mask; } static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask) @@ -162,7 +164,7 @@ static unsigned long set_apic_id(unsigned int id) static int x2apic_phys_pkg_id(int initial_apicid, int index_msb) { - return current_cpu_data.initial_apicid >> index_msb; + return initial_apicid >> index_msb; } static void x2apic_send_IPI_self(int vector) diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 096d19a..832e908 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -261,7 +261,7 @@ struct apic apic_x2apic_uv_x = { .apic_id_registered = uv_apic_id_registered, .irq_delivery_mode = dest_Fixed, - .irq_dest_mode = 1, /* logical */ + .irq_dest_mode = 0, /* physical */ .target_cpus = uv_target_cpus, .disable_esr = 0, @@ -362,12 +362,6 @@ static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size) BUG(); } -static __init void map_low_mmrs(void) -{ - init_extra_mapping_uc(UV_GLOBAL_MMR32_BASE, UV_GLOBAL_MMR32_SIZE); - init_extra_mapping_uc(UV_LOCAL_MMR_BASE, UV_LOCAL_MMR_SIZE); -} - enum map_type {map_wb, map_uc}; static __init void map_high(char *id, unsigned long base, int shift, @@ -395,26 +389,6 @@ static __init void map_gru_high(int max_pnode) map_high("GRU", gru.s.base, shift, max_pnode, map_wb); } -static __init void map_config_high(int max_pnode) -{ - union uvh_rh_gam_cfg_overlay_config_mmr_u cfg; - int shift = UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR_BASE_SHFT; - - cfg.v = uv_read_local_mmr(UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR); - if (cfg.s.enable) - map_high("CONFIG", cfg.s.base, shift, max_pnode, map_uc); -} - -static __init void map_mmr_high(int max_pnode) -{ - union uvh_rh_gam_mmr_overlay_config_mmr_u mmr; - int shift = UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT; - - mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR); - if (mmr.s.enable) - map_high("MMR", mmr.s.base, shift, max_pnode, map_uc); -} - static __init void map_mmioh_high(int max_pnode) { union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh; @@ -566,8 +540,6 @@ void __init uv_system_init(void) unsigned long mmr_base, present, paddr; unsigned short pnode_mask; - map_low_mmrs(); - m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG); m_val = m_n_config.s.m_skt; n_val = m_n_config.s.n_skt; @@ -591,6 +563,8 @@ void __init uv_system_init(void) bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades(); uv_blade_info = kmalloc(bytes, GFP_KERNEL); BUG_ON(!uv_blade_info); + for (blade = 0; blade < uv_num_possible_blades(); blade++) + uv_blade_info[blade].memory_nid = -1; get_lowmem_redirect(&lowmem_redir_base, &lowmem_redir_size); @@ -629,6 +603,9 @@ void __init uv_system_init(void) lcpu = uv_blade_info[blade].nr_possible_cpus; uv_blade_info[blade].nr_possible_cpus++; + /* Any node on the blade, else will contain -1. */ + uv_blade_info[blade].memory_nid = nid; + uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base; uv_cpu_hub_info(cpu)->lowmem_remap_top = lowmem_redir_size; uv_cpu_hub_info(cpu)->m_val = m_val; @@ -662,11 +639,10 @@ void __init uv_system_init(void) pnode = (paddr >> m_val) & pnode_mask; blade = boot_pnode_to_blade(pnode); uv_node_to_blade[nid] = blade; + max_pnode = max(pnode, max_pnode); } map_gru_high(max_pnode); - map_mmr_high(max_pnode); - map_config_high(max_pnode); map_mmioh_high(max_pnode); uv_cpu_init(); diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 79302e9..442b550 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -811,7 +811,7 @@ static int apm_do_idle(void) u8 ret = 0; int idled = 0; int polling; - int err; + int err = 0; polling = !!(current_thread_info()->status & TS_POLLING); if (polling) { diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c index 96f7ac0..fe26ba3 100644 --- a/arch/x86/kernel/efi.c +++ b/arch/x86/kernel/efi.c @@ -354,7 +354,7 @@ void __init efi_init(void) */ c16 = tmp = early_ioremap(efi.systab->fw_vendor, 2); if (c16) { - for (i = 0; i < sizeof(vendor) && *c16; ++i) + for (i = 0; i < sizeof(vendor) - 1 && *c16; ++i) vendor[i] = *c16++; vendor[i] = '\0'; } else @@ -512,7 +512,7 @@ void __init efi_enter_virtual_mode(void) && end_pfn <= max_pfn_mapped)) va = __va(md->phys_addr); else - va = efi_ioremap(md->phys_addr, size); + va = efi_ioremap(md->phys_addr, size, md->type); md->virt_addr = (u64) (unsigned long) va; diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c index 22c3b78..ac0621a 100644 --- a/arch/x86/kernel/efi_64.c +++ b/arch/x86/kernel/efi_64.c @@ -98,10 +98,14 @@ void __init efi_call_phys_epilog(void) early_runtime_code_mapping_set_exec(0); } -void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size) +void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size, + u32 type) { unsigned long last_map_pfn; + if (type == EFI_MEMORY_MAPPED_IO) + return ioremap(phys_addr, size); + last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size); if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size) return NULL; diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 8663afb..0d98a01 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -602,7 +602,11 @@ ignore_int: #endif iret -.section .cpuinit.data,"wa" +#ifndef CONFIG_HOTPLUG_CPU + __CPUINITDATA +#else + __REFDATA +#endif .align 4 ENTRY(initial_code) .long i386_start_kernel diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 508e982..9eb8976 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -3,6 +3,7 @@ #include <linux/init.h> #include <linux/pm.h> #include <linux/efi.h> +#include <linux/dmi.h> #include <acpi/reboot.h> #include <asm/io.h> #include <asm/apic.h> @@ -17,7 +18,6 @@ #include <asm/cpu.h> #ifdef CONFIG_X86_32 -# include <linux/dmi.h> # include <linux/ctype.h> # include <linux/mc146818rtc.h> #else @@ -404,6 +404,46 @@ EXPORT_SYMBOL(machine_real_restart); #endif /* CONFIG_X86_32 */ +/* + * Some Apple MacBook and MacBookPro's needs reboot=p to be able to reboot + */ +static int __init set_pci_reboot(const struct dmi_system_id *d) +{ + if (reboot_type != BOOT_CF9) { + reboot_type = BOOT_CF9; + printk(KERN_INFO "%s series board detected. " + "Selecting PCI-method for reboots.\n", d->ident); + } + return 0; +} + +static struct dmi_system_id __initdata pci_reboot_dmi_table[] = { + { /* Handle problems with rebooting on Apple MacBook5,2 */ + .callback = set_pci_reboot, + .ident = "Apple MacBook", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "MacBook5,2"), + }, + }, + { /* Handle problems with rebooting on Apple MacBookPro5,1 */ + .callback = set_pci_reboot, + .ident = "Apple MacBookPro5,1", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro5,1"), + }, + }, + { } +}; + +static int __init pci_reboot_init(void) +{ + dmi_check_system(pci_reboot_dmi_table); + return 0; +} +core_initcall(pci_reboot_init); + static inline void kb_wait(void) { int i; diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index b263423..95a7289 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -441,7 +441,7 @@ vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip, ap.ds = __USER_DS; ap.es = __USER_DS; ap.fs = __KERNEL_PERCPU; - ap.gs = 0; + ap.gs = __KERNEL_STACK_CANARY; ap.eflags = 0; diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 59f31d2..78d185d 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -393,8 +393,8 @@ SECTIONS #ifdef CONFIG_X86_32 -ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE), - "kernel image bigger than KERNEL_IMAGE_SIZE") +. = ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE), + "kernel image bigger than KERNEL_IMAGE_SIZE"); #else /* * Per-cpu symbols which need to be offset from __per_cpu_load @@ -407,12 +407,12 @@ INIT_PER_CPU(irq_stack_union); /* * Build-time check on the image size: */ -ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), - "kernel image bigger than KERNEL_IMAGE_SIZE") +. = ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), + "kernel image bigger than KERNEL_IMAGE_SIZE"); #ifdef CONFIG_SMP -ASSERT((per_cpu__irq_stack_union == 0), - "irq_stack_union is not at start of per-cpu area"); +. = ASSERT((per_cpu__irq_stack_union == 0), + "irq_stack_union is not at start of per-cpu area"); #endif #endif /* CONFIG_X86_32 */ @@ -420,7 +420,7 @@ ASSERT((per_cpu__irq_stack_union == 0), #ifdef CONFIG_KEXEC #include <asm/kexec.h> -ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE, - "kexec control code size is too big") +. = ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE, + "kexec control code size is too big"); #endif diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 4d6f0d2..21f68e0 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -104,6 +104,9 @@ static s64 __kpit_elapsed(struct kvm *kvm) ktime_t remaining; struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state; + if (!ps->pit_timer.period) + return 0; + /* * The Counter does not stop when it reaches zero. In * Modes 0, 1, 4, and 5 the Counter ``wraps around'' to diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 7030b5f..0ef5bb2 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -489,16 +489,20 @@ static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int lpage) * * If rmapp bit zero is one, (then rmap & ~1) points to a struct kvm_rmap_desc * containing more mappings. + * + * Returns the number of rmap entries before the spte was added or zero if + * the spte was not added. + * */ -static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn, int lpage) +static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn, int lpage) { struct kvm_mmu_page *sp; struct kvm_rmap_desc *desc; unsigned long *rmapp; - int i; + int i, count = 0; if (!is_rmap_pte(*spte)) - return; + return count; gfn = unalias_gfn(vcpu->kvm, gfn); sp = page_header(__pa(spte)); sp->gfns[spte - sp->spt] = gfn; @@ -515,8 +519,10 @@ static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn, int lpage) } else { rmap_printk("rmap_add: %p %llx many->many\n", spte, *spte); desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul); - while (desc->shadow_ptes[RMAP_EXT-1] && desc->more) + while (desc->shadow_ptes[RMAP_EXT-1] && desc->more) { desc = desc->more; + count += RMAP_EXT; + } if (desc->shadow_ptes[RMAP_EXT-1]) { desc->more = mmu_alloc_rmap_desc(vcpu); desc = desc->more; @@ -525,6 +531,7 @@ static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn, int lpage) ; desc->shadow_ptes[i] = spte; } + return count; } static void rmap_desc_remove_entry(unsigned long *rmapp, @@ -754,6 +761,19 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp) return young; } +#define RMAP_RECYCLE_THRESHOLD 1000 + +static void rmap_recycle(struct kvm_vcpu *vcpu, gfn_t gfn, int lpage) +{ + unsigned long *rmapp; + + gfn = unalias_gfn(vcpu->kvm, gfn); + rmapp = gfn_to_rmap(vcpu->kvm, gfn, lpage); + + kvm_unmap_rmapp(vcpu->kvm, rmapp); + kvm_flush_remote_tlbs(vcpu->kvm); +} + int kvm_age_hva(struct kvm *kvm, unsigned long hva) { return kvm_handle_hva(kvm, hva, kvm_age_rmapp); @@ -1407,24 +1427,25 @@ static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp) */ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages) { + int used_pages; + + used_pages = kvm->arch.n_alloc_mmu_pages - kvm->arch.n_free_mmu_pages; + used_pages = max(0, used_pages); + /* * If we set the number of mmu pages to be smaller be than the * number of actived pages , we must to free some mmu pages before we * change the value */ - if ((kvm->arch.n_alloc_mmu_pages - kvm->arch.n_free_mmu_pages) > - kvm_nr_mmu_pages) { - int n_used_mmu_pages = kvm->arch.n_alloc_mmu_pages - - kvm->arch.n_free_mmu_pages; - - while (n_used_mmu_pages > kvm_nr_mmu_pages) { + if (used_pages > kvm_nr_mmu_pages) { + while (used_pages > kvm_nr_mmu_pages) { struct kvm_mmu_page *page; page = container_of(kvm->arch.active_mmu_pages.prev, struct kvm_mmu_page, link); kvm_mmu_zap_page(kvm, page); - n_used_mmu_pages--; + used_pages--; } kvm->arch.n_free_mmu_pages = 0; } @@ -1740,6 +1761,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, { int was_rmapped = 0; int was_writeble = is_writeble_pte(*shadow_pte); + int rmap_count; pgprintk("%s: spte %llx access %x write_fault %d" " user_fault %d gfn %lx\n", @@ -1781,9 +1803,11 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, page_header_update_slot(vcpu->kvm, shadow_pte, gfn); if (!was_rmapped) { - rmap_add(vcpu, shadow_pte, gfn, largepage); + rmap_count = rmap_add(vcpu, shadow_pte, gfn, largepage); if (!is_rmap_pte(*shadow_pte)) kvm_release_pfn_clean(pfn); + if (rmap_count > RMAP_RECYCLE_THRESHOLD) + rmap_recycle(vcpu, gfn, largepage); } else { if (was_writeble) kvm_release_pfn_dirty(pfn); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 71510e0..b1f658a 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -711,6 +711,7 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) svm->vmcb->control.tsc_offset += delta; vcpu->cpu = cpu; kvm_migrate_timers(vcpu); + svm->asid_generation = 0; } for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) @@ -1031,7 +1032,6 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *svm_data) svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID; } - svm->vcpu.cpu = svm_data->cpu; svm->asid_generation = svm_data->asid_generation; svm->vmcb->control.asid = svm_data->next_asid++; } @@ -2300,8 +2300,8 @@ static void pre_svm_run(struct vcpu_svm *svm) struct svm_cpu_data *svm_data = per_cpu(svm_data, cpu); svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING; - if (svm->vcpu.cpu != cpu || - svm->asid_generation != svm_data->asid_generation) + /* FIXME: handle wraparound of asid_generation */ + if (svm->asid_generation != svm_data->asid_generation) new_asid(svm, svm_data); } diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 356a0ce..29f9129 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3157,8 +3157,8 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx = to_vmx(vcpu); enum emulation_result err = EMULATE_DONE; - preempt_enable(); local_irq_enable(); + preempt_enable(); while (!guest_state_valid(vcpu)) { err = emulate_instruction(vcpu, kvm_run, 0, 0, 0); @@ -3168,7 +3168,7 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu, if (err != EMULATE_DONE) { kvm_report_emulation_failure(vcpu, "emulation failure"); - return; + break; } if (signal_pending(current)) @@ -3177,8 +3177,8 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu, schedule(); } - local_irq_disable(); preempt_disable(); + local_irq_disable(); vmx->invalid_state_emulation_result = err; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index fe5474a..3d45290 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -704,11 +704,48 @@ static bool msr_mtrr_valid(unsigned msr) return false; } +static bool valid_pat_type(unsigned t) +{ + return t < 8 && (1 << t) & 0xf3; /* 0, 1, 4, 5, 6, 7 */ +} + +static bool valid_mtrr_type(unsigned t) +{ + return t < 8 && (1 << t) & 0x73; /* 0, 1, 4, 5, 6 */ +} + +static bool mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data) +{ + int i; + + if (!msr_mtrr_valid(msr)) + return false; + + if (msr == MSR_IA32_CR_PAT) { + for (i = 0; i < 8; i++) + if (!valid_pat_type((data >> (i * 8)) & 0xff)) + return false; + return true; + } else if (msr == MSR_MTRRdefType) { + if (data & ~0xcff) + return false; + return valid_mtrr_type(data & 0xff); + } else if (msr >= MSR_MTRRfix64K_00000 && msr <= MSR_MTRRfix4K_F8000) { + for (i = 0; i < 8 ; i++) + if (!valid_mtrr_type((data >> (i * 8)) & 0xff)) + return false; + return true; + } + + /* variable MTRRs */ + return valid_mtrr_type(data & 0xff); +} + static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data) { u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges; - if (!msr_mtrr_valid(msr)) + if (!mtrr_valid(vcpu, msr, data)) return 1; if (msr == MSR_MTRRdefType) { @@ -1079,14 +1116,13 @@ long kvm_arch_dev_ioctl(struct file *filp, if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list)) goto out; r = -E2BIG; - if (n < num_msrs_to_save) + if (n < msr_list.nmsrs) goto out; r = -EFAULT; if (copy_to_user(user_msr_list->indices, &msrs_to_save, num_msrs_to_save * sizeof(u32))) goto out; - if (copy_to_user(user_msr_list->indices - + num_msrs_to_save * sizeof(u32), + if (copy_to_user(user_msr_list->indices + num_msrs_to_save, &emulated_msrs, ARRAY_SIZE(emulated_msrs) * sizeof(u32))) goto out; diff --git a/arch/x86/lib/msr.c b/arch/x86/lib/msr.c index 1440b9c..caa24ac 100644 --- a/arch/x86/lib/msr.c +++ b/arch/x86/lib/msr.c @@ -89,16 +89,13 @@ void rdmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs) rv.msrs = msrs; rv.msr_no = msr_no; - preempt_disable(); - /* - * FIXME: handle the CPU we're executing on separately for now until - * smp_call_function_many has been fixed to not skip it. - */ - this_cpu = raw_smp_processor_id(); - smp_call_function_single(this_cpu, __rdmsr_on_cpu, &rv, 1); + this_cpu = get_cpu(); + + if (cpumask_test_cpu(this_cpu, mask)) + __rdmsr_on_cpu(&rv); smp_call_function_many(mask, __rdmsr_on_cpu, &rv, 1); - preempt_enable(); + put_cpu(); } EXPORT_SYMBOL(rdmsr_on_cpus); @@ -121,16 +118,13 @@ void wrmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs) rv.msrs = msrs; rv.msr_no = msr_no; - preempt_disable(); - /* - * FIXME: handle the CPU we're executing on separately for now until - * smp_call_function_many has been fixed to not skip it. - */ - this_cpu = raw_smp_processor_id(); - smp_call_function_single(this_cpu, __wrmsr_on_cpu, &rv, 1); + this_cpu = get_cpu(); + + if (cpumask_test_cpu(this_cpu, mask)) + __wrmsr_on_cpu(&rv); smp_call_function_many(mask, __wrmsr_on_cpu, &rv, 1); - preempt_enable(); + put_cpu(); } EXPORT_SYMBOL(wrmsr_on_cpus); diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 1b734d7..7e600c1 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -591,9 +591,12 @@ static int __change_page_attr(struct cpa_data *cpa, int primary) unsigned int level; pte_t *kpte, old_pte; - if (cpa->flags & CPA_PAGES_ARRAY) - address = (unsigned long)page_address(cpa->pages[cpa->curpage]); - else if (cpa->flags & CPA_ARRAY) + if (cpa->flags & CPA_PAGES_ARRAY) { + struct page *page = cpa->pages[cpa->curpage]; + if (unlikely(PageHighMem(page))) + return 0; + address = (unsigned long)page_address(page); + } else if (cpa->flags & CPA_ARRAY) address = cpa->vaddr[cpa->curpage]; else address = *cpa->vaddr; @@ -697,9 +700,12 @@ static int cpa_process_alias(struct cpa_data *cpa) * No need to redo, when the primary call touched the direct * mapping already: */ - if (cpa->flags & CPA_PAGES_ARRAY) - vaddr = (unsigned long)page_address(cpa->pages[cpa->curpage]); - else if (cpa->flags & CPA_ARRAY) + if (cpa->flags & CPA_PAGES_ARRAY) { + struct page *page = cpa->pages[cpa->curpage]; + if (unlikely(PageHighMem(page))) + return 0; + vaddr = (unsigned long)page_address(page); + } else if (cpa->flags & CPA_ARRAY) vaddr = cpa->vaddr[cpa->curpage]; else vaddr = *cpa->vaddr; @@ -997,12 +1003,15 @@ EXPORT_SYMBOL(set_memory_array_uc); int _set_memory_wc(unsigned long addr, int numpages) { int ret; + unsigned long addr_copy = addr; + ret = change_page_attr_set(&addr, numpages, __pgprot(_PAGE_CACHE_UC_MINUS), 0); - if (!ret) { - ret = change_page_attr_set(&addr, numpages, - __pgprot(_PAGE_CACHE_WC), 0); + ret = change_page_attr_set_clr(&addr_copy, numpages, + __pgprot(_PAGE_CACHE_WC), + __pgprot(_PAGE_CACHE_MASK), + 0, 0, NULL); } return ret; } @@ -1119,7 +1128,9 @@ int set_pages_array_uc(struct page **pages, int addrinarray) int free_idx; for (i = 0; i < addrinarray; i++) { - start = (unsigned long)page_address(pages[i]); + if (PageHighMem(pages[i])) + continue; + start = page_to_pfn(pages[i]) << PAGE_SHIFT; end = start + PAGE_SIZE; if (reserve_memtype(start, end, _PAGE_CACHE_UC_MINUS, NULL)) goto err_out; @@ -1132,7 +1143,9 @@ int set_pages_array_uc(struct page **pages, int addrinarray) err_out: free_idx = i; for (i = 0; i < free_idx; i++) { - start = (unsigned long)page_address(pages[i]); + if (PageHighMem(pages[i])) + continue; + start = page_to_pfn(pages[i]) << PAGE_SHIFT; end = start + PAGE_SIZE; free_memtype(start, end); } @@ -1161,7 +1174,9 @@ int set_pages_array_wb(struct page **pages, int addrinarray) return retval; for (i = 0; i < addrinarray; i++) { - start = (unsigned long)page_address(pages[i]); + if (PageHighMem(pages[i])) + continue; + start = page_to_pfn(pages[i]) << PAGE_SHIFT; end = start + PAGE_SIZE; free_memtype(start, end); } diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index af8f965..ed34f5e 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -329,7 +329,6 @@ void __init reserve_top_address(unsigned long reserve) printk(KERN_INFO "Reserving virtual address space above 0x%08x\n", (int)-reserve); __FIXADDR_TOP = -reserve - PAGE_SIZE; - __VMALLOC_RESERVE += reserve; #endif } |