diff options
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/Kconfig | 4 | ||||
-rw-r--r-- | arch/x86/include/asm/kvm_host.h | 4 | ||||
-rw-r--r-- | arch/x86/include/asm/paravirt.h | 5 | ||||
-rw-r--r-- | arch/x86/include/asm/paravirt_types.h | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/amd.c | 14 | ||||
-rw-r--r-- | arch/x86/kernel/i387.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/irq.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/paravirt.c | 25 | ||||
-rw-r--r-- | arch/x86/kernel/reboot.c | 16 | ||||
-rw-r--r-- | arch/x86/kernel/relocate_kernel_64.S | 2 | ||||
-rw-r--r-- | arch/x86/kvm/vmx.c | 6 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 45 | ||||
-rw-r--r-- | arch/x86/lguest/boot.c | 1 | ||||
-rw-r--r-- | arch/x86/mm/fault.c | 6 | ||||
-rw-r--r-- | arch/x86/mm/init.c | 5 | ||||
-rw-r--r-- | arch/x86/mm/numa.c | 3 | ||||
-rw-r--r-- | arch/x86/mm/numa_32.c | 161 | ||||
-rw-r--r-- | arch/x86/mm/numa_internal.h | 6 | ||||
-rw-r--r-- | arch/x86/platform/efi/efi.c | 11 | ||||
-rw-r--r-- | arch/x86/xen/enlighten.c | 20 | ||||
-rw-r--r-- | arch/x86/xen/mmu.c | 1 | ||||
-rw-r--r-- | arch/x86/xen/time.c | 23 |
22 files changed, 107 insertions, 259 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index a0e9bda..90bf314 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1219,10 +1219,6 @@ config HAVE_ARCH_BOOTMEM def_bool y depends on X86_32 && NUMA -config HAVE_ARCH_ALLOC_REMAP - def_bool y - depends on X86_32 && NUMA - config ARCH_HAVE_MEMORY_PRESENT def_bool y depends on X86_32 && DISCONTIGMEM diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index d2ac8e2..1eb45de 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -391,8 +391,8 @@ struct kvm_vcpu_arch { gpa_t time; struct pvclock_vcpu_time_info hv_clock; unsigned int hw_tsc_khz; - unsigned int time_offset; - struct page *time_page; + struct gfn_to_hva_cache pv_time; + bool pv_time_enabled; u64 last_guest_tsc; u64 last_kernel_ns; u64 last_tsc_nsec; diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index ebbc4d8..2fdfe31 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -731,7 +731,10 @@ static inline void arch_leave_lazy_mmu_mode(void) PVOP_VCALL0(pv_mmu_ops.lazy_mode.leave); } -void arch_flush_lazy_mmu_mode(void); +static inline void arch_flush_lazy_mmu_mode(void) +{ + PVOP_VCALL0(pv_mmu_ops.lazy_mode.flush); +} static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx, phys_addr_t phys, pgprot_t flags) diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 8288509..4b67ec9 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -85,6 +85,7 @@ struct pv_lazy_ops { /* Set deferred update mode, used for batching operations. */ void (*enter)(void); void (*leave)(void); + void (*flush)(void); }; struct pv_time_ops { @@ -673,6 +674,7 @@ void paravirt_end_context_switch(struct task_struct *next); void paravirt_enter_lazy_mmu(void); void paravirt_leave_lazy_mmu(void); +void paravirt_flush_lazy_mmu(void); void _paravirt_nop(void); u32 _paravirt_ident_32(u32); diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index a93741d..3f4b6da 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -568,20 +568,6 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) } } - /* - * The way access filter has a performance penalty on some workloads. - * Disable it on the affected CPUs. - */ - if ((c->x86 == 0x15) && - (c->x86_model >= 0x02) && (c->x86_model < 0x20)) { - u64 val; - - if (!rdmsrl_safe(0xc0011021, &val) && !(val & 0x1E)) { - val |= 0x1E; - checking_wrmsrl(0xc0011021, val); - } - } - cpu_detect_cache_sizes(c); /* Multi core CPU? */ diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 12aff25..f7183ec 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -51,7 +51,7 @@ void __cpuinit mxcsr_feature_mask_init(void) clts(); if (cpu_has_fxsr) { memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct)); - asm volatile("fxsave %0" : : "m" (fx_scratch)); + asm volatile("fxsave %0" : "+m" (fx_scratch)); mask = fx_scratch.mxcsr_mask; if (mask == 0) mask = 0x0000ffbf; diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 6c0802e..a669961 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -159,10 +159,6 @@ u64 arch_irq_stat_cpu(unsigned int cpu) u64 arch_irq_stat(void) { u64 sum = atomic_read(&irq_err_count); - -#ifdef CONFIG_X86_IO_APIC - sum += atomic_read(&irq_mis_count); -#endif return sum; } diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 869e1ae..704faba 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -253,6 +253,18 @@ void paravirt_leave_lazy_mmu(void) leave_lazy(PARAVIRT_LAZY_MMU); } +void paravirt_flush_lazy_mmu(void) +{ + preempt_disable(); + + if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) { + arch_leave_lazy_mmu_mode(); + arch_enter_lazy_mmu_mode(); + } + + preempt_enable(); +} + void paravirt_start_context_switch(struct task_struct *prev) { BUG_ON(preemptible()); @@ -282,18 +294,6 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void) return percpu_read(paravirt_lazy_mode); } -void arch_flush_lazy_mmu_mode(void) -{ - preempt_disable(); - - if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) { - arch_leave_lazy_mmu_mode(); - arch_enter_lazy_mmu_mode(); - } - - preempt_enable(); -} - struct pv_info pv_info = { .name = "bare hardware", .paravirt_enabled = 0, @@ -462,6 +462,7 @@ struct pv_mmu_ops pv_mmu_ops = { .lazy_mode = { .enter = paravirt_nop, .leave = paravirt_nop, + .flush = paravirt_nop, }, .set_fixmap = native_set_fixmap, diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 89d6877..282c98f 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -460,6 +460,22 @@ static struct dmi_system_id __initdata pci_reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Precision M6600"), }, }, + { /* Handle problems with rebooting on the Dell PowerEdge C6100. */ + .callback = set_pci_reboot, + .ident = "Dell PowerEdge C6100", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "C6100"), + }, + }, + { /* Some C6100 machines were shipped with vendor being 'Dell'. */ + .callback = set_pci_reboot, + .ident = "Dell PowerEdge C6100", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell"), + DMI_MATCH(DMI_PRODUCT_NAME, "C6100"), + }, + }, { } }; diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S index 7a6f3b3..f2bb9c9 100644 --- a/arch/x86/kernel/relocate_kernel_64.S +++ b/arch/x86/kernel/relocate_kernel_64.S @@ -160,7 +160,7 @@ identity_mapped: xorq %rbp, %rbp xorq %r8, %r8 xorq %r9, %r9 - xorq %r10, %r9 + xorq %r10, %r10 xorq %r11, %r11 xorq %r12, %r12 xorq %r13, %r13 diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 2ad060a..be1d830 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3836,6 +3836,12 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) if (err != EMULATE_DONE) return 0; + if (vcpu->arch.halt_request) { + vcpu->arch.halt_request = 0; + ret = kvm_emulate_halt(vcpu); + goto out; + } + if (signal_pending(current)) goto out; if (need_resched()) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e329dc5..34afae8 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -548,8 +548,6 @@ int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) if (index != XCR_XFEATURE_ENABLED_MASK) return 1; xcr0 = xcr; - if (kvm_x86_ops->get_cpl(vcpu) != 0) - return 1; if (!(xcr0 & XSTATE_FP)) return 1; if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE)) @@ -563,7 +561,8 @@ int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) { - if (__kvm_set_xcr(vcpu, index, xcr)) { + if (kvm_x86_ops->get_cpl(vcpu) != 0 || + __kvm_set_xcr(vcpu, index, xcr)) { kvm_inject_gp(vcpu, 0); return 1; } @@ -1073,7 +1072,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) { unsigned long flags; struct kvm_vcpu_arch *vcpu = &v->arch; - void *shared_kaddr; unsigned long this_tsc_khz; s64 kernel_ns, max_kernel_ns; u64 tsc_timestamp; @@ -1109,7 +1107,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) local_irq_restore(flags); - if (!vcpu->time_page) + if (!vcpu->pv_time_enabled) return 0; /* @@ -1167,14 +1165,9 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) */ vcpu->hv_clock.version += 2; - shared_kaddr = kmap_atomic(vcpu->time_page, KM_USER0); - - memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock, - sizeof(vcpu->hv_clock)); - - kunmap_atomic(shared_kaddr, KM_USER0); - - mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT); + kvm_write_guest_cached(v->kvm, &vcpu->pv_time, + &vcpu->hv_clock, + sizeof(vcpu->hv_clock)); return 0; } @@ -1454,7 +1447,8 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data) return 0; } - if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa)) + if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa, + sizeof(u32))) return 1; vcpu->arch.apf.send_user_only = !(data & KVM_ASYNC_PF_SEND_ALWAYS); @@ -1464,10 +1458,7 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data) static void kvmclock_reset(struct kvm_vcpu *vcpu) { - if (vcpu->arch.time_page) { - kvm_release_page_dirty(vcpu->arch.time_page); - vcpu->arch.time_page = NULL; - } + vcpu->arch.pv_time_enabled = false; } int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) @@ -1527,6 +1518,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) break; case MSR_KVM_SYSTEM_TIME_NEW: case MSR_KVM_SYSTEM_TIME: { + u64 gpa_offset; kvmclock_reset(vcpu); vcpu->arch.time = data; @@ -1536,16 +1528,14 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) if (!(data & 1)) break; - /* ...but clean it before doing the actual write */ - vcpu->arch.time_offset = data & ~(PAGE_MASK | 1); - - vcpu->arch.time_page = - gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT); + gpa_offset = data & ~(PAGE_MASK | 1); - if (is_error_page(vcpu->arch.time_page)) { - kvm_release_page_clean(vcpu->arch.time_page); - vcpu->arch.time_page = NULL; - } + if (kvm_gfn_to_hva_cache_init(vcpu->kvm, + &vcpu->arch.pv_time, data & ~1ULL, + sizeof(struct pvclock_vcpu_time_info))) + vcpu->arch.pv_time_enabled = false; + else + vcpu->arch.pv_time_enabled = true; break; } case MSR_KVM_ASYNC_PF_EN: @@ -6252,6 +6242,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL)) goto fail_free_mce_banks; + vcpu->arch.pv_time_enabled = false; kvm_async_pf_hash_reset(vcpu); return 0; diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index db832fd..2d45247 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -1309,6 +1309,7 @@ __init void lguest_init(void) pv_mmu_ops.read_cr3 = lguest_read_cr3; pv_mmu_ops.lazy_mode.enter = paravirt_enter_lazy_mmu; pv_mmu_ops.lazy_mode.leave = lguest_leave_lazy_mmu_mode; + pv_mmu_ops.lazy_mode.flush = paravirt_flush_lazy_mmu; pv_mmu_ops.pte_update = lguest_pte_update; pv_mmu_ops.pte_update_defer = lguest_pte_update; diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 3b2ad91..7653f14 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -376,10 +376,12 @@ static noinline __kprobes int vmalloc_fault(unsigned long address) if (pgd_none(*pgd_ref)) return -1; - if (pgd_none(*pgd)) + if (pgd_none(*pgd)) { set_pgd(pgd, *pgd_ref); - else + arch_flush_lazy_mmu_mode(); + } else { BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); + } /* * Below here mismatches are bugs because these lower tables diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index c22c423..96c4577 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -44,11 +44,15 @@ static void __init find_early_table_space(struct map_range *mr, int nr_range) int i; unsigned long puds = 0, pmds = 0, ptes = 0, tables; unsigned long start = 0, good_end; + unsigned long pgd_extra = 0; phys_addr_t base; for (i = 0; i < nr_range; i++) { unsigned long range, extra; + if ((mr[i].end >> PGDIR_SHIFT) - (mr[i].start >> PGDIR_SHIFT)) + pgd_extra++; + range = mr[i].end - mr[i].start; puds += (range + PUD_SIZE - 1) >> PUD_SHIFT; @@ -73,6 +77,7 @@ static void __init find_early_table_space(struct map_range *mr, int nr_range) tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE); tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE); + tables += (pgd_extra * PAGE_SIZE); #ifdef CONFIG_X86_32 /* for fixmap */ diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index f5510d8..469ccae 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -207,9 +207,6 @@ static void __init setup_node_data(int nid, u64 start, u64 end) if (end && (end - start) < NODE_MIN_SIZE) return; - /* initialize remap allocator before aligning to ZONE_ALIGN */ - init_alloc_remap(nid, start, end); - start = roundup(start, ZONE_ALIGN); printk(KERN_INFO "Initmem setup node %d %016Lx-%016Lx\n", diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index 849a975..025d469 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c @@ -73,167 +73,6 @@ unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn, extern unsigned long highend_pfn, highstart_pfn; -#define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE) - -static void *node_remap_start_vaddr[MAX_NUMNODES]; -void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); - -/* - * Remap memory allocator - */ -static unsigned long node_remap_start_pfn[MAX_NUMNODES]; -static void *node_remap_end_vaddr[MAX_NUMNODES]; -static void *node_remap_alloc_vaddr[MAX_NUMNODES]; - -/** - * alloc_remap - Allocate remapped memory - * @nid: NUMA node to allocate memory from - * @size: The size of allocation - * - * Allocate @size bytes from the remap area of NUMA node @nid. The - * size of the remap area is predetermined by init_alloc_remap() and - * only the callers considered there should call this function. For - * more info, please read the comment on top of init_alloc_remap(). - * - * The caller must be ready to handle allocation failure from this - * function and fall back to regular memory allocator in such cases. - * - * CONTEXT: - * Single CPU early boot context. - * - * RETURNS: - * Pointer to the allocated memory on success, %NULL on failure. - */ -void *alloc_remap(int nid, unsigned long size) -{ - void *allocation = node_remap_alloc_vaddr[nid]; - - size = ALIGN(size, L1_CACHE_BYTES); - - if (!allocation || (allocation + size) > node_remap_end_vaddr[nid]) - return NULL; - - node_remap_alloc_vaddr[nid] += size; - memset(allocation, 0, size); - - return allocation; -} - -#ifdef CONFIG_HIBERNATION -/** - * resume_map_numa_kva - add KVA mapping to the temporary page tables created - * during resume from hibernation - * @pgd_base - temporary resume page directory - */ -void resume_map_numa_kva(pgd_t *pgd_base) -{ - int node; - - for_each_online_node(node) { - unsigned long start_va, start_pfn, nr_pages, pfn; - - start_va = (unsigned long)node_remap_start_vaddr[node]; - start_pfn = node_remap_start_pfn[node]; - nr_pages = (node_remap_end_vaddr[node] - - node_remap_start_vaddr[node]) >> PAGE_SHIFT; - - printk(KERN_DEBUG "%s: node %d\n", __func__, node); - - for (pfn = 0; pfn < nr_pages; pfn += PTRS_PER_PTE) { - unsigned long vaddr = start_va + (pfn << PAGE_SHIFT); - pgd_t *pgd = pgd_base + pgd_index(vaddr); - pud_t *pud = pud_offset(pgd, vaddr); - pmd_t *pmd = pmd_offset(pud, vaddr); - - set_pmd(pmd, pfn_pmd(start_pfn + pfn, - PAGE_KERNEL_LARGE_EXEC)); - - printk(KERN_DEBUG "%s: %08lx -> pfn %08lx\n", - __func__, vaddr, start_pfn + pfn); - } - } -} -#endif - -/** - * init_alloc_remap - Initialize remap allocator for a NUMA node - * @nid: NUMA node to initizlie remap allocator for - * - * NUMA nodes may end up without any lowmem. As allocating pgdat and - * memmap on a different node with lowmem is inefficient, a special - * remap allocator is implemented which can be used by alloc_remap(). - * - * For each node, the amount of memory which will be necessary for - * pgdat and memmap is calculated and two memory areas of the size are - * allocated - one in the node and the other in lowmem; then, the area - * in the node is remapped to the lowmem area. - * - * As pgdat and memmap must be allocated in lowmem anyway, this - * doesn't waste lowmem address space; however, the actual lowmem - * which gets remapped over is wasted. The amount shouldn't be - * problematic on machines this feature will be used. - * - * Initialization failure isn't fatal. alloc_remap() is used - * opportunistically and the callers will fall back to other memory - * allocation mechanisms on failure. - */ -void __init init_alloc_remap(int nid, u64 start, u64 end) -{ - unsigned long start_pfn = start >> PAGE_SHIFT; - unsigned long end_pfn = end >> PAGE_SHIFT; - unsigned long size, pfn; - u64 node_pa, remap_pa; - void *remap_va; - - /* - * The acpi/srat node info can show hot-add memroy zones where - * memory could be added but not currently present. - */ - printk(KERN_DEBUG "node %d pfn: [%lx - %lx]\n", - nid, start_pfn, end_pfn); - - /* calculate the necessary space aligned to large page size */ - size = node_memmap_size_bytes(nid, start_pfn, end_pfn); - size += ALIGN(sizeof(pg_data_t), PAGE_SIZE); - size = ALIGN(size, LARGE_PAGE_BYTES); - - /* allocate node memory and the lowmem remap area */ - node_pa = memblock_find_in_range(start, end, size, LARGE_PAGE_BYTES); - if (node_pa == MEMBLOCK_ERROR) { - pr_warning("remap_alloc: failed to allocate %lu bytes for node %d\n", - size, nid); - return; - } - memblock_x86_reserve_range(node_pa, node_pa + size, "KVA RAM"); - - remap_pa = memblock_find_in_range(min_low_pfn << PAGE_SHIFT, - max_low_pfn << PAGE_SHIFT, - size, LARGE_PAGE_BYTES); - if (remap_pa == MEMBLOCK_ERROR) { - pr_warning("remap_alloc: failed to allocate %lu bytes remap area for node %d\n", - size, nid); - memblock_x86_free_range(node_pa, node_pa + size); - return; - } - memblock_x86_reserve_range(remap_pa, remap_pa + size, "KVA PG"); - remap_va = phys_to_virt(remap_pa); - - /* perform actual remap */ - for (pfn = 0; pfn < size >> PAGE_SHIFT; pfn += PTRS_PER_PTE) - set_pmd_pfn((unsigned long)remap_va + (pfn << PAGE_SHIFT), - (node_pa >> PAGE_SHIFT) + pfn, - PAGE_KERNEL_LARGE); - - /* initialize remap allocator parameters */ - node_remap_start_pfn[nid] = node_pa >> PAGE_SHIFT; - node_remap_start_vaddr[nid] = remap_va; - node_remap_end_vaddr[nid] = remap_va + size; - node_remap_alloc_vaddr[nid] = remap_va; - - printk(KERN_DEBUG "remap_alloc: node %d [%08llx-%08llx) -> [%p-%p)\n", - nid, node_pa, node_pa + size, remap_va, remap_va + size); -} - void __init initmem_init(void) { x86_numa_init(); diff --git a/arch/x86/mm/numa_internal.h b/arch/x86/mm/numa_internal.h index 7178c3a..ad86ec9 100644 --- a/arch/x86/mm/numa_internal.h +++ b/arch/x86/mm/numa_internal.h @@ -21,12 +21,6 @@ void __init numa_reset_distance(void); void __init x86_numa_init(void); -#ifdef CONFIG_X86_64 -static inline void init_alloc_remap(int nid, u64 start, u64 end) { } -#else -void __init init_alloc_remap(int nid, u64 start, u64 end); -#endif - #ifdef CONFIG_NUMA_EMU void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt); diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 899e393..86272f0 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -588,10 +588,13 @@ void __init efi_enter_virtual_mode(void) for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { md = p; - if (!(md->attribute & EFI_MEMORY_RUNTIME) && - md->type != EFI_BOOT_SERVICES_CODE && - md->type != EFI_BOOT_SERVICES_DATA) - continue; + if (!(md->attribute & EFI_MEMORY_RUNTIME)) { +#ifdef CONFIG_X86_64 + if (md->type != EFI_BOOT_SERVICES_CODE && + md->type != EFI_BOOT_SERVICES_DATA) +#endif + continue; + } size = md->num_pages << EFI_PAGE_SHIFT; end = md->phys_addr + size; diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 9f808af..e11efbd 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -129,6 +129,21 @@ static void xen_vcpu_setup(int cpu) BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); + /* + * This path is called twice on PVHVM - first during bootup via + * smp_init -> xen_hvm_cpu_notify, and then if the VCPU is being + * hotplugged: cpu_up -> xen_hvm_cpu_notify. + * As we can only do the VCPUOP_register_vcpu_info once lets + * not over-write its result. + * + * For PV it is called during restore (xen_vcpu_restore) and bootup + * (xen_setup_vcpu_info_placement). The hotplug mechanism does not + * use this function. + */ + if (xen_hvm_domain()) { + if (per_cpu(xen_vcpu, cpu) == &per_cpu(xen_vcpu_info, cpu)) + return; + } if (cpu < MAX_VIRT_CPUS) per_cpu(xen_vcpu,cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; @@ -1365,8 +1380,11 @@ static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self, switch (action) { case CPU_UP_PREPARE: xen_vcpu_setup(cpu); - if (xen_have_vector_callback) + if (xen_have_vector_callback) { xen_init_lock_cpu(cpu); + if (xen_feature(XENFEAT_hvm_safe_pvclock)) + xen_setup_timer(cpu); + } break; default: break; diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index d957dce..a0aed70 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -2011,6 +2011,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = { .lazy_mode = { .enter = paravirt_enter_lazy_mmu, .leave = xen_leave_lazy_mmu, + .flush = paravirt_flush_lazy_mmu, }, .set_fixmap = xen_set_fixmap, diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 5158c50..19568a0 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -36,9 +36,8 @@ static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate); /* snapshots of runstate info */ static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate_snapshot); -/* unused ns of stolen and blocked time */ +/* unused ns of stolen time */ static DEFINE_PER_CPU(u64, xen_residual_stolen); -static DEFINE_PER_CPU(u64, xen_residual_blocked); /* return an consistent snapshot of 64-bit time/counter value */ static u64 get64(const u64 *p) @@ -115,7 +114,7 @@ static void do_stolen_accounting(void) { struct vcpu_runstate_info state; struct vcpu_runstate_info *snap; - s64 blocked, runnable, offline, stolen; + s64 runnable, offline, stolen; cputime_t ticks; get_runstate_snapshot(&state); @@ -125,7 +124,6 @@ static void do_stolen_accounting(void) snap = &__get_cpu_var(xen_runstate_snapshot); /* work out how much time the VCPU has not been runn*ing* */ - blocked = state.time[RUNSTATE_blocked] - snap->time[RUNSTATE_blocked]; runnable = state.time[RUNSTATE_runnable] - snap->time[RUNSTATE_runnable]; offline = state.time[RUNSTATE_offline] - snap->time[RUNSTATE_offline]; @@ -141,17 +139,6 @@ static void do_stolen_accounting(void) ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen); __this_cpu_write(xen_residual_stolen, stolen); account_steal_ticks(ticks); - - /* Add the appropriate number of ticks of blocked time, - including any left-overs from last time. */ - blocked += __this_cpu_read(xen_residual_blocked); - - if (blocked < 0) - blocked = 0; - - ticks = iter_div_u64_rem(blocked, NS_PER_TICK, &blocked); - __this_cpu_write(xen_residual_blocked, blocked); - account_idle_ticks(ticks); } /* Get the TSC speed from Xen */ @@ -482,7 +469,11 @@ static void xen_hvm_setup_cpu_clockevents(void) { int cpu = smp_processor_id(); xen_setup_runstate_info(cpu); - xen_setup_timer(cpu); + /* + * xen_setup_timer(cpu) - snprintf is bad in atomic context. Hence + * doing it xen_hvm_cpu_notify (which gets called by smp_init during + * early bootup and also during CPU hotplug events). + */ xen_setup_cpu_clockevents(); } |