aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig4
-rw-r--r--arch/x86/include/asm/kvm_host.h4
-rw-r--r--arch/x86/include/asm/paravirt.h5
-rw-r--r--arch/x86/include/asm/paravirt_types.h2
-rw-r--r--arch/x86/kernel/cpu/amd.c14
-rw-r--r--arch/x86/kernel/i387.c2
-rw-r--r--arch/x86/kernel/irq.c4
-rw-r--r--arch/x86/kernel/paravirt.c25
-rw-r--r--arch/x86/kernel/reboot.c16
-rw-r--r--arch/x86/kernel/relocate_kernel_64.S2
-rw-r--r--arch/x86/kvm/vmx.c6
-rw-r--r--arch/x86/kvm/x86.c45
-rw-r--r--arch/x86/lguest/boot.c1
-rw-r--r--arch/x86/mm/fault.c6
-rw-r--r--arch/x86/mm/init.c5
-rw-r--r--arch/x86/mm/numa.c3
-rw-r--r--arch/x86/mm/numa_32.c161
-rw-r--r--arch/x86/mm/numa_internal.h6
-rw-r--r--arch/x86/platform/efi/efi.c11
-rw-r--r--arch/x86/xen/enlighten.c20
-rw-r--r--arch/x86/xen/mmu.c1
-rw-r--r--arch/x86/xen/time.c23
22 files changed, 107 insertions, 259 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index a0e9bda..90bf314 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1219,10 +1219,6 @@ config HAVE_ARCH_BOOTMEM
def_bool y
depends on X86_32 && NUMA
-config HAVE_ARCH_ALLOC_REMAP
- def_bool y
- depends on X86_32 && NUMA
-
config ARCH_HAVE_MEMORY_PRESENT
def_bool y
depends on X86_32 && DISCONTIGMEM
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index d2ac8e2..1eb45de 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -391,8 +391,8 @@ struct kvm_vcpu_arch {
gpa_t time;
struct pvclock_vcpu_time_info hv_clock;
unsigned int hw_tsc_khz;
- unsigned int time_offset;
- struct page *time_page;
+ struct gfn_to_hva_cache pv_time;
+ bool pv_time_enabled;
u64 last_guest_tsc;
u64 last_kernel_ns;
u64 last_tsc_nsec;
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index ebbc4d8..2fdfe31 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -731,7 +731,10 @@ static inline void arch_leave_lazy_mmu_mode(void)
PVOP_VCALL0(pv_mmu_ops.lazy_mode.leave);
}
-void arch_flush_lazy_mmu_mode(void);
+static inline void arch_flush_lazy_mmu_mode(void)
+{
+ PVOP_VCALL0(pv_mmu_ops.lazy_mode.flush);
+}
static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx,
phys_addr_t phys, pgprot_t flags)
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 8288509..4b67ec9 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -85,6 +85,7 @@ struct pv_lazy_ops {
/* Set deferred update mode, used for batching operations. */
void (*enter)(void);
void (*leave)(void);
+ void (*flush)(void);
};
struct pv_time_ops {
@@ -673,6 +674,7 @@ void paravirt_end_context_switch(struct task_struct *next);
void paravirt_enter_lazy_mmu(void);
void paravirt_leave_lazy_mmu(void);
+void paravirt_flush_lazy_mmu(void);
void _paravirt_nop(void);
u32 _paravirt_ident_32(u32);
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index a93741d..3f4b6da 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -568,20 +568,6 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
}
}
- /*
- * The way access filter has a performance penalty on some workloads.
- * Disable it on the affected CPUs.
- */
- if ((c->x86 == 0x15) &&
- (c->x86_model >= 0x02) && (c->x86_model < 0x20)) {
- u64 val;
-
- if (!rdmsrl_safe(0xc0011021, &val) && !(val & 0x1E)) {
- val |= 0x1E;
- checking_wrmsrl(0xc0011021, val);
- }
- }
-
cpu_detect_cache_sizes(c);
/* Multi core CPU? */
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index 12aff25..f7183ec 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -51,7 +51,7 @@ void __cpuinit mxcsr_feature_mask_init(void)
clts();
if (cpu_has_fxsr) {
memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct));
- asm volatile("fxsave %0" : : "m" (fx_scratch));
+ asm volatile("fxsave %0" : "+m" (fx_scratch));
mask = fx_scratch.mxcsr_mask;
if (mask == 0)
mask = 0x0000ffbf;
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 6c0802e..a669961 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -159,10 +159,6 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
u64 arch_irq_stat(void)
{
u64 sum = atomic_read(&irq_err_count);
-
-#ifdef CONFIG_X86_IO_APIC
- sum += atomic_read(&irq_mis_count);
-#endif
return sum;
}
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 869e1ae..704faba 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -253,6 +253,18 @@ void paravirt_leave_lazy_mmu(void)
leave_lazy(PARAVIRT_LAZY_MMU);
}
+void paravirt_flush_lazy_mmu(void)
+{
+ preempt_disable();
+
+ if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
+ arch_leave_lazy_mmu_mode();
+ arch_enter_lazy_mmu_mode();
+ }
+
+ preempt_enable();
+}
+
void paravirt_start_context_switch(struct task_struct *prev)
{
BUG_ON(preemptible());
@@ -282,18 +294,6 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
return percpu_read(paravirt_lazy_mode);
}
-void arch_flush_lazy_mmu_mode(void)
-{
- preempt_disable();
-
- if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
- arch_leave_lazy_mmu_mode();
- arch_enter_lazy_mmu_mode();
- }
-
- preempt_enable();
-}
-
struct pv_info pv_info = {
.name = "bare hardware",
.paravirt_enabled = 0,
@@ -462,6 +462,7 @@ struct pv_mmu_ops pv_mmu_ops = {
.lazy_mode = {
.enter = paravirt_nop,
.leave = paravirt_nop,
+ .flush = paravirt_nop,
},
.set_fixmap = native_set_fixmap,
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 89d6877..282c98f 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -460,6 +460,22 @@ static struct dmi_system_id __initdata pci_reboot_dmi_table[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "Precision M6600"),
},
},
+ { /* Handle problems with rebooting on the Dell PowerEdge C6100. */
+ .callback = set_pci_reboot,
+ .ident = "Dell PowerEdge C6100",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "C6100"),
+ },
+ },
+ { /* Some C6100 machines were shipped with vendor being 'Dell'. */
+ .callback = set_pci_reboot,
+ .ident = "Dell PowerEdge C6100",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "C6100"),
+ },
+ },
{ }
};
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
index 7a6f3b3..f2bb9c9 100644
--- a/arch/x86/kernel/relocate_kernel_64.S
+++ b/arch/x86/kernel/relocate_kernel_64.S
@@ -160,7 +160,7 @@ identity_mapped:
xorq %rbp, %rbp
xorq %r8, %r8
xorq %r9, %r9
- xorq %r10, %r9
+ xorq %r10, %r10
xorq %r11, %r11
xorq %r12, %r12
xorq %r13, %r13
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 2ad060a..be1d830 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3836,6 +3836,12 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
if (err != EMULATE_DONE)
return 0;
+ if (vcpu->arch.halt_request) {
+ vcpu->arch.halt_request = 0;
+ ret = kvm_emulate_halt(vcpu);
+ goto out;
+ }
+
if (signal_pending(current))
goto out;
if (need_resched())
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e329dc5..34afae8 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -548,8 +548,6 @@ int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
if (index != XCR_XFEATURE_ENABLED_MASK)
return 1;
xcr0 = xcr;
- if (kvm_x86_ops->get_cpl(vcpu) != 0)
- return 1;
if (!(xcr0 & XSTATE_FP))
return 1;
if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE))
@@ -563,7 +561,8 @@ int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
{
- if (__kvm_set_xcr(vcpu, index, xcr)) {
+ if (kvm_x86_ops->get_cpl(vcpu) != 0 ||
+ __kvm_set_xcr(vcpu, index, xcr)) {
kvm_inject_gp(vcpu, 0);
return 1;
}
@@ -1073,7 +1072,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
{
unsigned long flags;
struct kvm_vcpu_arch *vcpu = &v->arch;
- void *shared_kaddr;
unsigned long this_tsc_khz;
s64 kernel_ns, max_kernel_ns;
u64 tsc_timestamp;
@@ -1109,7 +1107,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
local_irq_restore(flags);
- if (!vcpu->time_page)
+ if (!vcpu->pv_time_enabled)
return 0;
/*
@@ -1167,14 +1165,9 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
*/
vcpu->hv_clock.version += 2;
- shared_kaddr = kmap_atomic(vcpu->time_page, KM_USER0);
-
- memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock,
- sizeof(vcpu->hv_clock));
-
- kunmap_atomic(shared_kaddr, KM_USER0);
-
- mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT);
+ kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
+ &vcpu->hv_clock,
+ sizeof(vcpu->hv_clock));
return 0;
}
@@ -1454,7 +1447,8 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
return 0;
}
- if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa))
+ if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa,
+ sizeof(u32)))
return 1;
vcpu->arch.apf.send_user_only = !(data & KVM_ASYNC_PF_SEND_ALWAYS);
@@ -1464,10 +1458,7 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
static void kvmclock_reset(struct kvm_vcpu *vcpu)
{
- if (vcpu->arch.time_page) {
- kvm_release_page_dirty(vcpu->arch.time_page);
- vcpu->arch.time_page = NULL;
- }
+ vcpu->arch.pv_time_enabled = false;
}
int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
@@ -1527,6 +1518,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
break;
case MSR_KVM_SYSTEM_TIME_NEW:
case MSR_KVM_SYSTEM_TIME: {
+ u64 gpa_offset;
kvmclock_reset(vcpu);
vcpu->arch.time = data;
@@ -1536,16 +1528,14 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
if (!(data & 1))
break;
- /* ...but clean it before doing the actual write */
- vcpu->arch.time_offset = data & ~(PAGE_MASK | 1);
-
- vcpu->arch.time_page =
- gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT);
+ gpa_offset = data & ~(PAGE_MASK | 1);
- if (is_error_page(vcpu->arch.time_page)) {
- kvm_release_page_clean(vcpu->arch.time_page);
- vcpu->arch.time_page = NULL;
- }
+ if (kvm_gfn_to_hva_cache_init(vcpu->kvm,
+ &vcpu->arch.pv_time, data & ~1ULL,
+ sizeof(struct pvclock_vcpu_time_info)))
+ vcpu->arch.pv_time_enabled = false;
+ else
+ vcpu->arch.pv_time_enabled = true;
break;
}
case MSR_KVM_ASYNC_PF_EN:
@@ -6252,6 +6242,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL))
goto fail_free_mce_banks;
+ vcpu->arch.pv_time_enabled = false;
kvm_async_pf_hash_reset(vcpu);
return 0;
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index db832fd..2d45247 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -1309,6 +1309,7 @@ __init void lguest_init(void)
pv_mmu_ops.read_cr3 = lguest_read_cr3;
pv_mmu_ops.lazy_mode.enter = paravirt_enter_lazy_mmu;
pv_mmu_ops.lazy_mode.leave = lguest_leave_lazy_mmu_mode;
+ pv_mmu_ops.lazy_mode.flush = paravirt_flush_lazy_mmu;
pv_mmu_ops.pte_update = lguest_pte_update;
pv_mmu_ops.pte_update_defer = lguest_pte_update;
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 3b2ad91..7653f14 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -376,10 +376,12 @@ static noinline __kprobes int vmalloc_fault(unsigned long address)
if (pgd_none(*pgd_ref))
return -1;
- if (pgd_none(*pgd))
+ if (pgd_none(*pgd)) {
set_pgd(pgd, *pgd_ref);
- else
+ arch_flush_lazy_mmu_mode();
+ } else {
BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
+ }
/*
* Below here mismatches are bugs because these lower tables
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index c22c423..96c4577 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -44,11 +44,15 @@ static void __init find_early_table_space(struct map_range *mr, int nr_range)
int i;
unsigned long puds = 0, pmds = 0, ptes = 0, tables;
unsigned long start = 0, good_end;
+ unsigned long pgd_extra = 0;
phys_addr_t base;
for (i = 0; i < nr_range; i++) {
unsigned long range, extra;
+ if ((mr[i].end >> PGDIR_SHIFT) - (mr[i].start >> PGDIR_SHIFT))
+ pgd_extra++;
+
range = mr[i].end - mr[i].start;
puds += (range + PUD_SIZE - 1) >> PUD_SHIFT;
@@ -73,6 +77,7 @@ static void __init find_early_table_space(struct map_range *mr, int nr_range)
tables = roundup(puds * sizeof(pud_t), PAGE_SIZE);
tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE);
tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE);
+ tables += (pgd_extra * PAGE_SIZE);
#ifdef CONFIG_X86_32
/* for fixmap */
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index f5510d8..469ccae 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -207,9 +207,6 @@ static void __init setup_node_data(int nid, u64 start, u64 end)
if (end && (end - start) < NODE_MIN_SIZE)
return;
- /* initialize remap allocator before aligning to ZONE_ALIGN */
- init_alloc_remap(nid, start, end);
-
start = roundup(start, ZONE_ALIGN);
printk(KERN_INFO "Initmem setup node %d %016Lx-%016Lx\n",
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 849a975..025d469 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -73,167 +73,6 @@ unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn,
extern unsigned long highend_pfn, highstart_pfn;
-#define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE)
-
-static void *node_remap_start_vaddr[MAX_NUMNODES];
-void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
-
-/*
- * Remap memory allocator
- */
-static unsigned long node_remap_start_pfn[MAX_NUMNODES];
-static void *node_remap_end_vaddr[MAX_NUMNODES];
-static void *node_remap_alloc_vaddr[MAX_NUMNODES];
-
-/**
- * alloc_remap - Allocate remapped memory
- * @nid: NUMA node to allocate memory from
- * @size: The size of allocation
- *
- * Allocate @size bytes from the remap area of NUMA node @nid. The
- * size of the remap area is predetermined by init_alloc_remap() and
- * only the callers considered there should call this function. For
- * more info, please read the comment on top of init_alloc_remap().
- *
- * The caller must be ready to handle allocation failure from this
- * function and fall back to regular memory allocator in such cases.
- *
- * CONTEXT:
- * Single CPU early boot context.
- *
- * RETURNS:
- * Pointer to the allocated memory on success, %NULL on failure.
- */
-void *alloc_remap(int nid, unsigned long size)
-{
- void *allocation = node_remap_alloc_vaddr[nid];
-
- size = ALIGN(size, L1_CACHE_BYTES);
-
- if (!allocation || (allocation + size) > node_remap_end_vaddr[nid])
- return NULL;
-
- node_remap_alloc_vaddr[nid] += size;
- memset(allocation, 0, size);
-
- return allocation;
-}
-
-#ifdef CONFIG_HIBERNATION
-/**
- * resume_map_numa_kva - add KVA mapping to the temporary page tables created
- * during resume from hibernation
- * @pgd_base - temporary resume page directory
- */
-void resume_map_numa_kva(pgd_t *pgd_base)
-{
- int node;
-
- for_each_online_node(node) {
- unsigned long start_va, start_pfn, nr_pages, pfn;
-
- start_va = (unsigned long)node_remap_start_vaddr[node];
- start_pfn = node_remap_start_pfn[node];
- nr_pages = (node_remap_end_vaddr[node] -
- node_remap_start_vaddr[node]) >> PAGE_SHIFT;
-
- printk(KERN_DEBUG "%s: node %d\n", __func__, node);
-
- for (pfn = 0; pfn < nr_pages; pfn += PTRS_PER_PTE) {
- unsigned long vaddr = start_va + (pfn << PAGE_SHIFT);
- pgd_t *pgd = pgd_base + pgd_index(vaddr);
- pud_t *pud = pud_offset(pgd, vaddr);
- pmd_t *pmd = pmd_offset(pud, vaddr);
-
- set_pmd(pmd, pfn_pmd(start_pfn + pfn,
- PAGE_KERNEL_LARGE_EXEC));
-
- printk(KERN_DEBUG "%s: %08lx -> pfn %08lx\n",
- __func__, vaddr, start_pfn + pfn);
- }
- }
-}
-#endif
-
-/**
- * init_alloc_remap - Initialize remap allocator for a NUMA node
- * @nid: NUMA node to initizlie remap allocator for
- *
- * NUMA nodes may end up without any lowmem. As allocating pgdat and
- * memmap on a different node with lowmem is inefficient, a special
- * remap allocator is implemented which can be used by alloc_remap().
- *
- * For each node, the amount of memory which will be necessary for
- * pgdat and memmap is calculated and two memory areas of the size are
- * allocated - one in the node and the other in lowmem; then, the area
- * in the node is remapped to the lowmem area.
- *
- * As pgdat and memmap must be allocated in lowmem anyway, this
- * doesn't waste lowmem address space; however, the actual lowmem
- * which gets remapped over is wasted. The amount shouldn't be
- * problematic on machines this feature will be used.
- *
- * Initialization failure isn't fatal. alloc_remap() is used
- * opportunistically and the callers will fall back to other memory
- * allocation mechanisms on failure.
- */
-void __init init_alloc_remap(int nid, u64 start, u64 end)
-{
- unsigned long start_pfn = start >> PAGE_SHIFT;
- unsigned long end_pfn = end >> PAGE_SHIFT;
- unsigned long size, pfn;
- u64 node_pa, remap_pa;
- void *remap_va;
-
- /*
- * The acpi/srat node info can show hot-add memroy zones where
- * memory could be added but not currently present.
- */
- printk(KERN_DEBUG "node %d pfn: [%lx - %lx]\n",
- nid, start_pfn, end_pfn);
-
- /* calculate the necessary space aligned to large page size */
- size = node_memmap_size_bytes(nid, start_pfn, end_pfn);
- size += ALIGN(sizeof(pg_data_t), PAGE_SIZE);
- size = ALIGN(size, LARGE_PAGE_BYTES);
-
- /* allocate node memory and the lowmem remap area */
- node_pa = memblock_find_in_range(start, end, size, LARGE_PAGE_BYTES);
- if (node_pa == MEMBLOCK_ERROR) {
- pr_warning("remap_alloc: failed to allocate %lu bytes for node %d\n",
- size, nid);
- return;
- }
- memblock_x86_reserve_range(node_pa, node_pa + size, "KVA RAM");
-
- remap_pa = memblock_find_in_range(min_low_pfn << PAGE_SHIFT,
- max_low_pfn << PAGE_SHIFT,
- size, LARGE_PAGE_BYTES);
- if (remap_pa == MEMBLOCK_ERROR) {
- pr_warning("remap_alloc: failed to allocate %lu bytes remap area for node %d\n",
- size, nid);
- memblock_x86_free_range(node_pa, node_pa + size);
- return;
- }
- memblock_x86_reserve_range(remap_pa, remap_pa + size, "KVA PG");
- remap_va = phys_to_virt(remap_pa);
-
- /* perform actual remap */
- for (pfn = 0; pfn < size >> PAGE_SHIFT; pfn += PTRS_PER_PTE)
- set_pmd_pfn((unsigned long)remap_va + (pfn << PAGE_SHIFT),
- (node_pa >> PAGE_SHIFT) + pfn,
- PAGE_KERNEL_LARGE);
-
- /* initialize remap allocator parameters */
- node_remap_start_pfn[nid] = node_pa >> PAGE_SHIFT;
- node_remap_start_vaddr[nid] = remap_va;
- node_remap_end_vaddr[nid] = remap_va + size;
- node_remap_alloc_vaddr[nid] = remap_va;
-
- printk(KERN_DEBUG "remap_alloc: node %d [%08llx-%08llx) -> [%p-%p)\n",
- nid, node_pa, node_pa + size, remap_va, remap_va + size);
-}
-
void __init initmem_init(void)
{
x86_numa_init();
diff --git a/arch/x86/mm/numa_internal.h b/arch/x86/mm/numa_internal.h
index 7178c3a..ad86ec9 100644
--- a/arch/x86/mm/numa_internal.h
+++ b/arch/x86/mm/numa_internal.h
@@ -21,12 +21,6 @@ void __init numa_reset_distance(void);
void __init x86_numa_init(void);
-#ifdef CONFIG_X86_64
-static inline void init_alloc_remap(int nid, u64 start, u64 end) { }
-#else
-void __init init_alloc_remap(int nid, u64 start, u64 end);
-#endif
-
#ifdef CONFIG_NUMA_EMU
void __init numa_emulation(struct numa_meminfo *numa_meminfo,
int numa_dist_cnt);
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 899e393..86272f0 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -588,10 +588,13 @@ void __init efi_enter_virtual_mode(void)
for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
md = p;
- if (!(md->attribute & EFI_MEMORY_RUNTIME) &&
- md->type != EFI_BOOT_SERVICES_CODE &&
- md->type != EFI_BOOT_SERVICES_DATA)
- continue;
+ if (!(md->attribute & EFI_MEMORY_RUNTIME)) {
+#ifdef CONFIG_X86_64
+ if (md->type != EFI_BOOT_SERVICES_CODE &&
+ md->type != EFI_BOOT_SERVICES_DATA)
+#endif
+ continue;
+ }
size = md->num_pages << EFI_PAGE_SHIFT;
end = md->phys_addr + size;
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 9f808af..e11efbd 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -129,6 +129,21 @@ static void xen_vcpu_setup(int cpu)
BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
+ /*
+ * This path is called twice on PVHVM - first during bootup via
+ * smp_init -> xen_hvm_cpu_notify, and then if the VCPU is being
+ * hotplugged: cpu_up -> xen_hvm_cpu_notify.
+ * As we can only do the VCPUOP_register_vcpu_info once lets
+ * not over-write its result.
+ *
+ * For PV it is called during restore (xen_vcpu_restore) and bootup
+ * (xen_setup_vcpu_info_placement). The hotplug mechanism does not
+ * use this function.
+ */
+ if (xen_hvm_domain()) {
+ if (per_cpu(xen_vcpu, cpu) == &per_cpu(xen_vcpu_info, cpu))
+ return;
+ }
if (cpu < MAX_VIRT_CPUS)
per_cpu(xen_vcpu,cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
@@ -1365,8 +1380,11 @@ static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self,
switch (action) {
case CPU_UP_PREPARE:
xen_vcpu_setup(cpu);
- if (xen_have_vector_callback)
+ if (xen_have_vector_callback) {
xen_init_lock_cpu(cpu);
+ if (xen_feature(XENFEAT_hvm_safe_pvclock))
+ xen_setup_timer(cpu);
+ }
break;
default:
break;
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index d957dce..a0aed70 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -2011,6 +2011,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
.lazy_mode = {
.enter = paravirt_enter_lazy_mmu,
.leave = xen_leave_lazy_mmu,
+ .flush = paravirt_flush_lazy_mmu,
},
.set_fixmap = xen_set_fixmap,
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 5158c50..19568a0 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -36,9 +36,8 @@ static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate);
/* snapshots of runstate info */
static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate_snapshot);
-/* unused ns of stolen and blocked time */
+/* unused ns of stolen time */
static DEFINE_PER_CPU(u64, xen_residual_stolen);
-static DEFINE_PER_CPU(u64, xen_residual_blocked);
/* return an consistent snapshot of 64-bit time/counter value */
static u64 get64(const u64 *p)
@@ -115,7 +114,7 @@ static void do_stolen_accounting(void)
{
struct vcpu_runstate_info state;
struct vcpu_runstate_info *snap;
- s64 blocked, runnable, offline, stolen;
+ s64 runnable, offline, stolen;
cputime_t ticks;
get_runstate_snapshot(&state);
@@ -125,7 +124,6 @@ static void do_stolen_accounting(void)
snap = &__get_cpu_var(xen_runstate_snapshot);
/* work out how much time the VCPU has not been runn*ing* */
- blocked = state.time[RUNSTATE_blocked] - snap->time[RUNSTATE_blocked];
runnable = state.time[RUNSTATE_runnable] - snap->time[RUNSTATE_runnable];
offline = state.time[RUNSTATE_offline] - snap->time[RUNSTATE_offline];
@@ -141,17 +139,6 @@ static void do_stolen_accounting(void)
ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen);
__this_cpu_write(xen_residual_stolen, stolen);
account_steal_ticks(ticks);
-
- /* Add the appropriate number of ticks of blocked time,
- including any left-overs from last time. */
- blocked += __this_cpu_read(xen_residual_blocked);
-
- if (blocked < 0)
- blocked = 0;
-
- ticks = iter_div_u64_rem(blocked, NS_PER_TICK, &blocked);
- __this_cpu_write(xen_residual_blocked, blocked);
- account_idle_ticks(ticks);
}
/* Get the TSC speed from Xen */
@@ -482,7 +469,11 @@ static void xen_hvm_setup_cpu_clockevents(void)
{
int cpu = smp_processor_id();
xen_setup_runstate_info(cpu);
- xen_setup_timer(cpu);
+ /*
+ * xen_setup_timer(cpu) - snprintf is bad in atomic context. Hence
+ * doing it xen_hvm_cpu_notify (which gets called by smp_init during
+ * early bootup and also during CPU hotplug events).
+ */
xen_setup_cpu_clockevents();
}