author     Ingo Molnar <mingo@elte.hu>    2008-10-15 13:46:29 +0200
committer  Ingo Molnar <mingo@elte.hu>    2008-10-15 13:46:29 +0200
commit     b2aaf8f74cdc84a9182f6cabf198b7763bcb9d40 (patch)
tree       53ccb1c2c14751fe69cf93102e76e97021f6df07 /virt/kvm
parent     4f962d4d65923d7b722192e729840cfb79af0a5a (diff)
parent     278429cff8809958d25415ba0ed32b59866ab1a8 (diff)
download   kernel_samsung_espresso10-b2aaf8f74cdc84a9182f6cabf198b7763bcb9d40.zip
           kernel_samsung_espresso10-b2aaf8f74cdc84a9182f6cabf198b7763bcb9d40.tar.gz
           kernel_samsung_espresso10-b2aaf8f74cdc84a9182f6cabf198b7763bcb9d40.tar.bz2
Merge branch 'linus' into stackprotector
Conflicts:
arch/x86/kernel/Makefile
include/asm-x86/pda.h
Diffstat (limited to 'virt/kvm')
-rw-r--r--   virt/kvm/coalesced_mmio.c   156
-rw-r--r--   virt/kvm/coalesced_mmio.h    23
-rw-r--r--   virt/kvm/ioapic.c            25
-rw-r--r--   virt/kvm/iodev.h              8
-rw-r--r--   virt/kvm/kvm_main.c         320
-rw-r--r--   virt/kvm/kvm_trace.c         18
6 files changed, 504 insertions(+), 46 deletions(-)
diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c
new file mode 100644
index 0000000..5ae620d
--- /dev/null
+++ b/virt/kvm/coalesced_mmio.c
@@ -0,0 +1,156 @@
+/*
+ * KVM coalesced MMIO
+ *
+ * Copyright (c) 2008 Bull S.A.S.
+ *
+ * Author: Laurent Vivier <Laurent.Vivier@bull.net>
+ *
+ */
+
+#include "iodev.h"
+
+#include <linux/kvm_host.h>
+#include <linux/kvm.h>
+
+#include "coalesced_mmio.h"
+
+static int coalesced_mmio_in_range(struct kvm_io_device *this,
+                                   gpa_t addr, int len, int is_write)
+{
+        struct kvm_coalesced_mmio_dev *dev =
+                        (struct kvm_coalesced_mmio_dev*)this->private;
+        struct kvm_coalesced_mmio_zone *zone;
+        int next;
+        int i;
+
+        if (!is_write)
+                return 0;
+
+        /* kvm->lock is taken by the caller and must be not released before
+         * dev.read/write
+         */
+
+        /* Are we able to batch it ? */
+
+        /* last is the first free entry
+         * check if we don't meet the first used entry
+         * there is always one unused entry in the buffer
+         */
+
+        next = (dev->kvm->coalesced_mmio_ring->last + 1) %
+                                                KVM_COALESCED_MMIO_MAX;
+        if (next == dev->kvm->coalesced_mmio_ring->first) {
+                /* full */
+                return 0;
+        }
+
+        /* is it in a batchable area ? */
+
+        for (i = 0; i < dev->nb_zones; i++) {
+                zone = &dev->zone[i];
+
+                /* (addr,len) is fully included in
+                 * (zone->addr, zone->size)
+                 */
+
+                if (zone->addr <= addr &&
+                    addr + len <= zone->addr + zone->size)
+                        return 1;
+        }
+        return 0;
+}
+
+static void coalesced_mmio_write(struct kvm_io_device *this,
+                                 gpa_t addr, int len, const void *val)
+{
+        struct kvm_coalesced_mmio_dev *dev =
+                        (struct kvm_coalesced_mmio_dev*)this->private;
+        struct kvm_coalesced_mmio_ring *ring = dev->kvm->coalesced_mmio_ring;
+
+        /* kvm->lock must be taken by caller before call to in_range()*/
+
+        /* copy data in first free entry of the ring */
+
+        ring->coalesced_mmio[ring->last].phys_addr = addr;
+        ring->coalesced_mmio[ring->last].len = len;
+        memcpy(ring->coalesced_mmio[ring->last].data, val, len);
+        smp_wmb();
+        ring->last = (ring->last + 1) % KVM_COALESCED_MMIO_MAX;
+}
+
+static void coalesced_mmio_destructor(struct kvm_io_device *this)
+{
+        kfree(this);
+}
+
+int kvm_coalesced_mmio_init(struct kvm *kvm)
+{
+        struct kvm_coalesced_mmio_dev *dev;
+
+        dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), GFP_KERNEL);
+        if (!dev)
+                return -ENOMEM;
+        dev->dev.write = coalesced_mmio_write;
+        dev->dev.in_range = coalesced_mmio_in_range;
+        dev->dev.destructor = coalesced_mmio_destructor;
+        dev->dev.private = dev;
+        dev->kvm = kvm;
+        kvm->coalesced_mmio_dev = dev;
+        kvm_io_bus_register_dev(&kvm->mmio_bus, &dev->dev);
+
+        return 0;
+}
+
+int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
+                                         struct kvm_coalesced_mmio_zone *zone)
+{
+        struct kvm_coalesced_mmio_dev *dev = kvm->coalesced_mmio_dev;
+
+        if (dev == NULL)
+                return -EINVAL;
+
+        mutex_lock(&kvm->lock);
+        if (dev->nb_zones >= KVM_COALESCED_MMIO_ZONE_MAX) {
+                mutex_unlock(&kvm->lock);
+                return -ENOBUFS;
+        }
+
+        dev->zone[dev->nb_zones] = *zone;
+        dev->nb_zones++;
+
+        mutex_unlock(&kvm->lock);
+        return 0;
+}
+
+int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
+                                           struct kvm_coalesced_mmio_zone *zone)
+{
+        int i;
+        struct kvm_coalesced_mmio_dev *dev = kvm->coalesced_mmio_dev;
+        struct kvm_coalesced_mmio_zone *z;
+
+        if (dev == NULL)
+                return -EINVAL;
+
+        mutex_lock(&kvm->lock);
+
+        i = dev->nb_zones;
+        while(i) {
+                z = &dev->zone[i - 1];
+
+                /* unregister all zones
+                 * included in (zone->addr, zone->size)
+                 */
+
+                if (zone->addr <= z->addr &&
+                    z->addr + z->size <= zone->addr + zone->size) {
+                        dev->nb_zones--;
+                        *z = dev->zone[dev->nb_zones];
+                }
+                i--;
+        }
+
+        mutex_unlock(&kvm->lock);
+
+        return 0;
+}
diff --git a/virt/kvm/coalesced_mmio.h b/virt/kvm/coalesced_mmio.h
new file mode 100644
index 0000000..5ac0ec6
--- /dev/null
+++ b/virt/kvm/coalesced_mmio.h
@@ -0,0 +1,23 @@
+/*
+ * KVM coalesced MMIO
+ *
+ * Copyright (c) 2008 Bull S.A.S.
+ *
+ * Author: Laurent Vivier <Laurent.Vivier@bull.net>
+ *
+ */
+
+#define KVM_COALESCED_MMIO_ZONE_MAX 100
+
+struct kvm_coalesced_mmio_dev {
+        struct kvm_io_device dev;
+        struct kvm *kvm;
+        int nb_zones;
+        struct kvm_coalesced_mmio_zone zone[KVM_COALESCED_MMIO_ZONE_MAX];
+};
+
+int kvm_coalesced_mmio_init(struct kvm *kvm);
+int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
+                                         struct kvm_coalesced_mmio_zone *zone);
+int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
+                                           struct kvm_coalesced_mmio_zone *zone);
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 1dcf9f3..c0d2287 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -146,6 +146,11 @@ static int ioapic_inj_irq(struct kvm_ioapic *ioapic,
 	return kvm_apic_set_irq(vcpu, vector, trig_mode);
 }
 
+static void ioapic_inj_nmi(struct kvm_vcpu *vcpu)
+{
+	kvm_inject_nmi(vcpu);
+}
+
 static u32 ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
 				       u8 dest_mode)
 {
@@ -239,8 +244,19 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
 			}
 		}
 		break;
-
-		/* TODO: NMI */
+	case IOAPIC_NMI:
+		for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) {
+			if (!(deliver_bitmask & (1 << vcpu_id)))
+				continue;
+			deliver_bitmask &= ~(1 << vcpu_id);
+			vcpu = ioapic->kvm->vcpus[vcpu_id];
+			if (vcpu)
+				ioapic_inj_nmi(vcpu);
+			else
+				ioapic_debug("NMI to vcpu %d failed\n",
+						vcpu->vcpu_id);
+		}
+		break;
 	default:
 		printk(KERN_WARNING "Unsupported delivery mode %d\n",
 		       delivery_mode);
@@ -278,7 +294,7 @@ static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int gsi)
 	ent->fields.remote_irr = 0;
 
 	if (!ent->fields.mask && (ioapic->irr & (1 << gsi)))
-		ioapic_deliver(ioapic, gsi);
+		ioapic_service(ioapic, gsi);
 }
 
@@ -291,7 +307,8 @@ void kvm_ioapic_update_eoi(struct kvm *kvm, int vector)
 			__kvm_ioapic_update_eoi(ioapic, i);
 }
 
-static int ioapic_in_range(struct kvm_io_device *this, gpa_t addr)
+static int ioapic_in_range(struct kvm_io_device *this, gpa_t addr,
+			   int len, int is_write)
 {
 	struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private;
 
diff --git a/virt/kvm/iodev.h b/virt/kvm/iodev.h
index c14e642..55e8846 100644
--- a/virt/kvm/iodev.h
+++ b/virt/kvm/iodev.h
@@ -27,7 +27,8 @@ struct kvm_io_device {
 		      gpa_t addr,
 		      int len,
 		      const void *val);
-	int (*in_range)(struct kvm_io_device *this, gpa_t addr);
+	int (*in_range)(struct kvm_io_device *this, gpa_t addr, int len,
+			int is_write);
 	void (*destructor)(struct kvm_io_device *this);
 
 	void *private;
@@ -49,9 +50,10 @@ static inline void kvm_iodevice_write(struct kvm_io_device *dev,
 	dev->write(dev, addr, len, val);
 }
 
-static inline int kvm_iodevice_inrange(struct kvm_io_device *dev, gpa_t addr)
+static inline int kvm_iodevice_inrange(struct kvm_io_device *dev,
+				       gpa_t addr, int len, int is_write)
 {
-	return dev->in_range(dev, addr);
+	return dev->in_range(dev, addr, len, is_write);
 }
 
 static inline void kvm_iodevice_destructor(struct kvm_io_device *dev)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 2d29e26..7dd9b0b 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -47,6 +47,10 @@
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 
+#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
+#include "coalesced_mmio.h"
+#endif
+
 MODULE_AUTHOR("Qumranet");
 MODULE_LICENSE("GPL");
 
@@ -65,6 +69,8 @@ struct dentry *kvm_debugfs_dir;
 static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
 			   unsigned long arg);
 
+bool kvm_rebooting;
+
 static inline int valid_vcpu(int n)
 {
 	return likely(n >= 0 && n < KVM_MAX_VCPUS);
@@ -99,10 +105,11 @@ static void ack_flush(void *_completed)
 
 void kvm_flush_remote_tlbs(struct kvm *kvm)
 {
-	int i, cpu;
+	int i, cpu, me;
 	cpumask_t cpus;
 	struct kvm_vcpu *vcpu;
 
+	me = get_cpu();
 	cpus_clear(cpus);
 	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
 		vcpu = kvm->vcpus[i];
@@ -111,21 +118,24 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
 		if (test_and_set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
 			continue;
 		cpu = vcpu->cpu;
-		if (cpu != -1 && cpu != raw_smp_processor_id())
+		if (cpu != -1 && cpu != me)
 			cpu_set(cpu, cpus);
 	}
 	if (cpus_empty(cpus))
-		return;
+		goto out;
 	++kvm->stat.remote_tlb_flush;
 	smp_call_function_mask(cpus, ack_flush, NULL, 1);
+out:
+	put_cpu();
 }
 
 void kvm_reload_remote_mmus(struct kvm *kvm)
 {
-	int i, cpu;
+	int i, cpu, me;
 	cpumask_t cpus;
 	struct kvm_vcpu *vcpu;
 
+	me = get_cpu();
 	cpus_clear(cpus);
 	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
 		vcpu = kvm->vcpus[i];
@@ -134,12 +144,14 @@ void kvm_reload_remote_mmus(struct kvm *kvm)
 		if (test_and_set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
 			continue;
 		cpu = vcpu->cpu;
-		if (cpu != -1 && cpu != raw_smp_processor_id())
+		if (cpu != -1 && cpu != me)
 			cpu_set(cpu, cpus);
 	}
 	if (cpus_empty(cpus))
-		return;
+		goto out;
 	smp_call_function_mask(cpus, ack_flush, NULL, 1);
+out:
+	put_cpu();
 }
 
@@ -180,13 +192,158 @@ void kvm_vcpu_uninit(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_uninit);
 
+#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
+static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn)
+{
+	return container_of(mn, struct kvm, mmu_notifier);
+}
+
+static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
+					     struct mm_struct *mm,
+					     unsigned long address)
+{
+	struct kvm *kvm = mmu_notifier_to_kvm(mn);
+	int need_tlb_flush;
+
+	/*
+	 * When ->invalidate_page runs, the linux pte has been zapped
+	 * already but the page is still allocated until
+	 * ->invalidate_page returns. So if we increase the sequence
+	 * here the kvm page fault will notice if the spte can't be
+	 * established because the page is going to be freed. If
+	 * instead the kvm page fault establishes the spte before
+	 * ->invalidate_page runs, kvm_unmap_hva will release it
+	 * before returning.
+	 *
+	 * The sequence increase only need to be seen at spin_unlock
+	 * time, and not at spin_lock time.
+	 *
+	 * Increasing the sequence after the spin_unlock would be
+	 * unsafe because the kvm page fault could then establish the
+	 * pte after kvm_unmap_hva returned, without noticing the page
+	 * is going to be freed.
+	 */
+	spin_lock(&kvm->mmu_lock);
+	kvm->mmu_notifier_seq++;
+	need_tlb_flush = kvm_unmap_hva(kvm, address);
+	spin_unlock(&kvm->mmu_lock);
+
+	/* we've to flush the tlb before the pages can be freed */
+	if (need_tlb_flush)
+		kvm_flush_remote_tlbs(kvm);
+
+}
+
+static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
+						    struct mm_struct *mm,
+						    unsigned long start,
+						    unsigned long end)
+{
+	struct kvm *kvm = mmu_notifier_to_kvm(mn);
+	int need_tlb_flush = 0;
+
+	spin_lock(&kvm->mmu_lock);
+	/*
+	 * The count increase must become visible at unlock time as no
+	 * spte can be established without taking the mmu_lock and
+	 * count is also read inside the mmu_lock critical section.
+	 */
+	kvm->mmu_notifier_count++;
+	for (; start < end; start += PAGE_SIZE)
+		need_tlb_flush |= kvm_unmap_hva(kvm, start);
+	spin_unlock(&kvm->mmu_lock);
+
+	/* we've to flush the tlb before the pages can be freed */
+	if (need_tlb_flush)
+		kvm_flush_remote_tlbs(kvm);
+}
+
+static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
+						  struct mm_struct *mm,
+						  unsigned long start,
+						  unsigned long end)
+{
+	struct kvm *kvm = mmu_notifier_to_kvm(mn);
+
+	spin_lock(&kvm->mmu_lock);
+	/*
+	 * This sequence increase will notify the kvm page fault that
+	 * the page that is going to be mapped in the spte could have
+	 * been freed.
+	 */
+	kvm->mmu_notifier_seq++;
+	/*
+	 * The above sequence increase must be visible before the
+	 * below count decrease but both values are read by the kvm
+	 * page fault under mmu_lock spinlock so we don't need to add
+	 * a smb_wmb() here in between the two.
+	 */
+	kvm->mmu_notifier_count--;
+	spin_unlock(&kvm->mmu_lock);
+
+	BUG_ON(kvm->mmu_notifier_count < 0);
+}
+
+static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
+					      struct mm_struct *mm,
+					      unsigned long address)
+{
+	struct kvm *kvm = mmu_notifier_to_kvm(mn);
+	int young;
+
+	spin_lock(&kvm->mmu_lock);
+	young = kvm_age_hva(kvm, address);
+	spin_unlock(&kvm->mmu_lock);
+
+	if (young)
+		kvm_flush_remote_tlbs(kvm);
+
+	return young;
+}
+
+static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
+	.invalidate_page	= kvm_mmu_notifier_invalidate_page,
+	.invalidate_range_start	= kvm_mmu_notifier_invalidate_range_start,
+	.invalidate_range_end	= kvm_mmu_notifier_invalidate_range_end,
+	.clear_flush_young	= kvm_mmu_notifier_clear_flush_young,
+};
+#endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */
+
 static struct kvm *kvm_create_vm(void)
 {
 	struct kvm *kvm = kvm_arch_create_vm();
+#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
+	struct page *page;
+#endif
 
 	if (IS_ERR(kvm))
 		goto out;
 
+#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
+	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+	if (!page) {
+		kfree(kvm);
+		return ERR_PTR(-ENOMEM);
+	}
+	kvm->coalesced_mmio_ring =
+			(struct kvm_coalesced_mmio_ring *)page_address(page);
+#endif
+
+#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
+	{
+		int err;
+		kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops;
+		err = mmu_notifier_register(&kvm->mmu_notifier, current->mm);
+		if (err) {
+#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
+			put_page(page);
+#endif
+			kfree(kvm);
+			return ERR_PTR(err);
+		}
+	}
+#endif
+
 	kvm->mm = current->mm;
 	atomic_inc(&kvm->mm->mm_count);
 	spin_lock_init(&kvm->mmu_lock);
@@ -198,6 +355,9 @@ static struct kvm *kvm_create_vm(void)
 	spin_lock(&kvm_lock);
 	list_add(&kvm->vm_list, &vm_list);
 	spin_unlock(&kvm_lock);
+#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
+	kvm_coalesced_mmio_init(kvm);
+#endif
 
 out:
 	return kvm;
 }
@@ -240,6 +400,13 @@ static void kvm_destroy_vm(struct kvm *kvm)
 	spin_unlock(&kvm_lock);
 	kvm_io_bus_destroy(&kvm->pio_bus);
 	kvm_io_bus_destroy(&kvm->mmio_bus);
+#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
+	if (kvm->coalesced_mmio_ring != NULL)
+		free_page((unsigned long)kvm->coalesced_mmio_ring);
+#endif
+#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
+	mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm);
+#endif
 	kvm_arch_destroy_vm(kvm);
 	mmdrop(mm);
 }
@@ -333,6 +500,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	r = -ENOMEM;
 
 	/* Allocate if a slot is being created */
+#ifndef CONFIG_S390
 	if (npages && !new.rmap) {
 		new.rmap = vmalloc(npages * sizeof(struct page *));
 
@@ -342,7 +510,15 @@ int __kvm_set_memory_region(struct kvm *kvm,
 		memset(new.rmap, 0, npages * sizeof(*new.rmap));
 
 		new.user_alloc = user_alloc;
-		new.userspace_addr = mem->userspace_addr;
+		/*
+		 * hva_to_rmmap() serialzies with the mmu_lock and to be
+		 * safe it has to ignore memslots with !user_alloc &&
+		 * !userspace_addr.
+		 */
+		if (user_alloc)
+			new.userspace_addr = mem->userspace_addr;
+		else
+			new.userspace_addr = 0;
 	}
 	if (npages && !new.lpage_info) {
 		int largepages = npages / KVM_PAGES_PER_HPAGE;
@@ -373,15 +549,23 @@ int __kvm_set_memory_region(struct kvm *kvm,
 			goto out_free;
 		memset(new.dirty_bitmap, 0, dirty_bytes);
 	}
+#endif /* not defined CONFIG_S390 */
+
+	if (!npages)
+		kvm_arch_flush_shadow(kvm);
+
+	spin_lock(&kvm->mmu_lock);
 	if (mem->slot >= kvm->nmemslots)
 		kvm->nmemslots = mem->slot + 1;
 
 	*memslot = new;
+	spin_unlock(&kvm->mmu_lock);
 
 	r = kvm_arch_set_memory_region(kvm, mem, old, user_alloc);
 	if (r) {
+		spin_lock(&kvm->mmu_lock);
 		*memslot = old;
+		spin_unlock(&kvm->mmu_lock);
 		goto out_free;
 	}
 
@@ -532,6 +716,7 @@ pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
 	struct page *page[1];
 	unsigned long addr;
 	int npages;
+	pfn_t pfn;
 
 	might_sleep();
 
@@ -544,19 +729,38 @@ pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
 	npages = get_user_pages(current, current->mm, addr, 1, 1, 1, page,
 				NULL);
 
-	if (npages != 1) {
-		get_page(bad_page);
-		return page_to_pfn(bad_page);
-	}
+	if (unlikely(npages != 1)) {
+		struct vm_area_struct *vma;
+
+		vma = find_vma(current->mm, addr);
+		if (vma == NULL || addr < vma->vm_start ||
+		    !(vma->vm_flags & VM_PFNMAP)) {
+			get_page(bad_page);
+			return page_to_pfn(bad_page);
+		}
 
-	return page_to_pfn(page[0]);
+		pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
+		BUG_ON(pfn_valid(pfn));
+	} else
+		pfn = page_to_pfn(page[0]);
+
+	return pfn;
 }
 EXPORT_SYMBOL_GPL(gfn_to_pfn);
 
 struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
 {
-	return pfn_to_page(gfn_to_pfn(kvm, gfn));
+	pfn_t pfn;
+
+	pfn = gfn_to_pfn(kvm, gfn);
+	if (pfn_valid(pfn))
+		return pfn_to_page(pfn);
+
+	WARN_ON(!pfn_valid(pfn));
+
+	get_page(bad_page);
+	return bad_page;
 }
 EXPORT_SYMBOL_GPL(gfn_to_page);
 
@@ -569,7 +773,8 @@ EXPORT_SYMBOL_GPL(kvm_release_page_clean);
 
 void kvm_release_pfn_clean(pfn_t pfn)
 {
-	put_page(pfn_to_page(pfn));
+	if (pfn_valid(pfn))
+		put_page(pfn_to_page(pfn));
 }
 EXPORT_SYMBOL_GPL(kvm_release_pfn_clean);
 
@@ -594,21 +799,25 @@ EXPORT_SYMBOL_GPL(kvm_set_page_dirty);
 
 void kvm_set_pfn_dirty(pfn_t pfn)
 {
-	struct page *page = pfn_to_page(pfn);
-	if (!PageReserved(page))
-		SetPageDirty(page);
+	if (pfn_valid(pfn)) {
+		struct page *page = pfn_to_page(pfn);
+		if (!PageReserved(page))
+			SetPageDirty(page);
+	}
 }
 EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty);
 
 void kvm_set_pfn_accessed(pfn_t pfn)
 {
-	mark_page_accessed(pfn_to_page(pfn));
+	if (pfn_valid(pfn))
+		mark_page_accessed(pfn_to_page(pfn));
 }
 EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed);
 
 void kvm_get_pfn(pfn_t pfn)
 {
-	get_page(pfn_to_page(pfn));
+	if (pfn_valid(pfn))
+		get_page(pfn_to_page(pfn));
 }
 EXPORT_SYMBOL_GPL(kvm_get_pfn);
 
@@ -799,6 +1008,10 @@ static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	else if (vmf->pgoff == KVM_PIO_PAGE_OFFSET)
 		page = virt_to_page(vcpu->arch.pio_data);
 #endif
+#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
+	else if (vmf->pgoff == KVM_COALESCED_MMIO_PAGE_OFFSET)
+		page = virt_to_page(vcpu->kvm->coalesced_mmio_ring);
+#endif
 	else
 		return VM_FAULT_SIGBUS;
 	get_page(page);
@@ -836,7 +1049,7 @@ static const struct file_operations kvm_vcpu_fops = {
  */
 static int create_vcpu_fd(struct kvm_vcpu *vcpu)
 {
-	int fd = anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu);
+	int fd = anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, 0);
 	if (fd < 0)
 		kvm_put_kvm(vcpu->kvm);
 	return fd;
@@ -1121,6 +1334,32 @@ static long kvm_vm_ioctl(struct file *filp,
 			goto out;
 		break;
 	}
+#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
+	case KVM_REGISTER_COALESCED_MMIO: {
+		struct kvm_coalesced_mmio_zone zone;
+		r = -EFAULT;
+		if (copy_from_user(&zone, argp, sizeof zone))
+			goto out;
+		r = -ENXIO;
+		r = kvm_vm_ioctl_register_coalesced_mmio(kvm, &zone);
+		if (r)
+			goto out;
+		r = 0;
+		break;
+	}
+	case KVM_UNREGISTER_COALESCED_MMIO: {
+		struct kvm_coalesced_mmio_zone zone;
+		r = -EFAULT;
+		if (copy_from_user(&zone, argp, sizeof zone))
+			goto out;
+		r = -ENXIO;
+		r = kvm_vm_ioctl_unregister_coalesced_mmio(kvm, &zone);
+		if (r)
+			goto out;
+		r = 0;
+		break;
+	}
+#endif
 	default:
 		r = kvm_arch_vm_ioctl(filp, ioctl, arg);
 	}
@@ -1169,7 +1408,7 @@ static int kvm_dev_ioctl_create_vm(void)
 	kvm = kvm_create_vm();
 	if (IS_ERR(kvm))
 		return PTR_ERR(kvm);
-	fd = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm);
+	fd = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, 0);
 	if (fd < 0)
 		kvm_put_kvm(kvm);
 
@@ -1179,7 +1418,6 @@ static int kvm_dev_ioctl_create_vm(void)
 static long kvm_dev_ioctl(struct file *filp,
 			  unsigned int ioctl, unsigned long arg)
 {
-	void __user *argp = (void __user *)arg;
 	long r = -EINVAL;
 
 	switch (ioctl) {
@@ -1196,7 +1434,7 @@ static long kvm_dev_ioctl(struct file *filp,
 		r = kvm_dev_ioctl_create_vm();
 		break;
 	case KVM_CHECK_EXTENSION:
-		r = kvm_dev_ioctl_check_extension((long)argp);
+		r = kvm_dev_ioctl_check_extension(arg);
 		break;
 	case KVM_GET_VCPU_MMAP_SIZE:
 		r = -EINVAL;
@@ -1206,6 +1444,9 @@ static long kvm_dev_ioctl(struct file *filp,
 #ifdef CONFIG_X86
 		r += PAGE_SIZE;    /* pio data page */
 #endif
+#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
+		r += PAGE_SIZE;    /* coalesced mmio ring page */
+#endif
 		break;
 	case KVM_TRACE_ENABLE:
 	case KVM_TRACE_PAUSE:
@@ -1247,7 +1488,6 @@ static void hardware_disable(void *junk)
 	if (!cpu_isset(cpu, cpus_hardware_enabled))
 		return;
 	cpu_clear(cpu, cpus_hardware_enabled);
-	decache_vcpus_on_cpu(cpu);
 	kvm_arch_hardware_disable(NULL);
 }
 
@@ -1266,17 +1506,29 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
 	case CPU_UP_CANCELED:
 		printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
 		       cpu);
-		smp_call_function_single(cpu, hardware_disable, NULL, 0, 1);
+		smp_call_function_single(cpu, hardware_disable, NULL, 1);
 		break;
 	case CPU_ONLINE:
 		printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n",
 		       cpu);
-		smp_call_function_single(cpu, hardware_enable, NULL, 0, 1);
+		smp_call_function_single(cpu, hardware_enable, NULL, 1);
 		break;
 	}
 	return NOTIFY_OK;
 }
 
+asmlinkage void kvm_handle_fault_on_reboot(void)
+{
+	if (kvm_rebooting)
+		/* spin while reset goes on */
+		while (true)
+			;
+	/* Fault while not rebooting. We want the trace. */
+	BUG();
+}
+EXPORT_SYMBOL_GPL(kvm_handle_fault_on_reboot);
+
 static int kvm_reboot(struct notifier_block *notifier, unsigned long val,
 		      void *v)
 {
@@ -1286,7 +1538,8 @@ static int kvm_reboot(struct notifier_block *notifier, unsigned long val,
 		 * in vmx root mode.
 		 */
 		printk(KERN_INFO "kvm: exiting hardware virtualization\n");
-		on_each_cpu(hardware_disable, NULL, 0, 1);
+		kvm_rebooting = true;
+		on_each_cpu(hardware_disable, NULL, 1);
 	}
 	return NOTIFY_OK;
 }
@@ -1312,14 +1565,15 @@ void kvm_io_bus_destroy(struct kvm_io_bus *bus)
 	}
 }
 
-struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus, gpa_t addr)
+struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus,
+					  gpa_t addr, int len, int is_write)
 {
 	int i;
 
 	for (i = 0; i < bus->dev_count; i++) {
 		struct kvm_io_device *pos = bus->devs[i];
 
-		if (pos->in_range(pos, addr))
+		if (pos->in_range(pos, addr, len, is_write))
 			return pos;
 	}
 
@@ -1474,12 +1728,12 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
 	for_each_online_cpu(cpu) {
 		smp_call_function_single(cpu,
 				kvm_arch_check_processor_compat,
-				&r, 0, 1);
+				&r, 1);
 		if (r < 0)
 			goto out_free_1;
 	}
 
-	on_each_cpu(hardware_enable, NULL, 0, 1);
+	on_each_cpu(hardware_enable, NULL, 1);
 	r = register_cpu_notifier(&kvm_cpu_notifier);
 	if (r)
 		goto out_free_2;
@@ -1525,7 +1779,7 @@ out_free_3:
 	unregister_reboot_notifier(&kvm_reboot_notifier);
 	unregister_cpu_notifier(&kvm_cpu_notifier);
 out_free_2:
-	on_each_cpu(hardware_disable, NULL, 0, 1);
+	on_each_cpu(hardware_disable, NULL, 1);
out_free_1:
 	kvm_arch_hardware_unsetup();
 out_free_0:
@@ -1547,7 +1801,7 @@ void kvm_exit(void)
 	sysdev_class_unregister(&kvm_sysdev_class);
 	unregister_reboot_notifier(&kvm_reboot_notifier);
 	unregister_cpu_notifier(&kvm_cpu_notifier);
-	on_each_cpu(hardware_disable, NULL, 0, 1);
+	on_each_cpu(hardware_disable, NULL, 1);
 	kvm_arch_hardware_unsetup();
 	kvm_arch_exit();
 	kvm_exit_debug();
diff --git a/virt/kvm/kvm_trace.c b/virt/kvm/kvm_trace.c
index 0e49547..58141f3 100644
--- a/virt/kvm/kvm_trace.c
+++ b/virt/kvm/kvm_trace.c
@@ -72,11 +72,7 @@ static void kvm_add_trace(void *probe_private, void *call_data,
 	rec.cycle_in	= p->cycle_in;
 
 	if (rec.cycle_in) {
-		u64 cycle = 0;
-
-		cycle = get_cycles();
-		rec.u.cycle.cycle_lo = (u32)cycle;
-		rec.u.cycle.cycle_hi = (u32)(cycle >> 32);
+		rec.u.cycle.cycle_u64 = get_cycles();
 
 		for (i = 0; i < rec.extra_u32; i++)
 			rec.u.cycle.extra_u32[i] = va_arg(*args, u32);
@@ -114,8 +110,18 @@ static int kvm_subbuf_start_callback(struct rchan_buf *buf, void *subbuf,
 {
 	struct kvm_trace *kt;
 
-	if (!relay_buf_full(buf))
+	if (!relay_buf_full(buf)) {
+		if (!prev_subbuf) {
+			/*
+			 * executed only once when the channel is opened
+			 * save metadata as first record
+			 */
+			subbuf_start_reserve(buf, sizeof(u32));
+			*(u32 *)subbuf = 0x12345678;
+		}
+
 		return 1;
+	}
 
 	kt = buf->chan->private_data;
 	atomic_inc(&kt->lost_records);
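
The coalesced-MMIO support merged above is driven from userspace: the VMM registers guest-physical zones with KVM_REGISTER_COALESCED_MMIO and later drains the ring page that kvm_vcpu_fault() exposes behind the vcpu mmap area. The sketch below shows that consumer side only as an illustration; it assumes the kvm_coalesced_mmio_* structures and the KVM_COALESCED_MMIO_PAGE_OFFSET / KVM_COALESCED_MMIO_MAX constants from the kernel headers that ship with this patch, and handle_mmio() plus the zone address are hypothetical.

/* Sketch of a userspace consumer of the coalesced MMIO ring.
 * Assumes <linux/kvm.h> from a kernel carrying this patch; handle_mmio()
 * and the registered zone are hypothetical, not part of the diff above. */
#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>

extern void handle_mmio(__u64 addr, __u8 *data, __u32 len);	/* hypothetical */

static struct kvm_coalesced_mmio_ring *ring;

/* Register a batchable zone and locate the ring page in the vcpu mmap area
 * (the page served at KVM_COALESCED_MMIO_PAGE_OFFSET by kvm_vcpu_fault()). */
static int coalesced_mmio_setup(int vm_fd, int vcpu_fd, size_t mmap_size)
{
	struct kvm_coalesced_mmio_zone zone = {
		.addr = 0xe0000000,	/* hypothetical framebuffer window */
		.size = 0x100000,
	};
	char *vcpu_map = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
			      MAP_SHARED, vcpu_fd, 0);

	if (vcpu_map == MAP_FAILED)
		return -1;
	ring = (struct kvm_coalesced_mmio_ring *)
		(vcpu_map + KVM_COALESCED_MMIO_PAGE_OFFSET * sysconf(_SC_PAGESIZE));
	return ioctl(vm_fd, KVM_REGISTER_COALESCED_MMIO, &zone);
}

/* Drain pending writes: 'last' is producer-owned, 'first' is consumer-owned,
 * and one slot always stays unused, so first == last means "empty". */
static void coalesced_mmio_flush(void)
{
	while (ring->first != ring->last) {
		struct kvm_coalesced_mmio *e = &ring->coalesced_mmio[ring->first];

		__sync_synchronize();	/* pairs with the kernel's smp_wmb() */
		handle_mmio(e->phys_addr, e->data, e->len);
		ring->first = (ring->first + 1) % KVM_COALESCED_MMIO_MAX;
	}
}

This mirrors what coalesced_mmio_write() produces: the kernel only advances last, userspace only advances first, and the (last + 1) % KVM_COALESCED_MMIO_MAX == first test in coalesced_mmio_in_range() is the corresponding "full" check.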
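The mmu-notifier hooks in kvm_main.c rely on the arch page-fault path sampling mmu_notifier_seq before resolving the pfn and then re-checking it, together with mmu_notifier_count, under mmu_lock before installing a spte, exactly as the comments in kvm_mmu_notifier_invalidate_page() and invalidate_range_end() describe. A rough sketch of that consumer side follows; the helper name and the surrounding fault-handler skeleton are illustrative assumptions, only the two fields and the retry rule come from the diff above.

/* Illustrative consumer of mmu_notifier_seq/mmu_notifier_count in an arch
 * page-fault path (sketch; not part of this merge). */
static int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq)
{
	/* An invalidation is in progress: the pfn we looked up may already
	 * be stale, so refuse to install the spte. */
	if (unlikely(kvm->mmu_notifier_count))
		return 1;
	/* An invalidation completed after we sampled the sequence number. */
	if (kvm->mmu_notifier_seq != mmu_seq)
		return 1;
	return 0;
}

static int example_page_fault(struct kvm_vcpu *vcpu, gfn_t gfn)
{
	unsigned long mmu_seq;
	pfn_t pfn;

	mmu_seq = vcpu->kvm->mmu_notifier_seq;
	smp_rmb();
	pfn = gfn_to_pfn(vcpu->kvm, gfn);	/* may sleep, no locks held */

	spin_lock(&vcpu->kvm->mmu_lock);
	if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) {
		spin_unlock(&vcpu->kvm->mmu_lock);
		kvm_release_pfn_clean(pfn);
		return -EAGAIN;		/* caller retries the fault */
	}
	/* ... install the spte for pfn here ... */
	spin_unlock(&vcpu->kvm->mmu_lock);
	return 0;
}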