diff options
-rw-r--r-- | arch/x86/kernel/entry_32.S | 1 | ||||
-rw-r--r-- | arch/x86/xen/smp.c | 7 | ||||
-rw-r--r-- | drivers/xen/cpu_hotplug.c | 4 | ||||
-rw-r--r-- | drivers/xen/gntdev.c | 130 | ||||
-rw-r--r-- | drivers/xen/grant-table.c | 48 | ||||
-rw-r--r-- | drivers/xen/privcmd.c | 89 | ||||
-rw-r--r-- | drivers/xen/xen-pciback/pciback.h | 2 |
7 files changed, 167 insertions, 114 deletions
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index ff84d54..6ed91d9 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1065,7 +1065,6 @@ ENTRY(xen_failsafe_callback) lea 16(%esp),%esp CFI_ADJUST_CFA_OFFSET -16 jz 5f - addl $16,%esp jmp iret_exc 5: pushl_cfi $-1 /* orig_ax = -1 => not a system call */ SAVE_ALL diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 4f7d259..34bc4ce 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -432,13 +432,6 @@ static void __cpuinit xen_play_dead(void) /* used only with HOTPLUG_CPU */ play_dead_common(); HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL); cpu_bringup(); - /* - * Balance out the preempt calls - as we are running in cpu_idle - * loop which has been called at bootup from cpu_bringup_and_idle. - * The cpucpu_bringup_and_idle called cpu_bringup which made a - * preempt_disable() So this preempt_enable will balance it out. - */ - preempt_enable(); } #else /* !CONFIG_HOTPLUG_CPU */ diff --git a/drivers/xen/cpu_hotplug.c b/drivers/xen/cpu_hotplug.c index 4dcfced..084041d 100644 --- a/drivers/xen/cpu_hotplug.c +++ b/drivers/xen/cpu_hotplug.c @@ -25,10 +25,10 @@ static void disable_hotplug_cpu(int cpu) static int vcpu_online(unsigned int cpu) { int err; - char dir[32], state[32]; + char dir[16], state[16]; sprintf(dir, "cpu/%u", cpu); - err = xenbus_scanf(XBT_NIL, dir, "availability", "%s", state); + err = xenbus_scanf(XBT_NIL, dir, "availability", "%15s", state); if (err != 1) { if (!xen_initial_domain()) printk(KERN_ERR "XENBUS: Unable to read cpu state\n"); diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 2e22df2..3c8803f 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -56,10 +56,15 @@ MODULE_PARM_DESC(limit, "Maximum number of grants that may be mapped by " static atomic_t pages_mapped = ATOMIC_INIT(0); static int use_ptemod; +#define populate_freeable_maps use_ptemod struct gntdev_priv { + /* maps with visible offsets in the file descriptor */ struct list_head maps; - /* lock protects maps from concurrent changes */ + /* maps that are not visible; will be freed on munmap. + * Only populated if populate_freeable_maps == 1 */ + struct list_head freeable_maps; + /* lock protects maps and freeable_maps */ spinlock_t lock; struct mm_struct *mm; struct mmu_notifier mn; @@ -193,7 +198,7 @@ static struct grant_map *gntdev_find_map_index(struct gntdev_priv *priv, return NULL; } -static void gntdev_put_map(struct grant_map *map) +static void gntdev_put_map(struct gntdev_priv *priv, struct grant_map *map) { if (!map) return; @@ -208,6 +213,12 @@ static void gntdev_put_map(struct grant_map *map) evtchn_put(map->notify.event); } + if (populate_freeable_maps && priv) { + spin_lock(&priv->lock); + list_del(&map->next); + spin_unlock(&priv->lock); + } + if (map->pages && !use_ptemod) unmap_grant_pages(map, 0, map->count); gntdev_free_map(map); @@ -301,17 +312,10 @@ static int __unmap_grant_pages(struct grant_map *map, int offset, int pages) if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) { int pgno = (map->notify.addr >> PAGE_SHIFT); - if (pgno >= offset && pgno < offset + pages && use_ptemod) { - void __user *tmp = (void __user *) - map->vma->vm_start + map->notify.addr; - err = copy_to_user(tmp, &err, 1); - if (err) - return -EFAULT; - map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE; - } else if (pgno >= offset && pgno < offset + pages) { - uint8_t *tmp = kmap(map->pages[pgno]); + if (pgno >= offset && pgno < offset + pages) { + /* No need for kmap, pages are in lowmem */ + uint8_t *tmp = pfn_to_kaddr(page_to_pfn(map->pages[pgno])); tmp[map->notify.addr & (PAGE_SIZE-1)] = 0; - kunmap(map->pages[pgno]); map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE; } } @@ -376,11 +380,24 @@ static void gntdev_vma_open(struct vm_area_struct *vma) static void gntdev_vma_close(struct vm_area_struct *vma) { struct grant_map *map = vma->vm_private_data; + struct file *file = vma->vm_file; + struct gntdev_priv *priv = file->private_data; pr_debug("gntdev_vma_close %p\n", vma); - map->vma = NULL; + if (use_ptemod) { + /* It is possible that an mmu notifier could be running + * concurrently, so take priv->lock to ensure that the vma won't + * vanishing during the unmap_grant_pages call, since we will + * spin here until that completes. Such a concurrent call will + * not do any unmapping, since that has been done prior to + * closing the vma, but it may still iterate the unmap_ops list. + */ + spin_lock(&priv->lock); + map->vma = NULL; + spin_unlock(&priv->lock); + } vma->vm_private_data = NULL; - gntdev_put_map(map); + gntdev_put_map(priv, map); } static struct vm_operations_struct gntdev_vmops = { @@ -390,33 +407,43 @@ static struct vm_operations_struct gntdev_vmops = { /* ------------------------------------------------------------------ */ +static void unmap_if_in_range(struct grant_map *map, + unsigned long start, unsigned long end) +{ + unsigned long mstart, mend; + int err; + + if (!map->vma) + return; + if (map->vma->vm_start >= end) + return; + if (map->vma->vm_end <= start) + return; + mstart = max(start, map->vma->vm_start); + mend = min(end, map->vma->vm_end); + pr_debug("map %d+%d (%lx %lx), range %lx %lx, mrange %lx %lx\n", + map->index, map->count, + map->vma->vm_start, map->vma->vm_end, + start, end, mstart, mend); + err = unmap_grant_pages(map, + (mstart - map->vma->vm_start) >> PAGE_SHIFT, + (mend - mstart) >> PAGE_SHIFT); + WARN_ON(err); +} + static void mn_invl_range_start(struct mmu_notifier *mn, struct mm_struct *mm, unsigned long start, unsigned long end) { struct gntdev_priv *priv = container_of(mn, struct gntdev_priv, mn); struct grant_map *map; - unsigned long mstart, mend; - int err; spin_lock(&priv->lock); list_for_each_entry(map, &priv->maps, next) { - if (!map->vma) - continue; - if (map->vma->vm_start >= end) - continue; - if (map->vma->vm_end <= start) - continue; - mstart = max(start, map->vma->vm_start); - mend = min(end, map->vma->vm_end); - pr_debug("map %d+%d (%lx %lx), range %lx %lx, mrange %lx %lx\n", - map->index, map->count, - map->vma->vm_start, map->vma->vm_end, - start, end, mstart, mend); - err = unmap_grant_pages(map, - (mstart - map->vma->vm_start) >> PAGE_SHIFT, - (mend - mstart) >> PAGE_SHIFT); - WARN_ON(err); + unmap_if_in_range(map, start, end); + } + list_for_each_entry(map, &priv->freeable_maps, next) { + unmap_if_in_range(map, start, end); } spin_unlock(&priv->lock); } @@ -445,6 +472,15 @@ static void mn_release(struct mmu_notifier *mn, err = unmap_grant_pages(map, /* offset */ 0, map->count); WARN_ON(err); } + list_for_each_entry(map, &priv->freeable_maps, next) { + if (!map->vma) + continue; + pr_debug("map %d+%d (%lx %lx)\n", + map->index, map->count, + map->vma->vm_start, map->vma->vm_end); + err = unmap_grant_pages(map, /* offset */ 0, map->count); + WARN_ON(err); + } spin_unlock(&priv->lock); } @@ -466,6 +502,7 @@ static int gntdev_open(struct inode *inode, struct file *flip) return -ENOMEM; INIT_LIST_HEAD(&priv->maps); + INIT_LIST_HEAD(&priv->freeable_maps); spin_lock_init(&priv->lock); if (use_ptemod) { @@ -500,8 +537,9 @@ static int gntdev_release(struct inode *inode, struct file *flip) while (!list_empty(&priv->maps)) { map = list_entry(priv->maps.next, struct grant_map, next); list_del(&map->next); - gntdev_put_map(map); + gntdev_put_map(NULL /* already removed */, map); } + WARN_ON(!list_empty(&priv->freeable_maps)); if (use_ptemod) mmu_notifier_unregister(&priv->mn, priv->mm); @@ -529,14 +567,14 @@ static long gntdev_ioctl_map_grant_ref(struct gntdev_priv *priv, if (unlikely(atomic_add_return(op.count, &pages_mapped) > limit)) { pr_debug("can't map: over limit\n"); - gntdev_put_map(map); + gntdev_put_map(NULL, map); return err; } if (copy_from_user(map->grants, &u->refs, sizeof(map->grants[0]) * op.count) != 0) { - gntdev_put_map(map); - return err; + gntdev_put_map(NULL, map); + return -EFAULT; } spin_lock(&priv->lock); @@ -565,11 +603,13 @@ static long gntdev_ioctl_unmap_grant_ref(struct gntdev_priv *priv, map = gntdev_find_map_index(priv, op.index >> PAGE_SHIFT, op.count); if (map) { list_del(&map->next); + if (populate_freeable_maps) + list_add_tail(&map->next, &priv->freeable_maps); err = 0; } spin_unlock(&priv->lock); if (map) - gntdev_put_map(map); + gntdev_put_map(priv, map); return err; } @@ -579,25 +619,31 @@ static long gntdev_ioctl_get_offset_for_vaddr(struct gntdev_priv *priv, struct ioctl_gntdev_get_offset_for_vaddr op; struct vm_area_struct *vma; struct grant_map *map; + int rv = -EINVAL; if (copy_from_user(&op, u, sizeof(op)) != 0) return -EFAULT; pr_debug("priv %p, offset for vaddr %lx\n", priv, (unsigned long)op.vaddr); + down_read(¤t->mm->mmap_sem); vma = find_vma(current->mm, op.vaddr); if (!vma || vma->vm_ops != &gntdev_vmops) - return -EINVAL; + goto out_unlock; map = vma->vm_private_data; if (!map) - return -EINVAL; + goto out_unlock; op.offset = map->index << PAGE_SHIFT; op.count = map->count; + rv = 0; - if (copy_to_user(u, &op, sizeof(op)) != 0) + out_unlock: + up_read(¤t->mm->mmap_sem); + + if (rv == 0 && copy_to_user(u, &op, sizeof(op)) != 0) return -EFAULT; - return 0; + return rv; } static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u) @@ -778,7 +824,7 @@ out_unlock_put: out_put_map: if (use_ptemod) map->vma = NULL; - gntdev_put_map(map); + gntdev_put_map(priv, map); return err; } diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 7038de5..157c0cc 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -56,10 +56,6 @@ /* External tools reserve first few grant table entries. */ #define NR_RESERVED_ENTRIES 8 #define GNTTAB_LIST_END 0xffffffff -#define GREFS_PER_GRANT_FRAME \ -(grant_table_version == 1 ? \ -(PAGE_SIZE / sizeof(struct grant_entry_v1)) : \ -(PAGE_SIZE / sizeof(union grant_entry_v2))) static grant_ref_t **gnttab_list; static unsigned int nr_grant_frames; @@ -154,6 +150,7 @@ static struct gnttab_ops *gnttab_interface; static grant_status_t *grstatus; static int grant_table_version; +static int grefs_per_grant_frame; static struct gnttab_free_callback *gnttab_free_callback_list; @@ -767,12 +764,14 @@ static int grow_gnttab_list(unsigned int more_frames) unsigned int new_nr_grant_frames, extra_entries, i; unsigned int nr_glist_frames, new_nr_glist_frames; + BUG_ON(grefs_per_grant_frame == 0); + new_nr_grant_frames = nr_grant_frames + more_frames; - extra_entries = more_frames * GREFS_PER_GRANT_FRAME; + extra_entries = more_frames * grefs_per_grant_frame; - nr_glist_frames = (nr_grant_frames * GREFS_PER_GRANT_FRAME + RPP - 1) / RPP; + nr_glist_frames = (nr_grant_frames * grefs_per_grant_frame + RPP - 1) / RPP; new_nr_glist_frames = - (new_nr_grant_frames * GREFS_PER_GRANT_FRAME + RPP - 1) / RPP; + (new_nr_grant_frames * grefs_per_grant_frame + RPP - 1) / RPP; for (i = nr_glist_frames; i < new_nr_glist_frames; i++) { gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_ATOMIC); if (!gnttab_list[i]) @@ -780,12 +779,12 @@ static int grow_gnttab_list(unsigned int more_frames) } - for (i = GREFS_PER_GRANT_FRAME * nr_grant_frames; - i < GREFS_PER_GRANT_FRAME * new_nr_grant_frames - 1; i++) + for (i = grefs_per_grant_frame * nr_grant_frames; + i < grefs_per_grant_frame * new_nr_grant_frames - 1; i++) gnttab_entry(i) = i + 1; gnttab_entry(i) = gnttab_free_head; - gnttab_free_head = GREFS_PER_GRANT_FRAME * nr_grant_frames; + gnttab_free_head = grefs_per_grant_frame * nr_grant_frames; gnttab_free_count += extra_entries; nr_grant_frames = new_nr_grant_frames; @@ -957,7 +956,8 @@ EXPORT_SYMBOL_GPL(gnttab_unmap_refs); static unsigned nr_status_frames(unsigned nr_grant_frames) { - return (nr_grant_frames * GREFS_PER_GRANT_FRAME + SPP - 1) / SPP; + BUG_ON(grefs_per_grant_frame == 0); + return (nr_grant_frames * grefs_per_grant_frame + SPP - 1) / SPP; } static int gnttab_map_frames_v1(xen_pfn_t *frames, unsigned int nr_gframes) @@ -1115,6 +1115,7 @@ static void gnttab_request_version(void) rc = HYPERVISOR_grant_table_op(GNTTABOP_set_version, &gsv, 1); if (rc == 0 && gsv.version == 2) { grant_table_version = 2; + grefs_per_grant_frame = PAGE_SIZE / sizeof(union grant_entry_v2); gnttab_interface = &gnttab_v2_ops; } else if (grant_table_version == 2) { /* @@ -1127,17 +1128,17 @@ static void gnttab_request_version(void) panic("we need grant tables version 2, but only version 1 is available"); } else { grant_table_version = 1; + grefs_per_grant_frame = PAGE_SIZE / sizeof(struct grant_entry_v1); gnttab_interface = &gnttab_v1_ops; } printk(KERN_INFO "Grant tables using version %d layout.\n", grant_table_version); } -int gnttab_resume(void) +static int gnttab_setup(void) { unsigned int max_nr_gframes; - gnttab_request_version(); max_nr_gframes = gnttab_max_grant_frames(); if (max_nr_gframes < nr_grant_frames) return -ENOSYS; @@ -1160,6 +1161,12 @@ int gnttab_resume(void) return 0; } +int gnttab_resume(void) +{ + gnttab_request_version(); + return gnttab_setup(); +} + int gnttab_suspend(void) { gnttab_interface->unmap_frames(); @@ -1171,9 +1178,10 @@ static int gnttab_expand(unsigned int req_entries) int rc; unsigned int cur, extra; + BUG_ON(grefs_per_grant_frame == 0); cur = nr_grant_frames; - extra = ((req_entries + (GREFS_PER_GRANT_FRAME-1)) / - GREFS_PER_GRANT_FRAME); + extra = ((req_entries + (grefs_per_grant_frame-1)) / + grefs_per_grant_frame); if (cur + extra > gnttab_max_grant_frames()) return -ENOSPC; @@ -1191,21 +1199,23 @@ int gnttab_init(void) unsigned int nr_init_grefs; int ret; + gnttab_request_version(); nr_grant_frames = 1; boot_max_nr_grant_frames = __max_nr_grant_frames(); /* Determine the maximum number of frames required for the * grant reference free list on the current hypervisor. */ + BUG_ON(grefs_per_grant_frame == 0); max_nr_glist_frames = (boot_max_nr_grant_frames * - GREFS_PER_GRANT_FRAME / RPP); + grefs_per_grant_frame / RPP); gnttab_list = kmalloc(max_nr_glist_frames * sizeof(grant_ref_t *), GFP_KERNEL); if (gnttab_list == NULL) return -ENOMEM; - nr_glist_frames = (nr_grant_frames * GREFS_PER_GRANT_FRAME + RPP - 1) / RPP; + nr_glist_frames = (nr_grant_frames * grefs_per_grant_frame + RPP - 1) / RPP; for (i = 0; i < nr_glist_frames; i++) { gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_KERNEL); if (gnttab_list[i] == NULL) { @@ -1214,12 +1224,12 @@ int gnttab_init(void) } } - if (gnttab_resume() < 0) { + if (gnttab_setup() < 0) { ret = -ENODEV; goto ini_nomem; } - nr_init_grefs = nr_grant_frames * GREFS_PER_GRANT_FRAME; + nr_init_grefs = nr_grant_frames * grefs_per_grant_frame; for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++) gnttab_entry(i) = i + 1; diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index 0bbbccb..ca2b00e 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -199,9 +199,6 @@ static long privcmd_ioctl_mmap(void __user *udata) LIST_HEAD(pagelist); struct mmap_mfn_state state; - if (!xen_initial_domain()) - return -EPERM; - /* We only support privcmd_ioctl_mmap_batch for auto translated. */ if (xen_feature(XENFEAT_auto_translated_physmap)) return -ENOSYS; @@ -261,11 +258,12 @@ struct mmap_batch_state { * -ENOENT if at least 1 -ENOENT has happened. */ int global_error; - /* An array for individual errors */ - int *err; + int version; /* User-space mfn array to store errors in the second pass for V1. */ xen_pfn_t __user *user_mfn; + /* User-space int array to store errors in the second pass for V2. */ + int __user *user_err; }; /* auto translated dom0 note: if domU being created is PV, then mfn is @@ -288,7 +286,19 @@ static int mmap_batch_fn(void *data, void *state) &cur_page); /* Store error code for second pass. */ - *(st->err++) = ret; + if (st->version == 1) { + if (ret < 0) { + /* + * V1 encodes the error codes in the 32bit top nibble of the + * mfn (with its known limitations vis-a-vis 64 bit callers). + */ + *mfnp |= (ret == -ENOENT) ? + PRIVCMD_MMAPBATCH_PAGED_ERROR : + PRIVCMD_MMAPBATCH_MFN_ERROR; + } + } else { /* st->version == 2 */ + *((int *) mfnp) = ret; + } /* And see if it affects the global_error. */ if (ret < 0) { @@ -305,20 +315,25 @@ static int mmap_batch_fn(void *data, void *state) return 0; } -static int mmap_return_errors_v1(void *data, void *state) +static int mmap_return_errors(void *data, void *state) { - xen_pfn_t *mfnp = data; struct mmap_batch_state *st = state; - int err = *(st->err++); - /* - * V1 encodes the error codes in the 32bit top nibble of the - * mfn (with its known limitations vis-a-vis 64 bit callers). - */ - *mfnp |= (err == -ENOENT) ? - PRIVCMD_MMAPBATCH_PAGED_ERROR : - PRIVCMD_MMAPBATCH_MFN_ERROR; - return __put_user(*mfnp, st->user_mfn++); + if (st->version == 1) { + xen_pfn_t mfnp = *((xen_pfn_t *) data); + if (mfnp & PRIVCMD_MMAPBATCH_MFN_ERROR) + return __put_user(mfnp, st->user_mfn++); + else + st->user_mfn++; + } else { /* st->version == 2 */ + int err = *((int *) data); + if (err) + return __put_user(err, st->user_err++); + else + st->user_err++; + } + + return 0; } /* Allocate pfns that are then mapped with gmfns from foreign domid. Update @@ -357,12 +372,8 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version) struct vm_area_struct *vma; unsigned long nr_pages; LIST_HEAD(pagelist); - int *err_array = NULL; struct mmap_batch_state state; - if (!xen_initial_domain()) - return -EPERM; - switch (version) { case 1: if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch))) @@ -396,10 +407,12 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version) goto out; } - err_array = kcalloc(m.num, sizeof(int), GFP_KERNEL); - if (err_array == NULL) { - ret = -ENOMEM; - goto out; + if (version == 2) { + /* Zero error array now to only copy back actual errors. */ + if (clear_user(m.err, sizeof(int) * m.num)) { + ret = -EFAULT; + goto out; + } } down_write(&mm->mmap_sem); @@ -427,7 +440,7 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version) state.va = m.addr; state.index = 0; state.global_error = 0; - state.err = err_array; + state.version = version; /* mmap_batch_fn guarantees ret == 0 */ BUG_ON(traverse_pages(m.num, sizeof(xen_pfn_t), @@ -435,21 +448,14 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version) up_write(&mm->mmap_sem); - if (version == 1) { - if (state.global_error) { - /* Write back errors in second pass. */ - state.user_mfn = (xen_pfn_t *)m.arr; - state.err = err_array; - ret = traverse_pages(m.num, sizeof(xen_pfn_t), - &pagelist, mmap_return_errors_v1, &state); - } else - ret = 0; - - } else if (version == 2) { - ret = __copy_to_user(m.err, err_array, m.num * sizeof(int)); - if (ret) - ret = -EFAULT; - } + if (state.global_error) { + /* Write back errors in second pass. */ + state.user_mfn = (xen_pfn_t *)m.arr; + state.user_err = m.err; + ret = traverse_pages(m.num, sizeof(xen_pfn_t), + &pagelist, mmap_return_errors, &state); + } else + ret = 0; /* If we have not had any EFAULT-like global errors then set the global * error to -ENOENT if necessary. */ @@ -457,7 +463,6 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version) ret = -ENOENT; out: - kfree(err_array); free_page_list(&pagelist); return ret; diff --git a/drivers/xen/xen-pciback/pciback.h b/drivers/xen/xen-pciback/pciback.h index a7def01..f72af87 100644 --- a/drivers/xen/xen-pciback/pciback.h +++ b/drivers/xen/xen-pciback/pciback.h @@ -124,7 +124,7 @@ static inline int xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev, static inline void xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev, struct pci_dev *dev) { - if (xen_pcibk_backend && xen_pcibk_backend->free) + if (xen_pcibk_backend && xen_pcibk_backend->release) return xen_pcibk_backend->release(pdev, dev); } |