From e0edd3c65aa5b53e20280565a7ce11675eb7ed6b Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 4 Mar 2009 11:57:20 -0800 Subject: sysfs: don't block indefinitely for unmapped files. Modify sysfs bin files so that we can remove the bin file while they are still mapped. When the kobject is removed we unmap the bin file and arrange for future accesses to the mapping to receive SIGBUS. Implementing this prevents a nasty DOS when pci devices are hot plugged and unplugged. Where if any of their resources were mmaped the kernel could not free up their pci resources or release their pci data structures. [akpm@linux-foundation.org: remove unused var] Signed-off-by: Eric W. Biederman Cc: Jesse Barnes Acked-by: Tejun Heo Cc: Kay Sievers Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/bin.c | 184 +++++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 171 insertions(+), 13 deletions(-) (limited to 'fs/sysfs/bin.c') diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c index f2c478c..96cc2bf 100644 --- a/fs/sysfs/bin.c +++ b/fs/sysfs/bin.c @@ -21,15 +21,28 @@ #include #include #include +#include #include #include "sysfs.h" +/* + * There's one bin_buffer for each open file. + * + * filp->private_data points to bin_buffer and + * sysfs_dirent->s_bin_attr.buffers points to a the bin_buffer s + * sysfs_dirent->s_bin_attr.buffers is protected by sysfs_bin_lock + */ +static DEFINE_MUTEX(sysfs_bin_lock); + struct bin_buffer { - struct mutex mutex; - void *buffer; - int mmapped; + struct mutex mutex; + void *buffer; + int mmapped; + struct vm_operations_struct *vm_ops; + struct file *file; + struct hlist_node list; }; static int @@ -168,29 +181,148 @@ out_free: return count; } +static void bin_vma_open(struct vm_area_struct *vma) +{ + struct file *file = vma->vm_file; + struct bin_buffer *bb = file->private_data; + struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; + + if (!bb->vm_ops || !bb->vm_ops->open) + return; + + if (!sysfs_get_active_two(attr_sd)) + return; + + bb->vm_ops->open(vma); + + sysfs_put_active_two(attr_sd); +} + +static void bin_vma_close(struct vm_area_struct *vma) +{ + struct file *file = vma->vm_file; + struct bin_buffer *bb = file->private_data; + struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; + + if (!bb->vm_ops || !bb->vm_ops->close) + return; + + if (!sysfs_get_active_two(attr_sd)) + return; + + bb->vm_ops->close(vma); + + sysfs_put_active_two(attr_sd); +} + +static int bin_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct file *file = vma->vm_file; + struct bin_buffer *bb = file->private_data; + struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; + int ret; + + if (!bb->vm_ops || !bb->vm_ops->fault) + return VM_FAULT_SIGBUS; + + if (!sysfs_get_active_two(attr_sd)) + return VM_FAULT_SIGBUS; + + ret = bb->vm_ops->fault(vma, vmf); + + sysfs_put_active_two(attr_sd); + return ret; +} + +static int bin_page_mkwrite(struct vm_area_struct *vma, struct page *page) +{ + struct file *file = vma->vm_file; + struct bin_buffer *bb = file->private_data; + struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; + int ret; + + if (!bb->vm_ops || !bb->vm_ops->page_mkwrite) + return -EINVAL; + + if (!sysfs_get_active_two(attr_sd)) + return -EINVAL; + + ret = bb->vm_ops->page_mkwrite(vma, page); + + sysfs_put_active_two(attr_sd); + return ret; +} + +static int bin_access(struct vm_area_struct *vma, unsigned long addr, + void *buf, int len, int write) +{ + struct file *file = vma->vm_file; + struct bin_buffer *bb = file->private_data; + struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; + int ret; + + if (!bb->vm_ops || !bb->vm_ops->access) + return -EINVAL; + + if (!sysfs_get_active_two(attr_sd)) + return -EINVAL; + + ret = bb->vm_ops->access(vma, addr, buf, len, write); + + sysfs_put_active_two(attr_sd); + return ret; +} + +static struct vm_operations_struct bin_vm_ops = { + .open = bin_vma_open, + .close = bin_vma_close, + .fault = bin_fault, + .page_mkwrite = bin_page_mkwrite, + .access = bin_access, +}; + static int mmap(struct file *file, struct vm_area_struct *vma) { struct bin_buffer *bb = file->private_data; struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; struct bin_attribute *attr = attr_sd->s_bin_attr.bin_attr; struct kobject *kobj = attr_sd->s_parent->s_dir.kobj; + struct vm_operations_struct *vm_ops; int rc; mutex_lock(&bb->mutex); /* need attr_sd for attr, its parent for kobj */ + rc = -ENODEV; if (!sysfs_get_active_two(attr_sd)) - return -ENODEV; + goto out_unlock; rc = -EINVAL; - if (attr->mmap) - rc = attr->mmap(kobj, attr, vma); + if (!attr->mmap) + goto out_put; - if (rc == 0 && !bb->mmapped) - bb->mmapped = 1; - else - sysfs_put_active_two(attr_sd); + rc = attr->mmap(kobj, attr, vma); + vm_ops = vma->vm_ops; + vma->vm_ops = &bin_vm_ops; + if (rc) + goto out_put; + rc = -EINVAL; + if (bb->mmapped && bb->vm_ops != vma->vm_ops) + goto out_put; + +#ifdef CONFIG_NUMA + rc = -EINVAL; + if (vm_ops && ((vm_ops->set_policy || vm_ops->get_policy || vm_ops->migrate))) + goto out_put; +#endif + + rc = 0; + bb->mmapped = 1; + bb->vm_ops = vm_ops; +out_put: + sysfs_put_active_two(attr_sd); +out_unlock: mutex_unlock(&bb->mutex); return rc; @@ -223,8 +355,13 @@ static int open(struct inode * inode, struct file * file) goto err_out; mutex_init(&bb->mutex); + bb->file = file; file->private_data = bb; + mutex_lock(&sysfs_bin_lock); + hlist_add_head(&bb->list, &attr_sd->s_bin_attr.buffers); + mutex_unlock(&sysfs_bin_lock); + /* open succeeded, put active references */ sysfs_put_active_two(attr_sd); return 0; @@ -237,11 +374,12 @@ static int open(struct inode * inode, struct file * file) static int release(struct inode * inode, struct file * file) { - struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; struct bin_buffer *bb = file->private_data; - if (bb->mmapped) - sysfs_put_active_two(attr_sd); + mutex_lock(&sysfs_bin_lock); + hlist_del(&bb->list); + mutex_unlock(&sysfs_bin_lock); + kfree(bb->buffer); kfree(bb); return 0; @@ -256,6 +394,26 @@ const struct file_operations bin_fops = { .release = release, }; + +void unmap_bin_file(struct sysfs_dirent *attr_sd) +{ + struct bin_buffer *bb; + struct hlist_node *tmp; + + if (sysfs_type(attr_sd) != SYSFS_KOBJ_BIN_ATTR) + return; + + mutex_lock(&sysfs_bin_lock); + + hlist_for_each_entry(bb, tmp, &attr_sd->s_bin_attr.buffers, list) { + struct inode *inode = bb->file->f_path.dentry->d_inode; + + unmap_mapping_range(inode->i_mapping, 0, 0, 1); + } + + mutex_unlock(&sysfs_bin_lock); +} + /** * sysfs_create_bin_file - create binary file for object. * @kobj: object. -- cgit v1.1 From 095160aee954688a9bad225952c4bee546541e19 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 23 Mar 2009 01:41:27 +0000 Subject: sysfs: fix some bin_vm_ops errors Commit 86c9508eb1c0ce5aa07b5cf1d36b60c54efc3d7a "sysfs: don't block indefinitely for unmapped files" in linux-next crashes the PowerMac G5 when X starts up. It's caught out by the way powerpc's pci_mmap of legacy_mem uses shmem_zero_setup(), substituting a new vma->vm_file whose private_data no longer points to the bin_buffer (substitution done because some versions of X crash if that mmap fails). The fix to this is straightforward: the original vm_file is fput() in that case, so this mmap won't block sysfs at all, so just don't switch over to bin_vm_ops if vm_file has changed. But more fixes made before realizing that was the problem:- It should not be an error if bin_page_mkwrite() finds no underlying page_mkwrite(). Check that a file already mmap'ed has the same underlying vm_ops _before_ pointing vma->vm_ops at bin_vm_ops. If the file being mmap'ed is a shmem/tmpfs file, don't fail the mmap on CONFIG_NUMA=y, just because that has a set_policy and get_policy: provide bin_set_policy, bin_get_policy and bin_migrate. Signed-off-by: Hugh Dickins Acked-by: Eric Biederman Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/bin.c | 89 +++++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 79 insertions(+), 10 deletions(-) (limited to 'fs/sysfs/bin.c') diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c index 96cc2bf..07703d3 100644 --- a/fs/sysfs/bin.c +++ b/fs/sysfs/bin.c @@ -241,9 +241,12 @@ static int bin_page_mkwrite(struct vm_area_struct *vma, struct page *page) struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; int ret; - if (!bb->vm_ops || !bb->vm_ops->page_mkwrite) + if (!bb->vm_ops) return -EINVAL; + if (!bb->vm_ops->page_mkwrite) + return 0; + if (!sysfs_get_active_two(attr_sd)) return -EINVAL; @@ -273,12 +276,78 @@ static int bin_access(struct vm_area_struct *vma, unsigned long addr, return ret; } +#ifdef CONFIG_NUMA +static int bin_set_policy(struct vm_area_struct *vma, struct mempolicy *new) +{ + struct file *file = vma->vm_file; + struct bin_buffer *bb = file->private_data; + struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; + int ret; + + if (!bb->vm_ops || !bb->vm_ops->set_policy) + return 0; + + if (!sysfs_get_active_two(attr_sd)) + return -EINVAL; + + ret = bb->vm_ops->set_policy(vma, new); + + sysfs_put_active_two(attr_sd); + return ret; +} + +static struct mempolicy *bin_get_policy(struct vm_area_struct *vma, + unsigned long addr) +{ + struct file *file = vma->vm_file; + struct bin_buffer *bb = file->private_data; + struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; + struct mempolicy *pol; + + if (!bb->vm_ops || !bb->vm_ops->get_policy) + return vma->vm_policy; + + if (!sysfs_get_active_two(attr_sd)) + return vma->vm_policy; + + pol = bb->vm_ops->get_policy(vma, addr); + + sysfs_put_active_two(attr_sd); + return pol; +} + +static int bin_migrate(struct vm_area_struct *vma, const nodemask_t *from, + const nodemask_t *to, unsigned long flags) +{ + struct file *file = vma->vm_file; + struct bin_buffer *bb = file->private_data; + struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; + int ret; + + if (!bb->vm_ops || !bb->vm_ops->migrate) + return 0; + + if (!sysfs_get_active_two(attr_sd)) + return 0; + + ret = bb->vm_ops->migrate(vma, from, to, flags); + + sysfs_put_active_two(attr_sd); + return ret; +} +#endif + static struct vm_operations_struct bin_vm_ops = { .open = bin_vma_open, .close = bin_vma_close, .fault = bin_fault, .page_mkwrite = bin_page_mkwrite, .access = bin_access, +#ifdef CONFIG_NUMA + .set_policy = bin_set_policy, + .get_policy = bin_get_policy, + .migrate = bin_migrate, +#endif }; static int mmap(struct file *file, struct vm_area_struct *vma) @@ -287,7 +356,6 @@ static int mmap(struct file *file, struct vm_area_struct *vma) struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; struct bin_attribute *attr = attr_sd->s_bin_attr.bin_attr; struct kobject *kobj = attr_sd->s_parent->s_dir.kobj; - struct vm_operations_struct *vm_ops; int rc; mutex_lock(&bb->mutex); @@ -302,24 +370,25 @@ static int mmap(struct file *file, struct vm_area_struct *vma) goto out_put; rc = attr->mmap(kobj, attr, vma); - vm_ops = vma->vm_ops; - vma->vm_ops = &bin_vm_ops; if (rc) goto out_put; - rc = -EINVAL; - if (bb->mmapped && bb->vm_ops != vma->vm_ops) + /* + * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup() + * to satisfy versions of X which crash if the mmap fails: that + * substitutes a new vm_file, and we don't then want bin_vm_ops. + */ + if (vma->vm_file != file) goto out_put; -#ifdef CONFIG_NUMA rc = -EINVAL; - if (vm_ops && ((vm_ops->set_policy || vm_ops->get_policy || vm_ops->migrate))) + if (bb->mmapped && bb->vm_ops != vma->vm_ops) goto out_put; -#endif rc = 0; bb->mmapped = 1; - bb->vm_ops = vm_ops; + bb->vm_ops = vma->vm_ops; + vma->vm_ops = &bin_vm_ops; out_put: sysfs_put_active_two(attr_sd); out_unlock: -- cgit v1.1