Diffstat (limited to 'fs')
75 files changed, 1234 insertions, 723 deletions
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 333a7bb..4fb8a34 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1215,12 +1215,6 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
 	res = __blkdev_get(bdev, mode, 0);
 
-	/* __blkdev_get() may alter read only status, check it afterwards */
-	if (!res && (mode & FMODE_WRITE) && bdev_read_only(bdev)) {
-		__blkdev_put(bdev, mode, 0);
-		res = -EACCES;
-	}
-
 	if (whole) {
 		/* finish claiming */
 		mutex_lock(&bdev->bd_mutex);
@@ -1298,6 +1292,11 @@ struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
 	if (err)
 		return ERR_PTR(err);
 
+	if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) {
+		blkdev_put(bdev, mode);
+		return ERR_PTR(-EACCES);
+	}
+
 	return bdev;
 }
 EXPORT_SYMBOL(blkdev_get_by_path);
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 15b5ca2..9c94934 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -37,6 +37,9 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
 	char *value = NULL;
 	struct posix_acl *acl;
 
+	if (!IS_POSIXACL(inode))
+		return NULL;
+
 	acl = get_cached_acl(inode, type);
 	if (acl != ACL_NOT_CACHED)
 		return acl;
@@ -84,6 +87,9 @@ static int btrfs_xattr_acl_get(struct dentry *dentry, const char *name,
 	struct posix_acl *acl;
 	int ret = 0;
 
+	if (!IS_POSIXACL(dentry->d_inode))
+		return -EOPNOTSUPP;
+
 	acl = btrfs_get_acl(dentry->d_inode, type);
 
 	if (IS_ERR(acl))
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index f745287..4d2110e 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -562,7 +562,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 	u64 em_len;
 	u64 em_start;
 	struct extent_map *em;
-	int ret;
+	int ret = -ENOMEM;
 	u32 *sums;
 
 	tree = &BTRFS_I(inode)->io_tree;
@@ -577,6 +577,9 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 	compressed_len = em->block_len;
 	cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
+	if (!cb)
+		goto out;
+
 	atomic_set(&cb->pending_bios, 0);
 	cb->errors = 0;
 	cb->inode = inode;
@@ -597,13 +600,18 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 	nr_pages = (compressed_len + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE;
-	cb->compressed_pages = kmalloc(sizeof(struct page *) * nr_pages,
+	cb->compressed_pages = kzalloc(sizeof(struct page *) * nr_pages,
 				       GFP_NOFS);
+	if (!cb->compressed_pages)
+		goto fail1;
+
 	bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
 
 	for (page_index = 0; page_index < nr_pages; page_index++) {
 		cb->compressed_pages[page_index] = alloc_page(GFP_NOFS |
 							      __GFP_HIGHMEM);
+		if (!cb->compressed_pages[page_index])
+			goto fail2;
 	}
 	cb->nr_pages = nr_pages;
@@ -614,6 +622,8 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 	cb->len = uncompressed_len;
 
 	comp_bio = compressed_bio_alloc(bdev, cur_disk_byte, GFP_NOFS);
+	if (!comp_bio)
+		goto fail2;
 	comp_bio->bi_private = cb;
 	comp_bio->bi_end_io = end_compressed_bio_read;
 	atomic_inc(&cb->pending_bios);
@@ -681,6 +691,17 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 	bio_put(comp_bio);
 	return 0;
+
+fail2:
+	for (page_index = 0; page_index < nr_pages; page_index++)
+		free_page((unsigned long)cb->compressed_pages[page_index]);
+
+	kfree(cb->compressed_pages);
+fail1:
+	kfree(cb);
+out:
+	free_extent_map(em);
+	return ret;
 }
 
 static struct list_head comp_idle_workspace[BTRFS_COMPRESS_TYPES];
@@ -900,7 +921,7 @@ int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
 	return ret;
 }
 
-void __exit btrfs_exit_compress(void)
+void btrfs_exit_compress(void)
 {
 	free_workspaces();
 }
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index b531c36..e1aa8d6 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -359,10 +359,14 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
 	tree = &BTRFS_I(page->mapping->host)->io_tree;
 
-	if (page->private == EXTENT_PAGE_PRIVATE)
+	if (page->private == EXTENT_PAGE_PRIVATE) {
+		WARN_ON(1);
 		goto out;
-	if (!page->private)
+	}
+	if (!page->private) {
+		WARN_ON(1);
 		goto out;
+	}
 	len = page->private >> 2;
 	WARN_ON(len == 0);
@@ -1550,6 +1554,7 @@ static int transaction_kthread(void *arg)
 		spin_unlock(&root->fs_info->new_trans_lock);
 
 		trans = btrfs_join_transaction(root, 1);
+		BUG_ON(IS_ERR(trans));
 		if (transid == trans->transid) {
 			ret = btrfs_commit_transaction(trans, root);
 			BUG_ON(ret);
@@ -2453,10 +2458,14 @@ int btrfs_commit_super(struct btrfs_root *root)
 	up_write(&root->fs_info->cleanup_work_sem);
 
 	trans = btrfs_join_transaction(root, 1);
+	if (IS_ERR(trans))
+		return PTR_ERR(trans);
 	ret = btrfs_commit_transaction(trans, root);
 	BUG_ON(ret);
 	/* run commit again to drop the original snapshot */
 	trans = btrfs_join_transaction(root, 1);
+	if (IS_ERR(trans))
+		return PTR_ERR(trans);
 	btrfs_commit_transaction(trans, root);
 	ret = btrfs_write_and_wait_transaction(NULL, root);
 	BUG_ON(ret);
@@ -2554,6 +2563,8 @@ int close_ctree(struct btrfs_root *root)
 	kfree(fs_info->chunk_root);
 	kfree(fs_info->dev_root);
 	kfree(fs_info->csum_root);
+	kfree(fs_info);
+
 	return 0;
 }
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index 9786963..ff27d7a 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -171,6 +171,8 @@ static struct dentry *btrfs_get_parent(struct dentry *child)
 	int ret;
 
 	path = btrfs_alloc_path();
+	if (!path)
+		return ERR_PTR(-ENOMEM);
 
 	if (dir->i_ino == BTRFS_FIRST_FREE_OBJECTID) {
 		key.objectid = root->root_key.objectid;
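The btrfs_submit_compressed_read() change above is the kernel's usual staged-unwind idiom: each allocation gets a matching label, and a failure jumps to the label that releases everything allocated before it, in reverse order. A minimal userspace sketch of the same shape (illustrative names, not the btrfs API), which also shows why the patch switches the page array from kmalloc to kzalloc — the unwind loop can then safely free slots that were never filled in:

	#include <stdlib.h>

	struct compressed_bio_like {
		void **pages;
		int nr_pages;
	};

	/* Staged unwind: each later failure jumps to the label that
	 * releases everything the earlier stages allocated. */
	static struct compressed_bio_like *alloc_all(int nr_pages)
	{
		struct compressed_bio_like *cb;
		int i;

		cb = malloc(sizeof(*cb));
		if (!cb)
			goto out;

		cb->nr_pages = nr_pages;
		cb->pages = calloc(nr_pages, sizeof(void *)); /* zeroed, like kzalloc */
		if (!cb->pages)
			goto fail1;

		for (i = 0; i < nr_pages; i++) {
			cb->pages[i] = malloc(4096);
			if (!cb->pages[i])
				goto fail2;
		}
		return cb;

	fail2:
		/* calloc() left the unreached slots NULL, so freeing the
		 * whole array is safe -- free(NULL) is a no-op. */
		for (i = 0; i < nr_pages; i++)
			free(cb->pages[i]);
		free(cb->pages);
	fail1:
		free(cb);
	out:
		return NULL;
	}

	int main(void)
	{
		return alloc_all(16) ? 0 : 1;
	}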
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index b552693..f3c96fc 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -320,11 +320,6 @@ static int caching_kthread(void *data)
 	if (!path)
 		return -ENOMEM;
 
-	exclude_super_stripes(extent_root, block_group);
-	spin_lock(&block_group->space_info->lock);
-	block_group->space_info->bytes_readonly += block_group->bytes_super;
-	spin_unlock(&block_group->space_info->lock);
-
 	last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
 
 	/*
@@ -467,8 +462,10 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
 			cache->cached = BTRFS_CACHE_NO;
 		}
 		spin_unlock(&cache->lock);
-		if (ret == 1)
+		if (ret == 1) {
+			free_excluded_extents(fs_info->extent_root, cache);
 			return 0;
+		}
 	}
 
 	if (load_cache_only)
@@ -3344,8 +3341,10 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
 	u64 reserved;
 	u64 max_reclaim;
 	u64 reclaimed = 0;
+	long time_left;
 	int pause = 1;
 	int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
+	int loops = 0;
 
 	block_rsv = &root->fs_info->delalloc_block_rsv;
 	space_info = block_rsv->space_info;
@@ -3358,7 +3357,7 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
 	max_reclaim = min(reserved, to_reclaim);
 
-	while (1) {
+	while (loops < 1024) {
 		/* have the flusher threads jump in and do some IO */
 		smp_mb();
 		nr_pages = min_t(unsigned long, nr_pages,
@@ -3366,8 +3365,12 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
 		writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages);
 
 		spin_lock(&space_info->lock);
-		if (reserved > space_info->bytes_reserved)
+		if (reserved > space_info->bytes_reserved) {
+			loops = 0;
 			reclaimed += reserved - space_info->bytes_reserved;
+		} else {
+			loops++;
+		}
 		reserved = space_info->bytes_reserved;
 		spin_unlock(&space_info->lock);
 
@@ -3378,7 +3381,12 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
 			return -EAGAIN;
 
 		__set_current_state(TASK_INTERRUPTIBLE);
-		schedule_timeout(pause);
+		time_left = schedule_timeout(pause);
+
+		/* We were interrupted, exit */
+		if (time_left)
+			break;
+
 		pause <<= 1;
 		if (pause > HZ / 10)
 			pause = HZ / 10;
@@ -3588,8 +3596,20 @@ void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv,
 
 	if (num_bytes > 0) {
 		if (dest) {
-			block_rsv_add_bytes(dest, num_bytes, 0);
-		} else {
+			spin_lock(&dest->lock);
+			if (!dest->full) {
+				u64 bytes_to_add;
+
+				bytes_to_add = dest->size - dest->reserved;
+				bytes_to_add = min(num_bytes, bytes_to_add);
+				dest->reserved += bytes_to_add;
+				if (dest->reserved >= dest->size)
+					dest->full = 1;
+				num_bytes -= bytes_to_add;
+			}
+			spin_unlock(&dest->lock);
+		}
+		if (num_bytes) {
 			spin_lock(&space_info->lock);
 			space_info->bytes_reserved -= num_bytes;
 			spin_unlock(&space_info->lock);
@@ -4012,6 +4032,7 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
 
 	num_bytes = ALIGN(num_bytes, root->sectorsize);
 	atomic_dec(&BTRFS_I(inode)->outstanding_extents);
+	WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents) < 0);
 
 	spin_lock(&BTRFS_I(inode)->accounting_lock);
 	nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents);
@@ -5633,6 +5654,7 @@ use_block_rsv(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 	      u32 blocksize)
 {
 	struct btrfs_block_rsv *block_rsv;
+	struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
 	int ret;
 
 	block_rsv = get_block_rsv(trans, root);
@@ -5640,14 +5662,39 @@ use_block_rsv(struct btrfs_trans_handle *trans,
 	if (block_rsv->size == 0) {
 		ret = reserve_metadata_bytes(trans, root, block_rsv,
 					     blocksize, 0);
-		if (ret)
+		/*
+		 * If we couldn't reserve metadata bytes try and use some from
+		 * the global reserve.
+		 */
+		if (ret && block_rsv != global_rsv) {
+			ret = block_rsv_use_bytes(global_rsv, blocksize);
+			if (!ret)
+				return global_rsv;
+			return ERR_PTR(ret);
+		} else if (ret) {
 			return ERR_PTR(ret);
+		}
 		return block_rsv;
 	}
 
 	ret = block_rsv_use_bytes(block_rsv, blocksize);
 	if (!ret)
 		return block_rsv;
+	if (ret) {
+		WARN_ON(1);
+		ret = reserve_metadata_bytes(trans, root, block_rsv, blocksize,
+					     0);
+		if (!ret) {
+			spin_lock(&block_rsv->lock);
+			block_rsv->size += blocksize;
+			spin_unlock(&block_rsv->lock);
+			return block_rsv;
+		} else if (ret && block_rsv != global_rsv) {
+			ret = block_rsv_use_bytes(global_rsv, blocksize);
+			if (!ret)
+				return global_rsv;
+		}
+	}
 
 	return ERR_PTR(-ENOSPC);
 }
@@ -6221,6 +6268,8 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
 	BUG_ON(!wc);
 
 	trans = btrfs_start_transaction(tree_root, 0);
+	BUG_ON(IS_ERR(trans));
+
 	if (block_rsv)
 		trans->block_rsv = block_rsv;
@@ -6318,6 +6367,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
 
 			btrfs_end_transaction_throttle(trans, tree_root);
 			trans = btrfs_start_transaction(tree_root, 0);
+			BUG_ON(IS_ERR(trans));
 			if (block_rsv)
 				trans->block_rsv = block_rsv;
 		}
@@ -6446,6 +6496,8 @@ static noinline int relocate_inode_pages(struct inode *inode, u64 start,
 	int ret = 0;
 
 	ra = kzalloc(sizeof(*ra), GFP_NOFS);
+	if (!ra)
+		return -ENOMEM;
 
 	mutex_lock(&inode->i_mutex);
 	first_index = start >> PAGE_CACHE_SHIFT;
@@ -6531,7 +6583,7 @@ static noinline int relocate_data_extent(struct inode *reloc_inode,
 	u64 end = start + extent_key->offset - 1;
 
 	em = alloc_extent_map(GFP_NOFS);
-	BUG_ON(!em || IS_ERR(em));
+	BUG_ON(!em);
 
 	em->start = start;
 	em->len = extent_key->offset;
@@ -7477,7 +7529,7 @@ int btrfs_drop_dead_reloc_roots(struct btrfs_root *root)
 		BUG_ON(reloc_root->commit_root != NULL);
 		while (1) {
 			trans = btrfs_join_transaction(root, 1);
-			BUG_ON(!trans);
+			BUG_ON(IS_ERR(trans));
 
 			mutex_lock(&root->fs_info->drop_mutex);
 			ret = btrfs_drop_snapshot(trans, reloc_root);
@@ -7535,7 +7587,7 @@ int btrfs_cleanup_reloc_trees(struct btrfs_root *root)
 
 	if (found) {
 		trans = btrfs_start_transaction(root, 1);
-		BUG_ON(!trans);
+		BUG_ON(IS_ERR(trans));
 		ret = btrfs_commit_transaction(trans, root);
 		BUG_ON(ret);
 	}
@@ -7779,7 +7831,7 @@ static noinline int relocate_one_extent(struct btrfs_root *extent_root,
 
 	trans = btrfs_start_transaction(extent_root, 1);
-	BUG_ON(!trans);
+	BUG_ON(IS_ERR(trans));
 
 	if (extent_key->objectid == 0) {
 		ret = del_extent_zero(trans, extent_root, path, extent_key);
@@ -8270,6 +8322,13 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
 		if (block_group->cached == BTRFS_CACHE_STARTED)
 			wait_block_group_cache_done(block_group);
 
+		/*
+		 * We haven't cached this block group, which means we could
+		 * possibly have excluded extents on this block group.
+		 */
+		if (block_group->cached == BTRFS_CACHE_NO)
+			free_excluded_extents(info->extent_root, block_group);
+
 		btrfs_remove_free_space_cache(block_group);
 		btrfs_put_block_group(block_group);
@@ -8385,6 +8444,13 @@ int btrfs_read_block_groups(struct btrfs_root *root)
 		cache->sectorsize = root->sectorsize;
 
 		/*
+		 * We need to exclude the super stripes now so that the space
+		 * info has super bytes accounted for, otherwise we'll think
+		 * we have more space than we actually do.
+		 */
+		exclude_super_stripes(root, cache);
+
+		/*
 		 * check for two cases, either we are full, and therefore
 		 * don't need to bother with the caching work since we won't
 		 * find any space, or we are empty, and we can just add all
@@ -8392,12 +8458,10 @@ int btrfs_read_block_groups(struct btrfs_root *root)
 		 * time, particularly in the full case.
 		 */
 		if (found_key.offset == btrfs_block_group_used(&cache->item)) {
-			exclude_super_stripes(root, cache);
 			cache->last_byte_to_unpin = (u64)-1;
 			cache->cached = BTRFS_CACHE_FINISHED;
 			free_excluded_extents(root, cache);
 		} else if (btrfs_block_group_used(&cache->item) == 0) {
-			exclude_super_stripes(root, cache);
 			cache->last_byte_to_unpin = (u64)-1;
 			cache->cached = BTRFS_CACHE_FINISHED;
 			add_new_free_space(cache, root->fs_info,
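The new shrink_delalloc() loop bound is worth restating in isolation: `loops` counts consecutive passes that reclaimed nothing and is reset by any progress, so the function can keep running indefinitely while writeback is making headway, but bails out after 1024 fruitless passes (or earlier, when schedule_timeout() returns nonzero because a signal cut the sleep short). A userspace sketch of just that termination logic, with a simulated reclaimer standing in for writeback (the numbers are made up):

	#include <stdio.h>

	/* Pretend writeback reclaims a little less each pass, then stalls. */
	static unsigned long long simulate_writeback(unsigned long long reserved)
	{
		static unsigned long long step = 1 << 20;
		if (step > 4096)
			step >>= 1;
		else
			step = 0;
		return reserved > step ? reserved - step : 0;
	}

	int main(void)
	{
		unsigned long long reserved = 16ULL << 20;	/* 16 MiB outstanding */
		unsigned long long reclaimed = 0;
		int loops = 0;

		/* Same shape as the patched shrink_delalloc(): the loop is
		 * bounded, and the counter resets whenever a pass makes
		 * progress, so only a sustained stall ends it early. */
		while (loops < 1024 && reserved) {
			unsigned long long now = simulate_writeback(reserved);

			if (now < reserved) {
				loops = 0;		/* progress: keep going */
				reclaimed += reserved - now;
			} else {
				loops++;		/* no progress this pass */
			}
			reserved = now;
		}
		printf("reclaimed %llu bytes, residual %llu, idle passes %d\n",
		       reclaimed, reserved, loops);
		return 0;
	}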
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 2e993cf..92ac519 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1865,7 +1865,7 @@ static int submit_one_bio(int rw, struct bio *bio, int mirror_num,
 	bio_get(bio);
 
 	if (tree->ops && tree->ops->submit_bio_hook)
-		tree->ops->submit_bio_hook(page->mapping->host, rw, bio,
+		ret = tree->ops->submit_bio_hook(page->mapping->host, rw, bio,
 					   mirror_num, bio_flags, start);
 	else
 		submit_bio(rw, bio);
@@ -1920,6 +1920,8 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
 		nr = bio_get_nr_vecs(bdev);
 
 	bio = btrfs_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH);
+	if (!bio)
+		return -ENOMEM;
 
 	bio_add_page(bio, page, page_size, offset);
 	bio->bi_end_io = end_io_func;
@@ -1944,6 +1946,7 @@ void set_page_extent_mapped(struct page *page)
 
 static void set_page_extent_head(struct page *page, unsigned long len)
 {
+	WARN_ON(!PagePrivate(page));
 	set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2);
 }
@@ -2126,7 +2129,7 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
 	ret = __extent_read_full_page(tree, page, get_extent, &bio, 0,
 				      &bio_flags);
 	if (bio)
-		submit_one_bio(READ, bio, 0, bio_flags);
+		ret = submit_one_bio(READ, bio, 0, bio_flags);
 	return ret;
 }
@@ -2819,9 +2822,17 @@ int try_release_extent_state(struct extent_map_tree *map,
 		 * at this point we can safely clear everything except the
 		 * locked bit and the nodatasum bit
 		 */
-		clear_extent_bit(tree, start, end,
+		ret = clear_extent_bit(tree, start, end,
 				 ~(EXTENT_LOCKED | EXTENT_NODATASUM),
 				 0, 0, NULL, mask);
+
+		/* if clear_extent_bit failed for enomem reasons,
+		 * we can't allow the release to continue.
+		 */
+		if (ret < 0)
+			ret = 0;
+		else
+			ret = 1;
 	}
 	return ret;
 }
@@ -3192,7 +3203,13 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
 		}
 		if (!PageUptodate(p))
 			uptodate = 0;
-		unlock_page(p);
+
+		/*
+		 * see below about how we avoid a nasty race with release page
+		 * and why we unlock later
+		 */
+		if (i != 0)
+			unlock_page(p);
 	}
 	if (uptodate)
 		set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
@@ -3216,9 +3233,26 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
 	atomic_inc(&eb->refs);
 	spin_unlock(&tree->buffer_lock);
 	radix_tree_preload_end();
+
+	/*
+	 * there is a race where release page may have
+	 * tried to find this extent buffer in the radix
+	 * but failed.  It will tell the VM it is safe to
+	 * reclaim the page, and it will clear the page private bit.
+	 * We must make sure to set the page private bit properly
+	 * after the extent buffer is in the radix tree so
+	 * it doesn't get lost
+	 */
+	set_page_extent_mapped(eb->first_page);
+	set_page_extent_head(eb->first_page, eb->len);
+	if (!page0)
+		unlock_page(eb->first_page);
 	return eb;
 
 free_eb:
+	if (eb->first_page && !page0)
+		unlock_page(eb->first_page);
+
 	if (!atomic_dec_and_test(&eb->refs))
 		return exists;
 	btrfs_release_extent_buffer(eb);
@@ -3269,10 +3303,11 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree,
 			continue;
 
 		lock_page(page);
+		WARN_ON(!PagePrivate(page));
+
+		set_page_extent_mapped(page);
 		if (i == 0)
 			set_page_extent_head(page, eb->len);
-		else
-			set_page_private(page, EXTENT_PAGE_PRIVATE);
 
 		clear_page_dirty_for_io(page);
 		spin_lock_irq(&page->mapping->tree_lock);
@@ -3462,6 +3497,13 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
 
 	for (i = start_i; i < num_pages; i++) {
 		page = extent_buffer_page(eb, i);
+
+		WARN_ON(!PagePrivate(page));
+
+		set_page_extent_mapped(page);
+		if (i == 0)
+			set_page_extent_head(page, eb->len);
+
 		if (inc_all_pages)
 			page_cache_get(page);
 		if (!PageUptodate(page)) {
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index b0e1fce..2b6c12e 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -51,8 +51,8 @@ struct extent_map *alloc_extent_map(gfp_t mask)
 {
 	struct extent_map *em;
 	em = kmem_cache_alloc(extent_map_cache, mask);
-	if (!em || IS_ERR(em))
-		return em;
+	if (!em)
+		return NULL;
 	em->in_tree = 0;
 	em->flags = 0;
 	em->compress_type = BTRFS_COMPRESS_NONE;
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index a562a25..4f19a3e 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -536,6 +536,8 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
 	root = root->fs_info->csum_root;
 
 	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
 
 	while (1) {
 		key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
@@ -548,7 +550,10 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
 			if (path->slots[0] == 0)
 				goto out;
 			path->slots[0]--;
+		} else if (ret < 0) {
+			goto out;
 		}
+
 		leaf = path->nodes[0];
 		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index c800d58..7084140 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -186,6 +186,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
 		split = alloc_extent_map(GFP_NOFS);
 	if (!split2)
 		split2 = alloc_extent_map(GFP_NOFS);
+	BUG_ON(!split || !split2);
 
 	write_lock(&em_tree->lock);
 	em = lookup_extent_mapping(em_tree, start, len);
@@ -793,8 +794,12 @@ again:
 	for (i = 0; i < num_pages; i++) {
 		pages[i] = grab_cache_page(inode->i_mapping, index + i);
 		if (!pages[i]) {
-			err = -ENOMEM;
-			BUG_ON(1);
+			int c;
+			for (c = i - 1; c >= 0; c--) {
+				unlock_page(pages[c]);
+				page_cache_release(pages[c]);
+			}
+			return -ENOMEM;
 		}
 		wait_on_page_writeback(pages[i]);
 	}
@@ -946,6 +951,10 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
 		     PAGE_CACHE_SIZE, PAGE_CACHE_SIZE /
 		     (sizeof(struct page *)));
 	pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL);
+	if (!pages) {
+		ret = -ENOMEM;
+		goto out;
+	}
 
 	/* generic_write_checks can change our pos */
 	start_pos = pos;
@@ -984,8 +993,8 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
 		size_t write_bytes = min(iov_iter_count(&i),
 					 nrptrs * (size_t)PAGE_CACHE_SIZE -
 					 offset);
-		size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >>
-					PAGE_CACHE_SHIFT;
+		size_t num_pages = (write_bytes + offset +
+				    PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 
 		WARN_ON(num_pages > nrptrs);
 		memset(pages, 0, sizeof(struct page *) * nrptrs);
@@ -1015,8 +1024,8 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
 		copied = btrfs_copy_from_user(pos, num_pages,
 					   write_bytes, pages, &i);
 
-		dirty_pages = (copied + PAGE_CACHE_SIZE - 1) >>
-				PAGE_CACHE_SHIFT;
+		dirty_pages = (copied + offset + PAGE_CACHE_SIZE - 1) >>
+				PAGE_CACHE_SHIFT;
 
 		if (num_pages > dirty_pages) {
 			if (copied > 0)
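The two rounding fixes in btrfs_file_aio_write() are easy to sanity-check with concrete numbers: a buffer of write_bytes that starts `offset` bytes into its first page touches ceil((offset + write_bytes) / PAGE_SIZE) pages, while the old formula ignored the offset and could under-count by one. A standalone check, assuming 4 KiB pages:

	#include <assert.h>
	#include <stddef.h>

	#define PAGE_SIZE  4096UL
	#define PAGE_SHIFT 12

	/* pages touched by a write of `bytes` starting `offset` bytes into a page */
	static size_t pages_spanned(size_t bytes, size_t offset)
	{
		return (bytes + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
	}

	int main(void)
	{
		/* 4096 bytes starting at offset 1 straddles two pages ... */
		assert(pages_spanned(4096, 1) == 2);
		/* ... but the pre-patch formula, which ignored the offset,
		 * claims a single page: */
		assert(((4096 + PAGE_SIZE - 1) >> PAGE_SHIFT) == 1);
		/* page-aligned writes are unaffected */
		assert(pages_spanned(8192, 0) == 2);
		return 0;
	}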
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 60d6842..a039065 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -987,11 +987,18 @@ tree_search_offset(struct btrfs_block_group_cache *block_group,
 	return entry;
 }
 
-static void unlink_free_space(struct btrfs_block_group_cache *block_group,
-			      struct btrfs_free_space *info)
+static inline void
+__unlink_free_space(struct btrfs_block_group_cache *block_group,
+		    struct btrfs_free_space *info)
 {
 	rb_erase(&info->offset_index, &block_group->free_space_offset);
 	block_group->free_extents--;
+}
+
+static void unlink_free_space(struct btrfs_block_group_cache *block_group,
+			      struct btrfs_free_space *info)
+{
+	__unlink_free_space(block_group, info);
 	block_group->free_space -= info->bytes;
 }
 
@@ -1016,14 +1023,18 @@ static void recalculate_thresholds(struct btrfs_block_group_cache *block_group)
 	u64 max_bytes;
 	u64 bitmap_bytes;
 	u64 extent_bytes;
+	u64 size = block_group->key.offset;
 
 	/*
	 * The goal is to keep the total amount of memory used per 1gb of space
	 * at or below 32k, so we need to adjust how much memory we allow to be
	 * used by extent based free space tracking
	 */
-	max_bytes = MAX_CACHE_BYTES_PER_GIG *
-		(div64_u64(block_group->key.offset, 1024 * 1024 * 1024));
+	if (size < 1024 * 1024 * 1024)
+		max_bytes = MAX_CACHE_BYTES_PER_GIG;
+	else
+		max_bytes = MAX_CACHE_BYTES_PER_GIG *
+			div64_u64(size, 1024 * 1024 * 1024);
 
 	/*
	 * we want to account for 1 more bitmap than what we have so we can make
@@ -1171,6 +1182,16 @@ static void add_new_bitmap(struct btrfs_block_group_cache *block_group,
 	recalculate_thresholds(block_group);
 }
 
+static void free_bitmap(struct btrfs_block_group_cache *block_group,
+			struct btrfs_free_space *bitmap_info)
+{
+	unlink_free_space(block_group, bitmap_info);
+	kfree(bitmap_info->bitmap);
+	kfree(bitmap_info);
+	block_group->total_bitmaps--;
+	recalculate_thresholds(block_group);
+}
+
 static noinline int remove_from_bitmap(struct btrfs_block_group_cache *block_group,
 			      struct btrfs_free_space *bitmap_info,
 			      u64 *offset, u64 *bytes)
@@ -1195,6 +1216,7 @@ again:
 	 */
 	search_start = *offset;
 	search_bytes = *bytes;
+	search_bytes = min(search_bytes, end - search_start + 1);
 	ret = search_bitmap(block_group, bitmap_info, &search_start,
 			    &search_bytes);
 	BUG_ON(ret < 0 || search_start != *offset);
@@ -1211,13 +1233,8 @@ again:
 
 	if (*bytes) {
 		struct rb_node *next = rb_next(&bitmap_info->offset_index);
-		if (!bitmap_info->bytes) {
-			unlink_free_space(block_group, bitmap_info);
-			kfree(bitmap_info->bitmap);
-			kfree(bitmap_info);
-			block_group->total_bitmaps--;
-			recalculate_thresholds(block_group);
-		}
+		if (!bitmap_info->bytes)
+			free_bitmap(block_group, bitmap_info);
 
 		/*
		 * no entry after this bitmap, but we still have bytes to
@@ -1250,13 +1267,8 @@ again:
 			return -EAGAIN;
 
 		goto again;
-	} else if (!bitmap_info->bytes) {
-		unlink_free_space(block_group, bitmap_info);
-		kfree(bitmap_info->bitmap);
-		kfree(bitmap_info);
-		block_group->total_bitmaps--;
-		recalculate_thresholds(block_group);
-	}
+	} else if (!bitmap_info->bytes)
+		free_bitmap(block_group, bitmap_info);
 
 	return 0;
 }
@@ -1359,22 +1371,14 @@ out:
 	return ret;
 }
 
-int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
-			 u64 offset, u64 bytes)
+bool try_merge_free_space(struct btrfs_block_group_cache *block_group,
+			  struct btrfs_free_space *info, bool update_stat)
 {
-	struct btrfs_free_space *right_info = NULL;
-	struct btrfs_free_space *left_info = NULL;
-	struct btrfs_free_space *info = NULL;
-	int ret = 0;
-
-	info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS);
-	if (!info)
-		return -ENOMEM;
-
-	info->offset = offset;
-	info->bytes = bytes;
-
-	spin_lock(&block_group->tree_lock);
+	struct btrfs_free_space *left_info;
+	struct btrfs_free_space *right_info;
+	bool merged = false;
+	u64 offset = info->offset;
+	u64 bytes = info->bytes;
 
 	/*
	 * first we want to see if there is free space adjacent to the range we
@@ -1388,37 +1392,62 @@ int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
 	else
 		left_info = tree_search_offset(block_group, offset - 1, 0, 0);
 
-	/*
-	 * If there was no extent directly to the left or right of this new
-	 * extent then we know we're going to have to allocate a new extent, so
-	 * before we do that see if we need to drop this into a bitmap
-	 */
-	if ((!left_info || left_info->bitmap) &&
-	    (!right_info || right_info->bitmap)) {
-		ret = insert_into_bitmap(block_group, info);
-
-		if (ret < 0) {
-			goto out;
-		} else if (ret) {
-			ret = 0;
-			goto out;
-		}
-	}
-
 	if (right_info && !right_info->bitmap) {
-		unlink_free_space(block_group, right_info);
+		if (update_stat)
+			unlink_free_space(block_group, right_info);
+		else
+			__unlink_free_space(block_group, right_info);
 		info->bytes += right_info->bytes;
 		kfree(right_info);
+		merged = true;
 	}
 
 	if (left_info && !left_info->bitmap &&
 	    left_info->offset + left_info->bytes == offset) {
-		unlink_free_space(block_group, left_info);
+		if (update_stat)
+			unlink_free_space(block_group, left_info);
+		else
+			__unlink_free_space(block_group, left_info);
 		info->offset = left_info->offset;
 		info->bytes += left_info->bytes;
 		kfree(left_info);
+		merged = true;
 	}
 
+	return merged;
+}
+
+int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
+			 u64 offset, u64 bytes)
+{
+	struct btrfs_free_space *info;
+	int ret = 0;
+
+	info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS);
+	if (!info)
+		return -ENOMEM;
+
+	info->offset = offset;
+	info->bytes = bytes;
+
+	spin_lock(&block_group->tree_lock);
+
+	if (try_merge_free_space(block_group, info, true))
+		goto link;
+
+	/*
+	 * There was no extent directly to the left or right of this new
+	 * extent, so we know we're going to have to allocate a new extent;
+	 * before we do that, see if we need to drop this into a bitmap
+	 */
+	ret = insert_into_bitmap(block_group, info);
+	if (ret < 0) {
+		goto out;
+	} else if (ret) {
+		ret = 0;
+		goto out;
+	}
+link:
 	ret = link_free_space(block_group, info);
 	if (ret)
 		kfree(info);
@@ -1621,6 +1650,7 @@ __btrfs_return_cluster_to_free_space(
 		node = rb_next(&entry->offset_index);
 		rb_erase(&entry->offset_index, &cluster->root);
 		BUG_ON(entry->bitmap);
+		try_merge_free_space(block_group, entry, false);
 		tree_insert_offset(&block_group->free_space_offset,
 				   entry->offset, &entry->offset_index, 0);
 	}
@@ -1685,13 +1715,8 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
 	ret = offset;
 	if (entry->bitmap) {
 		bitmap_clear_bits(block_group, entry, offset, bytes);
-		if (!entry->bytes) {
-			unlink_free_space(block_group, entry);
-			kfree(entry->bitmap);
-			kfree(entry);
-			block_group->total_bitmaps--;
-			recalculate_thresholds(block_group);
-		}
+		if (!entry->bytes)
+			free_bitmap(block_group, entry);
 	} else {
 		unlink_free_space(block_group, entry);
 		entry->offset += bytes;
@@ -1789,6 +1814,8 @@ static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
 
 	ret = search_start;
 	bitmap_clear_bits(block_group, entry, ret, bytes);
+	if (entry->bytes == 0)
+		free_bitmap(block_group, entry);
 out:
 	spin_unlock(&cluster->lock);
 	spin_unlock(&block_group->tree_lock);
@@ -1842,15 +1869,26 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
 		entry->offset += bytes;
 		entry->bytes -= bytes;
 
-		if (entry->bytes == 0) {
+		if (entry->bytes == 0)
 			rb_erase(&entry->offset_index, &cluster->root);
-			kfree(entry);
-		}
 		break;
 	}
 out:
 	spin_unlock(&cluster->lock);
 
+	if (!ret)
+		return 0;
+
+	spin_lock(&block_group->tree_lock);
+
+	block_group->free_space -= bytes;
+	if (entry->bytes == 0) {
+		block_group->free_extents--;
+		kfree(entry);
+	}
+
+	spin_unlock(&block_group->tree_lock);
+
 	return ret;
 }
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 160b55b..fb9bd78 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -416,7 +416,7 @@ again:
 	}
 	if (start == 0) {
 		trans = btrfs_join_transaction(root, 1);
-		BUG_ON(!trans);
+		BUG_ON(IS_ERR(trans));
 		btrfs_set_trans_block_group(trans, inode);
 		trans->block_rsv = &root->fs_info->delalloc_block_rsv;
@@ -612,6 +612,7 @@ retry:
 					GFP_NOFS);
 
 		trans = btrfs_join_transaction(root, 1);
+		BUG_ON(IS_ERR(trans));
 		ret = btrfs_reserve_extent(trans, root,
 					   async_extent->compressed_size,
 					   async_extent->compressed_size,
@@ -643,6 +644,7 @@ retry:
 					async_extent->ram_size - 1, 0);
 
 		em = alloc_extent_map(GFP_NOFS);
+		BUG_ON(!em);
 		em->start = async_extent->start;
 		em->len = async_extent->ram_size;
 		em->orig_start = em->start;
@@ -771,7 +773,7 @@ static noinline int cow_file_range(struct inode *inode,
 	BUG_ON(root == root->fs_info->tree_root);
 	trans = btrfs_join_transaction(root, 1);
-	BUG_ON(!trans);
+	BUG_ON(IS_ERR(trans));
 	btrfs_set_trans_block_group(trans, inode);
 	trans->block_rsv = &root->fs_info->delalloc_block_rsv;
@@ -819,6 +821,7 @@ static noinline int cow_file_range(struct inode *inode,
 		BUG_ON(ret);
 
 		em = alloc_extent_map(GFP_NOFS);
+		BUG_ON(!em);
 		em->start = start;
 		em->orig_start = em->start;
 		ram_size = ins.offset;
@@ -1049,7 +1052,7 @@ static noinline int run_delalloc_nocow(struct inode *inode,
 	} else {
 		trans = btrfs_join_transaction(root, 1);
 	}
-	BUG_ON(!trans);
+	BUG_ON(IS_ERR(trans));
 
 	cow_start = (u64)-1;
 	cur_offset = start;
@@ -1168,6 +1171,7 @@ out_check:
 			struct extent_map_tree *em_tree;
 			em_tree = &BTRFS_I(inode)->extent_tree;
 			em = alloc_extent_map(GFP_NOFS);
+			BUG_ON(!em);
 			em->start = cur_offset;
 			em->orig_start = em->start;
 			em->len = num_bytes;
@@ -1557,6 +1561,7 @@ out:
 out_page:
 	unlock_page(page);
 	page_cache_release(page);
+	kfree(fixup);
 }
 
 /*
@@ -1703,7 +1708,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
 		trans = btrfs_join_transaction_nolock(root, 1);
 	else
 		trans = btrfs_join_transaction(root, 1);
-	BUG_ON(!trans);
+	BUG_ON(IS_ERR(trans));
 	btrfs_set_trans_block_group(trans, inode);
 	trans->block_rsv = &root->fs_info->delalloc_block_rsv;
 	ret = btrfs_update_inode(trans, root, inode);
@@ -1720,6 +1725,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
 		trans = btrfs_join_transaction_nolock(root, 1);
 	else
 		trans = btrfs_join_transaction(root, 1);
+	BUG_ON(IS_ERR(trans));
 	btrfs_set_trans_block_group(trans, inode);
 	trans->block_rsv = &root->fs_info->delalloc_block_rsv;
@@ -2354,6 +2360,7 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
 		 */
 		if (is_bad_inode(inode)) {
 			trans = btrfs_start_transaction(root, 0);
+			BUG_ON(IS_ERR(trans));
 			btrfs_orphan_del(trans, inode);
 			btrfs_end_transaction(trans, root);
 			iput(inode);
@@ -2381,6 +2388,7 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
 
 	if (root->orphan_block_rsv || root->orphan_item_inserted) {
 		trans = btrfs_join_transaction(root, 1);
+		BUG_ON(IS_ERR(trans));
 		btrfs_end_transaction(trans, root);
 	}
@@ -2641,7 +2649,7 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
 	path = btrfs_alloc_path();
 	if (!path) {
 		ret = -ENOMEM;
-		goto err;
+		goto out;
 	}
 
 	path->leave_spinning = 1;
@@ -2714,9 +2722,10 @@ static int check_path_shared(struct btrfs_root *root,
 	struct extent_buffer *eb;
 	int level;
 	u64 refs = 1;
-	int uninitialized_var(ret);
 
 	for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
+		int ret;
+
 		if (!path->nodes[level])
 			break;
 		eb = path->nodes[level];
@@ -2727,7 +2736,7 @@ static int check_path_shared(struct btrfs_root *root,
 		if (refs > 1)
 			return 1;
 	}
-	return ret; /* XXX callers? */
+	return 0;
 }
 
 /*
@@ -4134,7 +4143,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
 	}
 	srcu_read_unlock(&root->fs_info->subvol_srcu, index);
 
-	if (root != sub_root) {
+	if (!IS_ERR(inode) && root != sub_root) {
 		down_read(&root->fs_info->cleanup_work_sem);
 		if (!(inode->i_sb->s_flags & MS_RDONLY))
 			btrfs_orphan_cleanup(sub_root);
@@ -4347,6 +4356,8 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
 		trans = btrfs_join_transaction_nolock(root, 1);
 	else
 		trans = btrfs_join_transaction(root, 1);
+	if (IS_ERR(trans))
+		return PTR_ERR(trans);
 	btrfs_set_trans_block_group(trans, inode);
 	if (nolock)
 		ret = btrfs_end_transaction_nolock(trans, root);
@@ -4372,6 +4383,7 @@ void btrfs_dirty_inode(struct inode *inode)
 		return;
 
 	trans = btrfs_join_transaction(root, 1);
+	BUG_ON(IS_ERR(trans));
 	btrfs_set_trans_block_group(trans, inode);
 
 	ret = btrfs_update_inode(trans, root, inode);
@@ -5176,6 +5188,8 @@ again:
 				em = NULL;
 				btrfs_release_path(root, path);
 				trans = btrfs_join_transaction(root, 1);
+				if (IS_ERR(trans))
+					return ERR_CAST(trans);
 				goto again;
 			}
 			map = kmap(page);
@@ -5280,8 +5294,8 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
 	btrfs_drop_extent_cache(inode, start, start + len - 1, 0);
 
 	trans = btrfs_join_transaction(root, 0);
-	if (!trans)
-		return ERR_PTR(-ENOMEM);
+	if (IS_ERR(trans))
+		return ERR_CAST(trans);
 
 	trans->block_rsv = &root->fs_info->delalloc_block_rsv;
@@ -5505,7 +5519,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
 		 * while we look for nocow cross refs
		 */
 		trans = btrfs_join_transaction(root, 0);
-		if (!trans)
+		if (IS_ERR(trans))
 			goto must_cow;
 
 		if (can_nocow_odirect(trans, inode, start, len) == 1) {
@@ -5640,7 +5654,7 @@ again:
 	BUG_ON(!ordered);
 
 	trans = btrfs_join_transaction(root, 1);
-	if (!trans) {
+	if (IS_ERR(trans)) {
 		err = -ENOMEM;
 		goto out;
 	}
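A theme running through the btrfs changes in this series: btrfs_join_transaction() and friends now return ERR_PTR(-errno) on failure instead of NULL, so every `if (!trans)` above becomes `if (IS_ERR(trans))` and the errno is recovered with PTR_ERR(). The encoding is simply a pointer whose value falls in the top 4095 addresses. A userspace sketch with simplified stand-ins for the kernel's err.h helpers (not the kernel headers themselves):

	#include <stdio.h>
	#include <errno.h>

	/* minimal userspace stand-ins for the kernel's err.h helpers */
	#define MAX_ERRNO 4095
	static inline void *ERR_PTR(long error) { return (void *)error; }
	static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
	static inline int IS_ERR(const void *ptr)
	{
		return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
	}

	struct trans_handle { int id; };

	/* a join that can fail: encodes -ENOMEM in the pointer itself */
	static struct trans_handle *join_transaction(int fail)
	{
		static struct trans_handle t = { 42 };
		return fail ? ERR_PTR(-ENOMEM) : &t;
	}

	int main(void)
	{
		struct trans_handle *trans = join_transaction(1);

		/* the old callers checked `if (!trans)`, which never fires
		 * for an ERR_PTR value; the patched callers do this: */
		if (IS_ERR(trans)) {
			printf("join failed: %ld\n", PTR_ERR(trans));
			return 1;
		}
		printf("joined transaction %d\n", trans->id);
		return 0;
	}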
btrfs_commit_transaction(trans, root); } else { @@ -1898,7 +1902,10 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, memcpy(&new_key, &key, sizeof(new_key)); new_key.objectid = inode->i_ino; - new_key.offset = key.offset + destoff - off; + if (off <= key.offset) + new_key.offset = key.offset + destoff - off; + else + new_key.offset = destoff; trans = btrfs_start_transaction(root, 1); if (IS_ERR(trans)) { @@ -2082,7 +2089,7 @@ static long btrfs_ioctl_trans_start(struct file *file) ret = -ENOMEM; trans = btrfs_start_ioctl_transaction(root, 0); - if (!trans) + if (IS_ERR(trans)) goto out_drop; file->private_data = trans; @@ -2138,9 +2145,9 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) path->leave_spinning = 1; trans = btrfs_start_transaction(root, 1); - if (!trans) { + if (IS_ERR(trans)) { btrfs_free_path(path); - return -ENOMEM; + return PTR_ERR(trans); } dir_id = btrfs_super_root_dir(&root->fs_info->super_copy); @@ -2201,7 +2208,7 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg) int num_types = 4; int alloc_size; int ret = 0; - int slot_count = 0; + u64 slot_count = 0; int i, c; if (copy_from_user(&space_args, @@ -2240,7 +2247,7 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg) goto out; } - slot_count = min_t(int, space_args.space_slots, slot_count); + slot_count = min_t(u64, space_args.space_slots, slot_count); alloc_size = sizeof(*dest) * slot_count; @@ -2260,6 +2267,9 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg) for (i = 0; i < num_types; i++) { struct btrfs_space_info *tmp; + if (!slot_count) + break; + info = NULL; rcu_read_lock(); list_for_each_entry_rcu(tmp, &root->fs_info->space_info, @@ -2281,7 +2291,10 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg) memcpy(dest, &space, sizeof(space)); dest++; space_args.total_spaces++; + slot_count--; } + if (!slot_count) + break; } up_read(&info->groups_sem); } @@ -2334,6 +2347,8 @@ static noinline long btrfs_ioctl_start_sync(struct file *file, void __user *argp u64 transid; trans = btrfs_start_transaction(root, 0); + if (IS_ERR(trans)) + return PTR_ERR(trans); transid = trans->transid; btrfs_commit_transaction_async(trans, root, 0); diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 2b61e1d..083a554 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -141,7 +141,7 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree, u64 file_offset) { struct rb_root *root = &tree->tree; - struct rb_node *prev; + struct rb_node *prev = NULL; struct rb_node *ret; struct btrfs_ordered_extent *entry; diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 0d126be..fb2605d 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -260,6 +260,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) #else BUG(); #endif + break; case BTRFS_BLOCK_GROUP_ITEM_KEY: bi = btrfs_item_ptr(l, i, struct btrfs_block_group_item); diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 045c9c2..0825e4e 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1157,6 +1157,7 @@ static int clone_backref_node(struct btrfs_trans_handle *trans, new_node->bytenr = dest->node->start; new_node->level = node->level; new_node->lowest = node->lowest; + new_node->checked = 1; new_node->root = dest; if (!node->lowest) { @@ -2028,6 +2029,7 @@ static noinline_for_stack int merge_reloc_root(struct 
reloc_control *rc, while (1) { trans = btrfs_start_transaction(root, 0); + BUG_ON(IS_ERR(trans)); trans->block_rsv = rc->block_rsv; ret = btrfs_block_rsv_check(trans, root, rc->block_rsv, @@ -2147,6 +2149,12 @@ again: } trans = btrfs_join_transaction(rc->extent_root, 1); + if (IS_ERR(trans)) { + if (!err) + btrfs_block_rsv_release(rc->extent_root, + rc->block_rsv, num_bytes); + return PTR_ERR(trans); + } if (!err) { if (num_bytes != rc->merging_rsv_size) { @@ -3222,6 +3230,7 @@ truncate: trans = btrfs_join_transaction(root, 0); if (IS_ERR(trans)) { btrfs_free_path(path); + ret = PTR_ERR(trans); goto out; } @@ -3628,6 +3637,7 @@ int prepare_to_relocate(struct reloc_control *rc) set_reloc_control(rc); trans = btrfs_join_transaction(rc->extent_root, 1); + BUG_ON(IS_ERR(trans)); btrfs_commit_transaction(trans, rc->extent_root); return 0; } @@ -3657,6 +3667,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) while (1) { trans = btrfs_start_transaction(rc->extent_root, 0); + BUG_ON(IS_ERR(trans)); if (update_backref_cache(trans, &rc->backref_cache)) { btrfs_end_transaction(trans, rc->extent_root); @@ -3804,7 +3815,10 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) /* get rid of pinned extents */ trans = btrfs_join_transaction(rc->extent_root, 1); - btrfs_commit_transaction(trans, rc->extent_root); + if (IS_ERR(trans)) + err = PTR_ERR(trans); + else + btrfs_commit_transaction(trans, rc->extent_root); out_free: btrfs_free_block_rsv(rc->extent_root, rc->block_rsv); btrfs_free_path(path); @@ -4022,6 +4036,7 @@ static noinline_for_stack int mark_garbage_root(struct btrfs_root *root) int ret; trans = btrfs_start_transaction(root->fs_info->tree_root, 0); + BUG_ON(IS_ERR(trans)); memset(&root->root_item.drop_progress, 0, sizeof(root->root_item.drop_progress)); @@ -4125,6 +4140,11 @@ int btrfs_recover_relocation(struct btrfs_root *root) set_reloc_control(rc); trans = btrfs_join_transaction(rc->extent_root, 1); + if (IS_ERR(trans)) { + unset_reloc_control(rc); + err = PTR_ERR(trans); + goto out_free; + } rc->merge_reloc_tree = 1; @@ -4154,9 +4174,13 @@ int btrfs_recover_relocation(struct btrfs_root *root) unset_reloc_control(rc); trans = btrfs_join_transaction(rc->extent_root, 1); - btrfs_commit_transaction(trans, rc->extent_root); -out: + if (IS_ERR(trans)) + err = PTR_ERR(trans); + else + btrfs_commit_transaction(trans, rc->extent_root); +out_free: kfree(rc); +out: while (!list_empty(&reloc_roots)) { reloc_root = list_entry(reloc_roots.next, struct btrfs_root, root_list); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index b2130c4..a004008 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -383,7 +383,7 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags, struct btrfs_fs_devices **fs_devices) { substring_t args[MAX_OPT_ARGS]; - char *opts, *p; + char *opts, *orig, *p; int error = 0; int intarg; @@ -397,6 +397,7 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags, opts = kstrdup(options, GFP_KERNEL); if (!opts) return -ENOMEM; + orig = opts; while ((p = strsep(&opts, ",")) != NULL) { int token; @@ -432,7 +433,7 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags, } out_free_opts: - kfree(opts); + kfree(orig); out: /* * If no subvolume name is specified we use the default one. 
Allocate @@ -623,6 +624,8 @@ int btrfs_sync_fs(struct super_block *sb, int wait) btrfs_wait_ordered_extents(root, 0, 0); trans = btrfs_start_transaction(root, 0); + if (IS_ERR(trans)) + return PTR_ERR(trans); ret = btrfs_commit_transaction(trans, root); return ret; } @@ -761,6 +764,8 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, } btrfs_close_devices(fs_devices); + kfree(fs_info); + kfree(tree_root); } else { char b[BDEVNAME_SIZE]; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index bae5c7b..3d73c8d 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1161,6 +1161,11 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans, INIT_DELAYED_WORK(&ac->work, do_async_commit); ac->root = root; ac->newtrans = btrfs_join_transaction(root, 0); + if (IS_ERR(ac->newtrans)) { + int err = PTR_ERR(ac->newtrans); + kfree(ac); + return err; + } /* take transaction reference */ mutex_lock(&root->fs_info->trans_mutex); diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 054744a..a4bbb85 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -338,6 +338,12 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans, } dst_copy = kmalloc(item_size, GFP_NOFS); src_copy = kmalloc(item_size, GFP_NOFS); + if (!dst_copy || !src_copy) { + btrfs_release_path(root, path); + kfree(dst_copy); + kfree(src_copy); + return -ENOMEM; + } read_extent_buffer(eb, src_copy, src_ptr, item_size); @@ -665,6 +671,9 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans, btrfs_dir_item_key_to_cpu(leaf, di, &location); name_len = btrfs_dir_name_len(leaf, di); name = kmalloc(name_len, GFP_NOFS); + if (!name) + return -ENOMEM; + read_extent_buffer(leaf, name, (unsigned long)(di + 1), name_len); btrfs_release_path(root, path); @@ -744,6 +753,9 @@ static noinline int backref_in_log(struct btrfs_root *log, int match = 0; path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + ret = btrfs_search_slot(NULL, log, key, path, 0, 0); if (ret != 0) goto out; @@ -967,6 +979,8 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, key.offset = (u64)-1; path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; while (1) { ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); @@ -1178,6 +1192,9 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, name_len = btrfs_dir_name_len(eb, di); name = kmalloc(name_len, GFP_NOFS); + if (!name) + return -ENOMEM; + log_type = btrfs_dir_type(eb, di); read_extent_buffer(eb, name, (unsigned long)(di + 1), name_len); @@ -1692,6 +1709,8 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, root_owner = btrfs_header_owner(parent); next = btrfs_find_create_tree_block(root, bytenr, blocksize); + if (!next) + return -ENOMEM; if (*level == 1) { wc->process_func(root, next, wc, ptr_gen); @@ -2032,6 +2051,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, wait_log_commit(trans, log_root_tree, log_root_tree->log_transid); mutex_unlock(&log_root_tree->log_mutex); + ret = 0; goto out; } atomic_set(&log_root_tree->log_commit[index2], 1); @@ -2096,7 +2116,7 @@ out: smp_mb(); if (waitqueue_active(&root->log_commit_wait[index1])) wake_up(&root->log_commit_wait[index1]); - return 0; + return ret; } static void free_log_tree(struct btrfs_trans_handle *trans, @@ -2194,6 +2214,9 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, log = root->log_root; path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + di = 
btrfs_lookup_dir_item(trans, log, path, dir->i_ino, name, name_len, -1); if (IS_ERR(di)) { @@ -2594,6 +2617,9 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, ins_data = kmalloc(nr * sizeof(struct btrfs_key) + nr * sizeof(u32), GFP_NOFS); + if (!ins_data) + return -ENOMEM; + ins_sizes = (u32 *)ins_data; ins_keys = (struct btrfs_key *)(ins_data + nr * sizeof(u32)); @@ -2725,7 +2751,13 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, log = root->log_root; path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; dst_path = btrfs_alloc_path(); + if (!dst_path) { + btrfs_free_path(path); + return -ENOMEM; + } min_key.objectid = inode->i_ino; min_key.type = BTRFS_INODE_ITEM_KEY; @@ -3080,6 +3112,7 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree) BUG_ON(!path); trans = btrfs_start_transaction(fs_info->tree_root, 0); + BUG_ON(IS_ERR(trans)); wc.trans = trans; wc.pin = 1; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index d158530..af7dbca 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1213,6 +1213,10 @@ static int btrfs_rm_dev_item(struct btrfs_root *root, return -ENOMEM; trans = btrfs_start_transaction(root, 0); + if (IS_ERR(trans)) { + btrfs_free_path(path); + return PTR_ERR(trans); + } key.objectid = BTRFS_DEV_ITEMS_OBJECTID; key.type = BTRFS_DEV_ITEM_KEY; key.offset = device->devid; @@ -1601,11 +1605,19 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) ret = find_next_devid(root, &device->devid); if (ret) { + kfree(device->name); kfree(device); goto error; } trans = btrfs_start_transaction(root, 0); + if (IS_ERR(trans)) { + kfree(device->name); + kfree(device); + ret = PTR_ERR(trans); + goto error; + } + lock_chunks(root); device->writeable = 1; @@ -1873,7 +1885,7 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, return ret; trans = btrfs_start_transaction(root, 0); - BUG_ON(!trans); + BUG_ON(IS_ERR(trans)); lock_chunks(root); @@ -2047,7 +2059,7 @@ int btrfs_balance(struct btrfs_root *dev_root) BUG_ON(ret); trans = btrfs_start_transaction(dev_root, 0); - BUG_ON(!trans); + BUG_ON(IS_ERR(trans)); ret = btrfs_grow_device(trans, device, old_size); BUG_ON(ret); @@ -2213,6 +2225,11 @@ again: /* Shrinking succeeded, else we would be at "done". 
*/ trans = btrfs_start_transaction(root, 0); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + goto done; + } + lock_chunks(root); device->disk_total_bytes = new_size; diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 0bc68de..f0aef78 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -60,6 +60,7 @@ int ceph_init_dentry(struct dentry *dentry) } di->dentry = dentry; di->lease_session = NULL; + di->parent_inode = igrab(dentry->d_parent->d_inode); dentry->d_fsdata = di; dentry->d_time = jiffies; ceph_dentry_lru_add(dentry); @@ -1033,7 +1034,7 @@ static void ceph_dentry_release(struct dentry *dentry) u64 snapid = CEPH_NOSNAP; if (!IS_ROOT(dentry)) { - parent_inode = dentry->d_parent->d_inode; + parent_inode = di->parent_inode; if (parent_inode) snapid = ceph_snap(parent_inode); } @@ -1058,6 +1059,8 @@ static void ceph_dentry_release(struct dentry *dentry) kmem_cache_free(ceph_dentry_cachep, di); dentry->d_fsdata = NULL; } + if (parent_inode) + iput(parent_inode); } static int ceph_snapdir_d_revalidate(struct dentry *dentry, diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 39c243a..f40b913 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -584,10 +584,14 @@ static void queue_realm_cap_snaps(struct ceph_snap_realm *realm) if (lastinode) iput(lastinode); - dout("queue_realm_cap_snaps %p %llx children\n", realm, realm->ino); - list_for_each_entry(child, &realm->children, child_item) - queue_realm_cap_snaps(child); + list_for_each_entry(child, &realm->children, child_item) { + dout("queue_realm_cap_snaps %p %llx queue child %p %llx\n", + realm, realm->ino, child, child->ino); + list_del_init(&child->dirty_item); + list_add(&child->dirty_item, &realm->dirty_item); + } + list_del_init(&realm->dirty_item); dout("queue_realm_cap_snaps %p %llx done\n", realm, realm->ino); } @@ -683,7 +687,9 @@ more: * queue cap snaps _after_ we've built the new snap contexts, * so that i_head_snapc can be set appropriately. 
*/ - list_for_each_entry(realm, &dirty_realms, dirty_item) { + while (!list_empty(&dirty_realms)) { + realm = list_first_entry(&dirty_realms, struct ceph_snap_realm, + dirty_item); queue_realm_cap_snaps(realm); } diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 20b907d..88fcaa2 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -207,6 +207,7 @@ struct ceph_dentry_info { struct dentry *dentry; u64 time; u64 offset; + struct inode *parent_inode; }; struct ceph_inode_xattrs_info { diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig index ee45648..7cb0f7f 100644 --- a/fs/cifs/Kconfig +++ b/fs/cifs/Kconfig @@ -3,6 +3,7 @@ config CIFS depends on INET select NLS select CRYPTO + select CRYPTO_MD4 select CRYPTO_MD5 select CRYPTO_HMAC select CRYPTO_ARC4 diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index f1c6862..0a265ad 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -282,8 +282,6 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt) cFYI(1, "in %s", __func__); BUG_ON(IS_ROOT(mntpt)); - xid = GetXid(); - /* * The MSDFS spec states that paths in DFS referral requests and * responses must be prefixed by a single '\' character instead of @@ -293,7 +291,7 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt) mnt = ERR_PTR(-ENOMEM); full_path = build_path_from_dentry(mntpt); if (full_path == NULL) - goto free_xid; + goto cdda_exit; cifs_sb = CIFS_SB(mntpt->d_inode->i_sb); tlink = cifs_sb_tlink(cifs_sb); @@ -303,9 +301,11 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt) } ses = tlink_tcon(tlink)->ses; + xid = GetXid(); rc = get_dfs_path(xid, ses, full_path + 1, cifs_sb->local_nls, &num_referrals, &referrals, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); + FreeXid(xid); cifs_put_tlink(tlink); @@ -338,8 +338,7 @@ success: free_dfs_info_array(referrals, num_referrals); free_full_path: kfree(full_path); -free_xid: - FreeXid(xid); +cdda_exit: cFYI(1, "leaving %s" , __func__); return mnt; } diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index 1e7636b..beeebf1 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -372,6 +372,10 @@ static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl, ppace = kmalloc(num_aces * sizeof(struct cifs_ace *), GFP_KERNEL); + if (!ppace) { + cERROR(1, "DACL memory allocation error"); + return; + } for (i = 0; i < num_aces; ++i) { ppace[i] = (struct cifs_ace *) (acl_base + acl_size); diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 0db5f1d..a51585f 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -657,9 +657,10 @@ calc_seckey(struct cifsSesInfo *ses) get_random_bytes(sec_key, CIFS_SESS_KEY_SIZE); tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC); - if (!tfm_arc4 || IS_ERR(tfm_arc4)) { + if (IS_ERR(tfm_arc4)) { + rc = PTR_ERR(tfm_arc4); cERROR(1, "could not allocate crypto API arc4\n"); - return PTR_ERR(tfm_arc4); + return rc; } desc.tfm = tfm_arc4; diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 14789a9..a9371b6 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -127,5 +127,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); extern const struct export_operations cifs_export_ops; #endif /* EXPERIMENTAL */ -#define CIFS_VERSION "1.69" +#define CIFS_VERSION "1.71" #endif /* _CIFSFS_H */ diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index edd5b29..17afb0f 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -188,6 +188,8 @@ struct TCP_Server_Info { /* multiplexed 
reads or writes */ unsigned int maxBuf; /* maxBuf specifies the maximum */ /* message size the server can send or receive for non-raw SMBs */ + /* maxBuf is returned by SMB NegotiateProtocol so maxBuf is only 0 */ + /* when socket is setup (and during reconnect) before NegProt sent */ unsigned int max_rw; /* maxRw specifies the maximum */ /* message size the server can send or receive for */ /* SMB_COM_WRITE_RAW or SMB_COM_READ_RAW. */ @@ -652,7 +654,7 @@ static inline void free_dfs_info_array(struct dfs_info3_param *param, #define MID_REQUEST_SUBMITTED 2 #define MID_RESPONSE_RECEIVED 4 #define MID_RETRY_NEEDED 8 /* session closed while this request out */ -#define MID_NO_RESP_NEEDED 0x10 +#define MID_RESPONSE_MALFORMED 0x10 /* Types of response buffer returned from SendReceive2 */ #define CIFS_NO_BUFFER 0 /* Response buffer not returned */ diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 3106f5e..904aa47 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -136,9 +136,6 @@ cifs_reconnect_tcon(struct cifsTconInfo *tcon, int smb_command) } } - if (ses->status == CifsExiting) - return -EIO; - /* * Give demultiplex thread up to 10 seconds to reconnect, should be * greater than cifs socket timeout which is 7 seconds @@ -156,7 +153,7 @@ cifs_reconnect_tcon(struct cifsTconInfo *tcon, int smb_command) * retrying until process is killed or server comes * back on-line */ - if (!tcon->retry || ses->status == CifsExiting) { + if (!tcon->retry) { cFYI(1, "gave up waiting on reconnect in smb_init"); return -EHOSTDOWN; } @@ -4914,7 +4911,6 @@ CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, __u64 size, __u16 fid, __u32 pid_of_opener, bool SetAllocation) { struct smb_com_transaction2_sfi_req *pSMB = NULL; - char *data_offset; struct file_end_of_file_info *parm_data; int rc = 0; __u16 params, param_offset, offset, byte_count, count; @@ -4938,8 +4934,6 @@ CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, __u64 size, param_offset = offsetof(struct smb_com_transaction2_sfi_req, Fid) - 4; offset = param_offset + params; - data_offset = (char *) (&pSMB->hdr.Protocol) + offset; - count = sizeof(struct file_end_of_file_info); pSMB->MaxParameterCount = cpu_to_le16(2); /* BB find exact max SMB PDU from sess structure BB */ diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 47d8ff6..8d6c17a 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -337,8 +337,13 @@ cifs_echo_request(struct work_struct *work) struct TCP_Server_Info *server = container_of(work, struct TCP_Server_Info, echo.work); - /* no need to ping if we got a response recently */ - if (time_before(jiffies, server->lstrp + SMB_ECHO_INTERVAL - HZ)) + /* + * We cannot send an echo until the NEGOTIATE_PROTOCOL request is + * done, which is indicated by maxBuf != 0. Also, no need to ping if + * we got a response recently + */ + if (server->maxBuf == 0 || + time_before(jiffies, server->lstrp + SMB_ECHO_INTERVAL - HZ)) goto requeue_echo; rc = CIFSSMBEcho(server); @@ -578,14 +583,23 @@ incomplete_rcv: else if (reconnect == 1) continue; - length += 4; /* account for rfc1002 hdr */ + total_read += 4; /* account for rfc1002 hdr */ + dump_smb(smb_buffer, total_read); - dump_smb(smb_buffer, length); - if (checkSMB(smb_buffer, smb_buffer->Mid, total_read+4)) { - cifs_dump_mem("Bad SMB: ", smb_buffer, 48); - continue; - } + /* + * We know that we received enough to get to the MID as we + * checked the pdu_length earlier. Now check to see + * if the rest of the header is OK. 
We borrow the length + * var for the rest of the loop to avoid a new stack var. + * + * 48 bytes is enough to display the header and a little bit + * into the payload for debugging purposes. + */ + length = checkSMB(smb_buffer, smb_buffer->Mid, total_read); + if (length != 0) + cifs_dump_mem("Bad SMB: ", smb_buffer, + min_t(unsigned int, total_read, 48)); mid_entry = NULL; server->lstrp = jiffies; @@ -597,7 +611,8 @@ incomplete_rcv: if ((mid_entry->mid == smb_buffer->Mid) && (mid_entry->midState == MID_REQUEST_SUBMITTED) && (mid_entry->command == smb_buffer->Command)) { - if (check2ndT2(smb_buffer,server->maxBuf) > 0) { + if (length == 0 && + check2ndT2(smb_buffer, server->maxBuf) > 0) { /* We have a multipart transact2 resp */ isMultiRsp = true; if (mid_entry->resp_buf) { @@ -632,12 +647,17 @@ incomplete_rcv: mid_entry->resp_buf = smb_buffer; mid_entry->largeBuf = isLargeBuf; multi_t2_fnd: - mid_entry->midState = MID_RESPONSE_RECEIVED; - list_del_init(&mid_entry->qhead); - mid_entry->callback(mid_entry); + if (length == 0) + mid_entry->midState = + MID_RESPONSE_RECEIVED; + else + mid_entry->midState = + MID_RESPONSE_MALFORMED; #ifdef CONFIG_CIFS_STATS2 mid_entry->when_received = jiffies; #endif + list_del_init(&mid_entry->qhead); + mid_entry->callback(mid_entry); break; } mid_entry = NULL; @@ -653,6 +673,9 @@ multi_t2_fnd: else smallbuf = NULL; } + } else if (length != 0) { + /* response sanity checks failed */ + continue; } else if (!is_valid_oplock_break(smb_buffer, server) && !isMultiRsp) { cERROR(1, "No task to wake, unknown frame received! " diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 0de17c1..e964b1c 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -346,7 +346,6 @@ int cifs_open(struct inode *inode, struct file *file) struct cifsTconInfo *tcon; struct tcon_link *tlink; struct cifsFileInfo *pCifsFile = NULL; - struct cifsInodeInfo *pCifsInode; char *full_path = NULL; bool posix_open_ok = false; __u16 netfid; @@ -361,8 +360,6 @@ int cifs_open(struct inode *inode, struct file *file) } tcon = tlink_tcon(tlink); - pCifsInode = CIFS_I(file->f_path.dentry->d_inode); - full_path = build_path_from_dentry(file->f_path.dentry); if (full_path == NULL) { rc = -ENOMEM; @@ -1146,7 +1143,6 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to) char *write_data; int rc = -EFAULT; int bytes_written = 0; - struct cifs_sb_info *cifs_sb; struct inode *inode; struct cifsFileInfo *open_file; @@ -1154,7 +1150,6 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to) return -EFAULT; inode = page->mapping->host; - cifs_sb = CIFS_SB(inode->i_sb); offset += (loff_t)from; write_data = kmap(page); @@ -1667,9 +1662,10 @@ static ssize_t cifs_iovec_write(struct file *file, const struct iovec *iov, unsigned long nr_segs, loff_t *poffset) { - size_t total_written = 0, written = 0; - unsigned long num_pages, npages; - size_t copied, len, cur_len, i; + unsigned int written; + unsigned long num_pages, npages, i; + size_t copied, len, cur_len; + ssize_t total_written = 0; struct kvec *to_send; struct page **pages; struct iov_iter it; @@ -1825,7 +1821,8 @@ cifs_iovec_read(struct file *file, const struct iovec *iov, { int rc; int xid; - unsigned int total_read, bytes_read = 0; + ssize_t total_read; + unsigned int bytes_read = 0; size_t len, cur_len; int iov_offset = 0; struct cifs_sb_info *cifs_sb; diff --git a/fs/cifs/link.c b/fs/cifs/link.c index 02cd60a..e8804d3 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -55,8 +55,9 @@ symlink_hash(unsigned int 
link_len, const char *link_str, u8 *md5_hash) md5 = crypto_alloc_shash("md5", 0, 0); if (IS_ERR(md5)) { + rc = PTR_ERR(md5); cERROR(1, "%s: Crypto md5 allocation error %d\n", __func__, rc); - return PTR_ERR(md5); + return rc; } size = sizeof(struct shash_desc) + crypto_shash_descsize(md5); sdescmd5 = kmalloc(size, GFP_KERNEL); diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index a09e077..2a930a7 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -236,10 +236,7 @@ __u16 GetNextMid(struct TCP_Server_Info *server) { __u16 mid = 0; __u16 last_mid; - int collision; - - if (server == NULL) - return mid; + bool collision; spin_lock(&GlobalMid_Lock); last_mid = server->CurrentMid; /* we do not want to loop forever */ @@ -252,24 +249,38 @@ __u16 GetNextMid(struct TCP_Server_Info *server) (and it would also have to have been a request that did not time out) */ while (server->CurrentMid != last_mid) { - struct list_head *tmp; struct mid_q_entry *mid_entry; + unsigned int num_mids; - collision = 0; + collision = false; if (server->CurrentMid == 0) server->CurrentMid++; - list_for_each(tmp, &server->pending_mid_q) { - mid_entry = list_entry(tmp, struct mid_q_entry, qhead); - - if ((mid_entry->mid == server->CurrentMid) && - (mid_entry->midState == MID_REQUEST_SUBMITTED)) { + num_mids = 0; + list_for_each_entry(mid_entry, &server->pending_mid_q, qhead) { + ++num_mids; + if (mid_entry->mid == server->CurrentMid && + mid_entry->midState == MID_REQUEST_SUBMITTED) { /* This mid is in use, try a different one */ - collision = 1; + collision = true; break; } } - if (collision == 0) { + + /* + * if we have more than 32k mids in the list, then something + * is very wrong. Possibly a local user is trying to DoS the + * box by issuing long-running calls and SIGKILL'ing them. If + * we get to 2^16 mids then we're in big trouble as this + * function could loop forever. + * + * Go ahead and assign out the mid in this situation, but force + * an eventual reconnect to clean out the pending_mid_q. + */ + if (num_mids > 32768) + server->tcpStatus = CifsNeedReconnect; + + if (!collision) { mid = server->CurrentMid; break; } @@ -381,29 +392,31 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ , } static int -checkSMBhdr(struct smb_hdr *smb, __u16 mid) +check_smb_hdr(struct smb_hdr *smb, __u16 mid) { - /* Make sure that this really is an SMB, that it is a response, - and that the message ids match */ - if ((*(__le32 *) smb->Protocol == cpu_to_le32(0x424d53ff)) && - (mid == smb->Mid)) { - if (smb->Flags & SMBFLG_RESPONSE) - return 0; - else { - /* only one valid case where server sends us request */ - if (smb->Command == SMB_COM_LOCKING_ANDX) - return 0; - else - cERROR(1, "Received Request not response"); - } - } else { /* bad signature or mid */ - if (*(__le32 *) smb->Protocol != cpu_to_le32(0x424d53ff)) - cERROR(1, "Bad protocol string signature header %x", - *(unsigned int *) smb->Protocol); - if (mid != smb->Mid) - cERROR(1, "Mids do not match"); + /* does it have the right SMB "signature" ? */ + if (*(__le32 *) smb->Protocol != cpu_to_le32(0x424d53ff)) { + cERROR(1, "Bad protocol string signature header 0x%x", + *(unsigned int *)smb->Protocol); + return 1; + } + + /* Make sure that message ids match */ + if (mid != smb->Mid) { + cERROR(1, "Mids do not match. received=%u expected=%u", + smb->Mid, mid); + return 1; } - cERROR(1, "bad smb detected. 
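GetNextMid() above now scans the pending queue with a bool collision flag and, once more than 32768 mids are outstanding, flags the server CifsNeedReconnect, since a full 16-bit mid space would otherwise make the scan spin forever. A compressed userspace model of the allocation loop; mid_in_use() is a stub standing in for the pending_mid_q walk.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_PENDING 32768	/* beyond this, force a reconnect */

/* stub: pretend mid 7 is owned by an in-flight request */
static bool mid_in_use(uint16_t mid)
{
	return mid == 7;
}

/* returns 0 when every mid is taken; the caller must then reconnect */
static uint16_t get_next_mid(uint16_t *cur, unsigned int num_pending,
			     bool *need_reconnect)
{
	uint16_t last = *cur;

	if (num_pending > MAX_PENDING)
		*need_reconnect = true;	/* queue absurdly long: bail out */

	do {
		(*cur)++;
		if (*cur == 0)		/* mid 0 is reserved, skip it */
			(*cur)++;
		if (!mid_in_use(*cur))
			return *cur;
	} while (*cur != last);		/* wrapped all 2^16 values */

	return 0;
}

int main(void)
{
	uint16_t cur = 6;
	bool reconnect = false;

	printf("next mid: %u\n", get_next_mid(&cur, 1, &reconnect)); /* 8 */
	return 0;
}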
The Mid=%d", smb->Mid); + + /* if it's a response then accept */ + if (smb->Flags & SMBFLG_RESPONSE) + return 0; + + /* only one valid case where server sends us request */ + if (smb->Command == SMB_COM_LOCKING_ANDX) + return 0; + + cERROR(1, "Server sent request, not response. mid=%u", smb->Mid); return 1; } @@ -448,7 +461,7 @@ checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length) return 1; } - if (checkSMBhdr(smb, mid)) + if (check_smb_hdr(smb, mid)) return 1; clc_len = smbCalcSize_LE(smb); @@ -465,25 +478,26 @@ checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length) if (((4 + len) & 0xFFFF) == (clc_len & 0xFFFF)) return 0; /* bcc wrapped */ } - cFYI(1, "Calculated size %d vs length %d mismatch for mid %d", + cFYI(1, "Calculated size %u vs length %u mismatch for mid=%u", clc_len, 4 + len, smb->Mid); - /* Windows XP can return a few bytes too much, presumably - an illegal pad, at the end of byte range lock responses - so we allow for that three byte pad, as long as actual - received length is as long or longer than calculated length */ - /* We have now had to extend this more, since there is a - case in which it needs to be bigger still to handle a - malformed response to transact2 findfirst from WinXP when - access denied is returned and thus bcc and wct are zero - but server says length is 0x21 bytes too long as if the server - forget to reset the smb rfc1001 length when it reset the - wct and bcc to minimum size and drop the t2 parms and data */ - if ((4+len > clc_len) && (len <= clc_len + 512)) - return 0; - else { - cERROR(1, "RFC1001 size %d bigger than SMB for Mid=%d", + + if (4 + len < clc_len) { + cERROR(1, "RFC1001 size %u smaller than SMB for mid=%u", len, smb->Mid); return 1; + } else if (len > clc_len + 512) { + /* + * Some servers (Windows XP in particular) send more + * data than the lengths in the SMB packet would + * indicate on certain calls (byte range locks and + * trans2 find first calls in particular). While the + * client can handle such a frame by ignoring the + * trailing data, we choose limit the amount of extra + * data to 512 bytes. + */ + cERROR(1, "RFC1001 size %u more than 512 bytes larger " + "than SMB for mid=%u", len, smb->Mid); + return 1; } } return 0; diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c index 8d9189f..79f641e 100644 --- a/fs/cifs/netmisc.c +++ b/fs/cifs/netmisc.c @@ -170,7 +170,7 @@ cifs_convert_address(struct sockaddr *dst, const char *src, int len) { int rc, alen, slen; const char *pct; - char *endp, scope_id[13]; + char scope_id[13]; struct sockaddr_in *s4 = (struct sockaddr_in *) dst; struct sockaddr_in6 *s6 = (struct sockaddr_in6 *) dst; @@ -197,9 +197,9 @@ cifs_convert_address(struct sockaddr *dst, const char *src, int len) memcpy(scope_id, pct + 1, slen); scope_id[slen] = '\0'; - s6->sin6_scope_id = (u32) simple_strtoul(pct, &endp, 0); - if (endp != scope_id + slen) - return 0; + rc = strict_strtoul(scope_id, 0, + (unsigned long *)&s6->sin6_scope_id); + rc = (rc == 0) ? 
1 : 0; } return rc; diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 7f25cc3..f8e4cd2 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -764,7 +764,6 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) { int rc = 0; int xid, i; - struct cifs_sb_info *cifs_sb; struct cifsTconInfo *pTcon; struct cifsFileInfo *cifsFile = NULL; char *current_entry; @@ -775,8 +774,6 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) xid = GetXid(); - cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); - /* * Ensure FindFirst doesn't fail before doing filldir() for '.' and * '..'. Otherwise we won't be able to notify VFS in case of failure. diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 1adc962..1676570 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -656,13 +656,13 @@ ssetup_ntlmssp_authenticate: if (type == LANMAN) { #ifdef CONFIG_CIFS_WEAK_PW_HASH - char lnm_session_key[CIFS_SESS_KEY_SIZE]; + char lnm_session_key[CIFS_AUTH_RESP_SIZE]; pSMB->req.hdr.Flags2 &= ~SMBFLG2_UNICODE; /* no capabilities flags in old lanman negotiation */ - pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_SESS_KEY_SIZE); + pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_AUTH_RESP_SIZE); /* Calculate hash with password and copy into bcc_ptr. * Encryption Key (stored as in cryptkey) gets used if the @@ -675,8 +675,8 @@ ssetup_ntlmssp_authenticate: true : false, lnm_session_key); ses->flags |= CIFS_SES_LANMAN; - memcpy(bcc_ptr, (char *)lnm_session_key, CIFS_SESS_KEY_SIZE); - bcc_ptr += CIFS_SESS_KEY_SIZE; + memcpy(bcc_ptr, (char *)lnm_session_key, CIFS_AUTH_RESP_SIZE); + bcc_ptr += CIFS_AUTH_RESP_SIZE; /* can not sign if LANMAN negotiated so no need to calculate signing key? but what if server diff --git a/fs/cifs/smbencrypt.c b/fs/cifs/smbencrypt.c index b5450e9..b5041c8 100644 --- a/fs/cifs/smbencrypt.c +++ b/fs/cifs/smbencrypt.c @@ -58,8 +58,9 @@ mdfour(unsigned char *md4_hash, unsigned char *link_str, int link_len) md4 = crypto_alloc_shash("md4", 0, 0); if (IS_ERR(md4)) { + rc = PTR_ERR(md4); cERROR(1, "%s: Crypto md4 allocation error %d\n", __func__, rc); - return PTR_ERR(md4); + return rc; } size = sizeof(struct shash_desc) + crypto_shash_descsize(md4); sdescmd4 = kmalloc(size, GFP_KERNEL); diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index c1ccca1..46d8756 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -236,9 +236,9 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec) server->tcpStatus = CifsNeedReconnect; } - if (rc < 0) { + if (rc < 0 && rc != -EINTR) cERROR(1, "Error %d sending data on socket to server", rc); - } else + else rc = 0; /* Don't want to modify the buffer as a @@ -359,6 +359,10 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_hdr *in_buf, if (rc) return rc; + /* enable signing if server requires it */ + if (server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) + in_buf->Flags2 |= SMBFLG2_SECURITY_SIGNATURE; + mutex_lock(&server->srv_mutex); mid = AllocMidQEntry(in_buf, server); if (mid == NULL) { @@ -453,6 +457,9 @@ sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server) case MID_RETRY_NEEDED: rc = -EAGAIN; break; + case MID_RESPONSE_MALFORMED: + rc = -EIO; + break; default: cERROR(1, "%s: invalid mid state mid=%d state=%d", __func__, mid->mid, mid->midState); @@ -570,17 +577,33 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, #endif mutex_unlock(&ses->server->srv_mutex); - cifs_small_buf_release(in_buf); - if (rc < 0) + if (rc < 0) { + 
cifs_small_buf_release(in_buf); goto out; + } - if (long_op == CIFS_ASYNC_OP) + if (long_op == CIFS_ASYNC_OP) { + cifs_small_buf_release(in_buf); goto out; + } rc = wait_for_response(ses->server, midQ); - if (rc != 0) - goto out; + if (rc != 0) { + send_nt_cancel(ses->server, in_buf, midQ); + spin_lock(&GlobalMid_Lock); + if (midQ->midState == MID_REQUEST_SUBMITTED) { + midQ->callback = DeleteMidQEntry; + spin_unlock(&GlobalMid_Lock); + cifs_small_buf_release(in_buf); + atomic_dec(&ses->server->inFlight); + wake_up(&ses->server->request_q); + return rc; + } + spin_unlock(&GlobalMid_Lock); + } + + cifs_small_buf_release(in_buf); rc = sync_mid_result(midQ, ses->server); if (rc != 0) { @@ -724,8 +747,19 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses, goto out; rc = wait_for_response(ses->server, midQ); - if (rc != 0) - goto out; + if (rc != 0) { + send_nt_cancel(ses->server, in_buf, midQ); + spin_lock(&GlobalMid_Lock); + if (midQ->midState == MID_REQUEST_SUBMITTED) { + /* no longer considered to be "in-flight" */ + midQ->callback = DeleteMidQEntry; + spin_unlock(&GlobalMid_Lock); + atomic_dec(&ses->server->inFlight); + wake_up(&ses->server->request_q); + return rc; + } + spin_unlock(&GlobalMid_Lock); + } rc = sync_mid_result(midQ, ses->server); if (rc != 0) { @@ -922,10 +956,21 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon, } } - if (wait_for_response(ses->server, midQ) == 0) { - /* We got the response - restart system call. */ - rstart = 1; + rc = wait_for_response(ses->server, midQ); + if (rc) { + send_nt_cancel(ses->server, in_buf, midQ); + spin_lock(&GlobalMid_Lock); + if (midQ->midState == MID_REQUEST_SUBMITTED) { + /* no longer considered to be "in-flight" */ + midQ->callback = DeleteMidQEntry; + spin_unlock(&GlobalMid_Lock); + return rc; + } + spin_unlock(&GlobalMid_Lock); } + + /* We got the response - restart system call. 
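All three synchronous send paths above grow the same recovery sequence when wait_for_response() returns an error: send an NT cancel, then, under GlobalMid_Lock, check whether the mid is still MID_REQUEST_SUBMITTED; if so, repoint its callback at DeleteMidQEntry and return, letting the demultiplex thread free the entry whenever the late reply shows up. A sketch of that hand-off, with a pthread mutex standing in for the spinlock:

#include <pthread.h>
#include <stdlib.h>
#include <errno.h>

enum { SUBMITTED, RECEIVED };

struct mid {
	int state;
	void (*callback)(struct mid *);
};

static pthread_mutex_t mid_lock = PTHREAD_MUTEX_INITIALIZER;

static void delete_mid(struct mid *m)
{
	free(m);			/* stand-in for DeleteMidQEntry */
}

/* caller gave up waiting: detach rather than free under the receiver */
static int abandon_request(struct mid *m, int rc)
{
	pthread_mutex_lock(&mid_lock);
	if (m->state == SUBMITTED) {
		/* the receiver thread still owns the entry; make its
		 * completion path free it instead of waking us */
		m->callback = delete_mid;
		pthread_mutex_unlock(&mid_lock);
		return rc;		/* never touch m again on this path */
	}
	pthread_mutex_unlock(&mid_lock);
	return 0;		/* reply arrived after all; sync path handles it */
}

int main(void)
{
	struct mid *m = calloc(1, sizeof(*m));

	m->state = SUBMITTED;
	(void)abandon_request(m, -EINTR);
	m->callback(m);		/* the receiver's completion frees m */
	return 0;
}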
*/ + rstart = 1; } rc = sync_mid_result(midQ, ses->server); diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 9c64ae9..2d8c87b 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -1468,15 +1468,13 @@ static void work_stop(void) static int work_start(void) { - recv_workqueue = alloc_workqueue("dlm_recv", WQ_MEM_RECLAIM | - WQ_HIGHPRI | WQ_FREEZEABLE, 0); + recv_workqueue = create_singlethread_workqueue("dlm_recv"); if (!recv_workqueue) { log_print("can't start dlm_recv"); return -ENOMEM; } - send_workqueue = alloc_workqueue("dlm_send", WQ_MEM_RECLAIM | - WQ_HIGHPRI | WQ_FREEZEABLE, 0); + send_workqueue = create_singlethread_workqueue("dlm_send"); if (!send_workqueue) { log_print("can't start dlm_send"); destroy_workqueue(recv_workqueue); diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c index 6fc4f31..534c1d4 100644 --- a/fs/ecryptfs/dentry.c +++ b/fs/ecryptfs/dentry.c @@ -46,24 +46,28 @@ static int ecryptfs_d_revalidate(struct dentry *dentry, struct nameidata *nd) { struct dentry *lower_dentry; struct vfsmount *lower_mnt; - struct dentry *dentry_save; - struct vfsmount *vfsmount_save; + struct dentry *dentry_save = NULL; + struct vfsmount *vfsmount_save = NULL; int rc = 1; - if (nd->flags & LOOKUP_RCU) + if (nd && nd->flags & LOOKUP_RCU) return -ECHILD; lower_dentry = ecryptfs_dentry_to_lower(dentry); lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry); if (!lower_dentry->d_op || !lower_dentry->d_op->d_revalidate) goto out; - dentry_save = nd->path.dentry; - vfsmount_save = nd->path.mnt; - nd->path.dentry = lower_dentry; - nd->path.mnt = lower_mnt; + if (nd) { + dentry_save = nd->path.dentry; + vfsmount_save = nd->path.mnt; + nd->path.dentry = lower_dentry; + nd->path.mnt = lower_mnt; + } rc = lower_dentry->d_op->d_revalidate(lower_dentry, nd); - nd->path.dentry = dentry_save; - nd->path.mnt = vfsmount_save; + if (nd) { + nd->path.dentry = dentry_save; + nd->path.mnt = vfsmount_save; + } if (dentry->d_inode) { struct inode *lower_inode = ecryptfs_inode_to_lower(dentry->d_inode); diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index dbc84ed..e007534 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -632,8 +632,7 @@ int ecryptfs_interpose(struct dentry *hidden_dentry, u32 flags); int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry, struct dentry *lower_dentry, - struct inode *ecryptfs_dir_inode, - struct nameidata *ecryptfs_nd); + struct inode *ecryptfs_dir_inode); int ecryptfs_decode_and_decrypt_filename(char **decrypted_name, size_t *decrypted_name_size, struct dentry *ecryptfs_dentry, diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 81e10e6..7d1050e 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -317,6 +317,7 @@ ecryptfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) const struct file_operations ecryptfs_dir_fops = { .readdir = ecryptfs_readdir, + .read = generic_read_dir, .unlocked_ioctl = ecryptfs_unlocked_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = ecryptfs_compat_ioctl, diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index bd33f87..b592938 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -74,16 +74,20 @@ ecryptfs_create_underlying_file(struct inode *lower_dir_inode, unsigned int flags_save; int rc; - dentry_save = nd->path.dentry; - vfsmount_save = nd->path.mnt; - flags_save = nd->flags; - nd->path.dentry = lower_dentry; - nd->path.mnt = lower_mnt; - nd->flags &= ~LOOKUP_OPEN; + if (nd) { + dentry_save = nd->path.dentry; + 
vfsmount_save = nd->path.mnt; + flags_save = nd->flags; + nd->path.dentry = lower_dentry; + nd->path.mnt = lower_mnt; + nd->flags &= ~LOOKUP_OPEN; + } rc = vfs_create(lower_dir_inode, lower_dentry, mode, nd); - nd->path.dentry = dentry_save; - nd->path.mnt = vfsmount_save; - nd->flags = flags_save; + if (nd) { + nd->path.dentry = dentry_save; + nd->path.mnt = vfsmount_save; + nd->flags = flags_save; + } return rc; } @@ -241,8 +245,7 @@ out: */ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry, struct dentry *lower_dentry, - struct inode *ecryptfs_dir_inode, - struct nameidata *ecryptfs_nd) + struct inode *ecryptfs_dir_inode) { struct dentry *lower_dir_dentry; struct vfsmount *lower_mnt; @@ -290,8 +293,6 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry, goto out; if (special_file(lower_inode->i_mode)) goto out; - if (!ecryptfs_nd) - goto out; /* Released in this function */ page_virt = kmem_cache_zalloc(ecryptfs_header_cache_2, GFP_USER); if (!page_virt) { @@ -349,75 +350,6 @@ out: } /** - * ecryptfs_new_lower_dentry - * @name: The name of the new dentry. - * @lower_dir_dentry: Parent directory of the new dentry. - * @nd: nameidata from last lookup. - * - * Create a new dentry or get it from lower parent dir. - */ -static struct dentry * -ecryptfs_new_lower_dentry(struct qstr *name, struct dentry *lower_dir_dentry, - struct nameidata *nd) -{ - struct dentry *new_dentry; - struct dentry *tmp; - struct inode *lower_dir_inode; - - lower_dir_inode = lower_dir_dentry->d_inode; - - tmp = d_alloc(lower_dir_dentry, name); - if (!tmp) - return ERR_PTR(-ENOMEM); - - mutex_lock(&lower_dir_inode->i_mutex); - new_dentry = lower_dir_inode->i_op->lookup(lower_dir_inode, tmp, nd); - mutex_unlock(&lower_dir_inode->i_mutex); - - if (!new_dentry) - new_dentry = tmp; - else - dput(tmp); - - return new_dentry; -} - - -/** - * ecryptfs_lookup_one_lower - * @ecryptfs_dentry: The eCryptfs dentry that we are looking up - * @lower_dir_dentry: lower parent directory - * @name: lower file name - * - * Get the lower dentry from vfs. If lower dentry does not exist yet, - * create it. 
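Both ecryptfs hunks in this area guard the nameidata save/swap/restore with NULL checks, since lookup paths that carry no nameidata (lookup_one_len() and similar callers) now reach these functions. Reduced to its shape, the pattern looks like the sketch below; the types are toy reductions, not the kernel's.

#include <stdio.h>

struct path { void *dentry, *mnt; };
struct nameidata { struct path path; unsigned flags; };

static int lower_op(struct nameidata *nd)
{
	return nd ? 0 : 1;	/* a lower-fs call that may get NULL nd */
}

static int call_lower_op(struct nameidata *nd, struct path *lower,
			 int (*op)(struct nameidata *))
{
	struct path saved = { 0, 0 };
	int rc;

	if (nd) {			/* some callers pass no nameidata */
		saved = nd->path;
		nd->path = *lower;	/* lower fs must see its own path */
	}
	rc = op(nd);
	if (nd)
		nd->path = saved;	/* always undo the swap */
	return rc;
}

int main(void)
{
	struct path lower = { 0, 0 };

	printf("%d\n", call_lower_op(NULL, &lower, lower_op));	/* 1 */
	return 0;
}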
- */ -static struct dentry * -ecryptfs_lookup_one_lower(struct dentry *ecryptfs_dentry, - struct dentry *lower_dir_dentry, struct qstr *name) -{ - struct nameidata nd; - struct vfsmount *lower_mnt; - int err; - - lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt( - ecryptfs_dentry->d_parent)); - err = vfs_path_lookup(lower_dir_dentry, lower_mnt, name->name , 0, &nd); - mntput(lower_mnt); - - if (!err) { - /* we dont need the mount */ - mntput(nd.path.mnt); - return nd.path.dentry; - } - if (err != -ENOENT) - return ERR_PTR(err); - - /* create a new lower dentry */ - return ecryptfs_new_lower_dentry(name, lower_dir_dentry, &nd); -} - -/** * ecryptfs_lookup * @ecryptfs_dir_inode: The eCryptfs directory inode * @ecryptfs_dentry: The eCryptfs dentry that we are looking up @@ -434,7 +366,6 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, size_t encrypted_and_encoded_name_size; struct ecryptfs_mount_crypt_stat *mount_crypt_stat = NULL; struct dentry *lower_dir_dentry, *lower_dentry; - struct qstr lower_name; int rc = 0; if ((ecryptfs_dentry->d_name.len == 1 @@ -444,20 +375,14 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, goto out_d_drop; } lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent); - lower_name.name = ecryptfs_dentry->d_name.name; - lower_name.len = ecryptfs_dentry->d_name.len; - lower_name.hash = ecryptfs_dentry->d_name.hash; - if (lower_dir_dentry->d_op && lower_dir_dentry->d_op->d_hash) { - rc = lower_dir_dentry->d_op->d_hash(lower_dir_dentry, - lower_dir_dentry->d_inode, &lower_name); - if (rc < 0) - goto out_d_drop; - } - lower_dentry = ecryptfs_lookup_one_lower(ecryptfs_dentry, - lower_dir_dentry, &lower_name); + mutex_lock(&lower_dir_dentry->d_inode->i_mutex); + lower_dentry = lookup_one_len(ecryptfs_dentry->d_name.name, + lower_dir_dentry, + ecryptfs_dentry->d_name.len); + mutex_unlock(&lower_dir_dentry->d_inode->i_mutex); if (IS_ERR(lower_dentry)) { rc = PTR_ERR(lower_dentry); - ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_lower() returned " + ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned " "[%d] on lower_dentry = [%s]\n", __func__, rc, encrypted_and_encoded_name); goto out_d_drop; @@ -479,28 +404,21 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, "filename; rc = [%d]\n", __func__, rc); goto out_d_drop; } - lower_name.name = encrypted_and_encoded_name; - lower_name.len = encrypted_and_encoded_name_size; - lower_name.hash = full_name_hash(lower_name.name, lower_name.len); - if (lower_dir_dentry->d_op && lower_dir_dentry->d_op->d_hash) { - rc = lower_dir_dentry->d_op->d_hash(lower_dir_dentry, - lower_dir_dentry->d_inode, &lower_name); - if (rc < 0) - goto out_d_drop; - } - lower_dentry = ecryptfs_lookup_one_lower(ecryptfs_dentry, - lower_dir_dentry, &lower_name); + mutex_lock(&lower_dir_dentry->d_inode->i_mutex); + lower_dentry = lookup_one_len(encrypted_and_encoded_name, + lower_dir_dentry, + encrypted_and_encoded_name_size); + mutex_unlock(&lower_dir_dentry->d_inode->i_mutex); if (IS_ERR(lower_dentry)) { rc = PTR_ERR(lower_dentry); - ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_lower() returned " + ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned " "[%d] on lower_dentry = [%s]\n", __func__, rc, encrypted_and_encoded_name); goto out_d_drop; } lookup_and_interpose: rc = ecryptfs_lookup_and_interpose_lower(ecryptfs_dentry, lower_dentry, - ecryptfs_dir_inode, - ecryptfs_nd); + ecryptfs_dir_inode); goto out; out_d_drop: d_drop(ecryptfs_dentry); @@ -1092,6 +1010,8 
@@ int ecryptfs_getattr(struct vfsmount *mnt, struct dentry *dentry, rc = vfs_getattr(ecryptfs_dentry_to_lower_mnt(dentry), ecryptfs_dentry_to_lower(dentry), &lower_stat); if (!rc) { + fsstack_copy_attr_all(dentry->d_inode, + ecryptfs_inode_to_lower(dentry->d_inode)); generic_fillattr(dentry->d_inode, stat); stat->blocks = lower_stat.blocks; } diff --git a/fs/eventfd.c b/fs/eventfd.c index e0194b3..d9a5917 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c @@ -99,7 +99,7 @@ EXPORT_SYMBOL_GPL(eventfd_ctx_get); * @ctx: [in] Pointer to eventfd context. * * The eventfd context reference must have been previously acquired either - * with eventfd_ctx_get() or eventfd_ctx_fdget()). + * with eventfd_ctx_get() or eventfd_ctx_fdget(). */ void eventfd_ctx_put(struct eventfd_ctx *ctx) { @@ -146,9 +146,9 @@ static void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt) * eventfd_ctx_remove_wait_queue - Read the current counter and removes wait queue. * @ctx: [in] Pointer to eventfd context. * @wait: [in] Wait queue to be removed. - * @cnt: [out] Pointer to the 64bit conter value. + * @cnt: [out] Pointer to the 64-bit counter value. * - * Returns zero if successful, or the following error codes: + * Returns %0 if successful, or the following error codes: * * -EAGAIN : The operation would have blocked. * @@ -175,11 +175,11 @@ EXPORT_SYMBOL_GPL(eventfd_ctx_remove_wait_queue); * eventfd_ctx_read - Reads the eventfd counter or wait if it is zero. * @ctx: [in] Pointer to eventfd context. * @no_wait: [in] Different from zero if the operation should not block. - * @cnt: [out] Pointer to the 64bit conter value. + * @cnt: [out] Pointer to the 64-bit counter value. * - * Returns zero if successful, or the following error codes: + * Returns %0 if successful, or the following error codes: * - * -EAGAIN : The operation would have blocked but @no_wait was nonzero. + * -EAGAIN : The operation would have blocked but @no_wait was non-zero. * -ERESTARTSYS : A signal interrupted the wait operation. 
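The corrected kerneldoc above states the userspace-visible contract: reading an eventfd returns the 64-bit counter and resets it, a non-blocking read of a zero counter fails with EAGAIN, and an interrupted wait surfaces as EINTR (or a transparent restart). A small self-contained demonstration of those semantics:

#include <sys/eventfd.h>
#include <stdint.h>
#include <unistd.h>
#include <errno.h>
#include <stdio.h>

int main(void)
{
	uint64_t val = 3, out;
	int efd = eventfd(0, EFD_NONBLOCK);

	if (efd < 0)
		return 1;

	/* counter is zero: non-blocking read fails with EAGAIN */
	if (read(efd, &out, sizeof(out)) < 0 && errno == EAGAIN)
		printf("empty counter -> EAGAIN\n");

	if (write(efd, &val, sizeof(val)) != sizeof(val))	/* counter += 3 */
		return 1;
	if (read(efd, &out, sizeof(out)) == sizeof(out))
		printf("read counter = %llu\n",
		       (unsigned long long)out);	/* 3; counter reset */

	close(efd);
	return 0;
}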
* * If @no_wait is zero, the function might sleep until the eventfd internal diff --git a/fs/eventpoll.c b/fs/eventpoll.c index cc8a9b7..267d0ad 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1114,6 +1114,17 @@ static int ep_send_events(struct eventpoll *ep, return ep_scan_ready_list(ep, ep_send_events_proc, &esed); } +static inline struct timespec ep_set_mstimeout(long ms) +{ + struct timespec now, ts = { + .tv_sec = ms / MSEC_PER_SEC, + .tv_nsec = NSEC_PER_MSEC * (ms % MSEC_PER_SEC), + }; + + ktime_get_ts(&now); + return timespec_add_safe(now, ts); +} + static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, int maxevents, long timeout) { @@ -1121,12 +1132,11 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, unsigned long flags; long slack; wait_queue_t wait; - struct timespec end_time; ktime_t expires, *to = NULL; if (timeout > 0) { - ktime_get_ts(&end_time); - timespec_add_ns(&end_time, (u64)timeout * NSEC_PER_MSEC); + struct timespec end_time = ep_set_mstimeout(timeout); + slack = select_estimate_accuracy(&end_time); to = &expires; *to = timespec_to_ktime(end_time); @@ -120,7 +120,7 @@ SYSCALL_DEFINE1(uselib, const char __user *, library) goto out; file = do_filp_open(AT_FDCWD, tmp, - O_LARGEFILE | O_RDONLY | FMODE_EXEC, 0, + O_LARGEFILE | O_RDONLY | __FMODE_EXEC, 0, MAY_READ | MAY_EXEC | MAY_OPEN); putname(tmp); error = PTR_ERR(file); @@ -723,7 +723,7 @@ struct file *open_exec(const char *name) int err; file = do_filp_open(AT_FDCWD, name, - O_LARGEFILE | O_RDONLY | FMODE_EXEC, 0, + O_LARGEFILE | O_RDONLY | __FMODE_EXEC, 0, MAY_EXEC | MAY_OPEN); if (IS_ERR(file)) goto out; diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index 4268542..a755523 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c @@ -1030,7 +1030,6 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino) memcpy(oi->i_data, fcb.i_data, sizeof(fcb.i_data)); } - inode->i_mapping->backing_dev_info = sb->s_bdi; if (S_ISREG(inode->i_mode)) { inode->i_op = &exofs_file_inode_operations; inode->i_fop = &exofs_file_operations; @@ -1131,7 +1130,6 @@ struct inode *exofs_new_inode(struct inode *dir, int mode) sbi = sb->s_fs_info; - inode->i_mapping->backing_dev_info = sb->s_bdi; sb->s_dirt = 1; inode_init_owner(inode, dir, mode); inode->i_ino = sbi->s_nextid++; diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 0c8d97b..3aa0b72 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -848,6 +848,7 @@ struct ext4_inode_info { atomic_t i_ioend_count; /* Number of outstanding io_end structs */ /* current io_end structure for async DIO write*/ ext4_io_end_t *cur_aio_dio; + atomic_t i_aiodio_unwritten; /* Nr. 
of inflight conversions pending */ spinlock_t i_block_reservation_lock; @@ -2119,6 +2120,15 @@ static inline void set_bitmap_uptodate(struct buffer_head *bh) #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) +/* For ioend & aio unwritten conversion wait queues */ +#define EXT4_WQ_HASH_SZ 37 +#define ext4_ioend_wq(v) (&ext4__ioend_wq[((unsigned long)(v)) %\ + EXT4_WQ_HASH_SZ]) +#define ext4_aio_mutex(v) (&ext4__aio_mutex[((unsigned long)(v)) %\ + EXT4_WQ_HASH_SZ]) +extern wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ]; +extern struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ]; + #endif /* __KERNEL__ */ #endif /* _EXT4_H */ diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 63a7581..ccce8a7 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3174,9 +3174,10 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, * that this IO needs to convertion to written when IO is * completed */ - if (io) + if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) { io->flag = EXT4_IO_END_UNWRITTEN; - else + atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten); + } else ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); if (ext4_should_dioread_nolock(inode)) map->m_flags |= EXT4_MAP_UNINIT; @@ -3463,9 +3464,10 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, * that we need to perform convertion when IO is done. */ if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { - if (io) + if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) { io->flag = EXT4_IO_END_UNWRITTEN; - else + atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten); + } else ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); } diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 2e8322c..7b80d54 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -55,11 +55,47 @@ static int ext4_release_file(struct inode *inode, struct file *filp) return 0; } +static void ext4_aiodio_wait(struct inode *inode) +{ + wait_queue_head_t *wq = ext4_ioend_wq(inode); + + wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_aiodio_unwritten) == 0)); +} + +/* + * This tests whether the IO in question is block-aligned or not. + * Ext4 utilizes unwritten extents when hole-filling during direct IO, and they + * are converted to written only after the IO is complete. Until they are + * mapped, these blocks appear as holes, so dio_zero_block() will assume that + * it needs to zero out portions of the start and/or end block. If 2 AIO + * threads are at work on the same unwritten block, they must be synchronized + * or one thread will zero the other's data, causing corruption. 
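The gate for that serialization is a simple mask test on the start and end offsets against the block size; writes at or beyond i_size are exempt because there is no existing data for a concurrent zero-out to clobber. The check in isolation, restated standalone with the block size passed in:

#include <stdio.h>

/* returns 1 when [pos, pos+count) does not begin and end on block
 * boundaries -- the case that forces serialized AIO in the patch */
static int unaligned_dio(long long pos, long long count,
			 unsigned int blocksize, long long i_size)
{
	unsigned int blockmask = blocksize - 1;	/* blocksize: power of 2 */
	long long final = pos + count;

	if (pos >= i_size)		/* extending writes are safe */
		return 0;
	return (pos & blockmask) || (final & blockmask);
}

int main(void)
{
	printf("%d\n", unaligned_dio(4096, 4096, 4096, 1 << 20));	/* 0 */
	printf("%d\n", unaligned_dio(4096, 512, 4096, 1 << 20));	/* 1 */
	return 0;
}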
+ */ +static int +ext4_unaligned_aio(struct inode *inode, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) +{ + struct super_block *sb = inode->i_sb; + int blockmask = sb->s_blocksize - 1; + size_t count = iov_length(iov, nr_segs); + loff_t final_size = pos + count; + + if (pos >= inode->i_size) + return 0; + + if ((pos & blockmask) || (final_size & blockmask)) + return 1; + + return 0; +} + static ssize_t ext4_file_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; + int unaligned_aio = 0; + int ret; /* * If we have encountered a bitmap-format file, the size limit @@ -78,9 +114,31 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov, nr_segs = iov_shorten((struct iovec *)iov, nr_segs, sbi->s_bitmap_maxbytes - pos); } + } else if (unlikely((iocb->ki_filp->f_flags & O_DIRECT) && + !is_sync_kiocb(iocb))) { + unaligned_aio = ext4_unaligned_aio(inode, iov, nr_segs, pos); } - return generic_file_aio_write(iocb, iov, nr_segs, pos); + /* Unaligned direct AIO must be serialized; see comment above */ + if (unaligned_aio) { + static unsigned long unaligned_warn_time; + + /* Warn about this once per day */ + if (printk_timed_ratelimit(&unaligned_warn_time, 60*60*24*HZ)) + ext4_msg(inode->i_sb, KERN_WARNING, + "Unaligned AIO/DIO on inode %ld by %s; " + "performance will be poor.", + inode->i_ino, current->comm); + mutex_lock(ext4_aio_mutex(inode)); + ext4_aiodio_wait(inode); + } + + ret = generic_file_aio_write(iocb, iov, nr_segs, pos); + + if (unaligned_aio) + mutex_unlock(ext4_aio_mutex(inode)); + + return ret; } static const struct vm_operations_struct ext4_file_vm_ops = { diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 851f49b..d1fe09a 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -342,10 +342,15 @@ static struct kmem_cache *ext4_free_ext_cachep; /* We create slab caches for groupinfo data structures based on the * superblock block size. 
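Picking one of the eight static caches is a log2 computation: supported block sizes run from 1k (EXT4_MIN_BLOCK_LOG_SIZE is 10) to 128k, so the slot index is log2(blocksize) - 10, as ext4_groupinfo_create_slab() below computes with order_base_2(). A standalone equivalent, assuming a power-of-two block size:

#include <stdio.h>

#define MIN_BLOCK_LOG_SIZE 10	/* 1k blocks */
#define NR_GRPINFO_CACHES   8	/* 1k .. 128k */

/* map a power-of-two block size to its groupinfo cache slot */
static int grpinfo_cache_index(unsigned int blocksize)
{
	int log = 0;

	while ((1u << log) < blocksize)
		log++;
	return log - MIN_BLOCK_LOG_SIZE;	/* caller checks the range */
}

int main(void)
{
	printf("%d\n", grpinfo_cache_index(1024));  /* 0: ext4_groupinfo_1k */
	printf("%d\n", grpinfo_cache_index(4096));  /* 2: ext4_groupinfo_4k */
	return 0;
}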
There will be one per mounted filesystem for * each unique s_blocksize_bits */ -#define NR_GRPINFO_CACHES \ - (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE + 1) +#define NR_GRPINFO_CACHES 8 static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES]; +static const char *ext4_groupinfo_slab_names[NR_GRPINFO_CACHES] = { + "ext4_groupinfo_1k", "ext4_groupinfo_2k", "ext4_groupinfo_4k", + "ext4_groupinfo_8k", "ext4_groupinfo_16k", "ext4_groupinfo_32k", + "ext4_groupinfo_64k", "ext4_groupinfo_128k" +}; + static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, ext4_group_t group); static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, @@ -2414,6 +2419,55 @@ err_freesgi: return -ENOMEM; } +static void ext4_groupinfo_destroy_slabs(void) +{ + int i; + + for (i = 0; i < NR_GRPINFO_CACHES; i++) { + if (ext4_groupinfo_caches[i]) + kmem_cache_destroy(ext4_groupinfo_caches[i]); + ext4_groupinfo_caches[i] = NULL; + } +} + +static int ext4_groupinfo_create_slab(size_t size) +{ + static DEFINE_MUTEX(ext4_grpinfo_slab_create_mutex); + int slab_size; + int blocksize_bits = order_base_2(size); + int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE; + struct kmem_cache *cachep; + + if (cache_index >= NR_GRPINFO_CACHES) + return -EINVAL; + + if (unlikely(cache_index < 0)) + cache_index = 0; + + mutex_lock(&ext4_grpinfo_slab_create_mutex); + if (ext4_groupinfo_caches[cache_index]) { + mutex_unlock(&ext4_grpinfo_slab_create_mutex); + return 0; /* Already created */ + } + + slab_size = offsetof(struct ext4_group_info, + bb_counters[blocksize_bits + 2]); + + cachep = kmem_cache_create(ext4_groupinfo_slab_names[cache_index], + slab_size, 0, SLAB_RECLAIM_ACCOUNT, + NULL); + + mutex_unlock(&ext4_grpinfo_slab_create_mutex); + if (!cachep) { + printk(KERN_EMERG "EXT4: no memory for groupinfo slab cache\n"); + return -ENOMEM; + } + + ext4_groupinfo_caches[cache_index] = cachep; + + return 0; +} + int ext4_mb_init(struct super_block *sb, int needs_recovery) { struct ext4_sb_info *sbi = EXT4_SB(sb); @@ -2421,9 +2475,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) unsigned offset; unsigned max; int ret; - int cache_index; - struct kmem_cache *cachep; - char *namep = NULL; i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets); @@ -2440,30 +2491,9 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) goto out; } - cache_index = sb->s_blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE; - cachep = ext4_groupinfo_caches[cache_index]; - if (!cachep) { - char name[32]; - int len = offsetof(struct ext4_group_info, - bb_counters[sb->s_blocksize_bits + 2]); - - sprintf(name, "ext4_groupinfo_%d", sb->s_blocksize_bits); - namep = kstrdup(name, GFP_KERNEL); - if (!namep) { - ret = -ENOMEM; - goto out; - } - - /* Need to free the kmem_cache_name() when we - * destroy the slab */ - cachep = kmem_cache_create(namep, len, 0, - SLAB_RECLAIM_ACCOUNT, NULL); - if (!cachep) { - ret = -ENOMEM; - goto out; - } - ext4_groupinfo_caches[cache_index] = cachep; - } + ret = ext4_groupinfo_create_slab(sb->s_blocksize); + if (ret < 0) + goto out; /* order 0 is regular bitmap */ sbi->s_mb_maxs[0] = sb->s_blocksize << 3; @@ -2520,7 +2550,6 @@ out: if (ret) { kfree(sbi->s_mb_offsets); kfree(sbi->s_mb_maxs); - kfree(namep); } return ret; } @@ -2734,7 +2763,6 @@ int __init ext4_init_mballoc(void) void ext4_exit_mballoc(void) { - int i; /* * Wait for completion of call_rcu()'s on ext4_pspace_cachep * before destroying the slab cache. 
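ext4_groupinfo_create_slab() also replaces the old kstrdup'd cache names with the static name table and a create-once protocol: take a static mutex, re-check the slot, and create only if it is still empty, so two racing mounts cannot create the same slab twice. The idiom, with a pthread mutex in place of the kernel one and an allocation standing in for kmem_cache_create():

#include <pthread.h>
#include <stdlib.h>

#define NR_CACHES 8
static void *caches[NR_CACHES];
static pthread_mutex_t create_lock = PTHREAD_MUTEX_INITIALIZER;

static void *make_cache(void)
{
	return calloc(1, 64);		/* any one-time allocation */
}

static int get_or_create_cache(int idx)
{
	if (idx < 0 || idx >= NR_CACHES)
		return -1;			/* -EINVAL in the patch */

	pthread_mutex_lock(&create_lock);
	if (caches[idx]) {			/* re-check under the lock */
		pthread_mutex_unlock(&create_lock);
		return 0;			/* already created */
	}
	caches[idx] = make_cache();
	pthread_mutex_unlock(&create_lock);
	return caches[idx] ? 0 : -2;		/* -ENOMEM in the patch */
}

int main(void)
{
	return get_or_create_cache(2) || get_or_create_cache(2); /* no-op 2nd */
}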
@@ -2743,15 +2771,7 @@ void ext4_exit_mballoc(void) kmem_cache_destroy(ext4_pspace_cachep); kmem_cache_destroy(ext4_ac_cachep); kmem_cache_destroy(ext4_free_ext_cachep); - - for (i = 0; i < NR_GRPINFO_CACHES; i++) { - struct kmem_cache *cachep = ext4_groupinfo_caches[i]; - if (cachep) { - char *name = (char *)kmem_cache_name(cachep); - kmem_cache_destroy(cachep); - kfree(name); - } - } + ext4_groupinfo_destroy_slabs(); ext4_remove_debugfs_entry(); } diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 7270dcf..955cc30 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -32,14 +32,8 @@ static struct kmem_cache *io_page_cachep, *io_end_cachep; -#define WQ_HASH_SZ 37 -#define to_ioend_wq(v) (&ioend_wq[((unsigned long)v) % WQ_HASH_SZ]) -static wait_queue_head_t ioend_wq[WQ_HASH_SZ]; - int __init ext4_init_pageio(void) { - int i; - io_page_cachep = KMEM_CACHE(ext4_io_page, SLAB_RECLAIM_ACCOUNT); if (io_page_cachep == NULL) return -ENOMEM; @@ -48,9 +42,6 @@ int __init ext4_init_pageio(void) kmem_cache_destroy(io_page_cachep); return -ENOMEM; } - for (i = 0; i < WQ_HASH_SZ; i++) - init_waitqueue_head(&ioend_wq[i]); - return 0; } @@ -62,7 +53,7 @@ void ext4_exit_pageio(void) void ext4_ioend_wait(struct inode *inode) { - wait_queue_head_t *wq = to_ioend_wq(inode); + wait_queue_head_t *wq = ext4_ioend_wq(inode); wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_ioend_count) == 0)); } @@ -87,7 +78,7 @@ void ext4_free_io_end(ext4_io_end_t *io) for (i = 0; i < io->num_io_pages; i++) put_io_page(io->pages[i]); io->num_io_pages = 0; - wq = to_ioend_wq(io->inode); + wq = ext4_ioend_wq(io->inode); if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count) && waitqueue_active(wq)) wake_up_all(wq); @@ -102,6 +93,7 @@ int ext4_end_io_nolock(ext4_io_end_t *io) struct inode *inode = io->inode; loff_t offset = io->offset; ssize_t size = io->size; + wait_queue_head_t *wq; int ret = 0; ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p," @@ -126,7 +118,16 @@ int ext4_end_io_nolock(ext4_io_end_t *io) if (io->iocb) aio_complete(io->iocb, io->result, 0); /* clear the DIO AIO unwritten flag */ - io->flag &= ~EXT4_IO_END_UNWRITTEN; + if (io->flag & EXT4_IO_END_UNWRITTEN) { + io->flag &= ~EXT4_IO_END_UNWRITTEN; + /* Wake up anyone waiting on unwritten extent conversion */ + wq = ext4_ioend_wq(io->inode); + if (atomic_dec_and_test(&EXT4_I(inode)->i_aiodio_unwritten) && + waitqueue_active(wq)) { + wake_up_all(wq); + } + } + return ret; } @@ -190,6 +191,7 @@ static void ext4_end_bio(struct bio *bio, int error) struct inode *inode; unsigned long flags; int i; + sector_t bi_sector = bio->bi_sector; BUG_ON(!io_end); bio->bi_private = NULL; @@ -207,9 +209,7 @@ static void ext4_end_bio(struct bio *bio, int error) if (error) SetPageError(page); BUG_ON(!head); - if (head->b_size == PAGE_CACHE_SIZE) - clear_buffer_dirty(head); - else { + if (head->b_size != PAGE_CACHE_SIZE) { loff_t offset; loff_t io_end_offset = io_end->offset + io_end->size; @@ -221,7 +221,6 @@ static void ext4_end_bio(struct bio *bio, int error) if (error) buffer_io_error(bh); - clear_buffer_dirty(bh); } if (buffer_delay(bh)) partial_write = 1; @@ -257,7 +256,7 @@ static void ext4_end_bio(struct bio *bio, int error) (unsigned long long) io_end->offset, (long) io_end->size, (unsigned long long) - bio->bi_sector >> (inode->i_blkbits - 9)); + bi_sector >> (inode->i_blkbits - 9)); } /* Add the io_end to per-inode completed io list*/ @@ -380,6 +379,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io, blocksize = 1 << inode->i_blkbits; + 
BUG_ON(!PageLocked(page)); BUG_ON(PageWriteback(page)); set_page_writeback(page); ClearPageError(page); @@ -397,12 +397,14 @@ int ext4_bio_write_page(struct ext4_io_submit *io, for (bh = head = page_buffers(page), block_start = 0; bh != head || !block_start; block_start = block_end, bh = bh->b_this_page) { + block_end = block_start + blocksize; if (block_start >= len) { clear_buffer_dirty(bh); set_buffer_uptodate(bh); continue; } + clear_buffer_dirty(bh); ret = io_submit_add_bh(io, io_page, inode, wbc, bh); if (ret) { /* diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 48ce561..f6a318f 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -77,6 +77,7 @@ static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data); static void ext4_destroy_lazyinit_thread(void); static void ext4_unregister_li_request(struct super_block *sb); +static void ext4_clear_request_list(void); #if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) static struct file_system_type ext3_fs_type = { @@ -832,6 +833,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) ei->i_sync_tid = 0; ei->i_datasync_tid = 0; atomic_set(&ei->i_ioend_count, 0); + atomic_set(&ei->i_aiodio_unwritten, 0); return &ei->vfs_inode; } @@ -2716,6 +2718,8 @@ static void ext4_unregister_li_request(struct super_block *sb) mutex_unlock(&ext4_li_info->li_list_mtx); } +static struct task_struct *ext4_lazyinit_task; + /* * This is the function where ext4lazyinit thread lives. It walks * through the request list searching for next scheduled filesystem. @@ -2784,6 +2788,10 @@ cont_thread: if (time_before(jiffies, next_wakeup)) schedule(); finish_wait(&eli->li_wait_daemon, &wait); + if (kthread_should_stop()) { + ext4_clear_request_list(); + goto exit_thread; + } } exit_thread: @@ -2808,6 +2816,7 @@ exit_thread: wake_up(&eli->li_wait_task); kfree(ext4_li_info); + ext4_lazyinit_task = NULL; ext4_li_info = NULL; mutex_unlock(&ext4_li_mtx); @@ -2830,11 +2839,10 @@ static void ext4_clear_request_list(void) static int ext4_run_lazyinit_thread(void) { - struct task_struct *t; - - t = kthread_run(ext4_lazyinit_thread, ext4_li_info, "ext4lazyinit"); - if (IS_ERR(t)) { - int err = PTR_ERR(t); + ext4_lazyinit_task = kthread_run(ext4_lazyinit_thread, + ext4_li_info, "ext4lazyinit"); + if (IS_ERR(ext4_lazyinit_task)) { + int err = PTR_ERR(ext4_lazyinit_task); ext4_clear_request_list(); del_timer_sync(&ext4_li_info->li_timer); kfree(ext4_li_info); @@ -2985,16 +2993,10 @@ static void ext4_destroy_lazyinit_thread(void) * If thread exited earlier * there's nothing to be done. 
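The lazyinit teardown above trades the hand-rolled li_wait_daemon/li_wait_task handshake for the stock kthread protocol: the task_struct is kept in ext4_lazyinit_task, the thread checks kthread_should_stop() after each sleep and drains the request list before exiting, and ext4_destroy_lazyinit_thread() collapses to a single kthread_stop() call. Modeled in userspace with an atomic stop flag and a join:

#include <pthread.h>
#include <stdatomic.h>
#include <unistd.h>

static atomic_bool should_stop;
static pthread_t lazyinit_task;

static void clear_request_list(void) { /* drain pending work */ }

static void *lazyinit_thread(void *arg)
{
	(void)arg;
	for (;;) {
		/* ... pick the next request, do work, sleep ... */
		usleep(1000);
		if (atomic_load(&should_stop)) {	/* kthread_should_stop() */
			clear_request_list();	/* leave nothing queued */
			break;
		}
	}
	return NULL;
}

/* kthread_stop() analogue: flag the thread, then wait for it to exit */
static void destroy_lazyinit_thread(void)
{
	atomic_store(&should_stop, 1);
	pthread_join(lazyinit_task, NULL);
}

int main(void)
{
	pthread_create(&lazyinit_task, NULL, lazyinit_thread, NULL);
	destroy_lazyinit_thread();
	return 0;
}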
*/ - if (!ext4_li_info) + if (!ext4_li_info || !ext4_lazyinit_task) return; - ext4_clear_request_list(); - - while (ext4_li_info->li_task) { - wake_up(&ext4_li_info->li_wait_daemon); - wait_event(ext4_li_info->li_wait_task, - ext4_li_info->li_task == NULL); - } + kthread_stop(ext4_lazyinit_task); } static int ext4_fill_super(struct super_block *sb, void *data, int silent) @@ -4768,7 +4770,7 @@ static struct file_system_type ext4_fs_type = { .fs_flags = FS_REQUIRES_DEV, }; -int __init ext4_init_feat_adverts(void) +static int __init ext4_init_feat_adverts(void) { struct ext4_features *ef; int ret = -ENOMEM; @@ -4792,23 +4794,44 @@ out: return ret; } +static void ext4_exit_feat_adverts(void) +{ + kobject_put(&ext4_feat->f_kobj); + wait_for_completion(&ext4_feat->f_kobj_unregister); + kfree(ext4_feat); +} + +/* Shared across all ext4 file systems */ +wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ]; +struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ]; + static int __init ext4_init_fs(void) { - int err; + int i, err; ext4_check_flag_values(); + + for (i = 0; i < EXT4_WQ_HASH_SZ; i++) { + mutex_init(&ext4__aio_mutex[i]); + init_waitqueue_head(&ext4__ioend_wq[i]); + } + err = ext4_init_pageio(); if (err) return err; err = ext4_init_system_zone(); if (err) - goto out5; + goto out7; ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); if (!ext4_kset) - goto out4; + goto out6; ext4_proc_root = proc_mkdir("fs/ext4", NULL); + if (!ext4_proc_root) + goto out5; err = ext4_init_feat_adverts(); + if (err) + goto out4; err = ext4_init_mballoc(); if (err) @@ -4838,12 +4861,14 @@ out1: out2: ext4_exit_mballoc(); out3: - kfree(ext4_feat); + ext4_exit_feat_adverts(); +out4: remove_proc_entry("fs/ext4", NULL); +out5: kset_unregister(ext4_kset); -out4: +out6: ext4_exit_system_zone(); -out5: +out7: ext4_exit_pageio(); return err; } @@ -4857,6 +4882,7 @@ static void __exit ext4_exit_fs(void) destroy_inodecache(); ext4_exit_xattr(); ext4_exit_mballoc(); + ext4_exit_feat_adverts(); remove_proc_entry("fs/ext4", NULL); kset_unregister(ext4_kset); ext4_exit_system_zone(); @@ -815,7 +815,7 @@ static int __init fcntl_init(void) __O_SYNC | O_DSYNC | FASYNC | O_DIRECT | O_LARGEFILE | O_DIRECTORY | O_NOFOLLOW | O_NOATIME | O_CLOEXEC | - FMODE_EXEC + __FMODE_EXEC )); fasync_cache = kmem_cache_create("fasync_cache", diff --git a/fs/file_table.c b/fs/file_table.c index c3e89ad..eb36b6b 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -125,13 +125,13 @@ struct file *get_empty_filp(void) goto fail; percpu_counter_inc(&nr_files); + f->f_cred = get_cred(cred); if (security_file_alloc(f)) goto fail_sec; INIT_LIST_HEAD(&f->f_u.fu_list); atomic_long_set(&f->f_count, 1); rwlock_init(&f->f_owner.lock); - f->f_cred = get_cred(cred); spin_lock_init(&f->f_lock); eventpoll_init_file(f); /* f->f_version: 0 */ diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 08a8beb..7cd9a5a 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1779,11 +1779,11 @@ int __init gfs2_glock_init(void) #endif glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM | - WQ_HIGHPRI | WQ_FREEZEABLE, 0); + WQ_HIGHPRI | WQ_FREEZABLE, 0); if (IS_ERR(glock_workqueue)) return PTR_ERR(glock_workqueue); gfs2_delete_workqueue = alloc_workqueue("delete_workqueue", - WQ_MEM_RECLAIM | WQ_FREEZEABLE, + WQ_MEM_RECLAIM | WQ_FREEZABLE, 0); if (IS_ERR(gfs2_delete_workqueue)) { destroy_workqueue(glock_workqueue); diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index ebef7ab..85ba027 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -144,7 +144,7 @@ static int 
__init init_gfs2_fs(void) error = -ENOMEM; gfs_recovery_wq = alloc_workqueue("gfs_recovery", - WQ_MEM_RECLAIM | WQ_FREEZEABLE, 0); + WQ_MEM_RECLAIM | WQ_FREEZABLE, 0); if (!gfs_recovery_wq) goto fail_wq; diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c index 52a0bca..b1991a2 100644 --- a/fs/hfsplus/extents.c +++ b/fs/hfsplus/extents.c @@ -397,8 +397,8 @@ int hfsplus_file_extend(struct inode *inode) u32 start, len, goal; int res; - if (sbi->total_blocks - sbi->free_blocks + 8 > - sbi->alloc_file->i_size * 8) { + if (sbi->alloc_file->i_size * 8 < + sbi->total_blocks - sbi->free_blocks + 8) { /* extend alloc file */ printk(KERN_ERR "hfs: extend alloc file! " "(%llu,%u,%u)\n", diff --git a/fs/hfsplus/part_tbl.c b/fs/hfsplus/part_tbl.c index d66ad11..40ad88c 100644 --- a/fs/hfsplus/part_tbl.c +++ b/fs/hfsplus/part_tbl.c @@ -134,7 +134,7 @@ int hfs_part_find(struct super_block *sb, res = hfsplus_submit_bio(sb->s_bdev, *part_start + HFS_PMAP_BLK, data, READ); if (res) - return res; + goto out; switch (be16_to_cpu(*((__be16 *)data))) { case HFS_OLD_PMAP_MAGIC: @@ -147,7 +147,7 @@ int hfs_part_find(struct super_block *sb, res = -ENOENT; break; } - +out: kfree(data); return res; } diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 9a3b479..b49b555 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -338,20 +338,22 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) struct inode *root, *inode; struct qstr str; struct nls_table *nls = NULL; - int err = -EINVAL; + int err; + err = -EINVAL; sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); if (!sbi) - return -ENOMEM; + goto out; sb->s_fs_info = sbi; mutex_init(&sbi->alloc_mutex); mutex_init(&sbi->vh_mutex); hfsplus_fill_defaults(sbi); + + err = -EINVAL; if (!hfsplus_parse_options(data, sbi)) { printk(KERN_ERR "hfs: unable to parse mount options\n"); - err = -EINVAL; - goto cleanup; + goto out_unload_nls; } /* temporarily use utf8 to correctly find the hidden dir below */ @@ -359,16 +361,14 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) sbi->nls = load_nls("utf8"); if (!sbi->nls) { printk(KERN_ERR "hfs: unable to load nls for utf8\n"); - err = -EINVAL; - goto cleanup; + goto out_unload_nls; } /* Grab the volume header */ if (hfsplus_read_wrapper(sb)) { if (!silent) printk(KERN_WARNING "hfs: unable to find HFS+ superblock\n"); - err = -EINVAL; - goto cleanup; + goto out_unload_nls; } vhdr = sbi->s_vhdr; @@ -377,7 +377,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) if (be16_to_cpu(vhdr->version) < HFSPLUS_MIN_VERSION || be16_to_cpu(vhdr->version) > HFSPLUS_CURRENT_VERSION) { printk(KERN_ERR "hfs: wrong filesystem version\n"); - goto cleanup; + goto out_free_vhdr; } sbi->total_blocks = be32_to_cpu(vhdr->total_blocks); sbi->free_blocks = be32_to_cpu(vhdr->free_blocks); @@ -421,19 +421,19 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) sbi->ext_tree = hfs_btree_open(sb, HFSPLUS_EXT_CNID); if (!sbi->ext_tree) { printk(KERN_ERR "hfs: failed to load extents file\n"); - goto cleanup; + goto out_free_vhdr; } sbi->cat_tree = hfs_btree_open(sb, HFSPLUS_CAT_CNID); if (!sbi->cat_tree) { printk(KERN_ERR "hfs: failed to load catalog file\n"); - goto cleanup; + goto out_close_ext_tree; } inode = hfsplus_iget(sb, HFSPLUS_ALLOC_CNID); if (IS_ERR(inode)) { printk(KERN_ERR "hfs: failed to load allocation file\n"); err = PTR_ERR(inode); - goto cleanup; + goto out_close_cat_tree; } sbi->alloc_file = inode; @@ -442,14 +442,7 @@ 
static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) if (IS_ERR(root)) { printk(KERN_ERR "hfs: failed to load root directory\n"); err = PTR_ERR(root); - goto cleanup; - } - sb->s_d_op = &hfsplus_dentry_operations; - sb->s_root = d_alloc_root(root); - if (!sb->s_root) { - iput(root); - err = -ENOMEM; - goto cleanup; + goto out_put_alloc_file; } str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1; @@ -459,46 +452,69 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) if (!hfs_brec_read(&fd, &entry, sizeof(entry))) { hfs_find_exit(&fd); if (entry.type != cpu_to_be16(HFSPLUS_FOLDER)) - goto cleanup; + goto out_put_root; inode = hfsplus_iget(sb, be32_to_cpu(entry.folder.id)); if (IS_ERR(inode)) { err = PTR_ERR(inode); - goto cleanup; + goto out_put_root; } sbi->hidden_dir = inode; } else hfs_find_exit(&fd); - if (sb->s_flags & MS_RDONLY) - goto out; + if (!(sb->s_flags & MS_RDONLY)) { + /* + * H+LX == hfsplusutils, H+Lx == this driver, H+lx is unused + * all three are registered with Apple for our use + */ + vhdr->last_mount_vers = cpu_to_be32(HFSP_MOUNT_VERSION); + vhdr->modify_date = hfsp_now2mt(); + be32_add_cpu(&vhdr->write_count, 1); + vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_UNMNT); + vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_INCNSTNT); + hfsplus_sync_fs(sb, 1); - /* H+LX == hfsplusutils, H+Lx == this driver, H+lx is unused - * all three are registered with Apple for our use - */ - vhdr->last_mount_vers = cpu_to_be32(HFSP_MOUNT_VERSION); - vhdr->modify_date = hfsp_now2mt(); - be32_add_cpu(&vhdr->write_count, 1); - vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_UNMNT); - vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_INCNSTNT); - hfsplus_sync_fs(sb, 1); - - if (!sbi->hidden_dir) { - mutex_lock(&sbi->vh_mutex); - sbi->hidden_dir = hfsplus_new_inode(sb, S_IFDIR); - hfsplus_create_cat(sbi->hidden_dir->i_ino, sb->s_root->d_inode, - &str, sbi->hidden_dir); - mutex_unlock(&sbi->vh_mutex); - - hfsplus_mark_inode_dirty(sbi->hidden_dir, HFSPLUS_I_CAT_DIRTY); + if (!sbi->hidden_dir) { + mutex_lock(&sbi->vh_mutex); + sbi->hidden_dir = hfsplus_new_inode(sb, S_IFDIR); + hfsplus_create_cat(sbi->hidden_dir->i_ino, root, &str, + sbi->hidden_dir); + mutex_unlock(&sbi->vh_mutex); + + hfsplus_mark_inode_dirty(sbi->hidden_dir, + HFSPLUS_I_CAT_DIRTY); + } } -out: + + sb->s_d_op = &hfsplus_dentry_operations; + sb->s_root = d_alloc_root(root); + if (!sb->s_root) { + err = -ENOMEM; + goto out_put_hidden_dir; + } + unload_nls(sbi->nls); sbi->nls = nls; return 0; -cleanup: - hfsplus_put_super(sb); +out_put_hidden_dir: + iput(sbi->hidden_dir); +out_put_root: + iput(sbi->alloc_file); +out_put_alloc_file: + iput(sbi->alloc_file); +out_close_cat_tree: + hfs_btree_close(sbi->cat_tree); +out_close_ext_tree: + hfs_btree_close(sbi->ext_tree); +out_free_vhdr: + kfree(sbi->s_vhdr); + kfree(sbi->s_backup_vhdr); +out_unload_nls: + unload_nls(sbi->nls); unload_nls(nls); + kfree(sbi); +out: return err; } diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c index 1962317..3031d81 100644 --- a/fs/hfsplus/wrapper.c +++ b/fs/hfsplus/wrapper.c @@ -167,7 +167,7 @@ reread: break; case cpu_to_be16(HFSP_WRAP_MAGIC): if (!hfsplus_read_mdb(sbi->s_vhdr, &wd)) - goto out; + goto out_free_backup_vhdr; wd.ablk_size >>= HFSPLUS_SECTOR_SHIFT; part_start += wd.ablk_start + wd.embed_start * wd.ablk_size; part_size = wd.embed_count * wd.ablk_size; @@ -179,7 +179,7 @@ reread: * (should do this only for cdrom/loop though) */ if (hfs_part_find(sb, &part_start, &part_size)) - goto out; + goto 
out_free_backup_vhdr; goto reread; } @@ -273,6 +273,13 @@ int __generic_block_fiemap(struct inode *inode, len = isize; } + /* + * Some filesystems can't deal with being asked to map less than + * blocksize, so make sure our len is at least block length. + */ + if (logical_to_blk(inode, len) == 0) + len = blk_to_logical(inode, 1); + start_blk = logical_to_blk(inode, start); last_blk = logical_to_blk(inode, start + len - 1); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 9e46869..97e7346 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -473,7 +473,8 @@ int __jbd2_log_space_left(journal_t *journal) } /* - * Called under j_state_lock. Returns true if a transaction commit was started. + * Called with j_state_lock locked for writing. + * Returns true if a transaction commit was started. */ int __jbd2_log_start_commit(journal_t *journal, tid_t target) { @@ -520,11 +521,13 @@ int jbd2_journal_force_commit_nested(journal_t *journal) { transaction_t *transaction = NULL; tid_t tid; + int need_to_start = 0; read_lock(&journal->j_state_lock); if (journal->j_running_transaction && !current->journal_info) { transaction = journal->j_running_transaction; - __jbd2_log_start_commit(journal, transaction->t_tid); + if (!tid_geq(journal->j_commit_request, transaction->t_tid)) + need_to_start = 1; } else if (journal->j_committing_transaction) transaction = journal->j_committing_transaction; @@ -535,6 +538,8 @@ int jbd2_journal_force_commit_nested(journal_t *journal) tid = transaction->t_tid; read_unlock(&journal->j_state_lock); + if (need_to_start) + jbd2_log_start_commit(journal, tid); jbd2_log_wait_commit(journal, tid); return 1; } diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index faad2bd..1d11910 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -117,10 +117,10 @@ static inline void update_t_max_wait(transaction_t *transaction) static int start_this_handle(journal_t *journal, handle_t *handle, int gfp_mask) { - transaction_t *transaction; - int needed; - int nblocks = handle->h_buffer_credits; - transaction_t *new_transaction = NULL; + transaction_t *transaction, *new_transaction = NULL; + tid_t tid; + int needed, need_to_start; + int nblocks = handle->h_buffer_credits; if (nblocks > journal->j_max_transaction_buffers) { printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n", @@ -222,8 +222,11 @@ repeat: atomic_sub(nblocks, &transaction->t_outstanding_credits); prepare_to_wait(&journal->j_wait_transaction_locked, &wait, TASK_UNINTERRUPTIBLE); - __jbd2_log_start_commit(journal, transaction->t_tid); + tid = transaction->t_tid; + need_to_start = !tid_geq(journal->j_commit_request, tid); read_unlock(&journal->j_state_lock); + if (need_to_start) + jbd2_log_start_commit(journal, tid); schedule(); finish_wait(&journal->j_wait_transaction_locked, &wait); goto repeat; @@ -442,7 +445,8 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask) { transaction_t *transaction = handle->h_transaction; journal_t *journal = transaction->t_journal; - int ret; + tid_t tid; + int need_to_start, ret; /* If we've had an abort of any type, don't even think about * actually doing the restart! 
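Every jbd2 call site in the hunks below follows the same discipline: __jbd2_log_start_commit() must be entered with j_state_lock held for writing (as the corrected comment notes), so code that holds it for reading now merely records the tid and computes need_to_start with tid_geq(), drops the lock, and calls jbd2_log_start_commit(), which retakes the lock properly. The skeleton of that pattern, with a pthread rwlock standing in for j_state_lock:

#include <pthread.h>
#include <stdio.h>

typedef unsigned int tid_t;

static pthread_rwlock_t state_lock = PTHREAD_RWLOCK_INITIALIZER;
static tid_t commit_request, running_tid = 1;

/* tid comparison with wraparound, as jbd2 defines tid_geq() */
static int tid_geq(tid_t x, tid_t y)
{
	return (int)(x - y) >= 0;
}

/* takes the lock for writing, so never call it under the read lock */
static void log_start_commit(tid_t tid)
{
	pthread_rwlock_wrlock(&state_lock);
	if (!tid_geq(commit_request, tid))
		commit_request = tid;
	pthread_rwlock_unlock(&state_lock);
}

static void kick_commit(void)
{
	tid_t tid;
	int need_to_start;

	pthread_rwlock_rdlock(&state_lock);
	tid = running_tid;
	/* decide under the read lock; act only after dropping it */
	need_to_start = !tid_geq(commit_request, tid);
	pthread_rwlock_unlock(&state_lock);

	if (need_to_start)
		log_start_commit(tid);	/* safe: no lock held anymore */
}

int main(void)
{
	kick_commit();
	printf("commit_request = %u\n", commit_request);	/* 1 */
	return 0;
}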
*/ @@ -465,8 +469,11 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask) spin_unlock(&transaction->t_handle_lock); jbd_debug(2, "restarting handle %p\n", handle); - __jbd2_log_start_commit(journal, transaction->t_tid); + tid = transaction->t_tid; + need_to_start = !tid_geq(journal->j_commit_request, tid); read_unlock(&journal->j_state_lock); + if (need_to_start) + jbd2_log_start_commit(journal, tid); lock_map_release(&handle->h_lockdep_map); handle->h_buffer_credits = nblocks; @@ -455,14 +455,6 @@ static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry struct fs_struct *fs = current->fs; struct dentry *parent = nd->path.dentry; - /* - * It can be possible to revalidate the dentry that we started - * the path walk with. force_reval_path may also revalidate the - * dentry already committed to the nameidata. - */ - if (unlikely(parent == dentry)) - return nameidata_drop_rcu(nd); - BUG_ON(!(nd->flags & LOOKUP_RCU)); if (nd->root.mnt) { spin_lock(&fs->lock); @@ -561,39 +553,25 @@ static inline int nameidata_drop_rcu_last_maybe(struct nameidata *nd) */ void release_open_intent(struct nameidata *nd) { - if (nd->intent.open.file->f_path.dentry == NULL) - put_filp(nd->intent.open.file); - else - fput(nd->intent.open.file); -} - -/* - * Call d_revalidate and handle filesystems that request rcu-walk - * to be dropped. This may be called and return in rcu-walk mode, - * regardless of success or error. If -ECHILD is returned, the caller - * must return -ECHILD back up the path walk stack so path walk may - * be restarted in ref-walk mode. - */ -static int d_revalidate(struct dentry *dentry, struct nameidata *nd) -{ - int status; + struct file *file = nd->intent.open.file; - status = dentry->d_op->d_revalidate(dentry, nd); - if (status == -ECHILD) { - if (nameidata_dentry_drop_rcu(nd, dentry)) - return status; - status = dentry->d_op->d_revalidate(dentry, nd); + if (file && !IS_ERR(file)) { + if (file->f_path.dentry == NULL) + put_filp(file); + else + fput(file); } +} - return status; +static inline int d_revalidate(struct dentry *dentry, struct nameidata *nd) +{ + return dentry->d_op->d_revalidate(dentry, nd); } -static inline struct dentry * +static struct dentry * do_revalidate(struct dentry *dentry, struct nameidata *nd) { - int status; - - status = d_revalidate(dentry, nd); + int status = d_revalidate(dentry, nd); if (unlikely(status <= 0)) { /* * The dentry failed validation. @@ -602,24 +580,39 @@ do_revalidate(struct dentry *dentry, struct nameidata *nd) * to return a fail status. 
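The namei rework below keeps d_revalidate()'s three-way return contract in one place: positive means the dentry is still good, zero means invalidate it and look it up again, and a negative value is a hard error, with -ECHILD specifically requesting a drop from rcu-walk to ref-walk before retrying. A caller therefore branches along these lines (simplified, outside any locking):

#include <errno.h>
#include <stdio.h>

enum outcome { USE_DENTRY, LOOKUP_AGAIN, FAIL, RETRY_REF_WALK };

/* interpret a d_revalidate-style return value */
static enum outcome handle_revalidate(int status)
{
	if (status > 0)
		return USE_DENTRY;	/* still valid, keep walking */
	if (status == 0)
		return LOOKUP_AGAIN;	/* invalidate and re-lookup */
	if (status == -ECHILD)
		return RETRY_REF_WALK;	/* leave rcu-walk, then retry */
	return FAIL;			/* hard error, e.g. -ESTALE */
}

int main(void)
{
	printf("%d %d %d %d\n",
	       handle_revalidate(1), handle_revalidate(0),
	       handle_revalidate(-ECHILD), handle_revalidate(-ESTALE));
	return 0;
}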
*/ if (status < 0) { - /* If we're in rcu-walk, we don't have a ref */ - if (!(nd->flags & LOOKUP_RCU)) - dput(dentry); + dput(dentry); dentry = ERR_PTR(status); - - } else { - /* Don't d_invalidate in rcu-walk mode */ - if (nameidata_dentry_drop_rcu_maybe(nd, dentry)) - return ERR_PTR(-ECHILD); - if (!d_invalidate(dentry)) { - dput(dentry); - dentry = NULL; - } + } else if (!d_invalidate(dentry)) { + dput(dentry); + dentry = NULL; } } return dentry; } +static inline struct dentry * +do_revalidate_rcu(struct dentry *dentry, struct nameidata *nd) +{ + int status = d_revalidate(dentry, nd); + if (likely(status > 0)) + return dentry; + if (status == -ECHILD) { + if (nameidata_dentry_drop_rcu(nd, dentry)) + return ERR_PTR(-ECHILD); + return do_revalidate(dentry, nd); + } + if (status < 0) + return ERR_PTR(status); + /* Don't d_invalidate in rcu-walk mode */ + if (nameidata_dentry_drop_rcu(nd, dentry)) + return ERR_PTR(-ECHILD); + if (!d_invalidate(dentry)) { + dput(dentry); + dentry = NULL; + } + return dentry; +} + static inline int need_reval_dot(struct dentry *dentry) { if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE))) @@ -664,9 +657,6 @@ force_reval_path(struct path *path, struct nameidata *nd) return 0; if (!status) { - /* Don't d_invalidate in rcu-walk mode */ - if (nameidata_drop_rcu(nd)) - return -ECHILD; d_invalidate(dentry); status = -ESTALE; } @@ -773,6 +763,8 @@ __do_follow_link(const struct path *link, struct nameidata *nd, void **p) int error; struct dentry *dentry = link->dentry; + BUG_ON(nd->flags & LOOKUP_RCU); + touch_atime(link->mnt, dentry); nd_set_link(nd, NULL); @@ -803,10 +795,16 @@ __do_follow_link(const struct path *link, struct nameidata *nd, void **p) * Without that kind of total limit, nasty chains of consecutive * symlinks can cause almost arbitrarily long lookups. 
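The limits referred to just below are two per-task counters: link_count bounds recursion depth at MAX_NESTED_LINKS (8 in kernels of this vintage) and total_link_count caps an entire walk at 40 links, so both self-referential links and long chains terminate with -ELOOP. A toy resolver showing both guards:

#include <errno.h>
#include <stdio.h>

#define MAX_NESTED_LINKS 8	/* recursion depth cap */
#define MAX_TOTAL_LINKS 40	/* cap across the whole walk */

struct node { const struct node *link; };	/* NULL: not a symlink */

static int resolve(const struct node *n, int depth, int *total)
{
	if (!n->link)
		return 0;			/* regular node: done */
	if (depth >= MAX_NESTED_LINKS)
		return -ELOOP;			/* nested too deeply */
	if ((*total)++ >= MAX_TOTAL_LINKS)
		return -ELOOP;			/* walk followed too many */
	return resolve(n->link, depth + 1, total);
}

int main(void)
{
	struct node a = { 0 }, b = { &a }, self = { &self };
	int total = 0;

	printf("%d\n", resolve(&b, 0, &total));		/* 0 */
	printf("%d\n", resolve(&self, 0, &total));	/* -ELOOP */
	return 0;
}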
@@ -803,10 +795,16 @@ __do_follow_link(const struct path *link, struct nameidata *nd, void **p)
  * Without that kind of total limit, nasty chains of consecutive
  * symlinks can cause almost arbitrarily long lookups.
  */
-static inline int do_follow_link(struct path *path, struct nameidata *nd)
+static inline int do_follow_link(struct inode *inode, struct path *path, struct nameidata *nd)
 {
 	void *cookie;
 	int err = -ELOOP;
+
+	/* We drop rcu-walk here */
+	if (nameidata_dentry_drop_rcu_maybe(nd, path->dentry))
+		return -ECHILD;
+	BUG_ON(inode != path->dentry->d_inode);
+
 	if (current->link_count >= MAX_NESTED_LINKS)
 		goto loop;
 	if (current->total_link_count >= 40)
@@ -1251,9 +1249,15 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
 			return -ECHILD;
 
 		nd->seq = seq;
-		if (dentry->d_flags & DCACHE_OP_REVALIDATE)
-			goto need_revalidate;
-done2:
+		if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) {
+			dentry = do_revalidate_rcu(dentry, nd);
+			if (!dentry)
+				goto need_lookup;
+			if (IS_ERR(dentry))
+				goto fail;
+			if (!(nd->flags & LOOKUP_RCU))
+				goto done;
+		}
 		path->mnt = mnt;
 		path->dentry = dentry;
 		if (likely(__follow_mount_rcu(nd, path, inode, false)))
@@ -1266,8 +1270,13 @@ done2:
 	if (!dentry)
 		goto need_lookup;
 found:
-	if (dentry->d_flags & DCACHE_OP_REVALIDATE)
-		goto need_revalidate;
+	if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) {
+		dentry = do_revalidate(dentry, nd);
+		if (!dentry)
+			goto need_lookup;
+		if (IS_ERR(dentry))
+			goto fail;
+	}
 done:
 	path->mnt = mnt;
 	path->dentry = dentry;
@@ -1309,16 +1318,6 @@ need_lookup:
 	mutex_unlock(&dir->i_mutex);
 	goto found;
 
-need_revalidate:
-	dentry = do_revalidate(dentry, nd);
-	if (!dentry)
-		goto need_lookup;
-	if (IS_ERR(dentry))
-		goto fail;
-	if (nd->flags & LOOKUP_RCU)
-		goto done2;
-	goto done;
-
 fail:
 	return PTR_ERR(dentry);
 }
@@ -1415,11 +1414,7 @@ exec_again:
 			goto out_dput;
 
 		if (inode->i_op->follow_link) {
-			/* We commonly drop rcu-walk here */
-			if (nameidata_dentry_drop_rcu_maybe(nd, next.dentry))
-				return -ECHILD;
-			BUG_ON(inode != next.dentry->d_inode);
-			err = do_follow_link(&next, nd);
+			err = do_follow_link(inode, &next, nd);
 			if (err)
 				goto return_err;
 			nd->inode = nd->path.dentry->d_inode;
@@ -1463,10 +1458,7 @@ last_component:
 			break;
 		if (inode && unlikely(inode->i_op->follow_link) &&
 		    (lookup_flags & LOOKUP_FOLLOW)) {
-			if (nameidata_dentry_drop_rcu_maybe(nd, next.dentry))
-				return -ECHILD;
-			BUG_ON(inode != next.dentry->d_inode);
-			err = do_follow_link(&next, nd);
+			err = do_follow_link(inode, &next, nd);
 			if (err)
 				goto return_err;
 			nd->inode = nd->path.dentry->d_inode;
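do_follow_link() above enforces two limits so that symlink chains terminate: a nesting cap (MAX_NESTED_LINKS) and a hard total of 40 links per lookup. A standalone userspace illustration using readlink(2); this toy resolver only expands a path that is itself a symlink and skips the relative-path fixups a real walker needs, and MAX_NESTED here is an invented stand-in:

#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <unistd.h>

#define MAX_NESTED 8	/* stands in for the kernel's MAX_NESTED_LINKS */

static int total_links;	/* stands in for current->total_link_count */

static int resolve(const char *path, char *out, size_t outlen, int depth)
{
	char target[PATH_MAX];
	ssize_t n;

	n = readlink(path, target, sizeof(target) - 1);
	if (n < 0) {
		/* Not a symlink (or unreadable): treat it as final. */
		snprintf(out, outlen, "%s", path);
		return 0;
	}
	target[n] = '\0';
	if (depth >= MAX_NESTED)	/* nesting cap */
		return -ELOOP;
	if (++total_links > 40)		/* overall per-lookup cap */
		return -ELOOP;
	return resolve(target, out, outlen, depth + 1);
}

int main(int argc, char **argv)
{
	char buf[PATH_MAX];

	if (argc < 2)
		return 1;
	total_links = 0;
	if (resolve(argv[1], buf, sizeof(buf), 0) == 0)
		printf("%s -> %s\n", argv[1], buf);
	else
		fprintf(stderr, "%s: too many links\n", argv[1]);
	return 0;
}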
@@ -1500,12 +1492,15 @@ return_reval:
 		 * We may need to check the cached dentry for staleness.
 		 */
 		if (need_reval_dot(nd->path.dentry)) {
+			if (nameidata_drop_rcu_last_maybe(nd))
+				return -ECHILD;
 			/* Note: we do not d_invalidate() */
 			err = d_revalidate(nd->path.dentry, nd);
 			if (!err)
 				err = -ESTALE;
 			if (err < 0)
 				break;
+			return 0;
 		}
 return_base:
 		if (nameidata_drop_rcu_last_maybe(nd))
@@ -2265,8 +2260,6 @@ static struct file *finish_open(struct nameidata *nd,
 	return filp;
 
 exit:
-	if (!IS_ERR(nd->intent.open.file))
-		release_open_intent(nd);
 	path_put(&nd->path);
 	return ERR_PTR(error);
 }
@@ -2389,8 +2382,6 @@ exit_mutex_unlock:
 exit_dput:
 	path_put_conditional(path, nd);
 exit:
-	if (!IS_ERR(nd->intent.open.file))
-		release_open_intent(nd);
 	path_put(&nd->path);
 	return ERR_PTR(error);
 }
@@ -2477,6 +2468,7 @@ struct file *do_filp_open(int dfd, const char *pathname,
 	}
 	audit_inode(pathname, nd.path.dentry);
 	filp = finish_open(&nd, open_flag, acc_mode);
+	release_open_intent(&nd);
 	return filp;
 
 creat:
@@ -2553,6 +2545,7 @@ out:
 		path_put(&nd.root);
 	if (filp == ERR_PTR(-ESTALE) && !(flags & LOOKUP_REVAL))
 		goto reval;
+	release_open_intent(&nd);
 	return filp;
 
 exit_dput:
@@ -2560,8 +2553,6 @@ exit_dput:
 out_path:
 	path_put(&nd.path);
 out_filp:
-	if (!IS_ERR(nd.intent.open.file))
-		release_open_intent(&nd);
 	filp = ERR_PTR(error);
 	goto out;
 }
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 3be975e..cde36cb 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -484,7 +484,7 @@ static int decode_cb_sequence4res(struct xdr_stream *xdr,
 out:
 	return status;
 out_default:
-	return nfs_cb_stat_to_errno(status);
+	return nfs_cb_stat_to_errno(nfserr);
 }
 
 /*
@@ -564,11 +564,9 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
 	if (unlikely(status))
 		goto out;
 	if (unlikely(nfserr != NFS4_OK))
-		goto out_default;
+		status = nfs_cb_stat_to_errno(nfserr);
 out:
 	return status;
-out_default:
-	return nfs_cb_stat_to_errno(status);
 }
 
 /*
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index d98d021..54b60bf 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -230,9 +230,6 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
 	dp->dl_client = clp;
 	get_nfs4_file(fp);
 	dp->dl_file = fp;
-	dp->dl_vfs_file = find_readable_file(fp);
-	get_file(dp->dl_vfs_file);
-	dp->dl_flock = NULL;
 	dp->dl_type = type;
 	dp->dl_stateid.si_boot = boot_time;
 	dp->dl_stateid.si_stateownerid = current_delegid++;
@@ -241,8 +238,6 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
 	fh_copy_shallow(&dp->dl_fh, &current_fh->fh_handle);
 	dp->dl_time = 0;
 	atomic_set(&dp->dl_count, 1);
-	list_add(&dp->dl_perfile, &fp->fi_delegations);
-	list_add(&dp->dl_perclnt, &clp->cl_delegations);
 	INIT_WORK(&dp->dl_recall.cb_work, nfsd4_do_callback_rpc);
 	return dp;
 }
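Note the ordering alloc_init_deleg() now follows: the delegation is no longer linked onto fi_delegations/cl_delegations at allocation time; it is published only after the lease has actually been set (see nfs4_set_delegation() further down), so a failed setup only has to free a private object. A generic sketch of that allocate-privately, publish-on-success pattern, with all names invented:

#include <pthread.h>
#include <stdlib.h>

struct delegation {
	struct delegation *next;
	int client_id;
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static struct delegation *active;	/* shared, lock-protected list */

static struct delegation *deleg_alloc(int client_id)
{
	struct delegation *d = calloc(1, sizeof(*d));

	if (d)
		d->client_id = client_id;	/* still private: no locking */
	return d;
}

/* Stands in for the step that can fail (setting the lease). */
static int lease_setup(struct delegation *d)
{
	(void)d;
	return 0;
}

static int deleg_install(int client_id)
{
	struct delegation *d = deleg_alloc(client_id);

	if (!d)
		return -1;
	if (lease_setup(d) != 0) {
		free(d);	/* never published, so a plain free suffices */
		return -1;
	}
	pthread_mutex_lock(&list_lock);	/* publish only after success */
	d->next = active;
	active = d;
	pthread_mutex_unlock(&list_lock);
	return 0;
}

int main(void)
{
	return deleg_install(1) ? 1 : 0;
}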
@@ -253,36 +248,30 @@ nfs4_put_delegation(struct nfs4_delegation *dp)
 	if (atomic_dec_and_test(&dp->dl_count)) {
 		dprintk("NFSD: freeing dp %p\n",dp);
 		put_nfs4_file(dp->dl_file);
-		fput(dp->dl_vfs_file);
 		kmem_cache_free(deleg_slab, dp);
 		num_delegations--;
 	}
 }
 
-/* Remove the associated file_lock first, then remove the delegation.
- * lease_modify() is called to remove the FS_LEASE file_lock from
- * the i_flock list, eventually calling nfsd's lock_manager
- * fl_release_callback.
- */
-static void
-nfs4_close_delegation(struct nfs4_delegation *dp)
+static void nfs4_put_deleg_lease(struct nfs4_file *fp)
 {
-	dprintk("NFSD: close_delegation dp %p\n",dp);
-	/* XXX: do we even need this check?: */
-	if (dp->dl_flock)
-		vfs_setlease(dp->dl_vfs_file, F_UNLCK, &dp->dl_flock);
+	if (atomic_dec_and_test(&fp->fi_delegees)) {
+		vfs_setlease(fp->fi_deleg_file, F_UNLCK, &fp->fi_lease);
+		fp->fi_lease = NULL;
+		fp->fi_deleg_file = NULL;
+	}
 }
 
 /* Called under the state lock. */
 static void
 unhash_delegation(struct nfs4_delegation *dp)
 {
-	list_del_init(&dp->dl_perfile);
 	list_del_init(&dp->dl_perclnt);
 	spin_lock(&recall_lock);
+	list_del_init(&dp->dl_perfile);
 	list_del_init(&dp->dl_recall_lru);
 	spin_unlock(&recall_lock);
-	nfs4_close_delegation(dp);
+	nfs4_put_deleg_lease(dp->dl_file);
 	nfs4_put_delegation(dp);
 }
@@ -958,8 +947,6 @@ expire_client(struct nfs4_client *clp)
 	spin_lock(&recall_lock);
 	while (!list_empty(&clp->cl_delegations)) {
 		dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt);
-		dprintk("NFSD: expire client. dp %p, fp %p\n", dp,
-				dp->dl_flock);
 		list_del_init(&dp->dl_perclnt);
 		list_move(&dp->dl_recall_lru, &reaplist);
 	}
@@ -2078,6 +2065,7 @@ alloc_init_file(struct inode *ino)
 		fp->fi_inode = igrab(ino);
 		fp->fi_id = current_fileid++;
 		fp->fi_had_conflict = false;
+		fp->fi_lease = NULL;
 		memset(fp->fi_fds, 0, sizeof(fp->fi_fds));
 		memset(fp->fi_access, 0, sizeof(fp->fi_access));
 		spin_lock(&recall_lock);
@@ -2329,23 +2317,8 @@ nfs4_file_downgrade(struct nfs4_file *fp, unsigned int share_access)
 		nfs4_file_put_access(fp, O_RDONLY);
 }
 
-/*
- * Spawn a thread to perform a recall on the delegation represented
- * by the lease (file_lock)
- *
- * Called from break_lease() with lock_flocks() held.
- * Note: we assume break_lease will only call this *once* for any given
- * lease.
- */
-static
-void nfsd_break_deleg_cb(struct file_lock *fl)
+static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
 {
-	struct nfs4_delegation *dp = (struct nfs4_delegation *)fl->fl_owner;
-
-	dprintk("NFSD nfsd_break_deleg_cb: dp %p fl %p\n",dp,fl);
-	if (!dp)
-		return;
-
 	/* We're assuming the state code never drops its reference
 	 * without first removing the lease. Since we're in this lease
 	 * callback (and since the lease code is serialized by the kernel
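nfs4_put_deleg_lease() above is the classic atomic_dec_and_test() idiom: several delegations now share one lease, and whichever put drops fi_delegees to zero tears the lease down. The same idiom rendered with C11 atomics; shared_lease and its fields are invented for the sketch:

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct shared_lease {
	atomic_int delegees;	/* plays the role of fi_delegees */
	char *resource;		/* plays the role of the lease itself */
};

static void lease_get(struct shared_lease *l)
{
	atomic_fetch_add(&l->delegees, 1);
}

static void lease_put(struct shared_lease *l)
{
	/* fetch_sub returns the old value: old == 1 means we were last. */
	if (atomic_fetch_sub(&l->delegees, 1) == 1) {
		printf("last put: releasing %s\n", l->resource);
		free(l->resource);
		l->resource = NULL;
	}
}

int main(void)
{
	struct shared_lease l = { .resource = strdup("lease") };

	atomic_init(&l.delegees, 1);	/* first delegation holds one ref */
	lease_get(&l);			/* second delegation shares it */
	lease_put(&l);			/* still one holder: no teardown */
	lease_put(&l);			/* count hits zero: teardown */
	return 0;
}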
@@ -2353,22 +2326,35 @@ void nfsd_break_deleg_cb(struct file_lock *fl)
 	 * it's safe to take a reference: */
 	atomic_inc(&dp->dl_count);
-	spin_lock(&recall_lock);
 	list_add_tail(&dp->dl_recall_lru, &del_recall_lru);
-	spin_unlock(&recall_lock);
 
 	/* only place dl_time is set. protected by lock_flocks*/
 	dp->dl_time = get_seconds();
+	nfsd4_cb_recall(dp);
+}
+
+/* Called from break_lease() with lock_flocks() held. */
+static void nfsd_break_deleg_cb(struct file_lock *fl)
+{
+	struct nfs4_file *fp = (struct nfs4_file *)fl->fl_owner;
+	struct nfs4_delegation *dp;
+
+	BUG_ON(!fp);
+	/* We assume break_lease is only called once per lease: */
+	BUG_ON(fp->fi_had_conflict);
 	/*
 	 * We don't want the locks code to timeout the lease for us;
-	 * we'll remove it ourself if the delegation isn't returned
-	 * in time.
+	 * we'll remove it ourself if a delegation isn't returned
+	 * in time:
 	 */
 	fl->fl_break_time = 0;
 
-	dp->dl_file->fi_had_conflict = true;
-	nfsd4_cb_recall(dp);
+	spin_lock(&recall_lock);
+	fp->fi_had_conflict = true;
+	list_for_each_entry(dp, &fp->fi_delegations, dl_perfile)
+		nfsd_break_one_deleg(dp);
+	spin_unlock(&recall_lock);
 }
 
 static
@@ -2459,13 +2445,15 @@ nfs4_check_delegmode(struct nfs4_delegation *dp, int flags)
 static struct nfs4_delegation *
 find_delegation_file(struct nfs4_file *fp, stateid_t *stid)
 {
-	struct nfs4_delegation *dp;
+	struct nfs4_delegation *dp = NULL;
 
+	spin_lock(&recall_lock);
 	list_for_each_entry(dp, &fp->fi_delegations, dl_perfile) {
 		if (dp->dl_stateid.si_stateownerid == stid->si_stateownerid)
-			return dp;
+			break;
 	}
-	return NULL;
+	spin_unlock(&recall_lock);
+	return dp;
 }
 
 int share_access_to_flags(u32 share_access)
@@ -2641,6 +2629,66 @@ static bool nfsd4_cb_channel_good(struct nfs4_client *clp)
 	return clp->cl_minorversion && clp->cl_cb_state == NFSD4_CB_UNKNOWN;
 }
 
+static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp, int flag)
+{
+	struct file_lock *fl;
+
+	fl = locks_alloc_lock();
+	if (!fl)
+		return NULL;
+	locks_init_lock(fl);
+	fl->fl_lmops = &nfsd_lease_mng_ops;
+	fl->fl_flags = FL_LEASE;
+	fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK;
+	fl->fl_end = OFFSET_MAX;
+	fl->fl_owner = (fl_owner_t)(dp->dl_file);
+	fl->fl_pid = current->tgid;
+	return fl;
+}
+
+static int nfs4_setlease(struct nfs4_delegation *dp, int flag)
+{
+	struct nfs4_file *fp = dp->dl_file;
+	struct file_lock *fl;
+	int status;
+
+	fl = nfs4_alloc_init_lease(dp, flag);
+	if (!fl)
+		return -ENOMEM;
+	fl->fl_file = find_readable_file(fp);
+	list_add(&dp->dl_perclnt, &dp->dl_client->cl_delegations);
+	status = vfs_setlease(fl->fl_file, fl->fl_type, &fl);
+	if (status) {
+		list_del_init(&dp->dl_perclnt);
+		locks_free_lock(fl);
+		return -ENOMEM;
+	}
+	fp->fi_lease = fl;
+	fp->fi_deleg_file = fl->fl_file;
+	get_file(fp->fi_deleg_file);
+	atomic_set(&fp->fi_delegees, 1);
+	list_add(&dp->dl_perfile, &fp->fi_delegations);
+	return 0;
+}
+
+static int nfs4_set_delegation(struct nfs4_delegation *dp, int flag)
+{
+	struct nfs4_file *fp = dp->dl_file;
+
+	if (!fp->fi_lease)
+		return nfs4_setlease(dp, flag);
+	spin_lock(&recall_lock);
+	if (fp->fi_had_conflict) {
+		spin_unlock(&recall_lock);
+		return -EAGAIN;
+	}
+	atomic_inc(&fp->fi_delegees);
+	list_add(&dp->dl_perfile, &fp->fi_delegations);
+	spin_unlock(&recall_lock);
+	list_add(&dp->dl_perclnt, &dp->dl_client->cl_delegations);
+	return 0;
+}
+
 /*
  * Attempt to hand out a delegation.
  */
@@ -2650,7 +2698,6 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
 	struct nfs4_delegation *dp;
 	struct nfs4_stateowner *sop = stp->st_stateowner;
 	int cb_up;
-	struct file_lock *fl;
 	int status, flag = 0;
 
 	cb_up = nfsd4_cb_channel_good(sop->so_client);
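nfs4_set_delegation() above splits the work by case: the first delegation on a file builds the lease via nfs4_setlease(), while later ones merely bump fi_delegees under recall_lock, unless fi_had_conflict shows a recall has already begun. A compressed userspace sketch of that shape; unlike the kernel, it takes a single mutex around both paths, since vfs_setlease() cannot run under a spinlock but a sketch has no such constraint, and every name below is invented:

#include <errno.h>
#include <pthread.h>
#include <stdio.h>

struct file_state {
	pthread_mutex_t lock;
	int lease_held;		/* stands in for fp->fi_lease != NULL */
	int delegees;		/* stands in for fp->fi_delegees */
	int had_conflict;	/* stands in for fp->fi_had_conflict */
};

static int create_lease(struct file_state *f)
{
	f->lease_held = 1;
	f->delegees = 1;	/* the creator holds the first reference */
	return 0;
}

static int add_delegation(struct file_state *f)
{
	int err = 0;

	pthread_mutex_lock(&f->lock);
	if (!f->lease_held)
		err = create_lease(f);		/* first user: set it up */
	else if (f->had_conflict)
		err = -EAGAIN;			/* recall underway: refuse */
	else
		f->delegees++;			/* share the existing lease */
	pthread_mutex_unlock(&f->lock);
	return err;
}

int main(void)
{
	struct file_state f = { .lock = PTHREAD_MUTEX_INITIALIZER };

	printf("first:  %d\n", add_delegation(&f));	/* creates lease */
	printf("second: %d\n", add_delegation(&f));	/* shares lease */
	f.had_conflict = 1;
	printf("third:  %d\n", add_delegation(&f));	/* -EAGAIN */
	return 0;
}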
@@ -2681,36 +2728,11 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
 	}
 
 	dp = alloc_init_deleg(sop->so_client, stp, fh, flag);
-	if (dp == NULL) {
-		flag = NFS4_OPEN_DELEGATE_NONE;
-		goto out;
-	}
-	status = -ENOMEM;
-	fl = locks_alloc_lock();
-	if (!fl)
-		goto out;
-	locks_init_lock(fl);
-	fl->fl_lmops = &nfsd_lease_mng_ops;
-	fl->fl_flags = FL_LEASE;
-	fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK;
-	fl->fl_end = OFFSET_MAX;
-	fl->fl_owner = (fl_owner_t)dp;
-	fl->fl_file = find_readable_file(stp->st_file);
-	BUG_ON(!fl->fl_file);
-	fl->fl_pid = current->tgid;
-	dp->dl_flock = fl;
-
-	/* vfs_setlease checks to see if delegation should be handed out.
-	 * the lock_manager callback fl_change is used
-	 */
-	if ((status = vfs_setlease(fl->fl_file, fl->fl_type, &fl))) {
-		dprintk("NFSD: setlease failed [%d], no delegation\n", status);
-		dp->dl_flock = NULL;
-		locks_free_lock(fl);
-		unhash_delegation(dp);
-		flag = NFS4_OPEN_DELEGATE_NONE;
-		goto out;
-	}
+	if (dp == NULL)
+		goto out_no_deleg;
+	status = nfs4_set_delegation(dp, flag);
+	if (status)
+		goto out_free;
 
 	memcpy(&open->op_delegate_stateid, &dp->dl_stateid, sizeof(dp->dl_stateid));
@@ -2722,6 +2744,12 @@ out:
 			&& open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE)
 		dprintk("NFSD: WARNING: refusing delegation reclaim\n");
 	open->op_delegate_type = flag;
+	return;
+out_free:
+	nfs4_put_delegation(dp);
+out_no_deleg:
+	flag = NFS4_OPEN_DELEGATE_NONE;
+	goto out;
 }
 
 /*
@@ -2916,8 +2944,6 @@ nfs4_laundromat(void)
 			test_val = u;
 			break;
 		}
-		dprintk("NFSD: purging unused delegation dp %p, fp %p\n",
-			dp, dp->dl_flock);
 		list_move(&dp->dl_recall_lru, &reaplist);
 	}
 	spin_unlock(&recall_lock);
@@ -3128,7 +3154,7 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
 			goto out;
 		renew_client(dp->dl_client);
 		if (filpp) {
-			*filpp = find_readable_file(dp->dl_file);
+			*filpp = dp->dl_file->fi_deleg_file;
 			BUG_ON(!*filpp);
 		}
 	} else { /* open or lock stateid */
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 956629b..1275b86 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -317,8 +317,8 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
 		READ_BUF(dummy32);
 		len += (XDR_QUADLEN(dummy32) << 2);
 		READMEM(buf, dummy32);
-		if ((host_err = nfsd_map_name_to_uid(argp->rqstp, buf, dummy32, &iattr->ia_uid)))
-			goto out_nfserr;
+		if ((status = nfsd_map_name_to_uid(argp->rqstp, buf, dummy32, &iattr->ia_uid)))
+			return status;
 		iattr->ia_valid |= ATTR_UID;
 	}
 	if (bmval[1] & FATTR4_WORD1_OWNER_GROUP) {
@@ -328,8 +328,8 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
 		READ_BUF(dummy32);
 		len += (XDR_QUADLEN(dummy32) << 2);
 		READMEM(buf, dummy32);
-		if ((host_err = nfsd_map_name_to_gid(argp->rqstp, buf, dummy32, &iattr->ia_gid)))
-			goto out_nfserr;
+		if ((status = nfsd_map_name_to_gid(argp->rqstp, buf, dummy32, &iattr->ia_gid)))
+			return status;
 		iattr->ia_valid |= ATTR_GID;
 	}
 	if (bmval[1] & FATTR4_WORD1_TIME_ACCESS_SET) {
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 3074656..2d31224 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -83,8 +83,6 @@ struct nfs4_delegation {
 	atomic_t		dl_count;       /* ref count */
 	struct nfs4_client	*dl_client;
 	struct nfs4_file	*dl_file;
-	struct file		*dl_vfs_file;
-	struct file_lock	*dl_flock;
 	u32			dl_type;
 	time_t			dl_time;
 /* For recall: */
@@ -379,6 +377,9 @@ struct nfs4_file {
 	 */
 	atomic_t		fi_readers;
 	atomic_t		fi_writers;
+	struct file		*fi_deleg_file;
+	struct file_lock	*fi_lease;
+	atomic_t		fi_delegees;
 	struct inode		*fi_inode;
 	u32			fi_id;      /* used with stateowner->so_id
 					     * for stateid_hashtbl hash */
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 641117f..da1d970 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -808,7 +808,7 @@ nfsd_get_raparms(dev_t dev, ino_t ino)
 		if (ra->p_count == 0)
 			frap = rap;
 	}
-	depth = nfsdstats.ra_size*11/10;
+	depth = nfsdstats.ra_size;
 	if (!frap) {
 		spin_unlock(&rab->pb_lock);
 		return NULL;
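The rewritten tail of nfs4_open_delegation() above (out_free/out_no_deleg) is the usual kernel goto-unwind layout: one forward success path, with labels that undo progressively less state. The same structure in a standalone userspace example; ctx_open() and its resources are hypothetical:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct ctx {
	char *name;
	FILE *log;
};

static struct ctx *ctx_open(const char *name, const char *logpath)
{
	struct ctx *c = malloc(sizeof(*c));

	if (!c)
		goto out;
	c->name = strdup(name);
	if (!c->name)
		goto out_free_ctx;
	c->log = fopen(logpath, "w");
	if (!c->log)
		goto out_free_name;
	return c;		/* success: everything is initialized */

out_free_name:
	free(c->name);		/* each label undoes one earlier step */
out_free_ctx:
	free(c);
out:
	return NULL;
}

int main(void)
{
	struct ctx *c = ctx_open("demo", "/tmp/demo.log");

	if (!c)
		return 1;
	fclose(c->log);
	free(c->name);
	free(c);
	return 0;
}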
@@ -1744,6 +1744,13 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
 	host_err = nfsd_break_lease(odentry->d_inode);
 	if (host_err)
 		goto out_drop_write;
+	if (ndentry->d_inode) {
+		host_err = nfsd_break_lease(ndentry->d_inode);
+		if (host_err)
+			goto out_drop_write;
+	}
+	if (host_err)
+		goto out_drop_write;
 	host_err = vfs_rename(fdir, odentry, tdir, ndentry);
 	if (!host_err) {
 		host_err = commit_metadata(tfhp);
@@ -1812,22 +1819,22 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
 
 	host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
 	if (host_err)
-		goto out_nfserr;
+		goto out_put;
 
 	host_err = nfsd_break_lease(rdentry->d_inode);
 	if (host_err)
-		goto out_put;
+		goto out_drop_write;
 	if (type != S_IFDIR)
 		host_err = vfs_unlink(dirp, rdentry);
 	else
 		host_err = vfs_rmdir(dirp, rdentry);
-out_put:
-	dput(rdentry);
-
 	if (!host_err)
 		host_err = commit_metadata(fhp);
-
+out_drop_write:
 	mnt_drop_write(fhp->fh_export->ex_path.mnt);
+out_put:
+	dput(rdentry);
+
 out_nfserr:
 	err = nfserrno(host_err);
 out:
diff --git a/fs/open.c b/fs/open.c
@@ -790,6 +790,8 @@ struct file *nameidata_to_filp(struct nameidata *nd)
 
 	/* Pick up the filp from the open intent */
 	filp = nd->intent.open.file;
+	nd->intent.open.file = NULL;
+
 	/* Has the filesystem initialised the file for us? */
 	if (filp->f_path.dentry == NULL) {
 		path_get(&nd->path);
diff --git a/fs/partitions/mac.c b/fs/partitions/mac.c
index 68d6a21..11f688b 100644
--- a/fs/partitions/mac.c
+++ b/fs/partitions/mac.c
@@ -29,10 +29,9 @@ static inline void mac_fix_string(char *stg, int len)
 
 int mac_partition(struct parsed_partitions *state)
 {
-	int slot = 1;
 	Sector sect;
 	unsigned char *data;
-	int blk, blocks_in_map;
+	int slot, blocks_in_map;
 	unsigned secsize;
 #ifdef CONFIG_PPC_PMAC
 	int found_root = 0;
@@ -59,10 +58,14 @@ int mac_partition(struct parsed_partitions *state)
 		put_dev_sector(sect);
 		return 0;		/* not a MacOS disk */
 	}
-	strlcat(state->pp_buf, " [mac]", PAGE_SIZE);
 	blocks_in_map = be32_to_cpu(part->map_count);
-	for (blk = 1; blk <= blocks_in_map; ++blk) {
-		int pos = blk * secsize;
+	if (blocks_in_map < 0 || blocks_in_map >= DISK_MAX_PARTS) {
+		put_dev_sector(sect);
+		return 0;
+	}
+	strlcat(state->pp_buf, " [mac]", PAGE_SIZE);
+	for (slot = 1; slot <= blocks_in_map; ++slot) {
+		int pos = slot * secsize;
 		put_dev_sector(sect);
 		data = read_part_sector(state, pos/512, &sect);
 		if (!data)
@@ -113,13 +116,11 @@ int mac_partition(struct parsed_partitions *state)
 		}
 
 		if (goodness > found_root_goodness) {
-			found_root = blk;
+			found_root = slot;
 			found_root_goodness = goodness;
 		}
 	}
 #endif /* CONFIG_PPC_PMAC */
-
-		++slot;
 	}
 #ifdef CONFIG_PPC_PMAC
 	if (found_root_goodness)
diff --git a/fs/proc/array.c b/fs/proc/array.c
index df2b703..7c99c1c 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -353,9 +353,6 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
 	task_cap(m, task);
 	task_cpus_allowed(m, task);
 	cpuset_task_status_allowed(m, task);
-#if defined(CONFIG_S390)
-	task_show_regs(m, task);
-#endif
 	task_context_switch_counts(m, task);
 	return 0;
 }
diff --git a/fs/super.c b/fs/super.c
@@ -177,6 +177,11 @@ void deactivate_locked_super(struct super_block *s)
 	struct file_system_type *fs = s->s_type;
 	if (atomic_dec_and_test(&s->s_active)) {
 		fs->kill_sb(s);
+		/*
+		 * We need to call rcu_barrier so all the delayed rcu free
+		 * inodes are flushed before we release the fs module.
+		 */
+		rcu_barrier();
 		put_filesystem(fs);
 		put_super(s);
 	} else {
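The fs/partitions/mac.c hunk above bounds-checks blocks_in_map before it drives a loop, since the value is read straight off the disk. The same defensive parse in userspace; DISK_MAX_PARTS and the on-disk layout below are stand-ins for the kernel's, not the real partition-map format:

#include <stdint.h>
#include <stdio.h>

#define DISK_MAX_PARTS 256

/* Decode a big-endian 32-bit count, as be32_to_cpu() would. */
static uint32_t be32(const unsigned char *p)
{
	return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
	       ((uint32_t)p[2] << 8) | (uint32_t)p[3];
}

static int parse_map(const unsigned char *disk, size_t len)
{
	uint32_t count;

	if (len < 4)
		return 0;
	count = be32(disk);
	/* Reject implausible counts before they become a loop bound. */
	if (count == 0 || count >= DISK_MAX_PARTS)
		return 0;
	for (uint32_t slot = 1; slot <= count; slot++)
		printf("would parse map entry %u\n", slot);
	return (int)count;
}

int main(void)
{
	unsigned char bogus[4] = { 0xff, 0xff, 0xff, 0xff };
	unsigned char sane[4] = { 0, 0, 0, 2 };

	printf("bogus: %d entries accepted\n", parse_map(bogus, 4));
	printf("sane:  %d entries accepted\n", parse_map(sane, 4));
	return 0;
}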