diff options
-rw-r--r-- | MAINTAINERS | 4 | ||||
-rw-r--r-- | fs/btrfs/backref.c | 17 | ||||
-rw-r--r-- | fs/btrfs/btrfs_inode.h | 1 | ||||
-rw-r--r-- | fs/btrfs/check-integrity.c | 16 | ||||
-rw-r--r-- | fs/btrfs/ctree.c | 86 | ||||
-rw-r--r-- | fs/btrfs/ctree.h | 2 | ||||
-rw-r--r-- | fs/btrfs/delayed-inode.c | 18 | ||||
-rw-r--r-- | fs/btrfs/delayed-inode.h | 3 | ||||
-rw-r--r-- | fs/btrfs/disk-io.c | 76 | ||||
-rw-r--r-- | fs/btrfs/extent_io.c | 7 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 73 | ||||
-rw-r--r-- | fs/btrfs/ioctl.c | 117 | ||||
-rw-r--r-- | fs/btrfs/ordered-data.c | 22 | ||||
-rw-r--r-- | fs/btrfs/rcu-string.h | 56 | ||||
-rw-r--r-- | fs/btrfs/scrub.c | 30 | ||||
-rw-r--r-- | fs/btrfs/super.c | 33 | ||||
-rw-r--r-- | fs/btrfs/transaction.c | 14 | ||||
-rw-r--r-- | fs/btrfs/volumes.c | 92 | ||||
-rw-r--r-- | fs/btrfs/volumes.h | 2 |
19 files changed, 478 insertions, 191 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index f6e62de..3e30a3a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1646,11 +1646,11 @@ S: Maintained F: drivers/gpio/gpio-bt8xx.c BTRFS FILE SYSTEM -M: Chris Mason <chris.mason@oracle.com> +M: Chris Mason <chris.mason@fusionio.com> L: linux-btrfs@vger.kernel.org W: http://btrfs.wiki.kernel.org/ Q: http://patchwork.kernel.org/project/linux-btrfs/list/ -T: git git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs.git S: Maintained F: Documentation/filesystems/btrfs.txt F: fs/btrfs/ diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 3f75895..8f7d123 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -179,7 +179,8 @@ static int __add_prelim_ref(struct list_head *head, u64 root_id, static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, struct ulist *parents, int level, - struct btrfs_key *key, u64 wanted_disk_byte, + struct btrfs_key *key, u64 time_seq, + u64 wanted_disk_byte, const u64 *extent_item_pos) { int ret; @@ -212,7 +213,7 @@ add_parent: */ while (1) { eie = NULL; - ret = btrfs_next_leaf(root, path); + ret = btrfs_next_old_leaf(root, path, time_seq); if (ret < 0) return ret; if (ret) @@ -294,18 +295,10 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info, goto out; } - if (level == 0) { - if (ret == 1 && path->slots[0] >= btrfs_header_nritems(eb)) { - ret = btrfs_next_leaf(root, path); - if (ret) - goto out; - eb = path->nodes[0]; - } - + if (level == 0) btrfs_item_key_to_cpu(eb, &key, path->slots[0]); - } - ret = add_all_parents(root, path, parents, level, &key, + ret = add_all_parents(root, path, parents, level, &key, time_seq, ref->wanted_disk_byte, extent_item_pos); out: btrfs_free_path(path); diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index e616f887..12394a9 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -37,6 +37,7 @@ #define BTRFS_INODE_IN_DEFRAG 3 #define BTRFS_INODE_DELALLOC_META_RESERVED 4 #define BTRFS_INODE_HAS_ORPHAN_ITEM 5 +#define BTRFS_INODE_HAS_ASYNC_EXTENT 6 /* in memory btrfs inode */ struct btrfs_inode { diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index 9cebb1f..da6e936 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -93,6 +93,7 @@ #include "print-tree.h" #include "locking.h" #include "check-integrity.h" +#include "rcu-string.h" #define BTRFSIC_BLOCK_HASHTABLE_SIZE 0x10000 #define BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE 0x10000 @@ -843,13 +844,14 @@ static int btrfsic_process_superblock_dev_mirror( superblock_tmp->never_written = 0; superblock_tmp->mirror_num = 1 + superblock_mirror_num; if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE) - printk(KERN_INFO "New initial S-block (bdev %p, %s)" - " @%llu (%s/%llu/%d)\n", - superblock_bdev, device->name, - (unsigned long long)dev_bytenr, - dev_state->name, - (unsigned long long)dev_bytenr, - superblock_mirror_num); + printk_in_rcu(KERN_INFO "New initial S-block (bdev %p, %s)" + " @%llu (%s/%llu/%d)\n", + superblock_bdev, + rcu_str_deref(device->name), + (unsigned long long)dev_bytenr, + dev_state->name, + (unsigned long long)dev_bytenr, + superblock_mirror_num); list_add(&superblock_tmp->all_blocks_node, &state->all_blocks_list); btrfsic_block_hashtable_add(superblock_tmp, diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index d7a96cf..04b06bc 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -467,6 +467,15 @@ static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info, return 0; } +/* + * This allocates memory and gets a tree modification sequence number when + * needed. + * + * Returns 0 when no sequence number is needed, < 0 on error. + * Returns 1 when a sequence number was added. In this case, + * fs_info->tree_mod_seq_lock was acquired and must be released by the caller + * after inserting into the rb tree. + */ static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags, struct tree_mod_elem **tm_ret) { @@ -491,11 +500,11 @@ static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags, */ kfree(tm); seq = 0; + spin_unlock(&fs_info->tree_mod_seq_lock); } else { __get_tree_mod_seq(fs_info, &tm->elem); seq = tm->elem.seq; } - spin_unlock(&fs_info->tree_mod_seq_lock); return seq; } @@ -521,7 +530,9 @@ tree_mod_log_insert_key_mask(struct btrfs_fs_info *fs_info, tm->slot = slot; tm->generation = btrfs_node_ptr_generation(eb, slot); - return __tree_mod_log_insert(fs_info, tm); + ret = __tree_mod_log_insert(fs_info, tm); + spin_unlock(&fs_info->tree_mod_seq_lock); + return ret; } static noinline int @@ -559,7 +570,9 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info, tm->move.nr_items = nr_items; tm->op = MOD_LOG_MOVE_KEYS; - return __tree_mod_log_insert(fs_info, tm); + ret = __tree_mod_log_insert(fs_info, tm); + spin_unlock(&fs_info->tree_mod_seq_lock); + return ret; } static noinline int @@ -580,7 +593,9 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info, tm->generation = btrfs_header_generation(old_root); tm->op = MOD_LOG_ROOT_REPLACE; - return __tree_mod_log_insert(fs_info, tm); + ret = __tree_mod_log_insert(fs_info, tm); + spin_unlock(&fs_info->tree_mod_seq_lock); + return ret; } static struct tree_mod_elem * @@ -1023,6 +1038,10 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info, looped = 1; } + /* if there's no old root to return, return what we found instead */ + if (!found) + found = tm; + return found; } @@ -1143,22 +1162,36 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, return eb_rewin; } +/* + * get_old_root() rewinds the state of @root's root node to the given @time_seq + * value. If there are no changes, the current root->root_node is returned. If + * anything changed in between, there's a fresh buffer allocated on which the + * rewind operations are done. In any case, the returned buffer is read locked. + * Returns NULL on error (with no locks held). + */ static inline struct extent_buffer * get_old_root(struct btrfs_root *root, u64 time_seq) { struct tree_mod_elem *tm; struct extent_buffer *eb; - struct tree_mod_root *old_root; + struct tree_mod_root *old_root = NULL; u64 old_generation; + u64 logical; + eb = btrfs_read_lock_root_node(root); tm = __tree_mod_log_oldest_root(root->fs_info, root, time_seq); if (!tm) return root->node; - old_root = &tm->old_root; - old_generation = tm->generation; + if (tm->op == MOD_LOG_ROOT_REPLACE) { + old_root = &tm->old_root; + old_generation = tm->generation; + logical = old_root->logical; + } else { + logical = root->node->start; + } - tm = tree_mod_log_search(root->fs_info, old_root->logical, time_seq); + tm = tree_mod_log_search(root->fs_info, logical, time_seq); /* * there was an item in the log when __tree_mod_log_oldest_root * returned. this one must not go away, because the time_seq passed to @@ -1166,22 +1199,25 @@ get_old_root(struct btrfs_root *root, u64 time_seq) */ BUG_ON(!tm); - if (old_root->logical == root->node->start) { - /* there are logged operations for the current root */ - eb = btrfs_clone_extent_buffer(root->node); - } else { - /* there's a root replace operation for the current root */ + if (old_root) eb = alloc_dummy_extent_buffer(tm->index << PAGE_CACHE_SHIFT, root->nodesize); + else + eb = btrfs_clone_extent_buffer(root->node); + btrfs_tree_read_unlock(root->node); + free_extent_buffer(root->node); + if (!eb) + return NULL; + btrfs_tree_read_lock(eb); + if (old_root) { btrfs_set_header_bytenr(eb, eb->start); btrfs_set_header_backref_rev(eb, BTRFS_MIXED_BACKREF_REV); btrfs_set_header_owner(eb, root->root_key.objectid); + btrfs_set_header_level(eb, old_root->level); + btrfs_set_header_generation(eb, old_generation); } - if (!eb) - return NULL; - btrfs_set_header_level(eb, old_root->level); - btrfs_set_header_generation(eb, old_generation); __tree_mod_log_rewind(eb, time_seq, tm); + extent_buffer_get(eb); return eb; } @@ -1650,8 +1686,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, BTRFS_NODEPTRS_PER_BLOCK(root) / 4) return 0; - btrfs_header_nritems(mid); - left = read_node_slot(root, parent, pslot - 1); if (left) { btrfs_tree_lock(left); @@ -1681,7 +1715,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, wret = push_node_left(trans, root, left, mid, 1); if (wret < 0) ret = wret; - btrfs_header_nritems(mid); } /* @@ -2615,9 +2648,7 @@ int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key, again: b = get_old_root(root, time_seq); - extent_buffer_get(b); level = btrfs_header_level(b); - btrfs_tree_read_lock(b); p->locks[level] = BTRFS_READ_LOCK; while (b) { @@ -5001,6 +5032,12 @@ next: */ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) { + return btrfs_next_old_leaf(root, path, 0); +} + +int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path, + u64 time_seq) +{ int slot; int level; struct extent_buffer *c; @@ -5025,7 +5062,10 @@ again: path->keep_locks = 1; path->leave_spinning = 1; - ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (time_seq) + ret = btrfs_search_old_slot(root, &key, path, time_seq); + else + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); path->keep_locks = 0; if (ret < 0) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 0236d03..8b73b2d 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2753,6 +2753,8 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, } int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); +int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path, + u64 time_seq); static inline int btrfs_next_item(struct btrfs_root *root, struct btrfs_path *p) { ++p->slots[0]; diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index c18d044..2399f40 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1879,3 +1879,21 @@ void btrfs_kill_all_delayed_nodes(struct btrfs_root *root) } } } + +void btrfs_destroy_delayed_inodes(struct btrfs_root *root) +{ + struct btrfs_delayed_root *delayed_root; + struct btrfs_delayed_node *curr_node, *prev_node; + + delayed_root = btrfs_get_delayed_root(root); + + curr_node = btrfs_first_delayed_node(delayed_root); + while (curr_node) { + __btrfs_kill_delayed_node(curr_node); + + prev_node = curr_node; + curr_node = btrfs_next_delayed_node(curr_node); + btrfs_release_delayed_node(prev_node); + } +} + diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h index 7083d08..f5aa402 100644 --- a/fs/btrfs/delayed-inode.h +++ b/fs/btrfs/delayed-inode.h @@ -124,6 +124,9 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev); /* Used for drop dead root */ void btrfs_kill_all_delayed_nodes(struct btrfs_root *root); +/* Used for clean the transaction */ +void btrfs_destroy_delayed_inodes(struct btrfs_root *root); + /* Used for readdir() */ void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list, struct list_head *del_list); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 7ae51de..e1890b1 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -44,6 +44,7 @@ #include "free-space-cache.h" #include "inode-map.h" #include "check-integrity.h" +#include "rcu-string.h" static struct extent_io_ops btree_extent_io_ops; static void end_workqueue_fn(struct btrfs_work *work); @@ -2118,7 +2119,7 @@ int open_ctree(struct super_block *sb, features = btrfs_super_incompat_flags(disk_super); features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF; - if (tree_root->fs_info->compress_type & BTRFS_COMPRESS_LZO) + if (tree_root->fs_info->compress_type == BTRFS_COMPRESS_LZO) features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; /* @@ -2575,8 +2576,9 @@ static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate) struct btrfs_device *device = (struct btrfs_device *) bh->b_private; - printk_ratelimited(KERN_WARNING "lost page write due to " - "I/O error on %s\n", device->name); + printk_ratelimited_in_rcu(KERN_WARNING "lost page write due to " + "I/O error on %s\n", + rcu_str_deref(device->name)); /* note, we dont' set_buffer_write_io_error because we have * our own ways of dealing with the IO errors */ @@ -2749,8 +2751,8 @@ static int write_dev_flush(struct btrfs_device *device, int wait) wait_for_completion(&device->flush_wait); if (bio_flagged(bio, BIO_EOPNOTSUPP)) { - printk("btrfs: disabling barriers on dev %s\n", - device->name); + printk_in_rcu("btrfs: disabling barriers on dev %s\n", + rcu_str_deref(device->name)); device->nobarriers = 1; } if (!bio_flagged(bio, BIO_UPTODATE)) { @@ -3400,7 +3402,6 @@ int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, delayed_refs = &trans->delayed_refs; -again: spin_lock(&delayed_refs->lock); if (delayed_refs->num_entries == 0) { spin_unlock(&delayed_refs->lock); @@ -3408,31 +3409,36 @@ again: return ret; } - node = rb_first(&delayed_refs->root); - while (node) { + while ((node = rb_first(&delayed_refs->root)) != NULL) { ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); - node = rb_next(node); - - ref->in_tree = 0; - rb_erase(&ref->rb_node, &delayed_refs->root); - delayed_refs->num_entries--; atomic_set(&ref->refs, 1); if (btrfs_delayed_ref_is_head(ref)) { struct btrfs_delayed_ref_head *head; head = btrfs_delayed_node_to_head(ref); - spin_unlock(&delayed_refs->lock); - mutex_lock(&head->mutex); + if (!mutex_trylock(&head->mutex)) { + atomic_inc(&ref->refs); + spin_unlock(&delayed_refs->lock); + + /* Need to wait for the delayed ref to run */ + mutex_lock(&head->mutex); + mutex_unlock(&head->mutex); + btrfs_put_delayed_ref(ref); + + continue; + } + kfree(head->extent_op); delayed_refs->num_heads--; if (list_empty(&head->cluster)) delayed_refs->num_heads_ready--; list_del_init(&head->cluster); - mutex_unlock(&head->mutex); - btrfs_put_delayed_ref(ref); - goto again; } + ref->in_tree = 0; + rb_erase(&ref->rb_node, &delayed_refs->root); + delayed_refs->num_entries--; + spin_unlock(&delayed_refs->lock); btrfs_put_delayed_ref(ref); @@ -3520,11 +3526,9 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root, &(&BTRFS_I(page->mapping->host)->io_tree)->buffer, offset >> PAGE_CACHE_SHIFT); spin_unlock(&dirty_pages->buffer_lock); - if (eb) { + if (eb) ret = test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags); - atomic_set(&eb->refs, 1); - } if (PageWriteback(page)) end_page_writeback(page); @@ -3538,8 +3542,8 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root, spin_unlock_irq(&page->mapping->tree_lock); } - page->mapping->a_ops->invalidatepage(page, 0); unlock_page(page); + page_cache_release(page); } } @@ -3553,8 +3557,10 @@ static int btrfs_destroy_pinned_extent(struct btrfs_root *root, u64 start; u64 end; int ret; + bool loop = true; unpin = pinned_extents; +again: while (1) { ret = find_first_extent_bit(unpin, 0, &start, &end, EXTENT_DIRTY); @@ -3572,6 +3578,15 @@ static int btrfs_destroy_pinned_extent(struct btrfs_root *root, cond_resched(); } + if (loop) { + if (unpin == &root->fs_info->freed_extents[0]) + unpin = &root->fs_info->freed_extents[1]; + else + unpin = &root->fs_info->freed_extents[0]; + loop = false; + goto again; + } + return 0; } @@ -3585,21 +3600,23 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, /* FIXME: cleanup wait for commit */ cur_trans->in_commit = 1; cur_trans->blocked = 1; - if (waitqueue_active(&root->fs_info->transaction_blocked_wait)) - wake_up(&root->fs_info->transaction_blocked_wait); + wake_up(&root->fs_info->transaction_blocked_wait); cur_trans->blocked = 0; - if (waitqueue_active(&root->fs_info->transaction_wait)) - wake_up(&root->fs_info->transaction_wait); + wake_up(&root->fs_info->transaction_wait); cur_trans->commit_done = 1; - if (waitqueue_active(&cur_trans->commit_wait)) - wake_up(&cur_trans->commit_wait); + wake_up(&cur_trans->commit_wait); + + btrfs_destroy_delayed_inodes(root); + btrfs_assert_delayed_root_empty(root); btrfs_destroy_pending_snapshots(cur_trans); btrfs_destroy_marked_extents(root, &cur_trans->dirty_pages, EXTENT_DIRTY); + btrfs_destroy_pinned_extent(root, + root->fs_info->pinned_extents); /* memset(cur_trans, 0, sizeof(*cur_trans)); @@ -3648,6 +3665,9 @@ int btrfs_cleanup_transaction(struct btrfs_root *root) if (waitqueue_active(&t->commit_wait)) wake_up(&t->commit_wait); + btrfs_destroy_delayed_inodes(root); + btrfs_assert_delayed_root_empty(root); + btrfs_destroy_pending_snapshots(t); btrfs_destroy_delalloc_inodes(root); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 2c8f7b2..aaa12c1 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -20,6 +20,7 @@ #include "volumes.h" #include "check-integrity.h" #include "locking.h" +#include "rcu-string.h" static struct kmem_cache *extent_state_cache; static struct kmem_cache *extent_buffer_cache; @@ -1917,9 +1918,9 @@ int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start, return -EIO; } - printk(KERN_INFO "btrfs read error corrected: ino %lu off %llu (dev %s " - "sector %llu)\n", page->mapping->host->i_ino, start, - dev->name, sector); + printk_in_rcu(KERN_INFO "btrfs read error corrected: ino %lu off %llu " + "(dev %s sector %llu)\n", page->mapping->host->i_ino, + start, rcu_str_deref(dev->name), sector); bio_put(bio); return 0; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index f6ab6f5..a4f0250 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -830,7 +830,7 @@ static noinline int cow_file_range(struct inode *inode, if (IS_ERR(trans)) { extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, - start, end, NULL, + start, end, locked_page, EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_UNLOCK | EXTENT_CLEAR_DELALLOC | @@ -963,7 +963,7 @@ out: out_unlock: extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, - start, end, NULL, + start, end, locked_page, EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_UNLOCK | EXTENT_CLEAR_DELALLOC | @@ -986,8 +986,10 @@ static noinline void async_cow_start(struct btrfs_work *work) compress_file_range(async_cow->inode, async_cow->locked_page, async_cow->start, async_cow->end, async_cow, &num_added); - if (num_added == 0) + if (num_added == 0) { + iput(async_cow->inode); async_cow->inode = NULL; + } } /* @@ -1020,6 +1022,8 @@ static noinline void async_cow_free(struct btrfs_work *work) { struct async_cow *async_cow; async_cow = container_of(work, struct async_cow, work); + if (async_cow->inode) + iput(async_cow->inode); kfree(async_cow); } @@ -1038,7 +1042,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, while (start < end) { async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS); BUG_ON(!async_cow); /* -ENOMEM */ - async_cow->inode = inode; + async_cow->inode = igrab(inode); async_cow->root = root; async_cow->locked_page = locked_page; async_cow->start = start; @@ -1136,8 +1140,18 @@ static noinline int run_delalloc_nocow(struct inode *inode, u64 ino = btrfs_ino(inode); path = btrfs_alloc_path(); - if (!path) + if (!path) { + extent_clear_unlock_delalloc(inode, + &BTRFS_I(inode)->io_tree, + start, end, locked_page, + EXTENT_CLEAR_UNLOCK_PAGE | + EXTENT_CLEAR_UNLOCK | + EXTENT_CLEAR_DELALLOC | + EXTENT_CLEAR_DIRTY | + EXTENT_SET_WRITEBACK | + EXTENT_END_WRITEBACK); return -ENOMEM; + } nolock = btrfs_is_free_space_inode(root, inode); @@ -1147,6 +1161,15 @@ static noinline int run_delalloc_nocow(struct inode *inode, trans = btrfs_join_transaction(root); if (IS_ERR(trans)) { + extent_clear_unlock_delalloc(inode, + &BTRFS_I(inode)->io_tree, + start, end, locked_page, + EXTENT_CLEAR_UNLOCK_PAGE | + EXTENT_CLEAR_UNLOCK | + EXTENT_CLEAR_DELALLOC | + EXTENT_CLEAR_DIRTY | + EXTENT_SET_WRITEBACK | + EXTENT_END_WRITEBACK); btrfs_free_path(path); return PTR_ERR(trans); } @@ -1327,8 +1350,11 @@ out_check: } btrfs_release_path(path); - if (cur_offset <= end && cow_start == (u64)-1) + if (cur_offset <= end && cow_start == (u64)-1) { cow_start = cur_offset; + cur_offset = end; + } + if (cow_start != (u64)-1) { ret = cow_file_range(inode, locked_page, cow_start, end, page_started, nr_written, 1); @@ -1347,6 +1373,17 @@ error: if (!ret) ret = err; + if (ret && cur_offset < end) + extent_clear_unlock_delalloc(inode, + &BTRFS_I(inode)->io_tree, + cur_offset, end, locked_page, + EXTENT_CLEAR_UNLOCK_PAGE | + EXTENT_CLEAR_UNLOCK | + EXTENT_CLEAR_DELALLOC | + EXTENT_CLEAR_DIRTY | + EXTENT_SET_WRITEBACK | + EXTENT_END_WRITEBACK); + btrfs_free_path(path); return ret; } @@ -1361,20 +1398,23 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, int ret; struct btrfs_root *root = BTRFS_I(inode)->root; - if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) + if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) { ret = run_delalloc_nocow(inode, locked_page, start, end, page_started, 1, nr_written); - else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC) + } else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC) { ret = run_delalloc_nocow(inode, locked_page, start, end, page_started, 0, nr_written); - else if (!btrfs_test_opt(root, COMPRESS) && - !(BTRFS_I(inode)->force_compress) && - !(BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS)) + } else if (!btrfs_test_opt(root, COMPRESS) && + !(BTRFS_I(inode)->force_compress) && + !(BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS)) { ret = cow_file_range(inode, locked_page, start, end, page_started, nr_written, 1); - else + } else { + set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, + &BTRFS_I(inode)->runtime_flags); ret = cow_file_range_async(inode, locked_page, start, end, page_started, nr_written); + } return ret; } @@ -7054,10 +7094,13 @@ static void fixup_inode_flags(struct inode *dir, struct inode *inode) else b_inode->flags &= ~BTRFS_INODE_NODATACOW; - if (b_dir->flags & BTRFS_INODE_COMPRESS) + if (b_dir->flags & BTRFS_INODE_COMPRESS) { b_inode->flags |= BTRFS_INODE_COMPRESS; - else - b_inode->flags &= ~BTRFS_INODE_COMPRESS; + b_inode->flags &= ~BTRFS_INODE_NOCOMPRESS; + } else { + b_inode->flags &= ~(BTRFS_INODE_COMPRESS | + BTRFS_INODE_NOCOMPRESS); + } } static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 24b776c..58adbd0 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -52,6 +52,7 @@ #include "locking.h" #include "inode-map.h" #include "backref.h" +#include "rcu-string.h" /* Mask out flags that are inappropriate for the given type of inode. */ static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags) @@ -785,39 +786,57 @@ none: return -ENOENT; } -/* - * Validaty check of prev em and next em: - * 1) no prev/next em - * 2) prev/next em is an hole/inline extent - */ -static int check_adjacent_extents(struct inode *inode, struct extent_map *em) +static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start) { struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; - struct extent_map *prev = NULL, *next = NULL; - int ret = 0; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; + struct extent_map *em; + u64 len = PAGE_CACHE_SIZE; + /* + * hopefully we have this extent in the tree already, try without + * the full extent lock + */ read_lock(&em_tree->lock); - prev = lookup_extent_mapping(em_tree, em->start - 1, (u64)-1); - next = lookup_extent_mapping(em_tree, em->start + em->len, (u64)-1); + em = lookup_extent_mapping(em_tree, start, len); read_unlock(&em_tree->lock); - if ((!prev || prev->block_start >= EXTENT_MAP_LAST_BYTE) && - (!next || next->block_start >= EXTENT_MAP_LAST_BYTE)) - ret = 1; - free_extent_map(prev); - free_extent_map(next); + if (!em) { + /* get the big lock and read metadata off disk */ + lock_extent(io_tree, start, start + len - 1); + em = btrfs_get_extent(inode, NULL, 0, start, len, 0); + unlock_extent(io_tree, start, start + len - 1); + + if (IS_ERR(em)) + return NULL; + } + + return em; +} + +static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em) +{ + struct extent_map *next; + bool ret = true; + + /* this is the last extent */ + if (em->start + em->len >= i_size_read(inode)) + return false; + next = defrag_lookup_extent(inode, em->start + em->len); + if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE) + ret = false; + + free_extent_map(next); return ret; } -static int should_defrag_range(struct inode *inode, u64 start, u64 len, - int thresh, u64 *last_len, u64 *skip, - u64 *defrag_end) +static int should_defrag_range(struct inode *inode, u64 start, int thresh, + u64 *last_len, u64 *skip, u64 *defrag_end) { - struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; - struct extent_map *em = NULL; - struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_map *em; int ret = 1; + bool next_mergeable = true; /* * make sure that once we start defragging an extent, we keep on @@ -828,23 +847,9 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len, *skip = 0; - /* - * hopefully we have this extent in the tree already, try without - * the full extent lock - */ - read_lock(&em_tree->lock); - em = lookup_extent_mapping(em_tree, start, len); - read_unlock(&em_tree->lock); - - if (!em) { - /* get the big lock and read metadata off disk */ - lock_extent(io_tree, start, start + len - 1); - em = btrfs_get_extent(inode, NULL, 0, start, len, 0); - unlock_extent(io_tree, start, start + len - 1); - - if (IS_ERR(em)) - return 0; - } + em = defrag_lookup_extent(inode, start); + if (!em) + return 0; /* this will cover holes, and inline extents */ if (em->block_start >= EXTENT_MAP_LAST_BYTE) { @@ -852,18 +857,15 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len, goto out; } - /* If we have nothing to merge with us, just skip. */ - if (check_adjacent_extents(inode, em)) { - ret = 0; - goto out; - } + next_mergeable = defrag_check_next_extent(inode, em); /* - * we hit a real extent, if it is big don't bother defragging it again + * we hit a real extent, if it is big or the next extent is not a + * real extent, don't bother defragging it */ - if ((*last_len == 0 || *last_len >= thresh) && em->len >= thresh) + if ((*last_len == 0 || *last_len >= thresh) && + (em->len >= thresh || !next_mergeable)) ret = 0; - out: /* * last_len ends up being a counter of how many bytes we've defragged. @@ -1142,8 +1144,8 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, break; if (!should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT, - PAGE_CACHE_SIZE, extent_thresh, - &last_len, &skip, &defrag_end)) { + extent_thresh, &last_len, &skip, + &defrag_end)) { unsigned long next; /* * the should_defrag function tells us how much to skip @@ -1304,6 +1306,13 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root, ret = -EINVAL; goto out_free; } + if (device->fs_devices && device->fs_devices->seeding) { + printk(KERN_INFO "btrfs: resizer unable to apply on " + "seeding device %llu\n", devid); + ret = -EINVAL; + goto out_free; + } + if (!strcmp(sizestr, "max")) new_size = device->bdev->bd_inode->i_size; else { @@ -1345,8 +1354,9 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root, do_div(new_size, root->sectorsize); new_size *= root->sectorsize; - printk(KERN_INFO "btrfs: new size for %s is %llu\n", - device->name, (unsigned long long)new_size); + printk_in_rcu(KERN_INFO "btrfs: new size for %s is %llu\n", + rcu_str_deref(device->name), + (unsigned long long)new_size); if (new_size > old_size) { trans = btrfs_start_transaction(root, 0); @@ -2264,7 +2274,12 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg) di_args->total_bytes = dev->total_bytes; memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid)); if (dev->name) { - strncpy(di_args->path, dev->name, sizeof(di_args->path)); + struct rcu_string *name; + + rcu_read_lock(); + name = rcu_dereference(dev->name); + strncpy(di_args->path, name->str, sizeof(di_args->path)); + rcu_read_unlock(); di_args->path[sizeof(di_args->path) - 1] = 0; } else { di_args->path[0] = '\0'; diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 9e138cd..643335a 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -627,7 +627,27 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) /* start IO across the range first to instantiate any delalloc * extents */ - filemap_write_and_wait_range(inode->i_mapping, start, orig_end); + filemap_fdatawrite_range(inode->i_mapping, start, orig_end); + + /* + * So with compression we will find and lock a dirty page and clear the + * first one as dirty, setup an async extent, and immediately return + * with the entire range locked but with nobody actually marked with + * writeback. So we can't just filemap_write_and_wait_range() and + * expect it to work since it will just kick off a thread to do the + * actual work. So we need to call filemap_fdatawrite_range _again_ + * since it will wait on the page lock, which won't be unlocked until + * after the pages have been marked as writeback and so we're good to go + * from there. We have to do this otherwise we'll miss the ordered + * extents and that results in badness. Please Josef, do not think you + * know better and pull this out at some point in the future, it is + * right and you are wrong. + */ + if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, + &BTRFS_I(inode)->runtime_flags)) + filemap_fdatawrite_range(inode->i_mapping, start, orig_end); + + filemap_fdatawait_range(inode->i_mapping, start, orig_end); end = orig_end; found = 0; diff --git a/fs/btrfs/rcu-string.h b/fs/btrfs/rcu-string.h new file mode 100644 index 0000000..9e111e4 --- /dev/null +++ b/fs/btrfs/rcu-string.h @@ -0,0 +1,56 @@ +/* + * Copyright (C) 2012 Red Hat. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +struct rcu_string { + struct rcu_head rcu; + char str[0]; +}; + +static inline struct rcu_string *rcu_string_strdup(const char *src, gfp_t mask) +{ + size_t len = strlen(src) + 1; + struct rcu_string *ret = kzalloc(sizeof(struct rcu_string) + + (len * sizeof(char)), mask); + if (!ret) + return ret; + strncpy(ret->str, src, len); + return ret; +} + +static inline void rcu_string_free(struct rcu_string *str) +{ + if (str) + kfree_rcu(str, rcu); +} + +#define printk_in_rcu(fmt, ...) do { \ + rcu_read_lock(); \ + printk(fmt, __VA_ARGS__); \ + rcu_read_unlock(); \ +} while (0) + +#define printk_ratelimited_in_rcu(fmt, ...) do { \ + rcu_read_lock(); \ + printk_ratelimited(fmt, __VA_ARGS__); \ + rcu_read_unlock(); \ +} while (0) + +#define rcu_str_deref(rcu_str) ({ \ + struct rcu_string *__str = rcu_dereference(rcu_str); \ + __str->str; \ +}) diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index a38cfa4..b223620 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -26,6 +26,7 @@ #include "backref.h" #include "extent_io.h" #include "check-integrity.h" +#include "rcu-string.h" /* * This is only the first step towards a full-features scrub. It reads all @@ -320,10 +321,10 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root, void *ctx) * hold all of the paths here */ for (i = 0; i < ipath->fspath->elem_cnt; ++i) - printk(KERN_WARNING "btrfs: %s at logical %llu on dev " + printk_in_rcu(KERN_WARNING "btrfs: %s at logical %llu on dev " "%s, sector %llu, root %llu, inode %llu, offset %llu, " "length %llu, links %u (path: %s)\n", swarn->errstr, - swarn->logical, swarn->dev->name, + swarn->logical, rcu_str_deref(swarn->dev->name), (unsigned long long)swarn->sector, root, inum, offset, min(isize - offset, (u64)PAGE_SIZE), nlink, (char *)(unsigned long)ipath->fspath->val[i]); @@ -332,10 +333,10 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root, void *ctx) return 0; err: - printk(KERN_WARNING "btrfs: %s at logical %llu on dev " + printk_in_rcu(KERN_WARNING "btrfs: %s at logical %llu on dev " "%s, sector %llu, root %llu, inode %llu, offset %llu: path " "resolving failed with ret=%d\n", swarn->errstr, - swarn->logical, swarn->dev->name, + swarn->logical, rcu_str_deref(swarn->dev->name), (unsigned long long)swarn->sector, root, inum, offset, ret); free_ipath(ipath); @@ -390,10 +391,11 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock) do { ret = tree_backref_for_extent(&ptr, eb, ei, item_size, &ref_root, &ref_level); - printk(KERN_WARNING + printk_in_rcu(KERN_WARNING "btrfs: %s at logical %llu on dev %s, " "sector %llu: metadata %s (level %d) in tree " - "%llu\n", errstr, swarn.logical, dev->name, + "%llu\n", errstr, swarn.logical, + rcu_str_deref(dev->name), (unsigned long long)swarn.sector, ref_level ? "node" : "leaf", ret < 0 ? -1 : ref_level, @@ -580,9 +582,11 @@ out: spin_lock(&sdev->stat_lock); ++sdev->stat.uncorrectable_errors; spin_unlock(&sdev->stat_lock); - printk_ratelimited(KERN_ERR + + printk_ratelimited_in_rcu(KERN_ERR "btrfs: unable to fixup (nodatasum) error at logical %llu on dev %s\n", - (unsigned long long)fixup->logical, sdev->dev->name); + (unsigned long long)fixup->logical, + rcu_str_deref(sdev->dev->name)); } btrfs_free_path(path); @@ -936,18 +940,20 @@ corrected_error: spin_lock(&sdev->stat_lock); sdev->stat.corrected_errors++; spin_unlock(&sdev->stat_lock); - printk_ratelimited(KERN_ERR + printk_ratelimited_in_rcu(KERN_ERR "btrfs: fixed up error at logical %llu on dev %s\n", - (unsigned long long)logical, sdev->dev->name); + (unsigned long long)logical, + rcu_str_deref(sdev->dev->name)); } } else { did_not_correct_error: spin_lock(&sdev->stat_lock); sdev->stat.uncorrectable_errors++; spin_unlock(&sdev->stat_lock); - printk_ratelimited(KERN_ERR + printk_ratelimited_in_rcu(KERN_ERR "btrfs: unable to fixup (regular) error at logical %llu on dev %s\n", - (unsigned long long)logical, sdev->dev->name); + (unsigned long long)logical, + rcu_str_deref(sdev->dev->name)); } out: diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 96eb9fe..0eb9a4d 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -54,6 +54,7 @@ #include "version.h" #include "export.h" #include "compression.h" +#include "rcu-string.h" #define CREATE_TRACE_POINTS #include <trace/events/btrfs.h> @@ -1482,12 +1483,44 @@ static void btrfs_fs_dirty_inode(struct inode *inode, int flags) "error %d\n", btrfs_ino(inode), ret); } +static int btrfs_show_devname(struct seq_file *m, struct dentry *root) +{ + struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb); + struct btrfs_fs_devices *cur_devices; + struct btrfs_device *dev, *first_dev = NULL; + struct list_head *head; + struct rcu_string *name; + + mutex_lock(&fs_info->fs_devices->device_list_mutex); + cur_devices = fs_info->fs_devices; + while (cur_devices) { + head = &cur_devices->devices; + list_for_each_entry(dev, head, dev_list) { + if (!first_dev || dev->devid < first_dev->devid) + first_dev = dev; + } + cur_devices = cur_devices->seed; + } + + if (first_dev) { + rcu_read_lock(); + name = rcu_dereference(first_dev->name); + seq_escape(m, name->str, " \t\n\\"); + rcu_read_unlock(); + } else { + WARN_ON(1); + } + mutex_unlock(&fs_info->fs_devices->device_list_mutex); + return 0; +} + static const struct super_operations btrfs_super_ops = { .drop_inode = btrfs_drop_inode, .evict_inode = btrfs_evict_inode, .put_super = btrfs_put_super, .sync_fs = btrfs_sync_fs, .show_options = btrfs_show_options, + .show_devname = btrfs_show_devname, .write_inode = btrfs_write_inode, .dirty_inode = btrfs_fs_dirty_inode, .alloc_inode = btrfs_alloc_inode, diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 1791c6e..b72b068 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -100,6 +100,10 @@ loop: kmem_cache_free(btrfs_transaction_cachep, cur_trans); cur_trans = fs_info->running_transaction; goto loop; + } else if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { + spin_unlock(&root->fs_info->trans_lock); + kmem_cache_free(btrfs_transaction_cachep, cur_trans); + return -EROFS; } atomic_set(&cur_trans->num_writers, 1); @@ -1213,14 +1217,20 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans, static void cleanup_transaction(struct btrfs_trans_handle *trans, - struct btrfs_root *root) + struct btrfs_root *root, int err) { struct btrfs_transaction *cur_trans = trans->transaction; WARN_ON(trans->use_count > 1); + btrfs_abort_transaction(trans, root, err); + spin_lock(&root->fs_info->trans_lock); list_del_init(&cur_trans->list); + if (cur_trans == root->fs_info->running_transaction) { + root->fs_info->running_transaction = NULL; + root->fs_info->trans_no_join = 0; + } spin_unlock(&root->fs_info->trans_lock); btrfs_cleanup_one_transaction(trans->transaction, root); @@ -1526,7 +1536,7 @@ cleanup_transaction: // WARN_ON(1); if (current->journal_info == trans) current->journal_info = NULL; - cleanup_transaction(trans, root); + cleanup_transaction(trans, root, ret); return ret; } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 7782020..8a3d259 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -35,6 +35,7 @@ #include "volumes.h" #include "async-thread.h" #include "check-integrity.h" +#include "rcu-string.h" static int init_first_rw_device(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -64,7 +65,7 @@ static void free_fs_devices(struct btrfs_fs_devices *fs_devices) device = list_entry(fs_devices->devices.next, struct btrfs_device, dev_list); list_del(&device->dev_list); - kfree(device->name); + rcu_string_free(device->name); kfree(device); } kfree(fs_devices); @@ -334,8 +335,8 @@ static noinline int device_list_add(const char *path, { struct btrfs_device *device; struct btrfs_fs_devices *fs_devices; + struct rcu_string *name; u64 found_transid = btrfs_super_generation(disk_super); - char *name; fs_devices = find_fsid(disk_super->fsid); if (!fs_devices) { @@ -369,11 +370,13 @@ static noinline int device_list_add(const char *path, memcpy(device->uuid, disk_super->dev_item.uuid, BTRFS_UUID_SIZE); spin_lock_init(&device->io_lock); - device->name = kstrdup(path, GFP_NOFS); - if (!device->name) { + + name = rcu_string_strdup(path, GFP_NOFS); + if (!name) { kfree(device); return -ENOMEM; } + rcu_assign_pointer(device->name, name); INIT_LIST_HEAD(&device->dev_alloc_list); /* init readahead state */ @@ -390,12 +393,12 @@ static noinline int device_list_add(const char *path, device->fs_devices = fs_devices; fs_devices->num_devices++; - } else if (!device->name || strcmp(device->name, path)) { - name = kstrdup(path, GFP_NOFS); + } else if (!device->name || strcmp(device->name->str, path)) { + name = rcu_string_strdup(path, GFP_NOFS); if (!name) return -ENOMEM; - kfree(device->name); - device->name = name; + rcu_string_free(device->name); + rcu_assign_pointer(device->name, name); if (device->missing) { fs_devices->missing_devices--; device->missing = 0; @@ -430,15 +433,22 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) /* We have held the volume lock, it is safe to get the devices. */ list_for_each_entry(orig_dev, &orig->devices, dev_list) { + struct rcu_string *name; + device = kzalloc(sizeof(*device), GFP_NOFS); if (!device) goto error; - device->name = kstrdup(orig_dev->name, GFP_NOFS); - if (!device->name) { + /* + * This is ok to do without rcu read locked because we hold the + * uuid mutex so nothing we touch in here is going to disappear. + */ + name = rcu_string_strdup(orig_dev->name->str, GFP_NOFS); + if (!name) { kfree(device); goto error; } + rcu_assign_pointer(device->name, name); device->devid = orig_dev->devid; device->work.func = pending_bios_fn; @@ -491,7 +501,7 @@ again: } list_del_init(&device->dev_list); fs_devices->num_devices--; - kfree(device->name); + rcu_string_free(device->name); kfree(device); } @@ -516,7 +526,7 @@ static void __free_device(struct work_struct *work) if (device->bdev) blkdev_put(device->bdev, device->mode); - kfree(device->name); + rcu_string_free(device->name); kfree(device); } @@ -540,6 +550,7 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) mutex_lock(&fs_devices->device_list_mutex); list_for_each_entry(device, &fs_devices->devices, dev_list) { struct btrfs_device *new_device; + struct rcu_string *name; if (device->bdev) fs_devices->open_devices--; @@ -555,8 +566,11 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) new_device = kmalloc(sizeof(*new_device), GFP_NOFS); BUG_ON(!new_device); /* -ENOMEM */ memcpy(new_device, device, sizeof(*new_device)); - new_device->name = kstrdup(device->name, GFP_NOFS); - BUG_ON(device->name && !new_device->name); /* -ENOMEM */ + + /* Safe because we are under uuid_mutex */ + name = rcu_string_strdup(device->name->str, GFP_NOFS); + BUG_ON(device->name && !name); /* -ENOMEM */ + rcu_assign_pointer(new_device->name, name); new_device->bdev = NULL; new_device->writeable = 0; new_device->in_fs_metadata = 0; @@ -621,9 +635,9 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, if (!device->name) continue; - bdev = blkdev_get_by_path(device->name, flags, holder); + bdev = blkdev_get_by_path(device->name->str, flags, holder); if (IS_ERR(bdev)) { - printk(KERN_INFO "open %s failed\n", device->name); + printk(KERN_INFO "open %s failed\n", device->name->str); goto error; } filemap_write_and_wait(bdev->bd_inode->i_mapping); @@ -1632,6 +1646,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) struct block_device *bdev; struct list_head *devices; struct super_block *sb = root->fs_info->sb; + struct rcu_string *name; u64 total_bytes; int seeding_dev = 0; int ret = 0; @@ -1671,23 +1686,24 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) goto error; } - device->name = kstrdup(device_path, GFP_NOFS); - if (!device->name) { + name = rcu_string_strdup(device_path, GFP_NOFS); + if (!name) { kfree(device); ret = -ENOMEM; goto error; } + rcu_assign_pointer(device->name, name); ret = find_next_devid(root, &device->devid); if (ret) { - kfree(device->name); + rcu_string_free(device->name); kfree(device); goto error; } trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) { - kfree(device->name); + rcu_string_free(device->name); kfree(device); ret = PTR_ERR(trans); goto error; @@ -1796,7 +1812,7 @@ error_trans: unlock_chunks(root); btrfs_abort_transaction(trans, root, ret); btrfs_end_transaction(trans, root); - kfree(device->name); + rcu_string_free(device->name); kfree(device); error: blkdev_put(bdev, FMODE_EXCL); @@ -4204,10 +4220,17 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, bio->bi_sector = bbio->stripes[dev_nr].physical >> 9; dev = bbio->stripes[dev_nr].dev; if (dev && dev->bdev && (rw != WRITE || dev->writeable)) { +#ifdef DEBUG + struct rcu_string *name; + + rcu_read_lock(); + name = rcu_dereference(dev->name); pr_debug("btrfs_map_bio: rw %d, secor=%llu, dev=%lu " "(%s id %llu), size=%u\n", rw, (u64)bio->bi_sector, (u_long)dev->bdev->bd_dev, - dev->name, dev->devid, bio->bi_size); + name->str, dev->devid, bio->bi_size); + rcu_read_unlock(); +#endif bio->bi_bdev = dev->bdev; if (async_submit) schedule_bio(root, dev, rw, bio); @@ -4694,8 +4717,9 @@ int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info) key.offset = device->devid; ret = btrfs_search_slot(NULL, dev_root, &key, path, 0, 0); if (ret) { - printk(KERN_WARNING "btrfs: no dev_stats entry found for device %s (devid %llu) (OK on first mount after mkfs)\n", - device->name, (unsigned long long)device->devid); + printk_in_rcu(KERN_WARNING "btrfs: no dev_stats entry found for device %s (devid %llu) (OK on first mount after mkfs)\n", + rcu_str_deref(device->name), + (unsigned long long)device->devid); __btrfs_reset_dev_stats(device); device->dev_stats_valid = 1; btrfs_release_path(path); @@ -4747,8 +4771,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans, BUG_ON(!path); ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1); if (ret < 0) { - printk(KERN_WARNING "btrfs: error %d while searching for dev_stats item for device %s!\n", - ret, device->name); + printk_in_rcu(KERN_WARNING "btrfs: error %d while searching for dev_stats item for device %s!\n", + ret, rcu_str_deref(device->name)); goto out; } @@ -4757,8 +4781,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans, /* need to delete old one and insert a new one */ ret = btrfs_del_item(trans, dev_root, path); if (ret != 0) { - printk(KERN_WARNING "btrfs: delete too small dev_stats item for device %s failed %d!\n", - device->name, ret); + printk_in_rcu(KERN_WARNING "btrfs: delete too small dev_stats item for device %s failed %d!\n", + rcu_str_deref(device->name), ret); goto out; } ret = 1; @@ -4770,8 +4794,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans, ret = btrfs_insert_empty_item(trans, dev_root, path, &key, sizeof(*ptr)); if (ret < 0) { - printk(KERN_WARNING "btrfs: insert dev_stats item for device %s failed %d!\n", - device->name, ret); + printk_in_rcu(KERN_WARNING "btrfs: insert dev_stats item for device %s failed %d!\n", + rcu_str_deref(device->name), ret); goto out; } } @@ -4823,9 +4847,9 @@ void btrfs_dev_stat_print_on_error(struct btrfs_device *dev) { if (!dev->dev_stats_valid) return; - printk_ratelimited(KERN_ERR + printk_ratelimited_in_rcu(KERN_ERR "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n", - dev->name, + rcu_str_deref(dev->name), btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS), btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS), btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS), @@ -4837,8 +4861,8 @@ void btrfs_dev_stat_print_on_error(struct btrfs_device *dev) static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev) { - printk(KERN_INFO "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n", - dev->name, + printk_in_rcu(KERN_INFO "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n", + rcu_str_deref(dev->name), btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS), btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS), btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS), diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 3406a88..74366f2 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -58,7 +58,7 @@ struct btrfs_device { /* the mode sent to blkdev_get */ fmode_t mode; - char *name; + struct rcu_string *name; /* the internal btrfs device id */ u64 devid; |