From 86d4a77ba3dc4ace238a0556541a41df2bd71d49 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@redhat.com>
Date: Wed, 25 May 2011 13:03:16 -0400
Subject: Btrfs: cache bitmaps when searching for a cluster

If we are looking for a cluster in a particularly sparse or fragmented block
group, we will do a lot of looping through the free space tree looking for
various things, and if we need to look at bitmaps we will endup doing the whole
dance twice.  So instead add the bitmap entries to a temporary list so if we
have to do the bitmap search we can just look through the list of entries we've
found quickly instead of having to loop through the entire tree again.  Thanks,

Signed-off-by: Josef Bacik <josef@redhat.com>
---
 fs/btrfs/free-space-cache.c | 54 ++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 49 insertions(+), 5 deletions(-)

(limited to 'fs/btrfs')

diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index ad14473..930c07f 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -2144,6 +2144,7 @@ again:
  */
 static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
 				   struct btrfs_free_cluster *cluster,
+				   struct list_head *bitmaps,
 				   u64 offset, u64 bytes, u64 min_bytes)
 {
 	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
@@ -2166,6 +2167,8 @@ static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
 	 * extent entry.
 	 */
 	while (entry->bitmap) {
+		if (list_empty(&entry->list))
+			list_add_tail(&entry->list, bitmaps);
 		node = rb_next(&entry->offset_index);
 		if (!node)
 			return -ENOSPC;
@@ -2185,8 +2188,12 @@ static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
 			return -ENOSPC;
 		entry = rb_entry(node, struct btrfs_free_space, offset_index);
 
-		if (entry->bitmap)
+		if (entry->bitmap) {
+			if (list_empty(&entry->list))
+				list_add_tail(&entry->list, bitmaps);
 			continue;
+		}
+
 		/*
 		 * we haven't filled the empty size and the window is
 		 * very large.  reset and try again
@@ -2240,6 +2247,7 @@ static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
  */
 static int setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
 				struct btrfs_free_cluster *cluster,
+				struct list_head *bitmaps,
 				u64 offset, u64 bytes, u64 min_bytes)
 {
 	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
@@ -2250,10 +2258,39 @@ static int setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
 	if (ctl->total_bitmaps == 0)
 		return -ENOSPC;
 
+	/*
+	 * First check our cached list of bitmaps and see if there is an entry
+	 * here that will work.
+	 */
+	list_for_each_entry(entry, bitmaps, list) {
+		if (entry->bytes < min_bytes)
+			continue;
+		ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
+					   bytes, min_bytes);
+		if (!ret)
+			return 0;
+	}
+
+	/*
+	 * If we do have entries on our list and we are here then we didn't find
+	 * anything, so go ahead and get the next entry after the last entry in
+	 * this list and start the search from there.
+	 */
+	if (!list_empty(bitmaps)) {
+		entry = list_entry(bitmaps->prev, struct btrfs_free_space,
+				   list);
+		node = rb_next(&entry->offset_index);
+		if (!node)
+			return -ENOSPC;
+		entry = rb_entry(node, struct btrfs_free_space, offset_index);
+		goto search;
+	}
+
 	entry = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 0, 1);
 	if (!entry)
 		return -ENOSPC;
 
+search:
 	node = &entry->offset_index;
 	do {
 		entry = rb_entry(node, struct btrfs_free_space, offset_index);
@@ -2284,6 +2321,8 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
 			     u64 offset, u64 bytes, u64 empty_size)
 {
 	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
+	struct list_head bitmaps;
+	struct btrfs_free_space *entry, *tmp;
 	u64 min_bytes;
 	int ret;
 
@@ -2322,11 +2361,16 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
 		goto out;
 	}
 
-	ret = setup_cluster_no_bitmap(block_group, cluster, offset, bytes,
-				      min_bytes);
+	INIT_LIST_HEAD(&bitmaps);
+	ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset,
+				      bytes, min_bytes);
 	if (ret)
-		ret = setup_cluster_bitmap(block_group, cluster, offset,
-					   bytes, min_bytes);
+		ret = setup_cluster_bitmap(block_group, cluster, &bitmaps,
+					   offset, bytes, min_bytes);
+
+	/* Clear our temporary list */
+	list_for_each_entry_safe(entry, tmp, &bitmaps, list)
+		list_del_init(&entry->list);
 
 	if (!ret) {
 		atomic_inc(&block_group->count);
-- 
cgit v1.1


From 3de85bb95cc50d0977cbb7a0c605e894be4c790d Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@redhat.com>
Date: Wed, 25 May 2011 13:07:37 -0400
Subject: Btrfs: noinline the cluster searching functions

When profiling the find cluster code it's hard to tell where we are spending our
time because the bitmap and non-bitmap functions get inlined by the compiler, so
make that not happen.  Thanks,

Signed-off-by: Josef Bacik <josef@redhat.com>
---
 fs/btrfs/free-space-cache.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

(limited to 'fs/btrfs')

diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 930c07f..f56caac 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -2142,10 +2142,11 @@ again:
 /*
  * This searches the block group for just extents to fill the cluster with.
  */
-static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
-				   struct btrfs_free_cluster *cluster,
-				   struct list_head *bitmaps,
-				   u64 offset, u64 bytes, u64 min_bytes)
+static noinline int
+setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
+			struct btrfs_free_cluster *cluster,
+			struct list_head *bitmaps, u64 offset, u64 bytes,
+			u64 min_bytes)
 {
 	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
 	struct btrfs_free_space *first = NULL;
@@ -2245,10 +2246,11 @@ static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
  * This specifically looks for bitmaps that may work in the cluster, we assume
  * that we have already failed to find extents that will work.
  */
-static int setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
-				struct btrfs_free_cluster *cluster,
-				struct list_head *bitmaps,
-				u64 offset, u64 bytes, u64 min_bytes)
+static noinline int
+setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
+		     struct btrfs_free_cluster *cluster,
+		     struct list_head *bitmaps, u64 offset, u64 bytes,
+		     u64 min_bytes)
 {
 	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
 	struct btrfs_free_space *entry;
-- 
cgit v1.1


From f2bb8f5cfb3bce595b2de251ed7638047fc4e530 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@redhat.com>
Date: Wed, 25 May 2011 13:10:16 -0400
Subject: Btrfs: don't commit the transaction if we dont have enough pinned
 bytes

I noticed when running an enospc test that we would get stuck committing the
transaction in check_data_space even though we truly didn't have enough space.
So check to see if bytes_pinned is bigger than num_bytes, if it's not don't
commit the transaction.  Thanks,

Signed-off-by: Josef Bacik <josef@redhat.com>
---
 fs/btrfs/extent-tree.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'fs/btrfs')

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 5b9b6b6..0d0a3fe 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3089,6 +3089,13 @@ alloc:
 			}
 			goto again;
 		}
+
+		/*
+		 * If we have less pinned bytes than we want to allocate then
+		 * don't bother committing the transaction, it won't help us.
+		 */
+		if (data_sinfo->bytes_pinned < bytes)
+			committed = 1;
 		spin_unlock(&data_sinfo->lock);
 
 		/* commit the current transaction and try again */
-- 
cgit v1.1


From 2cdc342c204dba69ca3b2ec43d8e6ff41ed920b8 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@redhat.com>
Date: Fri, 27 May 2011 14:07:49 -0400
Subject: Btrfs: fix bitmap regression

In cleaning up the clustering code I accidently introduced a regression by
adding bitmap entries to the cluster rb tree.  The problem is if we've maxed out
the number of bitmaps we can have for the block group we can only add free space
to the bitmaps, but since the bitmap is on the cluster we can't find it and we
try to create another one.  This would result in a panic because the total
bitmaps was bigger than the max bitmaps that were allowed.  This patch fixes
this by checking to see if we have a cluster, and then looking at the cluster rb
tree to see if it has a bitmap entry and if it does and that space belongs to
that bitmap, go ahead and add it to that bitmap.

I could hit this panic every time with an fs_mark test within a couple of
minutes.  With this patch I no longer hit the panic and fs_mark goes to
completion.  Thanks,

Signed-off-by: Josef Bacik <josef@redhat.com>
---
 fs/btrfs/free-space-cache.c | 88 +++++++++++++++++++++++++++++++++++----------
 1 file changed, 69 insertions(+), 19 deletions(-)

(limited to 'fs/btrfs')

diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index f56caac..8258ccf8 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -1417,6 +1417,23 @@ again:
 	return 0;
 }
 
+static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
+			       struct btrfs_free_space *info, u64 offset,
+			       u64 bytes)
+{
+	u64 bytes_to_set = 0;
+	u64 end;
+
+	end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);
+
+	bytes_to_set = min(end - offset, bytes);
+
+	bitmap_set_bits(ctl, info, offset, bytes_to_set);
+
+	return bytes_to_set;
+
+}
+
 static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
 		      struct btrfs_free_space *info)
 {
@@ -1453,12 +1470,18 @@ static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
 	return true;
 }
 
+static struct btrfs_free_space_op free_space_op = {
+	.recalc_thresholds	= recalculate_thresholds,
+	.use_bitmap		= use_bitmap,
+};
+
 static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
 			      struct btrfs_free_space *info)
 {
 	struct btrfs_free_space *bitmap_info;
+	struct btrfs_block_group_cache *block_group = NULL;
 	int added = 0;
-	u64 bytes, offset, end;
+	u64 bytes, offset, bytes_added;
 	int ret;
 
 	bytes = info->bytes;
@@ -1467,6 +1490,47 @@ static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
 	if (!ctl->op->use_bitmap(ctl, info))
 		return 0;
 
+	if (ctl->op == &free_space_op)
+		block_group = ctl->private;
+
+	/*
+	 * Since we link bitmaps right into the cluster we need to see if we
+	 * have a cluster here, and if so and it has our bitmap we need to add
+	 * the free space to that bitmap.
+	 */
+	if (block_group && !list_empty(&block_group->cluster_list)) {
+		struct btrfs_free_cluster *cluster;
+		struct rb_node *node;
+		struct btrfs_free_space *entry;
+
+		cluster = list_entry(block_group->cluster_list.next,
+				     struct btrfs_free_cluster,
+				     block_group_list);
+		spin_lock(&cluster->lock);
+		node = rb_first(&cluster->root);
+		if (!node) {
+			spin_unlock(&cluster->lock);
+			goto again;
+		}
+
+		entry = rb_entry(node, struct btrfs_free_space, offset_index);
+		if (!entry->bitmap) {
+			spin_unlock(&cluster->lock);
+			goto again;
+		}
+
+		if (entry->offset == offset_to_bitmap(ctl, offset)) {
+			bytes_added = add_bytes_to_bitmap(ctl, entry,
+							  offset, bytes);
+			bytes -= bytes_added;
+			offset += bytes_added;
+		}
+		spin_unlock(&cluster->lock);
+		if (!bytes) {
+			ret = 1;
+			goto out;
+		}
+	}
 again:
 	bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
 					 1, 0);
@@ -1475,19 +1539,10 @@ again:
 		goto new_bitmap;
 	}
 
-	end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);
-
-	if (offset >= bitmap_info->offset && offset + bytes > end) {
-		bitmap_set_bits(ctl, bitmap_info, offset, end - offset);
-		bytes -= end - offset;
-		offset = end;
-		added = 0;
-	} else if (offset >= bitmap_info->offset && offset + bytes <= end) {
-		bitmap_set_bits(ctl, bitmap_info, offset, bytes);
-		bytes = 0;
-	} else {
-		BUG();
-	}
+	bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes);
+	bytes -= bytes_added;
+	offset += bytes_added;
+	added = 0;
 
 	if (!bytes) {
 		ret = 1;
@@ -1766,11 +1821,6 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
 	       "\n", count);
 }
 
-static struct btrfs_free_space_op free_space_op = {
-	.recalc_thresholds	= recalculate_thresholds,
-	.use_bitmap		= use_bitmap,
-};
-
 void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group)
 {
 	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
-- 
cgit v1.1


From 723bda2083d44edbd6be0f0b09f902120dc07442 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@redhat.com>
Date: Fri, 27 May 2011 16:11:38 -0400
Subject: Btrfs: fix the allocator loop logic

I was testing with empty_cluster = 0 to try and reproduce a problem and kept
hitting early enospc panics.  This was because our loop logic was a little
confused.  So this is what I did

1) Make the loop variable the ultimate decider on wether we should loop again
isntead of checking to see if we had an uncached bg, empty size or empty
cluster.

2) Increment loop before checking to see what we are on to make the loop
definitions make more sense.

3) If we are on the chunk alloc loop don't set empty_size/empty_cluster to 0
unless we didn't actually allocate a chunk.  If we did allocate a chunk we
should be able to easily setup a new cluster so clearing
empty_size/empty_cluster makes us less efficient.

This kept me from hitting panics while trying to reproduce the other problem.
Thanks,

Signed-off-by: Josef Bacik <josef@redhat.com>
---
 fs/btrfs/extent-tree.c | 48 +++++++++++++++++++++++++-----------------------
 1 file changed, 25 insertions(+), 23 deletions(-)

(limited to 'fs/btrfs')

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 0d0a3fe..b42efc2 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -5218,9 +5218,7 @@ loop:
 	 * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try
 	 *			again
 	 */
-	if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE &&
-	    (found_uncached_bg || empty_size || empty_cluster ||
-	     allowed_chunk_alloc)) {
+	if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE) {
 		index = 0;
 		if (loop == LOOP_FIND_IDEAL && found_uncached_bg) {
 			found_uncached_bg = false;
@@ -5260,32 +5258,36 @@ loop:
 			goto search;
 		}
 
-		if (loop < LOOP_CACHING_WAIT) {
-			loop++;
-			goto search;
-		}
+		loop++;
 
 		if (loop == LOOP_ALLOC_CHUNK) {
-			empty_size = 0;
-			empty_cluster = 0;
-		}
+		       if (allowed_chunk_alloc) {
+				ret = do_chunk_alloc(trans, root, num_bytes +
+						     2 * 1024 * 1024, data,
+						     CHUNK_ALLOC_LIMITED);
+				allowed_chunk_alloc = 0;
+				if (ret == 1)
+					done_chunk_alloc = 1;
+			} else if (!done_chunk_alloc &&
+				   space_info->force_alloc ==
+				   CHUNK_ALLOC_NO_FORCE) {
+				space_info->force_alloc = CHUNK_ALLOC_LIMITED;
+			}
 
-		if (allowed_chunk_alloc) {
-			ret = do_chunk_alloc(trans, root, num_bytes +
-					     2 * 1024 * 1024, data,
-					     CHUNK_ALLOC_LIMITED);
-			allowed_chunk_alloc = 0;
-			done_chunk_alloc = 1;
-		} else if (!done_chunk_alloc &&
-			   space_info->force_alloc == CHUNK_ALLOC_NO_FORCE) {
-			space_info->force_alloc = CHUNK_ALLOC_LIMITED;
+		       /*
+			* We didn't allocate a chunk, go ahead and drop the
+			* empty size and loop again.
+			*/
+		       if (!done_chunk_alloc)
+			       loop = LOOP_NO_EMPTY_SIZE;
 		}
 
-		if (loop < LOOP_NO_EMPTY_SIZE) {
-			loop++;
-			goto search;
+		if (loop == LOOP_NO_EMPTY_SIZE) {
+			empty_size = 0;
+			empty_cluster = 0;
 		}
-		ret = -ENOSPC;
+
+		goto search;
 	} else if (!ins->objectid) {
 		ret = -ENOSPC;
 	} else if (ins->objectid) {
-- 
cgit v1.1


From f6a398298d34af66ec3a2d82a44a4dbc5277357d Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@redhat.com>
Date: Mon, 6 Jun 2011 10:50:35 -0400
Subject: Btrfs: fix duplicate checking logic

When merging my code into the integration test the second check for duplicate
entries got screwed up.  This patch fixes it by dropping ret2 and just using ret
for the return value, and checking if we got an error before adding the bitmap
to the local list.  Thanks,

Signed-off-by: Josef Bacik <josef@redhat.com>
---
 fs/btrfs/free-space-cache.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs/btrfs')

diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 8258ccf8..38f3fd9 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -250,7 +250,7 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
 	pgoff_t index = 0;
 	unsigned long first_page_offset;
 	int num_checksums;
-	int ret = 0, ret2;
+	int ret = 0;
 
 	INIT_LIST_HEAD(&bitmaps);
 
@@ -421,11 +421,10 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
 					goto free_cache;
 				}
 				spin_lock(&ctl->tree_lock);
-				ret2 = link_free_space(ctl, e);
+				ret = link_free_space(ctl, e);
 				ctl->total_bitmaps++;
 				ctl->op->recalc_thresholds(ctl);
 				spin_unlock(&ctl->tree_lock);
-				list_add_tail(&e->list, &bitmaps);
 				if (ret) {
 					printk(KERN_ERR "Duplicate entries in "
 					       "free space cache, dumping\n");
@@ -434,6 +433,7 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
 					page_cache_release(page);
 					goto free_cache;
 				}
+				list_add_tail(&e->list, &bitmaps);
 			}
 
 			num_entries--;
-- 
cgit v1.1


From 25b8b936ed44814a5ce6fc3b2a21401f33cd56f6 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@redhat.com>
Date: Wed, 8 Jun 2011 14:36:54 -0400
Subject: Btrfs: don't map extent buffer if path->skip_locking is set

Arne's scrub stuff exposed a problem with mapping the extent buffer in
reada_for_search.  He searches the commit root with multiple threads and with
skip_locking set, so we can race and overwrite node->map_token since node isn't
locked.  So fix this so that we only map the extent buffer if we don't already
have a map_token and skip_locking isn't set.  Without this patch scrub would
panic almost immediately, with the patch it doesn't panic anymore.  Thanks,

Reported-by: Arne Jansen <sensille@gmx.net>
Signed-off-by: Josef Bacik <josef@redhat.com>
---
 fs/btrfs/ctree.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'fs/btrfs')

diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index d840893..2e66786 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1228,6 +1228,7 @@ static void reada_for_search(struct btrfs_root *root,
 	u32 nr;
 	u32 blocksize;
 	u32 nscan = 0;
+	bool map = true;
 
 	if (level != 1)
 		return;
@@ -1249,8 +1250,11 @@ static void reada_for_search(struct btrfs_root *root,
 
 	nritems = btrfs_header_nritems(node);
 	nr = slot;
+	if (node->map_token || path->skip_locking)
+		map = false;
+
 	while (1) {
-		if (!node->map_token) {
+		if (map && !node->map_token) {
 			unsigned long offset = btrfs_node_key_ptr_offset(nr);
 			map_private_extent_buffer(node, offset,
 						  sizeof(struct btrfs_key_ptr),
@@ -1277,7 +1281,7 @@ static void reada_for_search(struct btrfs_root *root,
 		if ((search <= target && target - search <= 65536) ||
 		    (search > target && search - target <= 65536)) {
 			gen = btrfs_node_ptr_generation(node, nr);
-			if (node->map_token) {
+			if (map && node->map_token) {
 				unmap_extent_buffer(node, node->map_token,
 						    KM_USER1);
 				node->map_token = NULL;
@@ -1289,7 +1293,7 @@ static void reada_for_search(struct btrfs_root *root,
 		if ((nread > 65536 || nscan > 32))
 			break;
 	}
-	if (node->map_token) {
+	if (map && node->map_token) {
 		unmap_extent_buffer(node, node->map_token, KM_USER1);
 		node->map_token = NULL;
 	}
-- 
cgit v1.1


From 3473f3c06a36865ae05993041fff35ee928342a7 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@redhat.com>
Date: Thu, 9 Jun 2011 10:15:17 -0400
Subject: Btrfs: unlock the trans lock properly

In btrfs_wait_for_commit if we came upon a transaction that had committed we
just exited, but that's bad since we are holding the trans_lock.  So break
instead so that the lock is dropped.  Thanks,

Reported-by: David Sterba <dsterba@suse.cz>
Signed-off-by: Josef Bacik <josef@redhat.com>
---
 fs/btrfs/transaction.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs/btrfs')

diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index dd71966..6b2e478 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -349,7 +349,7 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
 					    list) {
 			if (t->in_commit) {
 				if (t->commit_done)
-					goto out;
+					break;
 				cur_trans = t;
 				atomic_inc(&cur_trans->use_count);
 				break;
-- 
cgit v1.1


From ad3e34bba4b64ab8e1f5ea1a17768e1a0d9648ea Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@redhat.com>
Date: Wed, 8 Jun 2011 14:45:50 -0400
Subject: Btrfs: don't map extent buffer if path->skip_locking is set

Arne's scrub stuff exposed a problem with mapping the extent buffer in
reada_for_search.  He searches the commit root with multiple threads and with
skip_locking set, so we can race and overwrite node->map_token since node isn't
locked.  So fix this so that we only map the extent buffer if we don't already
have a map_token and skip_locking isn't set.  Without this patch scrub would
panic almost immediately, with the patch it doesn't panic anymore.  Thanks,

Reported-by: Arne Jansen <sensille@gmx.net>
Signed-off-by: Josef Bacik <josef@redhat.com>
---
 fs/btrfs/ctree.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'fs/btrfs')

diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index d840893..2e66786 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1228,6 +1228,7 @@ static void reada_for_search(struct btrfs_root *root,
 	u32 nr;
 	u32 blocksize;
 	u32 nscan = 0;
+	bool map = true;
 
 	if (level != 1)
 		return;
@@ -1249,8 +1250,11 @@ static void reada_for_search(struct btrfs_root *root,
 
 	nritems = btrfs_header_nritems(node);
 	nr = slot;
+	if (node->map_token || path->skip_locking)
+		map = false;
+
 	while (1) {
-		if (!node->map_token) {
+		if (map && !node->map_token) {
 			unsigned long offset = btrfs_node_key_ptr_offset(nr);
 			map_private_extent_buffer(node, offset,
 						  sizeof(struct btrfs_key_ptr),
@@ -1277,7 +1281,7 @@ static void reada_for_search(struct btrfs_root *root,
 		if ((search <= target && target - search <= 65536) ||
 		    (search > target && search - target <= 65536)) {
 			gen = btrfs_node_ptr_generation(node, nr);
-			if (node->map_token) {
+			if (map && node->map_token) {
 				unmap_extent_buffer(node, node->map_token,
 						    KM_USER1);
 				node->map_token = NULL;
@@ -1289,7 +1293,7 @@ static void reada_for_search(struct btrfs_root *root,
 		if ((nread > 65536 || nscan > 32))
 			break;
 	}
-	if (node->map_token) {
+	if (map && node->map_token) {
 		unmap_extent_buffer(node, node->map_token, KM_USER1);
 		node->map_token = NULL;
 	}
-- 
cgit v1.1


From 8c51032f978bac5bec5dae0c5de4f85db97c1cc9 Mon Sep 17 00:00:00 2001
From: Arne Jansen <sensille@gmx.net>
Date: Fri, 3 Jun 2011 10:09:26 +0200
Subject: btrfs: scrub: errors in tree enumeration

due to the semantics of btrfs_search_slot the path can point to an
invalid slot when ret > 0. This condition went unnoticed, which in
turn could have led to an incomplete scrubbing.

Signed-off-by: Arne Jansen <sensille@gmx.net>
---
 fs/btrfs/scrub.c | 57 +++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 34 insertions(+), 23 deletions(-)

(limited to 'fs/btrfs')

diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index df50fd1..d5a4108 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -804,18 +804,12 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev,
 
 		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
 		if (ret < 0)
-			goto out;
-
-		l = path->nodes[0];
-		slot = path->slots[0];
-		btrfs_item_key_to_cpu(l, &key, slot);
-		if (key.objectid != logical) {
-			ret = btrfs_previous_item(root, path, 0,
-						  BTRFS_EXTENT_ITEM_KEY);
-			if (ret < 0)
-				goto out;
-		}
+			goto out_noplug;
 
+		/*
+		 * we might miss half an extent here, but that doesn't matter,
+		 * as it's only the prefetch
+		 */
 		while (1) {
 			l = path->nodes[0];
 			slot = path->slots[0];
@@ -824,7 +818,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev,
 				if (ret == 0)
 					continue;
 				if (ret < 0)
-					goto out;
+					goto out_noplug;
 
 				break;
 			}
@@ -906,15 +900,20 @@ again:
 		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
 		if (ret < 0)
 			goto out;
-
-		l = path->nodes[0];
-		slot = path->slots[0];
-		btrfs_item_key_to_cpu(l, &key, slot);
-		if (key.objectid != logical) {
+		if (ret > 0) {
 			ret = btrfs_previous_item(root, path, 0,
 						  BTRFS_EXTENT_ITEM_KEY);
 			if (ret < 0)
 				goto out;
+			if (ret > 0) {
+				/* there's no smaller item, so stick with the
+				 * larger one */
+				btrfs_release_path(path);
+				ret = btrfs_search_slot(NULL, root, &key,
+							path, 0, 0);
+				if (ret < 0)
+					goto out;
+			}
 		}
 
 		while (1) {
@@ -989,6 +988,7 @@ next:
 
 out:
 	blk_finish_plug(&plug);
+out_noplug:
 	btrfs_free_path(path);
 	return ret < 0 ? ret : 0;
 }
@@ -1064,8 +1064,15 @@ int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end)
 	while (1) {
 		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
 		if (ret < 0)
-			goto out;
-		ret = 0;
+			break;
+		if (ret > 0) {
+			if (path->slots[0] >=
+			    btrfs_header_nritems(path->nodes[0])) {
+				ret = btrfs_next_leaf(root, path);
+				if (ret)
+					break;
+			}
+		}
 
 		l = path->nodes[0];
 		slot = path->slots[0];
@@ -1075,7 +1082,7 @@ int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end)
 		if (found_key.objectid != sdev->dev->devid)
 			break;
 
-		if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY)
+		if (btrfs_key_type(&found_key) != BTRFS_DEV_EXTENT_KEY)
 			break;
 
 		if (found_key.offset >= end)
@@ -1104,7 +1111,7 @@ int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end)
 		cache = btrfs_lookup_block_group(fs_info, chunk_offset);
 		if (!cache) {
 			ret = -ENOENT;
-			goto out;
+			break;
 		}
 		ret = scrub_chunk(sdev, chunk_tree, chunk_objectid,
 				  chunk_offset, length);
@@ -1116,9 +1123,13 @@ int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end)
 		btrfs_release_path(path);
 	}
 
-out:
 	btrfs_free_path(path);
-	return ret;
+
+	/*
+	 * ret can still be 1 from search_slot or next_leaf,
+	 * that's not an error
+	 */
+	return ret < 0 ? ret : 0;
 }
 
 static noinline_for_stack int scrub_supers(struct scrub_dev *sdev)
-- 
cgit v1.1


From 632dd772fcbde2ba37c0e8983bd38ef4a1eac906 Mon Sep 17 00:00:00 2001
From: Arne Jansen <sensille@gmx.net>
Date: Fri, 10 Jun 2011 12:07:07 +0200
Subject: btrfs: reinitialize scrub workers

Scrub starts the workers each time a scrub starts and stops them after it
finished. This patch adds an initialization for the workers before each
start, otherwise the workers behave strangely.

Signed-off-by: Arne Jansen <sensille@gmx.net>
---
 fs/btrfs/disk-io.c | 2 --
 fs/btrfs/scrub.c   | 6 +++++-
 2 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'fs/btrfs')

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index a203d36..7bbbfeb 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1668,8 +1668,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	init_waitqueue_head(&fs_info->scrub_pause_wait);
 	init_rwsem(&fs_info->scrub_super_lock);
 	fs_info->scrub_workers_refcnt = 0;
-	btrfs_init_workers(&fs_info->scrub_workers, "scrub",
-			   fs_info->thread_pool_size, &fs_info->generic_worker);
 
 	sb->s_blocksize = 4096;
 	sb->s_blocksize_bits = blksize_bits(4096);
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index d5a4108..92cac19 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -1166,8 +1166,12 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_root *root)
 	struct btrfs_fs_info *fs_info = root->fs_info;
 
 	mutex_lock(&fs_info->scrub_lock);
-	if (fs_info->scrub_workers_refcnt == 0)
+	if (fs_info->scrub_workers_refcnt == 0) {
+		btrfs_init_workers(&fs_info->scrub_workers, "scrub",
+			   fs_info->thread_pool_size, &fs_info->generic_worker);
+		fs_info->scrub_workers.idle_thresh = 4;
 		btrfs_start_workers(&fs_info->scrub_workers, 1);
+	}
 	++fs_info->scrub_workers_refcnt;
 	mutex_unlock(&fs_info->scrub_lock);
 
-- 
cgit v1.1


From 6eef3125886df260ca0e8758d141308152226f6a Mon Sep 17 00:00:00 2001
From: Arne Jansen <sensille@gmx.net>
Date: Fri, 10 Jun 2011 13:04:58 +0200
Subject: btrfs: remove unneeded includes from scrub.c

Signed-off-by: Arne Jansen <sensille@gmx.net>
---
 fs/btrfs/scrub.c | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'fs/btrfs')

diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 92cac19..a8d03d5 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -16,13 +16,7 @@
  * Boston, MA 021110-1307, USA.
  */
 
-#include <linux/sched.h>
-#include <linux/pagemap.h>
-#include <linux/writeback.h>
 #include <linux/blkdev.h>
-#include <linux/rbtree.h>
-#include <linux/slab.h>
-#include <linux/workqueue.h>
 #include "ctree.h"
 #include "volumes.h"
 #include "disk-io.h"
-- 
cgit v1.1


From 38e87880666091fe9c572a7a2ed2e771d97ca5aa Mon Sep 17 00:00:00 2001
From: Chris Mason <chris.mason@oracle.com>
Date: Fri, 10 Jun 2011 16:36:57 -0400
Subject: Btrfs: make sure to recheck for bitmaps in clusters

Josef recently changed the free extent cache to look in
the block group cluster for any bitmaps before trying to
add a new bitmap for the same offset.  This avoids BUG_ON()s due
covering duplicate ranges.

But it didn't go quite far enough.  A given free range might span
between one or more bitmaps or free space entries.  The code has
looping to cover this, but it doesn't check for clustered bitmaps
every time.

This shuffles our gotos to check for a bitmap in the cluster
for every new bitmap entry we try to add.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/free-space-cache.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'fs/btrfs')

diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 38f3fd9..9f985a4 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -1492,7 +1492,7 @@ static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
 
 	if (ctl->op == &free_space_op)
 		block_group = ctl->private;
-
+again:
 	/*
 	 * Since we link bitmaps right into the cluster we need to see if we
 	 * have a cluster here, and if so and it has our bitmap we need to add
@@ -1510,13 +1510,13 @@ static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
 		node = rb_first(&cluster->root);
 		if (!node) {
 			spin_unlock(&cluster->lock);
-			goto again;
+			goto no_cluster_bitmap;
 		}
 
 		entry = rb_entry(node, struct btrfs_free_space, offset_index);
 		if (!entry->bitmap) {
 			spin_unlock(&cluster->lock);
-			goto again;
+			goto no_cluster_bitmap;
 		}
 
 		if (entry->offset == offset_to_bitmap(ctl, offset)) {
@@ -1531,7 +1531,8 @@ static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
 			goto out;
 		}
 	}
-again:
+
+no_cluster_bitmap:
 	bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
 					 1, 0);
 	if (!bitmap_info) {
-- 
cgit v1.1


From 38e880540f983045da7a00fbc50daad238207fc5 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Fri, 10 Jun 2011 18:43:13 +0000
Subject: Btrfs: clear current->journal_info on async transaction commit

Normally current->jouranl_info is cleared by commit_transaction.  For an
async snap or subvol creation, though, it runs in a work queue.  Clear
it in btrfs_commit_transaction_async() to avoid leaking a non-NULL
journal_info when we return to userspace.  When the actual commit runs in
the other thread it won't care that it's current->journal_info is already
NULL.

Signed-off-by: Sage Weil <sage@newdream.net>
Tested-by: Jim Schutt <jaschut@sandia.gov>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/transaction.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'fs/btrfs')

diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 6b2e478..2b3590b 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1118,8 +1118,11 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
 		wait_current_trans_commit_start_and_unblock(root, cur_trans);
 	else
 		wait_current_trans_commit_start(root, cur_trans);
-	put_transaction(cur_trans);
 
+	if (current->journal_info == trans)
+		current->journal_info = NULL;
+
+	put_transaction(cur_trans);
 	return 0;
 }
 
-- 
cgit v1.1


From 9eb9104c665aae2401a1723c044669eb10240072 Mon Sep 17 00:00:00 2001
From: richard kennedy <richard@rsk.demon.co.uk>
Date: Tue, 7 Jun 2011 10:46:32 +0000
Subject: btrfs: remove 64bit alignment padding to allow extent_buffer to fit
 into one fewer cacheline

Reorder extent_buffer to remove 8 bytes of alignment padding on 64 bit
builds. This shrinks its size to 128 bytes allowing it to fit into one
fewer cache lines and allows more objects per slab in its kmem_cache.

slabinfo extent_buffer reports :-

 before:-
    Sizes (bytes)     Slabs
    ----------------------------------
    Object :     136  Total  :     123
    SlabObj:     136  Full   :     121
    SlabSiz:    4096  Partial:       0
    Loss   :       0  CpuSlab:       2
    Align  :       8  Objects:      30

 after :-
    Object :     128  Total  :       4
    SlabObj:     128  Full   :       2
    SlabSiz:    4096  Partial:       0
    Loss   :       0  CpuSlab:       2
    Align  :       8  Objects:      32

Signed-off-by: Richard Kennedy <richard@rsk.demon.co.uk>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/extent_io.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs/btrfs')

diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 4e8445a..a11a92e 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -126,9 +126,9 @@ struct extent_buffer {
 	unsigned long map_len;
 	struct page *first_page;
 	unsigned long bflags;
-	atomic_t refs;
 	struct list_head leak_list;
 	struct rcu_head rcu_head;
+	atomic_t refs;
 
 	/* the spinlock is used to protect most operations */
 	spinlock_t lock;
-- 
cgit v1.1


From 027ed2f0044e95a97ed34db2d55a9ca95ba84385 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Wed, 8 Jun 2011 08:27:56 +0000
Subject: Btrfs: avoid stack bloat in btrfs_ioctl_fs_info()

The size of struct btrfs_ioctl_fs_info_args is as big as 1KB, so
don't declare the variable on stack.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Reviewed-by: Josef Bacik <josef@redhat.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/ioctl.c | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

(limited to 'fs/btrfs')

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index ac37040..b793d11 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2054,29 +2054,34 @@ static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg)
 
 static long btrfs_ioctl_fs_info(struct btrfs_root *root, void __user *arg)
 {
-	struct btrfs_ioctl_fs_info_args fi_args;
+	struct btrfs_ioctl_fs_info_args *fi_args;
 	struct btrfs_device *device;
 	struct btrfs_device *next;
 	struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
+	int ret = 0;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	fi_args.num_devices = fs_devices->num_devices;
-	fi_args.max_id = 0;
-	memcpy(&fi_args.fsid, root->fs_info->fsid, sizeof(fi_args.fsid));
+	fi_args = kzalloc(sizeof(*fi_args), GFP_KERNEL);
+	if (!fi_args)
+		return -ENOMEM;
+
+	fi_args->num_devices = fs_devices->num_devices;
+	memcpy(&fi_args->fsid, root->fs_info->fsid, sizeof(fi_args->fsid));
 
 	mutex_lock(&fs_devices->device_list_mutex);
 	list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) {
-		if (device->devid > fi_args.max_id)
-			fi_args.max_id = device->devid;
+		if (device->devid > fi_args->max_id)
+			fi_args->max_id = device->devid;
 	}
 	mutex_unlock(&fs_devices->device_list_mutex);
 
-	if (copy_to_user(arg, &fi_args, sizeof(fi_args)))
-		return -EFAULT;
+	if (copy_to_user(arg, fi_args, sizeof(*fi_args)))
+		ret = -EFAULT;
 
-	return 0;
+	kfree(fi_args);
+	return ret;
 }
 
 static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg)
-- 
cgit v1.1


From 5be76758f35ec6578e5b9b150aa513ac26bd9c54 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.cz>
Date: Thu, 9 Jun 2011 10:02:51 +0000
Subject: btrfs: fix unlocked access of delalloc_inodes

list_splice_init will make delalloc_inodes empty, but without a spinlock
around, this may produce corrupted list head, accessed in many placess,
The race window is very tight and nobody seems to have hit it so far.

Signed-off-by: David Sterba <dsterba@suse.cz>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/disk-io.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'fs/btrfs')

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index a203d36..33b744a 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2911,9 +2911,8 @@ static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
 
 	INIT_LIST_HEAD(&splice);
 
-	list_splice_init(&root->fs_info->delalloc_inodes, &splice);
-
 	spin_lock(&root->fs_info->delalloc_lock);
+	list_splice_init(&root->fs_info->delalloc_inodes, &splice);
 
 	while (!list_empty(&splice)) {
 		btrfs_inode = list_entry(splice.next, struct btrfs_inode,
-- 
cgit v1.1


From 08d2f347e877e489ca098c87a6fd2e872fef9767 Mon Sep 17 00:00:00 2001
From: Jan Schmidt <list.btrfs@jan-o-sch.net>
Date: Wed, 4 May 2011 16:18:50 +0200
Subject: Btrfs: fix extent state leak on failed nodatasum reads

When encountering an EIO while reading from a nodatasum extent, we
insert an error record into the inode's failure tree.
btrfs_readpage_end_io_hook returns early for nodatasum inodes. We'd
better clear the failure tree in that case, otherwise the kernel
complains about

	BUG extent_state: Objects remaining on kmem_cache_close()

on rmmod.

Signed-off-by: Jan Schmidt <list.btrfs@jan-o-sch.net>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs/btrfs')

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 02ff4a1..113913a 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1986,7 +1986,7 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
 	}
 
 	if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
-		return 0;
+		goto good;
 
 	if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID &&
 	    test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) {
-- 
cgit v1.1


From 22b63a2971c5657dfc1bf4514f9410fc90c8b2c2 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Wed, 9 Feb 2011 16:05:31 +0200
Subject: Btrfs - use %pU to print fsid

Get rid of FIXME comment.  Uuids from dmesg are now the same as uuids
given by btrfs-progs.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/volumes.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

(limited to 'fs/btrfs')

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index da541df..1efa56e 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -689,12 +689,8 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
 	transid = btrfs_super_generation(disk_super);
 	if (disk_super->label[0])
 		printk(KERN_INFO "device label %s ", disk_super->label);
-	else {
-		/* FIXME, make a readl uuid parser */
-		printk(KERN_INFO "device fsid %llx-%llx ",
-		       *(unsigned long long *)disk_super->fsid,
-		       *(unsigned long long *)(disk_super->fsid + 8));
-	}
+	else
+		printk(KERN_INFO "device fsid %pU ", disk_super->fsid);
 	printk(KERN_CONT "devid %llu transid %llu %s\n",
 	       (unsigned long long)devid, (unsigned long long)transid, path);
 	ret = device_list_add(path, disk_super, devid, fs_devices_ret);
-- 
cgit v1.1


From 30b4caf5d73af5c99cf1b2b46496d8bc35330992 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Wed, 8 Jun 2011 03:56:44 +0000
Subject: Btrfs: use join_transaction in btrfs_evict_inode()

The WARN_ON() in start_transaction() was triggered while balancing.

The cause is btrfs_relocate_chunk() started a transaction and
then called iput() on the inode that stores free space cache,
and iput() called btrfs_start_transaction() again.

Reported-by: Tsutomu Itoh <t-itoh@jp.fujitsu.com>
Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Reviewed-by: Josef Bacik <josef@redhat.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs/btrfs')

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 113913a..c15636b 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3646,7 +3646,7 @@ void btrfs_evict_inode(struct inode *inode)
 	btrfs_i_size_write(inode, 0);
 
 	while (1) {
-		trans = btrfs_start_transaction(root, 0);
+		trans = btrfs_join_transaction(root);
 		BUG_ON(IS_ERR(trans));
 		trans->block_rsv = root->orphan_block_rsv;
 
-- 
cgit v1.1


From ac08aedfa5d3de0dcb3825b598d16c2e57991f54 Mon Sep 17 00:00:00 2001
From: Chris Mason <chris.mason@oracle.com>
Date: Mon, 13 Jun 2011 11:28:50 -0400
Subject: Btrfs: check the return value from set_anon_super

Al Viro noticed we weren't checking for set_anon_super failures.  This
adds the required checks.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/disk-io.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'fs/btrfs')

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 9f68c68..20c111b 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1312,7 +1312,9 @@ again:
 	spin_lock_init(&root->cache_lock);
 	init_waitqueue_head(&root->cache_wait);
 
-	set_anon_super(&root->anon_super, NULL);
+	ret = set_anon_super(&root->anon_super, NULL);
+	if (ret)
+		goto fail;
 
 	if (btrfs_root_refs(&root->root_item) == 0) {
 		ret = -ENOENT;
-- 
cgit v1.1


From f4c44016218a6fce357715b9bbabbbbe1f69853c Mon Sep 17 00:00:00 2001
From: Chris Mason <chris.mason@oracle.com>
Date: Mon, 13 Jun 2011 11:30:47 -0400
Subject: Btrfs: drop the delalloc_bytes check in shrink_delalloc

Even when delalloc_bytes is zero, we may need to sleep while waiting
for delalloc space.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/extent-tree.c | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'fs/btrfs')

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index b42efc2..1f61bf5 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3314,10 +3314,6 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
 	if (reserved == 0)
 		return 0;
 
-	/* nothing to shrink - nothing to reclaim */
-	if (root->fs_info->delalloc_bytes == 0)
-		return 0;
-
 	max_reclaim = min(reserved, to_reclaim);
 
 	while (loops < 1024) {
-- 
cgit v1.1


From 71d7aed014457147e8f71a843d5fbf03235e4a85 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@redhat.com>
Date: Tue, 14 Jun 2011 14:24:32 -0400
Subject: Btrfs: fix path leakage on subvol deletion

The delayed ref patch accidently removed the btrfs_free_path in
btrfs_unlink_subvol, this puts it back and means we don't leak a path.  Thanks,

Signed-off-by: Josef Bacik <josef@redhat.com>
---
 fs/btrfs/inode.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs/btrfs')

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index c15636b..5813dec 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3076,6 +3076,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
 	ret = btrfs_update_inode(trans, root, dir);
 	BUG_ON(ret);
 
+	btrfs_free_path(path);
 	return 0;
 }
 
-- 
cgit v1.1


From 8351583e3f6e430ce8f71913909a96ad5cc6a2f6 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@redhat.com>
Date: Tue, 14 Jun 2011 15:16:14 -0400
Subject: Btrfs: protect the pending_snapshots list with trans_lock

Currently there is nothing protecting the pending_snapshots list on the
transaction.  We only hold the directory mutex that we are snapshotting and a
read lock on the subvol_sem, so we could race with somebody else creating a
snapshot in a different directory and end up with list corruption.  So protect
this list with the trans_lock.  Thanks,

Signed-off-by: Josef Bacik <josef@redhat.com>
---
 fs/btrfs/ioctl.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs/btrfs')

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index b793d11..a3c4751 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -482,8 +482,10 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
 	ret = btrfs_snap_reserve_metadata(trans, pending_snapshot);
 	BUG_ON(ret);
 
+	spin_lock(&root->fs_info->trans_lock);
 	list_add(&pending_snapshot->list,
 		 &trans->transaction->pending_snapshots);
+	spin_unlock(&root->fs_info->trans_lock);
 	if (async_transid) {
 		*async_transid = trans->transid;
 		ret = btrfs_commit_transaction_async(trans,
-- 
cgit v1.1


From ed0ca14021e5ae3147602128641aa7f742ab227c Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@redhat.com>
Date: Tue, 14 Jun 2011 16:22:15 -0400
Subject: Btrfs: set no_trans_join after trying to expand the transaction

We can lockup if we try to allow new writers join the transaction and we have
flushoncommit set or have a pending snapshot.  This is because we set
no_trans_join and then loop around and try to wait for ordered extents again.
The problem is the ordered endio stuff needs to join the transaction, which it
can't do because no_trans_join is set.  So instead wait until after this loop to
set no_trans_join and then make sure to wait for num_writers == 1 in case
anybody got started in between us exiting the loop and setting no_trans_join.
This could easily be reproduced by mounting -o flushoncommit and running xfstest
13.  It cannot be reproduced with this patch.  Thanks,

Reported-by: Jim Schutt <jaschut@sandia.gov>
Signed-off-by: Josef Bacik <josef@redhat.com>
---
 fs/btrfs/transaction.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

(limited to 'fs/btrfs')

diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 2b3590b..5669559 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1241,12 +1241,20 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 			schedule_timeout(1);
 
 		finish_wait(&cur_trans->writer_wait, &wait);
-		spin_lock(&root->fs_info->trans_lock);
-		root->fs_info->trans_no_join = 1;
-		spin_unlock(&root->fs_info->trans_lock);
 	} while (atomic_read(&cur_trans->num_writers) > 1 ||
 		 (should_grow && cur_trans->num_joined != joined));
 
+	/*
+	 * Ok now we need to make sure to block out any other joins while we
+	 * commit the transaction.  We could have started a join before setting
+	 * no_join so make sure to wait for num_writers to == 1 again.
+	 */
+	spin_lock(&root->fs_info->trans_lock);
+	root->fs_info->trans_no_join = 1;
+	spin_unlock(&root->fs_info->trans_lock);
+	wait_event(cur_trans->writer_wait,
+		   atomic_read(&cur_trans->num_writers) == 1);
+
 	ret = create_pending_snapshots(trans, root->fs_info);
 	BUG_ON(ret);
 
-- 
cgit v1.1


From 7585717f304f5ed005cc4ad933a69aab3efbd136 Mon Sep 17 00:00:00 2001
From: Chris Mason <chris.mason@oracle.com>
Date: Mon, 13 Jun 2011 20:00:16 -0400
Subject: Btrfs: fix relocation races

The recent commit to get rid of our trans_mutex introduced
some races with block group relocation.  The problem is that relocation
needs to do some record keeping about each root, and it was relying
on the transaction mutex to coordinate things in subtle ways.

This fix adds a mutex just for the relocation code and makes sure
it doesn't have a big impact on normal operations.  The race is
really fixed in btrfs_record_root_in_trans, which is where we
step back and wait for the relocation code to finish accounting
setup.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/ctree.h       | 14 ++++++++++
 fs/btrfs/disk-io.c     |  1 +
 fs/btrfs/relocation.c  | 30 ++++++++++++++-------
 fs/btrfs/transaction.c | 73 +++++++++++++++++++++++++++++++++++++++++++++++---
 4 files changed, 105 insertions(+), 13 deletions(-)

(limited to 'fs/btrfs')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 8490ee0..a2c91a1 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -967,6 +967,12 @@ struct btrfs_fs_info {
 	struct srcu_struct subvol_srcu;
 
 	spinlock_t trans_lock;
+	/*
+	 * the reloc mutex goes with the trans lock, it is taken
+	 * during commit to protect us from the relocation code
+	 */
+	struct mutex reloc_mutex;
+
 	struct list_head trans_list;
 	struct list_head hashers;
 	struct list_head dead_roots;
@@ -1172,6 +1178,14 @@ struct btrfs_root {
 	u32 type;
 
 	u64 highest_objectid;
+
+	/* btrfs_record_root_in_trans is a multi-step process,
+	 * and it can race with the balancing code.   But the
+	 * race is very small, and only the first time the root
+	 * is added to each transaction.  So in_trans_setup
+	 * is used to tell us when more checks are required
+	 */
+	unsigned long in_trans_setup;
 	int ref_cows;
 	int track_dirty;
 	int in_radix;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 20c111b..0b2b4b7 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1620,6 +1620,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	spin_lock_init(&fs_info->fs_roots_radix_lock);
 	spin_lock_init(&fs_info->delayed_iput_lock);
 	spin_lock_init(&fs_info->defrag_inodes_lock);
+	mutex_init(&fs_info->reloc_mutex);
 
 	init_completion(&fs_info->kobj_unregister);
 	fs_info->tree_root = tree_root;
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index f25b10a..086b1e6 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1368,7 +1368,7 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
 	int ret;
 
 	if (!root->reloc_root)
-		return 0;
+		goto out;
 
 	reloc_root = root->reloc_root;
 	root_item = &reloc_root->root_item;
@@ -1390,6 +1390,8 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
 	ret = btrfs_update_root(trans, root->fs_info->tree_root,
 				&reloc_root->root_key, root_item);
 	BUG_ON(ret);
+
+out:
 	return 0;
 }
 
@@ -2142,10 +2144,11 @@ int prepare_to_merge(struct reloc_control *rc, int err)
 	u64 num_bytes = 0;
 	int ret;
 
-	spin_lock(&root->fs_info->trans_lock);
+	mutex_lock(&root->fs_info->reloc_mutex);
 	rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
 	rc->merging_rsv_size += rc->nodes_relocated * 2;
-	spin_unlock(&root->fs_info->trans_lock);
+	mutex_unlock(&root->fs_info->reloc_mutex);
+
 again:
 	if (!err) {
 		num_bytes = rc->merging_rsv_size;
@@ -2214,9 +2217,16 @@ int merge_reloc_roots(struct reloc_control *rc)
 	int ret;
 again:
 	root = rc->extent_root;
-	spin_lock(&root->fs_info->trans_lock);
+
+	/*
+	 * this serializes us with btrfs_record_root_in_transaction,
+	 * we have to make sure nobody is in the middle of
+	 * adding their roots to the list while we are
+	 * doing this splice
+	 */
+	mutex_lock(&root->fs_info->reloc_mutex);
 	list_splice_init(&rc->reloc_roots, &reloc_roots);
-	spin_unlock(&root->fs_info->trans_lock);
+	mutex_unlock(&root->fs_info->reloc_mutex);
 
 	while (!list_empty(&reloc_roots)) {
 		found = 1;
@@ -3590,17 +3600,19 @@ next:
 static void set_reloc_control(struct reloc_control *rc)
 {
 	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
-	spin_lock(&fs_info->trans_lock);
+
+	mutex_lock(&fs_info->reloc_mutex);
 	fs_info->reloc_ctl = rc;
-	spin_unlock(&fs_info->trans_lock);
+	mutex_unlock(&fs_info->reloc_mutex);
 }
 
 static void unset_reloc_control(struct reloc_control *rc)
 {
 	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
-	spin_lock(&fs_info->trans_lock);
+
+	mutex_lock(&fs_info->reloc_mutex);
 	fs_info->reloc_ctl = NULL;
-	spin_unlock(&fs_info->trans_lock);
+	mutex_unlock(&fs_info->reloc_mutex);
 }
 
 static int check_extent_flags(u64 flags)
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 2b3590b..833996a 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -126,28 +126,85 @@ static noinline int join_transaction(struct btrfs_root *root, int nofail)
  * to make sure the old root from before we joined the transaction is deleted
  * when the transaction commits
  */
-int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
+static int record_root_in_trans(struct btrfs_trans_handle *trans,
 			       struct btrfs_root *root)
 {
 	if (root->ref_cows && root->last_trans < trans->transid) {
 		WARN_ON(root == root->fs_info->extent_root);
 		WARN_ON(root->commit_root != root->node);
 
+		/*
+		 * see below for in_trans_setup usage rules
+		 * we have the reloc mutex held now, so there
+		 * is only one writer in this function
+		 */
+		root->in_trans_setup = 1;
+
+		/* make sure readers find in_trans_setup before
+		 * they find our root->last_trans update
+		 */
+		smp_wmb();
+
 		spin_lock(&root->fs_info->fs_roots_radix_lock);
 		if (root->last_trans == trans->transid) {
 			spin_unlock(&root->fs_info->fs_roots_radix_lock);
 			return 0;
 		}
-		root->last_trans = trans->transid;
 		radix_tree_tag_set(&root->fs_info->fs_roots_radix,
 			   (unsigned long)root->root_key.objectid,
 			   BTRFS_ROOT_TRANS_TAG);
 		spin_unlock(&root->fs_info->fs_roots_radix_lock);
+		root->last_trans = trans->transid;
+
+		/* this is pretty tricky.  We don't want to
+		 * take the relocation lock in btrfs_record_root_in_trans
+		 * unless we're really doing the first setup for this root in
+		 * this transaction.
+		 *
+		 * Normally we'd use root->last_trans as a flag to decide
+		 * if we want to take the expensive mutex.
+		 *
+		 * But, we have to set root->last_trans before we
+		 * init the relocation root, otherwise, we trip over warnings
+		 * in ctree.c.  The solution used here is to flag ourselves
+		 * with root->in_trans_setup.  When this is 1, we're still
+		 * fixing up the reloc trees and everyone must wait.
+		 *
+		 * When this is zero, they can trust root->last_trans and fly
+		 * through btrfs_record_root_in_trans without having to take the
+		 * lock.  smp_wmb() makes sure that all the writes above are
+		 * done before we pop in the zero below
+		 */
 		btrfs_init_reloc_root(trans, root);
+		smp_wmb();
+		root->in_trans_setup = 0;
 	}
 	return 0;
 }
 
+
+int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
+			       struct btrfs_root *root)
+{
+	if (!root->ref_cows)
+		return 0;
+
+	/*
+	 * see record_root_in_trans for comments about in_trans_setup usage
+	 * and barriers
+	 */
+	smp_rmb();
+	if (root->last_trans == trans->transid &&
+	    !root->in_trans_setup)
+		return 0;
+
+	mutex_lock(&root->fs_info->reloc_mutex);
+	record_root_in_trans(trans, root);
+	mutex_unlock(&root->fs_info->reloc_mutex);
+
+	return 0;
+}
+
 /* wait for commit against the current transaction to become unblocked
  * when this is done, it is safe to start a new transaction, but the current
  * transaction might not be fully on disk.
@@ -882,7 +939,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
 	parent = dget_parent(dentry);
 	parent_inode = parent->d_inode;
 	parent_root = BTRFS_I(parent_inode)->root;
-	btrfs_record_root_in_trans(trans, parent_root);
+	record_root_in_trans(trans, parent_root);
 
 	/*
 	 * insert the directory item
@@ -900,7 +957,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
 	ret = btrfs_update_inode(trans, parent_root, parent_inode);
 	BUG_ON(ret);
 
-	btrfs_record_root_in_trans(trans, root);
+	record_root_in_trans(trans, root);
 	btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
 	memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
 	btrfs_check_and_init_root_item(new_root_item);
@@ -1247,6 +1304,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 	} while (atomic_read(&cur_trans->num_writers) > 1 ||
 		 (should_grow && cur_trans->num_joined != joined));
 
+	/*
+	 * the reloc mutex makes sure that we stop
+	 * the balancing code from coming in and moving
+	 * extents around in the middle of the commit
+	 */
+	mutex_lock(&root->fs_info->reloc_mutex);
+
 	ret = create_pending_snapshots(trans, root->fs_info);
 	BUG_ON(ret);
 
@@ -1312,6 +1376,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 	root->fs_info->running_transaction = NULL;
 	root->fs_info->trans_no_join = 0;
 	spin_unlock(&root->fs_info->trans_lock);
+	mutex_unlock(&root->fs_info->reloc_mutex);
 
 	wake_up(&root->fs_info->transaction_wait);
 
-- 
cgit v1.1


From 3ed4498caf381a73d6259d3ffacc914b17a507ec Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.cz>
Date: Mon, 13 Jun 2011 17:54:22 +0000
Subject: btrfs: fix dereference of ERR_PTR value

smatch reports:

btrfs_recover_log_trees error: 'wc.replay_dest' dereferencing
possible ERR_PTR()

Signed-off-by: David Sterba <dsterba@suse.cz>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/tree-log.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs/btrfs')

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 592396c..4ce8a9f 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3177,7 +3177,7 @@ again:
 		tmp_key.offset = (u64)-1;
 
 		wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key);
-		BUG_ON(!wc.replay_dest);
+		BUG_ON(IS_ERR_OR_NULL(wc.replay_dest));
 
 		wc.replay_dest->log_root = log;
 		btrfs_record_root_in_trans(trans, wc.replay_dest);
-- 
cgit v1.1


From 9fe6a50fb764f508dd2de47a66e62e51388791fb Mon Sep 17 00:00:00 2001
From: Maarten Lankhorst <m.b.lankhorst@gmail.com>
Date: Thu, 16 Jun 2011 09:04:57 +0000
Subject: btrfs: Remove unused sysfs code

Removes code no longer used. The sysfs file itself is kept, because the
btrfs developers expressed interest in putting new entries to sysfs.

Signed-off-by: Maarten Lankhorst <m.b.lankhorst@gmail.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/ctree.h   |   1 -
 fs/btrfs/disk-io.c |   1 -
 fs/btrfs/sysfs.c   | 146 -----------------------------------------------------
 3 files changed, 148 deletions(-)

(limited to 'fs/btrfs')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index a2c91a1..8e948ec 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1195,7 +1195,6 @@ struct btrfs_root {
 	struct btrfs_key defrag_max;
 	int defrag_running;
 	char *name;
-	int in_sysfs;
 
 	/* the dirty list is only used by non-reference counted roots */
 	struct list_head dirty_list;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 0b2b4b7..c25ef5a 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1044,7 +1044,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
 	root->last_trans = 0;
 	root->highest_objectid = 0;
 	root->name = NULL;
-	root->in_sysfs = 0;
 	root->inode_tree = RB_ROOT;
 	INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC);
 	root->block_rsv = NULL;
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index c3c223a..daac9ae 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -28,152 +28,6 @@
 #include "disk-io.h"
 #include "transaction.h"
 
-static ssize_t root_blocks_used_show(struct btrfs_root *root, char *buf)
-{
-	return snprintf(buf, PAGE_SIZE, "%llu\n",
-		(unsigned long long)btrfs_root_used(&root->root_item));
-}
-
-static ssize_t root_block_limit_show(struct btrfs_root *root, char *buf)
-{
-	return snprintf(buf, PAGE_SIZE, "%llu\n",
-		(unsigned long long)btrfs_root_limit(&root->root_item));
-}
-
-static ssize_t super_blocks_used_show(struct btrfs_fs_info *fs, char *buf)
-{
-
-	return snprintf(buf, PAGE_SIZE, "%llu\n",
-		(unsigned long long)btrfs_super_bytes_used(&fs->super_copy));
-}
-
-static ssize_t super_total_blocks_show(struct btrfs_fs_info *fs, char *buf)
-{
-	return snprintf(buf, PAGE_SIZE, "%llu\n",
-		(unsigned long long)btrfs_super_total_bytes(&fs->super_copy));
-}
-
-static ssize_t super_blocksize_show(struct btrfs_fs_info *fs, char *buf)
-{
-	return snprintf(buf, PAGE_SIZE, "%llu\n",
-		(unsigned long long)btrfs_super_sectorsize(&fs->super_copy));
-}
-
-/* this is for root attrs (subvols/snapshots) */
-struct btrfs_root_attr {
-	struct attribute attr;
-	ssize_t (*show)(struct btrfs_root *, char *);
-	ssize_t (*store)(struct btrfs_root *, const char *, size_t);
-};
-
-#define ROOT_ATTR(name, mode, show, store) \
-static struct btrfs_root_attr btrfs_root_attr_##name = __ATTR(name, mode, \
-							      show, store)
-
-ROOT_ATTR(blocks_used,	0444,	root_blocks_used_show,	NULL);
-ROOT_ATTR(block_limit,	0644,	root_block_limit_show,	NULL);
-
-static struct attribute *btrfs_root_attrs[] = {
-	&btrfs_root_attr_blocks_used.attr,
-	&btrfs_root_attr_block_limit.attr,
-	NULL,
-};
-
-/* this is for super attrs (actual full fs) */
-struct btrfs_super_attr {
-	struct attribute attr;
-	ssize_t (*show)(struct btrfs_fs_info *, char *);
-	ssize_t (*store)(struct btrfs_fs_info *, const char *, size_t);
-};
-
-#define SUPER_ATTR(name, mode, show, store) \
-static struct btrfs_super_attr btrfs_super_attr_##name = __ATTR(name, mode, \
-								show, store)
-
-SUPER_ATTR(blocks_used,		0444,	super_blocks_used_show,		NULL);
-SUPER_ATTR(total_blocks,	0444,	super_total_blocks_show,	NULL);
-SUPER_ATTR(blocksize,		0444,	super_blocksize_show,		NULL);
-
-static struct attribute *btrfs_super_attrs[] = {
-	&btrfs_super_attr_blocks_used.attr,
-	&btrfs_super_attr_total_blocks.attr,
-	&btrfs_super_attr_blocksize.attr,
-	NULL,
-};
-
-static ssize_t btrfs_super_attr_show(struct kobject *kobj,
-				    struct attribute *attr, char *buf)
-{
-	struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info,
-						super_kobj);
-	struct btrfs_super_attr *a = container_of(attr,
-						  struct btrfs_super_attr,
-						  attr);
-
-	return a->show ? a->show(fs, buf) : 0;
-}
-
-static ssize_t btrfs_super_attr_store(struct kobject *kobj,
-				     struct attribute *attr,
-				     const char *buf, size_t len)
-{
-	struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info,
-						super_kobj);
-	struct btrfs_super_attr *a = container_of(attr,
-						  struct btrfs_super_attr,
-						  attr);
-
-	return a->store ? a->store(fs, buf, len) : 0;
-}
-
-static ssize_t btrfs_root_attr_show(struct kobject *kobj,
-				    struct attribute *attr, char *buf)
-{
-	struct btrfs_root *root = container_of(kobj, struct btrfs_root,
-						root_kobj);
-	struct btrfs_root_attr *a = container_of(attr,
-						 struct btrfs_root_attr,
-						 attr);
-
-	return a->show ? a->show(root, buf) : 0;
-}
-
-static ssize_t btrfs_root_attr_store(struct kobject *kobj,
-				     struct attribute *attr,
-				     const char *buf, size_t len)
-{
-	struct btrfs_root *root = container_of(kobj, struct btrfs_root,
-						root_kobj);
-	struct btrfs_root_attr *a = container_of(attr,
-						 struct btrfs_root_attr,
-						 attr);
-	return a->store ? a->store(root, buf, len) : 0;
-}
-
-static void btrfs_super_release(struct kobject *kobj)
-{
-	struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info,
-						super_kobj);
-	complete(&fs->kobj_unregister);
-}
-
-static void btrfs_root_release(struct kobject *kobj)
-{
-	struct btrfs_root *root = container_of(kobj, struct btrfs_root,
-						root_kobj);
-	complete(&root->kobj_unregister);
-}
-
-static const struct sysfs_ops btrfs_super_attr_ops = {
-	.show	= btrfs_super_attr_show,
-	.store	= btrfs_super_attr_store,
-};
-
-static const struct sysfs_ops btrfs_root_attr_ops = {
-	.show	= btrfs_root_attr_show,
-	.store	= btrfs_root_attr_store,
-};
-
 /* /sys/fs/btrfs/ entry */
 static struct kset *btrfs_kset;
 
-- 
cgit v1.1


From 19fd294957e426bfdd8e19085096467ec18df5c4 Mon Sep 17 00:00:00 2001
From: Miao Xie <miaox@cn.fujitsu.com>
Date: Wed, 15 Jun 2011 10:47:30 +0000
Subject: btrfs: fix wrong reservation when doing delayed inode operations

We have migrated the space for the delayed inode items from
trans_block_rsv to global_block_rsv, but we forgot to set trans->block_rsv to
global_block_rsv when we doing delayed inode operations, and the following Oops
happened:

[ 9792.654889] ------------[ cut here ]------------
[ 9792.654898] WARNING: at fs/btrfs/extent-tree.c:5681
btrfs_alloc_free_block+0xca/0x27c [btrfs]()
[ 9792.654899] Hardware name: To Be Filled By O.E.M.
[ 9792.654900] Modules linked in: btrfs zlib_deflate libcrc32c
ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 ip6table_filter ip6_tables
arc4 rt61pci rt2x00pci rt2x00lib snd_hda_codec_hdmi mac80211
snd_hda_codec_realtek cfg80211 snd_hda_intel edac_core snd_seq rfkill
pcspkr serio_raw snd_hda_codec eeprom_93cx6 edac_mce_amd sp5100_tco
i2c_piix4 k10temp snd_hwdep snd_seq_device snd_pcm floppy r8169 xhci_hcd
mii snd_timer snd soundcore snd_page_alloc ipv6 firewire_ohci pata_acpi
ata_generic firewire_core pata_via crc_itu_t radeon ttm drm_kms_helper
drm i2c_algo_bit i2c_core [last unloaded: scsi_wait_scan]
[ 9792.654919] Pid: 2762, comm: rm Tainted: G        W   2.6.39+ #1
[ 9792.654920] Call Trace:
[ 9792.654922]  [<ffffffff81053c4a>] warn_slowpath_common+0x83/0x9b
[ 9792.654925]  [<ffffffff81053c7c>] warn_slowpath_null+0x1a/0x1c
[ 9792.654933]  [<ffffffffa038e747>] btrfs_alloc_free_block+0xca/0x27c [btrfs]
[ 9792.654945]  [<ffffffffa03b8562>] ? map_extent_buffer+0x6e/0xa8 [btrfs]
[ 9792.654953]  [<ffffffffa038189b>] __btrfs_cow_block+0xfc/0x30c [btrfs]
[ 9792.654963]  [<ffffffffa0396aa6>] ? btrfs_buffer_uptodate+0x47/0x58 [btrfs]
[ 9792.654970]  [<ffffffffa0382e48>] ? read_block_for_search+0x94/0x368 [btrfs]
[ 9792.654978]  [<ffffffffa0381ba9>] btrfs_cow_block+0xfe/0x146 [btrfs]
[ 9792.654986]  [<ffffffffa03848b0>] btrfs_search_slot+0x14d/0x4b6 [btrfs]
[ 9792.654997]  [<ffffffffa03b8562>] ? map_extent_buffer+0x6e/0xa8 [btrfs]
[ 9792.655022]  [<ffffffffa03938e8>] btrfs_lookup_inode+0x2f/0x8f [btrfs]
[ 9792.655025]  [<ffffffff8147afac>] ? _cond_resched+0xe/0x22
[ 9792.655027]  [<ffffffff8147b892>] ? mutex_lock+0x29/0x50
[ 9792.655039]  [<ffffffffa03d41b1>] btrfs_update_delayed_inode+0x72/0x137 [btrfs]
[ 9792.655051]  [<ffffffffa03d4ea2>] btrfs_run_delayed_items+0x90/0xdb [btrfs]
[ 9792.655062]  [<ffffffffa039a69b>] btrfs_commit_transaction+0x228/0x654 [btrfs]
[ 9792.655064]  [<ffffffff8106e8da>] ? remove_wait_queue+0x3a/0x3a
[ 9792.655075]  [<ffffffffa03a2fa5>] btrfs_evict_inode+0x14d/0x202 [btrfs]
[ 9792.655077]  [<ffffffff81132bd6>] evict+0x71/0x111
[ 9792.655079]  [<ffffffff81132de0>] iput+0x12a/0x132
[ 9792.655081]  [<ffffffff8112aa3a>] do_unlinkat+0x106/0x155
[ 9792.655083]  [<ffffffff81127b83>] ? path_put+0x1f/0x23
[ 9792.655085]  [<ffffffff8109c53c>] ? audit_syscall_entry+0x145/0x171
[ 9792.655087]  [<ffffffff81128410>] ? putname+0x34/0x36
[ 9792.655090]  [<ffffffff8112b441>] sys_unlinkat+0x29/0x2b
[ 9792.655092]  [<ffffffff81482c42>] system_call_fastpath+0x16/0x1b
[ 9792.655093] ---[ end trace 02b696eb02b3f768 ]---

This patch fix it by setting the reservation of the transaction handle to the
correct one.

Reported-by: Josef Bacik <josef@redhat.com>
Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/delayed-inode.c | 25 ++++++++++++++++++++-----
 fs/btrfs/delayed-inode.h |  1 -
 2 files changed, 20 insertions(+), 6 deletions(-)

(limited to 'fs/btrfs')

diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 6462c29..fc515b7 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -297,7 +297,6 @@ struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len)
 		item->data_len = data_len;
 		item->ins_or_del = 0;
 		item->bytes_reserved = 0;
-		item->block_rsv = NULL;
 		item->delayed_node = NULL;
 		atomic_set(&item->refs, 1);
 	}
@@ -593,10 +592,8 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
 
 	num_bytes = btrfs_calc_trans_metadata_size(root, 1);
 	ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
-	if (!ret) {
+	if (!ret)
 		item->bytes_reserved = num_bytes;
-		item->block_rsv = dst_rsv;
-	}
 
 	return ret;
 }
@@ -604,10 +601,13 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
 static void btrfs_delayed_item_release_metadata(struct btrfs_root *root,
 						struct btrfs_delayed_item *item)
 {
+	struct btrfs_block_rsv *rsv;
+
 	if (!item->bytes_reserved)
 		return;
 
-	btrfs_block_rsv_release(root, item->block_rsv,
+	rsv = &root->fs_info->global_block_rsv;
+	btrfs_block_rsv_release(root, rsv,
 				item->bytes_reserved);
 }
 
@@ -1014,6 +1014,7 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
 	struct btrfs_delayed_root *delayed_root;
 	struct btrfs_delayed_node *curr_node, *prev_node;
 	struct btrfs_path *path;
+	struct btrfs_block_rsv *block_rsv;
 	int ret = 0;
 
 	path = btrfs_alloc_path();
@@ -1021,6 +1022,9 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
 		return -ENOMEM;
 	path->leave_spinning = 1;
 
+	block_rsv = trans->block_rsv;
+	trans->block_rsv = &root->fs_info->global_block_rsv;
+
 	delayed_root = btrfs_get_delayed_root(root);
 
 	curr_node = btrfs_first_delayed_node(delayed_root);
@@ -1045,6 +1049,7 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
 	}
 
 	btrfs_free_path(path);
+	trans->block_rsv = block_rsv;
 	return ret;
 }
 
@@ -1052,6 +1057,7 @@ static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
 					      struct btrfs_delayed_node *node)
 {
 	struct btrfs_path *path;
+	struct btrfs_block_rsv *block_rsv;
 	int ret;
 
 	path = btrfs_alloc_path();
@@ -1059,6 +1065,9 @@ static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
 		return -ENOMEM;
 	path->leave_spinning = 1;
 
+	block_rsv = trans->block_rsv;
+	trans->block_rsv = &node->root->fs_info->global_block_rsv;
+
 	ret = btrfs_insert_delayed_items(trans, path, node->root, node);
 	if (!ret)
 		ret = btrfs_delete_delayed_items(trans, path, node->root, node);
@@ -1066,6 +1075,7 @@ static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
 		ret = btrfs_update_delayed_inode(trans, node->root, path, node);
 	btrfs_free_path(path);
 
+	trans->block_rsv = block_rsv;
 	return ret;
 }
 
@@ -1116,6 +1126,7 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
 	struct btrfs_path *path;
 	struct btrfs_delayed_node *delayed_node = NULL;
 	struct btrfs_root *root;
+	struct btrfs_block_rsv *block_rsv;
 	unsigned long nr = 0;
 	int need_requeue = 0;
 	int ret;
@@ -1134,6 +1145,9 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
 	if (IS_ERR(trans))
 		goto free_path;
 
+	block_rsv = trans->block_rsv;
+	trans->block_rsv = &root->fs_info->global_block_rsv;
+
 	ret = btrfs_insert_delayed_items(trans, path, root, delayed_node);
 	if (!ret)
 		ret = btrfs_delete_delayed_items(trans, path, root,
@@ -1176,6 +1190,7 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
 
 	nr = trans->blocks_used;
 
+	trans->block_rsv = block_rsv;
 	btrfs_end_transaction_dmeta(trans, root);
 	__btrfs_btree_balance_dirty(root, nr);
 free_path:
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
index eb7d240..cb79b67 100644
--- a/fs/btrfs/delayed-inode.h
+++ b/fs/btrfs/delayed-inode.h
@@ -75,7 +75,6 @@ struct btrfs_delayed_item {
 	struct list_head tree_list;	/* used for batch insert/delete items */
 	struct list_head readdir_list;	/* used for readdir items */
 	u64 bytes_reserved;
-	struct btrfs_block_rsv *block_rsv;
 	struct btrfs_delayed_node *delayed_node;
 	atomic_t refs;
 	int ins_or_del;
-- 
cgit v1.1


From 35a30d7ce54e087d8025a725d4e5a2fdee723a9f Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.cz>
Date: Mon, 13 Jun 2011 15:18:23 +0000
Subject: btrfs: fix uninitialized return value

When allocation fails in btrfs_read_fs_root_no_name, ret is not set
although it is returned, holding a garbage value.

Signed-off-by: David Sterba <dsterba@suse.cz>
Reviewed-by: Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/disk-io.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs/btrfs')

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index c25ef5a..1ac8db5d 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1299,12 +1299,12 @@ again:
 		return root;
 
 	root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS);
-	if (!root->free_ino_ctl)
-		goto fail;
 	root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned),
 					GFP_NOFS);
-	if (!root->free_ino_pinned)
+	if (!root->free_ino_pinned || !root->free_ino_ctl) {
+		ret = -ENOMEM;
 		goto fail;
+	}
 
 	btrfs_init_free_ino_ctl(root);
 	mutex_init(&root->fs_commit_mutex);
-- 
cgit v1.1


From e999376f094162aa425ae749aa1df95ab928d010 Mon Sep 17 00:00:00 2001
From: Chris Mason <chris.mason@oracle.com>
Date: Fri, 17 Jun 2011 16:14:09 -0400
Subject: Btrfs: avoid delayed metadata items during commits

Snapshot creation has two phases.  One is the initial snapshot setup,
and the second is done during commit, while nobody is allowed to modify
the root we are snapshotting.

The delayed metadata insertion code can break that rule, it does a
delayed inode update on the inode of the parent of the snapshot,
and delayed directory item insertion.

This makes sure to run the pending delayed operations before we
record the snapshot root, which avoids corruptions.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/delayed-inode.c |  7 +++++++
 fs/btrfs/delayed-inode.h |  4 ++++
 fs/btrfs/transaction.c   | 27 +++++++++++++++++----------
 3 files changed, 28 insertions(+), 10 deletions(-)

(limited to 'fs/btrfs')

diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index fc515b7..f1cbd02 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1237,6 +1237,13 @@ again:
 	return 0;
 }
 
+void btrfs_assert_delayed_root_empty(struct btrfs_root *root)
+{
+	struct btrfs_delayed_root *delayed_root;
+	delayed_root = btrfs_get_delayed_root(root);
+	WARN_ON(btrfs_first_delayed_node(delayed_root));
+}
+
 void btrfs_balance_delayed_items(struct btrfs_root *root)
 {
 	struct btrfs_delayed_root *delayed_root;
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
index cb79b67..d1a6a29 100644
--- a/fs/btrfs/delayed-inode.h
+++ b/fs/btrfs/delayed-inode.h
@@ -137,4 +137,8 @@ int btrfs_readdir_delayed_dir_index(struct file *filp, void *dirent,
 /* for init */
 int __init btrfs_delayed_inode_init(void);
 void btrfs_delayed_inode_exit(void);
+
+/* for debugging */
+void btrfs_assert_delayed_root_empty(struct btrfs_root *root);
+
 #endif
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index c073d85..51dcec8 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -957,6 +957,15 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
 	ret = btrfs_update_inode(trans, parent_root, parent_inode);
 	BUG_ON(ret);
 
+	/*
+	 * pull in the delayed directory update
+	 * and the delayed inode item
+	 * otherwise we corrupt the FS during
+	 * snapshot
+	 */
+	ret = btrfs_run_delayed_items(trans, root);
+	BUG_ON(ret);
+
 	record_root_in_trans(trans, root);
 	btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
 	memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
@@ -1018,14 +1027,6 @@ static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans,
 	int ret;
 
 	list_for_each_entry(pending, head, list) {
-		/*
-		 * We must deal with the delayed items before creating
-		 * snapshots, or we will create a snapthot with inconsistent
-		 * information.
-		*/
-		ret = btrfs_run_delayed_items(trans, fs_info->fs_root);
-		BUG_ON(ret);
-
 		ret = create_pending_snapshot(trans, fs_info, pending);
 		BUG_ON(ret);
 	}
@@ -1319,15 +1320,21 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 	 */
 	mutex_lock(&root->fs_info->reloc_mutex);
 
-	ret = create_pending_snapshots(trans, root->fs_info);
+	ret = btrfs_run_delayed_items(trans, root);
 	BUG_ON(ret);
 
-	ret = btrfs_run_delayed_items(trans, root);
+	ret = create_pending_snapshots(trans, root->fs_info);
 	BUG_ON(ret);
 
 	ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
 	BUG_ON(ret);
 
+	/*
+	 * make sure none of the code above managed to slip in a
+	 * delayed item
+	 */
+	btrfs_assert_delayed_root_empty(root);
+
 	WARN_ON(cur_trans != trans->transaction);
 
 	btrfs_scrub_pause(root);
-- 
cgit v1.1


From 46e4edbf7ea9cf26665eb9f90c0fc7688d1a51ed Mon Sep 17 00:00:00 2001
From: Jesper Juhl <jj@chaosbits.net>
Date: Thu, 23 Jun 2011 23:59:32 +0200
Subject: Remove unneeded version.h includes from fs/

It was pointed out by 'make versioncheck' that some includes of
linux/version.h were not needed in fs/ (fs/btrfs/ctree.h and
fs/omfs/file.c).

This patch removes them.

Signed-off-by: Jesper Juhl <jj@chaosbits.net>
Acked-by: Bob Copeland <me@bobcopeland.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/btrfs/ctree.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'fs/btrfs')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 3006287..f30ac05 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -19,7 +19,6 @@
 #ifndef __BTRFS_CTREE__
 #define __BTRFS_CTREE__
 
-#include <linux/version.h>
 #include <linux/mm.h>
 #include <linux/highmem.h>
 #include <linux/fs.h>
-- 
cgit v1.1


From 1973f0faeb4a5f35597793c65d3c94d8fd386e10 Mon Sep 17 00:00:00 2001
From: Chris Mason <chris.mason@oracle.com>
Date: Fri, 24 Jun 2011 13:13:29 -0400
Subject: Btrfs: make sure to record the transid in new inodes

When we create a new inode, we aren't filling in the
field that records the transaction that last changed this
inode.

If we then go to fsync that inode, it will be skipped because the field
isn't filled in.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/inode.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs/btrfs')

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 5813dec..87f1e0c 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4520,6 +4520,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
 	inode_tree_add(inode);
 
 	trace_btrfs_inode_new(inode);
+	btrfs_set_inode_last_trans(trans, inode);
 
 	return inode;
 fail:
-- 
cgit v1.1


From e0f5406727f1dfdc47b8ba4a0ff6eae4b0b5ed4c Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Sat, 18 Jun 2011 20:26:38 +0000
Subject: Btrfs: fix type mismatch in find_free_extent()

data parameter should be u64 because a full-sized chunk flags field is
passed instead of 0/1 for distinguishing data from metadata.  All
underlying functions expect u64.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/extent-tree.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs/btrfs')

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 1f61bf5..71cd456 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4842,7 +4842,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
 				     u64 num_bytes, u64 empty_size,
 				     u64 search_start, u64 search_end,
 				     u64 hint_byte, struct btrfs_key *ins,
-				     int data)
+				     u64 data)
 {
 	int ret = 0;
 	struct btrfs_root *root = orig_root->fs_info->extent_root;
@@ -4869,7 +4869,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
 
 	space_info = __find_space_info(root->fs_info, data);
 	if (!space_info) {
-		printk(KERN_ERR "No space info for %d\n", data);
+		printk(KERN_ERR "No space info for %llu\n", data);
 		return -ENOSPC;
 	}
 
-- 
cgit v1.1


From 9b90f5135320bc74dc6c9a8c74d69fd4821d9282 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@redhat.com>
Date: Fri, 24 Jun 2011 16:02:51 +0000
Subject: Btrfs: make sure to update total_bitmaps when freeing cache V3

A user reported this bug again where we have more bitmaps than we are supposed
to.  This is because we failed to load the free space cache, but don't update
the ctl->total_bitmaps counter when we remove entries from the tree.  This patch
fixes this problem and we should be good to go again.  Thanks,

Signed-off-by: Josef Bacik <josef@redhat.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/free-space-cache.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'fs/btrfs')

diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 9f985a4..bf0d615 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -1893,9 +1893,12 @@ void __btrfs_remove_free_space_cache_locked(struct btrfs_free_space_ctl *ctl)
 
 	while ((node = rb_last(&ctl->free_space_offset)) != NULL) {
 		info = rb_entry(node, struct btrfs_free_space, offset_index);
-		unlink_free_space(ctl, info);
-		kfree(info->bitmap);
-		kmem_cache_free(btrfs_free_space_cachep, info);
+		if (!info->bitmap) {
+			unlink_free_space(ctl, info);
+			kmem_cache_free(btrfs_free_space_cachep, info);
+		} else {
+			free_bitmap(ctl, info);
+		}
 		if (need_resched()) {
 			spin_unlock(&ctl->tree_lock);
 			cond_resched();
-- 
cgit v1.1


From 2f7e33d432d097a2a7f467b031bf18be91cb3d49 Mon Sep 17 00:00:00 2001
From: Miao Xie <miaox@cn.fujitsu.com>
Date: Thu, 23 Jun 2011 07:27:13 +0000
Subject: btrfs: fix inconsonant inode information

When iputting the inode, We may leave the delayed nodes if they have some
delayed items that have not been dealt with. So when the inode is read again,
we must look up the relative delayed node, and use the information in it to
initialize the inode. Or we will get inconsonant inode information, it may
cause that the same directory index number is allocated again, and hit the
following oops:

[ 5447.554187] err add delayed dir index item(name: pglog_0.965_0) into the
insertion tree of the delayed node(root id: 262, inode id: 258, errno: -17)
[ 5447.569766] ------------[ cut here ]------------
[ 5447.575361] kernel BUG at fs/btrfs/delayed-inode.c:1301!
[SNIP]
[ 5447.790721] Call Trace:
[ 5447.793191]  [<ffffffffa0641c4e>] btrfs_insert_dir_item+0x189/0x1bb [btrfs]
[ 5447.800156]  [<ffffffffa0651a45>] btrfs_add_link+0x12b/0x191 [btrfs]
[ 5447.806517]  [<ffffffffa0651adc>] btrfs_add_nondir+0x31/0x58 [btrfs]
[ 5447.812876]  [<ffffffffa0651d6a>] btrfs_create+0xf9/0x197 [btrfs]
[ 5447.818961]  [<ffffffff8111f840>] vfs_create+0x72/0x92
[ 5447.824090]  [<ffffffff8111fa8c>] do_last+0x22c/0x40b
[ 5447.829133]  [<ffffffff8112076a>] path_openat+0xc0/0x2ef
[ 5447.834438]  [<ffffffff810c58e2>] ? __perf_event_task_sched_out+0x24/0x44
[ 5447.841216]  [<ffffffff8103ecdd>] ? perf_event_task_sched_out+0x59/0x67
[ 5447.847846]  [<ffffffff81121a79>] do_filp_open+0x3d/0x87
[ 5447.853156]  [<ffffffff811e126c>] ? strncpy_from_user+0x43/0x4d
[ 5447.859072]  [<ffffffff8111f1f5>] ? getname_flags+0x2e/0x80
[ 5447.864636]  [<ffffffff8111f179>] ? do_getname+0x14b/0x173
[ 5447.870112]  [<ffffffff8111f1b7>] ? audit_getname+0x16/0x26
[ 5447.875682]  [<ffffffff8112b1ab>] ? spin_lock+0xe/0x10
[ 5447.880882]  [<ffffffff81112d39>] do_sys_open+0x69/0xae
[ 5447.886153]  [<ffffffff81112db1>] sys_open+0x20/0x22
[ 5447.891114]  [<ffffffff813b9aab>] system_call_fastpath+0x16/0x1b

Fix it by reusing the old delayed node.

Reported-by: Jim Schutt <jaschut@sandia.gov>
Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
Tested-by: Jim Schutt <jaschut@sandia.gov>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/delayed-inode.c | 104 ++++++++++++++++++++++++++++++++++++-----------
 fs/btrfs/delayed-inode.h |   1 +
 fs/btrfs/inode.c         |  12 +++++-
 3 files changed, 91 insertions(+), 26 deletions(-)

(limited to 'fs/btrfs')

diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index f1cbd02..98c68e6 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -82,19 +82,16 @@ static inline struct btrfs_delayed_root *btrfs_get_delayed_root(
 	return root->fs_info->delayed_root;
 }
 
-static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node(
-							struct inode *inode)
+static struct btrfs_delayed_node *btrfs_get_delayed_node(struct inode *inode)
 {
-	struct btrfs_delayed_node *node;
 	struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
 	struct btrfs_root *root = btrfs_inode->root;
 	u64 ino = btrfs_ino(inode);
-	int ret;
+	struct btrfs_delayed_node *node;
 
-again:
 	node = ACCESS_ONCE(btrfs_inode->delayed_node);
 	if (node) {
-		atomic_inc(&node->refs);	/* can be accessed */
+		atomic_inc(&node->refs);
 		return node;
 	}
 
@@ -102,8 +99,10 @@ again:
 	node = radix_tree_lookup(&root->delayed_nodes_tree, ino);
 	if (node) {
 		if (btrfs_inode->delayed_node) {
+			atomic_inc(&node->refs);	/* can be accessed */
+			BUG_ON(btrfs_inode->delayed_node != node);
 			spin_unlock(&root->inode_lock);
-			goto again;
+			return node;
 		}
 		btrfs_inode->delayed_node = node;
 		atomic_inc(&node->refs);	/* can be accessed */
@@ -113,6 +112,23 @@ again:
 	}
 	spin_unlock(&root->inode_lock);
 
+	return NULL;
+}
+
+static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node(
+							struct inode *inode)
+{
+	struct btrfs_delayed_node *node;
+	struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
+	struct btrfs_root *root = btrfs_inode->root;
+	u64 ino = btrfs_ino(inode);
+	int ret;
+
+again:
+	node = btrfs_get_delayed_node(inode);
+	if (node)
+		return node;
+
 	node = kmem_cache_alloc(delayed_node_cache, GFP_NOFS);
 	if (!node)
 		return ERR_PTR(-ENOMEM);
@@ -548,19 +564,6 @@ struct btrfs_delayed_item *__btrfs_next_delayed_item(
 	return next;
 }
 
-static inline struct btrfs_delayed_node *btrfs_get_delayed_node(
-							struct inode *inode)
-{
-	struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
-	struct btrfs_delayed_node *delayed_node;
-
-	delayed_node = btrfs_inode->delayed_node;
-	if (delayed_node)
-		atomic_inc(&delayed_node->refs);
-
-	return delayed_node;
-}
-
 static inline struct btrfs_root *btrfs_get_fs_root(struct btrfs_root *root,
 						   u64 root_id)
 {
@@ -1404,8 +1407,7 @@ end:
 
 int btrfs_inode_delayed_dir_index_count(struct inode *inode)
 {
-	struct btrfs_delayed_node *delayed_node = BTRFS_I(inode)->delayed_node;
-	int ret = 0;
+	struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
 
 	if (!delayed_node)
 		return -ENOENT;
@@ -1415,11 +1417,14 @@ int btrfs_inode_delayed_dir_index_count(struct inode *inode)
 	 * a new directory index is added into the delayed node and index_cnt
 	 * is updated now. So we needn't lock the delayed node.
 	 */
-	if (!delayed_node->index_cnt)
+	if (!delayed_node->index_cnt) {
+		btrfs_release_delayed_node(delayed_node);
 		return -EINVAL;
+	}
 
 	BTRFS_I(inode)->index_cnt = delayed_node->index_cnt;
-	return ret;
+	btrfs_release_delayed_node(delayed_node);
+	return 0;
 }
 
 void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list,
@@ -1613,6 +1618,57 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
 				      inode->i_ctime.tv_nsec);
 }
 
+int btrfs_fill_inode(struct inode *inode, u32 *rdev)
+{
+	struct btrfs_delayed_node *delayed_node;
+	struct btrfs_inode_item *inode_item;
+	struct btrfs_timespec *tspec;
+
+	delayed_node = btrfs_get_delayed_node(inode);
+	if (!delayed_node)
+		return -ENOENT;
+
+	mutex_lock(&delayed_node->mutex);
+	if (!delayed_node->inode_dirty) {
+		mutex_unlock(&delayed_node->mutex);
+		btrfs_release_delayed_node(delayed_node);
+		return -ENOENT;
+	}
+
+	inode_item = &delayed_node->inode_item;
+
+	inode->i_uid = btrfs_stack_inode_uid(inode_item);
+	inode->i_gid = btrfs_stack_inode_gid(inode_item);
+	btrfs_i_size_write(inode, btrfs_stack_inode_size(inode_item));
+	inode->i_mode = btrfs_stack_inode_mode(inode_item);
+	inode->i_nlink = btrfs_stack_inode_nlink(inode_item);
+	inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item));
+	BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item);
+	BTRFS_I(inode)->sequence = btrfs_stack_inode_sequence(inode_item);
+	inode->i_rdev = 0;
+	*rdev = btrfs_stack_inode_rdev(inode_item);
+	BTRFS_I(inode)->flags = btrfs_stack_inode_flags(inode_item);
+
+	tspec = btrfs_inode_atime(inode_item);
+	inode->i_atime.tv_sec = btrfs_stack_timespec_sec(tspec);
+	inode->i_atime.tv_nsec = btrfs_stack_timespec_nsec(tspec);
+
+	tspec = btrfs_inode_mtime(inode_item);
+	inode->i_mtime.tv_sec = btrfs_stack_timespec_sec(tspec);
+	inode->i_mtime.tv_nsec = btrfs_stack_timespec_nsec(tspec);
+
+	tspec = btrfs_inode_ctime(inode_item);
+	inode->i_ctime.tv_sec = btrfs_stack_timespec_sec(tspec);
+	inode->i_ctime.tv_nsec = btrfs_stack_timespec_nsec(tspec);
+
+	inode->i_generation = BTRFS_I(inode)->generation;
+	BTRFS_I(inode)->index_cnt = (u64)-1;
+
+	mutex_unlock(&delayed_node->mutex);
+	btrfs_release_delayed_node(delayed_node);
+	return 0;
+}
+
 int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
 			       struct btrfs_root *root, struct inode *inode)
 {
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
index d1a6a29..8d27af4 100644
--- a/fs/btrfs/delayed-inode.h
+++ b/fs/btrfs/delayed-inode.h
@@ -119,6 +119,7 @@ void btrfs_kill_delayed_inode_items(struct inode *inode);
 
 int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
 			       struct btrfs_root *root, struct inode *inode);
+int btrfs_fill_inode(struct inode *inode, u32 *rdev);
 
 /* Used for drop dead root */
 void btrfs_kill_all_delayed_nodes(struct btrfs_root *root);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 87f1e0c..447612d 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2509,6 +2509,11 @@ static void btrfs_read_locked_inode(struct inode *inode)
 	int maybe_acls;
 	u32 rdev;
 	int ret;
+	bool filled = false;
+
+	ret = btrfs_fill_inode(inode, &rdev);
+	if (!ret)
+		filled = true;
 
 	path = btrfs_alloc_path();
 	BUG_ON(!path);
@@ -2520,6 +2525,10 @@ static void btrfs_read_locked_inode(struct inode *inode)
 		goto make_bad;
 
 	leaf = path->nodes[0];
+
+	if (filled)
+		goto cache_acl;
+
 	inode_item = btrfs_item_ptr(leaf, path->slots[0],
 				    struct btrfs_inode_item);
 	if (!leaf->map_token)
@@ -2556,7 +2565,7 @@ static void btrfs_read_locked_inode(struct inode *inode)
 
 	BTRFS_I(inode)->index_cnt = (u64)-1;
 	BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
-
+cache_acl:
 	/*
 	 * try to precache a NULL acl entry for files that don't have
 	 * any xattrs or acls
@@ -2572,7 +2581,6 @@ static void btrfs_read_locked_inode(struct inode *inode)
 	}
 
 	btrfs_free_path(path);
-	inode_item = NULL;
 
 	switch (inode->i_mode & S_IFMT) {
 	case S_IFREG:
-- 
cgit v1.1