From c9149235a42ab93914434fff45c44b45023363f3 Mon Sep 17 00:00:00 2001
From: Tsutomu Itoh <t-itoh@jp.fujitsu.com>
Date: Wed, 30 Mar 2011 00:57:23 +0000
Subject: Btrfs: fix compiler warning in file.c

While compiling Btrfs, I got following messages:

  CC [M]  fs/btrfs/file.o
fs/btrfs/file.c: In function '__btrfs_buffered_write':
fs/btrfs/file.c:909: warning: 'ret' may be used uninitialized in this function
  CC [M]  fs/btrfs/tree-defrag.o

This patch fixes compiler warning.

Signed-off-by: Tsutomu Itoh <t-itoh@jp.fujitsu.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/file.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 656bc0a..e621ea5 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -906,7 +906,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
 	unsigned long last_index;
 	size_t num_written = 0;
 	int nrptrs;
-	int ret;
+	int ret = 0;
 
 	nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) /
 		     PAGE_CACHE_SIZE, PAGE_CACHE_SIZE /
-- 
cgit v1.1


From 200da64e0b039f873f0f20481e6a7d056e7cc6c9 Mon Sep 17 00:00:00 2001
From: Tsutomu Itoh <t-itoh@jp.fujitsu.com>
Date: Thu, 31 Mar 2011 00:44:29 +0000
Subject: Btrfs: fix /proc/mounts info.

Some mount options are not displayed by /proc/mounts.
This patch displays the option such as compress_type by /proc/mounts.

Ex.
  [before]
    $ mount | grep sdc2
    /dev/sdc2 on /test12 type btrfs (rw,space_cache,compress=lzo)
    $ cat /proc/mounts | grep sdc2
    /dev/sdc2 /test12 btrfs rw,relatime,compress 0 0

  [after]
    $ mount | grep sdc2
    /dev/sdc2 on /test12 type btrfs (rw,space_cache,compress=lzo)
    $ cat /proc/mounts | grep sdc2
    /dev/sdc2 /test12 btrfs rw,relatime,compress=lzo,space_cache 0 0

Signed-off-by: Tsutomu Itoh <t-itoh@jp.fujitsu.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/super.c | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 2edfc03..58e7de9 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -644,6 +644,7 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
 {
 	struct btrfs_root *root = btrfs_sb(vfs->mnt_sb);
 	struct btrfs_fs_info *info = root->fs_info;
+	char *compress_type;
 
 	if (btrfs_test_opt(root, DEGRADED))
 		seq_puts(seq, ",degraded");
@@ -662,8 +663,16 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
 	if (info->thread_pool_size !=  min_t(unsigned long,
 					     num_online_cpus() + 2, 8))
 		seq_printf(seq, ",thread_pool=%d", info->thread_pool_size);
-	if (btrfs_test_opt(root, COMPRESS))
-		seq_puts(seq, ",compress");
+	if (btrfs_test_opt(root, COMPRESS)) {
+		if (info->compress_type == BTRFS_COMPRESS_ZLIB)
+			compress_type = "zlib";
+		else
+			compress_type = "lzo";
+		if (btrfs_test_opt(root, FORCE_COMPRESS))
+			seq_printf(seq, ",compress-force=%s", compress_type);
+		else
+			seq_printf(seq, ",compress=%s", compress_type);
+	}
 	if (btrfs_test_opt(root, NOSSD))
 		seq_puts(seq, ",nossd");
 	if (btrfs_test_opt(root, SSD_SPREAD))
@@ -678,6 +687,12 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
 		seq_puts(seq, ",discard");
 	if (!(root->fs_info->sb->s_flags & MS_POSIXACL))
 		seq_puts(seq, ",noacl");
+	if (btrfs_test_opt(root, SPACE_CACHE))
+		seq_puts(seq, ",space_cache");
+	if (btrfs_test_opt(root, CLEAR_CACHE))
+		seq_puts(seq, ",clear_cache");
+	if (btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED))
+		seq_puts(seq, ",user_subvol_rm_allowed");
 	return 0;
 }
 
-- 
cgit v1.1


From fe3f566cd19bb6d787c92b2e202c85f929abf3ac Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Mon, 28 Mar 2011 08:30:38 +0000
Subject: Btrfs: Fix oops for defrag with compression turned on

When we defrag a file, whose size can be fit into an inline extent,
with compression enabled, the compress type is set to be
fs_info->compress_type, which is 0 if the btrfs filesystem is mounted
without compress option. This leads to oops.

Reported-by: Daniel Blueman <daniel.blueman@gmail.com>
Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/inode.c | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 0627418..62ae9d5 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -111,6 +111,7 @@ static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
 static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
 				struct btrfs_root *root, struct inode *inode,
 				u64 start, size_t size, size_t compressed_size,
+				int compress_type,
 				struct page **compressed_pages)
 {
 	struct btrfs_key key;
@@ -125,12 +126,9 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
 	size_t cur_size = size;
 	size_t datasize;
 	unsigned long offset;
-	int compress_type = BTRFS_COMPRESS_NONE;
 
-	if (compressed_size && compressed_pages) {
-		compress_type = root->fs_info->compress_type;
+	if (compressed_size && compressed_pages)
 		cur_size = compressed_size;
-	}
 
 	path = btrfs_alloc_path();
 	if (!path)
@@ -220,7 +218,7 @@ fail:
 static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
 				 struct btrfs_root *root,
 				 struct inode *inode, u64 start, u64 end,
-				 size_t compressed_size,
+				 size_t compressed_size, int compress_type,
 				 struct page **compressed_pages)
 {
 	u64 isize = i_size_read(inode);
@@ -253,7 +251,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
 		inline_len = min_t(u64, isize, actual_end);
 	ret = insert_inline_extent(trans, root, inode, start,
 				   inline_len, compressed_size,
-				   compressed_pages);
+				   compress_type, compressed_pages);
 	BUG_ON(ret);
 	btrfs_delalloc_release_metadata(inode, end + 1 - start);
 	btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
@@ -432,12 +430,13 @@ again:
 			 * to make an uncompressed inline extent.
 			 */
 			ret = cow_file_range_inline(trans, root, inode,
-						    start, end, 0, NULL);
+						    start, end, 0, 0, NULL);
 		} else {
 			/* try making a compressed inline extent */
 			ret = cow_file_range_inline(trans, root, inode,
 						    start, end,
-						    total_compressed, pages);
+						    total_compressed,
+						    compress_type, pages);
 		}
 		if (ret == 0) {
 			/*
@@ -791,7 +790,7 @@ static noinline int cow_file_range(struct inode *inode,
 	if (start == 0) {
 		/* lets try to make an inline extent */
 		ret = cow_file_range_inline(trans, root, inode,
-					    start, end, 0, NULL);
+					    start, end, 0, 0, NULL);
 		if (ret == 0) {
 			extent_clear_unlock_delalloc(inode,
 				     &BTRFS_I(inode)->io_tree,
-- 
cgit v1.1


From b44c59a80ded004e1a82712e5f9e17b131c03221 Mon Sep 17 00:00:00 2001
From: Johann Lombardi <johann@whamcloud.com>
Date: Thu, 31 Mar 2011 13:23:47 +0000
Subject: Btrfs: fix subvol_sem leak in btrfs_rename()

btrfs_rename() does not release the subvol_sem if the transaction failed to start.

Signed-off-by: Johann Lombardi <johann@whamcloud.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/inode.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 62ae9d5..1ca3e68 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6961,8 +6961,10 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	 * should cover the worst case number of items we'll modify.
 	 */
 	trans = btrfs_start_transaction(root, 20);
-	if (IS_ERR(trans))
-		return PTR_ERR(trans);
+	if (IS_ERR(trans)) {
+                ret = PTR_ERR(trans);
+                goto out_notrans;
+        }
 
 	btrfs_set_trans_block_group(trans, new_dir);
 
@@ -7062,7 +7064,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	}
 out_fail:
 	btrfs_end_transaction_throttle(trans, root);
-
+out_notrans:
 	if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
 		up_read(&root->fs_info->subvol_sem);
 
-- 
cgit v1.1


From 8b2b2d3cbefb605501342adaf64d601b545ed154 Mon Sep 17 00:00:00 2001
From: Tsutomu Itoh <t-itoh@jp.fujitsu.com>
Date: Mon, 4 Apr 2011 01:52:13 +0000
Subject: Btrfs: fix memory leak in btrfs_ioctl_start_sync()

Call btrfs_end_transaction() if btrfs_commit_transaction_async() fails.

Signed-off-by: Tsutomu Itoh <t-itoh@jp.fujitsu.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/ioctl.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 6b70e0e..255c7c5 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2436,8 +2436,10 @@ static noinline long btrfs_ioctl_start_sync(struct file *file, void __user *argp
 		return PTR_ERR(trans);
 	transid = trans->transid;
 	ret = btrfs_commit_transaction_async(trans, root, 0);
-	if (ret)
+	if (ret) {
+		btrfs_end_transaction(trans, root);
 		return ret;
+	}
 
 	if (argp)
 		if (copy_to_user(argp, &transid, sizeof(transid)))
-- 
cgit v1.1


From 6e8df2ae89ab37730c0062782f844c66ecfc97a7 Mon Sep 17 00:00:00 2001
From: Yoshinori Sano <yoshinori.sano@gmail.com>
Date: Sun, 3 Apr 2011 12:31:28 +0000
Subject: Btrfs: fix memory leak in start_transaction()

Free btrfs_trans_handle when join_transaction() fails
in start_transaction()

Signed-off-by: Yoshinori Sano <yoshinori.sano@gmail.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/transaction.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index ce48eb5..d01cc24 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -197,6 +197,7 @@ again:
 
 	ret = join_transaction(root);
 	if (ret < 0) {
+		kmem_cache_free(btrfs_trans_handle_cachep, h);
 		if (type != TRANS_JOIN_NOLOCK)
 			mutex_unlock(&root->fs_info->trans_mutex);
 		return ERR_PTR(ret);
-- 
cgit v1.1


From adae52b94e18afa1f84fab67df2a8a872c2f5533 Mon Sep 17 00:00:00 2001
From: Miao Xie <miaox@cn.fujitsu.com>
Date: Thu, 31 Mar 2011 09:43:23 +0000
Subject: btrfs: clear __GFP_FS flag in the space cache inode

the object id of the space cache inode's key is allocated from the relative
root, just like the regular file. So we can't identify space cache inode by
checking the object id of the inode's key, and we have to clear __GFP_FS flag
at the time we look up the space cache inode.

Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
Signed-off-by: Liu Bo <liubo2009@cn.fujitsu.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/free-space-cache.c | 2 ++
 fs/btrfs/inode.c            | 2 --
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 0037427..13575de 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -81,6 +81,8 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root,
 		return ERR_PTR(-ENOENT);
 	}
 
+	inode->i_mapping->flags &= ~__GFP_FS;
+
 	spin_lock(&block_group->lock);
 	if (!root->fs_info->closing) {
 		block_group->inode = igrab(inode);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 1ca3e68..57a03f6 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2535,8 +2535,6 @@ static void btrfs_read_locked_inode(struct inode *inode)
 	BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
 
 	alloc_group_block = btrfs_inode_block_group(leaf, inode_item);
-	if (location.objectid == BTRFS_FREE_SPACE_OBJECTID)
-		inode->i_mapping->flags &= ~__GFP_FS;
 
 	/*
 	 * try to precache a NULL acl entry for files that don't have
-- 
cgit v1.1


From 08fe4db170b4193603d9d31f40ebaf652d07ac9c Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Mon, 28 Mar 2011 02:01:25 +0000
Subject: Btrfs: Fix uninitialized root flags for subvolumes

root_item->flags and root_item->byte_limit are not initialized when
a subvolume is created. This bug is not revealed until we added
readonly snapshot support - now you mount a btrfs filesystem and you
may find the subvolumes in it are readonly.

To work around this problem, we steal a bit from root_item->inode_item->flags,
and use it to indicate if those fields have been properly initialized.
When we read a tree root from disk, we check if the bit is set, and if
not we'll set the flag and initialize the two fields of the root item.

Reported-by: Andreas Philipp <philipp.andreas@gmail.com>
Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Tested-by: Andreas Philipp <philipp.andreas@gmail.com>
cc: stable@kernel.org
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/ctree.h       |  4 ++++
 fs/btrfs/disk-io.c     |  4 +++-
 fs/btrfs/ioctl.c       |  4 ++++
 fs/btrfs/root-tree.c   | 18 ++++++++++++++++++
 fs/btrfs/transaction.c |  1 +
 5 files changed, 30 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index d47ce83..3458b57 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1284,6 +1284,8 @@ struct btrfs_root {
 #define BTRFS_INODE_DIRSYNC		(1 << 10)
 #define BTRFS_INODE_COMPRESS		(1 << 11)
 
+#define BTRFS_INODE_ROOT_ITEM_INIT	(1 << 31)
+
 /* some macros to generate set/get funcs for the struct fields.  This
  * assumes there is a lefoo_to_cpu for every type, so lets make a simple
  * one for u8:
@@ -2359,6 +2361,8 @@ int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid);
 int btrfs_find_orphan_roots(struct btrfs_root *tree_root);
 int btrfs_set_root_node(struct btrfs_root_item *item,
 			struct extent_buffer *node);
+void btrfs_check_and_init_root_item(struct btrfs_root_item *item);
+
 /* dir-item.c */
 int btrfs_insert_dir_item(struct btrfs_trans_handle *trans,
 			  struct btrfs_root *root, const char *name,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 5cf3aa7..a272bfd 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1276,8 +1276,10 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
 	root->commit_root = btrfs_root_node(root);
 	BUG_ON(!root->node);
 out:
-	if (location->objectid != BTRFS_TREE_LOG_OBJECTID)
+	if (location->objectid != BTRFS_TREE_LOG_OBJECTID) {
 		root->ref_cows = 1;
+		btrfs_check_and_init_root_item(&root->root_item);
+	}
 
 	return root;
 }
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 255c7c5..f9c93a9 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -373,6 +373,10 @@ static noinline int create_subvol(struct btrfs_root *root,
 	inode_item->nbytes = cpu_to_le64(root->leafsize);
 	inode_item->mode = cpu_to_le32(S_IFDIR | 0755);
 
+	root_item.flags = 0;
+	root_item.byte_limit = 0;
+	inode_item->flags = cpu_to_le64(BTRFS_INODE_ROOT_ITEM_INIT);
+
 	btrfs_set_root_bytenr(&root_item, leaf->start);
 	btrfs_set_root_generation(&root_item, trans->transid);
 	btrfs_set_root_level(&root_item, 0);
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 29b2d7c..6928bff 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -473,3 +473,21 @@ again:
 	btrfs_free_path(path);
 	return 0;
 }
+
+/*
+ * Old btrfs forgets to init root_item->flags and root_item->byte_limit
+ * for subvolumes. To work around this problem, we steal a bit from
+ * root_item->inode_item->flags, and use it to indicate if those fields
+ * have been properly initialized.
+ */
+void btrfs_check_and_init_root_item(struct btrfs_root_item *root_item)
+{
+	u64 inode_flags = le64_to_cpu(root_item->inode.flags);
+
+	if (!(inode_flags & BTRFS_INODE_ROOT_ITEM_INIT)) {
+		inode_flags |= BTRFS_INODE_ROOT_ITEM_INIT;
+		root_item->inode.flags = cpu_to_le64(inode_flags);
+		root_item->flags = 0;
+		root_item->byte_limit = 0;
+	}
+}
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index d01cc24..5b158da 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -976,6 +976,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
 	record_root_in_trans(trans, root);
 	btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
 	memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
+	btrfs_check_and_init_root_item(new_root_item);
 
 	root_flags = btrfs_root_flags(new_root_item);
 	if (pending->readonly)
-- 
cgit v1.1


From 43be21462d8c263e2449b52b23326232fd710bee Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@redhat.com>
Date: Fri, 1 Apr 2011 14:55:00 +0000
Subject: Btrfs: fix free space cache when there are pinned extents and
 clusters V2

I noticed a huge problem with the free space cache that was presenting
as an early ENOSPC.  Turns out when writing the free space cache out I
forgot to take into account pinned extents and more importantly
clusters.  This would result in us leaking free space everytime we
unmounted the filesystem and remounted it.

I fix this by making sure to check and see if the current block group
has a cluster and writing out any entries that are in the cluster to the
cache, as well as writing any pinned extents we currently have to the
cache since those will be available for us to use the next time the fs
mounts.

This patch also adds a check to the end of load_free_space_cache to make
sure we got the right amount of free space cache, and if not make sure
to clear the cache and re-cache the old fashioned way.

Signed-off-by: Josef Bacik <josef@redhat.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/free-space-cache.c | 82 ++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 78 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 13575de..f561c95 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -24,6 +24,7 @@
 #include "free-space-cache.h"
 #include "transaction.h"
 #include "disk-io.h"
+#include "extent_io.h"
 
 #define BITS_PER_BITMAP		(PAGE_CACHE_SIZE * 8)
 #define MAX_CACHE_BYTES_PER_GIG	(32 * 1024)
@@ -224,6 +225,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
 	u64 num_entries;
 	u64 num_bitmaps;
 	u64 generation;
+	u64 used = btrfs_block_group_used(&block_group->item);
 	u32 cur_crc = ~(u32)0;
 	pgoff_t index = 0;
 	unsigned long first_page_offset;
@@ -469,6 +471,17 @@ next:
 		index++;
 	}
 
+	spin_lock(&block_group->tree_lock);
+	if (block_group->free_space != (block_group->key.offset - used -
+					block_group->bytes_super)) {
+		spin_unlock(&block_group->tree_lock);
+		printk(KERN_ERR "block group %llu has an wrong amount of free "
+		       "space\n", block_group->key.objectid);
+		ret = 0;
+		goto free_cache;
+	}
+	spin_unlock(&block_group->tree_lock);
+
 	ret = 1;
 out:
 	kfree(checksums);
@@ -497,8 +510,11 @@ int btrfs_write_out_cache(struct btrfs_root *root,
 	struct list_head *pos, *n;
 	struct page *page;
 	struct extent_state *cached_state = NULL;
+	struct btrfs_free_cluster *cluster = NULL;
+	struct extent_io_tree *unpin = NULL;
 	struct list_head bitmap_list;
 	struct btrfs_key key;
+	u64 start, end, len;
 	u64 bytes = 0;
 	u32 *crc, *checksums;
 	pgoff_t index = 0, last_index = 0;
@@ -507,6 +523,7 @@ int btrfs_write_out_cache(struct btrfs_root *root,
 	int entries = 0;
 	int bitmaps = 0;
 	int ret = 0;
+	bool next_page = false;
 
 	root = root->fs_info->tree_root;
 
@@ -553,6 +570,18 @@ int btrfs_write_out_cache(struct btrfs_root *root,
 	 */
 	first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64);
 
+	/* Get the cluster for this block_group if it exists */
+	if (!list_empty(&block_group->cluster_list))
+		cluster = list_entry(block_group->cluster_list.next,
+				     struct btrfs_free_cluster,
+				     block_group_list);
+
+	/*
+	 * We shouldn't have switched the pinned extents yet so this is the
+	 * right one
+	 */
+	unpin = root->fs_info->pinned_extents;
+
 	/*
 	 * Lock all pages first so we can lock the extent safely.
 	 *
@@ -582,6 +611,12 @@ int btrfs_write_out_cache(struct btrfs_root *root,
 	lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
 			 0, &cached_state, GFP_NOFS);
 
+	/*
+	 * When searching for pinned extents, we need to start at our start
+	 * offset.
+	 */
+	start = block_group->key.objectid;
+
 	/* Write out the extent entries */
 	do {
 		struct btrfs_free_space_entry *entry;
@@ -589,6 +624,8 @@ int btrfs_write_out_cache(struct btrfs_root *root,
 		unsigned long offset = 0;
 		unsigned long start_offset = 0;
 
+		next_page = false;
+
 		if (index == 0) {
 			start_offset = first_page_offset;
 			offset = start_offset;
@@ -600,7 +637,7 @@ int btrfs_write_out_cache(struct btrfs_root *root,
 		entry = addr + start_offset;
 
 		memset(addr, 0, PAGE_CACHE_SIZE);
-		while (1) {
+		while (node && !next_page) {
 			struct btrfs_free_space *e;
 
 			e = rb_entry(node, struct btrfs_free_space, offset_index);
@@ -616,12 +653,49 @@ int btrfs_write_out_cache(struct btrfs_root *root,
 				entry->type = BTRFS_FREE_SPACE_EXTENT;
 			}
 			node = rb_next(node);
-			if (!node)
-				break;
+			if (!node && cluster) {
+				node = rb_first(&cluster->root);
+				cluster = NULL;
+			}
 			offset += sizeof(struct btrfs_free_space_entry);
 			if (offset + sizeof(struct btrfs_free_space_entry) >=
 			    PAGE_CACHE_SIZE)
+				next_page = true;
+			entry++;
+		}
+
+		/*
+		 * We want to add any pinned extents to our free space cache
+		 * so we don't leak the space
+		 */
+		while (!next_page && (start < block_group->key.objectid +
+				      block_group->key.offset)) {
+			ret = find_first_extent_bit(unpin, start, &start, &end,
+						    EXTENT_DIRTY);
+			if (ret) {
+				ret = 0;
+				break;
+			}
+
+			/* This pinned extent is out of our range */
+			if (start >= block_group->key.objectid +
+			    block_group->key.offset)
 				break;
+
+			len = block_group->key.objectid +
+				block_group->key.offset - start;
+			len = min(len, end + 1 - start);
+
+			entries++;
+			entry->offset = cpu_to_le64(start);
+			entry->bytes = cpu_to_le64(len);
+			entry->type = BTRFS_FREE_SPACE_EXTENT;
+
+			start = end + 1;
+			offset += sizeof(struct btrfs_free_space_entry);
+			if (offset + sizeof(struct btrfs_free_space_entry) >=
+			    PAGE_CACHE_SIZE)
+				next_page = true;
 			entry++;
 		}
 		*crc = ~(u32)0;
@@ -652,7 +726,7 @@ int btrfs_write_out_cache(struct btrfs_root *root,
 		page_cache_release(page);
 
 		index++;
-	} while (node);
+	} while (node || next_page);
 
 	/* Write out the bitmaps */
 	list_for_each_safe(pos, n, &bitmap_list) {
-- 
cgit v1.1


From c9ddec74aa950a220cc4caa5215cfc5d886050b7 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@redhat.com>
Date: Mon, 28 Mar 2011 13:43:25 +0000
Subject: Btrfs: don't warn in btrfs_add_orphan

When I moved the orphan adding to btrfs_truncate I missed the fact that during
orphan cleanup we just add the orphan items to the orphan list without going
through btrfs_orphan_add, which results in lots of warnings on mount if you have
any orphan items that need to be truncated.  Just remove this warning since it's
ok, this will allow all of the normal space accounting take place.  Thanks,

Signed-off-by: Josef Bacik <josef@redhat.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/inode.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 57a03f6..cc60228 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2220,8 +2220,6 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
 			insert = 1;
 #endif
 		insert = 1;
-	} else {
-		WARN_ON(!BTRFS_I(inode)->orphan_meta_reserved);
 	}
 
 	if (!BTRFS_I(inode)->orphan_meta_reserved) {
-- 
cgit v1.1


From ee3dea3549444e6e76d27af48b4929983e6f023c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 30 Mar 2011 12:17:43 +0200
Subject: ufs: remove unessecary blk_flush_plug

We already flush the per-process plugging list when context switching,
so a blk_flush_plug call just before a yield() is not needed.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 fs/ufs/truncate.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c
index 1101430..5f821db 100644
--- a/fs/ufs/truncate.c
+++ b/fs/ufs/truncate.c
@@ -479,7 +479,6 @@ int ufs_truncate(struct inode *inode, loff_t old_i_size)
 			break;
 		if (IS_SYNC(inode) && (inode->i_state & I_DIRTY))
 			ufs_sync_inode (inode);
-		blk_flush_plug(current);
 		yield();
 	}
 
-- 
cgit v1.1


From 7dcda1c96d7c643101d4a05579ef4512a4baa7ef Mon Sep 17 00:00:00 2001
From: Jens Axboe <jaxboe@fusionio.com>
Date: Tue, 5 Apr 2011 23:51:48 +0200
Subject: fs: export empty_aops

With the ->sync_page() hook gone, we have a few users that
add their own static address_space_operations without any
functions defined.

fs/inode.c already has an empty_aops that it uses for init
purposes. Lets export that and use it in the places where
an otherwise empty aops was defined.

Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 fs/inode.c       | 9 ++++++++-
 fs/nilfs2/page.c | 2 --
 fs/ubifs/xattr.c | 4 ++--
 3 files changed, 10 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/inode.c b/fs/inode.c
index 5f4e11a..33c963d 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -125,6 +125,14 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_wb_list_lock);
 static DECLARE_RWSEM(iprune_sem);
 
 /*
+ * Empty aops. Can be used for the cases where the user does not
+ * define any of the address_space operations.
+ */
+const struct address_space_operations empty_aops = {
+};
+EXPORT_SYMBOL(empty_aops);
+
+/*
  * Statistics gathering..
  */
 struct inodes_stat_t inodes_stat;
@@ -176,7 +184,6 @@ int proc_nr_inodes(ctl_table *table, int write,
  */
 int inode_init_always(struct super_block *sb, struct inode *inode)
 {
-	static const struct address_space_operations empty_aops;
 	static const struct inode_operations empty_iops;
 	static const struct file_operations empty_fops;
 	struct address_space *const mapping = &inode->i_data;
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index 9d2dc6b..1168059 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -495,8 +495,6 @@ unsigned nilfs_page_count_clean_buffers(struct page *page,
 void nilfs_mapping_init(struct address_space *mapping,
 			struct backing_dev_info *bdi)
 {
-	static const struct address_space_operations empty_aops;
-
 	mapping->host = NULL;
 	mapping->flags = 0;
 	mapping_set_gfp_mask(mapping, GFP_NOFS);
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index c74400f..3299f46 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -56,6 +56,7 @@
  */
 
 #include "ubifs.h"
+#include <linux/fs.h>
 #include <linux/slab.h>
 #include <linux/xattr.h>
 #include <linux/posix_acl_xattr.h>
@@ -80,7 +81,6 @@ enum {
 };
 
 static const struct inode_operations none_inode_operations;
-static const struct address_space_operations none_address_operations;
 static const struct file_operations none_file_operations;
 
 /**
@@ -130,7 +130,7 @@ static int create_xattr(struct ubifs_info *c, struct inode *host,
 	}
 
 	/* Re-define all operations to be "nothing" */
-	inode->i_mapping->a_ops = &none_address_operations;
+	inode->i_mapping->a_ops = &empty_aops;
 	inode->i_op = &none_inode_operations;
 	inode->i_fop = &none_file_operations;
 
-- 
cgit v1.1


From d0de4dc584ec6aa3b26fffea320a8457827768fc Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 5 Apr 2011 17:20:50 -0400
Subject: inotify: fix double free/corruption of stuct user

On an error path in inotify_init1 a normal user can trigger a double
free of struct user.  This is a regression introduced by a2ae4cc9a16e
("inotify: stop kernel memory leak on file creation failure").

We fix this by making sure that if a group exists the user reference is
dropped when the group is cleaned up.  We should not explictly drop the
reference on error and also drop the reference when the group is cleaned
up.

The new lifetime rules are that an inotify group lives from
inotify_new_group to the last fsnotify_put_group.  Since the struct user
and inotify_devs are directly tied to this lifetime they are only
changed/updated in those two locations.  We get rid of all special
casing of struct user or user->inotify_devs.

Signed-off-by: Eric Paris <eparis@redhat.com>
Cc: stable@kernel.org (2.6.37 and up)
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/notify/inotify/inotify_fsnotify.c |  1 +
 fs/notify/inotify/inotify_user.c     | 39 ++++++++++++------------------------
 2 files changed, 14 insertions(+), 26 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index a91b69a..0348d0c 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -198,6 +198,7 @@ static void inotify_free_group_priv(struct fsnotify_group *group)
 	idr_for_each(&group->inotify_data.idr, idr_callback, group);
 	idr_remove_all(&group->inotify_data.idr);
 	idr_destroy(&group->inotify_data.idr);
+	atomic_dec(&group->inotify_data.user->inotify_devs);
 	free_uid(group->inotify_data.user);
 }
 
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index bd46e7c..8445fbc 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -290,7 +290,6 @@ static int inotify_fasync(int fd, struct file *file, int on)
 static int inotify_release(struct inode *ignored, struct file *file)
 {
 	struct fsnotify_group *group = file->private_data;
-	struct user_struct *user = group->inotify_data.user;
 
 	pr_debug("%s: group=%p\n", __func__, group);
 
@@ -299,8 +298,6 @@ static int inotify_release(struct inode *ignored, struct file *file)
 	/* free this group, matching get was inotify_init->fsnotify_obtain_group */
 	fsnotify_put_group(group);
 
-	atomic_dec(&user->inotify_devs);
-
 	return 0;
 }
 
@@ -697,7 +694,7 @@ retry:
 	return ret;
 }
 
-static struct fsnotify_group *inotify_new_group(struct user_struct *user, unsigned int max_events)
+static struct fsnotify_group *inotify_new_group(unsigned int max_events)
 {
 	struct fsnotify_group *group;
 
@@ -710,8 +707,14 @@ static struct fsnotify_group *inotify_new_group(struct user_struct *user, unsign
 	spin_lock_init(&group->inotify_data.idr_lock);
 	idr_init(&group->inotify_data.idr);
 	group->inotify_data.last_wd = 0;
-	group->inotify_data.user = user;
 	group->inotify_data.fa = NULL;
+	group->inotify_data.user = get_current_user();
+
+	if (atomic_inc_return(&group->inotify_data.user->inotify_devs) >
+	    inotify_max_user_instances) {
+		fsnotify_put_group(group);
+		return ERR_PTR(-EMFILE);
+	}
 
 	return group;
 }
@@ -721,7 +724,6 @@ static struct fsnotify_group *inotify_new_group(struct user_struct *user, unsign
 SYSCALL_DEFINE1(inotify_init1, int, flags)
 {
 	struct fsnotify_group *group;
-	struct user_struct *user;
 	int ret;
 
 	/* Check the IN_* constants for consistency.  */
@@ -731,31 +733,16 @@ SYSCALL_DEFINE1(inotify_init1, int, flags)
 	if (flags & ~(IN_CLOEXEC | IN_NONBLOCK))
 		return -EINVAL;
 
-	user = get_current_user();
-	if (unlikely(atomic_read(&user->inotify_devs) >=
-			inotify_max_user_instances)) {
-		ret = -EMFILE;
-		goto out_free_uid;
-	}
-
 	/* fsnotify_obtain_group took a reference to group, we put this when we kill the file in the end */
-	group = inotify_new_group(user, inotify_max_queued_events);
-	if (IS_ERR(group)) {
-		ret = PTR_ERR(group);
-		goto out_free_uid;
-	}
-
-	atomic_inc(&user->inotify_devs);
+	group = inotify_new_group(inotify_max_queued_events);
+	if (IS_ERR(group))
+		return PTR_ERR(group);
 
 	ret = anon_inode_getfd("inotify", &inotify_fops, group,
 				  O_RDONLY | flags);
-	if (ret >= 0)
-		return ret;
+	if (ret < 0)
+		fsnotify_put_group(group);
 
-	fsnotify_put_group(group);
-	atomic_dec(&user->inotify_devs);
-out_free_uid:
-	free_uid(user);
 	return ret;
 }
 
-- 
cgit v1.1