From d7eecb483cc29e929bbc5515b8def830d7fc6ad2 Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@caiaq.de>
Date: Thu, 28 Jan 2010 16:13:01 +0800
Subject: jfs_dmap.[ch]: trivial typo fix: s/heigth/height/g
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Daniel Mack <daniel@caiaq.de>
Signed-off-by: Dave Kleikamp <shaggy@linux.vnet.ibm.com>
Cc: Jiri Kosina <trivial@kernel.org>
Cc: André Goddard Rosa <andre.goddard@gmail.com>
Cc: jfs-discussion@lists.sourceforge.net
---
 fs/jfs/jfs_dmap.c | 16 ++++++++--------
 fs/jfs/jfs_dmap.h |  6 +++---
 2 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index d9b031c..7e19d2f 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -195,7 +195,7 @@ int dbMount(struct inode *ipbmap)
 	bmp->db_maxag = le32_to_cpu(dbmp_le->dn_maxag);
 	bmp->db_agpref = le32_to_cpu(dbmp_le->dn_agpref);
 	bmp->db_aglevel = le32_to_cpu(dbmp_le->dn_aglevel);
-	bmp->db_agheigth = le32_to_cpu(dbmp_le->dn_agheigth);
+	bmp->db_agheight = le32_to_cpu(dbmp_le->dn_agheight);
 	bmp->db_agwidth = le32_to_cpu(dbmp_le->dn_agwidth);
 	bmp->db_agstart = le32_to_cpu(dbmp_le->dn_agstart);
 	bmp->db_agl2size = le32_to_cpu(dbmp_le->dn_agl2size);
@@ -287,7 +287,7 @@ int dbSync(struct inode *ipbmap)
 	dbmp_le->dn_maxag = cpu_to_le32(bmp->db_maxag);
 	dbmp_le->dn_agpref = cpu_to_le32(bmp->db_agpref);
 	dbmp_le->dn_aglevel = cpu_to_le32(bmp->db_aglevel);
-	dbmp_le->dn_agheigth = cpu_to_le32(bmp->db_agheigth);
+	dbmp_le->dn_agheight = cpu_to_le32(bmp->db_agheight);
 	dbmp_le->dn_agwidth = cpu_to_le32(bmp->db_agwidth);
 	dbmp_le->dn_agstart = cpu_to_le32(bmp->db_agstart);
 	dbmp_le->dn_agl2size = cpu_to_le32(bmp->db_agl2size);
@@ -1440,7 +1440,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results)
 	 * tree index of this allocation group within the control page.
 	 */
 	agperlev =
-	    (1 << (L2LPERCTL - (bmp->db_agheigth << 1))) / bmp->db_agwidth;
+	    (1 << (L2LPERCTL - (bmp->db_agheight << 1))) / bmp->db_agwidth;
 	ti = bmp->db_agstart + bmp->db_agwidth * (agno & (agperlev - 1));
 
 	/* dmap control page trees fan-out by 4 and a single allocation
@@ -1459,7 +1459,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results)
 		 * the subtree to find the leftmost leaf that describes this
 		 * free space.
 		 */
-		for (k = bmp->db_agheigth; k > 0; k--) {
+		for (k = bmp->db_agheight; k > 0; k--) {
 			for (n = 0, m = (ti << 2) + 1; n < 4; n++) {
 				if (l2nb <= dcp->stree[m + n]) {
 					ti = m + n;
@@ -3606,7 +3606,7 @@ void dbFinalizeBmap(struct inode *ipbmap)
 	}
 
 	/*
-	 * compute db_aglevel, db_agheigth, db_width, db_agstart:
+	 * compute db_aglevel, db_agheight, db_width, db_agstart:
 	 * an ag is covered in aglevel dmapctl summary tree,
 	 * at agheight level height (from leaf) with agwidth number of nodes
 	 * each, which starts at agstart index node of the smmary tree node
@@ -3615,9 +3615,9 @@ void dbFinalizeBmap(struct inode *ipbmap)
 	bmp->db_aglevel = BMAPSZTOLEV(bmp->db_agsize);
 	l2nl =
 	    bmp->db_agl2size - (L2BPERDMAP + bmp->db_aglevel * L2LPERCTL);
-	bmp->db_agheigth = l2nl >> 1;
-	bmp->db_agwidth = 1 << (l2nl - (bmp->db_agheigth << 1));
-	for (i = 5 - bmp->db_agheigth, bmp->db_agstart = 0, n = 1; i > 0;
+	bmp->db_agheight = l2nl >> 1;
+	bmp->db_agwidth = 1 << (l2nl - (bmp->db_agheight << 1));
+	for (i = 5 - bmp->db_agheight, bmp->db_agstart = 0, n = 1; i > 0;
 	     i--) {
 		bmp->db_agstart += n;
 		n <<= 2;
diff --git a/fs/jfs/jfs_dmap.h b/fs/jfs/jfs_dmap.h
index 1a6eb41..6dcb906 100644
--- a/fs/jfs/jfs_dmap.h
+++ b/fs/jfs/jfs_dmap.h
@@ -210,7 +210,7 @@ struct dbmap_disk {
 	__le32 dn_maxag;	/* 4: max active alloc group number	*/
 	__le32 dn_agpref;	/* 4: preferred alloc group (hint)	*/
 	__le32 dn_aglevel;	/* 4: dmapctl level holding the AG	*/
-	__le32 dn_agheigth;	/* 4: height in dmapctl of the AG	*/
+	__le32 dn_agheight;	/* 4: height in dmapctl of the AG	*/
 	__le32 dn_agwidth;	/* 4: width in dmapctl of the AG	*/
 	__le32 dn_agstart;	/* 4: start tree index at AG height	*/
 	__le32 dn_agl2size;	/* 4: l2 num of blks per alloc group	*/
@@ -229,7 +229,7 @@ struct dbmap {
 	int dn_maxag;		/* max active alloc group number	*/
 	int dn_agpref;		/* preferred alloc group (hint)		*/
 	int dn_aglevel;		/* dmapctl level holding the AG		*/
-	int dn_agheigth;	/* height in dmapctl of the AG		*/
+	int dn_agheight;	/* height in dmapctl of the AG		*/
 	int dn_agwidth;		/* width in dmapctl of the AG		*/
 	int dn_agstart;		/* start tree index at AG height	*/
 	int dn_agl2size;	/* l2 num of blks per alloc group	*/
@@ -255,7 +255,7 @@ struct bmap {
 #define	db_agsize	db_bmap.dn_agsize
 #define	db_agl2size	db_bmap.dn_agl2size
 #define	db_agwidth	db_bmap.dn_agwidth
-#define	db_agheigth	db_bmap.dn_agheigth
+#define	db_agheight	db_bmap.dn_agheight
 #define	db_agstart	db_bmap.dn_agstart
 #define	db_numag	db_bmap.dn_numag
 #define	db_maxlevel	db_bmap.dn_maxlevel
-- 
cgit v1.1


From 8bf8c376ab2eefaf0386f4e003e720e1434fa43d Mon Sep 17 00:00:00 2001
From: Dmitry Monakhov <dmonakhov@openvz.org>
Date: Wed, 3 Mar 2010 06:28:06 +0300
Subject: blkdev: fix merge_bvec_fn return value checks v2

merge_bvec_fn() returns bvec->bv_len on success. So we have to check
against this value. But in case of fs_optimization merge we compare
with wrong value. This patch must be included in
 b428cd6da7e6559aca69aa2e3a526037d3f20403
But accidentally i've forgot to add this in the initial patch.
To make things straight let's replace all such checks.
In fact this makes code easy to understand.

Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 fs/bio.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/bio.c b/fs/bio.c
index dc17afd..d89e019 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -555,7 +555,7 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
 					.bi_rw = bio->bi_rw,
 				};
 
-				if (q->merge_bvec_fn(q, &bvm, prev) < len) {
+				if (q->merge_bvec_fn(q, &bvm, prev) < prev->bv_len) {
 					prev->bv_len -= len;
 					return 0;
 				}
@@ -608,7 +608,7 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
 		 * merge_bvec_fn() returns number of bytes it can accept
 		 * at this offset
 		 */
-		if (q->merge_bvec_fn(q, &bvm, bvec) < len) {
+		if (q->merge_bvec_fn(q, &bvm, bvec) < bvec->bv_len) {
 			bvec->bv_page = NULL;
 			bvec->bv_len = 0;
 			bvec->bv_offset = 0;
-- 
cgit v1.1


From f11c9c5c259cb2c3d698548dc3936f773ab1f5b9 Mon Sep 17 00:00:00 2001
From: Edward Shishkin <edward.shishkin@gmail.com>
Date: Thu, 11 Mar 2010 14:09:47 -0800
Subject: vfs: improve writeback_inodes_wb()

Do not pin/unpin superblock for every inode in writeback_inodes_wb(), pin
it for the whole group of inodes which belong to the same superblock and
call writeback_sb_inodes() handler for them.

Signed-off-by: Edward Shishkin <edward.shishkin@gmail.com>
Cc: Jens Axboe <jens.axboe@oracle.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 fs/fs-writeback.c | 133 ++++++++++++++++++++++++++++++------------------------
 1 file changed, 73 insertions(+), 60 deletions(-)

(limited to 'fs')

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 76fc4d5..6841eff 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -553,108 +553,85 @@ select_queue:
 	return ret;
 }
 
-static void unpin_sb_for_writeback(struct super_block **psb)
+static void unpin_sb_for_writeback(struct super_block *sb)
 {
-	struct super_block *sb = *psb;
-
-	if (sb) {
-		up_read(&sb->s_umount);
-		put_super(sb);
-		*psb = NULL;
-	}
+	up_read(&sb->s_umount);
+	put_super(sb);
 }
 
+enum sb_pin_state {
+	SB_PINNED,
+	SB_NOT_PINNED,
+	SB_PIN_FAILED
+};
+
 /*
  * For WB_SYNC_NONE writeback, the caller does not have the sb pinned
  * before calling writeback. So make sure that we do pin it, so it doesn't
  * go away while we are writing inodes from it.
- *
- * Returns 0 if the super was successfully pinned (or pinning wasn't needed),
- * 1 if we failed.
  */
-static int pin_sb_for_writeback(struct writeback_control *wbc,
-				struct inode *inode, struct super_block **psb)
+static enum sb_pin_state pin_sb_for_writeback(struct writeback_control *wbc,
+					      struct super_block *sb)
 {
-	struct super_block *sb = inode->i_sb;
-
-	/*
-	 * If this sb is already pinned, nothing more to do. If not and
-	 * *psb is non-NULL, unpin the old one first
-	 */
-	if (sb == *psb)
-		return 0;
-	else if (*psb)
-		unpin_sb_for_writeback(psb);
-
 	/*
 	 * Caller must already hold the ref for this
 	 */
 	if (wbc->sync_mode == WB_SYNC_ALL) {
 		WARN_ON(!rwsem_is_locked(&sb->s_umount));
-		return 0;
+		return SB_NOT_PINNED;
 	}
-
 	spin_lock(&sb_lock);
 	sb->s_count++;
 	if (down_read_trylock(&sb->s_umount)) {
 		if (sb->s_root) {
 			spin_unlock(&sb_lock);
-			goto pinned;
+			return SB_PINNED;
 		}
 		/*
 		 * umounted, drop rwsem again and fall through to failure
 		 */
 		up_read(&sb->s_umount);
 	}
-
 	sb->s_count--;
 	spin_unlock(&sb_lock);
-	return 1;
-pinned:
-	*psb = sb;
-	return 0;
+	return SB_PIN_FAILED;
 }
 
-static void writeback_inodes_wb(struct bdi_writeback *wb,
-				struct writeback_control *wbc)
+/*
+ * Write a portion of b_io inodes which belong to @sb.
+ * If @wbc->sb != NULL, then find and write all such
+ * inodes. Otherwise write only ones which go sequentially
+ * in reverse order.
+ * Return 1, if the caller writeback routine should be
+ * interrupted. Otherwise return 0.
+ */
+static int writeback_sb_inodes(struct super_block *sb,
+			       struct bdi_writeback *wb,
+			       struct writeback_control *wbc)
 {
-	struct super_block *sb = wbc->sb, *pin_sb = NULL;
-	const unsigned long start = jiffies;	/* livelock avoidance */
-
-	spin_lock(&inode_lock);
-
-	if (!wbc->for_kupdate || list_empty(&wb->b_io))
-		queue_io(wb, wbc->older_than_this);
-
 	while (!list_empty(&wb->b_io)) {
-		struct inode *inode = list_entry(wb->b_io.prev,
-						struct inode, i_list);
 		long pages_skipped;
-
-		/*
-		 * super block given and doesn't match, skip this inode
-		 */
-		if (sb && sb != inode->i_sb) {
+		struct inode *inode = list_entry(wb->b_io.prev,
+						 struct inode, i_list);
+		if (wbc->sb && sb != inode->i_sb) {
+			/* super block given and doesn't
+			   match, skip this inode */
 			redirty_tail(inode);
 			continue;
 		}
-
+		if (sb != inode->i_sb)
+			/* finish with this superblock */
+			return 0;
 		if (inode->i_state & (I_NEW | I_WILL_FREE)) {
 			requeue_io(inode);
 			continue;
 		}
-
 		/*
 		 * Was this inode dirtied after sync_sb_inodes was called?
 		 * This keeps sync from extra jobs and livelock.
 		 */
-		if (inode_dirtied_after(inode, start))
-			break;
-
-		if (pin_sb_for_writeback(wbc, inode, &pin_sb)) {
-			requeue_io(inode);
-			continue;
-		}
+		if (inode_dirtied_after(inode, wbc->wb_start))
+			return 1;
 
 		BUG_ON(inode->i_state & (I_FREEING | I_CLEAR));
 		__iget(inode);
@@ -673,14 +650,50 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
 		spin_lock(&inode_lock);
 		if (wbc->nr_to_write <= 0) {
 			wbc->more_io = 1;
-			break;
+			return 1;
 		}
 		if (!list_empty(&wb->b_more_io))
 			wbc->more_io = 1;
 	}
+	/* b_io is empty */
+	return 1;
+}
+
+static void writeback_inodes_wb(struct bdi_writeback *wb,
+				struct writeback_control *wbc)
+{
+	int ret = 0;
 
-	unpin_sb_for_writeback(&pin_sb);
+	wbc->wb_start = jiffies; /* livelock avoidance */
+	spin_lock(&inode_lock);
+	if (!wbc->for_kupdate || list_empty(&wb->b_io))
+		queue_io(wb, wbc->older_than_this);
+
+	while (!list_empty(&wb->b_io)) {
+		struct inode *inode = list_entry(wb->b_io.prev,
+						 struct inode, i_list);
+		struct super_block *sb = inode->i_sb;
+		enum sb_pin_state state;
+
+		if (wbc->sb && sb != wbc->sb) {
+			/* super block given and doesn't
+			   match, skip this inode */
+			redirty_tail(inode);
+			continue;
+		}
+		state = pin_sb_for_writeback(wbc, sb);
+
+		if (state == SB_PIN_FAILED) {
+			requeue_io(inode);
+			continue;
+		}
+		ret = writeback_sb_inodes(sb, wb, wbc);
 
+		if (state == SB_PINNED)
+			unpin_sb_for_writeback(sb);
+		if (ret)
+			break;
+	}
 	spin_unlock(&inode_lock);
 	/* Leave any unwritten inodes on b_io */
 }
-- 
cgit v1.1


From d330a5befb88875a9b3d2db62f9b74dadf660b13 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Sun, 14 Mar 2010 18:17:54 -0400
Subject: ext4: Fix estimate of # of blocks needed to write indirect-mapped
 files

http://bugzilla.kernel.org/show_bug.cgi?id=15420

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/inode.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 986120f..11119e0 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1035,7 +1035,7 @@ static int ext4_indirect_calc_metadata_amount(struct inode *inode,
 					      sector_t lblock)
 {
 	struct ext4_inode_info *ei = EXT4_I(inode);
-	int dind_mask = EXT4_ADDR_PER_BLOCK(inode->i_sb) - 1;
+	sector_t dind_mask = ~((sector_t)EXT4_ADDR_PER_BLOCK(inode->i_sb) - 1);
 	int blk_bits;
 
 	if (lblock < EXT4_NDIR_BLOCKS)
@@ -1050,7 +1050,7 @@ static int ext4_indirect_calc_metadata_amount(struct inode *inode,
 	}
 	ei->i_da_metadata_calc_last_lblock = lblock & dind_mask;
 	ei->i_da_metadata_calc_len = 1;
-	blk_bits = roundup_pow_of_two(lblock + 1);
+	blk_bits = order_base_2(lblock);
 	return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1;
 }
 
-- 
cgit v1.1


From ee860b6a650360c91f5d5f9a94262aad9be90015 Mon Sep 17 00:00:00 2001
From: Sachin Prabhu <sprabhu@redhat.com>
Date: Wed, 10 Mar 2010 10:28:40 -0500
Subject: [PATCH] Skip check for mandatory locks when unlocking

ocfs2_lock() will skip locks on file which has mode set to 02666. This
is a problem in cases where the mode of the file is changed after a
process has obtained a lock on the file.

ocfs2_lock() should skip the check for mandatory locks when unlocking a
file.

Signed-off-by: Sachin Prabhu <sprabhu@redhat.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/locks.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ocfs2/locks.c b/fs/ocfs2/locks.c
index 544ac62..b5cb3ed 100644
--- a/fs/ocfs2/locks.c
+++ b/fs/ocfs2/locks.c
@@ -133,7 +133,7 @@ int ocfs2_lock(struct file *file, int cmd, struct file_lock *fl)
 
 	if (!(fl->fl_flags & FL_POSIX))
 		return -ENOLCK;
-	if (__mandatory_lock(inode))
+	if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK)
 		return -ENOLCK;
 
 	return ocfs2_plock(osb->cconn, OCFS2_I(inode)->ip_blkno, file, cmd, fl);
-- 
cgit v1.1


From 78c37eb0d5e6a9727b12ea0f1821795ffaa66cfe Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Wed, 3 Mar 2010 11:26:27 +0800
Subject: ocfs2: Change bg_chain check for ocfs2_validate_gd_parent.

In ocfs2_validate_gd_parent, we check bg_chain against the
cl_next_free_rec of the dinode. Actually in resize, we have
the chance of bg_chain == cl_next_free_rec. So add some
additional condition check for it.

I also rename paramter "clean_error" to "resize", since the
old one is not clearly enough to indicate that we should only
meet with this case in resize.

btw, the correpsonding bug is
http://oss.oracle.com/bugzilla/show_bug.cgi?id=1230.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/suballoc.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index c3c60bc..0016503 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -152,7 +152,7 @@ static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl)
 
 #define do_error(fmt, ...)						\
 	do{								\
-		if (clean_error)					\
+		if (resize)					\
 			mlog(ML_ERROR, fmt "\n", ##__VA_ARGS__);	\
 		else							\
 			ocfs2_error(sb, fmt, ##__VA_ARGS__);		\
@@ -160,7 +160,7 @@ static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl)
 
 static int ocfs2_validate_gd_self(struct super_block *sb,
 				  struct buffer_head *bh,
-				  int clean_error)
+				  int resize)
 {
 	struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data;
 
@@ -211,7 +211,7 @@ static int ocfs2_validate_gd_self(struct super_block *sb,
 static int ocfs2_validate_gd_parent(struct super_block *sb,
 				    struct ocfs2_dinode *di,
 				    struct buffer_head *bh,
-				    int clean_error)
+				    int resize)
 {
 	unsigned int max_bits;
 	struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data;
@@ -233,8 +233,11 @@ static int ocfs2_validate_gd_parent(struct super_block *sb,
 		return -EINVAL;
 	}
 
-	if (le16_to_cpu(gd->bg_chain) >=
-	    le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) {
+	/* In resize, we may meet the case bg_chain == cl_next_free_rec. */
+	if ((le16_to_cpu(gd->bg_chain) >
+	     le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) ||
+	    ((le16_to_cpu(gd->bg_chain) ==
+	     le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) && !resize)) {
 		do_error("Group descriptor #%llu has bad chain %u",
 			 (unsigned long long)bh->b_blocknr,
 			 le16_to_cpu(gd->bg_chain));
-- 
cgit v1.1


From 6527f8f848ec84b9daf1cb07601266126b8507ab Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Wed, 10 Mar 2010 09:56:52 +0800
Subject: ocfs2: Update i_blocks in reflink operations.

In reflink, we need to upate i_blocks for the target inode.

Reported-by: Jie Liu <jeff.liu@oracle.com>
Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/refcounttree.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 9e96921..29405f2 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4075,6 +4075,7 @@ static int ocfs2_complete_reflink(struct inode *s_inode,
 	OCFS2_I(t_inode)->ip_dyn_features = OCFS2_I(s_inode)->ip_dyn_features;
 	spin_unlock(&OCFS2_I(t_inode)->ip_lock);
 	i_size_write(t_inode, size);
+	t_inode->i_blocks = s_inode->i_blocks;
 
 	di->i_xattr_inline_size = s_di->i_xattr_inline_size;
 	di->i_clusters = s_di->i_clusters;
-- 
cgit v1.1


From fcefd25ac89239cb57fa198f125a79ff85468c75 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mfasheh@suse.com>
Date: Mon, 15 Mar 2010 15:39:00 -0700
Subject: ocfs2: set i_mode on disk during acl operations

ocfs2_set_acl() and ocfs2_init_acl() were setting i_mode on the in-memory
inode, but never setting it on the disk copy. Thus, acls were some times not
getting propagated between nodes. This patch fixes the issue by adding a
helper function ocfs2_acl_set_mode() which does this the right way.
ocfs2_set_acl() and ocfs2_init_acl() are then updated to call
ocfs2_acl_set_mode().

Signed-off-by: Mark Fasheh <mfasheh@suse.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/acl.c | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 72 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index 0501974..8ccf0f8 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -30,6 +30,8 @@
 #include "alloc.h"
 #include "dlmglue.h"
 #include "file.h"
+#include "inode.h"
+#include "journal.h"
 #include "ocfs2_fs.h"
 
 #include "xattr.h"
@@ -166,6 +168,60 @@ static struct posix_acl *ocfs2_get_acl(struct inode *inode, int type)
 }
 
 /*
+ * Helper function to set i_mode in memory and disk. Some call paths
+ * will not have di_bh or a journal handle to pass, in which case it
+ * will create it's own.
+ */
+static int ocfs2_acl_set_mode(struct inode *inode, struct buffer_head *di_bh,
+			      handle_t *handle, umode_t new_mode)
+{
+	int ret, commit_handle = 0;
+	struct ocfs2_dinode *di;
+
+	if (di_bh == NULL) {
+		ret = ocfs2_read_inode_block(inode, &di_bh);
+		if (ret) {
+			mlog_errno(ret);
+			goto out;
+		}
+	} else
+		get_bh(di_bh);
+
+	if (handle == NULL) {
+		handle = ocfs2_start_trans(OCFS2_SB(inode->i_sb),
+					   OCFS2_INODE_UPDATE_CREDITS);
+		if (IS_ERR(handle)) {
+			ret = PTR_ERR(handle);
+			mlog_errno(ret);
+			goto out_brelse;
+		}
+
+		commit_handle = 1;
+	}
+
+	di = (struct ocfs2_dinode *)di_bh->b_data;
+	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
+				      OCFS2_JOURNAL_ACCESS_WRITE);
+	if (ret) {
+		mlog_errno(ret);
+		goto out_commit;
+	}
+
+	inode->i_mode = new_mode;
+	di->i_mode = cpu_to_le16(inode->i_mode);
+
+	ocfs2_journal_dirty(handle, di_bh);
+
+out_commit:
+	if (commit_handle)
+		ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
+out_brelse:
+	brelse(di_bh);
+out:
+	return ret;
+}
+
+/*
  * Set the access or default ACL of an inode.
  */
 static int ocfs2_set_acl(handle_t *handle,
@@ -193,9 +249,14 @@ static int ocfs2_set_acl(handle_t *handle,
 			if (ret < 0)
 				return ret;
 			else {
-				inode->i_mode = mode;
 				if (ret == 0)
 					acl = NULL;
+
+				ret = ocfs2_acl_set_mode(inode, di_bh,
+							 handle, mode);
+				if (ret)
+					return ret;
+
 			}
 		}
 		break;
@@ -283,6 +344,7 @@ int ocfs2_init_acl(handle_t *handle,
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	struct posix_acl *acl = NULL;
 	int ret = 0;
+	mode_t mode;
 
 	if (!S_ISLNK(inode->i_mode)) {
 		if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) {
@@ -291,12 +353,17 @@ int ocfs2_init_acl(handle_t *handle,
 			if (IS_ERR(acl))
 				return PTR_ERR(acl);
 		}
-		if (!acl)
-			inode->i_mode &= ~current_umask();
+		if (!acl) {
+			mode = inode->i_mode & ~current_umask();
+			ret = ocfs2_acl_set_mode(inode, di_bh, handle, mode);
+			if (ret) {
+				mlog_errno(ret);
+				goto cleanup;
+			}
+		}
 	}
 	if ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) && acl) {
 		struct posix_acl *clone;
-		mode_t mode;
 
 		if (S_ISDIR(inode->i_mode)) {
 			ret = ocfs2_set_acl(handle, inode, di_bh,
@@ -313,7 +380,7 @@ int ocfs2_init_acl(handle_t *handle,
 		mode = inode->i_mode;
 		ret = posix_acl_create_masq(clone, &mode);
 		if (ret >= 0) {
-			inode->i_mode = mode;
+			ret = ocfs2_acl_set_mode(inode, di_bh, handle, mode);
 			if (ret > 0) {
 				ret = ocfs2_set_acl(handle, inode,
 						    di_bh, ACL_TYPE_ACCESS,
-- 
cgit v1.1


From b22b63ebafb97b66d1054e69941ee049d790c6cf Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mfasheh@suse.com>
Date: Thu, 11 Mar 2010 18:43:46 -0800
Subject: ocfs2: Always try for maximum bits with new local alloc windows

What we were doing before was to ask for the current window size as the
maximum allocation. This had the effect of limiting the amount of allocation
we could get for the local alloc during times when the window size was
shrunk due to fragmentation. In some cases, that could actually *increase*
fragmentation by artificially limiting the number of bits we can accept. So
while we still want to ask for a minimum number of bits equal to window
size, there is no reason why we should limit the number of bits the local
alloc should accept. Hence always allow the maximum number of local alloc
bits.

Signed-off-by: Mark Fasheh <mfasheh@suse.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/localalloc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index ca992d9..171c691 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -984,8 +984,7 @@ static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb,
 	}
 
 retry_enospc:
-	(*ac)->ac_bits_wanted = osb->local_alloc_bits;
-
+	(*ac)->ac_bits_wanted = osb->local_alloc_default_bits;
 	status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac);
 	if (status == -ENOSPC) {
 		if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_ENOSPC) ==
@@ -1061,6 +1060,7 @@ retry_enospc:
 		    OCFS2_LA_DISABLED)
 			goto bail;
 
+		ac->ac_bits_wanted = osb->local_alloc_default_bits;
 		status = ocfs2_claim_clusters(osb, handle, ac,
 					      osb->local_alloc_bits,
 					      &cluster_off,
-- 
cgit v1.1


From d812e575822a2b7ab1a7cadae2571505ec6ec2bd Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 19 Mar 2010 13:55:17 -0400
Subject: NFS: Prevent another deadlock in nfs_release_page()

We should not attempt to free the page if __GFP_FS is not set. Otherwise we
can deadlock as per

  http://bugzilla.kernel.org/show_bug.cgi?id=15578

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: stable@kernel.org
---
 fs/nfs/file.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index ae8d022..ae0d927 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -491,7 +491,8 @@ static int nfs_release_page(struct page *page, gfp_t gfp)
 {
 	dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page);
 
-	if (gfp & __GFP_WAIT)
+	/* Only do I/O if gfp is a superset of GFP_KERNEL */
+	if ((gfp & GFP_KERNEL) == GFP_KERNEL)
 		nfs_wb_page(page->mapping->host, page);
 	/* If PagePrivate() is set, then the page is not freeable */
 	if (PagePrivate(page))
-- 
cgit v1.1


From dfe4d3d6a6f707fff1dbfd4b8fce65e64a91b809 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Fri, 19 Mar 2010 15:04:23 +0800
Subject: ocfs2: Fix the update of name_offset when removing xattrs

When replacing a xattr's value, in some case we wipe its name/value
first and then re-add it. The wipe is done by
ocfs2_xa_block_wipe_namevalue() when the xattr is in the inode or
block. We currently adjust name_offset for all the entries which have
(offset < name_offset). This does not adjust the entrie we're replacing.
Since we are replacing the entry, we don't adjust the total entry count.
When we calculate a new namevalue location, we trust the entries
now-wrong offset in ocfs2_xa_get_free_start().  The solution is to
also adjust the name_offset for the replaced entry, allowing
ocfs2_xa_get_free_start() to calculate the new namevalue location
correctly.

The following script can trigger a kernel panic easily.

echo 'y'|mkfs.ocfs2 --fs-features=local,xattr -b 4K $DEVICE
mount -t ocfs2 $DEVICE $MNT_DIR
FILE=$MNT_DIR/$RANDOM
for((i=0;i<76;i++))
do
string_76="a$string_76"
done
string_78="aa$string_76"
string_82="aaaa$string_78"

touch $FILE
setfattr -n 'user.test1234567890' -v $string_76 $FILE
setfattr -n 'user.test1234567890' -v $string_78 $FILE
setfattr -n 'user.test1234567890' -v $string_82 $FILE

Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/xattr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index d1b0d38..82c2a0b 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -1622,7 +1622,7 @@ static void ocfs2_xa_block_wipe_namevalue(struct ocfs2_xa_loc *loc)
 	/* Now tell xh->xh_entries about it */
 	for (i = 0; i < count; i++) {
 		offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset);
-		if (offset < namevalue_offset)
+		if (offset <= namevalue_offset)
 			le16_add_cpu(&xh->xh_entries[i].xe_name_offset,
 				     namevalue_size);
 	}
-- 
cgit v1.1


From b23179681c90a55e2a2083e1dde9f727ecffb2b7 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Fri, 19 Mar 2010 15:04:24 +0800
Subject: ocfs2: Init meta_ac properly in ocfs2_create_empty_xattr_block.

You can't store a pointer that you haven't filled in yet and expect it
to work.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/xattr.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 82c2a0b..3e77730 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -6528,13 +6528,11 @@ static int ocfs2_create_empty_xattr_block(struct inode *inode,
 					  int indexed)
 {
 	int ret;
-	struct ocfs2_alloc_context *meta_ac;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
-	struct ocfs2_xattr_set_ctxt ctxt = {
-		.meta_ac = meta_ac,
-	};
+	struct ocfs2_xattr_set_ctxt ctxt;
 
-	ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac);
+	memset(&ctxt, 0, sizeof(ctxt));
+	ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &ctxt.meta_ac);
 	if (ret < 0) {
 		mlog_errno(ret);
 		return ret;
@@ -6556,7 +6554,7 @@ static int ocfs2_create_empty_xattr_block(struct inode *inode,
 
 	ocfs2_commit_trans(osb, ctxt.handle);
 out:
-	ocfs2_free_alloc_context(meta_ac);
+	ocfs2_free_alloc_context(ctxt.meta_ac);
 	return ret;
 }
 
-- 
cgit v1.1


From 978097c907b58a2d085bbf7632bee1a5a7e6f6ba Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Mon, 8 Mar 2010 15:27:53 -0800
Subject: ceph: implemented caps should always be superset of issued caps

Added assertion, and cleared one case where the implemented caps were
not following the issued caps.

Signed-off-by: Yehuda Sadeh <yehuda@hq.newdream.net>
Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/caps.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index db122bb..57d9b44 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -2334,6 +2334,7 @@ static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
 			   revoked_rdcache)
 			reply = 2;     /* send revoke ack in check_caps */
 		cap->issued = newcaps;
+		cap->implemented |= newcaps;
 	} else if (cap->issued == newcaps) {
 		dout("caps unchanged: %s -> %s\n",
 		     ceph_cap_string(cap->issued), ceph_cap_string(newcaps));
@@ -2346,6 +2347,7 @@ static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
 					      * pending revocation */
 		wake = 1;
 	}
+	BUG_ON(cap->issued & ~cap->implemented);
 
 	spin_unlock(&inode->i_lock);
 	if (writeback)
-- 
cgit v1.1


From 052bb34af3bf8ae2001b9f03d884ba0def3e427c Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Tue, 9 Mar 2010 12:52:26 -0800
Subject: ceph: add missing locking to protect i_snap_realm_item during split

All ci->i_snap_realm_item/realm->inodes_with_caps manipulation should be
protected by realm->inodes_with_caps_lock.  This bug would have only bit
us in a rare race with a realm split (during some snap creations).

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/snap.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index bf2a5f3..8a43bc8 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -818,7 +818,9 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
 			 * queued (again) by ceph_update_snap_trace()
 			 * below.  Queue it _now_, under the old context.
 			 */
+			spin_lock(&realm->inodes_with_caps_lock);
 			list_del_init(&ci->i_snap_realm_item);
+			spin_unlock(&realm->inodes_with_caps_lock);
 			spin_unlock(&inode->i_lock);
 
 			ceph_queue_cap_snap(ci,
-- 
cgit v1.1


From 8b218b8a4a65bf4e304ae8690cadb9100ef029c0 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Tue, 9 Mar 2010 12:59:08 -0800
Subject: ceph: fix inode removal from snap realm when racing with migration

When an inode was dropped while being migrated between two MDSs,
i_cap_exporting_issued was non-zero such that issue caps were non-zero and
__ceph_is_any_caps(ci) was true.  This prevented the inode from being
removed from the snap realm, even as it was dropped from the cache.

Fix this by dropping any residual i_snap_realm ref in destroy_inode.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/inode.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'fs')

diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 7abe1ae..aca82d5 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -378,6 +378,22 @@ void ceph_destroy_inode(struct inode *inode)
 
 	ceph_queue_caps_release(inode);
 
+	/*
+	 * we may still have a snap_realm reference if there are stray
+	 * caps in i_cap_exporting_issued or i_snap_caps.
+	 */
+	if (ci->i_snap_realm) {
+		struct ceph_mds_client *mdsc =
+			&ceph_client(ci->vfs_inode.i_sb)->mdsc;
+		struct ceph_snap_realm *realm = ci->i_snap_realm;
+
+		dout(" dropping residual ref to snap realm %p\n", realm);
+		spin_lock(&realm->inodes_with_caps_lock);
+		list_del_init(&ci->i_snap_realm_item);
+		spin_unlock(&realm->inodes_with_caps_lock);
+		ceph_put_snap_realm(mdsc, realm);
+	}
+
 	kfree(ci->i_symlink);
 	while ((n = rb_first(&ci->i_fragtree)) != NULL) {
 		frag = rb_entry(n, struct ceph_inode_frag, node);
-- 
cgit v1.1


From 63733a0fc55cca74b1911769633dc5dfd1a45907 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Mon, 15 Mar 2010 15:47:22 -0700
Subject: ceph: fix authenticator timeout

We were failing to reconnect to services due to an old authenticator, even
though we had the new ticket, because we weren't properly retrying the
connect handshake, because we were calling an old/incorrect helper that
left in_base_pos incorrect.  The result was a failure to reconnect to the
OSD or MDS (with an authentication error) if the MDS restarted after the
service had been up a few hours (long enough for the original authenticator
to be invalid).  This was only a problem if the AUTH_X authentication was
enabled.

Now that the 'negotiate' and 'connect' stages are fully separated, we
should use the prepare_read_connect() helper instead, and remove the
obsolete one.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/messenger.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c
index 781656a..203c435 100644
--- a/fs/ceph/messenger.c
+++ b/fs/ceph/messenger.c
@@ -830,13 +830,6 @@ static void prepare_read_connect(struct ceph_connection *con)
 	con->in_base_pos = 0;
 }
 
-static void prepare_read_connect_retry(struct ceph_connection *con)
-{
-	dout("prepare_read_connect_retry %p\n", con);
-	con->in_base_pos = strlen(CEPH_BANNER) + sizeof(con->actual_peer_addr)
-		+ sizeof(con->peer_addr_for_me);
-}
-
 static void prepare_read_ack(struct ceph_connection *con)
 {
 	dout("prepare_read_ack %p\n", con);
@@ -1146,7 +1139,7 @@ static int process_connect(struct ceph_connection *con)
 		}
 		con->auth_retry = 1;
 		prepare_write_connect(con->msgr, con, 0);
-		prepare_read_connect_retry(con);
+		prepare_read_connect(con);
 		break;
 
 	case CEPH_MSGR_TAG_RESETSESSION:
-- 
cgit v1.1


From 807c86e2ceba8febe79b289d50cd0d5e0b0af917 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Mon, 15 Mar 2010 15:52:17 -0700
Subject: ceph: fix authenticator buffer size calculation

The buffer size was incorrectly calculated for the ceph_x_encrypt()
encapsulated ticket blob.  Use a helper (with correct arithmetic) and
BUG out if we were wrong.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/auth_x.c | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c
index f031842..96e7aaa 100644
--- a/fs/ceph/auth_x.c
+++ b/fs/ceph/auth_x.c
@@ -28,6 +28,12 @@ static int ceph_x_is_authenticated(struct ceph_auth_client *ac)
 	return (ac->want_keys & xi->have_keys) == ac->want_keys;
 }
 
+static int ceph_x_encrypt_buflen(int ilen)
+{
+	return sizeof(struct ceph_x_encrypt_header) + ilen + 16 +
+		sizeof(u32);
+}
+
 static int ceph_x_encrypt(struct ceph_crypto_key *secret,
 			  void *ibuf, int ilen, void *obuf, size_t olen)
 {
@@ -242,7 +248,7 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
 				   struct ceph_x_ticket_handler *th,
 				   struct ceph_x_authorizer *au)
 {
-	int len;
+	int maxlen;
 	struct ceph_x_authorize_a *msg_a;
 	struct ceph_x_authorize_b msg_b;
 	void *p, *end;
@@ -253,15 +259,15 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
 	dout("build_authorizer for %s %p\n",
 	     ceph_entity_type_name(th->service), au);
 
-	len = sizeof(*msg_a) + sizeof(msg_b) + sizeof(u32) +
-		ticket_blob_len + 16;
-	dout("  need len %d\n", len);
-	if (au->buf && au->buf->alloc_len < len) {
+	maxlen = sizeof(*msg_a) + sizeof(msg_b) +
+		ceph_x_encrypt_buflen(ticket_blob_len);
+	dout("  need len %d\n", maxlen);
+	if (au->buf && au->buf->alloc_len < maxlen) {
 		ceph_buffer_put(au->buf);
 		au->buf = NULL;
 	}
 	if (!au->buf) {
-		au->buf = ceph_buffer_new(len, GFP_NOFS);
+		au->buf = ceph_buffer_new(maxlen, GFP_NOFS);
 		if (!au->buf)
 			return -ENOMEM;
 	}
@@ -296,6 +302,7 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
 	au->buf->vec.iov_len = p - au->buf->vec.iov_base;
 	dout(" built authorizer nonce %llx len %d\n", au->nonce,
 	     (int)au->buf->vec.iov_len);
+	BUG_ON(au->buf->vec.iov_len > maxlen);
 	return 0;
 
 out_buf:
-- 
cgit v1.1


From 5b3dbb44ab40660a080d03585bd35f45b2890c49 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Mon, 15 Mar 2010 15:57:04 -0700
Subject: ceph: release old ticket_blob buffer

Release the old ticket_blob buffer when we get an updated service ticket
from the monitor.  Previously these were getting leaked.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/auth_x.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c
index 96e7aaa..33d3ad4 100644
--- a/fs/ceph/auth_x.c
+++ b/fs/ceph/auth_x.c
@@ -156,6 +156,7 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
 		struct timespec validity;
 		struct ceph_crypto_key old_key;
 		void *tp, *tpend;
+		struct ceph_buffer *new_ticket_blob;
 
 		ceph_decode_need(&p, end, sizeof(u32) + 1, bad);
 
@@ -223,9 +224,12 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
 		ceph_decode_need(&tp, tpend, 1 + sizeof(u64), bad);
 		struct_v = ceph_decode_8(&tp);
 		th->secret_id = ceph_decode_64(&tp);
-		ret = ceph_decode_buffer(&th->ticket_blob, &tp, tpend);
+		ret = ceph_decode_buffer(&new_ticket_blob, &tp, tpend);
 		if (ret)
 			goto out;
+		if (th->ticket_blob)
+			ceph_buffer_put(th->ticket_blob);
+		th->ticket_blob = new_ticket_blob;
 		dout(" got ticket service %d (%s) secret_id %lld len %d\n",
 		     type, ceph_entity_type_name(type), th->secret_id,
 		     (int)th->ticket_blob->vec.iov_len);
-- 
cgit v1.1


From 2d8428acaee5468d194d8a84de22a0797791cb33 Mon Sep 17 00:00:00 2001
From: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Date: Mon, 22 Mar 2010 14:01:24 +0900
Subject: nilfs2: fix duplicate call to nilfs_segctor_cancel_freev

Andreas Beckmann gave me a report that nilfs logged the following
warnings when it got a disk full:

  nilfs_sufile_do_cancel_free: segment 0 must be clean
  nilfs_sufile_do_cancel_free: segment 1 must be clean

These arise from a duplicate call to nilfs_segctor_cancel_freev in an
error path of log writer.  This will fix the issue.

Reported-by: Andreas Beckmann <debian@abeckmann.de>
Signed-off-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
---
 fs/nilfs2/segment.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 69576a9..b622123 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -1510,6 +1510,12 @@ static int nilfs_segctor_collect(struct nilfs_sc_info *sci,
 		if (mode != SC_LSEG_SR || sci->sc_stage.scnt < NILFS_ST_CPFILE)
 			break;
 
+		nilfs_clear_logs(&sci->sc_segbufs);
+
+		err = nilfs_segctor_extend_segments(sci, nilfs, nadd);
+		if (unlikely(err))
+			return err;
+
 		if (sci->sc_stage.flags & NILFS_CF_SUFREED) {
 			err = nilfs_sufile_cancel_freev(nilfs->ns_sufile,
 							sci->sc_freesegs,
@@ -1517,12 +1523,6 @@ static int nilfs_segctor_collect(struct nilfs_sc_info *sci,
 							NULL);
 			WARN_ON(err); /* do not happen */
 		}
-		nilfs_clear_logs(&sci->sc_segbufs);
-
-		err = nilfs_segctor_extend_segments(sci, nilfs, nadd);
-		if (unlikely(err))
-			return err;
-
 		nadd = min_t(int, nadd << 1, SC_MAX_SEGDELTA);
 		sci->sc_stage = prev_stage;
 	}
-- 
cgit v1.1


From 556ae3bb32cabe483375b857dda1322384c57b65 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Sun, 21 Mar 2010 12:10:36 -0400
Subject: NFS: don't try to decode GETATTR if DELEGRETURN returned error

The reply parsing code attempts to decode the GETATTR response even if
the DELEGRETURN portion of the compound returned an error. The GETATTR
response won't actually exist if that's the case and we're asking the
parser to read past the end of the response.

This bug is fairly benign. The parser catches this without reading past
the end of the response and decode_getfattr returns -EIO. Earlier
kernels however had decode_op_hdr using the READ_BUF macro, and this
bug would make this printk pop any time the client got an error from
a delegreturn:

kernel: decode_op_hdr: reply buffer overflowed in line XXXX

More recent kernels seem to have replaced this printk with a dprintk.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4xdr.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 4d338be..dd17713 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -5552,6 +5552,8 @@ static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp, __be32 *p, struct nf
 	if (status != 0)
 		goto out;
 	status = decode_delegreturn(&xdr);
+	if (status != 0)
+		goto out;
 	decode_getfattr(&xdr, res->fattr, res->server,
 			!RPC_IS_ASYNC(rqstp->rq_task));
 out:
-- 
cgit v1.1


From 99b437a9257cb6b267bf32adfb7675948dc6d485 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Mon, 22 Mar 2010 13:07:14 +0000
Subject: AFS: Potential null dereference

It seems clear from the surrounding code that xpermits is allowed to be
NULL here.

Signed-off-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/afs/security.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/afs/security.c b/fs/afs/security.c
index 3ef5043..bb4ed14 100644
--- a/fs/afs/security.c
+++ b/fs/afs/security.c
@@ -189,8 +189,9 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key, long acl_order)
 	if (!permits)
 		goto out_unlock;
 
-	memcpy(permits->permits, xpermits->permits,
-	       count * sizeof(struct afs_permit));
+	if (xpermits)
+		memcpy(permits->permits, xpermits->permits,
+			count * sizeof(struct afs_permit));
 
 	_debug("key %x access %x",
 	       key_serial(key), vnode->status.caller_access);
-- 
cgit v1.1


From 0a990e7093566ceb07e38951e1a01686923d4f09 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Mon, 15 Mar 2010 16:12:25 -0700
Subject: ceph: clean up service ticket decoding

Previously we would decode state directly into our current ticket_handler.
This is problematic if for some reason we fail to decode, because we end
up with half new state and half old state.

We are probably already in bad shape if we get an update we can't decode,
but we may as well be tidy anyway.  Decode into new_* temporaries and
update the ticket_handler only on success.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/auth_x.c | 28 ++++++++++++++++++++--------
 1 file changed, 20 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c
index 33d3ad4..8d8a849 100644
--- a/fs/ceph/auth_x.c
+++ b/fs/ceph/auth_x.c
@@ -156,7 +156,11 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
 		struct timespec validity;
 		struct ceph_crypto_key old_key;
 		void *tp, *tpend;
+		struct ceph_timespec new_validity;
+		struct ceph_crypto_key new_session_key;
 		struct ceph_buffer *new_ticket_blob;
+		unsigned long new_expires, new_renew_after;
+		u64 new_secret_id;
 
 		ceph_decode_need(&p, end, sizeof(u32) + 1, bad);
 
@@ -189,16 +193,16 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
 			goto bad;
 
 		memcpy(&old_key, &th->session_key, sizeof(old_key));
-		ret = ceph_crypto_key_decode(&th->session_key, &dp, dend);
+		ret = ceph_crypto_key_decode(&new_session_key, &dp, dend);
 		if (ret)
 			goto out;
 
-		ceph_decode_copy(&dp, &th->validity, sizeof(th->validity));
-		ceph_decode_timespec(&validity, &th->validity);
-		th->expires = get_seconds() + validity.tv_sec;
-		th->renew_after = th->expires - (validity.tv_sec / 4);
-		dout(" expires=%lu renew_after=%lu\n", th->expires,
-		     th->renew_after);
+		ceph_decode_copy(&dp, &new_validity, sizeof(new_validity));
+		ceph_decode_timespec(&validity, &new_validity);
+		new_expires = get_seconds() + validity.tv_sec;
+		new_renew_after = new_expires - (validity.tv_sec / 4);
+		dout(" expires=%lu renew_after=%lu\n", new_expires,
+		     new_renew_after);
 
 		/* ticket blob for service */
 		ceph_decode_8_safe(&p, end, is_enc, bad);
@@ -223,13 +227,21 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
 		dout(" ticket blob is %d bytes\n", dlen);
 		ceph_decode_need(&tp, tpend, 1 + sizeof(u64), bad);
 		struct_v = ceph_decode_8(&tp);
-		th->secret_id = ceph_decode_64(&tp);
+		new_secret_id = ceph_decode_64(&tp);
 		ret = ceph_decode_buffer(&new_ticket_blob, &tp, tpend);
 		if (ret)
 			goto out;
+
+		/* all is well, update our ticket */
+		ceph_crypto_key_destroy(&th->session_key);
 		if (th->ticket_blob)
 			ceph_buffer_put(th->ticket_blob);
+		th->session_key = new_session_key;
 		th->ticket_blob = new_ticket_blob;
+		th->validity = new_validity;
+		th->secret_id = new_secret_id;
+		th->expires = new_expires;
+		th->renew_after = new_renew_after;
 		dout(" got ticket service %d (%s) secret_id %lld len %d\n",
 		     type, ceph_entity_type_name(type), th->secret_id,
 		     (int)th->ticket_blob->vec.iov_len);
-- 
cgit v1.1


From 12eadc190038e68b5884a4aa313b6ab89ba60f5e Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Mon, 15 Mar 2010 22:20:39 -0700
Subject: ceph: fix null pointer deref of r_osd in debug output

This causes an oops when debug output is enabled and we kick
an osd request with no current r_osd (sometime after an osd
failure).  Check the pointer before dereferencing.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/osd_client.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c
index dbe63db9..2210382 100644
--- a/fs/ceph/osd_client.c
+++ b/fs/ceph/osd_client.c
@@ -913,7 +913,7 @@ static int __kick_requests(struct ceph_osd_client *osdc,
 
 kick:
 		dout("kicking %p tid %llu osd%d\n", req, req->r_tid,
-		     req->r_osd->o_osd);
+		     req->r_osd ? req->r_osd->o_osd : -1);
 		req->r_flags |= CEPH_OSD_FLAG_RETRY;
 		err = __send_request(osdc, req);
 		if (err) {
-- 
cgit v1.1


From 4ea0043a29c82ca52ca54728d837314563bec574 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Tue, 16 Mar 2010 10:36:40 -0700
Subject: ceph: drop unnecessary WARN_ON in caps migration

If we don't have the exported cap it's because we already released it. No
need to WARN.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/caps.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 57d9b44..726c8d4 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -2550,9 +2550,8 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
 			ci->i_cap_exporting_issued = cap->issued;
 		}
 		__ceph_remove_cap(cap);
-	} else {
-		WARN_ON(!cap);
 	}
+	/* else, we already released it */
 
 	spin_unlock(&inode->i_lock);
 }
-- 
cgit v1.1


From cdc2ce056a3620139056b60ad7f6d355ad13f445 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Tue, 16 Mar 2010 13:39:28 -0700
Subject: ceph: fix session locking in handle_caps, ceph_check_caps

Passing a session pointer to ceph_check_caps() used to mean it would leave
the session mutex locked.  That wasn't always possible if it wasn't passed
CHECK_CAPS_AUTHONLY.   If could unlock the passed session and lock a
differet session mutex, which was clearly wrong, and also emitted a
warning when it a racing CPU retook it and we did an unlock from the wrong
context.

This was only a problem when there was more than one MDS.

First, make ceph_check_caps unconditionally drop the session mutex, so that
it is free to lock other sessions as needed.  Then adjust the one caller
that passes in a session (handle_cap_grant) accordingly.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/caps.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 726c8d4..7828486 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1407,6 +1407,7 @@ static int try_nonblocking_invalidate(struct inode *inode)
  */
 void ceph_check_caps(struct ceph_inode_info *ci, int flags,
 		     struct ceph_mds_session *session)
+	__releases(session->s_mutex)
 {
 	struct ceph_client *client = ceph_inode_to_client(&ci->vfs_inode);
 	struct ceph_mds_client *mdsc = &client->mdsc;
@@ -1414,7 +1415,6 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
 	struct ceph_cap *cap;
 	int file_wanted, used;
 	int took_snap_rwsem = 0;             /* true if mdsc->snap_rwsem held */
-	int drop_session_lock = session ? 0 : 1;
 	int issued, implemented, want, retain, revoking, flushing = 0;
 	int mds = -1;   /* keep track of how far we've gone through i_caps list
 			   to avoid an infinite loop on retry */
@@ -1639,7 +1639,7 @@ ack:
 	if (queue_invalidate)
 		ceph_queue_invalidate(inode);
 
-	if (session && drop_session_lock)
+	if (session)
 		mutex_unlock(&session->s_mutex);
 	if (took_snap_rwsem)
 		up_read(&mdsc->snap_rwsem);
@@ -2688,14 +2688,17 @@ void ceph_handle_caps(struct ceph_mds_session *session,
 	case CEPH_CAP_OP_REVOKE:
 	case CEPH_CAP_OP_GRANT:
 		r = handle_cap_grant(inode, h, session, cap, msg->middle);
-		if (r == 1)
+		if (r == 1) {
 			ceph_check_caps(ceph_inode(inode),
 					CHECK_CAPS_NODELAY|CHECK_CAPS_AUTHONLY,
 					session);
-		else if (r == 2)
+			session = NULL;
+		} else if (r == 2) {
 			ceph_check_caps(ceph_inode(inode),
 					CHECK_CAPS_NODELAY,
 					session);
+			session = NULL;
+		}
 		break;
 
 	case CEPH_CAP_OP_FLUSH_ACK:
@@ -2713,7 +2716,8 @@ void ceph_handle_caps(struct ceph_mds_session *session,
 	}
 
 done:
-	mutex_unlock(&session->s_mutex);
+	if (session)
+		mutex_unlock(&session->s_mutex);
 
 	if (check_caps)
 		ceph_check_caps(ceph_inode(inode), CHECK_CAPS_NODELAY, NULL);
-- 
cgit v1.1


From 15637c8b1251c38694c32214eba69b72a30e9d9b Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Tue, 16 Mar 2010 13:42:00 -0700
Subject: ceph: clean up handle_cap_grant, handle_caps wrt session mutex

Drop session mutex unconditionally in handle_cap_grant, and do the
check_caps from the handle_cap_grant helper.  This avoids using a magic
return value.

Also avoid using a flag variable in the IMPORT case and call
check_caps at the appropriate point.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/caps.c | 57 +++++++++++++++++++++++++--------------------------------
 1 file changed, 25 insertions(+), 32 deletions(-)

(limited to 'fs')

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 7828486..d9e860f 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -2195,18 +2195,19 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
  * Handle a cap GRANT message from the MDS.  (Note that a GRANT may
  * actually be a revocation if it specifies a smaller cap set.)
  *
- * caller holds s_mutex.
+ * caller holds s_mutex and i_lock, we drop both.
+ *
  * return value:
  *  0 - ok
  *  1 - check_caps on auth cap only (writeback)
  *  2 - check_caps (ack revoke)
  */
-static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
-			    struct ceph_mds_session *session,
-			    struct ceph_cap *cap,
-			    struct ceph_buffer *xattr_buf)
+static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
+			     struct ceph_mds_session *session,
+			     struct ceph_cap *cap,
+			     struct ceph_buffer *xattr_buf)
 	__releases(inode->i_lock)
-
+	__releases(session->s_mutex)
 {
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	int mds = session->s_mds;
@@ -2216,7 +2217,7 @@ static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
 	u64 size = le64_to_cpu(grant->size);
 	u64 max_size = le64_to_cpu(grant->max_size);
 	struct timespec mtime, atime, ctime;
-	int reply = 0;
+	int check_caps = 0;
 	int wake = 0;
 	int writeback = 0;
 	int revoked_rdcache = 0;
@@ -2329,10 +2330,10 @@ static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
 		if ((used & ~newcaps) & CEPH_CAP_FILE_BUFFER)
 			writeback = 1; /* will delay ack */
 		else if (dirty & ~newcaps)
-			reply = 1;     /* initiate writeback in check_caps */
+			check_caps = 1;  /* initiate writeback in check_caps */
 		else if (((used & ~newcaps) & CEPH_CAP_FILE_CACHE) == 0 ||
 			   revoked_rdcache)
-			reply = 2;     /* send revoke ack in check_caps */
+			check_caps = 2;     /* send revoke ack in check_caps */
 		cap->issued = newcaps;
 		cap->implemented |= newcaps;
 	} else if (cap->issued == newcaps) {
@@ -2361,7 +2362,14 @@ static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
 		ceph_queue_invalidate(inode);
 	if (wake)
 		wake_up(&ci->i_cap_wq);
-	return reply;
+
+	if (check_caps == 1)
+		ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_AUTHONLY,
+				session);
+	else if (check_caps == 2)
+		ceph_check_caps(ci, CHECK_CAPS_NODELAY, session);
+	else
+		mutex_unlock(&session->s_mutex);
 }
 
 /*
@@ -2622,9 +2630,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
 	u64 cap_id;
 	u64 size, max_size;
 	u64 tid;
-	int check_caps = 0;
 	void *snaptrace;
-	int r;
 
 	dout("handle_caps from mds%d\n", mds);
 
@@ -2669,8 +2675,9 @@ void ceph_handle_caps(struct ceph_mds_session *session,
 	case CEPH_CAP_OP_IMPORT:
 		handle_cap_import(mdsc, inode, h, session,
 				  snaptrace, le32_to_cpu(h->snap_trace_len));
-		check_caps = 1; /* we may have sent a RELEASE to the old auth */
-		goto done;
+		ceph_check_caps(ceph_inode(inode), CHECK_CAPS_NODELAY,
+				session);
+		goto done_unlocked;
 	}
 
 	/* the rest require a cap */
@@ -2687,19 +2694,8 @@ void ceph_handle_caps(struct ceph_mds_session *session,
 	switch (op) {
 	case CEPH_CAP_OP_REVOKE:
 	case CEPH_CAP_OP_GRANT:
-		r = handle_cap_grant(inode, h, session, cap, msg->middle);
-		if (r == 1) {
-			ceph_check_caps(ceph_inode(inode),
-					CHECK_CAPS_NODELAY|CHECK_CAPS_AUTHONLY,
-					session);
-			session = NULL;
-		} else if (r == 2) {
-			ceph_check_caps(ceph_inode(inode),
-					CHECK_CAPS_NODELAY,
-					session);
-			session = NULL;
-		}
-		break;
+		handle_cap_grant(inode, h, session, cap, msg->middle);
+		goto done_unlocked;
 
 	case CEPH_CAP_OP_FLUSH_ACK:
 		handle_cap_flush_ack(inode, tid, h, session, cap);
@@ -2716,11 +2712,8 @@ void ceph_handle_caps(struct ceph_mds_session *session,
 	}
 
 done:
-	if (session)
-		mutex_unlock(&session->s_mutex);
-
-	if (check_caps)
-		ceph_check_caps(ceph_inode(inode), CHECK_CAPS_NODELAY, NULL);
+	mutex_unlock(&session->s_mutex);
+done_unlocked:
 	if (inode)
 		iput(inode);
 	return;
-- 
cgit v1.1


From 916623da10e270c7e9e802a7ddfe1ec8f890982d Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Tue, 16 Mar 2010 15:01:07 -0700
Subject: ceph: only release unused caps with mds requests

We were releasing used caps (e.g. FILE_CACHE) from encode_inode_release
with MDS requests (e.g. setattr).  We don't carry refs on most caps, so
this code worked most of the time, but for setattr (utimes) we try to
drop Fscr.

This causes cap state to get slightly out of sync with reality, and may
result in subsequent mds revoke messages getting ignored.

Fix by only releasing unused caps.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/caps.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index d9e860f..7d0a0d0 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -2836,11 +2836,18 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
 	struct ceph_cap *cap;
 	struct ceph_mds_request_release *rel = *p;
 	int ret = 0;
-
-	dout("encode_inode_release %p mds%d drop %s unless %s\n", inode,
-	     mds, ceph_cap_string(drop), ceph_cap_string(unless));
+	int used = 0;
 
 	spin_lock(&inode->i_lock);
+	used = __ceph_caps_used(ci);
+
+	dout("encode_inode_release %p mds%d used %s drop %s unless %s\n", inode,
+	     mds, ceph_cap_string(used), ceph_cap_string(drop),
+	     ceph_cap_string(unless));
+
+	/* only drop unused caps */
+	drop &= ~used;
+
 	cap = __get_cap_for_mds(ci, mds);
 	if (cap && __cap_is_valid(cap)) {
 		if (force ||
-- 
cgit v1.1


From 80fc7314a7e26e8d2e4ba5b3d8cc2d4aeb750015 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Tue, 16 Mar 2010 15:28:54 -0700
Subject: ceph: fix mds sync() race with completing requests

The wait_unsafe_requests() helper dropped the mdsc mutex to wait
for each request to complete, and then examined r_node to get the
next request after retaking the lock.  But the request completion
removes the request from the tree, so r_node was always undefined
at this point.  Since it's a small race, it usually led to a
valid request, but not always.  The result was an occasional
crash in rb_next() while dereferencing node->rb_left.

Fix this by clearing the rb_node when removing the request from
the request tree, and not walking off into the weeds when we
are done waiting for a request.  Since the request we waited on
will _always_ be out of the request tree, take a ref on the next
request, in the hopes that it won't be.  But if it is, it's ok:
we can start over from the beginning (and traverse over older read
requests again).

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/mds_client.c | 27 ++++++++++++++++++++-------
 1 file changed, 20 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index a260010..5ec8641 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -529,6 +529,7 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
 {
 	dout("__unregister_request %p tid %lld\n", req, req->r_tid);
 	rb_erase(&req->r_node, &mdsc->request_tree);
+	RB_CLEAR_NODE(&req->r_node);
 	ceph_mdsc_put_request(req);
 
 	if (req->r_unsafe_dir) {
@@ -2682,29 +2683,41 @@ void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc)
  */
 static void wait_unsafe_requests(struct ceph_mds_client *mdsc, u64 want_tid)
 {
-	struct ceph_mds_request *req = NULL;
+	struct ceph_mds_request *req = NULL, *nextreq;
 	struct rb_node *n;
 
 	mutex_lock(&mdsc->mutex);
 	dout("wait_unsafe_requests want %lld\n", want_tid);
+restart:
 	req = __get_oldest_req(mdsc);
 	while (req && req->r_tid <= want_tid) {
+		/* find next request */
+		n = rb_next(&req->r_node);
+		if (n)
+			nextreq = rb_entry(n, struct ceph_mds_request, r_node);
+		else
+			nextreq = NULL;
 		if ((req->r_op & CEPH_MDS_OP_WRITE)) {
 			/* write op */
 			ceph_mdsc_get_request(req);
+			if (nextreq)
+				ceph_mdsc_get_request(nextreq);
 			mutex_unlock(&mdsc->mutex);
 			dout("wait_unsafe_requests  wait on %llu (want %llu)\n",
 			     req->r_tid, want_tid);
 			wait_for_completion(&req->r_safe_completion);
 			mutex_lock(&mdsc->mutex);
-			n = rb_next(&req->r_node);
 			ceph_mdsc_put_request(req);
-		} else {
-			n = rb_next(&req->r_node);
+			if (!nextreq)
+				break;  /* next dne before, so we're done! */
+			if (RB_EMPTY_NODE(&nextreq->r_node)) {
+				/* next request was removed from tree */
+				ceph_mdsc_put_request(nextreq);
+				goto restart;
+			}
+			ceph_mdsc_put_request(nextreq);  /* won't go away */
 		}
-		if (!n)
-			break;
-		req = rb_entry(n, struct ceph_mds_request, r_node);
+		req = nextreq;
 	}
 	mutex_unlock(&mdsc->mutex);
 	dout("wait_unsafe_requests done\n");
-- 
cgit v1.1


From efd7576b2392cc5a0934352936d793e8884c46bf Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Wed, 17 Mar 2010 10:05:28 -0700
Subject: ceph: fix pg pool decoding from incremental osdmap update

The incremental map decoding of pg pool updates wasn't skipping
the snaps and removed_snaps vectors.  This caused osd requests
to stall when pool snapshots were created or fs snapshots were
deleted.  Use a common helper for full and incremental map
decoders that decodes pools properly.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/osdmap.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c
index b83f269..d82fe87 100644
--- a/fs/ceph/osdmap.c
+++ b/fs/ceph/osdmap.c
@@ -480,6 +480,14 @@ static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, int id)
 	return NULL;
 }
 
+void __decode_pool(void **p, struct ceph_pg_pool_info *pi)
+{
+	ceph_decode_copy(p, &pi->v, sizeof(pi->v));
+	calc_pg_masks(pi);
+	*p += le32_to_cpu(pi->v.num_snaps) * sizeof(u64);
+	*p += le32_to_cpu(pi->v.num_removed_snap_intervals) * sizeof(u64) * 2;
+}
+
 /*
  * decode a full map.
  */
@@ -526,12 +534,8 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
 				   ev, CEPH_PG_POOL_VERSION);
 			goto bad;
 		}
-		ceph_decode_copy(p, &pi->v, sizeof(pi->v));
+		__decode_pool(p, pi);
 		__insert_pg_pool(&map->pg_pools, pi);
-		calc_pg_masks(pi);
-		*p += le32_to_cpu(pi->v.num_snaps) * sizeof(u64);
-		*p += le32_to_cpu(pi->v.num_removed_snap_intervals)
-			* sizeof(u64) * 2;
 	}
 	ceph_decode_32_safe(p, end, map->pool_max, bad);
 
@@ -714,8 +718,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
 			pi->id = pool;
 			__insert_pg_pool(&map->pg_pools, pi);
 		}
-		ceph_decode_copy(p, &pi->v, sizeof(pi->v));
-		calc_pg_masks(pi);
+		__decode_pool(p, pi);
 	}
 
 	/* old_pool */
-- 
cgit v1.1


From e4cb4cb8a03adde1aa4b874623c50b9a5b56e635 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Thu, 18 Mar 2010 13:43:09 -0700
Subject: ceph: prevent dup stale messages to console for restarting mds

Prevent duplicate 'mds0 caps stale' message from spamming the console every
few seconds while the MDS restarts.  Set s_renew_requested earlier, so that
we only print the message once, even if we don't send an actual request.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/mds_client.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 5ec8641..5cbf46a 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -863,6 +863,7 @@ static int send_renew_caps(struct ceph_mds_client *mdsc,
 	if (time_after_eq(jiffies, session->s_cap_ttl) &&
 	    time_after_eq(session->s_cap_ttl, session->s_renew_requested))
 		pr_info("mds%d caps stale\n", session->s_mds);
+	session->s_renew_requested = jiffies;
 
 	/* do not try to renew caps until a recovering mds has reconnected
 	 * with its clients. */
@@ -875,7 +876,6 @@ static int send_renew_caps(struct ceph_mds_client *mdsc,
 
 	dout("send_renew_caps to mds%d (%s)\n", session->s_mds,
 		ceph_mds_state_name(state));
-	session->s_renew_requested = jiffies;
 	msg = create_session_msg(CEPH_SESSION_REQUEST_RENEWCAPS,
 				 ++session->s_renew_seq);
 	if (IS_ERR(msg))
-- 
cgit v1.1


From 3c3f2e32effd4c6acc3a9434bd7eecb0af653d89 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Thu, 18 Mar 2010 15:20:53 -0700
Subject: ceph: fix connection fault con_work reentrancy problem

The messenger fault was clearing the BUSY bit, for reasons unclear.  This
made it possible for the con->ops->fault function to reopen the connection,
and requeue work in the workqueue--even though the current thread was
already in con_work.

This avoids a problem where the client busy loops with connection failures
on an unreachable OSD, but doesn't address the root cause of that problem.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/messenger.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c
index 203c435..9832855 100644
--- a/fs/ceph/messenger.c
+++ b/fs/ceph/messenger.c
@@ -1836,8 +1836,6 @@ static void ceph_fault(struct ceph_connection *con)
 		goto out;
 	}
 
-	clear_bit(BUSY, &con->state);  /* to avoid an improbable race */
-
 	mutex_lock(&con->mutex);
 	if (test_bit(CLOSED, &con->state))
 		goto out_unlock;
-- 
cgit v1.1


From 3dd72fc0e6dc49c79fa9e7cd7c654deac7ccaa29 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Mon, 22 Mar 2010 14:42:30 -0700
Subject: ceph: rename r_sent_stamp r_stamp

Make variable name slightly more generic, since it will (soon)
reflect either the time the request was sent OR the time it was
last determined to be still retrying.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/osd_client.c | 12 ++++++------
 fs/ceph/osd_client.h |  2 +-
 2 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c
index 2210382..04359217 100644
--- a/fs/ceph/osd_client.c
+++ b/fs/ceph/osd_client.c
@@ -633,7 +633,7 @@ static int __send_request(struct ceph_osd_client *osdc,
 	reqhead->flags |= cpu_to_le32(req->r_flags);  /* e.g., RETRY */
 	reqhead->reassert_version = req->r_reassert_version;
 
-	req->r_sent_stamp = jiffies;
+	req->r_stamp = jiffies;
 	list_move_tail(&osdc->req_lru, &req->r_req_lru_item);
 
 	ceph_msg_get(req->r_request); /* send consumes a ref */
@@ -660,7 +660,7 @@ static void handle_timeout(struct work_struct *work)
 	unsigned long timeout = osdc->client->mount_args->osd_timeout * HZ;
 	unsigned long keepalive =
 		osdc->client->mount_args->osd_keepalive_timeout * HZ;
-	unsigned long last_sent = 0;
+	unsigned long last_stamp = 0;
 	struct rb_node *p;
 	struct list_head slow_osds;
 
@@ -697,12 +697,12 @@ static void handle_timeout(struct work_struct *work)
 		req = list_entry(osdc->req_lru.next, struct ceph_osd_request,
 				 r_req_lru_item);
 
-		if (time_before(jiffies, req->r_sent_stamp + timeout))
+		if (time_before(jiffies, req->r_stamp + timeout))
 			break;
 
-		BUG_ON(req == last_req && req->r_sent_stamp == last_sent);
+		BUG_ON(req == last_req && req->r_stamp == last_stamp);
 		last_req = req;
-		last_sent = req->r_sent_stamp;
+		last_stamp = req->r_stamp;
 
 		osd = req->r_osd;
 		BUG_ON(!osd);
@@ -718,7 +718,7 @@ static void handle_timeout(struct work_struct *work)
 	 */
 	INIT_LIST_HEAD(&slow_osds);
 	list_for_each_entry(req, &osdc->req_lru, r_req_lru_item) {
-		if (time_before(jiffies, req->r_sent_stamp + keepalive))
+		if (time_before(jiffies, req->r_stamp + keepalive))
 			break;
 
 		osd = req->r_osd;
diff --git a/fs/ceph/osd_client.h b/fs/ceph/osd_client.h
index 1b1a3ca..b075991 100644
--- a/fs/ceph/osd_client.h
+++ b/fs/ceph/osd_client.h
@@ -70,7 +70,7 @@ struct ceph_osd_request {
 
 	char              r_oid[40];          /* object name */
 	int               r_oid_len;
-	unsigned long     r_sent_stamp;
+	unsigned long     r_stamp;            /* send OR check time */
 	bool              r_resend;           /* msg send failed, needs retry */
 
 	struct ceph_file_layout r_file_layout;
-- 
cgit v1.1


From 87b315a5b5cec5d7086494b203577602f5befc8c Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Mon, 22 Mar 2010 14:51:18 -0700
Subject: ceph: avoid reopening osd connections when address hasn't changed

We get a fault callback on _every_ tcp connection fault.  Normally, we
want to reopen the connection when that happens.  If the address we have
is bad, however, and connection attempts always result in a connection
refused or similar error, explicitly closing and reopening the msgr
connection just prevents the messenger's backoff logic from kicking in.
The result can be a console full of

[ 3974.417106] ceph: osd11 10.3.14.138:6800 connection failed
[ 3974.423295] ceph: osd11 10.3.14.138:6800 connection failed
[ 3974.429709] ceph: osd11 10.3.14.138:6800 connection failed

Instead, if we get a fault, and have outstanding requests, but the osd
address hasn't changed and the connection never successfully connected in
the first place, do nothing to the osd connection.  The messenger layer
will back off and retry periodically, because we never connected and thus
the lossy bit is not set.

Instead, touch each request's r_stamp so that handle_timeout can tell the
request is still alive and kicking.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/messenger.c  |  8 ++++++++
 fs/ceph/messenger.h  |  1 +
 fs/ceph/osd_client.c | 15 ++++++++++++++-
 3 files changed, 23 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c
index 9832855..a32f0f8 100644
--- a/fs/ceph/messenger.c
+++ b/fs/ceph/messenger.c
@@ -366,6 +366,14 @@ void ceph_con_open(struct ceph_connection *con, struct ceph_entity_addr *addr)
 }
 
 /*
+ * return true if this connection ever successfully opened
+ */
+bool ceph_con_opened(struct ceph_connection *con)
+{
+	return con->connect_seq > 0;
+}
+
+/*
  * generic get/put
  */
 struct ceph_connection *ceph_con_get(struct ceph_connection *con)
diff --git a/fs/ceph/messenger.h b/fs/ceph/messenger.h
index 4caaa59..a343dae 100644
--- a/fs/ceph/messenger.h
+++ b/fs/ceph/messenger.h
@@ -223,6 +223,7 @@ extern void ceph_con_init(struct ceph_messenger *msgr,
 			  struct ceph_connection *con);
 extern void ceph_con_open(struct ceph_connection *con,
 			  struct ceph_entity_addr *addr);
+extern bool ceph_con_opened(struct ceph_connection *con);
 extern void ceph_con_close(struct ceph_connection *con);
 extern void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg);
 extern void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg);
diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c
index 04359217..c7b4ded 100644
--- a/fs/ceph/osd_client.c
+++ b/fs/ceph/osd_client.c
@@ -413,11 +413,22 @@ static void remove_old_osds(struct ceph_osd_client *osdc, int remove_all)
  */
 static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd)
 {
+	struct ceph_osd_request *req;
 	int ret = 0;
 
 	dout("__reset_osd %p osd%d\n", osd, osd->o_osd);
 	if (list_empty(&osd->o_requests)) {
 		__remove_osd(osdc, osd);
+	} else if (memcmp(&osdc->osdmap->osd_addr[osd->o_osd],
+			  &osd->o_con.peer_addr,
+			  sizeof(osd->o_con.peer_addr)) == 0 &&
+		   !ceph_con_opened(&osd->o_con)) {
+		dout(" osd addr hasn't changed and connection never opened,"
+		     " letting msgr retry");
+		/* touch each r_stamp for handle_timeout()'s benfit */
+		list_for_each_entry(req, &osd->o_requests, r_osd_item)
+			req->r_stamp = jiffies;
+		ret = -EAGAIN;
 	} else {
 		ceph_con_close(&osd->o_con);
 		ceph_con_open(&osd->o_con, &osdc->osdmap->osd_addr[osd->o_osd]);
@@ -862,7 +873,9 @@ static int __kick_requests(struct ceph_osd_client *osdc,
 
 	dout("kick_requests osd%d\n", kickosd ? kickosd->o_osd : -1);
 	if (kickosd) {
-		__reset_osd(osdc, kickosd);
+		err = __reset_osd(osdc, kickosd);
+		if (err == -EAGAIN)
+			return 1;
 	} else {
 		for (p = rb_first(&osdc->osds); p; p = n) {
 			struct ceph_osd *osd =
-- 
cgit v1.1


From ec4318bcb4c59d8b8bf7037c9f444a9887ccb265 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Fri, 19 Mar 2010 13:24:39 -0700
Subject: ceph: fix snap rebuild condition

We were rebuilding the snap context when it was not necessary
(i.e. when the realm seq hadn't changed _and_ the parent seq
was still older), which caused page snapc pointers to not match
the realm's snapc pointer (even though the snap context itself
was identical).  This confused begin_write and put it into an
endless loop.

The correct logic is: rebuild snapc if _my_ realm seq changed, or
if my parent realm's seq is newer than mine (and thus mine needs
to be rebuilt too).

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/snap.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 8a43bc8..df04e21 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -314,9 +314,9 @@ static int build_snap_context(struct ceph_snap_realm *realm)
 	   because we rebuild_snap_realms() works _downward_ in
 	   hierarchy after each update.) */
 	if (realm->cached_context &&
-	    realm->cached_context->seq <= realm->seq &&
+	    realm->cached_context->seq == realm->seq &&
 	    (!parent ||
-	     realm->cached_context->seq <= parent->cached_context->seq)) {
+	     realm->cached_context->seq >= parent->cached_context->seq)) {
 		dout("build_snap_context %llx %p: %p seq %lld (%d snaps)"
 		     " (unchanged)\n",
 		     realm->ino, realm, realm->cached_context,
-- 
cgit v1.1


From 8f883c24de33ba929c95e018ac0ba66e4f46734b Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Fri, 19 Mar 2010 13:27:53 -0700
Subject: ceph: make write_begin wait propagate ERESTARTSYS

Currently, if the wait_event_interruptible is interrupted, we
return EAGAIN unconditionally and loop, such that we aren't, in
fact, interruptible.  So, propagate ERESTARTSYS if we get it.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/addr.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 23bb0ce..ce8ef61 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -919,6 +919,10 @@ static int context_is_writeable_or_written(struct inode *inode,
 /*
  * We are only allowed to write into/dirty the page if the page is
  * clean, or already dirty within the same snap context.
+ *
+ * called with page locked.
+ * return success with page locked,
+ * or any failure (incl -EAGAIN) with page unlocked.
  */
 static int ceph_update_writeable_page(struct file *file,
 			    loff_t pos, unsigned len,
@@ -961,9 +965,11 @@ retry_locked:
 			snapc = ceph_get_snap_context((void *)page->private);
 			unlock_page(page);
 			ceph_queue_writeback(inode);
-			wait_event_interruptible(ci->i_cap_wq,
+			r = wait_event_interruptible(ci->i_cap_wq,
 			       context_is_writeable_or_written(inode, snapc));
 			ceph_put_snap_context(snapc);
+			if (r == -ERESTARTSYS)
+				return r;
 			return -EAGAIN;
 		}
 
@@ -1035,7 +1041,7 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping,
 	int r;
 
 	do {
-		/* get a page*/
+		/* get a page */
 		page = grab_cache_page_write_begin(mapping, index, 0);
 		if (!page)
 			return -ENOMEM;
-- 
cgit v1.1


From 9c423956b8a495f0c048143abc5da955a70eac97 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Sat, 20 Mar 2010 20:43:28 -0700
Subject: ceph: propagate mds session allocation failures to caller

Return error to original caller if register_session() fails.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/mds_client.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 5cbf46a..b6b5348 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1567,8 +1567,13 @@ static int __do_request(struct ceph_mds_client *mdsc,
 
 	/* get, open session */
 	session = __ceph_lookup_mds_session(mdsc, mds);
-	if (!session)
+	if (!session) {
 		session = register_session(mdsc, mds);
+		if (IS_ERR(session)) {
+			err = PTR_ERR(session);
+			goto finish;
+		}
+	}
 	dout("do_request mds%d session %p state %s\n", mds, session,
 	     session_state_name(session->s_state));
 	if (session->s_state != CEPH_MDS_SESSION_OPEN &&
-- 
cgit v1.1


From 4736b009b880b7c19bea36327a71032a6dbee402 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Sat, 20 Mar 2010 15:30:16 +0300
Subject: ceph: handle kmalloc() failure

Return ERR_PTR(-ENOMEM) if kmalloc() fails.  We handle allocation
failures the same way later in the function.

Signed-off-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/mds_client.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index b6b5348..ad0fbc3 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -328,6 +328,8 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
 	struct ceph_mds_session *s;
 
 	s = kzalloc(sizeof(*s), GFP_NOFS);
+	if (!s)
+		return ERR_PTR(-ENOMEM);
 	s->s_mdsc = mdsc;
 	s->s_mds = mds;
 	s->s_state = CEPH_MDS_SESSION_NEW;
-- 
cgit v1.1


From d96d60498ff748c5a88c72ec5d1cc4ba9a583e7e Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Sat, 20 Mar 2010 20:50:58 -0700
Subject: ceph: fix session check on mds reply

Fix a broken check that a reply came back from the same MDS we sent the
request to.  I don't think a case that actually triggers this would ever
come up in practice, but it's clearly wrong and easy to fix.

Reported-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/mds_client.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index ad0fbc3..5268d40 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1778,7 +1778,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
 	dout("handle_reply %p\n", req);
 
 	/* correct session? */
-	if (!req->r_session && req->r_session != session) {
+	if (req->r_session != session) {
 		pr_err("mdsc_handle_reply got %llu on session mds%d"
 		       " not mds%d\n", tid, session->s_mds,
 		       req->r_session ? req->r_session->s_mds : -1);
-- 
cgit v1.1


From 393f66209669ad23f4f6d4191234c1df4367df3c Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Wed, 10 Mar 2010 12:03:32 -0800
Subject: ceph: fix possible double-free of mds request reference

Clear pointer to mds request after dropping the reference to
ensure we don't drop it again, as there is at least one error
path through this function that does not reset fi->last_readdir
to a new value.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/dir.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 5107384..8a9116e 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -288,8 +288,10 @@ more:
 			CEPH_MDS_OP_LSSNAP : CEPH_MDS_OP_READDIR;
 
 		/* discard old result, if any */
-		if (fi->last_readdir)
+		if (fi->last_readdir) {
 			ceph_mdsc_put_request(fi->last_readdir);
+			fi->last_readdir = NULL;
+		}
 
 		/* requery frag tree, as the frag topology may have changed */
 		frag = ceph_choose_frag(ceph_inode(inode), frag, NULL, NULL);
-- 
cgit v1.1


From 110d735a0ae69bdd11af9acb6ea3b979137eb118 Mon Sep 17 00:00:00 2001
From: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Date: Mon, 22 Mar 2010 21:36:06 +0900
Subject: nilfs2: fix hang-up of cleaner after log writer returned with error

According to the report from Andreas Beckmann (Message-ID:
<4BA54677.3090902@abeckmann.de>), nilfs in 2.6.33 kernel got stuck
after a disk full error.

This turned out to be a regression by log writer updates merged at
kernel 2.6.33.  nilfs_segctor_abort_construction, which is a cleanup
function for erroneous cases, was skipping writeback completion for
some logs.

This fixes the bug and would resolve the hang issue.

Reported-by: Andreas Beckmann <debian@abeckmann.de>
Signed-off-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Tested-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Cc: stable <stable@kernel.org>                     [2.6.33.x]
---
 fs/nilfs2/segment.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index b622123..c161d89 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -1897,8 +1897,7 @@ static void nilfs_segctor_abort_construction(struct nilfs_sc_info *sci,
 
 	list_splice_tail_init(&sci->sc_write_logs, &logs);
 	ret = nilfs_wait_on_logs(&logs);
-	if (ret)
-		nilfs_abort_logs(&logs, NULL, sci->sc_super_root, ret);
+	nilfs_abort_logs(&logs, NULL, sci->sc_super_root, ret ? : err);
 
 	list_splice_tail_init(&sci->sc_segbufs, &logs);
 	nilfs_cancel_segusage(&logs, nilfs->ns_sufile);
-- 
cgit v1.1


From d067633b4483f3c7d971d8f889f35340a8635bb5 Mon Sep 17 00:00:00 2001
From: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Date: Mon, 22 Mar 2010 19:33:43 +0900
Subject: nilfs2: fix imperfect completion wait in nilfs_wait_on_logs

nilfs_wait_on_logs has a potential to slip out before completion of
all bio requests when it met an error.  This synchronization fault may
cause unexpected results, for instance, violative access to freed
segment buffers from an end-bio callback routine.

This fixes the issue by ensuring that nilfs_wait_on_logs waits all
given logs.

Signed-off-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
---
 fs/nilfs2/segbuf.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index 636eaaf..6129a43 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -323,14 +323,14 @@ int nilfs_write_logs(struct list_head *logs, struct the_nilfs *nilfs)
 int nilfs_wait_on_logs(struct list_head *logs)
 {
 	struct nilfs_segment_buffer *segbuf;
-	int err;
+	int err, ret = 0;
 
 	list_for_each_entry(segbuf, logs, sb_list) {
 		err = nilfs_segbuf_wait(segbuf);
-		if (err)
-			return err;
+		if (err && !ret)
+			ret = err;
 	}
-	return 0;
+	return ret;
 }
 
 /*
-- 
cgit v1.1


From 157f1071354db1aed885816094888e0e257c9d0a Mon Sep 17 00:00:00 2001
From: Tyler Hicks <tyhicks@linux.vnet.ibm.com>
Date: Thu, 11 Feb 2010 07:10:38 -0600
Subject: eCryptfs: Fix metadata in xattr feature regression

Fixes regression in 8faece5f906725c10e7a1f6caf84452abadbdc7b

When using the ecryptfs_xattr_metadata mount option, eCryptfs stores the
metadata (normally stored at the front of the file) in the user.ecryptfs
xattr.  This causes ecryptfs_crypt_stat.num_header_bytes_at_front to be
0, since there is no header data at the front of the file.  This results
in too much memory being requested and ENOMEM being returned from
ecryptfs_write_metadata().

This patch fixes the problem by using the num_header_bytes_at_front
variable for specifying the max size of the metadata, despite whether it
is stored in the header or xattr.

Reviewed-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Tyler Hicks <tyhicks@linux.vnet.ibm.com>
---
 fs/ecryptfs/crypto.c          |  7 ++++---
 fs/ecryptfs/ecryptfs_kernel.h |  8 ++++++++
 fs/ecryptfs/inode.c           |  2 +-
 fs/ecryptfs/mmap.c            | 19 +++++--------------
 4 files changed, 18 insertions(+), 18 deletions(-)

(limited to 'fs')

diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 7cb0a59..c907f6f 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -381,8 +381,8 @@ out:
 static void ecryptfs_lower_offset_for_extent(loff_t *offset, loff_t extent_num,
 					     struct ecryptfs_crypt_stat *crypt_stat)
 {
-	(*offset) = (crypt_stat->num_header_bytes_at_front
-		     + (crypt_stat->extent_size * extent_num));
+	(*offset) = ecryptfs_lower_header_size(crypt_stat)
+		    + (crypt_stat->extent_size * extent_num);
 }
 
 /**
@@ -834,7 +834,8 @@ void ecryptfs_set_default_sizes(struct ecryptfs_crypt_stat *crypt_stat)
 	set_extent_mask_and_shift(crypt_stat);
 	crypt_stat->iv_bytes = ECRYPTFS_DEFAULT_IV_BYTES;
 	if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR)
-		crypt_stat->num_header_bytes_at_front = 0;
+		crypt_stat->num_header_bytes_at_front =
+					ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE;
 	else {
 		if (PAGE_CACHE_SIZE <= ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE)
 			crypt_stat->num_header_bytes_at_front =
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 542f625..8456f70 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -464,6 +464,14 @@ struct ecryptfs_daemon {
 
 extern struct mutex ecryptfs_daemon_hash_mux;
 
+static inline size_t
+ecryptfs_lower_header_size(struct ecryptfs_crypt_stat *crypt_stat)
+{
+	if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR)
+		return 0;
+	return crypt_stat->num_header_bytes_at_front;
+}
+
 static inline struct ecryptfs_file_info *
 ecryptfs_file_to_private(struct file *file)
 {
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 4a430ab..1a73953 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -768,7 +768,7 @@ upper_size_to_lower_size(struct ecryptfs_crypt_stat *crypt_stat,
 {
 	loff_t lower_size;
 
-	lower_size = crypt_stat->num_header_bytes_at_front;
+	lower_size = ecryptfs_lower_header_size(crypt_stat);
 	if (upper_size != 0) {
 		loff_t num_extents;
 
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index df4ce99..5a30e01 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -97,19 +97,6 @@ out:
  *                        (big-endian)
  *     Octet  26:         Begin RFC 2440 authentication token packet set
  */
-static void set_header_info(char *page_virt,
-			    struct ecryptfs_crypt_stat *crypt_stat)
-{
-	size_t written;
-	size_t save_num_header_bytes_at_front =
-		crypt_stat->num_header_bytes_at_front;
-
-	crypt_stat->num_header_bytes_at_front =
-		ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE;
-	ecryptfs_write_header_metadata(page_virt + 20, crypt_stat, &written);
-	crypt_stat->num_header_bytes_at_front =
-		save_num_header_bytes_at_front;
-}
 
 /**
  * ecryptfs_copy_up_encrypted_with_header
@@ -146,9 +133,13 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page,
 			memset(page_virt, 0, PAGE_CACHE_SIZE);
 			/* TODO: Support more than one header extent */
 			if (view_extent_num == 0) {
+				size_t written;
+
 				rc = ecryptfs_read_xattr_region(
 					page_virt, page->mapping->host);
-				set_header_info(page_virt, crypt_stat);
+				ecryptfs_write_header_metadata(page_virt + 20,
+							       crypt_stat,
+							       &written);
 			}
 			kunmap_atomic(page_virt, KM_USER0);
 			flush_dcache_page(page);
-- 
cgit v1.1


From fa3ef1cb4e6e9958a9bfaa977c107c515907f102 Mon Sep 17 00:00:00 2001
From: Tyler Hicks <tyhicks@linux.vnet.ibm.com>
Date: Thu, 11 Feb 2010 05:09:14 -0600
Subject: eCryptfs: Rename ecryptfs_crypt_stat.num_header_bytes_at_front

This patch renames the num_header_bytes_at_front variable to
metadata_size since it now contains the max size of the metadata.

Reviewed-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Tyler Hicks <tyhicks@linux.vnet.ibm.com>
---
 fs/ecryptfs/crypto.c          | 24 ++++++++++--------------
 fs/ecryptfs/ecryptfs_kernel.h |  4 ++--
 fs/ecryptfs/inode.c           |  2 +-
 fs/ecryptfs/mmap.c            |  5 ++---
 4 files changed, 15 insertions(+), 20 deletions(-)

(limited to 'fs')

diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index c907f6f..391f558 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -834,14 +834,13 @@ void ecryptfs_set_default_sizes(struct ecryptfs_crypt_stat *crypt_stat)
 	set_extent_mask_and_shift(crypt_stat);
 	crypt_stat->iv_bytes = ECRYPTFS_DEFAULT_IV_BYTES;
 	if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR)
-		crypt_stat->num_header_bytes_at_front =
-					ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE;
+		crypt_stat->metadata_size = ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE;
 	else {
 		if (PAGE_CACHE_SIZE <= ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE)
-			crypt_stat->num_header_bytes_at_front =
+			crypt_stat->metadata_size =
 				ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE;
 		else
-			crypt_stat->num_header_bytes_at_front =	PAGE_CACHE_SIZE;
+			crypt_stat->metadata_size = PAGE_CACHE_SIZE;
 	}
 }
 
@@ -1238,8 +1237,7 @@ ecryptfs_write_header_metadata(char *virt,
 
 	header_extent_size = (u32)crypt_stat->extent_size;
 	num_header_extents_at_front =
-		(u16)(crypt_stat->num_header_bytes_at_front
-		      / crypt_stat->extent_size);
+		(u16)(crypt_stat->metadata_size / crypt_stat->extent_size);
 	put_unaligned_be32(header_extent_size, virt);
 	virt += 4;
 	put_unaligned_be16(num_header_extents_at_front, virt);
@@ -1382,7 +1380,7 @@ int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry)
 		rc = -EINVAL;
 		goto out;
 	}
-	virt_len = crypt_stat->num_header_bytes_at_front;
+	virt_len = crypt_stat->metadata_size;
 	order = get_order(virt_len);
 	/* Released in this function */
 	virt = (char *)ecryptfs_get_zeroed_pages(GFP_KERNEL, order);
@@ -1428,16 +1426,15 @@ static int parse_header_metadata(struct ecryptfs_crypt_stat *crypt_stat,
 	header_extent_size = get_unaligned_be32(virt);
 	virt += sizeof(__be32);
 	num_header_extents_at_front = get_unaligned_be16(virt);
-	crypt_stat->num_header_bytes_at_front =
-		(((size_t)num_header_extents_at_front
-		  * (size_t)header_extent_size));
+	crypt_stat->metadata_size = (((size_t)num_header_extents_at_front
+				     * (size_t)header_extent_size));
 	(*bytes_read) = (sizeof(__be32) + sizeof(__be16));
 	if ((validate_header_size == ECRYPTFS_VALIDATE_HEADER_SIZE)
-	    && (crypt_stat->num_header_bytes_at_front
+	    && (crypt_stat->metadata_size
 		< ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE)) {
 		rc = -EINVAL;
 		printk(KERN_WARNING "Invalid header size: [%zd]\n",
-		       crypt_stat->num_header_bytes_at_front);
+		       crypt_stat->metadata_size);
 	}
 	return rc;
 }
@@ -1452,8 +1449,7 @@ static int parse_header_metadata(struct ecryptfs_crypt_stat *crypt_stat,
  */
 static void set_default_header_data(struct ecryptfs_crypt_stat *crypt_stat)
 {
-	crypt_stat->num_header_bytes_at_front =
-		ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE;
+	crypt_stat->metadata_size = ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE;
 }
 
 /**
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 8456f70..d031efd 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -273,7 +273,7 @@ struct ecryptfs_crypt_stat {
 	u32 flags;
 	unsigned int file_version;
 	size_t iv_bytes;
-	size_t num_header_bytes_at_front;
+	size_t metadata_size;
 	size_t extent_size; /* Data extent size; default is 4096 */
 	size_t key_size;
 	size_t extent_shift;
@@ -469,7 +469,7 @@ ecryptfs_lower_header_size(struct ecryptfs_crypt_stat *crypt_stat)
 {
 	if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR)
 		return 0;
-	return crypt_stat->num_header_bytes_at_front;
+	return crypt_stat->metadata_size;
 }
 
 static inline struct ecryptfs_file_info *
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 1a73953..a50efb1 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -335,7 +335,7 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
 		ecryptfs_dentry->d_sb)->mount_crypt_stat;
 	if (mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED) {
 		if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR)
-			file_size = (crypt_stat->num_header_bytes_at_front
+			file_size = (crypt_stat->metadata_size
 				     + i_size_read(lower_dentry->d_inode));
 		else
 			file_size = i_size_read(lower_dentry->d_inode);
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index 5a30e01..270f42a 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -122,8 +122,7 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page,
 					   * num_extents_per_page)
 					  + extent_num_in_page);
 		size_t num_header_extents_at_front =
-			(crypt_stat->num_header_bytes_at_front
-			 / crypt_stat->extent_size);
+			(crypt_stat->metadata_size / crypt_stat->extent_size);
 
 		if (view_extent_num < num_header_extents_at_front) {
 			/* This is a header extent */
@@ -152,7 +151,7 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page,
 			/* This is an encrypted data extent */
 			loff_t lower_offset =
 				((view_extent_num * crypt_stat->extent_size)
-				 - crypt_stat->num_header_bytes_at_front);
+				 - crypt_stat->metadata_size);
 
 			rc = ecryptfs_read_lower_page_segment(
 				page, (lower_offset >> PAGE_CACHE_SHIFT),
-- 
cgit v1.1


From 1984c23f9e0cdb432d90a85ecf88b424d36878fc Mon Sep 17 00:00:00 2001
From: Tyler Hicks <tyhicks@linux.vnet.ibm.com>
Date: Wed, 10 Feb 2010 23:17:44 -0600
Subject: eCryptfs: Clear buffer before reading in metadata xattr

We initially read in the first PAGE_CACHE_SIZE of a file to if the
eCryptfs header marker can be found.  If it isn't found and
ecryptfs_xattr_metadata was given as a mount option, then the
user.ecryptfs xattr is read into the same buffer.  Since the data from
the first page of the file wasn't cleared, it is possible that we think
we've found a second tag 3 or tag 1 packet and then error out after the
packet contents aren't as expected.  This patch clears the buffer before
filling it with metadata from the user.ecryptfs xattr.

Reviewed-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Tyler Hicks <tyhicks@linux.vnet.ibm.com>
---
 fs/ecryptfs/crypto.c | 1 +
 fs/ecryptfs/inode.c  | 1 +
 2 files changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 391f558..4d9db0e 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -1603,6 +1603,7 @@ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry)
 						ecryptfs_dentry,
 						ECRYPTFS_VALIDATE_HEADER_SIZE);
 	if (rc) {
+		memset(page_virt, 0, PAGE_CACHE_SIZE);
 		rc = ecryptfs_read_xattr_region(page_virt, ecryptfs_inode);
 		if (rc) {
 			printk(KERN_DEBUG "Valid eCryptfs headers not found in "
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index a50efb1..ddbd096 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -323,6 +323,7 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
 	rc = ecryptfs_read_and_validate_header_region(page_virt,
 						      ecryptfs_dentry->d_inode);
 	if (rc) {
+		memset(page_virt, 0, PAGE_CACHE_SIZE);
 		rc = ecryptfs_read_and_validate_xattr_region(page_virt,
 							     ecryptfs_dentry);
 		if (rc) {
-- 
cgit v1.1


From f4e60e6b303bc46cdc477d3174dbf9cb5dd013aa Mon Sep 17 00:00:00 2001
From: Tyler Hicks <tyhicks@linux.vnet.ibm.com>
Date: Thu, 11 Feb 2010 00:02:32 -0600
Subject: eCryptfs: Strip metadata in xattr flag in encrypted view

The ecryptfs_encrypted_view mount option provides a unified way of
viewing encrypted eCryptfs files.  If the metadata is stored in a xattr,
the metadata is moved to the file header when the file is read inside
the eCryptfs mount.  Because of this, we should strip the
ECRYPTFS_METADATA_IN_XATTR flag from the header's flag section.  This
allows eCryptfs to treat the file as an eCryptfs file with a header
at the front.

Reviewed-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Tyler Hicks <tyhicks@linux.vnet.ibm.com>
---
 fs/ecryptfs/crypto.c          |  9 +++++----
 fs/ecryptfs/ecryptfs_kernel.h |  3 +++
 fs/ecryptfs/mmap.c            | 14 ++++++++++++++
 3 files changed, 22 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 4d9db0e..fad5bf6 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -1107,9 +1107,9 @@ static void write_ecryptfs_marker(char *page_virt, size_t *written)
 	(*written) = MAGIC_ECRYPTFS_MARKER_SIZE_BYTES;
 }
 
-static void
-write_ecryptfs_flags(char *page_virt, struct ecryptfs_crypt_stat *crypt_stat,
-		     size_t *written)
+void ecryptfs_write_crypt_stat_flags(char *page_virt,
+				     struct ecryptfs_crypt_stat *crypt_stat,
+				     size_t *written)
 {
 	u32 flags = 0;
 	int i;
@@ -1290,7 +1290,8 @@ static int ecryptfs_write_headers_virt(char *page_virt, size_t max,
 	offset = ECRYPTFS_FILE_SIZE_BYTES;
 	write_ecryptfs_marker((page_virt + offset), &written);
 	offset += written;
-	write_ecryptfs_flags((page_virt + offset), crypt_stat, &written);
+	ecryptfs_write_crypt_stat_flags((page_virt + offset), crypt_stat,
+					&written);
 	offset += written;
 	ecryptfs_write_header_metadata((page_virt + offset), crypt_stat,
 				       &written);
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index d031efd..bc71154 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -659,6 +659,9 @@ int ecryptfs_decrypt_page(struct page *page);
 int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry);
 int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry);
 int ecryptfs_new_file_context(struct dentry *ecryptfs_dentry);
+void ecryptfs_write_crypt_stat_flags(char *page_virt,
+				     struct ecryptfs_crypt_stat *crypt_stat,
+				     size_t *written);
 int ecryptfs_read_and_validate_header_region(char *data,
 					     struct inode *ecryptfs_inode);
 int ecryptfs_read_and_validate_xattr_region(char *page_virt,
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index 270f42a..bea998a 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -82,6 +82,19 @@ out:
 	return rc;
 }
 
+static void strip_xattr_flag(char *page_virt,
+			     struct ecryptfs_crypt_stat *crypt_stat)
+{
+	if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) {
+		size_t written;
+
+		crypt_stat->flags &= ~ECRYPTFS_METADATA_IN_XATTR;
+		ecryptfs_write_crypt_stat_flags(page_virt, crypt_stat,
+						&written);
+		crypt_stat->flags |= ECRYPTFS_METADATA_IN_XATTR;
+	}
+}
+
 /**
  *   Header Extent:
  *     Octets 0-7:        Unencrypted file size (big-endian)
@@ -136,6 +149,7 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page,
 
 				rc = ecryptfs_read_xattr_region(
 					page_virt, page->mapping->host);
+				strip_xattr_flag(page_virt + 16, crypt_stat);
 				ecryptfs_write_header_metadata(page_virt + 20,
 							       crypt_stat,
 							       &written);
-- 
cgit v1.1


From b4414eea0e7b9c134262c801a87e338bf675962c Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mfasheh@suse.com>
Date: Thu, 11 Mar 2010 18:31:09 -0800
Subject: ocfs2: Clear undo bits when local alloc is freed

When the local alloc file changes windows, unused bits are freed back to the
global bitmap. By defnition, those bits can not be in use by any file. Also,
the local alloc will never have been able to allocate those bits if they
were part of a previous truncate. Therefore it makes sense that we should
clear unused local alloc bits in the undo buffer so that they can be used
immediatly.

[ Modified to call it ocfs2_release_clusters() -- Joel ]

Signed-off-by: Mark Fasheh <mfasheh@suse.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/localalloc.c |   6 ++-
 fs/ocfs2/ocfs2.h      |  14 +++++-
 fs/ocfs2/suballoc.c   | 116 ++++++++++++++++++++++++++++++++------------------
 fs/ocfs2/suballoc.h   |   5 +++
 4 files changed, 95 insertions(+), 46 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index 171c691..c983715 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -872,8 +872,10 @@ static int ocfs2_sync_local_to_main(struct ocfs2_super *osb,
 			     (unsigned long long)la_start_blk,
 			     (unsigned long long)blkno);
 
-			status = ocfs2_free_clusters(handle, main_bm_inode,
-						     main_bm_bh, blkno, count);
+			status = ocfs2_release_clusters(handle,
+							main_bm_inode,
+							main_bm_bh, blkno,
+							count);
 			if (status < 0) {
 				mlog_errno(status);
 				goto bail;
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 1238b49..adf5e2e 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -763,8 +763,18 @@ static inline unsigned int ocfs2_megabytes_to_clusters(struct super_block *sb,
 	return megs << (20 - OCFS2_SB(sb)->s_clustersize_bits);
 }
 
-#define ocfs2_set_bit ext2_set_bit
-#define ocfs2_clear_bit ext2_clear_bit
+static inline void _ocfs2_set_bit(unsigned int bit, unsigned long *bitmap)
+{
+	ext2_set_bit(bit, bitmap);
+}
+#define ocfs2_set_bit(bit, addr) _ocfs2_set_bit((bit), (unsigned long *)(addr))
+
+static inline void _ocfs2_clear_bit(unsigned int bit, unsigned long *bitmap)
+{
+	ext2_clear_bit(bit, bitmap);
+}
+#define ocfs2_clear_bit(bit, addr) _ocfs2_clear_bit((bit), (unsigned long *)(addr))
+
 #define ocfs2_test_bit ext2_test_bit
 #define ocfs2_find_next_zero_bit ext2_find_next_zero_bit
 #define ocfs2_find_next_bit ext2_find_next_bit
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 0016503..19ba00f 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -95,13 +95,6 @@ static inline int ocfs2_block_group_set_bits(handle_t *handle,
 					     struct buffer_head *group_bh,
 					     unsigned int bit_off,
 					     unsigned int num_bits);
-static inline int ocfs2_block_group_clear_bits(handle_t *handle,
-					       struct inode *alloc_inode,
-					       struct ocfs2_group_desc *bg,
-					       struct buffer_head *group_bh,
-					       unsigned int bit_off,
-					       unsigned int num_bits);
-
 static int ocfs2_relink_block_group(handle_t *handle,
 				    struct inode *alloc_inode,
 				    struct buffer_head *fe_bh,
@@ -1978,18 +1971,18 @@ int ocfs2_claim_clusters(struct ocfs2_super *osb,
 				      bits_wanted, cluster_start, num_clusters);
 }
 
-static inline int ocfs2_block_group_clear_bits(handle_t *handle,
-					       struct inode *alloc_inode,
-					       struct ocfs2_group_desc *bg,
-					       struct buffer_head *group_bh,
-					       unsigned int bit_off,
-					       unsigned int num_bits)
+static int ocfs2_block_group_clear_bits(handle_t *handle,
+					struct inode *alloc_inode,
+					struct ocfs2_group_desc *bg,
+					struct buffer_head *group_bh,
+					unsigned int bit_off,
+					unsigned int num_bits,
+					void (*undo_fn)(unsigned int bit,
+							unsigned long *bmap))
 {
 	int status;
 	unsigned int tmp;
-	int journal_type = OCFS2_JOURNAL_ACCESS_WRITE;
 	struct ocfs2_group_desc *undo_bg = NULL;
-	int cluster_bitmap = 0;
 
 	mlog_entry_void();
 
@@ -1999,20 +1992,18 @@ static inline int ocfs2_block_group_clear_bits(handle_t *handle,
 
 	mlog(0, "off = %u, num = %u\n", bit_off, num_bits);
 
-	if (ocfs2_is_cluster_bitmap(alloc_inode))
-		journal_type = OCFS2_JOURNAL_ACCESS_UNDO;
-
+	BUG_ON(undo_fn && !ocfs2_is_cluster_bitmap(alloc_inode));
 	status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode),
-					 group_bh, journal_type);
+					 group_bh,
+					 undo_fn ?
+					 OCFS2_JOURNAL_ACCESS_UNDO :
+					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
 	}
 
-	if (ocfs2_is_cluster_bitmap(alloc_inode))
-		cluster_bitmap = 1;
-
-	if (cluster_bitmap) {
+	if (undo_fn) {
 		jbd_lock_bh_state(group_bh);
 		undo_bg = (struct ocfs2_group_desc *)
 					bh2jh(group_bh)->b_committed_data;
@@ -2023,13 +2014,13 @@ static inline int ocfs2_block_group_clear_bits(handle_t *handle,
 	while(tmp--) {
 		ocfs2_clear_bit((bit_off + tmp),
 				(unsigned long *) bg->bg_bitmap);
-		if (cluster_bitmap)
-			ocfs2_set_bit(bit_off + tmp,
-				      (unsigned long *) undo_bg->bg_bitmap);
+		if (undo_fn)
+			undo_fn(bit_off + tmp,
+				(unsigned long *) undo_bg->bg_bitmap);
 	}
 	le16_add_cpu(&bg->bg_free_bits_count, num_bits);
 
-	if (cluster_bitmap)
+	if (undo_fn)
 		jbd_unlock_bh_state(group_bh);
 
 	status = ocfs2_journal_dirty(handle, group_bh);
@@ -2042,12 +2033,14 @@ bail:
 /*
  * expects the suballoc inode to already be locked.
  */
-int ocfs2_free_suballoc_bits(handle_t *handle,
-			     struct inode *alloc_inode,
-			     struct buffer_head *alloc_bh,
-			     unsigned int start_bit,
-			     u64 bg_blkno,
-			     unsigned int count)
+static int _ocfs2_free_suballoc_bits(handle_t *handle,
+				     struct inode *alloc_inode,
+				     struct buffer_head *alloc_bh,
+				     unsigned int start_bit,
+				     u64 bg_blkno,
+				     unsigned int count,
+				     void (*undo_fn)(unsigned int bit,
+						     unsigned long *bitmap))
 {
 	int status = 0;
 	u32 tmp_used;
@@ -2082,7 +2075,7 @@ int ocfs2_free_suballoc_bits(handle_t *handle,
 
 	status = ocfs2_block_group_clear_bits(handle, alloc_inode,
 					      group, group_bh,
-					      start_bit, count);
+					      start_bit, count, undo_fn);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
@@ -2113,6 +2106,17 @@ bail:
 	return status;
 }
 
+int ocfs2_free_suballoc_bits(handle_t *handle,
+			     struct inode *alloc_inode,
+			     struct buffer_head *alloc_bh,
+			     unsigned int start_bit,
+			     u64 bg_blkno,
+			     unsigned int count)
+{
+	return _ocfs2_free_suballoc_bits(handle, alloc_inode, alloc_bh,
+					 start_bit, bg_blkno, count, NULL);
+}
+
 int ocfs2_free_dinode(handle_t *handle,
 		      struct inode *inode_alloc_inode,
 		      struct buffer_head *inode_alloc_bh,
@@ -2126,11 +2130,13 @@ int ocfs2_free_dinode(handle_t *handle,
 					inode_alloc_bh, bit, bg_blkno, 1);
 }
 
-int ocfs2_free_clusters(handle_t *handle,
-		       struct inode *bitmap_inode,
-		       struct buffer_head *bitmap_bh,
-		       u64 start_blk,
-		       unsigned int num_clusters)
+static int _ocfs2_free_clusters(handle_t *handle,
+				struct inode *bitmap_inode,
+				struct buffer_head *bitmap_bh,
+				u64 start_blk,
+				unsigned int num_clusters,
+				void (*undo_fn)(unsigned int bit,
+						unsigned long *bitmap))
 {
 	int status;
 	u16 bg_start_bit;
@@ -2157,9 +2163,9 @@ int ocfs2_free_clusters(handle_t *handle,
 	mlog(0, "bg_blkno = %llu, bg_start_bit = %u\n",
 	     (unsigned long long)bg_blkno, bg_start_bit);
 
-	status = ocfs2_free_suballoc_bits(handle, bitmap_inode, bitmap_bh,
-					  bg_start_bit, bg_blkno,
-					  num_clusters);
+	status = _ocfs2_free_suballoc_bits(handle, bitmap_inode, bitmap_bh,
+					   bg_start_bit, bg_blkno,
+					   num_clusters, undo_fn);
 	if (status < 0) {
 		mlog_errno(status);
 		goto out;
@@ -2173,6 +2179,32 @@ out:
 	return status;
 }
 
+int ocfs2_free_clusters(handle_t *handle,
+			struct inode *bitmap_inode,
+			struct buffer_head *bitmap_bh,
+			u64 start_blk,
+			unsigned int num_clusters)
+{
+	return _ocfs2_free_clusters(handle, bitmap_inode, bitmap_bh,
+				    start_blk, num_clusters,
+				    _ocfs2_set_bit);
+}
+
+/*
+ * Give never-used clusters back to the global bitmap.  We don't need
+ * to protect these bits in the undo buffer.
+ */
+int ocfs2_release_clusters(handle_t *handle,
+			   struct inode *bitmap_inode,
+			   struct buffer_head *bitmap_bh,
+			   u64 start_blk,
+			   unsigned int num_clusters)
+{
+	return _ocfs2_free_clusters(handle, bitmap_inode, bitmap_bh,
+				    start_blk, num_clusters,
+				    _ocfs2_clear_bit);
+}
+
 static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg)
 {
 	printk("Block Group:\n");
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h
index fa60723..e0f46df 100644
--- a/fs/ocfs2/suballoc.h
+++ b/fs/ocfs2/suballoc.h
@@ -127,6 +127,11 @@ int ocfs2_free_clusters(handle_t *handle,
 			struct buffer_head *bitmap_bh,
 			u64 start_blk,
 			unsigned int num_clusters);
+int ocfs2_release_clusters(handle_t *handle,
+			   struct inode *bitmap_inode,
+			   struct buffer_head *bitmap_bh,
+			   u64 start_blk,
+			   unsigned int num_clusters);
 
 static inline u64 ocfs2_which_suballoc_group(u64 block, unsigned int bit)
 {
-- 
cgit v1.1


From 3939fda4b389993caf8741df5739b3e49f33a263 Mon Sep 17 00:00:00 2001
From: Tristan Ye <tristan.ye@oracle.com>
Date: Fri, 19 Mar 2010 09:21:09 +0800
Subject: Ocfs2: Journaling i_flags and i_orphaned_slot when adding inode to
 orphan dir.

Currently, some callers were missing to journal the dirty inode after
adding it to orphan dir.

Now we're going to journal such modifications within the ocfs2_orphan_add()
itself, It's safe to do so, though some existing caller may duplicate this,
and it makes the logic look more straightforward anyway.

Signed-off-by: Tristan Ye <tristan.ye@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/namei.c | 28 +++++++++++++++++++++++-----
 1 file changed, 23 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index d9cd4e3..b1eb50a 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -84,7 +84,7 @@ static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
 static int ocfs2_orphan_add(struct ocfs2_super *osb,
 			    handle_t *handle,
 			    struct inode *inode,
-			    struct ocfs2_dinode *fe,
+			    struct buffer_head *fe_bh,
 			    char *name,
 			    struct ocfs2_dir_lookup_result *lookup,
 			    struct inode *orphan_dir_inode);
@@ -879,7 +879,7 @@ static int ocfs2_unlink(struct inode *dir,
 	fe = (struct ocfs2_dinode *) fe_bh->b_data;
 
 	if (inode_is_unlinkable(inode)) {
-		status = ocfs2_orphan_add(osb, handle, inode, fe, orphan_name,
+		status = ocfs2_orphan_add(osb, handle, inode, fe_bh, orphan_name,
 					  &orphan_insert, orphan_dir);
 		if (status < 0) {
 			mlog_errno(status);
@@ -1300,7 +1300,7 @@ static int ocfs2_rename(struct inode *old_dir,
 		if (S_ISDIR(new_inode->i_mode) ||
 		    (ocfs2_read_links_count(newfe) == 1)) {
 			status = ocfs2_orphan_add(osb, handle, new_inode,
-						  newfe, orphan_name,
+						  newfe_bh, orphan_name,
 						  &orphan_insert, orphan_dir);
 			if (status < 0) {
 				mlog_errno(status);
@@ -1911,7 +1911,7 @@ leave:
 static int ocfs2_orphan_add(struct ocfs2_super *osb,
 			    handle_t *handle,
 			    struct inode *inode,
-			    struct ocfs2_dinode *fe,
+			    struct buffer_head *fe_bh,
 			    char *name,
 			    struct ocfs2_dir_lookup_result *lookup,
 			    struct inode *orphan_dir_inode)
@@ -1919,6 +1919,7 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
 	struct buffer_head *orphan_dir_bh = NULL;
 	int status = 0;
 	struct ocfs2_dinode *orphan_fe;
+	struct ocfs2_dinode *fe = (struct ocfs2_dinode *) fe_bh->b_data;
 
 	mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino);
 
@@ -1959,6 +1960,21 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
 		goto leave;
 	}
 
+	/*
+	 * We're going to journal the change of i_flags and i_orphaned_slot.
+	 * It's safe anyway, though some callers may duplicate the journaling.
+	 * Journaling within the func just make the logic look more
+	 * straightforward.
+	 */
+	status = ocfs2_journal_access_di(handle,
+					 INODE_CACHE(inode),
+					 fe_bh,
+					 OCFS2_JOURNAL_ACCESS_WRITE);
+	if (status < 0) {
+		mlog_errno(status);
+		goto leave;
+	}
+
 	le32_add_cpu(&fe->i_flags, OCFS2_ORPHANED_FL);
 
 	/* Record which orphan dir our inode now resides
@@ -1966,6 +1982,8 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
 	 * dir to lock. */
 	fe->i_orphaned_slot = cpu_to_le16(osb->slot_num);
 
+	ocfs2_journal_dirty(handle, fe_bh);
+
 	mlog(0, "Inode %llu orphaned in slot %d\n",
 	     (unsigned long long)OCFS2_I(inode)->ip_blkno, osb->slot_num);
 
@@ -2123,7 +2141,7 @@ int ocfs2_create_inode_in_orphan(struct inode *dir,
 	}
 
 	di = (struct ocfs2_dinode *)new_di_bh->b_data;
-	status = ocfs2_orphan_add(osb, handle, inode, di, orphan_name,
+	status = ocfs2_orphan_add(osb, handle, inode, new_di_bh, orphan_name,
 				  &orphan_insert, orphan_dir);
 	if (status < 0) {
 		mlog_errno(status);
-- 
cgit v1.1


From b54c2ca475fa7d7450a45b6d778dae9dbe0bcbfe Mon Sep 17 00:00:00 2001
From: Tristan Ye <tristan.ye@oracle.com>
Date: Fri, 19 Mar 2010 09:21:10 +0800
Subject: Ocfs2: Handle deletion of reflinked oprhan inodes correctly.

The rule is that all inodes in the orphan dir have ORPHANED_FL,
otherwise we treated it as an ERROR.  This rule works well except
for some rare cases of reflink operation:

http://oss.oracle.com/bugzilla/show_bug.cgi?id=1215

The problem is caused by how reflink and our orphan_scan thread
interact.

 * The orphan scan pulls the orphans into a queue first, then runs the
   queue at a later time.  We only hold the orphan_dir's lock
   during scanning.

 * Reflink create a oprhaned target in orphan_dir as its first step.
   It removes the target and clears the flag as the final step.
   These two steps take the orphan_dir's lock, but it is not held for
   the duration.

Based on the above semantics, a reflink inode can be moved out of the
orphan dir and have its ORPHANED_FL cleared before the queue of orphans
is run.  This leads to a ERROR in ocfs2_query_wipde_inode().

This patch teaches ocfs2_query_wipe_inode() to detect previously
orphaned reflink targets.  If a reflink fails or a crash occurs during
the relfink operation, the inode will retain ORPHANED_FL and will be
properly wiped.

Signed-off-by: Tristan Ye <tristan.ye@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/inode.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'fs')

diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 278a223..ab20790 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -891,6 +891,21 @@ static int ocfs2_query_inode_wipe(struct inode *inode,
 	/* Do some basic inode verification... */
 	di = (struct ocfs2_dinode *) di_bh->b_data;
 	if (!(di->i_flags & cpu_to_le32(OCFS2_ORPHANED_FL))) {
+		/*
+		 * Inodes in the orphan dir must have ORPHANED_FL.  The only
+		 * inodes that come back out of the orphan dir are reflink
+		 * targets. A reflink target may be moved out of the orphan
+		 * dir between the time we scan the directory and the time we
+		 * process it. This would lead to HAS_REFCOUNT_FL being set but
+		 * ORPHANED_FL not.
+		 */
+		if (di->i_dyn_features & cpu_to_le16(OCFS2_HAS_REFCOUNT_FL)) {
+			mlog(0, "Reflinked inode %llu is no longer orphaned.  "
+			     "it shouldn't be deleted\n",
+			     (unsigned long long)oi->ip_blkno);
+			goto bail;
+		}
+
 		/* for lack of a better error? */
 		status = -EEXIST;
 		mlog(ML_ERROR,
-- 
cgit v1.1


From 14741472a05245ed5778aa0aec055e1f920b6ef8 Mon Sep 17 00:00:00 2001
From: Srinivas Eeda <srinivas.eeda@oracle.com>
Date: Mon, 22 Mar 2010 16:50:47 -0700
Subject: ocfs2: Fix a race in o2dlm lockres mastery

In o2dlm, the master of a lock resource keeps a map of all interested
nodes.  This prevents the master from purging the resource before an
interested node can create a lock.

A race between the mastery thread and the mastery handler allowed an
interested node to discover who the master is without informing the
master directly.  This is easily fixed by holding the dlm spinlock a
little longer in the mastery handler.

Signed-off-by: Srinivas Eeda <srinivas.eeda@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/dlm/dlmmaster.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index a659606..9289b43 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -1875,7 +1875,6 @@ int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data,
 ok:
 		spin_unlock(&res->spinlock);
 	}
-	spin_unlock(&dlm->spinlock);
 
 	// mlog(0, "woo!  got an assert_master from node %u!\n",
 	// 	     assert->node_idx);
@@ -1926,7 +1925,6 @@ ok:
 		/* master is known, detach if not already detached.
 		 * ensures that only one assert_master call will happen
 		 * on this mle. */
-		spin_lock(&dlm->spinlock);
 		spin_lock(&dlm->master_lock);
 
 		rr = atomic_read(&mle->mle_refs.refcount);
@@ -1959,7 +1957,6 @@ ok:
 			__dlm_put_mle(mle);
 		}
 		spin_unlock(&dlm->master_lock);
-		spin_unlock(&dlm->spinlock);
 	} else if (res) {
 		if (res->owner != assert->node_idx) {
 			mlog(0, "assert_master from %u, but current "
@@ -1967,6 +1964,7 @@ ok:
 			     res->owner, namelen, name);
 		}
 	}
+	spin_unlock(&dlm->spinlock);
 
 done:
 	ret = 0;
-- 
cgit v1.1


From 37f328eb60a94779dd020084209fc4db2d6444a0 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Wed, 24 Mar 2010 20:06:41 -0400
Subject: ext4: Fix spelling of CONTIG_FS_EXT3 to CONFIG_FS_EXT3

Oops.  (Blush.)

Thanks to Sedat Dilek for pointing this out.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/super.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index ce84a6e..f4b038f 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -4068,7 +4068,7 @@ static int ext4_get_sb(struct file_system_type *fs_type, int flags,
 	return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt);
 }
 
-#if !defined(CONTIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
+#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
 static struct file_system_type ext2_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "ext2",
@@ -4095,7 +4095,7 @@ static inline void register_as_ext2(void) { }
 static inline void unregister_as_ext2(void) { }
 #endif
 
-#if !defined(CONTIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
+#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
 static struct file_system_type ext3_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "ext3",
-- 
cgit v1.1


From ba69f9ab7df844125898104e854e063b47c26637 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 24 Mar 2010 20:18:37 -0400
Subject: ext4: Don't use delayed allocation by default when used instead of
 ext3

When ext4 driver is used to mount a filesystem instead of the ext3 file
system driver (through CONFIG_EXT4_USE_FOR_EXT23), do not enable delayed
allocation by default since some ext3 users and application writers have
developed unfortunate expectations about the safety of writing files on
systems subject to sudden and violent death without using fsync().

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/super.c | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index f4b038f..29c6875 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -68,7 +68,21 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
 static int ext4_unfreeze(struct super_block *sb);
 static void ext4_write_super(struct super_block *sb);
 static int ext4_freeze(struct super_block *sb);
+static int ext4_get_sb(struct file_system_type *fs_type, int flags,
+		       const char *dev_name, void *data, struct vfsmount *mnt);
 
+#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
+static struct file_system_type ext3_fs_type = {
+	.owner		= THIS_MODULE,
+	.name		= "ext3",
+	.get_sb		= ext4_get_sb,
+	.kill_sb	= kill_block_super,
+	.fs_flags	= FS_REQUIRES_DEV,
+};
+#define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type)
+#else
+#define IS_EXT3_SB(sb) (0)
+#endif
 
 ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
 			       struct ext4_group_desc *bg)
@@ -2539,7 +2553,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	 * enable delayed allocation by default
 	 * Use -o nodelalloc to turn it off
 	 */
-	set_opt(sbi->s_mount_opt, DELALLOC);
+	if (!IS_EXT3_SB(sb))
+		set_opt(sbi->s_mount_opt, DELALLOC);
 
 	if (!parse_options((char *) data, sb, &journal_devnum,
 			   &journal_ioprio, NULL, 0))
@@ -4096,14 +4111,6 @@ static inline void unregister_as_ext2(void) { }
 #endif
 
 #if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
-static struct file_system_type ext3_fs_type = {
-	.owner		= THIS_MODULE,
-	.name		= "ext3",
-	.get_sb		= ext4_get_sb,
-	.kill_sb	= kill_block_super,
-	.fs_flags	= FS_REQUIRES_DEV,
-};
-
 static inline void register_as_ext3(void)
 {
 	int err = register_filesystem(&ext3_fs_type);
-- 
cgit v1.1


From c4caae25187ff3f5e837c6f04eb1acc2723c72d3 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@redhat.com>
Date: Tue, 23 Mar 2010 21:32:00 -0400
Subject: ext4: Fixed inode allocator to correctly track a flex_bg's used_dirs

When used_dirs was introduced for the flex_groups struct, it looks
like the accounting was not put into place properly, in some places
manipulating free_inodes rather than used_dirs.

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/ialloc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 361c0b9..57f6eef 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -263,7 +263,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
 					ext4_group_t f;
 
 					f = ext4_flex_group(sbi, block_group);
-					atomic_dec(&sbi->s_flex_groups[f].free_inodes);
+					atomic_dec(&sbi->s_flex_groups[f].used_dirs);
 				}
 
 			}
@@ -773,7 +773,7 @@ static int ext4_claim_inode(struct super_block *sb,
 		if (sbi->s_log_groups_per_flex) {
 			ext4_group_t f = ext4_flex_group(sbi, group);
 
-			atomic_inc(&sbi->s_flex_groups[f].free_inodes);
+			atomic_inc(&sbi->s_flex_groups[f].used_dirs);
 		}
 	}
 	gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
-- 
cgit v1.1


From 7731d9a5d415414aa6903709453786d4a5ff57e4 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <petkovbb@googlemail.com>
Date: Tue, 23 Mar 2010 13:35:15 -0700
Subject: fs/binfmt_aout.c: fix pointer warnings

fs/binfmt_aout.c: In function `aout_core_dump':
fs/binfmt_aout.c:125: warning: passing argument 2 of `dump_write' makes pointer from integer without a cast
include/linux/coredump.h:12: note: expected `const void *' but argument is of type `long unsigned int'
fs/binfmt_aout.c:132: warning: passing argument 2 of `dump_write' makes pointer from integer without a cast
include/linux/coredump.h:12: note: expected `const void *' but argument is of type `long unsigned int'

due to dump_write() expecting a user void *.  Fold casts into the
START_DATA/START_STACK macros and shut up the warnings.

Signed-off-by: Borislav Petkov <petkovbb@gmail.com>
Cc: Daisuke HATAYAMA <d.hatayama@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/binfmt_aout.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index 15d80bb..9b6aef0f 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -75,14 +75,16 @@ static int aout_core_dump(struct coredump_params *cprm)
 	struct file *file = cprm->file;
 	mm_segment_t fs;
 	int has_dumped = 0;
-	unsigned long dump_start, dump_size;
+	void __user *dump_start;
+	int dump_size;
 	struct user dump;
 #ifdef __alpha__
-#       define START_DATA(u)	(u.start_data)
+#       define START_DATA(u)	((void __user *)u.start_data)
 #else
-#	define START_DATA(u)	((u.u_tsize << PAGE_SHIFT) + u.start_code)
+#	define START_DATA(u)	((void __user *)((u.u_tsize << PAGE_SHIFT) + \
+				 u.start_code))
 #endif
-#       define START_STACK(u)   (u.start_stack)
+#       define START_STACK(u)   ((void __user *)u.start_stack)
 
 	fs = get_fs();
 	set_fs(KERNEL_DS);
@@ -104,9 +106,9 @@ static int aout_core_dump(struct coredump_params *cprm)
 
 /* make sure we actually have a data and stack area to dump */
 	set_fs(USER_DS);
-	if (!access_ok(VERIFY_READ, (void __user *)START_DATA(dump), dump.u_dsize << PAGE_SHIFT))
+	if (!access_ok(VERIFY_READ, START_DATA(dump), dump.u_dsize << PAGE_SHIFT))
 		dump.u_dsize = 0;
-	if (!access_ok(VERIFY_READ, (void __user *)START_STACK(dump), dump.u_ssize << PAGE_SHIFT))
+	if (!access_ok(VERIFY_READ, START_STACK(dump), dump.u_ssize << PAGE_SHIFT))
 		dump.u_ssize = 0;
 
 	set_fs(KERNEL_DS);
-- 
cgit v1.1


From 6cb4aff0a77cc0e6bae9475d62205319e3ebbf3f Mon Sep 17 00:00:00 2001
From: Jeff Mahoney <jeffm@suse.com>
Date: Tue, 23 Mar 2010 13:35:38 -0700
Subject: reiserfs: fix oops while creating privroot with selinux enabled

Commit 57fe60df ("reiserfs: add atomic addition of selinux attributes
during inode creation") contains a bug that will cause it to oops when
mounting a file system that didn't previously contain extended attributes
on a system using security.* xattrs.

The issue is that while creating the privroot during mount
reiserfs_security_init calls reiserfs_xattr_jcreate_nblocks which
dereferences the xattr root.  The xattr root doesn't exist, so we get an
oops.

Addresses http://bugzilla.kernel.org/show_bug.cgi?id=15309

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/reiserfs/xattr_security.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c
index d8b5bfc..de1fcff 100644
--- a/fs/reiserfs/xattr_security.c
+++ b/fs/reiserfs/xattr_security.c
@@ -76,7 +76,7 @@ int reiserfs_security_init(struct inode *dir, struct inode *inode,
 		return error;
 	}
 
-	if (sec->length) {
+	if (sec->length && reiserfs_xattrs_initialized(inode->i_sb)) {
 		blocks = reiserfs_xattr_jcreate_nblocks(inode) +
 			 reiserfs_xattr_nblocks(inode, sec->length);
 		/* We don't want to count the directories twice if we have
-- 
cgit v1.1


From 3f8b5ee33293d43ca360771b535dfae8c57259dc Mon Sep 17 00:00:00 2001
From: Jeff Mahoney <jeffm@suse.com>
Date: Tue, 23 Mar 2010 13:35:39 -0700
Subject: reiserfs: properly honor read-only devices

The reiserfs journal behaves inconsistently when determining whether to
allow a mount of a read-only device.

This is due to the use of the continue_replay variable to short circuit
the journal scanning.  If it's set, it's assumed that there are
transactions to replay, but there may not be.  If it's unset, it's assumed
that there aren't any, and that may not be the case either.

I've observed two failure cases:
1) Where a clean file system on a read-only device refuses to mount
2) Where a clean file system on a read-only device passes the
   optimization and then tries writing the journal header to update
   the latest mount id.

The former is easily observable by using a freshly created file system on
a read-only loopback device.

This patch moves the check into journal_read_transaction, where it can
bail out before it's about to replay a transaction.  That way it can go
through and skip transactions where appropriate, yet still refuse to mount
a file system with outstanding transactions.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/reiserfs/journal.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index ba98546..f3de5e8 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2217,6 +2217,15 @@ static int journal_read_transaction(struct super_block *sb,
 		brelse(d_bh);
 		return 1;
 	}
+
+	if (bdev_read_only(sb->s_bdev)) {
+		reiserfs_warning(sb, "clm-2076",
+				 "device is readonly, unable to replay log");
+		brelse(c_bh);
+		brelse(d_bh);
+		return -EROFS;
+	}
+
 	trans_id = get_desc_trans_id(desc);
 	/* now we know we've got a good transaction, and it was inside the valid time ranges */
 	log_blocks = kmalloc(get_desc_trans_len(desc) *
@@ -2459,12 +2468,6 @@ static int journal_read(struct super_block *sb)
 		goto start_log_replay;
 	}
 
-	if (continue_replay && bdev_read_only(sb->s_bdev)) {
-		reiserfs_warning(sb, "clm-2076",
-				 "device is readonly, unable to replay log");
-		return -1;
-	}
-
 	/* ok, there are transactions that need to be replayed.  start with the first log block, find
 	 ** all the valid transactions, and pick out the oldest.
 	 */
-- 
cgit v1.1


From 4fd2c20d964a8fb9861045f1022475c9d200d684 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Tue, 23 Mar 2010 13:35:42 -0700
Subject: kcore: fix test for end of list

"m" is never NULL here.  We need a different test for the end of list
condition.

Signed-off-by: Dan Carpenter <error27@gmail.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: WANG Cong <xiyou.wangcong@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/kcore.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index a44a789..b442dac 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -490,7 +490,7 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
 		}
 		read_unlock(&kclist_lock);
 
-		if (m == NULL) {
+		if (&m->list == &kclist_head) {
 			if (clear_user(buffer, tsz))
 				return -EFAULT;
 		} else if (is_vmalloc_or_module_addr((void *)start)) {
-- 
cgit v1.1


From 3fbf586cf7f245392142e5407c2a56f1cff979b6 Mon Sep 17 00:00:00 2001
From: Daniel Taylor <Daniel.Taylor@wdc.com>
Date: Tue, 23 Mar 2010 13:35:50 -0700
Subject: fs/partitions/msdos: add support for large disks

In order to use disks larger than 2TiB on Windows XP, it is necessary to
use 4096-byte logical sectors in an MBR.

Although the kernel storage and functions called from msdos.c used
"sector_t" internally, msdos.c still used u32 variables, which results in
the ability to handle XP-compatible large disks.

This patch changes the internal variables to "sector_t".

Daniel said: "In the near future, WD will be releasing products that need
this patch".

[hirofumi@mail.parknet.co.jp: tweaks and fix]
Signed-off-by: Daniel Taylor <daniel.taylor@wdc.com>
Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/partitions/msdos.c | 72 +++++++++++++++++++++++++++------------------------
 1 file changed, 38 insertions(+), 34 deletions(-)

(limited to 'fs')

diff --git a/fs/partitions/msdos.c b/fs/partitions/msdos.c
index 0028d2e..b783858 100644
--- a/fs/partitions/msdos.c
+++ b/fs/partitions/msdos.c
@@ -31,14 +31,17 @@
  */
 #include <asm/unaligned.h>
 
-#define SYS_IND(p)	(get_unaligned(&p->sys_ind))
-#define NR_SECTS(p)	({ __le32 __a =	get_unaligned(&p->nr_sects);	\
-				le32_to_cpu(__a); \
-			})
+#define SYS_IND(p)	get_unaligned(&p->sys_ind)
 
-#define START_SECT(p)	({ __le32 __a =	get_unaligned(&p->start_sect);	\
-				le32_to_cpu(__a); \
-			})
+static inline sector_t nr_sects(struct partition *p)
+{
+	return (sector_t)get_unaligned_le32(&p->nr_sects);
+}
+
+static inline sector_t start_sect(struct partition *p)
+{
+	return (sector_t)get_unaligned_le32(&p->start_sect);
+}
 
 static inline int is_extended_partition(struct partition *p)
 {
@@ -104,13 +107,13 @@ static int aix_magic_present(unsigned char *p, struct block_device *bdev)
 
 static void
 parse_extended(struct parsed_partitions *state, struct block_device *bdev,
-			u32 first_sector, u32 first_size)
+			sector_t first_sector, sector_t first_size)
 {
 	struct partition *p;
 	Sector sect;
 	unsigned char *data;
-	u32 this_sector, this_size;
-	int sector_size = bdev_logical_block_size(bdev) / 512;
+	sector_t this_sector, this_size;
+	sector_t sector_size = bdev_logical_block_size(bdev) / 512;
 	int loopct = 0;		/* number of links followed
 				   without finding a data partition */
 	int i;
@@ -145,14 +148,14 @@ parse_extended(struct parsed_partitions *state, struct block_device *bdev,
 		 * First process the data partition(s)
 		 */
 		for (i=0; i<4; i++, p++) {
-			u32 offs, size, next;
-			if (!NR_SECTS(p) || is_extended_partition(p))
+			sector_t offs, size, next;
+			if (!nr_sects(p) || is_extended_partition(p))
 				continue;
 
 			/* Check the 3rd and 4th entries -
 			   these sometimes contain random garbage */
-			offs = START_SECT(p)*sector_size;
-			size = NR_SECTS(p)*sector_size;
+			offs = start_sect(p)*sector_size;
+			size = nr_sects(p)*sector_size;
 			next = this_sector + offs;
 			if (i >= 2) {
 				if (offs + size > this_size)
@@ -179,13 +182,13 @@ parse_extended(struct parsed_partitions *state, struct block_device *bdev,
 		 */
 		p -= 4;
 		for (i=0; i<4; i++, p++)
-			if (NR_SECTS(p) && is_extended_partition(p))
+			if (nr_sects(p) && is_extended_partition(p))
 				break;
 		if (i == 4)
 			goto done;	 /* nothing left to do */
 
-		this_sector = first_sector + START_SECT(p) * sector_size;
-		this_size = NR_SECTS(p) * sector_size;
+		this_sector = first_sector + start_sect(p) * sector_size;
+		this_size = nr_sects(p) * sector_size;
 		put_dev_sector(sect);
 	}
 done:
@@ -197,7 +200,7 @@ done:
 
 static void
 parse_solaris_x86(struct parsed_partitions *state, struct block_device *bdev,
-			u32 offset, u32 size, int origin)
+			sector_t offset, sector_t size, int origin)
 {
 #ifdef CONFIG_SOLARIS_X86_PARTITION
 	Sector sect;
@@ -244,7 +247,7 @@ parse_solaris_x86(struct parsed_partitions *state, struct block_device *bdev,
  */
 static void
 parse_bsd(struct parsed_partitions *state, struct block_device *bdev,
-		u32 offset, u32 size, int origin, char *flavour,
+		sector_t offset, sector_t size, int origin, char *flavour,
 		int max_partitions)
 {
 	Sector sect;
@@ -263,7 +266,7 @@ parse_bsd(struct parsed_partitions *state, struct block_device *bdev,
 	if (le16_to_cpu(l->d_npartitions) < max_partitions)
 		max_partitions = le16_to_cpu(l->d_npartitions);
 	for (p = l->d_partitions; p - l->d_partitions < max_partitions; p++) {
-		u32 bsd_start, bsd_size;
+		sector_t bsd_start, bsd_size;
 
 		if (state->next == state->limit)
 			break;
@@ -290,7 +293,7 @@ parse_bsd(struct parsed_partitions *state, struct block_device *bdev,
 
 static void
 parse_freebsd(struct parsed_partitions *state, struct block_device *bdev,
-		u32 offset, u32 size, int origin)
+		sector_t offset, sector_t size, int origin)
 {
 #ifdef CONFIG_BSD_DISKLABEL
 	parse_bsd(state, bdev, offset, size, origin,
@@ -300,7 +303,7 @@ parse_freebsd(struct parsed_partitions *state, struct block_device *bdev,
 
 static void
 parse_netbsd(struct parsed_partitions *state, struct block_device *bdev,
-		u32 offset, u32 size, int origin)
+		sector_t offset, sector_t size, int origin)
 {
 #ifdef CONFIG_BSD_DISKLABEL
 	parse_bsd(state, bdev, offset, size, origin,
@@ -310,7 +313,7 @@ parse_netbsd(struct parsed_partitions *state, struct block_device *bdev,
 
 static void
 parse_openbsd(struct parsed_partitions *state, struct block_device *bdev,
-		u32 offset, u32 size, int origin)
+		sector_t offset, sector_t size, int origin)
 {
 #ifdef CONFIG_BSD_DISKLABEL
 	parse_bsd(state, bdev, offset, size, origin,
@@ -324,7 +327,7 @@ parse_openbsd(struct parsed_partitions *state, struct block_device *bdev,
  */
 static void
 parse_unixware(struct parsed_partitions *state, struct block_device *bdev,
-		u32 offset, u32 size, int origin)
+		sector_t offset, sector_t size, int origin)
 {
 #ifdef CONFIG_UNIXWARE_DISKLABEL
 	Sector sect;
@@ -348,7 +351,8 @@ parse_unixware(struct parsed_partitions *state, struct block_device *bdev,
 
 		if (p->s_label != UNIXWARE_FS_UNUSED)
 			put_partition(state, state->next++,
-						START_SECT(p), NR_SECTS(p));
+				      le32_to_cpu(p->start_sect),
+				      le32_to_cpu(p->nr_sects));
 		p++;
 	}
 	put_dev_sector(sect);
@@ -363,7 +367,7 @@ parse_unixware(struct parsed_partitions *state, struct block_device *bdev,
  */
 static void
 parse_minix(struct parsed_partitions *state, struct block_device *bdev,
-		u32 offset, u32 size, int origin)
+		sector_t offset, sector_t size, int origin)
 {
 #ifdef CONFIG_MINIX_SUBPARTITION
 	Sector sect;
@@ -390,7 +394,7 @@ parse_minix(struct parsed_partitions *state, struct block_device *bdev,
 			/* add each partition in use */
 			if (SYS_IND(p) == MINIX_PARTITION)
 				put_partition(state, state->next++,
-					      START_SECT(p), NR_SECTS(p));
+					      start_sect(p), nr_sects(p));
 		}
 		printk(" >\n");
 	}
@@ -401,7 +405,7 @@ parse_minix(struct parsed_partitions *state, struct block_device *bdev,
 static struct {
 	unsigned char id;
 	void (*parse)(struct parsed_partitions *, struct block_device *,
-			u32, u32, int);
+			sector_t, sector_t, int);
 } subtypes[] = {
 	{FREEBSD_PARTITION, parse_freebsd},
 	{NETBSD_PARTITION, parse_netbsd},
@@ -415,7 +419,7 @@ static struct {
  
 int msdos_partition(struct parsed_partitions *state, struct block_device *bdev)
 {
-	int sector_size = bdev_logical_block_size(bdev) / 512;
+	sector_t sector_size = bdev_logical_block_size(bdev) / 512;
 	Sector sect;
 	unsigned char *data;
 	struct partition *p;
@@ -483,8 +487,8 @@ int msdos_partition(struct parsed_partitions *state, struct block_device *bdev)
 
 	state->next = 5;
 	for (slot = 1 ; slot <= 4 ; slot++, p++) {
-		u32 start = START_SECT(p)*sector_size;
-		u32 size = NR_SECTS(p)*sector_size;
+		sector_t start = start_sect(p)*sector_size;
+		sector_t size = nr_sects(p)*sector_size;
 		if (!size)
 			continue;
 		if (is_extended_partition(p)) {
@@ -513,7 +517,7 @@ int msdos_partition(struct parsed_partitions *state, struct block_device *bdev)
 		unsigned char id = SYS_IND(p);
 		int n;
 
-		if (!NR_SECTS(p))
+		if (!nr_sects(p))
 			continue;
 
 		for (n = 0; subtypes[n].parse && id != subtypes[n].id; n++)
@@ -521,8 +525,8 @@ int msdos_partition(struct parsed_partitions *state, struct block_device *bdev)
 
 		if (!subtypes[n].parse)
 			continue;
-		subtypes[n].parse(state, bdev, START_SECT(p)*sector_size,
-						NR_SECTS(p)*sector_size, slot);
+		subtypes[n].parse(state, bdev, start_sect(p)*sector_size,
+						nr_sects(p)*sector_size, slot);
 	}
 	put_dev_sector(sect);
 	return 1;
-- 
cgit v1.1


From 8e0cc811e0f8029a7225372fb0951fab102c012f Mon Sep 17 00:00:00 2001
From: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Date: Tue, 23 Mar 2010 13:35:50 -0700
Subject: fs/partition/msdos: fix unusable extended partition for > 512B sector

Smaller size than a minimum blocksize can't be used, after all it's
handled like 0 size.

For extended partition itself, this makes sure to use bigger size than one
logical sector size at least.

Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Cc: Daniel Taylor <Daniel.Taylor@wdc.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/partitions/msdos.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/partitions/msdos.c b/fs/partitions/msdos.c
index b783858..90be97f 100644
--- a/fs/partitions/msdos.c
+++ b/fs/partitions/msdos.c
@@ -492,9 +492,16 @@ int msdos_partition(struct parsed_partitions *state, struct block_device *bdev)
 		if (!size)
 			continue;
 		if (is_extended_partition(p)) {
-			/* prevent someone doing mkfs or mkswap on an
-			   extended partition, but leave room for LILO */
-			put_partition(state, slot, start, size == 1 ? 1 : 2);
+			/*
+			 * prevent someone doing mkfs or mkswap on an
+			 * extended partition, but leave room for LILO
+			 * FIXME: this uses one logical sector for > 512b
+			 * sector, although it may not be enough/proper.
+			 */
+			sector_t n = 2;
+			n = min(size, max(sector_size, n));
+			put_partition(state, slot, start, n);
+
 			printk(" <");
 			parse_extended(state, bdev, start, size);
 			printk(" >");
-- 
cgit v1.1


From 47568d4c56677ede6e7ff7c8e4e0fd860a9c4372 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 24 Mar 2010 17:02:28 +0000
Subject: FDPIC: For-loop in elf_core_vma_data_size() is incorrect

Fix an incorrect for-loop in elf_core_vma_data_size().  The advance-pointer
statement lacks an assignment:

	  CC      fs/binfmt_elf_fdpic.o
	fs/binfmt_elf_fdpic.c: In function 'elf_core_vma_data_size':
	fs/binfmt_elf_fdpic.c:1593: warning: statement with no effect

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/binfmt_elf_fdpic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 2c32d00..7ab23e0 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1590,7 +1590,7 @@ static size_t elf_core_vma_data_size(unsigned long mm_flags)
 	struct vm_area_struct *vma;
 	size_t size = 0;
 
-	for (vma = current->mm->mmap; vma; vma->vm_next)
+	for (vma = current->mm->mmap; vma; vma = vma->vm_next)
 		if (maydump(vma, mm_flags))
 			size += vma->vm_end - vma->vm_start;
 	return size;
-- 
cgit v1.1


From 61964eba5c419ff710ac996c5ed3a84d5af7490f Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 24 Mar 2010 17:09:19 +0000
Subject: do_sync_read/write() should set kiocb.ki_nbytes to be consistent

do_sync_read/write() should set kiocb.ki_nbytes to be consistent with
do_sync_readv_writev().

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/read_write.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/read_write.c b/fs/read_write.c
index b7f4a1f..113386d 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -258,6 +258,7 @@ ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *pp
 	init_sync_kiocb(&kiocb, filp);
 	kiocb.ki_pos = *ppos;
 	kiocb.ki_left = len;
+	kiocb.ki_nbytes = len;
 
 	for (;;) {
 		ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos);
@@ -313,6 +314,7 @@ ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, lof
 	init_sync_kiocb(&kiocb, filp);
 	kiocb.ki_pos = *ppos;
 	kiocb.ki_left = len;
+	kiocb.ki_nbytes = len;
 
 	for (;;) {
 		ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos);
-- 
cgit v1.1


From 1147d0f915e3b4c5c4fa279dae2c40016b8f441d Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Tue, 23 Mar 2010 14:48:37 +0000
Subject: fscache: add missing unlock

Sparse complained about this missing spin_unlock()

Signed-off-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/fscache/page.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index c598ea4..6980902 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -881,6 +881,7 @@ submit_failed:
 	goto nobufs;
 
 nobufs_unlock_obj:
+	spin_unlock(&cookie->stores_lock);
 	spin_unlock(&object->lock);
 nobufs:
 	spin_unlock(&cookie->lock);
-- 
cgit v1.1


From 3e297b613491f0d4928aa652a2cd266aa06dc409 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 26 Mar 2010 12:40:13 -0400
Subject: Restore LOOKUP_DIRECTORY hint handling in final lookup on open()

	Lose want_dir argument, while we are at it - since now
nd->flags & LOOKUP_DIRECTORY is equivalent to it.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index 1c0fca6..a7dce91 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1610,8 +1610,7 @@ exit:
 
 static struct file *do_last(struct nameidata *nd, struct path *path,
 			    int open_flag, int acc_mode,
-			    int mode, const char *pathname,
-			    int *want_dir)
+			    int mode, const char *pathname)
 {
 	struct dentry *dir = nd->path.dentry;
 	struct file *filp;
@@ -1642,7 +1641,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
 	if (nd->last.name[nd->last.len]) {
 		if (open_flag & O_CREAT)
 			goto exit;
-		*want_dir = 1;
+		nd->flags |= LOOKUP_DIRECTORY;
 	}
 
 	/* just plain open? */
@@ -1656,8 +1655,10 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
 		if (path->dentry->d_inode->i_op->follow_link)
 			return NULL;
 		error = -ENOTDIR;
-		if (*want_dir && !path->dentry->d_inode->i_op->lookup)
-			goto exit_dput;
+		if (nd->flags & LOOKUP_DIRECTORY) {
+			if (!path->dentry->d_inode->i_op->lookup)
+				goto exit_dput;
+		}
 		path_to_nameidata(path, nd);
 		audit_inode(pathname, nd->path.dentry);
 		goto ok;
@@ -1766,7 +1767,6 @@ struct file *do_filp_open(int dfd, const char *pathname,
 	int count = 0;
 	int flag = open_to_namei_flags(open_flag);
 	int force_reval = 0;
-	int want_dir = open_flag & O_DIRECTORY;
 
 	if (!(open_flag & O_CREAT))
 		mode = 0;
@@ -1828,7 +1828,9 @@ reval:
 		if (open_flag & O_EXCL)
 			nd.flags |= LOOKUP_EXCL;
 	}
-	filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname, &want_dir);
+	if (open_flag & O_DIRECTORY)
+		nd.flags |= LOOKUP_DIRECTORY;
+	filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname);
 	while (unlikely(!filp)) { /* trailing symlink */
 		struct path holder;
 		struct inode *inode = path.dentry->d_inode;
@@ -1866,7 +1868,7 @@ reval:
 		}
 		holder = path;
 		nd.flags &= ~LOOKUP_PARENT;
-		filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname, &want_dir);
+		filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname);
 		if (inode->i_op->put_link)
 			inode->i_op->put_link(holder.dentry, &nd, cookie);
 		path_put(&holder);
-- 
cgit v1.1


From 810627a013163cd294762d57c0ea2ec055ffe4f6 Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <piastryyy@gmail.com>
Date: Sat, 27 Mar 2010 02:00:49 +0000
Subject: [CIFS] Add mmap for direct, nobrl cifs mount types

without mmap functions in file_ops OpenOffice can't save changes in
existing document. The same situation you can see with gedit. Also, a.out
format of files can't be executed without mmap.

Signed-off-by: Pavel Shilovsky <piastryyy@gmail.com>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/cifsfs.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 5183bc2..ded66be 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -808,6 +808,7 @@ const struct file_operations cifs_file_direct_nobrl_ops = {
 	.release = cifs_close,
 	.fsync = cifs_fsync,
 	.flush = cifs_flush,
+	.mmap = cifs_file_mmap,
 	.splice_read = generic_file_splice_read,
 #ifdef CONFIG_CIFS_POSIX
 	.unlocked_ioctl  = cifs_ioctl,
-- 
cgit v1.1


From 49137f2efb5cf68724bccaba531ab3d59acd71f9 Mon Sep 17 00:00:00 2001
From: Joern Engel <joern@logfs.org>
Date: Tue, 16 Mar 2010 21:46:15 +0100
Subject: Open segment file before using it

logfs_recover_sb() needs it open.

Signed-off-by: Joern Engel <joern@logfs.org>
---
 fs/logfs/super.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/logfs/super.c b/fs/logfs/super.c
index c66beab..0187281 100644
--- a/fs/logfs/super.c
+++ b/fs/logfs/super.c
@@ -289,6 +289,10 @@ static int logfs_make_writeable(struct super_block *sb)
 {
 	int err;
 
+	err = logfs_open_segfile(sb);
+	if (err)
+		return err;
+
 	/* Repair any broken superblock copies */
 	err = logfs_recover_sb(sb);
 	if (err)
@@ -299,10 +303,6 @@ static int logfs_make_writeable(struct super_block *sb)
 	if (err)
 		return err;
 
-	err = logfs_open_segfile(sb);
-	if (err)
-		return err;
-
 	/* Do one GC pass before any data gets dirtied */
 	logfs_gc_pass(sb);
 
-- 
cgit v1.1


From 59fe27c0a8173a74b105debc803b97582028c90b Mon Sep 17 00:00:00 2001
From: Joern Engel <joern@logfs.org>
Date: Wed, 17 Mar 2010 13:47:45 +0100
Subject: Limit max_pages for insane devices

Intel SSDs have a limit of 0xffff as queue_max_hw_sectors(q).  Such a
limit may make sense from a hardware pov, but it causes bio_alloc() to
return NULL.

Signed-off-by: Joern Engel <joern@logfs.org>
---
 fs/logfs/dev_bdev.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c
index 9718c22..f99f5dc 100644
--- a/fs/logfs/dev_bdev.c
+++ b/fs/logfs/dev_bdev.c
@@ -97,8 +97,10 @@ static int __bdev_writeseg(struct super_block *sb, u64 ofs, pgoff_t index,
 	unsigned int max_pages = queue_max_hw_sectors(q) >> (PAGE_SHIFT - 9);
 	int i;
 
+	if (max_pages > BIO_MAX_PAGES)
+		max_pages = BIO_MAX_PAGES;
 	bio = bio_alloc(GFP_NOFS, max_pages);
-	BUG_ON(!bio); /* FIXME: handle this */
+	BUG_ON(!bio);
 
 	for (i = 0; i < nr_pages; i++) {
 		if (i >= max_pages) {
@@ -191,8 +193,10 @@ static int do_erase(struct super_block *sb, u64 ofs, pgoff_t index,
 	unsigned int max_pages = queue_max_hw_sectors(q) >> (PAGE_SHIFT - 9);
 	int i;
 
+	if (max_pages > BIO_MAX_PAGES)
+		max_pages = BIO_MAX_PAGES;
 	bio = bio_alloc(GFP_NOFS, max_pages);
-	BUG_ON(!bio); /* FIXME: handle this */
+	BUG_ON(!bio);
 
 	for (i = 0; i < nr_pages; i++) {
 		if (i >= max_pages) {
-- 
cgit v1.1


From e07bf553f37cd4fa470b499ff34d800956df2d48 Mon Sep 17 00:00:00 2001
From: Joern Engel <joern@logfs.org>
Date: Wed, 17 Mar 2010 15:29:38 +0100
Subject: Plug memory leak in writeseg_end_io

Signed-off-by: Joern Engel <joern@logfs.org>
---
 fs/logfs/dev_bdev.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c
index f99f5dc..a5d0c56 100644
--- a/fs/logfs/dev_bdev.c
+++ b/fs/logfs/dev_bdev.c
@@ -80,6 +80,7 @@ static void writeseg_end_io(struct bio *bio, int err)
 			prefetchw(&bvec->bv_page->flags);
 
 		end_page_writeback(page);
+		page_cache_release(page);
 	} while (bvec >= bio->bi_io_vec);
 	bio_put(bio);
 	if (atomic_dec_and_test(&super->s_pending_writes))
-- 
cgit v1.1


From e326068806ee044cc617b1dc24be1293fca3fbf6 Mon Sep 17 00:00:00 2001
From: Joern Engel <joern@logfs.org>
Date: Wed, 17 Mar 2010 16:00:07 +0100
Subject: Prevent schedule while atomic in __logfs_readdir

Apparently filldir can sleep, which forbids kmap_atomic.

Signed-off-by: Joern Engel <joern@logfs.org>
---
 fs/logfs/dir.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index 56a8bfb..c76b4b5 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -303,12 +303,12 @@ static int __logfs_readdir(struct file *file, void *buf, filldir_t filldir)
 				(filler_t *)logfs_readpage, NULL);
 		if (IS_ERR(page))
 			return PTR_ERR(page);
-		dd = kmap_atomic(page, KM_USER0);
+		dd = kmap(page);
 		BUG_ON(dd->namelen == 0);
 
 		full = filldir(buf, (char *)dd->name, be16_to_cpu(dd->namelen),
 				pos, be64_to_cpu(dd->ino), dd->type);
-		kunmap_atomic(dd, KM_USER0);
+		kunmap(page);
 		page_cache_release(page);
 		if (full)
 			break;
-- 
cgit v1.1


From faaa27ab919799929732c356a92a160f8657ecc6 Mon Sep 17 00:00:00 2001
From: Joern Engel <joern@logfs.org>
Date: Fri, 26 Mar 2010 10:18:36 +0100
Subject: Write out both superblocks on mismatch

If the first superblock is wrong and the second gets written, there
will still be a mismatch on next mount.  Write both to make sure they
match.

Signed-off-by: Joern Engel <joern@logfs.org>
---
 fs/logfs/super.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/logfs/super.c b/fs/logfs/super.c
index 0187281..006670f 100644
--- a/fs/logfs/super.c
+++ b/fs/logfs/super.c
@@ -277,7 +277,7 @@ static int logfs_recover_sb(struct super_block *sb)
 	}
 	if (valid0 && valid1 && ds_cmp(ds0, ds1)) {
 		printk(KERN_INFO"Superblocks don't match - fixing.\n");
-		return write_one_sb(sb, super->s_devops->find_last_sb);
+		return logfs_write_sb(sb);
 	}
 	/* If neither is valid now, something's wrong.  Didn't we properly
 	 * check them before?!? */
-- 
cgit v1.1


From 7db8064c17b92e95aec2e333096c035db9ddd4fe Mon Sep 17 00:00:00 2001
From: Joern Engel <joern@logfs.org>
Date: Fri, 26 Mar 2010 14:45:55 +0100
Subject: Fix logfs_get_sb_final error path

rootdir was already allocated, so we must iput it again.
Found by Al Viro.

Signed-off-by: Joern Engel <joern@logfs.org>
---
 fs/logfs/super.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/logfs/super.c b/fs/logfs/super.c
index 006670f..2845c41 100644
--- a/fs/logfs/super.c
+++ b/fs/logfs/super.c
@@ -328,7 +328,7 @@ static int logfs_get_sb_final(struct super_block *sb, struct vfsmount *mnt)
 
 	sb->s_root = d_alloc_root(rootdir);
 	if (!sb->s_root)
-		goto fail;
+		goto fail2;
 
 	super->s_erase_page = alloc_pages(GFP_KERNEL, 0);
 	if (!super->s_erase_page)
-- 
cgit v1.1


From 6f2e9e6a950a165a7d2c399ab7557e6745ef2bfd Mon Sep 17 00:00:00 2001
From: Joern Engel <joern@logfs.org>
Date: Fri, 26 Mar 2010 14:50:08 +0100
Subject: Use deactivate_locked_super

Found by Al Viro.

Signed-off-by: Joern Engel <joern@logfs.org>
---
 fs/logfs/super.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/logfs/super.c b/fs/logfs/super.c
index 2845c41..9d856c4 100644
--- a/fs/logfs/super.c
+++ b/fs/logfs/super.c
@@ -572,8 +572,7 @@ int logfs_get_sb_device(struct file_system_type *type, int flags,
 	return 0;
 
 err1:
-	up_write(&sb->s_umount);
-	deactivate_super(sb);
+	deactivate_locked_super(sb);
 	return err;
 err0:
 	kfree(super);
-- 
cgit v1.1


From 193219172691e29813821dc8433317768c6ed1a3 Mon Sep 17 00:00:00 2001
From: Joern Engel <joern@logfs.org>
Date: Sat, 27 Mar 2010 09:56:58 +0100
Subject: Prevent data corruption in logfs_rewrite_block()

The comment was correct, so make the code match the comment.  As the
new comment indicates, we might be able to do a little less work.  But
for the current -rc series let's keep it simple and just fix the bug.

Signed-off-by: Joern Engel <joern@logfs.org>
---
 fs/logfs/readwrite.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index 7a23b3e..c3a3a68 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -1594,7 +1594,6 @@ int logfs_delete(struct inode *inode, pgoff_t index,
 	return ret;
 }
 
-/* Rewrite cannot mark the inode dirty but has to write it immediatly. */
 int logfs_rewrite_block(struct inode *inode, u64 bix, u64 ofs,
 		gc_level_t gc_level, long flags)
 {
@@ -1611,6 +1610,18 @@ int logfs_rewrite_block(struct inode *inode, u64 bix, u64 ofs,
 		if (level != 0)
 			alloc_indirect_block(inode, page, 0);
 		err = logfs_write_buf(inode, page, flags);
+		if (!err && shrink_level(gc_level) == 0) {
+			/* Rewrite cannot mark the inode dirty but has to
+			 * write it immediatly.
+			 * Q: Can't we just create an alias for the inode
+			 * instead?  And if not, why not?
+			 */
+			if (inode->i_ino == LOGFS_INO_MASTER)
+				logfs_write_anchor(inode->i_sb);
+			else {
+				err = __logfs_write_inode(inode, flags);
+			}
+		}
 	}
 	logfs_put_write_page(page);
 	return err;
-- 
cgit v1.1


From 81def6b9862764924a99ac1b680e73ac8c80ac64 Mon Sep 17 00:00:00 2001
From: Joern Engel <joern@logfs.org>
Date: Sun, 28 Mar 2010 12:47:09 +0200
Subject: Simplify and fix pad_wbuf

A comment in the old code read:
        /* The math in this function can surely use some love */

And indeed it did.  In the case that area->a_used_bytes is exactly
4096 bytes below segment size it fell apart.  pad_wbuf is now split
into two helpers that are significantly less complicated.

Signed-off-by: Joern Engel <joern@logfs.org>
---
 fs/logfs/segment.c | 52 ++++++++++++++++++++++++++++++----------------------
 1 file changed, 30 insertions(+), 22 deletions(-)

(limited to 'fs')

diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c
index 1a14f99..ff9d7f3 100644
--- a/fs/logfs/segment.c
+++ b/fs/logfs/segment.c
@@ -93,50 +93,58 @@ void __logfs_buf_write(struct logfs_area *area, u64 ofs, void *buf, size_t len,
 	} while (len);
 }
 
-/*
- * bdev_writeseg will write full pages.  Memset the tail to prevent data leaks.
- */
-static void pad_wbuf(struct logfs_area *area, int final)
+static void pad_partial_page(struct logfs_area *area)
 {
 	struct super_block *sb = area->a_sb;
-	struct logfs_super *super = logfs_super(sb);
 	struct page *page;
 	u64 ofs = dev_ofs(sb, area->a_segno, area->a_used_bytes);
 	pgoff_t index = ofs >> PAGE_SHIFT;
 	long offset = ofs & (PAGE_SIZE-1);
 	u32 len = PAGE_SIZE - offset;
 
-	if (len == PAGE_SIZE) {
-		/* The math in this function can surely use some love */
-		len = 0;
-	}
-	if (len) {
-		BUG_ON(area->a_used_bytes >= super->s_segsize);
-
-		page = get_mapping_page(area->a_sb, index, 0);
+	if (len % PAGE_SIZE) {
+		page = get_mapping_page(sb, index, 0);
 		BUG_ON(!page); /* FIXME: reserve a pool */
 		memset(page_address(page) + offset, 0xff, len);
 		SetPagePrivate(page);
 		page_cache_release(page);
 	}
+}
 
-	if (!final)
-		return;
+static void pad_full_pages(struct logfs_area *area)
+{
+	struct super_block *sb = area->a_sb;
+	struct logfs_super *super = logfs_super(sb);
+	u64 ofs = dev_ofs(sb, area->a_segno, area->a_used_bytes);
+	u32 len = super->s_segsize - area->a_used_bytes;
+	pgoff_t index = PAGE_CACHE_ALIGN(ofs) >> PAGE_CACHE_SHIFT;
+	pgoff_t no_indizes = len >> PAGE_CACHE_SHIFT;
+	struct page *page;
 
-	area->a_used_bytes += len;
-	for ( ; area->a_used_bytes < super->s_segsize;
-			area->a_used_bytes += PAGE_SIZE) {
-		/* Memset another page */
-		index++;
-		page = get_mapping_page(area->a_sb, index, 0);
+	while (no_indizes) {
+		page = get_mapping_page(sb, index, 0);
 		BUG_ON(!page); /* FIXME: reserve a pool */
-		memset(page_address(page), 0xff, PAGE_SIZE);
+		SetPageUptodate(page);
+		memset(page_address(page), 0xff, PAGE_CACHE_SIZE);
 		SetPagePrivate(page);
 		page_cache_release(page);
+		index++;
+		no_indizes--;
 	}
 }
 
 /*
+ * bdev_writeseg will write full pages.  Memset the tail to prevent data leaks.
+ * Also make sure we allocate (and memset) all pages for final writeout.
+ */
+static void pad_wbuf(struct logfs_area *area, int final)
+{
+	pad_partial_page(area);
+	if (final)
+		pad_full_pages(area);
+}
+
+/*
  * We have to be careful with the alias tree.  Since lookup is done by bix,
  * it needs to be normalized, so 14, 15, 16, etc. all match when dealing with
  * indirect blocks.  So always use it through accessor functions.
-- 
cgit v1.1


From 723b2ff40876678b49e61df34fb1d8001e34639d Mon Sep 17 00:00:00 2001
From: Joern Engel <joern@logfs.org>
Date: Sun, 28 Mar 2010 18:10:07 +0200
Subject: [LogFS] Clear PagePrivate when moving journal

do_logfs_journal_wl_pass() must call freeseg(), thereby clear
PagePrivate on all pages of the current journal segment.

Signed-off-by: Joern Engel <joern@logfs.org>
---
 fs/logfs/journal.c | 1 +
 fs/logfs/logfs.h   | 1 +
 fs/logfs/segment.c | 2 +-
 3 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/logfs/journal.c b/fs/logfs/journal.c
index 6ad30a4..15454ac 100644
--- a/fs/logfs/journal.c
+++ b/fs/logfs/journal.c
@@ -821,6 +821,7 @@ void do_logfs_journal_wl_pass(struct super_block *sb)
 		logfs_set_segment_reserved(sb, segno);
 	}
 	/* Manually move journal_area */
+	freeseg(sb, area->a_segno);
 	area->a_segno = super->s_journal_seg[0];
 	area->a_is_open = 0;
 	area->a_used_bytes = 0;
diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h
index 1297794..b84b0ee 100644
--- a/fs/logfs/logfs.h
+++ b/fs/logfs/logfs.h
@@ -587,6 +587,7 @@ void move_page_to_btree(struct page *page);
 int logfs_init_mapping(struct super_block *sb);
 void logfs_sync_area(struct logfs_area *area);
 void logfs_sync_segments(struct super_block *sb);
+void freeseg(struct super_block *sb, u32 segno);
 
 /* area handling */
 int logfs_init_areas(struct super_block *sb);
diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c
index ff9d7f3..0ecd8f0 100644
--- a/fs/logfs/segment.c
+++ b/fs/logfs/segment.c
@@ -691,7 +691,7 @@ int logfs_segment_delete(struct inode *inode, struct logfs_shadow *shadow)
 	return 0;
 }
 
-static void freeseg(struct super_block *sb, u32 segno)
+void freeseg(struct super_block *sb, u32 segno)
 {
 	struct logfs_super *super = logfs_super(sb);
 	struct address_space *mapping = super->s_mapping_inode->i_mapping;
-- 
cgit v1.1


From 94aa8ae13db2ecf2ec1b4e65a65d3fe92b468e0e Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Sun, 28 Mar 2010 21:22:50 -0700
Subject: ceph: fix use after free on mds __unregister_request

There was a use after free in __unregister_request that would trigger
whenever the request map held the last reference.  This appears to have
triggered an oops during 'umount -f' when requests are being torn down.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/mds_client.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 5268d40..5c7920b 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -532,7 +532,6 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
 	dout("__unregister_request %p tid %lld\n", req, req->r_tid);
 	rb_erase(&req->r_node, &mdsc->request_tree);
 	RB_CLEAR_NODE(&req->r_node);
-	ceph_mdsc_put_request(req);
 
 	if (req->r_unsafe_dir) {
 		struct ceph_inode_info *ci = ceph_inode(req->r_unsafe_dir);
@@ -541,6 +540,8 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
 		list_del_init(&req->r_unsafe_dir_item);
 		spin_unlock(&ci->i_unsafe_lock);
 	}
+
+	ceph_mdsc_put_request(req);
 }
 
 /*
-- 
cgit v1.1


From a53f4f9efaeb1d87cfae066346979d4d70e1abe9 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 29 Mar 2010 13:08:52 +0100
Subject: SLOW_WORK: CONFIG_SLOW_WORK_PROC should be CONFIG_SLOW_WORK_DEBUG

CONFIG_SLOW_WORK_PROC was changed to CONFIG_SLOW_WORK_DEBUG, but not in all
instances.  Change the remaining instances.  This makes the debugfs file
display the time mark and the owner's description again.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/fscache/object.c    | 6 +++---
 fs/fscache/operation.c | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/fscache/object.c b/fs/fscache/object.c
index e513ac5..0b589a9 100644
--- a/fs/fscache/object.c
+++ b/fs/fscache/object.c
@@ -53,7 +53,7 @@ const char fscache_object_states_short[FSCACHE_OBJECT__NSTATES][5] = {
 static void fscache_object_slow_work_put_ref(struct slow_work *);
 static int  fscache_object_slow_work_get_ref(struct slow_work *);
 static void fscache_object_slow_work_execute(struct slow_work *);
-#ifdef CONFIG_SLOW_WORK_PROC
+#ifdef CONFIG_SLOW_WORK_DEBUG
 static void fscache_object_slow_work_desc(struct slow_work *, struct seq_file *);
 #endif
 static void fscache_initialise_object(struct fscache_object *);
@@ -69,7 +69,7 @@ const struct slow_work_ops fscache_object_slow_work_ops = {
 	.get_ref	= fscache_object_slow_work_get_ref,
 	.put_ref	= fscache_object_slow_work_put_ref,
 	.execute	= fscache_object_slow_work_execute,
-#ifdef CONFIG_SLOW_WORK_PROC
+#ifdef CONFIG_SLOW_WORK_DEBUG
 	.desc		= fscache_object_slow_work_desc,
 #endif
 };
@@ -364,7 +364,7 @@ static void fscache_object_slow_work_execute(struct slow_work *work)
 /*
  * describe an object for slow-work debugging
  */
-#ifdef CONFIG_SLOW_WORK_PROC
+#ifdef CONFIG_SLOW_WORK_DEBUG
 static void fscache_object_slow_work_desc(struct slow_work *work,
 					  struct seq_file *m)
 {
diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c
index 313e79a..9f6c928 100644
--- a/fs/fscache/operation.c
+++ b/fs/fscache/operation.c
@@ -500,7 +500,7 @@ static void fscache_op_execute(struct slow_work *work)
 /*
  * describe an operation for slow-work debugging
  */
-#ifdef CONFIG_SLOW_WORK_PROC
+#ifdef CONFIG_SLOW_WORK_DEBUG
 static void fscache_op_desc(struct slow_work *work, struct seq_file *m)
 {
 	struct fscache_operation *op =
@@ -517,7 +517,7 @@ const struct slow_work_ops fscache_op_slow_work_ops = {
 	.get_ref	= fscache_op_get_ref,
 	.put_ref	= fscache_op_put_ref,
 	.execute	= fscache_op_execute,
-#ifdef CONFIG_SLOW_WORK_PROC
+#ifdef CONFIG_SLOW_WORK_DEBUG
 	.desc		= fscache_op_desc,
 #endif
 };
-- 
cgit v1.1


From 0943846ae05603efd98550f2d475e9c98191bde8 Mon Sep 17 00:00:00 2001
From: Joern Engel <joern@logfs.org>
Date: Mon, 29 Mar 2010 21:13:28 +0200
Subject: [LogFS] Move reserved segments with journal

Fixes a GC livelock.

Signed-off-by: Joern Engel <joern@logfs.org>
---
 fs/logfs/journal.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'fs')

diff --git a/fs/logfs/journal.c b/fs/logfs/journal.c
index 15454ac..25b1345 100644
--- a/fs/logfs/journal.c
+++ b/fs/logfs/journal.c
@@ -800,6 +800,7 @@ void do_logfs_journal_wl_pass(struct super_block *sb)
 {
 	struct logfs_super *super = logfs_super(sb);
 	struct logfs_area *area = super->s_journal_area;
+	struct btree_head32 *head = &super->s_reserved_segments;
 	u32 segno, ec;
 	int i, err;
 
@@ -807,6 +808,7 @@ void do_logfs_journal_wl_pass(struct super_block *sb)
 	/* Drop old segments */
 	journal_for_each(i)
 		if (super->s_journal_seg[i]) {
+			btree_remove32(head, super->s_journal_seg[i]);
 			logfs_set_segment_unreserved(sb,
 					super->s_journal_seg[i],
 					super->s_journal_ec[i]);
@@ -819,6 +821,8 @@ void do_logfs_journal_wl_pass(struct super_block *sb)
 		super->s_journal_seg[i] = segno;
 		super->s_journal_ec[i] = ec;
 		logfs_set_segment_reserved(sb, segno);
+		err = btree_insert32(head, segno, (void *)1, GFP_KERNEL);
+		BUG_ON(err); /* mempool should prevent this */
 	}
 	/* Manually move journal_area */
 	freeseg(sb, area->a_segno);
-- 
cgit v1.1


From 6be7fa06eb4d721df734bd0946b5e63b27c0589b Mon Sep 17 00:00:00 2001
From: Joern Engel <joern@logfs.org>
Date: Mon, 29 Mar 2010 21:14:52 +0200
Subject: [LogFS] Erase new journal segments

If the device contains on old logfs image and the journal is moved to
segment that have never been used by the current logfs and not all
journal segments are erased before the next mount, the old content can
confuse mount code.  To prevent this, always erase the new journal
segments.

Signed-off-by: Joern Engel <joern@logfs.org>
---
 fs/logfs/journal.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/logfs/journal.c b/fs/logfs/journal.c
index 25b1345..d57c7b0 100644
--- a/fs/logfs/journal.c
+++ b/fs/logfs/journal.c
@@ -823,6 +823,8 @@ void do_logfs_journal_wl_pass(struct super_block *sb)
 		logfs_set_segment_reserved(sb, segno);
 		err = btree_insert32(head, segno, (void *)1, GFP_KERNEL);
 		BUG_ON(err); /* mempool should prevent this */
+		err = logfs_erase_segment(sb, segno, 1);
+		BUG_ON(err); /* FIXME: remount-ro would be nicer */
 	}
 	/* Manually move journal_area */
 	freeseg(sb, area->a_segno);
-- 
cgit v1.1


From de329820e920cd9cfbc2127cad26a37026260cce Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon, 29 Mar 2010 14:30:19 -0700
Subject: ext3: fix broken handling of EXT3_STATE_NEW

In commit 9df93939b735 ("ext3: Use bitops to read/modify
EXT3_I(inode)->i_state") ext3 changed its internal 'i_state' variable to
use bitops for its state handling.  However, unline the same ext4
change, it didn't actually change the name of the field when it changed
the semantics of it.

As a result, an old use of 'i_state' remained in fs/ext3/ialloc.c that
initialized the field to EXT3_STATE_NEW.  And that does not work
_at_all_ when we're now working with individually named bits rather than
values that get masked.  So the code tried to mark the state to be new,
but in actual fact set the field to EXT3_STATE_JDATA.  Which makes no
sense at all, and screws up all the code that checks whether the inode
was newly allocated.

In particular, it made the xattr code unhappy, and caused various random
behavior, like apparently

	https://bugzilla.redhat.com/show_bug.cgi?id=577911

So fix the initialization, and rename the field to match ext4 so that we
don't have this happen again.

Cc: James Morris <jmorris@namei.org>
Cc: Stephen Smalley <sds@tycho.nsa.gov>
Cc: Daniel J Walsh <dwalsh@redhat.com>
Cc: Eric Paris <eparis@redhat.com>
Cc: Jan Kara <jack@suse.cz>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ext3/ialloc.c | 4 +++-
 fs/ext3/inode.c  | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index ef9008b..0d0e97e 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -582,7 +582,9 @@ got:
 	inode->i_generation = sbi->s_next_generation++;
 	spin_unlock(&sbi->s_next_gen_lock);
 
-	ei->i_state = EXT3_STATE_NEW;
+	ei->i_state_flags = 0;
+	ext3_set_inode_state(inode, EXT3_STATE_NEW);
+
 	ei->i_extra_isize =
 		(EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) ?
 		sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE : 0;
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 7f920b7..ea33bdf 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -2811,7 +2811,7 @@ struct inode *ext3_iget(struct super_block *sb, unsigned long ino)
 	inode->i_mtime.tv_sec = (signed)le32_to_cpu(raw_inode->i_mtime);
 	inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = inode->i_mtime.tv_nsec = 0;
 
-	ei->i_state = 0;
+	ei->i_state_flags = 0;
 	ei->i_dir_start_lookup = 0;
 	ei->i_dtime = le32_to_cpu(raw_inode->i_dtime);
 	/* We now have enough fields to check if the inode was active or not.
-- 
cgit v1.1


From 5a0e3ad6af8660be21ca98a971cd00f331318c05 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 24 Mar 2010 17:04:11 +0900
Subject: include cleanup: Update gfp.h and slab.h includes to prepare for
 breaking implicit slab.h inclusion from percpu.h

percpu.h is included by sched.h and module.h and thus ends up being
included when building most .c files.  percpu.h includes slab.h which
in turn includes gfp.h making everything defined by the two files
universally available and complicating inclusion dependencies.

percpu.h -> slab.h dependency is about to be removed.  Prepare for
this change by updating users of gfp and slab facilities include those
headers directly instead of assuming availability.  As this conversion
needs to touch large number of source files, the following script is
used as the basis of conversion.

  http://userweb.kernel.org/~tj/misc/slabh-sweep.py

The script does the followings.

* Scan files for gfp and slab usages and update includes such that
  only the necessary includes are there.  ie. if only gfp is used,
  gfp.h, if slab is used, slab.h.

* When the script inserts a new include, it looks at the include
  blocks and try to put the new include such that its order conforms
  to its surrounding.  It's put in the include block which contains
  core kernel includes, in the same order that the rest are ordered -
  alphabetical, Christmas tree, rev-Xmas-tree or at the end if there
  doesn't seem to be any matching order.

* If the script can't find a place to put a new include (mostly
  because the file doesn't have fitting include block), it prints out
  an error message indicating which .h file needs to be added to the
  file.

The conversion was done in the following steps.

1. The initial automatic conversion of all .c files updated slightly
   over 4000 files, deleting around 700 includes and adding ~480 gfp.h
   and ~3000 slab.h inclusions.  The script emitted errors for ~400
   files.

2. Each error was manually checked.  Some didn't need the inclusion,
   some needed manual addition while adding it to implementation .h or
   embedding .c file was more appropriate for others.  This step added
   inclusions to around 150 files.

3. The script was run again and the output was compared to the edits
   from #2 to make sure no file was left behind.

4. Several build tests were done and a couple of problems were fixed.
   e.g. lib/decompress_*.c used malloc/free() wrappers around slab
   APIs requiring slab.h to be added manually.

5. The script was run on all .h files but without automatically
   editing them as sprinkling gfp.h and slab.h inclusions around .h
   files could easily lead to inclusion dependency hell.  Most gfp.h
   inclusion directives were ignored as stuff from gfp.h was usually
   wildly available and often used in preprocessor macros.  Each
   slab.h inclusion directive was examined and added manually as
   necessary.

6. percpu.h was updated not to include slab.h.

7. Build test were done on the following configurations and failures
   were fixed.  CONFIG_GCOV_KERNEL was turned off for all tests (as my
   distributed build env didn't work with gcov compiles) and a few
   more options had to be turned off depending on archs to make things
   build (like ipr on powerpc/64 which failed due to missing writeq).

   * x86 and x86_64 UP and SMP allmodconfig and a custom test config.
   * powerpc and powerpc64 SMP allmodconfig
   * sparc and sparc64 SMP allmodconfig
   * ia64 SMP allmodconfig
   * s390 SMP allmodconfig
   * alpha SMP allmodconfig
   * um on x86_64 SMP allmodconfig

8. percpu.h modifications were reverted so that it could be applied as
   a separate patch and serve as bisection point.

Given the fact that I had only a couple of failures from tests on step
6, I'm fairly confident about the coverage of this conversion patch.
If there is a breakage, it's likely to be something in one of the arch
headers which should be easily discoverable easily on most builds of
the specific arch.

Signed-off-by: Tejun Heo <tj@kernel.org>
Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
---
 fs/9p/cache.c                  | 1 +
 fs/9p/fid.c                    | 1 +
 fs/9p/v9fs.c                   | 1 +
 fs/9p/vfs_dentry.c             | 1 +
 fs/9p/vfs_dir.c                | 1 +
 fs/9p/vfs_inode.c              | 1 +
 fs/9p/vfs_super.c              | 1 +
 fs/adfs/super.c                | 1 +
 fs/affs/bitmap.c               | 1 +
 fs/affs/inode.c                | 1 +
 fs/affs/super.c                | 1 +
 fs/afs/cache.c                 | 1 -
 fs/afs/cmservice.c             | 1 +
 fs/afs/dir.c                   | 1 -
 fs/afs/file.c                  | 2 +-
 fs/afs/fsclient.c              | 1 +
 fs/afs/inode.c                 | 1 -
 fs/afs/mntpt.c                 | 2 +-
 fs/afs/rxrpc.c                 | 1 +
 fs/afs/vlclient.c              | 1 +
 fs/afs/vlocation.c             | 1 +
 fs/afs/vnode.c                 | 1 -
 fs/anon_inodes.c               | 1 -
 fs/autofs/root.c               | 1 +
 fs/autofs4/dev-ioctl.c         | 1 +
 fs/autofs4/root.c              | 1 +
 fs/befs/datastream.c           | 1 -
 fs/binfmt_aout.c               | 2 +-
 fs/binfmt_em86.c               | 1 -
 fs/binfmt_script.c             | 1 -
 fs/bio-integrity.c             | 1 +
 fs/btrfs/acl.c                 | 1 +
 fs/btrfs/async-thread.c        | 1 +
 fs/btrfs/compression.c         | 1 +
 fs/btrfs/ctree.c               | 1 +
 fs/btrfs/ctree.h               | 1 +
 fs/btrfs/delayed-ref.c         | 1 +
 fs/btrfs/disk-io.c             | 1 +
 fs/btrfs/extent-tree.c         | 1 +
 fs/btrfs/extent_io.c           | 1 -
 fs/btrfs/extent_map.c          | 1 -
 fs/btrfs/file-item.c           | 1 +
 fs/btrfs/file.c                | 1 +
 fs/btrfs/free-space-cache.c    | 1 +
 fs/btrfs/inode.c               | 1 +
 fs/btrfs/ioctl.c               | 1 +
 fs/btrfs/locking.c             | 1 -
 fs/btrfs/ordered-data.c        | 1 -
 fs/btrfs/ref-cache.c           | 1 +
 fs/btrfs/relocation.c          | 1 +
 fs/btrfs/super.c               | 1 +
 fs/btrfs/transaction.c         | 1 +
 fs/btrfs/tree-log.c            | 1 +
 fs/btrfs/volumes.c             | 1 +
 fs/cachefiles/interface.c      | 1 +
 fs/cachefiles/namei.c          | 1 +
 fs/cachefiles/rdwr.c           | 1 +
 fs/cachefiles/xattr.c          | 1 +
 fs/ceph/addr.c                 | 1 +
 fs/ceph/auth.c                 | 1 +
 fs/ceph/auth_none.c            | 1 +
 fs/ceph/auth_x.c               | 1 +
 fs/ceph/buffer.c               | 3 +++
 fs/ceph/caps.c                 | 1 +
 fs/ceph/crypto.c               | 1 +
 fs/ceph/debugfs.c              | 1 +
 fs/ceph/dir.c                  | 1 +
 fs/ceph/export.c               | 1 +
 fs/ceph/file.c                 | 1 +
 fs/ceph/mds_client.c           | 1 +
 fs/ceph/messenger.c            | 1 +
 fs/ceph/mon_client.c           | 1 +
 fs/ceph/osdmap.c               | 4 +++-
 fs/ceph/pagelist.c             | 1 +
 fs/ceph/snap.c                 | 1 +
 fs/ceph/super.c                | 1 +
 fs/ceph/super.h                | 1 +
 fs/ceph/xattr.c                | 1 +
 fs/cifs/cifs_dfs_ref.c         | 1 +
 fs/cifs/cifs_spnego.c          | 1 +
 fs/cifs/cifs_unicode.c         | 1 +
 fs/cifs/cifsacl.c              | 1 +
 fs/cifs/cifsencrypt.c          | 1 +
 fs/cifs/cifsglob.h             | 1 +
 fs/cifs/cifssmb.c              | 1 +
 fs/cifs/connect.c              | 1 +
 fs/cifs/dns_resolve.c          | 1 +
 fs/cifs/file.c                 | 1 +
 fs/cifs/inode.c                | 1 +
 fs/cifs/link.c                 | 1 +
 fs/cifs/readdir.c              | 1 +
 fs/cifs/sess.c                 | 1 +
 fs/cifs/smbencrypt.c           | 1 +
 fs/cifs/transport.c            | 1 +
 fs/cifs/xattr.c                | 1 +
 fs/coda/dir.c                  | 1 +
 fs/coda/file.c                 | 1 +
 fs/coda/inode.c                | 1 +
 fs/coda/upcall.c               | 1 +
 fs/compat.c                    | 1 +
 fs/compat_ioctl.c              | 2 +-
 fs/configfs/inode.c            | 1 +
 fs/configfs/mount.c            | 1 +
 fs/configfs/symlink.c          | 1 +
 fs/debugfs/inode.c             | 1 +
 fs/devpts/inode.c              | 1 +
 fs/dlm/config.c                | 1 +
 fs/dlm/debug_fs.c              | 1 +
 fs/dlm/lock.c                  | 1 +
 fs/dlm/lowcomms.c              | 1 +
 fs/dlm/netlink.c               | 1 +
 fs/dlm/plock.c                 | 1 +
 fs/dlm/user.c                  | 1 +
 fs/ecryptfs/crypto.c           | 1 +
 fs/ecryptfs/dentry.c           | 1 +
 fs/ecryptfs/file.c             | 1 +
 fs/ecryptfs/inode.c            | 1 +
 fs/ecryptfs/keystore.c         | 1 +
 fs/ecryptfs/kthread.c          | 1 +
 fs/ecryptfs/main.c             | 1 +
 fs/ecryptfs/messaging.c        | 1 +
 fs/ecryptfs/miscdev.c          | 1 +
 fs/ecryptfs/mmap.c             | 1 +
 fs/ecryptfs/super.c            | 1 +
 fs/eventfd.c                   | 1 +
 fs/exofs/inode.c               | 1 +
 fs/exofs/ios.c                 | 1 +
 fs/exofs/super.c               | 1 +
 fs/ext2/balloc.c               | 1 +
 fs/ext2/xattr_security.c       | 1 +
 fs/ext3/balloc.c               | 1 +
 fs/ext3/xattr_security.c       | 1 +
 fs/ext4/block_validity.c       | 1 +
 fs/ext4/inode.c                | 1 +
 fs/ext4/mballoc.c              | 1 +
 fs/ext4/migrate.c              | 1 +
 fs/ext4/move_extent.c          | 1 +
 fs/ext4/xattr_security.c       | 1 +
 fs/fat/cache.c                 | 1 +
 fs/fifo.c                      | 1 -
 fs/filesystems.c               | 2 +-
 fs/freevxfs/vxfs_subr.c        | 1 -
 fs/fs-writeback.c              | 1 +
 fs/fscache/object-list.c       | 1 +
 fs/fscache/operation.c         | 1 +
 fs/fscache/page.c              | 1 +
 fs/fuse/cuse.c                 | 1 +
 fs/generic_acl.c               | 1 +
 fs/gfs2/bmap.c                 | 1 -
 fs/gfs2/dentry.c               | 1 -
 fs/gfs2/export.c               | 1 -
 fs/gfs2/glops.c                | 1 -
 fs/gfs2/lock_dlm.c             | 1 +
 fs/gfs2/rgrp.h                 | 2 ++
 fs/gfs2/sys.c                  | 1 -
 fs/gfs2/util.c                 | 1 -
 fs/hfs/bnode.c                 | 1 +
 fs/hfs/btree.c                 | 1 +
 fs/hfs/mdb.c                   | 1 +
 fs/hfs/super.c                 | 1 +
 fs/hfsplus/options.c           | 1 +
 fs/hostfs/hostfs_kern.c        | 1 +
 fs/hpfs/buffer.c               | 1 +
 fs/hpfs/dir.c                  | 1 +
 fs/hpfs/inode.c                | 1 +
 fs/hpfs/super.c                | 1 +
 fs/ioprio.c                    | 1 +
 fs/isofs/dir.c                 | 1 +
 fs/isofs/namei.c               | 1 +
 fs/jbd/commit.c                | 1 -
 fs/jbd/recovery.c              | 1 -
 fs/jbd2/recovery.c             | 1 -
 fs/jffs2/compr_lzo.c           | 1 -
 fs/jffs2/compr_zlib.c          | 1 -
 fs/jffs2/debug.c               | 1 +
 fs/jffs2/file.c                | 1 -
 fs/jffs2/nodelist.c            | 1 -
 fs/jffs2/nodemgmt.c            | 1 -
 fs/jffs2/symlink.c             | 1 -
 fs/jffs2/write.c               | 1 -
 fs/jfs/acl.c                   | 1 +
 fs/jfs/jfs_dmap.c              | 1 +
 fs/jfs/jfs_dtree.c             | 1 +
 fs/jfs/jfs_imap.c              | 1 +
 fs/jfs/jfs_logmgr.c            | 1 +
 fs/jfs/jfs_metapage.c          | 1 +
 fs/jfs/jfs_unicode.h           | 1 +
 fs/jfs/super.c                 | 1 +
 fs/jfs/xattr.c                 | 1 +
 fs/libfs.c                     | 1 +
 fs/lockd/clntlock.c            | 1 +
 fs/lockd/clntproc.c            | 1 +
 fs/lockd/mon.c                 | 1 +
 fs/lockd/svc.c                 | 1 -
 fs/lockd/svc4proc.c            | 1 -
 fs/lockd/svclock.c             | 1 +
 fs/lockd/svcproc.c             | 1 -
 fs/lockd/svcsubs.c             | 1 +
 fs/logfs/dev_bdev.c            | 1 +
 fs/logfs/dir.c                 | 2 +-
 fs/logfs/gc.c                  | 1 +
 fs/logfs/inode.c               | 1 +
 fs/logfs/journal.c             | 1 +
 fs/logfs/readwrite.c           | 1 +
 fs/logfs/segment.c             | 1 +
 fs/logfs/super.c               | 1 +
 fs/minix/itree_v1.c            | 1 +
 fs/mpage.c                     | 1 +
 fs/ncpfs/dir.c                 | 1 -
 fs/ncpfs/file.c                | 1 -
 fs/ncpfs/ioctl.c               | 1 +
 fs/ncpfs/mmap.c                | 2 +-
 fs/ncpfs/sock.c                | 1 +
 fs/ncpfs/symlink.c             | 1 +
 fs/nfs/cache_lib.c             | 1 +
 fs/nfs/callback_proc.c         | 1 +
 fs/nfs/callback_xdr.c          | 1 +
 fs/nfs/client.c                | 1 +
 fs/nfs/delegation.c            | 1 +
 fs/nfs/direct.c                | 1 +
 fs/nfs/dns_resolve.c           | 1 +
 fs/nfs/file.c                  | 2 +-
 fs/nfs/fscache.c               | 1 +
 fs/nfs/inode.c                 | 1 +
 fs/nfs/namespace.c             | 1 +
 fs/nfs/nfs2xdr.c               | 1 -
 fs/nfs/nfs3acl.c               | 1 +
 fs/nfs/nfs3proc.c              | 1 +
 fs/nfs/nfs3xdr.c               | 1 -
 fs/nfs/nfs4namespace.c         | 1 +
 fs/nfs/nfs4proc.c              | 1 +
 fs/nfs/nfs4xdr.c               | 1 -
 fs/nfs/proc.c                  | 1 -
 fs/nfs/super.c                 | 1 +
 fs/nfs/symlink.c               | 1 -
 fs/nfs_common/nfsacl.c         | 1 +
 fs/nfsd/export.c               | 1 +
 fs/nfsd/nfs2acl.c              | 1 +
 fs/nfsd/nfs3acl.c              | 1 +
 fs/nfsd/nfs4acl.c              | 1 +
 fs/nfsd/nfs4callback.c         | 1 +
 fs/nfsd/nfs4idmap.c            | 1 +
 fs/nfsd/nfs4proc.c             | 1 +
 fs/nfsd/nfs4recover.c          | 1 +
 fs/nfsd/nfs4state.c            | 1 +
 fs/nfsd/nfs4xdr.c              | 1 +
 fs/nfsd/nfscache.c             | 2 ++
 fs/nfsd/nfsctl.c               | 1 +
 fs/nfsd/vfs.c                  | 1 +
 fs/nilfs2/alloc.c              | 1 +
 fs/nilfs2/btnode.c             | 1 +
 fs/nilfs2/gcinode.c            | 1 +
 fs/nilfs2/inode.c              | 1 +
 fs/nilfs2/ioctl.c              | 1 +
 fs/nilfs2/mdt.c                | 1 +
 fs/nilfs2/page.c               | 1 +
 fs/nilfs2/recovery.c           | 1 +
 fs/nilfs2/segbuf.c             | 1 +
 fs/nilfs2/segment.c            | 1 +
 fs/nilfs2/the_nilfs.h          | 1 +
 fs/notify/fsnotify.c           | 1 +
 fs/notify/inode_mark.c         | 1 -
 fs/ntfs/aops.c                 | 1 +
 fs/ntfs/attrib.c               | 1 +
 fs/ntfs/compress.c             | 1 +
 fs/ntfs/dir.c                  | 1 +
 fs/ntfs/file.c                 | 1 +
 fs/ntfs/index.c                | 2 ++
 fs/ntfs/mft.c                  | 1 +
 fs/ntfs/namei.c                | 1 +
 fs/ocfs2/acl.c                 | 1 +
 fs/ocfs2/buffer_head_io.c      | 1 -
 fs/ocfs2/cluster/heartbeat.c   | 1 +
 fs/ocfs2/cluster/nodemanager.c | 1 +
 fs/ocfs2/cluster/quorum.c      | 1 -
 fs/ocfs2/dlm/dlmast.c          | 1 -
 fs/ocfs2/dlm/dlmconvert.c      | 1 -
 fs/ocfs2/dlm/dlmthread.c       | 1 -
 fs/ocfs2/dlm/dlmunlock.c       | 1 -
 fs/ocfs2/extent_map.c          | 1 +
 fs/ocfs2/heartbeat.c           | 1 -
 fs/ocfs2/inode.c               | 1 -
 fs/ocfs2/mmap.c                | 1 -
 fs/ocfs2/quota_global.c        | 1 +
 fs/ocfs2/quota_local.c         | 1 +
 fs/ocfs2/refcounttree.c        | 1 -
 fs/ocfs2/stack_o2cb.c          | 1 +
 fs/ocfs2/stack_user.c          | 1 +
 fs/ocfs2/sysfile.c             | 1 -
 fs/omfs/inode.c                | 1 +
 fs/open.c                      | 2 +-
 fs/partitions/check.c          | 1 +
 fs/partitions/efi.c            | 1 +
 fs/proc/array.c                | 1 -
 fs/proc/base.c                 | 1 +
 fs/proc/generic.c              | 1 +
 fs/proc/inode.c                | 1 +
 fs/proc/kcore.c                | 1 +
 fs/proc/nommu.c                | 1 -
 fs/proc/proc_devtree.c         | 1 +
 fs/proc/proc_net.c             | 1 +
 fs/proc/stat.c                 | 1 -
 fs/proc/task_mmu.c             | 1 +
 fs/proc/task_nommu.c           | 1 +
 fs/proc/vmcore.c               | 1 +
 fs/quota/netlink.c             | 1 +
 fs/ramfs/file-nommu.c          | 1 +
 fs/ramfs/inode.c               | 1 +
 fs/reiserfs/dir.c              | 1 +
 fs/reiserfs/fix_node.c         | 1 +
 fs/reiserfs/inode.c            | 1 +
 fs/reiserfs/journal.c          | 1 +
 fs/reiserfs/namei.c            | 1 +
 fs/reiserfs/super.c            | 1 +
 fs/reiserfs/xattr.c            | 1 +
 fs/reiserfs/xattr_acl.c        | 1 +
 fs/reiserfs/xattr_security.c   | 1 +
 fs/signalfd.c                  | 1 +
 fs/smbfs/file.c                | 1 -
 fs/smbfs/smbiod.c              | 1 -
 fs/smbfs/symlink.c             | 1 +
 fs/splice.c                    | 1 +
 fs/squashfs/symlink.c          | 1 -
 fs/squashfs/zlib_wrapper.c     | 1 +
 fs/sync.c                      | 1 +
 fs/sysfs/inode.c               | 1 +
 fs/sysfs/mount.c               | 1 +
 fs/sysfs/symlink.c             | 1 +
 fs/timerfd.c                   | 1 +
 fs/ubifs/commit.c              | 1 +
 fs/ubifs/debug.c               | 1 +
 fs/ubifs/file.c                | 1 +
 fs/ubifs/gc.c                  | 1 +
 fs/ubifs/io.c                  | 1 +
 fs/ubifs/lpt.c                 | 1 +
 fs/ubifs/lpt_commit.c          | 1 +
 fs/ubifs/recovery.c            | 1 +
 fs/ubifs/sb.c                  | 1 +
 fs/ubifs/tnc.c                 | 1 +
 fs/ubifs/ubifs.h               | 1 +
 fs/ubifs/xattr.c               | 1 +
 fs/udf/partition.c             | 1 -
 fs/udf/symlink.c               | 1 -
 fs/udf/unicode.c               | 1 +
 fs/xattr_acl.c                 | 2 +-
 fs/xfs/linux-2.6/kmem.c        | 1 +
 fs/xfs/linux-2.6/xfs_acl.c     | 1 +
 fs/xfs/linux-2.6/xfs_aops.c    | 1 +
 fs/xfs/linux-2.6/xfs_buf.c     | 2 +-
 fs/xfs/linux-2.6/xfs_ioctl.c   | 1 +
 fs/xfs/linux-2.6/xfs_ioctl32.c | 1 +
 fs/xfs/linux-2.6/xfs_iops.c    | 1 +
 fs/xfs/linux-2.6/xfs_super.c   | 1 +
 353 files changed, 300 insertions(+), 72 deletions(-)

(limited to 'fs')

diff --git a/fs/9p/cache.c b/fs/9p/cache.c
index e777961..0dbe0d1 100644
--- a/fs/9p/cache.c
+++ b/fs/9p/cache.c
@@ -22,6 +22,7 @@
 
 #include <linux/jiffies.h>
 #include <linux/file.h>
+#include <linux/slab.h>
 #include <linux/stat.h>
 #include <linux/sched.h>
 #include <linux/fs.h>
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index 08b2eb1..82ee460 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -24,6 +24,7 @@
 #include <linux/module.h>
 #include <linux/errno.h>
 #include <linux/fs.h>
+#include <linux/slab.h>
 #include <linux/sched.h>
 #include <linux/idr.h>
 #include <net/9p/9p.h>
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 6c7f6a2..cb57d33 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -29,6 +29,7 @@
 #include <linux/sched.h>
 #include <linux/parser.h>
 #include <linux/idr.h>
+#include <linux/slab.h>
 #include <net/9p/9p.h>
 #include <net/9p/client.h>
 #include <net/9p/transport.h>
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index d743252..cbf4e50 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -34,6 +34,7 @@
 #include <linux/namei.h>
 #include <linux/idr.h>
 #include <linux/sched.h>
+#include <linux/slab.h>
 #include <net/9p/9p.h>
 #include <net/9p/client.h>
 
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index d8a3afe..909711f 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -32,6 +32,7 @@
 #include <linux/sched.h>
 #include <linux/inet.h>
 #include <linux/idr.h>
+#include <linux/slab.h>
 #include <net/9p/9p.h>
 #include <net/9p/client.h>
 
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 5fe45d6..63c2b5a 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -34,6 +34,7 @@
 #include <linux/namei.h>
 #include <linux/idr.h>
 #include <linux/sched.h>
+#include <linux/slab.h>
 #include <net/9p/9p.h>
 #include <net/9p/client.h>
 
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 69357c0..a271549d 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -37,6 +37,7 @@
 #include <linux/mount.h>
 #include <linux/idr.h>
 #include <linux/sched.h>
+#include <linux/slab.h>
 #include <net/9p/9p.h>
 #include <net/9p/client.h>
 
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 6910a98..4a3af70 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -13,6 +13,7 @@
 #include <linux/parser.h>
 #include <linux/mount.h>
 #include <linux/seq_file.h>
+#include <linux/slab.h>
 #include <linux/smp_lock.h>
 #include <linux/statfs.h>
 #include "adfs.h"
diff --git a/fs/affs/bitmap.c b/fs/affs/bitmap.c
index 8306d53..3e26271 100644
--- a/fs/affs/bitmap.c
+++ b/fs/affs/bitmap.c
@@ -7,6 +7,7 @@
  *  block allocation, deallocation, calculation of free space.
  */
 
+#include <linux/slab.h>
 #include "affs.h"
 
 /* This is, of course, shamelessly stolen from fs/minix */
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index c9744d7..f4b2a4e 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -10,6 +10,7 @@
  *  (C) 1991  Linus Torvalds - minix filesystem
  */
 #include <linux/sched.h>
+#include <linux/gfp.h>
 #include "affs.h"
 
 extern const struct inode_operations affs_symlink_inode_operations;
diff --git a/fs/affs/super.c b/fs/affs/super.c
index d41e967..16a3e47 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -17,6 +17,7 @@
 #include <linux/magic.h>
 #include <linux/sched.h>
 #include <linux/smp_lock.h>
+#include <linux/slab.h>
 #include "affs.h"
 
 extern struct timezone sys_tz;
diff --git a/fs/afs/cache.c b/fs/afs/cache.c
index e2b1d3f..0fb315d 100644
--- a/fs/afs/cache.c
+++ b/fs/afs/cache.c
@@ -9,7 +9,6 @@
  * 2 of the License, or (at your option) any later version.
  */
 
-#include <linux/slab.h>
 #include <linux/sched.h>
 #include "internal.h"
 
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index eb76548..a3bcec7 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -11,6 +11,7 @@
 
 #include <linux/module.h>
 #include <linux/init.h>
+#include <linux/slab.h>
 #include <linux/sched.h>
 #include <linux/ip.h>
 #include "internal.h"
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 88067f3..adc1cb7 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -12,7 +12,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/init.h>
-#include <linux/slab.h>
 #include <linux/fs.h>
 #include <linux/pagemap.h>
 #include <linux/ctype.h>
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 39b3016..0df9bc2 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -12,10 +12,10 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/init.h>
-#include <linux/slab.h>
 #include <linux/fs.h>
 #include <linux/pagemap.h>
 #include <linux/writeback.h>
+#include <linux/gfp.h>
 #include "internal.h"
 
 static int afs_readpage(struct file *file, struct page *page);
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 023b95b..4bd0218 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -10,6 +10,7 @@
  */
 
 #include <linux/init.h>
+#include <linux/slab.h>
 #include <linux/sched.h>
 #include <linux/circ_buf.h>
 #include "internal.h"
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index c048f06..d00b312 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -16,7 +16,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/init.h>
-#include <linux/slab.h>
 #include <linux/fs.h>
 #include <linux/pagemap.h>
 #include <linux/sched.h>
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 5ffb570..5e813a8 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -12,11 +12,11 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/init.h>
-#include <linux/slab.h>
 #include <linux/fs.h>
 #include <linux/pagemap.h>
 #include <linux/mount.h>
 #include <linux/namei.h>
+#include <linux/gfp.h>
 #include "internal.h"
 
 
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index bde3f19..67cf810 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -9,6 +9,7 @@
  * 2 of the License, or (at your option) any later version.
  */
 
+#include <linux/slab.h>
 #include <net/sock.h>
 #include <net/af_rxrpc.h>
 #include <rxrpc/packet.h>
diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c
index 36c1306..340afd0 100644
--- a/fs/afs/vlclient.c
+++ b/fs/afs/vlclient.c
@@ -9,6 +9,7 @@
  * 2 of the License, or (at your option) any later version.
  */
 
+#include <linux/gfp.h>
 #include <linux/init.h>
 #include <linux/sched.h>
 #include "internal.h"
diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c
index 6e68920..9ac260d 100644
--- a/fs/afs/vlocation.c
+++ b/fs/afs/vlocation.c
@@ -11,6 +11,7 @@
 
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/sched.h>
 #include "internal.h"
diff --git a/fs/afs/vnode.c b/fs/afs/vnode.c
index 2f05c4f..25cf4c3 100644
--- a/fs/afs/vnode.c
+++ b/fs/afs/vnode.c
@@ -12,7 +12,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/init.h>
-#include <linux/slab.h>
 #include <linux/fs.h>
 #include <linux/sched.h>
 #include "internal.h"
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 2de0095..e4b75d6 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -12,7 +12,6 @@
 #include <linux/file.h>
 #include <linux/poll.h>
 #include <linux/sched.h>
-#include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/fs.h>
 #include <linux/mount.h>
diff --git a/fs/autofs/root.c b/fs/autofs/root.c
index 4a1401c..8713c7c 100644
--- a/fs/autofs/root.c
+++ b/fs/autofs/root.c
@@ -13,6 +13,7 @@
 #include <linux/capability.h>
 #include <linux/errno.h>
 #include <linux/stat.h>
+#include <linux/slab.h>
 #include <linux/param.h>
 #include <linux/time.h>
 #include <linux/smp_lock.h>
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index c8a80df..d29b7f6 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -22,6 +22,7 @@
 #include <linux/magic.h>
 #include <linux/dcache.h>
 #include <linux/uaccess.h>
+#include <linux/slab.h>
 
 #include "autofs_i.h"
 
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index a015b49..109a6c6 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -15,6 +15,7 @@
 #include <linux/capability.h>
 #include <linux/errno.h>
 #include <linux/stat.h>
+#include <linux/slab.h>
 #include <linux/param.h>
 #include <linux/time.h>
 #include "autofs_i.h"
diff --git a/fs/befs/datastream.c b/fs/befs/datastream.c
index e3287d0..59096b5 100644
--- a/fs/befs/datastream.c
+++ b/fs/befs/datastream.c
@@ -11,7 +11,6 @@
  */
 
 #include <linux/kernel.h>
-#include <linux/slab.h>
 #include <linux/buffer_head.h>
 #include <linux/string.h>
 
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index 9b6aef0f..f96eff0 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -20,11 +20,11 @@
 #include <linux/fcntl.h>
 #include <linux/ptrace.h>
 #include <linux/user.h>
-#include <linux/slab.h>
 #include <linux/binfmts.h>
 #include <linux/personality.h>
 #include <linux/init.h>
 #include <linux/coredump.h>
+#include <linux/slab.h>
 
 #include <asm/system.h>
 #include <asm/uaccess.h>
diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c
index 32fb00b..b8e8b0a 100644
--- a/fs/binfmt_em86.c
+++ b/fs/binfmt_em86.c
@@ -11,7 +11,6 @@
 #include <linux/module.h>
 #include <linux/string.h>
 #include <linux/stat.h>
-#include <linux/slab.h>
 #include <linux/binfmts.h>
 #include <linux/elf.h>
 #include <linux/init.h>
diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c
index 0834350..aca9d55 100644
--- a/fs/binfmt_script.c
+++ b/fs/binfmt_script.c
@@ -8,7 +8,6 @@
 #include <linux/module.h>
 #include <linux/string.h>
 #include <linux/stat.h>
-#include <linux/slab.h>
 #include <linux/binfmts.h>
 #include <linux/init.h>
 #include <linux/file.h>
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index a16f29e..612a5c3 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -24,6 +24,7 @@
 #include <linux/mempool.h>
 #include <linux/bio.h>
 #include <linux/workqueue.h>
+#include <linux/slab.h>
 
 struct integrity_slab {
 	struct kmem_cache *slab;
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 6df6d6e..6ef7b26 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -22,6 +22,7 @@
 #include <linux/posix_acl_xattr.h>
 #include <linux/posix_acl.h>
 #include <linux/sched.h>
+#include <linux/slab.h>
 
 #include "ctree.h"
 #include "btrfs_inode.h"
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index c0861e7..462859a 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -17,6 +17,7 @@
  */
 
 #include <linux/kthread.h>
+#include <linux/slab.h>
 #include <linux/list.h>
 #include <linux/spinlock.h>
 #include <linux/freezer.h>
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 28b92a7..629880e 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -32,6 +32,7 @@
 #include <linux/writeback.h>
 #include <linux/bit_spinlock.h>
 #include <linux/pagevec.h>
+#include <linux/slab.h>
 #include "compat.h"
 #include "ctree.h"
 #include "disk-io.h"
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index c4bc570..8e29260 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -17,6 +17,7 @@
  */
 
 #include <linux/sched.h>
+#include <linux/slab.h>
 #include "ctree.h"
 #include "disk-io.h"
 #include "transaction.h"
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 0af2e38..8a6518a 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -26,6 +26,7 @@
 #include <linux/completion.h>
 #include <linux/backing-dev.h>
 #include <linux/wait.h>
+#include <linux/slab.h>
 #include <asm/kmap_types.h>
 #include "extent_io.h"
 #include "extent_map.h"
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index 84e6781..902ce50 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -17,6 +17,7 @@
  */
 
 #include <linux/sched.h>
+#include <linux/slab.h>
 #include <linux/sort.h>
 #include "ctree.h"
 #include "delayed-ref.h"
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 11d0ad3..e57ded7 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -27,6 +27,7 @@
 #include <linux/kthread.h>
 #include <linux/freezer.h>
 #include <linux/crc32c.h>
+#include <linux/slab.h>
 #include "compat.h"
 #include "ctree.h"
 #include "disk-io.h"
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 1727b26..fa2907d 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -22,6 +22,7 @@
 #include <linux/sort.h>
 #include <linux/rcupdate.h>
 #include <linux/kthread.h>
+#include <linux/slab.h>
 #include "compat.h"
 #include "hash.h"
 #include "ctree.h"
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index c99121a..0adceb5 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2,7 +2,6 @@
 #include <linux/slab.h>
 #include <linux/bio.h>
 #include <linux/mm.h>
-#include <linux/gfp.h>
 #include <linux/pagemap.h>
 #include <linux/page-flags.h>
 #include <linux/module.h>
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 28d87ba..454ca52 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -1,5 +1,4 @@
 #include <linux/err.h>
-#include <linux/gfp.h>
 #include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/spinlock.h>
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 9b99886..54a2550 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -17,6 +17,7 @@
  */
 
 #include <linux/bio.h>
+#include <linux/slab.h>
 #include <linux/pagemap.h>
 #include <linux/highmem.h>
 #include "ctree.h"
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index ee3323c..29ff749 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -28,6 +28,7 @@
 #include <linux/writeback.h>
 #include <linux/statfs.h>
 #include <linux/compat.h>
+#include <linux/slab.h>
 #include "ctree.h"
 #include "disk-io.h"
 #include "transaction.h"
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index dd831ed..f488fac 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -18,6 +18,7 @@
 
 #include <linux/pagemap.h>
 #include <linux/sched.h>
+#include <linux/slab.h>
 #include <linux/math64.h>
 #include "ctree.h"
 #include "free-space-cache.h"
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 02bb099..2fe494c 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -36,6 +36,7 @@
 #include <linux/xattr.h>
 #include <linux/posix_acl.h>
 #include <linux/falloc.h>
+#include <linux/slab.h>
 #include "compat.h"
 #include "ctree.h"
 #include "disk-io.h"
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 2845c6c..9b3d73a 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -39,6 +39,7 @@
 #include <linux/security.h>
 #include <linux/xattr.h>
 #include <linux/vmalloc.h>
+#include <linux/slab.h>
 #include "compat.h"
 #include "ctree.h"
 #include "disk-io.h"
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index 1c36e5c..6151f2e 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -16,7 +16,6 @@
  * Boston, MA 021110-1307, USA.
  */
 #include <linux/sched.h>
-#include <linux/gfp.h>
 #include <linux/pagemap.h>
 #include <linux/spinlock.h>
 #include <linux/page-flags.h>
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index a8ffecd..ecb22ff 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -16,7 +16,6 @@
  * Boston, MA 021110-1307, USA.
  */
 
-#include <linux/gfp.h>
 #include <linux/slab.h>
 #include <linux/blkdev.h>
 #include <linux/writeback.h>
diff --git a/fs/btrfs/ref-cache.c b/fs/btrfs/ref-cache.c
index d0cc62b..a97314c 100644
--- a/fs/btrfs/ref-cache.c
+++ b/fs/btrfs/ref-cache.c
@@ -17,6 +17,7 @@
  */
 
 #include <linux/sched.h>
+#include <linux/slab.h>
 #include <linux/sort.h>
 #include "ctree.h"
 #include "ref-cache.h"
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 0b23942..e558dd9 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -21,6 +21,7 @@
 #include <linux/writeback.h>
 #include <linux/blkdev.h>
 #include <linux/rbtree.h>
+#include <linux/slab.h>
 #include "ctree.h"
 #include "disk-io.h"
 #include "transaction.h"
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 9ac612e..693a664 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -38,6 +38,7 @@
 #include <linux/namei.h>
 #include <linux/miscdevice.h>
 #include <linux/magic.h>
+#include <linux/slab.h>
 #include "compat.h"
 #include "ctree.h"
 #include "disk-io.h"
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 2d654c1..c0d0e3e 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -17,6 +17,7 @@
  */
 
 #include <linux/fs.h>
+#include <linux/slab.h>
 #include <linux/sched.h>
 #include <linux/writeback.h>
 #include <linux/pagemap.h>
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 1255fcc..af57dd2 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -17,6 +17,7 @@
  */
 
 #include <linux/sched.h>
+#include <linux/slab.h>
 #include "ctree.h"
 #include "transaction.h"
 #include "disk-io.h"
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 9df8e3f..1692ec9 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -17,6 +17,7 @@
  */
 #include <linux/sched.h>
 #include <linux/bio.h>
+#include <linux/slab.h>
 #include <linux/buffer_head.h>
 #include <linux/blkdev.h>
 #include <linux/random.h>
diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c
index 2708931..37fe101 100644
--- a/fs/cachefiles/interface.c
+++ b/fs/cachefiles/interface.c
@@ -9,6 +9,7 @@
  * 2 of the Licence, or (at your option) any later version.
  */
 
+#include <linux/slab.h>
 #include <linux/mount.h>
 #include <linux/buffer_head.h>
 #include "internal.h"
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index eeb4986..d5db84a 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -19,6 +19,7 @@
 #include <linux/mount.h>
 #include <linux/namei.h>
 #include <linux/security.h>
+#include <linux/slab.h>
 #include "internal.h"
 
 #define CACHEFILES_KEYBUF_SIZE 512
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index 1d83325..0f0d41f 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -10,6 +10,7 @@
  */
 
 #include <linux/mount.h>
+#include <linux/slab.h>
 #include <linux/file.h>
 #include "internal.h"
 
diff --git a/fs/cachefiles/xattr.c b/fs/cachefiles/xattr.c
index f3e7a0b..e18b183 100644
--- a/fs/cachefiles/xattr.c
+++ b/fs/cachefiles/xattr.c
@@ -16,6 +16,7 @@
 #include <linux/fsnotify.h>
 #include <linux/quotaops.h>
 #include <linux/xattr.h>
+#include <linux/slab.h>
 #include "internal.h"
 
 static const char cachefiles_xattr_cache[] =
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index ce8ef61..aa3cd7c 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -5,6 +5,7 @@
 #include <linux/mm.h>
 #include <linux/pagemap.h>
 #include <linux/writeback.h>	/* generic_writepages */
+#include <linux/slab.h>
 #include <linux/pagevec.h>
 #include <linux/task_io_accounting_ops.h>
 
diff --git a/fs/ceph/auth.c b/fs/ceph/auth.c
index abb204f..f6394b9 100644
--- a/fs/ceph/auth.c
+++ b/fs/ceph/auth.c
@@ -1,6 +1,7 @@
 #include "ceph_debug.h"
 
 #include <linux/module.h>
+#include <linux/slab.h>
 #include <linux/err.h>
 
 #include "types.h"
diff --git a/fs/ceph/auth_none.c b/fs/ceph/auth_none.c
index b4ef6f0..8cd9e3a 100644
--- a/fs/ceph/auth_none.c
+++ b/fs/ceph/auth_none.c
@@ -4,6 +4,7 @@
 #include <linux/err.h>
 #include <linux/module.h>
 #include <linux/random.h>
+#include <linux/slab.h>
 
 #include "auth_none.h"
 #include "auth.h"
diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c
index 8d8a849..d9001a4 100644
--- a/fs/ceph/auth_x.c
+++ b/fs/ceph/auth_x.c
@@ -4,6 +4,7 @@
 #include <linux/err.h>
 #include <linux/module.h>
 #include <linux/random.h>
+#include <linux/slab.h>
 
 #include "auth_x.h"
 #include "auth_x_protocol.h"
diff --git a/fs/ceph/buffer.c b/fs/ceph/buffer.c
index b98086c..c67535d 100644
--- a/fs/ceph/buffer.c
+++ b/fs/ceph/buffer.c
@@ -1,5 +1,8 @@
 
 #include "ceph_debug.h"
+
+#include <linux/slab.h>
+
 #include "buffer.h"
 #include "decode.h"
 
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 7d0a0d0..3710e07 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -3,6 +3,7 @@
 #include <linux/fs.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
+#include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <linux/wait.h>
 #include <linux/writeback.h>
diff --git a/fs/ceph/crypto.c b/fs/ceph/crypto.c
index 291ac28..f704b3b 100644
--- a/fs/ceph/crypto.c
+++ b/fs/ceph/crypto.c
@@ -3,6 +3,7 @@
 
 #include <linux/err.h>
 #include <linux/scatterlist.h>
+#include <linux/slab.h>
 #include <crypto/hash.h>
 
 #include "crypto.h"
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index e159f14..f7048da 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -1,6 +1,7 @@
 #include "ceph_debug.h"
 
 #include <linux/device.h>
+#include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/ctype.h>
 #include <linux/debugfs.h>
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 8a9116e..7261dc6 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -3,6 +3,7 @@
 #include <linux/spinlock.h>
 #include <linux/fs_struct.h>
 #include <linux/namei.h>
+#include <linux/slab.h>
 #include <linux/sched.h>
 
 #include "super.h"
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index fc68e39..9d67572 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -1,6 +1,7 @@
 #include "ceph_debug.h"
 
 #include <linux/exportfs.h>
+#include <linux/slab.h>
 #include <asm/unaligned.h>
 
 #include "super.h"
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 5d2af84..4add3d5 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1,6 +1,7 @@
 #include "ceph_debug.h"
 
 #include <linux/sched.h>
+#include <linux/slab.h>
 #include <linux/file.h>
 #include <linux/namei.h>
 #include <linux/writeback.h>
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 5c7920b..60a9a4a 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1,6 +1,7 @@
 #include "ceph_debug.h"
 
 #include <linux/wait.h>
+#include <linux/slab.h>
 #include <linux/sched.h>
 
 #include "mds_client.h"
diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c
index a32f0f8..8f1715f 100644
--- a/fs/ceph/messenger.c
+++ b/fs/ceph/messenger.c
@@ -6,6 +6,7 @@
 #include <linux/inet.h>
 #include <linux/kthread.h>
 #include <linux/net.h>
+#include <linux/slab.h>
 #include <linux/socket.h>
 #include <linux/string.h>
 #include <net/tcp.h>
diff --git a/fs/ceph/mon_client.c b/fs/ceph/mon_client.c
index 890597c..8fdc011 100644
--- a/fs/ceph/mon_client.c
+++ b/fs/ceph/mon_client.c
@@ -1,6 +1,7 @@
 #include "ceph_debug.h"
 
 #include <linux/types.h>
+#include <linux/slab.h>
 #include <linux/random.h>
 #include <linux/sched.h>
 
diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c
index d82fe87..21c6623 100644
--- a/fs/ceph/osdmap.c
+++ b/fs/ceph/osdmap.c
@@ -1,4 +1,7 @@
 
+#include "ceph_debug.h"
+
+#include <linux/slab.h>
 #include <asm/div64.h>
 
 #include "super.h"
@@ -6,7 +9,6 @@
 #include "crush/hash.h"
 #include "crush/mapper.h"
 #include "decode.h"
-#include "ceph_debug.h"
 
 char *ceph_osdmap_state_str(char *str, int len, int state)
 {
diff --git a/fs/ceph/pagelist.c b/fs/ceph/pagelist.c
index 370e936..5f8dbf7 100644
--- a/fs/ceph/pagelist.c
+++ b/fs/ceph/pagelist.c
@@ -1,4 +1,5 @@
 
+#include <linux/gfp.h>
 #include <linux/pagemap.h>
 #include <linux/highmem.h>
 
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index df04e21..e6f9bc5 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -1,6 +1,7 @@
 #include "ceph_debug.h"
 
 #include <linux/sort.h>
+#include <linux/slab.h>
 
 #include "super.h"
 #include "decode.h"
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 4290a6e..75d02ea 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -11,6 +11,7 @@
 #include <linux/rwsem.h>
 #include <linux/sched.h>
 #include <linux/seq_file.h>
+#include <linux/slab.h>
 #include <linux/statfs.h>
 #include <linux/string.h>
 #include <linux/version.h>
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 65d1203..ca702c6 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -12,6 +12,7 @@
 #include <linux/pagemap.h>
 #include <linux/wait.h>
 #include <linux/writeback.h>
+#include <linux/slab.h>
 
 #include "types.h"
 #include "messenger.h"
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 37d6ce6..2845422 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -3,6 +3,7 @@
 #include "decode.h"
 
 #include <linux/xattr.h>
+#include <linux/slab.h>
 
 static bool ceph_is_valid_xattr(const char *name)
 {
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index b1d61d0..78e4d2a 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -15,6 +15,7 @@
 #include <linux/dcache.h>
 #include <linux/mount.h>
 #include <linux/namei.h>
+#include <linux/slab.h>
 #include <linux/vfs.h>
 #include <linux/fs.h>
 #include "cifsglob.h"
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index 8ec7736..310d12f 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -20,6 +20,7 @@
  */
 
 #include <linux/list.h>
+#include <linux/slab.h>
 #include <linux/string.h>
 #include <keys/user-type.h>
 #include <linux/key-type.h>
diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c
index 714a542..d07676b 100644
--- a/fs/cifs/cifs_unicode.c
+++ b/fs/cifs/cifs_unicode.c
@@ -19,6 +19,7 @@
  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 #include <linux/fs.h>
+#include <linux/slab.h>
 #include "cifs_unicode.h"
 #include "cifs_uniupr.h"
 #include "cifspdu.h"
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 7dfe084..9b716d0 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -22,6 +22,7 @@
  */
 
 #include <linux/fs.h>
+#include <linux/slab.h>
 #include "cifspdu.h"
 #include "cifsglob.h"
 #include "cifsacl.h"
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 7efe174..fbe9864 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -20,6 +20,7 @@
  */
 
 #include <linux/fs.h>
+#include <linux/slab.h>
 #include "cifspdu.h"
 #include "cifsglob.h"
 #include "cifs_debug.h"
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 63c89d1..ecf0ffb 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -18,6 +18,7 @@
  */
 #include <linux/in.h>
 #include <linux/in6.h>
+#include <linux/slab.h>
 #include <linux/slow-work.h>
 #include "cifs_fs_sb.h"
 #include "cifsacl.h"
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 7cc7f83..3f4fbd6 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -30,6 +30,7 @@
 #include <linux/fs.h>
 #include <linux/kernel.h>
 #include <linux/vfs.h>
+#include <linux/slab.h>
 #include <linux/posix_acl_xattr.h>
 #include <asm/uaccess.h>
 #include "cifspdu.h"
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 45eb6cb..d9566bf 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -23,6 +23,7 @@
 #include <linux/string.h>
 #include <linux/list.h>
 #include <linux/wait.h>
+#include <linux/slab.h>
 #include <linux/pagemap.h>
 #include <linux/ctype.h>
 #include <linux/utsname.h>
diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c
index 8794814..6f8a0e3 100644
--- a/fs/cifs/dns_resolve.c
+++ b/fs/cifs/dns_resolve.c
@@ -23,6 +23,7 @@
  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 
+#include <linux/slab.h>
 #include <keys/user-type.h>
 #include "dns_resolve.h"
 #include "cifsglob.h"
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index ca2ba7a..058b390 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -31,6 +31,7 @@
 #include <linux/task_io_accounting_ops.h>
 #include <linux/delay.h>
 #include <linux/mount.h>
+#include <linux/slab.h>
 #include <asm/div64.h>
 #include "cifsfs.h"
 #include "cifspdu.h"
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 723daac..35ec117 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -20,6 +20,7 @@
  */
 #include <linux/fs.h>
 #include <linux/stat.h>
+#include <linux/slab.h>
 #include <linux/pagemap.h>
 #include <asm/div64.h>
 #include "cifsfs.h"
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index fc1e048..c1a9d42 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -20,6 +20,7 @@
  */
 #include <linux/fs.h>
 #include <linux/stat.h>
+#include <linux/slab.h>
 #include <linux/namei.h>
 #include "cifsfs.h"
 #include "cifspdu.h"
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index c343b14..18e0bc1 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -22,6 +22,7 @@
  */
 #include <linux/fs.h>
 #include <linux/pagemap.h>
+#include <linux/slab.h>
 #include <linux/stat.h>
 #include "cifspdu.h"
 #include "cifsglob.h"
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index aaa9c1c..7c3fd74 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -29,6 +29,7 @@
 #include "ntlmssp.h"
 #include "nterr.h"
 #include <linux/utsname.h>
+#include <linux/slab.h>
 #include "cifs_spnego.h"
 
 extern void SMBNTencrypt(unsigned char *passwd, unsigned char *c8,
diff --git a/fs/cifs/smbencrypt.c b/fs/cifs/smbencrypt.c
index 93fb09a..192ea51 100644
--- a/fs/cifs/smbencrypt.c
+++ b/fs/cifs/smbencrypt.c
@@ -24,6 +24,7 @@
 */
 
 #include <linux/module.h>
+#include <linux/slab.h>
 #include <linux/fs.h>
 #include <linux/string.h>
 #include <linux/kernel.h>
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 07b8e71..ad081fe 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -22,6 +22,7 @@
 
 #include <linux/fs.h>
 #include <linux/list.h>
+#include <linux/gfp.h>
 #include <linux/wait.h>
 #include <linux/net.h>
 #include <linux/delay.h>
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index 3e2ef0d..f555ce0 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -21,6 +21,7 @@
 
 #include <linux/fs.h>
 #include <linux/posix_acl_xattr.h>
+#include <linux/slab.h>
 #include "cifsfs.h"
 #include "cifspdu.h"
 #include "cifsglob.h"
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 4bb9d0a..ccd98b0 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -12,6 +12,7 @@
 #include <linux/kernel.h>
 #include <linux/time.h>
 #include <linux/fs.h>
+#include <linux/slab.h>
 #include <linux/file.h>
 #include <linux/stat.h>
 #include <linux/errno.h>
diff --git a/fs/coda/file.c b/fs/coda/file.c
index ffd4281..4c813f2 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -17,6 +17,7 @@
 #include <linux/errno.h>
 #include <linux/smp_lock.h>
 #include <linux/string.h>
+#include <linux/slab.h>
 #include <asm/uaccess.h>
 
 #include <linux/coda.h>
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index 830f51a..a1695dc 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -18,6 +18,7 @@
 #include <linux/smp_lock.h>
 #include <linux/file.h>
 #include <linux/vfs.h>
+#include <linux/slab.h>
 
 #include <asm/system.h>
 #include <asm/uaccess.h>
diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c
index c274d94..f09c5ed 100644
--- a/fs/coda/upcall.c
+++ b/fs/coda/upcall.c
@@ -26,6 +26,7 @@
 #include <linux/stat.h>
 #include <linux/errno.h>
 #include <linux/string.h>
+#include <linux/slab.h>
 #include <asm/uaccess.h>
 #include <linux/vmalloc.h>
 #include <linux/vfs.h>
diff --git a/fs/compat.c b/fs/compat.c
index 030602d..4b6ed03 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -49,6 +49,7 @@
 #include <linux/mm.h>
 #include <linux/eventpoll.h>
 #include <linux/fs_struct.h>
+#include <linux/slab.h>
 
 #include <asm/uaccess.h>
 #include <asm/mmu_context.h>
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 6d55b61..c32a1b6 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -23,7 +23,6 @@
 #include <linux/ioctl.h>
 #include <linux/if.h>
 #include <linux/if_bridge.h>
-#include <linux/slab.h>
 #include <linux/raid/md_u.h>
 #include <linux/kd.h>
 #include <linux/route.h>
@@ -60,6 +59,7 @@
 #include <linux/i2c.h>
 #include <linux/i2c-dev.h>
 #include <linux/atalk.h>
+#include <linux/gfp.h>
 
 #include <net/bluetooth/bluetooth.h>
 #include <net/bluetooth/hci.h>
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index a2f7460..c8af2d9 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -34,6 +34,7 @@
 #include <linux/capability.h>
 #include <linux/sched.h>
 #include <linux/lockdep.h>
+#include <linux/slab.h>
 
 #include <linux/configfs.h>
 #include "configfs_internal.h"
diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c
index 8421cea..8c8d642 100644
--- a/fs/configfs/mount.c
+++ b/fs/configfs/mount.c
@@ -29,6 +29,7 @@
 #include <linux/mount.h>
 #include <linux/pagemap.h>
 #include <linux/init.h>
+#include <linux/slab.h>
 
 #include <linux/configfs.h>
 #include "configfs_internal.h"
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
index 32a5f46..0f3eb41 100644
--- a/fs/configfs/symlink.c
+++ b/fs/configfs/symlink.c
@@ -27,6 +27,7 @@
 #include <linux/fs.h>
 #include <linux/module.h>
 #include <linux/namei.h>
+#include <linux/slab.h>
 
 #include <linux/configfs.h>
 #include "configfs_internal.h"
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 049d6c3..30a87b3 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -27,6 +27,7 @@
 #include <linux/fsnotify.h>
 #include <linux/string.h>
 #include <linux/magic.h>
+#include <linux/slab.h>
 
 static struct vfsmount *debugfs_mount;
 static int debugfs_mount_count;
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 8882ecc..0120247 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -15,6 +15,7 @@
 #include <linux/fs.h>
 #include <linux/sched.h>
 #include <linux/namei.h>
+#include <linux/slab.h>
 #include <linux/mount.h>
 #include <linux/tty.h>
 #include <linux/mutex.h>
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index 0df2438..b54bca0 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -14,6 +14,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/configfs.h>
+#include <linux/slab.h>
 #include <linux/in.h>
 #include <linux/in6.h>
 #include <net/ipv6.h>
diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c
index 29d6139..c6cf251 100644
--- a/fs/dlm/debug_fs.c
+++ b/fs/dlm/debug_fs.c
@@ -15,6 +15,7 @@
 #include <linux/module.h>
 #include <linux/ctype.h>
 #include <linux/debugfs.h>
+#include <linux/slab.h>
 
 #include "dlm_internal.h"
 #include "lock.h"
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 46ffd3e..17903b4 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -56,6 +56,7 @@
    L: receive_xxxx_reply()     <-  R: send_xxxx_reply()
 */
 #include <linux/types.h>
+#include <linux/slab.h>
 #include "dlm_internal.h"
 #include <linux/dlm_device.h>
 #include "memory.h"
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 52cab16..c0d35c6 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -51,6 +51,7 @@
 #include <linux/file.h>
 #include <linux/mutex.h>
 #include <linux/sctp.h>
+#include <linux/slab.h>
 #include <net/sctp/user.h>
 #include <net/ipv6.h>
 
diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c
index 052095c..2c6ad51 100644
--- a/fs/dlm/netlink.c
+++ b/fs/dlm/netlink.c
@@ -9,6 +9,7 @@
 #include <net/genetlink.h>
 #include <linux/dlm.h>
 #include <linux/dlm_netlink.h>
+#include <linux/gfp.h>
 
 #include "dlm_internal.h"
 
diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c
index b5f89ae..d45c02d 100644
--- a/fs/dlm/plock.c
+++ b/fs/dlm/plock.c
@@ -11,6 +11,7 @@
 #include <linux/poll.h>
 #include <linux/dlm.h>
 #include <linux/dlm_plock.h>
+#include <linux/slab.h>
 
 #include "dlm_internal.h"
 #include "lockspace.h"
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index a4bfd31..8b6e73c 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -17,6 +17,7 @@
 #include <linux/spinlock.h>
 #include <linux/dlm.h>
 #include <linux/dlm_device.h>
+#include <linux/slab.h>
 
 #include "dlm_internal.h"
 #include "lockspace.h"
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 7cb0a59..efb2b94 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -33,6 +33,7 @@
 #include <linux/crypto.h>
 #include <linux/file.h>
 #include <linux/scatterlist.h>
+#include <linux/slab.h>
 #include <asm/unaligned.h>
 #include "ecryptfs_kernel.h"
 
diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c
index 8f006a0..906e803 100644
--- a/fs/ecryptfs/dentry.c
+++ b/fs/ecryptfs/dentry.c
@@ -26,6 +26,7 @@
 #include <linux/namei.h>
 #include <linux/mount.h>
 #include <linux/fs_stack.h>
+#include <linux/slab.h>
 #include "ecryptfs_kernel.h"
 
 /**
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 678172b..e7440a6 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -25,6 +25,7 @@
 
 #include <linux/file.h>
 #include <linux/poll.h>
+#include <linux/slab.h>
 #include <linux/mount.h>
 #include <linux/pagemap.h>
 #include <linux/security.h>
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 4a430ab..d3362fa 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -31,6 +31,7 @@
 #include <linux/mount.h>
 #include <linux/crypto.h>
 #include <linux/fs_stack.h>
+#include <linux/slab.h>
 #include <asm/unaligned.h>
 #include "ecryptfs_kernel.h"
 
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index a0a7847..89c5476 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -32,6 +32,7 @@
 #include <linux/random.h>
 #include <linux/crypto.h>
 #include <linux/scatterlist.h>
+#include <linux/slab.h>
 #include "ecryptfs_kernel.h"
 
 /**
diff --git a/fs/ecryptfs/kthread.c b/fs/ecryptfs/kthread.c
index e14cf7e..d8c3a37 100644
--- a/fs/ecryptfs/kthread.c
+++ b/fs/ecryptfs/kthread.c
@@ -22,6 +22,7 @@
 
 #include <linux/kthread.h>
 #include <linux/freezer.h>
+#include <linux/slab.h>
 #include <linux/wait.h>
 #include <linux/mount.h>
 #include "ecryptfs_kernel.h"
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index ea2f921..af1a8f0 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -35,6 +35,7 @@
 #include <linux/key.h>
 #include <linux/parser.h>
 #include <linux/fs_stack.h>
+#include <linux/slab.h>
 #include "ecryptfs_kernel.h"
 
 /**
diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c
index f1c17e8..2d8dbce 100644
--- a/fs/ecryptfs/messaging.c
+++ b/fs/ecryptfs/messaging.c
@@ -20,6 +20,7 @@
  * 02111-1307, USA.
  */
 #include <linux/sched.h>
+#include <linux/slab.h>
 #include <linux/user_namespace.h>
 #include <linux/nsproxy.h>
 #include "ecryptfs_kernel.h"
diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c
index 4ec8f61..3745f61 100644
--- a/fs/ecryptfs/miscdev.c
+++ b/fs/ecryptfs/miscdev.c
@@ -24,6 +24,7 @@
 #include <linux/random.h>
 #include <linux/miscdevice.h>
 #include <linux/poll.h>
+#include <linux/slab.h>
 #include <linux/wait.h>
 #include <linux/module.h>
 #include "ecryptfs_kernel.h"
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index df4ce99..d491237 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -32,6 +32,7 @@
 #include <linux/file.h>
 #include <linux/crypto.h>
 #include <linux/scatterlist.h>
+#include <linux/slab.h>
 #include <asm/unaligned.h>
 #include "ecryptfs_kernel.h"
 
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index b15a43a..fcef41c 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -26,6 +26,7 @@
 #include <linux/fs.h>
 #include <linux/mount.h>
 #include <linux/key.h>
+#include <linux/slab.h>
 #include <linux/seq_file.h>
 #include <linux/smp_lock.h>
 #include <linux/file.h>
diff --git a/fs/eventfd.c b/fs/eventfd.c
index 7758cc3..6bd3f76 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -11,6 +11,7 @@
 #include <linux/fs.h>
 #include <linux/sched.h>
 #include <linux/kernel.h>
+#include <linux/slab.h>
 #include <linux/list.h>
 #include <linux/spinlock.h>
 #include <linux/anon_inodes.h>
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index a17e4b7..76d2a79 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -31,6 +31,7 @@
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
+#include <linux/slab.h>
 #include <linux/writeback.h>
 #include <linux/buffer_head.h>
 #include <scsi/scsi_device.h>
diff --git a/fs/exofs/ios.c b/fs/exofs/ios.c
index 5293bc4..4337cad 100644
--- a/fs/exofs/ios.c
+++ b/fs/exofs/ios.c
@@ -22,6 +22,7 @@
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
+#include <linux/slab.h>
 #include <scsi/scsi_device.h>
 #include <asm/div64.h>
 
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 6cf5e4e..18e57ea 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -37,6 +37,7 @@
 #include <linux/vfs.h>
 #include <linux/random.h>
 #include <linux/exportfs.h>
+#include <linux/slab.h>
 
 #include "exofs.h"
 
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
index 1d081f0..3cf038c 100644
--- a/fs/ext2/balloc.c
+++ b/fs/ext2/balloc.c
@@ -13,6 +13,7 @@
 
 #include "ext2.h"
 #include <linux/quotaops.h>
+#include <linux/slab.h>
 #include <linux/sched.h>
 #include <linux/buffer_head.h>
 #include <linux/capability.h>
diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c
index c815584..b118c63 100644
--- a/fs/ext2/xattr_security.c
+++ b/fs/ext2/xattr_security.c
@@ -4,6 +4,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/fs.h>
 #include <linux/ext2_fs.h>
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index 161da2d..a177122 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -14,6 +14,7 @@
 #include <linux/time.h>
 #include <linux/capability.h>
 #include <linux/fs.h>
+#include <linux/slab.h>
 #include <linux/jbd.h>
 #include <linux/ext3_fs.h>
 #include <linux/ext3_jbd.h>
diff --git a/fs/ext3/xattr_security.c b/fs/ext3/xattr_security.c
index 4743487..3af91f4 100644
--- a/fs/ext3/xattr_security.c
+++ b/fs/ext3/xattr_security.c
@@ -4,6 +4,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/fs.h>
 #include <linux/ext3_jbd.h>
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
index 983f0e1..538c486 100644
--- a/fs/ext4/block_validity.c
+++ b/fs/ext4/block_validity.c
@@ -18,6 +18,7 @@
 #include <linux/pagemap.h>
 #include <linux/blkdev.h>
 #include <linux/mutex.h>
+#include <linux/slab.h>
 #include "ext4.h"
 
 struct ext4_system_zone {
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 11119e0..5381802 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -39,6 +39,7 @@
 #include <linux/bio.h>
 #include <linux/workqueue.h>
 #include <linux/kernel.h>
+#include <linux/slab.h>
 
 #include "ext4_jbd2.h"
 #include "xattr.h"
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 54df209..bde9d0b 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -23,6 +23,7 @@
 
 #include "mballoc.h"
 #include <linux/debugfs.h>
+#include <linux/slab.h>
 #include <trace/events/ext4.h>
 
 /*
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index 8b87bd0..34dcfc5 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -13,6 +13,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/slab.h>
 #include "ext4_jbd2.h"
 #include "ext4_extents.h"
 
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index aa5fe28..d1fc662 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -15,6 +15,7 @@
 
 #include <linux/fs.h>
 #include <linux/quotaops.h>
+#include <linux/slab.h>
 #include "ext4_jbd2.h"
 #include "ext4_extents.h"
 #include "ext4.h"
diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c
index 983c253..8b145e9 100644
--- a/fs/ext4/xattr_security.c
+++ b/fs/ext4/xattr_security.c
@@ -7,6 +7,7 @@
 #include <linux/string.h>
 #include <linux/fs.h>
 #include <linux/security.h>
+#include <linux/slab.h>
 #include "ext4_jbd2.h"
 #include "ext4.h"
 #include "xattr.h"
diff --git a/fs/fat/cache.c b/fs/fat/cache.c
index 923990e..113f0a1 100644
--- a/fs/fat/cache.c
+++ b/fs/fat/cache.c
@@ -9,6 +9,7 @@
  */
 
 #include <linux/fs.h>
+#include <linux/slab.h>
 #include <linux/buffer_head.h>
 #include "fat.h"
 
diff --git a/fs/fifo.c b/fs/fifo.c
index f8f97b8..5d6606f 100644
--- a/fs/fifo.c
+++ b/fs/fifo.c
@@ -10,7 +10,6 @@
  */
 
 #include <linux/mm.h>
-#include <linux/slab.h>
 #include <linux/fs.h>
 #include <linux/sched.h>
 #include <linux/pipe_fs_i.h>
diff --git a/fs/filesystems.c b/fs/filesystems.c
index a24c58e..68ba492 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -10,10 +10,10 @@
 #include <linux/fs.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
-#include <linux/slab.h>
 #include <linux/kmod.h>
 #include <linux/init.h>
 #include <linux/module.h>
+#include <linux/slab.h>
 #include <asm/uaccess.h>
 
 /*
diff --git a/fs/freevxfs/vxfs_subr.c b/fs/freevxfs/vxfs_subr.c
index ed8f0b0..1429f3ae 100644
--- a/fs/freevxfs/vxfs_subr.c
+++ b/fs/freevxfs/vxfs_subr.c
@@ -33,7 +33,6 @@
 #include <linux/fs.h>
 #include <linux/buffer_head.h>
 #include <linux/kernel.h>
-#include <linux/slab.h>
 #include <linux/pagemap.h>
 
 #include "vxfs_extern.h"
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 76fc4d5..781a322 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -16,6 +16,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/spinlock.h>
+#include <linux/slab.h>
 #include <linux/sched.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
diff --git a/fs/fscache/object-list.c b/fs/fscache/object-list.c
index 3221a0c..1e1f286 100644
--- a/fs/fscache/object-list.c
+++ b/fs/fscache/object-list.c
@@ -12,6 +12,7 @@
 #define FSCACHE_DEBUG_LEVEL COOKIE
 #include <linux/module.h>
 #include <linux/seq_file.h>
+#include <linux/slab.h>
 #include <linux/key.h>
 #include <keys/user-type.h>
 #include "internal.h"
diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c
index 9f6c928..f17ceca 100644
--- a/fs/fscache/operation.c
+++ b/fs/fscache/operation.c
@@ -14,6 +14,7 @@
 #define FSCACHE_DEBUG_LEVEL OPERATION
 #include <linux/module.h>
 #include <linux/seq_file.h>
+#include <linux/slab.h>
 #include "internal.h"
 
 atomic_t fscache_op_debug_id;
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index 6980902..47aefd3 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -14,6 +14,7 @@
 #include <linux/fscache-cache.h>
 #include <linux/buffer_head.h>
 #include <linux/pagevec.h>
+#include <linux/slab.h>
 #include "internal.h"
 
 /*
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index de792dc..e1f8171 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -44,6 +44,7 @@
 #include <linux/magic.h>
 #include <linux/miscdevice.h>
 #include <linux/mutex.h>
+#include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/stat.h>
 
diff --git a/fs/generic_acl.c b/fs/generic_acl.c
index 5545803..fe5df54 100644
--- a/fs/generic_acl.c
+++ b/fs/generic_acl.c
@@ -7,6 +7,7 @@
  */
 
 #include <linux/sched.h>
+#include <linux/gfp.h>
 #include <linux/fs.h>
 #include <linux/generic_acl.h>
 #include <linux/posix_acl.h>
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 583e823..5e411d5 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -7,7 +7,6 @@
  * of the GNU General Public License version 2.
  */
 
-#include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/completion.h>
 #include <linux/buffer_head.h>
diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c
index 91bedda..bb7907b 100644
--- a/fs/gfs2/dentry.c
+++ b/fs/gfs2/dentry.c
@@ -7,7 +7,6 @@
  * of the GNU General Public License version 2.
  */
 
-#include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/completion.h>
 #include <linux/buffer_head.h>
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c
index d15876e..c22c211 100644
--- a/fs/gfs2/export.c
+++ b/fs/gfs2/export.c
@@ -7,7 +7,6 @@
  * of the GNU General Public License version 2.
  */
 
-#include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/completion.h>
 #include <linux/buffer_head.h>
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 38e3749..49f97d3 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -7,7 +7,6 @@
  * of the GNU General Public License version 2.
  */
 
-#include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/completion.h>
 #include <linux/buffer_head.h>
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 569b462..0e0470e 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -9,6 +9,7 @@
 
 #include <linux/fs.h>
 #include <linux/dlm.h>
+#include <linux/slab.h>
 #include <linux/types.h>
 #include <linux/gfs2_ondisk.h>
 
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index b4106dd..f07119d 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -10,6 +10,8 @@
 #ifndef __RGRP_DOT_H__
 #define __RGRP_DOT_H__
 
+#include <linux/slab.h>
+
 struct gfs2_rgrpd;
 struct gfs2_sbd;
 struct gfs2_holder;
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 419042f..54fd984 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -8,7 +8,6 @@
  */
 
 #include <linux/sched.h>
-#include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/completion.h>
 #include <linux/buffer_head.h>
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index 226f2bf..5351129 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -7,7 +7,6 @@
  * of the GNU General Public License version 2.
  */
 
-#include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/completion.h>
 #include <linux/buffer_head.h>
diff --git a/fs/hfs/bnode.c b/fs/hfs/bnode.c
index 0d20006..cdb41a1 100644
--- a/fs/hfs/bnode.c
+++ b/fs/hfs/bnode.c
@@ -9,6 +9,7 @@
  */
 
 #include <linux/pagemap.h>
+#include <linux/slab.h>
 #include <linux/swap.h>
 
 #include "btree.h"
diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c
index 052f214..38a0a99 100644
--- a/fs/hfs/btree.c
+++ b/fs/hfs/btree.c
@@ -9,6 +9,7 @@
  */
 
 #include <linux/pagemap.h>
+#include <linux/slab.h>
 #include <linux/log2.h>
 
 #include "btree.h"
diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c
index 8bbe03c..86428f5 100644
--- a/fs/hfs/mdb.c
+++ b/fs/hfs/mdb.c
@@ -11,6 +11,7 @@
 #include <linux/cdrom.h>
 #include <linux/genhd.h>
 #include <linux/nls.h>
+#include <linux/slab.h>
 
 #include "hfs_fs.h"
 #include "btree.h"
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 5ed7252..0a81eb7 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -19,6 +19,7 @@
 #include <linux/nls.h>
 #include <linux/parser.h>
 #include <linux/seq_file.h>
+#include <linux/slab.h>
 #include <linux/smp_lock.h>
 #include <linux/vfs.h>
 
diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c
index 3fcbb0e..572628b 100644
--- a/fs/hfsplus/options.c
+++ b/fs/hfsplus/options.c
@@ -15,6 +15,7 @@
 #include <linux/nls.h>
 #include <linux/mount.h>
 #include <linux/seq_file.h>
+#include <linux/slab.h>
 #include "hfsplus_fs.h"
 
 enum {
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 032604e..3a029d8 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -11,6 +11,7 @@
 #include <linux/mm.h>
 #include <linux/pagemap.h>
 #include <linux/statfs.h>
+#include <linux/slab.h>
 #include <linux/seq_file.h>
 #include <linux/mount.h>
 #include "hostfs.h"
diff --git a/fs/hpfs/buffer.c b/fs/hpfs/buffer.c
index b6fca54..eac5f96 100644
--- a/fs/hpfs/buffer.c
+++ b/fs/hpfs/buffer.c
@@ -6,6 +6,7 @@
  *  general buffer i/o
  */
 #include <linux/sched.h>
+#include <linux/slab.h>
 #include "hpfs_fn.h"
 
 void hpfs_lock_creation(struct super_block *s)
diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c
index 26e3964..2338130 100644
--- a/fs/hpfs/dir.c
+++ b/fs/hpfs/dir.c
@@ -7,6 +7,7 @@
  */
 
 #include <linux/smp_lock.h>
+#include <linux/slab.h>
 #include "hpfs_fn.h"
 
 static int hpfs_dir_release(struct inode *inode, struct file *filp)
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c
index ff90aff..1042a9b 100644
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -7,6 +7,7 @@
  */
 
 #include <linux/smp_lock.h>
+#include <linux/slab.h>
 #include "hpfs_fn.h"
 
 void hpfs_init_inode(struct inode *i)
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index cadc4ce..aa53842 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -15,6 +15,7 @@
 #include <linux/sched.h>
 #include <linux/smp_lock.h>
 #include <linux/bitmap.h>
+#include <linux/slab.h>
 
 /* Mark the filesystem dirty, so that chkdsk checks it when os/2 booted */
 
diff --git a/fs/ioprio.c b/fs/ioprio.c
index c7c0b28..748cfb9 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -19,6 +19,7 @@
  * See also Documentation/block/ioprio.txt
  *
  */
+#include <linux/gfp.h>
 #include <linux/kernel.h>
 #include <linux/ioprio.h>
 #include <linux/blkdev.h>
diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c
index 8ba5441..b9ab69b 100644
--- a/fs/isofs/dir.c
+++ b/fs/isofs/dir.c
@@ -11,6 +11,7 @@
  *  isofs directory handling functions
  */
 #include <linux/smp_lock.h>
+#include <linux/gfp.h>
 #include "isofs.h"
 
 int isofs_name_translate(struct iso_directory_record *de, char *new, struct inode *inode)
diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c
index eaa8313..ab438be 100644
--- a/fs/isofs/namei.c
+++ b/fs/isofs/namei.c
@@ -7,6 +7,7 @@
  */
 
 #include <linux/smp_lock.h>
+#include <linux/gfp.h>
 #include "isofs.h"
 
 /*
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 2c90e3e..ecb44c9 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -17,7 +17,6 @@
 #include <linux/fs.h>
 #include <linux/jbd.h>
 #include <linux/errno.h>
-#include <linux/slab.h>
 #include <linux/mm.h>
 #include <linux/pagemap.h>
 #include <linux/bio.h>
diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c
index cb1a49a..54c9bc9 100644
--- a/fs/jbd/recovery.c
+++ b/fs/jbd/recovery.c
@@ -20,7 +20,6 @@
 #include <linux/fs.h>
 #include <linux/jbd.h>
 #include <linux/errno.h>
-#include <linux/slab.h>
 #endif
 
 /*
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index 7306328..049281b 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -20,7 +20,6 @@
 #include <linux/fs.h>
 #include <linux/jbd2.h>
 #include <linux/errno.h>
-#include <linux/slab.h>
 #include <linux/crc32.h>
 #endif
 
diff --git a/fs/jffs2/compr_lzo.c b/fs/jffs2/compr_lzo.c
index 90cb60d..cd02aca 100644
--- a/fs/jffs2/compr_lzo.c
+++ b/fs/jffs2/compr_lzo.c
@@ -11,7 +11,6 @@
 
 #include <linux/kernel.h>
 #include <linux/sched.h>
-#include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <linux/init.h>
 #include <linux/lzo.h>
diff --git a/fs/jffs2/compr_zlib.c b/fs/jffs2/compr_zlib.c
index cfd301a..b46661a4 100644
--- a/fs/jffs2/compr_zlib.c
+++ b/fs/jffs2/compr_zlib.c
@@ -14,7 +14,6 @@
 #endif
 
 #include <linux/kernel.h>
-#include <linux/slab.h>
 #include <linux/zlib.h>
 #include <linux/zutil.h>
 #include "nodelist.h"
diff --git a/fs/jffs2/debug.c b/fs/jffs2/debug.c
index 5544d31..ec35384 100644
--- a/fs/jffs2/debug.c
+++ b/fs/jffs2/debug.c
@@ -15,6 +15,7 @@
 #include <linux/crc32.h>
 #include <linux/jffs2.h>
 #include <linux/mtd/mtd.h>
+#include <linux/slab.h>
 #include "nodelist.h"
 #include "debug.h"
 
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index b7b74e2..e7291c1 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -10,7 +10,6 @@
  */
 
 #include <linux/kernel.h>
-#include <linux/slab.h>
 #include <linux/fs.h>
 #include <linux/time.h>
 #include <linux/pagemap.h>
diff --git a/fs/jffs2/nodelist.c b/fs/jffs2/nodelist.c
index 87c6f55..af02bd1 100644
--- a/fs/jffs2/nodelist.c
+++ b/fs/jffs2/nodelist.c
@@ -15,7 +15,6 @@
 #include <linux/mtd/mtd.h>
 #include <linux/rbtree.h>
 #include <linux/crc32.h>
-#include <linux/slab.h>
 #include <linux/pagemap.h>
 #include "nodelist.h"
 
diff --git a/fs/jffs2/nodemgmt.c b/fs/jffs2/nodemgmt.c
index 21a0529..191359d 100644
--- a/fs/jffs2/nodemgmt.c
+++ b/fs/jffs2/nodemgmt.c
@@ -10,7 +10,6 @@
  */
 
 #include <linux/kernel.h>
-#include <linux/slab.h>
 #include <linux/mtd/mtd.h>
 #include <linux/compiler.h>
 #include <linux/sched.h> /* For cond_resched() */
diff --git a/fs/jffs2/symlink.c b/fs/jffs2/symlink.c
index 4ec11e8..b955626 100644
--- a/fs/jffs2/symlink.c
+++ b/fs/jffs2/symlink.c
@@ -10,7 +10,6 @@
  */
 
 #include <linux/kernel.h>
-#include <linux/slab.h>
 #include <linux/fs.h>
 #include <linux/namei.h>
 #include "nodelist.h"
diff --git a/fs/jffs2/write.c b/fs/jffs2/write.c
index ca29440..c819eb0 100644
--- a/fs/jffs2/write.c
+++ b/fs/jffs2/write.c
@@ -12,7 +12,6 @@
 #include <linux/kernel.h>
 #include <linux/fs.h>
 #include <linux/crc32.h>
-#include <linux/slab.h>
 #include <linux/pagemap.h>
 #include <linux/mtd/mtd.h>
 #include "nodelist.h"
diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c
index 2131697..1057a49 100644
--- a/fs/jfs/acl.c
+++ b/fs/jfs/acl.c
@@ -19,6 +19,7 @@
  */
 
 #include <linux/sched.h>
+#include <linux/slab.h>
 #include <linux/fs.h>
 #include <linux/posix_acl_xattr.h>
 #include "jfs_incore.h"
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index d9b031c..6c4dfcbf 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -17,6 +17,7 @@
  */
 
 #include <linux/fs.h>
+#include <linux/slab.h>
 #include "jfs_incore.h"
 #include "jfs_superblock.h"
 #include "jfs_dmap.h"
diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c
index 0e4623b..9197a1b 100644
--- a/fs/jfs/jfs_dtree.c
+++ b/fs/jfs/jfs_dtree.c
@@ -102,6 +102,7 @@
 
 #include <linux/fs.h>
 #include <linux/quotaops.h>
+#include <linux/slab.h>
 #include "jfs_incore.h"
 #include "jfs_superblock.h"
 #include "jfs_filsys.h"
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index 0fc3040..f8332dc 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -45,6 +45,7 @@
 #include <linux/buffer_head.h>
 #include <linux/pagemap.h>
 #include <linux/quotaops.h>
+#include <linux/slab.h>
 
 #include "jfs_incore.h"
 #include "jfs_inode.h"
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index 335c4de..c51af2a 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -70,6 +70,7 @@
 #include <linux/delay.h>
 #include <linux/mutex.h>
 #include <linux/seq_file.h>
+#include <linux/slab.h>
 #include "jfs_incore.h"
 #include "jfs_filsys.h"
 #include "jfs_metapage.h"
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 07b6c5d..48b44bd 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -21,6 +21,7 @@
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/bio.h>
+#include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/buffer_head.h>
 #include <linux/mempool.h>
diff --git a/fs/jfs/jfs_unicode.h b/fs/jfs/jfs_unicode.h
index 3fbb3a2..8f0f02c 100644
--- a/fs/jfs/jfs_unicode.h
+++ b/fs/jfs/jfs_unicode.h
@@ -19,6 +19,7 @@
 #ifndef _H_JFS_UNICODE
 #define _H_JFS_UNICODE
 
+#include <linux/slab.h>
 #include <asm/byteorder.h>
 #include "jfs_types.h"
 
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 266699d..157382f 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -30,6 +30,7 @@
 #include <linux/buffer_head.h>
 #include <linux/exportfs.h>
 #include <linux/crc32.h>
+#include <linux/slab.h>
 #include <asm/uaccess.h>
 #include <linux/seq_file.h>
 #include <linux/smp_lock.h>
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index 1f594ab..fa96bbb 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -21,6 +21,7 @@
 #include <linux/fs.h>
 #include <linux/xattr.h>
 #include <linux/posix_acl_xattr.h>
+#include <linux/slab.h>
 #include <linux/quotaops.h>
 #include <linux/security.h>
 #include "jfs_incore.h"
diff --git a/fs/libfs.c b/fs/libfs.c
index 9e50bcf..ea9a6cc 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -5,6 +5,7 @@
 
 #include <linux/module.h>
 #include <linux/pagemap.h>
+#include <linux/slab.h>
 #include <linux/mount.h>
 #include <linux/vfs.h>
 #include <linux/mutex.h>
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index fc9032d..64fd427 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -8,6 +8,7 @@
 
 #include <linux/module.h>
 #include <linux/types.h>
+#include <linux/slab.h>
 #include <linux/time.h>
 #include <linux/nfs_fs.h>
 #include <linux/sunrpc/clnt.h>
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index c81249f..7932c39 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -8,6 +8,7 @@
 
 #include <linux/module.h>
 #include <linux/smp_lock.h>
+#include <linux/slab.h>
 #include <linux/types.h>
 #include <linux/errno.h>
 #include <linux/fs.h>
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index fefa4df..e301546 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -10,6 +10,7 @@
 #include <linux/utsname.h>
 #include <linux/kernel.h>
 #include <linux/ktime.h>
+#include <linux/slab.h>
 
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/xprtsock.h>
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 7d15051..f1bacf1 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -21,7 +21,6 @@
 #include <linux/errno.h>
 #include <linux/in.h>
 #include <linux/uio.h>
-#include <linux/slab.h>
 #include <linux/smp.h>
 #include <linux/smp_lock.h>
 #include <linux/mutex.h>
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index a7966ee..031c656 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -9,7 +9,6 @@
 
 #include <linux/types.h>
 #include <linux/time.h>
-#include <linux/slab.h>
 #include <linux/smp_lock.h>
 #include <linux/lockd/lockd.h>
 #include <linux/lockd/share.h>
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index d100179..84055d3 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -21,6 +21,7 @@
  */
 
 #include <linux/types.h>
+#include <linux/slab.h>
 #include <linux/errno.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 56c9519..0f2ab74 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -9,7 +9,6 @@
 
 #include <linux/types.h>
 #include <linux/time.h>
-#include <linux/slab.h>
 #include <linux/smp_lock.h>
 #include <linux/lockd/lockd.h>
 #include <linux/lockd/share.h>
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index ad478da..d0ef94c 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -10,6 +10,7 @@
 #include <linux/string.h>
 #include <linux/time.h>
 #include <linux/in.h>
+#include <linux/slab.h>
 #include <linux/mutex.h>
 #include <linux/sunrpc/svc.h>
 #include <linux/sunrpc/clnt.h>
diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c
index 9718c22..18e8c14 100644
--- a/fs/logfs/dev_bdev.c
+++ b/fs/logfs/dev_bdev.c
@@ -9,6 +9,7 @@
 #include <linux/bio.h>
 #include <linux/blkdev.h>
 #include <linux/buffer_head.h>
+#include <linux/gfp.h>
 
 #define PAGE_OFS(ofs) ((ofs) & (PAGE_SIZE-1))
 
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index 56a8bfb..e1cb995 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -6,7 +6,7 @@
  * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
  */
 #include "logfs.h"
-
+#include <linux/slab.h>
 
 /*
  * Atomic dir operations
diff --git a/fs/logfs/gc.c b/fs/logfs/gc.c
index 92949f9..84e36f5 100644
--- a/fs/logfs/gc.c
+++ b/fs/logfs/gc.c
@@ -7,6 +7,7 @@
  */
 #include "logfs.h"
 #include <linux/sched.h>
+#include <linux/slab.h>
 
 /*
  * Wear leveling needs to kick in when the difference between low erase
diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c
index 33ec1ae..14ed272 100644
--- a/fs/logfs/inode.c
+++ b/fs/logfs/inode.c
@@ -6,6 +6,7 @@
  * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
  */
 #include "logfs.h"
+#include <linux/slab.h>
 #include <linux/writeback.h>
 #include <linux/backing-dev.h>
 
diff --git a/fs/logfs/journal.c b/fs/logfs/journal.c
index 6ad30a4..f186043 100644
--- a/fs/logfs/journal.c
+++ b/fs/logfs/journal.c
@@ -6,6 +6,7 @@
  * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
  */
 #include "logfs.h"
+#include <linux/slab.h>
 
 static void logfs_calc_free(struct super_block *sb)
 {
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index 7a23b3e..d5919af 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -18,6 +18,7 @@
  */
 #include "logfs.h"
 #include <linux/sched.h>
+#include <linux/slab.h>
 
 static u64 adjust_bix(u64 bix, level_t level)
 {
diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c
index 1a14f99..614d7a6 100644
--- a/fs/logfs/segment.c
+++ b/fs/logfs/segment.c
@@ -10,6 +10,7 @@
  * three kinds of objects: inodes, dentries and blocks, both data and indirect.
  */
 #include "logfs.h"
+#include <linux/slab.h>
 
 static int logfs_mark_segment_bad(struct super_block *sb, u32 segno)
 {
diff --git a/fs/logfs/super.c b/fs/logfs/super.c
index c66beab..46990ea 100644
--- a/fs/logfs/super.c
+++ b/fs/logfs/super.c
@@ -11,6 +11,7 @@
  */
 #include "logfs.h"
 #include <linux/bio.h>
+#include <linux/slab.h>
 #include <linux/mtd/mtd.h>
 #include <linux/statfs.h>
 #include <linux/buffer_head.h>
diff --git a/fs/minix/itree_v1.c b/fs/minix/itree_v1.c
index 82d6554..282e15a 100644
--- a/fs/minix/itree_v1.c
+++ b/fs/minix/itree_v1.c
@@ -1,4 +1,5 @@
 #include <linux/buffer_head.h>
+#include <linux/slab.h>
 #include "minix.h"
 
 enum {DEPTH = 3, DIRECT = 7};	/* Only double indirect */
diff --git a/fs/mpage.c b/fs/mpage.c
index 598d54e..fd56ca2 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -16,6 +16,7 @@
 #include <linux/module.h>
 #include <linux/mm.h>
 #include <linux/kdev_t.h>
+#include <linux/gfp.h>
 #include <linux/bio.h>
 #include <linux/fs.h>
 #include <linux/buffer_head.h>
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index b8b5b30..7edfcd4 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -15,7 +15,6 @@
 #include <linux/errno.h>
 #include <linux/stat.h>
 #include <linux/kernel.h>
-#include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <linux/mm.h>
 #include <asm/uaccess.h>
diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c
index 6a7d901..1daabb9 100644
--- a/fs/ncpfs/file.c
+++ b/fs/ncpfs/file.c
@@ -15,7 +15,6 @@
 #include <linux/fcntl.h>
 #include <linux/stat.h>
 #include <linux/mm.h>
-#include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <linux/sched.h>
 #include <linux/smp_lock.h>
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index ec8f45f..60a5e28 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -15,6 +15,7 @@
 #include <linux/time.h>
 #include <linux/mm.h>
 #include <linux/mount.h>
+#include <linux/slab.h>
 #include <linux/highuid.h>
 #include <linux/smp_lock.h>
 #include <linux/vmalloc.h>
diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c
index 15458de..56f5b3a 100644
--- a/fs/ncpfs/mmap.c
+++ b/fs/ncpfs/mmap.c
@@ -9,12 +9,12 @@
 #include <linux/stat.h>
 #include <linux/time.h>
 #include <linux/kernel.h>
+#include <linux/gfp.h>
 #include <linux/mm.h>
 #include <linux/shm.h>
 #include <linux/errno.h>
 #include <linux/mman.h>
 #include <linux/string.h>
-#include <linux/slab.h>
 #include <linux/fcntl.h>
 #include <linux/ncp_fs.h>
 
diff --git a/fs/ncpfs/sock.c b/fs/ncpfs/sock.c
index e37df8d..c7ff6c7 100644
--- a/fs/ncpfs/sock.c
+++ b/fs/ncpfs/sock.c
@@ -21,6 +21,7 @@
 #include <linux/mm.h>
 #include <linux/netdevice.h>
 #include <linux/signal.h>
+#include <linux/slab.h>
 #include <net/scm.h>
 #include <net/sock.h>
 #include <linux/ipx.h>
diff --git a/fs/ncpfs/symlink.c b/fs/ncpfs/symlink.c
index e3d26c1..c634fd1 100644
--- a/fs/ncpfs/symlink.c
+++ b/fs/ncpfs/symlink.c
@@ -27,6 +27,7 @@
 #include <linux/fs.h>
 #include <linux/ncp_fs.h>
 #include <linux/time.h>
+#include <linux/slab.h>
 #include <linux/mm.h>
 #include <linux/stat.h>
 #include "ncplib_kernel.h"
diff --git a/fs/nfs/cache_lib.c b/fs/nfs/cache_lib.c
index b4ffd01..8469031 100644
--- a/fs/nfs/cache_lib.c
+++ b/fs/nfs/cache_lib.c
@@ -10,6 +10,7 @@
 #include <linux/moduleparam.h>
 #include <linux/mount.h>
 #include <linux/namei.h>
+#include <linux/slab.h>
 #include <linux/sunrpc/cache.h>
 #include <linux/sunrpc/rpc_pipe_fs.h>
 
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 84761b5..a08770a 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -7,6 +7,7 @@
  */
 #include <linux/nfs4.h>
 #include <linux/nfs_fs.h>
+#include <linux/slab.h>
 #include "nfs4_fs.h"
 #include "callback.h"
 #include "delegation.h"
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index a2b8b4d..05af212 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -9,6 +9,7 @@
 #include <linux/sunrpc/svc.h>
 #include <linux/nfs4.h>
 #include <linux/nfs_fs.h>
+#include <linux/slab.h>
 #include "nfs4_fs.h"
 #include "callback.h"
 
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 2274f17..2a3d352 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -35,6 +35,7 @@
 #include <linux/vfs.h>
 #include <linux/inet.h>
 #include <linux/in6.h>
+#include <linux/slab.h>
 #include <net/ipv6.h>
 #include <linux/nfs_xdr.h>
 #include <linux/sunrpc/bc_xprt.h>
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 2563beb..1567124 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -10,6 +10,7 @@
 #include <linux/kthread.h>
 #include <linux/module.h>
 #include <linux/sched.h>
+#include <linux/slab.h>
 #include <linux/smp_lock.h>
 #include <linux/spinlock.h>
 
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 0d28982..ad4cd31 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -44,6 +44,7 @@
 #include <linux/file.h>
 #include <linux/pagemap.h>
 #include <linux/kref.h>
+#include <linux/slab.h>
 
 #include <linux/nfs_fs.h>
 #include <linux/nfs_page.h>
diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c
index 3f0cd4d..76fd235 100644
--- a/fs/nfs/dns_resolve.c
+++ b/fs/nfs/dns_resolve.c
@@ -9,6 +9,7 @@
 #include <linux/hash.h>
 #include <linux/string.h>
 #include <linux/kmod.h>
+#include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/socket.h>
 #include <linux/seq_file.h>
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index ae0d927..8d965bd 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -24,9 +24,9 @@
 #include <linux/nfs_fs.h>
 #include <linux/nfs_mount.h>
 #include <linux/mm.h>
-#include <linux/slab.h>
 #include <linux/pagemap.h>
 #include <linux/aio.h>
+#include <linux/gfp.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
index 237874f..a6b16ed 100644
--- a/fs/nfs/fscache.c
+++ b/fs/nfs/fscache.c
@@ -17,6 +17,7 @@
 #include <linux/nfs_fs_sb.h>
 #include <linux/in6.h>
 #include <linux/seq_file.h>
+#include <linux/slab.h>
 
 #include "internal.h"
 #include "iostat.h"
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index e358df7..737128f 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -36,6 +36,7 @@
 #include <linux/vfs.h>
 #include <linux/inet.h>
 #include <linux/nfs_xdr.h>
+#include <linux/slab.h>
 
 #include <asm/system.h>
 #include <asm/uaccess.h>
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 40c7667..7888cf3 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -8,6 +8,7 @@
  */
 
 #include <linux/dcache.h>
+#include <linux/gfp.h>
 #include <linux/mount.h>
 #include <linux/namei.h>
 #include <linux/nfs_fs.h>
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 7bc2da8..81cf142 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -12,7 +12,6 @@
 #include <linux/param.h>
 #include <linux/time.h>
 #include <linux/mm.h>
-#include <linux/slab.h>
 #include <linux/errno.h>
 #include <linux/string.h>
 #include <linux/in.h>
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index bac6051..d150ae0 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -1,4 +1,5 @@
 #include <linux/fs.h>
+#include <linux/gfp.h>
 #include <linux/nfs.h>
 #include <linux/nfs3.h>
 #include <linux/nfs_fs.h>
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 24992f0..e701002 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -10,6 +10,7 @@
 #include <linux/errno.h>
 #include <linux/string.h>
 #include <linux/sunrpc/clnt.h>
+#include <linux/slab.h>
 #include <linux/nfs.h>
 #include <linux/nfs3.h>
 #include <linux/nfs_fs.h>
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 5fe5492..56a86f6 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -9,7 +9,6 @@
 #include <linux/param.h>
 #include <linux/time.h>
 #include <linux/mm.h>
-#include <linux/slab.h>
 #include <linux/errno.h>
 #include <linux/string.h>
 #include <linux/in.h>
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index fa3408f..f071d12 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -11,6 +11,7 @@
 #include <linux/mount.h>
 #include <linux/namei.h>
 #include <linux/nfs_fs.h>
+#include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/sunrpc/clnt.h>
 #include <linux/vfs.h>
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index f9254fb..d79a7b3 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -39,6 +39,7 @@
 #include <linux/delay.h>
 #include <linux/errno.h>
 #include <linux/string.h>
+#include <linux/slab.h>
 #include <linux/sunrpc/clnt.h>
 #include <linux/nfs.h>
 #include <linux/nfs4.h>
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index dd17713..38f3b58 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -38,7 +38,6 @@
 #include <linux/param.h>
 #include <linux/time.h>
 #include <linux/mm.h>
-#include <linux/slab.h>
 #include <linux/errno.h>
 #include <linux/string.h>
 #include <linux/in.h>
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index c752d94..0288be8 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -29,7 +29,6 @@
 
 #include <linux/types.h>
 #include <linux/param.h>
-#include <linux/slab.h>
 #include <linux/time.h>
 #include <linux/mm.h>
 #include <linux/errno.h>
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 6baf9a3..e016372 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -48,6 +48,7 @@
 #include <linux/vfs.h>
 #include <linux/inet.h>
 #include <linux/in6.h>
+#include <linux/slab.h>
 #include <net/ipv6.h>
 #include <linux/netdevice.h>
 #include <linux/nfs_xdr.h>
diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c
index 2ea9e5c..05c9e02 100644
--- a/fs/nfs/symlink.c
+++ b/fs/nfs/symlink.c
@@ -19,7 +19,6 @@
 #include <linux/pagemap.h>
 #include <linux/stat.h>
 #include <linux/mm.h>
-#include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/namei.h>
 
diff --git a/fs/nfs_common/nfsacl.c b/fs/nfs_common/nfsacl.c
index 04133aa..fc1c525 100644
--- a/fs/nfs_common/nfsacl.c
+++ b/fs/nfs_common/nfsacl.c
@@ -22,6 +22,7 @@
 
 #include <linux/module.h>
 #include <linux/fs.h>
+#include <linux/gfp.h>
 #include <linux/sunrpc/xdr.h>
 #include <linux/nfsacl.h>
 #include <linux/nfs3.h>
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index a0c4016..872a5ef 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -12,6 +12,7 @@
  * Copyright (C) 1995, 1996 Olaf Kirch, <okir@monad.swb.de>
  */
 
+#include <linux/slab.h>
 #include <linux/namei.h>
 #include <linux/module.h>
 #include <linux/exportfs.h>
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index f20589d..6aa5590 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -7,6 +7,7 @@
 #include "nfsd.h"
 /* FIXME: nfsacl.h is a broken header */
 #include <linux/nfsacl.h>
+#include <linux/gfp.h>
 #include "cache.h"
 #include "xdr3.h"
 #include "vfs.h"
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index e0c4846..a596e9d 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -7,6 +7,7 @@
 #include "nfsd.h"
 /* FIXME: nfsacl.h is a broken header */
 #include <linux/nfsacl.h>
+#include <linux/gfp.h>
 #include "cache.h"
 #include "xdr3.h"
 #include "vfs.h"
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index 8815068..e480526 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -34,6 +34,7 @@
  *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include <linux/slab.h>
 #include <linux/nfs_fs.h>
 #include <linux/nfs4_acl.h>
 
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 4bc22c7..7e32bd3 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -32,6 +32,7 @@
  */
 
 #include <linux/sunrpc/clnt.h>
+#include <linux/slab.h>
 #include "nfsd.h"
 #include "state.h"
 
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index 6e2983b..c78dbf4 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -36,6 +36,7 @@
 #include <linux/nfsd_idmap.h>
 #include <linux/seq_file.h>
 #include <linux/sched.h>
+#include <linux/slab.h>
 
 /*
  * Cache entry
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 37514c4..2ab9e85 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -33,6 +33,7 @@
  *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 #include <linux/file.h>
+#include <linux/slab.h>
 
 #include "cache.h"
 #include "xdr4.h"
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 98fb98e..7a9ae32 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -32,6 +32,7 @@
 */
 
 #include <linux/file.h>
+#include <linux/slab.h>
 #include <linux/namei.h>
 #include <linux/crypto.h>
 #include <linux/sched.h>
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index c97fddb..6a8feda 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -34,6 +34,7 @@
 
 #include <linux/file.h>
 #include <linux/smp_lock.h>
+#include <linux/slab.h>
 #include <linux/namei.h>
 #include <linux/swap.h>
 #include <linux/sunrpc/svcauth_gss.h>
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index c47b4d7..e170317 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -40,6 +40,7 @@
  * at the end of nfs4svc_decode_compoundargs.
  */
 
+#include <linux/slab.h>
 #include <linux/namei.h>
 #include <linux/statfs.h>
 #include <linux/utsname.h>
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index da08560..4666a20 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -8,6 +8,8 @@
  * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
  */
 
+#include <linux/slab.h>
+
 #include "nfsd.h"
 #include "cache.h"
 
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 0f0e77f..e359107 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -4,6 +4,7 @@
  * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
  */
 
+#include <linux/slab.h>
 #include <linux/namei.h>
 #include <linux/ctype.h>
 
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index a11b0e8..6dd5f19 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -25,6 +25,7 @@
 #include <linux/xattr.h>
 #include <linux/jhash.h>
 #include <linux/ima.h>
+#include <linux/slab.h>
 #include <asm/uaccess.h>
 #include <linux/exportfs.h>
 #include <linux/writeback.h>
diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c
index 3f959f1..8d6356a 100644
--- a/fs/nilfs2/alloc.c
+++ b/fs/nilfs2/alloc.c
@@ -26,6 +26,7 @@
 #include <linux/buffer_head.h>
 #include <linux/fs.h>
 #include <linux/bitops.h>
+#include <linux/slab.h>
 #include "mdt.h"
 #include "alloc.h"
 
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index 471e269..447ce47 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -27,6 +27,7 @@
 #include <linux/buffer_head.h>
 #include <linux/mm.h>
 #include <linux/backing-dev.h>
+#include <linux/gfp.h>
 #include "nilfs.h"
 #include "mdt.h"
 #include "dat.h"
diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c
index 8880a9e..145f03c 100644
--- a/fs/nilfs2/gcinode.c
+++ b/fs/nilfs2/gcinode.c
@@ -45,6 +45,7 @@
 #include <linux/buffer_head.h>
 #include <linux/mpage.h>
 #include <linux/hash.h>
+#include <linux/slab.h>
 #include <linux/swap.h>
 #include "nilfs.h"
 #include "page.h"
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 7868cc1..0957b58 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -22,6 +22,7 @@
  */
 
 #include <linux/buffer_head.h>
+#include <linux/gfp.h>
 #include <linux/mpage.h>
 #include <linux/writeback.h>
 #include <linux/uio.h>
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index 313d0a2..c2ff1b3 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -23,6 +23,7 @@
 #include <linux/fs.h>
 #include <linux/wait.h>
 #include <linux/smp_lock.h>	/* lock_kernel(), unlock_kernel() */
+#include <linux/slab.h>
 #include <linux/capability.h>	/* capable() */
 #include <linux/uaccess.h>	/* copy_from_user(), copy_to_user() */
 #include <linux/vmalloc.h>
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index 06713ff..024be8c 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -26,6 +26,7 @@
 #include <linux/writeback.h>
 #include <linux/backing-dev.h>
 #include <linux/swap.h>
+#include <linux/slab.h>
 #include "nilfs.h"
 #include "segment.h"
 #include "page.h"
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index fc246db..8de3e1e 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -29,6 +29,7 @@
 #include <linux/list.h>
 #include <linux/highmem.h>
 #include <linux/pagevec.h>
+#include <linux/gfp.h>
 #include "nilfs.h"
 #include "page.h"
 #include "mdt.h"
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c
index 017bedc..ba43146 100644
--- a/fs/nilfs2/recovery.c
+++ b/fs/nilfs2/recovery.c
@@ -23,6 +23,7 @@
 #include <linux/buffer_head.h>
 #include <linux/blkdev.h>
 #include <linux/swap.h>
+#include <linux/slab.h>
 #include <linux/crc32.h>
 #include "nilfs.h"
 #include "segment.h"
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index 6129a43..17851f7 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -25,6 +25,7 @@
 #include <linux/writeback.h>
 #include <linux/crc32.h>
 #include <linux/backing-dev.h>
+#include <linux/slab.h>
 #include "page.h"
 #include "segbuf.h"
 
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index c161d89..6a7dbd8 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -32,6 +32,7 @@
 #include <linux/kthread.h>
 #include <linux/crc32.h>
 #include <linux/pagevec.h>
+#include <linux/slab.h>
 #include "nilfs.h"
 #include "btnode.h"
 #include "page.h"
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h
index e9795f1..1ab9745 100644
--- a/fs/nilfs2/the_nilfs.h
+++ b/fs/nilfs2/the_nilfs.h
@@ -29,6 +29,7 @@
 #include <linux/fs.h>
 #include <linux/blkdev.h>
 #include <linux/backing-dev.h>
+#include <linux/slab.h>
 #include "sb.h"
 
 /* the_nilfs struct */
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 037e878..fcc2f06 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -18,6 +18,7 @@
 
 #include <linux/dcache.h>
 #include <linux/fs.h>
+#include <linux/gfp.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/srcu.h>
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index 3165d85..0399bcb 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -87,7 +87,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
-#include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/writeback.h> /* for inode_lock */
 
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index cfce53c..c3c2c7a 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -23,6 +23,7 @@
 
 #include <linux/errno.h>
 #include <linux/fs.h>
+#include <linux/gfp.h>
 #include <linux/mm.h>
 #include <linux/pagemap.h>
 #include <linux/swap.h>
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index 50d3b0c..f5094ee 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -22,6 +22,7 @@
 
 #include <linux/buffer_head.h>
 #include <linux/sched.h>
+#include <linux/slab.h>
 #include <linux/swap.h>
 #include <linux/writeback.h>
 
diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c
index 08f7530..6551c7c 100644
--- a/fs/ntfs/compress.c
+++ b/fs/ntfs/compress.c
@@ -25,6 +25,7 @@
 #include <linux/buffer_head.h>
 #include <linux/blkdev.h>
 #include <linux/vmalloc.h>
+#include <linux/slab.h>
 
 #include "attrib.h"
 #include "inode.h"
diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c
index 9173e82..fe44d3f 100644
--- a/fs/ntfs/dir.c
+++ b/fs/ntfs/dir.c
@@ -21,6 +21,7 @@
  */
 
 #include <linux/buffer_head.h>
+#include <linux/slab.h>
 
 #include "dir.h"
 #include "aops.h"
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index b681c71..8804f09 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -20,6 +20,7 @@
  */
 
 #include <linux/buffer_head.h>
+#include <linux/gfp.h>
 #include <linux/pagemap.h>
 #include <linux/pagevec.h>
 #include <linux/sched.h>
diff --git a/fs/ntfs/index.c b/fs/ntfs/index.c
index 2194eff..096c135 100644
--- a/fs/ntfs/index.c
+++ b/fs/ntfs/index.c
@@ -19,6 +19,8 @@
  * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
+#include <linux/slab.h>
+
 #include "aops.h"
 #include "collate.h"
 #include "debug.h"
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index 1caa0ef..b572b67 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -21,6 +21,7 @@
  */
 
 #include <linux/buffer_head.h>
+#include <linux/slab.h>
 #include <linux/swap.h>
 
 #include "attrib.h"
diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c
index 2ca0015..358273e 100644
--- a/fs/ntfs/namei.c
+++ b/fs/ntfs/namei.c
@@ -23,6 +23,7 @@
 #include <linux/dcache.h>
 #include <linux/exportfs.h>
 #include <linux/security.h>
+#include <linux/slab.h>
 
 #include "attrib.h"
 #include "debug.h"
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index 8ccf0f8..e13fc9e 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -21,6 +21,7 @@
 
 #include <linux/init.h>
 #include <linux/module.h>
+#include <linux/slab.h>
 #include <linux/string.h>
 
 #define MLOG_MASK_PREFIX ML_INODE
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index 21c808f..ecebb22 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -25,7 +25,6 @@
 
 #include <linux/fs.h>
 #include <linux/types.h>
-#include <linux/slab.h>
 #include <linux/highmem.h>
 
 #include <cluster/masklog.h>
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 5c98900..41d5f1f 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -34,6 +34,7 @@
 #include <linux/crc32.h>
 #include <linux/time.h>
 #include <linux/debugfs.h>
+#include <linux/slab.h>
 
 #include "heartbeat.h"
 #include "tcp.h"
diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c
index c81142e..ed0c9f3 100644
--- a/fs/ocfs2/cluster/nodemanager.c
+++ b/fs/ocfs2/cluster/nodemanager.c
@@ -19,6 +19,7 @@
  * Boston, MA 021110-1307, USA.
  */
 
+#include <linux/slab.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/configfs.h>
diff --git a/fs/ocfs2/cluster/quorum.c b/fs/ocfs2/cluster/quorum.c
index 6390240..cf3e166 100644
--- a/fs/ocfs2/cluster/quorum.c
+++ b/fs/ocfs2/cluster/quorum.c
@@ -44,7 +44,6 @@
  * and if they're the last, they fire off the decision.
  */
 #include <linux/kernel.h>
-#include <linux/slab.h>
 #include <linux/workqueue.h>
 #include <linux/reboot.h>
 
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
index dccc439..a795eb9 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -28,7 +28,6 @@
 #include <linux/module.h>
 #include <linux/fs.h>
 #include <linux/types.h>
-#include <linux/slab.h>
 #include <linux/highmem.h>
 #include <linux/init.h>
 #include <linux/sysctl.h>
diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c
index f283bce..90803b4 100644
--- a/fs/ocfs2/dlm/dlmconvert.c
+++ b/fs/ocfs2/dlm/dlmconvert.c
@@ -28,7 +28,6 @@
 #include <linux/module.h>
 #include <linux/fs.h>
 #include <linux/types.h>
-#include <linux/slab.h>
 #include <linux/highmem.h>
 #include <linux/init.h>
 #include <linux/sysctl.h>
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index 52ec020..11a6d1f 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -28,7 +28,6 @@
 #include <linux/module.h>
 #include <linux/fs.h>
 #include <linux/types.h>
-#include <linux/slab.h>
 #include <linux/highmem.h>
 #include <linux/init.h>
 #include <linux/sysctl.h>
diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c
index 49e29ec..b47c1b9 100644
--- a/fs/ocfs2/dlm/dlmunlock.c
+++ b/fs/ocfs2/dlm/dlmunlock.c
@@ -28,7 +28,6 @@
 #include <linux/module.h>
 #include <linux/fs.h>
 #include <linux/types.h>
-#include <linux/slab.h>
 #include <linux/highmem.h>
 #include <linux/init.h>
 #include <linux/sysctl.h>
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index c562a75..09e3fdf 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -24,6 +24,7 @@
 
 #include <linux/fs.h>
 #include <linux/init.h>
+#include <linux/slab.h>
 #include <linux/types.h>
 #include <linux/fiemap.h>
 
diff --git a/fs/ocfs2/heartbeat.c b/fs/ocfs2/heartbeat.c
index c6e7213..1aa863d 100644
--- a/fs/ocfs2/heartbeat.c
+++ b/fs/ocfs2/heartbeat.c
@@ -26,7 +26,6 @@
 
 #include <linux/fs.h>
 #include <linux/types.h>
-#include <linux/slab.h>
 #include <linux/highmem.h>
 
 #define MLOG_MASK_PREFIX ML_SUPER
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index ab20790..07cc8bb 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -25,7 +25,6 @@
 
 #include <linux/fs.h>
 #include <linux/types.h>
-#include <linux/slab.h>
 #include <linux/highmem.h>
 #include <linux/pagemap.h>
 #include <linux/quotaops.h>
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
index 3973761..7898bd3 100644
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c
@@ -25,7 +25,6 @@
 
 #include <linux/fs.h>
 #include <linux/types.h>
-#include <linux/slab.h>
 #include <linux/highmem.h>
 #include <linux/pagemap.h>
 #include <linux/uio.h>
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index 355f41d..ab42a74 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -3,6 +3,7 @@
  */
 #include <linux/spinlock.h>
 #include <linux/fs.h>
+#include <linux/slab.h>
 #include <linux/quota.h>
 #include <linux/quotaops.h>
 #include <linux/dqblk_qtree.h>
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index a6467f3..9ad4930 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -3,6 +3,7 @@
  */
 
 #include <linux/fs.h>
+#include <linux/slab.h>
 #include <linux/quota.h>
 #include <linux/quotaops.h>
 #include <linux/module.h>
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 29405f2..bd96f6c 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -37,7 +37,6 @@
 
 #include <linux/bio.h>
 #include <linux/blkdev.h>
-#include <linux/gfp.h>
 #include <linux/slab.h>
 #include <linux/writeback.h>
 #include <linux/pagevec.h>
diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c
index 7020e12..0d3049f 100644
--- a/fs/ocfs2/stack_o2cb.c
+++ b/fs/ocfs2/stack_o2cb.c
@@ -19,6 +19,7 @@
 
 #include <linux/kernel.h>
 #include <linux/crc32.h>
+#include <linux/slab.h>
 #include <linux/module.h>
 
 /* Needed for AOP_TRUNCATED_PAGE in mlog_errno() */
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
index 5ae8812..2dc57bc 100644
--- a/fs/ocfs2/stack_user.c
+++ b/fs/ocfs2/stack_user.c
@@ -21,6 +21,7 @@
 #include <linux/fs.h>
 #include <linux/miscdevice.h>
 #include <linux/mutex.h>
+#include <linux/slab.h>
 #include <linux/smp_lock.h>
 #include <linux/reboot.h>
 #include <asm/uaccess.h>
diff --git a/fs/ocfs2/sysfile.c b/fs/ocfs2/sysfile.c
index 40e5370..bfe7190 100644
--- a/fs/ocfs2/sysfile.c
+++ b/fs/ocfs2/sysfile.c
@@ -25,7 +25,6 @@
 
 #include <linux/fs.h>
 #include <linux/types.h>
-#include <linux/slab.h>
 #include <linux/highmem.h>
 
 #define MLOG_MASK_PREFIX ML_INODE
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index 75d9b5b..c82af6a 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -6,6 +6,7 @@
 #include <linux/version.h>
 #include <linux/module.h>
 #include <linux/sched.h>
+#include <linux/slab.h>
 #include <linux/fs.h>
 #include <linux/vfs.h>
 #include <linux/parser.h>
diff --git a/fs/open.c b/fs/open.c
index e17f544..74e5cd9 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -10,7 +10,6 @@
 #include <linux/fdtable.h>
 #include <linux/fsnotify.h>
 #include <linux/module.h>
-#include <linux/slab.h>
 #include <linux/tty.h>
 #include <linux/namei.h>
 #include <linux/backing-dev.h>
@@ -20,6 +19,7 @@
 #include <linux/mount.h>
 #include <linux/vfs.h>
 #include <linux/fcntl.h>
+#include <linux/slab.h>
 #include <asm/uaccess.h>
 #include <linux/fs.h>
 #include <linux/personality.h>
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index e8865c1..e238ab2 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -16,6 +16,7 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/fs.h>
+#include <linux/slab.h>
 #include <linux/kmod.h>
 #include <linux/ctype.h>
 #include <linux/genhd.h>
diff --git a/fs/partitions/efi.c b/fs/partitions/efi.c
index 49cfd5f..91babda 100644
--- a/fs/partitions/efi.c
+++ b/fs/partitions/efi.c
@@ -95,6 +95,7 @@
  ************************************************************/
 #include <linux/crc32.h>
 #include <linux/math64.h>
+#include <linux/slab.h>
 #include "check.h"
 #include "efi.h"
 
diff --git a/fs/proc/array.c b/fs/proc/array.c
index aa8637b..e51f2ec 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -68,7 +68,6 @@
 #include <linux/hugetlb.h>
 #include <linux/pagemap.h>
 #include <linux/swap.h>
-#include <linux/slab.h>
 #include <linux/smp.h>
 #include <linux/signal.h>
 #include <linux/highmem.h>
diff --git a/fs/proc/base.c b/fs/proc/base.c
index a731084..9e82adc 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -81,6 +81,7 @@
 #include <linux/elf.h>
 #include <linux/pid_namespace.h>
 #include <linux/fs_struct.h>
+#include <linux/slab.h>
 #include "internal.h"
 
 /* NOTE:
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 08f4d71..43c1274 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -13,6 +13,7 @@
 #include <linux/proc_fs.h>
 #include <linux/stat.h>
 #include <linux/module.h>
+#include <linux/slab.h>
 #include <linux/mount.h>
 #include <linux/init.h>
 #include <linux/idr.h>
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 445a02b..d35b232 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -18,6 +18,7 @@
 #include <linux/module.h>
 #include <linux/smp_lock.h>
 #include <linux/sysctl.h>
+#include <linux/slab.h>
 
 #include <asm/system.h>
 #include <asm/uaccess.h>
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index b442dac..19979a2 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -19,6 +19,7 @@
 #include <linux/highmem.h>
 #include <linux/bootmem.h>
 #include <linux/init.h>
+#include <linux/slab.h>
 #include <asm/uaccess.h>
 #include <asm/io.h>
 #include <linux/list.h>
diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c
index 9fe7d7e..b1822dd 100644
--- a/fs/proc/nommu.c
+++ b/fs/proc/nommu.c
@@ -21,7 +21,6 @@
 #include <linux/mmzone.h>
 #include <linux/pagemap.h>
 #include <linux/swap.h>
-#include <linux/slab.h>
 #include <linux/smp.h>
 #include <linux/seq_file.h>
 #include <linux/hugetlb.h>
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c
index f8650dc..ce94801 100644
--- a/fs/proc/proc_devtree.c
+++ b/fs/proc/proc_devtree.c
@@ -12,6 +12,7 @@
 #include <linux/string.h>
 #include <linux/of.h>
 #include <linux/module.h>
+#include <linux/slab.h>
 #include <asm/prom.h>
 #include <asm/uaccess.h>
 #include "internal.h"
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index 04d1270..9020ac1 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -14,6 +14,7 @@
 #include <linux/time.h>
 #include <linux/proc_fs.h>
 #include <linux/stat.h>
+#include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/sched.h>
 #include <linux/module.h>
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index b9b7aad..bf31b03 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -1,6 +1,5 @@
 #include <linux/cpumask.h>
 #include <linux/fs.h>
-#include <linux/gfp.h>
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/kernel_stat.h>
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 183f8ff..2d45889 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -4,6 +4,7 @@
 #include <linux/seq_file.h>
 #include <linux/highmem.h>
 #include <linux/ptrace.h>
+#include <linux/slab.h>
 #include <linux/pagemap.h>
 #include <linux/mempolicy.h>
 #include <linux/swap.h>
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 5d9fd64..46d4b5d 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -5,6 +5,7 @@
 #include <linux/fs_struct.h>
 #include <linux/mount.h>
 #include <linux/ptrace.h>
+#include <linux/slab.h>
 #include <linux/seq_file.h>
 #include "internal.h"
 
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 0872afa..9fbc99e 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -12,6 +12,7 @@
 #include <linux/user.h>
 #include <linux/elf.h>
 #include <linux/elfcore.h>
+#include <linux/slab.h>
 #include <linux/highmem.h>
 #include <linux/bootmem.h>
 #include <linux/init.h>
diff --git a/fs/quota/netlink.c b/fs/quota/netlink.c
index 2663ed9..d67908b 100644
--- a/fs/quota/netlink.c
+++ b/fs/quota/netlink.c
@@ -5,6 +5,7 @@
 #include <linux/kernel.h>
 #include <linux/quotaops.h>
 #include <linux/sched.h>
+#include <linux/slab.h>
 #include <net/netlink.h>
 #include <net/genetlink.h>
 
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 1739a4a..5ea4ad8 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -21,6 +21,7 @@
 #include <linux/pagevec.h>
 #include <linux/mman.h>
 #include <linux/sched.h>
+#include <linux/slab.h>
 
 #include <asm/uaccess.h>
 #include "internal.h"
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index a6090aa..c948534 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -35,6 +35,7 @@
 #include <linux/sched.h>
 #include <linux/parser.h>
 #include <linux/magic.h>
+#include <linux/slab.h>
 #include <asm/uaccess.h>
 #include "internal.h"
 
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index c094f58..f8a6075a 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -8,6 +8,7 @@
 #include <linux/reiserfs_fs.h>
 #include <linux/stat.h>
 #include <linux/buffer_head.h>
+#include <linux/slab.h>
 #include <asm/uaccess.h>
 
 extern const struct reiserfs_key MIN_KEY;
diff --git a/fs/reiserfs/fix_node.c b/fs/reiserfs/fix_node.c
index 6591cb2..1e4250b 100644
--- a/fs/reiserfs/fix_node.c
+++ b/fs/reiserfs/fix_node.c
@@ -35,6 +35,7 @@
  **/
 
 #include <linux/time.h>
+#include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/reiserfs_fs.h>
 #include <linux/buffer_head.h>
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index d1da94b..dc2c65e 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -11,6 +11,7 @@
 #include <linux/smp_lock.h>
 #include <linux/pagemap.h>
 #include <linux/highmem.h>
+#include <linux/slab.h>
 #include <asm/uaccess.h>
 #include <asm/unaligned.h>
 #include <linux/buffer_head.h>
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index f3de5e8..19fbc81 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -50,6 +50,7 @@
 #include <linux/blkdev.h>
 #include <linux/backing-dev.h>
 #include <linux/uaccess.h>
+#include <linux/slab.h>
 
 #include <asm/system.h>
 
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 96e4cbb..d0c43cb 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -13,6 +13,7 @@
 
 #include <linux/time.h>
 #include <linux/bitops.h>
+#include <linux/slab.h>
 #include <linux/reiserfs_fs.h>
 #include <linux/reiserfs_acl.h>
 #include <linux/reiserfs_xattr.h>
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 04bf5d7..d8fd90d 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -12,6 +12,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <linux/time.h>
 #include <asm/uaccess.h>
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 37d034c..4f9586b 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -38,6 +38,7 @@
 #include <linux/dcache.h>
 #include <linux/namei.h>
 #include <linux/errno.h>
+#include <linux/gfp.h>
 #include <linux/fs.h>
 #include <linux/file.h>
 #include <linux/pagemap.h>
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index dd20a78..9cdb759 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -5,6 +5,7 @@
 #include <linux/errno.h>
 #include <linux/pagemap.h>
 #include <linux/xattr.h>
+#include <linux/slab.h>
 #include <linux/posix_acl_xattr.h>
 #include <linux/reiserfs_xattr.h>
 #include <linux/reiserfs_acl.h>
diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c
index de1fcff..7271a47 100644
--- a/fs/reiserfs/xattr_security.c
+++ b/fs/reiserfs/xattr_security.c
@@ -3,6 +3,7 @@
 #include <linux/fs.h>
 #include <linux/pagemap.h>
 #include <linux/xattr.h>
+#include <linux/slab.h>
 #include <linux/reiserfs_xattr.h>
 #include <linux/security.h>
 #include <asm/uaccess.h>
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 1dabe4e..f329849c 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -22,6 +22,7 @@
 #include <linux/init.h>
 #include <linux/fs.h>
 #include <linux/sched.h>
+#include <linux/slab.h>
 #include <linux/kernel.h>
 #include <linux/signal.h>
 #include <linux/list.h>
diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c
index 92d5e8f..dbf6548 100644
--- a/fs/smbfs/file.c
+++ b/fs/smbfs/file.c
@@ -13,7 +13,6 @@
 #include <linux/fcntl.h>
 #include <linux/stat.h>
 #include <linux/mm.h>
-#include <linux/slab.h>
 #include <linux/pagemap.h>
 #include <linux/smp_lock.h>
 #include <linux/net.h>
diff --git a/fs/smbfs/smbiod.c b/fs/smbfs/smbiod.c
index 6bd9b69..0e39a92 100644
--- a/fs/smbfs/smbiod.c
+++ b/fs/smbfs/smbiod.c
@@ -12,7 +12,6 @@
 #include <linux/string.h>
 #include <linux/stat.h>
 #include <linux/errno.h>
-#include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/file.h>
 #include <linux/dcache.h>
diff --git a/fs/smbfs/symlink.c b/fs/smbfs/symlink.c
index 00b2909..54350b5 100644
--- a/fs/smbfs/symlink.c
+++ b/fs/smbfs/symlink.c
@@ -15,6 +15,7 @@
 #include <linux/pagemap.h>
 #include <linux/net.h>
 #include <linux/namei.h>
+#include <linux/slab.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
diff --git a/fs/splice.c b/fs/splice.c
index 3920866..9313b61 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -30,6 +30,7 @@
 #include <linux/syscalls.h>
 #include <linux/uio.h>
 #include <linux/security.h>
+#include <linux/gfp.h>
 
 /*
  * Attempt to steal a page from a pipe buffer. This should perhaps go into
diff --git a/fs/squashfs/symlink.c b/fs/squashfs/symlink.c
index e80be20..32b911f 100644
--- a/fs/squashfs/symlink.c
+++ b/fs/squashfs/symlink.c
@@ -33,7 +33,6 @@
 #include <linux/fs.h>
 #include <linux/vfs.h>
 #include <linux/kernel.h>
-#include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/pagemap.h>
 
diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c
index 4dd70e0..15a03d0 100644
--- a/fs/squashfs/zlib_wrapper.c
+++ b/fs/squashfs/zlib_wrapper.c
@@ -24,6 +24,7 @@
 
 #include <linux/mutex.h>
 #include <linux/buffer_head.h>
+#include <linux/slab.h>
 #include <linux/zlib.h>
 
 #include "squashfs_fs.h"
diff --git a/fs/sync.c b/fs/sync.c
index f557d71..fc5c3d7 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -5,6 +5,7 @@
 #include <linux/kernel.h>
 #include <linux/file.h>
 #include <linux/fs.h>
+#include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/writeback.h>
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index 082daae..a4a0a94 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -18,6 +18,7 @@
 #include <linux/capability.h>
 #include <linux/errno.h>
 #include <linux/sched.h>
+#include <linux/slab.h>
 #include <linux/xattr.h>
 #include <linux/security.h>
 #include "sysfs.h"
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 0cb1088..7761378 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -18,6 +18,7 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/magic.h>
+#include <linux/slab.h>
 
 #include "sysfs.h"
 
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index 1b9a3a1..b93ec51 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -11,6 +11,7 @@
  */
 
 #include <linux/fs.h>
+#include <linux/gfp.h>
 #include <linux/mount.h>
 #include <linux/module.h>
 #include <linux/kobject.h>
diff --git a/fs/timerfd.c b/fs/timerfd.c
index 1bfc95a..98158de 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -14,6 +14,7 @@
 #include <linux/fs.h>
 #include <linux/sched.h>
 #include <linux/kernel.h>
+#include <linux/slab.h>
 #include <linux/list.h>
 #include <linux/spinlock.h>
 #include <linux/time.h>
diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c
index 4775af4..37fa7ed 100644
--- a/fs/ubifs/commit.c
+++ b/fs/ubifs/commit.c
@@ -45,6 +45,7 @@
 
 #include <linux/freezer.h>
 #include <linux/kthread.h>
+#include <linux/slab.h>
 #include "ubifs.h"
 
 /**
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 9049232..c2a68ba 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -34,6 +34,7 @@
 #include <linux/moduleparam.h>
 #include <linux/debugfs.h>
 #include <linux/math64.h>
+#include <linux/slab.h>
 
 #ifdef CONFIG_UBIFS_FS_DEBUG
 
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index e26c02a..5692cf7 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -52,6 +52,7 @@
 #include "ubifs.h"
 #include <linux/mount.h>
 #include <linux/namei.h>
+#include <linux/slab.h>
 
 static int read_block(struct inode *inode, void *addr, unsigned int block,
 		      struct ubifs_data_node *dn)
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c
index e5a3d8e..918d158 100644
--- a/fs/ubifs/gc.c
+++ b/fs/ubifs/gc.c
@@ -53,6 +53,7 @@
  * good, and GC takes extra care when moving them.
  */
 
+#include <linux/slab.h>
 #include <linux/pagemap.h>
 #include <linux/list_sort.h>
 #include "ubifs.h"
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
index e589fed..77d5cf4 100644
--- a/fs/ubifs/io.c
+++ b/fs/ubifs/io.c
@@ -51,6 +51,7 @@
  */
 
 #include <linux/crc32.h>
+#include <linux/slab.h>
 #include "ubifs.h"
 
 /**
diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c
index b2792e8..ad7f67b 100644
--- a/fs/ubifs/lpt.c
+++ b/fs/ubifs/lpt.c
@@ -46,6 +46,7 @@
 #include "ubifs.h"
 #include <linux/crc16.h>
 #include <linux/math64.h>
+#include <linux/slab.h>
 
 /**
  * do_calc_lpt_geom - calculate sizes for the LPT area.
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
index 8cbfb82..13cb7a4 100644
--- a/fs/ubifs/lpt_commit.c
+++ b/fs/ubifs/lpt_commit.c
@@ -26,6 +26,7 @@
  */
 
 #include <linux/crc16.h>
+#include <linux/slab.h>
 #include "ubifs.h"
 
 /**
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c
index 868a55e..109c6ea 100644
--- a/fs/ubifs/recovery.c
+++ b/fs/ubifs/recovery.c
@@ -31,6 +31,7 @@
  */
 
 #include <linux/crc32.h>
+#include <linux/slab.h>
 #include "ubifs.h"
 
 /**
diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c
index 57085e4..96cb62c 100644
--- a/fs/ubifs/sb.c
+++ b/fs/ubifs/sb.c
@@ -27,6 +27,7 @@
  */
 
 #include "ubifs.h"
+#include <linux/slab.h>
 #include <linux/random.h>
 #include <linux/math64.h>
 
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index e5b1a7d..2194915 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -31,6 +31,7 @@
  */
 
 #include <linux/crc32.h>
+#include <linux/slab.h>
 #include "ubifs.h"
 
 /*
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index b2d9763..bd2542d 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -28,6 +28,7 @@
 #include <linux/fs.h>
 #include <linux/err.h>
 #include <linux/sched.h>
+#include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <linux/spinlock.h>
 #include <linux/mutex.h>
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index 195830f..c74400f 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -56,6 +56,7 @@
  */
 
 #include "ubifs.h"
+#include <linux/slab.h>
 #include <linux/xattr.h>
 #include <linux/posix_acl_xattr.h>
 
diff --git a/fs/udf/partition.c b/fs/udf/partition.c
index 4b540ee..745eb20 100644
--- a/fs/udf/partition.c
+++ b/fs/udf/partition.c
@@ -24,7 +24,6 @@
 
 #include <linux/fs.h>
 #include <linux/string.h>
-#include <linux/slab.h>
 #include <linux/buffer_head.h>
 
 uint32_t udf_get_pblock(struct super_block *sb, uint32_t block,
diff --git a/fs/udf/symlink.c b/fs/udf/symlink.c
index 852e918..1606478 100644
--- a/fs/udf/symlink.c
+++ b/fs/udf/symlink.c
@@ -26,7 +26,6 @@
 #include <linux/time.h>
 #include <linux/mm.h>
 #include <linux/stat.h>
-#include <linux/slab.h>
 #include <linux/pagemap.h>
 #include <linux/smp_lock.h>
 #include <linux/buffer_head.h>
diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c
index cefa8c8..d03a90b 100644
--- a/fs/udf/unicode.c
+++ b/fs/udf/unicode.c
@@ -24,6 +24,7 @@
 #include <linux/string.h>	/* for memset */
 #include <linux/nls.h>
 #include <linux/crc-itu-t.h>
+#include <linux/slab.h>
 
 #include "udf_sb.h"
 
diff --git a/fs/xattr_acl.c b/fs/xattr_acl.c
index 05ac0fe..8d5a506 100644
--- a/fs/xattr_acl.c
+++ b/fs/xattr_acl.c
@@ -6,9 +6,9 @@
  */
 
 #include <linux/module.h>
-#include <linux/slab.h>
 #include <linux/fs.h>
 #include <linux/posix_acl_xattr.h>
+#include <linux/gfp.h>
 
 
 /*
diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c
index bc74055..666c9db 100644
--- a/fs/xfs/linux-2.6/kmem.c
+++ b/fs/xfs/linux-2.6/kmem.c
@@ -17,6 +17,7 @@
  */
 #include <linux/mm.h>
 #include <linux/highmem.h>
+#include <linux/slab.h>
 #include <linux/swap.h>
 #include <linux/blkdev.h>
 #include <linux/backing-dev.h>
diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c
index bf85bbe..a7bc925 100644
--- a/fs/xfs/linux-2.6/xfs_acl.c
+++ b/fs/xfs/linux-2.6/xfs_acl.c
@@ -22,6 +22,7 @@
 #include "xfs_inode.h"
 #include "xfs_vnodeops.h"
 #include "xfs_trace.h"
+#include <linux/slab.h>
 #include <linux/xattr.h>
 #include <linux/posix_acl_xattr.h>
 
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 9962850..0f8b996 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -40,6 +40,7 @@
 #include "xfs_vnodeops.h"
 #include "xfs_trace.h"
 #include "xfs_bmap.h"
+#include <linux/gfp.h>
 #include <linux/mpage.h>
 #include <linux/pagevec.h>
 #include <linux/writeback.h>
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index bd111b7..44c2b0e 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -18,7 +18,7 @@
 #include "xfs.h"
 #include <linux/stddef.h>
 #include <linux/errno.h>
-#include <linux/slab.h>
+#include <linux/gfp.h>
 #include <linux/pagemap.h>
 #include <linux/init.h>
 #include <linux/vmalloc.h>
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 4ea1ee1..7b26cc2 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -58,6 +58,7 @@
 #include <linux/mount.h>
 #include <linux/namei.h>
 #include <linux/pagemap.h>
+#include <linux/slab.h>
 #include <linux/exportfs.h>
 
 /*
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index 0bf6d61..593c05b 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -18,6 +18,7 @@
 #include <linux/compat.h>
 #include <linux/ioctl.h>
 #include <linux/mount.h>
+#include <linux/slab.h>
 #include <asm/uaccess.h>
 #include "xfs.h"
 #include "xfs_fs.h"
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 61a9960..e65a793 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -56,6 +56,7 @@
 #include <linux/security.h>
 #include <linux/falloc.h>
 #include <linux/fiemap.h>
+#include <linux/slab.h>
 
 /*
  * Bring the timestamps in the XFS inode uptodate.
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 71345a3..52e06b4 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -61,6 +61,7 @@
 
 #include <linux/namei.h>
 #include <linux/init.h>
+#include <linux/slab.h>
 #include <linux/mount.h>
 #include <linux/mempool.h>
 #include <linux/writeback.h>
-- 
cgit v1.1


From e05c378f4973674a16d5b9636f2310cf88aca5f2 Mon Sep 17 00:00:00 2001
From: Joern Engel <joern@logfs.org>
Date: Tue, 30 Mar 2010 18:25:17 +0200
Subject: [LogFS] Remove unused method

All callers are long gone.

Signed-off-by: Joern Engel <joern@logfs.org>
---
 fs/logfs/logfs.h     |  1 -
 fs/logfs/readwrite.c | 21 ---------------------
 fs/logfs/segment.c   |  6 ------
 3 files changed, 28 deletions(-)

(limited to 'fs')

diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h
index b84b0ee..97195b9 100644
--- a/fs/logfs/logfs.h
+++ b/fs/logfs/logfs.h
@@ -305,7 +305,6 @@ typedef int write_alias_t(struct super_block *sb, u64 ino, u64 bix,
 		level_t level, int child_no, __be64 val);
 struct logfs_block_ops {
 	void	(*write_block)(struct logfs_block *block);
-	gc_level_t	(*block_level)(struct logfs_block *block);
 	void	(*free_block)(struct super_block *sb, struct logfs_block*block);
 	int	(*write_alias)(struct super_block *sb,
 			struct logfs_block *block,
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index c3a3a68..3659c37 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -429,25 +429,6 @@ static void inode_write_block(struct logfs_block *block)
 	}
 }
 
-static gc_level_t inode_block_level(struct logfs_block *block)
-{
-	BUG_ON(block->inode->i_ino == LOGFS_INO_MASTER);
-	return GC_LEVEL(LOGFS_MAX_LEVELS);
-}
-
-static gc_level_t indirect_block_level(struct logfs_block *block)
-{
-	struct page *page;
-	struct inode *inode;
-	u64 bix;
-	level_t level;
-
-	page = block->page;
-	inode = page->mapping->host;
-	logfs_unpack_index(page->index, &bix, &level);
-	return expand_level(inode->i_ino, level);
-}
-
 /*
  * This silences a false, yet annoying gcc warning.  I hate it when my editor
  * jumps into bitops.h each time I recompile this file.
@@ -586,14 +567,12 @@ static void indirect_free_block(struct super_block *sb,
 
 static struct logfs_block_ops inode_block_ops = {
 	.write_block = inode_write_block,
-	.block_level = inode_block_level,
 	.free_block = inode_free_block,
 	.write_alias = inode_write_alias,
 };
 
 struct logfs_block_ops indirect_block_ops = {
 	.write_block = indirect_write_block,
-	.block_level = indirect_block_level,
 	.free_block = indirect_free_block,
 	.write_alias = indirect_write_alias,
 };
diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c
index 0ecd8f0..02db22e 100644
--- a/fs/logfs/segment.c
+++ b/fs/logfs/segment.c
@@ -182,14 +182,8 @@ static int btree_write_alias(struct super_block *sb, struct logfs_block *block,
 	return 0;
 }
 
-static gc_level_t btree_block_level(struct logfs_block *block)
-{
-	return expand_level(block->ino, block->level);
-}
-
 static struct logfs_block_ops btree_block_ops = {
 	.write_block	= btree_write_block,
-	.block_level	= btree_block_level,
 	.free_block	= __free_block,
 	.write_alias	= btree_write_alias,
 };
-- 
cgit v1.1


From efd647f744f3cf504ed83580274bd4b6918139fe Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 30 Mar 2010 13:21:31 +0800
Subject: ocfs2_dlmfs: User DLM_* when decoding file open flags.

In commit 0016eedc4185a3cd7e578b027a6e69001b85d6c4, we have
changed dlmfs to use stackglue. So when use DLM* when we
decode dlm flags from open level.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/dlmfs/dlmfs.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index 1b0de15..a99d1ea 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -112,20 +112,20 @@ MODULE_PARM_DESC(capabilities, DLMFS_CAPABILITIES);
  * O_RDONLY -> PRMODE level
  * O_WRONLY -> EXMODE level
  *
- * O_NONBLOCK -> LKM_NOQUEUE
+ * O_NONBLOCK -> NOQUEUE
  */
 static int dlmfs_decode_open_flags(int open_flags,
 				   int *level,
 				   int *flags)
 {
 	if (open_flags & (O_WRONLY|O_RDWR))
-		*level = LKM_EXMODE;
+		*level = DLM_LOCK_EX;
 	else
-		*level = LKM_PRMODE;
+		*level = DLM_LOCK_PR;
 
 	*flags = 0;
 	if (open_flags & O_NONBLOCK)
-		*flags |= LKM_NOQUEUE;
+		*flags |= DLM_LKF_NOQUEUE;
 
 	return 0;
 }
@@ -166,7 +166,7 @@ static int dlmfs_file_open(struct inode *inode,
 		 * to be able userspace to be able to distinguish a
 		 * valid lock request from one that simply couldn't be
 		 * granted. */
-		if (flags & LKM_NOQUEUE && status == -EAGAIN)
+		if (flags & DLM_LKF_NOQUEUE && status == -EAGAIN)
 			status = -ETXTBSY;
 		kfree(fp);
 		goto bail;
@@ -193,7 +193,7 @@ static int dlmfs_file_release(struct inode *inode,
 	status = 0;
 	if (fp) {
 		level = fp->fp_lock_level;
-		if (level != LKM_IVMODE)
+		if (level != DLM_LOCK_IV)
 			user_dlm_cluster_unlock(&ip->ip_lockres, level);
 
 		kfree(fp);
-- 
cgit v1.1


From a03ab788d070c256eff1ac24cf0e3bc2ca148096 Mon Sep 17 00:00:00 2001
From: Coly Li <coly.li@suse.de>
Date: Fri, 26 Mar 2010 05:15:12 +0800
Subject: ocfs2: one more warning fix in ocfs2_file_aio_write(), v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch fixes another compiling warning in ocfs2_file_aio_write() like this,
    fs/ocfs2/file.c: In function ‘ocfs2_file_aio_write’:
    fs/ocfs2/file.c:2026: warning: suggest parentheses around ‘&&’ within ‘||’

As Joel suggested, '!ret' is unary, this version removes the wrap from '!ret'.

Signed-off-by: Coly Li <coly.li@suse.de>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/file.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 17947dc..2b4235c 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2021,9 +2021,9 @@ out_dio:
 		if (ret < 0)
 			written = ret;
 
-		if (!ret && (old_size != i_size_read(inode) ||
-		    old_clusters != OCFS2_I(inode)->ip_clusters ||
-		    has_refcount)) {
+		if (!ret && ((old_size != i_size_read(inode)) ||
+			     (old_clusters != OCFS2_I(inode)->ip_clusters) ||
+			     has_refcount)) {
 			ret = jbd2_journal_force_commit(osb->journal->j_journal);
 			if (ret < 0)
 				written = ret;
-- 
cgit v1.1


From 428257f8870f0e72e85ce782d091fa1f366de7df Mon Sep 17 00:00:00 2001
From: Wengang Wang <wen.gang.wang@oracle.com>
Date: Wed, 24 Mar 2010 22:40:44 +0800
Subject: ocfs2: Check the owner of a lockres inside the spinlock

The checking of lockres owner in dlm_update_lvb() is not inside spinlock
protection. I don't see problem in current call path of dlm_update_lvb().
But just for code robustness.

Signed-off-by: Wengang Wang <wen.gang.wang@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/dlm/dlmast.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
index dccc439..b7a25ef 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -185,9 +185,8 @@ static void dlm_update_lvb(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
 	BUG_ON(!lksb);
 
 	/* only updates if this node masters the lockres */
+	spin_lock(&res->spinlock);
 	if (res->owner == dlm->node_num) {
-
-		spin_lock(&res->spinlock);
 		/* check the lksb flags for the direction */
 		if (lksb->flags & DLM_LKSB_GET_LVB) {
 			mlog(0, "getting lvb from lockres for %s node\n",
@@ -202,8 +201,8 @@ static void dlm_update_lvb(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
  		 * here. In the future we might want to clear it at the time
  		 * the put is actually done.
 		 */
-		spin_unlock(&res->spinlock);
 	}
+	spin_unlock(&res->spinlock);
 
 	/* reset any lvb flags on the lksb */
 	lksb->flags &= ~(DLM_LKSB_PUT_LVB|DLM_LKSB_GET_LVB);
-- 
cgit v1.1


From b7b7fa43103a9fb30dbcc60cbd5161fdfc25f904 Mon Sep 17 00:00:00 2001
From: Jeff Mahoney <jeffm@jeffreymahoney.com>
Date: Mon, 29 Mar 2010 15:12:39 -0400
Subject: reiserfs: Fix locking BUG during mount failure

Commit 8ebc423238341b52912c7295b045a32477b33f09 (reiserfs: kill-the-BKL)
introduced a bug in the mount failure case.

The error label releases the lock before calling journal_release_error,
but it requires that the lock be held. do_journal_release unlocks and
retakes it. When it releases it without it held, we trigger a BUG().

The error_alloc label skips the unlock since the lock isn't held yet
but none of the other conditions that are clean up exist yet either.

This patch returns immediately after the kzalloc failure and moves
the reiserfs_write_unlock after the journal_release_error call.

This was reported in https://bugzilla.novell.com/show_bug.cgi?id=591807

Reported-by:  Thomas Siedentopf <thomas.siedentopf@novell.com>
Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Cc: Thomas Siedentopf <thomas.siedentopf@novell.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: 2.6.33.x <stable@kernel.org>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 fs/reiserfs/super.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 04bf5d7..ab19051 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -1618,10 +1618,8 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
 	save_mount_options(s, data);
 
 	sbi = kzalloc(sizeof(struct reiserfs_sb_info), GFP_KERNEL);
-	if (!sbi) {
-		errval = -ENOMEM;
-		goto error_alloc;
-	}
+	if (!sbi)
+		return -ENOMEM;
 	s->s_fs_info = sbi;
 	/* Set default values for options: non-aggressive tails, RO on errors */
 	REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_SMALLTAIL);
@@ -1878,12 +1876,12 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
 	return (0);
 
 error:
-	reiserfs_write_unlock(s);
-error_alloc:
 	if (jinit_done) {	/* kill the commit thread, free journal ram */
 		journal_release_error(NULL, s);
 	}
 
+	reiserfs_write_unlock(s);
+
 	reiserfs_free_bitmap_cache(s);
 	if (SB_BUFFER_WITH_SB(s))
 		brelse(SB_BUFFER_WITH_SB(s));
-- 
cgit v1.1


From 9358c6d4c0264b1572554c49c4b92673ea9a5c72 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Tue, 30 Mar 2010 13:54:41 -0700
Subject: ceph: fix dentry rehashing on virtual .snap dir

If a lookup fails on the magic .snap directory, we bind it to a magic
snap directory inode in ceph_lookup_finish().  That code assumes the dentry
is unhashed, but a recent server-side change started returning NULL leases
on lookup failure, causing the .snap dentry to be hashed and NULL by
ceph_fill_trace().

This causes dentry hash chain corruption, or a dies when d_rehash()
includes
	BUG_ON(!d_unhashed(entry));

So, avoid processing the NULL dentry lease if it the dentry matches the
snapdir name in ceph_fill_trace().  That allows the lookup completion to
properly bind it to the snapdir inode.  BUG there if dentry is hashed to
be sure.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/dir.c   |  1 +
 fs/ceph/inode.c | 10 +++++++++-
 2 files changed, 10 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 8a9116e..aed8fda 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -488,6 +488,7 @@ struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
 		struct inode *inode = ceph_get_snapdir(parent);
 		dout("ENOENT on snapdir %p '%.*s', linking to snapdir %p\n",
 		     dentry, dentry->d_name.len, dentry->d_name.name, inode);
+		BUG_ON(!d_unhashed(dentry));
 		d_add(dentry, inode);
 		err = 0;
 	}
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index aca82d5..26f883c 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -886,6 +886,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
 	struct inode *in = NULL;
 	struct ceph_mds_reply_inode *ininfo;
 	struct ceph_vino vino;
+	struct ceph_client *client = ceph_sb_to_client(sb);
 	int i = 0;
 	int err = 0;
 
@@ -949,7 +950,14 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
 			return err;
 	}
 
-	if (rinfo->head->is_dentry && !req->r_aborted) {
+	/*
+	 * ignore null lease/binding on snapdir ENOENT, or else we
+	 * will have trouble splicing in the virtual snapdir later
+	 */
+	if (rinfo->head->is_dentry && !req->r_aborted &&
+	    (rinfo->head->is_target || strncmp(req->r_dentry->d_name.name,
+					       client->mount_args->snapdir_name,
+					       req->r_dentry->d_name.len))) {
 		/*
 		 * lookup link rename   : null -> possibly existing inode
 		 * mknod symlink mkdir  : null -> new inode
-- 
cgit v1.1


From 2f3014fc2ab1e25c36531e19164c48182c168995 Mon Sep 17 00:00:00 2001
From: Andrea Gelmini <andrea.gelmini@gelma.net>
Date: Thu, 25 Mar 2010 17:22:45 +0000
Subject: Btrfs: remove duplicate include in ioctl.c

fs/btrfs/ioctl.c: ctree.h is included more than once.

Signed-off-by: Andrea Gelmini <andrea.gelmini@gelma.net>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/ioctl.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'fs')

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 2845c6c..5c9f8b3 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -48,7 +48,6 @@
 #include "print-tree.h"
 #include "volumes.h"
 #include "locking.h"
-#include "ctree.h"
 
 /* Mask out flags that are inappropriate for the given type of inode. */
 static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags)
-- 
cgit v1.1


From 90d2c51dbb4db05c040bc7db264bb7ab35e35455 Mon Sep 17 00:00:00 2001
From: Miao Xie <miaox@cn.fujitsu.com>
Date: Thu, 25 Mar 2010 12:37:12 +0000
Subject: Btrfs: add NULL check for do_walk_down()

btrfs_find_create_tree_block() may return NULL, so we must check the returned
value, or we will access a NULL pointer.

Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/extent-tree.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 1727b26..503a18e 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -5205,6 +5205,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
 	next = btrfs_find_tree_block(root, bytenr, blocksize);
 	if (!next) {
 		next = btrfs_find_create_tree_block(root, bytenr, blocksize);
+		if (!next)
+			return -ENOMEM;
 		reada = 1;
 	}
 	btrfs_tree_lock(next);
@@ -5417,7 +5419,8 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
 		if (ret > 0) {
 			path->slots[level]++;
 			continue;
-		}
+		} else if (ret < 0)
+			return ret;
 		level = wc->level;
 	}
 	return 0;
-- 
cgit v1.1


From 471fa17dff556ad38caf26de097c0630530d8cbe Mon Sep 17 00:00:00 2001
From: Zhao Lei <zhaolei@cn.fujitsu.com>
Date: Thu, 25 Mar 2010 12:35:14 +0000
Subject: Btrfs: Remove unnecessary finish_wait() in wait_current_trans()

We only need to call finish_wait() after wait loop.

By the way, this patch makes code of waiting loop similar to
example in wait.h(no functional change)

Signed-off-by: Zhao Lei <zhaolei@cn.fujitsu.com>
Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/transaction.c | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 2d654c1..4305428 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -147,18 +147,13 @@ static void wait_current_trans(struct btrfs_root *root)
 		while (1) {
 			prepare_to_wait(&root->fs_info->transaction_wait, &wait,
 					TASK_UNINTERRUPTIBLE);
-			if (cur_trans->blocked) {
-				mutex_unlock(&root->fs_info->trans_mutex);
-				schedule();
-				mutex_lock(&root->fs_info->trans_mutex);
-				finish_wait(&root->fs_info->transaction_wait,
-					    &wait);
-			} else {
-				finish_wait(&root->fs_info->transaction_wait,
-					    &wait);
+			if (!cur_trans->blocked)
 				break;
-			}
+			mutex_unlock(&root->fs_info->trans_mutex);
+			schedule();
+			mutex_lock(&root->fs_info->trans_mutex);
 		}
+		finish_wait(&root->fs_info->transaction_wait, &wait);
 		put_transaction(cur_trans);
 	}
 }
-- 
cgit v1.1


From ab59381ea43f81c977cbd09add26950aaf6cb9fe Mon Sep 17 00:00:00 2001
From: Zhao Lei <zhaolei@cn.fujitsu.com>
Date: Thu, 25 Mar 2010 12:34:49 +0000
Subject: Btrfs: Add error handle for btrfs_search_slot() in
 btrfs_read_chunk_tree()

We need to check return value of btrfs_search_slot() in
btrfs_read_chunk_tree() and do corresponding error handing.

Signed-off-by: Zhao Lei <zhaolei@cn.fujitsu.com>
Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/volumes.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 9df8e3f..c6c8009 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -3389,6 +3389,8 @@ int btrfs_read_chunk_tree(struct btrfs_root *root)
 	key.type = 0;
 again:
 	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	if (ret < 0)
+		goto error;
 	while (1) {
 		leaf = path->nodes[0];
 		slot = path->slots[0];
-- 
cgit v1.1


From f3eae7e8a5ed124bbc781e18ea10c21856017322 Mon Sep 17 00:00:00 2001
From: Zhao Lei <zhaolei@cn.fujitsu.com>
Date: Thu, 25 Mar 2010 12:32:59 +0000
Subject: Btrfs: Simplify num_stripes's calculation logical for
 __btrfs_alloc_chunk()

We can use this simple method to make source more readable.

Signed-off-by: Zhao Lei <zhaolei@cn.fujitsu.com>
Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/volumes.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index c6c8009..bf3bec3 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2198,9 +2198,9 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
 		min_stripes = 2;
 	}
 	if (type & (BTRFS_BLOCK_GROUP_RAID1)) {
-		num_stripes = min_t(u64, 2, fs_devices->rw_devices);
-		if (num_stripes < 2)
+		if (fs_devices->rw_devices < 2)
 			return -ENOSPC;
+		num_stripes = 2;
 		min_stripes = 2;
 	}
 	if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
-- 
cgit v1.1


From 683be16eb6e19a35aca2473668652259ed074094 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Sat, 20 Mar 2010 11:24:48 +0000
Subject: Btrfs: dereferencing freed memory

The original code dereferenced range on the next line.

Signed-off-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/ioctl.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 5c9f8b3..874d36e 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1374,6 +1374,7 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
 					   sizeof(*range))) {
 				ret = -EFAULT;
 				kfree(range);
+				goto out;
 			}
 			/* compression requires us to start the IO */
 			if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) {
-- 
cgit v1.1


From c2b96929e2ca6914cf4a66cd8fe2a34c4a98277f Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Sat, 20 Mar 2010 11:24:15 +0000
Subject: Btrfs: handle kmalloc() failure in inode lookup ioctl

Return -ENOMEM if kmalloc() fails.

Signed-off-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/ioctl.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'fs')

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 874d36e..74d8913 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1211,6 +1211,9 @@ static noinline int btrfs_ioctl_ino_lookup(struct file *file,
 		return -EPERM;
 
 	args = kmalloc(sizeof(*args), GFP_KERNEL);
+	if (!args)
+		return -ENOMEM;
+
 	if (copy_from_user(args, argp, sizeof(*args))) {
 		kfree(args);
 		return -EFAULT;
-- 
cgit v1.1


From 6cf8bfbf5e88edfb09a2bf0631a067060f534592 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Sat, 20 Mar 2010 11:22:10 +0000
Subject: Btrfs: check btrfs_get_extent return for IS_ERR()

btrfs_get_extent() never returns NULL, only a valid pointer or ERR_PTR()

Signed-off-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/ioctl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 74d8913..2b7dd88 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -510,7 +510,7 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len,
 		em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
 		unlock_extent(io_tree, start, start + len - 1, GFP_NOFS);
 
-		if (!em)
+		if (IS_ERR(em))
 			return 0;
 	}
 
-- 
cgit v1.1


From 1b1d1f6625e517a08640ddb4b8f8a0e025243fe3 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@redhat.com>
Date: Fri, 19 Mar 2010 20:49:55 +0000
Subject: Btrfs: fail to mount if we have problems reading the block groups

We don't actually check the return value of btrfs_read_block_groups, so we can
possibly succeed to mount, but then fail to say read the superblock xattr for
selinux which will cause the vfs code to deactivate the super.

This is a problem because in find_free_extent we just assume that we
will find the right space_info for the allocation we want.  But if we
failed to read the block groups, we won't have setup any space_info's,
and we'll hit a NULL pointer deref in find_free_extent.

This patch fixes that problem by checking the return value of
btrfs_read_block_groups, and failing out properly.  I've also added a
check in find_free_extent so if for some reason we don't find an
appropriate space_info, we just return -ENOSPC.

Signed-off-by: Josef Bacik <josef@redhat.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/disk-io.c     | 11 ++++++++---
 fs/btrfs/extent-tree.c |  5 ++++-
 2 files changed, 12 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 11d0ad3..5ae1c0f 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1922,7 +1922,11 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 
 	csum_root->track_dirty = 1;
 
-	btrfs_read_block_groups(extent_root);
+	ret = btrfs_read_block_groups(extent_root);
+	if (ret) {
+		printk(KERN_ERR "Failed to read block groups: %d\n", ret);
+		goto fail_block_groups;
+	}
 
 	fs_info->generation = generation;
 	fs_info->last_trans_committed = generation;
@@ -1932,7 +1936,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root,
 					       "btrfs-cleaner");
 	if (IS_ERR(fs_info->cleaner_kthread))
-		goto fail_csum_root;
+		goto fail_block_groups;
 
 	fs_info->transaction_kthread = kthread_run(transaction_kthread,
 						   tree_root,
@@ -2020,7 +2024,8 @@ fail_cleaner:
 	filemap_write_and_wait(fs_info->btree_inode->i_mapping);
 	invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
 
-fail_csum_root:
+fail_block_groups:
+	btrfs_free_block_groups(fs_info);
 	free_extent_buffer(csum_root->node);
 	free_extent_buffer(csum_root->commit_root);
 fail_dev_root:
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 503a18e..4c91035 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4170,6 +4170,10 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
 	ins->offset = 0;
 
 	space_info = __find_space_info(root->fs_info, data);
+	if (!space_info) {
+		printk(KERN_ERR "No space info for %d\n", data);
+		return -ENOSPC;
+	}
 
 	if (orig_root->ref_cows || empty_size)
 		allowed_chunk_alloc = 1;
@@ -7372,7 +7376,6 @@ static int find_first_block_group(struct btrfs_root *root,
 		}
 		path->slots[0]++;
 	}
-	ret = -ENOENT;
 out:
 	return ret;
 }
-- 
cgit v1.1


From 287a0ab91d25ca982f895a76402e5893b47ed7a6 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@redhat.com>
Date: Fri, 19 Mar 2010 18:07:23 +0000
Subject: Btrfs: kill max_extent mount option

As Yan pointed out, theres not much reason for all this complicated math to
account for file extents being split up into max_extent chunks, since they are
likely to all end up in the same leaf anyway.  Since there isn't much reason to
use max_extent, just remove the option altogether so we have one less thing we
need to test.

Signed-off-by: Josef Bacik <josef@redhat.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/ctree.h        |  1 -
 fs/btrfs/disk-io.c      |  1 -
 fs/btrfs/extent-tree.c  |  4 ++--
 fs/btrfs/inode.c        | 59 ++++---------------------------------------------
 fs/btrfs/ordered-data.c |  6 +++--
 fs/btrfs/super.c        | 23 +++----------------
 6 files changed, 13 insertions(+), 81 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 1111584..ae8c409 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -834,7 +834,6 @@ struct btrfs_fs_info {
 	u64 last_trans_log_full_commit;
 	u64 open_ioctl_trans;
 	unsigned long mount_opt;
-	u64 max_extent;
 	u64 max_inline;
 	u64 alloc_start;
 	struct btrfs_transaction *running_transaction;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 5ae1c0f..6632e5c 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1634,7 +1634,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	atomic_set(&fs_info->async_submit_draining, 0);
 	atomic_set(&fs_info->nr_async_bios, 0);
 	fs_info->sb = sb;
-	fs_info->max_extent = (u64)-1;
 	fs_info->max_inline = 8192 * 1024;
 	fs_info->metadata_ratio = 0;
 
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 4c91035..5f6bc28 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2846,7 +2846,7 @@ int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root,
 	}
 	spin_unlock(&BTRFS_I(inode)->accounting_lock);
 
-	BTRFS_I(inode)->reserved_extents--;
+	BTRFS_I(inode)->reserved_extents -= num_items;
 	BUG_ON(BTRFS_I(inode)->reserved_extents < 0);
 
 	if (meta_sinfo->bytes_delalloc < num_bytes) {
@@ -3111,7 +3111,7 @@ again:
 		return -ENOSPC;
 	}
 
-	BTRFS_I(inode)->reserved_extents++;
+	BTRFS_I(inode)->reserved_extents += num_items;
 	check_force_delalloc(meta_sinfo);
 	spin_unlock(&meta_sinfo->lock);
 
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 2a337a0..a85b90c 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -796,7 +796,7 @@ static noinline int cow_file_range(struct inode *inode,
 	while (disk_num_bytes > 0) {
 		unsigned long op;
 
-		cur_alloc_size = min(disk_num_bytes, root->fs_info->max_extent);
+		cur_alloc_size = disk_num_bytes;
 		ret = btrfs_reserve_extent(trans, root, cur_alloc_size,
 					   root->sectorsize, 0, alloc_hint,
 					   (u64)-1, &ins, 1);
@@ -1227,30 +1227,9 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
 static int btrfs_split_extent_hook(struct inode *inode,
 				    struct extent_state *orig, u64 split)
 {
-	struct btrfs_root *root = BTRFS_I(inode)->root;
-	u64 size;
-
 	if (!(orig->state & EXTENT_DELALLOC))
 		return 0;
 
-	size = orig->end - orig->start + 1;
-	if (size > root->fs_info->max_extent) {
-		u64 num_extents;
-		u64 new_size;
-
-		new_size = orig->end - split + 1;
-		num_extents = div64_u64(size + root->fs_info->max_extent - 1,
-					root->fs_info->max_extent);
-
-		/*
-		 * if we break a large extent up then leave oustanding_extents
-		 * be, since we've already accounted for the large extent.
-		 */
-		if (div64_u64(new_size + root->fs_info->max_extent - 1,
-			      root->fs_info->max_extent) < num_extents)
-			return 0;
-	}
-
 	spin_lock(&BTRFS_I(inode)->accounting_lock);
 	BTRFS_I(inode)->outstanding_extents++;
 	spin_unlock(&BTRFS_I(inode)->accounting_lock);
@@ -1268,38 +1247,10 @@ static int btrfs_merge_extent_hook(struct inode *inode,
 				   struct extent_state *new,
 				   struct extent_state *other)
 {
-	struct btrfs_root *root = BTRFS_I(inode)->root;
-	u64 new_size, old_size;
-	u64 num_extents;
-
 	/* not delalloc, ignore it */
 	if (!(other->state & EXTENT_DELALLOC))
 		return 0;
 
-	old_size = other->end - other->start + 1;
-	if (new->start < other->start)
-		new_size = other->end - new->start + 1;
-	else
-		new_size = new->end - other->start + 1;
-
-	/* we're not bigger than the max, unreserve the space and go */
-	if (new_size <= root->fs_info->max_extent) {
-		spin_lock(&BTRFS_I(inode)->accounting_lock);
-		BTRFS_I(inode)->outstanding_extents--;
-		spin_unlock(&BTRFS_I(inode)->accounting_lock);
-		return 0;
-	}
-
-	/*
-	 * If we grew by another max_extent, just return, we want to keep that
-	 * reserved amount.
-	 */
-	num_extents = div64_u64(old_size + root->fs_info->max_extent - 1,
-				root->fs_info->max_extent);
-	if (div64_u64(new_size + root->fs_info->max_extent - 1,
-		      root->fs_info->max_extent) > num_extents)
-		return 0;
-
 	spin_lock(&BTRFS_I(inode)->accounting_lock);
 	BTRFS_I(inode)->outstanding_extents--;
 	spin_unlock(&BTRFS_I(inode)->accounting_lock);
@@ -1328,6 +1279,7 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end,
 		BTRFS_I(inode)->outstanding_extents++;
 		spin_unlock(&BTRFS_I(inode)->accounting_lock);
 		btrfs_delalloc_reserve_space(root, inode, end - start + 1);
+
 		spin_lock(&root->fs_info->delalloc_lock);
 		BTRFS_I(inode)->delalloc_bytes += end - start + 1;
 		root->fs_info->delalloc_bytes += end - start + 1;
@@ -1356,6 +1308,7 @@ static int btrfs_clear_bit_hook(struct inode *inode,
 
 		if (bits & EXTENT_DO_ACCOUNTING) {
 			spin_lock(&BTRFS_I(inode)->accounting_lock);
+			WARN_ON(!BTRFS_I(inode)->outstanding_extents);
 			BTRFS_I(inode)->outstanding_extents--;
 			spin_unlock(&BTRFS_I(inode)->accounting_lock);
 			btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
@@ -5384,7 +5337,6 @@ free:
 void btrfs_drop_inode(struct inode *inode)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
-
 	if (inode->i_nlink > 0 && btrfs_root_refs(&root->root_item) == 0)
 		generic_delete_inode(inode);
 	else
@@ -5788,18 +5740,15 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end,
 	struct btrfs_trans_handle *trans;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct btrfs_key ins;
-	u64 alloc_size;
 	u64 cur_offset = start;
 	u64 num_bytes = end - start;
 	int ret = 0;
 	u64 i_size;
 
 	while (num_bytes > 0) {
-		alloc_size = min(num_bytes, root->fs_info->max_extent);
-
 		trans = btrfs_start_transaction(root, 1);
 
-		ret = btrfs_reserve_extent(trans, root, alloc_size,
+		ret = btrfs_reserve_extent(trans, root, num_bytes,
 					   root->sectorsize, 0, alloc_hint,
 					   (u64)-1, &ins, 1);
 		if (ret) {
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index a8ffecd..5c99882 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -303,6 +303,7 @@ static int __btrfs_remove_ordered_extent(struct inode *inode,
 				struct btrfs_ordered_extent *entry)
 {
 	struct btrfs_ordered_inode_tree *tree;
+	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct rb_node *node;
 
 	tree = &BTRFS_I(inode)->ordered_tree;
@@ -312,12 +313,13 @@ static int __btrfs_remove_ordered_extent(struct inode *inode,
 	set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags);
 
 	spin_lock(&BTRFS_I(inode)->accounting_lock);
+	WARN_ON(!BTRFS_I(inode)->outstanding_extents);
 	BTRFS_I(inode)->outstanding_extents--;
 	spin_unlock(&BTRFS_I(inode)->accounting_lock);
 	btrfs_unreserve_metadata_for_delalloc(BTRFS_I(inode)->root,
 					      inode, 1);
 
-	spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
+	spin_lock(&root->fs_info->ordered_extent_lock);
 	list_del_init(&entry->root_extent_list);
 
 	/*
@@ -329,7 +331,7 @@ static int __btrfs_remove_ordered_extent(struct inode *inode,
 	    !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) {
 		list_del_init(&BTRFS_I(inode)->ordered_operations);
 	}
-	spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
+	spin_unlock(&root->fs_info->ordered_extent_lock);
 
 	return 0;
 }
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 9ac612e..d11b12f 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -64,10 +64,9 @@ static void btrfs_put_super(struct super_block *sb)
 
 enum {
 	Opt_degraded, Opt_subvol, Opt_subvolid, Opt_device, Opt_nodatasum,
-	Opt_nodatacow, Opt_max_extent, Opt_max_inline, Opt_alloc_start,
-	Opt_nobarrier, Opt_ssd, Opt_nossd, Opt_ssd_spread, Opt_thread_pool,
-	Opt_noacl, Opt_compress, Opt_compress_force, Opt_notreelog, Opt_ratio,
-	Opt_flushoncommit,
+	Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd,
+	Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress,
+	Opt_compress_force, Opt_notreelog, Opt_ratio, Opt_flushoncommit,
 	Opt_discard, Opt_err,
 };
 
@@ -79,7 +78,6 @@ static match_table_t tokens = {
 	{Opt_nodatasum, "nodatasum"},
 	{Opt_nodatacow, "nodatacow"},
 	{Opt_nobarrier, "nobarrier"},
-	{Opt_max_extent, "max_extent=%s"},
 	{Opt_max_inline, "max_inline=%s"},
 	{Opt_alloc_start, "alloc_start=%s"},
 	{Opt_thread_pool, "thread_pool=%d"},
@@ -188,18 +186,6 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
 				       info->thread_pool_size);
 			}
 			break;
-		case Opt_max_extent:
-			num = match_strdup(&args[0]);
-			if (num) {
-				info->max_extent = memparse(num, NULL);
-				kfree(num);
-
-				info->max_extent = max_t(u64,
-					info->max_extent, root->sectorsize);
-				printk(KERN_INFO "btrfs: max_extent at %llu\n",
-				       (unsigned long long)info->max_extent);
-			}
-			break;
 		case Opt_max_inline:
 			num = match_strdup(&args[0]);
 			if (num) {
@@ -529,9 +515,6 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
 		seq_puts(seq, ",nodatacow");
 	if (btrfs_test_opt(root, NOBARRIER))
 		seq_puts(seq, ",nobarrier");
-	if (info->max_extent != (u64)-1)
-		seq_printf(seq, ",max_extent=%llu",
-			   (unsigned long long)info->max_extent);
 	if (info->max_inline != 8192 * 1024)
 		seq_printf(seq, ",max_inline=%llu",
 			   (unsigned long long)info->max_inline);
-- 
cgit v1.1


From 0cad8a1130f77c7c445e3298c0e3593b3c0ef439 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@redhat.com>
Date: Wed, 17 Mar 2010 20:45:56 +0000
Subject: Btrfs: fix chunk allocate size calculation

If the amount of free space left in a device is less than what we think should
be the minimum size, just ignore the minimum size and use the amount we have.  I
ran into this running tests on a 600mb volume, the chunk allocator wouldn't let
me allocate the last 52mb of the disk for data because we want to have at least
64mb chunks for data.  This patch fixes that problem.  Thanks,

Signed-off-by: Josef Bacik <josef@redhat.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/volumes.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index bf3bec3..9bf1f58 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2244,8 +2244,10 @@ again:
 		do_div(calc_size, stripe_len);
 		calc_size *= stripe_len;
 	}
+
 	/* we don't want tiny stripes */
-	calc_size = max_t(u64, min_stripe_size, calc_size);
+	if (!looped)
+		calc_size = max_t(u64, min_stripe_size, calc_size);
 
 	do_div(calc_size, stripe_len);
 	calc_size *= stripe_len;
-- 
cgit v1.1


From 753234007f4ac2c96921cfb19ec1ba535ac29790 Mon Sep 17 00:00:00 2001
From: Li Hong <lihong.hi@gmail.com>
Date: Wed, 31 Mar 2010 15:41:00 +0800
Subject: nilfs2: fix a wrong type conversion in nilfs_ioctl()

(void * __user *) should be (void __user *)

Signed-off-by: Li Hong <lihong.hi@gmail.com>
Signed-off-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
---
 fs/nilfs2/ioctl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index 313d0a2..c446017 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -648,7 +648,7 @@ static int nilfs_ioctl_get_info(struct inode *inode, struct file *filp,
 long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
 	struct inode *inode = filp->f_dentry->d_inode;
-	void __user *argp = (void * __user *)arg;
+	void __user *argp = (void __user *)arg;
 
 	switch (cmd) {
 	case NILFS_IOCTL_CHANGE_CPMODE:
-- 
cgit v1.1


From 30d1872d9eb3663b4cf7bdebcbf5cd465674cced Mon Sep 17 00:00:00 2001
From: Nikolaus Schulz <microschulz@web.de>
Date: Thu, 1 Apr 2010 02:21:10 +0900
Subject: fat: fix buffer overflow in vfat_create_shortname()

When using the string representation of a random counter as part of the base
name, ensure that it is no longer than 4 bytes.

Since we are repeatedly decrementing the counter in a loop until we have found a
unique base name, the counter may wrap around zero; therefore, it is not enough
to mask its higher bits before entering the loop, this must be done inside the
loop.

[hirofumi@mail.parknet.co.jp: use snprintf()]
Signed-off-by: Nikolaus Schulz <microschulz@web.de>
Cc: stable@kernel.org
Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/fat/namei_vfat.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index c1ef501..6fcc7e7 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -309,7 +309,7 @@ static int vfat_create_shortname(struct inode *dir, struct nls_table *nls,
 {
 	struct fat_mount_options *opts = &MSDOS_SB(dir->i_sb)->options;
 	wchar_t *ip, *ext_start, *end, *name_start;
-	unsigned char base[9], ext[4], buf[8], *p;
+	unsigned char base[9], ext[4], buf[5], *p;
 	unsigned char charbuf[NLS_MAX_CHARSET_SIZE];
 	int chl, chi;
 	int sz = 0, extlen, baselen, i, numtail_baselen, numtail2_baselen;
@@ -467,7 +467,7 @@ static int vfat_create_shortname(struct inode *dir, struct nls_table *nls,
 			return 0;
 	}
 
-	i = jiffies & 0xffff;
+	i = jiffies;
 	sz = (jiffies >> 16) & 0x7;
 	if (baselen > 2) {
 		baselen = numtail2_baselen;
@@ -476,7 +476,7 @@ static int vfat_create_shortname(struct inode *dir, struct nls_table *nls,
 	name_res[baselen + 4] = '~';
 	name_res[baselen + 5] = '1' + sz;
 	while (1) {
-		sprintf(buf, "%04X", i);
+		snprintf(buf, sizeof(buf), "%04X", i & 0xffff);
 		memcpy(&name_res[baselen], buf, 4);
 		if (vfat_find_form(dir, name_res) < 0)
 			break;
-- 
cgit v1.1


From a42ab8e1a37257da37e0f018e707bf365ac24531 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Wed, 31 Mar 2010 18:25:44 -0700
Subject: ocfs2: Compute metaecc for superblocks during online resize.

Online resize writes out the new superblock and its backups directly.
The metaecc data wasn't being recomputed.  Let's do that directly.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Acked-by: Mark Fasheh <mfasheh@suse.com>[
Cc: stable@kernel.org
---
 fs/ocfs2/buffer_head_io.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index 21c808f..b18c6d6 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -407,6 +407,7 @@ int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
 				struct buffer_head *bh)
 {
 	int ret = 0;
+	struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
 
 	mlog_entry_void();
 
@@ -426,6 +427,7 @@ int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
 
 	get_bh(bh); /* for end_buffer_write_sync() */
 	bh->b_end_io = end_buffer_write_sync;
+	ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &di->i_check);
 	submit_bh(WRITE, bh);
 
 	wait_on_buffer(bh);
-- 
cgit v1.1


From b95c35e76b29ba812e5dabdd91592e25ec640e93 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Thu, 1 Apr 2010 15:13:57 +0200
Subject: oom: fix the unsafe usage of badness() in proc_oom_score()

proc_oom_score(task) has a reference to task_struct, but that is all.
If this task was already released before we take tasklist_lock

	- we can't use task->group_leader, it points to nowhere

	- it is not safe to call badness() even if this task is
	  ->group_leader, has_intersects_mems_allowed() assumes
	  it is safe to iterate over ->thread_group list.

	- even worse, badness() can hit ->signal == NULL

Add the pid_alive() check to ensure __unhash_process() was not called.

Also, use "task" instead of task->group_leader. badness() should return
the same result for any sub-thread. Currently this is not true, but
this should be changed anyway.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Cc: stable@kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/base.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index a731084..b1f6e62 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -442,12 +442,13 @@ static const struct file_operations proc_lstats_operations = {
 unsigned long badness(struct task_struct *p, unsigned long uptime);
 static int proc_oom_score(struct task_struct *task, char *buffer)
 {
-	unsigned long points;
+	unsigned long points = 0;
 	struct timespec uptime;
 
 	do_posix_clock_monotonic_gettime(&uptime);
 	read_lock(&tasklist_lock);
-	points = badness(task->group_leader, uptime.tv_sec);
+	if (pid_alive(task))
+		points = badness(task, uptime.tv_sec);
 	read_unlock(&tasklist_lock);
 	return sprintf(buffer, "%lu\n", points);
 }
-- 
cgit v1.1


From 80e755fedebc8de0599a79efad2c656503df2e62 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Wed, 31 Mar 2010 21:52:10 -0700
Subject: ceph: allow writeback of snapped pages older than 'oldest' snapc

On snap deletion, we don't regenerate ceph_cap_snaps for inodes with dirty
pages because deletion does not affect metadata writeback.  However, we
did run into problems when we went to write back the pages because the
'oldest' snapc is determined by the oldest cap_snap, and that may be the
newer snapc that reflects the deletion.  This caused confusion and an
infinite loop in ceph_update_writeable_page().

Change the snapc checks to allow writeback of any snapc that is equal to
OR older than the 'oldest' snapc.

When there are no cap_snaps, we were also using the realm's latest snapc
for writeback, which complicates ceph_put_wrbufffer_cap_refs().  Instead,
use i_head_snapc, the most snapc used for the most recent ('head') data.
This makes the writeback snapc (ceph_osd_request.r_snapc) _always_ match a
capsnap or i_head_snapc.

Also, in writepags_finish(), drop the snapc referenced by the _page_
and do not assume it matches the request snapc (it may not anymore).

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/addr.c | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index ce8ef61..a313e9b 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -356,8 +356,8 @@ static struct ceph_snap_context *__get_oldest_context(struct inode *inode,
 			break;
 		}
 	}
-	if (!snapc && ci->i_snap_realm) {
-		snapc = ceph_get_snap_context(ci->i_snap_realm->cached_context);
+	if (!snapc && ci->i_head_snapc) {
+		snapc = ceph_get_snap_context(ci->i_head_snapc);
 		dout(" head snapc %p has %d dirty pages\n",
 		     snapc, ci->i_wrbuffer_ref_head);
 	}
@@ -412,7 +412,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
 		dout("writepage %p page %p not dirty?\n", inode, page);
 		goto out;
 	}
-	if (snapc != get_oldest_context(inode, &snap_size)) {
+	if (snapc->seq > get_oldest_context(inode, &snap_size)->seq) {
 		dout("writepage %p page %p snapc %p not writeable - noop\n",
 		     inode, page, (void *)page->private);
 		/* we should only noop if called by kswapd */
@@ -557,9 +557,9 @@ static void writepages_finish(struct ceph_osd_request *req,
 			dout("inode %p skipping page %p\n", inode, page);
 			wbc->pages_skipped++;
 		}
+		ceph_put_snap_context((void *)page->private);
 		page->private = 0;
 		ClearPagePrivate(page);
-		ceph_put_snap_context(snapc);
 		dout("unlocking %d %p\n", i, page);
 		end_page_writeback(page);
 
@@ -617,7 +617,7 @@ static int ceph_writepages_start(struct address_space *mapping,
 	int range_whole = 0;
 	int should_loop = 1;
 	pgoff_t max_pages = 0, max_pages_ever = 0;
-	struct ceph_snap_context *snapc = NULL, *last_snapc = NULL;
+	struct ceph_snap_context *snapc = NULL, *last_snapc = NULL, *pgsnapc;
 	struct pagevec pvec;
 	int done = 0;
 	int rc = 0;
@@ -769,9 +769,10 @@ get_more_pages:
 			}
 
 			/* only if matching snap context */
-			if (snapc != (void *)page->private) {
-				dout("page snapc %p != oldest %p\n",
-				     (void *)page->private, snapc);
+			pgsnapc = (void *)page->private;
+			if (pgsnapc->seq > snapc->seq) {
+				dout("page snapc %p %lld > oldest %p %lld\n",
+				     pgsnapc, pgsnapc->seq, snapc, snapc->seq);
 				unlock_page(page);
 				if (!locked_pages)
 					continue; /* keep looking for snap */
@@ -935,8 +936,8 @@ static int ceph_update_writeable_page(struct file *file,
 	int pos_in_page = pos & ~PAGE_CACHE_MASK;
 	int end_in_page = pos_in_page + len;
 	loff_t i_size;
-	struct ceph_snap_context *snapc;
 	int r;
+	struct ceph_snap_context *snapc, *oldest;
 
 retry_locked:
 	/* writepages currently holds page lock, but if we change that later, */
@@ -946,16 +947,16 @@ retry_locked:
 	BUG_ON(!ci->i_snap_realm);
 	down_read(&mdsc->snap_rwsem);
 	BUG_ON(!ci->i_snap_realm->cached_context);
-	if (page->private &&
-	    (void *)page->private != ci->i_snap_realm->cached_context) {
+	snapc = (void *)page->private;
+	if (snapc && snapc != ci->i_head_snapc) {
 		/*
 		 * this page is already dirty in another (older) snap
 		 * context!  is it writeable now?
 		 */
-		snapc = get_oldest_context(inode, NULL);
+		oldest = get_oldest_context(inode, NULL);
 		up_read(&mdsc->snap_rwsem);
 
-		if (snapc != (void *)page->private) {
+		if (snapc->seq > oldest->seq) {
 			dout(" page %p snapc %p not current or oldest\n",
 			     page, (void *)page->private);
 			/*
-- 
cgit v1.1


From 6298a33757ba7361bb8f506c106daad77e5ac8cf Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Wed, 31 Mar 2010 22:01:38 -0700
Subject: ceph: fix snap context reference leaks

The get_oldest_context() helper takes a reference to the returned snap
context, but most callers weren't dropping that reference.  Fix them.

Also drop the unused locked __get_oldest_context() variant.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/addr.c | 37 +++++++++++++++++--------------------
 1 file changed, 17 insertions(+), 20 deletions(-)

(limited to 'fs')

diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index a313e9b..41f1f71 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -336,16 +336,15 @@ out:
 /*
  * Get ref for the oldest snapc for an inode with dirty data... that is, the
  * only snap context we are allowed to write back.
- *
- * Caller holds i_lock.
  */
-static struct ceph_snap_context *__get_oldest_context(struct inode *inode,
-						      u64 *snap_size)
+static struct ceph_snap_context *get_oldest_context(struct inode *inode,
+						    u64 *snap_size)
 {
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	struct ceph_snap_context *snapc = NULL;
 	struct ceph_cap_snap *capsnap = NULL;
 
+	spin_lock(&inode->i_lock);
 	list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
 		dout(" cap_snap %p snapc %p has %d dirty pages\n", capsnap,
 		     capsnap->context, capsnap->dirty_pages);
@@ -361,16 +360,6 @@ static struct ceph_snap_context *__get_oldest_context(struct inode *inode,
 		dout(" head snapc %p has %d dirty pages\n",
 		     snapc, ci->i_wrbuffer_ref_head);
 	}
-	return snapc;
-}
-
-static struct ceph_snap_context *get_oldest_context(struct inode *inode,
-						    u64 *snap_size)
-{
-	struct ceph_snap_context *snapc = NULL;
-
-	spin_lock(&inode->i_lock);
-	snapc = __get_oldest_context(inode, snap_size);
 	spin_unlock(&inode->i_lock);
 	return snapc;
 }
@@ -391,7 +380,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
 	int len = PAGE_CACHE_SIZE;
 	loff_t i_size;
 	int err = 0;
-	struct ceph_snap_context *snapc;
+	struct ceph_snap_context *snapc, *oldest;
 	u64 snap_size = 0;
 	long writeback_stat;
 
@@ -412,13 +401,16 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
 		dout("writepage %p page %p not dirty?\n", inode, page);
 		goto out;
 	}
-	if (snapc->seq > get_oldest_context(inode, &snap_size)->seq) {
+	oldest = get_oldest_context(inode, &snap_size);
+	if (snapc->seq > oldest->seq) {
 		dout("writepage %p page %p snapc %p not writeable - noop\n",
 		     inode, page, (void *)page->private);
 		/* we should only noop if called by kswapd */
 		WARN_ON((current->flags & PF_MEMALLOC) == 0);
+		ceph_put_snap_context(oldest);
 		goto out;
 	}
+	ceph_put_snap_context(oldest);
 
 	/* is this a partial page at end of file? */
 	if (snap_size)
@@ -457,7 +449,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
 	ClearPagePrivate(page);
 	end_page_writeback(page);
 	ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
-	ceph_put_snap_context(snapc);
+	ceph_put_snap_context(snapc);  /* page's reference */
 out:
 	return err;
 }
@@ -914,7 +906,10 @@ static int context_is_writeable_or_written(struct inode *inode,
 					   struct ceph_snap_context *snapc)
 {
 	struct ceph_snap_context *oldest = get_oldest_context(inode, NULL);
-	return !oldest || snapc->seq <= oldest->seq;
+	int ret = !oldest || snapc->seq <= oldest->seq;
+
+	ceph_put_snap_context(oldest);
+	return ret;
 }
 
 /*
@@ -957,13 +952,14 @@ retry_locked:
 		up_read(&mdsc->snap_rwsem);
 
 		if (snapc->seq > oldest->seq) {
+			ceph_put_snap_context(oldest);
 			dout(" page %p snapc %p not current or oldest\n",
-			     page, (void *)page->private);
+			     page, snapc);
 			/*
 			 * queue for writeback, and wait for snapc to
 			 * be writeable or written
 			 */
-			snapc = ceph_get_snap_context((void *)page->private);
+			snapc = ceph_get_snap_context(snapc);
 			unlock_page(page);
 			ceph_queue_writeback(inode);
 			r = wait_event_interruptible(ci->i_cap_wq,
@@ -973,6 +969,7 @@ retry_locked:
 				return r;
 			return -EAGAIN;
 		}
+		ceph_put_snap_context(oldest);
 
 		/* yay, writeable, do it now (without dropping page lock) */
 		dout(" page %p snapc %p not current, but oldest\n",
-- 
cgit v1.1


From 819ccbfa448403992ceafc05d6d7097aaa74d4c3 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Thu, 1 Apr 2010 09:33:46 -0700
Subject: ceph: fix leaked inode ref due to snap metadata writeback race

We create a ceph_cap_snap if there is dirty cap metadata (for writeback to
mds) OR dirty pages (for writeback to osd).  It is thus possible that the
metadata has been written back to the MDS but the OSD data has not when
the cap_snap is created.  This results in a cap_snap with dirty(caps) == 0.
The problem is that cap writeback to the MDS isn't necessary, and a
FLUSHSNAP cap op gets no ack from the MDS.  This leaves the cap_snap
attached to the inode along with its inode reference.

Fix the problem by dropping the cap_snap if it becomes 'complete' (all
pages written out) and dirty(caps) == 0 in ceph_put_wrbuffer_cap_refs().

Also, BUG() in __ceph_flush_snaps() if we encounter a cap_snap with
dirty(caps) == 0.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/caps.c | 42 ++++++++++++++++++++++++++++++++----------
 fs/ceph/snap.c | 10 ++++++----
 2 files changed, 38 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 7d0a0d0..b6fdf01 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1204,6 +1204,12 @@ retry:
 		if (capsnap->dirty_pages || capsnap->writing)
 			continue;
 
+		/*
+		 * if cap writeback already occurred, we should have dropped
+		 * the capsnap in ceph_put_wrbuffer_cap_refs.
+		 */
+		BUG_ON(capsnap->dirty == 0);
+
 		/* pick mds, take s_mutex */
 		mds = __ceph_get_cap_mds(ci, &mseq);
 		if (session && session->s_mds != mds) {
@@ -2117,8 +2123,8 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
 		}
 	spin_unlock(&inode->i_lock);
 
-	dout("put_cap_refs %p had %s %s\n", inode, ceph_cap_string(had),
-	     last ? "last" : "");
+	dout("put_cap_refs %p had %s%s%s\n", inode, ceph_cap_string(had),
+	     last ? " last" : "", put ? " put" : "");
 
 	if (last && !flushsnaps)
 		ceph_check_caps(ci, 0, NULL);
@@ -2142,7 +2148,8 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
 {
 	struct inode *inode = &ci->vfs_inode;
 	int last = 0;
-	int last_snap = 0;
+	int complete_capsnap = 0;
+	int drop_capsnap = 0;
 	int found = 0;
 	struct ceph_cap_snap *capsnap = NULL;
 
@@ -2165,19 +2172,32 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
 		list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
 			if (capsnap->context == snapc) {
 				found = 1;
-				capsnap->dirty_pages -= nr;
-				last_snap = !capsnap->dirty_pages;
 				break;
 			}
 		}
 		BUG_ON(!found);
+		capsnap->dirty_pages -= nr;
+		if (capsnap->dirty_pages == 0) {
+			complete_capsnap = 1;
+			if (capsnap->dirty == 0)
+				/* cap writeback completed before we created
+				 * the cap_snap; no FLUSHSNAP is needed */
+				drop_capsnap = 1;
+		}
 		dout("put_wrbuffer_cap_refs on %p cap_snap %p "
-		     " snap %lld %d/%d -> %d/%d %s%s\n",
+		     " snap %lld %d/%d -> %d/%d %s%s%s\n",
 		     inode, capsnap, capsnap->context->seq,
 		     ci->i_wrbuffer_ref+nr, capsnap->dirty_pages + nr,
 		     ci->i_wrbuffer_ref, capsnap->dirty_pages,
 		     last ? " (wrbuffer last)" : "",
-		     last_snap ? " (capsnap last)" : "");
+		     complete_capsnap ? " (complete capsnap)" : "",
+		     drop_capsnap ? " (drop capsnap)" : "");
+		if (drop_capsnap) {
+			ceph_put_snap_context(capsnap->context);
+			list_del(&capsnap->ci_item);
+			list_del(&capsnap->flushing_item);
+			ceph_put_cap_snap(capsnap);
+		}
 	}
 
 	spin_unlock(&inode->i_lock);
@@ -2185,10 +2205,12 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
 	if (last) {
 		ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
 		iput(inode);
-	} else if (last_snap) {
+	} else if (complete_capsnap) {
 		ceph_flush_snaps(ci);
 		wake_up(&ci->i_cap_wq);
 	}
+	if (drop_capsnap)
+		iput(inode);
 }
 
 /*
@@ -2464,8 +2486,8 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
 				break;
 			}
 			WARN_ON(capsnap->dirty_pages || capsnap->writing);
-			dout(" removing cap_snap %p follows %lld\n",
-			     capsnap, follows);
+			dout(" removing %p cap_snap %p follows %lld\n",
+			     inode, capsnap, follows);
 			ceph_put_snap_context(capsnap->context);
 			list_del(&capsnap->ci_item);
 			list_del(&capsnap->flushing_item);
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index df04e21..7e3e5f9 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -521,15 +521,17 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
 	capsnap->ctime = inode->i_ctime;
 	capsnap->time_warp_seq = ci->i_time_warp_seq;
 	if (capsnap->dirty_pages) {
-		dout("finish_cap_snap %p cap_snap %p snapc %p %llu s=%llu "
+		dout("finish_cap_snap %p cap_snap %p snapc %p %llu %s s=%llu "
 		     "still has %d dirty pages\n", inode, capsnap,
 		     capsnap->context, capsnap->context->seq,
-		     capsnap->size, capsnap->dirty_pages);
+		     ceph_cap_string(capsnap->dirty), capsnap->size,
+		     capsnap->dirty_pages);
 		return 0;
 	}
-	dout("finish_cap_snap %p cap_snap %p snapc %p %llu s=%llu clean\n",
+	dout("finish_cap_snap %p cap_snap %p snapc %p %llu %s s=%llu\n",
 	     inode, capsnap, capsnap->context,
-	     capsnap->context->seq, capsnap->size);
+	     capsnap->context->seq, ceph_cap_string(capsnap->dirty),
+	     capsnap->size);
 
 	spin_lock(&mdsc->snap_flush_lock);
 	list_add_tail(&ci->i_snap_flush_item, &mdsc->snap_flush_list);
-- 
cgit v1.1


From 308f44193f796b1c522b3b87760e43d8d8e316d2 Mon Sep 17 00:00:00 2001
From: Li Hong <lihong.hi@gmail.com>
Date: Fri, 2 Apr 2010 18:40:39 +0800
Subject: nilfs2: Remove an uninitialization warning in
 nilfs_btree_propagate_v()

`make CONFIG_NILFS2_FS=m M=fs/nilfs2/` will give the following warnings:

fs/nilfs2/btree.c: In function 'nilfs_btree_propagate':
fs/nilfs2/btree.c:1882: warning: 'maxlevel' may be used uninitialized in this function
fs/nilfs2/btree.c:1882: note: 'maxlevel' was declared here

Set maxlevel = 0 to fix it.

Signed-off-by: Li Hong <lihong.hi@gmail.com>
Signed-off-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
---
 fs/nilfs2/btree.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index 7cdd98b..76c38e3 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -1879,7 +1879,7 @@ static int nilfs_btree_propagate_v(struct nilfs_btree *btree,
 				   struct nilfs_btree_path *path,
 				   int level, struct buffer_head *bh)
 {
-	int maxlevel, ret;
+	int maxlevel = 0, ret;
 	struct nilfs_btree_node *parent;
 	struct inode *dat = nilfs_bmap_get_dat(&btree->bt_bmap);
 	__u64 ptr;
-- 
cgit v1.1


From 0e0d5e0c4bb0476d53a43bfc87d03a25ec4b5579 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Fri, 2 Apr 2010 16:07:19 -0700
Subject: ceph: fix ack counter reset on connection reset

If in_seq_acked isn't reset along with in_seq, we don't ack received
messages until we reach the old count, consuming gobs memory on the other
end of the connection and introducing a large delay when those messages
are eventually deleted.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/messenger.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c
index a32f0f8..f35b494 100644
--- a/fs/ceph/messenger.c
+++ b/fs/ceph/messenger.c
@@ -332,6 +332,7 @@ static void reset_connection(struct ceph_connection *con)
 		con->out_msg = NULL;
 	}
 	con->in_seq = 0;
+	con->in_seq_acked = 0;
 }
 
 /*
-- 
cgit v1.1


From a24e2d7d8f512340991ef0a59cb5d08d491b8e98 Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Sat, 3 Apr 2010 17:20:21 +0000
Subject: [CIFS] initialize nbytes at the beginning of CIFSSMBWrite()

By doing this we always overwrite nbytes value that is being passed on to
CIFSSMBWrite() and need not rely on the callers to initialize. CIFSSMBWrite2 is
doing this already.

CC: Stable <stable@kernel.org>
Reviewed-by: Shirish Pargaonkar <shirishpargaonkar@gmail.com>
Reviewed-by: Jeff Layton <jlayton@samba.org>
Signed-off-by: Suresh Jayaraman <sjayaraman@suse.de>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/cifssmb.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 7cc7f83..e1f90a3 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -1430,6 +1430,8 @@ CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon,
 	__u32 bytes_sent;
 	__u16 byte_count;
 
+	*nbytes = 0;
+
 	/* cFYI(1, ("write at %lld %d bytes", offset, count));*/
 	if (tcon->ses == NULL)
 		return -ECONNABORTED;
@@ -1512,7 +1514,6 @@ CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon,
 	cifs_stats_inc(&tcon->num_writes);
 	if (rc) {
 		cFYI(1, ("Send error in write = %d", rc));
-		*nbytes = 0;
 	} else {
 		*nbytes = le16_to_cpu(pSMBr->CountHigh);
 		*nbytes = (*nbytes) << 16;
-- 
cgit v1.1


From 6513a81e9325d712f1bfb9a1d7b750134e49ff18 Mon Sep 17 00:00:00 2001
From: Suresh Jayaraman <sjayaraman@suse.de>
Date: Wed, 31 Mar 2010 12:00:03 +0530
Subject: cifs: Fix a kernel BUG with remote OS/2 server (try #3)

While chasing a bug report involving a OS/2 server, I noticed the server sets
pSMBr->CountHigh to a incorrect value even in case of normal writes. This
results in 'nbytes' being computed wrongly and triggers a kernel BUG at
mm/filemap.c.

void iov_iter_advance(struct iov_iter *i, size_t bytes)
{
        BUG_ON(i->count < bytes);    <--- BUG here

Why the server is setting 'CountHigh' is not clear but only does so after
writing 64k bytes. Though this looks like the server bug, the client side
crash may not be acceptable.

The workaround is to mask off high 16 bits if the number of bytes written as
returned by the server is greater than the bytes requested by the client as
suggested by Jeff Layton.

CC: Stable <stable@kernel.org>
Reviewed-by: Jeff Layton <jlayton@samba.org>
Signed-off-by: Suresh Jayaraman <sjayaraman@suse.de>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/cifssmb.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'fs')

diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index e1f90a3..f213b8a 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -1518,6 +1518,14 @@ CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon,
 		*nbytes = le16_to_cpu(pSMBr->CountHigh);
 		*nbytes = (*nbytes) << 16;
 		*nbytes += le16_to_cpu(pSMBr->Count);
+
+		/*
+		 * Mask off high 16 bits when bytes written as returned by the
+		 * server is greater than bytes requested by the client. Some
+		 * OS/2 servers are known to set incorrect CountHigh values.
+		 */
+		if (*nbytes > count)
+			*nbytes &= 0xFFFF;
 	}
 
 	cifs_buf_release(pSMB);
@@ -1606,6 +1614,14 @@ CIFSSMBWrite2(const int xid, struct cifsTconInfo *tcon,
 		*nbytes = le16_to_cpu(pSMBr->CountHigh);
 		*nbytes = (*nbytes) << 16;
 		*nbytes += le16_to_cpu(pSMBr->Count);
+
+		/*
+		 * Mask off high 16 bits when bytes written as returned by the
+		 * server is greater than bytes requested by the client. OS/2
+		 * servers are known to set incorrect CountHigh values.
+		 */
+		if (*nbytes > count)
+			*nbytes &= 0xFFFF;
 	}
 
 /*	cifs_small_buf_release(pSMB); */ /* Freed earlier now in SendReceive2 */
-- 
cgit v1.1


From 8b472d739b2ddd8ab7fb278874f696cd95b25a5e Mon Sep 17 00:00:00 2001
From: Curt Wohlgemuth <curtw@google.com>
Date: Sat, 3 Apr 2010 16:45:06 -0400
Subject: ext4: Fix possible lost inode write in no journal mode

In the no-journal case, ext4_write_inode() will fetch the bh and call
sync_dirty_buffer() on it.  However, if the bh has already been
written and the bh reclaimed for some other purpose, AND if the inode
is the only one in the inode table block in use, then
ext4_get_inode_loc() will not read the inode table block from disk,
but as an optimization, fill the block with zero's assuming that its
caller will copy in the on-disk version of the inode.  This is not
done by ext4_write_inode(), so the contents of the inode can simply
get lost.  The fix is to use __ext4_get_inode_loc() with in_mem set to
0, instead of ext4_get_inode_loc().  Long term the API needs to be
fixed so it's obvious why latter is not safe.

Addresses-Google-Bug: #2526446

Signed-off-by: Curt Wohlgemuth <curtw@google.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 11119e0..87e3c70 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -5374,7 +5374,7 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
 	} else {
 		struct ext4_iloc iloc;
 
-		err = ext4_get_inode_loc(inode, &iloc);
+		err = __ext4_get_inode_loc(inode, &iloc, 0);
 		if (err)
 			return err;
 		if (wbc->sync_mode == WB_SYNC_ALL)
-- 
cgit v1.1


From fd2dd9fbaf9e498ec63eef298921e36556f7214c Mon Sep 17 00:00:00 2001
From: Curt Wohlgemuth <curtw@google.com>
Date: Sat, 3 Apr 2010 17:44:16 -0400
Subject: ext4: Fix buffer head leaks after calls to ext4_get_inode_loc()

Calls to ext4_get_inode_loc() returns with a reference to a buffer
head in iloc->bh.  The callers of this function in ext4_write_inode()
when in no journal mode and in ext4_xattr_fiemap() don't release the
buffer head after using it.

Addresses-Google-Bug: #2548165

Signed-off-by: Curt Wohlgemuth <curtw@google.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/extents.c | 1 +
 fs/ext4/inode.c   | 1 +
 2 files changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 94c8ee8..236b834 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3879,6 +3879,7 @@ static int ext4_xattr_fiemap(struct inode *inode,
 		physical += offset;
 		length = EXT4_SB(inode->i_sb)->s_inode_size - offset;
 		flags |= FIEMAP_EXTENT_DATA_INLINE;
+		brelse(iloc.bh);
 	} else { /* external block */
 		physical = EXT4_I(inode)->i_file_acl << blockbits;
 		length = inode->i_sb->s_blocksize;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 87e3c70..ba1eee8 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -5385,6 +5385,7 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
 				   (unsigned long long)iloc.bh->b_blocknr);
 			err = -EIO;
 		}
+		brelse(iloc.bh);
 	}
 	return err;
 }
-- 
cgit v1.1


From d82ef020cf31504c816803b1def94eb5ff173363 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Fri, 2 Apr 2010 09:11:29 +0900
Subject: proc: pagemap: Hold mmap_sem during page walk

In initial design, walk_page_range() was designed just for walking page
table and it didn't require mmap_sem.  Now, find_vma() etc..  are used
in walk_page_range() and we need mmap_sem around it.

This patch adds mmap_sem around walk_page_range().

Because /proc/<pid>/pagemap's callback routine use put_user(), we have
to get rid of it to do sane fix.

Changelog: 2010/Apr/2
 - fixed start_vaddr and end overflow
Changelog: 2010/Apr/1
 - fixed start_vaddr calculation
 - removed unnecessary cast.
 - removed unnecessary change in smaps.
 - use GFP_TEMPORARY instead of GFP_KERNEL

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Matt Mackall <mpm@selenic.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: San Mehat <san@google.com>
Cc: Brian Swetland <swetland@google.com>
Cc: Dave Hansen <haveblue@us.ibm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
[ Fixed kmalloc failure return code as per Matt ]
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/task_mmu.c | 87 ++++++++++++++++++++++++------------------------------
 1 file changed, 38 insertions(+), 49 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 183f8ff..0962739 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -406,6 +406,7 @@ static int show_smap(struct seq_file *m, void *v)
 
 	memset(&mss, 0, sizeof mss);
 	mss.vma = vma;
+	/* mmap_sem is held in m_start */
 	if (vma->vm_mm && !is_vm_hugetlb_page(vma))
 		walk_page_range(vma->vm_start, vma->vm_end, &smaps_walk);
 
@@ -552,7 +553,8 @@ const struct file_operations proc_clear_refs_operations = {
 };
 
 struct pagemapread {
-	u64 __user *out, *end;
+	int pos, len;
+	u64 *buffer;
 };
 
 #define PM_ENTRY_BYTES      sizeof(u64)
@@ -575,10 +577,8 @@ struct pagemapread {
 static int add_to_pagemap(unsigned long addr, u64 pfn,
 			  struct pagemapread *pm)
 {
-	if (put_user(pfn, pm->out))
-		return -EFAULT;
-	pm->out++;
-	if (pm->out >= pm->end)
+	pm->buffer[pm->pos++] = pfn;
+	if (pm->pos >= pm->len)
 		return PM_END_OF_BUFFER;
 	return 0;
 }
@@ -720,21 +720,20 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long addr,
  * determine which areas of memory are actually mapped and llseek to
  * skip over unmapped regions.
  */
+#define PAGEMAP_WALK_SIZE	(PMD_SIZE)
 static ssize_t pagemap_read(struct file *file, char __user *buf,
 			    size_t count, loff_t *ppos)
 {
 	struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
-	struct page **pages, *page;
-	unsigned long uaddr, uend;
 	struct mm_struct *mm;
 	struct pagemapread pm;
-	int pagecount;
 	int ret = -ESRCH;
 	struct mm_walk pagemap_walk = {};
 	unsigned long src;
 	unsigned long svpfn;
 	unsigned long start_vaddr;
 	unsigned long end_vaddr;
+	int copied = 0;
 
 	if (!task)
 		goto out;
@@ -757,35 +756,12 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
 	if (!mm)
 		goto out_task;
 
-
-	uaddr = (unsigned long)buf & PAGE_MASK;
-	uend = (unsigned long)(buf + count);
-	pagecount = (PAGE_ALIGN(uend) - uaddr) / PAGE_SIZE;
-	ret = 0;
-	if (pagecount == 0)
-		goto out_mm;
-	pages = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL);
+	pm.len = PM_ENTRY_BYTES * (PAGEMAP_WALK_SIZE >> PAGE_SHIFT);
+	pm.buffer = kmalloc(pm.len, GFP_TEMPORARY);
 	ret = -ENOMEM;
-	if (!pages)
+	if (!pm.buffer)
 		goto out_mm;
 
-	down_read(&current->mm->mmap_sem);
-	ret = get_user_pages(current, current->mm, uaddr, pagecount,
-			     1, 0, pages, NULL);
-	up_read(&current->mm->mmap_sem);
-
-	if (ret < 0)
-		goto out_free;
-
-	if (ret != pagecount) {
-		pagecount = ret;
-		ret = -EFAULT;
-		goto out_pages;
-	}
-
-	pm.out = (u64 __user *)buf;
-	pm.end = (u64 __user *)(buf + count);
-
 	pagemap_walk.pmd_entry = pagemap_pte_range;
 	pagemap_walk.pte_hole = pagemap_pte_hole;
 	pagemap_walk.hugetlb_entry = pagemap_hugetlb_range;
@@ -807,23 +783,36 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
 	 * user buffer is tracked in "pm", and the walk
 	 * will stop when we hit the end of the buffer.
 	 */
-	ret = walk_page_range(start_vaddr, end_vaddr, &pagemap_walk);
-	if (ret == PM_END_OF_BUFFER)
-		ret = 0;
-	/* don't need mmap_sem for these, but this looks cleaner */
-	*ppos += (char __user *)pm.out - buf;
-	if (!ret)
-		ret = (char __user *)pm.out - buf;
-
-out_pages:
-	for (; pagecount; pagecount--) {
-		page = pages[pagecount-1];
-		if (!PageReserved(page))
-			SetPageDirty(page);
-		page_cache_release(page);
+	ret = 0;
+	while (count && (start_vaddr < end_vaddr)) {
+		int len;
+		unsigned long end;
+
+		pm.pos = 0;
+		end = start_vaddr + PAGEMAP_WALK_SIZE;
+		/* overflow ? */
+		if (end < start_vaddr || end > end_vaddr)
+			end = end_vaddr;
+		down_read(&mm->mmap_sem);
+		ret = walk_page_range(start_vaddr, end, &pagemap_walk);
+		up_read(&mm->mmap_sem);
+		start_vaddr = end;
+
+		len = min(count, PM_ENTRY_BYTES * pm.pos);
+		if (copy_to_user(buf, pm.buffer, len) < 0) {
+			ret = -EFAULT;
+			goto out_free;
+		}
+		copied += len;
+		buf += len;
+		count -= len;
 	}
+	*ppos += copied;
+	if (!ret || ret == PM_END_OF_BUFFER)
+		ret = copied;
+
 out_free:
-	kfree(pages);
+	kfree(pm.buffer);
 out_mm:
 	mmput(mm);
 out_task:
-- 
cgit v1.1


From 476ada0436351672fbf482db54cb94b8ba877709 Mon Sep 17 00:00:00 2001
From: Sripathi Kodi <sripathik@in.ibm.com>
Date: Wed, 17 Mar 2010 17:02:38 +0000
Subject: 9p: Fix setting of protocol flags in v9fs_session_info structure.

This patch fixes a simple bug I left behind in my earlier protocol
negotiation patch.

Thanks,
Sripathi.

Signed-off-by: Sripathi Kodi <sripathik@in.ibm.com>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 fs/9p/v9fs.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 6c7f6a2..2f814cf 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -241,7 +241,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
 	list_add(&v9ses->slist, &v9fs_sessionlist);
 	spin_unlock(&v9fs_sessionlist_lock);
 
-	v9ses->flags = V9FS_PROTO_2000U | V9FS_ACCESS_USER;
+	v9ses->flags = V9FS_ACCESS_USER;
 	strcpy(v9ses->uname, V9FS_DEFUSER);
 	strcpy(v9ses->aname, V9FS_DEFANAME);
 	v9ses->uid = ~0;
@@ -262,8 +262,10 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
 		goto error;
 	}
 
-	if (!p9_is_proto_dotu(v9ses->clnt))
-		v9ses->flags &= ~V9FS_PROTO_2000U;
+	if (p9_is_proto_dotl(v9ses->clnt))
+		v9ses->flags |= V9FS_PROTO_2000L;
+	else if (p9_is_proto_dotu(v9ses->clnt))
+		v9ses->flags |= V9FS_PROTO_2000U;
 
 	v9ses->maxdata = v9ses->clnt->msize - P9_IOHDRSZ;
 
-- 
cgit v1.1


From 5b0fa207d1a6f27c9a2f2d707147dce01af21db7 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Fri, 19 Mar 2010 12:47:26 +0000
Subject: fs/9p: Clunk the fid resulting from partial walk of the name

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 fs/9p/fid.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index 08b2eb1..b0a23c7 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -110,7 +110,7 @@ struct p9_fid *v9fs_fid_lookup(struct dentry *dentry)
 {
 	int i, n, l, clone, any, access;
 	u32 uid;
-	struct p9_fid *fid;
+	struct p9_fid *fid, *old_fid = NULL;
 	struct dentry *d, *ds;
 	struct v9fs_session_info *v9ses;
 	char **wnames, *uname;
@@ -183,10 +183,18 @@ struct p9_fid *v9fs_fid_lookup(struct dentry *dentry)
 		l = min(n - i, P9_MAXWELEM);
 		fid = p9_client_walk(fid, l, &wnames[i], clone);
 		if (IS_ERR(fid)) {
+			if (old_fid) {
+				/*
+				 * If we fail, clunk fid which are mapping
+				 * to path component and not the last component
+				 * of the path.
+				 */
+				p9_client_clunk(old_fid);
+			}
 			kfree(wnames);
 			return fid;
 		}
-
+		old_fid = fid;
 		i += l;
 		clone = 0;
 	}
-- 
cgit v1.1


From d994f4058d9f9be7e44529b55fc6be6552901ead Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Mon, 29 Mar 2010 18:14:50 -0500
Subject: 9p: drop nlink remove

We need to drop the link count on the inode of a sucessfull remove

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 fs/9p/vfs_inode.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 5fe45d6..ae83967 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -431,6 +431,7 @@ error:
 
 static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir)
 {
+	int retval;
 	struct inode *file_inode;
 	struct v9fs_session_info *v9ses;
 	struct p9_fid *v9fid;
@@ -444,7 +445,10 @@ static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir)
 	if (IS_ERR(v9fid))
 		return PTR_ERR(v9fid);
 
-	return p9_client_remove(v9fid);
+	retval = p9_client_remove(v9fid);
+	if (!retval)
+		drop_nlink(file_inode);
+	return retval;
 }
 
 static int
-- 
cgit v1.1


From 6d96d3ab7aea5f0e75205a0c97f8d1fdf82c5287 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Mon, 29 Mar 2010 18:13:59 -0500
Subject: 9p: Make sure we are able to clunk the cached fid on umount

dcache prune happen on umount. So we cannot mark the client
satus disconnect. That will prevent a 9p call to the server

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 fs/9p/v9fs.c      | 13 +++++++++++++
 fs/9p/v9fs.h      |  1 +
 fs/9p/vfs_super.c |  3 ++-
 3 files changed, 16 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 2f814cf..0b78ae8 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -342,6 +342,19 @@ void v9fs_session_cancel(struct v9fs_session_info *v9ses) {
 	p9_client_disconnect(v9ses->clnt);
 }
 
+/**
+ * v9fs_session_begin_cancel - Begin terminate of a session
+ * @v9ses: session to terminate
+ *
+ * After this call we don't allow any request other than clunk.
+ */
+
+void v9fs_session_begin_cancel(struct v9fs_session_info *v9ses)
+{
+	P9_DPRINTK(P9_DEBUG_ERROR, "begin cancel session %p\n", v9ses);
+	p9_client_begin_disconnect(v9ses->clnt);
+}
+
 extern int v9fs_error_init(void);
 
 static struct kobject *v9fs_kobj;
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index 6b801d1..a0a8d3d 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -108,6 +108,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *, const char *,
 									char *);
 void v9fs_session_close(struct v9fs_session_info *v9ses);
 void v9fs_session_cancel(struct v9fs_session_info *v9ses);
+void v9fs_session_begin_cancel(struct v9fs_session_info *v9ses);
 
 #define V9FS_MAGIC 0x01021997
 
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 69357c0..d1a3c80 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -193,6 +193,7 @@ static void v9fs_kill_super(struct super_block *s)
 
 	kill_anon_super(s);
 
+	v9fs_session_cancel(v9ses);
 	v9fs_session_close(v9ses);
 	kfree(v9ses);
 	s->s_fs_info = NULL;
@@ -205,7 +206,7 @@ v9fs_umount_begin(struct super_block *sb)
 	struct v9fs_session_info *v9ses;
 
 	v9ses = sb->s_fs_info;
-	v9fs_session_cancel(v9ses);
+	v9fs_session_begin_cancel(v9ses);
 }
 
 static const struct super_operations v9fs_super_ops = {
-- 
cgit v1.1


From 11e9b49b7fa056bfc00a56de8956d1d5fe8b84ea Mon Sep 17 00:00:00 2001
From: Sripathi Kodi <sripathik@in.ibm.com>
Date: Mon, 29 Mar 2010 18:13:59 -0500
Subject: 9p: Creating files with names too long should fail with ENAMETOOLONG.

Signed-off-by: Sripathi Kodi <sripathik@in.ibm.com>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 fs/9p/vfs_inode.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'fs')

diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index ae83967..905f664 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -660,6 +660,9 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
 	P9_DPRINTK(P9_DEBUG_VFS, "dir: %p dentry: (%s) %p nameidata: %p\n",
 		dir, dentry->d_name.name, dentry, nameidata);
 
+	if (dentry->d_name.len > NAME_MAX)
+		return ERR_PTR(-ENAMETOOLONG);
+
 	sb = dir->i_sb;
 	v9ses = v9fs_inode2v9ses(dir);
 	dfid = v9fs_fid_lookup(dentry->d_parent);
-- 
cgit v1.1


From 28ecb60906e86e74e9ad4ac7e0218d8631e73a94 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Wed, 17 Mar 2010 13:31:04 +0000
Subject: Btrfs: use add_to_page_cache_lru, use __page_cache_alloc

Pagecache pages should be allocated with __page_cache_alloc, so they
obey pagecache memory policies.

add_to_page_cache_lru is exported, so it should be used. Benefits over
using a private pagevec: neater code, 128 bytes fewer stack used, percpu
lru ordering is preserved, and finally don't need to flush pagevec
before returning so batching may be shared with other LRU insertions.

Signed-off-by: Nick Piggin <npiggin@suse.de>:
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/compression.c | 22 ++++------------------
 fs/btrfs/extent_io.c   | 15 +--------------
 2 files changed, 5 insertions(+), 32 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 28b92a7..1d54c53 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -31,7 +31,6 @@
 #include <linux/swap.h>
 #include <linux/writeback.h>
 #include <linux/bit_spinlock.h>
-#include <linux/pagevec.h>
 #include "compat.h"
 #include "ctree.h"
 #include "disk-io.h"
@@ -445,7 +444,6 @@ static noinline int add_ra_bio_pages(struct inode *inode,
 	unsigned long nr_pages = 0;
 	struct extent_map *em;
 	struct address_space *mapping = inode->i_mapping;
-	struct pagevec pvec;
 	struct extent_map_tree *em_tree;
 	struct extent_io_tree *tree;
 	u64 end;
@@ -461,7 +459,6 @@ static noinline int add_ra_bio_pages(struct inode *inode,
 
 	end_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
 
-	pagevec_init(&pvec, 0);
 	while (last_offset < compressed_end) {
 		page_index = last_offset >> PAGE_CACHE_SHIFT;
 
@@ -478,26 +475,17 @@ static noinline int add_ra_bio_pages(struct inode *inode,
 			goto next;
 		}
 
-		page = alloc_page(mapping_gfp_mask(mapping) & ~__GFP_FS);
+		page = __page_cache_alloc(mapping_gfp_mask(mapping) &
+								~__GFP_FS);
 		if (!page)
 			break;
 
-		page->index = page_index;
-		/*
-		 * what we want to do here is call add_to_page_cache_lru,
-		 * but that isn't exported, so we reproduce it here
-		 */
-		if (add_to_page_cache(page, mapping,
-				      page->index, GFP_NOFS)) {
+		if (add_to_page_cache_lru(page, mapping, page_index,
+								GFP_NOFS)) {
 			page_cache_release(page);
 			goto next;
 		}
 
-		/* open coding of lru_cache_add, also not exported */
-		page_cache_get(page);
-		if (!pagevec_add(&pvec, page))
-			__pagevec_lru_add_file(&pvec);
-
 		end = last_offset + PAGE_CACHE_SIZE - 1;
 		/*
 		 * at this point, we have a locked page in the page cache
@@ -551,8 +539,6 @@ static noinline int add_ra_bio_pages(struct inode *inode,
 next:
 		last_offset += PAGE_CACHE_SIZE;
 	}
-	if (pagevec_count(&pvec))
-		__pagevec_lru_add_file(&pvec);
 	return 0;
 }
 
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index c99121a..fc742e5 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2679,33 +2679,20 @@ int extent_readpages(struct extent_io_tree *tree,
 {
 	struct bio *bio = NULL;
 	unsigned page_idx;
-	struct pagevec pvec;
 	unsigned long bio_flags = 0;
 
-	pagevec_init(&pvec, 0);
 	for (page_idx = 0; page_idx < nr_pages; page_idx++) {
 		struct page *page = list_entry(pages->prev, struct page, lru);
 
 		prefetchw(&page->flags);
 		list_del(&page->lru);
-		/*
-		 * what we want to do here is call add_to_page_cache_lru,
-		 * but that isn't exported, so we reproduce it here
-		 */
-		if (!add_to_page_cache(page, mapping,
+		if (!add_to_page_cache_lru(page, mapping,
 					page->index, GFP_KERNEL)) {
-
-			/* open coding of lru_cache_add, also not exported */
-			page_cache_get(page);
-			if (!pagevec_add(&pvec, page))
-				__pagevec_lru_add_file(&pvec);
 			__extent_read_full_page(tree, page, get_extent,
 						&bio, 0, &bio_flags);
 		}
 		page_cache_release(page);
 	}
-	if (pagevec_count(&pvec))
-		__pagevec_lru_add_file(&pvec);
 	BUG_ON(!list_empty(pages));
 	if (bio)
 		submit_one_bio(READ, bio, 0, bio_flags);
-- 
cgit v1.1


From b5cb160084fad438c513d0952849e597ffe9e3d9 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@redhat.com>
Date: Fri, 12 Mar 2010 19:28:18 +0000
Subject: Btrfs: fix small race with delalloc flushing waitqueue's

Everytime we start a new flushing thread, we init the waitqueue if there isn't a
flushing thread running.  The problem with this is we check
space_info->flushing, which we clear right before doing a wake_up on the
flushing waitqueue, which causes problems if we init the waitqueue in the middle
of clearing the flushing flagh and calling wake_up.  This is hard to hit, but
the code is wrong anyway, so init the flushing/allocating waitqueue when
creating the space info and let it be.  I haven't seen the panic since I've been
using this patch.  Thanks,

Signed-off-by: Josef Bacik <josef@redhat.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/extent-tree.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 5f6bc28..101041d 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2676,6 +2676,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
 
 	INIT_LIST_HEAD(&found->block_groups);
 	init_rwsem(&found->groups_sem);
+	init_waitqueue_head(&found->flush_wait);
+	init_waitqueue_head(&found->allocate_wait);
 	spin_lock_init(&found->lock);
 	found->flags = flags;
 	found->total_bytes = total_bytes;
@@ -2944,12 +2946,10 @@ static void flush_delalloc(struct btrfs_root *root,
 
 	spin_lock(&info->lock);
 
-	if (!info->flushing) {
+	if (!info->flushing)
 		info->flushing = 1;
-		init_waitqueue_head(&info->flush_wait);
-	} else {
+	else
 		wait = true;
-	}
 
 	spin_unlock(&info->lock);
 
@@ -3011,7 +3011,6 @@ static int maybe_allocate_chunk(struct btrfs_root *root,
 	if (!info->allocating_chunk) {
 		info->force_alloc = 1;
 		info->allocating_chunk = 1;
-		init_waitqueue_head(&info->allocate_wait);
 	} else {
 		wait = true;
 	}
-- 
cgit v1.1


From 6bdb72ded1e281cd8844918c39d00cdd0e59f655 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Mon, 15 Mar 2010 17:27:13 +0000
Subject: Btrfs: create snapshot references in same commit as snapshot

This creates the reference to a new snapshot in the same commit as the
snapshot itself.  This avoids the need for a second commit in order for a
snapshot to be persistent, and also avoids the problem of "leaking" a
new snapshot tree root if the host crashes before the second commit takes
place.

It is not at all clear to me why it wasn't always done this way.  If there
is still a reason for the two-stage {create,finish}_pending_snapshots()
approach I'm missing something!  :)

I've been running this for a couple weeks under pretty heavy usage (a few
snapshots per minute) without obvious problems.

Signed-off-by: Sage Weil <sage@newdream.net>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/transaction.c | 97 ++++++++++++++++----------------------------------
 1 file changed, 31 insertions(+), 66 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 4305428..01cebd6 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -755,10 +755,17 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
 	struct btrfs_root_item *new_root_item;
 	struct btrfs_root *tree_root = fs_info->tree_root;
 	struct btrfs_root *root = pending->root;
+	struct btrfs_root *parent_root;
+	struct inode *parent_inode;
 	struct extent_buffer *tmp;
 	struct extent_buffer *old;
 	int ret;
 	u64 objectid;
+	int namelen;
+	u64 index = 0;
+
+	parent_inode = pending->dentry->d_parent->d_inode;
+	parent_root = BTRFS_I(parent_inode)->root;
 
 	new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS);
 	if (!new_root_item) {
@@ -769,79 +776,59 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
 	if (ret)
 		goto fail;
 
-	record_root_in_trans(trans, root);
-	btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
-	memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
-
 	key.objectid = objectid;
 	/* record when the snapshot was created in key.offset */
 	key.offset = trans->transid;
 	btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
 
-	old = btrfs_lock_root_node(root);
-	btrfs_cow_block(trans, root, old, NULL, 0, &old);
-	btrfs_set_lock_blocking(old);
-
-	btrfs_copy_root(trans, root, old, &tmp, objectid);
-	btrfs_tree_unlock(old);
-	free_extent_buffer(old);
-
-	btrfs_set_root_node(new_root_item, tmp);
-	ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
-				new_root_item);
-	btrfs_tree_unlock(tmp);
-	free_extent_buffer(tmp);
-	if (ret)
-		goto fail;
-
-	key.offset = (u64)-1;
 	memcpy(&pending->root_key, &key, sizeof(key));
-fail:
-	kfree(new_root_item);
-	return ret;
-}
-
-static noinline int finish_pending_snapshot(struct btrfs_fs_info *fs_info,
-				   struct btrfs_pending_snapshot *pending)
-{
-	int ret;
-	int namelen;
-	u64 index = 0;
-	struct btrfs_trans_handle *trans;
-	struct inode *parent_inode;
-	struct btrfs_root *parent_root;
-
-	parent_inode = pending->dentry->d_parent->d_inode;
-	parent_root = BTRFS_I(parent_inode)->root;
-	trans = btrfs_join_transaction(parent_root, 1);
+	pending->root_key.offset = (u64)-1;
 
+	record_root_in_trans(trans, parent_root);
 	/*
 	 * insert the directory item
 	 */
 	namelen = strlen(pending->name);
 	ret = btrfs_set_inode_index(parent_inode, &index);
+	BUG_ON(ret);
 	ret = btrfs_insert_dir_item(trans, parent_root,
 			    pending->name, namelen,
 			    parent_inode->i_ino,
 			    &pending->root_key, BTRFS_FT_DIR, index);
-
-	if (ret)
-		goto fail;
+	BUG_ON(ret);
 
 	btrfs_i_size_write(parent_inode, parent_inode->i_size + namelen * 2);
 	ret = btrfs_update_inode(trans, parent_root, parent_inode);
 	BUG_ON(ret);
 
+	record_root_in_trans(trans, root);
+	btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
+	memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
+
+	old = btrfs_lock_root_node(root);
+	btrfs_cow_block(trans, root, old, NULL, 0, &old);
+	btrfs_set_lock_blocking(old);
+
+	btrfs_copy_root(trans, root, old, &tmp, objectid);
+	btrfs_tree_unlock(old);
+	free_extent_buffer(old);
+
+	btrfs_set_root_node(new_root_item, tmp);
+	ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
+				new_root_item);
+	BUG_ON(ret);
+	btrfs_tree_unlock(tmp);
+	free_extent_buffer(tmp);
+
 	ret = btrfs_add_root_ref(trans, parent_root->fs_info->tree_root,
 				 pending->root_key.objectid,
 				 parent_root->root_key.objectid,
 				 parent_inode->i_ino, index, pending->name,
 				 namelen);
-
 	BUG_ON(ret);
 
 fail:
-	btrfs_end_transaction(trans, fs_info->fs_root);
+	kfree(new_root_item);
 	return ret;
 }
 
@@ -862,25 +849,6 @@ static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans,
 	return 0;
 }
 
-static noinline int finish_pending_snapshots(struct btrfs_trans_handle *trans,
-					     struct btrfs_fs_info *fs_info)
-{
-	struct btrfs_pending_snapshot *pending;
-	struct list_head *head = &trans->transaction->pending_snapshots;
-	int ret;
-
-	while (!list_empty(head)) {
-		pending = list_entry(head->next,
-				     struct btrfs_pending_snapshot, list);
-		ret = finish_pending_snapshot(fs_info, pending);
-		BUG_ON(ret);
-		list_del(&pending->list);
-		kfree(pending->name);
-		kfree(pending);
-	}
-	return 0;
-}
-
 static void update_super_roots(struct btrfs_root *root)
 {
 	struct btrfs_root_item *root_item;
@@ -1092,9 +1060,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 
 	btrfs_finish_extent_commit(trans, root);
 
-	/* do the directory inserts of any pending snapshot creations */
-	finish_pending_snapshots(trans, root->fs_info);
-
 	mutex_lock(&root->fs_info->trans_mutex);
 
 	cur_trans->commit_done = 1;
-- 
cgit v1.1


From 109f6aef5fc436f355ad027f4d97bd696df2049a Mon Sep 17 00:00:00 2001
From: Chris Mason <chris.mason@oracle.com>
Date: Fri, 2 Apr 2010 09:20:18 -0400
Subject: Btrfs: add check for changed leaves in setup_leaf_for_split

setup_leaf_for_split needs to drop the path and search again, and has
checks to see if the item we want to split changed size.  But, it misses
the case where the leaf changed and now has enough room for the item
we want to insert.

This adds an extra check to make sure the leaf really needs splitting
before we call btrfs_split_leaf(), which keeps us from trying to split
a leaf with a single item.

btrfs_split_leaf() will blindly split the single item leaf, leaving us
with one good leaf and one empty leaf and then a crash.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/ctree.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'fs')

diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index c4bc570..babf7fb 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -3040,6 +3040,10 @@ static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
 	if (ret > 0 || item_size != btrfs_item_size_nr(leaf, path->slots[0]))
 		goto err;
 
+	/* the leaf has  changed, it now has room.  return now */
+	if (btrfs_leaf_free_space(root, path->nodes[0]) >= ins_len)
+		goto err;
+
 	if (key.type == BTRFS_EXTENT_DATA_KEY) {
 		fi = btrfs_item_ptr(leaf, path->slots[0],
 				    struct btrfs_file_extent_item);
-- 
cgit v1.1


From 85a770a8889035625466a4cfb1393cd7d2ffd165 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Tue, 30 Mar 2010 09:41:25 +0000
Subject: 9p: return on mutex_lock_interruptible()

If "err" is -EINTR here the original code calls mutex_unlock() and then
returns, but it should just return directly.

Signed-off-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>

------------------------------------------------------------------------------
Download Intel&#174; Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
---
 fs/9p/vfs_dir.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index d8a3afe..bbe00cf 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -130,6 +130,8 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
 	rdir = (struct p9_rdir *) fid->rdir;
 
 	err = mutex_lock_interruptible(&rdir->mutex);
+	if (err)
+		return err;
 	while (err == 0) {
 		if (rdir->tail == rdir->head) {
 			err = v9fs_file_readn(filp, rdir->buf, NULL,
-- 
cgit v1.1


From ab6e24103cbd215e922938a4f58c75194761a60e Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@redhat.com>
Date: Fri, 19 Mar 2010 14:38:13 +0000
Subject: Btrfs: fix data enospc check overflow

Because we account for reserved space we get from the allocator before we
actually account for allocating delalloc space, we can have a small window where
the amount of "used" space in a space_info is more than the total amount of
space in the space_info.  This will cause a overflow in our check, so it will
seem like we have _tons_ of free space, and we'll allow reservations to occur
that will end up larger than the amount of space we have.  I've seen users
report ENOSPC panic's in cow_file_range a few times recently, so I tried to
reproduce this problem and found I could reproduce it if I ran one of my tests
in a loop for like 20 minutes.  With this patch my test ran all night without
issues.  Thanks,

Signed-off-by: Josef Bacik <josef@redhat.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/extent-tree.c | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 101041d..786899d 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3234,7 +3234,8 @@ int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
 				u64 bytes)
 {
 	struct btrfs_space_info *data_sinfo;
-	int ret = 0, committed = 0;
+	u64 used;
+	int ret = 0, committed = 0, flushed = 0;
 
 	/* make sure bytes are sectorsize aligned */
 	bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
@@ -3246,12 +3247,21 @@ int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
 again:
 	/* make sure we have enough space to handle the data first */
 	spin_lock(&data_sinfo->lock);
-	if (data_sinfo->total_bytes - data_sinfo->bytes_used -
-	    data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved -
-	    data_sinfo->bytes_pinned - data_sinfo->bytes_readonly -
-	    data_sinfo->bytes_may_use - data_sinfo->bytes_super < bytes) {
+	used = data_sinfo->bytes_used + data_sinfo->bytes_delalloc +
+		data_sinfo->bytes_reserved + data_sinfo->bytes_pinned +
+		data_sinfo->bytes_readonly + data_sinfo->bytes_may_use +
+		data_sinfo->bytes_super;
+
+	if (used + bytes > data_sinfo->total_bytes) {
 		struct btrfs_trans_handle *trans;
 
+		if (!flushed) {
+			spin_unlock(&data_sinfo->lock);
+			flush_delalloc(root, data_sinfo);
+			flushed = 1;
+			goto again;
+		}
+
 		/*
 		 * if we don't have enough free bytes in this space then we need
 		 * to alloc a new chunk.
-- 
cgit v1.1


From 9f680ce04ea19dabbbafe01b57b61930a9b70741 Mon Sep 17 00:00:00 2001
From: Chris Mason <chris.mason@oracle.com>
Date: Tue, 6 Apr 2010 09:37:47 -0400
Subject: Btrfs: make sure the chunk allocator doesn't create zero length
 chunks

A recent commit allowed for smaller chunks to be created, but didn't
make sure they were always bigger than a stripe.  After some divides,
this led to zero length stripes.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/volumes.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'fs')

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 9bf1f58..b584e9a 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2249,6 +2249,12 @@ again:
 	if (!looped)
 		calc_size = max_t(u64, min_stripe_size, calc_size);
 
+	/*
+	 * we're about to do_div by the stripe_len so lets make sure
+	 * we end up with something bigger than a stripe
+	 */
+	calc_size = max_t(u64, calc_size, stripe_len * 4);
+
 	do_div(calc_size, stripe_len);
 	calc_size *= stripe_len;
 
-- 
cgit v1.1


From 309361e09ca9e9670dc8664e5d14125bf82078af Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Tue, 6 Apr 2010 13:45:39 +0300
Subject: proc: copy_to_user() returns unsigned

copy_to_user() returns the number of bytes left to be copied.

This was a typo from: d82ef020cf31 "proc: pagemap: Hold mmap_sem during
page walk".

Signed-off-by: Dan Carpenter <error27@gmail.com>
Acked-by: Matt Mackall <mpm@selenic.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/task_mmu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index caf0337..a05a669 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -800,7 +800,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
 		start_vaddr = end;
 
 		len = min(count, PM_ENTRY_BYTES * pm.pos);
-		if (copy_to_user(buf, pm.buffer, len) < 0) {
+		if (copy_to_user(buf, pm.buffer, len)) {
 			ret = -EFAULT;
 			goto out_free;
 		}
-- 
cgit v1.1


From f05337c6ac48d19d354e0640a8eb8fc884f82bcc Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <piastryyy@gmail.com>
Date: Mon, 5 Apr 2010 09:59:14 +0400
Subject: not overwriting file_lock structure after GET_LK

If we have preventing lock, cifs should overwrite file_lock structure
with info about preventing lock. If we haven't preventing lock, cifs
should leave it unchanged except for the lock type (change it to F_UNLCK).

Signed-off-by: Pavel Shilovsky <piastryyy@gmail.com>
Reviewed-by: Jeff Layton <jlayton@samba.org>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/cifssmb.c | 15 ++++++++++++++-
 fs/cifs/file.c    | 28 ++++++++++++++++++++++++++--
 2 files changed, 40 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index f213b8a..184a399 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -1810,8 +1810,21 @@ CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon,
 		}
 		parm_data = (struct cifs_posix_lock *)
 			((char *)&pSMBr->hdr.Protocol + data_offset);
-		if (parm_data->lock_type == cpu_to_le16(CIFS_UNLCK))
+		if (parm_data->lock_type == __constant_cpu_to_le16(CIFS_UNLCK))
 			pLockData->fl_type = F_UNLCK;
+		else {
+			if (parm_data->lock_type ==
+					__constant_cpu_to_le16(CIFS_RDLCK))
+				pLockData->fl_type = F_RDLCK;
+			else if (parm_data->lock_type ==
+					__constant_cpu_to_le16(CIFS_WRLCK))
+				pLockData->fl_type = F_WRLCK;
+
+			pLockData->fl_start = parm_data->start;
+			pLockData->fl_end = parm_data->start +
+						parm_data->length - 1;
+			pLockData->fl_pid = parm_data->pid;
+		}
 	}
 
 plk_err_exit:
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index ca2ba7a..d9e8650 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -838,8 +838,32 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
 
 		} else {
 			/* if rc == ERR_SHARING_VIOLATION ? */
-			rc = 0;	/* do not change lock type to unlock
-				   since range in use */
+			rc = 0;
+
+			if (lockType & LOCKING_ANDX_SHARED_LOCK) {
+				pfLock->fl_type = F_WRLCK;
+			} else {
+				rc = CIFSSMBLock(xid, tcon, netfid, length,
+					pfLock->fl_start, 0, 1,
+					lockType | LOCKING_ANDX_SHARED_LOCK,
+					0 /* wait flag */);
+				if (rc == 0) {
+					rc = CIFSSMBLock(xid, tcon, netfid,
+						length, pfLock->fl_start, 1, 0,
+						lockType |
+						LOCKING_ANDX_SHARED_LOCK,
+						0 /* wait flag */);
+					pfLock->fl_type = F_RDLCK;
+					if (rc != 0)
+						cERROR(1, ("Error unlocking "
+						"previously locked range %d "
+						"during test of lock", rc));
+					rc = 0;
+				} else {
+					pfLock->fl_type = F_WRLCK;
+					rc = 0;
+				}
+			}
 		}
 
 		FreeXid(xid);
-- 
cgit v1.1


From 55ab3a1ff843e3f0e24d2da44e71bffa5d853010 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Tue, 6 Apr 2010 14:34:58 -0700
Subject: raw: fsync method is now required

Commit 148f948ba877f4d3cdef036b1ff6d9f68986706a (vfs: Introduce new
helpers for syncing after writing to O_SYNC file or IS_SYNC inode) broke
the raw driver.

We now call through generic_file_aio_write -> generic_write_sync ->
vfs_fsync_range.  vfs_fsync_range has:

        if (!fop || !fop->fsync) {
                ret = -EINVAL;
                goto out;
        }

But drivers/char/raw.c doesn't set an fsync method.

We have two options: fix it or remove the raw driver completely.  I'm
happy to do either, the fact this has been broken for so long suggests it
is rarely used.

The patch below adds an fsync method to the raw driver.  My knowledge of
the block layer is pretty sketchy so this could do with a once over.

If we instead decide to remove the raw driver, this patch might still be
useful as a backport to 2.6.33 and 2.6.32.

Signed-off-by: Anton Blanchard <anton@samba.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Jens Axboe <jens.axboe@oracle.com>
Reviewed-by: Jeff Moyer <jmoyer@redhat.com>
Tested-by: Jeff Moyer <jmoyer@redhat.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/block_dev.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/block_dev.c b/fs/block_dev.c
index d11d028..8db62b2 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -404,7 +404,7 @@ static loff_t block_llseek(struct file *file, loff_t offset, int origin)
  *	NULL first argument is nfsd_sync_dir() and that's not a directory.
  */
  
-static int block_fsync(struct file *filp, struct dentry *dentry, int datasync)
+int block_fsync(struct file *filp, struct dentry *dentry, int datasync)
 {
 	struct block_device *bdev = I_BDEV(filp->f_mapping->host);
 	int error;
@@ -418,6 +418,7 @@ static int block_fsync(struct file *filp, struct dentry *dentry, int datasync)
 		error = 0;
 	return error;
 }
+EXPORT_SYMBOL(block_fsync);
 
 /*
  * pseudo-fs
-- 
cgit v1.1


From b1dd3b2843b3b73b7fc2ee47d96310cd1c051371 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Tue, 6 Apr 2010 14:35:00 -0700
Subject: vfs: rename block_fsync() to blkdev_fsync()

Requested by hch, for consistency now it is exported.

Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Anton Blanchard <anton@samba.org>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Jan Kara <jack@suse.cz>
Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/block_dev.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 8db62b2..2a6d019 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -404,7 +404,7 @@ static loff_t block_llseek(struct file *file, loff_t offset, int origin)
  *	NULL first argument is nfsd_sync_dir() and that's not a directory.
  */
  
-int block_fsync(struct file *filp, struct dentry *dentry, int datasync)
+int blkdev_fsync(struct file *filp, struct dentry *dentry, int datasync)
 {
 	struct block_device *bdev = I_BDEV(filp->f_mapping->host);
 	int error;
@@ -418,7 +418,7 @@ int block_fsync(struct file *filp, struct dentry *dentry, int datasync)
 		error = 0;
 	return error;
 }
-EXPORT_SYMBOL(block_fsync);
+EXPORT_SYMBOL(blkdev_fsync);
 
 /*
  * pseudo-fs
@@ -1482,7 +1482,7 @@ const struct file_operations def_blk_fops = {
   	.aio_read	= generic_file_aio_read,
 	.aio_write	= blkdev_aio_write,
 	.mmap		= generic_file_mmap,
-	.fsync		= block_fsync,
+	.fsync		= blkdev_fsync,
 	.unlocked_ioctl	= block_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= compat_blkdev_ioctl,
-- 
cgit v1.1


From 116354d177ba2da37e91cf884e3d11e67f825efd Mon Sep 17 00:00:00 2001
From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Date: Tue, 6 Apr 2010 14:35:04 -0700
Subject: pagemap: fix pfn calculation for hugepage

When we look into pagemap using page-types with option -p, the value of
pfn for hugepages looks wrong (see below.) This is because pte was
evaluated only once for one vma although it should be updated for each
hugepage.  This patch fixes it.

  $ page-types -p 3277 -Nl -b huge
  voffset   offset  len     flags
  7f21e8a00 11e400  1       ___U___________H_G________________
  7f21e8a01 11e401  1ff     ________________TG________________
               ^^^
  7f21e8c00 11e400  1       ___U___________H_G________________
  7f21e8c01 11e401  1ff     ________________TG________________
               ^^^

One hugepage contains 1 head page and 511 tail pages in x86_64 and each
two lines represent each hugepage.  Voffset and offset mean virtual
address and physical address in the page unit, respectively.  The
different hugepages should not have the same offset value.

With this patch applied:

  $ page-types -p 3386 -Nl -b huge
  voffset   offset   len    flags
  7fec7a600 112c00   1      ___UD__________H_G________________
  7fec7a601 112c01   1ff    ________________TG________________
               ^^^
  7fec7a800 113200   1      ___UD__________H_G________________
  7fec7a801 113201   1ff    ________________TG________________
               ^^^
               OK

More info:

- This patch modifies walk_page_range()'s hugepage walker.  But the
  change only affects pagemap_read(), which is the only caller of hugepage
  callback.

- Without this patch, hugetlb_entry() callback is called per vma, that
  doesn't match the natural expectation from its name.

- With this patch, hugetlb_entry() is called per hugepte entry and the
  callback can become much simpler.

Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Matt Mackall <mpm@selenic.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/task_mmu.c | 27 +++++++--------------------
 1 file changed, 7 insertions(+), 20 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index a05a669..0705534 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -662,31 +662,18 @@ static u64 huge_pte_to_pagemap_entry(pte_t pte, int offset)
 	return pme;
 }
 
-static int pagemap_hugetlb_range(pte_t *pte, unsigned long addr,
-				 unsigned long end, struct mm_walk *walk)
+/* This function walks within one hugetlb entry in the single call */
+static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask,
+				 unsigned long addr, unsigned long end,
+				 struct mm_walk *walk)
 {
-	struct vm_area_struct *vma;
 	struct pagemapread *pm = walk->private;
-	struct hstate *hs = NULL;
 	int err = 0;
+	u64 pfn;
 
-	vma = find_vma(walk->mm, addr);
-	if (vma)
-		hs = hstate_vma(vma);
 	for (; addr != end; addr += PAGE_SIZE) {
-		u64 pfn = PM_NOT_PRESENT;
-
-		if (vma && (addr >= vma->vm_end)) {
-			vma = find_vma(walk->mm, addr);
-			if (vma)
-				hs = hstate_vma(vma);
-		}
-
-		if (vma && (vma->vm_start <= addr) && is_vm_hugetlb_page(vma)) {
-			/* calculate pfn of the "raw" page in the hugepage. */
-			int offset = (addr & ~huge_page_mask(hs)) >> PAGE_SHIFT;
-			pfn = huge_pte_to_pagemap_entry(*pte, offset);
-		}
+		int offset = (addr & ~hmask) >> PAGE_SHIFT;
+		pfn = huge_pte_to_pagemap_entry(*pte, offset);
 		err = add_to_pagemap(addr, pfn, pm);
 		if (err)
 			return err;
-- 
cgit v1.1


From cc4fc29e59c386919a7674e203be7822dc968dc0 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 6 Apr 2010 14:35:09 -0700
Subject: fs-cache: order the debugfs stats correctly

Order the debugfs statistics correctly.  The values displayed through a
seq_printf() statement should be in the same order as the names in the
format string.

In the 'Lookups' line, objects created ('crt=') and lookups timed out
('tmo=') have their values transposed.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/fscache/stats.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c
index 46435f3..4765190 100644
--- a/fs/fscache/stats.c
+++ b/fs/fscache/stats.c
@@ -165,8 +165,8 @@ static int fscache_stats_show(struct seq_file *m, void *v)
 		   atomic_read(&fscache_n_object_lookups),
 		   atomic_read(&fscache_n_object_lookups_negative),
 		   atomic_read(&fscache_n_object_lookups_positive),
-		   atomic_read(&fscache_n_object_lookups_timed_out),
-		   atomic_read(&fscache_n_object_created));
+		   atomic_read(&fscache_n_object_created),
+		   atomic_read(&fscache_n_object_lookups_timed_out));
 
 	seq_printf(m, "Updates: n=%u nul=%u run=%u\n",
 		   atomic_read(&fscache_n_updates),
-- 
cgit v1.1


From 04287f975e68038051eb9c79896866d36610b8e0 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ZenIV.linux.org.uk>
Date: Thu, 8 Apr 2010 00:06:07 +0100
Subject: Have nfs ->d_revalidate() report errors properly

	If nfs atomic open implementation ends up doing open request from
->d_revalidate() codepath and gets an error from server, return that error
to caller explicitly and don't bother with lookup_instantiate_filp() at all.
->d_revalidate() can return an error itself just fine...

See
	http://bugzilla.kernel.org/show_bug.cgi?id=15674
	http://marc.info/?l=linux-kernel&m=126988782722711&w=2

for original report.

Reported-by: Daniel J Blueman <daniel.blueman@gmail.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/nfs/nfs4proc.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index d79a7b3..fe0cd9e 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2068,8 +2068,7 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, st
 			case -EDQUOT:
 			case -ENOSPC:
 			case -EROFS:
-				lookup_instantiate_filp(nd, (struct dentry *)state, NULL);
-				return 1;
+				return PTR_ERR(state);
 			default:
 				goto out_drop;
 		}
-- 
cgit v1.1


From 69ecbbedac8e353bbd924fad16fed0c7c54e6382 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Mon, 15 Mar 2010 11:21:13 +0300
Subject: udf: potential integer overflow

bloc->logicalBlockNum is unsigned so it's never less than zero.

When I saw that, it made me worry that "bloc->logicalBlockNum + count"
could overflow.  That's why I changed the check for less than zero
to an overflow check.  (The test works because "count" is also
unsigned.)

Signed-off-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/udf/balloc.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c
index 19626e2..9a9378b 100644
--- a/fs/udf/balloc.c
+++ b/fs/udf/balloc.c
@@ -125,9 +125,8 @@ static void udf_bitmap_free_blocks(struct super_block *sb,
 
 	mutex_lock(&sbi->s_alloc_mutex);
 	partmap = &sbi->s_partmaps[bloc->partitionReferenceNum];
-	if (bloc->logicalBlockNum < 0 ||
-	    (bloc->logicalBlockNum + count) >
-		partmap->s_partition_len) {
+	if (bloc->logicalBlockNum + count < count ||
+	    (bloc->logicalBlockNum + count) > partmap->s_partition_len) {
 		udf_debug("%d < %d || %d + %d > %d\n",
 			  bloc->logicalBlockNum, 0, bloc->logicalBlockNum,
 			  count, partmap->s_partition_len);
@@ -393,9 +392,8 @@ static void udf_table_free_blocks(struct super_block *sb,
 
 	mutex_lock(&sbi->s_alloc_mutex);
 	partmap = &sbi->s_partmaps[bloc->partitionReferenceNum];
-	if (bloc->logicalBlockNum < 0 ||
-	    (bloc->logicalBlockNum + count) >
-		partmap->s_partition_len) {
+	if (bloc->logicalBlockNum + count < count ||
+	    (bloc->logicalBlockNum + count) > partmap->s_partition_len) {
 		udf_debug("%d < %d || %d + %d > %d\n",
 			  bloc->logicalBlockNum, 0, bloc->logicalBlockNum, count,
 			  partmap->s_partition_len);
-- 
cgit v1.1


From c15d0fc0fc399d2639240b35ad7ed93ed5a59412 Mon Sep 17 00:00:00 2001
From: Dmitry Monakhov <dmonakhov@openvz.org>
Date: Mon, 29 Mar 2010 11:05:21 +0400
Subject: udf: add speciffic ->setattr callback

generic setattr not longer responsible for quota transfer.
use udf_setattr for all udf's inodes.

Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/udf/file.c    | 2 +-
 fs/udf/inode.c   | 2 +-
 fs/udf/namei.c   | 9 ++++++++-
 fs/udf/udfdecl.h | 3 ++-
 4 files changed, 12 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/udf/file.c b/fs/udf/file.c
index 1eb0677..4b6a46c 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -218,7 +218,7 @@ const struct file_operations udf_file_operations = {
 	.llseek			= generic_file_llseek,
 };
 
-static int udf_setattr(struct dentry *dentry, struct iattr *iattr)
+int udf_setattr(struct dentry *dentry, struct iattr *iattr)
 {
 	struct inode *inode = dentry->d_inode;
 	int error;
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index bb863fe..8a3fbd1 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -1314,7 +1314,7 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
 		break;
 	case ICBTAG_FILE_TYPE_SYMLINK:
 		inode->i_data.a_ops = &udf_symlink_aops;
-		inode->i_op = &page_symlink_inode_operations;
+		inode->i_op = &udf_symlink_inode_operations;
 		inode->i_mode = S_IFLNK | S_IRWXUGO;
 		break;
 	case ICBTAG_FILE_TYPE_MAIN:
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index db423ab..7581602 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -925,7 +925,7 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
 	iinfo = UDF_I(inode);
 	inode->i_mode = S_IFLNK | S_IRWXUGO;
 	inode->i_data.a_ops = &udf_symlink_aops;
-	inode->i_op = &page_symlink_inode_operations;
+	inode->i_op = &udf_symlink_inode_operations;
 
 	if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) {
 		struct kernel_lb_addr eloc;
@@ -1393,6 +1393,7 @@ const struct export_operations udf_export_ops = {
 const struct inode_operations udf_dir_inode_operations = {
 	.lookup				= udf_lookup,
 	.create				= udf_create,
+	.setattr			= udf_setattr,
 	.link				= udf_link,
 	.unlink				= udf_unlink,
 	.symlink			= udf_symlink,
@@ -1401,3 +1402,9 @@ const struct inode_operations udf_dir_inode_operations = {
 	.mknod				= udf_mknod,
 	.rename				= udf_rename,
 };
+const struct inode_operations udf_symlink_inode_operations = {
+	.readlink	= generic_readlink,
+	.follow_link	= page_follow_link_light,
+	.put_link	= page_put_link,
+	.setattr	= udf_setattr,
+};
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index 4223ac8..702a114 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -76,6 +76,7 @@ extern const struct inode_operations udf_dir_inode_operations;
 extern const struct file_operations udf_dir_operations;
 extern const struct inode_operations udf_file_inode_operations;
 extern const struct file_operations udf_file_operations;
+extern const struct inode_operations udf_symlink_inode_operations;
 extern const struct address_space_operations udf_aops;
 extern const struct address_space_operations udf_adinicb_aops;
 extern const struct address_space_operations udf_symlink_aops;
@@ -131,7 +132,7 @@ extern int udf_write_fi(struct inode *inode, struct fileIdentDesc *,
 /* file.c */
 extern int udf_ioctl(struct inode *, struct file *, unsigned int,
 		     unsigned long);
-
+extern int udf_setattr(struct dentry *dentry, struct iattr *iattr);
 /* inode.c */
 extern struct inode *udf_iget(struct super_block *, struct kernel_lb_addr *);
 extern int udf_sync_inode(struct inode *);
-- 
cgit v1.1


From 2844a76a25a2fc2f5025cf128c95a14d86146d33 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Fri, 9 Apr 2010 15:46:42 -0700
Subject: ceph: decode v5 of osdmap (pool names) [protocol change]

Teach the client to decode an updated format for the osdmap.  The new
format includes pool names, which will be useful shortly.  Get this change
in earlier rather than later.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/osdmap.c | 180 +++++++++++++++++++++++++++++++++----------------------
 fs/ceph/osdmap.h |   1 +
 fs/ceph/rados.h  |   6 +-
 3 files changed, 114 insertions(+), 73 deletions(-)

(limited to 'fs')

diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c
index d82fe87..7526d6d 100644
--- a/fs/ceph/osdmap.c
+++ b/fs/ceph/osdmap.c
@@ -312,71 +312,6 @@ bad:
 	return ERR_PTR(err);
 }
 
-
-/*
- * osd map
- */
-void ceph_osdmap_destroy(struct ceph_osdmap *map)
-{
-	dout("osdmap_destroy %p\n", map);
-	if (map->crush)
-		crush_destroy(map->crush);
-	while (!RB_EMPTY_ROOT(&map->pg_temp)) {
-		struct ceph_pg_mapping *pg =
-			rb_entry(rb_first(&map->pg_temp),
-				 struct ceph_pg_mapping, node);
-		rb_erase(&pg->node, &map->pg_temp);
-		kfree(pg);
-	}
-	while (!RB_EMPTY_ROOT(&map->pg_pools)) {
-		struct ceph_pg_pool_info *pi =
-			rb_entry(rb_first(&map->pg_pools),
-				 struct ceph_pg_pool_info, node);
-		rb_erase(&pi->node, &map->pg_pools);
-		kfree(pi);
-	}
-	kfree(map->osd_state);
-	kfree(map->osd_weight);
-	kfree(map->osd_addr);
-	kfree(map);
-}
-
-/*
- * adjust max osd value.  reallocate arrays.
- */
-static int osdmap_set_max_osd(struct ceph_osdmap *map, int max)
-{
-	u8 *state;
-	struct ceph_entity_addr *addr;
-	u32 *weight;
-
-	state = kcalloc(max, sizeof(*state), GFP_NOFS);
-	addr = kcalloc(max, sizeof(*addr), GFP_NOFS);
-	weight = kcalloc(max, sizeof(*weight), GFP_NOFS);
-	if (state == NULL || addr == NULL || weight == NULL) {
-		kfree(state);
-		kfree(addr);
-		kfree(weight);
-		return -ENOMEM;
-	}
-
-	/* copy old? */
-	if (map->osd_state) {
-		memcpy(state, map->osd_state, map->max_osd*sizeof(*state));
-		memcpy(addr, map->osd_addr, map->max_osd*sizeof(*addr));
-		memcpy(weight, map->osd_weight, map->max_osd*sizeof(*weight));
-		kfree(map->osd_state);
-		kfree(map->osd_addr);
-		kfree(map->osd_weight);
-	}
-
-	map->osd_state = state;
-	map->osd_weight = weight;
-	map->osd_addr = addr;
-	map->max_osd = max;
-	return 0;
-}
-
 /*
  * rbtree of pg_mapping for handling pg_temp (explicit mapping of pgid
  * to a set of osds)
@@ -480,6 +415,13 @@ static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, int id)
 	return NULL;
 }
 
+static void __remove_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *pi)
+{
+	rb_erase(&pi->node, root);
+	kfree(pi->name);
+	kfree(pi);
+}
+
 void __decode_pool(void **p, struct ceph_pg_pool_info *pi)
 {
 	ceph_decode_copy(p, &pi->v, sizeof(pi->v));
@@ -488,6 +430,98 @@ void __decode_pool(void **p, struct ceph_pg_pool_info *pi)
 	*p += le32_to_cpu(pi->v.num_removed_snap_intervals) * sizeof(u64) * 2;
 }
 
+static int __decode_pool_names(void **p, void *end, struct ceph_osdmap *map)
+{
+	struct ceph_pg_pool_info *pi;
+	u32 num, len, pool;
+
+	ceph_decode_32_safe(p, end, num, bad);
+	dout(" %d pool names\n", num);
+	while (num--) {
+		ceph_decode_32_safe(p, end, pool, bad);
+		ceph_decode_32_safe(p, end, len, bad);
+		dout("  pool %d len %d\n", pool, len);
+		pi = __lookup_pg_pool(&map->pg_pools, pool);
+		if (pi) {
+			kfree(pi->name);
+			pi->name = kmalloc(len + 1, GFP_NOFS);
+			if (pi->name) {
+				memcpy(pi->name, *p, len);
+				pi->name[len] = '\0';
+				dout("  name is %s\n", pi->name);
+			}
+		}
+		*p += len;
+	}
+	return 0;
+
+bad:
+	return -EINVAL;
+}
+
+/*
+ * osd map
+ */
+void ceph_osdmap_destroy(struct ceph_osdmap *map)
+{
+	dout("osdmap_destroy %p\n", map);
+	if (map->crush)
+		crush_destroy(map->crush);
+	while (!RB_EMPTY_ROOT(&map->pg_temp)) {
+		struct ceph_pg_mapping *pg =
+			rb_entry(rb_first(&map->pg_temp),
+				 struct ceph_pg_mapping, node);
+		rb_erase(&pg->node, &map->pg_temp);
+		kfree(pg);
+	}
+	while (!RB_EMPTY_ROOT(&map->pg_pools)) {
+		struct ceph_pg_pool_info *pi =
+			rb_entry(rb_first(&map->pg_pools),
+				 struct ceph_pg_pool_info, node);
+		__remove_pg_pool(&map->pg_pools, pi);
+	}
+	kfree(map->osd_state);
+	kfree(map->osd_weight);
+	kfree(map->osd_addr);
+	kfree(map);
+}
+
+/*
+ * adjust max osd value.  reallocate arrays.
+ */
+static int osdmap_set_max_osd(struct ceph_osdmap *map, int max)
+{
+	u8 *state;
+	struct ceph_entity_addr *addr;
+	u32 *weight;
+
+	state = kcalloc(max, sizeof(*state), GFP_NOFS);
+	addr = kcalloc(max, sizeof(*addr), GFP_NOFS);
+	weight = kcalloc(max, sizeof(*weight), GFP_NOFS);
+	if (state == NULL || addr == NULL || weight == NULL) {
+		kfree(state);
+		kfree(addr);
+		kfree(weight);
+		return -ENOMEM;
+	}
+
+	/* copy old? */
+	if (map->osd_state) {
+		memcpy(state, map->osd_state, map->max_osd*sizeof(*state));
+		memcpy(addr, map->osd_addr, map->max_osd*sizeof(*addr));
+		memcpy(weight, map->osd_weight, map->max_osd*sizeof(*weight));
+		kfree(map->osd_state);
+		kfree(map->osd_addr);
+		kfree(map->osd_weight);
+	}
+
+	map->osd_state = state;
+	map->osd_weight = weight;
+	map->osd_addr = addr;
+	map->max_osd = max;
+	return 0;
+}
+
 /*
  * decode a full map.
  */
@@ -524,7 +558,7 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
 	ceph_decode_32_safe(p, end, max, bad);
 	while (max--) {
 		ceph_decode_need(p, end, 4 + 1 + sizeof(pi->v), bad);
-		pi = kmalloc(sizeof(*pi), GFP_NOFS);
+		pi = kzalloc(sizeof(*pi), GFP_NOFS);
 		if (!pi)
 			goto bad;
 		pi->id = ceph_decode_32(p);
@@ -537,6 +571,10 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
 		__decode_pool(p, pi);
 		__insert_pg_pool(&map->pg_pools, pi);
 	}
+
+	if (version >= 5 && __decode_pool_names(p, end, map) < 0)
+		goto bad;
+
 	ceph_decode_32_safe(p, end, map->pool_max, bad);
 
 	ceph_decode_32_safe(p, end, map->flags, bad);
@@ -710,7 +748,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
 		}
 		pi = __lookup_pg_pool(&map->pg_pools, pool);
 		if (!pi) {
-			pi = kmalloc(sizeof(*pi), GFP_NOFS);
+			pi = kzalloc(sizeof(*pi), GFP_NOFS);
 			if (!pi) {
 				err = -ENOMEM;
 				goto bad;
@@ -720,6 +758,8 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
 		}
 		__decode_pool(p, pi);
 	}
+	if (version >= 5 && __decode_pool_names(p, end, map) < 0)
+		goto bad;
 
 	/* old_pool */
 	ceph_decode_32_safe(p, end, len, bad);
@@ -728,10 +768,8 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
 
 		ceph_decode_32_safe(p, end, pool, bad);
 		pi = __lookup_pg_pool(&map->pg_pools, pool);
-		if (pi) {
-			rb_erase(&pi->node, &map->pg_pools);
-			kfree(pi);
-		}
+		if (pi)
+			__remove_pg_pool(&map->pg_pools, pi);
 	}
 
 	/* new_up */
diff --git a/fs/ceph/osdmap.h b/fs/ceph/osdmap.h
index 1fb55af..8bc9f1e 100644
--- a/fs/ceph/osdmap.h
+++ b/fs/ceph/osdmap.h
@@ -23,6 +23,7 @@ struct ceph_pg_pool_info {
 	int id;
 	struct ceph_pg_pool v;
 	int pg_num_mask, pgp_num_mask, lpg_num_mask, lpgp_num_mask;
+	char *name;
 };
 
 struct ceph_pg_mapping {
diff --git a/fs/ceph/rados.h b/fs/ceph/rados.h
index 26ac8b8..a1fc1d0 100644
--- a/fs/ceph/rados.h
+++ b/fs/ceph/rados.h
@@ -11,8 +11,10 @@
 /*
  * osdmap encoding versions
  */
-#define CEPH_OSDMAP_INC_VERSION 4
-#define CEPH_OSDMAP_VERSION     4
+#define CEPH_OSDMAP_INC_VERSION     5
+#define CEPH_OSDMAP_INC_VERSION_EXT 5
+#define CEPH_OSDMAP_VERSION         5
+#define CEPH_OSDMAP_VERSION_EXT     5
 
 /*
  * fs id
-- 
cgit v1.1


From 80e60639f1b7c121a7fea53920c5a4b94009361a Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 25 Mar 2010 13:51:05 -0400
Subject: NFSv4: Fall back to ordinary lookup if nfs4_atomic_open() returns
 EISDIR

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: stable@kernel.org
---
 fs/nfs/dir.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index c6f2750..be46f26 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1025,12 +1025,12 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
 				res = NULL;
 				goto out;
 			/* This turned out not to be a regular file */
+			case -EISDIR:
 			case -ENOTDIR:
 				goto no_open;
 			case -ELOOP:
 				if (!(nd->intent.open.flags & O_NOFOLLOW))
 					goto no_open;
-			/* case -EISDIR: */
 			/* case -EINVAL: */
 			default:
 				goto out;
-- 
cgit v1.1


From 1544fa0f7a46241582abc48f07b74f3d846379e4 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 25 Mar 2010 13:54:49 -0400
Subject: NFS: Fix the mode calculation in nfs_find_open_context

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/inode.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 737128f..50a56ed 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -623,10 +623,10 @@ struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_c
 	list_for_each_entry(pos, &nfsi->open_files, list) {
 		if (cred != NULL && pos->cred != cred)
 			continue;
-		if ((pos->mode & mode) == mode) {
-			ctx = get_nfs_open_context(pos);
-			break;
-		}
+		if ((pos->mode & (FMODE_READ|FMODE_WRITE)) != mode)
+			continue;
+		ctx = get_nfs_open_context(pos);
+		break;
 	}
 	spin_unlock(&inode->i_lock);
 	return ctx;
-- 
cgit v1.1


From b80c3cb628f0ebc241b02e38dd028969fb8026a2 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 9 Apr 2010 19:07:07 -0400
Subject: NFS: Ensure that writeback_single_inode() calls write_inode() when
 syncing

Since writeback_single_inode() checks the inode->i_state flags _before_ it
flushes out the data, we need to ensure that the I_DIRTY_DATASYNC flag is
already set. Otherwise we risk not seeing a call to write_inode(), which
again means that we break fsync() et al...

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/write.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 53ff70e..7f40ea3 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -421,6 +421,7 @@ static void
 nfs_mark_request_dirty(struct nfs_page *req)
 {
 	__set_page_dirty_nobuffers(req->wb_page);
+	__mark_inode_dirty(req->wb_page->mapping->host, I_DIRTY_DATASYNC);
 }
 
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
@@ -660,6 +661,7 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
 	req = nfs_setup_write_request(ctx, page, offset, count);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
+	nfs_mark_request_dirty(req);
 	/* Update file length */
 	nfs_grow_file(page, offset, count);
 	nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
@@ -739,8 +741,6 @@ int nfs_updatepage(struct file *file, struct page *page,
 	status = nfs_writepage_setup(ctx, page, offset, count);
 	if (status < 0)
 		nfs_set_pageerror(page);
-	else
-		__set_page_dirty_nobuffers(page);
 
 	dprintk("NFS:       nfs_updatepage returns %d (isize %lld)\n",
 			status, (long long)i_size_read(inode));
-- 
cgit v1.1


From a6305ddb080fb483ca41ca56cacb6f96089f0c8e Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 9 Apr 2010 19:07:08 -0400
Subject: NFS: Fix a race with the new commit code

This patch fixes a race which occurs due to the fact that we release the
PG_writeback flag while still holding the nfs_page locked.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/write.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 7f40ea3..40297c4 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -201,6 +201,7 @@ static int nfs_set_page_writeback(struct page *page)
 		struct inode *inode = page->mapping->host;
 		struct nfs_server *nfss = NFS_SERVER(inode);
 
+		page_cache_get(page);
 		if (atomic_long_inc_return(&nfss->writeback) >
 				NFS_CONGESTION_ON_THRESH) {
 			set_bdi_congested(&nfss->backing_dev_info,
@@ -216,6 +217,7 @@ static void nfs_end_page_writeback(struct page *page)
 	struct nfs_server *nfss = NFS_SERVER(inode);
 
 	end_page_writeback(page);
+	page_cache_release(page);
 	if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
 		clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
 }
@@ -665,6 +667,7 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
 	/* Update file length */
 	nfs_grow_file(page, offset, count);
 	nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
+	nfs_mark_request_dirty(req);
 	nfs_clear_page_tag_locked(req);
 	return 0;
 }
@@ -749,13 +752,12 @@ int nfs_updatepage(struct file *file, struct page *page,
 
 static void nfs_writepage_release(struct nfs_page *req)
 {
+	struct page *page = req->wb_page;
 
-	if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req)) {
-		nfs_end_page_writeback(req->wb_page);
+	if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req))
 		nfs_inode_remove_request(req);
-	} else
-		nfs_end_page_writeback(req->wb_page);
 	nfs_clear_page_tag_locked(req);
+	nfs_end_page_writeback(page);
 }
 
 static int flush_task_priority(int how)
@@ -847,9 +849,11 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
  */
 static void nfs_redirty_request(struct nfs_page *req)
 {
+	struct page *page = req->wb_page;
+
 	nfs_mark_request_dirty(req);
-	nfs_end_page_writeback(req->wb_page);
 	nfs_clear_page_tag_locked(req);
+	nfs_end_page_writeback(page);
 }
 
 /*
@@ -1084,16 +1088,15 @@ static void nfs_writeback_release_full(void *calldata)
 		if (nfs_write_need_commit(data)) {
 			memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
 			nfs_mark_request_commit(req);
-			nfs_end_page_writeback(page);
 			dprintk(" marked for commit\n");
 			goto next;
 		}
 		dprintk(" OK\n");
 remove_request:
-		nfs_end_page_writeback(page);
 		nfs_inode_remove_request(req);
 	next:
 		nfs_clear_page_tag_locked(req);
+		nfs_end_page_writeback(page);
 	}
 	nfs_writedata_release(calldata);
 }
-- 
cgit v1.1


From 2c61be0a9478258f77b66208a0c4b1f5f8161c3c Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 9 Apr 2010 19:54:50 -0400
Subject: NFS: Ensure that the WRITE and COMMIT RPC calls are always
 uninterruptible

We always want to ensure that WRITE and COMMIT completes, whether or not
the user presses ^C. Do this by making the call asynchronous, and allowing
the user to do an interruptible wait for rpc_task completion.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/write.c | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 40297c4..de38d63 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -781,7 +781,6 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
 		int how)
 {
 	struct inode *inode = req->wb_context->path.dentry->d_inode;
-	int flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
 	int priority = flush_task_priority(how);
 	struct rpc_task *task;
 	struct rpc_message msg = {
@@ -796,9 +795,10 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
 		.callback_ops = call_ops,
 		.callback_data = data,
 		.workqueue = nfsiod_workqueue,
-		.flags = flags,
+		.flags = RPC_TASK_ASYNC,
 		.priority = priority,
 	};
+	int ret = 0;
 
 	/* Set up the RPC argument and reply structs
 	 * NB: take care not to mess about with data->commit et al. */
@@ -837,10 +837,18 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
 		(unsigned long long)data->args.offset);
 
 	task = rpc_run_task(&task_setup_data);
-	if (IS_ERR(task))
-		return PTR_ERR(task);
+	if (IS_ERR(task)) {
+		ret = PTR_ERR(task);
+		goto out;
+	}
+	if (how & FLUSH_SYNC) {
+		ret = rpc_wait_for_completion_task(task);
+		if (ret == 0)
+			ret = task->tk_status;
+	}
 	rpc_put_task(task);
-	return 0;
+out:
+	return ret;
 }
 
 /* If a nfs_flush_* function fails, it should remove reqs from @head and
@@ -1210,7 +1218,6 @@ static int nfs_commit_rpcsetup(struct list_head *head,
 {
 	struct nfs_page *first = nfs_list_entry(head->next);
 	struct inode *inode = first->wb_context->path.dentry->d_inode;
-	int flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
 	int priority = flush_task_priority(how);
 	struct rpc_task *task;
 	struct rpc_message msg = {
@@ -1225,7 +1232,7 @@ static int nfs_commit_rpcsetup(struct list_head *head,
 		.callback_ops = &nfs_commit_ops,
 		.callback_data = data,
 		.workqueue = nfsiod_workqueue,
-		.flags = flags,
+		.flags = RPC_TASK_ASYNC,
 		.priority = priority,
 	};
 
@@ -1255,6 +1262,8 @@ static int nfs_commit_rpcsetup(struct list_head *head,
 	task = rpc_run_task(&task_setup_data);
 	if (IS_ERR(task))
 		return PTR_ERR(task);
+	if (how & FLUSH_SYNC)
+		rpc_wait_for_completion_task(task);
 	rpc_put_task(task);
 	return 0;
 }
-- 
cgit v1.1


From be3bd2223b89d270853302ab0a5909fa875fd831 Mon Sep 17 00:00:00 2001
From: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Date: Mon, 12 Apr 2010 01:51:03 +0900
Subject: nilfs2: fix typo "numer" -> "number" in alloc.c

Fixes the typo found in a warning message of a persistent object
allocator function.

Signed-off-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
---
 fs/nilfs2/alloc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c
index 3f959f1..c2a1387 100644
--- a/fs/nilfs2/alloc.c
+++ b/fs/nilfs2/alloc.c
@@ -425,7 +425,7 @@ void nilfs_palloc_abort_alloc_entry(struct inode *inode,
 	bitmap = bitmap_kaddr + bh_offset(req->pr_bitmap_bh);
 	if (!nilfs_clear_bit_atomic(nilfs_mdt_bgl_lock(inode, group),
 				    group_offset, bitmap))
-		printk(KERN_WARNING "%s: entry numer %llu already freed\n",
+		printk(KERN_WARNING "%s: entry number %llu already freed\n",
 		       __func__, (unsigned long long)req->pr_entry_nr);
 
 	nilfs_palloc_group_desc_add_entries(inode, group, desc, 1);
-- 
cgit v1.1


From 0df5dd4aae211edeeeb84f7f84f6d093406d7c22 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sun, 11 Apr 2010 16:48:44 -0400
Subject: NFSv4: fix delegated locking

Arnaud Giersch reports that NFSv4 locking is broken when we hold a
delegation since commit 8e469ebd6dc32cbaf620e134d79f740bf0ebab79 (NFSv4:
Don't allow posix locking against servers that don't support it).

According to Arnaud, the lock succeeds the first time he opens the file
(since we cannot do a delegated open) but then fails after we start using
delegated opens.

The following patch fixes it by ensuring that locking behaviour is
governed by a per-filesystem capability flag that is initially set, but
gets cleared if the server ever returns an OPEN without the
NFS4_OPEN_RESULT_LOCKTYPE_POSIX flag being set.

Reported-by: Arnaud Giersch <arnaud.giersch@iut-bm.univ-fcomte.fr>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: stable@kernel.org
---
 fs/nfs/client.c   | 3 ++-
 fs/nfs/nfs4proc.c | 4 +++-
 2 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 2a3d352..a8766c4 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -1294,7 +1294,8 @@ static int nfs4_init_server(struct nfs_server *server,
 
 	/* Initialise the client representation from the mount data */
 	server->flags = data->flags;
-	server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR;
+	server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR|
+		NFS_CAP_POSIX_LOCK;
 	server->options = data->options;
 
 	/* Get a client record */
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index fe0cd9e..6380670 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1523,6 +1523,8 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
 		nfs_post_op_update_inode(dir, o_res->dir_attr);
 	} else
 		nfs_refresh_inode(dir, o_res->dir_attr);
+	if ((o_res->rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) == 0)
+		server->caps &= ~NFS_CAP_POSIX_LOCK;
 	if(o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) {
 		status = _nfs4_proc_open_confirm(data);
 		if (status != 0)
@@ -1664,7 +1666,7 @@ static int _nfs4_do_open(struct inode *dir, struct path *path, fmode_t fmode, in
 	status = PTR_ERR(state);
 	if (IS_ERR(state))
 		goto err_opendata_put;
-	if ((opendata->o_res.rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) != 0)
+	if (server->caps & NFS_CAP_POSIX_LOCK)
 		set_bit(NFS_STATE_POSIX_LOCKS, &state->flags);
 	nfs4_opendata_put(opendata);
 	nfs4_put_state_owner(sp);
-- 
cgit v1.1


From fc7683a3c30c22131b1651271d6bf9ea113b77c5 Mon Sep 17 00:00:00 2001
From: Dmitry Monakhov <dmonakhov@openvz.org>
Date: Fri, 26 Mar 2010 19:29:54 +0300
Subject: ext2: symlink must be handled via filesystem specific operation

generic setattr implementation is no longer responsible for
quota transfer so synlinks must be handled via ext2_setattr.

Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/ext2/symlink.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/ext2/symlink.c b/fs/ext2/symlink.c
index 4e2426e..565cf81 100644
--- a/fs/ext2/symlink.c
+++ b/fs/ext2/symlink.c
@@ -32,6 +32,7 @@ const struct inode_operations ext2_symlink_inode_operations = {
 	.readlink	= generic_readlink,
 	.follow_link	= page_follow_link_light,
 	.put_link	= page_put_link,
+	.setattr	= ext2_setattr,
 #ifdef CONFIG_EXT2_FS_XATTR
 	.setxattr	= generic_setxattr,
 	.getxattr	= generic_getxattr,
@@ -43,6 +44,7 @@ const struct inode_operations ext2_symlink_inode_operations = {
 const struct inode_operations ext2_fast_symlink_inode_operations = {
 	.readlink	= generic_readlink,
 	.follow_link	= ext2_follow_link,
+	.setattr	= ext2_setattr,
 #ifdef CONFIG_EXT2_FS_XATTR
 	.setxattr	= generic_setxattr,
 	.getxattr	= generic_getxattr,
-- 
cgit v1.1


From 774f03fb2cf89951b5f5f363b7739a2835d5924e Mon Sep 17 00:00:00 2001
From: Dmitry Monakhov <dmonakhov@openvz.org>
Date: Fri, 26 Mar 2010 19:29:55 +0300
Subject: ext3: symlink must be handled via filesystem specific operation

generic setattr implementation is no longer responsible for
quota transfer so synlinks must be handled via ext3_setattr.

Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/ext3/symlink.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/ext3/symlink.c b/fs/ext3/symlink.c
index ff7b4cc..7c48982 100644
--- a/fs/ext3/symlink.c
+++ b/fs/ext3/symlink.c
@@ -34,6 +34,7 @@ const struct inode_operations ext3_symlink_inode_operations = {
 	.readlink	= generic_readlink,
 	.follow_link	= page_follow_link_light,
 	.put_link	= page_put_link,
+	.setattr	= ext3_setattr,
 #ifdef CONFIG_EXT3_FS_XATTR
 	.setxattr	= generic_setxattr,
 	.getxattr	= generic_getxattr,
@@ -45,6 +46,7 @@ const struct inode_operations ext3_symlink_inode_operations = {
 const struct inode_operations ext3_fast_symlink_inode_operations = {
 	.readlink	= generic_readlink,
 	.follow_link	= ext3_follow_link,
+	.setattr	= ext3_setattr,
 #ifdef CONFIG_EXT3_FS_XATTR
 	.setxattr	= generic_setxattr,
 	.getxattr	= generic_getxattr,
-- 
cgit v1.1


From 4c5e6c0e70fd6ca2fa67184fd36a261b3b7b38d0 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 6 Apr 2010 18:52:47 +0200
Subject: quota: Hide warnings about writes to the filesystem before quota was
 turned on

For a root filesystem write to the filesystem before quota is turned on happens
regularly and there's no way around it because of writes to syslog, /etc/mtab,
and similar. So the warning is rather pointless for ordinary users. It's
still useful during development so we just hide the warning behind
__DQUOT_PARANOIA config option.

Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/quota/dquot.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'fs')

diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index e0b870f..f9a3751 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -874,14 +874,18 @@ static int dqinit_needed(struct inode *inode, int type)
 static void add_dquot_ref(struct super_block *sb, int type)
 {
 	struct inode *inode, *old_inode = NULL;
+#ifdef __DQUOT_PARANOIA
 	int reserved = 0;
+#endif
 
 	spin_lock(&inode_lock);
 	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
 		if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW))
 			continue;
+#ifdef __DQUOT_PARANOIA
 		if (unlikely(inode_get_rsv_space(inode) > 0))
 			reserved = 1;
+#endif
 		if (!atomic_read(&inode->i_writecount))
 			continue;
 		if (!dqinit_needed(inode, type))
@@ -903,11 +907,13 @@ static void add_dquot_ref(struct super_block *sb, int type)
 	spin_unlock(&inode_lock);
 	iput(old_inode);
 
+#ifdef __DQUOT_PARANOIA
 	if (reserved) {
 		printk(KERN_WARNING "VFS (%s): Writes happened before quota"
 			" was turned on thus quota information is probably "
 			"inconsistent. Please run quotacheck(8).\n", sb->s_id);
 	}
+#endif
 }
 
 /*
-- 
cgit v1.1


From 08261673cb6dc638c39f44d69b76fffb57b92a8b Mon Sep 17 00:00:00 2001
From: Andrew Perepechko <andrew.perepechko@sun.com>
Date: Mon, 12 Apr 2010 22:16:50 +0400
Subject: quota: Fix possible dq_flags corruption

dq_flags are modified non-atomically in do_set_dqblk via __set_bit calls and
atomically for example in mark_dquot_dirty or clear_dquot_dirty.  Hence a
change done by an atomic operation can be overwritten by a change done by a
non-atomic one. Fix the problem by using atomic bitops even in do_set_dqblk.

Signed-off-by: Andrew Perepechko <andrew.perepechko@sun.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/quota/dquot.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index f9a3751..a0a9405 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -2328,34 +2328,34 @@ static int do_set_dqblk(struct dquot *dquot, struct if_dqblk *di)
 	if (di->dqb_valid & QIF_SPACE) {
 		dm->dqb_curspace = di->dqb_curspace - dm->dqb_rsvspace;
 		check_blim = 1;
-		__set_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags);
+		set_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags);
 	}
 	if (di->dqb_valid & QIF_BLIMITS) {
 		dm->dqb_bsoftlimit = qbtos(di->dqb_bsoftlimit);
 		dm->dqb_bhardlimit = qbtos(di->dqb_bhardlimit);
 		check_blim = 1;
-		__set_bit(DQ_LASTSET_B + QIF_BLIMITS_B, &dquot->dq_flags);
+		set_bit(DQ_LASTSET_B + QIF_BLIMITS_B, &dquot->dq_flags);
 	}
 	if (di->dqb_valid & QIF_INODES) {
 		dm->dqb_curinodes = di->dqb_curinodes;
 		check_ilim = 1;
-		__set_bit(DQ_LASTSET_B + QIF_INODES_B, &dquot->dq_flags);
+		set_bit(DQ_LASTSET_B + QIF_INODES_B, &dquot->dq_flags);
 	}
 	if (di->dqb_valid & QIF_ILIMITS) {
 		dm->dqb_isoftlimit = di->dqb_isoftlimit;
 		dm->dqb_ihardlimit = di->dqb_ihardlimit;
 		check_ilim = 1;
-		__set_bit(DQ_LASTSET_B + QIF_ILIMITS_B, &dquot->dq_flags);
+		set_bit(DQ_LASTSET_B + QIF_ILIMITS_B, &dquot->dq_flags);
 	}
 	if (di->dqb_valid & QIF_BTIME) {
 		dm->dqb_btime = di->dqb_btime;
 		check_blim = 1;
-		__set_bit(DQ_LASTSET_B + QIF_BTIME_B, &dquot->dq_flags);
+		set_bit(DQ_LASTSET_B + QIF_BTIME_B, &dquot->dq_flags);
 	}
 	if (di->dqb_valid & QIF_ITIME) {
 		dm->dqb_itime = di->dqb_itime;
 		check_ilim = 1;
-		__set_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags);
+		set_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags);
 	}
 
 	if (check_blim) {
-- 
cgit v1.1


From f5b066287c74b624583b993395a65d03a6487b3a Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Mon, 12 Apr 2010 14:24:28 -0700
Subject: ceph: fix dentry reference leak in dcache readdir

When filldir returned an error (e.g. buffer full for a large directory),
we would leak a dentry reference, causing an oops on umount.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/dir.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index aed8fda..7505b4f 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -170,11 +170,11 @@ more:
 	spin_lock(&inode->i_lock);
 	spin_lock(&dcache_lock);
 
+	last = dentry;
+
 	if (err < 0)
 		goto out_unlock;
 
-	last = dentry;
-
 	p = p->prev;
 	filp->f_pos++;
 
-- 
cgit v1.1


From 032d8f7268444a0f5d4ee02d9513d682d5b8edfc Mon Sep 17 00:00:00 2001
From: Joern Engel <joern@logfs.org>
Date: Tue, 13 Apr 2010 17:46:37 +0200
Subject: [LogFS] Prevent memory corruption on large deletes

Removing sufficiently large files would create aliases for a large
number of segments.  This in turn results in a large number of journal
entries and an overflow of s_je_array.

Cheap fix is to add a BUG_ON, turning memory corruption into something
annoying, but less dangerous.  Real fix is to count the number of
affected segments and prevent the problem completely.

Signed-off-by: Joern Engel <joern@logfs.org>
---
 fs/logfs/gc.c        |  8 ++++++++
 fs/logfs/journal.c   |  3 +++
 fs/logfs/logfs.h     |  8 +++++++-
 fs/logfs/readwrite.c | 14 ++++++++++++++
 fs/logfs/super.c     |  2 ++
 5 files changed, 34 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/logfs/gc.c b/fs/logfs/gc.c
index 92949f9..e8253e7 100644
--- a/fs/logfs/gc.c
+++ b/fs/logfs/gc.c
@@ -458,6 +458,14 @@ static void __logfs_gc_pass(struct super_block *sb, int target)
 	struct logfs_block *block;
 	int round, progress, last_progress = 0;
 
+	/*
+	 * Doing too many changes to the segfile at once would result
+	 * in a large number of aliases.  Write the journal before
+	 * things get out of hand.
+	 */
+	if (super->s_shadow_tree.no_shadowed_segments >= MAX_OBJ_ALIASES)
+		logfs_write_anchor(sb);
+
 	if (no_free_segments(sb) >= target &&
 			super->s_no_object_aliases < MAX_OBJ_ALIASES)
 		return;
diff --git a/fs/logfs/journal.c b/fs/logfs/journal.c
index d57c7b0..2c22a4a 100644
--- a/fs/logfs/journal.c
+++ b/fs/logfs/journal.c
@@ -493,6 +493,8 @@ static void account_shadows(struct super_block *sb)
 
 	btree_grim_visitor64(&tree->new, (unsigned long)sb, account_shadow);
 	btree_grim_visitor64(&tree->old, (unsigned long)sb, account_shadow);
+	btree_grim_visitor32(&tree->segment_map, 0, NULL);
+	tree->no_shadowed_segments = 0;
 
 	if (li->li_block) {
 		/*
@@ -660,6 +662,7 @@ static int logfs_write_je_buf(struct super_block *sb, void *buf, u16 type,
 	if (ofs < 0)
 		return ofs;
 	logfs_buf_write(area, ofs, super->s_compressed_je, len);
+	BUG_ON(super->s_no_je >= MAX_JOURNAL_ENTRIES);
 	super->s_je_array[super->s_no_je++] = cpu_to_be64(ofs);
 	return 0;
 }
diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h
index 97195b9..c9929ee 100644
--- a/fs/logfs/logfs.h
+++ b/fs/logfs/logfs.h
@@ -257,10 +257,14 @@ struct logfs_shadow {
  * struct shadow_tree
  * @new:			shadows where old_ofs==0, indexed by new_ofs
  * @old:			shadows where old_ofs!=0, indexed by old_ofs
+ * @segment_map:		bitfield of segments containing shadows
+ * @no_shadowed_segment:	number of segments containing shadows
  */
 struct shadow_tree {
 	struct btree_head64 new;
 	struct btree_head64 old;
+	struct btree_head32 segment_map;
+	int no_shadowed_segments;
 };
 
 struct object_alias_item {
@@ -311,6 +315,8 @@ struct logfs_block_ops {
 			write_alias_t *write_one_alias);
 };
 
+#define MAX_JOURNAL_ENTRIES 256
+
 struct logfs_super {
 	struct mtd_info *s_mtd;			/* underlying device */
 	struct block_device *s_bdev;		/* underlying device */
@@ -377,7 +383,7 @@ struct logfs_super {
 	u32	 s_journal_ec[LOGFS_JOURNAL_SEGS]; /* journal erasecounts */
 	u64	 s_last_version;
 	struct logfs_area *s_journal_area;	/* open journal segment */
-	__be64	s_je_array[64];
+	__be64	s_je_array[MAX_JOURNAL_ENTRIES];
 	int	s_no_je;
 
 	int	 s_sum_index;			/* for the 12 summaries */
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index 3659c37..7e0c39c 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -1219,6 +1219,18 @@ static void free_shadow(struct inode *inode, struct logfs_shadow *shadow)
 	mempool_free(shadow, super->s_shadow_pool);
 }
 
+static void mark_segment(struct shadow_tree *tree, u32 segno)
+{
+	int err;
+
+	if (!btree_lookup32(&tree->segment_map, segno)) {
+		err = btree_insert32(&tree->segment_map, segno, (void *)1,
+				GFP_NOFS);
+		BUG_ON(err);
+		tree->no_shadowed_segments++;
+	}
+}
+
 /**
  * fill_shadow_tree - Propagate shadow tree changes due to a write
  * @inode:	Inode owning the page
@@ -1266,6 +1278,8 @@ static void fill_shadow_tree(struct inode *inode, struct page *page,
 
 		super->s_dirty_used_bytes += shadow->new_len;
 		super->s_dirty_free_bytes += shadow->old_len;
+		mark_segment(tree, shadow->old_ofs >> super->s_segshift);
+		mark_segment(tree, shadow->new_ofs >> super->s_segshift);
 	}
 }
 
diff --git a/fs/logfs/super.c b/fs/logfs/super.c
index 9d856c4..d6e1f4f 100644
--- a/fs/logfs/super.c
+++ b/fs/logfs/super.c
@@ -451,6 +451,8 @@ static int logfs_read_sb(struct super_block *sb, int read_only)
 
 	btree_init_mempool64(&super->s_shadow_tree.new, super->s_btree_pool);
 	btree_init_mempool64(&super->s_shadow_tree.old, super->s_btree_pool);
+	btree_init_mempool32(&super->s_shadow_tree.segment_map,
+			super->s_btree_pool);
 
 	ret = logfs_init_mapping(sb);
 	if (ret)
-- 
cgit v1.1


From d3a03f8031000f8297823b80e36db536fd020884 Mon Sep 17 00:00:00 2001
From: Joern Engel <joern@logfs.org>
Date: Tue, 13 Apr 2010 17:54:27 +0200
Subject: [LogFS] Plug 8 byte information leak

Within each journal segment, 8 bytes at offset 24 would remain
uninitialized.

Signed-off-by: Joern Engel <joern@logfs.org>
---
 fs/logfs/journal.c | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/logfs/journal.c b/fs/logfs/journal.c
index 2c22a4a..2957bfc 100644
--- a/fs/logfs/journal.c
+++ b/fs/logfs/journal.c
@@ -388,7 +388,10 @@ static void journal_get_erase_count(struct logfs_area *area)
 static int journal_erase_segment(struct logfs_area *area)
 {
 	struct super_block *sb = area->a_sb;
-	struct logfs_segment_header sh;
+	union {
+		struct logfs_segment_header sh;
+		unsigned char c[ALIGN(sizeof(struct logfs_segment_header), 16)];
+	} u;
 	u64 ofs;
 	int err;
 
@@ -396,20 +399,21 @@ static int journal_erase_segment(struct logfs_area *area)
 	if (err)
 		return err;
 
-	sh.pad = 0;
-	sh.type = SEG_JOURNAL;
-	sh.level = 0;
-	sh.segno = cpu_to_be32(area->a_segno);
-	sh.ec = cpu_to_be32(area->a_erase_count);
-	sh.gec = cpu_to_be64(logfs_super(sb)->s_gec);
-	sh.crc = logfs_crc32(&sh, sizeof(sh), 4);
+	memset(&u, 0, sizeof(u));
+	u.sh.pad = 0;
+	u.sh.type = SEG_JOURNAL;
+	u.sh.level = 0;
+	u.sh.segno = cpu_to_be32(area->a_segno);
+	u.sh.ec = cpu_to_be32(area->a_erase_count);
+	u.sh.gec = cpu_to_be64(logfs_super(sb)->s_gec);
+	u.sh.crc = logfs_crc32(&u.sh, sizeof(u.sh), 4);
 
 	/* This causes a bug in segment.c.  Not yet. */
 	//logfs_set_segment_erased(sb, area->a_segno, area->a_erase_count, 0);
 
 	ofs = dev_ofs(sb, area->a_segno, 0);
-	area->a_used_bytes = ALIGN(sizeof(sh), 16);
-	logfs_buf_write(area, ofs, &sh, sizeof(sh));
+	area->a_used_bytes = sizeof(u);
+	logfs_buf_write(area, ofs, &u, sizeof(u));
 	return 0;
 }
 
-- 
cgit v1.1


From ead88af5f577fd2b399a0fcdfe52605116fac489 Mon Sep 17 00:00:00 2001
From: Joern Engel <joern@logfs.org>
Date: Tue, 13 Apr 2010 17:57:21 +0200
Subject: [LogFS] Move assertion

The assertion is valid independently of the condition.

Signed-off-by: Joern Engel <joern@logfs.org>
---
 fs/logfs/journal.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/logfs/journal.c b/fs/logfs/journal.c
index 2957bfc..fd44eee 100644
--- a/fs/logfs/journal.c
+++ b/fs/logfs/journal.c
@@ -612,9 +612,9 @@ static size_t __logfs_write_je(struct super_block *sb, void *buf, u16 type,
 	if (len == 0)
 		return logfs_write_header(super, header, 0, type);
 
+	BUG_ON(len > sb->s_blocksize);
 	compr_len = logfs_compress(buf, data, len, sb->s_blocksize);
 	if (compr_len < 0 || type == JE_ANCHOR) {
-		BUG_ON(len > sb->s_blocksize);
 		memcpy(data, buf, len);
 		compr_len = len;
 		compr = COMPR_NONE;
-- 
cgit v1.1


From fc837c8f0446b73a1661339db406c0238dd1d184 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Tue, 13 Apr 2010 11:41:22 -0700
Subject: ceph: queue_cap_snap should always queue dirty context

This simplifies the calling convention, and fixes a bug where we queue a
capsnap with a context other than i_head_snapc (the one that matches the
dirty pages).  The result was a BUG at fs/ceph/caps.c:2178 on writeback
completion when a capsnap matching the writeback snapc could not be found.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/snap.c  | 16 +++++++---------
 fs/ceph/super.h |  3 +--
 2 files changed, 8 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 7e3e5f9..d197431 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -430,8 +430,7 @@ static int dup_array(u64 **dst, __le64 *src, int num)
  * Caller must hold snap_rwsem for read (i.e., the realm topology won't
  * change).
  */
-void ceph_queue_cap_snap(struct ceph_inode_info *ci,
-			 struct ceph_snap_context *snapc)
+void ceph_queue_cap_snap(struct ceph_inode_info *ci)
 {
 	struct inode *inode = &ci->vfs_inode;
 	struct ceph_cap_snap *capsnap;
@@ -450,10 +449,11 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci,
 		   as no new writes are allowed to start when pending, so any
 		   writes in progress now were started before the previous
 		   cap_snap.  lucky us. */
-		dout("queue_cap_snap %p snapc %p seq %llu used %d"
-		     " already pending\n", inode, snapc, snapc->seq, used);
+		dout("queue_cap_snap %p already pending\n", inode);
 		kfree(capsnap);
 	} else if (ci->i_wrbuffer_ref_head || (used & CEPH_CAP_FILE_WR)) {
+		struct ceph_snap_context *snapc = ci->i_head_snapc;
+
 		igrab(inode);
 
 		atomic_set(&capsnap->nref, 1);
@@ -462,7 +462,6 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci,
 		INIT_LIST_HEAD(&capsnap->flushing_item);
 
 		capsnap->follows = snapc->seq - 1;
-		capsnap->context = ceph_get_snap_context(snapc);
 		capsnap->issued = __ceph_caps_issued(ci, NULL);
 		capsnap->dirty = __ceph_caps_dirty(ci);
 
@@ -479,7 +478,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci,
 		   snapshot. */
 		capsnap->dirty_pages = ci->i_wrbuffer_ref_head;
 		ci->i_wrbuffer_ref_head = 0;
-		ceph_put_snap_context(ci->i_head_snapc);
+		capsnap->context = snapc;
 		ci->i_head_snapc = NULL;
 		list_add_tail(&capsnap->ci_item, &ci->i_cap_snaps);
 
@@ -603,7 +602,7 @@ more:
 				if (lastinode)
 					iput(lastinode);
 				lastinode = inode;
-				ceph_queue_cap_snap(ci, realm->cached_context);
+				ceph_queue_cap_snap(ci);
 				spin_lock(&realm->inodes_with_caps_lock);
 			}
 			spin_unlock(&realm->inodes_with_caps_lock);
@@ -825,8 +824,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
 			spin_unlock(&realm->inodes_with_caps_lock);
 			spin_unlock(&inode->i_lock);
 
-			ceph_queue_cap_snap(ci,
-					    ci->i_snap_realm->cached_context);
+			ceph_queue_cap_snap(ci);
 
 			iput(inode);
 			continue;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 65d1203..4c07aca 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -714,8 +714,7 @@ extern int ceph_update_snap_trace(struct ceph_mds_client *m,
 extern void ceph_handle_snap(struct ceph_mds_client *mdsc,
 			     struct ceph_mds_session *session,
 			     struct ceph_msg *msg);
-extern void ceph_queue_cap_snap(struct ceph_inode_info *ci,
-				struct ceph_snap_context *snapc);
+extern void ceph_queue_cap_snap(struct ceph_inode_info *ci);
 extern int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
 				  struct ceph_cap_snap *capsnap);
 extern void ceph_cleanup_empty_realms(struct ceph_mds_client *mdsc);
-- 
cgit v1.1


From e1e4dd0caa63e166afa46a1ccc947bebb4f66bcf Mon Sep 17 00:00:00 2001
From: Yehuda Sadeh <yehuda@hq.newdream.net>
Date: Tue, 13 Apr 2010 11:45:56 -0700
Subject: ceph: reserve one more caps space when doing readdir

We were missing space for the directory cap.  The result was a BUG at
fs/ceph/caps.c:2178.

Signed-off-by: Yehuda Sadeh <yehuda@hq.newdream.net>
Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/dir.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 7505b4f..159f588 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -311,7 +311,7 @@ more:
 		req->r_readdir_offset = fi->next_offset;
 		req->r_args.readdir.frag = cpu_to_le32(frag);
 		req->r_args.readdir.max_entries = cpu_to_le32(max_entries);
-		req->r_num_caps = max_entries;
+		req->r_num_caps = max_entries + 1;
 		err = ceph_mdsc_do_request(mdsc, NULL, req);
 		if (err < 0) {
 			ceph_mdsc_put_request(req);
-- 
cgit v1.1


From a6a5349d17f2a5c37079826f1a1474c3d08c6b53 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Tue, 13 Apr 2010 14:07:07 -0700
Subject: ceph: use separate class for ceph sockets' sk_lock

Use a separate class for ceph sockets to prevent lockdep confusion.
Because ceph sockets only get passed kernel pointers, there is no
dependency from sk_lock -> mmap_sem.  If we share the same class as other
sockets, lockdep detects a circular dependency from

	mmap_sem (page fault) -> fs mutex -> sk_lock -> mmap_sem

because dependencies are noted from both ceph and user contexts.  Using
a separate class prevents the sk_lock(ceph) -> mmap_sem dependency and
makes lockdep happy.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/messenger.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'fs')

diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c
index f35b494..5c75d5d 100644
--- a/fs/ceph/messenger.c
+++ b/fs/ceph/messenger.c
@@ -29,6 +29,10 @@ static char tag_msg = CEPH_MSGR_TAG_MSG;
 static char tag_ack = CEPH_MSGR_TAG_ACK;
 static char tag_keepalive = CEPH_MSGR_TAG_KEEPALIVE;
 
+#ifdef CONFIG_LOCKDEP
+static struct lock_class_key socket_class;
+#endif
+
 
 static void queue_con(struct ceph_connection *con);
 static void con_work(struct work_struct *);
@@ -227,6 +231,10 @@ static struct socket *ceph_tcp_connect(struct ceph_connection *con)
 	con->sock = sock;
 	sock->sk->sk_allocation = GFP_NOFS;
 
+#ifdef CONFIG_LOCKDEP
+	lockdep_set_class(&sock->sk->sk_lock, &socket_class);
+#endif
+
 	set_sock_callbacks(sock, con);
 
 	dout("connect %s\n", pr_addr(&con->peer_addr.in_addr));
-- 
cgit v1.1


From 1f1b0008e8dd1930d6e89522c70f4a438374302a Mon Sep 17 00:00:00 2001
From: Joern Engel <joern@logfs.org>
Date: Thu, 15 Apr 2010 08:03:57 +0200
Subject: [LogFS] Prevent mempool_destroy NULL pointer dereference

It would probably be better to just accept NULL pointers in
mempool_destroy().  But for the current -rc series let's keep things
simple.

This patch was lost in the cracks for a while.
Kevin Cernekee <cernekee@gmail.com> had to rediscover the problem and
send a similar patch because of it. :(

Signed-off-by: Joern Engel <joern@logfs.org>
---
 fs/logfs/logfs.h     | 6 ++++++
 fs/logfs/readwrite.c | 6 ++----
 fs/logfs/segment.c   | 2 +-
 fs/logfs/super.c     | 4 ++--
 4 files changed, 11 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h
index c9929ee..0a3df1a 100644
--- a/fs/logfs/logfs.h
+++ b/fs/logfs/logfs.h
@@ -727,4 +727,10 @@ static inline struct logfs_area *get_area(struct super_block *sb,
 	return logfs_super(sb)->s_area[(__force u8)gc_level];
 }
 
+static inline void logfs_mempool_destroy(mempool_t *pool)
+{
+	if (pool)
+		mempool_destroy(pool);
+}
+
 #endif
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index 7e0c39c..aca6c56 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -2243,8 +2243,6 @@ void logfs_cleanup_rw(struct super_block *sb)
 	struct logfs_super *super = logfs_super(sb);
 
 	destroy_meta_inode(super->s_segfile_inode);
-	if (super->s_block_pool)
-		mempool_destroy(super->s_block_pool);
-	if (super->s_shadow_pool)
-		mempool_destroy(super->s_shadow_pool);
+	logfs_mempool_destroy(super->s_block_pool);
+	logfs_mempool_destroy(super->s_shadow_pool);
 }
diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c
index 02db22e..8c82fe0 100644
--- a/fs/logfs/segment.c
+++ b/fs/logfs/segment.c
@@ -912,7 +912,7 @@ err:
 	for (i--; i >= 0; i--)
 		free_area(super->s_area[i]);
 	free_area(super->s_journal_area);
-	mempool_destroy(super->s_alias_pool);
+	logfs_mempool_destroy(super->s_alias_pool);
 	return -ENOMEM;
 }
 
diff --git a/fs/logfs/super.c b/fs/logfs/super.c
index d6e1f4f..d4531eb 100644
--- a/fs/logfs/super.c
+++ b/fs/logfs/super.c
@@ -517,8 +517,8 @@ static void logfs_kill_sb(struct super_block *sb)
 	if (super->s_erase_page)
 		__free_page(super->s_erase_page);
 	super->s_devops->put_device(sb);
-	mempool_destroy(super->s_btree_pool);
-	mempool_destroy(super->s_alias_pool);
+	logfs_mempool_destroy(super->s_btree_pool);
+	logfs_mempool_destroy(super->s_alias_pool);
 	kfree(super);
 	log_super("LogFS: Finished unmounting\n");
 }
-- 
cgit v1.1


From 79681842e160c3211eeeb47ea31b061038d1e41e Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Fri, 16 Apr 2010 13:59:25 +0800
Subject: ocfs2: Reset status if we want to restart file extension.

In __ocfs2_extend_allocation, we will restart our file extension
if ((!status) && restart_func). But there is a bug that the
status is still left as -EGAIN. This is really an old bug,
but it is masked by the return value of ocfs2_journal_dirty.
So it show up when we make ocfs2_journal_dirty void.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/file.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 2b4235c..20e0ee5 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -684,6 +684,7 @@ restarted_transaction:
 		if (why == RESTART_META) {
 			mlog(0, "restarting function.\n");
 			restart_func = 1;
+			status = 0;
 		} else {
 			BUG_ON(why != RESTART_TRANS);
 
-- 
cgit v1.1


From 2b0b39517d1af5294128dbc2fd7ed39c8effa540 Mon Sep 17 00:00:00 2001
From: Bill Pemberton <wfp5p@virginia.edu>
Date: Fri, 16 Apr 2010 08:01:20 -0500
Subject: jfs: fix diAllocExt error in resizing filesystem

Resizing the filesystem would result in an diAllocExt error in some
instances because changes in bmp->db_agsize would not get noticed if
goto extendBmap was called.

Signed-off-by: Bill Pemberton <wfp5p@virginia.edu>
Signed-off-by: Dave Kleikamp <shaggy@linux.vnet.ibm.com>
Cc: jfs-discussion@lists.sourceforge.net
Cc: linux-kernel@vger.kernel.org
---
 fs/jfs/resize.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/jfs/resize.c b/fs/jfs/resize.c
index 7f24a0b..1aba003 100644
--- a/fs/jfs/resize.c
+++ b/fs/jfs/resize.c
@@ -81,6 +81,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
 	struct inode *iplist[1];
 	struct jfs_superblock *j_sb, *j_sb2;
 	uint old_agsize;
+	int agsizechanged = 0;
 	struct buffer_head *bh, *bh2;
 
 	/* If the volume hasn't grown, get out now */
@@ -333,6 +334,9 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
 	 */
 	if ((rc = dbExtendFS(ipbmap, XAddress, nblocks)))
 		goto error_out;
+
+	agsizechanged |= (bmp->db_agsize != old_agsize);
+
 	/*
 	 * the map now has extended to cover additional nblocks:
 	 * dn_mapsize = oldMapsize + nblocks;
@@ -432,7 +436,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
 	 * will correctly identify the new ag);
 	 */
 	/* if new AG size the same as old AG size, done! */
-	if (bmp->db_agsize != old_agsize) {
+	if (agsizechanged) {
 		if ((rc = diExtendFS(ipimap, ipbmap)))
 			goto error_out;
 
-- 
cgit v1.1


From c7f2e1f0ac142a657a1de00d404e1c8345b20598 Mon Sep 17 00:00:00 2001
From: Dmitry Monakhov <dmonakhov@openvz.org>
Date: Fri, 16 Apr 2010 08:05:50 -0500
Subject: jfs: add jfs specific ->setattr call

generic setattr not longer responsible for quota transfer.
use jfs_setattr for all jfs's inodes.

Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Signed-off-by: Dave Kleikamp <shaggy@linux.vnet.ibm.com>
---
 fs/jfs/inode.c     |  2 +-
 fs/jfs/jfs_inode.h |  1 +
 fs/jfs/namei.c     |  4 ++--
 fs/jfs/symlink.c   | 14 +++++++++++++-
 4 files changed, 17 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index b2ae190..97cd119 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -60,7 +60,7 @@ struct inode *jfs_iget(struct super_block *sb, unsigned long ino)
 			inode->i_op = &page_symlink_inode_operations;
 			inode->i_mapping->a_ops = &jfs_aops;
 		} else {
-			inode->i_op = &jfs_symlink_inode_operations;
+			inode->i_op = &jfs_fast_symlink_inode_operations;
 			/*
 			 * The inline data should be null-terminated, but
 			 * don't let on-disk corruption crash the kernel
diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h
index 1eff7db..f8b56b2 100644
--- a/fs/jfs/jfs_inode.h
+++ b/fs/jfs/jfs_inode.h
@@ -47,5 +47,6 @@ extern const struct file_operations jfs_dir_operations;
 extern const struct inode_operations jfs_file_inode_operations;
 extern const struct file_operations jfs_file_operations;
 extern const struct inode_operations jfs_symlink_inode_operations;
+extern const struct inode_operations jfs_fast_symlink_inode_operations;
 extern const struct dentry_operations jfs_ci_dentry_operations;
 #endif				/* _H_JFS_INODE */
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index c79a427..114e600 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -946,7 +946,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
 	 */
 
 	if (ssize <= IDATASIZE) {
-		ip->i_op = &jfs_symlink_inode_operations;
+		ip->i_op = &jfs_fast_symlink_inode_operations;
 
 		i_fastsymlink = JFS_IP(ip)->i_inline;
 		memcpy(i_fastsymlink, name, ssize);
@@ -968,7 +968,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
 	else {
 		jfs_info("jfs_symlink: allocate extent ip:0x%p", ip);
 
-		ip->i_op = &page_symlink_inode_operations;
+		ip->i_op = &jfs_symlink_inode_operations;
 		ip->i_mapping->a_ops = &jfs_aops;
 
 		/*
diff --git a/fs/jfs/symlink.c b/fs/jfs/symlink.c
index 4af1a05..205b946 100644
--- a/fs/jfs/symlink.c
+++ b/fs/jfs/symlink.c
@@ -29,9 +29,21 @@ static void *jfs_follow_link(struct dentry *dentry, struct nameidata *nd)
 	return NULL;
 }
 
-const struct inode_operations jfs_symlink_inode_operations = {
+const struct inode_operations jfs_fast_symlink_inode_operations = {
 	.readlink	= generic_readlink,
 	.follow_link	= jfs_follow_link,
+	.setattr	= jfs_setattr,
+	.setxattr	= jfs_setxattr,
+	.getxattr	= jfs_getxattr,
+	.listxattr	= jfs_listxattr,
+	.removexattr	= jfs_removexattr,
+};
+
+const struct inode_operations jfs_symlink_inode_operations = {
+	.readlink	= generic_readlink,
+	.follow_link	= page_follow_link_light,
+	.put_link	= page_put_link,
+	.setattr	= jfs_setattr,
 	.setxattr	= jfs_setxattr,
 	.getxattr	= jfs_getxattr,
 	.listxattr	= jfs_listxattr,
-- 
cgit v1.1


From b6f8dd49dbdbfa60a33bba3d4b766fe341109b4b Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Tue, 13 Apr 2010 15:06:44 +1000
Subject: xfs: ensure that sync updates the log tail correctly
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Updates to the VFS layer removed an extra ->sync_fs call into the
filesystem during the sync process (from the quota code).
Unfortunately the sync code was unknowingly relying on this call to
make sure metadata buffers were flushed via a xfs_buftarg_flush()
call to move the tail of the log forward in memory before the final
transactions of the sync process were issued.

As a result, the old code would write a very recent log tail value
to the log by the end of the sync process, and so a subsequent crash
would leave nothing for log recovery to do. Hence in qa test 182,
log recovery only replayed a small handle for inode fsync
transactions in this case.

However, with the removal of the extra ->sync_fs call, the log tail
was now not moved forward with the inode fsync transactions near the
end of the sync procese the first (and only) buftarg flush occurred
after these transactions went to disk. The result is that log
recovery now sees a large number of transactions for metadata that
is already on disk.

This usually isn't a problem, but when the transactions include
inode chunk allocation, the inode create transactions and all
subsequent changes are replayed as we cannt rely on what is on disk
is valid. As a result, if the inode was written and contains
unlogged changes, the unlogged changes are lost, thereby violating
sync semantics.

The fix is to always issue a transaction after the buftarg flush
occurs is the log iѕ not idle or covered. This results in a dummy
transaction being written that contains the up-to-date log tail
value, which will be very recent. Indeed, it will be at least as
recent as the old code would have left on disk, so log recovery
will behave exactly as it used to in this situation.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Alex Elder <aelder@sgi.com>
---
 fs/xfs/xfs_log.c | 38 ++++++++++++++++++++++++++------------
 1 file changed, 26 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index e8fba92..2be0191 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -745,9 +745,16 @@ xfs_log_move_tail(xfs_mount_t	*mp,
 
 /*
  * Determine if we have a transaction that has gone to disk
- * that needs to be covered. Log activity needs to be idle (no AIL and
- * nothing in the iclogs). And, we need to be in the right state indicating
- * something has gone out.
+ * that needs to be covered. To begin the transition to the idle state
+ * firstly the log needs to be idle (no AIL and nothing in the iclogs).
+ * If we are then in a state where covering is needed, the caller is informed
+ * that dummy transactions are required to move the log into the idle state.
+ *
+ * Because this is called as part of the sync process, we should also indicate
+ * that dummy transactions should be issued in anything but the covered or
+ * idle states. This ensures that the log tail is accurately reflected in
+ * the log at the end of the sync, hence if a crash occurrs avoids replay
+ * of transactions where the metadata is already on disk.
  */
 int
 xfs_log_need_covered(xfs_mount_t *mp)
@@ -759,17 +766,24 @@ xfs_log_need_covered(xfs_mount_t *mp)
 		return 0;
 
 	spin_lock(&log->l_icloglock);
-	if (((log->l_covered_state == XLOG_STATE_COVER_NEED) ||
-		(log->l_covered_state == XLOG_STATE_COVER_NEED2))
-			&& !xfs_trans_ail_tail(log->l_ailp)
-			&& xlog_iclogs_empty(log)) {
-		if (log->l_covered_state == XLOG_STATE_COVER_NEED)
-			log->l_covered_state = XLOG_STATE_COVER_DONE;
-		else {
-			ASSERT(log->l_covered_state == XLOG_STATE_COVER_NEED2);
-			log->l_covered_state = XLOG_STATE_COVER_DONE2;
+	switch (log->l_covered_state) {
+	case XLOG_STATE_COVER_DONE:
+	case XLOG_STATE_COVER_DONE2:
+	case XLOG_STATE_COVER_IDLE:
+		break;
+	case XLOG_STATE_COVER_NEED:
+	case XLOG_STATE_COVER_NEED2:
+		if (!xfs_trans_ail_tail(log->l_ailp) &&
+		    xlog_iclogs_empty(log)) {
+			if (log->l_covered_state == XLOG_STATE_COVER_NEED)
+				log->l_covered_state = XLOG_STATE_COVER_DONE;
+			else
+				log->l_covered_state = XLOG_STATE_COVER_DONE2;
 		}
+		/* FALLTHRU */
+	default:
 		needed = 1;
+		break;
 	}
 	spin_unlock(&log->l_icloglock);
 	return needed;
-- 
cgit v1.1


From f1d486a3617a2f620b31224e4ace1496c4627e39 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Tue, 13 Apr 2010 15:06:45 +1000
Subject: xfs: don't warn on EAGAIN in inode reclaim

Any inode reclaim flush that returns EAGAIN will result in the inode
reclaim being attempted again later. There is no need to issue a
warning into the logs about this situation.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Alex Elder <aelder@sgi.com>
Signed-off-by: Alex Elder <aelder@sgi.com>
---
 fs/xfs/linux-2.6/xfs_sync.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 05cd853..fd96982 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -820,10 +820,10 @@ xfs_reclaim_inode(
 	 * call into reclaim to find it in a clean state instead of waiting for
 	 * it now. We also don't return errors here - if the error is transient
 	 * then the next reclaim pass will flush the inode, and if the error
-	 * is permanent then the next sync reclaim will relcaim the inode and
+	 * is permanent then the next sync reclaim will reclaim the inode and
 	 * pass on the error.
 	 */
-	if (error && !XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+	if (error && error != EAGAIN && !XFS_FORCED_SHUTDOWN(ip->i_mount)) {
 		xfs_fs_cmn_err(CE_WARN, ip->i_mount,
 			"inode 0x%llx background reclaim flush failed with %d",
 			(long long)ip->i_ino, error);
-- 
cgit v1.1


From b8639077abf034824046ed09e779b74c4393031f Mon Sep 17 00:00:00 2001
From: Joern Engel <joern@logfs.org>
Date: Sat, 17 Apr 2010 19:54:27 +0200
Subject: [LogFS] Set s_bdi

Since 32a88aa1 sync() was turned into a NOP for logfs.  Worse, sync()
would not return an error, giving the illusion that writeout had
actually happened.

Afaics jffs2 was broken as well.

Signed-off-by: Joern Engel <joern@logfs.org>
---
 fs/logfs/super.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'fs')

diff --git a/fs/logfs/super.c b/fs/logfs/super.c
index d4531eb..dacce3a 100644
--- a/fs/logfs/super.c
+++ b/fs/logfs/super.c
@@ -11,6 +11,7 @@
  */
 #include "logfs.h"
 #include <linux/bio.h>
+#include <linux/blkdev.h>
 #include <linux/mtd/mtd.h>
 #include <linux/statfs.h>
 #include <linux/buffer_head.h>
@@ -136,6 +137,10 @@ static int logfs_sb_set(struct super_block *sb, void *_super)
 	sb->s_fs_info = super;
 	sb->s_mtd = super->s_mtd;
 	sb->s_bdev = super->s_bdev;
+	if (sb->s_bdev)
+		sb->s_bdi = &bdev_get_queue(sb->s_bdev)->backing_dev_info;
+	if (sb->s_mtd)
+		sb->s_bdi = sb->s_mtd->backing_dev_info;
 	return 0;
 }
 
-- 
cgit v1.1


From 3a60a1686f0d51c99bd0df8ac93050fb6dfce647 Mon Sep 17 00:00:00 2001
From: Tyler Hicks <tyhicks@linux.vnet.ibm.com>
Date: Mon, 22 Mar 2010 00:41:35 -0500
Subject: eCryptfs: Decrypt symlink target for stat size
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Create a getattr handler for eCryptfs symlinks that is capable of
reading the lower target and decrypting its path.  Prior to this patch,
a stat's st_size field would represent the strlen of the encrypted path,
while readlink() would return the strlen of the decrypted path.  This
could lead to confusion in some userspace applications, since the two
values should be equal.

https://bugs.launchpad.net/bugs/524919

Reported-by: Loïc Minier <loic.minier@canonical.com>
Cc: stable@kernel.org
Signed-off-by: Tyler Hicks <tyhicks@linux.vnet.ibm.com>
---
 fs/ecryptfs/inode.c | 100 +++++++++++++++++++++++++++-------------------------
 1 file changed, 52 insertions(+), 48 deletions(-)

(limited to 'fs')

diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index ddbd096..605f514 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -648,38 +648,17 @@ out_lock:
 	return rc;
 }
 
-static int
-ecryptfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
+static int ecryptfs_readlink_lower(struct dentry *dentry, char **buf,
+				   size_t *bufsiz)
 {
+	struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
 	char *lower_buf;
-	size_t lower_bufsiz;
-	struct dentry *lower_dentry;
-	struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
-	char *plaintext_name;
-	size_t plaintext_name_size;
+	size_t lower_bufsiz = PATH_MAX;
 	mm_segment_t old_fs;
 	int rc;
 
-	lower_dentry = ecryptfs_dentry_to_lower(dentry);
-	if (!lower_dentry->d_inode->i_op->readlink) {
-		rc = -EINVAL;
-		goto out;
-	}
-	mount_crypt_stat = &ecryptfs_superblock_to_private(
-						dentry->d_sb)->mount_crypt_stat;
-	/*
-	 * If the lower filename is encrypted, it will result in a significantly
-	 * longer name.  If needed, truncate the name after decode and decrypt.
-	 */
-	if (mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES)
-		lower_bufsiz = PATH_MAX;
-	else
-		lower_bufsiz = bufsiz;
-	/* Released in this function */
 	lower_buf = kmalloc(lower_bufsiz, GFP_KERNEL);
-	if (lower_buf == NULL) {
-		printk(KERN_ERR "%s: Out of memory whilst attempting to "
-		       "kmalloc [%zd] bytes\n", __func__, lower_bufsiz);
+	if (!lower_buf) {
 		rc = -ENOMEM;
 		goto out;
 	}
@@ -689,29 +668,31 @@ ecryptfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
 						   (char __user *)lower_buf,
 						   lower_bufsiz);
 	set_fs(old_fs);
-	if (rc >= 0) {
-		rc = ecryptfs_decode_and_decrypt_filename(&plaintext_name,
-							  &plaintext_name_size,
-							  dentry, lower_buf,
-							  rc);
-		if (rc) {
-			printk(KERN_ERR "%s: Error attempting to decode and "
-			       "decrypt filename; rc = [%d]\n", __func__,
-				rc);
-			goto out_free_lower_buf;
-		}
-		/* Check for bufsiz <= 0 done in sys_readlinkat() */
-		rc = copy_to_user(buf, plaintext_name,
-				  min((size_t) bufsiz, plaintext_name_size));
-		if (rc)
-			rc = -EFAULT;
-		else
-			rc = plaintext_name_size;
-		kfree(plaintext_name);
-		fsstack_copy_attr_atime(dentry->d_inode, lower_dentry->d_inode);
-	}
-out_free_lower_buf:
+	if (rc < 0)
+		goto out;
+	lower_bufsiz = rc;
+	rc = ecryptfs_decode_and_decrypt_filename(buf, bufsiz, dentry,
+						  lower_buf, lower_bufsiz);
+out:
 	kfree(lower_buf);
+	return rc;
+}
+
+static int
+ecryptfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
+{
+	char *kbuf;
+	size_t kbufsiz, copied;
+	int rc;
+
+	rc = ecryptfs_readlink_lower(dentry, &kbuf, &kbufsiz);
+	if (rc)
+		goto out;
+	copied = min_t(size_t, bufsiz, kbufsiz);
+	rc = copy_to_user(buf, kbuf, copied) ? -EFAULT : copied;
+	kfree(kbuf);
+	fsstack_copy_attr_atime(dentry->d_inode,
+				ecryptfs_dentry_to_lower(dentry)->d_inode);
 out:
 	return rc;
 }
@@ -1016,6 +997,28 @@ out:
 	return rc;
 }
 
+int ecryptfs_getattr_link(struct vfsmount *mnt, struct dentry *dentry,
+			  struct kstat *stat)
+{
+	struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
+	int rc = 0;
+
+	mount_crypt_stat = &ecryptfs_superblock_to_private(
+						dentry->d_sb)->mount_crypt_stat;
+	generic_fillattr(dentry->d_inode, stat);
+	if (mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES) {
+		char *target;
+		size_t targetsiz;
+
+		rc = ecryptfs_readlink_lower(dentry, &target, &targetsiz);
+		if (!rc) {
+			kfree(target);
+			stat->size = targetsiz;
+		}
+	}
+	return rc;
+}
+
 int ecryptfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
 		     struct kstat *stat)
 {
@@ -1133,6 +1136,7 @@ const struct inode_operations ecryptfs_symlink_iops = {
 	.put_link = ecryptfs_put_link,
 	.permission = ecryptfs_permission,
 	.setattr = ecryptfs_setattr,
+	.getattr = ecryptfs_getattr_link,
 	.setxattr = ecryptfs_setxattr,
 	.getxattr = ecryptfs_getxattr,
 	.listxattr = ecryptfs_listxattr,
-- 
cgit v1.1


From cfce08c6bdfb20ade979284e55001ca1f100ed51 Mon Sep 17 00:00:00 2001
From: Christian Pulvermacher <pulvermacher@gmx.de>
Date: Tue, 23 Mar 2010 11:51:38 -0500
Subject: ecryptfs: fix error code for missing xattrs in lower fs

If the lower file system driver has extended attributes disabled,
ecryptfs' own access functions return -ENOSYS instead of -EOPNOTSUPP.
This breaks execution of programs in the ecryptfs mount, since the
kernel expects the latter error when checking for security
capabilities in xattrs.

Signed-off-by: Christian Pulvermacher <pulvermacher@gmx.de>
Cc: stable@kernel.org
Signed-off-by: Tyler Hicks <tyhicks@linux.vnet.ibm.com>
---
 fs/ecryptfs/inode.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 605f514..efd4f47d5 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -1043,7 +1043,7 @@ ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value,
 
 	lower_dentry = ecryptfs_dentry_to_lower(dentry);
 	if (!lower_dentry->d_inode->i_op->setxattr) {
-		rc = -ENOSYS;
+		rc = -EOPNOTSUPP;
 		goto out;
 	}
 	mutex_lock(&lower_dentry->d_inode->i_mutex);
@@ -1061,7 +1061,7 @@ ecryptfs_getxattr_lower(struct dentry *lower_dentry, const char *name,
 	int rc = 0;
 
 	if (!lower_dentry->d_inode->i_op->getxattr) {
-		rc = -ENOSYS;
+		rc = -EOPNOTSUPP;
 		goto out;
 	}
 	mutex_lock(&lower_dentry->d_inode->i_mutex);
@@ -1088,7 +1088,7 @@ ecryptfs_listxattr(struct dentry *dentry, char *list, size_t size)
 
 	lower_dentry = ecryptfs_dentry_to_lower(dentry);
 	if (!lower_dentry->d_inode->i_op->listxattr) {
-		rc = -ENOSYS;
+		rc = -EOPNOTSUPP;
 		goto out;
 	}
 	mutex_lock(&lower_dentry->d_inode->i_mutex);
@@ -1105,7 +1105,7 @@ static int ecryptfs_removexattr(struct dentry *dentry, const char *name)
 
 	lower_dentry = ecryptfs_dentry_to_lower(dentry);
 	if (!lower_dentry->d_inode->i_op->removexattr) {
-		rc = -ENOSYS;
+		rc = -EOPNOTSUPP;
 		goto out;
 	}
 	mutex_lock(&lower_dentry->d_inode->i_mutex);
-- 
cgit v1.1


From 133b8f9d632cc23715c6d72d1c5ac449e054a12a Mon Sep 17 00:00:00 2001
From: Jeff Mahoney <jeffm@jeffreymahoney.com>
Date: Fri, 19 Mar 2010 15:35:46 -0400
Subject: ecryptfs: fix use with tmpfs by removing d_drop from
 ecryptfs_destroy_inode
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since tmpfs has no persistent storage, it pins all its dentries in memory
so they have d_count=1 when other file systems would have d_count=0.
->lookup is only used to create new dentries. If the caller doesn't
instantiate it, it's freed immediately at dput(). ->readdir reads
directly from the dcache and depends on the dentries being hashed.

When an ecryptfs mount is mounted, it associates the lower file and dentry
with the ecryptfs files as they're accessed. When it's umounted and
destroys all the in-memory ecryptfs inodes, it fput's the lower_files and
d_drop's the lower_dentries. Commit 4981e081 added this and a d_delete in
2008 and several months later commit caeeeecf removed the d_delete. I
believe the d_drop() needs to be removed as well.

The d_drop effectively hides any file that has been accessed via ecryptfs
from the underlying tmpfs since it depends on it being hashed for it to
be accessible. I've removed the d_drop on my development node and see no
ill effects with basic testing on both tmpfs and persistent storage.

As a side effect, after ecryptfs d_drops the dentries on tmpfs, tmpfs
BUGs on umount. This is due to the dentries being unhashed.
tmpfs->kill_sb is kill_litter_super which calls d_genocide to drop
the reference pinning the dentry. It skips unhashed and negative dentries,
but shrink_dcache_for_umount_subtree doesn't. Since those dentries
still have an elevated d_count, we get a BUG().

This patch removes the d_drop call and fixes both issues.

This issue was reported at:
https://bugzilla.novell.com/show_bug.cgi?id=567887

Reported-by:  Árpád Bíró <biroa@demasz.hu>
Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Cc: Dustin Kirkland <kirkland@canonical.com>
Cc: stable@kernel.org
Signed-off-by: Tyler Hicks <tyhicks@linux.vnet.ibm.com>
---
 fs/ecryptfs/super.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index b15a43a..1a037f7 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -85,7 +85,6 @@ static void ecryptfs_destroy_inode(struct inode *inode)
 		if (lower_dentry->d_inode) {
 			fput(inode_info->lower_file);
 			inode_info->lower_file = NULL;
-			d_drop(lower_dentry);
 		}
 	}
 	ecryptfs_destroy_crypt_stat(&inode_info->crypt_stat);
-- 
cgit v1.1


From 3a8380c0754a7972668a46f645930910e304095c Mon Sep 17 00:00:00 2001
From: Tyler Hicks <tyhicks@linux.vnet.ibm.com>
Date: Tue, 23 Mar 2010 18:09:02 -0500
Subject: eCryptfs: Copy lower directory inode times and size on link

The timestamps and size of a lower inode involved in a link() call was
being copied to the upper parent inode.  Instead, we should be
copying lower parent inode's timestamps and size to the upper parent
inode.  I discovered this bug using the POSIX test suite at Tuxera.

Signed-off-by: Tyler Hicks <tyhicks@linux.vnet.ibm.com>
---
 fs/ecryptfs/inode.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index efd4f47d5..15b4248 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -456,8 +456,8 @@ static int ecryptfs_link(struct dentry *old_dentry, struct inode *dir,
 	rc = ecryptfs_interpose(lower_new_dentry, new_dentry, dir->i_sb, 0);
 	if (rc)
 		goto out_lock;
-	fsstack_copy_attr_times(dir, lower_new_dentry->d_inode);
-	fsstack_copy_inode_size(dir, lower_new_dentry->d_inode);
+	fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
+	fsstack_copy_inode_size(dir, lower_dir_dentry->d_inode);
 	old_dentry->d_inode->i_nlink =
 		ecryptfs_inode_to_lower(old_dentry->d_inode)->i_nlink;
 	i_size_write(new_dentry->d_inode, file_size_save);
-- 
cgit v1.1


From 9f37622f897a90ad3c3da5c14d94d8f3ffc62b70 Mon Sep 17 00:00:00 2001
From: Tyler Hicks <tyhicks@linux.vnet.ibm.com>
Date: Thu, 25 Mar 2010 11:16:56 -0500
Subject: eCryptfs: Turn lower lookup error messages into debug messages

Vaugue warnings about ENAMETOOLONG errors when looking up an encrypted
file name have caused many users to become concerned about their data.
Since this is a rather harmless condition, I'm moving this warning to
only be printed when the ecryptfs_verbosity module param is 1.

Signed-off-by: Tyler Hicks <tyhicks@linux.vnet.ibm.com>
---
 fs/ecryptfs/inode.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 15b4248..b980fce 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -388,9 +388,9 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
 	mutex_unlock(&lower_dir_dentry->d_inode->i_mutex);
 	if (IS_ERR(lower_dentry)) {
 		rc = PTR_ERR(lower_dentry);
-		printk(KERN_ERR "%s: lookup_one_len() returned [%d] on "
-		       "lower_dentry = [%s]\n", __func__, rc,
-		       ecryptfs_dentry->d_name.name);
+		ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned "
+				"[%d] on lower_dentry = [%s]\n", __func__, rc,
+				encrypted_and_encoded_name);
 		goto out_d_drop;
 	}
 	if (lower_dentry->d_inode)
@@ -417,9 +417,9 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
 	mutex_unlock(&lower_dir_dentry->d_inode->i_mutex);
 	if (IS_ERR(lower_dentry)) {
 		rc = PTR_ERR(lower_dentry);
-		printk(KERN_ERR "%s: lookup_one_len() returned [%d] on "
-		       "lower_dentry = [%s]\n", __func__, rc,
-		       encrypted_and_encoded_name);
+		ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned "
+				"[%d] on lower_dentry = [%s]\n", __func__, rc,
+				encrypted_and_encoded_name);
 		goto out_d_drop;
 	}
 lookup_and_interpose:
-- 
cgit v1.1


From 62af9b520513d78484f22f874916dfacbc889ce0 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Mon, 19 Apr 2010 16:47:20 +0200
Subject: quota: Convert __DQUOT_PARANOIA symbol to standard config option

Make __DQUOT_PARANOIA define from the old days a standard config option
and turn it off by default.

This gets rid of a quota warning about writes before quota is turned on
for systems with ext4 root filesystem. Currently there's no way to legally
solve this because /etc/mtab has to be written before quota is turned on
on most systems.

Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/quota/Kconfig |  8 ++++++++
 fs/quota/dquot.c | 16 +++++++---------
 2 files changed, 15 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/quota/Kconfig b/fs/quota/Kconfig
index dad7fb2..3e21b1e 100644
--- a/fs/quota/Kconfig
+++ b/fs/quota/Kconfig
@@ -33,6 +33,14 @@ config PRINT_QUOTA_WARNING
 	  Note that this behavior is currently deprecated and may go away in
 	  future. Please use notification via netlink socket instead.
 
+config QUOTA_DEBUG
+	bool "Additional quota sanity checks"
+	depends on QUOTA
+	default n
+	help
+	  If you say Y here, quota subsystem will perform some additional
+	  sanity checks of quota internal structures. If unsure, say N.
+
 # Generic support for tree structured quota files. Selected when needed.
 config QUOTA_TREE
 	 tristate
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index a0a9405..788b580 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -80,8 +80,6 @@
 
 #include <asm/uaccess.h>
 
-#define __DQUOT_PARANOIA
-
 /*
  * There are three quota SMP locks. dq_list_lock protects all lists with quotas
  * and quota formats, dqstats structure containing statistics about the lists
@@ -695,7 +693,7 @@ void dqput(struct dquot *dquot)
 
 	if (!dquot)
 		return;
-#ifdef __DQUOT_PARANOIA
+#ifdef CONFIG_QUOTA_DEBUG
 	if (!atomic_read(&dquot->dq_count)) {
 		printk("VFS: dqput: trying to free free dquot\n");
 		printk("VFS: device %s, dquot of %s %d\n",
@@ -748,7 +746,7 @@ we_slept:
 		goto we_slept;
 	}
 	atomic_dec(&dquot->dq_count);
-#ifdef __DQUOT_PARANOIA
+#ifdef CONFIG_QUOTA_DEBUG
 	/* sanity check */
 	BUG_ON(!list_empty(&dquot->dq_free));
 #endif
@@ -845,7 +843,7 @@ we_slept:
 		dquot = NULL;
 		goto out;
 	}
-#ifdef __DQUOT_PARANOIA
+#ifdef CONFIG_QUOTA_DEBUG
 	BUG_ON(!dquot->dq_sb);	/* Has somebody invalidated entry under us? */
 #endif
 out:
@@ -874,7 +872,7 @@ static int dqinit_needed(struct inode *inode, int type)
 static void add_dquot_ref(struct super_block *sb, int type)
 {
 	struct inode *inode, *old_inode = NULL;
-#ifdef __DQUOT_PARANOIA
+#ifdef CONFIG_QUOTA_DEBUG
 	int reserved = 0;
 #endif
 
@@ -882,7 +880,7 @@ static void add_dquot_ref(struct super_block *sb, int type)
 	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
 		if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW))
 			continue;
-#ifdef __DQUOT_PARANOIA
+#ifdef CONFIG_QUOTA_DEBUG
 		if (unlikely(inode_get_rsv_space(inode) > 0))
 			reserved = 1;
 #endif
@@ -907,7 +905,7 @@ static void add_dquot_ref(struct super_block *sb, int type)
 	spin_unlock(&inode_lock);
 	iput(old_inode);
 
-#ifdef __DQUOT_PARANOIA
+#ifdef CONFIG_QUOTA_DEBUG
 	if (reserved) {
 		printk(KERN_WARNING "VFS (%s): Writes happened before quota"
 			" was turned on thus quota information is probably "
@@ -940,7 +938,7 @@ static int remove_inode_dquot_ref(struct inode *inode, int type,
 	inode->i_dquot[type] = NULL;
 	if (dquot) {
 		if (dqput_blocks(dquot)) {
-#ifdef __DQUOT_PARANOIA
+#ifdef CONFIG_QUOTA_DEBUG
 			if (atomic_read(&dquot->dq_count) != 1)
 				printk(KERN_WARNING "VFS: Adding dquot with dq_count %d to dispose list.\n", atomic_read(&dquot->dq_count));
 #endif
-- 
cgit v1.1


From b6349ac89eacb813f6963f7263da05bc3f483351 Mon Sep 17 00:00:00 2001
From: Joern Engel <joern@logfs.org>
Date: Tue, 20 Apr 2010 21:44:10 +0200
Subject: [LogFS] Split large truncated into smaller chunks

Truncate would do an almost limitless amount of work without invoking
the garbage collector in between.  Split it up into more manageable,
though still large, chunks.

Signed-off-by: Joern Engel <joern@logfs.org>
---
 fs/logfs/readwrite.c | 34 ++++++++++++++++++++++++++--------
 1 file changed, 26 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index aca6c56..7e3a1e5 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -1837,19 +1837,37 @@ static int __logfs_truncate(struct inode *inode, u64 size)
 	return logfs_truncate_direct(inode, size);
 }
 
-int logfs_truncate(struct inode *inode, u64 size)
+/*
+ * Truncate, by changing the segment file, can consume a fair amount
+ * of resources.  So back off from time to time and do some GC.
+ * 8 or 2048 blocks should be well within safety limits even if
+ * every single block resided in a different segment.
+ */
+#define TRUNCATE_STEP	(8 * 1024 * 1024)
+int logfs_truncate(struct inode *inode, u64 target)
 {
 	struct super_block *sb = inode->i_sb;
-	int err;
+	u64 size = i_size_read(inode);
+	int err = 0;
 
-	logfs_get_wblocks(sb, NULL, 1);
-	err = __logfs_truncate(inode, size);
-	if (!err)
-		err = __logfs_write_inode(inode, 0);
-	logfs_put_wblocks(sb, NULL, 1);
+	size = ALIGN(size, TRUNCATE_STEP);
+	while (size > target) {
+		if (size > TRUNCATE_STEP)
+			size -= TRUNCATE_STEP;
+		else
+			size = 0;
+		if (size < target)
+			size = target;
+
+		logfs_get_wblocks(sb, NULL, 1);
+		err = __logfs_truncate(inode, target);
+		if (!err)
+			err = __logfs_write_inode(inode, 0);
+		logfs_put_wblocks(sb, NULL, 1);
+	}
 
 	if (!err)
-		err = vmtruncate(inode, size);
+		err = vmtruncate(inode, target);
 
 	/* I don't trust error recovery yet. */
 	WARN_ON(err);
-- 
cgit v1.1


From b90f687018e6d6c77d981b09203780f7001407e5 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Tue, 20 Apr 2010 16:51:59 -0400
Subject: ext4: Issue the discard operation *before* releasing the blocks to be
 reused

Otherwise, we can end up having data corruption because the blocks
could get reused and then discarded!

https://bugzilla.kernel.org/show_bug.cgi?id=15579

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/mballoc.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 54df209..e5ab41b 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2534,6 +2534,17 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
 		mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
 			 entry->count, entry->group, entry);
 
+		if (test_opt(sb, DISCARD)) {
+			ext4_fsblk_t discard_block;
+
+			discard_block = entry->start_blk +
+				ext4_group_first_block_no(sb, entry->group);
+			trace_ext4_discard_blocks(sb,
+					(unsigned long long)discard_block,
+					entry->count);
+			sb_issue_discard(sb, discard_block, entry->count);
+		}
+
 		err = ext4_mb_load_buddy(sb, entry->group, &e4b);
 		/* we expect to find existing buddy because it's pinned */
 		BUG_ON(err != 0);
@@ -2555,16 +2566,6 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
 			page_cache_release(e4b.bd_bitmap_page);
 		}
 		ext4_unlock_group(sb, entry->group);
-		if (test_opt(sb, DISCARD)) {
-			ext4_fsblk_t discard_block;
-
-			discard_block = entry->start_blk +
-				ext4_group_first_block_no(sb, entry->group);
-			trace_ext4_discard_blocks(sb,
-					(unsigned long long)discard_block,
-					entry->count);
-			sb_issue_discard(sb, discard_block, entry->count);
-		}
 		kmem_cache_free(ext4_free_ext_cachep, entry);
 		ext4_mb_release_desc(&e4b);
 	}
-- 
cgit v1.1


From d7dfee3f5db5575b1d838744559c3c9bb351f74f Mon Sep 17 00:00:00 2001
From: Jun Sun <jsun@junsun.net>
Date: Thu, 31 Dec 2009 17:28:52 -0800
Subject: uclinux: error message when FLAT reloc symbol is invalid, v2

This patch fixes a cosmetic error in printk. Text segment and data/bss
segment are allocated from two different areas. It is not meaningful to
give the diff between them in the error reporting messages.

Signed-off-by: Jun Sun <jsun@junsun.net>
Signed-off-by: Greg Ungerer <gerg@uclinux.org>
---
 fs/binfmt_flat.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index e0e769b..49566c1 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -355,7 +355,7 @@ calc_reloc(unsigned long r, struct lib_info *p, int curid, int internalp)
 
 	if (!flat_reloc_valid(r, start_brk - start_data + text_len)) {
 		printk("BINFMT_FLAT: reloc outside program 0x%x (0 - 0x%x/0x%x)",
-		       (int) r,(int)(start_brk-start_code),(int)text_len);
+		       (int) r,(int)(start_brk-start_data+text_len),(int)text_len);
 		goto failed;
 	}
 
-- 
cgit v1.1


From 083fd8b21a13742b37ab347089c73f895a896672 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 21 Apr 2010 12:01:23 +0100
Subject: AFS: Don't pass error value to page_cache_release() in error handling

In the error handling in afs_mntpt_do_automount(), we pass an error
pointer to page_cache_release() if read_mapping_page() failed.  Instead,
we should extend the gotos around the error handling we don't need.

Reported-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/afs/mntpt.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 5e813a8..b3feddc 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -138,9 +138,9 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
 {
 	struct afs_super_info *super;
 	struct vfsmount *mnt;
-	struct page *page = NULL;
+	struct page *page;
 	size_t size;
-	char *buf, *devname = NULL, *options = NULL;
+	char *buf, *devname, *options;
 	int ret;
 
 	_enter("{%s}", mntpt->d_name.name);
@@ -150,22 +150,22 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
 	ret = -EINVAL;
 	size = mntpt->d_inode->i_size;
 	if (size > PAGE_SIZE - 1)
-		goto error;
+		goto error_no_devname;
 
 	ret = -ENOMEM;
 	devname = (char *) get_zeroed_page(GFP_KERNEL);
 	if (!devname)
-		goto error;
+		goto error_no_devname;
 
 	options = (char *) get_zeroed_page(GFP_KERNEL);
 	if (!options)
-		goto error;
+		goto error_no_options;
 
 	/* read the contents of the AFS special symlink */
 	page = read_mapping_page(mntpt->d_inode->i_mapping, 0, NULL);
 	if (IS_ERR(page)) {
 		ret = PTR_ERR(page);
-		goto error;
+		goto error_no_page;
 	}
 
 	ret = -EIO;
@@ -196,12 +196,12 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
 	return mnt;
 
 error:
-	if (page)
-		page_cache_release(page);
-	if (devname)
-		free_page((unsigned long) devname);
-	if (options)
-		free_page((unsigned long) options);
+	page_cache_release(page);
+error_no_page:
+	free_page((unsigned long) options);
+error_no_options:
+	free_page((unsigned long) devname);
+error_no_devname:
 	_leave(" = %d", ret);
 	return ERR_PTR(ret);
 }
-- 
cgit v1.1


From 0ed07ddb56d1348e5ce33f3b8de20d730351983a Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Thu, 22 Apr 2010 11:42:00 +0200
Subject: 9p: add bdi backing to mount session

This ensures that dirty data gets flushed properly.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 fs/9p/v9fs.c      | 10 ++++++++++
 fs/9p/v9fs.h      |  2 ++
 fs/9p/vfs_super.c |  1 +
 3 files changed, 13 insertions(+)

(limited to 'fs')

diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 5c5bc84..f8b86e9 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -238,6 +238,13 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
 		return ERR_PTR(-ENOMEM);
 	}
 
+	rc = bdi_setup_and_register(&v9ses->bdi, "9p", BDI_CAP_MAP_COPY);
+	if (rc) {
+		__putname(v9ses->aname);
+		__putname(v9ses->uname);
+		return ERR_PTR(rc);
+	}
+
 	spin_lock(&v9fs_sessionlist_lock);
 	list_add(&v9ses->slist, &v9fs_sessionlist);
 	spin_unlock(&v9fs_sessionlist_lock);
@@ -301,6 +308,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
 	return fid;
 
 error:
+	bdi_destroy(&v9ses->bdi);
 	return ERR_PTR(retval);
 }
 
@@ -326,6 +334,8 @@ void v9fs_session_close(struct v9fs_session_info *v9ses)
 	__putname(v9ses->uname);
 	__putname(v9ses->aname);
 
+	bdi_destroy(&v9ses->bdi);
+
 	spin_lock(&v9fs_sessionlist_lock);
 	list_del(&v9ses->slist);
 	spin_unlock(&v9fs_sessionlist_lock);
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index a0a8d3d..bec4d0b 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -20,6 +20,7 @@
  *  Boston, MA  02111-1301  USA
  *
  */
+#include <linux/backing-dev.h>
 
 /**
  * enum p9_session_flags - option flags for each 9P session
@@ -102,6 +103,7 @@ struct v9fs_session_info {
 	u32 uid;		/* if ACCESS_SINGLE, the uid that has access */
 	struct p9_client *clnt;	/* 9p client */
 	struct list_head slist; /* list of sessions registered with v9fs */
+	struct backing_dev_info bdi;
 };
 
 struct p9_fid *v9fs_session_init(struct v9fs_session_info *, const char *,
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 491108b..806da5d 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -77,6 +77,7 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
 	sb->s_blocksize = 1 << sb->s_blocksize_bits;
 	sb->s_magic = V9FS_MAGIC;
 	sb->s_op = &v9fs_super_ops;
+	sb->s_bdi = &v9ses->bdi;
 
 	sb->s_flags = flags | MS_ACTIVE | MS_SYNCHRONOUS | MS_DIRSYNC |
 	    MS_NOATIME;
-- 
cgit v1.1


From e1da0222753a2322d76c97fc02396fb83143c7ac Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Thu, 22 Apr 2010 11:58:18 +0200
Subject: afs: add bdi backing to mount session.

This ensures that dirty data gets flushed properly.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 fs/afs/internal.h | 2 ++
 fs/afs/super.c    | 1 +
 fs/afs/volume.c   | 7 +++++++
 3 files changed, 10 insertions(+)

(limited to 'fs')

diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index c54dad4..a10f258 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -19,6 +19,7 @@
 #include <linux/workqueue.h>
 #include <linux/sched.h>
 #include <linux/fscache.h>
+#include <linux/backing-dev.h>
 
 #include "afs.h"
 #include "afs_vl.h"
@@ -313,6 +314,7 @@ struct afs_volume {
 	unsigned short		rjservers;	/* number of servers discarded due to -ENOMEDIUM */
 	struct afs_server	*servers[8];	/* servers on which volume resides (ordered) */
 	struct rw_semaphore	server_sem;	/* lock for accessing current server */
+	struct backing_dev_info	bdi;
 };
 
 /*
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 14f6431..e932e5a 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -311,6 +311,7 @@ static int afs_fill_super(struct super_block *sb, void *data)
 	sb->s_magic		= AFS_FS_MAGIC;
 	sb->s_op		= &afs_super_ops;
 	sb->s_fs_info		= as;
+	sb->s_bdi		= &as->volume->bdi;
 
 	/* allocate the root inode and dentry */
 	fid.vid		= as->volume->vid;
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index a353e69..401eeb2 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -106,6 +106,10 @@ struct afs_volume *afs_volume_lookup(struct afs_mount_params *params)
 	volume->cell		= params->cell;
 	volume->vid		= vlocation->vldb.vid[params->type];
 
+	ret = bdi_setup_and_register(&volume->bdi, "afs", BDI_CAP_MAP_COPY);
+	if (ret)
+		goto error_bdi;
+
 	init_rwsem(&volume->server_sem);
 
 	/* look up all the applicable server records */
@@ -151,6 +155,8 @@ error:
 	return ERR_PTR(ret);
 
 error_discard:
+	bdi_destroy(&volume->bdi);
+error_bdi:
 	up_write(&params->cell->vl_sem);
 
 	for (loop = volume->nservers - 1; loop >= 0; loop--)
@@ -200,6 +206,7 @@ void afs_put_volume(struct afs_volume *volume)
 	for (loop = volume->nservers - 1; loop >= 0; loop--)
 		afs_put_server(volume->servers[loop]);
 
+	bdi_destroy(&volume->bdi);
 	kfree(volume);
 
 	_leave(" [destroyed]");
-- 
cgit v1.1


From 8044f7f468469c80031611206d554f86fcdfe704 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Thu, 22 Apr 2010 12:09:48 +0200
Subject: cifs: add bdi backing to mount session

This ensures that dirty data gets flushed properly.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 fs/cifs/cifs_fs_sb.h |  3 +++
 fs/cifs/cifsfs.c     | 10 ++++++++++
 2 files changed, 13 insertions(+)

(limited to 'fs')

diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index 4797787..246a167 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -18,6 +18,8 @@
 #ifndef _CIFS_FS_SB_H
 #define _CIFS_FS_SB_H
 
+#include <linux/backing-dev.h>
+
 #define CIFS_MOUNT_NO_PERM      1 /* do not do client vfs_perm check */
 #define CIFS_MOUNT_SET_UID      2 /* set current's euid in create etc. */
 #define CIFS_MOUNT_SERVER_INUM  4 /* inode numbers from uniqueid from server  */
@@ -50,5 +52,6 @@ struct cifs_sb_info {
 #ifdef CONFIG_CIFS_DFS_UPCALL
 	char   *mountdata; /* mount options received at mount time */
 #endif
+	struct backing_dev_info bdi;
 };
 #endif				/* _CIFS_FS_SB_H */
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index ded66be..ad235d6 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -103,6 +103,12 @@ cifs_read_super(struct super_block *sb, void *data,
 	if (cifs_sb == NULL)
 		return -ENOMEM;
 
+	rc = bdi_setup_and_register(&cifs_sb->bdi, "cifs", BDI_CAP_MAP_COPY);
+	if (rc) {
+		kfree(cifs_sb);
+		return rc;
+	}
+
 #ifdef CONFIG_CIFS_DFS_UPCALL
 	/* copy mount params to sb for use in submounts */
 	/* BB: should we move this after the mount so we
@@ -115,6 +121,7 @@ cifs_read_super(struct super_block *sb, void *data,
 		int len = strlen(data);
 		cifs_sb->mountdata = kzalloc(len + 1, GFP_KERNEL);
 		if (cifs_sb->mountdata == NULL) {
+			bdi_destroy(&cifs_sb->bdi);
 			kfree(sb->s_fs_info);
 			sb->s_fs_info = NULL;
 			return -ENOMEM;
@@ -135,6 +142,7 @@ cifs_read_super(struct super_block *sb, void *data,
 
 	sb->s_magic = CIFS_MAGIC_NUMBER;
 	sb->s_op = &cifs_super_ops;
+	sb->s_bdi = &cifs_sb->bdi;
 /*	if (cifs_sb->tcon->ses->server->maxBuf > MAX_CIFS_HDR_SIZE + 512)
 	    sb->s_blocksize =
 		cifs_sb->tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE; */
@@ -183,6 +191,7 @@ out_mount_failed:
 		}
 #endif
 		unload_nls(cifs_sb->local_nls);
+		bdi_destroy(&cifs_sb->bdi);
 		kfree(cifs_sb);
 	}
 	return rc;
@@ -214,6 +223,7 @@ cifs_put_super(struct super_block *sb)
 #endif
 
 	unload_nls(cifs_sb->local_nls);
+	bdi_destroy(&cifs_sb->bdi);
 	kfree(cifs_sb);
 
 	unlock_kernel();
-- 
cgit v1.1


From 5163d90076729413cb882d3dd5c3d3cfb5b9f035 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Thu, 22 Apr 2010 12:12:40 +0200
Subject: coda: add bdi backing to mount session

This ensures that dirty data gets flushed properly.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 fs/coda/inode.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'fs')

diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index a1695dc..d97f993 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -167,6 +167,10 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent)
 		return -EBUSY;
 	}
 
+	error = bdi_setup_and_register(&vc->bdi, "coda", BDI_CAP_MAP_COPY);
+	if (error)
+		goto bdi_err;
+
 	vc->vc_sb = sb;
 
 	sb->s_fs_info = vc;
@@ -175,6 +179,7 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent)
 	sb->s_blocksize_bits = 12;
 	sb->s_magic = CODA_SUPER_MAGIC;
 	sb->s_op = &coda_super_operations;
+	sb->s_bdi = &vc->bdi;
 
 	/* get root fid from Venus: this needs the root inode */
 	error = venus_rootfid(sb, &fid);
@@ -200,6 +205,8 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent)
         return 0;
 
  error:
+	bdi_destroy(&vc->bdi);
+ bdi_err:
 	if (root)
 		iput(root);
 	if (vc)
@@ -210,6 +217,7 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent)
 
 static void coda_put_super(struct super_block *sb)
 {
+	bdi_destroy(&coda_vcp(sb)->bdi);
 	coda_vcp(sb)->vc_sb = NULL;
 	sb->s_fs_info = NULL;
 
-- 
cgit v1.1


From 9df9c8b930156a2f9ce2b2ae66acb14bee2663f5 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Thu, 22 Apr 2010 12:22:04 +0200
Subject: ecryptfs: add bdi backing to mount session

This ensures that dirty data gets flushed properly.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 fs/ecryptfs/ecryptfs_kernel.h |  2 ++
 fs/ecryptfs/main.c            | 10 +++++++++-
 fs/ecryptfs/super.c           |  1 +
 3 files changed, 12 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index bc71154..bfc2e0f 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -35,6 +35,7 @@
 #include <linux/scatterlist.h>
 #include <linux/hash.h>
 #include <linux/nsproxy.h>
+#include <linux/backing-dev.h>
 
 /* Version verification for shared data structures w/ userspace */
 #define ECRYPTFS_VERSION_MAJOR 0x00
@@ -393,6 +394,7 @@ struct ecryptfs_mount_crypt_stat {
 struct ecryptfs_sb_info {
 	struct super_block *wsi_sb;
 	struct ecryptfs_mount_crypt_stat mount_crypt_stat;
+	struct backing_dev_info bdi;
 };
 
 /* file private data. */
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index af1a8f0..760983d 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -497,17 +497,25 @@ struct kmem_cache *ecryptfs_sb_info_cache;
 static int
 ecryptfs_fill_super(struct super_block *sb, void *raw_data, int silent)
 {
+	struct ecryptfs_sb_info *esi;
 	int rc = 0;
 
 	/* Released in ecryptfs_put_super() */
 	ecryptfs_set_superblock_private(sb,
 					kmem_cache_zalloc(ecryptfs_sb_info_cache,
 							 GFP_KERNEL));
-	if (!ecryptfs_superblock_to_private(sb)) {
+	esi = ecryptfs_superblock_to_private(sb);
+	if (!esi) {
 		ecryptfs_printk(KERN_WARNING, "Out of memory\n");
 		rc = -ENOMEM;
 		goto out;
 	}
+
+	rc = bdi_setup_and_register(&esi->bdi, "ecryptfs", BDI_CAP_MAP_COPY);
+	if (rc)
+		goto out;
+
+	sb->s_bdi = &esi->bdi;
 	sb->s_op = &ecryptfs_sops;
 	/* Released through deactivate_super(sb) from get_sb_nodev */
 	sb->s_root = d_alloc(NULL, &(const struct qstr) {
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index 278743c..0c0ae49 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -122,6 +122,7 @@ static void ecryptfs_put_super(struct super_block *sb)
 	lock_kernel();
 
 	ecryptfs_destroy_mount_crypt_stat(&sb_info->mount_crypt_stat);
+	bdi_destroy(&sb_info->bdi);
 	kmem_cache_free(ecryptfs_sb_info_cache, sb_info);
 	ecryptfs_set_superblock_private(sb, NULL);
 
-- 
cgit v1.1


From b3d0ab7e60d1865bb6f6a79a77aaba22f2543236 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Thu, 22 Apr 2010 12:26:04 +0200
Subject: exofs: add bdi backing to mount session

This ensures that dirty data gets flushed properly.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 fs/exofs/exofs.h | 2 ++
 fs/exofs/super.c | 8 ++++++++
 2 files changed, 10 insertions(+)

(limited to 'fs')

diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h
index 8442e35..5437327 100644
--- a/fs/exofs/exofs.h
+++ b/fs/exofs/exofs.h
@@ -35,6 +35,7 @@
 
 #include <linux/fs.h>
 #include <linux/time.h>
+#include <linux/backing-dev.h>
 #include "common.h"
 
 /* FIXME: Remove once pnfs hits mainline
@@ -92,6 +93,7 @@ struct exofs_sb_info {
 	struct exofs_layout	layout;		/* Default files layout,
 						 * contains the variable osd_dev
 						 * array. Keep last */
+	struct backing_dev_info bdi;
 	struct osd_dev	*_min_one_dev[1];	/* Place holder for one dev   */
 };
 
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 18e57ea..03149b9 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -302,6 +302,7 @@ static void exofs_put_super(struct super_block *sb)
 	_exofs_print_device("Unmounting", NULL, sbi->layout.s_ods[0],
 			    sbi->layout.s_pid);
 
+	bdi_destroy(&sbi->bdi);
 	exofs_free_sbi(sbi);
 	sb->s_fs_info = NULL;
 }
@@ -546,6 +547,10 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
 	if (!sbi)
 		return -ENOMEM;
 
+	ret = bdi_setup_and_register(&sbi->bdi, "exofs", BDI_CAP_MAP_COPY);
+	if (ret)
+		goto free_bdi;
+
 	/* use mount options to fill superblock */
 	od = osduld_path_lookup(opts->dev_name);
 	if (IS_ERR(od)) {
@@ -612,6 +617,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
 	}
 
 	/* set up operation vectors */
+	sb->s_bdi = &sbi->bdi;
 	sb->s_fs_info = sbi;
 	sb->s_op = &exofs_sops;
 	sb->s_export_op = &exofs_export_ops;
@@ -643,6 +649,8 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
 	return 0;
 
 free_sbi:
+	bdi_destroy(&sbi->bdi);
+free_bdi:
 	EXOFS_ERR("Unable to mount exofs on %s pid=0x%llx err=%d\n",
 		  opts->dev_name, sbi->layout.s_pid, ret);
 	exofs_free_sbi(sbi);
-- 
cgit v1.1


From f1970c73cbb6b884152207e4dfe90639f5029905 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Thu, 22 Apr 2010 12:31:11 +0200
Subject: ncpfs: add bdi backing to mount session

This ensures that dirty data gets flushed properly.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 fs/ncpfs/inode.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'fs')

diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index cf98da1..fa33851 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -526,10 +526,15 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
 	sb->s_blocksize_bits = 10;
 	sb->s_magic = NCP_SUPER_MAGIC;
 	sb->s_op = &ncp_sops;
+	sb->s_bdi = &server->bdi;
 
 	server = NCP_SBP(sb);
 	memset(server, 0, sizeof(*server));
 
+	error = bdi_setup_and_register(&server->bdi, "ncpfs", BDI_CAP_MAP_COPY);
+	if (error)
+		goto out_bdi;
+
 	server->ncp_filp = ncp_filp;
 	server->ncp_sock = sock;
 	
@@ -719,6 +724,8 @@ out_fput2:
 	if (server->info_filp)
 		fput(server->info_filp);
 out_fput:
+	bdi_destroy(&server->bdi);
+out_bdi:
 	/* 23/12/1998 Marcin Dalecki <dalecki@cs.net.pl>:
 	 * 
 	 * The previously used put_filp(ncp_filp); was bogous, since
@@ -756,6 +763,7 @@ static void ncp_put_super(struct super_block *sb)
 	kill_pid(server->m.wdog_pid, SIGTERM, 1);
 	put_pid(server->m.wdog_pid);
 
+	bdi_destroy(&server->bdi);
 	kfree(server->priv.data);
 	kfree(server->auth.object_name);
 	vfree(server->rxbuf);
-- 
cgit v1.1


From 424264b7b220e8eee165dc3080ae48692af73dec Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Thu, 22 Apr 2010 12:37:07 +0200
Subject: smbfs: add bdi backing to mount session

This ensures that dirty data gets flushed properly.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 fs/smbfs/inode.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'fs')

diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index 1c4c8f0..dfa1d67 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -479,6 +479,7 @@ smb_put_super(struct super_block *sb)
 	if (server->conn_pid)
 		kill_pid(server->conn_pid, SIGTERM, 1);
 
+	bdi_destroy(&server->bdi);
 	kfree(server->ops);
 	smb_unload_nls(server);
 	sb->s_fs_info = NULL;
@@ -525,6 +526,11 @@ static int smb_fill_super(struct super_block *sb, void *raw_data, int silent)
 	if (!server)
 		goto out_no_server;
 	sb->s_fs_info = server;
+	
+	if (bdi_setup_and_register(&server->bdi, "smbfs", BDI_CAP_MAP_COPY))
+		goto out_bdi;
+
+	sb->s_bdi = &server->bdi;
 
 	server->super_block = sb;
 	server->mnt = NULL;
@@ -624,6 +630,8 @@ out_no_smbiod:
 out_bad_option:
 	kfree(mem);
 out_no_mem:
+	bdi_destroy(&server->bdi);
+out_bdi:
 	if (!server->mnt)
 		printk(KERN_ERR "smb_fill_super: allocation failure\n");
 	sb->s_fs_info = NULL;
-- 
cgit v1.1


From 1f063d2cdf332a8a5722006b1345d15d16007c6e Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 22 Apr 2010 15:35:55 -0400
Subject: NFSv4: Don't attempt an atomic open if the file is a mountpoint

Fix https://bugzilla.kernel.org/show_bug.cgi?id=15789

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index be46f26..fbb4cf7 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1050,7 +1050,7 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
 	struct inode *dir;
 	int openflags, ret = 0;
 
-	if (!is_atomic_open(nd))
+	if (!is_atomic_open(nd) || d_mountpoint(dentry))
 		goto no_open;
 	parent = dget_parent(dentry);
 	dir = parent->d_inode;
-- 
cgit v1.1


From 356e76b855bdbfd8d1c5e75bcf0c6bf0dfe83496 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 22 Apr 2010 15:35:56 -0400
Subject: NFS: rsize and wsize settings ignored on v4 mounts

NFSv4 mounts ignore the rsize and wsize mount options, and always use
the default transfer size for both.  This seems to be because all
NFSv4 mounts are now cloned, and the cloning logic doesn't copy the
rsize and wsize settings from the parent nfs_server.

I tested Fedora's 2.6.32.11-99 and it seems to have this problem as
well, so I'm guessing that .33, .32, and perhaps older kernels have
this issue as well.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Cc: Stable <stable@kernel.org>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index a8766c4..acc9c49 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -966,6 +966,8 @@ out_error:
 static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_server *source)
 {
 	target->flags = source->flags;
+	target->rsize = source->rsize;
+	target->wsize = source->wsize;
 	target->acregmin = source->acregmin;
 	target->acregmax = source->acregmax;
 	target->acdirmin = source->acdirmin;
-- 
cgit v1.1


From cdd29ecfcb9554132cd94b82ae8b69ba37adb3b5 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Thu, 22 Apr 2010 15:35:56 -0400
Subject: nfs: testing for null instead of ERR_PTR()

nfs_path() returns an ERR_PTR(), it doesn't return null.

Signed-off-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/super.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index e016372..f9327bb 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2657,7 +2657,7 @@ static void nfs_fix_devname(const struct path *path, struct vfsmount *mnt)
 	devname = nfs_path(path->mnt->mnt_devname,
 			path->mnt->mnt_root, path->dentry,
 			page, PAGE_SIZE);
-	if (devname == NULL)
+	if (IS_ERR(devname))
 		goto out_freepage;
 	tmp = kstrdup(devname, GFP_KERNEL);
 	if (tmp == NULL)
-- 
cgit v1.1


From 71d0a6112a363e703e383ae5b12c492485c39701 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 22 Apr 2010 15:35:57 -0400
Subject: NFS: Fix an unstable write data integrity race

Commit 2c61be0a9478258f77b66208a0c4b1f5f8161c3c (NFS: Ensure that the WRITE
and COMMIT RPC calls are always uninterruptible) exposed a race on file
close. In order to ensure correct close-to-open behaviour, we want to wait
for all outstanding background commit operations to complete.

This patch adds an inode flag that indicates if a commit operation is under
way, and provides a mechanism to allow ->write_inode() to wait for its
completion if this is a data integrity flush.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/write.c | 36 ++++++++++++++++++++++++++++++++----
 1 file changed, 32 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index de38d63..ccde2ae 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1201,6 +1201,25 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
 
 
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
+static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait)
+{
+	if (!test_and_set_bit(NFS_INO_COMMIT, &nfsi->flags))
+		return 1;
+	if (may_wait && !out_of_line_wait_on_bit_lock(&nfsi->flags,
+				NFS_INO_COMMIT, nfs_wait_bit_killable,
+				TASK_KILLABLE))
+		return 1;
+	return 0;
+}
+
+static void nfs_commit_clear_lock(struct nfs_inode *nfsi)
+{
+	clear_bit(NFS_INO_COMMIT, &nfsi->flags);
+	smp_mb__after_clear_bit();
+	wake_up_bit(&nfsi->flags, NFS_INO_COMMIT);
+}
+
+
 static void nfs_commitdata_release(void *data)
 {
 	struct nfs_write_data *wdata = data;
@@ -1262,8 +1281,6 @@ static int nfs_commit_rpcsetup(struct list_head *head,
 	task = rpc_run_task(&task_setup_data);
 	if (IS_ERR(task))
 		return PTR_ERR(task);
-	if (how & FLUSH_SYNC)
-		rpc_wait_for_completion_task(task);
 	rpc_put_task(task);
 	return 0;
 }
@@ -1294,6 +1311,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
 				BDI_RECLAIMABLE);
 		nfs_clear_page_tag_locked(req);
 	}
+	nfs_commit_clear_lock(NFS_I(inode));
 	return -ENOMEM;
 }
 
@@ -1349,6 +1367,7 @@ static void nfs_commit_release(void *calldata)
 	next:
 		nfs_clear_page_tag_locked(req);
 	}
+	nfs_commit_clear_lock(NFS_I(data->inode));
 	nfs_commitdata_release(calldata);
 }
 
@@ -1363,8 +1382,11 @@ static const struct rpc_call_ops nfs_commit_ops = {
 static int nfs_commit_inode(struct inode *inode, int how)
 {
 	LIST_HEAD(head);
-	int res;
+	int may_wait = how & FLUSH_SYNC;
+	int res = 0;
 
+	if (!nfs_commit_set_lock(NFS_I(inode), may_wait))
+		goto out;
 	spin_lock(&inode->i_lock);
 	res = nfs_scan_commit(inode, &head, 0, 0);
 	spin_unlock(&inode->i_lock);
@@ -1372,7 +1394,13 @@ static int nfs_commit_inode(struct inode *inode, int how)
 		int error = nfs_commit_list(inode, &head, how);
 		if (error < 0)
 			return error;
-	}
+		if (may_wait)
+			wait_on_bit(&NFS_I(inode)->flags, NFS_INO_COMMIT,
+					nfs_wait_bit_killable,
+					TASK_KILLABLE);
+	} else
+		nfs_commit_clear_lock(NFS_I(inode));
+out:
 	return res;
 }
 
-- 
cgit v1.1


From 792590c72376649b4e315df386fd208b83db05b9 Mon Sep 17 00:00:00 2001
From: Phillip Lougher <phillip@lougher.demon.co.uk>
Date: Sun, 4 Apr 2010 22:20:58 +0100
Subject: squashfs: fix locking bug in zlib wrapper

Fix locking bug in zlib wrapper introduced by recent decompressor changes.

Signed-off-by: Phillip Lougher <phillip@lougher.demon.co.uk>
---
 fs/squashfs/zlib_wrapper.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c
index 15a03d0..7a60387 100644
--- a/fs/squashfs/zlib_wrapper.c
+++ b/fs/squashfs/zlib_wrapper.c
@@ -128,8 +128,9 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer,
 		goto release_mutex;
 	}
 
+	length = stream->total_out;
 	mutex_unlock(&msblk->read_data_mutex);
-	return stream->total_out;
+	return length;
 
 release_mutex:
 	mutex_unlock(&msblk->read_data_mutex);
-- 
cgit v1.1


From 3a3076f4d6e2fa31338a0b007df42a3b32f079e0 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@redhat.com>
Date: Fri, 23 Apr 2010 12:17:17 -0400
Subject: Cleanup generic block based fiemap

This cleans up a few of the complaints of __generic_block_fiemap.  I've
fixed all the typing stuff, used inline functions instead of macros,
gotten rid of a couple of variables, and made sure the size and block
requests are all block aligned.  It also fixes a problem where sometimes
FIEMAP_EXTENT_LAST wasn't being set properly.

Signed-off-by: Josef Bacik <josef@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ioctl.c | 92 ++++++++++++++++++++++++++++++++++++--------------------------
 1 file changed, 53 insertions(+), 39 deletions(-)

(limited to 'fs')

diff --git a/fs/ioctl.c b/fs/ioctl.c
index 6c75110..7faefb4 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -228,14 +228,23 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg)
 
 #ifdef CONFIG_BLOCK
 
-#define blk_to_logical(inode, blk) (blk << (inode)->i_blkbits)
-#define logical_to_blk(inode, offset) (offset >> (inode)->i_blkbits);
+static inline sector_t logical_to_blk(struct inode *inode, loff_t offset)
+{
+	return (offset >> inode->i_blkbits);
+}
+
+static inline loff_t blk_to_logical(struct inode *inode, sector_t blk)
+{
+	return (blk << inode->i_blkbits);
+}
 
 /**
  * __generic_block_fiemap - FIEMAP for block based inodes (no locking)
- * @inode - the inode to map
- * @arg - the pointer to userspace where we copy everything to
- * @get_block - the fs's get_block function
+ * @inode: the inode to map
+ * @fieinfo: the fiemap info struct that will be passed back to userspace
+ * @start: where to start mapping in the inode
+ * @len: how much space to map
+ * @get_block: the fs's get_block function
  *
  * This does FIEMAP for block based inodes.  Basically it will just loop
  * through get_block until we hit the number of extents we want to map, or we
@@ -250,58 +259,63 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg)
  */
 
 int __generic_block_fiemap(struct inode *inode,
-			   struct fiemap_extent_info *fieinfo, u64 start,
-			   u64 len, get_block_t *get_block)
+			   struct fiemap_extent_info *fieinfo, loff_t start,
+			   loff_t len, get_block_t *get_block)
 {
-	struct buffer_head tmp;
-	unsigned long long start_blk;
-	long long length = 0, map_len = 0;
+	struct buffer_head map_bh;
+	sector_t start_blk, last_blk;
+	loff_t isize = i_size_read(inode);
 	u64 logical = 0, phys = 0, size = 0;
 	u32 flags = FIEMAP_EXTENT_MERGED;
-	int ret = 0, past_eof = 0, whole_file = 0;
+	bool past_eof = false, whole_file = false;
+	int ret = 0;
 
-	if ((ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC)))
+	ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
+	if (ret)
 		return ret;
 
-	start_blk = logical_to_blk(inode, start);
-
-	length = (long long)min_t(u64, len, i_size_read(inode));
-	if (length < len)
-		whole_file = 1;
+	/*
+	 * Either the i_mutex or other appropriate locking needs to be held
+	 * since we expect isize to not change at all through the duration of
+	 * this call.
+	 */
+	if (len >= isize) {
+		whole_file = true;
+		len = isize;
+	}
 
-	map_len = length;
+	start_blk = logical_to_blk(inode, start);
+	last_blk = logical_to_blk(inode, start + len - 1);
 
 	do {
 		/*
 		 * we set b_size to the total size we want so it will map as
 		 * many contiguous blocks as possible at once
 		 */
-		memset(&tmp, 0, sizeof(struct buffer_head));
-		tmp.b_size = map_len;
+		memset(&map_bh, 0, sizeof(struct buffer_head));
+		map_bh.b_size = len;
 
-		ret = get_block(inode, start_blk, &tmp, 0);
+		ret = get_block(inode, start_blk, &map_bh, 0);
 		if (ret)
 			break;
 
 		/* HOLE */
-		if (!buffer_mapped(&tmp)) {
-			length -= blk_to_logical(inode, 1);
+		if (!buffer_mapped(&map_bh)) {
 			start_blk++;
 
 			/*
-			 * we want to handle the case where there is an
+			 * We want to handle the case where there is an
 			 * allocated block at the front of the file, and then
 			 * nothing but holes up to the end of the file properly,
 			 * to make sure that extent at the front gets properly
 			 * marked with FIEMAP_EXTENT_LAST
 			 */
 			if (!past_eof &&
-			    blk_to_logical(inode, start_blk) >=
-			    blk_to_logical(inode, 0)+i_size_read(inode))
+			    blk_to_logical(inode, start_blk) >= isize)
 				past_eof = 1;
 
 			/*
-			 * first hole after going past the EOF, this is our
+			 * First hole after going past the EOF, this is our
 			 * last extent
 			 */
 			if (past_eof && size) {
@@ -309,15 +323,18 @@ int __generic_block_fiemap(struct inode *inode,
 				ret = fiemap_fill_next_extent(fieinfo, logical,
 							      phys, size,
 							      flags);
-				break;
+			} else if (size) {
+				ret = fiemap_fill_next_extent(fieinfo, logical,
+							      phys, size, flags);
+				size = 0;
 			}
 
 			/* if we have holes up to/past EOF then we're done */
-			if (length <= 0 || past_eof)
+			if (start_blk > last_blk || past_eof || ret)
 				break;
 		} else {
 			/*
-			 * we have gone over the length of what we wanted to
+			 * We have gone over the length of what we wanted to
 			 * map, and it wasn't the entire file, so add the extent
 			 * we got last time and exit.
 			 *
@@ -331,7 +348,7 @@ int __generic_block_fiemap(struct inode *inode,
 			 * are good to go, just add the extent to the fieinfo
 			 * and break
 			 */
-			if (length <= 0 && !whole_file) {
+			if (start_blk > last_blk && !whole_file) {
 				ret = fiemap_fill_next_extent(fieinfo, logical,
 							      phys, size,
 							      flags);
@@ -351,11 +368,10 @@ int __generic_block_fiemap(struct inode *inode,
 			}
 
 			logical = blk_to_logical(inode, start_blk);
-			phys = blk_to_logical(inode, tmp.b_blocknr);
-			size = tmp.b_size;
+			phys = blk_to_logical(inode, map_bh.b_blocknr);
+			size = map_bh.b_size;
 			flags = FIEMAP_EXTENT_MERGED;
 
-			length -= tmp.b_size;
 			start_blk += logical_to_blk(inode, size);
 
 			/*
@@ -363,15 +379,13 @@ int __generic_block_fiemap(struct inode *inode,
 			 * soon as we find a hole that the last extent we found
 			 * is marked with FIEMAP_EXTENT_LAST
 			 */
-			if (!past_eof &&
-			    logical+size >=
-			    blk_to_logical(inode, 0)+i_size_read(inode))
-				past_eof = 1;
+			if (!past_eof && logical + size >= isize)
+				past_eof = true;
 		}
 		cond_resched();
 	} while (1);
 
-	/* if ret is 1 then we just hit the end of the extent array */
+	/* If ret is 1 then we just hit the end of the extent array */
 	if (ret == 1)
 		ret = 0;
 
-- 
cgit v1.1


From d4cd1871cff68e188dadcf6d1280762522b643eb Mon Sep 17 00:00:00 2001
From: Li Dongyang <lidongyang@novell.com>
Date: Thu, 22 Apr 2010 16:11:19 +0800
Subject: ocfs2: add OCFS2_INODE_SKIP_ORPHAN_DIR flag and honor it in the inode
 wipe code

Currently in the error path of ocfs2_symlink and ocfs2_mknod, we just call
iput with the inode we failed with, but the inode wipe code will complain
because we don't add the inode to orphan dir. One solution would be to lock
the orphan dir during the entire transaction, but that's too heavy for a
rare error path. Instead, we add a flag, OCFS2_INODE_SKIP_ORPHAN_DIR which
tells the inode wipe code that it won't find this inode in the orphan dir.

[ Merge fixes and comment style cleanups -Mark ]

Signed-off-by: Li Dongyang <lidongyang@novell.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
---
 fs/ocfs2/inode.c | 65 +++++++++++++++++++++++++++++++-------------------------
 fs/ocfs2/inode.h |  2 ++
 fs/ocfs2/namei.c |  1 +
 3 files changed, 39 insertions(+), 29 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 07cc8bb..2639920 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -639,11 +639,13 @@ static int ocfs2_remove_inode(struct inode *inode,
 		goto bail_unlock;
 	}
 
-	status = ocfs2_orphan_del(osb, handle, orphan_dir_inode, inode,
-				  orphan_dir_bh);
-	if (status < 0) {
-		mlog_errno(status);
-		goto bail_commit;
+	if (!(OCFS2_I(inode)->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR)) {
+		status = ocfs2_orphan_del(osb, handle, orphan_dir_inode, inode,
+					  orphan_dir_bh);
+		if (status < 0) {
+			mlog_errno(status);
+			goto bail_commit;
+		}
 	}
 
 	/* set the inodes dtime */
@@ -726,34 +728,35 @@ static int ocfs2_wipe_inode(struct inode *inode,
 	struct inode *orphan_dir_inode = NULL;
 	struct buffer_head *orphan_dir_bh = NULL;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
-	struct ocfs2_dinode *di;
+	struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;
 
-	di = (struct ocfs2_dinode *) di_bh->b_data;
-	orphaned_slot = le16_to_cpu(di->i_orphaned_slot);
+	if (!(OCFS2_I(inode)->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR)) {
+		orphaned_slot = le16_to_cpu(di->i_orphaned_slot);
 
-	status = ocfs2_check_orphan_recovery_state(osb, orphaned_slot);
-	if (status)
-		return status;
+		status = ocfs2_check_orphan_recovery_state(osb, orphaned_slot);
+		if (status)
+			return status;
 
-	orphan_dir_inode = ocfs2_get_system_file_inode(osb,
-						       ORPHAN_DIR_SYSTEM_INODE,
-						       orphaned_slot);
-	if (!orphan_dir_inode) {
-		status = -EEXIST;
-		mlog_errno(status);
-		goto bail;
-	}
+		orphan_dir_inode = ocfs2_get_system_file_inode(osb,
+							       ORPHAN_DIR_SYSTEM_INODE,
+							       orphaned_slot);
+		if (!orphan_dir_inode) {
+			status = -EEXIST;
+			mlog_errno(status);
+			goto bail;
+		}
 
-	/* Lock the orphan dir. The lock will be held for the entire
-	 * delete_inode operation. We do this now to avoid races with
-	 * recovery completion on other nodes. */
-	mutex_lock(&orphan_dir_inode->i_mutex);
-	status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1);
-	if (status < 0) {
-		mutex_unlock(&orphan_dir_inode->i_mutex);
+		/* Lock the orphan dir. The lock will be held for the entire
+		 * delete_inode operation. We do this now to avoid races with
+		 * recovery completion on other nodes. */
+		mutex_lock(&orphan_dir_inode->i_mutex);
+		status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1);
+		if (status < 0) {
+			mutex_unlock(&orphan_dir_inode->i_mutex);
 
-		mlog_errno(status);
-		goto bail;
+			mlog_errno(status);
+			goto bail;
+		}
 	}
 
 	/* we do this while holding the orphan dir lock because we
@@ -794,6 +797,9 @@ static int ocfs2_wipe_inode(struct inode *inode,
 		mlog_errno(status);
 
 bail_unlock_dir:
+	if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR)
+		return status;
+
 	ocfs2_inode_unlock(orphan_dir_inode, 1);
 	mutex_unlock(&orphan_dir_inode->i_mutex);
 	brelse(orphan_dir_bh);
@@ -889,7 +895,8 @@ static int ocfs2_query_inode_wipe(struct inode *inode,
 
 	/* Do some basic inode verification... */
 	di = (struct ocfs2_dinode *) di_bh->b_data;
-	if (!(di->i_flags & cpu_to_le32(OCFS2_ORPHANED_FL))) {
+	if (!(di->i_flags & cpu_to_le32(OCFS2_ORPHANED_FL)) &&
+	    !(oi->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR)) {
 		/*
 		 * Inodes in the orphan dir must have ORPHANED_FL.  The only
 		 * inodes that come back out of the orphan dir are reflink
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index ba4fe07..0b28e19 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -100,6 +100,8 @@ struct ocfs2_inode_info
 #define OCFS2_INODE_MAYBE_ORPHANED	0x00000020
 /* Does someone have the file open O_DIRECT */
 #define OCFS2_INODE_OPEN_DIRECT		0x00000040
+/* Tell the inode wipe code it's not in orphan dir */
+#define OCFS2_INODE_SKIP_ORPHAN_DIR     0x00000080
 
 static inline struct ocfs2_inode_info *OCFS2_I(struct inode *inode)
 {
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index b1eb50a..ae315c9 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -1976,6 +1976,7 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
 	}
 
 	le32_add_cpu(&fe->i_flags, OCFS2_ORPHANED_FL);
+	OCFS2_I(inode)->ip_flags &= ~OCFS2_INODE_SKIP_ORPHAN_DIR;
 
 	/* Record which orphan dir our inode now resides
 	 * in. delete_inode will use this to determine which orphan
-- 
cgit v1.1


From ab41fdc8fdd9f0942430941c1e2b516fd481371d Mon Sep 17 00:00:00 2001
From: Li Dongyang <lidongyang@novell.com>
Date: Thu, 22 Apr 2010 16:11:25 +0800
Subject: ocfs2: use OCFS2_INODE_SKIP_ORPHAN_DIR in ocfs2_symlink error path

Mark the inode with flag OCFS2_INODE_SKIP_ORPHAN_DIR when we get an error
after allocating one, so that we can kill the inode.

Signed-off-by: Li Dongyang <lidongyang@novell.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
---
 fs/ocfs2/namei.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index ae315c9..b66e488 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -1811,6 +1811,7 @@ bail:
 	if (xattr_ac)
 		ocfs2_free_alloc_context(xattr_ac);
 	if ((status < 0) && inode) {
+		OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SKIP_ORPHAN_DIR;
 		clear_nlink(inode);
 		iput(inode);
 	}
-- 
cgit v1.1


From 062d340384dcf77dfd8de0a082b5da571de3925a Mon Sep 17 00:00:00 2001
From: Li Dongyang <lidongyang@novell.com>
Date: Thu, 22 Apr 2010 16:11:29 +0800
Subject: ocfs2: use OCFS2_INODE_SKIP_ORPHAN_DIR in ocfs2_mknod error path

Mark the inode with flag OCFS2_INODE_SKIP_ORPHAN_DIR in ocfs2_mknod, so we
can kill the inode in case of error.

[ Fixed up comment style -Mark ]

Signed-off-by: Li Dongyang <lidongyang@novell.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
---
 fs/ocfs2/namei.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index b66e488..8ff035e 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -445,11 +445,6 @@ leave:
 
 	ocfs2_free_dir_lookup_result(&lookup);
 
-	if ((status < 0) && inode) {
-		clear_nlink(inode);
-		iput(inode);
-	}
-
 	if (inode_ac)
 		ocfs2_free_alloc_context(inode_ac);
 
@@ -459,6 +454,17 @@ leave:
 	if (meta_ac)
 		ocfs2_free_alloc_context(meta_ac);
 
+	/*
+	 * We should call iput after the i_mutex of the bitmap been
+	 * unlocked in ocfs2_free_alloc_context, or the
+	 * ocfs2_delete_inode will mutex_lock again.
+	 */
+	if ((status < 0) && inode) {
+		OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SKIP_ORPHAN_DIR;
+		clear_nlink(inode);
+		iput(inode);
+	}
+
 	mlog_exit(status);
 
 	return status;
-- 
cgit v1.1


From a9743fcdc0eb43d028b71267438076e1b0112ba0 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mfasheh@suse.com>
Date: Fri, 23 Apr 2010 11:42:22 -0700
Subject: ocfs2: Add directory entry later in ocfs2_symlink() and ocfs2_mknod()

If we get a failure during creation of an inode we'll allow the orphan code
to remove the inode, which is correct. However, we need to ensure that we
don't get any errors after the call to ocfs2_add_entry(), otherwise we could
leave a dangling directory reference. The solution is simple - in both
cases, all I had to do was move ocfs2_dentry_attach_lock() above the
ocfs2_add_entry() call.

Signed-off-by: Mark Fasheh <mfasheh@suse.com>
---
 fs/ocfs2/namei.c | 40 +++++++++++++++++++++++++---------------
 1 file changed, 25 insertions(+), 15 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 8ff035e..4cbb18f 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -408,23 +408,28 @@ static int ocfs2_mknod(struct inode *dir,
 		}
 	}
 
-	status = ocfs2_add_entry(handle, dentry, inode,
-				 OCFS2_I(inode)->ip_blkno, parent_fe_bh,
-				 &lookup);
-	if (status < 0) {
+	/*
+	 * Do this before adding the entry to the directory. We add
+	 * also set d_op after success so that ->d_iput() will cleanup
+	 * the dentry lock even if ocfs2_add_entry() fails below.
+	 */
+	status = ocfs2_dentry_attach_lock(dentry, inode,
+					  OCFS2_I(dir)->ip_blkno);
+	if (status) {
 		mlog_errno(status);
 		goto leave;
 	}
+	dentry->d_op = &ocfs2_dentry_ops;
 
-	status = ocfs2_dentry_attach_lock(dentry, inode,
-					  OCFS2_I(dir)->ip_blkno);
-	if (status) {
+	status = ocfs2_add_entry(handle, dentry, inode,
+				 OCFS2_I(inode)->ip_blkno, parent_fe_bh,
+				 &lookup);
+	if (status < 0) {
 		mlog_errno(status);
 		goto leave;
 	}
 
 	insert_inode_hash(inode);
-	dentry->d_op = &ocfs2_dentry_ops;
 	d_instantiate(dentry, inode);
 	status = 0;
 leave:
@@ -1777,22 +1782,27 @@ static int ocfs2_symlink(struct inode *dir,
 		}
 	}
 
-	status = ocfs2_add_entry(handle, dentry, inode,
-				 le64_to_cpu(fe->i_blkno), parent_fe_bh,
-				 &lookup);
-	if (status < 0) {
+	/*
+	 * Do this before adding the entry to the directory. We add
+	 * also set d_op after success so that ->d_iput() will cleanup
+	 * the dentry lock even if ocfs2_add_entry() fails below.
+	 */
+	status = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(dir)->ip_blkno);
+	if (status) {
 		mlog_errno(status);
 		goto bail;
 	}
+	dentry->d_op = &ocfs2_dentry_ops;
 
-	status = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(dir)->ip_blkno);
-	if (status) {
+	status = ocfs2_add_entry(handle, dentry, inode,
+				 le64_to_cpu(fe->i_blkno), parent_fe_bh,
+				 &lookup);
+	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
 	}
 
 	insert_inode_hash(inode);
-	dentry->d_op = &ocfs2_dentry_ops;
 	d_instantiate(dentry, inode);
 bail:
 	if (status < 0 && did_quota)
-- 
cgit v1.1


From 0350cb078f5035716ebdad4ad4709d02fe466a8a Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Thu, 22 Apr 2010 11:39:29 +0200
Subject: ocfs2: potential ERR_PTR dereference on error paths

If "handle" is non null at the end of the function then we assume it's a
valid pointer and pass it to ocfs2_commit_trans();

Signed-off-by: Dan Carpenter <error27@gmail.com>
Cc: <stable@kernel.org>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/inode.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index ab20790..23c254e 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -559,6 +559,7 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb,
 		handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
 		if (IS_ERR(handle)) {
 			status = PTR_ERR(handle);
+			handle = NULL;
 			mlog_errno(status);
 			goto out;
 		}
-- 
cgit v1.1


From c21a534e2f24968cf74976a4e721ac194db30ded Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Wed, 21 Apr 2010 14:05:55 +0800
Subject: ocfs2: Update VFS inode's id info after reflink.

In reflink we update the id info on the disk but forgot to update
the corresponding information in the VFS inode.  Update them
accordingly when we want to preserve the attributes.

Reported-by: Jeff Liu <jeff.liu@oracle.com>
Signed-off-by: Tao Ma <tao.ma@oracle.com>
Cc: <stable@kernel.org>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/refcounttree.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'fs')

diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 29405f2..32a8ac5 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4084,6 +4084,9 @@ static int ocfs2_complete_reflink(struct inode *s_inode,
 	di->i_attr = s_di->i_attr;
 
 	if (preserve) {
+		t_inode->i_uid = s_inode->i_uid;
+		t_inode->i_gid = s_inode->i_gid;
+		t_inode->i_mode = s_inode->i_mode;
 		di->i_uid = s_di->i_uid;
 		di->i_gid = s_di->i_gid;
 		di->i_mode = s_di->i_mode;
-- 
cgit v1.1


From a36d515c7a2dfacebcf41729f6812dbc424ebcf0 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Fri, 23 Apr 2010 15:24:59 -0700
Subject: ocfs2_dlmfs: Fix math error when reading LVB.

When asked for a partial read of the LVB in a dlmfs file, we can
accidentally calculate a negative count.

Reported-by: Dan Carpenter <error27@gmail.com>
Cc: <stable@kernel.org>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/dlmfs/dlmfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index a99d1ea..b83d610 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -262,7 +262,7 @@ static ssize_t dlmfs_file_read(struct file *filp,
 	if ((count + *ppos) > i_size_read(inode))
 		readlen = i_size_read(inode) - *ppos;
 	else
-		readlen = count - *ppos;
+		readlen = count;
 
 	lvb_buf = kmalloc(readlen, GFP_NOFS);
 	if (!lvb_buf)
-- 
cgit v1.1


From cac36f707119b792b2396aed371d6b5cdc194890 Mon Sep 17 00:00:00 2001
From: Jeff Mahoney <jeffm@suse.com>
Date: Fri, 23 Apr 2010 13:17:37 -0400
Subject: reiserfs: fix permissions on .reiserfs_priv

Commit 677c9b2e393a0cd203bd54e9c18b012b2c73305a ("reiserfs: remove
privroot hiding in lookup") removed the magic from the lookup code to hide
the .reiserfs_priv directory since it was getting loaded at mount-time
instead.  The intent was that the entry would be hidden from the user via
a poisoned d_compare, but this was faulty.

This introduced a security issue where unprivileged users could access and
modify extended attributes or ACLs belonging to other users, including
root.

This patch resolves the issue by properly hiding .reiserfs_priv.  This was
the intent of the xattr poisoning code, but it appears to have never
worked as expected.  This is fixed by using d_revalidate instead of
d_compare.

This patch makes -oexpose_privroot a no-op.  I'm fine leaving it this way.
The effort involved in working out the corner cases wrt permissions and
caching outweigh the benefit of the feature.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Acked-by: Edward Shishkin <edward.shishkin@gmail.com>
Reported-by: Matt McCutchen <matt@mattmccutchen.net>
Tested-by: Matt McCutchen <matt@mattmccutchen.net>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/reiserfs/dir.c   |  2 --
 fs/reiserfs/xattr.c | 17 ++++-------------
 2 files changed, 4 insertions(+), 15 deletions(-)

(limited to 'fs')

diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index f8a6075a..0793044 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -46,8 +46,6 @@ static inline bool is_privroot_deh(struct dentry *dir,
 				   struct reiserfs_de_head *deh)
 {
 	struct dentry *privroot = REISERFS_SB(dir->d_sb)->priv_root;
-	if (reiserfs_expose_privroot(dir->d_sb))
-		return 0;
 	return (dir == dir->d_parent && privroot->d_inode &&
 	        deh->deh_objectid == INODE_PKEY(privroot->d_inode)->k_objectid);
 }
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 4f9586b..28f0a44 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -973,21 +973,13 @@ int reiserfs_permission(struct inode *inode, int mask)
 	return generic_permission(inode, mask, NULL);
 }
 
-/* This will catch lookups from the fs root to .reiserfs_priv */
-static int
-xattr_lookup_poison(struct dentry *dentry, struct qstr *q1, struct qstr *name)
+static int xattr_hide_revalidate(struct dentry *dentry, struct nameidata *nd)
 {
-	struct dentry *priv_root = REISERFS_SB(dentry->d_sb)->priv_root;
-	if (container_of(q1, struct dentry, d_name) == priv_root)
-		return -ENOENT;
-	if (q1->len == name->len &&
-		   !memcmp(q1->name, name->name, name->len))
-		return 0;
-	return 1;
+	return -EPERM;
 }
 
 static const struct dentry_operations xattr_lookup_poison_ops = {
-	.d_compare = xattr_lookup_poison,
+	.d_revalidate = xattr_hide_revalidate,
 };
 
 int reiserfs_lookup_privroot(struct super_block *s)
@@ -1001,8 +993,7 @@ int reiserfs_lookup_privroot(struct super_block *s)
 				strlen(PRIVROOT_NAME));
 	if (!IS_ERR(dentry)) {
 		REISERFS_SB(s)->priv_root = dentry;
-		if (!reiserfs_expose_privroot(s))
-			s->s_root->d_op = &xattr_lookup_poison_ops;
+		dentry->d_op = &xattr_lookup_poison_ops;
 		if (dentry->d_inode)
 			dentry->d_inode->i_flags |= S_PRIVATE;
 	} else
-- 
cgit v1.1


From fb2162df74bb19552db3d988fd11c787cf5fad56 Mon Sep 17 00:00:00 2001
From: Jeff Mahoney <jeffm@suse.com>
Date: Fri, 23 Apr 2010 13:17:41 -0400
Subject: reiserfs: fix corruption during shrinking of xattrs

Commit 48b32a3553a54740d236b79a90f20147a25875e3 ("reiserfs: use generic
xattr handlers") introduced a problem that causes corruption when extended
attributes are replaced with a smaller value.

The issue is that the reiserfs_setattr to shrink the xattr file was moved
from before the write to after the write.

The root issue has always been in the reiserfs xattr code, but was papered
over by the fact that in the shrink case, the file would just be expanded
again while the xattr was written.

The end result is that the last 8 bytes of xattr data are lost.

This patch fixes it to use new_size.

Addresses https://bugzilla.kernel.org/show_bug.cgi?id=14826

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Reported-by: Christian Kujau <lists@nerdbynature.de>
Tested-by: Christian Kujau <lists@nerdbynature.de>
Cc: Edward Shishkin <edward.shishkin@gmail.com>
Cc: Jethro Beekman <kernel@jbeekman.nl>
Cc: Greg Surbey <gregsurbey@hotmail.com>
Cc: Marco Gatti <marco.gatti@gmail.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/reiserfs/xattr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 28f0a44..e7cc00e 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -554,7 +554,7 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
 	if (!err && new_size < i_size_read(dentry->d_inode)) {
 		struct iattr newattrs = {
 			.ia_ctime = current_fs_time(inode->i_sb),
-			.ia_size = buffer_size,
+			.ia_size = new_size,
 			.ia_valid = ATTR_SIZE | ATTR_CTIME,
 		};
 
-- 
cgit v1.1


From b8af67e2681c693a21f3933e3bdfce4cf66596d3 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Fri, 23 Apr 2010 13:18:06 -0400
Subject: fs/block_dev.c: fix performance regression in O_DIRECT|O_SYNC writes
 to block devices

We are seeing a large regression in database performance on recent
kernels.  The database opens a block device with O_DIRECT|O_SYNC and a
number of threads write to different regions of the file at the same time.

A simple test case is below.  I haven't defined DEVICE since getting it
wrong will destroy your data :) On an 3 disk LVM with a 64k chunk size we
see about 17MB/sec and only a few threads in IO wait:

procs  -----io---- -system-- -----cpu------
 r  b     bi    bo   in   cs us sy id wa st
 0  3      0 16170  656 2259  0  0 86 14  0
 0  2      0 16704  695 2408  0  0 92  8  0
 0  2      0 17308  744 2653  0  0 86 14  0
 0  2      0 17933  759 2777  0  0 89 10  0

Most threads are blocking in vfs_fsync_range, which has:

        mutex_lock(&mapping->host->i_mutex);
        err = fop->fsync(file, dentry, datasync);
        if (!ret)
                ret = err;
        mutex_unlock(&mapping->host->i_mutex);

commit 148f948ba877f4d3cdef036b1ff6d9f68986706a (vfs: Introduce new
helpers for syncing after writing to O_SYNC file or IS_SYNC inode) offers
some explanation of what is going on:

    Use these new helpers for syncing from generic VFS functions. This makes
    O_SYNC writes to block devices acquire i_mutex for syncing. If we really
    care about this, we can make block_fsync() drop the i_mutex and reacquire
    it before it returns.

Thanks Jan for such a good commit message!  As well as dropping i_mutex,
Christoph suggests we should remove the call to sync_blockdev():

> sync_blockdev is an overcomplicated alias for filemap_write_and_wait on
> the block device inode, which is exactly what we did just before calling
> into ->fsync

The patch below incorporates both suggestions. With it the testcase improves
from 17MB/s to 68M/sec:

procs  -----io---- -system-- -----cpu------
 r  b     bi    bo   in   cs us sy id wa st
 0  7      0 65536 1000 3878  0  0 70 30  0
 0 34      0 69632 1016 3921  0  1 46 53  0
 0 57      0 69632 1000 3921  0  0 55 45  0
 0 53      0 69640  754 4111  0  0 81 19  0

Testcase:

#define _GNU_SOURCE
#include <stdio.h>
#include <pthread.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>

#define NR_THREADS 64
#define BUFSIZE (64 * 1024)

#define DEVICE "/dev/mapper/XXXXXX"

#define ALIGN(VAL, SIZE) (((VAL)+(SIZE)-1) & ~((SIZE)-1))

static int fd;

static void *doit(void *arg)
{
	unsigned long offset = (long)arg;
	char *b, *buf;

	b = malloc(BUFSIZE + 1024);
	buf = (char *)ALIGN((unsigned long)b, 1024);
	memset(buf, 0, BUFSIZE);

	while (1)
		pwrite(fd, buf, BUFSIZE, offset);
}

int main(int argc, char *argv[])
{
	int flags = O_RDWR|O_DIRECT;
	int i;
	unsigned long offset = 0;

	if (argc > 1 && !strcmp(argv[1], "O_SYNC"))
		flags |= O_SYNC;

	fd = open(DEVICE, flags);
	if (fd == -1) {
		perror("open");
		exit(1);
	}

	for (i = 0; i < NR_THREADS-1; i++) {
		pthread_t tid;
		pthread_create(&tid, NULL, doit, (void *)offset);
		offset += BUFSIZE;
	}
	doit((void *)offset);

	return 0;
}

Signed-off-by: Anton Blanchard <anton@samba.org>
Acked-by: Jan Kara <jack@suse.cz>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/block_dev.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 2a6d019..6dcee88 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -406,16 +406,23 @@ static loff_t block_llseek(struct file *file, loff_t offset, int origin)
  
 int blkdev_fsync(struct file *filp, struct dentry *dentry, int datasync)
 {
-	struct block_device *bdev = I_BDEV(filp->f_mapping->host);
+	struct inode *bd_inode = filp->f_mapping->host;
+	struct block_device *bdev = I_BDEV(bd_inode);
 	int error;
 
-	error = sync_blockdev(bdev);
-	if (error)
-		return error;
-	
+	/*
+	 * There is no need to serialise calls to blkdev_issue_flush with
+	 * i_mutex and doing so causes performance issues with concurrent
+	 * O_SYNC writers to a block device.
+	 */
+	mutex_unlock(&bd_inode->i_mutex);
+
 	error = blkdev_issue_flush(bdev, NULL);
 	if (error == -EOPNOTSUPP)
 		error = 0;
+
+	mutex_lock(&bd_inode->i_mutex);
+
 	return error;
 }
 EXPORT_SYMBOL(blkdev_fsync);
-- 
cgit v1.1


From 1cb08e97389bb603e1b999312d9686c8faf0187a Mon Sep 17 00:00:00 2001
From: Phillip Lougher <phillip@lougher.demon.co.uk>
Date: Fri, 16 Apr 2010 01:01:36 +0100
Subject: squashfs: fix warn_on when root inode is corrupted

Fix warn_on triggered by mounting a fsfuzzer corrupted file system, where
the root inode has been corrupted.

Signed-off-by: Phillip Lougher <phillip@lougher.demon.co.uk>
Reported-by: Steve Grubb <sgrubb@redhat.com>
---
 fs/squashfs/super.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 3550aec..07ceeb8 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -275,7 +275,8 @@ allocate_root:
 
 	err = squashfs_read_inode(root, root_inode);
 	if (err) {
-		iget_failed(root);
+		make_bad_inode(root);
+		iput(root);
 		goto failed_mount;
 	}
 	insert_inode_hash(root);
-- 
cgit v1.1


From 370ec3d1ed9d76ba992e5b9b7d7d10700014d436 Mon Sep 17 00:00:00 2001
From: Phillip Lougher <phillip@lougher.demon.co.uk>
Date: Fri, 23 Apr 2010 00:24:22 +0100
Subject: squashfs: add missing buffer free

Signed-off-by: Phillip Lougher <phillip@lougher.demon.co.uk>
---
 fs/squashfs/super.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 07ceeb8..48b6f4a 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -354,6 +354,7 @@ static void squashfs_put_super(struct super_block *sb)
 		kfree(sbi->id_table);
 		kfree(sbi->fragment_index);
 		kfree(sbi->meta_index);
+		kfree(sbi->inode_lookup_table);
 		kfree(sb->s_fs_info);
 		sb->s_fs_info = NULL;
 	}
-- 
cgit v1.1


From e0d1f70010dce062ccce1bbd940a661e60b82631 Mon Sep 17 00:00:00 2001
From: Phillip Lougher <phillip@lougher.demon.co.uk>
Date: Fri, 23 Apr 2010 02:32:02 +0100
Subject: squashfs: fix potential buffer over-run on 4K block file systems

Sizing the buffer based on block size is incorrect, leading
to a potential buffer over-run on 4K block size file systems
(because the metadata block size is always 8K).  This bug
doesn't seem have triggered because 4K block size file systems
are not default, and also because metadata blocks after
compression tend to be less than 4K.

Signed-off-by: Phillip Lougher <phillip@lougher.demon.co.uk>
---
 fs/squashfs/block.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index 1cb0d81..653c030 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -87,9 +87,8 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index,
 	u64 cur_index = index >> msblk->devblksize_log2;
 	int bytes, compressed, b = 0, k = 0, page = 0, avail;
 
-
-	bh = kcalloc((msblk->block_size >> msblk->devblksize_log2) + 1,
-				sizeof(*bh), GFP_KERNEL);
+	bh = kcalloc(((srclength + msblk->devblksize - 1)
+		>> msblk->devblksize_log2) + 1, sizeof(*bh), GFP_KERNEL);
 	if (bh == NULL)
 		return -ENOMEM;
 
-- 
cgit v1.1


From 5129a469a91a91427334c40e29e64c6d0ab68caf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=B6rn=20Engel?= <joern@logfs.org>
Date: Sun, 25 Apr 2010 08:54:42 +0200
Subject: Catch filesystems lacking s_bdi

noop_backing_dev_info is used only as a flag to mark filesystems that
don't have any backing store, like tmpfs, procfs, spufs, etc.

Signed-off-by: Joern Engel <joern@logfs.org>

Changed the BUG_ON() to a WARN_ON(). Note that adding dirty inodes
to the noop_backing_dev_info is not legal and will not result in
them being flushed, but we already catch this condition in
__mark_inode_dirty() when checking for a registered bdi.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 fs/super.c | 8 +++++---
 fs/sync.c  | 3 ++-
 2 files changed, 7 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/super.c b/fs/super.c
index f35ac60..dc72491 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -693,6 +693,7 @@ int set_anon_super(struct super_block *s, void *data)
 		return -EMFILE;
 	}
 	s->s_dev = MKDEV(0, dev & MINORMASK);
+	s->s_bdi = &noop_backing_dev_info;
 	return 0;
 }
 
@@ -954,10 +955,11 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
 	if (error < 0)
 		goto out_free_secdata;
 	BUG_ON(!mnt->mnt_sb);
+	WARN_ON(!mnt->mnt_sb->s_bdi);
 
- 	error = security_sb_kern_mount(mnt->mnt_sb, flags, secdata);
- 	if (error)
- 		goto out_sb;
+	error = security_sb_kern_mount(mnt->mnt_sb, flags, secdata);
+	if (error)
+		goto out_sb;
 
 	/*
 	 * filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE
diff --git a/fs/sync.c b/fs/sync.c
index fc5c3d7..92b2281 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -14,6 +14,7 @@
 #include <linux/pagemap.h>
 #include <linux/quotaops.h>
 #include <linux/buffer_head.h>
+#include <linux/backing-dev.h>
 #include "internal.h"
 
 #define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \
@@ -32,7 +33,7 @@ static int __sync_filesystem(struct super_block *sb, int wait)
 	 * This should be safe, as we require bdi backing to actually
 	 * write out data in the first place
 	 */
-	if (!sb->s_bdi)
+	if (!sb->s_bdi || sb->s_bdi == &noop_backing_dev_info)
 		return 0;
 
 	if (sb->s_qcop && sb->s_qcop->quota_sync)
-- 
cgit v1.1


From e6d086d83cf7f102d48c006f58172a69ec0c15a4 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Mon, 26 Apr 2010 10:27:54 +0200
Subject: btrfs: convert to using bdi_setup_and_register()

It's now a provided helper, so get rid of the internal setup
and btrfs atomic_t bdi enumerator.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 fs/btrfs/disk-io.c | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index e7b8f2c..feca041 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -44,8 +44,6 @@ static struct extent_io_ops btree_extent_io_ops;
 static void end_workqueue_fn(struct btrfs_work *work);
 static void free_fs_root(struct btrfs_root *root);
 
-static atomic_t btrfs_bdi_num = ATOMIC_INIT(0);
-
 /*
  * end_io_wq structs are used to do processing in task context when an IO is
  * complete.  This is used during reads to verify checksums, and it is used
@@ -1375,19 +1373,11 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi)
 {
 	int err;
 
-	bdi->name = "btrfs";
 	bdi->capabilities = BDI_CAP_MAP_COPY;
-	err = bdi_init(bdi);
+	err = bdi_setup_and_register(bdi, "btrfs", BDI_CAP_MAP_COPY);
 	if (err)
 		return err;
 
-	err = bdi_register(bdi, NULL, "btrfs-%d",
-				atomic_inc_return(&btrfs_bdi_num));
-	if (err) {
-		bdi_destroy(bdi);
-		return err;
-	}
-
 	bdi->ra_pages	= default_backing_dev_info.ra_pages;
 	bdi->unplug_io_fn	= btrfs_unplug_io_fn;
 	bdi->unplug_io_data	= info;
-- 
cgit v1.1


From dd77ef924c835c9813c3f4dc7e9c72e9cd88d238 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Tue, 20 Apr 2010 17:00:37 +1000
Subject: xfs: more swap extent fixes for dynamic fork offsets

A new xfsqa test (226) with a prototype xfs_fsr change to try to
handle dynamic fork offsets better triggers an assertion failure
where the inode data fork is in btree format, yet there is room in
the inode for it to be in extent format. The two inodes look like:

before: ino 0x101 (target), num_extents 11, Max in-fork extents 6, broot size 40, fork offset 96
before: ino 0x115 (temp),  num_extents 5, Max in-fork extents 3, broot size 40, fork offset 56
after: ino 0x101 (target), num_extents 5, Max in-fork extents 6, broot size 40, fork offset 96
after: ino 0x115 (temp), num_extents 11, Max in-fork extents 3, broot size 40, fork offset 56

Basically the target inode ends up with 5 extents in btree format,
but it had space for 6 extents in extent format, so ends up
incorrect. Notably here the broot size is the same, and that is
where the kernel code is going wrong - the btree root will fit, so
it lets the swap go ahead.

The check should not allow the swap to take place if the number of
extents while in btree format is less than the number of extents
that can fit in the inode in extent format. Adding that check will
prevent this swap and corruption from occurring.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_dfrag.c | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index cd27c9d..5bba29a 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -177,16 +177,26 @@ xfs_swap_extents_check_format(
 	    XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) > tip->i_df.if_ext_max)
 		return EINVAL;
 
-	/* Check root block of temp in btree form to max in target */
+	/*
+	 * If we are in a btree format, check that the temp root block will fit
+	 * in the target and that it has enough extents to be in btree format
+	 * in the target.
+	 *
+	 * Note that we have to be careful to allow btree->extent conversions
+	 * (a common defrag case) which will occur when the temp inode is in
+	 * extent format...
+	 */
 	if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE &&
-	    XFS_IFORK_BOFF(ip) &&
-	    tip->i_df.if_broot_bytes > XFS_IFORK_BOFF(ip))
+	    ((XFS_IFORK_BOFF(ip) &&
+	      tip->i_df.if_broot_bytes > XFS_IFORK_BOFF(ip)) ||
+	     XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) <= ip->i_df.if_ext_max))
 		return EINVAL;
 
-	/* Check root block of target in btree form to max in temp */
+	/* Reciprocal target->temp btree format checks */
 	if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE &&
-	    XFS_IFORK_BOFF(tip) &&
-	    ip->i_df.if_broot_bytes > XFS_IFORK_BOFF(tip))
+	    ((XFS_IFORK_BOFF(tip) &&
+	      ip->i_df.if_broot_bytes > XFS_IFORK_BOFF(tip)) ||
+	     XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) <= tip->i_df.if_ext_max))
 		return EINVAL;
 
 	return 0;
-- 
cgit v1.1


From 2bc3c1179c781b359d4f2f3439cb3df72afc17fc Mon Sep 17 00:00:00 2001
From: Neil Brown <neilb@suse.de>
Date: Tue, 20 Apr 2010 12:16:52 +1000
Subject: nfsd4: bug in read_buf

When read_buf is called to move over to the next page in the pagelist
of an NFSv4 request, it sets argp->end to essentially a random
number, certainly not an address within the page which argp->p now
points to.  So subsequent calls to READ_BUF will think there is much
more than a page of spare space (the cast to u32 ensures an unsigned
comparison) so we can expect to fall off the end of the second
page.

We never encountered thsi in testing because typically the only
operations which use more than two pages are write-like operations,
which have their own decoding logic.  Something like a getattr after a
write may cross a page boundary, but it would be very unusual for it to
cross another boundary after that.

Cc: stable@kernel.org
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/nfs4xdr.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index e170317..34ccf81 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -161,10 +161,10 @@ static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes)
 	argp->p = page_address(argp->pagelist[0]);
 	argp->pagelist++;
 	if (argp->pagelen < PAGE_SIZE) {
-		argp->end = p + (argp->pagelen>>2);
+		argp->end = argp->p + (argp->pagelen>>2);
 		argp->pagelen = 0;
 	} else {
-		argp->end = p + (PAGE_SIZE>>2);
+		argp->end = argp->p + (PAGE_SIZE>>2);
 		argp->pagelen -= PAGE_SIZE;
 	}
 	memcpy(((char*)p)+avail, argp->p, (nbytes - avail));
@@ -1426,10 +1426,10 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
 			argp->p = page_address(argp->pagelist[0]);
 			argp->pagelist++;
 			if (argp->pagelen < PAGE_SIZE) {
-				argp->end = p + (argp->pagelen>>2);
+				argp->end = argp->p + (argp->pagelen>>2);
 				argp->pagelen = 0;
 			} else {
-				argp->end = p + (PAGE_SIZE>>2);
+				argp->end = argp->p + (PAGE_SIZE>>2);
 				argp->pagelen -= PAGE_SIZE;
 			}
 		}
-- 
cgit v1.1


From 16a5b3c4143fc7f6cbe0ef9fd4e9a58376f91506 Mon Sep 17 00:00:00 2001
From: Christoph Egger <siccegge@cs.fau.de>
Date: Mon, 26 Apr 2010 15:56:36 +0100
Subject: Remove redundant check for CONFIG_MMU

The checks for CONFIG_MMU at this location are duplicated as all the code is
located inside a #ifndef CONFIG_MMU block. So the first conditional block will
always be included while the second never will.

Signed-off-by: Christoph Egger <siccegge@stud.informatik.uni-erlangen.de>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/binfmt_elf_fdpic.c | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'fs')

diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 7ab23e0..2c5f9a0 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1005,15 +1005,8 @@ static int elf_fdpic_map_file_constdisp_on_uclinux(
 				}
 			} else if (!mm->start_data) {
 				mm->start_data = seg->addr;
-#ifndef CONFIG_MMU
 				mm->end_data = seg->addr + phdr->p_memsz;
-#endif
 			}
-
-#ifdef CONFIG_MMU
-			if (seg->addr + phdr->p_memsz > mm->end_data)
-				mm->end_data = seg->addr + phdr->p_memsz;
-#endif
 		}
 
 		seg++;
-- 
cgit v1.1


From ba8b06e67ed7a560b0e7c80091bcadda4f4727a5 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 27 Apr 2010 18:33:54 -0400
Subject: NFS: Ensure that nfs_wb_page() waits for Pg_writeback to clear

Neil Brown reports that he is seeing the BUG_ON(ret == 0) trigger in
nfs_page_async_flush. According to the trace in
     https://bugzilla.novell.com/show_bug.cgi?id=599628
the problem appears to be due to nfs_wb_page() not waiting for the
PG_writeback flag to clear.

There is a ditto problem in nfs_wb_page_cancel()

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/write.c | 19 ++++---------------
 1 file changed, 4 insertions(+), 15 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index ccde2ae..3aea3ca 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1472,6 +1472,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
 
 	BUG_ON(!PageLocked(page));
 	for (;;) {
+		wait_on_page_writeback(page);
 		req = nfs_page_find_request(page);
 		if (req == NULL)
 			break;
@@ -1506,30 +1507,18 @@ int nfs_wb_page(struct inode *inode, struct page *page)
 		.range_start = range_start,
 		.range_end = range_end,
 	};
-	struct nfs_page *req;
-	int need_commit;
 	int ret;
 
 	while(PagePrivate(page)) {
+		wait_on_page_writeback(page);
 		if (clear_page_dirty_for_io(page)) {
 			ret = nfs_writepage_locked(page, &wbc);
 			if (ret < 0)
 				goto out_error;
 		}
-		req = nfs_find_and_lock_request(page);
-		if (!req)
-			break;
-		if (IS_ERR(req)) {
-			ret = PTR_ERR(req);
+		ret = sync_inode(inode, &wbc);
+		if (ret < 0)
 			goto out_error;
-		}
-		need_commit = test_bit(PG_CLEAN, &req->wb_flags);
-		nfs_clear_page_tag_locked(req);
-		if (need_commit) {
-			ret = nfs_commit_inode(inode, FLUSH_SYNC);
-			if (ret < 0)
-				goto out_error;
-		}
 	}
 	return 0;
 out_error:
-- 
cgit v1.1


From 3835541dd481091c4dbf5ef83c08aed12e50fd61 Mon Sep 17 00:00:00 2001
From: Jerome Marchand <jmarchan@redhat.com>
Date: Tue, 27 Apr 2010 13:13:06 -0700
Subject: procfs: fix tid fdinfo

Correct the file_operations struct in fdinfo entry of tid_base_stuff[].

Presently /proc/*/task/*/fdinfo contains symlinks to opened files like
/proc/*/fd/.

Signed-off-by: Jerome Marchand <jmarchan@redhat.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Miklos Szeredi <mszeredi@suse.cz>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/base.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 7621db8..8418fcc 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2909,7 +2909,7 @@ out_no_task:
  */
 static const struct pid_entry tid_base_stuff[] = {
 	DIR("fd",        S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
-	DIR("fdinfo",    S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fd_operations),
+	DIR("fdinfo",    S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
 	REG("environ",   S_IRUSR, proc_environ_operations),
 	INF("auxv",      S_IRUSR, proc_pid_auxv),
 	ONE("status",    S_IRUGO, proc_pid_status),
-- 
cgit v1.1


From acf82b85a70f39786e3cbb1ffed8655bcc972424 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Thu, 22 Apr 2010 11:28:39 +0200
Subject: nfs: fix some issues in nfs41_proc_reclaim_complete()

The original code passed an ERR_PTR() to rpc_put_task() and instead of
returning zero on success it returned -ENOMEM.

Signed-off-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 6380670..071fced 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -5218,9 +5218,12 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp)
 	msg.rpc_resp = &calldata->res;
 	task_setup_data.callback_data = calldata;
 	task = rpc_run_task(&task_setup_data);
-	if (IS_ERR(task))
+	if (IS_ERR(task)) {
 		status = PTR_ERR(task);
+		goto out;
+	}
 	rpc_put_task(task);
+	return 0;
 out:
 	dprintk("<-- %s status=%d\n", __func__, status);
 	return status;
-- 
cgit v1.1


From 9699eda6bc1f708a28acb716e1477aa351362fe2 Mon Sep 17 00:00:00 2001
From: Xiaotian Feng <dfeng@redhat.com>
Date: Thu, 22 Apr 2010 18:56:17 +0800
Subject: nfs: fix memory leak in nfs_get_sb with CONFIG_NFS_V4

With CONFIG_NFS_V4 and data version 4, nfs_get_sb will allocate memory for
export_path in nfs4_validate_text_mount_data, so we need to free it then.
This is addressed in following kmemleak report:

unreferenced object 0xffff88016bf48a50 (size 16):
  comm "mount.nfs", pid 22567, jiffies 4651574704 (age 175471.200s)
  hex dump (first 16 bytes):
    2f 6f 70 74 2f 77 6f 72 6b 00 6b 6b 6b 6b 6b a5  /opt/work.kkkkk.
  backtrace:
    [<ffffffff814b34f9>] kmemleak_alloc+0x60/0xa7
    [<ffffffff81102c76>] kmemleak_alloc_recursive.clone.5+0x1b/0x1d
    [<ffffffff811046b3>] __kmalloc_track_caller+0x18f/0x1b7
    [<ffffffff810e1b08>] kstrndup+0x37/0x54
    [<ffffffffa0336971>] nfs_parse_devname+0x152/0x204 [nfs]
    [<ffffffffa0336af3>] nfs4_validate_text_mount_data+0xd0/0xdc [nfs]
    [<ffffffffa0338deb>] nfs_get_sb+0x325/0x736 [nfs]
    [<ffffffff81113671>] vfs_kern_mount+0xbd/0x17c
    [<ffffffff81113798>] do_kern_mount+0x4d/0xed
    [<ffffffff81129a87>] do_mount+0x787/0x7fe
    [<ffffffff81129b86>] sys_mount+0x88/0xc2
    [<ffffffff81009b42>] system_call_fastpath+0x16/0x1b

Signed-off-by: Xiaotian Feng <dfeng@redhat.com>
Cc: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: Chuck Lever <chuck.lever@oracle.com>
Cc: Benny Halevy <bhalevy@panasas.com>
Cc: Al Viro <viro@ZenIV.linux.org.uk>
Cc: Andy Adamson <andros@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/super.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index f9327bb..b4148fc 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2187,6 +2187,7 @@ static int nfs_get_sb(struct file_system_type *fs_type,
 	if (data->version == 4) {
 		error = nfs4_try_mount(flags, dev_name, data, mnt);
 		kfree(data->client_address);
+		kfree(data->nfs_server.export_path);
 		goto out;
 	}
 #endif	/* CONFIG_NFS_V4 */
-- 
cgit v1.1


From d9e80b7de91db05c1c4d2e5ebbfd70b3b3ba0e0f Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ZenIV.linux.org.uk>
Date: Thu, 29 Apr 2010 03:10:43 +0100
Subject: nfs d_revalidate() is too trigger-happy with d_drop()

If dentry found stale happens to be a root of disconnected tree, we
can't d_drop() it; its d_hash is actually part of s_anon and d_drop()
would simply hide it from shrink_dcache_for_umount(), leading to
all sorts of fun, including busy inodes on umount and oopsen after
that.

Bug had been there since at least 2006 (commit c636eb already has it),
so it's definitely -stable fodder.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: stable@kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/nfs/dir.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index be46f26..db3ad84 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -837,6 +837,8 @@ out_zap_parent:
 		/* If we have submounts, don't unhash ! */
 		if (have_submounts(dentry))
 			goto out_valid;
+		if (dentry->d_flags & DCACHE_DISCONNECTED)
+			goto out_valid;
 		shrink_dcache_parent(dentry);
 	}
 	d_drop(dentry);
-- 
cgit v1.1


From a36fed12a4d980eebb2e67b87ea30ad090238cff Mon Sep 17 00:00:00 2001
From: Boaz Harrosh <bharrosh@panasas.com>
Date: Thu, 29 Apr 2010 13:38:00 +0300
Subject: exofs: Fix "add bdi backing to mount session" fall out

Commit b3d0ab7e60d1865bb6f6a79a77aaba22f2543236 ("exofs: add bdi backing
to mount session") has a bug in the placement of the bdi member at
struct exofs_sb_info.  The layout member must be kept last.

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Acked-by: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/exofs/exofs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h
index 5437327..22721b2 100644
--- a/fs/exofs/exofs.h
+++ b/fs/exofs/exofs.h
@@ -85,6 +85,7 @@ struct exofs_sb_info {
 	u32		s_next_generation;	/* next gen # to use          */
 	atomic_t	s_curr_pending;		/* number of pending commands */
 	uint8_t		s_cred[OSD_CAP_LEN];	/* credential for the fscb    */
+	struct 		backing_dev_info bdi;	/* register our bdi with VFS  */
 
 	struct pnfs_osd_data_map data_map;	/* Default raid to use
 						 * FIXME: Needed ?
@@ -93,7 +94,6 @@ struct exofs_sb_info {
 	struct exofs_layout	layout;		/* Default files layout,
 						 * contains the variable osd_dev
 						 * array. Keep last */
-	struct backing_dev_info bdi;
 	struct osd_dev	*_min_one_dev[1];	/* Place holder for one dev   */
 };
 
-- 
cgit v1.1


From f80a0ca6ad8f2800453e819dafa09a0ed9e56850 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 28 Apr 2010 14:36:41 +0200
Subject: pktcdvd: improve BKL and compat_ioctl.c usage

The pktcdvd driver uses proper locking and does not need the BKL in the
ioctl and llseek functions of the character device, so kill both.

Moving the compat_ioctl handling from common code into the driver itself
fixes build problems when CONFIG_BLOCK is disabled.

Acked-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/compat_ioctl.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'fs')

diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index c32a1b6..641640d 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -102,7 +102,6 @@
 #include <linux/nbd.h>
 #include <linux/random.h>
 #include <linux/filter.h>
-#include <linux/pktcdvd.h>
 
 #include <linux/hiddev.h>
 
@@ -1126,8 +1125,6 @@ COMPATIBLE_IOCTL(PPGETMODE)
 COMPATIBLE_IOCTL(PPGETPHASE)
 COMPATIBLE_IOCTL(PPGETFLAGS)
 COMPATIBLE_IOCTL(PPSETFLAGS)
-/* pktcdvd */
-COMPATIBLE_IOCTL(PACKET_CTRL_CMD)
 /* Big A */
 /* sparc only */
 /* Big Q for sound/OSS */
-- 
cgit v1.1


From 5477d0face8a3ba4e9a1e7283692fff9c92f8e5e Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Thu, 29 Apr 2010 20:33:35 +0200
Subject: fs: fs/super.c needs to include backing-dev.h for !CONFIG_BLOCK

When CONFIG_BLOCK is set, it ends up getting backing-dev.h included.
But for !CONFIG_BLOCK, it isn't so lucky. The proper thing to do is
include <linux/backing-dev.h> directly from the file it's used from,
so do that.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 fs/super.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/super.c b/fs/super.c
index dc72491..1527e6a 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -37,6 +37,7 @@
 #include <linux/kobject.h>
 #include <linux/mutex.h>
 #include <linux/file.h>
+#include <linux/backing-dev.h>
 #include <asm/uaccess.h>
 #include "internal.h"
 
-- 
cgit v1.1


From 3c2023dd8ed31e2ecfbb2d5aa20e8884d4b339e2 Mon Sep 17 00:00:00 2001
From: Boaz Harrosh <bharrosh@panasas.com>
Date: Thu, 29 Apr 2010 20:35:29 +0200
Subject: exofs: Fix "add bdi backing to mount session" fall out

The patch: add bdi backing to mount session
	(b3d0ab7e60d1865bb6f6a79a77aaba22f2543236)

Has a bug in the placement of the bdi member at
struct exofs_sb_info. The layout member must be kept
last.

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 fs/exofs/exofs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h
index 5437327..22721b2 100644
--- a/fs/exofs/exofs.h
+++ b/fs/exofs/exofs.h
@@ -85,6 +85,7 @@ struct exofs_sb_info {
 	u32		s_next_generation;	/* next gen # to use          */
 	atomic_t	s_curr_pending;		/* number of pending commands */
 	uint8_t		s_cred[OSD_CAP_LEN];	/* credential for the fscb    */
+	struct 		backing_dev_info bdi;	/* register our bdi with VFS  */
 
 	struct pnfs_osd_data_map data_map;	/* Default raid to use
 						 * FIXME: Needed ?
@@ -93,7 +94,6 @@ struct exofs_sb_info {
 	struct exofs_layout	layout;		/* Default files layout,
 						 * contains the variable osd_dev
 						 * array. Keep last */
-	struct backing_dev_info bdi;
 	struct osd_dev	*_min_one_dev[1];	/* Place holder for one dev   */
 };
 
-- 
cgit v1.1


From 9bf729c0af67897ea8498ce17c29b0683f7f2028 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Thu, 29 Apr 2010 09:55:50 +1000
Subject: xfs: add a shrinker to background inode reclaim

On low memory boxes or those with highmem, kernel can OOM before the
background reclaims inodes via xfssyncd. Add a shrinker to run inode
reclaim so that it inode reclaim is expedited when memory is low.

This is more complex than it needs to be because the VM folk don't
want a context added to the shrinker infrastructure. Hence we need
to add a global list of XFS mount structures so the shrinker can
traverse them.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/linux-2.6/xfs_super.c   |   5 ++
 fs/xfs/linux-2.6/xfs_sync.c    | 112 ++++++++++++++++++++++++++++++++++++++---
 fs/xfs/linux-2.6/xfs_sync.h    |   7 ++-
 fs/xfs/quota/xfs_qm_syscalls.c |   3 +-
 fs/xfs/xfs_ag.h                |   1 +
 fs/xfs/xfs_mount.h             |   1 +
 6 files changed, 120 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 52e06b4..29f1edc 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1209,6 +1209,7 @@ xfs_fs_put_super(
 
 	xfs_unmountfs(mp);
 	xfs_freesb(mp);
+	xfs_inode_shrinker_unregister(mp);
 	xfs_icsb_destroy_counters(mp);
 	xfs_close_devices(mp);
 	xfs_dmops_put(mp);
@@ -1622,6 +1623,8 @@ xfs_fs_fill_super(
 	if (error)
 		goto fail_vnrele;
 
+	xfs_inode_shrinker_register(mp);
+
 	kfree(mtpt);
 	return 0;
 
@@ -1867,6 +1870,7 @@ init_xfs_fs(void)
 		goto out_cleanup_procfs;
 
 	vfs_initquota();
+	xfs_inode_shrinker_init();
 
 	error = register_filesystem(&xfs_fs_type);
 	if (error)
@@ -1894,6 +1898,7 @@ exit_xfs_fs(void)
 {
 	vfs_exitquota();
 	unregister_filesystem(&xfs_fs_type);
+	xfs_inode_shrinker_destroy();
 	xfs_sysctl_unregister();
 	xfs_cleanup_procfs();
 	xfs_buf_terminate();
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index fd96982..a427c63 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -95,7 +95,8 @@ xfs_inode_ag_walk(
 					   struct xfs_perag *pag, int flags),
 	int			flags,
 	int			tag,
-	int			exclusive)
+	int			exclusive,
+	int			*nr_to_scan)
 {
 	uint32_t		first_index;
 	int			last_error = 0;
@@ -134,7 +135,7 @@ restart:
 		if (error == EFSCORRUPTED)
 			break;
 
-	} while (1);
+	} while ((*nr_to_scan)--);
 
 	if (skipped) {
 		delay(1);
@@ -150,12 +151,15 @@ xfs_inode_ag_iterator(
 					   struct xfs_perag *pag, int flags),
 	int			flags,
 	int			tag,
-	int			exclusive)
+	int			exclusive,
+	int			*nr_to_scan)
 {
 	int			error = 0;
 	int			last_error = 0;
 	xfs_agnumber_t		ag;
+	int			nr;
 
+	nr = nr_to_scan ? *nr_to_scan : INT_MAX;
 	for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
 		struct xfs_perag	*pag;
 
@@ -165,14 +169,18 @@ xfs_inode_ag_iterator(
 			continue;
 		}
 		error = xfs_inode_ag_walk(mp, pag, execute, flags, tag,
-						exclusive);
+						exclusive, &nr);
 		xfs_perag_put(pag);
 		if (error) {
 			last_error = error;
 			if (error == EFSCORRUPTED)
 				break;
 		}
+		if (nr <= 0)
+			break;
 	}
+	if (nr_to_scan)
+		*nr_to_scan = nr;
 	return XFS_ERROR(last_error);
 }
 
@@ -291,7 +299,7 @@ xfs_sync_data(
 	ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);
 
 	error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags,
-				      XFS_ICI_NO_TAG, 0);
+				      XFS_ICI_NO_TAG, 0, NULL);
 	if (error)
 		return XFS_ERROR(error);
 
@@ -310,7 +318,7 @@ xfs_sync_attr(
 	ASSERT((flags & ~SYNC_WAIT) == 0);
 
 	return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags,
-				     XFS_ICI_NO_TAG, 0);
+				     XFS_ICI_NO_TAG, 0, NULL);
 }
 
 STATIC int
@@ -673,6 +681,7 @@ __xfs_inode_set_reclaim_tag(
 	radix_tree_tag_set(&pag->pag_ici_root,
 			   XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
 			   XFS_ICI_RECLAIM_TAG);
+	pag->pag_ici_reclaimable++;
 }
 
 /*
@@ -705,6 +714,7 @@ __xfs_inode_clear_reclaim_tag(
 {
 	radix_tree_tag_clear(&pag->pag_ici_root,
 			XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
+	pag->pag_ici_reclaimable--;
 }
 
 /*
@@ -854,5 +864,93 @@ xfs_reclaim_inodes(
 	int		mode)
 {
 	return xfs_inode_ag_iterator(mp, xfs_reclaim_inode, mode,
-					XFS_ICI_RECLAIM_TAG, 1);
+					XFS_ICI_RECLAIM_TAG, 1, NULL);
+}
+
+/*
+ * Shrinker infrastructure.
+ *
+ * This is all far more complex than it needs to be. It adds a global list of
+ * mounts because the shrinkers can only call a global context. We need to make
+ * the shrinkers pass a context to avoid the need for global state.
+ */
+static LIST_HEAD(xfs_mount_list);
+static struct rw_semaphore xfs_mount_list_lock;
+
+static int
+xfs_reclaim_inode_shrink(
+	int		nr_to_scan,
+	gfp_t		gfp_mask)
+{
+	struct xfs_mount *mp;
+	struct xfs_perag *pag;
+	xfs_agnumber_t	ag;
+	int		reclaimable = 0;
+
+	if (nr_to_scan) {
+		if (!(gfp_mask & __GFP_FS))
+			return -1;
+
+		down_read(&xfs_mount_list_lock);
+		list_for_each_entry(mp, &xfs_mount_list, m_mplist) {
+			xfs_inode_ag_iterator(mp, xfs_reclaim_inode, 0,
+					XFS_ICI_RECLAIM_TAG, 1, &nr_to_scan);
+			if (nr_to_scan <= 0)
+				break;
+		}
+		up_read(&xfs_mount_list_lock);
+	}
+
+	down_read(&xfs_mount_list_lock);
+	list_for_each_entry(mp, &xfs_mount_list, m_mplist) {
+		for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
+
+			pag = xfs_perag_get(mp, ag);
+			if (!pag->pag_ici_init) {
+				xfs_perag_put(pag);
+				continue;
+			}
+			reclaimable += pag->pag_ici_reclaimable;
+			xfs_perag_put(pag);
+		}
+	}
+	up_read(&xfs_mount_list_lock);
+	return reclaimable;
+}
+
+static struct shrinker xfs_inode_shrinker = {
+	.shrink = xfs_reclaim_inode_shrink,
+	.seeks = DEFAULT_SEEKS,
+};
+
+void __init
+xfs_inode_shrinker_init(void)
+{
+	init_rwsem(&xfs_mount_list_lock);
+	register_shrinker(&xfs_inode_shrinker);
+}
+
+void
+xfs_inode_shrinker_destroy(void)
+{
+	ASSERT(list_empty(&xfs_mount_list));
+	unregister_shrinker(&xfs_inode_shrinker);
+}
+
+void
+xfs_inode_shrinker_register(
+	struct xfs_mount	*mp)
+{
+	down_write(&xfs_mount_list_lock);
+	list_add_tail(&mp->m_mplist, &xfs_mount_list);
+	up_write(&xfs_mount_list_lock);
+}
+
+void
+xfs_inode_shrinker_unregister(
+	struct xfs_mount	*mp)
+{
+	down_write(&xfs_mount_list_lock);
+	list_del(&mp->m_mplist);
+	up_write(&xfs_mount_list_lock);
 }
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index d480c34..cdcbaac 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -53,6 +53,11 @@ void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
 int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag);
 int xfs_inode_ag_iterator(struct xfs_mount *mp,
 	int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
-	int flags, int tag, int write_lock);
+	int flags, int tag, int write_lock, int *nr_to_scan);
+
+void xfs_inode_shrinker_init(void);
+void xfs_inode_shrinker_destroy(void);
+void xfs_inode_shrinker_register(struct xfs_mount *mp);
+void xfs_inode_shrinker_unregister(struct xfs_mount *mp);
 
 #endif
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 5d0ee8d..50bee07 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -891,7 +891,8 @@ xfs_qm_dqrele_all_inodes(
 	uint		 flags)
 {
 	ASSERT(mp->m_quotainfo);
-	xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, XFS_ICI_NO_TAG, 0);
+	xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags,
+				XFS_ICI_NO_TAG, 0, NULL);
 }
 
 /*------------------------------------------------------------------------*/
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index b1a5a1f..abb8222 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -223,6 +223,7 @@ typedef struct xfs_perag {
 	int		pag_ici_init;	/* incore inode cache initialised */
 	rwlock_t	pag_ici_lock;	/* incore inode lock */
 	struct radix_tree_root pag_ici_root;	/* incore inode cache root */
+	int		pag_ici_reclaimable;	/* reclaimable inodes */
 #endif
 	int		pagb_count;	/* pagb slots in use */
 	xfs_perag_busy_t pagb_list[XFS_PAGB_NUM_SLOTS];	/* unstable blocks */
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 4fa0bc7..9ff48a16 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -259,6 +259,7 @@ typedef struct xfs_mount {
 	wait_queue_head_t	m_wait_single_sync_task;
 	__int64_t		m_update_flags;	/* sb flags we need to update
 						   on the next remount,rw */
+	struct list_head	m_mplist;	/* inode shrinker mount list */
 } xfs_mount_t;
 
 /*
-- 
cgit v1.1


From 12b1b321689cf92236fb216472744e39419fab30 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Mon, 8 Mar 2010 20:51:03 +0100
Subject: Inotify: Fix build failure in inotify user support

CONFIG_INOTIFY_USER defined but CONFIG_ANON_INODES undefined will result
in the following build failure:

    LD      vmlinux
  fs/built-in.o: In function 'sys_inotify_init1':
  (.text.sys_inotify_init1+0x22c): undefined reference to 'anon_inode_getfd'
  fs/built-in.o: In function `sys_inotify_init1':
  (.text.sys_inotify_init1+0x22c): relocation truncated to fit: R_MIPS_26 against 'anon_inode_getfd'
  make[2]: *** [vmlinux] Error 1
  make[1]: *** [sub-make] Error 2
  make: *** [all] Error 2

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/notify/inotify/Kconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/notify/inotify/Kconfig b/fs/notify/inotify/Kconfig
index 3e56dbf..b3a159b 100644
--- a/fs/notify/inotify/Kconfig
+++ b/fs/notify/inotify/Kconfig
@@ -15,6 +15,7 @@ config INOTIFY
 
 config INOTIFY_USER
 	bool "Inotify support for userspace"
+	select ANON_INODES
 	select FSNOTIFY
 	default y
 	---help---
-- 
cgit v1.1


From 6b933c8e6f1a2f3118082c455eef25f9b1ac7b45 Mon Sep 17 00:00:00 2001
From: Li Dongyang <lidongyang@novell.com>
Date: Sat, 17 Apr 2010 17:49:10 +0800
Subject: ocfs2: Avoid direct write if we fall back to buffered I/O

when we fall back to buffered write from direct write, we call
__generic_file_aio_write() but that will end up doing direct write
even we are only prepared to do buffered write because the file
has the O_DIRECT flag set. This is a fix for
https://bugzilla.novell.com/show_bug.cgi?id=591039
revised with Joel's comments.

Signed-off-by: Li Dongyang <lidongyang@novell.com>
Acked-by: Mark Fasheh <mfasheh@suse.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/file.c | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 20e0ee5..a5fbd9c 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1982,18 +1982,18 @@ relock:
 	/* communicate with ocfs2_dio_end_io */
 	ocfs2_iocb_set_rw_locked(iocb, rw_level);
 
-	if (direct_io) {
-		ret = generic_segment_checks(iov, &nr_segs, &ocount,
-					     VERIFY_READ);
-		if (ret)
-			goto out_dio;
+	ret = generic_segment_checks(iov, &nr_segs, &ocount,
+				     VERIFY_READ);
+	if (ret)
+		goto out_dio;
 
-		count = ocount;
-		ret = generic_write_checks(file, ppos, &count,
-					   S_ISBLK(inode->i_mode));
-		if (ret)
-			goto out_dio;
+	count = ocount;
+	ret = generic_write_checks(file, ppos, &count,
+				   S_ISBLK(inode->i_mode));
+	if (ret)
+		goto out_dio;
 
+	if (direct_io) {
 		written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos,
 						    ppos, count, ocount);
 		if (written < 0) {
@@ -2008,7 +2008,10 @@ relock:
 			goto out_dio;
 		}
 	} else {
-		written = __generic_file_aio_write(iocb, iov, nr_segs, ppos);
+		current->backing_dev_info = file->f_mapping->backing_dev_info;
+		written = generic_file_buffered_write(iocb, iov, nr_segs, *ppos,
+						      ppos, count, 0);
+		current->backing_dev_info = NULL;
 	}
 
 out_dio:
-- 
cgit v1.1


From 8f649c376254755f2261a693b3d48d09126218dc Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sat, 1 May 2010 12:36:18 -0400
Subject: NFSv4: Fix the locking in nfs_inode_reclaim_delegation()

Ensure that we correctly rcu-dereference the delegation itself, and that we
protect against removal while we're changing the contents.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 fs/nfs/delegation.c | 42 ++++++++++++++++++++++++++++--------------
 1 file changed, 28 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 1567124..8d9ec49 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -129,21 +129,35 @@ again:
  */
 void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res)
 {
-	struct nfs_delegation *delegation = NFS_I(inode)->delegation;
-	struct rpc_cred *oldcred;
+	struct nfs_delegation *delegation;
+	struct rpc_cred *oldcred = NULL;
 
-	if (delegation == NULL)
-		return;
-	memcpy(delegation->stateid.data, res->delegation.data,
-			sizeof(delegation->stateid.data));
-	delegation->type = res->delegation_type;
-	delegation->maxsize = res->maxsize;
-	oldcred = delegation->cred;
-	delegation->cred = get_rpccred(cred);
-	clear_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags);
-	NFS_I(inode)->delegation_state = delegation->type;
-	smp_wmb();
-	put_rpccred(oldcred);
+	rcu_read_lock();
+	delegation = rcu_dereference(NFS_I(inode)->delegation);
+	if (delegation != NULL) {
+		spin_lock(&delegation->lock);
+		if (delegation->inode != NULL) {
+			memcpy(delegation->stateid.data, res->delegation.data,
+			       sizeof(delegation->stateid.data));
+			delegation->type = res->delegation_type;
+			delegation->maxsize = res->maxsize;
+			oldcred = delegation->cred;
+			delegation->cred = get_rpccred(cred);
+			clear_bit(NFS_DELEGATION_NEED_RECLAIM,
+				  &delegation->flags);
+			NFS_I(inode)->delegation_state = delegation->type;
+			spin_unlock(&delegation->lock);
+			put_rpccred(oldcred);
+			rcu_read_unlock();
+		} else {
+			/* We appear to have raced with a delegation return. */
+			spin_unlock(&delegation->lock);
+			rcu_read_unlock();
+			nfs_inode_set_delegation(inode, cred, res);
+		}
+	} else {
+		rcu_read_unlock();
+	}
 }
 
 static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *delegation, int issync)
-- 
cgit v1.1


From 17d2c0a0c4d4e074f0a2a5c0090ff6d88f5e1d44 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Sat, 1 May 2010 12:37:18 -0400
Subject: NFS: Fix RCU issues in the NFSv4 delegation code

Fix a number of RCU issues in the NFSv4 delegation code.

 (1) delegation->cred doesn't need to be RCU protected as it's essentially an
     invariant refcounted structure.

     By the time we get to nfs_free_delegation(), the delegation is being
     released, so no one else should be attempting to use the saved
     credentials, and they can be cleared.

     However, since the list of delegations could still be under traversal at
     this point by such as nfs_client_return_marked_delegations(), the cred
     should be released in nfs_do_free_delegation() rather than in
     nfs_free_delegation().  Simply using rcu_assign_pointer() to clear it is
     insufficient as that doesn't stop the cred from being destroyed, and nor
     does calling put_rpccred() after call_rcu(), given that the latter is
     asynchronous.

 (2) nfs_detach_delegation_locked() and nfs_inode_set_delegation() should use
     rcu_derefence_protected() because they can only be called if
     nfs_client::cl_lock is held, and that guards against anyone changing
     nfsi->delegation under it.  Furthermore, the barrier imposed by
     rcu_dereference() is superfluous, given that the spin_lock() is also a
     barrier.

 (3) nfs_detach_delegation_locked() is now passed a pointer to the nfs_client
     struct so that it can issue lockdep advice based on clp->cl_lock for (2).

 (4) nfs_inode_return_delegation_noreclaim() and nfs_inode_return_delegation()
     should use rcu_access_pointer() outside the spinlocked region as they
     merely examine the pointer and don't follow it, thus rendering unnecessary
     the need to impose a partial ordering over the one item of interest.

     These result in an RCU warning like the following:

[ INFO: suspicious rcu_dereference_check() usage. ]
---------------------------------------------------
fs/nfs/delegation.c:332 invoked rcu_dereference_check() without protection!

other info that might help us debug this:

rcu_scheduler_active = 1, debug_locks = 0
2 locks held by mount.nfs4/2281:
 #0:  (&type->s_umount_key#34){+.+...}, at: [<ffffffff810b25b4>] deactivate_super+0x60/0x80
 #1:  (iprune_sem){+.+...}, at: [<ffffffff810c332a>] invalidate_inodes+0x39/0x13a

stack backtrace:
Pid: 2281, comm: mount.nfs4 Not tainted 2.6.34-rc1-cachefs #110
Call Trace:
 [<ffffffff8105149f>] lockdep_rcu_dereference+0xaa/0xb2
 [<ffffffffa00b4591>] nfs_inode_return_delegation_noreclaim+0x5b/0xa0 [nfs]
 [<ffffffffa0095d63>] nfs4_clear_inode+0x11/0x1e [nfs]
 [<ffffffff810c2d92>] clear_inode+0x9e/0xf8
 [<ffffffff810c3028>] dispose_list+0x67/0x10e
 [<ffffffff810c340d>] invalidate_inodes+0x11c/0x13a
 [<ffffffff810b1dc1>] generic_shutdown_super+0x42/0xf4
 [<ffffffff810b1ebe>] kill_anon_super+0x11/0x4f
 [<ffffffffa009893c>] nfs4_kill_super+0x3f/0x72 [nfs]
 [<ffffffff810b25bc>] deactivate_super+0x68/0x80
 [<ffffffff810c6744>] mntput_no_expire+0xbb/0xf8
 [<ffffffff810c681b>] release_mounts+0x9a/0xb0
 [<ffffffff810c689b>] put_mnt_ns+0x6a/0x79
 [<ffffffffa00983a1>] nfs_follow_remote_path+0x5a/0x146 [nfs]
 [<ffffffffa0098334>] ? nfs_do_root_mount+0x82/0x95 [nfs]
 [<ffffffffa00985a9>] nfs4_try_mount+0x75/0xaf [nfs]
 [<ffffffffa0098874>] nfs4_get_sb+0x291/0x31a [nfs]
 [<ffffffff810b2059>] vfs_kern_mount+0xb8/0x177
 [<ffffffff810b2176>] do_kern_mount+0x48/0xe8
 [<ffffffff810c810b>] do_mount+0x782/0x7f9
 [<ffffffff810c8205>] sys_mount+0x83/0xbe
 [<ffffffff81001eeb>] system_call_fastpath+0x16/0x1b

Also on:

fs/nfs/delegation.c:215 invoked rcu_dereference_check() without protection!
 [<ffffffff8105149f>] lockdep_rcu_dereference+0xaa/0xb2
 [<ffffffffa00b4223>] nfs_inode_set_delegation+0xfe/0x219 [nfs]
 [<ffffffffa00a9c6f>] nfs4_opendata_to_nfs4_state+0x2c2/0x30d [nfs]
 [<ffffffffa00aa15d>] nfs4_do_open+0x2a6/0x3a6 [nfs]
 ...

And:

fs/nfs/delegation.c:40 invoked rcu_dereference_check() without protection!
 [<ffffffff8105149f>] lockdep_rcu_dereference+0xaa/0xb2
 [<ffffffffa00b3bef>] nfs_free_delegation+0x3d/0x6e [nfs]
 [<ffffffffa00b3e71>] nfs_do_return_delegation+0x26/0x30 [nfs]
 [<ffffffffa00b406a>] __nfs_inode_return_delegation+0x1ef/0x1fe [nfs]
 [<ffffffffa00b448a>] nfs_client_return_marked_delegations+0xc9/0x124 [nfs]
 ...

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/delegation.c | 44 +++++++++++++++++++++++---------------------
 1 file changed, 23 insertions(+), 21 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 8d9ec49..ea61d26 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -24,6 +24,8 @@
 
 static void nfs_do_free_delegation(struct nfs_delegation *delegation)
 {
+	if (delegation->cred)
+		put_rpccred(delegation->cred);
 	kfree(delegation);
 }
 
@@ -36,13 +38,7 @@ static void nfs_free_delegation_callback(struct rcu_head *head)
 
 static void nfs_free_delegation(struct nfs_delegation *delegation)
 {
-	struct rpc_cred *cred;
-
-	cred = rcu_dereference(delegation->cred);
-	rcu_assign_pointer(delegation->cred, NULL);
 	call_rcu(&delegation->rcu, nfs_free_delegation_callback);
-	if (cred)
-		put_rpccred(cred);
 }
 
 void nfs_mark_delegation_referenced(struct nfs_delegation *delegation)
@@ -180,9 +176,13 @@ static struct inode *nfs_delegation_grab_inode(struct nfs_delegation *delegation
 	return inode;
 }
 
-static struct nfs_delegation *nfs_detach_delegation_locked(struct nfs_inode *nfsi, const nfs4_stateid *stateid)
+static struct nfs_delegation *nfs_detach_delegation_locked(struct nfs_inode *nfsi,
+							   const nfs4_stateid *stateid,
+							   struct nfs_client *clp)
 {
-	struct nfs_delegation *delegation = rcu_dereference(nfsi->delegation);
+	struct nfs_delegation *delegation =
+		rcu_dereference_protected(nfsi->delegation,
+					  lockdep_is_held(&clp->cl_lock));
 
 	if (delegation == NULL)
 		goto nomatch;
@@ -209,7 +209,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
 {
 	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
 	struct nfs_inode *nfsi = NFS_I(inode);
-	struct nfs_delegation *delegation;
+	struct nfs_delegation *delegation, *old_delegation;
 	struct nfs_delegation *freeme = NULL;
 	int status = 0;
 
@@ -227,10 +227,12 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
 	spin_lock_init(&delegation->lock);
 
 	spin_lock(&clp->cl_lock);
-	if (rcu_dereference(nfsi->delegation) != NULL) {
-		if (memcmp(&delegation->stateid, &nfsi->delegation->stateid,
-					sizeof(delegation->stateid)) == 0 &&
-				delegation->type == nfsi->delegation->type) {
+	old_delegation = rcu_dereference_protected(nfsi->delegation,
+						   lockdep_is_held(&clp->cl_lock));
+	if (old_delegation != NULL) {
+		if (memcmp(&delegation->stateid, &old_delegation->stateid,
+					sizeof(old_delegation->stateid)) == 0 &&
+				delegation->type == old_delegation->type) {
 			goto out;
 		}
 		/*
@@ -240,12 +242,12 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
 		dfprintk(FILE, "%s: server %s handed out "
 				"a duplicate delegation!\n",
 				__func__, clp->cl_hostname);
-		if (delegation->type <= nfsi->delegation->type) {
+		if (delegation->type <= old_delegation->type) {
 			freeme = delegation;
 			delegation = NULL;
 			goto out;
 		}
-		freeme = nfs_detach_delegation_locked(nfsi, NULL);
+		freeme = nfs_detach_delegation_locked(nfsi, NULL, clp);
 	}
 	list_add_rcu(&delegation->super_list, &clp->cl_delegations);
 	nfsi->delegation_state = delegation->type;
@@ -315,7 +317,7 @@ restart:
 		if (inode == NULL)
 			continue;
 		spin_lock(&clp->cl_lock);
-		delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL);
+		delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL, clp);
 		spin_unlock(&clp->cl_lock);
 		rcu_read_unlock();
 		if (delegation != NULL) {
@@ -344,9 +346,9 @@ void nfs_inode_return_delegation_noreclaim(struct inode *inode)
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_delegation *delegation;
 
-	if (rcu_dereference(nfsi->delegation) != NULL) {
+	if (rcu_access_pointer(nfsi->delegation) != NULL) {
 		spin_lock(&clp->cl_lock);
-		delegation = nfs_detach_delegation_locked(nfsi, NULL);
+		delegation = nfs_detach_delegation_locked(nfsi, NULL, clp);
 		spin_unlock(&clp->cl_lock);
 		if (delegation != NULL)
 			nfs_do_return_delegation(inode, delegation, 0);
@@ -360,9 +362,9 @@ int nfs_inode_return_delegation(struct inode *inode)
 	struct nfs_delegation *delegation;
 	int err = 0;
 
-	if (rcu_dereference(nfsi->delegation) != NULL) {
+	if (rcu_access_pointer(nfsi->delegation) != NULL) {
 		spin_lock(&clp->cl_lock);
-		delegation = nfs_detach_delegation_locked(nfsi, NULL);
+		delegation = nfs_detach_delegation_locked(nfsi, NULL, clp);
 		spin_unlock(&clp->cl_lock);
 		if (delegation != NULL) {
 			nfs_msync_inode(inode);
@@ -540,7 +542,7 @@ restart:
 		if (inode == NULL)
 			continue;
 		spin_lock(&clp->cl_lock);
-		delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL);
+		delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL, clp);
 		spin_unlock(&clp->cl_lock);
 		rcu_read_unlock();
 		if (delegation != NULL)
-- 
cgit v1.1


From 973bec34bfc1bc2465646181653d67f767d418c8 Mon Sep 17 00:00:00 2001
From: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Date: Mon, 3 May 2010 21:00:48 +0900
Subject: nilfs2: fix sync silent failure

As of 32a88aa1, __sync_filesystem() will return 0 if s_bdi is not set.
And nilfs does not set s_bdi anywhere.  I noticed this problem by the
warning introduced by the recent commit 5129a469 ("Catch filesystem
lacking s_bdi").

 WARNING: at fs/super.c:959 vfs_kern_mount+0xc5/0x14e()
 Hardware name: PowerEdge 2850
 Modules linked in: nilfs2 loop tpm_tis tpm tpm_bios video shpchp pci_hotplug output dcdbas
 Pid: 3773, comm: mount.nilfs2 Not tainted 2.6.34-rc6-debug #38
 Call Trace:
  [<c1028422>] warn_slowpath_common+0x60/0x90
  [<c102845f>] warn_slowpath_null+0xd/0x10
  [<c1095936>] vfs_kern_mount+0xc5/0x14e
  [<c1095a03>] do_kern_mount+0x32/0xbd
  [<c10a811e>] do_mount+0x671/0x6d0
  [<c1073794>] ? __get_free_pages+0x1f/0x21
  [<c10a684f>] ? copy_mount_options+0x2b/0xe2
  [<c107b634>] ? strndup_user+0x48/0x67
  [<c10a81de>] sys_mount+0x61/0x8f
  [<c100280c>] sysenter_do_call+0x12/0x32

This ensures to set s_bdi for nilfs and fixes the sync silent failure.

Signed-off-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Acked-by: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/nilfs2/super.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 0cdbc5e..48145f5 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -749,6 +749,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent,
 	sb->s_export_op = &nilfs_export_ops;
 	sb->s_root = NULL;
 	sb->s_time_gran = 1;
+	sb->s_bdi = nilfs->ns_bdi;
 
 	err = load_nilfs(nilfs, sbi);
 	if (err)
-- 
cgit v1.1


From c10f5e12bafde7f7a2f9b75d76f7a68d62154e91 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Fri, 16 Apr 2010 12:56:11 -0700
Subject: ceph: clear dir complete on d_move

d_move() reorders the d_subdirs list, breaking the readdir result caching.
Unless/until d_move preserves that ordering, clear CEPH_I_COMPLETE on
rename.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/inode.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'fs')

diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 26f883c..261f3e6 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -997,6 +997,10 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
 			     dn, dn->d_name.len, dn->d_name.name);
 			dout("fill_trace doing d_move %p -> %p\n",
 			     req->r_old_dentry, dn);
+
+			/* d_move screws up d_subdirs order */
+			ceph_i_clear(dir, CEPH_I_COMPLETE);
+
 			d_move(req->r_old_dentry, dn);
 			dout(" src %p '%.*s' dst %p '%.*s'\n",
 			     req->r_old_dentry,
-- 
cgit v1.1


From 91dee39eebcfb47085c4d457a584b0e9723b6ca0 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Mon, 19 Apr 2010 10:15:44 -0700
Subject: ceph: fix snap realm splits

The snap realm split was checking i_snap_realm, not the list_head, to
determine if an inode belonged in the new realm.  The check always failed,
which meant we always moved the inode, corrupting the old realm's list and
causing various crashes.

Also wait to release old realm reference to avoid possibility of use after
free.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/snap.c | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 2b88126..d5114db 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -869,16 +869,20 @@ skip_inode:
 				continue;
 			ci = ceph_inode(inode);
 			spin_lock(&inode->i_lock);
-			if (!ci->i_snap_realm)
-				goto split_skip_inode;
-			ceph_put_snap_realm(mdsc, ci->i_snap_realm);
-			spin_lock(&realm->inodes_with_caps_lock);
-			list_add(&ci->i_snap_realm_item,
-				 &realm->inodes_with_caps);
-			ci->i_snap_realm = realm;
-			spin_unlock(&realm->inodes_with_caps_lock);
-			ceph_get_snap_realm(mdsc, realm);
-split_skip_inode:
+			if (list_empty(&ci->i_snap_realm_item)) {
+				struct ceph_snap_realm *oldrealm =
+					ci->i_snap_realm;
+
+				dout(" moving %p to split realm %llx %p\n",
+				     inode, realm->ino, realm);
+				spin_lock(&realm->inodes_with_caps_lock);
+				list_add(&ci->i_snap_realm_item,
+					 &realm->inodes_with_caps);
+				ci->i_snap_realm = realm;
+				spin_unlock(&realm->inodes_with_caps_lock);
+				ceph_get_snap_realm(mdsc, realm);
+				ceph_put_snap_realm(mdsc, oldrealm);
+			}
 			spin_unlock(&inode->i_lock);
 			iput(inode);
 		}
-- 
cgit v1.1


From c8f16584ac85444d51d8753c5df502350cfc7bb7 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Mon, 19 Apr 2010 13:50:26 -0700
Subject: ceph: print more useful version info on module load

Decouple the client version from the server side.  Print relevant protocol
and map version info instead.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/super.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 75d02ea..f888cf4 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -996,9 +996,10 @@ static int __init init_ceph(void)
 	if (ret)
 		goto out_icache;
 
-	pr_info("loaded %d.%d.%d (mon/mds/osd proto %d/%d/%d)\n",
-		CEPH_VERSION_MAJOR, CEPH_VERSION_MINOR, CEPH_VERSION_PATCH,
-		CEPH_MONC_PROTOCOL, CEPH_MDSC_PROTOCOL, CEPH_OSDC_PROTOCOL);
+	pr_info("loaded (mon/mds/osd proto %d/%d/%d, osdmap %d/%d %d/%d)\n",
+		CEPH_MONC_PROTOCOL, CEPH_MDSC_PROTOCOL, CEPH_OSDC_PROTOCOL,
+		CEPH_OSDMAP_VERSION, CEPH_OSDMAP_VERSION_EXT,
+		CEPH_OSDMAP_INC_VERSION, CEPH_OSDMAP_INC_VERSION_EXT);
 	return 0;
 
 out_icache:
-- 
cgit v1.1


From 0b0c06d1476290cea248923c0ee7be9fd61cacea Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Tue, 20 Apr 2010 10:27:13 -0700
Subject: ceph: fix leaked spinlock during mds reconnect

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/caps.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index aa2239f..0c16818 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1861,8 +1861,8 @@ static void kick_flushing_capsnaps(struct ceph_mds_client *mdsc,
 		} else {
 			pr_err("%p auth cap %p not mds%d ???\n", inode,
 			       cap, session->s_mds);
-			spin_unlock(&inode->i_lock);
 		}
+		spin_unlock(&inode->i_lock);
 	}
 }
 
-- 
cgit v1.1


From d45d0d970f495e04a4e4f46acd74e90f4a4564f9 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Tue, 20 Apr 2010 15:20:33 -0700
Subject: ceph: add missing #includes

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/auth.c      | 1 +
 fs/ceph/auth_none.h | 2 ++
 fs/ceph/super.h     | 1 +
 3 files changed, 4 insertions(+)

(limited to 'fs')

diff --git a/fs/ceph/auth.c b/fs/ceph/auth.c
index f6394b9..818afe7 100644
--- a/fs/ceph/auth.c
+++ b/fs/ceph/auth.c
@@ -3,6 +3,7 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/err.h>
+#include <linux/slab.h>
 
 #include "types.h"
 #include "auth_none.h"
diff --git a/fs/ceph/auth_none.h b/fs/ceph/auth_none.h
index 56c0553..8164df1 100644
--- a/fs/ceph/auth_none.h
+++ b/fs/ceph/auth_none.h
@@ -1,6 +1,8 @@
 #ifndef _FS_CEPH_AUTH_NONE_H
 #define _FS_CEPH_AUTH_NONE_H
 
+#include <linux/slab.h>
+
 #include "auth.h"
 
 /*
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index e30dfbb..13513b8 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -10,6 +10,7 @@
 #include <linux/fs.h>
 #include <linux/mempool.h>
 #include <linux/pagemap.h>
+#include <linux/slab.h>
 #include <linux/wait.h>
 #include <linux/writeback.h>
 #include <linux/slab.h>
-- 
cgit v1.1


From 684be25c52a1e43638ced160be0b0b46596e7f2b Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Wed, 21 Apr 2010 20:45:59 -0700
Subject: ceph: fix seq counting for skipped messages

Increment in_seq even when the message is skipped for some reason.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/messenger.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c
index cdaaa13..e7b91e0 100644
--- a/fs/ceph/messenger.c
+++ b/fs/ceph/messenger.c
@@ -1379,6 +1379,7 @@ static int read_partial_message(struct ceph_connection *con)
 			con->in_base_pos = -front_len - middle_len - data_len -
 				sizeof(m->footer);
 			con->in_tag = CEPH_MSGR_TAG_READY;
+			con->in_seq++;
 			return 0;
 		}
 		if (IS_ERR(con->in_msg)) {
@@ -2030,6 +2031,7 @@ void ceph_con_revoke_message(struct ceph_connection *con, struct ceph_msg *msg)
 		ceph_msg_put(con->in_msg);
 		con->in_msg = NULL;
 		con->in_tag = CEPH_MSGR_TAG_READY;
+		con->in_seq++;
 	} else {
 		dout("con_revoke_pages %p msg %p pages %p no-op\n",
 		     con, con->in_msg, msg);
-- 
cgit v1.1


From ae18756b9fa7bb93132cff06cd8575e3d46633f9 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Thu, 22 Apr 2010 07:47:01 -0700
Subject: ceph: discard incoming messages with bad seq #

We can get old message seq #'s after a tcp reconnect for stateful sessions
(i.e., the MDS).  If we get a higher seq #, that is an error, and we
shouldn't see any bad seq #'s for stateless (mon, osd) connections.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/messenger.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'fs')

diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c
index e7b91e0..509f57d 100644
--- a/fs/ceph/messenger.c
+++ b/fs/ceph/messenger.c
@@ -1334,6 +1334,7 @@ static int read_partial_message(struct ceph_connection *con)
 	unsigned front_len, middle_len, data_len, data_off;
 	int datacrc = con->msgr->nocrc;
 	int skip;
+	u64 seq;
 
 	dout("read_partial_message con %p msg %p\n", con, m);
 
@@ -1368,6 +1369,25 @@ static int read_partial_message(struct ceph_connection *con)
 		return -EIO;
 	data_off = le16_to_cpu(con->in_hdr.data_off);
 
+	/* verify seq# */
+	seq = le64_to_cpu(con->in_hdr.seq);
+	if ((s64)seq - (s64)con->in_seq < 1) {
+		pr_info("skipping %s%lld %s seq %lld, expected %lld\n",
+			ENTITY_NAME(con->peer_name),
+			pr_addr(&con->peer_addr.in_addr),
+			seq, con->in_seq + 1);
+		con->in_base_pos = -front_len - middle_len - data_len -
+			sizeof(m->footer);
+		con->in_tag = CEPH_MSGR_TAG_READY;
+		con->in_seq++;
+		return 0;
+	} else if ((s64)seq - (s64)con->in_seq > 1) {
+		pr_err("read_partial_message bad seq %lld expected %lld\n",
+		       seq, con->in_seq + 1);
+		con->error_msg = "bad message sequence # for incoming message";
+		return -EBADMSG;
+	}
+
 	/* allocate message? */
 	if (!con->in_msg) {
 		dout("got hdr type %d front %d data %d\n", con->in_hdr.type,
-- 
cgit v1.1


From 5c6a2cdb4fe8aaf6b54f022c14f13d2a12b45914 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Thu, 22 Apr 2010 13:48:59 -0700
Subject: ceph: fix direct io truncate offset

truncate_inode_pages_range wants the end offset to align with the last byte
in a page.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/file.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 4add3d5..ed6f197 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -665,7 +665,8 @@ more:
 		 * throw out any page cache pages in this range. this
 		 * may block.
 		 */
-		truncate_inode_pages_range(inode->i_mapping, pos, pos+len);
+		truncate_inode_pages_range(inode->i_mapping, pos, 
+					   (pos+len) | (PAGE_CACHE_SIZE-1));
 	} else {
 		pages = alloc_page_vector(num_pages);
 		if (IS_ERR(pages)) {
-- 
cgit v1.1


From ea1409f96197c1bffe5d7d5bc967b3445edcc1fa Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Wed, 28 Apr 2010 16:12:06 -0700
Subject: ceph: clear dir complete, invalidate dentry on replayed rename

If a rename operation is resent to the MDS following an MDS restart, the
client does not get a full reply (containing the resulting metadata) back.
In that case, a ceph_rename() needs to compensate by doing anything useful
that fill_inode() would have, like d_move().

It also needs to invalidate the dentry (to workaround the vfs_rename_dir()
bug) and clear the dir complete flag, just like fill_trace().

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/dir.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'fs')

diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index ea8ee2e..650d2db 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -880,7 +880,16 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
 		 * do_request, above).  If there is no trace, we need
 		 * to do it here.
 		 */
+
+		/* d_move screws up d_subdirs order */
+		ceph_i_clear(new_dir, CEPH_I_COMPLETE);
+
 		d_move(old_dentry, new_dentry);
+
+		/* ensure target dentry is invalidated, despite
+		   rehashing bug in vfs_rename_dir */
+		new_dentry->d_time = jiffies;
+		ceph_dentry(new_dentry)->lease_shared_gen = 0;
 	}
 	ceph_mdsc_put_request(req);
 	return err;
-- 
cgit v1.1


From 7ff899da02cb674211858fcd919f8b4511a4423f Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Fri, 23 Apr 2010 10:25:33 -0700
Subject: ceph: fix lockless caps check

The __ variant requires caller to hold i_lock.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/addr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 4125937..4b42c2b 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -509,7 +509,7 @@ static void writepages_finish(struct ceph_osd_request *req,
 	u64 bytes = 0;
 	struct ceph_client *client = ceph_inode_to_client(inode);
 	long writeback_stat;
-	unsigned issued = __ceph_caps_issued(ci, NULL);
+	unsigned issued = ceph_caps_issued(ci);
 
 	/* parse reply */
 	replyhead = msg->front.iov_base;
-- 
cgit v1.1


From b0930f8d38c6ab76dc8222a5a910a21392d38208 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Thu, 29 Apr 2010 13:26:53 -0700
Subject: ceph: remove bad auth_x kmem_cache

It's useless, since our allocations are already a power of 2.  And it was
allocated per-instance (not globally), which caused a name collision when
we tried to mount a second file system with auth_x enabled.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/auth_x.c | 32 ++++++++++----------------------
 1 file changed, 10 insertions(+), 22 deletions(-)

(limited to 'fs')

diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c
index d9001a4..fee5a08 100644
--- a/fs/ceph/auth_x.c
+++ b/fs/ceph/auth_x.c
@@ -12,8 +12,6 @@
 #include "auth.h"
 #include "decode.h"
 
-struct kmem_cache *ceph_x_ticketbuf_cachep;
-
 #define TEMP_TICKET_BUF_LEN	256
 
 static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed);
@@ -131,13 +129,12 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
 	char *ticket_buf;
 	u8 struct_v;
 
-	dbuf = kmem_cache_alloc(ceph_x_ticketbuf_cachep, GFP_NOFS | GFP_ATOMIC);
+	dbuf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS);
 	if (!dbuf)
 		return -ENOMEM;
 
 	ret = -ENOMEM;
-	ticket_buf = kmem_cache_alloc(ceph_x_ticketbuf_cachep,
-				      GFP_NOFS | GFP_ATOMIC);
+	ticket_buf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS);
 	if (!ticket_buf)
 		goto out_dbuf;
 
@@ -251,9 +248,9 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
 
 	ret = 0;
 out:
-	kmem_cache_free(ceph_x_ticketbuf_cachep, ticket_buf);
+	kfree(ticket_buf);
 out_dbuf:
-	kmem_cache_free(ceph_x_ticketbuf_cachep, dbuf);
+	kfree(dbuf);
 	return ret;
 
 bad:
@@ -605,8 +602,6 @@ static void ceph_x_destroy(struct ceph_auth_client *ac)
 		remove_ticket_handler(ac, th);
 	}
 
-	kmem_cache_destroy(ceph_x_ticketbuf_cachep);
-
 	kfree(ac->private);
 	ac->private = NULL;
 }
@@ -641,26 +636,20 @@ int ceph_x_init(struct ceph_auth_client *ac)
 	int ret;
 
 	dout("ceph_x_init %p\n", ac);
+	ret = -ENOMEM;
 	xi = kzalloc(sizeof(*xi), GFP_NOFS);
 	if (!xi)
-		return -ENOMEM;
+		goto out;
 
-	ret = -ENOMEM;
-	ceph_x_ticketbuf_cachep = kmem_cache_create("ceph_x_ticketbuf",
-				      TEMP_TICKET_BUF_LEN, 8,
-				      (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD),
-				      NULL);
-	if (!ceph_x_ticketbuf_cachep)
-		goto done_nomem;
 	ret = -EINVAL;
 	if (!ac->secret) {
 		pr_err("no secret set (for auth_x protocol)\n");
-		goto done_nomem;
+		goto out_nomem;
 	}
 
 	ret = ceph_crypto_key_unarmor(&xi->secret, ac->secret);
 	if (ret)
-		goto done_nomem;
+		goto out_nomem;
 
 	xi->starting = true;
 	xi->ticket_handlers = RB_ROOT;
@@ -670,10 +659,9 @@ int ceph_x_init(struct ceph_auth_client *ac)
 	ac->ops = &ceph_x_ops;
 	return 0;
 
-done_nomem:
+out_nomem:
 	kfree(xi);
-	if (ceph_x_ticketbuf_cachep)
-		kmem_cache_destroy(ceph_x_ticketbuf_cachep);
+out:
 	return ret;
 }
 
-- 
cgit v1.1


From d577632e65ea01fb3b124b652d7bd2381251da3c Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Mon, 3 May 2010 19:15:49 -0700
Subject: ocfs2: Avoid a gcc warning in ocfs2_wipe_inode().

gcc warns that a variable is uninitialized.  It's actually handled, but
an early return fools gcc.  Let's just initialize the variable to a
garbage value that will crash if the usage is ever broken.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 83fe1d3..af18988 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -725,7 +725,7 @@ static void ocfs2_signal_wipe_completion(struct ocfs2_super *osb,
 static int ocfs2_wipe_inode(struct inode *inode,
 			    struct buffer_head *di_bh)
 {
-	int status, orphaned_slot;
+	int status, orphaned_slot = -1;
 	struct inode *orphan_dir_inode = NULL;
 	struct buffer_head *orphan_dir_bh = NULL;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
-- 
cgit v1.1