From 72f465033702ebfe20db8f50edaad59f0f38b0f5 Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Mon, 23 Aug 2010 13:35:09 +0200
Subject: bio-integrity.c: remove dependency on __GFP_NOFAIL

The kmalloc() in bio_integrity_prep() is failable, so remove __GFP_NOFAIL
from its mask.

Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 fs/bio-integrity.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index 612a5c3..a8f4cc6 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -413,7 +413,7 @@ int bio_integrity_prep(struct bio *bio)
 
 	/* Allocate kernel buffer for protection data */
 	len = sectors * blk_integrity_tuple_size(bi);
-	buf = kmalloc(len, GFP_NOIO | __GFP_NOFAIL | q->bounce_gfp);
+	buf = kmalloc(len, GFP_NOIO | q->bounce_gfp);
 	if (unlikely(buf == NULL)) {
 		printk(KERN_ERR "could not allocate integrity buffer\n");
 		return -EIO;
-- 
cgit v1.1


From 220eb7fd984bfc7e6b4005fdf32efe9cd8af7cf2 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Mon, 23 Aug 2010 13:36:20 +0200
Subject: fs/bio-integrity.c: return -ENOMEM on kmalloc failure

Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 fs/bio-integrity.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index a8f4cc6..4d0ff5e 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -416,7 +416,7 @@ int bio_integrity_prep(struct bio *bio)
 	buf = kmalloc(len, GFP_NOIO | q->bounce_gfp);
 	if (unlikely(buf == NULL)) {
 		printk(KERN_ERR "could not allocate integrity buffer\n");
-		return -EIO;
+		return -ENOMEM;
 	}
 
 	end = (((unsigned long) buf) + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
-- 
cgit v1.1


From b76b4014f9d988d2412b873e4d4c13c7f9afc4e4 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Sat, 28 Aug 2010 08:52:10 +0200
Subject: writeback: Fix lost wake-up shutting down writeback thread

Setting the task state here may cause us to miss the wake up from
kthread_stop(), so we need to recheck kthread_should_stop() or risk
sleeping forever in the following schedule().

Symptom was an indefinite hang on an NFSv4 mount.  (NFSv4 may create
multiple mounts in a temporary namespace while traversing the mount
path, and since the temporary namespace is immediately destroyed, it may
end up destroying a mount very soon after it was created, possibly
making this race more likely.)

INFO: task mount.nfs4:4314 blocked for more than 120 seconds.
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
mount.nfs4    D 0000000000000000  2880  4314   4313 0x00000000
 ffff88001ed6da28 0000000000000046 ffff88001ed6dfd8 ffff88001ed6dfd8
 ffff88001ed6c000 ffff88001ed6c000 ffff88001ed6c000 ffff88001e5003a0
 ffff88001ed6dfd8 ffff88001e5003a8 ffff88001ed6c000 ffff88001ed6dfd8
Call Trace:
 [<ffffffff8196090d>] schedule_timeout+0x1cd/0x2e0
 [<ffffffff8106a31c>] ? mark_held_locks+0x6c/0xa0
 [<ffffffff819639a0>] ? _raw_spin_unlock_irq+0x30/0x60
 [<ffffffff8106a5fd>] ? trace_hardirqs_on_caller+0x14d/0x190
 [<ffffffff819671fe>] ? sub_preempt_count+0xe/0xd0
 [<ffffffff8195fc80>] wait_for_common+0x120/0x190
 [<ffffffff81033c70>] ? default_wake_function+0x0/0x20
 [<ffffffff8195fdcd>] wait_for_completion+0x1d/0x20
 [<ffffffff810595fa>] kthread_stop+0x4a/0x150
 [<ffffffff81061a60>] ? thaw_process+0x70/0x80
 [<ffffffff810cc68a>] bdi_unregister+0x10a/0x1a0
 [<ffffffff81229dc9>] nfs_put_super+0x19/0x20
 [<ffffffff810ee8c4>] generic_shutdown_super+0x54/0xe0
 [<ffffffff810ee9b6>] kill_anon_super+0x16/0x60
 [<ffffffff8122d3b9>] nfs4_kill_super+0x39/0x90
 [<ffffffff810eda45>] deactivate_locked_super+0x45/0x60
 [<ffffffff810edfb9>] deactivate_super+0x49/0x70
 [<ffffffff81108294>] mntput_no_expire+0x84/0xe0
 [<ffffffff811084ef>] release_mounts+0x9f/0xc0
 [<ffffffff81108575>] put_mnt_ns+0x65/0x80
 [<ffffffff8122cc56>] nfs_follow_remote_path+0x1e6/0x420
 [<ffffffff8122cfbf>] nfs4_try_mount+0x6f/0xd0
 [<ffffffff8122d0c2>] nfs4_get_sb+0xa2/0x360
 [<ffffffff810edcb8>] vfs_kern_mount+0x88/0x1f0
 [<ffffffff810ede92>] do_kern_mount+0x52/0x130
 [<ffffffff81963d9a>] ? _lock_kernel+0x6a/0x170
 [<ffffffff81108e9e>] do_mount+0x26e/0x7f0
 [<ffffffff81106b3a>] ? copy_mount_options+0xea/0x190
 [<ffffffff811094b8>] sys_mount+0x98/0xf0
 [<ffffffff810024d8>] system_call_fastpath+0x16/0x1b
1 lock held by mount.nfs4/4314:
 #0:  (&type->s_umount_key#24){+.+...}, at: [<ffffffff810edfb1>] deactivate_super+0x41/0x70

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
Acked-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
---
 fs/fs-writeback.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 7d9d06b..81e086d 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -808,7 +808,7 @@ int bdi_writeback_thread(void *data)
 			wb->last_active = jiffies;
 
 		set_current_state(TASK_INTERRUPTIBLE);
-		if (!list_empty(&bdi->work_list)) {
+		if (!list_empty(&bdi->work_list) || kthread_should_stop()) {
 			__set_current_state(TASK_RUNNING);
 			continue;
 		}
-- 
cgit v1.1


From 7100ae97266e387d25d0c8a5d9934931f0b07dbc Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Wed, 8 Sep 2010 20:54:49 +0000
Subject: Revert "[CIFS] Eliminate unused variable warning"

The change to kernel crypto and fixes to ntlvm2 and ntlmssp
series, introduced a regression.  Deferring this patch series
to 2.6.37 after Shirish fixes it.

This reverts commit c89e5198b26a869ce2842bad8519264f3394dee9.

Signed-off-by: Steve French <sfrench@us.ibm.com>
Acked-by: Jeff Layton <jlayton@redhat.com>
CC: Shirish Pargaonkar <shirishp@us.ibm.com>
---
 fs/cifs/sess.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 795095f..4788e16 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -620,6 +620,7 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses,
 	struct key *spnego_key = NULL;
 	__le32 phase = NtLmNegotiate; /* NTLMSSP, if needed, is multistage */
 	bool first_time;
+	char *ntlmsspblob;
 
 	if (ses == NULL)
 		return -EINVAL;
@@ -867,8 +868,6 @@ ssetup_ntlmssp_authenticate:
 				iov[1].iov_base = &pSMB->req.SecurityBlob[0];
 			} else if (phase == NtLmAuthenticate) {
 				int blob_len;
-				char *ntlmsspblob;
-
 				ntlmsspblob = kmalloc(5 *
 					sizeof(struct _AUTHENTICATE_MESSAGE),
 					GFP_KERNEL);
-- 
cgit v1.1


From 56234e2767496c125a858f880f1b3a62e04a3406 Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Wed, 8 Sep 2010 20:57:05 +0000
Subject: Revert "Eliminate sparse warning - bad constant expression"

This reverts commit 2d20ca835867d93ead6ce61780d883a4b128106d.

    The change to kernel crypto and fixes to ntlvm2 and ntlmssp
    series, introduced a regression.  Deferring this patch series
    to 2.6.37 after Shirish fixes it.

Signed-off-by: Steve French <sfrench@us.ibm.com>
Acked-by: Jeff Layton <jlayton@redhat.com>
CC: Shirish Pargaonkar <shirishp@us.ibm.com>
---
 fs/cifs/cifsencrypt.c | 193 +++++++++++++++++++-------------------------------
 fs/cifs/cifsglob.h    |   7 --
 2 files changed, 72 insertions(+), 128 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 709f229..eef78c2 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -45,38 +45,39 @@ extern void SMBencrypt(unsigned char *passwd, const unsigned char *c8,
 static int cifs_calculate_signature(const struct smb_hdr *cifs_pdu,
 			struct TCP_Server_Info *server, char *signature)
 {
-	int rc;
+	int rc = 0;
+	struct {
+		struct shash_desc shash;
+		char ctx[crypto_shash_descsize(server->ntlmssp.md5)];
+	} sdesc;
 
 	if (cifs_pdu == NULL || server == NULL || signature == NULL)
 		return -EINVAL;
 
-	if (!server->ntlmssp.sdescmd5) {
-		cERROR(1,
-			"cifs_calculate_signature: can't generate signature\n");
-		return -1;
-	}
+	sdesc.shash.tfm = server->ntlmssp.md5;
+	sdesc.shash.flags = 0x0;
 
-	rc = crypto_shash_init(&server->ntlmssp.sdescmd5->shash);
+	rc = crypto_shash_init(&sdesc.shash);
 	if (rc) {
-		cERROR(1, "cifs_calculate_signature: oould not init md5\n");
+		cERROR(1, "could not initialize master crypto API hmacmd5\n");
 		return rc;
 	}
 
 	if (server->secType == RawNTLMSSP)
-		crypto_shash_update(&server->ntlmssp.sdescmd5->shash,
+		crypto_shash_update(&sdesc.shash,
 			server->session_key.data.ntlmv2.key,
 			CIFS_NTLMV2_SESSKEY_SIZE);
 	else
-		crypto_shash_update(&server->ntlmssp.sdescmd5->shash,
+		crypto_shash_update(&sdesc.shash,
 			(char *)&server->session_key.data,
 			server->session_key.len);
 
-	crypto_shash_update(&server->ntlmssp.sdescmd5->shash,
+	crypto_shash_update(&sdesc.shash,
 			cifs_pdu->Protocol, cifs_pdu->smb_buf_length);
 
-	rc = crypto_shash_final(&server->ntlmssp.sdescmd5->shash, signature);
+	rc = crypto_shash_final(&sdesc.shash, signature);
 
-	return rc;
+	return 0;
 }
 
 
@@ -114,28 +115,30 @@ static int cifs_calc_signature2(const struct kvec *iov, int n_vec,
 			struct TCP_Server_Info *server, char *signature)
 {
 	int i;
-	int rc;
+	int rc = 0;
+	struct {
+		struct shash_desc shash;
+		char ctx[crypto_shash_descsize(server->ntlmssp.md5)];
+	} sdesc;
 
 	if (iov == NULL || server == NULL || signature == NULL)
 		return -EINVAL;
 
-	if (!server->ntlmssp.sdescmd5) {
-		cERROR(1, "cifs_calc_signature2: can't generate signature\n");
-		return -1;
-	}
+	sdesc.shash.tfm = server->ntlmssp.md5;
+	sdesc.shash.flags = 0x0;
 
-	rc = crypto_shash_init(&server->ntlmssp.sdescmd5->shash);
+	rc = crypto_shash_init(&sdesc.shash);
 	if (rc) {
-		cERROR(1, "cifs_calc_signature2: oould not init md5\n");
+		cERROR(1, "could not initialize master crypto API hmacmd5\n");
 		return rc;
 	}
 
 	if (server->secType == RawNTLMSSP)
-		crypto_shash_update(&server->ntlmssp.sdescmd5->shash,
+		crypto_shash_update(&sdesc.shash,
 			server->session_key.data.ntlmv2.key,
 			CIFS_NTLMV2_SESSKEY_SIZE);
 	else
-		crypto_shash_update(&server->ntlmssp.sdescmd5->shash,
+		crypto_shash_update(&sdesc.shash,
 			(char *)&server->session_key.data,
 			server->session_key.len);
 
@@ -143,7 +146,7 @@ static int cifs_calc_signature2(const struct kvec *iov, int n_vec,
 		if (iov[i].iov_len == 0)
 			continue;
 		if (iov[i].iov_base == NULL) {
-			cERROR(1, "cifs_calc_signature2: null iovec entry");
+			cERROR(1, "null iovec entry");
 			return -EIO;
 		}
 		/* The first entry includes a length field (which does not get
@@ -151,16 +154,16 @@ static int cifs_calc_signature2(const struct kvec *iov, int n_vec,
 		if (i == 0) {
 			if (iov[0].iov_len <= 8) /* cmd field at offset 9 */
 				break; /* nothing to sign or corrupt header */
-			crypto_shash_update(&server->ntlmssp.sdescmd5->shash,
+			crypto_shash_update(&sdesc.shash,
 				iov[i].iov_base + 4, iov[i].iov_len - 4);
 		} else
-			crypto_shash_update(&server->ntlmssp.sdescmd5->shash,
+			crypto_shash_update(&sdesc.shash,
 				iov[i].iov_base, iov[i].iov_len);
 	}
 
-	rc = crypto_shash_final(&server->ntlmssp.sdescmd5->shash, signature);
+	rc = crypto_shash_final(&sdesc.shash, signature);
 
-	return rc;
+	return 0;
 }
 
 int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server,
@@ -310,48 +313,43 @@ static int calc_ntlmv2_hash(struct cifsSesInfo *ses,
 	wchar_t *user;
 	wchar_t *domain;
 	wchar_t *server;
-
-	if (!ses->server->ntlmssp.sdeschmacmd5) {
-		cERROR(1, "calc_ntlmv2_hash: can't generate ntlmv2 hash\n");
-		return -1;
-	}
+	struct {
+		struct shash_desc shash;
+		char ctx[crypto_shash_descsize(ses->server->ntlmssp.hmacmd5)];
+	} sdesc;
 
 	/* calculate md4 hash of password */
 	E_md4hash(ses->password, nt_hash);
 
+	sdesc.shash.tfm = ses->server->ntlmssp.hmacmd5;
+	sdesc.shash.flags = 0x0;
+
 	crypto_shash_setkey(ses->server->ntlmssp.hmacmd5, nt_hash,
 				CIFS_NTHASH_SIZE);
 
-	rc = crypto_shash_init(&ses->server->ntlmssp.sdeschmacmd5->shash);
+	rc = crypto_shash_init(&sdesc.shash);
 	if (rc) {
-		cERROR(1, "calc_ntlmv2_hash: could not init hmacmd5\n");
+		cERROR(1, "could not initialize master crypto API hmacmd5\n");
 		return rc;
 	}
 
 	/* convert ses->userName to unicode and uppercase */
 	len = strlen(ses->userName);
 	user = kmalloc(2 + (len * 2), GFP_KERNEL);
-	if (user == NULL) {
-		cERROR(1, "calc_ntlmv2_hash: user mem alloc failure\n");
-		rc = -ENOMEM;
+	if (user == NULL)
 		goto calc_exit_2;
-	}
 	len = cifs_strtoUCS((__le16 *)user, ses->userName, len, nls_cp);
 	UniStrupr(user);
 
-	crypto_shash_update(&ses->server->ntlmssp.sdeschmacmd5->shash,
-				(char *)user, 2 * len);
+	crypto_shash_update(&sdesc.shash, (char *)user, 2 * len);
 
 	/* convert ses->domainName to unicode and uppercase */
 	if (ses->domainName) {
 		len = strlen(ses->domainName);
 
 		domain = kmalloc(2 + (len * 2), GFP_KERNEL);
-		if (domain == NULL) {
-			cERROR(1, "calc_ntlmv2_hash: domain mem alloc failure");
-			rc = -ENOMEM;
+		if (domain == NULL)
 			goto calc_exit_1;
-		}
 		len = cifs_strtoUCS((__le16 *)domain, ses->domainName, len,
 					nls_cp);
 		/* the following line was removed since it didn't work well
@@ -359,19 +357,15 @@ static int calc_ntlmv2_hash(struct cifsSesInfo *ses,
 		   Maybe converting the domain name earlier makes sense */
 		/* UniStrupr(domain); */
 
-		crypto_shash_update(&ses->server->ntlmssp.sdeschmacmd5->shash,
-					(char *)domain, 2 * len);
+		crypto_shash_update(&sdesc.shash, (char *)domain, 2 * len);
 
 		kfree(domain);
 	} else if (ses->serverName) {
 		len = strlen(ses->serverName);
 
 		server = kmalloc(2 + (len * 2), GFP_KERNEL);
-		if (server == NULL) {
-			cERROR(1, "calc_ntlmv2_hash: server mem alloc failure");
-			rc = -ENOMEM;
+		if (server == NULL)
 			goto calc_exit_1;
-		}
 		len = cifs_strtoUCS((__le16 *)server, ses->serverName, len,
 					nls_cp);
 		/* the following line was removed since it didn't work well
@@ -379,20 +373,16 @@ static int calc_ntlmv2_hash(struct cifsSesInfo *ses,
 		   Maybe converting the domain name earlier makes sense */
 		/* UniStrupr(domain); */
 
-		crypto_shash_update(&ses->server->ntlmssp.sdeschmacmd5->shash,
-					(char *)server, 2 * len);
+		crypto_shash_update(&sdesc.shash, (char *)server, 2 * len);
 
 		kfree(server);
 	}
-
-	rc = crypto_shash_final(&ses->server->ntlmssp.sdeschmacmd5->shash,
-					ses->server->ntlmv2_hash);
-
 calc_exit_1:
 	kfree(user);
 calc_exit_2:
 	/* BB FIXME what about bytes 24 through 40 of the signing key?
 	   compare with the NTLM example */
+	rc = crypto_shash_final(&sdesc.shash, ses->server->ntlmv2_hash);
 
 	return rc;
 }
@@ -452,33 +442,34 @@ CalcNTLMv2_response(const struct TCP_Server_Info *server,
 			 char *v2_session_response)
 {
 	int rc;
+	struct {
+		struct shash_desc shash;
+		char ctx[crypto_shash_descsize(server->ntlmssp.hmacmd5)];
+	} sdesc;
 
-	if (!server->ntlmssp.sdeschmacmd5) {
-		cERROR(1, "calc_ntlmv2_hash: can't generate ntlmv2 hash\n");
-		return -1;
-	}
+	sdesc.shash.tfm = server->ntlmssp.hmacmd5;
+	sdesc.shash.flags = 0x0;
 
 	crypto_shash_setkey(server->ntlmssp.hmacmd5, server->ntlmv2_hash,
 		CIFS_HMAC_MD5_HASH_SIZE);
 
-	rc = crypto_shash_init(&server->ntlmssp.sdeschmacmd5->shash);
+	rc = crypto_shash_init(&sdesc.shash);
 	if (rc) {
-		cERROR(1, "CalcNTLMv2_response: could not init hmacmd5");
+		cERROR(1, "could not initialize master crypto API hmacmd5\n");
 		return rc;
 	}
 
 	memcpy(v2_session_response + CIFS_SERVER_CHALLENGE_SIZE,
 		server->cryptKey, CIFS_SERVER_CHALLENGE_SIZE);
-	crypto_shash_update(&server->ntlmssp.sdeschmacmd5->shash,
+	crypto_shash_update(&sdesc.shash,
 		v2_session_response + CIFS_SERVER_CHALLENGE_SIZE,
 		sizeof(struct ntlmv2_resp) - CIFS_SERVER_CHALLENGE_SIZE);
 
 	if (server->tilen)
-		crypto_shash_update(&server->ntlmssp.sdeschmacmd5->shash,
+		crypto_shash_update(&sdesc.shash,
 					server->tiblob, server->tilen);
 
-	rc = crypto_shash_final(&server->ntlmssp.sdeschmacmd5->shash,
-					v2_session_response);
+	rc = crypto_shash_final(&sdesc.shash, v2_session_response);
 
 	return rc;
 }
@@ -489,6 +480,10 @@ setup_ntlmv2_rsp(struct cifsSesInfo *ses, char *resp_buf,
 {
 	int rc = 0;
 	struct ntlmv2_resp *buf = (struct ntlmv2_resp *)resp_buf;
+	struct {
+		struct shash_desc shash;
+		char ctx[crypto_shash_descsize(ses->server->ntlmssp.hmacmd5)];
+	} sdesc;
 
 	buf->blob_signature = cpu_to_le32(0x00000101);
 	buf->reserved = 0;
@@ -516,24 +511,21 @@ setup_ntlmv2_rsp(struct cifsSesInfo *ses, char *resp_buf,
 		return rc;
 	}
 
-	if (!ses->server->ntlmssp.sdeschmacmd5) {
-		cERROR(1, "calc_ntlmv2_hash: can't generate ntlmv2 hash\n");
-		return -1;
-	}
-
 	crypto_shash_setkey(ses->server->ntlmssp.hmacmd5,
 			ses->server->ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE);
 
-	rc = crypto_shash_init(&ses->server->ntlmssp.sdeschmacmd5->shash);
+	sdesc.shash.tfm = ses->server->ntlmssp.hmacmd5;
+	sdesc.shash.flags = 0x0;
+
+	rc = crypto_shash_init(&sdesc.shash);
 	if (rc) {
-		cERROR(1, "setup_ntlmv2_rsp: could not init hmacmd5\n");
+		cERROR(1, "could not initialize master crypto API hmacmd5\n");
 		return rc;
 	}
 
-	crypto_shash_update(&ses->server->ntlmssp.sdeschmacmd5->shash,
-				resp_buf, CIFS_HMAC_MD5_HASH_SIZE);
+	crypto_shash_update(&sdesc.shash, resp_buf, CIFS_HMAC_MD5_HASH_SIZE);
 
-	rc = crypto_shash_final(&ses->server->ntlmssp.sdeschmacmd5->shash,
+	rc = crypto_shash_final(&sdesc.shash,
 		ses->server->session_key.data.ntlmv2.key);
 
 	memcpy(&ses->server->session_key.data.ntlmv2.resp, resp_buf,
@@ -586,65 +578,24 @@ cifs_crypto_shash_release(struct TCP_Server_Info *server)
 
 	if (server->ntlmssp.hmacmd5)
 		crypto_free_shash(server->ntlmssp.hmacmd5);
-
-	kfree(server->ntlmssp.sdeschmacmd5);
-
-	kfree(server->ntlmssp.sdescmd5);
 }
 
 int
 cifs_crypto_shash_allocate(struct TCP_Server_Info *server)
 {
-	int rc;
-	unsigned int size;
-
 	server->ntlmssp.hmacmd5 = crypto_alloc_shash("hmac(md5)", 0, 0);
 	if (!server->ntlmssp.hmacmd5 ||
 			IS_ERR(server->ntlmssp.hmacmd5)) {
-		cERROR(1, "could not allocate crypto hmacmd5\n");
+		cERROR(1, "could not allocate master crypto API hmacmd5\n");
 		return 1;
 	}
 
 	server->ntlmssp.md5 = crypto_alloc_shash("md5", 0, 0);
 	if (!server->ntlmssp.md5 || IS_ERR(server->ntlmssp.md5)) {
-		cERROR(1, "could not allocate crypto md5\n");
-		rc = 1;
-		goto cifs_crypto_shash_allocate_ret1;
-	}
-
-	size = sizeof(struct shash_desc) +
-			crypto_shash_descsize(server->ntlmssp.hmacmd5);
-	server->ntlmssp.sdeschmacmd5 = kmalloc(size, GFP_KERNEL);
-	if (!server->ntlmssp.sdeschmacmd5) {
-		cERROR(1, "cifs_crypto_shash_allocate: can't alloc hmacmd5\n");
-		rc = -ENOMEM;
-		goto cifs_crypto_shash_allocate_ret2;
-	}
-	server->ntlmssp.sdeschmacmd5->shash.tfm = server->ntlmssp.hmacmd5;
-	server->ntlmssp.sdeschmacmd5->shash.flags = 0x0;
-
-
-	size = sizeof(struct shash_desc) +
-			crypto_shash_descsize(server->ntlmssp.md5);
-	server->ntlmssp.sdescmd5 = kmalloc(size, GFP_KERNEL);
-	if (!server->ntlmssp.sdescmd5) {
-		cERROR(1, "cifs_crypto_shash_allocate: can't alloc md5\n");
-		rc = -ENOMEM;
-		goto cifs_crypto_shash_allocate_ret3;
+		crypto_free_shash(server->ntlmssp.hmacmd5);
+		cERROR(1, "could not allocate master crypto API md5\n");
+		return 1;
 	}
-	server->ntlmssp.sdescmd5->shash.tfm = server->ntlmssp.md5;
-	server->ntlmssp.sdescmd5->shash.flags = 0x0;
 
 	return 0;
-
-cifs_crypto_shash_allocate_ret3:
-	kfree(server->ntlmssp.sdeschmacmd5);
-
-cifs_crypto_shash_allocate_ret2:
-	crypto_free_shash(server->ntlmssp.md5);
-
-cifs_crypto_shash_allocate_ret1:
-	crypto_free_shash(server->ntlmssp.hmacmd5);
-
-	return rc;
 }
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index c9d0cfc..49563e0 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -123,19 +123,12 @@ struct cifs_cred {
 	struct cifs_ace *aces;
 };
 
-struct sdesc {
-	struct shash_desc shash;
-	char ctx[];
-};
-
 struct ntlmssp_auth {
 	__u32 client_flags;
 	__u32 server_flags;
 	unsigned char ciphertext[CIFS_CPHTXT_SIZE];
 	struct crypto_shash *hmacmd5;
 	struct crypto_shash *md5;
-	struct sdesc *sdeschmacmd5;
-	struct sdesc *sdescmd5;
 };
 
 /*
-- 
cgit v1.1


From 745e507a9c79c6e1385d3414d5e56f3d4621a375 Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Wed, 8 Sep 2010 21:09:27 +0000
Subject: Revert "missing changes during ntlmv2/ntlmssp auth and sign"

This reverts commit 3ec6bbcdb4e85403f2c5958876ca9492afdf4031.

    The change to kernel crypto and fixes to ntlvm2 and ntlmssp
    series, introduced a regression.  Deferring this patch series
    to 2.6.37 after Shirish fixes it.

Signed-off-by: Steve French <sfrench@us.ibm.com>
Acked-by: Jeff Layton <jlayton@redhat.com>
CC: Shirish Pargaonkar <shirishp@us.ibm.com>
---
 fs/cifs/cifsencrypt.c |  2 --
 fs/cifs/sess.c        | 13 +++++--------
 2 files changed, 5 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index eef78c2..051d000 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -553,8 +553,6 @@ calc_seckey(struct TCP_Server_Info *server)
 		return 1;
 	}
 
-	desc.tfm = tfm_arc4;
-
 	crypto_blkcipher_setkey(tfm_arc4,
 		server->session_key.data.ntlmv2.key, CIFS_CPHTXT_SIZE);
 	sg_init_one(&sgin, sec_key, CIFS_CPHTXT_SIZE);
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 4788e16..41fc532 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -408,8 +408,6 @@ static int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len,
 	/* BB spec says that if AvId field of MsvAvTimestamp is populated then
 		we must set the MIC field of the AUTHENTICATE_MESSAGE */
 
-	ses->server->ntlmssp.server_flags = le32_to_cpu(pblob->NegotiateFlags);
-
 	tioffset = cpu_to_le16(pblob->TargetInfoArray.BufferOffset);
 	tilen = cpu_to_le16(pblob->TargetInfoArray.Length);
 	ses->server->tilen = tilen;
@@ -442,13 +440,12 @@ static void build_ntlmssp_negotiate_blob(unsigned char *pbuffer,
 	/* BB is NTLMV2 session security format easier to use here? */
 	flags = NTLMSSP_NEGOTIATE_56 |	NTLMSSP_REQUEST_TARGET |
 		NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE |
-		NTLMSSP_NEGOTIATE_NTLM;
+		NTLMSSP_NEGOTIATE_NT_ONLY | NTLMSSP_NEGOTIATE_NTLM;
 	if (ses->server->secMode &
-	   (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) {
-		flags |= NTLMSSP_NEGOTIATE_SIGN |
-			NTLMSSP_NEGOTIATE_KEY_XCH |
-			NTLMSSP_NEGOTIATE_EXTENDED_SEC;
-	}
+	   (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
+		flags |= NTLMSSP_NEGOTIATE_SIGN;
+	if (ses->server->secMode & SECMODE_SIGN_REQUIRED)
+		flags |= NTLMSSP_NEGOTIATE_ALWAYS_SIGN;
 
 	sec_blob->NegotiateFlags |= cpu_to_le32(flags);
 
-- 
cgit v1.1


From c8e56f1f4fb9f82f63e4ce6d73a14501d0432c76 Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Wed, 8 Sep 2010 21:10:58 +0000
Subject: Revert "[CIFS] Fix ntlmv2 auth with ntlmssp"

This reverts commit 9fbc590860e75785bdaf8b83e48fabfe4d4f7d58.

The change to kernel crypto and fixes to ntlvm2 and ntlmssp
series, introduced a regression.  Deferring this patch series
to 2.6.37 after Shirish fixes it.

Signed-off-by: Steve French <sfrench@us.ibm.com>
Acked-by: Jeff Layton <jlayton@redhat.com>
CC: Shirish Pargaonkar <shirishp@us.ibm.com>
---
 fs/cifs/Kconfig       |   2 -
 fs/cifs/asn1.c        |   6 +-
 fs/cifs/cifsencrypt.c | 416 +++++++++++++++-----------------------------------
 fs/cifs/cifsglob.h    |  18 +--
 fs/cifs/cifspdu.h     |   7 +-
 fs/cifs/cifsproto.h   |  12 +-
 fs/cifs/cifssmb.c     |  13 +-
 fs/cifs/connect.c     |  13 +-
 fs/cifs/ntlmssp.h     |  13 --
 fs/cifs/sess.c        | 118 ++++----------
 fs/cifs/transport.c   |   6 +-
 11 files changed, 172 insertions(+), 452 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 0da1deb..917b7d4 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -2,8 +2,6 @@ config CIFS
 	tristate "CIFS support (advanced network filesystem, SMBFS successor)"
 	depends on INET
 	select NLS
-	select CRYPTO_MD5
-	select CRYPTO_ARC4
 	help
 	  This is the client VFS module for the Common Internet File System
 	  (CIFS) protocol which is the successor to the Server Message Block
diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c
index 21f0fbd..cfd1ce3 100644
--- a/fs/cifs/asn1.c
+++ b/fs/cifs/asn1.c
@@ -597,13 +597,13 @@ decode_negTokenInit(unsigned char *security_blob, int length,
 				if (compare_oid(oid, oidlen, MSKRB5_OID,
 						MSKRB5_OID_LEN))
 					server->sec_mskerberos = true;
-				if (compare_oid(oid, oidlen, KRB5U2U_OID,
+				else if (compare_oid(oid, oidlen, KRB5U2U_OID,
 						     KRB5U2U_OID_LEN))
 					server->sec_kerberosu2u = true;
-				if (compare_oid(oid, oidlen, KRB5_OID,
+				else if (compare_oid(oid, oidlen, KRB5_OID,
 						     KRB5_OID_LEN))
 					server->sec_kerberos = true;
-				if (compare_oid(oid, oidlen, NTLMSSP_OID,
+				else if (compare_oid(oid, oidlen, NTLMSSP_OID,
 						     NTLMSSP_OID_LEN))
 					server->sec_ntlmssp = true;
 
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 051d000..847628d 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -27,7 +27,6 @@
 #include "md5.h"
 #include "cifs_unicode.h"
 #include "cifsproto.h"
-#include "ntlmssp.h"
 #include <linux/ctype.h>
 #include <linux/random.h>
 
@@ -43,44 +42,21 @@ extern void SMBencrypt(unsigned char *passwd, const unsigned char *c8,
 		       unsigned char *p24);
 
 static int cifs_calculate_signature(const struct smb_hdr *cifs_pdu,
-			struct TCP_Server_Info *server, char *signature)
+				    const struct mac_key *key, char *signature)
 {
-	int rc = 0;
-	struct {
-		struct shash_desc shash;
-		char ctx[crypto_shash_descsize(server->ntlmssp.md5)];
-	} sdesc;
+	struct	MD5Context context;
 
-	if (cifs_pdu == NULL || server == NULL || signature == NULL)
+	if ((cifs_pdu == NULL) || (signature == NULL) || (key == NULL))
 		return -EINVAL;
 
-	sdesc.shash.tfm = server->ntlmssp.md5;
-	sdesc.shash.flags = 0x0;
-
-	rc = crypto_shash_init(&sdesc.shash);
-	if (rc) {
-		cERROR(1, "could not initialize master crypto API hmacmd5\n");
-		return rc;
-	}
-
-	if (server->secType == RawNTLMSSP)
-		crypto_shash_update(&sdesc.shash,
-			server->session_key.data.ntlmv2.key,
-			CIFS_NTLMV2_SESSKEY_SIZE);
-	else
-		crypto_shash_update(&sdesc.shash,
-			(char *)&server->session_key.data,
-			server->session_key.len);
-
-	crypto_shash_update(&sdesc.shash,
-			cifs_pdu->Protocol, cifs_pdu->smb_buf_length);
-
-	rc = crypto_shash_final(&sdesc.shash, signature);
+	cifs_MD5_init(&context);
+	cifs_MD5_update(&context, (char *)&key->data, key->len);
+	cifs_MD5_update(&context, cifs_pdu->Protocol, cifs_pdu->smb_buf_length);
 
+	cifs_MD5_final(signature, &context);
 	return 0;
 }
 
-
 int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server,
 		  __u32 *pexpected_response_sequence_number)
 {
@@ -102,7 +78,8 @@ int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server,
 	server->sequence_number++;
 	spin_unlock(&GlobalMid_Lock);
 
-	rc = cifs_calculate_signature(cifs_pdu, server, smb_signature);
+	rc = cifs_calculate_signature(cifs_pdu, &server->mac_signing_key,
+				      smb_signature);
 	if (rc)
 		memset(cifs_pdu->Signature.SecuritySignature, 0, 8);
 	else
@@ -112,36 +89,16 @@ int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server,
 }
 
 static int cifs_calc_signature2(const struct kvec *iov, int n_vec,
-			struct TCP_Server_Info *server, char *signature)
+				const struct mac_key *key, char *signature)
 {
+	struct  MD5Context context;
 	int i;
-	int rc = 0;
-	struct {
-		struct shash_desc shash;
-		char ctx[crypto_shash_descsize(server->ntlmssp.md5)];
-	} sdesc;
 
-	if (iov == NULL || server == NULL || signature == NULL)
+	if ((iov == NULL) || (signature == NULL) || (key == NULL))
 		return -EINVAL;
 
-	sdesc.shash.tfm = server->ntlmssp.md5;
-	sdesc.shash.flags = 0x0;
-
-	rc = crypto_shash_init(&sdesc.shash);
-	if (rc) {
-		cERROR(1, "could not initialize master crypto API hmacmd5\n");
-		return rc;
-	}
-
-	if (server->secType == RawNTLMSSP)
-		crypto_shash_update(&sdesc.shash,
-			server->session_key.data.ntlmv2.key,
-			CIFS_NTLMV2_SESSKEY_SIZE);
-	else
-		crypto_shash_update(&sdesc.shash,
-			(char *)&server->session_key.data,
-			server->session_key.len);
-
+	cifs_MD5_init(&context);
+	cifs_MD5_update(&context, (char *)&key->data, key->len);
 	for (i = 0; i < n_vec; i++) {
 		if (iov[i].iov_len == 0)
 			continue;
@@ -154,18 +111,18 @@ static int cifs_calc_signature2(const struct kvec *iov, int n_vec,
 		if (i == 0) {
 			if (iov[0].iov_len <= 8) /* cmd field at offset 9 */
 				break; /* nothing to sign or corrupt header */
-			crypto_shash_update(&sdesc.shash,
-				iov[i].iov_base + 4, iov[i].iov_len - 4);
+			cifs_MD5_update(&context, iov[0].iov_base+4,
+				  iov[0].iov_len-4);
 		} else
-			crypto_shash_update(&sdesc.shash,
-				iov[i].iov_base, iov[i].iov_len);
+			cifs_MD5_update(&context, iov[i].iov_base, iov[i].iov_len);
 	}
 
-	rc = crypto_shash_final(&sdesc.shash, signature);
+	cifs_MD5_final(signature, &context);
 
 	return 0;
 }
 
+
 int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server,
 		   __u32 *pexpected_response_sequence_number)
 {
@@ -188,7 +145,8 @@ int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server,
 	server->sequence_number++;
 	spin_unlock(&GlobalMid_Lock);
 
-	rc = cifs_calc_signature2(iov, n_vec, server, smb_signature);
+	rc = cifs_calc_signature2(iov, n_vec, &server->mac_signing_key,
+				      smb_signature);
 	if (rc)
 		memset(cifs_pdu->Signature.SecuritySignature, 0, 8);
 	else
@@ -198,14 +156,14 @@ int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server,
 }
 
 int cifs_verify_signature(struct smb_hdr *cifs_pdu,
-			  struct TCP_Server_Info *server,
+			  const struct mac_key *mac_key,
 			  __u32 expected_sequence_number)
 {
-	int rc;
+	unsigned int rc;
 	char server_response_sig[8];
 	char what_we_think_sig_should_be[20];
 
-	if (cifs_pdu == NULL || server == NULL)
+	if ((cifs_pdu == NULL) || (mac_key == NULL))
 		return -EINVAL;
 
 	if (cifs_pdu->Command == SMB_COM_NEGOTIATE)
@@ -234,7 +192,7 @@ int cifs_verify_signature(struct smb_hdr *cifs_pdu,
 					cpu_to_le32(expected_sequence_number);
 	cifs_pdu->Signature.Sequence.Reserved = 0;
 
-	rc = cifs_calculate_signature(cifs_pdu, server,
+	rc = cifs_calculate_signature(cifs_pdu, mac_key,
 		what_we_think_sig_should_be);
 
 	if (rc)
@@ -251,7 +209,7 @@ int cifs_verify_signature(struct smb_hdr *cifs_pdu,
 }
 
 /* We fill in key by putting in 40 byte array which was allocated by caller */
-int cifs_calculate_session_key(struct session_key *key, const char *rn,
+int cifs_calculate_mac_key(struct mac_key *key, const char *rn,
 			   const char *password)
 {
 	char temp_key[16];
@@ -265,6 +223,63 @@ int cifs_calculate_session_key(struct session_key *key, const char *rn,
 	return 0;
 }
 
+int CalcNTLMv2_partial_mac_key(struct cifsSesInfo *ses,
+			       const struct nls_table *nls_info)
+{
+	char temp_hash[16];
+	struct HMACMD5Context ctx;
+	char *ucase_buf;
+	__le16 *unicode_buf;
+	unsigned int i, user_name_len, dom_name_len;
+
+	if (ses == NULL)
+		return -EINVAL;
+
+	E_md4hash(ses->password, temp_hash);
+
+	hmac_md5_init_limK_to_64(temp_hash, 16, &ctx);
+	user_name_len = strlen(ses->userName);
+	if (user_name_len > MAX_USERNAME_SIZE)
+		return -EINVAL;
+	if (ses->domainName == NULL)
+		return -EINVAL; /* BB should we use CIFS_LINUX_DOM */
+	dom_name_len = strlen(ses->domainName);
+	if (dom_name_len > MAX_USERNAME_SIZE)
+		return -EINVAL;
+
+	ucase_buf = kmalloc((MAX_USERNAME_SIZE+1), GFP_KERNEL);
+	if (ucase_buf == NULL)
+		return -ENOMEM;
+	unicode_buf = kmalloc((MAX_USERNAME_SIZE+1)*4, GFP_KERNEL);
+	if (unicode_buf == NULL) {
+		kfree(ucase_buf);
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < user_name_len; i++)
+		ucase_buf[i] = nls_info->charset2upper[(int)ses->userName[i]];
+	ucase_buf[i] = 0;
+	user_name_len = cifs_strtoUCS(unicode_buf, ucase_buf,
+				      MAX_USERNAME_SIZE*2, nls_info);
+	unicode_buf[user_name_len] = 0;
+	user_name_len++;
+
+	for (i = 0; i < dom_name_len; i++)
+		ucase_buf[i] = nls_info->charset2upper[(int)ses->domainName[i]];
+	ucase_buf[i] = 0;
+	dom_name_len = cifs_strtoUCS(unicode_buf+user_name_len, ucase_buf,
+				     MAX_USERNAME_SIZE*2, nls_info);
+
+	unicode_buf[user_name_len + dom_name_len] = 0;
+	hmac_md5_update((const unsigned char *) unicode_buf,
+		(user_name_len+dom_name_len)*2, &ctx);
+
+	hmac_md5_final(ses->server->ntlmv2_hash, &ctx);
+	kfree(ucase_buf);
+	kfree(unicode_buf);
+	return 0;
+}
+
 #ifdef CONFIG_CIFS_WEAK_PW_HASH
 void calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt,
 			char *lnm_session_key)
@@ -309,29 +324,21 @@ static int calc_ntlmv2_hash(struct cifsSesInfo *ses,
 {
 	int rc = 0;
 	int len;
-	char nt_hash[CIFS_NTHASH_SIZE];
+	char nt_hash[16];
+	struct HMACMD5Context *pctxt;
 	wchar_t *user;
 	wchar_t *domain;
-	wchar_t *server;
-	struct {
-		struct shash_desc shash;
-		char ctx[crypto_shash_descsize(ses->server->ntlmssp.hmacmd5)];
-	} sdesc;
 
-	/* calculate md4 hash of password */
-	E_md4hash(ses->password, nt_hash);
+	pctxt = kmalloc(sizeof(struct HMACMD5Context), GFP_KERNEL);
 
-	sdesc.shash.tfm = ses->server->ntlmssp.hmacmd5;
-	sdesc.shash.flags = 0x0;
+	if (pctxt == NULL)
+		return -ENOMEM;
 
-	crypto_shash_setkey(ses->server->ntlmssp.hmacmd5, nt_hash,
-				CIFS_NTHASH_SIZE);
+	/* calculate md4 hash of password */
+	E_md4hash(ses->password, nt_hash);
 
-	rc = crypto_shash_init(&sdesc.shash);
-	if (rc) {
-		cERROR(1, "could not initialize master crypto API hmacmd5\n");
-		return rc;
-	}
+	/* convert Domainname to unicode and uppercase */
+	hmac_md5_init_limK_to_64(nt_hash, 16, pctxt);
 
 	/* convert ses->userName to unicode and uppercase */
 	len = strlen(ses->userName);
@@ -340,8 +347,7 @@ static int calc_ntlmv2_hash(struct cifsSesInfo *ses,
 		goto calc_exit_2;
 	len = cifs_strtoUCS((__le16 *)user, ses->userName, len, nls_cp);
 	UniStrupr(user);
-
-	crypto_shash_update(&sdesc.shash, (char *)user, 2 * len);
+	hmac_md5_update((char *)user, 2*len, pctxt);
 
 	/* convert ses->domainName to unicode and uppercase */
 	if (ses->domainName) {
@@ -357,243 +363,65 @@ static int calc_ntlmv2_hash(struct cifsSesInfo *ses,
 		   Maybe converting the domain name earlier makes sense */
 		/* UniStrupr(domain); */
 
-		crypto_shash_update(&sdesc.shash, (char *)domain, 2 * len);
+		hmac_md5_update((char *)domain, 2*len, pctxt);
 
 		kfree(domain);
-	} else if (ses->serverName) {
-		len = strlen(ses->serverName);
-
-		server = kmalloc(2 + (len * 2), GFP_KERNEL);
-		if (server == NULL)
-			goto calc_exit_1;
-		len = cifs_strtoUCS((__le16 *)server, ses->serverName, len,
-					nls_cp);
-		/* the following line was removed since it didn't work well
-		   with lower cased domain name that passed as an option.
-		   Maybe converting the domain name earlier makes sense */
-		/* UniStrupr(domain); */
-
-		crypto_shash_update(&sdesc.shash, (char *)server, 2 * len);
-
-		kfree(server);
 	}
 calc_exit_1:
 	kfree(user);
 calc_exit_2:
 	/* BB FIXME what about bytes 24 through 40 of the signing key?
 	   compare with the NTLM example */
-	rc = crypto_shash_final(&sdesc.shash, ses->server->ntlmv2_hash);
-
-	return rc;
-}
-
-static int
-find_domain_name(struct cifsSesInfo *ses)
-{
-	int rc = 0;
-	unsigned int attrsize;
-	unsigned int type;
-	unsigned char *blobptr;
-	struct ntlmssp2_name *attrptr;
-
-	if (ses->server->tiblob) {
-		blobptr = ses->server->tiblob;
-		attrptr = (struct ntlmssp2_name *) blobptr;
-
-		while ((type = attrptr->type) != 0) {
-			blobptr += 2; /* advance attr type */
-			attrsize = attrptr->length;
-			blobptr += 2; /* advance attr size */
-			if (type == NTLMSSP_AV_NB_DOMAIN_NAME) {
-				if (!ses->domainName) {
-					ses->domainName =
-						kmalloc(attrptr->length + 1,
-								GFP_KERNEL);
-					if (!ses->domainName)
-							return -ENOMEM;
-					cifs_from_ucs2(ses->domainName,
-						(__le16 *)blobptr,
-						attrptr->length,
-						attrptr->length,
-						load_nls_default(), false);
-				}
-			}
-			blobptr += attrsize; /* advance attr  value */
-			attrptr = (struct ntlmssp2_name *) blobptr;
-		}
-	} else {
-		ses->server->tilen = 2 * sizeof(struct ntlmssp2_name);
-		ses->server->tiblob = kmalloc(ses->server->tilen, GFP_KERNEL);
-		if (!ses->server->tiblob) {
-			ses->server->tilen = 0;
-			cERROR(1, "Challenge target info allocation failure");
-			return -ENOMEM;
-		}
-		memset(ses->server->tiblob, 0x0, ses->server->tilen);
-		attrptr = (struct ntlmssp2_name *) ses->server->tiblob;
-		attrptr->type = cpu_to_le16(NTLMSSP_DOMAIN_TYPE);
-	}
+	hmac_md5_final(ses->server->ntlmv2_hash, pctxt);
 
+	kfree(pctxt);
 	return rc;
 }
 
-static int
-CalcNTLMv2_response(const struct TCP_Server_Info *server,
-			 char *v2_session_response)
-{
-	int rc;
-	struct {
-		struct shash_desc shash;
-		char ctx[crypto_shash_descsize(server->ntlmssp.hmacmd5)];
-	} sdesc;
-
-	sdesc.shash.tfm = server->ntlmssp.hmacmd5;
-	sdesc.shash.flags = 0x0;
-
-	crypto_shash_setkey(server->ntlmssp.hmacmd5, server->ntlmv2_hash,
-		CIFS_HMAC_MD5_HASH_SIZE);
-
-	rc = crypto_shash_init(&sdesc.shash);
-	if (rc) {
-		cERROR(1, "could not initialize master crypto API hmacmd5\n");
-		return rc;
-	}
-
-	memcpy(v2_session_response + CIFS_SERVER_CHALLENGE_SIZE,
-		server->cryptKey, CIFS_SERVER_CHALLENGE_SIZE);
-	crypto_shash_update(&sdesc.shash,
-		v2_session_response + CIFS_SERVER_CHALLENGE_SIZE,
-		sizeof(struct ntlmv2_resp) - CIFS_SERVER_CHALLENGE_SIZE);
-
-	if (server->tilen)
-		crypto_shash_update(&sdesc.shash,
-					server->tiblob, server->tilen);
-
-	rc = crypto_shash_final(&sdesc.shash, v2_session_response);
-
-	return rc;
-}
-
-int
-setup_ntlmv2_rsp(struct cifsSesInfo *ses, char *resp_buf,
+void setup_ntlmv2_rsp(struct cifsSesInfo *ses, char *resp_buf,
 		      const struct nls_table *nls_cp)
 {
-	int rc = 0;
+	int rc;
 	struct ntlmv2_resp *buf = (struct ntlmv2_resp *)resp_buf;
-	struct {
-		struct shash_desc shash;
-		char ctx[crypto_shash_descsize(ses->server->ntlmssp.hmacmd5)];
-	} sdesc;
+	struct HMACMD5Context context;
 
 	buf->blob_signature = cpu_to_le32(0x00000101);
 	buf->reserved = 0;
 	buf->time = cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME));
 	get_random_bytes(&buf->client_chal, sizeof(buf->client_chal));
 	buf->reserved2 = 0;
-
-	if (!ses->domainName) {
-		rc = find_domain_name(ses);
-		if (rc) {
-			cERROR(1, "could not get domain/server name rc %d", rc);
-			return rc;
-		}
-	}
+	buf->names[0].type = cpu_to_le16(NTLMSSP_DOMAIN_TYPE);
+	buf->names[0].length = 0;
+	buf->names[1].type = 0;
+	buf->names[1].length = 0;
 
 	/* calculate buf->ntlmv2_hash */
 	rc = calc_ntlmv2_hash(ses, nls_cp);
-	if (rc) {
-		cERROR(1, "could not get v2 hash rc %d", rc);
-		return rc;
-	}
-	rc = CalcNTLMv2_response(ses->server, resp_buf);
-	if (rc) {
+	if (rc)
 		cERROR(1, "could not get v2 hash rc %d", rc);
-		return rc;
-	}
-
-	crypto_shash_setkey(ses->server->ntlmssp.hmacmd5,
-			ses->server->ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE);
-
-	sdesc.shash.tfm = ses->server->ntlmssp.hmacmd5;
-	sdesc.shash.flags = 0x0;
-
-	rc = crypto_shash_init(&sdesc.shash);
-	if (rc) {
-		cERROR(1, "could not initialize master crypto API hmacmd5\n");
-		return rc;
-	}
-
-	crypto_shash_update(&sdesc.shash, resp_buf, CIFS_HMAC_MD5_HASH_SIZE);
+	CalcNTLMv2_response(ses, resp_buf);
 
-	rc = crypto_shash_final(&sdesc.shash,
-		ses->server->session_key.data.ntlmv2.key);
+	/* now calculate the MAC key for NTLMv2 */
+	hmac_md5_init_limK_to_64(ses->server->ntlmv2_hash, 16, &context);
+	hmac_md5_update(resp_buf, 16, &context);
+	hmac_md5_final(ses->server->mac_signing_key.data.ntlmv2.key, &context);
 
-	memcpy(&ses->server->session_key.data.ntlmv2.resp, resp_buf,
-			sizeof(struct ntlmv2_resp));
-	ses->server->session_key.len = 16 + sizeof(struct ntlmv2_resp);
-
-	return rc;
+	memcpy(&ses->server->mac_signing_key.data.ntlmv2.resp, resp_buf,
+	       sizeof(struct ntlmv2_resp));
+	ses->server->mac_signing_key.len = 16 + sizeof(struct ntlmv2_resp);
 }
 
-int
-calc_seckey(struct TCP_Server_Info *server)
-{
-	int rc;
-	unsigned char sec_key[CIFS_NTLMV2_SESSKEY_SIZE];
-	struct crypto_blkcipher *tfm_arc4;
-	struct scatterlist sgin, sgout;
-	struct blkcipher_desc desc;
-
-	get_random_bytes(sec_key, CIFS_NTLMV2_SESSKEY_SIZE);
-
-	tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)",
-						0, CRYPTO_ALG_ASYNC);
-	if (!tfm_arc4 || IS_ERR(tfm_arc4)) {
-		cERROR(1, "could not allocate " "master crypto API arc4\n");
-		return 1;
-	}
-
-	crypto_blkcipher_setkey(tfm_arc4,
-		server->session_key.data.ntlmv2.key, CIFS_CPHTXT_SIZE);
-	sg_init_one(&sgin, sec_key, CIFS_CPHTXT_SIZE);
-	sg_init_one(&sgout, server->ntlmssp.ciphertext, CIFS_CPHTXT_SIZE);
-	rc = crypto_blkcipher_encrypt(&desc, &sgout, &sgin, CIFS_CPHTXT_SIZE);
-
-	if (!rc)
-		memcpy(server->session_key.data.ntlmv2.key,
-				sec_key, CIFS_NTLMV2_SESSKEY_SIZE);
-
-	crypto_free_blkcipher(tfm_arc4);
-
-	return 0;
-}
-
-void
-cifs_crypto_shash_release(struct TCP_Server_Info *server)
-{
-	if (server->ntlmssp.md5)
-		crypto_free_shash(server->ntlmssp.md5);
-
-	if (server->ntlmssp.hmacmd5)
-		crypto_free_shash(server->ntlmssp.hmacmd5);
-}
-
-int
-cifs_crypto_shash_allocate(struct TCP_Server_Info *server)
+void CalcNTLMv2_response(const struct cifsSesInfo *ses,
+			 char *v2_session_response)
 {
-	server->ntlmssp.hmacmd5 = crypto_alloc_shash("hmac(md5)", 0, 0);
-	if (!server->ntlmssp.hmacmd5 ||
-			IS_ERR(server->ntlmssp.hmacmd5)) {
-		cERROR(1, "could not allocate master crypto API hmacmd5\n");
-		return 1;
-	}
+	struct HMACMD5Context context;
+	/* rest of v2 struct already generated */
+	memcpy(v2_session_response + 8, ses->server->cryptKey, 8);
+	hmac_md5_init_limK_to_64(ses->server->ntlmv2_hash, 16, &context);
 
-	server->ntlmssp.md5 = crypto_alloc_shash("md5", 0, 0);
-	if (!server->ntlmssp.md5 || IS_ERR(server->ntlmssp.md5)) {
-		crypto_free_shash(server->ntlmssp.hmacmd5);
-		cERROR(1, "could not allocate master crypto API md5\n");
-		return 1;
-	}
+	hmac_md5_update(v2_session_response+8,
+			sizeof(struct ntlmv2_resp) - 8, &context);
 
-	return 0;
+	hmac_md5_final(v2_session_response, &context);
+/*	cifs_dump_mem("v2_sess_rsp: ", v2_session_response, 32); */
 }
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 49563e0..0cdfb8c 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -25,9 +25,6 @@
 #include <linux/workqueue.h>
 #include "cifs_fs_sb.h"
 #include "cifsacl.h"
-#include <crypto/internal/hash.h>
-#include <linux/scatterlist.h>
-
 /*
  * The sizes of various internal tables and strings
  */
@@ -100,7 +97,7 @@ enum protocolEnum {
 	/* Netbios frames protocol not supported at this time */
 };
 
-struct session_key {
+struct mac_key {
 	unsigned int len;
 	union {
 		char ntlm[CIFS_SESS_KEY_SIZE + 16];
@@ -123,14 +120,6 @@ struct cifs_cred {
 	struct cifs_ace *aces;
 };
 
-struct ntlmssp_auth {
-	__u32 client_flags;
-	__u32 server_flags;
-	unsigned char ciphertext[CIFS_CPHTXT_SIZE];
-	struct crypto_shash *hmacmd5;
-	struct crypto_shash *md5;
-};
-
 /*
  *****************************************************************
  * Except the CIFS PDUs themselves all the
@@ -193,14 +182,11 @@ struct TCP_Server_Info {
 	/* 16th byte of RFC1001 workstation name is always null */
 	char workstation_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL];
 	__u32 sequence_number; /* needed for CIFS PDU signature */
-	struct session_key session_key;
+	struct mac_key mac_signing_key;
 	char ntlmv2_hash[16];
 	unsigned long lstrp; /* when we got last response from this server */
 	u16 dialect; /* dialect index that server chose */
 	/* extended security flavors that server supports */
-	unsigned int tilen; /* length of the target info blob */
-	unsigned char *tiblob; /* target info blob in challenge response */
-	struct ntlmssp_auth ntlmssp; /* various keys, ciphers, flags */
 	bool	sec_kerberos;		/* supports plain Kerberos */
 	bool	sec_mskerberos;		/* supports legacy MS Kerberos */
 	bool	sec_kerberosu2u;	/* supports U2U Kerberos */
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index 320e0fd..14d036d 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -134,12 +134,6 @@
  * Size of the session key (crypto key encrypted with the password
  */
 #define CIFS_SESS_KEY_SIZE (24)
-#define CIFS_CLIENT_CHALLENGE_SIZE (8)
-#define CIFS_SERVER_CHALLENGE_SIZE (8)
-#define CIFS_HMAC_MD5_HASH_SIZE (16)
-#define CIFS_CPHTXT_SIZE (16)
-#define CIFS_NTLMV2_SESSKEY_SIZE (16)
-#define CIFS_NTHASH_SIZE (16)
 
 /*
  * Maximum user name length
@@ -669,6 +663,7 @@ struct ntlmv2_resp {
 	__le64  time;
 	__u64  client_chal; /* random */
 	__u32  reserved2;
+	struct ntlmssp2_name names[2];
 	/* array of name entries could follow ending in minimum 4 byte struct */
 } __attribute__((packed));
 
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 1378d91..1f54508 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -361,15 +361,15 @@ extern int cifs_sign_smb(struct smb_hdr *, struct TCP_Server_Info *, __u32 *);
 extern int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *,
 			  __u32 *);
 extern int cifs_verify_signature(struct smb_hdr *,
-				 struct TCP_Server_Info *server,
+				 const struct mac_key *mac_key,
 				__u32 expected_sequence_number);
-extern int cifs_calculate_session_key(struct session_key *key, const char *rn,
+extern int cifs_calculate_mac_key(struct mac_key *key, const char *rn,
 				 const char *pass);
-extern int setup_ntlmv2_rsp(struct cifsSesInfo *, char *,
+extern int CalcNTLMv2_partial_mac_key(struct cifsSesInfo *,
+			const struct nls_table *);
+extern void CalcNTLMv2_response(const struct cifsSesInfo *, char *);
+extern void setup_ntlmv2_rsp(struct cifsSesInfo *, char *,
 			     const struct nls_table *);
-extern int cifs_crypto_shash_allocate(struct TCP_Server_Info *);
-extern void cifs_crypto_shash_release(struct TCP_Server_Info *);
-extern int calc_seckey(struct TCP_Server_Info *);
 #ifdef CONFIG_CIFS_WEAK_PW_HASH
 extern void calc_lanman_hash(const char *password, const char *cryptkey,
 				bool encrypt, char *lnm_session_key);
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 4bda920..c65c341 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -604,14 +604,11 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
 			else
 				rc = -EINVAL;
 
-			if (server->secType == Kerberos) {
-				if (!server->sec_kerberos &&
-						!server->sec_mskerberos)
-					rc = -EOPNOTSUPP;
-			} else if (server->secType == RawNTLMSSP) {
-				if (!server->sec_ntlmssp)
-					rc = -EOPNOTSUPP;
-			} else
+			if (server->sec_kerberos || server->sec_mskerberos)
+				server->secType = Kerberos;
+			else if (server->sec_ntlmssp)
+				server->secType = RawNTLMSSP;
+			else
 				rc = -EOPNOTSUPP;
 		}
 	} else
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index ec0ea4a..0ea52e9 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1708,7 +1708,6 @@ cifs_put_smb_ses(struct cifsSesInfo *ses)
 		CIFSSMBLogoff(xid, ses);
 		_FreeXid(xid);
 	}
-	cifs_crypto_shash_release(server);
 	sesInfoFree(ses);
 	cifs_put_tcp_session(server);
 }
@@ -1788,23 +1787,13 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
 	ses->linux_uid = volume_info->linux_uid;
 	ses->overrideSecFlg = volume_info->secFlg;
 
-	rc = cifs_crypto_shash_allocate(server);
-	if (rc) {
-		cERROR(1, "could not setup hash structures rc %d", rc);
-		goto get_ses_fail;
-	}
-	server->tilen = 0;
-	server->tiblob = NULL;
-
 	mutex_lock(&ses->session_mutex);
 	rc = cifs_negotiate_protocol(xid, ses);
 	if (!rc)
 		rc = cifs_setup_session(xid, ses, volume_info->local_nls);
 	mutex_unlock(&ses->session_mutex);
-	if (rc) {
-		cifs_crypto_shash_release(ses->server);
+	if (rc)
 		goto get_ses_fail;
-	}
 
 	/* success, put it on the list */
 	write_lock(&cifs_tcp_ses_lock);
diff --git a/fs/cifs/ntlmssp.h b/fs/cifs/ntlmssp.h
index 1db0f07..49c9a4e 100644
--- a/fs/cifs/ntlmssp.h
+++ b/fs/cifs/ntlmssp.h
@@ -61,19 +61,6 @@
 #define NTLMSSP_NEGOTIATE_KEY_XCH   0x40000000
 #define NTLMSSP_NEGOTIATE_56        0x80000000
 
-/* Define AV Pair Field IDs */
-#define NTLMSSP_AV_EOL			0
-#define NTLMSSP_AV_NB_COMPUTER_NAME	1
-#define NTLMSSP_AV_NB_DOMAIN_NAME	2
-#define NTLMSSP_AV_DNS_COMPUTER_NAME	3
-#define NTLMSSP_AV_DNS_DOMAIN_NAME	4
-#define NTLMSSP_AV_DNS_TREE_NAME	5
-#define NTLMSSP_AV_FLAGS		6
-#define NTLMSSP_AV_TIMESTAMP		7
-#define NTLMSSP_AV_RESTRICTION		8
-#define NTLMSSP_AV_TARGET_NAME		9
-#define NTLMSSP_AV_CHANNEL_BINDINGS	10
-
 /* Although typedefs are not commonly used for structure definitions */
 /* in the Linux kernel, in this particular case they are useful      */
 /* to more closely match the standards document for NTLMSSP from     */
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 41fc532..0a57cb7 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -383,9 +383,6 @@ static int decode_ascii_ssetup(char **pbcc_area, int bleft,
 static int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len,
 				    struct cifsSesInfo *ses)
 {
-	unsigned int tioffset; /* challeng message target info area */
-	unsigned int tilen; /* challeng message target info area length  */
-
 	CHALLENGE_MESSAGE *pblob = (CHALLENGE_MESSAGE *)bcc_ptr;
 
 	if (blob_len < sizeof(CHALLENGE_MESSAGE)) {
@@ -408,18 +405,6 @@ static int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len,
 	/* BB spec says that if AvId field of MsvAvTimestamp is populated then
 		we must set the MIC field of the AUTHENTICATE_MESSAGE */
 
-	tioffset = cpu_to_le16(pblob->TargetInfoArray.BufferOffset);
-	tilen = cpu_to_le16(pblob->TargetInfoArray.Length);
-	ses->server->tilen = tilen;
-	if (tilen) {
-		ses->server->tiblob = kmalloc(tilen, GFP_KERNEL);
-		if (!ses->server->tiblob) {
-			cERROR(1, "Challenge target info allocation failure");
-			return -ENOMEM;
-		}
-		memcpy(ses->server->tiblob,  bcc_ptr + tioffset, tilen);
-	}
-
 	return 0;
 }
 
@@ -466,12 +451,10 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
 				   struct cifsSesInfo *ses,
 				   const struct nls_table *nls_cp, bool first)
 {
-	int rc;
-	unsigned int size;
 	AUTHENTICATE_MESSAGE *sec_blob = (AUTHENTICATE_MESSAGE *)pbuffer;
 	__u32 flags;
 	unsigned char *tmp;
-	struct ntlmv2_resp ntlmv2_response = {};
+	char ntlm_session_key[CIFS_SESS_KEY_SIZE];
 
 	memcpy(sec_blob->Signature, NTLMSSP_SIGNATURE, 8);
 	sec_blob->MessageType = NtLmAuthenticate;
@@ -494,25 +477,19 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
 	sec_blob->LmChallengeResponse.Length = 0;
 	sec_blob->LmChallengeResponse.MaximumLength = 0;
 
-	sec_blob->NtChallengeResponse.BufferOffset = cpu_to_le32(tmp - pbuffer);
-	rc = setup_ntlmv2_rsp(ses, (char *)&ntlmv2_response, nls_cp);
-	if (rc) {
-		cERROR(1, "error rc: %d during ntlmssp ntlmv2 setup", rc);
-		goto setup_ntlmv2_ret;
-	}
-	size =  sizeof(struct ntlmv2_resp);
-	memcpy(tmp, (char *)&ntlmv2_response, size);
-	tmp += size;
-	if (ses->server->tilen > 0) {
-		memcpy(tmp, ses->server->tiblob, ses->server->tilen);
-		tmp += ses->server->tilen;
-	} else
-		ses->server->tilen = 0;
+	/* calculate session key,  BB what about adding similar ntlmv2 path? */
+	SMBNTencrypt(ses->password, ses->server->cryptKey, ntlm_session_key);
+	if (first)
+		cifs_calculate_mac_key(&ses->server->mac_signing_key,
+				       ntlm_session_key, ses->password);
 
-	sec_blob->NtChallengeResponse.Length = cpu_to_le16(size +
-				ses->server->tilen);
+	memcpy(tmp, ntlm_session_key, CIFS_SESS_KEY_SIZE);
+	sec_blob->NtChallengeResponse.BufferOffset = cpu_to_le32(tmp - pbuffer);
+	sec_blob->NtChallengeResponse.Length = cpu_to_le16(CIFS_SESS_KEY_SIZE);
 	sec_blob->NtChallengeResponse.MaximumLength =
-		cpu_to_le16(size + ses->server->tilen);
+				cpu_to_le16(CIFS_SESS_KEY_SIZE);
+
+	tmp += CIFS_SESS_KEY_SIZE;
 
 	if (ses->domainName == NULL) {
 		sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer);
@@ -524,6 +501,7 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
 		len = cifs_strtoUCS((__le16 *)tmp, ses->domainName,
 				    MAX_USERNAME_SIZE, nls_cp);
 		len *= 2; /* unicode is 2 bytes each */
+		len += 2; /* trailing null */
 		sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer);
 		sec_blob->DomainName.Length = cpu_to_le16(len);
 		sec_blob->DomainName.MaximumLength = cpu_to_le16(len);
@@ -540,6 +518,7 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
 		len = cifs_strtoUCS((__le16 *)tmp, ses->userName,
 				    MAX_USERNAME_SIZE, nls_cp);
 		len *= 2; /* unicode is 2 bytes each */
+		len += 2; /* trailing null */
 		sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer);
 		sec_blob->UserName.Length = cpu_to_le16(len);
 		sec_blob->UserName.MaximumLength = cpu_to_le16(len);
@@ -551,26 +530,9 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
 	sec_blob->WorkstationName.MaximumLength = 0;
 	tmp += 2;
 
-	if ((ses->server->ntlmssp.server_flags & NTLMSSP_NEGOTIATE_KEY_XCH) &&
-			!calc_seckey(ses->server)) {
-		memcpy(tmp, ses->server->ntlmssp.ciphertext, CIFS_CPHTXT_SIZE);
-		sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer);
-		sec_blob->SessionKey.Length = cpu_to_le16(CIFS_CPHTXT_SIZE);
-		sec_blob->SessionKey.MaximumLength =
-			cpu_to_le16(CIFS_CPHTXT_SIZE);
-		tmp += CIFS_CPHTXT_SIZE;
-	} else {
-		sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer);
-		sec_blob->SessionKey.Length = 0;
-		sec_blob->SessionKey.MaximumLength = 0;
-	}
-
-	ses->server->sequence_number = 0;
-
-setup_ntlmv2_ret:
-	if (ses->server->tilen > 0)
-		kfree(ses->server->tiblob);
-
+	sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer);
+	sec_blob->SessionKey.Length = 0;
+	sec_blob->SessionKey.MaximumLength = 0;
 	return tmp - pbuffer;
 }
 
@@ -584,14 +546,15 @@ static void setup_ntlmssp_neg_req(SESSION_SETUP_ANDX *pSMB,
 	return;
 }
 
-static int setup_ntlmssp_auth_req(char *ntlmsspblob,
+static int setup_ntlmssp_auth_req(SESSION_SETUP_ANDX *pSMB,
 				  struct cifsSesInfo *ses,
 				  const struct nls_table *nls, bool first_time)
 {
 	int bloblen;
 
-	bloblen = build_ntlmssp_auth_blob(ntlmsspblob, ses, nls,
+	bloblen = build_ntlmssp_auth_blob(&pSMB->req.SecurityBlob[0], ses, nls,
 					  first_time);
+	pSMB->req.SecurityBlobLength = cpu_to_le16(bloblen);
 
 	return bloblen;
 }
@@ -617,7 +580,6 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses,
 	struct key *spnego_key = NULL;
 	__le32 phase = NtLmNegotiate; /* NTLMSSP, if needed, is multistage */
 	bool first_time;
-	char *ntlmsspblob;
 
 	if (ses == NULL)
 		return -EINVAL;
@@ -728,7 +690,7 @@ ssetup_ntlmssp_authenticate:
 
 		if (first_time) /* should this be moved into common code
 				  with similar ntlmv2 path? */
-			cifs_calculate_session_key(&ses->server->session_key,
+			cifs_calculate_mac_key(&ses->server->mac_signing_key,
 				ntlm_session_key, ses->password);
 		/* copy session key */
 
@@ -767,21 +729,12 @@ ssetup_ntlmssp_authenticate:
 			cpu_to_le16(sizeof(struct ntlmv2_resp));
 
 		/* calculate session key */
-		rc = setup_ntlmv2_rsp(ses, v2_sess_key, nls_cp);
-		if (rc) {
-			kfree(v2_sess_key);
-			goto ssetup_exit;
-		}
+		setup_ntlmv2_rsp(ses, v2_sess_key, nls_cp);
 		/* FIXME: calculate MAC key */
 		memcpy(bcc_ptr, (char *)v2_sess_key,
 		       sizeof(struct ntlmv2_resp));
 		bcc_ptr += sizeof(struct ntlmv2_resp);
 		kfree(v2_sess_key);
-		if (ses->server->tilen > 0) {
-			memcpy(bcc_ptr, ses->server->tiblob,
-				ses->server->tilen);
-			bcc_ptr += ses->server->tilen;
-		}
 		if (ses->capabilities & CAP_UNICODE) {
 			if (iov[0].iov_len % 2) {
 				*bcc_ptr = 0;
@@ -812,15 +765,15 @@ ssetup_ntlmssp_authenticate:
 		}
 		/* bail out if key is too long */
 		if (msg->sesskey_len >
-		    sizeof(ses->server->session_key.data.krb5)) {
+		    sizeof(ses->server->mac_signing_key.data.krb5)) {
 			cERROR(1, "Kerberos signing key too long (%u bytes)",
 				msg->sesskey_len);
 			rc = -EOVERFLOW;
 			goto ssetup_exit;
 		}
 		if (first_time) {
-			ses->server->session_key.len = msg->sesskey_len;
-			memcpy(ses->server->session_key.data.krb5,
+			ses->server->mac_signing_key.len = msg->sesskey_len;
+			memcpy(ses->server->mac_signing_key.data.krb5,
 				msg->data, msg->sesskey_len);
 		}
 		pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC;
@@ -862,26 +815,12 @@ ssetup_ntlmssp_authenticate:
 			if (phase == NtLmNegotiate) {
 				setup_ntlmssp_neg_req(pSMB, ses);
 				iov[1].iov_len = sizeof(NEGOTIATE_MESSAGE);
-				iov[1].iov_base = &pSMB->req.SecurityBlob[0];
 			} else if (phase == NtLmAuthenticate) {
 				int blob_len;
-				ntlmsspblob = kmalloc(5 *
-					sizeof(struct _AUTHENTICATE_MESSAGE),
-					GFP_KERNEL);
-				if (!ntlmsspblob) {
-					cERROR(1, "Can't allocate NTLMSSP");
-					rc = -ENOMEM;
-					goto ssetup_exit;
-				}
-
-				blob_len = setup_ntlmssp_auth_req(ntlmsspblob,
-								ses,
-								nls_cp,
-								first_time);
+				blob_len = setup_ntlmssp_auth_req(pSMB, ses,
+								  nls_cp,
+								  first_time);
 				iov[1].iov_len = blob_len;
-				iov[1].iov_base = ntlmsspblob;
-				pSMB->req.SecurityBlobLength =
-					cpu_to_le16(blob_len);
 				/* Make sure that we tell the server that we
 				   are using the uid that it just gave us back
 				   on the response (challenge) */
@@ -891,6 +830,7 @@ ssetup_ntlmssp_authenticate:
 				rc = -ENOSYS;
 				goto ssetup_exit;
 			}
+			iov[1].iov_base = &pSMB->req.SecurityBlob[0];
 			/* unicode strings must be word aligned */
 			if ((iov[0].iov_len + iov[1].iov_len) % 2) {
 				*bcc_ptr = 0;
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index e0588cd..82f78c4 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -543,7 +543,7 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
 		    (ses->server->secMode & (SECMODE_SIGN_REQUIRED |
 					     SECMODE_SIGN_ENABLED))) {
 			rc = cifs_verify_signature(midQ->resp_buf,
-						ses->server,
+						&ses->server->mac_signing_key,
 						midQ->sequence_number+1);
 			if (rc) {
 				cERROR(1, "Unexpected SMB signature");
@@ -731,7 +731,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
 		    (ses->server->secMode & (SECMODE_SIGN_REQUIRED |
 					     SECMODE_SIGN_ENABLED))) {
 			rc = cifs_verify_signature(out_buf,
-						ses->server,
+						&ses->server->mac_signing_key,
 						midQ->sequence_number+1);
 			if (rc) {
 				cERROR(1, "Unexpected SMB signature");
@@ -981,7 +981,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
 	    (ses->server->secMode & (SECMODE_SIGN_REQUIRED |
 				     SECMODE_SIGN_ENABLED))) {
 		rc = cifs_verify_signature(out_buf,
-					   ses->server,
+					   &ses->server->mac_signing_key,
 					   midQ->sequence_number+1);
 		if (rc) {
 			cERROR(1, "Unexpected SMB signature");
-- 
cgit v1.1


From 639e7a913d81f918bfbf506e6ecd54664f787cbd Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Fri, 3 Sep 2010 11:50:09 -0400
Subject: cifs: eliminate redundant xdev check in cifs_rename

The VFS always checks that the source and target of a rename are on the
same vfsmount, and hence have the same superblock. So, this check is
redundant. Remove it and simplify the error handling.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/inode.c | 30 +++++++++---------------------
 1 file changed, 9 insertions(+), 21 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 86a164f..93f77d4 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1462,29 +1462,18 @@ int cifs_rename(struct inode *source_dir, struct dentry *source_dentry,
 {
 	char *fromName = NULL;
 	char *toName = NULL;
-	struct cifs_sb_info *cifs_sb_source;
-	struct cifs_sb_info *cifs_sb_target;
+	struct cifs_sb_info *cifs_sb;
 	struct cifsTconInfo *tcon;
 	FILE_UNIX_BASIC_INFO *info_buf_source = NULL;
 	FILE_UNIX_BASIC_INFO *info_buf_target;
 	int xid, rc, tmprc;
 
-	cifs_sb_target = CIFS_SB(target_dir->i_sb);
-	cifs_sb_source = CIFS_SB(source_dir->i_sb);
-	tcon = cifs_sb_source->tcon;
+	cifs_sb = CIFS_SB(source_dir->i_sb);
+	tcon = cifs_sb->tcon;
 
 	xid = GetXid();
 
 	/*
-	 * BB: this might be allowed if same server, but different share.
-	 * Consider adding support for this
-	 */
-	if (tcon != cifs_sb_target->tcon) {
-		rc = -EXDEV;
-		goto cifs_rename_exit;
-	}
-
-	/*
 	 * we already have the rename sem so we do not need to
 	 * grab it again here to protect the path integrity
 	 */
@@ -1519,17 +1508,16 @@ int cifs_rename(struct inode *source_dir, struct dentry *source_dentry,
 		info_buf_target = info_buf_source + 1;
 		tmprc = CIFSSMBUnixQPathInfo(xid, tcon, fromName,
 					info_buf_source,
-					cifs_sb_source->local_nls,
-					cifs_sb_source->mnt_cifs_flags &
+					cifs_sb->local_nls,
+					cifs_sb->mnt_cifs_flags &
 					CIFS_MOUNT_MAP_SPECIAL_CHR);
 		if (tmprc != 0)
 			goto unlink_target;
 
-		tmprc = CIFSSMBUnixQPathInfo(xid, tcon,
-					toName, info_buf_target,
-					cifs_sb_target->local_nls,
-					/* remap based on source sb */
-					cifs_sb_source->mnt_cifs_flags &
+		tmprc = CIFSSMBUnixQPathInfo(xid, tcon, toName,
+					info_buf_target,
+					cifs_sb->local_nls,
+					cifs_sb->mnt_cifs_flags &
 					CIFS_MOUNT_MAP_SPECIAL_CHR);
 
 		if (tmprc == 0 && (info_buf_source->UniqueId ==
-- 
cgit v1.1


From 4266d9118f85b050a341992f0cfab40d392ef32c Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Wed, 8 Sep 2010 21:17:29 +0000
Subject: [CIFS] ntlmv2/ntlmssp remove-unused-function
 CalcNTLMv2_partial_mac_key

This function is not used, so remove the definition and declaration.

Reviewed-by: Jeff Layton <jlayton@samba.org>
Signed-off-by: Shirish Pargaonkar <shirishpargaonkar@gmail.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/cifsencrypt.c | 57 ---------------------------------------------------
 fs/cifs/cifsproto.h   |  2 --
 2 files changed, 59 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 847628d..35042d8 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -223,63 +223,6 @@ int cifs_calculate_mac_key(struct mac_key *key, const char *rn,
 	return 0;
 }
 
-int CalcNTLMv2_partial_mac_key(struct cifsSesInfo *ses,
-			       const struct nls_table *nls_info)
-{
-	char temp_hash[16];
-	struct HMACMD5Context ctx;
-	char *ucase_buf;
-	__le16 *unicode_buf;
-	unsigned int i, user_name_len, dom_name_len;
-
-	if (ses == NULL)
-		return -EINVAL;
-
-	E_md4hash(ses->password, temp_hash);
-
-	hmac_md5_init_limK_to_64(temp_hash, 16, &ctx);
-	user_name_len = strlen(ses->userName);
-	if (user_name_len > MAX_USERNAME_SIZE)
-		return -EINVAL;
-	if (ses->domainName == NULL)
-		return -EINVAL; /* BB should we use CIFS_LINUX_DOM */
-	dom_name_len = strlen(ses->domainName);
-	if (dom_name_len > MAX_USERNAME_SIZE)
-		return -EINVAL;
-
-	ucase_buf = kmalloc((MAX_USERNAME_SIZE+1), GFP_KERNEL);
-	if (ucase_buf == NULL)
-		return -ENOMEM;
-	unicode_buf = kmalloc((MAX_USERNAME_SIZE+1)*4, GFP_KERNEL);
-	if (unicode_buf == NULL) {
-		kfree(ucase_buf);
-		return -ENOMEM;
-	}
-
-	for (i = 0; i < user_name_len; i++)
-		ucase_buf[i] = nls_info->charset2upper[(int)ses->userName[i]];
-	ucase_buf[i] = 0;
-	user_name_len = cifs_strtoUCS(unicode_buf, ucase_buf,
-				      MAX_USERNAME_SIZE*2, nls_info);
-	unicode_buf[user_name_len] = 0;
-	user_name_len++;
-
-	for (i = 0; i < dom_name_len; i++)
-		ucase_buf[i] = nls_info->charset2upper[(int)ses->domainName[i]];
-	ucase_buf[i] = 0;
-	dom_name_len = cifs_strtoUCS(unicode_buf+user_name_len, ucase_buf,
-				     MAX_USERNAME_SIZE*2, nls_info);
-
-	unicode_buf[user_name_len + dom_name_len] = 0;
-	hmac_md5_update((const unsigned char *) unicode_buf,
-		(user_name_len+dom_name_len)*2, &ctx);
-
-	hmac_md5_final(ses->server->ntlmv2_hash, &ctx);
-	kfree(ucase_buf);
-	kfree(unicode_buf);
-	return 0;
-}
-
 #ifdef CONFIG_CIFS_WEAK_PW_HASH
 void calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt,
 			char *lnm_session_key)
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 1f54508..f399b16 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -365,8 +365,6 @@ extern int cifs_verify_signature(struct smb_hdr *,
 				__u32 expected_sequence_number);
 extern int cifs_calculate_mac_key(struct mac_key *key, const char *rn,
 				 const char *pass);
-extern int CalcNTLMv2_partial_mac_key(struct cifsSesInfo *,
-			const struct nls_table *);
 extern void CalcNTLMv2_response(const struct cifsSesInfo *, char *);
 extern void setup_ntlmv2_rsp(struct cifsSesInfo *, char *,
 			     const struct nls_table *);
-- 
cgit v1.1


From 522bbe65a2415fabce618186fc7777eb4c502989 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Fri, 3 Sep 2010 12:00:49 -0400
Subject: cifs: prevent cifsd from exiting prematurely

When cifs_demultiplex_thread exits, it does a number of cleanup tasks
including freeing the TCP_Server_Info struct. Much of the existing code
in cifs assumes that when there is a cisfSesInfo struct, that it holds a
reference to a valid TCP_Server_Info struct.

We can never allow cifsd to exit when a cifsSesInfo struct is still
holding a reference to the server. The server pointers will then point
to freed memory.

This patch eliminates a couple of questionable conditions where it does
this.  The idea here is to make an -EINTR return from kernel_recvmsg
behave the same way as -ERESTARTSYS or -EAGAIN. If the task was
signalled from cifs_put_tcp_session, then tcpStatus will be CifsExiting,
and the kernel_recvmsg call will return quickly.

There's also another condition where this can occur too -- if the
tcpStatus is still in CifsNew, then it will also exit if the server
closes the socket prematurely.  I think we'll probably also need to fix
that situation, but that requires a bit more consideration.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/connect.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 0ea52e9..5f68b96 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -400,7 +400,9 @@ incomplete_rcv:
 			cFYI(1, "call to reconnect done");
 			csocket = server->ssocket;
 			continue;
-		} else if ((length == -ERESTARTSYS) || (length == -EAGAIN)) {
+		} else if (length == -ERESTARTSYS ||
+			   length == -EAGAIN ||
+			   length == -EINTR) {
 			msleep(1); /* minimum sleep to prevent looping
 				allowing socket to clear and app threads to set
 				tcpStatus CifsNeedReconnect if server hung */
@@ -422,10 +424,6 @@ incomplete_rcv:
 				   and so simply return error to mount */
 				break;
 			}
-			if (!try_to_freeze() && (length == -EINTR)) {
-				cFYI(1, "cifsd thread killed");
-				break;
-			}
 			cFYI(1, "Reconnect after unexpected peek error %d",
 				length);
 			cifs_reconnect(server);
@@ -522,8 +520,7 @@ incomplete_rcv:
 		     total_read += length) {
 			length = kernel_recvmsg(csocket, &smb_msg, &iov, 1,
 						pdu_length - total_read, 0);
-			if ((server->tcpStatus == CifsExiting) ||
-			    (length == -EINTR)) {
+			if (server->tcpStatus == CifsExiting) {
 				/* then will exit */
 				reconnect = 2;
 				break;
@@ -534,8 +531,9 @@ incomplete_rcv:
 				/* Now we will reread sock */
 				reconnect = 1;
 				break;
-			} else if ((length == -ERESTARTSYS) ||
-				   (length == -EAGAIN)) {
+			} else if (length == -ERESTARTSYS ||
+				   length == -EAGAIN ||
+				   length == -EINTR) {
 				msleep(1); /* minimum sleep to prevent looping,
 					      allowing socket to clear and app
 					      threads to set tcpStatus
-- 
cgit v1.1


From 7332f2a6217ee6925f83ef0e725013067ed316ba Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Fri, 3 Sep 2010 12:00:49 -0400
Subject: cifs: eliminate some more premature cifsd exits

If the tcpStatus is still CifsNew, the main cifs_demultiplex_loop can
break out prematurely in some cases. This is wrong as we will almost
always have other structures with pointers to the TCP_Server_Info. If
the main loop breaks under any other condition other than tcpStatus ==
CifsExiting, then it'll face a use-after-free situation.

I don't see any reason to treat a CifsNew tcpStatus differently than
CifsGood. I believe we'll still want to attempt to reconnect in either
case. What should happen in those situations is that the MIDs get marked
as MID_RETRY_NEEDED. This will make CIFSSMBNegotiate return -EAGAIN, and
then the caller can retry the whole thing on a newly reconnected socket.
If that fails again in the same way, the caller of cifs_get_smb_ses
should tear down the TCP_Server_Info struct.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/connect.c | 41 ++++++++++++-----------------------------
 1 file changed, 12 insertions(+), 29 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 5f68b96..5fde83f 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -416,14 +416,6 @@ incomplete_rcv:
 			} else
 				continue;
 		} else if (length <= 0) {
-			if (server->tcpStatus == CifsNew) {
-				cFYI(1, "tcp session abend after SMBnegprot");
-				/* some servers kill the TCP session rather than
-				   returning an SMB negprot error, in which
-				   case reconnecting here is not going to help,
-				   and so simply return error to mount */
-				break;
-			}
 			cFYI(1, "Reconnect after unexpected peek error %d",
 				length);
 			cifs_reconnect(server);
@@ -464,27 +456,18 @@ incomplete_rcv:
 			   an error on SMB negprot response */
 			cFYI(1, "Negative RFC1002 Session Response Error 0x%x)",
 				pdu_length);
-			if (server->tcpStatus == CifsNew) {
-				/* if nack on negprot (rather than
-				ret of smb negprot error) reconnecting
-				not going to help, ret error to mount */
-				break;
-			} else {
-				/* give server a second to
-				clean up before reconnect attempt */
-				msleep(1000);
-				/* always try 445 first on reconnect
-				since we get NACK on some if we ever
-				connected to port 139 (the NACK is
-				since we do not begin with RFC1001
-				session initialize frame) */
-				server->addr.sockAddr.sin_port =
-					htons(CIFS_PORT);
-				cifs_reconnect(server);
-				csocket = server->ssocket;
-				wake_up(&server->response_q);
-				continue;
-			}
+			/* give server a second to clean up  */
+			msleep(1000);
+			/* always try 445 first on reconnect since we get NACK
+			 * on some if we ever connected to port 139 (the NACK
+			 * is since we do not begin with RFC1001 session
+			 * initialize frame)
+			 */
+			server->addr.sockAddr.sin_port = htons(CIFS_PORT);
+			cifs_reconnect(server);
+			csocket = server->ssocket;
+			wake_up(&server->response_q);
+			continue;
 		} else if (temp != (char) 0) {
 			cERROR(1, "Unknown RFC 1002 frame");
 			cifs_dump_mem(" Received Data: ", (char *)smb_buffer,
-- 
cgit v1.1


From 32670396e7fc6e4f37451a69339968985461a374 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Fri, 3 Sep 2010 12:00:50 -0400
Subject: cifs: prevent possible memory corruption in cifs_demultiplex_thread

cifs_demultiplex_thread sets the addr.sockAddr.sin_port without any
regard for the socket family. While it may be that the error in question
here never occurs on an IPv6 socket, it's probably best to be safe and
set the port properly if it ever does.

Break the port setting code out of cifs_fill_sockaddr and into a new
function, and call that from cifs_demultiplex_thread.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/cifsproto.h |  3 ++-
 fs/cifs/connect.c   |  3 ++-
 fs/cifs/netmisc.c   | 22 +++++++++++++---------
 3 files changed, 17 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index f399b16..1d60c65 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -87,8 +87,9 @@ extern unsigned int smbCalcSize_LE(struct smb_hdr *ptr);
 extern int decode_negTokenInit(unsigned char *security_blob, int length,
 			struct TCP_Server_Info *server);
 extern int cifs_convert_address(struct sockaddr *dst, const char *src, int len);
+extern int cifs_set_port(struct sockaddr *addr, const unsigned short int port);
 extern int cifs_fill_sockaddr(struct sockaddr *dst, const char *src, int len,
-				unsigned short int port);
+				const unsigned short int port);
 extern int map_smb_to_linux_error(struct smb_hdr *smb, int logErr);
 extern void header_assemble(struct smb_hdr *, char /* command */ ,
 			    const struct cifsTconInfo *, int /* length of
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 5fde83f..67dad54 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -463,7 +463,8 @@ incomplete_rcv:
 			 * is since we do not begin with RFC1001 session
 			 * initialize frame)
 			 */
-			server->addr.sockAddr.sin_port = htons(CIFS_PORT);
+			cifs_set_port((struct sockaddr *)
+					&server->addr.sockAddr, CIFS_PORT);
 			cifs_reconnect(server);
 			csocket = server->ssocket;
 			wake_up(&server->response_q);
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index f978511..9aad47a 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -206,26 +206,30 @@ cifs_convert_address(struct sockaddr *dst, const char *src, int len)
 }
 
 int
-cifs_fill_sockaddr(struct sockaddr *dst, const char *src, int len,
-		   const unsigned short int port)
+cifs_set_port(struct sockaddr *addr, const unsigned short int port)
 {
-	if (!cifs_convert_address(dst, src, len))
-		return 0;
-
-	switch (dst->sa_family) {
+	switch (addr->sa_family) {
 	case AF_INET:
-		((struct sockaddr_in *)dst)->sin_port = htons(port);
+		((struct sockaddr_in *)addr)->sin_port = htons(port);
 		break;
 	case AF_INET6:
-		((struct sockaddr_in6 *)dst)->sin6_port = htons(port);
+		((struct sockaddr_in6 *)addr)->sin6_port = htons(port);
 		break;
 	default:
 		return 0;
 	}
-
 	return 1;
 }
 
+int
+cifs_fill_sockaddr(struct sockaddr *dst, const char *src, int len,
+		   const unsigned short int port)
+{
+	if (!cifs_convert_address(dst, src, len))
+		return 0;
+	return cifs_set_port(dst, port);
+}
+
 /*****************************************************************************
 convert a NT status code to a dos class/code
  *****************************************************************************/
-- 
cgit v1.1


From a122eb2fdfd78b58c6dd992d6f4b1aaef667eef9 Mon Sep 17 00:00:00 2001
From: Dan Rosenberg <dan.j.rosenberg@gmail.com>
Date: Mon, 6 Sep 2010 18:24:57 -0400
Subject: xfs: prevent reading uninitialized stack memory

The XFS_IOC_FSGETXATTR ioctl allows unprivileged users to read 12
bytes of uninitialized stack memory, because the fsxattr struct
declared on the stack in xfs_ioc_fsgetxattr() does not alter (or zero)
the 12-byte fsx_pad member before copying it back to the user.  This
patch takes care of it.

Signed-off-by: Dan Rosenberg <dan.j.rosenberg@gmail.com>
Reviewed-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Alex Elder <aelder@sgi.com>
---
 fs/xfs/linux-2.6/xfs_ioctl.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 4fec427..3b9e626 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -785,6 +785,8 @@ xfs_ioc_fsgetxattr(
 {
 	struct fsxattr		fa;
 
+	memset(&fa, 0, sizeof(struct fsxattr));
+
 	xfs_ilock(ip, XFS_ILOCK_SHARED);
 	fa.fsx_xflags = xfs_ip2xflags(ip);
 	fa.fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog;
-- 
cgit v1.1


From 1b528181b2ffa14721fb28ad1bd539fe1732c583 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Tue, 7 Sep 2010 19:35:49 -0700
Subject: setup_arg_pages: diagnose excessive argument size

The CONFIG_STACK_GROWSDOWN variant of setup_arg_pages() does not
check the size of the argument/environment area on the stack.
When it is unworkably large, shift_arg_pages() hits its BUG_ON.
This is exploitable with a very large RLIMIT_STACK limit, to
create a crash pretty easily.

Check that the initial stack is not too large to make it possible
to map in any executable.  We're not checking that the actual
executable (or intepreter, for binfmt_elf) will fit.  So those
mappings might clobber part of the initial stack mapping.  But
that is just userland lossage that userland made happen, not a
kernel problem.

Signed-off-by: Roland McGrath <roland@redhat.com>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/exec.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'fs')

diff --git a/fs/exec.c b/fs/exec.c
index 2d94552..1b63237 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -594,6 +594,11 @@ int setup_arg_pages(struct linux_binprm *bprm,
 #else
 	stack_top = arch_align_stack(stack_top);
 	stack_top = PAGE_ALIGN(stack_top);
+
+	if (unlikely(stack_top < mmap_min_addr) ||
+	    unlikely(vma->vm_end - vma->vm_start >= stack_top - mmap_min_addr))
+		return -ENOMEM;
+
 	stack_shift = vma->vm_end - stack_top;
 
 	bprm->p -= stack_shift;
-- 
cgit v1.1


From 7993bc1f4663c0db67bb8f0d98e6678145b387cd Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Tue, 7 Sep 2010 19:36:28 -0700
Subject: execve: improve interactivity with large arguments

This adds a preemption point during the copying of the argument and
environment strings for execve, in copy_strings().  There is already
a preemption point in the count() loop, so this doesn't add any new
points in the abstract sense.

When the total argument+environment strings are very large, the time
spent copying them can be much more than a normal user time slice.
So this change improves the interactivity of the rest of the system
when one process is doing an execve with very large arguments.

Signed-off-by: Roland McGrath <roland@redhat.com>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/exec.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/exec.c b/fs/exec.c
index 1b63237..6f2d777 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -419,6 +419,8 @@ static int copy_strings(int argc, const char __user *const __user *argv,
 		while (len > 0) {
 			int offset, bytes_to_copy;
 
+			cond_resched();
+
 			offset = pos % PAGE_SIZE;
 			if (offset == 0)
 				offset = PAGE_SIZE;
-- 
cgit v1.1


From 9aea5a65aa7a1af9a4236dfaeb0088f1624f9919 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Tue, 7 Sep 2010 19:37:06 -0700
Subject: execve: make responsive to SIGKILL with large arguments

An execve with a very large total of argument/environment strings
can take a really long time in the execve system call.  It runs
uninterruptibly to count and copy all the strings.  This change
makes it abort the exec quickly if sent a SIGKILL.

Note that this is the conservative change, to interrupt only for
SIGKILL, by using fatal_signal_pending().  It would be perfectly
correct semantics to let any signal interrupt the string-copying in
execve, i.e. use signal_pending() instead of fatal_signal_pending().
We'll save that change for later, since it could have user-visible
consequences, such as having a timer set too quickly make it so that
an execve can never complete, though it always happened to work before.

Signed-off-by: Roland McGrath <roland@redhat.com>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/exec.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'fs')

diff --git a/fs/exec.c b/fs/exec.c
index 6f2d777..828dd24 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -376,6 +376,9 @@ static int count(const char __user * const __user * argv, int max)
 			argv++;
 			if (i++ >= max)
 				return -E2BIG;
+
+			if (fatal_signal_pending(current))
+				return -ERESTARTNOHAND;
 			cond_resched();
 		}
 	}
@@ -419,6 +422,10 @@ static int copy_strings(int argc, const char __user *const __user *argv,
 		while (len > 0) {
 			int offset, bytes_to_copy;
 
+			if (fatal_signal_pending(current)) {
+				ret = -ERESTARTNOHAND;
+				goto out;
+			}
 			cond_resched();
 
 			offset = pos % PAGE_SIZE;
-- 
cgit v1.1


From 51749e47e191db8e588ad5cebea731caf7b705d7 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Wed, 8 Sep 2010 09:00:22 +0000
Subject: xfs: log IO completion workqueue is a high priority queue

The workqueue implementation in 2.6.36-rcX has changed, resulting
in the workqueues no longer having dedicated threads for work
processing. This has caused severe livelocks under heavy parallel
create workloads because the log IO completions have been getting
held up behind metadata IO completions.  Hence log commits would
stall, memory allocation would stall because pages could not be
cleaned, and lock contention on the AIL during inode IO completion
processing was being seen to slow everything down even further.

By making the log Io completion workqueue a high priority workqueue,
they are queued ahead of all data/metadata IO completions and
processed before the data/metadata completions. Hence the log never
gets stalled, and operations needed to clean memory can continue as
quickly as possible. This avoids the livelock conditions and allos
the system to keep running under heavy load as per normal.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Alex Elder <aelder@sgi.com>
---
 fs/xfs/linux-2.6/xfs_buf.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index d72cf2b..286e36e 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -1932,7 +1932,8 @@ xfs_buf_init(void)
 	if (!xfs_buf_zone)
 		goto out;
 
-	xfslogd_workqueue = create_workqueue("xfslogd");
+	xfslogd_workqueue = alloc_workqueue("xfslogd",
+					WQ_RESCUER | WQ_HIGHPRI, 1);
 	if (!xfslogd_workqueue)
 		goto out_free_buf_zone;
 
-- 
cgit v1.1


From 5e64b0d9e86ffff8b299556341d85319117539e9 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 7 Sep 2010 13:30:05 +0800
Subject: ocfs2/lockdep: Move ip_xattr_sem out of ocfs2_xattr_get_nolock.

As the name shows, we shouldn't have any lock in
ocfs2_xattr_get_nolock. so lift ip_xattr_sem to the caller.
This should be safe for us since the only 2 callers are:
1. ocfs2_xattr_get which will lock the resources.
2. ocfs2_mknod which don't need this locking.

And this also resolves the following lockdep warning.

=======================================================
[ INFO: possible circular locking dependency detected ]
2.6.35+ #5
-------------------------------------------------------
reflink/30027 is trying to acquire lock:
 (&oi->ip_alloc_sem){+.+.+.}, at: [<ffffffffa0673b67>] ocfs2_reflink_ioctl+0x69a/0x1226 [ocfs2]

but task is already holding lock:
 (&oi->ip_xattr_sem){++++..}, at: [<ffffffffa0673b58>] ocfs2_reflink_ioctl+0x68b/0x1226 [ocfs2]

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #3 (&oi->ip_xattr_sem){++++..}:
       [<ffffffff82064d6d>] __lock_acquire+0x79a/0x7f1
       [<ffffffff82065a81>] lock_acquire+0xc6/0xed
       [<ffffffff82339650>] down_read+0x34/0x47
       [<ffffffffa0691cb8>] ocfs2_xattr_get_nolock+0xa0/0x4e6 [ocfs2]
       [<ffffffffa069d64f>] ocfs2_get_acl_nolock+0x5c/0x132 [ocfs2]
       [<ffffffffa069d9c7>] ocfs2_init_acl+0x60/0x243 [ocfs2]
       [<ffffffffa066499d>] ocfs2_mknod+0xae8/0xfea [ocfs2]
       [<ffffffffa0665041>] ocfs2_create+0x9d/0x105 [ocfs2]
       [<ffffffff820e1c83>] vfs_create+0x9b/0xf4
       [<ffffffff820e20bb>] do_last+0x2fd/0x5be
       [<ffffffff820e31c0>] do_filp_open+0x1fb/0x572
       [<ffffffff820d6cf6>] do_sys_open+0x5a/0xe7
       [<ffffffff820d6dac>] sys_open+0x1b/0x1d
       [<ffffffff8200296b>] system_call_fastpath+0x16/0x1b

-> #2 (jbd2_handle){+.+...}:
       [<ffffffff82064d6d>] __lock_acquire+0x79a/0x7f1
       [<ffffffff82065a81>] lock_acquire+0xc6/0xed
       [<ffffffffa0604ff8>] start_this_handle+0x4a3/0x4bc [jbd2]
       [<ffffffffa06051d6>] jbd2__journal_start+0xba/0xee [jbd2]
       [<ffffffffa0605218>] jbd2_journal_start+0xe/0x10 [jbd2]
       [<ffffffffa065ca34>] ocfs2_start_trans+0xb7/0x19b [ocfs2]
       [<ffffffffa06645f3>] ocfs2_mknod+0x73e/0xfea [ocfs2]
       [<ffffffffa0665041>] ocfs2_create+0x9d/0x105 [ocfs2]
       [<ffffffff820e1c83>] vfs_create+0x9b/0xf4
       [<ffffffff820e20bb>] do_last+0x2fd/0x5be
       [<ffffffff820e31c0>] do_filp_open+0x1fb/0x572
       [<ffffffff820d6cf6>] do_sys_open+0x5a/0xe7
       [<ffffffff820d6dac>] sys_open+0x1b/0x1d
       [<ffffffff8200296b>] system_call_fastpath+0x16/0x1b

-> #1 (&journal->j_trans_barrier){.+.+..}:
       [<ffffffff82064d6d>] __lock_acquire+0x79a/0x7f1
       [<ffffffff82064fa9>] lock_release_non_nested+0x1e5/0x24b
       [<ffffffff82065999>] lock_release+0x158/0x17a
       [<ffffffff823389f6>] __mutex_unlock_slowpath+0xbf/0x11b
       [<ffffffff82338a5b>] mutex_unlock+0x9/0xb
       [<ffffffffa0679673>] ocfs2_free_ac_resource+0x31/0x67 [ocfs2]
       [<ffffffffa067c6bc>] ocfs2_free_alloc_context+0x11/0x1d [ocfs2]
       [<ffffffffa0633de0>] ocfs2_write_begin_nolock+0x141e/0x159b [ocfs2]
       [<ffffffffa0635523>] ocfs2_write_begin+0x11e/0x1e7 [ocfs2]
       [<ffffffff820a1297>] generic_file_buffered_write+0x10c/0x210
       [<ffffffffa0653624>] ocfs2_file_aio_write+0x4cc/0x6d3 [ocfs2]
       [<ffffffff820d822d>] do_sync_write+0xc2/0x106
       [<ffffffff820d897b>] vfs_write+0xae/0x131
       [<ffffffff820d8e55>] sys_write+0x47/0x6f
       [<ffffffff8200296b>] system_call_fastpath+0x16/0x1b

-> #0 (&oi->ip_alloc_sem){+.+.+.}:
       [<ffffffff82063f92>] validate_chain+0x727/0xd68
       [<ffffffff82064d6d>] __lock_acquire+0x79a/0x7f1
       [<ffffffff82065a81>] lock_acquire+0xc6/0xed
       [<ffffffff82339694>] down_write+0x31/0x52
       [<ffffffffa0673b67>] ocfs2_reflink_ioctl+0x69a/0x1226 [ocfs2]
       [<ffffffffa06599f6>] ocfs2_ioctl+0x61a/0x656 [ocfs2]
       [<ffffffff820e53ac>] vfs_ioctl+0x2a/0x9d
       [<ffffffff820e5903>] do_vfs_ioctl+0x45d/0x4ae
       [<ffffffff820e59ab>] sys_ioctl+0x57/0x7a
       [<ffffffff8200296b>] system_call_fastpath+0x16/0x1b

Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/xattr.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index d03469f..06fa5e7 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -1286,13 +1286,11 @@ int ocfs2_xattr_get_nolock(struct inode *inode,
 	xis.inode_bh = xbs.inode_bh = di_bh;
 	di = (struct ocfs2_dinode *)di_bh->b_data;
 
-	down_read(&oi->ip_xattr_sem);
 	ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer,
 				    buffer_size, &xis);
 	if (ret == -ENODATA && di->i_xattr_loc)
 		ret = ocfs2_xattr_block_get(inode, name_index, name, buffer,
 					    buffer_size, &xbs);
-	up_read(&oi->ip_xattr_sem);
 
 	return ret;
 }
@@ -1316,8 +1314,10 @@ static int ocfs2_xattr_get(struct inode *inode,
 		mlog_errno(ret);
 		return ret;
 	}
+	down_read(&OCFS2_I(inode)->ip_xattr_sem);
 	ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index,
 				     name, buffer, buffer_size);
+	up_read(&OCFS2_I(inode)->ip_xattr_sem);
 
 	ocfs2_inode_unlock(inode, 0);
 
-- 
cgit v1.1


From 07eaac9438b13ec0b863111698b91ccec8f3b8d4 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 7 Sep 2010 13:30:06 +0800
Subject: ocfs2: Fix lockdep warning in reflink.

This patch change mutex_lock to a new subclass and
add a new inode lock subclass for the target inode
which caused this lockdep warning.

=============================================
[ INFO: possible recursive locking detected ]
2.6.35+ #5
---------------------------------------------
reflink/11086 is trying to acquire lock:
 (Meta){+++++.}, at: [<ffffffffa06f9d65>] ocfs2_reflink_ioctl+0x898/0x1229 [ocfs2]

but task is already holding lock:
 (Meta){+++++.}, at: [<ffffffffa06f9aa0>] ocfs2_reflink_ioctl+0x5d3/0x1229 [ocfs2]

other info that might help us debug this:
6 locks held by reflink/11086:
 #0:  (&sb->s_type->i_mutex_key#15/1){+.+.+.}, at: [<ffffffff820e09ec>] lookup_create+0x26/0x97
 #1:  (&sb->s_type->i_mutex_key#15){+.+.+.}, at: [<ffffffffa06f99a0>] ocfs2_reflink_ioctl+0x4d3/0x1229 [ocfs2]
 #2:  (Meta){+++++.}, at: [<ffffffffa06f9aa0>] ocfs2_reflink_ioctl+0x5d3/0x1229 [ocfs2]
 #3:  (&oi->ip_xattr_sem){+.+.+.}, at: [<ffffffffa06f9b58>] ocfs2_reflink_ioctl+0x68b/0x1229 [ocfs2]
 #4:  (&oi->ip_alloc_sem){+.+.+.}, at: [<ffffffffa06f9b67>] ocfs2_reflink_ioctl+0x69a/0x1229 [ocfs2]
 #5:  (&sb->s_type->i_mutex_key#15/2){+.+...}, at: [<ffffffffa06f9d4f>] ocfs2_reflink_ioctl+0x882/0x1229 [ocfs2]

stack backtrace:
Pid: 11086, comm: reflink Not tainted 2.6.35+ #5
Call Trace:
 [<ffffffff82063dd9>] validate_chain+0x56e/0xd68
 [<ffffffff82062275>] ? mark_held_locks+0x49/0x69
 [<ffffffff82064d6d>] __lock_acquire+0x79a/0x7f1
 [<ffffffff82065a81>] lock_acquire+0xc6/0xed
 [<ffffffffa06f9d65>] ? ocfs2_reflink_ioctl+0x898/0x1229 [ocfs2]
 [<ffffffffa06c9ade>] __ocfs2_cluster_lock+0x975/0xa0d [ocfs2]
 [<ffffffffa06f9d65>] ? ocfs2_reflink_ioctl+0x898/0x1229 [ocfs2]
 [<ffffffffa06e107b>] ? ocfs2_wait_for_recovery+0x15/0x8a [ocfs2]
 [<ffffffffa06cb6ea>] ocfs2_inode_lock_full_nested+0x1ac/0xdc5 [ocfs2]
 [<ffffffffa06f9d65>] ? ocfs2_reflink_ioctl+0x898/0x1229 [ocfs2]
 [<ffffffff820623a0>] ? trace_hardirqs_on_caller+0x10b/0x12f
 [<ffffffff82060193>] ? debug_mutex_free_waiter+0x4f/0x53
 [<ffffffffa06f9d65>] ocfs2_reflink_ioctl+0x898/0x1229 [ocfs2]
 [<ffffffffa06ce24a>] ? ocfs2_file_lock_res_init+0x66/0x78 [ocfs2]
 [<ffffffff820bb2d2>] ? might_fault+0x40/0x8d
 [<ffffffffa06df9f6>] ocfs2_ioctl+0x61a/0x656 [ocfs2]
 [<ffffffff820ee5d3>] ? mntput_no_expire+0x1d/0xb0
 [<ffffffff820e07b3>] ? path_put+0x2c/0x31
 [<ffffffff820e53ac>] vfs_ioctl+0x2a/0x9d
 [<ffffffff820e5903>] do_vfs_ioctl+0x45d/0x4ae
 [<ffffffff8233a7f6>] ? _raw_spin_unlock+0x26/0x2a
 [<ffffffff8200299c>] ? sysret_check+0x27/0x62
 [<ffffffff820e59ab>] sys_ioctl+0x57/0x7a
 [<ffffffff8200296b>] system_call_fastpath+0x16/0x1b

Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/dlmglue.h      | 1 +
 fs/ocfs2/refcounttree.c | 5 +++--
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
index d1ce48e..1d596d8 100644
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -84,6 +84,7 @@ enum {
 	OI_LS_PARENT,
 	OI_LS_RENAME1,
 	OI_LS_RENAME2,
+	OI_LS_REFLINK_TARGET,
 };
 
 int ocfs2_dlm_init(struct ocfs2_super *osb);
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 0afeda831..efdd756 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4201,8 +4201,9 @@ static int __ocfs2_reflink(struct dentry *old_dentry,
 		goto out;
 	}
 
-	mutex_lock(&new_inode->i_mutex);
-	ret = ocfs2_inode_lock(new_inode, &new_bh, 1);
+	mutex_lock_nested(&new_inode->i_mutex, I_MUTEX_CHILD);
+	ret = ocfs2_inode_lock_nested(new_inode, &new_bh, 1,
+				      OI_LS_REFLINK_TARGET);
 	if (ret) {
 		mlog_errno(ret);
 		goto out_unlock;
-- 
cgit v1.1


From 0f4da216b8c3c35c90ecd18e1899c6f125957c2b Mon Sep 17 00:00:00 2001
From: Tristan Ye <tristan.ye@oracle.com>
Date: Wed, 8 Sep 2010 17:12:38 +0800
Subject: Ocfs2: Re-access the journal after ocfs2_insert_extent() in dxdir
 codes.

In ocfs2_dx_dir_rebalance(), we need to rejournal_acess the blocks after
calling ocfs2_insert_extent() since growing an extent tree may trigger
ocfs2_extend_trans(), which makes previous journal_access meaningless.

Signed-off-by: Tristan Ye <tristan.ye@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/dir.c | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index f04ebcf..c49f6de 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -3931,6 +3931,15 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir,
 		goto out_commit;
 	}
 
+	cpos = split_hash;
+	ret = ocfs2_dx_dir_new_cluster(dir, &et, cpos, handle,
+				       data_ac, meta_ac, new_dx_leaves,
+				       num_dx_leaves);
+	if (ret) {
+		mlog_errno(ret);
+		goto out_commit;
+	}
+
 	for (i = 0; i < num_dx_leaves; i++) {
 		ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir),
 					      orig_dx_leaves[i],
@@ -3939,15 +3948,14 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir,
 			mlog_errno(ret);
 			goto out_commit;
 		}
-	}
 
-	cpos = split_hash;
-	ret = ocfs2_dx_dir_new_cluster(dir, &et, cpos, handle,
-				       data_ac, meta_ac, new_dx_leaves,
-				       num_dx_leaves);
-	if (ret) {
-		mlog_errno(ret);
-		goto out_commit;
+		ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir),
+					      new_dx_leaves[i],
+					      OCFS2_JOURNAL_ACCESS_WRITE);
+		if (ret) {
+			mlog_errno(ret);
+			goto out_commit;
+		}
 	}
 
 	ocfs2_dx_dir_transfer_leaf(dir, split_hash, handle, tmp_dx_leaf,
-- 
cgit v1.1


From 228ac6357718df2d5c8d70210fa51b2225aab5ee Mon Sep 17 00:00:00 2001
From: Tristan Ye <tristan.ye@oracle.com>
Date: Fri, 10 Sep 2010 10:16:33 +0800
Subject: Ocfs2: Handle empty list in lockres_seq_start() for dlmdebug.c

This patch tries to handle the case in which list 'dlm->tracking_list' is
empty, to avoid accessing an invalid pointer. It fixes the following oops:

http://oss.oracle.com/bugzilla/show_bug.cgi?id=1287

Signed-off-by: Tristan Ye <tristan.ye@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/dlm/dlmdebug.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c
index 5efdd37..901ca52 100644
--- a/fs/ocfs2/dlm/dlmdebug.c
+++ b/fs/ocfs2/dlm/dlmdebug.c
@@ -636,8 +636,14 @@ static void *lockres_seq_start(struct seq_file *m, loff_t *pos)
 	spin_lock(&dlm->track_lock);
 	if (oldres)
 		track_list = &oldres->tracking;
-	else
+	else {
 		track_list = &dlm->tracking_list;
+		if (list_empty(track_list)) {
+			dl = NULL;
+			spin_unlock(&dlm->track_lock);
+			goto bail;
+		}
+	}
 
 	list_for_each_entry(res, track_list, tracking) {
 		if (&res->tracking == &dlm->tracking_list)
@@ -660,6 +666,7 @@ static void *lockres_seq_start(struct seq_file *m, loff_t *pos)
 	} else
 		dl = NULL;
 
+bail:
 	/* passed to seq_show */
 	return dl;
 }
-- 
cgit v1.1


From ca04d9c3ec721e474f00992efc1b1afb625507f5 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Thu, 26 Aug 2010 16:12:01 -0700
Subject: ceph: fix null pointer deref on anon root dentry release

When we release a root dentry, particularly after a splice, the parent
(actually our) inode was evaluating to NULL and was getting dereferenced
by ceph_snap().  This is reproduced by something as simple as

 mount -t ceph monhost:/a/b mnt
 mount -t ceph monhost:/a mnt2
 ls mnt2

A splice_dentry() would kill the old 'b' inode's root dentry, and we'd
crash while releasing it.

Fix by checking for both the ROOT and NULL cases explicitly.  We only need
to invalidate the parent dir when we have a correct parent to invalidate.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/dir.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 6e4f43f..a1986eb 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -1021,11 +1021,15 @@ out_touch:
 static void ceph_dentry_release(struct dentry *dentry)
 {
 	struct ceph_dentry_info *di = ceph_dentry(dentry);
-	struct inode *parent_inode = dentry->d_parent->d_inode;
-	u64 snapid = ceph_snap(parent_inode);
+	struct inode *parent_inode = NULL;
+	u64 snapid = CEPH_NOSNAP;
 
+	if (!IS_ROOT(dentry)) {
+		parent_inode = dentry->d_parent->d_inode;
+		if (parent_inode)
+			snapid = ceph_snap(parent_inode);
+	}
 	dout("dentry_release %p parent %p\n", dentry, parent_inode);
-
 	if (parent_inode && snapid != CEPH_SNAPDIR) {
 		struct ceph_inode_info *ci = ceph_inode(parent_inode);
 
-- 
cgit v1.1


From 3d4401d9d0aef5c40706350685ddea3df6708496 Mon Sep 17 00:00:00 2001
From: Yehuda Sadeh <yehuda@hq.newdream.net>
Date: Fri, 3 Sep 2010 12:57:11 -0700
Subject: ceph: fix pagelist kunmap tail

A wrong parameter was passed to the kunmap.

Signed-off-by: Yehuda Sadeh <yehuda@hq.newdream.net>
Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/pagelist.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ceph/pagelist.c b/fs/ceph/pagelist.c
index b6859f4..46a368b 100644
--- a/fs/ceph/pagelist.c
+++ b/fs/ceph/pagelist.c
@@ -5,10 +5,18 @@
 
 #include "pagelist.h"
 
+static void ceph_pagelist_unmap_tail(struct ceph_pagelist *pl)
+{
+	struct page *page = list_entry(pl->head.prev, struct page,
+				       lru);
+	kunmap(page);
+}
+
 int ceph_pagelist_release(struct ceph_pagelist *pl)
 {
 	if (pl->mapped_tail)
-		kunmap(pl->mapped_tail);
+		ceph_pagelist_unmap_tail(pl);
+
 	while (!list_empty(&pl->head)) {
 		struct page *page = list_first_entry(&pl->head, struct page,
 						     lru);
@@ -26,7 +34,7 @@ static int ceph_pagelist_addpage(struct ceph_pagelist *pl)
 	pl->room += PAGE_SIZE;
 	list_add_tail(&page->lru, &pl->head);
 	if (pl->mapped_tail)
-		kunmap(pl->mapped_tail);
+		ceph_pagelist_unmap_tail(pl);
 	pl->mapped_tail = kmap(page);
 	return 0;
 }
-- 
cgit v1.1


From 3612abbd5df6baa9ca3e0777c6c8646e202d3f66 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Tue, 7 Sep 2010 15:59:27 -0700
Subject: ceph: fix reconnect encoding for old servers

Fix the reconnect encoding to encode the cap record when the MDS does not
have the FLOCK capability (i.e., pre v0.22).

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/mds_client.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index f091b13..fad95f8 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2374,6 +2374,8 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
 						num_fcntl_locks,
 						num_flock_locks);
 		unlock_kernel();
+	} else {
+		err = ceph_pagelist_append(pagelist, &rec, reclen);
 	}
 
 out_free:
-- 
cgit v1.1


From a77d9f7dce7600058d56f0670ed29d77abffcde2 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Sat, 11 Sep 2010 10:55:25 -0700
Subject: ceph: fix file offset wrapping at 4GB on 32-bit archs

Cast the value before shifting so that we don't run out of bits with a
32-bit unsigned long.  This fixes wrapping of high file offsets into the
low 4GB of a file on disk, and the subsequent data corruption for large
files.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/addr.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 4cfce1e..50461b8 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -766,7 +766,8 @@ get_more_pages:
 			/* ok */
 			if (locked_pages == 0) {
 				/* prepare async write request */
-				offset = page->index << PAGE_CACHE_SHIFT;
+				offset = (unsigned long long)page->index
+					<< PAGE_CACHE_SHIFT;
 				len = wsize;
 				req = ceph_osdc_new_request(&client->osdc,
 					    &ci->i_layout,
-- 
cgit v1.1


From b1bde04c6d9a120dec602cc8a70b8a7f21600883 Mon Sep 17 00:00:00 2001
From: Fabio Olive Leite <fleite@redhat.com>
Date: Sun, 12 Sep 2010 19:55:25 -0400
Subject: Remove incorrect do_vfs_lock message

The do_vfs_lock function on fs/nfs/file.c is only called if NLM is
not being used, via the -onolock mount option. Therefore it cannot
really be "out of sync with lock manager" when the local locking
function called returns an error, as there will be no corresponding
call to the NLM. For details, simply check the if/else on do_setlk
and do_unlk on fs/nfs/file.c.

Signed-Off-By: Fabio Olive Leite <fleite@redhat.com>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/file.c | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index eb51bd6..05bf3c0 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -723,10 +723,6 @@ static int do_vfs_lock(struct file *file, struct file_lock *fl)
 		default:
 			BUG();
 	}
-	if (res < 0)
-		dprintk(KERN_WARNING "%s: VFS is out of sync with lock manager"
-			" - error %d!\n",
-				__func__, res);
 	return res;
 }
 
-- 
cgit v1.1


From b20d37ca9561711c6a3c4b859c2855f49565e061 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sun, 12 Sep 2010 19:55:26 -0400
Subject: NFS: Fix a typo in nfs_sockaddr_match_ipaddr6

Reported-by: Ben Greear <greearb@candelatech.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: stable@kernel.org
---
 fs/nfs/client.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 4e7df2a..e734072 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -275,7 +275,7 @@ static int nfs_sockaddr_match_ipaddr6(const struct sockaddr *sa1,
 	    sin1->sin6_scope_id != sin2->sin6_scope_id)
 		return 0;
 
-	return ipv6_addr_equal(&sin1->sin6_addr, &sin1->sin6_addr);
+	return ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr);
 }
 #else	/* !defined(CONFIG_IPV6) && !defined(CONFIG_IPV6_MODULE) */
 static int nfs_sockaddr_match_ipaddr6(const struct sockaddr *sa1,
-- 
cgit v1.1


From fbf3fdd2443965d9ba6fb4b5fecd1f6e0847218f Mon Sep 17 00:00:00 2001
From: Menyhart Zoltan <Zoltan.Menyhart@bull.net>
Date: Sun, 12 Sep 2010 19:55:26 -0400
Subject: statfs() gives ESTALE error

Hi,

An NFS client executes a statfs("file", &buff) call.
"file" exists / existed, the client has read / written it,
but it has already closed it.

user_path(pathname, &path) looks up "file" successfully in the
directory-cache  and restarts the aging timer of the directory-entry.
Even if "file" has already been removed from the server, because the
lookupcache=positive option I use, keeps the entries valid for a while.

nfs_statfs() returns ESTALE if "file" has already been removed from the
server.

If the user application repeats the statfs("file", &buff) call, we
are stuck: "file" remains young forever in the directory-cache.

Signed-off-by: Zoltan Menyhart  <Zoltan.Menyhart@bull.net>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: stable@kernel.org
---
 fs/nfs/super.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'fs')

diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index ec3966e..f4cbf0c 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -431,7 +431,15 @@ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 		goto out_err;
 
 	error = server->nfs_client->rpc_ops->statfs(server, fh, &res);
+	if (unlikely(error == -ESTALE)) {
+		struct dentry *pd_dentry;
 
+		pd_dentry = dget_parent(dentry);
+		if (pd_dentry != NULL) {
+			nfs_zap_caches(pd_dentry->d_inode);
+			dput(pd_dentry);
+		}
+	}
 	nfs_free_fattr(res.fattr);
 	if (error < 0)
 		goto out_err;
-- 
cgit v1.1


From 827e3457022d0bb0b1bb8a0eb88501876fe7dcf0 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sun, 12 Sep 2010 19:57:50 -0400
Subject: SUNRPC: Fix the NFSv4 and RPCSEC_GSS Kconfig dependencies
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The NFSv4 client's callback server calls svc_gss_principal(), which
is defined in the auth_rpcgss.ko

The NFSv4 server has the same dependency, and in addition calls
svcauth_gss_flavor(), gss_mech_get_by_pseudoflavor(),
gss_pseudoflavor_to_service() and gss_mech_put() from the same module.

The module auth_rpcgss itself has no dependencies aside from sunrpc,
so we only need to select RPCSEC_GSS.

Reported-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/Kconfig  | 1 +
 fs/nfsd/Kconfig | 1 +
 2 files changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index 6c2aad4..f7e13db 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -63,6 +63,7 @@ config NFS_V3_ACL
 config NFS_V4
 	bool "NFS client support for NFS version 4"
 	depends on NFS_FS
+	select SUNRPC_GSS
 	help
 	  This option enables support for version 4 of the NFS protocol
 	  (RFC 3530) in the kernel's NFS client.
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index 95932f5..4264377 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -69,6 +69,7 @@ config NFSD_V4
 	depends on NFSD && PROC_FS && EXPERIMENTAL
 	select NFSD_V3
 	select FS_POSIX_ACL
+	select SUNRPC_GSS
 	help
 	  This option enables support in your system's NFS server for
 	  version 4 of the NFS protocol (RFC 3530).
-- 
cgit v1.1


From 62b2be591a9b12c550308ef7718a31abfc815b50 Mon Sep 17 00:00:00 2001
From: Latchesar Ionkov <lionkov@gmail.com>
Date: Tue, 24 Aug 2010 18:13:59 +0000
Subject: fs/9p, net/9p: memory leak fixes

Four memory leak fixes in the 9P code.

Signed-off-by: Latchesar Ionkov <lucho@ionkov.net>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 fs/9p/vfs_inode.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index c7c23ea..84159cf 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -1128,6 +1128,7 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
 	v9fs_stat2inode(st, dentry->d_inode, dentry->d_inode->i_sb);
 		generic_fillattr(dentry->d_inode, stat);
 
+	p9stat_free(st);
 	kfree(st);
 	return 0;
 }
@@ -1489,6 +1490,7 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen)
 
 	retval = strnlen(buffer, buflen);
 done:
+	p9stat_free(st);
 	kfree(st);
 	return retval;
 }
-- 
cgit v1.1


From 5c25f347a7b00b2ebe0a55c4a3cfe4c3e1e8725e Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Tue, 24 Aug 2010 10:30:49 +0000
Subject: fs/9p: Fix error handling in v9fs_get_sb

This was introduced by 7cadb63d58a932041afa3f957d5cbb6ce69dcee5

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 fs/9p/vfs_super.c | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index f931107..1d12ba0 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -122,6 +122,10 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
 	fid = v9fs_session_init(v9ses, dev_name, data);
 	if (IS_ERR(fid)) {
 		retval = PTR_ERR(fid);
+		/*
+		 * we need to call session_close to tear down some
+		 * of the data structure setup by session_init
+		 */
 		goto close_session;
 	}
 
@@ -144,7 +148,6 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
 		retval = -ENOMEM;
 		goto release_sb;
 	}
-
 	sb->s_root = root;
 
 	if (v9fs_proto_dotl(v9ses)) {
@@ -152,7 +155,7 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
 		st = p9_client_getattr_dotl(fid, P9_STATS_BASIC);
 		if (IS_ERR(st)) {
 			retval = PTR_ERR(st);
-			goto clunk_fid;
+			goto release_sb;
 		}
 
 		v9fs_stat2inode_dotl(st, root->d_inode);
@@ -162,7 +165,7 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
 		st = p9_client_stat(fid);
 		if (IS_ERR(st)) {
 			retval = PTR_ERR(st);
-			goto clunk_fid;
+			goto release_sb;
 		}
 
 		root->d_inode->i_ino = v9fs_qid2ino(&st->qid);
@@ -174,19 +177,24 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
 
 	v9fs_fid_add(root, fid);
 
-P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n");
+	P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n");
 	simple_set_mnt(mnt, sb);
 	return 0;
 
 clunk_fid:
 	p9_client_clunk(fid);
-
 close_session:
 	v9fs_session_close(v9ses);
 	kfree(v9ses);
 	return retval;
-
 release_sb:
+	/*
+	 * we will do the session_close and root dentry release
+	 * in the below call. But we need to clunk fid, because we haven't
+	 * attached the fid to dentry so it won't get clunked
+	 * automatically.
+	 */
+	p9_client_clunk(fid);
 	deactivate_locked_super(sb);
 	return retval;
 }
-- 
cgit v1.1


From 62726a7ab3a6a3624256172af055ff0a38c6ffa2 Mon Sep 17 00:00:00 2001
From: jvrao <jvrao@linux.vnet.ibm.com>
Date: Wed, 25 Aug 2010 16:26:21 +0000
Subject: 9p: Check for NULL fid in v9fs_dir_release()

NULL fid should be handled in cases where we endup calling v9fs_dir_release()
before even we instantiate the fid in filp.

Signed-off-by: Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 fs/9p/vfs_dir.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index 16c8a2a..899f168 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -292,9 +292,11 @@ int v9fs_dir_release(struct inode *inode, struct file *filp)
 
 	fid = filp->private_data;
 	P9_DPRINTK(P9_DEBUG_VFS,
-			"inode: %p filp: %p fid: %d\n", inode, filp, fid->fid);
+			"v9fs_dir_release: inode: %p filp: %p fid: %d\n",
+			inode, filp, fid ? fid->fid : -1);
 	filemap_write_and_wait(inode->i_mapping);
-	p9_client_clunk(fid);
+	if (fid)
+		p9_client_clunk(fid);
 	return 0;
 }
 
-- 
cgit v1.1


From 3c30750ffafbc32af040b09f777b67aa2486b063 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Mon, 30 Aug 2010 16:04:35 +0000
Subject: fs/9p: Use the correct dentry operations

We should use the cached dentry operation only if caching mode is enabled

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 fs/9p/vfs_inode.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 84159cf..a6990bb 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -730,7 +730,10 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int mode,
 		P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err);
 		goto error;
 	}
-	dentry->d_op = &v9fs_cached_dentry_operations;
+	if (v9ses->cache)
+		dentry->d_op = &v9fs_cached_dentry_operations;
+	else
+		dentry->d_op = &v9fs_dentry_operations;
 	d_instantiate(dentry, inode);
 	err = v9fs_fid_add(dentry, fid);
 	if (err < 0)
-- 
cgit v1.1


From 1d76e3135733a06aa12bb35891c05f306b27b2d6 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Mon, 30 Aug 2010 17:43:07 +0000
Subject: fs/9p: Don't use dotl version of mknod for dotu inode operations

We should not use dotlversion for the dotu inode operations

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 fs/9p/vfs_inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index a6990bb..9e670d5 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -1947,7 +1947,7 @@ static const struct inode_operations v9fs_dir_inode_operations_dotu = {
 	.unlink = v9fs_vfs_unlink,
 	.mkdir = v9fs_vfs_mkdir,
 	.rmdir = v9fs_vfs_rmdir,
-	.mknod = v9fs_vfs_mknod_dotl,
+	.mknod = v9fs_vfs_mknod,
 	.rename = v9fs_vfs_rename,
 	.getattr = v9fs_vfs_getattr,
 	.setattr = v9fs_vfs_setattr,
-- 
cgit v1.1


From 467c525109d5d542d7d416b0c11bdd54610fe2f4 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Mon, 13 Sep 2010 11:39:20 -0700
Subject: ceph: fix dn offset during readdir_prepopulate

When adding the readdir results to the cache, ceph_set_dentry_offset was
clobbered our just-set offset.  This can cause the readdir result offsets
to get out of sync with the server.  Add an argument to the helper so
that it does not.

This bug was introduced by 1cd3935bedccf592d44343890251452a6dd74fc4.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/inode.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index e7cca41..62377ec 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -845,7 +845,7 @@ static void ceph_set_dentry_offset(struct dentry *dn)
  * the caller) if we fail.
  */
 static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
-				    bool *prehash)
+				    bool *prehash, bool set_offset)
 {
 	struct dentry *realdn;
 
@@ -877,7 +877,8 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
 	}
 	if ((!prehash || *prehash) && d_unhashed(dn))
 		d_rehash(dn);
-	ceph_set_dentry_offset(dn);
+	if (set_offset)
+		ceph_set_dentry_offset(dn);
 out:
 	return dn;
 }
@@ -1062,7 +1063,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
 				d_delete(dn);
 				goto done;
 			}
-			dn = splice_dentry(dn, in, &have_lease);
+			dn = splice_dentry(dn, in, &have_lease, true);
 			if (IS_ERR(dn)) {
 				err = PTR_ERR(dn);
 				goto done;
@@ -1105,7 +1106,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
 			goto done;
 		}
 		dout(" linking snapped dir %p to dn %p\n", in, dn);
-		dn = splice_dentry(dn, in, NULL);
+		dn = splice_dentry(dn, in, NULL, true);
 		if (IS_ERR(dn)) {
 			err = PTR_ERR(dn);
 			goto done;
@@ -1237,7 +1238,7 @@ retry_lookup:
 				err = PTR_ERR(in);
 				goto out;
 			}
-			dn = splice_dentry(dn, in, NULL);
+			dn = splice_dentry(dn, in, NULL, false);
 			if (IS_ERR(dn))
 				dn = NULL;
 		}
-- 
cgit v1.1


From 8bef9239ee1a42eb37d3f83bacf6a75f019c028d Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Tue, 14 Sep 2010 15:45:44 -0700
Subject: ceph: correctly set 'follows' in flushsnap messages

The 'follows' should match the seq for the snap context for the given snap
cap, which is the context under which we have been dirtying and writing
data and metadata.  The snapshot that _contains_ those updates thus
_follows_ that context's seq #.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/snap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 4868b9d..9e836af 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -467,7 +467,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
 		INIT_LIST_HEAD(&capsnap->ci_item);
 		INIT_LIST_HEAD(&capsnap->flushing_item);
 
-		capsnap->follows = snapc->seq - 1;
+		capsnap->follows = snapc->seq;
 		capsnap->issued = __ceph_caps_issued(ci, NULL);
 		capsnap->dirty = dirty;
 
-- 
cgit v1.1


From cfc0bf6640dfd0f43bf8bfec5a475284809baa4d Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Tue, 14 Sep 2010 15:50:59 -0700
Subject: ceph: stop sending FLUSHSNAPs when we hit a dirty capsnap

Stop sending FLUSHSNAP messages when we hit a capsnap that has dirty_pages
or is still writing.  We'll send the newer capsnaps only after the older
ones complete.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/caps.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index a2069b6..9fbe901 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1227,7 +1227,7 @@ retry:
 		 * pages to be written out.
 		 */
 		if (capsnap->dirty_pages || capsnap->writing)
-			continue;
+			break;
 
 		/*
 		 * if cap writeback already occurred, we should have dropped
@@ -1276,8 +1276,8 @@ retry:
 			      &session->s_cap_snaps_flushing);
 		spin_unlock(&inode->i_lock);
 
-		dout("flush_snaps %p cap_snap %p follows %lld size %llu\n",
-		     inode, capsnap, next_follows, capsnap->size);
+		dout("flush_snaps %p cap_snap %p follows %lld tid %llu\n",
+		     inode, capsnap, capsnap->follows, capsnap->flush_tid);
 		send_cap_msg(session, ceph_vino(inode).ino, 0,
 			     CEPH_CAP_OP_FLUSHSNAP, capsnap->issued, 0,
 			     capsnap->dirty, 0, capsnap->flush_tid, 0, mseq,
-- 
cgit v1.1


From 460cf3411b858ad509d5255e0dfaf862a83c0299 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Tue, 14 Sep 2010 11:38:24 -0400
Subject: cifs: fix potential double put of TCP session reference

cifs_get_smb_ses must be called on a server pointer on which it holds an
active reference. It first does a search for an existing SMB session. If
it finds one, it'll put the server reference and then try to ensure that
the negprot is done, etc.

If it encounters an error at that point then it'll return an error.
There's a potential problem here though. When cifs_get_smb_ses returns
an error, the caller will also put the TCP server reference leading to a
double-put.

Fix this by having cifs_get_smb_ses only put the server reference if
it found an existing session that it could use and isn't returning an
error.

Cc: stable@kernel.org
Reviewed-by: Suresh Jayaraman <sjayaraman@suse.de>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/connect.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 67dad54..88c84a3 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1706,9 +1706,6 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
 	if (ses) {
 		cFYI(1, "Existing smb sess found (status=%d)", ses->status);
 
-		/* existing SMB ses has a server reference already */
-		cifs_put_tcp_session(server);
-
 		mutex_lock(&ses->session_mutex);
 		rc = cifs_negotiate_protocol(xid, ses);
 		if (rc) {
@@ -1731,6 +1728,9 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
 			}
 		}
 		mutex_unlock(&ses->session_mutex);
+
+		/* existing SMB ses has a server reference already */
+		cifs_put_tcp_session(server);
 		FreeXid(xid);
 		return ses;
 	}
-- 
cgit v1.1


From 75e1c70fc31490ef8a373ea2a4bea2524099b478 Mon Sep 17 00:00:00 2001
From: Jeff Moyer <jmoyer@redhat.com>
Date: Fri, 10 Sep 2010 14:16:00 -0700
Subject: aio: check for multiplication overflow in do_io_submit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Tavis Ormandy pointed out that do_io_submit does not do proper bounds
checking on the passed-in iocb array:

       if (unlikely(nr < 0))
               return -EINVAL;

       if (unlikely(!access_ok(VERIFY_READ, iocbpp, (nr*sizeof(iocbpp)))))
               return -EFAULT;                      ^^^^^^^^^^^^^^^^^^

The attached patch checks for overflow, and if it is detected, the
number of iocbs submitted is scaled down to a number that will fit in
the long.  This is an ok thing to do, as sys_io_submit is documented as
returning the number of iocbs submitted, so callers should handle a
return value of less than the 'nr' argument passed in.

Reported-by: Tavis Ormandy <taviso@cmpxchg8b.com>
Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/aio.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'fs')

diff --git a/fs/aio.c b/fs/aio.c
index 3006b5b..1320b2a 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1659,6 +1659,9 @@ long do_io_submit(aio_context_t ctx_id, long nr,
 	if (unlikely(nr < 0))
 		return -EINVAL;
 
+	if (unlikely(nr > LONG_MAX/sizeof(*iocbpp)))
+		nr = LONG_MAX/sizeof(*iocbpp);
+
 	if (unlikely(!access_ok(VERIFY_READ, iocbpp, (nr*sizeof(*iocbpp)))))
 		return -EFAULT;
 
-- 
cgit v1.1


From ae00d4f37f4df56821331deb1028748110dd6dc9 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Thu, 16 Sep 2010 16:26:51 -0700
Subject: ceph: fix cap_snap and realm split

The cap_snap creation/queueing relies on both the current i_head_snapc
_and_ the i_snap_realm pointers being correct, so that the new cap_snap
can properly reference the old context and the new i_head_snapc can be
updated to reference the new snaprealm's context.  To fix this, we:

 - move inodes completely to the new (split) realm so that i_snap_realm
   is correct, and
 - generate the new snapc's _before_ queueing the cap_snaps in
   ceph_update_snap_trace().

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/addr.c  |  4 +--
 fs/ceph/snap.c  | 88 +++++++++++++++++++--------------------------------------
 fs/ceph/super.h |  2 ++
 3 files changed, 33 insertions(+), 61 deletions(-)

(limited to 'fs')

diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 50461b8..efbc604 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -411,8 +411,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
 	if (i_size < page_off + len)
 		len = i_size - page_off;
 
-	dout("writepage %p page %p index %lu on %llu~%u\n",
-	     inode, page, page->index, page_off, len);
+	dout("writepage %p page %p index %lu on %llu~%u snapc %p\n",
+	     inode, page, page->index, page_off, len, snapc);
 
 	writeback_stat = atomic_long_inc_return(&client->writeback_count);
 	if (writeback_stat >
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 9e836af..9e6eef1 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -119,6 +119,7 @@ static struct ceph_snap_realm *ceph_create_snap_realm(
 	INIT_LIST_HEAD(&realm->children);
 	INIT_LIST_HEAD(&realm->child_item);
 	INIT_LIST_HEAD(&realm->empty_item);
+	INIT_LIST_HEAD(&realm->dirty_item);
 	INIT_LIST_HEAD(&realm->inodes_with_caps);
 	spin_lock_init(&realm->inodes_with_caps_lock);
 	__insert_snap_realm(&mdsc->snap_realms, realm);
@@ -604,6 +605,7 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc,
 	struct ceph_snap_realm *realm;
 	int invalidate = 0;
 	int err = -ENOMEM;
+	LIST_HEAD(dirty_realms);
 
 	dout("update_snap_trace deletion=%d\n", deletion);
 more:
@@ -626,24 +628,6 @@ more:
 		}
 	}
 
-	if (le64_to_cpu(ri->seq) > realm->seq) {
-		dout("update_snap_trace updating %llx %p %lld -> %lld\n",
-		     realm->ino, realm, realm->seq, le64_to_cpu(ri->seq));
-		/*
-		 * if the realm seq has changed, queue a cap_snap for every
-		 * inode with open caps.  we do this _before_ we update
-		 * the realm info so that we prepare for writeback under the
-		 * _previous_ snap context.
-		 *
-		 * ...unless it's a snap deletion!
-		 */
-		if (!deletion)
-			queue_realm_cap_snaps(realm);
-	} else {
-		dout("update_snap_trace %llx %p seq %lld unchanged\n",
-		     realm->ino, realm, realm->seq);
-	}
-
 	/* ensure the parent is correct */
 	err = adjust_snap_realm_parent(mdsc, realm, le64_to_cpu(ri->parent));
 	if (err < 0)
@@ -651,6 +635,8 @@ more:
 	invalidate += err;
 
 	if (le64_to_cpu(ri->seq) > realm->seq) {
+		dout("update_snap_trace updating %llx %p %lld -> %lld\n",
+		     realm->ino, realm, realm->seq, le64_to_cpu(ri->seq));
 		/* update realm parameters, snap lists */
 		realm->seq = le64_to_cpu(ri->seq);
 		realm->created = le64_to_cpu(ri->created);
@@ -668,9 +654,17 @@ more:
 		if (err < 0)
 			goto fail;
 
+		/* queue realm for cap_snap creation */
+		list_add(&realm->dirty_item, &dirty_realms);
+
 		invalidate = 1;
 	} else if (!realm->cached_context) {
+		dout("update_snap_trace %llx %p seq %lld new\n",
+		     realm->ino, realm, realm->seq);
 		invalidate = 1;
+	} else {
+		dout("update_snap_trace %llx %p seq %lld unchanged\n",
+		     realm->ino, realm, realm->seq);
 	}
 
 	dout("done with %llx %p, invalidated=%d, %p %p\n", realm->ino,
@@ -683,6 +677,14 @@ more:
 	if (invalidate)
 		rebuild_snap_realms(realm);
 
+	/*
+	 * queue cap snaps _after_ we've built the new snap contexts,
+	 * so that i_head_snapc can be set appropriately.
+	 */
+	list_for_each_entry(realm, &dirty_realms, dirty_item) {
+		queue_realm_cap_snaps(realm);
+	}
+
 	__cleanup_empty_realms(mdsc);
 	return 0;
 
@@ -816,6 +818,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
 			};
 			struct inode *inode = ceph_find_inode(sb, vino);
 			struct ceph_inode_info *ci;
+			struct ceph_snap_realm *oldrealm;
 
 			if (!inode)
 				continue;
@@ -841,18 +844,19 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
 			dout(" will move %p to split realm %llx %p\n",
 			     inode, realm->ino, realm);
 			/*
-			 * Remove the inode from the realm's inode
-			 * list, but don't add it to the new realm
-			 * yet.  We don't want the cap_snap to be
-			 * queued (again) by ceph_update_snap_trace()
-			 * below.  Queue it _now_, under the old context.
+			 * Move the inode to the new realm
 			 */
 			spin_lock(&realm->inodes_with_caps_lock);
 			list_del_init(&ci->i_snap_realm_item);
+			list_add(&ci->i_snap_realm_item,
+				 &realm->inodes_with_caps);
+			oldrealm = ci->i_snap_realm;
+			ci->i_snap_realm = realm;
 			spin_unlock(&realm->inodes_with_caps_lock);
 			spin_unlock(&inode->i_lock);
 
-			ceph_queue_cap_snap(ci);
+			ceph_get_snap_realm(mdsc, realm);
+			ceph_put_snap_realm(mdsc, oldrealm);
 
 			iput(inode);
 			continue;
@@ -880,43 +884,9 @@ skip_inode:
 	ceph_update_snap_trace(mdsc, p, e,
 			       op == CEPH_SNAP_OP_DESTROY);
 
-	if (op == CEPH_SNAP_OP_SPLIT) {
-		/*
-		 * ok, _now_ add the inodes into the new realm.
-		 */
-		for (i = 0; i < num_split_inos; i++) {
-			struct ceph_vino vino = {
-				.ino = le64_to_cpu(split_inos[i]),
-				.snap = CEPH_NOSNAP,
-			};
-			struct inode *inode = ceph_find_inode(sb, vino);
-			struct ceph_inode_info *ci;
-
-			if (!inode)
-				continue;
-			ci = ceph_inode(inode);
-			spin_lock(&inode->i_lock);
-			if (list_empty(&ci->i_snap_realm_item)) {
-				struct ceph_snap_realm *oldrealm =
-					ci->i_snap_realm;
-
-				dout(" moving %p to split realm %llx %p\n",
-				     inode, realm->ino, realm);
-				spin_lock(&realm->inodes_with_caps_lock);
-				list_add(&ci->i_snap_realm_item,
-					 &realm->inodes_with_caps);
-				ci->i_snap_realm = realm;
-				spin_unlock(&realm->inodes_with_caps_lock);
-				ceph_get_snap_realm(mdsc, realm);
-				ceph_put_snap_realm(mdsc, oldrealm);
-			}
-			spin_unlock(&inode->i_lock);
-			iput(inode);
-		}
-
+	if (op == CEPH_SNAP_OP_SPLIT)
 		/* we took a reference when we created the realm, above */
 		ceph_put_snap_realm(mdsc, realm);
-	}
 
 	__cleanup_empty_realms(mdsc);
 
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index c33897a..c80bfbe 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -690,6 +690,8 @@ struct ceph_snap_realm {
 
 	struct list_head empty_item;     /* if i have ref==0 */
 
+	struct list_head dirty_item;     /* if realm needs new context */
+
 	/* the current set of snaps for this realm */
 	struct ceph_snap_context *cached_context;
 
-- 
cgit v1.1


From 5f4874903df3562b9d5649fc1cf7b8c6bb238e42 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Thu, 9 Sep 2010 14:45:00 +0100
Subject: GFS2: gfs2_logd should be using interruptible waits

Looks like this crept in, in a recent update.

Reported-by:  Krzysztof Urbaniak <urban@bash.org.pl>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/log.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index cde1248..ac750bd 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -932,7 +932,7 @@ int gfs2_logd(void *data)
 
 		do {
 			prepare_to_wait(&sdp->sd_logd_waitq, &wait,
-					TASK_UNINTERRUPTIBLE);
+					TASK_INTERRUPTIBLE);
 			if (!gfs2_ail_flush_reqd(sdp) &&
 			    !gfs2_jrnl_flush_reqd(sdp) &&
 			    !kthread_should_stop())
-- 
cgit v1.1


From e835124c2be289515b918f2688ced4249e2de566 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Fri, 17 Sep 2010 08:03:08 -0700
Subject: ceph: only send one flushsnap per cap_snap per mds session

Sending multiple flushsnap messages is problematic because we ignore
the response if the tid doesn't match, and the server may only respond to
each one once.  It's also a waste.

So, skip cap_snaps that are already on the flushing list, unless the caller
tells us to resend (because we are reconnecting).

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/caps.c  | 19 +++++++++++++++----
 fs/ceph/snap.c  |  2 +-
 fs/ceph/super.h |  3 ++-
 3 files changed, 18 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 9fbe901..b01c316 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1195,10 +1195,14 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
  * asynchronously back to the MDS once sync writes complete and dirty
  * data is written out.
  *
+ * Unless @again is true, skip cap_snaps that were already sent to
+ * the MDS (i.e., during this session).
+ *
  * Called under i_lock.  Takes s_mutex as needed.
  */
 void __ceph_flush_snaps(struct ceph_inode_info *ci,
-			struct ceph_mds_session **psession)
+			struct ceph_mds_session **psession,
+			int again)
 		__releases(ci->vfs_inode->i_lock)
 		__acquires(ci->vfs_inode->i_lock)
 {
@@ -1240,6 +1244,13 @@ retry:
 			dout("no auth cap (migrating?), doing nothing\n");
 			goto out;
 		}
+
+		/* only flush each capsnap once */
+		if (!again && !list_empty(&capsnap->flushing_item)) {
+			dout("already flushed %p, skipping\n", capsnap);
+			continue;
+		}
+
 		mds = ci->i_auth_cap->session->s_mds;
 		mseq = ci->i_auth_cap->mseq;
 
@@ -1314,7 +1325,7 @@ static void ceph_flush_snaps(struct ceph_inode_info *ci)
 	struct inode *inode = &ci->vfs_inode;
 
 	spin_lock(&inode->i_lock);
-	__ceph_flush_snaps(ci, NULL);
+	__ceph_flush_snaps(ci, NULL, 0);
 	spin_unlock(&inode->i_lock);
 }
 
@@ -1477,7 +1488,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
 
 	/* flush snaps first time around only */
 	if (!list_empty(&ci->i_cap_snaps))
-		__ceph_flush_snaps(ci, &session);
+		__ceph_flush_snaps(ci, &session, 0);
 	goto retry_locked;
 retry:
 	spin_lock(&inode->i_lock);
@@ -1894,7 +1905,7 @@ static void kick_flushing_capsnaps(struct ceph_mds_client *mdsc,
 		if (cap && cap->session == session) {
 			dout("kick_flushing_caps %p cap %p capsnap %p\n", inode,
 			     cap, capsnap);
-			__ceph_flush_snaps(ci, &session);
+			__ceph_flush_snaps(ci, &session, 1);
 		} else {
 			pr_err("%p auth cap %p not mds%d ???\n", inode,
 			       cap, session->s_mds);
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 9e6eef1..190b6c4 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -717,7 +717,7 @@ static void flush_snaps(struct ceph_mds_client *mdsc)
 		igrab(inode);
 		spin_unlock(&mdsc->snap_flush_lock);
 		spin_lock(&inode->i_lock);
-		__ceph_flush_snaps(ci, &session);
+		__ceph_flush_snaps(ci, &session, 0);
 		spin_unlock(&inode->i_lock);
 		iput(inode);
 		spin_lock(&mdsc->snap_flush_lock);
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index c80bfbe..b87638e 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -828,7 +828,8 @@ extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had);
 extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
 				       struct ceph_snap_context *snapc);
 extern void __ceph_flush_snaps(struct ceph_inode_info *ci,
-			       struct ceph_mds_session **psession);
+			       struct ceph_mds_session **psession,
+			       int again);
 extern void ceph_check_caps(struct ceph_inode_info *ci, int flags,
 			    struct ceph_mds_session *session);
 extern void ceph_check_delayed_caps(struct ceph_mds_client *mdsc);
-- 
cgit v1.1


From a43fb73101eaf6db0b33d22c152b338ab8b3edbb Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Fri, 17 Sep 2010 09:54:08 -0700
Subject: ceph: check mapping to determine if FILE_CACHE cap is used

See if the i_data mapping has any pages to determine if the FILE_CACHE
capability is currently in use, instead of assuming it is any time the
rdcache_gen value is set (i.e., issued -> used).

This allows the MDS RECALL_STATE process work for inodes that have cached
pages.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/caps.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index b01c316..73c1530 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -814,7 +814,7 @@ int __ceph_caps_used(struct ceph_inode_info *ci)
 		used |= CEPH_CAP_PIN;
 	if (ci->i_rd_ref)
 		used |= CEPH_CAP_FILE_RD;
-	if (ci->i_rdcache_ref || ci->i_rdcache_gen)
+	if (ci->i_rdcache_ref || ci->vfs_inode.i_data.nrpages)
 		used |= CEPH_CAP_FILE_CACHE;
 	if (ci->i_wr_ref)
 		used |= CEPH_CAP_FILE_WR;
-- 
cgit v1.1


From be4f104dfd3b5e3ae262bff607965cfc38027dec Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Fri, 17 Sep 2010 12:30:31 -0700
Subject: ceph: select CRYPTO

We select CRYPTO_AES, but not CRYPTO.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/Kconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/ceph/Kconfig b/fs/ceph/Kconfig
index bc87b9c..0fcd264 100644
--- a/fs/ceph/Kconfig
+++ b/fs/ceph/Kconfig
@@ -3,6 +3,7 @@ config CEPH_FS
 	depends on INET && EXPERIMENTAL
 	select LIBCRC32C
 	select CRYPTO_AES
+	select CRYPTO
 	help
 	  Choose Y or M here to include support for mounting the
 	  experimental Ceph distributed file system.  Ceph is an extremely
-- 
cgit v1.1


From 50aff040363d31f87e94f38f1710973d99489951 Mon Sep 17 00:00:00 2001
From: Wu Fengguang <fengguang.wu@intel.com>
Date: Sat, 21 Aug 2010 14:40:20 +0800
Subject: ocfs2/net: fix uninitialized ret in o2net_send_message_vec()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

mmotm/fs/ocfs2/cluster/tcp.c: In function ‘o2net_send_message_vec’:
mmotm/fs/ocfs2/cluster/tcp.c:980:6: warning: ‘ret’ may be used uninitialized in this function

It seems a real bug introduced by commit 9af0b38ff3 (ocfs2/net:
Use wait_event() in o2net_send_message_vec()).

cc: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/cluster/tcp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 1361997..cbe2f05 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -977,7 +977,7 @@ static int o2net_tx_can_proceed(struct o2net_node *nn,
 int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec,
 			   size_t caller_veclen, u8 target_node, int *status)
 {
-	int ret;
+	int ret = 0;
 	struct o2net_msg *msg = NULL;
 	size_t veclen, caller_bytes = 0;
 	struct kvec *vec = NULL;
-- 
cgit v1.1


From 112d421df2fddc0278584b084f4fcfedd144c5f4 Mon Sep 17 00:00:00 2001
From: Jan Harkes <jaharkes@cs.cmu.edu>
Date: Fri, 17 Sep 2010 23:26:01 -0400
Subject: Coda: mount hangs because of missed REQ_WRITE rename

Coda's REQ_* defines were renamed to avoid clashes with the block layer
(commit 4aeefdc69f7b: "coda: fixup clash with block layer REQ_*
defines").

However one was missed and response messages are no longer matched with
requests and waiting threads are no longer woken up.  This patch fixes
this.

Signed-off-by: Jan Harkes <jaharkes@cs.cmu.edu>
[ Also fixed up whitespace while at it  -Linus ]
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/coda/psdev.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c
index de89645..116af75 100644
--- a/fs/coda/psdev.c
+++ b/fs/coda/psdev.c
@@ -184,8 +184,8 @@ static ssize_t coda_psdev_write(struct file *file, const char __user *buf,
 	}
 
 	/* adjust outsize. is this useful ?? */
-        req->uc_outSize = nbytes;	
-        req->uc_flags |= REQ_WRITE;
+	req->uc_outSize = nbytes;
+	req->uc_flags |= CODA_REQ_WRITE;
 	count = nbytes;
 
 	/* Convert filedescriptor into a file handle */
-- 
cgit v1.1


From 371d217ee1ff8b418b8f73fb2a34990f951ec2d4 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 21 Sep 2010 11:49:01 +0200
Subject: char: Mark /dev/zero and /dev/kmem as not capable of writeback

These devices don't do any writeback but their device inodes still can get
dirty so mark bdi appropriately so that bdi code does the right thing and files
inodes to lists of bdi carrying the device inodes.

Cc: stable@kernel.org
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 fs/char_dev.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/char_dev.c b/fs/char_dev.c
index f80a4f2..143d393 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -40,7 +40,9 @@ struct backing_dev_info directly_mappable_cdev_bdi = {
 #endif
 		/* permit direct mmap, for read, write or exec */
 		BDI_CAP_MAP_DIRECT |
-		BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP),
+		BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP |
+		/* no writeback happens */
+		BDI_CAP_NO_ACCT_AND_WRITEBACK),
 };
 
 static struct kobj_map *cdev_map;
-- 
cgit v1.1


From 692ebd17c2905313fff3c504c249c6a0faad16ec Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 21 Sep 2010 11:51:01 +0200
Subject: bdi: Fix warnings in __mark_inode_dirty for /dev/zero and friends

Inodes of devices such as /dev/zero can get dirty for example via
utime(2) syscall or due to atime update. Backing device of such inodes
(zero_bdi, etc.) is however unable to handle dirty inodes and thus
__mark_inode_dirty complains.  In fact, inode should be rather dirtied
against backing device of the filesystem holding it. This is generally a
good rule except for filesystems such as 'bdev' or 'mtd_inodefs'. Inodes
in these pseudofilesystems are referenced from ordinary filesystem
inodes and carry mapping with real data of the device. Thus for these
inodes we have to use inode->i_mapping->backing_dev_info as we did so
far. We distinguish these filesystems by checking whether sb->s_bdi
points to a non-trivial backing device or not.

Example: Assume we have an ext3 filesystem on /dev/sda1 mounted on /.
There's a device inode A described by a path "/dev/sdb" on this
filesystem. This inode will be dirtied against backing device "8:0"
after this patch. bdev filesystem contains block device inode B coupled
with our inode A. When someone modifies a page of /dev/sdb, it's B that
gets dirtied and the dirtying happens against the backing device "8:16".
Thus both inodes get filed to a correct bdi list.

Cc: stable@kernel.org
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 fs/fs-writeback.c | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 81e086d..5581122 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -52,8 +52,6 @@ struct wb_writeback_work {
 #define CREATE_TRACE_POINTS
 #include <trace/events/writeback.h>
 
-#define inode_to_bdi(inode)	((inode)->i_mapping->backing_dev_info)
-
 /*
  * We don't actually have pdflush, but this one is exported though /proc...
  */
@@ -71,6 +69,27 @@ int writeback_in_progress(struct backing_dev_info *bdi)
 	return test_bit(BDI_writeback_running, &bdi->state);
 }
 
+static inline struct backing_dev_info *inode_to_bdi(struct inode *inode)
+{
+	struct super_block *sb = inode->i_sb;
+	struct backing_dev_info *bdi = inode->i_mapping->backing_dev_info;
+
+	/*
+	 * For inodes on standard filesystems, we use superblock's bdi. For
+	 * inodes on virtual filesystems, we want to use inode mapping's bdi
+	 * because they can possibly point to something useful (think about
+	 * block_dev filesystem).
+	 */
+	if (sb->s_bdi && sb->s_bdi != &noop_backing_dev_info) {
+		/* Some device inodes could play dirty tricks. Catch them... */
+		WARN(bdi != sb->s_bdi && bdi_cap_writeback_dirty(bdi),
+			"Dirtiable inode bdi %s != sb bdi %s\n",
+			bdi->name, sb->s_bdi->name);
+		return sb->s_bdi;
+	}
+	return bdi;
+}
+
 static void bdi_queue_work(struct backing_dev_info *bdi,
 		struct wb_writeback_work *work)
 {
-- 
cgit v1.1


From 767b68e96993e29e3480d7ecdd9c4b84667c5762 Mon Sep 17 00:00:00 2001
From: Dan Rosenberg <drosenberg@vsecurity.com>
Date: Wed, 22 Sep 2010 14:32:56 -0400
Subject: Prevent freeing uninitialized pointer in compat_do_readv_writev

In 32-bit compatibility mode, the error handling for
compat_do_readv_writev() may free an uninitialized pointer, potentially
leading to all sorts of ugly memory corruption.  This is reliably
triggerable by unprivileged users by invoking the readv()/writev()
syscalls with an invalid iovec pointer.  The below patch fixes this to
emulate the non-compat version.

Introduced by commit b83733639a49 ("compat: factor out
compat_rw_copy_check_uvector from compat_do_readv_writev")

Signed-off-by: Dan Rosenberg <dan.j.rosenberg@gmail.com>
Cc: stable@kernel.org (2.6.35)
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/compat.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/compat.c b/fs/compat.c
index 718c706..0644a15 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1153,7 +1153,7 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
 {
 	compat_ssize_t tot_len;
 	struct iovec iovstack[UIO_FASTIOV];
-	struct iovec *iov;
+	struct iovec *iov = iovstack;
 	ssize_t ret;
 	io_fn_t fn;
 	iov_fn_t fnv;
-- 
cgit v1.1


From c227e69028473c7c7994a9b0a2cc0034f3f7e0fe Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 22 Sep 2010 13:04:54 -0700
Subject: /proc/vmcore: fix seeking

Commit 73296bc611 ("procfs: Use generic_file_llseek in /proc/vmcore")
broke seeking on /proc/vmcore.  This changes it back to use default_llseek
in order to restore the original behaviour.

The problem with generic_file_llseek is that it only allows seeks up to
inode->i_sb->s_maxbytes, which is zero on procfs and some other virtual
file systems.  We should merge generic_file_llseek and default_llseek some
day and clean this up in a proper way, but for 2.6.35/36, reverting vmcore
is the safer solution.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Reported-by: CAI Qian <caiqian@redhat.com>
Tested-by: CAI Qian <caiqian@redhat.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/vmcore.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 91c817f..2367fb3 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -163,7 +163,7 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer,
 
 static const struct file_operations proc_vmcore_operations = {
 	.read		= read_vmcore,
-	.llseek		= generic_file_llseek,
+	.llseek		= default_llseek,
 };
 
 static struct vmcore* __init get_new_element(void)
-- 
cgit v1.1


From a0c42bac79731276c9b2f28d54f9e658fcf843a2 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 22 Sep 2010 13:05:03 -0700
Subject: aio: do not return ERESTARTSYS as a result of AIO

OCFS2 can return ERESTARTSYS from its write function when the process is
signalled while waiting for a cluster lock (and the filesystem is mounted
with intr mount option).  Generally, it seems reasonable to allow
filesystems to return this error code from its IO functions.  As we must
not leak ERESTARTSYS (and similar error codes) to userspace as a result of
an AIO operation, we have to properly convert it to EINTR inside AIO code
(restarting the syscall isn't really an option because other AIO could
have been already submitted by the same io_submit syscall).

Signed-off-by: Jan Kara <jack@suse.cz>
Reviewed-by: Jeff Moyer <jmoyer@redhat.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Zach Brown <zach.brown@oracle.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/aio.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/aio.c b/fs/aio.c
index 1320b2a..250b0a7 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -712,8 +712,16 @@ static ssize_t aio_run_iocb(struct kiocb *iocb)
 	 */
 	ret = retry(iocb);
 
-	if (ret != -EIOCBRETRY && ret != -EIOCBQUEUED)
+	if (ret != -EIOCBRETRY && ret != -EIOCBQUEUED) {
+		/*
+		 * There's no easy way to restart the syscall since other AIO's
+		 * may be already running. Just fail this IO with EINTR.
+		 */
+		if (unlikely(ret == -ERESTARTSYS || ret == -ERESTARTNOINTR ||
+			     ret == -ERESTARTNOHAND || ret == -ERESTART_RESTARTBLOCK))
+			ret = -EINTR;
 		aio_complete(iocb, ret, 0);
+	}
 out:
 	spin_lock_irq(&ctx->ctx_lock);
 
-- 
cgit v1.1


From 1c2499ae87f828eabddf6483b0dfc11da1100c07 Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Wed, 22 Sep 2010 13:05:06 -0700
Subject: /proc/pid/smaps: fix dirty pages accounting

Currently, /proc/<pid>/smaps has wrong dirty pages accounting.
Shared_Dirty and Private_Dirty output only pte dirty pages and ignore
PG_dirty page flag.  It is difference against documentation, but also
inconsistent against Referenced field.  (Referenced checks both pte and
page flags)

This patch fixes it.

Test program:

 large-array.c
 ---------------------------------------------------
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>

 char array[1*1024*1024*1024L];

 int main(void)
 {
         memset(array, 1, sizeof(array));
         pause();

         return 0;
 }
 ---------------------------------------------------

Test case:
 1. run ./large-array
 2. cat /proc/`pidof large-array`/smaps
 3. swapoff -a
 4. cat /proc/`pidof large-array`/smaps again

Test result:
 <before patch>

00601000-40601000 rw-p 00000000 00:00 0
Size:            1048576 kB
Rss:             1048576 kB
Pss:             1048576 kB
Shared_Clean:          0 kB
Shared_Dirty:          0 kB
Private_Clean:    218992 kB   <-- showed pages as clean incorrectly
Private_Dirty:    829584 kB
Referenced:       388364 kB
Swap:                  0 kB
KernelPageSize:        4 kB
MMUPageSize:           4 kB

 <after patch>

00601000-40601000 rw-p 00000000 00:00 0
Size:            1048576 kB
Rss:             1048576 kB
Pss:             1048576 kB
Shared_Clean:          0 kB
Shared_Dirty:          0 kB
Private_Clean:         0 kB
Private_Dirty:   1048576 kB  <-- fixed
Referenced:       388480 kB
Swap:                  0 kB
KernelPageSize:        4 kB
MMUPageSize:           4 kB

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Acked-by: Hugh Dickins <hughd@google.com>
Cc: Matt Mackall <mpm@selenic.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/task_mmu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 271afc4..1dbca4e8 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -363,13 +363,13 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 			mss->referenced += PAGE_SIZE;
 		mapcount = page_mapcount(page);
 		if (mapcount >= 2) {
-			if (pte_dirty(ptent))
+			if (pte_dirty(ptent) || PageDirty(page))
 				mss->shared_dirty += PAGE_SIZE;
 			else
 				mss->shared_clean += PAGE_SIZE;
 			mss->pss += (PAGE_SIZE << PSS_SHIFT) / mapcount;
 		} else {
-			if (pte_dirty(ptent))
+			if (pte_dirty(ptent) || PageDirty(page))
 				mss->private_dirty += PAGE_SIZE;
 			else
 				mss->private_clean += PAGE_SIZE;
-- 
cgit v1.1


From 12828061cdacfb1db3eb03fd71952d5ebc555bbb Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Mon, 13 Sep 2010 14:00:23 +0800
Subject: ocfs2: update ctime when changing the file's permission by setfacl

In commit 30e2bab, ext3 fixed it. So change it accordingly in ocfs2.

Steps to reproduce:
# touch aaa
# stat -c %Z aaa
1283760364
# setfacl -m  'u::x,g::x,o::x' aaa
# stat -c %Z aaa
1283760364

Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/acl.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'fs')

diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index a76e0aa..3919150 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -209,7 +209,10 @@ static int ocfs2_acl_set_mode(struct inode *inode, struct buffer_head *di_bh,
 	}
 
 	inode->i_mode = new_mode;
+	inode->i_ctime = CURRENT_TIME;
 	di->i_mode = cpu_to_le16(inode->i_mode);
+	di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
+	di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
 
 	ocfs2_journal_dirty(handle, di_bh);
 
-- 
cgit v1.1


From 47dea423799d98c53793237ab386a94976f305d5 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Mon, 13 Sep 2010 15:13:50 +0800
Subject: ocfs2: Use cpu_to_le16 for e_leaf_clusters in
 ocfs2_bg_discontig_add_extent.

e_leaf_clusters is a le16, so use cpu_to_le16 instead
of cpu_to_le32.

What's more, we change 'clusters' to unsigned int to
signify that the size of 'clusters' isn't important here.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/suballoc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 8a286f5..849c2f0 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -357,7 +357,7 @@ out:
 static void ocfs2_bg_discontig_add_extent(struct ocfs2_super *osb,
 					  struct ocfs2_group_desc *bg,
 					  struct ocfs2_chain_list *cl,
-					  u64 p_blkno, u32 clusters)
+					  u64 p_blkno, unsigned int clusters)
 {
 	struct ocfs2_extent_list *el = &bg->bg_list;
 	struct ocfs2_extent_rec *rec;
@@ -369,7 +369,7 @@ static void ocfs2_bg_discontig_add_extent(struct ocfs2_super *osb,
 	rec->e_blkno = cpu_to_le64(p_blkno);
 	rec->e_cpos = cpu_to_le32(le16_to_cpu(bg->bg_bits) /
 				  le16_to_cpu(cl->cl_bpc));
-	rec->e_leaf_clusters = cpu_to_le32(clusters);
+	rec->e_leaf_clusters = cpu_to_le16(clusters);
 	le16_add_cpu(&bg->bg_bits, clusters * le16_to_cpu(cl->cl_bpc));
 	le16_add_cpu(&bg->bg_free_bits_count,
 		     clusters * le16_to_cpu(cl->cl_bpc));
-- 
cgit v1.1


From 4a452de4fdfe4dbb27e491904d8bfaf1262bdff4 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Sun, 19 Sep 2010 13:42:28 +0800
Subject: ocfs2: Move 'wanted' into parens of ocfs2_resmap_resv_bits.

The first time I read the function ocfs2_resmap_resv_bits, I consider
about what 'wanted' will be used and consider about the comments.
Then I find it is only used if the reservation is empty. ;)

So we'd better move it to the parens so that it make the code more
readable, what's more, ocfs2_resmap_resv_bits is used so frequently
and we should save some cpus.

Acked-by: Mark Fasheh <mfasheh@suse.com>
Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/reservations.c | 22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/reservations.c b/fs/ocfs2/reservations.c
index d8b6e42..3e78db3 100644
--- a/fs/ocfs2/reservations.c
+++ b/fs/ocfs2/reservations.c
@@ -732,25 +732,23 @@ int ocfs2_resmap_resv_bits(struct ocfs2_reservation_map *resmap,
 			   struct ocfs2_alloc_reservation *resv,
 			   int *cstart, int *clen)
 {
-	unsigned int wanted = *clen;
-
 	if (resv == NULL || ocfs2_resmap_disabled(resmap))
 		return -ENOSPC;
 
 	spin_lock(&resv_lock);
 
-	/*
-	 * We don't want to over-allocate for temporary
-	 * windows. Otherwise, we run the risk of fragmenting the
-	 * allocation space.
-	 */
-	wanted = ocfs2_resv_window_bits(resmap, resv);
-	if ((resv->r_flags & OCFS2_RESV_FLAG_TMP) || wanted < *clen)
-		wanted = *clen;
-
 	if (ocfs2_resv_empty(resv)) {
-		mlog(0, "empty reservation, find new window\n");
+		/*
+		 * We don't want to over-allocate for temporary
+		 * windows. Otherwise, we run the risk of fragmenting the
+		 * allocation space.
+		 */
+		unsigned int wanted = ocfs2_resv_window_bits(resmap, resv);
 
+		if ((resv->r_flags & OCFS2_RESV_FLAG_TMP) || wanted < *clen)
+			wanted = *clen;
+
+		mlog(0, "empty reservation, find new window\n");
 		/*
 		 * Try to get a window here. If it works, we must fall
 		 * through and test the bitmap . This avoids some
-- 
cgit v1.1


From 0000b862027d624ac564609b87c1aa4d14dd1e46 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Sun, 19 Sep 2010 13:42:29 +0800
Subject: ocfs2: Sync inode flags with ext2.

We sync our inode flags with ext2 and define them by hex
values. But actually in commit 3669567(4 years ago), all
these values are moved to include/linux/fs.h. So we'd
better also use them as what ext2 did. So sync our inode
flags with ext2 by using FS_*.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/ocfs2_fs.h    | 37 +++++++++++++++++++++++++------------
 fs/ocfs2/ocfs2_ioctl.h |  8 ++++----
 2 files changed, 29 insertions(+), 16 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 33f1c9a..fa31d05 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -235,18 +235,31 @@
 #define OCFS2_HAS_REFCOUNT_FL   (0x0010)
 
 /* Inode attributes, keep in sync with EXT2 */
-#define OCFS2_SECRM_FL		(0x00000001)	/* Secure deletion */
-#define OCFS2_UNRM_FL		(0x00000002)	/* Undelete */
-#define OCFS2_COMPR_FL		(0x00000004)	/* Compress file */
-#define OCFS2_SYNC_FL		(0x00000008)	/* Synchronous updates */
-#define OCFS2_IMMUTABLE_FL	(0x00000010)	/* Immutable file */
-#define OCFS2_APPEND_FL		(0x00000020)	/* writes to file may only append */
-#define OCFS2_NODUMP_FL		(0x00000040)	/* do not dump file */
-#define OCFS2_NOATIME_FL	(0x00000080)	/* do not update atime */
-#define OCFS2_DIRSYNC_FL	(0x00010000)	/* dirsync behaviour (directories only) */
-
-#define OCFS2_FL_VISIBLE	(0x000100FF)	/* User visible flags */
-#define OCFS2_FL_MODIFIABLE	(0x000100FF)	/* User modifiable flags */
+#define OCFS2_SECRM_FL			FS_SECRM_FL	/* Secure deletion */
+#define OCFS2_UNRM_FL			FS_UNRM_FL	/* Undelete */
+#define OCFS2_COMPR_FL			FS_COMPR_FL	/* Compress file */
+#define OCFS2_SYNC_FL			FS_SYNC_FL	/* Synchronous updates */
+#define OCFS2_IMMUTABLE_FL		FS_IMMUTABLE_FL	/* Immutable file */
+#define OCFS2_APPEND_FL			FS_APPEND_FL	/* writes to file may only append */
+#define OCFS2_NODUMP_FL			FS_NODUMP_FL	/* do not dump file */
+#define OCFS2_NOATIME_FL		FS_NOATIME_FL	/* do not update atime */
+/* Reserved for compression usage... */
+#define OCFS2_DIRTY_FL			FS_DIRTY_FL
+#define OCFS2_COMPRBLK_FL		FS_COMPRBLK_FL	/* One or more compressed clusters */
+#define OCFS2_NOCOMP_FL			FS_NOCOMP_FL	/* Don't compress */
+#define OCFS2_ECOMPR_FL			FS_ECOMPR_FL	/* Compression error */
+/* End compression flags --- maybe not all used */
+#define OCFS2_BTREE_FL			FS_BTREE_FL	/* btree format dir */
+#define OCFS2_INDEX_FL			FS_INDEX_FL	/* hash-indexed directory */
+#define OCFS2_IMAGIC_FL			FS_IMAGIC_FL	/* AFS directory */
+#define OCFS2_JOURNAL_DATA_FL		FS_JOURNAL_DATA_FL /* Reserved for ext3 */
+#define OCFS2_NOTAIL_FL			FS_NOTAIL_FL	/* file tail should not be merged */
+#define OCFS2_DIRSYNC_FL		FS_DIRSYNC_FL	/* dirsync behaviour (directories only) */
+#define OCFS2_TOPDIR_FL			FS_TOPDIR_FL	/* Top of directory hierarchies*/
+#define OCFS2_RESERVED_FL		FS_RESERVED_FL	/* reserved for ext2 lib */
+
+#define OCFS2_FL_VISIBLE		FS_FL_USER_VISIBLE	/* User visible flags */
+#define OCFS2_FL_MODIFIABLE		FS_FL_USER_MODIFIABLE	/* User modifiable flags */
 
 /*
  * Extent record flags (e_node.leaf.flags)
diff --git a/fs/ocfs2/ocfs2_ioctl.h b/fs/ocfs2/ocfs2_ioctl.h
index 2d3420a..5d24150 100644
--- a/fs/ocfs2/ocfs2_ioctl.h
+++ b/fs/ocfs2/ocfs2_ioctl.h
@@ -23,10 +23,10 @@
 /*
  * ioctl commands
  */
-#define OCFS2_IOC_GETFLAGS	_IOR('f', 1, long)
-#define OCFS2_IOC_SETFLAGS	_IOW('f', 2, long)
-#define OCFS2_IOC32_GETFLAGS	_IOR('f', 1, int)
-#define OCFS2_IOC32_SETFLAGS	_IOW('f', 2, int)
+#define OCFS2_IOC_GETFLAGS	FS_IOC_GETFLAGS
+#define OCFS2_IOC_SETFLAGS	FS_IOC_SETFLAGS
+#define OCFS2_IOC32_GETFLAGS	FS_IOC32_GETFLAGS
+#define OCFS2_IOC32_SETFLAGS	FS_IOC32_SETFLAGS
 
 /*
  * Space reservation / allocation / free ioctls and argument structure
-- 
cgit v1.1


From 5dad6c39d156fbbde0b0ef170d9173feffdeb546 Mon Sep 17 00:00:00 2001
From: Srinivas Eeda <srinivas.eeda@oracle.com>
Date: Tue, 21 Sep 2010 16:27:26 -0700
Subject: o2dlm: force free mles during dlm exit

While umounting, a block mle doesn't get freed if dlm is shutdown after
master request is received but before assert master. This results in unclean
shutdown of dlm domain.

This patch frees all mles that lie around after other nodes were notified about
exiting the dlm and marking dlm state as leaving. Only block mles are expected
to be around, so we log ERROR for other mles but still free them.

Signed-off-by: Srinivas Eeda <srinivas.eeda@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/dlm/dlmcommon.h |  1 +
 fs/ocfs2/dlm/dlmdomain.c |  1 +
 fs/ocfs2/dlm/dlmmaster.c | 40 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 42 insertions(+)

(limited to 'fs')

diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index 4b6ae2c..7652989 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -1030,6 +1030,7 @@ int dlm_drop_lockres_ref(struct dlm_ctxt *dlm,
 			 struct dlm_lock_resource *res);
 void dlm_clean_master_list(struct dlm_ctxt *dlm,
 			   u8 dead_node);
+void dlm_force_free_mles(struct dlm_ctxt *dlm);
 int dlm_lock_basts_flushed(struct dlm_ctxt *dlm, struct dlm_lock *lock);
 int __dlm_lockres_has_locks(struct dlm_lock_resource *res);
 int __dlm_lockres_unused(struct dlm_lock_resource *res);
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 153abb5..11a5c87 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -693,6 +693,7 @@ void dlm_unregister_domain(struct dlm_ctxt *dlm)
 
 		dlm_mark_domain_leaving(dlm);
 		dlm_leave_domain(dlm);
+		dlm_force_free_mles(dlm);
 		dlm_complete_dlm_shutdown(dlm);
 	}
 	dlm_put(dlm);
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index ffb4c68..f564b0e 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -3433,3 +3433,43 @@ void dlm_lockres_release_ast(struct dlm_ctxt *dlm,
 	wake_up(&res->wq);
 	wake_up(&dlm->migration_wq);
 }
+
+void dlm_force_free_mles(struct dlm_ctxt *dlm)
+{
+	int i;
+	struct hlist_head *bucket;
+	struct dlm_master_list_entry *mle;
+	struct hlist_node *tmp, *list;
+
+	/*
+	 * We notified all other nodes that we are exiting the domain and
+	 * marked the dlm state to DLM_CTXT_LEAVING. If any mles are still
+	 * around we force free them and wake any processes that are waiting
+	 * on the mles
+	 */
+	spin_lock(&dlm->spinlock);
+	spin_lock(&dlm->master_lock);
+
+	BUG_ON(dlm->dlm_state != DLM_CTXT_LEAVING);
+	BUG_ON((find_next_bit(dlm->domain_map, O2NM_MAX_NODES, 0) < O2NM_MAX_NODES));
+
+	for (i = 0; i < DLM_HASH_BUCKETS; i++) {
+		bucket = dlm_master_hash(dlm, i);
+		hlist_for_each_safe(list, tmp, bucket) {
+			mle = hlist_entry(list, struct dlm_master_list_entry,
+					  master_hash_node);
+			if (mle->type != DLM_MLE_BLOCK) {
+				mlog(ML_ERROR, "bad mle: %p\n", mle);
+				dlm_print_one_mle(mle);
+			}
+			atomic_set(&mle->woken, 1);
+			wake_up(&mle->wq);
+
+			__dlm_unlink_mle(dlm, mle);
+			__dlm_mle_detach_hb_events(dlm, mle);
+			__dlm_put_mle(mle);
+		}
+	}
+	spin_unlock(&dlm->master_lock);
+	spin_unlock(&dlm->spinlock);
+}
-- 
cgit v1.1


From 80168676ebfe4af51407d30f336d67f082d45201 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Fri, 24 Sep 2010 18:13:44 +1000
Subject: xfs: force background CIL push under sustained load

I have been seeing occasional pauses in transaction throughput up to
30s long under heavy parallel workloads. The only notable thing was
that the xfsaild was trying to be active during the pauses, but
making no progress. It was running exactly 20 times a second (on the
50ms no-progress backoff), and the number of pushbuf events was
constant across this time as well.  IOWs, the xfsaild appeared to be
stuck on buffers that it could not push out.

Further investigation indicated that it was trying to push out inode
buffers that were pinned and/or locked. The xfsbufd was also getting
woken at the same frequency (by the xfsaild, no doubt) to push out
delayed write buffers. The xfsbufd was not making any progress
because all the buffers in the delwri queue were pinned. This scan-
and-make-no-progress dance went one in the trace for some seconds,
before the xfssyncd came along an issued a log force, and then
things started going again.

However, I noticed something strange about the log force - there
were way too many IO's issued. 516 log buffers were written, to be
exact. That added up to 129MB of log IO, which got me very
interested because it's almost exactly 25% of the size of the log.
He delayed logging code is suppose to aggregate the minimum of 25%
of the log or 8MB worth of changes before flushing. That's what
really puzzled me - why did a log force write 129MB instead of only
8MB?

Essentially what has happened is that no CIL pushes had occurred
since the previous tail push which cleared out 25% of the log space.
That caused all the new transactions to block because there wasn't
log space for them, but they kick the xfsaild to push the tail.
However, the xfsaild was not making progress because there were
buffers it could not lock and flush, and the xfsbufd could not flush
them because they were pinned. As a result, both the xfsaild and the
xfsbufd could not move the tail of the log forward without the CIL
first committing.

The cause of the problem was that the background CIL push, which
should happen when 8MB of aggregated changes have been committed, is
being held off by the concurrent transaction commit load. The
background push does a down_write_trylock() which will fail if there
is a concurrent transaction commit holding the push lock in read
mode. With 8 CPUs all doing transactions as fast as they can, there
was enough concurrent transaction commits to hold off the background
push until tail-pushing could no longer free log space, and the halt
would occur.

It should be noted that there is no reason why it would halt at 25%
of log space used by a single CIL checkpoint. This bug could
definitely violate the "no transaction should be larger than half
the log" requirement and hence result in corruption if the system
crashed under heavy load. This sort of bug is exactly the reason why
delayed logging was tagged as experimental....

The fix is to start blocking background pushes once the threshold
has been exceeded. Rework the threshold calculations to keep the
amount of log space a CIL checkpoint can use to below that of the
AIL push threshold to avoid the problem completely.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Alex Elder <aelder@sgi.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_log_cil.c  | 12 +++++++++---
 fs/xfs/xfs_log_priv.h | 37 +++++++++++++++++++++----------------
 2 files changed, 30 insertions(+), 19 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index ed575fb..7e206fc 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -405,9 +405,15 @@ xlog_cil_push(
 	new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS);
 	new_ctx->ticket = xlog_cil_ticket_alloc(log);
 
-	/* lock out transaction commit, but don't block on background push */
+	/*
+	 * Lock out transaction commit, but don't block for background pushes
+	 * unless we are well over the CIL space limit. See the definition of
+	 * XLOG_CIL_HARD_SPACE_LIMIT() for the full explanation of the logic
+	 * used here.
+	 */
 	if (!down_write_trylock(&cil->xc_ctx_lock)) {
-		if (!push_seq)
+		if (!push_seq &&
+		    cil->xc_ctx->space_used < XLOG_CIL_HARD_SPACE_LIMIT(log))
 			goto out_free_ticket;
 		down_write(&cil->xc_ctx_lock);
 	}
@@ -422,7 +428,7 @@ xlog_cil_push(
 		goto out_skip;
 
 	/* check for a previously pushed seqeunce */
-	if (push_seq < cil->xc_ctx->sequence)
+	if (push_seq && push_seq < cil->xc_ctx->sequence)
 		goto out_skip;
 
 	/*
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index ced52b9..edcdfe0 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -426,13 +426,13 @@ struct xfs_cil {
 };
 
 /*
- * The amount of log space we should the CIL to aggregate is difficult to size.
- * Whatever we chose we have to make we can get a reservation for the log space
- * effectively, that it is large enough to capture sufficient relogging to
- * reduce log buffer IO significantly, but it is not too large for the log or
- * induces too much latency when writing out through the iclogs. We track both
- * space consumed and the number of vectors in the checkpoint context, so we
- * need to decide which to use for limiting.
+ * The amount of log space we allow the CIL to aggregate is difficult to size.
+ * Whatever we choose, we have to make sure we can get a reservation for the
+ * log space effectively, that it is large enough to capture sufficient
+ * relogging to reduce log buffer IO significantly, but it is not too large for
+ * the log or induces too much latency when writing out through the iclogs. We
+ * track both space consumed and the number of vectors in the checkpoint
+ * context, so we need to decide which to use for limiting.
  *
  * Every log buffer we write out during a push needs a header reserved, which
  * is at least one sector and more for v2 logs. Hence we need a reservation of
@@ -459,16 +459,21 @@ struct xfs_cil {
  * checkpoint transaction ticket is specific to the checkpoint context, rather
  * than the CIL itself.
  *
- * With dynamic reservations, we can basically make up arbitrary limits for the
- * checkpoint size so long as they don't violate any other size rules.  Hence
- * the initial maximum size for the checkpoint transaction will be set to a
- * quarter of the log or 8MB, which ever is smaller. 8MB is an arbitrary limit
- * right now based on the latency of writing out a large amount of data through
- * the circular iclog buffers.
+ * With dynamic reservations, we can effectively make up arbitrary limits for
+ * the checkpoint size so long as they don't violate any other size rules.
+ * Recovery imposes a rule that no transaction exceed half the log, so we are
+ * limited by that.  Furthermore, the log transaction reservation subsystem
+ * tries to keep 25% of the log free, so we need to keep below that limit or we
+ * risk running out of free log space to start any new transactions.
+ *
+ * In order to keep background CIL push efficient, we will set a lower
+ * threshold at which background pushing is attempted without blocking current
+ * transaction commits.  A separate, higher bound defines when CIL pushes are
+ * enforced to ensure we stay within our maximum checkpoint size bounds.
+ * threshold, yet give us plenty of space for aggregation on large logs.
  */
-
-#define XLOG_CIL_SPACE_LIMIT(log)	\
-	(min((log->l_logsize >> 2), (8 * 1024 * 1024)))
+#define XLOG_CIL_SPACE_LIMIT(log)	(log->l_logsize >> 3)
+#define XLOG_CIL_HARD_SPACE_LIMIT(log)	(3 * (log->l_logsize >> 4))
 
 /*
  * The reservation head lsn is not made up of a cycle number and block number.
-- 
cgit v1.1


From 522440ed55d2cc8855ea5f82bc067e0483b2e1be Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Wed, 29 Sep 2010 09:49:54 -0400
Subject: cifs: set backing_dev_info on new S_ISREG inodes

Testing on very recent kernel (2.6.36-rc6) made this warning pop:

    WARNING: at fs/fs-writeback.c:87 inode_to_bdi+0x65/0x70()
    Hardware name:
    Dirtiable inode bdi default != sb bdi cifs

...the following patch fixes it and seems to be the obviously correct
thing to do for cifs.

Cc: stable@kernel.org
Acked-by: Dave Kleikamp <shaggy@linux.vnet.ibm.com>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/inode.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 93f77d4..53cce8c 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -801,6 +801,8 @@ retry_iget5_locked:
 			inode->i_flags |= S_NOATIME | S_NOCMTIME;
 		if (inode->i_state & I_NEW) {
 			inode->i_ino = hash;
+			if (S_ISREG(inode->i_mode))
+				inode->i_data.backing_dev_info = sb->s_bdi;
 #ifdef CONFIG_CIFS_FSCACHE
 			/* initialize per-inode cache cookie pointer */
 			CIFS_I(inode)->fscache = NULL;
-- 
cgit v1.1


From 1fc8a117865b54590acd773a55fbac9221b018f0 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Wed, 29 Sep 2010 17:33:05 -0700
Subject: ocfs2: Don't walk off the end of fast symlinks.

ocfs2 fast symlinks are NUL terminated strings stored inline in the
inode data area.  However, disk corruption or a local attacker could, in
theory, remove that NUL.  Because we're using strlen() (my fault,
introduced in a731d1 when removing vfs_follow_link()), we could walk off
the end of that string.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Cc: stable@kernel.org
---
 fs/ocfs2/symlink.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c
index 32499d21..9975457 100644
--- a/fs/ocfs2/symlink.c
+++ b/fs/ocfs2/symlink.c
@@ -128,7 +128,7 @@ static void *ocfs2_fast_follow_link(struct dentry *dentry,
 	}
 
 	/* Fast symlinks can't be large */
-	len = strlen(target);
+	len = strnlen(target, ocfs2_fast_symlink_chars(inode->i_sb));
 	link = kzalloc(len + 1, GFP_NOFS);
 	if (!link) {
 		status = -ENOMEM;
-- 
cgit v1.1


From f569599ae70f0899035f8d5876a7939f629c5976 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Wed, 29 Sep 2010 15:27:08 -0400
Subject: cifs: prevent infinite recursion in cifs_reconnect_tcon

cifs_reconnect_tcon is called from smb_init. After a successful
reconnect, cifs_reconnect_tcon will call reset_cifs_unix_caps. That
function will, in turn call CIFSSMBQFSUnixInfo and CIFSSMBSetFSUnixInfo.
Those functions also call smb_init.

It's possible for the session and tcon reconnect to succeed, and then
for another cifs_reconnect to occur before CIFSSMBQFSUnixInfo or
CIFSSMBSetFSUnixInfo to be called. That'll cause those functions to call
smb_init and cifs_reconnect_tcon again, ad infinitum...

Break the infinite recursion by having those functions use a new
smb_init variant that doesn't attempt to perform a reconnect.

Reported-and-Tested-by: Michal Suchanek <hramrach@centrum.cz>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/cifssmb.c | 49 +++++++++++++++++++++++++++++++++----------------
 1 file changed, 33 insertions(+), 16 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index c65c341..7e83b35 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -232,7 +232,7 @@ static int
 small_smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
 		void **request_buf)
 {
-	int rc = 0;
+	int rc;
 
 	rc = cifs_reconnect_tcon(tcon, smb_command);
 	if (rc)
@@ -250,7 +250,7 @@ small_smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
 	if (tcon != NULL)
 		cifs_stats_inc(&tcon->num_smbs_sent);
 
-	return rc;
+	return 0;
 }
 
 int
@@ -281,16 +281,9 @@ small_smb_init_no_tc(const int smb_command, const int wct,
 
 /* If the return code is zero, this function must fill in request_buf pointer */
 static int
-smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
-	 void **request_buf /* returned */ ,
-	 void **response_buf /* returned */ )
+__smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
+			void **request_buf, void **response_buf)
 {
-	int rc = 0;
-
-	rc = cifs_reconnect_tcon(tcon, smb_command);
-	if (rc)
-		return rc;
-
 	*request_buf = cifs_buf_get();
 	if (*request_buf == NULL) {
 		/* BB should we add a retry in here if not a writepage? */
@@ -309,7 +302,31 @@ smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
 	if (tcon != NULL)
 		cifs_stats_inc(&tcon->num_smbs_sent);
 
-	return rc;
+	return 0;
+}
+
+/* If the return code is zero, this function must fill in request_buf pointer */
+static int
+smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
+	 void **request_buf, void **response_buf)
+{
+	int rc;
+
+	rc = cifs_reconnect_tcon(tcon, smb_command);
+	if (rc)
+		return rc;
+
+	return __smb_init(smb_command, wct, tcon, request_buf, response_buf);
+}
+
+static int
+smb_init_no_reconnect(int smb_command, int wct, struct cifsTconInfo *tcon,
+			void **request_buf, void **response_buf)
+{
+	if (tcon->ses->need_reconnect || tcon->need_reconnect)
+		return -EHOSTDOWN;
+
+	return __smb_init(smb_command, wct, tcon, request_buf, response_buf);
 }
 
 static int validate_t2(struct smb_t2_rsp *pSMB)
@@ -4534,8 +4551,8 @@ CIFSSMBQFSUnixInfo(const int xid, struct cifsTconInfo *tcon)
 
 	cFYI(1, "In QFSUnixInfo");
 QFSUnixRetry:
-	rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
-		      (void **) &pSMBr);
+	rc = smb_init_no_reconnect(SMB_COM_TRANSACTION2, 15, tcon,
+				   (void **) &pSMB, (void **) &pSMBr);
 	if (rc)
 		return rc;
 
@@ -4604,8 +4621,8 @@ CIFSSMBSetFSUnixInfo(const int xid, struct cifsTconInfo *tcon, __u64 cap)
 	cFYI(1, "In SETFSUnixInfo");
 SETFSUnixRetry:
 	/* BB switch to small buf init to save memory */
-	rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
-		      (void **) &pSMBr);
+	rc = smb_init_no_reconnect(SMB_COM_TRANSACTION2, 15, tcon,
+					(void **) &pSMB, (void **) &pSMBr);
 	if (rc)
 		return rc;
 
-- 
cgit v1.1


From 3036e7b490bf7878c6dae952eec5fb87b1106589 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Thu, 30 Sep 2010 15:15:33 -0700
Subject: proc: make /proc/pid/limits world readable

Having the limits file world readable will ease the task of system
management on systems where root privileges might be restricted.

Having admin restricted with root priviledges, he/she could not check
other users process' limits.

Also it'd align with most of the /proc stat files.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Cc: Eugene Teo <eugene@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/base.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index a1c43e7..8e4adda 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2675,7 +2675,7 @@ static const struct pid_entry tgid_base_stuff[] = {
 	INF("auxv",       S_IRUSR, proc_pid_auxv),
 	ONE("status",     S_IRUGO, proc_pid_status),
 	ONE("personality", S_IRUSR, proc_pid_personality),
-	INF("limits",	  S_IRUSR, proc_pid_limits),
+	INF("limits",	  S_IRUGO, proc_pid_limits),
 #ifdef CONFIG_SCHED_DEBUG
 	REG("sched",      S_IRUGO|S_IWUSR, proc_pid_sched_operations),
 #endif
@@ -3011,7 +3011,7 @@ static const struct pid_entry tid_base_stuff[] = {
 	INF("auxv",      S_IRUSR, proc_pid_auxv),
 	ONE("status",    S_IRUGO, proc_pid_status),
 	ONE("personality", S_IRUSR, proc_pid_personality),
-	INF("limits",	 S_IRUSR, proc_pid_limits),
+	INF("limits",	 S_IRUGO, proc_pid_limits),
 #ifdef CONFIG_SCHED_DEBUG
 	REG("sched",     S_IRUGO|S_IWUSR, proc_pid_sched_operations),
 #endif
-- 
cgit v1.1


From 3f259d092c7a2fdf217823e8f1838530adb0cdb0 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Thu, 30 Sep 2010 15:15:37 -0700
Subject: reiserfs: fix dependency inversion between inode and reiserfs mutexes

The reiserfs mutex already depends on the inode mutex, so we can't lock
the inode mutex in reiserfs_unpack() without using the safe locking API,
because reiserfs_unpack() is always called with the reiserfs mutex locked.

This fixes:

  =======================================================
  [ INFO: possible circular locking dependency detected ]
  2.6.35c #13
  -------------------------------------------------------
  lilo/1606 is trying to acquire lock:
   (&sb->s_type->i_mutex_key#8){+.+.+.}, at: [<d0329450>] reiserfs_unpack+0x60/0x110 [reiserfs]

  but task is already holding lock:
   (&REISERFS_SB(s)->lock){+.+.+.}, at: [<d032a268>] reiserfs_write_lock+0x28/0x40 [reiserfs]

  which lock already depends on the new lock.

  the existing dependency chain (in reverse order) is:

  -> #1 (&REISERFS_SB(s)->lock){+.+.+.}:
         [<c1056347>] lock_acquire+0x67/0x80
         [<c12f083d>] __mutex_lock_common+0x4d/0x410
         [<c12f0c58>] mutex_lock_nested+0x18/0x20
         [<d032a268>] reiserfs_write_lock+0x28/0x40 [reiserfs]
         [<d0329e9a>] reiserfs_lookup_privroot+0x2a/0x90 [reiserfs]
         [<d0316b81>] reiserfs_fill_super+0x941/0xe60 [reiserfs]
         [<c10b7d17>] get_sb_bdev+0x117/0x170
         [<d0313e21>] get_super_block+0x21/0x30 [reiserfs]
         [<c10b74ba>] vfs_kern_mount+0x6a/0x1b0
         [<c10b7659>] do_kern_mount+0x39/0xe0
         [<c10cebe0>] do_mount+0x340/0x790
         [<c10cf0b4>] sys_mount+0x84/0xb0
         [<c12f25cd>] syscall_call+0x7/0xb

  -> #0 (&sb->s_type->i_mutex_key#8){+.+.+.}:
         [<c1056186>] __lock_acquire+0x1026/0x1180
         [<c1056347>] lock_acquire+0x67/0x80
         [<c12f083d>] __mutex_lock_common+0x4d/0x410
         [<c12f0c58>] mutex_lock_nested+0x18/0x20
         [<d0329450>] reiserfs_unpack+0x60/0x110 [reiserfs]
         [<d0329772>] reiserfs_ioctl+0x272/0x320 [reiserfs]
         [<c10c3228>] vfs_ioctl+0x28/0xa0
         [<c10c3c5d>] do_vfs_ioctl+0x32d/0x5c0
         [<c10c3f53>] sys_ioctl+0x63/0x70
         [<c12f25cd>] syscall_call+0x7/0xb

  other info that might help us debug this:

  1 lock held by lilo/1606:
   #0:  (&REISERFS_SB(s)->lock){+.+.+.}, at: [<d032a268>] reiserfs_write_lock+0x28/0x40 [reiserfs]

  stack backtrace:
  Pid: 1606, comm: lilo Not tainted 2.6.35c #13
  Call Trace:
   [<c1056186>] __lock_acquire+0x1026/0x1180
   [<c1056347>] lock_acquire+0x67/0x80
   [<c12f083d>] __mutex_lock_common+0x4d/0x410
   [<c12f0c58>] mutex_lock_nested+0x18/0x20
   [<d0329450>] reiserfs_unpack+0x60/0x110 [reiserfs]
   [<d0329772>] reiserfs_ioctl+0x272/0x320 [reiserfs]
   [<c10c3228>] vfs_ioctl+0x28/0xa0
   [<c10c3c5d>] do_vfs_ioctl+0x32d/0x5c0
   [<c10c3f53>] sys_ioctl+0x63/0x70
   [<c12f25cd>] syscall_call+0x7/0xb

Reported-by: Jarek Poplawski <jarkao2@gmail.com>
Tested-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jeff Mahoney <jeffm@suse.com>
Cc: <stable@kernel.org>		[2.6.32 and later]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/reiserfs/ioctl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index f53505d..679d502 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -188,7 +188,7 @@ int reiserfs_unpack(struct inode *inode, struct file *filp)
 	/* we need to make sure nobody is changing the file size beneath
 	 ** us
 	 */
-	mutex_lock(&inode->i_mutex);
+	reiserfs_mutex_lock_safe(&inode->i_mutex, inode->i_sb);
 	reiserfs_write_lock(inode->i_sb);
 
 	write_from = inode->i_size & (blocksize - 1);
-- 
cgit v1.1


From 9d8117e72bf453dd9d85e0cd322ce4a0f8bccbc0 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Thu, 30 Sep 2010 15:15:38 -0700
Subject: reiserfs: fix unwanted reiserfs lock recursion

Prevent from recursively locking the reiserfs lock in reiserfs_unpack()
because we may call journal_begin() that requires the lock to be taken
only once, otherwise it won't be able to release the lock while taking
other mutexes, ending up in inverted dependencies between the journal
mutex and the reiserfs lock for example.

This fixes:

  =======================================================
  [ INFO: possible circular locking dependency detected ]
  2.6.35.4.4a #3
  -------------------------------------------------------
  lilo/1620 is trying to acquire lock:
   (&journal->j_mutex){+.+...}, at: [<d0325bff>] do_journal_begin_r+0x7f/0x340 [reiserfs]

  but task is already holding lock:
   (&REISERFS_SB(s)->lock){+.+.+.}, at: [<d032a278>] reiserfs_write_lock+0x28/0x40 [reiserfs]

  which lock already depends on the new lock.

  the existing dependency chain (in reverse order) is:

  -> #1 (&REISERFS_SB(s)->lock){+.+.+.}:
         [<c10562b7>] lock_acquire+0x67/0x80
         [<c12facad>] __mutex_lock_common+0x4d/0x410
         [<c12fb0c8>] mutex_lock_nested+0x18/0x20
         [<d032a278>] reiserfs_write_lock+0x28/0x40 [reiserfs]
         [<d0325c06>] do_journal_begin_r+0x86/0x340 [reiserfs]
         [<d0325f77>] journal_begin+0x77/0x140 [reiserfs]
         [<d0315be4>] reiserfs_remount+0x224/0x530 [reiserfs]
         [<c10b6a20>] do_remount_sb+0x60/0x110
         [<c10cee25>] do_mount+0x625/0x790
         [<c10cf014>] sys_mount+0x84/0xb0
         [<c12fca3d>] syscall_call+0x7/0xb

  -> #0 (&journal->j_mutex){+.+...}:
         [<c10560f6>] __lock_acquire+0x1026/0x1180
         [<c10562b7>] lock_acquire+0x67/0x80
         [<c12facad>] __mutex_lock_common+0x4d/0x410
         [<c12fb0c8>] mutex_lock_nested+0x18/0x20
         [<d0325bff>] do_journal_begin_r+0x7f/0x340 [reiserfs]
         [<d0325f77>] journal_begin+0x77/0x140 [reiserfs]
         [<d0326271>] reiserfs_persistent_transaction+0x41/0x90 [reiserfs]
         [<d030d06c>] reiserfs_get_block+0x22c/0x1530 [reiserfs]
         [<c10db9db>] __block_prepare_write+0x1bb/0x3a0
         [<c10dbbe6>] block_prepare_write+0x26/0x40
         [<d030b738>] reiserfs_prepare_write+0x88/0x170 [reiserfs]
         [<d03294d6>] reiserfs_unpack+0xe6/0x120 [reiserfs]
         [<d0329782>] reiserfs_ioctl+0x272/0x320 [reiserfs]
         [<c10c3188>] vfs_ioctl+0x28/0xa0
         [<c10c3bbd>] do_vfs_ioctl+0x32d/0x5c0
         [<c10c3eb3>] sys_ioctl+0x63/0x70
         [<c12fca3d>] syscall_call+0x7/0xb

  other info that might help us debug this:

  2 locks held by lilo/1620:
   #0:  (&sb->s_type->i_mutex_key#8){+.+.+.}, at: [<d032945a>] reiserfs_unpack+0x6a/0x120 [reiserfs]
   #1:  (&REISERFS_SB(s)->lock){+.+.+.}, at: [<d032a278>] reiserfs_write_lock+0x28/0x40 [reiserfs]

  stack backtrace:
  Pid: 1620, comm: lilo Not tainted 2.6.35.4.4a #3
  Call Trace:
   [<c10560f6>] __lock_acquire+0x1026/0x1180
   [<c10562b7>] lock_acquire+0x67/0x80
   [<c12facad>] __mutex_lock_common+0x4d/0x410
   [<c12fb0c8>] mutex_lock_nested+0x18/0x20
   [<d0325bff>] do_journal_begin_r+0x7f/0x340 [reiserfs]
   [<d0325f77>] journal_begin+0x77/0x140 [reiserfs]
   [<d0326271>] reiserfs_persistent_transaction+0x41/0x90 [reiserfs]
   [<d030d06c>] reiserfs_get_block+0x22c/0x1530 [reiserfs]
   [<c10db9db>] __block_prepare_write+0x1bb/0x3a0
   [<c10dbbe6>] block_prepare_write+0x26/0x40
   [<d030b738>] reiserfs_prepare_write+0x88/0x170 [reiserfs]
   [<d03294d6>] reiserfs_unpack+0xe6/0x120 [reiserfs]
   [<d0329782>] reiserfs_ioctl+0x272/0x320 [reiserfs]
   [<c10c3188>] vfs_ioctl+0x28/0xa0
   [<c10c3bbd>] do_vfs_ioctl+0x32d/0x5c0
   [<c10c3eb3>] sys_ioctl+0x63/0x70
   [<c12fca3d>] syscall_call+0x7/0xb

Reported-by: Jarek Poplawski <jarkao2@gmail.com>
Tested-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jeff Mahoney <jeffm@suse.com>
Cc: All since 2.6.32 <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/reiserfs/ioctl.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index 679d502..5cbb81e 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -170,6 +170,7 @@ int reiserfs_prepare_write(struct file *f, struct page *page,
 int reiserfs_unpack(struct inode *inode, struct file *filp)
 {
 	int retval = 0;
+	int depth;
 	int index;
 	struct page *page;
 	struct address_space *mapping;
@@ -189,7 +190,7 @@ int reiserfs_unpack(struct inode *inode, struct file *filp)
 	 ** us
 	 */
 	reiserfs_mutex_lock_safe(&inode->i_mutex, inode->i_sb);
-	reiserfs_write_lock(inode->i_sb);
+	depth = reiserfs_write_lock_once(inode->i_sb);
 
 	write_from = inode->i_size & (blocksize - 1);
 	/* if we are on a block boundary, we are already unpacked.  */
@@ -224,6 +225,6 @@ int reiserfs_unpack(struct inode *inode, struct file *filp)
 
       out:
 	mutex_unlock(&inode->i_mutex);
-	reiserfs_write_unlock(inode->i_sb);
+	reiserfs_write_unlock_once(inode->i_sb, depth);
 	return retval;
 }
-- 
cgit v1.1


From 0157443c56bcc50be4933ebdff3ece723dfd535c Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Thu, 30 Sep 2010 22:06:21 +0200
Subject: fuse: Initialize total_len in fuse_retrieve()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

fs/fuse/dev.c:1357: warning: ‘total_len’ may be used uninitialized in this
function

Initialize total_len to zero, else its value will be undefined.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/fuse/dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index d367af1..cde755c 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1354,7 +1354,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
 	loff_t file_size;
 	unsigned int num;
 	unsigned int offset;
-	size_t total_len;
+	size_t total_len = 0;
 
 	req = fuse_get_req(fc);
 	if (IS_ERR(req))
-- 
cgit v1.1


From aaead25b954879e1a708ff2f3602f494c18d20b5 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 4 Oct 2010 14:25:33 +0200
Subject: writeback: always use sb->s_bdi for writeback purposes

We currently use struct backing_dev_info for various different purposes.
Originally it was introduced to describe a backing device which includes
an unplug and congestion function and various bits of readahead information
and VM-relevant flags.  We're also using for tracking dirty inodes for
writeback.

To make writeback properly find all inodes we need to only access the
per-filesystem backing_device pointed to by the superblock in ->s_bdi
inside the writeback code, and not the instances pointeded to by
inode->i_mapping->backing_dev which can be overriden by special devices
or might not be set at all by some filesystems.

Long term we should split out the writeback-relevant bits of struct
backing_device_info (which includes more than the current bdi_writeback)
and only point to it from the superblock while leaving the traditional
backing device as a separate structure that can be overriden by devices.

The one exception for now is the block device filesystem which really
wants different writeback contexts for it's different (internal) inodes
to handle the writeout more efficiently.  For now we do this with
a hack in fs-writeback.c because we're so late in the cycle, but in
the future I plan to replace this with a superblock method that allows
for multiple writeback contexts per filesystem.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 fs/fs-writeback.c | 19 ++++---------------
 1 file changed, 4 insertions(+), 15 deletions(-)

(limited to 'fs')

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 5581122..ab38fef 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -72,22 +72,11 @@ int writeback_in_progress(struct backing_dev_info *bdi)
 static inline struct backing_dev_info *inode_to_bdi(struct inode *inode)
 {
 	struct super_block *sb = inode->i_sb;
-	struct backing_dev_info *bdi = inode->i_mapping->backing_dev_info;
 
-	/*
-	 * For inodes on standard filesystems, we use superblock's bdi. For
-	 * inodes on virtual filesystems, we want to use inode mapping's bdi
-	 * because they can possibly point to something useful (think about
-	 * block_dev filesystem).
-	 */
-	if (sb->s_bdi && sb->s_bdi != &noop_backing_dev_info) {
-		/* Some device inodes could play dirty tricks. Catch them... */
-		WARN(bdi != sb->s_bdi && bdi_cap_writeback_dirty(bdi),
-			"Dirtiable inode bdi %s != sb bdi %s\n",
-			bdi->name, sb->s_bdi->name);
-		return sb->s_bdi;
-	}
-	return bdi;
+	if (strcmp(sb->s_type->name, "bdev") == 0)
+		return inode->i_mapping->backing_dev_info;
+
+	return sb->s_bdi;
 }
 
 static void bdi_queue_work(struct backing_dev_info *bdi,
-- 
cgit v1.1