39 files changed, 363 insertions, 228 deletions
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 1eb9a2e..0b3c37e 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -1,6 +1,11 @@
 Version 1.46
 ------------
 Support deep tree mounts.  Better support OS/2, Win9x (DOS) time stamps.
+Allow null user to be specified on mount ("username="). Do not return
+EINVAL on readdir when filldir fails due to overwritten blocksize
+(fixes FC problem).  Return error in rename 2nd attempt retry (ie report
+if rename by handle also fails, after rename by path fails, we were
+not reporting whether the retry worked or not).
 
 Version 1.45
 ------------
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 4093d53..71f7791 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -822,10 +822,13 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol)
 		} else if (strnicmp(data, "nouser_xattr",12) == 0) {
 			vol->no_xattr = 1;
 		} else if (strnicmp(data, "user", 4) == 0) {
-			if (!value || !*value) {
+			if (!value) {
 				printk(KERN_WARNING
 				       "CIFS: invalid or missing username\n");
 				return 1;	/* needs_arg; */
+			} else if(!*value) {
+				/* null user, ie anonymous, authentication */
+				vol->nullauth = 1;
 			}
 			if (strnlen(value, 200) < 200) {
 				vol->username = value;
@@ -1642,6 +1645,8 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
 		/* BB fixme parse for domain name here */
 		cFYI(1, ("Username: %s ", volume_info.username));
 
+	} else if (volume_info.nullauth) {
+		cFYI(1,("null user"));
 	} else {
 		cifserror("No username specified");
         /* In userspace mount helper we can get user name from alternate
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 976a691..2436ed8 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -492,10 +492,14 @@ int cifs_close(struct inode *inode, struct file *file)
 					the struct would be in each open file,
 					but this should give enough time to 
 					clear the socket */
-					cERROR(1,("close with pending writes"));
+#ifdef CONFIG_CIFS_DEBUG2
+					cFYI(1,("close delay, write pending"));
+#endif /* DEBUG2 */
 					msleep(timeout);
 					timeout *= 4;
-				} 
+				}
+				if(atomic_read(&pSMBFile->wrtPending))
+					cERROR(1,("close with pending writes"));
 				rc = CIFSSMBClose(xid, pTcon,
 						  pSMBFile->netfid);
 			}
@@ -1806,13 +1810,6 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
 		}
 		if ((rc < 0) || (smb_read_data == NULL)) {
 			cFYI(1, ("Read error in readpages: %d", rc));
-			/* clean up remaing pages off list */
-			while (!list_empty(page_list) && (i < num_pages)) {
-				page = list_entry(page_list->prev, struct page,
-						  lru);
-				list_del(&page->lru);
-				page_cache_release(page);
-			}
 			break;
 		} else if (bytes_read > 0) {
 			pSMBr = (struct smb_com_read_rsp *)smb_read_data;
@@ -1831,13 +1828,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
 				   this case is ok - if we are at server EOF 
 				   we will hit it on next read */
 
-			/* while (!list_empty(page_list) && (i < num_pages)) {
-					page = list_entry(page_list->prev, 
-							  struct page, list);
-					list_del(&page->list);
-					page_cache_release(page);
-				}
-				break; */
+				/* break; */
 			}
 		} else {
 			cFYI(1, ("No bytes read (%d) at offset %lld . "
@@ -1845,14 +1836,6 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
 				 bytes_read, offset));
 			/* BB turn off caching and do new lookup on 
 			   file size at server? */
-			while (!list_empty(page_list) && (i < num_pages)) {
-				page = list_entry(page_list->prev, struct page,
-						  lru);
-				list_del(&page->lru);
-
-				/* BB removeme - replace with zero of page? */
-				page_cache_release(page);
-			}
 			break;
 		}
 		if (smb_read_data) {
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 35d54bb..1ad8c9f 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -885,10 +885,14 @@ int cifs_rename(struct inode *source_inode, struct dentry *source_direntry,
 			kmalloc(2 * sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
 		if (info_buf_source != NULL) {
 			info_buf_target = info_buf_source + 1;
-			rc = CIFSSMBUnixQPathInfo(xid, pTcon, fromName,
-				info_buf_source, cifs_sb_source->local_nls, 
-				cifs_sb_source->mnt_cifs_flags &
-					CIFS_MOUNT_MAP_SPECIAL_CHR);
+			if (pTcon->ses->capabilities & CAP_UNIX)
+				rc = CIFSSMBUnixQPathInfo(xid, pTcon, fromName,
+					info_buf_source, 
+					cifs_sb_source->local_nls,
+					cifs_sb_source->mnt_cifs_flags &
+						CIFS_MOUNT_MAP_SPECIAL_CHR);
+			/* else rc is still EEXIST so will fall through to
+			   unlink the target and retry rename */
 			if (rc == 0) {
 				rc = CIFSSMBUnixQPathInfo(xid, pTcon, toName,
 						info_buf_target,
@@ -937,7 +941,7 @@ int cifs_rename(struct inode *source_inode, struct dentry *source_direntry,
 				 cifs_sb_source->mnt_cifs_flags & 
 					CIFS_MOUNT_MAP_SPECIAL_CHR);
 		if (rc==0) {
-			CIFSSMBRenameOpenFile(xid, pTcon, netfid, toName,
+			rc = CIFSSMBRenameOpenFile(xid, pTcon, netfid, toName,
 					      cifs_sb_source->local_nls, 
 					      cifs_sb_source->mnt_cifs_flags &
 						CIFS_MOUNT_MAP_SPECIAL_CHR);
@@ -1085,8 +1089,10 @@ int cifs_getattr(struct vfsmount *mnt, struct dentry *dentry,
 	struct kstat *stat)
 {
 	int err = cifs_revalidate(dentry);
-	if (!err)
+	if (!err) {
 		generic_fillattr(dentry->d_inode, stat);
+		stat->blksize = CIFS_MAX_MSGSIZE;
+	}
 	return err;
 }
 
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index b5b0a2a..ed18c39 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -896,6 +896,10 @@ static int cifs_filldir(char *pfindEntry, struct file *file,
 		     tmp_inode->i_ino,obj_type);
 	if(rc) {
 		cFYI(1,("filldir rc = %d",rc));
+		/* we can not return filldir errors to the caller
+		since they are "normal" when the stat blocksize
+		is too small - we return remapped error instead */
+		rc = -EOVERFLOW;
 	}
 
 	dput(tmp_dentry);
@@ -1074,6 +1078,11 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
 			we want to check for that here? */
 			rc = cifs_filldir(current_entry, file,
 					filldir, direntry, tmp_buf, max_len);
+			if(rc == -EOVERFLOW) {
+				rc = 0;
+				break;
+			}
+
 			file->f_pos++;
 			if(file->f_pos == 
 				cifsFile->srch_inf.index_of_last_entry) {
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index a8a0835..bbdda99 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -90,7 +90,9 @@ static void unicode_ssetup_strings(char ** pbcc_area, struct cifsSesInfo *ses,
 	} */
 	/* copy user */
 	if(ses->userName == NULL) {
-		/* BB what about null user mounts - check that we do this BB */
+		/* null user mount */
+		*bcc_ptr = 0;
+		*(bcc_ptr+1) = 0;
 	} else { /* 300 should be long enough for any conceivable user name */
 		bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, ses->userName,
 					  300, nls_cp);
@@ -98,10 +100,13 @@ static void unicode_ssetup_strings(char ** pbcc_area, struct cifsSesInfo *ses,
 	bcc_ptr += 2 * bytes_ret;
 	bcc_ptr += 2; /* account for null termination */
 	/* copy domain */
-	if(ses->domainName == NULL)
-		bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr,
-					  "CIFS_LINUX_DOM", 32, nls_cp);
-	else
+	if(ses->domainName == NULL) {
+		/* Sending null domain better than using a bogus domain name (as
+		we did briefly in 2.6.18) since server will use its default */
+		*bcc_ptr = 0;
+		*(bcc_ptr+1) = 0;
+		bytes_ret = 0;
+	} else
 		bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, ses->domainName, 
 					  256, nls_cp);
 	bcc_ptr += 2 * bytes_ret;
@@ -144,13 +149,11 @@ static void ascii_ssetup_strings(char ** pbcc_area, struct cifsSesInfo *ses,
 
         /* copy domain */
 	
-        if(ses->domainName == NULL) {
-                strcpy(bcc_ptr, "CIFS_LINUX_DOM");
-		bcc_ptr += 14;  /* strlen(CIFS_LINUX_DOM) */
- 	} else {
+        if(ses->domainName != NULL) {
                 strncpy(bcc_ptr, ses->domainName, 256); 
 		bcc_ptr += strnlen(ses->domainName, 256);
-	}
+	} /* else we will send a null domain name 
+	     so the server will default to its own domain */
 	*bcc_ptr = 0;
 	bcc_ptr++;
 
diff --git a/fs/compat.c b/fs/compat.c
index 50624d4..8d0a001 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1835,9 +1835,12 @@ asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp,
 
 	} while (!ret && !timeout && tsp && (ts.tv_sec || ts.tv_nsec));
 
-	if (ret == 0 && tsp && !(current->personality & STICKY_TIMEOUTS)) {
+	if (tsp) {
 		struct compat_timespec rts;
 
+		if (current->personality & STICKY_TIMEOUTS)
+			goto sticky;
+
 		rts.tv_sec = timeout / HZ;
 		rts.tv_nsec = (timeout % HZ) * (NSEC_PER_SEC/HZ);
 		if (rts.tv_nsec >= NSEC_PER_SEC) {
@@ -1846,8 +1849,19 @@ asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp,
 		}
 		if (compat_timespec_compare(&rts, &ts) >= 0)
 			rts = ts;
-		if (copy_to_user(tsp, &rts, sizeof(rts)))
-			ret = -EFAULT;
+		if (copy_to_user(tsp, &rts, sizeof(rts))) {
+sticky:
+			/*
+			 * If an application puts its timeval in read-only
+			 * memory, we don't want the Linux-specific update to
+			 * the timeval to cause a fault after the select has
+			 * completed successfully. However, because we're not
+			 * updating the timeval, we can't restart the system
+			 * call.
+			 */
+			if (ret == -ERESTARTNOHAND)
+				ret = -EINTR;
+		}
 	}
 
 	if (ret == -ERESTARTNOHAND) {
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index 109333c..f8842ca 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -43,6 +43,10 @@ static ssize_t dlm_control_store(struct dlm_ls *ls, const char *buf, size_t len)
 	ssize_t ret = len;
 	int n = simple_strtol(buf, NULL, 0);
 
+	ls = dlm_find_lockspace_local(ls->ls_local_handle);
+	if (!ls)
+		return -EINVAL;
+
 	switch (n) {
 	case 0:
 		dlm_ls_stop(ls);
@@ -53,6 +57,7 @@ static ssize_t dlm_control_store(struct dlm_ls *ls, const char *buf, size_t len)
 	default:
 		ret = -EINVAL;
 	}
+	dlm_put_lockspace(ls);
 	return ret;
 }
 
@@ -143,6 +148,12 @@ static ssize_t dlm_attr_store(struct kobject *kobj, struct attribute *attr,
 	return a->store ? a->store(ls, buf, len) : len;
 }
 
+static void lockspace_kobj_release(struct kobject *k)
+{
+	struct dlm_ls *ls  = container_of(k, struct dlm_ls, ls_kobj);
+	kfree(ls);
+}
+
 static struct sysfs_ops dlm_attr_ops = {
 	.show  = dlm_attr_show,
 	.store = dlm_attr_store,
@@ -151,6 +162,7 @@ static struct sysfs_ops dlm_attr_ops = {
 static struct kobj_type dlm_ktype = {
 	.default_attrs = dlm_attrs,
 	.sysfs_ops     = &dlm_attr_ops,
+	.release       = lockspace_kobj_release,
 };
 
 static struct kset dlm_kset = {
@@ -678,7 +690,7 @@ static int release_lockspace(struct dlm_ls *ls, int force)
 	dlm_clear_members_gone(ls);
 	kfree(ls->ls_node_array);
 	kobject_unregister(&ls->ls_kobj);
-	kfree(ls);
+        /* The ls structure will be freed when the kobject is done with */
 
 	mutex_lock(&ls_lock);
 	ls_count--;
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index f49f105..136175a 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -134,7 +134,7 @@ int ecryptfs_crypto_api_algify_cipher_name(char **algified_name,
 
 	algified_name_len = (chaining_modifier_len + cipher_name_len + 3);
 	(*algified_name) = kmalloc(algified_name_len, GFP_KERNEL);
-	if (!(algified_name)) {
+	if (!(*algified_name)) {
 		rc = -ENOMEM;
 		goto out;
 	}
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 2bb5ace..763a50d 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -397,14 +397,14 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
 
 	err = -EIO;
 	if (is_bad_inode(inode))
-		goto clean_pages_up;
+		goto out;
 
 	data.file = file;
 	data.inode = inode;
 	data.req = fuse_get_req(fc);
 	err = PTR_ERR(data.req);
 	if (IS_ERR(data.req))
-		goto clean_pages_up;
+		goto out;
 
 	err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data);
 	if (!err) {
@@ -413,10 +413,7 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
 		else
 			fuse_put_request(fc, data.req);
 	}
-	return err;
-
-clean_pages_up:
-	put_pages_list(pages);
+out:
 	return err;
 }
 
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 57c43ac..d470e52 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -157,6 +157,9 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum *inum,
 	struct gfs2_glock *io_gl;
 	int error;
 
+	if (!inode)
+		return ERR_PTR(-ENOBUFS);
+
 	if (inode->i_state & I_NEW) {
 		struct gfs2_sbd *sdp = GFS2_SB(inode);
 		umode_t mode = DT2IF(type);
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 21508a1..9889c1e 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -84,8 +84,8 @@ static int __init init_gfs2_fs(void)
 
 	gfs2_inode_cachep = kmem_cache_create("gfs2_inode",
 					      sizeof(struct gfs2_inode),
-					      0, (SLAB_RECLAIM_ACCOUNT|
-					      SLAB_PANIC|SLAB_MEM_SPREAD),
+					      0,  SLAB_RECLAIM_ACCOUNT|
+					          SLAB_MEM_SPREAD,
 					      gfs2_init_inode_once, NULL);
 	if (!gfs2_inode_cachep)
 		goto fail;
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index 8d5963c..015640b 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -337,13 +337,6 @@ out:
 out_noerror:
 	ret = 0;
 out_unlock:
-	/* unlock all pages, we can't do any I/O right now */
-	for (page_idx = 0; page_idx < nr_pages; page_idx++) {
-		struct page *page = list_entry(pages->prev, struct page, lru);
-		list_del(&page->lru);
-		unlock_page(page);
-		page_cache_release(page);
-	}
 	if (do_unlock)
 		gfs2_holder_uninit(&gh);
 	goto out;
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c
index 06f06f7..b47d959 100644
--- a/fs/gfs2/ops_super.c
+++ b/fs/gfs2/ops_super.c
@@ -138,16 +138,27 @@ static void gfs2_put_super(struct super_block *sb)
 }
 
 /**
- * gfs2_write_super - disk commit all incore transactions
- * @sb: the filesystem
+ * gfs2_write_super
+ * @sb: the superblock
  *
- * This function is called every time sync(2) is called.
- * After this exits, all dirty buffers are synced.
  */
 
 static void gfs2_write_super(struct super_block *sb)
 {
+	sb->s_dirt = 0;
+}
+
+/**
+ * gfs2_sync_fs - sync the filesystem
+ * @sb: the superblock
+ *
+ * Flushes the log to disk.
+ */
+static int gfs2_sync_fs(struct super_block *sb, int wait)
+{
+	sb->s_dirt = 0;
 	gfs2_log_flush(sb->s_fs_info, NULL);
+	return 0;
 }
 
 /**
@@ -452,17 +463,18 @@ static void gfs2_destroy_inode(struct inode *inode)
 }
 
 struct super_operations gfs2_super_ops = {
-	.alloc_inode = gfs2_alloc_inode,
-	.destroy_inode = gfs2_destroy_inode,
-	.write_inode = gfs2_write_inode,
-	.delete_inode = gfs2_delete_inode,
-	.put_super = gfs2_put_super,
-	.write_super = gfs2_write_super,
-	.write_super_lockfs = gfs2_write_super_lockfs,
-	.unlockfs = gfs2_unlockfs,
-	.statfs = gfs2_statfs,
-	.remount_fs = gfs2_remount_fs,
-	.clear_inode = gfs2_clear_inode,
-	.show_options = gfs2_show_options,
+	.alloc_inode		= gfs2_alloc_inode,
+	.destroy_inode		= gfs2_destroy_inode,
+	.write_inode		= gfs2_write_inode,
+	.delete_inode		= gfs2_delete_inode,
+	.put_super		= gfs2_put_super,
+	.write_super		= gfs2_write_super,
+	.sync_fs		= gfs2_sync_fs,
+	.write_super_lockfs 	= gfs2_write_super_lockfs,
+	.unlockfs		= gfs2_unlockfs,
+	.statfs			= gfs2_statfs,
+	.remount_fs		= gfs2_remount_fs,
+	.clear_inode		= gfs2_clear_inode,
+	.show_options		= gfs2_show_options,
 };
 
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index 4c7985e..b753ba2 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -756,6 +756,11 @@ static int can_set_system_xattr(struct inode *inode, const char *name,
 	return -EOPNOTSUPP;
 }
 
+/*
+ * Most of the permission checking is done by xattr_permission in the vfs.
+ * The local file system is responsible for handling the system.* namespace.
+ * We also need to verify that this is a namespace that we recognize.
+ */
 static int can_set_xattr(struct inode *inode, const char *name,
 			 const void *value, size_t value_len)
 {
@@ -771,10 +776,6 @@ static int can_set_xattr(struct inode *inode, const char *name,
 	    strncmp(name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN))
 		return -EOPNOTSUPP;
 
-	if (!S_ISREG(inode->i_mode) &&
-	    (!S_ISDIR(inode->i_mode) || inode->i_mode &S_ISVTX))
-		return -EPERM;
-
 	return 0;
 }
 
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 6341392..8ca1808 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -353,9 +353,6 @@ EXPORT_SYMBOL(lockd_down);
  * Sysctl parameters (same as module parameters, different interface).
  */
 
-/* Something that isn't CTL_ANY, CTL_NONE or a value that may clash. */
-#define CTL_UNNUMBERED		-2
-
 static ctl_table nlm_sysctls[] = {
 	{
 		.ctl_name	= CTL_UNNUMBERED,
diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c
index 2fe3403..3ea50ac 100644
--- a/fs/nfs/sysctl.c
+++ b/fs/nfs/sysctl.c
@@ -18,11 +18,6 @@
 static const int nfs_set_port_min = 0;
 static const int nfs_set_port_max = 65535;
 static struct ctl_table_header *nfs_callback_sysctl_table;
-/*
- * Something that isn't CTL_ANY, CTL_NONE or a value that may clash.
- * Use the same values as fs/lockd/svc.c
- */
-#define CTL_UNNUMBERED -2
 
 static ctl_table nfs_cb_sysctls[] = {
 #ifdef CONFIG_NFS_V4
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 64db601..7f5bad0 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -258,7 +258,7 @@ nfsd3_proc_create(struct svc_rqst *rqstp, struct nfsd3_createargs *argp,
 	/* Now create the file and set attributes */
 	nfserr = nfsd_create_v3(rqstp, dirfhp, argp->name, argp->len,
 				attr, newfhp,
-				argp->createmode, argp->verf, NULL);
+				argp->createmode, argp->verf, NULL, NULL);
 
 	RETURN_STATUS(nfserr);
 }
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 0a7bbdc..50bc942 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -93,6 +93,7 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o
 {
 	struct svc_fh resfh;
 	__be32 status;
+	int created = 0;
 
 	fh_init(&resfh, NFS4_FHSIZE);
 	open->op_truncate = 0;
@@ -105,28 +106,27 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o
 		status = nfsd_create_v3(rqstp, current_fh, open->op_fname.data,
 					open->op_fname.len, &open->op_iattr,
 					&resfh, open->op_createmode,
-					(u32 *)open->op_verf.data, &open->op_truncate);
-	}
-	else {
+					(u32 *)open->op_verf.data, &open->op_truncate, &created);
+	} else {
 		status = nfsd_lookup(rqstp, current_fh,
 				     open->op_fname.data, open->op_fname.len, &resfh);
 		fh_unlock(current_fh);
 	}
+	if (status)
+		goto out;
 
-	if (!status) {
-		set_change_info(&open->op_cinfo, current_fh);
+	set_change_info(&open->op_cinfo, current_fh);
 
-		/* set reply cache */
-		fh_dup2(current_fh, &resfh);
-		open->op_stateowner->so_replay.rp_openfh_len =
-			resfh.fh_handle.fh_size;
-		memcpy(open->op_stateowner->so_replay.rp_openfh,
-				&resfh.fh_handle.fh_base,
-				resfh.fh_handle.fh_size);
+	/* set reply cache */
+	fh_dup2(current_fh, &resfh);
+	open->op_stateowner->so_replay.rp_openfh_len = resfh.fh_handle.fh_size;
+	memcpy(open->op_stateowner->so_replay.rp_openfh,
+			&resfh.fh_handle.fh_base, resfh.fh_handle.fh_size);
 
+	if (!created)
 		status = do_open_permission(rqstp, current_fh, open, MAY_NOP);
-	}
 
+out:
 	fh_put(&resfh);
 	return status;
 }
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index e9d0770..81b8565 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -274,7 +274,7 @@ nfsd4_clear_clid_dir(struct dentry *dir, struct dentry *dentry)
 	 * any regular files anyway, just in case the directory was created by
 	 * a kernel from the future.... */
 	nfsd4_list_rec_dir(dentry, nfsd4_remove_clid_file);
-	mutex_lock(&dir->d_inode->i_mutex);
+	mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
 	status = vfs_rmdir(dir->d_inode, dentry);
 	mutex_unlock(&dir->d_inode->i_mutex);
 	return status;
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index f21e917..bb4d926 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1177,7 +1177,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	/*
 	 * Get the dir op function pointer.
 	 */
-	err = nfserr_perm;
+	err = 0;
 	switch (type) {
 	case S_IFREG:
 		host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL);
@@ -1237,7 +1237,7 @@ __be32
 nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
 		char *fname, int flen, struct iattr *iap,
 		struct svc_fh *resfhp, int createmode, u32 *verifier,
-	        int *truncp)
+	        int *truncp, int *created)
 {
 	struct dentry	*dentry, *dchild = NULL;
 	struct inode	*dirp;
@@ -1331,6 +1331,8 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL);
 	if (host_err < 0)
 		goto out_nfserr;
+	if (created)
+		*created = 1;
 
 	if (EX_ISSYNC(fhp->fh_export)) {
 		err = nfserrno(nfsd_sync_dir(dentry));
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 9041802..1724999 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -1619,6 +1619,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
 		      "jmacd-8: reiserfs_fill_super: unable to read bitmap");
 		goto error;
 	}
+	errval = -EINVAL;
 #ifdef CONFIG_REISERFS_CHECK
 	SWARN(silent, s, "CONFIG_REISERFS_CHECK is set ON");
 	SWARN(silent, s, "- it is slow mode for debugging.");
diff --git a/fs/splice.c b/fs/splice.c
index 8d70595..da74583 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1109,6 +1109,19 @@ out_release:
 EXPORT_SYMBOL(do_splice_direct);
 
 /*
+ * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same
+ * location, so checking ->i_pipe is not enough to verify that this is a
+ * pipe.
+ */
+static inline struct pipe_inode_info *pipe_info(struct inode *inode)
+{
+	if (S_ISFIFO(inode->i_mode))
+		return inode->i_pipe;
+
+	return NULL;
+}
+
+/*
  * Determine where to splice to/from.
  */
 static long do_splice(struct file *in, loff_t __user *off_in,
@@ -1119,7 +1132,7 @@ static long do_splice(struct file *in, loff_t __user *off_in,
 	loff_t offset, *off;
 	long ret;
 
-	pipe = in->f_dentry->d_inode->i_pipe;
+	pipe = pipe_info(in->f_dentry->d_inode);
 	if (pipe) {
 		if (off_in)
 			return -ESPIPE;
@@ -1140,7 +1153,7 @@ static long do_splice(struct file *in, loff_t __user *off_in,
 		return ret;
 	}
 
-	pipe = out->f_dentry->d_inode->i_pipe;
+	pipe = pipe_info(out->f_dentry->d_inode);
 	if (pipe) {
 		if (off_out)
 			return -ESPIPE;
@@ -1298,7 +1311,7 @@ static int get_iovec_page_array(const struct iovec __user *iov,
 static long do_vmsplice(struct file *file, const struct iovec __user *iov,
 			unsigned long nr_segs, unsigned int flags)
 {
-	struct pipe_inode_info *pipe = file->f_dentry->d_inode->i_pipe;
+	struct pipe_inode_info *pipe;
 	struct page *pages[PIPE_BUFFERS];
 	struct partial_page partial[PIPE_BUFFERS];
 	struct splice_pipe_desc spd = {
@@ -1308,7 +1321,8 @@ static long do_vmsplice(struct file *file, const struct iovec __user *iov,
 		.ops = &user_page_pipe_buf_ops,
 	};
 
-	if (unlikely(!pipe))
+	pipe = pipe_info(file->f_dentry->d_inode);
+	if (!pipe)
 		return -EBADF;
 	if (unlikely(nr_segs > UIO_MAXIOV))
 		return -EINVAL;
@@ -1535,8 +1549,8 @@ static int link_pipe(struct pipe_inode_info *ipipe,
 static long do_tee(struct file *in, struct file *out, size_t len,
 		   unsigned int flags)
 {
-	struct pipe_inode_info *ipipe = in->f_dentry->d_inode->i_pipe;
-	struct pipe_inode_info *opipe = out->f_dentry->d_inode->i_pipe;
+	struct pipe_inode_info *ipipe = pipe_info(in->f_dentry->d_inode);
+	struct pipe_inode_info *opipe = pipe_info(out->f_dentry->d_inode);
 	int ret = -EINVAL;
 
 	/*
diff --git a/fs/xattr.c b/fs/xattr.c
index 3956351..0901bdc 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -48,14 +48,21 @@ xattr_permission(struct inode *inode, const char *name, int mask)
 		return 0;
 
 	/*
-	 * The trusted.* namespace can only accessed by a privilegued user.
+	 * The trusted.* namespace can only be accessed by a privileged user.
 	 */
 	if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN))
 		return (capable(CAP_SYS_ADMIN) ? 0 : -EPERM);
 
+	/* In user.* namespace, only regular files and directories can have
+	 * extended attributes. For sticky directories, only the owner and
+	 * privileged user can write attributes.
+	 */
 	if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) {
-		if (!S_ISREG(inode->i_mode) &&
-		    (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX))
+		if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
+			return -EPERM;
+		if (S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX) &&
+		    (mask & MAY_WRITE) && (current->fsuid != inode->i_uid) &&
+		    !capable(CAP_FOWNER))
 			return -EPERM;
 	}
 
diff --git a/fs/xfs/Makefile-linux-2.6 b/fs/xfs/Makefile-linux-2.6
index 291948d..b49989b 100644
--- a/fs/xfs/Makefile-linux-2.6
+++ b/fs/xfs/Makefile-linux-2.6
@@ -21,22 +21,7 @@ EXTRA_CFLAGS +=	 -Ifs/xfs -Ifs/xfs/linux-2.6 -funsigned-char
 XFS_LINUX := linux-2.6
 
 ifeq ($(CONFIG_XFS_DEBUG),y)
-	EXTRA_CFLAGS += -g -DSTATIC="" -DDEBUG
-	EXTRA_CFLAGS += -DXFS_BUF_LOCK_TRACKING
-endif
-ifeq ($(CONFIG_XFS_TRACE),y)
-	EXTRA_CFLAGS += -DXFS_ALLOC_TRACE
-	EXTRA_CFLAGS += -DXFS_ATTR_TRACE
-	EXTRA_CFLAGS += -DXFS_BLI_TRACE
-	EXTRA_CFLAGS += -DXFS_BMAP_TRACE
-	EXTRA_CFLAGS += -DXFS_BMBT_TRACE
-	EXTRA_CFLAGS += -DXFS_DIR2_TRACE
-	EXTRA_CFLAGS += -DXFS_DQUOT_TRACE
-	EXTRA_CFLAGS += -DXFS_ILOCK_TRACE
-	EXTRA_CFLAGS += -DXFS_LOG_TRACE
-	EXTRA_CFLAGS += -DXFS_RW_TRACE
-	EXTRA_CFLAGS += -DXFS_BUF_TRACE
-	EXTRA_CFLAGS += -DXFS_VNODE_TRACE
+	EXTRA_CFLAGS += -g
 endif
 
 obj-$(CONFIG_XFS_FS)		+= xfs.o
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index db5f5a3..d338284 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -15,6 +15,7 @@
  * along with this program; if not, write the Free Software Foundation,
  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
+#include "xfs.h"
 #include <linux/stddef.h>
 #include <linux/errno.h>
 #include <linux/slab.h>
@@ -31,7 +32,6 @@
 #include <linux/kthread.h>
 #include <linux/migrate.h>
 #include <linux/backing-dev.h>
-#include "xfs_linux.h"
 
 STATIC kmem_zone_t *xfs_buf_zone;
 STATIC kmem_shaker_t xfs_buf_shake;
@@ -1406,7 +1406,7 @@ xfs_alloc_bufhash(
 	btp->bt_hashshift = external ? 3 : 8;	/* 8 or 256 buckets */
 	btp->bt_hashmask = (1 << btp->bt_hashshift) - 1;
 	btp->bt_hash = kmem_zalloc((1 << btp->bt_hashshift) *
-					sizeof(xfs_bufhash_t), KM_SLEEP);
+					sizeof(xfs_bufhash_t), KM_SLEEP | KM_LARGE);
 	for (i = 0; i < (1 << btp->bt_hashshift); i++) {
 		spin_lock_init(&btp->bt_hash[i].bh_lock);
 		INIT_LIST_HEAD(&btp->bt_hash[i].bh_list);
diff --git a/fs/xfs/linux-2.6/xfs_dmapi_priv.h b/fs/xfs/linux-2.6/xfs_dmapi_priv.h
new file mode 100644
index 0000000..a8b0b16
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_dmapi_priv.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_DMAPI_PRIV_H__
+#define __XFS_DMAPI_PRIV_H__
+
+/*
+ *	Based on IO_ISDIRECT, decide which i_ flag is set.
+ */
+#define DM_SEM_FLAG_RD(ioflags) (((ioflags) & IO_ISDIRECT) ? \
+			      DM_FLAGS_IMUX : 0)
+#define DM_SEM_FLAG_WR	(DM_FLAGS_IALLOCSEM_WR | DM_FLAGS_IMUX)
+
+#endif /*__XFS_DMAPI_PRIV_H__*/
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index a74f854..74d0948 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -341,8 +341,11 @@ xfs_open_by_handle(
 		put_unused_fd(new_fd);
 		return -XFS_ERROR(-PTR_ERR(filp));
 	}
-	if (inode->i_mode & S_IFREG)
+	if (inode->i_mode & S_IFREG) {
+		/* invisible operation should not change atime */
+		filp->f_flags |= O_NOATIME;
 		filp->f_op = &xfs_invis_file_operations;
+	}
 
 	fd_install(new_fd, filp);
 	return new_fd;
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 38c4d12..de05abb 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -227,9 +227,7 @@ xfs_initialize_vnode(
 		xfs_revalidate_inode(XFS_BHVTOM(bdp), vp, ip);
 		xfs_set_inodeops(inode);
 
-		spin_lock(&ip->i_flags_lock);
-		ip->i_flags &= ~XFS_INEW;
-		spin_unlock(&ip->i_flags_lock);
+		xfs_iflags_clear(ip, XFS_INEW);
 		barrier();
 
 		unlock_new_inode(inode);
diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c
index c75f683..4363512 100644
--- a/fs/xfs/support/debug.c
+++ b/fs/xfs/support/debug.c
@@ -15,11 +15,9 @@
  * along with this program; if not, write the Free Software Foundation,
  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
+#include <xfs.h>
 #include "debug.h"
 #include "spin.h"
-#include <asm/page.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
 
 static char		message[256];	/* keep it off the stack */
 static DEFINE_SPINLOCK(xfs_err_lock);
diff --git a/fs/xfs/support/move.c b/fs/xfs/support/move.c
index caefa17..ac8617c 100644
--- a/fs/xfs/support/move.c
+++ b/fs/xfs/support/move.c
@@ -22,7 +22,7 @@
  * as we go.
  */
 int
-uio_read(caddr_t src, size_t len, struct uio *uio)
+xfs_uio_read(caddr_t src, size_t len, struct uio *uio)
 {
 	size_t	count;
 
diff --git a/fs/xfs/support/move.h b/fs/xfs/support/move.h
index 97a2498..977879c 100644
--- a/fs/xfs/support/move.h
+++ b/fs/xfs/support/move.h
@@ -65,6 +65,6 @@ struct uio {
 typedef struct uio uio_t;
 typedef struct iovec iovec_t;
 
-extern int	uio_read (caddr_t, size_t, uio_t *);
+extern int	xfs_uio_read (caddr_t, size_t, uio_t *);
 
 #endif  /* __XFS_SUPPORT_MOVE_H__ */
diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h
index 1a48dbb..bf0a120 100644
--- a/fs/xfs/xfs.h
+++ b/fs/xfs/xfs.h
@@ -17,5 +17,28 @@
  */
 #ifndef __XFS_H__
 #define __XFS_H__
+
+#ifdef CONFIG_XFS_DEBUG
+#define STATIC
+#define DEBUG 1
+#define XFS_BUF_LOCK_TRACKING 1
+/* #define QUOTADEBUG 1 */
+#endif
+
+#ifdef CONFIG_XFS_TRACE
+#define XFS_ALLOC_TRACE 1
+#define XFS_ATTR_TRACE 1
+#define XFS_BLI_TRACE 1
+#define XFS_BMAP_TRACE 1
+#define XFS_BMBT_TRACE 1
+#define XFS_DIR2_TRACE 1
+#define XFS_DQUOT_TRACE 1
+#define XFS_ILOCK_TRACE 1
+#define XFS_LOG_TRACE 1
+#define XFS_RW_TRACE 1
+#define XFS_BUF_TRACE 1
+#define XFS_VNODE_TRACE 1
+#endif
+
 #include <linux-2.6/xfs_linux.h>
 #endif	/* __XFS_H__ */
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index 8edbe1a..8e8e527 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -678,7 +678,7 @@ xfs_dir2_put_dirent64_uio(
 	idbp->d_off = pa->cook;
 	idbp->d_name[namelen] = '\0';
 	memcpy(idbp->d_name, pa->name, namelen);
-	rval = uio_read((caddr_t)idbp, reclen, uio);
+	rval = xfs_uio_read((caddr_t)idbp, reclen, uio);
 	pa->done = (rval == 0);
 	return rval;
 }
diff --git a/fs/xfs/xfs_dmapi.h b/fs/xfs/xfs_dmapi.h
index 4e7865a..adc3d25 100644
--- a/fs/xfs/xfs_dmapi.h
+++ b/fs/xfs/xfs_dmapi.h
@@ -157,27 +157,9 @@ typedef enum {
 #define DM_FLAGS_IALLOCSEM_WR	0x020	/* thread holds i_alloc_sem wr */
 
 /*
- *	Based on IO_ISDIRECT, decide which i_ flag is set.
+ *	Pull in platform specific event flags defines
  */
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,0)
-#define DM_SEM_FLAG_RD(ioflags) (((ioflags) & IO_ISDIRECT) ? \
-			      DM_FLAGS_IMUX : 0)
-#define DM_SEM_FLAG_WR	(DM_FLAGS_IALLOCSEM_WR | DM_FLAGS_IMUX)
-#endif
-
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)) && \
-    (LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,22))
-#define DM_SEM_FLAG_RD(ioflags) (((ioflags) & IO_ISDIRECT) ? \
-			      DM_FLAGS_IALLOCSEM_RD : DM_FLAGS_IMUX)
-#define DM_SEM_FLAG_WR	(DM_FLAGS_IALLOCSEM_WR | DM_FLAGS_IMUX)
-#endif
-
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,4,21)
-#define DM_SEM_FLAG_RD(ioflags) (((ioflags) & IO_ISDIRECT) ? \
-			      0 : DM_FLAGS_IMUX)
-#define DM_SEM_FLAG_WR	(DM_FLAGS_IMUX)
-#endif
-
+#include "xfs_dmapi_priv.h"
 
 /*
  *	Macros to turn caller specified delay/block flags into
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index b73d216..c1c89da 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -215,7 +215,7 @@ again:
 			 * If INEW is set this inode is being set up
 			 * we need to pause and try again.
 			 */
-			if (ip->i_flags & XFS_INEW) {
+			if (xfs_iflags_test(ip, XFS_INEW)) {
 				read_unlock(&ih->ih_lock);
 				delay(1);
 				XFS_STATS_INC(xs_ig_frecycle);
@@ -230,22 +230,50 @@ again:
 				 * on its way out of the system,
 				 * we need to pause and try again.
 				 */
-				if (ip->i_flags & XFS_IRECLAIM) {
+				if (xfs_iflags_test(ip, XFS_IRECLAIM)) {
 					read_unlock(&ih->ih_lock);
 					delay(1);
 					XFS_STATS_INC(xs_ig_frecycle);
 
 					goto again;
 				}
+				ASSERT(xfs_iflags_test(ip, XFS_IRECLAIMABLE));
+
+				/*
+				 * If lookup is racing with unlink, then we
+				 * should return an error immediately so we
+				 * don't remove it from the reclaim list and
+				 * potentially leak the inode.
+				 */
+				if ((ip->i_d.di_mode == 0) &&
+				    !(flags & XFS_IGET_CREATE)) {
+					read_unlock(&ih->ih_lock);
+					return ENOENT;
+				}
+
+				/*
+				 * There may be transactions sitting in the
+				 * incore log buffers or being flushed to disk
+				 * at this time.  We can't clear the
+				 * XFS_IRECLAIMABLE flag until these
+				 * transactions have hit the disk, otherwise we
+				 * will void the guarantee the flag provides
+				 * xfs_iunpin()
+				 */
+				if (xfs_ipincount(ip)) {
+					read_unlock(&ih->ih_lock);
+					xfs_log_force(mp, 0,
+						XFS_LOG_FORCE|XFS_LOG_SYNC);
+					XFS_STATS_INC(xs_ig_frecycle);
+					goto again;
+				}
 
 				vn_trace_exit(vp, "xfs_iget.alloc",
 					(inst_t *)__return_address);
 
 				XFS_STATS_INC(xs_ig_found);
 
-				spin_lock(&ip->i_flags_lock);
-				ip->i_flags &= ~XFS_IRECLAIMABLE;
-				spin_unlock(&ip->i_flags_lock);
+				xfs_iflags_clear(ip, XFS_IRECLAIMABLE);
 				version = ih->ih_version;
 				read_unlock(&ih->ih_lock);
 				xfs_ihash_promote(ih, ip, version);
@@ -299,10 +327,7 @@ finish_inode:
 			if (lock_flags != 0)
 				xfs_ilock(ip, lock_flags);
 
-			spin_lock(&ip->i_flags_lock);
-			ip->i_flags &= ~XFS_ISTALE;
-			spin_unlock(&ip->i_flags_lock);
-
+			xfs_iflags_clear(ip, XFS_ISTALE);
 			vn_trace_exit(vp, "xfs_iget.found",
 						(inst_t *)__return_address);
 			goto return_ip;
@@ -371,10 +396,7 @@ finish_inode:
 	ih->ih_next = ip;
 	ip->i_udquot = ip->i_gdquot = NULL;
 	ih->ih_version++;
-	spin_lock(&ip->i_flags_lock);
-	ip->i_flags |= XFS_INEW;
-	spin_unlock(&ip->i_flags_lock);
-
+	xfs_iflags_set(ip, XFS_INEW);
 	write_unlock(&ih->ih_lock);
 
 	/*
@@ -625,7 +647,7 @@ xfs_iput_new(xfs_inode_t	*ip,
 	vn_trace_entry(vp, "xfs_iput_new", (inst_t *)__return_address);
 
 	if ((ip->i_d.di_mode == 0)) {
-		ASSERT(!(ip->i_flags & XFS_IRECLAIMABLE));
+		ASSERT(!xfs_iflags_test(ip, XFS_IRECLAIMABLE));
 		vn_mark_bad(vp);
 	}
 	if (inode->i_state & I_NEW)
@@ -683,6 +705,7 @@ xfs_ireclaim(xfs_inode_t *ip)
 	/*
 	 * Free all memory associated with the inode.
 	 */
+	xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
 	xfs_idestroy(ip);
 }
 
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index c27d7d49..d72c80d 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2193,7 +2193,7 @@ xfs_ifree_cluster(
 			/* Inode not in memory or we found it already,
 			 * nothing to do
 			 */
-			if (!ip || (ip->i_flags & XFS_ISTALE)) {
+			if (!ip || xfs_iflags_test(ip, XFS_ISTALE)) {
 				read_unlock(&ih->ih_lock);
 				continue;
 			}
@@ -2215,10 +2215,7 @@ xfs_ifree_cluster(
 
 			if (ip == free_ip) {
 				if (xfs_iflock_nowait(ip)) {
-					spin_lock(&ip->i_flags_lock);
-					ip->i_flags |= XFS_ISTALE;
-					spin_unlock(&ip->i_flags_lock);
-
+					xfs_iflags_set(ip, XFS_ISTALE);
 					if (xfs_inode_clean(ip)) {
 						xfs_ifunlock(ip);
 					} else {
@@ -2231,9 +2228,7 @@ xfs_ifree_cluster(
 
 			if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
 				if (xfs_iflock_nowait(ip)) {
-					spin_lock(&ip->i_flags_lock);
-					ip->i_flags |= XFS_ISTALE;
-					spin_unlock(&ip->i_flags_lock);
+					xfs_iflags_set(ip, XFS_ISTALE);
 
 					if (xfs_inode_clean(ip)) {
 						xfs_ifunlock(ip);
@@ -2263,9 +2258,7 @@ xfs_ifree_cluster(
 				AIL_LOCK(mp,s);
 				iip->ili_flush_lsn = iip->ili_item.li_lsn;
 				AIL_UNLOCK(mp, s);
-				spin_lock(&iip->ili_inode->i_flags_lock);
-				iip->ili_inode->i_flags |= XFS_ISTALE;
-				spin_unlock(&iip->ili_inode->i_flags_lock);
+				xfs_iflags_set(ip, XFS_ISTALE);
 				pre_flushed++;
 			}
 			lip = lip->li_bio_list;
@@ -2748,42 +2741,39 @@ xfs_iunpin(
 {
 	ASSERT(atomic_read(&ip->i_pincount) > 0);
 
-	if (atomic_dec_and_test(&ip->i_pincount)) {
+	if (atomic_dec_and_lock(&ip->i_pincount, &ip->i_flags_lock)) {
+
 		/*
-		 * If the inode is currently being reclaimed, the
-		 * linux inode _and_ the xfs vnode may have been
-		 * freed so we cannot reference either of them safely.
-		 * Hence we should not try to do anything to them
-		 * if the xfs inode is currently in the reclaim
-		 * path.
+		 * If the inode is currently being reclaimed, the link between
+		 * the bhv_vnode and the xfs_inode will be broken after the
+		 * XFS_IRECLAIM* flag is set. Hence, if these flags are not
+		 * set, then we can move forward and mark the linux inode dirty
+		 * knowing that it is still valid as it won't freed until after
+		 * the bhv_vnode<->xfs_inode link is broken in xfs_reclaim. The
+		 * i_flags_lock is used to synchronise the setting of the
+		 * XFS_IRECLAIM* flags and the breaking of the link, and so we
+		 * can execute atomically w.r.t to reclaim by holding this lock
+		 * here.
 		 *
-		 * However, we still need to issue the unpin wakeup
-		 * call as the inode reclaim may be blocked waiting for
-		 * the inode to become unpinned.
+		 * However, we still need to issue the unpin wakeup call as the
+		 * inode reclaim may be blocked waiting for the inode to become
+		 * unpinned.
 		 */
-		struct inode *inode = NULL;
 
-		spin_lock(&ip->i_flags_lock);
-		if (!(ip->i_flags & (XFS_IRECLAIM|XFS_IRECLAIMABLE))) {
+		if (!__xfs_iflags_test(ip, XFS_IRECLAIM|XFS_IRECLAIMABLE)) {
 			bhv_vnode_t	*vp = XFS_ITOV_NULL(ip);
+			struct inode *inode = NULL;
+
+			BUG_ON(vp == NULL);
+			inode = vn_to_inode(vp);
+			BUG_ON(inode->i_state & I_CLEAR);
 
 			/* make sync come back and flush this inode */
-			if (vp) {
-				inode = vn_to_inode(vp);
-
-				if (!(inode->i_state &
-						(I_NEW|I_FREEING|I_CLEAR))) {
-					inode = igrab(inode);
-					if (inode)
-						mark_inode_dirty_sync(inode);
-				} else
-					inode = NULL;
-			}
+			if (!(inode->i_state & (I_NEW|I_FREEING)))
+				mark_inode_dirty_sync(inode);
 		}
 		spin_unlock(&ip->i_flags_lock);
 		wake_up(&ip->i_ipin_wait);
-		if (inode)
-			iput(inode);
 	}
 }
 
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index e96eb08..bc82372 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -305,6 +305,47 @@ typedef struct xfs_inode {
 #endif
 } xfs_inode_t;
 
+
+/*
+ * i_flags helper functions
+ */
+static inline void
+__xfs_iflags_set(xfs_inode_t *ip, unsigned short flags)
+{
+	ip->i_flags |= flags;
+}
+
+static inline void
+xfs_iflags_set(xfs_inode_t *ip, unsigned short flags)
+{
+	spin_lock(&ip->i_flags_lock);
+	__xfs_iflags_set(ip, flags);
+	spin_unlock(&ip->i_flags_lock);
+}
+
+static inline void
+xfs_iflags_clear(xfs_inode_t *ip, unsigned short flags)
+{
+	spin_lock(&ip->i_flags_lock);
+	ip->i_flags &= ~flags;
+	spin_unlock(&ip->i_flags_lock);
+}
+
+static inline int
+__xfs_iflags_test(xfs_inode_t *ip, unsigned short flags)
+{
+	return (ip->i_flags & flags);
+}
+
+static inline int
+xfs_iflags_test(xfs_inode_t *ip, unsigned short flags)
+{
+	int ret;
+	spin_lock(&ip->i_flags_lock);
+	ret = __xfs_iflags_test(ip, flags);
+	spin_unlock(&ip->i_flags_lock);
+	return ret;
+}
 #endif	/* __KERNEL__ */
 
 
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 061e2ff..bda774a 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -1013,7 +1013,7 @@ xfs_readlink(
 	pathlen = (int)ip->i_d.di_size;
 
 	if (ip->i_df.if_flags & XFS_IFINLINE) {
-		error = uio_read(ip->i_df.if_u1.if_data, pathlen, uiop);
+		error = xfs_uio_read(ip->i_df.if_u1.if_data, pathlen, uiop);
 	}
 	else {
 		/*
@@ -1044,7 +1044,7 @@ xfs_readlink(
 				byte_cnt = pathlen;
 			pathlen -= byte_cnt;
 
-			error = uio_read(XFS_BUF_PTR(bp), byte_cnt, uiop);
+			error = xfs_uio_read(XFS_BUF_PTR(bp), byte_cnt, uiop);
 			xfs_buf_relse (bp);
 		}
 
@@ -3827,11 +3827,16 @@ xfs_reclaim(
 	 */
 	xfs_synchronize_atime(ip);
 
-	/* If we have nothing to flush with this inode then complete the
-	 * teardown now, otherwise break the link between the xfs inode
-	 * and the linux inode and clean up the xfs inode later. This
-	 * avoids flushing the inode to disk during the delete operation
-	 * itself.
+	/*
+	 * If we have nothing to flush with this inode then complete the
+	 * teardown now, otherwise break the link between the xfs inode and the
+	 * linux inode and clean up the xfs inode later. This avoids flushing
+	 * the inode to disk during the delete operation itself.
+	 *
+	 * When breaking the link, we need to set the XFS_IRECLAIMABLE flag
+	 * first to ensure that xfs_iunpin() will never see an xfs inode
+	 * that has a linux inode being reclaimed. Synchronisation is provided
+	 * by the i_flags_lock.
 	 */
 	if (!ip->i_update_core && (ip->i_itemp == NULL)) {
 		xfs_ilock(ip, XFS_ILOCK_EXCL);
@@ -3840,13 +3845,13 @@ xfs_reclaim(
 	} else {
 		xfs_mount_t	*mp = ip->i_mount;
 
-		/* Protect sync from us */
+		/* Protect sync and unpin from us */
 		XFS_MOUNT_ILOCK(mp);
-		vn_bhv_remove(VN_BHV_HEAD(vp), XFS_ITOBHV(ip));
-		list_add_tail(&ip->i_reclaim, &mp->m_del_inodes);
 		spin_lock(&ip->i_flags_lock);
-		ip->i_flags |= XFS_IRECLAIMABLE;
+		__xfs_iflags_set(ip, XFS_IRECLAIMABLE);
+		vn_bhv_remove(VN_BHV_HEAD(vp), XFS_ITOBHV(ip));
 		spin_unlock(&ip->i_flags_lock);
+		list_add_tail(&ip->i_reclaim, &mp->m_del_inodes);
 		XFS_MOUNT_IUNLOCK(mp);
 	}
 	return 0;
@@ -3872,8 +3877,8 @@ xfs_finish_reclaim(
 	 */
 	write_lock(&ih->ih_lock);
 	spin_lock(&ip->i_flags_lock);
-	if ((ip->i_flags & XFS_IRECLAIM) ||
-	    (!(ip->i_flags & XFS_IRECLAIMABLE) && vp == NULL)) {
+	if (__xfs_iflags_test(ip, XFS_IRECLAIM) ||
+	    (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) && vp == NULL)) {
 		spin_unlock(&ip->i_flags_lock);
 		write_unlock(&ih->ih_lock);
 		if (locked) {
@@ -3882,7 +3887,7 @@ xfs_finish_reclaim(
 		}
 		return 1;
 	}
-	ip->i_flags |= XFS_IRECLAIM;
+	__xfs_iflags_set(ip, XFS_IRECLAIM);
 	spin_unlock(&ip->i_flags_lock);
 	write_unlock(&ih->ih_lock);