diff options
author | Colin Cross <ccross@android.com> | 2011-10-27 15:01:19 -0700 |
---|---|---|
committer | Colin Cross <ccross@android.com> | 2011-10-27 15:01:19 -0700 |
commit | 2bb3e310159b65c88caf0c67a20ed257568be267 (patch) | |
tree | e4ad01c06a9e27939781c5dd9d0cb92e6fcd54d5 /fs | |
parent | 2f53cb72c1574d3880d9e88e254b756565fe2f6d (diff) | |
parent | 97596c34030ed28657ccafddb67e17a03890b90a (diff) | |
download | kernel_samsung_tuna-2bb3e310159b65c88caf0c67a20ed257568be267.zip kernel_samsung_tuna-2bb3e310159b65c88caf0c67a20ed257568be267.tar.gz kernel_samsung_tuna-2bb3e310159b65c88caf0c67a20ed257568be267.tar.bz2 |
Merge commit 'v3.0.8' into android-3.0
Diffstat (limited to 'fs')
55 files changed, 969 insertions, 435 deletions
diff --git a/fs/9p/acl.c b/fs/9p/acl.c index 535ab6e..4a866cd 100644 --- a/fs/9p/acl.c +++ b/fs/9p/acl.c @@ -185,12 +185,15 @@ int v9fs_acl_chmod(struct dentry *dentry) } int v9fs_set_create_acl(struct dentry *dentry, - struct posix_acl *dpacl, struct posix_acl *pacl) + struct posix_acl **dpacl, struct posix_acl **pacl) { - v9fs_set_acl(dentry, ACL_TYPE_DEFAULT, dpacl); - v9fs_set_acl(dentry, ACL_TYPE_ACCESS, pacl); - posix_acl_release(dpacl); - posix_acl_release(pacl); + if (dentry) { + v9fs_set_acl(dentry, ACL_TYPE_DEFAULT, *dpacl); + v9fs_set_acl(dentry, ACL_TYPE_ACCESS, *pacl); + } + posix_acl_release(*dpacl); + posix_acl_release(*pacl); + *dpacl = *pacl = NULL; return 0; } @@ -212,11 +215,11 @@ int v9fs_acl_mode(struct inode *dir, mode_t *modep, struct posix_acl *clone; if (S_ISDIR(mode)) - *dpacl = acl; + *dpacl = posix_acl_dup(acl); clone = posix_acl_clone(acl, GFP_NOFS); - retval = -ENOMEM; + posix_acl_release(acl); if (!clone) - goto cleanup; + return -ENOMEM; retval = posix_acl_create_masq(clone, &mode); if (retval < 0) { @@ -225,11 +228,12 @@ int v9fs_acl_mode(struct inode *dir, mode_t *modep, } if (retval > 0) *pacl = clone; + else + posix_acl_release(clone); } *modep = mode; return 0; cleanup: - posix_acl_release(acl); return retval; } diff --git a/fs/9p/acl.h b/fs/9p/acl.h index 7ef3ac9..c47ea9c 100644 --- a/fs/9p/acl.h +++ b/fs/9p/acl.h @@ -19,7 +19,7 @@ extern int v9fs_get_acl(struct inode *, struct p9_fid *); extern int v9fs_check_acl(struct inode *inode, int mask, unsigned int flags); extern int v9fs_acl_chmod(struct dentry *); extern int v9fs_set_create_acl(struct dentry *, - struct posix_acl *, struct posix_acl *); + struct posix_acl **, struct posix_acl **); extern int v9fs_acl_mode(struct inode *dir, mode_t *modep, struct posix_acl **dpacl, struct posix_acl **pacl); #else @@ -33,8 +33,8 @@ static inline int v9fs_acl_chmod(struct dentry *dentry) return 0; } static inline int v9fs_set_create_acl(struct dentry *dentry, - struct posix_acl *dpacl, - struct posix_acl *pacl) + struct posix_acl **dpacl, + struct posix_acl **pacl) { return 0; } diff --git a/fs/9p/cache.c b/fs/9p/cache.c index 5b335c5..945aa5f 100644 --- a/fs/9p/cache.c +++ b/fs/9p/cache.c @@ -108,11 +108,10 @@ static uint16_t v9fs_cache_inode_get_key(const void *cookie_netfs_data, void *buffer, uint16_t bufmax) { const struct v9fs_inode *v9inode = cookie_netfs_data; - memcpy(buffer, &v9inode->fscache_key->path, - sizeof(v9inode->fscache_key->path)); + memcpy(buffer, &v9inode->qid.path, sizeof(v9inode->qid.path)); P9_DPRINTK(P9_DEBUG_FSC, "inode %p get key %llu", &v9inode->vfs_inode, - v9inode->fscache_key->path); - return sizeof(v9inode->fscache_key->path); + v9inode->qid.path); + return sizeof(v9inode->qid.path); } static void v9fs_cache_inode_get_attr(const void *cookie_netfs_data, @@ -129,11 +128,10 @@ static uint16_t v9fs_cache_inode_get_aux(const void *cookie_netfs_data, void *buffer, uint16_t buflen) { const struct v9fs_inode *v9inode = cookie_netfs_data; - memcpy(buffer, &v9inode->fscache_key->version, - sizeof(v9inode->fscache_key->version)); + memcpy(buffer, &v9inode->qid.version, sizeof(v9inode->qid.version)); P9_DPRINTK(P9_DEBUG_FSC, "inode %p get aux %u", &v9inode->vfs_inode, - v9inode->fscache_key->version); - return sizeof(v9inode->fscache_key->version); + v9inode->qid.version); + return sizeof(v9inode->qid.version); } static enum @@ -143,11 +141,11 @@ fscache_checkaux v9fs_cache_inode_check_aux(void *cookie_netfs_data, { const struct v9fs_inode *v9inode = cookie_netfs_data; - if (buflen != sizeof(v9inode->fscache_key->version)) + if (buflen != sizeof(v9inode->qid.version)) return FSCACHE_CHECKAUX_OBSOLETE; - if (memcmp(buffer, &v9inode->fscache_key->version, - sizeof(v9inode->fscache_key->version))) + if (memcmp(buffer, &v9inode->qid.version, + sizeof(v9inode->qid.version))) return FSCACHE_CHECKAUX_OBSOLETE; return FSCACHE_CHECKAUX_OKAY; diff --git a/fs/9p/cache.h b/fs/9p/cache.h index 049507a..40cc54c 100644 --- a/fs/9p/cache.h +++ b/fs/9p/cache.h @@ -93,15 +93,6 @@ static inline void v9fs_uncache_page(struct inode *inode, struct page *page) BUG_ON(PageFsCache(page)); } -static inline void v9fs_fscache_set_key(struct inode *inode, - struct p9_qid *qid) -{ - struct v9fs_inode *v9inode = V9FS_I(inode); - spin_lock(&v9inode->fscache_lock); - v9inode->fscache_key = qid; - spin_unlock(&v9inode->fscache_lock); -} - static inline void v9fs_fscache_wait_on_page_write(struct inode *inode, struct page *page) { diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index c82b017..ef96618 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -78,6 +78,25 @@ static const match_table_t tokens = { {Opt_err, NULL} }; +/* Interpret mount options for cache mode */ +static int get_cache_mode(char *s) +{ + int version = -EINVAL; + + if (!strcmp(s, "loose")) { + version = CACHE_LOOSE; + P9_DPRINTK(P9_DEBUG_9P, "Cache mode: loose\n"); + } else if (!strcmp(s, "fscache")) { + version = CACHE_FSCACHE; + P9_DPRINTK(P9_DEBUG_9P, "Cache mode: fscache\n"); + } else if (!strcmp(s, "none")) { + version = CACHE_NONE; + P9_DPRINTK(P9_DEBUG_9P, "Cache mode: none\n"); + } else + printk(KERN_INFO "9p: Unknown Cache mode %s.\n", s); + return version; +} + /** * v9fs_parse_options - parse mount options into session structure * @v9ses: existing v9fs session information @@ -97,7 +116,7 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts) /* setup defaults */ v9ses->afid = ~0; v9ses->debug = 0; - v9ses->cache = 0; + v9ses->cache = CACHE_NONE; #ifdef CONFIG_9P_FSCACHE v9ses->cachetag = NULL; #endif @@ -171,13 +190,13 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts) "problem allocating copy of cache arg\n"); goto free_and_return; } + ret = get_cache_mode(s); + if (ret == -EINVAL) { + kfree(s); + goto free_and_return; + } - if (strcmp(s, "loose") == 0) - v9ses->cache = CACHE_LOOSE; - else if (strcmp(s, "fscache") == 0) - v9ses->cache = CACHE_FSCACHE; - else - v9ses->cache = CACHE_NONE; + v9ses->cache = ret; kfree(s); break; @@ -200,9 +219,15 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts) } else { v9ses->flags |= V9FS_ACCESS_SINGLE; v9ses->uid = simple_strtoul(s, &e, 10); - if (*e != '\0') - v9ses->uid = ~0; + if (*e != '\0') { + ret = -EINVAL; + printk(KERN_INFO "9p: Unknown access " + "argument %s.\n", s); + kfree(s); + goto free_and_return; + } } + kfree(s); break; @@ -487,8 +512,8 @@ static void v9fs_inode_init_once(void *foo) struct v9fs_inode *v9inode = (struct v9fs_inode *)foo; #ifdef CONFIG_9P_FSCACHE v9inode->fscache = NULL; - v9inode->fscache_key = NULL; #endif + memset(&v9inode->qid, 0, sizeof(v9inode->qid)); inode_init_once(&v9inode->vfs_inode); } diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h index e5ebedf..e78956c 100644 --- a/fs/9p/v9fs.h +++ b/fs/9p/v9fs.h @@ -125,8 +125,8 @@ struct v9fs_inode { #ifdef CONFIG_9P_FSCACHE spinlock_t fscache_lock; struct fscache_cookie *fscache; - struct p9_qid *fscache_key; #endif + struct p9_qid qid; unsigned int cache_validity; struct p9_fid *writeback_fid; struct mutex v_mutex; @@ -153,13 +153,13 @@ extern void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void *p); extern struct inode *v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid, - struct super_block *sb); + struct super_block *sb, int new); extern const struct inode_operations v9fs_dir_inode_operations_dotl; extern const struct inode_operations v9fs_file_inode_operations_dotl; extern const struct inode_operations v9fs_symlink_inode_operations_dotl; extern struct inode *v9fs_inode_from_fid_dotl(struct v9fs_session_info *v9ses, struct p9_fid *fid, - struct super_block *sb); + struct super_block *sb, int new); /* other default globals */ #define V9FS_PORT 564 @@ -201,8 +201,27 @@ v9fs_get_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid, struct super_block *sb) { if (v9fs_proto_dotl(v9ses)) - return v9fs_inode_from_fid_dotl(v9ses, fid, sb); + return v9fs_inode_from_fid_dotl(v9ses, fid, sb, 0); else - return v9fs_inode_from_fid(v9ses, fid, sb); + return v9fs_inode_from_fid(v9ses, fid, sb, 0); } + +/** + * v9fs_get_new_inode_from_fid - Helper routine to populate an inode by + * issuing a attribute request + * @v9ses: session information + * @fid: fid to issue attribute request for + * @sb: superblock on which to create inode + * + */ +static inline struct inode * +v9fs_get_new_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid, + struct super_block *sb) +{ + if (v9fs_proto_dotl(v9ses)) + return v9fs_inode_from_fid_dotl(v9ses, fid, sb, 1); + else + return v9fs_inode_from_fid(v9ses, fid, sb, 1); +} + #endif diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h index 4014160..f9a28ea 100644 --- a/fs/9p/v9fs_vfs.h +++ b/fs/9p/v9fs_vfs.h @@ -54,9 +54,9 @@ extern struct kmem_cache *v9fs_inode_cache; struct inode *v9fs_alloc_inode(struct super_block *sb); void v9fs_destroy_inode(struct inode *inode); -struct inode *v9fs_get_inode(struct super_block *sb, int mode); +struct inode *v9fs_get_inode(struct super_block *sb, int mode, dev_t); int v9fs_init_inode(struct v9fs_session_info *v9ses, - struct inode *inode, int mode); + struct inode *inode, int mode, dev_t); void v9fs_evict_inode(struct inode *inode); ino_t v9fs_qid2ino(struct p9_qid *qid); void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *); @@ -82,4 +82,6 @@ static inline void v9fs_invalidate_inode_attr(struct inode *inode) v9inode->cache_validity |= V9FS_INO_INVALID_ATTR; return; } + +int v9fs_open_to_dotl_flags(int flags); #endif diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index ffed558..9d6e168 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -65,7 +65,7 @@ int v9fs_file_open(struct inode *inode, struct file *file) v9inode = V9FS_I(inode); v9ses = v9fs_inode2v9ses(inode); if (v9fs_proto_dotl(v9ses)) - omode = file->f_flags; + omode = v9fs_open_to_dotl_flags(file->f_flags); else omode = v9fs_uflags2omode(file->f_flags, v9fs_proto_dotu(v9ses)); @@ -169,7 +169,18 @@ static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl) /* convert posix lock to p9 tlock args */ memset(&flock, 0, sizeof(flock)); - flock.type = fl->fl_type; + /* map the lock type */ + switch (fl->fl_type) { + case F_RDLCK: + flock.type = P9_LOCK_TYPE_RDLCK; + break; + case F_WRLCK: + flock.type = P9_LOCK_TYPE_WRLCK; + break; + case F_UNLCK: + flock.type = P9_LOCK_TYPE_UNLCK; + break; + } flock.start = fl->fl_start; if (fl->fl_end == OFFSET_MAX) flock.length = 0; @@ -245,7 +256,7 @@ static int v9fs_file_getlock(struct file *filp, struct file_lock *fl) /* convert posix lock to p9 tgetlock args */ memset(&glock, 0, sizeof(glock)); - glock.type = fl->fl_type; + glock.type = P9_LOCK_TYPE_UNLCK; glock.start = fl->fl_start; if (fl->fl_end == OFFSET_MAX) glock.length = 0; @@ -257,17 +268,26 @@ static int v9fs_file_getlock(struct file *filp, struct file_lock *fl) res = p9_client_getlock_dotl(fid, &glock); if (res < 0) return res; - if (glock.type != F_UNLCK) { - fl->fl_type = glock.type; + /* map 9p lock type to os lock type */ + switch (glock.type) { + case P9_LOCK_TYPE_RDLCK: + fl->fl_type = F_RDLCK; + break; + case P9_LOCK_TYPE_WRLCK: + fl->fl_type = F_WRLCK; + break; + case P9_LOCK_TYPE_UNLCK: + fl->fl_type = F_UNLCK; + break; + } + if (glock.type != P9_LOCK_TYPE_UNLCK) { fl->fl_start = glock.start; if (glock.length == 0) fl->fl_end = OFFSET_MAX; else fl->fl_end = glock.start + glock.length - 1; fl->fl_pid = glock.proc_id; - } else - fl->fl_type = F_UNLCK; - + } return res; } diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 7f6c677..c72e20c 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -95,15 +95,18 @@ static int unixmode2p9mode(struct v9fs_session_info *v9ses, int mode) /** * p9mode2unixmode- convert plan9 mode bits to unix mode bits * @v9ses: v9fs session information - * @mode: mode to convert + * @stat: p9_wstat from which mode need to be derived + * @rdev: major number, minor number in case of device files. * */ - -static int p9mode2unixmode(struct v9fs_session_info *v9ses, int mode) +static int p9mode2unixmode(struct v9fs_session_info *v9ses, + struct p9_wstat *stat, dev_t *rdev) { int res; + int mode = stat->mode; - res = mode & 0777; + res = mode & S_IALLUGO; + *rdev = 0; if ((mode & P9_DMDIR) == P9_DMDIR) res |= S_IFDIR; @@ -116,9 +119,26 @@ static int p9mode2unixmode(struct v9fs_session_info *v9ses, int mode) && (v9ses->nodev == 0)) res |= S_IFIFO; else if ((mode & P9_DMDEVICE) && (v9fs_proto_dotu(v9ses)) - && (v9ses->nodev == 0)) - res |= S_IFBLK; - else + && (v9ses->nodev == 0)) { + char type = 0, ext[32]; + int major = -1, minor = -1; + + strncpy(ext, stat->extension, sizeof(ext)); + sscanf(ext, "%c %u %u", &type, &major, &minor); + switch (type) { + case 'c': + res |= S_IFCHR; + break; + case 'b': + res |= S_IFBLK; + break; + default: + P9_DPRINTK(P9_DEBUG_ERROR, + "Unknown special type %c %s\n", type, + stat->extension); + }; + *rdev = MKDEV(major, minor); + } else res |= S_IFREG; if (v9fs_proto_dotu(v9ses)) { @@ -131,7 +151,6 @@ static int p9mode2unixmode(struct v9fs_session_info *v9ses, int mode) if ((mode & P9_DMSETVTX) == P9_DMSETVTX) res |= S_ISVTX; } - return res; } @@ -216,7 +235,6 @@ struct inode *v9fs_alloc_inode(struct super_block *sb) return NULL; #ifdef CONFIG_9P_FSCACHE v9inode->fscache = NULL; - v9inode->fscache_key = NULL; spin_lock_init(&v9inode->fscache_lock); #endif v9inode->writeback_fid = NULL; @@ -243,13 +261,13 @@ void v9fs_destroy_inode(struct inode *inode) } int v9fs_init_inode(struct v9fs_session_info *v9ses, - struct inode *inode, int mode) + struct inode *inode, int mode, dev_t rdev) { int err = 0; inode_init_owner(inode, NULL, mode); inode->i_blocks = 0; - inode->i_rdev = 0; + inode->i_rdev = rdev; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; inode->i_mapping->a_ops = &v9fs_addr_operations; @@ -336,7 +354,7 @@ error: * */ -struct inode *v9fs_get_inode(struct super_block *sb, int mode) +struct inode *v9fs_get_inode(struct super_block *sb, int mode, dev_t rdev) { int err; struct inode *inode; @@ -349,7 +367,7 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode) P9_EPRINTK(KERN_WARNING, "Problem allocating inode\n"); return ERR_PTR(-ENOMEM); } - err = v9fs_init_inode(v9ses, inode, mode); + err = v9fs_init_inode(v9ses, inode, mode, rdev); if (err) { iput(inode); return ERR_PTR(err); @@ -433,17 +451,62 @@ void v9fs_evict_inode(struct inode *inode) } } +static int v9fs_test_inode(struct inode *inode, void *data) +{ + int umode; + dev_t rdev; + struct v9fs_inode *v9inode = V9FS_I(inode); + struct p9_wstat *st = (struct p9_wstat *)data; + struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode); + + umode = p9mode2unixmode(v9ses, st, &rdev); + /* don't match inode of different type */ + if ((inode->i_mode & S_IFMT) != (umode & S_IFMT)) + return 0; + + /* compare qid details */ + if (memcmp(&v9inode->qid.version, + &st->qid.version, sizeof(v9inode->qid.version))) + return 0; + + if (v9inode->qid.type != st->qid.type) + return 0; + return 1; +} + +static int v9fs_test_new_inode(struct inode *inode, void *data) +{ + return 0; +} + +static int v9fs_set_inode(struct inode *inode, void *data) +{ + struct v9fs_inode *v9inode = V9FS_I(inode); + struct p9_wstat *st = (struct p9_wstat *)data; + + memcpy(&v9inode->qid, &st->qid, sizeof(st->qid)); + return 0; +} + static struct inode *v9fs_qid_iget(struct super_block *sb, struct p9_qid *qid, - struct p9_wstat *st) + struct p9_wstat *st, + int new) { + dev_t rdev; int retval, umode; unsigned long i_ino; struct inode *inode; struct v9fs_session_info *v9ses = sb->s_fs_info; + int (*test)(struct inode *, void *); + + if (new) + test = v9fs_test_new_inode; + else + test = v9fs_test_inode; i_ino = v9fs_qid2ino(qid); - inode = iget_locked(sb, i_ino); + inode = iget5_locked(sb, i_ino, test, v9fs_set_inode, st); if (!inode) return ERR_PTR(-ENOMEM); if (!(inode->i_state & I_NEW)) @@ -453,14 +516,14 @@ static struct inode *v9fs_qid_iget(struct super_block *sb, * FIXME!! we may need support for stale inodes * later. */ - umode = p9mode2unixmode(v9ses, st->mode); - retval = v9fs_init_inode(v9ses, inode, umode); + inode->i_ino = i_ino; + umode = p9mode2unixmode(v9ses, st, &rdev); + retval = v9fs_init_inode(v9ses, inode, umode, rdev); if (retval) goto error; v9fs_stat2inode(st, inode, sb); #ifdef CONFIG_9P_FSCACHE - v9fs_fscache_set_key(inode, &st->qid); v9fs_cache_inode_get_cookie(inode); #endif unlock_new_inode(inode); @@ -474,7 +537,7 @@ error: struct inode * v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid, - struct super_block *sb) + struct super_block *sb, int new) { struct p9_wstat *st; struct inode *inode = NULL; @@ -483,7 +546,7 @@ v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid, if (IS_ERR(st)) return ERR_CAST(st); - inode = v9fs_qid_iget(sb, &st->qid, st); + inode = v9fs_qid_iget(sb, &st->qid, st, new); p9stat_free(st); kfree(st); return inode; @@ -585,19 +648,17 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir, } /* instantiate inode and assign the unopened fid to the dentry */ - inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb); + inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb); if (IS_ERR(inode)) { err = PTR_ERR(inode); P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err); goto error; } - d_instantiate(dentry, inode); err = v9fs_fid_add(dentry, fid); if (err < 0) goto error; - + d_instantiate(dentry, inode); return ofid; - error: if (ofid) p9_client_clunk(ofid); @@ -738,6 +799,7 @@ static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nameidata) { + struct dentry *res; struct super_block *sb; struct v9fs_session_info *v9ses; struct p9_fid *dfid, *fid; @@ -769,22 +831,35 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, return ERR_PTR(result); } - - inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb); + /* + * Make sure we don't use a wrong inode due to parallel + * unlink. For cached mode create calls request for new + * inode. But with cache disabled, lookup should do this. + */ + if (v9ses->cache) + inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb); + else + inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb); if (IS_ERR(inode)) { result = PTR_ERR(inode); inode = NULL; goto error; } - result = v9fs_fid_add(dentry, fid); if (result < 0) goto error_iput; - inst_out: - d_add(dentry, inode); - return NULL; - + /* + * If we had a rename on the server and a parallel lookup + * for the new name, then make sure we instantiate with + * the new name. ie look up for a/b, while on server somebody + * moved b under k and client parallely did a lookup for + * k/b. + */ + res = d_materialise_unique(dentry, inode); + if (!IS_ERR(res)) + return res; + result = PTR_ERR(res); error_iput: iput(inode); error: @@ -950,7 +1025,7 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, return PTR_ERR(st); v9fs_stat2inode(st, dentry->d_inode, dentry->d_inode->i_sb); - generic_fillattr(dentry->d_inode, stat); + generic_fillattr(dentry->d_inode, stat); p9stat_free(st); kfree(st); @@ -1034,6 +1109,7 @@ void v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode, struct super_block *sb) { + mode_t mode; char ext[32]; char tag_name[14]; unsigned int i_nlink; @@ -1069,31 +1145,9 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode, inode->i_nlink = i_nlink; } } - inode->i_mode = p9mode2unixmode(v9ses, stat->mode); - if ((S_ISBLK(inode->i_mode)) || (S_ISCHR(inode->i_mode))) { - char type = 0; - int major = -1; - int minor = -1; - - strncpy(ext, stat->extension, sizeof(ext)); - sscanf(ext, "%c %u %u", &type, &major, &minor); - switch (type) { - case 'c': - inode->i_mode &= ~S_IFBLK; - inode->i_mode |= S_IFCHR; - break; - case 'b': - break; - default: - P9_DPRINTK(P9_DEBUG_ERROR, - "Unknown special type %c %s\n", type, - stat->extension); - }; - inode->i_rdev = MKDEV(major, minor); - init_special_inode(inode, inode->i_mode, inode->i_rdev); - } else - inode->i_rdev = 0; - + mode = stat->mode & S_IALLUGO; + mode |= inode->i_mode & ~S_IALLUGO; + inode->i_mode = mode; i_size_write(inode, stat->length); /* not real number of blocks, but 512 byte ones ... */ @@ -1359,6 +1413,8 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev) int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode) { + int umode; + dev_t rdev; loff_t i_size; struct p9_wstat *st; struct v9fs_session_info *v9ses; @@ -1367,6 +1423,12 @@ int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode) st = p9_client_stat(fid); if (IS_ERR(st)) return PTR_ERR(st); + /* + * Don't update inode if the file type is different + */ + umode = p9mode2unixmode(v9ses, st, &rdev); + if ((inode->i_mode & S_IFMT) != (umode & S_IFMT)) + goto out; spin_lock(&inode->i_lock); /* @@ -1378,6 +1440,7 @@ int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode) if (v9ses->cache) inode->i_size = i_size; spin_unlock(&inode->i_lock); +out: p9stat_free(st); kfree(st); return 0; diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 691c78f..c873172 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -86,18 +86,63 @@ static struct dentry *v9fs_dentry_from_dir_inode(struct inode *inode) return dentry; } +static int v9fs_test_inode_dotl(struct inode *inode, void *data) +{ + struct v9fs_inode *v9inode = V9FS_I(inode); + struct p9_stat_dotl *st = (struct p9_stat_dotl *)data; + + /* don't match inode of different type */ + if ((inode->i_mode & S_IFMT) != (st->st_mode & S_IFMT)) + return 0; + + if (inode->i_generation != st->st_gen) + return 0; + + /* compare qid details */ + if (memcmp(&v9inode->qid.version, + &st->qid.version, sizeof(v9inode->qid.version))) + return 0; + + if (v9inode->qid.type != st->qid.type) + return 0; + return 1; +} + +/* Always get a new inode */ +static int v9fs_test_new_inode_dotl(struct inode *inode, void *data) +{ + return 0; +} + +static int v9fs_set_inode_dotl(struct inode *inode, void *data) +{ + struct v9fs_inode *v9inode = V9FS_I(inode); + struct p9_stat_dotl *st = (struct p9_stat_dotl *)data; + + memcpy(&v9inode->qid, &st->qid, sizeof(st->qid)); + inode->i_generation = st->st_gen; + return 0; +} + static struct inode *v9fs_qid_iget_dotl(struct super_block *sb, struct p9_qid *qid, struct p9_fid *fid, - struct p9_stat_dotl *st) + struct p9_stat_dotl *st, + int new) { int retval; unsigned long i_ino; struct inode *inode; struct v9fs_session_info *v9ses = sb->s_fs_info; + int (*test)(struct inode *, void *); + + if (new) + test = v9fs_test_new_inode_dotl; + else + test = v9fs_test_inode_dotl; i_ino = v9fs_qid2ino(qid); - inode = iget_locked(sb, i_ino); + inode = iget5_locked(sb, i_ino, test, v9fs_set_inode_dotl, st); if (!inode) return ERR_PTR(-ENOMEM); if (!(inode->i_state & I_NEW)) @@ -107,13 +152,14 @@ static struct inode *v9fs_qid_iget_dotl(struct super_block *sb, * FIXME!! we may need support for stale inodes * later. */ - retval = v9fs_init_inode(v9ses, inode, st->st_mode); + inode->i_ino = i_ino; + retval = v9fs_init_inode(v9ses, inode, + st->st_mode, new_decode_dev(st->st_rdev)); if (retval) goto error; v9fs_stat2inode_dotl(st, inode); #ifdef CONFIG_9P_FSCACHE - v9fs_fscache_set_key(inode, &st->qid); v9fs_cache_inode_get_cookie(inode); #endif retval = v9fs_get_acl(inode, fid); @@ -131,20 +177,72 @@ error: struct inode * v9fs_inode_from_fid_dotl(struct v9fs_session_info *v9ses, struct p9_fid *fid, - struct super_block *sb) + struct super_block *sb, int new) { struct p9_stat_dotl *st; struct inode *inode = NULL; - st = p9_client_getattr_dotl(fid, P9_STATS_BASIC); + st = p9_client_getattr_dotl(fid, P9_STATS_BASIC | P9_STATS_GEN); if (IS_ERR(st)) return ERR_CAST(st); - inode = v9fs_qid_iget_dotl(sb, &st->qid, fid, st); + inode = v9fs_qid_iget_dotl(sb, &st->qid, fid, st, new); kfree(st); return inode; } +struct dotl_openflag_map { + int open_flag; + int dotl_flag; +}; + +static int v9fs_mapped_dotl_flags(int flags) +{ + int i; + int rflags = 0; + struct dotl_openflag_map dotl_oflag_map[] = { + { O_CREAT, P9_DOTL_CREATE }, + { O_EXCL, P9_DOTL_EXCL }, + { O_NOCTTY, P9_DOTL_NOCTTY }, + { O_TRUNC, P9_DOTL_TRUNC }, + { O_APPEND, P9_DOTL_APPEND }, + { O_NONBLOCK, P9_DOTL_NONBLOCK }, + { O_DSYNC, P9_DOTL_DSYNC }, + { FASYNC, P9_DOTL_FASYNC }, + { O_DIRECT, P9_DOTL_DIRECT }, + { O_LARGEFILE, P9_DOTL_LARGEFILE }, + { O_DIRECTORY, P9_DOTL_DIRECTORY }, + { O_NOFOLLOW, P9_DOTL_NOFOLLOW }, + { O_NOATIME, P9_DOTL_NOATIME }, + { O_CLOEXEC, P9_DOTL_CLOEXEC }, + { O_SYNC, P9_DOTL_SYNC}, + }; + for (i = 0; i < ARRAY_SIZE(dotl_oflag_map); i++) { + if (flags & dotl_oflag_map[i].open_flag) + rflags |= dotl_oflag_map[i].dotl_flag; + } + return rflags; +} + +/** + * v9fs_open_to_dotl_flags- convert Linux specific open flags to + * plan 9 open flag. + * @flags: flags to convert + */ +int v9fs_open_to_dotl_flags(int flags) +{ + int rflags = 0; + + /* + * We have same bits for P9_DOTL_READONLY, P9_DOTL_WRONLY + * and P9_DOTL_NOACCESS + */ + rflags |= flags & O_ACCMODE; + rflags |= v9fs_mapped_dotl_flags(flags); + + return rflags; +} + /** * v9fs_vfs_create_dotl - VFS hook to create files for 9P2000.L protocol. * @dir: directory inode that is being created @@ -213,7 +311,8 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode, "Failed to get acl values in creat %d\n", err); goto error; } - err = p9_client_create_dotl(ofid, name, flags, mode, gid, &qid); + err = p9_client_create_dotl(ofid, name, v9fs_open_to_dotl_flags(flags), + mode, gid, &qid); if (err < 0) { P9_DPRINTK(P9_DEBUG_VFS, "p9_client_open_dotl failed in creat %d\n", @@ -230,19 +329,19 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode, fid = NULL; goto error; } - inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb); + inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb); if (IS_ERR(inode)) { err = PTR_ERR(inode); P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err); goto error; } - d_instantiate(dentry, inode); err = v9fs_fid_add(dentry, fid); if (err < 0) goto error; + d_instantiate(dentry, inode); /* Now set the ACL based on the default value */ - v9fs_set_create_acl(dentry, dacl, pacl); + v9fs_set_create_acl(dentry, &dacl, &pacl); v9inode = V9FS_I(inode); mutex_lock(&v9inode->v_mutex); @@ -283,6 +382,7 @@ error: err_clunk_old_fid: if (ofid) p9_client_clunk(ofid); + v9fs_set_create_acl(NULL, &dacl, &pacl); return err; } @@ -350,17 +450,17 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir, goto error; } - inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb); + inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb); if (IS_ERR(inode)) { err = PTR_ERR(inode); P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err); goto error; } - d_instantiate(dentry, inode); err = v9fs_fid_add(dentry, fid); if (err < 0) goto error; + d_instantiate(dentry, inode); fid = NULL; } else { /* @@ -368,7 +468,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir, * inode with stat. We need to get an inode * so that we can set the acl with dentry */ - inode = v9fs_get_inode(dir->i_sb, mode); + inode = v9fs_get_inode(dir->i_sb, mode, 0); if (IS_ERR(inode)) { err = PTR_ERR(inode); goto error; @@ -376,12 +476,13 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir, d_instantiate(dentry, inode); } /* Now set the ACL based on the default value */ - v9fs_set_create_acl(dentry, dacl, pacl); + v9fs_set_create_acl(dentry, &dacl, &pacl); inc_nlink(dir); v9fs_invalidate_inode_attr(dir); error: if (fid) p9_client_clunk(fid); + v9fs_set_create_acl(NULL, &dacl, &pacl); return err; } @@ -493,6 +594,7 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr) void v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode) { + mode_t mode; struct v9fs_inode *v9inode = V9FS_I(inode); if ((stat->st_result_mask & P9_STATS_BASIC) == P9_STATS_BASIC) { @@ -505,11 +607,10 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode) inode->i_uid = stat->st_uid; inode->i_gid = stat->st_gid; inode->i_nlink = stat->st_nlink; - inode->i_mode = stat->st_mode; - inode->i_rdev = new_decode_dev(stat->st_rdev); - if ((S_ISBLK(inode->i_mode)) || (S_ISCHR(inode->i_mode))) - init_special_inode(inode, inode->i_mode, inode->i_rdev); + mode = stat->st_mode & S_IALLUGO; + mode |= inode->i_mode & ~S_IALLUGO; + inode->i_mode = mode; i_size_write(inode, stat->st_size); inode->i_blocks = stat->st_blocks; @@ -547,7 +648,7 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode) inode->i_blocks = stat->st_blocks; } if (stat->st_result_mask & P9_STATS_GEN) - inode->i_generation = stat->st_gen; + inode->i_generation = stat->st_gen; /* Currently we don't support P9_STATS_BTIME and P9_STATS_DATA_VERSION * because the inode structure does not have fields for them. @@ -603,21 +704,21 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry, } /* instantiate inode and assign the unopened fid to dentry */ - inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb); + inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb); if (IS_ERR(inode)) { err = PTR_ERR(inode); P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err); goto error; } - d_instantiate(dentry, inode); err = v9fs_fid_add(dentry, fid); if (err < 0) goto error; + d_instantiate(dentry, inode); fid = NULL; } else { /* Not in cached mode. No need to populate inode with stat */ - inode = v9fs_get_inode(dir->i_sb, S_IFLNK); + inode = v9fs_get_inode(dir->i_sb, S_IFLNK, 0); if (IS_ERR(inode)) { err = PTR_ERR(inode); goto error; @@ -756,24 +857,24 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode, goto error; } - inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb); + inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb); if (IS_ERR(inode)) { err = PTR_ERR(inode); P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err); goto error; } - d_instantiate(dentry, inode); err = v9fs_fid_add(dentry, fid); if (err < 0) goto error; + d_instantiate(dentry, inode); fid = NULL; } else { /* * Not in cached mode. No need to populate inode with stat. * socket syscall returns a fd, so we need instantiate */ - inode = v9fs_get_inode(dir->i_sb, mode); + inode = v9fs_get_inode(dir->i_sb, mode, rdev); if (IS_ERR(inode)) { err = PTR_ERR(inode); goto error; @@ -781,10 +882,11 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode, d_instantiate(dentry, inode); } /* Now set the ACL based on the default value */ - v9fs_set_create_acl(dentry, dacl, pacl); + v9fs_set_create_acl(dentry, &dacl, &pacl); error: if (fid) p9_client_clunk(fid); + v9fs_set_create_acl(NULL, &dacl, &pacl); return err; } @@ -838,6 +940,11 @@ int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode) st = p9_client_getattr_dotl(fid, P9_STATS_ALL); if (IS_ERR(st)) return PTR_ERR(st); + /* + * Don't update inode if the file type is different + */ + if ((inode->i_mode & S_IFMT) != (st->st_mode & S_IFMT)) + goto out; spin_lock(&inode->i_lock); /* @@ -849,6 +956,7 @@ int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode) if (v9ses->cache) inode->i_size = i_size; spin_unlock(&inode->i_lock); +out: kfree(st); return 0; } diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index feef6cd..c70251d 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -149,7 +149,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags, else sb->s_d_op = &v9fs_dentry_operations; - inode = v9fs_get_inode(sb, S_IFDIR | mode); + inode = v9fs_get_inode(sb, S_IFDIR | mode, 0); if (IS_ERR(inode)) { retval = PTR_ERR(inode); goto release_sb; diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index 54b8c28..720d885 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -474,17 +474,22 @@ befs_follow_link(struct dentry *dentry, struct nameidata *nd) befs_data_stream *data = &befs_ino->i_data.ds; befs_off_t len = data->size; - befs_debug(sb, "Follow long symlink"); - - link = kmalloc(len, GFP_NOFS); - if (!link) { - link = ERR_PTR(-ENOMEM); - } else if (befs_read_lsymlink(sb, data, link, len) != len) { - kfree(link); - befs_error(sb, "Failed to read entire long symlink"); + if (len == 0) { + befs_error(sb, "Long symlink with illegal length"); link = ERR_PTR(-EIO); } else { - link[len - 1] = '\0'; + befs_debug(sb, "Follow long symlink"); + + link = kmalloc(len, GFP_NOFS); + if (!link) { + link = ERR_PTR(-ENOMEM); + } else if (befs_read_lsymlink(sb, data, link, len) != len) { + kfree(link); + befs_error(sb, "Failed to read entire long symlink"); + link = ERR_PTR(-EIO); + } else { + link[len - 1] = '\0'; + } } } else { link = befs_ino->i_data.symlink; diff --git a/fs/block_dev.c b/fs/block_dev.c index 610e8e0..194cf66 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1419,6 +1419,11 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) WARN_ON_ONCE(bdev->bd_holders); sync_blockdev(bdev); kill_bdev(bdev); + /* ->release can cause the old bdi to disappear, + * so must switch it out first + */ + bdev_inode_switch_bdi(bdev->bd_inode, + &default_backing_dev_info); } if (bdev->bd_contains == bdev) { if (disk->fops->release) @@ -1432,8 +1437,6 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) disk_put_part(bdev->bd_part); bdev->bd_part = NULL; bdev->bd_disk = NULL; - bdev_inode_switch_bdi(bdev->bd_inode, - &default_backing_dev_info); if (bdev != bdev->bd_contains) victim = bdev->bd_contains; bdev->bd_contains = NULL; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 71cd456..7e20a65 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1784,6 +1784,9 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, for (i = 0; i < multi->num_stripes; i++, stripe++) { + if (!stripe->dev->can_discard) + continue; + ret = btrfs_issue_discard(stripe->dev->bdev, stripe->physical, stripe->length); @@ -1791,11 +1794,16 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, discarded_bytes += stripe->length; else if (ret != -EOPNOTSUPP) break; + + /* + * Just in case we get back EOPNOTSUPP for some reason, + * just ignore the return value so we don't screw up + * people calling discard_extent. + */ + ret = 0; } kfree(multi); } - if (discarded_bytes && ret == -EOPNOTSUPP) - ret = 0; if (actual_bytes) *actual_bytes = discarded_bytes; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3601f0a..d42e6bf 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4124,7 +4124,8 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, /* special case for "." */ if (filp->f_pos == 0) { - over = filldir(dirent, ".", 1, 1, btrfs_ino(inode), DT_DIR); + over = filldir(dirent, ".", 1, + filp->f_pos, btrfs_ino(inode), DT_DIR); if (over) return 0; filp->f_pos = 1; @@ -4133,7 +4134,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, if (filp->f_pos == 1) { u64 pino = parent_ino(filp->f_path.dentry); over = filldir(dirent, "..", 2, - 2, pino, DT_DIR); + filp->f_pos, pino, DT_DIR); if (over) return 0; filp->f_pos = 2; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 4ce8a9f..7fa128d 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -799,14 +799,15 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, struct extent_buffer *eb, int slot, struct btrfs_key *key) { - struct inode *dir; - int ret; struct btrfs_inode_ref *ref; + struct btrfs_dir_item *di; + struct inode *dir; struct inode *inode; - char *name; - int namelen; unsigned long ref_ptr; unsigned long ref_end; + char *name; + int namelen; + int ret; int search_done = 0; /* @@ -909,6 +910,25 @@ again: } btrfs_release_path(path); + /* look for a conflicting sequence number */ + di = btrfs_lookup_dir_index_item(trans, root, path, btrfs_ino(dir), + btrfs_inode_ref_index(eb, ref), + name, namelen, 0); + if (di && !IS_ERR(di)) { + ret = drop_one_dir_item(trans, root, path, dir, di); + BUG_ON(ret); + } + btrfs_release_path(path); + + /* look for a conflicing name */ + di = btrfs_lookup_dir_item(trans, root, path, btrfs_ino(dir), + name, namelen, 0); + if (di && !IS_ERR(di)) { + ret = drop_one_dir_item(trans, root, path, dir, di); + BUG_ON(ret); + } + btrfs_release_path(path); + insert: /* insert our name */ ret = btrfs_add_link(trans, dir, inode, name, namelen, 0, diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 19450bc..43baaf0 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -500,6 +500,9 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) fs_devices->rw_devices--; } + if (device->can_discard) + fs_devices->num_can_discard--; + new_device = kmalloc(sizeof(*new_device), GFP_NOFS); BUG_ON(!new_device); memcpy(new_device, device, sizeof(*new_device)); @@ -508,6 +511,7 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) new_device->bdev = NULL; new_device->writeable = 0; new_device->in_fs_metadata = 0; + new_device->can_discard = 0; list_replace_rcu(&device->dev_list, &new_device->dev_list); call_rcu(&device->rcu, free_device); @@ -547,6 +551,7 @@ int btrfs_close_devices(struct btrfs_fs_devices *fs_devices) static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, fmode_t flags, void *holder) { + struct request_queue *q; struct block_device *bdev; struct list_head *head = &fs_devices->devices; struct btrfs_device *device; @@ -603,6 +608,12 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, seeding = 0; } + q = bdev_get_queue(bdev); + if (blk_queue_discard(q)) { + device->can_discard = 1; + fs_devices->num_can_discard++; + } + device->bdev = bdev; device->in_fs_metadata = 0; device->mode = flags; @@ -1542,6 +1553,7 @@ error: int btrfs_init_new_device(struct btrfs_root *root, char *device_path) { + struct request_queue *q; struct btrfs_trans_handle *trans; struct btrfs_device *device; struct block_device *bdev; @@ -1611,6 +1623,9 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) lock_chunks(root); + q = bdev_get_queue(bdev); + if (blk_queue_discard(q)) + device->can_discard = 1; device->writeable = 1; device->work.func = pending_bios_fn; generate_random_uuid(device->uuid); @@ -1646,6 +1661,8 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) root->fs_info->fs_devices->num_devices++; root->fs_info->fs_devices->open_devices++; root->fs_info->fs_devices->rw_devices++; + if (device->can_discard) + root->fs_info->fs_devices->num_can_discard++; root->fs_info->fs_devices->total_rw_bytes += device->total_bytes; if (!blk_queue_nonrot(bdev_get_queue(bdev))) diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 7c12d61..6d866db 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -48,6 +48,7 @@ struct btrfs_device { int writeable; int in_fs_metadata; int missing; + int can_discard; spinlock_t io_lock; @@ -104,6 +105,7 @@ struct btrfs_fs_devices { u64 rw_devices; u64 missing_devices; u64 total_rw_bytes; + u64 num_can_discard; struct block_device *latest_bdev; /* all of the devices in the FS, protected by a mutex diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index bc4b12c..53e7d72 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -566,6 +566,12 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb) struct inode *dir = dentry->d_inode; struct dentry *child; + if (!dir) { + dput(dentry); + dentry = ERR_PTR(-ENOENT); + break; + } + /* skip separators */ while (*s == sep) s++; diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 1a9fe7f..07132c4 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -4079,7 +4079,8 @@ int CIFSFindNext(const int xid, struct cifs_tcon *tcon, T2_FNEXT_RSP_PARMS *parms; char *response_data; int rc = 0; - int bytes_returned, name_len; + int bytes_returned; + unsigned int name_len; __u16 params, byte_count; cFYI(1, "In FindNext"); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index ccc1afa..2451627 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1258,7 +1258,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, /* ignore */ } else if (strnicmp(data, "guest", 5) == 0) { /* ignore */ - } else if (strnicmp(data, "rw", 2) == 0) { + } else if (strnicmp(data, "rw", 2) == 0 && strlen(data) == 2) { /* ignore */ } else if (strnicmp(data, "ro", 2) == 0) { /* ignore */ @@ -1361,7 +1361,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, vol->server_ino = 1; } else if (strnicmp(data, "noserverino", 9) == 0) { vol->server_ino = 0; - } else if (strnicmp(data, "rwpidforward", 4) == 0) { + } else if (strnicmp(data, "rwpidforward", 12) == 0) { vol->rwpidforward = 1; } else if (strnicmp(data, "cifsacl", 7) == 0) { vol->cifs_acl = 1; @@ -2838,7 +2838,8 @@ cleanup_volume_info_contents(struct smb_vol *volume_info) kfree(volume_info->username); kzfree(volume_info->password); kfree(volume_info->UNC); - kfree(volume_info->UNCip); + if (volume_info->UNCip != volume_info->UNC + 2) + kfree(volume_info->UNCip); kfree(volume_info->domainname); kfree(volume_info->iocharset); kfree(volume_info->prepath); diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index d8d26f3..16cdd6d 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -110,8 +110,8 @@ cifs_bp_rename_retry: } rcu_read_unlock(); if (namelen != dfsplen || read_seqretry(&rename_lock, seq)) { - cERROR(1, "did not end path lookup where expected namelen is %d", - namelen); + cFYI(1, "did not end path lookup where expected. namelen=%d " + "dfsplen=%d", namelen, dfsplen); /* presumably this is only possible if racing with a rename of one of the parent directories (we can not lock the dentries above us to prevent this, but retrying should be harmless) */ diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 9b018c8..a7b2dcd 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -764,20 +764,10 @@ char *cifs_build_path_to_root(struct smb_vol *vol, struct cifs_sb_info *cifs_sb, if (full_path == NULL) return full_path; - if (dfsplen) { + if (dfsplen) strncpy(full_path, tcon->treeName, dfsplen); - /* switch slash direction in prepath depending on whether - * windows or posix style path names - */ - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) { - int i; - for (i = 0; i < dfsplen; i++) { - if (full_path[i] == '\\') - full_path[i] = '/'; - } - } - } strncpy(full_path + dfsplen, vol->prepath, pplen); + convert_delimiter(full_path, CIFS_DIR_SEP(cifs_sb)); full_path[dfsplen + pplen] = 0; /* add trailing null */ return full_path; } diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 147aa22..c1b9c4b 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -362,6 +362,8 @@ cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov, mid = AllocMidQEntry(hdr, server); if (mid == NULL) { mutex_unlock(&server->srv_mutex); + atomic_dec(&server->inFlight); + wake_up(&server->request_q); return -ENOMEM; } diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 9f1bb74..b4a6bef 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -175,6 +175,7 @@ enum { ecryptfs_opt_sig, ecryptfs_opt_ecryptfs_sig, ecryptfs_opt_encrypted_view, ecryptfs_opt_fnek_sig, ecryptfs_opt_fn_cipher, ecryptfs_opt_fn_cipher_key_bytes, ecryptfs_opt_unlink_sigs, ecryptfs_opt_mount_auth_tok_only, + ecryptfs_opt_check_dev_ruid, ecryptfs_opt_err }; static const match_table_t tokens = { @@ -191,6 +192,7 @@ static const match_table_t tokens = { {ecryptfs_opt_fn_cipher_key_bytes, "ecryptfs_fn_key_bytes=%u"}, {ecryptfs_opt_unlink_sigs, "ecryptfs_unlink_sigs"}, {ecryptfs_opt_mount_auth_tok_only, "ecryptfs_mount_auth_tok_only"}, + {ecryptfs_opt_check_dev_ruid, "ecryptfs_check_dev_ruid"}, {ecryptfs_opt_err, NULL} }; @@ -236,6 +238,7 @@ static void ecryptfs_init_mount_crypt_stat( * ecryptfs_parse_options * @sb: The ecryptfs super block * @options: The options passed to the kernel + * @check_ruid: set to 1 if device uid should be checked against the ruid * * Parse mount options: * debug=N - ecryptfs_verbosity level for debug output @@ -251,7 +254,8 @@ static void ecryptfs_init_mount_crypt_stat( * * Returns zero on success; non-zero on error */ -static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options) +static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options, + uid_t *check_ruid) { char *p; int rc = 0; @@ -276,6 +280,8 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options) char *cipher_key_bytes_src; char *fn_cipher_key_bytes_src; + *check_ruid = 0; + if (!options) { rc = -EINVAL; goto out; @@ -380,6 +386,9 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options) mount_crypt_stat->flags |= ECRYPTFS_GLOBAL_MOUNT_AUTH_TOK_ONLY; break; + case ecryptfs_opt_check_dev_ruid: + *check_ruid = 1; + break; case ecryptfs_opt_err: default: printk(KERN_WARNING @@ -475,6 +484,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags const char *err = "Getting sb failed"; struct inode *inode; struct path path; + uid_t check_ruid; int rc; sbi = kmem_cache_zalloc(ecryptfs_sb_info_cache, GFP_KERNEL); @@ -483,7 +493,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags goto out; } - rc = ecryptfs_parse_options(sbi, raw_data); + rc = ecryptfs_parse_options(sbi, raw_data, &check_ruid); if (rc) { err = "Error parsing options"; goto out; @@ -521,6 +531,15 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags "known incompatibilities\n"); goto out_free; } + + if (check_ruid && path.dentry->d_inode->i_uid != current_uid()) { + rc = -EPERM; + printk(KERN_ERR "Mount of device (uid: %d) not owned by " + "requested user (uid: %d)\n", + path.dentry->d_inode->i_uid, current_uid()); + goto out_free; + } + ecryptfs_set_superblock_lower(s, path.dentry->d_sb); s->s_maxbytes = path.dentry->d_sb->s_maxbytes; s->s_blocksize = path.dentry->d_sb->s_blocksize; diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c index 85d4309..3745f7c 100644 --- a/fs/ecryptfs/read_write.c +++ b/fs/ecryptfs/read_write.c @@ -39,15 +39,16 @@ int ecryptfs_write_lower(struct inode *ecryptfs_inode, char *data, loff_t offset, size_t size) { - struct ecryptfs_inode_info *inode_info; + struct file *lower_file; mm_segment_t fs_save; ssize_t rc; - inode_info = ecryptfs_inode_to_private(ecryptfs_inode); - BUG_ON(!inode_info->lower_file); + lower_file = ecryptfs_inode_to_private(ecryptfs_inode)->lower_file; + if (!lower_file) + return -EIO; fs_save = get_fs(); set_fs(get_ds()); - rc = vfs_write(inode_info->lower_file, data, size, &offset); + rc = vfs_write(lower_file, data, size, &offset); set_fs(fs_save); mark_inode_dirty_sync(ecryptfs_inode); return rc; @@ -225,15 +226,16 @@ out: int ecryptfs_read_lower(char *data, loff_t offset, size_t size, struct inode *ecryptfs_inode) { - struct ecryptfs_inode_info *inode_info = - ecryptfs_inode_to_private(ecryptfs_inode); + struct file *lower_file; mm_segment_t fs_save; ssize_t rc; - BUG_ON(!inode_info->lower_file); + lower_file = ecryptfs_inode_to_private(ecryptfs_inode)->lower_file; + if (!lower_file) + return -EIO; fs_save = get_fs(); set_fs(get_ds()); - rc = vfs_read(inode_info->lower_file, data, size, &offset); + rc = vfs_read(lower_file, data, size, &offset); set_fs(fs_save); return rc; } @@ -1411,6 +1411,8 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) printable(bprm->buf[2]) && printable(bprm->buf[3])) break; /* -ENOEXEC */ + if (try) + break; /* -ENOEXEC */ request_module("binfmt-%04x", *(unsigned short *)(&bprm->buf[2])); #endif } diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 34b6d9b..e5a7111 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -2210,9 +2210,11 @@ static int ext3_symlink (struct inode * dir, /* * For non-fast symlinks, we just allocate inode and put it on * orphan list in the first transaction => we need bitmap, - * group descriptor, sb, inode block, quota blocks. + * group descriptor, sb, inode block, quota blocks, and + * possibly selinux xattr blocks. */ - credits = 4 + EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb); + credits = 4 + EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb) + + EXT3_XATTR_TRANS_BLOCKS; } else { /* * Fast symlink. We have to add entry to directory diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index bb85757..5802fa1 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -289,10 +289,10 @@ static inline int ext4_should_order_data(struct inode *inode) static inline int ext4_should_writeback_data(struct inode *inode) { - if (!S_ISREG(inode->i_mode)) - return 0; if (EXT4_JOURNAL(inode) == NULL) return 1; + if (!S_ISREG(inode->i_mode)) + return 0; if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA)) return 0; if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 1593004..c94774c 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -189,6 +189,12 @@ void ext4_evict_inode(struct inode *inode) int err; trace_ext4_evict_inode(inode); + + mutex_lock(&inode->i_mutex); + ext4_flush_completed_IO(inode); + mutex_unlock(&inode->i_mutex); + ext4_ioend_wait(inode); + if (inode->i_nlink) { truncate_inode_pages(&inode->i_data, 0); goto no_delete; @@ -1849,6 +1855,8 @@ static int ext4_journalled_write_end(struct file *file, from = pos & (PAGE_CACHE_SIZE - 1); to = from + len; + BUG_ON(!ext4_handle_valid(handle)); + if (copied < len) { if (!PageUptodate(page)) copied = 0; @@ -2569,6 +2577,8 @@ static int __ext4_journalled_writepage(struct page *page, goto out; } + BUG_ON(!ext4_handle_valid(handle)); + ret = walk_page_buffers(handle, page_bufs, 0, len, NULL, do_journal_get_write_access); @@ -2746,7 +2756,7 @@ static int write_cache_pages_da(struct address_space *mapping, index = wbc->range_start >> PAGE_CACHE_SHIFT; end = wbc->range_end >> PAGE_CACHE_SHIFT; - if (wbc->sync_mode == WB_SYNC_ALL) + if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) tag = PAGECACHE_TAG_TOWRITE; else tag = PAGECACHE_TAG_DIRTY; @@ -2978,7 +2988,7 @@ static int ext4_da_writepages(struct address_space *mapping, } retry: - if (wbc->sync_mode == WB_SYNC_ALL) + if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) tag_pages_for_writeback(mapping, index, end); while (!ret && wbc->nr_to_write > 0) { @@ -3640,8 +3650,15 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate) goto out; } - io_end->flag = EXT4_IO_END_UNWRITTEN; + /* + * It may be over-defensive here to check EXT4_IO_END_UNWRITTEN now, + * but being more careful is always safe for the future change. + */ inode = io_end->inode; + if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { + io_end->flag |= EXT4_IO_END_UNWRITTEN; + atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten); + } /* Add the io_end to per-inode completed io list*/ spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index b754b77..458a394 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2264,9 +2264,11 @@ static int ext4_symlink(struct inode *dir, /* * For non-fast symlinks, we just allocate inode and put it on * orphan list in the first transaction => we need bitmap, - * group descriptor, sb, inode block, quota blocks. + * group descriptor, sb, inode block, quota blocks, and + * possibly selinux xattr blocks. */ - credits = 4 + EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb); + credits = 4 + EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb) + + EXT4_XATTR_TRANS_BLOCKS; } else { /* * Fast symlink. We have to add entry to directory diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 7bb8f76..97e5e98 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -338,8 +338,10 @@ submit_and_retry: if ((io_end->num_io_pages >= MAX_IO_PAGES) && (io_end->pages[io_end->num_io_pages-1] != io_page)) goto submit_and_retry; - if (buffer_uninit(bh)) - io->io_end->flag |= EXT4_IO_END_UNWRITTEN; + if (buffer_uninit(bh) && !(io_end->flag & EXT4_IO_END_UNWRITTEN)) { + io_end->flag |= EXT4_IO_END_UNWRITTEN; + atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten); + } io->io_end->size += bh->b_size; io->io_next_block++; ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh)); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 9ea71aa..111ed9d 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -892,7 +892,6 @@ static void ext4_i_callback(struct rcu_head *head) static void ext4_destroy_inode(struct inode *inode) { - ext4_ioend_wait(inode); if (!list_empty(&(EXT4_I(inode)->i_orphan))) { ext4_msg(inode->i_sb, KERN_ERR, "Inode %lu (%p): orphan list check failed!", diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 3c72c99..49c8c98 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -36,6 +36,7 @@ struct wb_writeback_work { long nr_pages; struct super_block *sb; enum writeback_sync_modes sync_mode; + unsigned int tagged_writepages:1; unsigned int for_kupdate:1; unsigned int range_cyclic:1; unsigned int for_background:1; @@ -418,6 +419,15 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) spin_lock(&inode->i_lock); inode->i_state &= ~I_SYNC; if (!(inode->i_state & I_FREEING)) { + /* + * Sync livelock prevention. Each inode is tagged and synced in + * one shot. If still dirty, it will be redirty_tail()'ed below. + * Update the dirty time to prevent enqueue and sync it again. + */ + if ((inode->i_state & I_DIRTY) && + (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)) + inode->dirtied_when = jiffies; + if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { /* * We didn't write back all the pages. nfs_writepages() @@ -650,6 +660,7 @@ static long wb_writeback(struct bdi_writeback *wb, { struct writeback_control wbc = { .sync_mode = work->sync_mode, + .tagged_writepages = work->tagged_writepages, .older_than_this = NULL, .for_kupdate = work->for_kupdate, .for_background = work->for_background, @@ -657,7 +668,7 @@ static long wb_writeback(struct bdi_writeback *wb, }; unsigned long oldest_jif; long wrote = 0; - long write_chunk; + long write_chunk = MAX_WRITEBACK_PAGES; struct inode *inode; if (wbc.for_kupdate) { @@ -683,9 +694,7 @@ static long wb_writeback(struct bdi_writeback *wb, * (quickly) tag currently dirty pages * (maybe slowly) sync all tagged pages */ - if (wbc.sync_mode == WB_SYNC_NONE) - write_chunk = MAX_WRITEBACK_PAGES; - else + if (wbc.sync_mode == WB_SYNC_ALL || wbc.tagged_writepages) write_chunk = LONG_MAX; wbc.wb_start = jiffies; /* livelock avoidance */ @@ -1188,10 +1197,11 @@ void writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr) { DECLARE_COMPLETION_ONSTACK(done); struct wb_writeback_work work = { - .sb = sb, - .sync_mode = WB_SYNC_NONE, - .done = &done, - .nr_pages = nr, + .sb = sb, + .sync_mode = WB_SYNC_NONE, + .tagged_writepages = 1, + .done = &done, + .nr_pages = nr, }; WARN_ON(!rwsem_is_locked(&sb->s_umount)); diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 99e1334..fb6fc95 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -259,10 +259,14 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget, forget->forget_one.nlookup = nlookup; spin_lock(&fc->lock); - fc->forget_list_tail->next = forget; - fc->forget_list_tail = forget; - wake_up(&fc->waitq); - kill_fasync(&fc->fasync, SIGIO, POLL_IN); + if (fc->connected) { + fc->forget_list_tail->next = forget; + fc->forget_list_tail = forget; + wake_up(&fc->waitq); + kill_fasync(&fc->fasync, SIGIO, POLL_IN); + } else { + kfree(forget); + } spin_unlock(&fc->lock); } @@ -1362,6 +1366,10 @@ static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size, if (outarg.namelen > FUSE_NAME_MAX) goto err; + err = -EINVAL; + if (size != sizeof(outarg) + outarg.namelen + 1) + goto err; + name.name = buf; name.len = outarg.namelen; err = fuse_copy_one(cs, buf, outarg.namelen + 1); diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index d685752..4e7f64b 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -13,6 +13,7 @@ #include <linux/fs.h> #include <linux/mutex.h> #include <linux/buffer_head.h> +#include <linux/blkdev.h> #include "hfsplus_raw.h" #define DBG_BNODE_REFS 0x00000001 @@ -110,7 +111,9 @@ struct hfsplus_vh; struct hfs_btree; struct hfsplus_sb_info { + void *s_vhdr_buf; struct hfsplus_vh *s_vhdr; + void *s_backup_vhdr_buf; struct hfsplus_vh *s_backup_vhdr; struct hfs_btree *ext_tree; struct hfs_btree *cat_tree; @@ -258,6 +261,15 @@ struct hfsplus_readdir_data { struct hfsplus_cat_key key; }; +/* + * Find minimum acceptible I/O size for an hfsplus sb. + */ +static inline unsigned short hfsplus_min_io_size(struct super_block *sb) +{ + return max_t(unsigned short, bdev_logical_block_size(sb->s_bdev), + HFSPLUS_SECTOR_SIZE); +} + #define hfs_btree_open hfsplus_btree_open #define hfs_btree_close hfsplus_btree_close #define hfs_btree_write hfsplus_btree_write @@ -436,8 +448,8 @@ int hfsplus_compare_dentry(const struct dentry *parent, /* wrapper.c */ int hfsplus_read_wrapper(struct super_block *); int hfs_part_find(struct super_block *, sector_t *, sector_t *); -int hfsplus_submit_bio(struct block_device *bdev, sector_t sector, - void *data, int rw); +int hfsplus_submit_bio(struct super_block *sb, sector_t sector, + void *buf, void **data, int rw); /* time macros */ #define __hfsp_mt2ut(t) (be32_to_cpu(t) - 2082844800U) diff --git a/fs/hfsplus/part_tbl.c b/fs/hfsplus/part_tbl.c index 40ad88c..eb355d8 100644 --- a/fs/hfsplus/part_tbl.c +++ b/fs/hfsplus/part_tbl.c @@ -88,11 +88,12 @@ static int hfs_parse_old_pmap(struct super_block *sb, struct old_pmap *pm, return -ENOENT; } -static int hfs_parse_new_pmap(struct super_block *sb, struct new_pmap *pm, - sector_t *part_start, sector_t *part_size) +static int hfs_parse_new_pmap(struct super_block *sb, void *buf, + struct new_pmap *pm, sector_t *part_start, sector_t *part_size) { struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); int size = be32_to_cpu(pm->pmMapBlkCnt); + int buf_size = hfsplus_min_io_size(sb); int res; int i = 0; @@ -107,11 +108,14 @@ static int hfs_parse_new_pmap(struct super_block *sb, struct new_pmap *pm, if (++i >= size) return -ENOENT; - res = hfsplus_submit_bio(sb->s_bdev, - *part_start + HFS_PMAP_BLK + i, - pm, READ); - if (res) - return res; + pm = (struct new_pmap *)((u8 *)pm + HFSPLUS_SECTOR_SIZE); + if ((u8 *)pm - (u8 *)buf >= buf_size) { + res = hfsplus_submit_bio(sb, + *part_start + HFS_PMAP_BLK + i, + buf, (void **)&pm, READ); + if (res) + return res; + } } while (pm->pmSig == cpu_to_be16(HFS_NEW_PMAP_MAGIC)); return -ENOENT; @@ -124,15 +128,15 @@ static int hfs_parse_new_pmap(struct super_block *sb, struct new_pmap *pm, int hfs_part_find(struct super_block *sb, sector_t *part_start, sector_t *part_size) { - void *data; + void *buf, *data; int res; - data = kmalloc(HFSPLUS_SECTOR_SIZE, GFP_KERNEL); - if (!data) + buf = kmalloc(hfsplus_min_io_size(sb), GFP_KERNEL); + if (!buf) return -ENOMEM; - res = hfsplus_submit_bio(sb->s_bdev, *part_start + HFS_PMAP_BLK, - data, READ); + res = hfsplus_submit_bio(sb, *part_start + HFS_PMAP_BLK, + buf, &data, READ); if (res) goto out; @@ -141,13 +145,13 @@ int hfs_part_find(struct super_block *sb, res = hfs_parse_old_pmap(sb, data, part_start, part_size); break; case HFS_NEW_PMAP_MAGIC: - res = hfs_parse_new_pmap(sb, data, part_start, part_size); + res = hfs_parse_new_pmap(sb, buf, data, part_start, part_size); break; default: res = -ENOENT; break; } out: - kfree(data); + kfree(buf); return res; } diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 84a47b7..c3a76fd 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -197,17 +197,17 @@ int hfsplus_sync_fs(struct super_block *sb, int wait) write_backup = 1; } - error2 = hfsplus_submit_bio(sb->s_bdev, + error2 = hfsplus_submit_bio(sb, sbi->part_start + HFSPLUS_VOLHEAD_SECTOR, - sbi->s_vhdr, WRITE_SYNC); + sbi->s_vhdr_buf, NULL, WRITE_SYNC); if (!error) error = error2; if (!write_backup) goto out; - error2 = hfsplus_submit_bio(sb->s_bdev, + error2 = hfsplus_submit_bio(sb, sbi->part_start + sbi->sect_count - 2, - sbi->s_backup_vhdr, WRITE_SYNC); + sbi->s_backup_vhdr_buf, NULL, WRITE_SYNC); if (!error) error2 = error; out: @@ -251,8 +251,8 @@ static void hfsplus_put_super(struct super_block *sb) hfs_btree_close(sbi->ext_tree); iput(sbi->alloc_file); iput(sbi->hidden_dir); - kfree(sbi->s_vhdr); - kfree(sbi->s_backup_vhdr); + kfree(sbi->s_vhdr_buf); + kfree(sbi->s_backup_vhdr_buf); unload_nls(sbi->nls); kfree(sb->s_fs_info); sb->s_fs_info = NULL; @@ -508,8 +508,8 @@ out_close_cat_tree: out_close_ext_tree: hfs_btree_close(sbi->ext_tree); out_free_vhdr: - kfree(sbi->s_vhdr); - kfree(sbi->s_backup_vhdr); + kfree(sbi->s_vhdr_buf); + kfree(sbi->s_backup_vhdr_buf); out_unload_nls: unload_nls(sbi->nls); unload_nls(nls); diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c index 4ac88ff..7b8112d 100644 --- a/fs/hfsplus/wrapper.c +++ b/fs/hfsplus/wrapper.c @@ -31,25 +31,67 @@ static void hfsplus_end_io_sync(struct bio *bio, int err) complete(bio->bi_private); } -int hfsplus_submit_bio(struct block_device *bdev, sector_t sector, - void *data, int rw) +/* + * hfsplus_submit_bio - Perfrom block I/O + * @sb: super block of volume for I/O + * @sector: block to read or write, for blocks of HFSPLUS_SECTOR_SIZE bytes + * @buf: buffer for I/O + * @data: output pointer for location of requested data + * @rw: direction of I/O + * + * The unit of I/O is hfsplus_min_io_size(sb), which may be bigger than + * HFSPLUS_SECTOR_SIZE, and @buf must be sized accordingly. On reads + * @data will return a pointer to the start of the requested sector, + * which may not be the same location as @buf. + * + * If @sector is not aligned to the bdev logical block size it will + * be rounded down. For writes this means that @buf should contain data + * that starts at the rounded-down address. As long as the data was + * read using hfsplus_submit_bio() and the same buffer is used things + * will work correctly. + */ +int hfsplus_submit_bio(struct super_block *sb, sector_t sector, + void *buf, void **data, int rw) { DECLARE_COMPLETION_ONSTACK(wait); struct bio *bio; int ret = 0; + unsigned int io_size; + loff_t start; + int offset; + + /* + * Align sector to hardware sector size and find offset. We + * assume that io_size is a power of two, which _should_ + * be true. + */ + io_size = hfsplus_min_io_size(sb); + start = (loff_t)sector << HFSPLUS_SECTOR_SHIFT; + offset = start & (io_size - 1); + sector &= ~((io_size >> HFSPLUS_SECTOR_SHIFT) - 1); bio = bio_alloc(GFP_NOIO, 1); bio->bi_sector = sector; - bio->bi_bdev = bdev; + bio->bi_bdev = sb->s_bdev; bio->bi_end_io = hfsplus_end_io_sync; bio->bi_private = &wait; - /* - * We always submit one sector at a time, so bio_add_page must not fail. - */ - if (bio_add_page(bio, virt_to_page(data), HFSPLUS_SECTOR_SIZE, - offset_in_page(data)) != HFSPLUS_SECTOR_SIZE) - BUG(); + if (!(rw & WRITE) && data) + *data = (u8 *)buf + offset; + + while (io_size > 0) { + unsigned int page_offset = offset_in_page(buf); + unsigned int len = min_t(unsigned int, PAGE_SIZE - page_offset, + io_size); + + ret = bio_add_page(bio, virt_to_page(buf), len, page_offset); + if (ret != len) { + ret = -EIO; + goto out; + } + io_size -= len; + buf = (u8 *)buf + len; + } submit_bio(rw, bio); wait_for_completion(&wait); @@ -57,8 +99,9 @@ int hfsplus_submit_bio(struct block_device *bdev, sector_t sector, if (!bio_flagged(bio, BIO_UPTODATE)) ret = -EIO; +out: bio_put(bio); - return ret; + return ret < 0 ? ret : 0; } static int hfsplus_read_mdb(void *bufptr, struct hfsplus_wd *wd) @@ -147,17 +190,17 @@ int hfsplus_read_wrapper(struct super_block *sb) } error = -ENOMEM; - sbi->s_vhdr = kmalloc(HFSPLUS_SECTOR_SIZE, GFP_KERNEL); - if (!sbi->s_vhdr) + sbi->s_vhdr_buf = kmalloc(hfsplus_min_io_size(sb), GFP_KERNEL); + if (!sbi->s_vhdr_buf) goto out; - sbi->s_backup_vhdr = kmalloc(HFSPLUS_SECTOR_SIZE, GFP_KERNEL); - if (!sbi->s_backup_vhdr) + sbi->s_backup_vhdr_buf = kmalloc(hfsplus_min_io_size(sb), GFP_KERNEL); + if (!sbi->s_backup_vhdr_buf) goto out_free_vhdr; reread: - error = hfsplus_submit_bio(sb->s_bdev, - part_start + HFSPLUS_VOLHEAD_SECTOR, - sbi->s_vhdr, READ); + error = hfsplus_submit_bio(sb, part_start + HFSPLUS_VOLHEAD_SECTOR, + sbi->s_vhdr_buf, (void **)&sbi->s_vhdr, + READ); if (error) goto out_free_backup_vhdr; @@ -186,9 +229,9 @@ reread: goto reread; } - error = hfsplus_submit_bio(sb->s_bdev, - part_start + part_size - 2, - sbi->s_backup_vhdr, READ); + error = hfsplus_submit_bio(sb, part_start + part_size - 2, + sbi->s_backup_vhdr_buf, + (void **)&sbi->s_backup_vhdr, READ); if (error) goto out_free_backup_vhdr; @@ -232,9 +275,9 @@ reread: return 0; out_free_backup_vhdr: - kfree(sbi->s_backup_vhdr); + kfree(sbi->s_backup_vhdr_buf); out_free_vhdr: - kfree(sbi->s_vhdr); + kfree(sbi->s_vhdr_buf); out: return error; } @@ -2582,6 +2582,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry) if (!dir->i_op->rmdir) return -EPERM; + dget(dentry); mutex_lock(&dentry->d_inode->i_mutex); error = -EBUSY; @@ -2602,6 +2603,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry) out: mutex_unlock(&dentry->d_inode->i_mutex); + dput(dentry); if (!error) d_delete(dentry); return error; @@ -3005,6 +3007,7 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, if (error) return error; + dget(new_dentry); if (target) mutex_lock(&target->i_mutex); @@ -3025,6 +3028,7 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, out: if (target) mutex_unlock(&target->i_mutex); + dput(new_dentry); if (!error) if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) d_move(old_dentry,new_dentry); diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index b257383..07df5f1 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -38,6 +38,7 @@ enum nfs4_callback_opnum { struct cb_process_state { __be32 drc_status; struct nfs_client *clp; + int slotid; }; struct cb_compound_hdr_arg { @@ -166,7 +167,6 @@ extern unsigned nfs4_callback_layoutrecall( void *dummy, struct cb_process_state *cps); extern void nfs4_check_drain_bc_complete(struct nfs4_session *ses); -extern void nfs4_cb_take_slot(struct nfs_client *clp); struct cb_devicenotifyitem { uint32_t cbd_notify_type; diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index d4d1954..aaa09e9 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -333,7 +333,7 @@ validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args) /* Normal */ if (likely(args->csa_sequenceid == slot->seq_nr + 1)) { slot->seq_nr++; - return htonl(NFS4_OK); + goto out_ok; } /* Replay */ @@ -352,11 +352,14 @@ validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args) /* Wraparound */ if (args->csa_sequenceid == 1 && (slot->seq_nr + 1) == 0) { slot->seq_nr = 1; - return htonl(NFS4_OK); + goto out_ok; } /* Misordered request */ return htonl(NFS4ERR_SEQ_MISORDERED); +out_ok: + tbl->highest_used_slotid = args->csa_slotid; + return htonl(NFS4_OK); } /* @@ -418,26 +421,37 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, struct cb_sequenceres *res, struct cb_process_state *cps) { + struct nfs4_slot_table *tbl; struct nfs_client *clp; int i; __be32 status = htonl(NFS4ERR_BADSESSION); - cps->clp = NULL; - clp = nfs4_find_client_sessionid(args->csa_addr, &args->csa_sessionid); if (clp == NULL) goto out; + tbl = &clp->cl_session->bc_slot_table; + + spin_lock(&tbl->slot_tbl_lock); /* state manager is resetting the session */ if (test_bit(NFS4_SESSION_DRAINING, &clp->cl_session->session_state)) { - status = NFS4ERR_DELAY; + spin_unlock(&tbl->slot_tbl_lock); + status = htonl(NFS4ERR_DELAY); + /* Return NFS4ERR_BADSESSION if we're draining the session + * in order to reset it. + */ + if (test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state)) + status = htonl(NFS4ERR_BADSESSION); goto out; } status = validate_seqid(&clp->cl_session->bc_slot_table, args); + spin_unlock(&tbl->slot_tbl_lock); if (status) goto out; + cps->slotid = args->csa_slotid; + /* * Check for pending referring calls. If a match is found, a * related callback was received before the response to the original @@ -454,7 +468,6 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, res->csr_slotid = args->csa_slotid; res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; - nfs4_cb_take_slot(clp); out: cps->clp = clp; /* put in nfs4_callback_compound */ diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index c6c86a7..918ad64 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -754,26 +754,15 @@ static void nfs4_callback_free_slot(struct nfs4_session *session) * Let the state manager know callback processing done. * A single slot, so highest used slotid is either 0 or -1 */ - tbl->highest_used_slotid--; + tbl->highest_used_slotid = -1; nfs4_check_drain_bc_complete(session); spin_unlock(&tbl->slot_tbl_lock); } -static void nfs4_cb_free_slot(struct nfs_client *clp) +static void nfs4_cb_free_slot(struct cb_process_state *cps) { - if (clp && clp->cl_session) - nfs4_callback_free_slot(clp->cl_session); -} - -/* A single slot, so highest used slotid is either 0 or -1 */ -void nfs4_cb_take_slot(struct nfs_client *clp) -{ - struct nfs4_slot_table *tbl = &clp->cl_session->bc_slot_table; - - spin_lock(&tbl->slot_tbl_lock); - tbl->highest_used_slotid++; - BUG_ON(tbl->highest_used_slotid != 0); - spin_unlock(&tbl->slot_tbl_lock); + if (cps->slotid != -1) + nfs4_callback_free_slot(cps->clp->cl_session); } #else /* CONFIG_NFS_V4_1 */ @@ -784,7 +773,7 @@ preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op) return htonl(NFS4ERR_MINOR_VERS_MISMATCH); } -static void nfs4_cb_free_slot(struct nfs_client *clp) +static void nfs4_cb_free_slot(struct cb_process_state *cps) { } #endif /* CONFIG_NFS_V4_1 */ @@ -866,6 +855,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r struct cb_process_state cps = { .drc_status = 0, .clp = NULL, + .slotid = -1, }; unsigned int nops = 0; @@ -906,7 +896,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r *hdr_res.status = status; *hdr_res.nops = htonl(nops); - nfs4_cb_free_slot(cps.clp); + nfs4_cb_free_slot(&cps); nfs_put_client(cps.clp); dprintk("%s: done, status = %u\n", __func__, ntohl(status)); return rpc_success; diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 8ff2ea3..1d1dc1e 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -479,7 +479,6 @@ static int _io_check(struct objio_state *ios, bool is_write) for (i = 0; i < ios->numdevs; i++) { struct osd_sense_info osi; struct osd_request *or = ios->per_dev[i].or; - unsigned dev; int ret; if (!or) @@ -500,9 +499,8 @@ static int _io_check(struct objio_state *ios, bool is_write) continue; /* we recovered */ } - dev = ios->per_dev[i].dev; - objlayout_io_set_result(&ios->ol_state, dev, - &ios->layout->comps[dev].oc_object_id, + objlayout_io_set_result(&ios->ol_state, i, + &ios->layout->comps[i].oc_object_id, osd_pri_2_pnfs_err(osi.osd_err_pri), ios->per_dev[i].offset, ios->per_dev[i].length, @@ -589,22 +587,19 @@ static void _calc_stripe_info(struct objio_state *ios, u64 file_offset, } static int _add_stripe_unit(struct objio_state *ios, unsigned *cur_pg, - unsigned pgbase, struct _objio_per_comp *per_dev, int cur_len, + unsigned pgbase, struct _objio_per_comp *per_dev, int len, gfp_t gfp_flags) { unsigned pg = *cur_pg; + int cur_len = len; struct request_queue *q = osd_request_queue(_io_od(ios, per_dev->dev)); - per_dev->length += cur_len; - if (per_dev->bio == NULL) { - unsigned stripes = ios->layout->num_comps / - ios->layout->mirrors_p1; - unsigned pages_in_stripe = stripes * + unsigned pages_in_stripe = ios->layout->group_width * (ios->layout->stripe_unit / PAGE_SIZE); unsigned bio_size = (ios->ol_state.nr_pages + pages_in_stripe) / - stripes; + ios->layout->group_width; if (BIO_MAX_PAGES_KMALLOC < bio_size) bio_size = BIO_MAX_PAGES_KMALLOC; @@ -632,6 +627,7 @@ static int _add_stripe_unit(struct objio_state *ios, unsigned *cur_pg, } BUG_ON(cur_len); + per_dev->length += len; *cur_pg = pg; return 0; } @@ -650,7 +646,7 @@ static int _prepare_one_group(struct objio_state *ios, u64 length, int ret = 0; while (length) { - struct _objio_per_comp *per_dev = &ios->per_dev[dev]; + struct _objio_per_comp *per_dev = &ios->per_dev[dev - first_dev]; unsigned cur_len, page_off = 0; if (!per_dev->length) { @@ -670,8 +666,8 @@ static int _prepare_one_group(struct objio_state *ios, u64 length, cur_len = stripe_unit; } - if (max_comp < dev) - max_comp = dev; + if (max_comp < dev - first_dev) + max_comp = dev - first_dev; } else { cur_len = stripe_unit; } @@ -806,7 +802,7 @@ static int _read_mirrors(struct objio_state *ios, unsigned cur_comp) struct _objio_per_comp *per_dev = &ios->per_dev[cur_comp]; unsigned dev = per_dev->dev; struct pnfs_osd_object_cred *cred = - &ios->layout->comps[dev]; + &ios->layout->comps[cur_comp]; struct osd_obj_id obj = { .partition = cred->oc_object_id.oid_partition_id, .id = cred->oc_object_id.oid_object_id, @@ -904,7 +900,7 @@ static int _write_mirrors(struct objio_state *ios, unsigned cur_comp) for (; cur_comp < last_comp; ++cur_comp, ++dev) { struct osd_request *or = NULL; struct pnfs_osd_object_cred *cred = - &ios->layout->comps[dev]; + &ios->layout->comps[cur_comp]; struct osd_obj_id obj = { .partition = cred->oc_object_id.oid_partition_id, .id = cred->oc_object_id.oid_object_id, diff --git a/fs/nfs/objlayout/pnfs_osd_xdr_cli.c b/fs/nfs/objlayout/pnfs_osd_xdr_cli.c index 16fc758..b3918f7 100644 --- a/fs/nfs/objlayout/pnfs_osd_xdr_cli.c +++ b/fs/nfs/objlayout/pnfs_osd_xdr_cli.c @@ -170,6 +170,9 @@ int pnfs_osd_xdr_decode_layout_map(struct pnfs_osd_layout *layout, p = _osd_xdr_decode_data_map(p, &layout->olo_map); layout->olo_comps_index = be32_to_cpup(p++); layout->olo_num_comps = be32_to_cpup(p++); + dprintk("%s: olo_comps_index=%d olo_num_comps=%d\n", __func__, + layout->olo_comps_index, layout->olo_num_comps); + iter->total_comps = layout->olo_num_comps; return 0; } diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 25b6a88..5afaa58 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -877,30 +877,54 @@ struct numa_maps_private { struct numa_maps md; }; -static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty) +static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty, + unsigned long nr_pages) { int count = page_mapcount(page); - md->pages++; + md->pages += nr_pages; if (pte_dirty || PageDirty(page)) - md->dirty++; + md->dirty += nr_pages; if (PageSwapCache(page)) - md->swapcache++; + md->swapcache += nr_pages; if (PageActive(page) || PageUnevictable(page)) - md->active++; + md->active += nr_pages; if (PageWriteback(page)) - md->writeback++; + md->writeback += nr_pages; if (PageAnon(page)) - md->anon++; + md->anon += nr_pages; if (count > md->mapcount_max) md->mapcount_max = count; - md->node[page_to_nid(page)]++; + md->node[page_to_nid(page)] += nr_pages; +} + +static struct page *can_gather_numa_stats(pte_t pte, struct vm_area_struct *vma, + unsigned long addr) +{ + struct page *page; + int nid; + + if (!pte_present(pte)) + return NULL; + + page = vm_normal_page(vma, addr, pte); + if (!page) + return NULL; + + if (PageReserved(page)) + return NULL; + + nid = page_to_nid(page); + if (!node_isset(nid, node_states[N_HIGH_MEMORY])) + return NULL; + + return page; } static int gather_pte_stats(pmd_t *pmd, unsigned long addr, @@ -912,26 +936,32 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr, pte_t *pte; md = walk->private; - orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); - do { - struct page *page; - int nid; + spin_lock(&walk->mm->page_table_lock); + if (pmd_trans_huge(*pmd)) { + if (pmd_trans_splitting(*pmd)) { + spin_unlock(&walk->mm->page_table_lock); + wait_split_huge_page(md->vma->anon_vma, pmd); + } else { + pte_t huge_pte = *(pte_t *)pmd; + struct page *page; - if (!pte_present(*pte)) - continue; + page = can_gather_numa_stats(huge_pte, md->vma, addr); + if (page) + gather_stats(page, md, pte_dirty(huge_pte), + HPAGE_PMD_SIZE/PAGE_SIZE); + spin_unlock(&walk->mm->page_table_lock); + return 0; + } + } else { + spin_unlock(&walk->mm->page_table_lock); + } - page = vm_normal_page(md->vma, addr, *pte); + orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); + do { + struct page *page = can_gather_numa_stats(*pte, md->vma, addr); if (!page) continue; - - if (PageReserved(page)) - continue; - - nid = page_to_nid(page); - if (!node_isset(nid, node_states[N_HIGH_MEMORY])) - continue; - - gather_stats(page, md, pte_dirty(*pte)); + gather_stats(page, md, pte_dirty(*pte), 1); } while (pte++, addr += PAGE_SIZE, addr != end); pte_unmap_unlock(orig_pte, ptl); @@ -952,7 +982,7 @@ static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask, return 0; md = walk->private; - gather_stats(page, md, pte_dirty(*pte)); + gather_stats(page, md, pte_dirty(*pte), 1); return 0; } diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h index 8633521..8731516 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/linux-2.6/xfs_linux.h @@ -70,6 +70,8 @@ #include <linux/ctype.h> #include <linux/writeback.h> #include <linux/capability.h> +#include <linux/kthread.h> +#include <linux/freezer.h> #include <linux/list_sort.h> #include <asm/page.h> diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index a1a881e..347cae9 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -1412,37 +1412,35 @@ xfs_fs_fill_super( sb->s_time_gran = 1; set_posix_acl_flag(sb); - error = xfs_syncd_init(mp); - if (error) - goto out_filestream_unmount; - xfs_inode_shrinker_register(mp); error = xfs_mountfs(mp); if (error) - goto out_syncd_stop; + goto out_filestream_unmount; + + error = xfs_syncd_init(mp); + if (error) + goto out_unmount; root = igrab(VFS_I(mp->m_rootip)); if (!root) { error = ENOENT; - goto fail_unmount; + goto out_syncd_stop; } if (is_bad_inode(root)) { error = EINVAL; - goto fail_vnrele; + goto out_syncd_stop; } sb->s_root = d_alloc_root(root); if (!sb->s_root) { error = ENOMEM; - goto fail_vnrele; + goto out_iput; } return 0; - out_syncd_stop: - xfs_inode_shrinker_unregister(mp); - xfs_syncd_stop(mp); out_filestream_unmount: + xfs_inode_shrinker_unregister(mp); xfs_filestream_unmount(mp); out_free_sb: xfs_freesb(mp); @@ -1456,17 +1454,12 @@ xfs_fs_fill_super( out: return -error; - fail_vnrele: - if (sb->s_root) { - dput(sb->s_root); - sb->s_root = NULL; - } else { - iput(root); - } - - fail_unmount: - xfs_inode_shrinker_unregister(mp); + out_iput: + iput(root); + out_syncd_stop: xfs_syncd_stop(mp); + out_unmount: + xfs_inode_shrinker_unregister(mp); /* * Blow away any referenced inode in the filestreams cache. @@ -1667,24 +1660,13 @@ xfs_init_workqueues(void) */ xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_CPU_INTENSIVE, 8); if (!xfs_syncd_wq) - goto out; - - xfs_ail_wq = alloc_workqueue("xfsail", WQ_CPU_INTENSIVE, 8); - if (!xfs_ail_wq) - goto out_destroy_syncd; - + return -ENOMEM; return 0; - -out_destroy_syncd: - destroy_workqueue(xfs_syncd_wq); -out: - return -ENOMEM; } STATIC void xfs_destroy_workqueues(void) { - destroy_workqueue(xfs_ail_wq); destroy_workqueue(xfs_syncd_wq); } diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c index 9e0e2fa..8126fc2 100644 --- a/fs/xfs/quota/xfs_dquot_item.c +++ b/fs/xfs/quota/xfs_dquot_item.c @@ -183,13 +183,14 @@ xfs_qm_dqunpin_wait( * search the buffer cache can be a time consuming thing, and AIL lock is a * spinlock. */ -STATIC void +STATIC bool xfs_qm_dquot_logitem_pushbuf( struct xfs_log_item *lip) { struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip); struct xfs_dquot *dqp = qlip->qli_dquot; struct xfs_buf *bp; + bool ret = true; ASSERT(XFS_DQ_IS_LOCKED(dqp)); @@ -201,17 +202,20 @@ xfs_qm_dquot_logitem_pushbuf( if (completion_done(&dqp->q_flush) || !(lip->li_flags & XFS_LI_IN_AIL)) { xfs_dqunlock(dqp); - return; + return true; } bp = xfs_incore(dqp->q_mount->m_ddev_targp, qlip->qli_format.qlf_blkno, dqp->q_mount->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK); xfs_dqunlock(dqp); if (!bp) - return; + return true; if (XFS_BUF_ISDELAYWRITE(bp)) xfs_buf_delwri_promote(bp); + if (XFS_BUF_ISPINNED(bp)) + ret = false; xfs_buf_relse(bp); + return ret; } /* diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 7b7e005..a7342e8 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -632,7 +632,7 @@ xfs_buf_item_push( * the xfsbufd to get this buffer written. We have to unlock the buffer * to allow the xfsbufd to write it, too. */ -STATIC void +STATIC bool xfs_buf_item_pushbuf( struct xfs_log_item *lip) { @@ -646,6 +646,7 @@ xfs_buf_item_pushbuf( xfs_buf_delwri_promote(bp); xfs_buf_relse(bp); + return true; } STATIC void diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index b1e88d5..391044c 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -713,13 +713,14 @@ xfs_inode_item_committed( * marked delayed write. If that's the case, we'll promote it and that will * allow the caller to write the buffer by triggering the xfsbufd to run. */ -STATIC void +STATIC bool xfs_inode_item_pushbuf( struct xfs_log_item *lip) { struct xfs_inode_log_item *iip = INODE_ITEM(lip); struct xfs_inode *ip = iip->ili_inode; struct xfs_buf *bp; + bool ret = true; ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED)); @@ -730,7 +731,7 @@ xfs_inode_item_pushbuf( if (completion_done(&ip->i_flush) || !(lip->li_flags & XFS_LI_IN_AIL)) { xfs_iunlock(ip, XFS_ILOCK_SHARED); - return; + return true; } bp = xfs_incore(ip->i_mount->m_ddev_targp, iip->ili_format.ilf_blkno, @@ -738,10 +739,13 @@ xfs_inode_item_pushbuf( xfs_iunlock(ip, XFS_ILOCK_SHARED); if (!bp) - return; + return true; if (XFS_BUF_ISDELAYWRITE(bp)) xfs_buf_delwri_promote(bp); + if (XFS_BUF_ISPINNED(bp)) + ret = false; xfs_buf_relse(bp); + return ret; } /* diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index c83f63b..efc147f 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -1426,6 +1426,7 @@ xfs_trans_committed( static inline void xfs_log_item_batch_insert( struct xfs_ail *ailp, + struct xfs_ail_cursor *cur, struct xfs_log_item **log_items, int nr_items, xfs_lsn_t commit_lsn) @@ -1434,7 +1435,7 @@ xfs_log_item_batch_insert( spin_lock(&ailp->xa_lock); /* xfs_trans_ail_update_bulk drops ailp->xa_lock */ - xfs_trans_ail_update_bulk(ailp, log_items, nr_items, commit_lsn); + xfs_trans_ail_update_bulk(ailp, cur, log_items, nr_items, commit_lsn); for (i = 0; i < nr_items; i++) IOP_UNPIN(log_items[i], 0); @@ -1452,6 +1453,13 @@ xfs_log_item_batch_insert( * as an iclog write error even though we haven't started any IO yet. Hence in * this case all we need to do is IOP_COMMITTED processing, followed by an * IOP_UNPIN(aborted) call. + * + * The AIL cursor is used to optimise the insert process. If commit_lsn is not + * at the end of the AIL, the insert cursor avoids the need to walk + * the AIL to find the insertion point on every xfs_log_item_batch_insert() + * call. This saves a lot of needless list walking and is a net win, even + * though it slightly increases that amount of AIL lock traffic to set it up + * and tear it down. */ void xfs_trans_committed_bulk( @@ -1463,8 +1471,13 @@ xfs_trans_committed_bulk( #define LOG_ITEM_BATCH_SIZE 32 struct xfs_log_item *log_items[LOG_ITEM_BATCH_SIZE]; struct xfs_log_vec *lv; + struct xfs_ail_cursor cur; int i = 0; + spin_lock(&ailp->xa_lock); + xfs_trans_ail_cursor_last(ailp, &cur, commit_lsn); + spin_unlock(&ailp->xa_lock); + /* unpin all the log items */ for (lv = log_vector; lv; lv = lv->lv_next ) { struct xfs_log_item *lip = lv->lv_item; @@ -1493,7 +1506,9 @@ xfs_trans_committed_bulk( /* * Not a bulk update option due to unusual item_lsn. * Push into AIL immediately, rechecking the lsn once - * we have the ail lock. Then unpin the item. + * we have the ail lock. Then unpin the item. This does + * not affect the AIL cursor the bulk insert path is + * using. */ spin_lock(&ailp->xa_lock); if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0) @@ -1507,7 +1522,7 @@ xfs_trans_committed_bulk( /* Item is a candidate for bulk AIL insert. */ log_items[i++] = lv->lv_item; if (i >= LOG_ITEM_BATCH_SIZE) { - xfs_log_item_batch_insert(ailp, log_items, + xfs_log_item_batch_insert(ailp, &cur, log_items, LOG_ITEM_BATCH_SIZE, commit_lsn); i = 0; } @@ -1515,7 +1530,11 @@ xfs_trans_committed_bulk( /* make sure we insert the remainder! */ if (i) - xfs_log_item_batch_insert(ailp, log_items, i, commit_lsn); + xfs_log_item_batch_insert(ailp, &cur, log_items, i, commit_lsn); + + spin_lock(&ailp->xa_lock); + xfs_trans_ail_cursor_done(ailp, &cur); + spin_unlock(&ailp->xa_lock); } /* diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 06a9759..53597f4 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -350,7 +350,7 @@ typedef struct xfs_item_ops { void (*iop_unlock)(xfs_log_item_t *); xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t); void (*iop_push)(xfs_log_item_t *); - void (*iop_pushbuf)(xfs_log_item_t *); + bool (*iop_pushbuf)(xfs_log_item_t *); void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t); } xfs_item_ops_t; diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 5fc2380..a4c281b 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -28,8 +28,6 @@ #include "xfs_trans_priv.h" #include "xfs_error.h" -struct workqueue_struct *xfs_ail_wq; /* AIL workqueue */ - #ifdef DEBUG /* * Check that the list is sorted as it should be. @@ -272,9 +270,9 @@ xfs_trans_ail_cursor_clear( } /* - * Return the item in the AIL with the current lsn. - * Return the current tree generation number for use - * in calls to xfs_trans_next_ail(). + * Initialise the cursor to the first item in the AIL with the given @lsn. + * This searches the list from lowest LSN to highest. Pass a @lsn of zero + * to initialise the cursor to the first item in the AIL. */ xfs_log_item_t * xfs_trans_ail_cursor_first( @@ -300,31 +298,97 @@ out: } /* - * splice the log item list into the AIL at the given LSN. + * Initialise the cursor to the last item in the AIL with the given @lsn. + * This searches the list from highest LSN to lowest. If there is no item with + * the value of @lsn, then it sets the cursor to the last item with an LSN lower + * than @lsn. + */ +static struct xfs_log_item * +__xfs_trans_ail_cursor_last( + struct xfs_ail *ailp, + xfs_lsn_t lsn) +{ + xfs_log_item_t *lip; + + list_for_each_entry_reverse(lip, &ailp->xa_ail, li_ail) { + if (XFS_LSN_CMP(lip->li_lsn, lsn) <= 0) + return lip; + } + return NULL; +} + +/* + * Initialise the cursor to the last item in the AIL with the given @lsn. + * This searches the list from highest LSN to lowest. + */ +struct xfs_log_item * +xfs_trans_ail_cursor_last( + struct xfs_ail *ailp, + struct xfs_ail_cursor *cur, + xfs_lsn_t lsn) +{ + xfs_trans_ail_cursor_init(ailp, cur); + cur->item = __xfs_trans_ail_cursor_last(ailp, lsn); + return cur->item; +} + +/* + * splice the log item list into the AIL at the given LSN. We splice to the + * tail of the given LSN to maintain insert order for push traversals. The + * cursor is optional, allowing repeated updates to the same LSN to avoid + * repeated traversals. */ static void xfs_ail_splice( - struct xfs_ail *ailp, - struct list_head *list, - xfs_lsn_t lsn) + struct xfs_ail *ailp, + struct xfs_ail_cursor *cur, + struct list_head *list, + xfs_lsn_t lsn) { - xfs_log_item_t *next_lip; + struct xfs_log_item *lip = cur ? cur->item : NULL; + struct xfs_log_item *next_lip; - /* If the list is empty, just insert the item. */ - if (list_empty(&ailp->xa_ail)) { - list_splice(list, &ailp->xa_ail); - return; + /* + * Get a new cursor if we don't have a placeholder or the existing one + * has been invalidated. + */ + if (!lip || (__psint_t)lip & 1) { + lip = __xfs_trans_ail_cursor_last(ailp, lsn); + + if (!lip) { + /* The list is empty, so just splice and return. */ + if (cur) + cur->item = NULL; + list_splice(list, &ailp->xa_ail); + return; + } } - list_for_each_entry_reverse(next_lip, &ailp->xa_ail, li_ail) { - if (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0) - break; + /* + * Our cursor points to the item we want to insert _after_, so we have + * to update the cursor to point to the end of the list we are splicing + * in so that it points to the correct location for the next splice. + * i.e. before the splice + * + * lsn -> lsn -> lsn + x -> lsn + x ... + * ^ + * | cursor points here + * + * After the splice we have: + * + * lsn -> lsn -> lsn -> lsn -> .... -> lsn -> lsn + x -> lsn + x ... + * ^ ^ + * | cursor points here | needs to move here + * + * So we set the cursor to the last item in the list to be spliced + * before we execute the splice, resulting in the cursor pointing to + * the correct item after the splice occurs. + */ + if (cur) { + next_lip = list_entry(list->prev, struct xfs_log_item, li_ail); + cur->item = next_lip; } - - ASSERT(&next_lip->li_ail == &ailp->xa_ail || - XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0); - - list_splice_init(list, &next_lip->li_ail); + list_splice(list, &lip->li_ail); } /* @@ -340,16 +404,10 @@ xfs_ail_delete( xfs_trans_ail_cursor_clear(ailp, lip); } -/* - * xfs_ail_worker does the work of pushing on the AIL. It will requeue itself - * to run at a later time if there is more work to do to complete the push. - */ -STATIC void -xfs_ail_worker( - struct work_struct *work) +static long +xfsaild_push( + struct xfs_ail *ailp) { - struct xfs_ail *ailp = container_of(to_delayed_work(work), - struct xfs_ail, xa_work); xfs_mount_t *mp = ailp->xa_mount; struct xfs_ail_cursor *cur = &ailp->xa_cursors; xfs_log_item_t *lip; @@ -412,8 +470,13 @@ xfs_ail_worker( case XFS_ITEM_PUSHBUF: XFS_STATS_INC(xs_push_ail_pushbuf); - IOP_PUSHBUF(lip); - ailp->xa_last_pushed_lsn = lsn; + + if (!IOP_PUSHBUF(lip)) { + stuck++; + flush_log = 1; + } else { + ailp->xa_last_pushed_lsn = lsn; + } push_xfsbufd = 1; break; @@ -425,7 +488,6 @@ xfs_ail_worker( case XFS_ITEM_LOCKED: XFS_STATS_INC(xs_push_ail_locked); - ailp->xa_last_pushed_lsn = lsn; stuck++; break; @@ -486,20 +548,6 @@ out_done: /* We're past our target or empty, so idle */ ailp->xa_last_pushed_lsn = 0; - /* - * We clear the XFS_AIL_PUSHING_BIT first before checking - * whether the target has changed. If the target has changed, - * this pushes the requeue race directly onto the result of the - * atomic test/set bit, so we are guaranteed that either the - * the pusher that changed the target or ourselves will requeue - * the work (but not both). - */ - clear_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags); - smp_rmb(); - if (XFS_LSN_CMP(ailp->xa_target, target) == 0 || - test_and_set_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags)) - return; - tout = 50; } else if (XFS_LSN_CMP(lsn, target) >= 0) { /* @@ -522,9 +570,30 @@ out_done: tout = 20; } - /* There is more to do, requeue us. */ - queue_delayed_work(xfs_syncd_wq, &ailp->xa_work, - msecs_to_jiffies(tout)); + return tout; +} + +static int +xfsaild( + void *data) +{ + struct xfs_ail *ailp = data; + long tout = 0; /* milliseconds */ + + while (!kthread_should_stop()) { + if (tout && tout <= 20) + __set_current_state(TASK_KILLABLE); + else + __set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(tout ? + msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT); + + try_to_freeze(); + + tout = xfsaild_push(ailp); + } + + return 0; } /* @@ -559,8 +628,9 @@ xfs_ail_push( */ smp_wmb(); xfs_trans_ail_copy_lsn(ailp, &ailp->xa_target, &threshold_lsn); - if (!test_and_set_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags)) - queue_delayed_work(xfs_syncd_wq, &ailp->xa_work, 0); + smp_wmb(); + + wake_up_process(ailp->xa_task); } /* @@ -645,6 +715,7 @@ xfs_trans_unlocked_item( void xfs_trans_ail_update_bulk( struct xfs_ail *ailp, + struct xfs_ail_cursor *cur, struct xfs_log_item **log_items, int nr_items, xfs_lsn_t lsn) __releases(ailp->xa_lock) @@ -674,7 +745,7 @@ xfs_trans_ail_update_bulk( list_add(&lip->li_ail, &tmp); } - xfs_ail_splice(ailp, &tmp, lsn); + xfs_ail_splice(ailp, cur, &tmp, lsn); if (!mlip_changed) { spin_unlock(&ailp->xa_lock); @@ -794,9 +865,18 @@ xfs_trans_ail_init( ailp->xa_mount = mp; INIT_LIST_HEAD(&ailp->xa_ail); spin_lock_init(&ailp->xa_lock); - INIT_DELAYED_WORK(&ailp->xa_work, xfs_ail_worker); + + ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild/%s", + ailp->xa_mount->m_fsname); + if (IS_ERR(ailp->xa_task)) + goto out_free_ailp; + mp->m_ail = ailp; return 0; + +out_free_ailp: + kmem_free(ailp); + return ENOMEM; } void @@ -805,6 +885,6 @@ xfs_trans_ail_destroy( { struct xfs_ail *ailp = mp->m_ail; - cancel_delayed_work_sync(&ailp->xa_work); + kthread_stop(ailp->xa_task); kmem_free(ailp); } diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index 6b164e9..fe2e3cb 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h @@ -64,24 +64,19 @@ struct xfs_ail_cursor { */ struct xfs_ail { struct xfs_mount *xa_mount; + struct task_struct *xa_task; struct list_head xa_ail; xfs_lsn_t xa_target; struct xfs_ail_cursor xa_cursors; spinlock_t xa_lock; - struct delayed_work xa_work; xfs_lsn_t xa_last_pushed_lsn; - unsigned long xa_flags; }; -#define XFS_AIL_PUSHING_BIT 0 - /* * From xfs_trans_ail.c */ - -extern struct workqueue_struct *xfs_ail_wq; /* AIL workqueue */ - void xfs_trans_ail_update_bulk(struct xfs_ail *ailp, + struct xfs_ail_cursor *cur, struct xfs_log_item **log_items, int nr_items, xfs_lsn_t lsn) __releases(ailp->xa_lock); static inline void @@ -90,7 +85,7 @@ xfs_trans_ail_update( struct xfs_log_item *lip, xfs_lsn_t lsn) __releases(ailp->xa_lock) { - xfs_trans_ail_update_bulk(ailp, &lip, 1, lsn); + xfs_trans_ail_update_bulk(ailp, NULL, &lip, 1, lsn); } void xfs_trans_ail_delete_bulk(struct xfs_ail *ailp, @@ -111,10 +106,13 @@ xfs_lsn_t xfs_ail_min_lsn(struct xfs_ail *ailp); void xfs_trans_unlocked_item(struct xfs_ail *, xfs_log_item_t *); -struct xfs_log_item *xfs_trans_ail_cursor_first(struct xfs_ail *ailp, +struct xfs_log_item * xfs_trans_ail_cursor_first(struct xfs_ail *ailp, + struct xfs_ail_cursor *cur, + xfs_lsn_t lsn); +struct xfs_log_item * xfs_trans_ail_cursor_last(struct xfs_ail *ailp, struct xfs_ail_cursor *cur, xfs_lsn_t lsn); -struct xfs_log_item *xfs_trans_ail_cursor_next(struct xfs_ail *ailp, +struct xfs_log_item * xfs_trans_ail_cursor_next(struct xfs_ail *ailp, struct xfs_ail_cursor *cur); void xfs_trans_ail_cursor_done(struct xfs_ail *ailp, struct xfs_ail_cursor *cur); |