aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/Kconfig17
-rw-r--r--fs/afs/dir.c1
-rw-r--r--fs/afs/inode.c3
-rw-r--r--fs/afs/internal.h1
-rw-r--r--fs/afs/mntpt.c56
-rw-r--r--fs/aio.c2
-rw-r--r--fs/anon_inodes.c2
-rw-r--r--fs/autofs4/autofs_i.h113
-rw-r--r--fs/autofs4/dev-ioctl.c2
-rw-r--r--fs/autofs4/expire.c55
-rw-r--r--fs/autofs4/inode.c114
-rw-r--r--fs/autofs4/root.c743
-rw-r--r--fs/autofs4/symlink.c3
-rw-r--r--fs/autofs4/waitq.c17
-rw-r--r--fs/btrfs/Kconfig2
-rw-r--r--fs/btrfs/Makefile2
-rw-r--r--fs/btrfs/acl.c4
-rw-r--r--fs/btrfs/btrfs_inode.h2
-rw-r--r--fs/btrfs/compression.c329
-rw-r--r--fs/btrfs/compression.h72
-rw-r--r--fs/btrfs/ctree.c8
-rw-r--r--fs/btrfs/ctree.h49
-rw-r--r--fs/btrfs/disk-io.c412
-rw-r--r--fs/btrfs/disk-io.h1
-rw-r--r--fs/btrfs/extent-tree.c90
-rw-r--r--fs/btrfs/extent_io.c7
-rw-r--r--fs/btrfs/extent_io.h17
-rw-r--r--fs/btrfs/extent_map.c2
-rw-r--r--fs/btrfs/extent_map.h3
-rw-r--r--fs/btrfs/file.c126
-rw-r--r--fs/btrfs/inode.c201
-rw-r--r--fs/btrfs/ioctl.c220
-rw-r--r--fs/btrfs/ioctl.h12
-rw-r--r--fs/btrfs/lzo.c420
-rw-r--r--fs/btrfs/ordered-data.c18
-rw-r--r--fs/btrfs/ordered-data.h8
-rw-r--r--fs/btrfs/super.c281
-rw-r--r--fs/btrfs/transaction.c11
-rw-r--r--fs/btrfs/transaction.h1
-rw-r--r--fs/btrfs/volumes.c626
-rw-r--r--fs/btrfs/volumes.h27
-rw-r--r--fs/btrfs/xattr.c18
-rw-r--r--fs/btrfs/zlib.c369
-rw-r--r--fs/cifs/cifs_dfs_ref.c120
-rw-r--r--fs/cifs/cifsfs.h6
-rw-r--r--fs/cifs/connect.c2
-rw-r--r--fs/cifs/dir.c2
-rw-r--r--fs/cifs/inode.c8
-rw-r--r--fs/cifs/netmisc.c4
-rw-r--r--fs/compat.c10
-rw-r--r--fs/configfs/Kconfig4
-rw-r--r--fs/dcache.c5
-rw-r--r--fs/dlm/Kconfig3
-rw-r--r--fs/ecryptfs/crypto.c30
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h2
-rw-r--r--fs/ecryptfs/file.c28
-rw-r--r--fs/ecryptfs/inode.c32
-rw-r--r--fs/ecryptfs/keystore.c26
-rw-r--r--fs/ecryptfs/main.c9
-rw-r--r--fs/ecryptfs/mmap.c35
-rw-r--r--fs/ext4/ext4.h2
-rw-r--r--fs/ext4/extents.c11
-rw-r--r--fs/ext4/file.c2
-rw-r--r--fs/file_table.c2
-rw-r--r--fs/fs_struct.c35
-rw-r--r--fs/gfs2/file.c258
-rw-r--r--fs/gfs2/ops_inode.c258
-rw-r--r--fs/hpfs/inode.c2
-rw-r--r--fs/internal.h4
-rw-r--r--fs/ioctl.c10
-rw-r--r--fs/jffs2/build.c5
-rw-r--r--fs/jffs2/jffs2_fs_sb.h2
-rw-r--r--fs/jffs2/xattr.c12
-rw-r--r--fs/namei.c360
-rw-r--r--fs/namespace.c223
-rw-r--r--fs/nfs/dir.c4
-rw-r--r--fs/nfs/inode.c4
-rw-r--r--fs/nfs/internal.h1
-rw-r--r--fs/nfs/namespace.c77
-rw-r--r--fs/nfsd/vfs.c5
-rw-r--r--fs/ocfs2/Kconfig3
-rw-r--r--fs/ocfs2/file.c10
-rw-r--r--fs/open.c4
-rw-r--r--fs/pipe.c2
-rw-r--r--fs/squashfs/Kconfig18
-rw-r--r--fs/squashfs/Makefile1
-rw-r--r--fs/squashfs/block.c1
-rw-r--r--fs/squashfs/cache.c1
-rw-r--r--fs/squashfs/decompressor.c16
-rw-r--r--fs/squashfs/decompressor.h9
-rw-r--r--fs/squashfs/fragment.c1
-rw-r--r--fs/squashfs/id.c1
-rw-r--r--fs/squashfs/lzo_wrapper.c1
-rw-r--r--fs/squashfs/squashfs.h8
-rw-r--r--fs/squashfs/squashfs_fs.h1
-rw-r--r--fs/squashfs/squashfs_fs_i.h6
-rw-r--r--fs/squashfs/xattr_id.c1
-rw-r--r--fs/squashfs/xz_wrapper.c153
-rw-r--r--fs/squashfs/zlib_wrapper.c15
-rw-r--r--fs/stat.c4
-rw-r--r--fs/super.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c56
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c57
-rw-r--r--fs/xfs/support/debug.c6
104 files changed, 4272 insertions, 2175 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index 771f457..9a7921a 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -30,15 +30,6 @@ config FS_MBCACHE
source "fs/reiserfs/Kconfig"
source "fs/jfs/Kconfig"
-config FS_POSIX_ACL
-# Posix ACL utility routines (for now, only ext2/ext3/jfs/reiserfs/nfs4)
-#
-# NOTE: you can implement Posix ACLs without these helpers (XFS does).
-# Never use this symbol for ifdefs.
-#
- bool
- default n
-
source "fs/xfs/Kconfig"
source "fs/gfs2/Kconfig"
source "fs/ocfs2/Kconfig"
@@ -47,6 +38,14 @@ source "fs/nilfs2/Kconfig"
endif # BLOCK
+# Posix ACL utility routines
+#
+# Note: Posix ACLs can be implemented without these helpers. Never use
+# this symbol for ifdefs in core code.
+#
+config FS_POSIX_ACL
+ def_bool n
+
config EXPORTFS
tristate
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index e6a4ab9..20c106f 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -66,6 +66,7 @@ const struct dentry_operations afs_fs_dentry_operations = {
.d_revalidate = afs_d_revalidate,
.d_delete = afs_d_delete,
.d_release = afs_d_release,
+ .d_automount = afs_d_automount,
};
#define AFS_DIR_HASHTBL_SIZE 128
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 0747339..db66c52 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -184,7 +184,8 @@ struct inode *afs_iget_autocell(struct inode *dir, const char *dev_name,
inode->i_generation = 0;
set_bit(AFS_VNODE_PSEUDODIR, &vnode->flags);
- inode->i_flags |= S_NOATIME;
+ set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags);
+ inode->i_flags |= S_AUTOMOUNT | S_NOATIME;
unlock_new_inode(inode);
_leave(" = %p", inode);
return inode;
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 58c633b..5a9b684 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -592,6 +592,7 @@ extern const struct inode_operations afs_mntpt_inode_operations;
extern const struct inode_operations afs_autocell_inode_operations;
extern const struct file_operations afs_mntpt_file_operations;
+extern struct vfsmount *afs_d_automount(struct path *);
extern int afs_mntpt_check_symlink(struct afs_vnode *, struct key *);
extern void afs_mntpt_kill_timer(void);
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index e83c033..aa59184 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -24,7 +24,6 @@ static struct dentry *afs_mntpt_lookup(struct inode *dir,
struct dentry *dentry,
struct nameidata *nd);
static int afs_mntpt_open(struct inode *inode, struct file *file);
-static void *afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd);
static void afs_mntpt_expiry_timed_out(struct work_struct *work);
const struct file_operations afs_mntpt_file_operations = {
@@ -34,13 +33,11 @@ const struct file_operations afs_mntpt_file_operations = {
const struct inode_operations afs_mntpt_inode_operations = {
.lookup = afs_mntpt_lookup,
- .follow_link = afs_mntpt_follow_link,
.readlink = page_readlink,
.getattr = afs_getattr,
};
const struct inode_operations afs_autocell_inode_operations = {
- .follow_link = afs_mntpt_follow_link,
.getattr = afs_getattr,
};
@@ -88,6 +85,7 @@ int afs_mntpt_check_symlink(struct afs_vnode *vnode, struct key *key)
_debug("symlink is a mountpoint");
spin_lock(&vnode->lock);
set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags);
+ vnode->vfs_inode.i_flags |= S_AUTOMOUNT;
spin_unlock(&vnode->lock);
}
@@ -238,52 +236,24 @@ error_no_devname:
}
/*
- * follow a link from a mountpoint directory, thus causing it to be mounted
+ * handle an automount point
*/
-static void *afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd)
+struct vfsmount *afs_d_automount(struct path *path)
{
struct vfsmount *newmnt;
- int err;
- _enter("%p{%s},{%s:%p{%s},}",
- dentry,
- dentry->d_name.name,
- nd->path.mnt->mnt_devname,
- dentry,
- nd->path.dentry->d_name.name);
-
- dput(nd->path.dentry);
- nd->path.dentry = dget(dentry);
-
- newmnt = afs_mntpt_do_automount(nd->path.dentry);
- if (IS_ERR(newmnt)) {
- path_put(&nd->path);
- return (void *)newmnt;
- }
+ _enter("{%s,%s}", path->mnt->mnt_devname, path->dentry->d_name.name);
- mntget(newmnt);
- err = do_add_mount(newmnt, &nd->path, MNT_SHRINKABLE, &afs_vfsmounts);
- switch (err) {
- case 0:
- path_put(&nd->path);
- nd->path.mnt = newmnt;
- nd->path.dentry = dget(newmnt->mnt_root);
- queue_delayed_work(afs_wq, &afs_mntpt_expiry_timer,
- afs_mntpt_expiry_timeout * HZ);
- break;
- case -EBUSY:
- /* someone else made a mount here whilst we were busy */
- while (d_mountpoint(nd->path.dentry) &&
- follow_down(&nd->path))
- ;
- err = 0;
- default:
- mntput(newmnt);
- break;
- }
+ newmnt = afs_mntpt_do_automount(path->dentry);
+ if (IS_ERR(newmnt))
+ return newmnt;
- _leave(" = %d", err);
- return ERR_PTR(err);
+ mntget(newmnt); /* prevent immediate expiration */
+ mnt_set_expiry(newmnt, &afs_vfsmounts);
+ queue_delayed_work(afs_wq, &afs_mntpt_expiry_timer,
+ afs_mntpt_expiry_timeout * HZ);
+ _leave(" = %p {%s}", newmnt, newmnt->mnt_devname);
+ return newmnt;
}
/*
diff --git a/fs/aio.c b/fs/aio.c
index 5e00f15..fc557a3 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -87,7 +87,7 @@ static int __init aio_setup(void)
aio_wq = create_workqueue("aio");
abe_pool = mempool_create_kmalloc_pool(1, sizeof(struct aio_batch_entry));
- BUG_ON(!abe_pool);
+ BUG_ON(!aio_wq || !abe_pool);
pr_debug("aio_setup: sizeof(struct page) = %d\n", (int)sizeof(struct page));
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index cbe57f3..c5567cb 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -233,7 +233,7 @@ static int __init anon_inode_init(void)
return 0;
err_mntput:
- mntput_long(anon_inode_mnt);
+ mntput(anon_inode_mnt);
err_unregister_filesystem:
unregister_filesystem(&anon_inode_fs_type);
err_exit:
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 0fffe1c..54f9237 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -88,18 +88,9 @@ struct autofs_info {
uid_t uid;
gid_t gid;
-
- mode_t mode;
- size_t size;
-
- void (*free)(struct autofs_info *);
- union {
- const char *symlink;
- } u;
};
#define AUTOFS_INF_EXPIRING (1<<0) /* dentry is in the process of expiring */
-#define AUTOFS_INF_MOUNTPOINT (1<<1) /* mountpoint status for direct expire */
#define AUTOFS_INF_PENDING (1<<2) /* dentry pending mount */
struct autofs_wait_queue {
@@ -176,14 +167,7 @@ static inline int autofs4_ispending(struct dentry *dentry)
return 0;
}
-static inline void autofs4_copy_atime(struct file *src, struct file *dst)
-{
- dst->f_path.dentry->d_inode->i_atime =
- src->f_path.dentry->d_inode->i_atime;
- return;
-}
-
-struct inode *autofs4_get_inode(struct super_block *, struct autofs_info *);
+struct inode *autofs4_get_inode(struct super_block *, mode_t);
void autofs4_free_ino(struct autofs_info *);
/* Expiration */
@@ -212,16 +196,89 @@ void autofs_dev_ioctl_exit(void);
extern const struct inode_operations autofs4_symlink_inode_operations;
extern const struct inode_operations autofs4_dir_inode_operations;
-extern const struct inode_operations autofs4_root_inode_operations;
-extern const struct inode_operations autofs4_indirect_root_inode_operations;
-extern const struct inode_operations autofs4_direct_root_inode_operations;
extern const struct file_operations autofs4_dir_operations;
extern const struct file_operations autofs4_root_operations;
+extern const struct dentry_operations autofs4_dentry_operations;
+
+/* VFS automount flags management functions */
+
+static inline void __managed_dentry_set_automount(struct dentry *dentry)
+{
+ dentry->d_flags |= DCACHE_NEED_AUTOMOUNT;
+}
+
+static inline void managed_dentry_set_automount(struct dentry *dentry)
+{
+ spin_lock(&dentry->d_lock);
+ __managed_dentry_set_automount(dentry);
+ spin_unlock(&dentry->d_lock);
+}
+
+static inline void __managed_dentry_clear_automount(struct dentry *dentry)
+{
+ dentry->d_flags &= ~DCACHE_NEED_AUTOMOUNT;
+}
+
+static inline void managed_dentry_clear_automount(struct dentry *dentry)
+{
+ spin_lock(&dentry->d_lock);
+ __managed_dentry_clear_automount(dentry);
+ spin_unlock(&dentry->d_lock);
+}
+
+static inline void __managed_dentry_set_transit(struct dentry *dentry)
+{
+ dentry->d_flags |= DCACHE_MANAGE_TRANSIT;
+}
+
+static inline void managed_dentry_set_transit(struct dentry *dentry)
+{
+ spin_lock(&dentry->d_lock);
+ __managed_dentry_set_transit(dentry);
+ spin_unlock(&dentry->d_lock);
+}
+
+static inline void __managed_dentry_clear_transit(struct dentry *dentry)
+{
+ dentry->d_flags &= ~DCACHE_MANAGE_TRANSIT;
+}
+
+static inline void managed_dentry_clear_transit(struct dentry *dentry)
+{
+ spin_lock(&dentry->d_lock);
+ __managed_dentry_clear_transit(dentry);
+ spin_unlock(&dentry->d_lock);
+}
+
+static inline void __managed_dentry_set_managed(struct dentry *dentry)
+{
+ dentry->d_flags |= (DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT);
+}
+
+static inline void managed_dentry_set_managed(struct dentry *dentry)
+{
+ spin_lock(&dentry->d_lock);
+ __managed_dentry_set_managed(dentry);
+ spin_unlock(&dentry->d_lock);
+}
+
+static inline void __managed_dentry_clear_managed(struct dentry *dentry)
+{
+ dentry->d_flags &= ~(DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT);
+}
+
+static inline void managed_dentry_clear_managed(struct dentry *dentry)
+{
+ spin_lock(&dentry->d_lock);
+ __managed_dentry_clear_managed(dentry);
+ spin_unlock(&dentry->d_lock);
+}
/* Initializing function */
int autofs4_fill_super(struct super_block *, void *, int);
-struct autofs_info *autofs4_init_ino(struct autofs_info *, struct autofs_sb_info *sbi, mode_t mode);
+struct autofs_info *autofs4_new_ino(struct autofs_sb_info *);
+void autofs4_clean_ino(struct autofs_info *);
/* Queue management functions */
@@ -229,19 +286,6 @@ int autofs4_wait(struct autofs_sb_info *,struct dentry *, enum autofs_notify);
int autofs4_wait_release(struct autofs_sb_info *,autofs_wqt_t,int);
void autofs4_catatonic_mode(struct autofs_sb_info *);
-static inline int autofs4_follow_mount(struct path *path)
-{
- int res = 0;
-
- while (d_mountpoint(path->dentry)) {
- int followed = follow_down(path);
- if (!followed)
- break;
- res = 1;
- }
- return res;
-}
-
static inline u32 autofs4_get_dev(struct autofs_sb_info *sbi)
{
return new_encode_dev(sbi->sb->s_dev);
@@ -294,5 +338,4 @@ static inline void autofs4_del_expiring(struct dentry *dentry)
return;
}
-void autofs4_dentry_release(struct dentry *);
extern void autofs4_kill_sb(struct super_block *);
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index eff9a41..1442da4 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -551,7 +551,7 @@ static int autofs_dev_ioctl_ismountpoint(struct file *fp,
err = have_submounts(path.dentry);
- if (follow_down(&path))
+ if (follow_down_one(&path))
magic = path.mnt->mnt_sb->s_magic;
}
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index cc1d013..f43100b 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -26,10 +26,6 @@ static inline int autofs4_can_expire(struct dentry *dentry,
if (ino == NULL)
return 0;
- /* No point expiring a pending mount */
- if (ino->flags & AUTOFS_INF_PENDING)
- return 0;
-
if (!do_now) {
/* Too young to die */
if (!timeout || time_after(ino->last_used + timeout, now))
@@ -56,7 +52,7 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry)
path_get(&path);
- if (!follow_down(&path))
+ if (!follow_down_one(&path))
goto done;
if (is_autofs4_dentry(path.dentry)) {
@@ -100,7 +96,7 @@ static struct dentry *get_next_positive_dentry(struct dentry *prev,
struct dentry *p, *ret;
if (prev == NULL)
- return dget(prev);
+ return dget(root);
spin_lock(&autofs4_lock);
relock:
@@ -137,7 +133,7 @@ again:
spin_lock_nested(&ret->d_lock, DENTRY_D_LOCK_NESTED);
/* Negative dentry - try next */
if (!simple_positive(ret)) {
- spin_unlock(&ret->d_lock);
+ spin_unlock(&p->d_lock);
p = ret;
goto again;
}
@@ -283,6 +279,7 @@ struct dentry *autofs4_expire_direct(struct super_block *sb,
unsigned long timeout;
struct dentry *root = dget(sb->s_root);
int do_now = how & AUTOFS_EXP_IMMEDIATE;
+ struct autofs_info *ino;
if (!root)
return NULL;
@@ -291,19 +288,21 @@ struct dentry *autofs4_expire_direct(struct super_block *sb,
timeout = sbi->exp_timeout;
spin_lock(&sbi->fs_lock);
+ ino = autofs4_dentry_ino(root);
+ /* No point expiring a pending mount */
+ if (ino->flags & AUTOFS_INF_PENDING) {
+ spin_unlock(&sbi->fs_lock);
+ return NULL;
+ }
+ managed_dentry_set_transit(root);
if (!autofs4_direct_busy(mnt, root, timeout, do_now)) {
struct autofs_info *ino = autofs4_dentry_ino(root);
- if (d_mountpoint(root)) {
- ino->flags |= AUTOFS_INF_MOUNTPOINT;
- spin_lock(&root->d_lock);
- root->d_flags &= ~DCACHE_MOUNTED;
- spin_unlock(&root->d_lock);
- }
ino->flags |= AUTOFS_INF_EXPIRING;
init_completion(&ino->expire_complete);
spin_unlock(&sbi->fs_lock);
return root;
}
+ managed_dentry_clear_transit(root);
spin_unlock(&sbi->fs_lock);
dput(root);
@@ -340,6 +339,10 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
while ((dentry = get_next_positive_dentry(dentry, root))) {
spin_lock(&sbi->fs_lock);
ino = autofs4_dentry_ino(dentry);
+ /* No point expiring a pending mount */
+ if (ino->flags & AUTOFS_INF_PENDING)
+ goto cont;
+ managed_dentry_set_transit(dentry);
/*
* Case 1: (i) indirect mount or top level pseudo direct mount
@@ -399,6 +402,8 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
}
}
next:
+ managed_dentry_clear_transit(dentry);
+cont:
spin_unlock(&sbi->fs_lock);
}
return NULL;
@@ -479,6 +484,8 @@ int autofs4_expire_run(struct super_block *sb,
spin_lock(&sbi->fs_lock);
ino = autofs4_dentry_ino(dentry);
ino->flags &= ~AUTOFS_INF_EXPIRING;
+ if (!d_unhashed(dentry))
+ managed_dentry_clear_transit(dentry);
complete_all(&ino->expire_complete);
spin_unlock(&sbi->fs_lock);
@@ -504,18 +511,18 @@ int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt,
ret = autofs4_wait(sbi, dentry, NFY_EXPIRE);
spin_lock(&sbi->fs_lock);
- if (ino->flags & AUTOFS_INF_MOUNTPOINT) {
- spin_lock(&sb->s_root->d_lock);
- /*
- * If we haven't been expired away, then reset
- * mounted status.
- */
- if (mnt->mnt_parent != mnt)
- sb->s_root->d_flags |= DCACHE_MOUNTED;
- spin_unlock(&sb->s_root->d_lock);
- ino->flags &= ~AUTOFS_INF_MOUNTPOINT;
- }
ino->flags &= ~AUTOFS_INF_EXPIRING;
+ spin_lock(&dentry->d_lock);
+ if (ret)
+ __managed_dentry_clear_transit(dentry);
+ else {
+ if ((IS_ROOT(dentry) ||
+ (autofs_type_indirect(sbi->type) &&
+ IS_ROOT(dentry->d_parent))) &&
+ !(dentry->d_flags & DCACHE_NEED_AUTOMOUNT))
+ __managed_dentry_set_automount(dentry);
+ }
+ spin_unlock(&dentry->d_lock);
complete_all(&ino->expire_complete);
spin_unlock(&sbi->fs_lock);
dput(dentry);
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index a7bdb9d..180fa24 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -22,77 +22,27 @@
#include "autofs_i.h"
#include <linux/module.h>
-static void ino_lnkfree(struct autofs_info *ino)
+struct autofs_info *autofs4_new_ino(struct autofs_sb_info *sbi)
{
- if (ino->u.symlink) {
- kfree(ino->u.symlink);
- ino->u.symlink = NULL;
- }
-}
-
-struct autofs_info *autofs4_init_ino(struct autofs_info *ino,
- struct autofs_sb_info *sbi, mode_t mode)
-{
- int reinit = 1;
-
- if (ino == NULL) {
- reinit = 0;
- ino = kmalloc(sizeof(*ino), GFP_KERNEL);
- }
-
- if (ino == NULL)
- return NULL;
-
- if (!reinit) {
- ino->flags = 0;
- ino->inode = NULL;
- ino->dentry = NULL;
- ino->size = 0;
+ struct autofs_info *ino = kzalloc(sizeof(*ino), GFP_KERNEL);
+ if (ino) {
INIT_LIST_HEAD(&ino->active);
- ino->active_count = 0;
INIT_LIST_HEAD(&ino->expiring);
- atomic_set(&ino->count, 0);
+ ino->last_used = jiffies;
+ ino->sbi = sbi;
}
+ return ino;
+}
+void autofs4_clean_ino(struct autofs_info *ino)
+{
ino->uid = 0;
ino->gid = 0;
- ino->mode = mode;
ino->last_used = jiffies;
-
- ino->sbi = sbi;
-
- if (reinit && ino->free)
- (ino->free)(ino);
-
- memset(&ino->u, 0, sizeof(ino->u));
-
- ino->free = NULL;
-
- if (S_ISLNK(mode))
- ino->free = ino_lnkfree;
-
- return ino;
}
void autofs4_free_ino(struct autofs_info *ino)
{
- struct autofs_info *p_ino;
-
- if (ino->dentry) {
- ino->dentry->d_fsdata = NULL;
- if (ino->dentry->d_inode) {
- struct dentry *parent = ino->dentry->d_parent;
- if (atomic_dec_and_test(&ino->count)) {
- p_ino = autofs4_dentry_ino(parent);
- if (p_ino && parent != ino->dentry)
- atomic_dec(&p_ino->count);
- }
- dput(ino->dentry);
- }
- ino->dentry = NULL;
- }
- if (ino->free)
- (ino->free)(ino);
kfree(ino);
}
@@ -148,9 +98,16 @@ static int autofs4_show_options(struct seq_file *m, struct vfsmount *mnt)
return 0;
}
+static void autofs4_evict_inode(struct inode *inode)
+{
+ end_writeback(inode);
+ kfree(inode->i_private);
+}
+
static const struct super_operations autofs4_sops = {
.statfs = simple_statfs,
.show_options = autofs4_show_options,
+ .evict_inode = autofs4_evict_inode,
};
enum {Opt_err, Opt_fd, Opt_uid, Opt_gid, Opt_pgrp, Opt_minproto, Opt_maxproto,
@@ -240,21 +197,6 @@ static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid,
return (*pipefd < 0);
}
-static struct autofs_info *autofs4_mkroot(struct autofs_sb_info *sbi)
-{
- struct autofs_info *ino;
-
- ino = autofs4_init_ino(NULL, sbi, S_IFDIR | 0755);
- if (!ino)
- return NULL;
-
- return ino;
-}
-
-static const struct dentry_operations autofs4_sb_dentry_operations = {
- .d_release = autofs4_dentry_release,
-};
-
int autofs4_fill_super(struct super_block *s, void *data, int silent)
{
struct inode * root_inode;
@@ -292,15 +234,16 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
s->s_blocksize_bits = 10;
s->s_magic = AUTOFS_SUPER_MAGIC;
s->s_op = &autofs4_sops;
+ s->s_d_op = &autofs4_dentry_operations;
s->s_time_gran = 1;
/*
* Get the root inode and dentry, but defer checking for errors.
*/
- ino = autofs4_mkroot(sbi);
+ ino = autofs4_new_ino(sbi);
if (!ino)
goto fail_free;
- root_inode = autofs4_get_inode(s, ino);
+ root_inode = autofs4_get_inode(s, S_IFDIR | 0755);
if (!root_inode)
goto fail_ino;
@@ -309,7 +252,6 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
goto fail_iput;
pipe = NULL;
- d_set_d_op(root, &autofs4_sb_dentry_operations);
root->d_fsdata = ino;
/* Can this call block? */
@@ -320,10 +262,11 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
goto fail_dput;
}
+ if (autofs_type_trigger(sbi->type))
+ __managed_dentry_set_managed(root);
+
root_inode->i_fop = &autofs4_root_operations;
- root_inode->i_op = autofs_type_trigger(sbi->type) ?
- &autofs4_direct_root_inode_operations :
- &autofs4_indirect_root_inode_operations;
+ root_inode->i_op = &autofs4_dir_inode_operations;
/* Couldn't this be tested earlier? */
if (sbi->max_proto < AUTOFS_MIN_PROTO_VERSION ||
@@ -383,16 +326,14 @@ fail_unlock:
return -EINVAL;
}
-struct inode *autofs4_get_inode(struct super_block *sb,
- struct autofs_info *inf)
+struct inode *autofs4_get_inode(struct super_block *sb, mode_t mode)
{
struct inode *inode = new_inode(sb);
if (inode == NULL)
return NULL;
- inf->inode = inode;
- inode->i_mode = inf->mode;
+ inode->i_mode = mode;
if (sb->s_root) {
inode->i_uid = sb->s_root->d_inode->i_uid;
inode->i_gid = sb->s_root->d_inode->i_gid;
@@ -400,12 +341,11 @@ struct inode *autofs4_get_inode(struct super_block *sb,
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
inode->i_ino = get_next_ino();
- if (S_ISDIR(inf->mode)) {
+ if (S_ISDIR(mode)) {
inode->i_nlink = 2;
inode->i_op = &autofs4_dir_inode_operations;
inode->i_fop = &autofs4_dir_operations;
- } else if (S_ISLNK(inf->mode)) {
- inode->i_size = inf->size;
+ } else if (S_ISLNK(mode)) {
inode->i_op = &autofs4_symlink_inode_operations;
}
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 651e4ef..014e7ab 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -35,10 +35,9 @@ static long autofs4_root_compat_ioctl(struct file *,unsigned int,unsigned long);
#endif
static int autofs4_dir_open(struct inode *inode, struct file *file);
static struct dentry *autofs4_lookup(struct inode *,struct dentry *, struct nameidata *);
-static void *autofs4_follow_link(struct dentry *, struct nameidata *);
-
-#define TRIGGER_FLAGS (LOOKUP_CONTINUE | LOOKUP_DIRECTORY)
-#define TRIGGER_INTENTS (LOOKUP_OPEN | LOOKUP_CREATE)
+static struct vfsmount *autofs4_d_automount(struct path *);
+static int autofs4_d_manage(struct dentry *, bool, bool);
+static void autofs4_dentry_release(struct dentry *);
const struct file_operations autofs4_root_operations = {
.open = dcache_dir_open,
@@ -60,7 +59,7 @@ const struct file_operations autofs4_dir_operations = {
.llseek = dcache_dir_lseek,
};
-const struct inode_operations autofs4_indirect_root_inode_operations = {
+const struct inode_operations autofs4_dir_inode_operations = {
.lookup = autofs4_lookup,
.unlink = autofs4_dir_unlink,
.symlink = autofs4_dir_symlink,
@@ -68,20 +67,10 @@ const struct inode_operations autofs4_indirect_root_inode_operations = {
.rmdir = autofs4_dir_rmdir,
};
-const struct inode_operations autofs4_direct_root_inode_operations = {
- .lookup = autofs4_lookup,
- .unlink = autofs4_dir_unlink,
- .mkdir = autofs4_dir_mkdir,
- .rmdir = autofs4_dir_rmdir,
- .follow_link = autofs4_follow_link,
-};
-
-const struct inode_operations autofs4_dir_inode_operations = {
- .lookup = autofs4_lookup,
- .unlink = autofs4_dir_unlink,
- .symlink = autofs4_dir_symlink,
- .mkdir = autofs4_dir_mkdir,
- .rmdir = autofs4_dir_rmdir,
+const struct dentry_operations autofs4_dentry_operations = {
+ .d_automount = autofs4_d_automount,
+ .d_manage = autofs4_d_manage,
+ .d_release = autofs4_dentry_release,
};
static void autofs4_add_active(struct dentry *dentry)
@@ -116,14 +105,6 @@ static void autofs4_del_active(struct dentry *dentry)
return;
}
-static unsigned int autofs4_need_mount(unsigned int flags)
-{
- unsigned int res = 0;
- if (flags & (TRIGGER_FLAGS | TRIGGER_INTENTS))
- res = 1;
- return res;
-}
-
static int autofs4_dir_open(struct inode *inode, struct file *file)
{
struct dentry *dentry = file->f_path.dentry;
@@ -158,278 +139,27 @@ out:
return dcache_dir_open(inode, file);
}
-static int try_to_fill_dentry(struct dentry *dentry, int flags)
-{
- struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
- struct autofs_info *ino = autofs4_dentry_ino(dentry);
- int status;
-
- DPRINTK("dentry=%p %.*s ino=%p",
- dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode);
-
- /*
- * Wait for a pending mount, triggering one if there
- * isn't one already
- */
- if (dentry->d_inode == NULL) {
- DPRINTK("waiting for mount name=%.*s",
- dentry->d_name.len, dentry->d_name.name);
-
- status = autofs4_wait(sbi, dentry, NFY_MOUNT);
-
- DPRINTK("mount done status=%d", status);
-
- /* Turn this into a real negative dentry? */
- if (status == -ENOENT) {
- spin_lock(&sbi->fs_lock);
- ino->flags &= ~AUTOFS_INF_PENDING;
- spin_unlock(&sbi->fs_lock);
- return status;
- } else if (status) {
- /* Return a negative dentry, but leave it "pending" */
- return status;
- }
- /* Trigger mount for path component or follow link */
- } else if (ino->flags & AUTOFS_INF_PENDING ||
- autofs4_need_mount(flags)) {
- DPRINTK("waiting for mount name=%.*s",
- dentry->d_name.len, dentry->d_name.name);
-
- spin_lock(&sbi->fs_lock);
- ino->flags |= AUTOFS_INF_PENDING;
- spin_unlock(&sbi->fs_lock);
- status = autofs4_wait(sbi, dentry, NFY_MOUNT);
-
- DPRINTK("mount done status=%d", status);
-
- if (status) {
- spin_lock(&sbi->fs_lock);
- ino->flags &= ~AUTOFS_INF_PENDING;
- spin_unlock(&sbi->fs_lock);
- return status;
- }
- }
-
- /* Initialize expiry counter after successful mount */
- ino->last_used = jiffies;
-
- spin_lock(&sbi->fs_lock);
- ino->flags &= ~AUTOFS_INF_PENDING;
- spin_unlock(&sbi->fs_lock);
-
- return 0;
-}
-
-/* For autofs direct mounts the follow link triggers the mount */
-static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
-{
- struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
- struct autofs_info *ino = autofs4_dentry_ino(dentry);
- int oz_mode = autofs4_oz_mode(sbi);
- unsigned int lookup_type;
- int status;
-
- DPRINTK("dentry=%p %.*s oz_mode=%d nd->flags=%d",
- dentry, dentry->d_name.len, dentry->d_name.name, oz_mode,
- nd->flags);
- /*
- * For an expire of a covered direct or offset mount we need
- * to break out of follow_down() at the autofs mount trigger
- * (d_mounted--), so we can see the expiring flag, and manage
- * the blocking and following here until the expire is completed.
- */
- if (oz_mode) {
- spin_lock(&sbi->fs_lock);
- if (ino->flags & AUTOFS_INF_EXPIRING) {
- spin_unlock(&sbi->fs_lock);
- /* Follow down to our covering mount. */
- if (!follow_down(&nd->path))
- goto done;
- goto follow;
- }
- spin_unlock(&sbi->fs_lock);
- goto done;
- }
-
- /* If an expire request is pending everyone must wait. */
- autofs4_expire_wait(dentry);
-
- /* We trigger a mount for almost all flags */
- lookup_type = autofs4_need_mount(nd->flags);
- spin_lock(&sbi->fs_lock);
- spin_lock(&autofs4_lock);
- spin_lock(&dentry->d_lock);
- if (!(lookup_type || ino->flags & AUTOFS_INF_PENDING)) {
- spin_unlock(&dentry->d_lock);
- spin_unlock(&autofs4_lock);
- spin_unlock(&sbi->fs_lock);
- goto follow;
- }
-
- /*
- * If the dentry contains directories then it is an autofs
- * multi-mount with no root mount offset. So don't try to
- * mount it again.
- */
- if (ino->flags & AUTOFS_INF_PENDING ||
- (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs))) {
- spin_unlock(&dentry->d_lock);
- spin_unlock(&autofs4_lock);
- spin_unlock(&sbi->fs_lock);
-
- status = try_to_fill_dentry(dentry, nd->flags);
- if (status)
- goto out_error;
-
- goto follow;
- }
- spin_unlock(&dentry->d_lock);
- spin_unlock(&autofs4_lock);
- spin_unlock(&sbi->fs_lock);
-follow:
- /*
- * If there is no root mount it must be an autofs
- * multi-mount with no root offset so we don't need
- * to follow it.
- */
- if (d_mountpoint(dentry)) {
- if (!autofs4_follow_mount(&nd->path)) {
- status = -ENOENT;
- goto out_error;
- }
- }
-
-done:
- return NULL;
-
-out_error:
- path_put(&nd->path);
- return ERR_PTR(status);
-}
-
-/*
- * Revalidate is called on every cache lookup. Some of those
- * cache lookups may actually happen while the dentry is not
- * yet completely filled in, and revalidate has to delay such
- * lookups..
- */
-static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
+static void autofs4_dentry_release(struct dentry *de)
{
- struct inode *dir;
- struct autofs_sb_info *sbi;
- int oz_mode;
- int flags = nd ? nd->flags : 0;
- int status = 1;
-
- if (flags & LOOKUP_RCU)
- return -ECHILD;
-
- dir = dentry->d_parent->d_inode;
- sbi = autofs4_sbi(dir->i_sb);
- oz_mode = autofs4_oz_mode(sbi);
-
- /* Pending dentry */
- spin_lock(&sbi->fs_lock);
- if (autofs4_ispending(dentry)) {
- /* The daemon never causes a mount to trigger */
- spin_unlock(&sbi->fs_lock);
-
- if (oz_mode)
- return 1;
-
- /*
- * If the directory has gone away due to an expire
- * we have been called as ->d_revalidate() and so
- * we need to return false and proceed to ->lookup().
- */
- if (autofs4_expire_wait(dentry) == -EAGAIN)
- return 0;
-
- /*
- * A zero status is success otherwise we have a
- * negative error code.
- */
- status = try_to_fill_dentry(dentry, flags);
- if (status == 0)
- return 1;
-
- return status;
- }
- spin_unlock(&sbi->fs_lock);
-
- /* Negative dentry.. invalidate if "old" */
- if (dentry->d_inode == NULL)
- return 0;
-
- /* Check for a non-mountpoint directory with no contents */
- spin_lock(&autofs4_lock);
- spin_lock(&dentry->d_lock);
- if (S_ISDIR(dentry->d_inode->i_mode) &&
- !d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) {
- DPRINTK("dentry=%p %.*s, emptydir",
- dentry, dentry->d_name.len, dentry->d_name.name);
- spin_unlock(&dentry->d_lock);
- spin_unlock(&autofs4_lock);
-
- /* The daemon never causes a mount to trigger */
- if (oz_mode)
- return 1;
-
- /*
- * A zero status is success otherwise we have a
- * negative error code.
- */
- status = try_to_fill_dentry(dentry, flags);
- if (status == 0)
- return 1;
-
- return status;
- }
- spin_unlock(&dentry->d_lock);
- spin_unlock(&autofs4_lock);
-
- return 1;
-}
-
-void autofs4_dentry_release(struct dentry *de)
-{
- struct autofs_info *inf;
+ struct autofs_info *ino = autofs4_dentry_ino(de);
+ struct autofs_sb_info *sbi = autofs4_sbi(de->d_sb);
DPRINTK("releasing %p", de);
- inf = autofs4_dentry_ino(de);
- de->d_fsdata = NULL;
-
- if (inf) {
- struct autofs_sb_info *sbi = autofs4_sbi(de->d_sb);
-
- if (sbi) {
- spin_lock(&sbi->lookup_lock);
- if (!list_empty(&inf->active))
- list_del(&inf->active);
- if (!list_empty(&inf->expiring))
- list_del(&inf->expiring);
- spin_unlock(&sbi->lookup_lock);
- }
-
- inf->dentry = NULL;
- inf->inode = NULL;
+ if (!ino)
+ return;
- autofs4_free_ino(inf);
+ if (sbi) {
+ spin_lock(&sbi->lookup_lock);
+ if (!list_empty(&ino->active))
+ list_del(&ino->active);
+ if (!list_empty(&ino->expiring))
+ list_del(&ino->expiring);
+ spin_unlock(&sbi->lookup_lock);
}
-}
-/* For dentries of directories in the root dir */
-static const struct dentry_operations autofs4_root_dentry_operations = {
- .d_revalidate = autofs4_revalidate,
- .d_release = autofs4_dentry_release,
-};
-
-/* For other dentries */
-static const struct dentry_operations autofs4_dentry_operations = {
- .d_revalidate = autofs4_revalidate,
- .d_release = autofs4_dentry_release,
-};
+ autofs4_free_ino(ino);
+}
static struct dentry *autofs4_lookup_active(struct dentry *dentry)
{
@@ -541,51 +271,246 @@ next:
return NULL;
}
+static int autofs4_mount_wait(struct dentry *dentry)
+{
+ struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
+ struct autofs_info *ino = autofs4_dentry_ino(dentry);
+ int status;
+
+ if (ino->flags & AUTOFS_INF_PENDING) {
+ DPRINTK("waiting for mount name=%.*s",
+ dentry->d_name.len, dentry->d_name.name);
+ status = autofs4_wait(sbi, dentry, NFY_MOUNT);
+ DPRINTK("mount wait done status=%d", status);
+ ino->last_used = jiffies;
+ return status;
+ }
+ return 0;
+}
+
+static int do_expire_wait(struct dentry *dentry)
+{
+ struct dentry *expiring;
+
+ expiring = autofs4_lookup_expiring(dentry);
+ if (!expiring)
+ return autofs4_expire_wait(dentry);
+ else {
+ /*
+ * If we are racing with expire the request might not
+ * be quite complete, but the directory has been removed
+ * so it must have been successful, just wait for it.
+ */
+ autofs4_expire_wait(expiring);
+ autofs4_del_expiring(expiring);
+ dput(expiring);
+ }
+ return 0;
+}
+
+static struct dentry *autofs4_mountpoint_changed(struct path *path)
+{
+ struct dentry *dentry = path->dentry;
+ struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
+
+ /*
+ * If this is an indirect mount the dentry could have gone away
+ * as a result of an expire and a new one created.
+ */
+ if (autofs_type_indirect(sbi->type) && d_unhashed(dentry)) {
+ struct dentry *parent = dentry->d_parent;
+ struct dentry *new = d_lookup(parent, &dentry->d_name);
+ if (!new)
+ return NULL;
+ dput(path->dentry);
+ path->dentry = new;
+ }
+ return path->dentry;
+}
+
+static struct vfsmount *autofs4_d_automount(struct path *path)
+{
+ struct dentry *dentry = path->dentry;
+ struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
+ struct autofs_info *ino = autofs4_dentry_ino(dentry);
+ int status;
+
+ DPRINTK("dentry=%p %.*s",
+ dentry, dentry->d_name.len, dentry->d_name.name);
+
+ /*
+ * Someone may have manually umounted this or it was a submount
+ * that has gone away.
+ */
+ spin_lock(&dentry->d_lock);
+ if (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) {
+ if (!(dentry->d_flags & DCACHE_MANAGE_TRANSIT) &&
+ (dentry->d_flags & DCACHE_NEED_AUTOMOUNT))
+ __managed_dentry_set_transit(path->dentry);
+ }
+ spin_unlock(&dentry->d_lock);
+
+ /* The daemon never triggers a mount. */
+ if (autofs4_oz_mode(sbi))
+ return NULL;
+
+ /*
+ * If an expire request is pending everyone must wait.
+ * If the expire fails we're still mounted so continue
+ * the follow and return. A return of -EAGAIN (which only
+ * happens with indirect mounts) means the expire completed
+ * and the directory was removed, so just go ahead and try
+ * the mount.
+ */
+ status = do_expire_wait(dentry);
+ if (status && status != -EAGAIN)
+ return NULL;
+
+ /* Callback to the daemon to perform the mount or wait */
+ spin_lock(&sbi->fs_lock);
+ if (ino->flags & AUTOFS_INF_PENDING) {
+ spin_unlock(&sbi->fs_lock);
+ status = autofs4_mount_wait(dentry);
+ if (status)
+ return ERR_PTR(status);
+ spin_lock(&sbi->fs_lock);
+ goto done;
+ }
+
+ /*
+ * If the dentry is a symlink it's equivalent to a directory
+ * having d_mountpoint() true, so there's no need to call back
+ * to the daemon.
+ */
+ if (dentry->d_inode && S_ISLNK(dentry->d_inode->i_mode))
+ goto done;
+ if (!d_mountpoint(dentry)) {
+ /*
+ * It's possible that user space hasn't removed directories
+ * after umounting a rootless multi-mount, although it
+ * should. For v5 have_submounts() is sufficient to handle
+ * this because the leaves of the directory tree under the
+ * mount never trigger mounts themselves (they have an autofs
+ * trigger mount mounted on them). But v4 pseudo direct mounts
+ * do need the leaves to to trigger mounts. In this case we
+ * have no choice but to use the list_empty() check and
+ * require user space behave.
+ */
+ if (sbi->version > 4) {
+ if (have_submounts(dentry))
+ goto done;
+ } else {
+ spin_lock(&dentry->d_lock);
+ if (!list_empty(&dentry->d_subdirs)) {
+ spin_unlock(&dentry->d_lock);
+ goto done;
+ }
+ spin_unlock(&dentry->d_lock);
+ }
+ ino->flags |= AUTOFS_INF_PENDING;
+ spin_unlock(&sbi->fs_lock);
+ status = autofs4_mount_wait(dentry);
+ if (status)
+ return ERR_PTR(status);
+ spin_lock(&sbi->fs_lock);
+ ino->flags &= ~AUTOFS_INF_PENDING;
+ }
+done:
+ if (!(ino->flags & AUTOFS_INF_EXPIRING)) {
+ /*
+ * Any needed mounting has been completed and the path updated
+ * so turn this into a normal dentry so we don't continually
+ * call ->d_automount() and ->d_manage().
+ */
+ spin_lock(&dentry->d_lock);
+ __managed_dentry_clear_transit(dentry);
+ /*
+ * Only clear DMANAGED_AUTOMOUNT for rootless multi-mounts and
+ * symlinks as in all other cases the dentry will be covered by
+ * an actual mount so ->d_automount() won't be called during
+ * the follow.
+ */
+ if ((!d_mountpoint(dentry) &&
+ !list_empty(&dentry->d_subdirs)) ||
+ (dentry->d_inode && S_ISLNK(dentry->d_inode->i_mode)))
+ __managed_dentry_clear_automount(dentry);
+ spin_unlock(&dentry->d_lock);
+ }
+ spin_unlock(&sbi->fs_lock);
+
+ /* Mount succeeded, check if we ended up with a new dentry */
+ dentry = autofs4_mountpoint_changed(path);
+ if (!dentry)
+ return ERR_PTR(-ENOENT);
+
+ return NULL;
+}
+
+int autofs4_d_manage(struct dentry *dentry, bool mounting_here, bool rcu_walk)
+{
+ struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
+
+ DPRINTK("dentry=%p %.*s",
+ dentry, dentry->d_name.len, dentry->d_name.name);
+
+ /* The daemon never waits. */
+ if (autofs4_oz_mode(sbi) || mounting_here) {
+ if (!d_mountpoint(dentry))
+ return -EISDIR;
+ return 0;
+ }
+
+ /* We need to sleep, so we need pathwalk to be in ref-mode */
+ if (rcu_walk)
+ return -ECHILD;
+
+ /* Wait for pending expires */
+ do_expire_wait(dentry);
+
+ /*
+ * This dentry may be under construction so wait on mount
+ * completion.
+ */
+ return autofs4_mount_wait(dentry);
+}
+
/* Lookups in the root directory */
static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
{
struct autofs_sb_info *sbi;
struct autofs_info *ino;
- struct dentry *expiring, *active;
- int oz_mode;
+ struct dentry *active;
- DPRINTK("name = %.*s",
- dentry->d_name.len, dentry->d_name.name);
+ DPRINTK("name = %.*s", dentry->d_name.len, dentry->d_name.name);
/* File name too long to exist */
if (dentry->d_name.len > NAME_MAX)
return ERR_PTR(-ENAMETOOLONG);
sbi = autofs4_sbi(dir->i_sb);
- oz_mode = autofs4_oz_mode(sbi);
DPRINTK("pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d",
- current->pid, task_pgrp_nr(current), sbi->catatonic, oz_mode);
+ current->pid, task_pgrp_nr(current), sbi->catatonic,
+ autofs4_oz_mode(sbi));
active = autofs4_lookup_active(dentry);
if (active) {
- dentry = active;
- ino = autofs4_dentry_ino(dentry);
+ return active;
} else {
/*
- * Mark the dentry incomplete but don't hash it. We do this
- * to serialize our inode creation operations (symlink and
- * mkdir) which prevents deadlock during the callback to
- * the daemon. Subsequent user space lookups for the same
- * dentry are placed on the wait queue while the daemon
- * itself is allowed passage unresticted so the create
- * operation itself can then hash the dentry. Finally,
- * we check for the hashed dentry and return the newly
- * hashed dentry.
+ * A dentry that is not within the root can never trigger a
+ * mount operation, unless the directory already exists, so we
+ * can return fail immediately. The daemon however does need
+ * to create directories within the file system.
*/
- d_set_d_op(dentry, &autofs4_root_dentry_operations);
+ if (!autofs4_oz_mode(sbi) && !IS_ROOT(dentry->d_parent))
+ return ERR_PTR(-ENOENT);
- /*
- * And we need to ensure that the same dentry is used for
- * all following lookup calls until it is hashed so that
- * the dentry flags are persistent throughout the request.
- */
- ino = autofs4_init_ino(NULL, sbi, 0555);
+ /* Mark entries in the root as mount triggers */
+ if (autofs_type_indirect(sbi->type) && IS_ROOT(dentry->d_parent))
+ __managed_dentry_set_managed(dentry);
+
+ ino = autofs4_new_ino(sbi);
if (!ino)
return ERR_PTR(-ENOMEM);
@@ -596,82 +521,6 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
d_instantiate(dentry, NULL);
}
-
- if (!oz_mode) {
- mutex_unlock(&dir->i_mutex);
- expiring = autofs4_lookup_expiring(dentry);
- if (expiring) {
- /*
- * If we are racing with expire the request might not
- * be quite complete but the directory has been removed
- * so it must have been successful, so just wait for it.
- */
- autofs4_expire_wait(expiring);
- autofs4_del_expiring(expiring);
- dput(expiring);
- }
-
- spin_lock(&sbi->fs_lock);
- ino->flags |= AUTOFS_INF_PENDING;
- spin_unlock(&sbi->fs_lock);
- if (dentry->d_op && dentry->d_op->d_revalidate)
- (dentry->d_op->d_revalidate)(dentry, nd);
- mutex_lock(&dir->i_mutex);
- }
-
- /*
- * If we are still pending, check if we had to handle
- * a signal. If so we can force a restart..
- */
- if (ino->flags & AUTOFS_INF_PENDING) {
- /* See if we were interrupted */
- if (signal_pending(current)) {
- sigset_t *sigset = &current->pending.signal;
- if (sigismember (sigset, SIGKILL) ||
- sigismember (sigset, SIGQUIT) ||
- sigismember (sigset, SIGINT)) {
- if (active)
- dput(active);
- return ERR_PTR(-ERESTARTNOINTR);
- }
- }
- if (!oz_mode) {
- spin_lock(&sbi->fs_lock);
- ino->flags &= ~AUTOFS_INF_PENDING;
- spin_unlock(&sbi->fs_lock);
- }
- }
-
- /*
- * If this dentry is unhashed, then we shouldn't honour this
- * lookup. Returning ENOENT here doesn't do the right thing
- * for all system calls, but it should be OK for the operations
- * we permit from an autofs.
- */
- if (!oz_mode && d_unhashed(dentry)) {
- /*
- * A user space application can (and has done in the past)
- * remove and re-create this directory during the callback.
- * This can leave us with an unhashed dentry, but a
- * successful mount! So we need to perform another
- * cached lookup in case the dentry now exists.
- */
- struct dentry *parent = dentry->d_parent;
- struct dentry *new = d_lookup(parent, &dentry->d_name);
- if (new != NULL)
- dentry = new;
- else
- dentry = ERR_PTR(-ENOENT);
-
- if (active)
- dput(active);
-
- return dentry;
- }
-
- if (active)
- return active;
-
return NULL;
}
@@ -683,6 +532,7 @@ static int autofs4_dir_symlink(struct inode *dir,
struct autofs_info *ino = autofs4_dentry_ino(dentry);
struct autofs_info *p_ino;
struct inode *inode;
+ size_t size = strlen(symname);
char *cp;
DPRINTK("%s <- %.*s", symname,
@@ -691,45 +541,35 @@ static int autofs4_dir_symlink(struct inode *dir,
if (!autofs4_oz_mode(sbi))
return -EACCES;
- ino = autofs4_init_ino(ino, sbi, S_IFLNK | 0555);
- if (!ino)
- return -ENOMEM;
+ BUG_ON(!ino);
+
+ autofs4_clean_ino(ino);
autofs4_del_active(dentry);
- ino->size = strlen(symname);
- cp = kmalloc(ino->size + 1, GFP_KERNEL);
- if (!cp) {
- if (!dentry->d_fsdata)
- kfree(ino);
+ cp = kmalloc(size + 1, GFP_KERNEL);
+ if (!cp)
return -ENOMEM;
- }
strcpy(cp, symname);
- inode = autofs4_get_inode(dir->i_sb, ino);
+ inode = autofs4_get_inode(dir->i_sb, S_IFLNK | 0555);
if (!inode) {
kfree(cp);
if (!dentry->d_fsdata)
kfree(ino);
return -ENOMEM;
}
+ inode->i_private = cp;
+ inode->i_size = size;
d_add(dentry, inode);
- if (dir == dir->i_sb->s_root->d_inode)
- d_set_d_op(dentry, &autofs4_root_dentry_operations);
- else
- d_set_d_op(dentry, &autofs4_dentry_operations);
-
- dentry->d_fsdata = ino;
- ino->dentry = dget(dentry);
+ dget(dentry);
atomic_inc(&ino->count);
p_ino = autofs4_dentry_ino(dentry->d_parent);
if (p_ino && dentry->d_parent != dentry)
atomic_inc(&p_ino->count);
- ino->inode = inode;
- ino->u.symlink = cp;
dir->i_mtime = CURRENT_TIME;
return 0;
@@ -782,6 +622,58 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry)
return 0;
}
+/*
+ * Version 4 of autofs provides a pseudo direct mount implementation
+ * that relies on directories at the leaves of a directory tree under
+ * an indirect mount to trigger mounts. To allow for this we need to
+ * set the DMANAGED_AUTOMOUNT and DMANAGED_TRANSIT flags on the leaves
+ * of the directory tree. There is no need to clear the automount flag
+ * following a mount or restore it after an expire because these mounts
+ * are always covered. However, it is neccessary to ensure that these
+ * flags are clear on non-empty directories to avoid unnecessary calls
+ * during path walks.
+ */
+static void autofs_set_leaf_automount_flags(struct dentry *dentry)
+{
+ struct dentry *parent;
+
+ /* root and dentrys in the root are already handled */
+ if (IS_ROOT(dentry->d_parent))
+ return;
+
+ managed_dentry_set_managed(dentry);
+
+ parent = dentry->d_parent;
+ /* only consider parents below dentrys in the root */
+ if (IS_ROOT(parent->d_parent))
+ return;
+ managed_dentry_clear_managed(parent);
+ return;
+}
+
+static void autofs_clear_leaf_automount_flags(struct dentry *dentry)
+{
+ struct list_head *d_child;
+ struct dentry *parent;
+
+ /* flags for dentrys in the root are handled elsewhere */
+ if (IS_ROOT(dentry->d_parent))
+ return;
+
+ managed_dentry_clear_managed(dentry);
+
+ parent = dentry->d_parent;
+ /* only consider parents below dentrys in the root */
+ if (IS_ROOT(parent->d_parent))
+ return;
+ d_child = &dentry->d_u.d_child;
+ /* Set parent managed if it's becoming empty */
+ if (d_child->next == &parent->d_subdirs &&
+ d_child->prev == &parent->d_subdirs)
+ managed_dentry_set_managed(parent);
+ return;
+}
+
static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry)
{
struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb);
@@ -809,6 +701,9 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry)
spin_unlock(&dentry->d_lock);
spin_unlock(&autofs4_lock);
+ if (sbi->version < 5)
+ autofs_clear_leaf_automount_flags(dentry);
+
if (atomic_dec_and_test(&ino->count)) {
p_ino = autofs4_dentry_ino(dentry->d_parent);
if (p_ino && dentry->d_parent != dentry)
@@ -837,32 +732,25 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, int mode)
DPRINTK("dentry %p, creating %.*s",
dentry, dentry->d_name.len, dentry->d_name.name);
- ino = autofs4_init_ino(ino, sbi, S_IFDIR | 0555);
- if (!ino)
- return -ENOMEM;
+ BUG_ON(!ino);
+
+ autofs4_clean_ino(ino);
autofs4_del_active(dentry);
- inode = autofs4_get_inode(dir->i_sb, ino);
- if (!inode) {
- if (!dentry->d_fsdata)
- kfree(ino);
+ inode = autofs4_get_inode(dir->i_sb, S_IFDIR | 0555);
+ if (!inode)
return -ENOMEM;
- }
d_add(dentry, inode);
- if (dir == dir->i_sb->s_root->d_inode)
- d_set_d_op(dentry, &autofs4_root_dentry_operations);
- else
- d_set_d_op(dentry, &autofs4_dentry_operations);
+ if (sbi->version < 5)
+ autofs_set_leaf_automount_flags(dentry);
- dentry->d_fsdata = ino;
- ino->dentry = dget(dentry);
+ dget(dentry);
atomic_inc(&ino->count);
p_ino = autofs4_dentry_ino(dentry->d_parent);
if (p_ino && dentry->d_parent != dentry)
atomic_inc(&p_ino->count);
- ino->inode = inode;
inc_nlink(dir);
dir->i_mtime = CURRENT_TIME;
@@ -944,8 +832,7 @@ static inline int autofs4_ask_umount(struct vfsmount *mnt, int __user *p)
int is_autofs4_dentry(struct dentry *dentry)
{
return dentry && dentry->d_inode &&
- (dentry->d_op == &autofs4_root_dentry_operations ||
- dentry->d_op == &autofs4_dentry_operations) &&
+ dentry->d_op == &autofs4_dentry_operations &&
dentry->d_fsdata != NULL;
}
diff --git a/fs/autofs4/symlink.c b/fs/autofs4/symlink.c
index b4ea829..f27c094 100644
--- a/fs/autofs4/symlink.c
+++ b/fs/autofs4/symlink.c
@@ -14,8 +14,7 @@
static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
{
- struct autofs_info *ino = autofs4_dentry_ino(dentry);
- nd_set_link(nd, (char *)ino->u.symlink);
+ nd_set_link(nd, dentry->d_inode->i_private);
return NULL;
}
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index c5f8459..5601005 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -309,6 +309,9 @@ static int validate_request(struct autofs_wait_queue **wait,
* completed while we waited on the mutex ...
*/
if (notify == NFY_MOUNT) {
+ struct dentry *new = NULL;
+ int valid = 1;
+
/*
* If the dentry was successfully mounted while we slept
* on the wait queue mutex we can return success. If it
@@ -316,8 +319,20 @@ static int validate_request(struct autofs_wait_queue **wait,
* a multi-mount with no mount at it's base) we can
* continue on and create a new request.
*/
+ if (!IS_ROOT(dentry)) {
+ if (dentry->d_inode && d_unhashed(dentry)) {
+ struct dentry *parent = dentry->d_parent;
+ new = d_lookup(parent, &dentry->d_name);
+ if (new)
+ dentry = new;
+ }
+ }
if (have_submounts(dentry))
- return 0;
+ valid = 0;
+
+ if (new)
+ dput(new);
+ return valid;
}
return 1;
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index 7bb3c02..ecb9fd3 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -4,6 +4,8 @@ config BTRFS_FS
select LIBCRC32C
select ZLIB_INFLATE
select ZLIB_DEFLATE
+ select LZO_COMPRESS
+ select LZO_DECOMPRESS
help
Btrfs is a new filesystem with extents, writable snapshotting,
support for multiple devices and many more features.
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index a35eb36..31610ea 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -6,5 +6,5 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
transaction.o inode.o file.o tree-defrag.o \
extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
- export.o tree-log.o acl.o free-space-cache.o zlib.o \
+ export.o tree-log.o acl.o free-space-cache.o zlib.o lzo.o \
compression.o delayed-ref.o relocation.o
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 6ae2c8c..15b5ca2 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -60,8 +60,10 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
size = __btrfs_getxattr(inode, name, value, size);
if (size > 0) {
acl = posix_acl_from_xattr(value, size);
- if (IS_ERR(acl))
+ if (IS_ERR(acl)) {
+ kfree(value);
return acl;
+ }
set_cached_acl(inode, type, acl);
}
kfree(value);
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 6ad63f1..ccc991c 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -157,7 +157,7 @@ struct btrfs_inode {
/*
* always compress this one file
*/
- unsigned force_compress:1;
+ unsigned force_compress:4;
struct inode vfs_inode;
};
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index b50bc4b..f745287 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -62,6 +62,9 @@ struct compressed_bio {
/* number of bytes on disk */
unsigned long compressed_len;
+ /* the compression algorithm for this bio */
+ int compress_type;
+
/* number of compressed pages in the array */
unsigned long nr_pages;
@@ -173,11 +176,12 @@ static void end_compressed_bio_read(struct bio *bio, int err)
/* ok, we're the last bio for this extent, lets start
* the decompression.
*/
- ret = btrfs_zlib_decompress_biovec(cb->compressed_pages,
- cb->start,
- cb->orig_bio->bi_io_vec,
- cb->orig_bio->bi_vcnt,
- cb->compressed_len);
+ ret = btrfs_decompress_biovec(cb->compress_type,
+ cb->compressed_pages,
+ cb->start,
+ cb->orig_bio->bi_io_vec,
+ cb->orig_bio->bi_vcnt,
+ cb->compressed_len);
csum_failed:
if (ret)
cb->errors = 1;
@@ -588,6 +592,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
cb->len = uncompressed_len;
cb->compressed_len = compressed_len;
+ cb->compress_type = extent_compress_type(bio_flags);
cb->orig_bio = bio;
nr_pages = (compressed_len + PAGE_CACHE_SIZE - 1) /
@@ -677,3 +682,317 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
bio_put(comp_bio);
return 0;
}
+
+static struct list_head comp_idle_workspace[BTRFS_COMPRESS_TYPES];
+static spinlock_t comp_workspace_lock[BTRFS_COMPRESS_TYPES];
+static int comp_num_workspace[BTRFS_COMPRESS_TYPES];
+static atomic_t comp_alloc_workspace[BTRFS_COMPRESS_TYPES];
+static wait_queue_head_t comp_workspace_wait[BTRFS_COMPRESS_TYPES];
+
+struct btrfs_compress_op *btrfs_compress_op[] = {
+ &btrfs_zlib_compress,
+ &btrfs_lzo_compress,
+};
+
+int __init btrfs_init_compress(void)
+{
+ int i;
+
+ for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) {
+ INIT_LIST_HEAD(&comp_idle_workspace[i]);
+ spin_lock_init(&comp_workspace_lock[i]);
+ atomic_set(&comp_alloc_workspace[i], 0);
+ init_waitqueue_head(&comp_workspace_wait[i]);
+ }
+ return 0;
+}
+
+/*
+ * this finds an available workspace or allocates a new one
+ * ERR_PTR is returned if things go bad.
+ */
+static struct list_head *find_workspace(int type)
+{
+ struct list_head *workspace;
+ int cpus = num_online_cpus();
+ int idx = type - 1;
+
+ struct list_head *idle_workspace = &comp_idle_workspace[idx];
+ spinlock_t *workspace_lock = &comp_workspace_lock[idx];
+ atomic_t *alloc_workspace = &comp_alloc_workspace[idx];
+ wait_queue_head_t *workspace_wait = &comp_workspace_wait[idx];
+ int *num_workspace = &comp_num_workspace[idx];
+again:
+ spin_lock(workspace_lock);
+ if (!list_empty(idle_workspace)) {
+ workspace = idle_workspace->next;
+ list_del(workspace);
+ (*num_workspace)--;
+ spin_unlock(workspace_lock);
+ return workspace;
+
+ }
+ if (atomic_read(alloc_workspace) > cpus) {
+ DEFINE_WAIT(wait);
+
+ spin_unlock(workspace_lock);
+ prepare_to_wait(workspace_wait, &wait, TASK_UNINTERRUPTIBLE);
+ if (atomic_read(alloc_workspace) > cpus && !*num_workspace)
+ schedule();
+ finish_wait(workspace_wait, &wait);
+ goto again;
+ }
+ atomic_inc(alloc_workspace);
+ spin_unlock(workspace_lock);
+
+ workspace = btrfs_compress_op[idx]->alloc_workspace();
+ if (IS_ERR(workspace)) {
+ atomic_dec(alloc_workspace);
+ wake_up(workspace_wait);
+ }
+ return workspace;
+}
+
+/*
+ * put a workspace struct back on the list or free it if we have enough
+ * idle ones sitting around
+ */
+static void free_workspace(int type, struct list_head *workspace)
+{
+ int idx = type - 1;
+ struct list_head *idle_workspace = &comp_idle_workspace[idx];
+ spinlock_t *workspace_lock = &comp_workspace_lock[idx];
+ atomic_t *alloc_workspace = &comp_alloc_workspace[idx];
+ wait_queue_head_t *workspace_wait = &comp_workspace_wait[idx];
+ int *num_workspace = &comp_num_workspace[idx];
+
+ spin_lock(workspace_lock);
+ if (*num_workspace < num_online_cpus()) {
+ list_add_tail(workspace, idle_workspace);
+ (*num_workspace)++;
+ spin_unlock(workspace_lock);
+ goto wake;
+ }
+ spin_unlock(workspace_lock);
+
+ btrfs_compress_op[idx]->free_workspace(workspace);
+ atomic_dec(alloc_workspace);
+wake:
+ if (waitqueue_active(workspace_wait))
+ wake_up(workspace_wait);
+}
+
+/*
+ * cleanup function for module exit
+ */
+static void free_workspaces(void)
+{
+ struct list_head *workspace;
+ int i;
+
+ for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) {
+ while (!list_empty(&comp_idle_workspace[i])) {
+ workspace = comp_idle_workspace[i].next;
+ list_del(workspace);
+ btrfs_compress_op[i]->free_workspace(workspace);
+ atomic_dec(&comp_alloc_workspace[i]);
+ }
+ }
+}
+
+/*
+ * given an address space and start/len, compress the bytes.
+ *
+ * pages are allocated to hold the compressed result and stored
+ * in 'pages'
+ *
+ * out_pages is used to return the number of pages allocated. There
+ * may be pages allocated even if we return an error
+ *
+ * total_in is used to return the number of bytes actually read. It
+ * may be smaller then len if we had to exit early because we
+ * ran out of room in the pages array or because we cross the
+ * max_out threshold.
+ *
+ * total_out is used to return the total number of compressed bytes
+ *
+ * max_out tells us the max number of bytes that we're allowed to
+ * stuff into pages
+ */
+int btrfs_compress_pages(int type, struct address_space *mapping,
+ u64 start, unsigned long len,
+ struct page **pages,
+ unsigned long nr_dest_pages,
+ unsigned long *out_pages,
+ unsigned long *total_in,
+ unsigned long *total_out,
+ unsigned long max_out)
+{
+ struct list_head *workspace;
+ int ret;
+
+ workspace = find_workspace(type);
+ if (IS_ERR(workspace))
+ return -1;
+
+ ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping,
+ start, len, pages,
+ nr_dest_pages, out_pages,
+ total_in, total_out,
+ max_out);
+ free_workspace(type, workspace);
+ return ret;
+}
+
+/*
+ * pages_in is an array of pages with compressed data.
+ *
+ * disk_start is the starting logical offset of this array in the file
+ *
+ * bvec is a bio_vec of pages from the file that we want to decompress into
+ *
+ * vcnt is the count of pages in the biovec
+ *
+ * srclen is the number of bytes in pages_in
+ *
+ * The basic idea is that we have a bio that was created by readpages.
+ * The pages in the bio are for the uncompressed data, and they may not
+ * be contiguous. They all correspond to the range of bytes covered by
+ * the compressed extent.
+ */
+int btrfs_decompress_biovec(int type, struct page **pages_in, u64 disk_start,
+ struct bio_vec *bvec, int vcnt, size_t srclen)
+{
+ struct list_head *workspace;
+ int ret;
+
+ workspace = find_workspace(type);
+ if (IS_ERR(workspace))
+ return -ENOMEM;
+
+ ret = btrfs_compress_op[type-1]->decompress_biovec(workspace, pages_in,
+ disk_start,
+ bvec, vcnt, srclen);
+ free_workspace(type, workspace);
+ return ret;
+}
+
+/*
+ * a less complex decompression routine. Our compressed data fits in a
+ * single page, and we want to read a single page out of it.
+ * start_byte tells us the offset into the compressed data we're interested in
+ */
+int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
+ unsigned long start_byte, size_t srclen, size_t destlen)
+{
+ struct list_head *workspace;
+ int ret;
+
+ workspace = find_workspace(type);
+ if (IS_ERR(workspace))
+ return -ENOMEM;
+
+ ret = btrfs_compress_op[type-1]->decompress(workspace, data_in,
+ dest_page, start_byte,
+ srclen, destlen);
+
+ free_workspace(type, workspace);
+ return ret;
+}
+
+void __exit btrfs_exit_compress(void)
+{
+ free_workspaces();
+}
+
+/*
+ * Copy uncompressed data from working buffer to pages.
+ *
+ * buf_start is the byte offset we're of the start of our workspace buffer.
+ *
+ * total_out is the last byte of the buffer
+ */
+int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
+ unsigned long total_out, u64 disk_start,
+ struct bio_vec *bvec, int vcnt,
+ unsigned long *page_index,
+ unsigned long *pg_offset)
+{
+ unsigned long buf_offset;
+ unsigned long current_buf_start;
+ unsigned long start_byte;
+ unsigned long working_bytes = total_out - buf_start;
+ unsigned long bytes;
+ char *kaddr;
+ struct page *page_out = bvec[*page_index].bv_page;
+
+ /*
+ * start byte is the first byte of the page we're currently
+ * copying into relative to the start of the compressed data.
+ */
+ start_byte = page_offset(page_out) - disk_start;
+
+ /* we haven't yet hit data corresponding to this page */
+ if (total_out <= start_byte)
+ return 1;
+
+ /*
+ * the start of the data we care about is offset into
+ * the middle of our working buffer
+ */
+ if (total_out > start_byte && buf_start < start_byte) {
+ buf_offset = start_byte - buf_start;
+ working_bytes -= buf_offset;
+ } else {
+ buf_offset = 0;
+ }
+ current_buf_start = buf_start;
+
+ /* copy bytes from the working buffer into the pages */
+ while (working_bytes > 0) {
+ bytes = min(PAGE_CACHE_SIZE - *pg_offset,
+ PAGE_CACHE_SIZE - buf_offset);
+ bytes = min(bytes, working_bytes);
+ kaddr = kmap_atomic(page_out, KM_USER0);
+ memcpy(kaddr + *pg_offset, buf + buf_offset, bytes);
+ kunmap_atomic(kaddr, KM_USER0);
+ flush_dcache_page(page_out);
+
+ *pg_offset += bytes;
+ buf_offset += bytes;
+ working_bytes -= bytes;
+ current_buf_start += bytes;
+
+ /* check if we need to pick another page */
+ if (*pg_offset == PAGE_CACHE_SIZE) {
+ (*page_index)++;
+ if (*page_index >= vcnt)
+ return 0;
+
+ page_out = bvec[*page_index].bv_page;
+ *pg_offset = 0;
+ start_byte = page_offset(page_out) - disk_start;
+
+ /*
+ * make sure our new page is covered by this
+ * working buffer
+ */
+ if (total_out <= start_byte)
+ return 1;
+
+ /*
+ * the next page in the biovec might not be adjacent
+ * to the last page, but it might still be found
+ * inside this working buffer. bump our offset pointer
+ */
+ if (total_out > start_byte &&
+ current_buf_start < start_byte) {
+ buf_offset = start_byte - buf_start;
+ working_bytes = total_out - start_byte;
+ current_buf_start = buf_start + buf_offset;
+ }
+ }
+ }
+
+ return 1;
+}
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index 421f5b4..5100017 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -19,24 +19,27 @@
#ifndef __BTRFS_COMPRESSION_
#define __BTRFS_COMPRESSION_
-int btrfs_zlib_decompress(unsigned char *data_in,
- struct page *dest_page,
- unsigned long start_byte,
- size_t srclen, size_t destlen);
-int btrfs_zlib_compress_pages(struct address_space *mapping,
- u64 start, unsigned long len,
- struct page **pages,
- unsigned long nr_dest_pages,
- unsigned long *out_pages,
- unsigned long *total_in,
- unsigned long *total_out,
- unsigned long max_out);
-int btrfs_zlib_decompress_biovec(struct page **pages_in,
- u64 disk_start,
- struct bio_vec *bvec,
- int vcnt,
- size_t srclen);
-void btrfs_zlib_exit(void);
+int btrfs_init_compress(void);
+void btrfs_exit_compress(void);
+
+int btrfs_compress_pages(int type, struct address_space *mapping,
+ u64 start, unsigned long len,
+ struct page **pages,
+ unsigned long nr_dest_pages,
+ unsigned long *out_pages,
+ unsigned long *total_in,
+ unsigned long *total_out,
+ unsigned long max_out);
+int btrfs_decompress_biovec(int type, struct page **pages_in, u64 disk_start,
+ struct bio_vec *bvec, int vcnt, size_t srclen);
+int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
+ unsigned long start_byte, size_t srclen, size_t destlen);
+int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
+ unsigned long total_out, u64 disk_start,
+ struct bio_vec *bvec, int vcnt,
+ unsigned long *page_index,
+ unsigned long *pg_offset);
+
int btrfs_submit_compressed_write(struct inode *inode, u64 start,
unsigned long len, u64 disk_start,
unsigned long compressed_len,
@@ -44,4 +47,37 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
unsigned long nr_pages);
int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
int mirror_num, unsigned long bio_flags);
+
+struct btrfs_compress_op {
+ struct list_head *(*alloc_workspace)(void);
+
+ void (*free_workspace)(struct list_head *workspace);
+
+ int (*compress_pages)(struct list_head *workspace,
+ struct address_space *mapping,
+ u64 start, unsigned long len,
+ struct page **pages,
+ unsigned long nr_dest_pages,
+ unsigned long *out_pages,
+ unsigned long *total_in,
+ unsigned long *total_out,
+ unsigned long max_out);
+
+ int (*decompress_biovec)(struct list_head *workspace,
+ struct page **pages_in,
+ u64 disk_start,
+ struct bio_vec *bvec,
+ int vcnt,
+ size_t srclen);
+
+ int (*decompress)(struct list_head *workspace,
+ unsigned char *data_in,
+ struct page *dest_page,
+ unsigned long start_byte,
+ size_t srclen, size_t destlen);
+};
+
+extern struct btrfs_compress_op btrfs_zlib_compress;
+extern struct btrfs_compress_op btrfs_lzo_compress;
+
#endif
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 9ac1715..b5baff0 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -105,6 +105,8 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
/* this also releases the path */
void btrfs_free_path(struct btrfs_path *p)
{
+ if (!p)
+ return;
btrfs_release_path(NULL, p);
kmem_cache_free(btrfs_path_cachep, p);
}
@@ -2514,6 +2516,9 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
btrfs_assert_tree_locked(path->nodes[1]);
right = read_node_slot(root, upper, slot + 1);
+ if (right == NULL)
+ return 1;
+
btrfs_tree_lock(right);
btrfs_set_lock_blocking(right);
@@ -2764,6 +2769,9 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
btrfs_assert_tree_locked(path->nodes[1]);
left = read_node_slot(root, path->nodes[1], slot - 1);
+ if (left == NULL)
+ return 1;
+
btrfs_tree_lock(left);
btrfs_set_lock_blocking(left);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index a142d20..2c98b3a 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -27,6 +27,7 @@
#include <linux/backing-dev.h>
#include <linux/wait.h>
#include <linux/slab.h>
+#include <linux/kobject.h>
#include <asm/kmap_types.h>
#include "extent_io.h"
#include "extent_map.h"
@@ -294,6 +295,14 @@ static inline unsigned long btrfs_chunk_item_size(int num_stripes)
#define BTRFS_FSID_SIZE 16
#define BTRFS_HEADER_FLAG_WRITTEN (1ULL << 0)
#define BTRFS_HEADER_FLAG_RELOC (1ULL << 1)
+
+/*
+ * File system states
+ */
+
+/* Errors detected */
+#define BTRFS_SUPER_FLAG_ERROR (1ULL << 2)
+
#define BTRFS_SUPER_FLAG_SEEDING (1ULL << 32)
#define BTRFS_SUPER_FLAG_METADUMP (1ULL << 33)
@@ -398,13 +407,15 @@ struct btrfs_super_block {
#define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0)
#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1)
#define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2)
+#define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO (1ULL << 3)
#define BTRFS_FEATURE_COMPAT_SUPP 0ULL
#define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL
#define BTRFS_FEATURE_INCOMPAT_SUPP \
(BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \
BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \
- BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
+ BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS | \
+ BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO)
/*
* A leaf is full of items. offset and size tell us where to find
@@ -551,9 +562,11 @@ struct btrfs_timespec {
} __attribute__ ((__packed__));
enum btrfs_compression_type {
- BTRFS_COMPRESS_NONE = 0,
- BTRFS_COMPRESS_ZLIB = 1,
- BTRFS_COMPRESS_LAST = 2,
+ BTRFS_COMPRESS_NONE = 0,
+ BTRFS_COMPRESS_ZLIB = 1,
+ BTRFS_COMPRESS_LZO = 2,
+ BTRFS_COMPRESS_TYPES = 2,
+ BTRFS_COMPRESS_LAST = 3,
};
struct btrfs_inode_item {
@@ -597,6 +610,8 @@ struct btrfs_dir_item {
u8 type;
} __attribute__ ((__packed__));
+#define BTRFS_ROOT_SUBVOL_RDONLY (1ULL << 0)
+
struct btrfs_root_item {
struct btrfs_inode_item inode;
__le64 generation;
@@ -895,7 +910,8 @@ struct btrfs_fs_info {
*/
u64 last_trans_log_full_commit;
u64 open_ioctl_trans;
- unsigned long mount_opt;
+ unsigned long mount_opt:20;
+ unsigned long compress_type:4;
u64 max_inline;
u64 alloc_start;
struct btrfs_transaction *running_transaction;
@@ -1050,6 +1066,9 @@ struct btrfs_fs_info {
unsigned metadata_ratio;
void *bdev_holder;
+
+ /* filesystem state */
+ u64 fs_state;
};
/*
@@ -1893,6 +1912,11 @@ BTRFS_SETGET_STACK_FUNCS(root_limit, struct btrfs_root_item, byte_limit, 64);
BTRFS_SETGET_STACK_FUNCS(root_last_snapshot, struct btrfs_root_item,
last_snapshot, 64);
+static inline bool btrfs_root_readonly(struct btrfs_root *root)
+{
+ return root->root_item.flags & BTRFS_ROOT_SUBVOL_RDONLY;
+}
+
/* struct btrfs_super_block */
BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64);
@@ -2145,6 +2169,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 group_start);
u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags);
+u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data);
void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde);
void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
int btrfs_check_data_free_space(struct inode *inode, u64 bytes);
@@ -2188,6 +2213,12 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
int btrfs_set_block_group_rw(struct btrfs_root *root,
struct btrfs_block_group_cache *cache);
void btrfs_put_block_group_cache(struct btrfs_fs_info *info);
+u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo);
+int btrfs_error_unpin_extent_range(struct btrfs_root *root,
+ u64 start, u64 end);
+int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
+ u64 num_bytes);
+
/* ctree.c */
int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
int level, int *slot);
@@ -2541,6 +2572,14 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
/* super.c */
int btrfs_parse_options(struct btrfs_root *root, char *options);
int btrfs_sync_fs(struct super_block *sb, int wait);
+void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
+ unsigned int line, int errno);
+
+#define btrfs_std_error(fs_info, errno) \
+do { \
+ if ((errno)) \
+ __btrfs_std_error((fs_info), __func__, __LINE__, (errno));\
+} while (0)
/* acl.c */
#ifdef CONFIG_BTRFS_FS_POSIX_ACL
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 51d2e4d..b531c36 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -44,6 +44,20 @@
static struct extent_io_ops btree_extent_io_ops;
static void end_workqueue_fn(struct btrfs_work *work);
static void free_fs_root(struct btrfs_root *root);
+static void btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
+ int read_only);
+static int btrfs_destroy_ordered_operations(struct btrfs_root *root);
+static int btrfs_destroy_ordered_extents(struct btrfs_root *root);
+static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
+ struct btrfs_root *root);
+static int btrfs_destroy_pending_snapshots(struct btrfs_transaction *t);
+static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root);
+static int btrfs_destroy_marked_extents(struct btrfs_root *root,
+ struct extent_io_tree *dirty_pages,
+ int mark);
+static int btrfs_destroy_pinned_extent(struct btrfs_root *root,
+ struct extent_io_tree *pinned_extents);
+static int btrfs_cleanup_transaction(struct btrfs_root *root);
/*
* end_io_wq structs are used to do processing in task context when an IO is
@@ -353,6 +367,10 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
WARN_ON(len == 0);
eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS);
+ if (eb == NULL) {
+ WARN_ON(1);
+ goto out;
+ }
ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE,
btrfs_header_generation(eb));
BUG_ON(ret);
@@ -427,6 +445,10 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
WARN_ON(len == 0);
eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS);
+ if (eb == NULL) {
+ ret = -EIO;
+ goto out;
+ }
found_start = btrfs_header_bytenr(eb);
if (found_start != start) {
@@ -1145,6 +1167,7 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
}
btrfs_free_path(path);
if (ret) {
+ kfree(root);
if (ret > 0)
ret = -ENOENT;
return ERR_PTR(ret);
@@ -1713,8 +1736,10 @@ struct btrfs_root *open_ctree(struct super_block *sb,
fs_info, BTRFS_ROOT_TREE_OBJECTID);
bh = btrfs_read_dev_super(fs_devices->latest_bdev);
- if (!bh)
+ if (!bh) {
+ err = -EINVAL;
goto fail_iput;
+ }
memcpy(&fs_info->super_copy, bh->b_data, sizeof(fs_info->super_copy));
memcpy(&fs_info->super_for_commit, &fs_info->super_copy,
@@ -1727,6 +1752,11 @@ struct btrfs_root *open_ctree(struct super_block *sb,
if (!btrfs_super_root(disk_super))
goto fail_iput;
+ /* check FS state, whether FS is broken. */
+ fs_info->fs_state |= btrfs_super_flags(disk_super);
+
+ btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY);
+
ret = btrfs_parse_options(tree_root, options);
if (ret) {
err = ret;
@@ -1744,10 +1774,10 @@ struct btrfs_root *open_ctree(struct super_block *sb,
}
features = btrfs_super_incompat_flags(disk_super);
- if (!(features & BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF)) {
- features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
- btrfs_set_super_incompat_flags(disk_super, features);
- }
+ features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
+ if (tree_root->fs_info->compress_type & BTRFS_COMPRESS_LZO)
+ features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
+ btrfs_set_super_incompat_flags(disk_super, features);
features = btrfs_super_compat_ro_flags(disk_super) &
~BTRFS_FEATURE_COMPAT_RO_SUPP;
@@ -1957,7 +1987,9 @@ struct btrfs_root *open_ctree(struct super_block *sb,
btrfs_set_opt(fs_info->mount_opt, SSD);
}
- if (btrfs_super_log_root(disk_super) != 0) {
+ /* do not make disk changes in broken FS */
+ if (btrfs_super_log_root(disk_super) != 0 &&
+ !(fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)) {
u64 bytenr = btrfs_super_log_root(disk_super);
if (fs_devices->rw_devices == 0) {
@@ -2442,8 +2474,28 @@ int close_ctree(struct btrfs_root *root)
smp_mb();
btrfs_put_block_group_cache(fs_info);
+
+ /*
+ * Here come 2 situations when btrfs is broken to flip readonly:
+ *
+ * 1. when btrfs flips readonly somewhere else before
+ * btrfs_commit_super, sb->s_flags has MS_RDONLY flag,
+ * and btrfs will skip to write sb directly to keep
+ * ERROR state on disk.
+ *
+ * 2. when btrfs flips readonly just in btrfs_commit_super,
+ * and in such case, btrfs cannnot write sb via btrfs_commit_super,
+ * and since fs_state has been set BTRFS_SUPER_FLAG_ERROR flag,
+ * btrfs will cleanup all FS resources first and write sb then.
+ */
if (!(fs_info->sb->s_flags & MS_RDONLY)) {
- ret = btrfs_commit_super(root);
+ ret = btrfs_commit_super(root);
+ if (ret)
+ printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
+ }
+
+ if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
+ ret = btrfs_error_commit_super(root);
if (ret)
printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
}
@@ -2619,6 +2671,352 @@ out:
return 0;
}
+static void btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
+ int read_only)
+{
+ if (read_only)
+ return;
+
+ if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
+ printk(KERN_WARNING "warning: mount fs with errors, "
+ "running btrfsck is recommended\n");
+}
+
+int btrfs_error_commit_super(struct btrfs_root *root)
+{
+ int ret;
+
+ mutex_lock(&root->fs_info->cleaner_mutex);
+ btrfs_run_delayed_iputs(root);
+ mutex_unlock(&root->fs_info->cleaner_mutex);
+
+ down_write(&root->fs_info->cleanup_work_sem);
+ up_write(&root->fs_info->cleanup_work_sem);
+
+ /* cleanup FS via transaction */
+ btrfs_cleanup_transaction(root);
+
+ ret = write_ctree_super(NULL, root, 0);
+
+ return ret;
+}
+
+static int btrfs_destroy_ordered_operations(struct btrfs_root *root)
+{
+ struct btrfs_inode *btrfs_inode;
+ struct list_head splice;
+
+ INIT_LIST_HEAD(&splice);
+
+ mutex_lock(&root->fs_info->ordered_operations_mutex);
+ spin_lock(&root->fs_info->ordered_extent_lock);
+
+ list_splice_init(&root->fs_info->ordered_operations, &splice);
+ while (!list_empty(&splice)) {
+ btrfs_inode = list_entry(splice.next, struct btrfs_inode,
+ ordered_operations);
+
+ list_del_init(&btrfs_inode->ordered_operations);
+
+ btrfs_invalidate_inodes(btrfs_inode->root);
+ }
+
+ spin_unlock(&root->fs_info->ordered_extent_lock);
+ mutex_unlock(&root->fs_info->ordered_operations_mutex);
+
+ return 0;
+}
+
+static int btrfs_destroy_ordered_extents(struct btrfs_root *root)
+{
+ struct list_head splice;
+ struct btrfs_ordered_extent *ordered;
+ struct inode *inode;
+
+ INIT_LIST_HEAD(&splice);
+
+ spin_lock(&root->fs_info->ordered_extent_lock);
+
+ list_splice_init(&root->fs_info->ordered_extents, &splice);
+ while (!list_empty(&splice)) {
+ ordered = list_entry(splice.next, struct btrfs_ordered_extent,
+ root_extent_list);
+
+ list_del_init(&ordered->root_extent_list);
+ atomic_inc(&ordered->refs);
+
+ /* the inode may be getting freed (in sys_unlink path). */
+ inode = igrab(ordered->inode);
+
+ spin_unlock(&root->fs_info->ordered_extent_lock);
+ if (inode)
+ iput(inode);
+
+ atomic_set(&ordered->refs, 1);
+ btrfs_put_ordered_extent(ordered);
+
+ spin_lock(&root->fs_info->ordered_extent_lock);
+ }
+
+ spin_unlock(&root->fs_info->ordered_extent_lock);
+
+ return 0;
+}
+
+static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
+ struct btrfs_root *root)
+{
+ struct rb_node *node;
+ struct btrfs_delayed_ref_root *delayed_refs;
+ struct btrfs_delayed_ref_node *ref;
+ int ret = 0;
+
+ delayed_refs = &trans->delayed_refs;
+
+ spin_lock(&delayed_refs->lock);
+ if (delayed_refs->num_entries == 0) {
+ printk(KERN_INFO "delayed_refs has NO entry\n");
+ return ret;
+ }
+
+ node = rb_first(&delayed_refs->root);
+ while (node) {
+ ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
+ node = rb_next(node);
+
+ ref->in_tree = 0;
+ rb_erase(&ref->rb_node, &delayed_refs->root);
+ delayed_refs->num_entries--;
+
+ atomic_set(&ref->refs, 1);
+ if (btrfs_delayed_ref_is_head(ref)) {
+ struct btrfs_delayed_ref_head *head;
+
+ head = btrfs_delayed_node_to_head(ref);
+ mutex_lock(&head->mutex);
+ kfree(head->extent_op);
+ delayed_refs->num_heads--;
+ if (list_empty(&head->cluster))
+ delayed_refs->num_heads_ready--;
+ list_del_init(&head->cluster);
+ mutex_unlock(&head->mutex);
+ }
+
+ spin_unlock(&delayed_refs->lock);
+ btrfs_put_delayed_ref(ref);
+
+ cond_resched();
+ spin_lock(&delayed_refs->lock);
+ }
+
+ spin_unlock(&delayed_refs->lock);
+
+ return ret;
+}
+
+static int btrfs_destroy_pending_snapshots(struct btrfs_transaction *t)
+{
+ struct btrfs_pending_snapshot *snapshot;
+ struct list_head splice;
+
+ INIT_LIST_HEAD(&splice);
+
+ list_splice_init(&t->pending_snapshots, &splice);
+
+ while (!list_empty(&splice)) {
+ snapshot = list_entry(splice.next,
+ struct btrfs_pending_snapshot,
+ list);
+
+ list_del_init(&snapshot->list);
+
+ kfree(snapshot);
+ }
+
+ return 0;
+}
+
+static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
+{
+ struct btrfs_inode *btrfs_inode;
+ struct list_head splice;
+
+ INIT_LIST_HEAD(&splice);
+
+ list_splice_init(&root->fs_info->delalloc_inodes, &splice);
+
+ spin_lock(&root->fs_info->delalloc_lock);
+
+ while (!list_empty(&splice)) {
+ btrfs_inode = list_entry(splice.next, struct btrfs_inode,
+ delalloc_inodes);
+
+ list_del_init(&btrfs_inode->delalloc_inodes);
+
+ btrfs_invalidate_inodes(btrfs_inode->root);
+ }
+
+ spin_unlock(&root->fs_info->delalloc_lock);
+
+ return 0;
+}
+
+static int btrfs_destroy_marked_extents(struct btrfs_root *root,
+ struct extent_io_tree *dirty_pages,
+ int mark)
+{
+ int ret;
+ struct page *page;
+ struct inode *btree_inode = root->fs_info->btree_inode;
+ struct extent_buffer *eb;
+ u64 start = 0;
+ u64 end;
+ u64 offset;
+ unsigned long index;
+
+ while (1) {
+ ret = find_first_extent_bit(dirty_pages, start, &start, &end,
+ mark);
+ if (ret)
+ break;
+
+ clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS);
+ while (start <= end) {
+ index = start >> PAGE_CACHE_SHIFT;
+ start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
+ page = find_get_page(btree_inode->i_mapping, index);
+ if (!page)
+ continue;
+ offset = page_offset(page);
+
+ spin_lock(&dirty_pages->buffer_lock);
+ eb = radix_tree_lookup(
+ &(&BTRFS_I(page->mapping->host)->io_tree)->buffer,
+ offset >> PAGE_CACHE_SHIFT);
+ spin_unlock(&dirty_pages->buffer_lock);
+ if (eb) {
+ ret = test_and_clear_bit(EXTENT_BUFFER_DIRTY,
+ &eb->bflags);
+ atomic_set(&eb->refs, 1);
+ }
+ if (PageWriteback(page))
+ end_page_writeback(page);
+
+ lock_page(page);
+ if (PageDirty(page)) {
+ clear_page_dirty_for_io(page);
+ spin_lock_irq(&page->mapping->tree_lock);
+ radix_tree_tag_clear(&page->mapping->page_tree,
+ page_index(page),
+ PAGECACHE_TAG_DIRTY);
+ spin_unlock_irq(&page->mapping->tree_lock);
+ }
+
+ page->mapping->a_ops->invalidatepage(page, 0);
+ unlock_page(page);
+ }
+ }
+
+ return ret;
+}
+
+static int btrfs_destroy_pinned_extent(struct btrfs_root *root,
+ struct extent_io_tree *pinned_extents)
+{
+ struct extent_io_tree *unpin;
+ u64 start;
+ u64 end;
+ int ret;
+
+ unpin = pinned_extents;
+ while (1) {
+ ret = find_first_extent_bit(unpin, 0, &start, &end,
+ EXTENT_DIRTY);
+ if (ret)
+ break;
+
+ /* opt_discard */
+ ret = btrfs_error_discard_extent(root, start, end + 1 - start);
+
+ clear_extent_dirty(unpin, start, end, GFP_NOFS);
+ btrfs_error_unpin_extent_range(root, start, end);
+ cond_resched();
+ }
+
+ return 0;
+}
+
+static int btrfs_cleanup_transaction(struct btrfs_root *root)
+{
+ struct btrfs_transaction *t;
+ LIST_HEAD(list);
+
+ WARN_ON(1);
+
+ mutex_lock(&root->fs_info->trans_mutex);
+ mutex_lock(&root->fs_info->transaction_kthread_mutex);
+
+ list_splice_init(&root->fs_info->trans_list, &list);
+ while (!list_empty(&list)) {
+ t = list_entry(list.next, struct btrfs_transaction, list);
+ if (!t)
+ break;
+
+ btrfs_destroy_ordered_operations(root);
+
+ btrfs_destroy_ordered_extents(root);
+
+ btrfs_destroy_delayed_refs(t, root);
+
+ btrfs_block_rsv_release(root,
+ &root->fs_info->trans_block_rsv,
+ t->dirty_pages.dirty_bytes);
+
+ /* FIXME: cleanup wait for commit */
+ t->in_commit = 1;
+ t->blocked = 1;
+ if (waitqueue_active(&root->fs_info->transaction_blocked_wait))
+ wake_up(&root->fs_info->transaction_blocked_wait);
+
+ t->blocked = 0;
+ if (waitqueue_active(&root->fs_info->transaction_wait))
+ wake_up(&root->fs_info->transaction_wait);
+ mutex_unlock(&root->fs_info->trans_mutex);
+
+ mutex_lock(&root->fs_info->trans_mutex);
+ t->commit_done = 1;
+ if (waitqueue_active(&t->commit_wait))
+ wake_up(&t->commit_wait);
+ mutex_unlock(&root->fs_info->trans_mutex);
+
+ mutex_lock(&root->fs_info->trans_mutex);
+
+ btrfs_destroy_pending_snapshots(t);
+
+ btrfs_destroy_delalloc_inodes(root);
+
+ spin_lock(&root->fs_info->new_trans_lock);
+ root->fs_info->running_transaction = NULL;
+ spin_unlock(&root->fs_info->new_trans_lock);
+
+ btrfs_destroy_marked_extents(root, &t->dirty_pages,
+ EXTENT_DIRTY);
+
+ btrfs_destroy_pinned_extent(root,
+ root->fs_info->pinned_extents);
+
+ t->use_count = 0;
+ list_del_init(&t->list);
+ memset(t, 0, sizeof(*t));
+ kmem_cache_free(btrfs_transaction_cachep, t);
+ }
+
+ mutex_unlock(&root->fs_info->transaction_kthread_mutex);
+ mutex_unlock(&root->fs_info->trans_mutex);
+
+ return 0;
+}
+
static struct extent_io_ops btree_extent_io_ops = {
.write_cache_pages_lock_hook = btree_lock_page_hook,
.readpage_end_io_hook = btree_readpage_end_io_hook,
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 88e825a..07b20dc 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -52,6 +52,7 @@ int write_ctree_super(struct btrfs_trans_handle *trans,
struct btrfs_root *root, int max_mirrors);
struct buffer_head *btrfs_read_dev_super(struct block_device *bdev);
int btrfs_commit_super(struct btrfs_root *root);
+int btrfs_error_commit_super(struct btrfs_root *root);
struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
u64 bytenr, u32 blocksize);
struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 227e581..b552693 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3089,7 +3089,7 @@ static u64 get_alloc_profile(struct btrfs_root *root, u64 flags)
return btrfs_reduce_alloc_profile(root, flags);
}
-static u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data)
+u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data)
{
u64 flags;
@@ -3161,8 +3161,12 @@ alloc:
bytes + 2 * 1024 * 1024,
alloc_target, 0);
btrfs_end_transaction(trans, root);
- if (ret < 0)
- return ret;
+ if (ret < 0) {
+ if (ret != -ENOSPC)
+ return ret;
+ else
+ goto commit_trans;
+ }
if (!data_sinfo) {
btrfs_set_inode_space_info(root, inode);
@@ -3173,6 +3177,7 @@ alloc:
spin_unlock(&data_sinfo->lock);
/* commit the current transaction and try again */
+commit_trans:
if (!committed && !root->fs_info->open_ioctl_trans) {
committed = 1;
trans = btrfs_join_transaction(root, 1);
@@ -3721,11 +3726,6 @@ int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
return 0;
}
- WARN_ON(1);
- printk(KERN_INFO"block_rsv size %llu reserved %llu freed %llu %llu\n",
- block_rsv->size, block_rsv->reserved,
- block_rsv->freed[0], block_rsv->freed[1]);
-
return -ENOSPC;
}
@@ -7970,13 +7970,14 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache)
if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned +
sinfo->bytes_may_use + sinfo->bytes_readonly +
- cache->reserved_pinned + num_bytes < sinfo->total_bytes) {
+ cache->reserved_pinned + num_bytes <= sinfo->total_bytes) {
sinfo->bytes_readonly += num_bytes;
sinfo->bytes_reserved += cache->reserved_pinned;
cache->reserved_pinned = 0;
cache->ro = 1;
ret = 0;
}
+
spin_unlock(&cache->lock);
spin_unlock(&sinfo->lock);
return ret;
@@ -8012,6 +8013,62 @@ out:
return ret;
}
+/*
+ * helper to account the unused space of all the readonly block group in the
+ * list. takes mirrors into account.
+ */
+static u64 __btrfs_get_ro_block_group_free_space(struct list_head *groups_list)
+{
+ struct btrfs_block_group_cache *block_group;
+ u64 free_bytes = 0;
+ int factor;
+
+ list_for_each_entry(block_group, groups_list, list) {
+ spin_lock(&block_group->lock);
+
+ if (!block_group->ro) {
+ spin_unlock(&block_group->lock);
+ continue;
+ }
+
+ if (block_group->flags & (BTRFS_BLOCK_GROUP_RAID1 |
+ BTRFS_BLOCK_GROUP_RAID10 |
+ BTRFS_BLOCK_GROUP_DUP))
+ factor = 2;
+ else
+ factor = 1;
+
+ free_bytes += (block_group->key.offset -
+ btrfs_block_group_used(&block_group->item)) *
+ factor;
+
+ spin_unlock(&block_group->lock);
+ }
+
+ return free_bytes;
+}
+
+/*
+ * helper to account the unused space of all the readonly block group in the
+ * space_info. takes mirrors into account.
+ */
+u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
+{
+ int i;
+ u64 free_bytes = 0;
+
+ spin_lock(&sinfo->lock);
+
+ for(i = 0; i < BTRFS_NR_RAID_TYPES; i++)
+ if (!list_empty(&sinfo->block_groups[i]))
+ free_bytes += __btrfs_get_ro_block_group_free_space(
+ &sinfo->block_groups[i]);
+
+ spin_unlock(&sinfo->lock);
+
+ return free_bytes;
+}
+
int btrfs_set_block_group_rw(struct btrfs_root *root,
struct btrfs_block_group_cache *cache)
{
@@ -8092,7 +8149,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
mutex_lock(&root->fs_info->chunk_mutex);
list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
u64 min_free = btrfs_block_group_used(&block_group->item);
- u64 dev_offset, max_avail;
+ u64 dev_offset;
/*
* check to make sure we can actually find a chunk with enough
@@ -8100,7 +8157,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
*/
if (device->total_bytes > device->bytes_used + min_free) {
ret = find_free_dev_extent(NULL, device, min_free,
- &dev_offset, &max_avail);
+ &dev_offset, NULL);
if (!ret)
break;
ret = -1;
@@ -8584,3 +8641,14 @@ out:
btrfs_free_path(path);
return ret;
}
+
+int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
+{
+ return unpin_extent_range(root, start, end);
+}
+
+int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
+ u64 num_bytes)
+{
+ return btrfs_discard_extent(root, bytenr, num_bytes);
+}
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 3e86b9f..2e993cf 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2028,8 +2028,11 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
BUG_ON(extent_map_end(em) <= cur);
BUG_ON(end < cur);
- if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
+ if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
this_bio_flag = EXTENT_BIO_COMPRESSED;
+ extent_set_compress_type(&this_bio_flag,
+ em->compress_type);
+ }
iosize = min(extent_map_end(em) - cur, end - cur + 1);
cur_end = min(extent_map_end(em) - 1, end);
@@ -3072,6 +3075,8 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
#endif
eb = kmem_cache_zalloc(extent_buffer_cache, mask);
+ if (eb == NULL)
+ return NULL;
eb->start = start;
eb->len = len;
spin_lock_init(&eb->lock);
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 4183c81..7083cfa 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -20,8 +20,12 @@
#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
#define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC)
-/* flags for bio submission */
+/*
+ * flags for bio submission. The high bits indicate the compression
+ * type for this bio
+ */
#define EXTENT_BIO_COMPRESSED 1
+#define EXTENT_BIO_FLAG_SHIFT 16
/* these are bit numbers for test/set bit */
#define EXTENT_BUFFER_UPTODATE 0
@@ -135,6 +139,17 @@ struct extent_buffer {
wait_queue_head_t lock_wq;
};
+static inline void extent_set_compress_type(unsigned long *bio_flags,
+ int compress_type)
+{
+ *bio_flags |= compress_type << EXTENT_BIO_FLAG_SHIFT;
+}
+
+static inline int extent_compress_type(unsigned long bio_flags)
+{
+ return bio_flags >> EXTENT_BIO_FLAG_SHIFT;
+}
+
struct extent_map_tree;
static inline struct extent_state *extent_state_next(struct extent_state *state)
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 23cb8da..b0e1fce 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -3,6 +3,7 @@
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/hardirq.h>
+#include "ctree.h"
#include "extent_map.h"
@@ -54,6 +55,7 @@ struct extent_map *alloc_extent_map(gfp_t mask)
return em;
em->in_tree = 0;
em->flags = 0;
+ em->compress_type = BTRFS_COMPRESS_NONE;
atomic_set(&em->refs, 1);
return em;
}
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index ab6d74b..28b44db 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -26,7 +26,8 @@ struct extent_map {
unsigned long flags;
struct block_device *bdev;
atomic_t refs;
- int in_tree;
+ unsigned int in_tree:1;
+ unsigned int compress_type:4;
};
struct extent_map_tree {
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 66836d8..c800d58 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -24,6 +24,7 @@
#include <linux/string.h>
#include <linux/backing-dev.h>
#include <linux/mpage.h>
+#include <linux/falloc.h>
#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/statfs.h>
@@ -224,6 +225,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
split->bdev = em->bdev;
split->flags = flags;
+ split->compress_type = em->compress_type;
ret = add_extent_mapping(em_tree, split);
BUG_ON(ret);
free_extent_map(split);
@@ -238,6 +240,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
split->len = em->start + em->len - (start + len);
split->bdev = em->bdev;
split->flags = flags;
+ split->compress_type = em->compress_type;
if (compressed) {
split->block_len = em->block_len;
@@ -890,6 +893,17 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
if (err)
goto out;
+ /*
+ * If BTRFS flips readonly due to some impossible error
+ * (fs_info->fs_state now has BTRFS_SUPER_FLAG_ERROR),
+ * although we have opened a file as writable, we have
+ * to stop this write operation to ensure FS consistency.
+ */
+ if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
+ err = -EROFS;
+ goto out;
+ }
+
file_update_time(file);
BTRFS_I(inode)->sequence++;
@@ -1237,6 +1251,117 @@ static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma)
return 0;
}
+static long btrfs_fallocate(struct file *file, int mode,
+ loff_t offset, loff_t len)
+{
+ struct inode *inode = file->f_path.dentry->d_inode;
+ struct extent_state *cached_state = NULL;
+ u64 cur_offset;
+ u64 last_byte;
+ u64 alloc_start;
+ u64 alloc_end;
+ u64 alloc_hint = 0;
+ u64 locked_end;
+ u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
+ struct extent_map *em;
+ int ret;
+
+ alloc_start = offset & ~mask;
+ alloc_end = (offset + len + mask) & ~mask;
+
+ /* We only support the FALLOC_FL_KEEP_SIZE mode */
+ if (mode & ~FALLOC_FL_KEEP_SIZE)
+ return -EOPNOTSUPP;
+
+ /*
+ * wait for ordered IO before we have any locks. We'll loop again
+ * below with the locks held.
+ */
+ btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start);
+
+ mutex_lock(&inode->i_mutex);
+ ret = inode_newsize_ok(inode, alloc_end);
+ if (ret)
+ goto out;
+
+ if (alloc_start > inode->i_size) {
+ ret = btrfs_cont_expand(inode, alloc_start);
+ if (ret)
+ goto out;
+ }
+
+ ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start);
+ if (ret)
+ goto out;
+
+ locked_end = alloc_end - 1;
+ while (1) {
+ struct btrfs_ordered_extent *ordered;
+
+ /* the extent lock is ordered inside the running
+ * transaction
+ */
+ lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start,
+ locked_end, 0, &cached_state, GFP_NOFS);
+ ordered = btrfs_lookup_first_ordered_extent(inode,
+ alloc_end - 1);
+ if (ordered &&
+ ordered->file_offset + ordered->len > alloc_start &&
+ ordered->file_offset < alloc_end) {
+ btrfs_put_ordered_extent(ordered);
+ unlock_extent_cached(&BTRFS_I(inode)->io_tree,
+ alloc_start, locked_end,
+ &cached_state, GFP_NOFS);
+ /*
+ * we can't wait on the range with the transaction
+ * running or with the extent lock held
+ */
+ btrfs_wait_ordered_range(inode, alloc_start,
+ alloc_end - alloc_start);
+ } else {
+ if (ordered)
+ btrfs_put_ordered_extent(ordered);
+ break;
+ }
+ }
+
+ cur_offset = alloc_start;
+ while (1) {
+ em = btrfs_get_extent(inode, NULL, 0, cur_offset,
+ alloc_end - cur_offset, 0);
+ BUG_ON(IS_ERR(em) || !em);
+ last_byte = min(extent_map_end(em), alloc_end);
+ last_byte = (last_byte + mask) & ~mask;
+ if (em->block_start == EXTENT_MAP_HOLE ||
+ (cur_offset >= inode->i_size &&
+ !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
+ ret = btrfs_prealloc_file_range(inode, mode, cur_offset,
+ last_byte - cur_offset,
+ 1 << inode->i_blkbits,
+ offset + len,
+ &alloc_hint);
+ if (ret < 0) {
+ free_extent_map(em);
+ break;
+ }
+ }
+ free_extent_map(em);
+
+ cur_offset = last_byte;
+ if (cur_offset >= alloc_end) {
+ ret = 0;
+ break;
+ }
+ }
+ unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
+ &cached_state, GFP_NOFS);
+
+ btrfs_free_reserved_data_space(inode, alloc_end - alloc_start);
+out:
+ mutex_unlock(&inode->i_mutex);
+ return ret;
+}
+
const struct file_operations btrfs_file_operations = {
.llseek = generic_file_llseek,
.read = do_sync_read,
@@ -1248,6 +1373,7 @@ const struct file_operations btrfs_file_operations = {
.open = generic_file_open,
.release = btrfs_release_file,
.fsync = btrfs_sync_file,
+ .fallocate = btrfs_fallocate,
.unlocked_ioctl = btrfs_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = btrfs_ioctl,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index a3798a3..160b55b 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -122,10 +122,10 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
size_t cur_size = size;
size_t datasize;
unsigned long offset;
- int use_compress = 0;
+ int compress_type = BTRFS_COMPRESS_NONE;
if (compressed_size && compressed_pages) {
- use_compress = 1;
+ compress_type = root->fs_info->compress_type;
cur_size = compressed_size;
}
@@ -159,7 +159,7 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
btrfs_set_file_extent_ram_bytes(leaf, ei, size);
ptr = btrfs_file_extent_inline_start(ei);
- if (use_compress) {
+ if (compress_type != BTRFS_COMPRESS_NONE) {
struct page *cpage;
int i = 0;
while (compressed_size > 0) {
@@ -176,7 +176,7 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
compressed_size -= cur_size;
}
btrfs_set_file_extent_compression(leaf, ei,
- BTRFS_COMPRESS_ZLIB);
+ compress_type);
} else {
page = find_get_page(inode->i_mapping,
start >> PAGE_CACHE_SHIFT);
@@ -263,6 +263,7 @@ struct async_extent {
u64 compressed_size;
struct page **pages;
unsigned long nr_pages;
+ int compress_type;
struct list_head list;
};
@@ -280,7 +281,8 @@ static noinline int add_async_extent(struct async_cow *cow,
u64 start, u64 ram_size,
u64 compressed_size,
struct page **pages,
- unsigned long nr_pages)
+ unsigned long nr_pages,
+ int compress_type)
{
struct async_extent *async_extent;
@@ -290,6 +292,7 @@ static noinline int add_async_extent(struct async_cow *cow,
async_extent->compressed_size = compressed_size;
async_extent->pages = pages;
async_extent->nr_pages = nr_pages;
+ async_extent->compress_type = compress_type;
list_add_tail(&async_extent->list, &cow->extents);
return 0;
}
@@ -332,6 +335,7 @@ static noinline int compress_file_range(struct inode *inode,
unsigned long max_uncompressed = 128 * 1024;
int i;
int will_compress;
+ int compress_type = root->fs_info->compress_type;
actual_end = min_t(u64, isize, end + 1);
again:
@@ -381,12 +385,16 @@ again:
WARN_ON(pages);
pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS);
- ret = btrfs_zlib_compress_pages(inode->i_mapping, start,
- total_compressed, pages,
- nr_pages, &nr_pages_ret,
- &total_in,
- &total_compressed,
- max_compressed);
+ if (BTRFS_I(inode)->force_compress)
+ compress_type = BTRFS_I(inode)->force_compress;
+
+ ret = btrfs_compress_pages(compress_type,
+ inode->i_mapping, start,
+ total_compressed, pages,
+ nr_pages, &nr_pages_ret,
+ &total_in,
+ &total_compressed,
+ max_compressed);
if (!ret) {
unsigned long offset = total_compressed &
@@ -493,7 +501,8 @@ again:
* and will submit them to the elevator.
*/
add_async_extent(async_cow, start, num_bytes,
- total_compressed, pages, nr_pages_ret);
+ total_compressed, pages, nr_pages_ret,
+ compress_type);
if (start + num_bytes < end) {
start += num_bytes;
@@ -515,7 +524,8 @@ cleanup_and_bail_uncompressed:
__set_page_dirty_nobuffers(locked_page);
/* unlocked later on in the async handlers */
}
- add_async_extent(async_cow, start, end - start + 1, 0, NULL, 0);
+ add_async_extent(async_cow, start, end - start + 1,
+ 0, NULL, 0, BTRFS_COMPRESS_NONE);
*num_added += 1;
}
@@ -640,6 +650,7 @@ retry:
em->block_start = ins.objectid;
em->block_len = ins.offset;
em->bdev = root->fs_info->fs_devices->latest_bdev;
+ em->compress_type = async_extent->compress_type;
set_bit(EXTENT_FLAG_PINNED, &em->flags);
set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
@@ -656,11 +667,13 @@ retry:
async_extent->ram_size - 1, 0);
}
- ret = btrfs_add_ordered_extent(inode, async_extent->start,
- ins.objectid,
- async_extent->ram_size,
- ins.offset,
- BTRFS_ORDERED_COMPRESSED);
+ ret = btrfs_add_ordered_extent_compress(inode,
+ async_extent->start,
+ ins.objectid,
+ async_extent->ram_size,
+ ins.offset,
+ BTRFS_ORDERED_COMPRESSED,
+ async_extent->compress_type);
BUG_ON(ret);
/*
@@ -1670,7 +1683,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
struct btrfs_ordered_extent *ordered_extent = NULL;
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
struct extent_state *cached_state = NULL;
- int compressed = 0;
+ int compress_type = 0;
int ret;
bool nolock = false;
@@ -1711,9 +1724,9 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
trans->block_rsv = &root->fs_info->delalloc_block_rsv;
if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
- compressed = 1;
+ compress_type = ordered_extent->compress_type;
if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
- BUG_ON(compressed);
+ BUG_ON(compress_type);
ret = btrfs_mark_extent_written(trans, inode,
ordered_extent->file_offset,
ordered_extent->file_offset +
@@ -1727,7 +1740,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
ordered_extent->disk_len,
ordered_extent->len,
ordered_extent->len,
- compressed, 0, 0,
+ compress_type, 0, 0,
BTRFS_FILE_EXTENT_REG);
unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
ordered_extent->file_offset,
@@ -1829,6 +1842,8 @@ static int btrfs_io_failed_hook(struct bio *failed_bio,
if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
logical = em->block_start;
failrec->bio_flags = EXTENT_BIO_COMPRESSED;
+ extent_set_compress_type(&failrec->bio_flags,
+ em->compress_type);
}
failrec->logical = logical;
free_extent_map(em);
@@ -3671,8 +3686,12 @@ static int btrfs_setattr_size(struct inode *inode, struct iattr *attr)
static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
{
struct inode *inode = dentry->d_inode;
+ struct btrfs_root *root = BTRFS_I(inode)->root;
int err;
+ if (btrfs_root_readonly(root))
+ return -EROFS;
+
err = inode_change_ok(inode, attr);
if (err)
return err;
@@ -4928,8 +4947,10 @@ static noinline int uncompress_inline(struct btrfs_path *path,
size_t max_size;
unsigned long inline_size;
unsigned long ptr;
+ int compress_type;
WARN_ON(pg_offset != 0);
+ compress_type = btrfs_file_extent_compression(leaf, item);
max_size = btrfs_file_extent_ram_bytes(leaf, item);
inline_size = btrfs_file_extent_inline_item_len(leaf,
btrfs_item_nr(leaf, path->slots[0]));
@@ -4939,8 +4960,8 @@ static noinline int uncompress_inline(struct btrfs_path *path,
read_extent_buffer(leaf, tmp, ptr, inline_size);
max_size = min_t(unsigned long, PAGE_CACHE_SIZE, max_size);
- ret = btrfs_zlib_decompress(tmp, page, extent_offset,
- inline_size, max_size);
+ ret = btrfs_decompress(compress_type, tmp, page,
+ extent_offset, inline_size, max_size);
if (ret) {
char *kaddr = kmap_atomic(page, KM_USER0);
unsigned long copy_size = min_t(u64,
@@ -4982,7 +5003,7 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
struct btrfs_trans_handle *trans = NULL;
- int compressed;
+ int compress_type;
again:
read_lock(&em_tree->lock);
@@ -5041,7 +5062,7 @@ again:
found_type = btrfs_file_extent_type(leaf, item);
extent_start = found_key.offset;
- compressed = btrfs_file_extent_compression(leaf, item);
+ compress_type = btrfs_file_extent_compression(leaf, item);
if (found_type == BTRFS_FILE_EXTENT_REG ||
found_type == BTRFS_FILE_EXTENT_PREALLOC) {
extent_end = extent_start +
@@ -5087,8 +5108,9 @@ again:
em->block_start = EXTENT_MAP_HOLE;
goto insert;
}
- if (compressed) {
+ if (compress_type != BTRFS_COMPRESS_NONE) {
set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
+ em->compress_type = compress_type;
em->block_start = bytenr;
em->block_len = btrfs_file_extent_disk_num_bytes(leaf,
item);
@@ -5122,12 +5144,14 @@ again:
em->len = (copy_size + root->sectorsize - 1) &
~((u64)root->sectorsize - 1);
em->orig_start = EXTENT_MAP_INLINE;
- if (compressed)
+ if (compress_type) {
set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
+ em->compress_type = compress_type;
+ }
ptr = btrfs_file_extent_inline_start(item) + extent_offset;
if (create == 0 && !PageUptodate(page)) {
- if (btrfs_file_extent_compression(leaf, item) ==
- BTRFS_COMPRESS_ZLIB) {
+ if (btrfs_file_extent_compression(leaf, item) !=
+ BTRFS_COMPRESS_NONE) {
ret = uncompress_inline(path, inode, page,
pg_offset,
extent_offset, item);
@@ -6477,7 +6501,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
ei->ordered_data_close = 0;
ei->orphan_meta_reserved = 0;
ei->dummy_inode = 0;
- ei->force_compress = 0;
+ ei->force_compress = BTRFS_COMPRESS_NONE;
inode = &ei->vfs_inode;
extent_map_tree_init(&ei->extent_tree, GFP_NOFS);
@@ -7098,116 +7122,6 @@ int btrfs_prealloc_file_range_trans(struct inode *inode,
min_size, actual_len, alloc_hint, trans);
}
-static long btrfs_fallocate(struct inode *inode, int mode,
- loff_t offset, loff_t len)
-{
- struct extent_state *cached_state = NULL;
- u64 cur_offset;
- u64 last_byte;
- u64 alloc_start;
- u64 alloc_end;
- u64 alloc_hint = 0;
- u64 locked_end;
- u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
- struct extent_map *em;
- int ret;
-
- alloc_start = offset & ~mask;
- alloc_end = (offset + len + mask) & ~mask;
-
- /* We only support the FALLOC_FL_KEEP_SIZE mode */
- if (mode && (mode != FALLOC_FL_KEEP_SIZE))
- return -EOPNOTSUPP;
-
- /*
- * wait for ordered IO before we have any locks. We'll loop again
- * below with the locks held.
- */
- btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start);
-
- mutex_lock(&inode->i_mutex);
- ret = inode_newsize_ok(inode, alloc_end);
- if (ret)
- goto out;
-
- if (alloc_start > inode->i_size) {
- ret = btrfs_cont_expand(inode, alloc_start);
- if (ret)
- goto out;
- }
-
- ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start);
- if (ret)
- goto out;
-
- locked_end = alloc_end - 1;
- while (1) {
- struct btrfs_ordered_extent *ordered;
-
- /* the extent lock is ordered inside the running
- * transaction
- */
- lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start,
- locked_end, 0, &cached_state, GFP_NOFS);
- ordered = btrfs_lookup_first_ordered_extent(inode,
- alloc_end - 1);
- if (ordered &&
- ordered->file_offset + ordered->len > alloc_start &&
- ordered->file_offset < alloc_end) {
- btrfs_put_ordered_extent(ordered);
- unlock_extent_cached(&BTRFS_I(inode)->io_tree,
- alloc_start, locked_end,
- &cached_state, GFP_NOFS);
- /*
- * we can't wait on the range with the transaction
- * running or with the extent lock held
- */
- btrfs_wait_ordered_range(inode, alloc_start,
- alloc_end - alloc_start);
- } else {
- if (ordered)
- btrfs_put_ordered_extent(ordered);
- break;
- }
- }
-
- cur_offset = alloc_start;
- while (1) {
- em = btrfs_get_extent(inode, NULL, 0, cur_offset,
- alloc_end - cur_offset, 0);
- BUG_ON(IS_ERR(em) || !em);
- last_byte = min(extent_map_end(em), alloc_end);
- last_byte = (last_byte + mask) & ~mask;
- if (em->block_start == EXTENT_MAP_HOLE ||
- (cur_offset >= inode->i_size &&
- !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
- ret = btrfs_prealloc_file_range(inode, mode, cur_offset,
- last_byte - cur_offset,
- 1 << inode->i_blkbits,
- offset + len,
- &alloc_hint);
- if (ret < 0) {
- free_extent_map(em);
- break;
- }
- }
- free_extent_map(em);
-
- cur_offset = last_byte;
- if (cur_offset >= alloc_end) {
- ret = 0;
- break;
- }
- }
- unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
- &cached_state, GFP_NOFS);
-
- btrfs_free_reserved_data_space(inode, alloc_end - alloc_start);
-out:
- mutex_unlock(&inode->i_mutex);
- return ret;
-}
-
static int btrfs_set_page_dirty(struct page *page)
{
return __set_page_dirty_nobuffers(page);
@@ -7215,6 +7129,10 @@ static int btrfs_set_page_dirty(struct page *page)
static int btrfs_permission(struct inode *inode, int mask, unsigned int flags)
{
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+
+ if (btrfs_root_readonly(root) && (mask & MAY_WRITE))
+ return -EROFS;
if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE))
return -EACCES;
return generic_permission(inode, mask, flags, btrfs_check_acl);
@@ -7310,7 +7228,6 @@ static const struct inode_operations btrfs_file_inode_operations = {
.listxattr = btrfs_listxattr,
.removexattr = btrfs_removexattr,
.permission = btrfs_permission,
- .fallocate = btrfs_fallocate,
.fiemap = btrfs_fiemap,
};
static const struct inode_operations btrfs_special_inode_operations = {
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index f87552a..a506a22 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -147,6 +147,9 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
unsigned int flags, oldflags;
int ret;
+ if (btrfs_root_readonly(root))
+ return -EROFS;
+
if (copy_from_user(&flags, arg, sizeof(flags)))
return -EFAULT;
@@ -360,7 +363,8 @@ fail:
}
static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
- char *name, int namelen, u64 *async_transid)
+ char *name, int namelen, u64 *async_transid,
+ bool readonly)
{
struct inode *inode;
struct dentry *parent;
@@ -378,6 +382,7 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
btrfs_init_block_rsv(&pending_snapshot->block_rsv);
pending_snapshot->dentry = dentry;
pending_snapshot->root = root;
+ pending_snapshot->readonly = readonly;
trans = btrfs_start_transaction(root->fs_info->extent_root, 5);
if (IS_ERR(trans)) {
@@ -509,7 +514,7 @@ static inline int btrfs_may_create(struct inode *dir, struct dentry *child)
static noinline int btrfs_mksubvol(struct path *parent,
char *name, int namelen,
struct btrfs_root *snap_src,
- u64 *async_transid)
+ u64 *async_transid, bool readonly)
{
struct inode *dir = parent->dentry->d_inode;
struct dentry *dentry;
@@ -541,7 +546,7 @@ static noinline int btrfs_mksubvol(struct path *parent,
if (snap_src) {
error = create_snapshot(snap_src, dentry,
- name, namelen, async_transid);
+ name, namelen, async_transid, readonly);
} else {
error = create_subvol(BTRFS_I(dir)->root, dentry,
name, namelen, async_transid);
@@ -638,9 +643,11 @@ static int btrfs_defrag_file(struct file *file,
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
struct btrfs_ordered_extent *ordered;
struct page *page;
+ struct btrfs_super_block *disk_super;
unsigned long last_index;
unsigned long ra_pages = root->fs_info->bdi.ra_pages;
unsigned long total_read = 0;
+ u64 features;
u64 page_start;
u64 page_end;
u64 last_len = 0;
@@ -648,6 +655,14 @@ static int btrfs_defrag_file(struct file *file,
u64 defrag_end = 0;
unsigned long i;
int ret;
+ int compress_type = BTRFS_COMPRESS_ZLIB;
+
+ if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) {
+ if (range->compress_type > BTRFS_COMPRESS_TYPES)
+ return -EINVAL;
+ if (range->compress_type)
+ compress_type = range->compress_type;
+ }
if (inode->i_size == 0)
return 0;
@@ -683,7 +698,7 @@ static int btrfs_defrag_file(struct file *file,
total_read++;
mutex_lock(&inode->i_mutex);
if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)
- BTRFS_I(inode)->force_compress = 1;
+ BTRFS_I(inode)->force_compress = compress_type;
ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
if (ret)
@@ -781,10 +796,17 @@ loop_unlock:
atomic_dec(&root->fs_info->async_submit_draining);
mutex_lock(&inode->i_mutex);
- BTRFS_I(inode)->force_compress = 0;
+ BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_NONE;
mutex_unlock(&inode->i_mutex);
}
+ disk_super = &root->fs_info->super_copy;
+ features = btrfs_super_incompat_flags(disk_super);
+ if (range->compress_type == BTRFS_COMPRESS_LZO) {
+ features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
+ btrfs_set_super_incompat_flags(disk_super, features);
+ }
+
return 0;
err_reservations:
@@ -901,7 +923,8 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
char *name,
unsigned long fd,
int subvol,
- u64 *transid)
+ u64 *transid,
+ bool readonly)
{
struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
struct file *src_file;
@@ -919,7 +942,7 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
if (subvol) {
ret = btrfs_mksubvol(&file->f_path, name, namelen,
- NULL, transid);
+ NULL, transid, readonly);
} else {
struct inode *src_inode;
src_file = fget(fd);
@@ -938,7 +961,7 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
}
ret = btrfs_mksubvol(&file->f_path, name, namelen,
BTRFS_I(src_inode)->root,
- transid);
+ transid, readonly);
fput(src_file);
}
out:
@@ -946,58 +969,139 @@ out:
}
static noinline int btrfs_ioctl_snap_create(struct file *file,
- void __user *arg, int subvol,
- int v2)
+ void __user *arg, int subvol)
{
- struct btrfs_ioctl_vol_args *vol_args = NULL;
- struct btrfs_ioctl_vol_args_v2 *vol_args_v2 = NULL;
- char *name;
- u64 fd;
+ struct btrfs_ioctl_vol_args *vol_args;
int ret;
- if (v2) {
- u64 transid = 0;
- u64 *ptr = NULL;
+ vol_args = memdup_user(arg, sizeof(*vol_args));
+ if (IS_ERR(vol_args))
+ return PTR_ERR(vol_args);
+ vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
- vol_args_v2 = memdup_user(arg, sizeof(*vol_args_v2));
- if (IS_ERR(vol_args_v2))
- return PTR_ERR(vol_args_v2);
+ ret = btrfs_ioctl_snap_create_transid(file, vol_args->name,
+ vol_args->fd, subvol,
+ NULL, false);
- if (vol_args_v2->flags & ~BTRFS_SUBVOL_CREATE_ASYNC) {
- ret = -EINVAL;
- goto out;
- }
-
- name = vol_args_v2->name;
- fd = vol_args_v2->fd;
- vol_args_v2->name[BTRFS_SUBVOL_NAME_MAX] = '\0';
+ kfree(vol_args);
+ return ret;
+}
- if (vol_args_v2->flags & BTRFS_SUBVOL_CREATE_ASYNC)
- ptr = &transid;
+static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
+ void __user *arg, int subvol)
+{
+ struct btrfs_ioctl_vol_args_v2 *vol_args;
+ int ret;
+ u64 transid = 0;
+ u64 *ptr = NULL;
+ bool readonly = false;
- ret = btrfs_ioctl_snap_create_transid(file, name, fd,
- subvol, ptr);
+ vol_args = memdup_user(arg, sizeof(*vol_args));
+ if (IS_ERR(vol_args))
+ return PTR_ERR(vol_args);
+ vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0';
- if (ret == 0 && ptr &&
- copy_to_user(arg +
- offsetof(struct btrfs_ioctl_vol_args_v2,
- transid), ptr, sizeof(*ptr)))
- ret = -EFAULT;
- } else {
- vol_args = memdup_user(arg, sizeof(*vol_args));
- if (IS_ERR(vol_args))
- return PTR_ERR(vol_args);
- name = vol_args->name;
- fd = vol_args->fd;
- vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
-
- ret = btrfs_ioctl_snap_create_transid(file, name, fd,
- subvol, NULL);
+ if (vol_args->flags &
+ ~(BTRFS_SUBVOL_CREATE_ASYNC | BTRFS_SUBVOL_RDONLY)) {
+ ret = -EOPNOTSUPP;
+ goto out;
}
+
+ if (vol_args->flags & BTRFS_SUBVOL_CREATE_ASYNC)
+ ptr = &transid;
+ if (vol_args->flags & BTRFS_SUBVOL_RDONLY)
+ readonly = true;
+
+ ret = btrfs_ioctl_snap_create_transid(file, vol_args->name,
+ vol_args->fd, subvol,
+ ptr, readonly);
+
+ if (ret == 0 && ptr &&
+ copy_to_user(arg +
+ offsetof(struct btrfs_ioctl_vol_args_v2,
+ transid), ptr, sizeof(*ptr)))
+ ret = -EFAULT;
out:
kfree(vol_args);
- kfree(vol_args_v2);
+ return ret;
+}
+static noinline int btrfs_ioctl_subvol_getflags(struct file *file,
+ void __user *arg)
+{
+ struct inode *inode = fdentry(file)->d_inode;
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ int ret = 0;
+ u64 flags = 0;
+
+ if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID)
+ return -EINVAL;
+
+ down_read(&root->fs_info->subvol_sem);
+ if (btrfs_root_readonly(root))
+ flags |= BTRFS_SUBVOL_RDONLY;
+ up_read(&root->fs_info->subvol_sem);
+
+ if (copy_to_user(arg, &flags, sizeof(flags)))
+ ret = -EFAULT;
+
+ return ret;
+}
+
+static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
+ void __user *arg)
+{
+ struct inode *inode = fdentry(file)->d_inode;
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_trans_handle *trans;
+ u64 root_flags;
+ u64 flags;
+ int ret = 0;
+
+ if (root->fs_info->sb->s_flags & MS_RDONLY)
+ return -EROFS;
+
+ if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID)
+ return -EINVAL;
+
+ if (copy_from_user(&flags, arg, sizeof(flags)))
+ return -EFAULT;
+
+ if (flags & ~BTRFS_SUBVOL_CREATE_ASYNC)
+ return -EINVAL;
+
+ if (flags & ~BTRFS_SUBVOL_RDONLY)
+ return -EOPNOTSUPP;
+
+ down_write(&root->fs_info->subvol_sem);
+
+ /* nothing to do */
+ if (!!(flags & BTRFS_SUBVOL_RDONLY) == btrfs_root_readonly(root))
+ goto out;
+
+ root_flags = btrfs_root_flags(&root->root_item);
+ if (flags & BTRFS_SUBVOL_RDONLY)
+ btrfs_set_root_flags(&root->root_item,
+ root_flags | BTRFS_ROOT_SUBVOL_RDONLY);
+ else
+ btrfs_set_root_flags(&root->root_item,
+ root_flags & ~BTRFS_ROOT_SUBVOL_RDONLY);
+
+ trans = btrfs_start_transaction(root, 1);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ goto out_reset;
+ }
+
+ ret = btrfs_update_root(trans, root,
+ &root->root_key, &root->root_item);
+
+ btrfs_commit_transaction(trans, root);
+out_reset:
+ if (ret)
+ btrfs_set_root_flags(&root->root_item, root_flags);
+out:
+ up_write(&root->fs_info->subvol_sem);
return ret;
}
@@ -1509,6 +1613,9 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
struct btrfs_ioctl_defrag_range_args *range;
int ret;
+ if (btrfs_root_readonly(root))
+ return -EROFS;
+
ret = mnt_want_write(file->f_path.mnt);
if (ret)
return ret;
@@ -1637,6 +1744,9 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND))
return -EINVAL;
+ if (btrfs_root_readonly(root))
+ return -EROFS;
+
ret = mnt_want_write(file->f_path.mnt);
if (ret)
return ret;
@@ -1958,6 +2068,10 @@ static long btrfs_ioctl_trans_start(struct file *file)
if (file->private_data)
goto out;
+ ret = -EROFS;
+ if (btrfs_root_readonly(root))
+ goto out;
+
ret = mnt_want_write(file->f_path.mnt);
if (ret)
goto out;
@@ -2257,13 +2371,17 @@ long btrfs_ioctl(struct file *file, unsigned int
case FS_IOC_GETVERSION:
return btrfs_ioctl_getversion(file, argp);
case BTRFS_IOC_SNAP_CREATE:
- return btrfs_ioctl_snap_create(file, argp, 0, 0);
+ return btrfs_ioctl_snap_create(file, argp, 0);
case BTRFS_IOC_SNAP_CREATE_V2:
- return btrfs_ioctl_snap_create(file, argp, 0, 1);
+ return btrfs_ioctl_snap_create_v2(file, argp, 0);
case BTRFS_IOC_SUBVOL_CREATE:
- return btrfs_ioctl_snap_create(file, argp, 1, 0);
+ return btrfs_ioctl_snap_create(file, argp, 1);
case BTRFS_IOC_SNAP_DESTROY:
return btrfs_ioctl_snap_destroy(file, argp);
+ case BTRFS_IOC_SUBVOL_GETFLAGS:
+ return btrfs_ioctl_subvol_getflags(file, argp);
+ case BTRFS_IOC_SUBVOL_SETFLAGS:
+ return btrfs_ioctl_subvol_setflags(file, argp);
case BTRFS_IOC_DEFAULT_SUBVOL:
return btrfs_ioctl_default_subvol(file, argp);
case BTRFS_IOC_DEFRAG:
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h
index c344d12..8fb3821 100644
--- a/fs/btrfs/ioctl.h
+++ b/fs/btrfs/ioctl.h
@@ -31,6 +31,7 @@ struct btrfs_ioctl_vol_args {
};
#define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0)
+#define BTRFS_SUBVOL_RDONLY (1ULL << 1)
#define BTRFS_SUBVOL_NAME_MAX 4039
struct btrfs_ioctl_vol_args_v2 {
@@ -133,8 +134,15 @@ struct btrfs_ioctl_defrag_range_args {
*/
__u32 extent_thresh;
+ /*
+ * which compression method to use if turning on compression
+ * for this defrag operation. If unspecified, zlib will
+ * be used
+ */
+ __u32 compress_type;
+
/* spare for later */
- __u32 unused[5];
+ __u32 unused[4];
};
struct btrfs_ioctl_space_info {
@@ -193,4 +201,6 @@ struct btrfs_ioctl_space_args {
#define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64)
#define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \
struct btrfs_ioctl_vol_args_v2)
+#define BTRFS_IOC_SUBVOL_GETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 25, __u64)
+#define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64)
#endif
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
new file mode 100644
index 0000000..cc9b450
--- /dev/null
+++ b/fs/btrfs/lzo.c
@@ -0,0 +1,420 @@
+/*
+ * Copyright (C) 2008 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/init.h>
+#include <linux/err.h>
+#include <linux/sched.h>
+#include <linux/pagemap.h>
+#include <linux/bio.h>
+#include <linux/lzo.h>
+#include "compression.h"
+
+#define LZO_LEN 4
+
+struct workspace {
+ void *mem;
+ void *buf; /* where compressed data goes */
+ void *cbuf; /* where decompressed data goes */
+ struct list_head list;
+};
+
+static void lzo_free_workspace(struct list_head *ws)
+{
+ struct workspace *workspace = list_entry(ws, struct workspace, list);
+
+ vfree(workspace->buf);
+ vfree(workspace->cbuf);
+ vfree(workspace->mem);
+ kfree(workspace);
+}
+
+static struct list_head *lzo_alloc_workspace(void)
+{
+ struct workspace *workspace;
+
+ workspace = kzalloc(sizeof(*workspace), GFP_NOFS);
+ if (!workspace)
+ return ERR_PTR(-ENOMEM);
+
+ workspace->mem = vmalloc(LZO1X_MEM_COMPRESS);
+ workspace->buf = vmalloc(lzo1x_worst_compress(PAGE_CACHE_SIZE));
+ workspace->cbuf = vmalloc(lzo1x_worst_compress(PAGE_CACHE_SIZE));
+ if (!workspace->mem || !workspace->buf || !workspace->cbuf)
+ goto fail;
+
+ INIT_LIST_HEAD(&workspace->list);
+
+ return &workspace->list;
+fail:
+ lzo_free_workspace(&workspace->list);
+ return ERR_PTR(-ENOMEM);
+}
+
+static inline void write_compress_length(char *buf, size_t len)
+{
+ __le32 dlen;
+
+ dlen = cpu_to_le32(len);
+ memcpy(buf, &dlen, LZO_LEN);
+}
+
+static inline size_t read_compress_length(char *buf)
+{
+ __le32 dlen;
+
+ memcpy(&dlen, buf, LZO_LEN);
+ return le32_to_cpu(dlen);
+}
+
+static int lzo_compress_pages(struct list_head *ws,
+ struct address_space *mapping,
+ u64 start, unsigned long len,
+ struct page **pages,
+ unsigned long nr_dest_pages,
+ unsigned long *out_pages,
+ unsigned long *total_in,
+ unsigned long *total_out,
+ unsigned long max_out)
+{
+ struct workspace *workspace = list_entry(ws, struct workspace, list);
+ int ret = 0;
+ char *data_in;
+ char *cpage_out;
+ int nr_pages = 0;
+ struct page *in_page = NULL;
+ struct page *out_page = NULL;
+ unsigned long bytes_left;
+
+ size_t in_len;
+ size_t out_len;
+ char *buf;
+ unsigned long tot_in = 0;
+ unsigned long tot_out = 0;
+ unsigned long pg_bytes_left;
+ unsigned long out_offset;
+ unsigned long bytes;
+
+ *out_pages = 0;
+ *total_out = 0;
+ *total_in = 0;
+
+ in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT);
+ data_in = kmap(in_page);
+
+ /*
+ * store the size of all chunks of compressed data in
+ * the first 4 bytes
+ */
+ out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+ if (out_page == NULL) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ cpage_out = kmap(out_page);
+ out_offset = LZO_LEN;
+ tot_out = LZO_LEN;
+ pages[0] = out_page;
+ nr_pages = 1;
+ pg_bytes_left = PAGE_CACHE_SIZE - LZO_LEN;
+
+ /* compress at most one page of data each time */
+ in_len = min(len, PAGE_CACHE_SIZE);
+ while (tot_in < len) {
+ ret = lzo1x_1_compress(data_in, in_len, workspace->cbuf,
+ &out_len, workspace->mem);
+ if (ret != LZO_E_OK) {
+ printk(KERN_DEBUG "btrfs deflate in loop returned %d\n",
+ ret);
+ ret = -1;
+ goto out;
+ }
+
+ /* store the size of this chunk of compressed data */
+ write_compress_length(cpage_out + out_offset, out_len);
+ tot_out += LZO_LEN;
+ out_offset += LZO_LEN;
+ pg_bytes_left -= LZO_LEN;
+
+ tot_in += in_len;
+ tot_out += out_len;
+
+ /* copy bytes from the working buffer into the pages */
+ buf = workspace->cbuf;
+ while (out_len) {
+ bytes = min_t(unsigned long, pg_bytes_left, out_len);
+
+ memcpy(cpage_out + out_offset, buf, bytes);
+
+ out_len -= bytes;
+ pg_bytes_left -= bytes;
+ buf += bytes;
+ out_offset += bytes;
+
+ /*
+ * we need another page for writing out.
+ *
+ * Note if there's less than 4 bytes left, we just
+ * skip to a new page.
+ */
+ if ((out_len == 0 && pg_bytes_left < LZO_LEN) ||
+ pg_bytes_left == 0) {
+ if (pg_bytes_left) {
+ memset(cpage_out + out_offset, 0,
+ pg_bytes_left);
+ tot_out += pg_bytes_left;
+ }
+
+ /* we're done, don't allocate new page */
+ if (out_len == 0 && tot_in >= len)
+ break;
+
+ kunmap(out_page);
+ if (nr_pages == nr_dest_pages) {
+ out_page = NULL;
+ ret = -1;
+ goto out;
+ }
+
+ out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+ if (out_page == NULL) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ cpage_out = kmap(out_page);
+ pages[nr_pages++] = out_page;
+
+ pg_bytes_left = PAGE_CACHE_SIZE;
+ out_offset = 0;
+ }
+ }
+
+ /* we're making it bigger, give up */
+ if (tot_in > 8192 && tot_in < tot_out)
+ goto out;
+
+ /* we're all done */
+ if (tot_in >= len)
+ break;
+
+ if (tot_out > max_out)
+ break;
+
+ bytes_left = len - tot_in;
+ kunmap(in_page);
+ page_cache_release(in_page);
+
+ start += PAGE_CACHE_SIZE;
+ in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT);
+ data_in = kmap(in_page);
+ in_len = min(bytes_left, PAGE_CACHE_SIZE);
+ }
+
+ if (tot_out > tot_in)
+ goto out;
+
+ /* store the size of all chunks of compressed data */
+ cpage_out = kmap(pages[0]);
+ write_compress_length(cpage_out, tot_out);
+
+ kunmap(pages[0]);
+
+ ret = 0;
+ *total_out = tot_out;
+ *total_in = tot_in;
+out:
+ *out_pages = nr_pages;
+ if (out_page)
+ kunmap(out_page);
+
+ if (in_page) {
+ kunmap(in_page);
+ page_cache_release(in_page);
+ }
+
+ return ret;
+}
+
+static int lzo_decompress_biovec(struct list_head *ws,
+ struct page **pages_in,
+ u64 disk_start,
+ struct bio_vec *bvec,
+ int vcnt,
+ size_t srclen)
+{
+ struct workspace *workspace = list_entry(ws, struct workspace, list);
+ int ret = 0, ret2;
+ char *data_in;
+ unsigned long page_in_index = 0;
+ unsigned long page_out_index = 0;
+ unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) /
+ PAGE_CACHE_SIZE;
+ unsigned long buf_start;
+ unsigned long buf_offset = 0;
+ unsigned long bytes;
+ unsigned long working_bytes;
+ unsigned long pg_offset;
+
+ size_t in_len;
+ size_t out_len;
+ unsigned long in_offset;
+ unsigned long in_page_bytes_left;
+ unsigned long tot_in;
+ unsigned long tot_out;
+ unsigned long tot_len;
+ char *buf;
+
+ data_in = kmap(pages_in[0]);
+ tot_len = read_compress_length(data_in);
+
+ tot_in = LZO_LEN;
+ in_offset = LZO_LEN;
+ tot_len = min_t(size_t, srclen, tot_len);
+ in_page_bytes_left = PAGE_CACHE_SIZE - LZO_LEN;
+
+ tot_out = 0;
+ pg_offset = 0;
+
+ while (tot_in < tot_len) {
+ in_len = read_compress_length(data_in + in_offset);
+ in_page_bytes_left -= LZO_LEN;
+ in_offset += LZO_LEN;
+ tot_in += LZO_LEN;
+
+ tot_in += in_len;
+ working_bytes = in_len;
+
+ /* fast path: avoid using the working buffer */
+ if (in_page_bytes_left >= in_len) {
+ buf = data_in + in_offset;
+ bytes = in_len;
+ goto cont;
+ }
+
+ /* copy bytes from the pages into the working buffer */
+ buf = workspace->cbuf;
+ buf_offset = 0;
+ while (working_bytes) {
+ bytes = min(working_bytes, in_page_bytes_left);
+
+ memcpy(buf + buf_offset, data_in + in_offset, bytes);
+ buf_offset += bytes;
+cont:
+ working_bytes -= bytes;
+ in_page_bytes_left -= bytes;
+ in_offset += bytes;
+
+ /* check if we need to pick another page */
+ if ((working_bytes == 0 && in_page_bytes_left < LZO_LEN)
+ || in_page_bytes_left == 0) {
+ tot_in += in_page_bytes_left;
+
+ if (working_bytes == 0 && tot_in >= tot_len)
+ break;
+
+ kunmap(pages_in[page_in_index]);
+ page_in_index++;
+ if (page_in_index >= total_pages_in) {
+ ret = -1;
+ data_in = NULL;
+ goto done;
+ }
+ data_in = kmap(pages_in[page_in_index]);
+
+ in_page_bytes_left = PAGE_CACHE_SIZE;
+ in_offset = 0;
+ }
+ }
+
+ out_len = lzo1x_worst_compress(PAGE_CACHE_SIZE);
+ ret = lzo1x_decompress_safe(buf, in_len, workspace->buf,
+ &out_len);
+ if (ret != LZO_E_OK) {
+ printk(KERN_WARNING "btrfs decompress failed\n");
+ ret = -1;
+ break;
+ }
+
+ buf_start = tot_out;
+ tot_out += out_len;
+
+ ret2 = btrfs_decompress_buf2page(workspace->buf, buf_start,
+ tot_out, disk_start,
+ bvec, vcnt,
+ &page_out_index, &pg_offset);
+ if (ret2 == 0)
+ break;
+ }
+done:
+ if (data_in)
+ kunmap(pages_in[page_in_index]);
+ return ret;
+}
+
+static int lzo_decompress(struct list_head *ws, unsigned char *data_in,
+ struct page *dest_page,
+ unsigned long start_byte,
+ size_t srclen, size_t destlen)
+{
+ struct workspace *workspace = list_entry(ws, struct workspace, list);
+ size_t in_len;
+ size_t out_len;
+ size_t tot_len;
+ int ret = 0;
+ char *kaddr;
+ unsigned long bytes;
+
+ BUG_ON(srclen < LZO_LEN);
+
+ tot_len = read_compress_length(data_in);
+ data_in += LZO_LEN;
+
+ in_len = read_compress_length(data_in);
+ data_in += LZO_LEN;
+
+ out_len = PAGE_CACHE_SIZE;
+ ret = lzo1x_decompress_safe(data_in, in_len, workspace->buf, &out_len);
+ if (ret != LZO_E_OK) {
+ printk(KERN_WARNING "btrfs decompress failed!\n");
+ ret = -1;
+ goto out;
+ }
+
+ if (out_len < start_byte) {
+ ret = -1;
+ goto out;
+ }
+
+ bytes = min_t(unsigned long, destlen, out_len - start_byte);
+
+ kaddr = kmap_atomic(dest_page, KM_USER0);
+ memcpy(kaddr, workspace->buf + start_byte, bytes);
+ kunmap_atomic(kaddr, KM_USER0);
+out:
+ return ret;
+}
+
+struct btrfs_compress_op btrfs_lzo_compress = {
+ .alloc_workspace = lzo_alloc_workspace,
+ .free_workspace = lzo_free_workspace,
+ .compress_pages = lzo_compress_pages,
+ .decompress_biovec = lzo_decompress_biovec,
+ .decompress = lzo_decompress,
+};
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index ae7737e..2b61e1d 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -172,7 +172,7 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
*/
static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
u64 start, u64 len, u64 disk_len,
- int type, int dio)
+ int type, int dio, int compress_type)
{
struct btrfs_ordered_inode_tree *tree;
struct rb_node *node;
@@ -189,6 +189,7 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
entry->disk_len = disk_len;
entry->bytes_left = len;
entry->inode = inode;
+ entry->compress_type = compress_type;
if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE)
set_bit(type, &entry->flags);
@@ -220,14 +221,25 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
u64 start, u64 len, u64 disk_len, int type)
{
return __btrfs_add_ordered_extent(inode, file_offset, start, len,
- disk_len, type, 0);
+ disk_len, type, 0,
+ BTRFS_COMPRESS_NONE);
}
int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset,
u64 start, u64 len, u64 disk_len, int type)
{
return __btrfs_add_ordered_extent(inode, file_offset, start, len,
- disk_len, type, 1);
+ disk_len, type, 1,
+ BTRFS_COMPRESS_NONE);
+}
+
+int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset,
+ u64 start, u64 len, u64 disk_len,
+ int type, int compress_type)
+{
+ return __btrfs_add_ordered_extent(inode, file_offset, start, len,
+ disk_len, type, 0,
+ compress_type);
}
/*
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 61dca83..ff1f69a 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -68,7 +68,7 @@ struct btrfs_ordered_sum {
#define BTRFS_ORDERED_NOCOW 2 /* set when we want to write in place */
-#define BTRFS_ORDERED_COMPRESSED 3 /* writing a compressed extent */
+#define BTRFS_ORDERED_COMPRESSED 3 /* writing a zlib compressed extent */
#define BTRFS_ORDERED_PREALLOC 4 /* set when writing to prealloced extent */
@@ -93,6 +93,9 @@ struct btrfs_ordered_extent {
/* flags (described above) */
unsigned long flags;
+ /* compression algorithm */
+ int compress_type;
+
/* reference count */
atomic_t refs;
@@ -148,6 +151,9 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
u64 start, u64 len, u64 disk_len, int type);
int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset,
u64 start, u64 len, u64 disk_len, int type);
+int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset,
+ u64 start, u64 len, u64 disk_len,
+ int type, int compress_type);
int btrfs_add_ordered_sum(struct inode *inode,
struct btrfs_ordered_extent *entry,
struct btrfs_ordered_sum *sum);
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 22acdaa..b2130c4 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -54,6 +54,90 @@
static const struct super_operations btrfs_super_ops;
+static const char *btrfs_decode_error(struct btrfs_fs_info *fs_info, int errno,
+ char nbuf[16])
+{
+ char *errstr = NULL;
+
+ switch (errno) {
+ case -EIO:
+ errstr = "IO failure";
+ break;
+ case -ENOMEM:
+ errstr = "Out of memory";
+ break;
+ case -EROFS:
+ errstr = "Readonly filesystem";
+ break;
+ default:
+ if (nbuf) {
+ if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
+ errstr = nbuf;
+ }
+ break;
+ }
+
+ return errstr;
+}
+
+static void __save_error_info(struct btrfs_fs_info *fs_info)
+{
+ /*
+ * today we only save the error info into ram. Long term we'll
+ * also send it down to the disk
+ */
+ fs_info->fs_state = BTRFS_SUPER_FLAG_ERROR;
+}
+
+/* NOTE:
+ * We move write_super stuff at umount in order to avoid deadlock
+ * for umount hold all lock.
+ */
+static void save_error_info(struct btrfs_fs_info *fs_info)
+{
+ __save_error_info(fs_info);
+}
+
+/* btrfs handle error by forcing the filesystem readonly */
+static void btrfs_handle_error(struct btrfs_fs_info *fs_info)
+{
+ struct super_block *sb = fs_info->sb;
+
+ if (sb->s_flags & MS_RDONLY)
+ return;
+
+ if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
+ sb->s_flags |= MS_RDONLY;
+ printk(KERN_INFO "btrfs is forced readonly\n");
+ }
+}
+
+/*
+ * __btrfs_std_error decodes expected errors from the caller and
+ * invokes the approciate error response.
+ */
+void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
+ unsigned int line, int errno)
+{
+ struct super_block *sb = fs_info->sb;
+ char nbuf[16];
+ const char *errstr;
+
+ /*
+ * Special case: if the error is EROFS, and we're already
+ * under MS_RDONLY, then it is safe here.
+ */
+ if (errno == -EROFS && (sb->s_flags & MS_RDONLY))
+ return;
+
+ errstr = btrfs_decode_error(fs_info, errno, nbuf);
+ printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: %s\n",
+ sb->s_id, function, line, errstr);
+ save_error_info(fs_info);
+
+ btrfs_handle_error(fs_info);
+}
+
static void btrfs_put_super(struct super_block *sb)
{
struct btrfs_root *root = btrfs_sb(sb);
@@ -69,9 +153,9 @@ enum {
Opt_degraded, Opt_subvol, Opt_subvolid, Opt_device, Opt_nodatasum,
Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd,
Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress,
- Opt_compress_force, Opt_notreelog, Opt_ratio, Opt_flushoncommit,
- Opt_discard, Opt_space_cache, Opt_clear_cache, Opt_err,
- Opt_user_subvol_rm_allowed,
+ Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
+ Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
+ Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, Opt_err,
};
static match_table_t tokens = {
@@ -86,7 +170,9 @@ static match_table_t tokens = {
{Opt_alloc_start, "alloc_start=%s"},
{Opt_thread_pool, "thread_pool=%d"},
{Opt_compress, "compress"},
+ {Opt_compress_type, "compress=%s"},
{Opt_compress_force, "compress-force"},
+ {Opt_compress_force_type, "compress-force=%s"},
{Opt_ssd, "ssd"},
{Opt_ssd_spread, "ssd_spread"},
{Opt_nossd, "nossd"},
@@ -112,6 +198,8 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
char *p, *num, *orig;
int intarg;
int ret = 0;
+ char *compress_type;
+ bool compress_force = false;
if (!options)
return 0;
@@ -154,14 +242,32 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
btrfs_set_opt(info->mount_opt, NODATACOW);
btrfs_set_opt(info->mount_opt, NODATASUM);
break;
- case Opt_compress:
- printk(KERN_INFO "btrfs: use compression\n");
- btrfs_set_opt(info->mount_opt, COMPRESS);
- break;
case Opt_compress_force:
- printk(KERN_INFO "btrfs: forcing compression\n");
- btrfs_set_opt(info->mount_opt, FORCE_COMPRESS);
+ case Opt_compress_force_type:
+ compress_force = true;
+ case Opt_compress:
+ case Opt_compress_type:
+ if (token == Opt_compress ||
+ token == Opt_compress_force ||
+ strcmp(args[0].from, "zlib") == 0) {
+ compress_type = "zlib";
+ info->compress_type = BTRFS_COMPRESS_ZLIB;
+ } else if (strcmp(args[0].from, "lzo") == 0) {
+ compress_type = "lzo";
+ info->compress_type = BTRFS_COMPRESS_LZO;
+ } else {
+ ret = -EINVAL;
+ goto out;
+ }
+
btrfs_set_opt(info->mount_opt, COMPRESS);
+ if (compress_force) {
+ btrfs_set_opt(info->mount_opt, FORCE_COMPRESS);
+ pr_info("btrfs: force %s compression\n",
+ compress_type);
+ } else
+ pr_info("btrfs: use %s compression\n",
+ compress_type);
break;
case Opt_ssd:
printk(KERN_INFO "btrfs: use ssd allocation scheme\n");
@@ -753,6 +859,127 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
return 0;
}
+/*
+ * The helper to calc the free space on the devices that can be used to store
+ * file data.
+ */
+static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)
+{
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_device_info *devices_info;
+ struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
+ struct btrfs_device *device;
+ u64 skip_space;
+ u64 type;
+ u64 avail_space;
+ u64 used_space;
+ u64 min_stripe_size;
+ int min_stripes = 1;
+ int i = 0, nr_devices;
+ int ret;
+
+ nr_devices = fs_info->fs_devices->rw_devices;
+ BUG_ON(!nr_devices);
+
+ devices_info = kmalloc(sizeof(*devices_info) * nr_devices,
+ GFP_NOFS);
+ if (!devices_info)
+ return -ENOMEM;
+
+ /* calc min stripe number for data space alloction */
+ type = btrfs_get_alloc_profile(root, 1);
+ if (type & BTRFS_BLOCK_GROUP_RAID0)
+ min_stripes = 2;
+ else if (type & BTRFS_BLOCK_GROUP_RAID1)
+ min_stripes = 2;
+ else if (type & BTRFS_BLOCK_GROUP_RAID10)
+ min_stripes = 4;
+
+ if (type & BTRFS_BLOCK_GROUP_DUP)
+ min_stripe_size = 2 * BTRFS_STRIPE_LEN;
+ else
+ min_stripe_size = BTRFS_STRIPE_LEN;
+
+ list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
+ if (!device->in_fs_metadata)
+ continue;
+
+ avail_space = device->total_bytes - device->bytes_used;
+
+ /* align with stripe_len */
+ do_div(avail_space, BTRFS_STRIPE_LEN);
+ avail_space *= BTRFS_STRIPE_LEN;
+
+ /*
+ * In order to avoid overwritting the superblock on the drive,
+ * btrfs starts at an offset of at least 1MB when doing chunk
+ * allocation.
+ */
+ skip_space = 1024 * 1024;
+
+ /* user can set the offset in fs_info->alloc_start. */
+ if (fs_info->alloc_start + BTRFS_STRIPE_LEN <=
+ device->total_bytes)
+ skip_space = max(fs_info->alloc_start, skip_space);
+
+ /*
+ * btrfs can not use the free space in [0, skip_space - 1],
+ * we must subtract it from the total. In order to implement
+ * it, we account the used space in this range first.
+ */
+ ret = btrfs_account_dev_extents_size(device, 0, skip_space - 1,
+ &used_space);
+ if (ret) {
+ kfree(devices_info);
+ return ret;
+ }
+
+ /* calc the free space in [0, skip_space - 1] */
+ skip_space -= used_space;
+
+ /*
+ * we can use the free space in [0, skip_space - 1], subtract
+ * it from the total.
+ */
+ if (avail_space && avail_space >= skip_space)
+ avail_space -= skip_space;
+ else
+ avail_space = 0;
+
+ if (avail_space < min_stripe_size)
+ continue;
+
+ devices_info[i].dev = device;
+ devices_info[i].max_avail = avail_space;
+
+ i++;
+ }
+
+ nr_devices = i;
+
+ btrfs_descending_sort_devices(devices_info, nr_devices);
+
+ i = nr_devices - 1;
+ avail_space = 0;
+ while (nr_devices >= min_stripes) {
+ if (devices_info[i].max_avail >= min_stripe_size) {
+ int j;
+ u64 alloc_size;
+
+ avail_space += devices_info[i].max_avail * min_stripes;
+ alloc_size = devices_info[i].max_avail;
+ for (j = i + 1 - min_stripes; j <= i; j++)
+ devices_info[j].max_avail -= alloc_size;
+ }
+ i--;
+ nr_devices--;
+ }
+
+ kfree(devices_info);
+ *free_bytes = avail_space;
+ return 0;
+}
+
static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
struct btrfs_root *root = btrfs_sb(dentry->d_sb);
@@ -760,17 +987,21 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
struct list_head *head = &root->fs_info->space_info;
struct btrfs_space_info *found;
u64 total_used = 0;
- u64 total_used_data = 0;
+ u64 total_free_data = 0;
int bits = dentry->d_sb->s_blocksize_bits;
__be32 *fsid = (__be32 *)root->fs_info->fsid;
+ int ret;
+ /* holding chunk_muext to avoid allocating new chunks */
+ mutex_lock(&root->fs_info->chunk_mutex);
rcu_read_lock();
list_for_each_entry_rcu(found, head, list) {
- if (found->flags & (BTRFS_BLOCK_GROUP_METADATA |
- BTRFS_BLOCK_GROUP_SYSTEM))
- total_used_data += found->disk_total;
- else
- total_used_data += found->disk_used;
+ if (found->flags & BTRFS_BLOCK_GROUP_DATA) {
+ total_free_data += found->disk_total - found->disk_used;
+ total_free_data -=
+ btrfs_account_ro_block_groups_free_space(found);
+ }
+
total_used += found->disk_used;
}
rcu_read_unlock();
@@ -778,9 +1009,17 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_namelen = BTRFS_NAME_LEN;
buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits;
buf->f_bfree = buf->f_blocks - (total_used >> bits);
- buf->f_bavail = buf->f_blocks - (total_used_data >> bits);
buf->f_bsize = dentry->d_sb->s_blocksize;
buf->f_type = BTRFS_SUPER_MAGIC;
+ buf->f_bavail = total_free_data;
+ ret = btrfs_calc_avail_data_space(root, &total_free_data);
+ if (ret) {
+ mutex_unlock(&root->fs_info->chunk_mutex);
+ return ret;
+ }
+ buf->f_bavail += total_free_data;
+ buf->f_bavail = buf->f_bavail >> bits;
+ mutex_unlock(&root->fs_info->chunk_mutex);
/* We treat it as constant endianness (it doesn't matter _which_)
because we want the fsid to come out the same whether mounted
@@ -897,10 +1136,14 @@ static int __init init_btrfs_fs(void)
if (err)
return err;
- err = btrfs_init_cachep();
+ err = btrfs_init_compress();
if (err)
goto free_sysfs;
+ err = btrfs_init_cachep();
+ if (err)
+ goto free_compress;
+
err = extent_io_init();
if (err)
goto free_cachep;
@@ -928,6 +1171,8 @@ free_extent_io:
extent_io_exit();
free_cachep:
btrfs_destroy_cachep();
+free_compress:
+ btrfs_exit_compress();
free_sysfs:
btrfs_exit_sysfs();
return err;
@@ -942,7 +1187,7 @@ static void __exit exit_btrfs_fs(void)
unregister_filesystem(&btrfs_fs_type);
btrfs_exit_sysfs();
btrfs_cleanup_fs_uuids();
- btrfs_zlib_exit();
+ btrfs_exit_compress();
}
module_init(init_btrfs_fs)
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index f50e931..bae5c7b 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -181,6 +181,9 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
struct btrfs_trans_handle *h;
struct btrfs_transaction *cur_trans;
int ret;
+
+ if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
+ return ERR_PTR(-EROFS);
again:
h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
if (!h)
@@ -910,6 +913,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
u64 to_reserve = 0;
u64 index = 0;
u64 objectid;
+ u64 root_flags;
new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS);
if (!new_root_item) {
@@ -967,6 +971,13 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
+ root_flags = btrfs_root_flags(new_root_item);
+ if (pending->readonly)
+ root_flags |= BTRFS_ROOT_SUBVOL_RDONLY;
+ else
+ root_flags &= ~BTRFS_ROOT_SUBVOL_RDONLY;
+ btrfs_set_root_flags(new_root_item, root_flags);
+
old = btrfs_lock_root_node(root);
btrfs_cow_block(trans, root, old, NULL, 0, &old);
btrfs_set_lock_blocking(old);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index f104b57..229a594 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -62,6 +62,7 @@ struct btrfs_pending_snapshot {
struct btrfs_block_rsv block_rsv;
/* extra metadata reseration for relocation */
int error;
+ bool readonly;
struct list_head list;
};
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 1718e1a..d158530 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -22,6 +22,7 @@
#include <linux/blkdev.h>
#include <linux/random.h>
#include <linux/iocontext.h>
+#include <linux/capability.h>
#include <asm/div64.h>
#include "compat.h"
#include "ctree.h"
@@ -600,8 +601,10 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
set_blocksize(bdev, 4096);
bh = btrfs_read_dev_super(bdev);
- if (!bh)
+ if (!bh) {
+ ret = -EINVAL;
goto error_close;
+ }
disk_super = (struct btrfs_super_block *)bh->b_data;
devid = btrfs_stack_device_id(&disk_super->dev_item);
@@ -703,7 +706,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
goto error_close;
bh = btrfs_read_dev_super(bdev);
if (!bh) {
- ret = -EIO;
+ ret = -EINVAL;
goto error_close;
}
disk_super = (struct btrfs_super_block *)bh->b_data;
@@ -729,59 +732,167 @@ error:
return ret;
}
+/* helper to account the used device space in the range */
+int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start,
+ u64 end, u64 *length)
+{
+ struct btrfs_key key;
+ struct btrfs_root *root = device->dev_root;
+ struct btrfs_dev_extent *dev_extent;
+ struct btrfs_path *path;
+ u64 extent_end;
+ int ret;
+ int slot;
+ struct extent_buffer *l;
+
+ *length = 0;
+
+ if (start >= device->total_bytes)
+ return 0;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+ path->reada = 2;
+
+ key.objectid = device->devid;
+ key.offset = start;
+ key.type = BTRFS_DEV_EXTENT_KEY;
+
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0)
+ goto out;
+ if (ret > 0) {
+ ret = btrfs_previous_item(root, path, key.objectid, key.type);
+ if (ret < 0)
+ goto out;
+ }
+
+ while (1) {
+ l = path->nodes[0];
+ slot = path->slots[0];
+ if (slot >= btrfs_header_nritems(l)) {
+ ret = btrfs_next_leaf(root, path);
+ if (ret == 0)
+ continue;
+ if (ret < 0)
+ goto out;
+
+ break;
+ }
+ btrfs_item_key_to_cpu(l, &key, slot);
+
+ if (key.objectid < device->devid)
+ goto next;
+
+ if (key.objectid > device->devid)
+ break;
+
+ if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY)
+ goto next;
+
+ dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
+ extent_end = key.offset + btrfs_dev_extent_length(l,
+ dev_extent);
+ if (key.offset <= start && extent_end > end) {
+ *length = end - start + 1;
+ break;
+ } else if (key.offset <= start && extent_end > start)
+ *length += extent_end - start;
+ else if (key.offset > start && extent_end <= end)
+ *length += extent_end - key.offset;
+ else if (key.offset > start && key.offset <= end) {
+ *length += end - key.offset + 1;
+ break;
+ } else if (key.offset > end)
+ break;
+
+next:
+ path->slots[0]++;
+ }
+ ret = 0;
+out:
+ btrfs_free_path(path);
+ return ret;
+}
+
/*
+ * find_free_dev_extent - find free space in the specified device
+ * @trans: transaction handler
+ * @device: the device which we search the free space in
+ * @num_bytes: the size of the free space that we need
+ * @start: store the start of the free space.
+ * @len: the size of the free space. that we find, or the size of the max
+ * free space if we don't find suitable free space
+ *
* this uses a pretty simple search, the expectation is that it is
* called very infrequently and that a given device has a small number
* of extents
+ *
+ * @start is used to store the start of the free space if we find. But if we
+ * don't find suitable free space, it will be used to store the start position
+ * of the max free space.
+ *
+ * @len is used to store the size of the free space that we find.
+ * But if we don't find suitable free space, it is used to store the size of
+ * the max free space.
*/
int find_free_dev_extent(struct btrfs_trans_handle *trans,
struct btrfs_device *device, u64 num_bytes,
- u64 *start, u64 *max_avail)
+ u64 *start, u64 *len)
{
struct btrfs_key key;
struct btrfs_root *root = device->dev_root;
- struct btrfs_dev_extent *dev_extent = NULL;
+ struct btrfs_dev_extent *dev_extent;
struct btrfs_path *path;
- u64 hole_size = 0;
- u64 last_byte = 0;
- u64 search_start = 0;
+ u64 hole_size;
+ u64 max_hole_start;
+ u64 max_hole_size;
+ u64 extent_end;
+ u64 search_start;
u64 search_end = device->total_bytes;
int ret;
- int slot = 0;
- int start_found;
+ int slot;
struct extent_buffer *l;
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
- path->reada = 2;
- start_found = 0;
-
/* FIXME use last free of some kind */
/* we don't want to overwrite the superblock on the drive,
* so we make sure to start at an offset of at least 1MB
*/
- search_start = max((u64)1024 * 1024, search_start);
+ search_start = 1024 * 1024;
- if (root->fs_info->alloc_start + num_bytes <= device->total_bytes)
+ if (root->fs_info->alloc_start + num_bytes <= search_end)
search_start = max(root->fs_info->alloc_start, search_start);
+ max_hole_start = search_start;
+ max_hole_size = 0;
+
+ if (search_start >= search_end) {
+ ret = -ENOSPC;
+ goto error;
+ }
+
+ path = btrfs_alloc_path();
+ if (!path) {
+ ret = -ENOMEM;
+ goto error;
+ }
+ path->reada = 2;
+
key.objectid = device->devid;
key.offset = search_start;
key.type = BTRFS_DEV_EXTENT_KEY;
+
ret = btrfs_search_slot(trans, root, &key, path, 0, 0);
if (ret < 0)
- goto error;
+ goto out;
if (ret > 0) {
ret = btrfs_previous_item(root, path, key.objectid, key.type);
if (ret < 0)
- goto error;
- if (ret > 0)
- start_found = 1;
+ goto out;
}
- l = path->nodes[0];
- btrfs_item_key_to_cpu(l, &key, path->slots[0]);
+
while (1) {
l = path->nodes[0];
slot = path->slots[0];
@@ -790,24 +901,9 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans,
if (ret == 0)
continue;
if (ret < 0)
- goto error;
-no_more_items:
- if (!start_found) {
- if (search_start >= search_end) {
- ret = -ENOSPC;
- goto error;
- }
- *start = search_start;
- start_found = 1;
- goto check_pending;
- }
- *start = last_byte > search_start ?
- last_byte : search_start;
- if (search_end <= *start) {
- ret = -ENOSPC;
- goto error;
- }
- goto check_pending;
+ goto out;
+
+ break;
}
btrfs_item_key_to_cpu(l, &key, slot);
@@ -815,48 +911,62 @@ no_more_items:
goto next;
if (key.objectid > device->devid)
- goto no_more_items;
+ break;
- if (key.offset >= search_start && key.offset > last_byte &&
- start_found) {
- if (last_byte < search_start)
- last_byte = search_start;
- hole_size = key.offset - last_byte;
+ if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY)
+ goto next;
- if (hole_size > *max_avail)
- *max_avail = hole_size;
+ if (key.offset > search_start) {
+ hole_size = key.offset - search_start;
- if (key.offset > last_byte &&
- hole_size >= num_bytes) {
- *start = last_byte;
- goto check_pending;
+ if (hole_size > max_hole_size) {
+ max_hole_start = search_start;
+ max_hole_size = hole_size;
+ }
+
+ /*
+ * If this free space is greater than which we need,
+ * it must be the max free space that we have found
+ * until now, so max_hole_start must point to the start
+ * of this free space and the length of this free space
+ * is stored in max_hole_size. Thus, we return
+ * max_hole_start and max_hole_size and go back to the
+ * caller.
+ */
+ if (hole_size >= num_bytes) {
+ ret = 0;
+ goto out;
}
}
- if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY)
- goto next;
- start_found = 1;
dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
- last_byte = key.offset + btrfs_dev_extent_length(l, dev_extent);
+ extent_end = key.offset + btrfs_dev_extent_length(l,
+ dev_extent);
+ if (extent_end > search_start)
+ search_start = extent_end;
next:
path->slots[0]++;
cond_resched();
}
-check_pending:
- /* we have to make sure we didn't find an extent that has already
- * been allocated by the map tree or the original allocation
- */
- BUG_ON(*start < search_start);
- if (*start + num_bytes > search_end) {
- ret = -ENOSPC;
- goto error;
+ hole_size = search_end- search_start;
+ if (hole_size > max_hole_size) {
+ max_hole_start = search_start;
+ max_hole_size = hole_size;
}
- /* check for pending inserts here */
- ret = 0;
-error:
+ /* See above. */
+ if (hole_size < num_bytes)
+ ret = -ENOSPC;
+ else
+ ret = 0;
+
+out:
btrfs_free_path(path);
+error:
+ *start = max_hole_start;
+ if (len)
+ *len = max_hole_size;
return ret;
}
@@ -1196,7 +1306,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
set_blocksize(bdev, 4096);
bh = btrfs_read_dev_super(bdev);
if (!bh) {
- ret = -EIO;
+ ret = -EINVAL;
goto error_close;
}
disk_super = (struct btrfs_super_block *)bh->b_data;
@@ -1916,6 +2026,9 @@ int btrfs_balance(struct btrfs_root *dev_root)
if (dev_root->fs_info->sb->s_flags & MS_RDONLY)
return -EROFS;
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
mutex_lock(&dev_root->fs_info->volume_mutex);
dev_root = dev_root->fs_info->dev_root;
@@ -2154,66 +2267,67 @@ static noinline u64 chunk_bytes_by_type(u64 type, u64 calc_size,
return calc_size * num_stripes;
}
-static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
- struct btrfs_root *extent_root,
- struct map_lookup **map_ret,
- u64 *num_bytes, u64 *stripe_size,
- u64 start, u64 type)
+/* Used to sort the devices by max_avail(descending sort) */
+int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2)
{
- struct btrfs_fs_info *info = extent_root->fs_info;
- struct btrfs_device *device = NULL;
- struct btrfs_fs_devices *fs_devices = info->fs_devices;
- struct list_head *cur;
- struct map_lookup *map = NULL;
- struct extent_map_tree *em_tree;
- struct extent_map *em;
- struct list_head private_devs;
- int min_stripe_size = 1 * 1024 * 1024;
- u64 calc_size = 1024 * 1024 * 1024;
- u64 max_chunk_size = calc_size;
- u64 min_free;
- u64 avail;
- u64 max_avail = 0;
- u64 dev_offset;
- int num_stripes = 1;
- int min_stripes = 1;
- int sub_stripes = 0;
- int looped = 0;
- int ret;
- int index;
- int stripe_len = 64 * 1024;
+ if (((struct btrfs_device_info *)dev_info1)->max_avail >
+ ((struct btrfs_device_info *)dev_info2)->max_avail)
+ return -1;
+ else if (((struct btrfs_device_info *)dev_info1)->max_avail <
+ ((struct btrfs_device_info *)dev_info2)->max_avail)
+ return 1;
+ else
+ return 0;
+}
- if ((type & BTRFS_BLOCK_GROUP_RAID1) &&
- (type & BTRFS_BLOCK_GROUP_DUP)) {
- WARN_ON(1);
- type &= ~BTRFS_BLOCK_GROUP_DUP;
- }
- if (list_empty(&fs_devices->alloc_list))
- return -ENOSPC;
+static int __btrfs_calc_nstripes(struct btrfs_fs_devices *fs_devices, u64 type,
+ int *num_stripes, int *min_stripes,
+ int *sub_stripes)
+{
+ *num_stripes = 1;
+ *min_stripes = 1;
+ *sub_stripes = 0;
if (type & (BTRFS_BLOCK_GROUP_RAID0)) {
- num_stripes = fs_devices->rw_devices;
- min_stripes = 2;
+ *num_stripes = fs_devices->rw_devices;
+ *min_stripes = 2;
}
if (type & (BTRFS_BLOCK_GROUP_DUP)) {
- num_stripes = 2;
- min_stripes = 2;
+ *num_stripes = 2;
+ *min_stripes = 2;
}
if (type & (BTRFS_BLOCK_GROUP_RAID1)) {
if (fs_devices->rw_devices < 2)
return -ENOSPC;
- num_stripes = 2;
- min_stripes = 2;
+ *num_stripes = 2;
+ *min_stripes = 2;
}
if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
- num_stripes = fs_devices->rw_devices;
- if (num_stripes < 4)
+ *num_stripes = fs_devices->rw_devices;
+ if (*num_stripes < 4)
return -ENOSPC;
- num_stripes &= ~(u32)1;
- sub_stripes = 2;
- min_stripes = 4;
+ *num_stripes &= ~(u32)1;
+ *sub_stripes = 2;
+ *min_stripes = 4;
}
+ return 0;
+}
+
+static u64 __btrfs_calc_stripe_size(struct btrfs_fs_devices *fs_devices,
+ u64 proposed_size, u64 type,
+ int num_stripes, int small_stripe)
+{
+ int min_stripe_size = 1 * 1024 * 1024;
+ u64 calc_size = proposed_size;
+ u64 max_chunk_size = calc_size;
+ int ncopies = 1;
+
+ if (type & (BTRFS_BLOCK_GROUP_RAID1 |
+ BTRFS_BLOCK_GROUP_DUP |
+ BTRFS_BLOCK_GROUP_RAID10))
+ ncopies = 2;
+
if (type & BTRFS_BLOCK_GROUP_DATA) {
max_chunk_size = 10 * calc_size;
min_stripe_size = 64 * 1024 * 1024;
@@ -2230,51 +2344,209 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1),
max_chunk_size);
-again:
- max_avail = 0;
- if (!map || map->num_stripes != num_stripes) {
- kfree(map);
- map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
- if (!map)
- return -ENOMEM;
- map->num_stripes = num_stripes;
- }
-
- if (calc_size * num_stripes > max_chunk_size) {
- calc_size = max_chunk_size;
+ if (calc_size * num_stripes > max_chunk_size * ncopies) {
+ calc_size = max_chunk_size * ncopies;
do_div(calc_size, num_stripes);
- do_div(calc_size, stripe_len);
- calc_size *= stripe_len;
+ do_div(calc_size, BTRFS_STRIPE_LEN);
+ calc_size *= BTRFS_STRIPE_LEN;
}
/* we don't want tiny stripes */
- if (!looped)
+ if (!small_stripe)
calc_size = max_t(u64, min_stripe_size, calc_size);
/*
- * we're about to do_div by the stripe_len so lets make sure
+ * we're about to do_div by the BTRFS_STRIPE_LEN so lets make sure
* we end up with something bigger than a stripe
*/
- calc_size = max_t(u64, calc_size, stripe_len * 4);
+ calc_size = max_t(u64, calc_size, BTRFS_STRIPE_LEN);
+
+ do_div(calc_size, BTRFS_STRIPE_LEN);
+ calc_size *= BTRFS_STRIPE_LEN;
+
+ return calc_size;
+}
+
+static struct map_lookup *__shrink_map_lookup_stripes(struct map_lookup *map,
+ int num_stripes)
+{
+ struct map_lookup *new;
+ size_t len = map_lookup_size(num_stripes);
+
+ BUG_ON(map->num_stripes < num_stripes);
+
+ if (map->num_stripes == num_stripes)
+ return map;
+
+ new = kmalloc(len, GFP_NOFS);
+ if (!new) {
+ /* just change map->num_stripes */
+ map->num_stripes = num_stripes;
+ return map;
+ }
+
+ memcpy(new, map, len);
+ new->num_stripes = num_stripes;
+ kfree(map);
+ return new;
+}
+
+/*
+ * helper to allocate device space from btrfs_device_info, in which we stored
+ * max free space information of every device. It is used when we can not
+ * allocate chunks by default size.
+ *
+ * By this helper, we can allocate a new chunk as larger as possible.
+ */
+static int __btrfs_alloc_tiny_space(struct btrfs_trans_handle *trans,
+ struct btrfs_fs_devices *fs_devices,
+ struct btrfs_device_info *devices,
+ int nr_device, u64 type,
+ struct map_lookup **map_lookup,
+ int min_stripes, u64 *stripe_size)
+{
+ int i, index, sort_again = 0;
+ int min_devices = min_stripes;
+ u64 max_avail, min_free;
+ struct map_lookup *map = *map_lookup;
+ int ret;
+
+ if (nr_device < min_stripes)
+ return -ENOSPC;
+
+ btrfs_descending_sort_devices(devices, nr_device);
+
+ max_avail = devices[0].max_avail;
+ if (!max_avail)
+ return -ENOSPC;
+
+ for (i = 0; i < nr_device; i++) {
+ /*
+ * if dev_offset = 0, it means the free space of this device
+ * is less than what we need, and we didn't search max avail
+ * extent on this device, so do it now.
+ */
+ if (!devices[i].dev_offset) {
+ ret = find_free_dev_extent(trans, devices[i].dev,
+ max_avail,
+ &devices[i].dev_offset,
+ &devices[i].max_avail);
+ if (ret != 0 && ret != -ENOSPC)
+ return ret;
+ sort_again = 1;
+ }
+ }
+
+ /* we update the max avail free extent of each devices, sort again */
+ if (sort_again)
+ btrfs_descending_sort_devices(devices, nr_device);
+
+ if (type & BTRFS_BLOCK_GROUP_DUP)
+ min_devices = 1;
+
+ if (!devices[min_devices - 1].max_avail)
+ return -ENOSPC;
+
+ max_avail = devices[min_devices - 1].max_avail;
+ if (type & BTRFS_BLOCK_GROUP_DUP)
+ do_div(max_avail, 2);
+
+ max_avail = __btrfs_calc_stripe_size(fs_devices, max_avail, type,
+ min_stripes, 1);
+ if (type & BTRFS_BLOCK_GROUP_DUP)
+ min_free = max_avail * 2;
+ else
+ min_free = max_avail;
+
+ if (min_free > devices[min_devices - 1].max_avail)
+ return -ENOSPC;
+
+ map = __shrink_map_lookup_stripes(map, min_stripes);
+ *stripe_size = max_avail;
+
+ index = 0;
+ for (i = 0; i < min_stripes; i++) {
+ map->stripes[i].dev = devices[index].dev;
+ map->stripes[i].physical = devices[index].dev_offset;
+ if (type & BTRFS_BLOCK_GROUP_DUP) {
+ i++;
+ map->stripes[i].dev = devices[index].dev;
+ map->stripes[i].physical = devices[index].dev_offset +
+ max_avail;
+ }
+ index++;
+ }
+ *map_lookup = map;
+
+ return 0;
+}
- do_div(calc_size, stripe_len);
- calc_size *= stripe_len;
+static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
+ struct btrfs_root *extent_root,
+ struct map_lookup **map_ret,
+ u64 *num_bytes, u64 *stripe_size,
+ u64 start, u64 type)
+{
+ struct btrfs_fs_info *info = extent_root->fs_info;
+ struct btrfs_device *device = NULL;
+ struct btrfs_fs_devices *fs_devices = info->fs_devices;
+ struct list_head *cur;
+ struct map_lookup *map;
+ struct extent_map_tree *em_tree;
+ struct extent_map *em;
+ struct btrfs_device_info *devices_info;
+ struct list_head private_devs;
+ u64 calc_size = 1024 * 1024 * 1024;
+ u64 min_free;
+ u64 avail;
+ u64 dev_offset;
+ int num_stripes;
+ int min_stripes;
+ int sub_stripes;
+ int min_devices; /* the min number of devices we need */
+ int i;
+ int ret;
+ int index;
+
+ if ((type & BTRFS_BLOCK_GROUP_RAID1) &&
+ (type & BTRFS_BLOCK_GROUP_DUP)) {
+ WARN_ON(1);
+ type &= ~BTRFS_BLOCK_GROUP_DUP;
+ }
+ if (list_empty(&fs_devices->alloc_list))
+ return -ENOSPC;
+
+ ret = __btrfs_calc_nstripes(fs_devices, type, &num_stripes,
+ &min_stripes, &sub_stripes);
+ if (ret)
+ return ret;
+
+ devices_info = kzalloc(sizeof(*devices_info) * fs_devices->rw_devices,
+ GFP_NOFS);
+ if (!devices_info)
+ return -ENOMEM;
+
+ map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
+ if (!map) {
+ ret = -ENOMEM;
+ goto error;
+ }
+ map->num_stripes = num_stripes;
cur = fs_devices->alloc_list.next;
index = 0;
+ i = 0;
- if (type & BTRFS_BLOCK_GROUP_DUP)
+ calc_size = __btrfs_calc_stripe_size(fs_devices, calc_size, type,
+ num_stripes, 0);
+
+ if (type & BTRFS_BLOCK_GROUP_DUP) {
min_free = calc_size * 2;
- else
+ min_devices = 1;
+ } else {
min_free = calc_size;
-
- /*
- * we add 1MB because we never use the first 1MB of the device, unless
- * we've looped, then we are likely allocating the maximum amount of
- * space left already
- */
- if (!looped)
- min_free += 1024 * 1024;
+ min_devices = min_stripes;
+ }
INIT_LIST_HEAD(&private_devs);
while (index < num_stripes) {
@@ -2287,27 +2559,39 @@ again:
cur = cur->next;
if (device->in_fs_metadata && avail >= min_free) {
- ret = find_free_dev_extent(trans, device,
- min_free, &dev_offset,
- &max_avail);
+ ret = find_free_dev_extent(trans, device, min_free,
+ &devices_info[i].dev_offset,
+ &devices_info[i].max_avail);
if (ret == 0) {
list_move_tail(&device->dev_alloc_list,
&private_devs);
map->stripes[index].dev = device;
- map->stripes[index].physical = dev_offset;
+ map->stripes[index].physical =
+ devices_info[i].dev_offset;
index++;
if (type & BTRFS_BLOCK_GROUP_DUP) {
map->stripes[index].dev = device;
map->stripes[index].physical =
- dev_offset + calc_size;
+ devices_info[i].dev_offset +
+ calc_size;
index++;
}
- }
- } else if (device->in_fs_metadata && avail > max_avail)
- max_avail = avail;
+ } else if (ret != -ENOSPC)
+ goto error;
+
+ devices_info[i].dev = device;
+ i++;
+ } else if (device->in_fs_metadata &&
+ avail >= BTRFS_STRIPE_LEN) {
+ devices_info[i].dev = device;
+ devices_info[i].max_avail = avail;
+ i++;
+ }
+
if (cur == &fs_devices->alloc_list)
break;
}
+
list_splice(&private_devs, &fs_devices->alloc_list);
if (index < num_stripes) {
if (index >= min_stripes) {
@@ -2316,34 +2600,36 @@ again:
num_stripes /= sub_stripes;
num_stripes *= sub_stripes;
}
- looped = 1;
- goto again;
- }
- if (!looped && max_avail > 0) {
- looped = 1;
- calc_size = max_avail;
- goto again;
+
+ map = __shrink_map_lookup_stripes(map, num_stripes);
+ } else if (i >= min_devices) {
+ ret = __btrfs_alloc_tiny_space(trans, fs_devices,
+ devices_info, i, type,
+ &map, min_stripes,
+ &calc_size);
+ if (ret)
+ goto error;
+ } else {
+ ret = -ENOSPC;
+ goto error;
}
- kfree(map);
- return -ENOSPC;
}
map->sector_size = extent_root->sectorsize;
- map->stripe_len = stripe_len;
- map->io_align = stripe_len;
- map->io_width = stripe_len;
+ map->stripe_len = BTRFS_STRIPE_LEN;
+ map->io_align = BTRFS_STRIPE_LEN;
+ map->io_width = BTRFS_STRIPE_LEN;
map->type = type;
- map->num_stripes = num_stripes;
map->sub_stripes = sub_stripes;
*map_ret = map;
*stripe_size = calc_size;
*num_bytes = chunk_bytes_by_type(type, calc_size,
- num_stripes, sub_stripes);
+ map->num_stripes, sub_stripes);
em = alloc_extent_map(GFP_NOFS);
if (!em) {
- kfree(map);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto error;
}
em->bdev = (struct block_device *)map;
em->start = start;
@@ -2376,7 +2662,13 @@ again:
index++;
}
+ kfree(devices_info);
return 0;
+
+error:
+ kfree(map);
+ kfree(devices_info);
+ return ret;
}
static int __finish_chunk_alloc(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 1be7810..7fb59d4 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -20,8 +20,11 @@
#define __BTRFS_VOLUMES_
#include <linux/bio.h>
+#include <linux/sort.h>
#include "async-thread.h"
+#define BTRFS_STRIPE_LEN (64 * 1024)
+
struct buffer_head;
struct btrfs_pending_bios {
struct bio *head;
@@ -136,6 +139,30 @@ struct btrfs_multi_bio {
struct btrfs_bio_stripe stripes[];
};
+struct btrfs_device_info {
+ struct btrfs_device *dev;
+ u64 dev_offset;
+ u64 max_avail;
+};
+
+/* Used to sort the devices by max_avail(descending sort) */
+int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2);
+
+/*
+ * sort the devices by max_avail, in which max free extent size of each device
+ * is stored.(Descending Sort)
+ */
+static inline void btrfs_descending_sort_devices(
+ struct btrfs_device_info *devices,
+ size_t nr_devices)
+{
+ sort(devices, nr_devices, sizeof(struct btrfs_device_info),
+ btrfs_cmp_device_free_bytes, NULL);
+}
+
+int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start,
+ u64 end, u64 *length);
+
#define btrfs_multi_bio_size(n) (sizeof(struct btrfs_multi_bio) + \
(sizeof(struct btrfs_bio_stripe) * (n)))
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index 698fdd2..a577653 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -316,6 +316,15 @@ ssize_t btrfs_getxattr(struct dentry *dentry, const char *name,
int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value,
size_t size, int flags)
{
+ struct btrfs_root *root = BTRFS_I(dentry->d_inode)->root;
+
+ /*
+ * The permission on security.* and system.* is not checked
+ * in permission().
+ */
+ if (btrfs_root_readonly(root))
+ return -EROFS;
+
/*
* If this is a request for a synthetic attribute in the system.*
* namespace use the generic infrastructure to resolve a handler
@@ -336,6 +345,15 @@ int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value,
int btrfs_removexattr(struct dentry *dentry, const char *name)
{
+ struct btrfs_root *root = BTRFS_I(dentry->d_inode)->root;
+
+ /*
+ * The permission on security.* and system.* is not checked
+ * in permission().
+ */
+ if (btrfs_root_readonly(root))
+ return -EROFS;
+
/*
* If this is a request for a synthetic attribute in the system.*
* namespace use the generic infrastructure to resolve a handler
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
index b9cd544..f5ec2d4 100644
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -32,15 +32,6 @@
#include <linux/bio.h>
#include "compression.h"
-/* Plan: call deflate() with avail_in == *sourcelen,
- avail_out = *dstlen - 12 and flush == Z_FINISH.
- If it doesn't manage to finish, call it again with
- avail_in == 0 and avail_out set to the remaining 12
- bytes for it to clean up.
- Q: Is 12 bytes sufficient?
-*/
-#define STREAM_END_SPACE 12
-
struct workspace {
z_stream inf_strm;
z_stream def_strm;
@@ -48,152 +39,51 @@ struct workspace {
struct list_head list;
};
-static LIST_HEAD(idle_workspace);
-static DEFINE_SPINLOCK(workspace_lock);
-static unsigned long num_workspace;
-static atomic_t alloc_workspace = ATOMIC_INIT(0);
-static DECLARE_WAIT_QUEUE_HEAD(workspace_wait);
+static void zlib_free_workspace(struct list_head *ws)
+{
+ struct workspace *workspace = list_entry(ws, struct workspace, list);
-/*
- * this finds an available zlib workspace or allocates a new one
- * NULL or an ERR_PTR is returned if things go bad.
- */
-static struct workspace *find_zlib_workspace(void)
+ vfree(workspace->def_strm.workspace);
+ vfree(workspace->inf_strm.workspace);
+ kfree(workspace->buf);
+ kfree(workspace);
+}
+
+static struct list_head *zlib_alloc_workspace(void)
{
struct workspace *workspace;
- int ret;
- int cpus = num_online_cpus();
-
-again:
- spin_lock(&workspace_lock);
- if (!list_empty(&idle_workspace)) {
- workspace = list_entry(idle_workspace.next, struct workspace,
- list);
- list_del(&workspace->list);
- num_workspace--;
- spin_unlock(&workspace_lock);
- return workspace;
- }
- spin_unlock(&workspace_lock);
- if (atomic_read(&alloc_workspace) > cpus) {
- DEFINE_WAIT(wait);
- prepare_to_wait(&workspace_wait, &wait, TASK_UNINTERRUPTIBLE);
- if (atomic_read(&alloc_workspace) > cpus)
- schedule();
- finish_wait(&workspace_wait, &wait);
- goto again;
- }
- atomic_inc(&alloc_workspace);
workspace = kzalloc(sizeof(*workspace), GFP_NOFS);
- if (!workspace) {
- ret = -ENOMEM;
- goto fail;
- }
+ if (!workspace)
+ return ERR_PTR(-ENOMEM);
workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize());
- if (!workspace->def_strm.workspace) {
- ret = -ENOMEM;
- goto fail;
- }
workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize());
- if (!workspace->inf_strm.workspace) {
- ret = -ENOMEM;
- goto fail_inflate;
- }
workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS);
- if (!workspace->buf) {
- ret = -ENOMEM;
- goto fail_kmalloc;
- }
- return workspace;
-
-fail_kmalloc:
- vfree(workspace->inf_strm.workspace);
-fail_inflate:
- vfree(workspace->def_strm.workspace);
-fail:
- kfree(workspace);
- atomic_dec(&alloc_workspace);
- wake_up(&workspace_wait);
- return ERR_PTR(ret);
-}
-
-/*
- * put a workspace struct back on the list or free it if we have enough
- * idle ones sitting around
- */
-static int free_workspace(struct workspace *workspace)
-{
- spin_lock(&workspace_lock);
- if (num_workspace < num_online_cpus()) {
- list_add_tail(&workspace->list, &idle_workspace);
- num_workspace++;
- spin_unlock(&workspace_lock);
- if (waitqueue_active(&workspace_wait))
- wake_up(&workspace_wait);
- return 0;
- }
- spin_unlock(&workspace_lock);
- vfree(workspace->def_strm.workspace);
- vfree(workspace->inf_strm.workspace);
- kfree(workspace->buf);
- kfree(workspace);
+ if (!workspace->def_strm.workspace ||
+ !workspace->inf_strm.workspace || !workspace->buf)
+ goto fail;
- atomic_dec(&alloc_workspace);
- if (waitqueue_active(&workspace_wait))
- wake_up(&workspace_wait);
- return 0;
-}
+ INIT_LIST_HEAD(&workspace->list);
-/*
- * cleanup function for module exit
- */
-static void free_workspaces(void)
-{
- struct workspace *workspace;
- while (!list_empty(&idle_workspace)) {
- workspace = list_entry(idle_workspace.next, struct workspace,
- list);
- list_del(&workspace->list);
- vfree(workspace->def_strm.workspace);
- vfree(workspace->inf_strm.workspace);
- kfree(workspace->buf);
- kfree(workspace);
- atomic_dec(&alloc_workspace);
- }
+ return &workspace->list;
+fail:
+ zlib_free_workspace(&workspace->list);
+ return ERR_PTR(-ENOMEM);
}
-/*
- * given an address space and start/len, compress the bytes.
- *
- * pages are allocated to hold the compressed result and stored
- * in 'pages'
- *
- * out_pages is used to return the number of pages allocated. There
- * may be pages allocated even if we return an error
- *
- * total_in is used to return the number of bytes actually read. It
- * may be smaller then len if we had to exit early because we
- * ran out of room in the pages array or because we cross the
- * max_out threshold.
- *
- * total_out is used to return the total number of compressed bytes
- *
- * max_out tells us the max number of bytes that we're allowed to
- * stuff into pages
- */
-int btrfs_zlib_compress_pages(struct address_space *mapping,
- u64 start, unsigned long len,
- struct page **pages,
- unsigned long nr_dest_pages,
- unsigned long *out_pages,
- unsigned long *total_in,
- unsigned long *total_out,
- unsigned long max_out)
+static int zlib_compress_pages(struct list_head *ws,
+ struct address_space *mapping,
+ u64 start, unsigned long len,
+ struct page **pages,
+ unsigned long nr_dest_pages,
+ unsigned long *out_pages,
+ unsigned long *total_in,
+ unsigned long *total_out,
+ unsigned long max_out)
{
+ struct workspace *workspace = list_entry(ws, struct workspace, list);
int ret;
- struct workspace *workspace;
char *data_in;
char *cpage_out;
int nr_pages = 0;
@@ -205,10 +95,6 @@ int btrfs_zlib_compress_pages(struct address_space *mapping,
*total_out = 0;
*total_in = 0;
- workspace = find_zlib_workspace();
- if (IS_ERR(workspace))
- return -1;
-
if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) {
printk(KERN_WARNING "deflateInit failed\n");
ret = -1;
@@ -222,6 +108,10 @@ int btrfs_zlib_compress_pages(struct address_space *mapping,
data_in = kmap(in_page);
out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+ if (out_page == NULL) {
+ ret = -1;
+ goto out;
+ }
cpage_out = kmap(out_page);
pages[0] = out_page;
nr_pages = 1;
@@ -260,6 +150,10 @@ int btrfs_zlib_compress_pages(struct address_space *mapping,
goto out;
}
out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+ if (out_page == NULL) {
+ ret = -1;
+ goto out;
+ }
cpage_out = kmap(out_page);
pages[nr_pages] = out_page;
nr_pages++;
@@ -314,55 +208,26 @@ out:
kunmap(in_page);
page_cache_release(in_page);
}
- free_workspace(workspace);
return ret;
}
-/*
- * pages_in is an array of pages with compressed data.
- *
- * disk_start is the starting logical offset of this array in the file
- *
- * bvec is a bio_vec of pages from the file that we want to decompress into
- *
- * vcnt is the count of pages in the biovec
- *
- * srclen is the number of bytes in pages_in
- *
- * The basic idea is that we have a bio that was created by readpages.
- * The pages in the bio are for the uncompressed data, and they may not
- * be contiguous. They all correspond to the range of bytes covered by
- * the compressed extent.
- */
-int btrfs_zlib_decompress_biovec(struct page **pages_in,
- u64 disk_start,
- struct bio_vec *bvec,
- int vcnt,
- size_t srclen)
+static int zlib_decompress_biovec(struct list_head *ws, struct page **pages_in,
+ u64 disk_start,
+ struct bio_vec *bvec,
+ int vcnt,
+ size_t srclen)
{
- int ret = 0;
+ struct workspace *workspace = list_entry(ws, struct workspace, list);
+ int ret = 0, ret2;
int wbits = MAX_WBITS;
- struct workspace *workspace;
char *data_in;
size_t total_out = 0;
- unsigned long page_bytes_left;
unsigned long page_in_index = 0;
unsigned long page_out_index = 0;
- struct page *page_out;
unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) /
PAGE_CACHE_SIZE;
unsigned long buf_start;
- unsigned long buf_offset;
- unsigned long bytes;
- unsigned long working_bytes;
unsigned long pg_offset;
- unsigned long start_byte;
- unsigned long current_buf_start;
- char *kaddr;
-
- workspace = find_zlib_workspace();
- if (IS_ERR(workspace))
- return -ENOMEM;
data_in = kmap(pages_in[page_in_index]);
workspace->inf_strm.next_in = data_in;
@@ -372,8 +237,6 @@ int btrfs_zlib_decompress_biovec(struct page **pages_in,
workspace->inf_strm.total_out = 0;
workspace->inf_strm.next_out = workspace->buf;
workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
- page_out = bvec[page_out_index].bv_page;
- page_bytes_left = PAGE_CACHE_SIZE;
pg_offset = 0;
/* If it's deflate, and it's got no preset dictionary, then
@@ -389,107 +252,29 @@ int btrfs_zlib_decompress_biovec(struct page **pages_in,
if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) {
printk(KERN_WARNING "inflateInit failed\n");
- ret = -1;
- goto out;
+ return -1;
}
while (workspace->inf_strm.total_in < srclen) {
ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH);
if (ret != Z_OK && ret != Z_STREAM_END)
break;
- /*
- * buf start is the byte offset we're of the start of
- * our workspace buffer
- */
- buf_start = total_out;
- /* total_out is the last byte of the workspace buffer */
+ buf_start = total_out;
total_out = workspace->inf_strm.total_out;
- working_bytes = total_out - buf_start;
-
- /*
- * start byte is the first byte of the page we're currently
- * copying into relative to the start of the compressed data.
- */
- start_byte = page_offset(page_out) - disk_start;
-
- if (working_bytes == 0) {
- /* we didn't make progress in this inflate
- * call, we're done
- */
- if (ret != Z_STREAM_END)
- ret = -1;
+ /* we didn't make progress in this inflate call, we're done */
+ if (buf_start == total_out)
break;
- }
- /* we haven't yet hit data corresponding to this page */
- if (total_out <= start_byte)
- goto next;
-
- /*
- * the start of the data we care about is offset into
- * the middle of our working buffer
- */
- if (total_out > start_byte && buf_start < start_byte) {
- buf_offset = start_byte - buf_start;
- working_bytes -= buf_offset;
- } else {
- buf_offset = 0;
- }
- current_buf_start = buf_start;
-
- /* copy bytes from the working buffer into the pages */
- while (working_bytes > 0) {
- bytes = min(PAGE_CACHE_SIZE - pg_offset,
- PAGE_CACHE_SIZE - buf_offset);
- bytes = min(bytes, working_bytes);
- kaddr = kmap_atomic(page_out, KM_USER0);
- memcpy(kaddr + pg_offset, workspace->buf + buf_offset,
- bytes);
- kunmap_atomic(kaddr, KM_USER0);
- flush_dcache_page(page_out);
-
- pg_offset += bytes;
- page_bytes_left -= bytes;
- buf_offset += bytes;
- working_bytes -= bytes;
- current_buf_start += bytes;
-
- /* check if we need to pick another page */
- if (page_bytes_left == 0) {
- page_out_index++;
- if (page_out_index >= vcnt) {
- ret = 0;
- goto done;
- }
-
- page_out = bvec[page_out_index].bv_page;
- pg_offset = 0;
- page_bytes_left = PAGE_CACHE_SIZE;
- start_byte = page_offset(page_out) - disk_start;
-
- /*
- * make sure our new page is covered by this
- * working buffer
- */
- if (total_out <= start_byte)
- goto next;
-
- /* the next page in the biovec might not
- * be adjacent to the last page, but it
- * might still be found inside this working
- * buffer. bump our offset pointer
- */
- if (total_out > start_byte &&
- current_buf_start < start_byte) {
- buf_offset = start_byte - buf_start;
- working_bytes = total_out - start_byte;
- current_buf_start = buf_start +
- buf_offset;
- }
- }
+ ret2 = btrfs_decompress_buf2page(workspace->buf, buf_start,
+ total_out, disk_start,
+ bvec, vcnt,
+ &page_out_index, &pg_offset);
+ if (ret2 == 0) {
+ ret = 0;
+ goto done;
}
-next:
+
workspace->inf_strm.next_out = workspace->buf;
workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
@@ -516,35 +301,21 @@ done:
zlib_inflateEnd(&workspace->inf_strm);
if (data_in)
kunmap(pages_in[page_in_index]);
-out:
- free_workspace(workspace);
return ret;
}
-/*
- * a less complex decompression routine. Our compressed data fits in a
- * single page, and we want to read a single page out of it.
- * start_byte tells us the offset into the compressed data we're interested in
- */
-int btrfs_zlib_decompress(unsigned char *data_in,
- struct page *dest_page,
- unsigned long start_byte,
- size_t srclen, size_t destlen)
+static int zlib_decompress(struct list_head *ws, unsigned char *data_in,
+ struct page *dest_page,
+ unsigned long start_byte,
+ size_t srclen, size_t destlen)
{
+ struct workspace *workspace = list_entry(ws, struct workspace, list);
int ret = 0;
int wbits = MAX_WBITS;
- struct workspace *workspace;
unsigned long bytes_left = destlen;
unsigned long total_out = 0;
char *kaddr;
- if (destlen > PAGE_CACHE_SIZE)
- return -ENOMEM;
-
- workspace = find_zlib_workspace();
- if (IS_ERR(workspace))
- return -ENOMEM;
-
workspace->inf_strm.next_in = data_in;
workspace->inf_strm.avail_in = srclen;
workspace->inf_strm.total_in = 0;
@@ -565,8 +336,7 @@ int btrfs_zlib_decompress(unsigned char *data_in,
if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) {
printk(KERN_WARNING "inflateInit failed\n");
- ret = -1;
- goto out;
+ return -1;
}
while (bytes_left > 0) {
@@ -616,12 +386,13 @@ next:
ret = 0;
zlib_inflateEnd(&workspace->inf_strm);
-out:
- free_workspace(workspace);
return ret;
}
-void btrfs_zlib_exit(void)
-{
- free_workspaces();
-}
+struct btrfs_compress_op btrfs_zlib_compress = {
+ .alloc_workspace = zlib_alloc_workspace,
+ .free_workspace = zlib_free_workspace,
+ .compress_pages = zlib_compress_pages,
+ .decompress_biovec = zlib_decompress_biovec,
+ .decompress = zlib_decompress,
+};
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index c68a056..7ed3653 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -255,35 +255,6 @@ static struct vfsmount *cifs_dfs_do_refmount(struct cifs_sb_info *cifs_sb,
}
-static int add_mount_helper(struct vfsmount *newmnt, struct nameidata *nd,
- struct list_head *mntlist)
-{
- /* stolen from afs code */
- int err;
-
- mntget(newmnt);
- err = do_add_mount(newmnt, &nd->path, nd->path.mnt->mnt_flags | MNT_SHRINKABLE, mntlist);
- switch (err) {
- case 0:
- path_put(&nd->path);
- nd->path.mnt = newmnt;
- nd->path.dentry = dget(newmnt->mnt_root);
- schedule_delayed_work(&cifs_dfs_automount_task,
- cifs_dfs_mountpoint_expiry_timeout);
- break;
- case -EBUSY:
- /* someone else made a mount here whilst we were busy */
- while (d_mountpoint(nd->path.dentry) &&
- follow_down(&nd->path))
- ;
- err = 0;
- default:
- mntput(newmnt);
- break;
- }
- return err;
-}
-
static void dump_referral(const struct dfs_info3_param *ref)
{
cFYI(1, "DFS: ref path: %s", ref->path_name);
@@ -293,45 +264,43 @@ static void dump_referral(const struct dfs_info3_param *ref)
ref->path_consumed);
}
-
-static void*
-cifs_dfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
+/*
+ * Create a vfsmount that we can automount
+ */
+static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt)
{
struct dfs_info3_param *referrals = NULL;
unsigned int num_referrals = 0;
struct cifs_sb_info *cifs_sb;
struct cifsSesInfo *ses;
- char *full_path = NULL;
+ char *full_path;
int xid, i;
- int rc = 0;
- struct vfsmount *mnt = ERR_PTR(-ENOENT);
+ int rc;
+ struct vfsmount *mnt;
struct tcon_link *tlink;
cFYI(1, "in %s", __func__);
- BUG_ON(IS_ROOT(dentry));
+ BUG_ON(IS_ROOT(mntpt));
xid = GetXid();
- dput(nd->path.dentry);
- nd->path.dentry = dget(dentry);
-
/*
* The MSDFS spec states that paths in DFS referral requests and
* responses must be prefixed by a single '\' character instead of
* the double backslashes usually used in the UNC. This function
* gives us the latter, so we must adjust the result.
*/
- full_path = build_path_from_dentry(dentry);
- if (full_path == NULL) {
- rc = -ENOMEM;
- goto out_err;
- }
+ mnt = ERR_PTR(-ENOMEM);
+ full_path = build_path_from_dentry(mntpt);
+ if (full_path == NULL)
+ goto free_xid;
- cifs_sb = CIFS_SB(dentry->d_inode->i_sb);
+ cifs_sb = CIFS_SB(mntpt->d_inode->i_sb);
tlink = cifs_sb_tlink(cifs_sb);
+ mnt = ERR_PTR(-EINVAL);
if (IS_ERR(tlink)) {
- rc = PTR_ERR(tlink);
- goto out_err;
+ mnt = ERR_CAST(tlink);
+ goto free_full_path;
}
ses = tlink_tcon(tlink)->ses;
@@ -341,46 +310,63 @@ cifs_dfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
cifs_put_tlink(tlink);
+ mnt = ERR_PTR(-ENOENT);
for (i = 0; i < num_referrals; i++) {
int len;
- dump_referral(referrals+i);
+ dump_referral(referrals + i);
/* connect to a node */
len = strlen(referrals[i].node_name);
if (len < 2) {
cERROR(1, "%s: Net Address path too short: %s",
__func__, referrals[i].node_name);
- rc = -EINVAL;
- goto out_err;
+ mnt = ERR_PTR(-EINVAL);
+ break;
}
mnt = cifs_dfs_do_refmount(cifs_sb,
full_path, referrals + i);
cFYI(1, "%s: cifs_dfs_do_refmount:%s , mnt:%p", __func__,
referrals[i].node_name, mnt);
-
- /* complete mount procedure if we accured submount */
if (!IS_ERR(mnt))
- break;
+ goto success;
}
- /* we need it cause for() above could exit without valid submount */
- rc = PTR_ERR(mnt);
- if (IS_ERR(mnt))
- goto out_err;
-
- rc = add_mount_helper(mnt, nd, &cifs_dfs_automount_list);
+ /* no valid submounts were found; return error from get_dfs_path() by
+ * preference */
+ if (rc != 0)
+ mnt = ERR_PTR(rc);
-out:
- FreeXid(xid);
+success:
free_dfs_info_array(referrals, num_referrals);
+free_full_path:
kfree(full_path);
+free_xid:
+ FreeXid(xid);
cFYI(1, "leaving %s" , __func__);
- return ERR_PTR(rc);
-out_err:
- path_put(&nd->path);
- goto out;
+ return mnt;
+}
+
+/*
+ * Attempt to automount the referral
+ */
+struct vfsmount *cifs_dfs_d_automount(struct path *path)
+{
+ struct vfsmount *newmnt;
+
+ cFYI(1, "in %s", __func__);
+
+ newmnt = cifs_dfs_do_automount(path->dentry);
+ if (IS_ERR(newmnt)) {
+ cFYI(1, "leaving %s [automount failed]" , __func__);
+ return newmnt;
+ }
+
+ mntget(newmnt); /* prevent immediate expiration */
+ mnt_set_expiry(newmnt, &cifs_dfs_automount_list);
+ schedule_delayed_work(&cifs_dfs_automount_task,
+ cifs_dfs_mountpoint_expiry_timeout);
+ cFYI(1, "leaving %s [ok]" , __func__);
+ return newmnt;
}
const struct inode_operations cifs_dfs_referral_inode_operations = {
- .follow_link = cifs_dfs_follow_mountpoint,
};
-
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 897b2b2..851030f 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -93,6 +93,12 @@ extern int cifs_readdir(struct file *file, void *direntry, filldir_t filldir);
extern const struct dentry_operations cifs_dentry_ops;
extern const struct dentry_operations cifs_ci_dentry_ops;
+#ifdef CONFIG_CIFS_DFS_UPCALL
+extern struct vfsmount *cifs_dfs_d_automount(struct path *path);
+#else
+#define cifs_dfs_d_automount NULL
+#endif
+
/* Functions related to symlinks */
extern void *cifs_follow_link(struct dentry *direntry, struct nameidata *nd);
extern void cifs_put_link(struct dentry *direntry,
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index a65d311..9f59887 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1113,6 +1113,8 @@ cifs_parse_mount_options(char *options, const char *devname,
} else if (!strnicmp(data, "uid", 3) && value && *value) {
vol->linux_uid = simple_strtoul(value, &value, 0);
uid_specified = true;
+ } else if (!strnicmp(data, "cruid", 5) && value && *value) {
+ vol->cred_uid = simple_strtoul(value, &value, 0);
} else if (!strnicmp(data, "forceuid", 8)) {
override_uid = 1;
} else if (!strnicmp(data, "noforceuid", 10)) {
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 1e95dd6..dd5f229 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -675,6 +675,7 @@ cifs_d_revalidate(struct dentry *direntry, struct nameidata *nd)
const struct dentry_operations cifs_dentry_ops = {
.d_revalidate = cifs_d_revalidate,
+ .d_automount = cifs_dfs_d_automount,
/* d_delete: cifs_d_delete, */ /* not needed except for debugging */
};
@@ -711,4 +712,5 @@ const struct dentry_operations cifs_ci_dentry_ops = {
.d_revalidate = cifs_d_revalidate,
.d_hash = cifs_ci_hash,
.d_compare = cifs_ci_compare,
+ .d_automount = cifs_dfs_d_automount,
};
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index b06b606..6c9ee80 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -32,7 +32,7 @@
#include "fscache.h"
-static void cifs_set_ops(struct inode *inode, const bool is_dfs_referral)
+static void cifs_set_ops(struct inode *inode)
{
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
@@ -60,7 +60,7 @@ static void cifs_set_ops(struct inode *inode, const bool is_dfs_referral)
break;
case S_IFDIR:
#ifdef CONFIG_CIFS_DFS_UPCALL
- if (is_dfs_referral) {
+ if (IS_AUTOMOUNT(inode)) {
inode->i_op = &cifs_dfs_referral_inode_operations;
} else {
#else /* NO DFS support, treat as a directory */
@@ -167,7 +167,9 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr)
}
spin_unlock(&inode->i_lock);
- cifs_set_ops(inode, fattr->cf_flags & CIFS_FATTR_DFS_REFERRAL);
+ if (fattr->cf_flags & CIFS_FATTR_DFS_REFERRAL)
+ inode->i_flags |= S_AUTOMOUNT;
+ cifs_set_ops(inode);
}
void
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index 9aad47a..6783ce6 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -899,8 +899,8 @@ map_smb_to_linux_error(struct smb_hdr *smb, int logErr)
}
/* else ERRHRD class errors or junk - return EIO */
- cFYI(1, "Mapping smb error code %d to POSIX err %d",
- smberrcode, rc);
+ cFYI(1, "Mapping smb error code 0x%x to POSIX err %d",
+ le32_to_cpu(smb->Status.CifsError), rc);
/* generic corrective action e.g. reconnect SMB session on
* ERRbaduid could be added */
diff --git a/fs/compat.c b/fs/compat.c
index eb1740a..f6fd0a0 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -257,7 +257,7 @@ static int put_compat_statfs(struct compat_statfs __user *ubuf, struct kstatfs *
}
/*
- * The following statfs calls are copies of code from fs/open.c and
+ * The following statfs calls are copies of code from fs/statfs.c and
* should be checked against those from time to time
*/
asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_statfs __user *buf)
@@ -320,7 +320,9 @@ static int put_compat_statfs64(struct compat_statfs64 __user *ubuf, struct kstat
__put_user(kbuf->f_namelen, &ubuf->f_namelen) ||
__put_user(kbuf->f_fsid.val[0], &ubuf->f_fsid.val[0]) ||
__put_user(kbuf->f_fsid.val[1], &ubuf->f_fsid.val[1]) ||
- __put_user(kbuf->f_frsize, &ubuf->f_frsize))
+ __put_user(kbuf->f_frsize, &ubuf->f_frsize) ||
+ __put_user(kbuf->f_flags, &ubuf->f_flags) ||
+ __clear_user(ubuf->f_spare, sizeof(ubuf->f_spare)))
return -EFAULT;
return 0;
}
@@ -597,10 +599,8 @@ ssize_t compat_rw_copy_check_uvector(int type,
if (nr_segs > fast_segs) {
ret = -ENOMEM;
iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL);
- if (iov == NULL) {
- *ret_pointer = fast_pointer;
+ if (iov == NULL)
goto out;
- }
}
*ret_pointer = iov;
diff --git a/fs/configfs/Kconfig b/fs/configfs/Kconfig
index 13587cc..9febcde 100644
--- a/fs/configfs/Kconfig
+++ b/fs/configfs/Kconfig
@@ -1,8 +1,8 @@
config CONFIGFS_FS
tristate "Userspace-driven configuration filesystem"
- depends on SYSFS
+ select SYSFS
help
- configfs is a ram-based filesystem that provides the converse
+ configfs is a RAM-based filesystem that provides the converse
of sysfs's functionality. Where sysfs is a filesystem-based
view of kernel objects, configfs is a filesystem-based manager
of kernel objects, or config_items.
diff --git a/fs/dcache.c b/fs/dcache.c
index 274a222..9f493ee 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1380,8 +1380,11 @@ EXPORT_SYMBOL(d_set_d_op);
static void __d_instantiate(struct dentry *dentry, struct inode *inode)
{
spin_lock(&dentry->d_lock);
- if (inode)
+ if (inode) {
+ if (unlikely(IS_AUTOMOUNT(inode)))
+ dentry->d_flags |= DCACHE_NEED_AUTOMOUNT;
list_add(&dentry->d_alias, &inode->i_dentry);
+ }
dentry->d_inode = inode;
dentry_rcuwalk_barrier(dentry);
spin_unlock(&dentry->d_lock);
diff --git a/fs/dlm/Kconfig b/fs/dlm/Kconfig
index 2dbb422..1897eb1 100644
--- a/fs/dlm/Kconfig
+++ b/fs/dlm/Kconfig
@@ -1,8 +1,7 @@
menuconfig DLM
tristate "Distributed Lock Manager (DLM)"
depends on EXPERIMENTAL && INET
- depends on SYSFS && (IPV6 || IPV6=n)
- select CONFIGFS_FS
+ depends on SYSFS && CONFIGFS_FS && (IPV6 || IPV6=n)
select IP_SCTP
help
A general purpose distributed lock manager for kernel or userspace
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index cbadc1b..bfd8b68 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -348,7 +348,7 @@ static int encrypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat,
BUG_ON(!crypt_stat || !crypt_stat->tfm
|| !(crypt_stat->flags & ECRYPTFS_STRUCT_INITIALIZED));
if (unlikely(ecryptfs_verbosity > 0)) {
- ecryptfs_printk(KERN_DEBUG, "Key size [%d]; key:\n",
+ ecryptfs_printk(KERN_DEBUG, "Key size [%zd]; key:\n",
crypt_stat->key_size);
ecryptfs_dump_hex(crypt_stat->key,
crypt_stat->key_size);
@@ -413,10 +413,9 @@ static int ecryptfs_encrypt_extent(struct page *enc_extent_page,
rc = ecryptfs_derive_iv(extent_iv, crypt_stat,
(extent_base + extent_offset));
if (rc) {
- ecryptfs_printk(KERN_ERR, "Error attempting to "
- "derive IV for extent [0x%.16x]; "
- "rc = [%d]\n", (extent_base + extent_offset),
- rc);
+ ecryptfs_printk(KERN_ERR, "Error attempting to derive IV for "
+ "extent [0x%.16llx]; rc = [%d]\n",
+ (unsigned long long)(extent_base + extent_offset), rc);
goto out;
}
if (unlikely(ecryptfs_verbosity > 0)) {
@@ -443,9 +442,9 @@ static int ecryptfs_encrypt_extent(struct page *enc_extent_page,
}
rc = 0;
if (unlikely(ecryptfs_verbosity > 0)) {
- ecryptfs_printk(KERN_DEBUG, "Encrypt extent [0x%.16x]; "
- "rc = [%d]\n", (extent_base + extent_offset),
- rc);
+ ecryptfs_printk(KERN_DEBUG, "Encrypt extent [0x%.16llx]; "
+ "rc = [%d]\n",
+ (unsigned long long)(extent_base + extent_offset), rc);
ecryptfs_printk(KERN_DEBUG, "First 8 bytes after "
"encryption:\n");
ecryptfs_dump_hex((char *)(page_address(enc_extent_page)), 8);
@@ -540,10 +539,9 @@ static int ecryptfs_decrypt_extent(struct page *page,
rc = ecryptfs_derive_iv(extent_iv, crypt_stat,
(extent_base + extent_offset));
if (rc) {
- ecryptfs_printk(KERN_ERR, "Error attempting to "
- "derive IV for extent [0x%.16x]; "
- "rc = [%d]\n", (extent_base + extent_offset),
- rc);
+ ecryptfs_printk(KERN_ERR, "Error attempting to derive IV for "
+ "extent [0x%.16llx]; rc = [%d]\n",
+ (unsigned long long)(extent_base + extent_offset), rc);
goto out;
}
if (unlikely(ecryptfs_verbosity > 0)) {
@@ -571,9 +569,9 @@ static int ecryptfs_decrypt_extent(struct page *page,
}
rc = 0;
if (unlikely(ecryptfs_verbosity > 0)) {
- ecryptfs_printk(KERN_DEBUG, "Decrypt extent [0x%.16x]; "
- "rc = [%d]\n", (extent_base + extent_offset),
- rc);
+ ecryptfs_printk(KERN_DEBUG, "Decrypt extent [0x%.16llx]; "
+ "rc = [%d]\n",
+ (unsigned long long)(extent_base + extent_offset), rc);
ecryptfs_printk(KERN_DEBUG, "First 8 bytes after "
"decryption:\n");
ecryptfs_dump_hex((char *)(page_address(page)
@@ -780,7 +778,7 @@ int ecryptfs_init_crypt_ctx(struct ecryptfs_crypt_stat *crypt_stat)
}
ecryptfs_printk(KERN_DEBUG,
"Initializing cipher [%s]; strlen = [%d]; "
- "key_size_bits = [%d]\n",
+ "key_size_bits = [%zd]\n",
crypt_stat->cipher, (int)strlen(crypt_stat->cipher),
crypt_stat->key_size << 3);
if (crypt_stat->tfm) {
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 413a3c4..dbc84ed 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -192,7 +192,6 @@ ecryptfs_get_key_payload_data(struct key *key)
(((struct user_key_payload*)key->payload.data)->data);
}
-#define ECRYPTFS_SUPER_MAGIC 0xf15f
#define ECRYPTFS_MAX_KEYSET_SIZE 1024
#define ECRYPTFS_MAX_CIPHER_NAME_SIZE 32
#define ECRYPTFS_MAX_NUM_ENC_KEYS 64
@@ -584,6 +583,7 @@ ecryptfs_set_dentry_lower_mnt(struct dentry *dentry, struct vfsmount *lower_mnt)
#define ecryptfs_printk(type, fmt, arg...) \
__ecryptfs_printk(type "%s: " fmt, __func__, ## arg);
+__attribute__ ((format(printf, 1, 2)))
void __ecryptfs_printk(const char *fmt, ...);
extern const struct file_operations ecryptfs_main_fops;
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 91da029..81e10e6 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -47,7 +47,7 @@ static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb,
const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
- int rc;
+ ssize_t rc;
struct dentry *lower_dentry;
struct vfsmount *lower_vfsmount;
struct file *file = iocb->ki_filp;
@@ -191,18 +191,16 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
| ECRYPTFS_ENCRYPTED);
}
mutex_unlock(&crypt_stat->cs_mutex);
- if (!ecryptfs_inode_to_private(inode)->lower_file) {
- rc = ecryptfs_init_persistent_file(ecryptfs_dentry);
- if (rc) {
- printk(KERN_ERR "%s: Error attempting to initialize "
- "the persistent file for the dentry with name "
- "[%s]; rc = [%d]\n", __func__,
- ecryptfs_dentry->d_name.name, rc);
- goto out_free;
- }
+ rc = ecryptfs_init_persistent_file(ecryptfs_dentry);
+ if (rc) {
+ printk(KERN_ERR "%s: Error attempting to initialize "
+ "the persistent file for the dentry with name "
+ "[%s]; rc = [%d]\n", __func__,
+ ecryptfs_dentry->d_name.name, rc);
+ goto out_free;
}
- if ((ecryptfs_inode_to_private(inode)->lower_file->f_flags & O_RDONLY)
- && !(file->f_flags & O_RDONLY)) {
+ if ((ecryptfs_inode_to_private(inode)->lower_file->f_flags & O_ACCMODE)
+ == O_RDONLY && (file->f_flags & O_ACCMODE) != O_RDONLY) {
rc = -EPERM;
printk(KERN_WARNING "%s: Lower persistent file is RO; eCryptfs "
"file must hence be opened RO\n", __func__);
@@ -243,9 +241,9 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
}
}
mutex_unlock(&crypt_stat->cs_mutex);
- ecryptfs_printk(KERN_DEBUG, "inode w/ addr = [0x%p], i_ino = [0x%.16x] "
- "size: [0x%.16x]\n", inode, inode->i_ino,
- i_size_read(inode));
+ ecryptfs_printk(KERN_DEBUG, "inode w/ addr = [0x%p], i_ino = "
+ "[0x%.16lx] size: [0x%.16llx]\n", inode, inode->i_ino,
+ (unsigned long long)i_size_read(inode));
goto out;
out_free:
kmem_cache_free(ecryptfs_file_info_cache,
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 64ff023..bd33f87 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -185,15 +185,13 @@ static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry)
"context; rc = [%d]\n", rc);
goto out;
}
- if (!ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->lower_file) {
- rc = ecryptfs_init_persistent_file(ecryptfs_dentry);
- if (rc) {
- printk(KERN_ERR "%s: Error attempting to initialize "
- "the persistent file for the dentry with name "
- "[%s]; rc = [%d]\n", __func__,
- ecryptfs_dentry->d_name.name, rc);
- goto out;
- }
+ rc = ecryptfs_init_persistent_file(ecryptfs_dentry);
+ if (rc) {
+ printk(KERN_ERR "%s: Error attempting to initialize "
+ "the persistent file for the dentry with name "
+ "[%s]; rc = [%d]\n", __func__,
+ ecryptfs_dentry->d_name.name, rc);
+ goto out;
}
rc = ecryptfs_write_metadata(ecryptfs_dentry);
if (rc) {
@@ -302,15 +300,13 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
rc = -ENOMEM;
goto out;
}
- if (!ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->lower_file) {
- rc = ecryptfs_init_persistent_file(ecryptfs_dentry);
- if (rc) {
- printk(KERN_ERR "%s: Error attempting to initialize "
- "the persistent file for the dentry with name "
- "[%s]; rc = [%d]\n", __func__,
- ecryptfs_dentry->d_name.name, rc);
- goto out_free_kmem;
- }
+ rc = ecryptfs_init_persistent_file(ecryptfs_dentry);
+ if (rc) {
+ printk(KERN_ERR "%s: Error attempting to initialize "
+ "the persistent file for the dentry with name "
+ "[%s]; rc = [%d]\n", __func__,
+ ecryptfs_dentry->d_name.name, rc);
+ goto out_free_kmem;
}
crypt_stat = &ecryptfs_inode_to_private(
ecryptfs_dentry->d_inode)->crypt_stat;
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index b1f6858..c1436cf 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -59,7 +59,7 @@ static int process_request_key_err(long err_code)
break;
default:
ecryptfs_printk(KERN_WARNING, "Unknown error code: "
- "[0x%.16x]\n", err_code);
+ "[0x%.16lx]\n", err_code);
rc = -EINVAL;
}
return rc;
@@ -130,7 +130,7 @@ int ecryptfs_write_packet_length(char *dest, size_t size,
} else {
rc = -EINVAL;
ecryptfs_printk(KERN_WARNING,
- "Unsupported packet size: [%d]\n", size);
+ "Unsupported packet size: [%zd]\n", size);
}
return rc;
}
@@ -1672,7 +1672,7 @@ decrypt_passphrase_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok,
auth_tok->session_key.decrypted_key_size);
crypt_stat->flags |= ECRYPTFS_KEY_VALID;
if (unlikely(ecryptfs_verbosity > 0)) {
- ecryptfs_printk(KERN_DEBUG, "FEK of size [%d]:\n",
+ ecryptfs_printk(KERN_DEBUG, "FEK of size [%zd]:\n",
crypt_stat->key_size);
ecryptfs_dump_hex(crypt_stat->key,
crypt_stat->key_size);
@@ -1754,7 +1754,7 @@ int ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat,
if (ECRYPTFS_SIG_SIZE != tag_11_contents_size) {
ecryptfs_printk(KERN_ERR, "Expected "
"signature of size [%d]; "
- "read size [%d]\n",
+ "read size [%zd]\n",
ECRYPTFS_SIG_SIZE,
tag_11_contents_size);
rc = -EIO;
@@ -1787,8 +1787,8 @@ int ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat,
goto out_wipe_list;
break;
default:
- ecryptfs_printk(KERN_DEBUG, "No packet at offset "
- "[%d] of the file header; hex value of "
+ ecryptfs_printk(KERN_DEBUG, "No packet at offset [%zd] "
+ "of the file header; hex value of "
"character is [0x%.2x]\n", i, src[i]);
next_packet_is_auth_tok_packet = 0;
}
@@ -1864,8 +1864,8 @@ found_matching_auth_tok:
"session key for authentication token with sig "
"[%.*s]; rc = [%d]. Removing auth tok "
"candidate from the list and searching for "
- "the next match.\n", candidate_auth_tok_sig,
- ECRYPTFS_SIG_SIZE_HEX, rc);
+ "the next match.\n", ECRYPTFS_SIG_SIZE_HEX,
+ candidate_auth_tok_sig, rc);
list_for_each_entry_safe(auth_tok_list_item,
auth_tok_list_item_tmp,
&auth_tok_list, list) {
@@ -2168,7 +2168,7 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes,
if (encrypted_session_key_valid) {
ecryptfs_printk(KERN_DEBUG, "encrypted_session_key_valid != 0; "
"using auth_tok->session_key.encrypted_key, "
- "where key_rec->enc_key_size = [%d]\n",
+ "where key_rec->enc_key_size = [%zd]\n",
key_rec->enc_key_size);
memcpy(key_rec->enc_key,
auth_tok->session_key.encrypted_key,
@@ -2198,7 +2198,7 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes,
if (rc < 1 || rc > 2) {
ecryptfs_printk(KERN_ERR, "Error generating scatterlist "
"for crypt_stat session key; expected rc = 1; "
- "got rc = [%d]. key_rec->enc_key_size = [%d]\n",
+ "got rc = [%d]. key_rec->enc_key_size = [%zd]\n",
rc, key_rec->enc_key_size);
rc = -ENOMEM;
goto out;
@@ -2209,7 +2209,7 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes,
ecryptfs_printk(KERN_ERR, "Error generating scatterlist "
"for crypt_stat encrypted session key; "
"expected rc = 1; got rc = [%d]. "
- "key_rec->enc_key_size = [%d]\n", rc,
+ "key_rec->enc_key_size = [%zd]\n", rc,
key_rec->enc_key_size);
rc = -ENOMEM;
goto out;
@@ -2224,7 +2224,7 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes,
goto out;
}
rc = 0;
- ecryptfs_printk(KERN_DEBUG, "Encrypting [%d] bytes of the key\n",
+ ecryptfs_printk(KERN_DEBUG, "Encrypting [%zd] bytes of the key\n",
crypt_stat->key_size);
rc = crypto_blkcipher_encrypt(&desc, dst_sg, src_sg,
(*key_rec).enc_key_size);
@@ -2235,7 +2235,7 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes,
}
ecryptfs_printk(KERN_DEBUG, "This should be the encrypted key:\n");
if (ecryptfs_verbosity > 0) {
- ecryptfs_printk(KERN_DEBUG, "EFEK of size [%d]:\n",
+ ecryptfs_printk(KERN_DEBUG, "EFEK of size [%zd]:\n",
key_rec->enc_key_size);
ecryptfs_dump_hex(key_rec->enc_key,
key_rec->enc_key_size);
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index d3b28ab..758323a 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -36,6 +36,7 @@
#include <linux/parser.h>
#include <linux/fs_stack.h>
#include <linux/slab.h>
+#include <linux/magic.h>
#include "ecryptfs_kernel.h"
/**
@@ -564,6 +565,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
ecryptfs_set_superblock_lower(s, path.dentry->d_sb);
s->s_maxbytes = path.dentry->d_sb->s_maxbytes;
s->s_blocksize = path.dentry->d_sb->s_blocksize;
+ s->s_magic = ECRYPTFS_SUPER_MAGIC;
inode = ecryptfs_get_inode(path.dentry->d_inode, s);
rc = PTR_ERR(inode);
@@ -808,9 +810,10 @@ static int __init ecryptfs_init(void)
ecryptfs_printk(KERN_ERR, "The eCryptfs extent size is "
"larger than the host's page size, and so "
"eCryptfs cannot run on this system. The "
- "default eCryptfs extent size is [%d] bytes; "
- "the page size is [%d] bytes.\n",
- ECRYPTFS_DEFAULT_EXTENT_SIZE, PAGE_CACHE_SIZE);
+ "default eCryptfs extent size is [%u] bytes; "
+ "the page size is [%lu] bytes.\n",
+ ECRYPTFS_DEFAULT_EXTENT_SIZE,
+ (unsigned long)PAGE_CACHE_SIZE);
goto out;
}
rc = ecryptfs_init_kmem_caches();
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index b1d8275..cc64fca 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -65,7 +65,7 @@ static int ecryptfs_writepage(struct page *page, struct writeback_control *wbc)
rc = ecryptfs_encrypt_page(page);
if (rc) {
ecryptfs_printk(KERN_WARNING, "Error encrypting "
- "page (upper index [0x%.16x])\n", page->index);
+ "page (upper index [0x%.16lx])\n", page->index);
ClearPageUptodate(page);
goto out;
}
@@ -237,7 +237,7 @@ out:
ClearPageUptodate(page);
else
SetPageUptodate(page);
- ecryptfs_printk(KERN_DEBUG, "Unlocking page with index = [0x%.16x]\n",
+ ecryptfs_printk(KERN_DEBUG, "Unlocking page with index = [0x%.16lx]\n",
page->index);
unlock_page(page);
return rc;
@@ -290,6 +290,7 @@ static int ecryptfs_write_begin(struct file *file,
return -ENOMEM;
*pagep = page;
+ prev_page_end_size = ((loff_t)index << PAGE_CACHE_SHIFT);
if (!PageUptodate(page)) {
struct ecryptfs_crypt_stat *crypt_stat =
&ecryptfs_inode_to_private(mapping->host)->crypt_stat;
@@ -335,18 +336,23 @@ static int ecryptfs_write_begin(struct file *file,
SetPageUptodate(page);
}
} else {
- rc = ecryptfs_decrypt_page(page);
- if (rc) {
- printk(KERN_ERR "%s: Error decrypting page "
- "at index [%ld]; rc = [%d]\n",
- __func__, page->index, rc);
- ClearPageUptodate(page);
- goto out;
+ if (prev_page_end_size
+ >= i_size_read(page->mapping->host)) {
+ zero_user(page, 0, PAGE_CACHE_SIZE);
+ } else {
+ rc = ecryptfs_decrypt_page(page);
+ if (rc) {
+ printk(KERN_ERR "%s: Error decrypting "
+ "page at index [%ld]; "
+ "rc = [%d]\n",
+ __func__, page->index, rc);
+ ClearPageUptodate(page);
+ goto out;
+ }
}
SetPageUptodate(page);
}
}
- prev_page_end_size = ((loff_t)index << PAGE_CACHE_SHIFT);
/* If creating a page or more of holes, zero them out via truncate.
* Note, this will increase i_size. */
if (index != 0) {
@@ -488,7 +494,7 @@ static int ecryptfs_write_end(struct file *file,
} else
ecryptfs_printk(KERN_DEBUG, "Not a new file\n");
ecryptfs_printk(KERN_DEBUG, "Calling fill_zeros_to_end_of_page"
- "(page w/ index = [0x%.16x], to = [%d])\n", index, to);
+ "(page w/ index = [0x%.16lx], to = [%d])\n", index, to);
if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) {
rc = ecryptfs_write_lower_page_segment(ecryptfs_inode, page, 0,
to);
@@ -503,19 +509,20 @@ static int ecryptfs_write_end(struct file *file,
rc = fill_zeros_to_end_of_page(page, to);
if (rc) {
ecryptfs_printk(KERN_WARNING, "Error attempting to fill "
- "zeros in page with index = [0x%.16x]\n", index);
+ "zeros in page with index = [0x%.16lx]\n", index);
goto out;
}
rc = ecryptfs_encrypt_page(page);
if (rc) {
ecryptfs_printk(KERN_WARNING, "Error encrypting page (upper "
- "index [0x%.16x])\n", index);
+ "index [0x%.16lx])\n", index);
goto out;
}
if (pos + copied > i_size_read(ecryptfs_inode)) {
i_size_write(ecryptfs_inode, pos + copied);
ecryptfs_printk(KERN_DEBUG, "Expanded file size to "
- "[0x%.16x]\n", i_size_read(ecryptfs_inode));
+ "[0x%.16llx]\n",
+ (unsigned long long)i_size_read(ecryptfs_inode));
}
rc = ecryptfs_write_inode_size_to_metadata(ecryptfs_inode);
if (rc)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 1de65f5..0c8d97b 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2065,7 +2065,7 @@ extern int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
extern void ext4_ext_truncate(struct inode *);
extern void ext4_ext_init(struct super_block *);
extern void ext4_ext_release(struct super_block *);
-extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset,
+extern long ext4_fallocate(struct file *file, int mode, loff_t offset,
loff_t len);
extern int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
ssize_t len);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index c4068f6..63a7581 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3627,14 +3627,15 @@ static void ext4_falloc_update_inode(struct inode *inode,
}
/*
- * preallocate space for a file. This implements ext4's fallocate inode
+ * preallocate space for a file. This implements ext4's fallocate file
* operation, which gets called from sys_fallocate system call.
* For block-mapped files, posix_fallocate should fall back to the method
* of writing zeroes to the required new blocks (the same behavior which is
* expected for file systems which do not support fallocate() system call).
*/
-long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
+long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
{
+ struct inode *inode = file->f_path.dentry->d_inode;
handle_t *handle;
loff_t new_size;
unsigned int max_blocks;
@@ -3645,7 +3646,7 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
unsigned int credits, blkbits = inode->i_blkbits;
/* We only support the FALLOC_FL_KEEP_SIZE mode */
- if (mode && (mode != FALLOC_FL_KEEP_SIZE))
+ if (mode & ~FALLOC_FL_KEEP_SIZE)
return -EOPNOTSUPP;
/*
@@ -3655,10 +3656,6 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
return -EOPNOTSUPP;
- /* preallocation to directories is currently not supported */
- if (S_ISDIR(inode->i_mode))
- return -ENODEV;
-
map.m_lblk = offset >> blkbits;
/*
* We can't just convert len to max_blocks because
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index bb003dc..2e8322c 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -210,6 +210,7 @@ const struct file_operations ext4_file_operations = {
.fsync = ext4_sync_file,
.splice_read = generic_file_splice_read,
.splice_write = generic_file_splice_write,
+ .fallocate = ext4_fallocate,
};
const struct inode_operations ext4_file_inode_operations = {
@@ -223,7 +224,6 @@ const struct inode_operations ext4_file_inode_operations = {
.removexattr = generic_removexattr,
#endif
.check_acl = ext4_check_acl,
- .fallocate = ext4_fallocate,
.fiemap = ext4_fiemap,
};
diff --git a/fs/file_table.c b/fs/file_table.c
index c3dee38..c3e89ad 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -311,7 +311,7 @@ struct file *fget_light(unsigned int fd, int *fput_needed)
struct files_struct *files = current->files;
*fput_needed = 0;
- if (likely((atomic_read(&files->count) == 1))) {
+ if (atomic_read(&files->count) == 1) {
file = fcheck_files(files, fd);
} else {
rcu_read_lock();
diff --git a/fs/fs_struct.c b/fs/fs_struct.c
index 68ca487..78b519c 100644
--- a/fs/fs_struct.c
+++ b/fs/fs_struct.c
@@ -4,6 +4,19 @@
#include <linux/path.h>
#include <linux/slab.h>
#include <linux/fs_struct.h>
+#include "internal.h"
+
+static inline void path_get_longterm(struct path *path)
+{
+ path_get(path);
+ mnt_make_longterm(path->mnt);
+}
+
+static inline void path_put_longterm(struct path *path)
+{
+ mnt_make_shortterm(path->mnt);
+ path_put(path);
+}
/*
* Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values.
@@ -17,11 +30,11 @@ void set_fs_root(struct fs_struct *fs, struct path *path)
write_seqcount_begin(&fs->seq);
old_root = fs->root;
fs->root = *path;
- path_get_long(path);
+ path_get_longterm(path);
write_seqcount_end(&fs->seq);
spin_unlock(&fs->lock);
if (old_root.dentry)
- path_put_long(&old_root);
+ path_put_longterm(&old_root);
}
/*
@@ -36,12 +49,12 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path)
write_seqcount_begin(&fs->seq);
old_pwd = fs->pwd;
fs->pwd = *path;
- path_get_long(path);
+ path_get_longterm(path);
write_seqcount_end(&fs->seq);
spin_unlock(&fs->lock);
if (old_pwd.dentry)
- path_put_long(&old_pwd);
+ path_put_longterm(&old_pwd);
}
void chroot_fs_refs(struct path *old_root, struct path *new_root)
@@ -59,13 +72,13 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root)
write_seqcount_begin(&fs->seq);
if (fs->root.dentry == old_root->dentry
&& fs->root.mnt == old_root->mnt) {
- path_get_long(new_root);
+ path_get_longterm(new_root);
fs->root = *new_root;
count++;
}
if (fs->pwd.dentry == old_root->dentry
&& fs->pwd.mnt == old_root->mnt) {
- path_get_long(new_root);
+ path_get_longterm(new_root);
fs->pwd = *new_root;
count++;
}
@@ -76,13 +89,13 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root)
} while_each_thread(g, p);
read_unlock(&tasklist_lock);
while (count--)
- path_put_long(old_root);
+ path_put_longterm(old_root);
}
void free_fs_struct(struct fs_struct *fs)
{
- path_put_long(&fs->root);
- path_put_long(&fs->pwd);
+ path_put_longterm(&fs->root);
+ path_put_longterm(&fs->pwd);
kmem_cache_free(fs_cachep, fs);
}
@@ -118,9 +131,9 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old)
spin_lock(&old->lock);
fs->root = old->root;
- path_get_long(&fs->root);
+ path_get_longterm(&fs->root);
fs->pwd = old->pwd;
- path_get_long(&fs->pwd);
+ path_get_longterm(&fs->pwd);
spin_unlock(&old->lock);
}
return fs;
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index fca6689..7cfdcb9 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -19,6 +19,8 @@
#include <linux/fs.h>
#include <linux/gfs2_ondisk.h>
#include <linux/ext2_fs.h>
+#include <linux/falloc.h>
+#include <linux/swap.h>
#include <linux/crc32.h>
#include <linux/writeback.h>
#include <asm/uaccess.h>
@@ -610,6 +612,260 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
return generic_file_aio_write(iocb, iov, nr_segs, pos);
}
+static void empty_write_end(struct page *page, unsigned from,
+ unsigned to)
+{
+ struct gfs2_inode *ip = GFS2_I(page->mapping->host);
+
+ page_zero_new_buffers(page, from, to);
+ flush_dcache_page(page);
+ mark_page_accessed(page);
+
+ if (!gfs2_is_writeback(ip))
+ gfs2_page_add_databufs(ip, page, from, to);
+
+ block_commit_write(page, from, to);
+}
+
+static int write_empty_blocks(struct page *page, unsigned from, unsigned to)
+{
+ unsigned start, end, next;
+ struct buffer_head *bh, *head;
+ int error;
+
+ if (!page_has_buffers(page)) {
+ error = __block_write_begin(page, from, to - from, gfs2_block_map);
+ if (unlikely(error))
+ return error;
+
+ empty_write_end(page, from, to);
+ return 0;
+ }
+
+ bh = head = page_buffers(page);
+ next = end = 0;
+ while (next < from) {
+ next += bh->b_size;
+ bh = bh->b_this_page;
+ }
+ start = next;
+ do {
+ next += bh->b_size;
+ if (buffer_mapped(bh)) {
+ if (end) {
+ error = __block_write_begin(page, start, end - start,
+ gfs2_block_map);
+ if (unlikely(error))
+ return error;
+ empty_write_end(page, start, end);
+ end = 0;
+ }
+ start = next;
+ }
+ else
+ end = next;
+ bh = bh->b_this_page;
+ } while (next < to);
+
+ if (end) {
+ error = __block_write_begin(page, start, end - start, gfs2_block_map);
+ if (unlikely(error))
+ return error;
+ empty_write_end(page, start, end);
+ }
+
+ return 0;
+}
+
+static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
+ int mode)
+{
+ struct gfs2_inode *ip = GFS2_I(inode);
+ struct buffer_head *dibh;
+ int error;
+ u64 start = offset >> PAGE_CACHE_SHIFT;
+ unsigned int start_offset = offset & ~PAGE_CACHE_MASK;
+ u64 end = (offset + len - 1) >> PAGE_CACHE_SHIFT;
+ pgoff_t curr;
+ struct page *page;
+ unsigned int end_offset = (offset + len) & ~PAGE_CACHE_MASK;
+ unsigned int from, to;
+
+ if (!end_offset)
+ end_offset = PAGE_CACHE_SIZE;
+
+ error = gfs2_meta_inode_buffer(ip, &dibh);
+ if (unlikely(error))
+ goto out;
+
+ gfs2_trans_add_bh(ip->i_gl, dibh, 1);
+
+ if (gfs2_is_stuffed(ip)) {
+ error = gfs2_unstuff_dinode(ip, NULL);
+ if (unlikely(error))
+ goto out;
+ }
+
+ curr = start;
+ offset = start << PAGE_CACHE_SHIFT;
+ from = start_offset;
+ to = PAGE_CACHE_SIZE;
+ while (curr <= end) {
+ page = grab_cache_page_write_begin(inode->i_mapping, curr,
+ AOP_FLAG_NOFS);
+ if (unlikely(!page)) {
+ error = -ENOMEM;
+ goto out;
+ }
+
+ if (curr == end)
+ to = end_offset;
+ error = write_empty_blocks(page, from, to);
+ if (!error && offset + to > inode->i_size &&
+ !(mode & FALLOC_FL_KEEP_SIZE)) {
+ i_size_write(inode, offset + to);
+ }
+ unlock_page(page);
+ page_cache_release(page);
+ if (error)
+ goto out;
+ curr++;
+ offset += PAGE_CACHE_SIZE;
+ from = 0;
+ }
+
+ gfs2_dinode_out(ip, dibh->b_data);
+ mark_inode_dirty(inode);
+
+ brelse(dibh);
+
+out:
+ return error;
+}
+
+static void calc_max_reserv(struct gfs2_inode *ip, loff_t max, loff_t *len,
+ unsigned int *data_blocks, unsigned int *ind_blocks)
+{
+ const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+ unsigned int max_blocks = ip->i_alloc->al_rgd->rd_free_clone;
+ unsigned int tmp, max_data = max_blocks - 3 * (sdp->sd_max_height - 1);
+
+ for (tmp = max_data; tmp > sdp->sd_diptrs;) {
+ tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs);
+ max_data -= tmp;
+ }
+ /* This calculation isn't the exact reverse of gfs2_write_calc_reserve,
+ so it might end up with fewer data blocks */
+ if (max_data <= *data_blocks)
+ return;
+ *data_blocks = max_data;
+ *ind_blocks = max_blocks - max_data;
+ *len = ((loff_t)max_data - 3) << sdp->sd_sb.sb_bsize_shift;
+ if (*len > max) {
+ *len = max;
+ gfs2_write_calc_reserv(ip, max, data_blocks, ind_blocks);
+ }
+}
+
+static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
+ loff_t len)
+{
+ struct inode *inode = file->f_path.dentry->d_inode;
+ struct gfs2_sbd *sdp = GFS2_SB(inode);
+ struct gfs2_inode *ip = GFS2_I(inode);
+ unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
+ loff_t bytes, max_bytes;
+ struct gfs2_alloc *al;
+ int error;
+ loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift;
+ next = (next + 1) << sdp->sd_sb.sb_bsize_shift;
+
+ /* We only support the FALLOC_FL_KEEP_SIZE mode */
+ if (mode & ~FALLOC_FL_KEEP_SIZE)
+ return -EOPNOTSUPP;
+
+ offset = (offset >> sdp->sd_sb.sb_bsize_shift) <<
+ sdp->sd_sb.sb_bsize_shift;
+
+ len = next - offset;
+ bytes = sdp->sd_max_rg_data * sdp->sd_sb.sb_bsize / 2;
+ if (!bytes)
+ bytes = UINT_MAX;
+
+ gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
+ error = gfs2_glock_nq(&ip->i_gh);
+ if (unlikely(error))
+ goto out_uninit;
+
+ if (!gfs2_write_alloc_required(ip, offset, len))
+ goto out_unlock;
+
+ while (len > 0) {
+ if (len < bytes)
+ bytes = len;
+ al = gfs2_alloc_get(ip);
+ if (!al) {
+ error = -ENOMEM;
+ goto out_unlock;
+ }
+
+ error = gfs2_quota_lock_check(ip);
+ if (error)
+ goto out_alloc_put;
+
+retry:
+ gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks);
+
+ al->al_requested = data_blocks + ind_blocks;
+ error = gfs2_inplace_reserve(ip);
+ if (error) {
+ if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) {
+ bytes >>= 1;
+ goto retry;
+ }
+ goto out_qunlock;
+ }
+ max_bytes = bytes;
+ calc_max_reserv(ip, len, &max_bytes, &data_blocks, &ind_blocks);
+ al->al_requested = data_blocks + ind_blocks;
+
+ rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA +
+ RES_RG_HDR + gfs2_rg_blocks(al);
+ if (gfs2_is_jdata(ip))
+ rblocks += data_blocks ? data_blocks : 1;
+
+ error = gfs2_trans_begin(sdp, rblocks,
+ PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize);
+ if (error)
+ goto out_trans_fail;
+
+ error = fallocate_chunk(inode, offset, max_bytes, mode);
+ gfs2_trans_end(sdp);
+
+ if (error)
+ goto out_trans_fail;
+
+ len -= max_bytes;
+ offset += max_bytes;
+ gfs2_inplace_release(ip);
+ gfs2_quota_unlock(ip);
+ gfs2_alloc_put(ip);
+ }
+ goto out_unlock;
+
+out_trans_fail:
+ gfs2_inplace_release(ip);
+out_qunlock:
+ gfs2_quota_unlock(ip);
+out_alloc_put:
+ gfs2_alloc_put(ip);
+out_unlock:
+ gfs2_glock_dq(&ip->i_gh);
+out_uninit:
+ gfs2_holder_uninit(&ip->i_gh);
+ return error;
+}
+
#ifdef CONFIG_GFS2_FS_LOCKING_DLM
/**
@@ -765,6 +1021,7 @@ const struct file_operations gfs2_file_fops = {
.splice_read = generic_file_splice_read,
.splice_write = generic_file_splice_write,
.setlease = gfs2_setlease,
+ .fallocate = gfs2_fallocate,
};
const struct file_operations gfs2_dir_fops = {
@@ -794,6 +1051,7 @@ const struct file_operations gfs2_file_fops_nolock = {
.splice_read = generic_file_splice_read,
.splice_write = generic_file_splice_write,
.setlease = generic_setlease,
+ .fallocate = gfs2_fallocate,
};
const struct file_operations gfs2_dir_fops_nolock = {
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 040b5a2..d8b26ac 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -18,8 +18,6 @@
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
#include <linux/fiemap.h>
-#include <linux/swap.h>
-#include <linux/falloc.h>
#include <asm/uaccess.h>
#include "gfs2.h"
@@ -1257,261 +1255,6 @@ static int gfs2_removexattr(struct dentry *dentry, const char *name)
return ret;
}
-static void empty_write_end(struct page *page, unsigned from,
- unsigned to)
-{
- struct gfs2_inode *ip = GFS2_I(page->mapping->host);
-
- page_zero_new_buffers(page, from, to);
- flush_dcache_page(page);
- mark_page_accessed(page);
-
- if (!gfs2_is_writeback(ip))
- gfs2_page_add_databufs(ip, page, from, to);
-
- block_commit_write(page, from, to);
-}
-
-
-static int write_empty_blocks(struct page *page, unsigned from, unsigned to)
-{
- unsigned start, end, next;
- struct buffer_head *bh, *head;
- int error;
-
- if (!page_has_buffers(page)) {
- error = __block_write_begin(page, from, to - from, gfs2_block_map);
- if (unlikely(error))
- return error;
-
- empty_write_end(page, from, to);
- return 0;
- }
-
- bh = head = page_buffers(page);
- next = end = 0;
- while (next < from) {
- next += bh->b_size;
- bh = bh->b_this_page;
- }
- start = next;
- do {
- next += bh->b_size;
- if (buffer_mapped(bh)) {
- if (end) {
- error = __block_write_begin(page, start, end - start,
- gfs2_block_map);
- if (unlikely(error))
- return error;
- empty_write_end(page, start, end);
- end = 0;
- }
- start = next;
- }
- else
- end = next;
- bh = bh->b_this_page;
- } while (next < to);
-
- if (end) {
- error = __block_write_begin(page, start, end - start, gfs2_block_map);
- if (unlikely(error))
- return error;
- empty_write_end(page, start, end);
- }
-
- return 0;
-}
-
-static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
- int mode)
-{
- struct gfs2_inode *ip = GFS2_I(inode);
- struct buffer_head *dibh;
- int error;
- u64 start = offset >> PAGE_CACHE_SHIFT;
- unsigned int start_offset = offset & ~PAGE_CACHE_MASK;
- u64 end = (offset + len - 1) >> PAGE_CACHE_SHIFT;
- pgoff_t curr;
- struct page *page;
- unsigned int end_offset = (offset + len) & ~PAGE_CACHE_MASK;
- unsigned int from, to;
-
- if (!end_offset)
- end_offset = PAGE_CACHE_SIZE;
-
- error = gfs2_meta_inode_buffer(ip, &dibh);
- if (unlikely(error))
- goto out;
-
- gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-
- if (gfs2_is_stuffed(ip)) {
- error = gfs2_unstuff_dinode(ip, NULL);
- if (unlikely(error))
- goto out;
- }
-
- curr = start;
- offset = start << PAGE_CACHE_SHIFT;
- from = start_offset;
- to = PAGE_CACHE_SIZE;
- while (curr <= end) {
- page = grab_cache_page_write_begin(inode->i_mapping, curr,
- AOP_FLAG_NOFS);
- if (unlikely(!page)) {
- error = -ENOMEM;
- goto out;
- }
-
- if (curr == end)
- to = end_offset;
- error = write_empty_blocks(page, from, to);
- if (!error && offset + to > inode->i_size &&
- !(mode & FALLOC_FL_KEEP_SIZE)) {
- i_size_write(inode, offset + to);
- }
- unlock_page(page);
- page_cache_release(page);
- if (error)
- goto out;
- curr++;
- offset += PAGE_CACHE_SIZE;
- from = 0;
- }
-
- gfs2_dinode_out(ip, dibh->b_data);
- mark_inode_dirty(inode);
-
- brelse(dibh);
-
-out:
- return error;
-}
-
-static void calc_max_reserv(struct gfs2_inode *ip, loff_t max, loff_t *len,
- unsigned int *data_blocks, unsigned int *ind_blocks)
-{
- const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
- unsigned int max_blocks = ip->i_alloc->al_rgd->rd_free_clone;
- unsigned int tmp, max_data = max_blocks - 3 * (sdp->sd_max_height - 1);
-
- for (tmp = max_data; tmp > sdp->sd_diptrs;) {
- tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs);
- max_data -= tmp;
- }
- /* This calculation isn't the exact reverse of gfs2_write_calc_reserve,
- so it might end up with fewer data blocks */
- if (max_data <= *data_blocks)
- return;
- *data_blocks = max_data;
- *ind_blocks = max_blocks - max_data;
- *len = ((loff_t)max_data - 3) << sdp->sd_sb.sb_bsize_shift;
- if (*len > max) {
- *len = max;
- gfs2_write_calc_reserv(ip, max, data_blocks, ind_blocks);
- }
-}
-
-static long gfs2_fallocate(struct inode *inode, int mode, loff_t offset,
- loff_t len)
-{
- struct gfs2_sbd *sdp = GFS2_SB(inode);
- struct gfs2_inode *ip = GFS2_I(inode);
- unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
- loff_t bytes, max_bytes;
- struct gfs2_alloc *al;
- int error;
- loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift;
- next = (next + 1) << sdp->sd_sb.sb_bsize_shift;
-
- /* We only support the FALLOC_FL_KEEP_SIZE mode */
- if (mode && (mode != FALLOC_FL_KEEP_SIZE))
- return -EOPNOTSUPP;
-
- offset = (offset >> sdp->sd_sb.sb_bsize_shift) <<
- sdp->sd_sb.sb_bsize_shift;
-
- len = next - offset;
- bytes = sdp->sd_max_rg_data * sdp->sd_sb.sb_bsize / 2;
- if (!bytes)
- bytes = UINT_MAX;
-
- gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
- error = gfs2_glock_nq(&ip->i_gh);
- if (unlikely(error))
- goto out_uninit;
-
- if (!gfs2_write_alloc_required(ip, offset, len))
- goto out_unlock;
-
- while (len > 0) {
- if (len < bytes)
- bytes = len;
- al = gfs2_alloc_get(ip);
- if (!al) {
- error = -ENOMEM;
- goto out_unlock;
- }
-
- error = gfs2_quota_lock_check(ip);
- if (error)
- goto out_alloc_put;
-
-retry:
- gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks);
-
- al->al_requested = data_blocks + ind_blocks;
- error = gfs2_inplace_reserve(ip);
- if (error) {
- if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) {
- bytes >>= 1;
- goto retry;
- }
- goto out_qunlock;
- }
- max_bytes = bytes;
- calc_max_reserv(ip, len, &max_bytes, &data_blocks, &ind_blocks);
- al->al_requested = data_blocks + ind_blocks;
-
- rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA +
- RES_RG_HDR + gfs2_rg_blocks(al);
- if (gfs2_is_jdata(ip))
- rblocks += data_blocks ? data_blocks : 1;
-
- error = gfs2_trans_begin(sdp, rblocks,
- PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize);
- if (error)
- goto out_trans_fail;
-
- error = fallocate_chunk(inode, offset, max_bytes, mode);
- gfs2_trans_end(sdp);
-
- if (error)
- goto out_trans_fail;
-
- len -= max_bytes;
- offset += max_bytes;
- gfs2_inplace_release(ip);
- gfs2_quota_unlock(ip);
- gfs2_alloc_put(ip);
- }
- goto out_unlock;
-
-out_trans_fail:
- gfs2_inplace_release(ip);
-out_qunlock:
- gfs2_quota_unlock(ip);
-out_alloc_put:
- gfs2_alloc_put(ip);
-out_unlock:
- gfs2_glock_dq(&ip->i_gh);
-out_uninit:
- gfs2_holder_uninit(&ip->i_gh);
- return error;
-}
-
-
static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
u64 start, u64 len)
{
@@ -1562,7 +1305,6 @@ const struct inode_operations gfs2_file_iops = {
.getxattr = gfs2_getxattr,
.listxattr = gfs2_listxattr,
.removexattr = gfs2_removexattr,
- .fallocate = gfs2_fallocate,
.fiemap = gfs2_fiemap,
};
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c
index 56f0da1..1ae35ba 100644
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -281,7 +281,7 @@ int hpfs_setattr(struct dentry *dentry, struct iattr *attr)
attr->ia_size != i_size_read(inode)) {
error = vmtruncate(inode, attr->ia_size);
if (error)
- return error;
+ goto out_unlock;
}
setattr_copy(inode, attr);
diff --git a/fs/internal.h b/fs/internal.h
index 9687c2e..0663568 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -70,6 +70,10 @@ extern void mnt_set_mountpoint(struct vfsmount *, struct dentry *,
extern void release_mounts(struct list_head *);
extern void umount_tree(struct vfsmount *, int, struct list_head *);
extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int);
+extern int finish_automount(struct vfsmount *, struct path *);
+
+extern void mnt_make_longterm(struct vfsmount *);
+extern void mnt_make_shortterm(struct vfsmount *);
extern void __init mnt_init(void);
diff --git a/fs/ioctl.c b/fs/ioctl.c
index d6cc164..a59635e 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -86,7 +86,7 @@ int fiemap_fill_next_extent(struct fiemap_extent_info *fieinfo, u64 logical,
u64 phys, u64 len, u32 flags)
{
struct fiemap_extent extent;
- struct fiemap_extent *dest = fieinfo->fi_extents_start;
+ struct fiemap_extent __user *dest = fieinfo->fi_extents_start;
/* only count the extents */
if (fieinfo->fi_extents_max == 0) {
@@ -173,6 +173,7 @@ static int fiemap_check_ranges(struct super_block *sb,
static int ioctl_fiemap(struct file *filp, unsigned long arg)
{
struct fiemap fiemap;
+ struct fiemap __user *ufiemap = (struct fiemap __user *) arg;
struct fiemap_extent_info fieinfo = { 0, };
struct inode *inode = filp->f_path.dentry->d_inode;
struct super_block *sb = inode->i_sb;
@@ -182,8 +183,7 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg)
if (!inode->i_op->fiemap)
return -EOPNOTSUPP;
- if (copy_from_user(&fiemap, (struct fiemap __user *)arg,
- sizeof(struct fiemap)))
+ if (copy_from_user(&fiemap, ufiemap, sizeof(fiemap)))
return -EFAULT;
if (fiemap.fm_extent_count > FIEMAP_MAX_EXTENTS)
@@ -196,7 +196,7 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg)
fieinfo.fi_flags = fiemap.fm_flags;
fieinfo.fi_extents_max = fiemap.fm_extent_count;
- fieinfo.fi_extents_start = (struct fiemap_extent *)(arg + sizeof(fiemap));
+ fieinfo.fi_extents_start = ufiemap->fm_extents;
if (fiemap.fm_extent_count != 0 &&
!access_ok(VERIFY_WRITE, fieinfo.fi_extents_start,
@@ -209,7 +209,7 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg)
error = inode->i_op->fiemap(inode, &fieinfo, fiemap.fm_start, len);
fiemap.fm_flags = fieinfo.fi_flags;
fiemap.fm_mapped_extents = fieinfo.fi_extents_mapped;
- if (copy_to_user((char *)arg, &fiemap, sizeof(fiemap)))
+ if (copy_to_user(ufiemap, &fiemap, sizeof(fiemap)))
error = -EFAULT;
return error;
diff --git a/fs/jffs2/build.c b/fs/jffs2/build.c
index 85c6be2..3005ec4 100644
--- a/fs/jffs2/build.c
+++ b/fs/jffs2/build.c
@@ -336,14 +336,13 @@ int jffs2_do_mount_fs(struct jffs2_sb_info *c)
size = sizeof(struct jffs2_eraseblock) * c->nr_blocks;
#ifndef __ECOS
if (jffs2_blocks_use_vmalloc(c))
- c->blocks = vmalloc(size);
+ c->blocks = vzalloc(size);
else
#endif
- c->blocks = kmalloc(size, GFP_KERNEL);
+ c->blocks = kzalloc(size, GFP_KERNEL);
if (!c->blocks)
return -ENOMEM;
- memset(c->blocks, 0, size);
for (i=0; i<c->nr_blocks; i++) {
INIT_LIST_HEAD(&c->blocks[i].list);
c->blocks[i].offset = i * c->sector_size;
diff --git a/fs/jffs2/jffs2_fs_sb.h b/fs/jffs2/jffs2_fs_sb.h
index f864005..0bc6a6c 100644
--- a/fs/jffs2/jffs2_fs_sb.h
+++ b/fs/jffs2/jffs2_fs_sb.h
@@ -144,4 +144,4 @@ struct jffs2_sb_info {
void *os_priv;
};
-#endif /* _JFFS2_FB_SB */
+#endif /* _JFFS2_FS_SB */
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
index 9b572ca..4f9cc04 100644
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c
@@ -151,7 +151,7 @@ static int do_verify_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_dat
JFFS2_ERROR("node CRC failed at %#08x, read=%#08x, calc=%#08x\n",
offset, je32_to_cpu(rx.hdr_crc), crc);
xd->flags |= JFFS2_XFLAGS_INVALID;
- return EIO;
+ return -EIO;
}
totlen = PAD(sizeof(rx) + rx.name_len + 1 + je16_to_cpu(rx.value_len));
if (je16_to_cpu(rx.magic) != JFFS2_MAGIC_BITMASK
@@ -167,7 +167,7 @@ static int do_verify_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_dat
je32_to_cpu(rx.xid), xd->xid,
je32_to_cpu(rx.version), xd->version);
xd->flags |= JFFS2_XFLAGS_INVALID;
- return EIO;
+ return -EIO;
}
xd->xprefix = rx.xprefix;
xd->name_len = rx.name_len;
@@ -230,7 +230,7 @@ static int do_load_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum
ref_offset(xd->node), xd->data_crc, crc);
kfree(data);
xd->flags |= JFFS2_XFLAGS_INVALID;
- return EIO;
+ return -EIO;
}
xd->flags |= JFFS2_XFLAGS_HOT;
@@ -268,7 +268,7 @@ static int load_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *x
if (xd->xname)
return 0;
if (xd->flags & JFFS2_XFLAGS_INVALID)
- return EIO;
+ return -EIO;
if (unlikely(is_xattr_datum_unchecked(c, xd)))
rc = do_verify_xattr_datum(c, xd);
if (!rc)
@@ -460,7 +460,7 @@ static int verify_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref
if (crc != je32_to_cpu(rr.node_crc)) {
JFFS2_ERROR("node CRC failed at %#08x, read=%#08x, calc=%#08x\n",
offset, je32_to_cpu(rr.node_crc), crc);
- return EIO;
+ return -EIO;
}
if (je16_to_cpu(rr.magic) != JFFS2_MAGIC_BITMASK
|| je16_to_cpu(rr.nodetype) != JFFS2_NODETYPE_XREF
@@ -470,7 +470,7 @@ static int verify_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref
offset, je16_to_cpu(rr.magic), JFFS2_MAGIC_BITMASK,
je16_to_cpu(rr.nodetype), JFFS2_NODETYPE_XREF,
je32_to_cpu(rr.totlen), PAD(sizeof(rr)));
- return EIO;
+ return -EIO;
}
ref->ino = je32_to_cpu(rr.ino);
ref->xid = je32_to_cpu(rr.xid);
diff --git a/fs/namei.c b/fs/namei.c
index 8df7a78..7d77f24 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -368,18 +368,6 @@ void path_get(struct path *path)
EXPORT_SYMBOL(path_get);
/**
- * path_get_long - get a long reference to a path
- * @path: path to get the reference to
- *
- * Given a path increment the reference count to the dentry and the vfsmount.
- */
-void path_get_long(struct path *path)
-{
- mntget_long(path->mnt);
- dget(path->dentry);
-}
-
-/**
* path_put - put a reference to a path
* @path: path to put the reference to
*
@@ -393,18 +381,6 @@ void path_put(struct path *path)
EXPORT_SYMBOL(path_put);
/**
- * path_put_long - put a long reference to a path
- * @path: path to put the reference to
- *
- * Given a path decrement the reference count to the dentry and the vfsmount.
- */
-void path_put_long(struct path *path)
-{
- dput(path->dentry);
- mntput_long(path->mnt);
-}
-
-/**
* nameidata_drop_rcu - drop this nameidata out of rcu-walk
* @nd: nameidata pathwalk data to drop
* Returns: 0 on success, -ECHILD on failure
@@ -800,12 +776,8 @@ __do_follow_link(const struct path *link, struct nameidata *nd, void **p)
touch_atime(link->mnt, dentry);
nd_set_link(nd, NULL);
- if (link->mnt != nd->path.mnt) {
- path_to_nameidata(link, nd);
- nd->inode = nd->path.dentry->d_inode;
- dget(dentry);
- }
- mntget(link->mnt);
+ if (link->mnt == nd->path.mnt)
+ mntget(link->mnt);
nd->last_type = LAST_BIND;
*p = dentry->d_inode->i_op->follow_link(dentry, nd);
@@ -896,54 +868,148 @@ int follow_up(struct path *path)
}
/*
- * serialization is taken care of in namespace.c
+ * Perform an automount
+ * - return -EISDIR to tell follow_managed() to stop and return the path we
+ * were called with.
*/
-static void __follow_mount_rcu(struct nameidata *nd, struct path *path,
- struct inode **inode)
+static int follow_automount(struct path *path, unsigned flags,
+ bool *need_mntput)
{
- while (d_mountpoint(path->dentry)) {
- struct vfsmount *mounted;
- mounted = __lookup_mnt(path->mnt, path->dentry, 1);
- if (!mounted)
- return;
- path->mnt = mounted;
- path->dentry = mounted->mnt_root;
- nd->seq = read_seqcount_begin(&path->dentry->d_seq);
- *inode = path->dentry->d_inode;
+ struct vfsmount *mnt;
+ int err;
+
+ if (!path->dentry->d_op || !path->dentry->d_op->d_automount)
+ return -EREMOTE;
+
+ /* We don't want to mount if someone supplied AT_NO_AUTOMOUNT
+ * and this is the terminal part of the path.
+ */
+ if ((flags & LOOKUP_NO_AUTOMOUNT) && !(flags & LOOKUP_CONTINUE))
+ return -EISDIR; /* we actually want to stop here */
+
+ /* We want to mount if someone is trying to open/create a file of any
+ * type under the mountpoint, wants to traverse through the mountpoint
+ * or wants to open the mounted directory.
+ *
+ * We don't want to mount if someone's just doing a stat and they've
+ * set AT_SYMLINK_NOFOLLOW - unless they're stat'ing a directory and
+ * appended a '/' to the name.
+ */
+ if (!(flags & LOOKUP_FOLLOW) &&
+ !(flags & (LOOKUP_CONTINUE | LOOKUP_DIRECTORY |
+ LOOKUP_OPEN | LOOKUP_CREATE)))
+ return -EISDIR;
+
+ current->total_link_count++;
+ if (current->total_link_count >= 40)
+ return -ELOOP;
+
+ mnt = path->dentry->d_op->d_automount(path);
+ if (IS_ERR(mnt)) {
+ /*
+ * The filesystem is allowed to return -EISDIR here to indicate
+ * it doesn't want to automount. For instance, autofs would do
+ * this so that its userspace daemon can mount on this dentry.
+ *
+ * However, we can only permit this if it's a terminal point in
+ * the path being looked up; if it wasn't then the remainder of
+ * the path is inaccessible and we should say so.
+ */
+ if (PTR_ERR(mnt) == -EISDIR && (flags & LOOKUP_CONTINUE))
+ return -EREMOTE;
+ return PTR_ERR(mnt);
}
-}
-static int __follow_mount(struct path *path)
-{
- int res = 0;
- while (d_mountpoint(path->dentry)) {
- struct vfsmount *mounted = lookup_mnt(path);
- if (!mounted)
- break;
+ if (!mnt) /* mount collision */
+ return 0;
+
+ err = finish_automount(mnt, path);
+
+ switch (err) {
+ case -EBUSY:
+ /* Someone else made a mount here whilst we were busy */
+ return 0;
+ case 0:
dput(path->dentry);
- if (res)
+ if (*need_mntput)
mntput(path->mnt);
- path->mnt = mounted;
- path->dentry = dget(mounted->mnt_root);
- res = 1;
+ path->mnt = mnt;
+ path->dentry = dget(mnt->mnt_root);
+ *need_mntput = true;
+ return 0;
+ default:
+ return err;
}
- return res;
+
}
-static void follow_mount(struct path *path)
+/*
+ * Handle a dentry that is managed in some way.
+ * - Flagged for transit management (autofs)
+ * - Flagged as mountpoint
+ * - Flagged as automount point
+ *
+ * This may only be called in refwalk mode.
+ *
+ * Serialization is taken care of in namespace.c
+ */
+static int follow_managed(struct path *path, unsigned flags)
{
- while (d_mountpoint(path->dentry)) {
- struct vfsmount *mounted = lookup_mnt(path);
- if (!mounted)
- break;
- dput(path->dentry);
- mntput(path->mnt);
- path->mnt = mounted;
- path->dentry = dget(mounted->mnt_root);
+ unsigned managed;
+ bool need_mntput = false;
+ int ret;
+
+ /* Given that we're not holding a lock here, we retain the value in a
+ * local variable for each dentry as we look at it so that we don't see
+ * the components of that value change under us */
+ while (managed = ACCESS_ONCE(path->dentry->d_flags),
+ managed &= DCACHE_MANAGED_DENTRY,
+ unlikely(managed != 0)) {
+ /* Allow the filesystem to manage the transit without i_mutex
+ * being held. */
+ if (managed & DCACHE_MANAGE_TRANSIT) {
+ BUG_ON(!path->dentry->d_op);
+ BUG_ON(!path->dentry->d_op->d_manage);
+ ret = path->dentry->d_op->d_manage(path->dentry,
+ false, false);
+ if (ret < 0)
+ return ret == -EISDIR ? 0 : ret;
+ }
+
+ /* Transit to a mounted filesystem. */
+ if (managed & DCACHE_MOUNTED) {
+ struct vfsmount *mounted = lookup_mnt(path);
+ if (mounted) {
+ dput(path->dentry);
+ if (need_mntput)
+ mntput(path->mnt);
+ path->mnt = mounted;
+ path->dentry = dget(mounted->mnt_root);
+ need_mntput = true;
+ continue;
+ }
+
+ /* Something is mounted on this dentry in another
+ * namespace and/or whatever was mounted there in this
+ * namespace got unmounted before we managed to get the
+ * vfsmount_lock */
+ }
+
+ /* Handle an automount point */
+ if (managed & DCACHE_NEED_AUTOMOUNT) {
+ ret = follow_automount(path, flags, &need_mntput);
+ if (ret < 0)
+ return ret == -EISDIR ? 0 : ret;
+ continue;
+ }
+
+ /* We didn't change the current path point */
+ break;
}
+ return 0;
}
-int follow_down(struct path *path)
+int follow_down_one(struct path *path)
{
struct vfsmount *mounted;
@@ -958,13 +1024,41 @@ int follow_down(struct path *path)
return 0;
}
+/*
+ * Skip to top of mountpoint pile in rcuwalk mode. We abort the rcu-walk if we
+ * meet a managed dentry and we're not walking to "..". True is returned to
+ * continue, false to abort.
+ */
+static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
+ struct inode **inode, bool reverse_transit)
+{
+ while (d_mountpoint(path->dentry)) {
+ struct vfsmount *mounted;
+ if (unlikely(path->dentry->d_flags & DCACHE_MANAGE_TRANSIT) &&
+ !reverse_transit &&
+ path->dentry->d_op->d_manage(path->dentry, false, true) < 0)
+ return false;
+ mounted = __lookup_mnt(path->mnt, path->dentry, 1);
+ if (!mounted)
+ break;
+ path->mnt = mounted;
+ path->dentry = mounted->mnt_root;
+ nd->seq = read_seqcount_begin(&path->dentry->d_seq);
+ *inode = path->dentry->d_inode;
+ }
+
+ if (unlikely(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT))
+ return reverse_transit;
+ return true;
+}
+
static int follow_dotdot_rcu(struct nameidata *nd)
{
struct inode *inode = nd->inode;
set_root_rcu(nd);
- while(1) {
+ while (1) {
if (nd->path.dentry == nd->root.dentry &&
nd->path.mnt == nd->root.mnt) {
break;
@@ -987,12 +1081,80 @@ static int follow_dotdot_rcu(struct nameidata *nd)
nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
inode = nd->path.dentry->d_inode;
}
- __follow_mount_rcu(nd, &nd->path, &inode);
+ __follow_mount_rcu(nd, &nd->path, &inode, true);
nd->inode = inode;
return 0;
}
+/*
+ * Follow down to the covering mount currently visible to userspace. At each
+ * point, the filesystem owning that dentry may be queried as to whether the
+ * caller is permitted to proceed or not.
+ *
+ * Care must be taken as namespace_sem may be held (indicated by mounting_here
+ * being true).
+ */
+int follow_down(struct path *path, bool mounting_here)
+{
+ unsigned managed;
+ int ret;
+
+ while (managed = ACCESS_ONCE(path->dentry->d_flags),
+ unlikely(managed & DCACHE_MANAGED_DENTRY)) {
+ /* Allow the filesystem to manage the transit without i_mutex
+ * being held.
+ *
+ * We indicate to the filesystem if someone is trying to mount
+ * something here. This gives autofs the chance to deny anyone
+ * other than its daemon the right to mount on its
+ * superstructure.
+ *
+ * The filesystem may sleep at this point.
+ */
+ if (managed & DCACHE_MANAGE_TRANSIT) {
+ BUG_ON(!path->dentry->d_op);
+ BUG_ON(!path->dentry->d_op->d_manage);
+ ret = path->dentry->d_op->d_manage(
+ path->dentry, mounting_here, false);
+ if (ret < 0)
+ return ret == -EISDIR ? 0 : ret;
+ }
+
+ /* Transit to a mounted filesystem. */
+ if (managed & DCACHE_MOUNTED) {
+ struct vfsmount *mounted = lookup_mnt(path);
+ if (!mounted)
+ break;
+ dput(path->dentry);
+ mntput(path->mnt);
+ path->mnt = mounted;
+ path->dentry = dget(mounted->mnt_root);
+ continue;
+ }
+
+ /* Don't handle automount points here */
+ break;
+ }
+ return 0;
+}
+
+/*
+ * Skip to top of mountpoint pile in refwalk mode for follow_dotdot()
+ */
+static void follow_mount(struct path *path)
+{
+ while (d_mountpoint(path->dentry)) {
+ struct vfsmount *mounted = lookup_mnt(path);
+ if (!mounted)
+ break;
+ dput(path->dentry);
+ mntput(path->mnt);
+ path->mnt = mounted;
+ path->dentry = dget(mounted->mnt_root);
+ }
+}
+
static void follow_dotdot(struct nameidata *nd)
{
set_root(nd);
@@ -1057,12 +1219,14 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
struct vfsmount *mnt = nd->path.mnt;
struct dentry *dentry, *parent = nd->path.dentry;
struct inode *dir;
+ int err;
+
/*
* See if the low-level filesystem might want
* to use its own hash..
*/
if (unlikely(parent->d_flags & DCACHE_OP_HASH)) {
- int err = parent->d_op->d_hash(parent, nd->inode, name);
+ err = parent->d_op->d_hash(parent, nd->inode, name);
if (err < 0)
return err;
}
@@ -1089,22 +1253,30 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
nd->seq = seq;
if (dentry->d_flags & DCACHE_OP_REVALIDATE)
goto need_revalidate;
+done2:
path->mnt = mnt;
path->dentry = dentry;
- __follow_mount_rcu(nd, path, inode);
- } else {
- dentry = __d_lookup(parent, name);
- if (!dentry)
- goto need_lookup;
+ if (likely(__follow_mount_rcu(nd, path, inode, false)))
+ return 0;
+ if (nameidata_drop_rcu(nd))
+ return -ECHILD;
+ /* fallthru */
+ }
+ dentry = __d_lookup(parent, name);
+ if (!dentry)
+ goto need_lookup;
found:
- if (dentry->d_flags & DCACHE_OP_REVALIDATE)
- goto need_revalidate;
+ if (dentry->d_flags & DCACHE_OP_REVALIDATE)
+ goto need_revalidate;
done:
- path->mnt = mnt;
- path->dentry = dentry;
- __follow_mount(path);
- *inode = path->dentry->d_inode;
- }
+ path->mnt = mnt;
+ path->dentry = dentry;
+ err = follow_managed(path, nd->flags);
+ if (unlikely(err < 0)) {
+ path_put_conditional(path, nd);
+ return err;
+ }
+ *inode = path->dentry->d_inode;
return 0;
need_lookup:
@@ -1143,6 +1315,8 @@ need_revalidate:
goto need_lookup;
if (IS_ERR(dentry))
goto fail;
+ if (nd->flags & LOOKUP_RCU)
+ goto done2;
goto done;
fail:
@@ -1150,17 +1324,6 @@ fail:
}
/*
- * This is a temporary kludge to deal with "automount" symlinks; proper
- * solution is to trigger them on follow_mount(), so that do_lookup()
- * would DTRT. To be killed before 2.6.34-final.
- */
-static inline int follow_on_final(struct inode *inode, unsigned lookup_flags)
-{
- return inode && unlikely(inode->i_op->follow_link) &&
- ((lookup_flags & LOOKUP_FOLLOW) || S_ISDIR(inode->i_mode));
-}
-
-/*
* Name resolution.
* This is the basic name resolution function, turning a pathname into
* the final dentry. We expect 'base' to be positive and a directory.
@@ -1298,7 +1461,8 @@ last_component:
err = do_lookup(nd, &this, &next, &inode);
if (err)
break;
- if (follow_on_final(inode, lookup_flags)) {
+ if (inode && unlikely(inode->i_op->follow_link) &&
+ (lookup_flags & LOOKUP_FOLLOW)) {
if (nameidata_dentry_drop_rcu_maybe(nd, next.dentry))
return -ECHILD;
BUG_ON(inode != next.dentry->d_inode);
@@ -2200,11 +2364,9 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
if (open_flag & O_EXCL)
goto exit_dput;
- if (__follow_mount(path)) {
- error = -ELOOP;
- if (open_flag & O_NOFOLLOW)
- goto exit_dput;
- }
+ error = follow_managed(path, nd->flags);
+ if (error < 0)
+ goto exit_dput;
error = -ENOENT;
if (!path->dentry->d_inode)
@@ -2353,8 +2515,7 @@ reval:
struct inode *linki = link.dentry->d_inode;
void *cookie;
error = -ELOOP;
- /* S_ISDIR part is a temporary automount kludge */
- if (!(nd.flags & LOOKUP_FOLLOW) && !S_ISDIR(linki->i_mode))
+ if (!(nd.flags & LOOKUP_FOLLOW))
goto exit_dput;
if (count++ == 32)
goto exit_dput;
@@ -3413,6 +3574,7 @@ const struct inode_operations page_symlink_inode_operations = {
};
EXPORT_SYMBOL(user_path_at);
+EXPORT_SYMBOL(follow_down_one);
EXPORT_SYMBOL(follow_down);
EXPORT_SYMBOL(follow_up);
EXPORT_SYMBOL(get_write_access); /* binfmt_aout */
diff --git a/fs/namespace.c b/fs/namespace.c
index 3ddfd90..7b0b953 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -183,7 +183,7 @@ static inline void mnt_dec_count(struct vfsmount *mnt)
unsigned int mnt_get_count(struct vfsmount *mnt)
{
#ifdef CONFIG_SMP
- unsigned int count = atomic_read(&mnt->mnt_longrefs);
+ unsigned int count = 0;
int cpu;
for_each_possible_cpu(cpu) {
@@ -217,7 +217,7 @@ struct vfsmount *alloc_vfsmnt(const char *name)
if (!mnt->mnt_pcp)
goto out_free_devname;
- atomic_set(&mnt->mnt_longrefs, 1);
+ this_cpu_add(mnt->mnt_pcp->mnt_count, 1);
#else
mnt->mnt_count = 1;
mnt->mnt_writers = 0;
@@ -611,6 +611,21 @@ static void attach_mnt(struct vfsmount *mnt, struct path *path)
list_add_tail(&mnt->mnt_child, &path->mnt->mnt_mounts);
}
+static inline void __mnt_make_longterm(struct vfsmount *mnt)
+{
+#ifdef CONFIG_SMP
+ atomic_inc(&mnt->mnt_longterm);
+#endif
+}
+
+/* needs vfsmount lock for write */
+static inline void __mnt_make_shortterm(struct vfsmount *mnt)
+{
+#ifdef CONFIG_SMP
+ atomic_dec(&mnt->mnt_longterm);
+#endif
+}
+
/*
* vfsmount lock must be held for write
*/
@@ -624,8 +639,11 @@ static void commit_tree(struct vfsmount *mnt)
BUG_ON(parent == mnt);
list_add_tail(&head, &mnt->mnt_list);
- list_for_each_entry(m, &head, mnt_list)
+ list_for_each_entry(m, &head, mnt_list) {
m->mnt_ns = n;
+ __mnt_make_longterm(m);
+ }
+
list_splice(&head, n->list.prev);
list_add_tail(&mnt->mnt_hash, mount_hashtable +
@@ -734,51 +752,30 @@ static inline void mntfree(struct vfsmount *mnt)
deactivate_super(sb);
}
-#ifdef CONFIG_SMP
-static inline void __mntput(struct vfsmount *mnt, int longrefs)
+static void mntput_no_expire(struct vfsmount *mnt)
{
- if (!longrefs) {
put_again:
- br_read_lock(vfsmount_lock);
- if (likely(atomic_read(&mnt->mnt_longrefs))) {
- mnt_dec_count(mnt);
- br_read_unlock(vfsmount_lock);
- return;
- }
+#ifdef CONFIG_SMP
+ br_read_lock(vfsmount_lock);
+ if (likely(atomic_read(&mnt->mnt_longterm))) {
+ mnt_dec_count(mnt);
br_read_unlock(vfsmount_lock);
- } else {
- BUG_ON(!atomic_read(&mnt->mnt_longrefs));
- if (atomic_add_unless(&mnt->mnt_longrefs, -1, 1))
- return;
+ return;
}
+ br_read_unlock(vfsmount_lock);
br_write_lock(vfsmount_lock);
- if (!longrefs)
- mnt_dec_count(mnt);
- else
- atomic_dec(&mnt->mnt_longrefs);
+ mnt_dec_count(mnt);
if (mnt_get_count(mnt)) {
br_write_unlock(vfsmount_lock);
return;
}
- if (unlikely(mnt->mnt_pinned)) {
- mnt_add_count(mnt, mnt->mnt_pinned + 1);
- mnt->mnt_pinned = 0;
- br_write_unlock(vfsmount_lock);
- acct_auto_close_mnt(mnt);
- goto put_again;
- }
- br_write_unlock(vfsmount_lock);
- mntfree(mnt);
-}
#else
-static inline void __mntput(struct vfsmount *mnt, int longrefs)
-{
-put_again:
mnt_dec_count(mnt);
if (likely(mnt_get_count(mnt)))
return;
br_write_lock(vfsmount_lock);
+#endif
if (unlikely(mnt->mnt_pinned)) {
mnt_add_count(mnt, mnt->mnt_pinned + 1);
mnt->mnt_pinned = 0;
@@ -789,12 +786,6 @@ put_again:
br_write_unlock(vfsmount_lock);
mntfree(mnt);
}
-#endif
-
-static void mntput_no_expire(struct vfsmount *mnt)
-{
- __mntput(mnt, 0);
-}
void mntput(struct vfsmount *mnt)
{
@@ -802,7 +793,7 @@ void mntput(struct vfsmount *mnt)
/* avoid cacheline pingpong, hope gcc doesn't get "smart" */
if (unlikely(mnt->mnt_expiry_mark))
mnt->mnt_expiry_mark = 0;
- __mntput(mnt, 0);
+ mntput_no_expire(mnt);
}
}
EXPORT_SYMBOL(mntput);
@@ -815,33 +806,6 @@ struct vfsmount *mntget(struct vfsmount *mnt)
}
EXPORT_SYMBOL(mntget);
-void mntput_long(struct vfsmount *mnt)
-{
-#ifdef CONFIG_SMP
- if (mnt) {
- /* avoid cacheline pingpong, hope gcc doesn't get "smart" */
- if (unlikely(mnt->mnt_expiry_mark))
- mnt->mnt_expiry_mark = 0;
- __mntput(mnt, 1);
- }
-#else
- mntput(mnt);
-#endif
-}
-EXPORT_SYMBOL(mntput_long);
-
-struct vfsmount *mntget_long(struct vfsmount *mnt)
-{
-#ifdef CONFIG_SMP
- if (mnt)
- atomic_inc(&mnt->mnt_longrefs);
- return mnt;
-#else
- return mntget(mnt);
-#endif
-}
-EXPORT_SYMBOL(mntget_long);
-
void mnt_pin(struct vfsmount *mnt)
{
br_write_lock(vfsmount_lock);
@@ -1216,7 +1180,7 @@ void release_mounts(struct list_head *head)
dput(dentry);
mntput(m);
}
- mntput_long(mnt);
+ mntput(mnt);
}
}
@@ -1226,19 +1190,21 @@ void release_mounts(struct list_head *head)
*/
void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill)
{
+ LIST_HEAD(tmp_list);
struct vfsmount *p;
for (p = mnt; p; p = next_mnt(p, mnt))
- list_move(&p->mnt_hash, kill);
+ list_move(&p->mnt_hash, &tmp_list);
if (propagate)
- propagate_umount(kill);
+ propagate_umount(&tmp_list);
- list_for_each_entry(p, kill, mnt_hash) {
+ list_for_each_entry(p, &tmp_list, mnt_hash) {
list_del_init(&p->mnt_expire);
list_del_init(&p->mnt_list);
__touch_mnt_namespace(p->mnt_ns);
p->mnt_ns = NULL;
+ __mnt_make_shortterm(p);
list_del_init(&p->mnt_child);
if (p->mnt_parent != p) {
p->mnt_parent->mnt_ghosts++;
@@ -1246,6 +1212,7 @@ void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill)
}
change_mnt_propagation(p, MS_PRIVATE);
}
+ list_splice(&tmp_list, kill);
}
static void shrink_submounts(struct vfsmount *mnt, struct list_head *umounts);
@@ -1844,9 +1811,10 @@ static int do_move_mount(struct path *path, char *old_name)
return err;
down_write(&namespace_sem);
- while (d_mountpoint(path->dentry) &&
- follow_down(path))
- ;
+ err = follow_down(path, true);
+ if (err < 0)
+ goto out;
+
err = -EINVAL;
if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt))
goto out;
@@ -1904,6 +1872,8 @@ out:
return err;
}
+static int do_add_mount(struct vfsmount *, struct path *, int);
+
/*
* create a new mount for userspace and request it to be added into the
* namespace's tree
@@ -1912,6 +1882,7 @@ static int do_new_mount(struct path *path, char *type, int flags,
int mnt_flags, char *name, void *data)
{
struct vfsmount *mnt;
+ int err;
if (!type)
return -EINVAL;
@@ -1924,15 +1895,47 @@ static int do_new_mount(struct path *path, char *type, int flags,
if (IS_ERR(mnt))
return PTR_ERR(mnt);
- return do_add_mount(mnt, path, mnt_flags, NULL);
+ err = do_add_mount(mnt, path, mnt_flags);
+ if (err)
+ mntput(mnt);
+ return err;
+}
+
+int finish_automount(struct vfsmount *m, struct path *path)
+{
+ int err;
+ /* The new mount record should have at least 2 refs to prevent it being
+ * expired before we get a chance to add it
+ */
+ BUG_ON(mnt_get_count(m) < 2);
+
+ if (m->mnt_sb == path->mnt->mnt_sb &&
+ m->mnt_root == path->dentry) {
+ err = -ELOOP;
+ goto fail;
+ }
+
+ err = do_add_mount(m, path, path->mnt->mnt_flags | MNT_SHRINKABLE);
+ if (!err)
+ return 0;
+fail:
+ /* remove m from any expiration list it may be on */
+ if (!list_empty(&m->mnt_expire)) {
+ down_write(&namespace_sem);
+ br_write_lock(vfsmount_lock);
+ list_del_init(&m->mnt_expire);
+ br_write_unlock(vfsmount_lock);
+ up_write(&namespace_sem);
+ }
+ mntput(m);
+ mntput(m);
+ return err;
}
/*
* add a mount into a namespace's mount tree
- * - provide the option of adding the new mount to an expiration list
*/
-int do_add_mount(struct vfsmount *newmnt, struct path *path,
- int mnt_flags, struct list_head *fslist)
+static int do_add_mount(struct vfsmount *newmnt, struct path *path, int mnt_flags)
{
int err;
@@ -1940,9 +1943,10 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path,
down_write(&namespace_sem);
/* Something was mounted here while we slept */
- while (d_mountpoint(path->dentry) &&
- follow_down(path))
- ;
+ err = follow_down(path, true);
+ if (err < 0)
+ goto unlock;
+
err = -EINVAL;
if (!(mnt_flags & MNT_SHRINKABLE) && !check_mnt(path->mnt))
goto unlock;
@@ -1958,22 +1962,29 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path,
goto unlock;
newmnt->mnt_flags = mnt_flags;
- if ((err = graft_tree(newmnt, path)))
- goto unlock;
-
- if (fslist) /* add to the specified expiration list */
- list_add_tail(&newmnt->mnt_expire, fslist);
-
- up_write(&namespace_sem);
- return 0;
+ err = graft_tree(newmnt, path);
unlock:
up_write(&namespace_sem);
- mntput_long(newmnt);
return err;
}
-EXPORT_SYMBOL_GPL(do_add_mount);
+/**
+ * mnt_set_expiry - Put a mount on an expiration list
+ * @mnt: The mount to list.
+ * @expiry_list: The list to add the mount to.
+ */
+void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list)
+{
+ down_write(&namespace_sem);
+ br_write_lock(vfsmount_lock);
+
+ list_add_tail(&mnt->mnt_expire, expiry_list);
+
+ br_write_unlock(vfsmount_lock);
+ up_write(&namespace_sem);
+}
+EXPORT_SYMBOL(mnt_set_expiry);
/*
* process a list of expirable mountpoints with the intent of discarding any
@@ -2262,6 +2273,22 @@ static struct mnt_namespace *alloc_mnt_ns(void)
return new_ns;
}
+void mnt_make_longterm(struct vfsmount *mnt)
+{
+ __mnt_make_longterm(mnt);
+}
+
+void mnt_make_shortterm(struct vfsmount *mnt)
+{
+#ifdef CONFIG_SMP
+ if (atomic_add_unless(&mnt->mnt_longterm, -1, 1))
+ return;
+ br_write_lock(vfsmount_lock);
+ atomic_dec(&mnt->mnt_longterm);
+ br_write_unlock(vfsmount_lock);
+#endif
+}
+
/*
* Allocate a new namespace structure and populate it with contents
* copied from the namespace of the passed in task structure.
@@ -2299,14 +2326,19 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
q = new_ns->root;
while (p) {
q->mnt_ns = new_ns;
+ __mnt_make_longterm(q);
if (fs) {
if (p == fs->root.mnt) {
+ fs->root.mnt = mntget(q);
+ __mnt_make_longterm(q);
+ mnt_make_shortterm(p);
rootmnt = p;
- fs->root.mnt = mntget_long(q);
}
if (p == fs->pwd.mnt) {
+ fs->pwd.mnt = mntget(q);
+ __mnt_make_longterm(q);
+ mnt_make_shortterm(p);
pwdmnt = p;
- fs->pwd.mnt = mntget_long(q);
}
}
p = next_mnt(p, mnt_ns->root);
@@ -2315,9 +2347,9 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
up_write(&namespace_sem);
if (rootmnt)
- mntput_long(rootmnt);
+ mntput(rootmnt);
if (pwdmnt)
- mntput_long(pwdmnt);
+ mntput(pwdmnt);
return new_ns;
}
@@ -2350,6 +2382,7 @@ struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt)
new_ns = alloc_mnt_ns();
if (!IS_ERR(new_ns)) {
mnt->mnt_ns = new_ns;
+ __mnt_make_longterm(mnt);
new_ns->root = mnt;
list_add(&new_ns->list, &new_ns->root->mnt_list);
}
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index df8c03a..2c3eb33 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -970,7 +970,7 @@ int nfs_lookup_verify_inode(struct inode *inode, struct nameidata *nd)
{
struct nfs_server *server = NFS_SERVER(inode);
- if (test_bit(NFS_INO_MOUNTPOINT, &NFS_I(inode)->flags))
+ if (IS_AUTOMOUNT(inode))
return 0;
if (nd != NULL) {
/* VFS wants an on-the-wire revalidation */
@@ -1173,6 +1173,7 @@ const struct dentry_operations nfs_dentry_operations = {
.d_revalidate = nfs_lookup_revalidate,
.d_delete = nfs_dentry_delete,
.d_iput = nfs_dentry_iput,
+ .d_automount = nfs_d_automount,
};
static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
@@ -1246,6 +1247,7 @@ const struct dentry_operations nfs4_dentry_operations = {
.d_revalidate = nfs_open_revalidate,
.d_delete = nfs_dentry_delete,
.d_iput = nfs_dentry_iput,
+ .d_automount = nfs_d_automount,
};
/*
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index ce00b70..d851242 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -300,7 +300,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
else
inode->i_op = &nfs_mountpoint_inode_operations;
inode->i_fop = NULL;
- set_bit(NFS_INO_MOUNTPOINT, &nfsi->flags);
+ inode->i_flags |= S_AUTOMOUNT;
}
} else if (S_ISLNK(inode->i_mode))
inode->i_op = &nfs_symlink_inode_operations;
@@ -1208,7 +1208,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
/* Update the fsid? */
if (S_ISDIR(inode->i_mode) && (fattr->valid & NFS_ATTR_FATTR_FSID) &&
!nfs_fsid_equal(&server->fsid, &fattr->fsid) &&
- !test_bit(NFS_INO_MOUNTPOINT, &nfsi->flags))
+ !IS_AUTOMOUNT(inode))
server->fsid = fattr->fsid;
/*
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index bfa3a34..4644f04 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -252,6 +252,7 @@ extern char *nfs_path(const char *base,
const struct dentry *droot,
const struct dentry *dentry,
char *buffer, ssize_t buflen);
+extern struct vfsmount *nfs_d_automount(struct path *path);
/* getroot.c */
extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *);
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 74aaf39..f32b860 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -97,9 +97,8 @@ Elong:
}
/*
- * nfs_follow_mountpoint - handle crossing a mountpoint on the server
- * @dentry - dentry of mountpoint
- * @nd - nameidata info
+ * nfs_d_automount - Handle crossing a mountpoint on the server
+ * @path - The mountpoint
*
* When we encounter a mountpoint on the server, we want to set up
* a mountpoint on the client too, to prevent inode numbers from
@@ -109,87 +108,65 @@ Elong:
* situation, and that different filesystems may want to use
* different security flavours.
*/
-static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
+struct vfsmount *nfs_d_automount(struct path *path)
{
struct vfsmount *mnt;
- struct nfs_server *server = NFS_SERVER(dentry->d_inode);
+ struct nfs_server *server = NFS_SERVER(path->dentry->d_inode);
struct dentry *parent;
struct nfs_fh *fh = NULL;
struct nfs_fattr *fattr = NULL;
int err;
- dprintk("--> nfs_follow_mountpoint()\n");
+ dprintk("--> nfs_d_automount()\n");
- err = -ESTALE;
- if (IS_ROOT(dentry))
- goto out_err;
+ mnt = ERR_PTR(-ESTALE);
+ if (IS_ROOT(path->dentry))
+ goto out_nofree;
- err = -ENOMEM;
+ mnt = ERR_PTR(-ENOMEM);
fh = nfs_alloc_fhandle();
fattr = nfs_alloc_fattr();
if (fh == NULL || fattr == NULL)
- goto out_err;
+ goto out;
dprintk("%s: enter\n", __func__);
- dput(nd->path.dentry);
- nd->path.dentry = dget(dentry);
- /* Look it up again */
- parent = dget_parent(nd->path.dentry);
+ /* Look it up again to get its attributes */
+ parent = dget_parent(path->dentry);
err = server->nfs_client->rpc_ops->lookup(parent->d_inode,
- &nd->path.dentry->d_name,
+ &path->dentry->d_name,
fh, fattr);
dput(parent);
- if (err != 0)
- goto out_err;
+ if (err != 0) {
+ mnt = ERR_PTR(err);
+ goto out;
+ }
if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL)
- mnt = nfs_do_refmount(nd->path.mnt, nd->path.dentry);
+ mnt = nfs_do_refmount(path->mnt, path->dentry);
else
- mnt = nfs_do_submount(nd->path.mnt, nd->path.dentry, fh,
- fattr);
- err = PTR_ERR(mnt);
+ mnt = nfs_do_submount(path->mnt, path->dentry, fh, fattr);
if (IS_ERR(mnt))
- goto out_err;
+ goto out;
- mntget(mnt);
- err = do_add_mount(mnt, &nd->path, nd->path.mnt->mnt_flags|MNT_SHRINKABLE,
- &nfs_automount_list);
- if (err < 0) {
- mntput(mnt);
- if (err == -EBUSY)
- goto out_follow;
- goto out_err;
- }
- path_put(&nd->path);
- nd->path.mnt = mnt;
- nd->path.dentry = dget(mnt->mnt_root);
+ dprintk("%s: done, success\n", __func__);
+ mntget(mnt); /* prevent immediate expiration */
+ mnt_set_expiry(mnt, &nfs_automount_list);
schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout);
+
out:
nfs_free_fattr(fattr);
nfs_free_fhandle(fh);
- dprintk("%s: done, returned %d\n", __func__, err);
-
- dprintk("<-- nfs_follow_mountpoint() = %d\n", err);
- return ERR_PTR(err);
-out_err:
- path_put(&nd->path);
- goto out;
-out_follow:
- while (d_mountpoint(nd->path.dentry) &&
- follow_down(&nd->path))
- ;
- err = 0;
- goto out;
+out_nofree:
+ dprintk("<-- nfs_follow_mountpoint() = %p\n", mnt);
+ return mnt;
}
const struct inode_operations nfs_mountpoint_inode_operations = {
- .follow_link = nfs_follow_mountpoint,
.getattr = nfs_getattr,
};
const struct inode_operations nfs_referral_inode_operations = {
- .follow_link = nfs_follow_mountpoint,
};
static void nfs_expire_automounts(struct work_struct *work)
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index a3c7f70..641117f 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -87,8 +87,9 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
.dentry = dget(dentry)};
int err = 0;
- while (d_mountpoint(path.dentry) && follow_down(&path))
- ;
+ err = follow_down(&path, false);
+ if (err < 0)
+ goto out;
exp2 = rqst_exp_get_by_name(rqstp, &path);
if (IS_ERR(exp2)) {
diff --git a/fs/ocfs2/Kconfig b/fs/ocfs2/Kconfig
index ab152c0..77a8de5 100644
--- a/fs/ocfs2/Kconfig
+++ b/fs/ocfs2/Kconfig
@@ -1,7 +1,6 @@
config OCFS2_FS
tristate "OCFS2 file system support"
- depends on NET && SYSFS
- select CONFIGFS_FS
+ depends on NET && SYSFS && CONFIGFS_FS
select JBD2
select CRC32
select QUOTA
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 63e3fca..a665195 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1989,20 +1989,20 @@ int ocfs2_change_file_space(struct file *file, unsigned int cmd,
return __ocfs2_change_file_space(file, inode, file->f_pos, cmd, sr, 0);
}
-static long ocfs2_fallocate(struct inode *inode, int mode, loff_t offset,
+static long ocfs2_fallocate(struct file *file, int mode, loff_t offset,
loff_t len)
{
+ struct inode *inode = file->f_path.dentry->d_inode;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct ocfs2_space_resv sr;
int change_size = 1;
int cmd = OCFS2_IOC_RESVSP64;
+ if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
+ return -EOPNOTSUPP;
if (!ocfs2_writes_unwritten_extents(osb))
return -EOPNOTSUPP;
- if (S_ISDIR(inode->i_mode))
- return -ENODEV;
-
if (mode & FALLOC_FL_KEEP_SIZE)
change_size = 0;
@@ -2610,7 +2610,6 @@ const struct inode_operations ocfs2_file_iops = {
.getxattr = generic_getxattr,
.listxattr = ocfs2_listxattr,
.removexattr = generic_removexattr,
- .fallocate = ocfs2_fallocate,
.fiemap = ocfs2_fiemap,
};
@@ -2642,6 +2641,7 @@ const struct file_operations ocfs2_fops = {
.flock = ocfs2_flock,
.splice_read = ocfs2_file_splice_read,
.splice_write = ocfs2_file_splice_write,
+ .fallocate = ocfs2_fallocate,
};
const struct file_operations ocfs2_dops = {
diff --git a/fs/open.c b/fs/open.c
index 5b6ef7e..e52389e 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -255,10 +255,10 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0))
return -EFBIG;
- if (!inode->i_op->fallocate)
+ if (!file->f_op->fallocate)
return -EOPNOTSUPP;
- return inode->i_op->fallocate(inode, mode, offset, len);
+ return file->f_op->fallocate(file, mode, offset, len);
}
SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len)
diff --git a/fs/pipe.c b/fs/pipe.c
index e2e95fb..89e9e19 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1292,7 +1292,7 @@ static int __init init_pipe_fs(void)
static void __exit exit_pipe_fs(void)
{
unregister_filesystem(&pipe_fs_type);
- mntput_long(pipe_mnt);
+ mntput(pipe_mnt);
}
fs_initcall(init_pipe_fs);
diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig
index e5f63da..aa68a8a 100644
--- a/fs/squashfs/Kconfig
+++ b/fs/squashfs/Kconfig
@@ -29,7 +29,6 @@ config SQUASHFS
config SQUASHFS_XATTR
bool "Squashfs XATTR support"
depends on SQUASHFS
- default n
help
Saying Y here includes support for extended attributes (xattrs).
Xattrs are name:value pairs associated with inodes by
@@ -40,7 +39,6 @@ config SQUASHFS_XATTR
config SQUASHFS_LZO
bool "Include support for LZO compressed file systems"
depends on SQUASHFS
- default n
select LZO_DECOMPRESS
help
Saying Y here includes support for reading Squashfs file systems
@@ -53,10 +51,24 @@ config SQUASHFS_LZO
If unsure, say N.
+config SQUASHFS_XZ
+ bool "Include support for XZ compressed file systems"
+ depends on SQUASHFS
+ select XZ_DEC
+ help
+ Saying Y here includes support for reading Squashfs file systems
+ compressed with XZ compresssion. XZ gives better compression than
+ the default zlib compression, at the expense of greater CPU and
+ memory overhead.
+
+ XZ is not the standard compression used in Squashfs and so most
+ file systems will be readable without selecting this option.
+
+ If unsure, say N.
+
config SQUASHFS_EMBEDDED
bool "Additional option for memory-constrained systems"
depends on SQUASHFS
- default n
help
Saying Y here allows you to specify cache size.
diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile
index 7672bac..cecf2be 100644
--- a/fs/squashfs/Makefile
+++ b/fs/squashfs/Makefile
@@ -7,3 +7,4 @@ squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o
squashfs-y += namei.o super.o symlink.o zlib_wrapper.o decompressor.o
squashfs-$(CONFIG_SQUASHFS_XATTR) += xattr.o xattr_id.o
squashfs-$(CONFIG_SQUASHFS_LZO) += lzo_wrapper.o
+squashfs-$(CONFIG_SQUASHFS_XZ) += xz_wrapper.o
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index 653c030..2fb2882 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -34,7 +34,6 @@
#include "squashfs_fs.h"
#include "squashfs_fs_sb.h"
-#include "squashfs_fs_i.h"
#include "squashfs.h"
#include "decompressor.h"
diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c
index 57314be..26b15ae 100644
--- a/fs/squashfs/cache.c
+++ b/fs/squashfs/cache.c
@@ -55,7 +55,6 @@
#include "squashfs_fs.h"
#include "squashfs_fs_sb.h"
-#include "squashfs_fs_i.h"
#include "squashfs.h"
/*
diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c
index 24af9ce..a5940e5 100644
--- a/fs/squashfs/decompressor.c
+++ b/fs/squashfs/decompressor.c
@@ -27,7 +27,6 @@
#include "squashfs_fs.h"
#include "squashfs_fs_sb.h"
-#include "squashfs_fs_i.h"
#include "decompressor.h"
#include "squashfs.h"
@@ -41,23 +40,26 @@ static const struct squashfs_decompressor squashfs_lzma_unsupported_comp_ops = {
};
#ifndef CONFIG_SQUASHFS_LZO
-static const struct squashfs_decompressor squashfs_lzo_unsupported_comp_ops = {
+static const struct squashfs_decompressor squashfs_lzo_comp_ops = {
NULL, NULL, NULL, LZO_COMPRESSION, "lzo", 0
};
#endif
+#ifndef CONFIG_SQUASHFS_XZ
+static const struct squashfs_decompressor squashfs_xz_comp_ops = {
+ NULL, NULL, NULL, XZ_COMPRESSION, "xz", 0
+};
+#endif
+
static const struct squashfs_decompressor squashfs_unknown_comp_ops = {
NULL, NULL, NULL, 0, "unknown", 0
};
static const struct squashfs_decompressor *decompressor[] = {
&squashfs_zlib_comp_ops,
- &squashfs_lzma_unsupported_comp_ops,
-#ifdef CONFIG_SQUASHFS_LZO
&squashfs_lzo_comp_ops,
-#else
- &squashfs_lzo_unsupported_comp_ops,
-#endif
+ &squashfs_xz_comp_ops,
+ &squashfs_lzma_unsupported_comp_ops,
&squashfs_unknown_comp_ops
};
diff --git a/fs/squashfs/decompressor.h b/fs/squashfs/decompressor.h
index 7425f80..3b305a7 100644
--- a/fs/squashfs/decompressor.h
+++ b/fs/squashfs/decompressor.h
@@ -52,4 +52,13 @@ static inline int squashfs_decompress(struct squashfs_sb_info *msblk,
return msblk->decompressor->decompress(msblk, buffer, bh, b, offset,
length, srclength, pages);
}
+
+#ifdef CONFIG_SQUASHFS_XZ
+extern const struct squashfs_decompressor squashfs_xz_comp_ops;
+#endif
+
+#ifdef CONFIG_SQUASHFS_LZO
+extern const struct squashfs_decompressor squashfs_lzo_comp_ops;
+#endif
+
#endif
diff --git a/fs/squashfs/fragment.c b/fs/squashfs/fragment.c
index 7c90bbd..7eef571 100644
--- a/fs/squashfs/fragment.c
+++ b/fs/squashfs/fragment.c
@@ -39,7 +39,6 @@
#include "squashfs_fs.h"
#include "squashfs_fs_sb.h"
-#include "squashfs_fs_i.h"
#include "squashfs.h"
/*
diff --git a/fs/squashfs/id.c b/fs/squashfs/id.c
index b7f64bc..d8f3245 100644
--- a/fs/squashfs/id.c
+++ b/fs/squashfs/id.c
@@ -37,7 +37,6 @@
#include "squashfs_fs.h"
#include "squashfs_fs_sb.h"
-#include "squashfs_fs_i.h"
#include "squashfs.h"
/*
diff --git a/fs/squashfs/lzo_wrapper.c b/fs/squashfs/lzo_wrapper.c
index 5d87789..7da759e 100644
--- a/fs/squashfs/lzo_wrapper.c
+++ b/fs/squashfs/lzo_wrapper.c
@@ -29,7 +29,6 @@
#include "squashfs_fs.h"
#include "squashfs_fs_sb.h"
-#include "squashfs_fs_i.h"
#include "squashfs.h"
#include "decompressor.h"
diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h
index 5d45569..ba729d8 100644
--- a/fs/squashfs/squashfs.h
+++ b/fs/squashfs/squashfs.h
@@ -27,11 +27,6 @@
#define WARNING(s, args...) pr_warning("SQUASHFS: "s, ## args)
-static inline struct squashfs_inode_info *squashfs_i(struct inode *inode)
-{
- return list_entry(inode, struct squashfs_inode_info, vfs_inode);
-}
-
/* block.c */
extern int squashfs_read_data(struct super_block *, void **, u64, int, u64 *,
int, int);
@@ -104,6 +99,3 @@ extern const struct xattr_handler *squashfs_xattr_handlers[];
/* zlib_wrapper.c */
extern const struct squashfs_decompressor squashfs_zlib_comp_ops;
-
-/* lzo_wrapper.c */
-extern const struct squashfs_decompressor squashfs_lzo_comp_ops;
diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h
index c5137fc..39533fe 100644
--- a/fs/squashfs/squashfs_fs.h
+++ b/fs/squashfs/squashfs_fs.h
@@ -238,6 +238,7 @@ struct meta_index {
#define ZLIB_COMPRESSION 1
#define LZMA_COMPRESSION 2
#define LZO_COMPRESSION 3
+#define XZ_COMPRESSION 4
struct squashfs_super_block {
__le32 s_magic;
diff --git a/fs/squashfs/squashfs_fs_i.h b/fs/squashfs/squashfs_fs_i.h
index d3e3a37..359baef 100644
--- a/fs/squashfs/squashfs_fs_i.h
+++ b/fs/squashfs/squashfs_fs_i.h
@@ -45,4 +45,10 @@ struct squashfs_inode_info {
};
struct inode vfs_inode;
};
+
+
+static inline struct squashfs_inode_info *squashfs_i(struct inode *inode)
+{
+ return list_entry(inode, struct squashfs_inode_info, vfs_inode);
+}
#endif
diff --git a/fs/squashfs/xattr_id.c b/fs/squashfs/xattr_id.c
index d33be5d..05385db 100644
--- a/fs/squashfs/xattr_id.c
+++ b/fs/squashfs/xattr_id.c
@@ -32,7 +32,6 @@
#include "squashfs_fs.h"
#include "squashfs_fs_sb.h"
-#include "squashfs_fs_i.h"
#include "squashfs.h"
#include "xattr.h"
diff --git a/fs/squashfs/xz_wrapper.c b/fs/squashfs/xz_wrapper.c
new file mode 100644
index 0000000..856756c
--- /dev/null
+++ b/fs/squashfs/xz_wrapper.c
@@ -0,0 +1,153 @@
+/*
+ * Squashfs - a compressed read only filesystem for Linux
+ *
+ * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
+ * Phillip Lougher <phillip@lougher.demon.co.uk>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2,
+ * or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * xz_wrapper.c
+ */
+
+
+#include <linux/mutex.h>
+#include <linux/buffer_head.h>
+#include <linux/slab.h>
+#include <linux/xz.h>
+
+#include "squashfs_fs.h"
+#include "squashfs_fs_sb.h"
+#include "squashfs_fs_i.h"
+#include "squashfs.h"
+#include "decompressor.h"
+
+struct squashfs_xz {
+ struct xz_dec *state;
+ struct xz_buf buf;
+};
+
+static void *squashfs_xz_init(struct squashfs_sb_info *msblk)
+{
+ int block_size = max_t(int, msblk->block_size, SQUASHFS_METADATA_SIZE);
+
+ struct squashfs_xz *stream = kmalloc(sizeof(*stream), GFP_KERNEL);
+ if (stream == NULL)
+ goto failed;
+
+ stream->state = xz_dec_init(XZ_PREALLOC, block_size);
+ if (stream->state == NULL)
+ goto failed;
+
+ return stream;
+
+failed:
+ ERROR("Failed to allocate xz workspace\n");
+ kfree(stream);
+ return NULL;
+}
+
+
+static void squashfs_xz_free(void *strm)
+{
+ struct squashfs_xz *stream = strm;
+
+ if (stream) {
+ xz_dec_end(stream->state);
+ kfree(stream);
+ }
+}
+
+
+static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void **buffer,
+ struct buffer_head **bh, int b, int offset, int length, int srclength,
+ int pages)
+{
+ enum xz_ret xz_err;
+ int avail, total = 0, k = 0, page = 0;
+ struct squashfs_xz *stream = msblk->stream;
+
+ mutex_lock(&msblk->read_data_mutex);
+
+ xz_dec_reset(stream->state);
+ stream->buf.in_pos = 0;
+ stream->buf.in_size = 0;
+ stream->buf.out_pos = 0;
+ stream->buf.out_size = PAGE_CACHE_SIZE;
+ stream->buf.out = buffer[page++];
+
+ do {
+ if (stream->buf.in_pos == stream->buf.in_size && k < b) {
+ avail = min(length, msblk->devblksize - offset);
+ length -= avail;
+ wait_on_buffer(bh[k]);
+ if (!buffer_uptodate(bh[k]))
+ goto release_mutex;
+
+ if (avail == 0) {
+ offset = 0;
+ put_bh(bh[k++]);
+ continue;
+ }
+
+ stream->buf.in = bh[k]->b_data + offset;
+ stream->buf.in_size = avail;
+ stream->buf.in_pos = 0;
+ offset = 0;
+ }
+
+ if (stream->buf.out_pos == stream->buf.out_size
+ && page < pages) {
+ stream->buf.out = buffer[page++];
+ stream->buf.out_pos = 0;
+ total += PAGE_CACHE_SIZE;
+ }
+
+ xz_err = xz_dec_run(stream->state, &stream->buf);
+
+ if (stream->buf.in_pos == stream->buf.in_size && k < b)
+ put_bh(bh[k++]);
+ } while (xz_err == XZ_OK);
+
+ if (xz_err != XZ_STREAM_END) {
+ ERROR("xz_dec_run error, data probably corrupt\n");
+ goto release_mutex;
+ }
+
+ if (k < b) {
+ ERROR("xz_uncompress error, input remaining\n");
+ goto release_mutex;
+ }
+
+ total += stream->buf.out_pos;
+ mutex_unlock(&msblk->read_data_mutex);
+ return total;
+
+release_mutex:
+ mutex_unlock(&msblk->read_data_mutex);
+
+ for (; k < b; k++)
+ put_bh(bh[k]);
+
+ return -EIO;
+}
+
+const struct squashfs_decompressor squashfs_xz_comp_ops = {
+ .init = squashfs_xz_init,
+ .free = squashfs_xz_free,
+ .decompress = squashfs_xz_uncompress,
+ .id = XZ_COMPRESSION,
+ .name = "xz",
+ .supported = 1
+};
diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c
index 7a60387..818a5e0 100644
--- a/fs/squashfs/zlib_wrapper.c
+++ b/fs/squashfs/zlib_wrapper.c
@@ -29,7 +29,6 @@
#include "squashfs_fs.h"
#include "squashfs_fs_sb.h"
-#include "squashfs_fs_i.h"
#include "squashfs.h"
#include "decompressor.h"
@@ -66,8 +65,8 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer,
struct buffer_head **bh, int b, int offset, int length, int srclength,
int pages)
{
- int zlib_err = 0, zlib_init = 0;
- int avail, bytes, k = 0, page = 0;
+ int zlib_err, zlib_init = 0;
+ int k = 0, page = 0;
z_stream *stream = msblk->stream;
mutex_lock(&msblk->read_data_mutex);
@@ -75,11 +74,10 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer,
stream->avail_out = 0;
stream->avail_in = 0;
- bytes = length;
do {
if (stream->avail_in == 0 && k < b) {
- avail = min(bytes, msblk->devblksize - offset);
- bytes -= avail;
+ int avail = min(length, msblk->devblksize - offset);
+ length -= avail;
wait_on_buffer(bh[k]);
if (!buffer_uptodate(bh[k]))
goto release_mutex;
@@ -128,6 +126,11 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer,
goto release_mutex;
}
+ if (k < b) {
+ ERROR("zlib_uncompress error, data remaining\n");
+ goto release_mutex;
+ }
+
length = stream->total_out;
mutex_unlock(&msblk->read_data_mutex);
return length;
diff --git a/fs/stat.c b/fs/stat.c
index 12e90e2..d5c61cf 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -75,11 +75,13 @@ int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat,
int error = -EINVAL;
int lookup_flags = 0;
- if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0)
+ if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT)) != 0)
goto out;
if (!(flag & AT_SYMLINK_NOFOLLOW))
lookup_flags |= LOOKUP_FOLLOW;
+ if (flag & AT_NO_AUTOMOUNT)
+ lookup_flags |= LOOKUP_NO_AUTOMOUNT;
error = user_path_at(dfd, filename, lookup_flags, &path);
if (error)
diff --git a/fs/super.c b/fs/super.c
index 4f6a357..74e149e 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1141,7 +1141,7 @@ static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype)
return mnt;
err:
- mntput_long(mnt);
+ mntput(mnt);
return ERR_PTR(err);
}
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index ef51eb43e..a55c1b4 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -37,6 +37,7 @@
#include "xfs_trace.h"
#include <linux/dcache.h>
+#include <linux/falloc.h>
static const struct vm_operations_struct xfs_file_vm_ops;
@@ -882,6 +883,60 @@ out_unlock:
return ret;
}
+STATIC long
+xfs_file_fallocate(
+ struct file *file,
+ int mode,
+ loff_t offset,
+ loff_t len)
+{
+ struct inode *inode = file->f_path.dentry->d_inode;
+ long error;
+ loff_t new_size = 0;
+ xfs_flock64_t bf;
+ xfs_inode_t *ip = XFS_I(inode);
+ int cmd = XFS_IOC_RESVSP;
+
+ if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
+ return -EOPNOTSUPP;
+
+ bf.l_whence = 0;
+ bf.l_start = offset;
+ bf.l_len = len;
+
+ xfs_ilock(ip, XFS_IOLOCK_EXCL);
+
+ if (mode & FALLOC_FL_PUNCH_HOLE)
+ cmd = XFS_IOC_UNRESVSP;
+
+ /* check the new inode size is valid before allocating */
+ if (!(mode & FALLOC_FL_KEEP_SIZE) &&
+ offset + len > i_size_read(inode)) {
+ new_size = offset + len;
+ error = inode_newsize_ok(inode, new_size);
+ if (error)
+ goto out_unlock;
+ }
+
+ error = -xfs_change_file_space(ip, cmd, &bf, 0, XFS_ATTR_NOLOCK);
+ if (error)
+ goto out_unlock;
+
+ /* Change file size if needed */
+ if (new_size) {
+ struct iattr iattr;
+
+ iattr.ia_valid = ATTR_SIZE;
+ iattr.ia_size = new_size;
+ error = -xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK);
+ }
+
+out_unlock:
+ xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+ return error;
+}
+
+
STATIC int
xfs_file_open(
struct inode *inode,
@@ -1000,6 +1055,7 @@ const struct file_operations xfs_file_operations = {
.open = xfs_file_open,
.release = xfs_file_release,
.fsync = xfs_file_fsync,
+ .fallocate = xfs_file_fallocate,
};
const struct file_operations xfs_dir_file_operations = {
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index da54403..bd57278 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -46,7 +46,6 @@
#include <linux/namei.h>
#include <linux/posix_acl.h>
#include <linux/security.h>
-#include <linux/falloc.h>
#include <linux/fiemap.h>
#include <linux/slab.h>
@@ -505,61 +504,6 @@ xfs_vn_setattr(
return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0);
}
-STATIC long
-xfs_vn_fallocate(
- struct inode *inode,
- int mode,
- loff_t offset,
- loff_t len)
-{
- long error;
- loff_t new_size = 0;
- xfs_flock64_t bf;
- xfs_inode_t *ip = XFS_I(inode);
- int cmd = XFS_IOC_RESVSP;
-
- /* preallocation on directories not yet supported */
- error = -ENODEV;
- if (S_ISDIR(inode->i_mode))
- goto out_error;
-
- bf.l_whence = 0;
- bf.l_start = offset;
- bf.l_len = len;
-
- xfs_ilock(ip, XFS_IOLOCK_EXCL);
-
- if (mode & FALLOC_FL_PUNCH_HOLE)
- cmd = XFS_IOC_UNRESVSP;
-
- /* check the new inode size is valid before allocating */
- if (!(mode & FALLOC_FL_KEEP_SIZE) &&
- offset + len > i_size_read(inode)) {
- new_size = offset + len;
- error = inode_newsize_ok(inode, new_size);
- if (error)
- goto out_unlock;
- }
-
- error = -xfs_change_file_space(ip, cmd, &bf, 0, XFS_ATTR_NOLOCK);
- if (error)
- goto out_unlock;
-
- /* Change file size if needed */
- if (new_size) {
- struct iattr iattr;
-
- iattr.ia_valid = ATTR_SIZE;
- iattr.ia_size = new_size;
- error = -xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK);
- }
-
-out_unlock:
- xfs_iunlock(ip, XFS_IOLOCK_EXCL);
-out_error:
- return error;
-}
-
#define XFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
/*
@@ -653,7 +597,6 @@ static const struct inode_operations xfs_inode_operations = {
.getxattr = generic_getxattr,
.removexattr = generic_removexattr,
.listxattr = xfs_vn_listxattr,
- .fallocate = xfs_vn_fallocate,
.fiemap = xfs_vn_fiemap,
};
diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c
index e6cf955..0df8889 100644
--- a/fs/xfs/support/debug.c
+++ b/fs/xfs/support/debug.c
@@ -75,11 +75,11 @@ xfs_cmn_err(
{
struct va_format vaf;
va_list args;
- int panic = 0;
+ int do_panic = 0;
if (xfs_panic_mask && (xfs_panic_mask & panic_tag)) {
printk(KERN_ALERT "XFS: Transforming an alert into a BUG.");
- panic = 1;
+ do_panic = 1;
}
va_start(args, fmt);
@@ -89,7 +89,7 @@ xfs_cmn_err(
printk(KERN_ALERT "Filesystem %s: %pV", mp->m_fsname, &vaf);
va_end(args);
- BUG_ON(panic);
+ BUG_ON(do_panic);
}
void