aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/flock.c1
-rw-r--r--fs/aio.c24
-rw-r--r--fs/binfmt_elf.c9
-rw-r--r--fs/bio-integrity.c170
-rw-r--r--fs/bio.c11
-rw-r--r--fs/btrfs/async-thread.c2
-rw-r--r--fs/btrfs/ctree.h3
-rw-r--r--fs/btrfs/extent-tree.c566
-rw-r--r--fs/btrfs/file.c5
-rw-r--r--fs/btrfs/inode.c25
-rw-r--r--fs/btrfs/ioctl.c6
-rw-r--r--fs/btrfs/relocation.c5
-rw-r--r--fs/btrfs/transaction.c4
-rw-r--r--fs/compat.c4
-rw-r--r--fs/eventfd.c122
-rw-r--r--fs/exec.c4
-rw-r--r--fs/ext2/namei.c12
-rw-r--r--fs/fuse/dev.c83
-rw-r--r--fs/fuse/dir.c57
-rw-r--r--fs/fuse/file.c2
-rw-r--r--fs/fuse/fuse_i.h27
-rw-r--r--fs/fuse/inode.c68
-rw-r--r--fs/hostfs/hostfs_kern.c1
-rw-r--r--fs/jffs2/scan.c4
-rw-r--r--fs/nfsd/vfs.c3
-rw-r--r--fs/notify/inotify/inotify_user.c3
-rw-r--r--fs/sync.c5
27 files changed, 933 insertions, 293 deletions
diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index 210acaf..3ff8bdd 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -432,7 +432,6 @@ vfs_rejected_lock:
list_del_init(&fl->fl_u.afs.link);
if (list_empty(&vnode->granted_locks))
afs_defer_unlock(vnode, key);
- spin_unlock(&vnode->lock);
goto abort_attempt;
}
diff --git a/fs/aio.c b/fs/aio.c
index 76da125..d065b2c 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -485,6 +485,8 @@ static inline void really_put_req(struct kioctx *ctx, struct kiocb *req)
{
assert_spin_locked(&ctx->ctx_lock);
+ if (req->ki_eventfd != NULL)
+ eventfd_ctx_put(req->ki_eventfd);
if (req->ki_dtor)
req->ki_dtor(req);
if (req->ki_iovec != &req->ki_inline_vec)
@@ -509,8 +511,6 @@ static void aio_fput_routine(struct work_struct *data)
/* Complete the fput(s) */
if (req->ki_filp != NULL)
__fput(req->ki_filp);
- if (req->ki_eventfd != NULL)
- __fput(req->ki_eventfd);
/* Link the iocb into the context's free list */
spin_lock_irq(&ctx->ctx_lock);
@@ -528,8 +528,6 @@ static void aio_fput_routine(struct work_struct *data)
*/
static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
{
- int schedule_putreq = 0;
-
dprintk(KERN_DEBUG "aio_put(%p): f_count=%ld\n",
req, atomic_long_read(&req->ki_filp->f_count));
@@ -549,24 +547,16 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
* we would not be holding the last reference to the file*, so
* this function will be executed w/out any aio kthread wakeup.
*/
- if (unlikely(atomic_long_dec_and_test(&req->ki_filp->f_count)))
- schedule_putreq++;
- else
- req->ki_filp = NULL;
- if (req->ki_eventfd != NULL) {
- if (unlikely(atomic_long_dec_and_test(&req->ki_eventfd->f_count)))
- schedule_putreq++;
- else
- req->ki_eventfd = NULL;
- }
- if (unlikely(schedule_putreq)) {
+ if (unlikely(atomic_long_dec_and_test(&req->ki_filp->f_count))) {
get_ioctx(ctx);
spin_lock(&fput_lock);
list_add(&req->ki_list, &fput_head);
spin_unlock(&fput_lock);
queue_work(aio_wq, &fput_work);
- } else
+ } else {
+ req->ki_filp = NULL;
really_put_req(ctx, req);
+ }
return 1;
}
@@ -1622,7 +1612,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
* an eventfd() fd, and will be signaled for each completed
* event using the eventfd_signal() function.
*/
- req->ki_eventfd = eventfd_fget((int) iocb->aio_resfd);
+ req->ki_eventfd = eventfd_ctx_fdget((int) iocb->aio_resfd);
if (IS_ERR(req->ki_eventfd)) {
ret = PTR_ERR(req->ki_eventfd);
req->ki_eventfd = NULL;
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 9fa212b..b7c1603 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1522,11 +1522,11 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
info->thread = NULL;
psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
- fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
-
if (psinfo == NULL)
return 0;
+ fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
+
/*
* Figure out how many notes we're going to need for each thread.
*/
@@ -1929,7 +1929,10 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
elf = kmalloc(sizeof(*elf), GFP_KERNEL);
if (!elf)
goto out;
-
+ /*
+ * The number of segs are recored into ELF header as 16bit value.
+ * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
+ */
segs = current->mm->map_count;
#ifdef ELF_CORE_EXTRA_PHDRS
segs += ELF_CORE_EXTRA_PHDRS;
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index 31c46a2..49a34e7 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -1,7 +1,7 @@
/*
* bio-integrity.c - bio data integrity extensions
*
- * Copyright (C) 2007, 2008 Oracle Corporation
+ * Copyright (C) 2007, 2008, 2009 Oracle Corporation
* Written by: Martin K. Petersen <martin.petersen@oracle.com>
*
* This program is free software; you can redistribute it and/or
@@ -25,63 +25,121 @@
#include <linux/bio.h>
#include <linux/workqueue.h>
-static struct kmem_cache *bio_integrity_slab __read_mostly;
-static mempool_t *bio_integrity_pool;
-static struct bio_set *integrity_bio_set;
+struct integrity_slab {
+ struct kmem_cache *slab;
+ unsigned short nr_vecs;
+ char name[8];
+};
+
+#define IS(x) { .nr_vecs = x, .name = "bip-"__stringify(x) }
+struct integrity_slab bip_slab[BIOVEC_NR_POOLS] __read_mostly = {
+ IS(1), IS(4), IS(16), IS(64), IS(128), IS(BIO_MAX_PAGES),
+};
+#undef IS
+
static struct workqueue_struct *kintegrityd_wq;
+static inline unsigned int vecs_to_idx(unsigned int nr)
+{
+ switch (nr) {
+ case 1:
+ return 0;
+ case 2 ... 4:
+ return 1;
+ case 5 ... 16:
+ return 2;
+ case 17 ... 64:
+ return 3;
+ case 65 ... 128:
+ return 4;
+ case 129 ... BIO_MAX_PAGES:
+ return 5;
+ default:
+ BUG();
+ }
+}
+
+static inline int use_bip_pool(unsigned int idx)
+{
+ if (idx == BIOVEC_NR_POOLS)
+ return 1;
+
+ return 0;
+}
+
/**
- * bio_integrity_alloc - Allocate integrity payload and attach it to bio
+ * bio_integrity_alloc_bioset - Allocate integrity payload and attach it to bio
* @bio: bio to attach integrity metadata to
* @gfp_mask: Memory allocation mask
* @nr_vecs: Number of integrity metadata scatter-gather elements
+ * @bs: bio_set to allocate from
*
* Description: This function prepares a bio for attaching integrity
* metadata. nr_vecs specifies the maximum number of pages containing
* integrity metadata that can be attached.
*/
-struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
- gfp_t gfp_mask,
- unsigned int nr_vecs)
+struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *bio,
+ gfp_t gfp_mask,
+ unsigned int nr_vecs,
+ struct bio_set *bs)
{
struct bio_integrity_payload *bip;
- struct bio_vec *iv;
- unsigned long idx;
+ unsigned int idx = vecs_to_idx(nr_vecs);
BUG_ON(bio == NULL);
+ bip = NULL;
- bip = mempool_alloc(bio_integrity_pool, gfp_mask);
- if (unlikely(bip == NULL)) {
- printk(KERN_ERR "%s: could not alloc bip\n", __func__);
- return NULL;
- }
+ /* Lower order allocations come straight from slab */
+ if (!use_bip_pool(idx))
+ bip = kmem_cache_alloc(bip_slab[idx].slab, gfp_mask);
- memset(bip, 0, sizeof(*bip));
+ /* Use mempool if lower order alloc failed or max vecs were requested */
+ if (bip == NULL) {
+ bip = mempool_alloc(bs->bio_integrity_pool, gfp_mask);
- iv = bvec_alloc_bs(gfp_mask, nr_vecs, &idx, integrity_bio_set);
- if (unlikely(iv == NULL)) {
- printk(KERN_ERR "%s: could not alloc bip_vec\n", __func__);
- mempool_free(bip, bio_integrity_pool);
- return NULL;
+ if (unlikely(bip == NULL)) {
+ printk(KERN_ERR "%s: could not alloc bip\n", __func__);
+ return NULL;
+ }
}
- bip->bip_pool = idx;
- bip->bip_vec = iv;
+ memset(bip, 0, sizeof(*bip));
+
+ bip->bip_slab = idx;
bip->bip_bio = bio;
bio->bi_integrity = bip;
return bip;
}
+EXPORT_SYMBOL(bio_integrity_alloc_bioset);
+
+/**
+ * bio_integrity_alloc - Allocate integrity payload and attach it to bio
+ * @bio: bio to attach integrity metadata to
+ * @gfp_mask: Memory allocation mask
+ * @nr_vecs: Number of integrity metadata scatter-gather elements
+ *
+ * Description: This function prepares a bio for attaching integrity
+ * metadata. nr_vecs specifies the maximum number of pages containing
+ * integrity metadata that can be attached.
+ */
+struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
+ gfp_t gfp_mask,
+ unsigned int nr_vecs)
+{
+ return bio_integrity_alloc_bioset(bio, gfp_mask, nr_vecs, fs_bio_set);
+}
EXPORT_SYMBOL(bio_integrity_alloc);
/**
* bio_integrity_free - Free bio integrity payload
* @bio: bio containing bip to be freed
+ * @bs: bio_set this bio was allocated from
*
* Description: Used to free the integrity portion of a bio. Usually
* called from bio_free().
*/
-void bio_integrity_free(struct bio *bio)
+void bio_integrity_free(struct bio *bio, struct bio_set *bs)
{
struct bio_integrity_payload *bip = bio->bi_integrity;
@@ -92,8 +150,10 @@ void bio_integrity_free(struct bio *bio)
&& bip->bip_buf != NULL)
kfree(bip->bip_buf);
- bvec_free_bs(integrity_bio_set, bip->bip_vec, bip->bip_pool);
- mempool_free(bip, bio_integrity_pool);
+ if (use_bip_pool(bip->bip_slab))
+ mempool_free(bip, bs->bio_integrity_pool);
+ else
+ kmem_cache_free(bip_slab[bip->bip_slab].slab, bip);
bio->bi_integrity = NULL;
}
@@ -114,7 +174,7 @@ int bio_integrity_add_page(struct bio *bio, struct page *page,
struct bio_integrity_payload *bip = bio->bi_integrity;
struct bio_vec *iv;
- if (bip->bip_vcnt >= bvec_nr_vecs(bip->bip_pool)) {
+ if (bip->bip_vcnt >= bvec_nr_vecs(bip->bip_slab)) {
printk(KERN_ERR "%s: bip_vec full\n", __func__);
return 0;
}
@@ -647,8 +707,8 @@ void bio_integrity_split(struct bio *bio, struct bio_pair *bp, int sectors)
bp->iv1 = bip->bip_vec[0];
bp->iv2 = bip->bip_vec[0];
- bp->bip1.bip_vec = &bp->iv1;
- bp->bip2.bip_vec = &bp->iv2;
+ bp->bip1.bip_vec[0] = bp->iv1;
+ bp->bip2.bip_vec[0] = bp->iv2;
bp->iv1.bv_len = sectors * bi->tuple_size;
bp->iv2.bv_offset += sectors * bi->tuple_size;
@@ -667,17 +727,19 @@ EXPORT_SYMBOL(bio_integrity_split);
* @bio: New bio
* @bio_src: Original bio
* @gfp_mask: Memory allocation mask
+ * @bs: bio_set to allocate bip from
*
* Description: Called to allocate a bip when cloning a bio
*/
-int bio_integrity_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp_mask)
+int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
+ gfp_t gfp_mask, struct bio_set *bs)
{
struct bio_integrity_payload *bip_src = bio_src->bi_integrity;
struct bio_integrity_payload *bip;
BUG_ON(bip_src == NULL);
- bip = bio_integrity_alloc(bio, gfp_mask, bip_src->bip_vcnt);
+ bip = bio_integrity_alloc_bioset(bio, gfp_mask, bip_src->bip_vcnt, bs);
if (bip == NULL)
return -EIO;
@@ -693,25 +755,43 @@ int bio_integrity_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp_mask)
}
EXPORT_SYMBOL(bio_integrity_clone);
-static int __init bio_integrity_init(void)
+int bioset_integrity_create(struct bio_set *bs, int pool_size)
{
- kintegrityd_wq = create_workqueue("kintegrityd");
+ unsigned int max_slab = vecs_to_idx(BIO_MAX_PAGES);
+
+ bs->bio_integrity_pool =
+ mempool_create_slab_pool(pool_size, bip_slab[max_slab].slab);
+ if (!bs->bio_integrity_pool)
+ return -1;
+
+ return 0;
+}
+EXPORT_SYMBOL(bioset_integrity_create);
+
+void bioset_integrity_free(struct bio_set *bs)
+{
+ if (bs->bio_integrity_pool)
+ mempool_destroy(bs->bio_integrity_pool);
+}
+EXPORT_SYMBOL(bioset_integrity_free);
+
+void __init bio_integrity_init(void)
+{
+ unsigned int i;
+
+ kintegrityd_wq = create_workqueue("kintegrityd");
if (!kintegrityd_wq)
panic("Failed to create kintegrityd\n");
- bio_integrity_slab = KMEM_CACHE(bio_integrity_payload,
- SLAB_HWCACHE_ALIGN|SLAB_PANIC);
+ for (i = 0 ; i < BIOVEC_NR_POOLS ; i++) {
+ unsigned int size;
- bio_integrity_pool = mempool_create_slab_pool(BIO_POOL_SIZE,
- bio_integrity_slab);
- if (!bio_integrity_pool)
- panic("bio_integrity: can't allocate bip pool\n");
+ size = sizeof(struct bio_integrity_payload)
+ + bip_slab[i].nr_vecs * sizeof(struct bio_vec);
- integrity_bio_set = bioset_create(BIO_POOL_SIZE, 0);
- if (!integrity_bio_set)
- panic("bio_integrity: can't allocate bio_set\n");
-
- return 0;
+ bip_slab[i].slab =
+ kmem_cache_create(bip_slab[i].name, size, 0,
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
+ }
}
-subsys_initcall(bio_integrity_init);
diff --git a/fs/bio.c b/fs/bio.c
index 24c9140..1486b19 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -238,7 +238,7 @@ void bio_free(struct bio *bio, struct bio_set *bs)
bvec_free_bs(bs, bio->bi_io_vec, BIO_POOL_IDX(bio));
if (bio_integrity(bio))
- bio_integrity_free(bio);
+ bio_integrity_free(bio, bs);
/*
* If we have front padding, adjust the bio pointer before freeing
@@ -341,7 +341,7 @@ struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs)
static void bio_kmalloc_destructor(struct bio *bio)
{
if (bio_integrity(bio))
- bio_integrity_free(bio);
+ bio_integrity_free(bio, fs_bio_set);
kfree(bio);
}
@@ -472,7 +472,7 @@ struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask)
if (bio_integrity(bio)) {
int ret;
- ret = bio_integrity_clone(b, bio, gfp_mask);
+ ret = bio_integrity_clone(b, bio, gfp_mask, fs_bio_set);
if (ret < 0) {
bio_put(b);
@@ -1539,6 +1539,7 @@ void bioset_free(struct bio_set *bs)
if (bs->bio_pool)
mempool_destroy(bs->bio_pool);
+ bioset_integrity_free(bs);
biovec_free_pools(bs);
bio_put_slab(bs);
@@ -1579,6 +1580,9 @@ struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
if (!bs->bio_pool)
goto bad;
+ if (bioset_integrity_create(bs, pool_size))
+ goto bad;
+
if (!biovec_create_pools(bs, pool_size))
return bs;
@@ -1616,6 +1620,7 @@ static int __init init_bio(void)
if (!bio_slabs)
panic("bio: can't allocate bios\n");
+ bio_integrity_init();
biovec_init_slabs();
fs_bio_set = bioset_create(BIO_POOL_SIZE, 0);
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 7f88628..6e4f6c5 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -299,8 +299,8 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers)
"btrfs-%s-%d", workers->name,
workers->num_workers + i);
if (IS_ERR(worker->task)) {
- kfree(worker);
ret = PTR_ERR(worker->task);
+ kfree(worker);
goto fail;
}
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 2779c2f..98a8738 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2074,8 +2074,7 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path);
int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf);
-int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
- *root);
+int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref);
int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *node,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index edc7d20..a5aca39 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -990,15 +990,13 @@ static inline int extent_ref_type(u64 parent, u64 owner)
return type;
}
-static int find_next_key(struct btrfs_path *path, struct btrfs_key *key)
+static int find_next_key(struct btrfs_path *path, int level,
+ struct btrfs_key *key)
{
- int level;
- BUG_ON(!path->keep_locks);
- for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
+ for (; level < BTRFS_MAX_LEVEL; level++) {
if (!path->nodes[level])
break;
- btrfs_assert_tree_locked(path->nodes[level]);
if (path->slots[level] + 1 >=
btrfs_header_nritems(path->nodes[level]))
continue;
@@ -1158,7 +1156,8 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
* For simplicity, we just do not add new inline back
* ref if there is any kind of item for this block
*/
- if (find_next_key(path, &key) == 0 && key.objectid == bytenr &&
+ if (find_next_key(path, 0, &key) == 0 &&
+ key.objectid == bytenr &&
key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
err = -EAGAIN;
goto out;
@@ -2697,7 +2696,7 @@ again:
printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes"
", %llu bytes_used, %llu bytes_reserved, "
- "%llu bytes_pinned, %llu bytes_readonly, %llu may use"
+ "%llu bytes_pinned, %llu bytes_readonly, %llu may use "
"%llu total\n", (unsigned long long)bytes,
(unsigned long long)data_sinfo->bytes_delalloc,
(unsigned long long)data_sinfo->bytes_used,
@@ -4128,6 +4127,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
return buf;
}
+#if 0
int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct extent_buffer *leaf)
{
@@ -4171,8 +4171,6 @@ int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
return 0;
}
-#if 0
-
static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_leaf_ref *ref)
@@ -4553,262 +4551,471 @@ out:
}
#endif
+struct walk_control {
+ u64 refs[BTRFS_MAX_LEVEL];
+ u64 flags[BTRFS_MAX_LEVEL];
+ struct btrfs_key update_progress;
+ int stage;
+ int level;
+ int shared_level;
+ int update_ref;
+ int keep_locks;
+};
+
+#define DROP_REFERENCE 1
+#define UPDATE_BACKREF 2
+
/*
- * helper function for drop_subtree, this function is similar to
- * walk_down_tree. The main difference is that it checks reference
- * counts while tree blocks are locked.
+ * hepler to process tree block while walking down the tree.
+ *
+ * when wc->stage == DROP_REFERENCE, this function checks
+ * reference count of the block. if the block is shared and
+ * we need update back refs for the subtree rooted at the
+ * block, this function changes wc->stage to UPDATE_BACKREF
+ *
+ * when wc->stage == UPDATE_BACKREF, this function updates
+ * back refs for pointers in the block.
+ *
+ * NOTE: return value 1 means we should stop walking down.
*/
-static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
+static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
- struct btrfs_path *path, int *level)
+ struct btrfs_path *path,
+ struct walk_control *wc)
{
- struct extent_buffer *next;
- struct extent_buffer *cur;
- struct extent_buffer *parent;
- u64 bytenr;
- u64 ptr_gen;
- u64 refs;
- u64 flags;
- u32 blocksize;
+ int level = wc->level;
+ struct extent_buffer *eb = path->nodes[level];
+ struct btrfs_key key;
+ u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF;
int ret;
- cur = path->nodes[*level];
- ret = btrfs_lookup_extent_info(trans, root, cur->start, cur->len,
- &refs, &flags);
- BUG_ON(ret);
- if (refs > 1)
- goto out;
+ if (wc->stage == UPDATE_BACKREF &&
+ btrfs_header_owner(eb) != root->root_key.objectid)
+ return 1;
- BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
+ /*
+ * when reference count of tree block is 1, it won't increase
+ * again. once full backref flag is set, we never clear it.
+ */
+ if ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) ||
+ (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag))) {
+ BUG_ON(!path->locks[level]);
+ ret = btrfs_lookup_extent_info(trans, root,
+ eb->start, eb->len,
+ &wc->refs[level],
+ &wc->flags[level]);
+ BUG_ON(ret);
+ BUG_ON(wc->refs[level] == 0);
+ }
- while (*level >= 0) {
- cur = path->nodes[*level];
- if (*level == 0) {
- ret = btrfs_drop_leaf_ref(trans, root, cur);
- BUG_ON(ret);
- clean_tree_block(trans, root, cur);
- break;
- }
- if (path->slots[*level] >= btrfs_header_nritems(cur)) {
- clean_tree_block(trans, root, cur);
- break;
+ if (wc->stage == DROP_REFERENCE &&
+ wc->update_ref && wc->refs[level] > 1) {
+ BUG_ON(eb == root->node);
+ BUG_ON(path->slots[level] > 0);
+ if (level == 0)
+ btrfs_item_key_to_cpu(eb, &key, path->slots[level]);
+ else
+ btrfs_node_key_to_cpu(eb, &key, path->slots[level]);
+ if (btrfs_header_owner(eb) == root->root_key.objectid &&
+ btrfs_comp_cpu_keys(&key, &wc->update_progress) >= 0) {
+ wc->stage = UPDATE_BACKREF;
+ wc->shared_level = level;
}
+ }
- bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
- blocksize = btrfs_level_size(root, *level - 1);
- ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
+ if (wc->stage == DROP_REFERENCE) {
+ if (wc->refs[level] > 1)
+ return 1;
- next = read_tree_block(root, bytenr, blocksize, ptr_gen);
- btrfs_tree_lock(next);
- btrfs_set_lock_blocking(next);
+ if (path->locks[level] && !wc->keep_locks) {
+ btrfs_tree_unlock(eb);
+ path->locks[level] = 0;
+ }
+ return 0;
+ }
- ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
- &refs, &flags);
+ /* wc->stage == UPDATE_BACKREF */
+ if (!(wc->flags[level] & flag)) {
+ BUG_ON(!path->locks[level]);
+ ret = btrfs_inc_ref(trans, root, eb, 1);
BUG_ON(ret);
- if (refs > 1) {
- parent = path->nodes[*level];
- ret = btrfs_free_extent(trans, root, bytenr,
- blocksize, parent->start,
- btrfs_header_owner(parent),
- *level - 1, 0);
+ ret = btrfs_dec_ref(trans, root, eb, 0);
+ BUG_ON(ret);
+ ret = btrfs_set_disk_extent_flags(trans, root, eb->start,
+ eb->len, flag, 0);
+ BUG_ON(ret);
+ wc->flags[level] |= flag;
+ }
+
+ /*
+ * the block is shared by multiple trees, so it's not good to
+ * keep the tree lock
+ */
+ if (path->locks[level] && level > 0) {
+ btrfs_tree_unlock(eb);
+ path->locks[level] = 0;
+ }
+ return 0;
+}
+
+/*
+ * hepler to process tree block while walking up the tree.
+ *
+ * when wc->stage == DROP_REFERENCE, this function drops
+ * reference count on the block.
+ *
+ * when wc->stage == UPDATE_BACKREF, this function changes
+ * wc->stage back to DROP_REFERENCE if we changed wc->stage
+ * to UPDATE_BACKREF previously while processing the block.
+ *
+ * NOTE: return value 1 means we should stop walking up.
+ */
+static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ struct walk_control *wc)
+{
+ int ret = 0;
+ int level = wc->level;
+ struct extent_buffer *eb = path->nodes[level];
+ u64 parent = 0;
+
+ if (wc->stage == UPDATE_BACKREF) {
+ BUG_ON(wc->shared_level < level);
+ if (level < wc->shared_level)
+ goto out;
+
+ BUG_ON(wc->refs[level] <= 1);
+ ret = find_next_key(path, level + 1, &wc->update_progress);
+ if (ret > 0)
+ wc->update_ref = 0;
+
+ wc->stage = DROP_REFERENCE;
+ wc->shared_level = -1;
+ path->slots[level] = 0;
+
+ /*
+ * check reference count again if the block isn't locked.
+ * we should start walking down the tree again if reference
+ * count is one.
+ */
+ if (!path->locks[level]) {
+ BUG_ON(level == 0);
+ btrfs_tree_lock(eb);
+ btrfs_set_lock_blocking(eb);
+ path->locks[level] = 1;
+
+ ret = btrfs_lookup_extent_info(trans, root,
+ eb->start, eb->len,
+ &wc->refs[level],
+ &wc->flags[level]);
BUG_ON(ret);
- path->slots[*level]++;
- btrfs_tree_unlock(next);
- free_extent_buffer(next);
- continue;
+ BUG_ON(wc->refs[level] == 0);
+ if (wc->refs[level] == 1) {
+ btrfs_tree_unlock(eb);
+ path->locks[level] = 0;
+ return 1;
+ }
+ } else {
+ BUG_ON(level != 0);
}
+ }
- BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
+ /* wc->stage == DROP_REFERENCE */
+ BUG_ON(wc->refs[level] > 1 && !path->locks[level]);
- *level = btrfs_header_level(next);
- path->nodes[*level] = next;
- path->slots[*level] = 0;
- path->locks[*level] = 1;
- cond_resched();
+ if (wc->refs[level] == 1) {
+ if (level == 0) {
+ if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
+ ret = btrfs_dec_ref(trans, root, eb, 1);
+ else
+ ret = btrfs_dec_ref(trans, root, eb, 0);
+ BUG_ON(ret);
+ }
+ /* make block locked assertion in clean_tree_block happy */
+ if (!path->locks[level] &&
+ btrfs_header_generation(eb) == trans->transid) {
+ btrfs_tree_lock(eb);
+ btrfs_set_lock_blocking(eb);
+ path->locks[level] = 1;
+ }
+ clean_tree_block(trans, root, eb);
+ }
+
+ if (eb == root->node) {
+ if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
+ parent = eb->start;
+ else
+ BUG_ON(root->root_key.objectid !=
+ btrfs_header_owner(eb));
+ } else {
+ if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
+ parent = path->nodes[level + 1]->start;
+ else
+ BUG_ON(root->root_key.objectid !=
+ btrfs_header_owner(path->nodes[level + 1]));
}
-out:
- if (path->nodes[*level] == root->node)
- parent = path->nodes[*level];
- else
- parent = path->nodes[*level + 1];
- bytenr = path->nodes[*level]->start;
- blocksize = path->nodes[*level]->len;
- ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent->start,
- btrfs_header_owner(parent), *level, 0);
+ ret = btrfs_free_extent(trans, root, eb->start, eb->len, parent,
+ root->root_key.objectid, level, 0);
BUG_ON(ret);
+out:
+ wc->refs[level] = 0;
+ wc->flags[level] = 0;
+ return ret;
+}
+
+static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ struct walk_control *wc)
+{
+ struct extent_buffer *next;
+ struct extent_buffer *cur;
+ u64 bytenr;
+ u64 ptr_gen;
+ u32 blocksize;
+ int level = wc->level;
+ int ret;
+
+ while (level >= 0) {
+ cur = path->nodes[level];
+ BUG_ON(path->slots[level] >= btrfs_header_nritems(cur));
- if (path->locks[*level]) {
- btrfs_tree_unlock(path->nodes[*level]);
- path->locks[*level] = 0;
+ ret = walk_down_proc(trans, root, path, wc);
+ if (ret > 0)
+ break;
+
+ if (level == 0)
+ break;
+
+ bytenr = btrfs_node_blockptr(cur, path->slots[level]);
+ blocksize = btrfs_level_size(root, level - 1);
+ ptr_gen = btrfs_node_ptr_generation(cur, path->slots[level]);
+
+ next = read_tree_block(root, bytenr, blocksize, ptr_gen);
+ btrfs_tree_lock(next);
+ btrfs_set_lock_blocking(next);
+
+ level--;
+ BUG_ON(level != btrfs_header_level(next));
+ path->nodes[level] = next;
+ path->slots[level] = 0;
+ path->locks[level] = 1;
+ wc->level = level;
}
- free_extent_buffer(path->nodes[*level]);
- path->nodes[*level] = NULL;
- *level += 1;
- cond_resched();
return 0;
}
-/*
- * helper for dropping snapshots. This walks back up the tree in the path
- * to find the first node higher up where we haven't yet gone through
- * all the slots
- */
static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
- int *level, int max_level)
+ struct walk_control *wc, int max_level)
{
- struct btrfs_root_item *root_item = &root->root_item;
- int i;
- int slot;
+ int level = wc->level;
int ret;
- for (i = *level; i < max_level && path->nodes[i]; i++) {
- slot = path->slots[i];
- if (slot + 1 < btrfs_header_nritems(path->nodes[i])) {
- /*
- * there is more work to do in this level.
- * Update the drop_progress marker to reflect
- * the work we've done so far, and then bump
- * the slot number
- */
- path->slots[i]++;
- WARN_ON(*level == 0);
- if (max_level == BTRFS_MAX_LEVEL) {
- btrfs_node_key(path->nodes[i],
- &root_item->drop_progress,
- path->slots[i]);
- root_item->drop_level = i;
- }
- *level = i;
+ path->slots[level] = btrfs_header_nritems(path->nodes[level]);
+ while (level < max_level && path->nodes[level]) {
+ wc->level = level;
+ if (path->slots[level] + 1 <
+ btrfs_header_nritems(path->nodes[level])) {
+ path->slots[level]++;
return 0;
} else {
- struct extent_buffer *parent;
-
- /*
- * this whole node is done, free our reference
- * on it and go up one level
- */
- if (path->nodes[*level] == root->node)
- parent = path->nodes[*level];
- else
- parent = path->nodes[*level + 1];
+ ret = walk_up_proc(trans, root, path, wc);
+ if (ret > 0)
+ return 0;
- clean_tree_block(trans, root, path->nodes[i]);
- ret = btrfs_free_extent(trans, root,
- path->nodes[i]->start,
- path->nodes[i]->len,
- parent->start,
- btrfs_header_owner(parent),
- *level, 0);
- BUG_ON(ret);
- if (path->locks[*level]) {
- btrfs_tree_unlock(path->nodes[i]);
- path->locks[i] = 0;
+ if (path->locks[level]) {
+ btrfs_tree_unlock(path->nodes[level]);
+ path->locks[level] = 0;
}
- free_extent_buffer(path->nodes[i]);
- path->nodes[i] = NULL;
- *level = i + 1;
+ free_extent_buffer(path->nodes[level]);
+ path->nodes[level] = NULL;
+ level++;
}
}
return 1;
}
/*
- * drop the reference count on the tree rooted at 'snap'. This traverses
- * the tree freeing any blocks that have a ref count of zero after being
- * decremented.
+ * drop a subvolume tree.
+ *
+ * this function traverses the tree freeing any blocks that only
+ * referenced by the tree.
+ *
+ * when a shared tree block is found. this function decreases its
+ * reference count by one. if update_ref is true, this function
+ * also make sure backrefs for the shared block and all lower level
+ * blocks are properly updated.
*/
-int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
- *root)
+int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref)
{
- int ret = 0;
- int wret;
- int level;
struct btrfs_path *path;
- int update_count;
+ struct btrfs_trans_handle *trans;
+ struct btrfs_root *tree_root = root->fs_info->tree_root;
struct btrfs_root_item *root_item = &root->root_item;
+ struct walk_control *wc;
+ struct btrfs_key key;
+ int err = 0;
+ int ret;
+ int level;
path = btrfs_alloc_path();
BUG_ON(!path);
- level = btrfs_header_level(root->node);
+ wc = kzalloc(sizeof(*wc), GFP_NOFS);
+ BUG_ON(!wc);
+
+ trans = btrfs_start_transaction(tree_root, 1);
+
if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
+ level = btrfs_header_level(root->node);
path->nodes[level] = btrfs_lock_root_node(root);
btrfs_set_lock_blocking(path->nodes[level]);
path->slots[level] = 0;
path->locks[level] = 1;
+ memset(&wc->update_progress, 0,
+ sizeof(wc->update_progress));
} else {
- struct btrfs_key key;
- struct btrfs_disk_key found_key;
- struct extent_buffer *node;
-
btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
+ memcpy(&wc->update_progress, &key,
+ sizeof(wc->update_progress));
+
level = root_item->drop_level;
+ BUG_ON(level == 0);
path->lowest_level = level;
- wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
- if (wret < 0) {
- ret = wret;
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ path->lowest_level = 0;
+ if (ret < 0) {
+ err = ret;
goto out;
}
- node = path->nodes[level];
- btrfs_node_key(node, &found_key, path->slots[level]);
- WARN_ON(memcmp(&found_key, &root_item->drop_progress,
- sizeof(found_key)));
+ btrfs_node_key_to_cpu(path->nodes[level], &key,
+ path->slots[level]);
+ WARN_ON(memcmp(&key, &wc->update_progress, sizeof(key)));
+
/*
* unlock our path, this is safe because only this
* function is allowed to delete this snapshot
*/
btrfs_unlock_up_safe(path, 0);
+
+ level = btrfs_header_level(root->node);
+ while (1) {
+ btrfs_tree_lock(path->nodes[level]);
+ btrfs_set_lock_blocking(path->nodes[level]);
+
+ ret = btrfs_lookup_extent_info(trans, root,
+ path->nodes[level]->start,
+ path->nodes[level]->len,
+ &wc->refs[level],
+ &wc->flags[level]);
+ BUG_ON(ret);
+ BUG_ON(wc->refs[level] == 0);
+
+ if (level == root_item->drop_level)
+ break;
+
+ btrfs_tree_unlock(path->nodes[level]);
+ WARN_ON(wc->refs[level] != 1);
+ level--;
+ }
}
+
+ wc->level = level;
+ wc->shared_level = -1;
+ wc->stage = DROP_REFERENCE;
+ wc->update_ref = update_ref;
+ wc->keep_locks = 0;
+
while (1) {
- unsigned long update;
- wret = walk_down_tree(trans, root, path, &level);
- if (wret > 0)
+ ret = walk_down_tree(trans, root, path, wc);
+ if (ret < 0) {
+ err = ret;
break;
- if (wret < 0)
- ret = wret;
+ }
- wret = walk_up_tree(trans, root, path, &level,
- BTRFS_MAX_LEVEL);
- if (wret > 0)
+ ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL);
+ if (ret < 0) {
+ err = ret;
break;
- if (wret < 0)
- ret = wret;
- if (trans->transaction->in_commit ||
- trans->transaction->delayed_refs.flushing) {
- ret = -EAGAIN;
+ }
+
+ if (ret > 0) {
+ BUG_ON(wc->stage != DROP_REFERENCE);
break;
}
- for (update_count = 0; update_count < 16; update_count++) {
+
+ if (wc->stage == DROP_REFERENCE) {
+ level = wc->level;
+ btrfs_node_key(path->nodes[level],
+ &root_item->drop_progress,
+ path->slots[level]);
+ root_item->drop_level = level;
+ }
+
+ BUG_ON(wc->level == 0);
+ if (trans->transaction->in_commit ||
+ trans->transaction->delayed_refs.flushing) {
+ ret = btrfs_update_root(trans, tree_root,
+ &root->root_key,
+ root_item);
+ BUG_ON(ret);
+
+ btrfs_end_transaction(trans, tree_root);
+ trans = btrfs_start_transaction(tree_root, 1);
+ } else {
+ unsigned long update;
update = trans->delayed_ref_updates;
trans->delayed_ref_updates = 0;
if (update)
- btrfs_run_delayed_refs(trans, root, update);
- else
- break;
+ btrfs_run_delayed_refs(trans, tree_root,
+ update);
}
}
+ btrfs_release_path(root, path);
+ BUG_ON(err);
+
+ ret = btrfs_del_root(trans, tree_root, &root->root_key);
+ BUG_ON(ret);
+
+ free_extent_buffer(root->node);
+ free_extent_buffer(root->commit_root);
+ kfree(root);
out:
+ btrfs_end_transaction(trans, tree_root);
+ kfree(wc);
btrfs_free_path(path);
- return ret;
+ return err;
}
+/*
+ * drop subtree rooted at tree block 'node'.
+ *
+ * NOTE: this function will unlock and release tree block 'node'
+ */
int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *node,
struct extent_buffer *parent)
{
struct btrfs_path *path;
+ struct walk_control *wc;
int level;
int parent_level;
int ret = 0;
int wret;
+ BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
+
path = btrfs_alloc_path();
BUG_ON(!path);
+ wc = kzalloc(sizeof(*wc), GFP_NOFS);
+ BUG_ON(!wc);
+
btrfs_assert_tree_locked(parent);
parent_level = btrfs_header_level(parent);
extent_buffer_get(parent);
@@ -4817,24 +5024,33 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
btrfs_assert_tree_locked(node);
level = btrfs_header_level(node);
- extent_buffer_get(node);
path->nodes[level] = node;
path->slots[level] = 0;
+ path->locks[level] = 1;
+
+ wc->refs[parent_level] = 1;
+ wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF;
+ wc->level = level;
+ wc->shared_level = -1;
+ wc->stage = DROP_REFERENCE;
+ wc->update_ref = 0;
+ wc->keep_locks = 1;
while (1) {
- wret = walk_down_tree(trans, root, path, &level);
- if (wret < 0)
+ wret = walk_down_tree(trans, root, path, wc);
+ if (wret < 0) {
ret = wret;
- if (wret != 0)
break;
+ }
- wret = walk_up_tree(trans, root, path, &level, parent_level);
+ wret = walk_up_tree(trans, root, path, wc, parent_level);
if (wret < 0)
ret = wret;
if (wret != 0)
break;
}
+ kfree(wc);
btrfs_free_path(path);
return ret;
}
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 126477e..7c3cd24 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -151,7 +151,10 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans,
}
if (end_pos > isize) {
i_size_write(inode, end_pos);
- btrfs_update_inode(trans, root, inode);
+ /* we've only changed i_size in ram, and we haven't updated
+ * the disk i_size. There is no need to log the inode
+ * at this time.
+ */
}
err = btrfs_end_transaction(trans, root);
out_unlock:
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index dbe1aab..7ffa3d3 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3580,12 +3580,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
owner = 1;
BTRFS_I(inode)->block_group =
btrfs_find_block_group(root, 0, alloc_hint, owner);
- if ((mode & S_IFREG)) {
- if (btrfs_test_opt(root, NODATASUM))
- BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
- if (btrfs_test_opt(root, NODATACOW))
- BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW;
- }
key[0].objectid = objectid;
btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY);
@@ -3640,6 +3634,13 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
btrfs_inherit_iflags(inode, dir);
+ if ((mode & S_IFREG)) {
+ if (btrfs_test_opt(root, NODATASUM))
+ BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
+ if (btrfs_test_opt(root, NODATACOW))
+ BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW;
+ }
+
insert_inode_hash(inode);
inode_tree_add(inode);
return inode;
@@ -5082,6 +5083,7 @@ static long btrfs_fallocate(struct inode *inode, int mode,
u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
struct extent_map *em;
struct btrfs_trans_handle *trans;
+ struct btrfs_root *root;
int ret;
alloc_start = offset & ~mask;
@@ -5100,6 +5102,13 @@ static long btrfs_fallocate(struct inode *inode, int mode,
goto out;
}
+ root = BTRFS_I(inode)->root;
+
+ ret = btrfs_check_data_free_space(root, inode,
+ alloc_end - alloc_start);
+ if (ret)
+ goto out;
+
locked_end = alloc_end - 1;
while (1) {
struct btrfs_ordered_extent *ordered;
@@ -5107,7 +5116,7 @@ static long btrfs_fallocate(struct inode *inode, int mode,
trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1);
if (!trans) {
ret = -EIO;
- goto out;
+ goto out_free;
}
/* the extent lock is ordered inside the running
@@ -5168,6 +5177,8 @@ static long btrfs_fallocate(struct inode *inode, int mode,
GFP_NOFS);
btrfs_end_transaction(trans, BTRFS_I(inode)->root);
+out_free:
+ btrfs_free_reserved_data_space(root, inode, alloc_end - alloc_start);
out:
mutex_unlock(&inode->i_mutex);
return ret;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index eff18f5..9f4db84 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1028,7 +1028,8 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
struct btrfs_file_extent_item);
comp = btrfs_file_extent_compression(leaf, extent);
type = btrfs_file_extent_type(leaf, extent);
- if (type == BTRFS_FILE_EXTENT_REG) {
+ if (type == BTRFS_FILE_EXTENT_REG ||
+ type == BTRFS_FILE_EXTENT_PREALLOC) {
disko = btrfs_file_extent_disk_bytenr(leaf,
extent);
diskl = btrfs_file_extent_disk_num_bytes(leaf,
@@ -1051,7 +1052,8 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
new_key.objectid = inode->i_ino;
new_key.offset = key.offset + destoff - off;
- if (type == BTRFS_FILE_EXTENT_REG) {
+ if (type == BTRFS_FILE_EXTENT_REG ||
+ type == BTRFS_FILE_EXTENT_PREALLOC) {
ret = btrfs_insert_empty_item(trans, root, path,
&new_key, size);
if (ret)
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index b23dc20..0083979 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1788,7 +1788,7 @@ static void merge_func(struct btrfs_work *work)
btrfs_end_transaction(trans, root);
}
- btrfs_drop_dead_root(reloc_root);
+ btrfs_drop_snapshot(reloc_root, 0);
if (atomic_dec_and_test(async->num_pending))
complete(async->done);
@@ -2075,9 +2075,6 @@ static int do_relocation(struct btrfs_trans_handle *trans,
ret = btrfs_drop_subtree(trans, root, eb, upper->eb);
BUG_ON(ret);
-
- btrfs_tree_unlock(eb);
- free_extent_buffer(eb);
}
if (!lowest) {
btrfs_tree_unlock(upper->eb);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 4e83457..2dbf1c1 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -593,6 +593,7 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly)
return 0;
}
+#if 0
/*
* when dropping snapshots, we generate a ton of delayed refs, and it makes
* sense not to join the transaction while it is trying to flush the current
@@ -681,6 +682,7 @@ int btrfs_drop_dead_root(struct btrfs_root *root)
btrfs_btree_balance_dirty(tree_root, nr);
return ret;
}
+#endif
/*
* new snapshots need to be created at a very specific time in the
@@ -1081,7 +1083,7 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root)
while (!list_empty(&list)) {
root = list_entry(list.next, struct btrfs_root, root_list);
list_del_init(&root->root_list);
- btrfs_drop_dead_root(root);
+ btrfs_drop_snapshot(root, 0);
}
return 0;
}
diff --git a/fs/compat.c b/fs/compat.c
index cdd51a3..fbadb94 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1486,8 +1486,8 @@ int compat_do_execve(char * filename,
if (!bprm)
goto out_files;
- retval = mutex_lock_interruptible(&current->cred_guard_mutex);
- if (retval < 0)
+ retval = -ERESTARTNOINTR;
+ if (mutex_lock_interruptible(&current->cred_guard_mutex))
goto out_free;
current->in_execve = 1;
diff --git a/fs/eventfd.c b/fs/eventfd.c
index 3f0e197..31d12de 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -14,35 +14,44 @@
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/anon_inodes.h>
-#include <linux/eventfd.h>
#include <linux/syscalls.h>
#include <linux/module.h>
+#include <linux/kref.h>
+#include <linux/eventfd.h>
struct eventfd_ctx {
+ struct kref kref;
wait_queue_head_t wqh;
/*
* Every time that a write(2) is performed on an eventfd, the
* value of the __u64 being written is added to "count" and a
* wakeup is performed on "wqh". A read(2) will return the "count"
* value to userspace, and will reset "count" to zero. The kernel
- * size eventfd_signal() also, adds to the "count" counter and
+ * side eventfd_signal() also, adds to the "count" counter and
* issue a wakeup.
*/
__u64 count;
unsigned int flags;
};
-/*
- * Adds "n" to the eventfd counter "count". Returns "n" in case of
- * success, or a value lower then "n" in case of coutner overflow.
- * This function is supposed to be called by the kernel in paths
- * that do not allow sleeping. In this function we allow the counter
- * to reach the ULLONG_MAX value, and we signal this as overflow
- * condition by returining a POLLERR to poll(2).
+/**
+ * eventfd_signal - Adds @n to the eventfd counter.
+ * @ctx: [in] Pointer to the eventfd context.
+ * @n: [in] Value of the counter to be added to the eventfd internal counter.
+ * The value cannot be negative.
+ *
+ * This function is supposed to be called by the kernel in paths that do not
+ * allow sleeping. In this function we allow the counter to reach the ULLONG_MAX
+ * value, and we signal this as overflow condition by returining a POLLERR
+ * to poll(2).
+ *
+ * Returns @n in case of success, a non-negative number lower than @n in case
+ * of overflow, or the following error codes:
+ *
+ * -EINVAL : The value of @n is negative.
*/
-int eventfd_signal(struct file *file, int n)
+int eventfd_signal(struct eventfd_ctx *ctx, int n)
{
- struct eventfd_ctx *ctx = file->private_data;
unsigned long flags;
if (n < 0)
@@ -59,9 +68,45 @@ int eventfd_signal(struct file *file, int n)
}
EXPORT_SYMBOL_GPL(eventfd_signal);
+static void eventfd_free(struct kref *kref)
+{
+ struct eventfd_ctx *ctx = container_of(kref, struct eventfd_ctx, kref);
+
+ kfree(ctx);
+}
+
+/**
+ * eventfd_ctx_get - Acquires a reference to the internal eventfd context.
+ * @ctx: [in] Pointer to the eventfd context.
+ *
+ * Returns: In case of success, returns a pointer to the eventfd context.
+ */
+struct eventfd_ctx *eventfd_ctx_get(struct eventfd_ctx *ctx)
+{
+ kref_get(&ctx->kref);
+ return ctx;
+}
+EXPORT_SYMBOL_GPL(eventfd_ctx_get);
+
+/**
+ * eventfd_ctx_put - Releases a reference to the internal eventfd context.
+ * @ctx: [in] Pointer to eventfd context.
+ *
+ * The eventfd context reference must have been previously acquired either
+ * with eventfd_ctx_get() or eventfd_ctx_fdget()).
+ */
+void eventfd_ctx_put(struct eventfd_ctx *ctx)
+{
+ kref_put(&ctx->kref, eventfd_free);
+}
+EXPORT_SYMBOL_GPL(eventfd_ctx_put);
+
static int eventfd_release(struct inode *inode, struct file *file)
{
- kfree(file->private_data);
+ struct eventfd_ctx *ctx = file->private_data;
+
+ wake_up_poll(&ctx->wqh, POLLHUP);
+ eventfd_ctx_put(ctx);
return 0;
}
@@ -185,6 +230,16 @@ static const struct file_operations eventfd_fops = {
.write = eventfd_write,
};
+/**
+ * eventfd_fget - Acquire a reference of an eventfd file descriptor.
+ * @fd: [in] Eventfd file descriptor.
+ *
+ * Returns a pointer to the eventfd file structure in case of success, or the
+ * following error pointer:
+ *
+ * -EBADF : Invalid @fd file descriptor.
+ * -EINVAL : The @fd file descriptor is not an eventfd file.
+ */
struct file *eventfd_fget(int fd)
{
struct file *file;
@@ -201,6 +256,48 @@ struct file *eventfd_fget(int fd)
}
EXPORT_SYMBOL_GPL(eventfd_fget);
+/**
+ * eventfd_ctx_fdget - Acquires a reference to the internal eventfd context.
+ * @fd: [in] Eventfd file descriptor.
+ *
+ * Returns a pointer to the internal eventfd context, otherwise the error
+ * pointers returned by the following functions:
+ *
+ * eventfd_fget
+ */
+struct eventfd_ctx *eventfd_ctx_fdget(int fd)
+{
+ struct file *file;
+ struct eventfd_ctx *ctx;
+
+ file = eventfd_fget(fd);
+ if (IS_ERR(file))
+ return (struct eventfd_ctx *) file;
+ ctx = eventfd_ctx_get(file->private_data);
+ fput(file);
+
+ return ctx;
+}
+EXPORT_SYMBOL_GPL(eventfd_ctx_fdget);
+
+/**
+ * eventfd_ctx_fileget - Acquires a reference to the internal eventfd context.
+ * @file: [in] Eventfd file pointer.
+ *
+ * Returns a pointer to the internal eventfd context, otherwise the error
+ * pointer:
+ *
+ * -EINVAL : The @fd file descriptor is not an eventfd file.
+ */
+struct eventfd_ctx *eventfd_ctx_fileget(struct file *file)
+{
+ if (file->f_op != &eventfd_fops)
+ return ERR_PTR(-EINVAL);
+
+ return eventfd_ctx_get(file->private_data);
+}
+EXPORT_SYMBOL_GPL(eventfd_ctx_fileget);
+
SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags)
{
int fd;
@@ -217,6 +314,7 @@ SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags)
if (!ctx)
return -ENOMEM;
+ kref_init(&ctx->kref);
init_waitqueue_head(&ctx->wqh);
ctx->count = count;
ctx->flags = flags;
diff --git a/fs/exec.c b/fs/exec.c
index e639957..4a8849e 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1277,8 +1277,8 @@ int do_execve(char * filename,
if (!bprm)
goto out_files;
- retval = mutex_lock_interruptible(&current->cred_guard_mutex);
- if (retval < 0)
+ retval = -ERESTARTNOINTR;
+ if (mutex_lock_interruptible(&current->cred_guard_mutex))
goto out_free;
current->in_execve = 1;
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 6524eca..e1dedb0 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -66,8 +66,16 @@ static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, str
inode = NULL;
if (ino) {
inode = ext2_iget(dir->i_sb, ino);
- if (IS_ERR(inode))
- return ERR_CAST(inode);
+ if (unlikely(IS_ERR(inode))) {
+ if (PTR_ERR(inode) == -ESTALE) {
+ ext2_error(dir->i_sb, __func__,
+ "deleted inode referenced: %lu",
+ ino);
+ return ERR_PTR(-EIO);
+ } else {
+ return ERR_CAST(inode);
+ }
+ }
}
return d_splice_alias(inode, dentry);
}
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 8fed2ed..f58ecbc 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -849,6 +849,81 @@ err:
return err;
}
+static int fuse_notify_inval_inode(struct fuse_conn *fc, unsigned int size,
+ struct fuse_copy_state *cs)
+{
+ struct fuse_notify_inval_inode_out outarg;
+ int err = -EINVAL;
+
+ if (size != sizeof(outarg))
+ goto err;
+
+ err = fuse_copy_one(cs, &outarg, sizeof(outarg));
+ if (err)
+ goto err;
+ fuse_copy_finish(cs);
+
+ down_read(&fc->killsb);
+ err = -ENOENT;
+ if (!fc->sb)
+ goto err_unlock;
+
+ err = fuse_reverse_inval_inode(fc->sb, outarg.ino,
+ outarg.off, outarg.len);
+
+err_unlock:
+ up_read(&fc->killsb);
+ return err;
+
+err:
+ fuse_copy_finish(cs);
+ return err;
+}
+
+static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size,
+ struct fuse_copy_state *cs)
+{
+ struct fuse_notify_inval_entry_out outarg;
+ int err = -EINVAL;
+ char buf[FUSE_NAME_MAX+1];
+ struct qstr name;
+
+ if (size < sizeof(outarg))
+ goto err;
+
+ err = fuse_copy_one(cs, &outarg, sizeof(outarg));
+ if (err)
+ goto err;
+
+ err = -ENAMETOOLONG;
+ if (outarg.namelen > FUSE_NAME_MAX)
+ goto err;
+
+ name.name = buf;
+ name.len = outarg.namelen;
+ err = fuse_copy_one(cs, buf, outarg.namelen + 1);
+ if (err)
+ goto err;
+ fuse_copy_finish(cs);
+ buf[outarg.namelen] = 0;
+ name.hash = full_name_hash(name.name, name.len);
+
+ down_read(&fc->killsb);
+ err = -ENOENT;
+ if (!fc->sb)
+ goto err_unlock;
+
+ err = fuse_reverse_inval_entry(fc->sb, outarg.parent, &name);
+
+err_unlock:
+ up_read(&fc->killsb);
+ return err;
+
+err:
+ fuse_copy_finish(cs);
+ return err;
+}
+
static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
unsigned int size, struct fuse_copy_state *cs)
{
@@ -856,6 +931,12 @@ static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
case FUSE_NOTIFY_POLL:
return fuse_notify_poll(fc, size, cs);
+ case FUSE_NOTIFY_INVAL_INODE:
+ return fuse_notify_inval_inode(fc, size, cs);
+
+ case FUSE_NOTIFY_INVAL_ENTRY:
+ return fuse_notify_inval_entry(fc, size, cs);
+
default:
fuse_copy_finish(cs);
return -EINVAL;
@@ -910,7 +991,7 @@ static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
int err;
- unsigned nbytes = iov_length(iov, nr_segs);
+ size_t nbytes = iov_length(iov, nr_segs);
struct fuse_req *req;
struct fuse_out_header oh;
struct fuse_copy_state cs;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index b3089a0..e703654 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -375,7 +375,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
struct fuse_conn *fc = get_fuse_conn(dir);
struct fuse_req *req;
struct fuse_req *forget_req;
- struct fuse_open_in inarg;
+ struct fuse_create_in inarg;
struct fuse_open_out outopen;
struct fuse_entry_out outentry;
struct fuse_file *ff;
@@ -399,15 +399,20 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
if (!ff)
goto out_put_request;
+ if (!fc->dont_mask)
+ mode &= ~current_umask();
+
flags &= ~O_NOCTTY;
memset(&inarg, 0, sizeof(inarg));
memset(&outentry, 0, sizeof(outentry));
inarg.flags = flags;
inarg.mode = mode;
+ inarg.umask = current_umask();
req->in.h.opcode = FUSE_CREATE;
req->in.h.nodeid = get_node_id(dir);
req->in.numargs = 2;
- req->in.args[0].size = sizeof(inarg);
+ req->in.args[0].size = fc->minor < 12 ? sizeof(struct fuse_open_in) :
+ sizeof(inarg);
req->in.args[0].value = &inarg;
req->in.args[1].size = entry->d_name.len + 1;
req->in.args[1].value = entry->d_name.name;
@@ -546,12 +551,17 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, int mode,
if (IS_ERR(req))
return PTR_ERR(req);
+ if (!fc->dont_mask)
+ mode &= ~current_umask();
+
memset(&inarg, 0, sizeof(inarg));
inarg.mode = mode;
inarg.rdev = new_encode_dev(rdev);
+ inarg.umask = current_umask();
req->in.h.opcode = FUSE_MKNOD;
req->in.numargs = 2;
- req->in.args[0].size = sizeof(inarg);
+ req->in.args[0].size = fc->minor < 12 ? FUSE_COMPAT_MKNOD_IN_SIZE :
+ sizeof(inarg);
req->in.args[0].value = &inarg;
req->in.args[1].size = entry->d_name.len + 1;
req->in.args[1].value = entry->d_name.name;
@@ -578,8 +588,12 @@ static int fuse_mkdir(struct inode *dir, struct dentry *entry, int mode)
if (IS_ERR(req))
return PTR_ERR(req);
+ if (!fc->dont_mask)
+ mode &= ~current_umask();
+
memset(&inarg, 0, sizeof(inarg));
inarg.mode = mode;
+ inarg.umask = current_umask();
req->in.h.opcode = FUSE_MKDIR;
req->in.numargs = 2;
req->in.args[0].size = sizeof(inarg);
@@ -845,6 +859,43 @@ int fuse_update_attributes(struct inode *inode, struct kstat *stat,
return err;
}
+int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
+ struct qstr *name)
+{
+ int err = -ENOTDIR;
+ struct inode *parent;
+ struct dentry *dir;
+ struct dentry *entry;
+
+ parent = ilookup5(sb, parent_nodeid, fuse_inode_eq, &parent_nodeid);
+ if (!parent)
+ return -ENOENT;
+
+ mutex_lock(&parent->i_mutex);
+ if (!S_ISDIR(parent->i_mode))
+ goto unlock;
+
+ err = -ENOENT;
+ dir = d_find_alias(parent);
+ if (!dir)
+ goto unlock;
+
+ entry = d_lookup(dir, name);
+ dput(dir);
+ if (!entry)
+ goto unlock;
+
+ fuse_invalidate_attr(parent);
+ fuse_invalidate_entry(entry);
+ dput(entry);
+ err = 0;
+
+ unlock:
+ mutex_unlock(&parent->i_mutex);
+ iput(parent);
+ return err;
+}
+
/*
* Calling into a user-controlled filesystem gives the filesystem
* daemon ptrace-like capabilities over the requester process. This
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index fce6ce6..cbc4640 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1922,7 +1922,7 @@ unsigned fuse_file_poll(struct file *file, poll_table *wait)
req = fuse_get_req(fc);
if (IS_ERR(req))
- return PTR_ERR(req);
+ return POLLERR;
req->in.h.opcode = FUSE_POLL;
req->in.h.nodeid = ff->nodeid;
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index aaf2f9f..52b641f 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -446,6 +446,9 @@ struct fuse_conn {
/** Do multi-page cached writes */
unsigned big_writes:1;
+ /** Don't apply umask to creation modes */
+ unsigned dont_mask:1;
+
/** The number of requests waiting for completion */
atomic_t num_waiting;
@@ -481,6 +484,12 @@ struct fuse_conn {
/** Called on final put */
void (*release)(struct fuse_conn *);
+
+ /** Super block for this connection. */
+ struct super_block *sb;
+
+ /** Read/write semaphore to hold when accessing sb. */
+ struct rw_semaphore killsb;
};
static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb)
@@ -509,6 +518,11 @@ extern const struct file_operations fuse_dev_operations;
extern const struct dentry_operations fuse_dentry_operations;
/**
+ * Inode to nodeid comparison.
+ */
+int fuse_inode_eq(struct inode *inode, void *_nodeidp);
+
+/**
* Get a filled in inode
*/
struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
@@ -708,6 +722,19 @@ void fuse_release_nowrite(struct inode *inode);
u64 fuse_get_attr_version(struct fuse_conn *fc);
+/**
+ * File-system tells the kernel to invalidate cache for the given node id.
+ */
+int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid,
+ loff_t offset, loff_t len);
+
+/**
+ * File-system tells the kernel to invalidate parent attributes and
+ * the dentry matching parent/name.
+ */
+int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
+ struct qstr *name);
+
int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
bool isdir);
ssize_t fuse_direct_io(struct file *file, const char __user *buf,
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index d8673cc..f91ccc4 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -206,7 +206,7 @@ static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr)
BUG();
}
-static int fuse_inode_eq(struct inode *inode, void *_nodeidp)
+int fuse_inode_eq(struct inode *inode, void *_nodeidp)
{
u64 nodeid = *(u64 *) _nodeidp;
if (get_node_id(inode) == nodeid)
@@ -257,6 +257,31 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
return inode;
}
+int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid,
+ loff_t offset, loff_t len)
+{
+ struct inode *inode;
+ pgoff_t pg_start;
+ pgoff_t pg_end;
+
+ inode = ilookup5(sb, nodeid, fuse_inode_eq, &nodeid);
+ if (!inode)
+ return -ENOENT;
+
+ fuse_invalidate_attr(inode);
+ if (offset >= 0) {
+ pg_start = offset >> PAGE_CACHE_SHIFT;
+ if (len <= 0)
+ pg_end = -1;
+ else
+ pg_end = (offset + len - 1) >> PAGE_CACHE_SHIFT;
+ invalidate_inode_pages2_range(inode->i_mapping,
+ pg_start, pg_end);
+ }
+ iput(inode);
+ return 0;
+}
+
static void fuse_umount_begin(struct super_block *sb)
{
fuse_abort_conn(get_fuse_conn_super(sb));
@@ -480,6 +505,7 @@ void fuse_conn_init(struct fuse_conn *fc)
memset(fc, 0, sizeof(*fc));
spin_lock_init(&fc->lock);
mutex_init(&fc->inst_mutex);
+ init_rwsem(&fc->killsb);
atomic_set(&fc->count, 1);
init_waitqueue_head(&fc->waitq);
init_waitqueue_head(&fc->blocked_waitq);
@@ -725,6 +751,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
}
if (arg->flags & FUSE_BIG_WRITES)
fc->big_writes = 1;
+ if (arg->flags & FUSE_DONT_MASK)
+ fc->dont_mask = 1;
} else {
ra_pages = fc->max_read / PAGE_CACHE_SIZE;
fc->no_lock = 1;
@@ -748,7 +776,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
arg->minor = FUSE_KERNEL_MINOR_VERSION;
arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE;
arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
- FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES;
+ FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK;
req->in.h.opcode = FUSE_INIT;
req->in.numargs = 1;
req->in.args[0].size = sizeof(*arg);
@@ -860,10 +888,16 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
fuse_conn_init(fc);
fc->dev = sb->s_dev;
+ fc->sb = sb;
err = fuse_bdi_init(fc, sb);
if (err)
goto err_put_conn;
+ /* Handle umasking inside the fuse code */
+ if (sb->s_flags & MS_POSIXACL)
+ fc->dont_mask = 1;
+ sb->s_flags |= MS_POSIXACL;
+
fc->release = fuse_free_conn;
fc->flags = d.flags;
fc->user_id = d.user_id;
@@ -941,12 +975,25 @@ static int fuse_get_sb(struct file_system_type *fs_type,
return get_sb_nodev(fs_type, flags, raw_data, fuse_fill_super, mnt);
}
+static void fuse_kill_sb_anon(struct super_block *sb)
+{
+ struct fuse_conn *fc = get_fuse_conn_super(sb);
+
+ if (fc) {
+ down_write(&fc->killsb);
+ fc->sb = NULL;
+ up_write(&fc->killsb);
+ }
+
+ kill_anon_super(sb);
+}
+
static struct file_system_type fuse_fs_type = {
.owner = THIS_MODULE,
.name = "fuse",
.fs_flags = FS_HAS_SUBTYPE,
.get_sb = fuse_get_sb,
- .kill_sb = kill_anon_super,
+ .kill_sb = fuse_kill_sb_anon,
};
#ifdef CONFIG_BLOCK
@@ -958,11 +1005,24 @@ static int fuse_get_sb_blk(struct file_system_type *fs_type,
mnt);
}
+static void fuse_kill_sb_blk(struct super_block *sb)
+{
+ struct fuse_conn *fc = get_fuse_conn_super(sb);
+
+ if (fc) {
+ down_write(&fc->killsb);
+ fc->sb = NULL;
+ up_write(&fc->killsb);
+ }
+
+ kill_block_super(sb);
+}
+
static struct file_system_type fuseblk_fs_type = {
.owner = THIS_MODULE,
.name = "fuseblk",
.get_sb = fuse_get_sb_blk,
- .kill_sb = kill_block_super,
+ .kill_sb = fuse_kill_sb_blk,
.fs_flags = FS_REQUIRES_DEV | FS_HAS_SUBTYPE,
};
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index fe02ad4..032604e 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -972,6 +972,7 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent)
sb->s_blocksize_bits = 10;
sb->s_magic = HOSTFS_SUPER_MAGIC;
sb->s_op = &hostfs_sbops;
+ sb->s_maxbytes = MAX_LFS_FILESIZE;
/* NULL is printed as <NULL> by sprintf: avoid that. */
if (req_root == NULL)
diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c
index 7515e73..696686c 100644
--- a/fs/jffs2/scan.c
+++ b/fs/jffs2/scan.c
@@ -130,9 +130,9 @@ int jffs2_scan_medium(struct jffs2_sb_info *c)
if (jffs2_sum_active()) {
s = kzalloc(sizeof(struct jffs2_summary), GFP_KERNEL);
if (!s) {
- kfree(flashbuf);
JFFS2_WARNING("Can't allocate memory for summary\n");
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto out;
}
}
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 4145083..23341c1 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -678,7 +678,6 @@ __be32
nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
int access, struct file **filp)
{
- const struct cred *cred = current_cred();
struct dentry *dentry;
struct inode *inode;
int flags = O_RDONLY|O_LARGEFILE;
@@ -733,7 +732,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
vfs_dq_init(inode);
}
*filp = dentry_open(dget(dentry), mntget(fhp->fh_export->ex_path.mnt),
- flags, cred);
+ flags, current_cred());
if (IS_ERR(*filp))
host_err = PTR_ERR(*filp);
else
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index ff231ad..ff27a29 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -296,12 +296,15 @@ static int inotify_fasync(int fd, struct file *file, int on)
static int inotify_release(struct inode *ignored, struct file *file)
{
struct fsnotify_group *group = file->private_data;
+ struct user_struct *user = group->inotify_data.user;
fsnotify_clear_marks_by_group(group);
/* free this group, matching get was inotify_init->fsnotify_obtain_group */
fsnotify_put_group(group);
+ atomic_dec(&user->inotify_devs);
+
return 0;
}
diff --git a/fs/sync.c b/fs/sync.c
index dd20002..3422ba6 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -112,8 +112,13 @@ restart:
mutex_unlock(&mutex);
}
+/*
+ * sync everything. Start out by waking pdflush, because that writes back
+ * all queues in parallel.
+ */
SYSCALL_DEFINE0(sync)
{
+ wakeup_pdflush(0);
sync_filesystems(0);
sync_filesystems(1);
if (unlikely(laptop_mode))