diff options
author | Ziyan <jaraidaniel@gmail.com> | 2015-10-24 18:19:09 +0200 |
---|---|---|
committer | Ziyan <jaraidaniel@gmail.com> | 2015-10-25 16:24:52 +0100 |
commit | 101878938737bd16a5f7fb932b51041f7dbbb733 (patch) | |
tree | 235732d29c6741f22f2020218b2fad41c40b57b8 /fs | |
parent | 540bea4ab32149f8bc71fe34b73340c8d9abc053 (diff) | |
parent | 5dba9ddd98cbc7ad319d687887981a0ea0062c75 (diff) | |
download | kernel_samsung_espresso10-101878938737bd16a5f7fb932b51041f7dbbb733.zip kernel_samsung_espresso10-101878938737bd16a5f7fb932b51041f7dbbb733.tar.gz kernel_samsung_espresso10-101878938737bd16a5f7fb932b51041f7dbbb733.tar.bz2 |
Merge remote-tracking branch 'linux-stable/linux-3.0.y' into p-android-omap-3.0-dev-espresso
Conflicts:
Makefile
arch/arm/include/asm/hardware/cache-l2x0.h
arch/arm/kernel/smp.c
arch/arm/mach-omap2/board-4430sdp.c
arch/arm/mach-omap2/board-omap4panda.c
arch/arm/mach-omap2/opp.c
arch/ia64/include/asm/futex.h
drivers/bluetooth/ath3k.c
drivers/bluetooth/btusb.c
drivers/firmware/efivars.c
drivers/gpu/drm/i915/intel_lvds.c
drivers/gpu/drm/radeon/radeon_atombios.c
drivers/gpu/drm/radeon/radeon_irq_kms.c
drivers/hwmon/fam15h_power.c
drivers/mfd/twl6030-irq.c
drivers/mmc/core/sdio.c
drivers/net/tun.c
drivers/net/usb/ipheth.c
drivers/net/usb/usbnet.c
drivers/usb/core/hub.c
drivers/usb/host/xhci-mem.c
drivers/usb/host/xhci.h
drivers/usb/musb/omap2430.c
drivers/usb/serial/ftdi_sio.c
drivers/usb/serial/ftdi_sio_ids.h
drivers/usb/serial/option.c
drivers/usb/serial/qcserial.c
drivers/usb/serial/ti_usb_3410_5052.c
drivers/usb/serial/ti_usb_3410_5052.h
drivers/video/omap2/dss/hdmi.c
fs/splice.c
include/asm-generic/pgtable.h
include/net/sch_generic.h
kernel/cgroup.c
kernel/futex.c
kernel/time/timekeeping.c
net/ipv4/route.c
net/ipv4/syncookies.c
net/ipv4/tcp_ipv4.c
net/wireless/util.c
security/commoncap.c
sound/soc/soc-dapm.c
Diffstat (limited to 'fs')
151 files changed, 1371 insertions, 774 deletions
@@ -1395,6 +1395,10 @@ static ssize_t aio_setup_vectored_rw(int type, struct kiocb *kiocb, bool compat) if (ret < 0) goto out; + ret = rw_verify_area(type, kiocb->ki_filp, &kiocb->ki_pos, ret); + if (ret < 0) + goto out; + kiocb->ki_nr_segs = kiocb->ki_nbytes; kiocb->ki_cur_seg = 0; /* ki_nbytes/left now reflect bytes instead of segs */ @@ -1406,11 +1410,17 @@ out: return ret; } -static ssize_t aio_setup_single_vector(struct kiocb *kiocb) +static ssize_t aio_setup_single_vector(int type, struct file * file, struct kiocb *kiocb) { + int bytes; + + bytes = rw_verify_area(type, file, &kiocb->ki_pos, kiocb->ki_left); + if (bytes < 0) + return bytes; + kiocb->ki_iovec = &kiocb->ki_inline_vec; kiocb->ki_iovec->iov_base = kiocb->ki_buf; - kiocb->ki_iovec->iov_len = kiocb->ki_left; + kiocb->ki_iovec->iov_len = bytes; kiocb->ki_nr_segs = 1; kiocb->ki_cur_seg = 0; return 0; @@ -1435,10 +1445,7 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb, bool compat) if (unlikely(!access_ok(VERIFY_WRITE, kiocb->ki_buf, kiocb->ki_left))) break; - ret = security_file_permission(file, MAY_READ); - if (unlikely(ret)) - break; - ret = aio_setup_single_vector(kiocb); + ret = aio_setup_single_vector(READ, file, kiocb); if (ret) break; ret = -EINVAL; @@ -1453,10 +1460,7 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb, bool compat) if (unlikely(!access_ok(VERIFY_READ, kiocb->ki_buf, kiocb->ki_left))) break; - ret = security_file_permission(file, MAY_WRITE); - if (unlikely(ret)) - break; - ret = aio_setup_single_vector(kiocb); + ret = aio_setup_single_vector(WRITE, file, kiocb); if (ret) break; ret = -EINVAL; @@ -1467,9 +1471,6 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb, bool compat) ret = -EBADF; if (unlikely(!(file->f_mode & FMODE_READ))) break; - ret = security_file_permission(file, MAY_READ); - if (unlikely(ret)) - break; ret = aio_setup_vectored_rw(READ, kiocb, compat); if (ret) break; @@ -1481,9 +1482,6 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb, bool compat) ret = -EBADF; if (unlikely(!(file->f_mode & FMODE_WRITE))) break; - ret = security_file_permission(file, MAY_WRITE); - if (unlikely(ret)) - break; ret = aio_setup_vectored_rw(WRITE, kiocb, compat); if (ret) break; diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 450f529..2c69d12 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -61,15 +61,6 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry) /* This is an autofs submount, we can't expire it */ if (autofs_type_indirect(sbi->type)) goto done; - - /* - * Otherwise it's an offset mount and we need to check - * if we can umount its mount, if there is one. - */ - if (!d_mountpoint(path.dentry)) { - status = 0; - goto done; - } } /* Update the expiry counter if fs is busy */ diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index f55ae23..790fa63 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -392,10 +392,12 @@ static struct vfsmount *autofs4_d_automount(struct path *path) ino->flags |= AUTOFS_INF_PENDING; spin_unlock(&sbi->fs_lock); status = autofs4_mount_wait(dentry); - if (status) - return ERR_PTR(status); spin_lock(&sbi->fs_lock); ino->flags &= ~AUTOFS_INF_PENDING; + if (status) { + spin_unlock(&sbi->fs_lock); + return ERR_PTR(status); + } } done: if (!(ino->flags & AUTOFS_INF_EXPIRING)) { diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 618493e..7e8299f 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1669,30 +1669,19 @@ static int elf_note_info_init(struct elf_note_info *info) return 0; info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL); if (!info->psinfo) - goto notes_free; + return 0; info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL); if (!info->prstatus) - goto psinfo_free; + return 0; info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL); if (!info->fpu) - goto prstatus_free; + return 0; #ifdef ELF_CORE_COPY_XFPREGS info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL); if (!info->xfpu) - goto fpu_free; + return 0; #endif return 1; -#ifdef ELF_CORE_COPY_XFPREGS - fpu_free: - kfree(info->fpu); -#endif - prstatus_free: - kfree(info->prstatus); - psinfo_free: - kfree(info->psinfo); - notes_free: - kfree(info->notes); - return 0; } static int fill_note_info(struct elfhdr *elf, int phdrs, diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c index b8e8b0a..4a1b984 100644 --- a/fs/binfmt_em86.c +++ b/fs/binfmt_em86.c @@ -42,7 +42,6 @@ static int load_em86(struct linux_binprm *bprm,struct pt_regs *regs) return -ENOEXEC; } - bprm->recursion_depth++; /* Well, the bang-shell is implicit... */ allow_write_access(bprm->file); fput(bprm->file); bprm->file = NULL; diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 1befe2e..b2497d4 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -116,10 +116,6 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs) if (!enabled) goto _ret; - retval = -ENOEXEC; - if (bprm->recursion_depth > BINPRM_MAX_RECURSION) - goto _ret; - /* to keep locking time low, we copy the interpreter string */ read_lock(&entries_lock); fmt = check_file(bprm); @@ -176,7 +172,10 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs) goto _error; bprm->argc ++; - bprm->interp = iname; /* for binfmt_script */ + /* Update interp in case binfmt_script needs it. */ + retval = bprm_change_interp(iname, bprm); + if (retval < 0) + goto _error; interp_file = open_exec (iname); retval = PTR_ERR (interp_file); @@ -197,8 +196,6 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs) if (retval < 0) goto _error; - bprm->recursion_depth++; - retval = search_binary_handler (bprm, regs); if (retval < 0) goto _error; diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c index 396a988..211ede0 100644 --- a/fs/binfmt_script.c +++ b/fs/binfmt_script.c @@ -22,15 +22,13 @@ static int load_script(struct linux_binprm *bprm,struct pt_regs *regs) char interp[BINPRM_BUF_SIZE]; int retval; - if ((bprm->buf[0] != '#') || (bprm->buf[1] != '!') || - (bprm->recursion_depth > BINPRM_MAX_RECURSION)) + if ((bprm->buf[0] != '#') || (bprm->buf[1] != '!')) return -ENOEXEC; /* * This section does the #! interpretation. * Sorta complicated, but hopefully it will work. -TYT */ - bprm->recursion_depth++; allow_write_access(bprm->file); fput(bprm->file); bprm->file = NULL; @@ -82,7 +80,9 @@ static int load_script(struct linux_binprm *bprm,struct pt_regs *regs) retval = copy_strings_kernel(1, &i_name, bprm); if (retval) return retval; bprm->argc++; - bprm->interp = interp; + retval = bprm_change_interp(interp, bprm); + if (retval < 0) + return retval; /* * OK, now restart the process with the interpreter's dentry. @@ -786,12 +786,22 @@ static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs, int bio_uncopy_user(struct bio *bio) { struct bio_map_data *bmd = bio->bi_private; - int ret = 0; + struct bio_vec *bvec; + int ret = 0, i; - if (!bio_flagged(bio, BIO_NULL_MAPPED)) - ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs, - bmd->nr_sgvecs, bio_data_dir(bio) == READ, - 0, bmd->is_our_pages); + if (!bio_flagged(bio, BIO_NULL_MAPPED)) { + /* + * if we're in a workqueue, the request is orphaned, so + * don't copy into a random user address space, just free. + */ + if (current->mm) + ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs, + bmd->nr_sgvecs, bio_data_dir(bio) == READ, + 0, bmd->is_our_pages); + else if (bmd->is_our_pages) + __bio_for_each_segment(bvec, bio, i, 0) + __free_page(bvec->bv_page); + } bio_free_map_data(bmd); bio_put(bio); return ret; diff --git a/fs/block_dev.c b/fs/block_dev.c index 74fc5ed..b5eb8c1 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -55,16 +55,24 @@ EXPORT_SYMBOL(I_BDEV); static void bdev_inode_switch_bdi(struct inode *inode, struct backing_dev_info *dst) { + bool wakeup_bdi = false; + spin_lock(&inode_wb_list_lock); spin_lock(&inode->i_lock); inode->i_data.backing_dev_info = dst; - if (inode->i_state & I_DIRTY) + if (inode->i_state & I_DIRTY) { + if (bdi_cap_writeback_dirty(dst) && !wb_has_dirty_io(&dst->wb)) + wakeup_bdi = true; list_move(&inode->i_wb_list, &dst->wb.b_dirty); + } spin_unlock(&inode->i_lock); spin_unlock(&inode_wb_list_lock); + + if (wakeup_bdi) + bdi_wakeup_thread_delayed(dst); } -static sector_t max_block(struct block_device *bdev) +sector_t blkdev_max_block(struct block_device *bdev) { sector_t retval = ~((sector_t)0); loff_t sz = i_size_read(bdev->bd_inode); @@ -135,7 +143,7 @@ static int blkdev_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh, int create) { - if (iblock >= max_block(I_BDEV(inode))) { + if (iblock >= blkdev_max_block(I_BDEV(inode))) { if (create) return -EIO; @@ -157,7 +165,7 @@ static int blkdev_get_blocks(struct inode *inode, sector_t iblock, struct buffer_head *bh, int create) { - sector_t end_block = max_block(I_BDEV(inode)); + sector_t end_block = blkdev_max_block(I_BDEV(inode)); unsigned long max_blocks = bh->b_size >> inode->i_blkbits; if ((iblock + max_blocks) > end_block) { @@ -576,6 +584,7 @@ struct block_device *bdgrab(struct block_device *bdev) ihold(bdev->bd_inode); return bdev; } +EXPORT_SYMBOL(bdgrab); long nr_blockdev_pages(void) { @@ -1052,7 +1061,9 @@ void bd_set_size(struct block_device *bdev, loff_t size) { unsigned bsize = bdev_logical_block_size(bdev); - bdev->bd_inode->i_size = size; + mutex_lock(&bdev->bd_inode->i_mutex); + i_size_write(bdev->bd_inode, size); + mutex_unlock(&bdev->bd_inode->i_mutex); while (bsize < PAGE_CACHE_SIZE) { if (size & bsize) break; diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 7ec1409..8006a28 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -212,10 +212,17 @@ static noinline int run_ordered_completions(struct btrfs_workers *workers, work->ordered_func(work); - /* now take the lock again and call the freeing code */ + /* now take the lock again and drop our item from the list */ spin_lock(&workers->order_lock); list_del(&work->order_list); + spin_unlock(&workers->order_lock); + + /* + * we don't want to call the ordered free functions + * with the lock held though + */ work->ordered_free(work); + spin_lock(&workers->order_lock); } spin_unlock(&workers->order_lock); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 1ac8db5d..57106a9 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -801,7 +801,8 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, #ifdef CONFIG_MIGRATION static int btree_migratepage(struct address_space *mapping, - struct page *newpage, struct page *page) + struct page *newpage, struct page *page, + enum migrate_mode mode) { /* * we can't safely write a btree page from here, @@ -816,7 +817,7 @@ static int btree_migratepage(struct address_space *mapping, if (page_has_private(page) && !try_to_release_page(page, GFP_KERNEL)) return -EAGAIN; - return migrate_page(mapping, newpage, page); + return migrate_page(mapping, newpage, page, mode); } #endif diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 7e20a65..01220b7 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3786,7 +3786,7 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info) spin_lock(&block_rsv->lock); spin_lock(&sinfo->lock); - block_rsv->size = num_bytes; + block_rsv->size = min_t(u64, num_bytes, 512 * 1024 * 1024); num_bytes = sinfo->bytes_used + sinfo->bytes_pinned + sinfo->bytes_reserved + sinfo->bytes_readonly + diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index a3c4751..a205027 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1539,7 +1539,11 @@ static noinline int copy_to_sk(struct btrfs_root *root, item_off = btrfs_item_ptr_offset(leaf, i); item_len = btrfs_item_size_nr(leaf, i); - if (item_len > BTRFS_SEARCH_ARGS_BUFSIZE) + btrfs_item_key_to_cpu(leaf, key, i); + if (!key_in_sk(key, sk)) + continue; + + if (sizeof(sh) + item_len > BTRFS_SEARCH_ARGS_BUFSIZE) item_len = 0; if (sizeof(sh) + item_len + *sk_offset > @@ -1548,10 +1552,6 @@ static noinline int copy_to_sk(struct btrfs_root *root, goto overflow; } - btrfs_item_key_to_cpu(leaf, key, i); - if (!key_in_sk(key, sk)) - continue; - sh.objectid = key->objectid; sh.offset = key->offset; sh.type = key->type; diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 5e0a3dc..2ab5837 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -670,6 +670,7 @@ struct backref_node *build_backref_tree(struct reloc_control *rc, int cowonly; int ret; int err = 0; + bool need_check = true; path1 = btrfs_alloc_path(); path2 = btrfs_alloc_path(); @@ -892,6 +893,7 @@ again: cur->bytenr); lower = cur; + need_check = true; for (; level < BTRFS_MAX_LEVEL; level++) { if (!path2->nodes[level]) { BUG_ON(btrfs_root_bytenr(&root->root_item) != @@ -935,14 +937,12 @@ again: /* * add the block to pending list if we - * need check its backrefs. only block - * at 'cur->level + 1' is added to the - * tail of pending list. this guarantees - * we check backrefs from lower level - * blocks to upper level blocks. + * need check its backrefs, we only do this once + * while walking up a tree as we will catch + * anything else later on. */ - if (!upper->checked && - level == cur->level + 1) { + if (!upper->checked && need_check) { + need_check = false; list_add_tail(&edge->list[UPPER], &list); } else diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 7fa128d..88dec16 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -315,6 +315,7 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans, unsigned long src_ptr; unsigned long dst_ptr; int overwrite_root = 0; + bool inode_item = key->type == BTRFS_INODE_ITEM_KEY; if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) overwrite_root = 1; @@ -324,6 +325,9 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans, /* look for the key in the destination tree */ ret = btrfs_search_slot(NULL, root, key, path, 0, 0); + if (ret < 0) + return ret; + if (ret == 0) { char *src_copy; char *dst_copy; @@ -365,6 +369,30 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans, return 0; } + /* + * We need to load the old nbytes into the inode so when we + * replay the extents we've logged we get the right nbytes. + */ + if (inode_item) { + struct btrfs_inode_item *item; + u64 nbytes; + + item = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_inode_item); + nbytes = btrfs_inode_nbytes(path->nodes[0], item); + item = btrfs_item_ptr(eb, slot, + struct btrfs_inode_item); + btrfs_set_inode_nbytes(eb, item, nbytes); + } + } else if (inode_item) { + struct btrfs_inode_item *item; + + /* + * New inode, set nbytes to 0 so that the nbytes comes out + * properly when we replay the extents. + */ + item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item); + btrfs_set_inode_nbytes(eb, item, 0); } insert: btrfs_release_path(path); @@ -487,7 +515,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, u64 extent_end; u64 alloc_hint; u64 start = key->offset; - u64 saved_nbytes; + u64 nbytes = 0; struct btrfs_file_extent_item *item; struct inode *inode = NULL; unsigned long size; @@ -497,10 +525,19 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, found_type = btrfs_file_extent_type(eb, item); if (found_type == BTRFS_FILE_EXTENT_REG || - found_type == BTRFS_FILE_EXTENT_PREALLOC) - extent_end = start + btrfs_file_extent_num_bytes(eb, item); - else if (found_type == BTRFS_FILE_EXTENT_INLINE) { + found_type == BTRFS_FILE_EXTENT_PREALLOC) { + nbytes = btrfs_file_extent_num_bytes(eb, item); + extent_end = start + nbytes; + + /* + * We don't add to the inodes nbytes if we are prealloc or a + * hole. + */ + if (btrfs_file_extent_disk_bytenr(eb, item) == 0) + nbytes = 0; + } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { size = btrfs_file_extent_inline_len(eb, item); + nbytes = btrfs_file_extent_ram_bytes(eb, item); extent_end = (start + size + mask) & ~mask; } else { ret = 0; @@ -549,7 +586,6 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, } btrfs_release_path(path); - saved_nbytes = inode_get_bytes(inode); /* drop any overlapping extents */ ret = btrfs_drop_extents(trans, inode, start, extent_end, &alloc_hint, 1); @@ -637,7 +673,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, BUG_ON(ret); } - inode_set_bytes(inode, saved_nbytes); + inode_add_bytes(inode, nbytes); btrfs_update_inode(trans, root, inode); out: if (inode) @@ -691,6 +727,8 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans, kfree(name); iput(inode); + + btrfs_run_delayed_items(trans, root); return ret; } @@ -896,6 +934,7 @@ again: ret = btrfs_unlink_inode(trans, root, dir, inode, victim_name, victim_name_len); + btrfs_run_delayed_items(trans, root); } kfree(victim_name); ptr = (unsigned long)(victim_ref + 1) + victim_name_len; @@ -1476,6 +1515,9 @@ again: ret = btrfs_unlink_inode(trans, root, dir, inode, name, name_len); BUG_ON(ret); + + btrfs_run_delayed_items(trans, root); + kfree(name); iput(inode); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 43baaf0..7745ad5 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -512,6 +512,7 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) new_device->writeable = 0; new_device->in_fs_metadata = 0; new_device->can_discard = 0; + spin_lock_init(&new_device->io_lock); list_replace_rcu(&device->dev_list, &new_device->dev_list); call_rcu(&device->rcu, free_device); @@ -545,6 +546,12 @@ int btrfs_close_devices(struct btrfs_fs_devices *fs_devices) __btrfs_close_devices(fs_devices); free_fs_devices(fs_devices); } + /* + * Wait for rcu kworkers under __btrfs_close_devices + * to finish all blkdev_puts so device is really + * free when umount is done. + */ + rcu_barrier(); return ret; } diff --git a/fs/buffer.c b/fs/buffer.c index 1a80b04..166028b 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -961,13 +961,14 @@ link_dev_buffers(struct page *page, struct buffer_head *head) /* * Initialise the state of a blockdev page's buffers. */ -static void +static sector_t init_page_buffers(struct page *page, struct block_device *bdev, sector_t block, int size) { struct buffer_head *head = page_buffers(page); struct buffer_head *bh = head; int uptodate = PageUptodate(page); + sector_t end_block = blkdev_max_block(I_BDEV(bdev->bd_inode)); do { if (!buffer_mapped(bh)) { @@ -976,38 +977,47 @@ init_page_buffers(struct page *page, struct block_device *bdev, bh->b_blocknr = block; if (uptodate) set_buffer_uptodate(bh); - set_buffer_mapped(bh); + if (block < end_block) + set_buffer_mapped(bh); } block++; bh = bh->b_this_page; } while (bh != head); + + /* + * Caller needs to validate requested block against end of device. + */ + return end_block; } /* * Create the page-cache page that contains the requested block. * - * This is user purely for blockdev mappings. + * This is used purely for blockdev mappings. */ -static struct page * +static int grow_dev_page(struct block_device *bdev, sector_t block, - pgoff_t index, int size) + pgoff_t index, int size, int sizebits) { struct inode *inode = bdev->bd_inode; struct page *page; struct buffer_head *bh; + sector_t end_block; + int ret = 0; /* Will call free_more_memory() */ page = find_or_create_page(inode->i_mapping, index, (mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS)|__GFP_MOVABLE); if (!page) - return NULL; + return ret; BUG_ON(!PageLocked(page)); if (page_has_buffers(page)) { bh = page_buffers(page); if (bh->b_size == size) { - init_page_buffers(page, bdev, block, size); - return page; + end_block = init_page_buffers(page, bdev, + index << sizebits, size); + goto done; } if (!try_to_free_buffers(page)) goto failed; @@ -1027,15 +1037,15 @@ grow_dev_page(struct block_device *bdev, sector_t block, */ spin_lock(&inode->i_mapping->private_lock); link_dev_buffers(page, bh); - init_page_buffers(page, bdev, block, size); + end_block = init_page_buffers(page, bdev, index << sizebits, size); spin_unlock(&inode->i_mapping->private_lock); - return page; +done: + ret = (block < end_block) ? 1 : -ENXIO; failed: - BUG(); unlock_page(page); page_cache_release(page); - return NULL; + return ret; } /* @@ -1045,7 +1055,6 @@ failed: static int grow_buffers(struct block_device *bdev, sector_t block, int size) { - struct page *page; pgoff_t index; int sizebits; @@ -1069,14 +1078,9 @@ grow_buffers(struct block_device *bdev, sector_t block, int size) bdevname(bdev, b)); return -EIO; } - block = index << sizebits; + /* Create a page with the proper size buffers.. */ - page = grow_dev_page(bdev, block, index, size); - if (!page) - return 0; - unlock_page(page); - page_cache_release(page); - return 1; + return grow_dev_page(bdev, block, index, size, sizebits); } static struct buffer_head * @@ -1095,7 +1099,7 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size) } for (;;) { - struct buffer_head * bh; + struct buffer_head *bh; int ret; bh = __find_get_block(bdev, block, size); @@ -1363,10 +1367,6 @@ EXPORT_SYMBOL(__find_get_block); * which corresponds to the passed block_device, block and size. The * returned buffer has its reference count incremented. * - * __getblk() cannot fail - it just keeps trying. If you pass it an - * illegal block number, __getblk() will happily return a buffer_head - * which represents the non-existent block. Very weird. - * * __getblk() will lock up the machine if grow_dev_page's try_to_free_buffers() * attempt is failing. FIXME, perhaps? */ diff --git a/fs/ceph/export.c b/fs/ceph/export.c index f67b687..a080779 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c @@ -84,7 +84,7 @@ static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len, * FIXME: we should try harder by querying the mds for the ino. */ static struct dentry *__fh_to_dentry(struct super_block *sb, - struct ceph_nfs_fh *fh) + struct ceph_nfs_fh *fh, int fh_len) { struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc; struct inode *inode; @@ -92,6 +92,9 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, struct ceph_vino vino; int err; + if (fh_len < sizeof(*fh) / 4) + return ERR_PTR(-ESTALE); + dout("__fh_to_dentry %llx\n", fh->ino); vino.ino = fh->ino; vino.snap = CEPH_NOSNAP; @@ -136,7 +139,7 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, * convert connectable fh to dentry */ static struct dentry *__cfh_to_dentry(struct super_block *sb, - struct ceph_nfs_confh *cfh) + struct ceph_nfs_confh *cfh, int fh_len) { struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc; struct inode *inode; @@ -144,6 +147,9 @@ static struct dentry *__cfh_to_dentry(struct super_block *sb, struct ceph_vino vino; int err; + if (fh_len < sizeof(*cfh) / 4) + return ERR_PTR(-ESTALE); + dout("__cfh_to_dentry %llx (%llx/%x)\n", cfh->ino, cfh->parent_ino, cfh->parent_name_hash); @@ -193,9 +199,11 @@ static struct dentry *ceph_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, int fh_type) { if (fh_type == 1) - return __fh_to_dentry(sb, (struct ceph_nfs_fh *)fid->raw); + return __fh_to_dentry(sb, (struct ceph_nfs_fh *)fid->raw, + fh_len); else - return __cfh_to_dentry(sb, (struct ceph_nfs_confh *)fid->raw); + return __cfh_to_dentry(sb, (struct ceph_nfs_confh *)fid->raw, + fh_len); } /* @@ -216,6 +224,8 @@ static struct dentry *ceph_fh_to_parent(struct super_block *sb, if (fh_type == 1) return ERR_PTR(-ESTALE); + if (fh_len < sizeof(*cfh) / 4) + return ERR_PTR(-ESTALE); pr_debug("fh_to_parent %llx/%d\n", cfh->parent_ino, cfh->parent_name_hash); diff --git a/fs/ceph/super.c b/fs/ceph/super.c index f2f77fd..1775022 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -70,8 +70,14 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) /* * express utilization in terms of large blocks to avoid * overflow on 32-bit machines. + * + * NOTE: for the time being, we make bsize == frsize to humor + * not-yet-ancient versions of glibc that are broken. + * Someday, we will probably want to report a real block + * size... whatever that may mean for a network file system! */ buf->f_bsize = 1 << CEPH_BLOCK_SHIFT; + buf->f_frsize = 1 << CEPH_BLOCK_SHIFT; buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10); buf->f_bfree = (le64_to_cpu(st.kb) - le64_to_cpu(st.kb_used)) >> (CEPH_BLOCK_SHIFT-10); @@ -80,7 +86,6 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_files = le64_to_cpu(st.num_objects); buf->f_ffree = -1; buf->f_namelen = NAME_MAX; - buf->f_frsize = PAGE_CACHE_SIZE; /* leave fsid little-endian, regardless of host endianness */ fsid = *(u64 *)(&monmap->fsid) ^ *((u64 *)&monmap->fsid + 1); diff --git a/fs/ceph/super.h b/fs/ceph/super.h index f5cabef..9091926 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -21,7 +21,7 @@ /* large granularity for statfs utilization stats to facilitate * large volume sizes on 32-bit machines. */ -#define CEPH_BLOCK_SHIFT 20 /* 1 MB */ +#define CEPH_BLOCK_SHIFT 22 /* 4 MB */ #define CEPH_BLOCK (1 << CEPH_BLOCK_SHIFT) #define CEPH_MOUNT_OPT_DIRSTAT (1<<4) /* `cat dirname` for stats */ diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c index cfd1ce3..1d36db1 100644 --- a/fs/cifs/asn1.c +++ b/fs/cifs/asn1.c @@ -614,53 +614,10 @@ decode_negTokenInit(unsigned char *security_blob, int length, } } - /* mechlistMIC */ - if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { - /* Check if we have reached the end of the blob, but with - no mechListMic (e.g. NTLMSSP instead of KRB5) */ - if (ctx.error == ASN1_ERR_DEC_EMPTY) - goto decode_negtoken_exit; - cFYI(1, "Error decoding last part negTokenInit exit3"); - return 0; - } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) { - /* tag = 3 indicating mechListMIC */ - cFYI(1, "Exit 4 cls = %d con = %d tag = %d end = %p (%d)", - cls, con, tag, end, *end); - return 0; - } - - /* sequence */ - if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { - cFYI(1, "Error decoding last part negTokenInit exit5"); - return 0; - } else if ((cls != ASN1_UNI) || (con != ASN1_CON) - || (tag != ASN1_SEQ)) { - cFYI(1, "cls = %d con = %d tag = %d end = %p (%d)", - cls, con, tag, end, *end); - } - - /* sequence of */ - if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { - cFYI(1, "Error decoding last part negTokenInit exit 7"); - return 0; - } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) { - cFYI(1, "Exit 8 cls = %d con = %d tag = %d end = %p (%d)", - cls, con, tag, end, *end); - return 0; - } - - /* general string */ - if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { - cFYI(1, "Error decoding last part negTokenInit exit9"); - return 0; - } else if ((cls != ASN1_UNI) || (con != ASN1_PRI) - || (tag != ASN1_GENSTR)) { - cFYI(1, "Exit10 cls = %d con = %d tag = %d end = %p (%d)", - cls, con, tag, end, *end); - return 0; - } - cFYI(1, "Need to call asn1_octets_decode() function for %s", - ctx.pointer); /* is this UTF-8 or ASCII? */ -decode_negtoken_exit: + /* + * We currently ignore anything at the end of the SPNEGO blob after + * the mechTypes have been parsed, since none of that info is + * used at the moment. + */ return 1; } diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index 8d8f28c..993384e4 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -18,6 +18,7 @@ #include <linux/slab.h> #include <linux/vfs.h> #include <linux/fs.h> +#include <linux/inet.h> #include "cifsglob.h" #include "cifsproto.h" #include "cifsfs.h" @@ -149,7 +150,8 @@ char *cifs_compose_mount_options(const char *sb_mountdata, * assuming that we have 'unc=' and 'ip=' in * the original sb_mountdata */ - md_len = strlen(sb_mountdata) + rc + strlen(ref->node_name) + 12; + md_len = strlen(sb_mountdata) + rc + strlen(ref->node_name) + 12 + + INET6_ADDRSTRLEN; mountdata = kzalloc(md_len+1, GFP_KERNEL); if (mountdata == NULL) { rc = -ENOMEM; @@ -225,6 +227,8 @@ compose_mount_options_out: compose_mount_options_err: kfree(mountdata); mountdata = ERR_PTR(rc); + kfree(*devname); + *devname = NULL; goto compose_mount_options_out; } diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 53e7d72..bf6aa8c 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -571,6 +571,11 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb) dentry = ERR_PTR(-ENOENT); break; } + if (!S_ISDIR(dir->i_mode)) { + dput(dentry); + dentry = ERR_PTR(-ENOTDIR); + break; + } /* skip separators */ while (*s == sep) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 6255fa8..7cb9dd2 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -43,6 +43,7 @@ #define CIFS_MIN_RCV_POOL 4 +#define MAX_REOPEN_ATT 5 /* these many maximum attempts to reopen a file */ /* * default attribute cache timeout (jiffies) */ diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 07132c4..219933b 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -3473,13 +3473,12 @@ CIFSSMBSetCIFSACL(const int xid, struct cifs_tcon *tcon, __u16 fid, int rc = 0; int bytes_returned = 0; SET_SEC_DESC_REQ *pSMB = NULL; - NTRANSACT_RSP *pSMBr = NULL; + void *pSMBr; setCifsAclRetry: - rc = smb_init(SMB_COM_NT_TRANSACT, 19, tcon, (void **) &pSMB, - (void **) &pSMBr); + rc = smb_init(SMB_COM_NT_TRANSACT, 19, tcon, (void **) &pSMB, &pSMBr); if (rc) - return (rc); + return rc; pSMB->MaxSetupCount = 0; pSMB->Reserved = 0; @@ -3507,9 +3506,8 @@ setCifsAclRetry: pSMB->AclFlags = cpu_to_le32(CIFS_ACL_DACL); if (pntsd && acllen) { - memcpy((char *) &pSMBr->hdr.Protocol + data_offset, - (char *) pntsd, - acllen); + memcpy((char *)pSMBr + offsetof(struct smb_hdr, Protocol) + + data_offset, pntsd, acllen); inc_rfc1001_len(pSMB, byte_count + data_count); } else inc_rfc1001_len(pSMB, byte_count); @@ -5291,7 +5289,8 @@ CIFSSMBSetFileInfo(const int xid, struct cifs_tcon *tcon, param_offset = offsetof(struct smb_com_transaction2_sfi_req, Fid) - 4; offset = param_offset + params; - data_offset = (char *) (&pSMB->hdr.Protocol) + offset; + data_offset = (char *)pSMB + + offsetof(struct smb_hdr, Protocol) + offset; count = sizeof(FILE_BASIC_INFO); pSMB->MaxParameterCount = cpu_to_le16(2); @@ -5560,7 +5559,7 @@ CIFSSMBUnixSetFileInfo(const int xid, struct cifs_tcon *tcon, u16 fid, u32 pid_of_opener) { struct smb_com_transaction2_sfi_req *pSMB = NULL; - FILE_UNIX_BASIC_INFO *data_offset; + char *data_offset; int rc = 0; u16 params, param_offset, offset, byte_count, count; @@ -5582,8 +5581,9 @@ CIFSSMBUnixSetFileInfo(const int xid, struct cifs_tcon *tcon, param_offset = offsetof(struct smb_com_transaction2_sfi_req, Fid) - 4; offset = param_offset + params; - data_offset = (FILE_UNIX_BASIC_INFO *) - ((char *)(&pSMB->hdr.Protocol) + offset); + data_offset = (char *)pSMB + + offsetof(struct smb_hdr, Protocol) + offset; + count = sizeof(FILE_UNIX_BASIC_INFO); pSMB->MaxParameterCount = cpu_to_le16(2); @@ -5605,7 +5605,7 @@ CIFSSMBUnixSetFileInfo(const int xid, struct cifs_tcon *tcon, inc_rfc1001_len(pSMB, byte_count); pSMB->ByteCount = cpu_to_le16(byte_count); - cifs_fill_unix_set_info(data_offset, args); + cifs_fill_unix_set_info((FILE_UNIX_BASIC_INFO *)data_offset, args); rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0); if (rc) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index b775809..9e6ee47 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -158,6 +158,7 @@ cifs_reconnect(struct TCP_Server_Info *server) try_to_freeze(); /* we should try only the port we connected to before */ + mutex_lock(&server->srv_mutex); rc = generic_ip_connect(server); if (rc) { cFYI(1, "reconnect error %d", rc); @@ -169,6 +170,7 @@ cifs_reconnect(struct TCP_Server_Info *server) server->tcpStatus = CifsNeedNegotiate; spin_unlock(&GlobalMid_Lock); } + mutex_unlock(&server->srv_mutex); } while (server->tcpStatus == CifsNeedReconnect); return rc; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index a9b4a24..9040cb0 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -973,10 +973,11 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode, struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode, bool fsuid_only) { - struct cifsFileInfo *open_file; + struct cifsFileInfo *open_file, *inv_file = NULL; struct cifs_sb_info *cifs_sb; bool any_available = false; int rc; + unsigned int refind = 0; /* Having a null inode here (because mapping->host was set to zero by the VFS or MM) should not happen but we had reports of on oops (due to @@ -996,40 +997,25 @@ struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode, spin_lock(&cifs_file_list_lock); refind_writable: + if (refind > MAX_REOPEN_ATT) { + spin_unlock(&cifs_file_list_lock); + return NULL; + } list_for_each_entry(open_file, &cifs_inode->openFileList, flist) { if (!any_available && open_file->pid != current->tgid) continue; if (fsuid_only && open_file->uid != current_fsuid()) continue; if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) { - cifsFileInfo_get(open_file); - if (!open_file->invalidHandle) { /* found a good writable file */ + cifsFileInfo_get(open_file); spin_unlock(&cifs_file_list_lock); return open_file; + } else { + if (!inv_file) + inv_file = open_file; } - - spin_unlock(&cifs_file_list_lock); - - /* Had to unlock since following call can block */ - rc = cifs_reopen_file(open_file, false); - if (!rc) - return open_file; - - /* if it fails, try another handle if possible */ - cFYI(1, "wp failed on reopen file"); - cifsFileInfo_put(open_file); - - spin_lock(&cifs_file_list_lock); - - /* else we simply continue to the next entry. Thus - we do not loop on reopen errors. If we - can not reopen the file, for example if we - reconnected to a server with another client - racing to delete or lock the file we would not - make progress if we restarted before the beginning - of the loop here. */ } } /* couldn't find useable FH with same pid, try any available */ @@ -1037,7 +1023,30 @@ refind_writable: any_available = true; goto refind_writable; } + + if (inv_file) { + any_available = false; + cifsFileInfo_get(inv_file); + } + spin_unlock(&cifs_file_list_lock); + + if (inv_file) { + rc = cifs_reopen_file(inv_file, false); + if (!rc) + return inv_file; + else { + spin_lock(&cifs_file_list_lock); + list_move_tail(&inv_file->flist, + &cifs_inode->openFileList); + spin_unlock(&cifs_file_list_lock); + cifsFileInfo_put(inv_file); + spin_lock(&cifs_file_list_lock); + ++refind; + goto refind_writable; + } + } + return NULL; } diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 745e5cd..6f37228 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -173,7 +173,8 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr) if (fattr->cf_flags & CIFS_FATTR_DFS_REFERRAL) inode->i_flags |= S_AUTOMOUNT; - cifs_set_ops(inode); + if (inode->i_state & I_NEW) + cifs_set_ops(inode); } void diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 6751e74..c71032b 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -85,9 +85,12 @@ cifs_readdir_lookup(struct dentry *parent, struct qstr *name, dentry = d_lookup(parent, name); if (dentry) { - /* FIXME: check for inode number changes? */ - if (dentry->d_inode != NULL) + inode = dentry->d_inode; + /* update inode in place if i_ino didn't change */ + if (inode && CIFS_I(inode)->uniqueid == fattr->cf_uniqueid) { + cifs_fattr_to_inode(inode, fattr); return dentry; + } d_drop(dentry); dput(dentry); } diff --git a/fs/compat.c b/fs/compat.c index 0ea0083..f77a963 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -576,6 +576,10 @@ ssize_t compat_rw_copy_check_uvector(int type, } *ret_pointer = iov; + ret = -EFAULT; + if (!access_ok(VERIFY_READ, uvector, nr_segs*sizeof(*uvector))) + goto out; + /* * Single unix specification: * We should -EINVAL if an element length is not >= 0 and fitting an @@ -1106,17 +1110,12 @@ static ssize_t compat_do_readv_writev(int type, struct file *file, if (!file->f_op) goto out; - ret = -EFAULT; - if (!access_ok(VERIFY_READ, uvector, nr_segs*sizeof(*uvector))) - goto out; - - tot_len = compat_rw_copy_check_uvector(type, uvector, nr_segs, + ret = compat_rw_copy_check_uvector(type, uvector, nr_segs, UIO_FASTIOV, iovstack, &iov); - if (tot_len == 0) { - ret = 0; + if (ret <= 0) goto out; - } + tot_len = ret; ret = rw_verify_area(type, file, pos, tot_len); if (ret < 0) goto out; @@ -1177,11 +1176,14 @@ compat_sys_readv(unsigned long fd, const struct compat_iovec __user *vec, struct file *file; int fput_needed; ssize_t ret; + loff_t pos; file = fget_light(fd, &fput_needed); if (!file) return -EBADF; - ret = compat_readv(file, vec, vlen, &file->f_pos); + pos = file->f_pos; + ret = compat_readv(file, vec, vlen, &pos); + file->f_pos = pos; fput_light(file, fput_needed); return ret; } @@ -1236,11 +1238,14 @@ compat_sys_writev(unsigned long fd, const struct compat_iovec __user *vec, struct file *file; int fput_needed; ssize_t ret; + loff_t pos; file = fget_light(fd, &fput_needed); if (!file) return -EBADF; - ret = compat_writev(file, vec, vlen, &file->f_pos); + pos = file->f_pos; + ret = compat_writev(file, vec, vlen, &pos); + file->f_pos = pos; fput_light(file, fput_needed); return ret; } diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 61abb63..3deb58d 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -208,6 +208,8 @@ static int do_video_set_spu_palette(unsigned int fd, unsigned int cmd, err = get_user(palp, &up->palette); err |= get_user(length, &up->length); + if (err) + return -EFAULT; up_native = compat_alloc_user_space(sizeof(struct video_spu_palette)); err = put_user(compat_ptr(palp), &up_native->palette); diff --git a/fs/dcache.c b/fs/dcache.c index 0b51cfc..ecc0742 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -290,7 +290,7 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent) * Inform try_to_ascend() that we are no longer attached to the * dentry tree */ - dentry->d_flags |= DCACHE_DISCONNECTED; + dentry->d_flags |= DCACHE_DENTRY_KILLED; if (parent) spin_unlock(&parent->d_lock); dentry_iput(dentry); @@ -1015,7 +1015,7 @@ static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq * or deletion */ if (new != old->d_parent || - (old->d_flags & DCACHE_DISCONNECTED) || + (old->d_flags & DCACHE_DENTRY_KILLED) || (!locked && read_seqretry(&rename_lock, seq))) { spin_unlock(&new->d_lock); new = NULL; @@ -1101,6 +1101,8 @@ positive: return 1; rename_retry: + if (locked) + goto again; locked = 1; write_seqlock(&rename_lock); goto again; @@ -1203,6 +1205,8 @@ out: rename_retry: if (found) return found; + if (locked) + goto again; locked = 1; write_seqlock(&rename_lock); goto again; @@ -1559,7 +1563,7 @@ static struct dentry * d_find_any_alias(struct inode *inode) */ struct dentry *d_obtain_alias(struct inode *inode) { - static const struct qstr anonstring = { .name = "" }; + static const struct qstr anonstring = { .name = "/", .len = 1 }; struct dentry *tmp; struct dentry *res; @@ -2990,6 +2994,8 @@ resume: return; rename_retry: + if (locked) + goto again; locked = 1; write_seqlock(&rename_lock); goto again; diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index e7a7a2f..eac5b7c 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -380,8 +380,7 @@ EXPORT_SYMBOL_GPL(debugfs_remove); */ void debugfs_remove_recursive(struct dentry *dentry) { - struct dentry *child; - struct dentry *parent; + struct dentry *child, *next, *parent; if (!dentry) return; @@ -391,61 +390,37 @@ void debugfs_remove_recursive(struct dentry *dentry) return; parent = dentry; + down: mutex_lock(&parent->d_inode->i_mutex); + list_for_each_entry_safe(child, next, &parent->d_subdirs, d_u.d_child) { + if (!debugfs_positive(child)) + continue; - while (1) { - /* - * When all dentries under "parent" has been removed, - * walk up the tree until we reach our starting point. - */ - if (list_empty(&parent->d_subdirs)) { - mutex_unlock(&parent->d_inode->i_mutex); - if (parent == dentry) - break; - parent = parent->d_parent; - mutex_lock(&parent->d_inode->i_mutex); - } - child = list_entry(parent->d_subdirs.next, struct dentry, - d_u.d_child); - next_sibling: - - /* - * If "child" isn't empty, walk down the tree and - * remove all its descendants first. - */ + /* perhaps simple_empty(child) makes more sense */ if (!list_empty(&child->d_subdirs)) { mutex_unlock(&parent->d_inode->i_mutex); parent = child; - mutex_lock(&parent->d_inode->i_mutex); - continue; - } - __debugfs_remove(child, parent); - if (parent->d_subdirs.next == &child->d_u.d_child) { - /* - * Try the next sibling. - */ - if (child->d_u.d_child.next != &parent->d_subdirs) { - child = list_entry(child->d_u.d_child.next, - struct dentry, - d_u.d_child); - goto next_sibling; - } - - /* - * Avoid infinite loop if we fail to remove - * one dentry. - */ - mutex_unlock(&parent->d_inode->i_mutex); - break; + goto down; } - simple_release_fs(&debugfs_mount, &debugfs_mount_count); + up: + if (!__debugfs_remove(child, parent)) + simple_release_fs(&debugfs_mount, &debugfs_mount_count); } - parent = dentry->d_parent; + mutex_unlock(&parent->d_inode->i_mutex); + child = parent; + parent = parent->d_parent; mutex_lock(&parent->d_inode->i_mutex); - __debugfs_remove(dentry, parent); + + if (child != dentry) { + next = list_entry(child->d_u.d_child.next, struct dentry, + d_u.d_child); + goto up; + } + + if (!__debugfs_remove(child, parent)) + simple_release_fs(&debugfs_mount, &debugfs_mount_count); mutex_unlock(&parent->d_inode->i_mutex); - simple_release_fs(&debugfs_mount, &debugfs_mount_count); } EXPORT_SYMBOL_GPL(debugfs_remove_recursive); diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 2717329..4a91a05 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -653,6 +653,7 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct dentry *lower_old_dir_dentry; struct dentry *lower_new_dir_dentry; struct dentry *trap = NULL; + struct inode *target_inode; lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry); lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry); @@ -660,6 +661,7 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry, dget(lower_new_dentry); lower_old_dir_dentry = dget_parent(lower_old_dentry); lower_new_dir_dentry = dget_parent(lower_new_dentry); + target_inode = new_dentry->d_inode; trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry); /* source should not be ancestor of target */ if (trap == lower_old_dentry) { @@ -675,6 +677,9 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry, lower_new_dir_dentry->d_inode, lower_new_dentry); if (rc) goto out_lock; + if (target_inode) + fsstack_copy_attr_all(target_inode, + ecryptfs_inode_to_lower(target_inode)); fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode); if (new_dir != old_dir) fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode); diff --git a/fs/ecryptfs/kthread.c b/fs/ecryptfs/kthread.c index 69f994a..0dbe58a 100644 --- a/fs/ecryptfs/kthread.c +++ b/fs/ecryptfs/kthread.c @@ -149,7 +149,7 @@ int ecryptfs_privileged_open(struct file **lower_file, (*lower_file) = dentry_open(lower_dentry, lower_mnt, flags, cred); if (!IS_ERR(*lower_file)) goto out; - if (flags & O_RDONLY) { + if ((flags & O_ACCMODE) == O_RDONLY) { rc = PTR_ERR((*lower_file)); goto out; } diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index b4a6bef..d1125e3 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -279,6 +279,7 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options, char *fnek_src; char *cipher_key_bytes_src; char *fn_cipher_key_bytes_src; + u8 cipher_code; *check_ruid = 0; @@ -420,6 +421,18 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options, && !fn_cipher_key_bytes_set) mount_crypt_stat->global_default_fn_cipher_key_bytes = mount_crypt_stat->global_default_cipher_key_size; + + cipher_code = ecryptfs_code_for_cipher_string( + mount_crypt_stat->global_default_cipher_name, + mount_crypt_stat->global_default_cipher_key_size); + if (!cipher_code) { + ecryptfs_printk(KERN_ERR, + "eCryptfs doesn't support cipher: %s", + mount_crypt_stat->global_default_cipher_name); + rc = -EINVAL; + goto out; + } + mutex_lock(&key_tfm_list_mutex); if (!ecryptfs_tfm_exists(mount_crypt_stat->global_default_cipher_name, NULL)) { @@ -505,7 +518,6 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags goto out; } - s->s_flags = flags; rc = bdi_setup_and_register(&sbi->bdi, "ecryptfs", BDI_CAP_MAP_COPY); if (rc) goto out1; @@ -541,6 +553,15 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags } ecryptfs_set_superblock_lower(s, path.dentry->d_sb); + + /** + * Set the POSIX ACL flag based on whether they're enabled in the lower + * mount. Force a read-only eCryptfs mount if the lower mount is ro. + * Allow a ro eCryptfs mount even when the lower mount is rw. + */ + s->s_flags = flags & ~MS_POSIXACL; + s->s_flags |= path.dentry->d_sb->s_flags & (MS_RDONLY | MS_POSIXACL); + s->s_maxbytes = path.dentry->d_sb->s_maxbytes; s->s_blocksize = path.dentry->d_sb->s_blocksize; s->s_magic = ECRYPTFS_SUPER_MAGIC; diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c index 0dc5a3d..de42310 100644 --- a/fs/ecryptfs/miscdev.c +++ b/fs/ecryptfs/miscdev.c @@ -49,7 +49,10 @@ ecryptfs_miscdev_poll(struct file *file, poll_table *pt) mutex_lock(&ecryptfs_daemon_hash_mux); /* TODO: Just use file->private_data? */ rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns()); - BUG_ON(rc || !daemon); + if (rc || !daemon) { + mutex_unlock(&ecryptfs_daemon_hash_mux); + return -EINVAL; + } mutex_lock(&daemon->mux); mutex_unlock(&ecryptfs_daemon_hash_mux); if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) { @@ -122,6 +125,7 @@ ecryptfs_miscdev_open(struct inode *inode, struct file *file) goto out_unlock_daemon; } daemon->flags |= ECRYPTFS_DAEMON_MISCDEV_OPEN; + file->private_data = daemon; atomic_inc(&ecryptfs_num_miscdev_opens); out_unlock_daemon: mutex_unlock(&daemon->mux); @@ -152,9 +156,9 @@ ecryptfs_miscdev_release(struct inode *inode, struct file *file) mutex_lock(&ecryptfs_daemon_hash_mux); rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns()); - BUG_ON(rc || !daemon); + if (rc || !daemon) + daemon = file->private_data; mutex_lock(&daemon->mux); - BUG_ON(daemon->pid != task_pid(current)); BUG_ON(!(daemon->flags & ECRYPTFS_DAEMON_MISCDEV_OPEN)); daemon->flags &= ~ECRYPTFS_DAEMON_MISCDEV_OPEN; atomic_dec(&ecryptfs_num_miscdev_opens); @@ -191,31 +195,32 @@ int ecryptfs_send_miscdev(char *data, size_t data_size, struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type, u16 msg_flags, struct ecryptfs_daemon *daemon) { - int rc = 0; + struct ecryptfs_message *msg; - mutex_lock(&msg_ctx->mux); - msg_ctx->msg = kmalloc((sizeof(*msg_ctx->msg) + data_size), - GFP_KERNEL); - if (!msg_ctx->msg) { - rc = -ENOMEM; + msg = kmalloc((sizeof(*msg) + data_size), GFP_KERNEL); + if (!msg) { printk(KERN_ERR "%s: Out of memory whilst attempting " "to kmalloc(%zd, GFP_KERNEL)\n", __func__, - (sizeof(*msg_ctx->msg) + data_size)); - goto out_unlock; + (sizeof(*msg) + data_size)); + return -ENOMEM; } + + mutex_lock(&msg_ctx->mux); + msg_ctx->msg = msg; msg_ctx->msg->index = msg_ctx->index; msg_ctx->msg->data_len = data_size; msg_ctx->type = msg_type; memcpy(msg_ctx->msg->data, data, data_size); msg_ctx->msg_size = (sizeof(*msg_ctx->msg) + data_size); - mutex_lock(&daemon->mux); list_add_tail(&msg_ctx->daemon_out_list, &daemon->msg_ctx_out_queue); + mutex_unlock(&msg_ctx->mux); + + mutex_lock(&daemon->mux); daemon->num_queued_msg_ctx++; wake_up_interruptible(&daemon->wait); mutex_unlock(&daemon->mux); -out_unlock: - mutex_unlock(&msg_ctx->mux); - return rc; + + return 0; } /** @@ -246,8 +251,16 @@ ecryptfs_miscdev_read(struct file *file, char __user *buf, size_t count, mutex_lock(&ecryptfs_daemon_hash_mux); /* TODO: Just use file->private_data? */ rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns()); - BUG_ON(rc || !daemon); + if (rc || !daemon) { + mutex_unlock(&ecryptfs_daemon_hash_mux); + return -EINVAL; + } mutex_lock(&daemon->mux); + if (task_pid(current) != daemon->pid) { + mutex_unlock(&daemon->mux); + mutex_unlock(&ecryptfs_daemon_hash_mux); + return -EPERM; + } if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) { rc = 0; mutex_unlock(&ecryptfs_daemon_hash_mux); @@ -284,9 +297,6 @@ check_list: * message from the queue; try again */ goto check_list; } - BUG_ON(euid != daemon->euid); - BUG_ON(current_user_ns() != daemon->user_ns); - BUG_ON(task_pid(current) != daemon->pid); msg_ctx = list_first_entry(&daemon->msg_ctx_out_queue, struct ecryptfs_msg_ctx, daemon_out_list); BUG_ON(!msg_ctx); diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 35a852a..35a1a61 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1197,10 +1197,30 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even * otherwise we might miss an event that happens between the * f_op->poll() call and the new event set registering. */ - epi->event.events = event->events; + epi->event.events = event->events; /* need barrier below */ epi->event.data = event->data; /* protected by mtx */ /* + * The following barrier has two effects: + * + * 1) Flush epi changes above to other CPUs. This ensures + * we do not miss events from ep_poll_callback if an + * event occurs immediately after we call f_op->poll(). + * We need this because we did not take ep->lock while + * changing epi above (but ep_poll_callback does take + * ep->lock). + * + * 2) We also need to ensure we do not miss _past_ events + * when calling f_op->poll(). This barrier also + * pairs with the barrier in wq_has_sleeper (see + * comments for wq_has_sleeper). + * + * This barrier will now guarantee ep_poll_callback or f_op->poll + * (or both) will notice the readiness of an item. + */ + smp_mb(); + + /* * Get current event bits. We can safely use the file* here because * its usage count has been increased by the caller of this function. */ @@ -1149,13 +1149,6 @@ void setup_new_exec(struct linux_binprm * bprm) set_dumpable(current->mm, suid_dumpable); } - /* - * Flush performance counters when crossing a - * security domain: - */ - if (!get_dumpable(current->mm)) - perf_event_exit_task(current); - /* An exec changes our domain. We are no longer part of the thread group */ @@ -1192,9 +1185,24 @@ void free_bprm(struct linux_binprm *bprm) mutex_unlock(¤t->signal->cred_guard_mutex); abort_creds(bprm->cred); } + /* If a binfmt changed the interp, free it. */ + if (bprm->interp != bprm->filename) + kfree(bprm->interp); kfree(bprm); } +int bprm_change_interp(char *interp, struct linux_binprm *bprm) +{ + /* If a binfmt changed the interp, free it first. */ + if (bprm->interp != bprm->filename) + kfree(bprm->interp); + bprm->interp = kstrdup(interp, GFP_KERNEL); + if (!bprm->interp) + return -ENOMEM; + return 0; +} +EXPORT_SYMBOL(bprm_change_interp); + /* * install the new credentials for this executable */ @@ -1204,6 +1212,15 @@ void install_exec_creds(struct linux_binprm *bprm) commit_creds(bprm->cred); bprm->cred = NULL; + + /* + * Disable monitoring for regular users + * when executing setuid binaries. Must + * wait until new credentials are committed + * by commit_creds() above + */ + if (get_dumpable(current->mm) != SUID_DUMP_USER) + perf_event_exit_task(current); /* * cred_guard_mutex must be held at least to this point to prevent * ptrace_attach() from altering our determination of the task's @@ -1354,6 +1371,10 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) int try,retval; struct linux_binfmt *fmt; + /* This allows 4 levels of binfmt rewrites before failing hard. */ + if (depth > 5) + return -ELOOP; + retval = security_bprm_check(bprm); if (retval) return retval; @@ -1372,12 +1393,8 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) if (!try_module_get(fmt->module)) continue; read_unlock(&binfmt_lock); + bprm->recursion_depth = depth + 1; retval = fn(bprm, regs); - /* - * Restore the depth counter to its starting value - * in this call, so we don't have to rely on every - * load_binary function to restore it on return. - */ bprm->recursion_depth = depth; if (retval >= 0) { if (depth == 0) diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c index bfc2dc4..0b3da7c 100644 --- a/fs/ext3/ialloc.c +++ b/fs/ext3/ialloc.c @@ -561,8 +561,12 @@ got: if (IS_DIRSYNC(inode)) handle->h_sync = 1; if (insert_inode_locked(inode) < 0) { - err = -EINVAL; - goto fail_drop; + /* + * Likely a bitmap corruption causing inode to be allocated + * twice. + */ + err = -EIO; + goto fail; } spin_lock(&sbi->s_next_gen_lock); inode->i_generation = sbi->s_next_generation++; diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index db9ba1a..0aedb27 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -3013,6 +3013,8 @@ static int ext3_do_update_inode(handle_t *handle, struct ext3_inode_info *ei = EXT3_I(inode); struct buffer_head *bh = iloc->bh; int err = 0, rc, block; + int need_datasync = 0; + __le32 disksize; again: /* we can't allow multiple procs in here at once, its a bit racey */ @@ -3050,7 +3052,11 @@ again: raw_inode->i_gid_high = 0; } raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); - raw_inode->i_size = cpu_to_le32(ei->i_disksize); + disksize = cpu_to_le32(ei->i_disksize); + if (disksize != raw_inode->i_size) { + need_datasync = 1; + raw_inode->i_size = disksize; + } raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec); raw_inode->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec); raw_inode->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec); @@ -3066,8 +3072,11 @@ again: if (!S_ISREG(inode->i_mode)) { raw_inode->i_dir_acl = cpu_to_le32(ei->i_dir_acl); } else { - raw_inode->i_size_high = - cpu_to_le32(ei->i_disksize >> 32); + disksize = cpu_to_le32(ei->i_disksize >> 32); + if (disksize != raw_inode->i_size_high) { + raw_inode->i_size_high = disksize; + need_datasync = 1; + } if (ei->i_disksize > 0x7fffffffULL) { struct super_block *sb = inode->i_sb; if (!EXT3_HAS_RO_COMPAT_FEATURE(sb, @@ -3120,6 +3129,8 @@ again: ext3_clear_inode_state(inode, EXT3_STATE_NEW); atomic_set(&ei->i_sync_tid, handle->h_transaction->t_tid); + if (need_datasync) + atomic_set(&ei->i_datasync_tid, handle->h_transaction->t_tid); out_brelse: brelse (bh); ext3_std_error(inode->i_sb, err); diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index e5a7111..8c9f82d 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -584,11 +584,8 @@ static int htree_dirblock_to_tree(struct file *dir_file, if (!ext3_check_dir_entry("htree_dirblock_to_tree", dir, de, bh, (block<<EXT3_BLOCK_SIZE_BITS(dir->i_sb)) +((char *)de - bh->b_data))) { - /* On error, skip the f_pos to the next block. */ - dir_file->f_pos = (dir_file->f_pos | - (dir->i_sb->s_blocksize - 1)) + 1; - brelse (bh); - return count; + /* silently ignore the rest of the block */ + break; } ext3fs_dirhash(de->name, de->name_len, hinfo); if ((hinfo->hash < start_hash) || diff --git a/fs/ext3/super.c b/fs/ext3/super.c index aad153e..ba57a63 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -371,7 +371,7 @@ static struct block_device *ext3_blkdev_get(dev_t dev, struct super_block *sb) return bdev; fail: - ext3_msg(sb, "error: failed to open journal device %s: %ld", + ext3_msg(sb, KERN_ERR, "error: failed to open journal device %s: %ld", __bdevname(dev, b), PTR_ERR(bdev)); return NULL; @@ -892,7 +892,7 @@ static ext3_fsblk_t get_sb_block(void **data, struct super_block *sb) /*todo: use simple_strtoll with >32bit ext3 */ sb_block = simple_strtoul(options, &options, 0); if (*options && *options != ',') { - ext3_msg(sb, "error: invalid sb specification: %s", + ext3_msg(sb, KERN_ERR, "error: invalid sb specification: %s", (char *) *data); return 1; } diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig index 9ed1bb1..5459168 100644 --- a/fs/ext4/Kconfig +++ b/fs/ext4/Kconfig @@ -82,4 +82,5 @@ config EXT4_DEBUG Enables run-time debugging support for the ext4 filesystem. If you select Y here, then you will be able to turn on debugging - with a command such as "echo 1 > /sys/kernel/debug/ext4/mballoc-debug" + with a command such as: + echo 1 > /sys/module/ext4/parameters/mballoc_debug diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index 21eacd7..4922087 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c @@ -450,8 +450,10 @@ ext4_xattr_set_acl(struct dentry *dentry, const char *name, const void *value, retry: handle = ext4_journal_start(inode, EXT4_DATA_TRANS_BLOCKS(inode->i_sb)); - if (IS_ERR(handle)) - return PTR_ERR(handle); + if (IS_ERR(handle)) { + error = PTR_ERR(handle); + goto release_and_out; + } error = ext4_set_acl(handle, inode, type, acl); ext4_journal_stop(handle); if (error == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 264f694..ebe95f5 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -514,7 +514,8 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb) if (bitmap_bh == NULL) continue; - x = ext4_count_free(bitmap_bh, sb->s_blocksize); + x = ext4_count_free(bitmap_bh->b_data, + EXT4_BLOCKS_PER_GROUP(sb) / 8); printk(KERN_DEBUG "group %u: stored = %d, counted = %u\n", i, ext4_free_blks_count(sb, gdp), x); bitmap_count += x; diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c index fa3af81..012faaa 100644 --- a/fs/ext4/bitmap.c +++ b/fs/ext4/bitmap.c @@ -15,15 +15,13 @@ static const int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0}; -unsigned int ext4_count_free(struct buffer_head *map, unsigned int numchars) +unsigned int ext4_count_free(char *bitmap, unsigned int numchars) { unsigned int i, sum = 0; - if (!map) - return 0; for (i = 0; i < numchars; i++) - sum += nibblemap[map->b_data[i] & 0xf] + - nibblemap[(map->b_data[i] >> 4) & 0xf]; + sum += nibblemap[bitmap[i] & 0xf] + + nibblemap[(bitmap[i] >> 4) & 0xf]; return sum; } diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 1a34c1c..2041de7 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -288,9 +288,9 @@ struct ext4_group_desc */ struct flex_groups { - atomic_t free_inodes; - atomic_t free_blocks; - atomic_t used_dirs; + atomic64_t free_blocks; + atomic_t free_inodes; + atomic_t used_dirs; }; #define EXT4_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not in use */ @@ -1713,7 +1713,7 @@ struct mmpd_data { # define NORET_AND noreturn, /* bitmap.c */ -extern unsigned int ext4_count_free(struct buffer_head *, unsigned); +extern unsigned int ext4_count_free(char *bitmap, unsigned numchars); /* balloc.c */ extern unsigned int ext4_block_group(struct super_block *sb, diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 611647b..d432d37 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2140,13 +2140,14 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block, * last index in the block only. */ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, - struct ext4_ext_path *path) + struct ext4_ext_path *path, int depth) { int err; ext4_fsblk_t leaf; /* free index block */ - path--; + depth--; + path = path + depth; leaf = ext4_idx_pblock(path->p_idx); if (unlikely(path->p_hdr->eh_entries == 0)) { EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0"); @@ -2162,6 +2163,19 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, ext_debug("index is empty, remove it, free block %llu\n", leaf); ext4_free_blocks(handle, inode, NULL, leaf, 1, EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); + + while (--depth >= 0) { + if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr)) + break; + path--; + err = ext4_ext_get_access(handle, inode, path); + if (err) + break; + path->p_idx->ei_block = (path+1)->p_idx->ei_block; + err = ext4_ext_dirty(handle, inode, path); + if (err) + break; + } return err; } @@ -2509,7 +2523,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, /* if this leaf is free, then we should * remove it from index block above */ if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) - err = ext4_ext_rm_idx(handle, inode, path + depth); + err = ext4_ext_rm_idx(handle, inode, path, depth); out: return err; @@ -2639,7 +2653,7 @@ again: /* index is empty, remove it; * handle must be already prepared by the * truncatei_leaf() */ - err = ext4_ext_rm_idx(handle, inode, path + i); + err = ext4_ext_rm_idx(handle, inode, path, i); } /* root level has p_bh == NULL, brelse() eats this */ brelse(path[i].p_bh); @@ -2887,6 +2901,7 @@ static int ext4_split_extent(handle_t *handle, int err = 0; int uninitialized; int split_flag1, flags1; + int allocated = map->m_len; depth = ext_depth(inode); ex = path[depth].p_ext; @@ -2905,6 +2920,8 @@ static int ext4_split_extent(handle_t *handle, map->m_lblk + map->m_len, split_flag1, flags1); if (err) goto out; + } else { + allocated = ee_len - (map->m_lblk - ee_block); } ext4_ext_drop_refs(path); @@ -2927,7 +2944,7 @@ static int ext4_split_extent(handle_t *handle, ext4_ext_show_leaf(inode, path); out: - return err ? err : map->m_len; + return err ? err : allocated; } #define EXT4_EXT_ZERO_LEN 7 @@ -3295,6 +3312,7 @@ out: allocated - map->m_len); allocated = map->m_len; } + map->m_len = allocated; /* * If we have done fallocate with the offset that is already @@ -4137,7 +4155,7 @@ static int ext4_xattr_fiemap(struct inode *inode, error = ext4_get_inode_loc(inode, &iloc); if (error) return error; - physical = iloc.bh->b_blocknr << blockbits; + physical = (__u64)iloc.bh->b_blocknr << blockbits; offset = EXT4_GOOD_OLD_INODE_SIZE + EXT4_I(inode)->i_extra_isize; physical += offset; @@ -4145,7 +4163,7 @@ static int ext4_xattr_fiemap(struct inode *inode, flags |= FIEMAP_EXTENT_DATA_INLINE; brelse(iloc.bh); } else { /* external block */ - physical = EXT4_I(inode)->i_file_acl << blockbits; + physical = (__u64)EXT4_I(inode)->i_file_acl << blockbits; length = inode->i_sb->s_blocksize; } diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 21bb2f6..443ffb8 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -22,6 +22,7 @@ #include <linux/random.h> #include <linux/bitops.h> #include <linux/blkdev.h> +#include <linux/math64.h> #include <asm/byteorder.h> #include "ext4.h" @@ -345,8 +346,8 @@ static int find_group_flex(struct super_block *sb, struct inode *parent, ext4_group_t ngroups = ext4_get_groups_count(sb); int flex_size = ext4_flex_bg_size(sbi); ext4_group_t best_flex = parent_fbg_group; - int blocks_per_flex = sbi->s_blocks_per_group * flex_size; - int flexbg_free_blocks; + ext4_fsblk_t blocks_per_flex = sbi->s_blocks_per_group * flex_size; + ext4_fsblk_t flexbg_free_blocks; int flex_freeb_ratio; ext4_group_t n_fbg_groups; ext4_group_t i; @@ -355,8 +356,8 @@ static int find_group_flex(struct super_block *sb, struct inode *parent, sbi->s_log_groups_per_flex; find_close_to_parent: - flexbg_free_blocks = atomic_read(&flex_group[best_flex].free_blocks); - flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex; + flexbg_free_blocks = atomic64_read(&flex_group[best_flex].free_blocks); + flex_freeb_ratio = div64_u64(flexbg_free_blocks * 100, blocks_per_flex); if (atomic_read(&flex_group[best_flex].free_inodes) && flex_freeb_ratio > free_block_ratio) goto found_flexbg; @@ -370,8 +371,8 @@ find_close_to_parent: if (i == parent_fbg_group || i == parent_fbg_group - 1) continue; - flexbg_free_blocks = atomic_read(&flex_group[i].free_blocks); - flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex; + flexbg_free_blocks = atomic64_read(&flex_group[i].free_blocks); + flex_freeb_ratio = div64_u64(flexbg_free_blocks * 100, blocks_per_flex); if (flex_freeb_ratio > free_block_ratio && (atomic_read(&flex_group[i].free_inodes))) { @@ -380,14 +381,14 @@ find_close_to_parent: } if ((atomic_read(&flex_group[best_flex].free_inodes) == 0) || - ((atomic_read(&flex_group[i].free_blocks) > - atomic_read(&flex_group[best_flex].free_blocks)) && + ((atomic64_read(&flex_group[i].free_blocks) > + atomic64_read(&flex_group[best_flex].free_blocks)) && atomic_read(&flex_group[i].free_inodes))) best_flex = i; } if (!atomic_read(&flex_group[best_flex].free_inodes) || - !atomic_read(&flex_group[best_flex].free_blocks)) + !atomic64_read(&flex_group[best_flex].free_blocks)) return -1; found_flexbg: @@ -406,8 +407,8 @@ out: } struct orlov_stats { + __u64 free_blocks; __u32 free_inodes; - __u32 free_blocks; __u32 used_dirs; }; @@ -424,7 +425,7 @@ static void get_orlov_stats(struct super_block *sb, ext4_group_t g, if (flex_size > 1) { stats->free_inodes = atomic_read(&flex_group[g].free_inodes); - stats->free_blocks = atomic_read(&flex_group[g].free_blocks); + stats->free_blocks = atomic64_read(&flex_group[g].free_blocks); stats->used_dirs = atomic_read(&flex_group[g].used_dirs); return; } @@ -1021,8 +1022,12 @@ got: if (IS_DIRSYNC(inode)) ext4_handle_sync(handle); if (insert_inode_locked(inode) < 0) { - err = -EINVAL; - goto fail_drop; + /* + * Likely a bitmap corruption causing inode to be allocated + * twice. + */ + err = -EIO; + goto fail; } spin_lock(&sbi->s_next_gen_lock); inode->i_generation = sbi->s_next_generation++; @@ -1189,7 +1194,8 @@ unsigned long ext4_count_free_inodes(struct super_block *sb) if (!bitmap_bh) continue; - x = ext4_count_free(bitmap_bh, EXT4_INODES_PER_GROUP(sb) / 8); + x = ext4_count_free(bitmap_bh->b_data, + EXT4_INODES_PER_GROUP(sb) / 8); printk(KERN_DEBUG "group %lu: stored = %d, counted = %lu\n", (unsigned long) i, ext4_free_inodes_count(sb, gdp), x); bitmap_count += x; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index c1e6a72..5de8a27 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1134,6 +1134,15 @@ void ext4_da_update_reserve_space(struct inode *inode, used = ei->i_reserved_data_blocks; } + if (unlikely(ei->i_allocated_meta_blocks > ei->i_reserved_meta_blocks)) { + ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, allocated %d " + "with only %d reserved metadata blocks\n", __func__, + inode->i_ino, ei->i_allocated_meta_blocks, + ei->i_reserved_meta_blocks); + WARN_ON(1); + ei->i_allocated_meta_blocks = ei->i_reserved_meta_blocks; + } + /* Update per-inode reservations */ ei->i_reserved_data_blocks -= used; ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks; @@ -2190,6 +2199,8 @@ static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd) index = mpd->first_page; end = mpd->next_page - 1; + + pagevec_init(&pvec, 0); while (index <= end) { nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE); if (nr_pages == 0) @@ -5142,6 +5153,7 @@ static int ext4_do_update_inode(handle_t *handle, struct ext4_inode_info *ei = EXT4_I(inode); struct buffer_head *bh = iloc->bh; int err = 0, rc, block; + int need_datasync = 0; /* For fields not not tracking in the in-memory inode, * initialise them to zero for new inodes. */ @@ -5190,7 +5202,10 @@ static int ext4_do_update_inode(handle_t *handle, raw_inode->i_file_acl_high = cpu_to_le16(ei->i_file_acl >> 32); raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl); - ext4_isize_set(raw_inode, ei->i_disksize); + if (ei->i_disksize != ext4_isize(raw_inode)) { + ext4_isize_set(raw_inode, ei->i_disksize); + need_datasync = 1; + } if (ei->i_disksize > 0x7fffffffULL) { struct super_block *sb = inode->i_sb; if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, @@ -5243,7 +5258,7 @@ static int ext4_do_update_inode(handle_t *handle, err = rc; ext4_clear_inode_state(inode, EXT4_STATE_NEW); - ext4_update_inode_fsync_trans(handle, inode, 0); + ext4_update_inode_fsync_trans(handle, inode, need_datasync); out_brelse: brelse(bh); ext4_std_error(inode->i_sb, err); @@ -5466,7 +5481,7 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { struct inode *inode; - unsigned long delalloc_blocks; + unsigned long long delalloc_blocks; inode = dentry->d_inode; generic_fillattr(inode, stat); @@ -5483,7 +5498,7 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, */ delalloc_blocks = EXT4_I(inode)->i_reserved_data_blocks; - stat->blocks += (delalloc_blocks << inode->i_sb->s_blocksize_bits)>>9; + stat->blocks += delalloc_blocks << (inode->i_sb->s_blocksize_bits-9); return 0; } diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 808c554..4cbe1c2 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -35,7 +35,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) handle_t *handle = NULL; int err, migrate = 0; struct ext4_iloc iloc; - unsigned int oldflags; + unsigned int oldflags, mask, i; unsigned int jflag; if (!inode_owner_or_capable(inode)) @@ -112,9 +112,14 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) if (err) goto flags_err; - flags = flags & EXT4_FL_USER_MODIFIABLE; - flags |= oldflags & ~EXT4_FL_USER_MODIFIABLE; - ei->i_flags = flags; + for (i = 0, mask = 1; i < 32; i++, mask <<= 1) { + if (!(mask & EXT4_FL_USER_MODIFIABLE)) + continue; + if (mask & flags) + ext4_set_inode_flag(inode, i); + else + ext4_clear_inode_flag(inode, i); + } ext4_set_inode_flags(inode); inode->i_ctime = ext4_current_time(inode); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 0f1be7f..cdb8414 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2026,7 +2026,11 @@ repeat: group = ac->ac_g_ex.fe_group; for (i = 0; i < ngroups; group++, i++) { - if (group == ngroups) + /* + * Artificially restricted ngroups for non-extent + * files makes group > ngroups possible on first loop. + */ + if (group >= ngroups) group = 0; /* This now checks without needing the buddy page */ @@ -2528,6 +2532,9 @@ int ext4_mb_release(struct super_block *sb) struct ext4_sb_info *sbi = EXT4_SB(sb); struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits); + if (sbi->s_proc) + remove_proc_entry("mb_groups", sbi->s_proc); + if (sbi->s_group_info) { for (i = 0; i < ngroups; i++) { grinfo = ext4_get_group_info(sb, i); @@ -2575,8 +2582,6 @@ int ext4_mb_release(struct super_block *sb) } free_percpu(sbi->s_locality_groups); - if (sbi->s_proc) - remove_proc_entry("mb_groups", sbi->s_proc); return 0; } @@ -2813,8 +2818,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, if (sbi->s_log_groups_per_flex) { ext4_group_t flex_group = ext4_flex_group(sbi, ac->ac_b_ex.fe_group); - atomic_sub(ac->ac_b_ex.fe_len, - &sbi->s_flex_groups[flex_group].free_blocks); + atomic64_sub(ac->ac_b_ex.fe_len, + &sbi->s_flex_groups[flex_group].free_blocks); } err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); @@ -4110,7 +4115,7 @@ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac) /* The max size of hash table is PREALLOC_TB_SIZE */ order = PREALLOC_TB_SIZE - 1; /* Add the prealloc space to lg */ - rcu_read_lock(); + spin_lock(&lg->lg_prealloc_lock); list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order], pa_inode_list) { spin_lock(&tmp_pa->pa_lock); @@ -4134,12 +4139,12 @@ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac) if (!added) list_add_tail_rcu(&pa->pa_inode_list, &lg->lg_prealloc_list[order]); - rcu_read_unlock(); + spin_unlock(&lg->lg_prealloc_lock); /* Now trim the list to be not more than 8 elements */ if (lg_prealloc_count > 8) { ext4_mb_discard_lg_preallocations(sb, lg, - order, lg_prealloc_count); + order, lg_prealloc_count); return; } return ; @@ -4583,6 +4588,7 @@ do_more: */ new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS); if (!new_entry) { + ext4_mb_unload_buddy(&e4b); err = -ENOMEM; goto error_return; } @@ -4612,7 +4618,7 @@ do_more: if (sbi->s_log_groups_per_flex) { ext4_group_t flex_group = ext4_flex_group(sbi, block_group); - atomic_add(count, &sbi->s_flex_groups[flex_group].free_blocks); + atomic64_add(count, &sbi->s_flex_groups[flex_group].free_blocks); } ext4_mb_unload_buddy(&e4b); @@ -4743,8 +4749,8 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, if (sbi->s_log_groups_per_flex) { ext4_group_t flex_group = ext4_flex_group(sbi, block_group); - atomic_add(blocks_freed, - &sbi->s_flex_groups[flex_group].free_blocks); + atomic64_add(blocks_freed, + &sbi->s_flex_groups[flex_group].free_blocks); } ext4_mb_unload_buddy(&e4b); diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index f57455a..72f9732 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -1209,7 +1209,12 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, orig_inode->i_ino, donor_inode->i_ino); return -EINVAL; } - + /* TODO: This is non obvious task to swap blocks for inodes with full + jornaling enabled */ + if (ext4_should_journal_data(orig_inode) || + ext4_should_journal_data(donor_inode)) { + return -EINVAL; + } /* Protect orig and donor inodes against a truncate */ ret1 = mext_inode_double_lock(orig_inode, donor_inode); if (ret1 < 0) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 3d36d5a..595d087 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -585,11 +585,8 @@ static int htree_dirblock_to_tree(struct file *dir_file, if (ext4_check_dir_entry(dir, NULL, de, bh, (block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb)) + ((char *)de - bh->b_data))) { - /* On error, skip the f_pos to the next block. */ - dir_file->f_pos = (dir_file->f_pos | - (dir->i_sb->s_blocksize - 1)) + 1; - brelse(bh); - return count; + /* silently ignore the rest of the block */ + break; } ext4fs_dirhash(de->name, de->name_len, hinfo); if ((hinfo->hash < start_hash) || @@ -1799,9 +1796,7 @@ retry: err = PTR_ERR(inode); if (!IS_ERR(inode)) { init_special_inode(inode, inode->i_mode, rdev); -#ifdef CONFIG_EXT4_FS_XATTR inode->i_op = &ext4_special_inode_operations; -#endif err = ext4_add_nondir(handle, dentry, inode); } ext4_journal_stop(handle); @@ -2064,7 +2059,8 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode) int err = 0; /* ext4_handle_valid() assumes a valid handle_t pointer */ - if (handle && !ext4_handle_valid(handle)) + if (handle && !ext4_handle_valid(handle) && + !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) return 0; mutex_lock(&EXT4_SB(inode->i_sb)->s_orphan_lock); diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 80bbc9c..d2661aa 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -499,6 +499,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, return err; exit_inode: + kfree(n_group_desc); /* ext4_handle_release_buffer(handle, iloc.bh); */ brelse(iloc.bh); exit_dindj: @@ -928,8 +929,8 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) sbi->s_log_groups_per_flex) { ext4_group_t flex_group; flex_group = ext4_flex_group(sbi, input->group); - atomic_add(input->free_blocks_count, - &sbi->s_flex_groups[flex_group].free_blocks); + atomic64_add(input->free_blocks_count, + &sbi->s_flex_groups[flex_group].free_blocks); atomic_add(EXT4_INODES_PER_GROUP(sb), &sbi->s_flex_groups[flex_group].free_inodes); } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 63fe3ca..e05cd34 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -860,6 +860,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) ei->i_reserved_meta_blocks = 0; ei->i_allocated_meta_blocks = 0; ei->i_da_metadata_calc_len = 0; + ei->i_da_metadata_calc_last_lblock = 0; spin_lock_init(&(ei->i_block_reservation_lock)); #ifdef CONFIG_QUOTA ei->i_reserved_quota = 0; @@ -1991,8 +1992,8 @@ static int ext4_fill_flex_info(struct super_block *sb) flex_group = ext4_flex_group(sbi, i); atomic_add(ext4_free_inodes_count(sb, gdp), &sbi->s_flex_groups[flex_group].free_inodes); - atomic_add(ext4_free_blks_count(sb, gdp), - &sbi->s_flex_groups[flex_group].free_blocks); + atomic64_add(ext4_free_blks_count(sb, gdp), + &sbi->s_flex_groups[flex_group].free_blocks); atomic_add(ext4_used_dirs_count(sb, gdp), &sbi->s_flex_groups[flex_group].used_dirs); } @@ -2203,7 +2204,9 @@ static void ext4_orphan_cleanup(struct super_block *sb, __func__, inode->i_ino, inode->i_size); jbd_debug(2, "truncating inode %lu to %lld bytes\n", inode->i_ino, inode->i_size); + mutex_lock(&inode->i_mutex); ext4_truncate(inode); + mutex_unlock(&inode->i_mutex); nr_truncates++; } else { ext4_msg(sb, KERN_DEBUG, @@ -3619,7 +3622,8 @@ no_journal: goto failed_mount4; } - ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY); + if (ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY)) + sb->s_flags |= MS_RDONLY; /* determine the minimum size of new large inodes, if present */ if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) { @@ -3677,22 +3681,19 @@ no_journal: if (err) { ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)", err); - goto failed_mount4; + goto failed_mount5; } err = ext4_register_li_request(sb, first_not_zeroed); if (err) - goto failed_mount4; + goto failed_mount6; sbi->s_kobj.kset = ext4_kset; init_completion(&sbi->s_kobj_unregister); err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL, "%s", sb->s_id); - if (err) { - ext4_mb_release(sb); - ext4_ext_release(sb); - goto failed_mount4; - }; + if (err) + goto failed_mount7; EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS; ext4_orphan_cleanup(sb, es); @@ -3726,13 +3727,19 @@ cantfind_ext4: ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem"); goto failed_mount; +failed_mount7: + ext4_unregister_li_request(sb); +failed_mount6: + ext4_ext_release(sb); +failed_mount5: + ext4_mb_release(sb); + ext4_release_system_zone(sb); failed_mount4: iput(root); sb->s_root = NULL; ext4_msg(sb, KERN_ERR, "mount failed"); destroy_workqueue(EXT4_SB(sb)->dio_unwritten_wq); failed_mount_wq: - ext4_release_system_zone(sb); if (sbi->s_journal) { jbd2_journal_destroy(sbi->s_journal); sbi->s_journal = NULL; @@ -4436,7 +4443,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) } ext4_setup_system_zone(sb); - if (sbi->s_journal == NULL) + if (sbi->s_journal == NULL && !(old_sb_flags & MS_RDONLY)) ext4_commit_super(sb, 1); #ifdef CONFIG_QUOTA diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 19fe4e3..8f797ae 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -487,18 +487,19 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode, ext4_free_blocks(handle, inode, bh, 0, 1, EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); + unlock_buffer(bh); } else { le32_add_cpu(&BHDR(bh)->h_refcount, -1); + if (ce) + mb_cache_entry_release(ce); + unlock_buffer(bh); error = ext4_handle_dirty_metadata(handle, inode, bh); if (IS_SYNC(inode)) ext4_handle_sync(handle); dquot_free_block(inode, 1); ea_bdebug(bh, "refcount now=%d; releasing", le32_to_cpu(BHDR(bh)->h_refcount)); - if (ce) - mb_cache_entry_release(ce); } - unlock_buffer(bh); out: ext4_std_error(inode->i_sb, error); return; @@ -1270,6 +1271,8 @@ retry: s_min_extra_isize) { tried_min_extra_isize++; new_extra_isize = s_min_extra_isize; + kfree(is); is = NULL; + kfree(bs); bs = NULL; goto retry; } error = -1; diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 9836839..9fc6975 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -1236,6 +1236,19 @@ static int fat_read_root(struct inode *inode) return 0; } +static unsigned long calc_fat_clusters(struct super_block *sb) +{ + struct msdos_sb_info *sbi = MSDOS_SB(sb); + + /* Divide first to avoid overflow */ + if (sbi->fat_bits != 12) { + unsigned long ent_per_sec = sb->s_blocksize * 8 / sbi->fat_bits; + return ent_per_sec * sbi->fat_length; + } + + return sbi->fat_length * sb->s_blocksize * 8 / sbi->fat_bits; +} + /* * Read the super block of an MS-DOS FS. */ @@ -1442,7 +1455,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, sbi->fat_bits = (total_clusters > MAX_FAT12) ? 16 : 12; /* check that FAT table does not overflow */ - fat_clusters = sbi->fat_length * sb->s_blocksize * 8 / sbi->fat_bits; + fat_clusters = calc_fat_clusters(sb); total_clusters = min(total_clusters, fat_clusters - FAT_START_ENT); if (total_clusters > MAX_FAT(sb)) { if (!silent) diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 20b4ea5..6ee3c36 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -514,7 +514,8 @@ xlate_to_uni(const unsigned char *name, int len, unsigned char *outname, int charlen; if (utf8) { - *outlen = utf8s_to_utf16s(name, len, (wchar_t *)outname); + *outlen = utf8s_to_utf16s(name, len, UTF16_HOST_ENDIAN, + (wchar_t *) outname, FAT_LFN_LEN + 2); if (*outlen < 0) return *outlen; else if (*outlen > FAT_LFN_LEN) @@ -14,7 +14,7 @@ #include <linux/sched.h> #include <linux/pipe_fs_i.h> -static void wait_for_partner(struct inode* inode, unsigned int *cnt) +static int wait_for_partner(struct inode* inode, unsigned int *cnt) { int cur = *cnt; @@ -23,6 +23,7 @@ static void wait_for_partner(struct inode* inode, unsigned int *cnt) if (signal_pending(current)) break; } + return cur == *cnt ? -ERESTARTSYS : 0; } static void wake_up_partner(struct inode* inode) @@ -67,8 +68,7 @@ static int fifo_open(struct inode *inode, struct file *filp) * seen a writer */ filp->f_version = pipe->w_counter; } else { - wait_for_partner(inode, &pipe->w_counter); - if(signal_pending(current)) + if (wait_for_partner(inode, &pipe->w_counter)) goto err_rd; } } @@ -90,8 +90,7 @@ static int fifo_open(struct inode *inode, struct file *filp) wake_up_partner(inode); if (!pipe->readers) { - wait_for_partner(inode, &pipe->r_counter); - if (signal_pending(current)) + if (wait_for_partner(inode, &pipe->r_counter)) goto err_wr; } break; diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c index 4765190..73c0bd7 100644 --- a/fs/fscache/stats.c +++ b/fs/fscache/stats.c @@ -276,5 +276,5 @@ const struct file_operations fscache_stats_fops = { .open = fscache_stats_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = single_release, }; diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index c858b5c..947b822 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1528,6 +1528,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, req->pages[req->num_pages] = page; req->num_pages++; + offset = 0; num -= this_num; total_len += this_num; index++; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index d5016071..607a973 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -858,6 +858,7 @@ int fuse_update_attributes(struct inode *inode, struct kstat *stat, if (stat) { generic_fillattr(inode, stat); stat->mode = fi->orig_i_mode; + stat->ino = fi->orig_ino; } } @@ -1438,6 +1439,8 @@ static int fuse_setxattr(struct dentry *entry, const char *name, fc->no_setxattr = 1; err = -EOPNOTSUPP; } + if (!err) + fuse_invalidate_attr(inode); return err; } @@ -1567,6 +1570,8 @@ static int fuse_removexattr(struct dentry *entry, const char *name) fc->no_removexattr = 1; err = -EOPNOTSUPP; } + if (!err) + fuse_invalidate_attr(inode); return err; } diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 82a6646..2e1c10fe 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1298,7 +1298,6 @@ static int fuse_writepage_locked(struct page *page) inc_bdi_stat(mapping->backing_dev_info, BDI_WRITEBACK); inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP); - end_page_writeback(page); spin_lock(&fc->lock); list_add(&req->writepages_entry, &fi->writepages); @@ -1306,6 +1305,8 @@ static int fuse_writepage_locked(struct page *page) fuse_flush_writepages(inode); spin_unlock(&fc->lock); + end_page_writeback(page); + return 0; err_free: @@ -1710,7 +1711,7 @@ static int fuse_verify_ioctl_iov(struct iovec *iov, size_t count) size_t n; u32 max = FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT; - for (n = 0; n < count; n++) { + for (n = 0; n < count; n++, iov++) { if (iov->iov_len > (size_t) max) return -ENOMEM; max -= iov->iov_len; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index b788bec..f621550 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -82,6 +82,9 @@ struct fuse_inode { preserve the original mode */ mode_t orig_i_mode; + /** 64 bit inode number */ + u64 orig_ino; + /** Version of last attribute change */ u64 attr_version; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 38f84cd..69a1e0f 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -91,6 +91,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb) fi->nlookup = 0; fi->attr_version = 0; fi->writectr = 0; + fi->orig_ino = 0; INIT_LIST_HEAD(&fi->write_files); INIT_LIST_HEAD(&fi->queued_writes); INIT_LIST_HEAD(&fi->writepages); @@ -140,6 +141,18 @@ static int fuse_remount_fs(struct super_block *sb, int *flags, char *data) return 0; } +/* + * ino_t is 32-bits on 32-bit arch. We have to squash the 64-bit value down + * so that it will fit. + */ +static ino_t fuse_squash_ino(u64 ino64) +{ + ino_t ino = (ino_t) ino64; + if (sizeof(ino_t) < sizeof(u64)) + ino ^= ino64 >> (sizeof(u64) - sizeof(ino_t)) * 8; + return ino; +} + void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, u64 attr_valid) { @@ -149,7 +162,7 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, fi->attr_version = ++fc->attr_version; fi->i_time = attr_valid; - inode->i_ino = attr->ino; + inode->i_ino = fuse_squash_ino(attr->ino); inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777); inode->i_nlink = attr->nlink; inode->i_uid = attr->uid; @@ -175,6 +188,8 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, fi->orig_i_mode = inode->i_mode; if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS)) inode->i_mode &= ~S_ISVTX; + + fi->orig_ino = attr->ino; } void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c index fe9945f..5235d6e 100644 --- a/fs/gfs2/export.c +++ b/fs/gfs2/export.c @@ -167,6 +167,8 @@ static struct dentry *gfs2_fh_to_dentry(struct super_block *sb, struct fid *fid, case GFS2_SMALL_FH_SIZE: case GFS2_LARGE_FH_SIZE: case GFS2_OLD_FH_SIZE: + if (fh_len < GFS2_SMALL_FH_SIZE) + return NULL; this.no_formal_ino = ((u64)be32_to_cpu(fh[0])) << 32; this.no_formal_ino |= be32_to_cpu(fh[1]); this.no_addr = ((u64)be32_to_cpu(fh[2])) << 32; @@ -186,6 +188,8 @@ static struct dentry *gfs2_fh_to_parent(struct super_block *sb, struct fid *fid, switch (fh_type) { case GFS2_LARGE_FH_SIZE: case GFS2_OLD_FH_SIZE: + if (fh_len < GFS2_LARGE_FH_SIZE) + return NULL; parent.no_formal_ino = ((u64)be32_to_cpu(fh[4])) << 32; parent.no_formal_ino |= be32_to_cpu(fh[5]); parent.no_addr = ((u64)be32_to_cpu(fh[6])) << 32; diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 05bbb12..c133253 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -139,16 +139,14 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) struct gfs2_meta_header *mh; struct gfs2_trans *tr; - lock_buffer(bd->bd_bh); - gfs2_log_lock(sdp); if (!list_empty(&bd->bd_list_tr)) - goto out; + return; tr = current->journal_info; tr->tr_touched = 1; tr->tr_num_buf++; list_add(&bd->bd_list_tr, &tr->tr_list_buf); if (!list_empty(&le->le_list)) - goto out; + return; set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags); set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags); gfs2_meta_check(sdp, bd->bd_bh); @@ -159,9 +157,6 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) sdp->sd_log_num_buf++; list_add(&le->le_list, &sdp->sd_log_le_buf); tr->tr_num_buf_new++; -out: - gfs2_log_unlock(sdp); - unlock_buffer(bd->bd_bh); } static void buf_lo_before_commit(struct gfs2_sbd *sdp) @@ -528,11 +523,9 @@ static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) struct address_space *mapping = bd->bd_bh->b_page->mapping; struct gfs2_inode *ip = GFS2_I(mapping->host); - lock_buffer(bd->bd_bh); - gfs2_log_lock(sdp); if (tr) { if (!list_empty(&bd->bd_list_tr)) - goto out; + return; tr->tr_touched = 1; if (gfs2_is_jdata(ip)) { tr->tr_num_buf++; @@ -540,7 +533,7 @@ static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) } } if (!list_empty(&le->le_list)) - goto out; + return; set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags); set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags); @@ -552,9 +545,6 @@ static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) } else { list_add_tail(&le->le_list, &sdp->sd_log_le_ordered); } -out: - gfs2_log_unlock(sdp); - unlock_buffer(bd->bd_bh); } static void gfs2_check_magic(struct buffer_head *bh) diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index 9ec73a8..e6453c3 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -145,14 +145,22 @@ void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta) struct gfs2_sbd *sdp = gl->gl_sbd; struct gfs2_bufdata *bd; + lock_buffer(bh); + gfs2_log_lock(sdp); bd = bh->b_private; if (bd) gfs2_assert(sdp, bd->bd_gl == gl); else { + gfs2_log_unlock(sdp); + unlock_buffer(bh); gfs2_attach_bufdata(gl, bh, meta); bd = bh->b_private; + lock_buffer(bh); + gfs2_log_lock(sdp); } lops_add(sdp, &bd->bd_le); + gfs2_log_unlock(sdp); + unlock_buffer(bh); } void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c index b1991a2..9d8c087 100644 --- a/fs/hfsplus/extents.c +++ b/fs/hfsplus/extents.c @@ -499,7 +499,7 @@ void hfsplus_file_truncate(struct inode *inode) struct address_space *mapping = inode->i_mapping; struct page *page; void *fsdata; - u32 size = inode->i_size; + loff_t size = inode->i_size; int res; res = pagecache_write_begin(NULL, mapping, size, 0, diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c index 7b8112d..aac1563 100644 --- a/fs/hfsplus/wrapper.c +++ b/fs/hfsplus/wrapper.c @@ -56,7 +56,7 @@ int hfsplus_submit_bio(struct super_block *sb, sector_t sector, DECLARE_COMPLETION_ONSTACK(wait); struct bio *bio; int ret = 0; - unsigned int io_size; + u64 io_size; loff_t start; int offset; diff --git a/fs/hpfs/map.c b/fs/hpfs/map.c index a790821..ea3d1ca 100644 --- a/fs/hpfs/map.c +++ b/fs/hpfs/map.c @@ -17,7 +17,8 @@ unsigned int *hpfs_map_bitmap(struct super_block *s, unsigned bmp_block, struct quad_buffer_head *qbh, char *id) { secno sec; - if (hpfs_sb(s)->sb_chk) if (bmp_block * 16384 > hpfs_sb(s)->sb_fs_size) { + unsigned n_bands = (hpfs_sb(s)->sb_fs_size + 0x3fff) >> 14; + if (hpfs_sb(s)->sb_chk) if (bmp_block >= n_bands) { hpfs_error(s, "hpfs_map_bitmap called with bad parameter: %08x at %s", bmp_block, id); return NULL; } diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index 98580a3..f760c15 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -553,7 +553,13 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent) sbi->sb_cp_table = NULL; sbi->sb_c_bitmap = -1; sbi->sb_max_fwd_alloc = 0xffffff; - + + if (sbi->sb_fs_size >= 0x80000000) { + hpfs_error(s, "invalid size in superblock: %08x", + (unsigned)sbi->sb_fs_size); + goto bail4; + } + /* Load bitmap directory */ if (!(sbi->sb_bmp_dir = hpfs_load_bitmap_directory(s, le32_to_cpu(superblock->bitmaps)))) goto bail4; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 8b0c875..6327a06 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -568,7 +568,8 @@ static int hugetlbfs_set_page_dirty(struct page *page) } static int hugetlbfs_migrate_page(struct address_space *mapping, - struct page *newpage, struct page *page) + struct page *newpage, struct page *page, + enum migrate_mode mode) { int rc; diff --git a/fs/isofs/export.c b/fs/isofs/export.c index dd4687f..fd88add 100644 --- a/fs/isofs/export.c +++ b/fs/isofs/export.c @@ -135,6 +135,7 @@ isofs_export_encode_fh(struct dentry *dentry, len = 3; fh32[0] = ei->i_iget5_block; fh16[2] = (__u16)ei->i_iget5_offset; /* fh16 [sic] */ + fh16[3] = 0; /* avoid leaking uninitialized data */ fh32[2] = inode->i_generation; if (connectable && !S_ISDIR(inode->i_mode)) { struct inode *parent; @@ -179,7 +180,7 @@ static struct dentry *isofs_fh_to_parent(struct super_block *sb, { struct isofs_fid *ifid = (struct isofs_fid *)fid; - if (fh_type != 2) + if (fh_len < 2 || fh_type != 2) return NULL; return isofs_export_iget(sb, diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index b3cc858..26f6364 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -119,8 +119,8 @@ static void destroy_inodecache(void) static int isofs_remount(struct super_block *sb, int *flags, char *data) { - /* we probably want a lot more here */ - *flags |= MS_RDONLY; + if (!(*flags & MS_RDONLY)) + return -EROFS; return 0; } @@ -769,15 +769,6 @@ root_found: */ s->s_maxbytes = 0x80000000000LL; - /* - * The CDROM is read-only, has no nodes (devices) on it, and since - * all of the files appear to be owned by root, we really do not want - * to allow suid. (suid or devices will not show up unless we have - * Rock Ridge extensions) - */ - - s->s_flags |= MS_RDONLY /* | MS_NODEV | MS_NOSUID */; - /* Set this for reference. Its not currently used except on write which we don't have .. */ @@ -1528,6 +1519,9 @@ struct inode *isofs_iget(struct super_block *sb, static struct dentry *isofs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { + /* We don't support read-write mounts */ + if (!(flags & MS_RDONLY)) + return ERR_PTR(-EACCES); return mount_bdev(fs_type, flags, dev_name, data, isofs_fill_super); } diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 72ffa97..dcd23f8 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -85,7 +85,12 @@ nope: static void release_data_buffer(struct buffer_head *bh) { if (buffer_freed(bh)) { + WARN_ON_ONCE(buffer_dirty(bh)); clear_buffer_freed(bh); + clear_buffer_mapped(bh); + clear_buffer_new(bh); + clear_buffer_req(bh); + bh->b_bdev = NULL; release_buffer_page(bh); } else put_bh(bh); @@ -840,17 +845,35 @@ restart_loop: * there's no point in keeping a checkpoint record for * it. */ - /* A buffer which has been freed while still being - * journaled by a previous transaction may end up still - * being dirty here, but we want to avoid writing back - * that buffer in the future after the "add to orphan" - * operation been committed, That's not only a performance - * gain, it also stops aliasing problems if the buffer is - * left behind for writeback and gets reallocated for another - * use in a different page. */ - if (buffer_freed(bh) && !jh->b_next_transaction) { - clear_buffer_freed(bh); - clear_buffer_jbddirty(bh); + /* + * A buffer which has been freed while still being journaled by + * a previous transaction. + */ + if (buffer_freed(bh)) { + /* + * If the running transaction is the one containing + * "add to orphan" operation (b_next_transaction != + * NULL), we have to wait for that transaction to + * commit before we can really get rid of the buffer. + * So just clear b_modified to not confuse transaction + * credit accounting and refile the buffer to + * BJ_Forget of the running transaction. If the just + * committed transaction contains "add to orphan" + * operation, we can completely invalidate the buffer + * now. We are rather throughout in that since the + * buffer may be still accessible when blocksize < + * pagesize and it is attached to the last partial + * page. + */ + jh->b_modified = 0; + if (!jh->b_next_transaction) { + clear_buffer_freed(bh); + clear_buffer_jbddirty(bh); + clear_buffer_mapped(bh); + clear_buffer_new(bh); + clear_buffer_req(bh); + bh->b_bdev = NULL; + } } if (buffer_jbddirty(bh)) { diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index f7ee81a..d7ab092 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c @@ -1837,15 +1837,16 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) * We're outside-transaction here. Either or both of j_running_transaction * and j_committing_transaction may be NULL. */ -static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) +static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh, + int partial_page) { transaction_t *transaction; struct journal_head *jh; int may_free = 1; - int ret; BUFFER_TRACE(bh, "entry"); +retry: /* * It is safe to proceed here without the j_list_lock because the * buffers cannot be stolen by try_to_free_buffers as long as we are @@ -1873,10 +1874,18 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) * clear the buffer dirty bit at latest at the moment when the * transaction marking the buffer as freed in the filesystem * structures is committed because from that moment on the - * buffer can be reallocated and used by a different page. + * block can be reallocated and used by a different page. * Since the block hasn't been freed yet but the inode has * already been added to orphan list, it is safe for us to add * the buffer to BJ_Forget list of the newest transaction. + * + * Also we have to clear buffer_mapped flag of a truncated buffer + * because the buffer_head may be attached to the page straddling + * i_size (can happen only when blocksize < pagesize) and thus the + * buffer_head can be reused when the file is extended again. So we end + * up keeping around invalidated buffers attached to transactions' + * BJ_Forget list just to stop checkpointing code from cleaning up + * the transaction this buffer was modified in. */ transaction = jh->b_transaction; if (transaction == NULL) { @@ -1903,13 +1912,9 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) * committed, the buffer won't be needed any * longer. */ JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget"); - ret = __dispose_buffer(jh, + may_free = __dispose_buffer(jh, journal->j_running_transaction); - journal_put_journal_head(jh); - spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); - spin_unlock(&journal->j_state_lock); - return ret; + goto zap_buffer; } else { /* There is no currently-running transaction. So the * orphan record which we wrote for this file must have @@ -1917,13 +1922,9 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) * the committing transaction, if it exists. */ if (journal->j_committing_transaction) { JBUFFER_TRACE(jh, "give to committing trans"); - ret = __dispose_buffer(jh, + may_free = __dispose_buffer(jh, journal->j_committing_transaction); - journal_put_journal_head(jh); - spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); - spin_unlock(&journal->j_state_lock); - return ret; + goto zap_buffer; } else { /* The orphan record's transaction has * committed. We can cleanse this buffer */ @@ -1944,10 +1945,26 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) } /* * The buffer is committing, we simply cannot touch - * it. So we just set j_next_transaction to the - * running transaction (if there is one) and mark - * buffer as freed so that commit code knows it should - * clear dirty bits when it is done with the buffer. + * it. If the page is straddling i_size we have to wait + * for commit and try again. + */ + if (partial_page) { + tid_t tid = journal->j_committing_transaction->t_tid; + + journal_put_journal_head(jh); + spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh); + spin_unlock(&journal->j_state_lock); + unlock_buffer(bh); + log_wait_commit(journal, tid); + lock_buffer(bh); + goto retry; + } + /* + * OK, buffer won't be reachable after truncate. We just set + * j_next_transaction to the running transaction (if there is + * one) and mark buffer as freed so that commit code knows it + * should clear dirty bits when it is done with the buffer. */ set_buffer_freed(bh); if (journal->j_running_transaction && buffer_jbddirty(bh)) @@ -1970,6 +1987,14 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) } zap_buffer: + /* + * This is tricky. Although the buffer is truncated, it may be reused + * if blocksize < pagesize and it is attached to the page straddling + * EOF. Since the buffer might have been added to BJ_Forget list of the + * running transaction, journal_get_write_access() won't clear + * b_modified and credit accounting gets confused. So clear b_modified + * here. */ + jh->b_modified = 0; journal_put_journal_head(jh); zap_buffer_no_jh: spin_unlock(&journal->j_list_lock); @@ -2018,7 +2043,8 @@ void journal_invalidatepage(journal_t *journal, if (offset <= curr_off) { /* This block is wholly outside the truncation point */ lock_buffer(bh); - may_free &= journal_unmap_buffer(journal, bh); + may_free &= journal_unmap_buffer(journal, bh, + offset > 0); unlock_buffer(bh); } curr_off = next_off; diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 9baa39e..26e11db 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -178,7 +178,8 @@ repeat: if (!new_transaction) goto alloc_transaction; write_lock(&journal->j_state_lock); - if (!journal->j_running_transaction) { + if (!journal->j_running_transaction && + !journal->j_barrier_count) { jbd2_get_transaction(journal, new_transaction); new_transaction = NULL; } @@ -468,10 +469,10 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask) &transaction->t_outstanding_credits); if (atomic_dec_and_test(&transaction->t_updates)) wake_up(&journal->j_wait_updates); + tid = transaction->t_tid; spin_unlock(&transaction->t_handle_lock); jbd_debug(2, "restarting handle %p\n", handle); - tid = transaction->t_tid; need_to_start = !tid_geq(journal->j_commit_request, tid); read_unlock(&journal->j_state_lock); if (need_to_start) diff --git a/fs/jffs2/gc.c b/fs/jffs2/gc.c index 31dce61..4bbd521 100644 --- a/fs/jffs2/gc.c +++ b/fs/jffs2/gc.c @@ -225,8 +225,8 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c) return 0; D1(printk(KERN_DEBUG "No progress from erasing blocks; doing GC anyway\n")); - spin_lock(&c->erase_completion_lock); mutex_lock(&c->alloc_sem); + spin_lock(&c->erase_completion_lock); } /* First, work out which block we're garbage-collecting */ diff --git a/fs/jffs2/nodemgmt.c b/fs/jffs2/nodemgmt.c index 694aa5b..e304795 100644 --- a/fs/jffs2/nodemgmt.c +++ b/fs/jffs2/nodemgmt.c @@ -355,14 +355,16 @@ static int jffs2_do_reserve_space(struct jffs2_sb_info *c, uint32_t minsize, spin_unlock(&c->erase_completion_lock); ret = jffs2_prealloc_raw_node_refs(c, jeb, 1); - if (ret) - return ret; + /* Just lock it again and continue. Nothing much can change because we hold c->alloc_sem anyway. In fact, it's not entirely clear why we hold c->erase_completion_lock in the majority of this function... but that's a question for another (more caffeine-rich) day. */ spin_lock(&c->erase_completion_lock); + if (ret) + return ret; + waste = jeb->free_size; jffs2_link_node_ref(c, jeb, (jeb->offset + c->sector_size - waste) | REF_OBSOLETE, diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index 1096559..09100b4 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -125,7 +125,7 @@ int jfs_write_inode(struct inode *inode, struct writeback_control *wbc) { int wait = wbc->sync_mode == WB_SYNC_ALL; - if (test_cflag(COMMIT_Nolink, inode)) + if (inode->i_nlink == 0) return 0; /* * If COMMIT_DIRTY is not set, the inode isn't really dirty. diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c index 9197a1b..b6f17c0 100644 --- a/fs/jfs/jfs_dtree.c +++ b/fs/jfs/jfs_dtree.c @@ -3047,6 +3047,14 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) dir_index = (u32) filp->f_pos; + /* + * NFSv4 reserves cookies 1 and 2 for . and .. so we add + * the value we return to the vfs is one greater than the + * one we use internally. + */ + if (dir_index) + dir_index--; + if (dir_index > 1) { struct dir_table_slot dirtab_slot; @@ -3086,7 +3094,7 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) if (p->header.flag & BT_INTERNAL) { jfs_err("jfs_readdir: bad index table"); DT_PUTPAGE(mp); - filp->f_pos = -1; + filp->f_pos = DIREND; return 0; } } else { @@ -3094,7 +3102,7 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) /* * self "." */ - filp->f_pos = 0; + filp->f_pos = 1; if (filldir(dirent, ".", 1, 0, ip->i_ino, DT_DIR)) return 0; @@ -3102,7 +3110,7 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) /* * parent ".." */ - filp->f_pos = 1; + filp->f_pos = 2; if (filldir(dirent, "..", 2, 1, PARENT(ip), DT_DIR)) return 0; @@ -3123,24 +3131,25 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) /* * Legacy filesystem - OS/2 & Linux JFS < 0.3.6 * - * pn = index = 0: First entry "." - * pn = 0; index = 1: Second entry ".." + * pn = 0; index = 1: First entry "." + * pn = 0; index = 2: Second entry ".." * pn > 0: Real entries, pn=1 -> leftmost page * pn = index = -1: No more entries */ dtpos = filp->f_pos; - if (dtpos == 0) { + if (dtpos < 2) { /* build "." entry */ + filp->f_pos = 1; if (filldir(dirent, ".", 1, filp->f_pos, ip->i_ino, DT_DIR)) return 0; - dtoffset->index = 1; + dtoffset->index = 2; filp->f_pos = dtpos; } if (dtoffset->pn == 0) { - if (dtoffset->index == 1) { + if (dtoffset->index == 2) { /* build ".." entry */ if (filldir(dirent, "..", 2, filp->f_pos, @@ -3233,6 +3242,12 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) } jfs_dirent->position = unique_pos++; } + /* + * We add 1 to the index because we may + * use a value of 2 internally, and NFSv4 + * doesn't like that. + */ + jfs_dirent->position++; } else { jfs_dirent->position = dtpos; len = min(d_namleft, DTLHDRDATALEN_LEGACY); diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index 583636f..ee55e45 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c @@ -1057,7 +1057,8 @@ static int lmLogSync(struct jfs_log * log, int hard_sync) */ void jfs_syncpt(struct jfs_log *log, int hard_sync) { LOG_LOCK(log); - lmLogSync(log, hard_sync); + if (!test_bit(log_QUIESCE, &log->flag)) + lmLogSync(log, hard_sync); LOG_UNLOCK(log); } diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index 8d4ea83..de88922 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -141,6 +141,9 @@ int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout) timeout); if (ret < 0) return -ERESTARTSYS; + /* Reset the lock status after a server reboot so we resend */ + if (block->b_status == nlm_lck_denied_grace_period) + block->b_status = nlm_lck_blocked; req->a_res.status = block->b_status; return 0; } diff --git a/fs/lockd/clntxdr.c b/fs/lockd/clntxdr.c index 36057ce..6e2a2d5 100644 --- a/fs/lockd/clntxdr.c +++ b/fs/lockd/clntxdr.c @@ -223,7 +223,7 @@ static void encode_nlm_stat(struct xdr_stream *xdr, { __be32 *p; - BUG_ON(be32_to_cpu(stat) > NLM_LCK_DENIED_GRACE_PERIOD); + WARN_ON_ONCE(be32_to_cpu(stat) > NLM_LCK_DENIED_GRACE_PERIOD); p = xdr_reserve_space(xdr, 4); *p = stat; } diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 6e31695..db7be3a 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -913,6 +913,7 @@ nlmsvc_retry_blocked(void) unsigned long timeout = MAX_SCHEDULE_TIMEOUT; struct nlm_block *block; + spin_lock(&nlm_blocked_lock); while (!list_empty(&nlm_blocked) && !kthread_should_stop()) { block = list_entry(nlm_blocked.next, struct nlm_block, b_list); @@ -922,6 +923,7 @@ nlmsvc_retry_blocked(void) timeout = block->b_when - jiffies; break; } + spin_unlock(&nlm_blocked_lock); dprintk("nlmsvc_retry_blocked(%p, when=%ld)\n", block, block->b_when); @@ -931,7 +933,9 @@ nlmsvc_retry_blocked(void) retry_deferred_block(block); } else nlmsvc_grant_blocked(block); + spin_lock(&nlm_blocked_lock); } + spin_unlock(&nlm_blocked_lock); return timeout; } diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index d27aab1..d413af3 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -67,7 +67,8 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp, /* Obtain file pointer. Not used by FREE_ALL call. */ if (filp != NULL) { - if ((error = nlm_lookup_file(rqstp, &file, &lock->fh)) != 0) + error = cast_status(nlm_lookup_file(rqstp, &file, &lock->fh)); + if (error != 0) goto no_locks; *filp = file; @@ -315,7 +315,7 @@ static int flock_make_lock(struct file *filp, struct file_lock **lock, return 0; } -static int assign_type(struct file_lock *fl, int type) +static int assign_type(struct file_lock *fl, long type) { switch (type) { case F_RDLCK: @@ -452,7 +452,7 @@ static const struct lock_manager_operations lease_manager_ops = { /* * Initialize a lease, use the default lock manager operations */ -static int lease_init(struct file *filp, int type, struct file_lock *fl) +static int lease_init(struct file *filp, long type, struct file_lock *fl) { if (assign_type(fl, type) != 0) return -EINVAL; @@ -470,7 +470,7 @@ static int lease_init(struct file *filp, int type, struct file_lock *fl) } /* Allocate a file_lock initialised to this type of lease */ -static struct file_lock *lease_alloc(struct file *filp, int type) +static struct file_lock *lease_alloc(struct file *filp, long type) { struct file_lock *fl = locks_alloc_lock(); int error = -ENOMEM; diff --git a/fs/namespace.c b/fs/namespace.c index edc1c4a..b3d8f51 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1244,8 +1244,9 @@ void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill) list_del_init(&p->mnt_expire); list_del_init(&p->mnt_list); __touch_mnt_namespace(p->mnt_ns); + if (p->mnt_ns) + __mnt_make_shortterm(p); p->mnt_ns = NULL; - __mnt_make_shortterm(p); list_del_init(&p->mnt_child); if (p->mnt_parent != p) { p->mnt_parent->mnt_ghosts++; diff --git a/fs/nfs/client.c b/fs/nfs/client.c index b3dc2b8..0cb731f 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -673,8 +673,7 @@ static int nfs_create_rpc_client(struct nfs_client *clp, */ static void nfs_destroy_server(struct nfs_server *server) { - if (!(server->flags & NFS_MOUNT_LOCAL_FLOCK) || - !(server->flags & NFS_MOUNT_LOCAL_FCNTL)) + if (server->nlm_host) nlmclnt_done(server->nlm_host); } diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 462a006..4033264 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1100,7 +1100,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) struct nfs_fattr *fattr = NULL; int error; - if (nd->flags & LOOKUP_RCU) + if (nd && (nd->flags & LOOKUP_RCU)) return -ECHILD; parent = dget_parent(dentry); @@ -1216,11 +1216,14 @@ static int nfs_dentry_delete(const struct dentry *dentry) } +/* Ensure that we revalidate inode->i_nlink */ static void nfs_drop_nlink(struct inode *inode) { spin_lock(&inode->i_lock); - if (inode->i_nlink > 0) - drop_nlink(inode); + /* drop the inode if we're reasonably sure this is the last link */ + if (inode->i_nlink == 1) + clear_nlink(inode); + NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATTR; spin_unlock(&inode->i_lock); } @@ -1235,8 +1238,8 @@ static void nfs_dentry_iput(struct dentry *dentry, struct inode *inode) NFS_I(inode)->cache_validity |= NFS_INO_INVALID_DATA; if (dentry->d_flags & DCACHE_NFSFS_RENAMED) { - drop_nlink(inode); nfs_complete_unlink(dentry, inode); + nfs_drop_nlink(inode); } iput(inode); } @@ -1498,7 +1501,7 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) struct nfs_open_context *ctx; int openflags, ret = 0; - if (nd->flags & LOOKUP_RCU) + if (nd && (nd->flags & LOOKUP_RCU)) return -ECHILD; inode = dentry->d_inode; @@ -1788,10 +1791,8 @@ static int nfs_safe_remove(struct dentry *dentry) if (inode != NULL) { nfs_inode_return_delegation(inode); error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); - /* The VFS may want to delete this inode */ if (error == 0) nfs_drop_nlink(inode); - nfs_mark_for_revalidate(inode); } else error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); if (error == -ENOENT) diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index a6e711a..ee02db5 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c @@ -213,7 +213,7 @@ static int nfs_dns_parse(struct cache_detail *cd, char *buf, int buflen) { char buf1[NFS_DNS_HOSTNAME_MAXLEN+1]; struct nfs_dns_ent key, *item; - unsigned long ttl; + unsigned int ttl; ssize_t len; int ret = -EINVAL; @@ -236,7 +236,8 @@ static int nfs_dns_parse(struct cache_detail *cd, char *buf, int buflen) key.namelen = len; memset(&key.h, 0, sizeof(key.h)); - ttl = get_expiry(&buf); + if (get_uint(&buf, &ttl) < 0) + goto out; if (ttl == 0) goto out; key.h.expiry_time = ttl + seconds_since_boot(); diff --git a/fs/nfs/file.c b/fs/nfs/file.c index dd2f130..6c6e2c4 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -493,8 +493,11 @@ static int nfs_release_page(struct page *page, gfp_t gfp) dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page); - /* Only do I/O if gfp is a superset of GFP_KERNEL */ - if (mapping && (gfp & GFP_KERNEL) == GFP_KERNEL) { + /* Only do I/O if gfp is a superset of GFP_KERNEL, and we're not + * doing this memory reclaim for a fs-related allocation. + */ + if (mapping && (gfp & GFP_KERNEL) == GFP_KERNEL && + !(current->flags & PF_FSTRANS)) { int how = FLUSH_SYNC; /* Don't let kswapd deadlock waiting for OOM RPC calls */ diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index c48f9f6..873c6f2 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -150,7 +150,7 @@ static void nfs_zap_caches_locked(struct inode *inode) nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); nfsi->attrtimeo_timestamp = jiffies; - memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode))); + memset(NFS_I(inode)->cookieverf, 0, sizeof(NFS_I(inode)->cookieverf)); if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; else diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 2a55347..399a505 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -274,8 +274,9 @@ extern void nfs_sb_active(struct super_block *sb); extern void nfs_sb_deactive(struct super_block *sb); /* namespace.c */ +#define NFS_PATH_CANONICAL 1 extern char *nfs_path(char **p, struct dentry *dentry, - char *buffer, ssize_t buflen); + char *buffer, ssize_t buflen, unsigned flags); extern struct vfsmount *nfs_d_automount(struct path *path); /* getroot.c */ @@ -315,7 +316,7 @@ void nfs_commit_release_pages(struct nfs_write_data *data); #ifdef CONFIG_MIGRATION extern int nfs_migrate_page(struct address_space *, - struct page *, struct page *); + struct page *, struct page *, enum migrate_mode); #else #define nfs_migrate_page NULL #endif @@ -349,7 +350,7 @@ static inline char *nfs_devname(struct dentry *dentry, char *buffer, ssize_t buflen) { char *dummy; - return nfs_path(&dummy, dentry, buffer, buflen); + return nfs_path(&dummy, dentry, buffer, buflen, NFS_PATH_CANONICAL); } /* diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index d4c2d6b..3d93216 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -181,7 +181,7 @@ int nfs_mount(struct nfs_mount_request *info) else msg.rpc_proc = &mnt_clnt->cl_procinfo[MOUNTPROC_MNT]; - status = rpc_call_sync(mnt_clnt, &msg, 0); + status = rpc_call_sync(mnt_clnt, &msg, RPC_TASK_SOFT|RPC_TASK_TIMEOUT); rpc_shutdown_client(mnt_clnt); if (status < 0) diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 1f063ba..d6122ef 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -37,6 +37,7 @@ static struct vfsmount *nfs_do_submount(struct dentry *dentry, * @dentry - pointer to dentry * @buffer - result buffer * @buflen - length of buffer + * @flags - options (see below) * * Helper function for constructing the server pathname * by arbitrary hashed dentry. @@ -44,8 +45,14 @@ static struct vfsmount *nfs_do_submount(struct dentry *dentry, * This is mainly for use in figuring out the path on the * server side when automounting on top of an existing partition * and in generating /proc/mounts and friends. + * + * Supported flags: + * NFS_PATH_CANONICAL: ensure there is exactly one slash after + * the original device (export) name + * (if unset, the original name is returned verbatim) */ -char *nfs_path(char **p, struct dentry *dentry, char *buffer, ssize_t buflen) +char *nfs_path(char **p, struct dentry *dentry, char *buffer, ssize_t buflen, + unsigned flags) { char *end; int namelen; @@ -78,7 +85,7 @@ rename_retry: rcu_read_unlock(); goto rename_retry; } - if (*end != '/') { + if ((flags & NFS_PATH_CANONICAL) && *end != '/') { if (--buflen < 0) { spin_unlock(&dentry->d_lock); rcu_read_unlock(); @@ -95,9 +102,11 @@ rename_retry: return end; } namelen = strlen(base); - /* Strip off excess slashes in base string */ - while (namelen > 0 && base[namelen - 1] == '/') - namelen--; + if (flags & NFS_PATH_CANONICAL) { + /* Strip off excess slashes in base string */ + while (namelen > 0 && base[namelen - 1] == '/') + namelen--; + } buflen -= namelen; if (buflen < 0) { spin_unlock(&dentry->d_lock); diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 771741f..f0a6990 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -68,7 +68,7 @@ do_proc_get_root(struct rpc_clnt *client, struct nfs_fh *fhandle, nfs_fattr_init(info->fattr); status = rpc_call_sync(client, &msg, 0); dprintk("%s: reply fsinfo: %d\n", __func__, status); - if (!(info->fattr->valid & NFS_ATTR_FATTR)) { + if (status == 0 && !(info->fattr->valid & NFS_ATTR_FATTR)) { msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR]; msg.rpc_resp = info->fattr; status = rpc_call_sync(client, &msg, 0); @@ -633,7 +633,7 @@ nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, u64 cookie, struct page **pages, unsigned int count, int plus) { struct inode *dir = dentry->d_inode; - __be32 *verf = NFS_COOKIEVERF(dir); + __be32 *verf = NFS_I(dir)->cookieverf; struct nfs3_readdirargs arg = { .fh = NFS_FH(dir), .cookie = cookie, diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index bb80c49..96f2b67 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -57,7 +57,8 @@ Elong: static char *nfs4_path(struct dentry *dentry, char *buffer, ssize_t buflen) { char *limit; - char *path = nfs_path(&limit, dentry, buffer, buflen); + char *path = nfs_path(&limit, dentry, buffer, buflen, + NFS_PATH_CANONICAL); if (!IS_ERR(path)) { char *colon = strchr(path, ':'); if (colon && colon < limit) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 3d67302..7204bcc 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -94,6 +94,8 @@ static int nfs4_map_errors(int err) case -NFS4ERR_BADOWNER: case -NFS4ERR_BADNAME: return -EINVAL; + case -NFS4ERR_SHARE_DENIED: + return -EACCES; default: dprintk("%s could not handle NFSv4 error %d\n", __func__, -err); @@ -298,8 +300,7 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc dprintk("%s ERROR: %d Reset session\n", __func__, errorcode); nfs4_schedule_session_recovery(clp->cl_session); - exception->retry = 1; - break; + goto wait_on_recovery; #endif /* defined(CONFIG_NFS_V4_1) */ case -NFS4ERR_FILE_OPEN: if (exception->timeout > HZ) { @@ -1334,6 +1335,12 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state case -ENOMEM: err = 0; goto out; + case -NFS4ERR_DELAY: + case -NFS4ERR_GRACE: + set_bit(NFS_DELEGATED_STATE, &state->flags); + ssleep(1); + err = -EAGAIN; + goto out; } err = nfs4_handle_exception(server, err, &exception); } while (exception.retry); @@ -1771,6 +1778,7 @@ static int _nfs4_do_open(struct inode *dir, struct path *path, fmode_t fmode, in nfs_setattr_update_inode(state->inode, sattr); nfs_post_op_update_inode(state->inode, opendata->o_res.f_attr); } + nfs_revalidate_inode(server, state->inode); nfs4_opendata_put(opendata); nfs4_put_state_owner(sp); *res = state; @@ -3015,11 +3023,11 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, dentry->d_parent->d_name.name, dentry->d_name.name, (unsigned long long)cookie); - nfs4_setup_readdir(cookie, NFS_COOKIEVERF(dir), dentry, &args); + nfs4_setup_readdir(cookie, NFS_I(dir)->cookieverf, dentry, &args); res.pgbase = args.pgbase; status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &msg, &args.seq_args, &res.seq_res, 0); if (status >= 0) { - memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE); + memcpy(NFS_I(dir)->cookieverf, res.verifier.data, NFS4_VERIFIER_SIZE); status += args.pgbase; } @@ -3438,19 +3446,6 @@ static inline int nfs4_server_supports_acls(struct nfs_server *server) */ #define NFS4ACL_MAXPAGES (XATTR_SIZE_MAX >> PAGE_CACHE_SHIFT) -static void buf_to_pages(const void *buf, size_t buflen, - struct page **pages, unsigned int *pgbase) -{ - const void *p = buf; - - *pgbase = offset_in_page(buf); - p -= *pgbase; - while (p < buf + buflen) { - *(pages++) = virt_to_page(p); - p += PAGE_CACHE_SIZE; - } -} - static int buf_to_pages_noslab(const void *buf, size_t buflen, struct page **pages, unsigned int *pgbase) { @@ -3547,9 +3542,19 @@ out: nfs4_set_cached_acl(inode, acl); } +/* + * The getxattr API returns the required buffer length when called with a + * NULL buf. The NFSv4 acl tool then calls getxattr again after allocating + * the required buf. On a NULL buf, we send a page of data to the server + * guessing that the ACL request can be serviced by a page. If so, we cache + * up to the page of ACL data, and the 2nd call to getxattr is serviced by + * the cache. If not so, we throw away the page, and cache the required + * length. The next getxattr call will then produce another round trip to + * the server, this time with the input buf of the required size. + */ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen) { - struct page *pages[NFS4ACL_MAXPAGES]; + struct page *pages[NFS4ACL_MAXPAGES] = {NULL, }; struct nfs_getaclargs args = { .fh = NFS_FH(inode), .acl_pages = pages, @@ -3564,41 +3569,61 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu .rpc_argp = &args, .rpc_resp = &res, }; - struct page *localpage = NULL; - int ret; - - if (buflen < PAGE_SIZE) { - /* As long as we're doing a round trip to the server anyway, - * let's be prepared for a page of acl data. */ - localpage = alloc_page(GFP_KERNEL); - resp_buf = page_address(localpage); - if (localpage == NULL) - return -ENOMEM; - args.acl_pages[0] = localpage; - args.acl_pgbase = 0; - args.acl_len = PAGE_SIZE; - } else { - resp_buf = buf; - buf_to_pages(buf, buflen, args.acl_pages, &args.acl_pgbase); + int ret = -ENOMEM, npages, i; + size_t acl_len = 0; + + npages = (buflen + PAGE_SIZE - 1) >> PAGE_SHIFT; + /* As long as we're doing a round trip to the server anyway, + * let's be prepared for a page of acl data. */ + if (npages == 0) + npages = 1; + + for (i = 0; i < npages; i++) { + pages[i] = alloc_page(GFP_KERNEL); + if (!pages[i]) + goto out_free; } - ret = nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode), &msg, &args.seq_args, &res.seq_res, 0); + if (npages > 1) { + /* for decoding across pages */ + res.acl_scratch = alloc_page(GFP_KERNEL); + if (!res.acl_scratch) + goto out_free; + } + args.acl_len = npages * PAGE_SIZE; + args.acl_pgbase = 0; + /* Let decode_getfacl know not to fail if the ACL data is larger than + * the page we send as a guess */ + if (buf == NULL) + res.acl_flags |= NFS4_ACL_LEN_REQUEST; + resp_buf = page_address(pages[0]); + + dprintk("%s buf %p buflen %ld npages %d args.acl_len %ld\n", + __func__, buf, buflen, npages, args.acl_len); + ret = nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode), + &msg, &args.seq_args, &res.seq_res, 0); if (ret) goto out_free; - if (res.acl_len > args.acl_len) - nfs4_write_cached_acl(inode, NULL, res.acl_len); + + acl_len = res.acl_len - res.acl_data_offset; + if (acl_len > args.acl_len) + nfs4_write_cached_acl(inode, NULL, acl_len); else - nfs4_write_cached_acl(inode, resp_buf, res.acl_len); + nfs4_write_cached_acl(inode, resp_buf + res.acl_data_offset, + acl_len); if (buf) { ret = -ERANGE; - if (res.acl_len > buflen) + if (acl_len > buflen) goto out_free; - if (localpage) - memcpy(buf, resp_buf, res.acl_len); + _copy_from_pages(buf, pages, res.acl_data_offset, + res.acl_len); } - ret = res.acl_len; + ret = acl_len; out_free: - if (localpage) - __free_page(localpage); + for (i = 0; i < npages; i++) + if (pages[i]) + __free_page(pages[i]); + if (res.acl_scratch) + __free_page(res.acl_scratch); return ret; } @@ -3629,6 +3654,8 @@ static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen) nfs_zap_acl_cache(inode); ret = nfs4_read_cached_acl(inode, buf, buflen); if (ret != -ENOENT) + /* -ENOENT is returned if there is no ACL or if there is an ACL + * but no cached acl data, just the acl length */ return ret; return nfs4_get_acl_uncached(inode, buf, buflen); } @@ -4113,6 +4140,7 @@ static void nfs4_locku_done(struct rpc_task *task, void *data) nfs_restart_rpc(task, calldata->server->nfs_client); } + nfs_release_seqid(calldata->arg.seqid); } static void nfs4_locku_prepare(struct rpc_task *task, void *data) @@ -5763,12 +5791,8 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) return; } spin_lock(&lo->plh_inode->i_lock); - if (task->tk_status == 0) { - if (lrp->res.lrs_present) { - pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); - } else - BUG_ON(!list_empty(&lo->plh_segs)); - } + if (task->tk_status == 0 && lrp->res.lrs_present) + pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); lo->plh_block_lgets--; spin_unlock(&lo->plh_inode->i_lock); dprintk("<-- %s\n", __func__); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index fc97fd5..4204e96 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -2374,11 +2374,12 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr, encode_compound_hdr(xdr, req, &hdr); encode_sequence(xdr, &args->seq_args, &hdr); encode_putfh(xdr, args->fh, &hdr); - replen = hdr.replen + op_decode_hdr_maxsz + nfs4_fattr_bitmap_maxsz + 1; + replen = hdr.replen + op_decode_hdr_maxsz + 1; encode_getattr_two(xdr, FATTR4_WORD0_ACL, 0, &hdr); xdr_inline_pages(&req->rq_rcv_buf, replen << 2, args->acl_pages, args->acl_pgbase, args->acl_len); + encode_nops(&hdr); } @@ -4714,17 +4715,18 @@ decode_restorefh(struct xdr_stream *xdr) } static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, - size_t *acl_len) + struct nfs_getaclres *res) { - __be32 *savep; + __be32 *savep, *bm_p; uint32_t attrlen, bitmap[2] = {0}; struct kvec *iov = req->rq_rcv_buf.head; int status; - *acl_len = 0; + res->acl_len = 0; if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) goto out; + bm_p = xdr->p; if ((status = decode_attr_bitmap(xdr, bitmap)) != 0) goto out; if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0) @@ -4736,18 +4738,30 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, size_t hdrlen; u32 recvd; + /* The bitmap (xdr len + bitmaps) and the attr xdr len words + * are stored with the acl data to handle the problem of + * variable length bitmaps.*/ + xdr->p = bm_p; + res->acl_data_offset = be32_to_cpup(bm_p) + 2; + res->acl_data_offset <<= 2; + /* We ignore &savep and don't do consistency checks on * the attr length. Let userspace figure it out.... */ hdrlen = (u8 *)xdr->p - (u8 *)iov->iov_base; + attrlen += res->acl_data_offset; recvd = req->rq_rcv_buf.len - hdrlen; if (attrlen > recvd) { - dprintk("NFS: server cheating in getattr" - " acl reply: attrlen %u > recvd %u\n", + if (res->acl_flags & NFS4_ACL_LEN_REQUEST) { + /* getxattr interface called with a NULL buf */ + res->acl_len = attrlen; + goto out; + } + dprintk("NFS: acl reply: attrlen %u > recvd %u\n", attrlen, recvd); return -EINVAL; } xdr_read_pages(xdr, attrlen); - *acl_len = attrlen; + res->acl_len = attrlen; } else status = -EOPNOTSUPP; @@ -5673,6 +5687,10 @@ nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, struct xdr_stream *xdr, struct compound_hdr hdr; int status; + if (res->acl_scratch != NULL) { + void *p = page_address(res->acl_scratch); + xdr_set_scratch_buffer(xdr, p, PAGE_SIZE); + } status = decode_compound_hdr(xdr, &hdr); if (status) goto out; @@ -5682,7 +5700,7 @@ nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, struct xdr_stream *xdr, status = decode_putfh(xdr); if (status) goto out; - status = decode_getacl(xdr, rqstp, &res->acl_len); + status = decode_getacl(xdr, rqstp, res); out: return status; @@ -5745,7 +5763,8 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, struct xdr_stream *xdr, status = decode_open(xdr, res); if (status) goto out; - if (decode_getfh(xdr, &res->fh) != 0) + status = decode_getfh(xdr, &res->fh); + if (status) goto out; if (decode_getfattr(xdr, res->f_attr, res->server, !RPC_IS_ASYNC(rqstp->rq_task)) != 0) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 8e7b61d..a5b2419 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -763,7 +763,7 @@ static int nfs_show_devname(struct seq_file *m, struct vfsmount *mnt) int err = 0; if (!page) return -ENOMEM; - devname = nfs_path(&dummy, mnt->mnt_root, page, PAGE_SIZE); + devname = nfs_path(&dummy, mnt->mnt_root, page, PAGE_SIZE, 0); if (IS_ERR(devname)) err = PTR_ERR(devname); else @@ -1052,7 +1052,7 @@ static int nfs_get_option_str(substring_t args[], char **option) { kfree(*option); *option = match_strdup(args); - return !option; + return !*option; } static int nfs_get_option_ul(substring_t args[], unsigned long *option) @@ -1815,6 +1815,7 @@ static int nfs_validate_mount_data(void *options, memcpy(sap, &data->addr, sizeof(data->addr)); args->nfs_server.addrlen = sizeof(data->addr); + args->nfs_server.port = ntohs(data->addr.sin_port); if (!nfs_verify_server_address(sap)) goto out_no_address; @@ -2528,6 +2529,7 @@ static int nfs4_validate_mount_data(void *options, return -EFAULT; if (!nfs_verify_server_address(sap)) goto out_no_address; + args->nfs_server.port = ntohs(((struct sockaddr_in *)sap)->sin_port); if (data->auth_flavourlen) { if (data->auth_flavourlen > 1) @@ -3096,4 +3098,6 @@ static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type, return res; } +MODULE_ALIAS("nfs4"); + #endif /* CONFIG_NFS_V4 */ diff --git a/fs/nfs/write.c b/fs/nfs/write.c index f2f80c0..58bb999 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1662,7 +1662,7 @@ out_error: #ifdef CONFIG_MIGRATION int nfs_migrate_page(struct address_space *mapping, struct page *newpage, - struct page *page) + struct page *page, enum migrate_mode mode) { /* * If PagePrivate is set, then the page is currently associated with @@ -1677,7 +1677,7 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage, nfs_fscache_release_page(page, GFP_KERNEL); - return migrate_page(mapping, newpage, page); + return migrate_page(mapping, newpage, page, mode); } #endif diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 4b470f6..d7c4f02 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -403,7 +403,7 @@ fsloc_parse(char **mesg, char *buf, struct nfsd4_fs_locations *fsloc) int migrated, i, err; /* listsize */ - err = get_int(mesg, &fsloc->locations_count); + err = get_uint(mesg, &fsloc->locations_count); if (err) return err; if (fsloc->locations_count > MAX_FS_LOCATIONS) @@ -461,7 +461,7 @@ static int secinfo_parse(char **mesg, char *buf, struct svc_export *exp) return -EINVAL; for (f = exp->ex_flavors; f < exp->ex_flavors + listsize; f++) { - err = get_int(mesg, &f->pseudoflavor); + err = get_uint(mesg, &f->pseudoflavor); if (err) return err; /* @@ -470,7 +470,7 @@ static int secinfo_parse(char **mesg, char *buf, struct svc_export *exp) * problem at export time instead of when a client fails * to authenticate. */ - err = get_int(mesg, &f->flags); + err = get_uint(mesg, &f->flags); if (err) return err; /* Only some flags are allowed to differ between flavors: */ diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 92f7eb7..4ec38df 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -189,13 +189,7 @@ static void __nfs4_file_put_access(struct nfs4_file *fp, int oflag) { if (atomic_dec_and_test(&fp->fi_access[oflag])) { nfs4_file_put_fd(fp, oflag); - /* - * It's also safe to get rid of the RDWR open *if* - * we no longer have need of the other kind of access - * or if we already have the other kind of open: - */ - if (fp->fi_fds[1-oflag] - || atomic_read(&fp->fi_access[1 - oflag]) == 0) + if (atomic_read(&fp->fi_access[1 - oflag]) == 0) nfs4_file_put_fd(fp, O_RDWR); } } diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 6c74097..45f53ae 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -159,8 +159,8 @@ static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes) */ memcpy(p, argp->p, avail); /* step to next page */ - argp->p = page_address(argp->pagelist[0]); argp->pagelist++; + argp->p = page_address(argp->pagelist[0]); if (argp->pagelen < PAGE_SIZE) { argp->end = argp->p + (argp->pagelen>>2); argp->pagelen = 0; @@ -262,7 +262,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, iattr->ia_valid |= ATTR_SIZE; } if (bmval[0] & FATTR4_WORD0_ACL) { - int nace; + u32 nace; struct nfs4_ace *ace; READ_BUF(4); len += 4; @@ -342,10 +342,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, all 32 bits of 'nseconds'. */ READ_BUF(12); len += 12; - READ32(dummy32); - if (dummy32) - return nfserr_inval; - READ32(iattr->ia_atime.tv_sec); + READ64(iattr->ia_atime.tv_sec); READ32(iattr->ia_atime.tv_nsec); if (iattr->ia_atime.tv_nsec >= (u32)1000000000) return nfserr_inval; @@ -368,10 +365,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, all 32 bits of 'nseconds'. */ READ_BUF(12); len += 12; - READ32(dummy32); - if (dummy32) - return nfserr_inval; - READ32(iattr->ia_mtime.tv_sec); + READ64(iattr->ia_mtime.tv_sec); READ32(iattr->ia_mtime.tv_nsec); if (iattr->ia_mtime.tv_nsec >= (u32)1000000000) return nfserr_inval; @@ -2010,7 +2004,7 @@ out_acl: if (bmval0 & FATTR4_WORD0_CASE_INSENSITIVE) { if ((buflen -= 4) < 0) goto out_resource; - WRITE32(1); + WRITE32(0); } if (bmval0 & FATTR4_WORD0_CASE_PRESERVING) { if ((buflen -= 4) < 0) @@ -2148,8 +2142,7 @@ out_acl: if (bmval1 & FATTR4_WORD1_TIME_ACCESS) { if ((buflen -= 12) < 0) goto out_resource; - WRITE32(0); - WRITE32(stat.atime.tv_sec); + WRITE64((s64)stat.atime.tv_sec); WRITE32(stat.atime.tv_nsec); } if (bmval1 & FATTR4_WORD1_TIME_DELTA) { @@ -2162,15 +2155,13 @@ out_acl: if (bmval1 & FATTR4_WORD1_TIME_METADATA) { if ((buflen -= 12) < 0) goto out_resource; - WRITE32(0); - WRITE32(stat.ctime.tv_sec); + WRITE64((s64)stat.ctime.tv_sec); WRITE32(stat.ctime.tv_nsec); } if (bmval1 & FATTR4_WORD1_TIME_MODIFY) { if ((buflen -= 12) < 0) goto out_resource; - WRITE32(0); - WRITE32(stat.mtime.tv_sec); + WRITE64((s64)stat.mtime.tv_sec); WRITE32(stat.mtime.tv_nsec); } if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) { @@ -2682,11 +2673,16 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, len = maxcount; v = 0; while (len > 0) { - pn = resp->rqstp->rq_resused++; + pn = resp->rqstp->rq_resused; + if (!resp->rqstp->rq_respages[pn]) { /* ran out of pages */ + maxcount -= len; + break; + } resp->rqstp->rq_vec[v].iov_base = page_address(resp->rqstp->rq_respages[pn]); resp->rqstp->rq_vec[v].iov_len = len < PAGE_SIZE ? len : PAGE_SIZE; + resp->rqstp->rq_resused++; v++; len -= PAGE_SIZE; } @@ -2734,6 +2730,8 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd return nfserr; if (resp->xbuf->page_len) return nfserr_resource; + if (!resp->rqstp->rq_respages[resp->rqstp->rq_resused]) + return nfserr_resource; page = page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused++]); @@ -2783,6 +2781,8 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 return nfserr; if (resp->xbuf->page_len) return nfserr_resource; + if (!resp->rqstp->rq_respages[resp->rqstp->rq_resused]) + return nfserr_resource; RESERVE_SPACE(8); /* verifier */ savep = p; diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c index 08a07a2..57ceaf3 100644 --- a/fs/nilfs2/gcinode.c +++ b/fs/nilfs2/gcinode.c @@ -191,6 +191,8 @@ void nilfs_remove_all_gcinodes(struct the_nilfs *nilfs) while (!list_empty(head)) { ii = list_first_entry(head, struct nilfs_inode_info, i_dirty); list_del_init(&ii->i_dirty); + truncate_inode_pages(&ii->vfs_inode.i_data, 0); + nilfs_btnode_cache_clear(&ii->i_btnode_cache); iput(&ii->vfs_inode); } } diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index b9b45fc..373cd7b 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -195,13 +195,32 @@ static int nilfs_writepage(struct page *page, struct writeback_control *wbc) static int nilfs_set_page_dirty(struct page *page) { - int ret = __set_page_dirty_buffers(page); + int ret = __set_page_dirty_nobuffers(page); - if (ret) { + if (page_has_buffers(page)) { struct inode *inode = page->mapping->host; - unsigned nr_dirty = 1 << (PAGE_SHIFT - inode->i_blkbits); + unsigned nr_dirty = 0; + struct buffer_head *bh, *head; - nilfs_set_file_dirty(inode, nr_dirty); + /* + * This page is locked by callers, and no other thread + * concurrently marks its buffers dirty since they are + * only dirtied through routines in fs/buffer.c in + * which call sites of mark_buffer_dirty are protected + * by page lock. + */ + bh = head = page_buffers(page); + do { + /* Do not mark hole blocks dirty */ + if (buffer_dirty(bh) || !buffer_mapped(bh)) + continue; + + set_buffer_dirty(bh); + nr_dirty++; + } while (bh = bh->b_this_page, bh != head); + + if (nr_dirty) + nilfs_set_file_dirty(inode, nr_dirty); } return ret; } diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 3e65427..cee648e 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -182,7 +182,7 @@ static int nilfs_ioctl_change_cpmode(struct inode *inode, struct file *filp, if (copy_from_user(&cpmode, argp, sizeof(cpmode))) goto out; - down_read(&inode->i_sb->s_umount); + mutex_lock(&nilfs->ns_snapshot_mount_mutex); nilfs_transaction_begin(inode->i_sb, &ti, 0); ret = nilfs_cpfile_change_cpmode( @@ -192,7 +192,7 @@ static int nilfs_ioctl_change_cpmode(struct inode *inode, struct file *filp, else nilfs_transaction_commit(inode->i_sb); /* never fails */ - up_read(&inode->i_sb->s_umount); + mutex_unlock(&nilfs->ns_snapshot_mount_mutex); out: mnt_drop_write(filp->f_path.mnt); return ret; @@ -661,8 +661,11 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, if (ret < 0) printk(KERN_ERR "NILFS: GC failed during preparation: " "cannot read source blocks: err=%d\n", ret); - else + else { + if (nilfs_sb_need_update(nilfs)) + set_nilfs_discontinued(nilfs); ret = nilfs_clean_segments(inode->i_sb, argv, kbufs); + } nilfs_remove_all_gcinodes(nilfs); clear_nilfs_gc_running(nilfs); diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c index 850a7c0..07a666a 100644 --- a/fs/nilfs2/segbuf.c +++ b/fs/nilfs2/segbuf.c @@ -345,8 +345,7 @@ static void nilfs_end_bio_write(struct bio *bio, int err) if (err == -EOPNOTSUPP) { set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); - bio_put(bio); - /* to be detected by submit_seg_bio() */ + /* to be detected by nilfs_segbuf_submit_bio() */ } if (!uptodate) @@ -377,12 +376,12 @@ static int nilfs_segbuf_submit_bio(struct nilfs_segment_buffer *segbuf, bio->bi_private = segbuf; bio_get(bio); submit_bio(mode, bio); + segbuf->sb_nbio++; if (bio_flagged(bio, BIO_EOPNOTSUPP)) { bio_put(bio); err = -EOPNOTSUPP; goto failed; } - segbuf->sb_nbio++; bio_put(bio); wi->bio = NULL; diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index bb24ab6..6f24e67 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -2309,6 +2309,8 @@ nilfs_remove_written_gcinodes(struct the_nilfs *nilfs, struct list_head *head) if (!test_bit(NILFS_I_UPDATED, &ii->i_state)) continue; list_del_init(&ii->i_dirty); + truncate_inode_pages(&ii->vfs_inode.i_data, 0); + nilfs_btnode_cache_clear(&ii->i_btnode_cache); iput(&ii->vfs_inode); } } diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 8351c44..97bfbdd 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -951,6 +951,8 @@ static int nilfs_attach_snapshot(struct super_block *s, __u64 cno, struct nilfs_root *root; int ret; + mutex_lock(&nilfs->ns_snapshot_mount_mutex); + down_read(&nilfs->ns_segctor_sem); ret = nilfs_cpfile_is_snapshot(nilfs->ns_cpfile, cno); up_read(&nilfs->ns_segctor_sem); @@ -975,6 +977,7 @@ static int nilfs_attach_snapshot(struct super_block *s, __u64 cno, ret = nilfs_get_root_dentry(s, root, root_dentry); nilfs_put_root(root); out: + mutex_unlock(&nilfs->ns_snapshot_mount_mutex); return ret; } diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 35a8970..1c98f53 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -76,6 +76,7 @@ struct the_nilfs *alloc_nilfs(struct block_device *bdev) nilfs->ns_bdev = bdev; atomic_set(&nilfs->ns_ndirtyblks, 0); init_rwsem(&nilfs->ns_sem); + mutex_init(&nilfs->ns_snapshot_mount_mutex); INIT_LIST_HEAD(&nilfs->ns_dirty_files); INIT_LIST_HEAD(&nilfs->ns_gc_inodes); spin_lock_init(&nilfs->ns_inode_lock); diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h index 9992b11..de7435f 100644 --- a/fs/nilfs2/the_nilfs.h +++ b/fs/nilfs2/the_nilfs.h @@ -47,6 +47,7 @@ enum { * @ns_flags: flags * @ns_bdev: block device * @ns_sem: semaphore for shared states + * @ns_snapshot_mount_mutex: mutex to protect snapshot mounts * @ns_sbh: buffer heads of on-disk super blocks * @ns_sbp: pointers to super block data * @ns_sbwtime: previous write time of super block @@ -99,6 +100,7 @@ struct the_nilfs { struct block_device *ns_bdev; struct rw_semaphore ns_sem; + struct mutex ns_snapshot_mount_mutex; /* * used for diff --git a/fs/nls/nls_base.c b/fs/nls/nls_base.c index 44a88a9..0eb059e 100644 --- a/fs/nls/nls_base.c +++ b/fs/nls/nls_base.c @@ -114,34 +114,57 @@ int utf32_to_utf8(unicode_t u, u8 *s, int maxlen) } EXPORT_SYMBOL(utf32_to_utf8); -int utf8s_to_utf16s(const u8 *s, int len, wchar_t *pwcs) +static inline void put_utf16(wchar_t *s, unsigned c, enum utf16_endian endian) +{ + switch (endian) { + default: + *s = (wchar_t) c; + break; + case UTF16_LITTLE_ENDIAN: + *s = __cpu_to_le16(c); + break; + case UTF16_BIG_ENDIAN: + *s = __cpu_to_be16(c); + break; + } +} + +int utf8s_to_utf16s(const u8 *s, int len, enum utf16_endian endian, + wchar_t *pwcs, int maxlen) { u16 *op; int size; unicode_t u; op = pwcs; - while (*s && len > 0) { + while (len > 0 && maxlen > 0 && *s) { if (*s & 0x80) { size = utf8_to_utf32(s, len, &u); if (size < 0) return -EINVAL; + s += size; + len -= size; if (u >= PLANE_SIZE) { + if (maxlen < 2) + break; u -= PLANE_SIZE; - *op++ = (wchar_t) (SURROGATE_PAIR | - ((u >> 10) & SURROGATE_BITS)); - *op++ = (wchar_t) (SURROGATE_PAIR | + put_utf16(op++, SURROGATE_PAIR | + ((u >> 10) & SURROGATE_BITS), + endian); + put_utf16(op++, SURROGATE_PAIR | SURROGATE_LOW | - (u & SURROGATE_BITS)); + (u & SURROGATE_BITS), + endian); + maxlen -= 2; } else { - *op++ = (wchar_t) u; + put_utf16(op++, u, endian); + maxlen--; } - s += size; - len -= size; } else { - *op++ = *s++; + put_utf16(op++, *s++, endian); len--; + maxlen--; } } return op - pwcs; diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index f35794b..0c2f912 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -18,9 +18,16 @@ static bool should_merge(struct fsnotify_event *old, struct fsnotify_event *new) old->tgid == new->tgid) { switch (old->data_type) { case (FSNOTIFY_EVENT_PATH): +#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS + /* dont merge two permission events */ + if ((old->mask & FAN_ALL_PERM_EVENTS) && + (new->mask & FAN_ALL_PERM_EVENTS)) + return false; +#endif if ((old->path.mnt == new->path.mnt) && (old->path.dentry == new->path.dentry)) return true; + break; case (FSNOTIFY_EVENT_NONE): return true; default: diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 9fde1c0..9860f6b 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -118,6 +118,7 @@ static int fill_event_metadata(struct fsnotify_group *group, metadata->event_len = FAN_EVENT_METADATA_LEN; metadata->metadata_len = FAN_EVENT_METADATA_LEN; metadata->vers = FANOTIFY_METADATA_VERSION; + metadata->reserved = 0; metadata->mask = event->mask & FAN_ALL_OUTGOING_EVENTS; metadata->pid = pid_vnr(event->tgid); if (unlikely(event->mask & FAN_Q_OVERFLOW)) diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 8445fbc..6f292dd 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -579,8 +579,6 @@ static int inotify_update_existing_watch(struct fsnotify_group *group, /* don't allow invalid bits: we don't want flags set */ mask = inotify_arg_to_mask(arg); - if (unlikely(!(mask & IN_ALL_EVENTS))) - return -EINVAL; fsn_mark = fsnotify_find_inode_mark(group, inode); if (!fsn_mark) @@ -632,8 +630,6 @@ static int inotify_new_watch(struct fsnotify_group *group, /* don't allow invalid bits: we don't want flags set */ mask = inotify_arg_to_mask(arg); - if (unlikely(!(mask & IN_ALL_EVENTS))) - return -EINVAL; tmp_i_mark = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL); if (unlikely(!tmp_i_mark)) diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 7642d7c..ab4046f 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -2539,6 +2539,7 @@ int ocfs2_super_lock(struct ocfs2_super *osb, * everything is up to the caller :) */ status = ocfs2_should_refresh_lock_res(lockres); if (status < 0) { + ocfs2_cluster_unlock(osb, lockres, level); mlog_errno(status); goto bail; } @@ -2547,8 +2548,10 @@ int ocfs2_super_lock(struct ocfs2_super *osb, ocfs2_complete_lock_res_refresh(lockres, status); - if (status < 0) + if (status < 0) { + ocfs2_cluster_unlock(osb, lockres, level); mlog_errno(status); + } ocfs2_track_lock_refresh(lockres); } bail: diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c index 23457b4..774a032 100644 --- a/fs/ocfs2/extent_map.c +++ b/fs/ocfs2/extent_map.c @@ -782,7 +782,6 @@ int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, cpos = map_start >> osb->s_clustersize_bits; mapping_end = ocfs2_clusters_for_bytes(inode->i_sb, map_start + map_len); - mapping_end -= cpos; is_last = 0; while (cpos < mapping_end && !is_last) { u32 fe_flags; @@ -791,7 +790,7 @@ int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, &hole_size, &rec, &is_last); if (ret) { mlog_errno(ret); - goto out; + goto out_unlock; } if (rec.e_blkno == 0ULL) { diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index f169da4..b7e74b5 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -642,7 +642,7 @@ ocfs2_block_group_alloc_discontig(handle_t *handle, * cluster groups will be staying in cache for the duration of * this operation. */ - ac->ac_allow_chain_relink = 0; + ac->ac_disable_chain_relink = 1; /* Claim the first region */ status = ocfs2_block_group_claim_bits(osb, handle, ac, min_bits, @@ -1823,7 +1823,7 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, * Do this *after* figuring out how many bits we're taking out * of our target group. */ - if (ac->ac_allow_chain_relink && + if (!ac->ac_disable_chain_relink && (prev_group_bh) && (ocfs2_block_group_reasonably_empty(bg, res->sr_bits))) { status = ocfs2_relink_block_group(handle, alloc_inode, @@ -1928,7 +1928,6 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, victim = ocfs2_find_victim_chain(cl); ac->ac_chain = victim; - ac->ac_allow_chain_relink = 1; status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, res, &bits_left); @@ -1947,7 +1946,7 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, * searching each chain in order. Don't allow chain relinking * because we only calculate enough journal credits for one * relink per alloc. */ - ac->ac_allow_chain_relink = 0; + ac->ac_disable_chain_relink = 1; for (i = 0; i < le16_to_cpu(cl->cl_next_free_rec); i ++) { if (i == victim) continue; diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h index b8afabf..a36d0aa 100644 --- a/fs/ocfs2/suballoc.h +++ b/fs/ocfs2/suballoc.h @@ -49,7 +49,7 @@ struct ocfs2_alloc_context { /* these are used by the chain search */ u16 ac_chain; - int ac_allow_chain_relink; + int ac_disable_chain_relink; group_search_t *ac_group_search; u64 ac_last_group; diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 81ecf9c..61a84cf 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -6497,6 +6497,16 @@ static int ocfs2_reflink_xattr_inline(struct ocfs2_xattr_reflink *args) } new_oi = OCFS2_I(args->new_inode); + /* + * Adjust extent record count to reserve space for extended attribute. + * Inline data count had been adjusted in ocfs2_duplicate_inline_data(). + */ + if (!(new_oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) && + !(ocfs2_inode_is_fast_symlink(args->new_inode))) { + struct ocfs2_extent_list *el = &new_di->id2.i_list; + le16_add_cpu(&el->l_count, -(inline_size / + sizeof(struct ocfs2_extent_rec))); + } spin_lock(&new_oi->ip_lock); new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL | OCFS2_INLINE_XATTR_FL; new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features); @@ -396,10 +396,10 @@ SYSCALL_DEFINE1(fchdir, unsigned int, fd) { struct file *file; struct inode *inode; - int error; + int error, fput_needed; error = -EBADF; - file = fget(fd); + file = fget_raw_light(fd, &fput_needed); if (!file) goto out; @@ -413,7 +413,7 @@ SYSCALL_DEFINE1(fchdir, unsigned int, fd) if (!error) set_fs_pwd(current->fs, &file->f_path); out_putf: - fput(file); + fput_light(file, fput_needed); out: return error; } @@ -900,9 +900,10 @@ static inline int build_open_flags(int flags, int mode, struct open_flags *op) int lookup_flags = 0; int acc_mode; - if (!(flags & O_CREAT)) - mode = 0; - op->mode = mode; + if (flags & O_CREAT) + op->mode = (mode & S_IALLUGO) | S_IFREG; + else + op->mode = 0; /* Must never be set by userspace */ flags &= ~FMODE_NONOTIFY; @@ -859,6 +859,9 @@ pipe_rdwr_open(struct inode *inode, struct file *filp) { int ret = -ENOENT; + if (!(filp->f_mode & (FMODE_READ|FMODE_WRITE))) + return -EINVAL; + mutex_lock(&inode->i_mutex); if (inode->i_pipe) { diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c index fbb0b47..d5378d0 100644 --- a/fs/ramfs/file-nommu.c +++ b/fs/ramfs/file-nommu.c @@ -110,6 +110,7 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize) /* prevent the page from being discarded on memory pressure */ SetPageDirty(page); + SetPageUptodate(page); unlock_page(page); put_page(page); diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 4fd5bb3..ebe8db4 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -1568,8 +1568,10 @@ struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid, reiserfs_warning(sb, "reiserfs-13077", "nfsd/reiserfs, fhtype=%d, len=%d - odd", fh_type, fh_len); - fh_type = 5; + fh_type = fh_len; } + if (fh_len < 2) + return NULL; return reiserfs_get_dentry(sb, fid->raw[0], fid->raw[1], (fh_type == 3 || fh_type >= 5) ? fid->raw[2] : 0); @@ -1578,6 +1580,8 @@ struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid, struct dentry *reiserfs_fh_to_parent(struct super_block *sb, struct fid *fid, int fh_len, int fh_type) { + if (fh_type > fh_len) + fh_type = fh_len; if (fh_type < 4) return NULL; @@ -1779,8 +1783,9 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, BUG_ON(!th->t_trans_id); - dquot_initialize(inode); + reiserfs_write_unlock(inode->i_sb); err = dquot_alloc_inode(inode); + reiserfs_write_lock(inode->i_sb); if (err) goto out_end_trans; if (!dir->i_nlink) { @@ -1976,8 +1981,10 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, out_end_trans: journal_end(th, th->t_super, th->t_blocks_allocated); + reiserfs_write_unlock(inode->i_sb); /* Drop can be outside and it needs more credits so it's better to have it outside */ dquot_drop(inode); + reiserfs_write_lock(inode->i_sb); inode->i_flags |= S_NOQUOTA; make_bad_inode(inode); @@ -3101,10 +3108,9 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) /* must be turned off for recursive notify_change calls */ ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID); - depth = reiserfs_write_lock_once(inode->i_sb); if (is_quota_modification(inode, attr)) dquot_initialize(inode); - + depth = reiserfs_write_lock_once(inode->i_sb); if (attr->ia_valid & ATTR_SIZE) { /* version 2 items will be caught by the s_maxbytes check ** done for us in vmtruncate @@ -3165,7 +3171,9 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) error = journal_begin(&th, inode->i_sb, jbegin_count); if (error) goto out; + reiserfs_write_unlock_once(inode->i_sb, depth); error = dquot_transfer(inode, attr); + depth = reiserfs_write_lock_once(inode->i_sb); if (error) { journal_end(&th, inode->i_sb, jbegin_count); goto out; diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c index 313d39d..3ae9926 100644 --- a/fs/reiserfs/stree.c +++ b/fs/reiserfs/stree.c @@ -1968,7 +1968,9 @@ int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct tree key2type(&(key->on_disk_key))); #endif + reiserfs_write_unlock(inode->i_sb); retval = dquot_alloc_space_nodirty(inode, pasted_size); + reiserfs_write_lock(inode->i_sb); if (retval) { pathrelse(search_path); return retval; @@ -2061,9 +2063,11 @@ int reiserfs_insert_item(struct reiserfs_transaction_handle *th, "reiserquota insert_item(): allocating %u id=%u type=%c", quota_bytes, inode->i_uid, head2type(ih)); #endif + reiserfs_write_unlock(inode->i_sb); /* We can't dirty inode here. It would be immediately written but * appropriate stat item isn't inserted yet... */ retval = dquot_alloc_space_nodirty(inode, quota_bytes); + reiserfs_write_lock(inode->i_sb); if (retval) { pathrelse(path); return retval; diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index f19dfbf..7527623 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -254,7 +254,9 @@ static int finish_unfinished(struct super_block *s) retval = remove_save_link_only(s, &save_link_key, 0); continue; } + reiserfs_write_unlock(s); dquot_initialize(inode); + reiserfs_write_lock(s); if (truncate && S_ISDIR(inode->i_mode)) { /* We got a truncate request for a dir which is impossible. @@ -1207,7 +1209,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) kfree(qf_names[i]); #endif err = -EINVAL; - goto out_err; + goto out_unlock; } #ifdef CONFIG_QUOTA handle_quota_files(s, qf_names, &qfmt); @@ -1250,7 +1252,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) if (blocks) { err = reiserfs_resize(s, blocks); if (err != 0) - goto out_err; + goto out_unlock; } if (*mount_flags & MS_RDONLY) { @@ -1260,9 +1262,15 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) /* it is read-only already */ goto out_ok; + /* + * Drop write lock. Quota will retake it when needed and lock + * ordering requires calling dquot_suspend() without it. + */ + reiserfs_write_unlock(s); err = dquot_suspend(s, -1); if (err < 0) goto out_err; + reiserfs_write_lock(s); /* try to remount file system with read-only permissions */ if (sb_umount_state(rs) == REISERFS_VALID_FS @@ -1272,7 +1280,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) err = journal_begin(&th, s, 10); if (err) - goto out_err; + goto out_unlock; /* Mounting a rw partition read-only. */ reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1); @@ -1287,7 +1295,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) if (reiserfs_is_journal_aborted(journal)) { err = journal->j_errno; - goto out_err; + goto out_unlock; } handle_data_mode(s, mount_options); @@ -1296,7 +1304,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) s->s_flags &= ~MS_RDONLY; /* now it is safe to call journal_begin */ err = journal_begin(&th, s, 10); if (err) - goto out_err; + goto out_unlock; /* Mount a partition which is read-only, read-write */ reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1); @@ -1313,11 +1321,17 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) SB_JOURNAL(s)->j_must_wait = 1; err = journal_end(&th, s, 10); if (err) - goto out_err; + goto out_unlock; s->s_dirt = 0; if (!(*mount_flags & MS_RDONLY)) { + /* + * Drop write lock. Quota will retake it when needed and lock + * ordering requires calling dquot_resume() without it. + */ + reiserfs_write_unlock(s); dquot_resume(s, -1); + reiserfs_write_lock(s); finish_unfinished(s); reiserfs_xattr_init(s, *mount_flags); } @@ -1327,9 +1341,10 @@ out_ok: reiserfs_write_unlock(s); return 0; +out_unlock: + reiserfs_write_unlock(s); out_err: kfree(new_opts); - reiserfs_write_unlock(s); return err; } @@ -1952,13 +1967,15 @@ static int reiserfs_write_dquot(struct dquot *dquot) REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); if (ret) goto out; + reiserfs_write_unlock(dquot->dq_sb); ret = dquot_commit(dquot); + reiserfs_write_lock(dquot->dq_sb); err = journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); if (!ret && err) ret = err; - out: +out: reiserfs_write_unlock(dquot->dq_sb); return ret; } @@ -1974,13 +1991,15 @@ static int reiserfs_acquire_dquot(struct dquot *dquot) REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb)); if (ret) goto out; + reiserfs_write_unlock(dquot->dq_sb); ret = dquot_acquire(dquot); + reiserfs_write_lock(dquot->dq_sb); err = journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb)); if (!ret && err) ret = err; - out: +out: reiserfs_write_unlock(dquot->dq_sb); return ret; } @@ -1994,19 +2013,21 @@ static int reiserfs_release_dquot(struct dquot *dquot) ret = journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb)); + reiserfs_write_unlock(dquot->dq_sb); if (ret) { /* Release dquot anyway to avoid endless cycle in dqput() */ dquot_release(dquot); goto out; } ret = dquot_release(dquot); + reiserfs_write_lock(dquot->dq_sb); err = journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb)); if (!ret && err) ret = err; - out: reiserfs_write_unlock(dquot->dq_sb); +out: return ret; } @@ -2031,11 +2052,13 @@ static int reiserfs_write_info(struct super_block *sb, int type) ret = journal_begin(&th, sb, 2); if (ret) goto out; + reiserfs_write_unlock(sb); ret = dquot_commit_info(sb, type); + reiserfs_write_lock(sb); err = journal_end(&th, sb, 2); if (!ret && err) ret = err; - out: +out: reiserfs_write_unlock(sb); return ret; } @@ -2059,8 +2082,11 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, struct inode *inode; struct reiserfs_transaction_handle th; - if (!(REISERFS_SB(sb)->s_mount_opt & (1 << REISERFS_QUOTA))) - return -EINVAL; + reiserfs_write_lock(sb); + if (!(REISERFS_SB(sb)->s_mount_opt & (1 << REISERFS_QUOTA))) { + err = -EINVAL; + goto out; + } /* Quotafile not on the same filesystem? */ if (path->mnt->mnt_sb != sb) { @@ -2102,8 +2128,10 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, if (err) goto out; } - err = dquot_quota_on(sb, type, format_id, path); + reiserfs_write_unlock(sb); + return dquot_quota_on(sb, type, format_id, path); out: + reiserfs_write_unlock(sb); return err; } @@ -2177,7 +2205,9 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type, tocopy = sb->s_blocksize - offset < towrite ? sb->s_blocksize - offset : towrite; tmp_bh.b_state = 0; + reiserfs_write_lock(sb); err = reiserfs_get_block(inode, blk, &tmp_bh, GET_BLOCK_CREATE); + reiserfs_write_unlock(sb); if (err) goto out; if (offset || tocopy != sb->s_blocksize) @@ -2193,10 +2223,12 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type, flush_dcache_page(bh->b_page); set_buffer_uptodate(bh); unlock_buffer(bh); + reiserfs_write_lock(sb); reiserfs_prepare_for_journal(sb, bh, 1); journal_mark_dirty(current->journal_info, sb, bh); if (!journal_quota) reiserfs_add_ordered_list(inode, bh); + reiserfs_write_unlock(sb); brelse(bh); offset = 0; towrite -= tocopy; diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index d780896..6e3ca4e 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -187,8 +187,8 @@ fill_with_dentries(void *buf, const char *name, int namelen, loff_t offset, if (dbuf->count == ARRAY_SIZE(dbuf->dentries)) return -ENOSPC; - if (name[0] == '.' && (name[1] == '\0' || - (name[1] == '.' && name[2] == '\0'))) + if (name[0] == '.' && (namelen < 2 || + (namelen == 2 && name[1] == '.'))) return 0; dentry = lookup_one_len(name, dbuf->xadir, namelen); diff --git a/fs/splice.c b/fs/splice.c index 9d89008..16d0cb4 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -274,13 +274,16 @@ static void spd_release_page(struct splice_pipe_desc *spd, unsigned int i) * Check if we need to grow the arrays holding pages and partial page * descriptions. */ -int splice_grow_spd(struct pipe_inode_info *pipe, struct splice_pipe_desc *spd) +int splice_grow_spd(const struct pipe_inode_info *pipe, struct splice_pipe_desc *spd) { - if (pipe->buffers <= PIPE_DEF_BUFFERS) + unsigned int buffers = ACCESS_ONCE(pipe->buffers); + + spd->nr_pages_max = buffers; + if (buffers <= PIPE_DEF_BUFFERS) return 0; - spd->pages = kmalloc(pipe->buffers * sizeof(struct page *), GFP_KERNEL); - spd->partial = kmalloc(pipe->buffers * sizeof(struct partial_page), GFP_KERNEL); + spd->pages = kmalloc(buffers * sizeof(struct page *), GFP_KERNEL); + spd->partial = kmalloc(buffers * sizeof(struct partial_page), GFP_KERNEL); if (spd->pages && spd->partial) return 0; @@ -290,10 +293,9 @@ int splice_grow_spd(struct pipe_inode_info *pipe, struct splice_pipe_desc *spd) return -ENOMEM; } -void splice_shrink_spd(struct pipe_inode_info *pipe, - struct splice_pipe_desc *spd) +void splice_shrink_spd(struct splice_pipe_desc *spd) { - if (pipe->buffers <= PIPE_DEF_BUFFERS) + if (spd->nr_pages_max <= PIPE_DEF_BUFFERS) return; kfree(spd->pages); @@ -316,6 +318,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, struct splice_pipe_desc spd = { .pages = pages, .partial = partial, + .nr_pages_max = PIPE_DEF_BUFFERS, .flags = flags, .ops = &page_cache_pipe_buf_ops, .spd_release = spd_release_page, @@ -327,7 +330,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, index = *ppos >> PAGE_CACHE_SHIFT; loff = *ppos & ~PAGE_CACHE_MASK; req_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - nr_pages = min(req_pages, pipe->buffers); + nr_pages = min(req_pages, spd.nr_pages_max); /* * Lookup the (hopefully) full range of pages we need. @@ -498,7 +501,7 @@ fill_it: if (spd.nr_pages) error = splice_to_pipe(pipe, &spd); - splice_shrink_spd(pipe, &spd); + splice_shrink_spd(&spd); return error; } @@ -599,6 +602,7 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos, struct splice_pipe_desc spd = { .pages = pages, .partial = partial, + .nr_pages_max = PIPE_DEF_BUFFERS, .flags = flags, .ops = &default_pipe_buf_ops, .spd_release = spd_release_page, @@ -609,8 +613,8 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos, res = -ENOMEM; vec = __vec; - if (pipe->buffers > PIPE_DEF_BUFFERS) { - vec = kmalloc(pipe->buffers * sizeof(struct iovec), GFP_KERNEL); + if (spd.nr_pages_max > PIPE_DEF_BUFFERS) { + vec = kmalloc(spd.nr_pages_max * sizeof(struct iovec), GFP_KERNEL); if (!vec) goto shrink_ret; } @@ -618,7 +622,7 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos, offset = *ppos & ~PAGE_CACHE_MASK; nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - for (i = 0; i < nr_pages && i < pipe->buffers && len; i++) { + for (i = 0; i < nr_pages && i < spd.nr_pages_max && len; i++) { struct page *page; page = alloc_page(GFP_USER); @@ -666,7 +670,7 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos, shrink_ret: if (vec != __vec) kfree(vec); - splice_shrink_spd(pipe, &spd); + splice_shrink_spd(&spd); return res; err: @@ -693,8 +697,10 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe, return -EINVAL; more = (sd->flags & SPLICE_F_MORE) ? MSG_MORE : 0; - if (sd->len < sd->total_len) + + if (sd->len < sd->total_len && pipe->nrbufs > 1) more |= MSG_SENDPAGE_NOTLAST; + return file->f_op->sendpage(file, buf->page, buf->offset, sd->len, &pos, more); } @@ -1616,6 +1622,7 @@ static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov, struct splice_pipe_desc spd = { .pages = pages, .partial = partial, + .nr_pages_max = PIPE_DEF_BUFFERS, .flags = flags, .ops = &user_page_pipe_buf_ops, .spd_release = spd_release_page, @@ -1631,13 +1638,13 @@ static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov, spd.nr_pages = get_iovec_page_array(iov, nr_segs, spd.pages, spd.partial, flags & SPLICE_F_GIFT, - pipe->buffers); + spd.nr_pages_max); if (spd.nr_pages <= 0) ret = spd.nr_pages; else ret = splice_to_pipe(pipe, &spd); - splice_shrink_spd(pipe, &spd); + splice_shrink_spd(&spd); return ret; } diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 7438850..b5a8636 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -290,7 +290,7 @@ handle_fragments: check_directory_table: /* Sanity check directory_table */ - if (msblk->directory_table >= next_table) { + if (msblk->directory_table > next_table) { err = -EINVAL; goto failed_mount; } @@ -57,7 +57,7 @@ EXPORT_SYMBOL(vfs_getattr); int vfs_fstat(unsigned int fd, struct kstat *stat) { - struct file *f = fget(fd); + struct file *f = fget_raw(fd); int error = -EBADF; if (f) { diff --git a/fs/statfs.c b/fs/statfs.c index 9cf04a1..a133c3e 100644 --- a/fs/statfs.c +++ b/fs/statfs.c @@ -86,7 +86,7 @@ int user_statfs(const char __user *pathname, struct kstatfs *st) int fd_statfs(int fd, struct kstatfs *st) { - struct file *file = fget(fd); + struct file *file = fget_raw(fd); int error = -EBADF; if (file) { error = vfs_statfs(&file->f_path, st); @@ -222,19 +222,19 @@ EXPORT_SYMBOL(deactivate_super); * and want to turn it into a full-blown active reference. grab_super() * is called with sb_lock held and drops it. Returns 1 in case of * success, 0 if we had failed (superblock contents was already dead or - * dying when grab_super() had been called). + * dying when grab_super() had been called). Note that this is only + * called for superblocks not in rundown mode (== ones still on ->fs_supers + * of their type), so increment of ->s_count is OK here. */ static int grab_super(struct super_block *s) __releases(sb_lock) { - if (atomic_inc_not_zero(&s->s_active)) { - spin_unlock(&sb_lock); - return 1; - } - /* it's going away */ s->s_count++; spin_unlock(&sb_lock); - /* wait for it to die */ down_write(&s->s_umount); + if ((s->s_flags & MS_BORN) && atomic_inc_not_zero(&s->s_active)) { + put_super(s); + return 1; + } up_write(&s->s_umount); put_super(s); return 0; @@ -335,11 +335,6 @@ retry: destroy_super(s); s = NULL; } - down_write(&old->s_umount); - if (unlikely(!(old->s_flags & MS_BORN))) { - deactivate_locked_super(old); - goto retry; - } return old; } } @@ -512,10 +507,10 @@ restart: if (list_empty(&sb->s_instances)) continue; if (sb->s_bdev == bdev) { - if (grab_super(sb)) /* drops sb_lock */ - return sb; - else + if (!grab_super(sb)) goto restart; + up_write(&sb->s_umount); + return sb; } } spin_unlock(&sb_lock); @@ -1009,6 +1004,8 @@ int freeze_super(struct super_block *sb) printk(KERN_ERR "VFS:Filesystem freeze failed\n"); sb->s_frozen = SB_UNFROZEN; + smp_wmb(); + wake_up(&sb->s_wait_unfrozen); deactivate_locked_super(sb); return ret; } diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index ea9120a..3ab78b8 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -404,20 +404,18 @@ int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) /** * sysfs_pathname - return full path to sysfs dirent * @sd: sysfs_dirent whose path we want - * @path: caller allocated buffer + * @path: caller allocated buffer of size PATH_MAX * * Gives the name "/" to the sysfs_root entry; any path returned * is relative to wherever sysfs is mounted. - * - * XXX: does no error checking on @path size */ static char *sysfs_pathname(struct sysfs_dirent *sd, char *path) { if (sd->s_parent) { sysfs_pathname(sd->s_parent, path); - strcat(path, "/"); + strlcat(path, "/", PATH_MAX); } - strcat(path, sd->s_name); + strlcat(path, sd->s_name, PATH_MAX); return path; } @@ -450,9 +448,11 @@ int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) char *path = kzalloc(PATH_MAX, GFP_KERNEL); WARN(1, KERN_WARNING "sysfs: cannot create duplicate filename '%s'\n", - (path == NULL) ? sd->s_name : - strcat(strcat(sysfs_pathname(acxt->parent_sd, path), "/"), - sd->s_name)); + (path == NULL) ? sd->s_name + : (sysfs_pathname(acxt->parent_sd, path), + strlcat(path, "/", PATH_MAX), + strlcat(path, sd->s_name, PATH_MAX), + path)); kfree(path); } @@ -917,6 +917,8 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir) ino = parent_sd->s_ino; if (filldir(dirent, ".", 1, filp->f_pos, ino, DT_DIR) == 0) filp->f_pos++; + else + return 0; } if (filp->f_pos == 1) { if (parent_sd->s_parent) @@ -925,6 +927,8 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir) ino = parent_sd->s_ino; if (filldir(dirent, "..", 2, filp->f_pos, ino, DT_DIR) == 0) filp->f_pos++; + else + return 0; } mutex_lock(&sysfs_mutex); for (pos = sysfs_dir_pos(ns, parent_sd, filp->f_pos, pos); @@ -955,7 +959,6 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir) return 0; } - const struct file_operations sysfs_dir_operations = { .read = generic_read_dir, .readdir = sysfs_readdir, diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index ef5abd3..936a038 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -355,31 +355,50 @@ static unsigned int vfs_dent_type(uint8_t type) static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir) { int err, over = 0; + loff_t pos = file->f_pos; struct qstr nm; union ubifs_key key; struct ubifs_dent_node *dent; struct inode *dir = file->f_path.dentry->d_inode; struct ubifs_info *c = dir->i_sb->s_fs_info; - dbg_gen("dir ino %lu, f_pos %#llx", dir->i_ino, file->f_pos); + dbg_gen("dir ino %lu, f_pos %#llx", dir->i_ino, pos); - if (file->f_pos > UBIFS_S_KEY_HASH_MASK || file->f_pos == 2) + if (pos > UBIFS_S_KEY_HASH_MASK || pos == 2) /* * The directory was seek'ed to a senseless position or there * are no more entries. */ return 0; + if (file->f_version == 0) { + /* + * The file was seek'ed, which means that @file->private_data + * is now invalid. This may also be just the first + * 'ubifs_readdir()' invocation, in which case + * @file->private_data is NULL, and the below code is + * basically a no-op. + */ + kfree(file->private_data); + file->private_data = NULL; + } + + /* + * 'generic_file_llseek()' unconditionally sets @file->f_version to + * zero, and we use this for detecting whether the file was seek'ed. + */ + file->f_version = 1; + /* File positions 0 and 1 correspond to "." and ".." */ - if (file->f_pos == 0) { + if (pos == 0) { ubifs_assert(!file->private_data); over = filldir(dirent, ".", 1, 0, dir->i_ino, DT_DIR); if (over) return 0; - file->f_pos = 1; + file->f_pos = pos = 1; } - if (file->f_pos == 1) { + if (pos == 1) { ubifs_assert(!file->private_data); over = filldir(dirent, "..", 2, 1, parent_ino(file->f_path.dentry), DT_DIR); @@ -395,7 +414,7 @@ static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir) goto out; } - file->f_pos = key_hash_flash(c, &dent->key); + file->f_pos = pos = key_hash_flash(c, &dent->key); file->private_data = dent; } @@ -403,17 +422,16 @@ static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir) if (!dent) { /* * The directory was seek'ed to and is now readdir'ed. - * Find the entry corresponding to @file->f_pos or the - * closest one. + * Find the entry corresponding to @pos or the closest one. */ - dent_key_init_hash(c, &key, dir->i_ino, file->f_pos); + dent_key_init_hash(c, &key, dir->i_ino, pos); nm.name = NULL; dent = ubifs_tnc_next_ent(c, &key, &nm); if (IS_ERR(dent)) { err = PTR_ERR(dent); goto out; } - file->f_pos = key_hash_flash(c, &dent->key); + file->f_pos = pos = key_hash_flash(c, &dent->key); file->private_data = dent; } @@ -425,7 +443,7 @@ static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir) ubifs_inode(dir)->creat_sqnum); nm.len = le16_to_cpu(dent->nlen); - over = filldir(dirent, dent->name, nm.len, file->f_pos, + over = filldir(dirent, dent->name, nm.len, pos, le64_to_cpu(dent->inum), vfs_dent_type(dent->type)); if (over) @@ -441,9 +459,17 @@ static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir) } kfree(file->private_data); - file->f_pos = key_hash_flash(c, &dent->key); + file->f_pos = pos = key_hash_flash(c, &dent->key); file->private_data = dent; cond_resched(); + + if (file->f_version == 0) + /* + * The file was seek'ed meanwhile, lets return and start + * reading direntries from the new position on the next + * invocation. + */ + return 0; } out: @@ -454,15 +480,13 @@ out: kfree(file->private_data); file->private_data = NULL; + /* 2 is a special value indicating that there are no more direntries */ file->f_pos = 2; return 0; } -/* If a directory is seeked, we have to free saved readdir() state */ static loff_t ubifs_dir_llseek(struct file *file, loff_t offset, int origin) { - kfree(file->private_data); - file->private_data = NULL; return generic_file_llseek(file, offset, origin); } diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c index 2559d17..5dc48ca 100644 --- a/fs/ubifs/find.c +++ b/fs/ubifs/find.c @@ -681,8 +681,16 @@ int ubifs_find_free_leb_for_idx(struct ubifs_info *c) if (!lprops) { lprops = ubifs_fast_find_freeable(c); if (!lprops) { - ubifs_assert(c->freeable_cnt == 0); - if (c->lst.empty_lebs - c->lst.taken_empty_lebs > 0) { + /* + * The first condition means the following: go scan the + * LPT if there are uncategorized lprops, which means + * there may be freeable LEBs there (UBIFS does not + * store the information about freeable LEBs in the + * master node). + */ + if (c->in_a_category_cnt != c->main_lebs || + c->lst.empty_lebs - c->lst.taken_empty_lebs > 0) { + ubifs_assert(c->freeable_cnt == 0); lprops = scan_for_leb_for_idx(c); if (IS_ERR(lprops)) { err = PTR_ERR(lprops); diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c index 667884f..ae6f74a 100644 --- a/fs/ubifs/lprops.c +++ b/fs/ubifs/lprops.c @@ -300,8 +300,11 @@ void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops, default: ubifs_assert(0); } + lprops->flags &= ~LPROPS_CAT_MASK; lprops->flags |= cat; + c->in_a_category_cnt += 1; + ubifs_assert(c->in_a_category_cnt <= c->main_lebs); } /** @@ -334,6 +337,9 @@ static void ubifs_remove_from_cat(struct ubifs_info *c, default: ubifs_assert(0); } + + c->in_a_category_cnt -= 1; + ubifs_assert(c->in_a_category_cnt >= 0); } /** diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c index c606f01..1250016 100644 --- a/fs/ubifs/sb.c +++ b/fs/ubifs/sb.c @@ -715,8 +715,12 @@ static int fixup_free_space(struct ubifs_info *c) lnum = ubifs_next_log_lnum(c, lnum); } - /* Fixup the current log head */ - err = fixup_leb(c, c->lhead_lnum, c->lhead_offs); + /* + * Fixup the log head which contains the only a CS node at the + * beginning. + */ + err = fixup_leb(c, c->lhead_lnum, + ALIGN(UBIFS_CS_NODE_SZ, c->min_io_size)); if (err) goto out; diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 529be05..db04976 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1583,6 +1583,12 @@ static int ubifs_remount_rw(struct ubifs_info *c) c->remounting_rw = 1; c->ro_mount = 0; + if (c->space_fixup) { + err = ubifs_fixup_free_space(c); + if (err) + return err; + } + err = check_free_space(c); if (err) goto out; @@ -1699,12 +1705,6 @@ static int ubifs_remount_rw(struct ubifs_info *c) err = dbg_check_space_info(c); } - if (c->space_fixup) { - err = ubifs_fixup_free_space(c); - if (err) - goto out; - } - mutex_unlock(&c->umount_mutex); return err; diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index f79983d..62d50a9 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -1187,6 +1187,8 @@ struct ubifs_debug_info; * @freeable_list: list of freeable non-index LEBs (free + dirty == @leb_size) * @frdi_idx_list: list of freeable index LEBs (free + dirty == @leb_size) * @freeable_cnt: number of freeable LEBs in @freeable_list + * @in_a_category_cnt: count of lprops which are in a certain category, which + * basically meants that they were loaded from the flash * * @ltab_lnum: LEB number of LPT's own lprops table * @ltab_offs: offset of LPT's own lprops table @@ -1416,6 +1418,7 @@ struct ubifs_info { struct list_head freeable_list; struct list_head frdi_idx_list; int freeable_cnt; + int in_a_category_cnt; int ltab_lnum; int ltab_offs; diff --git a/fs/udf/file.c b/fs/udf/file.c index 3438b00..8eb9628 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -39,20 +39,24 @@ #include "udf_i.h" #include "udf_sb.h" -static int udf_adinicb_readpage(struct file *file, struct page *page) +static void __udf_adinicb_readpage(struct page *page) { struct inode *inode = page->mapping->host; char *kaddr; struct udf_inode_info *iinfo = UDF_I(inode); - BUG_ON(!PageLocked(page)); - kaddr = kmap(page); - memset(kaddr, 0, PAGE_CACHE_SIZE); memcpy(kaddr, iinfo->i_ext.i_data + iinfo->i_lenEAttr, inode->i_size); + memset(kaddr + inode->i_size, 0, PAGE_CACHE_SIZE - inode->i_size); flush_dcache_page(page); SetPageUptodate(page); kunmap(page); +} + +static int udf_adinicb_readpage(struct file *file, struct page *page) +{ + BUG_ON(!PageLocked(page)); + __udf_adinicb_readpage(page); unlock_page(page); return 0; @@ -77,6 +81,25 @@ static int udf_adinicb_writepage(struct page *page, return 0; } +static int udf_adinicb_write_begin(struct file *file, + struct address_space *mapping, loff_t pos, + unsigned len, unsigned flags, struct page **pagep, + void **fsdata) +{ + struct page *page; + + if (WARN_ON_ONCE(pos >= PAGE_CACHE_SIZE)) + return -EIO; + page = grab_cache_page_write_begin(mapping, 0, flags); + if (!page) + return -ENOMEM; + *pagep = page; + + if (!PageUptodate(page) && len != PAGE_CACHE_SIZE) + __udf_adinicb_readpage(page); + return 0; +} + static int udf_adinicb_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, @@ -98,8 +121,8 @@ static int udf_adinicb_write_end(struct file *file, const struct address_space_operations udf_adinicb_aops = { .readpage = udf_adinicb_readpage, .writepage = udf_adinicb_writepage, - .write_begin = simple_write_begin, - .write_end = udf_adinicb_write_end, + .write_begin = udf_adinicb_write_begin, + .write_end = udf_adinicb_write_end, }; static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov, diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 262050f..957c974 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -575,6 +575,7 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block, struct udf_inode_info *iinfo = UDF_I(inode); int goal = 0, pgoal = iinfo->i_location.logicalBlockNum; int lastblock = 0; + bool isBeyondEOF; prev_epos.offset = udf_file_entry_alloc_offset(inode); prev_epos.block = iinfo->i_location; @@ -653,7 +654,7 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block, /* Are we beyond EOF? */ if (etype == -1) { int ret; - + isBeyondEOF = 1; if (count) { if (c) laarr[0] = laarr[1]; @@ -696,6 +697,7 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block, endnum = c + 1; lastblock = 1; } else { + isBeyondEOF = 0; endnum = startnum = ((count > 2) ? 2 : count); /* if the current extent is in position 0, @@ -738,10 +740,13 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block, goal, err); if (!newblocknum) { brelse(prev_epos.bh); + brelse(cur_epos.bh); + brelse(next_epos.bh); *err = -ENOSPC; return NULL; } - iinfo->i_lenExtents += inode->i_sb->s_blocksize; + if (isBeyondEOF) + iinfo->i_lenExtents += inode->i_sb->s_blocksize; } /* if the extent the requsted block is located in contains multiple @@ -768,6 +773,8 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block, udf_update_extents(inode, laarr, startnum, endnum, &prev_epos); brelse(prev_epos.bh); + brelse(cur_epos.bh); + brelse(next_epos.bh); newblock = udf_get_pblock(inode->i_sb, newblocknum, iinfo->i_location.partitionReferenceNum, 0); diff --git a/fs/udf/namei.c b/fs/udf/namei.c index f1dce84..d8c1bb5 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -1297,6 +1297,7 @@ static int udf_encode_fh(struct dentry *de, __u32 *fh, int *lenp, *lenp = 3; fid->udf.block = location.logicalBlockNum; fid->udf.partref = location.partitionReferenceNum; + fid->udf.parent_partref = 0; fid->udf.generation = inode->i_generation; if (connectable && !S_ISDIR(inode->i_mode)) { diff --git a/fs/udf/super.c b/fs/udf/super.c index 7f0e18a..b0c7b53 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -56,6 +56,7 @@ #include <linux/seq_file.h> #include <linux/bitmap.h> #include <linux/crc-itu-t.h> +#include <linux/log2.h> #include <asm/byteorder.h> #include "udf_sb.h" @@ -1244,16 +1245,65 @@ out_bh: return ret; } +static int udf_load_sparable_map(struct super_block *sb, + struct udf_part_map *map, + struct sparablePartitionMap *spm) +{ + uint32_t loc; + uint16_t ident; + struct sparingTable *st; + struct udf_sparing_data *sdata = &map->s_type_specific.s_sparing; + int i; + struct buffer_head *bh; + + map->s_partition_type = UDF_SPARABLE_MAP15; + sdata->s_packet_len = le16_to_cpu(spm->packetLength); + if (!is_power_of_2(sdata->s_packet_len)) { + udf_error(sb, __func__, "error loading logical volume descriptor: " + "Invalid packet length %u\n", + (unsigned)sdata->s_packet_len); + return -EIO; + } + if (spm->numSparingTables > 4) { + udf_error(sb, __func__, "error loading logical volume descriptor: " + "Too many sparing tables (%d)\n", + (int)spm->numSparingTables); + return -EIO; + } + + for (i = 0; i < spm->numSparingTables; i++) { + loc = le32_to_cpu(spm->locSparingTable[i]); + bh = udf_read_tagged(sb, loc, loc, &ident); + if (!bh) + continue; + + st = (struct sparingTable *)bh->b_data; + if (ident != 0 || + strncmp(st->sparingIdent.ident, UDF_ID_SPARING, + strlen(UDF_ID_SPARING)) || + sizeof(*st) + le16_to_cpu(st->reallocationTableLen) > + sb->s_blocksize) { + brelse(bh); + continue; + } + + sdata->s_spar_map[i] = bh; + } + map->s_partition_func = udf_get_pblock_spar15; + return 0; +} + static int udf_load_logicalvol(struct super_block *sb, sector_t block, struct kernel_lb_addr *fileset) { struct logicalVolDesc *lvd; - int i, j, offset; + int i, offset; uint8_t type; struct udf_sb_info *sbi = UDF_SB(sb); struct genericPartitionMap *gpm; uint16_t ident; struct buffer_head *bh; + unsigned int table_len; int ret = 0; bh = udf_read_tagged(sb, block, block, &ident); @@ -1261,15 +1311,21 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block, return 1; BUG_ON(ident != TAG_IDENT_LVD); lvd = (struct logicalVolDesc *)bh->b_data; - - i = udf_sb_alloc_partition_maps(sb, le32_to_cpu(lvd->numPartitionMaps)); - if (i != 0) { - ret = i; + table_len = le32_to_cpu(lvd->mapTableLength); + if (sizeof(*lvd) + table_len > sb->s_blocksize) { + udf_error(sb, __func__, "error loading logical volume descriptor: " + "Partition table too long (%u > %lu)\n", table_len, + sb->s_blocksize - sizeof(*lvd)); + ret = 1; goto out_bh; } + ret = udf_sb_alloc_partition_maps(sb, le32_to_cpu(lvd->numPartitionMaps)); + if (ret) + goto out_bh; + for (i = 0, offset = 0; - i < sbi->s_partitions && offset < le32_to_cpu(lvd->mapTableLength); + i < sbi->s_partitions && offset < table_len; i++, offset += gpm->partitionMapLength) { struct udf_part_map *map = &sbi->s_partmaps[i]; gpm = (struct genericPartitionMap *) @@ -1304,38 +1360,11 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block, } else if (!strncmp(upm2->partIdent.ident, UDF_ID_SPARABLE, strlen(UDF_ID_SPARABLE))) { - uint32_t loc; - struct sparingTable *st; - struct sparablePartitionMap *spm = - (struct sparablePartitionMap *)gpm; - - map->s_partition_type = UDF_SPARABLE_MAP15; - map->s_type_specific.s_sparing.s_packet_len = - le16_to_cpu(spm->packetLength); - for (j = 0; j < spm->numSparingTables; j++) { - struct buffer_head *bh2; - - loc = le32_to_cpu( - spm->locSparingTable[j]); - bh2 = udf_read_tagged(sb, loc, loc, - &ident); - map->s_type_specific.s_sparing. - s_spar_map[j] = bh2; - - if (bh2 == NULL) - continue; - - st = (struct sparingTable *)bh2->b_data; - if (ident != 0 || strncmp( - st->sparingIdent.ident, - UDF_ID_SPARING, - strlen(UDF_ID_SPARING))) { - brelse(bh2); - map->s_type_specific.s_sparing. - s_spar_map[j] = NULL; - } + if (udf_load_sparable_map(sb, map, + (struct sparablePartitionMap *)gpm) < 0) { + ret = 1; + goto out_bh; } - map->s_partition_func = udf_get_pblock_spar15; } else if (!strncmp(upm2->partIdent.ident, UDF_ID_METADATA, strlen(UDF_ID_METADATA))) { diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h index 4858c19..54706dc 100644 --- a/fs/udf/udf_sb.h +++ b/fs/udf/udf_sb.h @@ -79,7 +79,7 @@ struct udf_virtual_data { struct udf_bitmap { __u32 s_extLength; __u32 s_extPosition; - __u16 s_nr_groups; + int s_nr_groups; struct buffer_head **s_block_bitmap; }; diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c index fed3f3c..844b22b 100644 --- a/fs/xfs/linux-2.6/xfs_export.c +++ b/fs/xfs/linux-2.6/xfs_export.c @@ -195,6 +195,9 @@ xfs_fs_fh_to_parent(struct super_block *sb, struct fid *fid, struct xfs_fid64 *fid64 = (struct xfs_fid64 *)fid; struct inode *inode = NULL; + if (fh_len < xfs_fileid_length(fileid_type)) + return NULL; + switch (fileid_type) { case FILEID_INO32_GEN_PARENT: inode = xfs_nfs_get_inode(sb, fid->i32.parent_ino, diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index b75fd67..6a5a1af 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -3512,7 +3512,7 @@ xlog_do_recovery_pass( * - order is important. */ error = xlog_bread_offset(log, 0, - bblks - split_bblks, hbp, + bblks - split_bblks, dbp, offset + BBTOB(split_bblks)); if (error) goto bread_err2; |