From 9a84bc2154b9787917cfc69cd619110bb9ab802f Mon Sep 17 00:00:00 2001 From: Jim Somerville Date: Thu, 21 Feb 2013 16:41:59 -0800 Subject: inotify: remove broken mask checks causing unmount to be EINVAL commit 676a0675cf9200ac047fb50825f80867b3bb733b upstream. Running the command: inotifywait -e unmount /mnt/disk immediately aborts with a -EINVAL return code. This is however a valid parameter. This abort occurs only if unmount is the sole event parameter. If other event parameters are supplied, then the unmount event wait will work. The problem was introduced by commit 44b350fc23e ("inotify: Fix mask checks"). In that commit, it states: The mask checks in inotify_update_existing_watch() and inotify_new_watch() are useless because inotify_arg_to_mask() sets FS_IN_IGNORED and FS_EVENT_ON_CHILD bits anyway. But instead of removing the useless checks, it did this: mask = inotify_arg_to_mask(arg); - if (unlikely(!mask)) + if (unlikely(!(mask & IN_ALL_EVENTS))) return -EINVAL; The problem is that IN_ALL_EVENTS doesn't include IN_UNMOUNT, and other parts of the code keep IN_UNMOUNT separate from IN_ALL_EVENTS. So the check should be: if (unlikely(!(mask & (IN_ALL_EVENTS | IN_UNMOUNT)))) But inotify_arg_to_mask(arg) always sets the IN_UNMOUNT bit in the mask anyway, so the check is always going to pass and thus should simply be removed. Also note that inotify_arg_to_mask completely controls what mask bits get set from arg, there's no way for invalid bits to get enabled there. Lets fix it by simply removing the useless broken checks. Signed-off-by: Jim Somerville Signed-off-by: Paul Gortmaker Cc: Jerome Marchand Cc: John McCutchan Cc: Robert Love Cc: Eric Paris Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/notify/inotify/inotify_user.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'fs') diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 8445fbc..6f292dd 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -579,8 +579,6 @@ static int inotify_update_existing_watch(struct fsnotify_group *group, /* don't allow invalid bits: we don't want flags set */ mask = inotify_arg_to_mask(arg); - if (unlikely(!(mask & IN_ALL_EVENTS))) - return -EINVAL; fsn_mark = fsnotify_find_inode_mark(group, inode); if (!fsn_mark) @@ -632,8 +630,6 @@ static int inotify_new_watch(struct fsnotify_group *group, /* don't allow invalid bits: we don't want flags set */ mask = inotify_arg_to_mask(arg); - if (unlikely(!(mask & IN_ALL_EVENTS))) - return -EINVAL; tmp_i_mark = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL); if (unlikely(!tmp_i_mark)) -- cgit v1.1 From 099f19c0426e465c44b66cc93f923461af6e5f8d Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Thu, 21 Feb 2013 16:42:45 -0800 Subject: ocfs2: unlock super lock if lockres refresh failed commit 3278bb748d2437eb1464765f36429e5d6aa91c38 upstream. If lockres refresh failed, the super lock will never be released which will cause some processes on other cluster nodes hung forever. Signed-off-by: Junxiao Bi Cc: Joel Becker Cc: Mark Fasheh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/dlmglue.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 7642d7c..ab4046f 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -2539,6 +2539,7 @@ int ocfs2_super_lock(struct ocfs2_super *osb, * everything is up to the caller :) */ status = ocfs2_should_refresh_lock_res(lockres); if (status < 0) { + ocfs2_cluster_unlock(osb, lockres, level); mlog_errno(status); goto bail; } @@ -2547,8 +2548,10 @@ int ocfs2_super_lock(struct ocfs2_super *osb, ocfs2_complete_lock_res_refresh(lockres, status); - if (status < 0) + if (status < 0) { + ocfs2_cluster_unlock(osb, lockres, level); mlog_errno(status); + } ocfs2_track_lock_refresh(lockres); } bail: -- cgit v1.1 From 8f0b9cb82a2d156297664de6e6df14afa39c5b7f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 19 Feb 2013 12:04:42 -0500 Subject: NLM: Ensure that we resend all pending blocking locks after a reclaim commit 666b3d803a511fbc9bc5e5ea8ce66010cf03ea13 upstream. Currently, nlmclnt_lock will break out of the for(;;) loop when the reclaimer wakes up the blocking lock thread by setting nlm_lck_denied_grace_period. This causes the lock request to fail with an ENOLCK error. The intention was always to ensure that we resend the lock request after the grace period has expired. Reported-by: Wangyuan Zhang Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/lockd/clntproc.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index e374050..5ee055e 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -550,6 +550,9 @@ again: status = nlmclnt_block(block, req, NLMCLNT_POLL_TIMEOUT); if (status < 0) break; + /* Resend the blocking lock request after a server reboot */ + if (resp->status == nlm_lck_denied_grace_period) + continue; if (resp->status != nlm_lck_blocked) break; } -- cgit v1.1 From f1d8678b90aeaf7a3c5c30cea8a838f54cd5a8bc Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Thu, 6 Oct 2011 12:10:11 -0400 Subject: ext4: Free resources in some error path in ext4_fill_super commit dcf2d804ed6ffe5e942b909ed5e5b74628be6ee4 upstream. Some of the error path in ext4_fill_super don't release the resouces properly. So this patch just try to release them in the right way. Signed-off-by: Tao Ma Signed-off-by: "Theodore Ts'o" Signed-off-by: Jeff Mahoney Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index f1aa1a2..c6a3363 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3681,22 +3681,19 @@ no_journal: if (err) { ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)", err); - goto failed_mount4; + goto failed_mount5; } err = ext4_register_li_request(sb, first_not_zeroed); if (err) - goto failed_mount4; + goto failed_mount6; sbi->s_kobj.kset = ext4_kset; init_completion(&sbi->s_kobj_unregister); err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL, "%s", sb->s_id); - if (err) { - ext4_mb_release(sb); - ext4_ext_release(sb); - goto failed_mount4; - }; + if (err) + goto failed_mount7; EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS; ext4_orphan_cleanup(sb, es); @@ -3730,13 +3727,19 @@ cantfind_ext4: ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem"); goto failed_mount; +failed_mount7: + ext4_unregister_li_request(sb); +failed_mount6: + ext4_ext_release(sb); +failed_mount5: + ext4_mb_release(sb); + ext4_release_system_zone(sb); failed_mount4: iput(root); sb->s_root = NULL; ext4_msg(sb, KERN_ERR, "mount failed"); destroy_workqueue(EXT4_SB(sb)->dio_unwritten_wq); failed_mount_wq: - ext4_release_system_zone(sb); if (sbi->s_journal) { jbd2_journal_destroy(sbi->s_journal); sbi->s_journal = NULL; -- cgit v1.1 From 131fb7cce88c8c953429ecee478477db81727273 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 30 Jul 2011 12:58:41 -0400 Subject: ext4: add missing kfree() on error return path in add_new_gdb() commit c49bafa3842751b8955a962859f42d307673d75d upstream. We added some more error handling in b40971426a "ext4: add error checking to calls to ext4_handle_dirty_metadata()". But we need to call kfree() as well to avoid a memory leak. Signed-off-by: Dan Carpenter Signed-off-by: "Theodore Ts'o" Signed-off-by: Jeff Mahoney Signed-off-by: Greg Kroah-Hartman --- fs/ext4/resize.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 80bbc9c..244100f 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -499,6 +499,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, return err; exit_inode: + kfree(n_group_desc); /* ext4_handle_release_buffer(handle, iloc.bh); */ brelse(iloc.bh); exit_dindj: -- cgit v1.1 From 8eac4364548b8f53476602969a2fba65d029d8b7 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 17 Nov 2011 16:42:19 -0500 Subject: NLS: improve UTF8 -> UTF16 string conversion routine commit 0720a06a7518c9d0c0125bd5d1f3b6264c55c3dd upstream. The utf8s_to_utf16s conversion routine needs to be improved. Unlike its utf16s_to_utf8s sibling, it doesn't accept arguments specifying the maximum length of the output buffer or the endianness of its 16-bit output. This patch (as1501) adds the two missing arguments, and adjusts the only two places in the kernel where the function is called. A follow-on patch will add a third caller that does utilize the new capabilities. The two conversion routines are still annoyingly inconsistent in the way they handle invalid byte combinations. But that's a subject for a different patch. Signed-off-by: Alan Stern CC: Clemens Ladisch Signed-off-by: Greg Kroah-Hartman --- fs/fat/namei_vfat.c | 3 ++- fs/nls/nls_base.c | 43 +++++++++++++++++++++++++++++++++---------- 2 files changed, 35 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 20b4ea5..6ee3c36 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -514,7 +514,8 @@ xlate_to_uni(const unsigned char *name, int len, unsigned char *outname, int charlen; if (utf8) { - *outlen = utf8s_to_utf16s(name, len, (wchar_t *)outname); + *outlen = utf8s_to_utf16s(name, len, UTF16_HOST_ENDIAN, + (wchar_t *) outname, FAT_LFN_LEN + 2); if (*outlen < 0) return *outlen; else if (*outlen > FAT_LFN_LEN) diff --git a/fs/nls/nls_base.c b/fs/nls/nls_base.c index 44a88a9..0eb059e 100644 --- a/fs/nls/nls_base.c +++ b/fs/nls/nls_base.c @@ -114,34 +114,57 @@ int utf32_to_utf8(unicode_t u, u8 *s, int maxlen) } EXPORT_SYMBOL(utf32_to_utf8); -int utf8s_to_utf16s(const u8 *s, int len, wchar_t *pwcs) +static inline void put_utf16(wchar_t *s, unsigned c, enum utf16_endian endian) +{ + switch (endian) { + default: + *s = (wchar_t) c; + break; + case UTF16_LITTLE_ENDIAN: + *s = __cpu_to_le16(c); + break; + case UTF16_BIG_ENDIAN: + *s = __cpu_to_be16(c); + break; + } +} + +int utf8s_to_utf16s(const u8 *s, int len, enum utf16_endian endian, + wchar_t *pwcs, int maxlen) { u16 *op; int size; unicode_t u; op = pwcs; - while (*s && len > 0) { + while (len > 0 && maxlen > 0 && *s) { if (*s & 0x80) { size = utf8_to_utf32(s, len, &u); if (size < 0) return -EINVAL; + s += size; + len -= size; if (u >= PLANE_SIZE) { + if (maxlen < 2) + break; u -= PLANE_SIZE; - *op++ = (wchar_t) (SURROGATE_PAIR | - ((u >> 10) & SURROGATE_BITS)); - *op++ = (wchar_t) (SURROGATE_PAIR | + put_utf16(op++, SURROGATE_PAIR | + ((u >> 10) & SURROGATE_BITS), + endian); + put_utf16(op++, SURROGATE_PAIR | SURROGATE_LOW | - (u & SURROGATE_BITS)); + (u & SURROGATE_BITS), + endian); + maxlen -= 2; } else { - *op++ = (wchar_t) u; + put_utf16(op++, u, endian); + maxlen--; } - s += size; - len -= size; } else { - *op++ = *s++; + put_utf16(op++, *s++, endian); len--; + maxlen--; } } return op - pwcs; -- cgit v1.1 From ad315127433dc9a8147e7f6b43f21b3d0061003a Mon Sep 17 00:00:00 2001 From: "Xiaowei.Hu" Date: Wed, 27 Feb 2013 17:02:49 -0800 Subject: ocfs2: ac->ac_allow_chain_relink=0 won't disable group relink commit 309a85b6861fedbb48a22d45e0e079d1be993b3a upstream. ocfs2_block_group_alloc_discontig() disables chain relink by setting ac->ac_allow_chain_relink = 0 because it grabs clusters from multiple cluster groups. It doesn't keep the credits for all chain relink,but ocfs2_claim_suballoc_bits overrides this in this call trace: ocfs2_block_group_claim_bits()->ocfs2_claim_clusters()-> __ocfs2_claim_clusters()->ocfs2_claim_suballoc_bits() ocfs2_claim_suballoc_bits set ac->ac_allow_chain_relink = 1; then call ocfs2_search_chain() one time and disable it again, and then we run out of credits. Fix is to allow relink by default and disable it in ocfs2_block_group_alloc_discontig. Without this patch, End-users will run into a crash due to run out of credits, backtrace like this: RIP: 0010:[] [] jbd2_journal_dirty_metadata+0x164/0x170 [jbd2] RSP: 0018:ffff8801b919b5b8 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff88022139ddc0 RCX: ffff880159f652d0 RDX: ffff880178aa3000 RSI: ffff880159f652d0 RDI: ffff880087f09bf8 RBP: ffff8801b919b5e8 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000001e00 R11: 00000000000150b0 R12: ffff880159f652d0 R13: ffff8801a0cae908 R14: ffff880087f09bf8 R15: ffff88018d177800 FS: 00007fc9b0b6b6e0(0000) GS:ffff88022fd40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 000000000040819c CR3: 0000000184017000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process dd (pid: 9945, threadinfo ffff8801b919a000, task ffff880149a264c0) Call Trace: ocfs2_journal_dirty+0x2f/0x70 [ocfs2] ocfs2_relink_block_group+0x111/0x480 [ocfs2] ocfs2_search_chain+0x455/0x9a0 [ocfs2] ... Signed-off-by: Xiaowei.Hu Reviewed-by: Srinivas Eeda Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/suballoc.c | 7 +++---- fs/ocfs2/suballoc.h | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index f169da4..b7e74b5 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -642,7 +642,7 @@ ocfs2_block_group_alloc_discontig(handle_t *handle, * cluster groups will be staying in cache for the duration of * this operation. */ - ac->ac_allow_chain_relink = 0; + ac->ac_disable_chain_relink = 1; /* Claim the first region */ status = ocfs2_block_group_claim_bits(osb, handle, ac, min_bits, @@ -1823,7 +1823,7 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, * Do this *after* figuring out how many bits we're taking out * of our target group. */ - if (ac->ac_allow_chain_relink && + if (!ac->ac_disable_chain_relink && (prev_group_bh) && (ocfs2_block_group_reasonably_empty(bg, res->sr_bits))) { status = ocfs2_relink_block_group(handle, alloc_inode, @@ -1928,7 +1928,6 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, victim = ocfs2_find_victim_chain(cl); ac->ac_chain = victim; - ac->ac_allow_chain_relink = 1; status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, res, &bits_left); @@ -1947,7 +1946,7 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, * searching each chain in order. Don't allow chain relinking * because we only calculate enough journal credits for one * relink per alloc. */ - ac->ac_allow_chain_relink = 0; + ac->ac_disable_chain_relink = 1; for (i = 0; i < le16_to_cpu(cl->cl_next_free_rec); i ++) { if (i == victim) continue; diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h index b8afabf..a36d0aa 100644 --- a/fs/ocfs2/suballoc.h +++ b/fs/ocfs2/suballoc.h @@ -49,7 +49,7 @@ struct ocfs2_alloc_context { /* these are used by the chain search */ u16 ac_chain; - int ac_allow_chain_relink; + int ac_disable_chain_relink; group_search_t *ac_group_search; u64 ac_last_group; -- cgit v1.1 From fd80f53550720f200ca0469e9419c750f895ab50 Mon Sep 17 00:00:00 2001 From: Niu Yawei Date: Fri, 1 Feb 2013 21:31:27 -0500 Subject: ext4: fix race in ext4_mb_add_n_trim() commit f1167009711032b0d747ec89a632a626c901a1ad upstream. In ext4_mb_add_n_trim(), lg_prealloc_lock should be taken when changing the lg_prealloc_list. Signed-off-by: Niu Yawei Signed-off-by: "Theodore Ts'o" Signed-off-by: Greg Kroah-Hartman --- fs/ext4/mballoc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index b6adf68..31bbdb5 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4111,7 +4111,7 @@ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac) /* The max size of hash table is PREALLOC_TB_SIZE */ order = PREALLOC_TB_SIZE - 1; /* Add the prealloc space to lg */ - rcu_read_lock(); + spin_lock(&lg->lg_prealloc_lock); list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order], pa_inode_list) { spin_lock(&tmp_pa->pa_lock); @@ -4135,12 +4135,12 @@ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac) if (!added) list_add_tail_rcu(&pa->pa_inode_list, &lg->lg_prealloc_list[order]); - rcu_read_unlock(); + spin_unlock(&lg->lg_prealloc_lock); /* Now trim the list to be not more than 8 elements */ if (lg_prealloc_count > 8) { ext4_mb_discard_lg_preallocations(sb, lg, - order, lg_prealloc_count); + order, lg_prealloc_count); return; } return ; -- cgit v1.1 From 1244d244aea7290c80cea240baa7e5b93b439dc8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 20 Feb 2013 14:06:20 -0500 Subject: btrfs: Init io_lock after cloning btrfs device struct commit 1cba0cdf5e4dbcd9e5fa5b54d7a028e55e2ca057 upstream. __btrfs_close_devices() clones btrfs device structs with memcpy(). Some of the fields in the clone are reinitialized, but it's missing to init io_lock. In mainline this goes unnoticed, but on RT it leaves the plist pointing to the original about to be freed lock struct. Initialize io_lock after cloning, so no references to the original struct are left. Reported-and-tested-by: Mike Galbraith Signed-off-by: Thomas Gleixner Signed-off-by: Chris Mason Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/volumes.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 43baaf0..06c8ced 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -512,6 +512,7 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) new_device->writeable = 0; new_device->in_fs_metadata = 0; new_device->can_discard = 0; + spin_lock_init(&new_device->io_lock); list_replace_rcu(&device->dev_list, &new_device->dev_list); call_rcu(&device->rcu, free_device); -- cgit v1.1 From 53c4e85f063e53ccfe9c34a78e0a33b849b37fba Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 1 Feb 2013 15:11:01 -0500 Subject: cifs: ensure that cifs_get_root() only traverses directories commit ce2ac52105aa663056dfc17966ebed1bf93e6e64 upstream. Kjell Braden reported this oops: [ 833.211970] BUG: unable to handle kernel NULL pointer dereference at (null) [ 833.212816] IP: [< (null)>] (null) [ 833.213280] PGD 1b9b2067 PUD e9f7067 PMD 0 [ 833.213874] Oops: 0010 [#1] SMP [ 833.214344] CPU 0 [ 833.214458] Modules linked in: des_generic md4 nls_utf8 cifs vboxvideo drm snd_intel8x0 snd_ac97_codec ac97_bus snd_pcm snd_seq_midi snd_rawmidi snd_seq_midi_event snd_seq bnep rfcomm snd_timer bluetooth snd_seq_device ppdev snd vboxguest parport_pc joydev mac_hid soundcore snd_page_alloc psmouse i2c_piix4 serio_raw lp parport usbhid hid e1000 [ 833.215629] [ 833.215629] Pid: 1752, comm: mount.cifs Not tainted 3.0.0-rc7-bisectcifs-fec11dd9a0+ #18 innotek GmbH VirtualBox/VirtualBox [ 833.215629] RIP: 0010:[<0000000000000000>] [< (null)>] (null) [ 833.215629] RSP: 0018:ffff8800119c9c50 EFLAGS: 00010282 [ 833.215629] RAX: ffffffffa02186c0 RBX: ffff88000c427780 RCX: 0000000000000000 [ 833.215629] RDX: 0000000000000000 RSI: ffff88000c427780 RDI: ffff88000c4362e8 [ 833.215629] RBP: ffff8800119c9c88 R08: ffff88001fc15e30 R09: 00000000d69515c7 [ 833.215629] R10: ffffffffa0201972 R11: ffff88000e8f6a28 R12: ffff88000c4362e8 [ 833.215629] R13: 0000000000000000 R14: 0000000000000000 R15: ffff88001181aaa6 [ 833.215629] FS: 00007f2986171700(0000) GS:ffff88001fc00000(0000) knlGS:0000000000000000 [ 833.215629] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ 833.215629] CR2: 0000000000000000 CR3: 000000001b982000 CR4: 00000000000006f0 [ 833.215629] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 833.215629] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 833.215629] Process mount.cifs (pid: 1752, threadinfo ffff8800119c8000, task ffff88001c1c16f0) [ 833.215629] Stack: [ 833.215629] ffffffff8116a9b5 ffff8800119c9c88 ffffffff81178075 0000000000000286 [ 833.215629] 0000000000000000 ffff88000c4276c0 ffff8800119c9ce8 ffff8800119c9cc8 [ 833.215629] ffffffff8116b06e ffff88001bc6fc00 ffff88000c4276c0 ffff88000c4276c0 [ 833.215629] Call Trace: [ 833.215629] [] ? d_alloc_and_lookup+0x45/0x90 [ 833.215629] [] ? d_lookup+0x35/0x60 [ 833.215629] [] __lookup_hash.part.14+0x9e/0xc0 [ 833.215629] [] lookup_one_len+0x146/0x1e0 [ 833.215629] [] ? _raw_spin_lock+0xe/0x20 [ 833.215629] [] cifs_do_mount+0x26d/0x500 [cifs] [ 833.215629] [] mount_fs+0x43/0x1b0 [ 833.215629] [] vfs_kern_mount+0x6a/0xd0 [ 833.215629] [] do_kern_mount+0x54/0x110 [ 833.215629] [] do_mount+0x262/0x840 [ 833.215629] [] ? __get_free_pages+0xe/0x50 [ 833.215629] [] ? copy_mount_options+0x3a/0x180 [ 833.215629] [] sys_mount+0x8d/0xe0 [ 833.215629] [] system_call_fastpath+0x16/0x1b [ 833.215629] Code: Bad RIP value. [ 833.215629] RIP [< (null)>] (null) [ 833.215629] RSP [ 833.215629] CR2: 0000000000000000 [ 833.238525] ---[ end trace ec00758b8d44f529 ]--- When walking down the path on the server, it's possible to hit a symlink. The path walking code assumes that the caller will handle that situation properly, but cifs_get_root() isn't set up for it. This patch prevents the oops by simply returning an error. A better solution would be to try and chase the symlinks here, but that's fairly complicated to handle. Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=53221 Reported-and-tested-by: Kjell Braden Signed-off-by: Jeff Layton Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/cifsfs.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs') diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 53e7d72..bf6aa8c 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -571,6 +571,11 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb) dentry = ERR_PTR(-ENOENT); break; } + if (!S_ISDIR(dir->i_mode)) { + dput(dentry); + dentry = ERR_PTR(-ENOTDIR); + break; + } /* skip separators */ while (*s == sep) -- cgit v1.1 From 542e9d5675e96e3affcac837b358b6dadebbbc37 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 12 Mar 2013 02:59:49 +0000 Subject: vfs: fix pipe counter breakage commit a930d8790552658140d7d0d2e316af4f0d76a512 upstream. If you open a pipe for neither read nor write, the pipe code will not add any usage counters to the pipe, causing the 'struct pipe_inode_info" to be potentially released early. That doesn't normally matter, since you cannot actually use the pipe, but the pipe release code - particularly fasync handling - still expects the actual pipe infrastructure to all be there. And rather than adding NULL pointer checks, let's just disallow this case, the same way we already do for the named pipe ("fifo") case. This is ancient going back to pre-2.4 days, and until trinity, nobody naver noticed. Reported-by: Dave Jones Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/pipe.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/pipe.c b/fs/pipe.c index 0499a96..342aa86 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -859,6 +859,9 @@ pipe_rdwr_open(struct inode *inode, struct file *filp) { int ret = -ENOENT; + if (!(filp->f_mode & (FMODE_READ|FMODE_WRITE))) + return -EINVAL; + mutex_lock(&inode->i_mutex); if (inode->i_pipe) { -- cgit v1.1 From 64365ddb08f27f2583f2323a147520fed192ef40 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Sat, 9 Mar 2013 15:28:44 +0100 Subject: ext3: Fix format string issues commit 8d0c2d10dd72c5292eda7a06231056a4c972e4cc upstream. ext3_msg() takes the printk prefix as the second parameter and the format string as the third parameter. Two callers of ext3_msg omit the prefix and pass the format string as the second parameter and the first parameter to the format string as the third parameter. In both cases this string comes from an arbitrary source. Which means the string may contain format string characters, which will lead to undefined and potentially harmful behavior. The issue was introduced in commit 4cf46b67eb("ext3: Unify log messages in ext3") and is fixed by this patch. Signed-off-by: Lars-Peter Clausen Signed-off-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/ext3/super.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ext3/super.c b/fs/ext3/super.c index aad153e..ba57a63 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -371,7 +371,7 @@ static struct block_device *ext3_blkdev_get(dev_t dev, struct super_block *sb) return bdev; fail: - ext3_msg(sb, "error: failed to open journal device %s: %ld", + ext3_msg(sb, KERN_ERR, "error: failed to open journal device %s: %ld", __bdevname(dev, b), PTR_ERR(bdev)); return NULL; @@ -892,7 +892,7 @@ static ext3_fsblk_t get_sb_block(void **data, struct super_block *sb) /*todo: use simple_strtoll with >32bit ext3 */ sb_block = simple_strtoul(options, &options, 0); if (*options && *options != ',') { - ext3_msg(sb, "error: invalid sb specification: %s", + ext3_msg(sb, KERN_ERR, "error: invalid sb specification: %s", (char *) *data); return 1; } -- cgit v1.1 From d039dc5859a079df992b83138e8dec32f5c8fbf0 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Sat, 9 Mar 2013 15:18:39 +0000 Subject: btrfs: use rcu_barrier() to wait for bdev puts at unmount commit bc178622d40d87e75abc131007342429c9b03351 upstream. Doing this would reliably fail with -EBUSY for me: # mount /dev/sdb2 /mnt/scratch; umount /mnt/scratch; mkfs.btrfs -f /dev/sdb2 ... unable to open /dev/sdb2: Device or resource busy because mkfs.btrfs tries to open the device O_EXCL, and somebody still has it. Using systemtap to track bdev gets & puts shows a kworker thread doing a blkdev put after mkfs attempts a get; this is left over from the unmount path: btrfs_close_devices __btrfs_close_devices call_rcu(&device->rcu, free_device); free_device INIT_WORK(&device->rcu_work, __free_device); schedule_work(&device->rcu_work); so unmount might complete before __free_device fires & does its blkdev_put. Adding an rcu_barrier() to btrfs_close_devices() causes unmount to wait until all blkdev_put()s are done, and the device is truly free once unmount completes. Signed-off-by: Eric Sandeen Signed-off-by: Josef Bacik Signed-off-by: Chris Mason Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/volumes.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'fs') diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 06c8ced..7745ad5 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -546,6 +546,12 @@ int btrfs_close_devices(struct btrfs_fs_devices *fs_devices) __btrfs_close_devices(fs_devices); free_fs_devices(fs_devices); } + /* + * Wait for rcu kworkers under __btrfs_close_devices + * to finish all blkdev_puts so device is really + * free when umount is done. + */ + rcu_barrier(); return ret; } -- cgit v1.1 From 827401f0005244b97f9742df38996b3c76d8e297 Mon Sep 17 00:00:00 2001 From: Guo Chao Date: Thu, 21 Feb 2013 15:16:42 -0800 Subject: block: use i_size_write() in bd_set_size() commit d646a02a9d44d1421f273ae3923d97b47b918176 upstream. blkdev_ioctl(GETBLKSIZE) uses i_size_read() to read size of block device. If we update block size directly, reader may see intermediate result in some machines and configurations. Use i_size_write() instead. Signed-off-by: Guo Chao Cc: Alexander Viro Cc: Guo Chao Cc: M. Hindess Cc: Nikanth Karthikesan Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe Acked-by: Jeff Mahoney Signed-off-by: Greg Kroah-Hartman --- fs/block_dev.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/block_dev.c b/fs/block_dev.c index a580028..77e8e5b 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1052,7 +1052,9 @@ void bd_set_size(struct block_device *bdev, loff_t size) { unsigned bsize = bdev_logical_block_size(bdev); - bdev->bd_inode->i_size = size; + mutex_lock(&bdev->bd_inode->i_mutex); + i_size_write(bdev->bd_inode, size); + mutex_unlock(&bdev->bd_inode->i_mutex); while (bsize < PAGE_CACHE_SIZE) { if (size & bsize) break; -- cgit v1.1 From 554d123e99dbb1d2192501c8d295e556d777388e Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 11 Mar 2013 09:52:19 -0400 Subject: cifs: ignore everything in SPNEGO blob after mechTypes commit f853c616883a8de966873a1dab283f1369e275a1 upstream. We've had several reports of people attempting to mount Windows 8 shares and getting failures with a return code of -EINVAL. The default sec= mode changed recently to sec=ntlmssp. With that, we expect and parse a SPNEGO blob from the server in the NEGOTIATE reply. The current decode_negTokenInit function first parses all of the mechTypes and then tries to parse the rest of the negTokenInit reply. The parser however currently expects a mechListMIC or nothing to follow the mechTypes, but Windows 8 puts a mechToken field there instead to carry some info for the new NegoEx stuff. In practice, we don't do anything with the fields after the mechTypes anyway so I don't see any real benefit in continuing to parse them. This patch just has the kernel ignore the fields after the mechTypes. We'll probably need to reinstate some of this if we ever want to support NegoEx. Reported-by: Jason Burgess Reported-by: Yan Li Signed-off-by: Jeff Layton Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/asn1.c | 53 +++++------------------------------------------------ 1 file changed, 5 insertions(+), 48 deletions(-) (limited to 'fs') diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c index cfd1ce3..1d36db1 100644 --- a/fs/cifs/asn1.c +++ b/fs/cifs/asn1.c @@ -614,53 +614,10 @@ decode_negTokenInit(unsigned char *security_blob, int length, } } - /* mechlistMIC */ - if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { - /* Check if we have reached the end of the blob, but with - no mechListMic (e.g. NTLMSSP instead of KRB5) */ - if (ctx.error == ASN1_ERR_DEC_EMPTY) - goto decode_negtoken_exit; - cFYI(1, "Error decoding last part negTokenInit exit3"); - return 0; - } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) { - /* tag = 3 indicating mechListMIC */ - cFYI(1, "Exit 4 cls = %d con = %d tag = %d end = %p (%d)", - cls, con, tag, end, *end); - return 0; - } - - /* sequence */ - if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { - cFYI(1, "Error decoding last part negTokenInit exit5"); - return 0; - } else if ((cls != ASN1_UNI) || (con != ASN1_CON) - || (tag != ASN1_SEQ)) { - cFYI(1, "cls = %d con = %d tag = %d end = %p (%d)", - cls, con, tag, end, *end); - } - - /* sequence of */ - if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { - cFYI(1, "Error decoding last part negTokenInit exit 7"); - return 0; - } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) { - cFYI(1, "Exit 8 cls = %d con = %d tag = %d end = %p (%d)", - cls, con, tag, end, *end); - return 0; - } - - /* general string */ - if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { - cFYI(1, "Error decoding last part negTokenInit exit9"); - return 0; - } else if ((cls != ASN1_UNI) || (con != ASN1_PRI) - || (tag != ASN1_GENSTR)) { - cFYI(1, "Exit10 cls = %d con = %d tag = %d end = %p (%d)", - cls, con, tag, end, *end); - return 0; - } - cFYI(1, "Need to call asn1_octets_decode() function for %s", - ctx.pointer); /* is this UTF-8 or ASCII? */ -decode_negtoken_exit: + /* + * We currently ignore anything at the end of the SPNEGO blob after + * the mechTypes have been parsed, since none of that info is + * used at the moment. + */ return 1; } -- cgit v1.1 From 84bd1744acdd6e3cef26df9de51c834b8aa48b9a Mon Sep 17 00:00:00 2001 From: Zheng Liu Date: Sun, 10 Mar 2013 21:20:23 -0400 Subject: ext4: fix the wrong number of the allocated blocks in ext4_split_extent() commit 3a2256702e47f68f921dfad41b1764d05c572329 upstream. This commit fixes a wrong return value of the number of the allocated blocks in ext4_split_extent. When the length of blocks we want to allocate is greater than the length of the current extent, we return a wrong number. Let's see what happens in the following case when we call ext4_split_extent(). map: [48, 72] ex: [32, 64, u] 'ex' will be split into two parts: ex1: [32, 47, u] ex2: [48, 64, w] 'map->m_len' is returned from this function, and the value is 24. But the real length is 16. So it should be fixed. Meanwhile in this commit we use right length of the allocated blocks when get_reserved_cluster_alloc in ext4_ext_handle_uninitialized_extents is called. Signed-off-by: Zheng Liu Signed-off-by: "Theodore Ts'o" Cc: Dmitry Monakhov Signed-off-by: Greg Kroah-Hartman --- fs/ext4/extents.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 680df5d..354ba48 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2901,6 +2901,7 @@ static int ext4_split_extent(handle_t *handle, int err = 0; int uninitialized; int split_flag1, flags1; + int allocated = map->m_len; depth = ext_depth(inode); ex = path[depth].p_ext; @@ -2919,6 +2920,8 @@ static int ext4_split_extent(handle_t *handle, map->m_lblk + map->m_len, split_flag1, flags1); if (err) goto out; + } else { + allocated = ee_len - (map->m_lblk - ee_block); } ext4_ext_drop_refs(path); @@ -2941,7 +2944,7 @@ static int ext4_split_extent(handle_t *handle, ext4_ext_show_leaf(inode, path); out: - return err ? err : map->m_len; + return err ? err : allocated; } #define EXT4_EXT_ZERO_LEN 7 @@ -3309,6 +3312,7 @@ out: allocated - map->m_len); allocated = map->m_len; } + map->m_len = allocated; /* * If we have done fallocate with the offset that is already -- cgit v1.1 From 400ac274e9c84f439a2e419bcb392aefbd9f4a3e Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 5 Feb 2013 13:59:56 +0100 Subject: udf: Fix bitmap overflow on large filesystems with small block size commit 89b1f39eb4189de745fae554b0d614d87c8d5c63 upstream. For large UDF filesystems with 512-byte blocks the number of necessary bitmap blocks is larger than 2^16 so s_nr_groups in udf_bitmap overflows (the number will overflow for filesystems larger than 128 GB with 512-byte blocks). That results in ENOSPC errors despite the filesystem has plenty of free space. Fix the problem by changing s_nr_groups' type to 'int'. That is enough even for filesystems 2^32 blocks (UDF maximum) and 512-byte blocksize. Reported-and-tested-by: v10lator@myway.de Signed-off-by: Jan Kara Cc: Jim Trigg Signed-off-by: Greg Kroah-Hartman --- fs/udf/udf_sb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h index 4858c19..54706dc 100644 --- a/fs/udf/udf_sb.h +++ b/fs/udf/udf_sb.h @@ -79,7 +79,7 @@ struct udf_virtual_data { struct udf_bitmap { __u32 s_extLength; __u32 s_extPosition; - __u16 s_nr_groups; + int s_nr_groups; struct buffer_head **s_block_bitmap; }; -- cgit v1.1 From c18508394610b47964ef6c2d4d71b85873ce10fe Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 25 Feb 2013 10:20:36 -0500 Subject: Fix: compat_rw_copy_check_uvector() misuse in aio, readv, writev, and security keys commit 8aec0f5d4137532de14e6554fd5dd201ff3a3c49 upstream. Looking at mm/process_vm_access.c:process_vm_rw() and comparing it to compat_process_vm_rw() shows that the compatibility code requires an explicit "access_ok()" check before calling compat_rw_copy_check_uvector(). The same difference seems to appear when we compare fs/read_write.c:do_readv_writev() to fs/compat.c:compat_do_readv_writev(). This subtle difference between the compat and non-compat requirements should probably be debated, as it seems to be error-prone. In fact, there are two others sites that use this function in the Linux kernel, and they both seem to get it wrong: Now shifting our attention to fs/aio.c, we see that aio_setup_iocb() also ends up calling compat_rw_copy_check_uvector() through aio_setup_vectored_rw(). Unfortunately, the access_ok() check appears to be missing. Same situation for security/keys/compat.c:compat_keyctl_instantiate_key_iov(). I propose that we add the access_ok() check directly into compat_rw_copy_check_uvector(), so callers don't have to worry about it, and it therefore makes the compat call code similar to its non-compat counterpart. Place the access_ok() check in the same location where copy_from_user() can trigger a -EFAULT error in the non-compat code, so the ABI behaviors are alike on both compat and non-compat. While we are here, fix compat_do_readv_writev() so it checks for compat_rw_copy_check_uvector() negative return values. And also, fix a memory leak in compat_keyctl_instantiate_key_iov() error handling. Acked-by: Linus Torvalds Acked-by: Al Viro Signed-off-by: Mathieu Desnoyers Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/compat.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/compat.c b/fs/compat.c index e5358c2..f77a963 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -576,6 +576,10 @@ ssize_t compat_rw_copy_check_uvector(int type, } *ret_pointer = iov; + ret = -EFAULT; + if (!access_ok(VERIFY_READ, uvector, nr_segs*sizeof(*uvector))) + goto out; + /* * Single unix specification: * We should -EINVAL if an element length is not >= 0 and fitting an @@ -1106,17 +1110,12 @@ static ssize_t compat_do_readv_writev(int type, struct file *file, if (!file->f_op) goto out; - ret = -EFAULT; - if (!access_ok(VERIFY_READ, uvector, nr_segs*sizeof(*uvector))) - goto out; - - tot_len = compat_rw_copy_check_uvector(type, uvector, nr_segs, + ret = compat_rw_copy_check_uvector(type, uvector, nr_segs, UIO_FASTIOV, iovstack, &iov); - if (tot_len == 0) { - ret = 0; + if (ret <= 0) goto out; - } + tot_len = ret; ret = rw_verify_area(type, file, pos, tot_len); if (ret < 0) goto out; -- cgit v1.1 From 89e2149fdbd22db4c354df58f2939b8878e6f10d Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Thu, 12 Jul 2012 08:46:54 +0200 Subject: isofs: avoid info leak on export commit fe685aabf7c8c9f138e5ea900954d295bf229175 upstream. For type 1 the parent_offset member in struct isofs_fid gets copied uninitialized to userland. Fix this by initializing it to 0. Signed-off-by: Mathias Krause Signed-off-by: Jan Kara Cc: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- fs/isofs/export.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/isofs/export.c b/fs/isofs/export.c index 516eb21..fd88add 100644 --- a/fs/isofs/export.c +++ b/fs/isofs/export.c @@ -135,6 +135,7 @@ isofs_export_encode_fh(struct dentry *dentry, len = 3; fh32[0] = ei->i_iget5_block; fh16[2] = (__u16)ei->i_iget5_offset; /* fh16 [sic] */ + fh16[3] = 0; /* avoid leaking uninitialized data */ fh32[2] = inode->i_generation; if (connectable && !S_ISDIR(inode->i_mode)) { struct inode *parent; -- cgit v1.1 From cb536e41582fc565991c09ad63d4ae623870f1b0 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Thu, 12 Jul 2012 08:46:55 +0200 Subject: udf: avoid info leak on export commit 0143fc5e9f6f5aad4764801015bc8d4b4a278200 upstream. For type 0x51 the udf.parent_partref member in struct fid gets copied uninitialized to userland. Fix this by initializing it to 0. Signed-off-by: Mathias Krause Signed-off-by: Jan Kara Cc: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- fs/udf/namei.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/udf/namei.c b/fs/udf/namei.c index f1dce84..d8c1bb5 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -1297,6 +1297,7 @@ static int udf_encode_fh(struct dentry *de, __u32 *fh, int *lenp, *lenp = 3; fid->udf.block = location.logicalBlockNum; fid->udf.partref = location.partitionReferenceNum; + fid->udf.parent_partref = 0; fid->udf.generation = inode->i_generation; if (connectable && !S_ISDIR(inode->i_mode)) { -- cgit v1.1 From ea8d2d19ad17ceafc883b86e448a405cf7808927 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 17 Dec 2012 16:03:20 -0800 Subject: exec: use -ELOOP for max recursion depth commit d740269867021faf4ce38a449353d2b986c34a67 upstream. To avoid an explosion of request_module calls on a chain of abusive scripts, fail maximum recursion with -ELOOP instead of -ENOEXEC. As soon as maximum recursion depth is hit, the error will fail all the way back up the chain, aborting immediately. This also has the side-effect of stopping the user's shell from attempting to reexecute the top-level file as a shell script. As seen in the dash source: if (cmd != path_bshell && errno == ENOEXEC) { *argv-- = cmd; *argv = cmd = path_bshell; goto repeat; } The above logic was designed for running scripts automatically that lacked the "#!" header, not to re-try failed recursion. On a legitimate -ENOEXEC, things continue to behave as the shell expects. Additionally, when tracking recursion, the binfmt handlers should not be involved. The recursion being tracked is the depth of calls through search_binary_handler(), so that function should be exclusively responsible for tracking the depth. Signed-off-by: Kees Cook Cc: halfdog Cc: P J P Cc: Alexander Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Cc: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- fs/binfmt_em86.c | 1 - fs/binfmt_misc.c | 6 ------ fs/binfmt_script.c | 4 +--- fs/exec.c | 10 +++++----- 4 files changed, 6 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c index b8e8b0a..4a1b984 100644 --- a/fs/binfmt_em86.c +++ b/fs/binfmt_em86.c @@ -42,7 +42,6 @@ static int load_em86(struct linux_binprm *bprm,struct pt_regs *regs) return -ENOEXEC; } - bprm->recursion_depth++; /* Well, the bang-shell is implicit... */ allow_write_access(bprm->file); fput(bprm->file); bprm->file = NULL; diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 5463952..b2497d4 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -116,10 +116,6 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs) if (!enabled) goto _ret; - retval = -ENOEXEC; - if (bprm->recursion_depth > BINPRM_MAX_RECURSION) - goto _ret; - /* to keep locking time low, we copy the interpreter string */ read_lock(&entries_lock); fmt = check_file(bprm); @@ -200,8 +196,6 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs) if (retval < 0) goto _error; - bprm->recursion_depth++; - retval = search_binary_handler (bprm, regs); if (retval < 0) goto _error; diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c index e39c18a..211ede0 100644 --- a/fs/binfmt_script.c +++ b/fs/binfmt_script.c @@ -22,15 +22,13 @@ static int load_script(struct linux_binprm *bprm,struct pt_regs *regs) char interp[BINPRM_BUF_SIZE]; int retval; - if ((bprm->buf[0] != '#') || (bprm->buf[1] != '!') || - (bprm->recursion_depth > BINPRM_MAX_RECURSION)) + if ((bprm->buf[0] != '#') || (bprm->buf[1] != '!')) return -ENOEXEC; /* * This section does the #! interpretation. * Sorta complicated, but hopefully it will work. -TYT */ - bprm->recursion_depth++; allow_write_access(bprm->file); fput(bprm->file); bprm->file = NULL; diff --git a/fs/exec.c b/fs/exec.c index 08f3e4e..3801daf 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1369,6 +1369,10 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) int try,retval; struct linux_binfmt *fmt; + /* This allows 4 levels of binfmt rewrites before failing hard. */ + if (depth > 5) + return -ELOOP; + retval = security_bprm_check(bprm); if (retval) return retval; @@ -1387,12 +1391,8 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) if (!try_module_get(fmt->module)) continue; read_unlock(&binfmt_lock); + bprm->recursion_depth = depth + 1; retval = fn(bprm, regs); - /* - * Restore the depth counter to its starting value - * in this call, so we don't have to rely on every - * load_binary function to restore it on return. - */ bprm->recursion_depth = depth; if (retval >= 0) { if (depth == 0) -- cgit v1.1 From f366c8f271888f48e15cc7c0ab70f184c220c8a4 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 20 Mar 2013 23:25:24 +0800 Subject: sysfs: fix race between readdir and lseek commit 991f76f837bf22c5bb07261cfd86525a0a96650c upstream. While readdir() is running, lseek() may set filp->f_pos as zero, then may leave filp->private_data pointing to one sysfs_dirent object without holding its reference counter, so the sysfs_dirent object may be used after free in next readdir(). This patch holds inode->i_mutex to avoid the problem since the lock is always held in readdir path. Reported-by: Dave Jones Tested-by: Sasha Levin Signed-off-by: Ming Lei Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/dir.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 567b3db..4afb70a 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -955,10 +955,21 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir) return 0; } +static loff_t sysfs_dir_llseek(struct file *file, loff_t offset, int whence) +{ + struct inode *inode = file->f_path.dentry->d_inode; + loff_t ret; + + mutex_lock(&inode->i_mutex); + ret = generic_file_llseek(file, offset, whence); + mutex_unlock(&inode->i_mutex); + + return ret; +} const struct file_operations sysfs_dir_operations = { .read = generic_read_dir, .readdir = sysfs_readdir, .release = sysfs_dir_release, - .llseek = generic_file_llseek, + .llseek = sysfs_dir_llseek, }; -- cgit v1.1 From b76c1eabd474cd44937fc60a26be2b926a366e55 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 20 Mar 2013 23:25:25 +0800 Subject: sysfs: handle failure path correctly for readdir() commit e5110f411d2ee35bf8d202ccca2e89c633060dca upstream. In case of 'if (filp->f_pos == 0 or 1)' of sysfs_readdir(), the failure from filldir() isn't handled, and the reference counter of the sysfs_dirent object pointed by filp->private_data will be released without clearing filp->private_data, so use after free bug will be triggered later. This patch returns immeadiately under the situation for fixing the bug, and it is reasonable to return from readdir() when filldir() fails. Reported-by: Dave Jones Tested-by: Sasha Levin Signed-off-by: Ming Lei Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/dir.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 4afb70a..7cbc585 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -917,6 +917,8 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir) ino = parent_sd->s_ino; if (filldir(dirent, ".", 1, filp->f_pos, ino, DT_DIR) == 0) filp->f_pos++; + else + return 0; } if (filp->f_pos == 1) { if (parent_sd->s_parent) @@ -925,6 +927,8 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir) ino = parent_sd->s_ino; if (filldir(dirent, "..", 2, filp->f_pos, ino, DT_DIR) == 0) filp->f_pos++; + else + return 0; } mutex_lock(&sysfs_mutex); for (pos = sysfs_dir_pos(ns, parent_sd, filp->f_pos, pos); -- cgit v1.1 From 8c7028941242372574880e513207abdbe486c3e5 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 26 Mar 2013 15:31:45 -0400 Subject: Btrfs: limit the global reserve to 512mb commit fdf30d1c1b386e1b73116cc7e0fb14e962b763b0 upstream. A user reported a problem where he was getting early ENOSPC with hundreds of gigs of free data space and 6 gigs of free metadata space. This is because the global block reserve was taking up the entire free metadata space. This is ridiculous, we have infrastructure in place to throttle if we start using too much of the global reserve, so instead of letting it get this huge just limit it to 512mb so that users can still get work done. This allowed the user to complete his rsync without issues. Thanks Reported-and-tested-by: Stefan Priebe Signed-off-by: Josef Bacik Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent-tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 7e20a65..01220b7 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3786,7 +3786,7 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info) spin_lock(&block_rsv->lock); spin_lock(&sinfo->lock); - block_rsv->size = num_bytes; + block_rsv->size = min_t(u64, num_bytes, 512 * 1024 * 1024); num_bytes = sinfo->bytes_used + sinfo->bytes_pinned + sinfo->bytes_reserved + sinfo->bytes_readonly + -- cgit v1.1 From c938c22b48302eeb9a6f3cc83f223f37d98ba6f7 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Tue, 19 Mar 2013 12:36:52 +0100 Subject: NFSv4: include bitmap in nfsv4 get acl data commit bf118a342f10dafe44b14451a1392c3254629a1f upstream. The NFSv4 bitmap size is unbounded: a server can return an arbitrary sized bitmap in an FATTR4_WORD0_ACL request. Replace using the nfs4_fattr_bitmap_maxsz as a guess to the maximum bitmask returned by a server with the inclusion of the bitmap (xdr length plus bitmasks) and the acl data xdr length to the (cached) acl page data. This is a general solution to commit e5012d1f "NFSv4.1: update nfs4_fattr_bitmap_maxsz" and fixes hitting a BUG_ON in xdr_shrink_bufhead when getting ACLs. Fix a bug in decode_getacl that returned -EINVAL on ACLs > page when getxattr was called with a NULL buffer, preventing ACL > PAGE_SIZE from being retrieved. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust Signed-off-by: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- fs/nfs/nfs4proc.c | 96 +++++++++++++++++++++++++++++++++---------------------- fs/nfs/nfs4xdr.c | 31 +++++++++++++----- 2 files changed, 80 insertions(+), 47 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 3720caa..7090b21 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3440,19 +3440,6 @@ static inline int nfs4_server_supports_acls(struct nfs_server *server) */ #define NFS4ACL_MAXPAGES (XATTR_SIZE_MAX >> PAGE_CACHE_SHIFT) -static void buf_to_pages(const void *buf, size_t buflen, - struct page **pages, unsigned int *pgbase) -{ - const void *p = buf; - - *pgbase = offset_in_page(buf); - p -= *pgbase; - while (p < buf + buflen) { - *(pages++) = virt_to_page(p); - p += PAGE_CACHE_SIZE; - } -} - static int buf_to_pages_noslab(const void *buf, size_t buflen, struct page **pages, unsigned int *pgbase) { @@ -3549,9 +3536,19 @@ out: nfs4_set_cached_acl(inode, acl); } +/* + * The getxattr API returns the required buffer length when called with a + * NULL buf. The NFSv4 acl tool then calls getxattr again after allocating + * the required buf. On a NULL buf, we send a page of data to the server + * guessing that the ACL request can be serviced by a page. If so, we cache + * up to the page of ACL data, and the 2nd call to getxattr is serviced by + * the cache. If not so, we throw away the page, and cache the required + * length. The next getxattr call will then produce another round trip to + * the server, this time with the input buf of the required size. + */ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen) { - struct page *pages[NFS4ACL_MAXPAGES]; + struct page *pages[NFS4ACL_MAXPAGES] = {NULL, }; struct nfs_getaclargs args = { .fh = NFS_FH(inode), .acl_pages = pages, @@ -3566,41 +3563,60 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu .rpc_argp = &args, .rpc_resp = &res, }; - struct page *localpage = NULL; - int ret; + int ret = -ENOMEM, npages, i, acl_len = 0; - if (buflen < PAGE_SIZE) { - /* As long as we're doing a round trip to the server anyway, - * let's be prepared for a page of acl data. */ - localpage = alloc_page(GFP_KERNEL); - resp_buf = page_address(localpage); - if (localpage == NULL) - return -ENOMEM; - args.acl_pages[0] = localpage; - args.acl_pgbase = 0; - args.acl_len = PAGE_SIZE; - } else { - resp_buf = buf; - buf_to_pages(buf, buflen, args.acl_pages, &args.acl_pgbase); + npages = (buflen + PAGE_SIZE - 1) >> PAGE_SHIFT; + /* As long as we're doing a round trip to the server anyway, + * let's be prepared for a page of acl data. */ + if (npages == 0) + npages = 1; + + for (i = 0; i < npages; i++) { + pages[i] = alloc_page(GFP_KERNEL); + if (!pages[i]) + goto out_free; + } + if (npages > 1) { + /* for decoding across pages */ + args.acl_scratch = alloc_page(GFP_KERNEL); + if (!args.acl_scratch) + goto out_free; } - ret = nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode), &msg, &args.seq_args, &res.seq_res, 0); + args.acl_len = npages * PAGE_SIZE; + args.acl_pgbase = 0; + /* Let decode_getfacl know not to fail if the ACL data is larger than + * the page we send as a guess */ + if (buf == NULL) + res.acl_flags |= NFS4_ACL_LEN_REQUEST; + resp_buf = page_address(pages[0]); + + dprintk("%s buf %p buflen %ld npages %d args.acl_len %ld\n", + __func__, buf, buflen, npages, args.acl_len); + ret = nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode), + &msg, &args.seq_args, &res.seq_res, 0); if (ret) goto out_free; - if (res.acl_len > args.acl_len) - nfs4_write_cached_acl(inode, NULL, res.acl_len); + + acl_len = res.acl_len - res.acl_data_offset; + if (acl_len > args.acl_len) + nfs4_write_cached_acl(inode, NULL, acl_len); else - nfs4_write_cached_acl(inode, resp_buf, res.acl_len); + nfs4_write_cached_acl(inode, resp_buf + res.acl_data_offset, + acl_len); if (buf) { ret = -ERANGE; - if (res.acl_len > buflen) + if (acl_len > buflen) goto out_free; - if (localpage) - memcpy(buf, resp_buf, res.acl_len); + _copy_from_pages(buf, pages, res.acl_data_offset, + res.acl_len); } - ret = res.acl_len; + ret = acl_len; out_free: - if (localpage) - __free_page(localpage); + for (i = 0; i < npages; i++) + if (pages[i]) + __free_page(pages[i]); + if (args.acl_scratch) + __free_page(args.acl_scratch); return ret; } @@ -3631,6 +3647,8 @@ static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen) nfs_zap_acl_cache(inode); ret = nfs4_read_cached_acl(inode, buf, buflen); if (ret != -ENOENT) + /* -ENOENT is returned if there is no ACL or if there is an ACL + * but no cached acl data, just the acl length */ return ret; return nfs4_get_acl_uncached(inode, buf, buflen); } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 5fcc67b..c557734 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -2374,11 +2374,13 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr, encode_compound_hdr(xdr, req, &hdr); encode_sequence(xdr, &args->seq_args, &hdr); encode_putfh(xdr, args->fh, &hdr); - replen = hdr.replen + op_decode_hdr_maxsz + nfs4_fattr_bitmap_maxsz + 1; + replen = hdr.replen + op_decode_hdr_maxsz + 1; encode_getattr_two(xdr, FATTR4_WORD0_ACL, 0, &hdr); xdr_inline_pages(&req->rq_rcv_buf, replen << 2, args->acl_pages, args->acl_pgbase, args->acl_len); + xdr_set_scratch_buffer(xdr, page_address(args->acl_scratch), PAGE_SIZE); + encode_nops(&hdr); } @@ -4714,17 +4716,18 @@ decode_restorefh(struct xdr_stream *xdr) } static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, - size_t *acl_len) + struct nfs_getaclres *res) { - __be32 *savep; + __be32 *savep, *bm_p; uint32_t attrlen, bitmap[2] = {0}; struct kvec *iov = req->rq_rcv_buf.head; int status; - *acl_len = 0; + res->acl_len = 0; if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) goto out; + bm_p = xdr->p; if ((status = decode_attr_bitmap(xdr, bitmap)) != 0) goto out; if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0) @@ -4736,18 +4739,30 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, size_t hdrlen; u32 recvd; + /* The bitmap (xdr len + bitmaps) and the attr xdr len words + * are stored with the acl data to handle the problem of + * variable length bitmaps.*/ + xdr->p = bm_p; + res->acl_data_offset = be32_to_cpup(bm_p) + 2; + res->acl_data_offset <<= 2; + /* We ignore &savep and don't do consistency checks on * the attr length. Let userspace figure it out.... */ hdrlen = (u8 *)xdr->p - (u8 *)iov->iov_base; + attrlen += res->acl_data_offset; recvd = req->rq_rcv_buf.len - hdrlen; if (attrlen > recvd) { - dprintk("NFS: server cheating in getattr" - " acl reply: attrlen %u > recvd %u\n", + if (res->acl_flags & NFS4_ACL_LEN_REQUEST) { + /* getxattr interface called with a NULL buf */ + res->acl_len = attrlen; + goto out; + } + dprintk("NFS: acl reply: attrlen %u > recvd %u\n", attrlen, recvd); return -EINVAL; } xdr_read_pages(xdr, attrlen); - *acl_len = attrlen; + res->acl_len = attrlen; } else status = -EOPNOTSUPP; @@ -5682,7 +5697,7 @@ nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, struct xdr_stream *xdr, status = decode_putfh(xdr); if (status) goto out; - status = decode_getacl(xdr, rqstp, &res->acl_len); + status = decode_getacl(xdr, rqstp, res); out: return status; -- cgit v1.1 From 01b140abad66f022ff6dff7cc1307b07281035fa Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 19 Mar 2013 12:36:53 +0100 Subject: NFSv4: Fix an Oops in the NFSv4 getacl code commit 331818f1c468a24e581aedcbe52af799366a9dfe upstream. Commit bf118a342f10dafe44b14451a1392c3254629a1f (NFSv4: include bitmap in nfsv4 get acl data) introduces the 'acl_scratch' page for the case where we may need to decode multi-page data. However it fails to take into account the fact that the variable may be NULL (for the case where we're not doing multi-page decode), and it also attaches it to the encoding xdr_stream rather than the decoding one. The immediate result is an Oops in nfs4_xdr_enc_getacl due to the call to page_address() with a NULL page pointer. Signed-off-by: Trond Myklebust Cc: Andy Adamson Signed-off-by: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- fs/nfs/nfs4proc.c | 8 ++++---- fs/nfs/nfs4xdr.c | 5 ++++- 2 files changed, 8 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 7090b21..50ec294 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3578,8 +3578,8 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu } if (npages > 1) { /* for decoding across pages */ - args.acl_scratch = alloc_page(GFP_KERNEL); - if (!args.acl_scratch) + res.acl_scratch = alloc_page(GFP_KERNEL); + if (!res.acl_scratch) goto out_free; } args.acl_len = npages * PAGE_SIZE; @@ -3615,8 +3615,8 @@ out_free: for (i = 0; i < npages; i++) if (pages[i]) __free_page(pages[i]); - if (args.acl_scratch) - __free_page(args.acl_scratch); + if (res.acl_scratch) + __free_page(res.acl_scratch); return ret; } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index c557734..4204e96 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -2379,7 +2379,6 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr, xdr_inline_pages(&req->rq_rcv_buf, replen << 2, args->acl_pages, args->acl_pgbase, args->acl_len); - xdr_set_scratch_buffer(xdr, page_address(args->acl_scratch), PAGE_SIZE); encode_nops(&hdr); } @@ -5688,6 +5687,10 @@ nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, struct xdr_stream *xdr, struct compound_hdr hdr; int status; + if (res->acl_scratch != NULL) { + void *p = page_address(res->acl_scratch); + xdr_set_scratch_buffer(xdr, p, PAGE_SIZE); + } status = decode_compound_hdr(xdr, &hdr); if (status) goto out; -- cgit v1.1 From 2c34b4ae8f8228e1ec083be0333426eca4a31357 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 19 Mar 2013 12:36:54 +0100 Subject: NFS: nfs_getaclargs.acl_len is a size_t MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 56d08fef2369d5ca9ad2e1fc697f5379fd8af751 upstream. Squelch compiler warnings: fs/nfs/nfs4proc.c: In function ‘__nfs4_get_acl_uncached’: fs/nfs/nfs4proc.c:3811:14: warning: comparison between signed and unsigned integer expressions [-Wsign-compare] fs/nfs/nfs4proc.c:3818:15: warning: comparison between signed and unsigned integer expressions [-Wsign-compare] Introduced by commit bf118a34 "NFSv4: include bitmap in nfsv4 get acl data", Dec 7, 2011. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust Signed-off-by: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- fs/nfs/nfs4proc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 50ec294..894e326 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3563,7 +3563,8 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu .rpc_argp = &args, .rpc_resp = &res, }; - int ret = -ENOMEM, npages, i, acl_len = 0; + int ret = -ENOMEM, npages, i; + size_t acl_len = 0; npages = (buflen + PAGE_SIZE - 1) >> PAGE_SHIFT; /* As long as we're doing a round trip to the server anyway, -- cgit v1.1 From 582b4c3dc62284aec367a3f4f74ce8101303e9c4 Mon Sep 17 00:00:00 2001 From: Anatol Pomozov Date: Mon, 1 Apr 2013 09:47:56 -0700 Subject: loop: prevent bdev freeing while device in use commit c1681bf8a7b1b98edee8b862a42c19c4e53205fd upstream. struct block_device lifecycle is defined by its inode (see fs/block_dev.c) - block_device allocated first time we access /dev/loopXX and deallocated on bdev_destroy_inode. When we create the device "losetup /dev/loopXX afile" we want that block_device stay alive until we destroy the loop device with "losetup -d". But because we do not hold /dev/loopXX inode its counter goes 0, and inode/bdev can be destroyed at any moment. Usually it happens at memory pressure or when user drops inode cache (like in the test below). When later in loop_clr_fd() we want to use bdev we have use-after-free error with following stack: BUG: unable to handle kernel NULL pointer dereference at 0000000000000280 bd_set_size+0x10/0xa0 loop_clr_fd+0x1f8/0x420 [loop] lo_ioctl+0x200/0x7e0 [loop] lo_compat_ioctl+0x47/0xe0 [loop] compat_blkdev_ioctl+0x341/0x1290 do_filp_open+0x42/0xa0 compat_sys_ioctl+0xc1/0xf20 do_sys_open+0x16e/0x1d0 sysenter_dispatch+0x7/0x1a To prevent use-after-free we need to grab the device in loop_set_fd() and put it later in loop_clr_fd(). The issue is reprodusible on current Linus head and v3.3. Here is the test: dd if=/dev/zero of=loop.file bs=1M count=1 while [ true ]; do losetup /dev/loop0 loop.file echo 2 > /proc/sys/vm/drop_caches losetup -d /dev/loop0 done [ Doing bdgrab/bput in loop_set_fd/loop_clr_fd is safe, because every time we call loop_set_fd() we check that loop_device->lo_state is Lo_unbound and set it to Lo_bound If somebody will try to set_fd again it will get EBUSY. And if we try to loop_clr_fd() on unbound loop device we'll get ENXIO. loop_set_fd/loop_clr_fd (and any other loop ioctl) is called under loop_device->lo_ctl_mutex. ] Signed-off-by: Anatol Pomozov Cc: Al Viro Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/block_dev.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/block_dev.c b/fs/block_dev.c index 77e8e5b..97e4cb5 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -576,6 +576,7 @@ struct block_device *bdgrab(struct block_device *bdev) ihold(bdev->bd_inode); return bdev; } +EXPORT_SYMBOL(bdgrab); long nr_blockdev_pages(void) { -- cgit v1.1 From 7e36f505caf7882b6cc89ecedcd7f26749ef917a Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 26 Mar 2013 14:11:13 -0400 Subject: nfsd4: reject "negative" acl lengths MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 64a817cfbded8674f345d1117b117f942a351a69 upstream. Since we only enforce an upper bound, not a lower bound, a "negative" length can get through here. The symptom seen was a warning when we attempt to a kmalloc with an excessive size. Reported-by: Toralf Förster Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4xdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index ecdd18a..59ac3f4 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -262,7 +262,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, iattr->ia_valid |= ATTR_SIZE; } if (bmval[0] & FATTR4_WORD0_ACL) { - int nace; + u32 nace; struct nfs4_ace *ace; READ_BUF(4); len += 4; -- cgit v1.1 From 503f4bdcc078e7abee273a85ce322de81b18a224 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 11 Mar 2013 23:39:59 -0400 Subject: ext4: use atomic64_t for the per-flexbg free_clusters count commit 90ba983f6889e65a3b506b30dc606aa9d1d46cd2 upstream. A user who was using a 8TB+ file system and with a very large flexbg size (> 65536) could cause the atomic_t used in the struct flex_groups to overflow. This was detected by PaX security patchset: http://forums.grsecurity.net/viewtopic.php?f=3&t=3289&p=12551#p12551 This bug was introduced in commit 9f24e4208f7e, so it's been around since 2.6.30. :-( Fix this by using an atomic64_t for struct orlav_stats's free_clusters. [Backported for 3.0-stable. Renamed free_clusters back to free_blocks; fixed a few more atomic_read's of free_blocks left in 3.0.] Signed-off-by: "Theodore Ts'o" Reviewed-by: Lukas Czerner Signed-off-by: Lingzhu Xiang Reviewed-by: CAI Qian Signed-off-by: Greg Kroah-Hartman --- fs/ext4/ext4.h | 6 +++--- fs/ext4/ialloc.c | 18 +++++++++--------- fs/ext4/mballoc.c | 10 +++++----- fs/ext4/resize.c | 4 ++-- fs/ext4/super.c | 4 ++-- 5 files changed, 21 insertions(+), 21 deletions(-) (limited to 'fs') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index e0113aa..2041de7 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -288,9 +288,9 @@ struct ext4_group_desc */ struct flex_groups { - atomic_t free_inodes; - atomic_t free_blocks; - atomic_t used_dirs; + atomic64_t free_blocks; + atomic_t free_inodes; + atomic_t used_dirs; }; #define EXT4_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not in use */ diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 29272de..dd732c7 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -345,8 +345,8 @@ static int find_group_flex(struct super_block *sb, struct inode *parent, ext4_group_t ngroups = ext4_get_groups_count(sb); int flex_size = ext4_flex_bg_size(sbi); ext4_group_t best_flex = parent_fbg_group; - int blocks_per_flex = sbi->s_blocks_per_group * flex_size; - int flexbg_free_blocks; + ext4_fsblk_t blocks_per_flex = sbi->s_blocks_per_group * flex_size; + ext4_fsblk_t flexbg_free_blocks; int flex_freeb_ratio; ext4_group_t n_fbg_groups; ext4_group_t i; @@ -355,7 +355,7 @@ static int find_group_flex(struct super_block *sb, struct inode *parent, sbi->s_log_groups_per_flex; find_close_to_parent: - flexbg_free_blocks = atomic_read(&flex_group[best_flex].free_blocks); + flexbg_free_blocks = atomic64_read(&flex_group[best_flex].free_blocks); flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex; if (atomic_read(&flex_group[best_flex].free_inodes) && flex_freeb_ratio > free_block_ratio) @@ -370,7 +370,7 @@ find_close_to_parent: if (i == parent_fbg_group || i == parent_fbg_group - 1) continue; - flexbg_free_blocks = atomic_read(&flex_group[i].free_blocks); + flexbg_free_blocks = atomic64_read(&flex_group[i].free_blocks); flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex; if (flex_freeb_ratio > free_block_ratio && @@ -380,14 +380,14 @@ find_close_to_parent: } if ((atomic_read(&flex_group[best_flex].free_inodes) == 0) || - ((atomic_read(&flex_group[i].free_blocks) > - atomic_read(&flex_group[best_flex].free_blocks)) && + ((atomic64_read(&flex_group[i].free_blocks) > + atomic64_read(&flex_group[best_flex].free_blocks)) && atomic_read(&flex_group[i].free_inodes))) best_flex = i; } if (!atomic_read(&flex_group[best_flex].free_inodes) || - !atomic_read(&flex_group[best_flex].free_blocks)) + !atomic64_read(&flex_group[best_flex].free_blocks)) return -1; found_flexbg: @@ -406,8 +406,8 @@ out: } struct orlov_stats { + __u64 free_blocks; __u32 free_inodes; - __u32 free_blocks; __u32 used_dirs; }; @@ -424,7 +424,7 @@ static void get_orlov_stats(struct super_block *sb, ext4_group_t g, if (flex_size > 1) { stats->free_inodes = atomic_read(&flex_group[g].free_inodes); - stats->free_blocks = atomic_read(&flex_group[g].free_blocks); + stats->free_blocks = atomic64_read(&flex_group[g].free_blocks); stats->used_dirs = atomic_read(&flex_group[g].used_dirs); return; } diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 31bbdb5..35959f6 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2814,8 +2814,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, if (sbi->s_log_groups_per_flex) { ext4_group_t flex_group = ext4_flex_group(sbi, ac->ac_b_ex.fe_group); - atomic_sub(ac->ac_b_ex.fe_len, - &sbi->s_flex_groups[flex_group].free_blocks); + atomic64_sub(ac->ac_b_ex.fe_len, + &sbi->s_flex_groups[flex_group].free_blocks); } err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); @@ -4614,7 +4614,7 @@ do_more: if (sbi->s_log_groups_per_flex) { ext4_group_t flex_group = ext4_flex_group(sbi, block_group); - atomic_add(count, &sbi->s_flex_groups[flex_group].free_blocks); + atomic64_add(count, &sbi->s_flex_groups[flex_group].free_blocks); } ext4_mb_unload_buddy(&e4b); @@ -4745,8 +4745,8 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, if (sbi->s_log_groups_per_flex) { ext4_group_t flex_group = ext4_flex_group(sbi, block_group); - atomic_add(blocks_freed, - &sbi->s_flex_groups[flex_group].free_blocks); + atomic64_add(blocks_freed, + &sbi->s_flex_groups[flex_group].free_blocks); } ext4_mb_unload_buddy(&e4b); diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 244100f..d2661aa 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -929,8 +929,8 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) sbi->s_log_groups_per_flex) { ext4_group_t flex_group; flex_group = ext4_flex_group(sbi, input->group); - atomic_add(input->free_blocks_count, - &sbi->s_flex_groups[flex_group].free_blocks); + atomic64_add(input->free_blocks_count, + &sbi->s_flex_groups[flex_group].free_blocks); atomic_add(EXT4_INODES_PER_GROUP(sb), &sbi->s_flex_groups[flex_group].free_inodes); } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index c6a3363..e05cd34 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1992,8 +1992,8 @@ static int ext4_fill_flex_info(struct super_block *sb) flex_group = ext4_flex_group(sbi, i); atomic_add(ext4_free_inodes_count(sb, gdp), &sbi->s_flex_groups[flex_group].free_inodes); - atomic_add(ext4_free_blks_count(sb, gdp), - &sbi->s_flex_groups[flex_group].free_blocks); + atomic64_add(ext4_free_blks_count(sb, gdp), + &sbi->s_flex_groups[flex_group].free_blocks); atomic_add(ext4_used_dirs_count(sb, gdp), &sbi->s_flex_groups[flex_group].used_dirs); } -- cgit v1.1 From 1ae92500b8c8b67f02071c38ee0d54701e036e59 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Thu, 14 Mar 2013 10:49:23 +0200 Subject: UBIFS: make space fixup work in the remount case commit 67e753ca41782913d805ff4a8a2b0f60b26b7915 upstream. The UBIFS space fixup is a useful feature which allows to fixup the "broken" flash space at the time of the first mount. The "broken" space is usually the result of using a "dumb" industrial flasher which is not able to skip empty NAND pages and just writes all 0xFFs to the empty space, which has grave side-effects for UBIFS when UBIFS trise to write useful data to those empty pages. The fix-up feature works roughly like this: 1. mkfs.ubifs sets the fixup flag in UBIFS superblock when creating the image (see -F option) 2. when the file-system is mounted for the first time, UBIFS notices the fixup flag and re-writes the entire media atomically, which may take really a lot of time. 3. UBIFS clears the fixup flag in the superblock. This works fine when the file system is mounted R/W for the very first time. But it did not really work in the case when we first mount the file-system R/O, and then re-mount R/W. The reason was that we started the fixup procedure too late, which we cannot really do because we have to fixup the space before it starts being used. Signed-off-by: Artem Bityutskiy Reported-by: Mark Jackson Signed-off-by: Greg Kroah-Hartman --- fs/ubifs/super.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 529be05..db04976 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1583,6 +1583,12 @@ static int ubifs_remount_rw(struct ubifs_info *c) c->remounting_rw = 1; c->ro_mount = 0; + if (c->space_fixup) { + err = ubifs_fixup_free_space(c); + if (err) + return err; + } + err = check_free_space(c); if (err) goto out; @@ -1699,12 +1705,6 @@ static int ubifs_remount_rw(struct ubifs_info *c) err = dbg_check_space_info(c); } - if (c->space_fixup) { - err = ubifs_fixup_free_space(c); - if (err) - goto out; - } - mutex_unlock(&c->umount_mutex); return err; -- cgit v1.1 From 06cc39593b0a9fed3f8ecbed6d30d8c3368ccbe0 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 29 Mar 2013 15:39:16 +0100 Subject: reiserfs: Fix warning and inode leak when deleting inode with xattrs commit 35e5cbc0af240778e61113286c019837e06aeec6 upstream. After commit 21d8a15a (lookup_one_len: don't accept . and ..) reiserfs started failing to delete xattrs from inode. This was due to a buggy test for '.' and '..' in fill_with_dentries() which resulted in passing '.' and '..' entries to lookup_one_len() in some cases. That returned error and so we failed to iterate over all xattrs of and inode. Fix the test in fill_with_dentries() along the lines of the one in lookup_one_len(). Reported-by: Pawel Zawora Signed-off-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/reiserfs/xattr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index d780896..6e3ca4e 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -187,8 +187,8 @@ fill_with_dentries(void *buf, const char *name, int namelen, loff_t offset, if (dbuf->count == ARRAY_SIZE(dbuf->dentries)) return -ENOSPC; - if (name[0] == '.' && (name[1] == '\0' || - (name[1] == '.' && name[2] == '\0'))) + if (name[0] == '.' && (namelen < 2 || + (namelen == 2 && name[1] == '.'))) return 0; dentry = lookup_one_len(name, dbuf->xadir, namelen); -- cgit v1.1 From a58e3e13f5f8c03a88195c45771b2ef3a7a53f75 Mon Sep 17 00:00:00 2001 From: Todd Poynor Date: Fri, 5 Apr 2013 16:38:53 -0700 Subject: ext4: fixup 64-bit divides in 3.0-stable backport of upstream fix Replace C division operators with div64_u64 for divides introduced in: commit 503f4bdcc078e7abee273a85ce322de81b18a224 ext4: use atomic64_t for the per-flexbg free_clusters count Specific to the 3.0-stable backport of the upstream patch. Signed-off-by: Todd Poynor Reviewed-by: "Theodore Ts'o" Cc: Christoph Biedl Cc: Lukas Czerner Signed-off-by: Greg Kroah-Hartman --- fs/ext4/ialloc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index dd732c7..443ffb8 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include "ext4.h" @@ -356,7 +357,7 @@ static int find_group_flex(struct super_block *sb, struct inode *parent, find_close_to_parent: flexbg_free_blocks = atomic64_read(&flex_group[best_flex].free_blocks); - flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex; + flex_freeb_ratio = div64_u64(flexbg_free_blocks * 100, blocks_per_flex); if (atomic_read(&flex_group[best_flex].free_inodes) && flex_freeb_ratio > free_block_ratio) goto found_flexbg; @@ -371,7 +372,7 @@ find_close_to_parent: continue; flexbg_free_blocks = atomic64_read(&flex_group[i].free_blocks); - flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex; + flex_freeb_ratio = div64_u64(flexbg_free_blocks * 100, blocks_per_flex); if (flex_freeb_ratio > free_block_ratio && (atomic_read(&flex_group[i].free_inodes))) { -- cgit v1.1 From 7c72cbc040d7160849369d59166f7be32d1991fd Mon Sep 17 00:00:00 2001 From: Vyacheslav Dubeyko Date: Wed, 17 Apr 2013 15:58:33 -0700 Subject: hfsplus: fix potential overflow in hfsplus_file_truncate() commit 12f267a20aecf8b84a2a9069b9011f1661c779b4 upstream. Change a u32 to loff_t hfsplus_file_truncate(). Signed-off-by: Vyacheslav Dubeyko Cc: Christoph Hellwig Cc: Al Viro Cc: Hin-Tak Leung Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/hfsplus/extents.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c index b1991a2..9d8c087 100644 --- a/fs/hfsplus/extents.c +++ b/fs/hfsplus/extents.c @@ -499,7 +499,7 @@ void hfsplus_file_truncate(struct inode *inode) struct address_space *mapping = inode->i_mapping; struct page *page; void *fsdata; - u32 size = inode->i_size; + loff_t size = inode->i_size; int res; res = pagecache_write_begin(NULL, mapping, size, 0, -- cgit v1.1 From bb5489176fda28aff304b59e0405f7d7d4906224 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Mon, 22 Apr 2013 15:40:15 +0200 Subject: Revert "sysfs: fix race between readdir and lseek" This reverts commit 991f76f837bf22c5bb07261cfd86525a0a96650c in Linus' tree which is f366c8f271888f48e15cc7c0ab70f184c220c8a4 in linux-stable.git It depends on ef3d0fd27e90f ("vfs: do (nearly) lockless generic_file_llseek") which is available only in 3.2+. When applied on 3.0 codebase, it causes A-A deadlock, whenever anyone does seek() on sysfs, as both generic_file_llseek() and sysfs_dir_llseek() obtain i_mutex. Signed-off-by: Jiri Kosina Cc: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/dir.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 7cbc585..3ab78b8 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -959,21 +959,9 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir) return 0; } -static loff_t sysfs_dir_llseek(struct file *file, loff_t offset, int whence) -{ - struct inode *inode = file->f_path.dentry->d_inode; - loff_t ret; - - mutex_lock(&inode->i_mutex); - ret = generic_file_llseek(file, offset, whence); - mutex_unlock(&inode->i_mutex); - - return ret; -} - const struct file_operations sysfs_dir_operations = { .read = generic_read_dir, .readdir = sysfs_readdir, .release = sysfs_dir_release, - .llseek = sysfs_dir_llseek, + .llseek = generic_file_llseek, }; -- cgit v1.1 From 7a860c4dfb8ebaf777a7df185df4436ea343189a Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 5 Apr 2013 20:50:09 +0000 Subject: Btrfs: make sure nbytes are right after log replay commit 4bc4bee4595662d8bff92180d5c32e3313a704b0 upstream. While trying to track down a tree log replay bug I noticed that fsck was always complaining about nbytes not being right for our fsynced file. That is because the new fsync stuff doesn't wait for ordered extents to complete, so the inodes nbytes are not necessarily updated properly when we log it. So to fix this we need to set nbytes to whatever it is on the inode that is on disk, so when we replay the extents we can just add the bytes that are being added as we replay the extent. This makes it work for the case that we have the wrong nbytes or the case that we logged everything and nbytes is actually correct. With this I'm no longer getting nbytes errors out of btrfsck. Signed-off-by: Josef Bacik Signed-off-by: Chris Mason Signed-off-by: Lingzhu Xiang Reviewed-by: CAI Qian Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/tree-log.c | 48 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 42 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index faf7d0b..88dec16 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -315,6 +315,7 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans, unsigned long src_ptr; unsigned long dst_ptr; int overwrite_root = 0; + bool inode_item = key->type == BTRFS_INODE_ITEM_KEY; if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) overwrite_root = 1; @@ -324,6 +325,9 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans, /* look for the key in the destination tree */ ret = btrfs_search_slot(NULL, root, key, path, 0, 0); + if (ret < 0) + return ret; + if (ret == 0) { char *src_copy; char *dst_copy; @@ -365,6 +369,30 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans, return 0; } + /* + * We need to load the old nbytes into the inode so when we + * replay the extents we've logged we get the right nbytes. + */ + if (inode_item) { + struct btrfs_inode_item *item; + u64 nbytes; + + item = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_inode_item); + nbytes = btrfs_inode_nbytes(path->nodes[0], item); + item = btrfs_item_ptr(eb, slot, + struct btrfs_inode_item); + btrfs_set_inode_nbytes(eb, item, nbytes); + } + } else if (inode_item) { + struct btrfs_inode_item *item; + + /* + * New inode, set nbytes to 0 so that the nbytes comes out + * properly when we replay the extents. + */ + item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item); + btrfs_set_inode_nbytes(eb, item, 0); } insert: btrfs_release_path(path); @@ -487,7 +515,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, u64 extent_end; u64 alloc_hint; u64 start = key->offset; - u64 saved_nbytes; + u64 nbytes = 0; struct btrfs_file_extent_item *item; struct inode *inode = NULL; unsigned long size; @@ -497,10 +525,19 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, found_type = btrfs_file_extent_type(eb, item); if (found_type == BTRFS_FILE_EXTENT_REG || - found_type == BTRFS_FILE_EXTENT_PREALLOC) - extent_end = start + btrfs_file_extent_num_bytes(eb, item); - else if (found_type == BTRFS_FILE_EXTENT_INLINE) { + found_type == BTRFS_FILE_EXTENT_PREALLOC) { + nbytes = btrfs_file_extent_num_bytes(eb, item); + extent_end = start + nbytes; + + /* + * We don't add to the inodes nbytes if we are prealloc or a + * hole. + */ + if (btrfs_file_extent_disk_bytenr(eb, item) == 0) + nbytes = 0; + } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { size = btrfs_file_extent_inline_len(eb, item); + nbytes = btrfs_file_extent_ram_bytes(eb, item); extent_end = (start + size + mask) & ~mask; } else { ret = 0; @@ -549,7 +586,6 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, } btrfs_release_path(path); - saved_nbytes = inode_get_bytes(inode); /* drop any overlapping extents */ ret = btrfs_drop_extents(trans, inode, start, extent_end, &alloc_hint, 1); @@ -637,7 +673,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, BUG_ON(ret); } - inode_set_bytes(inode, saved_nbytes); + inode_add_bytes(inode, nbytes); btrfs_update_inode(trans, root, inode); out: if (inode) -- cgit v1.1 From 204435f3d3f96513892dfd13aa65298abfeed130 Mon Sep 17 00:00:00 2001 From: Anurup m Date: Mon, 29 Apr 2013 15:05:52 -0700 Subject: fs/fscache/stats.c: fix memory leak commit ec686c9239b4d472052a271c505d04dae84214cc upstream. There is a kernel memory leak observed when the proc file /proc/fs/fscache/stats is read. The reason is that in fscache_stats_open, single_open is called and the respective release function is not called during release. Hence fix with correct release function - single_release(). Addresses https://bugzilla.kernel.org/show_bug.cgi?id=57101 Signed-off-by: Anurup m Cc: shyju pv Cc: Sanil kumar Cc: Nataraj m Cc: Li Zefan Cc: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/fscache/stats.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c index 4765190..73c0bd7 100644 --- a/fs/fscache/stats.c +++ b/fs/fscache/stats.c @@ -276,5 +276,5 @@ const struct file_operations fscache_stats_fops = { .open = fscache_stats_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = single_release, }; -- cgit v1.1 From 3b5f7654971e0dcb6c422d14cbae7309686bb344 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 21 Apr 2013 18:01:06 -0400 Subject: LOCKD: Ensure that nlmclnt_block resets block->b_status after a server reboot commit 1dfd89af8697a299e7982ae740d4695ecd917eef upstream. After a server reboot, the reclaimer thread will recover all the existing locks. For locks that are blocked, however, it will change the value of block->b_status to nlm_lck_denied_grace_period in order to signal that they need to wake up and resend the original blocking lock request. Due to a bug, however, the block->b_status never gets reset after the blocked locks have been woken up, and so the process goes into an infinite loop of resends until the blocked lock is satisfied. Reported-by: Marc Eshel Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/lockd/clntlock.c | 3 +++ fs/lockd/clntproc.c | 3 --- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index 8d4ea83..de88922 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -141,6 +141,9 @@ int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout) timeout); if (ret < 0) return -ERESTARTSYS; + /* Reset the lock status after a server reboot so we resend */ + if (block->b_status == nlm_lck_denied_grace_period) + block->b_status = nlm_lck_blocked; req->a_res.status = block->b_status; return 0; } diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 5ee055e..e374050 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -550,9 +550,6 @@ again: status = nlmclnt_block(block, req, NLMCLNT_POLL_TIMEOUT); if (status < 0) break; - /* Resend the blocking lock request after a server reboot */ - if (resp->status == nlm_lck_denied_grace_period) - continue; if (resp->status != nlm_lck_blocked) break; } -- cgit v1.1 From ebcd3f67c004ee5c51a9379d744e5546be73f227 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 1 Apr 2013 15:34:05 -0400 Subject: NFSv4: Handle NFS4ERR_DELAY and NFS4ERR_GRACE in nfs4_open_delegation_recall commit 8b6cc4d6f841d31f72fe7478453759166d366274 upstream. A server shouldn't normally return NFS4ERR_GRACE if the client holds a delegation, since no conflicting lock reclaims can be granted, however the spec does not require the server to grant the open in this instance Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/nfs4proc.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 894e326..7204bcc 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1335,6 +1335,12 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state case -ENOMEM: err = 0; goto out; + case -NFS4ERR_DELAY: + case -NFS4ERR_GRACE: + set_bit(NFS_DELEGATED_STATE, &state->flags); + ssleep(1); + err = -EAGAIN; + goto out; } err = nfs4_handle_exception(server, err, &exception); } while (exception.retry); -- cgit v1.1 From bc2da6406bec3dfffde77426330468e40243b1ea Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 28 Mar 2013 20:37:14 -0400 Subject: nfsd4: don't close read-write opens too soon MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 0c7c3e67ab91ec6caa44bdf1fc89a48012ceb0c5 upstream. Don't actually close any opens until we don't need them at all. This means being left with write access when it's not really necessary, but that's better than putting a file that might still have posix locks held on it, as we have been. Reported-by: Toralf Förster Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 92f7eb7..4ec38df 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -189,13 +189,7 @@ static void __nfs4_file_put_access(struct nfs4_file *fp, int oflag) { if (atomic_dec_and_test(&fp->fi_access[oflag])) { nfs4_file_put_fd(fp, oflag); - /* - * It's also safe to get rid of the RDWR open *if* - * we no longer have need of the other kind of access - * or if we already have the other kind of open: - */ - if (fp->fi_fds[1-oflag] - || atomic_read(&fp->fi_access[1 - oflag]) == 0) + if (atomic_read(&fp->fi_access[1 - oflag]) == 0) nfs4_file_put_fd(fp, O_RDWR); } } -- cgit v1.1 From cfb0a900641f0d5c84bc1e68bbf3e312ae37c549 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Fri, 19 Apr 2013 16:09:38 -0400 Subject: nfsd: Decode and send 64bit time values commit bf8d909705e9d9bac31d9b8eac6734d2b51332a7 upstream. The seconds field of an nfstime4 structure is 64bit, but we are assuming that the first 32bits are zero-filled. So if the client tries to set atime to a value before the epoch (touch -t 196001010101), then the server will save the wrong value on disk. Signed-off-by: Bryan Schumaker Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4xdr.c | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 59ac3f4..401b356 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -342,10 +342,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, all 32 bits of 'nseconds'. */ READ_BUF(12); len += 12; - READ32(dummy32); - if (dummy32) - return nfserr_inval; - READ32(iattr->ia_atime.tv_sec); + READ64(iattr->ia_atime.tv_sec); READ32(iattr->ia_atime.tv_nsec); if (iattr->ia_atime.tv_nsec >= (u32)1000000000) return nfserr_inval; @@ -368,10 +365,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, all 32 bits of 'nseconds'. */ READ_BUF(12); len += 12; - READ32(dummy32); - if (dummy32) - return nfserr_inval; - READ32(iattr->ia_mtime.tv_sec); + READ64(iattr->ia_mtime.tv_sec); READ32(iattr->ia_mtime.tv_nsec); if (iattr->ia_mtime.tv_nsec >= (u32)1000000000) return nfserr_inval; @@ -2148,8 +2142,7 @@ out_acl: if (bmval1 & FATTR4_WORD1_TIME_ACCESS) { if ((buflen -= 12) < 0) goto out_resource; - WRITE32(0); - WRITE32(stat.atime.tv_sec); + WRITE64((s64)stat.atime.tv_sec); WRITE32(stat.atime.tv_nsec); } if (bmval1 & FATTR4_WORD1_TIME_DELTA) { @@ -2162,15 +2155,13 @@ out_acl: if (bmval1 & FATTR4_WORD1_TIME_METADATA) { if ((buflen -= 12) < 0) goto out_resource; - WRITE32(0); - WRITE32(stat.ctime.tv_sec); + WRITE64((s64)stat.ctime.tv_sec); WRITE32(stat.ctime.tv_nsec); } if (bmval1 & FATTR4_WORD1_TIME_MODIFY) { if ((buflen -= 12) < 0) goto out_resource; - WRITE32(0); - WRITE32(stat.mtime.tv_sec); + WRITE64((s64)stat.mtime.tv_sec); WRITE32(stat.mtime.tv_nsec); } if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) { -- cgit v1.1 From 8b715460ae5db65f37aefdd3d1330189e193f789 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 21 Apr 2013 20:32:03 -0400 Subject: ext4: fix Kconfig documentation for CONFIG_EXT4_DEBUG commit 7f3e3c7cfcec148ccca9c0dd2dbfd7b00b7ac10f upstream. Fox the Kconfig documentation for CONFIG_EXT4_DEBUG to match the change made by commit a0b30c1229: ext4: use module parameters instead of debugfs for mballoc_debug Signed-off-by: "Theodore Ts'o" Signed-off-by: Greg Kroah-Hartman --- fs/ext4/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig index 9ed1bb1..5459168 100644 --- a/fs/ext4/Kconfig +++ b/fs/ext4/Kconfig @@ -82,4 +82,5 @@ config EXT4_DEBUG Enables run-time debugging support for the ext4 filesystem. If you select Y here, then you will be able to turn on debugging - with a command such as "echo 1 > /sys/kernel/debug/ext4/mballoc-debug" + with a command such as: + echo 1 > /sys/module/ext4/parameters/mballoc_debug -- cgit v1.1 From eda948e04f7804886db45005eae5793a6ffb0bbc Mon Sep 17 00:00:00 2001 From: David Jeffery Date: Mon, 6 May 2013 13:49:30 +0800 Subject: autofs - remove autofs dentry mount check commit ce8a5dbdf9e709bdaf4618d7ef8cceb91e8adc69 upstream. When checking if an autofs mount point is busy it isn't sufficient to only check if it's a mount point. For example, if the mount of an offset mountpoint in a tree is denied for this host by its export and the dentry becomes a process working directory the check incorrectly returns the mount as not in use at expire. This can happen since the default when mounting within a tree is nostrict, which means ingnore mount fails on mounts within the tree and continue. The nostrict option is meant to allow mounting in this case. Signed-off-by: David Jeffery Signed-off-by: Ian Kent Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/autofs4/expire.c | 9 --------- 1 file changed, 9 deletions(-) (limited to 'fs') diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 450f529..2c69d12 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -61,15 +61,6 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry) /* This is an autofs submount, we can't expire it */ if (autofs_type_indirect(sbi->type)) goto done; - - /* - * Otherwise it's an offset mount and we need to check - * if we can umount its mount, if there is one. - */ - if (!d_mountpoint(path.dentry)) { - status = 0; - goto done; - } } /* Update the expiry counter if fs is busy */ -- cgit v1.1 From 7fb7465071b6f553c5c5cd8aca704cfc6896917c Mon Sep 17 00:00:00 2001 From: Lachlan McIlroy Date: Sun, 5 May 2013 23:10:00 -0400 Subject: ext4: limit group search loop for non-extent files commit e6155736ad76b2070652745f9e54cdea3f0d8567 upstream. In the case where we are allocating for a non-extent file, we must limit the groups we allocate from to those below 2^32 blocks, and ext4_mb_regular_allocator() attempts to do this initially by putting a cap on ngroups for the subsequent search loop. However, the initial target group comes in from the allocation context (ac), and it may already be beyond the artificially limited ngroups. In this case, the limit if (group == ngroups) group = 0; at the top of the loop is never true, and the loop will run away. Catch this case inside the loop and reset the search to start at group 0. [sandeen@redhat.com: add commit msg & comments] Signed-off-by: Lachlan McIlroy Signed-off-by: Eric Sandeen Signed-off-by: "Theodore Ts'o" Signed-off-by: Greg Kroah-Hartman --- fs/ext4/mballoc.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 35959f6..cdb8414 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2026,7 +2026,11 @@ repeat: group = ac->ac_g_ex.fe_group; for (i = 0; i < ngroups; group++, i++) { - if (group == ngroups) + /* + * Artificially restricted ngroups for non-extent + * files makes group > ngroups possible on first loop. + */ + if (group >= ngroups) group = 0; /* This now checks without needing the buddy page */ -- cgit v1.1 From 3f9831881378bae530028b35a7a06c811441162d Mon Sep 17 00:00:00 2001 From: Gabriel de Perthuis Date: Mon, 6 May 2013 17:40:18 +0000 Subject: btrfs: don't stop searching after encountering the wrong item commit 03b71c6ca6286625d8f1ed44aabab9b5bf5dac10 upstream. The search ioctl skips items that are too large for a result buffer, but inline items of a certain size occuring before any search result is found would trigger an overflow and stop the search entirely. Bug: https://bugzilla.kernel.org/show_bug.cgi?id=57641 Signed-off-by: Gabriel de Perthuis Signed-off-by: Josef Bacik Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ioctl.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index a3c4751..a205027 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1539,7 +1539,11 @@ static noinline int copy_to_sk(struct btrfs_root *root, item_off = btrfs_item_ptr_offset(leaf, i); item_len = btrfs_item_size_nr(leaf, i); - if (item_len > BTRFS_SEARCH_ARGS_BUFSIZE) + btrfs_item_key_to_cpu(leaf, key, i); + if (!key_in_sk(key, sk)) + continue; + + if (sizeof(sh) + item_len > BTRFS_SEARCH_ARGS_BUFSIZE) item_len = 0; if (sizeof(sh) + item_len + *sk_offset > @@ -1548,10 +1552,6 @@ static noinline int copy_to_sk(struct btrfs_root *root, goto overflow; } - btrfs_item_key_to_cpu(leaf, key, i); - if (!key_in_sk(key, sk)) - continue; - sh.objectid = key->objectid; sh.offset = key->offset; sh.type = key->type; -- cgit v1.1 From dae84f25ac695ff740ce1ca2226e5451ba296fc3 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 7 May 2013 11:28:31 -0400 Subject: cifs: only set ops for inodes in I_NEW state commit c2b93e0699723700f886ce17bb65ffd771195a6d upstream. It's generally not safe to reset the inode ops once they've been set. In the case where the inode was originally thought to be a directory and then later found to be a DFS referral, this can lead to an oops when we try to trigger an inode op on it after changing the ops to the blank referral operations. Reported-and-Tested-by: Sachin Prabhu Signed-off-by: Jeff Layton Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/inode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 745e5cd..6f37228 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -173,7 +173,8 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr) if (fattr->cf_flags & CIFS_FATTR_DFS_REFERRAL) inode->i_flags |= S_AUTOMOUNT; - cifs_set_ops(inode); + if (inode->i_state & I_NEW) + cifs_set_ops(inode); } void -- cgit v1.1 From 2b90057252cfde394556c989a0eb740ed6734389 Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Fri, 24 May 2013 15:55:08 -0700 Subject: fat: fix possible overflow for fat_clusters commit 7b92d03c3239f43e5b86c9cc9630f026d36ee995 upstream. Intermediate value of fat_clusters can be overflowed on 32bits arch. Reported-by: Krzysztof Strasburger Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/fat/inode.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/fat/inode.c b/fs/fat/inode.c index cb8d839..11a9744 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -1236,6 +1236,19 @@ static int fat_read_root(struct inode *inode) return 0; } +static unsigned long calc_fat_clusters(struct super_block *sb) +{ + struct msdos_sb_info *sbi = MSDOS_SB(sb); + + /* Divide first to avoid overflow */ + if (sbi->fat_bits != 12) { + unsigned long ent_per_sec = sb->s_blocksize * 8 / sbi->fat_bits; + return ent_per_sec * sbi->fat_length; + } + + return sbi->fat_length * sb->s_blocksize * 8 / sbi->fat_bits; +} + /* * Read the super block of an MS-DOS FS. */ @@ -1433,7 +1446,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, sbi->fat_bits = (total_clusters > MAX_FAT12) ? 16 : 12; /* check that FAT table does not overflow */ - fat_clusters = sbi->fat_length * sb->s_blocksize * 8 / sbi->fat_bits; + fat_clusters = calc_fat_clusters(sb); total_clusters = min(total_clusters, fat_clusters - FAT_START_ENT); if (total_clusters > MAX_FAT(sb)) { if (!silent) -- cgit v1.1 From 70d53e5acf842b00efe048846aaf49d1f664b23d Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Fri, 24 May 2013 15:55:34 -0700 Subject: ocfs2: goto out_unlock if ocfs2_get_clusters_nocache() failed in ocfs2_fiemap() commit b4ca2b4b577c3530e34dcfaafccb2cc680ce95d1 upstream. Last time we found there is lock/unlock bug in ocfs2_file_aio_write, and then we did a thorough search for all lock resources in ocfs2_inode_info, including rw, inode and open lockres and found this bug. My kernel version is 3.0.13, and it is also in the lastest version 3.9. In ocfs2_fiemap, once ocfs2_get_clusters_nocache failed, it should goto out_unlock instead of out, because we need release buffer head, up read alloc sem and unlock inode. Signed-off-by: Joseph Qi Reviewed-by: Jie Liu Cc: Mark Fasheh Cc: Joel Becker Acked-by: Sunil Mushran Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/extent_map.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c index 23457b4..5941284 100644 --- a/fs/ocfs2/extent_map.c +++ b/fs/ocfs2/extent_map.c @@ -791,7 +791,7 @@ int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, &hole_size, &rec, &is_last); if (ret) { mlog_errno(ret); - goto out; + goto out_unlock; } if (rec.e_blkno == 0ULL) { -- cgit v1.1 From ed2bfaffd22f37e669feea0a97ab81d187ccb768 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 24 May 2013 15:55:29 -0700 Subject: nilfs2: fix issue of nilfs_set_page_dirty() for page at EOF boundary commit 136e8770cd5d1fe38b3c613100dd6dc4db6d4fa6 upstream. nilfs2: fix issue of nilfs_set_page_dirty for page at EOF boundary DESCRIPTION: There are use-cases when NILFS2 file system (formatted with block size lesser than 4 KB) can be remounted in RO mode because of encountering of "broken bmap" issue. The issue was reported by Anthony Doggett : "The machine I've been trialling nilfs on is running Debian Testing, Linux version 3.2.0-4-686-pae (debian-kernel@lists.debian.org) (gcc version 4.6.3 (Debian 4.6.3-14) ) #1 SMP Debian 3.2.35-2), but I've also reproduced it (identically) with Debian Unstable amd64 and Debian Experimental (using the 3.8-trunk kernel). The problematic partitions were formatted with "mkfs.nilfs2 -b 1024 -B 8192"." SYMPTOMS: (1) System log contains error messages likewise: [63102.496756] nilfs_direct_assign: invalid pointer: 0 [63102.496786] NILFS error (device dm-17): nilfs_bmap_assign: broken bmap (inode number=28) [63102.496798] [63102.524403] Remounting filesystem read-only (2) The NILFS2 file system is remounted in RO mode. REPRODUSING PATH: (1) Create volume group with name "unencrypted" by means of vgcreate utility. (2) Run script (prepared by Anthony Doggett ): ----------------[BEGIN SCRIPT]-------------------- VG=unencrypted lvcreate --size 2G --name ntest $VG mkfs.nilfs2 -b 1024 -B 8192 /dev/mapper/$VG-ntest mkdir /var/tmp/n mkdir /var/tmp/n/ntest mount /dev/mapper/$VG-ntest /var/tmp/n/ntest mkdir /var/tmp/n/ntest/thedir cd /var/tmp/n/ntest/thedir sleep 2 date darcs init sleep 2 dmesg|tail -n 5 date darcs whatsnew || true date sleep 2 dmesg|tail -n 5 ----------------[END SCRIPT]-------------------- REPRODUCIBILITY: 100% INVESTIGATION: As it was discovered, the issue takes place during segment construction after executing such sequence of user-space operations: open("_darcs/index", O_RDWR|O_CREAT|O_NOCTTY, 0666) = 7 fstat(7, {st_mode=S_IFREG|0644, st_size=0, ...}) = 0 ftruncate(7, 60) The error message "NILFS error (device dm-17): nilfs_bmap_assign: broken bmap (inode number=28)" takes place because of trying to get block number for third block of the file with logical offset #3072 bytes. As it is possible to see from above output, the file has 60 bytes of the whole size. So, it is enough one block (1 KB in size) allocation for the whole file. Trying to operate with several blocks instead of one takes place because of discovering several dirty buffers for this file in nilfs_segctor_scan_file() method. The root cause of this issue is in nilfs_set_page_dirty function which is called just before writing to an mmapped page. When nilfs_page_mkwrite function handles a page at EOF boundary, it fills hole blocks only inside EOF through __block_page_mkwrite(). The __block_page_mkwrite() function calls set_page_dirty() after filling hole blocks, thus nilfs_set_page_dirty function (= a_ops->set_page_dirty) is called. However, the current implementation of nilfs_set_page_dirty() wrongly marks all buffers dirty even for page at EOF boundary. As a result, buffers outside EOF are inconsistently marked dirty and queued for write even though they are not mapped with nilfs_get_block function. FIX: This modifies nilfs_set_page_dirty() not to mark hole blocks dirty. Thanks to Vyacheslav Dubeyko for his effort on analysis and proposals for this issue. Signed-off-by: Ryusuke Konishi Reported-by: Anthony Doggett Reported-by: Vyacheslav Dubeyko Cc: Vyacheslav Dubeyko Tested-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/nilfs2/inode.c | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index b9b45fc..373cd7b 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -195,13 +195,32 @@ static int nilfs_writepage(struct page *page, struct writeback_control *wbc) static int nilfs_set_page_dirty(struct page *page) { - int ret = __set_page_dirty_buffers(page); + int ret = __set_page_dirty_nobuffers(page); - if (ret) { + if (page_has_buffers(page)) { struct inode *inode = page->mapping->host; - unsigned nr_dirty = 1 << (PAGE_SHIFT - inode->i_blkbits); + unsigned nr_dirty = 0; + struct buffer_head *bh, *head; - nilfs_set_file_dirty(inode, nr_dirty); + /* + * This page is locked by callers, and no other thread + * concurrently marks its buffers dirty since they are + * only dirtied through routines in fs/buffer.c in + * which call sites of mark_buffer_dirty are protected + * by page lock. + */ + bh = head = page_buffers(page); + do { + /* Do not mark hole blocks dirty */ + if (buffer_dirty(bh) || !buffer_mapped(bh)) + continue; + + set_buffer_dirty(bh); + nr_dirty++; + } while (bh = bh->b_this_page, bh != head); + + if (nr_dirty) + nilfs_set_file_dirty(inode, nr_dirty); } return ret; } -- cgit v1.1 From 1b968601f5b7e98cf519684589f853768d377144 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 24 May 2013 07:40:04 -0400 Subject: cifs: fix potential buffer overrun when composing a new options string commit 166faf21bd14bc5c5295a44874bf7f3930c30b20 upstream. Consider the case where we have a very short ip= string in the original mount options, and when we chase a referral we end up with a very long IPv6 address. Be sure to allow for that possibility when estimating the size of the string to allocate. Signed-off-by: Jeff Layton Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/cifs_dfs_ref.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index 51feb1a..993384e4 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "cifsglob.h" #include "cifsproto.h" #include "cifsfs.h" @@ -149,7 +150,8 @@ char *cifs_compose_mount_options(const char *sb_mountdata, * assuming that we have 'unc=' and 'ip=' in * the original sb_mountdata */ - md_len = strlen(sb_mountdata) + rc + strlen(ref->node_name) + 12; + md_len = strlen(sb_mountdata) + rc + strlen(ref->node_name) + 12 + + INET6_ADDRSTRLEN; mountdata = kzalloc(md_len+1, GFP_KERNEL); if (mountdata == NULL) { rc = -ENOMEM; -- cgit v1.1 From 3b1a317977a8d16d743e8f85ed9a5a7f668b61d5 Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Wed, 1 May 2013 11:08:38 -0500 Subject: jfs: fix a couple races commit 73aaa22d5ffb2630456bac2f9a4ed9b81d0d7271 upstream. This patch fixes races uncovered by xfstests testcase 068. One race is the result of jfs_sync() trying to write a sync point to the journal after it has been frozen (or possibly in the process). Since freezing sync's the journal, there is no need to write a sync point so we simply want to return. The second involves jfs_write_inode() being called on a deleted inode. It calls jfs_flush_journal which is held up by the jfs_commit thread doing the final iput on the same deleted inode, which itself is waiting for the I_SYNC flag to be cleared. jfs_write_inode need not do anything when i_nlink is zero, which is the easy fix. Reported-by: Michael L. Semon Signed-off-by: Dave Kleikamp Signed-off-by: Greg Kroah-Hartman --- fs/jfs/inode.c | 2 +- fs/jfs/jfs_logmgr.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index 1096559..09100b4 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -125,7 +125,7 @@ int jfs_write_inode(struct inode *inode, struct writeback_control *wbc) { int wait = wbc->sync_mode == WB_SYNC_ALL; - if (test_cflag(COMMIT_Nolink, inode)) + if (inode->i_nlink == 0) return 0; /* * If COMMIT_DIRTY is not set, the inode isn't really dirty. diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index 583636f..ee55e45 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c @@ -1057,7 +1057,8 @@ static int lmLogSync(struct jfs_log * log, int hard_sync) */ void jfs_syncpt(struct jfs_log *log, int hard_sync) { LOG_LOCK(log); - lmLogSync(log, hard_sync); + if (!test_bit(log_QUIESCE, &log->flag)) + lmLogSync(log, hard_sync); LOG_UNLOCK(log); } -- cgit v1.1 From 088d9d4d782d66941fc9e29ff4eb0dd72f84493b Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 22 Feb 2013 15:31:00 -0800 Subject: ceph: fix statvfs fr_size commit 92a49fb0f79f3300e6e50ddf56238e70678e4202 upstream. Different versions of glibc are broken in different ways, but the short of it is that for the time being, frsize should == bsize, and be used as the multiple for the blocks, free, and available fields. This mirrors what is done for NFS. The previous reporting of the page size for frsize meant that newer glibc and df would report a very small value for the fs size. Fixes http://tracker.ceph.com/issues/3793. Signed-off-by: Sage Weil Reviewed-by: Greg Farnum Signed-off-by: Greg Kroah-Hartman --- fs/ceph/super.c | 7 ++++++- fs/ceph/super.h | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ceph/super.c b/fs/ceph/super.c index f2f77fd..1775022 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -70,8 +70,14 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) /* * express utilization in terms of large blocks to avoid * overflow on 32-bit machines. + * + * NOTE: for the time being, we make bsize == frsize to humor + * not-yet-ancient versions of glibc that are broken. + * Someday, we will probably want to report a real block + * size... whatever that may mean for a network file system! */ buf->f_bsize = 1 << CEPH_BLOCK_SHIFT; + buf->f_frsize = 1 << CEPH_BLOCK_SHIFT; buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10); buf->f_bfree = (le64_to_cpu(st.kb) - le64_to_cpu(st.kb_used)) >> (CEPH_BLOCK_SHIFT-10); @@ -80,7 +86,6 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_files = le64_to_cpu(st.num_objects); buf->f_ffree = -1; buf->f_namelen = NAME_MAX; - buf->f_frsize = PAGE_CACHE_SIZE; /* leave fsid little-endian, regardless of host endianness */ fsid = *(u64 *)(&monmap->fsid) ^ *((u64 *)&monmap->fsid + 1); diff --git a/fs/ceph/super.h b/fs/ceph/super.h index f5cabef..9091926 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -21,7 +21,7 @@ /* large granularity for statfs utilization stats to facilitate * large volume sizes on 32-bit machines. */ -#define CEPH_BLOCK_SHIFT 20 /* 1 MB */ +#define CEPH_BLOCK_SHIFT 22 /* 4 MB */ #define CEPH_BLOCK (1 << CEPH_BLOCK_SHIFT) #define CEPH_MOUNT_OPT_DIRSTAT (1<<4) /* `cat dirname` for stats */ -- cgit v1.1 From 5d28835c59b02d4389003bbfa7c4276375f39ed6 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 20 Jun 2013 11:36:28 +0200 Subject: perf: Disable monitoring on setuid processes for regular users commit 2976b10f05bd7f6dab9f9e7524451ddfed656a89 upstream. There was a a bug in setup_new_exec(), whereby the test to disabled perf monitoring was not correct because the new credentials for the process were not yet committed and therefore the get_dumpable() test was never firing. The patch fixes the problem by moving the perf_event test until after the credentials are committed. Signed-off-by: Stephane Eranian Tested-by: Jiri Olsa Acked-by: Peter Zijlstra Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- fs/exec.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index 3801daf..9ab31ca 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1149,13 +1149,6 @@ void setup_new_exec(struct linux_binprm * bprm) set_dumpable(current->mm, suid_dumpable); } - /* - * Flush performance counters when crossing a - * security domain: - */ - if (!get_dumpable(current->mm)) - perf_event_exit_task(current); - /* An exec changes our domain. We are no longer part of the thread group */ @@ -1219,6 +1212,15 @@ void install_exec_creds(struct linux_binprm *bprm) commit_creds(bprm->cred); bprm->cred = NULL; + + /* + * Disable monitoring for regular users + * when executing setuid binaries. Must + * wait until new credentials are committed + * by commit_creds() above + */ + if (get_dumpable(current->mm) != SUID_DUMP_USER) + perf_event_exit_task(current); /* * cred_guard_mutex must be held at least to this point to prevent * ptrace_attach() from altering our determination of the task's -- cgit v1.1 From 1f4f917e5e882de1e701d7b26955884ed16604d5 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 28 Jun 2013 14:15:14 +0300 Subject: UBIFS: prepare to fix a horrid bug commit 33f1a63ae84dfd9ad298cf275b8f1887043ced36 upstream. Al Viro pointed me to the fact that '->readdir()' and '->llseek()' have no mutual exclusion, which means the 'ubifs_dir_llseek()' can be run while we are in the middle of 'ubifs_readdir()'. First of all, this means that 'file->private_data' can be freed while 'ubifs_readdir()' uses it. But this particular patch does not fix the problem. This patch is only a preparation, and the fix will follow next. In this patch we make 'ubifs_readdir()' stop using 'file->f_pos' directly, because 'file->f_pos' can be changed by '->llseek()' at any point. This may lead 'ubifs_readdir()' to returning inconsistent data: directory entry names may correspond to incorrect file positions. So here we introduce a local variable 'pos', read 'file->f_pose' once at very the beginning, and then stick to 'pos'. The result of this is that when 'ubifs_dir_llseek()' changes 'file->f_pos' while we are in the middle of 'ubifs_readdir()', the latter "wins". Reported-by: Al Viro Tested-by: Artem Bityutskiy Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/ubifs/dir.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index ef5abd3..8ecabb1 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -355,15 +355,16 @@ static unsigned int vfs_dent_type(uint8_t type) static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir) { int err, over = 0; + loff_t pos = file->f_pos; struct qstr nm; union ubifs_key key; struct ubifs_dent_node *dent; struct inode *dir = file->f_path.dentry->d_inode; struct ubifs_info *c = dir->i_sb->s_fs_info; - dbg_gen("dir ino %lu, f_pos %#llx", dir->i_ino, file->f_pos); + dbg_gen("dir ino %lu, f_pos %#llx", dir->i_ino, pos); - if (file->f_pos > UBIFS_S_KEY_HASH_MASK || file->f_pos == 2) + if (pos > UBIFS_S_KEY_HASH_MASK || pos == 2) /* * The directory was seek'ed to a senseless position or there * are no more entries. @@ -371,15 +372,15 @@ static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir) return 0; /* File positions 0 and 1 correspond to "." and ".." */ - if (file->f_pos == 0) { + if (pos == 0) { ubifs_assert(!file->private_data); over = filldir(dirent, ".", 1, 0, dir->i_ino, DT_DIR); if (over) return 0; - file->f_pos = 1; + file->f_pos = pos = 1; } - if (file->f_pos == 1) { + if (pos == 1) { ubifs_assert(!file->private_data); over = filldir(dirent, "..", 2, 1, parent_ino(file->f_path.dentry), DT_DIR); @@ -395,7 +396,7 @@ static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir) goto out; } - file->f_pos = key_hash_flash(c, &dent->key); + file->f_pos = pos = key_hash_flash(c, &dent->key); file->private_data = dent; } @@ -403,17 +404,16 @@ static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir) if (!dent) { /* * The directory was seek'ed to and is now readdir'ed. - * Find the entry corresponding to @file->f_pos or the - * closest one. + * Find the entry corresponding to @pos or the closest one. */ - dent_key_init_hash(c, &key, dir->i_ino, file->f_pos); + dent_key_init_hash(c, &key, dir->i_ino, pos); nm.name = NULL; dent = ubifs_tnc_next_ent(c, &key, &nm); if (IS_ERR(dent)) { err = PTR_ERR(dent); goto out; } - file->f_pos = key_hash_flash(c, &dent->key); + file->f_pos = pos = key_hash_flash(c, &dent->key); file->private_data = dent; } @@ -425,7 +425,7 @@ static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir) ubifs_inode(dir)->creat_sqnum); nm.len = le16_to_cpu(dent->nlen); - over = filldir(dirent, dent->name, nm.len, file->f_pos, + over = filldir(dirent, dent->name, nm.len, pos, le64_to_cpu(dent->inum), vfs_dent_type(dent->type)); if (over) @@ -441,7 +441,7 @@ static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir) } kfree(file->private_data); - file->f_pos = key_hash_flash(c, &dent->key); + file->f_pos = pos = key_hash_flash(c, &dent->key); file->private_data = dent; cond_resched(); } -- cgit v1.1 From c6c46477761504a89d5c68331f3b86fe31b51338 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 28 Jun 2013 14:15:15 +0300 Subject: UBIFS: fix a horrid bug commit 605c912bb843c024b1ed173dc427cd5c08e5d54d upstream. Al Viro pointed me to the fact that '->readdir()' and '->llseek()' have no mutual exclusion, which means the 'ubifs_dir_llseek()' can be run while we are in the middle of 'ubifs_readdir()'. This means that 'file->private_data' can be freed while 'ubifs_readdir()' uses it, and this is a very bad bug: not only 'ubifs_readdir()' can return garbage, but this may corrupt memory and lead to all kinds of problems like crashes an security holes. This patch fixes the problem by using the 'file->f_version' field, which '->llseek()' always unconditionally sets to zero. We set it to 1 in 'ubifs_readdir()' and whenever we detect that it became 0, we know there was a seek and it is time to clear the state saved in 'file->private_data'. I tested this patch by writing a user-space program which runds readdir and seek in parallell. I could easily crash the kernel without these patches, but could not crash it with these patches. Reported-by: Al Viro Tested-by: Artem Bityutskiy Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/ubifs/dir.c | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 8ecabb1..936a038 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -371,6 +371,24 @@ static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir) */ return 0; + if (file->f_version == 0) { + /* + * The file was seek'ed, which means that @file->private_data + * is now invalid. This may also be just the first + * 'ubifs_readdir()' invocation, in which case + * @file->private_data is NULL, and the below code is + * basically a no-op. + */ + kfree(file->private_data); + file->private_data = NULL; + } + + /* + * 'generic_file_llseek()' unconditionally sets @file->f_version to + * zero, and we use this for detecting whether the file was seek'ed. + */ + file->f_version = 1; + /* File positions 0 and 1 correspond to "." and ".." */ if (pos == 0) { ubifs_assert(!file->private_data); @@ -444,6 +462,14 @@ static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir) file->f_pos = pos = key_hash_flash(c, &dent->key); file->private_data = dent; cond_resched(); + + if (file->f_version == 0) + /* + * The file was seek'ed meanwhile, lets return and start + * reading direntries from the new position on the next + * invocation. + */ + return 0; } out: @@ -454,15 +480,13 @@ out: kfree(file->private_data); file->private_data = NULL; + /* 2 is a special value indicating that there are no more direntries */ file->f_pos = 2; return 0; } -/* If a directory is seeked, we have to free saved readdir() state */ static loff_t ubifs_dir_llseek(struct file *file, loff_t offset, int origin) { - kfree(file->private_data); - file->private_data = NULL; return generic_file_llseek(file, offset, origin); } -- cgit v1.1 From c55035cb085d79311a429129183f43b85bac3994 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 4 Jul 2013 18:42:29 +0200 Subject: hpfs: better test for errors commit 3ebacb05044f82c5f0bb456a894eb9dc57d0ed90 upstream. The test if bitmap access is out of bound could errorneously pass if the device size is divisible by 16384 sectors and we are asking for one bitmap after the end. Check for invalid size in the superblock. Invalid size could cause integer overflows in the rest of the code. Signed-off-by: Mikulas Patocka Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/hpfs/map.c | 3 ++- fs/hpfs/super.c | 8 +++++++- 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/hpfs/map.c b/fs/hpfs/map.c index a790821..ea3d1ca 100644 --- a/fs/hpfs/map.c +++ b/fs/hpfs/map.c @@ -17,7 +17,8 @@ unsigned int *hpfs_map_bitmap(struct super_block *s, unsigned bmp_block, struct quad_buffer_head *qbh, char *id) { secno sec; - if (hpfs_sb(s)->sb_chk) if (bmp_block * 16384 > hpfs_sb(s)->sb_fs_size) { + unsigned n_bands = (hpfs_sb(s)->sb_fs_size + 0x3fff) >> 14; + if (hpfs_sb(s)->sb_chk) if (bmp_block >= n_bands) { hpfs_error(s, "hpfs_map_bitmap called with bad parameter: %08x at %s", bmp_block, id); return NULL; } diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index 98580a3..f760c15 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -553,7 +553,13 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent) sbi->sb_cp_table = NULL; sbi->sb_c_bitmap = -1; sbi->sb_max_fwd_alloc = 0xffffff; - + + if (sbi->sb_fs_size >= 0x80000000) { + hpfs_error(s, "invalid size in superblock: %08x", + (unsigned)sbi->sb_fs_size); + goto bail4; + } + /* Load bitmap directory */ if (!(sbi->sb_bmp_dir = hpfs_load_bitmap_directory(s, le32_to_cpu(superblock->bitmaps)))) goto bail4; -- cgit v1.1 From 5583db3830ede897a8e4709452bcf0bc5266bdd9 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 21 Jun 2013 11:48:11 -0400 Subject: nfsd4: fix decoding of compounds across page boundaries commit 247500820ebd02ad87525db5d9b199e5b66f6636 upstream. A freebsd NFSv4.0 client was getting rare IO errors expanding a tarball. A network trace showed the server returning BAD_XDR on the final getattr of a getattr+write+getattr compound. The final getattr started on a page boundary. I believe the Linux client ignores errors on the post-write getattr, and that that's why we haven't seen this before. Reported-by: Rick Macklem Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4xdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 401b356..45f53ae 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -159,8 +159,8 @@ static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes) */ memcpy(p, argp->p, avail); /* step to next page */ - argp->p = page_address(argp->pagelist[0]); argp->pagelist++; + argp->p = page_address(argp->pagelist[0]); if (argp->pagelen < PAGE_SIZE) { argp->end = argp->p + (argp->pagelen>>2); argp->pagelen = 0; -- cgit v1.1 From 23643c00e5d692fa53fc7630931e6694b02f27ef Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 1 Jul 2013 08:12:40 -0400 Subject: jbd2: fix theoretical race in jbd2__journal_restart commit 39c04153fda8c32e85b51c96eb5511a326ad7609 upstream. Once we decrement transaction->t_updates, if this is the last handle holding the transaction from closing, and once we release the t_handle_lock spinlock, it's possible for the transaction to commit and be released. In practice with normal kernels, this probably won't happen, since the commit happens in a separate kernel thread and it's unlikely this could all happen within the space of a few CPU cycles. On the other hand, with a real-time kernel, this could potentially happen, so save the tid found in transaction->t_tid before we release t_handle_lock. It would require an insane configuration, such as one where the jbd2 thread was set to a very high real-time priority, perhaps because a high priority real-time thread is trying to read or write to a file system. But some people who use real-time kernels have been known to do insane things, including controlling laser-wielding industrial robots. :-) Signed-off-by: "Theodore Ts'o" Signed-off-by: Greg Kroah-Hartman --- fs/jbd2/transaction.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 4ef2aae..26e11db 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -469,10 +469,10 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask) &transaction->t_outstanding_credits); if (atomic_dec_and_test(&transaction->t_updates)) wake_up(&journal->j_wait_updates); + tid = transaction->t_tid; spin_unlock(&transaction->t_handle_lock); jbd_debug(2, "restarting handle %p\n", handle); - tid = transaction->t_tid; need_to_start = !tid_geq(journal->j_commit_request, tid); read_unlock(&journal->j_state_lock); if (need_to_start) -- cgit v1.1 From 791750989d8eba46434f2c0b02154ace47be6c8e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 1 Jul 2013 08:12:38 -0400 Subject: ext3,ext4: don't mess with dir_file->f_pos in htree_dirblock_to_tree() commit 64cb927371cd2ec43758d8a094a003d27bc3d0dc upstream. Both ext3 and ext4 htree_dirblock_to_tree() is just filling the in-core rbtree for use by call_filldir(). All updates of ->f_pos are done by the latter; bumping it here (on error) is obviously wrong - we might very well have it nowhere near the block we'd found an error in. Signed-off-by: Al Viro Signed-off-by: "Theodore Ts'o" Signed-off-by: Greg Kroah-Hartman --- fs/ext3/namei.c | 7 ++----- fs/ext4/namei.c | 7 ++----- 2 files changed, 4 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index e5a7111..8c9f82d 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -584,11 +584,8 @@ static int htree_dirblock_to_tree(struct file *dir_file, if (!ext3_check_dir_entry("htree_dirblock_to_tree", dir, de, bh, (block<i_sb)) +((char *)de - bh->b_data))) { - /* On error, skip the f_pos to the next block. */ - dir_file->f_pos = (dir_file->f_pos | - (dir->i_sb->s_blocksize - 1)) + 1; - brelse (bh); - return count; + /* silently ignore the rest of the block */ + break; } ext3fs_dirhash(de->name, de->name_len, hinfo); if ((hinfo->hash < start_hash) || diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 78585fc..8d3716f 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -585,11 +585,8 @@ static int htree_dirblock_to_tree(struct file *dir_file, if (ext4_check_dir_entry(dir, NULL, de, bh, (block<i_sb)) + ((char *)de - bh->b_data))) { - /* On error, skip the f_pos to the next block. */ - dir_file->f_pos = (dir_file->f_pos | - (dir->i_sb->s_blocksize - 1)) + 1; - brelse(bh); - return count; + /* silently ignore the rest of the block */ + break; } ext4fs_dirhash(de->name, de->name_len, hinfo); if ((hinfo->hash < start_hash) || -- cgit v1.1 From 3a32958d2ac96070c53d04bd8e013c97b260b5e6 Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Wed, 3 Jul 2013 15:01:03 -0700 Subject: ocfs2: xattr: fix inlined xattr reflink commit ef962df057aaafd714f5c22ba3de1be459571fdf upstream. Inlined xattr shared free space of inode block with inlined data or data extent record, so the size of the later two should be adjusted when inlined xattr is enabled. See ocfs2_xattr_ibody_init(). But this isn't done well when reflink. For inode with inlined data, its max inlined data size is adjusted in ocfs2_duplicate_inline_data(), no problem. But for inode with data extent record, its record count isn't adjusted. Fix it, or data extent record and inlined xattr may overwrite each other, then cause data corruption or xattr failure. One panic caused by this bug in our test environment is the following: kernel BUG at fs/ocfs2/xattr.c:1435! invalid opcode: 0000 [#1] SMP Pid: 10871, comm: multi_reflink_t Not tainted 2.6.39-300.17.1.el5uek #1 RIP: ocfs2_xa_offset_pointer+0x17/0x20 [ocfs2] RSP: e02b:ffff88007a587948 EFLAGS: 00010283 RAX: 0000000000000000 RBX: 0000000000000010 RCX: 00000000000051e4 RDX: ffff880057092060 RSI: 0000000000000f80 RDI: ffff88007a587a68 RBP: ffff88007a587948 R08: 00000000000062f4 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000010 R13: ffff88007a587a68 R14: 0000000000000001 R15: ffff88007a587c68 FS: 00007fccff7f06e0(0000) GS:ffff88007fc00000(0000) knlGS:0000000000000000 CS: e033 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 00000000015cf000 CR3: 000000007aa76000 CR4: 0000000000000660 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process multi_reflink_t Call Trace: ocfs2_xa_reuse_entry+0x60/0x280 [ocfs2] ocfs2_xa_prepare_entry+0x17e/0x2a0 [ocfs2] ocfs2_xa_set+0xcc/0x250 [ocfs2] ocfs2_xattr_ibody_set+0x98/0x230 [ocfs2] __ocfs2_xattr_set_handle+0x4f/0x700 [ocfs2] ocfs2_xattr_set+0x6c6/0x890 [ocfs2] ocfs2_xattr_user_set+0x46/0x50 [ocfs2] generic_setxattr+0x70/0x90 __vfs_setxattr_noperm+0x80/0x1a0 vfs_setxattr+0xa9/0xb0 setxattr+0xc3/0x120 sys_fsetxattr+0xa8/0xd0 system_call_fastpath+0x16/0x1b Signed-off-by: Junxiao Bi Reviewed-by: Jie Liu Acked-by: Joel Becker Cc: Mark Fasheh Cc: Sunil Mushran Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/xattr.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'fs') diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 81ecf9c..61a84cf 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -6497,6 +6497,16 @@ static int ocfs2_reflink_xattr_inline(struct ocfs2_xattr_reflink *args) } new_oi = OCFS2_I(args->new_inode); + /* + * Adjust extent record count to reserve space for extended attribute. + * Inline data count had been adjusted in ocfs2_duplicate_inline_data(). + */ + if (!(new_oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) && + !(ocfs2_inode_is_fast_symlink(args->new_inode))) { + struct ocfs2_extent_list *el = &new_di->id2.i_list; + le16_add_cpu(&el->l_count, -(inline_size / + sizeof(struct ocfs2_extent_rec))); + } spin_lock(&new_oi->ip_lock); new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL | OCFS2_INLINE_XATTR_FL; new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features); -- cgit v1.1 From d19c4370e3e590ed083c77238866719025476108 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 31 May 2013 19:38:56 -0400 Subject: ext4: fix data offset overflow in ext4_xattr_fiemap() on 32-bit archs commit a60697f411eb365fb09e639e6f183fe33d1eb796 upstream. On 32-bit architectures with 32-bit sector_t computation of data offset in ext4_xattr_fiemap() can overflow resulting in reporting bogus data location. Fix the problem by typing block number to proper type before shifting. Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/extents.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 354ba48..d432d37 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4155,7 +4155,7 @@ static int ext4_xattr_fiemap(struct inode *inode, error = ext4_get_inode_loc(inode, &iloc); if (error) return error; - physical = iloc.bh->b_blocknr << blockbits; + physical = (__u64)iloc.bh->b_blocknr << blockbits; offset = EXT4_GOOD_OLD_INODE_SIZE + EXT4_I(inode)->i_extra_isize; physical += offset; @@ -4163,7 +4163,7 @@ static int ext4_xattr_fiemap(struct inode *inode, flags |= FIEMAP_EXTENT_DATA_INLINE; brelse(iloc.bh); } else { /* external block */ - physical = EXT4_I(inode)->i_file_acl << blockbits; + physical = (__u64)EXT4_I(inode)->i_file_acl << blockbits; length = inode->i_sb->s_blocksize; } -- cgit v1.1 From 4b9cf8edf9d6203e0ed7a38844f8c3c35b101a61 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 31 May 2013 19:39:56 -0400 Subject: ext4: fix overflow when counting used blocks on 32-bit architectures commit 8af8eecc1331dbf5e8c662022272cf667e213da5 upstream. The arithmetics adding delalloc blocks to the number of used blocks in ext4_getattr() can easily overflow on 32-bit archs as we first multiply number of blocks by blocksize and then divide back by 512. Make the arithmetics more clever and also use proper type (unsigned long long instead of unsigned long). Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 7e56946..5de8a27 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5481,7 +5481,7 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { struct inode *inode; - unsigned long delalloc_blocks; + unsigned long long delalloc_blocks; inode = dentry->d_inode; generic_fillattr(inode, stat); @@ -5498,7 +5498,7 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, */ delalloc_blocks = EXT4_I(inode)->i_reserved_data_blocks; - stat->blocks += (delalloc_blocks << inode->i_sb->s_blocksize_bits)>>9; + stat->blocks += delalloc_blocks << (inode->i_sb->s_blocksize_bits-9); return 0; } -- cgit v1.1 From a4c38fde90666a77d461fd352914f954232361ab Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 28 Jun 2013 16:04:02 +0200 Subject: writeback: Fix periodic writeback after fs mount commit a5faeaf9109578e65e1a32e2a3e76c8b47e7dcb6 upstream. Code in blkdev.c moves a device inode to default_backing_dev_info when the last reference to the device is put and moves the device inode back to its bdi when the first reference is acquired. This includes moving to wb.b_dirty list if the device inode is dirty. The code however doesn't setup timer to wake corresponding flusher thread and while wb.b_dirty list is non-empty __mark_inode_dirty() will not set it up either. Thus periodic writeback is effectively disabled until a sync(2) call which can lead to unexpected data loss in case of crash or power failure. Fix the problem by setting up a timer for periodic writeback in case we add the first dirty inode to wb.b_dirty list in bdev_inode_switch_bdi(). Reported-by: Bert De Jonghe Signed-off-by: Jan Kara Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- fs/block_dev.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/block_dev.c b/fs/block_dev.c index 97e4cb5..b5eb8c1 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -55,13 +55,21 @@ EXPORT_SYMBOL(I_BDEV); static void bdev_inode_switch_bdi(struct inode *inode, struct backing_dev_info *dst) { + bool wakeup_bdi = false; + spin_lock(&inode_wb_list_lock); spin_lock(&inode->i_lock); inode->i_data.backing_dev_info = dst; - if (inode->i_state & I_DIRTY) + if (inode->i_state & I_DIRTY) { + if (bdi_cap_writeback_dirty(dst) && !wb_has_dirty_io(&dst->wb)) + wakeup_bdi = true; list_move(&inode->i_wb_list, &dst->wb.b_dirty); + } spin_unlock(&inode->i_lock); spin_unlock(&inode_wb_list_lock); + + if (wakeup_bdi) + bdi_wakeup_thread_delayed(dst); } sector_t blkdev_max_block(struct block_device *bdev) -- cgit v1.1 From a00c4c9f176094d7b71acd410f871b609f5e7c84 Mon Sep 17 00:00:00 2001 From: David Jeffery Date: Wed, 10 Jul 2013 13:19:50 -0400 Subject: lockd: protect nlm_blocked access in nlmsvc_retry_blocked commit 1c327d962fc420aea046c16215a552710bde8231 upstream. In nlmsvc_retry_blocked, the check that the list is non-empty and acquiring the pointer of the first entry is unprotected by any lock. This allows a rare race condition when there is only one entry on the list. A function such as nlmsvc_grant_callback() can be called, which will temporarily remove the entry from the list. Between the list_empty() and list_entry(),the list may become empty, causing an invalid pointer to be used as an nlm_block, leading to a possible crash. This patch adds the nlm_block_lock around these calls to prevent concurrent use of the nlm_blocked list. This was a regression introduced by f904be9cc77f361d37d71468b13ff3d1a1823dea "lockd: Mostly remove BKL from the server". Signed-off-by: David Jeffery Cc: Bryan Schumaker Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/lockd/svclock.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 6e31695..db7be3a 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -913,6 +913,7 @@ nlmsvc_retry_blocked(void) unsigned long timeout = MAX_SCHEDULE_TIMEOUT; struct nlm_block *block; + spin_lock(&nlm_blocked_lock); while (!list_empty(&nlm_blocked) && !kthread_should_stop()) { block = list_entry(nlm_blocked.next, struct nlm_block, b_list); @@ -922,6 +923,7 @@ nlmsvc_retry_blocked(void) timeout = block->b_when - jiffies; break; } + spin_unlock(&nlm_blocked_lock); dprintk("nlmsvc_retry_blocked(%p, when=%ld)\n", block, block->b_when); @@ -931,7 +933,9 @@ nlmsvc_retry_blocked(void) retry_deferred_block(block); } else nlmsvc_grant_blocked(block); + spin_lock(&nlm_blocked_lock); } + spin_unlock(&nlm_blocked_lock); return timeout; } -- cgit v1.1 From dbb1314f28d3a5b561b9d3b7598ccf204f88472d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 20 Jul 2013 03:13:55 +0400 Subject: livelock avoidance in sget() commit acfec9a5a892f98461f52ed5770de99a3e571ae2 upstream. Eric Sandeen has found a nasty livelock in sget() - take a mount(2) about to fail. The superblock is on ->fs_supers, ->s_umount is held exclusive, ->s_active is 1. Along comes two more processes, trying to mount the same thing; sget() in each is picking that superblock, bumping ->s_count and trying to grab ->s_umount. ->s_active is 3 now. Original mount(2) finally gets to deactivate_locked_super() on failure; ->s_active is 2, superblock is still ->fs_supers because shutdown will *not* happen until ->s_active hits 0. ->s_umount is dropped and now we have two processes chasing each other: s_active = 2, A acquired ->s_umount, B blocked A sees that the damn thing is stillborn, does deactivate_locked_super() s_active = 1, A drops ->s_umount, B gets it A restarts the search and finds the same superblock. And bumps it ->s_active. s_active = 2, B holds ->s_umount, A blocked on trying to get it ... and we are in the earlier situation with A and B switched places. The root cause, of course, is that ->s_active should not grow until we'd got MS_BORN. Then failing ->mount() will have deactivate_locked_super() shut the damn thing down. Fortunately, it's easy to do - the key point is that grab_super() is called only for superblocks currently on ->fs_supers, so it can bump ->s_count and grab ->s_umount first, then check MS_BORN and bump ->s_active; we must never increment ->s_count for superblocks past ->kill_sb(), but grab_super() is never called for those. The bug is pretty old; we would've caught it by now, if not for accidental exclusion between sget() for block filesystems; the things like cgroup or e.g. mtd-based filesystems don't have anything of that sort, so they get bitten. The right way to deal with that is obviously to fix sget()... Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/super.c | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/super.c b/fs/super.c index caf4dfa..a448af5 100644 --- a/fs/super.c +++ b/fs/super.c @@ -222,19 +222,19 @@ EXPORT_SYMBOL(deactivate_super); * and want to turn it into a full-blown active reference. grab_super() * is called with sb_lock held and drops it. Returns 1 in case of * success, 0 if we had failed (superblock contents was already dead or - * dying when grab_super() had been called). + * dying when grab_super() had been called). Note that this is only + * called for superblocks not in rundown mode (== ones still on ->fs_supers + * of their type), so increment of ->s_count is OK here. */ static int grab_super(struct super_block *s) __releases(sb_lock) { - if (atomic_inc_not_zero(&s->s_active)) { - spin_unlock(&sb_lock); - return 1; - } - /* it's going away */ s->s_count++; spin_unlock(&sb_lock); - /* wait for it to die */ down_write(&s->s_umount); + if ((s->s_flags & MS_BORN) && atomic_inc_not_zero(&s->s_active)) { + put_super(s); + return 1; + } up_write(&s->s_umount); put_super(s); return 0; @@ -335,11 +335,6 @@ retry: destroy_super(s); s = NULL; } - down_write(&old->s_umount); - if (unlikely(!(old->s_flags & MS_BORN))) { - deactivate_locked_super(old); - goto retry; - } return old; } } @@ -512,10 +507,10 @@ restart: if (list_empty(&sb->s_instances)) continue; if (sb->s_bdev == bdev) { - if (grab_super(sb)) /* drops sb_lock */ - return sb; - else + if (!grab_super(sb)) goto restart; + up_write(&sb->s_umount); + return sb; } } spin_unlock(&sb_lock); -- cgit v1.1 From 0157e289c1a4bdcf3ce388dd4024bf59336e38ce Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 8 Jul 2013 15:59:40 -0700 Subject: fanotify: info leak in copy_event_to_user() commit de1e0c40aceb9d5bff09c3a3b97b2f1b178af53f upstream. The ->reserved field isn't cleared so we leak one byte of stack information to userspace. Signed-off-by: Dan Carpenter Cc: Eric Paris Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Cc: Luis Henriques Signed-off-by: Greg Kroah-Hartman --- fs/notify/fanotify/fanotify_user.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 9fde1c0..9860f6b 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -118,6 +118,7 @@ static int fill_event_metadata(struct fsnotify_group *group, metadata->event_len = FAN_EVENT_METADATA_LEN; metadata->metadata_len = FAN_EVENT_METADATA_LEN; metadata->vers = FANOTIFY_METADATA_VERSION; + metadata->reserved = 0; metadata->mask = event->mask & FAN_ALL_OUTGOING_EVENTS; metadata->pid = pid_vnr(event->tgid); if (unlikely(event->mask & FAN_Q_OVERFLOW)) -- cgit v1.1 From 1b48f57ff205805a81c56c7b480347349bf19620 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 26 Jul 2013 17:12:56 +0200 Subject: debugfs: debugfs_remove_recursive() must not rely on list_empty(d_subdirs) commit 776164c1faac4966ab14418bb0922e1820da1d19 upstream. debugfs_remove_recursive() is wrong, 1. it wrongly assumes that !list_empty(d_subdirs) means that this dir should be removed. This is not that bad by itself, but: 2. if d_subdirs does not becomes empty after __debugfs_remove() it gives up and silently fails, it doesn't even try to remove other entries. However ->d_subdirs can be non-empty because it still has the already deleted !debugfs_positive() entries. 3. simple_release_fs() is called even if __debugfs_remove() fails. Suppose we have dir1/ dir2/ file2 file1 and someone opens dir1/dir2/file2. Now, debugfs_remove_recursive(dir1/dir2) succeeds, and dir1/dir2 goes away. But debugfs_remove_recursive(dir1) silently fails and doesn't remove this directory. Because it tries to delete (the already deleted) dir1/dir2/file2 again and then fails due to "Avoid infinite loop" logic. Test-case: #!/bin/sh cd /sys/kernel/debug/tracing echo 'p:probe/sigprocmask sigprocmask' >> kprobe_events sleep 1000 < events/probe/sigprocmask/id & echo -n >| kprobe_events [ -d events/probe ] && echo "ERR!! failed to rm probe" And after that it is not possible to create another probe entry. With this patch debugfs_remove_recursive() skips !debugfs_positive() files although this is not strictly needed. The most important change is that it does not try to make ->d_subdirs empty, it simply scans the whole list(s) recursively and removes as much as possible. Link: http://lkml.kernel.org/r/20130726151256.GC19472@redhat.com Acked-by: Greg Kroah-Hartman Signed-off-by: Oleg Nesterov Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- fs/debugfs/inode.c | 69 +++++++++++++++++------------------------------------- 1 file changed, 22 insertions(+), 47 deletions(-) (limited to 'fs') diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index e7a7a2f..eac5b7c 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -380,8 +380,7 @@ EXPORT_SYMBOL_GPL(debugfs_remove); */ void debugfs_remove_recursive(struct dentry *dentry) { - struct dentry *child; - struct dentry *parent; + struct dentry *child, *next, *parent; if (!dentry) return; @@ -391,61 +390,37 @@ void debugfs_remove_recursive(struct dentry *dentry) return; parent = dentry; + down: mutex_lock(&parent->d_inode->i_mutex); + list_for_each_entry_safe(child, next, &parent->d_subdirs, d_u.d_child) { + if (!debugfs_positive(child)) + continue; - while (1) { - /* - * When all dentries under "parent" has been removed, - * walk up the tree until we reach our starting point. - */ - if (list_empty(&parent->d_subdirs)) { - mutex_unlock(&parent->d_inode->i_mutex); - if (parent == dentry) - break; - parent = parent->d_parent; - mutex_lock(&parent->d_inode->i_mutex); - } - child = list_entry(parent->d_subdirs.next, struct dentry, - d_u.d_child); - next_sibling: - - /* - * If "child" isn't empty, walk down the tree and - * remove all its descendants first. - */ + /* perhaps simple_empty(child) makes more sense */ if (!list_empty(&child->d_subdirs)) { mutex_unlock(&parent->d_inode->i_mutex); parent = child; - mutex_lock(&parent->d_inode->i_mutex); - continue; - } - __debugfs_remove(child, parent); - if (parent->d_subdirs.next == &child->d_u.d_child) { - /* - * Try the next sibling. - */ - if (child->d_u.d_child.next != &parent->d_subdirs) { - child = list_entry(child->d_u.d_child.next, - struct dentry, - d_u.d_child); - goto next_sibling; - } - - /* - * Avoid infinite loop if we fail to remove - * one dentry. - */ - mutex_unlock(&parent->d_inode->i_mutex); - break; + goto down; } - simple_release_fs(&debugfs_mount, &debugfs_mount_count); + up: + if (!__debugfs_remove(child, parent)) + simple_release_fs(&debugfs_mount, &debugfs_mount_count); } - parent = dentry->d_parent; + mutex_unlock(&parent->d_inode->i_mutex); + child = parent; + parent = parent->d_parent; mutex_lock(&parent->d_inode->i_mutex); - __debugfs_remove(dentry, parent); + + if (child != dentry) { + next = list_entry(child->d_u.d_child.next, struct dentry, + d_u.d_child); + goto up; + } + + if (!__debugfs_remove(child, parent)) + simple_release_fs(&debugfs_mount, &debugfs_mount_count); mutex_unlock(&parent->d_inode->i_mutex); - simple_release_fs(&debugfs_mount, &debugfs_mount_count); } EXPORT_SYMBOL_GPL(debugfs_remove_recursive); -- cgit v1.1 From a9d8aaedf15cc981df0aebda88b113eeee9c5cab Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 26 Mar 2012 09:55:29 -0400 Subject: cifs: silence compiler warnings showing up with gcc-4.7.0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit b2a3ad9ca502169fc4c11296fa20f56059c7c031 upstream. gcc-4.7.0 has started throwing these warnings when building cifs.ko. CC [M] fs/cifs/cifssmb.o fs/cifs/cifssmb.c: In function ‘CIFSSMBSetCIFSACL’: fs/cifs/cifssmb.c:3905:9: warning: array subscript is above array bounds [-Warray-bounds] fs/cifs/cifssmb.c: In function ‘CIFSSMBSetFileInfo’: fs/cifs/cifssmb.c:5711:8: warning: array subscript is above array bounds [-Warray-bounds] fs/cifs/cifssmb.c: In function ‘CIFSSMBUnixSetFileInfo’: fs/cifs/cifssmb.c:6001:25: warning: array subscript is above array bounds [-Warray-bounds] This patch cleans up the code a bit by using the offsetof macro instead of the funky "&pSMB->hdr.Protocol" construct. Signed-off-by: Jeff Layton Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/cifssmb.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 07132c4..219933b 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -3473,13 +3473,12 @@ CIFSSMBSetCIFSACL(const int xid, struct cifs_tcon *tcon, __u16 fid, int rc = 0; int bytes_returned = 0; SET_SEC_DESC_REQ *pSMB = NULL; - NTRANSACT_RSP *pSMBr = NULL; + void *pSMBr; setCifsAclRetry: - rc = smb_init(SMB_COM_NT_TRANSACT, 19, tcon, (void **) &pSMB, - (void **) &pSMBr); + rc = smb_init(SMB_COM_NT_TRANSACT, 19, tcon, (void **) &pSMB, &pSMBr); if (rc) - return (rc); + return rc; pSMB->MaxSetupCount = 0; pSMB->Reserved = 0; @@ -3507,9 +3506,8 @@ setCifsAclRetry: pSMB->AclFlags = cpu_to_le32(CIFS_ACL_DACL); if (pntsd && acllen) { - memcpy((char *) &pSMBr->hdr.Protocol + data_offset, - (char *) pntsd, - acllen); + memcpy((char *)pSMBr + offsetof(struct smb_hdr, Protocol) + + data_offset, pntsd, acllen); inc_rfc1001_len(pSMB, byte_count + data_count); } else inc_rfc1001_len(pSMB, byte_count); @@ -5291,7 +5289,8 @@ CIFSSMBSetFileInfo(const int xid, struct cifs_tcon *tcon, param_offset = offsetof(struct smb_com_transaction2_sfi_req, Fid) - 4; offset = param_offset + params; - data_offset = (char *) (&pSMB->hdr.Protocol) + offset; + data_offset = (char *)pSMB + + offsetof(struct smb_hdr, Protocol) + offset; count = sizeof(FILE_BASIC_INFO); pSMB->MaxParameterCount = cpu_to_le16(2); @@ -5560,7 +5559,7 @@ CIFSSMBUnixSetFileInfo(const int xid, struct cifs_tcon *tcon, u16 fid, u32 pid_of_opener) { struct smb_com_transaction2_sfi_req *pSMB = NULL; - FILE_UNIX_BASIC_INFO *data_offset; + char *data_offset; int rc = 0; u16 params, param_offset, offset, byte_count, count; @@ -5582,8 +5581,9 @@ CIFSSMBUnixSetFileInfo(const int xid, struct cifs_tcon *tcon, param_offset = offsetof(struct smb_com_transaction2_sfi_req, Fid) - 4; offset = param_offset + params; - data_offset = (FILE_UNIX_BASIC_INFO *) - ((char *)(&pSMB->hdr.Protocol) + offset); + data_offset = (char *)pSMB + + offsetof(struct smb_hdr, Protocol) + offset; + count = sizeof(FILE_UNIX_BASIC_INFO); pSMB->MaxParameterCount = cpu_to_le16(2); @@ -5605,7 +5605,7 @@ CIFSSMBUnixSetFileInfo(const int xid, struct cifs_tcon *tcon, inc_rfc1001_len(pSMB, byte_count); pSMB->ByteCount = cpu_to_le16(byte_count); - cifs_fill_unix_set_info(data_offset, args); + cifs_fill_unix_set_info((FILE_UNIX_BASIC_INFO *)data_offset, args); rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0); if (rc) -- cgit v1.1 From 03b9342ba877075b024e3932b43afa68e5d3f0fd Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 8 Nov 2012 16:09:37 -0800 Subject: vfs: d_obtain_alias() needs to use "/" as default name. commit b911a6bdeef5848c468597d040e3407e0aee04ce upstream. NFS appears to use d_obtain_alias() to create the root dentry rather than d_make_root. This can cause 'prepend_path()' to complain that the root has a weird name if an NFS filesystem is lazily unmounted. e.g. if "/mnt" is an NFS mount then { cd /mnt; umount -l /mnt ; ls -l /proc/self/cwd; } will cause a WARN message like WARNING: at /home/git/linux/fs/dcache.c:2624 prepend_path+0x1d7/0x1e0() ... Root dentry has weird name <> to appear in kernel logs. So change d_obtain_alias() to use "/" rather than "" as the anonymous name. Signed-off-by: NeilBrown Cc: Trond Myklebust Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Al Viro [bwh: Backported to 3.2: use named initialisers instead of QSTR_INIT()] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- fs/dcache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/dcache.c b/fs/dcache.c index 8b64f38..ecc0742 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1563,7 +1563,7 @@ static struct dentry * d_find_any_alias(struct inode *inode) */ struct dentry *d_obtain_alias(struct inode *inode) { - static const struct qstr anonstring = { .name = "" }; + static const struct qstr anonstring = { .name = "/", .len = 1 }; struct dentry *tmp; struct dentry *res; -- cgit v1.1 From 637a641c5478377849d936fa276f92a4f418bb65 Mon Sep 17 00:00:00 2001 From: Vyacheslav Dubeyko Date: Thu, 22 Aug 2013 16:35:44 -0700 Subject: nilfs2: remove double bio_put() in nilfs_end_bio_write() for BIO_EOPNOTSUPP error commit 2df37a19c686c2d7c4e9b4ce1505b5141e3e5552 upstream. Remove double call of bio_put() in nilfs_end_bio_write() for the case of BIO_EOPNOTSUPP error detection. The issue was found by Dan Carpenter and he suggests first version of the fix too. Signed-off-by: Vyacheslav Dubeyko Reported-by: Dan Carpenter Acked-by: Ryusuke Konishi Tested-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/nilfs2/segbuf.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c index 850a7c0..ec6df85 100644 --- a/fs/nilfs2/segbuf.c +++ b/fs/nilfs2/segbuf.c @@ -345,8 +345,7 @@ static void nilfs_end_bio_write(struct bio *bio, int err) if (err == -EOPNOTSUPP) { set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); - bio_put(bio); - /* to be detected by submit_seg_bio() */ + /* to be detected by nilfs_segbuf_submit_bio() */ } if (!uptodate) -- cgit v1.1 From 3c6766ab27147383b3a084d29cb87163342f14b0 Mon Sep 17 00:00:00 2001 From: Vyacheslav Dubeyko Date: Thu, 22 Aug 2013 16:35:45 -0700 Subject: nilfs2: fix issue with counting number of bio requests for BIO_EOPNOTSUPP error detection commit 4bf93b50fd04118ac7f33a3c2b8a0a1f9fa80bc9 upstream. Fix the issue with improper counting number of flying bio requests for BIO_EOPNOTSUPP error detection case. The sb_nbio must be incremented exactly the same number of times as complete() function was called (or will be called) because nilfs_segbuf_wait() will call wail_for_completion() for the number of times set to sb_nbio: do { wait_for_completion(&segbuf->sb_bio_event); } while (--segbuf->sb_nbio > 0); Two functions complete() and wait_for_completion() must be called the same number of times for the same sb_bio_event. Otherwise, wait_for_completion() will hang or leak. Signed-off-by: Vyacheslav Dubeyko Cc: Dan Carpenter Acked-by: Ryusuke Konishi Tested-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/nilfs2/segbuf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c index ec6df85..07a666a 100644 --- a/fs/nilfs2/segbuf.c +++ b/fs/nilfs2/segbuf.c @@ -376,12 +376,12 @@ static int nilfs_segbuf_submit_bio(struct nilfs_segment_buffer *segbuf, bio->bi_private = segbuf; bio_get(bio); submit_bio(mode, bio); + segbuf->sb_nbio++; if (bio_flagged(bio, BIO_EOPNOTSUPP)) { bio_put(bio); err = -EOPNOTSUPP; goto failed; } - segbuf->sb_nbio++; bio_put(bio); wi->bio = NULL; -- cgit v1.1 From 59fb9f6b1f98c9ab912bbac7b55ba5bc2e40750b Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Thu, 15 Aug 2013 15:36:49 -0500 Subject: jfs: fix readdir cookie incompatibility with NFSv4 commit 44512449c0ab368889dd13ae0031fba74ee7e1d2 upstream. NFSv4 reserves readdir cookie values 0-2 for special entries (. and ..), but jfs allows a value of 2 for a non-special entry. This incompatibility can result in the nfs client reporting a readdir loop. This patch doesn't change the value stored internally, but adds one to the value exposed to the iterate method. Signed-off-by: Dave Kleikamp [bwh: Backported to 3.2: - Adjust context - s/ctx->pos/filp->f_pos/] Tested-by: Christian Kujau Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- fs/jfs/jfs_dtree.c | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c index 9197a1b..b6f17c0 100644 --- a/fs/jfs/jfs_dtree.c +++ b/fs/jfs/jfs_dtree.c @@ -3047,6 +3047,14 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) dir_index = (u32) filp->f_pos; + /* + * NFSv4 reserves cookies 1 and 2 for . and .. so we add + * the value we return to the vfs is one greater than the + * one we use internally. + */ + if (dir_index) + dir_index--; + if (dir_index > 1) { struct dir_table_slot dirtab_slot; @@ -3086,7 +3094,7 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) if (p->header.flag & BT_INTERNAL) { jfs_err("jfs_readdir: bad index table"); DT_PUTPAGE(mp); - filp->f_pos = -1; + filp->f_pos = DIREND; return 0; } } else { @@ -3094,7 +3102,7 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) /* * self "." */ - filp->f_pos = 0; + filp->f_pos = 1; if (filldir(dirent, ".", 1, 0, ip->i_ino, DT_DIR)) return 0; @@ -3102,7 +3110,7 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) /* * parent ".." */ - filp->f_pos = 1; + filp->f_pos = 2; if (filldir(dirent, "..", 2, 1, PARENT(ip), DT_DIR)) return 0; @@ -3123,24 +3131,25 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) /* * Legacy filesystem - OS/2 & Linux JFS < 0.3.6 * - * pn = index = 0: First entry "." - * pn = 0; index = 1: Second entry ".." + * pn = 0; index = 1: First entry "." + * pn = 0; index = 2: Second entry ".." * pn > 0: Real entries, pn=1 -> leftmost page * pn = index = -1: No more entries */ dtpos = filp->f_pos; - if (dtpos == 0) { + if (dtpos < 2) { /* build "." entry */ + filp->f_pos = 1; if (filldir(dirent, ".", 1, filp->f_pos, ip->i_ino, DT_DIR)) return 0; - dtoffset->index = 1; + dtoffset->index = 2; filp->f_pos = dtpos; } if (dtoffset->pn == 0) { - if (dtoffset->index == 1) { + if (dtoffset->index == 2) { /* build ".." entry */ if (filldir(dirent, "..", 2, filp->f_pos, @@ -3233,6 +3242,12 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) } jfs_dirent->position = unique_pos++; } + /* + * We add 1 to the index because we may + * use a value of 2 internally, and NFSv4 + * doesn't like that. + */ + jfs_dirent->position++; } else { jfs_dirent->position = dtpos; len = min(d_namleft, DTLHDRDATALEN_LEGACY); -- cgit v1.1 From eb18ce5b78b1efb313a14532d2883420163e681a Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 5 Aug 2013 17:55:01 -0700 Subject: SCSI: sg: Fix user memory corruption when SG_IO is interrupted by a signal commit 35dc248383bbab0a7203fca4d722875bc81ef091 upstream. There is a nasty bug in the SCSI SG_IO ioctl that in some circumstances leads to one process writing data into the address space of some other random unrelated process if the ioctl is interrupted by a signal. What happens is the following: - A process issues an SG_IO ioctl with direction DXFER_FROM_DEV (ie the underlying SCSI command will transfer data from the SCSI device to the buffer provided in the ioctl) - Before the command finishes, a signal is sent to the process waiting in the ioctl. This will end up waking up the sg_ioctl() code: result = wait_event_interruptible(sfp->read_wait, (srp_done(sfp, srp) || sdp->detached)); but neither srp_done() nor sdp->detached is true, so we end up just setting srp->orphan and returning to userspace: srp->orphan = 1; write_unlock_irq(&sfp->rq_list_lock); return result; /* -ERESTARTSYS because signal hit process */ At this point the original process is done with the ioctl and blithely goes ahead handling the signal, reissuing the ioctl, etc. - Eventually, the SCSI command issued by the first ioctl finishes and ends up in sg_rq_end_io(). At the end of that function, we run through: write_lock_irqsave(&sfp->rq_list_lock, iflags); if (unlikely(srp->orphan)) { if (sfp->keep_orphan) srp->sg_io_owned = 0; else done = 0; } srp->done = done; write_unlock_irqrestore(&sfp->rq_list_lock, iflags); if (likely(done)) { /* Now wake up any sg_read() that is waiting for this * packet. */ wake_up_interruptible(&sfp->read_wait); kill_fasync(&sfp->async_qp, SIGPOLL, POLL_IN); kref_put(&sfp->f_ref, sg_remove_sfp); } else { INIT_WORK(&srp->ew.work, sg_rq_end_io_usercontext); schedule_work(&srp->ew.work); } Since srp->orphan *is* set, we set done to 0 (assuming the userspace app has not set keep_orphan via an SG_SET_KEEP_ORPHAN ioctl), and therefore we end up scheduling sg_rq_end_io_usercontext() to run in a workqueue. - In workqueue context we go through sg_rq_end_io_usercontext() -> sg_finish_rem_req() -> blk_rq_unmap_user() -> ... -> bio_uncopy_user() -> __bio_copy_iov() -> copy_to_user(). The key point here is that we are doing copy_to_user() on a workqueue -- that is, we're on a kernel thread with current->mm equal to whatever random previous user process was scheduled before this kernel thread. So we end up copying whatever data the SCSI command returned to the virtual address of the buffer passed into the original ioctl, but it's quite likely we do this copying into a different address space! As suggested by James Bottomley , add a check for current->mm (which is NULL if we're on a kernel thread without a real userspace address space) in bio_uncopy_user(), and skip the copy if we're on a kernel thread. There's no reason that I can think of for any caller of bio_uncopy_user() to want to do copying on a kernel thread with a random active userspace address space. Huge thanks to Costa Sapuntzakis for the original pointer to this bug in the sg code. Signed-off-by: Roland Dreier Tested-by: David Milburn Cc: Jens Axboe Signed-off-by: James Bottomley [lizf: backported to 3.4: - Use __bio_for_each_segment() instead of bio_for_each_segment_all()] Signed-off-by: Li Zefan Signed-off-by: Greg Kroah-Hartman --- fs/bio.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/bio.c b/fs/bio.c index 9bfade8..5a48044 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -786,12 +786,22 @@ static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs, int bio_uncopy_user(struct bio *bio) { struct bio_map_data *bmd = bio->bi_private; - int ret = 0; + struct bio_vec *bvec; + int ret = 0, i; - if (!bio_flagged(bio, BIO_NULL_MAPPED)) - ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs, - bmd->nr_sgvecs, bio_data_dir(bio) == READ, - 0, bmd->is_our_pages); + if (!bio_flagged(bio, BIO_NULL_MAPPED)) { + /* + * if we're in a workqueue, the request is orphaned, so + * don't copy into a random user address space, just free. + */ + if (current->mm) + ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs, + bmd->nr_sgvecs, bio_data_dir(bio) == READ, + 0, bmd->is_our_pages); + else if (bmd->is_our_pages) + __bio_for_each_segment(bvec, bio, i, 0) + __free_page(bvec->bv_page); + } bio_free_map_data(bmd); bio_put(bio); return ret; -- cgit v1.1 From 186ad2276339b0fc63f06146627b6d15c5058c4f Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 5 Sep 2013 08:38:10 -0400 Subject: cifs: ensure that srv_mutex is held when dealing with ssocket pointer commit 73e216a8a42c0ef3d08071705c946c38fdbe12b0 upstream. Oleksii reported that he had seen an oops similar to this: BUG: unable to handle kernel NULL pointer dereference at 0000000000000088 IP: [] sock_sendmsg+0x93/0xd0 PGD 0 Oops: 0000 [#1] PREEMPT SMP Modules linked in: ipt_MASQUERADE xt_REDIRECT xt_tcpudp iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack ip_tables x_tables carl9170 ath usb_storage f2fs nfnetlink_log nfnetlink md4 cifs dns_resolver hid_generic usbhid hid af_packet uvcvideo videobuf2_vmalloc videobuf2_memops videobuf2_core videodev rfcomm btusb bnep bluetooth qmi_wwan qcserial cdc_wdm usb_wwan usbnet usbserial mii snd_hda_codec_hdmi snd_hda_codec_realtek iwldvm mac80211 coretemp intel_powerclamp kvm_intel kvm iwlwifi snd_hda_intel cfg80211 snd_hda_codec xhci_hcd e1000e ehci_pci snd_hwdep sdhci_pci snd_pcm ehci_hcd microcode psmouse sdhci thinkpad_acpi mmc_core i2c_i801 pcspkr usbcore hwmon snd_timer snd_page_alloc snd ptp rfkill pps_core soundcore evdev usb_common vboxnetflt(O) vboxdrv(O)Oops#2 Part8 loop tun binfmt_misc fuse msr acpi_call(O) ipv6 autofs4 CPU: 0 PID: 21612 Comm: kworker/0:1 Tainted: G W O 3.10.1SIGN #28 Hardware name: LENOVO 2306CTO/2306CTO, BIOS G2ET92WW (2.52 ) 02/22/2013 Workqueue: cifsiod cifs_echo_request [cifs] task: ffff8801e1f416f0 ti: ffff880148744000 task.ti: ffff880148744000 RIP: 0010:[] [] sock_sendmsg+0x93/0xd0 RSP: 0000:ffff880148745b00 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff880148745b78 RCX: 0000000000000048 RDX: ffff880148745c90 RSI: ffff880181864a00 RDI: ffff880148745b78 RBP: ffff880148745c48 R08: 0000000000000048 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffff880181864a00 R13: ffff880148745c90 R14: 0000000000000048 R15: 0000000000000048 FS: 0000000000000000(0000) GS:ffff88021e200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000088 CR3: 000000020c42c000 CR4: 00000000001407b0 Oops#2 Part7 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Stack: ffff880148745b30 ffffffff810c4af9 0000004848745b30 ffff880181864a00 ffffffff81ffbc40 0000000000000000 ffff880148745c90 ffffffff810a5aab ffff880148745bc0 ffffffff81ffbc40 ffff880148745b60 ffffffff815a9fb8 Call Trace: [] ? finish_task_switch+0x49/0xe0 [] ? lock_timer_base.isra.36+0x2b/0x50 [] ? _raw_spin_unlock_irqrestore+0x18/0x40 [] ? try_to_del_timer_sync+0x4f/0x70 [] ? _raw_spin_unlock_bh+0x1f/0x30 [] kernel_sendmsg+0x37/0x50 [] smb_send_kvec+0xd0/0x1d0 [cifs] [] smb_send_rqst+0x83/0x1f0 [cifs] [] cifs_call_async+0xec/0x1b0 [cifs] [] ? free_rsp_buf+0x40/0x40 [cifs] Oops#2 Part6 [] SMB2_echo+0x8e/0xb0 [cifs] [] cifs_echo_request+0x79/0xa0 [cifs] [] process_one_work+0x173/0x4a0 [] worker_thread+0x121/0x3a0 [] ? manage_workers.isra.27+0x2b0/0x2b0 [] kthread+0xc0/0xd0 [] ? kthread_create_on_node+0x120/0x120 [] ret_from_fork+0x7c/0xb0 [] ? kthread_create_on_node+0x120/0x120 Code: 84 24 b8 00 00 00 4c 89 f1 4c 89 ea 4c 89 e6 48 89 df 4c 89 60 18 48 c7 40 28 00 00 00 00 4c 89 68 30 44 89 70 14 49 8b 44 24 28 90 88 00 00 00 3d ef fd ff ff 74 10 48 8d 65 e0 5b 41 5c 41 RIP [] sock_sendmsg+0x93/0xd0 RSP CR2: 0000000000000088 The client was in the middle of trying to send a frame when the server->ssocket pointer got zeroed out. In most places, that we access that pointer, the srv_mutex is held. There's only one spot that I see that the server->ssocket pointer gets set and the srv_mutex isn't held. This patch corrects that. The upstream bug report was here: https://bugzilla.kernel.org/show_bug.cgi?id=60557 Reported-by: Oleksii Shevchuk Signed-off-by: Jeff Layton Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/connect.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index b775809..9e6ee47 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -158,6 +158,7 @@ cifs_reconnect(struct TCP_Server_Info *server) try_to_freeze(); /* we should try only the port we connected to before */ + mutex_lock(&server->srv_mutex); rc = generic_ip_connect(server); if (rc) { cFYI(1, "reconnect error %d", rc); @@ -169,6 +170,7 @@ cifs_reconnect(struct TCP_Server_Info *server) server->tcpStatus = CifsNeedNegotiate; spin_unlock(&GlobalMid_Lock); } + mutex_unlock(&server->srv_mutex); } while (server->tcpStatus == CifsNeedReconnect); return rc; -- cgit v1.1 From 6becb5ec6dbf34850e39fd97860206c1628ccb94 Mon Sep 17 00:00:00 2001 From: Jie Liu Date: Wed, 11 Sep 2013 14:20:05 -0700 Subject: ocfs2: fix the end cluster offset of FIEMAP commit 28e8be31803b19d0d8f76216cb11b480b8a98bec upstream. Call fiemap ioctl(2) with given start offset as well as an desired mapping range should show extents if possible. However, we somehow figure out the end offset of mapping via 'mapping_end -= cpos' before iterating the extent records which would cause problems if the given fiemap length is too small to a cluster size, e.g, Cluster size 4096: debugfs.ocfs2 1.6.3 Block Size Bits: 12 Cluster Size Bits: 12 The extended fiemap test utility From David: https://gist.github.com/anonymous/6172331 # dd if=/dev/urandom of=/ocfs2/test_file bs=1M count=1000 # ./fiemap /ocfs2/test_file 4096 10 start: 4096, length: 10 File /ocfs2/test_file has 0 extents: # Logical Physical Length Flags ^^^^^ <-- No extent is shown In this case, at ocfs2_fiemap(): cpos == mapping_end == 1. Hence the loop of searching extent records was not executed at all. This patch remove the in question 'mapping_end -= cpos', and loops until the cpos is larger than the mapping_end as usual. # ./fiemap /ocfs2/test_file 4096 10 start: 4096, length: 10 File /ocfs2/test_file has 1 extents: # Logical Physical Length Flags 0: 0000000000000000 0000000056a01000 0000000006a00000 0000 Signed-off-by: Jie Liu Reported-by: David Weber Tested-by: David Weber Cc: Sunil Mushran Cc: Mark Fashen Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/extent_map.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs') diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c index 5941284..774a032 100644 --- a/fs/ocfs2/extent_map.c +++ b/fs/ocfs2/extent_map.c @@ -782,7 +782,6 @@ int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, cpos = map_start >> osb->s_clustersize_bits; mapping_end = ocfs2_clusters_for_bytes(inode->i_sb, map_start + map_len); - mapping_end -= cpos; is_last = 0; while (cpos < mapping_end && !is_last) { u32 fe_flags; -- cgit v1.1 From cd76cc4459ac83aa17226ceac9058abbfe8385c8 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 25 Jul 2013 11:49:11 +0200 Subject: isofs: Refuse RW mount of the filesystem instead of making it RO commit 17b7f7cf58926844e1dd40f5eb5348d481deca6a upstream. Refuse RW mount of isofs filesystem. So far we just silently changed it to RO mount but when the media is writeable, block layer won't notice this change and thus will think device is used RW and will block eject button of the drive. That is unexpected by users because for non-writeable media eject button works just fine. Userspace mount(8) command handles this just fine and retries mounting with MS_RDONLY set so userspace shouldn't see any regression. Plus any tool mounting isofs is likely confronted with the case of read-only media where block layer already refuses to mount the filesystem without MS_RDONLY set so our behavior shouldn't be anything new for it. Reported-by: Hui Wang Signed-off-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/isofs/inode.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index b3cc858..26f6364 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -119,8 +119,8 @@ static void destroy_inodecache(void) static int isofs_remount(struct super_block *sb, int *flags, char *data) { - /* we probably want a lot more here */ - *flags |= MS_RDONLY; + if (!(*flags & MS_RDONLY)) + return -EROFS; return 0; } @@ -769,15 +769,6 @@ root_found: */ s->s_maxbytes = 0x80000000000LL; - /* - * The CDROM is read-only, has no nodes (devices) on it, and since - * all of the files appear to be owned by root, we really do not want - * to allow suid. (suid or devices will not show up unless we have - * Rock Ridge extensions) - */ - - s->s_flags |= MS_RDONLY /* | MS_NODEV | MS_NOSUID */; - /* Set this for reference. Its not currently used except on write which we don't have .. */ @@ -1528,6 +1519,9 @@ struct inode *isofs_iget(struct super_block *sb, static struct dentry *isofs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { + /* We don't support read-write mounts */ + if (!(flags & MS_RDONLY)) + return ERR_PTR(-EACCES); return mount_bdev(fs_type, flags, dev_name, data, isofs_fill_super); } -- cgit v1.1 From f35b12e6453f12bb65f7d492b751f05cdeb41518 Mon Sep 17 00:00:00 2001 From: Maxim Patlasov Date: Mon, 12 Aug 2013 20:39:30 +0400 Subject: fuse: postpone end_page_writeback() in fuse_writepage_locked() commit 4a4ac4eba1010ef9a804569058ab29e3450c0315 upstream. The patch fixes a race between ftruncate(2), mmap-ed write and write(2): 1) An user makes a page dirty via mmap-ed write. 2) The user performs shrinking truncate(2) intended to purge the page. 3) Before fuse_do_setattr calls truncate_pagecache, the page goes to writeback. fuse_writepage_locked fills FUSE_WRITE request and releases the original page by end_page_writeback. 4) fuse_do_setattr() completes and successfully returns. Since now, i_mutex is free. 5) Ordinary write(2) extends i_size back to cover the page. Note that fuse_send_write_pages do wait for fuse writeback, but for another page->index. 6) fuse_writepage_locked proceeds by queueing FUSE_WRITE request. fuse_send_writepage is supposed to crop inarg->size of the request, but it doesn't because i_size has already been extended back. Moving end_page_writeback to the end of fuse_writepage_locked fixes the race because now the fact that truncate_pagecache is successfully returned infers that fuse_writepage_locked has already called end_page_writeback. And this, in turn, infers that fuse_flush_writepages has already called fuse_send_writepage, and the latter used valid (shrunk) i_size. write(2) could not extend it because of i_mutex held by ftruncate(2). Signed-off-by: Maxim Patlasov Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/fuse/file.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 79fca8d..2e1c10fe 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1298,7 +1298,6 @@ static int fuse_writepage_locked(struct page *page) inc_bdi_stat(mapping->backing_dev_info, BDI_WRITEBACK); inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP); - end_page_writeback(page); spin_lock(&fc->lock); list_add(&req->writepages_entry, &fi->writepages); @@ -1306,6 +1305,8 @@ static int fuse_writepage_locked(struct page *page) fuse_flush_writepages(inode); spin_unlock(&fc->lock); + end_page_writeback(page); + return 0; err_free: -- cgit v1.1 From 59e272211a96b1a4aee7ca1c328353f6c9b7e92e Mon Sep 17 00:00:00 2001 From: Anand Avati Date: Tue, 20 Aug 2013 02:21:07 -0400 Subject: fuse: invalidate inode attributes on xattr modification commit d331a415aef98717393dda0be69b7947da08eba3 upstream. Calls like setxattr and removexattr result in updation of ctime. Therefore invalidate inode attributes to force a refresh. Signed-off-by: Anand Avati Reviewed-by: Brian Foster Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/fuse/dir.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index c04a025..607a973 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1439,6 +1439,8 @@ static int fuse_setxattr(struct dentry *entry, const char *name, fc->no_setxattr = 1; err = -EOPNOTSUPP; } + if (!err) + fuse_invalidate_attr(inode); return err; } @@ -1568,6 +1570,8 @@ static int fuse_removexattr(struct dentry *entry, const char *name) fc->no_removexattr = 1; err = -EOPNOTSUPP; } + if (!err) + fuse_invalidate_attr(inode); return err; } -- cgit v1.1 From 07d351b5f618e5be5bd97443d25db41eb1bb8244 Mon Sep 17 00:00:00 2001 From: Lino Sanfilippo Date: Fri, 23 Mar 2012 02:42:23 +0100 Subject: fanotify: dont merge permission events MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 03a1cec1f17ac1a6041996b3e40f96b5a2f90e1b upstream. Boyd Yang reported a problem for the case that multiple threads of the same thread group are waiting for a reponse for a permission event. In this case it is possible that some of the threads are never woken up, even if the response for the event has been received (see http://marc.info/?l=linux-kernel&m=131822913806350&w=2). The reason is that we are currently merging permission events if they belong to the same thread group. But we are not prepared to wake up more than one waiter for each event. We do wait_event(group->fanotify_data.access_waitq, event->response || atomic_read(&group->fanotify_data.bypass_perm)); and after that event->response = 0; which is the reason that even if we woke up all waiters for the same event some of them may see event->response being already set 0 again, then go back to sleep and block forever. With this patch we avoid that more than one thread is waiting for a response by not merging permission events for the same thread group any more. Reported-by: Boyd Yang Signed-off-by: Lino Sanfilippo Signed-off-by: Eric Paris Cc: Mihai Donțu Signed-off-by: Greg Kroah-Hartman --- fs/notify/fanotify/fanotify.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'fs') diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index a506360..0c2f912 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -18,6 +18,12 @@ static bool should_merge(struct fsnotify_event *old, struct fsnotify_event *new) old->tgid == new->tgid) { switch (old->data_type) { case (FSNOTIFY_EVENT_PATH): +#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS + /* dont merge two permission events */ + if ((old->mask & FAN_ALL_PERM_EVENTS) && + (new->mask & FAN_ALL_PERM_EVENTS)) + return false; +#endif if ((old->path.mnt == new->path.mnt) && (old->path.dentry == new->path.dentry)) return true; -- cgit v1.1 From 2e4e7cb96933d2c9794125b038bd9ea9cba9bcfc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Jun 2012 15:24:40 +0200 Subject: splice: fix racy pipe->buffers uses commit 047fe3605235888f3ebcda0c728cb31937eadfe6 upstream. Dave Jones reported a kernel BUG at mm/slub.c:3474! triggered by splice_shrink_spd() called from vmsplice_to_pipe() commit 35f3d14dbbc5 (pipe: add support for shrinking and growing pipes) added capability to adjust pipe->buffers. Problem is some paths don't hold pipe mutex and assume pipe->buffers doesn't change for their duration. Fix this by adding nr_pages_max field in struct splice_pipe_desc, and use it in place of pipe->buffers where appropriate. splice_shrink_spd() loses its struct pipe_inode_info argument. Reported-by: Dave Jones Signed-off-by: Eric Dumazet Cc: Jens Axboe Cc: Alexander Viro Cc: Tom Herbert Cc: stable # 2.6.35 Tested-by: Dave Jones Signed-off-by: Jens Axboe Signed-off-by: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- fs/splice.c | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/splice.c b/fs/splice.c index ea92b7c..16d0cb4 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -274,13 +274,16 @@ static void spd_release_page(struct splice_pipe_desc *spd, unsigned int i) * Check if we need to grow the arrays holding pages and partial page * descriptions. */ -int splice_grow_spd(struct pipe_inode_info *pipe, struct splice_pipe_desc *spd) +int splice_grow_spd(const struct pipe_inode_info *pipe, struct splice_pipe_desc *spd) { - if (pipe->buffers <= PIPE_DEF_BUFFERS) + unsigned int buffers = ACCESS_ONCE(pipe->buffers); + + spd->nr_pages_max = buffers; + if (buffers <= PIPE_DEF_BUFFERS) return 0; - spd->pages = kmalloc(pipe->buffers * sizeof(struct page *), GFP_KERNEL); - spd->partial = kmalloc(pipe->buffers * sizeof(struct partial_page), GFP_KERNEL); + spd->pages = kmalloc(buffers * sizeof(struct page *), GFP_KERNEL); + spd->partial = kmalloc(buffers * sizeof(struct partial_page), GFP_KERNEL); if (spd->pages && spd->partial) return 0; @@ -290,10 +293,9 @@ int splice_grow_spd(struct pipe_inode_info *pipe, struct splice_pipe_desc *spd) return -ENOMEM; } -void splice_shrink_spd(struct pipe_inode_info *pipe, - struct splice_pipe_desc *spd) +void splice_shrink_spd(struct splice_pipe_desc *spd) { - if (pipe->buffers <= PIPE_DEF_BUFFERS) + if (spd->nr_pages_max <= PIPE_DEF_BUFFERS) return; kfree(spd->pages); @@ -316,6 +318,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, struct splice_pipe_desc spd = { .pages = pages, .partial = partial, + .nr_pages_max = PIPE_DEF_BUFFERS, .flags = flags, .ops = &page_cache_pipe_buf_ops, .spd_release = spd_release_page, @@ -327,7 +330,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, index = *ppos >> PAGE_CACHE_SHIFT; loff = *ppos & ~PAGE_CACHE_MASK; req_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - nr_pages = min(req_pages, pipe->buffers); + nr_pages = min(req_pages, spd.nr_pages_max); /* * Lookup the (hopefully) full range of pages we need. @@ -498,7 +501,7 @@ fill_it: if (spd.nr_pages) error = splice_to_pipe(pipe, &spd); - splice_shrink_spd(pipe, &spd); + splice_shrink_spd(&spd); return error; } @@ -599,6 +602,7 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos, struct splice_pipe_desc spd = { .pages = pages, .partial = partial, + .nr_pages_max = PIPE_DEF_BUFFERS, .flags = flags, .ops = &default_pipe_buf_ops, .spd_release = spd_release_page, @@ -609,8 +613,8 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos, res = -ENOMEM; vec = __vec; - if (pipe->buffers > PIPE_DEF_BUFFERS) { - vec = kmalloc(pipe->buffers * sizeof(struct iovec), GFP_KERNEL); + if (spd.nr_pages_max > PIPE_DEF_BUFFERS) { + vec = kmalloc(spd.nr_pages_max * sizeof(struct iovec), GFP_KERNEL); if (!vec) goto shrink_ret; } @@ -618,7 +622,7 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos, offset = *ppos & ~PAGE_CACHE_MASK; nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - for (i = 0; i < nr_pages && i < pipe->buffers && len; i++) { + for (i = 0; i < nr_pages && i < spd.nr_pages_max && len; i++) { struct page *page; page = alloc_page(GFP_USER); @@ -666,7 +670,7 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos, shrink_ret: if (vec != __vec) kfree(vec); - splice_shrink_spd(pipe, &spd); + splice_shrink_spd(&spd); return res; err: @@ -1618,6 +1622,7 @@ static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov, struct splice_pipe_desc spd = { .pages = pages, .partial = partial, + .nr_pages_max = PIPE_DEF_BUFFERS, .flags = flags, .ops = &user_page_pipe_buf_ops, .spd_release = spd_release_page, @@ -1633,13 +1638,13 @@ static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov, spd.nr_pages = get_iovec_page_array(iov, nr_segs, spd.pages, spd.partial, flags & SPLICE_F_GIFT, - pipe->buffers); + spd.nr_pages_max); if (spd.nr_pages <= 0) ret = spd.nr_pages; else ret = splice_to_pipe(pipe, &spd); - splice_shrink_spd(pipe, &spd); + splice_shrink_spd(&spd); return ret; } -- cgit v1.1 From 3609e1162ce723d3269924e95f2663d8bd72a537 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 30 Jul 2013 16:30:30 -0400 Subject: Btrfs: change how we queue blocks for backref checking commit b6c60c8018c4e9beb2f83fc82c09f9d033766571 upstream. Previously we only added blocks to the list to have their backrefs checked if the level of the block is right above the one we are searching for. This is because we want to make sure we don't add the entire path up to the root to the lists to make sure we process things one at a time. This assumes that if any blocks in the path to the root are going to be not checked (shared in other words) then they will be in the level right above the current block on up. This isn't quite right though since we can have blocks higher up the list that are shared because they are attached to a reloc root. But we won't add this block to be checked and then later on we will BUG_ON(!upper->checked). So instead keep track of wether or not we've queued a block to be checked in this current search, and if we haven't go ahead and queue it to be checked. This patch fixed the panic I was seeing where we BUG_ON(!upper->checked). Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/relocation.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 5e0a3dc..2ab5837 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -670,6 +670,7 @@ struct backref_node *build_backref_tree(struct reloc_control *rc, int cowonly; int ret; int err = 0; + bool need_check = true; path1 = btrfs_alloc_path(); path2 = btrfs_alloc_path(); @@ -892,6 +893,7 @@ again: cur->bytenr); lower = cur; + need_check = true; for (; level < BTRFS_MAX_LEVEL; level++) { if (!path2->nodes[level]) { BUG_ON(btrfs_root_bytenr(&root->root_item) != @@ -935,14 +937,12 @@ again: /* * add the block to pending list if we - * need check its backrefs. only block - * at 'cur->level + 1' is added to the - * tail of pending list. this guarantees - * we check backrefs from lower level - * blocks to upper level blocks. + * need check its backrefs, we only do this once + * while walking up a tree as we will catch + * anything else later on. */ - if (!upper->checked && - level == cur->level + 1) { + if (!upper->checked && need_check) { + need_check = false; list_add_tail(&edge->list[UPPER], &list); } else -- cgit v1.1 From 6ac3a550f14a5b4a24417097cd6abcb8c79a0d5a Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 27 Dec 2012 01:42:50 -0500 Subject: ext4: avoid hang when mounting non-journal filesystems with orphan list commit 0e9a9a1ad619e7e987815d20262d36a2f95717ca upstream. When trying to mount a file system which does not contain a journal, but which does have a orphan list containing an inode which needs to be truncated, the mount call with hang forever in ext4_orphan_cleanup() because ext4_orphan_del() will return immediately without removing the inode from the orphan list, leading to an uninterruptible loop in kernel code which will busy out one of the CPU's on the system. This can be trivially reproduced by trying to mount the file system found in tests/f_orphan_extents_inode/image.gz from the e2fsprogs source tree. If a malicious user were to put this on a USB stick, and mount it on a Linux desktop which has automatic mounts enabled, this could be considered a potential denial of service attack. (Not a big deal in practice, but professional paranoids worry about such things, and have even been known to allocate CVE numbers for such problems.) -js: This is a fix for CVE-2013-2015. Signed-off-by: "Theodore Ts'o" Reviewed-by: Zheng Liu Acked-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/ext4/namei.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 8d3716f..595d087 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2059,7 +2059,8 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode) int err = 0; /* ext4_handle_valid() assumes a valid handle_t pointer */ - if (handle && !ext4_handle_valid(handle)) + if (handle && !ext4_handle_valid(handle) && + !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) return 0; mutex_lock(&EXT4_SB(inode->i_sb)->s_orphan_lock); -- cgit v1.1 From 0ffc34c0d09a7d36ff84d1ac506a352a06ef5e4e Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 30 Sep 2013 08:35:10 -0700 Subject: vfs: allow O_PATH file descriptors for fstatfs() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 9d05746e7b16d8565dddbe3200faa1e669d23bbf upstream. Olga reported that file descriptors opened with O_PATH do not work with fstatfs(), found during further development of ksh93's thread support. There is no reason to not allow O_PATH file descriptors here (fstatfs is very much a path operation), so use "fdget_raw()". See commit 55815f70147d ("vfs: make O_PATH file descriptors usable for 'fstat()'") for a very similar issue reported for fstat() by the same team. Reported-and-tested-by: ольга крыжановская Acked-by: Al Viro Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/statfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/statfs.c b/fs/statfs.c index 9cf04a1..a133c3e 100644 --- a/fs/statfs.c +++ b/fs/statfs.c @@ -86,7 +86,7 @@ int user_statfs(const char __user *pathname, struct kstatfs *st) int fd_statfs(int fd, struct kstatfs *st) { - struct file *file = fget(fd); + struct file *file = fget_raw(fd); int error = -EBADF; if (file) { error = vfs_statfs(&file->f_path, st); -- cgit v1.1 From 546a50594e6241e2002cddbef7c350e177e08e52 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Thu, 10 Oct 2013 20:05:35 -0400 Subject: ext4: fix memory leak in xattr commit 6e4ea8e33b2057b85d75175dd89b93f5e26de3bc upstream. If we take the 2nd retry path in ext4_expand_extra_isize_ea, we potentionally return from the function without having freed these allocations. If we don't do the return, we over-write the previous allocation pointers, so we leak either way. Spotted with Coverity. [ Fixed by tytso to set is and bs to NULL after freeing these pointers, in case in the retry loop we later end up triggering an error causing a jump to cleanup, at which point we could have a double free bug. -- Ted ] Signed-off-by: Dave Jones Signed-off-by: "Theodore Ts'o" Reviewed-by: Eric Sandeen Signed-off-by: Greg Kroah-Hartman --- fs/ext4/xattr.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index c2865cc..8f797ae 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -1271,6 +1271,8 @@ retry: s_min_extra_isize) { tried_min_extra_isize++; new_extra_isize = s_min_extra_isize; + kfree(is); is = NULL; + kfree(bs); bs = NULL; goto retry; } error = -1; -- cgit v1.1