diff options
author | David Woodhouse <dwmw2@infradead.org> | 2006-12-01 09:56:43 +0000 |
---|---|---|
committer | David Woodhouse <dwmw2@infradead.org> | 2006-12-01 09:56:43 +0000 |
commit | bd3c97a7c718bfb9f1e4f31c16c383a5c6f815eb (patch) | |
tree | 3f56594e813c6f35cbacbdb3e137ba5bfd0b3069 /fs | |
parent | 6c33cafc794d07c9254c160789120a0e98c088c9 (diff) | |
parent | 0215ffb08ce99e2bb59eca114a99499a4d06e704 (diff) | |
download | kernel_samsung_tuna-bd3c97a7c718bfb9f1e4f31c16c383a5c6f815eb.zip kernel_samsung_tuna-bd3c97a7c718bfb9f1e4f31c16c383a5c6f815eb.tar.gz kernel_samsung_tuna-bd3c97a7c718bfb9f1e4f31c16c383a5c6f815eb.tar.bz2 |
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
Diffstat (limited to 'fs')
70 files changed, 1098 insertions, 631 deletions
@@ -1777,6 +1777,7 @@ config RPCSEC_GSS_KRB5 select CRYPTO select CRYPTO_MD5 select CRYPTO_DES + select CRYPTO_CBC help Provides for secure RPC calls by means of a gss-api mechanism based on Kerberos V5. This is required for @@ -1795,6 +1796,7 @@ config RPCSEC_GSS_SPKM3 select CRYPTO_MD5 select CRYPTO_DES select CRYPTO_CAST5 + select CRYPTO_CBC help Provides for secure RPC calls by means of a gss-api mechanism based on the SPKM3 public-key mechanism. @@ -2058,8 +2060,7 @@ config CODA_FS_OLD_API For most cases you probably want to say N. config AFS_FS -# for fs/nls/Config.in - tristate "Andrew File System support (AFS) (Experimental)" + tristate "Andrew File System support (AFS) (EXPERIMENTAL)" depends on INET && EXPERIMENTAL select RXRPC help diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c index 54c518c..38ede5c 100644 --- a/fs/autofs/inode.c +++ b/fs/autofs/inode.c @@ -25,6 +25,14 @@ void autofs_kill_sb(struct super_block *sb) struct autofs_sb_info *sbi = autofs_sbi(sb); unsigned int n; + /* + * In the event of a failure in get_sb_nodev the superblock + * info is not present so nothing else has been setup, so + * just exit when we are called from deactivate_super. + */ + if (!sbi) + return; + if ( !sbi->catatonic ) autofs_catatonic_mode(sbi); /* Free wait queues, close pipe */ @@ -136,7 +144,8 @@ int autofs_fill_super(struct super_block *s, void *data, int silent) s->s_fs_info = sbi; sbi->magic = AUTOFS_SBI_MAGIC; - sbi->catatonic = 0; + sbi->pipe = NULL; + sbi->catatonic = 1; sbi->exp_timeout = 0; sbi->oz_pgrp = process_group(current); autofs_initialize_hash(&sbi->dirhash); @@ -180,6 +189,7 @@ int autofs_fill_super(struct super_block *s, void *data, int silent) if ( !pipe->f_op || !pipe->f_op->write ) goto fail_fput; sbi->pipe = pipe; + sbi->catatonic = 0; /* * Success! Install the root dentry now to indicate completion. @@ -198,6 +208,8 @@ fail_iput: iput(root_inode); fail_free: kfree(sbi); + s->s_fs_info = NULL; + kill_anon_super(s); fail_unlock: return -EINVAL; } diff --git a/fs/autofs/waitq.c b/fs/autofs/waitq.c index 633f628..19a9caf 100644 --- a/fs/autofs/waitq.c +++ b/fs/autofs/waitq.c @@ -41,6 +41,7 @@ void autofs_catatonic_mode(struct autofs_sb_info *sbi) wq = nwq; } fput(sbi->pipe); /* Close the pipe */ + sbi->pipe = NULL; autofs_hash_dputall(&sbi->dirhash); /* Remove all dentry pointers */ } diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index 51fd859..ce7c0f1 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -99,6 +99,9 @@ static void autofs4_force_release(struct autofs_sb_info *sbi) struct dentry *this_parent = sbi->sb->s_root; struct list_head *next; + if (!sbi->sb->s_root) + return; + spin_lock(&dcache_lock); repeat: next = this_parent->d_subdirs.next; @@ -146,6 +149,14 @@ void autofs4_kill_sb(struct super_block *sb) { struct autofs_sb_info *sbi = autofs4_sbi(sb); + /* + * In the event of a failure in get_sb_nodev the superblock + * info is not present so nothing else has been setup, so + * just exit when we are called from deactivate_super. + */ + if (!sbi) + return; + sb->s_fs_info = NULL; if ( !sbi->catatonic ) @@ -310,7 +321,8 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) s->s_fs_info = sbi; sbi->magic = AUTOFS_SBI_MAGIC; sbi->pipefd = -1; - sbi->catatonic = 0; + sbi->pipe = NULL; + sbi->catatonic = 1; sbi->exp_timeout = 0; sbi->oz_pgrp = process_group(current); sbi->sb = s; @@ -388,6 +400,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) goto fail_fput; sbi->pipe = pipe; sbi->pipefd = pipefd; + sbi->catatonic = 0; /* * Success! Install the root dentry now to indicate completion. @@ -412,6 +425,8 @@ fail_ino: kfree(ino); fail_free: kfree(sbi); + s->s_fs_info = NULL; + kill_anon_super(s); fail_unlock: return -EINVAL; } diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index c0a6c8d..1e4a539 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -41,10 +41,8 @@ void autofs4_catatonic_mode(struct autofs_sb_info *sbi) wake_up_interruptible(&wq->queue); wq = nwq; } - if (sbi->pipe) { - fput(sbi->pipe); /* Close the pipe */ - sbi->pipe = NULL; - } + fput(sbi->pipe); /* Close the pipe */ + sbi->pipe = NULL; } static int autofs4_write(struct file *file, const void *addr, int bytes) diff --git a/fs/block_dev.c b/fs/block_dev.c index bc8f27c..36c0e7a 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -642,34 +642,47 @@ static void free_bd_holder(struct bd_holder *bo) } /** + * find_bd_holder - find matching struct bd_holder from the block device + * + * @bdev: struct block device to be searched + * @bo: target struct bd_holder + * + * Returns matching entry with @bo in @bdev->bd_holder_list. + * If found, increment the reference count and return the pointer. + * If not found, returns NULL. + */ +static struct bd_holder *find_bd_holder(struct block_device *bdev, + struct bd_holder *bo) +{ + struct bd_holder *tmp; + + list_for_each_entry(tmp, &bdev->bd_holder_list, list) + if (tmp->sdir == bo->sdir) { + tmp->count++; + return tmp; + } + + return NULL; +} + +/** * add_bd_holder - create sysfs symlinks for bd_claim() relationship * * @bdev: block device to be bd_claimed * @bo: preallocated and initialized by alloc_bd_holder() * - * If there is no matching entry with @bo in @bdev->bd_holder_list, - * add @bo to the list, create symlinks. + * Add @bo to @bdev->bd_holder_list, create symlinks. * - * Returns 0 if symlinks are created or already there. - * Returns -ve if something fails and @bo can be freed. + * Returns 0 if symlinks are created. + * Returns -ve if something fails. */ static int add_bd_holder(struct block_device *bdev, struct bd_holder *bo) { - struct bd_holder *tmp; int ret; if (!bo) return -EINVAL; - list_for_each_entry(tmp, &bdev->bd_holder_list, list) { - if (tmp->sdir == bo->sdir) { - tmp->count++; - /* We've already done what we need to do here. */ - free_bd_holder(bo); - return 0; - } - } - if (!bd_holder_grab_dirs(bdev, bo)) return -EBUSY; @@ -740,7 +753,7 @@ static int bd_claim_by_kobject(struct block_device *bdev, void *holder, struct kobject *kobj) { int res; - struct bd_holder *bo; + struct bd_holder *bo, *found; if (!kobj) return -EINVAL; @@ -751,9 +764,16 @@ static int bd_claim_by_kobject(struct block_device *bdev, void *holder, mutex_lock_nested(&bdev->bd_mutex, BD_MUTEX_PARTITION); res = bd_claim(bdev, holder); - if (res == 0) - res = add_bd_holder(bdev, bo); - if (res) + if (res == 0) { + found = find_bd_holder(bdev, bo); + if (found == NULL) { + res = add_bd_holder(bdev, bo); + if (res) + bd_release(bdev); + } + } + + if (res || found) free_bd_holder(bo); mutex_unlock(&bdev->bd_mutex); @@ -1131,6 +1151,8 @@ static int blkdev_open(struct inode * inode, struct file * filp) filp->f_flags |= O_LARGEFILE; bdev = bd_acquire(inode); + if (bdev == NULL) + return -ENOMEM; res = do_open(bdev, filp, BD_MUTEX_NORMAL); if (res) diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index 1eb9a2e..0b3c37e 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES @@ -1,6 +1,11 @@ Version 1.46 ------------ Support deep tree mounts. Better support OS/2, Win9x (DOS) time stamps. +Allow null user to be specified on mount ("username="). Do not return +EINVAL on readdir when filldir fails due to overwritten blocksize +(fixes FC problem). Return error in rename 2nd attempt retry (ie report +if rename by handle also fails, after rename by path fails, we were +not reporting whether the retry worked or not). Version 1.45 ------------ diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 4093d53..71f7791 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -822,10 +822,13 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol) } else if (strnicmp(data, "nouser_xattr",12) == 0) { vol->no_xattr = 1; } else if (strnicmp(data, "user", 4) == 0) { - if (!value || !*value) { + if (!value) { printk(KERN_WARNING "CIFS: invalid or missing username\n"); return 1; /* needs_arg; */ + } else if(!*value) { + /* null user, ie anonymous, authentication */ + vol->nullauth = 1; } if (strnlen(value, 200) < 200) { vol->username = value; @@ -1642,6 +1645,8 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, /* BB fixme parse for domain name here */ cFYI(1, ("Username: %s ", volume_info.username)); + } else if (volume_info.nullauth) { + cFYI(1,("null user")); } else { cifserror("No username specified"); /* In userspace mount helper we can get user name from alternate diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 976a691..2436ed8 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -492,10 +492,14 @@ int cifs_close(struct inode *inode, struct file *file) the struct would be in each open file, but this should give enough time to clear the socket */ - cERROR(1,("close with pending writes")); +#ifdef CONFIG_CIFS_DEBUG2 + cFYI(1,("close delay, write pending")); +#endif /* DEBUG2 */ msleep(timeout); timeout *= 4; - } + } + if(atomic_read(&pSMBFile->wrtPending)) + cERROR(1,("close with pending writes")); rc = CIFSSMBClose(xid, pTcon, pSMBFile->netfid); } @@ -1806,13 +1810,6 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, } if ((rc < 0) || (smb_read_data == NULL)) { cFYI(1, ("Read error in readpages: %d", rc)); - /* clean up remaing pages off list */ - while (!list_empty(page_list) && (i < num_pages)) { - page = list_entry(page_list->prev, struct page, - lru); - list_del(&page->lru); - page_cache_release(page); - } break; } else if (bytes_read > 0) { pSMBr = (struct smb_com_read_rsp *)smb_read_data; @@ -1831,13 +1828,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, this case is ok - if we are at server EOF we will hit it on next read */ - /* while (!list_empty(page_list) && (i < num_pages)) { - page = list_entry(page_list->prev, - struct page, list); - list_del(&page->list); - page_cache_release(page); - } - break; */ + /* break; */ } } else { cFYI(1, ("No bytes read (%d) at offset %lld . " @@ -1845,14 +1836,6 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, bytes_read, offset)); /* BB turn off caching and do new lookup on file size at server? */ - while (!list_empty(page_list) && (i < num_pages)) { - page = list_entry(page_list->prev, struct page, - lru); - list_del(&page->lru); - - /* BB removeme - replace with zero of page? */ - page_cache_release(page); - } break; } if (smb_read_data) { diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 35d54bb..1ad8c9f 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -885,10 +885,14 @@ int cifs_rename(struct inode *source_inode, struct dentry *source_direntry, kmalloc(2 * sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL); if (info_buf_source != NULL) { info_buf_target = info_buf_source + 1; - rc = CIFSSMBUnixQPathInfo(xid, pTcon, fromName, - info_buf_source, cifs_sb_source->local_nls, - cifs_sb_source->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + if (pTcon->ses->capabilities & CAP_UNIX) + rc = CIFSSMBUnixQPathInfo(xid, pTcon, fromName, + info_buf_source, + cifs_sb_source->local_nls, + cifs_sb_source->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); + /* else rc is still EEXIST so will fall through to + unlink the target and retry rename */ if (rc == 0) { rc = CIFSSMBUnixQPathInfo(xid, pTcon, toName, info_buf_target, @@ -937,7 +941,7 @@ int cifs_rename(struct inode *source_inode, struct dentry *source_direntry, cifs_sb_source->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); if (rc==0) { - CIFSSMBRenameOpenFile(xid, pTcon, netfid, toName, + rc = CIFSSMBRenameOpenFile(xid, pTcon, netfid, toName, cifs_sb_source->local_nls, cifs_sb_source->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); @@ -1085,8 +1089,10 @@ int cifs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { int err = cifs_revalidate(dentry); - if (!err) + if (!err) { generic_fillattr(dentry->d_inode, stat); + stat->blksize = CIFS_MAX_MSGSIZE; + } return err; } diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index b5b0a2a..ed18c39 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -896,6 +896,10 @@ static int cifs_filldir(char *pfindEntry, struct file *file, tmp_inode->i_ino,obj_type); if(rc) { cFYI(1,("filldir rc = %d",rc)); + /* we can not return filldir errors to the caller + since they are "normal" when the stat blocksize + is too small - we return remapped error instead */ + rc = -EOVERFLOW; } dput(tmp_dentry); @@ -1074,6 +1078,11 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) we want to check for that here? */ rc = cifs_filldir(current_entry, file, filldir, direntry, tmp_buf, max_len); + if(rc == -EOVERFLOW) { + rc = 0; + break; + } + file->f_pos++; if(file->f_pos == cifsFile->srch_inf.index_of_last_entry) { diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index a8a0835..bbdda99 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -90,7 +90,9 @@ static void unicode_ssetup_strings(char ** pbcc_area, struct cifsSesInfo *ses, } */ /* copy user */ if(ses->userName == NULL) { - /* BB what about null user mounts - check that we do this BB */ + /* null user mount */ + *bcc_ptr = 0; + *(bcc_ptr+1) = 0; } else { /* 300 should be long enough for any conceivable user name */ bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, ses->userName, 300, nls_cp); @@ -98,10 +100,13 @@ static void unicode_ssetup_strings(char ** pbcc_area, struct cifsSesInfo *ses, bcc_ptr += 2 * bytes_ret; bcc_ptr += 2; /* account for null termination */ /* copy domain */ - if(ses->domainName == NULL) - bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, - "CIFS_LINUX_DOM", 32, nls_cp); - else + if(ses->domainName == NULL) { + /* Sending null domain better than using a bogus domain name (as + we did briefly in 2.6.18) since server will use its default */ + *bcc_ptr = 0; + *(bcc_ptr+1) = 0; + bytes_ret = 0; + } else bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, ses->domainName, 256, nls_cp); bcc_ptr += 2 * bytes_ret; @@ -144,13 +149,11 @@ static void ascii_ssetup_strings(char ** pbcc_area, struct cifsSesInfo *ses, /* copy domain */ - if(ses->domainName == NULL) { - strcpy(bcc_ptr, "CIFS_LINUX_DOM"); - bcc_ptr += 14; /* strlen(CIFS_LINUX_DOM) */ - } else { + if(ses->domainName != NULL) { strncpy(bcc_ptr, ses->domainName, 256); bcc_ptr += strnlen(ses->domainName, 256); - } + } /* else we will send a null domain name + so the server will default to its own domain */ *bcc_ptr = 0; bcc_ptr++; diff --git a/fs/compat.c b/fs/compat.c index 50624d4..8d0a001 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1835,9 +1835,12 @@ asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp, } while (!ret && !timeout && tsp && (ts.tv_sec || ts.tv_nsec)); - if (ret == 0 && tsp && !(current->personality & STICKY_TIMEOUTS)) { + if (tsp) { struct compat_timespec rts; + if (current->personality & STICKY_TIMEOUTS) + goto sticky; + rts.tv_sec = timeout / HZ; rts.tv_nsec = (timeout % HZ) * (NSEC_PER_SEC/HZ); if (rts.tv_nsec >= NSEC_PER_SEC) { @@ -1846,8 +1849,19 @@ asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp, } if (compat_timespec_compare(&rts, &ts) >= 0) rts = ts; - if (copy_to_user(tsp, &rts, sizeof(rts))) - ret = -EFAULT; + if (copy_to_user(tsp, &rts, sizeof(rts))) { +sticky: + /* + * If an application puts its timeval in read-only + * memory, we don't want the Linux-specific update to + * the timeval to cause a fault after the select has + * completed successfully. However, because we're not + * updating the timeval, we can't restart the system + * call. + */ + if (ret == -ERESTARTNOHAND) + ret = -EINTR; + } } if (ret == -ERESTARTNOHAND) { diff --git a/fs/dcache.c b/fs/dcache.c index 2bac4ba..fd4a428 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -478,11 +478,12 @@ static void prune_dcache(int count, struct super_block *sb) up_read(s_umount); } spin_unlock(&dentry->d_lock); - /* Cannot remove the first dentry, and it isn't appropriate - * to move it to the head of the list, so give up, and try - * later + /* + * Insert dentry at the head of the list as inserting at the + * tail leads to a cycle. */ - break; + list_add(&dentry->d_lru, &dentry_unused); + dentry_stat.nr_unused++; } spin_unlock(&dcache_lock); } @@ -556,6 +557,7 @@ repeat: static void shrink_dcache_for_umount_subtree(struct dentry *dentry) { struct dentry *parent; + unsigned detached = 0; BUG_ON(!IS_ROOT(dentry)); @@ -620,7 +622,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) atomic_dec(&parent->d_count); list_del(&dentry->d_u.d_child); - dentry_stat.nr_dentry--; /* For d_free, below */ + detached++; inode = dentry->d_inode; if (inode) { @@ -638,7 +640,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) * otherwise we ascend to the parent and move to the * next sibling if there is one */ if (!parent) - return; + goto out; dentry = parent; @@ -647,6 +649,11 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) dentry = list_entry(dentry->d_subdirs.next, struct dentry, d_u.d_child); } +out: + /* several dentries were freed, need to correct nr_dentry */ + spin_lock(&dcache_lock); + dentry_stat.nr_dentry -= detached; + spin_unlock(&dcache_lock); } /* @@ -1469,23 +1476,21 @@ static void switch_names(struct dentry *dentry, struct dentry *target) * deleted it. */ -/** - * d_move - move a dentry +/* + * d_move_locked - move a dentry * @dentry: entry to move * @target: new dentry * * Update the dcache to reflect the move of a file name. Negative * dcache entries should not be moved in this way. */ - -void d_move(struct dentry * dentry, struct dentry * target) +static void d_move_locked(struct dentry * dentry, struct dentry * target) { struct hlist_head *list; if (!dentry->d_inode) printk(KERN_WARNING "VFS: moving negative dcache entry\n"); - spin_lock(&dcache_lock); write_seqlock(&rename_lock); /* * XXXX: do we really need to take target->d_lock? @@ -1536,7 +1541,81 @@ already_unhashed: fsnotify_d_move(dentry); spin_unlock(&dentry->d_lock); write_sequnlock(&rename_lock); +} + +/** + * d_move - move a dentry + * @dentry: entry to move + * @target: new dentry + * + * Update the dcache to reflect the move of a file name. Negative + * dcache entries should not be moved in this way. + */ + +void d_move(struct dentry * dentry, struct dentry * target) +{ + spin_lock(&dcache_lock); + d_move_locked(dentry, target); + spin_unlock(&dcache_lock); +} + +/* + * Helper that returns 1 if p1 is a parent of p2, else 0 + */ +static int d_isparent(struct dentry *p1, struct dentry *p2) +{ + struct dentry *p; + + for (p = p2; p->d_parent != p; p = p->d_parent) { + if (p->d_parent == p1) + return 1; + } + return 0; +} + +/* + * This helper attempts to cope with remotely renamed directories + * + * It assumes that the caller is already holding + * dentry->d_parent->d_inode->i_mutex and the dcache_lock + * + * Note: If ever the locking in lock_rename() changes, then please + * remember to update this too... + * + * On return, dcache_lock will have been unlocked. + */ +static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias) +{ + struct mutex *m1 = NULL, *m2 = NULL; + struct dentry *ret; + + /* If alias and dentry share a parent, then no extra locks required */ + if (alias->d_parent == dentry->d_parent) + goto out_unalias; + + /* Check for loops */ + ret = ERR_PTR(-ELOOP); + if (d_isparent(alias, dentry)) + goto out_err; + + /* See lock_rename() */ + ret = ERR_PTR(-EBUSY); + if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex)) + goto out_err; + m1 = &dentry->d_sb->s_vfs_rename_mutex; + if (!mutex_trylock(&alias->d_parent->d_inode->i_mutex)) + goto out_err; + m2 = &alias->d_parent->d_inode->i_mutex; +out_unalias: + d_move_locked(alias, dentry); + ret = alias; +out_err: spin_unlock(&dcache_lock); + if (m2) + mutex_unlock(m2); + if (m1) + mutex_unlock(m1); + return ret; } /* @@ -1581,7 +1660,7 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon) */ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) { - struct dentry *alias, *actual; + struct dentry *actual; BUG_ON(!d_unhashed(dentry)); @@ -1593,26 +1672,27 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) goto found_lock; } - /* See if a disconnected directory already exists as an anonymous root - * that we should splice into the tree instead */ - if (S_ISDIR(inode->i_mode) && (alias = __d_find_alias(inode, 1))) { - spin_lock(&alias->d_lock); - - /* Is this a mountpoint that we could splice into our tree? */ - if (IS_ROOT(alias)) - goto connect_mountpoint; - - if (alias->d_name.len == dentry->d_name.len && - alias->d_parent == dentry->d_parent && - memcmp(alias->d_name.name, - dentry->d_name.name, - dentry->d_name.len) == 0) - goto replace_with_alias; - - spin_unlock(&alias->d_lock); - - /* Doh! Seem to be aliasing directories for some reason... */ - dput(alias); + if (S_ISDIR(inode->i_mode)) { + struct dentry *alias; + + /* Does an aliased dentry already exist? */ + alias = __d_find_alias(inode, 0); + if (alias) { + actual = alias; + /* Is this an anonymous mountpoint that we could splice + * into our tree? */ + if (IS_ROOT(alias)) { + spin_lock(&alias->d_lock); + __d_materialise_dentry(dentry, alias); + __d_drop(alias); + goto found; + } + /* Nope, but we must(!) avoid directory aliasing */ + actual = __d_unalias(dentry, alias); + if (IS_ERR(actual)) + dput(alias); + goto out_nolock; + } } /* Add a unique reference */ @@ -1628,7 +1708,7 @@ found: _d_rehash(actual); spin_unlock(&actual->d_lock); spin_unlock(&dcache_lock); - +out_nolock: if (actual == dentry) { security_d_instantiate(dentry, inode); return NULL; @@ -1637,16 +1717,6 @@ found: iput(inode); return actual; - /* Convert the anonymous/root alias into an ordinary dentry */ -connect_mountpoint: - __d_materialise_dentry(dentry, alias); - - /* Replace the candidate dentry with the alias in the tree */ -replace_with_alias: - __d_drop(alias); - actual = alias; - goto found; - shouldnt_be_hashed: spin_unlock(&dcache_lock); BUG(); diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index e77676d..137d76c 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -21,6 +21,7 @@ #include <linux/mount.h> #include <linux/pagemap.h> #include <linux/init.h> +#include <linux/kobject.h> #include <linux/namei.h> #include <linux/debugfs.h> @@ -147,13 +148,13 @@ static int debugfs_create_by_name(const char *name, mode_t mode, *dentry = NULL; mutex_lock(&parent->d_inode->i_mutex); *dentry = lookup_one_len(name, parent, strlen(name)); - if (!IS_ERR(dentry)) { + if (!IS_ERR(*dentry)) { if ((mode & S_IFMT) == S_IFDIR) error = debugfs_mkdir(parent->d_inode, *dentry, mode); else error = debugfs_create(parent->d_inode, *dentry, mode); } else - error = PTR_ERR(dentry); + error = PTR_ERR(*dentry); mutex_unlock(&parent->d_inode->i_mutex); return error; diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index 109333c..f8842ca 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c @@ -43,6 +43,10 @@ static ssize_t dlm_control_store(struct dlm_ls *ls, const char *buf, size_t len) ssize_t ret = len; int n = simple_strtol(buf, NULL, 0); + ls = dlm_find_lockspace_local(ls->ls_local_handle); + if (!ls) + return -EINVAL; + switch (n) { case 0: dlm_ls_stop(ls); @@ -53,6 +57,7 @@ static ssize_t dlm_control_store(struct dlm_ls *ls, const char *buf, size_t len) default: ret = -EINVAL; } + dlm_put_lockspace(ls); return ret; } @@ -143,6 +148,12 @@ static ssize_t dlm_attr_store(struct kobject *kobj, struct attribute *attr, return a->store ? a->store(ls, buf, len) : len; } +static void lockspace_kobj_release(struct kobject *k) +{ + struct dlm_ls *ls = container_of(k, struct dlm_ls, ls_kobj); + kfree(ls); +} + static struct sysfs_ops dlm_attr_ops = { .show = dlm_attr_show, .store = dlm_attr_store, @@ -151,6 +162,7 @@ static struct sysfs_ops dlm_attr_ops = { static struct kobj_type dlm_ktype = { .default_attrs = dlm_attrs, .sysfs_ops = &dlm_attr_ops, + .release = lockspace_kobj_release, }; static struct kset dlm_kset = { @@ -678,7 +690,7 @@ static int release_lockspace(struct dlm_ls *ls, int force) dlm_clear_members_gone(ls); kfree(ls->ls_node_array); kobject_unregister(&ls->ls_kobj); - kfree(ls); + /* The ls structure will be freed when the kobject is done with */ mutex_lock(&ls_lock); ls_count--; diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index ed35a9712..f63a775 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -94,25 +94,53 @@ static int ecryptfs_calculate_md5(char *dst, struct ecryptfs_crypt_stat *crypt_stat, char *src, int len) { - int rc = 0; struct scatterlist sg; + struct hash_desc desc = { + .tfm = crypt_stat->hash_tfm, + .flags = CRYPTO_TFM_REQ_MAY_SLEEP + }; + int rc = 0; - mutex_lock(&crypt_stat->cs_md5_tfm_mutex); + mutex_lock(&crypt_stat->cs_hash_tfm_mutex); sg_init_one(&sg, (u8 *)src, len); - if (!crypt_stat->md5_tfm) { - crypt_stat->md5_tfm = - crypto_alloc_tfm("md5", CRYPTO_TFM_REQ_MAY_SLEEP); - if (!crypt_stat->md5_tfm) { - rc = -ENOMEM; + if (!desc.tfm) { + desc.tfm = crypto_alloc_hash(ECRYPTFS_DEFAULT_HASH, 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(desc.tfm)) { + rc = PTR_ERR(desc.tfm); ecryptfs_printk(KERN_ERR, "Error attempting to " - "allocate crypto context\n"); + "allocate crypto context; rc = [%d]\n", + rc); goto out; } + crypt_stat->hash_tfm = desc.tfm; } - crypto_digest_init(crypt_stat->md5_tfm); - crypto_digest_update(crypt_stat->md5_tfm, &sg, 1); - crypto_digest_final(crypt_stat->md5_tfm, dst); - mutex_unlock(&crypt_stat->cs_md5_tfm_mutex); + crypto_hash_init(&desc); + crypto_hash_update(&desc, &sg, len); + crypto_hash_final(&desc, dst); + mutex_unlock(&crypt_stat->cs_hash_tfm_mutex); +out: + return rc; +} + +int ecryptfs_crypto_api_algify_cipher_name(char **algified_name, + char *cipher_name, + char *chaining_modifier) +{ + int cipher_name_len = strlen(cipher_name); + int chaining_modifier_len = strlen(chaining_modifier); + int algified_name_len; + int rc; + + algified_name_len = (chaining_modifier_len + cipher_name_len + 3); + (*algified_name) = kmalloc(algified_name_len, GFP_KERNEL); + if (!(*algified_name)) { + rc = -ENOMEM; + goto out; + } + snprintf((*algified_name), algified_name_len, "%s(%s)", + chaining_modifier, cipher_name); + rc = 0; out: return rc; } @@ -178,7 +206,7 @@ ecryptfs_init_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat) memset((void *)crypt_stat, 0, sizeof(struct ecryptfs_crypt_stat)); mutex_init(&crypt_stat->cs_mutex); mutex_init(&crypt_stat->cs_tfm_mutex); - mutex_init(&crypt_stat->cs_md5_tfm_mutex); + mutex_init(&crypt_stat->cs_hash_tfm_mutex); ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_STRUCT_INITIALIZED); } @@ -191,9 +219,9 @@ ecryptfs_init_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat) void ecryptfs_destruct_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat) { if (crypt_stat->tfm) - crypto_free_tfm(crypt_stat->tfm); - if (crypt_stat->md5_tfm) - crypto_free_tfm(crypt_stat->md5_tfm); + crypto_free_blkcipher(crypt_stat->tfm); + if (crypt_stat->hash_tfm) + crypto_free_hash(crypt_stat->hash_tfm); memset(crypt_stat, 0, sizeof(struct ecryptfs_crypt_stat)); } @@ -203,7 +231,7 @@ void ecryptfs_destruct_mount_crypt_stat( if (mount_crypt_stat->global_auth_tok_key) key_put(mount_crypt_stat->global_auth_tok_key); if (mount_crypt_stat->global_key_tfm) - crypto_free_tfm(mount_crypt_stat->global_key_tfm); + crypto_free_blkcipher(mount_crypt_stat->global_key_tfm); memset(mount_crypt_stat, 0, sizeof(struct ecryptfs_mount_crypt_stat)); } @@ -269,6 +297,11 @@ static int encrypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat, struct scatterlist *src_sg, int size, unsigned char *iv) { + struct blkcipher_desc desc = { + .tfm = crypt_stat->tfm, + .info = iv, + .flags = CRYPTO_TFM_REQ_MAY_SLEEP + }; int rc = 0; BUG_ON(!crypt_stat || !crypt_stat->tfm @@ -282,8 +315,8 @@ static int encrypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat, } /* Consider doing this once, when the file is opened */ mutex_lock(&crypt_stat->cs_tfm_mutex); - rc = crypto_cipher_setkey(crypt_stat->tfm, crypt_stat->key, - crypt_stat->key_size); + rc = crypto_blkcipher_setkey(crypt_stat->tfm, crypt_stat->key, + crypt_stat->key_size); if (rc) { ecryptfs_printk(KERN_ERR, "Error setting key; rc = [%d]\n", rc); @@ -292,7 +325,7 @@ static int encrypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat, goto out; } ecryptfs_printk(KERN_DEBUG, "Encrypting [%d] bytes.\n", size); - crypto_cipher_encrypt_iv(crypt_stat->tfm, dest_sg, src_sg, size, iv); + crypto_blkcipher_encrypt_iv(&desc, dest_sg, src_sg, size); mutex_unlock(&crypt_stat->cs_tfm_mutex); out: return rc; @@ -675,12 +708,17 @@ static int decrypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat, struct scatterlist *src_sg, int size, unsigned char *iv) { + struct blkcipher_desc desc = { + .tfm = crypt_stat->tfm, + .info = iv, + .flags = CRYPTO_TFM_REQ_MAY_SLEEP + }; int rc = 0; /* Consider doing this once, when the file is opened */ mutex_lock(&crypt_stat->cs_tfm_mutex); - rc = crypto_cipher_setkey(crypt_stat->tfm, crypt_stat->key, - crypt_stat->key_size); + rc = crypto_blkcipher_setkey(crypt_stat->tfm, crypt_stat->key, + crypt_stat->key_size); if (rc) { ecryptfs_printk(KERN_ERR, "Error setting key; rc = [%d]\n", rc); @@ -689,8 +727,7 @@ static int decrypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat, goto out; } ecryptfs_printk(KERN_DEBUG, "Decrypting [%d] bytes.\n", size); - rc = crypto_cipher_decrypt_iv(crypt_stat->tfm, dest_sg, src_sg, size, - iv); + rc = crypto_blkcipher_decrypt_iv(&desc, dest_sg, src_sg, size); mutex_unlock(&crypt_stat->cs_tfm_mutex); if (rc) { ecryptfs_printk(KERN_ERR, "Error decrypting; rc = [%d]\n", @@ -759,6 +796,7 @@ ecryptfs_decrypt_page_offset(struct ecryptfs_crypt_stat *crypt_stat, */ int ecryptfs_init_crypt_ctx(struct ecryptfs_crypt_stat *crypt_stat) { + char *full_alg_name; int rc = -EINVAL; if (!crypt_stat->cipher) { @@ -775,16 +813,25 @@ int ecryptfs_init_crypt_ctx(struct ecryptfs_crypt_stat *crypt_stat) goto out; } mutex_lock(&crypt_stat->cs_tfm_mutex); - crypt_stat->tfm = crypto_alloc_tfm(crypt_stat->cipher, - ECRYPTFS_DEFAULT_CHAINING_MODE - | CRYPTO_TFM_REQ_WEAK_KEY); - mutex_unlock(&crypt_stat->cs_tfm_mutex); - if (!crypt_stat->tfm) { + rc = ecryptfs_crypto_api_algify_cipher_name(&full_alg_name, + crypt_stat->cipher, "cbc"); + if (rc) + goto out; + crypt_stat->tfm = crypto_alloc_blkcipher(full_alg_name, 0, + CRYPTO_ALG_ASYNC); + kfree(full_alg_name); + if (IS_ERR(crypt_stat->tfm)) { + rc = PTR_ERR(crypt_stat->tfm); ecryptfs_printk(KERN_ERR, "cryptfs: init_crypt_ctx(): " "Error initializing cipher [%s]\n", crypt_stat->cipher); + mutex_unlock(&crypt_stat->cs_tfm_mutex); goto out; } + crypto_blkcipher_set_flags(crypt_stat->tfm, + (ECRYPTFS_DEFAULT_CHAINING_MODE + | CRYPTO_TFM_REQ_WEAK_KEY)); + mutex_unlock(&crypt_stat->cs_tfm_mutex); rc = 0; out: return rc; @@ -1145,28 +1192,28 @@ int ecryptfs_cipher_code_to_string(char *str, u16 cipher_code) int ecryptfs_read_header_region(char *data, struct dentry *dentry, struct vfsmount *mnt) { - struct file *file; + struct file *lower_file; mm_segment_t oldfs; int rc; - mnt = mntget(mnt); - file = dentry_open(dentry, mnt, O_RDONLY); - if (IS_ERR(file)) { - ecryptfs_printk(KERN_DEBUG, "Error opening file to " - "read header region\n"); - mntput(mnt); - rc = PTR_ERR(file); + if ((rc = ecryptfs_open_lower_file(&lower_file, dentry, mnt, + O_RDONLY))) { + printk(KERN_ERR + "Error opening lower_file to read header region\n"); goto out; } - file->f_pos = 0; + lower_file->f_pos = 0; oldfs = get_fs(); set_fs(get_ds()); /* For releases 0.1 and 0.2, all of the header information * fits in the first data extent-sized region. */ - rc = file->f_op->read(file, (char __user *)data, - ECRYPTFS_DEFAULT_EXTENT_SIZE, &file->f_pos); + rc = lower_file->f_op->read(lower_file, (char __user *)data, + ECRYPTFS_DEFAULT_EXTENT_SIZE, &lower_file->f_pos); set_fs(oldfs); - fput(file); + if ((rc = ecryptfs_close_lower_file(lower_file))) { + printk(KERN_ERR "Error closing lower_file\n"); + goto out; + } rc = 0; out: return rc; @@ -1573,84 +1620,52 @@ out: /** * ecryptfs_process_cipher - Perform cipher initialization. - * @tfm: Crypto context set by this function * @key_tfm: Crypto context for key material, set by this function - * @cipher_name: Name of the cipher. - * @key_size: Size of the key in bytes. + * @cipher_name: Name of the cipher + * @key_size: Size of the key in bytes * * Returns zero on success. Any crypto_tfm structs allocated here * should be released by other functions, such as on a superblock put * event, regardless of whether this function succeeds for fails. */ int -ecryptfs_process_cipher(struct crypto_tfm **tfm, struct crypto_tfm **key_tfm, - char *cipher_name, size_t key_size) +ecryptfs_process_cipher(struct crypto_blkcipher **key_tfm, char *cipher_name, + size_t *key_size) { char dummy_key[ECRYPTFS_MAX_KEY_BYTES]; + char *full_alg_name; int rc; - *tfm = *key_tfm = NULL; - if (key_size > ECRYPTFS_MAX_KEY_BYTES) { + *key_tfm = NULL; + if (*key_size > ECRYPTFS_MAX_KEY_BYTES) { rc = -EINVAL; printk(KERN_ERR "Requested key size is [%Zd] bytes; maximum " - "allowable is [%d]\n", key_size, ECRYPTFS_MAX_KEY_BYTES); + "allowable is [%d]\n", *key_size, ECRYPTFS_MAX_KEY_BYTES); goto out; } - *tfm = crypto_alloc_tfm(cipher_name, (ECRYPTFS_DEFAULT_CHAINING_MODE - | CRYPTO_TFM_REQ_WEAK_KEY)); - if (!(*tfm)) { - rc = -EINVAL; - printk(KERN_ERR "Unable to allocate crypto cipher with name " - "[%s]\n", cipher_name); + rc = ecryptfs_crypto_api_algify_cipher_name(&full_alg_name, cipher_name, + "ecb"); + if (rc) goto out; - } - *key_tfm = crypto_alloc_tfm(cipher_name, CRYPTO_TFM_REQ_WEAK_KEY); - if (!(*key_tfm)) { - rc = -EINVAL; + *key_tfm = crypto_alloc_blkcipher(full_alg_name, 0, CRYPTO_ALG_ASYNC); + kfree(full_alg_name); + if (IS_ERR(*key_tfm)) { + rc = PTR_ERR(*key_tfm); printk(KERN_ERR "Unable to allocate crypto cipher with name " - "[%s]\n", cipher_name); - goto out; - } - if (key_size < crypto_tfm_alg_min_keysize(*tfm)) { - rc = -EINVAL; - printk(KERN_ERR "Request key size is [%Zd]; minimum key size " - "supported by cipher [%s] is [%d]\n", key_size, - cipher_name, crypto_tfm_alg_min_keysize(*tfm)); - goto out; - } - if (key_size < crypto_tfm_alg_min_keysize(*key_tfm)) { - rc = -EINVAL; - printk(KERN_ERR "Request key size is [%Zd]; minimum key size " - "supported by cipher [%s] is [%d]\n", key_size, - cipher_name, crypto_tfm_alg_min_keysize(*key_tfm)); + "[%s]; rc = [%d]\n", cipher_name, rc); goto out; } - if (key_size > crypto_tfm_alg_max_keysize(*tfm)) { - rc = -EINVAL; - printk(KERN_ERR "Request key size is [%Zd]; maximum key size " - "supported by cipher [%s] is [%d]\n", key_size, - cipher_name, crypto_tfm_alg_min_keysize(*tfm)); - goto out; - } - if (key_size > crypto_tfm_alg_max_keysize(*key_tfm)) { - rc = -EINVAL; - printk(KERN_ERR "Request key size is [%Zd]; maximum key size " - "supported by cipher [%s] is [%d]\n", key_size, - cipher_name, crypto_tfm_alg_min_keysize(*key_tfm)); - goto out; - } - get_random_bytes(dummy_key, key_size); - rc = crypto_cipher_setkey(*tfm, dummy_key, key_size); - if (rc) { - printk(KERN_ERR "Error attempting to set key of size [%Zd] for " - "cipher [%s]; rc = [%d]\n", key_size, cipher_name, rc); - rc = -EINVAL; - goto out; + crypto_blkcipher_set_flags(*key_tfm, CRYPTO_TFM_REQ_WEAK_KEY); + if (*key_size == 0) { + struct blkcipher_alg *alg = crypto_blkcipher_alg(*key_tfm); + + *key_size = alg->max_keysize; } - rc = crypto_cipher_setkey(*key_tfm, dummy_key, key_size); + get_random_bytes(dummy_key, *key_size); + rc = crypto_blkcipher_setkey(*key_tfm, dummy_key, *key_size); if (rc) { printk(KERN_ERR "Error attempting to set key of size [%Zd] for " - "cipher [%s]; rc = [%d]\n", key_size, cipher_name, rc); + "cipher [%s]; rc = [%d]\n", *key_size, cipher_name, rc); rc = -EINVAL; goto out; } diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c index f0d2a43..52d1e36 100644 --- a/fs/ecryptfs/dentry.c +++ b/fs/ecryptfs/dentry.c @@ -24,6 +24,7 @@ #include <linux/dcache.h> #include <linux/namei.h> +#include <linux/mount.h> #include "ecryptfs_kernel.h" /** @@ -56,6 +57,12 @@ static int ecryptfs_d_revalidate(struct dentry *dentry, struct nameidata *nd) rc = lower_dentry->d_op->d_revalidate(lower_dentry, nd); nd->dentry = dentry_save; nd->mnt = vfsmount_save; + if (dentry->d_inode) { + struct inode *lower_inode = + ecryptfs_inode_to_lower(dentry->d_inode); + + ecryptfs_copy_attr_all(dentry->d_inode, lower_inode); + } out: return rc; } @@ -76,8 +83,13 @@ static void ecryptfs_d_release(struct dentry *dentry) if (ecryptfs_dentry_to_private(dentry)) kmem_cache_free(ecryptfs_dentry_info_cache, ecryptfs_dentry_to_private(dentry)); - if (lower_dentry) + if (lower_dentry) { + struct vfsmount *lower_mnt = + ecryptfs_dentry_to_lower_mnt(dentry); + + mntput(lower_mnt); dput(lower_dentry); + } return; } diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index 872c995..f992533 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -175,6 +175,7 @@ ecryptfs_get_key_payload_data(struct key *key) #define ECRYPTFS_DEFAULT_CIPHER "aes" #define ECRYPTFS_DEFAULT_KEY_BYTES 16 #define ECRYPTFS_DEFAULT_CHAINING_MODE CRYPTO_TFM_MODE_CBC +#define ECRYPTFS_DEFAULT_HASH "md5" #define ECRYPTFS_TAG_3_PACKET_TYPE 0x8C #define ECRYPTFS_TAG_11_PACKET_TYPE 0xED #define MD5_DIGEST_SIZE 16 @@ -204,15 +205,15 @@ struct ecryptfs_crypt_stat { size_t extent_shift; unsigned int extent_mask; struct ecryptfs_mount_crypt_stat *mount_crypt_stat; - struct crypto_tfm *tfm; - struct crypto_tfm *md5_tfm; /* Crypto context for generating - * the initialization vectors */ + struct crypto_blkcipher *tfm; + struct crypto_hash *hash_tfm; /* Crypto context for generating + * the initialization vectors */ unsigned char cipher[ECRYPTFS_MAX_CIPHER_NAME_SIZE]; unsigned char key[ECRYPTFS_MAX_KEY_BYTES]; unsigned char root_iv[ECRYPTFS_MAX_IV_BYTES]; unsigned char keysigs[ECRYPTFS_MAX_NUM_KEYSIGS][ECRYPTFS_SIG_SIZE_HEX]; struct mutex cs_tfm_mutex; - struct mutex cs_md5_tfm_mutex; + struct mutex cs_hash_tfm_mutex; struct mutex cs_mutex; }; @@ -244,7 +245,7 @@ struct ecryptfs_mount_crypt_stat { struct ecryptfs_auth_tok *global_auth_tok; struct key *global_auth_tok_key; size_t global_default_cipher_key_size; - struct crypto_tfm *global_key_tfm; + struct crypto_blkcipher *global_key_tfm; struct mutex global_key_tfm_mutex; unsigned char global_default_cipher_name[ECRYPTFS_MAX_CIPHER_NAME_SIZE + 1]; @@ -425,6 +426,9 @@ void ecryptfs_destruct_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat); void ecryptfs_destruct_mount_crypt_stat( struct ecryptfs_mount_crypt_stat *mount_crypt_stat); int ecryptfs_init_crypt_ctx(struct ecryptfs_crypt_stat *crypt_stat); +int ecryptfs_crypto_api_algify_cipher_name(char **algified_name, + char *cipher_name, + char *chaining_modifier); int ecryptfs_write_inode_size_to_header(struct file *lower_file, struct inode *lower_inode, struct inode *inode); @@ -473,10 +477,14 @@ ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat, unsigned char *src, struct dentry *ecryptfs_dentry); int ecryptfs_truncate(struct dentry *dentry, loff_t new_length); int -ecryptfs_process_cipher(struct crypto_tfm **tfm, struct crypto_tfm **key_tfm, - char *cipher_name, size_t key_size); +ecryptfs_process_cipher(struct crypto_blkcipher **key_tfm, char *cipher_name, + size_t *key_size); int ecryptfs_inode_test(struct inode *inode, void *candidate_lower_inode); int ecryptfs_inode_set(struct inode *inode, void *lower_inode); void ecryptfs_init_inode(struct inode *inode, struct inode *lower_inode); +int ecryptfs_open_lower_file(struct file **lower_file, + struct dentry *lower_dentry, + struct vfsmount *lower_mnt, int flags); +int ecryptfs_close_lower_file(struct file *lower_file); #endif /* #ifndef ECRYPTFS_KERNEL_H */ diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index c8550c9..a92ef05 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -198,6 +198,33 @@ retry: struct kmem_cache *ecryptfs_file_info_cache; +int ecryptfs_open_lower_file(struct file **lower_file, + struct dentry *lower_dentry, + struct vfsmount *lower_mnt, int flags) +{ + int rc = 0; + + dget(lower_dentry); + mntget(lower_mnt); + *lower_file = dentry_open(lower_dentry, lower_mnt, flags); + if (IS_ERR(*lower_file)) { + printk(KERN_ERR "Error opening lower file for lower_dentry " + "[0x%p], lower_mnt [0x%p], and flags [0x%x]\n", + lower_dentry, lower_mnt, flags); + rc = PTR_ERR(*lower_file); + *lower_file = NULL; + goto out; + } +out: + return rc; +} + +int ecryptfs_close_lower_file(struct file *lower_file) +{ + fput(lower_file); + return 0; +} + /** * ecryptfs_open * @inode: inode speciying file to open @@ -244,19 +271,15 @@ static int ecryptfs_open(struct inode *inode, struct file *file) ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED); } mutex_unlock(&crypt_stat->cs_mutex); - /* This mntget & dget is undone via fput when the file is released */ - dget(lower_dentry); lower_flags = file->f_flags; if ((lower_flags & O_ACCMODE) == O_WRONLY) lower_flags = (lower_flags & O_ACCMODE) | O_RDWR; if (file->f_flags & O_APPEND) lower_flags &= ~O_APPEND; lower_mnt = ecryptfs_dentry_to_lower_mnt(ecryptfs_dentry); - mntget(lower_mnt); /* Corresponding fput() in ecryptfs_release() */ - lower_file = dentry_open(lower_dentry, lower_mnt, lower_flags); - if (IS_ERR(lower_file)) { - rc = PTR_ERR(lower_file); + if ((rc = ecryptfs_open_lower_file(&lower_file, lower_dentry, lower_mnt, + lower_flags))) { ecryptfs_printk(KERN_ERR, "Error opening lower file\n"); goto out_puts; } @@ -341,11 +364,16 @@ static int ecryptfs_release(struct inode *inode, struct file *file) struct file *lower_file = ecryptfs_file_to_lower(file); struct ecryptfs_file_info *file_info = ecryptfs_file_to_private(file); struct inode *lower_inode = ecryptfs_inode_to_lower(inode); + int rc; - fput(lower_file); + if ((rc = ecryptfs_close_lower_file(lower_file))) { + printk(KERN_ERR "Error closing lower_file\n"); + goto out; + } inode->i_blocks = lower_inode->i_blocks; kmem_cache_free(ecryptfs_file_info_cache, file_info); - return 0; +out: + return rc; } static int diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index efdd2b7..dfcc684 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -231,7 +231,6 @@ static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry) int lower_flags; struct ecryptfs_crypt_stat *crypt_stat; struct dentry *lower_dentry; - struct dentry *tlower_dentry = NULL; struct file *lower_file; struct inode *inode, *lower_inode; struct vfsmount *lower_mnt; @@ -241,30 +240,19 @@ static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry) lower_dentry->d_name.name); inode = ecryptfs_dentry->d_inode; crypt_stat = &ecryptfs_inode_to_private(inode)->crypt_stat; - tlower_dentry = dget(lower_dentry); - if (!tlower_dentry) { - rc = -ENOMEM; - ecryptfs_printk(KERN_ERR, "Error dget'ing lower_dentry\n"); - goto out; - } lower_flags = ((O_CREAT | O_WRONLY | O_TRUNC) & O_ACCMODE) | O_RDWR; #if BITS_PER_LONG != 32 lower_flags |= O_LARGEFILE; #endif lower_mnt = ecryptfs_dentry_to_lower_mnt(ecryptfs_dentry); - mntget(lower_mnt); /* Corresponding fput() at end of this function */ - lower_file = dentry_open(tlower_dentry, lower_mnt, lower_flags); - if (IS_ERR(lower_file)) { - rc = PTR_ERR(lower_file); + if ((rc = ecryptfs_open_lower_file(&lower_file, lower_dentry, lower_mnt, + lower_flags))) { ecryptfs_printk(KERN_ERR, "Error opening dentry; rc = [%i]\n", rc); goto out; } - /* fput(lower_file) should handle the puts if we do this */ - lower_file->f_dentry = tlower_dentry; - lower_file->f_vfsmnt = lower_mnt; - lower_inode = tlower_dentry->d_inode; + lower_inode = lower_dentry->d_inode; if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) { ecryptfs_printk(KERN_DEBUG, "This is a directory\n"); ECRYPTFS_CLEAR_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED); @@ -285,7 +273,8 @@ static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry) } rc = grow_file(ecryptfs_dentry, lower_file, inode, lower_inode); out_fput: - fput(lower_file); + if ((rc = ecryptfs_close_lower_file(lower_file))) + printk(KERN_ERR "Error closing lower_file\n"); out: return rc; } @@ -336,7 +325,6 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry, struct dentry *lower_dir_dentry; struct dentry *lower_dentry; struct vfsmount *lower_mnt; - struct dentry *tlower_dentry = NULL; char *encoded_name; unsigned int encoded_namelen; struct ecryptfs_crypt_stat *crypt_stat = NULL; @@ -347,27 +335,32 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry, lower_dir_dentry = ecryptfs_dentry_to_lower(dentry->d_parent); dentry->d_op = &ecryptfs_dops; if ((dentry->d_name.len == 1 && !strcmp(dentry->d_name.name, ".")) - || (dentry->d_name.len == 2 && !strcmp(dentry->d_name.name, ".."))) - goto out_drop; + || (dentry->d_name.len == 2 + && !strcmp(dentry->d_name.name, ".."))) { + d_drop(dentry); + goto out; + } encoded_namelen = ecryptfs_encode_filename(crypt_stat, dentry->d_name.name, dentry->d_name.len, &encoded_name); if (encoded_namelen < 0) { rc = encoded_namelen; - goto out_drop; + d_drop(dentry); + goto out; } ecryptfs_printk(KERN_DEBUG, "encoded_name = [%s]; encoded_namelen " "= [%d]\n", encoded_name, encoded_namelen); lower_dentry = lookup_one_len(encoded_name, lower_dir_dentry, encoded_namelen - 1); kfree(encoded_name); - lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(dentry->d_parent)); if (IS_ERR(lower_dentry)) { ecryptfs_printk(KERN_ERR, "ERR from lower_dentry\n"); rc = PTR_ERR(lower_dentry); - goto out_drop; + d_drop(dentry); + goto out; } + lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(dentry->d_parent)); ecryptfs_printk(KERN_DEBUG, "lower_dentry = [%p]; lower_dentry->" "d_name.name = [%s]\n", lower_dentry, lower_dentry->d_name.name); @@ -408,12 +401,6 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry, "as we *think* we are about to unlink\n"); goto out; } - tlower_dentry = dget(lower_dentry); - if (!tlower_dentry || IS_ERR(tlower_dentry)) { - rc = -ENOMEM; - ecryptfs_printk(KERN_ERR, "Cannot dget lower_dentry\n"); - goto out_dput; - } /* Released in this function */ page_virt = (char *)kmem_cache_alloc(ecryptfs_header_cache_2, @@ -425,7 +412,7 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry, goto out_dput; } memset(page_virt, 0, PAGE_CACHE_SIZE); - rc = ecryptfs_read_header_region(page_virt, tlower_dentry, nd->mnt); + rc = ecryptfs_read_header_region(page_virt, lower_dentry, nd->mnt); crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat; if (!ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_POLICY_APPLIED)) ecryptfs_set_default_sizes(crypt_stat); @@ -448,9 +435,6 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry, out_dput: dput(lower_dentry); - if (tlower_dentry) - dput(tlower_dentry); -out_drop: d_drop(dentry); out: return ERR_PTR(rc); @@ -486,8 +470,9 @@ out_lock: unlock_dir(lower_dir_dentry); dput(lower_new_dentry); dput(lower_old_dentry); - if (!new_dentry->d_inode) - d_drop(new_dentry); + d_drop(lower_old_dentry); + d_drop(new_dentry); + d_drop(old_dentry); return rc; } @@ -500,7 +485,7 @@ static int ecryptfs_unlink(struct inode *dir, struct dentry *dentry) lock_parent(lower_dentry); rc = vfs_unlink(lower_dir_inode, lower_dentry); if (rc) { - ecryptfs_printk(KERN_ERR, "Error in vfs_unlink\n"); + printk(KERN_ERR "Error in vfs_unlink; rc = [%d]\n", rc); goto out_unlock; } ecryptfs_copy_attr_times(dir, lower_dir_inode); @@ -576,41 +561,24 @@ out: static int ecryptfs_rmdir(struct inode *dir, struct dentry *dentry) { - int rc = 0; - struct dentry *tdentry = NULL; struct dentry *lower_dentry; - struct dentry *tlower_dentry = NULL; struct dentry *lower_dir_dentry; + int rc; lower_dentry = ecryptfs_dentry_to_lower(dentry); - if (!(tdentry = dget(dentry))) { - rc = -EINVAL; - ecryptfs_printk(KERN_ERR, "Error dget'ing dentry [%p]\n", - dentry); - goto out; - } + dget(dentry); lower_dir_dentry = lock_parent(lower_dentry); - if (!(tlower_dentry = dget(lower_dentry))) { - rc = -EINVAL; - ecryptfs_printk(KERN_ERR, "Error dget'ing lower_dentry " - "[%p]\n", lower_dentry); - goto out; - } + dget(lower_dentry); rc = vfs_rmdir(lower_dir_dentry->d_inode, lower_dentry); - if (!rc) { - d_delete(tlower_dentry); - tlower_dentry = NULL; - } + dput(lower_dentry); + if (!rc) + d_delete(lower_dentry); ecryptfs_copy_attr_times(dir, lower_dir_dentry->d_inode); dir->i_nlink = lower_dir_dentry->d_inode->i_nlink; unlock_dir(lower_dir_dentry); if (!rc) d_drop(dentry); -out: - if (tdentry) - dput(tdentry); - if (tlower_dentry) - dput(tlower_dentry); + dput(dentry); return rc; } @@ -663,6 +631,8 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry, ecryptfs_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode); out_lock: unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry); + dput(lower_new_dentry->d_parent); + dput(lower_old_dentry->d_parent); dput(lower_new_dentry); dput(lower_old_dentry); return rc; @@ -832,12 +802,11 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length) } lower_dentry = ecryptfs_dentry_to_lower(dentry); /* This dget & mntget is released through fput at out_fput: */ - dget(lower_dentry); lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry); - mntget(lower_mnt); - lower_file = dentry_open(lower_dentry, lower_mnt, O_RDWR); - if (unlikely(IS_ERR(lower_file))) { - rc = PTR_ERR(lower_file); + if ((rc = ecryptfs_open_lower_file(&lower_file, lower_dentry, lower_mnt, + O_RDWR))) { + ecryptfs_printk(KERN_ERR, + "Error opening dentry; rc = [%i]\n", rc); goto out_free; } ecryptfs_set_file_lower(&fake_ecryptfs_file, lower_file); @@ -879,7 +848,8 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length) = CURRENT_TIME; mark_inode_dirty_sync(inode); out_fput: - fput(lower_file); + if ((rc = ecryptfs_close_lower_file(lower_file))) + printk(KERN_ERR "Error closing lower_file\n"); out_free: if (ecryptfs_file_to_private(&fake_ecryptfs_file)) kmem_cache_free(ecryptfs_file_info_cache, diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index ba45478..c3746f5 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -458,14 +458,16 @@ out: static int decrypt_session_key(struct ecryptfs_auth_tok *auth_tok, struct ecryptfs_crypt_stat *crypt_stat) { - int rc = 0; struct ecryptfs_password *password_s_ptr; - struct crypto_tfm *tfm = NULL; struct scatterlist src_sg[2], dst_sg[2]; struct mutex *tfm_mutex = NULL; /* TODO: Use virt_to_scatterlist for these */ char *encrypted_session_key; char *session_key; + struct blkcipher_desc desc = { + .flags = CRYPTO_TFM_REQ_MAY_SLEEP + }; + int rc = 0; password_s_ptr = &auth_tok->token.password; if (ECRYPTFS_CHECK_FLAG(password_s_ptr->flags, @@ -482,30 +484,37 @@ static int decrypt_session_key(struct ecryptfs_auth_tok *auth_tok, if (!strcmp(crypt_stat->cipher, crypt_stat->mount_crypt_stat->global_default_cipher_name) && crypt_stat->mount_crypt_stat->global_key_tfm) { - tfm = crypt_stat->mount_crypt_stat->global_key_tfm; + desc.tfm = crypt_stat->mount_crypt_stat->global_key_tfm; tfm_mutex = &crypt_stat->mount_crypt_stat->global_key_tfm_mutex; } else { - tfm = crypto_alloc_tfm(crypt_stat->cipher, - CRYPTO_TFM_REQ_WEAK_KEY); - if (!tfm) { - printk(KERN_ERR "Error allocating crypto context\n"); - rc = -ENOMEM; + char *full_alg_name; + + rc = ecryptfs_crypto_api_algify_cipher_name(&full_alg_name, + crypt_stat->cipher, + "ecb"); + if (rc) + goto out; + desc.tfm = crypto_alloc_blkcipher(full_alg_name, 0, + CRYPTO_ALG_ASYNC); + kfree(full_alg_name); + if (IS_ERR(desc.tfm)) { + rc = PTR_ERR(desc.tfm); + printk(KERN_ERR "Error allocating crypto context; " + "rc = [%d]\n", rc); goto out; } - } - if (password_s_ptr->session_key_encryption_key_bytes - < crypto_tfm_alg_min_keysize(tfm)) { - printk(KERN_WARNING "Session key encryption key is [%d] bytes; " - "minimum keysize for selected cipher is [%d] bytes.\n", - password_s_ptr->session_key_encryption_key_bytes, - crypto_tfm_alg_min_keysize(tfm)); - rc = -EINVAL; - goto out; + crypto_blkcipher_set_flags(desc.tfm, CRYPTO_TFM_REQ_WEAK_KEY); } if (tfm_mutex) mutex_lock(tfm_mutex); - crypto_cipher_setkey(tfm, password_s_ptr->session_key_encryption_key, - crypt_stat->key_size); + rc = crypto_blkcipher_setkey(desc.tfm, + password_s_ptr->session_key_encryption_key, + crypt_stat->key_size); + if (rc < 0) { + printk(KERN_ERR "Error setting key for crypto context\n"); + rc = -EINVAL; + goto out_free_tfm; + } /* TODO: virt_to_scatterlist */ encrypted_session_key = (char *)__get_free_page(GFP_KERNEL); if (!encrypted_session_key) { @@ -531,9 +540,12 @@ static int decrypt_session_key(struct ecryptfs_auth_tok *auth_tok, auth_tok->session_key.decrypted_key_size = auth_tok->session_key.encrypted_key_size; dst_sg[0].length = auth_tok->session_key.encrypted_key_size; - /* TODO: Handle error condition */ - crypto_cipher_decrypt(tfm, dst_sg, src_sg, - auth_tok->session_key.encrypted_key_size); + rc = crypto_blkcipher_decrypt(&desc, dst_sg, src_sg, + auth_tok->session_key.encrypted_key_size); + if (rc) { + printk(KERN_ERR "Error decrypting; rc = [%d]\n", rc); + goto out_free_memory; + } auth_tok->session_key.decrypted_key_size = auth_tok->session_key.encrypted_key_size; memcpy(auth_tok->session_key.decrypted_key, session_key, @@ -546,6 +558,7 @@ static int decrypt_session_key(struct ecryptfs_auth_tok *auth_tok, if (ecryptfs_verbosity > 0) ecryptfs_dump_hex(crypt_stat->key, crypt_stat->key_size); +out_free_memory: memset(encrypted_session_key, 0, PAGE_CACHE_SIZE); free_page((unsigned long)encrypted_session_key); memset(session_key, 0, PAGE_CACHE_SIZE); @@ -554,7 +567,7 @@ out_free_tfm: if (tfm_mutex) mutex_unlock(tfm_mutex); else - crypto_free_tfm(tfm); + crypto_free_blkcipher(desc.tfm); out: return rc; } @@ -803,19 +816,21 @@ write_tag_3_packet(char *dest, size_t max, struct ecryptfs_auth_tok *auth_tok, struct ecryptfs_crypt_stat *crypt_stat, struct ecryptfs_key_record *key_rec, size_t *packet_size) { - int rc = 0; - size_t i; size_t signature_is_valid = 0; size_t encrypted_session_key_valid = 0; char session_key_encryption_key[ECRYPTFS_MAX_KEY_BYTES]; struct scatterlist dest_sg[2]; struct scatterlist src_sg[2]; - struct crypto_tfm *tfm = NULL; struct mutex *tfm_mutex = NULL; size_t key_rec_size; size_t packet_size_length; size_t cipher_code; + struct blkcipher_desc desc = { + .tfm = NULL, + .flags = CRYPTO_TFM_REQ_MAY_SLEEP + }; + int rc = 0; (*packet_size) = 0; /* Check for a valid signature on the auth_tok */ @@ -882,33 +897,48 @@ write_tag_3_packet(char *dest, size_t max, struct ecryptfs_auth_tok *auth_tok, if (!strcmp(crypt_stat->cipher, crypt_stat->mount_crypt_stat->global_default_cipher_name) && crypt_stat->mount_crypt_stat->global_key_tfm) { - tfm = crypt_stat->mount_crypt_stat->global_key_tfm; + desc.tfm = crypt_stat->mount_crypt_stat->global_key_tfm; tfm_mutex = &crypt_stat->mount_crypt_stat->global_key_tfm_mutex; - } else - tfm = crypto_alloc_tfm(crypt_stat->cipher, 0); - if (!tfm) { - ecryptfs_printk(KERN_ERR, "Could not initialize crypto " - "context for cipher [%s]\n", - crypt_stat->cipher); - rc = -EINVAL; - goto out; + } else { + char *full_alg_name; + + rc = ecryptfs_crypto_api_algify_cipher_name(&full_alg_name, + crypt_stat->cipher, + "ecb"); + if (rc) + goto out; + desc.tfm = crypto_alloc_blkcipher(full_alg_name, 0, + CRYPTO_ALG_ASYNC); + kfree(full_alg_name); + if (IS_ERR(desc.tfm)) { + rc = PTR_ERR(desc.tfm); + ecryptfs_printk(KERN_ERR, "Could not initialize crypto " + "context for cipher [%s]; rc = [%d]\n", + crypt_stat->cipher, rc); + goto out; + } + crypto_blkcipher_set_flags(desc.tfm, CRYPTO_TFM_REQ_WEAK_KEY); } if (tfm_mutex) mutex_lock(tfm_mutex); - rc = crypto_cipher_setkey(tfm, session_key_encryption_key, - crypt_stat->key_size); + rc = crypto_blkcipher_setkey(desc.tfm, session_key_encryption_key, + crypt_stat->key_size); if (rc < 0) { if (tfm_mutex) mutex_unlock(tfm_mutex); ecryptfs_printk(KERN_ERR, "Error setting key for crypto " - "context\n"); + "context; rc = [%d]\n", rc); goto out; } rc = 0; ecryptfs_printk(KERN_DEBUG, "Encrypting [%d] bytes of the key\n", crypt_stat->key_size); - crypto_cipher_encrypt(tfm, dest_sg, src_sg, - (*key_rec).enc_key_size); + rc = crypto_blkcipher_encrypt(&desc, dest_sg, src_sg, + (*key_rec).enc_key_size); + if (rc) { + printk(KERN_ERR "Error encrypting; rc = [%d]\n", rc); + goto out; + } if (tfm_mutex) mutex_unlock(tfm_mutex); ecryptfs_printk(KERN_DEBUG, "This should be the encrypted key:\n"); @@ -971,8 +1001,8 @@ encrypted_session_key_set: (*key_rec).enc_key_size); (*packet_size) += (*key_rec).enc_key_size; out: - if (tfm && !tfm_mutex) - crypto_free_tfm(tfm); + if (desc.tfm && !tfm_mutex) + crypto_free_blkcipher(desc.tfm); if (rc) (*packet_size) = 0; return rc; diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 5938a23..a78d87d 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -208,7 +208,6 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options) char *cipher_name_dst; char *cipher_name_src; char *cipher_key_bytes_src; - struct crypto_tfm *tmp_tfm; int cipher_name_len; if (!options) { @@ -305,25 +304,19 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options) = '\0'; } if (!cipher_key_bytes_set) { - mount_crypt_stat->global_default_cipher_key_size = - ECRYPTFS_DEFAULT_KEY_BYTES; - ecryptfs_printk(KERN_DEBUG, "Cipher key size was not " - "specified. Defaulting to [%d]\n", - mount_crypt_stat-> - global_default_cipher_key_size); + mount_crypt_stat->global_default_cipher_key_size = 0; } rc = ecryptfs_process_cipher( - &tmp_tfm, &mount_crypt_stat->global_key_tfm, mount_crypt_stat->global_default_cipher_name, - mount_crypt_stat->global_default_cipher_key_size); - if (tmp_tfm) - crypto_free_tfm(tmp_tfm); + &mount_crypt_stat->global_default_cipher_key_size); if (rc) { printk(KERN_ERR "Error attempting to initialize cipher [%s] " "with key size [%Zd] bytes; rc = [%d]\n", mount_crypt_stat->global_default_cipher_name, mount_crypt_stat->global_default_cipher_key_size, rc); + mount_crypt_stat->global_key_tfm = NULL; + mount_crypt_stat->global_auth_tok_key = NULL; rc = -EINVAL; goto out; } diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c index c337c04..825757a 100644 --- a/fs/ecryptfs/super.c +++ b/fs/ecryptfs/super.c @@ -138,23 +138,6 @@ static void ecryptfs_clear_inode(struct inode *inode) } /** - * ecryptfs_umount_begin - * - * Called in do_umount(). - */ -static void ecryptfs_umount_begin(struct vfsmount *vfsmnt, int flags) -{ - struct vfsmount *lower_mnt = - ecryptfs_dentry_to_lower_mnt(vfsmnt->mnt_sb->s_root); - struct super_block *lower_sb; - - mntput(lower_mnt); - lower_sb = lower_mnt->mnt_sb; - if (lower_sb->s_op->umount_begin) - lower_sb->s_op->umount_begin(lower_mnt, flags); -} - -/** * ecryptfs_show_options * * Prints the directory we are currently mounted over. @@ -193,6 +176,5 @@ struct super_operations ecryptfs_sops = { .statfs = ecryptfs_statfs, .remount_fs = NULL, .clear_inode = ecryptfs_clear_inode, - .umount_begin = ecryptfs_umount_begin, .show_options = ecryptfs_show_options }; diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 1e95780..4fe49c3 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -69,44 +69,49 @@ static int verify_group_input(struct super_block *sb, else if (outside(input->block_bitmap, start, end)) ext4_warning(sb, __FUNCTION__, "Block bitmap not in group (block %llu)", - input->block_bitmap); + (unsigned long long)input->block_bitmap); else if (outside(input->inode_bitmap, start, end)) ext4_warning(sb, __FUNCTION__, "Inode bitmap not in group (block %llu)", - input->inode_bitmap); + (unsigned long long)input->inode_bitmap); else if (outside(input->inode_table, start, end) || outside(itend - 1, start, end)) ext4_warning(sb, __FUNCTION__, "Inode table not in group (blocks %llu-%llu)", - input->inode_table, itend - 1); + (unsigned long long)input->inode_table, itend - 1); else if (input->inode_bitmap == input->block_bitmap) ext4_warning(sb, __FUNCTION__, "Block bitmap same as inode bitmap (%llu)", - input->block_bitmap); + (unsigned long long)input->block_bitmap); else if (inside(input->block_bitmap, input->inode_table, itend)) ext4_warning(sb, __FUNCTION__, "Block bitmap (%llu) in inode table (%llu-%llu)", - input->block_bitmap, input->inode_table, itend-1); + (unsigned long long)input->block_bitmap, + (unsigned long long)input->inode_table, itend - 1); else if (inside(input->inode_bitmap, input->inode_table, itend)) ext4_warning(sb, __FUNCTION__, "Inode bitmap (%llu) in inode table (%llu-%llu)", - input->inode_bitmap, input->inode_table, itend-1); + (unsigned long long)input->inode_bitmap, + (unsigned long long)input->inode_table, itend - 1); else if (inside(input->block_bitmap, start, metaend)) ext4_warning(sb, __FUNCTION__, "Block bitmap (%llu) in GDT table" " (%llu-%llu)", - input->block_bitmap, start, metaend - 1); + (unsigned long long)input->block_bitmap, + start, metaend - 1); else if (inside(input->inode_bitmap, start, metaend)) ext4_warning(sb, __FUNCTION__, "Inode bitmap (%llu) in GDT table" " (%llu-%llu)", - input->inode_bitmap, start, metaend - 1); + (unsigned long long)input->inode_bitmap, + start, metaend - 1); else if (inside(input->inode_table, start, metaend) || inside(itend - 1, start, metaend)) ext4_warning(sb, __FUNCTION__, "Inode table (%llu-%llu) overlaps" "GDT table (%llu-%llu)", - input->inode_table, itend - 1, start, metaend - 1); + (unsigned long long)input->inode_table, + itend - 1, start, metaend - 1); else err = 0; brelse(bh); diff --git a/fs/fat/file.c b/fs/fat/file.c index 8337451..0aa813d 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -303,7 +303,17 @@ void fat_truncate(struct inode *inode) fat_flush_inodes(inode->i_sb, inode, NULL); } +int fat_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) +{ + struct inode *inode = dentry->d_inode; + generic_fillattr(inode, stat); + stat->blksize = MSDOS_SB(inode->i_sb)->cluster_size; + return 0; +} +EXPORT_SYMBOL_GPL(fat_getattr); + struct inode_operations fat_file_inode_operations = { .truncate = fat_truncate, .setattr = fat_notify_change, + .getattr = fat_getattr, }; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index cfc8f81..c71a6c0 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -138,6 +138,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd) struct fuse_entry_out outarg; struct fuse_conn *fc; struct fuse_req *req; + struct fuse_req *forget_req; struct dentry *parent; /* Doesn't hurt to "reset" the validity timeout */ @@ -152,25 +153,33 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd) if (IS_ERR(req)) return 0; + forget_req = fuse_get_req(fc); + if (IS_ERR(forget_req)) { + fuse_put_request(fc, req); + return 0; + } + parent = dget_parent(entry); fuse_lookup_init(req, parent->d_inode, entry, &outarg); request_send(fc, req); dput(parent); err = req->out.h.error; + fuse_put_request(fc, req); /* Zero nodeid is same as -ENOENT */ if (!err && !outarg.nodeid) err = -ENOENT; if (!err) { struct fuse_inode *fi = get_fuse_inode(inode); if (outarg.nodeid != get_node_id(inode)) { - fuse_send_forget(fc, req, outarg.nodeid, 1); + fuse_send_forget(fc, forget_req, + outarg.nodeid, 1); return 0; } spin_lock(&fc->lock); fi->nlookup ++; spin_unlock(&fc->lock); } - fuse_put_request(fc, req); + fuse_put_request(fc, forget_req); if (err || (outarg.attr.mode ^ inode->i_mode) & S_IFMT) return 0; @@ -221,6 +230,7 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, struct inode *inode = NULL; struct fuse_conn *fc = get_fuse_conn(dir); struct fuse_req *req; + struct fuse_req *forget_req; if (entry->d_name.len > FUSE_NAME_MAX) return ERR_PTR(-ENAMETOOLONG); @@ -229,9 +239,16 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, if (IS_ERR(req)) return ERR_PTR(PTR_ERR(req)); + forget_req = fuse_get_req(fc); + if (IS_ERR(forget_req)) { + fuse_put_request(fc, req); + return ERR_PTR(PTR_ERR(forget_req)); + } + fuse_lookup_init(req, dir, entry, &outarg); request_send(fc, req); err = req->out.h.error; + fuse_put_request(fc, req); /* Zero nodeid is same as -ENOENT, but with valid timeout */ if (!err && outarg.nodeid && (invalid_nodeid(outarg.nodeid) || !valid_mode(outarg.attr.mode))) @@ -240,11 +257,11 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation, &outarg.attr); if (!inode) { - fuse_send_forget(fc, req, outarg.nodeid, 1); + fuse_send_forget(fc, forget_req, outarg.nodeid, 1); return ERR_PTR(-ENOMEM); } } - fuse_put_request(fc, req); + fuse_put_request(fc, forget_req); if (err && err != -ENOENT) return ERR_PTR(err); @@ -388,6 +405,13 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req, struct fuse_entry_out outarg; struct inode *inode; int err; + struct fuse_req *forget_req; + + forget_req = fuse_get_req(fc); + if (IS_ERR(forget_req)) { + fuse_put_request(fc, req); + return PTR_ERR(forget_req); + } req->in.h.nodeid = get_node_id(dir); req->out.numargs = 1; @@ -395,24 +419,24 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req, req->out.args[0].value = &outarg; request_send(fc, req); err = req->out.h.error; - if (err) { - fuse_put_request(fc, req); - return err; - } + fuse_put_request(fc, req); + if (err) + goto out_put_forget_req; + err = -EIO; if (invalid_nodeid(outarg.nodeid)) - goto out_put_request; + goto out_put_forget_req; if ((outarg.attr.mode ^ mode) & S_IFMT) - goto out_put_request; + goto out_put_forget_req; inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation, &outarg.attr); if (!inode) { - fuse_send_forget(fc, req, outarg.nodeid, 1); + fuse_send_forget(fc, forget_req, outarg.nodeid, 1); return -ENOMEM; } - fuse_put_request(fc, req); + fuse_put_request(fc, forget_req); if (S_ISDIR(inode->i_mode)) { struct dentry *alias; @@ -434,8 +458,8 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req, fuse_invalidate_attr(dir); return 0; - out_put_request: - fuse_put_request(fc, req); + out_put_forget_req: + fuse_put_request(fc, forget_req); return err; } diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 2bb5ace..763a50d 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -397,14 +397,14 @@ static int fuse_readpages(struct file *file, struct address_space *mapping, err = -EIO; if (is_bad_inode(inode)) - goto clean_pages_up; + goto out; data.file = file; data.inode = inode; data.req = fuse_get_req(fc); err = PTR_ERR(data.req); if (IS_ERR(data.req)) - goto clean_pages_up; + goto out; err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data); if (!err) { @@ -413,10 +413,7 @@ static int fuse_readpages(struct file *file, struct address_space *mapping, else fuse_put_request(fc, data.req); } - return err; - -clean_pages_up: - put_pages_list(pages); +out: return err; } diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 57c43ac..d470e52 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -157,6 +157,9 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum *inum, struct gfs2_glock *io_gl; int error; + if (!inode) + return ERR_PTR(-ENOBUFS); + if (inode->i_state & I_NEW) { struct gfs2_sbd *sdp = GFS2_SB(inode); umode_t mode = DT2IF(type); diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index 21508a1..9889c1e 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -84,8 +84,8 @@ static int __init init_gfs2_fs(void) gfs2_inode_cachep = kmem_cache_create("gfs2_inode", sizeof(struct gfs2_inode), - 0, (SLAB_RECLAIM_ACCOUNT| - SLAB_PANIC|SLAB_MEM_SPREAD), + 0, SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD, gfs2_init_inode_once, NULL); if (!gfs2_inode_cachep) goto fail; diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c index 8d5963c..015640b 100644 --- a/fs/gfs2/ops_address.c +++ b/fs/gfs2/ops_address.c @@ -337,13 +337,6 @@ out: out_noerror: ret = 0; out_unlock: - /* unlock all pages, we can't do any I/O right now */ - for (page_idx = 0; page_idx < nr_pages; page_idx++) { - struct page *page = list_entry(pages->prev, struct page, lru); - list_del(&page->lru); - unlock_page(page); - page_cache_release(page); - } if (do_unlock) gfs2_holder_uninit(&gh); goto out; diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c index 06f06f7..b47d959 100644 --- a/fs/gfs2/ops_super.c +++ b/fs/gfs2/ops_super.c @@ -138,16 +138,27 @@ static void gfs2_put_super(struct super_block *sb) } /** - * gfs2_write_super - disk commit all incore transactions - * @sb: the filesystem + * gfs2_write_super + * @sb: the superblock * - * This function is called every time sync(2) is called. - * After this exits, all dirty buffers are synced. */ static void gfs2_write_super(struct super_block *sb) { + sb->s_dirt = 0; +} + +/** + * gfs2_sync_fs - sync the filesystem + * @sb: the superblock + * + * Flushes the log to disk. + */ +static int gfs2_sync_fs(struct super_block *sb, int wait) +{ + sb->s_dirt = 0; gfs2_log_flush(sb->s_fs_info, NULL); + return 0; } /** @@ -452,17 +463,18 @@ static void gfs2_destroy_inode(struct inode *inode) } struct super_operations gfs2_super_ops = { - .alloc_inode = gfs2_alloc_inode, - .destroy_inode = gfs2_destroy_inode, - .write_inode = gfs2_write_inode, - .delete_inode = gfs2_delete_inode, - .put_super = gfs2_put_super, - .write_super = gfs2_write_super, - .write_super_lockfs = gfs2_write_super_lockfs, - .unlockfs = gfs2_unlockfs, - .statfs = gfs2_statfs, - .remount_fs = gfs2_remount_fs, - .clear_inode = gfs2_clear_inode, - .show_options = gfs2_show_options, + .alloc_inode = gfs2_alloc_inode, + .destroy_inode = gfs2_destroy_inode, + .write_inode = gfs2_write_inode, + .delete_inode = gfs2_delete_inode, + .put_super = gfs2_put_super, + .write_super = gfs2_write_super, + .sync_fs = gfs2_sync_fs, + .write_super_lockfs = gfs2_write_super_lockfs, + .unlockfs = gfs2_unlockfs, + .statfs = gfs2_statfs, + .remount_fs = gfs2_remount_fs, + .clear_inode = gfs2_clear_inode, + .show_options = gfs2_show_options, }; diff --git a/fs/hfs/super.c b/fs/hfs/super.c index d43b4fc..85b17b3 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -390,11 +390,13 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent) hfs_find_exit(&fd); goto bail_no_root; } + res = -EINVAL; root_inode = hfs_iget(sb, &fd.search_key->cat, &rec); hfs_find_exit(&fd); if (!root_inode) goto bail_no_root; + res = -ENOMEM; sb->s_root = d_alloc_root(root_inode); if (!sb->s_root) goto bail_iput; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 4ee3f00..7f47569 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -62,24 +62,19 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) loff_t len, vma_len; int ret; - if (vma->vm_pgoff & (HPAGE_SIZE / PAGE_SIZE - 1)) - return -EINVAL; - - if (vma->vm_start & ~HPAGE_MASK) - return -EINVAL; - - if (vma->vm_end & ~HPAGE_MASK) - return -EINVAL; - - if (vma->vm_end - vma->vm_start < HPAGE_SIZE) - return -EINVAL; + /* + * vma alignment has already been checked by prepare_hugepage_range. + * If you add any error returns here, do so after setting VM_HUGETLB, + * so is_vm_hugetlb_page tests below unmap_region go the right way + * when do_mmap_pgoff unwinds (may be important on powerpc and ia64). + */ + vma->vm_flags |= VM_HUGETLB | VM_RESERVED; + vma->vm_ops = &hugetlb_vm_ops; vma_len = (loff_t)(vma->vm_end - vma->vm_start); mutex_lock(&inode->i_mutex); file_accessed(file); - vma->vm_flags |= VM_HUGETLB | VM_RESERVED; - vma->vm_ops = &hugetlb_vm_ops; ret = -ENOMEM; len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); @@ -271,26 +266,24 @@ static void hugetlbfs_drop_inode(struct inode *inode) hugetlbfs_forget_inode(inode); } -/* - * h_pgoff is in HPAGE_SIZE units. - * vma->vm_pgoff is in PAGE_SIZE units. - */ static inline void -hugetlb_vmtruncate_list(struct prio_tree_root *root, unsigned long h_pgoff) +hugetlb_vmtruncate_list(struct prio_tree_root *root, pgoff_t pgoff) { struct vm_area_struct *vma; struct prio_tree_iter iter; - vma_prio_tree_foreach(vma, &iter, root, h_pgoff, ULONG_MAX) { - unsigned long h_vm_pgoff; + vma_prio_tree_foreach(vma, &iter, root, pgoff, ULONG_MAX) { unsigned long v_offset; - h_vm_pgoff = vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT); - v_offset = (h_pgoff - h_vm_pgoff) << HPAGE_SHIFT; /* - * Is this VMA fully outside the truncation point? + * Can the expression below overflow on 32-bit arches? + * No, because the prio_tree returns us only those vmas + * which overlap the truncated area starting at pgoff, + * and no vma on a 32-bit arch can span beyond the 4GB. */ - if (h_vm_pgoff >= h_pgoff) + if (vma->vm_pgoff < pgoff) + v_offset = (pgoff - vma->vm_pgoff) << PAGE_SHIFT; + else v_offset = 0; __unmap_hugepage_range(vma, @@ -303,14 +296,14 @@ hugetlb_vmtruncate_list(struct prio_tree_root *root, unsigned long h_pgoff) */ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset) { - unsigned long pgoff; + pgoff_t pgoff; struct address_space *mapping = inode->i_mapping; if (offset > inode->i_size) return -EINVAL; BUG_ON(offset & ~HPAGE_MASK); - pgoff = offset >> HPAGE_SHIFT; + pgoff = offset >> PAGE_SHIFT; inode->i_size = offset; spin_lock(&mapping->i_mmap_lock); @@ -624,7 +617,6 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig) do_div(size, 100); rest++; } - size &= HPAGE_MASK; pconfig->nr_blocks = (size >> HPAGE_SHIFT); value = rest; } else if (!strcmp(opt,"nr_inodes")) { @@ -1306,6 +1306,42 @@ void wake_up_inode(struct inode *inode) wake_up_bit(&inode->i_state, __I_LOCK); } +/* + * We rarely want to lock two inodes that do not have a parent/child + * relationship (such as directory, child inode) simultaneously. The + * vast majority of file systems should be able to get along fine + * without this. Do not use these functions except as a last resort. + */ +void inode_double_lock(struct inode *inode1, struct inode *inode2) +{ + if (inode1 == NULL || inode2 == NULL || inode1 == inode2) { + if (inode1) + mutex_lock(&inode1->i_mutex); + else if (inode2) + mutex_lock(&inode2->i_mutex); + return; + } + + if (inode1 < inode2) { + mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD); + } else { + mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD); + } +} +EXPORT_SYMBOL(inode_double_lock); + +void inode_double_unlock(struct inode *inode1, struct inode *inode2) +{ + if (inode1) + mutex_unlock(&inode1->i_mutex); + + if (inode2 && inode2 != inode1) + mutex_unlock(&inode2->i_mutex); +} +EXPORT_SYMBOL(inode_double_unlock); + static __initdata unsigned long ihash_entries; static int __init set_ihash_entries(char *str) { diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index d5c6304..4f82bcd6 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c @@ -967,6 +967,13 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh) */ jbd_lock_bh_state(bh); spin_lock(&journal->j_list_lock); + + /* Now that we have bh_state locked, are we really still mapped? */ + if (!buffer_mapped(bh)) { + JBUFFER_TRACE(jh, "unmapped buffer, bailing out"); + goto no_journal; + } + if (jh->b_transaction) { JBUFFER_TRACE(jh, "has transaction"); if (jh->b_transaction != handle->h_transaction) { @@ -1028,6 +1035,11 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh) sync_dirty_buffer(bh); jbd_lock_bh_state(bh); spin_lock(&journal->j_list_lock); + /* Since we dropped the lock... */ + if (!buffer_mapped(bh)) { + JBUFFER_TRACE(jh, "buffer got unmapped"); + goto no_journal; + } /* The buffer may become locked again at any time if it is redirtied */ } @@ -1824,6 +1836,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) } } } else if (transaction == journal->j_committing_transaction) { + JBUFFER_TRACE(jh, "on committing transaction"); if (jh->b_jlist == BJ_Locked) { /* * The buffer is on the committing transaction's locked @@ -1838,7 +1851,6 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) * can remove it's next_transaction pointer from the * running transaction if that is set, but nothing * else. */ - JBUFFER_TRACE(jh, "on committing transaction"); set_buffer_freed(bh); if (jh->b_next_transaction) { J_ASSERT(jh->b_next_transaction == @@ -1858,6 +1870,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) * i_size already for this truncate so recovery will not * expose the disk blocks we are discarding here.) */ J_ASSERT_JH(jh, transaction == journal->j_running_transaction); + JBUFFER_TRACE(jh, "on running transaction"); may_free = __dispose_buffer(jh, transaction); } diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index b6cf2be..c051a94 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -967,6 +967,13 @@ int jbd2_journal_dirty_data(handle_t *handle, struct buffer_head *bh) */ jbd_lock_bh_state(bh); spin_lock(&journal->j_list_lock); + + /* Now that we have bh_state locked, are we really still mapped? */ + if (!buffer_mapped(bh)) { + JBUFFER_TRACE(jh, "unmapped buffer, bailing out"); + goto no_journal; + } + if (jh->b_transaction) { JBUFFER_TRACE(jh, "has transaction"); if (jh->b_transaction != handle->h_transaction) { @@ -1028,6 +1035,11 @@ int jbd2_journal_dirty_data(handle_t *handle, struct buffer_head *bh) sync_dirty_buffer(bh); jbd_lock_bh_state(bh); spin_lock(&journal->j_list_lock); + /* Since we dropped the lock... */ + if (!buffer_mapped(bh)) { + JBUFFER_TRACE(jh, "buffer got unmapped"); + goto no_journal; + } /* The buffer may become locked again at any time if it is redirtied */ } @@ -1824,6 +1836,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) } } } else if (transaction == journal->j_committing_transaction) { + JBUFFER_TRACE(jh, "on committing transaction"); if (jh->b_jlist == BJ_Locked) { /* * The buffer is on the committing transaction's locked @@ -1838,7 +1851,6 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) * can remove it's next_transaction pointer from the * running transaction if that is set, but nothing * else. */ - JBUFFER_TRACE(jh, "on committing transaction"); set_buffer_freed(bh); if (jh->b_next_transaction) { J_ASSERT(jh->b_next_transaction == @@ -1858,6 +1870,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) * i_size already for this truncate so recovery will not * expose the disk blocks we are discarding here.) */ J_ASSERT_JH(jh, transaction == journal->j_running_transaction); + JBUFFER_TRACE(jh, "on running transaction"); may_free = __dispose_buffer(jh, transaction); } diff --git a/fs/jfs/file.c b/fs/jfs/file.c index 34181b8..aa9132d 100644 --- a/fs/jfs/file.c +++ b/fs/jfs/file.c @@ -109,6 +109,8 @@ const struct file_operations jfs_file_operations = { .aio_write = generic_file_aio_write, .mmap = generic_file_mmap, .sendfile = generic_file_sendfile, + .splice_read = generic_file_splice_read, + .splice_write = generic_file_splice_write, .fsync = jfs_fsync, .release = jfs_release, .ioctl = jfs_ioctl, diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c index 489a3d6..ee9b473 100644 --- a/fs/jfs/jfs_imap.c +++ b/fs/jfs/jfs_imap.c @@ -318,7 +318,7 @@ int diRead(struct inode *ip) struct inomap *imap; int block_offset; int inodes_left; - uint pageno; + unsigned long pageno; int rel_inode; jfs_info("diRead: ino = %ld", ip->i_ino); @@ -606,7 +606,7 @@ int diWrite(tid_t tid, struct inode *ip) int block_offset; int inodes_left; struct metapage *mp; - uint pageno; + unsigned long pageno; int rel_inode; int dioffset; struct inode *ipimap; diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c index 4c7985e..b753ba2 100644 --- a/fs/jfs/xattr.c +++ b/fs/jfs/xattr.c @@ -756,6 +756,11 @@ static int can_set_system_xattr(struct inode *inode, const char *name, return -EOPNOTSUPP; } +/* + * Most of the permission checking is done by xattr_permission in the vfs. + * The local file system is responsible for handling the system.* namespace. + * We also need to verify that this is a namespace that we recognize. + */ static int can_set_xattr(struct inode *inode, const char *name, const void *value, size_t value_len) { @@ -771,10 +776,6 @@ static int can_set_xattr(struct inode *inode, const char *name, strncmp(name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN)) return -EOPNOTSUPP; - if (!S_ISREG(inode->i_mode) && - (!S_ISDIR(inode->i_mode) || inode->i_mode &S_ISVTX)) - return -EPERM; - return 0; } diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 6341392..8ca1808 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -353,9 +353,6 @@ EXPORT_SYMBOL(lockd_down); * Sysctl parameters (same as module parameters, different interface). */ -/* Something that isn't CTL_ANY, CTL_NONE or a value that may clash. */ -#define CTL_UNNUMBERED -2 - static ctl_table nlm_sysctls[] = { { .ctl_name = CTL_UNNUMBERED, diff --git a/fs/msdos/namei.c b/fs/msdos/namei.c index b0f01b3..4524619 100644 --- a/fs/msdos/namei.c +++ b/fs/msdos/namei.c @@ -654,6 +654,7 @@ static struct inode_operations msdos_dir_inode_operations = { .rmdir = msdos_rmdir, .rename = msdos_rename, .setattr = fat_notify_change, + .getattr = fat_getattr, }; static int msdos_fill_super(struct super_block *sb, void *data, int silent) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 4133ef5..b34cd16 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -935,8 +935,17 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru no_entry: res = d_materialise_unique(dentry, inode); - if (res != NULL) + if (res != NULL) { + struct dentry *parent; + if (IS_ERR(res)) + goto out_unlock; + /* Was a directory renamed! */ + parent = dget_parent(res); + if (!IS_ROOT(parent)) + nfs_mark_for_revalidate(parent->d_inode); + dput(parent); dentry = res; + } nfs_renew_times(dentry); nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); out_unlock: @@ -1132,6 +1141,8 @@ static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc) alias = d_materialise_unique(dentry, inode); if (alias != NULL) { dput(dentry); + if (IS_ERR(alias)) + return NULL; dentry = alias; } diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c index 2fe3403..3ea50ac 100644 --- a/fs/nfs/sysctl.c +++ b/fs/nfs/sysctl.c @@ -18,11 +18,6 @@ static const int nfs_set_port_min = 0; static const int nfs_set_port_max = 65535; static struct ctl_table_header *nfs_callback_sysctl_table; -/* - * Something that isn't CTL_ANY, CTL_NONE or a value that may clash. - * Use the same values as fs/lockd/svc.c - */ -#define CTL_UNNUMBERED -2 static ctl_table nfs_cb_sysctls[] = { #ifdef CONFIG_NFS_V4 diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 64db601..7f5bad0 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -258,7 +258,7 @@ nfsd3_proc_create(struct svc_rqst *rqstp, struct nfsd3_createargs *argp, /* Now create the file and set attributes */ nfserr = nfsd_create_v3(rqstp, dirfhp, argp->name, argp->len, attr, newfhp, - argp->createmode, argp->verf, NULL); + argp->createmode, argp->verf, NULL, NULL); RETURN_STATUS(nfserr); } diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 0a7bbdc..50bc942 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -93,6 +93,7 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o { struct svc_fh resfh; __be32 status; + int created = 0; fh_init(&resfh, NFS4_FHSIZE); open->op_truncate = 0; @@ -105,28 +106,27 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o status = nfsd_create_v3(rqstp, current_fh, open->op_fname.data, open->op_fname.len, &open->op_iattr, &resfh, open->op_createmode, - (u32 *)open->op_verf.data, &open->op_truncate); - } - else { + (u32 *)open->op_verf.data, &open->op_truncate, &created); + } else { status = nfsd_lookup(rqstp, current_fh, open->op_fname.data, open->op_fname.len, &resfh); fh_unlock(current_fh); } + if (status) + goto out; - if (!status) { - set_change_info(&open->op_cinfo, current_fh); + set_change_info(&open->op_cinfo, current_fh); - /* set reply cache */ - fh_dup2(current_fh, &resfh); - open->op_stateowner->so_replay.rp_openfh_len = - resfh.fh_handle.fh_size; - memcpy(open->op_stateowner->so_replay.rp_openfh, - &resfh.fh_handle.fh_base, - resfh.fh_handle.fh_size); + /* set reply cache */ + fh_dup2(current_fh, &resfh); + open->op_stateowner->so_replay.rp_openfh_len = resfh.fh_handle.fh_size; + memcpy(open->op_stateowner->so_replay.rp_openfh, + &resfh.fh_handle.fh_base, resfh.fh_handle.fh_size); + if (!created) status = do_open_permission(rqstp, current_fh, open, MAY_NOP); - } +out: fh_put(&resfh); return status; } diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index e9d0770..81b8565 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -274,7 +274,7 @@ nfsd4_clear_clid_dir(struct dentry *dir, struct dentry *dentry) * any regular files anyway, just in case the directory was created by * a kernel from the future.... */ nfsd4_list_rec_dir(dentry, nfsd4_remove_clid_file); - mutex_lock(&dir->d_inode->i_mutex); + mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); status = vfs_rmdir(dir->d_inode, dentry); mutex_unlock(&dir->d_inode->i_mutex); return status; diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index f21e917..bb4d926 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1177,7 +1177,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, /* * Get the dir op function pointer. */ - err = nfserr_perm; + err = 0; switch (type) { case S_IFREG: host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL); @@ -1237,7 +1237,7 @@ __be32 nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, char *fname, int flen, struct iattr *iap, struct svc_fh *resfhp, int createmode, u32 *verifier, - int *truncp) + int *truncp, int *created) { struct dentry *dentry, *dchild = NULL; struct inode *dirp; @@ -1331,6 +1331,8 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL); if (host_err < 0) goto out_nfserr; + if (created) + *created = 1; if (EX_ISSYNC(fhp->fh_export)) { err = nfserrno(nfsd_sync_dir(dentry)); diff --git a/fs/proc/base.c b/fs/proc/base.c index 8df27401..795319c 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -442,7 +442,8 @@ static int mountstats_open(struct inode *inode, struct file *file) if (task) { task_lock(task); - namespace = task->nsproxy->namespace; + if (task->nsproxy) + namespace = task->nsproxy->namespace; if (namespace) get_namespace(namespace); task_unlock(task); diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index b67ce93..ac14318 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c @@ -74,7 +74,8 @@ static int reiserfs_file_release(struct inode *inode, struct file *filp) igrab(inode); reiserfs_warning(inode->i_sb, "pinning inode %lu because the " - "preallocation can't be freed"); + "preallocation can't be freed", + inode->i_ino); goto out; } } diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 9041802..1724999 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -1619,6 +1619,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) "jmacd-8: reiserfs_fill_super: unable to read bitmap"); goto error; } + errval = -EINVAL; #ifdef CONFIG_REISERFS_CHECK SWARN(silent, s, "CONFIG_REISERFS_CHECK is set ON"); SWARN(silent, s, "- it is slow mode for debugging."); diff --git a/fs/splice.c b/fs/splice.c index a567010..da74583 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -74,7 +74,7 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *pipe, wait_on_page_writeback(page); if (PagePrivate(page)) - try_to_release_page(page, mapping_gfp_mask(mapping)); + try_to_release_page(page, GFP_KERNEL); /* * If we succeeded in removing the mapping, set LRU flag @@ -333,7 +333,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, break; error = add_to_page_cache_lru(page, mapping, index, - mapping_gfp_mask(mapping)); + GFP_KERNEL); if (unlikely(error)) { page_cache_release(page); if (error == -EEXIST) @@ -557,7 +557,6 @@ static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, { struct file *file = sd->file; struct address_space *mapping = file->f_mapping; - gfp_t gfp_mask = mapping_gfp_mask(mapping); unsigned int offset, this_len; struct page *page; pgoff_t index; @@ -591,7 +590,7 @@ static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, goto find_page; page = buf->page; - if (add_to_page_cache(page, mapping, index, gfp_mask)) { + if (add_to_page_cache(page, mapping, index, GFP_KERNEL)) { unlock_page(page); goto find_page; } @@ -613,7 +612,7 @@ find_page: * This will also lock the page */ ret = add_to_page_cache_lru(page, mapping, index, - gfp_mask); + GFP_KERNEL); if (unlikely(ret)) goto out; } @@ -707,9 +706,9 @@ out_ret: * key here is the 'actor' worker passed in that actually moves the data * to the wanted destination. See pipe_to_file/pipe_to_sendpage above. */ -ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, - loff_t *ppos, size_t len, unsigned int flags, - splice_actor *actor) +static ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, + struct file *out, loff_t *ppos, size_t len, + unsigned int flags, splice_actor *actor) { int ret, do_wakeup, err; struct splice_desc sd; @@ -722,9 +721,6 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, sd.file = out; sd.pos = *ppos; - if (pipe->inode) - mutex_lock(&pipe->inode->i_mutex); - for (;;) { if (pipe->nrbufs) { struct pipe_buffer *buf = pipe->bufs + pipe->curbuf; @@ -797,9 +793,6 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, pipe_wait(pipe); } - if (pipe->inode) - mutex_unlock(&pipe->inode->i_mutex); - if (do_wakeup) { smp_mb(); if (waitqueue_active(&pipe->wait)) @@ -810,6 +803,73 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, return ret; } +ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, + loff_t *ppos, size_t len, unsigned int flags, + splice_actor *actor) +{ + ssize_t ret; + struct inode *inode = out->f_mapping->host; + + /* + * The actor worker might be calling ->prepare_write and + * ->commit_write. Most of the time, these expect i_mutex to + * be held. Since this may result in an ABBA deadlock with + * pipe->inode, we have to order lock acquiry here. + */ + inode_double_lock(inode, pipe->inode); + ret = __splice_from_pipe(pipe, out, ppos, len, flags, actor); + inode_double_unlock(inode, pipe->inode); + + return ret; +} + +/** + * generic_file_splice_write_nolock - generic_file_splice_write without mutexes + * @pipe: pipe info + * @out: file to write to + * @len: number of bytes to splice + * @flags: splice modifier flags + * + * Will either move or copy pages (determined by @flags options) from + * the given pipe inode to the given file. The caller is responsible + * for acquiring i_mutex on both inodes. + * + */ +ssize_t +generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out, + loff_t *ppos, size_t len, unsigned int flags) +{ + struct address_space *mapping = out->f_mapping; + struct inode *inode = mapping->host; + ssize_t ret; + int err; + + err = remove_suid(out->f_dentry); + if (unlikely(err)) + return err; + + ret = __splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file); + if (ret > 0) { + *ppos += ret; + + /* + * If file or inode is SYNC and we actually wrote some data, + * sync it. + */ + if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) { + err = generic_osync_inode(inode, mapping, + OSYNC_METADATA|OSYNC_DATA); + + if (err) + ret = err; + } + } + + return ret; +} + +EXPORT_SYMBOL(generic_file_splice_write_nolock); + /** * generic_file_splice_write - splice data from a pipe to a file * @pipe: pipe info @@ -826,12 +886,21 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, loff_t *ppos, size_t len, unsigned int flags) { struct address_space *mapping = out->f_mapping; + struct inode *inode = mapping->host; ssize_t ret; + int err; + + err = should_remove_suid(out->f_dentry); + if (unlikely(err)) { + mutex_lock(&inode->i_mutex); + err = __remove_suid(out->f_dentry, err); + mutex_unlock(&inode->i_mutex); + if (err) + return err; + } ret = splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file); if (ret > 0) { - struct inode *inode = mapping->host; - *ppos += ret; /* @@ -839,8 +908,6 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, * sync it. */ if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) { - int err; - mutex_lock(&inode->i_mutex); err = generic_osync_inode(inode, mapping, OSYNC_METADATA|OSYNC_DATA); @@ -1042,6 +1109,19 @@ out_release: EXPORT_SYMBOL(do_splice_direct); /* + * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same + * location, so checking ->i_pipe is not enough to verify that this is a + * pipe. + */ +static inline struct pipe_inode_info *pipe_info(struct inode *inode) +{ + if (S_ISFIFO(inode->i_mode)) + return inode->i_pipe; + + return NULL; +} + +/* * Determine where to splice to/from. */ static long do_splice(struct file *in, loff_t __user *off_in, @@ -1052,7 +1132,7 @@ static long do_splice(struct file *in, loff_t __user *off_in, loff_t offset, *off; long ret; - pipe = in->f_dentry->d_inode->i_pipe; + pipe = pipe_info(in->f_dentry->d_inode); if (pipe) { if (off_in) return -ESPIPE; @@ -1073,7 +1153,7 @@ static long do_splice(struct file *in, loff_t __user *off_in, return ret; } - pipe = out->f_dentry->d_inode->i_pipe; + pipe = pipe_info(out->f_dentry->d_inode); if (pipe) { if (off_out) return -ESPIPE; @@ -1231,7 +1311,7 @@ static int get_iovec_page_array(const struct iovec __user *iov, static long do_vmsplice(struct file *file, const struct iovec __user *iov, unsigned long nr_segs, unsigned int flags) { - struct pipe_inode_info *pipe = file->f_dentry->d_inode->i_pipe; + struct pipe_inode_info *pipe; struct page *pages[PIPE_BUFFERS]; struct partial_page partial[PIPE_BUFFERS]; struct splice_pipe_desc spd = { @@ -1241,7 +1321,8 @@ static long do_vmsplice(struct file *file, const struct iovec __user *iov, .ops = &user_page_pipe_buf_ops, }; - if (unlikely(!pipe)) + pipe = pipe_info(file->f_dentry->d_inode); + if (!pipe) return -EBADF; if (unlikely(nr_segs > UIO_MAXIOV)) return -EINVAL; @@ -1400,13 +1481,7 @@ static int link_pipe(struct pipe_inode_info *ipipe, * grabbing by inode address. Otherwise two different processes * could deadlock (one doing tee from A -> B, the other from B -> A). */ - if (ipipe->inode < opipe->inode) { - mutex_lock_nested(&ipipe->inode->i_mutex, I_MUTEX_PARENT); - mutex_lock_nested(&opipe->inode->i_mutex, I_MUTEX_CHILD); - } else { - mutex_lock_nested(&opipe->inode->i_mutex, I_MUTEX_PARENT); - mutex_lock_nested(&ipipe->inode->i_mutex, I_MUTEX_CHILD); - } + inode_double_lock(ipipe->inode, opipe->inode); do { if (!opipe->readers) { @@ -1450,8 +1525,7 @@ static int link_pipe(struct pipe_inode_info *ipipe, i++; } while (len); - mutex_unlock(&ipipe->inode->i_mutex); - mutex_unlock(&opipe->inode->i_mutex); + inode_double_unlock(ipipe->inode, opipe->inode); /* * If we put data in the output pipe, wakeup any potential readers. @@ -1475,8 +1549,8 @@ static int link_pipe(struct pipe_inode_info *ipipe, static long do_tee(struct file *in, struct file *out, size_t len, unsigned int flags) { - struct pipe_inode_info *ipipe = in->f_dentry->d_inode->i_pipe; - struct pipe_inode_info *opipe = out->f_dentry->d_inode->i_pipe; + struct pipe_inode_info *ipipe = pipe_info(in->f_dentry->d_inode); + struct pipe_inode_info *opipe = pipe_info(out->f_dentry->d_inode); int ret = -EINVAL; /* diff --git a/fs/vfat/namei.c b/fs/vfat/namei.c index edb711f..0afd745 100644 --- a/fs/vfat/namei.c +++ b/fs/vfat/namei.c @@ -1004,6 +1004,7 @@ static struct inode_operations vfat_dir_inode_operations = { .rmdir = vfat_rmdir, .rename = vfat_rename, .setattr = fat_notify_change, + .getattr = fat_getattr, }; static int vfat_fill_super(struct super_block *sb, void *data, int silent) @@ -48,14 +48,21 @@ xattr_permission(struct inode *inode, const char *name, int mask) return 0; /* - * The trusted.* namespace can only accessed by a privilegued user. + * The trusted.* namespace can only be accessed by a privileged user. */ if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN)) return (capable(CAP_SYS_ADMIN) ? 0 : -EPERM); + /* In user.* namespace, only regular files and directories can have + * extended attributes. For sticky directories, only the owner and + * privileged user can write attributes. + */ if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) { - if (!S_ISREG(inode->i_mode) && - (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX)) + if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode)) + return -EPERM; + if (S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX) && + (mask & MAY_WRITE) && (current->fsuid != inode->i_uid) && + !capable(CAP_FOWNER)) return -EPERM; } diff --git a/fs/xfs/Makefile-linux-2.6 b/fs/xfs/Makefile-linux-2.6 index 291948d..b49989b 100644 --- a/fs/xfs/Makefile-linux-2.6 +++ b/fs/xfs/Makefile-linux-2.6 @@ -21,22 +21,7 @@ EXTRA_CFLAGS += -Ifs/xfs -Ifs/xfs/linux-2.6 -funsigned-char XFS_LINUX := linux-2.6 ifeq ($(CONFIG_XFS_DEBUG),y) - EXTRA_CFLAGS += -g -DSTATIC="" -DDEBUG - EXTRA_CFLAGS += -DXFS_BUF_LOCK_TRACKING -endif -ifeq ($(CONFIG_XFS_TRACE),y) - EXTRA_CFLAGS += -DXFS_ALLOC_TRACE - EXTRA_CFLAGS += -DXFS_ATTR_TRACE - EXTRA_CFLAGS += -DXFS_BLI_TRACE - EXTRA_CFLAGS += -DXFS_BMAP_TRACE - EXTRA_CFLAGS += -DXFS_BMBT_TRACE - EXTRA_CFLAGS += -DXFS_DIR2_TRACE - EXTRA_CFLAGS += -DXFS_DQUOT_TRACE - EXTRA_CFLAGS += -DXFS_ILOCK_TRACE - EXTRA_CFLAGS += -DXFS_LOG_TRACE - EXTRA_CFLAGS += -DXFS_RW_TRACE - EXTRA_CFLAGS += -DXFS_BUF_TRACE - EXTRA_CFLAGS += -DXFS_VNODE_TRACE + EXTRA_CFLAGS += -g endif obj-$(CONFIG_XFS_FS) += xfs.o diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index db5f5a3..d338284 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -15,6 +15,7 @@ * along with this program; if not, write the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ +#include "xfs.h" #include <linux/stddef.h> #include <linux/errno.h> #include <linux/slab.h> @@ -31,7 +32,6 @@ #include <linux/kthread.h> #include <linux/migrate.h> #include <linux/backing-dev.h> -#include "xfs_linux.h" STATIC kmem_zone_t *xfs_buf_zone; STATIC kmem_shaker_t xfs_buf_shake; @@ -1406,7 +1406,7 @@ xfs_alloc_bufhash( btp->bt_hashshift = external ? 3 : 8; /* 8 or 256 buckets */ btp->bt_hashmask = (1 << btp->bt_hashshift) - 1; btp->bt_hash = kmem_zalloc((1 << btp->bt_hashshift) * - sizeof(xfs_bufhash_t), KM_SLEEP); + sizeof(xfs_bufhash_t), KM_SLEEP | KM_LARGE); for (i = 0; i < (1 << btp->bt_hashshift); i++) { spin_lock_init(&btp->bt_hash[i].bh_lock); INIT_LIST_HEAD(&btp->bt_hash[i].bh_list); diff --git a/fs/xfs/linux-2.6/xfs_dmapi_priv.h b/fs/xfs/linux-2.6/xfs_dmapi_priv.h new file mode 100644 index 0000000..a8b0b16 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_dmapi_priv.h @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2000-2006 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_DMAPI_PRIV_H__ +#define __XFS_DMAPI_PRIV_H__ + +/* + * Based on IO_ISDIRECT, decide which i_ flag is set. + */ +#define DM_SEM_FLAG_RD(ioflags) (((ioflags) & IO_ISDIRECT) ? \ + DM_FLAGS_IMUX : 0) +#define DM_SEM_FLAG_WR (DM_FLAGS_IALLOCSEM_WR | DM_FLAGS_IMUX) + +#endif /*__XFS_DMAPI_PRIV_H__*/ diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index a74f854..74d0948 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -341,8 +341,11 @@ xfs_open_by_handle( put_unused_fd(new_fd); return -XFS_ERROR(-PTR_ERR(filp)); } - if (inode->i_mode & S_IFREG) + if (inode->i_mode & S_IFREG) { + /* invisible operation should not change atime */ + filp->f_flags |= O_NOATIME; filp->f_op = &xfs_invis_file_operations; + } fd_install(new_fd, filp); return new_fd; diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 38c4d12..de05abb 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -227,9 +227,7 @@ xfs_initialize_vnode( xfs_revalidate_inode(XFS_BHVTOM(bdp), vp, ip); xfs_set_inodeops(inode); - spin_lock(&ip->i_flags_lock); - ip->i_flags &= ~XFS_INEW; - spin_unlock(&ip->i_flags_lock); + xfs_iflags_clear(ip, XFS_INEW); barrier(); unlock_new_inode(inode); diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c index c75f683..4363512 100644 --- a/fs/xfs/support/debug.c +++ b/fs/xfs/support/debug.c @@ -15,11 +15,9 @@ * along with this program; if not, write the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ +#include <xfs.h> #include "debug.h" #include "spin.h" -#include <asm/page.h> -#include <linux/sched.h> -#include <linux/kernel.h> static char message[256]; /* keep it off the stack */ static DEFINE_SPINLOCK(xfs_err_lock); diff --git a/fs/xfs/support/move.c b/fs/xfs/support/move.c index caefa17..ac8617c 100644 --- a/fs/xfs/support/move.c +++ b/fs/xfs/support/move.c @@ -22,7 +22,7 @@ * as we go. */ int -uio_read(caddr_t src, size_t len, struct uio *uio) +xfs_uio_read(caddr_t src, size_t len, struct uio *uio) { size_t count; diff --git a/fs/xfs/support/move.h b/fs/xfs/support/move.h index 97a2498..977879c 100644 --- a/fs/xfs/support/move.h +++ b/fs/xfs/support/move.h @@ -65,6 +65,6 @@ struct uio { typedef struct uio uio_t; typedef struct iovec iovec_t; -extern int uio_read (caddr_t, size_t, uio_t *); +extern int xfs_uio_read (caddr_t, size_t, uio_t *); #endif /* __XFS_SUPPORT_MOVE_H__ */ diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h index 1a48dbb..bf0a120 100644 --- a/fs/xfs/xfs.h +++ b/fs/xfs/xfs.h @@ -17,5 +17,28 @@ */ #ifndef __XFS_H__ #define __XFS_H__ + +#ifdef CONFIG_XFS_DEBUG +#define STATIC +#define DEBUG 1 +#define XFS_BUF_LOCK_TRACKING 1 +/* #define QUOTADEBUG 1 */ +#endif + +#ifdef CONFIG_XFS_TRACE +#define XFS_ALLOC_TRACE 1 +#define XFS_ATTR_TRACE 1 +#define XFS_BLI_TRACE 1 +#define XFS_BMAP_TRACE 1 +#define XFS_BMBT_TRACE 1 +#define XFS_DIR2_TRACE 1 +#define XFS_DQUOT_TRACE 1 +#define XFS_ILOCK_TRACE 1 +#define XFS_LOG_TRACE 1 +#define XFS_RW_TRACE 1 +#define XFS_BUF_TRACE 1 +#define XFS_VNODE_TRACE 1 +#endif + #include <linux-2.6/xfs_linux.h> #endif /* __XFS_H__ */ diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 5b050c0..498ad50d 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -1171,6 +1171,8 @@ xfs_bmap_add_extent_delay_real( xfs_bmap_trace_pre_update(fname, "0", ip, idx, XFS_DATA_FORK); xfs_bmbt_set_blockcount(ep, temp); r[0] = *new; + r[1].br_state = PREV.br_state; + r[1].br_startblock = 0; r[1].br_startoff = new_endoff; temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff; r[1].br_blockcount = temp2; diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c index 8edbe1a..8e8e527 100644 --- a/fs/xfs/xfs_dir2.c +++ b/fs/xfs/xfs_dir2.c @@ -678,7 +678,7 @@ xfs_dir2_put_dirent64_uio( idbp->d_off = pa->cook; idbp->d_name[namelen] = '\0'; memcpy(idbp->d_name, pa->name, namelen); - rval = uio_read((caddr_t)idbp, reclen, uio); + rval = xfs_uio_read((caddr_t)idbp, reclen, uio); pa->done = (rval == 0); return rval; } diff --git a/fs/xfs/xfs_dmapi.h b/fs/xfs/xfs_dmapi.h index 4e7865a..adc3d25 100644 --- a/fs/xfs/xfs_dmapi.h +++ b/fs/xfs/xfs_dmapi.h @@ -157,27 +157,9 @@ typedef enum { #define DM_FLAGS_IALLOCSEM_WR 0x020 /* thread holds i_alloc_sem wr */ /* - * Based on IO_ISDIRECT, decide which i_ flag is set. + * Pull in platform specific event flags defines */ -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,0) -#define DM_SEM_FLAG_RD(ioflags) (((ioflags) & IO_ISDIRECT) ? \ - DM_FLAGS_IMUX : 0) -#define DM_SEM_FLAG_WR (DM_FLAGS_IALLOCSEM_WR | DM_FLAGS_IMUX) -#endif - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)) && \ - (LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,22)) -#define DM_SEM_FLAG_RD(ioflags) (((ioflags) & IO_ISDIRECT) ? \ - DM_FLAGS_IALLOCSEM_RD : DM_FLAGS_IMUX) -#define DM_SEM_FLAG_WR (DM_FLAGS_IALLOCSEM_WR | DM_FLAGS_IMUX) -#endif - -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,4,21) -#define DM_SEM_FLAG_RD(ioflags) (((ioflags) & IO_ISDIRECT) ? \ - 0 : DM_FLAGS_IMUX) -#define DM_SEM_FLAG_WR (DM_FLAGS_IMUX) -#endif - +#include "xfs_dmapi_priv.h" /* * Macros to turn caller specified delay/block flags into diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index b73d216..c1c89da 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -215,7 +215,7 @@ again: * If INEW is set this inode is being set up * we need to pause and try again. */ - if (ip->i_flags & XFS_INEW) { + if (xfs_iflags_test(ip, XFS_INEW)) { read_unlock(&ih->ih_lock); delay(1); XFS_STATS_INC(xs_ig_frecycle); @@ -230,22 +230,50 @@ again: * on its way out of the system, * we need to pause and try again. */ - if (ip->i_flags & XFS_IRECLAIM) { + if (xfs_iflags_test(ip, XFS_IRECLAIM)) { read_unlock(&ih->ih_lock); delay(1); XFS_STATS_INC(xs_ig_frecycle); goto again; } + ASSERT(xfs_iflags_test(ip, XFS_IRECLAIMABLE)); + + /* + * If lookup is racing with unlink, then we + * should return an error immediately so we + * don't remove it from the reclaim list and + * potentially leak the inode. + */ + if ((ip->i_d.di_mode == 0) && + !(flags & XFS_IGET_CREATE)) { + read_unlock(&ih->ih_lock); + return ENOENT; + } + + /* + * There may be transactions sitting in the + * incore log buffers or being flushed to disk + * at this time. We can't clear the + * XFS_IRECLAIMABLE flag until these + * transactions have hit the disk, otherwise we + * will void the guarantee the flag provides + * xfs_iunpin() + */ + if (xfs_ipincount(ip)) { + read_unlock(&ih->ih_lock); + xfs_log_force(mp, 0, + XFS_LOG_FORCE|XFS_LOG_SYNC); + XFS_STATS_INC(xs_ig_frecycle); + goto again; + } vn_trace_exit(vp, "xfs_iget.alloc", (inst_t *)__return_address); XFS_STATS_INC(xs_ig_found); - spin_lock(&ip->i_flags_lock); - ip->i_flags &= ~XFS_IRECLAIMABLE; - spin_unlock(&ip->i_flags_lock); + xfs_iflags_clear(ip, XFS_IRECLAIMABLE); version = ih->ih_version; read_unlock(&ih->ih_lock); xfs_ihash_promote(ih, ip, version); @@ -299,10 +327,7 @@ finish_inode: if (lock_flags != 0) xfs_ilock(ip, lock_flags); - spin_lock(&ip->i_flags_lock); - ip->i_flags &= ~XFS_ISTALE; - spin_unlock(&ip->i_flags_lock); - + xfs_iflags_clear(ip, XFS_ISTALE); vn_trace_exit(vp, "xfs_iget.found", (inst_t *)__return_address); goto return_ip; @@ -371,10 +396,7 @@ finish_inode: ih->ih_next = ip; ip->i_udquot = ip->i_gdquot = NULL; ih->ih_version++; - spin_lock(&ip->i_flags_lock); - ip->i_flags |= XFS_INEW; - spin_unlock(&ip->i_flags_lock); - + xfs_iflags_set(ip, XFS_INEW); write_unlock(&ih->ih_lock); /* @@ -625,7 +647,7 @@ xfs_iput_new(xfs_inode_t *ip, vn_trace_entry(vp, "xfs_iput_new", (inst_t *)__return_address); if ((ip->i_d.di_mode == 0)) { - ASSERT(!(ip->i_flags & XFS_IRECLAIMABLE)); + ASSERT(!xfs_iflags_test(ip, XFS_IRECLAIMABLE)); vn_mark_bad(vp); } if (inode->i_state & I_NEW) @@ -683,6 +705,7 @@ xfs_ireclaim(xfs_inode_t *ip) /* * Free all memory associated with the inode. */ + xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); xfs_idestroy(ip); } diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index c27d7d49..44dfac5 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2193,7 +2193,7 @@ xfs_ifree_cluster( /* Inode not in memory or we found it already, * nothing to do */ - if (!ip || (ip->i_flags & XFS_ISTALE)) { + if (!ip || xfs_iflags_test(ip, XFS_ISTALE)) { read_unlock(&ih->ih_lock); continue; } @@ -2215,10 +2215,7 @@ xfs_ifree_cluster( if (ip == free_ip) { if (xfs_iflock_nowait(ip)) { - spin_lock(&ip->i_flags_lock); - ip->i_flags |= XFS_ISTALE; - spin_unlock(&ip->i_flags_lock); - + xfs_iflags_set(ip, XFS_ISTALE); if (xfs_inode_clean(ip)) { xfs_ifunlock(ip); } else { @@ -2231,9 +2228,7 @@ xfs_ifree_cluster( if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { if (xfs_iflock_nowait(ip)) { - spin_lock(&ip->i_flags_lock); - ip->i_flags |= XFS_ISTALE; - spin_unlock(&ip->i_flags_lock); + xfs_iflags_set(ip, XFS_ISTALE); if (xfs_inode_clean(ip)) { xfs_ifunlock(ip); @@ -2263,9 +2258,7 @@ xfs_ifree_cluster( AIL_LOCK(mp,s); iip->ili_flush_lsn = iip->ili_item.li_lsn; AIL_UNLOCK(mp, s); - spin_lock(&iip->ili_inode->i_flags_lock); - iip->ili_inode->i_flags |= XFS_ISTALE; - spin_unlock(&iip->ili_inode->i_flags_lock); + xfs_iflags_set(iip->ili_inode, XFS_ISTALE); pre_flushed++; } lip = lip->li_bio_list; @@ -2748,42 +2741,39 @@ xfs_iunpin( { ASSERT(atomic_read(&ip->i_pincount) > 0); - if (atomic_dec_and_test(&ip->i_pincount)) { + if (atomic_dec_and_lock(&ip->i_pincount, &ip->i_flags_lock)) { + /* - * If the inode is currently being reclaimed, the - * linux inode _and_ the xfs vnode may have been - * freed so we cannot reference either of them safely. - * Hence we should not try to do anything to them - * if the xfs inode is currently in the reclaim - * path. + * If the inode is currently being reclaimed, the link between + * the bhv_vnode and the xfs_inode will be broken after the + * XFS_IRECLAIM* flag is set. Hence, if these flags are not + * set, then we can move forward and mark the linux inode dirty + * knowing that it is still valid as it won't freed until after + * the bhv_vnode<->xfs_inode link is broken in xfs_reclaim. The + * i_flags_lock is used to synchronise the setting of the + * XFS_IRECLAIM* flags and the breaking of the link, and so we + * can execute atomically w.r.t to reclaim by holding this lock + * here. * - * However, we still need to issue the unpin wakeup - * call as the inode reclaim may be blocked waiting for - * the inode to become unpinned. + * However, we still need to issue the unpin wakeup call as the + * inode reclaim may be blocked waiting for the inode to become + * unpinned. */ - struct inode *inode = NULL; - spin_lock(&ip->i_flags_lock); - if (!(ip->i_flags & (XFS_IRECLAIM|XFS_IRECLAIMABLE))) { + if (!__xfs_iflags_test(ip, XFS_IRECLAIM|XFS_IRECLAIMABLE)) { bhv_vnode_t *vp = XFS_ITOV_NULL(ip); + struct inode *inode = NULL; + + BUG_ON(vp == NULL); + inode = vn_to_inode(vp); + BUG_ON(inode->i_state & I_CLEAR); /* make sync come back and flush this inode */ - if (vp) { - inode = vn_to_inode(vp); - - if (!(inode->i_state & - (I_NEW|I_FREEING|I_CLEAR))) { - inode = igrab(inode); - if (inode) - mark_inode_dirty_sync(inode); - } else - inode = NULL; - } + if (!(inode->i_state & (I_NEW|I_FREEING))) + mark_inode_dirty_sync(inode); } spin_unlock(&ip->i_flags_lock); wake_up(&ip->i_ipin_wait); - if (inode) - iput(inode); } } diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index e96eb08..bc82372 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -305,6 +305,47 @@ typedef struct xfs_inode { #endif } xfs_inode_t; + +/* + * i_flags helper functions + */ +static inline void +__xfs_iflags_set(xfs_inode_t *ip, unsigned short flags) +{ + ip->i_flags |= flags; +} + +static inline void +xfs_iflags_set(xfs_inode_t *ip, unsigned short flags) +{ + spin_lock(&ip->i_flags_lock); + __xfs_iflags_set(ip, flags); + spin_unlock(&ip->i_flags_lock); +} + +static inline void +xfs_iflags_clear(xfs_inode_t *ip, unsigned short flags) +{ + spin_lock(&ip->i_flags_lock); + ip->i_flags &= ~flags; + spin_unlock(&ip->i_flags_lock); +} + +static inline int +__xfs_iflags_test(xfs_inode_t *ip, unsigned short flags) +{ + return (ip->i_flags & flags); +} + +static inline int +xfs_iflags_test(xfs_inode_t *ip, unsigned short flags) +{ + int ret; + spin_lock(&ip->i_flags_lock); + ret = __xfs_iflags_test(ip, flags); + spin_unlock(&ip->i_flags_lock); + return ret; +} #endif /* __KERNEL__ */ diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 061e2ff..bda774a 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -1013,7 +1013,7 @@ xfs_readlink( pathlen = (int)ip->i_d.di_size; if (ip->i_df.if_flags & XFS_IFINLINE) { - error = uio_read(ip->i_df.if_u1.if_data, pathlen, uiop); + error = xfs_uio_read(ip->i_df.if_u1.if_data, pathlen, uiop); } else { /* @@ -1044,7 +1044,7 @@ xfs_readlink( byte_cnt = pathlen; pathlen -= byte_cnt; - error = uio_read(XFS_BUF_PTR(bp), byte_cnt, uiop); + error = xfs_uio_read(XFS_BUF_PTR(bp), byte_cnt, uiop); xfs_buf_relse (bp); } @@ -3827,11 +3827,16 @@ xfs_reclaim( */ xfs_synchronize_atime(ip); - /* If we have nothing to flush with this inode then complete the - * teardown now, otherwise break the link between the xfs inode - * and the linux inode and clean up the xfs inode later. This - * avoids flushing the inode to disk during the delete operation - * itself. + /* + * If we have nothing to flush with this inode then complete the + * teardown now, otherwise break the link between the xfs inode and the + * linux inode and clean up the xfs inode later. This avoids flushing + * the inode to disk during the delete operation itself. + * + * When breaking the link, we need to set the XFS_IRECLAIMABLE flag + * first to ensure that xfs_iunpin() will never see an xfs inode + * that has a linux inode being reclaimed. Synchronisation is provided + * by the i_flags_lock. */ if (!ip->i_update_core && (ip->i_itemp == NULL)) { xfs_ilock(ip, XFS_ILOCK_EXCL); @@ -3840,13 +3845,13 @@ xfs_reclaim( } else { xfs_mount_t *mp = ip->i_mount; - /* Protect sync from us */ + /* Protect sync and unpin from us */ XFS_MOUNT_ILOCK(mp); - vn_bhv_remove(VN_BHV_HEAD(vp), XFS_ITOBHV(ip)); - list_add_tail(&ip->i_reclaim, &mp->m_del_inodes); spin_lock(&ip->i_flags_lock); - ip->i_flags |= XFS_IRECLAIMABLE; + __xfs_iflags_set(ip, XFS_IRECLAIMABLE); + vn_bhv_remove(VN_BHV_HEAD(vp), XFS_ITOBHV(ip)); spin_unlock(&ip->i_flags_lock); + list_add_tail(&ip->i_reclaim, &mp->m_del_inodes); XFS_MOUNT_IUNLOCK(mp); } return 0; @@ -3872,8 +3877,8 @@ xfs_finish_reclaim( */ write_lock(&ih->ih_lock); spin_lock(&ip->i_flags_lock); - if ((ip->i_flags & XFS_IRECLAIM) || - (!(ip->i_flags & XFS_IRECLAIMABLE) && vp == NULL)) { + if (__xfs_iflags_test(ip, XFS_IRECLAIM) || + (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) && vp == NULL)) { spin_unlock(&ip->i_flags_lock); write_unlock(&ih->ih_lock); if (locked) { @@ -3882,7 +3887,7 @@ xfs_finish_reclaim( } return 1; } - ip->i_flags |= XFS_IRECLAIM; + __xfs_iflags_set(ip, XFS_IRECLAIM); spin_unlock(&ip->i_flags_lock); write_unlock(&ih->ih_lock); |