diff options
Diffstat (limited to 'fs')
275 files changed, 2567 insertions, 4586 deletions
diff --git a/fs/9p/conv.c b/fs/9p/conv.c index a767e05..1e89814 100644 --- a/fs/9p/conv.c +++ b/fs/9p/conv.c @@ -24,7 +24,6 @@ * */ -#include <linux/config.h> #include <linux/module.h> #include <linux/errno.h> #include <linux/fs.h> diff --git a/fs/9p/error.c b/fs/9p/error.c index 981fe8e..ae91555 100644 --- a/fs/9p/error.c +++ b/fs/9p/error.c @@ -27,7 +27,6 @@ * */ -#include <linux/config.h> #include <linux/module.h> #include <linux/list.h> diff --git a/fs/9p/fcall.c b/fs/9p/fcall.c index 6f26178..8556097 100644 --- a/fs/9p/fcall.c +++ b/fs/9p/fcall.c @@ -24,7 +24,6 @@ * */ -#include <linux/config.h> #include <linux/module.h> #include <linux/errno.h> #include <linux/fs.h> diff --git a/fs/9p/fcprint.c b/fs/9p/fcprint.c index 583e827..34b9611 100644 --- a/fs/9p/fcprint.c +++ b/fs/9p/fcprint.c @@ -21,7 +21,6 @@ * Boston, MA 02111-1301 USA * */ -#include <linux/config.h> #include <linux/module.h> #include <linux/errno.h> #include <linux/fs.h> diff --git a/fs/9p/fid.c b/fs/9p/fid.c index b7608af..70492cc 100644 --- a/fs/9p/fid.c +++ b/fs/9p/fid.c @@ -20,7 +20,6 @@ * */ -#include <linux/config.h> #include <linux/module.h> #include <linux/errno.h> #include <linux/fs.h> diff --git a/fs/9p/mux.c b/fs/9p/mux.c index 12e1baa..90a79c7 100644 --- a/fs/9p/mux.c +++ b/fs/9p/mux.c @@ -23,7 +23,6 @@ * */ -#include <linux/config.h> #include <linux/module.h> #include <linux/errno.h> #include <linux/fs.h> @@ -932,6 +931,8 @@ v9fs_mux_rpc(struct v9fs_mux_data *m, struct v9fs_fcall *tc, r.rcall || r.err); } while (!r.rcall && !r.err && err==-ERESTARTSYS && m->trans->status==Connected && !m->err); + + err = -ERESTARTSYS; } sigpending = 1; } diff --git a/fs/9p/trans_fd.c b/fs/9p/trans_fd.c index 94e0a7f..34d4335 100644 --- a/fs/9p/trans_fd.c +++ b/fs/9p/trans_fd.c @@ -25,7 +25,6 @@ * */ -#include <linux/config.h> #include <linux/in.h> #include <linux/module.h> #include <linux/net.h> diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index d37416e..22f7ccd 100644 --- a/fs/9p/v9fs.c +++ 
b/fs/9p/v9fs.c @@ -23,7 +23,6 @@ * */ -#include <linux/config.h> #include <linux/module.h> #include <linux/errno.h> #include <linux/fs.h> diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h index f867b8d..450b0c1 100644 --- a/fs/9p/v9fs_vfs.h +++ b/fs/9p/v9fs_vfs.h @@ -38,7 +38,7 @@ */ extern struct file_system_type v9fs_fs_type; -extern struct address_space_operations v9fs_addr_operations; +extern const struct address_space_operations v9fs_addr_operations; extern const struct file_operations v9fs_file_operations; extern const struct file_operations v9fs_dir_operations; extern struct dentry_operations v9fs_dentry_operations; diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c index efda46f..9dfd259 100644 --- a/fs/9p/vfs_addr.c +++ b/fs/9p/vfs_addr.c @@ -31,7 +31,6 @@ #include <linux/string.h> #include <linux/smp_lock.h> #include <linux/inet.h> -#include <linux/version.h> #include <linux/pagemap.h> #include <linux/idr.h> @@ -103,6 +102,6 @@ UnmapAndUnlock: return retval; } -struct address_space_operations v9fs_addr_operations = { +const struct address_space_operations v9fs_addr_operations = { .readpage = v9fs_vfs_readpage, }; diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 1a8e460..c3c47ed 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -31,7 +31,6 @@ #include <linux/string.h> #include <linux/smp_lock.h> #include <linux/inet.h> -#include <linux/version.h> #include <linux/list.h> #include <asm/uaccess.h> #include <linux/idr.h> diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 5c6bdf8..2f580a1 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -300,7 +300,7 @@ clunk_fid: fid = V9FS_NOFID; put_fid: - if (fid >= 0) + if (fid != V9FS_NOFID) v9fs_put_idpool(fid, &v9ses->fidpool); kfree(fcall); diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 8b15bb2..63320d4 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -25,7 +25,6 @@ */ #include <linux/kernel.h> -#include <linux/config.h> #include <linux/module.h> #include <linux/errno.h> 
#include <linux/fs.h> @@ -326,7 +326,7 @@ source "fs/xfs/Kconfig" config OCFS2_FS tristate "OCFS2 file system support (EXPERIMENTAL)" - depends on NET && EXPERIMENTAL + depends on NET && SYSFS && EXPERIMENTAL select CONFIGFS_FS select JBD select CRC32 @@ -356,6 +356,16 @@ config OCFS2_FS - POSIX ACLs - readpages / writepages (not user visible) +config OCFS2_DEBUG_MASKLOG + bool "OCFS2 logging support" + depends on OCFS2_FS + default y + help + The ocfs2 filesystem has an extensive logging system. The system + allows selection of events to log via files in /sys/o2cb/logmask/. + This option will enlarge your kernel, but it allows debugging of + ocfs2 filesystem issues. + config MINIX_FS tristate "Minix fs support" help @@ -1116,7 +1126,7 @@ config JFFS2_SUMMARY config JFFS2_FS_XATTR bool "JFFS2 XATTR support (EXPERIMENTAL)" - depends on JFFS2_FS && EXPERIMENTAL && !JFFS2_FS_WRITEBUFFER + depends on JFFS2_FS && EXPERIMENTAL default n help Extended attributes are name:value pairs associated with inodes by diff --git a/fs/Makefile b/fs/Makefile index d0ea6bf..8913542 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -66,7 +66,6 @@ obj-$(CONFIG_MSDOS_FS) += msdos/ obj-$(CONFIG_VFAT_FS) += vfat/ obj-$(CONFIG_BFS_FS) += bfs/ obj-$(CONFIG_ISO9660_FS) += isofs/ -obj-$(CONFIG_DEVFS_FS) += devfs/ obj-$(CONFIG_HFSPLUS_FS) += hfsplus/ # Before hfs to find wrapped HFS+ obj-$(CONFIG_HFS_FS) += hfs/ obj-$(CONFIG_VXFS_FS) += freevxfs/ diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c index 7b075fc..d3c7905 100644 --- a/fs/adfs/dir.c +++ b/fs/adfs/dir.c @@ -9,7 +9,6 @@ * * Common directory handling for ADFS */ -#include <linux/config.h> #include <linux/errno.h> #include <linux/fs.h> #include <linux/adfs_fs.h> diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c index a02802a..534f3ee 100644 --- a/fs/adfs/inode.c +++ b/fs/adfs/inode.c @@ -72,7 +72,7 @@ static sector_t _adfs_bmap(struct address_space *mapping, sector_t block) return generic_block_bmap(mapping, block, adfs_get_block); } -static struct 
address_space_operations adfs_aops = { +static const struct address_space_operations adfs_aops = { .readpage = adfs_readpage, .writepage = adfs_writepage, .sync_page = block_sync_page, diff --git a/fs/affs/affs.h b/fs/affs/affs.h index a43a876..0ddd4cc 100644 --- a/fs/affs/affs.h +++ b/fs/affs/affs.h @@ -195,9 +195,9 @@ extern struct inode_operations affs_symlink_inode_operations; extern const struct file_operations affs_file_operations; extern const struct file_operations affs_file_operations_ofs; extern const struct file_operations affs_dir_operations; -extern struct address_space_operations affs_symlink_aops; -extern struct address_space_operations affs_aops; -extern struct address_space_operations affs_aops_ofs; +extern const struct address_space_operations affs_symlink_aops; +extern const struct address_space_operations affs_aops; +extern const struct address_space_operations affs_aops_ofs; extern struct dentry_operations affs_dentry_operations; extern struct dentry_operations affs_dentry_operations_intl; diff --git a/fs/affs/file.c b/fs/affs/file.c index 7076262..3de8590 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -406,7 +406,7 @@ static sector_t _affs_bmap(struct address_space *mapping, sector_t block) { return generic_block_bmap(mapping,block,affs_get_block); } -struct address_space_operations affs_aops = { +const struct address_space_operations affs_aops = { .readpage = affs_readpage, .writepage = affs_writepage, .sync_page = block_sync_page, @@ -759,7 +759,7 @@ out: goto done; } -struct address_space_operations affs_aops_ofs = { +const struct address_space_operations affs_aops_ofs = { .readpage = affs_readpage_ofs, //.writepage = affs_writepage_ofs, //.sync_page = affs_sync_page_ofs, diff --git a/fs/affs/symlink.c b/fs/affs/symlink.c index 426f0f0..f802256 100644 --- a/fs/affs/symlink.c +++ b/fs/affs/symlink.c @@ -66,7 +66,7 @@ fail: return err; } -struct address_space_operations affs_symlink_aops = { +const struct address_space_operations 
affs_symlink_aops = { .readpage = affs_symlink_readpage, }; diff --git a/fs/afs/file.c b/fs/afs/file.c index 7bb7168..67d6634 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -35,7 +35,7 @@ struct inode_operations afs_file_inode_operations = { .getattr = afs_inode_getattr, }; -struct address_space_operations afs_fs_aops = { +const struct address_space_operations afs_fs_aops = { .readpage = afs_file_readpage, .sync_page = block_sync_page, .set_page_dirty = __set_page_dirty_nobuffers, diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 72febdf..e88b3b6 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -69,7 +69,7 @@ extern const struct file_operations afs_dir_file_operations; /* * file.c */ -extern struct address_space_operations afs_fs_aops; +extern const struct address_space_operations afs_fs_aops; extern struct inode_operations afs_file_inode_operations; #ifdef AFS_CACHING_SUPPORT diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index 08201fa..fcaeead 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -73,7 +73,7 @@ static struct inode_operations befs_dir_inode_operations = { .lookup = befs_lookup, }; -static struct address_space_operations befs_aops = { +static const struct address_space_operations befs_aops = { .readpage = befs_readpage, .sync_page = block_sync_page, .bmap = befs_bmap, @@ -325,7 +325,7 @@ befs_read_inode(struct inode *inode) if (!bh) { befs_error(sb, "unable to read inode block - " "inode = %lu", inode->i_ino); - goto unaquire_none; + goto unacquire_none; } raw_inode = (befs_inode *) bh->b_data; @@ -334,7 +334,7 @@ befs_read_inode(struct inode *inode) if (befs_check_inode(sb, raw_inode, inode->i_ino) != BEFS_OK) { befs_error(sb, "Bad inode: %lu", inode->i_ino); - goto unaquire_bh; + goto unacquire_bh; } inode->i_mode = (umode_t) fs32_to_cpu(sb, raw_inode->mode); @@ -402,17 +402,17 @@ befs_read_inode(struct inode *inode) befs_error(sb, "Inode %lu is not a regular file, " "directory or symlink. THAT IS WRONG! 
BeFS has no " "on disk special files", inode->i_ino); - goto unaquire_bh; + goto unacquire_bh; } brelse(bh); befs_debug(sb, "<--- befs_read_inode()"); return; - unaquire_bh: + unacquire_bh: brelse(bh); - unaquire_none: + unacquire_none: make_bad_inode(inode); befs_debug(sb, "<--- befs_read_inode() - Bad inode"); return; @@ -761,14 +761,14 @@ befs_fill_super(struct super_block *sb, void *data, int silent) printk(KERN_ERR "BeFS(%s): Unable to allocate memory for private " "portion of superblock. Bailing.\n", sb->s_id); - goto unaquire_none; + goto unacquire_none; } befs_sb = BEFS_SB(sb); memset(befs_sb, 0, sizeof(befs_sb_info)); if (!parse_options((char *) data, &befs_sb->mount_opts)) { befs_error(sb, "cannot parse mount options"); - goto unaquire_priv_sbp; + goto unacquire_priv_sbp; } befs_debug(sb, "---> befs_fill_super()"); @@ -794,7 +794,7 @@ befs_fill_super(struct super_block *sb, void *data, int silent) if (!(bh = sb_bread(sb, sb_block))) { befs_error(sb, "unable to read superblock"); - goto unaquire_priv_sbp; + goto unacquire_priv_sbp; } /* account for offset of super block on x86 */ @@ -809,20 +809,20 @@ befs_fill_super(struct super_block *sb, void *data, int silent) } if (befs_load_sb(sb, disk_sb) != BEFS_OK) - goto unaquire_bh; + goto unacquire_bh; befs_dump_super_block(sb, disk_sb); brelse(bh); if (befs_check_sb(sb) != BEFS_OK) - goto unaquire_priv_sbp; + goto unacquire_priv_sbp; if( befs_sb->num_blocks > ~((sector_t)0) ) { befs_error(sb, "blocks count: %Lu " "is larger than the host can use", befs_sb->num_blocks); - goto unaquire_priv_sbp; + goto unacquire_priv_sbp; } /* @@ -838,7 +838,7 @@ befs_fill_super(struct super_block *sb, void *data, int silent) if (!sb->s_root) { iput(root); befs_error(sb, "get root inode failed"); - goto unaquire_priv_sbp; + goto unacquire_priv_sbp; } /* load nls library */ @@ -860,13 +860,13 @@ befs_fill_super(struct super_block *sb, void *data, int silent) return 0; /*****************/ - unaquire_bh: + unacquire_bh: 
brelse(bh); - unaquire_priv_sbp: + unacquire_priv_sbp: kfree(sb->s_fs_info); - unaquire_none: + unacquire_none: sb->s_fs_info = NULL; return -EINVAL; } @@ -925,18 +925,18 @@ init_befs_fs(void) err = befs_init_inodecache(); if (err) - goto unaquire_none; + goto unacquire_none; err = register_filesystem(&befs_fs_type); if (err) - goto unaquire_inodecache; + goto unacquire_inodecache; return 0; -unaquire_inodecache: +unacquire_inodecache: befs_destroy_inodecache(); -unaquire_none: +unacquire_none: return err; } diff --git a/fs/bfs/bfs.h b/fs/bfs/bfs.h index 9d79100..31973bb 100644 --- a/fs/bfs/bfs.h +++ b/fs/bfs/bfs.h @@ -50,7 +50,7 @@ static inline struct bfs_inode_info *BFS_I(struct inode *inode) /* file.c */ extern struct inode_operations bfs_file_inops; extern const struct file_operations bfs_file_operations; -extern struct address_space_operations bfs_aops; +extern const struct address_space_operations bfs_aops; /* dir.c */ extern struct inode_operations bfs_dir_inops; diff --git a/fs/bfs/file.c b/fs/bfs/file.c index d83cd74..3d5aca2 100644 --- a/fs/bfs/file.c +++ b/fs/bfs/file.c @@ -153,7 +153,7 @@ static sector_t bfs_bmap(struct address_space *mapping, sector_t block) return generic_block_bmap(mapping, block, bfs_get_block); } -struct address_space_operations bfs_aops = { +const struct address_space_operations bfs_aops = { .readpage = bfs_readpage, .writepage = bfs_writepage, .sync_page = block_sync_page, diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index d043440..672a3b9 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -84,7 +84,7 @@ static struct linux_binfmt elf_format = { .min_coredump = ELF_EXEC_PAGESIZE }; -#define BAD_ADDR(x) ((unsigned long)(x) > TASK_SIZE) +#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE) static int set_brk(unsigned long start, unsigned long end) { @@ -394,7 +394,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, * <= p_memsize so it's only necessary to check p_memsz. 
*/ k = load_addr + eppnt->p_vaddr; - if (k > TASK_SIZE || + if (BAD_ADDR(k) || eppnt->p_filesz > eppnt->p_memsz || eppnt->p_memsz > TASK_SIZE || TASK_SIZE - eppnt->p_memsz < k) { @@ -887,7 +887,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) * allowed task size. Note that p_filesz must always be * <= p_memsz so it is only necessary to check p_memsz. */ - if (k > TASK_SIZE || elf_ppnt->p_filesz > elf_ppnt->p_memsz || + if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz || elf_ppnt->p_memsz > TASK_SIZE || TASK_SIZE - elf_ppnt->p_memsz < k) { /* set_brk can never work. Avoid overflows. */ @@ -941,10 +941,9 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) interpreter, &interp_load_addr); if (BAD_ADDR(elf_entry)) { - printk(KERN_ERR "Unable to load interpreter %.128s\n", - elf_interpreter); force_sig(SIGSEGV, current); - retval = -ENOEXEC; /* Nobody gets to see this, but.. */ + retval = IS_ERR((void *)elf_entry) ? + (int)elf_entry : -EINVAL; goto out_free_dentry; } reloc_func_desc = interp_load_addr; @@ -955,8 +954,8 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) } else { elf_entry = loc->elf_ex.e_entry; if (BAD_ADDR(elf_entry)) { - send_sig(SIGSEGV, current, 0); - retval = -ENOEXEC; /* Nobody gets to see this, but.. */ + force_sig(SIGSEGV, current); + retval = -EINVAL; goto out_free_dentry; } } @@ -1186,8 +1185,6 @@ static int maydump(struct vm_area_struct *vma) return 1; } -#define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y)) - /* An ELF note in memory */ struct memelfnote { diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index eba4e23..2f33658 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1,6 +1,6 @@ /* binfmt_elf_fdpic.c: FDPIC ELF binary format * - * Copyright (C) 2003, 2004 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2003, 2004, 2006 Red Hat, Inc. All Rights Reserved. 
* Written by David Howells (dhowells@redhat.com) * Derived from binfmt_elf.c * @@ -24,7 +24,9 @@ #include <linux/file.h> #include <linux/fcntl.h> #include <linux/slab.h> +#include <linux/pagemap.h> #include <linux/highmem.h> +#include <linux/highuid.h> #include <linux/personality.h> #include <linux/ptrace.h> #include <linux/init.h> @@ -48,45 +50,59 @@ typedef char *elf_caddr_t; #define kdebug(fmt, ...) do {} while(0) #endif +#if 0 +#define kdcore(fmt, ...) printk("FDPIC "fmt"\n" ,##__VA_ARGS__ ) +#else +#define kdcore(fmt, ...) do {} while(0) +#endif + MODULE_LICENSE("GPL"); -static int load_elf_fdpic_binary(struct linux_binprm *bprm, struct pt_regs *regs); -//static int load_elf_fdpic_library(struct file *); -static int elf_fdpic_fetch_phdrs(struct elf_fdpic_params *params, struct file *file); -static int elf_fdpic_map_file(struct elf_fdpic_params *params, - struct file *file, - struct mm_struct *mm, - const char *what); +static int load_elf_fdpic_binary(struct linux_binprm *, struct pt_regs *); +static int elf_fdpic_fetch_phdrs(struct elf_fdpic_params *, struct file *); +static int elf_fdpic_map_file(struct elf_fdpic_params *, struct file *, + struct mm_struct *, const char *); -static int create_elf_fdpic_tables(struct linux_binprm *bprm, - struct mm_struct *mm, - struct elf_fdpic_params *exec_params, - struct elf_fdpic_params *interp_params); +static int create_elf_fdpic_tables(struct linux_binprm *, struct mm_struct *, + struct elf_fdpic_params *, + struct elf_fdpic_params *); #ifndef CONFIG_MMU -static int elf_fdpic_transfer_args_to_stack(struct linux_binprm *bprm, unsigned long *_sp); -static int elf_fdpic_map_file_constdisp_on_uclinux(struct elf_fdpic_params *params, - struct file *file, - struct mm_struct *mm); +static int elf_fdpic_transfer_args_to_stack(struct linux_binprm *, + unsigned long *); +static int elf_fdpic_map_file_constdisp_on_uclinux(struct elf_fdpic_params *, + struct file *, + struct mm_struct *); #endif -static int 
elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params, - struct file *file, - struct mm_struct *mm); +static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *, + struct file *, struct mm_struct *); + +#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE) +static int elf_fdpic_core_dump(long, struct pt_regs *, struct file *); +#endif static struct linux_binfmt elf_fdpic_format = { .module = THIS_MODULE, .load_binary = load_elf_fdpic_binary, -// .load_shlib = load_elf_fdpic_library, -// .core_dump = elf_fdpic_core_dump, +#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE) + .core_dump = elf_fdpic_core_dump, +#endif .min_coredump = ELF_EXEC_PAGESIZE, }; -static int __init init_elf_fdpic_binfmt(void) { return register_binfmt(&elf_fdpic_format); } -static void __exit exit_elf_fdpic_binfmt(void) { unregister_binfmt(&elf_fdpic_format); } +static int __init init_elf_fdpic_binfmt(void) +{ + return register_binfmt(&elf_fdpic_format); +} + +static void __exit exit_elf_fdpic_binfmt(void) +{ + unregister_binfmt(&elf_fdpic_format); +} -module_init(init_elf_fdpic_binfmt) -module_exit(exit_elf_fdpic_binfmt) +core_initcall(init_elf_fdpic_binfmt); +module_exit(exit_elf_fdpic_binfmt); static int is_elf_fdpic(struct elfhdr *hdr, struct file *file) { @@ -105,7 +121,8 @@ static int is_elf_fdpic(struct elfhdr *hdr, struct file *file) /* * read the program headers table into memory */ -static int elf_fdpic_fetch_phdrs(struct elf_fdpic_params *params, struct file *file) +static int elf_fdpic_fetch_phdrs(struct elf_fdpic_params *params, + struct file *file) { struct elf32_phdr *phdr; unsigned long size; @@ -121,7 +138,8 @@ static int elf_fdpic_fetch_phdrs(struct elf_fdpic_params *params, struct file *f if (!params->phdrs) return -ENOMEM; - retval = kernel_read(file, params->hdr.e_phoff, (char *) params->phdrs, size); + retval = kernel_read(file, params->hdr.e_phoff, + (char *) params->phdrs, size); if (retval < 0) return retval; @@ -141,17 +159,24 @@ 
static int elf_fdpic_fetch_phdrs(struct elf_fdpic_params *params, struct file *f } return 0; -} /* end elf_fdpic_fetch_phdrs() */ +} /*****************************************************************************/ /* * load an fdpic binary into various bits of memory */ -static int load_elf_fdpic_binary(struct linux_binprm *bprm, struct pt_regs *regs) +static int load_elf_fdpic_binary(struct linux_binprm *bprm, + struct pt_regs *regs) { struct elf_fdpic_params exec_params, interp_params; struct elf_phdr *phdr; - unsigned long stack_size; + unsigned long stack_size, entryaddr; +#ifndef CONFIG_MMU + unsigned long fullsize; +#endif +#ifdef ELF_FDPIC_PLAT_INIT + unsigned long dynaddr; +#endif struct file *interpreter = NULL; /* to shut gcc up */ char *interpreter_name = NULL; int executable_stack; @@ -212,7 +237,8 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, struct pt_regs *regs goto error; } - retval = kernel_read(interpreter, 0, bprm->buf, BINPRM_BUF_SIZE); + retval = kernel_read(interpreter, 0, bprm->buf, + BINPRM_BUF_SIZE); if (retval < 0) goto error; @@ -295,7 +321,8 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, struct pt_regs *regs &current->mm->start_stack, &current->mm->start_brk); - retval = setup_arg_pages(bprm, current->mm->start_stack, executable_stack); + retval = setup_arg_pages(bprm, current->mm->start_stack, + executable_stack); if (retval < 0) { send_sig(SIGKILL, current, 0); goto error_kill; @@ -303,7 +330,8 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, struct pt_regs *regs #endif /* load the executable and interpreter into memory */ - retval = elf_fdpic_map_file(&exec_params, bprm->file, current->mm, "executable"); + retval = elf_fdpic_map_file(&exec_params, bprm->file, current->mm, + "executable"); if (retval < 0) goto error_kill; @@ -324,7 +352,8 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, struct pt_regs *regs if (!current->mm->start_brk) current->mm->start_brk = current->mm->end_data; - 
current->mm->brk = current->mm->start_brk = PAGE_ALIGN(current->mm->start_brk); + current->mm->brk = current->mm->start_brk = + PAGE_ALIGN(current->mm->start_brk); #else /* create a stack and brk area big enough for everyone @@ -336,47 +365,45 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, struct pt_regs *regs stack_size = PAGE_SIZE * 2; down_write(&current->mm->mmap_sem); - current->mm->start_brk = do_mmap(NULL, - 0, - stack_size, + current->mm->start_brk = do_mmap(NULL, 0, stack_size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANON | MAP_GROWSDOWN, 0); - if (IS_ERR((void *) current->mm->start_brk)) { + if (IS_ERR_VALUE(current->mm->start_brk)) { up_write(&current->mm->mmap_sem); retval = current->mm->start_brk; current->mm->start_brk = 0; goto error_kill; } - if (do_mremap(current->mm->start_brk, - stack_size, - ksize((char *) current->mm->start_brk), - 0, 0 - ) == current->mm->start_brk - ) - stack_size = ksize((char *) current->mm->start_brk); + /* expand the stack mapping to use up the entire allocation granule */ + fullsize = ksize((char *) current->mm->start_brk); + if (!IS_ERR_VALUE(do_mremap(current->mm->start_brk, stack_size, + fullsize, 0, 0))) + stack_size = fullsize; up_write(&current->mm->mmap_sem); current->mm->brk = current->mm->start_brk; current->mm->context.end_brk = current->mm->start_brk; - current->mm->context.end_brk += (stack_size > PAGE_SIZE) ? (stack_size - PAGE_SIZE) : 0; + current->mm->context.end_brk += + (stack_size > PAGE_SIZE) ? 
(stack_size - PAGE_SIZE) : 0; current->mm->start_stack = current->mm->start_brk + stack_size; #endif compute_creds(bprm); current->flags &= ~PF_FORKNOEXEC; - if (create_elf_fdpic_tables(bprm, current->mm, &exec_params, &interp_params) < 0) + if (create_elf_fdpic_tables(bprm, current->mm, + &exec_params, &interp_params) < 0) goto error_kill; - kdebug("- start_code %lx", (long) current->mm->start_code); - kdebug("- end_code %lx", (long) current->mm->end_code); - kdebug("- start_data %lx", (long) current->mm->start_data); - kdebug("- end_data %lx", (long) current->mm->end_data); - kdebug("- start_brk %lx", (long) current->mm->start_brk); - kdebug("- brk %lx", (long) current->mm->brk); - kdebug("- start_stack %lx", (long) current->mm->start_stack); + kdebug("- start_code %lx", current->mm->start_code); + kdebug("- end_code %lx", current->mm->end_code); + kdebug("- start_data %lx", current->mm->start_data); + kdebug("- end_data %lx", current->mm->end_data); + kdebug("- start_brk %lx", current->mm->start_brk); + kdebug("- brk %lx", current->mm->brk); + kdebug("- start_stack %lx", current->mm->start_stack); #ifdef ELF_FDPIC_PLAT_INIT /* @@ -385,21 +412,18 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, struct pt_regs *regs * example. This macro performs whatever initialization to * the regs structure is required. */ - ELF_FDPIC_PLAT_INIT(regs, - exec_params.map_addr, - interp_params.map_addr, - interp_params.dynamic_addr ?: exec_params.dynamic_addr - ); + dynaddr = interp_params.dynamic_addr ?: exec_params.dynamic_addr; + ELF_FDPIC_PLAT_INIT(regs, exec_params.map_addr, interp_params.map_addr, + dynaddr); #endif /* everything is now ready... 
get the userspace context ready to roll */ - start_thread(regs, - interp_params.entry_addr ?: exec_params.entry_addr, - current->mm->start_stack); + entryaddr = interp_params.entry_addr ?: exec_params.entry_addr; + start_thread(regs, entryaddr, current->mm->start_stack); if (unlikely(current->ptrace & PT_PTRACED)) { if (current->ptrace & PT_TRACE_EXEC) - ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP); + ptrace_notify((PTRACE_EVENT_EXEC << 8) | SIGTRAP); else send_sig(SIGTRAP, current, 0); } @@ -419,11 +443,11 @@ error: return retval; /* unrecoverable error - kill the process */ - error_kill: +error_kill: send_sig(SIGSEGV, current, 0); goto error; -} /* end load_elf_fdpic_binary() */ +} /*****************************************************************************/ /* @@ -459,6 +483,7 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, */ hwcap = ELF_HWCAP; k_platform = ELF_PLATFORM; + u_platform = NULL; if (k_platform) { platform_len = strlen(k_platform) + 1; @@ -470,11 +495,11 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, #if defined(__i386__) && defined(CONFIG_SMP) /* in some cases (e.g. Hyper-Threading), we want to avoid L1 evictions - * by the processes running on the same package. One thing we can do - * is to shuffle the initial stack for them. + * by the processes running on the same package. One thing we can do is + * to shuffle the initial stack for them. * - * the conditionals here are unneeded, but kept in to make the - * code behaviour the same as pre change unless we have hyperthreaded + * the conditionals here are unneeded, but kept in to make the code + * behaviour the same as pre change unless we have hyperthreaded * processors. 
This keeps Mr Marcelo Person happier but should be * removed for 2.5 */ @@ -497,11 +522,13 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, if (interp_params->loadmap) { len = sizeof(struct elf32_fdpic_loadmap); - len += sizeof(struct elf32_fdpic_loadseg) * interp_params->loadmap->nsegs; + len += sizeof(struct elf32_fdpic_loadseg) * + interp_params->loadmap->nsegs; sp = (sp - len) & ~7UL; interp_params->map_addr = sp; - if (copy_to_user((void __user *) sp, interp_params->loadmap, len) != 0) + if (copy_to_user((void __user *) sp, interp_params->loadmap, + len) != 0) return -EFAULT; current->mm->context.interp_fdpic_loadmap = (unsigned long) sp; @@ -525,34 +552,37 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, sp -= sp & 15UL; /* put the ELF interpreter info on the stack */ -#define NEW_AUX_ENT(nr, id, val) \ - do { \ - struct { unsigned long _id, _val; } __user *ent = (void __user *) csp; \ - __put_user((id), &ent[nr]._id); \ - __put_user((val), &ent[nr]._val); \ +#define NEW_AUX_ENT(nr, id, val) \ + do { \ + struct { unsigned long _id, _val; } __user *ent; \ + \ + ent = (void __user *) csp; \ + __put_user((id), &ent[nr]._id); \ + __put_user((val), &ent[nr]._val); \ } while (0) csp -= 2 * sizeof(unsigned long); NEW_AUX_ENT(0, AT_NULL, 0); if (k_platform) { csp -= 2 * sizeof(unsigned long); - NEW_AUX_ENT(0, AT_PLATFORM, (elf_addr_t)(unsigned long) u_platform); + NEW_AUX_ENT(0, AT_PLATFORM, + (elf_addr_t) (unsigned long) u_platform); } csp -= DLINFO_ITEMS * 2 * sizeof(unsigned long); - NEW_AUX_ENT( 0, AT_HWCAP, hwcap); - NEW_AUX_ENT( 1, AT_PAGESZ, PAGE_SIZE); - NEW_AUX_ENT( 2, AT_CLKTCK, CLOCKS_PER_SEC); - NEW_AUX_ENT( 3, AT_PHDR, exec_params->ph_addr); - NEW_AUX_ENT( 4, AT_PHENT, sizeof(struct elf_phdr)); - NEW_AUX_ENT( 5, AT_PHNUM, exec_params->hdr.e_phnum); - NEW_AUX_ENT( 6, AT_BASE, interp_params->elfhdr_addr); - NEW_AUX_ENT( 7, AT_FLAGS, 0); - NEW_AUX_ENT( 8, AT_ENTRY, exec_params->entry_addr); - NEW_AUX_ENT( 9, AT_UID, 
(elf_addr_t) current->uid); - NEW_AUX_ENT(10, AT_EUID, (elf_addr_t) current->euid); - NEW_AUX_ENT(11, AT_GID, (elf_addr_t) current->gid); - NEW_AUX_ENT(12, AT_EGID, (elf_addr_t) current->egid); + NEW_AUX_ENT( 0, AT_HWCAP, hwcap); + NEW_AUX_ENT( 1, AT_PAGESZ, PAGE_SIZE); + NEW_AUX_ENT( 2, AT_CLKTCK, CLOCKS_PER_SEC); + NEW_AUX_ENT( 3, AT_PHDR, exec_params->ph_addr); + NEW_AUX_ENT( 4, AT_PHENT, sizeof(struct elf_phdr)); + NEW_AUX_ENT( 5, AT_PHNUM, exec_params->hdr.e_phnum); + NEW_AUX_ENT( 6, AT_BASE, interp_params->elfhdr_addr); + NEW_AUX_ENT( 7, AT_FLAGS, 0); + NEW_AUX_ENT( 8, AT_ENTRY, exec_params->entry_addr); + NEW_AUX_ENT( 9, AT_UID, (elf_addr_t) current->uid); + NEW_AUX_ENT(10, AT_EUID, (elf_addr_t) current->euid); + NEW_AUX_ENT(11, AT_GID, (elf_addr_t) current->gid); + NEW_AUX_ENT(12, AT_EGID, (elf_addr_t) current->egid); #ifdef ARCH_DLINFO /* ARCH_DLINFO must come last so platform specific code can enforce @@ -578,7 +608,8 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, #ifdef CONFIG_MMU current->mm->arg_start = bprm->p; #else - current->mm->arg_start = current->mm->start_stack - (MAX_ARG_PAGES * PAGE_SIZE - bprm->p); + current->mm->arg_start = current->mm->start_stack - + (MAX_ARG_PAGES * PAGE_SIZE - bprm->p); #endif p = (char __user *) current->mm->arg_start; @@ -606,7 +637,7 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, mm->start_stack = (unsigned long) sp; return 0; -} /* end create_elf_fdpic_tables() */ +} /*****************************************************************************/ /* @@ -614,7 +645,8 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, * the stack */ #ifndef CONFIG_MMU -static int elf_fdpic_transfer_args_to_stack(struct linux_binprm *bprm, unsigned long *_sp) +static int elf_fdpic_transfer_args_to_stack(struct linux_binprm *bprm, + unsigned long *_sp) { unsigned long index, stop, sp; char *src; @@ -635,9 +667,9 @@ static int elf_fdpic_transfer_args_to_stack(struct linux_binprm *bprm, 
unsigned *_sp = (*_sp - (MAX_ARG_PAGES * PAGE_SIZE - bprm->p)) & ~15; - out: +out: return ret; -} /* end elf_fdpic_transfer_args_to_stack() */ +} #endif /*****************************************************************************/ @@ -712,17 +744,18 @@ static int elf_fdpic_map_file(struct elf_fdpic_params *params, seg = loadmap->segs; for (loop = loadmap->nsegs; loop > 0; loop--, seg++) { if (params->hdr.e_entry >= seg->p_vaddr && - params->hdr.e_entry < seg->p_vaddr + seg->p_memsz - ) { + params->hdr.e_entry < seg->p_vaddr + seg->p_memsz) { params->entry_addr = - (params->hdr.e_entry - seg->p_vaddr) + seg->addr; + (params->hdr.e_entry - seg->p_vaddr) + + seg->addr; break; } } } /* determine where the program header table has wound up if mapped */ - stop = params->hdr.e_phoff + params->hdr.e_phnum * sizeof (struct elf_phdr); + stop = params->hdr.e_phoff; + stop += params->hdr.e_phnum * sizeof (struct elf_phdr); phdr = params->phdrs; for (loop = 0; loop < params->hdr.e_phnum; loop++, phdr++) { @@ -736,9 +769,11 @@ static int elf_fdpic_map_file(struct elf_fdpic_params *params, seg = loadmap->segs; for (loop = loadmap->nsegs; loop > 0; loop--, seg++) { if (phdr->p_vaddr >= seg->p_vaddr && - phdr->p_vaddr + phdr->p_filesz <= seg->p_vaddr + seg->p_memsz - ) { - params->ph_addr = (phdr->p_vaddr - seg->p_vaddr) + seg->addr + + phdr->p_vaddr + phdr->p_filesz <= + seg->p_vaddr + seg->p_memsz) { + params->ph_addr = + (phdr->p_vaddr - seg->p_vaddr) + + seg->addr + params->hdr.e_phoff - phdr->p_offset; break; } @@ -755,18 +790,22 @@ static int elf_fdpic_map_file(struct elf_fdpic_params *params, seg = loadmap->segs; for (loop = loadmap->nsegs; loop > 0; loop--, seg++) { if (phdr->p_vaddr >= seg->p_vaddr && - phdr->p_vaddr + phdr->p_memsz <= seg->p_vaddr + seg->p_memsz - ) { - params->dynamic_addr = (phdr->p_vaddr - seg->p_vaddr) + seg->addr; - - /* check the dynamic section contains at least one item, and that - * the last item is a NULL entry */ + phdr->p_vaddr + 
phdr->p_memsz <= + seg->p_vaddr + seg->p_memsz) { + params->dynamic_addr = + (phdr->p_vaddr - seg->p_vaddr) + + seg->addr; + + /* check the dynamic section contains at least + * one item, and that the last item is a NULL + * entry */ if (phdr->p_memsz == 0 || phdr->p_memsz % sizeof(Elf32_Dyn) != 0) goto dynamic_error; tmp = phdr->p_memsz / sizeof(Elf32_Dyn); - if (((Elf32_Dyn *) params->dynamic_addr)[tmp - 1].d_tag != 0) + if (((Elf32_Dyn *) + params->dynamic_addr)[tmp - 1].d_tag != 0) goto dynamic_error; break; } @@ -775,8 +814,8 @@ static int elf_fdpic_map_file(struct elf_fdpic_params *params, } /* now elide adjacent segments in the load map on MMU linux - * - on uClinux the holes between may actually be filled with system stuff or stuff from - * other processes + * - on uClinux the holes between may actually be filled with system + * stuff or stuff from other processes */ #ifdef CONFIG_MMU nloads = loadmap->nsegs; @@ -787,7 +826,9 @@ static int elf_fdpic_map_file(struct elf_fdpic_params *params, if (seg->p_vaddr - mseg->p_vaddr == seg->addr - mseg->addr) { load_addr = PAGE_ALIGN(mseg->addr + mseg->p_memsz); if (load_addr == (seg->addr & PAGE_MASK)) { - mseg->p_memsz += load_addr - (mseg->addr + mseg->p_memsz); + mseg->p_memsz += + load_addr - + (mseg->addr + mseg->p_memsz); mseg->p_memsz += seg->addr & ~PAGE_MASK; mseg->p_memsz += seg->p_memsz; loadmap->nsegs--; @@ -815,20 +856,21 @@ static int elf_fdpic_map_file(struct elf_fdpic_params *params, return 0; - dynamic_error: +dynamic_error: printk("ELF FDPIC %s with invalid DYNAMIC section (inode=%lu)\n", what, file->f_dentry->d_inode->i_ino); return -ELIBBAD; -} /* end elf_fdpic_map_file() */ +} /*****************************************************************************/ /* * map a file with constant displacement under uClinux */ #ifndef CONFIG_MMU -static int elf_fdpic_map_file_constdisp_on_uclinux(struct elf_fdpic_params *params, - struct file *file, - struct mm_struct *mm) +static int 
elf_fdpic_map_file_constdisp_on_uclinux( + struct elf_fdpic_params *params, + struct file *file, + struct mm_struct *mm) { struct elf32_fdpic_loadseg *seg; struct elf32_phdr *phdr; @@ -839,7 +881,8 @@ static int elf_fdpic_map_file_constdisp_on_uclinux(struct elf_fdpic_params *para load_addr = params->load_addr; seg = params->loadmap->segs; - /* determine the bounds of the contiguous overall allocation we must make */ + /* determine the bounds of the contiguous overall allocation we must + * make */ phdr = params->phdrs; for (loop = 0; loop < params->hdr.e_phnum; loop++, phdr++) { if (params->phdrs[loop].p_type != PT_LOAD) @@ -860,7 +903,7 @@ static int elf_fdpic_map_file_constdisp_on_uclinux(struct elf_fdpic_params *para maddr = do_mmap(NULL, load_addr, top - base, PROT_READ | PROT_WRITE | PROT_EXEC, mflags, 0); up_write(&mm->mmap_sem); - if (IS_ERR((void *) maddr)) + if (IS_ERR_VALUE(maddr)) return (int) maddr; if (load_addr != 0) @@ -878,7 +921,8 @@ static int elf_fdpic_map_file_constdisp_on_uclinux(struct elf_fdpic_params *para seg->p_vaddr = phdr->p_vaddr; seg->p_memsz = phdr->p_memsz; - ret = file->f_op->read(file, (void *) seg->addr, phdr->p_filesz, &fpos); + ret = file->f_op->read(file, (void *) seg->addr, + phdr->p_filesz, &fpos); if (ret < 0) return ret; @@ -895,8 +939,7 @@ static int elf_fdpic_map_file_constdisp_on_uclinux(struct elf_fdpic_params *para if (phdr->p_flags & PF_X) { mm->start_code = seg->addr; mm->end_code = seg->addr + phdr->p_memsz; - } - else if (!mm->start_data) { + } else if (!mm->start_data) { mm->start_data = seg->addr; #ifndef CONFIG_MMU mm->end_data = seg->addr + phdr->p_memsz; @@ -913,7 +956,7 @@ static int elf_fdpic_map_file_constdisp_on_uclinux(struct elf_fdpic_params *para } return 0; -} /* end elf_fdpic_map_file_constdisp_on_uclinux() */ +} #endif /*****************************************************************************/ @@ -974,14 +1017,14 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params, 
case ELF_FDPIC_FLAG_CONSTDISP: /* constant displacement - * - can be mapped anywhere, but must be mapped as a unit + * - can be mapped anywhere, but must be mapped as a + * unit */ if (!dvset) { maddr = load_addr; delta_vaddr = phdr->p_vaddr; dvset = 1; - } - else { + } else { maddr = load_addr + phdr->p_vaddr - delta_vaddr; flags |= MAP_FIXED; } @@ -1005,13 +1048,14 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params, up_write(&mm->mmap_sem); kdebug("mmap[%d] <file> sz=%lx pr=%x fl=%x of=%lx --> %08lx", - loop, phdr->p_memsz + disp, prot, flags, phdr->p_offset - disp, - maddr); + loop, phdr->p_memsz + disp, prot, flags, + phdr->p_offset - disp, maddr); - if (IS_ERR((void *) maddr)) + if (IS_ERR_VALUE(maddr)) return (int) maddr; - if ((params->flags & ELF_FDPIC_FLAG_ARRANGEMENT) == ELF_FDPIC_FLAG_CONTIGUOUS) + if ((params->flags & ELF_FDPIC_FLAG_ARRANGEMENT) == + ELF_FDPIC_FLAG_CONTIGUOUS) load_addr += PAGE_ALIGN(phdr->p_memsz + disp); seg->addr = maddr + disp; @@ -1022,7 +1066,8 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params, if (phdr->p_offset == 0) params->elfhdr_addr = seg->addr; - /* clear the bit between beginning of mapping and beginning of PT_LOAD */ + /* clear the bit between beginning of mapping and beginning of + * PT_LOAD */ if (prot & PROT_WRITE && disp > 0) { kdebug("clear[%d] ad=%lx sz=%lx", loop, maddr, disp); clear_user((void __user *) maddr, disp); @@ -1038,19 +1083,20 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params, excess1 = PAGE_SIZE - ((maddr + phdr->p_filesz) & ~PAGE_MASK); #ifdef CONFIG_MMU - if (excess > excess1) { unsigned long xaddr = maddr + phdr->p_filesz + excess1; unsigned long xmaddr; flags |= MAP_FIXED | MAP_ANONYMOUS; down_write(&mm->mmap_sem); - xmaddr = do_mmap(NULL, xaddr, excess - excess1, prot, flags, 0); + xmaddr = do_mmap(NULL, xaddr, excess - excess1, + prot, flags, 0); up_write(&mm->mmap_sem); kdebug("mmap[%d] <anon>" " ad=%lx 
sz=%lx pr=%x fl=%x of=0 --> %08lx", - loop, xaddr, excess - excess1, prot, flags, xmaddr); + loop, xaddr, excess - excess1, prot, flags, + xmaddr); if (xmaddr != xaddr) return -ENOMEM; @@ -1059,7 +1105,8 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params, if (prot & PROT_WRITE && excess1 > 0) { kdebug("clear[%d] ad=%lx sz=%lx", loop, maddr + phdr->p_filesz, excess1); - clear_user((void __user *) maddr + phdr->p_filesz, excess1); + clear_user((void __user *) maddr + phdr->p_filesz, + excess1); } #else @@ -1074,8 +1121,7 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params, if (phdr->p_flags & PF_X) { mm->start_code = maddr; mm->end_code = maddr + phdr->p_memsz; - } - else if (!mm->start_data) { + } else if (!mm->start_data) { mm->start_data = maddr; mm->end_data = maddr + phdr->p_memsz; } @@ -1085,4 +1131,662 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params, } return 0; -} /* end elf_fdpic_map_file_by_direct_mmap() */ +} + +/*****************************************************************************/ +/* + * ELF-FDPIC core dumper + * + * Modelled on fs/exec.c:aout_core_dump() + * Jeremy Fitzhardinge <jeremy@sw.oz.au> + * + * Modelled on fs/binfmt_elf.c core dumper + */ +#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE) + +/* + * These are the only things you should do on a core-file: use only these + * functions to write out all the necessary info. + */ +static int dump_write(struct file *file, const void *addr, int nr) +{ + return file->f_op->write(file, addr, nr, &file->f_pos) == nr; +} + +static int dump_seek(struct file *file, loff_t off) +{ + if (file->f_op->llseek) { + if (file->f_op->llseek(file, off, SEEK_SET) != off) + return 0; + } else { + file->f_pos = off; + } + return 1; +} + +/* + * Decide whether a segment is worth dumping; default is yes to be + * sure (missing info is worse than too much; etc). 
+ * Personally I'd include everything, and use the coredump limit... + * + * I think we should skip something. But I am not sure how. H.J. + */ +static int maydump(struct vm_area_struct *vma) +{ + /* Do not dump I/O mapped devices or special mappings */ + if (vma->vm_flags & (VM_IO | VM_RESERVED)) { + kdcore("%08lx: %08lx: no (IO)", vma->vm_start, vma->vm_flags); + return 0; + } + + /* If we may not read the contents, don't allow us to dump + * them either. "dump_write()" can't handle it anyway. + */ + if (!(vma->vm_flags & VM_READ)) { + kdcore("%08lx: %08lx: no (!read)", vma->vm_start, vma->vm_flags); + return 0; + } + + /* Dump shared memory only if mapped from an anonymous file. */ + if (vma->vm_flags & VM_SHARED) { + if (vma->vm_file->f_dentry->d_inode->i_nlink == 0) { + kdcore("%08lx: %08lx: no (share)", vma->vm_start, vma->vm_flags); + return 1; + } + + kdcore("%08lx: %08lx: no (share)", vma->vm_start, vma->vm_flags); + return 0; + } + +#ifdef CONFIG_MMU + /* If it hasn't been written to, don't write it out */ + if (!vma->anon_vma) { + kdcore("%08lx: %08lx: no (!anon)", vma->vm_start, vma->vm_flags); + return 0; + } +#endif + + kdcore("%08lx: %08lx: yes", vma->vm_start, vma->vm_flags); + return 1; +} + +/* An ELF note in memory */ +struct memelfnote +{ + const char *name; + int type; + unsigned int datasz; + void *data; +}; + +static int notesize(struct memelfnote *en) +{ + int sz; + + sz = sizeof(struct elf_note); + sz += roundup(strlen(en->name) + 1, 4); + sz += roundup(en->datasz, 4); + + return sz; +} + +/* #define DEBUG */ + +#define DUMP_WRITE(addr, nr) \ + do { if (!dump_write(file, (addr), (nr))) return 0; } while(0) +#define DUMP_SEEK(off) \ + do { if (!dump_seek(file, (off))) return 0; } while(0) + +static int writenote(struct memelfnote *men, struct file *file) +{ + struct elf_note en; + + en.n_namesz = strlen(men->name) + 1; + en.n_descsz = men->datasz; + en.n_type = men->type; + + DUMP_WRITE(&en, sizeof(en)); + DUMP_WRITE(men->name, en.n_namesz); 
+ /* XXX - cast from long long to long to avoid need for libgcc.a */ + DUMP_SEEK(roundup((unsigned long)file->f_pos, 4)); /* XXX */ + DUMP_WRITE(men->data, men->datasz); + DUMP_SEEK(roundup((unsigned long)file->f_pos, 4)); /* XXX */ + + return 1; +} +#undef DUMP_WRITE +#undef DUMP_SEEK + +#define DUMP_WRITE(addr, nr) \ + if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \ + goto end_coredump; +#define DUMP_SEEK(off) \ + if (!dump_seek(file, (off))) \ + goto end_coredump; + +static inline void fill_elf_fdpic_header(struct elfhdr *elf, int segs) +{ + memcpy(elf->e_ident, ELFMAG, SELFMAG); + elf->e_ident[EI_CLASS] = ELF_CLASS; + elf->e_ident[EI_DATA] = ELF_DATA; + elf->e_ident[EI_VERSION] = EV_CURRENT; + elf->e_ident[EI_OSABI] = ELF_OSABI; + memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD); + + elf->e_type = ET_CORE; + elf->e_machine = ELF_ARCH; + elf->e_version = EV_CURRENT; + elf->e_entry = 0; + elf->e_phoff = sizeof(struct elfhdr); + elf->e_shoff = 0; + elf->e_flags = ELF_FDPIC_CORE_EFLAGS; + elf->e_ehsize = sizeof(struct elfhdr); + elf->e_phentsize = sizeof(struct elf_phdr); + elf->e_phnum = segs; + elf->e_shentsize = 0; + elf->e_shnum = 0; + elf->e_shstrndx = 0; + return; +} + +static inline void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset) +{ + phdr->p_type = PT_NOTE; + phdr->p_offset = offset; + phdr->p_vaddr = 0; + phdr->p_paddr = 0; + phdr->p_filesz = sz; + phdr->p_memsz = 0; + phdr->p_flags = 0; + phdr->p_align = 0; + return; +} + +static inline void fill_note(struct memelfnote *note, const char *name, int type, + unsigned int sz, void *data) +{ + note->name = name; + note->type = type; + note->datasz = sz; + note->data = data; + return; +} + +/* + * fill up all the fields in prstatus from the given task struct, except + * registers which need to be filled up seperately. 
+ */ +static void fill_prstatus(struct elf_prstatus *prstatus, + struct task_struct *p, long signr) +{ + prstatus->pr_info.si_signo = prstatus->pr_cursig = signr; + prstatus->pr_sigpend = p->pending.signal.sig[0]; + prstatus->pr_sighold = p->blocked.sig[0]; + prstatus->pr_pid = p->pid; + prstatus->pr_ppid = p->parent->pid; + prstatus->pr_pgrp = process_group(p); + prstatus->pr_sid = p->signal->session; + if (thread_group_leader(p)) { + /* + * This is the record for the group leader. Add in the + * cumulative times of previous dead threads. This total + * won't include the time of each live thread whose state + * is included in the core dump. The final total reported + * to our parent process when it calls wait4 will include + * those sums as well as the little bit more time it takes + * this and each other thread to finish dying after the + * core dump synchronization phase. + */ + cputime_to_timeval(cputime_add(p->utime, p->signal->utime), + &prstatus->pr_utime); + cputime_to_timeval(cputime_add(p->stime, p->signal->stime), + &prstatus->pr_stime); + } else { + cputime_to_timeval(p->utime, &prstatus->pr_utime); + cputime_to_timeval(p->stime, &prstatus->pr_stime); + } + cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime); + cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime); + + prstatus->pr_exec_fdpic_loadmap = p->mm->context.exec_fdpic_loadmap; + prstatus->pr_interp_fdpic_loadmap = p->mm->context.interp_fdpic_loadmap; +} + +static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p, + struct mm_struct *mm) +{ + unsigned int i, len; + + /* first copy the parameters from user space */ + memset(psinfo, 0, sizeof(struct elf_prpsinfo)); + + len = mm->arg_end - mm->arg_start; + if (len >= ELF_PRARGSZ) + len = ELF_PRARGSZ - 1; + if (copy_from_user(&psinfo->pr_psargs, + (const char __user *) mm->arg_start, len)) + return -EFAULT; + for (i = 0; i < len; i++) + if (psinfo->pr_psargs[i] == 0) + psinfo->pr_psargs[i] = ' '; + 
psinfo->pr_psargs[len] = 0; + + psinfo->pr_pid = p->pid; + psinfo->pr_ppid = p->parent->pid; + psinfo->pr_pgrp = process_group(p); + psinfo->pr_sid = p->signal->session; + + i = p->state ? ffz(~p->state) + 1 : 0; + psinfo->pr_state = i; + psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i]; + psinfo->pr_zomb = psinfo->pr_sname == 'Z'; + psinfo->pr_nice = task_nice(p); + psinfo->pr_flag = p->flags; + SET_UID(psinfo->pr_uid, p->uid); + SET_GID(psinfo->pr_gid, p->gid); + strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname)); + + return 0; +} + +/* Here is the structure in which status of each thread is captured. */ +struct elf_thread_status +{ + struct list_head list; + struct elf_prstatus prstatus; /* NT_PRSTATUS */ + elf_fpregset_t fpu; /* NT_PRFPREG */ + struct task_struct *thread; +#ifdef ELF_CORE_COPY_XFPREGS + elf_fpxregset_t xfpu; /* NT_PRXFPREG */ +#endif + struct memelfnote notes[3]; + int num_notes; +}; + +/* + * In order to add the specific thread information for the elf file format, + * we need to keep a linked list of every thread's pr_status and then create + * a single section for them in the final core file. 
+ */ +static int elf_dump_thread_status(long signr, struct elf_thread_status *t) +{ + struct task_struct *p = t->thread; + int sz = 0; + + t->num_notes = 0; + + fill_prstatus(&t->prstatus, p, signr); + elf_core_copy_task_regs(p, &t->prstatus.pr_reg); + + fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus), + &t->prstatus); + t->num_notes++; + sz += notesize(&t->notes[0]); + + t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL, &t->fpu); + if (t->prstatus.pr_fpvalid) { + fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu), + &t->fpu); + t->num_notes++; + sz += notesize(&t->notes[1]); + } + +#ifdef ELF_CORE_COPY_XFPREGS + if (elf_core_copy_task_xfpregs(p, &t->xfpu)) { + fill_note(&t->notes[2], "LINUX", NT_PRXFPREG, sizeof(t->xfpu), + &t->xfpu); + t->num_notes++; + sz += notesize(&t->notes[2]); + } +#endif + return sz; +} + +/* + * dump the segments for an MMU process + */ +#ifdef CONFIG_MMU +static int elf_fdpic_dump_segments(struct file *file, struct mm_struct *mm, + size_t *size, unsigned long *limit) +{ + struct vm_area_struct *vma; + + for (vma = current->mm->mmap; vma; vma = vma->vm_next) { + unsigned long addr; + + if (!maydump(vma)) + continue; + + for (addr = vma->vm_start; + addr < vma->vm_end; + addr += PAGE_SIZE + ) { + struct vm_area_struct *vma; + struct page *page; + + if (get_user_pages(current, current->mm, addr, 1, 0, 1, + &page, &vma) <= 0) { + DUMP_SEEK(file->f_pos + PAGE_SIZE); + } + else if (page == ZERO_PAGE(addr)) { + DUMP_SEEK(file->f_pos + PAGE_SIZE); + page_cache_release(page); + } + else { + void *kaddr; + + flush_cache_page(vma, addr, page_to_pfn(page)); + kaddr = kmap(page); + if ((*size += PAGE_SIZE) > *limit || + !dump_write(file, kaddr, PAGE_SIZE) + ) { + kunmap(page); + page_cache_release(page); + return -EIO; + } + kunmap(page); + page_cache_release(page); + } + } + } + + return 0; + +end_coredump: + return -EFBIG; +} +#endif + +/* + * dump the segments for a NOMMU process + */ +#ifndef CONFIG_MMU +static 
int elf_fdpic_dump_segments(struct file *file, struct mm_struct *mm, + size_t *size, unsigned long *limit) +{ + struct vm_list_struct *vml; + + for (vml = current->mm->context.vmlist; vml; vml = vml->next) { + struct vm_area_struct *vma = vml->vma; + + if (!maydump(vma)) + continue; + + if ((*size += PAGE_SIZE) > *limit) + return -EFBIG; + + if (!dump_write(file, (void *) vma->vm_start, + vma->vm_end - vma->vm_start)) + return -EIO; + } + + return 0; +} +#endif + +/* + * Actual dumper + * + * This is a two-pass process; first we find the offsets of the bits, + * and then they are actually written out. If we run out of core limit + * we just truncate. + */ +static int elf_fdpic_core_dump(long signr, struct pt_regs *regs, + struct file *file) +{ +#define NUM_NOTES 6 + int has_dumped = 0; + mm_segment_t fs; + int segs; + size_t size = 0; + int i; + struct vm_area_struct *vma; + struct elfhdr *elf = NULL; + loff_t offset = 0, dataoff; + unsigned long limit = current->signal->rlim[RLIMIT_CORE].rlim_cur; + int numnote; + struct memelfnote *notes = NULL; + struct elf_prstatus *prstatus = NULL; /* NT_PRSTATUS */ + struct elf_prpsinfo *psinfo = NULL; /* NT_PRPSINFO */ + struct task_struct *g, *p; + LIST_HEAD(thread_list); + struct list_head *t; + elf_fpregset_t *fpu = NULL; +#ifdef ELF_CORE_COPY_XFPREGS + elf_fpxregset_t *xfpu = NULL; +#endif + int thread_status_size = 0; +#ifndef CONFIG_MMU + struct vm_list_struct *vml; +#endif + elf_addr_t *auxv; + + /* + * We no longer stop all VM operations. + * + * This is because those proceses that could possibly change map_count + * or the mmap / vma pages are now blocked in do_exit on current + * finishing this core dump. + * + * Only ptrace can touch these memory addresses, but it doesn't change + * the map_count or the pages allocated. So no possibility of crashing + * exists while dumping the mm->vm_next areas to the core file. 
+ */ + + /* alloc memory for large data structures: too large to be on stack */ + elf = kmalloc(sizeof(*elf), GFP_KERNEL); + if (!elf) + goto cleanup; + prstatus = kzalloc(sizeof(*prstatus), GFP_KERNEL); + if (!prstatus) + goto cleanup; + psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL); + if (!psinfo) + goto cleanup; + notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote), GFP_KERNEL); + if (!notes) + goto cleanup; + fpu = kmalloc(sizeof(*fpu), GFP_KERNEL); + if (!fpu) + goto cleanup; +#ifdef ELF_CORE_COPY_XFPREGS + xfpu = kmalloc(sizeof(*xfpu), GFP_KERNEL); + if (!xfpu) + goto cleanup; +#endif + + if (signr) { + struct elf_thread_status *tmp; + read_lock(&tasklist_lock); + do_each_thread(g,p) + if (current->mm == p->mm && current != p) { + tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC); + if (!tmp) { + read_unlock(&tasklist_lock); + goto cleanup; + } + INIT_LIST_HEAD(&tmp->list); + tmp->thread = p; + list_add(&tmp->list, &thread_list); + } + while_each_thread(g,p); + read_unlock(&tasklist_lock); + list_for_each(t, &thread_list) { + struct elf_thread_status *tmp; + int sz; + + tmp = list_entry(t, struct elf_thread_status, list); + sz = elf_dump_thread_status(signr, tmp); + thread_status_size += sz; + } + } + + /* now collect the dump for the current */ + fill_prstatus(prstatus, current, signr); + elf_core_copy_regs(&prstatus->pr_reg, regs); + +#ifdef CONFIG_MMU + segs = current->mm->map_count; +#else + segs = 0; + for (vml = current->mm->context.vmlist; vml; vml = vml->next) + segs++; +#endif +#ifdef ELF_CORE_EXTRA_PHDRS + segs += ELF_CORE_EXTRA_PHDRS; +#endif + + /* Set up header */ + fill_elf_fdpic_header(elf, segs + 1); /* including notes section */ + + has_dumped = 1; + current->flags |= PF_DUMPCORE; + + /* + * Set up the notes in similar form to SVR4 core dumps made + * with info from their /proc. 
+ */ + + fill_note(notes + 0, "CORE", NT_PRSTATUS, sizeof(*prstatus), prstatus); + fill_psinfo(psinfo, current->group_leader, current->mm); + fill_note(notes + 1, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo); + + numnote = 2; + + auxv = (elf_addr_t *) current->mm->saved_auxv; + + i = 0; + do + i += 2; + while (auxv[i - 2] != AT_NULL); + fill_note(&notes[numnote++], "CORE", NT_AUXV, + i * sizeof(elf_addr_t), auxv); + + /* Try to dump the FPU. */ + if ((prstatus->pr_fpvalid = + elf_core_copy_task_fpregs(current, regs, fpu))) + fill_note(notes + numnote++, + "CORE", NT_PRFPREG, sizeof(*fpu), fpu); +#ifdef ELF_CORE_COPY_XFPREGS + if (elf_core_copy_task_xfpregs(current, xfpu)) + fill_note(notes + numnote++, + "LINUX", NT_PRXFPREG, sizeof(*xfpu), xfpu); +#endif + + fs = get_fs(); + set_fs(KERNEL_DS); + + DUMP_WRITE(elf, sizeof(*elf)); + offset += sizeof(*elf); /* Elf header */ + offset += (segs+1) * sizeof(struct elf_phdr); /* Program headers */ + + /* Write notes phdr entry */ + { + struct elf_phdr phdr; + int sz = 0; + + for (i = 0; i < numnote; i++) + sz += notesize(notes + i); + + sz += thread_status_size; + + fill_elf_note_phdr(&phdr, sz, offset); + offset += sz; + DUMP_WRITE(&phdr, sizeof(phdr)); + } + + /* Page-align dumped data */ + dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE); + + /* write program headers for segments dump */ + for ( +#ifdef CONFIG_MMU + vma = current->mm->mmap; vma; vma = vma->vm_next +#else + vml = current->mm->context.vmlist; vml; vml = vml->next +#endif + ) { + struct elf_phdr phdr; + size_t sz; + +#ifndef CONFIG_MMU + vma = vml->vma; +#endif + + sz = vma->vm_end - vma->vm_start; + + phdr.p_type = PT_LOAD; + phdr.p_offset = offset; + phdr.p_vaddr = vma->vm_start; + phdr.p_paddr = 0; + phdr.p_filesz = maydump(vma) ? sz : 0; + phdr.p_memsz = sz; + offset += phdr.p_filesz; + phdr.p_flags = vma->vm_flags & VM_READ ?
PF_R : 0; + if (vma->vm_flags & VM_WRITE) + phdr.p_flags |= PF_W; + if (vma->vm_flags & VM_EXEC) + phdr.p_flags |= PF_X; + phdr.p_align = ELF_EXEC_PAGESIZE; + + DUMP_WRITE(&phdr, sizeof(phdr)); + } + +#ifdef ELF_CORE_WRITE_EXTRA_PHDRS + ELF_CORE_WRITE_EXTRA_PHDRS; +#endif + + /* write out the notes section */ + for (i = 0; i < numnote; i++) + if (!writenote(notes + i, file)) + goto end_coredump; + + /* write out the thread status notes section */ + list_for_each(t, &thread_list) { + struct elf_thread_status *tmp = + list_entry(t, struct elf_thread_status, list); + + for (i = 0; i < tmp->num_notes; i++) + if (!writenote(&tmp->notes[i], file)) + goto end_coredump; + } + + DUMP_SEEK(dataoff); + + if (elf_fdpic_dump_segments(file, current->mm, &size, &limit) < 0) + goto end_coredump; + +#ifdef ELF_CORE_WRITE_EXTRA_DATA + ELF_CORE_WRITE_EXTRA_DATA; +#endif + + if (file->f_pos != offset) { + /* Sanity check */ + printk(KERN_WARNING + "elf_core_dump: file->f_pos (%lld) != offset (%lld)\n", + file->f_pos, offset); + } + +end_coredump: + set_fs(fs); + +cleanup: + while (!list_empty(&thread_list)) { + struct list_head *tmp = thread_list.next; + list_del(tmp); + kfree(list_entry(tmp, struct elf_thread_status, list)); + } + + kfree(elf); + kfree(prstatus); + kfree(psinfo); + kfree(notes); + kfree(fpu); +#ifdef ELF_CORE_COPY_XFPREGS + kfree(xfpu); +#endif + return has_dumped; +#undef NUM_NOTES +} + +#endif /* USE_ELF_CORE_DUMP */ diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index c94d52e..a62fd40 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -16,7 +16,6 @@ */ #include <linux/module.h> -#include <linux/config.h> #include <linux/kernel.h> #include <linux/sched.h> #include <linux/mm.h> diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c index 00a91dc..32b5d62 100644 --- a/fs/binfmt_som.c +++ b/fs/binfmt_som.c @@ -32,7 +32,6 @@ #include <asm/uaccess.h> #include <asm/pgtable.h> -#include <linux/config.h> #include <linux/elf.h> diff --git a/fs/block_dev.c b/fs/block_dev.c 
index 028d9fb..3753457 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -5,14 +5,12 @@ * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE */ -#include <linux/config.h> #include <linux/init.h> #include <linux/mm.h> #include <linux/fcntl.h> #include <linux/slab.h> #include <linux/kmod.h> #include <linux/major.h> -#include <linux/devfs_fs_kernel.h> #include <linux/smp_lock.h> #include <linux/highmem.h> #include <linux/blkdev.h> @@ -741,7 +739,7 @@ static int bd_claim_by_kobject(struct block_device *bdev, void *holder, if (!bo) return -ENOMEM; - mutex_lock(&bdev->bd_mutex); + mutex_lock_nested(&bdev->bd_mutex, BD_MUTEX_PARTITION); res = bd_claim(bdev, holder); if (res || !add_bd_holder(bdev, bo)) free_bd_holder(bo); @@ -766,7 +764,7 @@ static void bd_release_from_kobject(struct block_device *bdev, if (!kobj) return; - mutex_lock(&bdev->bd_mutex); + mutex_lock_nested(&bdev->bd_mutex, BD_MUTEX_PARTITION); bd_release(bdev); if ((bo = del_bd_holder(bdev, kobj))) free_bd_holder(bo); @@ -824,6 +822,22 @@ struct block_device *open_by_devnum(dev_t dev, unsigned mode) EXPORT_SYMBOL(open_by_devnum); +static int +blkdev_get_partition(struct block_device *bdev, mode_t mode, unsigned flags); + +struct block_device *open_partition_by_devnum(dev_t dev, unsigned mode) +{ + struct block_device *bdev = bdget(dev); + int err = -ENOMEM; + int flags = mode & FMODE_WRITE ? O_RDWR : O_RDONLY; + if (bdev) + err = blkdev_get_partition(bdev, mode, flags); + return err ? ERR_PTR(err) : bdev; +} + +EXPORT_SYMBOL(open_partition_by_devnum); + + /* * This routine checks whether a removable media has been changed, * and invalidates all buffer-cache-entries in that case. 
This @@ -870,7 +884,11 @@ void bd_set_size(struct block_device *bdev, loff_t size) } EXPORT_SYMBOL(bd_set_size); -static int do_open(struct block_device *bdev, struct file *file) +static int +blkdev_get_whole(struct block_device *bdev, mode_t mode, unsigned flags); + +static int +do_open(struct block_device *bdev, struct file *file, unsigned int subclass) { struct module *owner = NULL; struct gendisk *disk; @@ -887,7 +905,8 @@ static int do_open(struct block_device *bdev, struct file *file) } owner = disk->fops->owner; - mutex_lock(&bdev->bd_mutex); + mutex_lock_nested(&bdev->bd_mutex, subclass); + if (!bdev->bd_openers) { bdev->bd_disk = disk; bdev->bd_contains = bdev; @@ -914,11 +933,11 @@ static int do_open(struct block_device *bdev, struct file *file) ret = -ENOMEM; if (!whole) goto out_first; - ret = blkdev_get(whole, file->f_mode, file->f_flags); + ret = blkdev_get_whole(whole, file->f_mode, file->f_flags); if (ret) goto out_first; bdev->bd_contains = whole; - mutex_lock(&whole->bd_mutex); + mutex_lock_nested(&whole->bd_mutex, BD_MUTEX_WHOLE); whole->bd_part_count++; p = disk->part[part - 1]; bdev->bd_inode->i_data.backing_dev_info = @@ -946,7 +965,8 @@ static int do_open(struct block_device *bdev, struct file *file) if (bdev->bd_invalidated) rescan_partitions(bdev->bd_disk, bdev); } else { - mutex_lock(&bdev->bd_contains->bd_mutex); + mutex_lock_nested(&bdev->bd_contains->bd_mutex, + BD_MUTEX_PARTITION); bdev->bd_contains->bd_part_count++; mutex_unlock(&bdev->bd_contains->bd_mutex); } @@ -987,11 +1007,49 @@ int blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags) fake_file.f_dentry = &fake_dentry; fake_dentry.d_inode = bdev->bd_inode; - return do_open(bdev, &fake_file); + return do_open(bdev, &fake_file, BD_MUTEX_NORMAL); } EXPORT_SYMBOL(blkdev_get); +static int +blkdev_get_whole(struct block_device *bdev, mode_t mode, unsigned flags) +{ + /* + * This crockload is due to bad choice of ->open() type. + * It will go away. 
+ * For now, block device ->open() routine must _not_ + * examine anything in 'inode' argument except ->i_rdev. + */ + struct file fake_file = {}; + struct dentry fake_dentry = {}; + fake_file.f_mode = mode; + fake_file.f_flags = flags; + fake_file.f_dentry = &fake_dentry; + fake_dentry.d_inode = bdev->bd_inode; + + return do_open(bdev, &fake_file, BD_MUTEX_WHOLE); +} + +static int +blkdev_get_partition(struct block_device *bdev, mode_t mode, unsigned flags) +{ + /* + * This crockload is due to bad choice of ->open() type. + * It will go away. + * For now, block device ->open() routine must _not_ + * examine anything in 'inode' argument except ->i_rdev. + */ + struct file fake_file = {}; + struct dentry fake_dentry = {}; + fake_file.f_mode = mode; + fake_file.f_flags = flags; + fake_file.f_dentry = &fake_dentry; + fake_dentry.d_inode = bdev->bd_inode; + + return do_open(bdev, &fake_file, BD_MUTEX_PARTITION); +} + static int blkdev_open(struct inode * inode, struct file * filp) { struct block_device *bdev; @@ -1007,7 +1065,7 @@ static int blkdev_open(struct inode * inode, struct file * filp) bdev = bd_acquire(inode); - res = do_open(bdev, filp); + res = do_open(bdev, filp, BD_MUTEX_NORMAL); if (res) return res; @@ -1021,13 +1079,13 @@ static int blkdev_open(struct inode * inode, struct file * filp) return res; } -int blkdev_put(struct block_device *bdev) +static int __blkdev_put(struct block_device *bdev, unsigned int subclass) { int ret = 0; struct inode *bd_inode = bdev->bd_inode; struct gendisk *disk = bdev->bd_disk; - mutex_lock(&bdev->bd_mutex); + mutex_lock_nested(&bdev->bd_mutex, subclass); lock_kernel(); if (!--bdev->bd_openers) { sync_blockdev(bdev); @@ -1037,7 +1095,8 @@ int blkdev_put(struct block_device *bdev) if (disk->fops->release) ret = disk->fops->release(bd_inode, NULL); } else { - mutex_lock(&bdev->bd_contains->bd_mutex); + mutex_lock_nested(&bdev->bd_contains->bd_mutex, + subclass + 1); bdev->bd_contains->bd_part_count--; 
mutex_unlock(&bdev->bd_contains->bd_mutex); } @@ -1053,9 +1112,8 @@ int blkdev_put(struct block_device *bdev) } bdev->bd_disk = NULL; bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; - if (bdev != bdev->bd_contains) { - blkdev_put(bdev->bd_contains); - } + if (bdev != bdev->bd_contains) + __blkdev_put(bdev->bd_contains, subclass + 1); bdev->bd_contains = NULL; } unlock_kernel(); @@ -1064,8 +1122,20 @@ int blkdev_put(struct block_device *bdev) return ret; } +int blkdev_put(struct block_device *bdev) +{ + return __blkdev_put(bdev, BD_MUTEX_NORMAL); +} + EXPORT_SYMBOL(blkdev_put); +int blkdev_put_partition(struct block_device *bdev) +{ + return __blkdev_put(bdev, BD_MUTEX_PARTITION); +} + +EXPORT_SYMBOL(blkdev_put_partition); + static int blkdev_close(struct inode * inode, struct file * filp) { struct block_device *bdev = I_BDEV(filp->f_mapping->host); @@ -1095,7 +1165,7 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg) return blkdev_ioctl(file->f_mapping->host, file, cmd, arg); } -struct address_space_operations def_blk_aops = { +const struct address_space_operations def_blk_aops = { .readpage = blkdev_readpage, .writepage = blkdev_writepage, .sync_page = block_sync_page, diff --git a/fs/buffer.c b/fs/buffer.c index f23bb64..3660dcb 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -18,7 +18,6 @@ * async buffer flushing, 1999 Andrea Arcangeli <andrea@suse.de> */ -#include <linux/config.h> #include <linux/kernel.h> #include <linux/syscalls.h> #include <linux/fs.h> @@ -852,7 +851,7 @@ int __set_page_dirty_buffers(struct page *page) write_lock_irq(&mapping->tree_lock); if (page->mapping) { /* Race with truncate? 
*/ if (mapping_cap_account_dirty(mapping)) - inc_page_state(nr_dirty); + __inc_zone_page_state(page, NR_FILE_DIRTY); radix_tree_tag_set(&mapping->page_tree, page_index(page), PAGECACHE_TAG_DIRTY); @@ -2598,7 +2597,7 @@ int nobh_truncate_page(struct address_space *mapping, loff_t from) unsigned offset = from & (PAGE_CACHE_SIZE-1); unsigned to; struct page *page; - struct address_space_operations *a_ops = mapping->a_ops; + const struct address_space_operations *a_ops = mapping->a_ops; char *kaddr; int ret = 0; diff --git a/fs/char_dev.c b/fs/char_dev.c index f3418f7..a4cbc67 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -4,7 +4,6 @@ * Copyright (C) 1991, 1992 Linus Torvalds */ -#include <linux/config.h> #include <linux/init.h> #include <linux/fs.h> #include <linux/slab.h> @@ -14,7 +13,6 @@ #include <linux/errno.h> #include <linux/module.h> #include <linux/smp_lock.h> -#include <linux/devfs_fs_kernel.h> #include <linux/seq_file.h> #include <linux/kobject.h> diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c index 031cdf2..2e75883 100644 --- a/fs/cifs/asn1.c +++ b/fs/cifs/asn1.c @@ -17,7 +17,6 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index a6384d8..8f75c6f 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -32,8 +32,8 @@ #define TRUE 1 #endif -extern struct address_space_operations cifs_addr_ops; -extern struct address_space_operations cifs_addr_ops_smallbuf; +extern const struct address_space_operations cifs_addr_ops; +extern const struct address_space_operations cifs_addr_ops_smallbuf; /* Functions related to super block operations */ extern struct super_operations cifs_super_ops; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index e9c1573..944d2b9 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -324,7 +324,7 @@ out: return rc; } -/* Try to reaquire byte range locks that 
were released when session */ +/* Try to reacquire byte range locks that were released when session */ /* to server was lost */ static int cifs_relock_file(struct cifsFileInfo *cifsFile) { @@ -1942,7 +1942,7 @@ static int cifs_prepare_write(struct file *file, struct page *page, return 0; } -struct address_space_operations cifs_addr_ops = { +const struct address_space_operations cifs_addr_ops = { .readpage = cifs_readpage, .readpages = cifs_readpages, .writepage = cifs_writepage, @@ -1959,7 +1959,7 @@ struct address_space_operations cifs_addr_ops = { * contain the header plus one complete page of data. Otherwise, we need * to leave cifs_readpages out of the address space operations. */ -struct address_space_operations cifs_addr_ops_smallbuf = { +const struct address_space_operations cifs_addr_ops_smallbuf = { .readpage = cifs_readpage, .writepage = cifs_writepage, .writepages = cifs_writepages, diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c index 7caee8d..803aacf 100644 --- a/fs/coda/psdev.c +++ b/fs/coda/psdev.c @@ -28,7 +28,6 @@ #include <linux/delay.h> #include <linux/skbuff.h> #include <linux/proc_fs.h> -#include <linux/devfs_fs_kernel.h> #include <linux/vmalloc.h> #include <linux/fs.h> #include <linux/file.h> @@ -365,22 +364,12 @@ static int init_coda_psdev(void) err = PTR_ERR(coda_psdev_class); goto out_chrdev; } - devfs_mk_dir ("coda"); - for (i = 0; i < MAX_CODADEVS; i++) { + for (i = 0; i < MAX_CODADEVS; i++) class_device_create(coda_psdev_class, NULL, MKDEV(CODA_PSDEV_MAJOR,i), NULL, "cfs%d", i); - err = devfs_mk_cdev(MKDEV(CODA_PSDEV_MAJOR, i), - S_IFCHR|S_IRUSR|S_IWUSR, "coda/%d", i); - if (err) - goto out_class; - } coda_sysctl_init(); goto out; -out_class: - for (i = 0; i < MAX_CODADEVS; i++) - class_device_destroy(coda_psdev_class, MKDEV(CODA_PSDEV_MAJOR, i)); - class_destroy(coda_psdev_class); out_chrdev: unregister_chrdev(CODA_PSDEV_MAJOR, "coda"); out: @@ -419,12 +408,9 @@ static int __init init_coda(void) } return 0; out: - for (i = 0; i < 
MAX_CODADEVS; i++) { + for (i = 0; i < MAX_CODADEVS; i++) class_device_destroy(coda_psdev_class, MKDEV(CODA_PSDEV_MAJOR, i)); - devfs_remove("coda/%d", i); - } class_destroy(coda_psdev_class); - devfs_remove("coda"); unregister_chrdev(CODA_PSDEV_MAJOR, "coda"); coda_sysctl_clean(); out1: @@ -441,12 +427,9 @@ static void __exit exit_coda(void) if ( err != 0 ) { printk("coda: failed to unregister filesystem\n"); } - for (i = 0; i < MAX_CODADEVS; i++) { + for (i = 0; i < MAX_CODADEVS; i++) class_device_destroy(coda_psdev_class, MKDEV(CODA_PSDEV_MAJOR, i)); - devfs_remove("coda/%d", i); - } class_destroy(coda_psdev_class); - devfs_remove("coda"); unregister_chrdev(CODA_PSDEV_MAJOR, "coda"); coda_sysctl_clean(); coda_destroy_inodecache(); diff --git a/fs/coda/symlink.c b/fs/coda/symlink.c index b35e5bb..76e00a6 100644 --- a/fs/coda/symlink.c +++ b/fs/coda/symlink.c @@ -50,6 +50,6 @@ fail: return error; } -struct address_space_operations coda_symlink_aops = { +const struct address_space_operations coda_symlink_aops = { .readpage = coda_symlink_filler, }; diff --git a/fs/coda/sysctl.c b/fs/coda/sysctl.c index f0b1075..1c82e9a 100644 --- a/fs/coda/sysctl.c +++ b/fs/coda/sysctl.c @@ -11,7 +11,6 @@ * */ -#include <linux/config.h> #include <linux/time.h> #include <linux/mm.h> #include <linux/sysctl.h> diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index d8ecfed..4063a93 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -10,7 +10,6 @@ * ioctls. 
*/ -#include <linux/config.h> #include <linux/types.h> #include <linux/compat.h> #include <linux/kernel.h> @@ -44,7 +43,6 @@ #include <linux/loop.h> #include <linux/auto_fs.h> #include <linux/auto_fs4.h> -#include <linux/devfs_fs.h> #include <linux/tty.h> #include <linux/vt_kern.h> #include <linux/fb.h> diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 207f800..df02545 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -211,7 +211,7 @@ static void remove_dir(struct dentry * d) struct configfs_dirent * sd; sd = d->d_fsdata; - list_del_init(&sd->s_sibling); + list_del_init(&sd->s_sibling); configfs_put(sd); if (d->d_inode) simple_rmdir(parent->d_inode,d); @@ -330,7 +330,7 @@ static int configfs_detach_prep(struct dentry *dentry) ret = configfs_detach_prep(sd->s_dentry); if (!ret) - continue; + continue; } else ret = -ENOTEMPTY; @@ -931,7 +931,7 @@ int configfs_rename_dir(struct config_item * item, const char *new_name) new_dentry = lookup_one_len(new_name, parent, strlen(new_name)); if (!IS_ERR(new_dentry)) { - if (!new_dentry->d_inode) { + if (!new_dentry->d_inode) { error = config_item_set_name(item, "%s", new_name); if (!error) { d_add(new_dentry, NULL); diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c index c153bd9..e14488c 100644 --- a/fs/configfs/inode.c +++ b/fs/configfs/inode.c @@ -38,7 +38,7 @@ extern struct super_block * configfs_sb; -static struct address_space_operations configfs_aops = { +static const struct address_space_operations configfs_aops = { .readpage = simple_readpage, .prepare_write = simple_prepare_write, .commit_write = simple_commit_write diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c index e5512e2..fb65e08 100644 --- a/fs/configfs/symlink.c +++ b/fs/configfs/symlink.c @@ -66,7 +66,7 @@ static void fill_item_path(struct config_item * item, char * buffer, int length) } static int create_link(struct config_item *parent_item, - struct config_item *item, + struct config_item *item, struct dentry *dentry) { struct 
configfs_dirent *target_sd = item->ci_dentry->d_fsdata; diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index c45d738..223c043 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -30,7 +30,7 @@ static struct super_operations cramfs_ops; static struct inode_operations cramfs_dir_inode_operations; static const struct file_operations cramfs_directory_operations; -static struct address_space_operations cramfs_aops; +static const struct address_space_operations cramfs_aops; static DEFINE_MUTEX(read_mutex); @@ -501,7 +501,7 @@ static int cramfs_readpage(struct file *file, struct page * page) return 0; } -static struct address_space_operations cramfs_aops = { +static const struct address_space_operations cramfs_aops = { .readpage = cramfs_readpage }; diff --git a/fs/dcache.c b/fs/dcache.c index 48b44a7..1b4a3a3 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -14,7 +14,6 @@ * the dcache entry is deleted or garbage collected. */ -#include <linux/config.h> #include <linux/syscalls.h> #include <linux/string.h> #include <linux/mm.h> @@ -39,7 +38,7 @@ int sysctl_vfs_cache_pressure __read_mostly = 100; EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock); -static seqlock_t rename_lock __cacheline_aligned_in_smp = SEQLOCK_UNLOCKED; +static __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock); EXPORT_SYMBOL(dcache_lock); @@ -1340,10 +1339,10 @@ void d_move(struct dentry * dentry, struct dentry * target) */ if (target < dentry) { spin_lock(&target->d_lock); - spin_lock(&dentry->d_lock); + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); } else { spin_lock(&dentry->d_lock); - spin_lock(&target->d_lock); + spin_lock_nested(&target->d_lock, DENTRY_D_LOCK_NESTED); } /* Move the dentry to the target hash queue, if on different bucket */ diff --git a/fs/dcookies.c b/fs/dcookies.c index 8749339..0c4b067 100644 --- a/fs/dcookies.c +++ b/fs/dcookies.c @@ -12,7 +12,6 @@ * to the pair and can be looked up from userspace. 
*/ -#include <linux/config.h> #include <linux/syscalls.h> #include <linux/module.h> #include <linux/slab.h> diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index 66a5054..39640fd 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -13,7 +13,6 @@ * */ -#include <linux/config.h> #include <linux/module.h> #include <linux/fs.h> #include <linux/pagemap.h> diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 6fa1e04..e8ae304 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -16,7 +16,6 @@ /* uncomment to get debug messages from the debug filesystem, ah the irony. */ /* #define DEBUG */ -#include <linux/config.h> #include <linux/module.h> #include <linux/fs.h> #include <linux/mount.h> diff --git a/fs/devfs/Makefile b/fs/devfs/Makefile deleted file mode 100644 index 6dd8d12..0000000 --- a/fs/devfs/Makefile +++ /dev/null @@ -1,8 +0,0 @@ -# -# Makefile for the linux devfs-filesystem routines. -# - -obj-$(CONFIG_DEVFS_FS) += devfs.o - -devfs-objs := base.o util.o - diff --git a/fs/devfs/base.c b/fs/devfs/base.c deleted file mode 100644 index 51a97f1..0000000 --- a/fs/devfs/base.c +++ /dev/null @@ -1,2836 +0,0 @@ -/* devfs (Device FileSystem) driver. - - Copyright (C) 1998-2002 Richard Gooch - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Library General Public - License as published by the Free Software Foundation; either - version 2 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Library General Public License for more details. - - You should have received a copy of the GNU Library General Public - License along with this library; if not, write to the Free - Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- - Richard Gooch may be reached by email at rgooch@atnf.csiro.au - The postal address is: - Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia. - - ChangeLog - - 19980110 Richard Gooch <rgooch@atnf.csiro.au> - Original version. - v0.1 - 19980111 Richard Gooch <rgooch@atnf.csiro.au> - Created per-fs inode table rather than using inode->u.generic_ip - v0.2 - 19980111 Richard Gooch <rgooch@atnf.csiro.au> - Created .epoch inode which has a ctime of 0. - Fixed loss of named pipes when dentries lost. - Fixed loss of inode data when devfs_register() follows mknod(). - v0.3 - 19980111 Richard Gooch <rgooch@atnf.csiro.au> - Fix for when compiling with CONFIG_KERNELD. - 19980112 Richard Gooch <rgooch@atnf.csiro.au> - Fix for readdir() which sometimes didn't show entries. - Added <<tolerant>> option to <devfs_register>. - v0.4 - 19980113 Richard Gooch <rgooch@atnf.csiro.au> - Created <devfs_fill_file> function. - v0.5 - 19980115 Richard Gooch <rgooch@atnf.csiro.au> - Added subdirectory support. Major restructuring. - 19980116 Richard Gooch <rgooch@atnf.csiro.au> - Fixed <find_by_dev> to not search major=0,minor=0. - Added symlink support. - v0.6 - 19980120 Richard Gooch <rgooch@atnf.csiro.au> - Created <devfs_mk_dir> function and support directory unregister - 19980120 Richard Gooch <rgooch@atnf.csiro.au> - Auto-ownership uses real uid/gid rather than effective uid/gid. - v0.7 - 19980121 Richard Gooch <rgooch@atnf.csiro.au> - Supported creation of sockets. - v0.8 - 19980122 Richard Gooch <rgooch@atnf.csiro.au> - Added DEVFS_FL_HIDE_UNREG flag. - Interface change to <devfs_mk_symlink>. - Created <devfs_symlink> to support symlink(2). - v0.9 - 19980123 Richard Gooch <rgooch@atnf.csiro.au> - Added check to <devfs_fill_file> to check inode is in devfs. - Added optional traversal of symlinks. - v0.10 - 19980124 Richard Gooch <rgooch@atnf.csiro.au> - Created <devfs_get_flags> and <devfs_set_flags>. - v0.11 - 19980125 C. 
Scott Ananian <cananian@alumni.princeton.edu> - Created <devfs_find_handle>. - 19980125 Richard Gooch <rgooch@atnf.csiro.au> - Allow removal of symlinks. - v0.12 - 19980125 Richard Gooch <rgooch@atnf.csiro.au> - Created <devfs_set_symlink_destination>. - 19980126 Richard Gooch <rgooch@atnf.csiro.au> - Moved DEVFS_SUPER_MAGIC into header file. - Added DEVFS_FL_HIDE flag. - Created <devfs_get_maj_min>. - Created <devfs_get_handle_from_inode>. - Fixed minor bug in <find_by_dev>. - 19980127 Richard Gooch <rgooch@atnf.csiro.au> - Changed interface to <find_by_dev>, <find_entry>, - <devfs_unregister>, <devfs_fill_file> and <devfs_find_handle>. - Fixed inode times when symlink created with symlink(2). - v0.13 - 19980129 C. Scott Ananian <cananian@alumni.princeton.edu> - Exported <devfs_set_symlink_destination>, <devfs_get_maj_min> - and <devfs_get_handle_from_inode>. - 19980129 Richard Gooch <rgooch@atnf.csiro.au> - Created <devfs_unlink> to support unlink(2). - v0.14 - 19980129 Richard Gooch <rgooch@atnf.csiro.au> - Fixed kerneld support for entries in devfs subdirectories. - 19980130 Richard Gooch <rgooch@atnf.csiro.au> - Bugfixes in <call_kerneld>. - v0.15 - 19980207 Richard Gooch <rgooch@atnf.csiro.au> - Call kerneld when looking up unregistered entries. - v0.16 - 19980326 Richard Gooch <rgooch@atnf.csiro.au> - Modified interface to <devfs_find_handle> for symlink traversal. - v0.17 - 19980331 Richard Gooch <rgooch@atnf.csiro.au> - Fixed persistence bug with device numbers for manually created - device files. - Fixed problem with recreating symlinks with different content. - v0.18 - 19980401 Richard Gooch <rgooch@atnf.csiro.au> - Changed to CONFIG_KMOD. - Hide entries which are manually unlinked. - Always invalidate devfs dentry cache when registering entries. - Created <devfs_rmdir> to support rmdir(2). - Ensure directories created by <devfs_mk_dir> are visible. 
- v0.19 - 19980402 Richard Gooch <rgooch@atnf.csiro.au> - Invalidate devfs dentry cache when making directories. - Invalidate devfs dentry cache when removing entries. - Fixed persistence bug with fifos. - v0.20 - 19980421 Richard Gooch <rgooch@atnf.csiro.au> - Print process command when debugging kerneld/kmod. - Added debugging for register/unregister/change operations. - 19980422 Richard Gooch <rgooch@atnf.csiro.au> - Added "devfs=" boot options. - v0.21 - 19980426 Richard Gooch <rgooch@atnf.csiro.au> - No longer lock/unlock superblock in <devfs_put_super>. - Drop negative dentries when they are released. - Manage dcache more efficiently. - v0.22 - 19980427 Richard Gooch <rgooch@atnf.csiro.au> - Added DEVFS_FL_AUTO_DEVNUM flag. - v0.23 - 19980430 Richard Gooch <rgooch@atnf.csiro.au> - No longer set unnecessary methods. - v0.24 - 19980504 Richard Gooch <rgooch@atnf.csiro.au> - Added PID display to <call_kerneld> debugging message. - Added "after" debugging message to <call_kerneld>. - 19980519 Richard Gooch <rgooch@atnf.csiro.au> - Added "diread" and "diwrite" boot options. - 19980520 Richard Gooch <rgooch@atnf.csiro.au> - Fixed persistence problem with permissions. - v0.25 - 19980602 Richard Gooch <rgooch@atnf.csiro.au> - Support legacy device nodes. - Fixed bug where recreated inodes were hidden. - v0.26 - 19980602 Richard Gooch <rgooch@atnf.csiro.au> - Improved debugging in <get_vfs_inode>. - 19980607 Richard Gooch <rgooch@atnf.csiro.au> - No longer free old dentries in <devfs_mk_dir>. - Free all dentries for a given entry when deleting inodes. - v0.27 - 19980627 Richard Gooch <rgooch@atnf.csiro.au> - Limit auto-device numbering to majors 128 to 239. - v0.28 - 19980629 Richard Gooch <rgooch@atnf.csiro.au> - Fixed inode times persistence problem. - v0.29 - 19980704 Richard Gooch <rgooch@atnf.csiro.au> - Fixed spelling in <devfs_readlink> debug. - Fixed bug in <devfs_setup> parsing "dilookup". 
- v0.30 - 19980705 Richard Gooch <rgooch@atnf.csiro.au> - Fixed devfs inode leak when manually recreating inodes. - Fixed permission persistence problem when recreating inodes. - v0.31 - 19980727 Richard Gooch <rgooch@atnf.csiro.au> - Removed harmless "unused variable" compiler warning. - Fixed modes for manually recreated device nodes. - v0.32 - 19980728 Richard Gooch <rgooch@atnf.csiro.au> - Added NULL devfs inode warning in <devfs_read_inode>. - Force all inode nlink values to 1. - v0.33 - 19980730 Richard Gooch <rgooch@atnf.csiro.au> - Added "dimknod" boot option. - Set inode nlink to 0 when freeing dentries. - Fixed modes for manually recreated symlinks. - v0.34 - 19980802 Richard Gooch <rgooch@atnf.csiro.au> - Fixed bugs in recreated directories and symlinks. - v0.35 - 19980806 Richard Gooch <rgooch@atnf.csiro.au> - Fixed bugs in recreated device nodes. - 19980807 Richard Gooch <rgooch@atnf.csiro.au> - Fixed bug in currently unused <devfs_get_handle_from_inode>. - Defined new <devfs_handle_t> type. - Improved debugging when getting entries. - Fixed bug where directories could be emptied. - v0.36 - 19980809 Richard Gooch <rgooch@atnf.csiro.au> - Replaced dummy .epoch inode with .devfsd character device. - 19980810 Richard Gooch <rgooch@atnf.csiro.au> - Implemented devfsd protocol revision 0. - v0.37 - 19980819 Richard Gooch <rgooch@atnf.csiro.au> - Added soothing message to warning in <devfs_d_iput>. - v0.38 - 19980829 Richard Gooch <rgooch@atnf.csiro.au> - Use GCC extensions for structure initialisations. - Implemented async open notification. - Incremented devfsd protocol revision to 1. - v0.39 - 19980908 Richard Gooch <rgooch@atnf.csiro.au> - Moved async open notification to end of <devfs_open>. - v0.40 - 19980910 Richard Gooch <rgooch@atnf.csiro.au> - Prepended "/dev/" to module load request. - Renamed <call_kerneld> to <call_kmod>. - v0.41 - 19980910 Richard Gooch <rgooch@atnf.csiro.au> - Fixed typo "AYSNC" -> "ASYNC". 
- v0.42 - 19980910 Richard Gooch <rgooch@atnf.csiro.au> - Added open flag for files. - v0.43 - 19980927 Richard Gooch <rgooch@atnf.csiro.au> - Set i_blocks=0 and i_blksize=1024 in <devfs_read_inode>. - v0.44 - 19981005 Richard Gooch <rgooch@atnf.csiro.au> - Added test for empty <<name>> in <devfs_find_handle>. - Renamed <generate_path> to <devfs_generate_path> and published. - v0.45 - 19981006 Richard Gooch <rgooch@atnf.csiro.au> - Created <devfs_get_fops>. - v0.46 - 19981007 Richard Gooch <rgooch@atnf.csiro.au> - Limit auto-device numbering to majors 144 to 239. - v0.47 - 19981010 Richard Gooch <rgooch@atnf.csiro.au> - Updated <devfs_follow_link> for VFS change in 2.1.125. - v0.48 - 19981022 Richard Gooch <rgooch@atnf.csiro.au> - Created DEVFS_ FL_COMPAT flag. - v0.49 - 19981023 Richard Gooch <rgooch@atnf.csiro.au> - Created "nocompat" boot option. - v0.50 - 19981025 Richard Gooch <rgooch@atnf.csiro.au> - Replaced "mount" boot option with "nomount". - v0.51 - 19981110 Richard Gooch <rgooch@atnf.csiro.au> - Created "only" boot option. - v0.52 - 19981112 Richard Gooch <rgooch@atnf.csiro.au> - Added DEVFS_FL_REMOVABLE flag. - v0.53 - 19981114 Richard Gooch <rgooch@atnf.csiro.au> - Only call <scan_dir_for_removable> on first call to - <devfs_readdir>. - v0.54 - 19981205 Richard Gooch <rgooch@atnf.csiro.au> - Updated <devfs_rmdir> for VFS change in 2.1.131. - v0.55 - 19981218 Richard Gooch <rgooch@atnf.csiro.au> - Created <devfs_mk_compat>. - 19981220 Richard Gooch <rgooch@atnf.csiro.au> - Check for partitions on removable media in <devfs_lookup>. - v0.56 - 19990118 Richard Gooch <rgooch@atnf.csiro.au> - Added support for registering regular files. - Created <devfs_set_file_size>. - Update devfs inodes from entries if not changed through FS. - v0.57 - 19990124 Richard Gooch <rgooch@atnf.csiro.au> - Fixed <devfs_fill_file> to only initialise temporary inodes. - Trap for NULL fops in <devfs_register>. - Return -ENODEV in <devfs_fill_file> for non-driver inodes. 
- v0.58 - 19990126 Richard Gooch <rgooch@atnf.csiro.au> - Switched from PATH_MAX to DEVFS_PATHLEN. - v0.59 - 19990127 Richard Gooch <rgooch@atnf.csiro.au> - Created "nottycompat" boot option. - v0.60 - 19990318 Richard Gooch <rgooch@atnf.csiro.au> - Fixed <devfsd_read> to not overrun event buffer. - v0.61 - 19990329 Richard Gooch <rgooch@atnf.csiro.au> - Created <devfs_auto_unregister>. - v0.62 - 19990330 Richard Gooch <rgooch@atnf.csiro.au> - Don't return unregistred entries in <devfs_find_handle>. - Panic in <devfs_unregister> if entry unregistered. - 19990401 Richard Gooch <rgooch@atnf.csiro.au> - Don't panic in <devfs_auto_unregister> for duplicates. - v0.63 - 19990402 Richard Gooch <rgooch@atnf.csiro.au> - Don't unregister already unregistered entries in <unregister>. - v0.64 - 19990510 Richard Gooch <rgooch@atnf.csiro.au> - Disable warning messages when unable to read partition table for - removable media. - v0.65 - 19990512 Richard Gooch <rgooch@atnf.csiro.au> - Updated <devfs_lookup> for VFS change in 2.3.1-pre1. - Created "oops-on-panic" boot option. - Improved debugging in <devfs_register> and <devfs_unregister>. - v0.66 - 19990519 Richard Gooch <rgooch@atnf.csiro.au> - Added documentation for some functions. - 19990525 Richard Gooch <rgooch@atnf.csiro.au> - Removed "oops-on-panic" boot option: now always Oops. - v0.67 - 19990531 Richard Gooch <rgooch@atnf.csiro.au> - Improved debugging in <devfs_register>. - v0.68 - 19990604 Richard Gooch <rgooch@atnf.csiro.au> - Added "diunlink" and "nokmod" boot options. - Removed superfluous warning message in <devfs_d_iput>. - v0.69 - 19990611 Richard Gooch <rgooch@atnf.csiro.au> - Took account of change to <d_alloc_root>. - v0.70 - 19990614 Richard Gooch <rgooch@atnf.csiro.au> - Created separate event queue for each mounted devfs. - Removed <devfs_invalidate_dcache>. - Created new ioctl()s. - Incremented devfsd protocol revision to 3. - Fixed bug when re-creating directories: contents were lost. 
- Block access to inodes until devfsd updates permissions. - 19990615 Richard Gooch <rgooch@atnf.csiro.au> - Support 2.2.x kernels. - v0.71 - 19990623 Richard Gooch <rgooch@atnf.csiro.au> - Switched to sending process uid/gid to devfsd. - Renamed <call_kmod> to <try_modload>. - Added DEVFSD_NOTIFY_LOOKUP event. - 19990624 Richard Gooch <rgooch@atnf.csiro.au> - Added DEVFSD_NOTIFY_CHANGE event. - Incremented devfsd protocol revision to 4. - v0.72 - 19990713 Richard Gooch <rgooch@atnf.csiro.au> - Return EISDIR rather than EINVAL for read(2) on directories. - v0.73 - 19990809 Richard Gooch <rgooch@atnf.csiro.au> - Changed <devfs_setup> to new __init scheme. - v0.74 - 19990901 Richard Gooch <rgooch@atnf.csiro.au> - Changed remaining function declarations to new __init scheme. - v0.75 - 19991013 Richard Gooch <rgooch@atnf.csiro.au> - Created <devfs_get_info>, <devfs_set_info>, - <devfs_get_first_child> and <devfs_get_next_sibling>. - Added <<dir>> parameter to <devfs_register>, <devfs_mk_compat>, - <devfs_mk_dir> and <devfs_find_handle>. - Work sponsored by SGI. - v0.76 - 19991017 Richard Gooch <rgooch@atnf.csiro.au> - Allow multiple unregistrations. - Work sponsored by SGI. - v0.77 - 19991026 Richard Gooch <rgooch@atnf.csiro.au> - Added major and minor number to devfsd protocol. - Incremented devfsd protocol revision to 5. - Work sponsored by SGI. - v0.78 - 19991030 Richard Gooch <rgooch@atnf.csiro.au> - Support info pointer for all devfs entry types. - Added <<info>> parameter to <devfs_mk_dir> and - <devfs_mk_symlink>. - Work sponsored by SGI. - v0.79 - 19991031 Richard Gooch <rgooch@atnf.csiro.au> - Support "../" when searching devfs namespace. - Work sponsored by SGI. - v0.80 - 19991101 Richard Gooch <rgooch@atnf.csiro.au> - Created <devfs_get_unregister_slave>. - Work sponsored by SGI. - v0.81 - 19991103 Richard Gooch <rgooch@atnf.csiro.au> - Exported <devfs_get_parent>. - Work sponsored by SGI. 
- v0.82 - 19991104 Richard Gooch <rgooch@atnf.csiro.au> - Removed unused <devfs_set_symlink_destination>. - 19991105 Richard Gooch <rgooch@atnf.csiro.au> - Do not hide entries from devfsd or children. - Removed DEVFS_ FL_TTY_COMPAT flag. - Removed "nottycompat" boot option. - Removed <devfs_mk_compat>. - Work sponsored by SGI. - v0.83 - 19991107 Richard Gooch <rgooch@atnf.csiro.au> - Added DEVFS_FL_WAIT flag. - Work sponsored by SGI. - v0.84 - 19991107 Richard Gooch <rgooch@atnf.csiro.au> - Support new "disc" naming scheme in <get_removable_partition>. - Allow NULL fops in <devfs_register>. - Work sponsored by SGI. - v0.85 - 19991110 Richard Gooch <rgooch@atnf.csiro.au> - Fall back to major table if NULL fops given to <devfs_register>. - Work sponsored by SGI. - v0.86 - 19991204 Richard Gooch <rgooch@atnf.csiro.au> - Support fifos when unregistering. - Work sponsored by SGI. - v0.87 - 19991209 Richard Gooch <rgooch@atnf.csiro.au> - Removed obsolete DEVFS_ FL_COMPAT and DEVFS_ FL_TOLERANT flags. - Work sponsored by SGI. - v0.88 - 19991214 Richard Gooch <rgooch@atnf.csiro.au> - Removed kmod support. - Work sponsored by SGI. - v0.89 - 19991216 Richard Gooch <rgooch@atnf.csiro.au> - Improved debugging in <get_vfs_inode>. - Ensure dentries created by devfsd will be cleaned up. - Work sponsored by SGI. - v0.90 - 19991223 Richard Gooch <rgooch@atnf.csiro.au> - Created <devfs_get_name>. - Work sponsored by SGI. - v0.91 - 20000203 Richard Gooch <rgooch@atnf.csiro.au> - Ported to kernel 2.3.42. - Removed <devfs_fill_file>. - Work sponsored by SGI. - v0.92 - 20000306 Richard Gooch <rgooch@atnf.csiro.au> - Added DEVFS_ FL_NO_PERSISTENCE flag. - Removed unnecessary call to <update_devfs_inode_from_entry> in - <devfs_readdir>. - Work sponsored by SGI. - v0.93 - 20000413 Richard Gooch <rgooch@atnf.csiro.au> - Set inode->i_size to correct size for symlinks. 
- 20000414 Richard Gooch <rgooch@atnf.csiro.au> - Only give lookup() method to directories to comply with new VFS - assumptions. - Work sponsored by SGI. - 20000415 Richard Gooch <rgooch@atnf.csiro.au> - Remove unnecessary tests in symlink methods. - Don't kill existing block ops in <devfs_read_inode>. - Work sponsored by SGI. - v0.94 - 20000424 Richard Gooch <rgooch@atnf.csiro.au> - Don't create missing directories in <devfs_find_handle>. - Work sponsored by SGI. - v0.95 - 20000430 Richard Gooch <rgooch@atnf.csiro.au> - Added CONFIG_DEVFS_MOUNT. - Work sponsored by SGI. - v0.96 - 20000608 Richard Gooch <rgooch@atnf.csiro.au> - Disabled multi-mount capability (use VFS bindings instead). - Work sponsored by SGI. - v0.97 - 20000610 Richard Gooch <rgooch@atnf.csiro.au> - Switched to FS_SINGLE to disable multi-mounts. - 20000612 Richard Gooch <rgooch@atnf.csiro.au> - Removed module support. - Removed multi-mount code. - Removed compatibility macros: VFS has changed too much. - Work sponsored by SGI. - v0.98 - 20000614 Richard Gooch <rgooch@atnf.csiro.au> - Merged devfs inode into devfs entry. - Work sponsored by SGI. - v0.99 - 20000619 Richard Gooch <rgooch@atnf.csiro.au> - Removed dead code in <devfs_register> which used to call - <free_dentries>. - Work sponsored by SGI. - v0.100 - 20000621 Richard Gooch <rgooch@atnf.csiro.au> - Changed interface to <devfs_register>. - Work sponsored by SGI. - v0.101 - 20000622 Richard Gooch <rgooch@atnf.csiro.au> - Simplified interface to <devfs_mk_symlink> and <devfs_mk_dir>. - Simplified interface to <devfs_find_handle>. - Work sponsored by SGI. - v0.102 - 20010519 Richard Gooch <rgooch@atnf.csiro.au> - Ensure <devfs_generate_path> terminates string for root entry. - Exported <devfs_get_name> to modules. - 20010520 Richard Gooch <rgooch@atnf.csiro.au> - Make <devfs_mk_symlink> send events to devfsd. - Cleaned up option processing in <devfs_setup>. 
- 20010521 Richard Gooch <rgooch@atnf.csiro.au> - Fixed bugs in handling symlinks: could leak or cause Oops. - 20010522 Richard Gooch <rgooch@atnf.csiro.au> - Cleaned up directory handling by separating fops. - v0.103 - 20010601 Richard Gooch <rgooch@atnf.csiro.au> - Fixed handling of inverted options in <devfs_setup>. - v0.104 - 20010604 Richard Gooch <rgooch@atnf.csiro.au> - Adjusted <try_modload> to account for <devfs_generate_path> fix. - v0.105 - 20010617 Richard Gooch <rgooch@atnf.csiro.au> - Answered question posed by Al Viro and removed his comments. - Moved setting of registered flag after other fields are changed. - Fixed race between <devfsd_close> and <devfsd_notify_one>. - Global VFS changes added bogus BKL to <devfsd_close>: removed. - Widened locking in <devfs_readlink> and <devfs_follow_link>. - Replaced <devfsd_read> stack usage with <devfsd_ioctl> kmalloc. - Simplified locking in <devfsd_ioctl> and fixed memory leak. - v0.106 - 20010709 Richard Gooch <rgooch@atnf.csiro.au> - Removed broken devnum allocation and use <devfs_alloc_devnum>. - Fixed old devnum leak by calling new <devfs_dealloc_devnum>. - v0.107 - 20010712 Richard Gooch <rgooch@atnf.csiro.au> - Fixed bug in <devfs_setup> which could hang boot process. - v0.108 - 20010730 Richard Gooch <rgooch@atnf.csiro.au> - Added DEVFSD_NOTIFY_DELETE event. - 20010801 Richard Gooch <rgooch@atnf.csiro.au> - Removed #include <asm/segment.h>. - v0.109 - 20010807 Richard Gooch <rgooch@atnf.csiro.au> - Fixed inode table races by removing it and using - inode->u.generic_ip instead. - Moved <devfs_read_inode> into <get_vfs_inode>. - Moved <devfs_write_inode> into <devfs_notify_change>. - v0.110 - 20010808 Richard Gooch <rgooch@atnf.csiro.au> - Fixed race in <devfs_do_symlink> for uni-processor. - v0.111 - 20010818 Richard Gooch <rgooch@atnf.csiro.au> - Removed remnant of multi-mount support in <devfs_mknod>. - Removed unused DEVFS_FL_SHOW_UNREG flag. 
- v0.112 - 20010820 Richard Gooch <rgooch@atnf.csiro.au> - Removed nlink field from struct devfs_inode. - v0.113 - 20010823 Richard Gooch <rgooch@atnf.csiro.au> - Replaced BKL with global rwsem to protect symlink data (quick - and dirty hack). - v0.114 - 20010827 Richard Gooch <rgooch@atnf.csiro.au> - Replaced global rwsem for symlink with per-link refcount. - v0.115 - 20010919 Richard Gooch <rgooch@atnf.csiro.au> - Set inode->i_mapping->a_ops for block nodes in <get_vfs_inode>. - v0.116 - 20011008 Richard Gooch <rgooch@atnf.csiro.au> - Fixed overrun in <devfs_link> by removing function (not needed). - 20011009 Richard Gooch <rgooch@atnf.csiro.au> - Fixed buffer underrun in <try_modload>. - 20011029 Richard Gooch <rgooch@atnf.csiro.au> - Fixed race in <devfsd_ioctl> when setting event mask. - 20011114 Richard Gooch <rgooch@atnf.csiro.au> - First release of new locking code. - v1.0 - 20011117 Richard Gooch <rgooch@atnf.csiro.au> - Discard temporary buffer, now use "%s" for dentry names. - 20011118 Richard Gooch <rgooch@atnf.csiro.au> - Don't generate path in <try_modload>: use fake entry instead. - Use "existing" directory in <_devfs_make_parent_for_leaf>. - 20011122 Richard Gooch <rgooch@atnf.csiro.au> - Use slab cache rather than fixed buffer for devfsd events. - v1.1 - 20011125 Richard Gooch <rgooch@atnf.csiro.au> - Send DEVFSD_NOTIFY_REGISTERED events in <devfs_mk_dir>. - 20011127 Richard Gooch <rgooch@atnf.csiro.au> - Fixed locking bug in <devfs_d_revalidate_wait> due to typo. - Do not send CREATE, CHANGE, ASYNC_OPEN or DELETE events from - devfsd or children. - v1.2 - 20011202 Richard Gooch <rgooch@atnf.csiro.au> - Fixed bug in <devfsd_read>: was dereferencing freed pointer. - v1.3 - 20011203 Richard Gooch <rgooch@atnf.csiro.au> - Fixed bug in <devfsd_close>: was dereferencing freed pointer. - Added process group check for devfsd privileges. - v1.4 - 20011204 Richard Gooch <rgooch@atnf.csiro.au> - Use SLAB_ATOMIC in <devfsd_notify_de> from <devfs_d_delete>. 
- v1.5 - 20011211 Richard Gooch <rgooch@atnf.csiro.au> - Return old entry in <devfs_mk_dir> for 2.4.x kernels. - 20011212 Richard Gooch <rgooch@atnf.csiro.au> - Increment refcount on module in <check_disc_changed>. - 20011215 Richard Gooch <rgooch@atnf.csiro.au> - Created <devfs_get_handle> and exported <devfs_put>. - Increment refcount on module in <devfs_get_ops>. - Created <devfs_put_ops>. - v1.6 - 20011216 Richard Gooch <rgooch@atnf.csiro.au> - Added poisoning to <devfs_put>. - Improved debugging messages. - v1.7 - 20011221 Richard Gooch <rgooch@atnf.csiro.au> - Corrected (made useful) debugging message in <unregister>. - Moved <kmem_cache_create> in <mount_devfs_fs> to <init_devfs_fs> - 20011224 Richard Gooch <rgooch@atnf.csiro.au> - Added magic number to guard against scribbling drivers. - 20011226 Richard Gooch <rgooch@atnf.csiro.au> - Only return old entry in <devfs_mk_dir> if a directory. - Defined macros for error and debug messages. - v1.8 - 20020113 Richard Gooch <rgooch@atnf.csiro.au> - Fixed (rare, old) race in <devfs_lookup>. - v1.9 - 20020120 Richard Gooch <rgooch@atnf.csiro.au> - Fixed deadlock bug in <devfs_d_revalidate_wait>. - Tag VFS deletable in <devfs_mk_symlink> if handle ignored. - v1.10 - 20020129 Richard Gooch <rgooch@atnf.csiro.au> - Added KERN_* to remaining messages. - Cleaned up declaration of <stat_read>. - v1.11 - 20020219 Richard Gooch <rgooch@atnf.csiro.au> - Changed <devfs_rmdir> to allow later additions if not yet empty. - v1.12 - 20020406 Richard Gooch <rgooch@atnf.csiro.au> - Removed silently introduced calls to lock_kernel() and - unlock_kernel() due to recent VFS locking changes. BKL isn't - required in devfs. - v1.13 - 20020428 Richard Gooch <rgooch@atnf.csiro.au> - Removed 2.4.x compatibility code. - v1.14 - 20020510 Richard Gooch <rgooch@atnf.csiro.au> - Added BKL to <devfs_open> because drivers still need it. 
- v1.15 - 20020512 Richard Gooch <rgooch@atnf.csiro.au> - Protected <scan_dir_for_removable> and <get_removable_partition> - from changing directory contents. - v1.16 - 20020514 Richard Gooch <rgooch@atnf.csiro.au> - Minor cleanup of <scan_dir_for_removable>. - v1.17 - 20020721 Richard Gooch <rgooch@atnf.csiro.au> - Switched to ISO C structure field initialisers. - Switch to set_current_state() and move before add_wait_queue(). - 20020722 Richard Gooch <rgooch@atnf.csiro.au> - Fixed devfs entry leak in <devfs_readdir> when *readdir fails. - v1.18 - 20020725 Richard Gooch <rgooch@atnf.csiro.au> - Created <devfs_find_and_unregister>. - v1.19 - 20020728 Richard Gooch <rgooch@atnf.csiro.au> - Removed deprecated <devfs_find_handle>. - v1.20 - 20020820 Richard Gooch <rgooch@atnf.csiro.au> - Fixed module unload race in <devfs_open>. - v1.21 - 20021013 Richard Gooch <rgooch@atnf.csiro.au> - Removed DEVFS_ FL_AUTO_OWNER. - Switched lingering structure field initialiser to ISO C. - Added locking when updating FCB flags. 
- v1.22 -*/ -#include <linux/types.h> -#include <linux/errno.h> -#include <linux/time.h> -#include <linux/tty.h> -#include <linux/timer.h> -#include <linux/config.h> -#include <linux/kernel.h> -#include <linux/wait.h> -#include <linux/string.h> -#include <linux/slab.h> -#include <linux/ioport.h> -#include <linux/delay.h> -#include <linux/ctype.h> -#include <linux/mm.h> -#include <linux/module.h> -#include <linux/init.h> -#include <linux/devfs_fs.h> -#include <linux/devfs_fs_kernel.h> -#include <linux/smp_lock.h> -#include <linux/smp.h> -#include <linux/rwsem.h> -#include <linux/sched.h> -#include <linux/namei.h> -#include <linux/bitops.h> - -#include <asm/uaccess.h> -#include <asm/io.h> -#include <asm/processor.h> -#include <asm/system.h> -#include <asm/pgtable.h> -#include <asm/atomic.h> - -#define DEVFS_VERSION "2004-01-31" - -#define DEVFS_NAME "devfs" - -#define FIRST_INODE 1 - -#define STRING_LENGTH 256 -#define FAKE_BLOCK_SIZE 1024 -#define POISON_PTR ( *(void **) poison_array ) -#define MAGIC_VALUE 0x327db823 - -#ifndef TRUE -# define TRUE 1 -# define FALSE 0 -#endif - -#define MODE_DIR (S_IFDIR | S_IWUSR | S_IRUGO | S_IXUGO) - -#define DEBUG_NONE 0x0000000 -#define DEBUG_MODULE_LOAD 0x0000001 -#define DEBUG_REGISTER 0x0000002 -#define DEBUG_UNREGISTER 0x0000004 -#define DEBUG_FREE 0x0000008 -#define DEBUG_SET_FLAGS 0x0000010 -#define DEBUG_S_READ 0x0000100 /* Break */ -#define DEBUG_I_LOOKUP 0x0001000 /* Break */ -#define DEBUG_I_CREATE 0x0002000 -#define DEBUG_I_GET 0x0004000 -#define DEBUG_I_CHANGE 0x0008000 -#define DEBUG_I_UNLINK 0x0010000 -#define DEBUG_I_RLINK 0x0020000 -#define DEBUG_I_FLINK 0x0040000 -#define DEBUG_I_MKNOD 0x0080000 -#define DEBUG_F_READDIR 0x0100000 /* Break */ -#define DEBUG_D_DELETE 0x1000000 /* Break */ -#define DEBUG_D_RELEASE 0x2000000 -#define DEBUG_D_IPUT 0x4000000 -#define DEBUG_ALL 0xfffffff -#define DEBUG_DISABLED DEBUG_NONE - -#define OPTION_NONE 0x00 -#define OPTION_MOUNT 0x01 - -#define PRINTK(format, args...) 
\ - {printk (KERN_ERR "%s" format, __FUNCTION__ , ## args);} - -#define OOPS(format, args...) \ - {printk (KERN_CRIT "%s" format, __FUNCTION__ , ## args); \ - printk ("Forcing Oops\n"); \ - BUG();} - -#ifdef CONFIG_DEVFS_DEBUG -# define VERIFY_ENTRY(de) \ - {if ((de) && (de)->magic_number != MAGIC_VALUE) \ - OOPS ("(%p): bad magic value: %x\n", (de), (de)->magic_number);} -# define WRITE_ENTRY_MAGIC(de,magic) (de)->magic_number = (magic) -# define DPRINTK(flag, format, args...) \ - {if (devfs_debug & flag) \ - printk (KERN_INFO "%s" format, __FUNCTION__ , ## args);} -#else -# define VERIFY_ENTRY(de) -# define WRITE_ENTRY_MAGIC(de,magic) -# define DPRINTK(flag, format, args...) -#endif - -typedef struct devfs_entry *devfs_handle_t; - -struct directory_type { - rwlock_t lock; /* Lock for searching(R)/updating(W) */ - struct devfs_entry *first; - struct devfs_entry *last; - unsigned char no_more_additions:1; -}; - -struct symlink_type { - unsigned int length; /* Not including the NULL-termimator */ - char *linkname; /* This is NULL-terminated */ -}; - -struct devfs_inode { /* This structure is for "persistent" inode storage */ - struct dentry *dentry; - struct timespec atime; - struct timespec mtime; - struct timespec ctime; - unsigned int ino; /* Inode number as seen in the VFS */ - uid_t uid; - gid_t gid; -}; - -struct devfs_entry { -#ifdef CONFIG_DEVFS_DEBUG - unsigned int magic_number; -#endif - void *info; - atomic_t refcount; /* When this drops to zero, it's unused */ - union { - struct directory_type dir; - dev_t dev; - struct symlink_type symlink; - const char *name; /* Only used for (mode == 0) */ - } u; - struct devfs_entry *prev; /* Previous entry in the parent directory */ - struct devfs_entry *next; /* Next entry in the parent directory */ - struct devfs_entry *parent; /* The parent directory */ - struct devfs_inode inode; - umode_t mode; - unsigned short namelen; /* I think 64k+ filenames are a way off... 
*/ - unsigned char vfs:1; /* Whether the VFS may delete the entry */ - char name[1]; /* This is just a dummy: the allocated array - is bigger. This is NULL-terminated */ -}; - -/* The root of the device tree */ -static struct devfs_entry *root_entry; - -struct devfsd_buf_entry { - struct devfs_entry *de; /* The name is generated with this */ - unsigned short type; /* The type of event */ - umode_t mode; - uid_t uid; - gid_t gid; - struct devfsd_buf_entry *next; -}; - -struct fs_info { /* This structure is for the mounted devfs */ - struct super_block *sb; - spinlock_t devfsd_buffer_lock; /* Lock when inserting/deleting events */ - struct devfsd_buf_entry *devfsd_first_event; - struct devfsd_buf_entry *devfsd_last_event; - volatile int devfsd_sleeping; - volatile struct task_struct *devfsd_task; - volatile pid_t devfsd_pgrp; - volatile struct file *devfsd_file; - struct devfsd_notify_struct *devfsd_info; - volatile unsigned long devfsd_event_mask; - atomic_t devfsd_overrun_count; - wait_queue_head_t devfsd_wait_queue; /* Wake devfsd on input */ - wait_queue_head_t revalidate_wait_queue; /* Wake when devfsd sleeps */ -}; - -static struct fs_info fs_info = {.devfsd_buffer_lock = SPIN_LOCK_UNLOCKED }; -static kmem_cache_t *devfsd_buf_cache; -#ifdef CONFIG_DEVFS_DEBUG -static unsigned int devfs_debug_init __initdata = DEBUG_NONE; -static unsigned int devfs_debug = DEBUG_NONE; -static DEFINE_SPINLOCK(stat_lock); -static unsigned int stat_num_entries; -static unsigned int stat_num_bytes; -#endif -static unsigned char poison_array[8] = - { 0x5a, 0x5a, 0x5a, 0x5a, 0x5a, 0x5a, 0x5a, 0x5a }; - -#ifdef CONFIG_DEVFS_MOUNT -static unsigned int boot_options = OPTION_MOUNT; -#else -static unsigned int boot_options = OPTION_NONE; -#endif - -/* Forward function declarations */ -static devfs_handle_t _devfs_walk_path(struct devfs_entry *dir, - const char *name, int namelen, - int traverse_symlink); -static ssize_t devfsd_read(struct file *file, char __user *buf, size_t len, - loff_t 
* ppos); -static int devfsd_ioctl(struct inode *inode, struct file *file, - unsigned int cmd, unsigned long arg); -static int devfsd_close(struct inode *inode, struct file *file); -#ifdef CONFIG_DEVFS_DEBUG -static ssize_t stat_read(struct file *file, char __user *buf, size_t len, - loff_t * ppos); -static const struct file_operations stat_fops = { - .open = nonseekable_open, - .read = stat_read, -}; -#endif - -/* Devfs daemon file operations */ -static const struct file_operations devfsd_fops = { - .open = nonseekable_open, - .read = devfsd_read, - .ioctl = devfsd_ioctl, - .release = devfsd_close, -}; - -/* Support functions follow */ - -/** - * devfs_get - Get a reference to a devfs entry. - * @de: The devfs entry. - */ - -static struct devfs_entry *devfs_get(struct devfs_entry *de) -{ - VERIFY_ENTRY(de); - if (de) - atomic_inc(&de->refcount); - return de; -} /* End Function devfs_get */ - -/** - * devfs_put - Put (release) a reference to a devfs entry. - * @de: The handle to the devfs entry. - */ - -static void devfs_put(devfs_handle_t de) -{ - if (!de) - return; - VERIFY_ENTRY(de); - if (de->info == POISON_PTR) - OOPS("(%p): poisoned pointer\n", de); - if (!atomic_dec_and_test(&de->refcount)) - return; - if (de == root_entry) - OOPS("(%p): root entry being freed\n", de); - DPRINTK(DEBUG_FREE, "(%s): de: %p, parent: %p \"%s\"\n", - de->name, de, de->parent, - de->parent ? de->parent->name : "no parent"); - if (S_ISLNK(de->mode)) - kfree(de->u.symlink.linkname); - WRITE_ENTRY_MAGIC(de, 0); -#ifdef CONFIG_DEVFS_DEBUG - spin_lock(&stat_lock); - --stat_num_entries; - stat_num_bytes -= sizeof *de + de->namelen; - if (S_ISLNK(de->mode)) - stat_num_bytes -= de->u.symlink.length + 1; - spin_unlock(&stat_lock); -#endif - de->info = POISON_PTR; - kfree(de); -} /* End Function devfs_put */ - -/** - * _devfs_search_dir - Search for a devfs entry in a directory. - * @dir: The directory to search. - * @name: The name of the entry to search for. 
- * @namelen: The number of characters in @name. - * - * Search for a devfs entry in a directory and returns a pointer to the entry - * on success, else %NULL. The directory must be locked already. - * An implicit devfs_get() is performed on the returned entry. - */ - -static struct devfs_entry *_devfs_search_dir(struct devfs_entry *dir, - const char *name, - unsigned int namelen) -{ - struct devfs_entry *curr; - - if (!S_ISDIR(dir->mode)) { - PRINTK("(%s): not a directory\n", dir->name); - return NULL; - } - for (curr = dir->u.dir.first; curr != NULL; curr = curr->next) { - if (curr->namelen != namelen) - continue; - if (memcmp(curr->name, name, namelen) == 0) - break; - /* Not found: try the next one */ - } - return devfs_get(curr); -} /* End Function _devfs_search_dir */ - -/** - * _devfs_alloc_entry - Allocate a devfs entry. - * @name: the name of the entry - * @namelen: the number of characters in @name - * @mode: the mode for the entry - * - * Allocate a devfs entry and returns a pointer to the entry on success, else - * %NULL. 
- */ - -static struct devfs_entry *_devfs_alloc_entry(const char *name, - unsigned int namelen, - umode_t mode) -{ - struct devfs_entry *new; - static unsigned long inode_counter = FIRST_INODE; - static DEFINE_SPINLOCK(counter_lock); - - if (name && (namelen < 1)) - namelen = strlen(name); - if ((new = kmalloc(sizeof *new + namelen, GFP_KERNEL)) == NULL) - return NULL; - memset(new, 0, sizeof *new + namelen); /* Will set '\0' on name */ - new->mode = mode; - if (S_ISDIR(mode)) - rwlock_init(&new->u.dir.lock); - atomic_set(&new->refcount, 1); - spin_lock(&counter_lock); - new->inode.ino = inode_counter++; - spin_unlock(&counter_lock); - if (name) - memcpy(new->name, name, namelen); - new->namelen = namelen; - WRITE_ENTRY_MAGIC(new, MAGIC_VALUE); -#ifdef CONFIG_DEVFS_DEBUG - spin_lock(&stat_lock); - ++stat_num_entries; - stat_num_bytes += sizeof *new + namelen; - spin_unlock(&stat_lock); -#endif - return new; -} /* End Function _devfs_alloc_entry */ - -/** - * _devfs_append_entry - Append a devfs entry to a directory's child list. - * @dir: The directory to add to. - * @de: The devfs entry to append. - * @old_de: If an existing entry exists, it will be written here. This may - * be %NULL. An implicit devfs_get() is performed on this entry. - * - * Append a devfs entry to a directory's list of children, checking first to - * see if an entry of the same name exists. The directory will be locked. - * The value 0 is returned on success, else a negative error code. - * On failure, an implicit devfs_put() is performed on %de. 
- */ - -static int _devfs_append_entry(devfs_handle_t dir, devfs_handle_t de, - devfs_handle_t * old_de) -{ - int retval; - - if (old_de) - *old_de = NULL; - if (!S_ISDIR(dir->mode)) { - PRINTK("(%s): dir: \"%s\" is not a directory\n", de->name, - dir->name); - devfs_put(de); - return -ENOTDIR; - } - write_lock(&dir->u.dir.lock); - if (dir->u.dir.no_more_additions) - retval = -ENOENT; - else { - struct devfs_entry *old; - - old = _devfs_search_dir(dir, de->name, de->namelen); - if (old_de) - *old_de = old; - else - devfs_put(old); - if (old == NULL) { - de->parent = dir; - de->prev = dir->u.dir.last; - /* Append to the directory's list of children */ - if (dir->u.dir.first == NULL) - dir->u.dir.first = de; - else - dir->u.dir.last->next = de; - dir->u.dir.last = de; - retval = 0; - } else - retval = -EEXIST; - } - write_unlock(&dir->u.dir.lock); - if (retval) - devfs_put(de); - return retval; -} /* End Function _devfs_append_entry */ - -/** - * _devfs_get_root_entry - Get the root devfs entry. - * - * Returns the root devfs entry on success, else %NULL. - * - * TODO it must be called asynchronously due to the fact - * that devfs is initialized relatively late. Proper way - * is to remove module_init from init_devfs_fs and manually - * call it early enough during system init - */ - -static struct devfs_entry *_devfs_get_root_entry(void) -{ - struct devfs_entry *new; - static DEFINE_SPINLOCK(root_lock); - - if (root_entry) - return root_entry; - - new = _devfs_alloc_entry(NULL, 0, MODE_DIR); - if (new == NULL) - return NULL; - - spin_lock(&root_lock); - if (root_entry) { - spin_unlock(&root_lock); - devfs_put(new); - return root_entry; - } - root_entry = new; - spin_unlock(&root_lock); - - return root_entry; -} /* End Function _devfs_get_root_entry */ - -/** - * _devfs_descend - Descend down a tree using the next component name. - * @dir: The directory to search. - * @name: The component name to search for. - * @namelen: The length of %name. 
- * @next_pos: The position of the next '/' or '\0' is written here. - * - * Descend into a directory, searching for a component. This function forms - * the core of a tree-walking algorithm. The directory will be locked. - * The devfs entry corresponding to the component is returned. If there is - * no matching entry, %NULL is returned. - * An implicit devfs_get() is performed on the returned entry. - */ - -static struct devfs_entry *_devfs_descend(struct devfs_entry *dir, - const char *name, int namelen, - int *next_pos) -{ - const char *stop, *ptr; - struct devfs_entry *entry; - - if ((namelen >= 3) && (strncmp(name, "../", 3) == 0)) { /* Special-case going to parent directory */ - *next_pos = 3; - return devfs_get(dir->parent); - } - stop = name + namelen; - /* Search for a possible '/' */ - for (ptr = name; (ptr < stop) && (*ptr != '/'); ++ptr) ; - *next_pos = ptr - name; - read_lock(&dir->u.dir.lock); - entry = _devfs_search_dir(dir, name, *next_pos); - read_unlock(&dir->u.dir.lock); - return entry; -} /* End Function _devfs_descend */ - -static devfs_handle_t _devfs_make_parent_for_leaf(struct devfs_entry *dir, - const char *name, - int namelen, int *leaf_pos) -{ - int next_pos = 0; - - if (dir == NULL) - dir = _devfs_get_root_entry(); - if (dir == NULL) - return NULL; - devfs_get(dir); - /* Search for possible trailing component and ignore it */ - for (--namelen; (namelen > 0) && (name[namelen] != '/'); --namelen) ; - *leaf_pos = (name[namelen] == '/') ? 
(namelen + 1) : 0; - for (; namelen > 0; name += next_pos, namelen -= next_pos) { - struct devfs_entry *de, *old = NULL; - - if ((de = - _devfs_descend(dir, name, namelen, &next_pos)) == NULL) { - de = _devfs_alloc_entry(name, next_pos, MODE_DIR); - devfs_get(de); - if (!de || _devfs_append_entry(dir, de, &old)) { - devfs_put(de); - if (!old || !S_ISDIR(old->mode)) { - devfs_put(old); - devfs_put(dir); - return NULL; - } - de = old; /* Use the existing directory */ - } - } - if (de == dir->parent) { - devfs_put(dir); - devfs_put(de); - return NULL; - } - devfs_put(dir); - dir = de; - if (name[next_pos] == '/') - ++next_pos; - } - return dir; -} /* End Function _devfs_make_parent_for_leaf */ - -static devfs_handle_t _devfs_prepare_leaf(devfs_handle_t * dir, - const char *name, umode_t mode) -{ - int namelen, leaf_pos; - struct devfs_entry *de; - - namelen = strlen(name); - if ((*dir = _devfs_make_parent_for_leaf(*dir, name, namelen, - &leaf_pos)) == NULL) { - PRINTK("(%s): could not create parent path\n", name); - return NULL; - } - if ((de = _devfs_alloc_entry(name + leaf_pos, namelen - leaf_pos, mode)) - == NULL) { - PRINTK("(%s): could not allocate entry\n", name); - devfs_put(*dir); - return NULL; - } - return de; -} /* End Function _devfs_prepare_leaf */ - -static devfs_handle_t _devfs_walk_path(struct devfs_entry *dir, - const char *name, int namelen, - int traverse_symlink) -{ - int next_pos = 0; - - if (dir == NULL) - dir = _devfs_get_root_entry(); - if (dir == NULL) - return NULL; - devfs_get(dir); - for (; namelen > 0; name += next_pos, namelen -= next_pos) { - struct devfs_entry *de, *link; - - if (!S_ISDIR(dir->mode)) { - devfs_put(dir); - return NULL; - } - - if ((de = - _devfs_descend(dir, name, namelen, &next_pos)) == NULL) { - devfs_put(dir); - return NULL; - } - if (S_ISLNK(de->mode) && traverse_symlink) { /* Need to follow the link: this is a stack chomper */ - /* FIXME what if it puts outside of mounted tree? 
*/ - link = _devfs_walk_path(dir, de->u.symlink.linkname, - de->u.symlink.length, TRUE); - devfs_put(de); - if (!link) { - devfs_put(dir); - return NULL; - } - de = link; - } - devfs_put(dir); - dir = de; - if (name[next_pos] == '/') - ++next_pos; - } - return dir; -} /* End Function _devfs_walk_path */ - -/** - * _devfs_find_entry - Find a devfs entry. - * @dir: The handle to the parent devfs directory entry. If this is %NULL the - * name is relative to the root of the devfs. - * @name: The name of the entry. This may be %NULL. - * @traverse_symlink: If %TRUE then symbolic links are traversed. - * - * Returns the devfs_entry pointer on success, else %NULL. An implicit - * devfs_get() is performed. - */ - -static struct devfs_entry *_devfs_find_entry(devfs_handle_t dir, - const char *name, - int traverse_symlink) -{ - unsigned int namelen = strlen(name); - - if (name[0] == '/') { - /* Skip leading pathname component */ - if (namelen < 2) { - PRINTK("(%s): too short\n", name); - return NULL; - } - for (++name, --namelen; (*name != '/') && (namelen > 0); - ++name, --namelen) ; - if (namelen < 2) { - PRINTK("(%s): too short\n", name); - return NULL; - } - ++name; - --namelen; - } - return _devfs_walk_path(dir, name, namelen, traverse_symlink); -} /* End Function _devfs_find_entry */ - -static struct devfs_entry *get_devfs_entry_from_vfs_inode(struct inode *inode) -{ - if (inode == NULL) - return NULL; - VERIFY_ENTRY((struct devfs_entry *)inode->u.generic_ip); - return inode->u.generic_ip; -} /* End Function get_devfs_entry_from_vfs_inode */ - -/** - * free_dentry - Free the dentry for a device entry and invalidate inode. - * @de: The entry. - * - * This must only be called after the entry has been unhooked from its - * parent directory. 
- */ - -static void free_dentry(struct devfs_entry *de) -{ - struct dentry *dentry = de->inode.dentry; - - if (!dentry) - return; - spin_lock(&dcache_lock); - dget_locked(dentry); - spin_unlock(&dcache_lock); - /* Forcefully remove the inode */ - if (dentry->d_inode != NULL) - dentry->d_inode->i_nlink = 0; - d_drop(dentry); - dput(dentry); -} /* End Function free_dentry */ - -/** - * is_devfsd_or_child - Test if the current process is devfsd or one of its children. - * @fs_info: The filesystem information. - * - * Returns %TRUE if devfsd or child, else %FALSE. - */ - -static int is_devfsd_or_child(struct fs_info *fs_info) -{ - struct task_struct *p = current; - - if (p == fs_info->devfsd_task) - return (TRUE); - if (process_group(p) == fs_info->devfsd_pgrp) - return (TRUE); - read_lock(&tasklist_lock); - for (; p != &init_task; p = p->real_parent) { - if (p == fs_info->devfsd_task) { - read_unlock(&tasklist_lock); - return (TRUE); - } - } - read_unlock(&tasklist_lock); - return (FALSE); -} /* End Function is_devfsd_or_child */ - -/** - * devfsd_queue_empty - Test if devfsd has work pending in its event queue. - * @fs_info: The filesystem information. - * - * Returns %TRUE if the queue is empty, else %FALSE. - */ - -static inline int devfsd_queue_empty(struct fs_info *fs_info) -{ - return (fs_info->devfsd_last_event) ? FALSE : TRUE; -} /* End Function devfsd_queue_empty */ - -/** - * wait_for_devfsd_finished - Wait for devfsd to finish processing its event queue. - * @fs_info: The filesystem information. - * - * Returns %TRUE if no more waiting will be required, else %FALSE. 
- */ - -static int wait_for_devfsd_finished(struct fs_info *fs_info) -{ - DECLARE_WAITQUEUE(wait, current); - - if (fs_info->devfsd_task == NULL) - return (TRUE); - if (devfsd_queue_empty(fs_info) && fs_info->devfsd_sleeping) - return TRUE; - if (is_devfsd_or_child(fs_info)) - return (FALSE); - set_current_state(TASK_UNINTERRUPTIBLE); - add_wait_queue(&fs_info->revalidate_wait_queue, &wait); - if (!devfsd_queue_empty(fs_info) || !fs_info->devfsd_sleeping) - if (fs_info->devfsd_task) - schedule(); - remove_wait_queue(&fs_info->revalidate_wait_queue, &wait); - __set_current_state(TASK_RUNNING); - return (TRUE); -} /* End Function wait_for_devfsd_finished */ - -/** - * devfsd_notify_de - Notify the devfsd daemon of a change. - * @de: The devfs entry that has changed. This and all parent entries will - * have their reference counts incremented if the event was queued. - * @type: The type of change. - * @mode: The mode of the entry. - * @uid: The user ID. - * @gid: The group ID. - * @fs_info: The filesystem info. - * - * Returns %TRUE if an event was queued and devfsd woken up, else %FALSE. 
- */ - -static int devfsd_notify_de(struct devfs_entry *de, - unsigned short type, umode_t mode, - uid_t uid, gid_t gid, struct fs_info *fs_info) -{ - struct devfsd_buf_entry *entry; - struct devfs_entry *curr; - - if (!(fs_info->devfsd_event_mask & (1 << type))) - return (FALSE); - if ((entry = kmem_cache_alloc(devfsd_buf_cache, SLAB_KERNEL)) == NULL) { - atomic_inc(&fs_info->devfsd_overrun_count); - return (FALSE); - } - for (curr = de; curr != NULL; curr = curr->parent) - devfs_get(curr); - entry->de = de; - entry->type = type; - entry->mode = mode; - entry->uid = uid; - entry->gid = gid; - entry->next = NULL; - spin_lock(&fs_info->devfsd_buffer_lock); - if (!fs_info->devfsd_first_event) - fs_info->devfsd_first_event = entry; - if (fs_info->devfsd_last_event) - fs_info->devfsd_last_event->next = entry; - fs_info->devfsd_last_event = entry; - spin_unlock(&fs_info->devfsd_buffer_lock); - wake_up_interruptible(&fs_info->devfsd_wait_queue); - return (TRUE); -} /* End Function devfsd_notify_de */ - -/** - * devfsd_notify - Notify the devfsd daemon of a change. - * @de: The devfs entry that has changed. - * @type: The type of change event. - * @wait: If TRUE, the function waits for the daemon to finish processing - * the event. - */ - -static void devfsd_notify(struct devfs_entry *de, unsigned short type) -{ - devfsd_notify_de(de, type, de->mode, current->euid, - current->egid, &fs_info); -} - -static int devfs_mk_dev(dev_t dev, umode_t mode, const char *fmt, va_list args) -{ - struct devfs_entry *dir = NULL, *de; - char buf[64]; - int error, n; - - n = vsnprintf(buf, sizeof(buf), fmt, args); - if (n >= sizeof(buf) || !buf[0]) { - printk(KERN_WARNING "%s: invalid format string %s\n", - __FUNCTION__, fmt); - return -EINVAL; - } - - de = _devfs_prepare_leaf(&dir, buf, mode); - if (!de) { - printk(KERN_WARNING "%s: could not prepare leaf for %s\n", - __FUNCTION__, buf); - return -ENOMEM; /* could be more accurate... 
*/ - } - - de->u.dev = dev; - - error = _devfs_append_entry(dir, de, NULL); - if (error) { - printk(KERN_WARNING "%s: could not append to parent for %s\n", - __FUNCTION__, buf); - goto out; - } - - devfsd_notify(de, DEVFSD_NOTIFY_REGISTERED); - out: - devfs_put(dir); - return error; -} - -int devfs_mk_bdev(dev_t dev, umode_t mode, const char *fmt, ...) -{ - va_list args; - - if (!S_ISBLK(mode)) { - printk(KERN_WARNING "%s: invalide mode (%u) for %s\n", - __FUNCTION__, mode, fmt); - return -EINVAL; - } - - va_start(args, fmt); - return devfs_mk_dev(dev, mode, fmt, args); -} - -EXPORT_SYMBOL(devfs_mk_bdev); - -int devfs_mk_cdev(dev_t dev, umode_t mode, const char *fmt, ...) -{ - va_list args; - - if (!S_ISCHR(mode)) { - printk(KERN_WARNING "%s: invalide mode (%u) for %s\n", - __FUNCTION__, mode, fmt); - return -EINVAL; - } - - va_start(args, fmt); - return devfs_mk_dev(dev, mode, fmt, args); -} - -EXPORT_SYMBOL(devfs_mk_cdev); - -/** - * _devfs_unhook - Unhook a device entry from its parents list - * @de: The entry to unhook. - * - * Returns %TRUE if the entry was unhooked, else %FALSE if it was - * previously unhooked. - * The caller must have a write lock on the parent directory. - */ - -static int _devfs_unhook(struct devfs_entry *de) -{ - struct devfs_entry *parent; - - if (!de || (de->prev == de)) - return FALSE; - parent = de->parent; - if (de->prev == NULL) - parent->u.dir.first = de->next; - else - de->prev->next = de->next; - if (de->next == NULL) - parent->u.dir.last = de->prev; - else - de->next->prev = de->prev; - de->prev = de; /* Indicate we're unhooked */ - de->next = NULL; /* Force early termination for <devfs_readdir> */ - return TRUE; -} /* End Function _devfs_unhook */ - -/** - * _devfs_unregister - Unregister a device entry from its parent. - * @dir: The parent directory. - * @de: The entry to unregister. - * - * The caller must have a write lock on the parent directory, which is - * unlocked by this function. 
- */ - -static void _devfs_unregister(struct devfs_entry *dir, struct devfs_entry *de) -{ - int unhooked = _devfs_unhook(de); - - write_unlock(&dir->u.dir.lock); - if (!unhooked) - return; - devfs_get(dir); - devfsd_notify(de, DEVFSD_NOTIFY_UNREGISTERED); - free_dentry(de); - devfs_put(dir); - if (!S_ISDIR(de->mode)) - return; - while (TRUE) { /* Recursively unregister: this is a stack chomper */ - struct devfs_entry *child; - - write_lock(&de->u.dir.lock); - de->u.dir.no_more_additions = TRUE; - child = de->u.dir.first; - VERIFY_ENTRY(child); - _devfs_unregister(de, child); - if (!child) - break; - DPRINTK(DEBUG_UNREGISTER, "(%s): child: %p refcount: %d\n", - child->name, child, atomic_read(&child->refcount)); - devfs_put(child); - } -} /* End Function _devfs_unregister */ - -static int devfs_do_symlink(devfs_handle_t dir, const char *name, - const char *link, devfs_handle_t * handle) -{ - int err; - unsigned int linklength; - char *newlink; - struct devfs_entry *de; - - if (handle != NULL) - *handle = NULL; - if (name == NULL) { - PRINTK("(): NULL name pointer\n"); - return -EINVAL; - } - if (link == NULL) { - PRINTK("(%s): NULL link pointer\n", name); - return -EINVAL; - } - linklength = strlen(link); - if ((newlink = kmalloc(linklength + 1, GFP_KERNEL)) == NULL) - return -ENOMEM; - memcpy(newlink, link, linklength); - newlink[linklength] = '\0'; - if ((de = _devfs_prepare_leaf(&dir, name, S_IFLNK | S_IRUGO | S_IXUGO)) - == NULL) { - PRINTK("(%s): could not prepare leaf\n", name); - kfree(newlink); - return -ENOTDIR; - } - de->info = NULL; - de->u.symlink.linkname = newlink; - de->u.symlink.length = linklength; - if ((err = _devfs_append_entry(dir, de, NULL)) != 0) { - PRINTK("(%s): could not append to parent, err: %d\n", name, - err); - devfs_put(dir); - return err; - } - devfs_put(dir); -#ifdef CONFIG_DEVFS_DEBUG - spin_lock(&stat_lock); - stat_num_bytes += linklength + 1; - spin_unlock(&stat_lock); -#endif - if (handle != NULL) - *handle = de; - return 0; -} 
/* End Function devfs_do_symlink */ - -/** - * devfs_mk_symlink Create a symbolic link in the devfs namespace. - * @from: The name of the entry. - * @to: Name of the destination - * - * Returns 0 on success, else a negative error code is returned. - */ - -int devfs_mk_symlink(const char *from, const char *to) -{ - devfs_handle_t de; - int err; - - err = devfs_do_symlink(NULL, from, to, &de); - if (!err) { - de->vfs = TRUE; - devfsd_notify(de, DEVFSD_NOTIFY_REGISTERED); - } - - return err; -} - -/** - * devfs_mk_dir - Create a directory in the devfs namespace. - * new name is relative to the root of the devfs. - * @fmt: The name of the entry. - * - * Use of this function is optional. The devfs_register() function - * will automatically create intermediate directories as needed. This function - * is provided for efficiency reasons, as it provides a handle to a directory. - * On failure %NULL is returned. - */ - -int devfs_mk_dir(const char *fmt, ...) -{ - struct devfs_entry *dir = NULL, *de = NULL, *old; - char buf[64]; - va_list args; - int error, n; - - va_start(args, fmt); - n = vsnprintf(buf, 64, fmt, args); - if (n >= 64 || !buf[0]) { - printk(KERN_WARNING "%s: invalid argument.", __FUNCTION__); - return -EINVAL; - } - - de = _devfs_prepare_leaf(&dir, buf, MODE_DIR); - if (!de) { - PRINTK("(%s): could not prepare leaf\n", buf); - return -EINVAL; - } - - error = _devfs_append_entry(dir, de, &old); - if (error == -EEXIST && S_ISDIR(old->mode)) { - /* - * devfs_mk_dir() of an already-existing directory will - * return success. - */ - error = 0; - goto out_put; - } else if (error) { - PRINTK("(%s): could not append to dir: %p \"%s\"\n", - buf, dir, dir->name); - devfs_put(old); - goto out_put; - } - - devfsd_notify(de, DEVFSD_NOTIFY_REGISTERED); - - out_put: - devfs_put(dir); - return error; -} - -void devfs_remove(const char *fmt, ...) 
-{ - char buf[64]; - va_list args; - int n; - - va_start(args, fmt); - n = vsnprintf(buf, sizeof(buf), fmt, args); - if (n < sizeof(buf) && buf[0]) { - devfs_handle_t de = _devfs_find_entry(NULL, buf, 0); - - if (!de) { - printk(KERN_ERR "%s: %s not found, cannot remove\n", - __FUNCTION__, buf); - dump_stack(); - return; - } - - write_lock(&de->parent->u.dir.lock); - _devfs_unregister(de->parent, de); - devfs_put(de); - devfs_put(de); - } -} - -/** - * devfs_generate_path - Generate a pathname for an entry, relative to the devfs root. - * @de: The devfs entry. - * @path: The buffer to write the pathname to. The pathname and '\0' - * terminator will be written at the end of the buffer. - * @buflen: The length of the buffer. - * - * Returns the offset in the buffer where the pathname starts on success, - * else a negative error code. - */ - -static int devfs_generate_path(devfs_handle_t de, char *path, int buflen) -{ - int pos; -#define NAMEOF(de) ( (de)->mode ? (de)->name : (de)->u.name ) - - if (de == NULL) - return -EINVAL; - VERIFY_ENTRY(de); - if (de->namelen >= buflen) - return -ENAMETOOLONG; /* Must be first */ - path[buflen - 1] = '\0'; - if (de->parent == NULL) - return buflen - 1; /* Don't prepend root */ - pos = buflen - de->namelen - 1; - memcpy(path + pos, NAMEOF(de), de->namelen); - for (de = de->parent; de->parent != NULL; de = de->parent) { - if (pos - de->namelen - 1 < 0) - return -ENAMETOOLONG; - path[--pos] = '/'; - pos -= de->namelen; - memcpy(path + pos, NAMEOF(de), de->namelen); - } - return pos; -} /* End Function devfs_generate_path */ - -/** - * devfs_setup - Process kernel boot options. - * @str: The boot options after the "devfs=". 
- */ - -static int __init devfs_setup(char *str) -{ - static struct { - char *name; - unsigned int mask; - unsigned int *opt; - } devfs_options_tab[] __initdata = { -#ifdef CONFIG_DEVFS_DEBUG - { - "dall", DEBUG_ALL, &devfs_debug_init}, { - "dmod", DEBUG_MODULE_LOAD, &devfs_debug_init}, { - "dreg", DEBUG_REGISTER, &devfs_debug_init}, { - "dunreg", DEBUG_UNREGISTER, &devfs_debug_init}, { - "dfree", DEBUG_FREE, &devfs_debug_init}, { - "diget", DEBUG_I_GET, &devfs_debug_init}, { - "dchange", DEBUG_SET_FLAGS, &devfs_debug_init}, { - "dsread", DEBUG_S_READ, &devfs_debug_init}, { - "dichange", DEBUG_I_CHANGE, &devfs_debug_init}, { - "dimknod", DEBUG_I_MKNOD, &devfs_debug_init}, { - "dilookup", DEBUG_I_LOOKUP, &devfs_debug_init}, { - "diunlink", DEBUG_I_UNLINK, &devfs_debug_init}, -#endif /* CONFIG_DEVFS_DEBUG */ - { - "mount", OPTION_MOUNT, &boot_options}, { - NULL, 0, NULL} - }; - - while ((*str != '\0') && !isspace(*str)) { - int i, found = 0, invert = 0; - - if (strncmp(str, "no", 2) == 0) { - invert = 1; - str += 2; - } - for (i = 0; devfs_options_tab[i].name != NULL; i++) { - int len = strlen(devfs_options_tab[i].name); - - if (strncmp(str, devfs_options_tab[i].name, len) == 0) { - if (invert) - *devfs_options_tab[i].opt &= - ~devfs_options_tab[i].mask; - else - *devfs_options_tab[i].opt |= - devfs_options_tab[i].mask; - str += len; - found = 1; - break; - } - } - if (!found) - return 0; /* No match */ - if (*str != ',') - return 0; /* No more options */ - ++str; - } - return 1; -} /* End Function devfs_setup */ - -__setup("devfs=", devfs_setup); - -EXPORT_SYMBOL(devfs_mk_dir); -EXPORT_SYMBOL(devfs_remove); - -/** - * try_modload - Notify devfsd of an inode lookup by a non-devfsd process. - * @parent: The parent devfs entry. - * @fs_info: The filesystem info. - * @name: The device name. - * @namelen: The number of characters in @name. - * @buf: A working area that will be used. This must not go out of scope - * until devfsd is idle again. 
- * - * Returns 0 on success (event was queued), else a negative error code. - */ - -static int try_modload(struct devfs_entry *parent, struct fs_info *fs_info, - const char *name, unsigned namelen, - struct devfs_entry *buf) -{ - if (!(fs_info->devfsd_event_mask & (1 << DEVFSD_NOTIFY_LOOKUP))) - return -ENOENT; - if (is_devfsd_or_child(fs_info)) - return -ENOENT; - memset(buf, 0, sizeof *buf); - atomic_set(&buf->refcount, 1); - buf->parent = parent; - buf->namelen = namelen; - buf->u.name = name; - WRITE_ENTRY_MAGIC(buf, MAGIC_VALUE); - if (!devfsd_notify_de(buf, DEVFSD_NOTIFY_LOOKUP, 0, - current->euid, current->egid, fs_info)) - return -ENOENT; - /* Possible success: event has been queued */ - return 0; -} /* End Function try_modload */ - -/* Superblock operations follow */ - -static struct inode_operations devfs_iops; -static struct inode_operations devfs_dir_iops; -static const struct file_operations devfs_fops; -static const struct file_operations devfs_dir_fops; -static struct inode_operations devfs_symlink_iops; - -static int devfs_notify_change(struct dentry *dentry, struct iattr *iattr) -{ - int retval; - struct devfs_entry *de; - struct inode *inode = dentry->d_inode; - struct fs_info *fs_info = inode->i_sb->s_fs_info; - - de = get_devfs_entry_from_vfs_inode(inode); - if (de == NULL) - return -ENODEV; - retval = inode_change_ok(inode, iattr); - if (retval != 0) - return retval; - retval = inode_setattr(inode, iattr); - if (retval != 0) - return retval; - DPRINTK(DEBUG_I_CHANGE, "(%d): VFS inode: %p devfs_entry: %p\n", - (int)inode->i_ino, inode, de); - DPRINTK(DEBUG_I_CHANGE, "(): mode: 0%o uid: %d gid: %d\n", - (int)inode->i_mode, (int)inode->i_uid, (int)inode->i_gid); - /* Inode is not on hash chains, thus must save permissions here rather - than in a write_inode() method */ - de->mode = inode->i_mode; - de->inode.uid = inode->i_uid; - de->inode.gid = inode->i_gid; - de->inode.atime = inode->i_atime; - de->inode.mtime = inode->i_mtime; - 
de->inode.ctime = inode->i_ctime; - if ((iattr->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID)) && - !is_devfsd_or_child(fs_info)) - devfsd_notify_de(de, DEVFSD_NOTIFY_CHANGE, inode->i_mode, - inode->i_uid, inode->i_gid, fs_info); - return 0; -} /* End Function devfs_notify_change */ - -static struct super_operations devfs_sops = { - .drop_inode = generic_delete_inode, - .statfs = simple_statfs, -}; - -/** - * _devfs_get_vfs_inode - Get a VFS inode. - * @sb: The super block. - * @de: The devfs inode. - * @dentry: The dentry to register with the devfs inode. - * - * Returns the inode on success, else %NULL. An implicit devfs_get() is - * performed if the inode is created. - */ - -static struct inode *_devfs_get_vfs_inode(struct super_block *sb, - struct devfs_entry *de, - struct dentry *dentry) -{ - struct inode *inode; - - if (de->prev == de) - return NULL; /* Quick check to see if unhooked */ - if ((inode = new_inode(sb)) == NULL) { - PRINTK("(%s): new_inode() failed, de: %p\n", de->name, de); - return NULL; - } - if (de->parent) { - read_lock(&de->parent->u.dir.lock); - if (de->prev != de) - de->inode.dentry = dentry; /* Not unhooked */ - read_unlock(&de->parent->u.dir.lock); - } else - de->inode.dentry = dentry; /* Root: no locking needed */ - if (de->inode.dentry != dentry) { /* Must have been unhooked */ - iput(inode); - return NULL; - } - /* FIXME where is devfs_put? 
*/ - inode->u.generic_ip = devfs_get(de); - inode->i_ino = de->inode.ino; - DPRINTK(DEBUG_I_GET, "(%d): VFS inode: %p devfs_entry: %p\n", - (int)inode->i_ino, inode, de); - inode->i_blocks = 0; - inode->i_blksize = FAKE_BLOCK_SIZE; - inode->i_op = &devfs_iops; - inode->i_mode = de->mode; - if (S_ISDIR(de->mode)) { - inode->i_op = &devfs_dir_iops; - inode->i_fop = &devfs_dir_fops; - } else if (S_ISLNK(de->mode)) { - inode->i_op = &devfs_symlink_iops; - inode->i_size = de->u.symlink.length; - } else if (S_ISCHR(de->mode) || S_ISBLK(de->mode)) { - init_special_inode(inode, de->mode, de->u.dev); - } else if (S_ISFIFO(de->mode) || S_ISSOCK(de->mode)) { - init_special_inode(inode, de->mode, 0); - } else { - PRINTK("(%s): unknown mode %o de: %p\n", - de->name, de->mode, de); - iput(inode); - devfs_put(de); - return NULL; - } - - inode->i_uid = de->inode.uid; - inode->i_gid = de->inode.gid; - inode->i_atime = de->inode.atime; - inode->i_mtime = de->inode.mtime; - inode->i_ctime = de->inode.ctime; - DPRINTK(DEBUG_I_GET, "(): mode: 0%o uid: %d gid: %d\n", - (int)inode->i_mode, (int)inode->i_uid, (int)inode->i_gid); - return inode; -} /* End Function _devfs_get_vfs_inode */ - -/* File operations for device entries follow */ - -static int devfs_readdir(struct file *file, void *dirent, filldir_t filldir) -{ - int err, count; - int stored = 0; - struct fs_info *fs_info; - struct devfs_entry *parent, *de, *next = NULL; - struct inode *inode = file->f_dentry->d_inode; - - fs_info = inode->i_sb->s_fs_info; - parent = get_devfs_entry_from_vfs_inode(file->f_dentry->d_inode); - if ((long)file->f_pos < 0) - return -EINVAL; - DPRINTK(DEBUG_F_READDIR, "(%s): fs_info: %p pos: %ld\n", - parent->name, fs_info, (long)file->f_pos); - switch ((long)file->f_pos) { - case 0: - err = (*filldir) (dirent, "..", 2, file->f_pos, - parent_ino(file->f_dentry), DT_DIR); - if (err == -EINVAL) - break; - if (err < 0) - return err; - file->f_pos++; - ++stored; - /* Fall through */ - case 1: - err = - 
(*filldir) (dirent, ".", 1, file->f_pos, inode->i_ino, - DT_DIR); - if (err == -EINVAL) - break; - if (err < 0) - return err; - file->f_pos++; - ++stored; - /* Fall through */ - default: - /* Skip entries */ - count = file->f_pos - 2; - read_lock(&parent->u.dir.lock); - for (de = parent->u.dir.first; de && (count > 0); de = de->next) - --count; - devfs_get(de); - read_unlock(&parent->u.dir.lock); - /* Now add all remaining entries */ - while (de) { - err = (*filldir) (dirent, de->name, de->namelen, - file->f_pos, de->inode.ino, - de->mode >> 12); - if (err < 0) - devfs_put(de); - else { - file->f_pos++; - ++stored; - } - if (err == -EINVAL) - break; - if (err < 0) - return err; - read_lock(&parent->u.dir.lock); - next = devfs_get(de->next); - read_unlock(&parent->u.dir.lock); - devfs_put(de); - de = next; - } - break; - } - return stored; -} /* End Function devfs_readdir */ - -/* Open devfs specific special files */ -static int devfs_open(struct inode *inode, struct file *file) -{ - int err; - int minor = MINOR(inode->i_rdev); - struct file_operations *old_fops, *new_fops; - - switch (minor) { - case 0: /* /dev/.devfsd */ - new_fops = fops_get(&devfsd_fops); - break; -#ifdef CONFIG_DEVFS_DEBUG - case 1: /* /dev/.stat */ - new_fops = fops_get(&stat_fops); - break; -#endif - default: - return -ENODEV; - } - - if (new_fops == NULL) - return -ENODEV; - old_fops = file->f_op; - file->f_op = new_fops; - err = new_fops->open ? new_fops->open(inode, file) : 0; - if (err) { - file->f_op = old_fops; - fops_put(new_fops); - } else - fops_put(old_fops); - return err; -} /* End Function devfs_open */ - -static const struct file_operations devfs_fops = { - .open = devfs_open, -}; - -static const struct file_operations devfs_dir_fops = { - .read = generic_read_dir, - .readdir = devfs_readdir, -}; - -/* Dentry operations for device entries follow */ - -/** - * devfs_d_release - Callback for when a dentry is freed. - * @dentry: The dentry. 
- */ - -static void devfs_d_release(struct dentry *dentry) -{ - DPRINTK(DEBUG_D_RELEASE, "(%p): inode: %p\n", dentry, dentry->d_inode); -} /* End Function devfs_d_release */ - -/** - * devfs_d_iput - Callback for when a dentry loses its inode. - * @dentry: The dentry. - * @inode: The inode. - */ - -static void devfs_d_iput(struct dentry *dentry, struct inode *inode) -{ - struct devfs_entry *de; - - de = get_devfs_entry_from_vfs_inode(inode); - DPRINTK(DEBUG_D_IPUT, - "(%s): dentry: %p inode: %p de: %p de->dentry: %p\n", de->name, - dentry, inode, de, de->inode.dentry); - if (de->inode.dentry && (de->inode.dentry != dentry)) - OOPS("(%s): de: %p dentry: %p de->dentry: %p\n", - de->name, de, dentry, de->inode.dentry); - de->inode.dentry = NULL; - iput(inode); - devfs_put(de); -} /* End Function devfs_d_iput */ - -static int devfs_d_delete(struct dentry *dentry); - -static struct dentry_operations devfs_dops = { - .d_delete = devfs_d_delete, - .d_release = devfs_d_release, - .d_iput = devfs_d_iput, -}; - -static int devfs_d_revalidate_wait(struct dentry *dentry, struct nameidata *); - -static struct dentry_operations devfs_wait_dops = { - .d_delete = devfs_d_delete, - .d_release = devfs_d_release, - .d_iput = devfs_d_iput, - .d_revalidate = devfs_d_revalidate_wait, -}; - -/** - * devfs_d_delete - Callback for when all files for a dentry are closed. - * @dentry: The dentry. 
- */ - -static int devfs_d_delete(struct dentry *dentry) -{ - struct inode *inode = dentry->d_inode; - - if (dentry->d_op == &devfs_wait_dops) - dentry->d_op = &devfs_dops; - /* Unhash dentry if negative (has no inode) */ - if (inode == NULL) { - DPRINTK(DEBUG_D_DELETE, "(%p): dropping negative dentry\n", - dentry); - return 1; - } - return 0; -} /* End Function devfs_d_delete */ - -struct devfs_lookup_struct { - devfs_handle_t de; - wait_queue_head_t wait_queue; -}; - -/* XXX: this doesn't handle the case where we got a negative dentry - but a devfs entry has been registered in the meanwhile */ -static int devfs_d_revalidate_wait(struct dentry *dentry, struct nameidata *nd) -{ - struct inode *dir = dentry->d_parent->d_inode; - struct fs_info *fs_info = dir->i_sb->s_fs_info; - devfs_handle_t parent = get_devfs_entry_from_vfs_inode(dir); - struct devfs_lookup_struct *lookup_info = dentry->d_fsdata; - DECLARE_WAITQUEUE(wait, current); - int need_lock; - - /* - * FIXME HACK - * - * make sure that - * d_instantiate always runs under lock - * we release i_mutex lock before going to sleep - * - * unfortunately sometimes d_revalidate is called with - * and sometimes without i_mutex lock held. The following checks - * attempt to deduce when we need to add (and drop resp.) lock - * here. This relies on current (2.6.2) calling coventions: - * - * lookup_hash is always run under i_mutex and is passing NULL - * as nd - * - * open(...,O_CREATE,...) 
calls _lookup_hash under i_mutex - * and sets flags to LOOKUP_OPEN|LOOKUP_CREATE - * - * all other invocations of ->d_revalidate seem to happen - * outside of i_mutex - */ - need_lock = nd && - (!(nd->flags & LOOKUP_CREATE) || (nd->flags & LOOKUP_PARENT)); - - if (need_lock) - mutex_lock(&dir->i_mutex); - - if (is_devfsd_or_child(fs_info)) { - devfs_handle_t de = lookup_info->de; - struct inode *inode; - - DPRINTK(DEBUG_I_LOOKUP, - "(%s): dentry: %p inode: %p de: %p by: \"%s\"\n", - dentry->d_name.name, dentry, dentry->d_inode, de, - current->comm); - if (dentry->d_inode) - goto out; - if (de == NULL) { - read_lock(&parent->u.dir.lock); - de = _devfs_search_dir(parent, dentry->d_name.name, - dentry->d_name.len); - read_unlock(&parent->u.dir.lock); - if (de == NULL) - goto out; - lookup_info->de = de; - } - /* Create an inode, now that the driver information is available */ - inode = _devfs_get_vfs_inode(dir->i_sb, de, dentry); - if (!inode) - goto out; - DPRINTK(DEBUG_I_LOOKUP, - "(%s): new VFS inode(%u): %p de: %p by: \"%s\"\n", - de->name, de->inode.ino, inode, de, current->comm); - d_instantiate(dentry, inode); - goto out; - } - if (lookup_info == NULL) - goto out; /* Early termination */ - read_lock(&parent->u.dir.lock); - if (dentry->d_fsdata) { - set_current_state(TASK_UNINTERRUPTIBLE); - add_wait_queue(&lookup_info->wait_queue, &wait); - read_unlock(&parent->u.dir.lock); - /* at this point it is always (hopefully) locked */ - mutex_unlock(&dir->i_mutex); - schedule(); - mutex_lock(&dir->i_mutex); - /* - * This does not need nor should remove wait from wait_queue. - * Wait queue head is never reused - nothing is ever added to it - * after all waiters have been waked up and head itself disappears - * very soon after it. Moreover it is local variable on stack that - * is likely to have already disappeared so any reference to it - * at this point is buggy. 
- */ - - } else - read_unlock(&parent->u.dir.lock); - - out: - if (need_lock) - mutex_unlock(&dir->i_mutex); - return 1; -} /* End Function devfs_d_revalidate_wait */ - -/* Inode operations for device entries follow */ - -static struct dentry *devfs_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) -{ - struct devfs_entry tmp; /* Must stay in scope until devfsd idle again */ - struct devfs_lookup_struct lookup_info; - struct fs_info *fs_info = dir->i_sb->s_fs_info; - struct devfs_entry *parent, *de; - struct inode *inode; - struct dentry *retval = NULL; - - /* Set up the dentry operations before anything else, to ensure cleaning - up on any error */ - dentry->d_op = &devfs_dops; - /* First try to get the devfs entry for this directory */ - parent = get_devfs_entry_from_vfs_inode(dir); - DPRINTK(DEBUG_I_LOOKUP, "(%s): dentry: %p parent: %p by: \"%s\"\n", - dentry->d_name.name, dentry, parent, current->comm); - if (parent == NULL) - return ERR_PTR(-ENOENT); - read_lock(&parent->u.dir.lock); - de = _devfs_search_dir(parent, dentry->d_name.name, dentry->d_name.len); - read_unlock(&parent->u.dir.lock); - lookup_info.de = de; - init_waitqueue_head(&lookup_info.wait_queue); - dentry->d_fsdata = &lookup_info; - if (de == NULL) { /* Try with devfsd. For any kind of failure, leave a negative dentry - so someone else can deal with it (in the case where the sysadmin - does a mknod()). It's important to do this before hashing the - dentry, so that the devfsd queue is filled before revalidates - can start */ - if (try_modload(parent, fs_info, dentry->d_name.name, dentry->d_name.len, &tmp) < 0) { /* Lookup event was not queued to devfsd */ - d_add(dentry, NULL); - return NULL; - } - } - dentry->d_op = &devfs_wait_dops; - d_add(dentry, NULL); /* Open the floodgates */ - /* Unlock directory semaphore, which will release any waiters. 
They - will get the hashed dentry, and may be forced to wait for - revalidation */ - mutex_unlock(&dir->i_mutex); - wait_for_devfsd_finished(fs_info); /* If I'm not devfsd, must wait */ - mutex_lock(&dir->i_mutex); /* Grab it again because them's the rules */ - de = lookup_info.de; - /* If someone else has been so kind as to make the inode, we go home - early */ - if (dentry->d_inode) - goto out; - if (de == NULL) { - read_lock(&parent->u.dir.lock); - de = _devfs_search_dir(parent, dentry->d_name.name, - dentry->d_name.len); - read_unlock(&parent->u.dir.lock); - if (de == NULL) - goto out; - /* OK, there's an entry now, but no VFS inode yet */ - } - /* Create an inode, now that the driver information is available */ - inode = _devfs_get_vfs_inode(dir->i_sb, de, dentry); - if (!inode) { - retval = ERR_PTR(-ENOMEM); - goto out; - } - DPRINTK(DEBUG_I_LOOKUP, - "(%s): new VFS inode(%u): %p de: %p by: \"%s\"\n", de->name, - de->inode.ino, inode, de, current->comm); - d_instantiate(dentry, inode); - out: - write_lock(&parent->u.dir.lock); - dentry->d_op = &devfs_dops; - dentry->d_fsdata = NULL; - wake_up(&lookup_info.wait_queue); - write_unlock(&parent->u.dir.lock); - devfs_put(de); - return retval; -} /* End Function devfs_lookup */ - -static int devfs_unlink(struct inode *dir, struct dentry *dentry) -{ - int unhooked; - struct devfs_entry *de; - struct inode *inode = dentry->d_inode; - struct fs_info *fs_info = dir->i_sb->s_fs_info; - - de = get_devfs_entry_from_vfs_inode(inode); - DPRINTK(DEBUG_I_UNLINK, "(%s): de: %p\n", dentry->d_name.name, de); - if (de == NULL) - return -ENOENT; - if (!de->vfs) - return -EPERM; - write_lock(&de->parent->u.dir.lock); - unhooked = _devfs_unhook(de); - write_unlock(&de->parent->u.dir.lock); - if (!unhooked) - return -ENOENT; - if (!is_devfsd_or_child(fs_info)) - devfsd_notify_de(de, DEVFSD_NOTIFY_DELETE, inode->i_mode, - inode->i_uid, inode->i_gid, fs_info); - free_dentry(de); - devfs_put(de); - return 0; -} /* End Function 
devfs_unlink */ - -static int devfs_symlink(struct inode *dir, struct dentry *dentry, - const char *symname) -{ - int err; - struct fs_info *fs_info = dir->i_sb->s_fs_info; - struct devfs_entry *parent, *de; - struct inode *inode; - - /* First try to get the devfs entry for this directory */ - parent = get_devfs_entry_from_vfs_inode(dir); - if (parent == NULL) - return -ENOENT; - err = devfs_do_symlink(parent, dentry->d_name.name, symname, &de); - DPRINTK(DEBUG_DISABLED, "(%s): errcode from <devfs_do_symlink>: %d\n", - dentry->d_name.name, err); - if (err < 0) - return err; - de->vfs = TRUE; - de->inode.uid = current->euid; - de->inode.gid = current->egid; - de->inode.atime = CURRENT_TIME; - de->inode.mtime = CURRENT_TIME; - de->inode.ctime = CURRENT_TIME; - if ((inode = _devfs_get_vfs_inode(dir->i_sb, de, dentry)) == NULL) - return -ENOMEM; - DPRINTK(DEBUG_DISABLED, "(%s): new VFS inode(%u): %p dentry: %p\n", - dentry->d_name.name, de->inode.ino, inode, dentry); - d_instantiate(dentry, inode); - if (!is_devfsd_or_child(fs_info)) - devfsd_notify_de(de, DEVFSD_NOTIFY_CREATE, inode->i_mode, - inode->i_uid, inode->i_gid, fs_info); - return 0; -} /* End Function devfs_symlink */ - -static int devfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) -{ - int err; - struct fs_info *fs_info = dir->i_sb->s_fs_info; - struct devfs_entry *parent, *de; - struct inode *inode; - - mode = (mode & ~S_IFMT) | S_IFDIR; /* VFS doesn't pass S_IFMT part */ - parent = get_devfs_entry_from_vfs_inode(dir); - if (parent == NULL) - return -ENOENT; - de = _devfs_alloc_entry(dentry->d_name.name, dentry->d_name.len, mode); - if (!de) - return -ENOMEM; - de->vfs = TRUE; - if ((err = _devfs_append_entry(parent, de, NULL)) != 0) - return err; - de->inode.uid = current->euid; - de->inode.gid = current->egid; - de->inode.atime = CURRENT_TIME; - de->inode.mtime = CURRENT_TIME; - de->inode.ctime = CURRENT_TIME; - if ((inode = _devfs_get_vfs_inode(dir->i_sb, de, dentry)) == NULL) - return 
-ENOMEM; - DPRINTK(DEBUG_DISABLED, "(%s): new VFS inode(%u): %p dentry: %p\n", - dentry->d_name.name, de->inode.ino, inode, dentry); - d_instantiate(dentry, inode); - if (!is_devfsd_or_child(fs_info)) - devfsd_notify_de(de, DEVFSD_NOTIFY_CREATE, inode->i_mode, - inode->i_uid, inode->i_gid, fs_info); - return 0; -} /* End Function devfs_mkdir */ - -static int devfs_rmdir(struct inode *dir, struct dentry *dentry) -{ - int err = 0; - struct devfs_entry *de; - struct fs_info *fs_info = dir->i_sb->s_fs_info; - struct inode *inode = dentry->d_inode; - - if (dir->i_sb->s_fs_info != inode->i_sb->s_fs_info) - return -EINVAL; - de = get_devfs_entry_from_vfs_inode(inode); - if (de == NULL) - return -ENOENT; - if (!S_ISDIR(de->mode)) - return -ENOTDIR; - if (!de->vfs) - return -EPERM; - /* First ensure the directory is empty and will stay that way */ - write_lock(&de->u.dir.lock); - if (de->u.dir.first) - err = -ENOTEMPTY; - else - de->u.dir.no_more_additions = TRUE; - write_unlock(&de->u.dir.lock); - if (err) - return err; - /* Now unhook the directory from its parent */ - write_lock(&de->parent->u.dir.lock); - if (!_devfs_unhook(de)) - err = -ENOENT; - write_unlock(&de->parent->u.dir.lock); - if (err) - return err; - if (!is_devfsd_or_child(fs_info)) - devfsd_notify_de(de, DEVFSD_NOTIFY_DELETE, inode->i_mode, - inode->i_uid, inode->i_gid, fs_info); - free_dentry(de); - devfs_put(de); - return 0; -} /* End Function devfs_rmdir */ - -static int devfs_mknod(struct inode *dir, struct dentry *dentry, int mode, - dev_t rdev) -{ - int err; - struct fs_info *fs_info = dir->i_sb->s_fs_info; - struct devfs_entry *parent, *de; - struct inode *inode; - - DPRINTK(DEBUG_I_MKNOD, "(%s): mode: 0%o dev: %u:%u\n", - dentry->d_name.name, mode, MAJOR(rdev), MINOR(rdev)); - parent = get_devfs_entry_from_vfs_inode(dir); - if (parent == NULL) - return -ENOENT; - de = _devfs_alloc_entry(dentry->d_name.name, dentry->d_name.len, mode); - if (!de) - return -ENOMEM; - de->vfs = TRUE; - if 
(S_ISCHR(mode) || S_ISBLK(mode)) - de->u.dev = rdev; - if ((err = _devfs_append_entry(parent, de, NULL)) != 0) - return err; - de->inode.uid = current->euid; - de->inode.gid = current->egid; - de->inode.atime = CURRENT_TIME; - de->inode.mtime = CURRENT_TIME; - de->inode.ctime = CURRENT_TIME; - if ((inode = _devfs_get_vfs_inode(dir->i_sb, de, dentry)) == NULL) - return -ENOMEM; - DPRINTK(DEBUG_I_MKNOD, ": new VFS inode(%u): %p dentry: %p\n", - de->inode.ino, inode, dentry); - d_instantiate(dentry, inode); - if (!is_devfsd_or_child(fs_info)) - devfsd_notify_de(de, DEVFSD_NOTIFY_CREATE, inode->i_mode, - inode->i_uid, inode->i_gid, fs_info); - return 0; -} /* End Function devfs_mknod */ - -static void *devfs_follow_link(struct dentry *dentry, struct nameidata *nd) -{ - struct devfs_entry *p = get_devfs_entry_from_vfs_inode(dentry->d_inode); - nd_set_link(nd, p ? p->u.symlink.linkname : ERR_PTR(-ENODEV)); - return NULL; -} /* End Function devfs_follow_link */ - -static struct inode_operations devfs_iops = { - .setattr = devfs_notify_change, -}; - -static struct inode_operations devfs_dir_iops = { - .lookup = devfs_lookup, - .unlink = devfs_unlink, - .symlink = devfs_symlink, - .mkdir = devfs_mkdir, - .rmdir = devfs_rmdir, - .mknod = devfs_mknod, - .setattr = devfs_notify_change, -}; - -static struct inode_operations devfs_symlink_iops = { - .readlink = generic_readlink, - .follow_link = devfs_follow_link, - .setattr = devfs_notify_change, -}; - -static int devfs_fill_super(struct super_block *sb, void *data, int silent) -{ - struct inode *root_inode = NULL; - - if (_devfs_get_root_entry() == NULL) - goto out_no_root; - atomic_set(&fs_info.devfsd_overrun_count, 0); - init_waitqueue_head(&fs_info.devfsd_wait_queue); - init_waitqueue_head(&fs_info.revalidate_wait_queue); - fs_info.sb = sb; - sb->s_fs_info = &fs_info; - sb->s_blocksize = 1024; - sb->s_blocksize_bits = 10; - sb->s_magic = DEVFS_SUPER_MAGIC; - sb->s_op = &devfs_sops; - sb->s_time_gran = 1; - if ((root_inode = 
_devfs_get_vfs_inode(sb, root_entry, NULL)) == NULL) - goto out_no_root; - sb->s_root = d_alloc_root(root_inode); - if (!sb->s_root) - goto out_no_root; - DPRINTK(DEBUG_S_READ, "(): made devfs ptr: %p\n", sb->s_fs_info); - return 0; - - out_no_root: - PRINTK("(): get root inode failed\n"); - if (root_inode) - iput(root_inode); - return -EINVAL; -} /* End Function devfs_fill_super */ - -static int devfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, - void *data, struct vfsmount *mnt) -{ - return get_sb_single(fs_type, flags, data, devfs_fill_super, mnt); -} - -static struct file_system_type devfs_fs_type = { - .name = DEVFS_NAME, - .get_sb = devfs_get_sb, - .kill_sb = kill_anon_super, -}; - -/* File operations for devfsd follow */ - -static ssize_t devfsd_read(struct file *file, char __user *buf, size_t len, - loff_t * ppos) -{ - int done = FALSE; - int ival; - loff_t pos, devname_offset, tlen, rpos; - devfs_handle_t de; - struct devfsd_buf_entry *entry; - struct fs_info *fs_info = file->f_dentry->d_inode->i_sb->s_fs_info; - struct devfsd_notify_struct *info = fs_info->devfsd_info; - DECLARE_WAITQUEUE(wait, current); - - /* Verify the task has grabbed the queue */ - if (fs_info->devfsd_task != current) - return -EPERM; - info->major = 0; - info->minor = 0; - /* Block for a new entry */ - set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue(&fs_info->devfsd_wait_queue, &wait); - while (devfsd_queue_empty(fs_info)) { - fs_info->devfsd_sleeping = TRUE; - wake_up(&fs_info->revalidate_wait_queue); - schedule(); - fs_info->devfsd_sleeping = FALSE; - if (signal_pending(current)) { - remove_wait_queue(&fs_info->devfsd_wait_queue, &wait); - __set_current_state(TASK_RUNNING); - return -EINTR; - } - set_current_state(TASK_INTERRUPTIBLE); - } - remove_wait_queue(&fs_info->devfsd_wait_queue, &wait); - __set_current_state(TASK_RUNNING); - /* Now play with the data */ - ival = atomic_read(&fs_info->devfsd_overrun_count); - info->overrun_count = 
ival; - entry = fs_info->devfsd_first_event; - info->type = entry->type; - info->mode = entry->mode; - info->uid = entry->uid; - info->gid = entry->gid; - de = entry->de; - if (S_ISCHR(de->mode) || S_ISBLK(de->mode)) { - info->major = MAJOR(de->u.dev); - info->minor = MINOR(de->u.dev); - } - pos = devfs_generate_path(de, info->devname, DEVFS_PATHLEN); - if (pos < 0) - return pos; - info->namelen = DEVFS_PATHLEN - pos - 1; - if (info->mode == 0) - info->mode = de->mode; - devname_offset = info->devname - (char *)info; - rpos = *ppos; - if (rpos < devname_offset) { - /* Copy parts of the header */ - tlen = devname_offset - rpos; - if (tlen > len) - tlen = len; - if (copy_to_user(buf, (char *)info + rpos, tlen)) { - return -EFAULT; - } - rpos += tlen; - buf += tlen; - len -= tlen; - } - if ((rpos >= devname_offset) && (len > 0)) { - /* Copy the name */ - tlen = info->namelen + 1; - if (tlen > len) - tlen = len; - else - done = TRUE; - if (copy_to_user - (buf, info->devname + pos + rpos - devname_offset, tlen)) { - return -EFAULT; - } - rpos += tlen; - } - tlen = rpos - *ppos; - if (done) { - devfs_handle_t parent; - - spin_lock(&fs_info->devfsd_buffer_lock); - fs_info->devfsd_first_event = entry->next; - if (entry->next == NULL) - fs_info->devfsd_last_event = NULL; - spin_unlock(&fs_info->devfsd_buffer_lock); - for (; de != NULL; de = parent) { - parent = de->parent; - devfs_put(de); - } - kmem_cache_free(devfsd_buf_cache, entry); - if (ival > 0) - atomic_sub(ival, &fs_info->devfsd_overrun_count); - *ppos = 0; - } else - *ppos = rpos; - return tlen; -} /* End Function devfsd_read */ - -static int devfsd_ioctl(struct inode *inode, struct file *file, - unsigned int cmd, unsigned long arg) -{ - int ival; - struct fs_info *fs_info = inode->i_sb->s_fs_info; - - switch (cmd) { - case DEVFSDIOC_GET_PROTO_REV: - ival = DEVFSD_PROTOCOL_REVISION_KERNEL; - if (copy_to_user((void __user *)arg, &ival, sizeof ival)) - return -EFAULT; - break; - case DEVFSDIOC_SET_EVENT_MASK: - /* 
Ensure only one reader has access to the queue. This scheme will - work even if the global kernel lock were to be removed, because it - doesn't matter who gets in first, as long as only one gets it */ - if (fs_info->devfsd_task == NULL) { - static DEFINE_SPINLOCK(lock); - - if (!spin_trylock(&lock)) - return -EBUSY; - if (fs_info->devfsd_task != NULL) { /* We lost the race... */ - spin_unlock(&lock); - return -EBUSY; - } - fs_info->devfsd_task = current; - spin_unlock(&lock); - fs_info->devfsd_pgrp = - (process_group(current) == - current->pid) ? process_group(current) : 0; - fs_info->devfsd_file = file; - fs_info->devfsd_info = - kmalloc(sizeof *fs_info->devfsd_info, GFP_KERNEL); - if (!fs_info->devfsd_info) { - devfsd_close(inode, file); - return -ENOMEM; - } - } else if (fs_info->devfsd_task != current) - return -EBUSY; - fs_info->devfsd_event_mask = arg; /* Let the masses come forth */ - break; - case DEVFSDIOC_RELEASE_EVENT_QUEUE: - if (fs_info->devfsd_file != file) - return -EPERM; - return devfsd_close(inode, file); - /*break; */ -#ifdef CONFIG_DEVFS_DEBUG - case DEVFSDIOC_SET_DEBUG_MASK: - if (copy_from_user(&ival, (void __user *)arg, sizeof ival)) - return -EFAULT; - devfs_debug = ival; - break; -#endif - default: - return -ENOIOCTLCMD; - } - return 0; -} /* End Function devfsd_ioctl */ - -static int devfsd_close(struct inode *inode, struct file *file) -{ - struct devfsd_buf_entry *entry, *next; - struct fs_info *fs_info = inode->i_sb->s_fs_info; - - if (fs_info->devfsd_file != file) - return 0; - fs_info->devfsd_event_mask = 0; - fs_info->devfsd_file = NULL; - spin_lock(&fs_info->devfsd_buffer_lock); - entry = fs_info->devfsd_first_event; - fs_info->devfsd_first_event = NULL; - fs_info->devfsd_last_event = NULL; - kfree(fs_info->devfsd_info); - fs_info->devfsd_info = NULL; - spin_unlock(&fs_info->devfsd_buffer_lock); - fs_info->devfsd_pgrp = 0; - fs_info->devfsd_task = NULL; - wake_up(&fs_info->revalidate_wait_queue); - for (; entry; entry = next) { - 
next = entry->next; - kmem_cache_free(devfsd_buf_cache, entry); - } - return 0; -} /* End Function devfsd_close */ - -#ifdef CONFIG_DEVFS_DEBUG -static ssize_t stat_read(struct file *file, char __user *buf, size_t len, - loff_t * ppos) -{ - ssize_t num; - char txt[80]; - - num = sprintf(txt, "Number of entries: %u number of bytes: %u\n", - stat_num_entries, stat_num_bytes) + 1; - if (*ppos >= num) - return 0; - if (*ppos + len > num) - len = num - *ppos; - if (copy_to_user(buf, txt + *ppos, len)) - return -EFAULT; - *ppos += len; - return len; -} /* End Function stat_read */ -#endif - -static int __init init_devfs_fs(void) -{ - int err; - int major; - struct devfs_entry *devfsd; -#ifdef CONFIG_DEVFS_DEBUG - struct devfs_entry *stat; -#endif - - if (_devfs_get_root_entry() == NULL) - return -ENOMEM; - - printk(KERN_INFO "%s: %s Richard Gooch (rgooch@atnf.csiro.au)\n", - DEVFS_NAME, DEVFS_VERSION); - devfsd_buf_cache = kmem_cache_create("devfsd_event", - sizeof(struct devfsd_buf_entry), - 0, 0, NULL, NULL); - if (!devfsd_buf_cache) - OOPS("(): unable to allocate event slab\n"); -#ifdef CONFIG_DEVFS_DEBUG - devfs_debug = devfs_debug_init; - printk(KERN_INFO "%s: devfs_debug: 0x%0x\n", DEVFS_NAME, devfs_debug); -#endif - printk(KERN_INFO "%s: boot_options: 0x%0x\n", DEVFS_NAME, boot_options); - - /* register special device for devfsd communication */ - major = register_chrdev(0, "devfs", &devfs_fops); - if (major < 0) - return major; - - /* And create the entry for ".devfsd" */ - devfsd = _devfs_alloc_entry(".devfsd", 0, S_IFCHR | S_IRUSR | S_IWUSR); - if (devfsd == NULL) - return -ENOMEM; - devfsd->u.dev = MKDEV(major, 0); - _devfs_append_entry(root_entry, devfsd, NULL); - -#ifdef CONFIG_DEVFS_DEBUG - stat = _devfs_alloc_entry(".stat", 0, S_IFCHR | S_IRUGO); - if (stat == NULL) - return -ENOMEM; - stat->u.dev = MKDEV(major, 1); - _devfs_append_entry(root_entry, stat, NULL); -#endif - - err = register_filesystem(&devfs_fs_type); - return err; -} /* End Function 
init_devfs_fs */ - -void __init mount_devfs_fs(void) -{ - int err; - - if (!(boot_options & OPTION_MOUNT)) - return; - err = do_mount("none", "/dev", "devfs", 0, NULL); - if (err == 0) - printk(KERN_INFO "Mounted devfs on /dev\n"); - else - PRINTK("(): unable to mount devfs, err: %d\n", err); -} /* End Function mount_devfs_fs */ - -module_init(init_devfs_fs) diff --git a/fs/devfs/util.c b/fs/devfs/util.c deleted file mode 100644 index db06d38..0000000 --- a/fs/devfs/util.c +++ /dev/null @@ -1,97 +0,0 @@ -/* devfs (Device FileSystem) utilities. - - Copyright (C) 1999-2002 Richard Gooch - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Library General Public - License as published by the Free Software Foundation; either - version 2 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Library General Public License for more details. - - You should have received a copy of the GNU Library General Public - License along with this library; if not, write to the Free - Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - - Richard Gooch may be reached by email at rgooch@atnf.csiro.au - The postal address is: - Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia. - - ChangeLog - - 19991031 Richard Gooch <rgooch@atnf.csiro.au> - Created. - 19991103 Richard Gooch <rgooch@atnf.csiro.au> - Created <_devfs_convert_name> and supported SCSI and IDE CD-ROMs - 20000203 Richard Gooch <rgooch@atnf.csiro.au> - Changed operations pointer type to void *. - 20000621 Richard Gooch <rgooch@atnf.csiro.au> - Changed interface to <devfs_register_series>. - 20000622 Richard Gooch <rgooch@atnf.csiro.au> - Took account of interface change to <devfs_mk_symlink>. 
- Took account of interface change to <devfs_mk_dir>. - 20010519 Richard Gooch <rgooch@atnf.csiro.au> - Documentation cleanup. - 20010709 Richard Gooch <rgooch@atnf.csiro.au> - Created <devfs_*alloc_major> and <devfs_*alloc_devnum>. - 20010710 Richard Gooch <rgooch@atnf.csiro.au> - Created <devfs_*alloc_unique_number>. - 20010730 Richard Gooch <rgooch@atnf.csiro.au> - Documentation typo fix. - 20010806 Richard Gooch <rgooch@atnf.csiro.au> - Made <block_semaphore> and <char_semaphore> private. - 20010813 Richard Gooch <rgooch@atnf.csiro.au> - Fixed bug in <devfs_alloc_unique_number>: limited to 128 numbers - 20010818 Richard Gooch <rgooch@atnf.csiro.au> - Updated major masks up to Linus' "no new majors" proclamation. - Block: were 126 now 122 free, char: were 26 now 19 free. - 20020324 Richard Gooch <rgooch@atnf.csiro.au> - Fixed bug in <devfs_alloc_unique_number>: was clearing beyond - bitfield. - 20020326 Richard Gooch <rgooch@atnf.csiro.au> - Fixed bitfield data type for <devfs_*alloc_devnum>. - Made major bitfield type and initialiser 64 bit safe. - 20020413 Richard Gooch <rgooch@atnf.csiro.au> - Fixed shift warning on 64 bit machines. - 20020428 Richard Gooch <rgooch@atnf.csiro.au> - Copied and used macro for error messages from fs/devfs/base.c - 20021013 Richard Gooch <rgooch@atnf.csiro.au> - Documentation fix. - 20030101 Adam J. Richter <adam@yggdrasil.com> - Eliminate DEVFS_SPECIAL_{CHR,BLK}. Use mode_t instead. - 20030106 Christoph Hellwig <hch@infradead.org> - Rewrite devfs_{,de}alloc_devnum to look like C code. 
-*/ -#include <linux/module.h> -#include <linux/init.h> -#include <linux/devfs_fs_kernel.h> -#include <linux/slab.h> -#include <linux/vmalloc.h> -#include <linux/genhd.h> -#include <linux/bitops.h> - -int devfs_register_tape(const char *name) -{ - char tname[32], dest[64]; - static unsigned int tape_counter; - unsigned int n = tape_counter++; - - sprintf(dest, "../%s", name); - sprintf(tname, "tapes/tape%u", n); - devfs_mk_symlink(tname, dest); - - return n; -} - -EXPORT_SYMBOL(devfs_register_tape); - -void devfs_unregister_tape(int num) -{ - if (num >= 0) - devfs_remove("tapes/tape%u", num); -} - -EXPORT_SYMBOL(devfs_unregister_tape); diff --git a/fs/direct-io.c b/fs/direct-io.c index 538fb04..5981e17f 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -220,7 +220,8 @@ static void dio_complete(struct dio *dio, loff_t offset, ssize_t bytes) if (dio->end_io && dio->result) dio->end_io(dio->iocb, offset, bytes, dio->map_bh.b_private); if (dio->lock_type == DIO_LOCKING) - up_read(&dio->inode->i_alloc_sem); + /* lockdep: non-owner release */ + up_read_non_owner(&dio->inode->i_alloc_sem); } /* @@ -1261,7 +1262,8 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, } if (dio_lock_type == DIO_LOCKING) - down_read(&inode->i_alloc_sem); + /* lockdep: not the owner will release it */ + down_read_non_owner(&inode->i_alloc_sem); } /* diff --git a/fs/efs/inode.c b/fs/efs/inode.c index 180607f..174696f 100644 --- a/fs/efs/inode.c +++ b/fs/efs/inode.c @@ -21,7 +21,7 @@ static sector_t _efs_bmap(struct address_space *mapping, sector_t block) { return generic_block_bmap(mapping,block,efs_get_block); } -static struct address_space_operations efs_aops = { +static const struct address_space_operations efs_aops = { .readpage = efs_readpage, .sync_page = block_sync_page, .bmap = _efs_bmap diff --git a/fs/efs/symlink.c b/fs/efs/symlink.c index 3d9a350..e249cf7 100644 --- a/fs/efs/symlink.c +++ b/fs/efs/symlink.c @@ -53,6 +53,6 @@ fail: return err; } -struct 
address_space_operations efs_symlink_aops = { +const struct address_space_operations efs_symlink_aops = { .readpage = efs_symlink_readpage }; diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 9c677bb..19ffb04 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -120,7 +120,7 @@ struct epoll_filefd { */ struct wake_task_node { struct list_head llink; - task_t *task; + struct task_struct *task; wait_queue_head_t *wq; }; @@ -413,7 +413,7 @@ static void ep_poll_safewake(struct poll_safewake *psw, wait_queue_head_t *wq) { int wake_nests = 0; unsigned long flags; - task_t *this_task = current; + struct task_struct *this_task = current; struct list_head *lsthead = &psw->wake_task_list, *lnk; struct wake_task_node *tncur; struct wake_task_node tnode; @@ -22,7 +22,6 @@ * formats. */ -#include <linux/config.h> #include <linux/slab.h> #include <linux/file.h> #include <linux/mman.h> diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c index 433a213..d487043 100644 --- a/fs/ext2/balloc.c +++ b/fs/ext2/balloc.c @@ -11,7 +11,6 @@ * David S. 
Miller (davem@caip.rutgers.edu), 1995 */ -#include <linux/config.h> #include "ext2.h" #include <linux/quotaops.h> #include <linux/sched.h> diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index 9f74a62..e65a019 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -162,9 +162,9 @@ extern const struct file_operations ext2_file_operations; extern const struct file_operations ext2_xip_file_operations; /* inode.c */ -extern struct address_space_operations ext2_aops; -extern struct address_space_operations ext2_aops_xip; -extern struct address_space_operations ext2_nobh_aops; +extern const struct address_space_operations ext2_aops; +extern const struct address_space_operations ext2_aops_xip; +extern const struct address_space_operations ext2_nobh_aops; /* namei.c */ extern struct inode_operations ext2_dir_inode_operations; diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c index 308c252..de85c61 100644 --- a/fs/ext2/ialloc.c +++ b/fs/ext2/ialloc.c @@ -12,7 +12,6 @@ * David S. Miller (davem@caip.rutgers.edu), 1995 */ -#include <linux/config.h> #include <linux/quotaops.h> #include <linux/sched.h> #include <linux/backing-dev.h> diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 04af9c4..fb4d322 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -684,7 +684,7 @@ ext2_writepages(struct address_space *mapping, struct writeback_control *wbc) return mpage_writepages(mapping, wbc, ext2_get_block); } -struct address_space_operations ext2_aops = { +const struct address_space_operations ext2_aops = { .readpage = ext2_readpage, .readpages = ext2_readpages, .writepage = ext2_writepage, @@ -697,12 +697,12 @@ struct address_space_operations ext2_aops = { .migratepage = buffer_migrate_page, }; -struct address_space_operations ext2_aops_xip = { +const struct address_space_operations ext2_aops_xip = { .bmap = ext2_bmap, .get_xip_page = ext2_get_xip_page, }; -struct address_space_operations ext2_nobh_aops = { +const struct address_space_operations ext2_nobh_aops = { .readpage = ext2_readpage, 
.readpages = ext2_readpages, .writepage = ext2_nobh_writepage, diff --git a/fs/ext2/super.c b/fs/ext2/super.c index d4233b2..f2702cd 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -16,7 +16,6 @@ * David S. Miller (davem@caip.rutgers.edu), 1995 */ -#include <linux/config.h> #include <linux/module.h> #include <linux/string.h> #include <linux/fs.h> @@ -1158,7 +1157,7 @@ static ssize_t ext2_quota_write(struct super_block *sb, int type, struct buffer_head tmp_bh; struct buffer_head *bh; - mutex_lock(&inode->i_mutex); + mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); while (towrite > 0) { tocopy = sb->s_blocksize - offset < towrite ? sb->s_blocksize - offset : towrite; diff --git a/fs/ext2/xattr.h b/fs/ext2/xattr.h index 67cfeb6..bf8175b 100644 --- a/fs/ext2/xattr.h +++ b/fs/ext2/xattr.h @@ -6,7 +6,6 @@ (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org> */ -#include <linux/config.h> #include <linux/init.h> #include <linux/xattr.h> diff --git a/fs/ext3/acl.h b/fs/ext3/acl.h index 92d50b5..0d1e627 100644 --- a/fs/ext3/acl.h +++ b/fs/ext3/acl.h @@ -62,9 +62,6 @@ extern int ext3_permission (struct inode *, int, struct nameidata *); extern int ext3_acl_chmod (struct inode *); extern int ext3_init_acl (handle_t *, struct inode *, struct inode *); -extern int init_ext3_acl(void); -extern void exit_ext3_acl(void); - #else /* CONFIG_EXT3_FS_POSIX_ACL */ #include <linux/sched.h> #define ext3_permission NULL diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c index 96172e8..a504a40 100644 --- a/fs/ext3/balloc.c +++ b/fs/ext3/balloc.c @@ -11,7 +11,6 @@ * David S. 
Miller (davem@caip.rutgers.edu), 1995 */ -#include <linux/config.h> #include <linux/time.h> #include <linux/capability.h> #include <linux/fs.h> diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 0321e1b..f804d5e 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1698,7 +1698,7 @@ static int ext3_journalled_set_page_dirty(struct page *page) return __set_page_dirty_nobuffers(page); } -static struct address_space_operations ext3_ordered_aops = { +static const struct address_space_operations ext3_ordered_aops = { .readpage = ext3_readpage, .readpages = ext3_readpages, .writepage = ext3_ordered_writepage, @@ -1712,7 +1712,7 @@ static struct address_space_operations ext3_ordered_aops = { .migratepage = buffer_migrate_page, }; -static struct address_space_operations ext3_writeback_aops = { +static const struct address_space_operations ext3_writeback_aops = { .readpage = ext3_readpage, .readpages = ext3_readpages, .writepage = ext3_writeback_writepage, @@ -1726,7 +1726,7 @@ static struct address_space_operations ext3_writeback_aops = { .migratepage = buffer_migrate_page, }; -static struct address_space_operations ext3_journalled_aops = { +static const struct address_space_operations ext3_journalled_aops = { .readpage = ext3_readpage, .readpages = ext3_readpages, .writepage = ext3_journalled_writepage, diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c index dfd8118..5e1337f 100644 --- a/fs/ext3/resize.c +++ b/fs/ext3/resize.c @@ -8,7 +8,6 @@ * This could probably be made into a module, because it is not often in use. */ -#include <linux/config.h> #define EXT3FS_DEBUG diff --git a/fs/ext3/super.c b/fs/ext3/super.c index b748336..813d589 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -16,7 +16,6 @@ * David S. 
Miller (davem@caip.rutgers.edu), 1995 */ -#include <linux/config.h> #include <linux/module.h> #include <linux/string.h> #include <linux/fs.h> @@ -2615,7 +2614,7 @@ static ssize_t ext3_quota_write(struct super_block *sb, int type, struct buffer_head *bh; handle_t *handle = journal_current_handle(); - mutex_lock(&inode->i_mutex); + mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); while (towrite > 0) { tocopy = sb->s_blocksize - offset < towrite ? sb->s_blocksize - offset : towrite; diff --git a/fs/ext3/xattr.h b/fs/ext3/xattr.h index 2ceae38..6b1ae1c 100644 --- a/fs/ext3/xattr.h +++ b/fs/ext3/xattr.h @@ -6,7 +6,6 @@ (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org> */ -#include <linux/config.h> #include <linux/xattr.h> /* Magic value in attribute blocks */ diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 7c35d58..31b7174 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -196,7 +196,7 @@ static sector_t _fat_bmap(struct address_space *mapping, sector_t block) return generic_block_bmap(mapping, block, fat_get_block); } -static struct address_space_operations fat_aops = { +static const struct address_space_operations fat_aops = { .readpage = fat_readpage, .readpages = fat_readpages, .writepage = fat_writepage, @@ -240,13 +240,9 @@ static struct fdtable *alloc_fdtable(int nr) if (!fdt) goto out; - nfds = 8 * L1_CACHE_BYTES; - /* Expand to the max in easy steps */ - while (nfds <= nr) { - nfds = nfds * 2; - if (nfds > NR_OPEN) - nfds = NR_OPEN; - } + nfds = max_t(int, 8 * L1_CACHE_BYTES, roundup_pow_of_two(nr + 1)); + if (nfds > NR_OPEN) + nfds = NR_OPEN; new_openset = alloc_fdset(nfds); new_execset = alloc_fdset(nfds); @@ -277,11 +273,13 @@ static struct fdtable *alloc_fdtable(int nr) } while (nfds <= nr); new_fds = alloc_fd_array(nfds); if (!new_fds) - goto out; + goto out2; fdt->fd = new_fds; fdt->max_fds = nfds; fdt->free_files = NULL; return fdt; +out2: + nfds = fdt->max_fdset; out: if (new_openset) free_fdset(new_openset, nfds); diff --git 
a/fs/file_table.c b/fs/file_table.c index 506d530..0131ba0 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -5,7 +5,6 @@ * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) */ -#include <linux/config.h> #include <linux/string.h> #include <linux/slab.h> #include <linux/file.h> diff --git a/fs/freevxfs/vxfs_immed.c b/fs/freevxfs/vxfs_immed.c index 6f5df17..4e25f3f 100644 --- a/fs/freevxfs/vxfs_immed.c +++ b/fs/freevxfs/vxfs_immed.c @@ -56,7 +56,7 @@ struct inode_operations vxfs_immed_symlink_iops = { /* * Adress space operations for immed files and directories. */ -struct address_space_operations vxfs_immed_aops = { +const struct address_space_operations vxfs_immed_aops = { .readpage = vxfs_immed_readpage, }; diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c index f544aae..ca6a397 100644 --- a/fs/freevxfs/vxfs_inode.c +++ b/fs/freevxfs/vxfs_inode.c @@ -41,8 +41,8 @@ #include "vxfs_extern.h" -extern struct address_space_operations vxfs_aops; -extern struct address_space_operations vxfs_immed_aops; +extern const struct address_space_operations vxfs_aops; +extern const struct address_space_operations vxfs_immed_aops; extern struct inode_operations vxfs_immed_symlink_iops; @@ -295,7 +295,7 @@ vxfs_read_inode(struct inode *ip) { struct super_block *sbp = ip->i_sb; struct vxfs_inode_info *vip; - struct address_space_operations *aops; + const struct address_space_operations *aops; ino_t ino = ip->i_ino; if (!(vip = __vxfs_iget(ino, VXFS_SBI(sbp)->vsi_ilist))) diff --git a/fs/freevxfs/vxfs_subr.c b/fs/freevxfs/vxfs_subr.c index c1be118..decac62 100644 --- a/fs/freevxfs/vxfs_subr.c +++ b/fs/freevxfs/vxfs_subr.c @@ -42,7 +42,7 @@ static int vxfs_readpage(struct file *, struct page *); static sector_t vxfs_bmap(struct address_space *, sector_t); -struct address_space_operations vxfs_aops = { +const struct address_space_operations vxfs_aops = { .readpage = vxfs_readpage, .bmap = vxfs_bmap, .sync_page = block_sync_page, diff --git a/fs/fs-writeback.c 
b/fs/fs-writeback.c index 031b27a..892643d 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -464,8 +464,8 @@ void sync_inodes_sb(struct super_block *sb, int wait) .range_start = 0, .range_end = LLONG_MAX, }; - unsigned long nr_dirty = read_page_state(nr_dirty); - unsigned long nr_unstable = read_page_state(nr_unstable); + unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); + unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); wbc.nr_to_write = nr_dirty + nr_unstable + (inodes_stat.nr_inodes - inodes_stat.nr_unused) + diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 28aa81e..63614ed 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -770,7 +770,7 @@ static const struct file_operations fuse_direct_io_file_operations = { /* no mmap and sendfile */ }; -static struct address_space_operations fuse_file_aops = { +static const struct address_space_operations fuse_file_aops = { .readpage = fuse_readpage, .prepare_write = fuse_prepare_write, .commit_write = fuse_commit_write, diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h index 3ed8663..735332d 100644 --- a/fs/hfs/hfs_fs.h +++ b/fs/hfs/hfs_fs.h @@ -182,8 +182,8 @@ extern void hfs_file_truncate(struct inode *); extern int hfs_get_block(struct inode *, sector_t, struct buffer_head *, int); /* inode.c */ -extern struct address_space_operations hfs_aops; -extern struct address_space_operations hfs_btree_aops; +extern const struct address_space_operations hfs_aops; +extern const struct address_space_operations hfs_btree_aops; extern struct inode *hfs_new_inode(struct inode *, struct qstr *, int); extern void hfs_inode_write_fork(struct inode *, struct hfs_extent *, __be32 *, __be32 *); diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 2d4ced2..315cf44 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -114,7 +114,7 @@ static int hfs_writepages(struct address_space *mapping, return mpage_writepages(mapping, wbc, hfs_get_block); } -struct address_space_operations hfs_btree_aops = { +const struct 
address_space_operations hfs_btree_aops = { .readpage = hfs_readpage, .writepage = hfs_writepage, .sync_page = block_sync_page, @@ -124,7 +124,7 @@ struct address_space_operations hfs_btree_aops = { .releasepage = hfs_releasepage, }; -struct address_space_operations hfs_aops = { +const struct address_space_operations hfs_aops = { .readpage = hfs_readpage, .writepage = hfs_writepage, .sync_page = block_sync_page, diff --git a/fs/hfs/super.c b/fs/hfs/super.c index d9227bf..34937ee 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -12,7 +12,6 @@ * Based on the minix file system code, (C) 1991, 1992 by Linus Torvalds */ -#include <linux/config.h> #include <linux/module.h> #include <linux/blkdev.h> #include <linux/mount.h> diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index 7ae3936..8a1ca5e 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -323,8 +323,8 @@ int hfsplus_file_extend(struct inode *); void hfsplus_file_truncate(struct inode *); /* inode.c */ -extern struct address_space_operations hfsplus_aops; -extern struct address_space_operations hfsplus_btree_aops; +extern const struct address_space_operations hfsplus_aops; +extern const struct address_space_operations hfsplus_btree_aops; void hfsplus_inode_read_fork(struct inode *, struct hfsplus_fork_raw *); void hfsplus_inode_write_fork(struct inode *, struct hfsplus_fork_raw *); diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index acf66db..924ecde 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -109,7 +109,7 @@ static int hfsplus_writepages(struct address_space *mapping, return mpage_writepages(mapping, wbc, hfsplus_get_block); } -struct address_space_operations hfsplus_btree_aops = { +const struct address_space_operations hfsplus_btree_aops = { .readpage = hfsplus_readpage, .writepage = hfsplus_writepage, .sync_page = block_sync_page, @@ -119,7 +119,7 @@ struct address_space_operations hfsplus_btree_aops = { .releasepage = hfsplus_releasepage, }; -struct 
address_space_operations hfsplus_aops = { +const struct address_space_operations hfsplus_aops = { .readpage = hfsplus_readpage, .writepage = hfsplus_writepage, .sync_page = block_sync_page, diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 0a92fa2..d279d59 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -7,7 +7,6 @@ * */ -#include <linux/config.h> #include <linux/module.h> #include <linux/init.h> #include <linux/pagemap.h> diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 8e0d377..b82e3d9 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -54,7 +54,7 @@ static int append = 0; static struct inode_operations hostfs_iops; static struct inode_operations hostfs_dir_iops; -static struct address_space_operations hostfs_link_aops; +static const struct address_space_operations hostfs_link_aops; #ifndef MODULE static int __init hostfs_args(char *options, int *add) @@ -518,7 +518,7 @@ int hostfs_commit_write(struct file *file, struct page *page, unsigned from, return(err); } -static struct address_space_operations hostfs_aops = { +static const struct address_space_operations hostfs_aops = { .writepage = hostfs_writepage, .readpage = hostfs_readpage, .set_page_dirty = __set_page_dirty_nobuffers, @@ -935,7 +935,7 @@ int hostfs_link_readpage(struct file *file, struct page *page) return(err); } -static struct address_space_operations hostfs_link_aops = { +static const struct address_space_operations hostfs_link_aops = { .readpage = hostfs_link_readpage, }; diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c index d3b9fff..d9eb19b 100644 --- a/fs/hpfs/file.c +++ b/fs/hpfs/file.c @@ -99,7 +99,7 @@ static sector_t _hpfs_bmap(struct address_space *mapping, sector_t block) { return generic_block_bmap(mapping,block,hpfs_get_block); } -struct address_space_operations hpfs_aops = { +const struct address_space_operations hpfs_aops = { .readpage = hpfs_readpage, .writepage = hpfs_writepage, .sync_page = block_sync_page, diff --git 
a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index 29b7a3e..f687d54 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h @@ -268,7 +268,7 @@ void hpfs_set_ea(struct inode *, struct fnode *, char *, char *, int); int hpfs_file_fsync(struct file *, struct dentry *, int); extern const struct file_operations hpfs_file_ops; extern struct inode_operations hpfs_file_iops; -extern struct address_space_operations hpfs_aops; +extern const struct address_space_operations hpfs_aops; /* inode.c */ @@ -304,7 +304,7 @@ void hpfs_decide_conv(struct inode *, unsigned char *, unsigned); /* namei.c */ extern struct inode_operations hpfs_dir_iops; -extern struct address_space_operations hpfs_symlink_aops; +extern const struct address_space_operations hpfs_symlink_aops; static inline struct hpfs_inode_info *hpfs_i(struct inode *inode) { diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c index a03abb1..59e7dc1 100644 --- a/fs/hpfs/namei.c +++ b/fs/hpfs/namei.c @@ -538,7 +538,7 @@ fail: return err; } -struct address_space_operations hpfs_symlink_aops = { +const struct address_space_operations hpfs_symlink_aops = { .readpage = hpfs_symlink_readpage }; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index e6410d8..c3920c9 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -34,7 +34,7 @@ #define HUGETLBFS_MAGIC 0x958458f6 static struct super_operations hugetlbfs_ops; -static struct address_space_operations hugetlbfs_aops; +static const struct address_space_operations hugetlbfs_aops; const struct file_operations hugetlbfs_file_operations; static struct inode_operations hugetlbfs_dir_inode_operations; static struct inode_operations hugetlbfs_inode_operations; @@ -83,8 +83,6 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) ret = -ENOMEM; len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); - if (!(vma->vm_flags & VM_WRITE) && len > inode->i_size) - goto out; if (vma->vm_flags & VM_MAYSHARE && hugetlb_reserve_pages(inode, vma->vm_pgoff >> 
(HPAGE_SHIFT-PAGE_SHIFT), @@ -93,7 +91,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) ret = 0; hugetlb_prefault_arch_hook(vma->vm_mm); - if (inode->i_size < len) + if (vma->vm_flags & VM_WRITE && inode->i_size < len) inode->i_size = len; out: mutex_unlock(&inode->i_mutex); @@ -547,7 +545,7 @@ static void hugetlbfs_destroy_inode(struct inode *inode) kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode)); } -static struct address_space_operations hugetlbfs_aops = { +static const struct address_space_operations hugetlbfs_aops = { .readpage = hugetlbfs_readpage, .prepare_write = hugetlbfs_prepare_write, .commit_write = hugetlbfs_commit_write, @@ -4,7 +4,6 @@ * (C) 1997 Linus Torvalds */ -#include <linux/config.h> #include <linux/fs.h> #include <linux/mm.h> #include <linux/dcache.h> @@ -102,7 +101,7 @@ static kmem_cache_t * inode_cachep __read_mostly; static struct inode *alloc_inode(struct super_block *sb) { - static struct address_space_operations empty_aops; + static const struct address_space_operations empty_aops; static struct inode_operations empty_iops; static const struct file_operations empty_fops; struct inode *inode; @@ -452,15 +451,14 @@ static void prune_icache(int nr_to_scan) nr_pruned++; } inodes_stat.nr_unused -= nr_pruned; + if (current_is_kswapd()) + __count_vm_events(KSWAPD_INODESTEAL, reap); + else + __count_vm_events(PGINODESTEAL, reap); spin_unlock(&inode_lock); dispose_list(&freeable); mutex_unlock(&iprune_mutex); - - if (current_is_kswapd()) - mod_page_state(kswapd_inodesteal, reap); - else - mod_page_state(pginodesteal, reap); } /* @@ -4,7 +4,6 @@ * Copyright (C) 1991, 1992 Linus Torvalds */ -#include <linux/config.h> #include <linux/syscalls.h> #include <linux/mm.h> #include <linux/smp_lock.h> diff --git a/fs/ioprio.c b/fs/ioprio.c index 7fa76ed..93aa571 100644 --- a/fs/ioprio.c +++ b/fs/ioprio.c @@ -125,11 +125,24 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio) return ret; } +static 
int get_task_ioprio(struct task_struct *p) +{ + int ret; + + ret = security_task_getioprio(p); + if (ret) + goto out; + ret = p->ioprio; +out: + return ret; +} + asmlinkage long sys_ioprio_get(int which, int who) { struct task_struct *g, *p; struct user_struct *user; int ret = -ESRCH; + int tmpio; read_lock_irq(&tasklist_lock); switch (which) { @@ -139,16 +152,19 @@ asmlinkage long sys_ioprio_get(int which, int who) else p = find_task_by_pid(who); if (p) - ret = p->ioprio; + ret = get_task_ioprio(p); break; case IOPRIO_WHO_PGRP: if (!who) who = process_group(current); do_each_task_pid(who, PIDTYPE_PGID, p) { + tmpio = get_task_ioprio(p); + if (tmpio < 0) + continue; if (ret == -ESRCH) - ret = p->ioprio; + ret = tmpio; else - ret = ioprio_best(ret, p->ioprio); + ret = ioprio_best(ret, tmpio); } while_each_task_pid(who, PIDTYPE_PGID, p); break; case IOPRIO_WHO_USER: @@ -163,10 +179,13 @@ asmlinkage long sys_ioprio_get(int which, int who) do_each_thread(g, p) { if (p->uid != user->uid) continue; + tmpio = get_task_ioprio(p); + if (tmpio < 0) + continue; if (ret == -ESRCH) - ret = p->ioprio; + ret = tmpio; else - ret = ioprio_best(ret, p->ioprio); + ret = ioprio_best(ret, tmpio); } while_each_thread(g, p); if (who) diff --git a/fs/isofs/compress.c b/fs/isofs/compress.c index 4917315..7318163 100644 --- a/fs/isofs/compress.c +++ b/fs/isofs/compress.c @@ -16,7 +16,6 @@ * Transparent decompression of files on an iso9660 filesystem */ -#include <linux/config.h> #include <linux/module.h> #include <linux/init.h> @@ -312,7 +311,7 @@ eio: return err; } -struct address_space_operations zisofs_aops = { +const struct address_space_operations zisofs_aops = { .readpage = zisofs_readpage, /* No sync_page operation supported? 
*/ /* No bmap operation supported */ diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c index 5440ea2..27e2769 100644 --- a/fs/isofs/dir.c +++ b/fs/isofs/dir.c @@ -10,7 +10,6 @@ * * isofs directory handling functions */ -#include <linux/config.h> #include <linux/smp_lock.h> #include "isofs.h" diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 3f9c8ba..1439136 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -11,7 +11,6 @@ * 2004 Paul Serice - NFS Export Operations */ -#include <linux/config.h> #include <linux/init.h> #include <linux/module.h> @@ -1054,7 +1053,7 @@ static sector_t _isofs_bmap(struct address_space *mapping, sector_t block) return generic_block_bmap(mapping,block,isofs_get_block); } -static struct address_space_operations isofs_aops = { +static const struct address_space_operations isofs_aops = { .readpage = isofs_readpage, .sync_page = block_sync_page, .bmap = _isofs_bmap diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h index b87ba06..e6308c8 100644 --- a/fs/isofs/isofs.h +++ b/fs/isofs/isofs.h @@ -176,5 +176,5 @@ isofs_normalize_block_and_offset(struct iso_directory_record* de, extern struct inode_operations isofs_dir_inode_operations; extern const struct file_operations isofs_dir_operations; -extern struct address_space_operations isofs_symlink_aops; +extern const struct address_space_operations isofs_symlink_aops; extern struct export_operations isofs_export_ops; diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c index 4326cb4..f3a1db3 100644 --- a/fs/isofs/rock.c +++ b/fs/isofs/rock.c @@ -754,6 +754,6 @@ error: return -EIO; } -struct address_space_operations isofs_symlink_aops = { +const struct address_space_operations isofs_symlink_aops = { .readpage = rock_ridge_symlink_readpage }; diff --git a/fs/isofs/zisofs.h b/fs/isofs/zisofs.h index d78485d..2737957 100644 --- a/fs/isofs/zisofs.h +++ b/fs/isofs/zisofs.h @@ -15,7 +15,7 @@ */ #ifdef CONFIG_ZISOFS -extern struct address_space_operations zisofs_aops; +extern const struct 
address_space_operations zisofs_aops; extern int __init zisofs_init(void); extern void zisofs_cleanup(void); #endif diff --git a/fs/jffs/inode-v23.c b/fs/jffs/inode-v23.c index 9e46ea6..9306869 100644 --- a/fs/jffs/inode-v23.c +++ b/fs/jffs/inode-v23.c @@ -59,7 +59,7 @@ static const struct file_operations jffs_file_operations; static struct inode_operations jffs_file_inode_operations; static const struct file_operations jffs_dir_operations; static struct inode_operations jffs_dir_inode_operations; -static struct address_space_operations jffs_address_operations; +static const struct address_space_operations jffs_address_operations; kmem_cache_t *node_cache = NULL; kmem_cache_t *fm_cache = NULL; @@ -1614,7 +1614,7 @@ jffs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, } /* jffs_ioctl() */ -static struct address_space_operations jffs_address_operations = { +static const struct address_space_operations jffs_address_operations = { .readpage = jffs_readpage, .prepare_write = jffs_prepare_write, .commit_write = jffs_commit_write, diff --git a/fs/jffs/intrep.c b/fs/jffs/intrep.c index 5371a40..9000f1e 100644 --- a/fs/jffs/intrep.c +++ b/fs/jffs/intrep.c @@ -55,7 +55,6 @@ * */ -#include <linux/config.h> #include <linux/types.h> #include <linux/slab.h> #include <linux/jffs.h> diff --git a/fs/jffs/jffs_fm.h b/fs/jffs/jffs_fm.h index c794d92..9ee6ad2 100644 --- a/fs/jffs/jffs_fm.h +++ b/fs/jffs/jffs_fm.h @@ -20,7 +20,6 @@ #ifndef __LINUX_JFFS_FM_H__ #define __LINUX_JFFS_FM_H__ -#include <linux/config.h> #include <linux/types.h> #include <linux/jffs.h> #include <linux/mtd/mtd.h> diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c index 320dd48..0ae3cd1 100644 --- a/fs/jffs2/acl.c +++ b/fs/jffs2/acl.c @@ -267,6 +267,8 @@ static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl) } rc = do_jffs2_setxattr(inode, xprefix, "", value, size, 0); + if (!value && rc == -ENODATA) + rc = 0; if (value) kfree(value); if (!rc) { @@ -343,10 +345,8 @@ int 
jffs2_init_acl(struct inode *inode, struct inode *dir) return rc; } -void jffs2_clear_acl(struct inode *inode) +void jffs2_clear_acl(struct jffs2_inode_info *f) { - struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); - if (f->i_acl_access && f->i_acl_access != JFFS2_ACL_NOT_CACHED) { posix_acl_release(f->i_acl_access); f->i_acl_access = JFFS2_ACL_NOT_CACHED; diff --git a/fs/jffs2/acl.h b/fs/jffs2/acl.h index 8893bd1..fa327db 100644 --- a/fs/jffs2/acl.h +++ b/fs/jffs2/acl.h @@ -30,7 +30,7 @@ struct jffs2_acl_header { extern int jffs2_permission(struct inode *, int, struct nameidata *); extern int jffs2_acl_chmod(struct inode *); extern int jffs2_init_acl(struct inode *, struct inode *); -extern void jffs2_clear_acl(struct inode *); +extern void jffs2_clear_acl(struct jffs2_inode_info *); extern struct xattr_handler jffs2_acl_access_xattr_handler; extern struct xattr_handler jffs2_acl_default_xattr_handler; @@ -40,6 +40,6 @@ extern struct xattr_handler jffs2_acl_default_xattr_handler; #define jffs2_permission NULL #define jffs2_acl_chmod(inode) (0) #define jffs2_init_acl(inode,dir) (0) -#define jffs2_clear_acl(inode) +#define jffs2_clear_acl(f) #endif /* CONFIG_JFFS2_FS_POSIX_ACL */ diff --git a/fs/jffs2/compr_zlib.c b/fs/jffs2/compr_zlib.c index 5c63e0c..3681d07 100644 --- a/fs/jffs2/compr_zlib.c +++ b/fs/jffs2/compr_zlib.c @@ -15,7 +15,6 @@ #error "The userspace support got too messy and was removed. 
Update your mkfs.jffs2" #endif -#include <linux/config.h> #include <linux/kernel.h> #include <linux/sched.h> #include <linux/slab.h> diff --git a/fs/jffs2/debug.h b/fs/jffs2/debug.h index 5fa494a..3daf3bc 100644 --- a/fs/jffs2/debug.h +++ b/fs/jffs2/debug.h @@ -13,7 +13,6 @@ #ifndef _JFFS2_DEBUG_H_ #define _JFFS2_DEBUG_H_ -#include <linux/config.h> #ifndef CONFIG_JFFS2_FS_DEBUG #define CONFIG_JFFS2_FS_DEBUG 0 diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c index b8886f0..ad01210 100644 --- a/fs/jffs2/erase.c +++ b/fs/jffs2/erase.c @@ -225,7 +225,6 @@ static inline void jffs2_remove_node_refs_from_ino_list(struct jffs2_sb_info *c, at the end of the linked list. Stash it and continue from the beginning of the list */ ic = (struct jffs2_inode_cache *)(*prev); - BUG_ON(ic->class != RAWNODE_CLASS_INODE_CACHE); prev = &ic->nodes; continue; } @@ -249,7 +248,8 @@ static inline void jffs2_remove_node_refs_from_ino_list(struct jffs2_sb_info *c, /* PARANOIA */ if (!ic) { - printk(KERN_WARNING "inode_cache not found in remove_node_refs()!!\n"); + JFFS2_WARNING("inode_cache/xattr_datum/xattr_ref" + " not found in remove_node_refs()!!\n"); return; } @@ -274,8 +274,19 @@ static inline void jffs2_remove_node_refs_from_ino_list(struct jffs2_sb_info *c, printk("\n"); }); - if (ic->nodes == (void *)ic && ic->nlink == 0) - jffs2_del_ino_cache(c, ic); + switch (ic->class) { +#ifdef CONFIG_JFFS2_FS_XATTR + case RAWNODE_CLASS_XATTR_DATUM: + jffs2_release_xattr_datum(c, (struct jffs2_xattr_datum *)ic); + break; + case RAWNODE_CLASS_XATTR_REF: + jffs2_release_xattr_ref(c, (struct jffs2_xattr_ref *)ic); + break; +#endif + default: + if (ic->nodes == (void *)ic && ic->nlink == 0) + jffs2_del_ino_cache(c, ic); + } } void jffs2_free_jeb_node_refs(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb) diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c index bb8844f..3ed6e3e 100644 --- a/fs/jffs2/file.c +++ b/fs/jffs2/file.c @@ -62,7 +62,7 @@ struct inode_operations jffs2_file_inode_operations = 
.removexattr = jffs2_removexattr }; -struct address_space_operations jffs2_file_address_operations = +const struct address_space_operations jffs2_file_address_operations = { .readpage = jffs2_readpage, .prepare_write =jffs2_prepare_write, diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index 2900ec3..4780f82 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c @@ -12,7 +12,6 @@ */ #include <linux/capability.h> -#include <linux/config.h> #include <linux/kernel.h> #include <linux/sched.h> #include <linux/fs.h> @@ -227,8 +226,6 @@ void jffs2_clear_inode (struct inode *inode) struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); D1(printk(KERN_DEBUG "jffs2_clear_inode(): ino #%lu mode %o\n", inode->i_ino, inode->i_mode)); - - jffs2_xattr_delete_inode(c, f->inocache); jffs2_do_clear_inode(c, f); } diff --git a/fs/jffs2/gc.c b/fs/jffs2/gc.c index 477c526..daff334 100644 --- a/fs/jffs2/gc.c +++ b/fs/jffs2/gc.c @@ -165,6 +165,7 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c) D1(printk(KERN_DEBUG "Skipping check of ino #%d with nlink zero\n", ic->ino)); spin_unlock(&c->inocache_lock); + jffs2_xattr_delete_inode(c, ic); continue; } switch(ic->state) { @@ -275,13 +276,12 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c) * We can decide whether this node is inode or xattr by ic->class. 
*/ if (ic->class == RAWNODE_CLASS_XATTR_DATUM || ic->class == RAWNODE_CLASS_XATTR_REF) { - BUG_ON(raw->next_in_ino != (void *)ic); spin_unlock(&c->erase_completion_lock); if (ic->class == RAWNODE_CLASS_XATTR_DATUM) { - ret = jffs2_garbage_collect_xattr_datum(c, (struct jffs2_xattr_datum *)ic); + ret = jffs2_garbage_collect_xattr_datum(c, (struct jffs2_xattr_datum *)ic, raw); } else { - ret = jffs2_garbage_collect_xattr_ref(c, (struct jffs2_xattr_ref *)ic); + ret = jffs2_garbage_collect_xattr_ref(c, (struct jffs2_xattr_ref *)ic, raw); } goto release_sem; } diff --git a/fs/jffs2/jffs2_fs_sb.h b/fs/jffs2/jffs2_fs_sb.h index 935fec1..b985949 100644 --- a/fs/jffs2/jffs2_fs_sb.h +++ b/fs/jffs2/jffs2_fs_sb.h @@ -119,8 +119,11 @@ struct jffs2_sb_info { #ifdef CONFIG_JFFS2_FS_XATTR #define XATTRINDEX_HASHSIZE (57) uint32_t highest_xid; + uint32_t highest_xseqno; struct list_head xattrindex[XATTRINDEX_HASHSIZE]; struct list_head xattr_unchecked; + struct list_head xattr_dead_list; + struct jffs2_xattr_ref *xref_dead_list; struct jffs2_xattr_ref *xref_temp; struct rw_semaphore xattr_sem; uint32_t xdatum_mem_usage; diff --git a/fs/jffs2/malloc.c b/fs/jffs2/malloc.c index 4889d07..33f2910 100644 --- a/fs/jffs2/malloc.c +++ b/fs/jffs2/malloc.c @@ -190,7 +190,7 @@ void jffs2_free_tmp_dnode_info(struct jffs2_tmp_dnode_info *x) kmem_cache_free(tmp_dnode_info_slab, x); } -struct jffs2_raw_node_ref *jffs2_alloc_refblock(void) +static struct jffs2_raw_node_ref *jffs2_alloc_refblock(void) { struct jffs2_raw_node_ref *ret; @@ -291,6 +291,7 @@ struct jffs2_xattr_datum *jffs2_alloc_xattr_datum(void) memset(xd, 0, sizeof(struct jffs2_xattr_datum)); xd->class = RAWNODE_CLASS_XATTR_DATUM; + xd->node = (void *)xd; INIT_LIST_HEAD(&xd->xindex); return xd; } @@ -309,6 +310,7 @@ struct jffs2_xattr_ref *jffs2_alloc_xattr_ref(void) memset(ref, 0, sizeof(struct jffs2_xattr_ref)); ref->class = RAWNODE_CLASS_XATTR_REF; + ref->node = (void *)ref; return ref; } diff --git a/fs/jffs2/nodelist.c 
b/fs/jffs2/nodelist.c index 927dfe4..7675b33 100644 --- a/fs/jffs2/nodelist.c +++ b/fs/jffs2/nodelist.c @@ -906,6 +906,9 @@ void jffs2_del_ino_cache(struct jffs2_sb_info *c, struct jffs2_inode_cache *old) { struct jffs2_inode_cache **prev; +#ifdef CONFIG_JFFS2_FS_XATTR + BUG_ON(old->xref); +#endif dbg_inocache("del %p (ino #%u)\n", old, old->ino); spin_lock(&c->inocache_lock); diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h index b16c60b..cae92c1 100644 --- a/fs/jffs2/nodelist.h +++ b/fs/jffs2/nodelist.h @@ -14,7 +14,6 @@ #ifndef __JFFS2_NODELIST_H__ #define __JFFS2_NODELIST_H__ -#include <linux/config.h> #include <linux/fs.h> #include <linux/types.h> #include <linux/jffs2.h> @@ -427,8 +426,6 @@ char *jffs2_getlink(struct jffs2_sb_info *c, struct jffs2_inode_info *f); /* scan.c */ int jffs2_scan_medium(struct jffs2_sb_info *c); void jffs2_rotate_lists(struct jffs2_sb_info *c); -int jffs2_fill_scan_buf(struct jffs2_sb_info *c, void *buf, - uint32_t ofs, uint32_t len); struct jffs2_inode_cache *jffs2_scan_make_ino_cache(struct jffs2_sb_info *c, uint32_t ino); int jffs2_scan_classify_jeb(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb); int jffs2_scan_dirty_space(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, uint32_t size); diff --git a/fs/jffs2/nodemgmt.c b/fs/jffs2/nodemgmt.c index ac0c350..d883769 100644 --- a/fs/jffs2/nodemgmt.c +++ b/fs/jffs2/nodemgmt.c @@ -683,19 +683,26 @@ void jffs2_mark_node_obsolete(struct jffs2_sb_info *c, struct jffs2_raw_node_ref spin_lock(&c->erase_completion_lock); ic = jffs2_raw_ref_to_ic(ref); - /* It seems we should never call jffs2_mark_node_obsolete() for - XATTR nodes.... yet. 
Make sure we notice if/when we change - that :) */ - BUG_ON(ic->class != RAWNODE_CLASS_INODE_CACHE); for (p = &ic->nodes; (*p) != ref; p = &((*p)->next_in_ino)) ; *p = ref->next_in_ino; ref->next_in_ino = NULL; - if (ic->nodes == (void *)ic && ic->nlink == 0) - jffs2_del_ino_cache(c, ic); - + switch (ic->class) { +#ifdef CONFIG_JFFS2_FS_XATTR + case RAWNODE_CLASS_XATTR_DATUM: + jffs2_release_xattr_datum(c, (struct jffs2_xattr_datum *)ic); + break; + case RAWNODE_CLASS_XATTR_REF: + jffs2_release_xattr_ref(c, (struct jffs2_xattr_ref *)ic); + break; +#endif + default: + if (ic->nodes == (void *)ic && ic->nlink == 0) + jffs2_del_ino_cache(c, ic); + break; + } spin_unlock(&c->erase_completion_lock); } diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h index 6b52235..9f41fc0 100644 --- a/fs/jffs2/os-linux.h +++ b/fs/jffs2/os-linux.h @@ -158,7 +158,7 @@ extern struct inode_operations jffs2_dir_inode_operations; /* file.c */ extern const struct file_operations jffs2_file_operations; extern struct inode_operations jffs2_file_inode_operations; -extern struct address_space_operations jffs2_file_address_operations; +extern const struct address_space_operations jffs2_file_address_operations; int jffs2_fsync(struct file *, struct dentry *, int); int jffs2_do_readpage_unlock (struct inode *inode, struct page *pg); diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c index 5fec012..266423b 100644 --- a/fs/jffs2/readinode.c +++ b/fs/jffs2/readinode.c @@ -968,6 +968,8 @@ void jffs2_do_clear_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f) struct jffs2_full_dirent *fd, *fds; int deleted; + jffs2_clear_acl(f); + jffs2_xattr_delete_inode(c, f->inocache); down(&f->sem); deleted = f->inocache && !f->inocache->nlink; diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c index 6161808..e241346 100644 --- a/fs/jffs2/scan.c +++ b/fs/jffs2/scan.c @@ -274,8 +274,8 @@ int jffs2_scan_medium(struct jffs2_sb_info *c) return ret; } -int jffs2_fill_scan_buf (struct jffs2_sb_info *c, void 
*buf, - uint32_t ofs, uint32_t len) +static int jffs2_fill_scan_buf(struct jffs2_sb_info *c, void *buf, + uint32_t ofs, uint32_t len) { int ret; size_t retlen; @@ -317,20 +317,23 @@ static int jffs2_scan_xattr_node(struct jffs2_sb_info *c, struct jffs2_erasebloc struct jffs2_summary *s) { struct jffs2_xattr_datum *xd; - uint32_t totlen, crc; + uint32_t xid, version, totlen, crc; int err; crc = crc32(0, rx, sizeof(struct jffs2_raw_xattr) - 4); if (crc != je32_to_cpu(rx->node_crc)) { - if (je32_to_cpu(rx->node_crc) != 0xffffffff) - JFFS2_WARNING("node CRC failed at %#08x, read=%#08x, calc=%#08x\n", - ofs, je32_to_cpu(rx->node_crc), crc); + JFFS2_WARNING("node CRC failed at %#08x, read=%#08x, calc=%#08x\n", + ofs, je32_to_cpu(rx->node_crc), crc); if ((err = jffs2_scan_dirty_space(c, jeb, je32_to_cpu(rx->totlen)))) return err; return 0; } - totlen = PAD(sizeof(*rx) + rx->name_len + 1 + je16_to_cpu(rx->value_len)); + xid = je32_to_cpu(rx->xid); + version = je32_to_cpu(rx->version); + + totlen = PAD(sizeof(struct jffs2_raw_xattr) + + rx->name_len + 1 + je16_to_cpu(rx->value_len)); if (totlen != je32_to_cpu(rx->totlen)) { JFFS2_WARNING("node length mismatch at %#08x, read=%u, calc=%u\n", ofs, je32_to_cpu(rx->totlen), totlen); @@ -339,22 +342,24 @@ static int jffs2_scan_xattr_node(struct jffs2_sb_info *c, struct jffs2_erasebloc return 0; } - xd = jffs2_setup_xattr_datum(c, je32_to_cpu(rx->xid), je32_to_cpu(rx->version)); - if (IS_ERR(xd)) { - if (PTR_ERR(xd) == -EEXIST) { - if ((err = jffs2_scan_dirty_space(c, jeb, PAD(je32_to_cpu(rx->totlen))))) - return err; - return 0; - } + xd = jffs2_setup_xattr_datum(c, xid, version); + if (IS_ERR(xd)) return PTR_ERR(xd); - } - xd->xprefix = rx->xprefix; - xd->name_len = rx->name_len; - xd->value_len = je16_to_cpu(rx->value_len); - xd->data_crc = je32_to_cpu(rx->data_crc); - xd->node = jffs2_link_node_ref(c, jeb, ofs | REF_PRISTINE, totlen, NULL); - /* FIXME */ xd->node->next_in_ino = (void *)xd; + if (xd->version > version) { + 
struct jffs2_raw_node_ref *raw + = jffs2_link_node_ref(c, jeb, ofs | REF_PRISTINE, totlen, NULL); + raw->next_in_ino = xd->node->next_in_ino; + xd->node->next_in_ino = raw; + } else { + xd->version = version; + xd->xprefix = rx->xprefix; + xd->name_len = rx->name_len; + xd->value_len = je16_to_cpu(rx->value_len); + xd->data_crc = je32_to_cpu(rx->data_crc); + + jffs2_link_node_ref(c, jeb, ofs | REF_PRISTINE, totlen, (void *)xd); + } if (jffs2_sum_active()) jffs2_sum_add_xattr_mem(s, rx, ofs - jeb->offset); @@ -373,9 +378,8 @@ static int jffs2_scan_xref_node(struct jffs2_sb_info *c, struct jffs2_eraseblock crc = crc32(0, rr, sizeof(*rr) - 4); if (crc != je32_to_cpu(rr->node_crc)) { - if (je32_to_cpu(rr->node_crc) != 0xffffffff) - JFFS2_WARNING("node CRC failed at %#08x, read=%#08x, calc=%#08x\n", - ofs, je32_to_cpu(rr->node_crc), crc); + JFFS2_WARNING("node CRC failed at %#08x, read=%#08x, calc=%#08x\n", + ofs, je32_to_cpu(rr->node_crc), crc); if ((err = jffs2_scan_dirty_space(c, jeb, PAD(je32_to_cpu(rr->totlen))))) return err; return 0; @@ -395,6 +399,7 @@ static int jffs2_scan_xref_node(struct jffs2_sb_info *c, struct jffs2_eraseblock return -ENOMEM; /* BEFORE jffs2_build_xattr_subsystem() called, + * and AFTER xattr_ref is marked as a dead xref, * ref->xid is used to store 32bit xid, xd is not used * ref->ino is used to store 32bit inode-number, ic is not used * Thoes variables are declared as union, thus using those @@ -404,11 +409,13 @@ static int jffs2_scan_xref_node(struct jffs2_sb_info *c, struct jffs2_eraseblock */ ref->ino = je32_to_cpu(rr->ino); ref->xid = je32_to_cpu(rr->xid); + ref->xseqno = je32_to_cpu(rr->xseqno); + if (ref->xseqno > c->highest_xseqno) + c->highest_xseqno = (ref->xseqno & ~XREF_DELETE_MARKER); ref->next = c->xref_temp; c->xref_temp = ref; - ref->node = jffs2_link_node_ref(c, jeb, ofs | REF_PRISTINE, PAD(je32_to_cpu(rr->totlen)), NULL); - /* FIXME */ ref->node->next_in_ino = (void *)ref; + jffs2_link_node_ref(c, jeb, ofs | REF_PRISTINE, 
PAD(je32_to_cpu(rr->totlen)), (void *)ref); if (jffs2_sum_active()) jffs2_sum_add_xref_mem(s, rr, ofs - jeb->offset); diff --git a/fs/jffs2/summary.c b/fs/jffs2/summary.c index be1acc3..c19bd47 100644 --- a/fs/jffs2/summary.c +++ b/fs/jffs2/summary.c @@ -5,7 +5,7 @@ * Zoltan Sogor <weth@inf.u-szeged.hu>, * Patrik Kluba <pajko@halom.u-szeged.hu>, * University of Szeged, Hungary - * 2005 KaiGai Kohei <kaigai@ak.jp.nec.com> + * 2006 KaiGai Kohei <kaigai@ak.jp.nec.com> * * For licensing information, see the file 'LICENCE' in this directory. * @@ -310,8 +310,6 @@ int jffs2_sum_add_kvec(struct jffs2_sb_info *c, const struct kvec *invecs, #ifdef CONFIG_JFFS2_FS_XATTR case JFFS2_NODETYPE_XATTR: { struct jffs2_sum_xattr_mem *temp; - if (je32_to_cpu(node->x.version) == 0xffffffff) - return 0; temp = kmalloc(sizeof(struct jffs2_sum_xattr_mem), GFP_KERNEL); if (!temp) goto no_mem; @@ -327,10 +325,6 @@ int jffs2_sum_add_kvec(struct jffs2_sb_info *c, const struct kvec *invecs, } case JFFS2_NODETYPE_XREF: { struct jffs2_sum_xref_mem *temp; - - if (je32_to_cpu(node->r.ino) == 0xffffffff - && je32_to_cpu(node->r.xid) == 0xffffffff) - return 0; temp = kmalloc(sizeof(struct jffs2_sum_xref_mem), GFP_KERNEL); if (!temp) goto no_mem; @@ -483,22 +477,20 @@ static int jffs2_sum_process_sum_data(struct jffs2_sb_info *c, struct jffs2_eras xd = jffs2_setup_xattr_datum(c, je32_to_cpu(spx->xid), je32_to_cpu(spx->version)); - if (IS_ERR(xd)) { - if (PTR_ERR(xd) == -EEXIST) { - /* a newer version of xd exists */ - if ((err = jffs2_scan_dirty_space(c, jeb, je32_to_cpu(spx->totlen)))) - return err; - sp += JFFS2_SUMMARY_XATTR_SIZE; - break; - } - JFFS2_NOTICE("allocation of xattr_datum failed\n"); + if (IS_ERR(xd)) return PTR_ERR(xd); + if (xd->version > je32_to_cpu(spx->version)) { + /* node is not the newest one */ + struct jffs2_raw_node_ref *raw + = sum_link_node_ref(c, jeb, je32_to_cpu(spx->offset) | REF_UNCHECKED, + PAD(je32_to_cpu(spx->totlen)), NULL); + raw->next_in_ino = 
xd->node->next_in_ino; + xd->node->next_in_ino = raw; + } else { + xd->version = je32_to_cpu(spx->version); + sum_link_node_ref(c, jeb, je32_to_cpu(spx->offset) | REF_UNCHECKED, + PAD(je32_to_cpu(spx->totlen)), (void *)xd); } - - xd->node = sum_link_node_ref(c, jeb, je32_to_cpu(spx->offset) | REF_UNCHECKED, - PAD(je32_to_cpu(spx->totlen)), NULL); - /* FIXME */ xd->node->next_in_ino = (void *)xd; - *pseudo_random += je32_to_cpu(spx->xid); sp += JFFS2_SUMMARY_XATTR_SIZE; @@ -519,14 +511,11 @@ static int jffs2_sum_process_sum_data(struct jffs2_sb_info *c, struct jffs2_eras JFFS2_NOTICE("allocation of xattr_datum failed\n"); return -ENOMEM; } - ref->ino = 0xfffffffe; - ref->xid = 0xfffffffd; ref->next = c->xref_temp; c->xref_temp = ref; - ref->node = sum_link_node_ref(c, jeb, je32_to_cpu(spr->offset) | REF_UNCHECKED, - PAD(sizeof(struct jffs2_raw_xref)), NULL); - /* FIXME */ ref->node->next_in_ino = (void *)ref; + sum_link_node_ref(c, jeb, je32_to_cpu(spr->offset) | REF_UNCHECKED, + PAD(sizeof(struct jffs2_raw_xref)), (void *)ref); *pseudo_random += ref->node->flash_offset; sp += JFFS2_SUMMARY_XREF_SIZE; diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index 2378a66..68e3953 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -11,7 +11,6 @@ * */ -#include <linux/config.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/slab.h> diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c index 2d82e25..25bc1ae 100644 --- a/fs/jffs2/xattr.c +++ b/fs/jffs2/xattr.c @@ -23,18 +23,15 @@ * xattr_datum_hashkey(xprefix, xname, xvalue, xsize) * is used to calcurate xdatum hashkey. The reminder of hashkey into XATTRINDEX_HASHSIZE is * the index of the xattr name/value pair cache (c->xattrindex). + * is_xattr_datum_unchecked(c, xd) + * returns 1, if xdatum contains any unchecked raw nodes. if all raw nodes are not + * unchecked, it returns 0. * unload_xattr_datum(c, xd) * is used to release xattr name/value pair and detach from c->xattrindex. 
* reclaim_xattr_datum(c) * is used to reclaim xattr name/value pairs on the xattr name/value pair cache when * memory usage by cache is over c->xdatum_mem_threshold. Currentry, this threshold * is hard coded as 32KiB. - * delete_xattr_datum_node(c, xd) - * is used to delete a jffs2 node is dominated by xdatum. When EBS(Erase Block Summary) is - * enabled, it overwrites the obsolete node by myself. - * delete_xattr_datum(c, xd) - * is used to delete jffs2_xattr_datum object. It must be called with 0-value of reference - * counter. (It means how many jffs2_xattr_ref object refers this xdatum.) * do_verify_xattr_datum(c, xd) * is used to load the xdatum informations without name/value pair from the medium. * It's necessary once, because those informations are not collected during mounting @@ -53,8 +50,11 @@ * is used to write xdatum to medium. xd->version will be incremented. * create_xattr_datum(c, xprefix, xname, xvalue, xsize) * is used to create new xdatum and write to medium. + * unrefer_xattr_datum(c, xd) + * is used to delete a xdatum. When nobody refers this xdatum, JFFS2_XFLAGS_DEAD + * is set on xd->flags and chained xattr_dead_list or release it immediately. + * In the first case, the garbage collector release it later. 
* -------------------------------------------------- */ - static uint32_t xattr_datum_hashkey(int xprefix, const char *xname, const char *xvalue, int xsize) { int name_len = strlen(xname); @@ -62,6 +62,22 @@ static uint32_t xattr_datum_hashkey(int xprefix, const char *xname, const char * return crc32(xprefix, xname, name_len) ^ crc32(xprefix, xvalue, xsize); } +static int is_xattr_datum_unchecked(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd) +{ + struct jffs2_raw_node_ref *raw; + int rc = 0; + + spin_lock(&c->erase_completion_lock); + for (raw=xd->node; raw != (void *)xd; raw=raw->next_in_ino) { + if (ref_flags(raw) == REF_UNCHECKED) { + rc = 1; + break; + } + } + spin_unlock(&c->erase_completion_lock); + return rc; +} + static void unload_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd) { /* must be called under down_write(xattr_sem) */ @@ -107,77 +123,33 @@ static void reclaim_xattr_datum(struct jffs2_sb_info *c) before, c->xdatum_mem_usage, before - c->xdatum_mem_usage); } -static void delete_xattr_datum_node(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd) -{ - /* must be called under down_write(xattr_sem) */ - struct jffs2_raw_xattr rx; - size_t length; - int rc; - - if (!xd->node) { - JFFS2_WARNING("xdatum (xid=%u) is removed twice.\n", xd->xid); - return; - } - if (jffs2_sum_active()) { - memset(&rx, 0xff, sizeof(struct jffs2_raw_xattr)); - rc = jffs2_flash_read(c, ref_offset(xd->node), - sizeof(struct jffs2_unknown_node), - &length, (char *)&rx); - if (rc || length != sizeof(struct jffs2_unknown_node)) { - JFFS2_ERROR("jffs2_flash_read()=%d, req=%zu, read=%zu at %#08x\n", - rc, sizeof(struct jffs2_unknown_node), - length, ref_offset(xd->node)); - } - rc = jffs2_flash_write(c, ref_offset(xd->node), sizeof(rx), - &length, (char *)&rx); - if (rc || length != sizeof(struct jffs2_raw_xattr)) { - JFFS2_ERROR("jffs2_flash_write()=%d, req=%zu, wrote=%zu ar %#08x\n", - rc, sizeof(rx), length, ref_offset(xd->node)); - } - } - 
spin_lock(&c->erase_completion_lock); - xd->node->next_in_ino = NULL; - spin_unlock(&c->erase_completion_lock); - jffs2_mark_node_obsolete(c, xd->node); - xd->node = NULL; -} - -static void delete_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd) -{ - /* must be called under down_write(xattr_sem) */ - BUG_ON(xd->refcnt); - - unload_xattr_datum(c, xd); - if (xd->node) { - delete_xattr_datum_node(c, xd); - xd->node = NULL; - } - jffs2_free_xattr_datum(xd); -} - static int do_verify_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd) { /* must be called under down_write(xattr_sem) */ struct jffs2_eraseblock *jeb; + struct jffs2_raw_node_ref *raw; struct jffs2_raw_xattr rx; size_t readlen; - uint32_t crc, totlen; + uint32_t crc, offset, totlen; int rc; - BUG_ON(!xd->node); - BUG_ON(ref_flags(xd->node) != REF_UNCHECKED); + spin_lock(&c->erase_completion_lock); + offset = ref_offset(xd->node); + if (ref_flags(xd->node) == REF_PRISTINE) + goto complete; + spin_unlock(&c->erase_completion_lock); - rc = jffs2_flash_read(c, ref_offset(xd->node), sizeof(rx), &readlen, (char *)&rx); + rc = jffs2_flash_read(c, offset, sizeof(rx), &readlen, (char *)&rx); if (rc || readlen != sizeof(rx)) { JFFS2_WARNING("jffs2_flash_read()=%d, req=%zu, read=%zu at %#08x\n", - rc, sizeof(rx), readlen, ref_offset(xd->node)); + rc, sizeof(rx), readlen, offset); return rc ? 
rc : -EIO; } crc = crc32(0, &rx, sizeof(rx) - 4); if (crc != je32_to_cpu(rx.node_crc)) { - if (je32_to_cpu(rx.node_crc) != 0xffffffff) - JFFS2_ERROR("node CRC failed at %#08x, read=%#08x, calc=%#08x\n", - ref_offset(xd->node), je32_to_cpu(rx.hdr_crc), crc); + JFFS2_ERROR("node CRC failed at %#08x, read=%#08x, calc=%#08x\n", + offset, je32_to_cpu(rx.hdr_crc), crc); + xd->flags |= JFFS2_XFLAGS_INVALID; return EIO; } totlen = PAD(sizeof(rx) + rx.name_len + 1 + je16_to_cpu(rx.value_len)); @@ -188,11 +160,12 @@ static int do_verify_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_dat || je32_to_cpu(rx.version) != xd->version) { JFFS2_ERROR("inconsistent xdatum at %#08x, magic=%#04x/%#04x, " "nodetype=%#04x/%#04x, totlen=%u/%u, xid=%u/%u, version=%u/%u\n", - ref_offset(xd->node), je16_to_cpu(rx.magic), JFFS2_MAGIC_BITMASK, + offset, je16_to_cpu(rx.magic), JFFS2_MAGIC_BITMASK, je16_to_cpu(rx.nodetype), JFFS2_NODETYPE_XATTR, je32_to_cpu(rx.totlen), totlen, je32_to_cpu(rx.xid), xd->xid, je32_to_cpu(rx.version), xd->version); + xd->flags |= JFFS2_XFLAGS_INVALID; return EIO; } xd->xprefix = rx.xprefix; @@ -200,14 +173,17 @@ static int do_verify_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_dat xd->value_len = je16_to_cpu(rx.value_len); xd->data_crc = je32_to_cpu(rx.data_crc); - /* This JFFS2_NODETYPE_XATTR node is checked */ - jeb = &c->blocks[ref_offset(xd->node) / c->sector_size]; - totlen = PAD(je32_to_cpu(rx.totlen)); - spin_lock(&c->erase_completion_lock); - c->unchecked_size -= totlen; c->used_size += totlen; - jeb->unchecked_size -= totlen; jeb->used_size += totlen; - xd->node->flash_offset = ref_offset(xd->node) | REF_PRISTINE; + complete: + for (raw=xd->node; raw != (void *)xd; raw=raw->next_in_ino) { + jeb = &c->blocks[ref_offset(raw) / c->sector_size]; + totlen = PAD(ref_totlen(c, jeb, raw)); + if (ref_flags(raw) == REF_UNCHECKED) { + c->unchecked_size -= totlen; c->used_size += totlen; + jeb->unchecked_size -= totlen; jeb->used_size += totlen; + } 
+ raw->flash_offset = ref_offset(raw) | ((xd->node==raw) ? REF_PRISTINE : REF_NORMAL); + } spin_unlock(&c->erase_completion_lock); /* unchecked xdatum is chained with c->xattr_unchecked */ @@ -227,7 +203,6 @@ static int do_load_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum uint32_t crc, length; int i, ret, retry = 0; - BUG_ON(!xd->node); BUG_ON(ref_flags(xd->node) != REF_PRISTINE); BUG_ON(!list_empty(&xd->xindex)); retry: @@ -253,6 +228,7 @@ static int do_load_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum " at %#08x, read: 0x%08x calculated: 0x%08x\n", ref_offset(xd->node), xd->data_crc, crc); kfree(data); + xd->flags |= JFFS2_XFLAGS_INVALID; return EIO; } @@ -286,16 +262,14 @@ static int load_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *x * rc > 0 : Unrecoverable error, this node should be deleted. */ int rc = 0; - BUG_ON(xd->xname); - if (!xd->node) + + BUG_ON(xd->flags & JFFS2_XFLAGS_DEAD); + if (xd->xname) + return 0; + if (xd->flags & JFFS2_XFLAGS_INVALID) return EIO; - if (unlikely(ref_flags(xd->node) != REF_PRISTINE)) { + if (unlikely(is_xattr_datum_unchecked(c, xd))) rc = do_verify_xattr_datum(c, xd); - if (rc > 0) { - list_del_init(&xd->xindex); - delete_xattr_datum_node(c, xd); - } - } if (!rc) rc = do_load_xattr_datum(c, xd); return rc; @@ -304,7 +278,6 @@ static int load_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *x static int save_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd) { /* must be called under down_write(xattr_sem) */ - struct jffs2_raw_node_ref *raw; struct jffs2_raw_xattr rx; struct kvec vecs[2]; size_t length; @@ -312,14 +285,16 @@ static int save_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *x uint32_t phys_ofs = write_ofs(c); BUG_ON(!xd->xname); + BUG_ON(xd->flags & (JFFS2_XFLAGS_DEAD|JFFS2_XFLAGS_INVALID)); vecs[0].iov_base = &rx; - vecs[0].iov_len = PAD(sizeof(rx)); + vecs[0].iov_len = sizeof(rx); vecs[1].iov_base = xd->xname; 
vecs[1].iov_len = xd->name_len + 1 + xd->value_len; totlen = vecs[0].iov_len + vecs[1].iov_len; /* Setup raw-xattr */ + memset(&rx, 0, sizeof(rx)); rx.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK); rx.nodetype = cpu_to_je16(JFFS2_NODETYPE_XATTR); rx.totlen = cpu_to_je32(PAD(totlen)); @@ -343,14 +318,8 @@ static int save_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *x return rc; } - /* success */ - raw = jffs2_add_physical_node_ref(c, phys_ofs | REF_PRISTINE, PAD(totlen), NULL); - /* FIXME */ raw->next_in_ino = (void *)xd; - - if (xd->node) - delete_xattr_datum_node(c, xd); - xd->node = raw; + jffs2_add_physical_node_ref(c, phys_ofs | REF_PRISTINE, PAD(totlen), (void *)xd); dbg_xattr("success on saving xdatum (xid=%u, version=%u, xprefix=%u, xname='%s')\n", xd->xid, xd->version, xd->xprefix, xd->xname); @@ -377,7 +346,7 @@ static struct jffs2_xattr_datum *create_xattr_datum(struct jffs2_sb_info *c, && xd->value_len==xsize && !strcmp(xd->xname, xname) && !memcmp(xd->xvalue, xvalue, xsize)) { - xd->refcnt++; + atomic_inc(&xd->refcnt); return xd; } } @@ -397,7 +366,7 @@ static struct jffs2_xattr_datum *create_xattr_datum(struct jffs2_sb_info *c, strcpy(data, xname); memcpy(data + name_len + 1, xvalue, xsize); - xd->refcnt = 1; + atomic_set(&xd->refcnt, 1); xd->xid = ++c->highest_xid; xd->flags |= JFFS2_XFLAGS_HOT; xd->xprefix = xprefix; @@ -426,20 +395,38 @@ static struct jffs2_xattr_datum *create_xattr_datum(struct jffs2_sb_info *c, return xd; } +static void unrefer_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd) +{ + /* must be called under down_write(xattr_sem) */ + if (atomic_dec_and_lock(&xd->refcnt, &c->erase_completion_lock)) { + uint32_t xid = xd->xid, version = xd->version; + + unload_xattr_datum(c, xd); + xd->flags |= JFFS2_XFLAGS_DEAD; + if (xd->node == (void *)xd) { + BUG_ON(!(xd->flags & JFFS2_XFLAGS_INVALID)); + jffs2_free_xattr_datum(xd); + } else { + list_add(&xd->xindex, &c->xattr_dead_list); + } + 
spin_unlock(&c->erase_completion_lock); + + dbg_xattr("xdatum(xid=%u, version=%u) was removed.\n", xid, version); + } +} + /* -------- xref related functions ------------------ * verify_xattr_ref(c, ref) * is used to load xref information from medium. Because summary data does not * contain xid/ino, it's necessary to verify once while mounting process. - * delete_xattr_ref_node(c, ref) - * is used to delete a jffs2 node is dominated by xref. When EBS is enabled, - * it overwrites the obsolete node by myself. - * delete_xattr_ref(c, ref) - * is used to delete jffs2_xattr_ref object. If the reference counter of xdatum - * is refered by this xref become 0, delete_xattr_datum() is called later. * save_xattr_ref(c, ref) - * is used to write xref to medium. + * is used to write xref to medium. If delete marker is marked, it write + * a delete marker of xref into medium. * create_xattr_ref(c, ic, xd) * is used to create a new xref and write to medium. + * delete_xattr_ref(c, ref) + * is used to delete jffs2_xattr_ref. It marks xref XREF_DELETE_MARKER, + * and allows GC to reclaim those physical nodes. * jffs2_xattr_delete_inode(c, ic) * is called to remove xrefs related to obsolete inode when inode is unlinked. 
* jffs2_xattr_free_inode(c, ic) @@ -450,25 +437,29 @@ static struct jffs2_xattr_datum *create_xattr_datum(struct jffs2_sb_info *c, static int verify_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref) { struct jffs2_eraseblock *jeb; + struct jffs2_raw_node_ref *raw; struct jffs2_raw_xref rr; size_t readlen; - uint32_t crc, totlen; + uint32_t crc, offset, totlen; int rc; - BUG_ON(ref_flags(ref->node) != REF_UNCHECKED); + spin_lock(&c->erase_completion_lock); + if (ref_flags(ref->node) != REF_UNCHECKED) + goto complete; + offset = ref_offset(ref->node); + spin_unlock(&c->erase_completion_lock); - rc = jffs2_flash_read(c, ref_offset(ref->node), sizeof(rr), &readlen, (char *)&rr); + rc = jffs2_flash_read(c, offset, sizeof(rr), &readlen, (char *)&rr); if (rc || sizeof(rr) != readlen) { JFFS2_WARNING("jffs2_flash_read()=%d, req=%zu, read=%zu, at %#08x\n", - rc, sizeof(rr), readlen, ref_offset(ref->node)); + rc, sizeof(rr), readlen, offset); return rc ? rc : -EIO; } /* obsolete node */ crc = crc32(0, &rr, sizeof(rr) - 4); if (crc != je32_to_cpu(rr.node_crc)) { - if (je32_to_cpu(rr.node_crc) != 0xffffffff) - JFFS2_ERROR("node CRC failed at %#08x, read=%#08x, calc=%#08x\n", - ref_offset(ref->node), je32_to_cpu(rr.node_crc), crc); + JFFS2_ERROR("node CRC failed at %#08x, read=%#08x, calc=%#08x\n", + offset, je32_to_cpu(rr.node_crc), crc); return EIO; } if (je16_to_cpu(rr.magic) != JFFS2_MAGIC_BITMASK @@ -476,22 +467,28 @@ static int verify_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref || je32_to_cpu(rr.totlen) != PAD(sizeof(rr))) { JFFS2_ERROR("inconsistent xref at %#08x, magic=%#04x/%#04x, " "nodetype=%#04x/%#04x, totlen=%u/%zu\n", - ref_offset(ref->node), je16_to_cpu(rr.magic), JFFS2_MAGIC_BITMASK, + offset, je16_to_cpu(rr.magic), JFFS2_MAGIC_BITMASK, je16_to_cpu(rr.nodetype), JFFS2_NODETYPE_XREF, je32_to_cpu(rr.totlen), PAD(sizeof(rr))); return EIO; } ref->ino = je32_to_cpu(rr.ino); ref->xid = je32_to_cpu(rr.xid); - - /* fixup 
superblock/eraseblock info */ - jeb = &c->blocks[ref_offset(ref->node) / c->sector_size]; - totlen = PAD(sizeof(rr)); + ref->xseqno = je32_to_cpu(rr.xseqno); + if (ref->xseqno > c->highest_xseqno) + c->highest_xseqno = (ref->xseqno & ~XREF_DELETE_MARKER); spin_lock(&c->erase_completion_lock); - c->unchecked_size -= totlen; c->used_size += totlen; - jeb->unchecked_size -= totlen; jeb->used_size += totlen; - ref->node->flash_offset = ref_offset(ref->node) | REF_PRISTINE; + complete: + for (raw=ref->node; raw != (void *)ref; raw=raw->next_in_ino) { + jeb = &c->blocks[ref_offset(raw) / c->sector_size]; + totlen = PAD(ref_totlen(c, jeb, raw)); + if (ref_flags(raw) == REF_UNCHECKED) { + c->unchecked_size -= totlen; c->used_size += totlen; + jeb->unchecked_size -= totlen; jeb->used_size += totlen; + } + raw->flash_offset = ref_offset(raw) | ((ref->node==raw) ? REF_PRISTINE : REF_NORMAL); + } spin_unlock(&c->erase_completion_lock); dbg_xattr("success on verifying xref (ino=%u, xid=%u) at %#08x\n", @@ -499,58 +496,12 @@ static int verify_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref return 0; } -static void delete_xattr_ref_node(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref) -{ - struct jffs2_raw_xref rr; - size_t length; - int rc; - - if (jffs2_sum_active()) { - memset(&rr, 0xff, sizeof(rr)); - rc = jffs2_flash_read(c, ref_offset(ref->node), - sizeof(struct jffs2_unknown_node), - &length, (char *)&rr); - if (rc || length != sizeof(struct jffs2_unknown_node)) { - JFFS2_ERROR("jffs2_flash_read()=%d, req=%zu, read=%zu at %#08x\n", - rc, sizeof(struct jffs2_unknown_node), - length, ref_offset(ref->node)); - } - rc = jffs2_flash_write(c, ref_offset(ref->node), sizeof(rr), - &length, (char *)&rr); - if (rc || length != sizeof(struct jffs2_raw_xref)) { - JFFS2_ERROR("jffs2_flash_write()=%d, req=%zu, wrote=%zu at %#08x\n", - rc, sizeof(rr), length, ref_offset(ref->node)); - } - } - spin_lock(&c->erase_completion_lock); - ref->node->next_in_ino = NULL; - 
spin_unlock(&c->erase_completion_lock); - jffs2_mark_node_obsolete(c, ref->node); - ref->node = NULL; -} - -static void delete_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref) -{ - /* must be called under down_write(xattr_sem) */ - struct jffs2_xattr_datum *xd; - - BUG_ON(!ref->node); - delete_xattr_ref_node(c, ref); - - xd = ref->xd; - xd->refcnt--; - if (!xd->refcnt) - delete_xattr_datum(c, xd); - jffs2_free_xattr_ref(ref); -} - static int save_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref) { /* must be called under down_write(xattr_sem) */ - struct jffs2_raw_node_ref *raw; struct jffs2_raw_xref rr; size_t length; - uint32_t phys_ofs = write_ofs(c); + uint32_t xseqno, phys_ofs = write_ofs(c); int ret; rr.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK); @@ -558,8 +509,16 @@ static int save_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref) rr.totlen = cpu_to_je32(PAD(sizeof(rr))); rr.hdr_crc = cpu_to_je32(crc32(0, &rr, sizeof(struct jffs2_unknown_node) - 4)); - rr.ino = cpu_to_je32(ref->ic->ino); - rr.xid = cpu_to_je32(ref->xd->xid); + xseqno = (c->highest_xseqno += 2); + if (is_xattr_ref_dead(ref)) { + xseqno |= XREF_DELETE_MARKER; + rr.ino = cpu_to_je32(ref->ino); + rr.xid = cpu_to_je32(ref->xid); + } else { + rr.ino = cpu_to_je32(ref->ic->ino); + rr.xid = cpu_to_je32(ref->xd->xid); + } + rr.xseqno = cpu_to_je32(xseqno); rr.node_crc = cpu_to_je32(crc32(0, &rr, sizeof(rr) - 4)); ret = jffs2_flash_write(c, phys_ofs, sizeof(rr), &length, (char *)&rr); @@ -572,12 +531,9 @@ static int save_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref) return ret; } - - raw = jffs2_add_physical_node_ref(c, phys_ofs | REF_PRISTINE, PAD(sizeof(rr)), NULL); - /* FIXME */ raw->next_in_ino = (void *)ref; - if (ref->node) - delete_xattr_ref_node(c, ref); - ref->node = raw; + /* success */ + ref->xseqno = xseqno; + jffs2_add_physical_node_ref(c, phys_ofs | REF_PRISTINE, PAD(sizeof(rr)), (void *)ref); dbg_xattr("success on saving xref 
(ino=%u, xid=%u)\n", ref->ic->ino, ref->xd->xid); @@ -610,6 +566,26 @@ static struct jffs2_xattr_ref *create_xattr_ref(struct jffs2_sb_info *c, struct return ref; /* success */ } +static void delete_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref) +{ + /* must be called under down_write(xattr_sem) */ + struct jffs2_xattr_datum *xd; + + xd = ref->xd; + ref->xseqno |= XREF_DELETE_MARKER; + ref->ino = ref->ic->ino; + ref->xid = ref->xd->xid; + spin_lock(&c->erase_completion_lock); + ref->next = c->xref_dead_list; + c->xref_dead_list = ref; + spin_unlock(&c->erase_completion_lock); + + dbg_xattr("xref(ino=%u, xid=%u, xseqno=%u) was removed.\n", + ref->ino, ref->xid, ref->xseqno); + + unrefer_xattr_datum(c, xd); +} + void jffs2_xattr_delete_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic) { /* It's called from jffs2_clear_inode() on inode removing. @@ -638,8 +614,7 @@ void jffs2_xattr_free_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *i for (ref = ic->xref; ref; ref = _ref) { _ref = ref->next; xd = ref->xd; - xd->refcnt--; - if (!xd->refcnt) { + if (atomic_dec_and_test(&xd->refcnt)) { unload_xattr_datum(c, xd); jffs2_free_xattr_datum(xd); } @@ -655,7 +630,7 @@ static int check_xattr_ref_inode(struct jffs2_sb_info *c, struct jffs2_inode_cac * duplicate name/value pairs. If duplicate name/value pair would be found, * one will be removed. 
*/ - struct jffs2_xattr_ref *ref, *cmp, **pref; + struct jffs2_xattr_ref *ref, *cmp, **pref, **pcmp; int rc = 0; if (likely(ic->flags & INO_FLAGS_XATTR_CHECKED)) @@ -673,13 +648,13 @@ static int check_xattr_ref_inode(struct jffs2_sb_info *c, struct jffs2_inode_cac } else if (unlikely(rc < 0)) goto out; } - for (cmp=ref->next, pref=&ref->next; cmp; pref=&cmp->next, cmp=cmp->next) { + for (cmp=ref->next, pcmp=&ref->next; cmp; pcmp=&cmp->next, cmp=cmp->next) { if (!cmp->xd->xname) { ref->xd->flags |= JFFS2_XFLAGS_BIND; rc = load_xattr_datum(c, cmp->xd); ref->xd->flags &= ~JFFS2_XFLAGS_BIND; if (unlikely(rc > 0)) { - *pref = cmp->next; + *pcmp = cmp->next; delete_xattr_ref(c, cmp); goto retry; } else if (unlikely(rc < 0)) @@ -687,8 +662,13 @@ static int check_xattr_ref_inode(struct jffs2_sb_info *c, struct jffs2_inode_cac } if (ref->xd->xprefix == cmp->xd->xprefix && !strcmp(ref->xd->xname, cmp->xd->xname)) { - *pref = cmp->next; - delete_xattr_ref(c, cmp); + if (ref->xseqno > cmp->xseqno) { + *pcmp = cmp->next; + delete_xattr_ref(c, cmp); + } else { + *pref = ref->next; + delete_xattr_ref(c, ref); + } goto retry; } } @@ -719,9 +699,13 @@ void jffs2_init_xattr_subsystem(struct jffs2_sb_info *c) for (i=0; i < XATTRINDEX_HASHSIZE; i++) INIT_LIST_HEAD(&c->xattrindex[i]); INIT_LIST_HEAD(&c->xattr_unchecked); + INIT_LIST_HEAD(&c->xattr_dead_list); + c->xref_dead_list = NULL; c->xref_temp = NULL; init_rwsem(&c->xattr_sem); + c->highest_xid = 0; + c->highest_xseqno = 0; c->xdatum_mem_usage = 0; c->xdatum_mem_threshold = 32 * 1024; /* Default 32KB */ } @@ -751,7 +735,11 @@ void jffs2_clear_xattr_subsystem(struct jffs2_sb_info *c) _ref = ref->next; jffs2_free_xattr_ref(ref); } - c->xref_temp = NULL; + + for (ref=c->xref_dead_list; ref; ref = _ref) { + _ref = ref->next; + jffs2_free_xattr_ref(ref); + } for (i=0; i < XATTRINDEX_HASHSIZE; i++) { list_for_each_entry_safe(xd, _xd, &c->xattrindex[i], xindex) { @@ -761,100 +749,143 @@ void jffs2_clear_xattr_subsystem(struct 
jffs2_sb_info *c) jffs2_free_xattr_datum(xd); } } + + list_for_each_entry_safe(xd, _xd, &c->xattr_dead_list, xindex) { + list_del(&xd->xindex); + jffs2_free_xattr_datum(xd); + } } +#define XREF_TMPHASH_SIZE (128) void jffs2_build_xattr_subsystem(struct jffs2_sb_info *c) { struct jffs2_xattr_ref *ref, *_ref; + struct jffs2_xattr_ref *xref_tmphash[XREF_TMPHASH_SIZE]; struct jffs2_xattr_datum *xd, *_xd; struct jffs2_inode_cache *ic; - int i, xdatum_count =0, xdatum_unchecked_count = 0, xref_count = 0; + struct jffs2_raw_node_ref *raw; + int i, xdatum_count = 0, xdatum_unchecked_count = 0, xref_count = 0; + int xdatum_orphan_count = 0, xref_orphan_count = 0, xref_dead_count = 0; BUG_ON(!(c->flags & JFFS2_SB_FLAG_BUILDING)); - /* Phase.1 */ + /* Phase.1 : Merge same xref */ + for (i=0; i < XREF_TMPHASH_SIZE; i++) + xref_tmphash[i] = NULL; for (ref=c->xref_temp; ref; ref=_ref) { + struct jffs2_xattr_ref *tmp; + _ref = ref->next; - /* checking REF_UNCHECKED nodes */ if (ref_flags(ref->node) != REF_PRISTINE) { if (verify_xattr_ref(c, ref)) { - delete_xattr_ref_node(c, ref); + BUG_ON(ref->node->next_in_ino != (void *)ref); + ref->node->next_in_ino = NULL; + jffs2_mark_node_obsolete(c, ref->node); jffs2_free_xattr_ref(ref); continue; } } - /* At this point, ref->xid and ref->ino contain XID and inode number. - ref->xd and ref->ic are not valid yet. */ - xd = jffs2_find_xattr_datum(c, ref->xid); - ic = jffs2_get_ino_cache(c, ref->ino); - if (!xd || !ic) { - if (ref_flags(ref->node) != REF_UNCHECKED) - JFFS2_WARNING("xref(ino=%u, xid=%u) is orphan. 
\n", - ref->ino, ref->xid); - delete_xattr_ref_node(c, ref); + + i = (ref->ino ^ ref->xid) % XREF_TMPHASH_SIZE; + for (tmp=xref_tmphash[i]; tmp; tmp=tmp->next) { + if (tmp->ino == ref->ino && tmp->xid == ref->xid) + break; + } + if (tmp) { + raw = ref->node; + if (ref->xseqno > tmp->xseqno) { + tmp->xseqno = ref->xseqno; + raw->next_in_ino = tmp->node; + tmp->node = raw; + } else { + raw->next_in_ino = tmp->node->next_in_ino; + tmp->node->next_in_ino = raw; + } jffs2_free_xattr_ref(ref); continue; + } else { + ref->next = xref_tmphash[i]; + xref_tmphash[i] = ref; } - ref->xd = xd; - ref->ic = ic; - xd->refcnt++; - ref->next = ic->xref; - ic->xref = ref; - xref_count++; } c->xref_temp = NULL; - /* After this, ref->xid/ino are NEVER used. */ - /* Phase.2 */ + /* Phase.2 : Bind xref with inode_cache and xattr_datum */ + for (i=0; i < XREF_TMPHASH_SIZE; i++) { + for (ref=xref_tmphash[i]; ref; ref=_ref) { + xref_count++; + _ref = ref->next; + if (is_xattr_ref_dead(ref)) { + ref->next = c->xref_dead_list; + c->xref_dead_list = ref; + xref_dead_count++; + continue; + } + /* At this point, ref->xid and ref->ino contain XID and inode number. + ref->xd and ref->ic are not valid yet. 
*/ + xd = jffs2_find_xattr_datum(c, ref->xid); + ic = jffs2_get_ino_cache(c, ref->ino); + if (!xd || !ic) { + dbg_xattr("xref(ino=%u, xid=%u, xseqno=%u) is orphan.\n", + ref->ino, ref->xid, ref->xseqno); + ref->xseqno |= XREF_DELETE_MARKER; + ref->next = c->xref_dead_list; + c->xref_dead_list = ref; + xref_orphan_count++; + continue; + } + ref->xd = xd; + ref->ic = ic; + atomic_inc(&xd->refcnt); + ref->next = ic->xref; + ic->xref = ref; + } + } + + /* Phase.3 : Link unchecked xdatum to xattr_unchecked list */ for (i=0; i < XATTRINDEX_HASHSIZE; i++) { list_for_each_entry_safe(xd, _xd, &c->xattrindex[i], xindex) { + xdatum_count++; list_del_init(&xd->xindex); - if (!xd->refcnt) { - if (ref_flags(xd->node) != REF_UNCHECKED) - JFFS2_WARNING("orphan xdatum(xid=%u, version=%u) at %#08x\n", - xd->xid, xd->version, ref_offset(xd->node)); - delete_xattr_datum(c, xd); + if (!atomic_read(&xd->refcnt)) { + dbg_xattr("xdatum(xid=%u, version=%u) is orphan.\n", + xd->xid, xd->version); + xd->flags |= JFFS2_XFLAGS_DEAD; + list_add(&xd->xindex, &c->xattr_unchecked); + xdatum_orphan_count++; continue; } - if (ref_flags(xd->node) != REF_PRISTINE) { - dbg_xattr("unchecked xdatum(xid=%u) at %#08x\n", - xd->xid, ref_offset(xd->node)); + if (is_xattr_datum_unchecked(c, xd)) { + dbg_xattr("unchecked xdatum(xid=%u, version=%u)\n", + xd->xid, xd->version); list_add(&xd->xindex, &c->xattr_unchecked); xdatum_unchecked_count++; } - xdatum_count++; } } /* build complete */ - JFFS2_NOTICE("complete building xattr subsystem, %u of xdatum (%u unchecked) and " - "%u of xref found.\n", xdatum_count, xdatum_unchecked_count, xref_count); + JFFS2_NOTICE("complete building xattr subsystem, %u of xdatum" + " (%u unchecked, %u orphan) and " + "%u of xref (%u dead, %u orphan) found.\n", + xdatum_count, xdatum_unchecked_count, xdatum_orphan_count, + xref_count, xref_dead_count, xref_orphan_count); } struct jffs2_xattr_datum *jffs2_setup_xattr_datum(struct jffs2_sb_info *c, uint32_t xid, uint32_t version) { 
- struct jffs2_xattr_datum *xd, *_xd; + struct jffs2_xattr_datum *xd; - _xd = jffs2_find_xattr_datum(c, xid); - if (_xd) { - dbg_xattr("duplicate xdatum (xid=%u, version=%u/%u) at %#08x\n", - xid, version, _xd->version, ref_offset(_xd->node)); - if (version < _xd->version) - return ERR_PTR(-EEXIST); - } - xd = jffs2_alloc_xattr_datum(); - if (!xd) - return ERR_PTR(-ENOMEM); - xd->xid = xid; - xd->version = version; - if (xd->xid > c->highest_xid) - c->highest_xid = xd->xid; - list_add_tail(&xd->xindex, &c->xattrindex[xid % XATTRINDEX_HASHSIZE]); - - if (_xd) { - list_del_init(&_xd->xindex); - delete_xattr_datum_node(c, _xd); - jffs2_free_xattr_datum(_xd); + xd = jffs2_find_xattr_datum(c, xid); + if (!xd) { + xd = jffs2_alloc_xattr_datum(); + if (!xd) + return ERR_PTR(-ENOMEM); + xd->xid = xid; + xd->version = version; + if (xd->xid > c->highest_xid) + c->highest_xid = xd->xid; + list_add_tail(&xd->xindex, &c->xattrindex[xid % XATTRINDEX_HASHSIZE]); } return xd; } @@ -1080,9 +1111,22 @@ int do_jffs2_setxattr(struct inode *inode, int xprefix, const char *xname, goto out; } if (!buffer) { - *pref = ref->next; - delete_xattr_ref(c, ref); - rc = 0; + ref->ino = ic->ino; + ref->xid = xd->xid; + ref->xseqno |= XREF_DELETE_MARKER; + rc = save_xattr_ref(c, ref); + if (!rc) { + *pref = ref->next; + spin_lock(&c->erase_completion_lock); + ref->next = c->xref_dead_list; + c->xref_dead_list = ref; + spin_unlock(&c->erase_completion_lock); + unrefer_xattr_datum(c, xd); + } else { + ref->ic = ic; + ref->xd = xd; + ref->xseqno &= ~XREF_DELETE_MARKER; + } goto out; } goto found; @@ -1094,7 +1138,7 @@ int do_jffs2_setxattr(struct inode *inode, int xprefix, const char *xname, goto out; } if (!buffer) { - rc = -EINVAL; + rc = -ENODATA; goto out; } found: @@ -1110,16 +1154,13 @@ int do_jffs2_setxattr(struct inode *inode, int xprefix, const char *xname, request = PAD(sizeof(struct jffs2_raw_xref)); rc = jffs2_reserve_space(c, request, &length, ALLOC_NORMAL, JFFS2_SUMMARY_XREF_SIZE); + 
down_write(&c->xattr_sem); if (rc) { JFFS2_WARNING("jffs2_reserve_space()=%d, request=%u\n", rc, request); - down_write(&c->xattr_sem); - xd->refcnt--; - if (!xd->refcnt) - delete_xattr_datum(c, xd); + unrefer_xattr_datum(c, xd); up_write(&c->xattr_sem); return rc; } - down_write(&c->xattr_sem); if (ref) *pref = ref->next; newref = create_xattr_ref(c, ic, xd); @@ -1129,9 +1170,7 @@ int do_jffs2_setxattr(struct inode *inode, int xprefix, const char *xname, ic->xref = ref; } rc = PTR_ERR(newref); - xd->refcnt--; - if (!xd->refcnt) - delete_xattr_datum(c, xd); + unrefer_xattr_datum(c, xd); } else if (ref) { delete_xattr_ref(c, ref); } @@ -1142,38 +1181,40 @@ int do_jffs2_setxattr(struct inode *inode, int xprefix, const char *xname, } /* -------- garbage collector functions ------------- - * jffs2_garbage_collect_xattr_datum(c, xd) + * jffs2_garbage_collect_xattr_datum(c, xd, raw) * is used to move xdatum into new node. - * jffs2_garbage_collect_xattr_ref(c, ref) + * jffs2_garbage_collect_xattr_ref(c, ref, raw) * is used to move xref into new node. * jffs2_verify_xattr(c) * is used to call do_verify_xattr_datum() before garbage collecting. + * jffs2_release_xattr_datum(c, xd) + * is used to release an in-memory object of xdatum. + * jffs2_release_xattr_ref(c, ref) + * is used to release an in-memory object of xref. 
* -------------------------------------------------- */ -int jffs2_garbage_collect_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd) +int jffs2_garbage_collect_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd, + struct jffs2_raw_node_ref *raw) { uint32_t totlen, length, old_ofs; - int rc = -EINVAL; + int rc = 0; down_write(&c->xattr_sem); - BUG_ON(!xd->node); - - old_ofs = ref_offset(xd->node); - totlen = ref_totlen(c, c->gcblock, xd->node); - if (totlen < sizeof(struct jffs2_raw_xattr)) + if (xd->node != raw) + goto out; + if (xd->flags & (JFFS2_XFLAGS_DEAD|JFFS2_XFLAGS_INVALID)) goto out; - if (!xd->xname) { - rc = load_xattr_datum(c, xd); - if (unlikely(rc > 0)) { - delete_xattr_datum_node(c, xd); - rc = 0; - goto out; - } else if (unlikely(rc < 0)) - goto out; + rc = load_xattr_datum(c, xd); + if (unlikely(rc)) { + rc = (rc > 0) ? 0 : rc; + goto out; } + old_ofs = ref_offset(xd->node); + totlen = PAD(sizeof(struct jffs2_raw_xattr) + + xd->name_len + 1 + xd->value_len); rc = jffs2_reserve_space_gc(c, totlen, &length, JFFS2_SUMMARY_XATTR_SIZE); - if (rc || length < totlen) { - JFFS2_WARNING("jffs2_reserve_space()=%d, request=%u\n", rc, totlen); + if (rc) { + JFFS2_WARNING("jffs2_reserve_space_gc()=%d, request=%u\n", rc, totlen); rc = rc ? 
rc : -EBADFD; goto out; } @@ -1182,27 +1223,32 @@ int jffs2_garbage_collect_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xatt dbg_xattr("xdatum (xid=%u, version=%u) GC'ed from %#08x to %08x\n", xd->xid, xd->version, old_ofs, ref_offset(xd->node)); out: + if (!rc) + jffs2_mark_node_obsolete(c, raw); up_write(&c->xattr_sem); return rc; } - -int jffs2_garbage_collect_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref) +int jffs2_garbage_collect_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref, + struct jffs2_raw_node_ref *raw) { uint32_t totlen, length, old_ofs; - int rc = -EINVAL; + int rc = 0; down_write(&c->xattr_sem); BUG_ON(!ref->node); + if (ref->node != raw) + goto out; + if (is_xattr_ref_dead(ref) && (raw->next_in_ino == (void *)ref)) + goto out; + old_ofs = ref_offset(ref->node); totlen = ref_totlen(c, c->gcblock, ref->node); - if (totlen != sizeof(struct jffs2_raw_xref)) - goto out; rc = jffs2_reserve_space_gc(c, totlen, &length, JFFS2_SUMMARY_XREF_SIZE); - if (rc || length < totlen) { - JFFS2_WARNING("%s: jffs2_reserve_space() = %d, request = %u\n", + if (rc) { + JFFS2_WARNING("%s: jffs2_reserve_space_gc() = %d, request = %u\n", __FUNCTION__, rc, totlen); rc = rc ? 
rc : -EBADFD; goto out; @@ -1212,6 +1258,8 @@ int jffs2_garbage_collect_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ dbg_xattr("xref (ino=%u, xid=%u) GC'ed from %#08x to %08x\n", ref->ic->ino, ref->xd->xid, old_ofs, ref_offset(ref->node)); out: + if (!rc) + jffs2_mark_node_obsolete(c, raw); up_write(&c->xattr_sem); return rc; } @@ -1219,20 +1267,59 @@ int jffs2_garbage_collect_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ int jffs2_verify_xattr(struct jffs2_sb_info *c) { struct jffs2_xattr_datum *xd, *_xd; + struct jffs2_eraseblock *jeb; + struct jffs2_raw_node_ref *raw; + uint32_t totlen; int rc; down_write(&c->xattr_sem); list_for_each_entry_safe(xd, _xd, &c->xattr_unchecked, xindex) { rc = do_verify_xattr_datum(c, xd); - if (rc == 0) { - list_del_init(&xd->xindex); - break; - } else if (rc > 0) { - list_del_init(&xd->xindex); - delete_xattr_datum_node(c, xd); + if (rc < 0) + continue; + list_del_init(&xd->xindex); + spin_lock(&c->erase_completion_lock); + for (raw=xd->node; raw != (void *)xd; raw=raw->next_in_ino) { + if (ref_flags(raw) != REF_UNCHECKED) + continue; + jeb = &c->blocks[ref_offset(raw) / c->sector_size]; + totlen = PAD(ref_totlen(c, jeb, raw)); + c->unchecked_size -= totlen; c->used_size += totlen; + jeb->unchecked_size -= totlen; jeb->used_size += totlen; + raw->flash_offset = ref_offset(raw) + | ((xd->node == (void *)raw) ? REF_PRISTINE : REF_NORMAL); } + if (xd->flags & JFFS2_XFLAGS_DEAD) + list_add(&xd->xindex, &c->xattr_dead_list); + spin_unlock(&c->erase_completion_lock); } up_write(&c->xattr_sem); - return list_empty(&c->xattr_unchecked) ? 
1 : 0; } + +void jffs2_release_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd) +{ + /* must be called under spin_lock(&c->erase_completion_lock) */ + if (atomic_read(&xd->refcnt) || xd->node != (void *)xd) + return; + + list_del(&xd->xindex); + jffs2_free_xattr_datum(xd); +} + +void jffs2_release_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref) +{ + /* must be called under spin_lock(&c->erase_completion_lock) */ + struct jffs2_xattr_ref *tmp, **ptmp; + + if (ref->node != (void *)ref) + return; + + for (tmp=c->xref_dead_list, ptmp=&c->xref_dead_list; tmp; ptmp=&tmp->next, tmp=tmp->next) { + if (ref == tmp) { + *ptmp = tmp->next; + break; + } + } + jffs2_free_xattr_ref(ref); +} diff --git a/fs/jffs2/xattr.h b/fs/jffs2/xattr.h index 2c19985..06a5c69 100644 --- a/fs/jffs2/xattr.h +++ b/fs/jffs2/xattr.h @@ -16,6 +16,8 @@ #define JFFS2_XFLAGS_HOT (0x01) /* This datum is HOT */ #define JFFS2_XFLAGS_BIND (0x02) /* This datum is not reclaimed */ +#define JFFS2_XFLAGS_DEAD (0x40) /* This datum is already dead */ +#define JFFS2_XFLAGS_INVALID (0x80) /* This datum contains crc error */ struct jffs2_xattr_datum { @@ -23,10 +25,10 @@ struct jffs2_xattr_datum struct jffs2_raw_node_ref *node; uint8_t class; uint8_t flags; - uint16_t xprefix; /* see JFFS2_XATTR_PREFIX_* */ + uint16_t xprefix; /* see JFFS2_XATTR_PREFIX_* */ struct list_head xindex; /* chained from c->xattrindex[n] */ - uint32_t refcnt; /* # of xattr_ref refers this */ + atomic_t refcnt; /* # of xattr_ref refers this */ uint32_t xid; uint32_t version; @@ -47,6 +49,7 @@ struct jffs2_xattr_ref uint8_t flags; /* Currently unused */ u16 unused; + uint32_t xseqno; union { struct jffs2_inode_cache *ic; /* reference to jffs2_inode_cache */ uint32_t ino; /* only used in scanning/building */ @@ -58,6 +61,12 @@ struct jffs2_xattr_ref struct jffs2_xattr_ref *next; /* chained from ic->xref_list */ }; +#define XREF_DELETE_MARKER (0x00000001) +static inline int is_xattr_ref_dead(struct 
jffs2_xattr_ref *ref) +{ + return ((ref->xseqno & XREF_DELETE_MARKER) != 0); +} + #ifdef CONFIG_JFFS2_FS_XATTR extern void jffs2_init_xattr_subsystem(struct jffs2_sb_info *c); @@ -70,9 +79,13 @@ extern struct jffs2_xattr_datum *jffs2_setup_xattr_datum(struct jffs2_sb_info *c extern void jffs2_xattr_delete_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic); extern void jffs2_xattr_free_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic); -extern int jffs2_garbage_collect_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd); -extern int jffs2_garbage_collect_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref); +extern int jffs2_garbage_collect_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd, + struct jffs2_raw_node_ref *raw); +extern int jffs2_garbage_collect_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref, + struct jffs2_raw_node_ref *raw); extern int jffs2_verify_xattr(struct jffs2_sb_info *c); +extern void jffs2_release_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd); +extern void jffs2_release_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref); extern int do_jffs2_getxattr(struct inode *inode, int xprefix, const char *xname, char *buffer, size_t size); diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index 04eb78f..43e3f56 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -305,7 +305,7 @@ static ssize_t jfs_direct_IO(int rw, struct kiocb *iocb, offset, nr_segs, jfs_get_block, NULL); } -struct address_space_operations jfs_aops = { +const struct address_space_operations jfs_aops = { .readpage = jfs_readpage, .readpages = jfs_readpages, .writepage = jfs_writepage, diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h index c300726..b5c7da6 100644 --- a/fs/jfs/jfs_inode.h +++ b/fs/jfs/jfs_inode.h @@ -33,7 +33,7 @@ extern void jfs_free_zero_link(struct inode *); extern struct dentry *jfs_get_parent(struct dentry *dentry); extern void jfs_set_inode_flags(struct 
inode *); -extern struct address_space_operations jfs_aops; +extern const struct address_space_operations jfs_aops; extern struct inode_operations jfs_dir_inode_operations; extern const struct file_operations jfs_dir_operations; extern struct inode_operations jfs_file_inode_operations; diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index 7f6e880..e1e0a6e 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c @@ -577,7 +577,7 @@ static void metapage_invalidatepage(struct page *page, unsigned long offset) metapage_releasepage(page, 0); } -struct address_space_operations jfs_metapage_aops = { +const struct address_space_operations jfs_metapage_aops = { .readpage = metapage_readpage, .writepage = metapage_writepage, .sync_page = block_sync_page, diff --git a/fs/jfs/jfs_metapage.h b/fs/jfs/jfs_metapage.h index f0b7d32..d17a329 100644 --- a/fs/jfs/jfs_metapage.h +++ b/fs/jfs/jfs_metapage.h @@ -139,7 +139,7 @@ static inline void metapage_homeok(struct metapage *mp) put_metapage(mp); } -extern struct address_space_operations jfs_metapage_aops; +extern const struct address_space_operations jfs_metapage_aops; /* * This routines invalidate all pages for an extent. 
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c index ac3d669..10c4623 100644 --- a/fs/jfs/jfs_txnmgr.c +++ b/fs/jfs/jfs_txnmgr.c @@ -842,7 +842,7 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp, TXN_UNLOCK(); release_metapage(mp); TXN_LOCK(); - xtid = tlck->tid; /* reaquire after dropping TXN_LOCK */ + xtid = tlck->tid; /* reacquire after dropping TXN_LOCK */ jfs_info("txLock: in waitLock, tid = %d, xtid = %d, lid = %d", tid, xtid, lid); diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 73d2aba..4f6cfeb 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -18,7 +18,6 @@ */ #include <linux/fs.h> -#include <linux/config.h> #include <linux/module.h> #include <linux/parser.h> #include <linux/completion.h> diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 4db6209..89ba0df 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -6,7 +6,6 @@ * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de> */ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/errno.h> @@ -455,7 +454,7 @@ static void nlmclnt_locks_init_private(struct file_lock *fl, struct nlm_host *ho fl->fl_ops = &nlmclnt_lock_ops; } -static void do_vfs_lock(struct file_lock *fl) +static int do_vfs_lock(struct file_lock *fl) { int res = 0; switch (fl->fl_flags & (FL_POSIX|FL_FLOCK)) { @@ -468,9 +467,7 @@ static void do_vfs_lock(struct file_lock *fl) default: BUG(); } - if (res < 0) - printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", - __FUNCTION__); + return res; } /* @@ -499,6 +496,7 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl) struct nlm_host *host = req->a_host; struct nlm_res *resp = &req->a_res; struct nlm_wait *block = NULL; + unsigned char fl_flags = fl->fl_flags; int status = -ENOLCK; if (!host->h_monitored && nsm_monitor(host) < 0) { @@ -506,6 +504,10 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl) host->h_name); goto out; } + fl->fl_flags |= FL_ACCESS; + status = 
do_vfs_lock(fl); + if (status < 0) + goto out; block = nlmclnt_prepare_block(host, fl); again: @@ -540,9 +542,10 @@ again: up_read(&host->h_rwsem); goto again; } - fl->fl_flags |= FL_SLEEP; /* Ensure the resulting lock will get added to granted list */ - do_vfs_lock(fl); + fl->fl_flags = fl_flags | FL_SLEEP; + if (do_vfs_lock(fl) < 0) + printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __FUNCTION__); up_read(&host->h_rwsem); } status = nlm_stat_to_errno(resp->status); @@ -553,6 +556,7 @@ out_unblock: nlmclnt_cancel(host, req->a_args.block, fl); out: nlm_release_call(req); + fl->fl_flags = fl_flags; return status; } @@ -607,15 +611,19 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl) { struct nlm_host *host = req->a_host; struct nlm_res *resp = &req->a_res; - int status; + int status = 0; /* * Note: the server is supposed to either grant us the unlock * request, or to deny it with NLM_LCK_DENIED_GRACE_PERIOD. In either * case, we want to unlock. */ + fl->fl_flags |= FL_EXISTS; down_read(&host->h_rwsem); - do_vfs_lock(fl); + if (do_vfs_lock(fl) == -ENOENT) { + up_read(&host->h_rwsem); + goto out; + } up_read(&host->h_rwsem); if (req->a_flags & RPC_TASK_ASYNC) @@ -625,7 +633,6 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl) if (status < 0) goto out; - status = 0; if (resp->status == NLM_LCK_GRANTED) goto out; diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index fd56c88..9a991b5 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -12,7 +12,6 @@ * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> */ -#include <linux/config.h> #include <linux/module.h> #include <linux/init.h> #include <linux/sysctl.h> diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 3ef7391..baf5ae5 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -20,7 +20,6 @@ * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de> */ -#include <linux/config.h> #include <linux/types.h> #include <linux/errno.h> #include <linux/kernel.h> diff --git 
a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index d210cf3..dbb66a3 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -7,7 +7,6 @@ * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de> */ -#include <linux/config.h> #include <linux/types.h> #include <linux/time.h> #include <linux/slab.h> diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index a570e5c..2a4df9b 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -6,7 +6,6 @@ * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de> */ -#include <linux/config.h> #include <linux/types.h> #include <linux/string.h> #include <linux/time.h> diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c index f22a376..033ea4a 100644 --- a/fs/lockd/xdr.c +++ b/fs/lockd/xdr.c @@ -6,7 +6,6 @@ * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> */ -#include <linux/config.h> #include <linux/types.h> #include <linux/sched.h> #include <linux/utsname.h> @@ -725,6 +725,10 @@ next_task: /* Try to create a FLOCK lock on filp. We always insert new FLOCK locks * at the head of the list, but that's secret knowledge known only to * flock_lock_file and posix_lock_file. + * + * Note that if called with an FL_EXISTS argument, the caller may determine + * whether or not a lock was successfully freed by testing the return + * value for -ENOENT. 
*/ static int flock_lock_file(struct file *filp, struct file_lock *request) { @@ -735,6 +739,8 @@ static int flock_lock_file(struct file *filp, struct file_lock *request) int found = 0; lock_kernel(); + if (request->fl_flags & FL_ACCESS) + goto find_conflict; for_each_lock(inode, before) { struct file_lock *fl = *before; if (IS_POSIX(fl)) @@ -750,8 +756,11 @@ static int flock_lock_file(struct file *filp, struct file_lock *request) break; } - if (request->fl_type == F_UNLCK) + if (request->fl_type == F_UNLCK) { + if ((request->fl_flags & FL_EXISTS) && !found) + error = -ENOENT; goto out; + } error = -ENOMEM; new_fl = locks_alloc_lock(); @@ -764,6 +773,7 @@ static int flock_lock_file(struct file *filp, struct file_lock *request) if (found) cond_resched(); +find_conflict: for_each_lock(inode, before) { struct file_lock *fl = *before; if (IS_POSIX(fl)) @@ -777,6 +787,8 @@ static int flock_lock_file(struct file *filp, struct file_lock *request) locks_insert_block(fl, request); goto out; } + if (request->fl_flags & FL_ACCESS) + goto out; locks_copy_lock(new_fl, request); locks_insert_lock(&inode->i_flock, new_fl); new_fl = NULL; @@ -948,8 +960,11 @@ static int __posix_lock_file_conf(struct inode *inode, struct file_lock *request error = 0; if (!added) { - if (request->fl_type == F_UNLCK) + if (request->fl_type == F_UNLCK) { + if (request->fl_flags & FL_EXISTS) + error = -ENOENT; goto out; + } if (!new_fl) { error = -ENOLCK; @@ -996,6 +1011,10 @@ static int __posix_lock_file_conf(struct inode *inode, struct file_lock *request * Add a POSIX style lock to a file. * We merge adjacent & overlapping locks whenever possible. * POSIX locks are sorted by owner task, then by starting address + * + * Note that if called with an FL_EXISTS argument, the caller may determine + * whether or not a lock was successfully freed by testing the return + * value for -ENOENT. 
*/ int posix_lock_file(struct file *filp, struct file_lock *fl) { diff --git a/fs/minix/inode.c b/fs/minix/inode.c index a6fb509..9ea91c5 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -335,7 +335,7 @@ static sector_t minix_bmap(struct address_space *mapping, sector_t block) { return generic_block_bmap(mapping,block,minix_get_block); } -static struct address_space_operations minix_aops = { +static const struct address_space_operations minix_aops = { .readpage = minix_readpage, .writepage = minix_writepage, .sync_page = block_sync_page, @@ -1423,7 +1423,7 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2) struct dentry *p; if (p1 == p2) { - mutex_lock(&p1->d_inode->i_mutex); + mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT); return NULL; } @@ -1431,22 +1431,22 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2) for (p = p1; p->d_parent != p; p = p->d_parent) { if (p->d_parent == p2) { - mutex_lock(&p2->d_inode->i_mutex); - mutex_lock(&p1->d_inode->i_mutex); + mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_CHILD); return p; } } for (p = p2; p->d_parent != p; p = p->d_parent) { if (p->d_parent == p1) { - mutex_lock(&p1->d_inode->i_mutex); - mutex_lock(&p2->d_inode->i_mutex); + mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD); return p; } } - mutex_lock(&p1->d_inode->i_mutex); - mutex_lock(&p2->d_inode->i_mutex); + mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD); return NULL; } @@ -1751,7 +1751,7 @@ struct dentry *lookup_create(struct nameidata *nd, int is_dir) { struct dentry *dentry = ERR_PTR(-EEXIST); - mutex_lock(&nd->dentry->d_inode->i_mutex); + mutex_lock_nested(&nd->dentry->d_inode->i_mutex, I_MUTEX_PARENT); /* * Yucky last component or no last component at all? 
* (foo/., foo/.., /////) @@ -2008,7 +2008,7 @@ static long do_rmdir(int dfd, const char __user *pathname) error = -EBUSY; goto exit1; } - mutex_lock(&nd.dentry->d_inode->i_mutex); + mutex_lock_nested(&nd.dentry->d_inode->i_mutex, I_MUTEX_PARENT); dentry = lookup_hash(&nd); error = PTR_ERR(dentry); if (!IS_ERR(dentry)) { @@ -2082,7 +2082,7 @@ static long do_unlinkat(int dfd, const char __user *pathname) error = -EISDIR; if (nd.last_type != LAST_NORM) goto exit1; - mutex_lock(&nd.dentry->d_inode->i_mutex); + mutex_lock_nested(&nd.dentry->d_inode->i_mutex, I_MUTEX_PARENT); dentry = lookup_hash(&nd); error = PTR_ERR(dentry); if (!IS_ERR(dentry)) { diff --git a/fs/namespace.c b/fs/namespace.c index b3ed212..fa7ed6a9f 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -8,7 +8,6 @@ * Heavily rewritten. */ -#include <linux/config.h> #include <linux/syscalls.h> #include <linux/slab.h> #include <linux/sched.h> diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index f0860c6..b4ee892 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -10,7 +10,6 @@ * */ -#include <linux/config.h> #include <linux/time.h> #include <linux/errno.h> diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index 90d2ea2..1ddf77b 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -9,7 +9,6 @@ * */ -#include <linux/config.h> #include <linux/module.h> #include <asm/system.h> @@ -105,7 +104,7 @@ static struct super_operations ncp_sops = extern struct dentry_operations ncp_root_dentry_operations; #if defined(CONFIG_NCPFS_EXTRAS) || defined(CONFIG_NCPFS_NFS_NS) -extern struct address_space_operations ncp_symlink_aops; +extern const struct address_space_operations ncp_symlink_aops; extern int ncp_symlink(struct inode*, struct dentry*, const char*); #endif diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c index eb3813a..42039fe 100644 --- a/fs/ncpfs/ioctl.c +++ b/fs/ncpfs/ioctl.c @@ -7,7 +7,6 @@ * */ -#include <linux/config.h> #include <asm/uaccess.h> #include <linux/capability.h> diff --git a/fs/ncpfs/mmap.c 
b/fs/ncpfs/mmap.c index 52d60c3..e7d5a30 100644 --- a/fs/ncpfs/mmap.c +++ b/fs/ncpfs/mmap.c @@ -93,7 +93,7 @@ static struct page* ncp_file_mmap_nopage(struct vm_area_struct *area, */ if (type) *type = VM_FAULT_MAJOR; - inc_page_state(pgmajfault); + count_vm_event(PGMAJFAULT); return page; } diff --git a/fs/ncpfs/ncplib_kernel.c b/fs/ncpfs/ncplib_kernel.c index d9ebf64..551e0ba 100644 --- a/fs/ncpfs/ncplib_kernel.c +++ b/fs/ncpfs/ncplib_kernel.c @@ -10,7 +10,6 @@ */ -#include <linux/config.h> #include "ncplib_kernel.h" diff --git a/fs/ncpfs/ncplib_kernel.h b/fs/ncpfs/ncplib_kernel.h index 799e5c2..2441d1a 100644 --- a/fs/ncpfs/ncplib_kernel.h +++ b/fs/ncpfs/ncplib_kernel.h @@ -12,7 +12,6 @@ #ifndef _NCPLIB_H #define _NCPLIB_H -#include <linux/config.h> #include <linux/fs.h> #include <linux/types.h> diff --git a/fs/ncpfs/ncpsign_kernel.c b/fs/ncpfs/ncpsign_kernel.c index a6ec90c..749a18d 100644 --- a/fs/ncpfs/ncpsign_kernel.c +++ b/fs/ncpfs/ncpsign_kernel.c @@ -5,7 +5,6 @@ * */ -#include <linux/config.h> #ifdef CONFIG_NCPFS_PACKET_SIGNING diff --git a/fs/ncpfs/sock.c b/fs/ncpfs/sock.c index 8783eb7..11c2b25 100644 --- a/fs/ncpfs/sock.c +++ b/fs/ncpfs/sock.c @@ -8,7 +8,6 @@ * */ -#include <linux/config.h> #include <linux/time.h> #include <linux/errno.h> diff --git a/fs/ncpfs/symlink.c b/fs/ncpfs/symlink.c index e935f1b..ca92c24 100644 --- a/fs/ncpfs/symlink.c +++ b/fs/ncpfs/symlink.c @@ -20,7 +20,6 @@ * */ -#include <linux/config.h> #include <asm/uaccess.h> @@ -99,7 +98,7 @@ fail: /* * symlinks can't do much... 
*/ -struct address_space_operations ncp_symlink_aops = { +const struct address_space_operations ncp_symlink_aops = { .readpage = ncp_symlink_readpage, }; diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index d53f8c6..fe0a6b8 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -6,7 +6,6 @@ * NFSv4 callback handling */ -#include <linux/config.h> #include <linux/completion.h> #include <linux/ip.h> #include <linux/module.h> diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 462cfce..7719483 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -5,7 +5,6 @@ * * NFSv4 callback procedures */ -#include <linux/config.h> #include <linux/nfs4.h> #include <linux/nfs_fs.h> #include "nfs4_fs.h" diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index c929913..29f9321 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -5,7 +5,6 @@ * * NFSv4 callback encode/decode procedures */ -#include <linux/config.h> #include <linux/kernel.h> #include <linux/sunrpc/svc.h> #include <linux/nfs4.h> diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index d3be923..9540a31 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -6,7 +6,6 @@ * NFS file delegation management * */ -#include <linux/config.h> #include <linux/completion.h> #include <linux/kthread.h> #include <linux/module.h> diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 3ddda6f..e7ffb4d 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -690,7 +690,9 @@ int nfs_lookup_verify_inode(struct inode *inode, struct nameidata *nd) goto out_force; /* This is an open(2) */ if (nfs_lookup_check_intent(nd, LOOKUP_OPEN) != 0 && - !(server->flags & NFS_MOUNT_NOCTO)) + !(server->flags & NFS_MOUNT_NOCTO) && + (S_ISREG(inode->i_mode) || + S_ISDIR(inode->i_mode))) goto out_force; } return nfs_revalidate_inode(server, inode); diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 8ca9707..fecd3b0 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -38,7 +38,6 @@ * */ -#include 
<linux/config.h> #include <linux/errno.h> #include <linux/sched.h> #include <linux/kernel.h> @@ -68,25 +67,19 @@ struct nfs_direct_req { struct kref kref; /* release manager */ /* I/O parameters */ - struct list_head list, /* nfs_read/write_data structs */ - rewrite_list; /* saved nfs_write_data structs */ struct nfs_open_context *ctx; /* file open context info */ struct kiocb * iocb; /* controlling i/o request */ struct inode * inode; /* target file of i/o */ - unsigned long user_addr; /* location of user's buffer */ - size_t user_count; /* total bytes to move */ - loff_t pos; /* starting offset in file */ - struct page ** pages; /* pages in our buffer */ - unsigned int npages; /* count of pages */ /* completion state */ + atomic_t io_count; /* i/os we're waiting for */ spinlock_t lock; /* protect completion state */ - int outstanding; /* i/os we're waiting for */ ssize_t count, /* bytes actually processed */ error; /* any reported error */ struct completion completion; /* wait for i/o completion */ /* commit state */ + struct list_head rewrite_list; /* saved nfs_write_data structs */ struct nfs_write_data * commit_data; /* special write_data for commits */ int flags; #define NFS_ODIRECT_DO_COMMIT (1) /* an unstable reply was received */ @@ -94,8 +87,37 @@ struct nfs_direct_req { struct nfs_writeverf verf; /* unstable write verifier */ }; -static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync); static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode); +static const struct rpc_call_ops nfs_write_direct_ops; + +static inline void get_dreq(struct nfs_direct_req *dreq) +{ + atomic_inc(&dreq->io_count); +} + +static inline int put_dreq(struct nfs_direct_req *dreq) +{ + return atomic_dec_and_test(&dreq->io_count); +} + +/* + * "size" is never larger than rsize or wsize. 
+ */ +static inline int nfs_direct_count_pages(unsigned long user_addr, size_t size) +{ + int page_count; + + page_count = (user_addr + size + PAGE_SIZE - 1) >> PAGE_SHIFT; + page_count -= user_addr >> PAGE_SHIFT; + BUG_ON(page_count < 0); + + return page_count; +} + +static inline unsigned int nfs_max_pages(unsigned int size) +{ + return (size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; +} /** * nfs_direct_IO - NFS address space operation for direct I/O @@ -119,50 +141,21 @@ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_ return -EINVAL; } -static void nfs_free_user_pages(struct page **pages, int npages, int do_dirty) +static void nfs_direct_dirty_pages(struct page **pages, int npages) { int i; for (i = 0; i < npages; i++) { struct page *page = pages[i]; - if (do_dirty && !PageCompound(page)) + if (!PageCompound(page)) set_page_dirty_lock(page); - page_cache_release(page); } - kfree(pages); } -static inline int nfs_get_user_pages(int rw, unsigned long user_addr, size_t size, struct page ***pages) +static void nfs_direct_release_pages(struct page **pages, int npages) { - int result = -ENOMEM; - unsigned long page_count; - size_t array_size; - - page_count = (user_addr + size + PAGE_SIZE - 1) >> PAGE_SHIFT; - page_count -= user_addr >> PAGE_SHIFT; - - array_size = (page_count * sizeof(struct page *)); - *pages = kmalloc(array_size, GFP_KERNEL); - if (*pages) { - down_read(&current->mm->mmap_sem); - result = get_user_pages(current, current->mm, user_addr, - page_count, (rw == READ), 0, - *pages, NULL); - up_read(&current->mm->mmap_sem); - if (result != page_count) { - /* - * If we got fewer pages than expected from - * get_user_pages(), the user buffer runs off the - * end of a mapping; return EFAULT. 
- */ - if (result >= 0) { - nfs_free_user_pages(*pages, result, 0); - result = -EFAULT; - } else - kfree(*pages); - *pages = NULL; - } - } - return result; + int i; + for (i = 0; i < npages; i++) + page_cache_release(pages[i]); } static inline struct nfs_direct_req *nfs_direct_req_alloc(void) @@ -174,13 +167,13 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void) return NULL; kref_init(&dreq->kref); + kref_get(&dreq->kref); init_completion(&dreq->completion); - INIT_LIST_HEAD(&dreq->list); INIT_LIST_HEAD(&dreq->rewrite_list); dreq->iocb = NULL; dreq->ctx = NULL; spin_lock_init(&dreq->lock); - dreq->outstanding = 0; + atomic_set(&dreq->io_count, 0); dreq->count = 0; dreq->error = 0; dreq->flags = 0; @@ -221,18 +214,11 @@ out: } /* - * We must hold a reference to all the pages in this direct read request - * until the RPCs complete. This could be long *after* we are woken up in - * nfs_direct_wait (for instance, if someone hits ^C on a slow server). - * - * In addition, synchronous I/O uses a stack-allocated iocb. Thus we - * can't trust the iocb is still valid here if this is a synchronous - * request. If the waiter is woken prematurely, the iocb is long gone. + * Synchronous I/O uses a stack-allocated iocb. Thus we can't trust + * the iocb is still valid here if this is a synchronous request. */ static void nfs_direct_complete(struct nfs_direct_req *dreq) { - nfs_free_user_pages(dreq->pages, dreq->npages, 1); - if (dreq->iocb) { long res = (long) dreq->error; if (!res) @@ -245,48 +231,10 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq) } /* - * Note we also set the number of requests we have in the dreq when we are - * done. This prevents races with I/O completion so we will always wait - * until all requests have been dispatched and completed. + * We must hold a reference to all the pages in this direct read request + * until the RPCs complete. 
This could be long *after* we are woken up in + * nfs_direct_wait (for instance, if someone hits ^C on a slow server). */ -static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, size_t rsize) -{ - struct list_head *list; - struct nfs_direct_req *dreq; - unsigned int rpages = (rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - - dreq = nfs_direct_req_alloc(); - if (!dreq) - return NULL; - - list = &dreq->list; - for(;;) { - struct nfs_read_data *data = nfs_readdata_alloc(rpages); - - if (unlikely(!data)) { - while (!list_empty(list)) { - data = list_entry(list->next, - struct nfs_read_data, pages); - list_del(&data->pages); - nfs_readdata_free(data); - } - kref_put(&dreq->kref, nfs_direct_req_release); - return NULL; - } - - INIT_LIST_HEAD(&data->pages); - list_add(&data->pages, list); - - data->req = (struct nfs_page *) dreq; - dreq->outstanding++; - if (nbytes <= rsize) - break; - nbytes -= rsize; - } - kref_get(&dreq->kref); - return dreq; -} - static void nfs_direct_read_result(struct rpc_task *task, void *calldata) { struct nfs_read_data *data = calldata; @@ -295,6 +243,9 @@ static void nfs_direct_read_result(struct rpc_task *task, void *calldata) if (nfs_readpage_result(task, data) != 0) return; + nfs_direct_dirty_pages(data->pagevec, data->npages); + nfs_direct_release_pages(data->pagevec, data->npages); + spin_lock(&dreq->lock); if (likely(task->tk_status >= 0)) @@ -302,13 +253,10 @@ static void nfs_direct_read_result(struct rpc_task *task, void *calldata) else dreq->error = task->tk_status; - if (--dreq->outstanding) { - spin_unlock(&dreq->lock); - return; - } - spin_unlock(&dreq->lock); - nfs_direct_complete(dreq); + + if (put_dreq(dreq)) + nfs_direct_complete(dreq); } static const struct rpc_call_ops nfs_read_direct_ops = { @@ -317,41 +265,60 @@ static const struct rpc_call_ops nfs_read_direct_ops = { }; /* - * For each nfs_read_data struct that was allocated on the list, dispatch - * an NFS READ operation + * For each rsize'd chunk of the 
user's buffer, dispatch an NFS READ + * operation. If nfs_readdata_alloc() or get_user_pages() fails, + * bail and stop sending more reads. Read length accounting is + * handled automatically by nfs_direct_read_result(). Otherwise, if + * no requests have been sent, just return an error. */ -static void nfs_direct_read_schedule(struct nfs_direct_req *dreq) +static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos) { struct nfs_open_context *ctx = dreq->ctx; struct inode *inode = ctx->dentry->d_inode; - struct list_head *list = &dreq->list; - struct page **pages = dreq->pages; - size_t count = dreq->user_count; - loff_t pos = dreq->pos; size_t rsize = NFS_SERVER(inode)->rsize; - unsigned int curpage, pgbase; + unsigned int rpages = nfs_max_pages(rsize); + unsigned int pgbase; + int result; + ssize_t started = 0; + + get_dreq(dreq); - curpage = 0; - pgbase = dreq->user_addr & ~PAGE_MASK; + pgbase = user_addr & ~PAGE_MASK; do { struct nfs_read_data *data; size_t bytes; + result = -ENOMEM; + data = nfs_readdata_alloc(rpages); + if (unlikely(!data)) + break; + bytes = rsize; if (count < rsize) bytes = count; - BUG_ON(list_empty(list)); - data = list_entry(list->next, struct nfs_read_data, pages); - list_del_init(&data->pages); + data->npages = nfs_direct_count_pages(user_addr, bytes); + down_read(&current->mm->mmap_sem); + result = get_user_pages(current, current->mm, user_addr, + data->npages, 1, 0, data->pagevec, NULL); + up_read(&current->mm->mmap_sem); + if (unlikely(result < data->npages)) { + if (result > 0) + nfs_direct_release_pages(data->pagevec, result); + nfs_readdata_release(data); + break; + } + + get_dreq(dreq); + data->req = (struct nfs_page *) dreq; data->inode = inode; data->cred = ctx->cred; data->args.fh = NFS_FH(inode); data->args.context = ctx; data->args.offset = pos; data->args.pgbase = pgbase; - data->args.pages = &pages[curpage]; + data->args.pages = data->pagevec; data->args.count = bytes; 
data->res.fattr = &data->fattr; data->res.eof = 0; @@ -374,33 +341,35 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq) bytes, (unsigned long long)data->args.offset); + started += bytes; + user_addr += bytes; pos += bytes; pgbase += bytes; - curpage += pgbase >> PAGE_SHIFT; pgbase &= ~PAGE_MASK; count -= bytes; } while (count != 0); - BUG_ON(!list_empty(list)); + + if (put_dreq(dreq)) + nfs_direct_complete(dreq); + + if (started) + return 0; + return result < 0 ? (ssize_t) result : -EFAULT; } -static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos, struct page **pages, unsigned int nr_pages) +static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos) { - ssize_t result; + ssize_t result = 0; sigset_t oldset; struct inode *inode = iocb->ki_filp->f_mapping->host; struct rpc_clnt *clnt = NFS_CLIENT(inode); struct nfs_direct_req *dreq; - dreq = nfs_direct_read_alloc(count, NFS_SERVER(inode)->rsize); + dreq = nfs_direct_req_alloc(); if (!dreq) return -ENOMEM; - dreq->user_addr = user_addr; - dreq->user_count = count; - dreq->pos = pos; - dreq->pages = pages; - dreq->npages = nr_pages; dreq->inode = inode; dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data); if (!is_sync_kiocb(iocb)) @@ -408,8 +377,9 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size nfs_add_stats(inode, NFSIOS_DIRECTREADBYTES, count); rpc_clnt_sigmask(clnt, &oldset); - nfs_direct_read_schedule(dreq); - result = nfs_direct_wait(dreq); + result = nfs_direct_read_schedule(dreq, user_addr, count, pos); + if (!result) + result = nfs_direct_wait(dreq); rpc_clnt_sigunmask(clnt, &oldset); return result; @@ -417,10 +387,10 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size static void nfs_direct_free_writedata(struct nfs_direct_req *dreq) { - list_splice_init(&dreq->rewrite_list, &dreq->list); - while 
(!list_empty(&dreq->list)) { - struct nfs_write_data *data = list_entry(dreq->list.next, struct nfs_write_data, pages); + while (!list_empty(&dreq->rewrite_list)) { + struct nfs_write_data *data = list_entry(dreq->rewrite_list.next, struct nfs_write_data, pages); list_del(&data->pages); + nfs_direct_release_pages(data->pagevec, data->npages); nfs_writedata_release(data); } } @@ -428,14 +398,51 @@ static void nfs_direct_free_writedata(struct nfs_direct_req *dreq) #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) { - struct list_head *pos; + struct inode *inode = dreq->inode; + struct list_head *p; + struct nfs_write_data *data; - list_splice_init(&dreq->rewrite_list, &dreq->list); - list_for_each(pos, &dreq->list) - dreq->outstanding++; dreq->count = 0; + get_dreq(dreq); + + list_for_each(p, &dreq->rewrite_list) { + data = list_entry(p, struct nfs_write_data, pages); + + get_dreq(dreq); + + /* + * Reset data->res. + */ + nfs_fattr_init(&data->fattr); + data->res.count = data->args.count; + memset(&data->verf, 0, sizeof(data->verf)); + + /* + * Reuse data->task; data->args should not have changed + * since the original request was sent. + */ + rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC, + &nfs_write_direct_ops, data); + NFS_PROTO(inode)->write_setup(data, FLUSH_STABLE); + + data->task.tk_priority = RPC_PRIORITY_NORMAL; + data->task.tk_cookie = (unsigned long) inode; + + /* + * We're called via an RPC callback, so BKL is already held. 
+ */ + rpc_execute(&data->task); + + dprintk("NFS: %5u rescheduled direct write call (req %s/%Ld, %u bytes @ offset %Lu)\n", + data->task.tk_pid, + inode->i_sb->s_id, + (long long)NFS_FILEID(inode), + data->args.count, + (unsigned long long)data->args.offset); + } - nfs_direct_write_schedule(dreq, FLUSH_STABLE); + if (put_dreq(dreq)) + nfs_direct_write_complete(dreq, inode); } static void nfs_direct_commit_result(struct rpc_task *task, void *calldata) @@ -472,8 +479,8 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) data->cred = dreq->ctx->cred; data->args.fh = NFS_FH(data->inode); - data->args.offset = dreq->pos; - data->args.count = dreq->user_count; + data->args.offset = 0; + data->args.count = 0; data->res.count = 0; data->res.fattr = &data->fattr; data->res.verf = &data->verf; @@ -535,47 +542,6 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode } #endif -static struct nfs_direct_req *nfs_direct_write_alloc(size_t nbytes, size_t wsize) -{ - struct list_head *list; - struct nfs_direct_req *dreq; - unsigned int wpages = (wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - - dreq = nfs_direct_req_alloc(); - if (!dreq) - return NULL; - - list = &dreq->list; - for(;;) { - struct nfs_write_data *data = nfs_writedata_alloc(wpages); - - if (unlikely(!data)) { - while (!list_empty(list)) { - data = list_entry(list->next, - struct nfs_write_data, pages); - list_del(&data->pages); - nfs_writedata_free(data); - } - kref_put(&dreq->kref, nfs_direct_req_release); - return NULL; - } - - INIT_LIST_HEAD(&data->pages); - list_add(&data->pages, list); - - data->req = (struct nfs_page *) dreq; - dreq->outstanding++; - if (nbytes <= wsize) - break; - nbytes -= wsize; - } - - nfs_alloc_commit_data(dreq); - - kref_get(&dreq->kref); - return dreq; -} - static void nfs_direct_write_result(struct rpc_task *task, void *calldata) { struct nfs_write_data *data = calldata; @@ -605,8 +571,6 @@ static void nfs_direct_write_result(struct 
rpc_task *task, void *calldata) } } } - /* In case we have to resend */ - data->args.stable = NFS_FILE_SYNC; spin_unlock(&dreq->lock); } @@ -620,14 +584,8 @@ static void nfs_direct_write_release(void *calldata) struct nfs_write_data *data = calldata; struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req; - spin_lock(&dreq->lock); - if (--dreq->outstanding) { - spin_unlock(&dreq->lock); - return; - } - spin_unlock(&dreq->lock); - - nfs_direct_write_complete(dreq, data->inode); + if (put_dreq(dreq)) + nfs_direct_write_complete(dreq, data->inode); } static const struct rpc_call_ops nfs_write_direct_ops = { @@ -636,41 +594,62 @@ static const struct rpc_call_ops nfs_write_direct_ops = { }; /* - * For each nfs_write_data struct that was allocated on the list, dispatch - * an NFS WRITE operation + * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE + * operation. If nfs_writedata_alloc() or get_user_pages() fails, + * bail and stop sending more writes. Write length accounting is + * handled automatically by nfs_direct_write_result(). Otherwise, if + * no requests have been sent, just return an error. 
*/ -static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync) +static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos, int sync) { struct nfs_open_context *ctx = dreq->ctx; struct inode *inode = ctx->dentry->d_inode; - struct list_head *list = &dreq->list; - struct page **pages = dreq->pages; - size_t count = dreq->user_count; - loff_t pos = dreq->pos; size_t wsize = NFS_SERVER(inode)->wsize; - unsigned int curpage, pgbase; + unsigned int wpages = nfs_max_pages(wsize); + unsigned int pgbase; + int result; + ssize_t started = 0; - curpage = 0; - pgbase = dreq->user_addr & ~PAGE_MASK; + get_dreq(dreq); + + pgbase = user_addr & ~PAGE_MASK; do { struct nfs_write_data *data; size_t bytes; + result = -ENOMEM; + data = nfs_writedata_alloc(wpages); + if (unlikely(!data)) + break; + bytes = wsize; if (count < wsize) bytes = count; - BUG_ON(list_empty(list)); - data = list_entry(list->next, struct nfs_write_data, pages); + data->npages = nfs_direct_count_pages(user_addr, bytes); + down_read(&current->mm->mmap_sem); + result = get_user_pages(current, current->mm, user_addr, + data->npages, 0, 0, data->pagevec, NULL); + up_read(&current->mm->mmap_sem); + if (unlikely(result < data->npages)) { + if (result > 0) + nfs_direct_release_pages(data->pagevec, result); + nfs_writedata_release(data); + break; + } + + get_dreq(dreq); + list_move_tail(&data->pages, &dreq->rewrite_list); + data->req = (struct nfs_page *) dreq; data->inode = inode; data->cred = ctx->cred; data->args.fh = NFS_FH(inode); data->args.context = ctx; data->args.offset = pos; data->args.pgbase = pgbase; - data->args.pages = &pages[curpage]; + data->args.pages = data->pagevec; data->args.count = bytes; data->res.fattr = &data->fattr; data->res.count = bytes; @@ -694,19 +673,26 @@ static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync) bytes, (unsigned long long)data->args.offset); + started += bytes; + user_addr += bytes; 
pos += bytes; pgbase += bytes; - curpage += pgbase >> PAGE_SHIFT; pgbase &= ~PAGE_MASK; count -= bytes; } while (count != 0); - BUG_ON(!list_empty(list)); + + if (put_dreq(dreq)) + nfs_direct_write_complete(dreq, inode); + + if (started) + return 0; + return result < 0 ? (ssize_t) result : -EFAULT; } -static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos, struct page **pages, int nr_pages) +static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos) { - ssize_t result; + ssize_t result = 0; sigset_t oldset; struct inode *inode = iocb->ki_filp->f_mapping->host; struct rpc_clnt *clnt = NFS_CLIENT(inode); @@ -714,17 +700,14 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz size_t wsize = NFS_SERVER(inode)->wsize; int sync = 0; - dreq = nfs_direct_write_alloc(count, wsize); + dreq = nfs_direct_req_alloc(); if (!dreq) return -ENOMEM; + nfs_alloc_commit_data(dreq); + if (dreq->commit_data == NULL || count < wsize) sync = FLUSH_STABLE; - dreq->user_addr = user_addr; - dreq->user_count = count; - dreq->pos = pos; - dreq->pages = pages; - dreq->npages = nr_pages; dreq->inode = inode; dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data); if (!is_sync_kiocb(iocb)) @@ -735,8 +718,9 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz nfs_begin_data_update(inode); rpc_clnt_sigmask(clnt, &oldset); - nfs_direct_write_schedule(dreq, sync); - result = nfs_direct_wait(dreq); + result = nfs_direct_write_schedule(dreq, user_addr, count, pos, sync); + if (!result) + result = nfs_direct_wait(dreq); rpc_clnt_sigunmask(clnt, &oldset); return result; @@ -766,8 +750,6 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos) { ssize_t retval = -EINVAL; - int page_count; - struct page 
**pages; struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; @@ -789,14 +771,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, if (retval) goto out; - retval = nfs_get_user_pages(READ, (unsigned long) buf, - count, &pages); - if (retval < 0) - goto out; - page_count = retval; - - retval = nfs_direct_read(iocb, (unsigned long) buf, count, pos, - pages, page_count); + retval = nfs_direct_read(iocb, (unsigned long) buf, count, pos); if (retval > 0) iocb->ki_pos = pos + retval; @@ -832,8 +807,6 @@ out: ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos) { ssize_t retval; - int page_count; - struct page **pages; struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; @@ -861,14 +834,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t if (retval) goto out; - retval = nfs_get_user_pages(WRITE, (unsigned long) buf, - count, &pages); - if (retval < 0) - goto out; - page_count = retval; - - retval = nfs_direct_write(iocb, (unsigned long) buf, count, - pos, pages, page_count); + retval = nfs_direct_write(iocb, (unsigned long) buf, count, pos); /* * XXX: nfs_end_data_update() already ensures this file's diff --git a/fs/nfs/file.c b/fs/nfs/file.c index add2891..cc2b874 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -315,7 +315,7 @@ static int nfs_release_page(struct page *page, gfp_t gfp) return !nfs_wb_page(page->mapping->host, page); } -struct address_space_operations nfs_file_aops = { +const struct address_space_operations nfs_file_aops = { .readpage = nfs_readpage, .readpages = nfs_readpages, .set_page_dirty = __set_page_dirty_nobuffers, diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index c5b9166..d349fb2 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -13,7 +13,6 @@ * */ -#include <linux/config.h> #include <linux/module.h> #include <linux/init.h> diff --git a/fs/nfs/internal.h 
b/fs/nfs/internal.h index 4fe51c1..e4f4e5d 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -81,9 +81,9 @@ extern struct file_system_type clone_nfs_fs_type; #ifdef CONFIG_NFS_V4 extern struct file_system_type clone_nfs4_fs_type; #endif -#ifdef CONFIG_PROC_FS + extern struct rpc_stat nfs_rpcstat; -#endif + extern int __init register_nfs_fs(void); extern void __exit unregister_nfs_fs(void); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index b4916b0..e6ee97f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3144,9 +3144,6 @@ static int do_vfs_lock(struct file *file, struct file_lock *fl) default: BUG(); } - if (res < 0) - printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", - __FUNCTION__); return res; } @@ -3258,8 +3255,6 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl, return ERR_PTR(-ENOMEM); } - /* Unlock _before_ we do the RPC call */ - do_vfs_lock(fl->fl_file, fl); return rpc_run_task(NFS_CLIENT(lsp->ls_state->inode), RPC_TASK_ASYNC, &nfs4_locku_ops, data); } @@ -3270,30 +3265,28 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock * struct rpc_task *task; int status = 0; - /* Is this a delegated lock? */ - if (test_bit(NFS_DELEGATED_STATE, &state->flags)) - goto out_unlock; - /* Is this open_owner holding any locks on the server? */ - if (test_bit(LK_STATE_IN_USE, &state->flags) == 0) - goto out_unlock; - status = nfs4_set_lock_state(state, request); + /* Unlock _before_ we do the RPC call */ + request->fl_flags |= FL_EXISTS; + if (do_vfs_lock(request->fl_file, request) == -ENOENT) + goto out; if (status != 0) - goto out_unlock; + goto out; + /* Is this a delegated lock? 
*/ + if (test_bit(NFS_DELEGATED_STATE, &state->flags)) + goto out; lsp = request->fl_u.nfs4_fl.owner; - status = -ENOMEM; seqid = nfs_alloc_seqid(&lsp->ls_seqid); + status = -ENOMEM; if (seqid == NULL) - goto out_unlock; + goto out; task = nfs4_do_unlck(request, request->fl_file->private_data, lsp, seqid); status = PTR_ERR(task); if (IS_ERR(task)) - goto out_unlock; + goto out; status = nfs4_wait_for_completion_rpc_task(task); rpc_release_task(task); - return status; -out_unlock: - do_vfs_lock(request->fl_file, request); +out: return status; } @@ -3461,10 +3454,10 @@ static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request struct nfs4_exception exception = { }; int err; - /* Cache the lock if possible... */ - if (test_bit(NFS_DELEGATED_STATE, &state->flags)) - return 0; do { + /* Cache the lock if possible... */ + if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0) + return 0; err = _nfs4_do_setlk(state, F_SETLK, request, 1); if (err != -NFS4ERR_DELAY) break; @@ -3483,6 +3476,8 @@ static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request if (err != 0) return err; do { + if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0) + return 0; err = _nfs4_do_setlk(state, F_SETLK, request, 0); if (err != -NFS4ERR_DELAY) break; @@ -3494,29 +3489,42 @@ static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) { struct nfs4_client *clp = state->owner->so_client; + unsigned char fl_flags = request->fl_flags; int status; /* Is this a delegated open? */ - if (NFS_I(state->inode)->delegation_state != 0) { - /* Yes: cache locks! */ - status = do_vfs_lock(request->fl_file, request); - /* ...but avoid races with delegation recall... 
*/ - if (status < 0 || test_bit(NFS_DELEGATED_STATE, &state->flags)) - return status; - } - down_read(&clp->cl_sem); status = nfs4_set_lock_state(state, request); if (status != 0) goto out; + request->fl_flags |= FL_ACCESS; + status = do_vfs_lock(request->fl_file, request); + if (status < 0) + goto out; + down_read(&clp->cl_sem); + if (test_bit(NFS_DELEGATED_STATE, &state->flags)) { + struct nfs_inode *nfsi = NFS_I(state->inode); + /* Yes: cache locks! */ + down_read(&nfsi->rwsem); + /* ...but avoid races with delegation recall... */ + if (test_bit(NFS_DELEGATED_STATE, &state->flags)) { + request->fl_flags = fl_flags & ~FL_SLEEP; + status = do_vfs_lock(request->fl_file, request); + up_read(&nfsi->rwsem); + goto out_unlock; + } + up_read(&nfsi->rwsem); + } status = _nfs4_do_setlk(state, cmd, request, 0); if (status != 0) - goto out; + goto out_unlock; /* Note: we always want to sleep here! */ - request->fl_flags |= FL_SLEEP; + request->fl_flags = fl_flags | FL_SLEEP; if (do_vfs_lock(request->fl_file, request) < 0) printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __FUNCTION__); -out: +out_unlock: up_read(&clp->cl_sem); +out: + request->fl_flags = fl_flags; return status; } diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 96e5b82..090a36b 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -38,7 +38,6 @@ * subsequent patch. 
*/ -#include <linux/config.h> #include <linux/slab.h> #include <linux/smp_lock.h> #include <linux/nfs_fs.h> diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index d89f6fb..36e902a 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -9,7 +9,6 @@ * */ -#include <linux/config.h> #include <linux/slab.h> #include <linux/file.h> #include <linux/sunrpc/clnt.h> @@ -315,6 +314,7 @@ nfs_scan_lock_dirty(struct nfs_inode *nfsi, struct list_head *dst, req->wb_index, NFS_PAGE_TAG_DIRTY); nfs_list_remove_request(req); nfs_list_add_request(req, dst); + dec_zone_page_state(req->wb_page, NR_FILE_DIRTY); res++; } } diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 32cf377..52bf634 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -15,7 +15,6 @@ * within the RPC code when root squashing is suspected. */ -#include <linux/config.h> #include <linux/time.h> #include <linux/kernel.h> #include <linux/errno.h> diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c index db61e51..2fe3403 100644 --- a/fs/nfs/sysctl.c +++ b/fs/nfs/sysctl.c @@ -3,7 +3,6 @@ * * Sysctl interface to NFS parameters */ -#include <linux/config.h> #include <linux/types.h> #include <linux/linkage.h> #include <linux/ctype.h> diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 8fccb9c..86bac6a 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -46,7 +46,6 @@ * Copyright (C) 1996, 1997, Olaf Kirch <okir@monad.swb.de> */ -#include <linux/config.h> #include <linux/types.h> #include <linux/slab.h> #include <linux/mm.h> @@ -497,7 +496,7 @@ nfs_mark_request_dirty(struct nfs_page *req) nfs_list_add_request(req, &nfsi->dirty); nfsi->ndirty++; spin_unlock(&nfsi->req_lock); - inc_page_state(nr_dirty); + inc_zone_page_state(req->wb_page, NR_FILE_DIRTY); mark_inode_dirty(inode); } @@ -525,7 +524,7 @@ nfs_mark_request_commit(struct nfs_page *req) nfs_list_add_request(req, &nfsi->commit); nfsi->ncommit++; spin_unlock(&nfsi->req_lock); - inc_page_state(nr_unstable); + inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); 
mark_inode_dirty(inode); } #endif @@ -579,7 +578,7 @@ static int nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, un return ret; } -static void nfs_cancel_requests(struct list_head *head) +static void nfs_cancel_dirty_list(struct list_head *head) { struct nfs_page *req; while(!list_empty(head)) { @@ -590,6 +589,19 @@ static void nfs_cancel_requests(struct list_head *head) } } +static void nfs_cancel_commit_list(struct list_head *head) +{ + struct nfs_page *req; + + while(!list_empty(head)) { + req = nfs_list_entry(head->next); + nfs_list_remove_request(req); + nfs_inode_remove_request(req); + nfs_clear_page_writeback(req); + dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); + } +} + /* * nfs_scan_dirty - Scan an inode for dirty requests * @inode: NFS inode to scan @@ -609,7 +621,6 @@ nfs_scan_dirty(struct inode *inode, struct list_head *dst, unsigned long idx_sta if (nfsi->ndirty != 0) { res = nfs_scan_lock_dirty(nfsi, dst, idx_start, npages); nfsi->ndirty -= res; - sub_page_state(nr_dirty,res); if ((nfsi->ndirty == 0) != list_empty(&nfsi->dirty)) printk(KERN_ERR "NFS: desynchronized value of nfs_i.ndirty.\n"); } @@ -1383,6 +1394,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how) nfs_list_remove_request(req); nfs_mark_request_commit(req); nfs_clear_page_writeback(req); + dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); } return -ENOMEM; } @@ -1394,7 +1406,6 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata) { struct nfs_write_data *data = calldata; struct nfs_page *req; - int res = 0; dprintk("NFS: %4d nfs_commit_done (status %d)\n", task->tk_pid, task->tk_status); @@ -1406,6 +1417,7 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata) while (!list_empty(&data->pages)) { req = nfs_list_entry(data->pages.next); nfs_list_remove_request(req); + dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); dprintk("NFS: commit (%s/%Ld %d@%Ld)", req->wb_context->dentry->d_inode->i_sb->s_id, @@ 
-1432,9 +1444,7 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata) nfs_mark_request_dirty(req); next: nfs_clear_page_writeback(req); - res++; } - sub_page_state(nr_unstable,res); } static const struct rpc_call_ops nfs_commit_ops = { @@ -1503,7 +1513,7 @@ int nfs_sync_inode_wait(struct inode *inode, unsigned long idx_start, if (pages != 0) { spin_unlock(&nfsi->req_lock); if (how & FLUSH_INVALIDATE) - nfs_cancel_requests(&head); + nfs_cancel_dirty_list(&head); else ret = nfs_flush_list(inode, &head, pages, how); spin_lock(&nfsi->req_lock); @@ -1516,7 +1526,7 @@ int nfs_sync_inode_wait(struct inode *inode, unsigned long idx_start, break; if (how & FLUSH_INVALIDATE) { spin_unlock(&nfsi->req_lock); - nfs_cancel_requests(&head); + nfs_cancel_commit_list(&head); spin_lock(&nfsi->req_lock); continue; } diff --git a/fs/nfsctl.c b/fs/nfsctl.c index a5a18d4..c043136 100644 --- a/fs/nfsctl.c +++ b/fs/nfsctl.c @@ -4,7 +4,6 @@ * This should eventually move to userland. * */ -#include <linux/config.h> #include <linux/types.h> #include <linux/file.h> #include <linux/fs.h> diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 3eec300..01bc68c 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -126,7 +126,7 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen) if (*ep) goto out; dprintk("found fsidtype %d\n", fsidtype); - if (fsidtype > 2) + if (key_len(fsidtype)==0) /* invalid type */ goto out; if ((len=qword_get(&mesg, buf, PAGE_SIZE)) <= 0) goto out; diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index dbaf3f9..54b37b1 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -33,7 +33,6 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ -#include <linux/config.h> #include <linux/module.h> #include <linux/list.h> #include <linux/inet.h> diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index 4b6aa60..bea6b94 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -34,7 +34,6 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/init.h> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index b0e095e..ee4eff2 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -721,6 +721,12 @@ nfsd4_proc_null(struct svc_rqst *rqstp, void *argp, void *resp) return nfs_ok; } +static inline void nfsd4_increment_op_stats(u32 opnum) +{ + if (opnum >= FIRST_NFS4_OP && opnum <= LAST_NFS4_OP) + nfsdstats.nfs4_opcount[opnum]++; +} + /* * COMPOUND call. @@ -930,6 +936,8 @@ encode_op: /* XXX Ugh, we need to get rid of this kind of special case: */ if (op->opnum == OP_READ && op->u.read.rd_filp) fput(op->u.read.rd_filp); + + nfsd4_increment_op_stats(op->opnum); } out: diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 7c7d016..9daa0b9 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1237,8 +1237,15 @@ find_file(struct inode *ino) return NULL; } -#define TEST_ACCESS(x) ((x > 0 || x < 4)?1:0) -#define TEST_DENY(x) ((x >= 0 || x < 5)?1:0) +static int access_valid(u32 x) +{ + return (x > 0 && x < 4); +} + +static int deny_valid(u32 x) +{ + return (x >= 0 && x < 5); +} static void set_access(unsigned int *access, unsigned long bmap) { @@ -1745,7 +1752,8 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf int status; status = nfserr_inval; - if (!TEST_ACCESS(open->op_share_access) || !TEST_DENY(open->op_share_deny)) + if (!access_valid(open->op_share_access) + || !deny_valid(open->op_share_deny)) goto out; /* * Lookup file; if found, lookup stateid and check open request, @@ -1782,10 +1790,10 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct 
nf } else { /* Stateid was not found, this is a new OPEN */ int flags = 0; + if (open->op_share_access & NFS4_SHARE_ACCESS_READ) + flags |= MAY_READ; if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) - flags = MAY_WRITE; - else - flags = MAY_READ; + flags |= MAY_WRITE; status = nfs4_new_open(rqstp, &stp, dp, current_fh, flags); if (status) goto out; @@ -2070,16 +2078,12 @@ nfs4_preprocess_stateid_op(struct svc_fh *current_fh, stateid_t *stateid, int fl if (!stateid->si_fileid) { /* delegation stateid */ if(!(dp = find_delegation_stateid(ino, stateid))) { dprintk("NFSD: delegation stateid not found\n"); - if (nfs4_in_grace()) - status = nfserr_grace; goto out; } stidp = &dp->dl_stateid; } else { /* open or lock stateid */ if (!(stp = find_stateid(stateid, flags))) { dprintk("NFSD: open or lock stateid not found\n"); - if (nfs4_in_grace()) - status = nfserr_grace; goto out; } if ((flags & CHECK_FH) && nfs4_check_fh(current_fh, stp)) @@ -2252,8 +2256,9 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfs (int)current_fh->fh_dentry->d_name.len, current_fh->fh_dentry->d_name.name); - if ((status = fh_verify(rqstp, current_fh, S_IFREG, 0))) - goto out; + status = fh_verify(rqstp, current_fh, S_IFREG, 0); + if (status) + return status; nfs4_lock_state(); @@ -2320,7 +2325,8 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct n (int)current_fh->fh_dentry->d_name.len, current_fh->fh_dentry->d_name.name); - if (!TEST_ACCESS(od->od_share_access) || !TEST_DENY(od->od_share_deny)) + if (!access_valid(od->od_share_access) + || !deny_valid(od->od_share_deny)) return nfserr_inval; nfs4_lock_state(); diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index a1810e6..7046ac9 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -6,7 +6,6 @@ * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> */ -#include <linux/config.h> #include <linux/module.h> #include <linux/linkage.h> diff --git a/fs/nfsd/nfsfh.c 
b/fs/nfsd/nfsfh.c index 3f2ec2e..ecc439d 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -187,13 +187,6 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) goto out; } - /* Set user creds for this exportpoint */ - error = nfsd_setuser(rqstp, exp); - if (error) { - error = nfserrno(error); - goto out; - } - /* * Look up the dentry using the NFS file handle. */ @@ -251,6 +244,14 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) } cache_get(&exp->h); + /* Set user creds for this exportpoint; necessary even in the "just + * checking" case because this may be a filehandle that was created by + * fh_compose, and that is about to be used in another nfsv4 compound + * operation */ + error = nfserrno(nfsd_setuser(rqstp, exp)); + if (error) + goto out; + error = nfsd_mode_check(rqstp, dentry->d_inode->i_mode, type); if (error) goto out; @@ -312,8 +313,8 @@ int fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, struct svc_fh *ref_fh) { /* ref_fh is a reference file handle. - * if it is non-null, then we should compose a filehandle which is - * of the same version, where possible. + * if it is non-null and for the same filesystem, then we should compose + * a filehandle which is of the same version, where possible. * Currently, that means that if ref_fh->fh_handle.fh_version == 0xca * Then create a 32byte filehandle using nfs_fhbase_old * @@ -332,7 +333,7 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, st parent->d_name.name, dentry->d_name.name, (inode ? 
inode->i_ino : 0)); - if (ref_fh) { + if (ref_fh && ref_fh->fh_export == exp) { ref_fh_version = ref_fh->fh_handle.fh_version; if (ref_fh_version == 0xca) ref_fh_fsid_type = 0; @@ -461,7 +462,7 @@ fh_update(struct svc_fh *fhp) } else { int size; if (fhp->fh_handle.fh_fileid_type != 0) - goto out_uptodate; + goto out; datap = fhp->fh_handle.fh_auth+ fhp->fh_handle.fh_size/4 -1; size = (fhp->fh_maxsize - fhp->fh_handle.fh_size)/4; @@ -481,10 +482,6 @@ out_negative: printk(KERN_ERR "fh_update: %s/%s still negative!\n", dentry->d_parent->d_name.name, dentry->d_name.name); goto out; -out_uptodate: - printk(KERN_ERR "fh_update: %s/%s already up-to-date!\n", - dentry->d_parent->d_name.name, dentry->d_name.name); - goto out; } /* diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 3790727..ec1decf 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -8,7 +8,6 @@ * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de> */ -#include <linux/config.h> #include <linux/module.h> #include <linux/time.h> diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c index 57265d5..71944cd 100644 --- a/fs/nfsd/stats.c +++ b/fs/nfsd/stats.c @@ -72,6 +72,16 @@ static int nfsd_proc_show(struct seq_file *seq, void *v) /* show my rpc info */ svc_seq_show(seq, &nfsd_svcstats); +#ifdef CONFIG_NFSD_V4 + /* Show count for individual nfsv4 operations */ + /* Writing operation numbers 0 1 2 also for maintaining uniformity */ + seq_printf(seq,"proc4ops %u", LAST_NFS4_OP + 1); + for (i = 0; i <= LAST_NFS4_OP; i++) + seq_printf(seq, " %u", nfsdstats.nfs4_opcount[i]); + + seq_putc(seq, '\n'); +#endif + return 0; } diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 245eaa1..c9e3b5a 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -16,7 +16,6 @@ * Zerocpy NFS support (C) 2002 Hirokazu Takahashi <taka@valinux.co.jp> */ -#include <linux/config.h> #include <linux/string.h> #include <linux/time.h> #include <linux/errno.h> @@ -673,7 +672,10 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, 
goto out_nfserr; if (access & MAY_WRITE) { - flags = O_WRONLY|O_LARGEFILE; + if (access & MAY_READ) + flags = O_RDWR|O_LARGEFILE; + else + flags = O_WRONLY|O_LARGEFILE; DQUOT_INIT(inode); } @@ -834,7 +836,7 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, if (ra && ra->p_set) file->f_ra = ra->p_ra; - if (file->f_op->sendfile) { + if (file->f_op->sendfile && rqstp->rq_sendfile_ok) { svc_pushback_unused_pages(rqstp); err = file->f_op->sendfile(file, &offset, *count, nfsd_read_actor, rqstp); @@ -1517,14 +1519,15 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, err = nfserrno(err); } - fh_unlock(ffhp); dput(dnew); +out_unlock: + fh_unlock(ffhp); out: return err; out_nfserr: err = nfserrno(err); - goto out; + goto out_unlock; } /* @@ -1553,7 +1556,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, tdir = tdentry->d_inode; err = (rqstp->rq_vers == 2) ? nfserr_acces : nfserr_xdev; - if (fdir->i_sb != tdir->i_sb) + if (ffhp->fh_export != tfhp->fh_export) goto out; err = nfserr_perm; diff --git a/fs/nls/nls_base.c b/fs/nls/nls_base.c index a912deb..9de6b49 100644 --- a/fs/nls/nls_base.c +++ b/fs/nls/nls_base.c @@ -10,7 +10,6 @@ #include <linux/module.h> #include <linux/string.h> -#include <linux/config.h> #include <linux/nls.h> #include <linux/kernel.h> #include <linux/errno.h> diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c index 580412d..bc579bf 100644 --- a/fs/ntfs/aops.c +++ b/fs/ntfs/aops.c @@ -1544,7 +1544,7 @@ err_out: /** * ntfs_aops - general address space operations for inodes and attributes */ -struct address_space_operations ntfs_aops = { +const struct address_space_operations ntfs_aops = { .readpage = ntfs_readpage, /* Fill page with data. */ .sync_page = block_sync_page, /* Currently, just unplugs the disk request queue. 
*/ @@ -1560,7 +1560,7 @@ struct address_space_operations ntfs_aops = { * ntfs_mst_aops - general address space operations for mst protecteed inodes * and attributes */ -struct address_space_operations ntfs_mst_aops = { +const struct address_space_operations ntfs_mst_aops = { .readpage = ntfs_readpage, /* Fill page with data. */ .sync_page = block_sync_page, /* Currently, just unplugs the disk request queue. */ diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index 4c86b7e..d313f35 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -367,6 +367,12 @@ static void ntfs_destroy_extent_inode(ntfs_inode *ni) kmem_cache_free(ntfs_inode_cache, ni); } +/* + * The attribute runlist lock has separate locking rules from the + * normal runlist lock, so split the two lock-classes: + */ +static struct lock_class_key attr_list_rl_lock_class; + /** * __ntfs_init_inode - initialize ntfs specific part of an inode * @sb: super block of mounted volume @@ -394,6 +400,8 @@ void __ntfs_init_inode(struct super_block *sb, ntfs_inode *ni) ni->attr_list_size = 0; ni->attr_list = NULL; ntfs_init_runlist(&ni->attr_list_rl); + lockdep_set_class(&ni->attr_list_rl.lock, + &attr_list_rl_lock_class); ni->itype.index.bmp_ino = NULL; ni->itype.index.block_size = 0; ni->itype.index.vcn_size = 0; @@ -405,6 +413,13 @@ void __ntfs_init_inode(struct super_block *sb, ntfs_inode *ni) ni->ext.base_ntfs_ino = NULL; } +/* + * Extent inodes get MFT-mapped in a nested way, while the base inode + * is still mapped. 
Teach this nesting to the lock validator by creating + * a separate class for nested inode's mrec_lock's: + */ +static struct lock_class_key extent_inode_mrec_lock_key; + inline ntfs_inode *ntfs_new_extent_inode(struct super_block *sb, unsigned long mft_no) { @@ -413,6 +428,7 @@ inline ntfs_inode *ntfs_new_extent_inode(struct super_block *sb, ntfs_debug("Entering."); if (likely(ni != NULL)) { __ntfs_init_inode(sb, ni); + lockdep_set_class(&ni->mrec_lock, &extent_inode_mrec_lock_key); ni->mft_no = mft_no; ni->type = AT_UNUSED; ni->name = NULL; @@ -1722,6 +1738,15 @@ err_out: return err; } +/* + * The MFT inode has special locking, so teach the lock validator + * about this by splitting off the locking rules of the MFT from + * the locking rules of other inodes. The MFT inode can never be + * accessed from the VFS side (or even internally), only by the + * map_mft functions. + */ +static struct lock_class_key mft_ni_runlist_lock_key, mft_ni_mrec_lock_key; + /** * ntfs_read_inode_mount - special read_inode for mount time use only * @vi: inode to read @@ -2148,6 +2173,14 @@ int ntfs_read_inode_mount(struct inode *vi) ntfs_attr_put_search_ctx(ctx); ntfs_debug("Done."); ntfs_free(m); + + /* + * Split the locking rules of the MFT inode from the + * locking rules of other inodes: + */ + lockdep_set_class(&ni->runlist.lock, &mft_ni_runlist_lock_key); + lockdep_set_class(&ni->mrec_lock, &mft_ni_mrec_lock_key); + return 0; em_put_err_out: diff --git a/fs/ntfs/ntfs.h b/fs/ntfs/ntfs.h index bf7b3d7..ddd3d50 100644 --- a/fs/ntfs/ntfs.h +++ b/fs/ntfs/ntfs.h @@ -57,8 +57,8 @@ extern struct kmem_cache *ntfs_attr_ctx_cache; extern struct kmem_cache *ntfs_index_ctx_cache; /* The various operations structs defined throughout the driver files. 
*/ -extern struct address_space_operations ntfs_aops; -extern struct address_space_operations ntfs_mst_aops; +extern const struct address_space_operations ntfs_aops; +extern const struct address_space_operations ntfs_mst_aops; extern const struct file_operations ntfs_file_ops; extern struct inode_operations ntfs_file_inode_ops; diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 0e14ace..74e0ee8 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -1724,6 +1724,14 @@ upcase_failed: return FALSE; } +/* + * The lcn and mft bitmap inodes are NTFS-internal inodes with + * their own special locking rules: + */ +static struct lock_class_key + lcnbmp_runlist_lock_key, lcnbmp_mrec_lock_key, + mftbmp_runlist_lock_key, mftbmp_mrec_lock_key; + /** * load_system_files - open the system files using normal functions * @vol: ntfs super block describing device whose system files to load @@ -1780,6 +1788,10 @@ static BOOL load_system_files(ntfs_volume *vol) ntfs_error(sb, "Failed to load $MFT/$BITMAP attribute."); goto iput_mirr_err_out; } + lockdep_set_class(&NTFS_I(vol->mftbmp_ino)->runlist.lock, + &mftbmp_runlist_lock_key); + lockdep_set_class(&NTFS_I(vol->mftbmp_ino)->mrec_lock, + &mftbmp_mrec_lock_key); /* Read upcase table and setup @vol->upcase and @vol->upcase_len. 
*/ if (!load_and_init_upcase(vol)) goto iput_mftbmp_err_out; @@ -1802,6 +1814,11 @@ static BOOL load_system_files(ntfs_volume *vol) iput(vol->lcnbmp_ino); goto bitmap_failed; } + lockdep_set_class(&NTFS_I(vol->lcnbmp_ino)->runlist.lock, + &lcnbmp_runlist_lock_key); + lockdep_set_class(&NTFS_I(vol->lcnbmp_ino)->mrec_lock, + &lcnbmp_mrec_lock_key); + NInoSetSparseDisabled(NTFS_I(vol->lcnbmp_ino)); if ((vol->nr_clusters + 7) >> 3 > i_size_read(vol->lcnbmp_ino)) { iput(vol->lcnbmp_ino); @@ -2743,6 +2760,17 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent) struct inode *tmp_ino; int blocksize, result; + /* + * We do a pretty difficult piece of bootstrap by reading the + * MFT (and other metadata) from disk into memory. We'll only + * release this metadata during umount, so the locking patterns + * observed during bootstrap do not count. So turn off the + * observation of locking patterns (strictly for this context + * only) while mounting NTFS. [The validator is still active + * otherwise, even for this context: it will for example record + * lock class registrations.] + */ + lockdep_off(); ntfs_debug("Entering."); #ifndef NTFS_RW sb->s_flags |= MS_RDONLY; @@ -2754,6 +2782,7 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent) if (!silent) ntfs_error(sb, "Allocation of NTFS volume structure " "failed. Aborting mount..."); + lockdep_on(); return -ENOMEM; } /* Initialize ntfs_volume structure. 
*/ @@ -2940,6 +2969,7 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent) mutex_unlock(&ntfs_lock); sb->s_export_op = &ntfs_export_ops; lock_kernel(); + lockdep_on(); return 0; } ntfs_error(sb, "Failed to allocate root directory."); @@ -3059,6 +3089,7 @@ err_out_now: sb->s_fs_info = NULL; kfree(vol); ntfs_debug("Failed, returning -EINVAL."); + lockdep_on(); return -EINVAL; } diff --git a/fs/ntfs/sysctl.h b/fs/ntfs/sysctl.h index c8064ca..beda5bf 100644 --- a/fs/ntfs/sysctl.h +++ b/fs/ntfs/sysctl.h @@ -24,7 +24,6 @@ #ifndef _LINUX_NTFS_SYSCTL_H #define _LINUX_NTFS_SYSCTL_H -#include <linux/config.h> #if defined(DEBUG) && defined(CONFIG_SYSCTL) diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 47152bf..f1d1c34 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -558,16 +558,9 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock, u64 vbo_max; /* file offset, max_blocks from iblock */ u64 p_blkno; int contig_blocks; - unsigned char blocksize_bits; + unsigned char blocksize_bits = inode->i_sb->s_blocksize_bits; unsigned long max_blocks = bh_result->b_size >> inode->i_blkbits; - if (!inode || !bh_result) { - mlog(ML_ERROR, "inode or bh_result is null\n"); - return -EIO; - } - - blocksize_bits = inode->i_sb->s_blocksize_bits; - /* This function won't even be called if the request isn't all * nicely aligned and of the right size, so there's no need * for us to check any of that. 
*/ @@ -666,7 +659,7 @@ out: return ret; } -struct address_space_operations ocfs2_aops = { +const struct address_space_operations ocfs2_aops = { .readpage = ocfs2_readpage, .writepage = ocfs2_writepage, .prepare_write = ocfs2_prepare_write, diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 1d26cfc..504595d 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -517,6 +517,7 @@ static inline void o2hb_prepare_block(struct o2hb_region *reg, hb_block->hb_seq = cpu_to_le64(cputime); hb_block->hb_node = node_num; hb_block->hb_generation = cpu_to_le64(generation); + hb_block->hb_dead_ms = cpu_to_le32(o2hb_dead_threshold * O2HB_REGION_TIMEOUT_MS); /* This step must always happen last! */ hb_block->hb_cksum = cpu_to_le32(o2hb_compute_block_crc_le(reg, @@ -645,6 +646,8 @@ static int o2hb_check_slot(struct o2hb_region *reg, struct o2nm_node *node; struct o2hb_disk_heartbeat_block *hb_block = reg->hr_tmp_block; u64 cputime; + unsigned int dead_ms = o2hb_dead_threshold * O2HB_REGION_TIMEOUT_MS; + unsigned int slot_dead_ms; memcpy(hb_block, slot->ds_raw_block, reg->hr_block_bytes); @@ -733,6 +736,23 @@ fire_callbacks: &o2hb_live_slots[slot->ds_node_num]); slot->ds_equal_samples = 0; + + /* We want to be sure that all nodes agree on the + * number of milliseconds before a node will be + * considered dead. The self-fencing timeout is + * computed from this value, and a discrepancy might + * result in heartbeat calling a node dead when it + * hasn't self-fenced yet. */ + slot_dead_ms = le32_to_cpu(hb_block->hb_dead_ms); + if (slot_dead_ms && slot_dead_ms != dead_ms) { + /* TODO: Perhaps we can fail the region here. 
*/ + mlog(ML_ERROR, "Node %d on device %s has a dead count " + "of %u ms, but our count is %u ms.\n" + "Please double check your configuration values " + "for 'O2CB_HEARTBEAT_THRESHOLD'\n", + slot->ds_node_num, reg->hr_dev_name, slot_dead_ms, + dead_ms); + } goto out; } diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h index 73edad7..a42628b 100644 --- a/fs/ocfs2/cluster/masklog.h +++ b/fs/ocfs2/cluster/masklog.h @@ -123,6 +123,17 @@ #define MLOG_MASK_PREFIX 0 #endif +/* + * When logging is disabled, force the bit test to 0 for anything other + * than errors and notices, allowing gcc to remove the code completely. + * When enabled, allow all masks. + */ +#if defined(CONFIG_OCFS2_DEBUG_MASKLOG) +#define ML_ALLOWED_BITS ~0 +#else +#define ML_ALLOWED_BITS (ML_ERROR|ML_NOTICE) +#endif + #define MLOG_MAX_BITS 64 struct mlog_bits { @@ -187,7 +198,8 @@ extern struct mlog_bits mlog_and_bits, mlog_not_bits; #define mlog(mask, fmt, args...) do { \ u64 __m = MLOG_MASK_PREFIX | (mask); \ - if (__mlog_test_u64(__m, mlog_and_bits) && \ + if ((__m & ML_ALLOWED_BITS) && \ + __mlog_test_u64(__m, mlog_and_bits) && \ !__mlog_test_u64(__m, mlog_not_bits)) { \ if (__m & ML_ERROR) \ __mlog_printk(KERN_ERR, "ERROR: "fmt , ##args); \ @@ -204,6 +216,7 @@ extern struct mlog_bits mlog_and_bits, mlog_not_bits; mlog(ML_ERROR, "status = %lld\n", (long long)_st); \ } while (0) +#if defined(CONFIG_OCFS2_DEBUG_MASKLOG) #define mlog_entry(fmt, args...) do { \ mlog(ML_ENTRY, "ENTRY:" fmt , ##args); \ } while (0) @@ -247,6 +260,13 @@ extern struct mlog_bits mlog_and_bits, mlog_not_bits; #define mlog_exit_void() do { \ mlog(ML_EXIT, "EXIT\n"); \ } while (0) +#else +#define mlog_entry(...) do { } while (0) +#define mlog_entry_void(...) do { } while (0) +#define mlog_exit(...) do { } while (0) +#define mlog_exit_ptr(...) do { } while (0) +#define mlog_exit_void(...) do { } while (0) +#endif /* defined(CONFIG_OCFS2_DEBUG_MASKLOG) */ #define mlog_bug_on_msg(cond, fmt, args...) 
do { \ if (cond) { \ diff --git a/fs/ocfs2/cluster/ocfs2_heartbeat.h b/fs/ocfs2/cluster/ocfs2_heartbeat.h index 9409606..3f4151d 100644 --- a/fs/ocfs2/cluster/ocfs2_heartbeat.h +++ b/fs/ocfs2/cluster/ocfs2_heartbeat.h @@ -32,6 +32,7 @@ struct o2hb_disk_heartbeat_block { __u8 hb_pad1[3]; __le32 hb_cksum; __le64 hb_generation; + __le32 hb_dead_ms; }; #endif /* _OCFS2_HEARTBEAT_H */ diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 1591eb3..b650efa 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -396,8 +396,8 @@ static void o2net_set_nn_state(struct o2net_node *nn, } if (was_valid && !valid) { - mlog(ML_NOTICE, "no longer connected to " SC_NODEF_FMT "\n", - SC_NODEF_ARGS(old_sc)); + printk(KERN_INFO "o2net: no longer connected to " + SC_NODEF_FMT "\n", SC_NODEF_ARGS(old_sc)); o2net_complete_nodes_nsw(nn); } @@ -409,10 +409,10 @@ static void o2net_set_nn_state(struct o2net_node *nn, * the only way to start connecting again is to down * heartbeat and bring it back up. */ cancel_delayed_work(&nn->nn_connect_expired); - mlog(ML_NOTICE, "%s " SC_NODEF_FMT "\n", - o2nm_this_node() > sc->sc_node->nd_num ? - "connected to" : "accepted connection from", - SC_NODEF_ARGS(sc)); + printk(KERN_INFO "o2net: %s " SC_NODEF_FMT "\n", + o2nm_this_node() > sc->sc_node->nd_num ? 
+ "connected to" : "accepted connection from", + SC_NODEF_ARGS(sc)); } /* trigger the connecting worker func as long as we're not valid, @@ -1280,7 +1280,7 @@ static void o2net_idle_timer(unsigned long data) do_gettimeofday(&now); - mlog(ML_NOTICE, "connection to " SC_NODEF_FMT " has been idle for 10 " + printk(KERN_INFO "o2net: connection to " SC_NODEF_FMT " has been idle for 10 " "seconds, shutting it down.\n", SC_NODEF_ARGS(sc)); mlog(ML_NOTICE, "here are some times that might help debug the " "situation: (tmr %ld.%ld now %ld.%ld dr %ld.%ld adv " diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index ae47f45..3d494d1 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -213,11 +213,9 @@ int ocfs2_find_files_on_disk(const char *name, struct ocfs2_dir_entry **dirent) { int status = -ENOENT; - struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); - mlog_entry("(osb=%p, parent=%llu, name='%.*s', blkno=%p, inode=%p)\n", - osb, (unsigned long long)OCFS2_I(inode)->ip_blkno, - namelen, name, blkno, inode); + mlog_entry("(name=%.*s, blkno=%p, inode=%p, dirent_bh=%p, dirent=%p)\n", + namelen, name, blkno, inode, dirent_bh, dirent); *dirent_bh = ocfs2_find_entry(name, namelen, inode, dirent); if (!*dirent_bh || !*dirent) { diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index 9bdc9cf..14530ee 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h @@ -822,8 +822,6 @@ int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data); int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data); int dlm_do_master_requery(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, u8 nodenum, u8 *real_master); -int dlm_lockres_master_requery(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, u8 *real_master); int dlm_dispatch_assert_master(struct dlm_ctxt *dlm, diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index b8c23f7..8d1065f 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -408,12 +408,13 @@ 
static void __dlm_print_nodes(struct dlm_ctxt *dlm) assert_spin_locked(&dlm->spinlock); - mlog(ML_NOTICE, "Nodes in my domain (\"%s\"):\n", dlm->name); + printk(KERN_INFO "ocfs2_dlm: Nodes in domain (\"%s\"): ", dlm->name); while ((node = find_next_bit(dlm->domain_map, O2NM_MAX_NODES, node + 1)) < O2NM_MAX_NODES) { - mlog(ML_NOTICE, " node %d\n", node); + printk("%d ", node); } + printk("\n"); } static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data) @@ -429,7 +430,7 @@ static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data) node = exit_msg->node_idx; - mlog(0, "Node %u leaves domain %s\n", node, dlm->name); + printk(KERN_INFO "ocfs2_dlm: Node %u leaves domain %s\n", node, dlm->name); spin_lock(&dlm->spinlock); clear_bit(node, dlm->domain_map); @@ -678,6 +679,8 @@ static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data) set_bit(assert->node_idx, dlm->domain_map); __dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN); + printk(KERN_INFO "ocfs2_dlm: Node %u joins domain %s\n", + assert->node_idx, dlm->name); __dlm_print_nodes(dlm); /* notify anything attached to the heartbeat events */ diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 29b2845..594745f 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -95,6 +95,9 @@ static void dlm_reco_unlock_ast(void *astdata, enum dlm_status st); static void dlm_request_all_locks_worker(struct dlm_work_item *item, void *data); static void dlm_mig_lockres_worker(struct dlm_work_item *item, void *data); +static int dlm_lockres_master_requery(struct dlm_ctxt *dlm, + struct dlm_lock_resource *res, + u8 *real_master); static u64 dlm_get_next_mig_cookie(void); @@ -1484,8 +1487,9 @@ leave: -int dlm_lockres_master_requery(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, u8 *real_master) +static int dlm_lockres_master_requery(struct dlm_ctxt *dlm, + struct dlm_lock_resource *res, + u8 *real_master) { struct 
dlm_node_iter iter; int nodenum; diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 4acd372..762eb1f 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -2071,8 +2071,7 @@ int ocfs2_dlm_init(struct ocfs2_super *osb) } /* launch vote thread */ - osb->vote_task = kthread_run(ocfs2_vote_thread, osb, "ocfs2vote-%d", - osb->osb_id); + osb->vote_task = kthread_run(ocfs2_vote_thread, osb, "ocfs2vote"); if (IS_ERR(osb->vote_task)) { status = PTR_ERR(osb->vote_task); osb->vote_task = NULL; diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c index 1a5c690..fcd4475 100644 --- a/fs/ocfs2/extent_map.c +++ b/fs/ocfs2/extent_map.c @@ -298,7 +298,7 @@ static int ocfs2_extent_map_find_leaf(struct inode *inode, ret = ocfs2_extent_map_insert(inode, rec, le16_to_cpu(el->l_tree_depth)); - if (ret) { + if (ret && (ret != -EEXIST)) { mlog_errno(ret); goto out_free; } @@ -427,6 +427,11 @@ static int ocfs2_extent_map_insert_entry(struct ocfs2_extent_map *em, /* * Simple rule: on any return code other than -EAGAIN, anything left * in the insert_context will be freed. + * + * Simple rule #2: A return code of -EEXIST from this function or + * its calls to ocfs2_extent_map_insert_entry() signifies that another + * thread beat us to the insert. It is not an actual error, but it + * tells the caller we have no more work to do. 
*/ static int ocfs2_extent_map_try_insert(struct inode *inode, struct ocfs2_extent_rec *rec, @@ -448,22 +453,32 @@ static int ocfs2_extent_map_try_insert(struct inode *inode, goto out_unlock; } + /* Since insert_entry failed, the map MUST have old_ent */ old_ent = ocfs2_extent_map_lookup(em, le32_to_cpu(rec->e_cpos), - le32_to_cpu(rec->e_clusters), NULL, - NULL); + le32_to_cpu(rec->e_clusters), + NULL, NULL); BUG_ON(!old_ent); - ret = -EEXIST; - if (old_ent->e_tree_depth < tree_depth) + if (old_ent->e_tree_depth < tree_depth) { + /* Another thread beat us to the lower tree_depth */ + ret = -EEXIST; goto out_unlock; + } if (old_ent->e_tree_depth == tree_depth) { + /* + * Another thread beat us to this tree_depth. + * Let's make sure we agree with that thread (the + * extent_rec should be identical). + */ if (!memcmp(rec, &old_ent->e_rec, sizeof(struct ocfs2_extent_rec))) ret = 0; + else + /* FIXME: Should this be ESRCH/EBADR??? */ + ret = -EEXIST; - /* FIXME: Should this be ESRCH/EBADR??? 
*/ goto out_unlock; } @@ -599,7 +614,7 @@ static int ocfs2_extent_map_insert(struct inode *inode, tree_depth, &ctxt); } while (ret == -EAGAIN); - if (ret < 0) + if ((ret < 0) && (ret != -EEXIST)) mlog_errno(ret); if (ctxt.left_ent) diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h index 84c5079..35140f6 100644 --- a/fs/ocfs2/inode.h +++ b/fs/ocfs2/inode.h @@ -114,7 +114,7 @@ static inline struct ocfs2_inode_info *OCFS2_I(struct inode *inode) extern kmem_cache_t *ocfs2_inode_cache; -extern struct address_space_operations ocfs2_aops; +extern const struct address_space_operations ocfs2_aops; struct buffer_head *ocfs2_bread(struct inode *inode, int block, int *err, int reada); diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 910a601..f92bf1d 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -784,8 +784,7 @@ int ocfs2_journal_load(struct ocfs2_journal *journal) } /* Launch the commit thread */ - osb->commit_task = kthread_run(ocfs2_commit_thread, osb, "ocfs2cmt-%d", - osb->osb_id); + osb->commit_task = kthread_run(ocfs2_commit_thread, osb, "ocfs2cmt"); if (IS_ERR(osb->commit_task)) { status = PTR_ERR(osb->commit_task); osb->commit_task = NULL; @@ -1118,7 +1117,7 @@ void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num) goto out; osb->recovery_thread_task = kthread_run(__ocfs2_recovery_thread, osb, - "ocfs2rec-%d", osb->osb_id); + "ocfs2rec"); if (IS_ERR(osb->recovery_thread_task)) { mlog_errno((int)PTR_ERR(osb->recovery_thread_task)); osb->recovery_thread_task = NULL; diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c index 843cf9d..83934e3 100644 --- a/fs/ocfs2/mmap.c +++ b/fs/ocfs2/mmap.c @@ -46,12 +46,12 @@ static struct page *ocfs2_nopage(struct vm_area_struct * area, unsigned long address, int *type) { - struct inode *inode = area->vm_file->f_dentry->d_inode; struct page *page = NOPAGE_SIGBUS; sigset_t blocked, oldset; int ret; - mlog_entry("(inode %lu, address %lu)\n", inode->i_ino, address); + mlog_entry("(area=%p, address=%lu, 
type=%p)\n", area, address, + type); /* The best way to deal with signals in this path is * to block them upfront, rather than allowing the diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index da10930..cd4a6f2 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -184,7 +184,6 @@ struct ocfs2_journal; struct ocfs2_journal_handle; struct ocfs2_super { - u32 osb_id; /* id used by the proc interface */ struct task_struct *commit_task; struct super_block *sb; struct inode *root_inode; @@ -222,13 +221,11 @@ struct ocfs2_super unsigned long s_mount_opt; u16 max_slots; - u16 num_nodes; s16 node_num; s16 slot_num; int s_sectsize_bits; int s_clustersize; int s_clustersize_bits; - struct proc_dir_entry *proc_sub_dir; /* points to /proc/fs/ocfs2/<maj_min> */ atomic_t vol_state; struct mutex recovery_lock; @@ -294,7 +291,6 @@ struct ocfs2_super }; #define OCFS2_SB(sb) ((struct ocfs2_super *)(sb)->s_fs_info) -#define OCFS2_MAX_OSB_ID 65536 static inline int ocfs2_should_order_data(struct inode *inode) { diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c index 8716279..aa6f5aa 100644 --- a/fs/ocfs2/slot_map.c +++ b/fs/ocfs2/slot_map.c @@ -264,7 +264,7 @@ int ocfs2_find_slot(struct ocfs2_super *osb) osb->slot_num = slot; spin_unlock(&si->si_lock); - mlog(ML_NOTICE, "taking node slot %d\n", osb->slot_num); + mlog(0, "taking node slot %d\n", osb->slot_num); status = ocfs2_update_disk_slots(osb, si); if (status < 0) diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index cdf7339..382706a 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -68,13 +68,6 @@ #include "buffer_head_io.h" -/* - * Globals - */ -static spinlock_t ocfs2_globals_lock = SPIN_LOCK_UNLOCKED; - -static u32 osb_id; /* Keeps track of next available OSB Id */ - static kmem_cache_t *ocfs2_inode_cachep = NULL; kmem_cache_t *ocfs2_lock_cache = NULL; @@ -642,10 +635,9 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) ocfs2_complete_mount_recovery(osb); - printk("ocfs2: Mounting 
device (%u,%u) on (node %d, slot %d) with %s " - "data mode.\n", - MAJOR(sb->s_dev), MINOR(sb->s_dev), osb->node_num, - osb->slot_num, + printk(KERN_INFO "ocfs2: Mounting device (%s) on (node %d, slot %d) " + "with %s data mode.\n", + osb->dev_str, osb->node_num, osb->slot_num, osb->s_mount_opt & OCFS2_MOUNT_DATA_WRITEBACK ? "writeback" : "ordered"); @@ -800,10 +792,6 @@ static int __init ocfs2_init(void) goto leave; } - spin_lock(&ocfs2_globals_lock); - osb_id = 0; - spin_unlock(&ocfs2_globals_lock); - ocfs2_debugfs_root = debugfs_create_dir("ocfs2", NULL); if (!ocfs2_debugfs_root) { status = -EFAULT; @@ -1020,7 +1008,7 @@ static int ocfs2_fill_local_node_info(struct ocfs2_super *osb) goto bail; } - mlog(ML_NOTICE, "I am node %d\n", osb->node_num); + mlog(0, "I am node %d\n", osb->node_num); status = 0; bail: @@ -1191,8 +1179,8 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) atomic_set(&osb->vol_state, VOLUME_DISMOUNTED); - printk("ocfs2: Unmounting device (%u,%u) on (node %d)\n", - MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev), osb->node_num); + printk(KERN_INFO "ocfs2: Unmounting device (%s) on (node %d)\n", + osb->dev_str, osb->node_num); ocfs2_delete_osb(osb); kfree(osb); @@ -1212,8 +1200,6 @@ static int ocfs2_setup_osb_uuid(struct ocfs2_super *osb, const unsigned char *uu if (osb->uuid_str == NULL) return -ENOMEM; - memcpy(osb->uuid, uuid, OCFS2_VOL_UUID_LEN); - for (i = 0, ptr = osb->uuid_str; i < OCFS2_VOL_UUID_LEN; i++) { /* print with null */ ret = snprintf(ptr, 3, "%02X", uuid[i]); @@ -1311,13 +1297,6 @@ static int ocfs2_initialize_super(struct super_block *sb, goto bail; } - osb->uuid = kmalloc(OCFS2_VOL_UUID_LEN, GFP_KERNEL); - if (!osb->uuid) { - mlog(ML_ERROR, "unable to alloc uuid\n"); - status = -ENOMEM; - goto bail; - } - di = (struct ocfs2_dinode *)bh->b_data; osb->max_slots = le16_to_cpu(di->id2.i_super.s_max_slots); @@ -1327,7 +1306,7 @@ static int ocfs2_initialize_super(struct super_block *sb, status = -EINVAL; goto 
bail; } - mlog(ML_NOTICE, "max_slots for this device: %u\n", osb->max_slots); + mlog(0, "max_slots for this device: %u\n", osb->max_slots); init_waitqueue_head(&osb->osb_wipe_event); osb->osb_orphan_wipes = kcalloc(osb->max_slots, @@ -1418,7 +1397,7 @@ static int ocfs2_initialize_super(struct super_block *sb, goto bail; } - memcpy(&uuid_net_key, &osb->uuid[i], sizeof(osb->net_key)); + memcpy(&uuid_net_key, di->id2.i_super.s_uuid, sizeof(uuid_net_key)); osb->net_key = le32_to_cpu(uuid_net_key); strncpy(osb->vol_label, di->id2.i_super.s_label, 63); @@ -1484,18 +1463,6 @@ static int ocfs2_initialize_super(struct super_block *sb, goto bail; } - /* Link this osb onto the global linked list of all osb structures. */ - /* The Global Link List is mainted for the whole driver . */ - spin_lock(&ocfs2_globals_lock); - osb->osb_id = osb_id; - if (osb_id < OCFS2_MAX_OSB_ID) - osb_id++; - else { - mlog(ML_ERROR, "Too many volumes mounted\n"); - status = -ENOMEM; - } - spin_unlock(&ocfs2_globals_lock); - bail: mlog_exit(status); return status; diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c index 0c8a129..c0f68aa 100644 --- a/fs/ocfs2/symlink.c +++ b/fs/ocfs2/symlink.c @@ -154,7 +154,7 @@ static void *ocfs2_follow_link(struct dentry *dentry, } status = vfs_follow_link(nd, link); - if (status) + if (status && status != -ENOENT) mlog_errno(status); bail: if (page) { diff --git a/fs/partitions/Makefile b/fs/partitions/Makefile index 42c7d38..d713ce6 100644 --- a/fs/partitions/Makefile +++ b/fs/partitions/Makefile @@ -4,7 +4,6 @@ obj-y := check.o -obj-$(CONFIG_DEVFS_FS) += devfs.o obj-$(CONFIG_ACORN_PARTITION) += acorn.o obj-$(CONFIG_AMIGA_PARTITION) += amiga.o obj-$(CONFIG_ATARI_PARTITION) += atari.o diff --git a/fs/partitions/acorn.c b/fs/partitions/acorn.c index c050857..1bc9f37 100644 --- a/fs/partitions/acorn.c +++ b/fs/partitions/acorn.c @@ -12,7 +12,6 @@ * every single manufacturer of SCSI and IDE cards created their own * method. 
*/ -#include <linux/config.h> #include <linux/buffer_head.h> #include <linux/adfs_fs.h> diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 2ef313a..51c6a74 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -18,10 +18,8 @@ #include <linux/fs.h> #include <linux/kmod.h> #include <linux/ctype.h> -#include <linux/devfs_fs_kernel.h> #include "check.h" -#include "devfs.h" #include "acorn.h" #include "amiga.h" @@ -161,18 +159,11 @@ check_partition(struct gendisk *hd, struct block_device *bdev) if (!state) return NULL; -#ifdef CONFIG_DEVFS_FS - if (hd->devfs_name[0] != '\0') { - printk(KERN_INFO " /dev/%s:", hd->devfs_name); + disk_name(hd, 0, state->name); + printk(KERN_INFO " %s:", state->name); + if (isdigit(state->name[strlen(state->name)-1])) sprintf(state->name, "p"); - } -#endif - else { - disk_name(hd, 0, state->name); - printk(KERN_INFO " %s:", state->name); - if (isdigit(state->name[strlen(state->name)-1])) - sprintf(state->name, "p"); - } + state->limit = hd->minors; i = res = 0; while (!res && check_part[i]) { @@ -328,7 +319,6 @@ void delete_partition(struct gendisk *disk, int part) p->nr_sects = 0; p->ios[0] = p->ios[1] = 0; p->sectors[0] = p->sectors[1] = 0; - devfs_remove("%s/part%d", disk->devfs_name, part); sysfs_remove_link(&p->kobj, "subsystem"); if (p->holder_dir) kobject_unregister(p->holder_dir); @@ -349,10 +339,7 @@ void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len) p->start_sect = start; p->nr_sects = len; p->partno = part; - - devfs_mk_bdev(MKDEV(disk->major, disk->first_minor + part), - S_IFBLK|S_IRUSR|S_IWUSR, - "%s/part%d", disk->devfs_name, part); + p->policy = disk->policy; if (isdigit(disk->kobj.name[strlen(disk->kobj.name)-1])) snprintf(p->kobj.name,KOBJ_NAME_LEN,"%sp%d",disk->kobj.name,part); @@ -423,14 +410,8 @@ void register_disk(struct gendisk *disk) disk_sysfs_add_subdirs(disk); /* No minors to use for partitions */ - if (disk->minors == 1) { - if (disk->devfs_name[0] != '\0') - 
devfs_add_disk(disk); + if (disk->minors == 1) goto exit; - } - - /* always add handle for the whole disk */ - devfs_add_partitioned(disk); /* No such device (e.g., media were just removed) */ if (!get_capacity(disk)) @@ -538,8 +519,6 @@ void del_gendisk(struct gendisk *disk) disk_stat_set_all(disk, 0); disk->stamp = 0; - devfs_remove_disk(disk); - kobject_uevent(&disk->kobj, KOBJ_REMOVE); if (disk->holder_dir) kobject_unregister(disk->holder_dir); diff --git a/fs/partitions/devfs.c b/fs/partitions/devfs.c deleted file mode 100644 index 3f0a780..0000000 --- a/fs/partitions/devfs.c +++ /dev/null @@ -1,130 +0,0 @@ -/* - * This tries to keep block devices away from devfs as much as possible. - */ -#include <linux/fs.h> -#include <linux/devfs_fs_kernel.h> -#include <linux/vmalloc.h> -#include <linux/genhd.h> -#include <linux/bitops.h> -#include <linux/mutex.h> - - -struct unique_numspace { - u32 num_free; /* Num free in bits */ - u32 length; /* Array length in bytes */ - unsigned long *bits; - struct semaphore mutex; -}; - -static DEFINE_MUTEX(numspace_mutex); - -static int expand_numspace(struct unique_numspace *s) -{ - u32 length; - void *bits; - - if (s->length < 16) - length = 16; - else - length = s->length << 1; - - bits = vmalloc(length); - if (!bits) - return -ENOMEM; - if (s->bits) { - memcpy(bits, s->bits, s->length); - vfree(s->bits); - } - - s->num_free = (length - s->length) << 3; - s->bits = bits; - memset(bits + s->length, 0, length - s->length); - s->length = length; - - return 0; -} - -static int alloc_unique_number(struct unique_numspace *s) -{ - int rval = 0; - - mutex_lock(&numspace_mutex); - if (s->num_free < 1) - rval = expand_numspace(s); - if (!rval) { - rval = find_first_zero_bit(s->bits, s->length << 3); - --s->num_free; - __set_bit(rval, s->bits); - } - mutex_unlock(&numspace_mutex); - - return rval; -} - -static void dealloc_unique_number(struct unique_numspace *s, int number) -{ - int old_val; - - if (number >= 0) { - 
mutex_lock(&numspace_mutex); - old_val = __test_and_clear_bit(number, s->bits); - if (old_val) - ++s->num_free; - mutex_unlock(&numspace_mutex); - } -} - -static struct unique_numspace disc_numspace; -static struct unique_numspace cdrom_numspace; - -void devfs_add_partitioned(struct gendisk *disk) -{ - char dirname[64], symlink[16]; - - devfs_mk_dir(disk->devfs_name); - devfs_mk_bdev(MKDEV(disk->major, disk->first_minor), - S_IFBLK|S_IRUSR|S_IWUSR, - "%s/disc", disk->devfs_name); - - disk->number = alloc_unique_number(&disc_numspace); - - sprintf(symlink, "discs/disc%d", disk->number); - sprintf(dirname, "../%s", disk->devfs_name); - devfs_mk_symlink(symlink, dirname); - -} - -void devfs_add_disk(struct gendisk *disk) -{ - devfs_mk_bdev(MKDEV(disk->major, disk->first_minor), - (disk->flags & GENHD_FL_CD) ? - S_IFBLK|S_IRUGO|S_IWUGO : - S_IFBLK|S_IRUSR|S_IWUSR, - "%s", disk->devfs_name); - - if (disk->flags & GENHD_FL_CD) { - char dirname[64], symlink[16]; - - disk->number = alloc_unique_number(&cdrom_numspace); - - sprintf(symlink, "cdroms/cdrom%d", disk->number); - sprintf(dirname, "../%s", disk->devfs_name); - devfs_mk_symlink(symlink, dirname); - } -} - -void devfs_remove_disk(struct gendisk *disk) -{ - if (disk->minors != 1) { - devfs_remove("discs/disc%d", disk->number); - dealloc_unique_number(&disc_numspace, disk->number); - devfs_remove("%s/disc", disk->devfs_name); - } - if (disk->flags & GENHD_FL_CD) { - devfs_remove("cdroms/cdrom%d", disk->number); - dealloc_unique_number(&cdrom_numspace, disk->number); - } - devfs_remove(disk->devfs_name); -} - - diff --git a/fs/partitions/devfs.h b/fs/partitions/devfs.h deleted file mode 100644 index 176118b..0000000 --- a/fs/partitions/devfs.h +++ /dev/null @@ -1,10 +0,0 @@ - -#ifdef CONFIG_DEVFS_FS -void devfs_add_disk(struct gendisk *dev); -void devfs_add_partitioned(struct gendisk *dev); -void devfs_remove_disk(struct gendisk *dev); -#else -# define devfs_add_disk(disk) do { } while (0) -# define 
devfs_add_partitioned(disk) do { } while (0) -# define devfs_remove_disk(disk) do { } while (0) -#endif diff --git a/fs/partitions/efi.c b/fs/partitions/efi.c index 0f5b017..6373028 100644 --- a/fs/partitions/efi.c +++ b/fs/partitions/efi.c @@ -91,7 +91,6 @@ * - Code works, detects all the partitions. * ************************************************************/ -#include <linux/config.h> #include <linux/crc32.h> #include "check.h" #include "efi.h" diff --git a/fs/partitions/efi.h b/fs/partitions/efi.h index c44fb05..2cc89d0 100644 --- a/fs/partitions/efi.h +++ b/fs/partitions/efi.h @@ -26,7 +26,6 @@ #define FS_PART_EFI_H_INCLUDED #include <linux/types.h> -#include <linux/config.h> #include <linux/fs.h> #include <linux/genhd.h> #include <linux/kernel.h> diff --git a/fs/partitions/ibm.c b/fs/partitions/ibm.c index 830c55d..d352a73 100644 --- a/fs/partitions/ibm.c +++ b/fs/partitions/ibm.c @@ -6,7 +6,6 @@ * (C) IBM Corporation, IBM Deutschland Entwicklung GmbH, 1999,2000 */ -#include <linux/config.h> #include <linux/buffer_head.h> #include <linux/hdreg.h> #include <linux/slab.h> diff --git a/fs/partitions/mac.c b/fs/partitions/mac.c index 813292f..c087100 100644 --- a/fs/partitions/mac.c +++ b/fs/partitions/mac.c @@ -6,7 +6,6 @@ * Re-organised Feb 1998 Russell King */ -#include <linux/config.h> #include <linux/ctype.h> #include "check.h" #include "mac.h" diff --git a/fs/partitions/msdos.c b/fs/partitions/msdos.c index 9935d25..8f12587 100644 --- a/fs/partitions/msdos.c +++ b/fs/partitions/msdos.c @@ -19,7 +19,6 @@ * Re-organised Feb 1998 Russell King */ -#include <linux/config.h> #include "check.h" #include "msdos.h" diff --git a/fs/proc/array.c b/fs/proc/array.c index 7a76ad5..7495d3e 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -52,7 +52,6 @@ * : base.c too. 
*/ -#include <linux/config.h> #include <linux/types.h> #include <linux/errno.h> #include <linux/time.h> diff --git a/fs/proc/base.c b/fs/proc/base.c index 6ba7785..243a94a 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -49,7 +49,6 @@ #include <asm/uaccess.h> -#include <linux/config.h> #include <linux/errno.h> #include <linux/time.h> #include <linux/proc_fs.h> diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 17f6e8f..6a984f6 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -9,7 +9,6 @@ * Safe accesses to vmalloc/direct-mapped discontiguous areas, Kanoj Sarcar <kanoj@sgi.com> */ -#include <linux/config.h> #include <linux/mm.h> #include <linux/proc_fs.h> #include <linux/user.h> @@ -43,8 +42,6 @@ const struct file_operations proc_kcore_operations = { #define kc_offset_to_vaddr(o) ((o) + PAGE_OFFSET) #endif -#define roundup(x, y) ((((x)+((y)-1))/(y))*(y)) - /* An ELF note in memory */ struct memelfnote { @@ -385,7 +382,7 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) */ if (n) { if (clear_user(buffer + tsz - n, - tsz - n)) + n)) return -EFAULT; } } else { diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 5c10ea1..9f2cfc3 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -26,7 +26,6 @@ #include <linux/mman.h> #include <linux/proc_fs.h> #include <linux/ioport.h> -#include <linux/config.h> #include <linux/mm.h> #include <linux/mmzone.h> #include <linux/pagemap.h> @@ -120,7 +119,6 @@ static int meminfo_read_proc(char *page, char **start, off_t off, { struct sysinfo i; int len; - struct page_state ps; unsigned long inactive; unsigned long active; unsigned long free; @@ -129,7 +127,6 @@ static int meminfo_read_proc(char *page, char **start, off_t off, struct vmalloc_info vmi; long cached; - get_page_state(&ps); get_zone_counts(&active, &inactive, &free); /* @@ -142,7 +139,8 @@ static int meminfo_read_proc(char *page, char **start, off_t off, allowed = ((totalram_pages - hugetlb_total_pages()) * 
sysctl_overcommit_ratio / 100) + total_swap_pages; - cached = get_page_cache_size() - total_swapcache_pages - i.bufferram; + cached = global_page_state(NR_FILE_PAGES) - + total_swapcache_pages - i.bufferram; if (cached < 0) cached = 0; @@ -167,11 +165,14 @@ static int meminfo_read_proc(char *page, char **start, off_t off, "SwapFree: %8lu kB\n" "Dirty: %8lu kB\n" "Writeback: %8lu kB\n" + "AnonPages: %8lu kB\n" "Mapped: %8lu kB\n" "Slab: %8lu kB\n" + "PageTables: %8lu kB\n" + "NFS Unstable: %8lu kB\n" + "Bounce: %8lu kB\n" "CommitLimit: %8lu kB\n" "Committed_AS: %8lu kB\n" - "PageTables: %8lu kB\n" "VmallocTotal: %8lu kB\n" "VmallocUsed: %8lu kB\n" "VmallocChunk: %8lu kB\n", @@ -188,13 +189,16 @@ static int meminfo_read_proc(char *page, char **start, off_t off, K(i.freeram-i.freehigh), K(i.totalswap), K(i.freeswap), - K(ps.nr_dirty), - K(ps.nr_writeback), - K(ps.nr_mapped), - K(ps.nr_slab), + K(global_page_state(NR_FILE_DIRTY)), + K(global_page_state(NR_WRITEBACK)), + K(global_page_state(NR_ANON_PAGES)), + K(global_page_state(NR_FILE_MAPPED)), + K(global_page_state(NR_SLAB)), + K(global_page_state(NR_PAGETABLE)), + K(global_page_state(NR_UNSTABLE_NFS)), + K(global_page_state(NR_BOUNCE)), K(allowed), K(committed), - K(ps.nr_page_table_pages), (unsigned long)VMALLOC_TOTAL >> 10, vmi.used >> 10, vmi.largest_chunk >> 10 diff --git a/fs/proc/root.c b/fs/proc/root.c index 9995356..8901c65 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -12,7 +12,6 @@ #include <linux/time.h> #include <linux/proc_fs.h> #include <linux/stat.h> -#include <linux/config.h> #include <linux/init.h> #include <linux/module.h> #include <linux/bitops.h> diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index af69f28..4616ed5 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -107,7 +107,7 @@ int proc_exe_link(struct inode *inode, struct dentry **dentry, struct vfsmount * { struct vm_list_struct *vml; struct vm_area_struct *vma; - struct task_struct *task = proc_task(inode); + 
struct task_struct *task = get_proc_task(inode); struct mm_struct *mm = get_task_mm(task); int result = -ENOENT; diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 20d4b22..d9605072 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -7,7 +7,6 @@ * */ -#include <linux/config.h> #include <linux/mm.h> #include <linux/proc_fs.h> #include <linux/user.h> diff --git a/fs/qnx4/bitmap.c b/fs/qnx4/bitmap.c index 46efbf5..8425cf6 100644 --- a/fs/qnx4/bitmap.c +++ b/fs/qnx4/bitmap.c @@ -13,7 +13,6 @@ * 28-06-1998 by Frank Denis : qnx4_free_inode (to be fixed) . */ -#include <linux/config.h> #include <linux/time.h> #include <linux/fs.h> #include <linux/qnx4_fs.h> diff --git a/fs/qnx4/dir.c b/fs/qnx4/dir.c index 9031948..0d7103f 100644 --- a/fs/qnx4/dir.c +++ b/fs/qnx4/dir.c @@ -11,7 +11,6 @@ * 20-06-1998 by Frank Denis : Linux 2.1.99+ & dcache support. */ -#include <linux/config.h> #include <linux/string.h> #include <linux/errno.h> #include <linux/fs.h> diff --git a/fs/qnx4/fsync.c b/fs/qnx4/fsync.c index df5bc75..aa3b195 100644 --- a/fs/qnx4/fsync.c +++ b/fs/qnx4/fsync.c @@ -10,7 +10,6 @@ * 24-03-1998 by Richard Frowijn : first release. */ -#include <linux/config.h> #include <linux/errno.h> #include <linux/time.h> #include <linux/stat.h> diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index 2f24c46..5a90349 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c @@ -12,7 +12,6 @@ * 30-06-1998 by Frank Denis : first step to write inodes. 
*/ -#include <linux/config.h> #include <linux/module.h> #include <linux/types.h> #include <linux/string.h> @@ -450,7 +449,7 @@ static sector_t qnx4_bmap(struct address_space *mapping, sector_t block) { return generic_block_bmap(mapping,block,qnx4_get_block); } -static struct address_space_operations qnx4_aops = { +static const struct address_space_operations qnx4_aops = { .readpage = qnx4_readpage, .writepage = qnx4_writepage, .sync_page = block_sync_page, diff --git a/fs/qnx4/namei.c b/fs/qnx4/namei.c index 4af4951..c3d83f6 100644 --- a/fs/qnx4/namei.c +++ b/fs/qnx4/namei.c @@ -12,7 +12,6 @@ * 04-07-1998 by Frank Denis : first step for rmdir/unlink. */ -#include <linux/config.h> #include <linux/time.h> #include <linux/fs.h> #include <linux/qnx4_fs.h> diff --git a/fs/qnx4/truncate.c b/fs/qnx4/truncate.c index 86563ec..6437c1c 100644 --- a/fs/qnx4/truncate.c +++ b/fs/qnx4/truncate.c @@ -10,7 +10,6 @@ * 30-06-1998 by Frank DENIS : ugly filler. */ -#include <linux/config.h> #include <linux/types.h> #include <linux/errno.h> #include <linux/fs.h> diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c index 00a933e..86f14ca 100644 --- a/fs/ramfs/file-mmu.c +++ b/fs/ramfs/file-mmu.c @@ -26,7 +26,7 @@ #include <linux/fs.h> -struct address_space_operations ramfs_aops = { +const struct address_space_operations ramfs_aops = { .readpage = simple_readpage, .prepare_write = simple_prepare_write, .commit_write = simple_commit_write diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c index f443a84..677139b 100644 --- a/fs/ramfs/file-nommu.c +++ b/fs/ramfs/file-nommu.c @@ -27,7 +27,7 @@ static int ramfs_nommu_setattr(struct dentry *, struct iattr *); -struct address_space_operations ramfs_aops = { +const struct address_space_operations ramfs_aops = { .readpage = simple_readpage, .prepare_write = simple_prepare_write, .commit_write = simple_commit_write @@ -283,9 +283,9 @@ unsigned long ramfs_nommu_get_unmapped_area(struct file *file, 
/*****************************************************************************/ /* - * set up a mapping + * set up a mapping for shared memory segments */ int ramfs_nommu_mmap(struct file *file, struct vm_area_struct *vma) { - return 0; + return vma->vm_flags & VM_SHARED ? 0 : -ENOSYS; } diff --git a/fs/ramfs/internal.h b/fs/ramfs/internal.h index 3132376..c2bb58e 100644 --- a/fs/ramfs/internal.h +++ b/fs/ramfs/internal.h @@ -10,6 +10,6 @@ */ -extern struct address_space_operations ramfs_aops; +extern const struct address_space_operations ramfs_aops; extern const struct file_operations ramfs_file_operations; extern struct inode_operations ramfs_file_inode_operations; diff --git a/fs/read_write.c b/fs/read_write.c index 5bc0e92..d4cb318 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -436,7 +436,7 @@ unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to) return seg; } -EXPORT_SYMBOL(iov_shorten); +EXPORT_UNUSED_SYMBOL(iov_shorten); /* June 2006 */ /* A write operation does a read from user space and vice versa */ #define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ) diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c index 909f71e..4a7dbde 100644 --- a/fs/reiserfs/bitmap.c +++ b/fs/reiserfs/bitmap.c @@ -3,7 +3,6 @@ */ /* Reiserfs block (de)allocator, bitmap-based. 
*/ -#include <linux/config.h> #include <linux/time.h> #include <linux/reiserfs_fs.h> #include <linux/errno.h> diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c index 973c819..9aabcc0 100644 --- a/fs/reiserfs/dir.c +++ b/fs/reiserfs/dir.c @@ -2,7 +2,6 @@ * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README */ -#include <linux/config.h> #include <linux/string.h> #include <linux/errno.h> #include <linux/fs.h> diff --git a/fs/reiserfs/do_balan.c b/fs/reiserfs/do_balan.c index b2264ba..fba304e 100644 --- a/fs/reiserfs/do_balan.c +++ b/fs/reiserfs/do_balan.c @@ -15,7 +15,6 @@ ** **/ -#include <linux/config.h> #include <asm/uaccess.h> #include <linux/time.h> #include <linux/reiserfs_fs.h> diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index 752cea1..f318b58 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c @@ -860,8 +860,12 @@ static int reiserfs_submit_file_region_for_write(struct reiserfs_transaction_han // this sets the proper flags for O_SYNC to trigger a commit mark_inode_dirty(inode); reiserfs_write_unlock(inode->i_sb); - } else + } else { + reiserfs_write_lock(inode->i_sb); + reiserfs_update_inode_transaction(inode); mark_inode_dirty(inode); + reiserfs_write_unlock(inode->i_sb); + } sd_update = 1; } diff --git a/fs/reiserfs/fix_node.c b/fs/reiserfs/fix_node.c index 5600d3d..6d0e554 100644 --- a/fs/reiserfs/fix_node.c +++ b/fs/reiserfs/fix_node.c @@ -34,7 +34,6 @@ ** **/ -#include <linux/config.h> #include <linux/time.h> #include <linux/string.h> #include <linux/reiserfs_fs.h> diff --git a/fs/reiserfs/ibalance.c b/fs/reiserfs/ibalance.c index 6c5a726..de391a8 100644 --- a/fs/reiserfs/ibalance.c +++ b/fs/reiserfs/ibalance.c @@ -2,7 +2,6 @@ * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README */ -#include <linux/config.h> #include <asm/uaccess.h> #include <linux/string.h> #include <linux/time.h> diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 9857e50..12dfdcf 100644 --- a/fs/reiserfs/inode.c +++ 
b/fs/reiserfs/inode.c @@ -2,7 +2,6 @@ * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README */ -#include <linux/config.h> #include <linux/time.h> #include <linux/fs.h> #include <linux/reiserfs_fs.h> @@ -2933,6 +2932,11 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) } if (error) goto out; + /* + * file size is changed, ctime and mtime are + * to be updated + */ + attr->ia_valid |= (ATTR_MTIME | ATTR_CTIME); } } @@ -2996,7 +3000,7 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) return error; } -struct address_space_operations reiserfs_address_space_operations = { +const struct address_space_operations reiserfs_address_space_operations = { .writepage = reiserfs_writepage, .readpage = reiserfs_readpage, .readpages = reiserfs_readpages, diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 49d1a53..9b3672d 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -34,7 +34,6 @@ ** from within kupdate, it will ignore the immediate flag */ -#include <linux/config.h> #include <asm/uaccess.h> #include <asm/system.h> diff --git a/fs/reiserfs/lbalance.c b/fs/reiserfs/lbalance.c index 2533c1f..281f806 100644 --- a/fs/reiserfs/lbalance.c +++ b/fs/reiserfs/lbalance.c @@ -2,7 +2,6 @@ * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README */ -#include <linux/config.h> #include <asm/uaccess.h> #include <linux/string.h> #include <linux/time.h> diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index 284f785..c61710e 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c @@ -11,7 +11,6 @@ * NO WARRANTY */ -#include <linux/config.h> #include <linux/time.h> #include <linux/bitops.h> #include <linux/reiserfs_fs.h> diff --git a/fs/reiserfs/objectid.c b/fs/reiserfs/objectid.c index f62590a..65feba4 100644 --- a/fs/reiserfs/objectid.c +++ b/fs/reiserfs/objectid.c @@ -2,7 +2,6 @@ * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README */ -#include <linux/config.h> #include 
<linux/string.h> #include <linux/random.h> #include <linux/time.h> diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c index 27bd3a1..bc808a9 100644 --- a/fs/reiserfs/prints.c +++ b/fs/reiserfs/prints.c @@ -2,7 +2,6 @@ * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README */ -#include <linux/config.h> #include <linux/time.h> #include <linux/fs.h> #include <linux/reiserfs_fs.h> diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c index 731688e..5d8a8cf 100644 --- a/fs/reiserfs/procfs.c +++ b/fs/reiserfs/procfs.c @@ -10,7 +10,6 @@ /* $Id: procfs.c,v 1.1.8.2 2001/07/15 17:08:42 god Exp $ */ -#include <linux/config.h> #include <linux/module.h> #include <linux/time.h> #include <linux/seq_file.h> diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c index d2b25e1..8b9b131 100644 --- a/fs/reiserfs/stree.c +++ b/fs/reiserfs/stree.c @@ -49,7 +49,6 @@ * reiserfs_insert_item */ -#include <linux/config.h> #include <linux/time.h> #include <linux/string.h> #include <linux/pagemap.h> diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 00f1321..5567328 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -11,7 +11,6 @@ * NO WARRANTY */ -#include <linux/config.h> #include <linux/module.h> #include <linux/vmalloc.h> #include <linux/time.h> @@ -2204,7 +2203,7 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type, size_t towrite = len; struct buffer_head tmp_bh, *bh; - mutex_lock(&inode->i_mutex); + mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); while (towrite > 0) { tocopy = sb->s_blocksize - offset < towrite ? 
sb->s_blocksize - offset : towrite; diff --git a/fs/reiserfs/tail_conversion.c b/fs/reiserfs/tail_conversion.c index 196e971..36f108f 100644 --- a/fs/reiserfs/tail_conversion.c +++ b/fs/reiserfs/tail_conversion.c @@ -2,7 +2,6 @@ * Copyright 1999 Hans Reiser, see reiserfs/README for licensing and copyright details */ -#include <linux/config.h> #include <linux/time.h> #include <linux/pagemap.h> #include <linux/buffer_head.h> diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c index 283fbc6..22eed61 100644 --- a/fs/romfs/inode.c +++ b/fs/romfs/inode.c @@ -459,7 +459,7 @@ err_out: /* Mapping from our types to the kernel */ -static struct address_space_operations romfs_aops = { +static const struct address_space_operations romfs_aops = { .readpage = romfs_readpage }; diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c index ed9a24d..dae6704 100644 --- a/fs/smbfs/file.c +++ b/fs/smbfs/file.c @@ -306,7 +306,7 @@ static int smb_commit_write(struct file *file, struct page *page, return status; } -struct address_space_operations smb_file_aops = { +const struct address_space_operations smb_file_aops = { .readpage = smb_readpage, .writepage = smb_writepage, .prepare_write = smb_prepare_write, diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c index 506ff87..a1ed657 100644 --- a/fs/smbfs/inode.c +++ b/fs/smbfs/inode.c @@ -7,7 +7,6 @@ * Please add a note about your changes to smbfs in the ChangeLog file. 
*/ -#include <linux/config.h> #include <linux/module.h> #include <linux/time.h> #include <linux/kernel.h> diff --git a/fs/smbfs/proto.h b/fs/smbfs/proto.h index 972ed7d..34fb462 100644 --- a/fs/smbfs/proto.h +++ b/fs/smbfs/proto.h @@ -63,7 +63,7 @@ extern int smb_revalidate_inode(struct dentry *dentry); extern int smb_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); extern int smb_notify_change(struct dentry *dentry, struct iattr *attr); /* file.c */ -extern struct address_space_operations smb_file_aops; +extern const struct address_space_operations smb_file_aops; extern const struct file_operations smb_file_operations; extern struct inode_operations smb_file_inode_operations; /* ioctl.c */ diff --git a/fs/smbfs/smbiod.c b/fs/smbfs/smbiod.c index 24577e2..e675404 100644 --- a/fs/smbfs/smbiod.c +++ b/fs/smbfs/smbiod.c @@ -5,7 +5,6 @@ * Copyright (C) 2001, Urban Widmark */ -#include <linux/config.h> #include <linux/sched.h> #include <linux/kernel.h> diff --git a/fs/splice.c b/fs/splice.c index 05fd278..684bca3 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1307,6 +1307,85 @@ asmlinkage long sys_splice(int fd_in, loff_t __user *off_in, } /* + * Make sure there's data to read. Wait for input if we can, otherwise + * return an appropriate error. + */ +static int link_ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags) +{ + int ret; + + /* + * Check ->nrbufs without the inode lock first. This function + * is speculative anyways, so missing one is ok. + */ + if (pipe->nrbufs) + return 0; + + ret = 0; + mutex_lock(&pipe->inode->i_mutex); + + while (!pipe->nrbufs) { + if (signal_pending(current)) { + ret = -ERESTARTSYS; + break; + } + if (!pipe->writers) + break; + if (!pipe->waiting_writers) { + if (flags & SPLICE_F_NONBLOCK) { + ret = -EAGAIN; + break; + } + } + pipe_wait(pipe); + } + + mutex_unlock(&pipe->inode->i_mutex); + return ret; +} + +/* + * Make sure there's writeable room. 
Wait for room if we can, otherwise + * return an appropriate error. + */ +static int link_opipe_prep(struct pipe_inode_info *pipe, unsigned int flags) +{ + int ret; + + /* + * Check ->nrbufs without the inode lock first. This function + * is speculative anyways, so missing one is ok. + */ + if (pipe->nrbufs < PIPE_BUFFERS) + return 0; + + ret = 0; + mutex_lock(&pipe->inode->i_mutex); + + while (pipe->nrbufs >= PIPE_BUFFERS) { + if (!pipe->readers) { + send_sig(SIGPIPE, current, 0); + ret = -EPIPE; + break; + } + if (flags & SPLICE_F_NONBLOCK) { + ret = -EAGAIN; + break; + } + if (signal_pending(current)) { + ret = -ERESTARTSYS; + break; + } + pipe->waiting_writers++; + pipe_wait(pipe); + pipe->waiting_writers--; + } + + mutex_unlock(&pipe->inode->i_mutex); + return ret; +} + +/* * Link contents of ipipe to opipe. */ static int link_pipe(struct pipe_inode_info *ipipe, @@ -1314,9 +1393,7 @@ static int link_pipe(struct pipe_inode_info *ipipe, size_t len, unsigned int flags) { struct pipe_buffer *ibuf, *obuf; - int ret, do_wakeup, i, ipipe_first; - - ret = do_wakeup = ipipe_first = 0; + int ret = 0, i = 0, nbuf; /* * Potential ABBA deadlock, work around it by ordering lock @@ -1324,126 +1401,62 @@ static int link_pipe(struct pipe_inode_info *ipipe, * could deadlock (one doing tee from A -> B, the other from B -> A). 
*/ if (ipipe->inode < opipe->inode) { - ipipe_first = 1; - mutex_lock(&ipipe->inode->i_mutex); - mutex_lock(&opipe->inode->i_mutex); + mutex_lock_nested(&ipipe->inode->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&opipe->inode->i_mutex, I_MUTEX_CHILD); } else { - mutex_lock(&opipe->inode->i_mutex); - mutex_lock(&ipipe->inode->i_mutex); + mutex_lock_nested(&opipe->inode->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&ipipe->inode->i_mutex, I_MUTEX_CHILD); } - for (i = 0;; i++) { + do { if (!opipe->readers) { send_sig(SIGPIPE, current, 0); if (!ret) ret = -EPIPE; break; } - if (ipipe->nrbufs - i) { - ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (PIPE_BUFFERS - 1)); - /* - * If we have room, fill this buffer - */ - if (opipe->nrbufs < PIPE_BUFFERS) { - int nbuf = (opipe->curbuf + opipe->nrbufs) & (PIPE_BUFFERS - 1); - - /* - * Get a reference to this pipe buffer, - * so we can copy the contents over. - */ - ibuf->ops->get(ipipe, ibuf); - - obuf = opipe->bufs + nbuf; - *obuf = *ibuf; - - /* - * Don't inherit the gift flag, we need to - * prevent multiple steals of this page. - */ - obuf->flags &= ~PIPE_BUF_FLAG_GIFT; - - if (obuf->len > len) - obuf->len = len; - - opipe->nrbufs++; - do_wakeup = 1; - ret += obuf->len; - len -= obuf->len; - - if (!len) - break; - if (opipe->nrbufs < PIPE_BUFFERS) - continue; - } - - /* - * We have input available, but no output room. - * If we already copied data, return that. If we - * need to drop the opipe lock, it must be ordered - * last to avoid deadlocks. - */ - if ((flags & SPLICE_F_NONBLOCK) || !ipipe_first) { - if (!ret) - ret = -EAGAIN; - break; - } - if (signal_pending(current)) { - if (!ret) - ret = -ERESTARTSYS; - break; - } - if (do_wakeup) { - smp_mb(); - if (waitqueue_active(&opipe->wait)) - wake_up_interruptible(&opipe->wait); - kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN); - do_wakeup = 0; - } + /* + * If we have iterated all input buffers or ran out of + * output room, break. 
+ */ + if (i >= ipipe->nrbufs || opipe->nrbufs >= PIPE_BUFFERS) + break; - opipe->waiting_writers++; - pipe_wait(opipe); - opipe->waiting_writers--; - continue; - } + ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (PIPE_BUFFERS - 1)); + nbuf = (opipe->curbuf + opipe->nrbufs) & (PIPE_BUFFERS - 1); /* - * No input buffers, do the usual checks for available - * writers and blocking and wait if necessary + * Get a reference to this pipe buffer, + * so we can copy the contents over. */ - if (!ipipe->writers) - break; - if (!ipipe->waiting_writers) { - if (ret) - break; - } + ibuf->ops->get(ipipe, ibuf); + + obuf = opipe->bufs + nbuf; + *obuf = *ibuf; + /* - * pipe_wait() drops the ipipe mutex. To avoid deadlocks - * with another process, we can only safely do that if - * the ipipe lock is ordered last. + * Don't inherit the gift flag, we need to + * prevent multiple steals of this page. */ - if ((flags & SPLICE_F_NONBLOCK) || ipipe_first) { - if (!ret) - ret = -EAGAIN; - break; - } - if (signal_pending(current)) { - if (!ret) - ret = -ERESTARTSYS; - break; - } + obuf->flags &= ~PIPE_BUF_FLAG_GIFT; - if (waitqueue_active(&ipipe->wait)) - wake_up_interruptible_sync(&ipipe->wait); - kill_fasync(&ipipe->fasync_writers, SIGIO, POLL_OUT); + if (obuf->len > len) + obuf->len = len; - pipe_wait(ipipe); - } + opipe->nrbufs++; + ret += obuf->len; + len -= obuf->len; + i++; + } while (len); mutex_unlock(&ipipe->inode->i_mutex); mutex_unlock(&opipe->inode->i_mutex); - if (do_wakeup) { + /* + * If we put data in the output pipe, wakeup any potential readers. + */ + if (ret > 0) { smp_mb(); if (waitqueue_active(&opipe->wait)) wake_up_interruptible(&opipe->wait); @@ -1464,14 +1477,29 @@ static long do_tee(struct file *in, struct file *out, size_t len, { struct pipe_inode_info *ipipe = in->f_dentry->d_inode->i_pipe; struct pipe_inode_info *opipe = out->f_dentry->d_inode->i_pipe; + int ret = -EINVAL; /* - * Link ipipe to the two output pipes, consuming as we go along. 
+ * Duplicate the contents of ipipe to opipe without actually + * copying the data. */ - if (ipipe && opipe) - return link_pipe(ipipe, opipe, len, flags); + if (ipipe && opipe && ipipe != opipe) { + /* + * Keep going, unless we encounter an error. The ipipe/opipe + * ordering doesn't really matter. + */ + ret = link_ipipe_prep(ipipe, flags); + if (!ret) { + ret = link_opipe_prep(opipe, flags); + if (!ret) { + ret = link_pipe(ipipe, opipe, len, flags); + if (!ret && (flags & SPLICE_F_NONBLOCK)) + ret = -EAGAIN; + } + } + } - return -EINVAL; + return ret; } asmlinkage long sys_tee(int fdin, int fdout, size_t len, unsigned int flags) @@ -4,7 +4,6 @@ * Copyright (C) 1991, 1992 Linus Torvalds */ -#include <linux/config.h> #include <linux/module.h> #include <linux/mm.h> #include <linux/errno.h> @@ -20,7 +20,6 @@ * Heavily rewritten for 'one fs - one tree' dcache architecture. AV, Mar 2000 */ -#include <linux/config.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/init.h> @@ -54,7 +53,7 @@ DEFINE_SPINLOCK(sb_lock); * Allocates and initializes a new &struct super_block. alloc_super() * returns a pointer new superblock or %NULL if allocation had failed. */ -static struct super_block *alloc_super(void) +static struct super_block *alloc_super(struct file_system_type *type) { struct super_block *s = kzalloc(sizeof(struct super_block), GFP_USER); static struct super_operations default_op; @@ -73,6 +72,13 @@ static struct super_block *alloc_super(void) INIT_LIST_HEAD(&s->s_inodes); init_rwsem(&s->s_umount); mutex_init(&s->s_lock); + lockdep_set_class(&s->s_umount, &type->s_umount_key); + /* + * The locking rules for s_lock are up to the + * filesystem. 
For example ext3fs has different + * lock ordering than usbfs: + */ + lockdep_set_class(&s->s_lock, &type->s_lock_key); down_write(&s->s_umount); s->s_count = S_BIAS; atomic_set(&s->s_active, 1); @@ -296,7 +302,7 @@ retry: } if (!s) { spin_unlock(&sb_lock); - s = alloc_super(); + s = alloc_super(type); if (!s) return ERR_PTR(-ENOMEM); goto retry; diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c index f0b347b..9889e54 100644 --- a/fs/sysfs/inode.c +++ b/fs/sysfs/inode.c @@ -16,7 +16,7 @@ extern struct super_block * sysfs_sb; -static struct address_space_operations sysfs_aops = { +static const struct address_space_operations sysfs_aops = { .readpage = simple_readpage, .prepare_write = simple_prepare_write, .commit_write = simple_commit_write @@ -109,6 +109,17 @@ static inline void set_inode_attr(struct inode * inode, struct iattr * iattr) inode->i_ctime = iattr->ia_ctime; } + +/* + * sysfs has a different i_mutex lock order behavior for i_mutex than other + * filesystems; sysfs i_mutex is called in many places with subsystem locks + * held. At the same time, many of the VFS locking rules do not apply to + * sysfs at all (cross directory rename for example). To untangle this mess + * (which gives false positives in lockdep), we're giving sysfs inodes their + * own class for i_mutex. 
+ */ +static struct lock_class_key sysfs_inode_imutex_key; + struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent * sd) { struct inode * inode = new_inode(sysfs_sb); @@ -118,6 +129,7 @@ struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent * sd) inode->i_mapping->a_ops = &sysfs_aops; inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info; inode->i_op = &sysfs_inode_operations; + lockdep_set_class(&inode->i_mutex, &sysfs_inode_imutex_key); if (sd->s_iattr) { /* sysfs_dirent has non-default attributes diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c index 86f5f8d..f2bcccd 100644 --- a/fs/sysv/itree.c +++ b/fs/sysv/itree.c @@ -465,7 +465,7 @@ static sector_t sysv_bmap(struct address_space *mapping, sector_t block) { return generic_block_bmap(mapping,block,get_block); } -struct address_space_operations sysv_aops = { +const struct address_space_operations sysv_aops = { .readpage = sysv_readpage, .writepage = sysv_writepage, .sync_page = block_sync_page, diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h index 393a480..9dcc821 100644 --- a/fs/sysv/sysv.h +++ b/fs/sysv/sysv.h @@ -161,7 +161,7 @@ extern struct inode_operations sysv_dir_inode_operations; extern struct inode_operations sysv_fast_symlink_inode_operations; extern const struct file_operations sysv_file_operations; extern const struct file_operations sysv_dir_operations; -extern struct address_space_operations sysv_aops; +extern const struct address_space_operations sysv_aops; extern struct super_operations sysv_sops; extern struct dentry_operations sysv_dentry_operations; diff --git a/fs/udf/file.c b/fs/udf/file.c index e34b00e..a59e5f3 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -95,7 +95,7 @@ static int udf_adinicb_commit_write(struct file *file, struct page *page, unsign return 0; } -struct address_space_operations udf_adinicb_aops = { +const struct address_space_operations udf_adinicb_aops = { .readpage = udf_adinicb_readpage, .writepage = udf_adinicb_writepage, .sync_page = 
block_sync_page, diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 2983afd..605f511 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -132,7 +132,7 @@ static sector_t udf_bmap(struct address_space *mapping, sector_t block) return generic_block_bmap(mapping,block,udf_get_block); } -struct address_space_operations udf_aops = { +const struct address_space_operations udf_aops = { .readpage = udf_readpage, .writepage = udf_writepage, .sync_page = block_sync_page, diff --git a/fs/udf/super.c b/fs/udf/super.c index 44fe2cb..4df822c 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -40,7 +40,6 @@ #include "udfdecl.h" -#include <linux/config.h> #include <linux/blkdev.h> #include <linux/slab.h> #include <linux/kernel.h> diff --git a/fs/udf/symlink.c b/fs/udf/symlink.c index 674bb40..ba068a7 100644 --- a/fs/udf/symlink.c +++ b/fs/udf/symlink.c @@ -113,6 +113,6 @@ out: /* * symlinks can't do much... */ -struct address_space_operations udf_symlink_aops = { +const struct address_space_operations udf_symlink_aops = { .readpage = udf_symlink_filler, }; diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h index 023e19b..1033b7c 100644 --- a/fs/udf/udfdecl.h +++ b/fs/udf/udfdecl.h @@ -6,7 +6,6 @@ #include "osta_udf.h" #include <linux/fs.h> -#include <linux/config.h> #include <linux/types.h> #include <linux/udf_fs_i.h> #include <linux/udf_fs_sb.h> @@ -47,9 +46,9 @@ extern struct inode_operations udf_dir_inode_operations; extern const struct file_operations udf_dir_operations; extern struct inode_operations udf_file_inode_operations; extern const struct file_operations udf_file_operations; -extern struct address_space_operations udf_aops; -extern struct address_space_operations udf_adinicb_aops; -extern struct address_space_operations udf_symlink_aops; +extern const struct address_space_operations udf_aops; +extern const struct address_space_operations udf_adinicb_aops; +extern const struct address_space_operations udf_symlink_aops; struct udf_fileident_bh { diff --git 
a/fs/ufs/balloc.c b/fs/ufs/balloc.c index 95b878e..b01804b 100644 --- a/fs/ufs/balloc.c +++ b/fs/ufs/balloc.c @@ -217,48 +217,6 @@ failed: return; } -static struct page *ufs_get_locked_page(struct address_space *mapping, - unsigned long index) -{ - struct page *page; - -try_again: - page = find_lock_page(mapping, index); - if (!page) { - page = read_cache_page(mapping, index, - (filler_t*)mapping->a_ops->readpage, - NULL); - if (IS_ERR(page)) { - printk(KERN_ERR "ufs_change_blocknr: " - "read_cache_page error: ino %lu, index: %lu\n", - mapping->host->i_ino, index); - goto out; - } - - lock_page(page); - - if (!PageUptodate(page) || PageError(page)) { - unlock_page(page); - page_cache_release(page); - - printk(KERN_ERR "ufs_change_blocknr: " - "can not read page: ino %lu, index: %lu\n", - mapping->host->i_ino, index); - - page = ERR_PTR(-EIO); - goto out; - } - } - - if (unlikely(!page->mapping || !page_has_buffers(page))) { - unlock_page(page); - page_cache_release(page); - goto try_again;/*we really need these buffers*/ - } -out: - return page; -} - /* * Modify inode page cache in such way: * have - blocks with b_blocknr equal to oldb...oldb+count-1 @@ -311,10 +269,8 @@ static void ufs_change_blocknr(struct inode *inode, unsigned int baseblk, set_page_dirty(page); - if (likely(cur_index != index)) { - unlock_page(page); - page_cache_release(page); - } + if (likely(cur_index != index)) + ufs_put_locked_page(page); } UFSD("EXIT\n"); } diff --git a/fs/ufs/file.c b/fs/ufs/file.c index 0e50015..a9c6e5f 100644 --- a/fs/ufs/file.c +++ b/fs/ufs/file.c @@ -60,7 +60,3 @@ const struct file_operations ufs_file_operations = { .fsync = ufs_sync_file, .sendfile = generic_file_sendfile, }; - -struct inode_operations ufs_file_inode_operations = { - .truncate = ufs_truncate, -}; diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 259bd19..e7c8615 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -98,7 +98,9 @@ static u64 ufs_frag_map(struct inode *inode, sector_t frag) u64 temp = 
0L; UFSD(": frag = %llu depth = %d\n", (unsigned long long)frag, depth); - UFSD(": uspi->s_fpbshift = %d ,uspi->s_apbmask = %x, mask=%llx\n",uspi->s_fpbshift,uspi->s_apbmask,mask); + UFSD(": uspi->s_fpbshift = %d ,uspi->s_apbmask = %x, mask=%llx\n", + uspi->s_fpbshift, uspi->s_apbmask, + (unsigned long long)mask); if (depth == 0) return 0; @@ -429,7 +431,7 @@ int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buffer_head if (!create) { phys64 = ufs_frag_map(inode, fragment); - UFSD("phys64 = %llu \n",phys64); + UFSD("phys64 = %llu\n", (unsigned long long)phys64); if (phys64) map_bh(bh_result, sb, phys64); return 0; @@ -574,7 +576,7 @@ static sector_t ufs_bmap(struct address_space *mapping, sector_t block) { return generic_block_bmap(mapping,block,ufs_getfrag_block); } -struct address_space_operations ufs_aops = { +const struct address_space_operations ufs_aops = { .readpage = ufs_readpage, .writepage = ufs_writepage, .sync_page = block_sync_page, @@ -841,14 +843,17 @@ int ufs_sync_inode (struct inode *inode) void ufs_delete_inode (struct inode * inode) { + loff_t old_i_size; + truncate_inode_pages(&inode->i_data, 0); /*UFS_I(inode)->i_dtime = CURRENT_TIME;*/ lock_kernel(); mark_inode_dirty(inode); ufs_update_inode(inode, IS_SYNC(inode)); + old_i_size = inode->i_size; inode->i_size = 0; - if (inode->i_blocks) - ufs_truncate (inode); + if (inode->i_blocks && ufs_truncate(inode, old_i_size)) + ufs_warning(inode->i_sb, __FUNCTION__, "ufs_truncate failed\n"); ufs_free_inode (inode); unlock_kernel(); } diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 74ef5e9..992ee0b 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -64,7 +64,6 @@ */ -#include <linux/config.h> #include <linux/module.h> #include <linux/bitops.h> @@ -1327,7 +1326,7 @@ static ssize_t ufs_quota_write(struct super_block *sb, int type, size_t towrite = len; struct buffer_head *bh; - mutex_lock(&inode->i_mutex); + mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); while (towrite > 0) { 
tocopy = sb->s_blocksize - offset < towrite ? sb->s_blocksize - offset : towrite; diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c index 3c3b301..c9b5587 100644 --- a/fs/ufs/truncate.c +++ b/fs/ufs/truncate.c @@ -369,24 +369,97 @@ static int ufs_trunc_tindirect (struct inode * inode) UFSD("EXIT\n"); return retry; } - -void ufs_truncate (struct inode * inode) + +static int ufs_alloc_lastblock(struct inode *inode) { + int err = 0; + struct address_space *mapping = inode->i_mapping; + struct ufs_sb_private_info *uspi = UFS_SB(inode->i_sb)->s_uspi; struct ufs_inode_info *ufsi = UFS_I(inode); - struct super_block * sb; - struct ufs_sb_private_info * uspi; - int retry; + unsigned lastfrag, i, end; + struct page *lastpage; + struct buffer_head *bh; + + lastfrag = (i_size_read(inode) + uspi->s_fsize - 1) >> uspi->s_fshift; + + if (!lastfrag) { + ufsi->i_lastfrag = 0; + goto out; + } + lastfrag--; + + lastpage = ufs_get_locked_page(mapping, lastfrag >> + (PAGE_CACHE_SHIFT - inode->i_blkbits)); + if (IS_ERR(lastpage)) { + err = -EIO; + goto out; + } + + end = lastfrag & ((1 << (PAGE_CACHE_SHIFT - inode->i_blkbits)) - 1); + bh = page_buffers(lastpage); + for (i = 0; i < end; ++i) + bh = bh->b_this_page; + + if (!buffer_mapped(bh)) { + err = ufs_getfrag_block(inode, lastfrag, bh, 1); + + if (unlikely(err)) + goto out_unlock; + + if (buffer_new(bh)) { + clear_buffer_new(bh); + unmap_underlying_metadata(bh->b_bdev, + bh->b_blocknr); + /* + * we do not zeroize fragment, because of + * if it maped to hole, it already contains zeroes + */ + set_buffer_uptodate(bh); + mark_buffer_dirty(bh); + set_page_dirty(lastpage); + } + } +out_unlock: + ufs_put_locked_page(lastpage); +out: + return err; +} + +int ufs_truncate(struct inode *inode, loff_t old_i_size) +{ + struct ufs_inode_info *ufsi = UFS_I(inode); + struct super_block *sb = inode->i_sb; + struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; + int retry, err = 0; UFSD("ENTER\n"); - sb = inode->i_sb; - uspi = 
UFS_SB(sb)->s_uspi; - if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) - return; + if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || + S_ISLNK(inode->i_mode))) + return -EINVAL; if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) - return; + return -EPERM; + + if (inode->i_size > old_i_size) { + /* + * if we expand file we should care about + * allocation of block for last byte first of all + */ + err = ufs_alloc_lastblock(inode); + + if (err) { + i_size_write(inode, old_i_size); + goto out; + } + /* + * go away, because of we expand file, and we do not + * need free blocks, and zeroizes page + */ + lock_kernel(); + goto almost_end; + } - block_truncate_page(inode->i_mapping, inode->i_size, ufs_getfrag_block); + block_truncate_page(inode->i_mapping, inode->i_size, ufs_getfrag_block); lock_kernel(); while (1) { @@ -404,9 +477,58 @@ void ufs_truncate (struct inode * inode) yield(); } + if (inode->i_size < old_i_size) { + /* + * now we should have enough space + * to allocate block for last byte + */ + err = ufs_alloc_lastblock(inode); + if (err) + /* + * looks like all the same - we have no space, + * but we truncate file already + */ + inode->i_size = (ufsi->i_lastfrag - 1) * uspi->s_fsize; + } +almost_end: inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; - ufsi->i_lastfrag = DIRECT_FRAGMENT; unlock_kernel(); mark_inode_dirty(inode); - UFSD("EXIT\n"); +out: + UFSD("EXIT: err %d\n", err); + return err; } + + +/* + * We don't define our `inode->i_op->truncate', and call it here, + * because of: + * - there is no way to know old size + * - there is no way inform user about error, if it happens in `truncate' + */ +static int ufs_setattr(struct dentry *dentry, struct iattr *attr) +{ + struct inode *inode = dentry->d_inode; + unsigned int ia_valid = attr->ia_valid; + int error; + + error = inode_change_ok(inode, attr); + if (error) + return error; + + if (ia_valid & ATTR_SIZE && + attr->ia_size != i_size_read(inode)) { + loff_t 
old_i_size = inode->i_size; + error = vmtruncate(inode, attr->ia_size); + if (error) + return error; + error = ufs_truncate(inode, old_i_size); + if (error) + return error; + } + return inode_setattr(inode, attr); +} + +struct inode_operations ufs_file_inode_operations = { + .setattr = ufs_setattr, +}; diff --git a/fs/ufs/util.c b/fs/ufs/util.c index a2f13f4..337cf2c 100644 --- a/fs/ufs/util.c +++ b/fs/ufs/util.c @@ -233,3 +233,57 @@ ufs_set_inode_dev(struct super_block *sb, struct ufs_inode_info *ufsi, dev_t dev else ufsi->i_u1.i_data[0] = fs32; } + +/** + * ufs_get_locked_page() - locate, pin and lock a pagecache page, if not exist + * read it from disk. + * @mapping: the address_space to search + * @index: the page index + * + * Locates the desired pagecache page, if not exist we'll read it, + * locks it, increments its reference + * count and returns its address. + * + */ + +struct page *ufs_get_locked_page(struct address_space *mapping, + pgoff_t index) +{ + struct page *page; + +try_again: + page = find_lock_page(mapping, index); + if (!page) { + page = read_cache_page(mapping, index, + (filler_t*)mapping->a_ops->readpage, + NULL); + if (IS_ERR(page)) { + printk(KERN_ERR "ufs_change_blocknr: " + "read_cache_page error: ino %lu, index: %lu\n", + mapping->host->i_ino, index); + goto out; + } + + lock_page(page); + + if (!PageUptodate(page) || PageError(page)) { + unlock_page(page); + page_cache_release(page); + + printk(KERN_ERR "ufs_change_blocknr: " + "can not read page: ino %lu, index: %lu\n", + mapping->host->i_ino, index); + + page = ERR_PTR(-EIO); + goto out; + } + } + + if (unlikely(!page->mapping || !page_has_buffers(page))) { + unlock_page(page); + page_cache_release(page); + goto try_again;/*we really need these buffers*/ + } +out: + return page; +} diff --git a/fs/ufs/util.h b/fs/ufs/util.h index 406981f..28fce6c 100644 --- a/fs/ufs/util.h +++ b/fs/ufs/util.h @@ -251,6 +251,14 @@ extern void _ubh_ubhcpymem_(struct ufs_sb_private_info *, unsigned char 
*, struc #define ubh_memcpyubh(ubh,mem,size) _ubh_memcpyubh_(uspi,ubh,mem,size) extern void _ubh_memcpyubh_(struct ufs_sb_private_info *, struct ufs_buffer_head *, unsigned char *, unsigned); +/* This functions works with cache pages*/ +extern struct page *ufs_get_locked_page(struct address_space *mapping, + pgoff_t index); +static inline void ufs_put_locked_page(struct page *page) +{ + unlock_page(page); + page_cache_release(page); +} /* diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 3e807b8..c40f81b 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -1454,7 +1454,7 @@ xfs_vm_invalidatepage( block_invalidatepage(page, offset); } -struct address_space_operations xfs_address_space_operations = { +const struct address_space_operations xfs_address_space_operations = { .readpage = xfs_vm_readpage, .readpages = xfs_vm_readpages, .writepage = xfs_vm_writepage, diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h index 706d8c7..2244e51 100644 --- a/fs/xfs/linux-2.6/xfs_aops.h +++ b/fs/xfs/linux-2.6/xfs_aops.h @@ -40,7 +40,7 @@ typedef struct xfs_ioend { struct work_struct io_work; /* xfsdatad work queue */ } xfs_ioend_t; -extern struct address_space_operations xfs_address_space_operations; +extern const struct address_space_operations xfs_address_space_operations; extern int xfs_get_blocks(struct inode *, sector_t, struct buffer_head *, int); #endif /* __XFS_AOPS_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 26fed07..2af528d 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -1520,7 +1520,7 @@ xfs_mapping_buftarg( struct backing_dev_info *bdi; struct inode *inode; struct address_space *mapping; - static struct address_space_operations mapping_aops = { + static const struct address_space_operations mapping_aops = { .sync_page = block_sync_page, .migratepage = fail_migrate_page, }; diff --git a/fs/xfs/linux-2.6/xfs_buf.h 
b/fs/xfs/linux-2.6/xfs_buf.h index 4dd6592..ceda3a2 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h @@ -18,7 +18,6 @@ #ifndef __XFS_BUF_H__ #define __XFS_BUF_H__ -#include <linux/config.h> #include <linux/list.h> #include <linux/types.h> #include <linux/spinlock.h> diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index 601f01c..270db0f 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c @@ -15,7 +15,6 @@ * along with this program; if not, write the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ -#include <linux/config.h> #include <linux/compat.h> #include <linux/init.h> #include <linux/ioctl.h> diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 12810ba..d918002 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -419,16 +419,15 @@ xfs_vn_link( int error; ip = old_dentry->d_inode; /* inode being linked to */ - if (S_ISDIR(ip->i_mode)) - return -EPERM; - tdvp = vn_from_inode(dir); vp = vn_from_inode(ip); + VN_HOLD(vp); error = bhv_vop_link(tdvp, vp, dentry, NULL); - if (likely(!error)) { + if (unlikely(error)) { + VN_RELE(vp); + } else { VMODIFY(tdvp); - VN_HOLD(vp); xfs_validate_fields(ip, &vattr); d_instantiate(dentry, ip); } diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h index aa26ab9..a13f75c 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/linux-2.6/xfs_linux.h @@ -19,7 +19,6 @@ #define __XFS_LINUX__ #include <linux/types.h> -#include <linux/config.h> /* * Some types are conditional depending on the target system. 
@@ -140,9 +139,7 @@ BUFFER_FNS(PrivateStart, unwritten); #define current_pid() (current->pid) #define current_fsuid(cred) (current->fsuid) #define current_fsgid(cred) (current->fsgid) -#define current_set_flags(f) (current->flags |= (f)) #define current_test_flags(f) (current->flags & (f)) -#define current_clear_flags(f) (current->flags & ~(f)) #define current_set_flags_nested(sp, f) \ (*(sp) = current->flags, current->flags |= (f)) #define current_clear_flags_nested(sp, f) \ @@ -218,7 +215,6 @@ BUFFER_FNS(PrivateStart, unwritten); #define MIN(a,b) (min(a,b)) #define MAX(a,b) (max(a,b)) #define howmany(x, y) (((x)+((y)-1))/(y)) -#define roundup(x, y) ((((x)+((y)-1))/(y))*(y)) /* * Various platform dependent calls that don't fit anywhere else diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h index 35c6a01..c42b322 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.h +++ b/fs/xfs/linux-2.6/xfs_vnode.h @@ -93,7 +93,7 @@ typedef enum { */ static inline struct bhv_vnode *vn_from_inode(struct inode *inode) { - return (bhv_vnode_t *)list_entry(inode, bhv_vnode_t, v_inode); + return container_of(inode, bhv_vnode_t, v_inode); } static inline struct inode *vn_to_inode(struct bhv_vnode *vnode) { diff --git a/fs/xfs/xfs_behavior.h b/fs/xfs/xfs_behavior.h index 1d8ff10..6e6e56f 100644 --- a/fs/xfs/xfs_behavior.h +++ b/fs/xfs/xfs_behavior.h @@ -78,15 +78,12 @@ * */ -struct bhv_head_lock; - /* * Behavior head. Head of the chain of behaviors. * Contained within each virtualized object data structure. 
*/ typedef struct bhv_head { struct bhv_desc *bh_first; /* first behavior in chain */ - struct bhv_head_lock *bh_lockp; /* pointer to lock info struct */ } bhv_head_t; /* diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 5fa0adb..86c1bf0 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1961,9 +1961,9 @@ xfs_iunlink_remove( xfs_agino_t agino; xfs_agino_t next_agino; xfs_buf_t *last_ibp; - xfs_dinode_t *last_dip; + xfs_dinode_t *last_dip = NULL; short bucket_index; - int offset, last_offset; + int offset, last_offset = 0; int error; int agi_ok; diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index d8f5d4c..e730328 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -1740,10 +1740,10 @@ xlog_write(xfs_mount_t * mp, xlog_in_core_t **commit_iclog, uint flags) { - xlog_t *log = mp->m_log; + xlog_t *log = mp->m_log; xlog_ticket_t *ticket = (xlog_ticket_t *)tic; + xlog_in_core_t *iclog = NULL; /* ptr to current in-core log */ xlog_op_header_t *logop_head; /* ptr to log operation header */ - xlog_in_core_t *iclog; /* ptr to current in-core log */ __psint_t ptr; /* copy address into data region */ int len; /* # xlog_write() bytes 2 still copy */ int index; /* region index currently copying */ diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 55b4237..3cb678e 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -990,6 +990,8 @@ xlog_find_zeroed( xfs_daddr_t num_scan_bblks; int error, log_bbnum = log->l_logBBsize; + *blk_no = 0; + /* check totally zeroed log */ bp = xlog_get_bp(log, 1); if (!bp) diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index ed7579b..4be5c0b 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -2028,7 +2028,7 @@ xfs_icsb_balance_counter( xfs_sb_field_t field, int flags) { - uint64_t count, resid = 0; + uint64_t count, resid; int weight = num_online_cpus(); int s; @@ -2060,6 +2060,7 @@ xfs_icsb_balance_counter( break; default: BUG(); + count = resid = 0; /* quiet, gcc */ break; 
} diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 0c1e42b..5a0b678 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -1929,7 +1929,7 @@ xfs_growfs_rt( /* * Initial error checking. */ - if (mp->m_rtdev_targp || mp->m_rbmip == NULL || + if (mp->m_rtdev_targp == NULL || mp->m_rbmip == NULL || (nrblocks = in->newblocks) <= sbp->sb_rblocks || (sbp->sb_rblocks && (in->extsize != sbp->sb_rextsize))) return XFS_ERROR(EINVAL); diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index cb65c3a..9dc88b3 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -338,8 +338,6 @@ typedef void (*xfs_trans_callback_t)(struct xfs_trans *, void *); typedef struct xfs_trans { unsigned int t_magic; /* magic number */ xfs_log_callback_t t_logcb; /* log callback struct */ - struct xfs_trans *t_forw; /* async list pointers */ - struct xfs_trans *t_back; /* async list pointers */ unsigned int t_type; /* transaction type */ unsigned int t_log_res; /* amt of log space resvd */ unsigned int t_log_count; /* count for perm log res */ @@ -364,9 +362,11 @@ typedef struct xfs_trans { long t_res_fdblocks_delta; /* on-disk only chg */ long t_frextents_delta;/* superblock freextents chg*/ long t_res_frextents_delta; /* on-disk only chg */ +#ifdef DEBUG long t_ag_freeblks_delta; /* debugging counter */ long t_ag_flist_delta; /* debugging counter */ long t_ag_btree_delta; /* debugging counter */ +#endif long t_dblocks_delta;/* superblock dblocks change */ long t_agcount_delta;/* superblock agcount change */ long t_imaxpct_delta;/* superblock imaxpct change */ diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 00a6b7d..23cfa58 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -2603,8 +2603,7 @@ xfs_link( vn_trace_entry(src_vp, __FUNCTION__, (inst_t *)__return_address); target_namelen = VNAMELEN(dentry); - if (VN_ISDIR(src_vp)) - return XFS_ERROR(EPERM); + ASSERT(!VN_ISDIR(src_vp)); sip = xfs_vtoi(src_vp); tdp = XFS_BHVTOI(target_dir_bdp); @@ 
-2699,9 +2698,8 @@ xfs_link( xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE); error = xfs_bumplink(tp, sip); - if (error) { + if (error) goto abort_return; - } /* * If this is a synchronous mount, make sure that the @@ -2719,9 +2717,8 @@ xfs_link( } error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); - if (error) { + if (error) goto std_return; - } /* Fall through to std_return with error = 0. */ std_return: @@ -2742,6 +2739,8 @@ std_return: xfs_trans_cancel(tp, cancel_flags); goto std_return; } + + /* * xfs_mkdir * |