From a1de02dccf906faba2ee2d99cac56799bda3b96a Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Thu, 4 Feb 2010 23:58:38 -0500 Subject: ext4: fix async i/o writes beyond 4GB to a sparse file The "offset" member in ext4_io_end holds bytes, not blocks, so ext4_lblk_t is wrong - and too small (u32). This caused the async i/o writes to sparse files beyond 4GB to fail when they wrapped around to 0. Also fix up the type of arguments to ext4_convert_unwritten_extents(), it gets ssize_t from ext4_end_aio_dio_nolock() and ext4_ext_direct_IO(). Reported-by: Giel de Nijs Signed-off-by: Eric Sandeen --- fs/ext4/ext4.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/ext4/ext4.h') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 874d169..602d5ad 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -139,8 +139,8 @@ typedef struct ext4_io_end { struct inode *inode; /* file being written to */ unsigned int flag; /* unwritten or not */ int error; /* I/O error code */ - ext4_lblk_t offset; /* offset in the file */ - size_t size; /* size of the extent */ + loff_t offset; /* offset in the file */ + ssize_t size; /* size of the extent */ struct work_struct work; /* data work queue */ } ext4_io_end_t; @@ -1744,7 +1744,7 @@ extern void ext4_ext_release(struct super_block *); extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len); extern int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, - loff_t len); + ssize_t len); extern int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, unsigned int max_blocks, struct buffer_head *bh, int flags); -- cgit v1.1 From 1f2acb6017d8528135ec3b01ab7cd2be6ea0630b Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 22 Jan 2010 17:40:42 -0500 Subject: ext4: Add block validity check when truncating indirect block mapped inodes Add checks to ext4_free_branches() to make sure a block number found in an indirect block are valid before trying to free it. If a bad block number is found, stop freeing the indirect block immediately, since the file system is corrupt and we will need to run fsck anyway. This also avoids spamming the logs, and specifically avoids driver-level "attempt to access beyond end of device" errors obscure what is really going on. If you get *really*, *really*, *really* unlucky, without this patch, a supposed indirect block containing garbage might contain a reference to a primary block group descriptor, in which case ext4_free_branches() could end up zero'ing out a block group descriptor block, and if then one of the block bitmaps for a block group described by that bg descriptor block is not in memory, and is read in by ext4_read_block_bitmap(). This function calls ext4_valid_block_bitmap(), which assumes that bg_inode_table() was validated at mount time and hasn't been modified since. Since this assumption is no longer valid, it's possible for the value (ext4_inode_table(sb, desc) - group_first_block) to go negative, which will cause ext4_find_next_zero_bit() to trigger a kernel GPF. Addresses-Google-Bug: #2220436 Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/ext4/ext4.h') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 602d5ad..307ecd1 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -377,6 +377,7 @@ struct ext4_new_group_data { */ #define EXT4_FREE_BLOCKS_METADATA 0x0001 #define EXT4_FREE_BLOCKS_FORGET 0x0002 +#define EXT4_FREE_BLOCKS_VALIDATED 0x0004 /* * ioctl commands -- cgit v1.1 From 19f5fb7ad679bb361222c7916086435020c37cce Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 24 Jan 2010 14:34:07 -0500 Subject: ext4: Use bitops to read/modify EXT4_I(inode)->i_state At several places we modify EXT4_I(inode)->i_state without holding i_mutex (ext4_release_file, ext4_bmap, ext4_journalled_writepage, ext4_do_update_inode, ...). These modifications are racy and we can lose updates to i_state. So convert handling of i_state to use bitops which are atomic. Cc: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 41 +++++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 12 deletions(-) (limited to 'fs/ext4/ext4.h') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 307ecd1..ffe1334 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -313,17 +313,6 @@ static inline __u32 ext4_mask_flags(umode_t mode, __u32 flags) return flags & EXT4_OTHER_FLMASK; } -/* - * Inode dynamic state flags - */ -#define EXT4_STATE_JDATA 0x00000001 /* journaled data exists */ -#define EXT4_STATE_NEW 0x00000002 /* inode is newly created */ -#define EXT4_STATE_XATTR 0x00000004 /* has in-inode xattrs */ -#define EXT4_STATE_NO_EXPAND 0x00000008 /* No space for expansion */ -#define EXT4_STATE_DA_ALLOC_CLOSE 0x00000010 /* Alloc DA blks on close */ -#define EXT4_STATE_EXT_MIGRATE 0x00000020 /* Inode is migrating */ -#define EXT4_STATE_DIO_UNWRITTEN 0x00000040 /* need convert on dio done*/ - /* Used to pass group descriptor data when online resize is done */ struct ext4_new_group_input { __u32 group; /* Group number for this data */ @@ -631,7 +620,7 @@ struct ext4_inode_info { * near to their parent directory's inode. */ ext4_group_t i_block_group; - __u32 i_state; /* Dynamic state flags for ext4 */ + unsigned long i_state_flags; /* Dynamic state flags */ ext4_lblk_t i_dir_start_lookup; #ifdef CONFIG_EXT4_FS_XATTR @@ -1051,6 +1040,34 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) (ino >= EXT4_FIRST_INO(sb) && ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)); } + +/* + * Inode dynamic state flags + */ +enum { + EXT4_STATE_JDATA, /* journaled data exists */ + EXT4_STATE_NEW, /* inode is newly created */ + EXT4_STATE_XATTR, /* has in-inode xattrs */ + EXT4_STATE_NO_EXPAND, /* No space for expansion */ + EXT4_STATE_DA_ALLOC_CLOSE, /* Alloc DA blks on close */ + EXT4_STATE_EXT_MIGRATE, /* Inode is migrating */ + EXT4_STATE_DIO_UNWRITTEN, /* need convert on dio done*/ +}; + +static inline int ext4_test_inode_state(struct inode *inode, int bit) +{ + return test_bit(bit, &EXT4_I(inode)->i_state_flags); +} + +static inline void ext4_set_inode_state(struct inode *inode, int bit) +{ + set_bit(bit, &EXT4_I(inode)->i_state_flags); +} + +static inline void ext4_clear_inode_state(struct inode *inode, int bit) +{ + clear_bit(bit, &EXT4_I(inode)->i_state_flags); +} #else /* Assume that user mode programs are passing in an ext4fs superblock, not * a kernel struct super_block. This will allow us to call the feature-test -- cgit v1.1 From f710b4b96ba292dfed2153afc47e9063b0abfd89 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 25 Jan 2010 03:31:32 -0500 Subject: ext4: Reserve INCOMPAT_EA_INODE and INCOMPAT_DIRDATA feature codepoints Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/ext4/ext4.h') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index ffe1334..61cf3b3 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -284,6 +284,7 @@ struct flex_groups { #define EXT4_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ #define EXT4_HUGE_FILE_FL 0x00040000 /* Set to each huge file */ #define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */ +#define EXT4_EA_INODE_FL 0x00200000 /* Inode used for large EA */ #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ #define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */ @@ -1144,6 +1145,8 @@ static inline void ext4_clear_inode_state(struct inode *inode, int bit) #define EXT4_FEATURE_INCOMPAT_64BIT 0x0080 #define EXT4_FEATURE_INCOMPAT_MMP 0x0100 #define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200 +#define EXT4_FEATURE_INCOMPAT_EA_INODE 0x0400 /* EA in inode */ +#define EXT4_FEATURE_INCOMPAT_DIRDATA 0x1000 /* data in dirent */ #define EXT4_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR #define EXT4_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \ -- cgit v1.1 From 12062dddda450976b129dcb1bacd91acaf4d8030 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Mon, 15 Feb 2010 14:19:27 -0500 Subject: ext4: move __func__ into a macro for ext4_warning, ext4_error Just a pet peeve of mine; we had a mishash of calls with either __func__ or "function_name" and the latter tends to get out of sync. I think it's easier to just hide the __func__ in a macro, and it'll be consistent from then on. Signed-off-by: Eric Sandeen Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'fs/ext4/ext4.h') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 61cf3b3..509437f 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1486,13 +1486,16 @@ extern int ext4_group_extend(struct super_block *sb, ext4_fsblk_t n_blocks_count); /* super.c */ -extern void ext4_error(struct super_block *, const char *, const char *, ...) +extern void __ext4_error(struct super_block *, const char *, const char *, ...) __attribute__ ((format (printf, 3, 4))); +#define ext4_error(sb, message...) __ext4_error(sb, __func__, ## message) extern void __ext4_std_error(struct super_block *, const char *, int); extern void ext4_abort(struct super_block *, const char *, const char *, ...) __attribute__ ((format (printf, 3, 4))); -extern void ext4_warning(struct super_block *, const char *, const char *, ...) +extern void __ext4_warning(struct super_block *, const char *, + const char *, ...) __attribute__ ((format (printf, 3, 4))); +#define ext4_warning(sb, message...) __ext4_warning(sb, __func__, ## message) extern void ext4_msg(struct super_block *, const char *, const char *, ...) __attribute__ ((format (printf, 3, 4))); extern void ext4_grp_locked_error(struct super_block *, ext4_group_t, -- cgit v1.1 From c8d46e41bc744c8fa0092112af3942fcd46c8b18 Mon Sep 17 00:00:00 2001 From: Jiaying Zhang Date: Wed, 24 Feb 2010 09:52:53 -0500 Subject: ext4: Add flag to files with blocks intentionally past EOF fallocate() may potentially instantiate blocks past EOF, depending on the flags used when it is called. e2fsck currently has a test for blocks past i_size, and it sometimes trips up - noticeably on xfstests 013 which runs fsstress. This patch from Jiayang does fix it up - it (along with e2fsprogs updates and other patches recently from Aneesh) has survived many fsstress runs in a row. Signed-off-by: Eric Sandeen Signed-off-by: Jiaying Zhang Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/ext4/ext4.h') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 509437f..74664ca 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -285,10 +285,11 @@ struct flex_groups { #define EXT4_HUGE_FILE_FL 0x00040000 /* Set to each huge file */ #define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */ #define EXT4_EA_INODE_FL 0x00200000 /* Inode used for large EA */ +#define EXT4_EOFBLOCKS_FL 0x00400000 /* Blocks allocated beyond EOF */ #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ -#define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */ -#define EXT4_FL_USER_MODIFIABLE 0x000B80FF /* User modifiable flags */ +#define EXT4_FL_USER_VISIBLE 0x004BDFFF /* User visible flags */ +#define EXT4_FL_USER_MODIFIABLE 0x004B80FF /* User modifiable flags */ /* Flags that should be inherited by new inodes from their parent. */ #define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\ -- cgit v1.1 From c7064ef13b2181a489836349f9baf87df0dab28f Mon Sep 17 00:00:00 2001 From: Jiaying Zhang Date: Tue, 2 Mar 2010 13:28:44 -0500 Subject: ext4: mechanical rename some of the direct I/O get_block's identifiers This commit renames some of the direct I/O's block allocation flags, variables, and functions introduced in Mingming's "Direct IO for holes and fallocate" patches so that they can be used by ext4's buffered write path as well. Also changed the related function comments accordingly to cover both direct write and buffered write cases. Signed-off-by: Jiaying Zhang Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'fs/ext4/ext4.h') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 74664ca..c831a58 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -133,7 +133,7 @@ struct mpage_da_data { int pages_written; int retval; }; -#define DIO_AIO_UNWRITTEN 0x1 +#define EXT4_IO_UNWRITTEN 0x1 typedef struct ext4_io_end { struct list_head list; /* per-file finished AIO list */ struct inode *inode; /* file being written to */ @@ -355,13 +355,13 @@ struct ext4_new_group_data { /* caller is from the direct IO path, request to creation of an unitialized extents if not allocated, split the uninitialized extent if blocks has been preallocated already*/ -#define EXT4_GET_BLOCKS_DIO 0x0008 +#define EXT4_GET_BLOCKS_PRE_IO 0x0008 #define EXT4_GET_BLOCKS_CONVERT 0x0010 -#define EXT4_GET_BLOCKS_DIO_CREATE_EXT (EXT4_GET_BLOCKS_DIO|\ +#define EXT4_GET_BLOCKS_IO_CREATE_EXT (EXT4_GET_BLOCKS_PRE_IO|\ EXT4_GET_BLOCKS_CREATE_UNINIT_EXT) - /* Convert extent to initialized after direct IO complete */ -#define EXT4_GET_BLOCKS_DIO_CONVERT_EXT (EXT4_GET_BLOCKS_CONVERT|\ - EXT4_GET_BLOCKS_DIO_CREATE_EXT) + /* Convert extent to initialized after IO complete */ +#define EXT4_GET_BLOCKS_IO_CONVERT_EXT (EXT4_GET_BLOCKS_CONVERT|\ + EXT4_GET_BLOCKS_IO_CREATE_EXT) /* * Flags used by ext4_free_blocks @@ -700,8 +700,8 @@ struct ext4_inode_info { qsize_t i_reserved_quota; #endif - /* completed async DIOs that might need unwritten extents handling */ - struct list_head i_aio_dio_complete_list; + /* completed IOs that might need unwritten extents handling */ + struct list_head i_completed_io_list; /* current io_end structure for async DIO write*/ ext4_io_end_t *cur_aio_dio; @@ -1461,7 +1461,7 @@ extern int ext4_block_truncate_page(handle_t *handle, struct address_space *mapping, loff_t from); extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); extern qsize_t *ext4_get_reserved_space(struct inode *inode); -extern int flush_aio_dio_completed_IO(struct inode *inode); +extern int flush_completed_IO(struct inode *inode); extern void ext4_da_update_reserve_space(struct inode *inode, int used, int quota_claim); /* ioctl.c */ -- cgit v1.1 From 744692dc059845b2a3022119871846e74d4f6e11 Mon Sep 17 00:00:00 2001 From: Jiaying Zhang Date: Thu, 4 Mar 2010 16:14:02 -0500 Subject: ext4: use ext4_get_block_write in buffer write Allocate uninitialized extent before ext4 buffer write and convert the extent to initialized after io completes. The purpose is to make sure an extent can only be marked initialized after it has been written with new data so we can safely drop the i_mutex lock in ext4 DIO read without exposing stale data. This helps to improve multi-thread DIO read performance on high-speed disks. Skip the nobh and data=journal mount cases to make things simple for now. Signed-off-by: Jiaying Zhang Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'fs/ext4/ext4.h') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index c831a58..dee4580 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -138,7 +138,7 @@ typedef struct ext4_io_end { struct list_head list; /* per-file finished AIO list */ struct inode *inode; /* file being written to */ unsigned int flag; /* unwritten or not */ - int error; /* I/O error code */ + struct page *page; /* page struct for buffer write */ loff_t offset; /* offset in the file */ ssize_t size; /* size of the extent */ struct work_struct work; /* data work queue */ @@ -361,7 +361,7 @@ struct ext4_new_group_data { EXT4_GET_BLOCKS_CREATE_UNINIT_EXT) /* Convert extent to initialized after IO complete */ #define EXT4_GET_BLOCKS_IO_CONVERT_EXT (EXT4_GET_BLOCKS_CONVERT|\ - EXT4_GET_BLOCKS_IO_CREATE_EXT) + EXT4_GET_BLOCKS_CREATE_UNINIT_EXT) /* * Flags used by ext4_free_blocks @@ -702,6 +702,7 @@ struct ext4_inode_info { /* completed IOs that might need unwritten extents handling */ struct list_head i_completed_io_list; + spinlock_t i_completed_io_lock; /* current io_end structure for async DIO write*/ ext4_io_end_t *cur_aio_dio; @@ -752,6 +753,7 @@ struct ext4_inode_info { #define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */ #define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ #define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ +#define EXT4_MOUNT_DIOREAD_NOLOCK 0x400000 /* Enable support for dio read nolocking */ #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ #define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ @@ -1781,6 +1783,15 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 len, __u64 *moved_len); +/* BH_Uninit flag: blocks are allocated but uninitialized on disk */ +enum ext4_state_bits { + BH_Uninit /* blocks are allocated but uninitialized on disk */ + = BH_JBDPrivateStart, +}; + +BUFFER_FNS(Uninit, uninit) +TAS_BUFFER_FNS(Uninit, uninit) + /* * Add new method to test wether block and inode bitmaps are properly * initialized. With uninit_bg reading the block from disk is not enough -- cgit v1.1 From 273df556b6ee2065bfe96edab5888d3dc9b108d8 Mon Sep 17 00:00:00 2001 From: Frank Mayhar Date: Tue, 2 Mar 2010 11:46:09 -0500 Subject: ext4: Convert BUG_ON checks to use ext4_error() instead Convert a bunch of BUG_ONs to emit a ext4_error() message and return EIO. This is a first pass and most notably does _not_ cover mballoc.c, which is a morass of void functions. Signed-off-by: Frank Mayhar Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'fs/ext4/ext4.h') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index dee4580..3d85bbb 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -53,6 +53,12 @@ #define ext4_debug(f, a...) do {} while (0) #endif +#define EXT4_ERROR_INODE(inode, fmt, a...) \ + ext4_error_inode(__func__, (inode), (fmt), ## a); + +#define EXT4_ERROR_FILE(file, fmt, a...) \ + ext4_error_file(__func__, (file), (fmt), ## a); + /* data type for block offset of block group */ typedef int ext4_grpblk_t; @@ -1492,6 +1498,10 @@ extern int ext4_group_extend(struct super_block *sb, extern void __ext4_error(struct super_block *, const char *, const char *, ...) __attribute__ ((format (printf, 3, 4))); #define ext4_error(sb, message...) __ext4_error(sb, __func__, ## message) +extern void ext4_error_inode(const char *, struct inode *, const char *, ...) + __attribute__ ((format (printf, 3, 4))); +extern void ext4_error_file(const char *, struct file *, const char *, ...) + __attribute__ ((format (printf, 3, 4))); extern void __ext4_std_error(struct super_block *, const char *, int); extern void ext4_abort(struct super_block *, const char *, const char *, ...) __attribute__ ((format (printf, 3, 4))); -- cgit v1.1 From 731eb1a03a8445cde2cb23ecfb3580c6fa7bb690 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Wed, 3 Mar 2010 23:55:01 -0500 Subject: ext4: consolidate in_range() definitions There are duplicate macro definitions of in_range() in mballoc.h and balloc.c. This consolidates these two definitions into ext4.h, and changes extents.c to use in_range() as well. Signed-off-by: Akinobu Mita Signed-off-by: "Theodore Ts'o" Cc: Andreas Dilger --- fs/ext4/ext4.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/ext4/ext4.h') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 3d85bbb..9b17916 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1819,6 +1819,8 @@ static inline void set_bitmap_uptodate(struct buffer_head *bh) set_bit(BH_BITMAP_UPTODATE, &(bh)->b_state); } +#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) + #endif /* __KERNEL__ */ #endif /* _EXT4_H */ -- cgit v1.1