From 3a85cd96d3ab3c6dcf88b81fc6eaddb84e565a43 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 14 Jan 2010 01:33:55 +0000 Subject: xfs: add tracing to xfs_swap_extents To be able to diagnose whether the swap extents function is detecting compatible inode data fork configurations for swapping extents, add tracing points to the code to allow us to see the format of the inode forks before and after the swap. Signed-off-by: Dave Chinner Signed-off-by: Alex Elder --- fs/xfs/linux-2.6/xfs_trace.h | 53 ++++++++++++++++++++++++++++++++++++++++++++ fs/xfs/xfs_dfrag.c | 5 +++++ 2 files changed, 58 insertions(+) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h index c22a608..3353aef 100644 --- a/fs/xfs/linux-2.6/xfs_trace.h +++ b/fs/xfs/linux-2.6/xfs_trace.h @@ -1414,6 +1414,59 @@ TRACE_EVENT(xfs_dir2_leafn_moveents, __entry->count) ); +#define XFS_SWAPEXT_INODES \ + { 0, "target" }, \ + { 1, "temp" } + +#define XFS_INODE_FORMAT_STR \ + { 0, "invalid" }, \ + { 1, "local" }, \ + { 2, "extent" }, \ + { 3, "btree" } + +DECLARE_EVENT_CLASS(xfs_swap_extent_class, + TP_PROTO(struct xfs_inode *ip, int which), + TP_ARGS(ip, which), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(int, which) + __field(xfs_ino_t, ino) + __field(int, format) + __field(int, nex) + __field(int, max_nex) + __field(int, broot_size) + __field(int, fork_off) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->which = which; + __entry->ino = ip->i_ino; + __entry->format = ip->i_d.di_format; + __entry->nex = ip->i_d.di_nextents; + __entry->max_nex = ip->i_df.if_ext_max; + __entry->broot_size = ip->i_df.if_broot_bytes; + __entry->fork_off = XFS_IFORK_BOFF(ip); + ), + TP_printk("dev %d:%d ino 0x%llx (%s), %s format, num_extents %d, " + "Max in-fork extents %d, broot size %d, fork offset %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __print_symbolic(__entry->which, XFS_SWAPEXT_INODES), + 
__print_symbolic(__entry->format, XFS_INODE_FORMAT_STR), + __entry->nex, + __entry->max_nex, + __entry->broot_size, + __entry->fork_off) +) + +#define DEFINE_SWAPEXT_EVENT(name) \ +DEFINE_EVENT(xfs_swap_extent_class, name, \ + TP_PROTO(struct xfs_inode *ip, int which), \ + TP_ARGS(ip, which)) + +DEFINE_SWAPEXT_EVENT(xfs_swap_extent_before); +DEFINE_SWAPEXT_EVENT(xfs_swap_extent_after); + #endif /* _TRACE_XFS_H */ #undef TRACE_INCLUDE_PATH diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index 84ca1cf..f25e540 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c @@ -254,6 +254,9 @@ xfs_swap_extents( goto out_unlock; } + trace_xfs_swap_extent_before(ip, 0); + trace_xfs_swap_extent_before(tip, 1); + /* check inode formats now that data is flushed */ error = xfs_swap_extents_check_format(ip, tip); if (error) { @@ -421,6 +424,8 @@ xfs_swap_extents( error = xfs_trans_commit(tp, XFS_TRANS_SWAPEXT); + trace_xfs_swap_extent_after(ip, 0); + trace_xfs_swap_extent_after(tip, 1); out: kmem_free(tempifp); return error; -- cgit v1.1 From 6bded0f383fd7971b76ad6c194dda7d5b814b871 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 14 Jan 2010 01:33:56 +0000 Subject: xfs: clean up inconsistent variable naming in xfs_swap_extent The swap extent ioctl passes in a target inode and a temporary inode which are clearly named in the ioctl structure. The code then assigns temp to target and vice versa, making it extremely difficult to work out which inode is which later in the code. Make this consistent throughout the code. Also make xfs_swap_extent static as there are no external users of the function. 
Signed-off-by: Dave Chinner Signed-off-by: Alex Elder --- fs/xfs/xfs_dfrag.c | 38 ++++++++++++++++++++++---------------- fs/xfs/xfs_dfrag.h | 3 --- 2 files changed, 22 insertions(+), 19 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index f25e540..cd27c9d 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c @@ -45,15 +45,21 @@ #include "xfs_vnodeops.h" #include "xfs_trace.h" + +static int xfs_swap_extents( + xfs_inode_t *ip, /* target inode */ + xfs_inode_t *tip, /* tmp inode */ + xfs_swapext_t *sxp); + /* - * Syssgi interface for swapext + * ioctl interface for swapext */ int xfs_swapext( xfs_swapext_t *sxp) { xfs_inode_t *ip, *tip; - struct file *file, *target_file; + struct file *file, *tmp_file; int error = 0; /* Pull information for the target fd */ @@ -68,46 +74,46 @@ xfs_swapext( goto out_put_file; } - target_file = fget((int)sxp->sx_fdtmp); - if (!target_file) { + tmp_file = fget((int)sxp->sx_fdtmp); + if (!tmp_file) { error = XFS_ERROR(EINVAL); goto out_put_file; } - if (!(target_file->f_mode & FMODE_WRITE) || - (target_file->f_flags & O_APPEND)) { + if (!(tmp_file->f_mode & FMODE_WRITE) || + (tmp_file->f_flags & O_APPEND)) { error = XFS_ERROR(EBADF); - goto out_put_target_file; + goto out_put_tmp_file; } if (IS_SWAPFILE(file->f_path.dentry->d_inode) || - IS_SWAPFILE(target_file->f_path.dentry->d_inode)) { + IS_SWAPFILE(tmp_file->f_path.dentry->d_inode)) { error = XFS_ERROR(EINVAL); - goto out_put_target_file; + goto out_put_tmp_file; } ip = XFS_I(file->f_path.dentry->d_inode); - tip = XFS_I(target_file->f_path.dentry->d_inode); + tip = XFS_I(tmp_file->f_path.dentry->d_inode); if (ip->i_mount != tip->i_mount) { error = XFS_ERROR(EINVAL); - goto out_put_target_file; + goto out_put_tmp_file; } if (ip->i_ino == tip->i_ino) { error = XFS_ERROR(EINVAL); - goto out_put_target_file; + goto out_put_tmp_file; } if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { error = XFS_ERROR(EIO); - goto out_put_target_file; + goto 
out_put_tmp_file; } error = xfs_swap_extents(ip, tip, sxp); - out_put_target_file: - fput(target_file); + out_put_tmp_file: + fput(tmp_file); out_put_file: fput(file); out: @@ -186,7 +192,7 @@ xfs_swap_extents_check_format( return 0; } -int +static int xfs_swap_extents( xfs_inode_t *ip, /* target inode */ xfs_inode_t *tip, /* tmp inode */ diff --git a/fs/xfs/xfs_dfrag.h b/fs/xfs/xfs_dfrag.h index 4f55a63..20bdd93 100644 --- a/fs/xfs/xfs_dfrag.h +++ b/fs/xfs/xfs_dfrag.h @@ -48,9 +48,6 @@ typedef struct xfs_swapext */ int xfs_swapext(struct xfs_swapext *sx); -int xfs_swap_extents(struct xfs_inode *ip, struct xfs_inode *tip, - struct xfs_swapext *sxp); - #endif /* __KERNEL__ */ #endif /* __XFS_DFRAG_H__ */ -- cgit v1.1 From 5d77c0dc0c05c2c65aee16149fae06831a118730 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Thu, 19 Nov 2009 15:52:00 +0000 Subject: xfs: make several more functions static Just minor housekeeping, a lot more functions can be trivially made static; others could if we reordered things a bit... 
Signed-off-by: Eric Sandeen Signed-off-by: Alex Elder --- fs/xfs/linux-2.6/xfs_sync.c | 2 +- fs/xfs/linux-2.6/xfs_sync.h | 1 - fs/xfs/xfs_attr.c | 2 +- fs/xfs/xfs_attr.h | 1 - fs/xfs/xfs_bmap_btree.c | 2 +- fs/xfs/xfs_bmap_btree.h | 1 - fs/xfs/xfs_dir2_node.c | 2 +- fs/xfs/xfs_dir2_node.h | 2 -- fs/xfs/xfs_log_priv.h | 5 ----- fs/xfs/xfs_log_recover.c | 6 +++--- 10 files changed, 7 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 1f5e4bb..0f90bfe 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -351,7 +351,7 @@ xfs_commit_dummy_trans( return error; } -int +STATIC int xfs_sync_fsdata( struct xfs_mount *mp, int flags) diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index ea932b4..d480c34 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h @@ -37,7 +37,6 @@ void xfs_syncd_stop(struct xfs_mount *mp); int xfs_sync_attr(struct xfs_mount *mp, int flags); int xfs_sync_data(struct xfs_mount *mp, int flags); -int xfs_sync_fsdata(struct xfs_mount *mp, int flags); int xfs_quiesce_data(struct xfs_mount *mp); void xfs_quiesce_attr(struct xfs_mount *mp); diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c index e953b6c..9d11eba 100644 --- a/fs/xfs/xfs_attr.c +++ b/fs/xfs/xfs_attr.c @@ -197,7 +197,7 @@ xfs_attr_get( /* * Calculate how many blocks we need for the new attribute, */ -int +STATIC int xfs_attr_calc_size( struct xfs_inode *ip, int namelen, diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h index 59b410c..9c3a243 100644 --- a/fs/xfs/xfs_attr.h +++ b/fs/xfs/xfs_attr.h @@ -139,7 +139,6 @@ typedef struct xfs_attr_list_context { /* * Overall external interface routines. 
*/ -int xfs_attr_calc_size(struct xfs_inode *, int, int, int *); int xfs_attr_inactive(struct xfs_inode *dp); int xfs_attr_rmtval_get(struct xfs_da_args *args); int xfs_attr_list_int(struct xfs_attr_list_context *); diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index 38751d5..416e47e 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -334,7 +334,7 @@ xfs_bmbt_disk_set_allf( /* * Set all the fields in a bmap extent record from the uncompressed form. */ -void +STATIC void xfs_bmbt_disk_set_all( xfs_bmbt_rec_t *r, xfs_bmbt_irec_t *s) diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h index cf07ca7..0e66c4e 100644 --- a/fs/xfs/xfs_bmap_btree.h +++ b/fs/xfs/xfs_bmap_btree.h @@ -223,7 +223,6 @@ extern void xfs_bmbt_set_startblock(xfs_bmbt_rec_host_t *r, xfs_fsblock_t v); extern void xfs_bmbt_set_startoff(xfs_bmbt_rec_host_t *r, xfs_fileoff_t v); extern void xfs_bmbt_set_state(xfs_bmbt_rec_host_t *r, xfs_exntst_t v); -extern void xfs_bmbt_disk_set_all(xfs_bmbt_rec_t *r, xfs_bmbt_irec_t *s); extern void xfs_bmbt_disk_set_allf(xfs_bmbt_rec_t *r, xfs_fileoff_t o, xfs_fsblock_t b, xfs_filblks_t c, xfs_exntst_t v); diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c index ce6e355..78fc4d9 100644 --- a/fs/xfs/xfs_dir2_node.c +++ b/fs/xfs/xfs_dir2_node.c @@ -65,7 +65,7 @@ static int xfs_dir2_node_addname_int(xfs_da_args_t *args, /* * Log entries from a freespace block. 
*/ -void +STATIC void xfs_dir2_free_log_bests( xfs_trans_t *tp, /* transaction pointer */ xfs_dabuf_t *bp, /* freespace buffer */ diff --git a/fs/xfs/xfs_dir2_node.h b/fs/xfs/xfs_dir2_node.h index dde72db..82dfe71 100644 --- a/fs/xfs/xfs_dir2_node.h +++ b/fs/xfs/xfs_dir2_node.h @@ -75,8 +75,6 @@ xfs_dir2_db_to_fdindex(struct xfs_mount *mp, xfs_dir2_db_t db) return ((db) % XFS_DIR2_MAX_FREE_BESTS(mp)); } -extern void xfs_dir2_free_log_bests(struct xfs_trans *tp, struct xfs_dabuf *bp, - int first, int last); extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args, struct xfs_dabuf *lbp); extern xfs_dahash_t xfs_dir2_leafn_lasthash(struct xfs_dabuf *bp, int *count); diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index d55662d..fd02a18 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -443,14 +443,9 @@ typedef struct log { /* common routines */ extern xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp); -extern int xlog_find_tail(xlog_t *log, - xfs_daddr_t *head_blk, - xfs_daddr_t *tail_blk); extern int xlog_recover(xlog_t *log); extern int xlog_recover_finish(xlog_t *log); extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int); -extern struct xfs_buf *xlog_get_bp(xlog_t *, int); -extern void xlog_put_bp(struct xfs_buf *); extern kmem_zone_t *xfs_log_ticket_zone; diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 69ac2e5..48a7ab1 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -68,7 +68,7 @@ STATIC void xlog_recover_check_summary(xlog_t *); ((bbs + (log)->l_sectbb_mask + 1) & ~(log)->l_sectbb_mask) : (bbs) ) #define XLOG_SECTOR_ROUNDDOWN_BLKNO(log, bno) ((bno) & ~(log)->l_sectbb_mask) -xfs_buf_t * +STATIC xfs_buf_t * xlog_get_bp( xlog_t *log, int nbblks) @@ -88,7 +88,7 @@ xlog_get_bp( return xfs_buf_get_noaddr(BBTOB(nbblks), log->l_mp->m_logdev_targp); } -void +STATIC void xlog_put_bp( xfs_buf_t *bp) { @@ -805,7 +805,7 @@ xlog_find_head( * We could speed up search by using current 
head_blk buffer, but it is not * available. */ -int +STATIC int xlog_find_tail( xlog_t *log, xfs_daddr_t *head_blk, -- cgit v1.1 From f0a7695380efa31cd281730917f7e907a724d5cb Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 11 Jan 2010 11:49:57 +0000 Subject: xfs: Use list_heads for log recovery item lists Remove the roll-your-own linked list operations. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/xfs_log_recover.c | 205 ++++++++++++++++------------------------------- fs/xfs/xfs_log_recover.h | 23 +++--- 2 files changed, 81 insertions(+), 147 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 48a7ab1..65f1f13 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -50,8 +50,6 @@ STATIC int xlog_find_zeroed(xlog_t *, xfs_daddr_t *); STATIC int xlog_clear_stale_blocks(xlog_t *, xfs_lsn_t); -STATIC void xlog_recover_insert_item_backq(xlog_recover_item_t **q, - xlog_recover_item_t *item); #if defined(DEBUG) STATIC void xlog_recover_check_summary(xlog_t *); #else @@ -1367,36 +1365,45 @@ xlog_clear_stale_blocks( STATIC xlog_recover_t * xlog_recover_find_tid( - xlog_recover_t *q, + struct hlist_head *head, xlog_tid_t tid) { - xlog_recover_t *p = q; + xlog_recover_t *trans; + struct hlist_node *n; - while (p != NULL) { - if (p->r_log_tid == tid) - break; - p = p->r_next; + hlist_for_each_entry(trans, n, head, r_list) { + if (trans->r_log_tid == tid) + return trans; } - return p; + return NULL; } STATIC void -xlog_recover_put_hashq( - xlog_recover_t **q, - xlog_recover_t *trans) +xlog_recover_new_tid( + struct hlist_head *head, + xlog_tid_t tid, + xfs_lsn_t lsn) { - trans->r_next = *q; - *q = trans; + xlog_recover_t *trans; + + trans = kmem_zalloc(sizeof(xlog_recover_t), KM_SLEEP); + trans->r_log_tid = tid; + trans->r_lsn = lsn; + INIT_LIST_HEAD(&trans->r_itemq); + + INIT_HLIST_NODE(&trans->r_list); + hlist_add_head(&trans->r_list, head); } STATIC 
void xlog_recover_add_item( - xlog_recover_item_t **itemq) + struct list_head *head) { xlog_recover_item_t *item; item = kmem_zalloc(sizeof(xlog_recover_item_t), KM_SLEEP); - xlog_recover_insert_item_backq(itemq, item); + INIT_LIST_HEAD(&item->ri_list); + list_add_tail(&item->ri_list, head); } STATIC int @@ -1409,8 +1416,7 @@ xlog_recover_add_to_cont_trans( xfs_caddr_t ptr, old_ptr; int old_len; - item = trans->r_itemq; - if (item == NULL) { + if (list_empty(&trans->r_itemq)) { /* finish copying rest of trans header */ xlog_recover_add_item(&trans->r_itemq); ptr = (xfs_caddr_t) &trans->r_theader + @@ -1418,7 +1424,8 @@ xlog_recover_add_to_cont_trans( memcpy(ptr, dp, len); /* d, s, l */ return 0; } - item = item->ri_prev; + /* take the tail entry */ + item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list); old_ptr = item->ri_buf[item->ri_cnt-1].i_addr; old_len = item->ri_buf[item->ri_cnt-1].i_len; @@ -1455,8 +1462,7 @@ xlog_recover_add_to_trans( if (!len) return 0; - item = trans->r_itemq; - if (item == NULL) { + if (list_empty(&trans->r_itemq)) { /* we need to catch log corruptions here */ if (*(uint *)dp != XFS_TRANS_HEADER_MAGIC) { xlog_warn("XFS: xlog_recover_add_to_trans: " @@ -1474,12 +1480,15 @@ xlog_recover_add_to_trans( memcpy(ptr, dp, len); in_f = (xfs_inode_log_format_t *)ptr; - if (item->ri_prev->ri_total != 0 && - item->ri_prev->ri_total == item->ri_prev->ri_cnt) { + /* take the tail entry */ + item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list); + if (item->ri_total != 0 && + item->ri_total == item->ri_cnt) { + /* tail item is in use, get a new one */ xlog_recover_add_item(&trans->r_itemq); + item = list_entry(trans->r_itemq.prev, + xlog_recover_item_t, ri_list); } - item = trans->r_itemq; - item = item->ri_prev; if (item->ri_total == 0) { /* first region to be added */ if (in_f->ilf_size == 0 || @@ -1504,96 +1513,29 @@ xlog_recover_add_to_trans( return 0; } -STATIC void -xlog_recover_new_tid( - xlog_recover_t **q, - 
xlog_tid_t tid, - xfs_lsn_t lsn) -{ - xlog_recover_t *trans; - - trans = kmem_zalloc(sizeof(xlog_recover_t), KM_SLEEP); - trans->r_log_tid = tid; - trans->r_lsn = lsn; - xlog_recover_put_hashq(q, trans); -} - -STATIC int -xlog_recover_unlink_tid( - xlog_recover_t **q, - xlog_recover_t *trans) -{ - xlog_recover_t *tp; - int found = 0; - - ASSERT(trans != NULL); - if (trans == *q) { - *q = (*q)->r_next; - } else { - tp = *q; - while (tp) { - if (tp->r_next == trans) { - found = 1; - break; - } - tp = tp->r_next; - } - if (!found) { - xlog_warn( - "XFS: xlog_recover_unlink_tid: trans not found"); - ASSERT(0); - return XFS_ERROR(EIO); - } - tp->r_next = tp->r_next->r_next; - } - return 0; -} - -STATIC void -xlog_recover_insert_item_backq( - xlog_recover_item_t **q, - xlog_recover_item_t *item) -{ - if (*q == NULL) { - item->ri_prev = item->ri_next = item; - *q = item; - } else { - item->ri_next = *q; - item->ri_prev = (*q)->ri_prev; - (*q)->ri_prev = item; - item->ri_prev->ri_next = item; - } -} - -STATIC void -xlog_recover_insert_item_frontq( - xlog_recover_item_t **q, - xlog_recover_item_t *item) -{ - xlog_recover_insert_item_backq(q, item); - *q = item; -} - +/* + * Sort the log items in the transaction. Cancelled buffers need + * to be put first so they are processed before any items that might + * modify the buffers. If they are cancelled, then the modifications + * don't need to be replayed. 
+ */ STATIC int xlog_recover_reorder_trans( xlog_recover_t *trans) { - xlog_recover_item_t *first_item, *itemq, *itemq_next; - xfs_buf_log_format_t *buf_f; - ushort flags = 0; + xlog_recover_item_t *item, *n; + LIST_HEAD(sort_list); + + list_splice_init(&trans->r_itemq, &sort_list); + list_for_each_entry_safe(item, n, &sort_list, ri_list) { + xfs_buf_log_format_t *buf_f; - first_item = itemq = trans->r_itemq; - trans->r_itemq = NULL; - do { - itemq_next = itemq->ri_next; - buf_f = (xfs_buf_log_format_t *)itemq->ri_buf[0].i_addr; + buf_f = (xfs_buf_log_format_t *)item->ri_buf[0].i_addr; - switch (ITEM_TYPE(itemq)) { + switch (ITEM_TYPE(item)) { case XFS_LI_BUF: - flags = buf_f->blf_flags; - if (!(flags & XFS_BLI_CANCEL)) { - xlog_recover_insert_item_frontq(&trans->r_itemq, - itemq); + if (!(buf_f->blf_flags & XFS_BLI_CANCEL)) { + list_move(&item->ri_list, &trans->r_itemq); break; } case XFS_LI_INODE: @@ -1601,7 +1543,7 @@ xlog_recover_reorder_trans( case XFS_LI_QUOTAOFF: case XFS_LI_EFD: case XFS_LI_EFI: - xlog_recover_insert_item_backq(&trans->r_itemq, itemq); + list_move_tail(&item->ri_list, &trans->r_itemq); break; default: xlog_warn( @@ -1609,8 +1551,8 @@ xlog_recover_reorder_trans( ASSERT(0); return XFS_ERROR(EIO); } - itemq = itemq_next; - } while (first_item != itemq); + } + ASSERT(list_empty(&sort_list)); return 0; } @@ -2814,14 +2756,13 @@ xlog_recover_do_trans( int pass) { int error = 0; - xlog_recover_item_t *item, *first_item; + xlog_recover_item_t *item; error = xlog_recover_reorder_trans(trans); if (error) return error; - first_item = item = trans->r_itemq; - do { + list_for_each_entry(item, &trans->r_itemq, ri_list) { switch (ITEM_TYPE(item)) { case XFS_LI_BUF: error = xlog_recover_do_buffer_trans(log, item, pass); @@ -2854,8 +2795,7 @@ xlog_recover_do_trans( if (error) return error; - item = item->ri_next; - } while (first_item != item); + } return 0; } @@ -2869,21 +2809,18 @@ STATIC void xlog_recover_free_trans( xlog_recover_t *trans) { - 
xlog_recover_item_t *first_item, *item, *free_item; + xlog_recover_item_t *item, *n; int i; - item = first_item = trans->r_itemq; - do { - free_item = item; - item = item->ri_next; - /* Free the regions in the item. */ - for (i = 0; i < free_item->ri_cnt; i++) { - kmem_free(free_item->ri_buf[i].i_addr); - } + list_for_each_entry_safe(item, n, &trans->r_itemq, ri_list) { + /* Free the regions in the item. */ + list_del(&item->ri_list); + for (i = 0; i < item->ri_cnt; i++) + kmem_free(item->ri_buf[i].i_addr); /* Free the item itself */ - kmem_free(free_item->ri_buf); - kmem_free(free_item); - } while (first_item != item); + kmem_free(item->ri_buf); + kmem_free(item); + } /* Free the transaction recover structure */ kmem_free(trans); } @@ -2891,14 +2828,12 @@ xlog_recover_free_trans( STATIC int xlog_recover_commit_trans( xlog_t *log, - xlog_recover_t **q, xlog_recover_t *trans, int pass) { int error; - if ((error = xlog_recover_unlink_tid(q, trans))) - return error; + hlist_del(&trans->r_list); if ((error = xlog_recover_do_trans(log, trans, pass))) return error; xlog_recover_free_trans(trans); /* no error */ @@ -2926,7 +2861,7 @@ xlog_recover_unmount_trans( STATIC int xlog_recover_process_data( xlog_t *log, - xlog_recover_t *rhash[], + struct hlist_head rhash[], xlog_rec_header_t *rhead, xfs_caddr_t dp, int pass) @@ -2960,7 +2895,7 @@ xlog_recover_process_data( } tid = be32_to_cpu(ohead->oh_tid); hash = XLOG_RHASH(tid); - trans = xlog_recover_find_tid(rhash[hash], tid); + trans = xlog_recover_find_tid(&rhash[hash], tid); if (trans == NULL) { /* not found; add new tid */ if (ohead->oh_flags & XLOG_START_TRANS) xlog_recover_new_tid(&rhash[hash], tid, @@ -2978,7 +2913,7 @@ xlog_recover_process_data( switch (flags) { case XLOG_COMMIT_TRANS: error = xlog_recover_commit_trans(log, - &rhash[hash], trans, pass); + trans, pass); break; case XLOG_UNMOUNT_TRANS: error = xlog_recover_unmount_trans(trans); @@ -3517,7 +3452,7 @@ xlog_do_recovery_pass( int error = 0, h_size; int 
bblks, split_bblks; int hblks, split_hblks, wrapped_hblks; - xlog_recover_t *rhash[XLOG_RHASH_SIZE]; + struct hlist_head rhash[XLOG_RHASH_SIZE]; ASSERT(head_blk != tail_blk); diff --git a/fs/xfs/xfs_log_recover.h b/fs/xfs/xfs_log_recover.h index b225455..75d7492 100644 --- a/fs/xfs/xfs_log_recover.h +++ b/fs/xfs/xfs_log_recover.h @@ -35,22 +35,21 @@ * item headers are in ri_buf[0]. Additional buffers follow. */ typedef struct xlog_recover_item { - struct xlog_recover_item *ri_next; - struct xlog_recover_item *ri_prev; - int ri_type; - int ri_cnt; /* count of regions found */ - int ri_total; /* total regions */ - xfs_log_iovec_t *ri_buf; /* ptr to regions buffer */ + struct list_head ri_list; + int ri_type; + int ri_cnt; /* count of regions found */ + int ri_total; /* total regions */ + xfs_log_iovec_t *ri_buf; /* ptr to regions buffer */ } xlog_recover_item_t; struct xlog_tid; typedef struct xlog_recover { - struct xlog_recover *r_next; - xlog_tid_t r_log_tid; /* log's transaction id */ - xfs_trans_header_t r_theader; /* trans header for partial */ - int r_state; /* not needed */ - xfs_lsn_t r_lsn; /* xact lsn */ - xlog_recover_item_t *r_itemq; /* q for items */ + struct hlist_node r_list; + xlog_tid_t r_log_tid; /* log's transaction id */ + xfs_trans_header_t r_theader; /* trans header for partial */ + int r_state; /* not needed */ + xfs_lsn_t r_lsn; /* xact lsn */ + struct list_head r_itemq; /* q for items */ } xlog_recover_t; #define ITEM_TYPE(i) (*(ushort *)(i)->ri_buf[0].i_addr) -- cgit v1.1 From 453eac8a9aa417878a38bdfbccafd5f7ce4e8e4e Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 11 Jan 2010 11:49:58 +0000 Subject: xfs: Don't wake the aild once per second Now that the AIL push algorithm is traversal safe, we don't need a watchdog function in the xfsaild to catch pushes that fail to make progress. Remove the watchdog timeout and make pushes purely driven by demand. 
This will remove the once-per-second wakeup that is seen when the filesystem is idle and make laptop power misers happy. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/linux-2.6/xfs_super.c | 7 +++---- fs/xfs/xfs_trans_ail.c | 19 +++++++++++-------- 2 files changed, 14 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 77414db..9f2e398 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -877,12 +877,11 @@ xfsaild( { struct xfs_ail *ailp = data; xfs_lsn_t last_pushed_lsn = 0; - long tout = 0; + long tout = 0; /* milliseconds */ while (!kthread_should_stop()) { - if (tout) - schedule_timeout_interruptible(msecs_to_jiffies(tout)); - tout = 1000; + schedule_timeout_interruptible(tout ? + msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT); /* swsusp */ try_to_freeze(); diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 2ffc570..063dfbd 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -237,14 +237,15 @@ out: } /* - * Function that does the work of pushing on the AIL + * xfsaild_push does the work of pushing on the AIL. Returning a timeout of + * zero indicates that the caller should sleep until woken. */ long xfsaild_push( struct xfs_ail *ailp, xfs_lsn_t *last_lsn) { - long tout = 1000; /* milliseconds */ + long tout = 0; xfs_lsn_t last_pushed_lsn = *last_lsn; xfs_lsn_t target = ailp->xa_target; xfs_lsn_t lsn; @@ -262,7 +263,7 @@ xfsaild_push( */ xfs_trans_ail_cursor_done(ailp, cur); spin_unlock(&ailp->xa_lock); - last_pushed_lsn = 0; + *last_lsn = 0; return tout; } @@ -279,7 +280,6 @@ xfsaild_push( * prevents use from spinning when we can't do anything or there is * lots of contention on the AIL lists. 
*/ - tout = 10; lsn = lip->li_lsn; flush_log = stuck = count = 0; while ((XFS_LSN_CMP(lip->li_lsn, target) < 0)) { @@ -376,14 +376,14 @@ xfsaild_push( if (!count) { /* We're past our target or empty, so idle */ - tout = 1000; + last_pushed_lsn = 0; } else if (XFS_LSN_CMP(lsn, target) >= 0) { /* * We reached the target so wait a bit longer for I/O to * complete and remove pushed items from the AIL before we * start the next scan from the start of the AIL. */ - tout += 20; + tout = 50; last_pushed_lsn = 0; } else if ((stuck * 100) / count > 90) { /* @@ -395,11 +395,14 @@ xfsaild_push( * Backoff a bit more to allow some I/O to complete before * continuing from where we were. */ - tout += 10; + tout = 20; + } else { + /* more to do, but wait a short while before continuing */ + tout = 10; } *last_lsn = last_pushed_lsn; return tout; -} /* xfsaild_push */ +} /* -- cgit v1.1 From c9c129714e71c890bed1bd5b61697a896c3c2d54 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 11 Jan 2010 11:49:59 +0000 Subject: xfs: Don't wake xfsbufd when idle The xfsbufd wakes every xfsbufd_centisecs (once per second by default) for each filesystem even when the filesystem is idle. If the xfsbufd has nothing to do, put it into a long term sleep and only wake it up when there is work pending (i.e. dirty buffers to flush soon). This will make laptop power misers happy. 
Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/linux-2.6/xfs_buf.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 77b8be8..18ae3ba 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -1595,6 +1595,11 @@ xfs_buf_delwri_queue( list_del(&bp->b_list); } + if (list_empty(dwq)) { + /* start xfsbufd as it is about to have something to do */ + wake_up_process(bp->b_target->bt_task); + } + bp->b_flags |= _XBF_DELWRI_Q; list_add_tail(&bp->b_list, dwq); bp->b_queuetime = jiffies; @@ -1644,6 +1649,8 @@ xfsbufd_wakeup( list_for_each_entry(btp, &xfs_buftarg_list, bt_list) { if (test_bit(XBT_FORCE_SLEEP, &btp->bt_flags)) continue; + if (list_empty(&btp->bt_delwrite_queue)) + continue; set_bit(XBT_FORCE_FLUSH, &btp->bt_flags); wake_up_process(btp->bt_task); } @@ -1708,6 +1715,9 @@ xfsbufd( set_freezable(); do { + long age = xfs_buf_age_centisecs * msecs_to_jiffies(10); + long tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10); + if (unlikely(freezing(current))) { set_bit(XBT_FORCE_SLEEP, &target->bt_flags); refrigerator(); @@ -1715,12 +1725,12 @@ xfsbufd( clear_bit(XBT_FORCE_SLEEP, &target->bt_flags); } - schedule_timeout_interruptible( - xfs_buf_timer_centisecs * msecs_to_jiffies(10)); - - xfs_buf_delwri_split(target, &tmp, - xfs_buf_age_centisecs * msecs_to_jiffies(10)); + /* sleep for a long time if there is nothing to do. 
*/ + if (list_empty(&target->bt_delwrite_queue)) + tout = MAX_SCHEDULE_TIMEOUT; + schedule_timeout_interruptible(tout); + xfs_buf_delwri_split(target, &tmp, age); count = 0; while (!list_empty(&tmp)) { bp = list_entry(tmp.next, xfs_buf_t, b_list); -- cgit v1.1 From 5017e97d52628fb8ae56e434e86ac2e72ddaac2b Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 11 Jan 2010 11:47:40 +0000 Subject: xfs: rename xfs_get_perag xfs_get_perag is really getting the perag that an inode belongs to based on its inode number. Convert the use of this function to just get the perag from a provided ag number. Use this new function to obtain the per-ag structure when traversing the per AG inode trees for sync and reclaim. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/linux-2.6/xfs_sync.c | 22 +++++++++++++--------- fs/xfs/xfs_iget.c | 10 +++++----- fs/xfs/xfs_inode.c | 8 +++++--- fs/xfs/xfs_mount.h | 8 ++++---- 4 files changed, 27 insertions(+), 21 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 0f90bfe..cc964fa 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -90,14 +90,13 @@ xfs_inode_ag_lookup( STATIC int xfs_inode_ag_walk( struct xfs_mount *mp, - xfs_agnumber_t ag, + struct xfs_perag *pag, int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), int flags, int tag, int exclusive) { - struct xfs_perag *pag = &mp->m_perag[ag]; uint32_t first_index; int last_error = 0; int skipped; @@ -141,8 +140,6 @@ restart: delay(1); goto restart; } - - xfs_put_perag(mp, pag); return last_error; } @@ -160,10 +157,16 @@ xfs_inode_ag_iterator( xfs_agnumber_t ag; for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) { - if (!mp->m_perag[ag].pag_ici_init) + struct xfs_perag *pag; + + pag = xfs_perag_get(mp, ag); + if (!pag->pag_ici_init) { + xfs_perag_put(pag); continue; - error = xfs_inode_ag_walk(mp, ag, execute, flags, tag, + } + error = 
xfs_inode_ag_walk(mp, pag, execute, flags, tag, exclusive); + xfs_perag_put(pag); if (error) { last_error = error; if (error == EFSCORRUPTED) @@ -690,16 +693,17 @@ void xfs_inode_set_reclaim_tag( xfs_inode_t *ip) { - xfs_mount_t *mp = ip->i_mount; - xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino); + struct xfs_mount *mp = ip->i_mount; + struct xfs_perag *pag; + pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); read_lock(&pag->pag_ici_lock); spin_lock(&ip->i_flags_lock); __xfs_inode_set_reclaim_tag(pag, ip); __xfs_iflags_set(ip, XFS_IRECLAIMABLE); spin_unlock(&ip->i_flags_lock); read_unlock(&pag->pag_ici_lock); - xfs_put_perag(mp, pag); + xfs_perag_put(pag); } void diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 155e798..e281eb4 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -374,7 +374,7 @@ xfs_iget( return EINVAL; /* get the perag structure and ensure that it's inode capable */ - pag = xfs_get_perag(mp, ino); + pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino)); if (!pag->pagi_inodeok) return EINVAL; ASSERT(pag->pag_ici_init); @@ -398,7 +398,7 @@ again: if (error) goto out_error_or_again; } - xfs_put_perag(mp, pag); + xfs_perag_put(pag); *ipp = ip; @@ -417,7 +417,7 @@ out_error_or_again: delay(1); goto again; } - xfs_put_perag(mp, pag); + xfs_perag_put(pag); return error; } @@ -488,12 +488,12 @@ xfs_ireclaim( * added to the tree assert that it's been there before to catch * problems with the inode life time early on. 
*/ - pag = xfs_get_perag(mp, ip->i_ino); + pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); write_lock(&pag->pag_ici_lock); if (!radix_tree_delete(&pag->pag_ici_root, agino)) ASSERT(0); write_unlock(&pag->pag_ici_lock); - xfs_put_perag(mp, pag); + xfs_perag_put(pag); /* * Here we do an (almost) spurious inode lock in order to coordinate diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index ef77fd8..bd3d816 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1946,8 +1946,9 @@ xfs_ifree_cluster( xfs_inode_t *ip, **ip_found; xfs_inode_log_item_t *iip; xfs_log_item_t *lip; - xfs_perag_t *pag = xfs_get_perag(mp, inum); + struct xfs_perag *pag; + pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum)); if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) { blks_per_cluster = 1; ninodes = mp->m_sb.sb_inopblock; @@ -2088,7 +2089,7 @@ xfs_ifree_cluster( } kmem_free(ip_found); - xfs_put_perag(mp, pag); + xfs_perag_put(pag); } /* @@ -2675,7 +2676,7 @@ xfs_iflush_cluster( xfs_buf_t *bp) { xfs_mount_t *mp = ip->i_mount; - xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino); + struct xfs_perag *pag; unsigned long first_index, mask; unsigned long inodes_per_cluster; int ilist_size; @@ -2686,6 +2687,7 @@ xfs_iflush_cluster( int bufwasdelwri; int i; + pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); ASSERT(pag->pagi_inodeok); ASSERT(pag->pag_ici_init); diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 1df7e45..f8a68a2 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -386,14 +386,14 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d) /* * perag get/put wrappers for eventual ref counting */ -static inline xfs_perag_t * -xfs_get_perag(struct xfs_mount *mp, xfs_ino_t ino) +static inline struct xfs_perag * +xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno) { - return &mp->m_perag[XFS_INO_TO_AGNO(mp, ino)]; + return &mp->m_perag[agno]; } static inline void -xfs_put_perag(struct xfs_mount *mp, xfs_perag_t *pag) 
+xfs_perag_put(struct xfs_perag *pag) { /* nothing to see here, move along */ } -- cgit v1.1 From a862e0fdcb8862aab2538ec2fc2f0dc07a625c59 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 11 Jan 2010 11:47:41 +0000 Subject: xfs: Don't directly reference m_perag in allocation code Start abstracting the perag references so that the indexing of the structures is not directly coded into all the places that use the perag structures. This will allow us to separate the use of the perag structure and the way it is indexed and hence avoid the known deadlocks related to growing a busy filesystem. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/xfs_alloc.c | 82 +++++++++++++++++++++++++++--------------------- fs/xfs/xfs_alloc_btree.c | 9 ++++-- 2 files changed, 53 insertions(+), 38 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 275b1f4..84070f2 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -1662,11 +1662,13 @@ xfs_free_ag_extent( xfs_agf_t *agf; xfs_perag_t *pag; /* per allocation group data */ + pag = xfs_perag_get(mp, agno); + pag->pagf_freeblks += len; + xfs_perag_put(pag); + agf = XFS_BUF_TO_AGF(agbp); - pag = &mp->m_perag[agno]; be32_add_cpu(&agf->agf_freeblks, len); xfs_trans_agblocks_delta(tp, len); - pag->pagf_freeblks += len; XFS_WANT_CORRUPTED_GOTO( be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length), @@ -1969,10 +1971,12 @@ xfs_alloc_get_freelist( xfs_trans_brelse(tp, agflbp); if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp)) agf->agf_flfirst = 0; - pag = &mp->m_perag[be32_to_cpu(agf->agf_seqno)]; + + pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno)); be32_add_cpu(&agf->agf_flcount, -1); xfs_trans_agflist_delta(tp, -1); pag->pagf_flcount--; + xfs_perag_put(pag); logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT; if (btreeblk) { @@ -2078,7 +2082,8 @@ xfs_alloc_put_freelist( be32_add_cpu(&agf->agf_fllast, 1); if (be32_to_cpu(agf->agf_fllast)
== XFS_AGFL_SIZE(mp)) agf->agf_fllast = 0; - pag = &mp->m_perag[be32_to_cpu(agf->agf_seqno)]; + + pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno)); be32_add_cpu(&agf->agf_flcount, 1); xfs_trans_agflist_delta(tp, 1); pag->pagf_flcount++; @@ -2089,6 +2094,7 @@ xfs_alloc_put_freelist( pag->pagf_btreeblks--; logflags |= XFS_AGF_BTREEBLKS; } + xfs_perag_put(pag); xfs_alloc_log_agf(tp, agbp, logflags); @@ -2152,7 +2158,6 @@ xfs_read_agf( xfs_trans_brelse(tp, *bpp); return XFS_ERROR(EFSCORRUPTED); } - XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_AGF, XFS_AGF_REF); return 0; } @@ -2184,7 +2189,7 @@ xfs_alloc_read_agf( ASSERT(!XFS_BUF_GETERROR(*bpp)); agf = XFS_BUF_TO_AGF(*bpp); - pag = &mp->m_perag[agno]; + pag = xfs_perag_get(mp, agno); if (!pag->pagf_init) { pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks); pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks); @@ -2211,6 +2216,7 @@ xfs_alloc_read_agf( be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi])); } #endif + xfs_perag_put(pag); return 0; } @@ -2271,7 +2277,7 @@ xfs_alloc_vextent( */ args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno); down_read(&mp->m_peraglock); - args->pag = &mp->m_perag[args->agno]; + args->pag = xfs_perag_get(mp, args->agno); args->minleft = 0; error = xfs_alloc_fix_freelist(args, 0); args->minleft = minleft; @@ -2341,7 +2347,7 @@ xfs_alloc_vextent( */ down_read(&mp->m_peraglock); for (;;) { - args->pag = &mp->m_perag[args->agno]; + args->pag = xfs_perag_get(mp, args->agno); if (no_min) args->minleft = 0; error = xfs_alloc_fix_freelist(args, flags); args->minleft = minleft; @@ -2400,6 +2406,7 @@ xfs_alloc_vextent( } } } + xfs_perag_put(args->pag); } up_read(&mp->m_peraglock); if (bump_rotor || (type == XFS_ALLOCTYPE_ANY_AG)) { @@ -2427,8 +2434,10 @@ xfs_alloc_vextent( args->len); #endif } + xfs_perag_put(args->pag); return 0; error0: + xfs_perag_put(args->pag); up_read(&mp->m_peraglock); return error; } @@ -2455,7 +2464,7 @@ xfs_free_extent( ASSERT(args.agno < args.mp->m_sb.sb_agcount); args.agbno = 
XFS_FSB_TO_AGBNO(args.mp, bno); down_read(&args.mp->m_peraglock); - args.pag = &args.mp->m_perag[args.agno]; + args.pag = xfs_perag_get(args.mp, args.agno); if ((error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING))) goto error0; #ifdef DEBUG @@ -2465,6 +2474,7 @@ xfs_free_extent( #endif error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0); error0: + xfs_perag_put(args.pag); up_read(&args.mp->m_peraglock); return error; } @@ -2486,15 +2496,15 @@ xfs_alloc_mark_busy(xfs_trans_t *tp, xfs_agblock_t bno, xfs_extlen_t len) { - xfs_mount_t *mp; xfs_perag_busy_t *bsy; + struct xfs_perag *pag; int n; - mp = tp->t_mountp; - spin_lock(&mp->m_perag[agno].pagb_lock); + pag = xfs_perag_get(tp->t_mountp, agno); + spin_lock(&pag->pagb_lock); /* search pagb_list for an open slot */ - for (bsy = mp->m_perag[agno].pagb_list, n = 0; + for (bsy = pag->pagb_list, n = 0; n < XFS_PAGB_NUM_SLOTS; bsy++, n++) { if (bsy->busy_tp == NULL) { @@ -2502,11 +2512,11 @@ xfs_alloc_mark_busy(xfs_trans_t *tp, } } - trace_xfs_alloc_busy(mp, agno, bno, len, n); + trace_xfs_alloc_busy(tp->t_mountp, agno, bno, len, n); if (n < XFS_PAGB_NUM_SLOTS) { - bsy = &mp->m_perag[agno].pagb_list[n]; - mp->m_perag[agno].pagb_count++; + bsy = &pag->pagb_list[n]; + pag->pagb_count++; bsy->busy_start = bno; bsy->busy_length = len; bsy->busy_tp = tp; @@ -2521,7 +2531,8 @@ xfs_alloc_mark_busy(xfs_trans_t *tp, xfs_trans_set_sync(tp); } - spin_unlock(&mp->m_perag[agno].pagb_lock); + spin_unlock(&pag->pagb_lock); + xfs_perag_put(pag); } void @@ -2529,24 +2540,23 @@ xfs_alloc_clear_busy(xfs_trans_t *tp, xfs_agnumber_t agno, int idx) { - xfs_mount_t *mp; + struct xfs_perag *pag; xfs_perag_busy_t *list; - mp = tp->t_mountp; - - spin_lock(&mp->m_perag[agno].pagb_lock); - list = mp->m_perag[agno].pagb_list; - ASSERT(idx < XFS_PAGB_NUM_SLOTS); + pag = xfs_perag_get(tp->t_mountp, agno); + spin_lock(&pag->pagb_lock); + list = pag->pagb_list; - trace_xfs_alloc_unbusy(mp, agno, idx, list[idx].busy_tp == 
tp); + trace_xfs_alloc_unbusy(tp->t_mountp, agno, idx, list[idx].busy_tp == tp); if (list[idx].busy_tp == tp) { list[idx].busy_tp = NULL; - mp->m_perag[agno].pagb_count--; + pag->pagb_count--; } - spin_unlock(&mp->m_perag[agno].pagb_lock); + spin_unlock(&pag->pagb_lock); + xfs_perag_put(pag); } @@ -2560,17 +2570,15 @@ xfs_alloc_search_busy(xfs_trans_t *tp, xfs_agblock_t bno, xfs_extlen_t len) { - xfs_mount_t *mp; + struct xfs_perag *pag; xfs_perag_busy_t *bsy; xfs_agblock_t uend, bend; xfs_lsn_t lsn = 0; int cnt; - mp = tp->t_mountp; - - spin_lock(&mp->m_perag[agno].pagb_lock); - - uend = bno + len - 1; + pag = xfs_perag_get(tp->t_mountp, agno); + spin_lock(&pag->pagb_lock); + cnt = pag->pagb_count; /* * search pagb_list for this slot, skipping open slots. We have to @@ -2578,8 +2586,9 @@ xfs_alloc_search_busy(xfs_trans_t *tp, * we have to get the most recent LSN for the log force to push out * all the transactions that span the range. */ - for (cnt = 0; cnt < mp->m_perag[agno].pagb_count; cnt++) { - bsy = &mp->m_perag[agno].pagb_list[cnt]; + uend = bno + len - 1; + for (cnt = 0; cnt < pag->pagb_count; cnt++) { + bsy = &pag->pagb_list[cnt]; if (!bsy->busy_tp) continue; @@ -2591,7 +2600,8 @@ xfs_alloc_search_busy(xfs_trans_t *tp, if (XFS_LSN_CMP(bsy->busy_tp->t_commit_lsn, lsn) > 0) lsn = bsy->busy_tp->t_commit_lsn; } - spin_unlock(&mp->m_perag[agno].pagb_lock); + spin_unlock(&pag->pagb_lock); + xfs_perag_put(pag); trace_xfs_alloc_busysearch(tp->t_mountp, agno, bno, len, lsn); /* @@ -2599,5 +2609,5 @@ xfs_alloc_search_busy(xfs_trans_t *tp, * transaction that freed the block */ if (lsn) - xfs_log_force(mp, lsn, XFS_LOG_FORCE|XFS_LOG_SYNC); + xfs_log_force(tp->t_mountp, lsn, XFS_LOG_FORCE|XFS_LOG_SYNC); } diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index adbd914..b726e10 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -61,12 +61,14 @@ xfs_allocbt_set_root( struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); xfs_agnumber_t seqno = 
be32_to_cpu(agf->agf_seqno); int btnum = cur->bc_btnum; + struct xfs_perag *pag = xfs_perag_get(cur->bc_mp, seqno); ASSERT(ptr->s != 0); agf->agf_roots[btnum] = ptr->s; be32_add_cpu(&agf->agf_levels[btnum], inc); - cur->bc_mp->m_perag[seqno].pagf_levels[btnum] += inc; + pag->pagf_levels[btnum] += inc; + xfs_perag_put(pag); xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS); } @@ -150,6 +152,7 @@ xfs_allocbt_update_lastrec( { struct xfs_agf *agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp); xfs_agnumber_t seqno = be32_to_cpu(agf->agf_seqno); + struct xfs_perag *pag; __be32 len; int numrecs; @@ -193,7 +196,9 @@ xfs_allocbt_update_lastrec( } agf->agf_longest = len; - cur->bc_mp->m_perag[seqno].pagf_longest = be32_to_cpu(len); + pag = xfs_perag_get(cur->bc_mp, seqno); + pag->pagf_longest = be32_to_cpu(len); + xfs_perag_put(pag); xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp, XFS_AGF_LONGEST); } -- cgit v1.1 From 4196ac08c023c6dab90c3fa460d9c06deaa304c4 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 11 Jan 2010 11:47:42 +0000 Subject: xfs: Convert filestreams code to use per-ag get/put routines Use xfs_perag_get() and xfs_perag_put() in the filestreams code. 
Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/xfs_filestream.c | 19 ++++++++++++------- fs/xfs/xfs_filestream.h | 27 ++++++++++++++++++++++++--- 2 files changed, 36 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c index a631e14..e61f2aa 100644 --- a/fs/xfs/xfs_filestream.c +++ b/fs/xfs/xfs_filestream.c @@ -140,6 +140,7 @@ _xfs_filestream_pick_ag( int flags, xfs_extlen_t minlen) { + int streams, max_streams; int err, trylock, nscan; xfs_extlen_t longest, free, minfree, maxfree = 0; xfs_agnumber_t ag, max_ag = NULLAGNUMBER; @@ -155,15 +156,15 @@ _xfs_filestream_pick_ag( trylock = XFS_ALLOC_FLAG_TRYLOCK; for (nscan = 0; 1; nscan++) { - - TRACE_AG_SCAN(mp, ag, xfs_filestream_peek_ag(mp, ag)); - - pag = mp->m_perag + ag; + pag = xfs_perag_get(mp, ag); + TRACE_AG_SCAN(mp, ag, atomic_read(&pag->pagf_fstrms)); if (!pag->pagf_init) { err = xfs_alloc_pagf_init(mp, NULL, ag, trylock); - if (err && !trylock) + if (err && !trylock) { + xfs_perag_put(pag); return err; + } } /* Might fail sometimes during the 1st pass with trylock set. */ @@ -173,6 +174,7 @@ _xfs_filestream_pick_ag( /* Keep track of the AG with the most free blocks. */ if (pag->pagf_freeblks > maxfree) { maxfree = pag->pagf_freeblks; + max_streams = atomic_read(&pag->pagf_fstrms); max_ag = ag; } @@ -195,6 +197,8 @@ _xfs_filestream_pick_ag( /* Break out, retaining the reference on the AG. */ free = pag->pagf_freeblks; + streams = atomic_read(&pag->pagf_fstrms); + xfs_perag_put(pag); *agp = ag; break; } @@ -202,6 +206,7 @@ _xfs_filestream_pick_ag( /* Drop the reference on this AG, it's not usable. */ xfs_filestream_put_ag(mp, ag); next_ag: + xfs_perag_put(pag); /* Move to the next AG, wrapping to AG 0 if necessary. 
*/ if (++ag >= mp->m_sb.sb_agcount) ag = 0; @@ -229,6 +234,7 @@ next_ag: if (max_ag != NULLAGNUMBER) { xfs_filestream_get_ag(mp, max_ag); TRACE_AG_PICK1(mp, max_ag, maxfree); + streams = max_streams; free = maxfree; *agp = max_ag; break; @@ -240,8 +246,7 @@ next_ag: return 0; } - TRACE_AG_PICK2(mp, startag, *agp, xfs_filestream_peek_ag(mp, *agp), - free, nscan, flags); + TRACE_AG_PICK2(mp, startag, *agp, streams, free, nscan, flags); return 0; } diff --git a/fs/xfs/xfs_filestream.h b/fs/xfs/xfs_filestream.h index 4aba67c..58378b2 100644 --- a/fs/xfs/xfs_filestream.h +++ b/fs/xfs/xfs_filestream.h @@ -79,12 +79,21 @@ extern ktrace_t *xfs_filestreams_trace_buf; * the cache that reference per-ag array elements that have since been * reallocated. */ +/* + * xfs_filestream_peek_ag is only used in tracing code + */ static inline int xfs_filestream_peek_ag( xfs_mount_t *mp, xfs_agnumber_t agno) { - return atomic_read(&mp->m_perag[agno].pagf_fstrms); + struct xfs_perag *pag; + int ret; + + pag = xfs_perag_get(mp, agno); + ret = atomic_read(&pag->pagf_fstrms); + xfs_perag_put(pag); + return ret; } static inline int @@ -92,7 +101,13 @@ xfs_filestream_get_ag( xfs_mount_t *mp, xfs_agnumber_t agno) { - return atomic_inc_return(&mp->m_perag[agno].pagf_fstrms); + struct xfs_perag *pag; + int ret; + + pag = xfs_perag_get(mp, agno); + ret = atomic_inc_return(&pag->pagf_fstrms); + xfs_perag_put(pag); + return ret; } static inline int @@ -100,7 +115,13 @@ xfs_filestream_put_ag( xfs_mount_t *mp, xfs_agnumber_t agno) { - return atomic_dec_return(&mp->m_perag[agno].pagf_fstrms); + struct xfs_perag *pag; + int ret; + + pag = xfs_perag_get(mp, agno); + ret = atomic_dec_return(&pag->pagf_fstrms); + xfs_perag_put(pag); + return ret; } /* allocation selection flags */ -- cgit v1.1 From 44b56e0a1aed522a10051645e85d300e10926fd3 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 11 Jan 2010 11:47:43 +0000 Subject: xfs: convert remaining direct references to m_perag Convert the remaining 
direct lookups of the per ag structures to use get/put accesses. Ensure that the loops across AGs and prior users of the interface balance gets and puts correctly. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/xfs_bmap.c | 8 +++++++- fs/xfs/xfs_ialloc.c | 35 +++++++++++++++++++++++++---------- fs/xfs/xfs_inode.c | 5 ++++- fs/xfs/xfs_mount.c | 9 ++++++--- 4 files changed, 42 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 98251cd..a9b95d9 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -2630,11 +2630,12 @@ xfs_bmap_btalloc( startag = ag = 0; notinit = 0; down_read(&mp->m_peraglock); + pag = xfs_perag_get(mp, ag); while (blen < ap->alen) { - pag = &mp->m_perag[ag]; if (!pag->pagf_init && (error = xfs_alloc_pagf_init(mp, args.tp, ag, XFS_ALLOC_FLAG_TRYLOCK))) { + xfs_perag_put(pag); up_read(&mp->m_peraglock); return error; } @@ -2667,6 +2668,7 @@ xfs_bmap_btalloc( break; error = xfs_filestream_new_ag(ap, &ag); + xfs_perag_put(pag); if (error) { up_read(&mp->m_peraglock); return error; @@ -2674,6 +2676,7 @@ xfs_bmap_btalloc( /* loop again to set 'blen'*/ startag = NULLAGNUMBER; + pag = xfs_perag_get(mp, ag); continue; } } @@ -2681,7 +2684,10 @@ xfs_bmap_btalloc( ag = 0; if (ag == startag) break; + xfs_perag_put(pag); + pag = xfs_perag_get(mp, ag); } + xfs_perag_put(pag); up_read(&mp->m_peraglock); /* * Since the above loop did a BUF_TRYLOCK, it is diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index cb907ba..884ee13 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -253,6 +253,7 @@ xfs_ialloc_ag_alloc( xfs_agino_t thisino; /* current inode number, for loop */ int isaligned = 0; /* inode allocation at stripe unit */ /* boundary */ + struct xfs_perag *pag; args.tp = tp; args.mp = tp->t_mountp; @@ -383,7 +384,9 @@ xfs_ialloc_ag_alloc( be32_add_cpu(&agi->agi_count, newlen); be32_add_cpu(&agi->agi_freecount, newlen); 
down_read(&args.mp->m_peraglock); - args.mp->m_perag[agno].pagi_freecount += newlen; + pag = xfs_perag_get(args.mp, agno); + pag->pagi_freecount += newlen; + xfs_perag_put(pag); up_read(&args.mp->m_peraglock); agi->agi_newino = cpu_to_be32(newino); @@ -488,7 +491,7 @@ xfs_ialloc_ag_select( flags = XFS_ALLOC_FLAG_TRYLOCK; down_read(&mp->m_peraglock); for (;;) { - pag = &mp->m_perag[agno]; + pag = xfs_perag_get(mp, agno); if (!pag->pagi_init) { if (xfs_ialloc_read_agi(mp, tp, agno, &agbp)) { agbp = NULL; @@ -527,6 +530,7 @@ xfs_ialloc_ag_select( agbp = NULL; goto nextag; } + xfs_perag_put(pag); up_read(&mp->m_peraglock); return agbp; } @@ -535,6 +539,7 @@ unlock_nextag: if (agbp) xfs_trans_brelse(tp, agbp); nextag: + xfs_perag_put(pag); /* * No point in iterating over the rest, if we're shutting * down. @@ -672,6 +677,7 @@ xfs_dialloc( xfs_agnumber_t tagno; /* testing allocation group number */ xfs_btree_cur_t *tcur; /* temp cursor */ xfs_inobt_rec_incore_t trec; /* temp inode allocation record */ + struct xfs_perag *pag; if (*IO_agbp == NULL) { @@ -772,11 +778,14 @@ nextag: return noroom ? ENOSPC : 0; } down_read(&mp->m_peraglock); - if (mp->m_perag[tagno].pagi_inodeok == 0) { + pag = xfs_perag_get(mp, tagno); + if (pag->pagi_inodeok == 0) { + xfs_perag_put(pag); up_read(&mp->m_peraglock); goto nextag; } error = xfs_ialloc_read_agi(mp, tp, tagno, &agbp); + xfs_perag_put(pag); up_read(&mp->m_peraglock); if (error) goto nextag; @@ -790,6 +799,7 @@ nextag: */ agno = tagno; *IO_agbp = NULL; + pag = xfs_perag_get(mp, agno); restart_pagno: cur = xfs_inobt_init_cursor(mp, tp, agbp, be32_to_cpu(agi->agi_seqno)); @@ -808,7 +818,6 @@ nextag: * If in the same AG as the parent, try to get near the parent. 
*/ if (pagno == agno) { - xfs_perag_t *pag = &mp->m_perag[agno]; int doneleft; /* done, to the left */ int doneright; /* done, to the right */ int searchdistance = 10; @@ -1007,7 +1016,7 @@ alloc_inode: be32_add_cpu(&agi->agi_freecount, -1); xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); down_read(&mp->m_peraglock); - mp->m_perag[tagno].pagi_freecount--; + pag->pagi_freecount--; up_read(&mp->m_peraglock); error = xfs_check_agi_freecount(cur, agi); @@ -1016,12 +1025,14 @@ alloc_inode: xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1); + xfs_perag_put(pag); *inop = ino; return 0; error1: xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); error0: xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); + xfs_perag_put(pag); return error; } @@ -1052,6 +1063,7 @@ xfs_difree( xfs_mount_t *mp; /* mount structure for filesystem */ int off; /* offset of inode in inode chunk */ xfs_inobt_rec_incore_t rec; /* btree record */ + struct xfs_perag *pag; mp = tp->t_mountp; @@ -1158,7 +1170,9 @@ xfs_difree( be32_add_cpu(&agi->agi_freecount, -(ilen - 1)); xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT); down_read(&mp->m_peraglock); - mp->m_perag[agno].pagi_freecount -= ilen - 1; + pag = xfs_perag_get(mp, agno); + pag->pagi_freecount -= ilen - 1; + xfs_perag_put(pag); up_read(&mp->m_peraglock); xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, -ilen); xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1)); @@ -1189,7 +1203,9 @@ xfs_difree( be32_add_cpu(&agi->agi_freecount, 1); xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); down_read(&mp->m_peraglock); - mp->m_perag[agno].pagi_freecount++; + pag = xfs_perag_get(mp, agno); + pag->pagi_freecount++; + xfs_perag_put(pag); up_read(&mp->m_peraglock); xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, 1); } @@ -1379,7 +1395,6 @@ xfs_imap( XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)); return XFS_ERROR(EINVAL); } - return 0; } @@ -1523,8 +1538,7 @@ xfs_ialloc_read_agi( return error; agi = XFS_BUF_TO_AGI(*bpp); - pag = 
&mp->m_perag[agno]; - + pag = xfs_perag_get(mp, agno); if (!pag->pagi_init) { pag->pagi_freecount = be32_to_cpu(agi->agi_freecount); pag->pagi_count = be32_to_cpu(agi->agi_count); @@ -1537,6 +1551,7 @@ xfs_ialloc_read_agi( */ ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) || XFS_FORCED_SHUTDOWN(mp)); + xfs_perag_put(pag); return 0; } diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index bd3d816..0317b00 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2695,7 +2695,7 @@ xfs_iflush_cluster( ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *); ilist = kmem_alloc(ilist_size, KM_MAYFAIL|KM_NOFS); if (!ilist) - return 0; + goto out_put; mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask; @@ -2764,6 +2764,8 @@ xfs_iflush_cluster( out_free: read_unlock(&pag->pag_ici_lock); kmem_free(ilist); +out_put: + xfs_perag_put(pag); return 0; @@ -2807,6 +2809,7 @@ cluster_corrupt_out: */ xfs_iflush_abort(iq); kmem_free(ilist); + xfs_perag_put(pag); return XFS_ERROR(EFSCORRUPTED); } diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index eb403b4..9055b60 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -438,18 +438,20 @@ xfs_initialize_perag( } /* This ag is preferred for inodes */ - pag = &mp->m_perag[index]; + pag = xfs_perag_get(mp, index); pag->pagi_inodeok = 1; if (index < max_metadata) pag->pagf_metadata = 1; xfs_initialize_perag_icache(pag); + xfs_perag_put(pag); } } else { /* Setup default behavior for smaller filesystems */ for (index = 0; index < agcount; index++) { - pag = &mp->m_perag[index]; + pag = xfs_perag_get(mp, index); pag->pagi_inodeok = 1; xfs_initialize_perag_icache(pag); + xfs_perag_put(pag); } } return index; @@ -731,12 +733,13 @@ xfs_initialize_perag_data(xfs_mount_t *mp, xfs_agnumber_t agcount) error = xfs_ialloc_pagi_init(mp, NULL, index); if (error) return error; - pag = &mp->m_perag[index]; + pag = xfs_perag_get(mp, index); ifree += 
pag->pagi_freecount; ialloc += pag->pagi_count; bfree += pag->pagf_freeblks; bfreelst += pag->pagf_flcount; btree += pag->pagf_btreeblks; + xfs_perag_put(pag); } /* * Overwrite incore superblock counters with just-read data -- cgit v1.1 From 1c1c6ebcf5284aee4910f3b906ac90c20e510c82 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 11 Jan 2010 11:47:44 +0000 Subject: xfs: Replace per-ag array with a radix tree The use of an array for the per-ag structures requires reallocation of the array when growing the filesystem. This requires locking access to the array to avoid use-after-free situations, and the locking is difficult to get right. To avoid needing to reallocate an array, change the per-ag structures to an allocated object per ag and index them using a tree structure. The AGs are always densely indexed (hence the use of an array), but the number supported is 2^32 and lookups tend to be random and hence indexing needs to scale. A simple choice is a radix tree - it works well with this sort of index. This change also removes another large contiguous allocation from the mount/growfs path in XFS. The growing process now needs to change to only initialise the new AGs required for the extra space, and as such only needs to exclusively lock the tree for inserts. The rest of the code only needs to lock the tree while doing lookups, and hence this will remove all the deadlocks that currently occur on the m_perag_lock as it is now an innermost lock. The lock is also changed to a spinlock from a read/write lock as the hold time is now extremely short. To complete the picture, the per-ag structures will need to be reference counted to ensure that we don't free/modify them while they are still in use. This will be done in a subsequent patch.
Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/xfs_alloc.c | 8 ------- fs/xfs/xfs_bmap.c | 7 +----- fs/xfs/xfs_filestream.c | 13 ++++------ fs/xfs/xfs_fsops.c | 42 ++++++++++++++++----------------- fs/xfs/xfs_ialloc.c | 25 ++------------------ fs/xfs/xfs_itable.c | 4 ---- fs/xfs/xfs_mount.c | 63 +++++++++++++++++++++++++++++++++++++------------ fs/xfs/xfs_mount.h | 14 +++++++---- 8 files changed, 86 insertions(+), 90 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 84070f2..4d66bb7 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -2276,7 +2276,6 @@ xfs_alloc_vextent( * These three force us into a single a.g. */ args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno); - down_read(&mp->m_peraglock); args->pag = xfs_perag_get(mp, args->agno); args->minleft = 0; error = xfs_alloc_fix_freelist(args, 0); @@ -2286,14 +2285,12 @@ xfs_alloc_vextent( goto error0; } if (!args->agbp) { - up_read(&mp->m_peraglock); trace_xfs_alloc_vextent_noagbp(args); break; } args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno); if ((error = xfs_alloc_ag_vextent(args))) goto error0; - up_read(&mp->m_peraglock); break; case XFS_ALLOCTYPE_START_BNO: /* @@ -2345,7 +2342,6 @@ xfs_alloc_vextent( * Loop over allocation groups twice; first time with * trylock set, second time without. 
*/ - down_read(&mp->m_peraglock); for (;;) { args->pag = xfs_perag_get(mp, args->agno); if (no_min) args->minleft = 0; @@ -2408,7 +2404,6 @@ xfs_alloc_vextent( } xfs_perag_put(args->pag); } - up_read(&mp->m_peraglock); if (bump_rotor || (type == XFS_ALLOCTYPE_ANY_AG)) { if (args->agno == sagno) mp->m_agfrotor = (mp->m_agfrotor + 1) % @@ -2438,7 +2433,6 @@ xfs_alloc_vextent( return 0; error0: xfs_perag_put(args->pag); - up_read(&mp->m_peraglock); return error; } @@ -2463,7 +2457,6 @@ xfs_free_extent( args.agno = XFS_FSB_TO_AGNO(args.mp, bno); ASSERT(args.agno < args.mp->m_sb.sb_agcount); args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno); - down_read(&args.mp->m_peraglock); args.pag = xfs_perag_get(args.mp, args.agno); if ((error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING))) goto error0; @@ -2475,7 +2468,6 @@ xfs_free_extent( error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0); error0: xfs_perag_put(args.pag); - up_read(&args.mp->m_peraglock); return error; } diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index a9b95d9..7c6d9ac 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -2629,14 +2629,12 @@ xfs_bmap_btalloc( if (startag == NULLAGNUMBER) startag = ag = 0; notinit = 0; - down_read(&mp->m_peraglock); pag = xfs_perag_get(mp, ag); while (blen < ap->alen) { if (!pag->pagf_init && (error = xfs_alloc_pagf_init(mp, args.tp, ag, XFS_ALLOC_FLAG_TRYLOCK))) { xfs_perag_put(pag); - up_read(&mp->m_peraglock); return error; } /* @@ -2669,10 +2667,8 @@ xfs_bmap_btalloc( error = xfs_filestream_new_ag(ap, &ag); xfs_perag_put(pag); - if (error) { - up_read(&mp->m_peraglock); + if (error) return error; - } /* loop again to set 'blen'*/ startag = NULLAGNUMBER; @@ -2688,7 +2684,6 @@ xfs_bmap_btalloc( pag = xfs_perag_get(mp, ag); } xfs_perag_put(pag); - up_read(&mp->m_peraglock); /* * Since the above loop did a BUF_TRYLOCK, it is * possible that there is space for this request. 
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c index e61f2aa..914d00d 100644 --- a/fs/xfs/xfs_filestream.c +++ b/fs/xfs/xfs_filestream.c @@ -253,8 +253,7 @@ next_ag: /* * Set the allocation group number for a file or a directory, updating inode - * references and per-AG references as appropriate. Must be called with the - * m_peraglock held in read mode. + * references and per-AG references as appropriate. */ static int _xfs_filestream_update_ag( @@ -456,10 +455,10 @@ xfs_filestream_unmount( } /* - * If the mount point's m_perag array is going to be reallocated, all + * If the mount point's m_perag tree is going to be modified, all * outstanding cache entries must be flushed to avoid accessing reference count * addresses that have been freed. The call to xfs_filestream_flush() must be - * made inside the block that holds the m_peraglock in write mode to do the + * made inside the block that holds the m_perag_lock in write mode to do the * reallocation. */ void @@ -531,7 +530,6 @@ xfs_filestream_associate( mp = pip->i_mount; cache = mp->m_filestream; - down_read(&mp->m_peraglock); /* * We have a problem, Houston. @@ -548,10 +546,8 @@ xfs_filestream_associate( * * So, if we can't get the iolock without sleeping then just give up */ - if (!xfs_ilock_nowait(pip, XFS_IOLOCK_EXCL)) { - up_read(&mp->m_peraglock); + if (!xfs_ilock_nowait(pip, XFS_IOLOCK_EXCL)) return 1; - } /* If the parent directory is already in the cache, use its AG. 
*/ item = xfs_mru_cache_lookup(cache, pip->i_ino); @@ -606,7 +602,6 @@ exit_did_pick: exit: xfs_iunlock(pip, XFS_IOLOCK_EXCL); - up_read(&mp->m_peraglock); return -err; } diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index a13919a..37a6f62 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -167,27 +167,14 @@ xfs_growfs_data_private( } new = nb - mp->m_sb.sb_dblocks; oagcount = mp->m_sb.sb_agcount; - if (nagcount > oagcount) { - void *new_perag, *old_perag; - - xfs_filestream_flush(mp); - - new_perag = kmem_zalloc(sizeof(xfs_perag_t) * nagcount, - KM_MAYFAIL); - if (!new_perag) - return XFS_ERROR(ENOMEM); - - down_write(&mp->m_peraglock); - memcpy(new_perag, mp->m_perag, sizeof(xfs_perag_t) * oagcount); - old_perag = mp->m_perag; - mp->m_perag = new_perag; - - mp->m_flags |= XFS_MOUNT_32BITINODES; - nagimax = xfs_initialize_perag(mp, nagcount); - up_write(&mp->m_peraglock); - kmem_free(old_perag); + /* allocate the new per-ag structures */ + if (nagcount > oagcount) { + error = xfs_initialize_perag(mp, nagcount, &nagimax); + if (error) + return error; } + tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFS); tp->t_flags |= XFS_TRANS_RESERVE; if ((error = xfs_trans_reserve(tp, XFS_GROWFS_SPACE_RES(mp), @@ -196,6 +183,11 @@ xfs_growfs_data_private( return error; } + /* + * Write new AG headers to disk. Non-transactional, but written + * synchronously so they are completed prior to the growfs transaction + * being logged. + */ nfree = 0; for (agno = nagcount - 1; agno >= oagcount; agno--, new -= agsize) { /* @@ -359,6 +351,12 @@ xfs_growfs_data_private( goto error0; } } + + /* + * Update changed superblock fields transactionally. These are not + * seen by the rest of the world until the transaction commit applies + * them atomically to the superblock. 
+ */ if (nagcount > oagcount) xfs_trans_mod_sb(tp, XFS_TRANS_SB_AGCOUNT, nagcount - oagcount); if (nb > mp->m_sb.sb_dblocks) @@ -369,9 +367,9 @@ xfs_growfs_data_private( if (dpct) xfs_trans_mod_sb(tp, XFS_TRANS_SB_IMAXPCT, dpct); error = xfs_trans_commit(tp, 0); - if (error) { + if (error) return error; - } + /* New allocation groups fully initialized, so update mount struct */ if (nagimax) mp->m_maxagi = nagimax; @@ -381,6 +379,8 @@ xfs_growfs_data_private( mp->m_maxicount = icount << mp->m_sb.sb_inopblog; } else mp->m_maxicount = 0; + + /* update secondary superblocks. */ for (agno = 1; agno < nagcount; agno++) { error = xfs_read_buf(mp, mp->m_ddev_targp, XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)), diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 884ee13..52c9d00 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -383,11 +383,9 @@ xfs_ialloc_ag_alloc( newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0); be32_add_cpu(&agi->agi_count, newlen); be32_add_cpu(&agi->agi_freecount, newlen); - down_read(&args.mp->m_peraglock); pag = xfs_perag_get(args.mp, agno); pag->pagi_freecount += newlen; xfs_perag_put(pag); - up_read(&args.mp->m_peraglock); agi->agi_newino = cpu_to_be32(newino); /* @@ -489,7 +487,6 @@ xfs_ialloc_ag_select( */ agno = pagno; flags = XFS_ALLOC_FLAG_TRYLOCK; - down_read(&mp->m_peraglock); for (;;) { pag = xfs_perag_get(mp, agno); if (!pag->pagi_init) { @@ -531,7 +528,6 @@ xfs_ialloc_ag_select( goto nextag; } xfs_perag_put(pag); - up_read(&mp->m_peraglock); return agbp; } } @@ -544,18 +540,14 @@ nextag: * No point in iterating over the rest, if we're shutting * down. */ - if (XFS_FORCED_SHUTDOWN(mp)) { - up_read(&mp->m_peraglock); + if (XFS_FORCED_SHUTDOWN(mp)) return NULL; - } agno++; if (agno >= agcount) agno = 0; if (agno == pagno) { - if (flags == 0) { - up_read(&mp->m_peraglock); + if (flags == 0) return NULL; - } flags = 0; } } @@ -777,16 +769,13 @@ nextag: *inop = NULLFSINO; return noroom ? 
ENOSPC : 0; } - down_read(&mp->m_peraglock); pag = xfs_perag_get(mp, tagno); if (pag->pagi_inodeok == 0) { xfs_perag_put(pag); - up_read(&mp->m_peraglock); goto nextag; } error = xfs_ialloc_read_agi(mp, tp, tagno, &agbp); xfs_perag_put(pag); - up_read(&mp->m_peraglock); if (error) goto nextag; agi = XFS_BUF_TO_AGI(agbp); @@ -1015,9 +1004,7 @@ alloc_inode: goto error0; be32_add_cpu(&agi->agi_freecount, -1); xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); - down_read(&mp->m_peraglock); pag->pagi_freecount--; - up_read(&mp->m_peraglock); error = xfs_check_agi_freecount(cur, agi); if (error) @@ -1100,9 +1087,7 @@ xfs_difree( /* * Get the allocation group header. */ - down_read(&mp->m_peraglock); error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); - up_read(&mp->m_peraglock); if (error) { cmn_err(CE_WARN, "xfs_difree: xfs_ialloc_read_agi() returned an error %d on %s. Returning error.", @@ -1169,11 +1154,9 @@ xfs_difree( be32_add_cpu(&agi->agi_count, -ilen); be32_add_cpu(&agi->agi_freecount, -(ilen - 1)); xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT); - down_read(&mp->m_peraglock); pag = xfs_perag_get(mp, agno); pag->pagi_freecount -= ilen - 1; xfs_perag_put(pag); - up_read(&mp->m_peraglock); xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, -ilen); xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1)); @@ -1202,11 +1185,9 @@ xfs_difree( */ be32_add_cpu(&agi->agi_freecount, 1); xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); - down_read(&mp->m_peraglock); pag = xfs_perag_get(mp, agno); pag->pagi_freecount++; xfs_perag_put(pag); - up_read(&mp->m_peraglock); xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, 1); } @@ -1328,9 +1309,7 @@ xfs_imap( xfs_buf_t *agbp; /* agi buffer */ int i; /* temp state */ - down_read(&mp->m_peraglock); error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); - up_read(&mp->m_peraglock); if (error) { xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " "xfs_ialloc_read_agi() returned " diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 
62efab2..940307a 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -420,9 +420,7 @@ xfs_bulkstat( while (XFS_BULKSTAT_UBLEFT(ubleft) && agno < mp->m_sb.sb_agcount) { cond_resched(); bp = NULL; - down_read(&mp->m_peraglock); error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp); - up_read(&mp->m_peraglock); if (error) { /* * Skip this allocation group and go to the next one. @@ -849,9 +847,7 @@ xfs_inumbers( agbp = NULL; while (left > 0 && agno < mp->m_sb.sb_agcount) { if (agbp == NULL) { - down_read(&mp->m_peraglock); error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp); - up_read(&mp->m_peraglock); if (error) { /* * If we can't read the AGI of this ag, diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 9055b60..c04dd83 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -209,13 +209,16 @@ STATIC void xfs_free_perag( xfs_mount_t *mp) { - if (mp->m_perag) { - int agno; + xfs_agnumber_t agno; + struct xfs_perag *pag; - for (agno = 0; agno < mp->m_maxagi; agno++) - if (mp->m_perag[agno].pagb_list) - kmem_free(mp->m_perag[agno].pagb_list); - kmem_free(mp->m_perag); + for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { + spin_lock(&mp->m_perag_lock); + pag = radix_tree_delete(&mp->m_perag_tree, agno); + spin_unlock(&mp->m_perag_lock); + ASSERT(pag); + kmem_free(pag->pagb_list); + kmem_free(pag); } } @@ -389,10 +392,11 @@ xfs_initialize_perag_icache( } } -xfs_agnumber_t +int xfs_initialize_perag( xfs_mount_t *mp, - xfs_agnumber_t agcount) + xfs_agnumber_t agcount, + xfs_agnumber_t *maxagi) { xfs_agnumber_t index, max_metadata; xfs_perag_t *pag; @@ -405,6 +409,33 @@ xfs_initialize_perag( agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0); ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino); + /* + * Walk the current per-ag tree so we don't try to initialise AGs + * that already exist (growfs case). Allocate and insert all the + * AGs we don't find ready for initialisation. 
+ */ + for (index = 0; index < agcount; index++) { + pag = xfs_perag_get(mp, index); + if (pag) { + xfs_perag_put(pag); + continue; + } + pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL); + if (!pag) + return -ENOMEM; + if (radix_tree_preload(GFP_NOFS)) + return -ENOMEM; + spin_lock(&mp->m_perag_lock); + if (radix_tree_insert(&mp->m_perag_tree, index, pag)) { + BUG(); + spin_unlock(&mp->m_perag_lock); + kmem_free(pag); + return -EEXIST; + } + spin_unlock(&mp->m_perag_lock); + radix_tree_preload_end(); + } + /* Clear the mount flag if no inode can overflow 32 bits * on this filesystem, or if specifically requested.. */ @@ -454,7 +485,9 @@ xfs_initialize_perag( xfs_perag_put(pag); } } - return index; + if (maxagi) + *maxagi = index; + return 0; } void @@ -1155,13 +1188,13 @@ xfs_mountfs( /* * Allocate and initialize the per-ag data. */ - init_rwsem(&mp->m_peraglock); - mp->m_perag = kmem_zalloc(sbp->sb_agcount * sizeof(xfs_perag_t), - KM_MAYFAIL); - if (!mp->m_perag) + spin_lock_init(&mp->m_perag_lock); + INIT_RADIX_TREE(&mp->m_perag_tree, GFP_NOFS); + error = xfs_initialize_perag(mp, sbp->sb_agcount, &mp->m_maxagi); + if (error) { + cmn_err(CE_WARN, "XFS: Failed per-ag init: %d", error); goto out_remove_uuid; - - mp->m_maxagi = xfs_initialize_perag(mp, sbp->sb_agcount); + } if (!sbp->sb_logblocks) { cmn_err(CE_WARN, "XFS: no log defined"); diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index f8a68a2..cfa7a5d 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -207,8 +207,8 @@ typedef struct xfs_mount { uint m_ag_maxlevels; /* XFS_AG_MAXLEVELS */ uint m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */ uint m_in_maxlevels; /* max inobt btree levels. 
*/ - struct xfs_perag *m_perag; /* per-ag accounting info */ - struct rw_semaphore m_peraglock; /* lock for m_perag (pointer) */ + struct radix_tree_root m_perag_tree; /* per-ag accounting info */ + spinlock_t m_perag_lock; /* lock for m_perag_tree */ struct mutex m_growlock; /* growfs mutex */ int m_fixedfsid[2]; /* unchanged for life of FS */ uint m_dmevmask; /* DMI events for this FS */ @@ -389,7 +389,12 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d) static inline struct xfs_perag * xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno) { - return &mp->m_perag[agno]; + struct xfs_perag *pag; + + spin_lock(&mp->m_perag_lock); + pag = radix_tree_lookup(&mp->m_perag_tree, agno); + spin_unlock(&mp->m_perag_lock); + return pag; } static inline void @@ -450,7 +455,8 @@ extern struct xfs_dmops xfs_dmcore_xfs; #endif /* __KERNEL__ */ extern void xfs_mod_sb(struct xfs_trans *, __int64_t); -extern xfs_agnumber_t xfs_initialize_perag(struct xfs_mount *, xfs_agnumber_t); +extern int xfs_initialize_perag(struct xfs_mount *, xfs_agnumber_t, + xfs_agnumber_t *); extern void xfs_sb_from_disk(struct xfs_sb *, struct xfs_dsb *); extern void xfs_sb_to_disk(struct xfs_dsb *, struct xfs_sb *, __int64_t); -- cgit v1.1 From aed3bb90abaf0b42e8c8747e192f7bb97f445279 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 11 Jan 2010 11:47:45 +0000 Subject: xfs: Reference count per-ag structures Reference count the per-ag structures to ensure that we keep get/put pairs balanced. Assert that the reference counts are zero at unmount time to catch leaks. In future, reference counts will enable us to safely remove perag structures by allowing us to detect when they are no longer in use. 
Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/xfs_ag.h | 4 ++-- fs/xfs/xfs_mount.c | 1 + fs/xfs/xfs_mount.h | 11 +++++++++-- 3 files changed, 12 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index 6702bd8..18ae43f 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h @@ -196,8 +196,8 @@ typedef struct xfs_perag_busy { #define XFS_PAGB_NUM_SLOTS 128 #endif -typedef struct xfs_perag -{ +typedef struct xfs_perag { + atomic_t pag_ref; /* perag reference count */ char pagf_init; /* this agf's entry is initialized */ char pagi_init; /* this agi's entry is initialized */ char pagf_metadata; /* the agf is preferred to be metadata */ diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index c04dd83..f241fec 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -215,6 +215,7 @@ xfs_free_perag( for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { spin_lock(&mp->m_perag_lock); pag = radix_tree_delete(&mp->m_perag_tree, agno); + ASSERT(atomic_read(&pag->pag_ref) == 0); spin_unlock(&mp->m_perag_lock); ASSERT(pag); kmem_free(pag->pagb_list); diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index cfa7a5d..16b2212 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -384,7 +384,7 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d) } /* - * perag get/put wrappers for eventual ref counting + * perag get/put wrappers for ref counting */ static inline struct xfs_perag * xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno) @@ -393,6 +393,12 @@ xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno) spin_lock(&mp->m_perag_lock); pag = radix_tree_lookup(&mp->m_perag_tree, agno); + if (pag) { + ASSERT(atomic_read(&pag->pag_ref) >= 0); + /* catch leaks in the positive direction during testing */ + ASSERT(atomic_read(&pag->pag_ref) < 1000); + atomic_inc(&pag->pag_ref); + } spin_unlock(&mp->m_perag_lock); return pag; } @@ -400,7 +406,8 @@ xfs_perag_get(struct xfs_mount 
*mp, xfs_agnumber_t agno) static inline void xfs_perag_put(struct xfs_perag *pag) { - /* nothing to see here, move along */ + ASSERT(atomic_read(&pag->pag_ref) > 0); + atomic_dec(&pag->pag_ref); } /* -- cgit v1.1 From 0fa800fbd549736dfdc1d7761f87e33dc8cd973b Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 11 Jan 2010 11:47:46 +0000 Subject: xfs: Add trace points for per-ag refcount debugging. Uninline xfs_perag_{get,put} so that tracepoints can be inserted into them to speed debugging of reference count problems. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/linux-2.6/xfs_trace.h | 27 +++++++++++++++++++++++++++ fs/xfs/xfs_ag.h | 2 ++ fs/xfs/xfs_mount.c | 34 ++++++++++++++++++++++++++++++++++ fs/xfs/xfs_mount.h | 25 ++----------------------- 4 files changed, 65 insertions(+), 23 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h index 3353aef..1bb09e7 100644 --- a/fs/xfs/linux-2.6/xfs_trace.h +++ b/fs/xfs/linux-2.6/xfs_trace.h @@ -78,6 +78,33 @@ DECLARE_EVENT_CLASS(xfs_attr_list_class, ) ) +#define DEFINE_PERAG_REF_EVENT(name) \ +TRACE_EVENT(name, \ + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount, \ + unsigned long caller_ip), \ + TP_ARGS(mp, agno, refcount, caller_ip), \ + TP_STRUCT__entry( \ + __field(dev_t, dev) \ + __field(xfs_agnumber_t, agno) \ + __field(int, refcount) \ + __field(unsigned long, caller_ip) \ + ), \ + TP_fast_assign( \ + __entry->dev = mp->m_super->s_dev; \ + __entry->agno = agno; \ + __entry->refcount = refcount; \ + __entry->caller_ip = caller_ip; \ + ), \ + TP_printk("dev %d:%d agno %u refcount %d caller %pf", \ + MAJOR(__entry->dev), MINOR(__entry->dev), \ + __entry->agno, \ + __entry->refcount, \ + (char *)__entry->caller_ip) \ +); + +DEFINE_PERAG_REF_EVENT(xfs_perag_get) +DEFINE_PERAG_REF_EVENT(xfs_perag_put) + #define DEFINE_ATTR_LIST_EVENT(name) \ DEFINE_EVENT(xfs_attr_list_class, name, \ TP_PROTO(struct 
xfs_attr_list_context *ctx), \ diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index 18ae43f..963bc27 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h @@ -197,6 +197,8 @@ typedef struct xfs_perag_busy { #endif typedef struct xfs_perag { + struct xfs_mount *pag_mount; /* owner filesystem */ + xfs_agnumber_t pag_agno; /* AG this structure belongs to */ atomic_t pag_ref; /* perag reference count */ char pagf_init; /* this agf's entry is initialized */ char pagi_init; /* this agi's entry is initialized */ diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index f241fec..049dbc7 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -201,6 +201,38 @@ xfs_uuid_unmount( /* + * Reference counting access wrappers to the perag structures. + */ +struct xfs_perag * +xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno) +{ + struct xfs_perag *pag; + int ref = 0; + + spin_lock(&mp->m_perag_lock); + pag = radix_tree_lookup(&mp->m_perag_tree, agno); + if (pag) { + ASSERT(atomic_read(&pag->pag_ref) >= 0); + /* catch leaks in the positive direction during testing */ + ASSERT(atomic_read(&pag->pag_ref) < 1000); + ref = atomic_inc_return(&pag->pag_ref); + } + spin_unlock(&mp->m_perag_lock); + trace_xfs_perag_get(mp, agno, ref, _RET_IP_); + return pag; +} + +void +xfs_perag_put(struct xfs_perag *pag) +{ + int ref; + + ASSERT(atomic_read(&pag->pag_ref) > 0); + ref = atomic_dec_return(&pag->pag_ref); + trace_xfs_perag_put(pag->pag_mount, pag->pag_agno, ref, _RET_IP_); +} + +/* * Free up the resources associated with a mount structure. Assume that * the structure was initially zeroed, so we can tell which fields got * initialized. 
@@ -433,6 +465,8 @@ xfs_initialize_perag( kmem_free(pag); return -EEXIST; } + pag->pag_agno = index; + pag->pag_mount = mp; spin_unlock(&mp->m_perag_lock); radix_tree_preload_end(); } diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 16b2212..e62fd1c 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -386,29 +386,8 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d) /* * perag get/put wrappers for ref counting */ -static inline struct xfs_perag * -xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno) -{ - struct xfs_perag *pag; - - spin_lock(&mp->m_perag_lock); - pag = radix_tree_lookup(&mp->m_perag_tree, agno); - if (pag) { - ASSERT(atomic_read(&pag->pag_ref) >= 0); - /* catch leaks in the positive direction during testing */ - ASSERT(atomic_read(&pag->pag_ref) < 1000); - atomic_inc(&pag->pag_ref); - } - spin_unlock(&mp->m_perag_lock); - return pag; -} - -static inline void -xfs_perag_put(struct xfs_perag *pag) -{ - ASSERT(atomic_read(&pag->pag_ref) > 0); - atomic_dec(&pag->pag_ref); -} +struct xfs_perag *xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno); +void xfs_perag_put(struct xfs_perag *pag); /* * Per-cpu superblock locking functions -- cgit v1.1 From b657fc82a3ca6d7ad16a59e81765f0fb0e86cdbb Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 11 Jan 2010 11:47:47 +0000 Subject: xfs: Kill filestreams cache flush The filestreams cache flush is not needed in the sync code as it does not affect data writeback, and it is now not used by the growfs code, either, so kill it. 
Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/linux-2.6/xfs_sync.c | 3 --- fs/xfs/xfs_filestream.c | 14 -------------- fs/xfs/xfs_filestream.h | 1 - fs/xfs/xfs_mru_cache.c | 2 +- fs/xfs/xfs_mru_cache.h | 1 - 5 files changed, 1 insertion(+), 20 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index cc964fa..b58f841 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -451,9 +451,6 @@ xfs_quiesce_data( xfs_sync_data(mp, SYNC_WAIT); xfs_qm_sync(mp, SYNC_WAIT); - /* drop inode references pinned by filestreams */ - xfs_filestream_flush(mp); - /* write superblock and hoover up shutdown errors */ error = xfs_sync_fsdata(mp, SYNC_WAIT); diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c index 914d00d..390850e 100644 --- a/fs/xfs/xfs_filestream.c +++ b/fs/xfs/xfs_filestream.c @@ -455,20 +455,6 @@ xfs_filestream_unmount( } /* - * If the mount point's m_perag tree is going to be modified, all - * outstanding cache entries must be flushed to avoid accessing reference count - * addresses that have been freed. The call to xfs_filestream_flush() must be - * made inside the block that holds the m_perag_lock in write mode to do the - * reallocation. - */ -void -xfs_filestream_flush( - xfs_mount_t *mp) -{ - xfs_mru_cache_flush(mp->m_filestream); -} - -/* * Return the AG of the filestream the file or directory belongs to, or * NULLAGNUMBER otherwise. 
*/ diff --git a/fs/xfs/xfs_filestream.h b/fs/xfs/xfs_filestream.h index 58378b2..260f757 100644 --- a/fs/xfs/xfs_filestream.h +++ b/fs/xfs/xfs_filestream.h @@ -135,7 +135,6 @@ int xfs_filestream_init(void); void xfs_filestream_uninit(void); int xfs_filestream_mount(struct xfs_mount *mp); void xfs_filestream_unmount(struct xfs_mount *mp); -void xfs_filestream_flush(struct xfs_mount *mp); xfs_agnumber_t xfs_filestream_lookup_ag(struct xfs_inode *ip); int xfs_filestream_associate(struct xfs_inode *dip, struct xfs_inode *ip); void xfs_filestream_deassociate(struct xfs_inode *ip); diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c index 4b0613d..45ce15d 100644 --- a/fs/xfs/xfs_mru_cache.c +++ b/fs/xfs/xfs_mru_cache.c @@ -398,7 +398,7 @@ exit: * guaranteed that all the free functions for all the elements have finished * executing and the reaper is not running. */ -void +static void xfs_mru_cache_flush( xfs_mru_cache_t *mru) { diff --git a/fs/xfs/xfs_mru_cache.h b/fs/xfs/xfs_mru_cache.h index 5d439f3..36dd3ec 100644 --- a/fs/xfs/xfs_mru_cache.h +++ b/fs/xfs/xfs_mru_cache.h @@ -42,7 +42,6 @@ void xfs_mru_cache_uninit(void); int xfs_mru_cache_create(struct xfs_mru_cache **mrup, unsigned int lifetime_ms, unsigned int grp_count, xfs_mru_cache_free_func_t free_func); -void xfs_mru_cache_flush(xfs_mru_cache_t *mru); void xfs_mru_cache_destroy(struct xfs_mru_cache *mru); int xfs_mru_cache_insert(struct xfs_mru_cache *mru, unsigned long key, void *value); -- cgit v1.1 From 8b26c5825e023b1bccac7afd174ebe55b8905cb1 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 11 Jan 2010 11:47:48 +0000 Subject: xfs: handle ENOMEM correctly during initialisation of perag structures Add proper error handling in case an error occurs while initializing new perag structures for a mount point. The mount structure is restored to its previous state by deleting and freeing any perag structures added during the call. 
Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/xfs_mount.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 049dbc7..be643e5 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -432,11 +432,13 @@ xfs_initialize_perag( xfs_agnumber_t *maxagi) { xfs_agnumber_t index, max_metadata; + xfs_agnumber_t first_initialised = 0; xfs_perag_t *pag; xfs_agino_t agino; xfs_ino_t ino; xfs_sb_t *sbp = &mp->m_sb; xfs_ino_t max_inum = XFS_MAXINUMBER_32; + int error = -ENOMEM; /* Check to see if the filesystem can overflow 32 bit inodes */ agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0); @@ -453,17 +455,20 @@ xfs_initialize_perag( xfs_perag_put(pag); continue; } + if (!first_initialised) + first_initialised = index; pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL); if (!pag) - return -ENOMEM; + goto out_unwind; if (radix_tree_preload(GFP_NOFS)) - return -ENOMEM; + goto out_unwind; spin_lock(&mp->m_perag_lock); if (radix_tree_insert(&mp->m_perag_tree, index, pag)) { BUG(); spin_unlock(&mp->m_perag_lock); - kmem_free(pag); - return -EEXIST; + radix_tree_preload_end(); + error = -EEXIST; + goto out_unwind; } pag->pag_agno = index; pag->pag_mount = mp; @@ -523,6 +528,14 @@ xfs_initialize_perag( if (maxagi) *maxagi = index; return 0; + +out_unwind: + kmem_free(pag); + for (; index > first_initialised; index--) { + pag = radix_tree_delete(&mp->m_perag_tree, index); + kmem_free(pag); + } + return error; } void -- cgit v1.1 From e57336ff7fc7520bec7b3a7741043bdebaf622ea Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 11 Jan 2010 11:47:49 +0000 Subject: xfs: embed the pagb_list array in the perag structure Now that the perag structure is allocated memory rather than held in an array, we don't need to have the busy extent array external to the structure. 
Embed it into the perag structure to avoid needing an extra allocation when setting up. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/xfs_ag.h | 10 ++-------- fs/xfs/xfs_alloc.c | 4 ++-- fs/xfs/xfs_mount.c | 3 +-- 3 files changed, 5 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index 963bc27..b1a5a1f 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h @@ -187,14 +187,8 @@ typedef struct xfs_perag_busy { /* * Per-ag incore structure, copies of information in agf and agi, * to improve the performance of allocation group selection. - * - * pick sizes which fit in allocation buckets well */ -#if (BITS_PER_LONG == 32) -#define XFS_PAGB_NUM_SLOTS 84 -#elif (BITS_PER_LONG == 64) #define XFS_PAGB_NUM_SLOTS 128 -#endif typedef struct xfs_perag { struct xfs_mount *pag_mount; /* owner filesystem */ @@ -212,8 +206,6 @@ typedef struct xfs_perag { __uint32_t pagf_btreeblks; /* # of blocks held in AGF btrees */ xfs_agino_t pagi_freecount; /* number of free inodes */ xfs_agino_t pagi_count; /* number of allocated inodes */ - int pagb_count; /* pagb slots in use */ - xfs_perag_busy_t *pagb_list; /* unstable blocks */ /* * Inode allocation search lookup optimisation. 
@@ -232,6 +224,8 @@ typedef struct xfs_perag { rwlock_t pag_ici_lock; /* incore inode lock */ struct radix_tree_root pag_ici_root; /* incore inode cache root */ #endif + int pagb_count; /* pagb slots in use */ + xfs_perag_busy_t pagb_list[XFS_PAGB_NUM_SLOTS]; /* unstable blocks */ } xfs_perag_t; /* diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 4d66bb7..8aa181d 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -2200,8 +2200,8 @@ xfs_alloc_read_agf( pag->pagf_levels[XFS_BTNUM_CNTi] = be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]); spin_lock_init(&pag->pagb_lock); - pag->pagb_list = kmem_zalloc(XFS_PAGB_NUM_SLOTS * - sizeof(xfs_perag_busy_t), KM_SLEEP); + pag->pagb_count = 0; + memset(pag->pagb_list, 0, sizeof(pag->pagb_list)); pag->pagf_init = 1; } #ifdef DEBUG diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index be643e5..0df5045 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -247,10 +247,9 @@ xfs_free_perag( for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { spin_lock(&mp->m_perag_lock); pag = radix_tree_delete(&mp->m_perag_tree, agno); + ASSERT(pag); ASSERT(atomic_read(&pag->pag_ref) == 0); spin_unlock(&mp->m_perag_lock); - ASSERT(pag); - kmem_free(pag->pagb_list); kmem_free(pag); } } -- cgit v1.1 From 873ff5501d8cd1a21045d6c1da34f0c3876bc235 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 13 Jan 2010 22:17:57 +0000 Subject: xfs: clean up log buffer writes Don't bother using XFS_bwrite as it doesn't provide much code for our use case. Instead opencode it and fold xlog_bdstrat_cb into the new xlog_bdstrat helper. 
Signed-off-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/linux-2.6/xfs_buf.h | 2 -- fs/xfs/xfs_log.c | 67 +++++++++++++++++++++++----------------------- 2 files changed, 33 insertions(+), 36 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index a34c7b5..c20a760 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h @@ -408,8 +408,6 @@ static inline int XFS_bwrite(xfs_buf_t *bp) return error; } -#define XFS_bdstrat(bp) xfs_buf_iorequest(bp) - #define xfs_iowait(bp) xfs_buf_iowait(bp) #define xfs_baread(target, rablkno, ralen) \ diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 600b5b0..0d17516 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -50,7 +50,6 @@ kmem_zone_t *xfs_log_ticket_zone; (off) += (bytes);} /* Local miscellaneous function prototypes */ -STATIC int xlog_bdstrat_cb(struct xfs_buf *); STATIC int xlog_commit_record(xfs_mount_t *mp, xlog_ticket_t *ticket, xlog_in_core_t **, xfs_lsn_t *); STATIC xlog_t * xlog_alloc_log(xfs_mount_t *mp, @@ -988,35 +987,6 @@ xlog_iodone(xfs_buf_t *bp) } /* xlog_iodone */ /* - * The bdstrat callback function for log bufs. This gives us a central - * place to trap bufs in case we get hit by a log I/O error and need to - * shutdown. Actually, in practice, even when we didn't get a log error, - * we transition the iclogs to IOERROR state *after* flushing all existing - * iclogs to disk. This is because we don't want anymore new transactions to be - * started or completed afterwards. 
- */ -STATIC int -xlog_bdstrat_cb(struct xfs_buf *bp) -{ - xlog_in_core_t *iclog; - - iclog = XFS_BUF_FSPRIVATE(bp, xlog_in_core_t *); - - if ((iclog->ic_state & XLOG_STATE_IOERROR) == 0) { - /* note for irix bstrat will need struct bdevsw passed - * Fix the following macro if the code ever is merged - */ - XFS_bdstrat(bp); - return 0; - } - - XFS_BUF_ERROR(bp, EIO); - XFS_BUF_STALE(bp); - xfs_biodone(bp); - return XFS_ERROR(EIO); -} - -/* * Return size of each in-core log record buffer. * * All machines get 8 x 32kB buffers by default, unless tuned otherwise. @@ -1158,7 +1128,6 @@ xlog_alloc_log(xfs_mount_t *mp, if (!bp) goto out_free_log; XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone); - XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb); XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1); ASSERT(XFS_BUF_ISBUSY(bp)); ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); @@ -1196,7 +1165,6 @@ xlog_alloc_log(xfs_mount_t *mp, if (!XFS_BUF_CPSEMA(bp)) ASSERT(0); XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone); - XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb); XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1); iclog->ic_bp = bp; iclog->ic_data = bp->b_addr; @@ -1343,6 +1311,37 @@ xlog_grant_push_ail(xfs_mount_t *mp, xfs_trans_ail_push(log->l_ailp, threshold_lsn); } /* xlog_grant_push_ail */ +/* + * The bdstrat callback function for log bufs. This gives us a central + * place to trap bufs in case we get hit by a log I/O error and need to + * shutdown. Actually, in practice, even when we didn't get a log error, + * we transition the iclogs to IOERROR state *after* flushing all existing + * iclogs to disk. This is because we don't want anymore new transactions to be + * started or completed afterwards. 
+ */ +STATIC int +xlog_bdstrat( + struct xfs_buf *bp) +{ + struct xlog_in_core *iclog; + + iclog = XFS_BUF_FSPRIVATE(bp, xlog_in_core_t *); + if (iclog->ic_state & XLOG_STATE_IOERROR) { + XFS_BUF_ERROR(bp, EIO); + XFS_BUF_STALE(bp); + xfs_biodone(bp); + /* + * It would seem logical to return EIO here, but we rely on + * the log state machine to propagate I/O errors instead of + * doing it here. + */ + return 0; + } + + bp->b_flags |= _XBF_RUN_QUEUES; + xfs_buf_iorequest(bp); + return 0; +} /* * Flush out the in-core log (iclog) to the on-disk log in an asynchronous @@ -1462,7 +1461,7 @@ xlog_sync(xlog_t *log, */ XFS_BUF_WRITE(bp); - if ((error = XFS_bwrite(bp))) { + if ((error = xlog_bdstrat(bp))) { xfs_ioerror_alert("xlog_sync", log->l_mp, bp, XFS_BUF_ADDR(bp)); return error; @@ -1502,7 +1501,7 @@ xlog_sync(xlog_t *log, /* account for internal log which doesn't start at block #0 */ XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart); XFS_BUF_WRITE(bp); - if ((error = XFS_bwrite(bp))) { + if ((error = xlog_bdstrat(bp))) { xfs_ioerror_alert("xlog_sync (split)", log->l_mp, bp, XFS_BUF_ADDR(bp)); return error; -- cgit v1.1 From 64e0bc7d2a6609ad265757a600e2a0d93c8adb47 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 13 Jan 2010 22:17:58 +0000 Subject: xfs: clean up xfs_bwrite Fold XFS_bwrite into its only caller, xfs_bwrite, and move it into xfs_buf.c instead of leaving it as a fairly large inline function.
Signed-off-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/linux-2.6/xfs_buf.c | 27 +++++++++++++++++++++++++++ fs/xfs/linux-2.6/xfs_buf.h | 19 +------------------ fs/xfs/xfs_rw.c | 31 ------------------------------- fs/xfs/xfs_rw.h | 1 - 4 files changed, 28 insertions(+), 50 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 18ae3ba..492465c 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -1051,6 +1051,33 @@ xfs_buf_ioerror( } int +xfs_bwrite( + struct xfs_mount *mp, + struct xfs_buf *bp) +{ + int iowait = (bp->b_flags & XBF_ASYNC) == 0; + int error = 0; + + bp->b_strat = xfs_bdstrat_cb; + bp->b_mount = mp; + bp->b_flags |= XBF_WRITE; + if (!iowait) + bp->b_flags |= _XBF_RUN_QUEUES; + + xfs_buf_delwri_dequeue(bp); + xfs_buf_iostrategy(bp); + + if (iowait) { + error = xfs_buf_iowait(bp); + if (error) + xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); + xfs_buf_relse(bp); + } + + return error; +} + +int xfs_bawrite( void *mp, struct xfs_buf *bp) diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index c20a760..f69b8e7 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h @@ -232,6 +232,7 @@ extern void xfs_buf_lock(xfs_buf_t *); extern void xfs_buf_unlock(xfs_buf_t *); /* Buffer Read and Write Routines */ +extern int xfs_bwrite(struct xfs_mount *mp, struct xfs_buf *bp); extern int xfs_bawrite(void *mp, xfs_buf_t *bp); extern void xfs_bdwrite(void *mp, xfs_buf_t *bp); extern void xfs_buf_ioend(xfs_buf_t *, int); @@ -390,24 +391,6 @@ static inline void xfs_buf_relse(xfs_buf_t *bp) #define xfs_biozero(bp, off, len) \ xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO) - -static inline int XFS_bwrite(xfs_buf_t *bp) -{ - int iowait = (bp->b_flags & XBF_ASYNC) == 0; - int error = 0; - - if (!iowait) - bp->b_flags |= _XBF_RUN_QUEUES; - - xfs_buf_delwri_dequeue(bp); - xfs_buf_iostrategy(bp); - if (iowait) { - error = xfs_buf_iowait(bp); - 
xfs_buf_relse(bp); - } - return error; -} - #define xfs_iowait(bp) xfs_buf_iowait(bp) #define xfs_baread(target, rablkno, ralen) \ diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c index 5aa07ca..9d933a1 100644 --- a/fs/xfs/xfs_rw.c +++ b/fs/xfs/xfs_rw.c @@ -306,37 +306,6 @@ xfs_read_buf( } /* - * Wrapper around bwrite() so that we can trap - * write errors, and act accordingly. - */ -int -xfs_bwrite( - struct xfs_mount *mp, - struct xfs_buf *bp) -{ - int error; - - /* - * XXXsup how does this work for quotas. - */ - XFS_BUF_SET_BDSTRAT_FUNC(bp, xfs_bdstrat_cb); - bp->b_mount = mp; - XFS_BUF_WRITE(bp); - - if ((error = XFS_bwrite(bp))) { - ASSERT(mp); - /* - * Cannot put a buftrace here since if the buffer is not - * B_HOLD then we will brelse() the buffer before returning - * from bwrite and we could be tracing a buffer that has - * been reused. - */ - xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); - } - return (error); -} - -/* * helper function to extract extent size hint from inode */ xfs_extlen_t diff --git a/fs/xfs/xfs_rw.h b/fs/xfs/xfs_rw.h index 571f217..ff68eb5 100644 --- a/fs/xfs/xfs_rw.h +++ b/fs/xfs/xfs_rw.h @@ -40,7 +40,6 @@ xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb) * Prototypes for functions in xfs_rw.c. */ extern int xfs_write_clear_setuid(struct xfs_inode *ip); -extern int xfs_bwrite(struct xfs_mount *mp, struct xfs_buf *bp); extern int xfs_bioerror(struct xfs_buf *bp); extern int xfs_bioerror_relse(struct xfs_buf *bp); extern int xfs_read_buf(struct xfs_mount *mp, xfs_buftarg_t *btp, -- cgit v1.1 From 4e23471a3f3aba885ea70100db47ccacb5f069f6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 13 Jan 2010 22:17:56 +0000 Subject: xfs: move more buffer helpers into xfs_buf.c Move xfsbdstrat and xfs_bdstrat_cb from xfs_lrw.c and xfs_bioerror and xfs_bioerror_relse from xfs_rw.c into xfs_buf.c. This also means xfs_bioerror and xfs_bioerror_relse can be marked static now. 
Signed-off-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/linux-2.6/xfs_buf.c | 120 +++++++++++++++++++++++++++++++++++++++++++++ fs/xfs/linux-2.6/xfs_buf.h | 4 ++ fs/xfs/linux-2.6/xfs_lrw.c | 47 ------------------ fs/xfs/linux-2.6/xfs_lrw.h | 3 -- fs/xfs/xfs_rw.c | 82 ------------------------------- fs/xfs/xfs_rw.h | 2 - 6 files changed, 124 insertions(+), 134 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 492465c..158fad4 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -1112,6 +1112,126 @@ xfs_bdwrite( xfs_buf_delwri_queue(bp, 1); } +/* + * Called when we want to stop a buffer from getting written or read. + * We attach the EIO error, muck with its flags, and call biodone + * so that the proper iodone callbacks get called. + */ +STATIC int +xfs_bioerror( + xfs_buf_t *bp) +{ +#ifdef XFSERRORDEBUG + ASSERT(XFS_BUF_ISREAD(bp) || bp->b_iodone); +#endif + + /* + * No need to wait until the buffer is unpinned, we aren't flushing it. + */ + XFS_BUF_ERROR(bp, EIO); + + /* + * We're calling biodone, so delete XBF_DONE flag. + */ + XFS_BUF_UNREAD(bp); + XFS_BUF_UNDELAYWRITE(bp); + XFS_BUF_UNDONE(bp); + XFS_BUF_STALE(bp); + + XFS_BUF_CLR_BDSTRAT_FUNC(bp); + xfs_biodone(bp); + + return EIO; +} + +/* + * Same as xfs_bioerror, except that we are releasing the buffer + * here ourselves, and avoiding the biodone call. + * This is meant for userdata errors; metadata bufs come with + * iodone functions attached, so that we can track down errors. + */ +STATIC int +xfs_bioerror_relse( + struct xfs_buf *bp) +{ + int64_t fl = XFS_BUF_BFLAGS(bp); + /* + * No need to wait until the buffer is unpinned. + * We aren't flushing it. + * + * chunkhold expects B_DONE to be set, whether + * we actually finish the I/O or not. We don't want to + * change that interface. 
+ */ + XFS_BUF_UNREAD(bp); + XFS_BUF_UNDELAYWRITE(bp); + XFS_BUF_DONE(bp); + XFS_BUF_STALE(bp); + XFS_BUF_CLR_IODONE_FUNC(bp); + XFS_BUF_CLR_BDSTRAT_FUNC(bp); + if (!(fl & XFS_B_ASYNC)) { + /* + * Mark b_error and B_ERROR _both_. + * Lot's of chunkcache code assumes that. + * There's no reason to mark error for + * ASYNC buffers. + */ + XFS_BUF_ERROR(bp, EIO); + XFS_BUF_FINISH_IOWAIT(bp); + } else { + xfs_buf_relse(bp); + } + + return EIO; +} + + +/* + * All xfs metadata buffers except log state machine buffers + * get this attached as their b_bdstrat callback function. + * This is so that we can catch a buffer + * after prematurely unpinning it to forcibly shutdown the filesystem. + */ +int +xfs_bdstrat_cb( + struct xfs_buf *bp) +{ + if (XFS_FORCED_SHUTDOWN(bp->b_mount)) { + trace_xfs_bdstrat_shut(bp, _RET_IP_); + /* + * Metadata write that didn't get logged but + * written delayed anyway. These aren't associated + * with a transaction, and can be ignored. + */ + if (!bp->b_iodone && !XFS_BUF_ISREAD(bp)) + return xfs_bioerror_relse(bp); + else + return xfs_bioerror(bp); + } + + xfs_buf_iorequest(bp); + return 0; +} + +/* + * Wrapper around bdstrat so that we can stop data from going to disk in case + * we are shutting down the filesystem. Typically user data goes thru this + * path; one of the exceptions is the superblock. 
+ */ +void +xfsbdstrat( + struct xfs_mount *mp, + struct xfs_buf *bp) +{ + if (XFS_FORCED_SHUTDOWN(mp)) { + trace_xfs_bdstrat_shut(bp, _RET_IP_); + xfs_bioerror_relse(bp); + return; + } + + xfs_buf_iorequest(bp); +} + STATIC void _xfs_buf_ioend( xfs_buf_t *bp, diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index f69b8e7..9a29d18 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h @@ -235,6 +235,10 @@ extern void xfs_buf_unlock(xfs_buf_t *); extern int xfs_bwrite(struct xfs_mount *mp, struct xfs_buf *bp); extern int xfs_bawrite(void *mp, xfs_buf_t *bp); extern void xfs_bdwrite(void *mp, xfs_buf_t *bp); + +extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *); +extern int xfs_bdstrat_cb(struct xfs_buf *); + extern void xfs_buf_ioend(xfs_buf_t *, int); extern void xfs_buf_ioerror(xfs_buf_t *, int); extern int xfs_buf_iorequest(xfs_buf_t *); diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index 0d32457..c80fa00d 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c @@ -784,53 +784,6 @@ write_retry: } /* - * All xfs metadata buffers except log state machine buffers - * get this attached as their b_bdstrat callback function. - * This is so that we can catch a buffer - * after prematurely unpinning it to forcibly shutdown the filesystem. - */ -int -xfs_bdstrat_cb(struct xfs_buf *bp) -{ - if (XFS_FORCED_SHUTDOWN(bp->b_mount)) { - trace_xfs_bdstrat_shut(bp, _RET_IP_); - /* - * Metadata write that didn't get logged but - * written delayed anyway. These aren't associated - * with a transaction, and can be ignored. - */ - if (XFS_BUF_IODONE_FUNC(bp) == NULL && - (XFS_BUF_ISREAD(bp)) == 0) - return (xfs_bioerror_relse(bp)); - else - return (xfs_bioerror(bp)); - } - - xfs_buf_iorequest(bp); - return 0; -} - -/* - * Wrapper around bdstrat so that we can stop data from going to disk in case - * we are shutting down the filesystem. 
Typically user data goes thru this - * path; one of the exceptions is the superblock. - */ -void -xfsbdstrat( - struct xfs_mount *mp, - struct xfs_buf *bp) -{ - ASSERT(mp); - if (!XFS_FORCED_SHUTDOWN(mp)) { - xfs_buf_iorequest(bp); - return; - } - - trace_xfs_bdstrat_shut(bp, _RET_IP_); - xfs_bioerror_relse(bp); -} - -/* * If the underlying (data/log/rt) device is readonly, there are some * operations that cannot proceed. */ diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h index d1f7789..342ae8c 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.h +++ b/fs/xfs/linux-2.6/xfs_lrw.h @@ -22,9 +22,6 @@ struct xfs_mount; struct xfs_inode; struct xfs_buf; -/* errors from xfsbdstrat() must be extracted from the buffer */ -extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *); -extern int xfs_bdstrat_cb(struct xfs_buf *); extern int xfs_dev_is_read_only(struct xfs_mount *, char *); extern int xfs_zero_eof(struct xfs_inode *, xfs_off_t, xfs_fsize_t); diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c index 9d933a1..abb2c45 100644 --- a/fs/xfs/xfs_rw.c +++ b/fs/xfs/xfs_rw.c @@ -153,88 +153,6 @@ xfs_do_force_shutdown( } } - -/* - * Called when we want to stop a buffer from getting written or read. - * We attach the EIO error, muck with its flags, and call biodone - * so that the proper iodone callbacks get called. - */ -int -xfs_bioerror( - xfs_buf_t *bp) -{ - -#ifdef XFSERRORDEBUG - ASSERT(XFS_BUF_ISREAD(bp) || bp->b_iodone); -#endif - - /* - * No need to wait until the buffer is unpinned. - * We aren't flushing it. - */ - XFS_BUF_ERROR(bp, EIO); - /* - * We're calling biodone, so delete B_DONE flag. Either way - * we have to call the iodone callback, and calling biodone - * probably is the best way since it takes care of - * GRIO as well. 
- */ - XFS_BUF_UNREAD(bp); - XFS_BUF_UNDELAYWRITE(bp); - XFS_BUF_UNDONE(bp); - XFS_BUF_STALE(bp); - - XFS_BUF_CLR_BDSTRAT_FUNC(bp); - xfs_biodone(bp); - - return (EIO); -} - -/* - * Same as xfs_bioerror, except that we are releasing the buffer - * here ourselves, and avoiding the biodone call. - * This is meant for userdata errors; metadata bufs come with - * iodone functions attached, so that we can track down errors. - */ -int -xfs_bioerror_relse( - xfs_buf_t *bp) -{ - int64_t fl; - - ASSERT(XFS_BUF_IODONE_FUNC(bp) != xfs_buf_iodone_callbacks); - ASSERT(XFS_BUF_IODONE_FUNC(bp) != xlog_iodone); - - fl = XFS_BUF_BFLAGS(bp); - /* - * No need to wait until the buffer is unpinned. - * We aren't flushing it. - * - * chunkhold expects B_DONE to be set, whether - * we actually finish the I/O or not. We don't want to - * change that interface. - */ - XFS_BUF_UNREAD(bp); - XFS_BUF_UNDELAYWRITE(bp); - XFS_BUF_DONE(bp); - XFS_BUF_STALE(bp); - XFS_BUF_CLR_IODONE_FUNC(bp); - XFS_BUF_CLR_BDSTRAT_FUNC(bp); - if (!(fl & XFS_B_ASYNC)) { - /* - * Mark b_error and B_ERROR _both_. - * Lot's of chunkcache code assumes that. - * There's no reason to mark error for - * ASYNC buffers. - */ - XFS_BUF_ERROR(bp, EIO); - XFS_BUF_FINISH_IOWAIT(bp); - } else { - xfs_buf_relse(bp); - } - return (EIO); -} - /* * Prints out an ALERT message about I/O error. */ diff --git a/fs/xfs/xfs_rw.h b/fs/xfs/xfs_rw.h index ff68eb5..a54c3b7 100644 --- a/fs/xfs/xfs_rw.h +++ b/fs/xfs/xfs_rw.h @@ -40,8 +40,6 @@ xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb) * Prototypes for functions in xfs_rw.c. 
*/ extern int xfs_write_clear_setuid(struct xfs_inode *ip); -extern int xfs_bioerror(struct xfs_buf *bp); -extern int xfs_bioerror_relse(struct xfs_buf *bp); extern int xfs_read_buf(struct xfs_mount *mp, xfs_buftarg_t *btp, xfs_daddr_t blkno, int len, uint flags, struct xfs_buf **bpp); -- cgit v1.1 From e2bcd936eb95d0019ca5e05f9fdd27e770ddded1 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 20 Jan 2010 10:44:58 +1100 Subject: xfs: directory names are unsigned Convert the struct xfs_name to use unsigned chars for the name strings to match both what is stored on disk (__uint8_t) and what the VFS expects (unsigned char). Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_types.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h index d725428..b099045 100644 --- a/fs/xfs/xfs_types.h +++ b/fs/xfs/xfs_types.h @@ -151,8 +151,8 @@ typedef enum { } xfs_btnum_t; struct xfs_name { - const char *name; - int len; + const unsigned char *name; + int len; }; #endif /* __XFS_TYPES_H__ */ -- cgit v1.1 From 046ea753130fc51d885835458bf8c1d84765b9ac Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 20 Jan 2010 10:47:08 +1100 Subject: xfs: convert DM ops to use unsigned char names dmops uses a signed char for its namespace event. To be consistent with the rest of the code, convert them to unsigned char for the namespace string.
Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_mount.h | 3 ++- fs/xfs/xfs_vnodeops.c | 6 ++++-- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index e62fd1c..f4d1441 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -78,7 +78,8 @@ typedef int (*xfs_send_destroy_t)(struct xfs_inode *, dm_right_t); typedef int (*xfs_send_namesp_t)(dm_eventtype_t, struct xfs_mount *, struct xfs_inode *, dm_right_t, struct xfs_inode *, dm_right_t, - const char *, const char *, mode_t, int, int); + const unsigned char *, const unsigned char *, + mode_t, int, int); typedef int (*xfs_send_mount_t)(struct xfs_mount *, dm_right_t, char *, char *); typedef void (*xfs_send_unmount_t)(struct xfs_mount *, struct xfs_inode *, diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 6f26875..9f7c001 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -2199,7 +2199,8 @@ xfs_symlink( if (DM_EVENT_ENABLED(dp, DM_EVENT_SYMLINK)) { error = XFS_SEND_NAMESP(mp, DM_EVENT_SYMLINK, dp, DM_RIGHT_NULL, NULL, DM_RIGHT_NULL, - link_name->name, target_path, 0, 0, 0); + link_name->name, + (unsigned char *)target_path, 0, 0, 0); if (error) return error; } @@ -2395,7 +2396,8 @@ std_return: dp, DM_RIGHT_NULL, error ? NULL : ip, DM_RIGHT_NULL, link_name->name, - target_path, 0, error, 0); + (unsigned char *)target_path, + 0, error, 0); } if (!error) -- cgit v1.1 From 2bc754213d40d67c39ddd58cf240f2b948e1951e Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 20 Jan 2010 10:47:17 +1100 Subject: xfs: convert dirnameops to unsigned char names To be consistent across the codebase, convert the dirnameops to pass the directory names by unsigned char strings. 
Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_da_btree.c | 4 ++-- fs/xfs/xfs_da_btree.h | 5 +++-- fs/xfs/xfs_dir2.c | 4 ++-- 3 files changed, 7 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c index c0c8869..0ca556b 100644 --- a/fs/xfs/xfs_da_btree.c +++ b/fs/xfs/xfs_da_btree.c @@ -1534,8 +1534,8 @@ xfs_da_hashname(const __uint8_t *name, int namelen) enum xfs_dacmp xfs_da_compname( struct xfs_da_args *args, - const char *name, - int len) + const unsigned char *name, + int len) { return (args->namelen == len && memcmp(args->name, name, len) == 0) ? XFS_CMP_EXACT : XFS_CMP_DIFFERENT; diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h index 30cd08f..fe9f5a8 100644 --- a/fs/xfs/xfs_da_btree.h +++ b/fs/xfs/xfs_da_btree.h @@ -209,7 +209,8 @@ typedef struct xfs_da_state { */ struct xfs_nameops { xfs_dahash_t (*hashname)(struct xfs_name *); - enum xfs_dacmp (*compname)(struct xfs_da_args *, const char *, int); + enum xfs_dacmp (*compname)(struct xfs_da_args *, + const unsigned char *, int); }; @@ -260,7 +261,7 @@ int xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno, uint xfs_da_hashname(const __uint8_t *name_string, int name_length); enum xfs_dacmp xfs_da_compname(struct xfs_da_args *args, - const char *name, int len); + const unsigned char *name, int len); xfs_da_state_t *xfs_da_state_alloc(void); diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c index 93634a7..c21c527 100644 --- a/fs/xfs/xfs_dir2.c +++ b/fs/xfs/xfs_dir2.c @@ -66,8 +66,8 @@ xfs_ascii_ci_hashname( STATIC enum xfs_dacmp xfs_ascii_ci_compname( struct xfs_da_args *args, - const char *name, - int len) + const unsigned char *name, + int len) { enum xfs_dacmp result; int i; -- cgit v1.1 From a3380ae39fa321282c407ba5e1835e14b64853d9 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 20 Jan 2010 10:47:25 +1100 Subject: xfs: make xfs_dir_cilookup_result use unsigned char For consistency with the result 
of the code. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_dir2.c | 2 +- fs/xfs/xfs_dir2.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c index c21c527..3a8c6ba 100644 --- a/fs/xfs/xfs_dir2.c +++ b/fs/xfs/xfs_dir2.c @@ -247,7 +247,7 @@ xfs_dir_createname( int xfs_dir_cilookup_result( struct xfs_da_args *args, - const char *name, + const unsigned char *name, int len) { if (args->cmpresult == XFS_CMP_DIFFERENT) diff --git a/fs/xfs/xfs_dir2.h b/fs/xfs/xfs_dir2.h index 1d9ef96..74a3b10 100644 --- a/fs/xfs/xfs_dir2.h +++ b/fs/xfs/xfs_dir2.h @@ -100,7 +100,7 @@ extern int xfs_dir2_isleaf(struct xfs_trans *tp, struct xfs_inode *dp, extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db, struct xfs_dabuf *bp); -extern int xfs_dir_cilookup_result(struct xfs_da_args *args, const char *name, - int len); +extern int xfs_dir_cilookup_result(struct xfs_da_args *args, + const unsigned char *name, int len); #endif /* __XFS_DIR2_H__ */ -- cgit v1.1 From b9c48649577dfc4a8c263c106d518effa24ea54b Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 20 Jan 2010 10:47:39 +1100 Subject: xfs: xfs_buf_iomove() doesn't care about signedness xfs_buf_iomove() uses xfs_caddr_t as its parameter types, but it doesn't care about the signedness of the variables as it is just copying the data. Change the prototype to use void * so that we don't get sign warnings at call sites.
Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_buf.c | 2 +- fs/xfs/linux-2.6/xfs_buf.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 158fad4..efd745b 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -1443,7 +1443,7 @@ xfs_buf_iomove( xfs_buf_t *bp, /* buffer to process */ size_t boff, /* starting buffer offset */ size_t bsize, /* length to copy */ - caddr_t data, /* data address */ + void *data, /* data address */ xfs_buf_rw_t mode) /* read/write/zero flag */ { size_t bend, cpoff, csize; diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index 9a29d18..4f2ad66 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h @@ -243,7 +243,7 @@ extern void xfs_buf_ioend(xfs_buf_t *, int); extern void xfs_buf_ioerror(xfs_buf_t *, int); extern int xfs_buf_iorequest(xfs_buf_t *); extern int xfs_buf_iowait(xfs_buf_t *); -extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, xfs_caddr_t, +extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *, xfs_buf_rw_t); static inline int xfs_buf_iostrategy(xfs_buf_t *bp) -- cgit v1.1 From a9273ca5c6814f393e18ed66645f817b2b71e9ad Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 20 Jan 2010 10:47:48 +1100 Subject: xfs: convert attr to use unsigned names MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To be consistent with the directory code, the attr code should use unsigned names. Convert the names from the vfs at the highest level to unsigned, and ensure they are consistently used as unsigned down to disk.
Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_acl.c | 11 ++++++----- fs/xfs/linux-2.6/xfs_ioctl.c | 18 +++++++++--------- fs/xfs/linux-2.6/xfs_ioctl.h | 12 ++++++------ fs/xfs/linux-2.6/xfs_ioctl32.c | 4 ++-- fs/xfs/linux-2.6/xfs_iops.c | 4 ++-- fs/xfs/linux-2.6/xfs_xattr.c | 27 +++++++++++++++++++-------- fs/xfs/xfs_acl.h | 4 ++-- fs/xfs/xfs_attr.c | 38 +++++++++++++++++++++++--------------- fs/xfs/xfs_attr.h | 2 +- fs/xfs/xfs_attr_leaf.c | 28 ++++++++++++++-------------- fs/xfs/xfs_attr_sf.h | 2 +- fs/xfs/xfs_vnodeops.h | 10 +++++----- 12 files changed, 90 insertions(+), 70 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c index 883ca5a..bf85bbe 100644 --- a/fs/xfs/linux-2.6/xfs_acl.c +++ b/fs/xfs/linux-2.6/xfs_acl.c @@ -106,7 +106,7 @@ xfs_get_acl(struct inode *inode, int type) struct posix_acl *acl; struct xfs_acl *xfs_acl; int len = sizeof(struct xfs_acl); - char *ea_name; + unsigned char *ea_name; int error; acl = get_cached_acl(inode, type); @@ -133,7 +133,8 @@ xfs_get_acl(struct inode *inode, int type) if (!xfs_acl) return ERR_PTR(-ENOMEM); - error = -xfs_attr_get(ip, ea_name, (char *)xfs_acl, &len, ATTR_ROOT); + error = -xfs_attr_get(ip, ea_name, (unsigned char *)xfs_acl, + &len, ATTR_ROOT); if (error) { /* * If the attribute doesn't exist make sure we have a negative @@ -162,7 +163,7 @@ STATIC int xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl) { struct xfs_inode *ip = XFS_I(inode); - char *ea_name; + unsigned char *ea_name; int error; if (S_ISLNK(inode->i_mode)) @@ -194,7 +195,7 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl) (sizeof(struct xfs_acl_entry) * (XFS_ACL_MAX_ENTRIES - acl->a_count)); - error = -xfs_attr_set(ip, ea_name, (char *)xfs_acl, + error = -xfs_attr_set(ip, ea_name, (unsigned char *)xfs_acl, len, ATTR_ROOT); kfree(xfs_acl); @@ -262,7 +263,7 @@ xfs_set_mode(struct inode *inode, mode_t mode) } static int 
-xfs_acl_exists(struct inode *inode, char *name) +xfs_acl_exists(struct inode *inode, unsigned char *name) { int len = sizeof(struct xfs_acl); diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index a034cf6..3906e85 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -447,12 +447,12 @@ xfs_attrlist_by_handle( int xfs_attrmulti_attr_get( struct inode *inode, - char *name, - char __user *ubuf, + unsigned char *name, + unsigned char __user *ubuf, __uint32_t *len, __uint32_t flags) { - char *kbuf; + unsigned char *kbuf; int error = EFAULT; if (*len > XATTR_SIZE_MAX) @@ -476,12 +476,12 @@ xfs_attrmulti_attr_get( int xfs_attrmulti_attr_set( struct inode *inode, - char *name, - const char __user *ubuf, + unsigned char *name, + const unsigned char __user *ubuf, __uint32_t len, __uint32_t flags) { - char *kbuf; + unsigned char *kbuf; int error = EFAULT; if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) @@ -501,7 +501,7 @@ xfs_attrmulti_attr_set( int xfs_attrmulti_attr_remove( struct inode *inode, - char *name, + unsigned char *name, __uint32_t flags) { if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) @@ -519,7 +519,7 @@ xfs_attrmulti_by_handle( xfs_fsop_attrmulti_handlereq_t am_hreq; struct dentry *dentry; unsigned int i, size; - char *attr_name; + unsigned char *attr_name; if (!capable(CAP_SYS_ADMIN)) return -XFS_ERROR(EPERM); @@ -547,7 +547,7 @@ xfs_attrmulti_by_handle( error = 0; for (i = 0; i < am_hreq.opcount; i++) { - ops[i].am_error = strncpy_from_user(attr_name, + ops[i].am_error = strncpy_from_user((char *)attr_name, ops[i].am_attrname, MAXNAMELEN); if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN) error = -ERANGE; diff --git a/fs/xfs/linux-2.6/xfs_ioctl.h b/fs/xfs/linux-2.6/xfs_ioctl.h index 7bd7c6a..d56173b 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.h +++ b/fs/xfs/linux-2.6/xfs_ioctl.h @@ -45,23 +45,23 @@ xfs_readlink_by_handle( extern int xfs_attrmulti_attr_get( struct inode *inode, - char *name, - char __user 
*ubuf, + unsigned char *name, + unsigned char __user *ubuf, __uint32_t *len, __uint32_t flags); extern int - xfs_attrmulti_attr_set( +xfs_attrmulti_attr_set( struct inode *inode, - char *name, - const char __user *ubuf, + unsigned char *name, + const unsigned char __user *ubuf, __uint32_t len, __uint32_t flags); extern int xfs_attrmulti_attr_remove( struct inode *inode, - char *name, + unsigned char *name, __uint32_t flags); extern struct dentry * diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index be1527b..0bf6d61 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c @@ -411,7 +411,7 @@ xfs_compat_attrmulti_by_handle( compat_xfs_fsop_attrmulti_handlereq_t am_hreq; struct dentry *dentry; unsigned int i, size; - char *attr_name; + unsigned char *attr_name; if (!capable(CAP_SYS_ADMIN)) return -XFS_ERROR(EPERM); @@ -440,7 +440,7 @@ xfs_compat_attrmulti_by_handle( error = 0; for (i = 0; i < am_hreq.opcount; i++) { - ops[i].am_error = strncpy_from_user(attr_name, + ops[i].am_error = strncpy_from_user((char *)attr_name, compat_ptr(ops[i].am_attrname), MAXNAMELEN); if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN) diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 2259460..e8566bb 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -140,10 +140,10 @@ xfs_init_security( struct xfs_inode *ip = XFS_I(inode); size_t length; void *value; - char *name; + unsigned char *name; int error; - error = security_inode_init_security(inode, dir, &name, + error = security_inode_init_security(inode, dir, (char **)&name, &value, &length); if (error) { if (error == -EOPNOTSUPP) diff --git a/fs/xfs/linux-2.6/xfs_xattr.c b/fs/xfs/linux-2.6/xfs_xattr.c index 0b18788..fa01b9d 100644 --- a/fs/xfs/linux-2.6/xfs_xattr.c +++ b/fs/xfs/linux-2.6/xfs_xattr.c @@ -45,7 +45,7 @@ xfs_xattr_get(struct dentry *dentry, const char *name, value = NULL; } - error = -xfs_attr_get(ip, name, value, 
&asize, xflags); + error = -xfs_attr_get(ip, (unsigned char *)name, value, &asize, xflags); if (error) return error; return asize; @@ -67,8 +67,9 @@ xfs_xattr_set(struct dentry *dentry, const char *name, const void *value, xflags |= ATTR_REPLACE; if (!value) - return -xfs_attr_remove(ip, name, xflags); - return -xfs_attr_set(ip, name, (void *)value, size, xflags); + return -xfs_attr_remove(ip, (unsigned char *)name, xflags); + return -xfs_attr_set(ip, (unsigned char *)name, + (void *)value, size, xflags); } static struct xattr_handler xfs_xattr_user_handler = { @@ -124,8 +125,13 @@ static const char *xfs_xattr_prefix(int flags) } static int -xfs_xattr_put_listent(struct xfs_attr_list_context *context, int flags, - char *name, int namelen, int valuelen, char *value) +xfs_xattr_put_listent( + struct xfs_attr_list_context *context, + int flags, + unsigned char *name, + int namelen, + int valuelen, + unsigned char *value) { unsigned int prefix_len = xfs_xattr_prefix_len(flags); char *offset; @@ -148,7 +154,7 @@ xfs_xattr_put_listent(struct xfs_attr_list_context *context, int flags, offset = (char *)context->alist + context->count; strncpy(offset, xfs_xattr_prefix(flags), prefix_len); offset += prefix_len; - strncpy(offset, name, namelen); /* real name */ + strncpy(offset, (char *)name, namelen); /* real name */ offset += namelen; *offset = '\0'; context->count += prefix_len + namelen + 1; @@ -156,8 +162,13 @@ xfs_xattr_put_listent(struct xfs_attr_list_context *context, int flags, } static int -xfs_xattr_put_listent_sizes(struct xfs_attr_list_context *context, int flags, - char *name, int namelen, int valuelen, char *value) +xfs_xattr_put_listent_sizes( + struct xfs_attr_list_context *context, + int flags, + unsigned char *name, + int namelen, + int valuelen, + unsigned char *value) { context->count += xfs_xattr_prefix_len(flags) + namelen + 1; return 0; diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h index 00fd357c..d13eeba 100644 --- a/fs/xfs/xfs_acl.h +++ 
b/fs/xfs/xfs_acl.h @@ -36,8 +36,8 @@ struct xfs_acl { }; /* On-disk XFS extended attribute names */ -#define SGI_ACL_FILE "SGI_ACL_FILE" -#define SGI_ACL_DEFAULT "SGI_ACL_DEFAULT" +#define SGI_ACL_FILE (unsigned char *)"SGI_ACL_FILE" +#define SGI_ACL_DEFAULT (unsigned char *)"SGI_ACL_DEFAULT" #define SGI_ACL_FILE_SIZE (sizeof(SGI_ACL_FILE)-1) #define SGI_ACL_DEFAULT_SIZE (sizeof(SGI_ACL_DEFAULT)-1) diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c index 9d11eba..f7b426a 100644 --- a/fs/xfs/xfs_attr.c +++ b/fs/xfs/xfs_attr.c @@ -93,12 +93,12 @@ STATIC int xfs_attr_rmtval_remove(xfs_da_args_t *args); STATIC int xfs_attr_name_to_xname( struct xfs_name *xname, - const char *aname) + const unsigned char *aname) { if (!aname) return EINVAL; xname->name = aname; - xname->len = strlen(aname); + xname->len = strlen((char *)aname); if (xname->len >= MAXNAMELEN) return EFAULT; /* match IRIX behaviour */ @@ -124,7 +124,7 @@ STATIC int xfs_attr_get_int( struct xfs_inode *ip, struct xfs_name *name, - char *value, + unsigned char *value, int *valuelenp, int flags) { @@ -171,8 +171,8 @@ xfs_attr_get_int( int xfs_attr_get( xfs_inode_t *ip, - const char *name, - char *value, + const unsigned char *name, + unsigned char *value, int *valuelenp, int flags) { @@ -235,8 +235,12 @@ xfs_attr_calc_size( } STATIC int -xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name, - char *value, int valuelen, int flags) +xfs_attr_set_int( + struct xfs_inode *dp, + struct xfs_name *name, + unsigned char *value, + int valuelen, + int flags) { xfs_da_args_t args; xfs_fsblock_t firstblock; @@ -452,8 +456,8 @@ out: int xfs_attr_set( xfs_inode_t *dp, - const char *name, - char *value, + const unsigned char *name, + unsigned char *value, int valuelen, int flags) { @@ -600,7 +604,7 @@ out: int xfs_attr_remove( xfs_inode_t *dp, - const char *name, + const unsigned char *name, int flags) { int error; @@ -669,9 +673,13 @@ xfs_attr_list_int(xfs_attr_list_context_t *context) */ /*ARGSUSED*/ STATIC int 
-xfs_attr_put_listent(xfs_attr_list_context_t *context, int flags, - char *name, int namelen, - int valuelen, char *value) +xfs_attr_put_listent( + xfs_attr_list_context_t *context, + int flags, + unsigned char *name, + int namelen, + int valuelen, + unsigned char *value) { struct attrlist *alist = (struct attrlist *)context->alist; attrlist_ent_t *aep; @@ -1980,7 +1988,7 @@ xfs_attr_rmtval_get(xfs_da_args_t *args) xfs_bmbt_irec_t map[ATTR_RMTVALUE_MAPSIZE]; xfs_mount_t *mp; xfs_daddr_t dblkno; - xfs_caddr_t dst; + void *dst; xfs_buf_t *bp; int nmap, error, tmp, valuelen, blkcnt, i; xfs_dablk_t lblkno; @@ -2039,7 +2047,7 @@ xfs_attr_rmtval_set(xfs_da_args_t *args) xfs_inode_t *dp; xfs_bmbt_irec_t map; xfs_daddr_t dblkno; - xfs_caddr_t src; + void *src; xfs_buf_t *bp; xfs_dablk_t lblkno; int blkcnt, valuelen, nmap, error, tmp, committed; diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h index 9c3a243..e920d68 100644 --- a/fs/xfs/xfs_attr.h +++ b/fs/xfs/xfs_attr.h @@ -113,7 +113,7 @@ typedef struct attrlist_cursor_kern { typedef int (*put_listent_func_t)(struct xfs_attr_list_context *, int, - char *, int, int, char *); + unsigned char *, int, int, unsigned char *); typedef struct xfs_attr_list_context { struct xfs_inode *dp; /* inode */ diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c index baf41b5..52519a2 100644 --- a/fs/xfs/xfs_attr_leaf.c +++ b/fs/xfs/xfs_attr_leaf.c @@ -521,11 +521,11 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args) sfe = &sf->list[0]; for (i = 0; i < sf->hdr.count; i++) { - nargs.name = (char *)sfe->nameval; + nargs.name = sfe->nameval; nargs.namelen = sfe->namelen; - nargs.value = (char *)&sfe->nameval[nargs.namelen]; + nargs.value = &sfe->nameval[nargs.namelen]; nargs.valuelen = sfe->valuelen; - nargs.hashval = xfs_da_hashname((char *)sfe->nameval, + nargs.hashval = xfs_da_hashname(sfe->nameval, sfe->namelen); nargs.flags = XFS_ATTR_NSP_ONDISK_TO_ARGS(sfe->flags); error = xfs_attr_leaf_lookup_int(bp, &nargs); /* set a->index */ 
@@ -612,10 +612,10 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context) for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) { error = context->put_listent(context, sfe->flags, - (char *)sfe->nameval, + sfe->nameval, (int)sfe->namelen, (int)sfe->valuelen, - (char*)&sfe->nameval[sfe->namelen]); + &sfe->nameval[sfe->namelen]); /* * Either search callback finished early or @@ -659,8 +659,8 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context) } sbp->entno = i; - sbp->hash = xfs_da_hashname((char *)sfe->nameval, sfe->namelen); - sbp->name = (char *)sfe->nameval; + sbp->hash = xfs_da_hashname(sfe->nameval, sfe->namelen); + sbp->name = sfe->nameval; sbp->namelen = sfe->namelen; /* These are bytes, and both on-disk, don't endian-flip */ sbp->valuelen = sfe->valuelen; @@ -818,9 +818,9 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff) continue; ASSERT(entry->flags & XFS_ATTR_LOCAL); name_loc = xfs_attr_leaf_name_local(leaf, i); - nargs.name = (char *)name_loc->nameval; + nargs.name = name_loc->nameval; nargs.namelen = name_loc->namelen; - nargs.value = (char *)&name_loc->nameval[nargs.namelen]; + nargs.value = &name_loc->nameval[nargs.namelen]; nargs.valuelen = be16_to_cpu(name_loc->valuelen); nargs.hashval = be32_to_cpu(entry->hashval); nargs.flags = XFS_ATTR_NSP_ONDISK_TO_ARGS(entry->flags); @@ -2370,10 +2370,10 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context) retval = context->put_listent(context, entry->flags, - (char *)name_loc->nameval, + name_loc->nameval, (int)name_loc->namelen, be16_to_cpu(name_loc->valuelen), - (char *)&name_loc->nameval[name_loc->namelen]); + &name_loc->nameval[name_loc->namelen]); if (retval) return retval; } else { @@ -2397,15 +2397,15 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context) return retval; retval = context->put_listent(context, entry->flags, - (char *)name_rmt->name, + name_rmt->name, (int)name_rmt->namelen, valuelen, - 
(char*)args.value); + args.value); kmem_free(args.value); } else { retval = context->put_listent(context, entry->flags, - (char *)name_rmt->name, + name_rmt->name, (int)name_rmt->namelen, valuelen, NULL); diff --git a/fs/xfs/xfs_attr_sf.h b/fs/xfs/xfs_attr_sf.h index 76ab7b0..919756e 100644 --- a/fs/xfs/xfs_attr_sf.h +++ b/fs/xfs/xfs_attr_sf.h @@ -52,7 +52,7 @@ typedef struct xfs_attr_sf_sort { __uint8_t valuelen; /* length of value */ __uint8_t flags; /* flags bits (see xfs_attr_leaf.h) */ xfs_dahash_t hash; /* this entry's hash value */ - char *name; /* name value, pointer into buffer */ + unsigned char *name; /* name value, pointer into buffer */ } xfs_attr_sf_sort_t; #define XFS_ATTR_SF_ENTSIZE_BYNAME(nlen,vlen) /* space name/value uses */ \ diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h index 167a467..774f407 100644 --- a/fs/xfs/xfs_vnodeops.h +++ b/fs/xfs/xfs_vnodeops.h @@ -43,11 +43,11 @@ int xfs_change_file_space(struct xfs_inode *ip, int cmd, int xfs_rename(struct xfs_inode *src_dp, struct xfs_name *src_name, struct xfs_inode *src_ip, struct xfs_inode *target_dp, struct xfs_name *target_name, struct xfs_inode *target_ip); -int xfs_attr_get(struct xfs_inode *ip, const char *name, char *value, - int *valuelenp, int flags); -int xfs_attr_set(struct xfs_inode *dp, const char *name, char *value, - int valuelen, int flags); -int xfs_attr_remove(struct xfs_inode *dp, const char *name, int flags); +int xfs_attr_get(struct xfs_inode *ip, const unsigned char *name, + unsigned char *value, int *valuelenp, int flags); +int xfs_attr_set(struct xfs_inode *dp, const unsigned char *name, + unsigned char *value, int valuelen, int flags); +int xfs_attr_remove(struct xfs_inode *dp, const unsigned char *name, int flags); int xfs_attr_list(struct xfs_inode *dp, char *buffer, int bufsize, int flags, struct attrlist_cursor_kern *cursor); ssize_t xfs_read(struct xfs_inode *ip, struct kiocb *iocb, -- cgit v1.1 From 4a24cb71407dc25035d75dd3d118e0e55679e217 Mon Sep 17 
00:00:00 2001 From: Dave Chinner Date: Wed, 20 Jan 2010 10:48:05 +1100 Subject: xfs: clean up sign warnings in dir2 code We are now consistently using unsigned char strings for names so fix up the remaining warnings in the dir2 code to complete the cleanup. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_dir2.c | 2 +- fs/xfs/xfs_dir2_block.c | 9 +++++---- fs/xfs/xfs_dir2_leaf.c | 2 +- fs/xfs/xfs_dir2_sf.c | 2 +- 4 files changed, 8 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c index 3a8c6ba..42520f0 100644 --- a/fs/xfs/xfs_dir2.c +++ b/fs/xfs/xfs_dir2.c @@ -44,7 +44,7 @@ #include "xfs_vnodeops.h" #include "xfs_trace.h" -struct xfs_name xfs_name_dotdot = {"..", 2}; +struct xfs_name xfs_name_dotdot = { (unsigned char *)"..", 2}; /* * ASCII case-insensitive (ie. A-Z) support for directories that was diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c index ddc4ecc..779a267 100644 --- a/fs/xfs/xfs_dir2_block.c +++ b/fs/xfs/xfs_dir2_block.c @@ -57,8 +57,8 @@ static xfs_dahash_t xfs_dir_hash_dot, xfs_dir_hash_dotdot; void xfs_dir_startup(void) { - xfs_dir_hash_dot = xfs_da_hashname(".", 1); - xfs_dir_hash_dotdot = xfs_da_hashname("..", 2); + xfs_dir_hash_dot = xfs_da_hashname((unsigned char *)".", 1); + xfs_dir_hash_dotdot = xfs_da_hashname((unsigned char *)"..", 2); } /* @@ -513,8 +513,9 @@ xfs_dir2_block_getdents( /* * If it didn't fit, set the final offset to here & return. 
*/ - if (filldir(dirent, dep->name, dep->namelen, cook & 0x7fffffff, - be64_to_cpu(dep->inumber), DT_UNKNOWN)) { + if (filldir(dirent, (char *)dep->name, dep->namelen, + cook & 0x7fffffff, be64_to_cpu(dep->inumber), + DT_UNKNOWN)) { *offset = cook & 0x7fffffff; xfs_da_brelse(NULL, bp); return 0; diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c index 29f484c..e2d8985 100644 --- a/fs/xfs/xfs_dir2_leaf.c +++ b/fs/xfs/xfs_dir2_leaf.c @@ -1081,7 +1081,7 @@ xfs_dir2_leaf_getdents( dep = (xfs_dir2_data_entry_t *)ptr; length = xfs_dir2_data_entsize(dep->namelen); - if (filldir(dirent, dep->name, dep->namelen, + if (filldir(dirent, (char *)dep->name, dep->namelen, xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff, be64_to_cpu(dep->inumber), DT_UNKNOWN)) break; diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c index 9d4f17a..c1a5945 100644 --- a/fs/xfs/xfs_dir2_sf.c +++ b/fs/xfs/xfs_dir2_sf.c @@ -782,7 +782,7 @@ xfs_dir2_sf_getdents( } ino = xfs_dir2_sf_get_inumber(sfp, xfs_dir2_sf_inumberp(sfep)); - if (filldir(dirent, sfep->name, sfep->namelen, + if (filldir(dirent, (char *)sfep->name, sfep->namelen, off & 0x7fffffff, ino, DT_UNKNOWN)) { *offset = off & 0x7fffffff; return 0; -- cgit v1.1 From 58c75cfb51393a52b45262394c1fa81514b4d9bd Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 20 Jan 2010 10:49:18 +1100 Subject: xfs: make compile warn about char sign mismatches again The -funsigned-char directive has no effect anymore as the XFS build is clean. However, the kernel build hides pointer sign differences so turn that back on so that we can clean up all the mismatches prior to a userspace code resync.
Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 56641fe..1926701 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -16,7 +16,7 @@ # Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA # -EXTRA_CFLAGS += -I$(src) -I$(src)/linux-2.6 -funsigned-char +EXTRA_CFLAGS += -I$(src) -I$(src)/linux-2.6 -Wpointer-sign XFS_LINUX := linux-2.6 -- cgit v1.1 From f0a0eaa8da08ebc6519cacd731df05bbb4ca47ce Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 20 Jan 2010 10:50:06 +1100 Subject: xfs: suppress spurious uninitialised var warning in xfs_bmapi() Initialise the xfs_bmalloca_t structure to zero to avoid uninitialised variable warnings. This is done by zeroing the arg structure rather than using the uninitialised_var() trick so we know for certain that the structure is correctly initialised as xfs_bmapi is a very complex function and it is difficult to prove warnings are spurious. 
Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_bmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 7c6d9ac..1869fb9 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -4471,7 +4471,7 @@ xfs_bmapi( xfs_fsblock_t abno; /* allocated block number */ xfs_extlen_t alen; /* allocated extent length */ xfs_fileoff_t aoff; /* allocated file offset */ - xfs_bmalloca_t bma; /* args for xfs_bmap_alloc */ + xfs_bmalloca_t bma = { 0 }; /* args for xfs_bmap_alloc */ xfs_btree_cur_t *cur; /* bmap btree cursor */ xfs_fileoff_t end; /* end of mapped file region */ int eof; /* we've hit the end of extents */ -- cgit v1.1 From 587aa0feb74ffe3239b5e26ff5d017ba9f5daec9 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 20 Jan 2010 12:04:53 +1100 Subject: xfs: rearrange xfs_mod_sb() to avoid array subscript warning gcc warns of an array subscript out of bounds in xfs_mod_sb(). The code is written in such a way that if the array subscript is out of bounds, then it will assert fail. Rearrange the code to avoid the bounds check warning. 
Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_mount.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 0df5045..d95bd18 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1631,15 +1631,14 @@ xfs_mod_sb(xfs_trans_t *tp, __int64_t fields) xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb, fields); /* find modified range */ + f = (xfs_sb_field_t)xfs_highbit64((__uint64_t)fields); + ASSERT((1LL << f) & XFS_SB_MOD_BITS); + last = xfs_sb_info[f + 1].offset - 1; f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields); ASSERT((1LL << f) & XFS_SB_MOD_BITS); first = xfs_sb_info[f].offset; - f = (xfs_sb_field_t)xfs_highbit64((__uint64_t)fields); - ASSERT((1LL << f) & XFS_SB_MOD_BITS); - last = xfs_sb_info[f + 1].offset - 1; - xfs_trans_log_buf(tp, bp, first, last); } -- cgit v1.1 From 512dd1abd9539a474f2792eeaf6783c59ad7778a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 13 Jan 2010 22:05:48 +0000 Subject: xfs: kill XFS_QMOPT_ASYNC The option is unused and one of the few remaining users of xfs_bawrite, so let's get rid of it. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Alex Elder --- fs/xfs/quota/xfs_dquot.c | 2 -- fs/xfs/xfs_quota.h | 1 - 2 files changed, 3 deletions(-) (limited to 'fs') diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c index d7c7eea..a447493 100644 --- a/fs/xfs/quota/xfs_dquot.c +++ b/fs/xfs/quota/xfs_dquot.c @@ -1253,8 +1253,6 @@ xfs_qm_dqflush( if (flags & XFS_QMOPT_DELWRI) { xfs_bdwrite(mp, bp); - } else if (flags & XFS_QMOPT_ASYNC) { - error = xfs_bawrite(mp, bp); } else { error = xfs_bwrite(mp, bp); } diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h index 91bfd60..21d11d9 100644 --- a/fs/xfs/xfs_quota.h +++ b/fs/xfs/xfs_quota.h @@ -226,7 +226,6 @@ typedef struct xfs_qoff_logformat { * flags for dqflush and dqflush_all. 
*/ #define XFS_QMOPT_SYNC 0x1000000 -#define XFS_QMOPT_ASYNC 0x2000000 #define XFS_QMOPT_DELWRI 0x4000000 /* -- cgit v1.1 From 4d1f88d75b00c4d23f4c51305ab5b779a86ef74e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 13 Jan 2010 22:05:49 +0000 Subject: xfs: clean up error handling in xfs_trans_dqresv Move the error code selection after the goto label and fold the xfs_quota_error helper into it. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Alex Elder --- fs/xfs/quota/xfs_trans_dquot.c | 48 +++++++++++++++--------------------------- 1 file changed, 17 insertions(+), 31 deletions(-) (limited to 'fs') diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c index 97ac964..b9db6f7 100644 --- a/fs/xfs/quota/xfs_trans_dquot.c +++ b/fs/xfs/quota/xfs_trans_dquot.c @@ -589,14 +589,6 @@ xfs_trans_unreserve_and_mod_dquots( } } -STATIC int -xfs_quota_error(uint flags) -{ - if (flags & XFS_QMOPT_ENOSPC) - return ENOSPC; - return EDQUOT; -} - /* * This reserves disk blocks and inodes against a dquot. * Flags indicate if the dquot is to be locked here and also @@ -612,7 +604,6 @@ xfs_trans_dqresv( long ninos, uint flags) { - int error; xfs_qcnt_t hardlimit; xfs_qcnt_t softlimit; time_t timer; @@ -649,7 +640,6 @@ xfs_trans_dqresv( warnlimit = XFS_QI_RTBWARNLIMIT(dqp->q_mount); resbcountp = &dqp->q_res_rtbcount; } - error = 0; if ((flags & XFS_QMOPT_FORCE_RES) == 0 && dqp->q_core.d_id && @@ -667,19 +657,13 @@ xfs_trans_dqresv( * nblks. 
*/ if (hardlimit > 0ULL && - (hardlimit <= nblks + *resbcountp)) { - error = xfs_quota_error(flags); + hardlimit <= nblks + *resbcountp) goto error_return; - } - if (softlimit > 0ULL && - (softlimit <= nblks + *resbcountp)) { - if ((timer != 0 && get_seconds() > timer) || - (warns != 0 && warns >= warnlimit)) { - error = xfs_quota_error(flags); - goto error_return; - } - } + softlimit <= nblks + *resbcountp && + ((timer != 0 && get_seconds() > timer) || + (warns != 0 && warns >= warnlimit))) + goto error_return; } if (ninos > 0) { count = be64_to_cpu(dqp->q_core.d_icount); @@ -692,16 +676,13 @@ xfs_trans_dqresv( softlimit = be64_to_cpu(dqp->q_core.d_ino_softlimit); if (!softlimit) softlimit = q->qi_isoftlimit; - if (hardlimit > 0ULL && count >= hardlimit) { - error = xfs_quota_error(flags); + + if (hardlimit > 0ULL && count >= hardlimit) + goto error_return; + if (softlimit > 0ULL && count >= softlimit && + ((timer != 0 && get_seconds() > timer) || + (warns != 0 && warns >= warnlimit))) goto error_return; - } else if (softlimit > 0ULL && count >= softlimit) { - if ((timer != 0 && get_seconds() > timer) || - (warns != 0 && warns >= warnlimit)) { - error = xfs_quota_error(flags); - goto error_return; - } - } } } @@ -736,9 +717,14 @@ xfs_trans_dqresv( ASSERT(dqp->q_res_rtbcount >= be64_to_cpu(dqp->q_core.d_rtbcount)); ASSERT(dqp->q_res_icount >= be64_to_cpu(dqp->q_core.d_icount)); + xfs_dqunlock(dqp); + return 0; + error_return: xfs_dqunlock(dqp); - return error; + if (flags & XFS_QMOPT_ENOSPC) + return ENOSPC; + return EDQUOT; } -- cgit v1.1 From a210c1aa7f6c90b729cc3a72d03e789b13cb6c47 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 17 Jan 2010 22:36:19 +0000 Subject: xfs: implement quota warnings via netlink Wire up quota_send_warning to send quota warnings over netlink. This is used by various desktops to show user quota warnings. Tested by running the quota_nld daemon while running the xfstest quota tests and observing the warnings. 
I'll see how I can get a more formal testcase for it written. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Alex Elder --- fs/xfs/quota/xfs_trans_dquot.c | 49 +++++++++++++++++++++++++++++++++--------- 1 file changed, 39 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c index b9db6f7..c3ab75c 100644 --- a/fs/xfs/quota/xfs_trans_dquot.c +++ b/fs/xfs/quota/xfs_trans_dquot.c @@ -589,6 +589,20 @@ xfs_trans_unreserve_and_mod_dquots( } } +STATIC void +xfs_quota_warn( + struct xfs_mount *mp, + struct xfs_dquot *dqp, + int type) +{ + /* no warnings for project quotas - we just return ENOSPC later */ + if (dqp->dq_flags & XFS_DQ_PROJ) + return; + quota_send_warning((dqp->dq_flags & XFS_DQ_USER) ? USRQUOTA : GRPQUOTA, + be32_to_cpu(dqp->q_core.d_id), mp->m_super->s_dev, + type); +} + /* * This reserves disk blocks and inodes against a dquot. * Flags indicate if the dquot is to be locked here and also @@ -657,13 +671,21 @@ xfs_trans_dqresv( * nblks. 
*/ if (hardlimit > 0ULL && - hardlimit <= nblks + *resbcountp) + hardlimit <= nblks + *resbcountp) { + xfs_quota_warn(mp, dqp, QUOTA_NL_BHARDWARN); goto error_return; + } if (softlimit > 0ULL && - softlimit <= nblks + *resbcountp && - ((timer != 0 && get_seconds() > timer) || - (warns != 0 && warns >= warnlimit))) - goto error_return; + softlimit <= nblks + *resbcountp) { + if ((timer != 0 && get_seconds() > timer) || + (warns != 0 && warns >= warnlimit)) { + xfs_quota_warn(mp, dqp, + QUOTA_NL_BSOFTLONGWARN); + goto error_return; + } + + xfs_quota_warn(mp, dqp, QUOTA_NL_BSOFTWARN); + } } if (ninos > 0) { count = be64_to_cpu(dqp->q_core.d_icount); @@ -677,12 +699,19 @@ xfs_trans_dqresv( if (!softlimit) softlimit = q->qi_isoftlimit; - if (hardlimit > 0ULL && count >= hardlimit) - goto error_return; - if (softlimit > 0ULL && count >= softlimit && - ((timer != 0 && get_seconds() > timer) || - (warns != 0 && warns >= warnlimit))) + if (hardlimit > 0ULL && count >= hardlimit) { + xfs_quota_warn(mp, dqp, QUOTA_NL_IHARDWARN); goto error_return; + } + if (softlimit > 0ULL && count >= softlimit) { + if ((timer != 0 && get_seconds() > timer) || + (warns != 0 && warns >= warnlimit)) { + xfs_quota_warn(mp, dqp, + QUOTA_NL_ISOFTLONGWARN); + goto error_return; + } + xfs_quota_warn(mp, dqp, QUOTA_NL_ISOFTWARN); + } } } -- cgit v1.1 From 0cadda1c5f194f98a05d252ff4385d86d2ed0862 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 19 Jan 2010 09:56:44 +0000 Subject: xfs: remove duplicate buffer flags Currently we define aliases for the buffer flags in various namespaces, which only adds confusion. Remove all but the XBF_ flags to clean this up a bit. Note that we still abuse XFS_B_ASYNC/XBF_ASYNC for some non-buffer uses, but I'll clean that up later. 
Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Alex Elder --- fs/xfs/linux-2.6/xfs_buf.c | 2 +- fs/xfs/linux-2.6/xfs_buf.h | 22 ++++------------------ fs/xfs/linux-2.6/xfs_fs_subr.c | 2 +- fs/xfs/linux-2.6/xfs_sync.c | 4 ++-- fs/xfs/quota/xfs_dquot.c | 3 +-- fs/xfs/quota/xfs_dquot_item.c | 3 +-- fs/xfs/xfs_alloc.c | 2 +- fs/xfs/xfs_attr.c | 12 +++++------- fs/xfs/xfs_attr_leaf.c | 2 +- fs/xfs/xfs_btree.c | 4 ++-- fs/xfs/xfs_ialloc.c | 2 +- fs/xfs/xfs_inode.c | 20 ++++++++++---------- fs/xfs/xfs_inode_item.c | 2 +- fs/xfs/xfs_log_recover.c | 8 ++++---- fs/xfs/xfs_mount.c | 4 ++-- fs/xfs/xfs_trans_buf.c | 27 ++++++++++++++------------- fs/xfs/xfs_vnodeops.c | 4 ++-- 17 files changed, 53 insertions(+), 70 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index efd745b..730eff1 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -1169,7 +1169,7 @@ xfs_bioerror_relse( XFS_BUF_STALE(bp); XFS_BUF_CLR_IODONE_FUNC(bp); XFS_BUF_CLR_BDSTRAT_FUNC(bp); - if (!(fl & XFS_B_ASYNC)) { + if (!(fl & XBF_ASYNC)) { /* * Mark b_error and B_ERROR _both_. * Lot's of chunkcache code assumes that. 
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index 4f2ad66..ea8c198 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h @@ -275,33 +275,19 @@ extern void xfs_buf_terminate(void); ({ char __b[BDEVNAME_SIZE]; bdevname((target)->bt_bdev, __b); __b; }) -#define XFS_B_ASYNC XBF_ASYNC -#define XFS_B_DELWRI XBF_DELWRI -#define XFS_B_READ XBF_READ -#define XFS_B_WRITE XBF_WRITE -#define XFS_B_STALE XBF_STALE - -#define XFS_BUF_TRYLOCK XBF_TRYLOCK -#define XFS_INCORE_TRYLOCK XBF_TRYLOCK -#define XFS_BUF_LOCK XBF_LOCK -#define XFS_BUF_MAPPED XBF_MAPPED - -#define BUF_BUSY XBF_DONT_BLOCK - #define XFS_BUF_BFLAGS(bp) ((bp)->b_flags) #define XFS_BUF_ZEROFLAGS(bp) ((bp)->b_flags &= \ ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI|XBF_ORDERED)) -#define XFS_BUF_STALE(bp) ((bp)->b_flags |= XFS_B_STALE) -#define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XFS_B_STALE) -#define XFS_BUF_ISSTALE(bp) ((bp)->b_flags & XFS_B_STALE) +#define XFS_BUF_STALE(bp) ((bp)->b_flags |= XBF_STALE) +#define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XBF_STALE) +#define XFS_BUF_ISSTALE(bp) ((bp)->b_flags & XBF_STALE) #define XFS_BUF_SUPER_STALE(bp) do { \ XFS_BUF_STALE(bp); \ xfs_buf_delwri_dequeue(bp); \ XFS_BUF_DONE(bp); \ } while (0) -#define XFS_BUF_MANAGE XBF_FS_MANAGED #define XFS_BUF_UNMANAGE(bp) ((bp)->b_flags &= ~XBF_FS_MANAGED) #define XFS_BUF_DELAYWRITE(bp) ((bp)->b_flags |= XBF_DELWRI) @@ -390,7 +376,7 @@ static inline void xfs_buf_relse(xfs_buf_t *bp) #define xfs_biomove(bp, off, len, data, rw) \ xfs_buf_iomove((bp), (off), (len), (data), \ - ((rw) == XFS_B_WRITE) ? XBRW_WRITE : XBRW_READ) + ((rw) == XBF_WRITE) ? 
XBRW_WRITE : XBRW_READ) #define xfs_biozero(bp, off, len) \ xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO) diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c index 7501b85..b6918d7 100644 --- a/fs/xfs/linux-2.6/xfs_fs_subr.c +++ b/fs/xfs/linux-2.6/xfs_fs_subr.c @@ -79,7 +79,7 @@ xfs_flush_pages( xfs_iflags_clear(ip, XFS_ITRUNCATED); ret = -filemap_fdatawrite(mapping); } - if (flags & XFS_B_ASYNC) + if (flags & XBF_ASYNC) return ret; ret2 = xfs_wait_on_pages(ip, first, last); if (!ret) diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index b58f841..58c24be 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -234,7 +234,7 @@ xfs_sync_inode_data( } error = xfs_flush_pages(ip, 0, -1, (flags & SYNC_WAIT) ? - 0 : XFS_B_ASYNC, FI_NONE); + 0 : XBF_ASYNC, FI_NONE); xfs_iunlock(ip, XFS_IOLOCK_SHARED); out_wait: @@ -370,7 +370,7 @@ xfs_sync_fsdata( if (flags & SYNC_TRYLOCK) { ASSERT(!(flags & SYNC_WAIT)); - bp = xfs_getsb(mp, XFS_BUF_TRYLOCK); + bp = xfs_getsb(mp, XBF_TRYLOCK); if (!bp) goto out; diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c index a447493..5756392 100644 --- a/fs/xfs/quota/xfs_dquot.c +++ b/fs/xfs/quota/xfs_dquot.c @@ -1527,8 +1527,7 @@ xfs_qm_dqflock_pushbuf_wait( * the flush lock when the I/O completes. 
*/ bp = xfs_incore(dqp->q_mount->m_ddev_targp, dqp->q_blkno, - XFS_QI_DQCHUNKLEN(dqp->q_mount), - XFS_INCORE_TRYLOCK); + XFS_QI_DQCHUNKLEN(dqp->q_mount), XBF_TRYLOCK); if (bp != NULL) { if (XFS_BUF_ISDELAYWRITE(bp)) { int error; diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c index d0d4a9a..37929d1 100644 --- a/fs/xfs/quota/xfs_dquot_item.c +++ b/fs/xfs/quota/xfs_dquot_item.c @@ -237,8 +237,7 @@ xfs_qm_dquot_logitem_pushbuf( } mp = dqp->q_mount; bp = xfs_incore(mp->m_ddev_targp, qip->qli_format.qlf_blkno, - XFS_QI_DQCHUNKLEN(mp), - XFS_INCORE_TRYLOCK); + XFS_QI_DQCHUNKLEN(mp), XBF_TRYLOCK); if (bp != NULL) { if (XFS_BUF_ISDELAYWRITE(bp)) { dopush = ((qip->qli_item.li_flags & XFS_LI_IN_AIL) && diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 8aa181d..a27aeb7 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -2180,7 +2180,7 @@ xfs_alloc_read_agf( ASSERT(agno != NULLAGNUMBER); error = xfs_read_agf(mp, tp, agno, - (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XFS_BUF_TRYLOCK : 0, + (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0, bpp); if (error) return error; diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c index f7b426a..b9c196a 100644 --- a/fs/xfs/xfs_attr.c +++ b/fs/xfs/xfs_attr.c @@ -2015,15 +2015,14 @@ xfs_attr_rmtval_get(xfs_da_args_t *args) dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock); blkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount); error = xfs_read_buf(mp, mp->m_ddev_targp, dblkno, - blkcnt, - XFS_BUF_LOCK | XBF_DONT_BLOCK, + blkcnt, XBF_LOCK | XBF_DONT_BLOCK, &bp); if (error) return(error); tmp = (valuelen < XFS_BUF_SIZE(bp)) ? 
valuelen : XFS_BUF_SIZE(bp); - xfs_biomove(bp, 0, tmp, dst, XFS_B_READ); + xfs_biomove(bp, 0, tmp, dst, XBF_READ); xfs_buf_relse(bp); dst += tmp; valuelen -= tmp; @@ -2149,13 +2148,13 @@ xfs_attr_rmtval_set(xfs_da_args_t *args) blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount); bp = xfs_buf_get(mp->m_ddev_targp, dblkno, blkcnt, - XFS_BUF_LOCK | XBF_DONT_BLOCK); + XBF_LOCK | XBF_DONT_BLOCK); ASSERT(bp); ASSERT(!XFS_BUF_GETERROR(bp)); tmp = (valuelen < XFS_BUF_SIZE(bp)) ? valuelen : XFS_BUF_SIZE(bp); - xfs_biomove(bp, 0, tmp, src, XFS_B_WRITE); + xfs_biomove(bp, 0, tmp, src, XBF_WRITE); if (tmp < XFS_BUF_SIZE(bp)) xfs_biozero(bp, tmp, XFS_BUF_SIZE(bp) - tmp); if ((error = xfs_bwrite(mp, bp))) {/* GROT: NOTE: synchronous write */ @@ -2216,8 +2215,7 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args) /* * If the "remote" value is in the cache, remove it. */ - bp = xfs_incore(mp->m_ddev_targp, dblkno, blkcnt, - XFS_INCORE_TRYLOCK); + bp = xfs_incore(mp->m_ddev_targp, dblkno, blkcnt, XBF_TRYLOCK); if (bp) { XFS_BUF_STALE(bp); XFS_BUF_UNDELAYWRITE(bp); diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c index 52519a2..a90ce74 100644 --- a/fs/xfs/xfs_attr_leaf.c +++ b/fs/xfs/xfs_attr_leaf.c @@ -2950,7 +2950,7 @@ xfs_attr_leaf_freextent(xfs_trans_t **trans, xfs_inode_t *dp, map.br_blockcount); bp = xfs_trans_get_buf(*trans, dp->i_mount->m_ddev_targp, - dblkno, dblkcnt, XFS_BUF_LOCK); + dblkno, dblkcnt, XBF_LOCK); xfs_trans_binval(*trans, bp); /* * Roll to next transaction. 
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 36a0992..96be4b0 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -977,7 +977,7 @@ xfs_btree_get_buf_block( xfs_daddr_t d; /* need to sort out how callers deal with failures first */ - ASSERT(!(flags & XFS_BUF_TRYLOCK)); + ASSERT(!(flags & XBF_TRYLOCK)); d = xfs_btree_ptr_to_daddr(cur, ptr); *bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d, @@ -1008,7 +1008,7 @@ xfs_btree_read_buf_block( int error; /* need to sort out how callers deal with failures first */ - ASSERT(!(flags & XFS_BUF_TRYLOCK)); + ASSERT(!(flags & XBF_TRYLOCK)); d = xfs_btree_ptr_to_daddr(cur, ptr); error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d, diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 52c9d00..9d884c1 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -205,7 +205,7 @@ xfs_ialloc_inode_init( d = XFS_AGB_TO_DADDR(mp, agno, agbno + (j * blks_per_cluster)); fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize * blks_per_cluster, - XFS_BUF_LOCK); + XBF_LOCK); ASSERT(fbuf); ASSERT(!XFS_BUF_GETERROR(fbuf)); diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 0317b00..bbb3bee 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -151,7 +151,7 @@ xfs_imap_to_bp( "an error %d on %s. Returning error.", error, mp->m_fsname); } else { - ASSERT(buf_flags & XFS_BUF_TRYLOCK); + ASSERT(buf_flags & XBF_TRYLOCK); } return error; } @@ -239,7 +239,7 @@ xfs_inotobp( if (error) return error; - error = xfs_imap_to_bp(mp, tp, &imap, &bp, XFS_BUF_LOCK, imap_flags); + error = xfs_imap_to_bp(mp, tp, &imap, &bp, XBF_LOCK, imap_flags); if (error) return error; @@ -285,7 +285,7 @@ xfs_itobp( return error; if (!bp) { - ASSERT(buf_flags & XFS_BUF_TRYLOCK); + ASSERT(buf_flags & XBF_TRYLOCK); ASSERT(tp == NULL); *bpp = NULL; return EAGAIN; @@ -807,7 +807,7 @@ xfs_iread( * Get pointers to the on-disk inode and the buffer containing it. 
*/ error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &bp, - XFS_BUF_LOCK, iget_flags); + XBF_LOCK, iget_flags); if (error) return error; dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset); @@ -1751,7 +1751,7 @@ xfs_iunlink( * Here we put the head pointer into our next pointer, * and then we fall through to point the head at us. */ - error = xfs_itobp(mp, tp, ip, &dip, &ibp, XFS_BUF_LOCK); + error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK); if (error) return error; @@ -1833,7 +1833,7 @@ xfs_iunlink_remove( * of dealing with the buffer when there is no need to * change it. */ - error = xfs_itobp(mp, tp, ip, &dip, &ibp, XFS_BUF_LOCK); + error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK); if (error) { cmn_err(CE_WARN, "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", @@ -1895,7 +1895,7 @@ xfs_iunlink_remove( * Now last_ibp points to the buffer previous to us on * the unlinked list. Pull us from the list. */ - error = xfs_itobp(mp, tp, ip, &dip, &ibp, XFS_BUF_LOCK); + error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK); if (error) { cmn_err(CE_WARN, "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", @@ -2040,7 +2040,7 @@ xfs_ifree_cluster( bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno, mp->m_bsize * blks_per_cluster, - XFS_BUF_LOCK); + XBF_LOCK); pre_flushed = 0; lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); @@ -2151,7 +2151,7 @@ xfs_ifree( xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, XFS_BUF_LOCK); + error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, XBF_LOCK); if (error) return error; @@ -2952,7 +2952,7 @@ xfs_iflush( * Get the buffer containing the on-disk inode. */ error = xfs_itobp(mp, NULL, ip, &dip, &bp, - noblock ? XFS_BUF_TRYLOCK : XFS_BUF_LOCK); + noblock ? 
XBF_TRYLOCK : XBF_LOCK); if (error || !bp) { xfs_ifunlock(ip); return error; diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index f38855d..6194fb5 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -785,7 +785,7 @@ xfs_inode_item_pushbuf( mp = ip->i_mount; bp = xfs_incore(mp->m_ddev_targp, iip->ili_format.ilf_blkno, - iip->ili_format.ilf_len, XFS_INCORE_TRYLOCK); + iip->ili_format.ilf_len, XBF_TRYLOCK); if (bp != NULL) { if (XFS_BUF_ISDELAYWRITE(bp)) { diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 65f1f13..97148f0 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -2184,9 +2184,9 @@ xlog_recover_do_buffer_trans( } mp = log->l_mp; - buf_flags = XFS_BUF_LOCK; + buf_flags = XBF_LOCK; if (!(flags & XFS_BLI_INODE_BUF)) - buf_flags |= XFS_BUF_MAPPED; + buf_flags |= XBF_MAPPED; bp = xfs_buf_read(mp->m_ddev_targp, blkno, len, buf_flags); if (XFS_BUF_ISERROR(bp)) { @@ -2288,7 +2288,7 @@ xlog_recover_do_inode_trans( } bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len, - XFS_BUF_LOCK); + XBF_LOCK); if (XFS_BUF_ISERROR(bp)) { xfs_ioerror_alert("xlog_recover_do..(read#2)", mp, bp, in_f->ilf_blkno); @@ -3146,7 +3146,7 @@ xlog_recover_process_one_iunlink( /* * Get the on disk inode to find the next inode in the bucket. */ - error = xfs_itobp(mp, NULL, ip, &dip, &ibp, XFS_BUF_LOCK); + error = xfs_itobp(mp, NULL, ip, &dip, &ibp, XBF_LOCK); if (error) goto fail_iput; diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index d95bd18..bb01540 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -665,7 +665,7 @@ xfs_readsb(xfs_mount_t *mp, int flags) * access to the superblock. 
*/ sector_size = xfs_getsize_buftarg(mp->m_ddev_targp); - extra_flags = XFS_BUF_LOCK | XFS_BUF_MANAGE | XFS_BUF_MAPPED; + extra_flags = XBF_LOCK | XBF_FS_MANAGED | XBF_MAPPED; bp = xfs_buf_read(mp->m_ddev_targp, XFS_SB_DADDR, BTOBB(sector_size), extra_flags); @@ -1969,7 +1969,7 @@ xfs_getsb( ASSERT(mp->m_sb_bp != NULL); bp = mp->m_sb_bp; - if (flags & XFS_BUF_TRYLOCK) { + if (flags & XBF_TRYLOCK) { if (!XFS_BUF_CPSEMA(bp)) { return NULL; } diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 4913062..5ffd544 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -75,13 +75,14 @@ xfs_trans_get_buf(xfs_trans_t *tp, xfs_buf_log_item_t *bip; if (flags == 0) - flags = XFS_BUF_LOCK | XFS_BUF_MAPPED; + flags = XBF_LOCK | XBF_MAPPED; /* * Default to a normal get_buf() call if the tp is NULL. */ if (tp == NULL) - return xfs_buf_get(target_dev, blkno, len, flags | BUF_BUSY); + return xfs_buf_get(target_dev, blkno, len, + flags | XBF_DONT_BLOCK); /* * If we find the buffer in the cache with this transaction @@ -117,14 +118,14 @@ xfs_trans_get_buf(xfs_trans_t *tp, } /* - * We always specify the BUF_BUSY flag within a transaction so - * that get_buf does not try to push out a delayed write buffer + * We always specify the XBF_DONT_BLOCK flag within a transaction + * so that get_buf does not try to push out a delayed write buffer * which might cause another transaction to take place (if the * buffer was delayed alloc). Such recursive transactions can * easily deadlock with our current transaction as well as cause * us to run out of stack space. */ - bp = xfs_buf_get(target_dev, blkno, len, flags | BUF_BUSY); + bp = xfs_buf_get(target_dev, blkno, len, flags | XBF_DONT_BLOCK); if (bp == NULL) { return NULL; } @@ -290,15 +291,15 @@ xfs_trans_read_buf( int error; if (flags == 0) - flags = XFS_BUF_LOCK | XFS_BUF_MAPPED; + flags = XBF_LOCK | XBF_MAPPED; /* * Default to a normal get_buf() call if the tp is NULL. 
*/ if (tp == NULL) { - bp = xfs_buf_read(target, blkno, len, flags | BUF_BUSY); + bp = xfs_buf_read(target, blkno, len, flags | XBF_DONT_BLOCK); if (!bp) - return (flags & XFS_BUF_TRYLOCK) ? + return (flags & XBF_TRYLOCK) ? EAGAIN : XFS_ERROR(ENOMEM); if (XFS_BUF_GETERROR(bp) != 0) { @@ -385,14 +386,14 @@ xfs_trans_read_buf( } /* - * We always specify the BUF_BUSY flag within a transaction so - * that get_buf does not try to push out a delayed write buffer + * We always specify the XBF_DONT_BLOCK flag within a transaction + * so that get_buf does not try to push out a delayed write buffer * which might cause another transaction to take place (if the * buffer was delayed alloc). Such recursive transactions can * easily deadlock with our current transaction as well as cause * us to run out of stack space. */ - bp = xfs_buf_read(target, blkno, len, flags | BUF_BUSY); + bp = xfs_buf_read(target, blkno, len, flags | XBF_DONT_BLOCK); if (bp == NULL) { *bpp = NULL; return 0; @@ -472,8 +473,8 @@ shutdown_abort: if (XFS_BUF_ISSTALE(bp) && XFS_BUF_ISDELAYWRITE(bp)) cmn_err(CE_NOTE, "about to pop assert, bp == 0x%p", bp); #endif - ASSERT((XFS_BUF_BFLAGS(bp) & (XFS_B_STALE|XFS_B_DELWRI)) != - (XFS_B_STALE|XFS_B_DELWRI)); + ASSERT((XFS_BUF_BFLAGS(bp) & (XBF_STALE|XBF_DELWRI)) != + (XBF_STALE|XBF_DELWRI)); trace_xfs_trans_read_buf_shut(bp, _RET_IP_); xfs_buf_relse(bp); diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 9f7c001..4da96cd 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -256,7 +256,7 @@ xfs_setattr( iattr->ia_size > ip->i_d.di_size) { code = xfs_flush_pages(ip, ip->i_d.di_size, iattr->ia_size, - XFS_B_ASYNC, FI_NONE); + XBF_ASYNC, FI_NONE); } /* wait for all I/O to complete */ @@ -1096,7 +1096,7 @@ xfs_release( */ truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED); if (truncated && VN_DIRTY(VFS_I(ip)) && ip->i_delayed_blks > 0) - xfs_flush_pages(ip, 0, -1, XFS_B_ASYNC, FI_NONE); + xfs_flush_pages(ip, 0, -1, XBF_ASYNC, FI_NONE); 
} if (ip->i_d.di_nlink != 0) { -- cgit v1.1 From 4139b3b337cffd106744386c842b89dc86e31d4b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 19 Jan 2010 09:56:45 +0000 Subject: xfs: kill XLOG_VEC_SET_TYPE This macro only obfuscates the log item type assignments, so kill it. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Alex Elder --- fs/xfs/quota/xfs_dquot_item.c | 6 +++--- fs/xfs/xfs_buf_item.c | 8 ++++---- fs/xfs/xfs_extfree_item.c | 4 ++-- fs/xfs/xfs_inode_item.c | 18 +++++++++--------- fs/xfs/xfs_log.c | 4 ++-- fs/xfs/xfs_log.h | 4 +--- fs/xfs/xfs_trans.c | 2 +- 7 files changed, 22 insertions(+), 24 deletions(-) (limited to 'fs') diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c index 37929d1..116580d 100644 --- a/fs/xfs/quota/xfs_dquot_item.c +++ b/fs/xfs/quota/xfs_dquot_item.c @@ -74,11 +74,11 @@ xfs_qm_dquot_logitem_format( logvec->i_addr = (xfs_caddr_t)&logitem->qli_format; logvec->i_len = sizeof(xfs_dq_logformat_t); - XLOG_VEC_SET_TYPE(logvec, XLOG_REG_TYPE_QFORMAT); + logvec->i_type = XLOG_REG_TYPE_QFORMAT; logvec++; logvec->i_addr = (xfs_caddr_t)&logitem->qli_dquot->q_core; logvec->i_len = sizeof(xfs_disk_dquot_t); - XLOG_VEC_SET_TYPE(logvec, XLOG_REG_TYPE_DQUOT); + logvec->i_type = XLOG_REG_TYPE_DQUOT; ASSERT(2 == logitem->qli_item.li_desc->lid_size); logitem->qli_format.qlf_size = 2; @@ -466,7 +466,7 @@ xfs_qm_qoff_logitem_format(xfs_qoff_logitem_t *qf, log_vector->i_addr = (xfs_caddr_t)&(qf->qql_format); log_vector->i_len = sizeof(xfs_qoff_logitem_t); - XLOG_VEC_SET_TYPE(log_vector, XLOG_REG_TYPE_QUOTAOFF); + log_vector->i_type = XLOG_REG_TYPE_QUOTAOFF; qf->qql_format.qf_size = 1; } diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index a30f7e9..e0a1158 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -250,7 +250,7 @@ xfs_buf_item_format( ((bip->bli_format.blf_map_size - 1) * sizeof(uint))); vecp->i_addr = (xfs_caddr_t)&bip->bli_format; vecp->i_len = base_size; -
XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BFORMAT); + vecp->i_type = XLOG_REG_TYPE_BFORMAT; vecp++; nvecs = 1; @@ -297,14 +297,14 @@ xfs_buf_item_format( buffer_offset = first_bit * XFS_BLI_CHUNK; vecp->i_addr = xfs_buf_offset(bp, buffer_offset); vecp->i_len = nbits * XFS_BLI_CHUNK; - XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BCHUNK); + vecp->i_type = XLOG_REG_TYPE_BCHUNK; nvecs++; break; } else if (next_bit != last_bit + 1) { buffer_offset = first_bit * XFS_BLI_CHUNK; vecp->i_addr = xfs_buf_offset(bp, buffer_offset); vecp->i_len = nbits * XFS_BLI_CHUNK; - XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BCHUNK); + vecp->i_type = XLOG_REG_TYPE_BCHUNK; nvecs++; vecp++; first_bit = next_bit; @@ -316,7 +316,7 @@ xfs_buf_item_format( buffer_offset = first_bit * XFS_BLI_CHUNK; vecp->i_addr = xfs_buf_offset(bp, buffer_offset); vecp->i_len = nbits * XFS_BLI_CHUNK; - XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BCHUNK); + vecp->i_type = XLOG_REG_TYPE_BCHUNK; /* You would think we need to bump the nvecs here too, but we do not * this number is used by recovery, and it gets confused by the boundary * split here diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 05a4bdd..6f35ed1 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -82,7 +82,7 @@ xfs_efi_item_format(xfs_efi_log_item_t *efip, log_vector->i_addr = (xfs_caddr_t)&(efip->efi_format); log_vector->i_len = size; - XLOG_VEC_SET_TYPE(log_vector, XLOG_REG_TYPE_EFI_FORMAT); + log_vector->i_type = XLOG_REG_TYPE_EFI_FORMAT; ASSERT(size >= sizeof(xfs_efi_log_format_t)); } @@ -406,7 +406,7 @@ xfs_efd_item_format(xfs_efd_log_item_t *efdp, log_vector->i_addr = (xfs_caddr_t)&(efdp->efd_format); log_vector->i_len = size; - XLOG_VEC_SET_TYPE(log_vector, XLOG_REG_TYPE_EFD_FORMAT); + log_vector->i_type = XLOG_REG_TYPE_EFD_FORMAT; ASSERT(size >= sizeof(xfs_efd_log_format_t)); } diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 6194fb5..da4cac6 100644 --- a/fs/xfs/xfs_inode_item.c +++ 
b/fs/xfs/xfs_inode_item.c @@ -228,7 +228,7 @@ xfs_inode_item_format( vecp->i_addr = (xfs_caddr_t)&iip->ili_format; vecp->i_len = sizeof(xfs_inode_log_format_t); - XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IFORMAT); + vecp->i_type = XLOG_REG_TYPE_IFORMAT; vecp++; nvecs = 1; @@ -279,7 +279,7 @@ xfs_inode_item_format( vecp->i_addr = (xfs_caddr_t)&ip->i_d; vecp->i_len = sizeof(struct xfs_icdinode); - XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ICORE); + vecp->i_type = XLOG_REG_TYPE_ICORE; vecp++; nvecs++; iip->ili_format.ilf_fields |= XFS_ILOG_CORE; @@ -336,7 +336,7 @@ xfs_inode_item_format( vecp->i_addr = (char *)(ip->i_df.if_u1.if_extents); vecp->i_len = ip->i_df.if_bytes; - XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IEXT); + vecp->i_type = XLOG_REG_TYPE_IEXT; } else #endif { @@ -355,7 +355,7 @@ xfs_inode_item_format( vecp->i_addr = (xfs_caddr_t)ext_buffer; vecp->i_len = xfs_iextents_copy(ip, ext_buffer, XFS_DATA_FORK); - XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IEXT); + vecp->i_type = XLOG_REG_TYPE_IEXT; } ASSERT(vecp->i_len <= ip->i_df.if_bytes); iip->ili_format.ilf_dsize = vecp->i_len; @@ -373,7 +373,7 @@ xfs_inode_item_format( ASSERT(ip->i_df.if_broot != NULL); vecp->i_addr = (xfs_caddr_t)ip->i_df.if_broot; vecp->i_len = ip->i_df.if_broot_bytes; - XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IBROOT); + vecp->i_type = XLOG_REG_TYPE_IBROOT; vecp++; nvecs++; iip->ili_format.ilf_dsize = ip->i_df.if_broot_bytes; @@ -399,7 +399,7 @@ xfs_inode_item_format( ASSERT((ip->i_df.if_real_bytes == 0) || (ip->i_df.if_real_bytes == data_bytes)); vecp->i_len = (int)data_bytes; - XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ILOCAL); + vecp->i_type = XLOG_REG_TYPE_ILOCAL; vecp++; nvecs++; iip->ili_format.ilf_dsize = (unsigned)data_bytes; @@ -477,7 +477,7 @@ xfs_inode_item_format( vecp->i_len = xfs_iextents_copy(ip, ext_buffer, XFS_ATTR_FORK); #endif - XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_EXT); + vecp->i_type = XLOG_REG_TYPE_IATTR_EXT; iip->ili_format.ilf_asize = vecp->i_len; vecp++; nvecs++; @@ -492,7 
+492,7 @@ xfs_inode_item_format( ASSERT(ip->i_afp->if_broot != NULL); vecp->i_addr = (xfs_caddr_t)ip->i_afp->if_broot; vecp->i_len = ip->i_afp->if_broot_bytes; - XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_BROOT); + vecp->i_type = XLOG_REG_TYPE_IATTR_BROOT; vecp++; nvecs++; iip->ili_format.ilf_asize = ip->i_afp->if_broot_bytes; @@ -516,7 +516,7 @@ xfs_inode_item_format( ASSERT((ip->i_afp->if_real_bytes == 0) || (ip->i_afp->if_real_bytes == data_bytes)); vecp->i_len = (int)data_bytes; - XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_LOCAL); + vecp->i_type = XLOG_REG_TYPE_IATTR_LOCAL; vecp++; nvecs++; iip->ili_format.ilf_asize = (unsigned)data_bytes; diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 0d17516..20118dd 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -617,7 +617,7 @@ xfs_log_unmount_write(xfs_mount_t *mp) if (! (XLOG_FORCED_SHUTDOWN(log))) { reg[0].i_addr = (void*)&magic; reg[0].i_len = sizeof(magic); - XLOG_VEC_SET_TYPE(&reg[0], XLOG_REG_TYPE_UNMOUNT); + reg[0].i_type = XLOG_REG_TYPE_UNMOUNT; error = xfs_log_reserve(mp, 600, 1, &tic, XFS_LOG, 0, XLOG_UNMOUNT_REC_TYPE); @@ -1236,7 +1236,7 @@ xlog_commit_record(xfs_mount_t *mp, reg[0].i_addr = NULL; reg[0].i_len = 0; - XLOG_VEC_SET_TYPE(&reg[0], XLOG_REG_TYPE_COMMIT); + reg[0].i_type = XLOG_REG_TYPE_COMMIT; ASSERT_ALWAYS(iclog); if ((error = xlog_write(mp, reg, 1, ticket, commitlsnp, diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index d0c9baa..811ccf4 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -110,10 +110,8 @@ static inline xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2) #define XLOG_REG_TYPE_TRANSHDR 19 #define XLOG_REG_TYPE_MAX 19 -#define XLOG_VEC_SET_TYPE(vecp, t) ((vecp)->i_type = (t)) - typedef struct xfs_log_iovec { - xfs_caddr_t i_addr; /* beginning address of region */ + xfs_caddr_t i_addr; /* beginning address of region */ int i_len; /* length in bytes of region */ uint i_type; /* type of region */ } xfs_log_iovec_t; diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c 
index 237badc..7dbe3c3 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -1121,7 +1121,7 @@ xfs_trans_fill_vecs( tp->t_header.th_num_items = nitems; log_vector->i_addr = (xfs_caddr_t)&tp->t_header; log_vector->i_len = sizeof(xfs_trans_header_t); - XLOG_VEC_SET_TYPE(log_vector, XLOG_REG_TYPE_TRANSHDR); + log_vector->i_type = XLOG_REG_TYPE_TRANSHDR; } -- cgit v1.1 From a14a348bff2f99471a28e5928eb6801224c053d8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 19 Jan 2010 09:56:46 +0000 Subject: xfs: cleanup up xfs_log_force calling conventions Remove the XFS_LOG_FORCE argument which was always set, and the XFS_LOG_URGE define, which was never used. Split xfs_log_force into a two helpers - xfs_log_force which forces the whole log, and xfs_log_force_lsn which forces up to the specified LSN. The underlying implementations already were entirely separate, as were the users. Also re-indent the new _xfs_log_force/_xfs_log_force which previously had a weird coding style. Signed-off-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/linux-2.6/xfs_sync.c | 17 +-- fs/xfs/quota/xfs_dquot.c | 10 +- fs/xfs/quota/xfs_dquot_item.c | 9 +- fs/xfs/quota/xfs_qm_syscalls.c | 4 +- fs/xfs/xfs_alloc.c | 2 +- fs/xfs/xfs_inode.c | 9 +- fs/xfs/xfs_inode_item.c | 7 +- fs/xfs/xfs_log.c | 312 ++++++++++++++++++++--------------------- fs/xfs/xfs_log.h | 15 +- fs/xfs/xfs_log_recover.c | 3 +- fs/xfs/xfs_mount.c | 4 +- fs/xfs/xfs_trans.c | 5 +- fs/xfs/xfs_trans_ail.c | 2 +- fs/xfs/xfs_vnodeops.c | 5 +- 14 files changed, 193 insertions(+), 211 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 58c24be..c9b863e 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -296,10 +296,7 @@ xfs_sync_data( if (error) return XFS_ERROR(error); - xfs_log_force(mp, 0, - (flags & SYNC_WAIT) ? - XFS_LOG_FORCE | XFS_LOG_SYNC : - XFS_LOG_FORCE); + xfs_log_force(mp, (flags & SYNC_WAIT) ? 
XFS_LOG_SYNC : 0); return 0; } @@ -325,10 +322,6 @@ xfs_commit_dummy_trans( struct xfs_inode *ip = mp->m_rootip; struct xfs_trans *tp; int error; - int log_flags = XFS_LOG_FORCE; - - if (flags & SYNC_WAIT) - log_flags |= XFS_LOG_SYNC; /* * Put a dummy transaction in the log to tell recovery @@ -350,7 +343,7 @@ xfs_commit_dummy_trans( xfs_iunlock(ip, XFS_ILOCK_EXCL); /* the log force ensures this transaction is pushed to disk */ - xfs_log_force(mp, 0, log_flags); + xfs_log_force(mp, (flags & SYNC_WAIT) ? XFS_LOG_SYNC : 0); return error; } @@ -390,7 +383,7 @@ xfs_sync_fsdata( * become pinned in between there and here. */ if (XFS_BUF_ISPINNED(bp)) - xfs_log_force(mp, 0, XFS_LOG_FORCE); + xfs_log_force(mp, 0); } @@ -575,7 +568,7 @@ xfs_flush_inodes( igrab(inode); xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inodes_work, &completion); wait_for_completion(&completion); - xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC); + xfs_log_force(ip->i_mount, XFS_LOG_SYNC); } /* @@ -591,7 +584,7 @@ xfs_sync_worker( int error; if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { - xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); + xfs_log_force(mp, 0); xfs_reclaim_inodes(mp, XFS_IFLUSH_DELWRI_ELSE_ASYNC); /* dgc: errors ignored here */ error = xfs_qm_sync(mp, SYNC_TRYLOCK); diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c index 5756392..f9baeed 100644 --- a/fs/xfs/quota/xfs_dquot.c +++ b/fs/xfs/quota/xfs_dquot.c @@ -1248,7 +1248,7 @@ xfs_qm_dqflush( */ if (XFS_BUF_ISPINNED(bp)) { trace_xfs_dqflush_force(dqp); - xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); + xfs_log_force(mp, 0); } if (flags & XFS_QMOPT_DELWRI) { @@ -1531,11 +1531,9 @@ xfs_qm_dqflock_pushbuf_wait( if (bp != NULL) { if (XFS_BUF_ISDELAYWRITE(bp)) { int error; - if (XFS_BUF_ISPINNED(bp)) { - xfs_log_force(dqp->q_mount, - (xfs_lsn_t)0, - XFS_LOG_FORCE); - } + + if (XFS_BUF_ISPINNED(bp)) + xfs_log_force(dqp->q_mount, 0); error = xfs_bawrite(dqp->q_mount, bp); if (error) 
xfs_fs_cmn_err(CE_WARN, dqp->q_mount, diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c index 116580d..1b56437 100644 --- a/fs/xfs/quota/xfs_dquot_item.c +++ b/fs/xfs/quota/xfs_dquot_item.c @@ -190,7 +190,7 @@ xfs_qm_dqunpin_wait( /* * Give the log a push so we don't wait here too long. */ - xfs_log_force(dqp->q_mount, (xfs_lsn_t)0, XFS_LOG_FORCE); + xfs_log_force(dqp->q_mount, 0); wait_event(dqp->q_pinwait, (atomic_read(&dqp->q_pincount) == 0)); } @@ -245,10 +245,9 @@ xfs_qm_dquot_logitem_pushbuf( qip->qli_pushbuf_flag = 0; xfs_dqunlock(dqp); - if (XFS_BUF_ISPINNED(bp)) { - xfs_log_force(mp, (xfs_lsn_t)0, - XFS_LOG_FORCE); - } + if (XFS_BUF_ISPINNED(bp)) + xfs_log_force(mp, 0); + if (dopush) { int error; #ifdef XFSRACEDEBUG diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c index 873e07e..5d0ee8d 100644 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ b/fs/xfs/quota/xfs_qm_syscalls.c @@ -1192,9 +1192,9 @@ xfs_qm_internalqcheck( if (! XFS_IS_QUOTA_ON(mp)) return XFS_ERROR(ESRCH); - xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC); + xfs_log_force(mp, XFS_LOG_SYNC); XFS_bflush(mp->m_ddev_targp); - xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC); + xfs_log_force(mp, XFS_LOG_SYNC); XFS_bflush(mp->m_ddev_targp); mutex_lock(&qcheck_lock); diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index a27aeb7..94cddbf 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -2601,5 +2601,5 @@ xfs_alloc_search_busy(xfs_trans_t *tp, * transaction that freed the block */ if (lsn) - xfs_log_force(tp->t_mountp, lsn, XFS_LOG_FORCE|XFS_LOG_SYNC); + xfs_log_force_lsn(tp->t_mountp, lsn, XFS_LOG_SYNC); } diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index bbb3bee..d0d1b5a 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2484,8 +2484,11 @@ __xfs_iunpin_wait( return; /* Give the log a push to start the unpinning I/O */ - xfs_log_force(ip->i_mount, (iip && iip->ili_last_lsn) ? 
- iip->ili_last_lsn : 0, XFS_LOG_FORCE); + if (iip && iip->ili_last_lsn) + xfs_log_force_lsn(ip->i_mount, iip->ili_last_lsn, 0); + else + xfs_log_force(ip->i_mount, 0); + if (wait) wait_event(ip->i_ipin_wait, (atomic_read(&ip->i_pincount) == 0)); } @@ -2970,7 +2973,7 @@ xfs_iflush( * get stuck waiting in the write for too long. */ if (XFS_BUF_ISPINNED(bp)) - xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); + xfs_log_force(mp, 0); /* * inode clustering: diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index da4cac6..48ec1c0 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -804,10 +804,9 @@ xfs_inode_item_pushbuf( trace_xfs_inode_item_push(bp, _RET_IP_); - if (XFS_BUF_ISPINNED(bp)) { - xfs_log_force(mp, (xfs_lsn_t)0, - XFS_LOG_FORCE); - } + if (XFS_BUF_ISPINNED(bp)) + xfs_log_force(mp, 0); + if (dopush) { int error; error = xfs_bawrite(mp, bp); diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 20118dd..4f16be4 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -79,11 +79,6 @@ STATIC int xlog_state_release_iclog(xlog_t *log, STATIC void xlog_state_switch_iclogs(xlog_t *log, xlog_in_core_t *iclog, int eventual_size); -STATIC int xlog_state_sync(xlog_t *log, - xfs_lsn_t lsn, - uint flags, - int *log_flushed); -STATIC int xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed); STATIC void xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog); /* local functions to manipulate grant head */ @@ -296,65 +291,6 @@ xfs_log_done(xfs_mount_t *mp, return lsn; } /* xfs_log_done */ - -/* - * Force the in-core log to disk. If flags == XFS_LOG_SYNC, - * the force is done synchronously. - * - * Asynchronous forces are implemented by setting the WANT_SYNC - * bit in the appropriate in-core log and then returning. - * - * Synchronous forces are implemented with a signal variable. All callers - * to force a given lsn to disk will wait on a the sv attached to the - * specific in-core log. 
When given in-core log finally completes its - * write to disk, that thread will wake up all threads waiting on the - * sv. - */ -int -_xfs_log_force( - xfs_mount_t *mp, - xfs_lsn_t lsn, - uint flags, - int *log_flushed) -{ - xlog_t *log = mp->m_log; - int dummy; - - if (!log_flushed) - log_flushed = &dummy; - - ASSERT(flags & XFS_LOG_FORCE); - - XFS_STATS_INC(xs_log_force); - - if (log->l_flags & XLOG_IO_ERROR) - return XFS_ERROR(EIO); - if (lsn == 0) - return xlog_state_sync_all(log, flags, log_flushed); - else - return xlog_state_sync(log, lsn, flags, log_flushed); -} /* _xfs_log_force */ - -/* - * Wrapper for _xfs_log_force(), to be used when caller doesn't care - * about errors or whether the log was flushed or not. This is the normal - * interface to use when trying to unpin items or move the log forward. - */ -void -xfs_log_force( - xfs_mount_t *mp, - xfs_lsn_t lsn, - uint flags) -{ - int error; - error = _xfs_log_force(mp, lsn, flags, NULL); - if (error) { - xfs_fs_cmn_err(CE_WARN, mp, "xfs_log_force: " - "error %d returned.", error); - } -} - - /* * Attaches a new iclog I/O completion callback routine during * transaction commit. If the log is in error state, a non-zero @@ -601,7 +537,7 @@ xfs_log_unmount_write(xfs_mount_t *mp) if (mp->m_flags & XFS_MOUNT_RDONLY) return 0; - error = _xfs_log_force(mp, 0, XFS_LOG_FORCE|XFS_LOG_SYNC, NULL); + error = _xfs_log_force(mp, XFS_LOG_SYNC, NULL); ASSERT(error || !(XLOG_FORCED_SHUTDOWN(log))); #ifdef DEBUG @@ -2853,7 +2789,6 @@ xlog_state_switch_iclogs(xlog_t *log, log->l_iclog = iclog->ic_next; } /* xlog_state_switch_iclogs */ - /* * Write out all data in the in-core log as of this exact moment in time. * @@ -2881,11 +2816,17 @@ xlog_state_switch_iclogs(xlog_t *log, * b) when we return from flushing out this iclog, it is still * not in the active nor dirty state. 
*/ -STATIC int -xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed) +int +_xfs_log_force( + struct xfs_mount *mp, + uint flags, + int *log_flushed) { - xlog_in_core_t *iclog; - xfs_lsn_t lsn; + struct log *log = mp->m_log; + struct xlog_in_core *iclog; + xfs_lsn_t lsn; + + XFS_STATS_INC(xs_log_force); spin_lock(&log->l_icloglock); @@ -2931,7 +2872,9 @@ xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed) if (xlog_state_release_iclog(log, iclog)) return XFS_ERROR(EIO); - *log_flushed = 1; + + if (log_flushed) + *log_flushed = 1; spin_lock(&log->l_icloglock); if (be64_to_cpu(iclog->ic_header.h_lsn) == lsn && iclog->ic_state != XLOG_STATE_DIRTY) @@ -2975,19 +2918,37 @@ maybe_sleep: */ if (iclog->ic_state & XLOG_STATE_IOERROR) return XFS_ERROR(EIO); - *log_flushed = 1; - + if (log_flushed) + *log_flushed = 1; } else { no_sleep: spin_unlock(&log->l_icloglock); } return 0; -} /* xlog_state_sync_all */ +} +/* + * Wrapper for _xfs_log_force(), to be used when caller doesn't care + * about errors or whether the log was flushed or not. This is the normal + * interface to use when trying to unpin items or move the log forward. + */ +void +xfs_log_force( + xfs_mount_t *mp, + uint flags) +{ + int error; + + error = _xfs_log_force(mp, flags, NULL); + if (error) { + xfs_fs_cmn_err(CE_WARN, mp, "xfs_log_force: " + "error %d returned.", error); + } +} /* - * Used by code which implements synchronous log forces. + * Force the in-core log to disk for a specific LSN. * * Find in-core log with lsn. * If it is in the DIRTY state, just return. @@ -2995,109 +2956,142 @@ no_sleep: * state and go to sleep or return. * If it is in any other state, go to sleep or return. * - * If filesystem activity goes to zero, the iclog will get flushed only by - * bdflush(). + * Synchronous forces are implemented with a signal variable. All callers + * to force a given lsn to disk will wait on a the sv attached to the + * specific in-core log. 
When given in-core log finally completes its + * write to disk, that thread will wake up all threads waiting on the + * sv. */ -STATIC int -xlog_state_sync(xlog_t *log, - xfs_lsn_t lsn, - uint flags, - int *log_flushed) +int +_xfs_log_force_lsn( + struct xfs_mount *mp, + xfs_lsn_t lsn, + uint flags, + int *log_flushed) { - xlog_in_core_t *iclog; - int already_slept = 0; - -try_again: - spin_lock(&log->l_icloglock); - iclog = log->l_iclog; + struct log *log = mp->m_log; + struct xlog_in_core *iclog; + int already_slept = 0; - if (iclog->ic_state & XLOG_STATE_IOERROR) { - spin_unlock(&log->l_icloglock); - return XFS_ERROR(EIO); - } + ASSERT(lsn != 0); - do { - if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) { - iclog = iclog->ic_next; - continue; - } + XFS_STATS_INC(xs_log_force); - if (iclog->ic_state == XLOG_STATE_DIRTY) { +try_again: + spin_lock(&log->l_icloglock); + iclog = log->l_iclog; + if (iclog->ic_state & XLOG_STATE_IOERROR) { spin_unlock(&log->l_icloglock); - return 0; + return XFS_ERROR(EIO); } - if (iclog->ic_state == XLOG_STATE_ACTIVE) { - /* - * We sleep here if we haven't already slept (e.g. - * this is the first time we've looked at the correct - * iclog buf) and the buffer before us is going to - * be sync'ed. The reason for this is that if we - * are doing sync transactions here, by waiting for - * the previous I/O to complete, we can allow a few - * more transactions into this iclog before we close - * it down. - * - * Otherwise, we mark the buffer WANT_SYNC, and bump - * up the refcnt so we can release the log (which drops - * the ref count). The state switch keeps new transaction - * commits from using this buffer. When the current commits - * finish writing into the buffer, the refcount will drop to - * zero and the buffer will go out then. 
- */ - if (!already_slept && - (iclog->ic_prev->ic_state & (XLOG_STATE_WANT_SYNC | - XLOG_STATE_SYNCING))) { - ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR)); - XFS_STATS_INC(xs_log_force_sleep); - sv_wait(&iclog->ic_prev->ic_write_wait, PSWP, - &log->l_icloglock, s); - *log_flushed = 1; - already_slept = 1; - goto try_again; - } else { + do { + if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) { + iclog = iclog->ic_next; + continue; + } + + if (iclog->ic_state == XLOG_STATE_DIRTY) { + spin_unlock(&log->l_icloglock); + return 0; + } + + if (iclog->ic_state == XLOG_STATE_ACTIVE) { + /* + * We sleep here if we haven't already slept (e.g. + * this is the first time we've looked at the correct + * iclog buf) and the buffer before us is going to + * be sync'ed. The reason for this is that if we + * are doing sync transactions here, by waiting for + * the previous I/O to complete, we can allow a few + * more transactions into this iclog before we close + * it down. + * + * Otherwise, we mark the buffer WANT_SYNC, and bump + * up the refcnt so we can release the log (which + * drops the ref count). The state switch keeps new + * transaction commits from using this buffer. When + * the current commits finish writing into the buffer, + * the refcount will drop to zero and the buffer will + * go out then. 
+ */ + if (!already_slept && + (iclog->ic_prev->ic_state & + (XLOG_STATE_WANT_SYNC | XLOG_STATE_SYNCING))) { + ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR)); + + XFS_STATS_INC(xs_log_force_sleep); + + sv_wait(&iclog->ic_prev->ic_write_wait, + PSWP, &log->l_icloglock, s); + if (log_flushed) + *log_flushed = 1; + already_slept = 1; + goto try_again; + } atomic_inc(&iclog->ic_refcnt); xlog_state_switch_iclogs(log, iclog, 0); spin_unlock(&log->l_icloglock); if (xlog_state_release_iclog(log, iclog)) return XFS_ERROR(EIO); - *log_flushed = 1; + if (log_flushed) + *log_flushed = 1; spin_lock(&log->l_icloglock); } - } - if ((flags & XFS_LOG_SYNC) && /* sleep */ - !(iclog->ic_state & (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))) { + if ((flags & XFS_LOG_SYNC) && /* sleep */ + !(iclog->ic_state & + (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))) { + /* + * Don't wait on completion if we know that we've + * gotten a log write error. + */ + if (iclog->ic_state & XLOG_STATE_IOERROR) { + spin_unlock(&log->l_icloglock); + return XFS_ERROR(EIO); + } + XFS_STATS_INC(xs_log_force_sleep); + sv_wait(&iclog->ic_force_wait, PSWP, &log->l_icloglock, s); + /* + * No need to grab the log lock here since we're + * only deciding whether or not to return EIO + * and the memory read should be atomic. + */ + if (iclog->ic_state & XLOG_STATE_IOERROR) + return XFS_ERROR(EIO); - /* - * Don't wait on completion if we know that we've - * gotten a log write error. - */ - if (iclog->ic_state & XLOG_STATE_IOERROR) { + if (log_flushed) + *log_flushed = 1; + } else { /* just return */ spin_unlock(&log->l_icloglock); - return XFS_ERROR(EIO); } - XFS_STATS_INC(xs_log_force_sleep); - sv_wait(&iclog->ic_force_wait, PSWP, &log->l_icloglock, s); - /* - * No need to grab the log lock here since we're - * only deciding whether or not to return EIO - * and the memory read should be atomic. 
- */ - if (iclog->ic_state & XLOG_STATE_IOERROR) - return XFS_ERROR(EIO); - *log_flushed = 1; - } else { /* just return */ - spin_unlock(&log->l_icloglock); - } - return 0; - } while (iclog != log->l_iclog); + return 0; + } while (iclog != log->l_iclog); - spin_unlock(&log->l_icloglock); - return 0; -} /* xlog_state_sync */ + spin_unlock(&log->l_icloglock); + return 0; +} + +/* + * Wrapper for _xfs_log_force_lsn(), to be used when caller doesn't care + * about errors or whether the log was flushed or not. This is the normal + * interface to use when trying to unpin items or move the log forward. + */ +void +xfs_log_force_lsn( + xfs_mount_t *mp, + xfs_lsn_t lsn, + uint flags) +{ + int error; + error = _xfs_log_force_lsn(mp, lsn, flags, NULL); + if (error) { + xfs_fs_cmn_err(CE_WARN, mp, "xfs_log_force: " + "error %d returned.", error); + } +} /* * Called when we want to mark the current iclog as being ready to sync to @@ -3462,7 +3456,6 @@ xfs_log_force_umount( xlog_ticket_t *tic; xlog_t *log; int retval; - int dummy; log = mp->m_log; @@ -3536,13 +3529,14 @@ xfs_log_force_umount( } spin_unlock(&log->l_grant_lock); - if (! (log->l_iclog->ic_state & XLOG_STATE_IOERROR)) { + if (!(log->l_iclog->ic_state & XLOG_STATE_IOERROR)) { ASSERT(!logerror); /* * Force the incore logs to disk before shutting the * log down completely. */ - xlog_state_sync_all(log, XFS_LOG_FORCE|XFS_LOG_SYNC, &dummy); + _xfs_log_force(mp, XFS_LOG_SYNC, NULL); + spin_lock(&log->l_icloglock); retval = xlog_state_ioerror(log); spin_unlock(&log->l_icloglock); diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index 811ccf4..7074be9 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -70,14 +70,8 @@ static inline xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2) * Flags to xfs_log_force() * * XFS_LOG_SYNC: Synchronous force in-core log to disk - * XFS_LOG_FORCE: Start in-core log write now. - * XFS_LOG_URGE: Start write within some window of time. 
- * - * Note: Either XFS_LOG_FORCE or XFS_LOG_URGE must be set. */ #define XFS_LOG_SYNC 0x1 -#define XFS_LOG_FORCE 0x2 -#define XFS_LOG_URGE 0x4 #endif /* __KERNEL__ */ @@ -138,12 +132,17 @@ xfs_lsn_t xfs_log_done(struct xfs_mount *mp, void **iclog, uint flags); int _xfs_log_force(struct xfs_mount *mp, - xfs_lsn_t lsn, uint flags, int *log_forced); void xfs_log_force(struct xfs_mount *mp, - xfs_lsn_t lsn, uint flags); +int _xfs_log_force_lsn(struct xfs_mount *mp, + xfs_lsn_t lsn, + uint flags, + int *log_forced); +void xfs_log_force_lsn(struct xfs_mount *mp, + xfs_lsn_t lsn, + uint flags); int xfs_log_mount(struct xfs_mount *mp, struct xfs_buftarg *log_target, xfs_daddr_t start_block, diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 97148f0..22e6efd 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -3913,8 +3913,7 @@ xlog_recover_finish( * case the unlink transactions would have problems * pushing the EFIs out of the way. */ - xfs_log_force(log->l_mp, (xfs_lsn_t)0, - (XFS_LOG_FORCE | XFS_LOG_SYNC)); + xfs_log_force(log->l_mp, XFS_LOG_SYNC); xlog_recover_process_iunlinks(log); diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index bb01540..7f81ed7 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1455,7 +1455,7 @@ xfs_unmountfs( * push out the iclog we will never get that unlocked. hence we * need to force the log first. */ - xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC); + xfs_log_force(mp, XFS_LOG_SYNC); xfs_reclaim_inodes(mp, XFS_IFLUSH_ASYNC); xfs_qm_unmount(mp); @@ -1465,7 +1465,7 @@ xfs_unmountfs( * that nothing is pinned. This is important because bflush() * will skip pinned buffers. 
*/ - xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC); + xfs_log_force(mp, XFS_LOG_SYNC); xfs_binval(mp->m_ddev_targp); if (mp->m_rtdev_targp) { diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 7dbe3c3..be942d4 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -981,9 +981,8 @@ shut_us_down: */ if (sync) { if (!error) { - error = _xfs_log_force(mp, commit_lsn, - XFS_LOG_FORCE | XFS_LOG_SYNC, - log_flushed); + error = _xfs_log_force_lsn(mp, commit_lsn, + XFS_LOG_SYNC, log_flushed); } XFS_STATS_INC(xs_trans_sync); } else { diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 063dfbd..d7b1af8 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -371,7 +371,7 @@ xfsaild_push( * move forward in the AIL. */ XFS_STATS_INC(xs_push_ail_flush); - xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); + xfs_log_force(mp, 0); } if (!count) { diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 4da96cd..fd108b7 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -631,9 +631,8 @@ xfs_fsync( xfs_iunlock(ip, XFS_ILOCK_SHARED); if (xfs_ipincount(ip)) { - error = _xfs_log_force(ip->i_mount, (xfs_lsn_t)0, - XFS_LOG_FORCE | XFS_LOG_SYNC, - &log_flushed); + error = _xfs_log_force(ip->i_mount, XFS_LOG_SYNC, + &log_flushed); } else { /* * If the inode is not pinned and nothing has changed -- cgit v1.1 From bdfb04301fa5fdd95f219539a9a5b9663b1e5fc2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 20 Jan 2010 21:55:30 +0000 Subject: xfs: replace KM_LARGE with explicit vmalloc use We use the KM_LARGE flag to make kmem_alloc and friends use vmalloc if necessary. As we only need this for a few boot/mount time allocations just switch to explicit vmalloc calls there. 
Signed-off-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/linux-2.6/kmem.c | 56 +++++++++++++++++----------------------------- fs/xfs/linux-2.6/kmem.h | 21 ++++++++++++++--- fs/xfs/linux-2.6/xfs_buf.c | 6 ++--- fs/xfs/quota/xfs_qm.c | 26 ++++++++++++++++----- fs/xfs/xfs_itable.c | 8 ++++--- 5 files changed, 66 insertions(+), 51 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c index 2d3f90a..bc74055 100644 --- a/fs/xfs/linux-2.6/kmem.c +++ b/fs/xfs/linux-2.6/kmem.c @@ -16,7 +16,6 @@ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include -#include #include #include #include @@ -24,8 +23,25 @@ #include "time.h" #include "kmem.h" -#define MAX_VMALLOCS 6 -#define MAX_SLAB_SIZE 0x20000 +/* + * Greedy allocation. May fail and may return vmalloced memory. + * + * Must be freed using kmem_free_large. + */ +void * +kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize) +{ + void *ptr; + size_t kmsize = maxsize; + + while (!(ptr = kmem_zalloc_large(kmsize))) { + if ((kmsize >>= 1) <= minsize) + kmsize = minsize; + } + if (ptr) + *size = kmsize; + return ptr; +} void * kmem_alloc(size_t size, unsigned int __nocast flags) @@ -34,19 +50,8 @@ kmem_alloc(size_t size, unsigned int __nocast flags) gfp_t lflags = kmem_flags_convert(flags); void *ptr; -#ifdef DEBUG - if (unlikely(!(flags & KM_LARGE) && (size > PAGE_SIZE))) { - printk(KERN_WARNING "Large %s attempt, size=%ld\n", - __func__, (long)size); - dump_stack(); - } -#endif - do { - if (size < MAX_SLAB_SIZE || retries > MAX_VMALLOCS) - ptr = kmalloc(size, lflags); - else - ptr = __vmalloc(size, lflags, PAGE_KERNEL); + ptr = kmalloc(size, lflags); if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP))) return ptr; if (!(++retries % 100)) @@ -68,27 +73,6 @@ kmem_zalloc(size_t size, unsigned int __nocast flags) return ptr; } -void * -kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize, - unsigned int __nocast flags) -{ - void *ptr; - size_t 
kmsize = maxsize; - unsigned int kmflags = (flags & ~KM_SLEEP) | KM_NOSLEEP; - - while (!(ptr = kmem_zalloc(kmsize, kmflags))) { - if ((kmsize <= minsize) && (flags & KM_NOSLEEP)) - break; - if ((kmsize >>= 1) <= minsize) { - kmsize = minsize; - kmflags = flags; - } - } - if (ptr) - *size = kmsize; - return ptr; -} - void kmem_free(const void *ptr) { diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h index 179cbd6..f7c8f7a 100644 --- a/fs/xfs/linux-2.6/kmem.h +++ b/fs/xfs/linux-2.6/kmem.h @@ -21,6 +21,7 @@ #include #include #include +#include /* * General memory allocation interfaces @@ -30,7 +31,6 @@ #define KM_NOSLEEP 0x0002u #define KM_NOFS 0x0004u #define KM_MAYFAIL 0x0008u -#define KM_LARGE 0x0010u /* * We use a special process flag to avoid recursive callbacks into @@ -42,7 +42,7 @@ kmem_flags_convert(unsigned int __nocast flags) { gfp_t lflags; - BUG_ON(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL|KM_LARGE)); + BUG_ON(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL)); if (flags & KM_NOSLEEP) { lflags = GFP_ATOMIC | __GFP_NOWARN; @@ -56,10 +56,25 @@ kmem_flags_convert(unsigned int __nocast flags) extern void *kmem_alloc(size_t, unsigned int __nocast); extern void *kmem_zalloc(size_t, unsigned int __nocast); -extern void *kmem_zalloc_greedy(size_t *, size_t, size_t, unsigned int __nocast); extern void *kmem_realloc(const void *, size_t, size_t, unsigned int __nocast); extern void kmem_free(const void *); +static inline void *kmem_zalloc_large(size_t size) +{ + void *ptr; + + ptr = vmalloc(size); + if (ptr) + memset(ptr, 0, size); + return ptr; +} +static inline void kmem_free_large(void *ptr) +{ + vfree(ptr); +} + +extern void *kmem_zalloc_greedy(size_t *, size_t, size_t); + /* * Zone interfaces */ diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 730eff1..44e20e5 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -1525,8 +1525,8 @@ xfs_alloc_bufhash( btp->bt_hashshift = external ? 
3 : 8; /* 8 or 256 buckets */ btp->bt_hashmask = (1 << btp->bt_hashshift) - 1; - btp->bt_hash = kmem_zalloc((1 << btp->bt_hashshift) * - sizeof(xfs_bufhash_t), KM_SLEEP | KM_LARGE); + btp->bt_hash = kmem_zalloc_large((1 << btp->bt_hashshift) * + sizeof(xfs_bufhash_t)); for (i = 0; i < (1 << btp->bt_hashshift); i++) { spin_lock_init(&btp->bt_hash[i].bh_lock); INIT_LIST_HEAD(&btp->bt_hash[i].bh_list); @@ -1537,7 +1537,7 @@ STATIC void xfs_free_bufhash( xfs_buftarg_t *btp) { - kmem_free(btp->bt_hash); + kmem_free_large(btp->bt_hash); btp->bt_hash = NULL; } diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index 9e627a8..11cfd82 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c @@ -118,9 +118,14 @@ xfs_Gqm_init(void) */ udqhash = kmem_zalloc_greedy(&hsize, XFS_QM_HASHSIZE_LOW * sizeof(xfs_dqhash_t), - XFS_QM_HASHSIZE_HIGH * sizeof(xfs_dqhash_t), - KM_SLEEP | KM_MAYFAIL | KM_LARGE); - gdqhash = kmem_zalloc(hsize, KM_SLEEP | KM_LARGE); + XFS_QM_HASHSIZE_HIGH * sizeof(xfs_dqhash_t)); + if (!udqhash) + goto out; + + gdqhash = kmem_zalloc_large(hsize); + if (!udqhash) + goto out_free_udqhash; + hsize /= sizeof(xfs_dqhash_t); ndquot = hsize << 8; @@ -170,6 +175,11 @@ xfs_Gqm_init(void) mutex_init(&qcheck_lock); #endif return xqm; + + out_free_udqhash: + kmem_free_large(udqhash); + out: + return NULL; } /* @@ -189,8 +199,8 @@ xfs_qm_destroy( xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i])); xfs_qm_list_destroy(&(xqm->qm_grp_dqhtable[i])); } - kmem_free(xqm->qm_usr_dqhtable); - kmem_free(xqm->qm_grp_dqhtable); + kmem_free_large(xqm->qm_usr_dqhtable); + kmem_free_large(xqm->qm_grp_dqhtable); xqm->qm_usr_dqhtable = NULL; xqm->qm_grp_dqhtable = NULL; xqm->qm_dqhashmask = 0; @@ -219,8 +229,12 @@ xfs_qm_hold_quotafs_ref( */ mutex_lock(&xfs_Gqm_lock); - if (xfs_Gqm == NULL) + if (!xfs_Gqm) { xfs_Gqm = xfs_Gqm_init(); + if (!xfs_Gqm) + return ENOMEM; + } + /* * We can keep a list of all filesystems with quotas mounted for * debugging and statistical purposes, but 
... diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 940307a..3af0231 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -408,8 +408,10 @@ xfs_bulkstat( (XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog); nimask = ~(nicluster - 1); nbcluster = nicluster >> mp->m_sb.sb_inopblog; - irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4, - KM_SLEEP | KM_MAYFAIL | KM_LARGE); + irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4); + if (!irbuf) + return ENOMEM; + nirbuf = irbsize / sizeof(*irbuf); /* @@ -727,7 +729,7 @@ xfs_bulkstat( /* * Done, we're either out of filesystem or space to put the data. */ - kmem_free(irbuf); + kmem_free_large(irbuf); *ubcountp = ubelem; /* * Found some inodes, return them now and return the error next time. -- cgit v1.1 From 9b00f30762fe9f914eb6e03057a616ed63a4e8ca Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 21 Jan 2010 11:17:20 +0000 Subject: xfs: quota limit statvfs available blocks A "df" run on an NFS client of an exported XFS file system reports the wrong information for "available" blocks. When a block quota is enforced, the amount reported as free is limited by the quota, but the amount reported available is not (and should be). Reported-by: Guk-Bong, Kwon Signed-off-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/quota/xfs_qm_bhv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c index a534663..97b410c 100644 --- a/fs/xfs/quota/xfs_qm_bhv.c +++ b/fs/xfs/quota/xfs_qm_bhv.c @@ -59,7 +59,7 @@ xfs_fill_statvfs_from_dquot( be64_to_cpu(dp->d_blk_hardlimit); if (limit && statp->f_blocks > limit) { statp->f_blocks = limit; - statp->f_bfree = + statp->f_bfree = statp->f_bavail = (statp->f_blocks > be64_to_cpu(dp->d_bcount)) ? 
(statp->f_blocks - be64_to_cpu(dp->d_bcount)) : 0; } -- cgit v1.1 From cbe132a8bdcff0f9afd9060948fb50597c7400b8 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 26 Jan 2010 15:08:49 +1100 Subject: xfs: don't hold onto reserved blocks on remount,ro If we hold onto reserved blocks when doing a remount,ro we end up writing the blocks used count to disk that includes the reserved blocks. Reserved blocks are not actually used, so this results in the values in the superblock being incorrect. Hence if we run xfs_check or xfs_repair -n while the filesystem is mounted remount,ro we end up with an inconsistent filesystem being reported. Also, running xfs_copy on the remount,ro filesystem will result in an inconsistent image being generated. To fix this, unreserve the blocks when doing the remount,ro, and reserved them again on remount,rw. This way a remount,ro filesystem will appear consistent on disk to all utilities. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_super.c | 28 ++++++++++++++++++++++++++++ fs/xfs/xfs_mount.h | 1 + 2 files changed, 29 insertions(+) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 9f2e398..e9c2145 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -1318,6 +1318,8 @@ xfs_fs_remount( /* ro -> rw */ if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) { + __uint64_t resblks; + mp->m_flags &= ~XFS_MOUNT_RDONLY; if (mp->m_flags & XFS_MOUNT_BARRIER) xfs_mountfs_check_barriers(mp); @@ -1335,11 +1337,37 @@ xfs_fs_remount( } mp->m_update_flags = 0; } + + /* + * Fill out the reserve pool if it is empty. Use the stashed + * value if it is non-zero, otherwise go with the default. 
+ */ + if (mp->m_resblks_save) { + resblks = mp->m_resblks_save; + mp->m_resblks_save = 0; + } else { + resblks = mp->m_sb.sb_dblocks; + do_div(resblks, 20); + resblks = min_t(__uint64_t, resblks, 1024); + } + xfs_reserve_blocks(mp, &resblks, NULL); } /* rw -> ro */ if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) { + /* + * After we have synced the data but before we sync the + * metadata, we need to free up the reserve block pool so that + * the used block count in the superblock on disk is correct at + * the end of the remount. Stash the current reserve pool size + * so that if we get remounted rw, we can return it to the same + * size. + */ + __uint64_t resblks = 0; + xfs_quiesce_data(mp); + mp->m_resblks_save = mp->m_resblks; + xfs_reserve_blocks(mp, &resblks, NULL); xfs_quiesce_attr(mp); mp->m_flags |= XFS_MOUNT_RDONLY; } diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index f4d1441..02d45f2 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -225,6 +225,7 @@ typedef struct xfs_mount { __uint64_t m_maxioffset; /* maximum inode offset */ __uint64_t m_resblks; /* total reserved blocks */ __uint64_t m_resblks_avail;/* available reserved blocks */ + __uint64_t m_resblks_save; /* reserved blks @ remount,ro */ int m_dalign; /* stripe unit */ int m_swidth; /* stripe width */ int m_sinoalign; /* stripe unit inode alignment */ -- cgit v1.1 From 388f1f0c346b533b06d8bc792f7204ebc3e4b7da Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 26 Jan 2010 15:10:15 +1100 Subject: xfs: turn off sign warnings Because they cause warnings in static inline functions conditionally compiled into XFS from the VFS (e.g. fsnotify). 
Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 1926701..5c5a366 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -16,7 +16,7 @@ # Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA # -EXTRA_CFLAGS += -I$(src) -I$(src)/linux-2.6 -Wpointer-sign +EXTRA_CFLAGS += -I$(src) -I$(src)/linux-2.6 XFS_LINUX := linux-2.6 -- cgit v1.1 From d5db0f97fbbeff11c88dec1aaf1536a975afbaeb Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Fri, 5 Feb 2010 22:59:53 +0000 Subject: xfs: more reserved blocks fixups This mangles the reserved blocks counts a little more. 1) add a helper function for the default reserved count 2) add helper functions to save/restore counts on ro/rw 3) save/restore reserved blocks on freeze/thaw 4) disallow changing reserved count while readonly V2: changed field name to match Dave's changes Signed-off-by: Eric Sandeen Signed-off-by: Alex Elder --- fs/xfs/linux-2.6/xfs_ioctl.c | 3 +++ fs/xfs/linux-2.6/xfs_super.c | 51 ++++++++++++++++++++++++++++++++------------ fs/xfs/xfs_mount.c | 34 +++++++++++++++++++---------- fs/xfs/xfs_mount.h | 1 + 4 files changed, 64 insertions(+), 25 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index 3906e85..4ea1ee1 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -1431,6 +1431,9 @@ xfs_file_ioctl( if (!capable(CAP_SYS_ADMIN)) return -EPERM; + if (mp->m_flags & XFS_MOUNT_RDONLY) + return -XFS_ERROR(EROFS); + if (copy_from_user(&inout, arg, sizeof(inout))) return -XFS_ERROR(EFAULT); diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index e9c2145..6ce828e 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -1256,6 +1256,29 @@ xfs_fs_statfs( return 0; } +STATIC void +xfs_save_resvblks(struct xfs_mount *mp) +{ + __uint64_t resblks = 
0; + + mp->m_resblks_save = mp->m_resblks; + xfs_reserve_blocks(mp, &resblks, NULL); +} + +STATIC void +xfs_restore_resvblks(struct xfs_mount *mp) +{ + __uint64_t resblks; + + if (mp->m_resblks_save) { + resblks = mp->m_resblks_save; + mp->m_resblks_save = 0; + } else + resblks = xfs_default_resblks(mp); + + xfs_reserve_blocks(mp, &resblks, NULL); +} + STATIC int xfs_fs_remount( struct super_block *sb, @@ -1318,8 +1341,6 @@ xfs_fs_remount( /* ro -> rw */ if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) { - __uint64_t resblks; - mp->m_flags &= ~XFS_MOUNT_RDONLY; if (mp->m_flags & XFS_MOUNT_BARRIER) xfs_mountfs_check_barriers(mp); @@ -1342,15 +1363,7 @@ xfs_fs_remount( * Fill out the reserve pool if it is empty. Use the stashed * value if it is non-zero, otherwise go with the default. */ - if (mp->m_resblks_save) { - resblks = mp->m_resblks_save; - mp->m_resblks_save = 0; - } else { - resblks = mp->m_sb.sb_dblocks; - do_div(resblks, 20); - resblks = min_t(__uint64_t, resblks, 1024); - } - xfs_reserve_blocks(mp, &resblks, NULL); + xfs_restore_resvblks(mp); } /* rw -> ro */ @@ -1363,11 +1376,9 @@ xfs_fs_remount( * so that if we get remounted rw, we can return it to the same * size. 
*/ - __uint64_t resblks = 0; xfs_quiesce_data(mp); - mp->m_resblks_save = mp->m_resblks; - xfs_reserve_blocks(mp, &resblks, NULL); + xfs_save_resvblks(mp); xfs_quiesce_attr(mp); mp->m_flags |= XFS_MOUNT_RDONLY; } @@ -1386,11 +1397,22 @@ xfs_fs_freeze( { struct xfs_mount *mp = XFS_M(sb); + xfs_save_resvblks(mp); xfs_quiesce_attr(mp); return -xfs_fs_log_dummy(mp); } STATIC int +xfs_fs_unfreeze( + struct super_block *sb) +{ + struct xfs_mount *mp = XFS_M(sb); + + xfs_restore_resvblks(mp); + return 0; +} + +STATIC int xfs_fs_show_options( struct seq_file *m, struct vfsmount *mnt) @@ -1612,6 +1634,7 @@ static const struct super_operations xfs_super_operations = { .put_super = xfs_fs_put_super, .sync_fs = xfs_fs_sync_fs, .freeze_fs = xfs_fs_freeze, + .unfreeze_fs = xfs_fs_unfreeze, .statfs = xfs_fs_statfs, .remount_fs = xfs_fs_remount, .show_options = xfs_fs_show_options, diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 7f81ed7..5061149 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1091,6 +1091,22 @@ xfs_mount_reset_sbqflags( return xfs_trans_commit(tp, 0); } +__uint64_t +xfs_default_resblks(xfs_mount_t *mp) +{ + __uint64_t resblks; + + /* + * We default to 5% or 1024 fsbs of space reserved, whichever is smaller. + * This may drive us straight to ENOSPC on mount, but that implies + * we were already there on the last unmount. Warn if this occurs. + */ + resblks = mp->m_sb.sb_dblocks; + do_div(resblks, 20); + resblks = min_t(__uint64_t, resblks, 1024); + return resblks; +} + /* * This function does the following on an initial mount of a file system: * - reads the superblock from disk and init the mount struct @@ -1401,18 +1417,14 @@ xfs_mountfs( * when at ENOSPC. This is needed for operations like create with * attr, unwritten extent conversion at ENOSPC, etc. Data allocations * are not allowed to use this reserved space. - * - * We default to 5% or 1024 fsbs of space reserved, whichever is smaller. 
- * This may drive us straight to ENOSPC on mount, but that implies - * we were already there on the last unmount. Warn if this occurs. */ - resblks = mp->m_sb.sb_dblocks; - do_div(resblks, 20); - resblks = min_t(__uint64_t, resblks, 1024); - error = xfs_reserve_blocks(mp, &resblks, NULL); - if (error) - cmn_err(CE_WARN, "XFS: Unable to allocate reserve blocks. " - "Continuing without a reserve pool."); + if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { + resblks = xfs_default_resblks(mp); + error = xfs_reserve_blocks(mp, &resblks, NULL); + if (error) + cmn_err(CE_WARN, "XFS: Unable to allocate reserve " + "blocks. Continuing without a reserve pool."); + } return 0; diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 02d45f2..70504fc 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -421,6 +421,7 @@ typedef struct xfs_mod_sb { } xfs_mod_sb_t; extern int xfs_log_sbcount(xfs_mount_t *, uint); +extern __uint64_t xfs_default_resblks(xfs_mount_t *mp); extern int xfs_mountfs(xfs_mount_t *mp); extern void xfs_unmountfs(xfs_mount_t *); -- cgit v1.1 From 777df5afdb26c71634edd60582be620ff94e87a0 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Sat, 6 Feb 2010 12:37:26 +1100 Subject: xfs: Make inode reclaim states explicit A.K.A.: don't rely on xfs_iflush() return value in reclaim We have gradually been moving checks out of the reclaim code because they are duplicated in xfs_iflush(). We've had a history of problems in this area, and many of them stem from the overloading of the return values from xfs_iflush() and interaction with inode flush locking to determine if the inode is safe to reclaim. With the desire to move to delayed write flushing of inodes and non-blocking inode tree reclaim walks, the overloading of the return value of xfs_iflush makes it very difficult to determine the correct thing to do next. This patch explicitly re-adds the checks to the inode reclaim code, removing the reliance on the return value of xfs_iflush() to determine what to do next. 
It also means that we can clearly document all the inode states that reclaim must handle and hence we can easily see that we handled all the necessary cases. This also removes the need for the xfs_inode_clean() check in xfs_iflush() as all callers now check this first (safely). Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_sync.c | 81 ++++++++++++++++++++++++++++++++++----------- fs/xfs/xfs_inode.c | 11 +----- fs/xfs/xfs_inode.h | 1 + 3 files changed, 64 insertions(+), 29 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index c9b863e..525260c 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -706,12 +706,43 @@ __xfs_inode_clear_reclaim_tag( XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); } +/* + * Inodes in different states need to be treated differently, and the return + * value of xfs_iflush is not sufficient to get this right. The following table + * lists the inode states and the reclaim actions necessary for non-blocking + * reclaim: + * + * + * inode state iflush ret required action + * --------------- ---------- --------------- + * bad - reclaim + * shutdown EIO unpin and reclaim + * clean, unpinned 0 reclaim + * stale, unpinned 0 reclaim + * clean, pinned(*) 0 unpin and reclaim + * stale, pinned 0 unpin and reclaim + * dirty, async 0 block on flush lock, reclaim + * dirty, sync flush 0 block on flush lock, reclaim + * + * (*) dgc: I don't think the clean, pinned state is possible but it gets + * handled anyway given the order of checks implemented. 
+ * + * Hence the order of actions after gaining the locks should be: + * bad => reclaim + * shutdown => unpin and reclaim + * pinned => unpin + * stale => reclaim + * clean => reclaim + * dirty => flush, wait and reclaim + */ STATIC int xfs_reclaim_inode( struct xfs_inode *ip, struct xfs_perag *pag, int sync_mode) { + int error; + /* * The radix tree lock here protects a thread in xfs_iget from racing * with us starting reclaim on the inode. Once we have the @@ -729,30 +760,42 @@ xfs_reclaim_inode( spin_unlock(&ip->i_flags_lock); write_unlock(&pag->pag_ici_lock); - /* - * If the inode is still dirty, then flush it out. If the inode - * is not in the AIL, then it will be OK to flush it delwri as - * long as xfs_iflush() does not keep any references to the inode. - * We leave that decision up to xfs_iflush() since it has the - * knowledge of whether it's OK to simply do a delwri flush of - * the inode or whether we need to wait until the inode is - * pulled from the AIL. - * We get the flush lock regardless, though, just to make sure - * we don't free it while it is being flushed. - */ xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_iflock(ip); - /* - * In the case of a forced shutdown we rely on xfs_iflush() to - * wait for the inode to be unpinned before returning an error. 
- */ - if (!is_bad_inode(VFS_I(ip)) && xfs_iflush(ip, sync_mode) == 0) { - /* synchronize with xfs_iflush_done */ - xfs_iflock(ip); - xfs_ifunlock(ip); + if (is_bad_inode(VFS_I(ip))) + goto reclaim; + if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { + xfs_iunpin_wait(ip); + goto reclaim; + } + if (xfs_ipincount(ip)) + xfs_iunpin_wait(ip); + if (xfs_iflags_test(ip, XFS_ISTALE)) + goto reclaim; + if (xfs_inode_clean(ip)) + goto reclaim; + + /* Now we have an inode that needs flushing */ + error = xfs_iflush(ip, sync_mode); + if (!error) { + switch(sync_mode) { + case XFS_IFLUSH_DELWRI_ELSE_ASYNC: + case XFS_IFLUSH_DELWRI: + case XFS_IFLUSH_ASYNC: + case XFS_IFLUSH_DELWRI_ELSE_SYNC: + case XFS_IFLUSH_SYNC: + /* IO issued, synchronise with IO completion */ + xfs_iflock(ip); + break; + default: + ASSERT(0); + break; + } } +reclaim: + xfs_ifunlock(ip); xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_ireclaim(ip); return 0; diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index d0d1b5a..8d0666d 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2493,7 +2493,7 @@ __xfs_iunpin_wait( wait_event(ip->i_ipin_wait, (atomic_read(&ip->i_pincount) == 0)); } -static inline void +void xfs_iunpin_wait( xfs_inode_t *ip) { @@ -2849,15 +2849,6 @@ xfs_iflush( mp = ip->i_mount; /* - * If the inode isn't dirty, then just release the inode flush lock and - * do nothing. - */ - if (xfs_inode_clean(ip)) { - xfs_ifunlock(ip); - return 0; - } - - /* * We can't flush the inode until it is unpinned, so wait for it if we * are allowed to block. 
We know noone new can pin it, because we are * holding the inode lock shared and you need to hold it exclusively to diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index ec1f28c..8b618ea 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -483,6 +483,7 @@ int xfs_iunlink(struct xfs_trans *, xfs_inode_t *); void xfs_iext_realloc(xfs_inode_t *, int, int); void xfs_ipin(xfs_inode_t *); void xfs_iunpin(xfs_inode_t *); +void xfs_iunpin_wait(xfs_inode_t *); int xfs_iflush(xfs_inode_t *, uint); void xfs_ichgtime(xfs_inode_t *, int); void xfs_lock_inodes(xfs_inode_t **, int, uint); -- cgit v1.1 From c854363e80b49dd04a4de18ebc379eb8c8806674 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Sat, 6 Feb 2010 12:39:36 +1100 Subject: xfs: Use delayed write for inodes rather than async V2 We currently do background inode flush asynchronously, resulting in inodes being written in whatever order the background writeback issues them. Not only that, there are also blocking and non-blocking asynchronous inode flushes, depending on where the flush comes from. This patch completely removes asynchronous inode writeback. It removes all the strange writeback modes and replaces them with either a synchronous flush or a non-blocking delayed write flush. That is, inode flushes will only issue IO directly if they are synchronous, and background flushing may do nothing if the operation would block (e.g. on a pinned inode or buffer lock). Delayed write flushes will now result in the inode buffer sitting in the delwri queue of the buffer cache to be flushed by either an AIL push or by the xfsbufd timing out the buffer. This will allow accumulation of dirty inode buffers in memory and allow optimisation of inode cluster writeback at the xfsbufd level where we have much greater queue depths than the block layer elevators. We will also get adjacent inode cluster buffer IO merging for free when a later patch in the series allows sorting of the delayed write buffers before dispatch. 
This effectively means that any inode that is written back by background writeback will be seen as flush locked during AIL pushing, and will result in the buffers being pushed from there. This writeback path is currently non-optimal, but the next patch in the series will fix that problem. A side effect of this delayed write mechanism is that background inode reclaim will no longer directly flush inodes, nor can it wait on the flush lock. The result is that inode reclaim must leave the inode in the reclaimable state until it is clean. Hence attempts to reclaim a dirty inode in the background will simply skip the inode until it is clean and this allows other mechanisms (i.e. xfsbufd) to do more optimal writeback of the dirty buffers. As a result, the inode reclaim code has been rewritten so that it no longer relies on the ambiguous return values of xfs_iflush() to determine whether it is safe to reclaim an inode. Portions of this patch are derived from patches by Christoph Hellwig. Version 2: - cleanup reclaim code as suggested by Christoph - log background reclaim inode flush errors - just pass sync flags to xfs_iflush Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_super.c | 4 +- fs/xfs/linux-2.6/xfs_sync.c | 105 +++++++++++++++++++++++++++++++------------ fs/xfs/xfs_inode.c | 75 +++---------------------------- fs/xfs/xfs_inode.h | 10 ----- fs/xfs/xfs_inode_item.c | 10 +++-- fs/xfs/xfs_mount.c | 13 +++++- 6 files changed, 102 insertions(+), 115 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 6ce828e..3b5b46b 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -1064,7 +1064,7 @@ xfs_fs_write_inode( xfs_ilock(ip, XFS_ILOCK_SHARED); xfs_iflock(ip); - error = xfs_iflush(ip, XFS_IFLUSH_SYNC); + error = xfs_iflush(ip, SYNC_WAIT); } else { error = EAGAIN; if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) @@ -1072,7 +1072,7 @@ xfs_fs_write_inode( if 
(xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) goto out_unlock; - error = xfs_iflush(ip, XFS_IFLUSH_ASYNC_NOBLOCK); + error = xfs_iflush(ip, 0); } out_unlock: diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 525260c..a9f6d20 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -270,8 +270,7 @@ xfs_sync_inode_attr( goto out_unlock; } - error = xfs_iflush(ip, (flags & SYNC_WAIT) ? - XFS_IFLUSH_SYNC : XFS_IFLUSH_DELWRI); + error = xfs_iflush(ip, flags); out_unlock: xfs_iunlock(ip, XFS_ILOCK_SHARED); @@ -460,16 +459,18 @@ xfs_quiesce_fs( { int count = 0, pincount; + xfs_reclaim_inodes(mp, 0); xfs_flush_buftarg(mp->m_ddev_targp, 0); - xfs_reclaim_inodes(mp, XFS_IFLUSH_DELWRI_ELSE_ASYNC); /* * This loop must run at least twice. The first instance of the loop * will flush most meta data but that will generate more meta data * (typically directory updates). Which then must be flushed and - * logged before we can write the unmount record. + * logged before we can write the unmount record. We also so sync + * reclaim of inodes to catch any that the above delwri flush skipped. 
*/ do { + xfs_reclaim_inodes(mp, SYNC_WAIT); xfs_sync_attr(mp, SYNC_WAIT); pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1); if (!pincount) { @@ -585,7 +586,7 @@ xfs_sync_worker( if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { xfs_log_force(mp, 0); - xfs_reclaim_inodes(mp, XFS_IFLUSH_DELWRI_ELSE_ASYNC); + xfs_reclaim_inodes(mp, 0); /* dgc: errors ignored here */ error = xfs_qm_sync(mp, SYNC_TRYLOCK); error = xfs_sync_fsdata(mp, SYNC_TRYLOCK); @@ -719,21 +720,42 @@ __xfs_inode_clear_reclaim_tag( * shutdown EIO unpin and reclaim * clean, unpinned 0 reclaim * stale, unpinned 0 reclaim - * clean, pinned(*) 0 unpin and reclaim - * stale, pinned 0 unpin and reclaim - * dirty, async 0 block on flush lock, reclaim - * dirty, sync flush 0 block on flush lock, reclaim + * clean, pinned(*) 0 requeue + * stale, pinned EAGAIN requeue + * dirty, delwri ok 0 requeue + * dirty, delwri blocked EAGAIN requeue + * dirty, sync flush 0 reclaim * * (*) dgc: I don't think the clean, pinned state is possible but it gets * handled anyway given the order of checks implemented. * + * As can be seen from the table, the return value of xfs_iflush() is not + * sufficient to correctly decide the reclaim action here. The checks in + * xfs_iflush() might look like duplicates, but they are not. + * + * Also, because we get the flush lock first, we know that any inode that has + * been flushed delwri has had the flush completed by the time we check that + * the inode is clean. The clean inode check needs to be done before flushing + * the inode delwri otherwise we would loop forever requeuing clean inodes as + * we cannot tell apart a successful delwri flush and a clean inode from the + * return value of xfs_iflush(). + * + * Note that because the inode is flushed delayed write by background + * writeback, the flush lock may already be held here and waiting on it can + * result in very long latencies. 
Hence for sync reclaims, where we wait on the + * flush lock, the caller should push out delayed write inodes first before + * trying to reclaim them to minimise the amount of time spent waiting. For + * background relaim, we just requeue the inode for the next pass. + * * Hence the order of actions after gaining the locks should be: * bad => reclaim * shutdown => unpin and reclaim - * pinned => unpin + * pinned, delwri => requeue + * pinned, sync => unpin * stale => reclaim * clean => reclaim - * dirty => flush, wait and reclaim + * dirty, delwri => flush and requeue + * dirty, sync => flush, wait and reclaim */ STATIC int xfs_reclaim_inode( @@ -741,7 +763,7 @@ xfs_reclaim_inode( struct xfs_perag *pag, int sync_mode) { - int error; + int error = 0; /* * The radix tree lock here protects a thread in xfs_iget from racing @@ -761,7 +783,11 @@ xfs_reclaim_inode( write_unlock(&pag->pag_ici_lock); xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_iflock(ip); + if (!xfs_iflock_nowait(ip)) { + if (!(sync_mode & SYNC_WAIT)) + goto out; + xfs_iflock(ip); + } if (is_bad_inode(VFS_I(ip))) goto reclaim; @@ -769,8 +795,13 @@ xfs_reclaim_inode( xfs_iunpin_wait(ip); goto reclaim; } - if (xfs_ipincount(ip)) + if (xfs_ipincount(ip)) { + if (!(sync_mode & SYNC_WAIT)) { + xfs_ifunlock(ip); + goto out; + } xfs_iunpin_wait(ip); + } if (xfs_iflags_test(ip, XFS_ISTALE)) goto reclaim; if (xfs_inode_clean(ip)) @@ -778,27 +809,43 @@ xfs_reclaim_inode( /* Now we have an inode that needs flushing */ error = xfs_iflush(ip, sync_mode); - if (!error) { - switch(sync_mode) { - case XFS_IFLUSH_DELWRI_ELSE_ASYNC: - case XFS_IFLUSH_DELWRI: - case XFS_IFLUSH_ASYNC: - case XFS_IFLUSH_DELWRI_ELSE_SYNC: - case XFS_IFLUSH_SYNC: - /* IO issued, synchronise with IO completion */ - xfs_iflock(ip); - break; - default: - ASSERT(0); - break; - } + if (sync_mode & SYNC_WAIT) { + xfs_iflock(ip); + goto reclaim; } + /* + * When we have to flush an inode but don't have SYNC_WAIT set, we + * flush the inode out using a delwri 
buffer and wait for the next + * call into reclaim to find it in a clean state instead of waiting for + * it now. We also don't return errors here - if the error is transient + * then the next reclaim pass will flush the inode, and if the error + * is permanent then the next sync reclaim will relcaim the inode and + * pass on the error. + */ + if (error && !XFS_FORCED_SHUTDOWN(ip->i_mount)) { + xfs_fs_cmn_err(CE_WARN, ip->i_mount, + "inode 0x%llx background reclaim flush failed with %d", + (long long)ip->i_ino, error); + } +out: + xfs_iflags_clear(ip, XFS_IRECLAIM); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + /* + * We could return EAGAIN here to make reclaim rescan the inode tree in + * a short while. However, this just burns CPU time scanning the tree + * waiting for IO to complete and xfssyncd never goes back to the idle + * state. Instead, return 0 to let the next scheduled background reclaim + * attempt to reclaim the inode again. + */ + return 0; + reclaim: xfs_ifunlock(ip); xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_ireclaim(ip); - return 0; + return error; + } int diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 8d0666d..fa31360 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2835,8 +2835,6 @@ xfs_iflush( xfs_dinode_t *dip; xfs_mount_t *mp; int error; - int noblock = (flags == XFS_IFLUSH_ASYNC_NOBLOCK); - enum { INT_DELWRI = (1 << 0), INT_ASYNC = (1 << 1) }; XFS_STATS_INC(xs_iflush_count); @@ -2859,7 +2857,7 @@ xfs_iflush( * in the same cluster are dirty, they will probably write the inode * out for us if they occur after the log force completes. */ - if (noblock && xfs_ipincount(ip)) { + if (!(flags & SYNC_WAIT) && xfs_ipincount(ip)) { xfs_iunpin_nowait(ip); xfs_ifunlock(ip); return EAGAIN; @@ -2893,60 +2891,10 @@ xfs_iflush( } /* - * Decide how buffer will be flushed out. This is done before - * the call to xfs_iflush_int because this field is zeroed by it. 
- */ - if (iip != NULL && iip->ili_format.ilf_fields != 0) { - /* - * Flush out the inode buffer according to the directions - * of the caller. In the cases where the caller has given - * us a choice choose the non-delwri case. This is because - * the inode is in the AIL and we need to get it out soon. - */ - switch (flags) { - case XFS_IFLUSH_SYNC: - case XFS_IFLUSH_DELWRI_ELSE_SYNC: - flags = 0; - break; - case XFS_IFLUSH_ASYNC_NOBLOCK: - case XFS_IFLUSH_ASYNC: - case XFS_IFLUSH_DELWRI_ELSE_ASYNC: - flags = INT_ASYNC; - break; - case XFS_IFLUSH_DELWRI: - flags = INT_DELWRI; - break; - default: - ASSERT(0); - flags = 0; - break; - } - } else { - switch (flags) { - case XFS_IFLUSH_DELWRI_ELSE_SYNC: - case XFS_IFLUSH_DELWRI_ELSE_ASYNC: - case XFS_IFLUSH_DELWRI: - flags = INT_DELWRI; - break; - case XFS_IFLUSH_ASYNC_NOBLOCK: - case XFS_IFLUSH_ASYNC: - flags = INT_ASYNC; - break; - case XFS_IFLUSH_SYNC: - flags = 0; - break; - default: - ASSERT(0); - flags = 0; - break; - } - } - - /* * Get the buffer containing the on-disk inode. */ error = xfs_itobp(mp, NULL, ip, &dip, &bp, - noblock ? XBF_TRYLOCK : XBF_LOCK); + (flags & SYNC_WAIT) ? XBF_LOCK : XBF_TRYLOCK); if (error || !bp) { xfs_ifunlock(ip); return error; @@ -2974,13 +2922,10 @@ xfs_iflush( if (error) goto cluster_corrupt_out; - if (flags & INT_DELWRI) { - xfs_bdwrite(mp, bp); - } else if (flags & INT_ASYNC) { - error = xfs_bawrite(mp, bp); - } else { + if (flags & SYNC_WAIT) error = xfs_bwrite(mp, bp); - } + else + xfs_bdwrite(mp, bp); return error; corrupt_out: @@ -3015,16 +2960,6 @@ xfs_iflush_int( iip = ip->i_itemp; mp = ip->i_mount; - - /* - * If the inode isn't dirty, then just release the inode - * flush lock and do nothing. 
- */ - if (xfs_inode_clean(ip)) { - xfs_ifunlock(ip); - return 0; - } - /* set *dip = inode's place in the buffer */ dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset); diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 8b618ea..6c912b0 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -420,16 +420,6 @@ static inline void xfs_ifunlock(xfs_inode_t *ip) #define XFS_ILOCK_DEP(flags) (((flags) & XFS_ILOCK_DEP_MASK) >> XFS_ILOCK_SHIFT) /* - * Flags for xfs_iflush() - */ -#define XFS_IFLUSH_DELWRI_ELSE_SYNC 1 -#define XFS_IFLUSH_DELWRI_ELSE_ASYNC 2 -#define XFS_IFLUSH_SYNC 3 -#define XFS_IFLUSH_ASYNC 4 -#define XFS_IFLUSH_DELWRI 5 -#define XFS_IFLUSH_ASYNC_NOBLOCK 6 - -/* * Flags for xfs_itruncate_start(). */ #define XFS_ITRUNC_DEFINITE 0x1 diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 48ec1c0..207553e 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -866,10 +866,14 @@ xfs_inode_item_push( iip->ili_format.ilf_fields != 0); /* - * Write out the inode. The completion routine ('iflush_done') will - * pull it from the AIL, mark it clean, unlock the flush lock. + * Push the inode to it's backing buffer. This will not remove the + * inode from the AIL - a further push will be required to trigger a + * buffer push. However, this allows all the dirty inodes to be pushed + * to the buffer before it is pushed to disk. THe buffer IO completion + * will pull th einode from the AIL, mark it clean and unlock the flush + * lock. */ - (void) xfs_iflush(ip, XFS_IFLUSH_ASYNC); + (void) xfs_iflush(ip, 0); xfs_iunlock(ip, XFS_ILOCK_SHARED); return; diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 5061149..6afaaeb 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1468,7 +1468,18 @@ xfs_unmountfs( * need to force the log first. 
*/ xfs_log_force(mp, XFS_LOG_SYNC); - xfs_reclaim_inodes(mp, XFS_IFLUSH_ASYNC); + + /* + * Do a delwri reclaim pass first so that as many dirty inodes are + * queued up for IO as possible. Then flush the buffers before making + * a synchronous path to catch all the remaining inodes are reclaimed. + * This makes the reclaim process as quick as possible by avoiding + * synchronous writeout and blocking on inodes already in the delwri + * state as much as possible. + */ + xfs_reclaim_inodes(mp, 0); + XFS_bflush(mp->m_ddev_targp); + xfs_reclaim_inodes(mp, SYNC_WAIT); xfs_qm_unmount(mp); -- cgit v1.1 From d808f617ad00a413585b806de340feda5ad9a2da Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 2 Feb 2010 10:13:42 +1100 Subject: xfs: Don't issue buffer IO direct from AIL push V2 All buffers logged into the AIL are marked as delayed write. When the AIL needs to push the buffer out, it issues an async write of the buffer. This means that IO patterns are dependent on the order of buffers in the AIL. Instead of flushing the buffer, promote the buffer in the delayed write list so that the next time the xfsbufd is run the buffer will be flushed by the xfsbufd. Return the state to the xfsaild that the buffer was promoted so that the xfsaild knows that it needs to cause the xfsbufd to run to flush the buffers that were promoted. Using the xfsbufd for issuing the IO allows us to dispatch all buffer IO from the one queue. This means that we can make much more enlightened decisions on what order to flush buffers to disk as we don't have multiple places issuing IO. Optimisations to xfsbufd will be in a future patch. Version 2 - kill XFS_ITEM_FLUSHING as it is now unused. 
Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_buf.c | 29 +++++++++++++ fs/xfs/linux-2.6/xfs_buf.h | 2 + fs/xfs/linux-2.6/xfs_trace.h | 1 + fs/xfs/quota/xfs_dquot_item.c | 85 ++++++------------------------------- fs/xfs/quota/xfs_dquot_item.h | 4 -- fs/xfs/xfs_buf_item.c | 64 +++++++++++++++------------- fs/xfs/xfs_inode_item.c | 98 +++++++------------------------------------ fs/xfs/xfs_inode_item.h | 6 --- fs/xfs/xfs_trans.h | 3 +- fs/xfs/xfs_trans_ail.c | 13 +++--- 10 files changed, 102 insertions(+), 203 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 44e20e5..b306265 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -1778,6 +1778,35 @@ xfs_buf_delwri_dequeue( trace_xfs_buf_delwri_dequeue(bp, _RET_IP_); } +/* + * If a delwri buffer needs to be pushed before it has aged out, then promote + * it to the head of the delwri queue so that it will be flushed on the next + * xfsbufd run. We do this by resetting the queuetime of the buffer to be older + * than the age currently needed to flush the buffer. Hence the next time the + * xfsbufd sees it is guaranteed to be considered old enough to flush. + */ +void +xfs_buf_delwri_promote( + struct xfs_buf *bp) +{ + struct xfs_buftarg *btp = bp->b_target; + long age = xfs_buf_age_centisecs * msecs_to_jiffies(10) + 1; + + ASSERT(bp->b_flags & XBF_DELWRI); + ASSERT(bp->b_flags & _XBF_DELWRI_Q); + + /* + * Check the buffer age before locking the delayed write queue as we + * don't need to promote buffers that are already past the flush age. 
+ */ + if (bp->b_queuetime < jiffies - age) + return; + bp->b_queuetime = jiffies - age; + spin_lock(&btp->bt_delwrite_lock); + list_move(&bp->b_list, &btp->bt_delwrite_queue); + spin_unlock(&btp->bt_delwrite_lock); +} + STATIC void xfs_buf_runall_queues( struct workqueue_struct *queue) diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index ea8c198..be45e8c 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h @@ -266,6 +266,7 @@ extern int xfs_buf_ispin(xfs_buf_t *); /* Delayed Write Buffer Routines */ extern void xfs_buf_delwri_dequeue(xfs_buf_t *); +extern void xfs_buf_delwri_promote(xfs_buf_t *); /* Buffer Daemon Setup Routines */ extern int xfs_buf_init(void); @@ -395,6 +396,7 @@ extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *); extern void xfs_wait_buftarg(xfs_buftarg_t *); extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int); extern int xfs_flush_buftarg(xfs_buftarg_t *, int); + #ifdef CONFIG_KDB_MODULES extern struct list_head *xfs_get_buftarg_list(void); #endif diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h index 1bb09e7..a4574dc 100644 --- a/fs/xfs/linux-2.6/xfs_trace.h +++ b/fs/xfs/linux-2.6/xfs_trace.h @@ -483,6 +483,7 @@ DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock); DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock_stale); DEFINE_BUF_ITEM_EVENT(xfs_buf_item_committed); DEFINE_BUF_ITEM_EVENT(xfs_buf_item_push); +DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pushbuf); DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf); DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf_recur); DEFINE_BUF_ITEM_EVENT(xfs_trans_getsb); diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c index 1b56437..dda0fb0 100644 --- a/fs/xfs/quota/xfs_dquot_item.c +++ b/fs/xfs/quota/xfs_dquot_item.c @@ -212,66 +212,31 @@ xfs_qm_dquot_logitem_pushbuf( xfs_dquot_t *dqp; xfs_mount_t *mp; xfs_buf_t *bp; - uint dopush; dqp = qip->qli_dquot; ASSERT(XFS_DQ_IS_LOCKED(dqp)); /* - * The qli_pushbuf_flag 
keeps others from - * trying to duplicate our effort. - */ - ASSERT(qip->qli_pushbuf_flag != 0); - ASSERT(qip->qli_push_owner == current_pid()); - - /* * If flushlock isn't locked anymore, chances are that the * inode flush completed and the inode was taken off the AIL. * So, just get out. */ if (completion_done(&dqp->q_flush) || ((qip->qli_item.li_flags & XFS_LI_IN_AIL) == 0)) { - qip->qli_pushbuf_flag = 0; xfs_dqunlock(dqp); return; } mp = dqp->q_mount; bp = xfs_incore(mp->m_ddev_targp, qip->qli_format.qlf_blkno, XFS_QI_DQCHUNKLEN(mp), XBF_TRYLOCK); - if (bp != NULL) { - if (XFS_BUF_ISDELAYWRITE(bp)) { - dopush = ((qip->qli_item.li_flags & XFS_LI_IN_AIL) && - !completion_done(&dqp->q_flush)); - qip->qli_pushbuf_flag = 0; - xfs_dqunlock(dqp); - - if (XFS_BUF_ISPINNED(bp)) - xfs_log_force(mp, 0); - - if (dopush) { - int error; -#ifdef XFSRACEDEBUG - delay_for_intr(); - delay(300); -#endif - error = xfs_bawrite(mp, bp); - if (error) - xfs_fs_cmn_err(CE_WARN, mp, - "xfs_qm_dquot_logitem_pushbuf: pushbuf error %d on qip %p, bp %p", - error, qip, bp); - } else { - xfs_buf_relse(bp); - } - } else { - qip->qli_pushbuf_flag = 0; - xfs_dqunlock(dqp); - xfs_buf_relse(bp); - } + xfs_dqunlock(dqp); + if (!bp) return; - } + if (XFS_BUF_ISDELAYWRITE(bp)) + xfs_buf_delwri_promote(bp); + xfs_buf_relse(bp); + return; - qip->qli_pushbuf_flag = 0; - xfs_dqunlock(dqp); } /* @@ -289,50 +254,24 @@ xfs_qm_dquot_logitem_trylock( xfs_dq_logitem_t *qip) { xfs_dquot_t *dqp; - uint retval; dqp = qip->qli_dquot; if (atomic_read(&dqp->q_pincount) > 0) - return (XFS_ITEM_PINNED); + return XFS_ITEM_PINNED; if (! xfs_qm_dqlock_nowait(dqp)) - return (XFS_ITEM_LOCKED); + return XFS_ITEM_LOCKED; - retval = XFS_ITEM_SUCCESS; if (!xfs_dqflock_nowait(dqp)) { /* - * The dquot is already being flushed. It may have been - * flushed delayed write, however, and we don't want to - * get stuck waiting for that to complete. So, we want to check - * to see if we can lock the dquot's buffer without sleeping. 
- * If we can and it is marked for delayed write, then we - * hold it and send it out from the push routine. We don't - * want to do that now since we might sleep in the device - * strategy routine. We also don't want to grab the buffer lock - * here because we'd like not to call into the buffer cache - * while holding the AIL lock. - * Make sure to only return PUSHBUF if we set pushbuf_flag - * ourselves. If someone else is doing it then we don't - * want to go to the push routine and duplicate their efforts. + * dquot has already been flushed to the backing buffer, + * leave it locked, pushbuf routine will unlock it. */ - if (qip->qli_pushbuf_flag == 0) { - qip->qli_pushbuf_flag = 1; - ASSERT(qip->qli_format.qlf_blkno == dqp->q_blkno); -#ifdef DEBUG - qip->qli_push_owner = current_pid(); -#endif - /* - * The dquot is left locked. - */ - retval = XFS_ITEM_PUSHBUF; - } else { - retval = XFS_ITEM_FLUSHING; - xfs_dqunlock_nonotify(dqp); - } + return XFS_ITEM_PUSHBUF; } ASSERT(qip->qli_item.li_flags & XFS_LI_IN_AIL); - return (retval); + return XFS_ITEM_SUCCESS; } diff --git a/fs/xfs/quota/xfs_dquot_item.h b/fs/xfs/quota/xfs_dquot_item.h index 5a63253..5acae2a 100644 --- a/fs/xfs/quota/xfs_dquot_item.h +++ b/fs/xfs/quota/xfs_dquot_item.h @@ -27,10 +27,6 @@ typedef struct xfs_dq_logitem { xfs_log_item_t qli_item; /* common portion */ struct xfs_dquot *qli_dquot; /* dquot ptr */ xfs_lsn_t qli_flush_lsn; /* lsn at last flush */ - unsigned short qli_pushbuf_flag; /* 1 bit used in push_ail */ -#ifdef DEBUG - uint64_t qli_push_owner; -#endif xfs_dq_logformat_t qli_format; /* logged structure */ } xfs_dq_logitem_t; diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index e0a1158..f3c49e6 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -467,8 +467,10 @@ xfs_buf_item_unpin_remove( /* * This is called to attempt to lock the buffer associated with this * buf log item. Don't sleep on the buffer lock. If we can't get - * the lock right away, return 0. 
If we can get the lock, pull the - * buffer from the free list, mark it busy, and return 1. + * the lock right away, return 0. If we can get the lock, take a + * reference to the buffer. If this is a delayed write buffer that + * needs AIL help to be written back, invoke the pushbuf routine + * rather than the normal success path. */ STATIC uint xfs_buf_item_trylock( @@ -477,24 +479,18 @@ xfs_buf_item_trylock( xfs_buf_t *bp; bp = bip->bli_buf; - - if (XFS_BUF_ISPINNED(bp)) { + if (XFS_BUF_ISPINNED(bp)) return XFS_ITEM_PINNED; - } - - if (!XFS_BUF_CPSEMA(bp)) { + if (!XFS_BUF_CPSEMA(bp)) return XFS_ITEM_LOCKED; - } - /* - * Remove the buffer from the free list. Only do this - * if it's on the free list. Private buffers like the - * superblock buffer are not. - */ + /* take a reference to the buffer. */ XFS_BUF_HOLD(bp); ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); trace_xfs_buf_item_trylock(bip); + if (XFS_BUF_ISDELAYWRITE(bp)) + return XFS_ITEM_PUSHBUF; return XFS_ITEM_SUCCESS; } @@ -626,11 +622,9 @@ xfs_buf_item_committed( } /* - * This is called to asynchronously write the buffer associated with this - * buf log item out to disk. The buffer will already have been locked by - * a successful call to xfs_buf_item_trylock(). If the buffer still has - * B_DELWRI set, then get it going out to disk with a call to bawrite(). - * If not, then just release the buffer. + * The buffer is locked, but is not a delayed write buffer. This happens + * if we race with IO completion and hence we don't want to try to write it + * again. Just release the buffer. 
*/ STATIC void xfs_buf_item_push( @@ -642,17 +636,29 @@ xfs_buf_item_push( trace_xfs_buf_item_push(bip); bp = bip->bli_buf; + ASSERT(!XFS_BUF_ISDELAYWRITE(bp)); + xfs_buf_relse(bp); +} - if (XFS_BUF_ISDELAYWRITE(bp)) { - int error; - error = xfs_bawrite(bip->bli_item.li_mountp, bp); - if (error) - xfs_fs_cmn_err(CE_WARN, bip->bli_item.li_mountp, - "xfs_buf_item_push: pushbuf error %d on bip %p, bp %p", - error, bip, bp); - } else { - xfs_buf_relse(bp); - } +/* + * The buffer is locked and is a delayed write buffer. Promote the buffer + * in the delayed write queue as the caller knows that they must invoke + * the xfsbufd to get this buffer written. We have to unlock the buffer + * to allow the xfsbufd to write it, too. + */ +STATIC void +xfs_buf_item_pushbuf( + xfs_buf_log_item_t *bip) +{ + xfs_buf_t *bp; + + ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); + trace_xfs_buf_item_pushbuf(bip); + + bp = bip->bli_buf; + ASSERT(XFS_BUF_ISDELAYWRITE(bp)); + xfs_buf_delwri_promote(bp); + xfs_buf_relse(bp); } /* ARGSUSED */ @@ -677,7 +683,7 @@ static struct xfs_item_ops xfs_buf_item_ops = { .iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t)) xfs_buf_item_committed, .iop_push = (void(*)(xfs_log_item_t*))xfs_buf_item_push, - .iop_pushbuf = NULL, + .iop_pushbuf = (void(*)(xfs_log_item_t*))xfs_buf_item_pushbuf, .iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t)) xfs_buf_item_committing }; diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 207553e..d4dc063 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -602,33 +602,20 @@ xfs_inode_item_trylock( if (!xfs_iflock_nowait(ip)) { /* - * If someone else isn't already trying to push the inode - * buffer, we get to do it. + * inode has already been flushed to the backing buffer, + * leave it locked in shared mode, pushbuf routine will + * unlock it. 
*/ - if (iip->ili_pushbuf_flag == 0) { - iip->ili_pushbuf_flag = 1; -#ifdef DEBUG - iip->ili_push_owner = current_pid(); -#endif - /* - * Inode is left locked in shared mode. - * Pushbuf routine gets to unlock it. - */ - return XFS_ITEM_PUSHBUF; - } else { - /* - * We hold the AIL lock, so we must specify the - * NONOTIFY flag so that we won't double trip. - */ - xfs_iunlock(ip, XFS_ILOCK_SHARED|XFS_IUNLOCK_NONOTIFY); - return XFS_ITEM_FLUSHING; - } - /* NOTREACHED */ + return XFS_ITEM_PUSHBUF; } /* Stale items should force out the iclog */ if (ip->i_flags & XFS_ISTALE) { xfs_ifunlock(ip); + /* + * we hold the AIL lock - notify the unlock routine of this + * so it doesn't try to get the lock again. + */ xfs_iunlock(ip, XFS_ILOCK_SHARED|XFS_IUNLOCK_NONOTIFY); return XFS_ITEM_PINNED; } @@ -746,11 +733,8 @@ xfs_inode_item_committed( * This gets called by xfs_trans_push_ail(), when IOP_TRYLOCK * failed to get the inode flush lock but did get the inode locked SHARED. * Here we're trying to see if the inode buffer is incore, and if so whether it's - * marked delayed write. If that's the case, we'll initiate a bawrite on that - * buffer to expedite the process. - * - * We aren't holding the AIL lock (or the flush lock) when this gets called, - * so it is inherently race-y. + * marked delayed write. If that's the case, we'll promote it and that will + * allow the caller to write the buffer by triggering the xfsbufd to run. */ STATIC void xfs_inode_item_pushbuf( @@ -759,26 +743,16 @@ xfs_inode_item_pushbuf( xfs_inode_t *ip; xfs_mount_t *mp; xfs_buf_t *bp; - uint dopush; ip = iip->ili_inode; - ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED)); /* - * The ili_pushbuf_flag keeps others from - * trying to duplicate our effort. - */ - ASSERT(iip->ili_pushbuf_flag != 0); - ASSERT(iip->ili_push_owner == current_pid()); - - /* * If a flush is not in progress anymore, chances are that the * inode was taken off the AIL. So, just get out. 
*/ if (completion_done(&ip->i_flush) || ((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0)) { - iip->ili_pushbuf_flag = 0; xfs_iunlock(ip, XFS_ILOCK_SHARED); return; } @@ -787,53 +761,12 @@ xfs_inode_item_pushbuf( bp = xfs_incore(mp->m_ddev_targp, iip->ili_format.ilf_blkno, iip->ili_format.ilf_len, XBF_TRYLOCK); - if (bp != NULL) { - if (XFS_BUF_ISDELAYWRITE(bp)) { - /* - * We were racing with iflush because we don't hold - * the AIL lock or the flush lock. However, at this point, - * we have the buffer, and we know that it's dirty. - * So, it's possible that iflush raced with us, and - * this item is already taken off the AIL. - * If not, we can flush it async. - */ - dopush = ((iip->ili_item.li_flags & XFS_LI_IN_AIL) && - !completion_done(&ip->i_flush)); - iip->ili_pushbuf_flag = 0; - xfs_iunlock(ip, XFS_ILOCK_SHARED); - - trace_xfs_inode_item_push(bp, _RET_IP_); - - if (XFS_BUF_ISPINNED(bp)) - xfs_log_force(mp, 0); - - if (dopush) { - int error; - error = xfs_bawrite(mp, bp); - if (error) - xfs_fs_cmn_err(CE_WARN, mp, - "xfs_inode_item_pushbuf: pushbuf error %d on iip %p, bp %p", - error, iip, bp); - } else { - xfs_buf_relse(bp); - } - } else { - iip->ili_pushbuf_flag = 0; - xfs_iunlock(ip, XFS_ILOCK_SHARED); - xfs_buf_relse(bp); - } - return; - } - /* - * We have to be careful about resetting pushbuf flag too early (above). - * Even though in theory we can do it as soon as we have the buflock, - * we don't want others to be doing work needlessly. They'll come to - * this function thinking that pushing the buffer is their - * responsibility only to find that the buffer is still locked by - * another doing the same thing - */ - iip->ili_pushbuf_flag = 0; xfs_iunlock(ip, XFS_ILOCK_SHARED); + if (!bp) + return; + if (XFS_BUF_ISDELAYWRITE(bp)) + xfs_buf_delwri_promote(bp); + xfs_buf_relse(bp); return; } @@ -937,7 +870,6 @@ xfs_inode_item_init( /* We have zeroed memory. No need ... 
iip->ili_extents_buf = NULL; - iip->ili_pushbuf_flag = 0; */ iip->ili_format.ilf_type = XFS_LI_INODE; diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h index cc8df1a..9a46795 100644 --- a/fs/xfs/xfs_inode_item.h +++ b/fs/xfs/xfs_inode_item.h @@ -144,12 +144,6 @@ typedef struct xfs_inode_log_item { data exts */ struct xfs_bmbt_rec *ili_aextents_buf; /* array of logged attr exts */ - unsigned int ili_pushbuf_flag; /* one bit used in push_ail */ - -#ifdef DEBUG - uint64_t ili_push_owner; /* one who sets pushbuf_flag - above gets to push the buf */ -#endif #ifdef XFS_TRANS_DEBUG int ili_root_size; char *ili_orig_root; diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index ca64f33..c93e3a1 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -861,8 +861,7 @@ typedef struct xfs_item_ops { #define XFS_ITEM_SUCCESS 0 #define XFS_ITEM_PINNED 1 #define XFS_ITEM_LOCKED 2 -#define XFS_ITEM_FLUSHING 3 -#define XFS_ITEM_PUSHBUF 4 +#define XFS_ITEM_PUSHBUF 3 /* * This structure is used to maintain a list of block ranges that have been diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index d7b1af8..e799824 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -253,6 +253,7 @@ xfsaild_push( int flush_log, count, stuck; xfs_mount_t *mp = ailp->xa_mount; struct xfs_ail_cursor *cur = &ailp->xa_cursors; + int push_xfsbufd = 0; spin_lock(&ailp->xa_lock); xfs_trans_ail_cursor_init(ailp, cur); @@ -308,6 +309,7 @@ xfsaild_push( XFS_STATS_INC(xs_push_ail_pushbuf); IOP_PUSHBUF(lip); last_pushed_lsn = lsn; + push_xfsbufd = 1; break; case XFS_ITEM_PINNED: @@ -322,12 +324,6 @@ xfsaild_push( stuck++; break; - case XFS_ITEM_FLUSHING: - XFS_STATS_INC(xs_push_ail_flushing); - last_pushed_lsn = lsn; - stuck++; - break; - default: ASSERT(0); break; @@ -374,6 +370,11 @@ xfsaild_push( xfs_log_force(mp, 0); } + if (push_xfsbufd) { + /* we've got delayed write buffers to flush */ + wake_up_process(mp->m_ddev_targp->bt_task); + } + if (!count) { /* We're 
past our target or empty, so idle */ last_pushed_lsn = 0; -- cgit v1.1 From 089716aa1480b7197bcd678b8477774c379a2768 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 26 Jan 2010 15:13:25 +1100 Subject: xfs: Sort delayed write buffers before dispatch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently when the xfsbufd writes delayed write buffers, it pushes them to disk in the order they come off the delayed write list. If there are lots of buffers spread widely over the disk, this results in overwhelming the elevator sort queues in the block layer and we end up losing the possibility of merging adjacent buffers to minimise the number of IOs. Use the new generic list_sort function to sort the delwri dispatch queue before issue to ensure that the buffers are pushed in the most friendly order possible to the lower layers. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_buf.c | 87 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 60 insertions(+), 27 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index b306265..4556a4c 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "xfs_sb.h" #include "xfs_inum.h" @@ -1877,14 +1878,42 @@ xfs_buf_delwri_split( } +/* + * Compare function is more complex than it needs to be because + * the return value is only 32 bits and we are doing comparisons + * on 64 bit values + */ +static int +xfs_buf_cmp( + void *priv, + struct list_head *a, + struct list_head *b) +{ + struct xfs_buf *ap = container_of(a, struct xfs_buf, b_list); + struct xfs_buf *bp = container_of(b, struct xfs_buf, b_list); + xfs_daddr_t diff; + + diff = ap->b_bn - bp->b_bn; + if (diff < 0) + return -1; + if (diff > 0) + return 1; + return 0; +} + +void +xfs_buf_delwri_sort( + xfs_buftarg_t *target, + struct list_head *list) +{ +
list_sort(NULL, list, xfs_buf_cmp); +} + STATIC int xfsbufd( void *data) { - struct list_head tmp; - xfs_buftarg_t *target = (xfs_buftarg_t *)data; - int count; - xfs_buf_t *bp; + xfs_buftarg_t *target = (xfs_buftarg_t *)data; current->flags |= PF_MEMALLOC; @@ -1893,6 +1922,8 @@ xfsbufd( do { long age = xfs_buf_age_centisecs * msecs_to_jiffies(10); long tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10); + int count = 0; + struct list_head tmp; if (unlikely(freezing(current))) { set_bit(XBT_FORCE_SLEEP, &target->bt_flags); @@ -1907,11 +1938,10 @@ xfsbufd( schedule_timeout_interruptible(tout); xfs_buf_delwri_split(target, &tmp, age); - count = 0; + list_sort(NULL, &tmp, xfs_buf_cmp); while (!list_empty(&tmp)) { - bp = list_entry(tmp.next, xfs_buf_t, b_list); - ASSERT(target == bp->b_target); - + struct xfs_buf *bp; + bp = list_first_entry(&tmp, struct xfs_buf, b_list); list_del_init(&bp->b_list); xfs_buf_iostrategy(bp); count++; @@ -1937,42 +1967,45 @@ xfs_flush_buftarg( xfs_buftarg_t *target, int wait) { - struct list_head tmp; - xfs_buf_t *bp, *n; + xfs_buf_t *bp; int pincount = 0; + LIST_HEAD(tmp_list); + LIST_HEAD(wait_list); xfs_buf_runall_queues(xfsconvertd_workqueue); xfs_buf_runall_queues(xfsdatad_workqueue); xfs_buf_runall_queues(xfslogd_workqueue); set_bit(XBT_FORCE_FLUSH, &target->bt_flags); - pincount = xfs_buf_delwri_split(target, &tmp, 0); + pincount = xfs_buf_delwri_split(target, &tmp_list, 0); /* - * Dropped the delayed write list lock, now walk the temporary list + * Dropped the delayed write list lock, now walk the temporary list. + * All I/O is issued async and then if we need to wait for completion + * we do that after issuing all the IO. 
*/ - list_for_each_entry_safe(bp, n, &tmp, b_list) { + list_sort(NULL, &tmp_list, xfs_buf_cmp); + while (!list_empty(&tmp_list)) { + bp = list_first_entry(&tmp_list, struct xfs_buf, b_list); ASSERT(target == bp->b_target); - if (wait) + list_del_init(&bp->b_list); + if (wait) { bp->b_flags &= ~XBF_ASYNC; - else - list_del_init(&bp->b_list); - + list_add(&bp->b_list, &wait_list); + } xfs_buf_iostrategy(bp); } - if (wait) + if (wait) { + /* Expedite and wait for IO to complete. */ blk_run_address_space(target->bt_mapping); + while (!list_empty(&wait_list)) { + bp = list_first_entry(&wait_list, struct xfs_buf, b_list); - /* - * Remaining list items must be flushed before returning - */ - while (!list_empty(&tmp)) { - bp = list_entry(tmp.next, xfs_buf_t, b_list); - - list_del_init(&bp->b_list); - xfs_iowait(bp); - xfs_buf_relse(bp); + list_del_init(&bp->b_list); + xfs_iowait(bp); + xfs_buf_relse(bp); + } } return pincount; -- cgit v1.1 From 7d6a7bde52e449f21a0e86a7a4955b4e08a49d69 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 26 Jan 2010 15:13:41 +1100 Subject: xfs: Use delay write promotion for dquot flushing xfs_qm_dqflock_pushbuf_wait() does a very similar trick to item pushing used to do to flush out delayed write dquot buffers. Change it to use the new promotion method rather than an async flush. Also, xfs_qm_dqflock_pushbuf_wait() can return without the flush lock held, yet the callers make the assumption that after this call the flush lock is held. Always return with the flush lock held. 
Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/quota/xfs_dquot.c | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c index f9baeed..1620a56 100644 --- a/fs/xfs/quota/xfs_dquot.c +++ b/fs/xfs/quota/xfs_dquot.c @@ -1528,21 +1528,16 @@ xfs_qm_dqflock_pushbuf_wait( */ bp = xfs_incore(dqp->q_mount->m_ddev_targp, dqp->q_blkno, XFS_QI_DQCHUNKLEN(dqp->q_mount), XBF_TRYLOCK); - if (bp != NULL) { - if (XFS_BUF_ISDELAYWRITE(bp)) { - int error; - - if (XFS_BUF_ISPINNED(bp)) - xfs_log_force(dqp->q_mount, 0); - error = xfs_bawrite(dqp->q_mount, bp); - if (error) - xfs_fs_cmn_err(CE_WARN, dqp->q_mount, - "xfs_qm_dqflock_pushbuf_wait: " - "pushbuf error %d on dqp %p, bp %p", - error, dqp, bp); - } else { - xfs_buf_relse(bp); - } + if (!bp) + goto out_lock; + + if (XFS_BUF_ISDELAYWRITE(bp)) { + if (XFS_BUF_ISPINNED(bp)) + xfs_log_force(dqp->q_mount, 0); + xfs_buf_delwri_promote(bp); + wake_up_process(bp->b_target->bt_task); } + xfs_buf_relse(bp); +out_lock: xfs_dqflock(dqp); } -- cgit v1.1 From 20026d92013d7bb3abb295337191def6758fc086 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 4 Feb 2010 09:48:58 +1100 Subject: xfs: kill the unused XFS_QMOPT_* flush flags V2 dquots are never flushed asynchronously. Remove the flag and the async write support from the flush function. Make the default flush a delwri flush to match the inode flush code, which leaves XFS_QMOPT_SYNC as the only flag remaining. Convert that to use SYNC_WAIT instead, just like the inode flush code.
V2: - just pass flush flags straight through Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/quota/xfs_dquot.c | 13 ++++++------- fs/xfs/quota/xfs_dquot_item.c | 2 +- fs/xfs/quota/xfs_qm.c | 14 ++++++-------- fs/xfs/xfs_quota.h | 8 +------- 4 files changed, 14 insertions(+), 23 deletions(-) (limited to 'fs') diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c index 1620a56..5f79dd7 100644 --- a/fs/xfs/quota/xfs_dquot.c +++ b/fs/xfs/quota/xfs_dquot.c @@ -1187,7 +1187,7 @@ xfs_qm_dqflush( * block, nada. */ if (!XFS_DQ_IS_DIRTY(dqp) || - (!(flags & XFS_QMOPT_SYNC) && atomic_read(&dqp->q_pincount) > 0)) { + (!(flags & SYNC_WAIT) && atomic_read(&dqp->q_pincount) > 0)) { xfs_dqfunlock(dqp); return 0; } @@ -1251,18 +1251,17 @@ xfs_qm_dqflush( xfs_log_force(mp, 0); } - if (flags & XFS_QMOPT_DELWRI) { - xfs_bdwrite(mp, bp); - } else { + if (flags & SYNC_WAIT) error = xfs_bwrite(mp, bp); - } + else + xfs_bdwrite(mp, bp); trace_xfs_dqflush_done(dqp); /* * dqp is still locked, but caller is free to unlock it now. */ - return (error); + return error; } @@ -1443,7 +1442,7 @@ xfs_qm_dqpurge( * We don't care about getting disk errors here. We need * to purge this dquot anyway, so we go ahead regardless. */ - error = xfs_qm_dqflush(dqp, XFS_QMOPT_SYNC); + error = xfs_qm_dqflush(dqp, SYNC_WAIT); if (error) xfs_fs_cmn_err(CE_WARN, mp, "xfs_qm_dqpurge: dquot %p flush failed", dqp); diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c index dda0fb0..4e4ee9a 100644 --- a/fs/xfs/quota/xfs_dquot_item.c +++ b/fs/xfs/quota/xfs_dquot_item.c @@ -153,7 +153,7 @@ xfs_qm_dquot_logitem_push( * lock without sleeping, then there must not have been * anyone in the process of flushing the dquot. 
*/ - error = xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI); + error = xfs_qm_dqflush(dqp, 0); if (error) xfs_fs_cmn_err(CE_WARN, dqp->q_mount, "xfs_qm_dquot_logitem_push: push error %d on dqp %p", diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index 11cfd82..8699e51 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c @@ -450,7 +450,7 @@ xfs_qm_unmount_quotas( STATIC int xfs_qm_dqflush_all( xfs_mount_t *mp, - int flags) + int sync_mode) { int recl; xfs_dquot_t *dqp; @@ -486,7 +486,7 @@ again: * across a disk write. */ xfs_qm_mplist_unlock(mp); - error = xfs_qm_dqflush(dqp, flags); + error = xfs_qm_dqflush(dqp, sync_mode); xfs_dqunlock(dqp); if (error) return error; @@ -926,13 +926,11 @@ xfs_qm_sync( { int recl, restarts; xfs_dquot_t *dqp; - uint flush_flags; int error; if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) return 0; - flush_flags = (flags & SYNC_WAIT) ? XFS_QMOPT_SYNC : XFS_QMOPT_DELWRI; restarts = 0; again: @@ -992,7 +990,7 @@ xfs_qm_sync( * across a disk write */ xfs_qm_mplist_unlock(mp); - error = xfs_qm_dqflush(dqp, flush_flags); + error = xfs_qm_dqflush(dqp, flags); xfs_dqunlock(dqp); if (error && XFS_FORCED_SHUTDOWN(mp)) return 0; /* Need to prevent umount failure */ @@ -1796,7 +1794,7 @@ xfs_qm_quotacheck( * successfully. */ if (!error) - error = xfs_qm_dqflush_all(mp, XFS_QMOPT_DELWRI); + error = xfs_qm_dqflush_all(mp, 0); /* * We can get this error if we couldn't do a dquot allocation inside @@ -2018,7 +2016,7 @@ xfs_qm_shake_freelist( * We flush it delayed write, so don't bother * releasing the mplock. */ - error = xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI); + error = xfs_qm_dqflush(dqp, 0); if (error) { xfs_fs_cmn_err(CE_WARN, dqp->q_mount, "xfs_qm_dqflush_all: dquot %p flush failed", dqp); @@ -2201,7 +2199,7 @@ xfs_qm_dqreclaim_one(void) * We flush it delayed write, so don't bother * releasing the freelist lock. 
- */ - error = xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI); + error = xfs_qm_dqflush(dqp, 0); if (error) { xfs_fs_cmn_err(CE_WARN, dqp->q_mount, "xfs_qm_dqreclaim: dquot %p flush failed", dqp); diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h index 21d11d9..fdcab3f 100644 --- a/fs/xfs/xfs_quota.h +++ b/fs/xfs/xfs_quota.h @@ -223,15 +223,9 @@ typedef struct xfs_qoff_logformat { #define XFS_QMOPT_RES_INOS 0x0800000 /* - * flags for dqflush and dqflush_all. - */ -#define XFS_QMOPT_SYNC 0x1000000 -#define XFS_QMOPT_DELWRI 0x4000000 - -/* * flags for dqalloc. */ -#define XFS_QMOPT_INHERIT 0x8000000 +#define XFS_QMOPT_INHERIT 0x1000000 /* * flags to xfs_trans_mod_dquot. -- cgit v1.1 From e8b217e7530c6a073ac69f1c85b922d93fdf5647 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 2 Feb 2010 10:16:26 +1100 Subject: xfs: remove invalid barrier optimization from xfs_fsync We always need to flush the disk write cache and can't skip it just because no inode attributes have changed. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner --- fs/xfs/xfs_vnodeops.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index fd108b7..43241e2 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -597,7 +597,7 @@ xfs_fsync( { xfs_trans_t *tp; int error = 0; - int log_flushed = 0, changed = 1; + int log_flushed = 0; xfs_itrace_entry(ip); @@ -627,18 +627,10 @@ xfs_fsync( * disk yet, the inode will be still be pinned. If it is, * force the log. */ - xfs_iunlock(ip, XFS_ILOCK_SHARED); - if (xfs_ipincount(ip)) { error = _xfs_log_force(ip->i_mount, XFS_LOG_SYNC, &log_flushed); - } else { - /* - * If the inode is not pinned and nothing has changed - * we don't need to flush the cache.
- */ - changed = 0; } } else { /* @@ -673,7 +665,7 @@ xfs_fsync( xfs_iunlock(ip, XFS_ILOCK_EXCL); } - if ((ip->i_mount->m_flags & XFS_MOUNT_BARRIER) && changed) { + if (ip->i_mount->m_flags & XFS_MOUNT_BARRIER) { /* * If the log write didn't issue an ordered tag we need * to flush the disk cache for the data device now. -- cgit v1.1 From 07fec73625dc0db6f9aed68019918208a2ca53f5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 9 Feb 2010 11:43:49 +1100 Subject: xfs: log changed inodes instead of writing them synchronously When an inode has already been flushed delayed write, xfs_inode_clean() returns true and hence xfs_fs_write_inode() can return on a synchronous inode write without having written the inode. Currently these synchronous writes only come from sync(1), unmount, a synchronous NFS export and cachefiles so should be relatively rare and out of common performance paths. Realistically, a synchronous inode write is not necessary here; we can avoid writing the inode by logging any non-transactional changes that are pending. This needs to be done with synchronous transactions, but it avoids seeking between the log and inode clusters as we do now. We don't force the log if the inode is pinned, though, so this differs from the fsync case. For normal sys_sync and unmount behaviour this is fine because we do a synchronous log force in xfs_sync_data which is called from the ->sync_fs code. It does however break the NFS synchronous export guarantees for now, but work is under way to fix this at a higher level or for the higher level to provide an additional flag in the writeback control to tell us that a log force is needed. Portions of this patch are based on work from Dave Chinner.
Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Reviewed-by: Alex Elder --- fs/xfs/linux-2.6/xfs_super.c | 111 ++++++++++++++++++++++++++++++++----------- 1 file changed, 82 insertions(+), 29 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 3b5b46b..25ea240 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -1021,12 +1021,45 @@ xfs_fs_dirty_inode( XFS_I(inode)->i_update_core = 1; } -/* - * Attempt to flush the inode, this will actually fail - * if the inode is pinned, but we dirty the inode again - * at the point when it is unpinned after a log write, - * since this is when the inode itself becomes flushable. - */ +STATIC int +xfs_log_inode( + struct xfs_inode *ip) +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_trans *tp; + int error; + + xfs_iunlock(ip, XFS_ILOCK_SHARED); + tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS); + error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0); + + if (error) { + xfs_trans_cancel(tp, 0); + /* we need to return with the lock hold shared */ + xfs_ilock(ip, XFS_ILOCK_SHARED); + return error; + } + + xfs_ilock(ip, XFS_ILOCK_EXCL); + + /* + * Note - it's possible that we might have pushed ourselves out of the + * way during trans_reserve which would flush the inode. But there's + * no guarantee that the inode buffer has actually gone out yet (it's + * delwri). Plus the buffer could be pinned anyway if it's part of + * an inode in another recent transaction. So we play it safe and + * fire off the transaction anyway. 
+ */ + xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + xfs_trans_ihold(tp, ip); + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + xfs_trans_set_sync(tp); + error = xfs_trans_commit(tp, 0); + xfs_ilock_demote(ip, XFS_ILOCK_EXCL); + + return error; +} + STATIC int xfs_fs_write_inode( struct inode *inode, @@ -1034,7 +1067,7 @@ xfs_fs_write_inode( { struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; - int error = 0; + int error = EAGAIN; xfs_itrace_entry(ip); @@ -1045,35 +1078,55 @@ xfs_fs_write_inode( error = xfs_wait_on_pages(ip, 0, -1); if (error) goto out; - } - - /* - * Bypass inodes which have already been cleaned by - * the inode flush clustering code inside xfs_iflush - */ - if (xfs_inode_clean(ip)) - goto out; - /* - * We make this non-blocking if the inode is contended, return - * EAGAIN to indicate to the caller that they did not succeed. - * This prevents the flush path from blocking on inodes inside - * another operation right now, they get caught later by xfs_sync. - */ - if (sync) { + /* + * Make sure the inode has hit stable storage. By using the + * log and the fsync transactions we reduce the IOs we have + * to do here from two (log and inode) to just the log. + * + * Note: We still need to do a delwri write of the inode after + * this to flush it to the backing buffer so that bulkstat + * works properly if this is the first time the inode has been + * written. Because we hold the ilock atomically over the + * transaction commit and the inode flush we are guaranteed + * that the inode is not pinned when it returns. If the flush + * lock is already held, then the inode has already been + * flushed once and we don't need to flush it again. Hence + * the code will only flush the inode if it isn't already + * being flushed. 
+ */ xfs_ilock(ip, XFS_ILOCK_SHARED); - xfs_iflock(ip); - - error = xfs_iflush(ip, SYNC_WAIT); + if (ip->i_update_core) { + error = xfs_log_inode(ip); + if (error) + goto out_unlock; + } } else { - error = EAGAIN; + /* + * We make this non-blocking if the inode is contended, return + * EAGAIN to indicate to the caller that they did not succeed. + * This prevents the flush path from blocking on inodes inside + * another operation right now, they get caught later by xfs_sync. + */ if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) goto out; - if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) - goto out_unlock; + } + + if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) + goto out_unlock; - error = xfs_iflush(ip, 0); + /* + * Now we have the flush lock and the inode is not pinned, we can check + * if the inode is really clean as we know that there are no pending + * transaction completions, it is not waiting on the delayed write + * queue and there is no IO in progress. + */ + if (xfs_inode_clean(ip)) { + xfs_ifunlock(ip); + error = 0; + goto out_unlock; } + error = xfs_iflush(ip, 0); out_unlock: xfs_iunlock(ip, XFS_ILOCK_SHARED); -- cgit v1.1 From 5322892d867e186c6b4c5fff5c99ea4863696a60 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 4 Feb 2010 10:09:14 +1100 Subject: xfs: kill xfs_bawrite There are no more users of this function left in the XFS code now that we've switched everything to delayed write flushing. Remove it. 
Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_buf.c | 19 ------------------- fs/xfs/linux-2.6/xfs_buf.h | 1 - 2 files changed, 20 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 4556a4c..d50df3a 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -1078,25 +1078,6 @@ xfs_bwrite( return error; } -int -xfs_bawrite( - void *mp, - struct xfs_buf *bp) -{ - trace_xfs_buf_bawrite(bp, _RET_IP_); - - ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL); - - xfs_buf_delwri_dequeue(bp); - - bp->b_flags &= ~(XBF_READ | XBF_DELWRI | XBF_READ_AHEAD); - bp->b_flags |= (XBF_WRITE | XBF_ASYNC | _XBF_RUN_QUEUES); - - bp->b_mount = mp; - bp->b_strat = xfs_bdstrat_cb; - return xfs_bdstrat_cb(bp); -} - void xfs_bdwrite( void *mp, diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index be45e8c..386e736 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h @@ -233,7 +233,6 @@ extern void xfs_buf_unlock(xfs_buf_t *); /* Buffer Read and Write Routines */ extern int xfs_bwrite(struct xfs_mount *mp, struct xfs_buf *bp); -extern int xfs_bawrite(void *mp, xfs_buf_t *bp); extern void xfs_bdwrite(void *mp, xfs_buf_t *bp); extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *); -- cgit v1.1 From 73c77e2ccc14413c232c3e0b3aa43a0c4b72ec70 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Mon, 25 Jan 2010 11:42:24 -0600 Subject: xfs: fix xfs to work with Virtually Indexed architectures xfs_buf.c includes what is essentially a hand rolled version of blk_rq_map_kern(). In order to work properly with the vmalloc buffers that xfs uses, this hand rolled routine must also implement the flushing API for vmap/vmalloc areas. 
[style updates from hch@lst.de] Acked-by: Christoph Hellwig Signed-off-by: James Bottomley --- fs/xfs/linux-2.6/xfs_buf.c | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 77b8be8..6f3ebb6 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -76,6 +76,27 @@ struct workqueue_struct *xfsconvertd_workqueue; #define xfs_buf_deallocate(bp) \ kmem_zone_free(xfs_buf_zone, (bp)); +static inline int +xfs_buf_is_vmapped( + struct xfs_buf *bp) +{ + /* + * Return true if the buffer is vmapped. + * + * The XBF_MAPPED flag is set if the buffer should be mapped, but the + * code is clever enough to know it doesn't have to map a single page, + * so the check has to be both for XBF_MAPPED and bp->b_page_count > 1. + */ + return (bp->b_flags & XBF_MAPPED) && bp->b_page_count > 1; +} + +static inline int +xfs_buf_vmap_len( + struct xfs_buf *bp) +{ + return (bp->b_page_count * PAGE_SIZE) - bp->b_offset; +} + /* * Page Region interfaces. 
* @@ -314,7 +335,7 @@ xfs_buf_free( if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) { uint i; - if ((bp->b_flags & XBF_MAPPED) && (bp->b_page_count > 1)) + if (xfs_buf_is_vmapped(bp)) free_address(bp->b_addr - bp->b_offset); for (i = 0; i < bp->b_page_count; i++) { @@ -1107,6 +1128,9 @@ xfs_buf_bio_end_io( xfs_buf_ioerror(bp, -error); + if (!error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ)) + invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp)); + do { struct page *page = bvec->bv_page; @@ -1216,6 +1240,10 @@ next_chunk: submit_io: if (likely(bio->bi_size)) { + if (xfs_buf_is_vmapped(bp)) { + flush_kernel_vmap_range(bp->b_addr, + xfs_buf_vmap_len(bp)); + } submit_bio(rw, bio); if (size) goto next_chunk; -- cgit v1.1 From 8cfb3343f70bcf9403218df120ecf345f06dd585 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Mon, 1 Feb 2010 21:34:14 -0700 Subject: of: make set_node_proc_entry private to proc_devtree.c We only need set_node_proc_entry in proc_devtree.c, so move it there. This fixes the !HAVE_ARCH_DEVTREE_FIXUPS build, as we can't make the definition in linux/of.h conditional on this #define (definitions in asm/prom.h can't be exposed to linux/of.h, due to the enforced #include ordering).
Signed-off-by: Jeremy Kerr Signed-off-by: Grant Likely --- fs/proc/proc_devtree.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c index 123257b..2309bf1 100644 --- a/fs/proc/proc_devtree.c +++ b/fs/proc/proc_devtree.c @@ -14,12 +14,13 @@ #include #include "internal.h" -#ifndef HAVE_ARCH_DEVTREE_FIXUPS static inline void set_node_proc_entry(struct device_node *np, struct proc_dir_entry *de) { -} +#ifdef HAVE_ARCH_DEVTREE_FIXUPS + np->pde = de; #endif +} static struct proc_dir_entry *proc_device_tree; -- cgit v1.1 From 50ab2fe147e22c8786552cda1791a61ae81b84d2 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Mon, 1 Feb 2010 21:34:14 -0700 Subject: proc_devtree: include linux/of.h Currently, proc_devtree.c depends on asm/prom.h to include linux/of.h, to provide some device-tree definitions (eg, struct property). Instead, include linux/of.h directly. We still need asm/prom.h for HAVE_ARCH_DEVTREE_FIXUPS. Signed-off-by: Jeremy Kerr Signed-off-by: Grant Likely --- fs/proc/proc_devtree.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c index 2309bf1..0ec4511 100644 --- a/fs/proc/proc_devtree.c +++ b/fs/proc/proc_devtree.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include "internal.h" -- cgit v1.1 From 87185517de81101da5afbc82cefdeed6eeaa38fb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Feb 2010 19:43:31 +0000 Subject: xfs: only clear the suid bit once in xfs_write file_remove_suid already calls into ->setattr to clear the suid and sgid bits if needed, no need to start a second transaction to do it ourselves. Note that xfs_write_clear_setuid issues a sync transaction while the path through ->setattr doesn't, but that is consistent with the other filesystems.
Signed-off-by: Christoph Hellwig Reviewed-by: Alex Elder Signed-off-by: Alex Elder --- fs/xfs/linux-2.6/xfs_lrw.c | 15 +++------------ fs/xfs/xfs_rw.c | 42 ------------------------------------------ fs/xfs/xfs_rw.h | 1 - 3 files changed, 3 insertions(+), 55 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index c80fa00d..eac6f80 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c @@ -630,18 +630,9 @@ start: * by root. This keeps people from modifying setuid and * setgid binaries. */ - - if (((xip->i_d.di_mode & S_ISUID) || - ((xip->i_d.di_mode & (S_ISGID | S_IXGRP)) == - (S_ISGID | S_IXGRP))) && - !capable(CAP_FSETID)) { - error = xfs_write_clear_setuid(xip); - if (likely(!error)) - error = -file_remove_suid(file); - if (unlikely(error)) { - goto out_unlock_internal; - } - } + error = -file_remove_suid(file); + if (unlikely(error)) + goto out_unlock_internal; /* We can write back this queue in page reclaim */ current->backing_dev_info = mapping->backing_dev_info; diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c index abb2c45..e336742 100644 --- a/fs/xfs/xfs_rw.c +++ b/fs/xfs/xfs_rw.c @@ -47,48 +47,6 @@ #include "xfs_trace.h" /* - * This is a subroutine for xfs_write() and other writers (xfs_ioctl) - * which clears the setuid and setgid bits when a file is written. 
- */ -int -xfs_write_clear_setuid( - xfs_inode_t *ip) -{ - xfs_mount_t *mp; - xfs_trans_t *tp; - int error; - - mp = ip->i_mount; - tp = xfs_trans_alloc(mp, XFS_TRANS_WRITEID); - if ((error = xfs_trans_reserve(tp, 0, - XFS_WRITEID_LOG_RES(mp), - 0, 0, 0))) { - xfs_trans_cancel(tp, 0); - return error; - } - xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); - xfs_trans_ihold(tp, ip); - ip->i_d.di_mode &= ~S_ISUID; - - /* - * Note that we don't have to worry about mandatory - * file locking being disabled here because we only - * clear the S_ISGID bit if the Group execute bit is - * on, but if it was on then mandatory locking wouldn't - * have been enabled. - */ - if (ip->i_d.di_mode & S_IXGRP) { - ip->i_d.di_mode &= ~S_ISGID; - } - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - xfs_trans_set_sync(tp); - error = xfs_trans_commit(tp, 0); - xfs_iunlock(ip, XFS_ILOCK_EXCL); - return 0; -} - -/* * Force a shutdown of the filesystem instantly while keeping * the filesystem consistent. We don't do an unmount here; just shutdown * the shop, make sure that absolutely nothing persistent happens to diff --git a/fs/xfs/xfs_rw.h b/fs/xfs/xfs_rw.h index a54c3b7..11c41ec 100644 --- a/fs/xfs/xfs_rw.h +++ b/fs/xfs/xfs_rw.h @@ -39,7 +39,6 @@ xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb) /* * Prototypes for functions in xfs_rw.c. */ -extern int xfs_write_clear_setuid(struct xfs_inode *ip); extern int xfs_read_buf(struct xfs_mount *mp, xfs_buftarg_t *btp, xfs_daddr_t blkno, int len, uint flags, struct xfs_buf **bpp); -- cgit v1.1 From 180040b89ee2aed88c0a0b1fcf7ada9a512b12e3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 5 Feb 2010 09:57:55 +0000 Subject: xfs: optimize log flushing in xfs_fsync If we have a pinned inode it must have a log item attached to it. Usually that log item will have ili_last_lsn already set, in which case we only need to flush the log up to that LSN instead of doing a full log force. 
This gives speedups of about 5% in some fsync heavy workloads. Signed-off-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/xfs_vnodeops.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 43241e2..ddd2c5d 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -629,8 +629,14 @@ xfs_fsync( */ xfs_iunlock(ip, XFS_ILOCK_SHARED); if (xfs_ipincount(ip)) { - error = _xfs_log_force(ip->i_mount, XFS_LOG_SYNC, - &log_flushed); + if (ip->i_itemp->ili_last_lsn) { + error = _xfs_log_force_lsn(ip->i_mount, + ip->i_itemp->ili_last_lsn, + XFS_LOG_SYNC, &log_flushed); + } else { + error = _xfs_log_force(ip->i_mount, + XFS_LOG_SYNC, &log_flushed); + } } } else { /* -- cgit v1.1 From d67b1b03254c501fef371b0e5916c94a52bfc2c5 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 6 Feb 2010 08:45:15 +0000 Subject: fs/xfs: Correct NULL test Test the value that was just allocated rather than the previously tested one. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @r@ expression *x; expression e; identifier l; @@ if (x == NULL || ...) { ... when forall return ...; } ... 
when != goto l; when != x = e when != &x *x == NULL // Signed-off-by: Julia Lawall Reviewed-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/quota/xfs_qm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index 8699e51..417e61e 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c @@ -123,7 +123,7 @@ xfs_Gqm_init(void) goto out; gdqhash = kmem_zalloc_large(hsize); - if (!udqhash) + if (!gdqhash) goto out_free_udqhash; hsize /= sizeof(xfs_dqhash_t); -- cgit v1.1 From 7c540d9e3da38c3d1c15fb8059e4577a84ac0066 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Sun, 14 Feb 2010 07:13:41 -0700 Subject: proc_devtree: fix THIS_MODULE without module.h Commit e22f628395432b967f2f505858c64450f7835365 introduced a build breakage for ARM devtree work: the THIS_MODULE macro was added, but we don't have module.h This change adds the necessary #include to get THIS_MODULE defined. While we could just replace it with NULL (PROC_FS is a bool, not a tristate), using THIS_MODULE will prevent unexpected breakage if we ever do compile this as a module. Signed-off-by: Jeremy Kerr Signed-off-by: Grant Likely Acked-by: Benjamin Herrenschmidt Acked-by: Michal Simek --- fs/proc/proc_devtree.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c index 0ec4511..f8650dc 100644 --- a/fs/proc/proc_devtree.c +++ b/fs/proc/proc_devtree.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include "internal.h" -- cgit v1.1 From ac278a9c505092dd82077a2446af8f9fc0d9c095 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 16 Feb 2010 18:09:36 +0000 Subject: fix LOOKUP_FOLLOW on automount "symlinks" Make sure that automount "symlinks" are followed regardless of LOOKUP_FOLLOW; it should have no effect on them. 
Cc: stable@kernel.org Signed-off-by: Al Viro --- fs/namei.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index d62fdc8..a4855af 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -823,6 +823,17 @@ fail: } /* + * This is a temporary kludge to deal with "automount" symlinks; proper + * solution is to trigger them on follow_mount(), so that do_lookup() + * would DTRT. To be killed before 2.6.34-final. + */ +static inline int follow_on_final(struct inode *inode, unsigned lookup_flags) +{ + return inode && unlikely(inode->i_op->follow_link) && + ((lookup_flags & LOOKUP_FOLLOW) || S_ISDIR(inode->i_mode)); +} + +/* * Name resolution. * This is the basic name resolution function, turning a pathname into * the final dentry. We expect 'base' to be positive and a directory. @@ -942,8 +953,7 @@ last_component: if (err) break; inode = next.dentry->d_inode; - if ((lookup_flags & LOOKUP_FOLLOW) - && inode && inode->i_op->follow_link) { + if (follow_on_final(inode, lookup_flags)) { err = do_follow_link(&next, nd); if (err) goto return_err; -- cgit v1.1 From 7fee4868be91e71a3ee8e57289ebf5e10a12297e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 14 Jan 2010 01:03:28 -0500 Subject: Switch proc/self to nd_set_link() Signed-off-by: Al Viro --- fs/proc/base.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/proc/base.c b/fs/proc/base.c index e42bbd8..58324c2 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2369,16 +2369,30 @@ static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) { struct pid_namespace *ns = dentry->d_sb->s_fs_info; pid_t tgid = task_tgid_nr_ns(current, ns); - char tmp[PROC_NUMBUF]; - if (!tgid) - return ERR_PTR(-ENOENT); - sprintf(tmp, "%d", task_tgid_nr_ns(current, ns)); - return ERR_PTR(vfs_follow_link(nd,tmp)); + char *name = ERR_PTR(-ENOENT); + if (tgid) { + name = __getname(); + if (!name) + name = 
ERR_PTR(-ENOMEM); + else + sprintf(name, "%d", tgid); + } + nd_set_link(nd, name); + return NULL; +} + +static void proc_self_put_link(struct dentry *dentry, struct nameidata *nd, + void *cookie) +{ + char *s = nd_get_link(nd); + if (!IS_ERR(s)) + __putname(s); } static const struct inode_operations proc_self_inode_operations = { .readlink = proc_self_readlink, .follow_link = proc_self_follow_link, + .put_link = proc_self_put_link, }; /* -- cgit v1.1 From aeaa5ccd6421fbf9e7ded0ac67b12ea2b9fcf51e Mon Sep 17 00:00:00 2001 From: Chuck Ebbert Date: Mon, 15 Feb 2010 18:07:39 -0500 Subject: vfs: don't call ima_file_check() unconditionally in nfsd_open() commit 1e41568d7378d1ba8c64ba137b9ddd00b59f893a ("Take ima_path_check() in nfsd past dentry_open() in nfsd_open()") moved this code back to its original location but missed the "else". Signed-off-by: Chuck Ebbert Signed-off-by: Al Viro --- fs/nfsd/vfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 97d79ef..8715d19 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -752,7 +752,8 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, flags, current_cred()); if (IS_ERR(*filp)) host_err = PTR_ERR(*filp); - host_err = ima_file_check(*filp, access); + else + host_err = ima_file_check(*filp, access); out_nfserr: err = nfserrno(host_err); out: -- cgit v1.1 From 8f9941aeccc318f243ab3fa55aaa17f4c1cb33f9 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 19 Feb 2010 18:14:21 +0000 Subject: CacheFiles: Fix a race in cachefiles_delete_object() vs rename cachefiles_delete_object() can race with rename. It gets the parent directory of the object it's asked to delete, then locks it - but rename may have changed the object's parent between the get and the completion of the lock. 
However, if such a circumstance is detected, we abandon our attempt to delete the object - since it's no longer in the index key path, it won't be seen again by lookups of that key. The assumption is that cachefilesd may have culled it by renaming it to the graveyard for later destruction. Signed-off-by: David Howells Signed-off-by: Al Viro --- fs/cachefiles/namei.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index 14ac480..eeb4986 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -348,7 +348,17 @@ int cachefiles_delete_object(struct cachefiles_cache *cache, dir = dget_parent(object->dentry); mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); - ret = cachefiles_bury_object(cache, dir, object->dentry); + + /* we need to check that our parent is _still_ our parent - it may have + * been renamed */ + if (dir == object->dentry->d_parent) { + ret = cachefiles_bury_object(cache, dir, object->dentry); + } else { + /* it got moved, presumably by cachefilesd culling it, so it's + * no longer in the key path and we can ignore it */ + mutex_unlock(&dir->d_inode->i_mutex); + ret = 0; + } dput(dir); _leave(" = %d", ret); -- cgit v1.1 From a17e18790a8c47113a73139d54a375dc9ccd8f08 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Mon, 22 Feb 2010 12:44:24 -0800 Subject: fs/exec.c: fix initial stack reservation 803bf5ec259941936262d10ecc84511b76a20921 ("fs/exec.c: restrict initial stack space expansion to rlimit") attempts to limit the initial stack to 20*PAGE_SIZE. Unfortunately, in attempting to ensure the stack is not reduced in size, we ended up not changing the stack at all. This size reduction check is not necessary as the expand_stack call does this already. This caused a regression in UML resulting in most guest processes being killed.
Signed-off-by: Michael Neuling Reviewed-by: KOSAKI Motohiro Acked-by: WANG Cong Cc: Anton Blanchard Cc: Oleg Nesterov Cc: James Morris Cc: Serge Hallyn Cc: Benjamin Herrenschmidt Cc: Jouni Malinen Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index e95c692..cce6bbd 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -637,7 +637,6 @@ int setup_arg_pages(struct linux_binprm *bprm, * will align it up. */ rlim_stack = rlimit(RLIMIT_STACK) & PAGE_MASK; - rlim_stack = min(rlim_stack, stack_size); #ifdef CONFIG_STACK_GROWSUP if (stack_size + stack_expand > rlim_stack) stack_base = vma->vm_start + rlim_stack; -- cgit v1.1 From 7fe2b3190b8b299409f13cf3a6f85c2bd371f8bb Mon Sep 17 00:00:00 2001 From: David Teigland Date: Wed, 24 Feb 2010 11:08:18 -0600 Subject: dlm: fix ordering of bast and cast When both blocking and completion callbacks are queued for a lock, the dlm would always deliver the completion callback (cast) first. In some cases the blocking callback (bast) is queued before the cast, though, and should be delivered first. This patch keeps track of the order in which they were queued and delivers them in that order. This patch also keeps track of the granted mode in the last cast and eliminates the following bast if the bast mode is compatible with the preceding cast mode. This happens when a remotely mastered lock is demoted, e.g. EX->NL, in which case the local node queues a cast immediately after sending the demote message. In this way a cast can be queued for a mode, e.g. NL, that makes an in-transit bast extraneous.
Signed-off-by: David Teigland --- fs/dlm/ast.c | 74 ++++++++++++++++++++++++++++++++++++++++----------- fs/dlm/ast.h | 4 +-- fs/dlm/dlm_internal.h | 10 +++++-- fs/dlm/lock.c | 4 +-- fs/dlm/user.c | 10 ++++--- fs/dlm/user.h | 4 +-- 6 files changed, 78 insertions(+), 28 deletions(-) (limited to 'fs') diff --git a/fs/dlm/ast.c b/fs/dlm/ast.c index dc2ad60..4314f0d 100644 --- a/fs/dlm/ast.c +++ b/fs/dlm/ast.c @@ -2,7 +2,7 @@ ******************************************************************************* ** ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. -** Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. +** Copyright (C) 2004-2010 Red Hat, Inc. All rights reserved. ** ** This copyrighted material is made available to anyone wishing to use, ** modify, copy, or redistribute it subject to the terms and conditions @@ -33,10 +33,10 @@ void dlm_del_ast(struct dlm_lkb *lkb) spin_unlock(&ast_queue_lock); } -void dlm_add_ast(struct dlm_lkb *lkb, int type, int bastmode) +void dlm_add_ast(struct dlm_lkb *lkb, int type, int mode) { if (lkb->lkb_flags & DLM_IFL_USER) { - dlm_user_add_ast(lkb, type, bastmode); + dlm_user_add_ast(lkb, type, mode); return; } @@ -44,10 +44,21 @@ void dlm_add_ast(struct dlm_lkb *lkb, int type, int bastmode) if (!(lkb->lkb_ast_type & (AST_COMP | AST_BAST))) { kref_get(&lkb->lkb_ref); list_add_tail(&lkb->lkb_astqueue, &ast_queue); + lkb->lkb_ast_first = type; } + + /* sanity check, this should not happen */ + + if ((type == AST_COMP) && (lkb->lkb_ast_type & AST_COMP)) + log_print("repeat cast %d castmode %d lock %x %s", + mode, lkb->lkb_castmode, + lkb->lkb_id, lkb->lkb_resource->res_name); + lkb->lkb_ast_type |= type; - if (bastmode) - lkb->lkb_bastmode = bastmode; + if (type == AST_BAST) + lkb->lkb_bastmode = mode; + else + lkb->lkb_castmode = mode; spin_unlock(&ast_queue_lock); set_bit(WAKE_ASTS, &astd_wakeflags); @@ -59,9 +70,9 @@ static void process_asts(void) struct dlm_ls *ls = NULL; struct dlm_rsb *r = NULL; 
struct dlm_lkb *lkb; - void (*cast) (void *astparam); - void (*bast) (void *astparam, int mode); - int type = 0, bastmode; + void (*castfn) (void *astparam); + void (*bastfn) (void *astparam, int mode); + int type, first, bastmode, castmode, do_bast, do_cast, last_castmode; repeat: spin_lock(&ast_queue_lock); @@ -75,17 +86,48 @@ repeat: list_del(&lkb->lkb_astqueue); type = lkb->lkb_ast_type; lkb->lkb_ast_type = 0; + first = lkb->lkb_ast_first; + lkb->lkb_ast_first = 0; bastmode = lkb->lkb_bastmode; - + castmode = lkb->lkb_castmode; + castfn = lkb->lkb_astfn; + bastfn = lkb->lkb_bastfn; spin_unlock(&ast_queue_lock); - cast = lkb->lkb_astfn; - bast = lkb->lkb_bastfn; - - if ((type & AST_COMP) && cast) - cast(lkb->lkb_astparam); - if ((type & AST_BAST) && bast) - bast(lkb->lkb_astparam, bastmode); + do_cast = (type & AST_COMP) && castfn; + do_bast = (type & AST_BAST) && bastfn; + + /* Skip a bast if its blocking mode is compatible with the + granted mode of the preceding cast. */ + + if (do_bast) { + if (first == AST_COMP) + last_castmode = castmode; + else + last_castmode = lkb->lkb_castmode_done; + if (dlm_modes_compat(bastmode, last_castmode)) + do_bast = 0; + } + + if (first == AST_COMP) { + if (do_cast) + castfn(lkb->lkb_astparam); + if (do_bast) + bastfn(lkb->lkb_astparam, bastmode); + } else if (first == AST_BAST) { + if (do_bast) + bastfn(lkb->lkb_astparam, bastmode); + if (do_cast) + castfn(lkb->lkb_astparam); + } else { + log_error(ls, "bad ast_first %d ast_type %d", + first, type); + } + + if (do_cast) + lkb->lkb_castmode_done = castmode; + if (do_bast) + lkb->lkb_bastmode_done = bastmode; /* this removes the reference added by dlm_add_ast and may result in the lkb being freed */ diff --git a/fs/dlm/ast.h b/fs/dlm/ast.h index 1b5fc5f..bcb1aab 100644 --- a/fs/dlm/ast.h +++ b/fs/dlm/ast.h @@ -1,7 +1,7 @@ /****************************************************************************** 
******************************************************************************* ** -** Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved. +** Copyright (C) 2005-2010 Red Hat, Inc. All rights reserved. ** ** This copyrighted material is made available to anyone wishing to use, ** modify, copy, or redistribute it subject to the terms and conditions @@ -13,7 +13,7 @@ #ifndef __ASTD_DOT_H__ #define __ASTD_DOT_H__ -void dlm_add_ast(struct dlm_lkb *lkb, int type, int bastmode); +void dlm_add_ast(struct dlm_lkb *lkb, int type, int mode); void dlm_del_ast(struct dlm_lkb *lkb); void dlm_astd_wake(void); diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index 826d3dc..f632b58 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h @@ -2,7 +2,7 @@ ******************************************************************************* ** ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. -** Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. +** Copyright (C) 2004-2010 Red Hat, Inc. All rights reserved. 
** ** This copyrighted material is made available to anyone wishing to use, ** modify, copy, or redistribute it subject to the terms and conditions @@ -232,11 +232,17 @@ struct dlm_lkb { int8_t lkb_status; /* granted, waiting, convert */ int8_t lkb_rqmode; /* requested lock mode */ int8_t lkb_grmode; /* granted lock mode */ - int8_t lkb_bastmode; /* requested mode */ int8_t lkb_highbast; /* highest mode bast sent for */ + int8_t lkb_wait_type; /* type of reply waiting for */ int8_t lkb_wait_count; int8_t lkb_ast_type; /* type of ast queued for */ + int8_t lkb_ast_first; /* type of first ast queued */ + + int8_t lkb_bastmode; /* req mode of queued bast */ + int8_t lkb_castmode; /* gr mode of queued cast */ + int8_t lkb_bastmode_done; /* last delivered bastmode */ + int8_t lkb_castmode_done; /* last delivered castmode */ struct list_head lkb_idtbl_list; /* lockspace lkbtbl */ struct list_head lkb_statequeue; /* rsb g/c/w list */ diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index 9c0c1db..e08ea93 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -1,7 +1,7 @@ /****************************************************************************** ******************************************************************************* ** -** Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved. +** Copyright (C) 2005-2010 Red Hat, Inc. All rights reserved. 
** ** This copyrighted material is made available to anyone wishing to use, ** modify, copy, or redistribute it subject to the terms and conditions @@ -307,7 +307,7 @@ static void queue_cast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv) lkb->lkb_lksb->sb_status = rv; lkb->lkb_lksb->sb_flags = lkb->lkb_sbflags; - dlm_add_ast(lkb, AST_COMP, 0); + dlm_add_ast(lkb, AST_COMP, lkb->lkb_grmode); } static inline void queue_cast_overlap(struct dlm_rsb *r, struct dlm_lkb *lkb) diff --git a/fs/dlm/user.c b/fs/dlm/user.c index e73a4bb..a4bfd31 100644 --- a/fs/dlm/user.c +++ b/fs/dlm/user.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2006-2009 Red Hat, Inc. All rights reserved. + * Copyright (C) 2006-2010 Red Hat, Inc. All rights reserved. * * This copyrighted material is made available to anyone wishing to use, * modify, copy, or redistribute it subject to the terms and conditions @@ -173,7 +173,7 @@ static int lkb_is_endoflife(struct dlm_lkb *lkb, int sb_status, int type) /* we could possibly check if the cancel of an orphan has resulted in the lkb being removed and then remove that lkb from the orphans list and free it */ -void dlm_user_add_ast(struct dlm_lkb *lkb, int type, int bastmode) +void dlm_user_add_ast(struct dlm_lkb *lkb, int type, int mode) { struct dlm_ls *ls; struct dlm_user_args *ua; @@ -206,8 +206,10 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, int type, int bastmode) ast_type = lkb->lkb_ast_type; lkb->lkb_ast_type |= type; - if (bastmode) - lkb->lkb_bastmode = bastmode; + if (type == AST_BAST) + lkb->lkb_bastmode = mode; + else + lkb->lkb_castmode = mode; if (!ast_type) { kref_get(&lkb->lkb_ref); diff --git a/fs/dlm/user.h b/fs/dlm/user.h index 1c96864..f196091 100644 --- a/fs/dlm/user.h +++ b/fs/dlm/user.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2006-2008 Red Hat, Inc. All rights reserved. + * Copyright (C) 2006-2010 Red Hat, Inc. All rights reserved. 
* * This copyrighted material is made available to anyone wishing to use, * modify, copy, or redistribute it subject to the terms and conditions @@ -9,7 +9,7 @@ #ifndef __USER_DOT_H__ #define __USER_DOT_H__ -void dlm_user_add_ast(struct dlm_lkb *lkb, int type, int bastmode); +void dlm_user_add_ast(struct dlm_lkb *lkb, int type, int mode); int dlm_user_init(void); void dlm_user_exit(void); int dlm_device_deregister(struct dlm_ls *ls); -- cgit v1.1 From 7dc52157982ab771f40e3c0b7dc55b954c3c2d19 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 22 Feb 2010 17:04:52 -0800 Subject: vfs: Apply lockdep-based checking to rcu_dereference() uses Add lockdep-ified RCU primitives to alloc_fd(), files_fdtable() and fcheck_files(). Cc: Alexander Viro Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com Cc: Alexander Viro LKML-Reference: <1266887105-1528-8-git-send-email-paulmck@linux.vnet.ibm.com> Signed-off-by: Ingo Molnar --- fs/file.c | 2 +- fs/proc/array.c | 2 ++ fs/proc/base.c | 6 +++++- 3 files changed, 8 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/file.c b/fs/file.c index 87e1290..38039af 100644 --- a/fs/file.c +++ b/fs/file.c @@ -478,7 +478,7 @@ repeat: error = fd; #if 1 /* Sanity check */ - if (rcu_dereference(fdt->fd[fd]) != NULL) { + if (rcu_dereference_raw(fdt->fd[fd]) != NULL) { printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd); rcu_assign_pointer(fdt->fd[fd], NULL); } diff --git a/fs/proc/array.c b/fs/proc/array.c index 13b5d07..18e20fe 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -270,7 +270,9 @@ static inline void task_sig(struct seq_file *m, struct task_struct *p) blocked = p->blocked; collect_sigign_sigcatch(p, &ignored, &caught); num_threads = atomic_read(&p->signal->count); + 
rcu_read_lock(); /* FIXME: is this correct? */ qsize = atomic_read(&__task_cred(p)->user->sigpending); + rcu_read_unlock(); qlim = p->signal->rlim[RLIMIT_SIGPENDING].rlim_cur; unlock_task_sighand(p, &flags); } diff --git a/fs/proc/base.c b/fs/proc/base.c index 58324c2..623e2ff 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1095,8 +1095,12 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, if (!capable(CAP_AUDIT_CONTROL)) return -EPERM; - if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) + rcu_read_lock(); + if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) { + rcu_read_unlock(); return -EPERM; + } + rcu_read_unlock(); if (count >= PAGE_SIZE) count = PAGE_SIZE - 1; -- cgit v1.1 From cf6620acc0f6fac57968aafef79ab372bdcf6157 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Wed, 24 Feb 2010 11:59:23 -0600 Subject: dlm: send reply before bast When the lock master processes a successful operation (request, convert, cancel, or unlock), it will process the effects of the change before sending the reply for the operation. The "effects" of the operation are: - blocking callbacks (basts) for any newly granted locks - waiting or converting locks that can now be granted The cast is queued on the local node when the reply from the lock master is received. This means that a lock holder can receive a bast for a lock mode that it doesn't yet know has been granted.
Signed-off-by: David Teigland --- fs/dlm/lock.c | 110 ++++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 84 insertions(+), 26 deletions(-) (limited to 'fs') diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index e08ea93..d0e43a3 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -2280,20 +2280,30 @@ static int do_request(struct dlm_rsb *r, struct dlm_lkb *lkb) if (can_be_queued(lkb)) { error = -EINPROGRESS; add_lkb(r, lkb, DLM_LKSTS_WAITING); - send_blocking_asts(r, lkb); add_timeout(lkb); goto out; } error = -EAGAIN; - if (force_blocking_asts(lkb)) - send_blocking_asts_all(r, lkb); queue_cast(r, lkb, -EAGAIN); - out: return error; } +static void do_request_effects(struct dlm_rsb *r, struct dlm_lkb *lkb, + int error) +{ + switch (error) { + case -EAGAIN: + if (force_blocking_asts(lkb)) + send_blocking_asts_all(r, lkb); + break; + case -EINPROGRESS: + send_blocking_asts(r, lkb); + break; + } +} + static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb) { int error = 0; @@ -2304,7 +2314,6 @@ static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb) if (can_be_granted(r, lkb, 1, &deadlk)) { grant_lock(r, lkb); queue_cast(r, lkb, 0); - grant_pending_locks(r); goto out; } @@ -2334,7 +2343,6 @@ static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb) if (_can_be_granted(r, lkb, 1)) { grant_lock(r, lkb); queue_cast(r, lkb, 0); - grant_pending_locks(r); goto out; } /* else fall through and move to convert queue */ @@ -2344,28 +2352,47 @@ static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb) error = -EINPROGRESS; del_lkb(r, lkb); add_lkb(r, lkb, DLM_LKSTS_CONVERT); - send_blocking_asts(r, lkb); add_timeout(lkb); goto out; } error = -EAGAIN; - if (force_blocking_asts(lkb)) - send_blocking_asts_all(r, lkb); queue_cast(r, lkb, -EAGAIN); - out: return error; } +static void do_convert_effects(struct dlm_rsb *r, struct dlm_lkb *lkb, + int error) +{ + switch (error) { + case 0: + grant_pending_locks(r); + /* grant_pending_locks also sends 
basts */ + break; + case -EAGAIN: + if (force_blocking_asts(lkb)) + send_blocking_asts_all(r, lkb); + break; + case -EINPROGRESS: + send_blocking_asts(r, lkb); + break; + } +} + static int do_unlock(struct dlm_rsb *r, struct dlm_lkb *lkb) { remove_lock(r, lkb); queue_cast(r, lkb, -DLM_EUNLOCK); - grant_pending_locks(r); return -DLM_EUNLOCK; } +static void do_unlock_effects(struct dlm_rsb *r, struct dlm_lkb *lkb, + int error) +{ + grant_pending_locks(r); +} + /* returns: 0 did nothing, -DLM_ECANCEL canceled lock */ static int do_cancel(struct dlm_rsb *r, struct dlm_lkb *lkb) @@ -2375,12 +2402,18 @@ static int do_cancel(struct dlm_rsb *r, struct dlm_lkb *lkb) error = revert_lock(r, lkb); if (error) { queue_cast(r, lkb, -DLM_ECANCEL); - grant_pending_locks(r); return -DLM_ECANCEL; } return 0; } +static void do_cancel_effects(struct dlm_rsb *r, struct dlm_lkb *lkb, + int error) +{ + if (error) + grant_pending_locks(r); +} + /* * Four stage 3 varieties: * _request_lock(), _convert_lock(), _unlock_lock(), _cancel_lock() @@ -2402,11 +2435,15 @@ static int _request_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) goto out; } - if (is_remote(r)) + if (is_remote(r)) { /* receive_request() calls do_request() on remote node */ error = send_request(r, lkb); - else + } else { error = do_request(r, lkb); + /* for remote locks the request_reply is sent + between do_request and do_request_effects */ + do_request_effects(r, lkb, error); + } out: return error; } @@ -2417,11 +2454,15 @@ static int _convert_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) { int error; - if (is_remote(r)) + if (is_remote(r)) { /* receive_convert() calls do_convert() on remote node */ error = send_convert(r, lkb); - else + } else { error = do_convert(r, lkb); + /* for remote locks the convert_reply is sent + between do_convert and do_convert_effects */ + do_convert_effects(r, lkb, error); + } return error; } @@ -2432,11 +2473,15 @@ static int _unlock_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) { int error; - if 
(is_remote(r)) + if (is_remote(r)) { /* receive_unlock() calls do_unlock() on remote node */ error = send_unlock(r, lkb); - else + } else { error = do_unlock(r, lkb); + /* for remote locks the unlock_reply is sent + between do_unlock and do_unlock_effects */ + do_unlock_effects(r, lkb, error); + } return error; } @@ -2447,11 +2492,15 @@ static int _cancel_lock(struct dlm_rsb *r, struct dlm_lkb *lkb) { int error; - if (is_remote(r)) + if (is_remote(r)) { /* receive_cancel() calls do_cancel() on remote node */ error = send_cancel(r, lkb); - else + } else { error = do_cancel(r, lkb); + /* for remote locks the cancel_reply is sent + between do_cancel and do_cancel_effects */ + do_cancel_effects(r, lkb, error); + } return error; } @@ -3191,6 +3240,7 @@ static void receive_request(struct dlm_ls *ls, struct dlm_message *ms) attach_lkb(r, lkb); error = do_request(r, lkb); send_request_reply(r, lkb, error); + do_request_effects(r, lkb, error); unlock_rsb(r); put_rsb(r); @@ -3226,15 +3276,19 @@ static void receive_convert(struct dlm_ls *ls, struct dlm_message *ms) goto out; receive_flags(lkb, ms); + error = receive_convert_args(ls, lkb, ms); - if (error) - goto out_reply; + if (error) { + send_convert_reply(r, lkb, error); + goto out; + } + reply = !down_conversion(lkb); error = do_convert(r, lkb); - out_reply: if (reply) send_convert_reply(r, lkb, error); + do_convert_effects(r, lkb, error); out: unlock_rsb(r); put_rsb(r); @@ -3266,13 +3320,16 @@ static void receive_unlock(struct dlm_ls *ls, struct dlm_message *ms) goto out; receive_flags(lkb, ms); + error = receive_unlock_args(ls, lkb, ms); - if (error) - goto out_reply; + if (error) { + send_unlock_reply(r, lkb, error); + goto out; + } error = do_unlock(r, lkb); - out_reply: send_unlock_reply(r, lkb, error); + do_unlock_effects(r, lkb, error); out: unlock_rsb(r); put_rsb(r); @@ -3307,6 +3364,7 @@ static void receive_cancel(struct dlm_ls *ls, struct dlm_message *ms) error = do_cancel(r, lkb); send_cancel_reply(r, lkb, 
error); + do_cancel_effects(r, lkb, error); out: unlock_rsb(r); put_rsb(r); -- cgit v1.1 From b4a5d4bc377e49239374f266f0a0e2772c29749c Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 17 Feb 2010 09:41:34 +0000 Subject: dlm: Send lockspace name with uevents Although it is possible to get this information from the path, it's much easier to provide the lockspace as a separate env variable. Signed-off-by: Steven Whitehouse Signed-off-by: David Teigland --- fs/dlm/lockspace.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index c010ecf..26a8bd4 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c @@ -191,6 +191,18 @@ static int do_uevent(struct dlm_ls *ls, int in) return error; } +static int dlm_uevent(struct kset *kset, struct kobject *kobj, + struct kobj_uevent_env *env) +{ + struct dlm_ls *ls = container_of(kobj, struct dlm_ls, ls_kobj); + + add_uevent_var(env, "LOCKSPACE=%s", ls->ls_name); + return 0; +} + +static struct kset_uevent_ops dlm_uevent_ops = { + .uevent = dlm_uevent, +}; int __init dlm_lockspace_init(void) { @@ -199,7 +211,7 @@ int __init dlm_lockspace_init(void) INIT_LIST_HEAD(&lslist); spin_lock_init(&lslist_lock); - dlm_kset = kset_create_and_add("dlm", NULL, kernel_kobj); + dlm_kset = kset_create_and_add("dlm", &dlm_uevent_ops, kernel_kobj); if (!dlm_kset) { printk(KERN_WARNING "%s: can not create kset\n", __func__); return -ENOMEM; -- cgit v1.1 From b6fa8796b2da0390e9f4115e8789a01004fc1c9b Mon Sep 17 00:00:00 2001 From: David Teigland Date: Thu, 25 Feb 2010 12:20:57 -0600 Subject: dlm: use bastmode in debugfs output The bast mode that appears in the debugfs output should be useful on both master and process nodes. lkb_highbast is currently printed, and is only useful on the master node. lkb_bastmode is only useful on the process node. This patch sets lkb_bastmode on the master node as well, and uses that value in the debugfs print.
Signed-off-by: David Teigland --- fs/dlm/debug_fs.c | 2 +- fs/dlm/lock.c | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c index 375a235..29d6139 100644 --- a/fs/dlm/debug_fs.c +++ b/fs/dlm/debug_fs.c @@ -256,7 +256,7 @@ static int print_format3_lock(struct seq_file *s, struct dlm_lkb *lkb, lkb->lkb_status, lkb->lkb_grmode, lkb->lkb_rqmode, - lkb->lkb_highbast, + lkb->lkb_bastmode, rsb_lookup, lkb->lkb_wait_type, lkb->lkb_lvbseq, diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index d0e43a3..46ffd3e 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -320,10 +320,12 @@ static void queue_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rqmode) { lkb->lkb_time_bast = ktime_get(); - if (is_master_copy(lkb)) + if (is_master_copy(lkb)) { + lkb->lkb_bastmode = rqmode; /* printed by debugfs */ send_bast(r, lkb, rqmode); - else + } else { dlm_add_ast(lkb, AST_BAST, rqmode); + } } /* -- cgit v1.1 From 4912002fffa377e66c5caefc2c311732a4ad5fb8 Mon Sep 17 00:00:00 2001 From: Christian Kujau Date: Fri, 26 Feb 2010 17:25:14 +0000 Subject: Remove EXPERIMENTAL from NFS_FSCACHE There's currently an open Ubuntu bug[0], with the intent to compile NFS_FSCACHE (and possibly AFS_FSCACHE, 9P_FSCACHE) into the standard Ubuntu kernel. However, since *_FSCACHE still depends on EXPERIMENTAL, this won't happen. As Arjan van de Ven pointed out[1], the EXPERIMENTAL flag doesn't mean that much any more, I propose the following patch to fs/nfs/Kconfig. I'd do the same for fs/9p/Kconfig and fs/afs/Kconfig, but as I did not test 9p or AFS, I feel it would not be appropriate for me to remove the flag. 
[0] https://bugs.launchpad.net/ubuntu/+source/linux/+bug/440522/comments/5 [1] http://lkml.org/lkml/2010/1/23/145 Signed-off-by: Christian Kujau Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- fs/nfs/Kconfig | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 59e5673..a43d07e 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -95,8 +95,7 @@ config ROOT_NFS Most people say N here. config NFS_FSCACHE - bool "Provide NFS client caching support (EXPERIMENTAL)" - depends on EXPERIMENTAL + bool "Provide NFS client caching support" depends on NFS_FS=m && FSCACHE || NFS_FS=y && FSCACHE=y help Say Y here if you want NFS data to be cached locally on disc through -- cgit v1.1