aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs')
-rw-r--r--fs/xfs/linux-2.6/xfs_acl.c4
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h1
-rw-r--r--fs/xfs/linux-2.6/xfs_discard.c4
-rw-r--r--fs/xfs/linux-2.6/xfs_export.c8
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c34
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c16
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c54
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c48
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.h2
-rw-r--r--fs/xfs/quota/xfs_qm.c3
-rw-r--r--fs/xfs/xfs_attr.c14
-rw-r--r--fs/xfs/xfs_attr_leaf.c64
-rw-r--r--fs/xfs/xfs_bmap.c10
-rw-r--r--fs/xfs/xfs_buf_item.c1
-rw-r--r--fs/xfs/xfs_iget.c18
-rw-r--r--fs/xfs/xfs_inode.c29
-rw-r--r--fs/xfs/xfs_inode.h1
-rw-r--r--fs/xfs/xfs_log_recover.c33
-rw-r--r--fs/xfs/xfs_mount.c29
-rw-r--r--fs/xfs/xfs_vnodeops.c15
20 files changed, 219 insertions, 169 deletions
diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c
index 39f4f80..f86e034 100644
--- a/fs/xfs/linux-2.6/xfs_acl.c
+++ b/fs/xfs/linux-2.6/xfs_acl.c
@@ -39,9 +39,11 @@ xfs_acl_from_disk(struct xfs_acl *aclp)
struct posix_acl_entry *acl_e;
struct posix_acl *acl;
struct xfs_acl_entry *ace;
- int count, i;
+ unsigned int count, i;
count = be32_to_cpu(aclp->acl_cnt);
+ if (count > XFS_ACL_MAX_ENTRIES)
+ return ERR_PTR(-EFSCORRUPTED);
acl = posix_acl_alloc(count, GFP_KERNEL);
if (!acl)
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 50a7d5f..36d6ee4 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -346,7 +346,6 @@ extern struct list_head *xfs_get_buftarg_list(void);
#define xfs_getsize_buftarg(buftarg) block_size((buftarg)->bt_bdev)
#define xfs_readonly_buftarg(buftarg) bdev_read_only((buftarg)->bt_bdev)
-#define xfs_binval(buftarg) xfs_flush_buftarg(buftarg, 1)
#define XFS_bflush(buftarg) xfs_flush_buftarg(buftarg, 1)
#endif /* __XFS_BUF_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_discard.c b/fs/xfs/linux-2.6/xfs_discard.c
index 244e797..572494f 100644
--- a/fs/xfs/linux-2.6/xfs_discard.c
+++ b/fs/xfs/linux-2.6/xfs_discard.c
@@ -68,7 +68,7 @@ xfs_trim_extents(
* Look up the longest btree in the AGF and start with it.
*/
error = xfs_alloc_lookup_le(cur, 0,
- XFS_BUF_TO_AGF(agbp)->agf_longest, &i);
+ be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_longest), &i);
if (error)
goto out_del_cursor;
@@ -84,7 +84,7 @@ xfs_trim_extents(
if (error)
goto out_del_cursor;
XFS_WANT_CORRUPTED_GOTO(i == 1, out_del_cursor);
- ASSERT(flen <= XFS_BUF_TO_AGF(agbp)->agf_longest);
+ ASSERT(flen <= be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_longest));
/*
* Too small? Give up.
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index f4f878f..fed3f3c 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -98,22 +98,22 @@ xfs_fs_encode_fh(
switch (fileid_type) {
case FILEID_INO32_GEN_PARENT:
spin_lock(&dentry->d_lock);
- fid->i32.parent_ino = dentry->d_parent->d_inode->i_ino;
+ fid->i32.parent_ino = XFS_I(dentry->d_parent->d_inode)->i_ino;
fid->i32.parent_gen = dentry->d_parent->d_inode->i_generation;
spin_unlock(&dentry->d_lock);
/*FALLTHRU*/
case FILEID_INO32_GEN:
- fid->i32.ino = inode->i_ino;
+ fid->i32.ino = XFS_I(inode)->i_ino;
fid->i32.gen = inode->i_generation;
break;
case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG:
spin_lock(&dentry->d_lock);
- fid64->parent_ino = dentry->d_parent->d_inode->i_ino;
+ fid64->parent_ino = XFS_I(dentry->d_parent->d_inode)->i_ino;
fid64->parent_gen = dentry->d_parent->d_inode->i_generation;
spin_unlock(&dentry->d_lock);
/*FALLTHRU*/
case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG:
- fid64->ino = inode->i_ino;
+ fid64->ino = XFS_I(inode)->i_ino;
fid64->gen = inode->i_generation;
break;
}
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 7f782af..b679198 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -309,7 +309,19 @@ xfs_file_aio_read(
if (XFS_FORCED_SHUTDOWN(mp))
return -EIO;
- if (unlikely(ioflags & IO_ISDIRECT)) {
+ /*
+ * Locking is a bit tricky here. If we take an exclusive lock
+ * for direct IO, we effectively serialise all new concurrent
+ * read IO to this file and block it behind IO that is currently in
+ * progress because IO in progress holds the IO lock shared. We only
+ * need to hold the lock exclusive to blow away the page cache, so
+ * only take lock exclusively if the page cache needs invalidation.
+ * This allows the normal direct IO case of no page cache pages to
+ * proceeed concurrently without serialisation.
+ */
+ xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
+ if ((ioflags & IO_ISDIRECT) && inode->i_mapping->nrpages) {
+ xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);
if (inode->i_mapping->nrpages) {
@@ -322,8 +334,7 @@ xfs_file_aio_read(
}
}
xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
- } else
- xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
+ }
trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags);
@@ -658,6 +669,7 @@ xfs_file_aio_write_checks(
xfs_fsize_t new_size;
int error = 0;
+ xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode));
if (error) {
xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock);
@@ -749,14 +761,24 @@ xfs_file_dio_aio_write(
*iolock = XFS_IOLOCK_EXCL;
else
*iolock = XFS_IOLOCK_SHARED;
- xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock);
+ xfs_rw_ilock(ip, *iolock);
ret = xfs_file_aio_write_checks(file, &pos, &count, iolock);
if (ret)
return ret;
+ /*
+ * Recheck if there are cached pages that need invalidate after we got
+ * the iolock to protect against other threads adding new pages while
+ * we were waiting for the iolock.
+ */
+ if (mapping->nrpages && *iolock == XFS_IOLOCK_SHARED) {
+ xfs_rw_iunlock(ip, *iolock);
+ *iolock = XFS_IOLOCK_EXCL;
+ xfs_rw_ilock(ip, *iolock);
+ }
+
if (mapping->nrpages) {
- WARN_ON(*iolock != XFS_IOLOCK_EXCL);
ret = -xfs_flushinval_pages(ip, (pos & PAGE_CACHE_MASK), -1,
FI_REMAPF_LOCKED);
if (ret)
@@ -801,7 +823,7 @@ xfs_file_buffered_aio_write(
size_t count = ocount;
*iolock = XFS_IOLOCK_EXCL;
- xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock);
+ xfs_rw_ilock(ip, *iolock);
ret = xfs_file_aio_write_checks(file, &pos, &count, iolock);
if (ret)
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index d44d92c..f5b697b 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -69,9 +69,8 @@ xfs_synchronize_times(
}
/*
- * If the linux inode is valid, mark it dirty.
- * Used when committing a dirty inode into a transaction so that
- * the inode will get written back by the linux code
+ * If the linux inode is valid, mark it dirty, else mark the dirty state
+ * in the XFS inode to make sure we pick it up when reclaiming the inode.
*/
void
xfs_mark_inode_dirty_sync(
@@ -81,6 +80,10 @@ xfs_mark_inode_dirty_sync(
if (!(inode->i_state & (I_WILL_FREE|I_FREEING)))
mark_inode_dirty_sync(inode);
+ else {
+ barrier();
+ ip->i_update_core = 1;
+ }
}
void
@@ -91,6 +94,11 @@ xfs_mark_inode_dirty(
if (!(inode->i_state & (I_WILL_FREE|I_FREEING)))
mark_inode_dirty(inode);
+ else {
+ barrier();
+ ip->i_update_core = 1;
+ }
+
}
/*
@@ -456,7 +464,7 @@ xfs_vn_getattr(
trace_xfs_getattr(ip);
if (XFS_FORCED_SHUTDOWN(mp))
- return XFS_ERROR(EIO);
+ return -XFS_ERROR(EIO);
stat->size = XFS_ISIZE(ip);
stat->dev = inode->i_sb->s_dev;
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 347cae9..e6ac98c 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -871,43 +871,6 @@ xfs_fs_dirty_inode(
}
STATIC int
-xfs_log_inode(
- struct xfs_inode *ip)
-{
- struct xfs_mount *mp = ip->i_mount;
- struct xfs_trans *tp;
- int error;
-
- xfs_iunlock(ip, XFS_ILOCK_SHARED);
- tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
- error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
-
- if (error) {
- xfs_trans_cancel(tp, 0);
- /* we need to return with the lock hold shared */
- xfs_ilock(ip, XFS_ILOCK_SHARED);
- return error;
- }
-
- xfs_ilock(ip, XFS_ILOCK_EXCL);
-
- /*
- * Note - it's possible that we might have pushed ourselves out of the
- * way during trans_reserve which would flush the inode. But there's
- * no guarantee that the inode buffer has actually gone out yet (it's
- * delwri). Plus the buffer could be pinned anyway if it's part of
- * an inode in another recent transaction. So we play it safe and
- * fire off the transaction anyway.
- */
- xfs_trans_ijoin(tp, ip);
- xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
- error = xfs_trans_commit(tp, 0);
- xfs_ilock_demote(ip, XFS_ILOCK_EXCL);
-
- return error;
-}
-
-STATIC int
xfs_fs_write_inode(
struct inode *inode,
struct writeback_control *wbc)
@@ -919,9 +882,9 @@ xfs_fs_write_inode(
trace_xfs_write_inode(ip);
if (XFS_FORCED_SHUTDOWN(mp))
- return XFS_ERROR(EIO);
+ return -XFS_ERROR(EIO);
- if (wbc->sync_mode == WB_SYNC_ALL) {
+ if (wbc->sync_mode == WB_SYNC_ALL || wbc->for_kupdate) {
/*
* Make sure the inode has made it it into the log. Instead
* of forcing it all the way to stable storage using a
@@ -930,13 +893,14 @@ xfs_fs_write_inode(
* of synchronous log foces dramatically.
*/
xfs_ioend_wait(ip);
- xfs_ilock(ip, XFS_ILOCK_SHARED);
- if (ip->i_update_core) {
- error = xfs_log_inode(ip);
- if (error)
- goto out_unlock;
- }
+ error = xfs_log_dirty_inode(ip, NULL, 0);
+ if (error)
+ goto out;
+ return 0;
} else {
+ if (!ip->i_update_core)
+ return 0;
+
/*
* We make this non-blocking if the inode is contended, return
* EAGAIN to indicate to the caller that they did not succeed.
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 8ecad5f..2f277a0 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -336,6 +336,32 @@ xfs_sync_fsdata(
return xfs_bwrite(mp, bp);
}
+int
+xfs_log_dirty_inode(
+ struct xfs_inode *ip,
+ struct xfs_perag *pag,
+ int flags)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_trans *tp;
+ int error;
+
+ if (!ip->i_update_core)
+ return 0;
+
+ tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
+ error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
+ if (error) {
+ xfs_trans_cancel(tp, 0);
+ return error;
+ }
+
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL);
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+ return xfs_trans_commit(tp, 0);
+}
+
/*
* When remounting a filesystem read-only or freezing the filesystem, we have
* two phases to execute. This first phase is syncing the data before we
@@ -365,6 +391,17 @@ xfs_quiesce_data(
/* push and block till complete */
xfs_sync_data(mp, SYNC_WAIT);
+
+ /*
+ * Log all pending size and timestamp updates. The vfs writeback
+ * code is supposed to do this, but due to its overagressive
+ * livelock detection it will skip inodes where appending writes
+ * were written out in the first non-blocking sync phase if their
+ * completion took long enough that it happened after taking the
+ * timestamp for the cut-off in the blocking phase.
+ */
+ xfs_inode_ag_iterator(mp, xfs_log_dirty_inode, 0);
+
xfs_qm_sync(mp, SYNC_WAIT);
/* write superblock and hoover up shutdown errors */
@@ -772,6 +809,17 @@ restart:
if (!xfs_iflock_nowait(ip)) {
if (!(sync_mode & SYNC_WAIT))
goto out;
+
+ /*
+ * If we only have a single dirty inode in a cluster there is
+ * a fair chance that the AIL push may have pushed it into
+ * the buffer, but xfsbufd won't touch it until 30 seconds
+ * from now, and thus we will lock up here.
+ *
+ * Promote the inode buffer to the front of the delwri list
+ * and wake up xfsbufd now.
+ */
+ xfs_promote_inode(ip);
xfs_iflock(ip);
}
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index e3a6ad2..ef5b2ce 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -42,6 +42,8 @@ void xfs_quiesce_attr(struct xfs_mount *mp);
void xfs_flush_inodes(struct xfs_inode *ip);
+int xfs_log_dirty_inode(struct xfs_inode *ip, struct xfs_perag *pag, int flags);
+
int xfs_reclaim_inodes(struct xfs_mount *mp, int mode);
void xfs_inode_set_reclaim_tag(struct xfs_inode *ip);
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index b94dace..e70c7fc 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -714,7 +714,8 @@ xfs_qm_dqattach_one(
* disk and we didn't ask it to allocate;
* ESRCH if quotas got turned off suddenly.
*/
- error = xfs_qm_dqget(ip->i_mount, ip, id, type, XFS_QMOPT_DOWARN, &dqp);
+ error = xfs_qm_dqget(ip->i_mount, ip, id, type,
+ doalloc | XFS_QMOPT_DOWARN, &dqp);
if (error)
return error;
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index 01d2072..99d4011 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -822,17 +822,9 @@ xfs_attr_inactive(xfs_inode_t *dp)
error = xfs_attr_root_inactive(&trans, dp);
if (error)
goto out;
- /*
- * signal synchronous inactive transactions unless this
- * is a synchronous mount filesystem in which case we
- * know that we're here because we've been called out of
- * xfs_inactive which means that the last reference is gone
- * and the unlink transaction has already hit the disk so
- * async inactive transactions are safe.
- */
- if ((error = xfs_itruncate_finish(&trans, dp, 0LL, XFS_ATTR_FORK,
- (!(mp->m_flags & XFS_MOUNT_WSYNC)
- ? 1 : 0))))
+
+ error = xfs_itruncate_finish(&trans, dp, 0LL, XFS_ATTR_FORK, 0);
+ if (error)
goto out;
/*
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index 71e90dc2..f49ecf2 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -110,6 +110,7 @@ xfs_attr_namesp_match(int arg_flags, int ondisk_flags)
/*
* Query whether the requested number of additional bytes of extended
* attribute space will be able to fit inline.
+ *
* Returns zero if not, else the di_forkoff fork offset to be used in the
* literal area for attribute data once the new bytes have been added.
*
@@ -122,7 +123,7 @@ xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes)
int offset;
int minforkoff; /* lower limit on valid forkoff locations */
int maxforkoff; /* upper limit on valid forkoff locations */
- int dsize;
+ int dsize;
xfs_mount_t *mp = dp->i_mount;
offset = (XFS_LITINO(mp) - bytes) >> 3; /* rounded down */
@@ -136,47 +137,60 @@ xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes)
return (offset >= minforkoff) ? minforkoff : 0;
}
- if (!(mp->m_flags & XFS_MOUNT_ATTR2)) {
- if (bytes <= XFS_IFORK_ASIZE(dp))
- return dp->i_d.di_forkoff;
+ /*
+ * If the requested numbers of bytes is smaller or equal to the
+ * current attribute fork size we can always proceed.
+ *
+ * Note that if_bytes in the data fork might actually be larger than
+ * the current data fork size is due to delalloc extents. In that
+ * case either the extent count will go down when they are converted
+ * to real extents, or the delalloc conversion will take care of the
+ * literal area rebalancing.
+ */
+ if (bytes <= XFS_IFORK_ASIZE(dp))
+ return dp->i_d.di_forkoff;
+
+ /*
+ * For attr2 we can try to move the forkoff if there is space in the
+ * literal area, but for the old format we are done if there is no
+ * space in the fixed attribute fork.
+ */
+ if (!(mp->m_flags & XFS_MOUNT_ATTR2))
return 0;
- }
dsize = dp->i_df.if_bytes;
-
+
switch (dp->i_d.di_format) {
case XFS_DINODE_FMT_EXTENTS:
- /*
+ /*
* If there is no attr fork and the data fork is extents,
- * determine if creating the default attr fork will result
- * in the extents form migrating to btree. If so, the
- * minimum offset only needs to be the space required for
+ * determine if creating the default attr fork will result
+ * in the extents form migrating to btree. If so, the
+ * minimum offset only needs to be the space required for
* the btree root.
- */
+ */
if (!dp->i_d.di_forkoff && dp->i_df.if_bytes >
xfs_default_attroffset(dp))
dsize = XFS_BMDR_SPACE_CALC(MINDBTPTRS);
break;
-
case XFS_DINODE_FMT_BTREE:
/*
- * If have data btree then keep forkoff if we have one,
- * otherwise we are adding a new attr, so then we set
- * minforkoff to where the btree root can finish so we have
+ * If we have a data btree then keep forkoff if we have one,
+ * otherwise we are adding a new attr, so then we set
+ * minforkoff to where the btree root can finish so we have
* plenty of room for attrs
*/
if (dp->i_d.di_forkoff) {
- if (offset < dp->i_d.di_forkoff)
+ if (offset < dp->i_d.di_forkoff)
return 0;
- else
- return dp->i_d.di_forkoff;
- } else
- dsize = XFS_BMAP_BROOT_SPACE(dp->i_df.if_broot);
+ return dp->i_d.di_forkoff;
+ }
+ dsize = XFS_BMAP_BROOT_SPACE(dp->i_df.if_broot);
break;
}
-
- /*
- * A data fork btree root must have space for at least
+
+ /*
+ * A data fork btree root must have space for at least
* MINDBTPTRS key/ptr pairs if the data fork is small or empty.
*/
minforkoff = MAX(dsize, XFS_BMDR_SPACE_CALC(MINDBTPTRS));
@@ -186,10 +200,10 @@ xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes)
maxforkoff = XFS_LITINO(mp) - XFS_BMDR_SPACE_CALC(MINABTPTRS);
maxforkoff = maxforkoff >> 3; /* rounded down */
- if (offset >= minforkoff && offset < maxforkoff)
- return offset;
if (offset >= maxforkoff)
return maxforkoff;
+ if (offset >= minforkoff)
+ return offset;
return 0;
}
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index e546a33..a175933 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -3785,19 +3785,11 @@ xfs_bmap_compute_maxlevels(
* Routine to be called at transaction's end by xfs_bmapi, xfs_bunmapi
* caller. Frees all the extents that need freeing, which must be done
* last due to locking considerations. We never free any extents in
- * the first transaction. This is to allow the caller to make the first
- * transaction a synchronous one so that the pointers to the data being
- * broken in this transaction will be permanent before the data is actually
- * freed. This is necessary to prevent blocks from being reallocated
- * and written to before the free and reallocation are actually permanent.
- * We do not just make the first transaction synchronous here, because
- * there are more efficient ways to gain the same protection in some cases
- * (see the file truncation code).
+ * the first transaction.
*
* Return 1 if the given transaction was committed and a new one
* started, and 0 otherwise in the committed parameter.
*/
-/*ARGSUSED*/
int /* error */
xfs_bmap_finish(
xfs_trans_t **tp, /* transaction pointer addr */
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index a7342e8..7888a75 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -1023,7 +1023,6 @@ xfs_buf_iodone_callbacks(
XFS_BUF_UNDELAYWRITE(bp);
trace_xfs_buf_error_relse(bp, _RET_IP_);
- xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
do_callbacks:
xfs_buf_do_callbacks(bp);
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 3631783..ca752f0 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -356,9 +356,20 @@ xfs_iget_cache_miss(
BUG();
}
- spin_lock(&pag->pag_ici_lock);
+ /*
+ * These values must be set before inserting the inode into the radix
+ * tree as the moment it is inserted a concurrent lookup (allowed by the
+ * RCU locking mechanism) can find it and that lookup must see that this
+ * is an inode currently under construction (i.e. that XFS_INEW is set).
+ * The ip->i_flags_lock that protects the XFS_INEW flag forms the
+ * memory barrier that ensures this detection works correctly at lookup
+ * time.
+ */
+ ip->i_udquot = ip->i_gdquot = NULL;
+ xfs_iflags_set(ip, XFS_INEW);
/* insert the new inode */
+ spin_lock(&pag->pag_ici_lock);
error = radix_tree_insert(&pag->pag_ici_root, agino, ip);
if (unlikely(error)) {
WARN_ON(error != -EEXIST);
@@ -366,11 +377,6 @@ xfs_iget_cache_miss(
error = EAGAIN;
goto out_preload_end;
}
-
- /* These values _must_ be set before releasing the radix tree lock! */
- ip->i_udquot = ip->i_gdquot = NULL;
- xfs_iflags_set(ip, XFS_INEW);
-
spin_unlock(&pag->pag_ici_lock);
radix_tree_preload_end();
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index a098a20..5715279 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1528,15 +1528,7 @@ xfs_itruncate_finish(
xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE);
}
}
- } else if (sync) {
- ASSERT(!(mp->m_flags & XFS_MOUNT_WSYNC));
- if (ip->i_d.di_anextents > 0)
- xfs_trans_set_sync(ntp);
}
- ASSERT(fork == XFS_DATA_FORK ||
- (fork == XFS_ATTR_FORK &&
- ((sync && !(mp->m_flags & XFS_MOUNT_WSYNC)) ||
- (sync == 0 && (mp->m_flags & XFS_MOUNT_WSYNC)))));
/*
* Since it is possible for space to become allocated beyond
@@ -3099,6 +3091,27 @@ corrupt_out:
return XFS_ERROR(EFSCORRUPTED);
}
+void
+xfs_promote_inode(
+ struct xfs_inode *ip)
+{
+ struct xfs_buf *bp;
+
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
+
+ bp = xfs_incore(ip->i_mount->m_ddev_targp, ip->i_imap.im_blkno,
+ ip->i_imap.im_len, XBF_TRYLOCK);
+ if (!bp)
+ return;
+
+ if (XFS_BUF_ISDELAYWRITE(bp)) {
+ xfs_buf_delwri_promote(bp);
+ wake_up_process(ip->i_mount->m_ddev_targp->bt_task);
+ }
+
+ xfs_buf_relse(bp);
+}
+
/*
* Return a pointer to the extent record at file index idx.
*/
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 964cfea..28b3596 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -509,6 +509,7 @@ int xfs_iunlink(struct xfs_trans *, xfs_inode_t *);
void xfs_iext_realloc(xfs_inode_t *, int, int);
void xfs_iunpin_wait(xfs_inode_t *);
int xfs_iflush(xfs_inode_t *, uint);
+void xfs_promote_inode(struct xfs_inode *);
void xfs_lock_inodes(xfs_inode_t **, int, uint);
void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint);
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 04142ca..b75fd67 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -3159,37 +3159,26 @@ xlog_recover_process_iunlinks(
*/
continue;
}
+ /*
+ * Unlock the buffer so that it can be acquired in the normal
+ * course of the transaction to truncate and free each inode.
+ * Because we are not racing with anyone else here for the AGI
+ * buffer, we don't even need to hold it locked to read the
+ * initial unlinked bucket entries out of the buffer. We keep
+ * buffer reference though, so that it stays pinned in memory
+ * while we need the buffer.
+ */
agi = XFS_BUF_TO_AGI(agibp);
+ xfs_buf_unlock(agibp);
for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) {
agino = be32_to_cpu(agi->agi_unlinked[bucket]);
while (agino != NULLAGINO) {
- /*
- * Release the agi buffer so that it can
- * be acquired in the normal course of the
- * transaction to truncate and free the inode.
- */
- xfs_buf_relse(agibp);
-
agino = xlog_recover_process_one_iunlink(mp,
agno, agino, bucket);
-
- /*
- * Reacquire the agibuffer and continue around
- * the loop. This should never fail as we know
- * the buffer was good earlier on.
- */
- error = xfs_read_agi(mp, NULL, agno, &agibp);
- ASSERT(error == 0);
- agi = XFS_BUF_TO_AGI(agibp);
}
}
-
- /*
- * Release the buffer for the current agi so we can
- * go on to the next one.
- */
- xfs_buf_relse(agibp);
+ xfs_buf_rele(agibp);
}
mp->m_dmevmask = mp_dmevmask;
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index b49b823..9afdd49 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -44,9 +44,6 @@
#include "xfs_trace.h"
-STATIC void xfs_unmountfs_wait(xfs_mount_t *);
-
-
#ifdef HAVE_PERCPU_SB
STATIC void xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t,
int);
@@ -1507,11 +1504,6 @@ xfs_unmountfs(
*/
xfs_log_force(mp, XFS_LOG_SYNC);
- xfs_binval(mp->m_ddev_targp);
- if (mp->m_rtdev_targp) {
- xfs_binval(mp->m_rtdev_targp);
- }
-
/*
* Unreserve any blocks we have so that when we unmount we don't account
* the reserved free space as used. This is really only necessary for
@@ -1537,7 +1529,16 @@ xfs_unmountfs(
xfs_warn(mp, "Unable to update superblock counters. "
"Freespace may not be correct on next mount.");
xfs_unmountfs_writesb(mp);
- xfs_unmountfs_wait(mp); /* wait for async bufs */
+
+ /*
+ * Make sure all buffers have been flushed and completed before
+ * unmounting the log.
+ */
+ error = xfs_flush_buftarg(mp->m_ddev_targp, 1);
+ if (error)
+ xfs_warn(mp, "%d busy buffers during unmount.", error);
+ xfs_wait_buftarg(mp->m_ddev_targp);
+
xfs_log_unmount_write(mp);
xfs_log_unmount(mp);
xfs_uuid_unmount(mp);
@@ -1548,16 +1549,6 @@ xfs_unmountfs(
xfs_free_perag(mp);
}
-STATIC void
-xfs_unmountfs_wait(xfs_mount_t *mp)
-{
- if (mp->m_logdev_targp != mp->m_ddev_targp)
- xfs_wait_buftarg(mp->m_logdev_targp);
- if (mp->m_rtdev_targp)
- xfs_wait_buftarg(mp->m_rtdev_targp);
- xfs_wait_buftarg(mp->m_ddev_targp);
-}
-
int
xfs_fs_writable(xfs_mount_t *mp)
{
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 6197207..59509ae 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -535,7 +535,7 @@ xfs_readlink(
char *link)
{
xfs_mount_t *mp = ip->i_mount;
- int pathlen;
+ xfs_fsize_t pathlen;
int error = 0;
trace_xfs_readlink(ip);
@@ -545,13 +545,20 @@ xfs_readlink(
xfs_ilock(ip, XFS_ILOCK_SHARED);
- ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFLNK);
- ASSERT(ip->i_d.di_size <= MAXPATHLEN);
-
pathlen = ip->i_d.di_size;
if (!pathlen)
goto out;
+ if (pathlen < 0 || pathlen > MAXPATHLEN) {
+ xfs_alert(mp, "%s: inode (%llu) bad symlink length (%lld)",
+ __func__, (unsigned long long) ip->i_ino,
+ (long long) pathlen);
+ ASSERT(0);
+ error = XFS_ERROR(EFSCORRUPTED);
+ goto out;
+ }
+
+
if (ip->i_df.if_flags & XFS_IFINLINE) {
memcpy(link, ip->i_df.if_u1.if_data, pathlen);
link[pathlen] = '\0';