diff options
Diffstat (limited to 'fs/jfs')
-rw-r--r-- | fs/jfs/inode.c | 35 | ||||
-rw-r--r-- | fs/jfs/jfs_dmap.c | 12 | ||||
-rw-r--r-- | fs/jfs/jfs_dtree.c | 6 | ||||
-rw-r--r-- | fs/jfs/jfs_imap.c | 84 | ||||
-rw-r--r-- | fs/jfs/jfs_incore.h | 1 | ||||
-rw-r--r-- | fs/jfs/jfs_logmgr.c | 150 | ||||
-rw-r--r-- | fs/jfs/jfs_logmgr.h | 9 | ||||
-rw-r--r-- | fs/jfs/jfs_metapage.c | 908 | ||||
-rw-r--r-- | fs/jfs/jfs_metapage.h | 80 | ||||
-rw-r--r-- | fs/jfs/jfs_mount.c | 5 | ||||
-rw-r--r-- | fs/jfs/jfs_txnmgr.c | 166 | ||||
-rw-r--r-- | fs/jfs/jfs_umount.c | 16 | ||||
-rw-r--r-- | fs/jfs/jfs_xtree.c | 61 | ||||
-rw-r--r-- | fs/jfs/resize.c | 3 | ||||
-rw-r--r-- | fs/jfs/super.c | 37 |
15 files changed, 993 insertions, 580 deletions
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index 7bc9066..24a6891 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -175,31 +175,22 @@ jfs_get_blocks(struct inode *ip, sector_t lblock, unsigned long max_blocks, { s64 lblock64 = lblock; int rc = 0; - int take_locks; xad_t xad; s64 xaddr; int xflag; - s32 xlen; - - /* - * If this is a special inode (imap, dmap) - * the lock should already be taken - */ - take_locks = (JFS_IP(ip)->fileset != AGGREGATE_I); + s32 xlen = max_blocks; /* * Take appropriate lock on inode */ - if (take_locks) { - if (create) - IWRITE_LOCK(ip); - else - IREAD_LOCK(ip); - } + if (create) + IWRITE_LOCK(ip); + else + IREAD_LOCK(ip); if (((lblock64 << ip->i_sb->s_blocksize_bits) < ip->i_size) && - (xtLookup(ip, lblock64, max_blocks, &xflag, &xaddr, &xlen, 0) - == 0) && xlen) { + (!xtLookup(ip, lblock64, max_blocks, &xflag, &xaddr, &xlen, 0)) && + xaddr) { if (xflag & XAD_NOTRECORDED) { if (!create) /* @@ -238,7 +229,7 @@ jfs_get_blocks(struct inode *ip, sector_t lblock, unsigned long max_blocks, #ifdef _JFS_4K if ((rc = extHint(ip, lblock64 << ip->i_sb->s_blocksize_bits, &xad))) goto unlock; - rc = extAlloc(ip, max_blocks, lblock64, &xad, FALSE); + rc = extAlloc(ip, xlen, lblock64, &xad, FALSE); if (rc) goto unlock; @@ -258,12 +249,10 @@ jfs_get_blocks(struct inode *ip, sector_t lblock, unsigned long max_blocks, /* * Release lock on inode */ - if (take_locks) { - if (create) - IWRITE_UNLOCK(ip); - else - IREAD_UNLOCK(ip); - } + if (create) + IWRITE_UNLOCK(ip); + else + IREAD_UNLOCK(ip); return rc; } diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index d86e467..69007fd 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c @@ -471,6 +471,7 @@ dbUpdatePMap(struct inode *ipbmap, struct metapage *mp; struct jfs_log *log; int lsn, difft, diffp; + unsigned long flags; /* the blocks better be within the mapsize. */ if (blkno + nblocks > bmp->db_mapsize) { @@ -504,6 +505,7 @@ dbUpdatePMap(struct inode *ipbmap, 0); if (mp == NULL) return -EIO; + metapage_wait_for_io(mp); } dp = (struct dmap *) mp->data; @@ -578,34 +580,32 @@ dbUpdatePMap(struct inode *ipbmap, if (mp->lsn != 0) { /* inherit older/smaller lsn */ logdiff(diffp, mp->lsn, log); + LOGSYNC_LOCK(log, flags); if (difft < diffp) { mp->lsn = lsn; /* move bp after tblock in logsync list */ - LOGSYNC_LOCK(log); list_move(&mp->synclist, &tblk->synclist); - LOGSYNC_UNLOCK(log); } /* inherit younger/larger clsn */ - LOGSYNC_LOCK(log); logdiff(difft, tblk->clsn, log); logdiff(diffp, mp->clsn, log); if (difft > diffp) mp->clsn = tblk->clsn; - LOGSYNC_UNLOCK(log); + LOGSYNC_UNLOCK(log, flags); } else { mp->log = log; mp->lsn = lsn; /* insert bp after tblock in logsync list */ - LOGSYNC_LOCK(log); + LOGSYNC_LOCK(log, flags); log->count++; list_add(&mp->synclist, &tblk->synclist); mp->clsn = tblk->clsn; - LOGSYNC_UNLOCK(log); + LOGSYNC_UNLOCK(log, flags); } } diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c index e357890..ac41f72 100644 --- a/fs/jfs/jfs_dtree.c +++ b/fs/jfs/jfs_dtree.c @@ -212,7 +212,7 @@ static struct metapage *read_index_page(struct inode *inode, s64 blkno) s32 xlen; rc = xtLookup(inode, blkno, 1, &xflag, &xaddr, &xlen, 1); - if (rc || (xlen == 0)) + if (rc || (xaddr == 0)) return NULL; return read_metapage(inode, xaddr, PSIZE, 1); @@ -231,7 +231,7 @@ static struct metapage *get_index_page(struct inode *inode, s64 blkno) s32 xlen; rc = xtLookup(inode, blkno, 1, &xflag, &xaddr, &xlen, 1); - if (rc || (xlen == 0)) + if (rc || (xaddr == 0)) return NULL; return get_metapage(inode, xaddr, PSIZE, 1); @@ -3181,7 +3181,7 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) d = (struct ldtentry *) & p->slot[stbl[i]]; if (((long) jfs_dirent + d->namlen + 1) > - (dirent_buf + PSIZE)) { + (dirent_buf + PAGE_SIZE)) { /* DBCS codepages could overrun dirent_buf */ index = i; overflow = 1; diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c index 7838313..7acff2c 100644 --- a/fs/jfs/jfs_imap.c +++ b/fs/jfs/jfs_imap.c @@ -502,7 +502,7 @@ struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary) } - ip->i_mapping->a_ops = &jfs_aops; + ip->i_mapping->a_ops = &jfs_metapage_aops; mapping_set_gfp_mask(ip->i_mapping, GFP_NOFS); /* Allocations to metadata inodes should not affect quotas */ @@ -2573,9 +2573,18 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp) goto out; } - /* assign a buffer for the page */ - mp = get_metapage(ipimap, xaddr, PSIZE, 1); - if (!mp) { + /* + * start transaction of update of the inode map + * addressing structure pointing to the new iag page; + */ + tid = txBegin(sb, COMMIT_FORCE); + down(&JFS_IP(ipimap)->commit_sem); + + /* update the inode map addressing structure to point to it */ + if ((rc = + xtInsert(tid, ipimap, 0, blkno, xlen, &xaddr, 0))) { + txEnd(tid); + up(&JFS_IP(ipimap)->commit_sem); /* Free the blocks allocated for the iag since it was * not successfully added to the inode map */ @@ -2584,6 +2593,29 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp) /* release the inode map lock */ IWRITE_UNLOCK(ipimap); + goto out; + } + + /* update the inode map's inode to reflect the extension */ + ipimap->i_size += PSIZE; + inode_add_bytes(ipimap, PSIZE); + + /* assign a buffer for the page */ + mp = get_metapage(ipimap, blkno, PSIZE, 0); + if (!mp) { + /* + * This is very unlikely since we just created the + * extent, but let's try to handle it correctly + */ + xtTruncate(tid, ipimap, ipimap->i_size - PSIZE, + COMMIT_PWMAP); + + txAbort(tid, 0); + txEnd(tid); + + /* release the inode map lock */ + IWRITE_UNLOCK(ipimap); + rc = -EIO; goto out; } @@ -2605,41 +2637,11 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp) iagp->inosmap[i] = cpu_to_le32(ONES); /* - * Invalidate the page after writing and syncing it. - * After it's initialized, we access it in a different - * address space + * Write and sync the metapage */ - set_bit(META_discard, &mp->flag); flush_metapage(mp); /* - * start tyransaction of update of the inode map - * addressing structure pointing to the new iag page; - */ - tid = txBegin(sb, COMMIT_FORCE); - down(&JFS_IP(ipimap)->commit_sem); - - /* update the inode map addressing structure to point to it */ - if ((rc = - xtInsert(tid, ipimap, 0, blkno, xlen, &xaddr, 0))) { - txEnd(tid); - up(&JFS_IP(ipimap)->commit_sem); - /* Free the blocks allocated for the iag since it was - * not successfully added to the inode map - */ - dbFree(ipimap, xaddr, (s64) xlen); - - /* release the inode map lock */ - IWRITE_UNLOCK(ipimap); - - goto out; - } - - /* update the inode map's inode to reflect the extension */ - ipimap->i_size += PSIZE; - inode_add_bytes(ipimap, PSIZE); - - /* * txCommit(COMMIT_FORCE) will synchronously write address * index pages and inode after commit in careful update order * of address index pages (right to left, bottom up); @@ -2789,6 +2791,7 @@ diUpdatePMap(struct inode *ipimap, u32 mask; struct jfs_log *log; int lsn, difft, diffp; + unsigned long flags; imap = JFS_IP(ipimap)->i_imap; /* get the iag number containing the inode */ @@ -2805,6 +2808,7 @@ diUpdatePMap(struct inode *ipimap, IREAD_UNLOCK(ipimap); if (rc) return (rc); + metapage_wait_for_io(mp); iagp = (struct iag *) mp->data; /* get the inode number and extent number of the inode within * the iag and the inode number within the extent. @@ -2868,30 +2872,28 @@ diUpdatePMap(struct inode *ipimap, /* inherit older/smaller lsn */ logdiff(difft, lsn, log); logdiff(diffp, mp->lsn, log); + LOGSYNC_LOCK(log, flags); if (difft < diffp) { mp->lsn = lsn; /* move mp after tblock in logsync list */ - LOGSYNC_LOCK(log); list_move(&mp->synclist, &tblk->synclist); - LOGSYNC_UNLOCK(log); } /* inherit younger/larger clsn */ - LOGSYNC_LOCK(log); assert(mp->clsn); logdiff(difft, tblk->clsn, log); logdiff(diffp, mp->clsn, log); if (difft > diffp) mp->clsn = tblk->clsn; - LOGSYNC_UNLOCK(log); + LOGSYNC_UNLOCK(log, flags); } else { mp->log = log; mp->lsn = lsn; /* insert mp after tblock in logsync list */ - LOGSYNC_LOCK(log); + LOGSYNC_LOCK(log, flags); log->count++; list_add(&mp->synclist, &tblk->synclist); mp->clsn = tblk->clsn; - LOGSYNC_UNLOCK(log); + LOGSYNC_UNLOCK(log, flags); } write_metapage(mp); return (0); diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h index ebd77c1..c0fd7b3 100644 --- a/fs/jfs/jfs_incore.h +++ b/fs/jfs/jfs_incore.h @@ -165,6 +165,7 @@ struct jfs_sb_info { /* Formerly in ipbmap */ struct bmap *bmap; /* incore bmap descriptor */ struct nls_table *nls_tab; /* current codepage */ + struct inode *direct_inode; /* metadata inode */ uint state; /* mount/recovery state */ unsigned long flag; /* mount time flags */ uint p_state; /* state prior to going no integrity */ diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index b6a6869..dfa1200 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c @@ -234,6 +234,7 @@ int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, int lsn; int diffp, difft; struct metapage *mp = NULL; + unsigned long flags; jfs_info("lmLog: log:0x%p tblk:0x%p, lrd:0x%p tlck:0x%p", log, tblk, lrd, tlck); @@ -254,7 +255,7 @@ int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, */ lsn = log->lsn; - LOGSYNC_LOCK(log); + LOGSYNC_LOCK(log, flags); /* * initialize page lsn if first log write of the page @@ -310,7 +311,7 @@ int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, } } - LOGSYNC_UNLOCK(log); + LOGSYNC_UNLOCK(log, flags); /* * write the log record @@ -334,7 +335,6 @@ int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, return lsn; } - /* * NAME: lmWriteRecord() * @@ -927,9 +927,8 @@ static void lmPostGC(struct lbuf * bp) * calculate new value of i_nextsync which determines when * this code is called again. * - * this is called only from lmLog(). - * - * PARAMETER: ip - pointer to logs inode. + * PARAMETERS: log - log structure + * nosyncwait - 1 if called asynchronously * * RETURN: 0 * @@ -945,6 +944,15 @@ static int lmLogSync(struct jfs_log * log, int nosyncwait) struct lrd lrd; int lsn; struct logsyncblk *lp; + struct jfs_sb_info *sbi; + unsigned long flags; + + /* push dirty metapages out to disk */ + list_for_each_entry(sbi, &log->sb_list, log_list) { + filemap_flush(sbi->ipbmap->i_mapping); + filemap_flush(sbi->ipimap->i_mapping); + filemap_flush(sbi->direct_inode->i_mapping); + } /* * forward syncpt @@ -954,10 +962,7 @@ static int lmLogSync(struct jfs_log * log, int nosyncwait) */ if (log->sync == log->syncpt) { - LOGSYNC_LOCK(log); - /* ToDo: push dirty metapages out to disk */ -// bmLogSync(log); - + LOGSYNC_LOCK(log, flags); if (list_empty(&log->synclist)) log->sync = log->lsn; else { @@ -965,7 +970,7 @@ static int lmLogSync(struct jfs_log * log, int nosyncwait) struct logsyncblk, synclist); log->sync = lp->lsn; } - LOGSYNC_UNLOCK(log); + LOGSYNC_UNLOCK(log, flags); } @@ -974,27 +979,6 @@ static int lmLogSync(struct jfs_log * log, int nosyncwait) * reset syncpt = sync */ if (log->sync != log->syncpt) { - struct jfs_sb_info *sbi; - - /* - * We need to make sure all of the "written" metapages - * actually make it to disk - */ - list_for_each_entry(sbi, &log->sb_list, log_list) { - if (sbi->flag & JFS_NOINTEGRITY) - continue; - filemap_fdatawrite(sbi->ipbmap->i_mapping); - filemap_fdatawrite(sbi->ipimap->i_mapping); - filemap_fdatawrite(sbi->sb->s_bdev->bd_inode->i_mapping); - } - list_for_each_entry(sbi, &log->sb_list, log_list) { - if (sbi->flag & JFS_NOINTEGRITY) - continue; - filemap_fdatawait(sbi->ipbmap->i_mapping); - filemap_fdatawait(sbi->ipimap->i_mapping); - filemap_fdatawait(sbi->sb->s_bdev->bd_inode->i_mapping); - } - lrd.logtid = 0; lrd.backchain = 0; lrd.type = cpu_to_le16(LOG_SYNCPT); @@ -1066,6 +1050,18 @@ static int lmLogSync(struct jfs_log * log, int nosyncwait) return lsn; } +/* + * NAME: jfs_syncpt + * + * FUNCTION: write log SYNCPT record for specified log + * + * PARAMETERS: log - log structure + */ +void jfs_syncpt(struct jfs_log *log) +{ LOG_LOCK(log); + lmLogSync(log, 1); + LOG_UNLOCK(log); +} /* * NAME: lmLogOpen() @@ -1547,6 +1543,7 @@ void jfs_flush_journal(struct jfs_log *log, int wait) { int i; struct tblock *target = NULL; + struct jfs_sb_info *sbi; /* jfs_write_inode may call us during read-only mount */ if (!log) @@ -1608,12 +1605,18 @@ void jfs_flush_journal(struct jfs_log *log, int wait) if (wait < 2) return; + list_for_each_entry(sbi, &log->sb_list, log_list) { + filemap_fdatawrite(sbi->ipbmap->i_mapping); + filemap_fdatawrite(sbi->ipimap->i_mapping); + filemap_fdatawrite(sbi->direct_inode->i_mapping); + } + /* * If there was recent activity, we may need to wait * for the lazycommit thread to catch up */ if ((!list_empty(&log->cqueue)) || !list_empty(&log->synclist)) { - for (i = 0; i < 800; i++) { /* Too much? */ + for (i = 0; i < 200; i++) { /* Too much? */ msleep(250); if (list_empty(&log->cqueue) && list_empty(&log->synclist)) @@ -1621,7 +1624,24 @@ void jfs_flush_journal(struct jfs_log *log, int wait) } } assert(list_empty(&log->cqueue)); - assert(list_empty(&log->synclist)); + if (!list_empty(&log->synclist)) { + struct logsyncblk *lp; + + list_for_each_entry(lp, &log->synclist, synclist) { + if (lp->xflag & COMMIT_PAGE) { + struct metapage *mp = (struct metapage *)lp; + dump_mem("orphan metapage", lp, + sizeof(struct metapage)); + dump_mem("page", mp->page, sizeof(struct page)); + } + else + dump_mem("orphan tblock", lp, + sizeof(struct tblock)); + } +// current->state = TASK_INTERRUPTIBLE; +// schedule(); + } + //assert(list_empty(&log->synclist)); clear_bit(log_FLUSH, &log->flag); } @@ -1669,6 +1689,7 @@ int lmLogShutdown(struct jfs_log * log) lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor); lbmWrite(log, log->bp, lbmWRITE | lbmRELEASE | lbmSYNC, 0); lbmIOWait(log->bp, lbmFREE); + log->bp = NULL; /* * synchronous update log superblock @@ -1819,20 +1840,34 @@ static int lbmLogInit(struct jfs_log * log) log->lbuf_free = NULL; - for (i = 0; i < LOGPAGES; i++) { - lbuf = kmalloc(sizeof(struct lbuf), GFP_KERNEL); - if (lbuf == 0) - goto error; - lbuf->l_ldata = (char *) get_zeroed_page(GFP_KERNEL); - if (lbuf->l_ldata == 0) { - kfree(lbuf); + for (i = 0; i < LOGPAGES;) { + char *buffer; + uint offset; + struct page *page; + + buffer = (char *) get_zeroed_page(GFP_KERNEL); + if (buffer == NULL) goto error; + page = virt_to_page(buffer); + for (offset = 0; offset < PAGE_SIZE; offset += LOGPSIZE) { + lbuf = kmalloc(sizeof(struct lbuf), GFP_KERNEL); + if (lbuf == NULL) { + if (offset == 0) + free_page((unsigned long) buffer); + goto error; + } + if (offset) /* we already have one reference */ + get_page(page); + lbuf->l_offset = offset; + lbuf->l_ldata = buffer + offset; + lbuf->l_page = page; + lbuf->l_log = log; + init_waitqueue_head(&lbuf->l_ioevent); + + lbuf->l_freelist = log->lbuf_free; + log->lbuf_free = lbuf; + i++; } - lbuf->l_log = log; - init_waitqueue_head(&lbuf->l_ioevent); - - lbuf->l_freelist = log->lbuf_free; - log->lbuf_free = lbuf; } return (0); @@ -1857,12 +1892,10 @@ static void lbmLogShutdown(struct jfs_log * log) lbuf = log->lbuf_free; while (lbuf) { struct lbuf *next = lbuf->l_freelist; - free_page((unsigned long) lbuf->l_ldata); + __free_page(lbuf->l_page); kfree(lbuf); lbuf = next; } - - log->bp = NULL; } @@ -1974,9 +2007,9 @@ static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp) bio->bi_sector = bp->l_blkno << (log->l2bsize - 9); bio->bi_bdev = log->bdev; - bio->bi_io_vec[0].bv_page = virt_to_page(bp->l_ldata); + bio->bi_io_vec[0].bv_page = bp->l_page; bio->bi_io_vec[0].bv_len = LOGPSIZE; - bio->bi_io_vec[0].bv_offset = 0; + bio->bi_io_vec[0].bv_offset = bp->l_offset; bio->bi_vcnt = 1; bio->bi_idx = 0; @@ -2115,9 +2148,9 @@ static void lbmStartIO(struct lbuf * bp) bio = bio_alloc(GFP_NOFS, 1); bio->bi_sector = bp->l_blkno << (log->l2bsize - 9); bio->bi_bdev = log->bdev; - bio->bi_io_vec[0].bv_page = virt_to_page(bp->l_ldata); + bio->bi_io_vec[0].bv_page = bp->l_page; bio->bi_io_vec[0].bv_len = LOGPSIZE; - bio->bi_io_vec[0].bv_offset = 0; + bio->bi_io_vec[0].bv_offset = bp->l_offset; bio->bi_vcnt = 1; bio->bi_idx = 0; @@ -2127,16 +2160,13 @@ static void lbmStartIO(struct lbuf * bp) bio->bi_private = bp; /* check if journaling to disk has been disabled */ - if (!log->no_integrity) { + if (log->no_integrity) { + bio->bi_size = 0; + lbmIODone(bio, 0, 0); + } else { submit_bio(WRITE_SYNC, bio); INCREMENT(lmStat.submitted); } - else { - bio->bi_size = 0; - lbmIODone(bio, 0, 0); /* 2nd argument appears to not be used => 0 - * 3rd argument appears to not be used => 0 - */ - } } diff --git a/fs/jfs/jfs_logmgr.h b/fs/jfs/jfs_logmgr.h index 141ad74..51291fb 100644 --- a/fs/jfs/jfs_logmgr.h +++ b/fs/jfs/jfs_logmgr.h @@ -463,9 +463,10 @@ struct lbuf { s64 l_blkno; /* 8: log page block number */ caddr_t l_ldata; /* 4: data page */ + struct page *l_page; /* The page itself */ + uint l_offset; /* Offset of l_ldata within the page */ wait_queue_head_t l_ioevent; /* 4: i/o done event */ - struct page *l_page; /* The page itself */ }; /* Reuse l_freelist for redrive list */ @@ -489,8 +490,9 @@ struct logsyncblk { */ #define LOGSYNC_LOCK_INIT(log) spin_lock_init(&(log)->synclock) -#define LOGSYNC_LOCK(log) spin_lock(&(log)->synclock) -#define LOGSYNC_UNLOCK(log) spin_unlock(&(log)->synclock) +#define LOGSYNC_LOCK(log, flags) spin_lock_irqsave(&(log)->synclock, flags) +#define LOGSYNC_UNLOCK(log, flags) \ + spin_unlock_irqrestore(&(log)->synclock, flags) /* compute the difference in bytes of lsn from sync point */ #define logdiff(diff, lsn, log)\ @@ -506,5 +508,6 @@ extern int lmLogShutdown(struct jfs_log * log); extern int lmLogInit(struct jfs_log * log); extern int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize); extern void jfs_flush_journal(struct jfs_log * log, int wait); +extern void jfs_syncpt(struct jfs_log *log); #endif /* _H_JFS_LOGMGR */ diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index 4c0a3ac..41bf078 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c @@ -1,5 +1,5 @@ /* - * Copyright (C) International Business Machines Corp., 2000-2003 + * Copyright (C) International Business Machines Corp., 2000-2005 * Portions Copyright (C) Christoph Hellwig, 2001-2002 * * This program is free software; you can redistribute it and/or modify @@ -18,10 +18,11 @@ */ #include <linux/fs.h> +#include <linux/mm.h> +#include <linux/bio.h> #include <linux/init.h> #include <linux/buffer_head.h> #include <linux/mempool.h> -#include <linux/delay.h> #include "jfs_incore.h" #include "jfs_superblock.h" #include "jfs_filsys.h" @@ -29,8 +30,6 @@ #include "jfs_txnmgr.h" #include "jfs_debug.h" -static DEFINE_SPINLOCK(meta_lock); - #ifdef CONFIG_JFS_STATISTICS static struct { uint pagealloc; /* # of page allocations */ @@ -39,22 +38,8 @@ static struct { } mpStat; #endif - -#define HASH_BITS 10 /* This makes hash_table 1 4K page */ -#define HASH_SIZE (1 << HASH_BITS) -static struct metapage **hash_table = NULL; -static unsigned long hash_order; - - -static inline int metapage_locked(struct metapage *mp) -{ - return test_bit(META_locked, &mp->flag); -} - -static inline int trylock_metapage(struct metapage *mp) -{ - return test_and_set_bit(META_locked, &mp->flag); -} +#define metapage_locked(mp) test_bit(META_locked, &(mp)->flag) +#define trylock_metapage(mp) test_and_set_bit(META_locked, &(mp)->flag) static inline void unlock_metapage(struct metapage *mp) { @@ -62,26 +47,26 @@ static inline void unlock_metapage(struct metapage *mp) wake_up(&mp->wait); } -static void __lock_metapage(struct metapage *mp) +static inline void __lock_metapage(struct metapage *mp) { DECLARE_WAITQUEUE(wait, current); - INCREMENT(mpStat.lockwait); - add_wait_queue_exclusive(&mp->wait, &wait); do { set_current_state(TASK_UNINTERRUPTIBLE); if (metapage_locked(mp)) { - spin_unlock(&meta_lock); + unlock_page(mp->page); schedule(); - spin_lock(&meta_lock); + lock_page(mp->page); } } while (trylock_metapage(mp)); __set_current_state(TASK_RUNNING); remove_wait_queue(&mp->wait, &wait); } -/* needs meta_lock */ +/* + * Must have mp->page locked + */ static inline void lock_metapage(struct metapage *mp) { if (trylock_metapage(mp)) @@ -92,6 +77,110 @@ static inline void lock_metapage(struct metapage *mp) static kmem_cache_t *metapage_cache; static mempool_t *metapage_mempool; +#define MPS_PER_PAGE (PAGE_CACHE_SIZE >> L2PSIZE) + +#if MPS_PER_PAGE > 1 + +struct meta_anchor { + int mp_count; + atomic_t io_count; + struct metapage *mp[MPS_PER_PAGE]; +}; +#define mp_anchor(page) ((struct meta_anchor *)page->private) + +static inline struct metapage *page_to_mp(struct page *page, uint offset) +{ + if (!PagePrivate(page)) + return NULL; + return mp_anchor(page)->mp[offset >> L2PSIZE]; +} + +static inline int insert_metapage(struct page *page, struct metapage *mp) +{ + struct meta_anchor *a; + int index; + int l2mp_blocks; /* log2 blocks per metapage */ + + if (PagePrivate(page)) + a = mp_anchor(page); + else { + a = kmalloc(sizeof(struct meta_anchor), GFP_NOFS); + if (!a) + return -ENOMEM; + memset(a, 0, sizeof(struct meta_anchor)); + page->private = (unsigned long)a; + SetPagePrivate(page); + kmap(page); + } + + if (mp) { + l2mp_blocks = L2PSIZE - page->mapping->host->i_blkbits; + index = (mp->index >> l2mp_blocks) & (MPS_PER_PAGE - 1); + a->mp_count++; + a->mp[index] = mp; + } + + return 0; +} + +static inline void remove_metapage(struct page *page, struct metapage *mp) +{ + struct meta_anchor *a = mp_anchor(page); + int l2mp_blocks = L2PSIZE - page->mapping->host->i_blkbits; + int index; + + index = (mp->index >> l2mp_blocks) & (MPS_PER_PAGE - 1); + + BUG_ON(a->mp[index] != mp); + + a->mp[index] = NULL; + if (--a->mp_count == 0) { + kfree(a); + page->private = 0; + ClearPagePrivate(page); + kunmap(page); + } +} + +static inline void inc_io(struct page *page) +{ + atomic_inc(&mp_anchor(page)->io_count); +} + +static inline void dec_io(struct page *page, void (*handler) (struct page *)) +{ + if (atomic_dec_and_test(&mp_anchor(page)->io_count)) + handler(page); +} + +#else +static inline struct metapage *page_to_mp(struct page *page, uint offset) +{ + return PagePrivate(page) ? (struct metapage *)page->private : NULL; +} + +static inline int insert_metapage(struct page *page, struct metapage *mp) +{ + if (mp) { + page->private = (unsigned long)mp; + SetPagePrivate(page); + kmap(page); + } + return 0; +} + +static inline void remove_metapage(struct page *page, struct metapage *mp) +{ + page->private = 0; + ClearPagePrivate(page); + kunmap(page); +} + +#define inc_io(page) do {} while(0) +#define dec_io(page, handler) handler(page) + +#endif + static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags) { struct metapage *mp = (struct metapage *)foo; @@ -139,16 +228,6 @@ int __init metapage_init(void) kmem_cache_destroy(metapage_cache); return -ENOMEM; } - /* - * Now the hash list - */ - for (hash_order = 0; - ((PAGE_SIZE << hash_order) / sizeof(void *)) < HASH_SIZE; - hash_order++); - hash_table = - (struct metapage **) __get_free_pages(GFP_KERNEL, hash_order); - assert(hash_table); - memset(hash_table, 0, PAGE_SIZE << hash_order); return 0; } @@ -159,73 +238,388 @@ void metapage_exit(void) kmem_cache_destroy(metapage_cache); } +static inline void drop_metapage(struct page *page, struct metapage *mp) +{ + if (mp->count || mp->nohomeok || test_bit(META_dirty, &mp->flag) || + test_bit(META_io, &mp->flag)) + return; + remove_metapage(page, mp); + INCREMENT(mpStat.pagefree); + free_metapage(mp); +} + /* - * Basically same hash as in pagemap.h, but using our hash table + * Metapage address space operations */ -static struct metapage **meta_hash(struct address_space *mapping, - unsigned long index) + +static sector_t metapage_get_blocks(struct inode *inode, sector_t lblock, + unsigned int *len) { -#define i (((unsigned long)mapping)/ \ - (sizeof(struct inode) & ~(sizeof(struct inode) -1 ))) -#define s(x) ((x) + ((x) >> HASH_BITS)) - return hash_table + (s(i + index) & (HASH_SIZE - 1)); -#undef i -#undef s + int rc = 0; + int xflag; + s64 xaddr; + sector_t file_blocks = (inode->i_size + inode->i_blksize - 1) >> + inode->i_blkbits; + + if (lblock >= file_blocks) + return 0; + if (lblock + *len > file_blocks) + *len = file_blocks - lblock; + + if (inode->i_ino) { + rc = xtLookup(inode, (s64)lblock, *len, &xflag, &xaddr, len, 0); + if ((rc == 0) && *len) + lblock = (sector_t)xaddr; + else + lblock = 0; + } /* else no mapping */ + + return lblock; } -static struct metapage *search_hash(struct metapage ** hash_ptr, - struct address_space *mapping, - unsigned long index) +static void last_read_complete(struct page *page) { - struct metapage *ptr; + if (!PageError(page)) + SetPageUptodate(page); + unlock_page(page); +} + +static int metapage_read_end_io(struct bio *bio, unsigned int bytes_done, + int err) +{ + struct page *page = bio->bi_private; + + if (bio->bi_size) + return 1; - for (ptr = *hash_ptr; ptr; ptr = ptr->hash_next) { - if ((ptr->mapping == mapping) && (ptr->index == index)) - return ptr; + if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { + printk(KERN_ERR "metapage_read_end_io: I/O error\n"); + SetPageError(page); } - return NULL; + dec_io(page, last_read_complete); + bio_put(bio); + + return 0; } -static void add_to_hash(struct metapage * mp, struct metapage ** hash_ptr) +static void remove_from_logsync(struct metapage *mp) { - if (*hash_ptr) - (*hash_ptr)->hash_prev = mp; + struct jfs_log *log = mp->log; + unsigned long flags; +/* + * This can race. Recheck that log hasn't been set to null, and after + * acquiring logsync lock, recheck lsn + */ + if (!log) + return; + + LOGSYNC_LOCK(log, flags); + if (mp->lsn) { + mp->log = NULL; + mp->lsn = 0; + mp->clsn = 0; + log->count--; + list_del(&mp->synclist); + } + LOGSYNC_UNLOCK(log, flags); +} - mp->hash_prev = NULL; - mp->hash_next = *hash_ptr; - *hash_ptr = mp; +static void last_write_complete(struct page *page) +{ + struct metapage *mp; + unsigned int offset; + + for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) { + mp = page_to_mp(page, offset); + if (mp && test_bit(META_io, &mp->flag)) { + if (mp->lsn) + remove_from_logsync(mp); + clear_bit(META_io, &mp->flag); + } + /* + * I'd like to call drop_metapage here, but I don't think it's + * safe unless I have the page locked + */ + } + end_page_writeback(page); } -static void remove_from_hash(struct metapage * mp, struct metapage ** hash_ptr) +static int metapage_write_end_io(struct bio *bio, unsigned int bytes_done, + int err) { - if (mp->hash_prev) - mp->hash_prev->hash_next = mp->hash_next; - else { - assert(*hash_ptr == mp); - *hash_ptr = mp->hash_next; + struct page *page = bio->bi_private; + + BUG_ON(!PagePrivate(page)); + + if (bio->bi_size) + return 1; + + if (! test_bit(BIO_UPTODATE, &bio->bi_flags)) { + printk(KERN_ERR "metapage_write_end_io: I/O error\n"); + SetPageError(page); + } + dec_io(page, last_write_complete); + bio_put(bio); + return 0; +} + +static int metapage_writepage(struct page *page, struct writeback_control *wbc) +{ + struct bio *bio = NULL; + unsigned int block_offset; /* block offset of mp within page */ + struct inode *inode = page->mapping->host; + unsigned int blocks_per_mp = JFS_SBI(inode->i_sb)->nbperpage; + unsigned int len; + unsigned int xlen; + struct metapage *mp; + int redirty = 0; + sector_t lblock; + sector_t pblock; + sector_t next_block = 0; + sector_t page_start; + unsigned long bio_bytes = 0; + unsigned long bio_offset = 0; + unsigned int offset; + + page_start = (sector_t)page->index << + (PAGE_CACHE_SHIFT - inode->i_blkbits); + BUG_ON(!PageLocked(page)); + BUG_ON(PageWriteback(page)); + + for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) { + mp = page_to_mp(page, offset); + + if (!mp || !test_bit(META_dirty, &mp->flag)) + continue; + + if (mp->nohomeok && !test_bit(META_forcewrite, &mp->flag)) { + redirty = 1; + continue; + } + + clear_bit(META_dirty, &mp->flag); + block_offset = offset >> inode->i_blkbits; + lblock = page_start + block_offset; + if (bio) { + if (xlen && lblock == next_block) { + /* Contiguous, in memory & on disk */ + len = min(xlen, blocks_per_mp); + xlen -= len; + bio_bytes += len << inode->i_blkbits; + set_bit(META_io, &mp->flag); + continue; + } + /* Not contiguous */ + if (bio_add_page(bio, page, bio_bytes, bio_offset) < + bio_bytes) + goto add_failed; + /* + * Increment counter before submitting i/o to keep + * count from hitting zero before we're through + */ + inc_io(page); + if (!bio->bi_size) + goto dump_bio; + submit_bio(WRITE, bio); + bio = NULL; + } else { + set_page_writeback(page); + inc_io(page); + } + xlen = (PAGE_CACHE_SIZE - offset) >> inode->i_blkbits; + pblock = metapage_get_blocks(inode, lblock, &xlen); + if (!pblock) { + /* Need better error handling */ + printk(KERN_ERR "JFS: metapage_get_blocks failed\n"); + dec_io(page, last_write_complete); + continue; + } + set_bit(META_io, &mp->flag); + len = min(xlen, (uint) JFS_SBI(inode->i_sb)->nbperpage); + + bio = bio_alloc(GFP_NOFS, 1); + bio->bi_bdev = inode->i_sb->s_bdev; + bio->bi_sector = pblock << (inode->i_blkbits - 9); + bio->bi_end_io = metapage_write_end_io; + bio->bi_private = page; + + /* Don't call bio_add_page yet, we may add to this vec */ + bio_offset = offset; + bio_bytes = len << inode->i_blkbits; + + xlen -= len; + next_block = lblock + len; + } + if (bio) { + if (bio_add_page(bio, page, bio_bytes, bio_offset) < bio_bytes) + goto add_failed; + if (!bio->bi_size) + goto dump_bio; + + submit_bio(WRITE, bio); + } + if (redirty) + redirty_page_for_writepage(wbc, page); + + unlock_page(page); + + return 0; +add_failed: + /* We should never reach here, since we're only adding one vec */ + printk(KERN_ERR "JFS: bio_add_page failed unexpectedly\n"); + goto skip; +dump_bio: + dump_mem("bio", bio, sizeof(*bio)); +skip: + bio_put(bio); + unlock_page(page); + dec_io(page, last_write_complete); + + return -EIO; +} + +static int metapage_readpage(struct file *fp, struct page *page) +{ + struct inode *inode = page->mapping->host; + struct bio *bio = NULL; + unsigned int block_offset; + unsigned int blocks_per_page = PAGE_CACHE_SIZE >> inode->i_blkbits; + sector_t page_start; /* address of page in fs blocks */ + sector_t pblock; + unsigned int xlen; + unsigned int len; + unsigned int offset; + + BUG_ON(!PageLocked(page)); + page_start = (sector_t)page->index << + (PAGE_CACHE_SHIFT - inode->i_blkbits); + + block_offset = 0; + while (block_offset < blocks_per_page) { + xlen = blocks_per_page - block_offset; + pblock = metapage_get_blocks(inode, page_start + block_offset, + &xlen); + if (pblock) { + if (!PagePrivate(page)) + insert_metapage(page, NULL); + inc_io(page); + if (bio) + submit_bio(READ, bio); + + bio = bio_alloc(GFP_NOFS, 1); + bio->bi_bdev = inode->i_sb->s_bdev; + bio->bi_sector = pblock << (inode->i_blkbits - 9); + bio->bi_end_io = metapage_read_end_io; + bio->bi_private = page; + len = xlen << inode->i_blkbits; + offset = block_offset << inode->i_blkbits; + if (bio_add_page(bio, page, len, offset) < len) + goto add_failed; + block_offset += xlen; + } else + block_offset++; } + if (bio) + submit_bio(READ, bio); + else + unlock_page(page); + + return 0; - if (mp->hash_next) - mp->hash_next->hash_prev = mp->hash_prev; +add_failed: + printk(KERN_ERR "JFS: bio_add_page failed unexpectedly\n"); + bio_put(bio); + dec_io(page, last_read_complete); + return -EIO; } +static int metapage_releasepage(struct page *page, int gfp_mask) +{ + struct metapage *mp; + int busy = 0; + unsigned int offset; + + for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) { + mp = page_to_mp(page, offset); + + if (!mp) + continue; + + jfs_info("metapage_releasepage: mp = 0x%p", mp); + if (mp->count || mp->nohomeok) { + jfs_info("count = %ld, nohomeok = %d", mp->count, + mp->nohomeok); + busy = 1; + continue; + } + wait_on_page_writeback(page); + //WARN_ON(test_bit(META_dirty, &mp->flag)); + if (test_bit(META_dirty, &mp->flag)) { + dump_mem("dirty mp in metapage_releasepage", mp, + sizeof(struct metapage)); + dump_mem("page", page, sizeof(struct page)); + dump_stack(); + } + WARN_ON(mp->lsn); + if (mp->lsn) + remove_from_logsync(mp); + remove_metapage(page, mp); + INCREMENT(mpStat.pagefree); + free_metapage(mp); + } + if (busy) + return -1; + + return 0; +} + +static int metapage_invalidatepage(struct page *page, unsigned long offset) +{ + BUG_ON(offset); + + if (PageWriteback(page)) + return 0; + + return metapage_releasepage(page, 0); +} + +struct address_space_operations jfs_metapage_aops = { + .readpage = metapage_readpage, + .writepage = metapage_writepage, + .sync_page = block_sync_page, + .releasepage = metapage_releasepage, + .invalidatepage = metapage_invalidatepage, + .set_page_dirty = __set_page_dirty_nobuffers, +}; + struct metapage *__get_metapage(struct inode *inode, unsigned long lblock, unsigned int size, int absolute, unsigned long new) { - struct metapage **hash_ptr; int l2BlocksPerPage; int l2bsize; struct address_space *mapping; - struct metapage *mp; + struct metapage *mp = NULL; + struct page *page; unsigned long page_index; unsigned long page_offset; - jfs_info("__get_metapage: inode = 0x%p, lblock = 0x%lx", inode, lblock); - + jfs_info("__get_metapage: ino = %ld, lblock = 0x%lx, abs=%d", + inode->i_ino, lblock, absolute); + + l2bsize = inode->i_blkbits; + l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize; + page_index = lblock >> l2BlocksPerPage; + page_offset = (lblock - (page_index << l2BlocksPerPage)) << l2bsize; + if ((page_offset + size) > PAGE_CACHE_SIZE) { + jfs_err("MetaData crosses page boundary!!"); + jfs_err("lblock = %lx, size = %d", lblock, size); + dump_stack(); + return NULL; + } if (absolute) - mapping = inode->i_sb->s_bdev->bd_inode->i_mapping; + mapping = JFS_SBI(inode->i_sb)->direct_inode->i_mapping; else { /* * If an nfs client tries to read an inode that is larger @@ -237,312 +631,212 @@ struct metapage *__get_metapage(struct inode *inode, unsigned long lblock, mapping = inode->i_mapping; } - hash_ptr = meta_hash(mapping, lblock); -again: - spin_lock(&meta_lock); - mp = search_hash(hash_ptr, mapping, lblock); + if (new && (PSIZE == PAGE_CACHE_SIZE)) { + page = grab_cache_page(mapping, page_index); + if (!page) { + jfs_err("grab_cache_page failed!"); + return NULL; + } + SetPageUptodate(page); + } else { + page = read_cache_page(mapping, page_index, + (filler_t *)mapping->a_ops->readpage, NULL); + if (IS_ERR(page)) { + jfs_err("read_cache_page failed!"); + return NULL; + } + lock_page(page); + } + + mp = page_to_mp(page, page_offset); if (mp) { - page_found: - if (test_bit(META_stale, &mp->flag)) { - spin_unlock(&meta_lock); - msleep(1); - goto again; + if (mp->logical_size != size) { + jfs_error(inode->i_sb, + "__get_metapage: mp->logical_size != size"); + jfs_err("logical_size = %d, size = %d", + mp->logical_size, size); + dump_stack(); + goto unlock; } mp->count++; lock_metapage(mp); - spin_unlock(&meta_lock); if (test_bit(META_discard, &mp->flag)) { if (!new) { jfs_error(inode->i_sb, "__get_metapage: using a " "discarded metapage"); - release_metapage(mp); - return NULL; + discard_metapage(mp); + goto unlock; } clear_bit(META_discard, &mp->flag); } - jfs_info("__get_metapage: found 0x%p, in hash", mp); - if (mp->logical_size != size) { - jfs_error(inode->i_sb, - "__get_metapage: mp->logical_size != size"); - release_metapage(mp); - return NULL; - } } else { - l2bsize = inode->i_blkbits; - l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize; - page_index = lblock >> l2BlocksPerPage; - page_offset = (lblock - (page_index << l2BlocksPerPage)) << - l2bsize; - if ((page_offset + size) > PAGE_CACHE_SIZE) { - spin_unlock(&meta_lock); - jfs_err("MetaData crosses page boundary!!"); - return NULL; - } - - /* - * Locks held on aggregate inode pages are usually - * not held long, and they are taken in critical code - * paths (committing dirty inodes, txCommit thread) - * - * Attempt to get metapage without blocking, tapping into - * reserves if necessary. - */ - mp = NULL; - if (JFS_IP(inode)->fileset == AGGREGATE_I) { - mp = alloc_metapage(GFP_ATOMIC); - if (!mp) { - /* - * mempool is supposed to protect us from - * failing here. We will try a blocking - * call, but a deadlock is possible here - */ - printk(KERN_WARNING - "__get_metapage: atomic call to mempool_alloc failed.\n"); - printk(KERN_WARNING - "Will attempt blocking call\n"); - } - } - if (!mp) { - struct metapage *mp2; - - spin_unlock(&meta_lock); - mp = alloc_metapage(GFP_NOFS); - spin_lock(&meta_lock); - - /* we dropped the meta_lock, we need to search the - * hash again. - */ - mp2 = search_hash(hash_ptr, mapping, lblock); - if (mp2) { - free_metapage(mp); - mp = mp2; - goto page_found; - } - } + INCREMENT(mpStat.pagealloc); + mp = alloc_metapage(GFP_NOFS); + mp->page = page; mp->flag = 0; - lock_metapage(mp); - if (absolute) - set_bit(META_absolute, &mp->flag); mp->xflag = COMMIT_PAGE; mp->count = 1; - atomic_set(&mp->nohomeok,0); - mp->mapping = mapping; - mp->index = lblock; - mp->page = NULL; + mp->nohomeok = 0; mp->logical_size = size; - add_to_hash(mp, hash_ptr); - spin_unlock(&meta_lock); - - if (new) { - jfs_info("__get_metapage: Calling grab_cache_page"); - mp->page = grab_cache_page(mapping, page_index); - if (!mp->page) { - jfs_err("grab_cache_page failed!"); - goto freeit; - } else { - INCREMENT(mpStat.pagealloc); - unlock_page(mp->page); - } - } else { - jfs_info("__get_metapage: Calling read_cache_page"); - mp->page = read_cache_page(mapping, lblock, - (filler_t *)mapping->a_ops->readpage, NULL); - if (IS_ERR(mp->page)) { - jfs_err("read_cache_page failed!"); - goto freeit; - } else - INCREMENT(mpStat.pagealloc); + mp->data = page_address(page) + page_offset; + mp->index = lblock; + if (unlikely(insert_metapage(page, mp))) { + free_metapage(mp); + goto unlock; } - mp->data = kmap(mp->page) + page_offset; + lock_metapage(mp); } - if (new) + if (new) { + jfs_info("zeroing mp = 0x%p", mp); memset(mp->data, 0, PSIZE); + } - jfs_info("__get_metapage: returning = 0x%p", mp); + unlock_page(page); + jfs_info("__get_metapage: returning = 0x%p data = 0x%p", mp, mp->data); return mp; -freeit: - spin_lock(&meta_lock); - remove_from_hash(mp, hash_ptr); - free_metapage(mp); - spin_unlock(&meta_lock); +unlock: + unlock_page(page); return NULL; } -void hold_metapage(struct metapage * mp, int force) +void grab_metapage(struct metapage * mp) { - spin_lock(&meta_lock); - + jfs_info("grab_metapage: mp = 0x%p", mp); + page_cache_get(mp->page); + lock_page(mp->page); mp->count++; - - if (force) { - ASSERT (!(test_bit(META_forced, &mp->flag))); - if (trylock_metapage(mp)) - set_bit(META_forced, &mp->flag); - } else - lock_metapage(mp); - - spin_unlock(&meta_lock); + lock_metapage(mp); + unlock_page(mp->page); } -static void __write_metapage(struct metapage * mp) +void force_metapage(struct metapage *mp) { - int l2bsize = mp->mapping->host->i_blkbits; - int l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize; - unsigned long page_index; - unsigned long page_offset; - int rc; - - jfs_info("__write_metapage: mp = 0x%p", mp); - - page_index = mp->page->index; - page_offset = - (mp->index - (page_index << l2BlocksPerPage)) << l2bsize; + struct page *page = mp->page; + jfs_info("force_metapage: mp = 0x%p", mp); + set_bit(META_forcewrite, &mp->flag); + clear_bit(META_sync, &mp->flag); + page_cache_get(page); + lock_page(page); + set_page_dirty(page); + write_one_page(page, 1); + clear_bit(META_forcewrite, &mp->flag); + page_cache_release(page); +} +extern void hold_metapage(struct metapage *mp) +{ lock_page(mp->page); - rc = mp->mapping->a_ops->prepare_write(NULL, mp->page, page_offset, - page_offset + - mp->logical_size); - if (rc) { - jfs_err("prepare_write return %d!", rc); - ClearPageUptodate(mp->page); +} + +extern void put_metapage(struct metapage *mp) +{ + if (mp->count || mp->nohomeok) { + /* Someone else will release this */ unlock_page(mp->page); - clear_bit(META_dirty, &mp->flag); return; } - rc = mp->mapping->a_ops->commit_write(NULL, mp->page, page_offset, - page_offset + - mp->logical_size); - if (rc) { - jfs_err("commit_write returned %d", rc); - } - + page_cache_get(mp->page); + mp->count++; + lock_metapage(mp); unlock_page(mp->page); - clear_bit(META_dirty, &mp->flag); - - jfs_info("__write_metapage done"); -} - -static inline void sync_metapage(struct metapage *mp) -{ - struct page *page = mp->page; - - page_cache_get(page); - lock_page(page); - - /* we're done with this page - no need to check for errors */ - if (page_has_buffers(page)) - write_one_page(page, 1); - else - unlock_page(page); - page_cache_release(page); + release_metapage(mp); } void release_metapage(struct metapage * mp) { - struct jfs_log *log; - + struct page *page = mp->page; jfs_info("release_metapage: mp = 0x%p, flag = 0x%lx", mp, mp->flag); - spin_lock(&meta_lock); - if (test_bit(META_forced, &mp->flag)) { - clear_bit(META_forced, &mp->flag); - mp->count--; - spin_unlock(&meta_lock); - return; - } + BUG_ON(!page); + + lock_page(page); + unlock_metapage(mp); assert(mp->count); - if (--mp->count || atomic_read(&mp->nohomeok)) { - unlock_metapage(mp); - spin_unlock(&meta_lock); + if (--mp->count || mp->nohomeok) { + unlock_page(page); + page_cache_release(page); return; } - if (mp->page) { - set_bit(META_stale, &mp->flag); - spin_unlock(&meta_lock); - kunmap(mp->page); - mp->data = NULL; - if (test_bit(META_dirty, &mp->flag)) - __write_metapage(mp); + if (test_bit(META_dirty, &mp->flag)) { + set_page_dirty(page); if (test_bit(META_sync, &mp->flag)) { - sync_metapage(mp); clear_bit(META_sync, &mp->flag); + write_one_page(page, 1); + lock_page(page); /* write_one_page unlocks the page */ } + } else if (mp->lsn) /* discard_metapage doesn't remove it */ + remove_from_logsync(mp); - if (test_bit(META_discard, &mp->flag)) { - lock_page(mp->page); - block_invalidatepage(mp->page, 0); - unlock_page(mp->page); - } - - page_cache_release(mp->page); - mp->page = NULL; - INCREMENT(mpStat.pagefree); - spin_lock(&meta_lock); - } +#if MPS_PER_PAGE == 1 + /* + * If we know this is the only thing in the page, we can throw + * the page out of the page cache. If pages are larger, we + * don't want to do this. + */ - if (mp->lsn) { - /* - * Remove metapage from logsynclist. - */ - log = mp->log; - LOGSYNC_LOCK(log); - mp->log = NULL; - mp->lsn = 0; - mp->clsn = 0; - log->count--; - list_del(&mp->synclist); - LOGSYNC_UNLOCK(log); + /* Retest mp->count since we may have released page lock */ + if (test_bit(META_discard, &mp->flag) && !mp->count) { + clear_page_dirty(page); + ClearPageUptodate(page); +#ifdef _NOT_YET + if (page->mapping) { + /* Remove from page cache and page cache reference */ + remove_from_page_cache(page); + page_cache_release(page); + metapage_releasepage(page, 0); + } +#endif } - remove_from_hash(mp, meta_hash(mp->mapping, mp->index)); - spin_unlock(&meta_lock); - - free_metapage(mp); +#else + /* Try to keep metapages from using up too much memory */ + drop_metapage(page, mp); +#endif + unlock_page(page); + page_cache_release(page); } void __invalidate_metapages(struct inode *ip, s64 addr, int len) { - struct metapage **hash_ptr; - unsigned long lblock; + sector_t lblock; int l2BlocksPerPage = PAGE_CACHE_SHIFT - ip->i_blkbits; + int BlocksPerPage = 1 << l2BlocksPerPage; /* All callers are interested in block device's mapping */ - struct address_space *mapping = ip->i_sb->s_bdev->bd_inode->i_mapping; + struct address_space *mapping = + JFS_SBI(ip->i_sb)->direct_inode->i_mapping; struct metapage *mp; struct page *page; + unsigned int offset; /* - * First, mark metapages to discard. They will eventually be + * Mark metapages to discard. They will eventually be * released, but should not be written. */ - for (lblock = addr; lblock < addr + len; - lblock += 1 << l2BlocksPerPage) { - hash_ptr = meta_hash(mapping, lblock); -again: - spin_lock(&meta_lock); - mp = search_hash(hash_ptr, mapping, lblock); - if (mp) { - if (test_bit(META_stale, &mp->flag)) { - spin_unlock(&meta_lock); - msleep(1); - goto again; - } + for (lblock = addr & ~(BlocksPerPage - 1); lblock < addr + len; + lblock += BlocksPerPage) { + page = find_lock_page(mapping, lblock >> l2BlocksPerPage); + if (!page) + continue; + for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) { + mp = page_to_mp(page, offset); + if (!mp) + continue; + if (mp->index < addr) + continue; + if (mp->index >= addr + len) + break; clear_bit(META_dirty, &mp->flag); set_bit(META_discard, &mp->flag); - spin_unlock(&meta_lock); - } else { - spin_unlock(&meta_lock); - page = find_lock_page(mapping, lblock>>l2BlocksPerPage); - if (page) { - block_invalidatepage(page, 0); - unlock_page(page); - page_cache_release(page); - } + if (mp->lsn) + remove_from_logsync(mp); } + unlock_page(page); + page_cache_release(page); } } diff --git a/fs/jfs/jfs_metapage.h b/fs/jfs/jfs_metapage.h index 0e58aba..991e9fb 100644 --- a/fs/jfs/jfs_metapage.h +++ b/fs/jfs/jfs_metapage.h @@ -33,38 +33,27 @@ struct metapage { unsigned long flag; /* See Below */ unsigned long count; /* Reference count */ void *data; /* Data pointer */ - - /* list management stuff */ - struct metapage *hash_prev; - struct metapage *hash_next; /* Also used for free list */ - - /* - * mapping & index become redundant, but we need these here to - * add the metapage to the hash before we have the real page - */ - struct address_space *mapping; - unsigned long index; + sector_t index; /* block address of page */ wait_queue_head_t wait; /* implementation */ struct page *page; - unsigned long logical_size; + unsigned int logical_size; /* Journal management */ int clsn; - atomic_t nohomeok; + int nohomeok; struct jfs_log *log; }; /* metapage flag */ #define META_locked 0 -#define META_absolute 1 -#define META_free 2 -#define META_dirty 3 -#define META_sync 4 -#define META_discard 5 -#define META_forced 6 -#define META_stale 7 +#define META_free 1 +#define META_dirty 2 +#define META_sync 3 +#define META_discard 4 +#define META_forcewrite 5 +#define META_io 6 #define mark_metapage_dirty(mp) set_bit(META_dirty, &(mp)->flag) @@ -80,7 +69,16 @@ extern struct metapage *__get_metapage(struct inode *inode, __get_metapage(inode, lblock, size, absolute, TRUE) extern void release_metapage(struct metapage *); -extern void hold_metapage(struct metapage *, int); +extern void grab_metapage(struct metapage *); +extern void force_metapage(struct metapage *); + +/* + * hold_metapage and put_metapage are used in conjuction. The page lock + * is not dropped between the two, so no other threads can get or release + * the metapage + */ +extern void hold_metapage(struct metapage *); +extern void put_metapage(struct metapage *); static inline void write_metapage(struct metapage *mp) { @@ -101,6 +99,46 @@ static inline void discard_metapage(struct metapage *mp) release_metapage(mp); } +static inline void metapage_nohomeok(struct metapage *mp) +{ + struct page *page = mp->page; + lock_page(page); + if (!mp->nohomeok++) { + mark_metapage_dirty(mp); + page_cache_get(page); + wait_on_page_writeback(page); + } + unlock_page(page); +} + +/* + * This serializes access to mp->lsn when metapages are added to logsynclist + * without setting nohomeok. i.e. updating imap & dmap + */ +static inline void metapage_wait_for_io(struct metapage *mp) +{ + if (test_bit(META_io, &mp->flag)) + wait_on_page_writeback(mp->page); +} + +/* + * This is called when already holding the metapage + */ +static inline void _metapage_homeok(struct metapage *mp) +{ + if (!--mp->nohomeok) + page_cache_release(mp->page); +} + +static inline void metapage_homeok(struct metapage *mp) +{ + hold_metapage(mp); + _metapage_homeok(mp); + put_metapage(mp); +} + +extern struct address_space_operations jfs_metapage_aops; + /* * This routines invalidate all pages for an extent. */ diff --git a/fs/jfs/jfs_mount.c b/fs/jfs/jfs_mount.c index c535ffd..032d111 100644 --- a/fs/jfs/jfs_mount.c +++ b/fs/jfs/jfs_mount.c @@ -285,11 +285,6 @@ int jfs_mount_rw(struct super_block *sb, int remount) */ logMOUNT(sb); - /* - * Set page cache allocation policy - */ - mapping_set_gfp_mask(sb->s_bdev->bd_inode->i_mapping, GFP_NOFS); - return rc; } diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c index f40301d..e93d01a 100644 --- a/fs/jfs/jfs_txnmgr.c +++ b/fs/jfs/jfs_txnmgr.c @@ -227,6 +227,7 @@ static lid_t txLockAlloc(void) static void txLockFree(lid_t lid) { + TxLock[lid].tid = 0; TxLock[lid].next = TxAnchor.freelock; TxAnchor.freelock = lid; TxAnchor.tlocksInUse--; @@ -566,9 +567,6 @@ void txEnd(tid_t tid) * synchronize with logsync barrier */ if (test_bit(log_SYNCBARRIER, &log->flag)) { - /* forward log syncpt */ - /* lmSync(log); */ - jfs_info("log barrier off: 0x%x", log->lsn); /* enable new transactions start */ @@ -576,15 +574,22 @@ void txEnd(tid_t tid) /* wakeup all waitors for logsync barrier */ TXN_WAKEUP(&log->syncwait); + + TXN_UNLOCK(); + + /* forward log syncpt */ + jfs_syncpt(log); + + goto wakeup; } } + TXN_UNLOCK(); +wakeup: /* * wakeup all waitors for a free tblock */ TXN_WAKEUP(&TxAnchor.freewait); - - TXN_UNLOCK(); } @@ -633,8 +638,10 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp, /* is page locked by the requester transaction ? */ tlck = lid_to_tlock(lid); - if ((xtid = tlck->tid) == tid) + if ((xtid = tlck->tid) == tid) { + TXN_UNLOCK(); goto grantLock; + } /* * is page locked by anonymous transaction/lock ? @@ -649,6 +656,7 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp, */ if (xtid == 0) { tlck->tid = tid; + TXN_UNLOCK(); tblk = tid_to_tblock(tid); /* * The order of the tlocks in the transaction is important @@ -706,17 +714,18 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp, */ tlck->tid = tid; + TXN_UNLOCK(); + /* mark tlock for meta-data page */ if (mp->xflag & COMMIT_PAGE) { tlck->flag = tlckPAGELOCK; /* mark the page dirty and nohomeok */ - mark_metapage_dirty(mp); - atomic_inc(&mp->nohomeok); + metapage_nohomeok(mp); jfs_info("locking mp = 0x%p, nohomeok = %d tid = %d tlck = 0x%p", - mp, atomic_read(&mp->nohomeok), tid, tlck); + mp, mp->nohomeok, tid, tlck); /* if anonymous transaction, and buffer is on the group * commit synclist, mark inode to show this. This will @@ -762,8 +771,10 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp, if (tlck->next == 0) { /* This inode's first anonymous transaction */ jfs_ip->atltail = lid; + TXN_LOCK(); list_add_tail(&jfs_ip->anon_inode_list, &TxAnchor.anon_list); + TXN_UNLOCK(); } } @@ -821,8 +832,6 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp, grantLock: tlck->type |= type; - TXN_UNLOCK(); - return tlck; /* @@ -841,11 +850,19 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp, BUG(); } INCREMENT(stattx.waitlock); /* statistics */ + TXN_UNLOCK(); release_metapage(mp); + TXN_LOCK(); + xtid = tlck->tid; /* reaquire after dropping TXN_LOCK */ jfs_info("txLock: in waitLock, tid = %d, xtid = %d, lid = %d", tid, xtid, lid); - TXN_SLEEP_DROP_LOCK(&tid_to_tblock(xtid)->waitor); + + /* Recheck everything since dropping TXN_LOCK */ + if (xtid && (tlck->mp == mp) && (mp->lid == lid)) + TXN_SLEEP_DROP_LOCK(&tid_to_tblock(xtid)->waitor); + else + TXN_UNLOCK(); jfs_info("txLock: awakened tid = %d, lid = %d", tid, lid); return NULL; @@ -906,6 +923,7 @@ static void txUnlock(struct tblock * tblk) struct metapage *mp; struct jfs_log *log; int difft, diffp; + unsigned long flags; jfs_info("txUnlock: tblk = 0x%p", tblk); log = JFS_SBI(tblk->sb)->log; @@ -925,19 +943,14 @@ static void txUnlock(struct tblock * tblk) assert(mp->xflag & COMMIT_PAGE); /* hold buffer - * - * It's possible that someone else has the metapage. - * The only things were changing are nohomeok, which - * is handled atomically, and clsn which is protected - * by the LOGSYNC_LOCK. */ - hold_metapage(mp, 1); + hold_metapage(mp); - assert(atomic_read(&mp->nohomeok) > 0); - atomic_dec(&mp->nohomeok); + assert(mp->nohomeok > 0); + _metapage_homeok(mp); /* inherit younger/larger clsn */ - LOGSYNC_LOCK(log); + LOGSYNC_LOCK(log, flags); if (mp->clsn) { logdiff(difft, tblk->clsn, log); logdiff(diffp, mp->clsn, log); @@ -945,16 +958,11 @@ static void txUnlock(struct tblock * tblk) mp->clsn = tblk->clsn; } else mp->clsn = tblk->clsn; - LOGSYNC_UNLOCK(log); + LOGSYNC_UNLOCK(log, flags); assert(!(tlck->flag & tlckFREEPAGE)); - if (tlck->flag & tlckWRITEPAGE) { - write_metapage(mp); - } else { - /* release page which has been forced */ - release_metapage(mp); - } + put_metapage(mp); } /* insert tlock, and linelock(s) of the tlock if any, @@ -981,10 +989,10 @@ static void txUnlock(struct tblock * tblk) * has been inserted in logsync list at txUpdateMap()) */ if (tblk->lsn) { - LOGSYNC_LOCK(log); + LOGSYNC_LOCK(log, flags); log->count--; list_del(&tblk->synclist); - LOGSYNC_UNLOCK(log); + LOGSYNC_UNLOCK(log, flags); } } @@ -1573,8 +1581,8 @@ static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, * the last entry, so don't bother logging this */ mp->lid = 0; - hold_metapage(mp, 0); - atomic_dec(&mp->nohomeok); + grab_metapage(mp); + metapage_homeok(mp); discard_metapage(mp); tlck->mp = NULL; return 0; @@ -1712,7 +1720,7 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, struct maplock *maplock; struct xdlistlock *xadlock; struct pxd_lock *pxdlock; - pxd_t *pxd; + pxd_t *page_pxd; int next, lwm, hwm; ip = tlck->ip; @@ -1722,7 +1730,7 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, lrd->log.redopage.type = cpu_to_le16(LOG_XTREE); lrd->log.redopage.l2linesize = cpu_to_le16(L2XTSLOTSIZE); - pxd = &lrd->log.redopage.pxd; + page_pxd = &lrd->log.redopage.pxd; if (tlck->type & tlckBTROOT) { lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT); @@ -1752,9 +1760,9 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, * applying the after-image to the meta-data page. */ lrd->type = cpu_to_le16(LOG_REDOPAGE); -// *pxd = mp->cm_pxd; - PXDaddress(pxd, mp->index); - PXDlength(pxd, +// *page_pxd = mp->cm_pxd; + PXDaddress(page_pxd, mp->index); + PXDlength(page_pxd, mp->logical_size >> tblk->sb->s_blocksize_bits); lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); @@ -1776,25 +1784,31 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, tlck->flag |= tlckUPDATEMAP; xadlock->flag = mlckALLOCXADLIST; xadlock->count = next - lwm; - if ((xadlock->count <= 2) && (tblk->xflag & COMMIT_LAZY)) { + if ((xadlock->count <= 4) && (tblk->xflag & COMMIT_LAZY)) { int i; + pxd_t *pxd; /* * Lazy commit may allow xtree to be modified before * txUpdateMap runs. Copy xad into linelock to * preserve correct data. + * + * We can fit twice as may pxd's as xads in the lock */ - xadlock->xdlist = &xtlck->pxdlock; - memcpy(xadlock->xdlist, &p->xad[lwm], - sizeof(xad_t) * xadlock->count); - - for (i = 0; i < xadlock->count; i++) + xadlock->flag = mlckALLOCPXDLIST; + pxd = xadlock->xdlist = &xtlck->pxdlock; + for (i = 0; i < xadlock->count; i++) { + PXDaddress(pxd, addressXAD(&p->xad[lwm + i])); + PXDlength(pxd, lengthXAD(&p->xad[lwm + i])); p->xad[lwm + i].flag &= ~(XAD_NEW | XAD_EXTENDED); + pxd++; + } } else { /* * xdlist will point to into inode's xtree, ensure * that transaction is not committed lazily. */ + xadlock->flag = mlckALLOCXADLIST; xadlock->xdlist = &p->xad[lwm]; tblk->xflag &= ~COMMIT_LAZY; } @@ -1836,8 +1850,8 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, if (tblk->xflag & COMMIT_TRUNCATE) { /* write NOREDOPAGE for the page */ lrd->type = cpu_to_le16(LOG_NOREDOPAGE); - PXDaddress(pxd, mp->index); - PXDlength(pxd, + PXDaddress(page_pxd, mp->index); + PXDlength(page_pxd, mp->logical_size >> tblk->sb-> s_blocksize_bits); lrd->backchain = @@ -1872,22 +1886,32 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, * deleted page itself; */ tlck->flag |= tlckUPDATEMAP; - xadlock->flag = mlckFREEXADLIST; xadlock->count = hwm - XTENTRYSTART + 1; - if ((xadlock->count <= 2) && (tblk->xflag & COMMIT_LAZY)) { + if ((xadlock->count <= 4) && (tblk->xflag & COMMIT_LAZY)) { + int i; + pxd_t *pxd; /* * Lazy commit may allow xtree to be modified before * txUpdateMap runs. Copy xad into linelock to * preserve correct data. + * + * We can fit twice as may pxd's as xads in the lock */ - xadlock->xdlist = &xtlck->pxdlock; - memcpy(xadlock->xdlist, &p->xad[XTENTRYSTART], - sizeof(xad_t) * xadlock->count); + xadlock->flag = mlckFREEPXDLIST; + pxd = xadlock->xdlist = &xtlck->pxdlock; + for (i = 0; i < xadlock->count; i++) { + PXDaddress(pxd, + addressXAD(&p->xad[XTENTRYSTART + i])); + PXDlength(pxd, + lengthXAD(&p->xad[XTENTRYSTART + i])); + pxd++; + } } else { /* * xdlist will point to into inode's xtree, ensure * that transaction is not committed lazily. */ + xadlock->flag = mlckFREEXADLIST; xadlock->xdlist = &p->xad[XTENTRYSTART]; tblk->xflag &= ~COMMIT_LAZY; } @@ -1918,7 +1942,7 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, * header ? */ if (tlck->type & tlckTRUNCATE) { - pxd_t tpxd; /* truncated extent of xad */ + pxd_t pxd; /* truncated extent of xad */ int twm; /* @@ -1947,8 +1971,9 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, * applying the after-image to the meta-data page. */ lrd->type = cpu_to_le16(LOG_REDOPAGE); - PXDaddress(pxd, mp->index); - PXDlength(pxd, mp->logical_size >> tblk->sb->s_blocksize_bits); + PXDaddress(page_pxd, mp->index); + PXDlength(page_pxd, + mp->logical_size >> tblk->sb->s_blocksize_bits); lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); /* @@ -1966,7 +1991,7 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD); lrd->log.updatemap.nxd = cpu_to_le16(1); lrd->log.updatemap.pxd = pxdlock->pxd; - tpxd = pxdlock->pxd; /* save to format maplock */ + pxd = pxdlock->pxd; /* save to format maplock */ lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL)); } @@ -2035,7 +2060,7 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, pxdlock = (struct pxd_lock *) xadlock; pxdlock->flag = mlckFREEPXD; pxdlock->count = 1; - pxdlock->pxd = tpxd; + pxdlock->pxd = pxd; jfs_info("xtLog: truncate ip:0x%p mp:0x%p count:%d " "hwm:%d", ip, mp, pxdlock->count, hwm); @@ -2253,7 +2278,8 @@ void txForce(struct tblock * tblk) tlck->flag &= ~tlckWRITEPAGE; /* do not release page to freelist */ - + force_metapage(mp); +#if 0 /* * The "right" thing to do here is to * synchronously write the metadata. @@ -2265,9 +2291,10 @@ void txForce(struct tblock * tblk) * we can get by with synchronously writing * the pages when they are released. */ - assert(atomic_read(&mp->nohomeok)); + assert(mp->nohomeok); set_bit(META_dirty, &mp->flag); set_bit(META_sync, &mp->flag); +#endif } } } @@ -2327,7 +2354,7 @@ static void txUpdateMap(struct tblock * tblk) */ mp = tlck->mp; ASSERT(mp->xflag & COMMIT_PAGE); - hold_metapage(mp, 0); + grab_metapage(mp); } /* @@ -2377,8 +2404,8 @@ static void txUpdateMap(struct tblock * tblk) ASSERT(mp->lid == lid); tlck->mp->lid = 0; } - assert(atomic_read(&mp->nohomeok) == 1); - atomic_dec(&mp->nohomeok); + assert(mp->nohomeok == 1); + metapage_homeok(mp); discard_metapage(mp); tlck->mp = NULL; } @@ -2844,24 +2871,9 @@ static void LogSyncRelease(struct metapage * mp) { struct jfs_log *log = mp->log; - assert(atomic_read(&mp->nohomeok)); + assert(mp->nohomeok); assert(log); - atomic_dec(&mp->nohomeok); - - if (atomic_read(&mp->nohomeok)) - return; - - hold_metapage(mp, 0); - - LOGSYNC_LOCK(log); - mp->log = NULL; - mp->lsn = 0; - mp->clsn = 0; - log->count--; - list_del_init(&mp->synclist); - LOGSYNC_UNLOCK(log); - - release_metapage(mp); + metapage_homeok(mp); } /* diff --git a/fs/jfs/jfs_umount.c b/fs/jfs/jfs_umount.c index f31a9e3..5cf9178 100644 --- a/fs/jfs/jfs_umount.c +++ b/fs/jfs/jfs_umount.c @@ -49,7 +49,6 @@ */ int jfs_umount(struct super_block *sb) { - struct address_space *bdev_mapping = sb->s_bdev->bd_inode->i_mapping; struct jfs_sb_info *sbi = JFS_SBI(sb); struct inode *ipbmap = sbi->ipbmap; struct inode *ipimap = sbi->ipimap; @@ -109,8 +108,8 @@ int jfs_umount(struct super_block *sb) * Make sure all metadata makes it to disk before we mark * the superblock as clean */ - filemap_fdatawrite(bdev_mapping); - filemap_fdatawait(bdev_mapping); + filemap_fdatawrite(sbi->direct_inode->i_mapping); + filemap_fdatawait(sbi->direct_inode->i_mapping); /* * ensure all file system file pages are propagated to their @@ -123,9 +122,6 @@ int jfs_umount(struct super_block *sb) if (log) { /* log = NULL if read-only mount */ updateSuper(sb, FM_CLEAN); - /* Restore default gfp_mask for bdev */ - mapping_set_gfp_mask(bdev_mapping, GFP_USER); - /* * close log: * @@ -140,7 +136,6 @@ int jfs_umount(struct super_block *sb) int jfs_umount_rw(struct super_block *sb) { - struct address_space *bdev_mapping = sb->s_bdev->bd_inode->i_mapping; struct jfs_sb_info *sbi = JFS_SBI(sb); struct jfs_log *log = sbi->log; @@ -166,13 +161,10 @@ int jfs_umount_rw(struct super_block *sb) * mark the superblock clean before everything is flushed to * disk. */ - filemap_fdatawrite(bdev_mapping); - filemap_fdatawait(bdev_mapping); + filemap_fdatawrite(sbi->direct_inode->i_mapping); + filemap_fdatawait(sbi->direct_inode->i_mapping); updateSuper(sb, FM_CLEAN); - /* Restore default gfp_mask for bdev */ - mapping_set_gfp_mask(bdev_mapping, GFP_USER); - return lmLogClose(sb); } diff --git a/fs/jfs/jfs_xtree.c b/fs/jfs/jfs_xtree.c index 11c58c5..2c1f311 100644 --- a/fs/jfs/jfs_xtree.c +++ b/fs/jfs/jfs_xtree.c @@ -111,8 +111,8 @@ static struct { /* * forward references */ -static int xtSearch(struct inode *ip, - s64 xoff, int *cmpp, struct btstack * btstack, int flag); +static int xtSearch(struct inode *ip, s64 xoff, s64 *next, int *cmpp, + struct btstack * btstack, int flag); static int xtSplitUp(tid_t tid, struct inode *ip, @@ -159,11 +159,12 @@ int xtLookup(struct inode *ip, s64 lstart, xtpage_t *p; int index; xad_t *xad; - s64 size, xoff, xend; + s64 next, size, xoff, xend; int xlen; s64 xaddr; - *plen = 0; + *paddr = 0; + *plen = llen; if (!no_check) { /* is lookup offset beyond eof ? */ @@ -180,7 +181,7 @@ int xtLookup(struct inode *ip, s64 lstart, * search for the xad entry covering the logical extent */ //search: - if ((rc = xtSearch(ip, lstart, &cmp, &btstack, 0))) { + if ((rc = xtSearch(ip, lstart, &next, &cmp, &btstack, 0))) { jfs_err("xtLookup: xtSearch returned %d", rc); return rc; } @@ -198,8 +199,11 @@ int xtLookup(struct inode *ip, s64 lstart, * lstart is a page start address, * i.e., lstart cannot start in a hole; */ - if (cmp) + if (cmp) { + if (next) + *plen = min(next - lstart, llen); goto out; + } /* * lxd covered by xad @@ -284,7 +288,7 @@ int xtLookupList(struct inode *ip, struct lxdlist * lxdlist, if (lstart >= size) return 0; - if ((rc = xtSearch(ip, lstart, &cmp, &btstack, 0))) + if ((rc = xtSearch(ip, lstart, NULL, &cmp, &btstack, 0))) return rc; /* @@ -488,6 +492,7 @@ int xtLookupList(struct inode *ip, struct lxdlist * lxdlist, * parameters: * ip - file object; * xoff - extent offset; + * nextp - address of next extent (if any) for search miss * cmpp - comparison result: * btstack - traverse stack; * flag - search process flag (XT_INSERT); @@ -497,7 +502,7 @@ int xtLookupList(struct inode *ip, struct lxdlist * lxdlist, * *cmpp is set to result of comparison with the entry returned. * the page containing the entry is pinned at exit. */ -static int xtSearch(struct inode *ip, s64 xoff, /* offset of extent */ +static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp, int *cmpp, struct btstack * btstack, int flag) { struct jfs_inode_info *jfs_ip = JFS_IP(ip); @@ -511,6 +516,7 @@ static int xtSearch(struct inode *ip, s64 xoff, /* offset of extent */ struct btframe *btsp; int nsplit = 0; /* number of pages to split */ s64 t64; + s64 next = 0; INCREMENT(xtStat.search); @@ -579,6 +585,7 @@ static int xtSearch(struct inode *ip, s64 xoff, /* offset of extent */ * previous and this entry */ *cmpp = 1; + next = t64; goto out; } @@ -623,6 +630,9 @@ static int xtSearch(struct inode *ip, s64 xoff, /* offset of extent */ /* update sequential access heuristics */ jfs_ip->btindex = index; + if (nextp) + *nextp = next; + INCREMENT(xtStat.fastSearch); return 0; } @@ -675,10 +685,11 @@ static int xtSearch(struct inode *ip, s64 xoff, /* offset of extent */ return 0; } - /* search hit - internal page: * descend/search its child page */ + if (index < p->header.nextindex - 1) + next = offsetXAD(&p->xad[index + 1]); goto next; } @@ -694,6 +705,8 @@ static int xtSearch(struct inode *ip, s64 xoff, /* offset of extent */ * base is the smallest index with key (Kj) greater than * search key (K) and may be zero or maxentry index. */ + if (base < p->header.nextindex) + next = offsetXAD(&p->xad[base]); /* * search miss - leaf page: * @@ -727,6 +740,9 @@ static int xtSearch(struct inode *ip, s64 xoff, /* offset of extent */ jfs_ip->btorder = BT_RANDOM; jfs_ip->btindex = base; + if (nextp) + *nextp = next; + return 0; } @@ -793,6 +809,7 @@ int xtInsert(tid_t tid, /* transaction id */ struct xtsplit split; /* split information */ xad_t *xad; int cmp; + s64 next; struct tlock *tlck; struct xtlock *xtlck; @@ -806,7 +823,7 @@ int xtInsert(tid_t tid, /* transaction id */ * n.b. xtSearch() may return index of maxentry of * the full page. */ - if ((rc = xtSearch(ip, xoff, &cmp, &btstack, XT_INSERT))) + if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT))) return rc; /* retrieve search result */ @@ -814,7 +831,7 @@ int xtInsert(tid_t tid, /* transaction id */ /* This test must follow XT_GETSEARCH since mp must be valid if * we branch to out: */ - if (cmp == 0) { + if ((cmp == 0) || (next && (xlen > next - xoff))) { rc = -EEXIST; goto out; } @@ -1626,7 +1643,7 @@ int xtExtend(tid_t tid, /* transaction id */ jfs_info("xtExtend: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen); /* there must exist extent to be extended */ - if ((rc = xtSearch(ip, xoff - 1, &cmp, &btstack, XT_INSERT))) + if ((rc = xtSearch(ip, xoff - 1, NULL, &cmp, &btstack, XT_INSERT))) return rc; /* retrieve search result */ @@ -1794,7 +1811,7 @@ printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n", */ /* there must exist extent to be tailgated */ - if ((rc = xtSearch(ip, xoff, &cmp, &btstack, XT_INSERT))) + if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, XT_INSERT))) return rc; /* retrieve search result */ @@ -1977,7 +1994,7 @@ int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad) nxlen = lengthXAD(nxad); nxaddr = addressXAD(nxad); - if ((rc = xtSearch(ip, nxoff, &cmp, &btstack, XT_INSERT))) + if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT))) return rc; /* retrieve search result */ @@ -2291,7 +2308,7 @@ int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad) if (nextindex == le16_to_cpu(p->header.maxentry)) { XT_PUTPAGE(mp); - if ((rc = xtSearch(ip, nxoff, &cmp, &btstack, XT_INSERT))) + if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT))) return rc; /* retrieve search result */ @@ -2438,6 +2455,7 @@ int xtAppend(tid_t tid, /* transaction id */ int nsplit, nblocks, xlen; struct pxdlist pxdlist; pxd_t *pxd; + s64 next; xaddr = *xaddrp; xlen = *xlenp; @@ -2452,7 +2470,7 @@ int xtAppend(tid_t tid, /* transaction id */ * n.b. xtSearch() may return index of maxentry of * the full page. */ - if ((rc = xtSearch(ip, xoff, &cmp, &btstack, XT_INSERT))) + if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT))) return rc; /* retrieve search result */ @@ -2462,6 +2480,9 @@ int xtAppend(tid_t tid, /* transaction id */ rc = -EEXIST; goto out; } + + if (next) + xlen = min(xlen, (int)(next - xoff)); //insert: /* * insert entry for new extent @@ -2600,7 +2621,7 @@ int xtDelete(tid_t tid, struct inode *ip, s64 xoff, s32 xlen, int flag) /* * find the matching entry; xtSearch() pins the page */ - if ((rc = xtSearch(ip, xoff, &cmp, &btstack, 0))) + if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0))) return rc; XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); @@ -2852,7 +2873,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */ */ if (xtype == DATAEXT) { /* search in leaf entry */ - rc = xtSearch(ip, xoff, &cmp, &btstack, 0); + rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0); if (rc) return rc; @@ -2958,7 +2979,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */ } /* get back parent page */ - if ((rc = xtSearch(ip, xoff, &cmp, &btstack, 0))) + if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0))) return rc; XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index); @@ -3991,7 +4012,7 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size) if (committed_size) { xoff = (committed_size >> JFS_SBI(ip->i_sb)->l2bsize) - 1; - rc = xtSearch(ip, xoff, &cmp, &btstack, 0); + rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0); if (rc) return rc; diff --git a/fs/jfs/resize.c b/fs/jfs/resize.c index 2eb6869..c6dc254 100644 --- a/fs/jfs/resize.c +++ b/fs/jfs/resize.c @@ -209,6 +209,9 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) */ txQuiesce(sb); + /* Reset size of direct inode */ + sbi->direct_inode->i_size = sb->s_bdev->bd_inode->i_size; + if (sbi->mntflag & JFS_INLINELOG) { /* * deactivate old inline log diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 5856866..5e774ed 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -210,6 +210,10 @@ static void jfs_put_super(struct super_block *sb) unload_nls(sbi->nls_tab); sbi->nls_tab = NULL; + truncate_inode_pages(sbi->direct_inode->i_mapping, 0); + iput(sbi->direct_inode); + sbi->direct_inode = NULL; + kfree(sbi); } @@ -358,6 +362,12 @@ static int jfs_remount(struct super_block *sb, int *flags, char *data) } if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) { + /* + * Invalidate any previously read metadata. fsck may have + * changed the on-disk data since we mounted r/o + */ + truncate_inode_pages(JFS_SBI(sb)->direct_inode->i_mapping, 0); + JFS_SBI(sb)->flag = flag; return jfs_mount_rw(sb, 1); } @@ -428,12 +438,26 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_op = &jfs_super_operations; sb->s_export_op = &jfs_export_operations; + /* + * Initialize direct-mapping inode/address-space + */ + inode = new_inode(sb); + if (inode == NULL) + goto out_kfree; + inode->i_ino = 0; + inode->i_nlink = 1; + inode->i_size = sb->s_bdev->bd_inode->i_size; + inode->i_mapping->a_ops = &jfs_metapage_aops; + mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); + + sbi->direct_inode = inode; + rc = jfs_mount(sb); if (rc) { if (!silent) { jfs_err("jfs_mount failed w/return code = %d", rc); } - goto out_kfree; + goto out_mount_failed; } if (sb->s_flags & MS_RDONLY) sbi->log = NULL; @@ -482,6 +506,13 @@ out_no_rw: if (rc) { jfs_err("jfs_umount failed with return code %d", rc); } +out_mount_failed: + filemap_fdatawrite(sbi->direct_inode->i_mapping); + filemap_fdatawait(sbi->direct_inode->i_mapping); + truncate_inode_pages(sbi->direct_inode->i_mapping, 0); + make_bad_inode(sbi->direct_inode); + iput(sbi->direct_inode); + sbi->direct_inode = NULL; out_kfree: if (sbi->nls_tab) unload_nls(sbi->nls_tab); @@ -527,8 +558,10 @@ static int jfs_sync_fs(struct super_block *sb, int wait) struct jfs_log *log = JFS_SBI(sb)->log; /* log == NULL indicates read-only mount */ - if (log) + if (log) { jfs_flush_journal(log, wait); + jfs_syncpt(log); + } return 0; } |