diff options
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/bitmap.c | 83 | ||||
-rw-r--r-- | drivers/md/dm-raid1.c | 2 | ||||
-rw-r--r-- | drivers/md/dm-table.c | 6 | ||||
-rw-r--r-- | drivers/md/dm.c | 196 | ||||
-rw-r--r-- | drivers/md/md.c | 6 | ||||
-rw-r--r-- | drivers/md/raid1.c | 30 | ||||
-rw-r--r-- | drivers/md/raid5.c | 2 | ||||
-rw-r--r-- | drivers/md/raid6main.c | 2 |
8 files changed, 170 insertions, 157 deletions
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 0c2ed99..41df4cd 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -108,7 +108,7 @@ static unsigned char *bitmap_alloc_page(struct bitmap *bitmap) { unsigned char *page; -#if INJECT_FAULTS_1 +#ifdef INJECT_FAULTS_1 page = NULL; #else page = kmalloc(PAGE_SIZE, GFP_NOIO); @@ -818,8 +818,7 @@ int bitmap_unplug(struct bitmap *bitmap) return 0; } -static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, - unsigned long sectors, int in_sync); +static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset); /* * bitmap_init_from_disk -- called at bitmap_create time to initialize * the in-memory bitmap from the on-disk bitmap -- also, sets up the * memory mapping of the bitmap file @@ -828,7 +827,7 @@ static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, * previously kicked from the array, we mark all the bits as * 1's in order to cause a full resync. */ -static int bitmap_init_from_disk(struct bitmap *bitmap, int in_sync) +static int bitmap_init_from_disk(struct bitmap *bitmap) { unsigned long i, chunks, index, oldindex, bit; struct page *page = NULL, *oldpage = NULL; @@ -843,7 +842,7 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, int in_sync) BUG_ON(!file && !bitmap->offset); -#if INJECT_FAULTS_3 +#ifdef INJECT_FAULTS_3 outofdate = 1; #else outofdate = bitmap->flags & BITMAP_STALE; @@ -929,8 +928,7 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, int in_sync) } if (test_bit(bit, page_address(page))) { /* if the disk bit is set, set the memory bit */ - bitmap_set_memory_bits(bitmap, - i << CHUNK_BLOCK_SHIFT(bitmap), 1, in_sync); + bitmap_set_memory_bits(bitmap, i << CHUNK_BLOCK_SHIFT(bitmap)); bit_cnt++; } } @@ -1187,7 +1185,7 @@ static int bitmap_start_daemon(struct bitmap *bitmap, mdk_thread_t **ptr, spin_unlock_irqrestore(&bitmap->lock, flags); -#if INJECT_FATAL_FAULT_2 +#ifdef INJECT_FATAL_FAULT_2 daemon = NULL; #else sprintf(namebuf, "%%s_%s", name); @@ -1426,35 +1424,53 @@ void bitmap_close_sync(struct bitmap *bitmap) } } -static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, - unsigned long sectors, int in_sync) +static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset) { /* For each chunk covered by any of these sectors, set the - * counter to 1 and set resync_needed unless in_sync. They should all + * counter to 1 and set resync_needed. They should all * be 0 at this point */ - while (sectors) { - int secs; - bitmap_counter_t *bmc; - spin_lock_irq(&bitmap->lock); - bmc = bitmap_get_counter(bitmap, offset, &secs, 1); - if (!bmc) { - spin_unlock_irq(&bitmap->lock); - return; - } - if (! *bmc) { - struct page *page; - *bmc = 1 | (in_sync? 0 : NEEDED_MASK); - bitmap_count_page(bitmap, offset, 1); - page = filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap)); - set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); - } + + int secs; + bitmap_counter_t *bmc; + spin_lock_irq(&bitmap->lock); + bmc = bitmap_get_counter(bitmap, offset, &secs, 1); + if (!bmc) { spin_unlock_irq(&bitmap->lock); - if (sectors > secs) - sectors -= secs; - else - sectors = 0; + return; + } + if (! *bmc) { + struct page *page; + *bmc = 1 | NEEDED_MASK; + bitmap_count_page(bitmap, offset, 1); + page = filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap)); + set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); } + spin_unlock_irq(&bitmap->lock); + +} + +/* + * flush out any pending updates + */ +void bitmap_flush(mddev_t *mddev) +{ + struct bitmap *bitmap = mddev->bitmap; + int sleep; + + if (!bitmap) /* there was no bitmap */ + return; + + /* run the daemon_work three time to ensure everything is flushed + * that can be + */ + sleep = bitmap->daemon_sleep; + bitmap->daemon_sleep = 0; + bitmap_daemon_work(bitmap); + bitmap_daemon_work(bitmap); + bitmap_daemon_work(bitmap); + bitmap->daemon_sleep = sleep; + bitmap_update_sb(bitmap); } /* @@ -1552,7 +1568,7 @@ int bitmap_create(mddev_t *mddev) bitmap->syncchunk = ~0UL; -#if INJECT_FATAL_FAULT_1 +#ifdef INJECT_FATAL_FAULT_1 bitmap->bp = NULL; #else bitmap->bp = kmalloc(pages * sizeof(*bitmap->bp), GFP_KERNEL); @@ -1565,7 +1581,8 @@ int bitmap_create(mddev_t *mddev) /* now that we have some pages available, initialize the in-memory * bitmap from the on-disk bitmap */ - err = bitmap_init_from_disk(bitmap, mddev->recovery_cp == MaxSector); + err = bitmap_init_from_disk(bitmap); + if (err) return err; diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 12031c9..b08df8b 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -1230,7 +1230,7 @@ static int __init dm_mirror_init(void) if (r) return r; - _kmirrord_wq = create_workqueue("kmirrord"); + _kmirrord_wq = create_singlethread_workqueue("kmirrord"); if (!_kmirrord_wq) { DMERR("couldn't start kmirrord"); dm_dirty_log_exit(); diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index a5a4c0e..a6d3baa 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -869,11 +869,17 @@ static void suspend_targets(struct dm_table *t, unsigned postsuspend) void dm_table_presuspend_targets(struct dm_table *t) { + if (!t) + return; + return suspend_targets(t, 0); } void dm_table_postsuspend_targets(struct dm_table *t) { + if (!t) + return; + return suspend_targets(t, 1); } diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 54fabbf..d487d9d 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -55,10 +55,10 @@ union map_info *dm_get_mapinfo(struct bio *bio) */ #define DMF_BLOCK_IO 0 #define DMF_SUSPENDED 1 -#define DMF_FS_LOCKED 2 struct mapped_device { - struct rw_semaphore lock; + struct rw_semaphore io_lock; + struct semaphore suspend_lock; rwlock_t map_lock; atomic_t holders; @@ -248,16 +248,16 @@ static inline void free_tio(struct mapped_device *md, struct target_io *tio) */ static int queue_io(struct mapped_device *md, struct bio *bio) { - down_write(&md->lock); + down_write(&md->io_lock); if (!test_bit(DMF_BLOCK_IO, &md->flags)) { - up_write(&md->lock); + up_write(&md->io_lock); return 1; } bio_list_add(&md->deferred, bio); - up_write(&md->lock); + up_write(&md->io_lock); return 0; /* deferred successfully */ } @@ -568,14 +568,14 @@ static int dm_request(request_queue_t *q, struct bio *bio) int r; struct mapped_device *md = q->queuedata; - down_read(&md->lock); + down_read(&md->io_lock); /* * If we're suspended we have to queue * this io for later. */ while (test_bit(DMF_BLOCK_IO, &md->flags)) { - up_read(&md->lock); + up_read(&md->io_lock); if (bio_rw(bio) == READA) { bio_io_error(bio, bio->bi_size); @@ -594,11 +594,11 @@ static int dm_request(request_queue_t *q, struct bio *bio) * We're in a while loop, because someone could suspend * before we get to the following read lock. */ - down_read(&md->lock); + down_read(&md->io_lock); } __split_bio(md, bio); - up_read(&md->lock); + up_read(&md->io_lock); return 0; } @@ -610,7 +610,7 @@ static int dm_flush_all(request_queue_t *q, struct gendisk *disk, int ret = -ENXIO; if (map) { - ret = dm_table_flush_all(md->map); + ret = dm_table_flush_all(map); dm_table_put(map); } @@ -747,7 +747,8 @@ static struct mapped_device *alloc_dev(unsigned int minor, int persistent) goto bad1; memset(md, 0, sizeof(*md)); - init_rwsem(&md->lock); + init_rwsem(&md->io_lock); + init_MUTEX(&md->suspend_lock); rwlock_init(&md->map_lock); atomic_set(&md->holders, 1); atomic_set(&md->event_nr, 0); @@ -825,18 +826,13 @@ static void event_callback(void *context) wake_up(&md->eventq); } -static void __set_size(struct gendisk *disk, sector_t size) +static void __set_size(struct mapped_device *md, sector_t size) { - struct block_device *bdev; - - set_capacity(disk, size); - bdev = bdget_disk(disk, 0); - if (bdev) { - down(&bdev->bd_inode->i_sem); - i_size_write(bdev->bd_inode, (loff_t)size << SECTOR_SHIFT); - up(&bdev->bd_inode->i_sem); - bdput(bdev); - } + set_capacity(md->disk, size); + + down(&md->frozen_bdev->bd_inode->i_sem); + i_size_write(md->frozen_bdev->bd_inode, (loff_t)size << SECTOR_SHIFT); + up(&md->frozen_bdev->bd_inode->i_sem); } static int __bind(struct mapped_device *md, struct dm_table *t) @@ -845,17 +841,18 @@ static int __bind(struct mapped_device *md, struct dm_table *t) sector_t size; size = dm_table_get_size(t); - __set_size(md->disk, size); + __set_size(md, size); if (size == 0) return 0; + dm_table_get(t); + dm_table_event_callback(t, event_callback, md); + write_lock(&md->map_lock); md->map = t; + dm_table_set_restrictions(t, q); write_unlock(&md->map_lock); - dm_table_get(t); - dm_table_event_callback(md->map, event_callback, md); - dm_table_set_restrictions(t, q); return 0; } @@ -935,7 +932,7 @@ void dm_put(struct mapped_device *md) struct dm_table *map = dm_get_table(md); if (atomic_dec_and_test(&md->holders)) { - if (!test_bit(DMF_SUSPENDED, &md->flags) && map) { + if (!dm_suspended(md)) { dm_table_presuspend_targets(map); dm_table_postsuspend_targets(map); } @@ -968,17 +965,17 @@ int dm_swap_table(struct mapped_device *md, struct dm_table *table) { int r = -EINVAL; - down_write(&md->lock); + down(&md->suspend_lock); /* device must be suspended */ - if (!test_bit(DMF_SUSPENDED, &md->flags)) + if (!dm_suspended(md)) goto out; __unbind(md); r = __bind(md, table); out: - up_write(&md->lock); + up(&md->suspend_lock); return r; } @@ -986,16 +983,13 @@ out: * Functions to lock and unlock any filesystem running on the * device. */ -static int __lock_fs(struct mapped_device *md) +static int lock_fs(struct mapped_device *md) { - int error = -ENOMEM; - - if (test_and_set_bit(DMF_FS_LOCKED, &md->flags)) - return 0; + int r = -ENOMEM; md->frozen_bdev = bdget_disk(md->disk, 0); if (!md->frozen_bdev) { - DMWARN("bdget failed in __lock_fs"); + DMWARN("bdget failed in lock_fs"); goto out; } @@ -1003,13 +997,13 @@ static int __lock_fs(struct mapped_device *md) md->frozen_sb = freeze_bdev(md->frozen_bdev); if (IS_ERR(md->frozen_sb)) { - error = PTR_ERR(md->frozen_sb); + r = PTR_ERR(md->frozen_sb); goto out_bdput; } /* don't bdput right now, we don't want the bdev * to go away while it is locked. We'll bdput - * in __unlock_fs + * in unlock_fs */ return 0; @@ -1018,15 +1012,11 @@ out_bdput: md->frozen_sb = NULL; md->frozen_bdev = NULL; out: - clear_bit(DMF_FS_LOCKED, &md->flags); - return error; + return r; } -static void __unlock_fs(struct mapped_device *md) +static void unlock_fs(struct mapped_device *md) { - if (!test_and_clear_bit(DMF_FS_LOCKED, &md->flags)) - return; - thaw_bdev(md->frozen_bdev, md->frozen_sb); bdput(md->frozen_bdev); @@ -1043,50 +1033,37 @@ static void __unlock_fs(struct mapped_device *md) */ int dm_suspend(struct mapped_device *md) { - struct dm_table *map; + struct dm_table *map = NULL; DECLARE_WAITQUEUE(wait, current); - int error = -EINVAL; + int r = -EINVAL; - /* Flush I/O to the device. */ - down_read(&md->lock); - if (test_bit(DMF_BLOCK_IO, &md->flags)) - goto out_read_unlock; + down(&md->suspend_lock); + + if (dm_suspended(md)) + goto out; map = dm_get_table(md); - if (map) - /* This does not get reverted if there's an error later. */ - dm_table_presuspend_targets(map); - error = __lock_fs(md); - if (error) { - dm_table_put(map); - goto out_read_unlock; - } + /* This does not get reverted if there's an error later. */ + dm_table_presuspend_targets(map); - up_read(&md->lock); + /* Flush I/O to the device. */ + r = lock_fs(md); + if (r) + goto out; /* * First we set the BLOCK_IO flag so no more ios will be mapped. - * - * If the flag is already set we know another thread is trying to - * suspend as well, so we leave the fs locked for this thread. */ - error = -EINVAL; - down_write(&md->lock); - if (test_and_set_bit(DMF_BLOCK_IO, &md->flags)) { - if (map) - dm_table_put(map); - goto out_write_unlock; - } + down_write(&md->io_lock); + set_bit(DMF_BLOCK_IO, &md->flags); add_wait_queue(&md->wait, &wait); - up_write(&md->lock); + up_write(&md->io_lock); /* unplug */ - if (map) { + if (map) dm_table_unplug_all(map); - dm_table_put(map); - } /* * Then we wait for the already mapped ios to @@ -1102,62 +1079,67 @@ int dm_suspend(struct mapped_device *md) } set_current_state(TASK_RUNNING); - down_write(&md->lock); + down_write(&md->io_lock); remove_wait_queue(&md->wait, &wait); /* were we interrupted ? */ - error = -EINTR; - if (atomic_read(&md->pending)) - goto out_unfreeze; - - set_bit(DMF_SUSPENDED, &md->flags); + r = -EINTR; + if (atomic_read(&md->pending)) { + up_write(&md->io_lock); + unlock_fs(md); + clear_bit(DMF_BLOCK_IO, &md->flags); + goto out; + } + up_write(&md->io_lock); - map = dm_get_table(md); - if (map) - dm_table_postsuspend_targets(map); - dm_table_put(map); - up_write(&md->lock); + dm_table_postsuspend_targets(map); - return 0; + set_bit(DMF_SUSPENDED, &md->flags); -out_unfreeze: - __unlock_fs(md); - clear_bit(DMF_BLOCK_IO, &md->flags); -out_write_unlock: - up_write(&md->lock); - return error; + r = 0; -out_read_unlock: - up_read(&md->lock); - return error; +out: + dm_table_put(map); + up(&md->suspend_lock); + return r; } int dm_resume(struct mapped_device *md) { + int r = -EINVAL; struct bio *def; - struct dm_table *map = dm_get_table(md); + struct dm_table *map = NULL; - down_write(&md->lock); - if (!map || - !test_bit(DMF_SUSPENDED, &md->flags) || - !dm_table_get_size(map)) { - up_write(&md->lock); - dm_table_put(map); - return -EINVAL; - } + down(&md->suspend_lock); + if (!dm_suspended(md)) + goto out; + + map = dm_get_table(md); + if (!map || !dm_table_get_size(map)) + goto out; dm_table_resume_targets(map); - clear_bit(DMF_SUSPENDED, &md->flags); + + down_write(&md->io_lock); clear_bit(DMF_BLOCK_IO, &md->flags); def = bio_list_get(&md->deferred); __flush_deferred_io(md, def); - up_write(&md->lock); - __unlock_fs(md); + up_write(&md->io_lock); + + unlock_fs(md); + + clear_bit(DMF_SUSPENDED, &md->flags); + dm_table_unplug_all(map); + + r = 0; + +out: dm_table_put(map); + up(&md->suspend_lock); - return 0; + return r; } /*----------------------------------------------------------------- diff --git a/drivers/md/md.c b/drivers/md/md.c index 4a0c57d..480f658 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -284,7 +284,7 @@ static mdk_rdev_t * find_rdev(mddev_t * mddev, dev_t dev) return NULL; } -inline static sector_t calc_dev_sboffset(struct block_device *bdev) +static inline sector_t calc_dev_sboffset(struct block_device *bdev) { sector_t size = bdev->bd_inode->i_size >> BLOCK_SIZE_BITS; return MD_NEW_SIZE_BLOCKS(size); @@ -1798,6 +1798,8 @@ static int do_md_stop(mddev_t * mddev, int ro) goto out; mddev->ro = 1; } else { + bitmap_flush(mddev); + wait_event(mddev->sb_wait, atomic_read(&mddev->pending_writes)==0); if (mddev->ro) set_disk_ro(disk, 0); blk_queue_make_request(mddev->queue, md_fail_request); @@ -3484,7 +3486,6 @@ static void md_do_sync(mddev_t *mddev) goto skip; } ITERATE_MDDEV(mddev2,tmp) { - printk("."); if (mddev2 == mddev) continue; if (mddev2->curr_resync && @@ -4007,3 +4008,4 @@ EXPORT_SYMBOL(md_wakeup_thread); EXPORT_SYMBOL(md_print_devices); EXPORT_SYMBOL(md_check_recovery); MODULE_LICENSE("GPL"); +MODULE_ALIAS("md"); diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 5f253ee..51d9645 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -893,7 +893,6 @@ static int end_sync_read(struct bio *bio, unsigned int bytes_done, int error) if (!uptodate) { md_error(r1_bio->mddev, conf->mirrors[r1_bio->read_disk].rdev); - set_bit(R1BIO_Degraded, &r1_bio->state); } else set_bit(R1BIO_Uptodate, &r1_bio->state); rdev_dec_pending(conf->mirrors[r1_bio->read_disk].rdev, conf->mddev); @@ -918,10 +917,9 @@ static int end_sync_write(struct bio *bio, unsigned int bytes_done, int error) mirror = i; break; } - if (!uptodate) { + if (!uptodate) md_error(mddev, conf->mirrors[mirror].rdev); - set_bit(R1BIO_Degraded, &r1_bio->state); - } + update_head_pos(mirror, r1_bio); if (atomic_dec_and_test(&r1_bio->remaining)) { @@ -1109,6 +1107,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i int i; int write_targets = 0; int sync_blocks; + int still_degraded = 0; if (!conf->r1buf_pool) { @@ -1137,7 +1136,10 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i return 0; } - if (!bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, mddev->degraded) && + /* before building a request, check if we can skip these blocks.. + * This call the bitmap_start_sync doesn't actually record anything + */ + if (!bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) && !conf->fullsync) { /* We can skip this block, and probably several more */ *skipped = 1; @@ -1203,24 +1205,23 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i if (i == disk) { bio->bi_rw = READ; bio->bi_end_io = end_sync_read; - } else if (conf->mirrors[i].rdev && - !conf->mirrors[i].rdev->faulty && - (!conf->mirrors[i].rdev->in_sync || - sector_nr + RESYNC_SECTORS > mddev->recovery_cp)) { + } else if (conf->mirrors[i].rdev == NULL || + conf->mirrors[i].rdev->faulty) { + still_degraded = 1; + continue; + } else if (!conf->mirrors[i].rdev->in_sync || + sector_nr + RESYNC_SECTORS > mddev->recovery_cp) { bio->bi_rw = WRITE; bio->bi_end_io = end_sync_write; write_targets ++; } else + /* no need to read or write here */ continue; bio->bi_sector = sector_nr + conf->mirrors[i].rdev->data_offset; bio->bi_bdev = conf->mirrors[i].rdev->bdev; bio->bi_private = r1_bio; } - if (write_targets + 1 < conf->raid_disks) - /* array degraded, can't clear bitmap */ - set_bit(R1BIO_Degraded, &r1_bio->state); - if (write_targets == 0) { /* There is nowhere to write, so all non-sync * drives must be failed - so we are finished @@ -1243,7 +1244,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i break; if (sync_blocks == 0) { if (!bitmap_start_sync(mddev->bitmap, sector_nr, - &sync_blocks, mddev->degraded) && + &sync_blocks, still_degraded) && !conf->fullsync) break; if (sync_blocks < (PAGE_SIZE>>9)) @@ -1468,6 +1469,7 @@ static int raid1_resize(mddev_t *mddev, sector_t sectors) set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); } mddev->size = mddev->array_size; + mddev->resync_max_sectors = sectors; return 0; } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 93a9726..43f231a 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -1653,6 +1653,7 @@ static int run (mddev_t *mddev) /* device size must be a multiple of chunk size */ mddev->size &= ~(mddev->chunk_size/1024 -1); + mddev->resync_max_sectors = mddev->size << 1; if (!conf->chunk_size || conf->chunk_size % 4) { printk(KERN_ERR "raid5: invalid chunk size %d for %s\n", @@ -1931,6 +1932,7 @@ static int raid5_resize(mddev_t *mddev, sector_t sectors) set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); } mddev->size = sectors /2; + mddev->resync_max_sectors = sectors; return 0; } diff --git a/drivers/md/raid6main.c b/drivers/md/raid6main.c index f62ea1a..495dee1 100644 --- a/drivers/md/raid6main.c +++ b/drivers/md/raid6main.c @@ -1813,6 +1813,7 @@ static int run (mddev_t *mddev) /* device size must be a multiple of chunk size */ mddev->size &= ~(mddev->chunk_size/1024 -1); + mddev->resync_max_sectors = mddev->size << 1; if (conf->raid_disks < 4) { printk(KERN_ERR "raid6: not enough configured devices for %s (%d, minimum 4)\n", @@ -2095,6 +2096,7 @@ static int raid6_resize(mddev_t *mddev, sector_t sectors) set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); } mddev->size = sectors /2; + mddev->resync_max_sectors = sectors; return 0; } |