-rw-r--r--  block/blk-cgroup.c            |   2
-rw-r--r--  block/blk-core.c              | 127
-rw-r--r--  block/blk-exec.c              |   4
-rw-r--r--  block/blk-lib.c               |  26
-rw-r--r--  block/blk-settings.c          |   6
-rw-r--r--  block/blk-sysfs.c             |   4
-rw-r--r--  block/blk-throttle.c          |   2
-rw-r--r--  block/blk.h                   |   4
-rw-r--r--  block/bsg-lib.c               |  13
-rw-r--r--  block/cfq-iosched.c           |   3
-rw-r--r--  block/deadline-iosched.c      |   2
-rw-r--r--  block/elevator.c              |  16
-rw-r--r--  block/genhd.c                 |   2
-rw-r--r--  block/partitions/Kconfig      |   4
-rw-r--r--  drivers/scsi/scsi_lib.c       |   2
-rw-r--r--  include/linux/backing-dev.h   |   4
-rw-r--r--  include/linux/blkdev.h        |  17
-rw-r--r--  include/linux/bsg-lib.h       |   1
-rw-r--r--  mm/backing-dev.c              |  84
19 files changed, 224 insertions(+), 99 deletions(-)
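
Several of the hunks below (block/blk-lib.c, block/blk-settings.c and include/linux/blkdev.h) replace power-of-two mask arithmetic with sector_div()-based division so that discard granularities which are not powers of two are rounded correctly. The following is a minimal userspace sketch of that rounding change, not kernel code; the helper names and sample numbers are mine, chosen only to show where the mask-based formula goes wrong:

/*
 * Round v down to the largest value <= v that is congruent to
 * "alignment" modulo "granularity" -- the operation performed on
 * end_sect in the blk-lib.c hunk below.
 */
#include <stdint.h>
#include <stdio.h>

/* Old approach: only correct when granularity is a power of two. */
static uint64_t round_down_mask(uint64_t v, uint64_t granularity,
                                uint64_t alignment)
{
        return ((v - alignment) & ~(granularity - 1)) + alignment;
}

/* New approach: plain division/modulo, valid for any granularity
 * (the kernel uses sector_div() so the 64-bit division also works
 * on 32-bit architectures). */
static uint64_t round_down_div(uint64_t v, uint64_t granularity,
                               uint64_t alignment)
{
        return (v - alignment) / granularity * granularity + alignment;
}

int main(void)
{
        /* Illustrative values: 3-sector granularity, 1-sector alignment. */
        uint64_t granularity = 3, alignment = 1, end_sect = 11;

        printf("mask-based rounding: %llu (9 is not a granularity boundary offset by 1)\n",
               (unsigned long long)round_down_mask(end_sect, granularity, alignment));
        printf("division rounding:   %llu (largest such boundary <= 11)\n",
               (unsigned long long)round_down_div(end_sect, granularity, alignment));
        return 0;
}

The same substitution of % (or sector_div()) for "& (granularity - 1)" appears in queue_limit_discard_alignment() and in the blk_stack_limits() misalignment check further down.
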
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 3f6d39d..b8858fb 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -231,7 +231,7 @@ struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, * we shouldn't allow anything to go through for a bypassing queue. */ if (unlikely(blk_queue_bypass(q))) - return ERR_PTR(blk_queue_dead(q) ? -EINVAL : -EBUSY); + return ERR_PTR(blk_queue_dying(q) ? -EINVAL : -EBUSY); return __blkg_lookup_create(blkcg, q, NULL); } EXPORT_SYMBOL_GPL(blkg_lookup_create); diff --git a/block/blk-core.c b/block/blk-core.c index 3c95c4d..c973249 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -40,6 +40,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap); EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete); +EXPORT_TRACEPOINT_SYMBOL_GPL(block_unplug); DEFINE_IDA(blk_queue_ida); @@ -219,12 +220,13 @@ static void blk_delay_work(struct work_struct *work) * Description: * Sometimes queueing needs to be postponed for a little while, to allow * resources to come back. This function will make sure that queueing is - * restarted around the specified time. + * restarted around the specified time. Queue lock must be held. */ void blk_delay_queue(struct request_queue *q, unsigned long msecs) { - queue_delayed_work(kblockd_workqueue, &q->delay_work, - msecs_to_jiffies(msecs)); + if (likely(!blk_queue_dead(q))) + queue_delayed_work(kblockd_workqueue, &q->delay_work, + msecs_to_jiffies(msecs)); } EXPORT_SYMBOL(blk_delay_queue); @@ -293,6 +295,34 @@ void blk_sync_queue(struct request_queue *q) EXPORT_SYMBOL(blk_sync_queue); /** + * __blk_run_queue_uncond - run a queue whether or not it has been stopped + * @q: The queue to run + * + * Description: + * Invoke request handling on a queue if there are any pending requests. + * May be used to restart request handling after a request has completed. + * This variant runs the queue whether or not the queue has been + * stopped. Must be called with the queue lock held and interrupts + * disabled. See also @blk_run_queue. + */ +inline void __blk_run_queue_uncond(struct request_queue *q) +{ + if (unlikely(blk_queue_dead(q))) + return; + + /* + * Some request_fn implementations, e.g. scsi_request_fn(), unlock + * the queue lock internally. As a result multiple threads may be + * running such a request function concurrently. Keep track of the + * number of active request_fn invocations such that blk_drain_queue() + * can wait until all these request_fn calls have finished. + */ + q->request_fn_active++; + q->request_fn(q); + q->request_fn_active--; +} + +/** * __blk_run_queue - run a single device queue * @q: The queue to run * @@ -305,7 +335,7 @@ void __blk_run_queue(struct request_queue *q) if (unlikely(blk_queue_stopped(q))) return; - q->request_fn(q); + __blk_run_queue_uncond(q); } EXPORT_SYMBOL(__blk_run_queue); @@ -315,11 +345,11 @@ EXPORT_SYMBOL(__blk_run_queue); * * Description: * Tells kblockd to perform the equivalent of @blk_run_queue on behalf - * of us. + * of us. The caller must hold the queue lock. 
*/ void blk_run_queue_async(struct request_queue *q) { - if (likely(!blk_queue_stopped(q))) + if (likely(!blk_queue_stopped(q) && !blk_queue_dead(q))) mod_delayed_work(kblockd_workqueue, &q->delay_work, 0); } EXPORT_SYMBOL(blk_run_queue_async); @@ -349,7 +379,7 @@ void blk_put_queue(struct request_queue *q) EXPORT_SYMBOL(blk_put_queue); /** - * blk_drain_queue - drain requests from request_queue + * __blk_drain_queue - drain requests from request_queue * @q: queue to drain * @drain_all: whether to drain all requests or only the ones w/ ELVPRIV * @@ -357,15 +387,17 @@ EXPORT_SYMBOL(blk_put_queue); * If not, only ELVPRIV requests are drained. The caller is responsible * for ensuring that no new requests which need to be drained are queued. */ -void blk_drain_queue(struct request_queue *q, bool drain_all) +static void __blk_drain_queue(struct request_queue *q, bool drain_all) + __releases(q->queue_lock) + __acquires(q->queue_lock) { int i; + lockdep_assert_held(q->queue_lock); + while (true) { bool drain = false; - spin_lock_irq(q->queue_lock); - /* * The caller might be trying to drain @q before its * elevator is initialized. @@ -386,6 +418,7 @@ void blk_drain_queue(struct request_queue *q, bool drain_all) __blk_run_queue(q); drain |= q->nr_rqs_elvpriv; + drain |= q->request_fn_active; /* * Unfortunately, requests are queued at and tracked from @@ -401,11 +434,14 @@ void blk_drain_queue(struct request_queue *q, bool drain_all) } } - spin_unlock_irq(q->queue_lock); - if (!drain) break; + + spin_unlock_irq(q->queue_lock); + msleep(10); + + spin_lock_irq(q->queue_lock); } /* @@ -416,13 +452,9 @@ void blk_drain_queue(struct request_queue *q, bool drain_all) if (q->request_fn) { struct request_list *rl; - spin_lock_irq(q->queue_lock); - blk_queue_for_each_rl(rl, q) for (i = 0; i < ARRAY_SIZE(rl->wait); i++) wake_up_all(&rl->wait[i]); - - spin_unlock_irq(q->queue_lock); } } @@ -446,7 +478,10 @@ void blk_queue_bypass_start(struct request_queue *q) spin_unlock_irq(q->queue_lock); if (drain) { - blk_drain_queue(q, false); + spin_lock_irq(q->queue_lock); + __blk_drain_queue(q, false); + spin_unlock_irq(q->queue_lock); + /* ensure blk_queue_bypass() is %true inside RCU read lock */ synchronize_rcu(); } @@ -473,20 +508,20 @@ EXPORT_SYMBOL_GPL(blk_queue_bypass_end); * blk_cleanup_queue - shutdown a request queue * @q: request queue to shutdown * - * Mark @q DEAD, drain all pending requests, destroy and put it. All - * future requests will be failed immediately with -ENODEV. + * Mark @q DYING, drain all pending requests, mark @q DEAD, destroy and + * put it. All future requests will be failed immediately with -ENODEV. */ void blk_cleanup_queue(struct request_queue *q) { spinlock_t *lock = q->queue_lock; - /* mark @q DEAD, no new request or merges will be allowed afterwards */ + /* mark @q DYING, no new request or merges will be allowed afterwards */ mutex_lock(&q->sysfs_lock); - queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q); + queue_flag_set_unlocked(QUEUE_FLAG_DYING, q); spin_lock_irq(lock); /* - * Dead queue is permanently in bypass mode till released. Note + * A dying queue is permanently in bypass mode till released. 
Note * that, unlike blk_queue_bypass_start(), we aren't performing * synchronize_rcu() after entering bypass mode to avoid the delay * as some drivers create and destroy a lot of queues while @@ -499,12 +534,18 @@ void blk_cleanup_queue(struct request_queue *q) queue_flag_set(QUEUE_FLAG_NOMERGES, q); queue_flag_set(QUEUE_FLAG_NOXMERGES, q); - queue_flag_set(QUEUE_FLAG_DEAD, q); + queue_flag_set(QUEUE_FLAG_DYING, q); spin_unlock_irq(lock); mutex_unlock(&q->sysfs_lock); - /* drain all requests queued before DEAD marking */ - blk_drain_queue(q, true); + /* + * Drain all requests queued before DYING marking. Set DEAD flag to + * prevent that q->request_fn() gets invoked after draining finished. + */ + spin_lock_irq(lock); + __blk_drain_queue(q, true); + queue_flag_set(QUEUE_FLAG_DEAD, q); + spin_unlock_irq(lock); /* @q won't process any more request, flush async actions */ del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer); @@ -549,7 +590,7 @@ void blk_exit_rl(struct request_list *rl) struct request_queue *blk_alloc_queue(gfp_t gfp_mask) { - return blk_alloc_queue_node(gfp_mask, -1); + return blk_alloc_queue_node(gfp_mask, NUMA_NO_NODE); } EXPORT_SYMBOL(blk_alloc_queue); @@ -660,7 +701,7 @@ EXPORT_SYMBOL(blk_alloc_queue_node); struct request_queue *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock) { - return blk_init_queue_node(rfn, lock, -1); + return blk_init_queue_node(rfn, lock, NUMA_NO_NODE); } EXPORT_SYMBOL(blk_init_queue); @@ -716,7 +757,7 @@ EXPORT_SYMBOL(blk_init_allocated_queue); bool blk_get_queue(struct request_queue *q) { - if (likely(!blk_queue_dead(q))) { + if (likely(!blk_queue_dying(q))) { __blk_get_queue(q); return true; } @@ -870,7 +911,7 @@ static struct request *__get_request(struct request_list *rl, int rw_flags, const bool is_sync = rw_is_sync(rw_flags) != 0; int may_queue; - if (unlikely(blk_queue_dead(q))) + if (unlikely(blk_queue_dying(q))) return NULL; may_queue = elv_may_queue(q, rw_flags); @@ -1050,7 +1091,7 @@ retry: if (rq) return rq; - if (!(gfp_mask & __GFP_WAIT) || unlikely(blk_queue_dead(q))) { + if (!(gfp_mask & __GFP_WAIT) || unlikely(blk_queue_dying(q))) { blk_put_rl(rl); return NULL; } @@ -1910,7 +1951,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq) return -EIO; spin_lock_irqsave(q->queue_lock, flags); - if (unlikely(blk_queue_dead(q))) { + if (unlikely(blk_queue_dying(q))) { spin_unlock_irqrestore(q->queue_lock, flags); return -ENODEV; } @@ -2884,27 +2925,11 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth, { trace_block_unplug(q, depth, !from_schedule); - /* - * Don't mess with dead queue. - */ - if (unlikely(blk_queue_dead(q))) { - spin_unlock(q->queue_lock); - return; - } - - /* - * If we are punting this to kblockd, then we can safely drop - * the queue_lock before waking kblockd (which needs to take - * this lock). 
- */ - if (from_schedule) { - spin_unlock(q->queue_lock); + if (from_schedule) blk_run_queue_async(q); - } else { + else __blk_run_queue(q); - spin_unlock(q->queue_lock); - } - + spin_unlock(q->queue_lock); } static void flush_plug_callbacks(struct blk_plug *plug, bool from_schedule) @@ -2996,7 +3021,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) /* * Short-circuit if @q is dead */ - if (unlikely(blk_queue_dead(q))) { + if (unlikely(blk_queue_dying(q))) { __blk_end_request_all(rq, -ENODEV); continue; } diff --git a/block/blk-exec.c b/block/blk-exec.c index f71eac3..74638ec 100644 --- a/block/blk-exec.c +++ b/block/blk-exec.c @@ -66,7 +66,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, spin_lock_irq(q->queue_lock); - if (unlikely(blk_queue_dead(q))) { + if (unlikely(blk_queue_dying(q))) { rq->errors = -ENXIO; if (rq->end_io) rq->end_io(rq, rq->errors); @@ -78,7 +78,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, __blk_run_queue(q); /* the queue is stopped so it won't be run */ if (is_pm_resume) - q->request_fn(q); + __blk_run_queue_uncond(q); spin_unlock_irq(q->queue_lock); } EXPORT_SYMBOL_GPL(blk_execute_rq_nowait); diff --git a/block/blk-lib.c b/block/blk-lib.c index 9373b58..b3a1f2b7 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -43,11 +43,12 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, DECLARE_COMPLETION_ONSTACK(wait); struct request_queue *q = bdev_get_queue(bdev); int type = REQ_WRITE | REQ_DISCARD; - unsigned int max_discard_sectors; - unsigned int granularity, alignment, mask; + sector_t max_discard_sectors; + sector_t granularity, alignment; struct bio_batch bb; struct bio *bio; int ret = 0; + struct blk_plug plug; if (!q) return -ENXIO; @@ -57,15 +58,16 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, /* Zero-sector (unknown) and one-sector granularities are the same. */ granularity = max(q->limits.discard_granularity >> 9, 1U); - mask = granularity - 1; - alignment = (bdev_discard_alignment(bdev) >> 9) & mask; + alignment = bdev_discard_alignment(bdev) >> 9; + alignment = sector_div(alignment, granularity); /* * Ensure that max_discard_sectors is of the proper * granularity, so that requests stay aligned after a split. */ max_discard_sectors = min(q->limits.max_discard_sectors, UINT_MAX >> 9); - max_discard_sectors = round_down(max_discard_sectors, granularity); + sector_div(max_discard_sectors, granularity); + max_discard_sectors *= granularity; if (unlikely(!max_discard_sectors)) { /* Avoid infinite loop below. Being cautious never hurts. */ return -EOPNOTSUPP; @@ -81,9 +83,10 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, bb.flags = 1 << BIO_UPTODATE; bb.wait = &wait; + blk_start_plug(&plug); while (nr_sects) { unsigned int req_sects; - sector_t end_sect; + sector_t end_sect, tmp; bio = bio_alloc(gfp_mask, 1); if (!bio) { @@ -98,10 +101,12 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, * misaligned, stop the discard at the previous aligned sector. 
*/ end_sect = sector + req_sects; - if (req_sects < nr_sects && (end_sect & mask) != alignment) { - end_sect = - round_down(end_sect - alignment, granularity) - + alignment; + tmp = end_sect; + if (req_sects < nr_sects && + sector_div(tmp, granularity) != alignment) { + end_sect = end_sect - alignment; + sector_div(end_sect, granularity); + end_sect = end_sect * granularity + alignment; req_sects = end_sect - sector; } @@ -117,6 +122,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, atomic_inc(&bb.done); submit_bio(type, bio); } + blk_finish_plug(&plug); /* Wait for bios in-flight */ if (!atomic_dec_and_test(&bb.done)) diff --git a/block/blk-settings.c b/block/blk-settings.c index 779bb76..c50ecf0 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -611,7 +611,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, bottom = b->discard_granularity + alignment; /* Verify that top and bottom intervals line up */ - if (max(top, bottom) & (min(top, bottom) - 1)) + if ((max(top, bottom) % min(top, bottom)) != 0) t->discard_misaligned = 1; } @@ -619,8 +619,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, b->max_discard_sectors); t->discard_granularity = max(t->discard_granularity, b->discard_granularity); - t->discard_alignment = lcm(t->discard_alignment, alignment) & - (t->discard_granularity - 1); + t->discard_alignment = lcm(t->discard_alignment, alignment) % + t->discard_granularity; } return ret; diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index ce62046..7881477 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -466,7 +466,7 @@ queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page) if (!entry->show) return -EIO; mutex_lock(&q->sysfs_lock); - if (blk_queue_dead(q)) { + if (blk_queue_dying(q)) { mutex_unlock(&q->sysfs_lock); return -ENOENT; } @@ -488,7 +488,7 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr, q = container_of(kobj, struct request_queue, kobj); mutex_lock(&q->sysfs_lock); - if (blk_queue_dead(q)) { + if (blk_queue_dying(q)) { mutex_unlock(&q->sysfs_lock); return -ENOENT; } diff --git a/block/blk-throttle.c b/block/blk-throttle.c index a9664fa..3114622 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -302,7 +302,7 @@ static struct throtl_grp *throtl_lookup_create_tg(struct throtl_data *td, /* if %NULL and @q is alive, fall back to root_tg */ if (!IS_ERR(blkg)) tg = blkg_to_tg(blkg); - else if (!blk_queue_dead(q)) + else if (!blk_queue_dying(q)) tg = td_root_tg(td); } diff --git a/block/blk.h b/block/blk.h index ca51543..47fdfdd 100644 --- a/block/blk.h +++ b/block/blk.h @@ -96,7 +96,7 @@ static inline struct request *__elv_next_request(struct request_queue *q) q->flush_queue_delayed = 1; return NULL; } - if (unlikely(blk_queue_dead(q)) || + if (unlikely(blk_queue_dying(q)) || !q->elevator->type->ops.elevator_dispatch_fn(q, 0)) return NULL; } @@ -145,6 +145,8 @@ int blk_try_merge(struct request *rq, struct bio *bio); void blk_queue_congestion_threshold(struct request_queue *q); +void __blk_run_queue_uncond(struct request_queue *q); + int blk_dev_init(void); diff --git a/block/bsg-lib.c b/block/bsg-lib.c index deee61f..650f427 100644 --- a/block/bsg-lib.c +++ b/block/bsg-lib.c @@ -151,19 +151,6 @@ failjob_rls_job: return -ENOMEM; } -/* - * bsg_goose_queue - restart queue in case it was stopped - * @q: request q to be restarted - */ -void bsg_goose_queue(struct request_queue *q) -{ - if (!q) - return; - - blk_run_queue_async(q); -} 
-EXPORT_SYMBOL_GPL(bsg_goose_queue); - /** * bsg_request_fn - generic handler for bsg requests * @q: request queue to manage diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index fb52df9..e62e920 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1973,7 +1973,8 @@ cfq_merged_requests(struct request_queue *q, struct request *rq, * reposition in fifo if next is older than rq */ if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) && - time_before(rq_fifo_time(next), rq_fifo_time(rq))) { + time_before(rq_fifo_time(next), rq_fifo_time(rq)) && + cfqq == RQ_CFQQ(next)) { list_move(&rq->queuelist, &next->queuelist); rq_set_fifo_time(rq, rq_fifo_time(next)); } diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c index 599b12e..90037b5 100644 --- a/block/deadline-iosched.c +++ b/block/deadline-iosched.c @@ -230,7 +230,7 @@ static inline int deadline_check_fifo(struct deadline_data *dd, int ddir) /* * rq is expired! */ - if (time_after(jiffies, rq_fifo_time(rq))) + if (time_after_eq(jiffies, rq_fifo_time(rq))) return 1; return 0; diff --git a/block/elevator.c b/block/elevator.c index 9b1d42b..9edba1b 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -458,6 +458,7 @@ static bool elv_attempt_insert_merge(struct request_queue *q, struct request *rq) { struct request *__rq; + bool ret; if (blk_queue_nomerges(q)) return false; @@ -471,14 +472,21 @@ static bool elv_attempt_insert_merge(struct request_queue *q, if (blk_queue_noxmerges(q)) return false; + ret = false; /* * See if our hash lookup can find a potential backmerge. */ - __rq = elv_rqhash_find(q, blk_rq_pos(rq)); - if (__rq && blk_attempt_req_merge(q, __rq, rq)) - return true; + while (1) { + __rq = elv_rqhash_find(q, blk_rq_pos(rq)); + if (!__rq || !blk_attempt_req_merge(q, __rq, rq)) + break; - return false; + /* The merged request could be merged with others, try again */ + ret = true; + rq = __rq; + } + + return ret; } void elv_merged_request(struct request_queue *q, struct request *rq, int type) diff --git a/block/genhd.c b/block/genhd.c index 6cace66..2a6fdf5 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1245,7 +1245,7 @@ EXPORT_SYMBOL(blk_lookup_devt); struct gendisk *alloc_disk(int minors) { - return alloc_disk_node(minors, -1); + return alloc_disk_node(minors, NUMA_NO_NODE); } EXPORT_SYMBOL(alloc_disk); diff --git a/block/partitions/Kconfig b/block/partitions/Kconfig index cb5f0a3..75a54e1 100644 --- a/block/partitions/Kconfig +++ b/block/partitions/Kconfig @@ -234,8 +234,8 @@ config KARMA_PARTITION uses a proprietary partition table. 
config EFI_PARTITION - bool "EFI GUID Partition support" - depends on PARTITION_ADVANCED + bool "EFI GUID Partition support" if PARTITION_ADVANCED + default y select CRC32 help Say Y here if you would like to use hard disks under Linux which diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 9032e91..f1bf5af 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1418,7 +1418,7 @@ static int scsi_lld_busy(struct request_queue *q) struct scsi_device *sdev = q->queuedata; struct Scsi_Host *shost; - if (blk_queue_dead(q)) + if (blk_queue_dying(q)) return 0; shost = sdev->host; diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 2a9a9ab..238521a 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -18,6 +18,7 @@ #include <linux/writeback.h> #include <linux/atomic.h> #include <linux/sysctl.h> +#include <linux/mutex.h> struct page; struct device; @@ -105,6 +106,9 @@ struct backing_dev_info { struct timer_list laptop_mode_wb_timer; + cpumask_t *flusher_cpumask; /* used for writeback thread scheduling */ + struct mutex flusher_cpumask_lock; + #ifdef CONFIG_DEBUG_FS struct dentry *debug_dir; struct dentry *debug_stats; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1756001..acb4f7b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -378,6 +378,12 @@ struct request_queue { unsigned int nr_sorted; unsigned int in_flight[2]; + /* + * Number of active block driver functions for which blk_drain_queue() + * must wait. Must be incremented around functions that unlock the + * queue_lock internally, e.g. scsi_request_fn(). + */ + unsigned int request_fn_active; unsigned int rq_timeout; struct timer_list timeout; @@ -437,7 +443,7 @@ struct request_queue { #define QUEUE_FLAG_STOPPED 2 /* queue is stopped */ #define QUEUE_FLAG_SYNCFULL 3 /* read queue has been filled */ #define QUEUE_FLAG_ASYNCFULL 4 /* write queue has been filled */ -#define QUEUE_FLAG_DEAD 5 /* queue being torn down */ +#define QUEUE_FLAG_DYING 5 /* queue being torn down */ #define QUEUE_FLAG_BYPASS 6 /* act as dumb FIFO queue */ #define QUEUE_FLAG_BIDI 7 /* queue supports bidi requests */ #define QUEUE_FLAG_NOMERGES 8 /* disable merge attempts */ @@ -452,6 +458,7 @@ struct request_queue { #define QUEUE_FLAG_ADD_RANDOM 16 /* Contributes to random pool */ #define QUEUE_FLAG_SECDISCARD 17 /* supports SECDISCARD */ #define QUEUE_FLAG_SAME_FORCE 18 /* force complete on same CPU */ +#define QUEUE_FLAG_DEAD 19 /* queue tear-down finished */ #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_STACKABLE) | \ @@ -521,6 +528,7 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) +#define blk_queue_dying(q) test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags) #define blk_queue_dead(q) test_bit(QUEUE_FLAG_DEAD, &(q)->queue_flags) #define blk_queue_bypass(q) test_bit(QUEUE_FLAG_BYPASS, &(q)->queue_flags) #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) @@ -1180,13 +1188,14 @@ static inline int queue_discard_alignment(struct request_queue *q) static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector_t sector) { - unsigned int alignment = (sector << 9) & (lim->discard_granularity - 1); + sector_t alignment = sector << 9; + alignment = sector_div(alignment, lim->discard_granularity); if 
(!lim->max_discard_sectors) return 0; - return (lim->discard_granularity + lim->discard_alignment - alignment) - & (lim->discard_granularity - 1); + alignment = lim->discard_granularity + lim->discard_alignment - alignment; + return sector_div(alignment, lim->discard_granularity); } static inline int bdev_discard_alignment(struct block_device *bdev) diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h index 4d0fb3d..a226652 100644 --- a/include/linux/bsg-lib.h +++ b/include/linux/bsg-lib.h @@ -67,6 +67,5 @@ void bsg_job_done(struct bsg_job *job, int result, int bsg_setup_queue(struct device *dev, struct request_queue *q, char *name, bsg_job_fn *job_fn, int dd_job_size); void bsg_request_fn(struct request_queue *q); -void bsg_goose_queue(struct request_queue *q); #endif diff --git a/mm/backing-dev.c b/mm/backing-dev.c index d3ca2b3..bd6a6ca 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -10,6 +10,7 @@ #include <linux/module.h> #include <linux/writeback.h> #include <linux/device.h> +#include <linux/slab.h> #include <trace/events/writeback.h> static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0); @@ -221,12 +222,63 @@ static ssize_t max_ratio_store(struct device *dev, } BDI_SHOW(max_ratio, bdi->max_ratio) +static ssize_t cpu_list_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct backing_dev_info *bdi = dev_get_drvdata(dev); + struct bdi_writeback *wb = &bdi->wb; + cpumask_var_t newmask; + ssize_t ret; + struct task_struct *task; + + if (!alloc_cpumask_var(&newmask, GFP_KERNEL)) + return -ENOMEM; + + ret = cpulist_parse(buf, newmask); + if (!ret) { + spin_lock_bh(&bdi->wb_lock); + task = wb->task; + if (task) + get_task_struct(task); + spin_unlock_bh(&bdi->wb_lock); + + mutex_lock(&bdi->flusher_cpumask_lock); + if (task) { + ret = set_cpus_allowed_ptr(task, newmask); + put_task_struct(task); + } + if (ret == 0) { + cpumask_copy(bdi->flusher_cpumask, newmask); + ret = count; + } + mutex_unlock(&bdi->flusher_cpumask_lock); + + } + free_cpumask_var(newmask); + + return ret; +} + +static ssize_t cpu_list_show(struct device *dev, + struct device_attribute *attr, char *page) +{ + struct backing_dev_info *bdi = dev_get_drvdata(dev); + ssize_t ret; + + mutex_lock(&bdi->flusher_cpumask_lock); + ret = cpulist_scnprintf(page, PAGE_SIZE-1, bdi->flusher_cpumask); + mutex_unlock(&bdi->flusher_cpumask_lock); + + return ret; +} + #define __ATTR_RW(attr) __ATTR(attr, 0644, attr##_show, attr##_store) static struct device_attribute bdi_dev_attrs[] = { __ATTR_RW(read_ahead_kb), __ATTR_RW(min_ratio), __ATTR_RW(max_ratio), + __ATTR_RW(cpu_list), __ATTR_NULL, }; @@ -428,6 +480,7 @@ static int bdi_forker_thread(void *ptr) writeback_inodes_wb(&bdi->wb, 1024, WB_REASON_FORKER_THREAD); } else { + int ret; /* * The spinlock makes sure we do not lose * wake-ups when racing with 'bdi_queue_work()'. 
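
The cpu_list attribute added above lets an administrator restrict a bdi's flusher thread to an explicit set of CPUs: the store routine parses a cpulist with cpulist_parse() and applies it to the flusher task with set_cpus_allowed_ptr(), and the forker-thread hunk that follows applies the saved mask to newly created flusher threads. As a rough userspace analogue (my sketch, not part of the patch), the same restriction on an ordinary thread looks like this with sched_setaffinity():

/*
 * Userspace model of what cpu_list_store() achieves in-kernel via
 * set_cpus_allowed_ptr(): pin the calling thread to a CPU mask and
 * read the mask back.
 */
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
        cpu_set_t mask;
        int cpu;

        CPU_ZERO(&mask);
        CPU_SET(0, &mask);              /* like writing "0" to cpu_list */

        if (sched_setaffinity(0, sizeof(mask), &mask) != 0) {
                perror("sched_setaffinity");
                return 1;
        }
        if (sched_getaffinity(0, sizeof(mask), &mask) != 0) {
                perror("sched_getaffinity");
                return 1;
        }
        for (cpu = 0; cpu < CPU_SETSIZE; cpu++)
                if (CPU_ISSET(cpu, &mask))
                        printf("thread may run on CPU %d\n", cpu);
        return 0;
}

With the patch applied, the knob itself would be driven from sysfs, e.g. by writing a cpulist such as 0-3 to the per-bdi cpu_list attribute registered next to read_ahead_kb and the ratio attributes (presumably under /sys/class/bdi/, where the other bdi attributes live).
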
@@ -437,6 +490,14 @@ static int bdi_forker_thread(void *ptr) spin_lock_bh(&bdi->wb_lock); bdi->wb.task = task; spin_unlock_bh(&bdi->wb_lock); + mutex_lock(&bdi->flusher_cpumask_lock); + ret = set_cpus_allowed_ptr(task, + bdi->flusher_cpumask); + mutex_unlock(&bdi->flusher_cpumask_lock); + if (ret) + printk_once("%s: failed to bind flusher" + " thread %s, error %d\n", + __func__, task->comm, ret); wake_up_process(task); } bdi_clear_pending(bdi); @@ -509,6 +570,17 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent, dev_name(dev)); if (IS_ERR(wb->task)) return PTR_ERR(wb->task); + } else { + int node; + /* + * Set up a default cpumask for the flusher threads that + * includes all cpus on the same numa node as the device. + * The mask may be overridden via sysfs. + */ + node = dev_to_node(bdi->dev); + if (node != NUMA_NO_NODE) + cpumask_copy(bdi->flusher_cpumask, + cpumask_of_node(node)); } bdi_debug_register(bdi, dev_name(dev)); @@ -634,6 +706,15 @@ int bdi_init(struct backing_dev_info *bdi) bdi_wb_init(&bdi->wb, bdi); + if (!bdi_cap_flush_forker(bdi)) { + bdi->flusher_cpumask = kmalloc(sizeof(cpumask_t), GFP_KERNEL); + if (!bdi->flusher_cpumask) + return -ENOMEM; + cpumask_setall(bdi->flusher_cpumask); + mutex_init(&bdi->flusher_cpumask_lock); + } else + bdi->flusher_cpumask = NULL; + for (i = 0; i < NR_BDI_STAT_ITEMS; i++) { err = percpu_counter_init(&bdi->bdi_stat[i], 0); if (err) @@ -656,6 +737,7 @@ int bdi_init(struct backing_dev_info *bdi) err: while (i--) percpu_counter_destroy(&bdi->bdi_stat[i]); + kfree(bdi->flusher_cpumask); } return err; @@ -683,6 +765,8 @@ void bdi_destroy(struct backing_dev_info *bdi) bdi_unregister(bdi); + kfree(bdi->flusher_cpumask); + /* * If bdi_unregister() had already been called earlier, the * wakeup_timer could still be armed because bdi_prune_sb() |
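
Finally, the block/deadline-iosched.c hunk above switches the FIFO expiry check from time_after() to time_after_eq(), so a request whose deadline equals the current jiffies value is treated as expired instead of being left for another pass. A standalone model of that boundary case, re-deriving the wraparound-safe comparisons in userspace (my reimplementation, assuming the usual <linux/jiffies.h> definitions):

/*
 * With time_after(), a request whose fifo deadline equals the current
 * tick is NOT yet expired; with time_after_eq() it is.
 */
#include <stdio.h>

#define time_after(a, b)        ((long)((b) - (a)) < 0)
#define time_after_eq(a, b)     ((long)((a) - (b)) >= 0)

int main(void)
{
        unsigned long jiffies = 1000;   /* current tick, illustrative */
        unsigned long fifo_time = 1000; /* request deadline == now */

        printf("time_after(jiffies, fifo_time)    = %d\n",
               time_after(jiffies, fifo_time));     /* 0: not expired */
        printf("time_after_eq(jiffies, fifo_time) = %d\n",
               time_after_eq(jiffies, fifo_time));  /* 1: expired */
        return 0;
}
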