Diffstat (limited to 'block/cfq-iosched.c')
-rw-r--r-- | block/cfq-iosched.c | 232
1 file changed, 169 insertions, 63 deletions
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index ab7a9e6..7c52d68 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -300,7 +300,9 @@ struct cfq_data {
 
 	/* List of cfq groups being managed on this device*/
 	struct hlist_head cfqg_list;
-	struct rcu_head rcu;
+
+	/* Number of groups which are on blkcg->blkg_list */
+	unsigned int nr_blkcg_linked_grps;
 };
 
 static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd);
@@ -665,15 +667,11 @@ cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2,
 	if (rq2 == NULL)
 		return rq1;
 
-	if (rq_is_sync(rq1) && !rq_is_sync(rq2))
-		return rq1;
-	else if (rq_is_sync(rq2) && !rq_is_sync(rq1))
-		return rq2;
-	if ((rq1->cmd_flags & REQ_META) && !(rq2->cmd_flags & REQ_META))
-		return rq1;
-	else if ((rq2->cmd_flags & REQ_META) &&
-		 !(rq1->cmd_flags & REQ_META))
-		return rq2;
+	if (rq_is_sync(rq1) != rq_is_sync(rq2))
+		return rq_is_sync(rq1) ? rq1 : rq2;
+
+	if ((rq1->cmd_flags ^ rq2->cmd_flags) & REQ_META)
+		return rq1->cmd_flags & REQ_META ? rq1 : rq2;
 
 	s1 = blk_rq_pos(rq1);
 	s2 = blk_rq_pos(rq2);
@@ -1014,28 +1012,47 @@ void cfq_update_blkio_group_weight(void *key, struct blkio_group *blkg,
 	cfqg->needs_update = true;
 }
 
-static struct cfq_group * cfq_find_alloc_cfqg(struct cfq_data *cfqd,
-			struct blkio_cgroup *blkcg, int create)
+static void cfq_init_add_cfqg_lists(struct cfq_data *cfqd,
+			struct cfq_group *cfqg, struct blkio_cgroup *blkcg)
 {
-	struct cfq_group *cfqg = NULL;
-	void *key = cfqd;
-	int i, j;
-	struct cfq_rb_root *st;
 	struct backing_dev_info *bdi = &cfqd->queue->backing_dev_info;
 	unsigned int major, minor;
 
-	cfqg = cfqg_of_blkg(blkiocg_lookup_group(blkcg, key));
-	if (cfqg && !cfqg->blkg.dev && bdi->dev && dev_name(bdi->dev)) {
+	/*
+	 * Add group onto cgroup list. It might happen that bdi->dev is
+	 * not initialized yet. Initialize this new group without major
+	 * and minor info and this info will be filled in once a new thread
+	 * comes for IO.
+	 */
+	if (bdi->dev) {
 		sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
-		cfqg->blkg.dev = MKDEV(major, minor);
-		goto done;
-	}
-	if (cfqg || !create)
-		goto done;
+		cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg,
					(void *)cfqd, MKDEV(major, minor));
+	} else
+		cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg,
					(void *)cfqd, 0);
+
+	cfqd->nr_blkcg_linked_grps++;
+	cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev);
+
+	/* Add group on cfqd list */
+	hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
+}
+
+/*
+ * Should be called from sleepable context. No request queue lock as per
+ * cpu stats are allocated dynamically and alloc_percpu needs to be called
+ * from sleepable context.
+ */
+static struct cfq_group * cfq_alloc_cfqg(struct cfq_data *cfqd)
+{
+	struct cfq_group *cfqg = NULL;
+	int i, j, ret;
+	struct cfq_rb_root *st;
 
 	cfqg = kzalloc_node(sizeof(*cfqg), GFP_ATOMIC, cfqd->queue->node);
 	if (!cfqg)
-		goto done;
+		return NULL;
 
 	for_each_cfqg_st(cfqg, i, j, st)
 		*st = CFQ_RB_ROOT;
@@ -1049,43 +1066,94 @@ static struct cfq_group * cfq_find_alloc_cfqg(struct cfq_data *cfqd,
 	 */
 	cfqg->ref = 1;
 
+	ret = blkio_alloc_blkg_stats(&cfqg->blkg);
+	if (ret) {
+		kfree(cfqg);
+		return NULL;
+	}
+
+	return cfqg;
+}
+
+static struct cfq_group *
+cfq_find_cfqg(struct cfq_data *cfqd, struct blkio_cgroup *blkcg)
+{
+	struct cfq_group *cfqg = NULL;
+	void *key = cfqd;
+	struct backing_dev_info *bdi = &cfqd->queue->backing_dev_info;
+	unsigned int major, minor;
+
 	/*
-	 * Add group onto cgroup list. It might happen that bdi->dev is
-	 * not initialized yet. Initialize this new group without major
-	 * and minor info and this info will be filled in once a new thread
-	 * comes for IO. See code above.
+	 * This is the common case when there are no blkio cgroups.
+	 * Avoid lookup in this case
	 */
-	if (bdi->dev) {
-		sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
-		cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg, (void *)cfqd,
-					MKDEV(major, minor));
-	} else
-		cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg, (void *)cfqd,
-					0);
-
-	cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev);
+	if (blkcg == &blkio_root_cgroup)
+		cfqg = &cfqd->root_group;
+	else
+		cfqg = cfqg_of_blkg(blkiocg_lookup_group(blkcg, key));
 
-	/* Add group on cfqd list */
-	hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
+	if (cfqg && !cfqg->blkg.dev && bdi->dev && dev_name(bdi->dev)) {
+		sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
+		cfqg->blkg.dev = MKDEV(major, minor);
+	}
 
-done:
 	return cfqg;
 }
 
 /*
- * Search for the cfq group current task belongs to. If create = 1, then also
- * create the cfq group if it does not exist. request_queue lock must be held.
+ * Search for the cfq group current task belongs to. request_queue lock must
+ * be held.
  */
-static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, int create)
+static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd)
 {
 	struct blkio_cgroup *blkcg;
-	struct cfq_group *cfqg = NULL;
+	struct cfq_group *cfqg = NULL, *__cfqg = NULL;
+	struct request_queue *q = cfqd->queue;
 
 	rcu_read_lock();
 	blkcg = task_blkio_cgroup(current);
-	cfqg = cfq_find_alloc_cfqg(cfqd, blkcg, create);
-	if (!cfqg && create)
+	cfqg = cfq_find_cfqg(cfqd, blkcg);
+	if (cfqg) {
+		rcu_read_unlock();
+		return cfqg;
+	}
+
+	/*
+	 * Need to allocate a group. Allocation of group also needs allocation
+	 * of per cpu stats which in-turn takes a mutex() and can block. Hence
+	 * we need to drop rcu lock and queue_lock before we call alloc.
+	 *
+	 * Not taking any queue reference here and assuming that queue is
+	 * around by the time we return. CFQ queue allocation code does
+	 * the same. It might be racy though.
+	 */
+
+	rcu_read_unlock();
+	spin_unlock_irq(q->queue_lock);
+
+	cfqg = cfq_alloc_cfqg(cfqd);
+
+	spin_lock_irq(q->queue_lock);
+
+	rcu_read_lock();
+	blkcg = task_blkio_cgroup(current);
+
+	/*
+	 * If some other thread already allocated the group while we were
+	 * not holding queue lock, free up the group
+	 */
+	__cfqg = cfq_find_cfqg(cfqd, blkcg);
+
+	if (__cfqg) {
+		kfree(cfqg);
+		rcu_read_unlock();
+		return __cfqg;
+	}
+
+	if (!cfqg)
 		cfqg = &cfqd->root_group;
+
+	cfq_init_add_cfqg_lists(cfqd, cfqg, blkcg);
 	rcu_read_unlock();
 	return cfqg;
 }
@@ -1118,6 +1186,7 @@ static void cfq_put_cfqg(struct cfq_group *cfqg)
 		return;
 	for_each_cfqg_st(cfqg, i, j, st)
 		BUG_ON(!RB_EMPTY_ROOT(&st->rb));
+	free_percpu(cfqg->blkg.stats_cpu);
 	kfree(cfqg);
 }
 
@@ -1176,7 +1245,7 @@ void cfq_unlink_blkio_group(void *key, struct blkio_group *blkg)
 }
 
 #else /* GROUP_IOSCHED */
-static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, int create)
+static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd)
 {
 	return &cfqd->root_group;
 }
@@ -1210,7 +1279,6 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 	struct cfq_rb_root *service_tree;
 	int left;
 	int new_cfqq = 1;
-	int group_changed = 0;
 
 	service_tree = service_tree_for(cfqq->cfqg, cfqq_prio(cfqq),
 						cfqq_type(cfqq));
@@ -1281,7 +1349,7 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 	rb_link_node(&cfqq->rb_node, parent, p);
 	rb_insert_color(&cfqq->rb_node, &service_tree->rb);
 	service_tree->count++;
-	if ((add_front || !new_cfqq) && !group_changed)
+	if (add_front || !new_cfqq)
 		return;
 	cfq_group_notify_queue_add(cfqd, cfqq->cfqg);
 }
@@ -2029,7 +2097,7 @@ cfq_prio_to_maxrq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 
 	WARN_ON(cfqq->ioprio >= IOPRIO_BE_NR);
 
-	return 2 * (base_rq + base_rq * (CFQ_PRIO_LISTS - 1 - cfqq->ioprio));
+	return 2 * base_rq * (IOPRIO_BE_NR - cfqq->ioprio);
 }
 
 /*
@@ -2911,7 +2979,7 @@ cfq_find_alloc_queue(struct cfq_data *cfqd, bool is_sync,
 	struct cfq_group *cfqg;
 
 retry:
-	cfqg = cfq_get_cfqg(cfqd, 1);
+	cfqg = cfq_get_cfqg(cfqd);
 	cic = cfq_cic_lookup(cfqd, ioc);
 	/* cic always exists here */
 	cfqq = cic_to_cfqq(cic, is_sync);
@@ -3815,15 +3883,11 @@ static void cfq_put_async_queues(struct cfq_data *cfqd)
 		cfq_put_queue(cfqd->async_idle_cfqq);
 }
 
-static void cfq_cfqd_free(struct rcu_head *head)
-{
-	kfree(container_of(head, struct cfq_data, rcu));
-}
-
 static void cfq_exit_queue(struct elevator_queue *e)
 {
 	struct cfq_data *cfqd = e->elevator_data;
 	struct request_queue *q = cfqd->queue;
+	bool wait = false;
 
 	cfq_shutdown_timer_wq(cfqd);
 
@@ -3842,7 +3906,13 @@ static void cfq_exit_queue(struct elevator_queue *e)
 
 	cfq_put_async_queues(cfqd);
 	cfq_release_cfq_groups(cfqd);
-	cfq_blkiocg_del_blkio_group(&cfqd->root_group.blkg);
+
+	/*
+	 * If there are groups which we could not unlink from blkcg list,
+	 * wait for a rcu period for them to be freed.
+	 */
+	if (cfqd->nr_blkcg_linked_grps)
+		wait = true;
 
 	spin_unlock_irq(q->queue_lock);
 
@@ -3852,8 +3922,25 @@ static void cfq_exit_queue(struct elevator_queue *e)
 	ida_remove(&cic_index_ida, cfqd->cic_index);
 	spin_unlock(&cic_index_lock);
 
-	/* Wait for cfqg->blkg->key accessors to exit their grace periods. */
-	call_rcu(&cfqd->rcu, cfq_cfqd_free);
+	/*
+	 * Wait for cfqg->blkg->key accessors to exit their grace periods.
+	 * Do this wait only if there are other unlinked groups out
+	 * there. This can happen if cgroup deletion path claimed the
+	 * responsibility of cleaning up a group before queue cleanup code
+	 * get to the group.
+	 *
+	 * Do not call synchronize_rcu() unconditionally as there are drivers
+	 * which create/delete request queue hundreds of times during scan/boot
+	 * and synchronize_rcu() can take significant time and slow down boot.
+	 */
+	if (wait)
+		synchronize_rcu();
+
+#ifdef CONFIG_CFQ_GROUP_IOSCHED
+	/* Free up per cpu stats for root group */
+	free_percpu(cfqd->root_group.blkg.stats_cpu);
+#endif
+	kfree(cfqd);
 }
 
 static int cfq_alloc_cic_index(void)
@@ -3886,8 +3973,12 @@ static void *cfq_init_queue(struct request_queue *q)
 		return NULL;
 
 	cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node);
-	if (!cfqd)
+	if (!cfqd) {
+		spin_lock(&cic_index_lock);
+		ida_remove(&cic_index_ida, i);
+		spin_unlock(&cic_index_lock);
 		return NULL;
+	}
 
 	/*
 	 * Don't need take queue_lock in the routine, since we are
@@ -3909,14 +4000,29 @@ static void *cfq_init_queue(struct request_queue *q)
 
 #ifdef CONFIG_CFQ_GROUP_IOSCHED
 	/*
-	 * Take a reference to root group which we never drop. This is just
-	 * to make sure that cfq_put_cfqg() does not try to kfree root group
+	 * Set root group reference to 2. One reference will be dropped when
+	 * all groups on cfqd->cfqg_list are being deleted during queue exit.
+	 * Other reference will remain there as we don't want to delete this
+	 * group as it is statically allocated and gets destroyed when
+	 * throtl_data goes away.
	 */
-	cfqg->ref = 1;
+	cfqg->ref = 2;
+
+	if (blkio_alloc_blkg_stats(&cfqg->blkg)) {
+		kfree(cfqg);
+		kfree(cfqd);
+		return NULL;
+	}
+
 	rcu_read_lock();
+
 	cfq_blkiocg_add_blkio_group(&blkio_root_cgroup, &cfqg->blkg,
 					(void *)cfqd, 0);
 	rcu_read_unlock();
+	cfqd->nr_blkcg_linked_grps++;
+
+	/* Add group on cfqd->cfqg_list */
+	hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
 #endif
 	/*
 	 * Not strictly needed (since RB_ROOT just clears the node and we
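Note: the cfq_choose_req() hunk above collapses each pair of mirrored branches into a single test. The idea is that ((rq1->cmd_flags ^ rq2->cmd_flags) & REQ_META) is non-zero exactly when the two requests differ in that flag, and the same holds for the rq_is_sync() comparison. A minimal user-space sketch (stand-in flag value and request struct, not the kernel's definitions) that checks the old and new forms pick the same request:

    #include <assert.h>
    #include <stddef.h>

    #define REQ_META (1u << 1)          /* stand-in bit, not the kernel value */

    struct req { unsigned int cmd_flags; };

    /* Old form: two symmetric branches. */
    static struct req *pick_old(struct req *a, struct req *b)
    {
        if ((a->cmd_flags & REQ_META) && !(b->cmd_flags & REQ_META))
            return a;
        else if ((b->cmd_flags & REQ_META) && !(a->cmd_flags & REQ_META))
            return b;
        return NULL;                    /* this flag does not decide it */
    }

    /* New form: XOR has REQ_META set exactly when the two differ. */
    static struct req *pick_new(struct req *a, struct req *b)
    {
        if ((a->cmd_flags ^ b->cmd_flags) & REQ_META)
            return (a->cmd_flags & REQ_META) ? a : b;
        return NULL;
    }

    int main(void)
    {
        /* Exhaustively compare both forms for every flag combination. */
        for (unsigned int i = 0; i < 4; i++) {
            for (unsigned int j = 0; j < 4; j++) {
                struct req a = { i }, b = { j };
                assert(pick_old(&a, &b) == pick_new(&a, &b));
            }
        }
        return 0;
    }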
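The cfq_prio_to_maxrq() hunk relies on CFQ_PRIO_LISTS and IOPRIO_BE_NR both being 8, so 2 * (base_rq + base_rq * (CFQ_PRIO_LISTS - 1 - ioprio)) factors to 2 * base_rq * (CFQ_PRIO_LISTS - ioprio), i.e. 2 * base_rq * (IOPRIO_BE_NR - ioprio). A quick stand-alone check of that identity (the base_rq value here is arbitrary; the kernel default for cfq_slice_async_rq is 2):

    #include <assert.h>

    #define CFQ_PRIO_LISTS 8
    #define IOPRIO_BE_NR   8

    int main(void)
    {
        int base_rq = 2;

        for (int ioprio = 0; ioprio < IOPRIO_BE_NR; ioprio++) {
            int old_rq = 2 * (base_rq + base_rq * (CFQ_PRIO_LISTS - 1 - ioprio));
            int new_rq = 2 * base_rq * (IOPRIO_BE_NR - ioprio);
            assert(old_rq == new_rq);
        }
        return 0;
    }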
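The new cfq_get_cfqg() follows a common pattern: the lookup runs under the queue lock, but the allocation (alloc_percpu() for the per cpu stats) can sleep, so the lock is dropped around cfq_alloc_cfqg() and the lookup is repeated afterwards in case another thread installed the group in the meantime. A user-space sketch of that pattern (a pthread mutex and a plain list stand in for the queue lock and the group list; all names here are illustrative only):

    #include <pthread.h>
    #include <stdlib.h>

    struct group { int key; struct group *next; };

    static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;
    static struct group *groups;            /* protected by queue_lock */

    static struct group *find_group(int key)
    {
        for (struct group *g = groups; g; g = g->next)
            if (g->key == key)
                return g;
        return NULL;
    }

    /* Called and returns with queue_lock held, like cfq_get_cfqg(). */
    static struct group *get_group(int key)
    {
        struct group *g = find_group(key);
        if (g)
            return g;

        /* The allocation may sleep, so it cannot run under the lock. */
        pthread_mutex_unlock(&queue_lock);
        struct group *new_g = calloc(1, sizeof(*new_g));
        pthread_mutex_lock(&queue_lock);

        /* Re-check: another thread may have added the group meanwhile. */
        g = find_group(key);
        if (g) {
            free(new_g);                    /* lost the race, drop our copy */
            return g;
        }
        if (!new_g)
            return NULL;                    /* CFQ falls back to the root group */

        new_g->key = key;
        new_g->next = groups;
        groups = new_g;
        return new_g;
    }

    int main(void)
    {
        pthread_mutex_lock(&queue_lock);
        get_group(1);
        get_group(1);                       /* second call hits the fast path */
        pthread_mutex_unlock(&queue_lock);
        return 0;
    }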
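The cfq_init_queue() hunk also plugs a leak on the error path: if kmalloc_node() fails, the cic index obtained earlier must be handed back to the ida. The general shape of that unwind, with trivial stand-ins for cfq_alloc_cic_index()/ida_remove():

    #include <stdlib.h>

    #define MAX_IDX 16
    static char idx_used[MAX_IDX];          /* toy index allocator */

    static int alloc_index(void)
    {
        for (int i = 0; i < MAX_IDX; i++)
            if (!idx_used[i]) {
                idx_used[i] = 1;
                return i;
            }
        return -1;
    }

    static void release_index(int i) { idx_used[i] = 0; }

    struct ctx { int index; };

    static struct ctx *ctx_create(void)
    {
        int idx = alloc_index();
        if (idx < 0)
            return NULL;

        struct ctx *c = calloc(1, sizeof(*c));
        if (!c) {
            release_index(idx);             /* undo the earlier step */
            return NULL;
        }
        c->index = idx;
        return c;
    }

    int main(void)
    {
        struct ctx *c = ctx_create();
        if (c) {
            release_index(c->index);
            free(c);
        }
        return 0;
    }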