aboutsummaryrefslogtreecommitdiffstats
path: root/block/bfq.h
blob: e2ce5052a01c49f1bcf1495bb41c288d0c51beac (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
/*
 * BFQ-v5 for 3.0: data structures and common functions prototypes.
 *
 * Based on ideas and code from CFQ:
 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
 *
 * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
 *		      Paolo Valente <paolo.valente@unimore.it>
 */

#ifndef _BFQ_H
#define _BFQ_H

#include <linux/blktrace_api.h>
#include <linux/hrtimer.h>
#include <linux/ioprio.h>
#include <linux/rbtree.h>

#define BFQ_IOPRIO_CLASSES	3
#define BFQ_CL_IDLE_TIMEOUT	HZ/5

#define BFQ_MIN_WEIGHT	1
#define BFQ_MAX_WEIGHT	1000

#define BFQ_DEFAULT_GRP_WEIGHT	10
#define BFQ_DEFAULT_GRP_IOPRIO	0
#define BFQ_DEFAULT_GRP_CLASS	IOPRIO_CLASS_BE

struct bfq_entity;

/**
 * struct bfq_service_tree - per ioprio_class service tree.
 * @active: tree for active entities (i.e., those backlogged).
 * @idle: tree for idle entities (i.e., those not backlogged, with V <= F_i).
 * @first_idle: idle entity with minimum F_i.
 * @last_idle: idle entity with maximum F_i.
 * @vtime: scheduler virtual time.
 * @wsum: scheduler weight sum; active and idle entities contribute to it.
 *
 * Each service tree represents a B-WF2Q+ scheduler on its own.  Each
 * ioprio_class has its own independent scheduler, and so its own
 * bfq_service_tree.  All the fields are protected by the queue lock
 * of the containing bfqd.
 */
struct bfq_service_tree {
	struct rb_root active;
	struct rb_root idle;

	struct bfq_entity *first_idle;
	struct bfq_entity *last_idle;

	u64 vtime;
	unsigned long wsum;
};

/**
 * struct bfq_sched_data - multi-class scheduler.
 * @active_entity: entity under service.
 * @next_active: head-of-the-line entity in the scheduler.
 * @service_tree: array of service trees, one per ioprio_class.
 *
 * bfq_sched_data is the basic scheduler queue.  It supports three
 * ioprio_classes, and can be used either as a toplevel queue or as
 * an intermediate queue on a hierarchical setup.
 * @next_active points to the active entity of the sched_data service
 * trees that will be scheduled next.
 *
 * The supported ioprio_classes are the same as in CFQ, in descending
 * priority order, IOPRIO_CLASS_RT, IOPRIO_CLASS_BE, IOPRIO_CLASS_IDLE.
 * Requests from higher priority queues are served before all the
 * requests from lower priority queues; among requests of the same
 * queue requests are served according to B-WF2Q+.
 * All the fields are protected by the queue lock of the containing bfqd.
 */
struct bfq_sched_data {
	struct bfq_entity *active_entity;
	struct bfq_entity *next_active;
	struct bfq_service_tree service_tree[BFQ_IOPRIO_CLASSES];
};

/**
 * struct bfq_entity - schedulable entity.
 * @rb_node: service_tree member.
 * @on_st: flag, true if the entity is on a tree (either the active or
 *         the idle one of its service_tree).
 * @finish: B-WF2Q+ finish timestamp (aka F_i).
 * @start: B-WF2Q+ start timestamp (aka S_i).
 * @tree: tree the entity is enqueued into; %NULL if not on a tree.
 * @min_start: minimum start time of the (active) subtree rooted at
 *             this entity; used for O(log N) lookups into active trees.
 * @service: service received during the last round of service.
 * @budget: budget used to calculate F_i; F_i = S_i + @budget / @weight.
 * @weight: weight of the queue
 * @parent: parent entity, for hierarchical scheduling.
 * @my_sched_data: for non-leaf nodes in the cgroup hierarchy, the
 *                 associated scheduler queue, %NULL on leaf nodes.
 * @sched_data: the scheduler queue this entity belongs to.
 * @ioprio: the ioprio in use.
 * @new_weight: when a weight change is requested, the new weight value.
 * @orig_weight: original weight, used to implement weight boosting
 * @new_ioprio: when an ioprio change is requested, the new ioprio value.
 * @ioprio_class: the ioprio_class in use.
 * @new_ioprio_class: when an ioprio_class change is requested, the new
 *                    ioprio_class value.
 * @ioprio_changed: flag, true when the user requested a weight, ioprio or
 *                  ioprio_class change.
 *
 * A bfq_entity is used to represent either a bfq_queue (leaf node in the
 * cgroup hierarchy) or a bfq_group into the upper level scheduler.  Each
 * entity belongs to the sched_data of the parent group in the cgroup
 * hierarchy.  Non-leaf entities have also their own sched_data, stored
 * in @my_sched_data.
 *
 * Each entity stores independently its priority values; this would
 * allow different weights on different devices, but this
 * functionality is not exported to userspace by now.  Priorities and
 * weights are updated lazily, first storing the new values into the
 * new_* fields, then setting the @ioprio_changed flag.  As soon as
 * there is a transition in the entity state that allows the priority
 * update to take place the effective and the requested priority
 * values are synchronized.
 *
 * Unless cgroups are used, the weight value is calculated from the
 * ioprio to export the same interface as CFQ.  When dealing with
 * ``well-behaved'' queues (i.e., queues that do not spend too much
 * time to consume their budget and have true sequential behavior, and
 * when there are no external factors breaking anticipation) the
 * relative weights at each level of the cgroups hierarchy should be
 * guaranteed.  All the fields are protected by the queue lock of the
 * containing bfqd.
 */
struct bfq_entity {
	struct rb_node rb_node;

	int on_st;

	u64 finish;
	u64 start;

	struct rb_root *tree;

	u64 min_start;

	unsigned long service, budget;
	unsigned short weight, new_weight;
	unsigned short orig_weight;

	struct bfq_entity *parent;

	struct bfq_sched_data *my_sched_data;
	struct bfq_sched_data *sched_data;

	unsigned short ioprio, new_ioprio;
	unsigned short ioprio_class, new_ioprio_class;

	int ioprio_changed;
};

struct bfq_group;

/**
 * struct bfq_queue - leaf schedulable entity.
 * @ref: reference counter.
 * @bfqd: parent bfq_data.
 * @new_bfqq: shared bfq_queue if queue is cooperating with
 *           one or more other queues.
 * @pos_node: request-position tree member (see bfq_data's @rq_pos_tree).
 * @pos_root: request-position tree root (see bfq_data's @rq_pos_tree).
 * @sort_list: sorted list of pending requests.
 * @next_rq: if fifo isn't expired, next request to serve.
 * @queued: nr of requests queued in @sort_list.
 * @allocated: currently allocated requests.
 * @meta_pending: pending metadata requests.
 * @fifo: fifo list of requests in sort_list.
 * @entity: entity representing this queue in the scheduler.
 * @max_budget: maximum budget allowed from the feedback mechanism.
 * @budget_timeout: budget expiration (in jiffies).
 * @dispatched: number of requests on the dispatch list or inside driver.
 * @org_ioprio: saved ioprio during boosted periods.
 * @org_ioprio_class: saved ioprio_class during boosted periods.
 * @flags: status flags.
 * @bfqq_list: node for active/idle bfqq list inside our bfqd.
 * @seek_samples: number of seeks sampled
 * @seek_total: sum of the distances of the seeks sampled
 * @seek_mean: mean seek distance
 * @last_request_pos: position of the last request enqueued
 * @pid: pid of the process owning the queue, used for logging purposes.
 * @last_rais_start_time: last (idle -> weight-raised) transition attempt
 * @raising_cur_max_time: current max raising time for this queue
 *
 * A bfq_queue is a leaf request queue; it can be associated to an io_context
 * or more (if it is an async one).  @cgroup holds a reference to the
 * cgroup, to be sure that it does not disappear while a bfqq still
 * references it (mostly to avoid races between request issuing and task
 * migration followed by cgroup distruction).
 * All the fields are protected by the queue lock of the containing bfqd.
 */
struct bfq_queue {
	atomic_t ref;
	struct bfq_data *bfqd;

	/* fields for cooperating queues handling */
	struct bfq_queue *new_bfqq;
	struct rb_node pos_node;
	struct rb_root *pos_root;

	struct rb_root sort_list;
	struct request *next_rq;
	int queued[2];
	int allocated[2];
	int meta_pending;
	struct list_head fifo;

	struct bfq_entity entity;

	unsigned long max_budget;
	unsigned long budget_timeout;

	int dispatched;

	unsigned short org_ioprio;
	unsigned short org_ioprio_class;

	unsigned int flags;

	struct list_head bfqq_list;

	unsigned int seek_samples;
	u64 seek_total;
	sector_t seek_mean;
	sector_t last_request_pos;

	pid_t pid;

	/* weight-raising fields */
	unsigned int raising_cur_max_time;
	u64 last_rais_start_finish, soft_rt_next_start;
	unsigned int raising_coeff;
};

/**
 * struct bfq_data - per device data structure.
 * @queue: request queue for the managed device.
 * @root_group: root bfq_group for the device.
 * @rq_pos_tree: rbtree sorted by next_request position,
 *		used when determining if two or more queues
 *		have interleaving requests (see bfq_close_cooperator).
 * @busy_queues: number of bfq_queues containing requests (including the
 *		 queue under service, even if it is idling).
 * @queued: number of queued requests.
 * @rq_in_driver: number of requests dispatched and waiting for completion.
 * @sync_flight: number of sync requests in the driver.
 * @max_rq_in_driver: max number of reqs in driver in the last @hw_tag_samples
 *		      completed requests .
 * @hw_tag_samples: nr of samples used to calculate hw_tag.
 * @hw_tag: flag set to one if the driver is showing a queueing behavior.
 * @budgets_assigned: number of budgets assigned.
 * @idle_slice_timer: timer set when idling for the next sequential request
 *                    from the queue under service.
 * @unplug_work: delayed work to restart dispatching on the request queue.
 * @active_queue: bfq_queue under service.
 * @active_cic: cfq_io_context (cic) associated with the @active_queue.
 * @last_position: on-disk position of the last served request.
 * @last_budget_start: beginning of the last budget.
 * @last_idling_start: beginning of the last idle slice.
 * @peak_rate: peak transfer rate observed for a budget.
 * @peak_rate_samples: number of samples used to calculate @peak_rate.
 * @bfq_max_budget: maximum budget allotted to a bfq_queue before rescheduling.
 * @cic_index: use small consequent indexes as radix tree keys to reduce depth
 * @cic_list: list of all the cics active on the bfq_data device.
 * @group_list: list of all the bfq_groups active on the device.
 * @active_list: list of all the bfq_queues active on the device.
 * @idle_list: list of all the bfq_queues idle on the device.
 * @bfq_quantum: max number of requests dispatched per dispatch round.
 * @bfq_fifo_expire: timeout for async/sync requests; when it expires
 *                   requests are served in fifo order.
 * @bfq_back_penalty: weight of backward seeks wrt forward ones.
 * @bfq_back_max: maximum allowed backward seek.
 * @bfq_slice_idle: maximum idling time.
 * @bfq_user_max_budget: user-configured max budget value (0 for auto-tuning).
 * @bfq_max_budget_async_rq: maximum budget (in nr of requests) allotted to
 *                           async queues.
 * @bfq_timeout: timeout for bfq_queues to consume their budget; used to
 *               to prevent seeky queues to impose long latencies to well
 *               behaved ones (this also implies that seeky queues cannot
 *               receive guarantees in the service domain; after a timeout
 *               they are charged for the whole allocated budget, to try
 *               to preserve a behavior reasonably fair among them, but
 *               without service-domain guarantees).
 * @bfq_raising_coeff: Maximum factor by which the weight of a boosted
 *                            queue is multiplied
 * @bfq_raising_max_time: maximum duration of a weight-raising period (jiffies)
 * @bfq_raising_rt_max_time: maximum duration for soft real-time processes
 * @bfq_raising_min_idle_time: minimum idle period after which weight-raising
 *			       may be reactivated for a queue (in jiffies)
 * @bfq_raising_min_inter_arr_async: minimum period between request arrivals
 *                                   after which weight-raising may be
 *                                   reactivated for an already busy queue
 *                                   (in jiffies)
 * @bfq_raising_max_softrt_rate: max service-rate for a soft real-time queue,
 *			         sectors per seconds
 * @RT_prod: cached value of the product R*T used for computing the maximum
 * 	     duration of the weight raising automatically
 * @oom_bfqq: fallback dummy bfqq for extreme OOM conditions
 *
 * All the fields are protected by the @queue lock.
 */
struct bfq_data {
	struct request_queue *queue;

	struct bfq_group *root_group;

	struct rb_root rq_pos_tree;

	int busy_queues;
	int queued;
	int rq_in_driver;
	int sync_flight;

	int max_rq_in_driver;
	int hw_tag_samples;
	int hw_tag;

	int budgets_assigned;

	struct timer_list idle_slice_timer;
	struct work_struct unplug_work;

	struct bfq_queue *active_queue;
	struct cfq_io_context *active_cic;

	sector_t last_position;

	ktime_t last_budget_start;
	ktime_t last_idling_start;
	int peak_rate_samples;
	u64 peak_rate;
	unsigned long bfq_max_budget;

	unsigned int cic_index;
	struct list_head cic_list;
	struct hlist_head group_list;
	struct list_head active_list;
	struct list_head idle_list;

	unsigned int bfq_quantum;
	unsigned int bfq_fifo_expire[2];
	unsigned int bfq_back_penalty;
	unsigned int bfq_back_max;
	unsigned int bfq_slice_idle;
	u64 bfq_class_idle_last_service;

	unsigned int bfq_user_max_budget;
	unsigned int bfq_max_budget_async_rq;
	unsigned int bfq_timeout[2];

	bool low_latency;

	/* parameters of the low_latency heuristics */
	unsigned int bfq_raising_coeff;
	unsigned int bfq_raising_max_time;
	unsigned int bfq_raising_rt_max_time;
	unsigned int bfq_raising_min_idle_time;
	unsigned int bfq_raising_min_inter_arr_async;
	unsigned int bfq_raising_max_softrt_rate;
	u64 RT_prod;

	struct bfq_queue oom_bfqq;
};

enum bfqq_state_flags {
	BFQ_BFQQ_FLAG_busy = 0,		/* has requests or is under service */
	BFQ_BFQQ_FLAG_wait_request,	/* waiting for a request */
	BFQ_BFQQ_FLAG_must_alloc,	/* must be allowed rq alloc */
	BFQ_BFQQ_FLAG_fifo_expire,	/* FIFO checked in this slice */
	BFQ_BFQQ_FLAG_idle_window,	/* slice idling enabled */
	BFQ_BFQQ_FLAG_prio_changed,	/* task priority has changed */
	BFQ_BFQQ_FLAG_sync,		/* synchronous queue */
	BFQ_BFQQ_FLAG_budget_new,	/* no completion with this budget */
	BFQ_BFQQ_FLAG_coop,		/* bfqq is shared */
	BFQ_BFQQ_FLAG_split_coop,	/* shared bfqq will be splitted */
	BFQ_BFQQ_FLAG_some_coop_idle,   /* some cooperator is inactive */
};

#define BFQ_BFQQ_FNS(name)						\
static inline void bfq_mark_bfqq_##name(struct bfq_queue *bfqq)		\
{									\
	(bfqq)->flags |= (1 << BFQ_BFQQ_FLAG_##name);			\
}									\
static inline void bfq_clear_bfqq_##name(struct bfq_queue *bfqq)	\
{									\
	(bfqq)->flags &= ~(1 << BFQ_BFQQ_FLAG_##name);			\
}									\
static inline int bfq_bfqq_##name(const struct bfq_queue *bfqq)		\
{									\
	return ((bfqq)->flags & (1 << BFQ_BFQQ_FLAG_##name)) != 0;	\
}

BFQ_BFQQ_FNS(busy);
BFQ_BFQQ_FNS(wait_request);
BFQ_BFQQ_FNS(must_alloc);
BFQ_BFQQ_FNS(fifo_expire);
BFQ_BFQQ_FNS(idle_window);
BFQ_BFQQ_FNS(prio_changed);
BFQ_BFQQ_FNS(sync);
BFQ_BFQQ_FNS(budget_new);
BFQ_BFQQ_FNS(coop);
BFQ_BFQQ_FNS(split_coop);
BFQ_BFQQ_FNS(some_coop_idle);
#undef BFQ_BFQQ_FNS

/* Logging facilities. */
#define bfq_log_bfqq(bfqd, bfqq, fmt, args...) \
	blk_add_trace_msg((bfqd)->queue, "bfq%d " fmt, (bfqq)->pid, ##args)

#define bfq_log(bfqd, fmt, args...) \
	blk_add_trace_msg((bfqd)->queue, "bfq " fmt, ##args)

/* Expiration reasons. */
enum bfqq_expiration {
	BFQ_BFQQ_TOO_IDLE = 0,		/* queue has been idling for too long */
	BFQ_BFQQ_BUDGET_TIMEOUT,	/* budget took too long to be used */
	BFQ_BFQQ_BUDGET_EXHAUSTED,	/* budget consumed */
	BFQ_BFQQ_NO_MORE_REQUESTS,	/* the queue has no more requests */
};

#ifdef CONFIG_CGROUP_BFQIO
/**
 * struct bfq_group - per (device, cgroup) data structure.
 * @entity: schedulable entity to insert into the parent group sched_data.
 * @sched_data: own sched_data, to contain child entities (they may be
 *              both bfq_queues and bfq_groups).
 * @group_node: node to be inserted into the bfqio_cgroup->group_data
 *              list of the containing cgroup's bfqio_cgroup.
 * @bfqd_node: node to be inserted into the @bfqd->group_list list
 *             of the groups active on the same device; used for cleanup.
 * @bfqd: the bfq_data for the device this group acts upon.
 * @async_bfqq: array of async queues for all the tasks belonging to
 *              the group, one queue per ioprio value per ioprio_class,
 *              except for the idle class that has only one queue.
 * @async_idle_bfqq: async queue for the idle class (ioprio is ignored).
 * @my_entity: pointer to @entity, %NULL for the toplevel group; used
 *             to avoid too many special cases during group creation/migration.
 *
 * Each (device, cgroup) pair has its own bfq_group, i.e., for each cgroup
 * there is a set of bfq_groups, each one collecting the lower-level
 * entities belonging to the group that are acting on the same device.
 *
 * Locking works as follows:
 *    o @group_node is protected by the bfqio_cgroup lock, and is accessed
 *      via RCU from its readers.
 *    o @bfqd is protected by the queue lock, RCU is used to access it
 *      from the readers.
 *    o All the other fields are protected by the @bfqd queue lock.
 */
struct bfq_group {
	struct bfq_entity entity;
	struct bfq_sched_data sched_data;

	struct hlist_node group_node;
	struct hlist_node bfqd_node;

	void *bfqd;

	struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR];
	struct bfq_queue *async_idle_bfqq;

	struct bfq_entity *my_entity;
};

/**
 * struct bfqio_cgroup - bfq cgroup data structure.
 * @css: subsystem state for bfq in the containing cgroup.
 * @weight: cgroup weight.
 * @ioprio: cgroup ioprio.
 * @ioprio_class: cgroup ioprio_class.
 * @lock: spinlock that protects @ioprio, @ioprio_class and @group_data.
 * @group_data: list containing the bfq_group belonging to this cgroup.
 *
 * @group_data is accessed using RCU, with @lock protecting the updates,
 * @ioprio and @ioprio_class are protected by @lock.
 */
struct bfqio_cgroup {
	struct cgroup_subsys_state css;

	unsigned short weight, ioprio, ioprio_class;

	spinlock_t lock;
	struct hlist_head group_data;
};
#else
struct bfq_group {
	struct bfq_sched_data sched_data;

	struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR];
	struct bfq_queue *async_idle_bfqq;
};
#endif

static inline struct bfq_service_tree *
bfq_entity_service_tree(struct bfq_entity *entity)
{
	struct bfq_sched_data *sched_data = entity->sched_data;
	unsigned int idx = entity->ioprio_class - 1;

	BUG_ON(idx >= BFQ_IOPRIO_CLASSES);
	BUG_ON(sched_data == NULL);

	return sched_data->service_tree + idx;
}

static inline struct bfq_queue *cic_to_bfqq(struct cfq_io_context *cic,
					    int is_sync)
{
	return cic->cfqq[!!is_sync];
}

static inline void cic_set_bfqq(struct cfq_io_context *cic,
				struct bfq_queue *bfqq, int is_sync)
{
	cic->cfqq[!!is_sync] = bfqq;
}

static inline void call_for_each_cic(struct io_context *ioc,
				     void (*func)(struct io_context *,
				     struct cfq_io_context *))
{
	struct cfq_io_context *cic;
	struct hlist_node *n;

	rcu_read_lock();
	hlist_for_each_entry_rcu(cic, n, &ioc->bfq_cic_list, cic_list)
		func(ioc, cic);
	rcu_read_unlock();
}

#define CIC_DEAD_KEY    1ul
#define CIC_DEAD_INDEX_SHIFT    1

static inline void *bfqd_dead_key(struct bfq_data *bfqd)
{
	return (void *)(bfqd->cic_index << CIC_DEAD_INDEX_SHIFT | CIC_DEAD_KEY);
}

/**
 * bfq_get_bfqd_locked - get a lock to a bfqd using a RCU protected pointer.
 * @ptr: a pointer to a bfqd.
 * @flags: storage for the flags to be saved.
 *
 * This function allows cic->key and bfqg->bfqd to be protected by the
 * queue lock of the bfqd they reference; the pointer is dereferenced
 * under RCU, so the storage for bfqd is assured to be safe as long
 * as the RCU read side critical section does not end.  After the
 * bfqd->queue->queue_lock is taken the pointer is rechecked, to be
 * sure that no other writer accessed it.  If we raced with a writer,
 * the function returns NULL, with the queue unlocked, otherwise it
 * returns the dereferenced pointer, with the queue locked.
 */
static inline struct bfq_data *bfq_get_bfqd_locked(void **ptr,
						   unsigned long *flags)
{
	struct bfq_data *bfqd;

	rcu_read_lock();
	bfqd = rcu_dereference(*(struct bfq_data **)ptr);

	if (bfqd != NULL && !((unsigned long) bfqd & CIC_DEAD_KEY)) {
		spin_lock_irqsave(bfqd->queue->queue_lock, *flags);
		if (*ptr == bfqd)
			goto out;
		spin_unlock_irqrestore(bfqd->queue->queue_lock, *flags);
	}

	bfqd = NULL;
out:
	rcu_read_unlock();
	return bfqd;
}

static inline void bfq_put_bfqd_unlock(struct bfq_data *bfqd,
				       unsigned long *flags)
{
	spin_unlock_irqrestore(bfqd->queue->queue_lock, *flags);
}

static void bfq_changed_ioprio(struct io_context *ioc,
			       struct cfq_io_context *cic);
static void bfq_put_queue(struct bfq_queue *bfqq);
static void bfq_dispatch_insert(struct request_queue *q, struct request *rq);
static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
				       struct bfq_group *bfqg, int is_sync,
				       struct io_context *ioc, gfp_t gfp_mask);
static void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg);
static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq);
#endif