aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDan Williams <dan.j.williams@intel.com>2011-07-23 20:44:25 +0200
committerZiyan <jaraidaniel@gmail.com>2016-01-08 10:36:24 +0100
commit487300d6fc8c815a33992128bc21685b724fa1e2 (patch)
tree79fea951a0eb83039bd8cfc43260ca4d704bba8e
parentea0fd17caaac52dfd0ddc157a54e0d6fea35d18e (diff)
downloadkernel_samsung_tuna-487300d6fc8c815a33992128bc21685b724fa1e2.zip
kernel_samsung_tuna-487300d6fc8c815a33992128bc21685b724fa1e2.tar.gz
kernel_samsung_tuna-487300d6fc8c815a33992128bc21685b724fa1e2.tar.bz2
block: strict rq_affinity
Some systems benefit from completions always being steered to the strict requester cpu rather than the looser "per-socket" steering that blk_cpu_to_group() attempts by default. This is because the first CPU in the group mask ends up being completely overloaded with work, while the others (including the original submitter) has power left to spare. Allow the strict mode to be set by writing '2' to the sysfs control file. This is identical to the scheme used for the nomerges file, where '2' is a more aggressive setting than just being turned on. echo 2 > /sys/block/<bdev>/queue/rq_affinity Cc: Christoph Hellwig <hch@infradead.org> Cc: Roland Dreier <roland@purestorage.com> Tested-by: Dave Jiang <dave.jiang@intel.com> Signed-off-by: Dan Williams <dan.j.williams@intel.com> Signed-off-by: Jens Axboe <jaxboe@fusionio.com> Conflicts: include/linux/blkdev.h Change-Id: I9073251d611e61cdd6163975618c2a6d5bbbe45c
-rw-r--r--Documentation/block/queue-sysfs.txt10
-rw-r--r--block/blk-core.c6
-rw-r--r--block/blk-softirq.c11
-rw-r--r--block/blk-sysfs.c13
-rw-r--r--include/linux/blkdev.h3
5 files changed, 27 insertions, 16 deletions
diff --git a/Documentation/block/queue-sysfs.txt b/Documentation/block/queue-sysfs.txt
index f652740..d8147b3 100644
--- a/Documentation/block/queue-sysfs.txt
+++ b/Documentation/block/queue-sysfs.txt
@@ -45,9 +45,13 @@ device.
rq_affinity (RW)
----------------
-If this option is enabled, the block layer will migrate request completions
-to the CPU that originally submitted the request. For some workloads
-this provides a significant reduction in CPU cycles due to caching effects.
+If this option is '1', the block layer will migrate request completions to the
+cpu "group" that originally submitted the request. For some workloads this
+provides a significant reduction in CPU cycles due to caching effects.
+
+For storage configurations that need to maximize distribution of completion
+processing setting this option to '2' forces the completion to run on the
+requesting cpu (bypassing the "group" aggregation logic).
scheduler (RW)
--------------
diff --git a/block/blk-core.c b/block/blk-core.c
index f249a21..2ce72a7 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1228,10 +1228,8 @@ get_rq:
init_request_from_bio(req, bio);
if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) ||
- bio_flagged(bio, BIO_CPU_AFFINE)) {
- req->cpu = blk_cpu_to_group(get_cpu());
- put_cpu();
- }
+ bio_flagged(bio, BIO_CPU_AFFINE))
+ req->cpu = smp_processor_id();
plug = current->plug;
if (plug) {
diff --git a/block/blk-softirq.c b/block/blk-softirq.c
index ee9c216..475fab8 100644
--- a/block/blk-softirq.c
+++ b/block/blk-softirq.c
@@ -103,22 +103,25 @@ static struct notifier_block __cpuinitdata blk_cpu_notifier = {
void __blk_complete_request(struct request *req)
{
+ int ccpu, cpu, group_cpu = NR_CPUS;
struct request_queue *q = req->q;
unsigned long flags;
- int ccpu, cpu, group_cpu;
BUG_ON(!q->softirq_done_fn);
local_irq_save(flags);
cpu = smp_processor_id();
- group_cpu = blk_cpu_to_group(cpu);
/*
* Select completion CPU
*/
- if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) && req->cpu != -1)
+ if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) && req->cpu != -1) {
ccpu = req->cpu;
- else
+ if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags)) {
+ ccpu = blk_cpu_to_group(ccpu);
+ group_cpu = blk_cpu_to_group(cpu);
+ }
+ } else
ccpu = cpu;
if (ccpu == cpu || ccpu == group_cpu) {
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index cb0f1a0..54ac101 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -246,8 +246,9 @@ static ssize_t queue_nomerges_store(struct request_queue *q, const char *page,
static ssize_t queue_rq_affinity_show(struct request_queue *q, char *page)
{
bool set = test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags);
+ bool force = test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags);
- return queue_var_show(set, page);
+ return queue_var_show(set << force, page);
}
static ssize_t
@@ -259,10 +260,14 @@ queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count)
ret = queue_var_store(&val, page, count);
spin_lock_irq(q->queue_lock);
- if (val)
+ if (val) {
queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
- else
- queue_flag_clear(QUEUE_FLAG_SAME_COMP, q);
+ if (val == 2)
+ queue_flag_set(QUEUE_FLAG_SAME_FORCE, q);
+ } else {
+ queue_flag_clear(QUEUE_FLAG_SAME_COMP, q);
+ queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q);
+ }
spin_unlock_irq(q->queue_lock);
#endif
return ret;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index f71d91d..e94e7e6 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -393,7 +393,7 @@ struct request_queue
#define QUEUE_FLAG_ELVSWITCH 6 /* don't use elevator, just do FIFO */
#define QUEUE_FLAG_BIDI 7 /* queue supports bidi requests */
#define QUEUE_FLAG_NOMERGES 8 /* disable merge attempts */
-#define QUEUE_FLAG_SAME_COMP 9 /* force complete on same CPU */
+#define QUEUE_FLAG_SAME_COMP 9 /* complete on same CPU-group */
#define QUEUE_FLAG_FAIL_IO 10 /* fake timeout */
#define QUEUE_FLAG_STACKABLE 11 /* supports request stacking */
#define QUEUE_FLAG_NONROT 12 /* non-rotational device (SSD) */
@@ -403,6 +403,7 @@ struct request_queue
#define QUEUE_FLAG_NOXMERGES 15 /* No extended merges */
#define QUEUE_FLAG_ADD_RANDOM 16 /* Contributes to random pool */
#define QUEUE_FLAG_SECDISCARD 17 /* supports SECDISCARD */
+#define QUEUE_FLAG_SAME_FORCE 18 /* force complete on same CPU */
#define QUEUE_FLAG_FAST 20 /* fast block device (e.g. ram based) */
#define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \