Merge branch 'for-3.16/blk-mq-tagging' into for-3.16/core
Signed-off-by: Jens Axboe <axboe@fb.com>
Conflicts:
block/blk-mq-tag.c
diff --git a/block/Makefile b/block/Makefile
index 20645e8..b4c4d3b 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -2,12 +2,13 @@
# Makefile for the kernel block layer
#
-obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
+obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \
blk-flush.o blk-settings.o blk-ioc.o blk-map.o \
blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
blk-iopoll.o blk-lib.o blk-mq.o blk-mq-tag.o \
blk-mq-sysfs.o blk-mq-cpu.o blk-mq-cpumap.o ioctl.o \
- genhd.o scsi_ioctl.o partition-generic.o partitions/
+ genhd.o scsi_ioctl.o partition-generic.o ioprio.o \
+ partitions/
obj-$(CONFIG_BLK_DEV_BSG) += bsg.o
obj-$(CONFIG_BLK_DEV_BSGLIB) += bsg-lib.o
@@ -20,3 +21,4 @@
obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o
obj-$(CONFIG_BLK_DEV_INTEGRITY) += blk-integrity.o
obj-$(CONFIG_BLK_CMDLINE_PARSER) += cmdline-parser.o
+obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o
diff --git a/fs/bio-integrity.c b/block/bio-integrity.c
similarity index 100%
rename from fs/bio-integrity.c
rename to block/bio-integrity.c
diff --git a/fs/bio.c b/block/bio.c
similarity index 100%
rename from fs/bio.c
rename to block/bio.c
diff --git a/block/blk-core.c b/block/blk-core.c
index c426970..a6bd3e7 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1233,12 +1233,15 @@
static void part_round_stats_single(int cpu, struct hd_struct *part,
unsigned long now)
{
+ int inflight;
+
if (now == part->stamp)
return;
- if (part_in_flight(part)) {
+ inflight = part_in_flight(part);
+ if (inflight) {
__part_stat_add(cpu, part, time_in_queue,
- part_in_flight(part) * (now - part->stamp));
+ inflight * (now - part->stamp));
__part_stat_add(cpu, part, io_ticks, (now - part->stamp));
}
part->stamp = now;
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index c80086c..e6b3fba 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -20,7 +20,7 @@
int i;
for (i = 0; i < bt->map_nr; i++) {
- struct blk_mq_bitmap *bm = &bt->map[i];
+ struct blk_align_bitmap *bm = &bt->map[i];
int ret;
ret = find_first_zero_bit(&bm->word, bm->depth);
@@ -117,7 +117,7 @@
return atomic_read(&hctx->nr_active) < depth;
}
-static int __bt_get_word(struct blk_mq_bitmap *bm, unsigned int last_tag)
+static int __bt_get_word(struct blk_align_bitmap *bm, unsigned int last_tag)
{
int tag, org_last_tag, end;
@@ -360,7 +360,7 @@
int i;
for (i = 0; i < bt->map_nr; i++) {
- struct blk_mq_bitmap *bm = &bt->map[i];
+ struct blk_align_bitmap *bm = &bt->map[i];
int bit = 0;
do {
@@ -400,7 +400,7 @@
unsigned int i, used;
for (i = 0, used = 0; i < bt->map_nr; i++) {
- struct blk_mq_bitmap *bm = &bt->map[i];
+ struct blk_align_bitmap *bm = &bt->map[i];
used += bitmap_weight(&bm->word, bm->depth);
}
@@ -438,7 +438,7 @@
}
nr = ALIGN(depth, tags_per_word) / tags_per_word;
- bt->map = kzalloc_node(nr * sizeof(struct blk_mq_bitmap),
+ bt->map = kzalloc_node(nr * sizeof(struct blk_align_bitmap),
GFP_KERNEL, node);
if (!bt->map)
return -ENOMEM;
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
index 0f5ec8b..e144f68 100644
--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -1,6 +1,8 @@
#ifndef INT_BLK_MQ_TAG_H
#define INT_BLK_MQ_TAG_H
+#include "blk-mq.h"
+
enum {
BT_WAIT_QUEUES = 8,
BT_WAIT_BATCH = 8,
@@ -14,18 +16,13 @@
#define TAG_TO_INDEX(bt, tag) ((tag) >> (bt)->bits_per_word)
#define TAG_TO_BIT(bt, tag) ((tag) & ((1 << (bt)->bits_per_word) - 1))
-struct blk_mq_bitmap {
- unsigned long word;
- unsigned long depth;
-} ____cacheline_aligned_in_smp;
-
struct blk_mq_bitmap_tags {
unsigned int depth;
unsigned int wake_cnt;
unsigned int bits_per_word;
unsigned int map_nr;
- struct blk_mq_bitmap *map;
+ struct blk_align_bitmap *map;
unsigned int wake_index;
struct bt_wait_state *bs;
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 3c4f1fc..0fbef7e 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -56,21 +56,40 @@
{
unsigned int i;
- for (i = 0; i < hctx->nr_ctx_map; i++)
- if (hctx->ctx_map[i])
+ for (i = 0; i < hctx->ctx_map.map_size; i++)
+ if (hctx->ctx_map.map[i].word)
return true;
return false;
}
+static inline struct blk_align_bitmap *get_bm(struct blk_mq_hw_ctx *hctx,
+ struct blk_mq_ctx *ctx)
+{
+ return &hctx->ctx_map.map[ctx->index_hw / hctx->ctx_map.bits_per_word];
+}
+
+#define CTX_TO_BIT(hctx, ctx) \
+ ((ctx)->index_hw & ((hctx)->ctx_map.bits_per_word - 1))
+
/*
* Mark this ctx as having pending work in this hardware queue
*/
static void blk_mq_hctx_mark_pending(struct blk_mq_hw_ctx *hctx,
struct blk_mq_ctx *ctx)
{
- if (!test_bit(ctx->index_hw, hctx->ctx_map))
- set_bit(ctx->index_hw, hctx->ctx_map);
+ struct blk_align_bitmap *bm = get_bm(hctx, ctx);
+
+ if (!test_bit(CTX_TO_BIT(hctx, ctx), &bm->word))
+ set_bit(CTX_TO_BIT(hctx, ctx), &bm->word);
+}
+
+static void blk_mq_hctx_clear_pending(struct blk_mq_hw_ctx *hctx,
+ struct blk_mq_ctx *ctx)
+{
+ struct blk_align_bitmap *bm = get_bm(hctx, ctx);
+
+ clear_bit(CTX_TO_BIT(hctx, ctx), &bm->word);
}
static struct request *__blk_mq_alloc_request(struct blk_mq_hw_ctx *hctx,
@@ -630,6 +649,40 @@
}
/*
+ * Process software queues that have been marked busy, splicing their
+ * pending requests onto the tail of the for-dispatch list.
+ */
+static void flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list)
+{
+ struct blk_mq_ctx *ctx;
+ int i;
+
+ for (i = 0; i < hctx->ctx_map.map_size; i++) {
+ struct blk_align_bitmap *bm = &hctx->ctx_map.map[i];
+ unsigned int off, bit;
+
+ if (!bm->word)
+ continue;
+
+ bit = 0;
+ off = i * hctx->ctx_map.bits_per_word;
+ do {
+ bit = find_next_bit(&bm->word, bm->depth, bit);
+ if (bit >= bm->depth)
+ break;
+
+ ctx = hctx->ctxs[bit + off];
+ clear_bit(bit, &bm->word);
+ spin_lock(&ctx->lock);
+ list_splice_tail_init(&ctx->rq_list, list);
+ spin_unlock(&ctx->lock);
+
+ bit++;
+ } while (1);
+ }
+}
+
+/*
* Run this hardware queue, pulling any software queues mapped to it in.
* Note that this function currently has various problems around ordering
* of IO. In particular, we'd like FIFO behaviour on handling existing
@@ -638,10 +691,9 @@
static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
{
struct request_queue *q = hctx->queue;
- struct blk_mq_ctx *ctx;
struct request *rq;
LIST_HEAD(rq_list);
- int bit, queued;
+ int queued;
WARN_ON(!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask));
@@ -653,14 +705,7 @@
/*
* Touch any software queue that has pending entries.
*/
- for_each_set_bit(bit, hctx->ctx_map, hctx->nr_ctx) {
- clear_bit(bit, hctx->ctx_map);
- ctx = hctx->ctxs[bit];
-
- spin_lock(&ctx->lock);
- list_splice_tail_init(&ctx->rq_list, &rq_list);
- spin_unlock(&ctx->lock);
- }
+ flush_busy_ctxs(hctx, &rq_list);
/*
* If we have previous entries on our dispatch list, grab them
@@ -674,13 +719,9 @@
}
/*
- * Delete and return all entries from our dispatch list
- */
- queued = 0;
-
- /*
* Now process all the entries, sending them to the driver.
*/
+ queued = 0;
while (!list_empty(&rq_list)) {
int ret;
@@ -1103,17 +1144,15 @@
}
if (!(hctx->flags & BLK_MQ_F_SHOULD_MERGE)) {
- init_request_from_bio(rq, bio);
-
+ blk_mq_bio_to_request(rq, bio);
spin_lock(&ctx->lock);
insert_rq:
__blk_mq_insert_request(hctx, rq, false);
spin_unlock(&ctx->lock);
- blk_account_io_start(rq, 1);
} else {
spin_lock(&ctx->lock);
if (!blk_mq_attempt_merge(q, ctx, bio)) {
- init_request_from_bio(rq, bio);
+ blk_mq_bio_to_request(rq, bio);
goto insert_rq;
}
@@ -1175,7 +1214,7 @@
spin_lock(&ctx->lock);
if (!list_empty(&ctx->rq_list)) {
list_splice_init(&ctx->rq_list, &tmp);
- clear_bit(ctx->index_hw, hctx->ctx_map);
+ blk_mq_hctx_clear_pending(hctx, ctx);
}
spin_unlock(&ctx->lock);
@@ -1315,6 +1354,34 @@
return NULL;
}
+static void blk_mq_free_bitmap(struct blk_mq_ctxmap *bitmap)
+{
+ kfree(bitmap->map);
+}
+
+static int blk_mq_alloc_bitmap(struct blk_mq_ctxmap *bitmap, int node)
+{
+ unsigned int bpw = 8, total, num_maps, i;
+
+ bitmap->bits_per_word = bpw;
+
+ num_maps = ALIGN(nr_cpu_ids, bpw) / bpw;
+ bitmap->map = kzalloc_node(num_maps * sizeof(struct blk_align_bitmap),
+ GFP_KERNEL, node);
+ if (!bitmap->map)
+ return -ENOMEM;
+
+ bitmap->map_size = num_maps;
+
+ total = nr_cpu_ids;
+ for (i = 0; i < num_maps; i++) {
+ bitmap->map[i].depth = min(total, bitmap->bits_per_word);
+ total -= bitmap->map[i].depth;
+ }
+
+ return 0;
+}
+
static int blk_mq_init_hw_queues(struct request_queue *q,
struct blk_mq_tag_set *set)
{
@@ -1325,7 +1392,6 @@
* Initialize hardware queues
*/
queue_for_each_hw_ctx(q, hctx, i) {
- unsigned int num_maps;
int node;
node = hctx->numa_node;
@@ -1356,13 +1422,9 @@
if (!hctx->ctxs)
break;
- num_maps = ALIGN(nr_cpu_ids, BITS_PER_LONG) / BITS_PER_LONG;
- hctx->ctx_map = kzalloc_node(num_maps * sizeof(unsigned long),
- GFP_KERNEL, node);
- if (!hctx->ctx_map)
+ if (blk_mq_alloc_bitmap(&hctx->ctx_map, node))
break;
- hctx->nr_ctx_map = num_maps;
hctx->nr_ctx = 0;
if (set->ops->init_hctx &&
@@ -1385,7 +1447,7 @@
blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier);
kfree(hctx->ctxs);
- kfree(hctx->ctx_map);
+ blk_mq_free_bitmap(&hctx->ctx_map);
}
return 1;
@@ -1614,7 +1676,6 @@
blk_mq_del_queue_tag_set(q);
queue_for_each_hw_ctx(q, hctx, i) {
- kfree(hctx->ctx_map);
kfree(hctx->ctxs);
blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier);
if (q->mq_ops->exit_hctx)
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 97cfab9..5e5a378 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -52,4 +52,13 @@
extern unsigned int *blk_mq_make_queue_map(struct blk_mq_tag_set *set);
extern int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues);
+/*
+ * Basic implementation of a sparser bitmap, allowing the user to spread
+ * the bits over more cachelines.
+ */
+struct blk_align_bitmap {
+ unsigned long word;
+ unsigned long depth;
+} ____cacheline_aligned_in_smp;
+
#endif
diff --git a/fs/ioprio.c b/block/ioprio.c
similarity index 100%
rename from fs/ioprio.c
rename to block/ioprio.c
diff --git a/fs/Makefile b/fs/Makefile
index f9cb987..4030cbf 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -14,14 +14,13 @@
stack.o fs_struct.o statfs.o
ifeq ($(CONFIG_BLOCK),y)
-obj-y += buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o
+obj-y += buffer.o block_dev.o direct-io.o mpage.o
else
obj-y += no-block.o
endif
obj-$(CONFIG_PROC_FS) += proc_namespace.o
-obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o
obj-y += notify/
obj-$(CONFIG_EPOLL) += eventpoll.o
obj-$(CONFIG_ANON_INODES) += anon_inodes.o
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 379f88d..a06ca7b 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -11,6 +11,12 @@
void (*notify)(void *data, unsigned long action, unsigned int cpu);
};
+struct blk_mq_ctxmap {
+ unsigned int map_size;
+ unsigned int bits_per_word;
+ struct blk_align_bitmap *map;
+};
+
struct blk_mq_hw_ctx {
struct {
spinlock_t lock;
@@ -31,8 +37,8 @@
void *driver_data;
- unsigned int nr_ctx_map;
- unsigned long *ctx_map;
+ struct blk_mq_ctxmap ctx_map;
+
unsigned int nr_ctx;
struct blk_mq_ctx **ctxs;