Merge branch 'for-4.14/block' of git://git.kernel.dk/linux-block
Pull block layer updates from Jens Axboe:
"This is the first pull request for 4.14, containing most of the code
changes. It's a quiet series this round, which I think we needed after
the churn of the last few series. This contains:
- Fix for a registration race in loop, from Anton Volkov.
- Overflow complaint fix from Arnd for DAC960.
- Series of drbd changes from the usual suspects.
- Conversion of the stec/skd driver to blk-mq. From Bart.
- A few BFQ improvements/fixes from Paolo.
- CFQ improvement from Ritesh, allowing idling for group idle.
- A few fixes found by Dan's smatch, courtesy of Dan.
- A warning fixup for a race between changing the IO scheduler and
device remova. From David Jeffery.
- A few nbd fixes from Josef.
- Support for cgroup info in blktrace, from Shaohua.
- Also from Shaohua, new features in the null_blk driver to allow it
to actually hold data, among other things.
- Various corner cases and error handling fixes from Weiping Zhang.
- Improvements to the IO stats tracking for blk-mq from me. Can
drastically improve performance for fast devices and/or big
machines.
- Series from Christoph removing bi_bdev as being needed for IO
submission, in preparation for nvme multipathing code.
- Series from Bart, including various cleanups and fixes for switch
fall through case complaints"
* 'for-4.14/block' of git://git.kernel.dk/linux-block: (162 commits)
kernfs: checking for IS_ERR() instead of NULL
drbd: remove BIOSET_NEED_RESCUER flag from drbd_{md_,}io_bio_set
drbd: Fix allyesconfig build, fix recent commit
drbd: switch from kmalloc() to kmalloc_array()
drbd: abort drbd_start_resync if there is no connection
drbd: move global variables to drbd namespace and make some static
drbd: rename "usermode_helper" to "drbd_usermode_helper"
drbd: fix race between handshake and admin disconnect/down
drbd: fix potential deadlock when trying to detach during handshake
drbd: A single dot should be put into a sequence.
drbd: fix rmmod cleanup, remove _all_ debugfs entries
drbd: Use setup_timer() instead of init_timer() to simplify the code.
drbd: fix potential get_ldev/put_ldev refcount imbalance during attach
drbd: new disk-option disable-write-same
drbd: Fix resource role for newly created resources in events2
drbd: mark symbols static where possible
drbd: Send P_NEG_ACK upon write error in protocol != C
drbd: add explicit plugging when submitting batches
drbd: change list_for_each_safe to while(list_first_entry_or_null)
drbd: introduce drbd_recv_header_maybe_unplug
...
diff --git a/MAINTAINERS b/MAINTAINERS
index 29c0c2b..c620db0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -12561,6 +12561,12 @@
S: Odd Fixes
F: drivers/net/ethernet/adaptec/starfire*
+STEC S1220 SKD DRIVER
+M: Bart Van Assche <bart.vanassche@wdc.com>
+L: linux-block@vger.kernel.org
+S: Maintained
+F: drivers/block/skd*[ch]
+
STI CEC DRIVER
M: Benjamin Gaignard <benjamin.gaignard@linaro.org>
S: Maintained
diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c
index 58507c3..c60e84e 100644
--- a/arch/powerpc/sysdev/axonram.c
+++ b/arch/powerpc/sysdev/axonram.c
@@ -110,7 +110,7 @@ axon_ram_irq_handler(int irq, void *dev)
static blk_qc_t
axon_ram_make_request(struct request_queue *queue, struct bio *bio)
{
- struct axon_ram_bank *bank = bio->bi_bdev->bd_disk->private_data;
+ struct axon_ram_bank *bank = bio->bi_disk->private_data;
unsigned long phys_mem, phys_end;
void *user_mem;
struct bio_vec vec;
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 436b6ca..6a7a26b 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -128,7 +128,7 @@ BFQ_BFQQ_FNS(busy);
BFQ_BFQQ_FNS(wait_request);
BFQ_BFQQ_FNS(non_blocking_wait_rq);
BFQ_BFQQ_FNS(fifo_expire);
-BFQ_BFQQ_FNS(idle_window);
+BFQ_BFQQ_FNS(has_short_ttime);
BFQ_BFQQ_FNS(sync);
BFQ_BFQQ_FNS(IO_bound);
BFQ_BFQQ_FNS(in_large_burst);
@@ -731,10 +731,10 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_data *bfqd,
unsigned int old_wr_coeff = bfqq->wr_coeff;
bool busy = bfq_already_existing && bfq_bfqq_busy(bfqq);
- if (bic->saved_idle_window)
- bfq_mark_bfqq_idle_window(bfqq);
+ if (bic->saved_has_short_ttime)
+ bfq_mark_bfqq_has_short_ttime(bfqq);
else
- bfq_clear_bfqq_idle_window(bfqq);
+ bfq_clear_bfqq_has_short_ttime(bfqq);
if (bic->saved_IO_bound)
bfq_mark_bfqq_IO_bound(bfqq);
@@ -2012,7 +2012,7 @@ static void bfq_bfqq_save_state(struct bfq_queue *bfqq)
return;
bic->saved_ttime = bfqq->ttime;
- bic->saved_idle_window = bfq_bfqq_idle_window(bfqq);
+ bic->saved_has_short_ttime = bfq_bfqq_has_short_ttime(bfqq);
bic->saved_IO_bound = bfq_bfqq_IO_bound(bfqq);
bic->saved_in_large_burst = bfq_bfqq_in_large_burst(bfqq);
bic->was_in_burst_list = !hlist_unhashed(&bfqq->burst_list_node);
@@ -3038,8 +3038,8 @@ void bfq_bfqq_expire(struct bfq_data *bfqd,
}
bfq_log_bfqq(bfqd, bfqq,
- "expire (%d, slow %d, num_disp %d, idle_win %d)", reason,
- slow, bfqq->dispatched, bfq_bfqq_idle_window(bfqq));
+ "expire (%d, slow %d, num_disp %d, short_ttime %d)", reason,
+ slow, bfqq->dispatched, bfq_bfqq_has_short_ttime(bfqq));
/*
* Increase, decrease or leave budget unchanged according to
@@ -3114,7 +3114,10 @@ static bool bfq_may_expire_for_budg_timeout(struct bfq_queue *bfqq)
static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
{
struct bfq_data *bfqd = bfqq->bfqd;
- bool idling_boosts_thr, idling_boosts_thr_without_issues,
+ bool rot_without_queueing =
+ !blk_queue_nonrot(bfqd->queue) && !bfqd->hw_tag,
+ bfqq_sequential_and_IO_bound,
+ idling_boosts_thr, idling_boosts_thr_without_issues,
idling_needed_for_service_guarantees,
asymmetric_scenario;
@@ -3122,27 +3125,45 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
return true;
/*
+ * Idling is performed only if slice_idle > 0. In addition, we
+ * do not idle if
+ * (a) bfqq is async
+ * (b) bfqq is in the idle io prio class: in this case we do
+ * not idle because we want to minimize the bandwidth that
+ * queues in this class can steal to higher-priority queues
+ */
+ if (bfqd->bfq_slice_idle == 0 || !bfq_bfqq_sync(bfqq) ||
+ bfq_class_idle(bfqq))
+ return false;
+
+ bfqq_sequential_and_IO_bound = !BFQQ_SEEKY(bfqq) &&
+ bfq_bfqq_IO_bound(bfqq) && bfq_bfqq_has_short_ttime(bfqq);
+
+ /*
* The next variable takes into account the cases where idling
* boosts the throughput.
*
* The value of the variable is computed considering, first, that
* idling is virtually always beneficial for the throughput if:
- * (a) the device is not NCQ-capable, or
- * (b) regardless of the presence of NCQ, the device is rotational
- * and the request pattern for bfqq is I/O-bound and sequential.
+ * (a) the device is not NCQ-capable and rotational, or
+ * (b) regardless of the presence of NCQ, the device is rotational and
+ * the request pattern for bfqq is I/O-bound and sequential, or
+ * (c) regardless of whether it is rotational, the device is
+ * not NCQ-capable and the request pattern for bfqq is
+ * I/O-bound and sequential.
*
* Secondly, and in contrast to the above item (b), idling an
* NCQ-capable flash-based device would not boost the
* throughput even with sequential I/O; rather it would lower
* the throughput in proportion to how fast the device
* is. Accordingly, the next variable is true if any of the
- * above conditions (a) and (b) is true, and, in particular,
- * happens to be false if bfqd is an NCQ-capable flash-based
- * device.
+ * above conditions (a), (b) or (c) is true, and, in
+ * particular, happens to be false if bfqd is an NCQ-capable
+ * flash-based device.
*/
- idling_boosts_thr = !bfqd->hw_tag ||
- (!blk_queue_nonrot(bfqd->queue) && bfq_bfqq_IO_bound(bfqq) &&
- bfq_bfqq_idle_window(bfqq));
+ idling_boosts_thr = rot_without_queueing ||
+ ((!blk_queue_nonrot(bfqd->queue) || !bfqd->hw_tag) &&
+ bfqq_sequential_and_IO_bound);
/*
* The value of the next variable,
@@ -3313,16 +3334,13 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
asymmetric_scenario && !bfq_bfqq_in_large_burst(bfqq);
/*
- * We have now all the components we need to compute the return
- * value of the function, which is true only if both the following
- * conditions hold:
- * 1) bfqq is sync, because idling make sense only for sync queues;
- * 2) idling either boosts the throughput (without issues), or
- * is necessary to preserve service guarantees.
+ * We have now all the components we need to compute the
+ * return value of the function, which is true only if idling
+ * either boosts the throughput (without issues), or is
+ * necessary to preserve service guarantees.
*/
- return bfq_bfqq_sync(bfqq) &&
- (idling_boosts_thr_without_issues ||
- idling_needed_for_service_guarantees);
+ return idling_boosts_thr_without_issues ||
+ idling_needed_for_service_guarantees;
}
/*
@@ -3338,10 +3356,7 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
*/
static bool bfq_bfqq_must_idle(struct bfq_queue *bfqq)
{
- struct bfq_data *bfqd = bfqq->bfqd;
-
- return RB_EMPTY_ROOT(&bfqq->sort_list) && bfqd->bfq_slice_idle != 0 &&
- bfq_bfqq_may_idle(bfqq);
+ return RB_EMPTY_ROOT(&bfqq->sort_list) && bfq_bfqq_may_idle(bfqq);
}
/*
@@ -3783,7 +3798,6 @@ bfq_set_next_ioprio_data(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
case IOPRIO_CLASS_IDLE:
bfqq->new_ioprio_class = IOPRIO_CLASS_IDLE;
bfqq->new_ioprio = 7;
- bfq_clear_bfqq_idle_window(bfqq);
break;
}
@@ -3843,8 +3857,14 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
bfq_set_next_ioprio_data(bfqq, bic);
if (is_sync) {
+ /*
+ * No need to mark as has_short_ttime if in
+ * idle_class, because no device idling is performed
+ * for queues in idle class
+ */
if (!bfq_class_idle(bfqq))
- bfq_mark_bfqq_idle_window(bfqq);
+ /* tentatively mark as has_short_ttime */
+ bfq_mark_bfqq_has_short_ttime(bfqq);
bfq_mark_bfqq_sync(bfqq);
bfq_mark_bfqq_just_created(bfqq);
} else
@@ -3985,18 +4005,19 @@ bfq_update_io_seektime(struct bfq_data *bfqd, struct bfq_queue *bfqq,
blk_rq_sectors(rq) < BFQQ_SECT_THR_NONROT);
}
-/*
- * Disable idle window if the process thinks too long or seeks so much that
- * it doesn't matter.
- */
-static void bfq_update_idle_window(struct bfq_data *bfqd,
- struct bfq_queue *bfqq,
- struct bfq_io_cq *bic)
+static void bfq_update_has_short_ttime(struct bfq_data *bfqd,
+ struct bfq_queue *bfqq,
+ struct bfq_io_cq *bic)
{
- int enable_idle;
+ bool has_short_ttime = true;
- /* Don't idle for async or idle io prio class. */
- if (!bfq_bfqq_sync(bfqq) || bfq_class_idle(bfqq))
+ /*
+ * No need to update has_short_ttime if bfqq is async or in
+ * idle io prio class, or if bfq_slice_idle is zero, because
+ * no device idling is performed for bfqq in this case.
+ */
+ if (!bfq_bfqq_sync(bfqq) || bfq_class_idle(bfqq) ||
+ bfqd->bfq_slice_idle == 0)
return;
/* Idle window just restored, statistics are meaningless. */
@@ -4004,27 +4025,22 @@ static void bfq_update_idle_window(struct bfq_data *bfqd,
bfqd->bfq_wr_min_idle_time))
return;
- enable_idle = bfq_bfqq_idle_window(bfqq);
-
+ /* Think time is infinite if no process is linked to
+ * bfqq. Otherwise check average think time to
+ * decide whether to mark as has_short_ttime
+ */
if (atomic_read(&bic->icq.ioc->active_ref) == 0 ||
- bfqd->bfq_slice_idle == 0 ||
- (bfqd->hw_tag && BFQQ_SEEKY(bfqq) &&
- bfqq->wr_coeff == 1))
- enable_idle = 0;
- else if (bfq_sample_valid(bfqq->ttime.ttime_samples)) {
- if (bfqq->ttime.ttime_mean > bfqd->bfq_slice_idle &&
- bfqq->wr_coeff == 1)
- enable_idle = 0;
- else
- enable_idle = 1;
- }
- bfq_log_bfqq(bfqd, bfqq, "update_idle_window: enable_idle %d",
- enable_idle);
+ (bfq_sample_valid(bfqq->ttime.ttime_samples) &&
+ bfqq->ttime.ttime_mean > bfqd->bfq_slice_idle))
+ has_short_ttime = false;
- if (enable_idle)
- bfq_mark_bfqq_idle_window(bfqq);
+ bfq_log_bfqq(bfqd, bfqq, "update_has_short_ttime: has_short_ttime %d",
+ has_short_ttime);
+
+ if (has_short_ttime)
+ bfq_mark_bfqq_has_short_ttime(bfqq);
else
- bfq_clear_bfqq_idle_window(bfqq);
+ bfq_clear_bfqq_has_short_ttime(bfqq);
}
/*
@@ -4040,14 +4056,12 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
bfqq->meta_pending++;
bfq_update_io_thinktime(bfqd, bfqq);
+ bfq_update_has_short_ttime(bfqd, bfqq, bic);
bfq_update_io_seektime(bfqd, bfqq, rq);
- if (bfqq->entity.service > bfq_max_budget(bfqd) / 8 ||
- !BFQQ_SEEKY(bfqq))
- bfq_update_idle_window(bfqd, bfqq, bic);
bfq_log_bfqq(bfqd, bfqq,
- "rq_enqueued: idle_window=%d (seeky %d)",
- bfq_bfqq_idle_window(bfqq), BFQQ_SEEKY(bfqq));
+ "rq_enqueued: has_short_ttime=%d (seeky %d)",
+ bfq_bfqq_has_short_ttime(bfqq), BFQQ_SEEKY(bfqq));
bfqq->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq);
@@ -4787,16 +4801,13 @@ static ssize_t bfq_var_show(unsigned int var, char *page)
return sprintf(page, "%u\n", var);
}
-static ssize_t bfq_var_store(unsigned long *var, const char *page,
- size_t count)
+static void bfq_var_store(unsigned long *var, const char *page)
{
unsigned long new_val;
int ret = kstrtoul(page, 10, &new_val);
if (ret == 0)
*var = new_val;
-
- return count;
}
#define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \
@@ -4838,7 +4849,7 @@ __FUNC(struct elevator_queue *e, const char *page, size_t count) \
{ \
struct bfq_data *bfqd = e->elevator_data; \
unsigned long uninitialized_var(__data); \
- int ret = bfq_var_store(&__data, (page), count); \
+ bfq_var_store(&__data, (page)); \
if (__data < (MIN)) \
__data = (MIN); \
else if (__data > (MAX)) \
@@ -4849,7 +4860,7 @@ __FUNC(struct elevator_queue *e, const char *page, size_t count) \
*(__PTR) = (u64)__data * NSEC_PER_MSEC; \
else \
*(__PTR) = __data; \
- return ret; \
+ return count; \
}
STORE_FUNCTION(bfq_fifo_expire_sync_store, &bfqd->bfq_fifo_expire[1], 1,
INT_MAX, 2);
@@ -4866,13 +4877,13 @@ static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count)\
{ \
struct bfq_data *bfqd = e->elevator_data; \
unsigned long uninitialized_var(__data); \
- int ret = bfq_var_store(&__data, (page), count); \
+ bfq_var_store(&__data, (page)); \
if (__data < (MIN)) \
__data = (MIN); \
else if (__data > (MAX)) \
__data = (MAX); \
*(__PTR) = (u64)__data * NSEC_PER_USEC; \
- return ret; \
+ return count; \
}
USEC_STORE_FUNCTION(bfq_slice_idle_us_store, &bfqd->bfq_slice_idle, 0,
UINT_MAX);
@@ -4883,7 +4894,8 @@ static ssize_t bfq_max_budget_store(struct elevator_queue *e,
{
struct bfq_data *bfqd = e->elevator_data;
unsigned long uninitialized_var(__data);
- int ret = bfq_var_store(&__data, (page), count);
+
+ bfq_var_store(&__data, (page));
if (__data == 0)
bfqd->bfq_max_budget = bfq_calc_max_budget(bfqd);
@@ -4895,7 +4907,7 @@ static ssize_t bfq_max_budget_store(struct elevator_queue *e,
bfqd->bfq_user_max_budget = __data;
- return ret;
+ return count;
}
/*
@@ -4907,7 +4919,8 @@ static ssize_t bfq_timeout_sync_store(struct elevator_queue *e,
{
struct bfq_data *bfqd = e->elevator_data;
unsigned long uninitialized_var(__data);
- int ret = bfq_var_store(&__data, (page), count);
+
+ bfq_var_store(&__data, (page));
if (__data < 1)
__data = 1;
@@ -4918,7 +4931,7 @@ static ssize_t bfq_timeout_sync_store(struct elevator_queue *e,
if (bfqd->bfq_user_max_budget == 0)
bfqd->bfq_max_budget = bfq_calc_max_budget(bfqd);
- return ret;
+ return count;
}
static ssize_t bfq_strict_guarantees_store(struct elevator_queue *e,
@@ -4926,7 +4939,8 @@ static ssize_t bfq_strict_guarantees_store(struct elevator_queue *e,
{
struct bfq_data *bfqd = e->elevator_data;
unsigned long uninitialized_var(__data);
- int ret = bfq_var_store(&__data, (page), count);
+
+ bfq_var_store(&__data, (page));
if (__data > 1)
__data = 1;
@@ -4936,7 +4950,7 @@ static ssize_t bfq_strict_guarantees_store(struct elevator_queue *e,
bfqd->strict_guarantees = __data;
- return ret;
+ return count;
}
static ssize_t bfq_low_latency_store(struct elevator_queue *e,
@@ -4944,7 +4958,8 @@ static ssize_t bfq_low_latency_store(struct elevator_queue *e,
{
struct bfq_data *bfqd = e->elevator_data;
unsigned long uninitialized_var(__data);
- int ret = bfq_var_store(&__data, (page), count);
+
+ bfq_var_store(&__data, (page));
if (__data > 1)
__data = 1;
@@ -4952,7 +4967,7 @@ static ssize_t bfq_low_latency_store(struct elevator_queue *e,
bfq_end_wr(bfqd);
bfqd->low_latency = __data;
- return ret;
+ return count;
}
#define BFQ_ATTR(name) \
@@ -4998,6 +5013,7 @@ static struct elevator_type iosched_bfq_mq = {
.elevator_name = "bfq",
.elevator_owner = THIS_MODULE,
};
+MODULE_ALIAS("bfq-iosched");
static int __init bfq_init(void)
{
@@ -5048,10 +5064,12 @@ static int __init bfq_init(void)
ret = elv_register(&iosched_bfq_mq);
if (ret)
- goto err_pol_unreg;
+ goto slab_kill;
return 0;
+slab_kill:
+ bfq_slab_kill();
err_pol_unreg:
#ifdef CONFIG_BFQ_GROUP_IOSCHED
blkcg_policy_unregister(&blkcg_policy_bfq);
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index 859f0a8..cc4ea85 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -360,11 +360,11 @@ struct bfq_io_cq {
uint64_t blkcg_serial_nr; /* the current blkcg serial */
#endif
/*
- * Snapshot of the idle window before merging; taken to
- * remember this value while the queue is merged, so as to be
- * able to restore it in case of split.
+ * Snapshot of the has_short_time flag before merging; taken
+ * to remember its value while the queue is merged, so as to
+ * be able to restore it in case of split.
*/
- bool saved_idle_window;
+ bool saved_has_short_ttime;
/*
* Same purpose as the previous two fields for the I/O bound
* classification of a queue.
@@ -638,7 +638,7 @@ enum bfqq_state_flags {
* without idling the device
*/
BFQQF_fifo_expire, /* FIFO checked in this slice */
- BFQQF_idle_window, /* slice idling enabled */
+ BFQQF_has_short_ttime, /* queue has a short think time */
BFQQF_sync, /* synchronous queue */
BFQQF_IO_bound, /*
* bfqq has timed-out at least once
@@ -667,7 +667,7 @@ BFQ_BFQQ_FNS(busy);
BFQ_BFQQ_FNS(wait_request);
BFQ_BFQQ_FNS(non_blocking_wait_rq);
BFQ_BFQQ_FNS(fifo_expire);
-BFQ_BFQQ_FNS(idle_window);
+BFQ_BFQQ_FNS(has_short_ttime);
BFQ_BFQQ_FNS(sync);
BFQ_BFQQ_FNS(IO_bound);
BFQ_BFQQ_FNS(in_large_burst);
@@ -929,13 +929,16 @@ void bfq_add_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq);
struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
#define bfq_log_bfqq(bfqd, bfqq, fmt, args...) do { \
- blk_add_trace_msg((bfqd)->queue, "bfq%d%c %s " fmt, (bfqq)->pid,\
- bfq_bfqq_sync((bfqq)) ? 'S' : 'A', \
- bfqq_group(bfqq)->blkg_path, ##args); \
+ blk_add_cgroup_trace_msg((bfqd)->queue, \
+ bfqg_to_blkg(bfqq_group(bfqq))->blkcg, \
+ "bfq%d%c " fmt, (bfqq)->pid, \
+ bfq_bfqq_sync((bfqq)) ? 'S' : 'A', ##args); \
} while (0)
-#define bfq_log_bfqg(bfqd, bfqg, fmt, args...) \
- blk_add_trace_msg((bfqd)->queue, "%s " fmt, (bfqg)->blkg_path, ##args)
+#define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do { \
+ blk_add_cgroup_trace_msg((bfqd)->queue, \
+ bfqg_to_blkg(bfqg)->blkcg, fmt, ##args); \
+} while (0)
#else /* CONFIG_BFQ_GROUP_IOSCHED */
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 9b1ea47..5df3290 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -146,7 +146,7 @@ int bio_integrity_add_page(struct bio *bio, struct page *page,
iv = bip->bip_vec + bip->bip_vcnt;
if (bip->bip_vcnt &&
- bvec_gap_to_prev(bdev_get_queue(bio->bi_bdev),
+ bvec_gap_to_prev(bio->bi_disk->queue,
&bip->bip_vec[bip->bip_vcnt - 1], offset))
return 0;
@@ -190,7 +190,7 @@ static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi,
static blk_status_t bio_integrity_process(struct bio *bio,
struct bvec_iter *proc_iter, integrity_processing_fn *proc_fn)
{
- struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
+ struct blk_integrity *bi = blk_get_integrity(bio->bi_disk);
struct blk_integrity_iter iter;
struct bvec_iter bviter;
struct bio_vec bv;
@@ -199,7 +199,7 @@ static blk_status_t bio_integrity_process(struct bio *bio,
void *prot_buf = page_address(bip->bip_vec->bv_page) +
bip->bip_vec->bv_offset;
- iter.disk_name = bio->bi_bdev->bd_disk->disk_name;
+ iter.disk_name = bio->bi_disk->disk_name;
iter.interval = 1 << bi->interval_exp;
iter.seed = proc_iter->bi_sector;
iter.prot_buf = prot_buf;
@@ -236,8 +236,8 @@ static blk_status_t bio_integrity_process(struct bio *bio,
bool bio_integrity_prep(struct bio *bio)
{
struct bio_integrity_payload *bip;
- struct blk_integrity *bi;
- struct request_queue *q;
+ struct blk_integrity *bi = blk_get_integrity(bio->bi_disk);
+ struct request_queue *q = bio->bi_disk->queue;
void *buf;
unsigned long start, end;
unsigned int len, nr_pages;
@@ -245,8 +245,9 @@ bool bio_integrity_prep(struct bio *bio)
unsigned int intervals;
blk_status_t status;
- bi = bdev_get_integrity(bio->bi_bdev);
- q = bdev_get_queue(bio->bi_bdev);
+ if (!bi)
+ return true;
+
if (bio_op(bio) != REQ_OP_READ && bio_op(bio) != REQ_OP_WRITE)
return true;
@@ -257,9 +258,6 @@ bool bio_integrity_prep(struct bio *bio)
if (bio_integrity(bio))
return true;
- if (bi == NULL)
- return true;
-
if (bio_data_dir(bio) == READ) {
if (!bi->profile->verify_fn ||
!(bi->flags & BLK_INTEGRITY_VERIFY))
@@ -354,7 +352,7 @@ static void bio_integrity_verify_fn(struct work_struct *work)
struct bio_integrity_payload *bip =
container_of(work, struct bio_integrity_payload, bip_work);
struct bio *bio = bip->bip_bio;
- struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
+ struct blk_integrity *bi = blk_get_integrity(bio->bi_disk);
struct bvec_iter iter = bio->bi_iter;
/*
@@ -387,7 +385,7 @@ static void bio_integrity_verify_fn(struct work_struct *work)
*/
bool __bio_integrity_endio(struct bio *bio)
{
- struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
+ struct blk_integrity *bi = blk_get_integrity(bio->bi_disk);
struct bio_integrity_payload *bip = bio_integrity(bio);
if (bio_op(bio) == REQ_OP_READ && !bio->bi_status &&
@@ -413,7 +411,7 @@ bool __bio_integrity_endio(struct bio *bio)
void bio_integrity_advance(struct bio *bio, unsigned int bytes_done)
{
struct bio_integrity_payload *bip = bio_integrity(bio);
- struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
+ struct blk_integrity *bi = blk_get_integrity(bio->bi_disk);
unsigned bytes = bio_integrity_bytes(bi, bytes_done >> 9);
bip->bip_iter.bi_sector += bytes_done >> 9;
@@ -430,7 +428,7 @@ EXPORT_SYMBOL(bio_integrity_advance);
void bio_integrity_trim(struct bio *bio)
{
struct bio_integrity_payload *bip = bio_integrity(bio);
- struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
+ struct blk_integrity *bi = blk_get_integrity(bio->bi_disk);
bip->bip_iter.bi_size = bio_integrity_bytes(bi, bio_sectors(bio));
}
diff --git a/block/bio.c b/block/bio.c
index 9a63597..6745759 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -593,10 +593,10 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
BUG_ON(bio->bi_pool && BVEC_POOL_IDX(bio));
/*
- * most users will be overriding ->bi_bdev with a new target,
+ * most users will be overriding ->bi_disk with a new target,
* so we don't set nor calculate new physical/hw segment counts here
*/
- bio->bi_bdev = bio_src->bi_bdev;
+ bio->bi_disk = bio_src->bi_disk;
bio_set_flag(bio, BIO_CLONED);
bio->bi_opf = bio_src->bi_opf;
bio->bi_write_hint = bio_src->bi_write_hint;
@@ -681,7 +681,7 @@ struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask,
bio = bio_alloc_bioset(gfp_mask, bio_segments(bio_src), bs);
if (!bio)
return NULL;
- bio->bi_bdev = bio_src->bi_bdev;
+ bio->bi_disk = bio_src->bi_disk;
bio->bi_opf = bio_src->bi_opf;
bio->bi_write_hint = bio_src->bi_write_hint;
bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector;
@@ -936,6 +936,10 @@ static void submit_bio_wait_endio(struct bio *bio)
*
* Simple wrapper around submit_bio(). Returns 0 on success, or the error from
* bio_endio() on failure.
+ *
+ * WARNING: Unlike to how submit_bio() is usually used, this function does not
+ * result in bio reference to be consumed. The caller must drop the reference
+ * on his own.
*/
int submit_bio_wait(struct bio *bio)
{
@@ -1732,29 +1736,29 @@ void bio_check_pages_dirty(struct bio *bio)
}
}
-void generic_start_io_acct(int rw, unsigned long sectors,
- struct hd_struct *part)
+void generic_start_io_acct(struct request_queue *q, int rw,
+ unsigned long sectors, struct hd_struct *part)
{
int cpu = part_stat_lock();
- part_round_stats(cpu, part);
+ part_round_stats(q, cpu, part);
part_stat_inc(cpu, part, ios[rw]);
part_stat_add(cpu, part, sectors[rw], sectors);
- part_inc_in_flight(part, rw);
+ part_inc_in_flight(q, part, rw);
part_stat_unlock();
}
EXPORT_SYMBOL(generic_start_io_acct);
-void generic_end_io_acct(int rw, struct hd_struct *part,
- unsigned long start_time)
+void generic_end_io_acct(struct request_queue *q, int rw,
+ struct hd_struct *part, unsigned long start_time)
{
unsigned long duration = jiffies - start_time;
int cpu = part_stat_lock();
part_stat_add(cpu, part, ticks[rw], duration);
- part_round_stats(cpu, part);
- part_dec_in_flight(part, rw);
+ part_round_stats(q, cpu, part);
+ part_dec_in_flight(q, part, rw);
part_stat_unlock();
}
@@ -1826,8 +1830,8 @@ void bio_endio(struct bio *bio)
goto again;
}
- if (bio->bi_bdev && bio_flagged(bio, BIO_TRACE_COMPLETION)) {
- trace_block_bio_complete(bdev_get_queue(bio->bi_bdev), bio,
+ if (bio->bi_disk && bio_flagged(bio, BIO_TRACE_COMPLETION)) {
+ trace_block_bio_complete(bio->bi_disk->queue, bio,
blk_status_to_errno(bio->bi_status));
bio_clear_flag(bio, BIO_TRACE_COMPLETION);
}
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 0480892..d3f56ba 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1067,7 +1067,7 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL);
if (!blkcg) {
ret = ERR_PTR(-ENOMEM);
- goto free_blkcg;
+ goto unlock;
}
}
@@ -1111,8 +1111,10 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
for (i--; i >= 0; i--)
if (blkcg->cpd[i])
blkcg_policy[i]->cpd_free_fn(blkcg->cpd[i]);
-free_blkcg:
- kfree(blkcg);
+
+ if (blkcg != &blkcg_root)
+ kfree(blkcg);
+unlock:
mutex_unlock(&blkcg_pol_mutex);
return ret;
}
diff --git a/block/blk-core.c b/block/blk-core.c
index dbecbf4..d709c0e 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -280,7 +280,7 @@ EXPORT_SYMBOL(blk_start_queue_async);
void blk_start_queue(struct request_queue *q)
{
lockdep_assert_held(q->queue_lock);
- WARN_ON(!irqs_disabled());
+ WARN_ON(!in_interrupt() && !irqs_disabled());
WARN_ON_ONCE(q->mq_ops);
queue_flag_clear(QUEUE_FLAG_STOPPED, q);
@@ -1469,15 +1469,10 @@ static void add_acct_request(struct request_queue *q, struct request *rq,
__elv_add_request(q, rq, where);
}
-static void part_round_stats_single(int cpu, struct hd_struct *part,
- unsigned long now)
+static void part_round_stats_single(struct request_queue *q, int cpu,
+ struct hd_struct *part, unsigned long now,
+ unsigned int inflight)
{
- int inflight;
-
- if (now == part->stamp)
- return;
-
- inflight = part_in_flight(part);
if (inflight) {
__part_stat_add(cpu, part, time_in_queue,
inflight * (now - part->stamp));
@@ -1488,6 +1483,7 @@ static void part_round_stats_single(int cpu, struct hd_struct *part,
/**
* part_round_stats() - Round off the performance stats on a struct disk_stats.
+ * @q: target block queue
* @cpu: cpu number for stats access
* @part: target partition
*
@@ -1502,13 +1498,31 @@ static void part_round_stats_single(int cpu, struct hd_struct *part,
* /proc/diskstats. This accounts immediately for all queue usage up to
* the current jiffies and restarts the counters again.
*/
-void part_round_stats(int cpu, struct hd_struct *part)
+void part_round_stats(struct request_queue *q, int cpu, struct hd_struct *part)
{
+ struct hd_struct *part2 = NULL;
unsigned long now = jiffies;
+ unsigned int inflight[2];
+ int stats = 0;
- if (part->partno)
- part_round_stats_single(cpu, &part_to_disk(part)->part0, now);
- part_round_stats_single(cpu, part, now);
+ if (part->stamp != now)
+ stats |= 1;
+
+ if (part->partno) {
+ part2 = &part_to_disk(part)->part0;
+ if (part2->stamp != now)
+ stats |= 2;
+ }
+
+ if (!stats)
+ return;
+
+ part_in_flight(q, part, inflight);
+
+ if (stats & 2)
+ part_round_stats_single(q, cpu, part2, now, inflight[1]);
+ if (stats & 1)
+ part_round_stats_single(q, cpu, part, now, inflight[0]);
}
EXPORT_SYMBOL_GPL(part_round_stats);
@@ -1896,40 +1910,15 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
return BLK_QC_T_NONE;
}
-/*
- * If bio->bi_dev is a partition, remap the location
- */
-static inline void blk_partition_remap(struct bio *bio)
-{
- struct block_device *bdev = bio->bi_bdev;
-
- /*
- * Zone reset does not include bi_size so bio_sectors() is always 0.
- * Include a test for the reset op code and perform the remap if needed.
- */
- if (bdev != bdev->bd_contains &&
- (bio_sectors(bio) || bio_op(bio) == REQ_OP_ZONE_RESET)) {
- struct hd_struct *p = bdev->bd_part;
-
- bio->bi_iter.bi_sector += p->start_sect;
- bio->bi_bdev = bdev->bd_contains;
-
- trace_block_bio_remap(bdev_get_queue(bio->bi_bdev), bio,
- bdev->bd_dev,
- bio->bi_iter.bi_sector - p->start_sect);
- }
-}
-
static void handle_bad_sector(struct bio *bio)
{
char b[BDEVNAME_SIZE];
printk(KERN_INFO "attempt to access beyond end of device\n");
printk(KERN_INFO "%s: rw=%d, want=%Lu, limit=%Lu\n",
- bdevname(bio->bi_bdev, b),
- bio->bi_opf,
+ bio_devname(bio, b), bio->bi_opf,
(unsigned long long)bio_end_sector(bio),
- (long long)(i_size_read(bio->bi_bdev->bd_inode) >> 9));
+ (long long)get_capacity(bio->bi_disk));
}
#ifdef CONFIG_FAIL_MAKE_REQUEST
@@ -1968,6 +1957,38 @@ static inline bool should_fail_request(struct hd_struct *part,
#endif /* CONFIG_FAIL_MAKE_REQUEST */
/*
+ * Remap block n of partition p to block n+start(p) of the disk.
+ */
+static inline int blk_partition_remap(struct bio *bio)
+{
+ struct hd_struct *p;
+ int ret = 0;
+
+ /*
+ * Zone reset does not include bi_size so bio_sectors() is always 0.
+ * Include a test for the reset op code and perform the remap if needed.
+ */
+ if (!bio->bi_partno ||
+ (!bio_sectors(bio) && bio_op(bio) != REQ_OP_ZONE_RESET))
+ return 0;
+
+ rcu_read_lock();
+ p = __disk_get_part(bio->bi_disk, bio->bi_partno);
+ if (likely(p && !should_fail_request(p, bio->bi_iter.bi_size))) {
+ bio->bi_iter.bi_sector += p->start_sect;
+ bio->bi_partno = 0;
+ trace_block_bio_remap(bio->bi_disk->queue, bio, part_devt(p),
+ bio->bi_iter.bi_sector - p->start_sect);
+ } else {
+ printk("%s: fail for partition %d\n", __func__, bio->bi_partno);
+ ret = -EIO;
+ }
+ rcu_read_unlock();
+
+ return ret;
+}
+
+/*
* Check whether this bio extends beyond the end of the device.
*/
static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors)
@@ -1978,7 +1999,7 @@ static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors)
return 0;
/* Test device or partition size, when known. */
- maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9;
+ maxsector = get_capacity(bio->bi_disk);
if (maxsector) {
sector_t sector = bio->bi_iter.bi_sector;
@@ -2003,20 +2024,18 @@ generic_make_request_checks(struct bio *bio)
int nr_sectors = bio_sectors(bio);
blk_status_t status = BLK_STS_IOERR;
char b[BDEVNAME_SIZE];
- struct hd_struct *part;
might_sleep();
if (bio_check_eod(bio, nr_sectors))
goto end_io;
- q = bdev_get_queue(bio->bi_bdev);
+ q = bio->bi_disk->queue;
if (unlikely(!q)) {
printk(KERN_ERR
"generic_make_request: Trying to access "
"nonexistent block-device %s (%Lu)\n",
- bdevname(bio->bi_bdev, b),
- (long long) bio->bi_iter.bi_sector);
+ bio_devname(bio, b), (long long)bio->bi_iter.bi_sector);
goto end_io;
}
@@ -2028,17 +2047,11 @@ generic_make_request_checks(struct bio *bio)
if ((bio->bi_opf & REQ_NOWAIT) && !queue_is_rq_based(q))
goto not_supported;
- part = bio->bi_bdev->bd_part;
- if (should_fail_request(part, bio->bi_iter.bi_size) ||
- should_fail_request(&part_to_disk(part)->part0,
- bio->bi_iter.bi_size))
+ if (should_fail_request(&bio->bi_disk->part0, bio->bi_iter.bi_size))
goto end_io;
- /*
- * If this device has partitions, remap block n
- * of partition p to block n+start(p) of the disk.
- */
- blk_partition_remap(bio);
+ if (blk_partition_remap(bio))
+ goto end_io;
if (bio_check_eod(bio, nr_sectors))
goto end_io;
@@ -2067,16 +2080,16 @@ generic_make_request_checks(struct bio *bio)
goto not_supported;
break;
case REQ_OP_WRITE_SAME:
- if (!bdev_write_same(bio->bi_bdev))
+ if (!q->limits.max_write_same_sectors)
goto not_supported;
break;
case REQ_OP_ZONE_REPORT:
case REQ_OP_ZONE_RESET:
- if (!bdev_is_zoned(bio->bi_bdev))
+ if (!blk_queue_is_zoned(q))
goto not_supported;
break;
case REQ_OP_WRITE_ZEROES:
- if (!bdev_write_zeroes_sectors(bio->bi_bdev))
+ if (!q->limits.max_write_zeroes_sectors)
goto not_supported;
break;
default:
@@ -2183,7 +2196,7 @@ blk_qc_t generic_make_request(struct bio *bio)
bio_list_init(&bio_list_on_stack[0]);
current->bio_list = bio_list_on_stack;
do {
- struct request_queue *q = bdev_get_queue(bio->bi_bdev);
+ struct request_queue *q = bio->bi_disk->queue;
if (likely(blk_queue_enter(q, bio->bi_opf & REQ_NOWAIT) == 0)) {
struct bio_list lower, same;
@@ -2201,7 +2214,7 @@ blk_qc_t generic_make_request(struct bio *bio)
bio_list_init(&lower);
bio_list_init(&same);
while ((bio = bio_list_pop(&bio_list_on_stack[0])) != NULL)
- if (q == bdev_get_queue(bio->bi_bdev))
+ if (q == bio->bi_disk->queue)
bio_list_add(&same, bio);
else
bio_list_add(&lower, bio);
@@ -2244,7 +2257,7 @@ blk_qc_t submit_bio(struct bio *bio)
unsigned int count;
if (unlikely(bio_op(bio) == REQ_OP_WRITE_SAME))
- count = bdev_logical_block_size(bio->bi_bdev) >> 9;
+ count = queue_logical_block_size(bio->bi_disk->queue);
else
count = bio_sectors(bio);
@@ -2261,8 +2274,7 @@ blk_qc_t submit_bio(struct bio *bio)
current->comm, task_pid_nr(current),
op_is_write(bio_op(bio)) ? "WRITE" : "READ",
(unsigned long long)bio->bi_iter.bi_sector,
- bdevname(bio->bi_bdev, b),
- count);
+ bio_devname(bio, b), count);
}
}
@@ -2431,8 +2443,8 @@ void blk_account_io_done(struct request *req)
part_stat_inc(cpu, part, ios[rw]);
part_stat_add(cpu, part, ticks[rw], duration);
- part_round_stats(cpu, part);
- part_dec_in_flight(part, rw);
+ part_round_stats(req->q, cpu, part);
+ part_dec_in_flight(req->q, part, rw);
hd_struct_put(part);
part_stat_unlock();
@@ -2489,8 +2501,8 @@ void blk_account_io_start(struct request *rq, bool new_io)
part = &rq->rq_disk->part0;
hd_struct_get(part);
}
- part_round_stats(cpu, part);
- part_inc_in_flight(part, rw);
+ part_round_stats(rq->q, cpu, part);
+ part_inc_in_flight(rq->q, part, rw);
rq->part = part;
}
@@ -2603,7 +2615,7 @@ struct request *blk_peek_request(struct request_queue *q)
}
EXPORT_SYMBOL(blk_peek_request);
-void blk_dequeue_request(struct request *rq)
+static void blk_dequeue_request(struct request *rq)
{
struct request_queue *q = rq->q;
@@ -2630,9 +2642,6 @@ void blk_dequeue_request(struct request *rq)
* Description:
* Dequeue @req and start timeout timer on it. This hands off the
* request to the driver.
- *
- * Block internal functions which don't want to start timer should
- * call blk_dequeue_request().
*/
void blk_start_request(struct request *req)
{
@@ -3035,8 +3044,8 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
rq->__data_len = bio->bi_iter.bi_size;
rq->bio = rq->biotail = bio;
- if (bio->bi_bdev)
- rq->rq_disk = bio->bi_bdev->bd_disk;
+ if (bio->bi_disk)
+ rq->rq_disk = bio->bi_disk;
}
#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
diff --git a/block/blk-flush.c b/block/blk-flush.c
index ed5fe32..4938bec 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -1,12 +1,12 @@
/*
- * Functions to sequence FLUSH and FUA writes.
+ * Functions to sequence PREFLUSH and FUA writes.
*
* Copyright (C) 2011 Max Planck Institute for Gravitational Physics
* Copyright (C) 2011 Tejun Heo <tj@kernel.org>
*
* This file is released under the GPLv2.
*
- * REQ_{FLUSH|FUA} requests are decomposed to sequences consisted of three
+ * REQ_{PREFLUSH|FUA} requests are decomposed to sequences consisted of three
* optional steps - PREFLUSH, DATA and POSTFLUSH - according to the request
* properties and hardware capability.
*
@@ -16,9 +16,9 @@
* REQ_FUA means that the data must be on non-volatile media on request
* completion.
*
- * If the device doesn't have writeback cache, FLUSH and FUA don't make any
- * difference. The requests are either completed immediately if there's no
- * data or executed as normal requests otherwise.
+ * If the device doesn't have writeback cache, PREFLUSH and FUA don't make any
+ * difference. The requests are either completed immediately if there's no data
+ * or executed as normal requests otherwise.
*
* If the device has writeback cache and supports FUA, REQ_PREFLUSH is
* translated to PREFLUSH but REQ_FUA is passed down directly with DATA.
@@ -31,7 +31,7 @@
* fq->flush_queue[fq->flush_pending_idx]. Once certain criteria are met, a
* REQ_OP_FLUSH is issued and the pending_idx is toggled. When the flush
* completes, all the requests which were pending are proceeded to the next
- * step. This allows arbitrary merging of different types of FLUSH/FUA
+ * step. This allows arbitrary merging of different types of PREFLUSH/FUA
* requests.
*
* Currently, the following conditions are used to determine when to issue
@@ -47,19 +47,19 @@
* C3. The second condition is ignored if there is a request which has
* waited longer than FLUSH_PENDING_TIMEOUT. This is to avoid
* starvation in the unlikely case where there are continuous stream of
- * FUA (without FLUSH) requests.
+ * FUA (without PREFLUSH) requests.
*
* For devices which support FUA, it isn't clear whether C2 (and thus C3)
* is beneficial.
*
- * Note that a sequenced FLUSH/FUA request with DATA is completed twice.
+ * Note that a sequenced PREFLUSH/FUA request with DATA is completed twice.
* Once while executing DATA and again after the whole sequence is
* complete. The first completion updates the contained bio but doesn't
* finish it so that the bio submitter is notified only after the whole
* sequence is complete. This is implemented by testing RQF_FLUSH_SEQ in
* req_bio_endio().
*
- * The above peculiarity requires that each FLUSH/FUA request has only one
+ * The above peculiarity requires that each PREFLUSH/FUA request has only one
* bio attached to it, which is guaranteed as they aren't allowed to be
* merged in the usual way.
*/
@@ -76,7 +76,7 @@
#include "blk-mq-tag.h"
#include "blk-mq-sched.h"
-/* FLUSH/FUA sequences */
+/* PREFLUSH/FUA sequences */
enum {
REQ_FSEQ_PREFLUSH = (1 << 0), /* pre-flushing in progress */
REQ_FSEQ_DATA = (1 << 1), /* data write in progress */
@@ -148,7 +148,7 @@ static bool blk_flush_queue_rq(struct request *rq, bool add_front)
/**
* blk_flush_complete_seq - complete flush sequence
- * @rq: FLUSH/FUA request being sequenced
+ * @rq: PREFLUSH/FUA request being sequenced
* @fq: flush queue
* @seq: sequences to complete (mask of %REQ_FSEQ_*, can be zero)
* @error: whether an error occurred
@@ -406,7 +406,7 @@ static void mq_flush_data_end_io(struct request *rq, blk_status_t error)
}
/**
- * blk_insert_flush - insert a new FLUSH/FUA request
+ * blk_insert_flush - insert a new PREFLUSH/FUA request
* @rq: request to insert
*
* To be called from __elv_add_request() for %ELEVATOR_INSERT_FLUSH insertions.
@@ -525,7 +525,7 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
return -ENXIO;
bio = bio_alloc(gfp_mask, 0);
- bio->bi_bdev = bdev;
+ bio_set_dev(bio, bdev);
bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
ret = submit_bio_wait(bio);
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 3fe0aec..e01adb5 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -77,7 +77,7 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
bio = next_bio(bio, 0, gfp_mask);
bio->bi_iter.bi_sector = sector;
- bio->bi_bdev = bdev;
+ bio_set_dev(bio, bdev);
bio_set_op_attrs(bio, op, 0);
bio->bi_iter.bi_size = req_sects << 9;
@@ -168,7 +168,7 @@ static int __blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
while (nr_sects) {
bio = next_bio(bio, 1, gfp_mask);
bio->bi_iter.bi_sector = sector;
- bio->bi_bdev = bdev;
+ bio_set_dev(bio, bdev);
bio->bi_vcnt = 1;
bio->bi_io_vec->bv_page = page;
bio->bi_io_vec->bv_offset = 0;
@@ -241,7 +241,7 @@ static int __blkdev_issue_write_zeroes(struct block_device *bdev,
while (nr_sects) {
bio = next_bio(bio, 0, gfp_mask);
bio->bi_iter.bi_sector = sector;
- bio->bi_bdev = bdev;
+ bio_set_dev(bio, bdev);
bio->bi_opf = REQ_OP_WRITE_ZEROES;
if (flags & BLKDEV_ZERO_NOUNMAP)
bio->bi_opf |= REQ_NOUNMAP;
@@ -323,7 +323,7 @@ int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
bio = next_bio(bio, __blkdev_sectors_to_bio_pages(nr_sects),
gfp_mask);
bio->bi_iter.bi_sector = sector;
- bio->bi_bdev = bdev;
+ bio_set_dev(bio, bdev);
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
while (nr_sects != 0) {
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 99038830..aa524ca 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -633,8 +633,8 @@ static void blk_account_io_merge(struct request *req)
cpu = part_stat_lock();
part = req->part;
- part_round_stats(cpu, part);
- part_dec_in_flight(part, rq_data_dir(req));
+ part_round_stats(req->q, cpu, part);
+ part_dec_in_flight(req->q, part, rq_data_dir(req));
hd_struct_put(part);
part_stat_unlock();
@@ -786,7 +786,7 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
return false;
/* must be same device and not a special request */
- if (rq->rq_disk != bio->bi_bdev->bd_disk || req_no_special_merge(rq))
+ if (rq->rq_disk != bio->bi_disk || req_no_special_merge(rq))
return false;
/* only merge integrity protected bio into ditto rq */
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 4f927a5..980e730 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -48,8 +48,6 @@ static int blk_flags_show(struct seq_file *m, const unsigned long flags,
static const char *const blk_queue_flag_name[] = {
QUEUE_FLAG_NAME(QUEUED),
QUEUE_FLAG_NAME(STOPPED),
- QUEUE_FLAG_NAME(SYNCFULL),
- QUEUE_FLAG_NAME(ASYNCFULL),
QUEUE_FLAG_NAME(DYING),
QUEUE_FLAG_NAME(BYPASS),
QUEUE_FLAG_NAME(BIDI),
@@ -744,7 +742,7 @@ static int blk_mq_debugfs_release(struct inode *inode, struct file *file)
return seq_release(inode, file);
}
-const struct file_operations blk_mq_debugfs_fops = {
+static const struct file_operations blk_mq_debugfs_fops = {
.open = blk_mq_debugfs_open,
.read = seq_read,
.write = blk_mq_debugfs_write,
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index d0be72c..6714507 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -214,7 +214,11 @@ static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
bitnr += tags->nr_reserved_tags;
rq = tags->rqs[bitnr];
- if (rq->q == hctx->queue)
+ /*
+ * We can hit rq == NULL here, because the tagging functions
+ * test and set the bit before assining ->rqs[].
+ */
+ if (rq && rq->q == hctx->queue)
iter_data->fn(hctx, rq, iter_data->data, reserved);
return true;
}
@@ -248,9 +252,15 @@ static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
if (!reserved)
bitnr += tags->nr_reserved_tags;
- rq = tags->rqs[bitnr];
- iter_data->fn(rq, iter_data->data, reserved);
+ /*
+ * We can hit rq == NULL here, because the tagging functions
+ * test and set the bit before assining ->rqs[].
+ */
+ rq = tags->rqs[bitnr];
+ if (rq)
+ iter_data->fn(rq, iter_data->data, reserved);
+
return true;
}
@@ -288,11 +298,12 @@ void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
}
EXPORT_SYMBOL(blk_mq_tagset_busy_iter);
-int blk_mq_reinit_tagset(struct blk_mq_tag_set *set)
+int blk_mq_reinit_tagset(struct blk_mq_tag_set *set,
+ int (reinit_request)(void *, struct request *))
{
int i, j, ret = 0;
- if (!set->ops->reinit_request)
+ if (WARN_ON_ONCE(!reinit_request))
goto out;
for (i = 0; i < set->nr_hw_queues; i++) {
@@ -305,8 +316,8 @@ int blk_mq_reinit_tagset(struct blk_mq_tag_set *set)
if (!tags->static_rqs[j])
continue;
- ret = set->ops->reinit_request(set->driver_data,
- tags->static_rqs[j]);
+ ret = reinit_request(set->driver_data,
+ tags->static_rqs[j]);
if (ret)
goto out;
}
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 4603b11..3f18cff 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -83,6 +83,41 @@ static void blk_mq_hctx_clear_pending(struct blk_mq_hw_ctx *hctx,
sbitmap_clear_bit(&hctx->ctx_map, ctx->index_hw);
}
+struct mq_inflight {
+ struct hd_struct *part;
+ unsigned int *inflight;
+};
+
+static void blk_mq_check_inflight(struct blk_mq_hw_ctx *hctx,
+ struct request *rq, void *priv,
+ bool reserved)
+{
+ struct mq_inflight *mi = priv;
+
+ if (test_bit(REQ_ATOM_STARTED, &rq->atomic_flags) &&
+ !test_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags)) {
+ /*
+ * index[0] counts the specific partition that was asked
+ * for. index[1] counts the ones that are active on the
+ * whole device, so increment that if mi->part is indeed
+ * a partition, and not a whole device.
+ */
+ if (rq->part == mi->part)
+ mi->inflight[0]++;
+ if (mi->part->partno)
+ mi->inflight[1]++;
+ }
+}
+
+void blk_mq_in_flight(struct request_queue *q, struct hd_struct *part,
+ unsigned int inflight[2])
+{
+ struct mq_inflight mi = { .part = part, .inflight = inflight, };
+
+ inflight[0] = inflight[1] = 0;
+ blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight, &mi);
+}
+
void blk_freeze_queue_start(struct request_queue *q)
{
int freeze_depth;
@@ -624,11 +659,10 @@ static void blk_mq_requeue_work(struct work_struct *work)
container_of(work, struct request_queue, requeue_work.work);
LIST_HEAD(rq_list);
struct request *rq, *next;
- unsigned long flags;
- spin_lock_irqsave(&q->requeue_lock, flags);
+ spin_lock_irq(&q->requeue_lock);
list_splice_init(&q->requeue_list, &rq_list);
- spin_unlock_irqrestore(&q->requeue_lock, flags);
+ spin_unlock_irq(&q->requeue_lock);
list_for_each_entry_safe(rq, next, &rq_list, queuelist) {
if (!(rq->rq_flags & RQF_SOFTBARRIER))
@@ -1102,9 +1136,19 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
{
int srcu_idx;
+ /*
+ * We should be running this queue from one of the CPUs that
+ * are mapped to it.
+ */
WARN_ON(!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask) &&
cpu_online(hctx->next_cpu));
+ /*
+ * We can't run the queue inline with ints disabled. Ensure that
+ * we catch bad users of this early.
+ */
+ WARN_ON_ONCE(in_interrupt());
+
if (!(hctx->flags & BLK_MQ_F_BLOCKING)) {
rcu_read_lock();
blk_mq_sched_dispatch_requests(hctx);
@@ -1218,7 +1262,7 @@ EXPORT_SYMBOL(blk_mq_queue_stopped);
/*
* This function is often used for pausing .queue_rq() by driver when
* there isn't enough resource or some conditions aren't satisfied, and
- * BLK_MQ_RQ_QUEUE_BUSY is usually returned.
+ * BLK_STS_RESOURCE is usually returned.
*
* We do not guarantee that dispatch can be drained or blocked
* after blk_mq_stop_hw_queue() returns. Please use
@@ -1235,7 +1279,7 @@ EXPORT_SYMBOL(blk_mq_stop_hw_queue);
/*
* This function is often used for pausing .queue_rq() by driver when
* there isn't enough resource or some conditions aren't satisfied, and
- * BLK_MQ_RQ_QUEUE_BUSY is usually returned.
+ * BLK_STS_RESOURCE is usually returned.
*
* We do not guarantee that dispatch can be drained or blocked
* after blk_mq_stop_hw_queues() returns. Please use
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 60b01c0..98252b7 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -133,4 +133,7 @@ static inline bool blk_mq_hw_queue_mapped(struct blk_mq_hw_ctx *hctx)
return hctx->nr_ctx && hctx->tags;
}
+void blk_mq_in_flight(struct request_queue *q, struct hd_struct *part,
+ unsigned int inflight[2]);
+
#endif
diff --git a/block/blk-settings.c b/block/blk-settings.c
index be1f115..8559e95 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -68,6 +68,7 @@ EXPORT_SYMBOL_GPL(blk_queue_rq_timeout);
void blk_queue_rq_timed_out(struct request_queue *q, rq_timed_out_fn *fn)
{
+ WARN_ON_ONCE(q->mq_ops);
q->rq_timed_out_fn = fn;
}
EXPORT_SYMBOL_GPL(blk_queue_rq_timed_out);
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 27aceab..b8362c0 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -931,7 +931,9 @@ void blk_unregister_queue(struct gendisk *disk)
if (WARN_ON(!q))
return;
+ mutex_lock(&q->sysfs_lock);
queue_flag_clear_unlocked(QUEUE_FLAG_REGISTERED, q);
+ mutex_unlock(&q->sysfs_lock);
wbt_exit(q);
diff --git a/block/blk-tag.c b/block/blk-tag.c
index 2290f65..e1a9c15 100644
--- a/block/blk-tag.c
+++ b/block/blk-tag.c
@@ -290,7 +290,6 @@ void blk_queue_end_tag(struct request_queue *q, struct request *rq)
*/
clear_bit_unlock(tag, bqt->tag_map);
}
-EXPORT_SYMBOL(blk_queue_end_tag);
/**
* blk_queue_start_tag - find a free tag and assign it
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 80f5481..0fea76a 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -373,10 +373,8 @@ static unsigned int tg_iops_limit(struct throtl_grp *tg, int rw)
if (likely(!blk_trace_note_message_enabled(__td->queue))) \
break; \
if ((__tg)) { \
- char __pbuf[128]; \
- \
- blkg_path(tg_to_blkg(__tg), __pbuf, sizeof(__pbuf)); \
- blk_add_trace_msg(__td->queue, "throtl %s " fmt, __pbuf, ##args); \
+ blk_add_cgroup_trace_msg(__td->queue, \
+ tg_to_blkg(__tg)->blkcg, "throtl " fmt, ##args);\
} else { \
blk_add_trace_msg(__td->queue, "throtl " fmt, ##args); \
} \
@@ -2114,14 +2112,9 @@ static inline void throtl_update_latency_buckets(struct throtl_data *td)
static void blk_throtl_assoc_bio(struct throtl_grp *tg, struct bio *bio)
{
#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
- int ret;
-
- ret = bio_associate_current(bio);
- if (ret == 0 || ret == -EBUSY)
+ if (bio->bi_css)
bio->bi_cg_private = tg;
blk_stat_set_issue(&bio->bi_issue_stat, bio_sectors(bio));
-#else
- bio_associate_current(bio);
#endif
}
diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 3bd15d8..ff57fb5 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -116,7 +116,7 @@ int blkdev_report_zones(struct block_device *bdev,
if (!bio)
return -ENOMEM;
- bio->bi_bdev = bdev;
+ bio_set_dev(bio, bdev);
bio->bi_iter.bi_sector = blk_zone_start(q, sector);
bio_set_op_attrs(bio, REQ_OP_ZONE_REPORT, 0);
@@ -234,7 +234,7 @@ int blkdev_reset_zones(struct block_device *bdev,
bio = bio_alloc(gfp_mask, 0);
bio->bi_iter.bi_sector = sector;
- bio->bi_bdev = bdev;
+ bio_set_dev(bio, bdev);
bio_set_op_attrs(bio, REQ_OP_ZONE_RESET, 0);
ret = submit_bio_wait(bio);
diff --git a/block/blk.h b/block/blk.h
index 3a3d715..fcb9775 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -64,7 +64,6 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
struct bio *bio);
void blk_queue_bypass_start(struct request_queue *q);
void blk_queue_bypass_end(struct request_queue *q);
-void blk_dequeue_request(struct request *rq);
void __blk_queue_free_tags(struct request_queue *q);
void blk_freeze_queue(struct request_queue *q);
@@ -204,6 +203,8 @@ static inline void elv_deactivate_rq(struct request_queue *q, struct request *rq
e->type->ops.sq.elevator_deactivate_req_fn(q, rq);
}
+struct hd_struct *__disk_get_part(struct gendisk *disk, int partno);
+
#ifdef CONFIG_FAIL_IO_TIMEOUT
int blk_should_fake_timeout(struct request_queue *);
ssize_t part_timeout_show(struct device *, struct device_attribute *, char *);
diff --git a/block/bsg.c b/block/bsg.c
index 37663b6..ee1335c 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -932,15 +932,8 @@ static long bsg_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
return ret;
}
- /*
- * block device ioctls
- */
default:
-#if 0
- return ioctl_by_bdev(bd->bdev, cmd, arg);
-#else
return -ENOTTY;
-#endif
}
}
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 3d5c289..9b86e9b 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -656,20 +656,17 @@ static inline void cfqg_put(struct cfq_group *cfqg)
}
#define cfq_log_cfqq(cfqd, cfqq, fmt, args...) do { \
- char __pbuf[128]; \
- \
- blkg_path(cfqg_to_blkg((cfqq)->cfqg), __pbuf, sizeof(__pbuf)); \
- blk_add_trace_msg((cfqd)->queue, "cfq%d%c%c %s " fmt, (cfqq)->pid, \
+ blk_add_cgroup_trace_msg((cfqd)->queue, \
+ cfqg_to_blkg((cfqq)->cfqg)->blkcg, \
+ "cfq%d%c%c " fmt, (cfqq)->pid, \
cfq_cfqq_sync((cfqq)) ? 'S' : 'A', \
cfqq_type((cfqq)) == SYNC_NOIDLE_WORKLOAD ? 'N' : ' ',\
- __pbuf, ##args); \
+ ##args); \
} while (0)
#define cfq_log_cfqg(cfqd, cfqg, fmt, args...) do { \
- char __pbuf[128]; \
- \
- blkg_path(cfqg_to_blkg(cfqg), __pbuf, sizeof(__pbuf)); \
- blk_add_trace_msg((cfqd)->queue, "%s " fmt, __pbuf, ##args); \
+ blk_add_cgroup_trace_msg((cfqd)->queue, \
+ cfqg_to_blkg(cfqg)->blkcg, fmt, ##args); \
} while (0)
static inline void cfqg_stats_update_io_add(struct cfq_group *cfqg,
@@ -2937,7 +2934,8 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
* for devices that support queuing, otherwise we still have a problem
* with sync vs async workloads.
*/
- if (blk_queue_nonrot(cfqd->queue) && cfqd->hw_tag)
+ if (blk_queue_nonrot(cfqd->queue) && cfqd->hw_tag &&
+ !cfqd->cfq_group_idle)
return;
WARN_ON(!RB_EMPTY_ROOT(&cfqq->sort_list));
@@ -4714,13 +4712,12 @@ cfq_var_show(unsigned int var, char *page)
return sprintf(page, "%u\n", var);
}
-static ssize_t
-cfq_var_store(unsigned int *var, const char *page, size_t count)
+static void
+cfq_var_store(unsigned int *var, const char *page)
{
char *p = (char *) page;
*var = simple_strtoul(p, &p, 10);
- return count;
}
#define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \
@@ -4766,7 +4763,7 @@ static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count)
{ \
struct cfq_data *cfqd = e->elevator_data; \
unsigned int __data; \
- int ret = cfq_var_store(&__data, (page), count); \
+ cfq_var_store(&__data, (page)); \
if (__data < (MIN)) \
__data = (MIN); \
else if (__data > (MAX)) \
@@ -4775,7 +4772,7 @@ static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count)
*(__PTR) = (u64)__data * NSEC_PER_MSEC; \
else \
*(__PTR) = __data; \
- return ret; \
+ return count; \
}
STORE_FUNCTION(cfq_quantum_store, &cfqd->cfq_quantum, 1, UINT_MAX, 0);
STORE_FUNCTION(cfq_fifo_expire_sync_store, &cfqd->cfq_fifo_expire[1], 1,
@@ -4800,13 +4797,13 @@ static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count)
{ \
struct cfq_data *cfqd = e->elevator_data; \
unsigned int __data; \
- int ret = cfq_var_store(&__data, (page), count); \
+ cfq_var_store(&__data, (page)); \
if (__data < (MIN)) \
__data = (MIN); \
else if (__data > (MAX)) \
__data = (MAX); \
*(__PTR) = (u64)__data * NSEC_PER_USEC; \
- return ret; \
+ return count; \
}
USEC_STORE_FUNCTION(cfq_slice_idle_us_store, &cfqd->cfq_slice_idle, 0, UINT_MAX);
USEC_STORE_FUNCTION(cfq_group_idle_us_store, &cfqd->cfq_group_idle, 0, UINT_MAX);
diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
index c68f6bb..b83f774 100644
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c
@@ -373,13 +373,12 @@ deadline_var_show(int var, char *page)
return sprintf(page, "%d\n", var);
}
-static ssize_t
-deadline_var_store(int *var, const char *page, size_t count)
+static void
+deadline_var_store(int *var, const char *page)
{
char *p = (char *) page;
*var = simple_strtol(p, &p, 10);
- return count;
}
#define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \
@@ -403,7 +402,7 @@ static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count)
{ \
struct deadline_data *dd = e->elevator_data; \
int __data; \
- int ret = deadline_var_store(&__data, (page), count); \
+ deadline_var_store(&__data, (page)); \
if (__data < (MIN)) \
__data = (MIN); \
else if (__data > (MAX)) \
@@ -412,7 +411,7 @@ static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count)
*(__PTR) = msecs_to_jiffies(__data); \
else \
*(__PTR) = __data; \
- return ret; \
+ return count; \
}
STORE_FUNCTION(deadline_read_expire_store, &dd->fifo_expire[READ], 0, INT_MAX, 1);
STORE_FUNCTION(deadline_write_expire_store, &dd->fifo_expire[WRITE], 0, INT_MAX, 1);
diff --git a/block/elevator.c b/block/elevator.c
index 4bb2f0c..153926a 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -1055,6 +1055,10 @@ static int __elevator_change(struct request_queue *q, const char *name)
char elevator_name[ELV_NAME_MAX];
struct elevator_type *e;
+ /* Make sure queue is not in the middle of being removed */
+ if (!test_bit(QUEUE_FLAG_REGISTERED, &q->queue_flags))
+ return -ENOENT;
+
/*
* Special case for mq, turn off scheduling
*/
diff --git a/block/genhd.c b/block/genhd.c
index 51c1d40..dd305c6 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -45,6 +45,52 @@ static void disk_add_events(struct gendisk *disk);
static void disk_del_events(struct gendisk *disk);
static void disk_release_events(struct gendisk *disk);
+void part_inc_in_flight(struct request_queue *q, struct hd_struct *part, int rw)
+{
+ if (q->mq_ops)
+ return;
+
+ atomic_inc(&part->in_flight[rw]);
+ if (part->partno)
+ atomic_inc(&part_to_disk(part)->part0.in_flight[rw]);
+}
+
+void part_dec_in_flight(struct request_queue *q, struct hd_struct *part, int rw)
+{
+ if (q->mq_ops)
+ return;
+
+ atomic_dec(&part->in_flight[rw]);
+ if (part->partno)
+ atomic_dec(&part_to_disk(part)->part0.in_flight[rw]);
+}
+
+void part_in_flight(struct request_queue *q, struct hd_struct *part,
+ unsigned int inflight[2])
+{
+ if (q->mq_ops) {
+ blk_mq_in_flight(q, part, inflight);
+ return;
+ }
+
+ inflight[0] = atomic_read(&part->in_flight[0]) +
+ atomic_read(&part->in_flight[1]);
+ if (part->partno) {
+ part = &part_to_disk(part)->part0;
+ inflight[1] = atomic_read(&part->in_flight[0]) +
+ atomic_read(&part->in_flight[1]);
+ }
+}
+
+struct hd_struct *__disk_get_part(struct gendisk *disk, int partno)
+{
+ struct disk_part_tbl *ptbl = rcu_dereference(disk->part_tbl);
+
+ if (unlikely(partno < 0 || partno >= ptbl->len))
+ return NULL;
+ return rcu_dereference(ptbl->part[partno]);
+}
+
/**
* disk_get_part - get partition
* @disk: disk to look partition from
@@ -61,21 +107,12 @@ static void disk_release_events(struct gendisk *disk);
*/
struct hd_struct *disk_get_part(struct gendisk *disk, int partno)
{
- struct hd_struct *part = NULL;
- struct disk_part_tbl *ptbl;
-
- if (unlikely(partno < 0))
- return NULL;
+ struct hd_struct *part;
rcu_read_lock();
-
- ptbl = rcu_dereference(disk->part_tbl);
- if (likely(partno < ptbl->len)) {
- part = rcu_dereference(ptbl->part[partno]);
- if (part)
- get_device(part_to_dev(part));
- }
-
+ part = __disk_get_part(disk, partno);
+ if (part)
+ get_device(part_to_dev(part));
rcu_read_unlock();
return part;
@@ -1098,12 +1135,13 @@ static const struct attribute_group *disk_attr_groups[] = {
* original ptbl is freed using RCU callback.
*
* LOCKING:
- * Matching bd_mutx locked.
+ * Matching bd_mutex locked or the caller is the only user of @disk.
*/
static void disk_replace_part_tbl(struct gendisk *disk,
struct disk_part_tbl *new_ptbl)
{
- struct disk_part_tbl *old_ptbl = disk->part_tbl;
+ struct disk_part_tbl *old_ptbl =
+ rcu_dereference_protected(disk->part_tbl, 1);
rcu_assign_pointer(disk->part_tbl, new_ptbl);
@@ -1122,14 +1160,16 @@ static void disk_replace_part_tbl(struct gendisk *disk,
* uses RCU to allow unlocked dereferencing for stats and other stuff.
*
* LOCKING:
- * Matching bd_mutex locked, might sleep.
+ * Matching bd_mutex locked or the caller is the only user of @disk.
+ * Might sleep.
*
* RETURNS:
* 0 on success, -errno on failure.
*/
int disk_expand_part_tbl(struct gendisk *disk, int partno)
{
- struct disk_part_tbl *old_ptbl = disk->part_tbl;
+ struct disk_part_tbl *old_ptbl =
+ rcu_dereference_protected(disk->part_tbl, 1);
struct disk_part_tbl *new_ptbl;
int len = old_ptbl ? old_ptbl->len : 0;
int i, target;
@@ -1212,6 +1252,7 @@ static int diskstats_show(struct seq_file *seqf, void *v)
struct disk_part_iter piter;
struct hd_struct *hd;
char buf[BDEVNAME_SIZE];
+ unsigned int inflight[2];
int cpu;
/*
@@ -1225,8 +1266,9 @@ static int diskstats_show(struct seq_file *seqf, void *v)
disk_part_iter_init(&piter, gp, DISK_PITER_INCL_EMPTY_PART0);
while ((hd = disk_part_iter_next(&piter))) {
cpu = part_stat_lock();
- part_round_stats(cpu, hd);
+ part_round_stats(gp->queue, cpu, hd);
part_stat_unlock();
+ part_in_flight(gp->queue, hd, inflight);
seq_printf(seqf, "%4d %7d %s %lu %lu %lu "
"%u %lu %lu %lu %u %u %u %u\n",
MAJOR(part_devt(hd)), MINOR(part_devt(hd)),
@@ -1239,7 +1281,7 @@ static int diskstats_show(struct seq_file *seqf, void *v)
part_stat_read(hd, merges[WRITE]),
part_stat_read(hd, sectors[WRITE]),
jiffies_to_msecs(part_stat_read(hd, ticks[WRITE])),
- part_in_flight(hd),
+ inflight[0],
jiffies_to_msecs(part_stat_read(hd, io_ticks)),
jiffies_to_msecs(part_stat_read(hd, time_in_queue))
);
@@ -1321,6 +1363,14 @@ EXPORT_SYMBOL(alloc_disk);
struct gendisk *alloc_disk_node(int minors, int node_id)
{
struct gendisk *disk;
+ struct disk_part_tbl *ptbl;
+
+ if (minors > DISK_MAX_PARTS) {
+ printk(KERN_ERR
+ "block: can't allocated more than %d partitions\n",
+ DISK_MAX_PARTS);
+ minors = DISK_MAX_PARTS;
+ }
disk = kzalloc_node(sizeof(struct gendisk), GFP_KERNEL, node_id);
if (disk) {
@@ -1334,7 +1384,8 @@ struct gendisk *alloc_disk_node(int minors, int node_id)
kfree(disk);
return NULL;
}
- disk->part_tbl->part[0] = &disk->part0;
+ ptbl = rcu_dereference_protected(disk->part_tbl, 1);
+ rcu_assign_pointer(ptbl->part[0], &disk->part0);
/*
* set_capacity() and get_capacity() currently don't use
diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index 1b964a3..a1cad43 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -457,13 +457,12 @@ deadline_var_show(int var, char *page)
return sprintf(page, "%d\n", var);
}
-static ssize_t
-deadline_var_store(int *var, const char *page, size_t count)
+static void
+deadline_var_store(int *var, const char *page)
{
char *p = (char *) page;
*var = simple_strtol(p, &p, 10);
- return count;
}
#define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \
@@ -487,7 +486,7 @@ static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count)
{ \
struct deadline_data *dd = e->elevator_data; \
int __data; \
- int ret = deadline_var_store(&__data, (page), count); \
+ deadline_var_store(&__data, (page)); \
if (__data < (MIN)) \
__data = (MIN); \
else if (__data > (MAX)) \
@@ -496,7 +495,7 @@ static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count)
*(__PTR) = msecs_to_jiffies(__data); \
else \
*(__PTR) = __data; \
- return ret; \
+ return count; \
}
STORE_FUNCTION(deadline_read_expire_store, &dd->fifo_expire[READ], 0, INT_MAX, 1);
STORE_FUNCTION(deadline_write_expire_store, &dd->fifo_expire[WRITE], 0, INT_MAX, 1);
@@ -660,6 +659,7 @@ static struct elevator_type mq_deadline = {
.elevator_name = "mq-deadline",
.elevator_owner = THIS_MODULE,
};
+MODULE_ALIAS("mq-deadline-iosched");
static int __init deadline_init(void)
{
diff --git a/block/partition-generic.c b/block/partition-generic.c
index c5ec824..86e8fe1a 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -112,11 +112,14 @@ ssize_t part_stat_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct hd_struct *p = dev_to_part(dev);
+ struct request_queue *q = dev_to_disk(dev)->queue;
+ unsigned int inflight[2];
int cpu;
cpu = part_stat_lock();
- part_round_stats(cpu, p);
+ part_round_stats(q, cpu, p);
part_stat_unlock();
+ part_in_flight(q, p, inflight);
return sprintf(buf,
"%8lu %8lu %8llu %8u "
"%8lu %8lu %8llu %8u "
@@ -130,7 +133,7 @@ ssize_t part_stat_show(struct device *dev,
part_stat_read(p, merges[WRITE]),
(unsigned long long)part_stat_read(p, sectors[WRITE]),
jiffies_to_msecs(part_stat_read(p, ticks[WRITE])),
- part_in_flight(p),
+ inflight[0],
jiffies_to_msecs(part_stat_read(p, io_ticks)),
jiffies_to_msecs(part_stat_read(p, time_in_queue)));
}
@@ -249,15 +252,20 @@ void __delete_partition(struct percpu_ref *ref)
call_rcu(&part->rcu_head, delete_partition_rcu_cb);
}
+/*
+ * Must be called either with bd_mutex held, before a disk can be opened or
+ * after all disk users are gone.
+ */
void delete_partition(struct gendisk *disk, int partno)
{
- struct disk_part_tbl *ptbl = disk->part_tbl;
+ struct disk_part_tbl *ptbl =
+ rcu_dereference_protected(disk->part_tbl, 1);
struct hd_struct *part;
if (partno >= ptbl->len)
return;
- part = ptbl->part[partno];
+ part = rcu_dereference_protected(ptbl->part[partno], 1);
if (!part)
return;
@@ -277,6 +285,10 @@ static ssize_t whole_disk_show(struct device *dev,
static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH,
whole_disk_show, NULL);
+/*
+ * Must be called either with bd_mutex held, before a disk can be opened or
+ * after all disk users are gone.
+ */
struct hd_struct *add_partition(struct gendisk *disk, int partno,
sector_t start, sector_t len, int flags,
struct partition_meta_info *info)
@@ -292,7 +304,7 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
err = disk_expand_part_tbl(disk, partno);
if (err)
return ERR_PTR(err);
- ptbl = disk->part_tbl;
+ ptbl = rcu_dereference_protected(disk->part_tbl, 1);
if (ptbl->part[partno])
return ERR_PTR(-EBUSY);
@@ -391,7 +403,6 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
device_del(pdev);
out_put:
put_device(pdev);
- blk_free_devt(devt);
return ERR_PTR(err);
}
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index 245a879..255591a 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -1678,9 +1678,12 @@ static bool DAC960_V1_ReadControllerConfiguration(DAC960_Controller_T
Enquiry2->FirmwareID.FirmwareType = '0';
Enquiry2->FirmwareID.TurnID = 0;
}
- sprintf(Controller->FirmwareVersion, "%d.%02d-%c-%02d",
- Enquiry2->FirmwareID.MajorVersion, Enquiry2->FirmwareID.MinorVersion,
- Enquiry2->FirmwareID.FirmwareType, Enquiry2->FirmwareID.TurnID);
+ snprintf(Controller->FirmwareVersion, sizeof(Controller->FirmwareVersion),
+ "%d.%02d-%c-%02d",
+ Enquiry2->FirmwareID.MajorVersion,
+ Enquiry2->FirmwareID.MinorVersion,
+ Enquiry2->FirmwareID.FirmwareType,
+ Enquiry2->FirmwareID.TurnID);
if (!((Controller->FirmwareVersion[0] == '5' &&
strcmp(Controller->FirmwareVersion, "5.06") >= 0) ||
(Controller->FirmwareVersion[0] == '4' &&
@@ -6588,7 +6591,8 @@ static void DAC960_CreateProcEntries(DAC960_Controller_T *Controller)
&dac960_proc_fops);
}
- sprintf(Controller->ControllerName, "c%d", Controller->ControllerNumber);
+ snprintf(Controller->ControllerName, sizeof(Controller->ControllerName),
+ "c%d", Controller->ControllerNumber);
ControllerProcEntry = proc_mkdir(Controller->ControllerName,
DAC960_ProcDirectoryEntry);
proc_create_data("initial_status", 0, ControllerProcEntry, &dac960_initial_status_proc_fops, Controller);
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index 80aaf34..104180e 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -17,6 +17,7 @@
config BLK_DEV_NULL_BLK
tristate "Null test block driver"
+ depends on CONFIGFS_FS
config BLK_DEV_FD
tristate "Normal floppy disk support"
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 5d9ed06..bbd0d18 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -294,14 +294,13 @@ static int brd_do_bvec(struct brd_device *brd, struct page *page,
static blk_qc_t brd_make_request(struct request_queue *q, struct bio *bio)
{
- struct block_device *bdev = bio->bi_bdev;
- struct brd_device *brd = bdev->bd_disk->private_data;
+ struct brd_device *brd = bio->bi_disk->private_data;
struct bio_vec bvec;
sector_t sector;
struct bvec_iter iter;
sector = bio->bi_iter.bi_sector;
- if (bio_end_sector(bio) > get_capacity(bdev->bd_disk))
+ if (bio_end_sector(bio) > get_capacity(bio->bi_disk))
goto io_error;
bio_for_each_segment(bvec, bio, iter) {
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index e02c45c..5f0eaee 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -151,7 +151,7 @@ static int _drbd_md_sync_page_io(struct drbd_device *device,
op_flags |= REQ_SYNC;
bio = bio_alloc_drbd(GFP_NOIO);
- bio->bi_bdev = bdev->md_bdev;
+ bio_set_dev(bio, bdev->md_bdev);
bio->bi_iter.bi_sector = sector;
err = -EIO;
if (bio_add_page(bio, device->md_io.page, size, 0) != size)
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index 809fd24..bd97908 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -1019,7 +1019,7 @@ static void bm_page_io_async(struct drbd_bm_aio_ctx *ctx, int page_nr) __must_ho
bm_store_page_idx(page, page_nr);
} else
page = b->bm_pages[page_nr];
- bio->bi_bdev = device->ldev->md_bdev;
+ bio_set_dev(bio, device->ldev->md_bdev);
bio->bi_iter.bi_sector = on_disk_sector;
/* bio_add_page of a single page to an empty bio will always succeed,
* according to api. Do we want to assert that? */
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index d17b6e6..7e8589c 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -63,19 +63,15 @@
# define __must_hold(x)
#endif
-/* module parameter, defined in drbd_main.c */
-extern unsigned int minor_count;
-extern bool disable_sendpage;
-extern bool allow_oos;
-void tl_abort_disk_io(struct drbd_device *device);
-
+/* shared module parameters, defined in drbd_main.c */
#ifdef CONFIG_DRBD_FAULT_INJECTION
-extern int enable_faults;
-extern int fault_rate;
-extern int fault_devs;
+extern int drbd_enable_faults;
+extern int drbd_fault_rate;
#endif
-extern char usermode_helper[];
+extern unsigned int drbd_minor_count;
+extern char drbd_usermode_helper[];
+extern int drbd_proc_details;
/* This is used to stop/restart our threads.
@@ -181,8 +177,8 @@ _drbd_insert_fault(struct drbd_device *device, unsigned int type);
static inline int
drbd_insert_fault(struct drbd_device *device, unsigned int type) {
#ifdef CONFIG_DRBD_FAULT_INJECTION
- return fault_rate &&
- (enable_faults & (1<<type)) &&
+ return drbd_fault_rate &&
+ (drbd_enable_faults & (1<<type)) &&
_drbd_insert_fault(device, type);
#else
return 0;
@@ -745,6 +741,8 @@ struct drbd_connection {
unsigned current_tle_writes; /* writes seen within this tl epoch */
unsigned long last_reconnect_jif;
+ /* empty member on older kernels without blk_start_plug() */
+ struct blk_plug receiver_plug;
struct drbd_thread receiver;
struct drbd_thread worker;
struct drbd_thread ack_receiver;
@@ -1131,7 +1129,8 @@ extern void conn_send_sr_reply(struct drbd_connection *connection, enum drbd_sta
extern int drbd_send_rs_deallocated(struct drbd_peer_device *, struct drbd_peer_request *);
extern void drbd_backing_dev_free(struct drbd_device *device, struct drbd_backing_dev *ldev);
extern void drbd_device_cleanup(struct drbd_device *device);
-void drbd_print_uuids(struct drbd_device *device, const char *text);
+extern void drbd_print_uuids(struct drbd_device *device, const char *text);
+extern void drbd_queue_unplug(struct drbd_device *device);
extern void conn_md_sync(struct drbd_connection *connection);
extern void drbd_md_write(struct drbd_device *device, void *buffer);
@@ -1463,8 +1462,6 @@ extern struct drbd_resource *drbd_find_resource(const char *name);
extern void drbd_destroy_resource(struct kref *kref);
extern void conn_free_crypto(struct drbd_connection *connection);
-extern int proc_details;
-
/* drbd_req */
extern void do_submit(struct work_struct *ws);
extern void __drbd_make_request(struct drbd_device *, struct bio *, unsigned long);
@@ -1628,8 +1625,8 @@ static inline void drbd_generic_make_request(struct drbd_device *device,
int fault_type, struct bio *bio)
{
__release(local);
- if (!bio->bi_bdev) {
- drbd_err(device, "drbd_generic_make_request: bio->bi_bdev == NULL\n");
+ if (!bio->bi_disk) {
+ drbd_err(device, "drbd_generic_make_request: bio->bi_disk == NULL\n");
bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
return;
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index e2ed28d..8cb3791 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -77,41 +77,41 @@ MODULE_PARM_DESC(minor_count, "Approximate number of drbd devices ("
MODULE_ALIAS_BLOCKDEV_MAJOR(DRBD_MAJOR);
#include <linux/moduleparam.h>
-/* allow_open_on_secondary */
-MODULE_PARM_DESC(allow_oos, "DONT USE!");
/* thanks to these macros, if compiled into the kernel (not-module),
- * this becomes the boot parameter drbd.minor_count */
-module_param(minor_count, uint, 0444);
-module_param(disable_sendpage, bool, 0644);
-module_param(allow_oos, bool, 0);
-module_param(proc_details, int, 0644);
+ * these become boot parameters (e.g., drbd.minor_count) */
#ifdef CONFIG_DRBD_FAULT_INJECTION
-int enable_faults;
-int fault_rate;
-static int fault_count;
-int fault_devs;
+int drbd_enable_faults;
+int drbd_fault_rate;
+static int drbd_fault_count;
+static int drbd_fault_devs;
/* bitmap of enabled faults */
-module_param(enable_faults, int, 0664);
+module_param_named(enable_faults, drbd_enable_faults, int, 0664);
/* fault rate % value - applies to all enabled faults */
-module_param(fault_rate, int, 0664);
+module_param_named(fault_rate, drbd_fault_rate, int, 0664);
/* count of faults inserted */
-module_param(fault_count, int, 0664);
+module_param_named(fault_count, drbd_fault_count, int, 0664);
/* bitmap of devices to insert faults on */
-module_param(fault_devs, int, 0644);
+module_param_named(fault_devs, drbd_fault_devs, int, 0644);
#endif
-/* module parameter, defined */
-unsigned int minor_count = DRBD_MINOR_COUNT_DEF;
-bool disable_sendpage;
-bool allow_oos;
-int proc_details; /* Detail level in proc drbd*/
+/* module parameters we can keep static */
+static bool drbd_allow_oos; /* allow_open_on_secondary */
+static bool drbd_disable_sendpage;
+MODULE_PARM_DESC(allow_oos, "DONT USE!");
+module_param_named(allow_oos, drbd_allow_oos, bool, 0);
+module_param_named(disable_sendpage, drbd_disable_sendpage, bool, 0644);
+/* module parameters we share */
+int drbd_proc_details; /* Detail level in proc drbd*/
+module_param_named(proc_details, drbd_proc_details, int, 0644);
+/* module parameters shared with defaults */
+unsigned int drbd_minor_count = DRBD_MINOR_COUNT_DEF;
/* Module parameter for setting the user mode helper program
* to run. Default is /sbin/drbdadm */
-char usermode_helper[80] = "/sbin/drbdadm";
-
-module_param_string(usermode_helper, usermode_helper, sizeof(usermode_helper), 0644);
+char drbd_usermode_helper[80] = "/sbin/drbdadm";
+module_param_named(minor_count, drbd_minor_count, uint, 0444);
+module_param_string(usermode_helper, drbd_usermode_helper, sizeof(drbd_usermode_helper), 0644);
/* in 2.6.x, our device mapping and config info contains our virtual gendisks
* as member "struct gendisk *vdisk;"
@@ -923,7 +923,9 @@ void drbd_gen_and_send_sync_uuid(struct drbd_peer_device *peer_device)
}
/* communicated if (agreed_features & DRBD_FF_WSAME) */
-void assign_p_sizes_qlim(struct drbd_device *device, struct p_sizes *p, struct request_queue *q)
+static void
+assign_p_sizes_qlim(struct drbd_device *device, struct p_sizes *p,
+ struct request_queue *q)
{
if (q) {
p->qlim->physical_block_size = cpu_to_be32(queue_physical_block_size(q));
@@ -1560,7 +1562,7 @@ static int _drbd_send_page(struct drbd_peer_device *peer_device, struct page *pa
* put_page(); and would cause either a VM_BUG directly, or
* __page_cache_release a page that would actually still be referenced
* by someone, leading to some obscure delayed Oops somewhere else. */
- if (disable_sendpage || (page_count(page) < 1) || PageSlab(page))
+ if (drbd_disable_sendpage || (page_count(page) < 1) || PageSlab(page))
return _drbd_no_send_page(peer_device, page, offset, size, msg_flags);
msg_flags |= MSG_NOSIGNAL;
@@ -1932,7 +1934,7 @@ static int drbd_open(struct block_device *bdev, fmode_t mode)
if (device->state.role != R_PRIMARY) {
if (mode & FMODE_WRITE)
rv = -EROFS;
- else if (!allow_oos)
+ else if (!drbd_allow_oos)
rv = -EMEDIUMTYPE;
}
@@ -1952,6 +1954,19 @@ static void drbd_release(struct gendisk *gd, fmode_t mode)
mutex_unlock(&drbd_main_mutex);
}
+/* need to hold resource->req_lock */
+void drbd_queue_unplug(struct drbd_device *device)
+{
+ if (device->state.pdsk >= D_INCONSISTENT && device->state.conn >= C_CONNECTED) {
+ D_ASSERT(device, device->state.role == R_PRIMARY);
+ if (test_and_clear_bit(UNPLUG_REMOTE, &device->flags)) {
+ drbd_queue_work_if_unqueued(
+ &first_peer_device(device)->connection->sender_work,
+ &device->unplug_work);
+ }
+ }
+}
+
static void drbd_set_defaults(struct drbd_device *device)
{
/* Beware! The actual layout differs
@@ -2008,18 +2023,14 @@ void drbd_init_set_defaults(struct drbd_device *device)
device->unplug_work.cb = w_send_write_hint;
device->bm_io_work.w.cb = w_bitmap_io;
- init_timer(&device->resync_timer);
- init_timer(&device->md_sync_timer);
- init_timer(&device->start_resync_timer);
- init_timer(&device->request_timer);
- device->resync_timer.function = resync_timer_fn;
- device->resync_timer.data = (unsigned long) device;
- device->md_sync_timer.function = md_sync_timer_fn;
- device->md_sync_timer.data = (unsigned long) device;
- device->start_resync_timer.function = start_resync_timer_fn;
- device->start_resync_timer.data = (unsigned long) device;
- device->request_timer.function = request_timer_fn;
- device->request_timer.data = (unsigned long) device;
+ setup_timer(&device->resync_timer, resync_timer_fn,
+ (unsigned long)device);
+ setup_timer(&device->md_sync_timer, md_sync_timer_fn,
+ (unsigned long)device);
+ setup_timer(&device->start_resync_timer, start_resync_timer_fn,
+ (unsigned long)device);
+ setup_timer(&device->request_timer, request_timer_fn,
+ (unsigned long)device);
init_waitqueue_head(&device->misc_wait);
init_waitqueue_head(&device->state_wait);
@@ -2131,7 +2142,7 @@ static void drbd_destroy_mempools(void)
static int drbd_create_mempools(void)
{
struct page *page;
- const int number = (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count;
+ const int number = (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * drbd_minor_count;
int i;
/* prepare our caches and mempools */
@@ -2167,13 +2178,12 @@ static int drbd_create_mempools(void)
goto Enomem;
/* mempools */
- drbd_io_bio_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_RESCUER);
+ drbd_io_bio_set = bioset_create(BIO_POOL_SIZE, 0, 0);
if (drbd_io_bio_set == NULL)
goto Enomem;
drbd_md_io_bio_set = bioset_create(DRBD_MIN_POOL_PAGES, 0,
- BIOSET_NEED_BVECS |
- BIOSET_NEED_RESCUER);
+ BIOSET_NEED_BVECS);
if (drbd_md_io_bio_set == NULL)
goto Enomem;
@@ -2409,7 +2419,6 @@ static void drbd_cleanup(void)
destroy_workqueue(retry.wq);
drbd_genl_unregister();
- drbd_debugfs_cleanup();
idr_for_each_entry(&drbd_devices, device, i)
drbd_delete_device(device);
@@ -2420,6 +2429,8 @@ static void drbd_cleanup(void)
drbd_free_resource(resource);
}
+ drbd_debugfs_cleanup();
+
drbd_destroy_mempools();
unregister_blkdev(DRBD_MAJOR, "drbd");
@@ -2972,12 +2983,12 @@ static int __init drbd_init(void)
{
int err;
- if (minor_count < DRBD_MINOR_COUNT_MIN || minor_count > DRBD_MINOR_COUNT_MAX) {
- pr_err("invalid minor_count (%d)\n", minor_count);
+ if (drbd_minor_count < DRBD_MINOR_COUNT_MIN || drbd_minor_count > DRBD_MINOR_COUNT_MAX) {
+ pr_err("invalid minor_count (%d)\n", drbd_minor_count);
#ifdef MODULE
return -EINVAL;
#else
- minor_count = DRBD_MINOR_COUNT_DEF;
+ drbd_minor_count = DRBD_MINOR_COUNT_DEF;
#endif
}
@@ -3900,12 +3911,12 @@ _drbd_insert_fault(struct drbd_device *device, unsigned int type)
static struct fault_random_state rrs = {0, 0};
unsigned int ret = (
- (fault_devs == 0 ||
- ((1 << device_to_minor(device)) & fault_devs) != 0) &&
- (((_drbd_fault_random(&rrs) % 100) + 1) <= fault_rate));
+ (drbd_fault_devs == 0 ||
+ ((1 << device_to_minor(device)) & drbd_fault_devs) != 0) &&
+ (((_drbd_fault_random(&rrs) % 100) + 1) <= drbd_fault_rate));
if (ret) {
- fault_count++;
+ drbd_fault_count++;
if (__ratelimit(&drbd_ratelimit_state))
drbd_warn(device, "***Simulating %s failure\n",
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index ad0fcb4..a12f77e 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -344,7 +344,7 @@ int drbd_khelper(struct drbd_device *device, char *cmd)
(char[60]) { }, /* address */
NULL };
char mb[14];
- char *argv[] = {usermode_helper, cmd, mb, NULL };
+ char *argv[] = {drbd_usermode_helper, cmd, mb, NULL };
struct drbd_connection *connection = first_peer_device(device)->connection;
struct sib_info sib;
int ret;
@@ -359,19 +359,19 @@ int drbd_khelper(struct drbd_device *device, char *cmd)
* write out any unsynced meta data changes now */
drbd_md_sync(device);
- drbd_info(device, "helper command: %s %s %s\n", usermode_helper, cmd, mb);
+ drbd_info(device, "helper command: %s %s %s\n", drbd_usermode_helper, cmd, mb);
sib.sib_reason = SIB_HELPER_PRE;
sib.helper_name = cmd;
drbd_bcast_event(device, &sib);
notify_helper(NOTIFY_CALL, device, connection, cmd, 0);
- ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC);
+ ret = call_usermodehelper(drbd_usermode_helper, argv, envp, UMH_WAIT_PROC);
if (ret)
drbd_warn(device, "helper command: %s %s %s exit code %u (0x%x)\n",
- usermode_helper, cmd, mb,
+ drbd_usermode_helper, cmd, mb,
(ret >> 8) & 0xff, ret);
else
drbd_info(device, "helper command: %s %s %s exit code %u (0x%x)\n",
- usermode_helper, cmd, mb,
+ drbd_usermode_helper, cmd, mb,
(ret >> 8) & 0xff, ret);
sib.sib_reason = SIB_HELPER_POST;
sib.helper_exit_code = ret;
@@ -396,24 +396,24 @@ enum drbd_peer_state conn_khelper(struct drbd_connection *connection, char *cmd)
(char[60]) { }, /* address */
NULL };
char *resource_name = connection->resource->name;
- char *argv[] = {usermode_helper, cmd, resource_name, NULL };
+ char *argv[] = {drbd_usermode_helper, cmd, resource_name, NULL };
int ret;
setup_khelper_env(connection, envp);
conn_md_sync(connection);
- drbd_info(connection, "helper command: %s %s %s\n", usermode_helper, cmd, resource_name);
+ drbd_info(connection, "helper command: %s %s %s\n", drbd_usermode_helper, cmd, resource_name);
/* TODO: conn_bcast_event() ?? */
notify_helper(NOTIFY_CALL, NULL, connection, cmd, 0);
- ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC);
+ ret = call_usermodehelper(drbd_usermode_helper, argv, envp, UMH_WAIT_PROC);
if (ret)
drbd_warn(connection, "helper command: %s %s %s exit code %u (0x%x)\n",
- usermode_helper, cmd, resource_name,
+ drbd_usermode_helper, cmd, resource_name,
(ret >> 8) & 0xff, ret);
else
drbd_info(connection, "helper command: %s %s %s exit code %u (0x%x)\n",
- usermode_helper, cmd, resource_name,
+ drbd_usermode_helper, cmd, resource_name,
(ret >> 8) & 0xff, ret);
/* TODO: conn_bcast_event() ?? */
notify_helper(NOTIFY_RESPONSE, NULL, connection, cmd, ret);
@@ -1236,12 +1236,18 @@ static void fixup_discard_if_not_supported(struct request_queue *q)
static void decide_on_write_same_support(struct drbd_device *device,
struct request_queue *q,
- struct request_queue *b, struct o_qlim *o)
+ struct request_queue *b, struct o_qlim *o,
+ bool disable_write_same)
{
struct drbd_peer_device *peer_device = first_peer_device(device);
struct drbd_connection *connection = peer_device->connection;
bool can_do = b ? b->limits.max_write_same_sectors : true;
+ if (can_do && disable_write_same) {
+ can_do = false;
+ drbd_info(peer_device, "WRITE_SAME disabled by config\n");
+ }
+
if (can_do && connection->cstate >= C_CONNECTED && !(connection->agreed_features & DRBD_FF_WSAME)) {
can_do = false;
drbd_info(peer_device, "peer does not support WRITE_SAME\n");
@@ -1302,6 +1308,7 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi
struct request_queue *b = NULL;
struct disk_conf *dc;
bool discard_zeroes_if_aligned = true;
+ bool disable_write_same = false;
if (bdev) {
b = bdev->backing_bdev->bd_disk->queue;
@@ -1311,6 +1318,7 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi
dc = rcu_dereference(device->ldev->disk_conf);
max_segments = dc->max_bio_bvecs;
discard_zeroes_if_aligned = dc->discard_zeroes_if_aligned;
+ disable_write_same = dc->disable_write_same;
rcu_read_unlock();
blk_set_stacking_limits(&q->limits);
@@ -1321,7 +1329,7 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi
blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS);
blk_queue_segment_boundary(q, PAGE_SIZE-1);
decide_on_discard_support(device, q, b, discard_zeroes_if_aligned);
- decide_on_write_same_support(device, q, b, o);
+ decide_on_write_same_support(device, q, b, o, disable_write_same);
if (b) {
blk_queue_stack_limits(q, b);
@@ -1612,7 +1620,8 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
if (write_ordering_changed(old_disk_conf, new_disk_conf))
drbd_bump_write_ordering(device->resource, NULL, WO_BDEV_FLUSH);
- if (old_disk_conf->discard_zeroes_if_aligned != new_disk_conf->discard_zeroes_if_aligned)
+ if (old_disk_conf->discard_zeroes_if_aligned != new_disk_conf->discard_zeroes_if_aligned
+ || old_disk_conf->disable_write_same != new_disk_conf->disable_write_same)
drbd_reconsider_queue_parameters(device, device->ldev, NULL);
drbd_md_sync(device);
@@ -2140,34 +2149,13 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
static int adm_detach(struct drbd_device *device, int force)
{
- enum drbd_state_rv retcode;
- void *buffer;
- int ret;
-
if (force) {
set_bit(FORCE_DETACH, &device->flags);
drbd_force_state(device, NS(disk, D_FAILED));
- retcode = SS_SUCCESS;
- goto out;
+ return SS_SUCCESS;
}
- drbd_suspend_io(device); /* so no-one is stuck in drbd_al_begin_io */
- buffer = drbd_md_get_buffer(device, __func__); /* make sure there is no in-flight meta-data IO */
- if (buffer) {
- retcode = drbd_request_state(device, NS(disk, D_FAILED));
- drbd_md_put_buffer(device);
- } else /* already <= D_FAILED */
- retcode = SS_NOTHING_TO_DO;
- /* D_FAILED will transition to DISKLESS. */
- drbd_resume_io(device);
- ret = wait_event_interruptible(device->misc_wait,
- device->state.disk != D_FAILED);
- if ((int)retcode == (int)SS_IS_DISKLESS)
- retcode = SS_NOTHING_TO_DO;
- if (ret)
- retcode = ERR_INTR;
-out:
- return retcode;
+ return drbd_request_detach_interruptible(device);
}
/* Detaching the disk is a process in multiple stages. First we need to lock
diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c
index 8378142..582caeb 100644
--- a/drivers/block/drbd/drbd_proc.c
+++ b/drivers/block/drbd/drbd_proc.c
@@ -127,7 +127,7 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se
seq_putc(seq, '=');
seq_putc(seq, '>');
for (i = 0; i < y; i++)
- seq_printf(seq, ".");
+ seq_putc(seq, '.');
seq_puts(seq, "] ");
if (state.conn == C_VERIFY_S || state.conn == C_VERIFY_T)
@@ -179,7 +179,7 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se
seq_printf_with_thousands_grouping(seq, dbdt);
seq_puts(seq, " (");
/* ------------------------- ~3s average ------------------------ */
- if (proc_details >= 1) {
+ if (drbd_proc_details >= 1) {
/* this is what drbd_rs_should_slow_down() uses */
i = (device->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
dt = (jiffies - device->rs_mark_time[i]) / HZ;
@@ -209,7 +209,7 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se
}
seq_printf(seq, " K/sec%s\n", stalled ? " (stalled)" : "");
- if (proc_details >= 1) {
+ if (drbd_proc_details >= 1) {
/* 64 bit:
* we convert to sectors in the display below. */
unsigned long bm_bits = drbd_bm_bits(device);
@@ -332,13 +332,13 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
state.conn == C_VERIFY_T)
drbd_syncer_progress(device, seq, state);
- if (proc_details >= 1 && get_ldev_if_state(device, D_FAILED)) {
+ if (drbd_proc_details >= 1 && get_ldev_if_state(device, D_FAILED)) {
lc_seq_printf_stats(seq, device->resync);
lc_seq_printf_stats(seq, device->act_log);
put_ldev(device);
}
- if (proc_details >= 2)
+ if (drbd_proc_details >= 2)
seq_printf(seq, "\tblocked on activity log: %d\n", atomic_read(&device->ap_actlog_cnt));
}
rcu_read_unlock();
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index c7e95e6..796eaf3 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -332,7 +332,7 @@ static void drbd_free_pages(struct drbd_device *device, struct page *page, int i
if (page == NULL)
return;
- if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count)
+ if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * drbd_minor_count)
i = page_chain_free(page);
else {
struct page *tmp;
@@ -1100,7 +1100,10 @@ static int conn_connect(struct drbd_connection *connection)
idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
mutex_lock(peer_device->device->state_mutex);
+ /* avoid a race with conn_request_state( C_DISCONNECTING ) */
+ spin_lock_irq(&connection->resource->req_lock);
set_bit(STATE_SENT, &connection->flags);
+ spin_unlock_irq(&connection->resource->req_lock);
idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
mutex_unlock(peer_device->device->state_mutex);
@@ -1194,6 +1197,14 @@ static int decode_header(struct drbd_connection *connection, void *header, struc
return 0;
}
+static void drbd_unplug_all_devices(struct drbd_connection *connection)
+{
+ if (current->plug == &connection->receiver_plug) {
+ blk_finish_plug(&connection->receiver_plug);
+ blk_start_plug(&connection->receiver_plug);
+ } /* else: maybe just schedule() ?? */
+}
+
static int drbd_recv_header(struct drbd_connection *connection, struct packet_info *pi)
{
void *buffer = connection->data.rbuf;
@@ -1209,6 +1220,36 @@ static int drbd_recv_header(struct drbd_connection *connection, struct packet_in
return err;
}
+static int drbd_recv_header_maybe_unplug(struct drbd_connection *connection, struct packet_info *pi)
+{
+ void *buffer = connection->data.rbuf;
+ unsigned int size = drbd_header_size(connection);
+ int err;
+
+ err = drbd_recv_short(connection->data.socket, buffer, size, MSG_NOSIGNAL|MSG_DONTWAIT);
+ if (err != size) {
+ /* If we have nothing in the receive buffer now, to reduce
+ * application latency, try to drain the backend queues as
+ * quickly as possible, and let remote TCP know what we have
+ * received so far. */
+ if (err == -EAGAIN) {
+ drbd_tcp_quickack(connection->data.socket);
+ drbd_unplug_all_devices(connection);
+ }
+ if (err > 0) {
+ buffer += err;
+ size -= err;
+ }
+ err = drbd_recv_all_warn(connection, buffer, size);
+ if (err)
+ return err;
+ }
+
+ err = decode_header(connection, connection->data.rbuf, pi);
+ connection->last_received = jiffies;
+
+ return err;
+}
/* This is blkdev_issue_flush, but asynchronous.
* We want to submit to all component volumes in parallel,
* then wait for all completions.
@@ -1223,7 +1264,7 @@ struct one_flush_context {
struct issue_flush_context *ctx;
};
-void one_flush_endio(struct bio *bio)
+static void one_flush_endio(struct bio *bio)
{
struct one_flush_context *octx = bio->bi_private;
struct drbd_device *device = octx->device;
@@ -1265,7 +1306,7 @@ static void submit_one_flush(struct drbd_device *device, struct issue_flush_cont
octx->device = device;
octx->ctx = ctx;
- bio->bi_bdev = device->ldev->backing_bdev;
+ bio_set_dev(bio, device->ldev->backing_bdev);
bio->bi_private = octx;
bio->bi_end_io = one_flush_endio;
bio->bi_opf = REQ_OP_FLUSH | REQ_PREFLUSH;
@@ -1548,7 +1589,7 @@ int drbd_submit_peer_request(struct drbd_device *device,
}
/* > peer_req->i.sector, unless this is the first bio */
bio->bi_iter.bi_sector = sector;
- bio->bi_bdev = device->ldev->backing_bdev;
+ bio_set_dev(bio, device->ldev->backing_bdev);
bio_set_op_attrs(bio, op, op_flags);
bio->bi_private = peer_req;
bio->bi_end_io = drbd_peer_request_endio;
@@ -4085,7 +4126,7 @@ static int receive_uuids(struct drbd_connection *connection, struct packet_info
return config_unknown_volume(connection, pi);
device = peer_device->device;
- p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
+ p_uuid = kmalloc_array(UI_EXTENDED_SIZE, sizeof(*p_uuid), GFP_NOIO);
if (!p_uuid) {
drbd_err(device, "kmalloc of p_uuid failed\n");
return false;
@@ -4882,8 +4923,8 @@ static void drbdd(struct drbd_connection *connection)
struct data_cmd const *cmd;
drbd_thread_current_set_cpu(&connection->receiver);
- update_receiver_timing_details(connection, drbd_recv_header);
- if (drbd_recv_header(connection, &pi))
+ update_receiver_timing_details(connection, drbd_recv_header_maybe_unplug);
+ if (drbd_recv_header_maybe_unplug(connection, &pi))
goto err_out;
cmd = &drbd_cmd_handler[pi.cmd];
@@ -5375,8 +5416,11 @@ int drbd_receiver(struct drbd_thread *thi)
}
} while (h == 0);
- if (h > 0)
+ if (h > 0) {
+ blk_start_plug(&connection->receiver_plug);
drbdd(connection);
+ blk_finish_plug(&connection->receiver_plug);
+ }
conn_disconnect(connection);
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index f6e865b..de8566e 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -36,14 +36,18 @@ static bool drbd_may_do_local_read(struct drbd_device *device, sector_t sector,
/* Update disk stats at start of I/O request */
static void _drbd_start_io_acct(struct drbd_device *device, struct drbd_request *req)
{
- generic_start_io_acct(bio_data_dir(req->master_bio), req->i.size >> 9,
- &device->vdisk->part0);
+ struct request_queue *q = device->rq_queue;
+
+ generic_start_io_acct(q, bio_data_dir(req->master_bio),
+ req->i.size >> 9, &device->vdisk->part0);
}
/* Update disk stats when completing request upwards */
static void _drbd_end_io_acct(struct drbd_device *device, struct drbd_request *req)
{
- generic_end_io_acct(bio_data_dir(req->master_bio),
+ struct request_queue *q = device->rq_queue;
+
+ generic_end_io_acct(q, bio_data_dir(req->master_bio),
&device->vdisk->part0, req->start_jif);
}
@@ -1175,7 +1179,7 @@ drbd_submit_req_private_bio(struct drbd_request *req)
else
type = DRBD_FAULT_DT_RD;
- bio->bi_bdev = device->ldev->backing_bdev;
+ bio_set_dev(bio, device->ldev->backing_bdev);
/* State may have changed since we grabbed our reference on the
* ->ldev member. Double check, and short-circuit to endio.
@@ -1275,6 +1279,57 @@ static bool may_do_writes(struct drbd_device *device)
return s.disk == D_UP_TO_DATE || s.pdsk == D_UP_TO_DATE;
}
+struct drbd_plug_cb {
+ struct blk_plug_cb cb;
+ struct drbd_request *most_recent_req;
+ /* do we need more? */
+};
+
+static void drbd_unplug(struct blk_plug_cb *cb, bool from_schedule)
+{
+ struct drbd_plug_cb *plug = container_of(cb, struct drbd_plug_cb, cb);
+ struct drbd_resource *resource = plug->cb.data;
+ struct drbd_request *req = plug->most_recent_req;
+
+ kfree(cb);
+ if (!req)
+ return;
+
+ spin_lock_irq(&resource->req_lock);
+ /* In case the sender did not process it yet, raise the flag to
+ * have it followed with P_UNPLUG_REMOTE just after. */
+ req->rq_state |= RQ_UNPLUG;
+ /* but also queue a generic unplug */
+ drbd_queue_unplug(req->device);
+ kref_put(&req->kref, drbd_req_destroy);
+ spin_unlock_irq(&resource->req_lock);
+}
+
+static struct drbd_plug_cb* drbd_check_plugged(struct drbd_resource *resource)
+{
+ /* A lot of text to say
+ * return (struct drbd_plug_cb*)blk_check_plugged(); */
+ struct drbd_plug_cb *plug;
+ struct blk_plug_cb *cb = blk_check_plugged(drbd_unplug, resource, sizeof(*plug));
+
+ if (cb)
+ plug = container_of(cb, struct drbd_plug_cb, cb);
+ else
+ plug = NULL;
+ return plug;
+}
+
+static void drbd_update_plug(struct drbd_plug_cb *plug, struct drbd_request *req)
+{
+ struct drbd_request *tmp = plug->most_recent_req;
+ /* Will be sent to some peer.
+ * Remember to tag it with UNPLUG_REMOTE on unplug */
+ kref_get(&req->kref);
+ plug->most_recent_req = req;
+ if (tmp)
+ kref_put(&tmp->kref, drbd_req_destroy);
+}
+
static void drbd_send_and_submit(struct drbd_device *device, struct drbd_request *req)
{
struct drbd_resource *resource = device->resource;
@@ -1347,6 +1402,12 @@ static void drbd_send_and_submit(struct drbd_device *device, struct drbd_request
no_remote = true;
}
+ if (no_remote == false) {
+ struct drbd_plug_cb *plug = drbd_check_plugged(resource);
+ if (plug)
+ drbd_update_plug(plug, req);
+ }
+
/* If it took the fast path in drbd_request_prepare, add it here.
* The slow path has added it already. */
if (list_empty(&req->req_pending_master_completion))
@@ -1395,7 +1456,10 @@ void __drbd_make_request(struct drbd_device *device, struct bio *bio, unsigned l
static void submit_fast_path(struct drbd_device *device, struct list_head *incoming)
{
+ struct blk_plug plug;
struct drbd_request *req, *tmp;
+
+ blk_start_plug(&plug);
list_for_each_entry_safe(req, tmp, incoming, tl_requests) {
const int rw = bio_data_dir(req->master_bio);
@@ -1413,6 +1477,7 @@ static void submit_fast_path(struct drbd_device *device, struct list_head *incom
list_del_init(&req->tl_requests);
drbd_send_and_submit(device, req);
}
+ blk_finish_plug(&plug);
}
static bool prepare_al_transaction_nonblock(struct drbd_device *device,
@@ -1420,12 +1485,12 @@ static bool prepare_al_transaction_nonblock(struct drbd_device *device,
struct list_head *pending,
struct list_head *later)
{
- struct drbd_request *req, *tmp;
+ struct drbd_request *req;
int wake = 0;
int err;
spin_lock_irq(&device->al_lock);
- list_for_each_entry_safe(req, tmp, incoming, tl_requests) {
+ while ((req = list_first_entry_or_null(incoming, struct drbd_request, tl_requests))) {
err = drbd_al_begin_io_nonblock(device, &req->i);
if (err == -ENOBUFS)
break;
@@ -1442,17 +1507,20 @@ static bool prepare_al_transaction_nonblock(struct drbd_device *device,
return !list_empty(pending);
}
-void send_and_submit_pending(struct drbd_device *device, struct list_head *pending)
+static void send_and_submit_pending(struct drbd_device *device, struct list_head *pending)
{
- struct drbd_request *req, *tmp;
+ struct blk_plug plug;
+ struct drbd_request *req;
- list_for_each_entry_safe(req, tmp, pending, tl_requests) {
+ blk_start_plug(&plug);
+ while ((req = list_first_entry_or_null(pending, struct drbd_request, tl_requests))) {
req->rq_state |= RQ_IN_ACT_LOG;
req->in_actlog_jif = jiffies;
atomic_dec(&device->ap_actlog_cnt);
list_del_init(&req->tl_requests);
drbd_send_and_submit(device, req);
}
+ blk_finish_plug(&plug);
}
void do_submit(struct work_struct *ws)
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
index 9e1866a..a2254f8 100644
--- a/drivers/block/drbd/drbd_req.h
+++ b/drivers/block/drbd/drbd_req.h
@@ -212,6 +212,11 @@ enum drbd_req_state_bits {
/* Should call drbd_al_complete_io() for this request... */
__RQ_IN_ACT_LOG,
+ /* This was the most recent request during some blk_finish_plug()
+ * or its implicit from-schedule equivalent.
+ * We may use it as hint to send a P_UNPLUG_REMOTE */
+ __RQ_UNPLUG,
+
/* The peer has sent a retry ACK */
__RQ_POSTPONED,
@@ -249,6 +254,7 @@ enum drbd_req_state_bits {
#define RQ_WSAME (1UL << __RQ_WSAME)
#define RQ_UNMAP (1UL << __RQ_UNMAP)
#define RQ_IN_ACT_LOG (1UL << __RQ_IN_ACT_LOG)
+#define RQ_UNPLUG (1UL << __RQ_UNPLUG)
#define RQ_POSTPONED (1UL << __RQ_POSTPONED)
#define RQ_COMPLETION_SUSP (1UL << __RQ_COMPLETION_SUSP)
#define RQ_EXP_RECEIVE_ACK (1UL << __RQ_EXP_RECEIVE_ACK)
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c
index eea0c4a..0813c65 100644
--- a/drivers/block/drbd/drbd_state.c
+++ b/drivers/block/drbd/drbd_state.c
@@ -346,7 +346,7 @@ static enum drbd_role min_role(enum drbd_role role1, enum drbd_role role2)
enum drbd_role conn_highest_role(struct drbd_connection *connection)
{
- enum drbd_role role = R_UNKNOWN;
+ enum drbd_role role = R_SECONDARY;
struct drbd_peer_device *peer_device;
int vnr;
@@ -579,11 +579,14 @@ drbd_req_state(struct drbd_device *device, union drbd_state mask,
unsigned long flags;
union drbd_state os, ns;
enum drbd_state_rv rv;
+ void *buffer = NULL;
init_completion(&done);
if (f & CS_SERIALIZE)
mutex_lock(device->state_mutex);
+ if (f & CS_INHIBIT_MD_IO)
+ buffer = drbd_md_get_buffer(device, __func__);
spin_lock_irqsave(&device->resource->req_lock, flags);
os = drbd_read_state(device);
@@ -636,6 +639,8 @@ drbd_req_state(struct drbd_device *device, union drbd_state mask,
}
abort:
+ if (buffer)
+ drbd_md_put_buffer(device);
if (f & CS_SERIALIZE)
mutex_unlock(device->state_mutex);
@@ -664,6 +669,47 @@ _drbd_request_state(struct drbd_device *device, union drbd_state mask,
return rv;
}
+/*
+ * We grab drbd_md_get_buffer(), because we don't want to "fail" the disk while
+ * there is IO in-flight: the transition into D_FAILED for detach purposes
+ * may get misinterpreted as actual IO error in a confused endio function.
+ *
+ * We wrap it all into wait_event(), to retry in case the drbd_req_state()
+ * returns SS_IN_TRANSIENT_STATE.
+ *
+ * To avoid potential deadlock with e.g. the receiver thread trying to grab
+ * drbd_md_get_buffer() while trying to get out of the "transient state", we
+ * need to grab and release the meta data buffer inside of that wait_event loop.
+ */
+static enum drbd_state_rv
+request_detach(struct drbd_device *device)
+{
+ return drbd_req_state(device, NS(disk, D_FAILED),
+ CS_VERBOSE | CS_ORDERED | CS_INHIBIT_MD_IO);
+}
+
+enum drbd_state_rv
+drbd_request_detach_interruptible(struct drbd_device *device)
+{
+ enum drbd_state_rv rv;
+ int ret;
+
+ drbd_suspend_io(device); /* so no-one is stuck in drbd_al_begin_io */
+ wait_event_interruptible(device->state_wait,
+ (rv = request_detach(device)) != SS_IN_TRANSIENT_STATE);
+ drbd_resume_io(device);
+
+ ret = wait_event_interruptible(device->misc_wait,
+ device->state.disk != D_FAILED);
+
+ if (rv == SS_IS_DISKLESS)
+ rv = SS_NOTHING_TO_DO;
+ if (ret)
+ rv = ERR_INTR;
+
+ return rv;
+}
+
enum drbd_state_rv
_drbd_request_state_holding_state_mutex(struct drbd_device *device, union drbd_state mask,
union drbd_state val, enum chg_state_flags f)
diff --git a/drivers/block/drbd/drbd_state.h b/drivers/block/drbd/drbd_state.h
index 6c9d5d4..0276c98 100644
--- a/drivers/block/drbd/drbd_state.h
+++ b/drivers/block/drbd/drbd_state.h
@@ -71,6 +71,10 @@ enum chg_state_flags {
CS_DC_SUSP = 1 << 10,
CS_DC_MASK = CS_DC_ROLE + CS_DC_PEER + CS_DC_CONN + CS_DC_DISK + CS_DC_PDSK,
CS_IGN_OUTD_FAIL = 1 << 11,
+
+ /* Make sure no meta data IO is in flight, by calling
+ * drbd_md_get_buffer(). Used for graceful detach. */
+ CS_INHIBIT_MD_IO = 1 << 12,
};
/* drbd_dev_state and drbd_state are different types. This is to stress the
@@ -156,6 +160,10 @@ static inline int drbd_request_state(struct drbd_device *device,
return _drbd_request_state(device, mask, val, CS_VERBOSE + CS_ORDERED);
}
+/* for use in adm_detach() (drbd_adm_detach(), drbd_adm_down()) */
+enum drbd_state_rv
+drbd_request_detach_interruptible(struct drbd_device *device);
+
enum drbd_role conn_highest_role(struct drbd_connection *connection);
enum drbd_role conn_highest_peer(struct drbd_connection *connection);
enum drbd_disk_state conn_highest_disk(struct drbd_connection *connection);
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 1d8726a..03471b3 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -65,6 +65,11 @@ void drbd_md_endio(struct bio *bio)
device = bio->bi_private;
device->md_io.error = blk_status_to_errno(bio->bi_status);
+ /* special case: drbd_md_read() during drbd_adm_attach() */
+ if (device->ldev)
+ put_ldev(device);
+ bio_put(bio);
+
/* We grabbed an extra reference in _drbd_md_sync_page_io() to be able
* to timeout on the lower level device, and eventually detach from it.
* If this io completion runs after that timeout expired, this
@@ -79,9 +84,6 @@ void drbd_md_endio(struct bio *bio)
drbd_md_put_buffer(device);
device->md_io.done = 1;
wake_up(&device->misc_wait);
- bio_put(bio);
- if (device->ldev) /* special case: drbd_md_read() during drbd_adm_attach() */
- put_ldev(device);
}
/* reads on behalf of the partner,
@@ -128,6 +130,14 @@ void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(l
block_id = peer_req->block_id;
peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;
+ if (peer_req->flags & EE_WAS_ERROR) {
+ /* In protocol != C, we usually do not send write acks.
+ * In case of a write error, send the neg ack anyways. */
+ if (!__test_and_set_bit(__EE_SEND_WRITE_ACK, &peer_req->flags))
+ inc_unacked(device);
+ drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size);
+ }
+
spin_lock_irqsave(&device->resource->req_lock, flags);
device->writ_cnt += peer_req->i.size >> 9;
list_move_tail(&peer_req->w.list, &device->done_ee);
@@ -195,7 +205,8 @@ void drbd_peer_request_endio(struct bio *bio)
}
}
-void drbd_panic_after_delayed_completion_of_aborted_request(struct drbd_device *device)
+static void
+drbd_panic_after_delayed_completion_of_aborted_request(struct drbd_device *device)
{
panic("drbd%u %s/%u potential random memory corruption caused by delayed completion of aborted local request\n",
device->minor, device->resource->name, device->vnr);
@@ -1382,18 +1393,22 @@ static int drbd_send_barrier(struct drbd_connection *connection)
return conn_send_command(connection, sock, P_BARRIER, sizeof(*p), NULL, 0);
}
+static int pd_send_unplug_remote(struct drbd_peer_device *pd)
+{
+ struct drbd_socket *sock = &pd->connection->data;
+ if (!drbd_prepare_command(pd, sock))
+ return -EIO;
+ return drbd_send_command(pd, sock, P_UNPLUG_REMOTE, 0, NULL, 0);
+}
+
int w_send_write_hint(struct drbd_work *w, int cancel)
{
struct drbd_device *device =
container_of(w, struct drbd_device, unplug_work);
- struct drbd_socket *sock;
if (cancel)
return 0;
- sock = &first_peer_device(device)->connection->data;
- if (!drbd_prepare_command(first_peer_device(device), sock))
- return -EIO;
- return drbd_send_command(first_peer_device(device), sock, P_UNPLUG_REMOTE, 0, NULL, 0);
+ return pd_send_unplug_remote(first_peer_device(device));
}
static void re_init_if_first_write(struct drbd_connection *connection, unsigned int epoch)
@@ -1455,6 +1470,7 @@ int w_send_dblock(struct drbd_work *w, int cancel)
struct drbd_device *device = req->device;
struct drbd_peer_device *const peer_device = first_peer_device(device);
struct drbd_connection *connection = peer_device->connection;
+ bool do_send_unplug = req->rq_state & RQ_UNPLUG;
int err;
if (unlikely(cancel)) {
@@ -1470,6 +1486,9 @@ int w_send_dblock(struct drbd_work *w, int cancel)
err = drbd_send_dblock(peer_device, req);
req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
+ if (do_send_unplug && !err)
+ pd_send_unplug_remote(peer_device);
+
return err;
}
@@ -1484,6 +1503,7 @@ int w_send_read_req(struct drbd_work *w, int cancel)
struct drbd_device *device = req->device;
struct drbd_peer_device *const peer_device = first_peer_device(device);
struct drbd_connection *connection = peer_device->connection;
+ bool do_send_unplug = req->rq_state & RQ_UNPLUG;
int err;
if (unlikely(cancel)) {
@@ -1501,6 +1521,9 @@ int w_send_read_req(struct drbd_work *w, int cancel)
req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
+ if (do_send_unplug && !err)
+ pd_send_unplug_remote(peer_device);
+
return err;
}
@@ -1513,7 +1536,7 @@ int w_restart_disk_io(struct drbd_work *w, int cancel)
drbd_al_begin_io(device, &req->i);
drbd_req_make_private_bio(req, req->master_bio);
- req->private_bio->bi_bdev = device->ldev->backing_bdev;
+ bio_set_dev(req->private_bio, device->ldev->backing_bdev);
generic_make_request(req->private_bio);
return 0;
@@ -1733,6 +1756,11 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
return;
}
+ if (!connection) {
+ drbd_err(device, "No connection to peer, aborting!\n");
+ return;
+ }
+
if (!test_bit(B_RS_H_DONE, &device->flags)) {
if (side == C_SYNC_TARGET) {
/* Since application IO was locked out during C_WF_BITMAP_T and
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 9c00f29..60c086a 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -4134,7 +4134,7 @@ static int __floppy_read_block_0(struct block_device *bdev, int drive)
cbdata.drive = drive;
bio_init(&bio, &bio_vec, 1);
- bio.bi_bdev = bdev;
+ bio_set_dev(&bio, bdev);
bio_add_page(&bio, page, size, 0);
bio.bi_iter.bi_sector = 0;
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index f321b96..407cb17 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1966,10 +1966,6 @@ static int __init loop_init(void)
struct loop_device *lo;
int err;
- err = misc_register(&loop_misc);
- if (err < 0)
- return err;
-
part_shift = 0;
if (max_part > 0) {
part_shift = fls(max_part);
@@ -1987,12 +1983,12 @@ static int __init loop_init(void)
if ((1UL << part_shift) > DISK_MAX_PARTS) {
err = -EINVAL;
- goto misc_out;
+ goto err_out;
}
if (max_loop > 1UL << (MINORBITS - part_shift)) {
err = -EINVAL;
- goto misc_out;
+ goto err_out;
}
/*
@@ -2011,6 +2007,11 @@ static int __init loop_init(void)
range = 1UL << MINORBITS;
}
+ err = misc_register(&loop_misc);
+ if (err < 0)
+ goto err_out;
+
+
if (register_blkdev(LOOP_MAJOR, "loop")) {
err = -EIO;
goto misc_out;
@@ -2030,6 +2031,7 @@ static int __init loop_init(void)
misc_out:
misc_deregister(&loop_misc);
+err_out:
return err;
}
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 5bdf923..2aa87cb 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -128,7 +128,7 @@ static struct dentry *nbd_dbg_dir;
#define NBD_MAGIC 0x68797548
static unsigned int nbds_max = 16;
-static int max_part;
+static int max_part = 16;
static struct workqueue_struct *recv_workqueue;
static int part_shift;
@@ -165,7 +165,7 @@ static ssize_t pid_show(struct device *dev,
return sprintf(buf, "%d\n", task_pid_nr(nbd->task_recv));
}
-static struct device_attribute pid_attr = {
+static const struct device_attribute pid_attr = {
.attr = { .name = "pid", .mode = S_IRUGO},
.show = pid_show,
};
@@ -1584,6 +1584,15 @@ static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info)
}
} else {
nbd = idr_find(&nbd_index_idr, index);
+ if (!nbd) {
+ ret = nbd_dev_add(index);
+ if (ret < 0) {
+ mutex_unlock(&nbd_index_mutex);
+ printk(KERN_ERR "nbd: failed to add new device\n");
+ return ret;
+ }
+ nbd = idr_find(&nbd_index_idr, index);
+ }
}
if (!nbd) {
printk(KERN_ERR "nbd: couldn't find device at index %d\n",
@@ -2137,4 +2146,4 @@ MODULE_LICENSE("GPL");
module_param(nbds_max, int, 0444);
MODULE_PARM_DESC(nbds_max, "number of network block devices to initialize (default: 16)");
module_param(max_part, int, 0444);
-MODULE_PARM_DESC(max_part, "number of partitions per device (default: 0)");
+MODULE_PARM_DESC(max_part, "number of partitions per device (default: 16)");
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index 81142ce..8042c26 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -1,3 +1,7 @@
+/*
+ * Add configfs and memory store: Kyungchan Koh <kkc6196@fb.com> and
+ * Shaohua Li <shli@fb.com>
+ */
#include <linux/module.h>
#include <linux/moduleparam.h>
@@ -9,6 +13,24 @@
#include <linux/blk-mq.h>
#include <linux/hrtimer.h>
#include <linux/lightnvm.h>
+#include <linux/configfs.h>
+#include <linux/badblocks.h>
+
+#define SECTOR_SHIFT 9
+#define PAGE_SECTORS_SHIFT (PAGE_SHIFT - SECTOR_SHIFT)
+#define PAGE_SECTORS (1 << PAGE_SECTORS_SHIFT)
+#define SECTOR_SIZE (1 << SECTOR_SHIFT)
+#define SECTOR_MASK (PAGE_SECTORS - 1)
+
+#define FREE_BATCH 16
+
+#define TICKS_PER_SEC 50ULL
+#define TIMER_INTERVAL (NSEC_PER_SEC / TICKS_PER_SEC)
+
+static inline u64 mb_per_tick(int mbps)
+{
+ return (1 << 20) / TICKS_PER_SEC * ((u64) mbps);
+}
struct nullb_cmd {
struct list_head list;
@@ -19,17 +41,82 @@ struct nullb_cmd {
unsigned int tag;
struct nullb_queue *nq;
struct hrtimer timer;
+ blk_status_t error;
};
struct nullb_queue {
unsigned long *tag_map;
wait_queue_head_t wait;
unsigned int queue_depth;
+ struct nullb_device *dev;
struct nullb_cmd *cmds;
};
+/*
+ * Status flags for nullb_device.
+ *
+ * CONFIGURED: Device has been configured and turned on. Cannot reconfigure.
+ * UP: Device is currently on and visible in userspace.
+ * THROTTLED: Device is being throttled.
+ * CACHE: Device is using a write-back cache.
+ */
+enum nullb_device_flags {
+ NULLB_DEV_FL_CONFIGURED = 0,
+ NULLB_DEV_FL_UP = 1,
+ NULLB_DEV_FL_THROTTLED = 2,
+ NULLB_DEV_FL_CACHE = 3,
+};
+
+/*
+ * nullb_page is a page in memory for nullb devices.
+ *
+ * @page: The page holding the data.
+ * @bitmap: The bitmap represents which sector in the page has data.
+ * Each bit represents one block size. For example, sector 8
+ * will use the 7th bit
+ * The highest 2 bits of bitmap are for special purpose. LOCK means the cache
+ * page is being flushing to storage. FREE means the cache page is freed and
+ * should be skipped from flushing to storage. Please see
+ * null_make_cache_space
+ */
+struct nullb_page {
+ struct page *page;
+ unsigned long bitmap;
+};
+#define NULLB_PAGE_LOCK (sizeof(unsigned long) * 8 - 1)
+#define NULLB_PAGE_FREE (sizeof(unsigned long) * 8 - 2)
+
+struct nullb_device {
+ struct nullb *nullb;
+ struct config_item item;
+ struct radix_tree_root data; /* data stored in the disk */
+ struct radix_tree_root cache; /* disk cache data */
+ unsigned long flags; /* device flags */
+ unsigned int curr_cache;
+ struct badblocks badblocks;
+
+ unsigned long size; /* device size in MB */
+ unsigned long completion_nsec; /* time in ns to complete a request */
+ unsigned long cache_size; /* disk cache size in MB */
+ unsigned int submit_queues; /* number of submission queues */
+ unsigned int home_node; /* home node for the device */
+ unsigned int queue_mode; /* block interface */
+ unsigned int blocksize; /* block size */
+ unsigned int irqmode; /* IRQ completion handler */
+ unsigned int hw_queue_depth; /* queue depth */
+ unsigned int index; /* index of the disk, only valid with a disk */
+ unsigned int mbps; /* Bandwidth throttle cap (in MB/s) */
+ bool use_lightnvm; /* register as a LightNVM device */
+ bool blocking; /* blocking blk-mq device */
+ bool use_per_node_hctx; /* use per-node allocation for hardware context */
+ bool power; /* power on/off the device */
+ bool memory_backed; /* if data is stored in memory */
+ bool discard; /* if support discard */
+};
+
struct nullb {
+ struct nullb_device *dev;
struct list_head list;
unsigned int index;
struct request_queue *q;
@@ -37,8 +124,10 @@ struct nullb {
struct nvm_dev *ndev;
struct blk_mq_tag_set *tag_set;
struct blk_mq_tag_set __tag_set;
- struct hrtimer timer;
unsigned int queue_depth;
+ atomic_long_t cur_bytes;
+ struct hrtimer bw_timer;
+ unsigned long cache_flush_pos;
spinlock_t lock;
struct nullb_queue *queues;
@@ -49,7 +138,7 @@ struct nullb {
static LIST_HEAD(nullb_list);
static struct mutex lock;
static int null_major;
-static int nullb_indexes;
+static DEFINE_IDA(nullb_indexes);
static struct kmem_cache *ppa_cache;
static struct blk_mq_tag_set tag_set;
@@ -65,15 +154,15 @@ enum {
NULL_Q_MQ = 2,
};
-static int submit_queues;
-module_param(submit_queues, int, S_IRUGO);
+static int g_submit_queues = 1;
+module_param_named(submit_queues, g_submit_queues, int, S_IRUGO);
MODULE_PARM_DESC(submit_queues, "Number of submission queues");
-static int home_node = NUMA_NO_NODE;
-module_param(home_node, int, S_IRUGO);
+static int g_home_node = NUMA_NO_NODE;
+module_param_named(home_node, g_home_node, int, S_IRUGO);
MODULE_PARM_DESC(home_node, "Home node for the device");
-static int queue_mode = NULL_Q_MQ;
+static int g_queue_mode = NULL_Q_MQ;
static int null_param_store_val(const char *str, int *val, int min, int max)
{
@@ -92,7 +181,7 @@ static int null_param_store_val(const char *str, int *val, int min, int max)
static int null_set_queue_mode(const char *str, const struct kernel_param *kp)
{
- return null_param_store_val(str, &queue_mode, NULL_Q_BIO, NULL_Q_MQ);
+ return null_param_store_val(str, &g_queue_mode, NULL_Q_BIO, NULL_Q_MQ);
}
static const struct kernel_param_ops null_queue_mode_param_ops = {
@@ -100,38 +189,38 @@ static const struct kernel_param_ops null_queue_mode_param_ops = {
.get = param_get_int,
};
-device_param_cb(queue_mode, &null_queue_mode_param_ops, &queue_mode, S_IRUGO);
+device_param_cb(queue_mode, &null_queue_mode_param_ops, &g_queue_mode, S_IRUGO);
MODULE_PARM_DESC(queue_mode, "Block interface to use (0=bio,1=rq,2=multiqueue)");
-static int gb = 250;
-module_param(gb, int, S_IRUGO);
+static int g_gb = 250;
+module_param_named(gb, g_gb, int, S_IRUGO);
MODULE_PARM_DESC(gb, "Size in GB");
-static int bs = 512;
-module_param(bs, int, S_IRUGO);
+static int g_bs = 512;
+module_param_named(bs, g_bs, int, S_IRUGO);
MODULE_PARM_DESC(bs, "Block size (in bytes)");
static int nr_devices = 1;
module_param(nr_devices, int, S_IRUGO);
MODULE_PARM_DESC(nr_devices, "Number of devices to register");
-static bool use_lightnvm;
-module_param(use_lightnvm, bool, S_IRUGO);
+static bool g_use_lightnvm;
+module_param_named(use_lightnvm, g_use_lightnvm, bool, S_IRUGO);
MODULE_PARM_DESC(use_lightnvm, "Register as a LightNVM device");
-static bool blocking;
-module_param(blocking, bool, S_IRUGO);
+static bool g_blocking;
+module_param_named(blocking, g_blocking, bool, S_IRUGO);
MODULE_PARM_DESC(blocking, "Register as a blocking blk-mq driver device");
static bool shared_tags;
module_param(shared_tags, bool, S_IRUGO);
MODULE_PARM_DESC(shared_tags, "Share tag set between devices for blk-mq");
-static int irqmode = NULL_IRQ_SOFTIRQ;
+static int g_irqmode = NULL_IRQ_SOFTIRQ;
static int null_set_irqmode(const char *str, const struct kernel_param *kp)
{
- return null_param_store_val(str, &irqmode, NULL_IRQ_NONE,
+ return null_param_store_val(str, &g_irqmode, NULL_IRQ_NONE,
NULL_IRQ_TIMER);
}
@@ -140,21 +229,358 @@ static const struct kernel_param_ops null_irqmode_param_ops = {
.get = param_get_int,
};
-device_param_cb(irqmode, &null_irqmode_param_ops, &irqmode, S_IRUGO);
+device_param_cb(irqmode, &null_irqmode_param_ops, &g_irqmode, S_IRUGO);
MODULE_PARM_DESC(irqmode, "IRQ completion handler. 0-none, 1-softirq, 2-timer");
-static unsigned long completion_nsec = 10000;
-module_param(completion_nsec, ulong, S_IRUGO);
+static unsigned long g_completion_nsec = 10000;
+module_param_named(completion_nsec, g_completion_nsec, ulong, S_IRUGO);
MODULE_PARM_DESC(completion_nsec, "Time in ns to complete a request in hardware. Default: 10,000ns");
-static int hw_queue_depth = 64;
-module_param(hw_queue_depth, int, S_IRUGO);
+static int g_hw_queue_depth = 64;
+module_param_named(hw_queue_depth, g_hw_queue_depth, int, S_IRUGO);
MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: 64");
-static bool use_per_node_hctx = false;
-module_param(use_per_node_hctx, bool, S_IRUGO);
+static bool g_use_per_node_hctx;
+module_param_named(use_per_node_hctx, g_use_per_node_hctx, bool, S_IRUGO);
MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. Default: false");
+static struct nullb_device *null_alloc_dev(void);
+static void null_free_dev(struct nullb_device *dev);
+static void null_del_dev(struct nullb *nullb);
+static int null_add_dev(struct nullb_device *dev);
+static void null_free_device_storage(struct nullb_device *dev, bool is_cache);
+
+static inline struct nullb_device *to_nullb_device(struct config_item *item)
+{
+ return item ? container_of(item, struct nullb_device, item) : NULL;
+}
+
+static inline ssize_t nullb_device_uint_attr_show(unsigned int val, char *page)
+{
+ return snprintf(page, PAGE_SIZE, "%u\n", val);
+}
+
+static inline ssize_t nullb_device_ulong_attr_show(unsigned long val,
+ char *page)
+{
+ return snprintf(page, PAGE_SIZE, "%lu\n", val);
+}
+
+static inline ssize_t nullb_device_bool_attr_show(bool val, char *page)
+{
+ return snprintf(page, PAGE_SIZE, "%u\n", val);
+}
+
+static ssize_t nullb_device_uint_attr_store(unsigned int *val,
+ const char *page, size_t count)
+{
+ unsigned int tmp;
+ int result;
+
+ result = kstrtouint(page, 0, &tmp);
+ if (result)
+ return result;
+
+ *val = tmp;
+ return count;
+}
+
+static ssize_t nullb_device_ulong_attr_store(unsigned long *val,
+ const char *page, size_t count)
+{
+ int result;
+ unsigned long tmp;
+
+ result = kstrtoul(page, 0, &tmp);
+ if (result)
+ return result;
+
+ *val = tmp;
+ return count;
+}
+
+static ssize_t nullb_device_bool_attr_store(bool *val, const char *page,
+ size_t count)
+{
+ bool tmp;
+ int result;
+
+ result = kstrtobool(page, &tmp);
+ if (result)
+ return result;
+
+ *val = tmp;
+ return count;
+}
+
+/* The following macro should only be used with TYPE = {uint, ulong, bool}. */
+#define NULLB_DEVICE_ATTR(NAME, TYPE) \
+static ssize_t \
+nullb_device_##NAME##_show(struct config_item *item, char *page) \
+{ \
+ return nullb_device_##TYPE##_attr_show( \
+ to_nullb_device(item)->NAME, page); \
+} \
+static ssize_t \
+nullb_device_##NAME##_store(struct config_item *item, const char *page, \
+ size_t count) \
+{ \
+ if (test_bit(NULLB_DEV_FL_CONFIGURED, &to_nullb_device(item)->flags)) \
+ return -EBUSY; \
+ return nullb_device_##TYPE##_attr_store( \
+ &to_nullb_device(item)->NAME, page, count); \
+} \
+CONFIGFS_ATTR(nullb_device_, NAME);
+
+NULLB_DEVICE_ATTR(size, ulong);
+NULLB_DEVICE_ATTR(completion_nsec, ulong);
+NULLB_DEVICE_ATTR(submit_queues, uint);
+NULLB_DEVICE_ATTR(home_node, uint);
+NULLB_DEVICE_ATTR(queue_mode, uint);
+NULLB_DEVICE_ATTR(blocksize, uint);
+NULLB_DEVICE_ATTR(irqmode, uint);
+NULLB_DEVICE_ATTR(hw_queue_depth, uint);
+NULLB_DEVICE_ATTR(index, uint);
+NULLB_DEVICE_ATTR(use_lightnvm, bool);
+NULLB_DEVICE_ATTR(blocking, bool);
+NULLB_DEVICE_ATTR(use_per_node_hctx, bool);
+NULLB_DEVICE_ATTR(memory_backed, bool);
+NULLB_DEVICE_ATTR(discard, bool);
+NULLB_DEVICE_ATTR(mbps, uint);
+NULLB_DEVICE_ATTR(cache_size, ulong);
+
+static ssize_t nullb_device_power_show(struct config_item *item, char *page)
+{
+ return nullb_device_bool_attr_show(to_nullb_device(item)->power, page);
+}
+
+static ssize_t nullb_device_power_store(struct config_item *item,
+ const char *page, size_t count)
+{
+ struct nullb_device *dev = to_nullb_device(item);
+ bool newp = false;
+ ssize_t ret;
+
+ ret = nullb_device_bool_attr_store(&newp, page, count);
+ if (ret < 0)
+ return ret;
+
+ if (!dev->power && newp) {
+ if (test_and_set_bit(NULLB_DEV_FL_UP, &dev->flags))
+ return count;
+ if (null_add_dev(dev)) {
+ clear_bit(NULLB_DEV_FL_UP, &dev->flags);
+ return -ENOMEM;
+ }
+
+ set_bit(NULLB_DEV_FL_CONFIGURED, &dev->flags);
+ dev->power = newp;
+ } else if (dev->power && !newp) {
+ mutex_lock(&lock);
+ dev->power = newp;
+ null_del_dev(dev->nullb);
+ mutex_unlock(&lock);
+ clear_bit(NULLB_DEV_FL_UP, &dev->flags);
+ }
+
+ return count;
+}
+
+CONFIGFS_ATTR(nullb_device_, power);
+
+static ssize_t nullb_device_badblocks_show(struct config_item *item, char *page)
+{
+ struct nullb_device *t_dev = to_nullb_device(item);
+
+ return badblocks_show(&t_dev->badblocks, page, 0);
+}
+
+static ssize_t nullb_device_badblocks_store(struct config_item *item,
+ const char *page, size_t count)
+{
+ struct nullb_device *t_dev = to_nullb_device(item);
+ char *orig, *buf, *tmp;
+ u64 start, end;
+ int ret;
+
+ orig = kstrndup(page, count, GFP_KERNEL);
+ if (!orig)
+ return -ENOMEM;
+
+ buf = strstrip(orig);
+
+ ret = -EINVAL;
+ if (buf[0] != '+' && buf[0] != '-')
+ goto out;
+ tmp = strchr(&buf[1], '-');
+ if (!tmp)
+ goto out;
+ *tmp = '\0';
+ ret = kstrtoull(buf + 1, 0, &start);
+ if (ret)
+ goto out;
+ ret = kstrtoull(tmp + 1, 0, &end);
+ if (ret)
+ goto out;
+ ret = -EINVAL;
+ if (start > end)
+ goto out;
+ /* enable badblocks */
+ cmpxchg(&t_dev->badblocks.shift, -1, 0);
+ if (buf[0] == '+')
+ ret = badblocks_set(&t_dev->badblocks, start,
+ end - start + 1, 1);
+ else
+ ret = badblocks_clear(&t_dev->badblocks, start,
+ end - start + 1);
+ if (ret == 0)
+ ret = count;
+out:
+ kfree(orig);
+ return ret;
+}
+CONFIGFS_ATTR(nullb_device_, badblocks);
+
+static struct configfs_attribute *nullb_device_attrs[] = {
+ &nullb_device_attr_size,
+ &nullb_device_attr_completion_nsec,
+ &nullb_device_attr_submit_queues,
+ &nullb_device_attr_home_node,
+ &nullb_device_attr_queue_mode,
+ &nullb_device_attr_blocksize,
+ &nullb_device_attr_irqmode,
+ &nullb_device_attr_hw_queue_depth,
+ &nullb_device_attr_index,
+ &nullb_device_attr_use_lightnvm,
+ &nullb_device_attr_blocking,
+ &nullb_device_attr_use_per_node_hctx,
+ &nullb_device_attr_power,
+ &nullb_device_attr_memory_backed,
+ &nullb_device_attr_discard,
+ &nullb_device_attr_mbps,
+ &nullb_device_attr_cache_size,
+ &nullb_device_attr_badblocks,
+ NULL,
+};
+
+static void nullb_device_release(struct config_item *item)
+{
+ struct nullb_device *dev = to_nullb_device(item);
+
+ badblocks_exit(&dev->badblocks);
+ null_free_device_storage(dev, false);
+ null_free_dev(dev);
+}
+
+static struct configfs_item_operations nullb_device_ops = {
+ .release = nullb_device_release,
+};
+
+static struct config_item_type nullb_device_type = {
+ .ct_item_ops = &nullb_device_ops,
+ .ct_attrs = nullb_device_attrs,
+ .ct_owner = THIS_MODULE,
+};
+
+static struct
+config_item *nullb_group_make_item(struct config_group *group, const char *name)
+{
+ struct nullb_device *dev;
+
+ dev = null_alloc_dev();
+ if (!dev)
+ return ERR_PTR(-ENOMEM);
+
+ config_item_init_type_name(&dev->item, name, &nullb_device_type);
+
+ return &dev->item;
+}
+
+static void
+nullb_group_drop_item(struct config_group *group, struct config_item *item)
+{
+ struct nullb_device *dev = to_nullb_device(item);
+
+ if (test_and_clear_bit(NULLB_DEV_FL_UP, &dev->flags)) {
+ mutex_lock(&lock);
+ dev->power = false;
+ null_del_dev(dev->nullb);
+ mutex_unlock(&lock);
+ }
+
+ config_item_put(item);
+}
+
+static ssize_t memb_group_features_show(struct config_item *item, char *page)
+{
+ return snprintf(page, PAGE_SIZE, "memory_backed,discard,bandwidth,cache,badblocks\n");
+}
+
+CONFIGFS_ATTR_RO(memb_group_, features);
+
+static struct configfs_attribute *nullb_group_attrs[] = {
+ &memb_group_attr_features,
+ NULL,
+};
+
+static struct configfs_group_operations nullb_group_ops = {
+ .make_item = nullb_group_make_item,
+ .drop_item = nullb_group_drop_item,
+};
+
+static struct config_item_type nullb_group_type = {
+ .ct_group_ops = &nullb_group_ops,
+ .ct_attrs = nullb_group_attrs,
+ .ct_owner = THIS_MODULE,
+};
+
+static struct configfs_subsystem nullb_subsys = {
+ .su_group = {
+ .cg_item = {
+ .ci_namebuf = "nullb",
+ .ci_type = &nullb_group_type,
+ },
+ },
+};
+
+static inline int null_cache_active(struct nullb *nullb)
+{
+ return test_bit(NULLB_DEV_FL_CACHE, &nullb->dev->flags);
+}
+
+static struct nullb_device *null_alloc_dev(void)
+{
+ struct nullb_device *dev;
+
+ dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+ if (!dev)
+ return NULL;
+ INIT_RADIX_TREE(&dev->data, GFP_ATOMIC);
+ INIT_RADIX_TREE(&dev->cache, GFP_ATOMIC);
+ if (badblocks_init(&dev->badblocks, 0)) {
+ kfree(dev);
+ return NULL;
+ }
+
+ dev->size = g_gb * 1024;
+ dev->completion_nsec = g_completion_nsec;
+ dev->submit_queues = g_submit_queues;
+ dev->home_node = g_home_node;
+ dev->queue_mode = g_queue_mode;
+ dev->blocksize = g_bs;
+ dev->irqmode = g_irqmode;
+ dev->hw_queue_depth = g_hw_queue_depth;
+ dev->use_lightnvm = g_use_lightnvm;
+ dev->blocking = g_blocking;
+ dev->use_per_node_hctx = g_use_per_node_hctx;
+ return dev;
+}
+
+static void null_free_dev(struct nullb_device *dev)
+{
+ kfree(dev);
+}
+
static void put_tag(struct nullb_queue *nq, unsigned int tag)
{
clear_bit_unlock(tag, nq->tag_map);
@@ -193,7 +619,7 @@ static struct nullb_cmd *__alloc_cmd(struct nullb_queue *nq)
cmd = &nq->cmds[tag];
cmd->tag = tag;
cmd->nq = nq;
- if (irqmode == NULL_IRQ_TIMER) {
+ if (nq->dev->irqmode == NULL_IRQ_TIMER) {
hrtimer_init(&cmd->timer, CLOCK_MONOTONIC,
HRTIMER_MODE_REL);
cmd->timer.function = null_cmd_timer_expired;
@@ -229,19 +655,21 @@ static struct nullb_cmd *alloc_cmd(struct nullb_queue *nq, int can_wait)
static void end_cmd(struct nullb_cmd *cmd)
{
struct request_queue *q = NULL;
+ int queue_mode = cmd->nq->dev->queue_mode;
if (cmd->rq)
q = cmd->rq->q;
switch (queue_mode) {
case NULL_Q_MQ:
- blk_mq_end_request(cmd->rq, BLK_STS_OK);
+ blk_mq_end_request(cmd->rq, cmd->error);
return;
case NULL_Q_RQ:
INIT_LIST_HEAD(&cmd->rq->queuelist);
- blk_end_request_all(cmd->rq, BLK_STS_OK);
+ blk_end_request_all(cmd->rq, cmd->error);
break;
case NULL_Q_BIO:
+ cmd->bio->bi_status = cmd->error;
bio_endio(cmd->bio);
break;
}
@@ -267,25 +695,582 @@ static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer)
static void null_cmd_end_timer(struct nullb_cmd *cmd)
{
- ktime_t kt = completion_nsec;
+ ktime_t kt = cmd->nq->dev->completion_nsec;
hrtimer_start(&cmd->timer, kt, HRTIMER_MODE_REL);
}
static void null_softirq_done_fn(struct request *rq)
{
- if (queue_mode == NULL_Q_MQ)
+ struct nullb *nullb = rq->q->queuedata;
+
+ if (nullb->dev->queue_mode == NULL_Q_MQ)
end_cmd(blk_mq_rq_to_pdu(rq));
else
end_cmd(rq->special);
}
-static inline void null_handle_cmd(struct nullb_cmd *cmd)
+static struct nullb_page *null_alloc_page(gfp_t gfp_flags)
{
+ struct nullb_page *t_page;
+
+ t_page = kmalloc(sizeof(struct nullb_page), gfp_flags);
+ if (!t_page)
+ goto out;
+
+ t_page->page = alloc_pages(gfp_flags, 0);
+ if (!t_page->page)
+ goto out_freepage;
+
+ t_page->bitmap = 0;
+ return t_page;
+out_freepage:
+ kfree(t_page);
+out:
+ return NULL;
+}
+
+static void null_free_page(struct nullb_page *t_page)
+{
+ __set_bit(NULLB_PAGE_FREE, &t_page->bitmap);
+ if (test_bit(NULLB_PAGE_LOCK, &t_page->bitmap))
+ return;
+ __free_page(t_page->page);
+ kfree(t_page);
+}
+
+static void null_free_sector(struct nullb *nullb, sector_t sector,
+ bool is_cache)
+{
+ unsigned int sector_bit;
+ u64 idx;
+ struct nullb_page *t_page, *ret;
+ struct radix_tree_root *root;
+
+ root = is_cache ? &nullb->dev->cache : &nullb->dev->data;
+ idx = sector >> PAGE_SECTORS_SHIFT;
+ sector_bit = (sector & SECTOR_MASK);
+
+ t_page = radix_tree_lookup(root, idx);
+ if (t_page) {
+ __clear_bit(sector_bit, &t_page->bitmap);
+
+ if (!t_page->bitmap) {
+ ret = radix_tree_delete_item(root, idx, t_page);
+ WARN_ON(ret != t_page);
+ null_free_page(ret);
+ if (is_cache)
+ nullb->dev->curr_cache -= PAGE_SIZE;
+ }
+ }
+}
+
+static struct nullb_page *null_radix_tree_insert(struct nullb *nullb, u64 idx,
+ struct nullb_page *t_page, bool is_cache)
+{
+ struct radix_tree_root *root;
+
+ root = is_cache ? &nullb->dev->cache : &nullb->dev->data;
+
+ if (radix_tree_insert(root, idx, t_page)) {
+ null_free_page(t_page);
+ t_page = radix_tree_lookup(root, idx);
+ WARN_ON(!t_page || t_page->page->index != idx);
+ } else if (is_cache)
+ nullb->dev->curr_cache += PAGE_SIZE;
+
+ return t_page;
+}
+
+static void null_free_device_storage(struct nullb_device *dev, bool is_cache)
+{
+ unsigned long pos = 0;
+ int nr_pages;
+ struct nullb_page *ret, *t_pages[FREE_BATCH];
+ struct radix_tree_root *root;
+
+ root = is_cache ? &dev->cache : &dev->data;
+
+ do {
+ int i;
+
+ nr_pages = radix_tree_gang_lookup(root,
+ (void **)t_pages, pos, FREE_BATCH);
+
+ for (i = 0; i < nr_pages; i++) {
+ pos = t_pages[i]->page->index;
+ ret = radix_tree_delete_item(root, pos, t_pages[i]);
+ WARN_ON(ret != t_pages[i]);
+ null_free_page(ret);
+ }
+
+ pos++;
+ } while (nr_pages == FREE_BATCH);
+
+ if (is_cache)
+ dev->curr_cache = 0;
+}
+
+static struct nullb_page *__null_lookup_page(struct nullb *nullb,
+ sector_t sector, bool for_write, bool is_cache)
+{
+ unsigned int sector_bit;
+ u64 idx;
+ struct nullb_page *t_page;
+ struct radix_tree_root *root;
+
+ idx = sector >> PAGE_SECTORS_SHIFT;
+ sector_bit = (sector & SECTOR_MASK);
+
+ root = is_cache ? &nullb->dev->cache : &nullb->dev->data;
+ t_page = radix_tree_lookup(root, idx);
+ WARN_ON(t_page && t_page->page->index != idx);
+
+ if (t_page && (for_write || test_bit(sector_bit, &t_page->bitmap)))
+ return t_page;
+
+ return NULL;
+}
+
+static struct nullb_page *null_lookup_page(struct nullb *nullb,
+ sector_t sector, bool for_write, bool ignore_cache)
+{
+ struct nullb_page *page = NULL;
+
+ if (!ignore_cache)
+ page = __null_lookup_page(nullb, sector, for_write, true);
+ if (page)
+ return page;
+ return __null_lookup_page(nullb, sector, for_write, false);
+}
+
+static struct nullb_page *null_insert_page(struct nullb *nullb,
+ sector_t sector, bool ignore_cache)
+{
+ u64 idx;
+ struct nullb_page *t_page;
+
+ t_page = null_lookup_page(nullb, sector, true, ignore_cache);
+ if (t_page)
+ return t_page;
+
+ spin_unlock_irq(&nullb->lock);
+
+ t_page = null_alloc_page(GFP_NOIO);
+ if (!t_page)
+ goto out_lock;
+
+ if (radix_tree_preload(GFP_NOIO))
+ goto out_freepage;
+
+ spin_lock_irq(&nullb->lock);
+ idx = sector >> PAGE_SECTORS_SHIFT;
+ t_page->page->index = idx;
+ t_page = null_radix_tree_insert(nullb, idx, t_page, !ignore_cache);
+ radix_tree_preload_end();
+
+ return t_page;
+out_freepage:
+ null_free_page(t_page);
+out_lock:
+ spin_lock_irq(&nullb->lock);
+ return null_lookup_page(nullb, sector, true, ignore_cache);
+}
+
+static int null_flush_cache_page(struct nullb *nullb, struct nullb_page *c_page)
+{
+ int i;
+ unsigned int offset;
+ u64 idx;
+ struct nullb_page *t_page, *ret;
+ void *dst, *src;
+
+ idx = c_page->page->index;
+
+ t_page = null_insert_page(nullb, idx << PAGE_SECTORS_SHIFT, true);
+
+ __clear_bit(NULLB_PAGE_LOCK, &c_page->bitmap);
+ if (test_bit(NULLB_PAGE_FREE, &c_page->bitmap)) {
+ null_free_page(c_page);
+ if (t_page && t_page->bitmap == 0) {
+ ret = radix_tree_delete_item(&nullb->dev->data,
+ idx, t_page);
+ null_free_page(t_page);
+ }
+ return 0;
+ }
+
+ if (!t_page)
+ return -ENOMEM;
+
+ src = kmap_atomic(c_page->page);
+ dst = kmap_atomic(t_page->page);
+
+ for (i = 0; i < PAGE_SECTORS;
+ i += (nullb->dev->blocksize >> SECTOR_SHIFT)) {
+ if (test_bit(i, &c_page->bitmap)) {
+ offset = (i << SECTOR_SHIFT);
+ memcpy(dst + offset, src + offset,
+ nullb->dev->blocksize);
+ __set_bit(i, &t_page->bitmap);
+ }
+ }
+
+ kunmap_atomic(dst);
+ kunmap_atomic(src);
+
+ ret = radix_tree_delete_item(&nullb->dev->cache, idx, c_page);
+ null_free_page(ret);
+ nullb->dev->curr_cache -= PAGE_SIZE;
+
+ return 0;
+}
+
+static int null_make_cache_space(struct nullb *nullb, unsigned long n)
+{
+ int i, err, nr_pages;
+ struct nullb_page *c_pages[FREE_BATCH];
+ unsigned long flushed = 0, one_round;
+
+again:
+ if ((nullb->dev->cache_size * 1024 * 1024) >
+ nullb->dev->curr_cache + n || nullb->dev->curr_cache == 0)
+ return 0;
+
+ nr_pages = radix_tree_gang_lookup(&nullb->dev->cache,
+ (void **)c_pages, nullb->cache_flush_pos, FREE_BATCH);
+ /*
+ * nullb_flush_cache_page could unlock before using the c_pages. To
+ * avoid race, we don't allow page free
+ */
+ for (i = 0; i < nr_pages; i++) {
+ nullb->cache_flush_pos = c_pages[i]->page->index;
+ /*
+ * We found the page which is being flushed to disk by other
+ * threads
+ */
+ if (test_bit(NULLB_PAGE_LOCK, &c_pages[i]->bitmap))
+ c_pages[i] = NULL;
+ else
+ __set_bit(NULLB_PAGE_LOCK, &c_pages[i]->bitmap);
+ }
+
+ one_round = 0;
+ for (i = 0; i < nr_pages; i++) {
+ if (c_pages[i] == NULL)
+ continue;
+ err = null_flush_cache_page(nullb, c_pages[i]);
+ if (err)
+ return err;
+ one_round++;
+ }
+ flushed += one_round << PAGE_SHIFT;
+
+ if (n > flushed) {
+ if (nr_pages == 0)
+ nullb->cache_flush_pos = 0;
+ if (one_round == 0) {
+ /* give other threads a chance */
+ spin_unlock_irq(&nullb->lock);
+ spin_lock_irq(&nullb->lock);
+ }
+ goto again;
+ }
+ return 0;
+}
+
+static int copy_to_nullb(struct nullb *nullb, struct page *source,
+ unsigned int off, sector_t sector, size_t n, bool is_fua)
+{
+ size_t temp, count = 0;
+ unsigned int offset;
+ struct nullb_page *t_page;
+ void *dst, *src;
+
+ while (count < n) {
+ temp = min_t(size_t, nullb->dev->blocksize, n - count);
+
+ if (null_cache_active(nullb) && !is_fua)
+ null_make_cache_space(nullb, PAGE_SIZE);
+
+ offset = (sector & SECTOR_MASK) << SECTOR_SHIFT;
+ t_page = null_insert_page(nullb, sector,
+ !null_cache_active(nullb) || is_fua);
+ if (!t_page)
+ return -ENOSPC;
+
+ src = kmap_atomic(source);
+ dst = kmap_atomic(t_page->page);
+ memcpy(dst + offset, src + off + count, temp);
+ kunmap_atomic(dst);
+ kunmap_atomic(src);
+
+ __set_bit(sector & SECTOR_MASK, &t_page->bitmap);
+
+ if (is_fua)
+ null_free_sector(nullb, sector, true);
+
+ count += temp;
+ sector += temp >> SECTOR_SHIFT;
+ }
+ return 0;
+}
+
+static int copy_from_nullb(struct nullb *nullb, struct page *dest,
+ unsigned int off, sector_t sector, size_t n)
+{
+ size_t temp, count = 0;
+ unsigned int offset;
+ struct nullb_page *t_page;
+ void *dst, *src;
+
+ while (count < n) {
+ temp = min_t(size_t, nullb->dev->blocksize, n - count);
+
+ offset = (sector & SECTOR_MASK) << SECTOR_SHIFT;
+ t_page = null_lookup_page(nullb, sector, false,
+ !null_cache_active(nullb));
+
+ dst = kmap_atomic(dest);
+ if (!t_page) {
+ memset(dst + off + count, 0, temp);
+ goto next;
+ }
+ src = kmap_atomic(t_page->page);
+ memcpy(dst + off + count, src + offset, temp);
+ kunmap_atomic(src);
+next:
+ kunmap_atomic(dst);
+
+ count += temp;
+ sector += temp >> SECTOR_SHIFT;
+ }
+ return 0;
+}
+
+static void null_handle_discard(struct nullb *nullb, sector_t sector, size_t n)
+{
+ size_t temp;
+
+ spin_lock_irq(&nullb->lock);
+ while (n > 0) {
+ temp = min_t(size_t, n, nullb->dev->blocksize);
+ null_free_sector(nullb, sector, false);
+ if (null_cache_active(nullb))
+ null_free_sector(nullb, sector, true);
+ sector += temp >> SECTOR_SHIFT;
+ n -= temp;
+ }
+ spin_unlock_irq(&nullb->lock);
+}
+
+static int null_handle_flush(struct nullb *nullb)
+{
+ int err;
+
+ if (!null_cache_active(nullb))
+ return 0;
+
+ spin_lock_irq(&nullb->lock);
+ while (true) {
+ err = null_make_cache_space(nullb,
+ nullb->dev->cache_size * 1024 * 1024);
+ if (err || nullb->dev->curr_cache == 0)
+ break;
+ }
+
+ WARN_ON(!radix_tree_empty(&nullb->dev->cache));
+ spin_unlock_irq(&nullb->lock);
+ return err;
+}
+
+static int null_transfer(struct nullb *nullb, struct page *page,
+ unsigned int len, unsigned int off, bool is_write, sector_t sector,
+ bool is_fua)
+{
+ int err = 0;
+
+ if (!is_write) {
+ err = copy_from_nullb(nullb, page, off, sector, len);
+ flush_dcache_page(page);
+ } else {
+ flush_dcache_page(page);
+ err = copy_to_nullb(nullb, page, off, sector, len, is_fua);
+ }
+
+ return err;
+}
+
+static int null_handle_rq(struct nullb_cmd *cmd)
+{
+ struct request *rq = cmd->rq;
+ struct nullb *nullb = cmd->nq->dev->nullb;
+ int err;
+ unsigned int len;
+ sector_t sector;
+ struct req_iterator iter;
+ struct bio_vec bvec;
+
+ sector = blk_rq_pos(rq);
+
+ if (req_op(rq) == REQ_OP_DISCARD) {
+ null_handle_discard(nullb, sector, blk_rq_bytes(rq));
+ return 0;
+ }
+
+ spin_lock_irq(&nullb->lock);
+ rq_for_each_segment(bvec, rq, iter) {
+ len = bvec.bv_len;
+ err = null_transfer(nullb, bvec.bv_page, len, bvec.bv_offset,
+ op_is_write(req_op(rq)), sector,
+ req_op(rq) & REQ_FUA);
+ if (err) {
+ spin_unlock_irq(&nullb->lock);
+ return err;
+ }
+ sector += len >> SECTOR_SHIFT;
+ }
+ spin_unlock_irq(&nullb->lock);
+
+ return 0;
+}
+
+static int null_handle_bio(struct nullb_cmd *cmd)
+{
+ struct bio *bio = cmd->bio;
+ struct nullb *nullb = cmd->nq->dev->nullb;
+ int err;
+ unsigned int len;
+ sector_t sector;
+ struct bio_vec bvec;
+ struct bvec_iter iter;
+
+ sector = bio->bi_iter.bi_sector;
+
+ if (bio_op(bio) == REQ_OP_DISCARD) {
+ null_handle_discard(nullb, sector,
+ bio_sectors(bio) << SECTOR_SHIFT);
+ return 0;
+ }
+
+ spin_lock_irq(&nullb->lock);
+ bio_for_each_segment(bvec, bio, iter) {
+ len = bvec.bv_len;
+ err = null_transfer(nullb, bvec.bv_page, len, bvec.bv_offset,
+ op_is_write(bio_op(bio)), sector,
+ bio_op(bio) & REQ_FUA);
+ if (err) {
+ spin_unlock_irq(&nullb->lock);
+ return err;
+ }
+ sector += len >> SECTOR_SHIFT;
+ }
+ spin_unlock_irq(&nullb->lock);
+ return 0;
+}
+
+static void null_stop_queue(struct nullb *nullb)
+{
+ struct request_queue *q = nullb->q;
+
+ if (nullb->dev->queue_mode == NULL_Q_MQ)
+ blk_mq_stop_hw_queues(q);
+ else {
+ spin_lock_irq(q->queue_lock);
+ blk_stop_queue(q);
+ spin_unlock_irq(q->queue_lock);
+ }
+}
+
+static void null_restart_queue_async(struct nullb *nullb)
+{
+ struct request_queue *q = nullb->q;
+ unsigned long flags;
+
+ if (nullb->dev->queue_mode == NULL_Q_MQ)
+ blk_mq_start_stopped_hw_queues(q, true);
+ else {
+ spin_lock_irqsave(q->queue_lock, flags);
+ blk_start_queue_async(q);
+ spin_unlock_irqrestore(q->queue_lock, flags);
+ }
+}
+
+static blk_status_t null_handle_cmd(struct nullb_cmd *cmd)
+{
+ struct nullb_device *dev = cmd->nq->dev;
+ struct nullb *nullb = dev->nullb;
+ int err = 0;
+
+ if (test_bit(NULLB_DEV_FL_THROTTLED, &dev->flags)) {
+ struct request *rq = cmd->rq;
+
+ if (!hrtimer_active(&nullb->bw_timer))
+ hrtimer_restart(&nullb->bw_timer);
+
+ if (atomic_long_sub_return(blk_rq_bytes(rq),
+ &nullb->cur_bytes) < 0) {
+ null_stop_queue(nullb);
+ /* race with timer */
+ if (atomic_long_read(&nullb->cur_bytes) > 0)
+ null_restart_queue_async(nullb);
+ if (dev->queue_mode == NULL_Q_RQ) {
+ struct request_queue *q = nullb->q;
+
+ spin_lock_irq(q->queue_lock);
+ rq->rq_flags |= RQF_DONTPREP;
+ blk_requeue_request(q, rq);
+ spin_unlock_irq(q->queue_lock);
+ return BLK_STS_OK;
+ } else
+ /* requeue request */
+ return BLK_STS_RESOURCE;
+ }
+ }
+
+ if (nullb->dev->badblocks.shift != -1) {
+ int bad_sectors;
+ sector_t sector, size, first_bad;
+ bool is_flush = true;
+
+ if (dev->queue_mode == NULL_Q_BIO &&
+ bio_op(cmd->bio) != REQ_OP_FLUSH) {
+ is_flush = false;
+ sector = cmd->bio->bi_iter.bi_sector;
+ size = bio_sectors(cmd->bio);
+ }
+ if (dev->queue_mode != NULL_Q_BIO &&
+ req_op(cmd->rq) != REQ_OP_FLUSH) {
+ is_flush = false;
+ sector = blk_rq_pos(cmd->rq);
+ size = blk_rq_sectors(cmd->rq);
+ }
+ if (!is_flush && badblocks_check(&nullb->dev->badblocks, sector,
+ size, &first_bad, &bad_sectors)) {
+ cmd->error = BLK_STS_IOERR;
+ goto out;
+ }
+ }
+
+ if (dev->memory_backed) {
+ if (dev->queue_mode == NULL_Q_BIO) {
+ if (bio_op(cmd->bio) == REQ_OP_FLUSH)
+ err = null_handle_flush(nullb);
+ else
+ err = null_handle_bio(cmd);
+ } else {
+ if (req_op(cmd->rq) == REQ_OP_FLUSH)
+ err = null_handle_flush(nullb);
+ else
+ err = null_handle_rq(cmd);
+ }
+ }
+ cmd->error = errno_to_blk_status(err);
+out:
/* Complete IO by inline, softirq or timer */
- switch (irqmode) {
+ switch (dev->irqmode) {
case NULL_IRQ_SOFTIRQ:
- switch (queue_mode) {
+ switch (dev->queue_mode) {
case NULL_Q_MQ:
blk_mq_complete_request(cmd->rq);
break;
@@ -307,6 +1292,34 @@ static inline void null_handle_cmd(struct nullb_cmd *cmd)
null_cmd_end_timer(cmd);
break;
}
+ return BLK_STS_OK;
+}
+
+static enum hrtimer_restart nullb_bwtimer_fn(struct hrtimer *timer)
+{
+ struct nullb *nullb = container_of(timer, struct nullb, bw_timer);
+ ktime_t timer_interval = ktime_set(0, TIMER_INTERVAL);
+ unsigned int mbps = nullb->dev->mbps;
+
+ if (atomic_long_read(&nullb->cur_bytes) == mb_per_tick(mbps))
+ return HRTIMER_NORESTART;
+
+ atomic_long_set(&nullb->cur_bytes, mb_per_tick(mbps));
+ null_restart_queue_async(nullb);
+
+ hrtimer_forward_now(&nullb->bw_timer, timer_interval);
+
+ return HRTIMER_RESTART;
+}
+
+static void nullb_setup_bwtimer(struct nullb *nullb)
+{
+ ktime_t timer_interval = ktime_set(0, TIMER_INTERVAL);
+
+ hrtimer_init(&nullb->bw_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ nullb->bw_timer.function = nullb_bwtimer_fn;
+ atomic_long_set(&nullb->cur_bytes, mb_per_tick(nullb->dev->mbps));
+ hrtimer_start(&nullb->bw_timer, timer_interval, HRTIMER_MODE_REL);
}
static struct nullb_queue *nullb_to_queue(struct nullb *nullb)
@@ -366,20 +1379,20 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
struct nullb_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
+ struct nullb_queue *nq = hctx->driver_data;
might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING);
- if (irqmode == NULL_IRQ_TIMER) {
+ if (nq->dev->irqmode == NULL_IRQ_TIMER) {
hrtimer_init(&cmd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
cmd->timer.function = null_cmd_timer_expired;
}
cmd->rq = bd->rq;
- cmd->nq = hctx->driver_data;
+ cmd->nq = nq;
blk_mq_start_request(bd->rq);
- null_handle_cmd(cmd);
- return BLK_STS_OK;
+ return null_handle_cmd(cmd);
}
static const struct blk_mq_ops null_mq_ops = {
@@ -438,7 +1451,8 @@ static int null_lnvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
static int null_lnvm_id(struct nvm_dev *dev, struct nvm_id *id)
{
- sector_t size = gb * 1024 * 1024 * 1024ULL;
+ struct nullb *nullb = dev->q->queuedata;
+ sector_t size = (sector_t)nullb->dev->size * 1024 * 1024ULL;
sector_t blksize;
struct nvm_id_group *grp;
@@ -460,7 +1474,7 @@ static int null_lnvm_id(struct nvm_dev *dev, struct nvm_id *id)
id->ppaf.ch_offset = 56;
id->ppaf.ch_len = 8;
- sector_div(size, bs); /* convert size to pages */
+ sector_div(size, nullb->dev->blocksize); /* convert size to pages */
size >>= 8; /* concert size to pgs pr blk */
grp = &id->grp;
grp->mtype = 0;
@@ -474,8 +1488,8 @@ static int null_lnvm_id(struct nvm_dev *dev, struct nvm_id *id)
grp->num_blk = blksize;
grp->num_pln = 1;
- grp->fpg_sz = bs;
- grp->csecs = bs;
+ grp->fpg_sz = nullb->dev->blocksize;
+ grp->csecs = nullb->dev->blocksize;
grp->trdt = 25000;
grp->trdm = 25000;
grp->tprt = 500000;
@@ -483,7 +1497,7 @@ static int null_lnvm_id(struct nvm_dev *dev, struct nvm_id *id)
grp->tbet = 1500000;
grp->tbem = 1500000;
grp->mpos = 0x010101; /* single plane rwe */
- grp->cpar = hw_queue_depth;
+ grp->cpar = nullb->dev->hw_queue_depth;
return 0;
}
@@ -568,19 +1582,44 @@ static void null_nvm_unregister(struct nullb *nullb) {}
static void null_del_dev(struct nullb *nullb)
{
+ struct nullb_device *dev = nullb->dev;
+
+ ida_simple_remove(&nullb_indexes, nullb->index);
+
list_del_init(&nullb->list);
- if (use_lightnvm)
+ if (dev->use_lightnvm)
null_nvm_unregister(nullb);
else
del_gendisk(nullb->disk);
+
+ if (test_bit(NULLB_DEV_FL_THROTTLED, &nullb->dev->flags)) {
+ hrtimer_cancel(&nullb->bw_timer);
+ atomic_long_set(&nullb->cur_bytes, LONG_MAX);
+ null_restart_queue_async(nullb);
+ }
+
blk_cleanup_queue(nullb->q);
- if (queue_mode == NULL_Q_MQ && nullb->tag_set == &nullb->__tag_set)
+ if (dev->queue_mode == NULL_Q_MQ &&
+ nullb->tag_set == &nullb->__tag_set)
blk_mq_free_tag_set(nullb->tag_set);
- if (!use_lightnvm)
+ if (!dev->use_lightnvm)
put_disk(nullb->disk);
cleanup_queues(nullb);
+ if (null_cache_active(nullb))
+ null_free_device_storage(nullb->dev, true);
kfree(nullb);
+ dev->nullb = NULL;
+}
+
+static void null_config_discard(struct nullb *nullb)
+{
+ if (nullb->dev->discard == false)
+ return;
+ nullb->q->limits.discard_granularity = nullb->dev->blocksize;
+ nullb->q->limits.discard_alignment = nullb->dev->blocksize;
+ blk_queue_max_discard_sectors(nullb->q, UINT_MAX >> 9);
+ queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, nullb->q);
}
static int null_open(struct block_device *bdev, fmode_t mode)
@@ -605,6 +1644,7 @@ static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq)
init_waitqueue_head(&nq->wait);
nq->queue_depth = nullb->queue_depth;
+ nq->dev = nullb->dev;
}
static void null_init_queues(struct nullb *nullb)
@@ -652,13 +1692,13 @@ static int setup_commands(struct nullb_queue *nq)
static int setup_queues(struct nullb *nullb)
{
- nullb->queues = kzalloc(submit_queues * sizeof(struct nullb_queue),
- GFP_KERNEL);
+ nullb->queues = kzalloc(nullb->dev->submit_queues *
+ sizeof(struct nullb_queue), GFP_KERNEL);
if (!nullb->queues)
return -ENOMEM;
nullb->nr_queues = 0;
- nullb->queue_depth = hw_queue_depth;
+ nullb->queue_depth = nullb->dev->hw_queue_depth;
return 0;
}
@@ -668,7 +1708,7 @@ static int init_driver_queues(struct nullb *nullb)
struct nullb_queue *nq;
int i, ret = 0;
- for (i = 0; i < submit_queues; i++) {
+ for (i = 0; i < nullb->dev->submit_queues; i++) {
nq = &nullb->queues[i];
null_init_queue(nullb, nq);
@@ -686,10 +1726,10 @@ static int null_gendisk_register(struct nullb *nullb)
struct gendisk *disk;
sector_t size;
- disk = nullb->disk = alloc_disk_node(1, home_node);
+ disk = nullb->disk = alloc_disk_node(1, nullb->dev->home_node);
if (!disk)
return -ENOMEM;
- size = gb * 1024 * 1024 * 1024ULL;
+ size = (sector_t)nullb->dev->size * 1024 * 1024ULL;
set_capacity(disk, size >> 9);
disk->flags |= GENHD_FL_EXT_DEVT | GENHD_FL_SUPPRESS_PARTITION_INFO;
@@ -704,49 +1744,86 @@ static int null_gendisk_register(struct nullb *nullb)
return 0;
}
-static int null_init_tag_set(struct blk_mq_tag_set *set)
+static int null_init_tag_set(struct nullb *nullb, struct blk_mq_tag_set *set)
{
set->ops = &null_mq_ops;
- set->nr_hw_queues = submit_queues;
- set->queue_depth = hw_queue_depth;
- set->numa_node = home_node;
+ set->nr_hw_queues = nullb ? nullb->dev->submit_queues :
+ g_submit_queues;
+ set->queue_depth = nullb ? nullb->dev->hw_queue_depth :
+ g_hw_queue_depth;
+ set->numa_node = nullb ? nullb->dev->home_node : g_home_node;
set->cmd_size = sizeof(struct nullb_cmd);
set->flags = BLK_MQ_F_SHOULD_MERGE;
set->driver_data = NULL;
- if (blocking)
+ if ((nullb && nullb->dev->blocking) || g_blocking)
set->flags |= BLK_MQ_F_BLOCKING;
return blk_mq_alloc_tag_set(set);
}
-static int null_add_dev(void)
+static void null_validate_conf(struct nullb_device *dev)
+{
+ dev->blocksize = round_down(dev->blocksize, 512);
+ dev->blocksize = clamp_t(unsigned int, dev->blocksize, 512, 4096);
+ if (dev->use_lightnvm && dev->blocksize != 4096)
+ dev->blocksize = 4096;
+
+ if (dev->use_lightnvm && dev->queue_mode != NULL_Q_MQ)
+ dev->queue_mode = NULL_Q_MQ;
+
+ if (dev->queue_mode == NULL_Q_MQ && dev->use_per_node_hctx) {
+ if (dev->submit_queues != nr_online_nodes)
+ dev->submit_queues = nr_online_nodes;
+ } else if (dev->submit_queues > nr_cpu_ids)
+ dev->submit_queues = nr_cpu_ids;
+ else if (dev->submit_queues == 0)
+ dev->submit_queues = 1;
+
+ dev->queue_mode = min_t(unsigned int, dev->queue_mode, NULL_Q_MQ);
+ dev->irqmode = min_t(unsigned int, dev->irqmode, NULL_IRQ_TIMER);
+
+ /* Do memory allocation, so set blocking */
+ if (dev->memory_backed)
+ dev->blocking = true;
+ else /* cache is meaningless */
+ dev->cache_size = 0;
+ dev->cache_size = min_t(unsigned long, ULONG_MAX / 1024 / 1024,
+ dev->cache_size);
+ dev->mbps = min_t(unsigned int, 1024 * 40, dev->mbps);
+ /* can not stop a queue */
+ if (dev->queue_mode == NULL_Q_BIO)
+ dev->mbps = 0;
+}
+
+static int null_add_dev(struct nullb_device *dev)
{
struct nullb *nullb;
int rv;
- nullb = kzalloc_node(sizeof(*nullb), GFP_KERNEL, home_node);
+ null_validate_conf(dev);
+
+ nullb = kzalloc_node(sizeof(*nullb), GFP_KERNEL, dev->home_node);
if (!nullb) {
rv = -ENOMEM;
goto out;
}
+ nullb->dev = dev;
+ dev->nullb = nullb;
spin_lock_init(&nullb->lock);
- if (queue_mode == NULL_Q_MQ && use_per_node_hctx)
- submit_queues = nr_online_nodes;
-
rv = setup_queues(nullb);
if (rv)
goto out_free_nullb;
- if (queue_mode == NULL_Q_MQ) {
+ if (dev->queue_mode == NULL_Q_MQ) {
if (shared_tags) {
nullb->tag_set = &tag_set;
rv = 0;
} else {
nullb->tag_set = &nullb->__tag_set;
- rv = null_init_tag_set(nullb->tag_set);
+ rv = null_init_tag_set(nullb, nullb->tag_set);
}
if (rv)
@@ -758,8 +1835,8 @@ static int null_add_dev(void)
goto out_cleanup_tags;
}
null_init_queues(nullb);
- } else if (queue_mode == NULL_Q_BIO) {
- nullb->q = blk_alloc_queue_node(GFP_KERNEL, home_node);
+ } else if (dev->queue_mode == NULL_Q_BIO) {
+ nullb->q = blk_alloc_queue_node(GFP_KERNEL, dev->home_node);
if (!nullb->q) {
rv = -ENOMEM;
goto out_cleanup_queues;
@@ -769,7 +1846,8 @@ static int null_add_dev(void)
if (rv)
goto out_cleanup_blk_queue;
} else {
- nullb->q = blk_init_queue_node(null_request_fn, &nullb->lock, home_node);
+ nullb->q = blk_init_queue_node(null_request_fn, &nullb->lock,
+ dev->home_node);
if (!nullb->q) {
rv = -ENOMEM;
goto out_cleanup_queues;
@@ -781,20 +1859,34 @@ static int null_add_dev(void)
goto out_cleanup_blk_queue;
}
+ if (dev->mbps) {
+ set_bit(NULLB_DEV_FL_THROTTLED, &dev->flags);
+ nullb_setup_bwtimer(nullb);
+ }
+
+ if (dev->cache_size > 0) {
+ set_bit(NULLB_DEV_FL_CACHE, &nullb->dev->flags);
+ blk_queue_write_cache(nullb->q, true, true);
+ blk_queue_flush_queueable(nullb->q, true);
+ }
+
nullb->q->queuedata = nullb;
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, nullb->q);
queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, nullb->q);
mutex_lock(&lock);
- nullb->index = nullb_indexes++;
+ nullb->index = ida_simple_get(&nullb_indexes, 0, 0, GFP_KERNEL);
+ dev->index = nullb->index;
mutex_unlock(&lock);
- blk_queue_logical_block_size(nullb->q, bs);
- blk_queue_physical_block_size(nullb->q, bs);
+ blk_queue_logical_block_size(nullb->q, dev->blocksize);
+ blk_queue_physical_block_size(nullb->q, dev->blocksize);
+
+ null_config_discard(nullb);
sprintf(nullb->disk_name, "nullb%d", nullb->index);
- if (use_lightnvm)
+ if (dev->use_lightnvm)
rv = null_nvm_register(nullb);
else
rv = null_gendisk_register(nullb);
@@ -810,7 +1902,7 @@ static int null_add_dev(void)
out_cleanup_blk_queue:
blk_cleanup_queue(nullb->q);
out_cleanup_tags:
- if (queue_mode == NULL_Q_MQ && nullb->tag_set == &nullb->__tag_set)
+ if (dev->queue_mode == NULL_Q_MQ && nullb->tag_set == &nullb->__tag_set)
blk_mq_free_tag_set(nullb->tag_set);
out_cleanup_queues:
cleanup_queues(nullb);
@@ -825,51 +1917,63 @@ static int __init null_init(void)
int ret = 0;
unsigned int i;
struct nullb *nullb;
+ struct nullb_device *dev;
- if (bs > PAGE_SIZE) {
+ /* check for nullb_page.bitmap */
+ if (sizeof(unsigned long) * 8 - 2 < (PAGE_SIZE >> SECTOR_SHIFT))
+ return -EINVAL;
+
+ if (g_bs > PAGE_SIZE) {
pr_warn("null_blk: invalid block size\n");
pr_warn("null_blk: defaults block size to %lu\n", PAGE_SIZE);
- bs = PAGE_SIZE;
+ g_bs = PAGE_SIZE;
}
- if (use_lightnvm && bs != 4096) {
+ if (g_use_lightnvm && g_bs != 4096) {
pr_warn("null_blk: LightNVM only supports 4k block size\n");
pr_warn("null_blk: defaults block size to 4k\n");
- bs = 4096;
+ g_bs = 4096;
}
- if (use_lightnvm && queue_mode != NULL_Q_MQ) {
+ if (g_use_lightnvm && g_queue_mode != NULL_Q_MQ) {
pr_warn("null_blk: LightNVM only supported for blk-mq\n");
pr_warn("null_blk: defaults queue mode to blk-mq\n");
- queue_mode = NULL_Q_MQ;
+ g_queue_mode = NULL_Q_MQ;
}
- if (queue_mode == NULL_Q_MQ && use_per_node_hctx) {
- if (submit_queues < nr_online_nodes) {
- pr_warn("null_blk: submit_queues param is set to %u.",
+ if (g_queue_mode == NULL_Q_MQ && g_use_per_node_hctx) {
+ if (g_submit_queues != nr_online_nodes) {
+ pr_warn("null_blk: submit_queues param is set to %u.\n",
nr_online_nodes);
- submit_queues = nr_online_nodes;
+ g_submit_queues = nr_online_nodes;
}
- } else if (submit_queues > nr_cpu_ids)
- submit_queues = nr_cpu_ids;
- else if (!submit_queues)
- submit_queues = 1;
+ } else if (g_submit_queues > nr_cpu_ids)
+ g_submit_queues = nr_cpu_ids;
+ else if (g_submit_queues <= 0)
+ g_submit_queues = 1;
- if (queue_mode == NULL_Q_MQ && shared_tags) {
- ret = null_init_tag_set(&tag_set);
+ if (g_queue_mode == NULL_Q_MQ && shared_tags) {
+ ret = null_init_tag_set(NULL, &tag_set);
if (ret)
return ret;
}
+ config_group_init(&nullb_subsys.su_group);
+ mutex_init(&nullb_subsys.su_mutex);
+
+ ret = configfs_register_subsystem(&nullb_subsys);
+ if (ret)
+ goto err_tagset;
+
mutex_init(&lock);
null_major = register_blkdev(0, "nullb");
if (null_major < 0) {
ret = null_major;
- goto err_tagset;
+ goto err_conf;
}
- if (use_lightnvm) {
+ if (g_use_lightnvm) {
ppa_cache = kmem_cache_create("ppa_cache", 64 * sizeof(u64),
0, 0, NULL);
if (!ppa_cache) {
@@ -880,9 +1984,14 @@ static int __init null_init(void)
}
for (i = 0; i < nr_devices; i++) {
- ret = null_add_dev();
- if (ret)
+ dev = null_alloc_dev();
+ if (!dev)
goto err_dev;
+ ret = null_add_dev(dev);
+ if (ret) {
+ null_free_dev(dev);
+ goto err_dev;
+ }
}
pr_info("null: module loaded\n");
@@ -891,13 +2000,17 @@ static int __init null_init(void)
err_dev:
while (!list_empty(&nullb_list)) {
nullb = list_entry(nullb_list.next, struct nullb, list);
+ dev = nullb->dev;
null_del_dev(nullb);
+ null_free_dev(dev);
}
kmem_cache_destroy(ppa_cache);
err_ppa:
unregister_blkdev(null_major, "nullb");
+err_conf:
+ configfs_unregister_subsystem(&nullb_subsys);
err_tagset:
- if (queue_mode == NULL_Q_MQ && shared_tags)
+ if (g_queue_mode == NULL_Q_MQ && shared_tags)
blk_mq_free_tag_set(&tag_set);
return ret;
}
@@ -906,16 +2019,22 @@ static void __exit null_exit(void)
{
struct nullb *nullb;
+ configfs_unregister_subsystem(&nullb_subsys);
+
unregister_blkdev(null_major, "nullb");
mutex_lock(&lock);
while (!list_empty(&nullb_list)) {
+ struct nullb_device *dev;
+
nullb = list_entry(nullb_list.next, struct nullb, list);
+ dev = nullb->dev;
null_del_dev(nullb);
+ null_free_dev(dev);
}
mutex_unlock(&lock);
- if (queue_mode == NULL_Q_MQ && shared_tags)
+ if (g_queue_mode == NULL_Q_MQ && shared_tags)
blk_mq_free_tag_set(&tag_set);
kmem_cache_destroy(ppa_cache);
@@ -924,5 +2043,5 @@ static void __exit null_exit(void)
module_init(null_init);
module_exit(null_exit);
-MODULE_AUTHOR("Jens Axboe <jaxboe@fusionio.com>");
+MODULE_AUTHOR("Jens Axboe <axboe@kernel.dk>");
MODULE_LICENSE("GPL");
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 6b8b097..6797479 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -1028,7 +1028,7 @@ static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt)
bio = pkt->r_bios[f];
bio_reset(bio);
bio->bi_iter.bi_sector = pkt->sector + f * (CD_FRAMESIZE >> 9);
- bio->bi_bdev = pd->bdev;
+ bio_set_dev(bio, pd->bdev);
bio->bi_end_io = pkt_end_io_read;
bio->bi_private = pkt;
@@ -1122,7 +1122,7 @@ static int pkt_start_recovery(struct packet_data *pkt)
pkt->sector = new_sector;
bio_reset(pkt->bio);
- pkt->bio->bi_bdev = pd->bdev;
+ bio_set_set(pkt->bio, pd->bdev);
bio_set_op_attrs(pkt->bio, REQ_OP_WRITE, 0);
pkt->bio->bi_iter.bi_sector = new_sector;
pkt->bio->bi_iter.bi_size = pkt->frames * CD_FRAMESIZE;
@@ -1267,7 +1267,7 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
bio_reset(pkt->w_bio);
pkt->w_bio->bi_iter.bi_sector = pkt->sector;
- pkt->w_bio->bi_bdev = pd->bdev;
+ bio_set_dev(pkt->w_bio, pd->bdev);
pkt->w_bio->bi_end_io = pkt_end_io_packet_write;
pkt->w_bio->bi_private = pkt;
@@ -2314,7 +2314,7 @@ static void pkt_make_request_read(struct pktcdvd_device *pd, struct bio *bio)
psd->pd = pd;
psd->bio = bio;
- cloned_bio->bi_bdev = pd->bdev;
+ bio_set_dev(cloned_bio, pd->bdev);
cloned_bio->bi_private = psd;
cloned_bio->bi_end_io = pkt_end_io_read_cloned;
pd->stats.secs_r += bio_sectors(bio);
@@ -2415,8 +2415,7 @@ static blk_qc_t pkt_make_request(struct request_queue *q, struct bio *bio)
pd = q->queuedata;
if (!pd) {
- pr_err("%s incorrect request queue\n",
- bdevname(bio->bi_bdev, b));
+ pr_err("%s incorrect request queue\n", bio_devname(bio, b));
goto end_io;
}
diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c
index 7f4aceb..e397d3e 100644
--- a/drivers/block/rsxx/dev.c
+++ b/drivers/block/rsxx/dev.c
@@ -112,7 +112,7 @@ static const struct block_device_operations rsxx_fops = {
static void disk_stats_start(struct rsxx_cardinfo *card, struct bio *bio)
{
- generic_start_io_acct(bio_data_dir(bio), bio_sectors(bio),
+ generic_start_io_acct(card->queue, bio_data_dir(bio), bio_sectors(bio),
&card->gendisk->part0);
}
@@ -120,8 +120,8 @@ static void disk_stats_complete(struct rsxx_cardinfo *card,
struct bio *bio,
unsigned long start_time)
{
- generic_end_io_acct(bio_data_dir(bio), &card->gendisk->part0,
- start_time);
+ generic_end_io_acct(card->queue, bio_data_dir(bio),
+ &card->gendisk->part0, start_time);
}
static void bio_dma_done_cb(struct rsxx_cardinfo *card,
diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c
index d036868..7cedb42 100644
--- a/drivers/block/skd_main.c
+++ b/drivers/block/skd_main.c
@@ -1,19 +1,12 @@
-/* Copyright 2012 STEC, Inc.
+/*
+ * Driver for sTec s1120 PCIe SSDs. sTec was acquired in 2013 by HGST and HGST
+ * was acquired by Western Digital in 2012.
*
- * This file is licensed under the terms of the 3-clause
- * BSD License (http://opensource.org/licenses/BSD-3-Clause)
- * or the GNU GPL-2.0 (http://www.gnu.org/licenses/gpl-2.0.html),
- * at your option. Both licenses are also available in the LICENSE file
- * distributed with this project. This file may not be copied, modified,
- * or distributed except in accordance with those terms.
- * Gordoni Waidhofer <gwaidhofer@stec-inc.com>
- * Initial Driver Design!
- * Thomas Swann <tswann@stec-inc.com>
- * Interrupt handling.
- * Ramprasad Chinthekindi <rchinthekindi@stec-inc.com>
- * biomode implementation.
- * Akhil Bhansali <abhansali@stec-inc.com>
- * Added support for DISCARD / FLUSH and FUA.
+ * Copyright 2012 sTec, Inc.
+ * Copyright (c) 2017 Western Digital Corporation or its affiliates.
+ *
+ * This file is part of the Linux kernel, and is made available under
+ * the terms of the GNU General Public License version 2.
*/
#include <linux/kernel.h>
@@ -23,11 +16,11 @@
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/blkdev.h>
+#include <linux/blk-mq.h>
#include <linux/sched.h>
#include <linux/interrupt.h>
#include <linux/compiler.h>
#include <linux/workqueue.h>
-#include <linux/bitops.h>
#include <linux/delay.h>
#include <linux/time.h>
#include <linux/hdreg.h>
@@ -37,9 +30,9 @@
#include <linux/version.h>
#include <linux/err.h>
#include <linux/aer.h>
-#include <linux/ctype.h>
#include <linux/wait.h>
-#include <linux/uio.h>
+#include <linux/stringify.h>
+#include <linux/slab_def.h>
#include <scsi/scsi.h>
#include <scsi/sg.h>
#include <linux/io.h>
@@ -51,19 +44,6 @@
static int skd_dbg_level;
static int skd_isr_comp_limit = 4;
-enum {
- STEC_LINK_2_5GTS = 0,
- STEC_LINK_5GTS = 1,
- STEC_LINK_8GTS = 2,
- STEC_LINK_UNKNOWN = 0xFF
-};
-
-enum {
- SKD_FLUSH_INITIALIZER,
- SKD_FLUSH_ZERO_SIZE_FIRST,
- SKD_FLUSH_DATA_SECOND,
-};
-
#define SKD_ASSERT(expr) \
do { \
if (unlikely(!(expr))) { \
@@ -73,17 +53,11 @@ enum {
} while (0)
#define DRV_NAME "skd"
-#define DRV_VERSION "2.2.1"
-#define DRV_BUILD_ID "0260"
#define PFX DRV_NAME ": "
-#define DRV_BIN_VERSION 0x100
-#define DRV_VER_COMPL "2.2.1." DRV_BUILD_ID
-MODULE_AUTHOR("bug-reports: support@stec-inc.com");
-MODULE_LICENSE("Dual BSD/GPL");
+MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("STEC s1120 PCIe SSD block driver (b" DRV_BUILD_ID ")");
-MODULE_VERSION(DRV_VERSION "-" DRV_BUILD_ID);
+MODULE_DESCRIPTION("STEC s1120 PCIe SSD block driver");
#define PCI_VENDOR_ID_STEC 0x1B39
#define PCI_DEVICE_ID_S1120 0x0001
@@ -96,34 +70,32 @@ MODULE_VERSION(DRV_VERSION "-" DRV_BUILD_ID);
#define SKD_PAUSE_TIMEOUT (5 * 1000)
#define SKD_N_FITMSG_BYTES (512u)
+#define SKD_MAX_REQ_PER_MSG 14
-#define SKD_N_SPECIAL_CONTEXT 32u
#define SKD_N_SPECIAL_FITMSG_BYTES (128u)
/* SG elements are 32 bytes, so we can make this 4096 and still be under the
* 128KB limit. That allows 4096*4K = 16M xfer size
*/
#define SKD_N_SG_PER_REQ_DEFAULT 256u
-#define SKD_N_SG_PER_SPECIAL 256u
#define SKD_N_COMPLETION_ENTRY 256u
#define SKD_N_READ_CAP_BYTES (8u)
#define SKD_N_INTERNAL_BYTES (512u)
+#define SKD_SKCOMP_SIZE \
+ ((sizeof(struct fit_completion_entry_v1) + \
+ sizeof(struct fit_comp_error_info)) * SKD_N_COMPLETION_ENTRY)
+
/* 5 bits of uniqifier, 0xF800 */
-#define SKD_ID_INCR (0x400)
#define SKD_ID_TABLE_MASK (3u << 8u)
#define SKD_ID_RW_REQUEST (0u << 8u)
#define SKD_ID_INTERNAL (1u << 8u)
-#define SKD_ID_SPECIAL_REQUEST (2u << 8u)
#define SKD_ID_FIT_MSG (3u << 8u)
#define SKD_ID_SLOT_MASK 0x00FFu
#define SKD_ID_SLOT_AND_TABLE_MASK 0x03FFu
-#define SKD_N_TIMEOUT_SLOT 4u
-#define SKD_TIMEOUT_SLOT_MASK 3u
-
#define SKD_N_MAX_SECTORS 2048u
#define SKD_MAX_RETRIES 2u
@@ -141,7 +113,6 @@ enum skd_drvr_state {
SKD_DRVR_STATE_ONLINE,
SKD_DRVR_STATE_PAUSING,
SKD_DRVR_STATE_PAUSED,
- SKD_DRVR_STATE_DRAINING_TIMEOUT,
SKD_DRVR_STATE_RESTARTING,
SKD_DRVR_STATE_RESUMING,
SKD_DRVR_STATE_STOPPING,
@@ -158,7 +129,6 @@ enum skd_drvr_state {
#define SKD_WAIT_BOOT_TIMO SKD_TIMER_SECONDS(90u)
#define SKD_STARTING_TIMO SKD_TIMER_SECONDS(8u)
#define SKD_RESTARTING_TIMO SKD_TIMER_MINUTES(4u)
-#define SKD_DRAINING_TIMO SKD_TIMER_SECONDS(6u)
#define SKD_BUSY_TIMO SKD_TIMER_MINUTES(20u)
#define SKD_STARTED_BUSY_TIMO SKD_TIMER_SECONDS(60u)
#define SKD_START_WAIT_SECONDS 90u
@@ -169,12 +139,6 @@ enum skd_req_state {
SKD_REQ_STATE_BUSY,
SKD_REQ_STATE_COMPLETED,
SKD_REQ_STATE_TIMEOUT,
- SKD_REQ_STATE_ABORTED,
-};
-
-enum skd_fit_msg_state {
- SKD_MSG_STATE_IDLE,
- SKD_MSG_STATE_BUSY,
};
enum skd_check_status_action {
@@ -185,34 +149,29 @@ enum skd_check_status_action {
SKD_CHECK_STATUS_BUSY_IMMINENT,
};
+struct skd_msg_buf {
+ struct fit_msg_hdr fmh;
+ struct skd_scsi_request scsi[SKD_MAX_REQ_PER_MSG];
+};
+
struct skd_fitmsg_context {
- enum skd_fit_msg_state state;
-
- struct skd_fitmsg_context *next;
-
u32 id;
- u16 outstanding;
u32 length;
- u32 offset;
- u8 *msg_buf;
+ struct skd_msg_buf *msg_buf;
dma_addr_t mb_dma_address;
};
struct skd_request_context {
enum skd_req_state state;
- struct skd_request_context *next;
-
u16 id;
u32 fitmsg_id;
- struct request *req;
u8 flush_cmd;
- u32 timeout_stamp;
- u8 sg_data_dir;
+ enum dma_data_direction data_dir;
struct scatterlist *sg;
u32 n_sg;
u32 sg_byte_count;
@@ -224,38 +183,19 @@ struct skd_request_context {
struct fit_comp_error_info err_info;
+ blk_status_t status;
};
-#define SKD_DATA_DIR_HOST_TO_CARD 1
-#define SKD_DATA_DIR_CARD_TO_HOST 2
struct skd_special_context {
struct skd_request_context req;
- u8 orphaned;
-
void *data_buf;
dma_addr_t db_dma_address;
- u8 *msg_buf;
+ struct skd_msg_buf *msg_buf;
dma_addr_t mb_dma_address;
};
-struct skd_sg_io {
- fmode_t mode;
- void __user *argp;
-
- struct sg_io_hdr sg;
-
- u8 cdb[16];
-
- u32 dxfer_len;
- u32 iovcnt;
- struct sg_iovec *iov;
- struct sg_iovec no_iov_iov;
-
- struct skd_special_context *skspcl;
-};
-
typedef enum skd_irq_type {
SKD_IRQ_LEGACY,
SKD_IRQ_MSI,
@@ -265,7 +205,7 @@ typedef enum skd_irq_type {
#define SKD_MAX_BARS 2
struct skd_device {
- volatile void __iomem *mem_map[SKD_MAX_BARS];
+ void __iomem *mem_map[SKD_MAX_BARS];
resource_size_t mem_phys[SKD_MAX_BARS];
u32 mem_size[SKD_MAX_BARS];
@@ -276,21 +216,20 @@ struct skd_device {
spinlock_t lock;
struct gendisk *disk;
+ struct blk_mq_tag_set tag_set;
struct request_queue *queue;
+ struct skd_fitmsg_context *skmsg;
struct device *class_dev;
int gendisk_on;
int sync_done;
- atomic_t device_count;
u32 devno;
u32 major;
- char name[32];
char isr_name[30];
enum skd_drvr_state state;
u32 drive_state;
- u32 in_flight;
u32 cur_max_queue_depth;
u32 queue_low_water_mark;
u32 dev_max_queue_depth;
@@ -298,27 +237,20 @@ struct skd_device {
u32 num_fitmsg_context;
u32 num_req_context;
- u32 timeout_slot[SKD_N_TIMEOUT_SLOT];
- u32 timeout_stamp;
- struct skd_fitmsg_context *skmsg_free_list;
struct skd_fitmsg_context *skmsg_table;
- struct skd_request_context *skreq_free_list;
- struct skd_request_context *skreq_table;
-
- struct skd_special_context *skspcl_free_list;
- struct skd_special_context *skspcl_table;
-
struct skd_special_context internal_skspcl;
u32 read_cap_blocksize;
u32 read_cap_last_lba;
int read_cap_is_valid;
int inquiry_is_valid;
u8 inq_serial_num[13]; /*12 chars plus null term */
- u8 id_str[80]; /* holds a composite name (pci + sernum) */
u8 skcomp_cycle;
u32 skcomp_ix;
+ struct kmem_cache *msgbuf_cache;
+ struct kmem_cache *sglist_cache;
+ struct kmem_cache *databuf_cache;
struct fit_completion_entry_v1 *skcomp_table;
struct fit_comp_error_info *skerr_table;
dma_addr_t cq_dma_address;
@@ -329,7 +261,6 @@ struct skd_device {
u32 timer_countdown;
u32 timer_substate;
- int n_special;
int sgs_per_request;
u32 last_mtd;
@@ -343,7 +274,7 @@ struct skd_device {
u32 timo_slot;
-
+ struct work_struct start_queue;
struct work_struct completion_worker;
};
@@ -353,53 +284,32 @@ struct skd_device {
static inline u32 skd_reg_read32(struct skd_device *skdev, u32 offset)
{
- u32 val;
+ u32 val = readl(skdev->mem_map[1] + offset);
- if (likely(skdev->dbg_level < 2))
- return readl(skdev->mem_map[1] + offset);
- else {
- barrier();
- val = readl(skdev->mem_map[1] + offset);
- barrier();
- pr_debug("%s:%s:%d offset %x = %x\n",
- skdev->name, __func__, __LINE__, offset, val);
- return val;
- }
-
+ if (unlikely(skdev->dbg_level >= 2))
+ dev_dbg(&skdev->pdev->dev, "offset %x = %x\n", offset, val);
+ return val;
}
static inline void skd_reg_write32(struct skd_device *skdev, u32 val,
u32 offset)
{
- if (likely(skdev->dbg_level < 2)) {
- writel(val, skdev->mem_map[1] + offset);
- barrier();
- } else {
- barrier();
- writel(val, skdev->mem_map[1] + offset);
- barrier();
- pr_debug("%s:%s:%d offset %x = %x\n",
- skdev->name, __func__, __LINE__, offset, val);
- }
+ writel(val, skdev->mem_map[1] + offset);
+ if (unlikely(skdev->dbg_level >= 2))
+ dev_dbg(&skdev->pdev->dev, "offset %x = %x\n", offset, val);
}
static inline void skd_reg_write64(struct skd_device *skdev, u64 val,
u32 offset)
{
- if (likely(skdev->dbg_level < 2)) {
- writeq(val, skdev->mem_map[1] + offset);
- barrier();
- } else {
- barrier();
- writeq(val, skdev->mem_map[1] + offset);
- barrier();
- pr_debug("%s:%s:%d offset %x = %016llx\n",
- skdev->name, __func__, __LINE__, offset, val);
- }
+ writeq(val, skdev->mem_map[1] + offset);
+ if (unlikely(skdev->dbg_level >= 2))
+ dev_dbg(&skdev->pdev->dev, "offset %x = %016llx\n", offset,
+ val);
}
-#define SKD_IRQ_DEFAULT SKD_IRQ_MSI
+#define SKD_IRQ_DEFAULT SKD_IRQ_MSIX
static int skd_isr_type = SKD_IRQ_DEFAULT;
module_param(skd_isr_type, int, 0444);
@@ -412,7 +322,7 @@ static int skd_max_req_per_msg = SKD_MAX_REQ_PER_MSG_DEFAULT;
module_param(skd_max_req_per_msg, int, 0444);
MODULE_PARM_DESC(skd_max_req_per_msg,
"Maximum SCSI requests packed in a single message."
- " (1-14, default==1)");
+ " (1-" __stringify(SKD_MAX_REQ_PER_MSG) ", default==1)");
#define SKD_MAX_QUEUE_DEPTH_DEFAULT 64
#define SKD_MAX_QUEUE_DEPTH_DEFAULT_STR "64"
@@ -429,10 +339,10 @@ MODULE_PARM_DESC(skd_sgs_per_request,
"Maximum SG elements per block request."
" (1-4096, default==256)");
-static int skd_max_pass_thru = SKD_N_SPECIAL_CONTEXT;
+static int skd_max_pass_thru = 1;
module_param(skd_max_pass_thru, int, 0444);
MODULE_PARM_DESC(skd_max_pass_thru,
- "Maximum SCSI pass-thru at a time." " (1-50, default==32)");
+ "Maximum SCSI pass-thru at a time. IGNORED");
module_param(skd_dbg_level, int, 0444);
MODULE_PARM_DESC(skd_dbg_level, "s1120 debug level (0,1,2)");
@@ -449,9 +359,6 @@ static void skd_send_fitmsg(struct skd_device *skdev,
struct skd_fitmsg_context *skmsg);
static void skd_send_special_fitmsg(struct skd_device *skdev,
struct skd_special_context *skspcl);
-static void skd_request_fn(struct request_queue *rq);
-static void skd_end_request(struct skd_device *skdev,
- struct skd_request_context *skreq, blk_status_t status);
static bool skd_preop_sg_list(struct skd_device *skdev,
struct skd_request_context *skreq);
static void skd_postop_sg_list(struct skd_device *skdev,
@@ -460,19 +367,14 @@ static void skd_postop_sg_list(struct skd_device *skdev,
static void skd_restart_device(struct skd_device *skdev);
static int skd_quiesce_dev(struct skd_device *skdev);
static int skd_unquiesce_dev(struct skd_device *skdev);
-static void skd_release_special(struct skd_device *skdev,
- struct skd_special_context *skspcl);
static void skd_disable_interrupts(struct skd_device *skdev);
static void skd_isr_fwstate(struct skd_device *skdev);
-static void skd_recover_requests(struct skd_device *skdev, int requeue);
+static void skd_recover_requests(struct skd_device *skdev);
static void skd_soft_reset(struct skd_device *skdev);
-static const char *skd_name(struct skd_device *skdev);
const char *skd_drive_state_to_str(int state);
const char *skd_skdev_state_to_str(enum skd_drvr_state state);
static void skd_log_skdev(struct skd_device *skdev, const char *event);
-static void skd_log_skmsg(struct skd_device *skdev,
- struct skd_fitmsg_context *skmsg, const char *event);
static void skd_log_skreq(struct skd_device *skdev,
struct skd_request_context *skreq, const char *event);
@@ -481,18 +383,20 @@ static void skd_log_skreq(struct skd_device *skdev,
* READ/WRITE REQUESTS
*****************************************************************************
*/
-static void skd_fail_all_pending(struct skd_device *skdev)
+static void skd_inc_in_flight(struct request *rq, void *data, bool reserved)
{
- struct request_queue *q = skdev->queue;
- struct request *req;
+ int *count = data;
- for (;; ) {
- req = blk_peek_request(q);
- if (req == NULL)
- break;
- blk_start_request(req);
- __blk_end_request_all(req, BLK_STS_IOERR);
- }
+ count++;
+}
+
+static int skd_in_flight(struct skd_device *skdev)
+{
+ int count = 0;
+
+ blk_mq_tagset_busy_iter(&skdev->tag_set, skd_inc_in_flight, &count);
+
+ return count;
}
static void
@@ -501,9 +405,9 @@ skd_prep_rw_cdb(struct skd_scsi_request *scsi_req,
unsigned count)
{
if (data_dir == READ)
- scsi_req->cdb[0] = 0x28;
+ scsi_req->cdb[0] = READ_10;
else
- scsi_req->cdb[0] = 0x2a;
+ scsi_req->cdb[0] = WRITE_10;
scsi_req->cdb[1] = 0;
scsi_req->cdb[2] = (lba & 0xff000000) >> 24;
@@ -522,7 +426,7 @@ skd_prep_zerosize_flush_cdb(struct skd_scsi_request *scsi_req,
{
skreq->flush_cmd = 1;
- scsi_req->cdb[0] = 0x35;
+ scsi_req->cdb[0] = SYNCHRONIZE_CACHE;
scsi_req->cdb[1] = 0;
scsi_req->cdb[2] = 0;
scsi_req->cdb[3] = 0;
@@ -534,374 +438,12 @@ skd_prep_zerosize_flush_cdb(struct skd_scsi_request *scsi_req,
scsi_req->cdb[9] = 0;
}
-static void skd_request_fn_not_online(struct request_queue *q);
-
-static void skd_request_fn(struct request_queue *q)
+/*
+ * Return true if and only if all pending requests should be failed.
+ */
+static bool skd_fail_all(struct request_queue *q)
{
struct skd_device *skdev = q->queuedata;
- struct skd_fitmsg_context *skmsg = NULL;
- struct fit_msg_hdr *fmh = NULL;
- struct skd_request_context *skreq;
- struct request *req = NULL;
- struct skd_scsi_request *scsi_req;
- unsigned long io_flags;
- u32 lba;
- u32 count;
- int data_dir;
- u32 be_lba;
- u32 be_count;
- u64 be_dmaa;
- u64 cmdctxt;
- u32 timo_slot;
- void *cmd_ptr;
- int flush, fua;
-
- if (skdev->state != SKD_DRVR_STATE_ONLINE) {
- skd_request_fn_not_online(q);
- return;
- }
-
- if (blk_queue_stopped(skdev->queue)) {
- if (skdev->skmsg_free_list == NULL ||
- skdev->skreq_free_list == NULL ||
- skdev->in_flight >= skdev->queue_low_water_mark)
- /* There is still some kind of shortage */
- return;
-
- queue_flag_clear(QUEUE_FLAG_STOPPED, skdev->queue);
- }
-
- /*
- * Stop conditions:
- * - There are no more native requests
- * - There are already the maximum number of requests in progress
- * - There are no more skd_request_context entries
- * - There are no more FIT msg buffers
- */
- for (;; ) {
-
- flush = fua = 0;
-
- req = blk_peek_request(q);
-
- /* Are there any native requests to start? */
- if (req == NULL)
- break;
-
- lba = (u32)blk_rq_pos(req);
- count = blk_rq_sectors(req);
- data_dir = rq_data_dir(req);
- io_flags = req->cmd_flags;
-
- if (req_op(req) == REQ_OP_FLUSH)
- flush++;
-
- if (io_flags & REQ_FUA)
- fua++;
-
- pr_debug("%s:%s:%d new req=%p lba=%u(0x%x) "
- "count=%u(0x%x) dir=%d\n",
- skdev->name, __func__, __LINE__,
- req, lba, lba, count, count, data_dir);
-
- /* At this point we know there is a request */
-
- /* Are too many requets already in progress? */
- if (skdev->in_flight >= skdev->cur_max_queue_depth) {
- pr_debug("%s:%s:%d qdepth %d, limit %d\n",
- skdev->name, __func__, __LINE__,
- skdev->in_flight, skdev->cur_max_queue_depth);
- break;
- }
-
- /* Is a skd_request_context available? */
- skreq = skdev->skreq_free_list;
- if (skreq == NULL) {
- pr_debug("%s:%s:%d Out of req=%p\n",
- skdev->name, __func__, __LINE__, q);
- break;
- }
- SKD_ASSERT(skreq->state == SKD_REQ_STATE_IDLE);
- SKD_ASSERT((skreq->id & SKD_ID_INCR) == 0);
-
- /* Now we check to see if we can get a fit msg */
- if (skmsg == NULL) {
- if (skdev->skmsg_free_list == NULL) {
- pr_debug("%s:%s:%d Out of msg\n",
- skdev->name, __func__, __LINE__);
- break;
- }
- }
-
- skreq->flush_cmd = 0;
- skreq->n_sg = 0;
- skreq->sg_byte_count = 0;
-
- /*
- * OK to now dequeue request from q.
- *
- * At this point we are comitted to either start or reject
- * the native request. Note that skd_request_context is
- * available but is still at the head of the free list.
- */
- blk_start_request(req);
- skreq->req = req;
- skreq->fitmsg_id = 0;
-
- /* Either a FIT msg is in progress or we have to start one. */
- if (skmsg == NULL) {
- /* Are there any FIT msg buffers available? */
- skmsg = skdev->skmsg_free_list;
- if (skmsg == NULL) {
- pr_debug("%s:%s:%d Out of msg skdev=%p\n",
- skdev->name, __func__, __LINE__,
- skdev);
- break;
- }
- SKD_ASSERT(skmsg->state == SKD_MSG_STATE_IDLE);
- SKD_ASSERT((skmsg->id & SKD_ID_INCR) == 0);
-
- skdev->skmsg_free_list = skmsg->next;
-
- skmsg->state = SKD_MSG_STATE_BUSY;
- skmsg->id += SKD_ID_INCR;
-
- /* Initialize the FIT msg header */
- fmh = (struct fit_msg_hdr *)skmsg->msg_buf;
- memset(fmh, 0, sizeof(*fmh));
- fmh->protocol_id = FIT_PROTOCOL_ID_SOFIT;
- skmsg->length = sizeof(*fmh);
- }
-
- skreq->fitmsg_id = skmsg->id;
-
- /*
- * Note that a FIT msg may have just been started
- * but contains no SoFIT requests yet.
- */
-
- /*
- * Transcode the request, checking as we go. The outcome of
- * the transcoding is represented by the error variable.
- */
- cmd_ptr = &skmsg->msg_buf[skmsg->length];
- memset(cmd_ptr, 0, 32);
-
- be_lba = cpu_to_be32(lba);
- be_count = cpu_to_be32(count);
- be_dmaa = cpu_to_be64((u64)skreq->sksg_dma_address);
- cmdctxt = skreq->id + SKD_ID_INCR;
-
- scsi_req = cmd_ptr;
- scsi_req->hdr.tag = cmdctxt;
- scsi_req->hdr.sg_list_dma_address = be_dmaa;
-
- if (data_dir == READ)
- skreq->sg_data_dir = SKD_DATA_DIR_CARD_TO_HOST;
- else
- skreq->sg_data_dir = SKD_DATA_DIR_HOST_TO_CARD;
-
- if (flush == SKD_FLUSH_ZERO_SIZE_FIRST) {
- skd_prep_zerosize_flush_cdb(scsi_req, skreq);
- SKD_ASSERT(skreq->flush_cmd == 1);
-
- } else {
- skd_prep_rw_cdb(scsi_req, data_dir, lba, count);
- }
-
- if (fua)
- scsi_req->cdb[1] |= SKD_FUA_NV;
-
- if (!req->bio)
- goto skip_sg;
-
- if (!skd_preop_sg_list(skdev, skreq)) {
- /*
- * Complete the native request with error.
- * Note that the request context is still at the
- * head of the free list, and that the SoFIT request
- * was encoded into the FIT msg buffer but the FIT
- * msg length has not been updated. In short, the
- * only resource that has been allocated but might
- * not be used is that the FIT msg could be empty.
- */
- pr_debug("%s:%s:%d error Out\n",
- skdev->name, __func__, __LINE__);
- skd_end_request(skdev, skreq, BLK_STS_RESOURCE);
- continue;
- }
-
-skip_sg:
- scsi_req->hdr.sg_list_len_bytes =
- cpu_to_be32(skreq->sg_byte_count);
-
- /* Complete resource allocations. */
- skdev->skreq_free_list = skreq->next;
- skreq->state = SKD_REQ_STATE_BUSY;
- skreq->id += SKD_ID_INCR;
-
- skmsg->length += sizeof(struct skd_scsi_request);
- fmh->num_protocol_cmds_coalesced++;
-
- /*
- * Update the active request counts.
- * Capture the timeout timestamp.
- */
- skreq->timeout_stamp = skdev->timeout_stamp;
- timo_slot = skreq->timeout_stamp & SKD_TIMEOUT_SLOT_MASK;
- skdev->timeout_slot[timo_slot]++;
- skdev->in_flight++;
- pr_debug("%s:%s:%d req=0x%x busy=%d\n",
- skdev->name, __func__, __LINE__,
- skreq->id, skdev->in_flight);
-
- /*
- * If the FIT msg buffer is full send it.
- */
- if (skmsg->length >= SKD_N_FITMSG_BYTES ||
- fmh->num_protocol_cmds_coalesced >= skd_max_req_per_msg) {
- skd_send_fitmsg(skdev, skmsg);
- skmsg = NULL;
- fmh = NULL;
- }
- }
-
- /*
- * Is a FIT msg in progress? If it is empty put the buffer back
- * on the free list. If it is non-empty send what we got.
- * This minimizes latency when there are fewer requests than
- * what fits in a FIT msg.
- */
- if (skmsg != NULL) {
- /* Bigger than just a FIT msg header? */
- if (skmsg->length > sizeof(struct fit_msg_hdr)) {
- pr_debug("%s:%s:%d sending msg=%p, len %d\n",
- skdev->name, __func__, __LINE__,
- skmsg, skmsg->length);
- skd_send_fitmsg(skdev, skmsg);
- } else {
- /*
- * The FIT msg is empty. It means we got started
- * on the msg, but the requests were rejected.
- */
- skmsg->state = SKD_MSG_STATE_IDLE;
- skmsg->id += SKD_ID_INCR;
- skmsg->next = skdev->skmsg_free_list;
- skdev->skmsg_free_list = skmsg;
- }
- skmsg = NULL;
- fmh = NULL;
- }
-
- /*
- * If req is non-NULL it means there is something to do but
- * we are out of a resource.
- */
- if (req)
- blk_stop_queue(skdev->queue);
-}
-
-static void skd_end_request(struct skd_device *skdev,
- struct skd_request_context *skreq, blk_status_t error)
-{
- if (unlikely(error)) {
- struct request *req = skreq->req;
- char *cmd = (rq_data_dir(req) == READ) ? "read" : "write";
- u32 lba = (u32)blk_rq_pos(req);
- u32 count = blk_rq_sectors(req);
-
- pr_err("(%s): Error cmd=%s sect=%u count=%u id=0x%x\n",
- skd_name(skdev), cmd, lba, count, skreq->id);
- } else
- pr_debug("%s:%s:%d id=0x%x error=%d\n",
- skdev->name, __func__, __LINE__, skreq->id, error);
-
- __blk_end_request_all(skreq->req, error);
-}
-
-static bool skd_preop_sg_list(struct skd_device *skdev,
- struct skd_request_context *skreq)
-{
- struct request *req = skreq->req;
- int writing = skreq->sg_data_dir == SKD_DATA_DIR_HOST_TO_CARD;
- int pci_dir = writing ? PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE;
- struct scatterlist *sg = &skreq->sg[0];
- int n_sg;
- int i;
-
- skreq->sg_byte_count = 0;
-
- /* SKD_ASSERT(skreq->sg_data_dir == SKD_DATA_DIR_HOST_TO_CARD ||
- skreq->sg_data_dir == SKD_DATA_DIR_CARD_TO_HOST); */
-
- n_sg = blk_rq_map_sg(skdev->queue, req, sg);
- if (n_sg <= 0)
- return false;
-
- /*
- * Map scatterlist to PCI bus addresses.
- * Note PCI might change the number of entries.
- */
- n_sg = pci_map_sg(skdev->pdev, sg, n_sg, pci_dir);
- if (n_sg <= 0)
- return false;
-
- SKD_ASSERT(n_sg <= skdev->sgs_per_request);
-
- skreq->n_sg = n_sg;
-
- for (i = 0; i < n_sg; i++) {
- struct fit_sg_descriptor *sgd = &skreq->sksg_list[i];
- u32 cnt = sg_dma_len(&sg[i]);
- uint64_t dma_addr = sg_dma_address(&sg[i]);
-
- sgd->control = FIT_SGD_CONTROL_NOT_LAST;
- sgd->byte_count = cnt;
- skreq->sg_byte_count += cnt;
- sgd->host_side_addr = dma_addr;
- sgd->dev_side_addr = 0;
- }
-
- skreq->sksg_list[n_sg - 1].next_desc_ptr = 0LL;
- skreq->sksg_list[n_sg - 1].control = FIT_SGD_CONTROL_LAST;
-
- if (unlikely(skdev->dbg_level > 1)) {
- pr_debug("%s:%s:%d skreq=%x sksg_list=%p sksg_dma=%llx\n",
- skdev->name, __func__, __LINE__,
- skreq->id, skreq->sksg_list, skreq->sksg_dma_address);
- for (i = 0; i < n_sg; i++) {
- struct fit_sg_descriptor *sgd = &skreq->sksg_list[i];
- pr_debug("%s:%s:%d sg[%d] count=%u ctrl=0x%x "
- "addr=0x%llx next=0x%llx\n",
- skdev->name, __func__, __LINE__,
- i, sgd->byte_count, sgd->control,
- sgd->host_side_addr, sgd->next_desc_ptr);
- }
- }
-
- return true;
-}
-
-static void skd_postop_sg_list(struct skd_device *skdev,
- struct skd_request_context *skreq)
-{
- int writing = skreq->sg_data_dir == SKD_DATA_DIR_HOST_TO_CARD;
- int pci_dir = writing ? PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE;
-
- /*
- * restore the next ptr for next IO request so we
- * don't have to set it every time.
- */
- skreq->sksg_list[skreq->n_sg - 1].next_desc_ptr =
- skreq->sksg_dma_address +
- ((skreq->n_sg) * sizeof(struct fit_sg_descriptor));
- pci_unmap_sg(skdev->pdev, &skreq->sg[0], skreq->n_sg, pci_dir);
-}
-
-static void skd_request_fn_not_online(struct request_queue *q)
-{
- struct skd_device *skdev = q->queuedata;
- int error;
SKD_ASSERT(skdev->state != SKD_DRVR_STATE_ONLINE);
@@ -922,8 +464,7 @@ static void skd_request_fn_not_online(struct request_queue *q)
case SKD_DRVR_STATE_BUSY:
case SKD_DRVR_STATE_BUSY_IMMINENT:
case SKD_DRVR_STATE_BUSY_ERASE:
- case SKD_DRVR_STATE_DRAINING_TIMEOUT:
- return;
+ return false;
case SKD_DRVR_STATE_BUSY_SANITIZE:
case SKD_DRVR_STATE_STOPPING:
@@ -931,15 +472,218 @@ static void skd_request_fn_not_online(struct request_queue *q)
case SKD_DRVR_STATE_FAULT:
case SKD_DRVR_STATE_DISAPPEARED:
default:
- error = -EIO;
- break;
+ return true;
+ }
+}
+
+static blk_status_t skd_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
+ const struct blk_mq_queue_data *mqd)
+{
+ struct request *const req = mqd->rq;
+ struct request_queue *const q = req->q;
+ struct skd_device *skdev = q->queuedata;
+ struct skd_fitmsg_context *skmsg;
+ struct fit_msg_hdr *fmh;
+ const u32 tag = blk_mq_unique_tag(req);
+ struct skd_request_context *const skreq = blk_mq_rq_to_pdu(req);
+ struct skd_scsi_request *scsi_req;
+ unsigned long flags = 0;
+ const u32 lba = blk_rq_pos(req);
+ const u32 count = blk_rq_sectors(req);
+ const int data_dir = rq_data_dir(req);
+
+ if (unlikely(skdev->state != SKD_DRVR_STATE_ONLINE))
+ return skd_fail_all(q) ? BLK_STS_IOERR : BLK_STS_RESOURCE;
+
+ blk_mq_start_request(req);
+
+ WARN_ONCE(tag >= skd_max_queue_depth, "%#x > %#x (nr_requests = %lu)\n",
+ tag, skd_max_queue_depth, q->nr_requests);
+
+ SKD_ASSERT(skreq->state == SKD_REQ_STATE_IDLE);
+
+ dev_dbg(&skdev->pdev->dev,
+ "new req=%p lba=%u(0x%x) count=%u(0x%x) dir=%d\n", req, lba,
+ lba, count, count, data_dir);
+
+ skreq->id = tag + SKD_ID_RW_REQUEST;
+ skreq->flush_cmd = 0;
+ skreq->n_sg = 0;
+ skreq->sg_byte_count = 0;
+
+ skreq->fitmsg_id = 0;
+
+ skreq->data_dir = data_dir == READ ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
+
+ if (req->bio && !skd_preop_sg_list(skdev, skreq)) {
+ dev_dbg(&skdev->pdev->dev, "error Out\n");
+ skreq->status = BLK_STS_RESOURCE;
+ blk_mq_complete_request(req);
+ return BLK_STS_OK;
}
- /* If we get here, terminate all pending block requeusts
- * with EIO and any scsi pass thru with appropriate sense
- */
+ dma_sync_single_for_device(&skdev->pdev->dev, skreq->sksg_dma_address,
+ skreq->n_sg *
+ sizeof(struct fit_sg_descriptor),
+ DMA_TO_DEVICE);
- skd_fail_all_pending(skdev);
+ /* Either a FIT msg is in progress or we have to start one. */
+ if (skd_max_req_per_msg == 1) {
+ skmsg = NULL;
+ } else {
+ spin_lock_irqsave(&skdev->lock, flags);
+ skmsg = skdev->skmsg;
+ }
+ if (!skmsg) {
+ skmsg = &skdev->skmsg_table[tag];
+ skdev->skmsg = skmsg;
+
+ /* Initialize the FIT msg header */
+ fmh = &skmsg->msg_buf->fmh;
+ memset(fmh, 0, sizeof(*fmh));
+ fmh->protocol_id = FIT_PROTOCOL_ID_SOFIT;
+ skmsg->length = sizeof(*fmh);
+ } else {
+ fmh = &skmsg->msg_buf->fmh;
+ }
+
+ skreq->fitmsg_id = skmsg->id;
+
+ scsi_req = &skmsg->msg_buf->scsi[fmh->num_protocol_cmds_coalesced];
+ memset(scsi_req, 0, sizeof(*scsi_req));
+
+ scsi_req->hdr.tag = skreq->id;
+ scsi_req->hdr.sg_list_dma_address =
+ cpu_to_be64(skreq->sksg_dma_address);
+
+ if (req_op(req) == REQ_OP_FLUSH) {
+ skd_prep_zerosize_flush_cdb(scsi_req, skreq);
+ SKD_ASSERT(skreq->flush_cmd == 1);
+ } else {
+ skd_prep_rw_cdb(scsi_req, data_dir, lba, count);
+ }
+
+ if (req->cmd_flags & REQ_FUA)
+ scsi_req->cdb[1] |= SKD_FUA_NV;
+
+ scsi_req->hdr.sg_list_len_bytes = cpu_to_be32(skreq->sg_byte_count);
+
+ /* Complete resource allocations. */
+ skreq->state = SKD_REQ_STATE_BUSY;
+
+ skmsg->length += sizeof(struct skd_scsi_request);
+ fmh->num_protocol_cmds_coalesced++;
+
+ dev_dbg(&skdev->pdev->dev, "req=0x%x busy=%d\n", skreq->id,
+ skd_in_flight(skdev));
+
+ /*
+ * If the FIT msg buffer is full send it.
+ */
+ if (skd_max_req_per_msg == 1) {
+ skd_send_fitmsg(skdev, skmsg);
+ } else {
+ if (mqd->last ||
+ fmh->num_protocol_cmds_coalesced >= skd_max_req_per_msg) {
+ skd_send_fitmsg(skdev, skmsg);
+ skdev->skmsg = NULL;
+ }
+ spin_unlock_irqrestore(&skdev->lock, flags);
+ }
+
+ return BLK_STS_OK;
+}
+
+static enum blk_eh_timer_return skd_timed_out(struct request *req,
+ bool reserved)
+{
+ struct skd_device *skdev = req->q->queuedata;
+
+ dev_err(&skdev->pdev->dev, "request with tag %#x timed out\n",
+ blk_mq_unique_tag(req));
+
+ return BLK_EH_RESET_TIMER;
+}
+
+static void skd_complete_rq(struct request *req)
+{
+ struct skd_request_context *skreq = blk_mq_rq_to_pdu(req);
+
+ blk_mq_end_request(req, skreq->status);
+}
+
+static bool skd_preop_sg_list(struct skd_device *skdev,
+ struct skd_request_context *skreq)
+{
+ struct request *req = blk_mq_rq_from_pdu(skreq);
+ struct scatterlist *sgl = &skreq->sg[0], *sg;
+ int n_sg;
+ int i;
+
+ skreq->sg_byte_count = 0;
+
+ WARN_ON_ONCE(skreq->data_dir != DMA_TO_DEVICE &&
+ skreq->data_dir != DMA_FROM_DEVICE);
+
+ n_sg = blk_rq_map_sg(skdev->queue, req, sgl);
+ if (n_sg <= 0)
+ return false;
+
+ /*
+ * Map scatterlist to PCI bus addresses.
+ * Note PCI might change the number of entries.
+ */
+ n_sg = pci_map_sg(skdev->pdev, sgl, n_sg, skreq->data_dir);
+ if (n_sg <= 0)
+ return false;
+
+ SKD_ASSERT(n_sg <= skdev->sgs_per_request);
+
+ skreq->n_sg = n_sg;
+
+ for_each_sg(sgl, sg, n_sg, i) {
+ struct fit_sg_descriptor *sgd = &skreq->sksg_list[i];
+ u32 cnt = sg_dma_len(sg);
+ uint64_t dma_addr = sg_dma_address(sg);
+
+ sgd->control = FIT_SGD_CONTROL_NOT_LAST;
+ sgd->byte_count = cnt;
+ skreq->sg_byte_count += cnt;
+ sgd->host_side_addr = dma_addr;
+ sgd->dev_side_addr = 0;
+ }
+
+ skreq->sksg_list[n_sg - 1].next_desc_ptr = 0LL;
+ skreq->sksg_list[n_sg - 1].control = FIT_SGD_CONTROL_LAST;
+
+ if (unlikely(skdev->dbg_level > 1)) {
+ dev_dbg(&skdev->pdev->dev,
+ "skreq=%x sksg_list=%p sksg_dma=%llx\n",
+ skreq->id, skreq->sksg_list, skreq->sksg_dma_address);
+ for (i = 0; i < n_sg; i++) {
+ struct fit_sg_descriptor *sgd = &skreq->sksg_list[i];
+
+ dev_dbg(&skdev->pdev->dev,
+ " sg[%d] count=%u ctrl=0x%x addr=0x%llx next=0x%llx\n",
+ i, sgd->byte_count, sgd->control,
+ sgd->host_side_addr, sgd->next_desc_ptr);
+ }
+ }
+
+ return true;
+}
+
+static void skd_postop_sg_list(struct skd_device *skdev,
+ struct skd_request_context *skreq)
+{
+ /*
+ * restore the next ptr for next IO request so we
+ * don't have to set it every time.
+ */
+ skreq->sksg_list[skreq->n_sg - 1].next_desc_ptr =
+ skreq->sksg_dma_address +
+ ((skreq->n_sg) * sizeof(struct fit_sg_descriptor));
+ pci_unmap_sg(skdev->pdev, &skreq->sg[0], skreq->n_sg, skreq->data_dir);
}
/*
@@ -950,12 +694,22 @@ static void skd_request_fn_not_online(struct request_queue *q)
static void skd_timer_tick_not_online(struct skd_device *skdev);
+static void skd_start_queue(struct work_struct *work)
+{
+ struct skd_device *skdev = container_of(work, typeof(*skdev),
+ start_queue);
+
+ /*
+ * Although it is safe to call blk_start_queue() from interrupt
+ * context, blk_mq_start_hw_queues() must not be called from
+ * interrupt context.
+ */
+ blk_mq_start_hw_queues(skdev->queue);
+}
+
static void skd_timer_tick(ulong arg)
{
struct skd_device *skdev = (struct skd_device *)arg;
-
- u32 timo_slot;
- u32 overdue_timestamp;
unsigned long reqflags;
u32 state;
@@ -972,37 +726,9 @@ static void skd_timer_tick(ulong arg)
if (state != skdev->drive_state)
skd_isr_fwstate(skdev);
- if (skdev->state != SKD_DRVR_STATE_ONLINE) {
+ if (skdev->state != SKD_DRVR_STATE_ONLINE)
skd_timer_tick_not_online(skdev);
- goto timer_func_out;
- }
- skdev->timeout_stamp++;
- timo_slot = skdev->timeout_stamp & SKD_TIMEOUT_SLOT_MASK;
- /*
- * All requests that happened during the previous use of
- * this slot should be done by now. The previous use was
- * over 7 seconds ago.
- */
- if (skdev->timeout_slot[timo_slot] == 0)
- goto timer_func_out;
-
- /* Something is overdue */
- overdue_timestamp = skdev->timeout_stamp - SKD_N_TIMEOUT_SLOT;
-
- pr_debug("%s:%s:%d found %d timeouts, draining busy=%d\n",
- skdev->name, __func__, __LINE__,
- skdev->timeout_slot[timo_slot], skdev->in_flight);
- pr_err("(%s): Overdue IOs (%d), busy %d\n",
- skd_name(skdev), skdev->timeout_slot[timo_slot],
- skdev->in_flight);
-
- skdev->timer_countdown = SKD_DRAINING_TIMO;
- skdev->state = SKD_DRVR_STATE_DRAINING_TIMEOUT;
- skdev->timo_slot = timo_slot;
- blk_stop_queue(skdev->queue);
-
-timer_func_out:
mod_timer(&skdev->timer, (jiffies + HZ));
spin_unlock_irqrestore(&skdev->lock, reqflags);
@@ -1015,9 +741,9 @@ static void skd_timer_tick_not_online(struct skd_device *skdev)
case SKD_DRVR_STATE_LOAD:
break;
case SKD_DRVR_STATE_BUSY_SANITIZE:
- pr_debug("%s:%s:%d drive busy sanitize[%x], driver[%x]\n",
- skdev->name, __func__, __LINE__,
- skdev->drive_state, skdev->state);
+ dev_dbg(&skdev->pdev->dev,
+ "drive busy sanitize[%x], driver[%x]\n",
+ skdev->drive_state, skdev->state);
/* If we've been in sanitize for 3 seconds, we figure we're not
* going to get anymore completions, so recover requests now
*/
@@ -1025,22 +751,21 @@ static void skd_timer_tick_not_online(struct skd_device *skdev)
skdev->timer_countdown--;
return;
}
- skd_recover_requests(skdev, 0);
+ skd_recover_requests(skdev);
break;
case SKD_DRVR_STATE_BUSY:
case SKD_DRVR_STATE_BUSY_IMMINENT:
case SKD_DRVR_STATE_BUSY_ERASE:
- pr_debug("%s:%s:%d busy[%x], countdown=%d\n",
- skdev->name, __func__, __LINE__,
- skdev->state, skdev->timer_countdown);
+ dev_dbg(&skdev->pdev->dev, "busy[%x], countdown=%d\n",
+ skdev->state, skdev->timer_countdown);
if (skdev->timer_countdown > 0) {
skdev->timer_countdown--;
return;
}
- pr_debug("%s:%s:%d busy[%x], timedout=%d, restarting device.",
- skdev->name, __func__, __LINE__,
- skdev->state, skdev->timer_countdown);
+ dev_dbg(&skdev->pdev->dev,
+ "busy[%x], timedout=%d, restarting device.",
+ skdev->state, skdev->timer_countdown);
skd_restart_device(skdev);
break;
@@ -1054,12 +779,12 @@ static void skd_timer_tick_not_online(struct skd_device *skdev)
* revcover at some point. */
skdev->state = SKD_DRVR_STATE_FAULT;
- pr_err("(%s): DriveFault Connect Timeout (%x)\n",
- skd_name(skdev), skdev->drive_state);
+ dev_err(&skdev->pdev->dev, "DriveFault Connect Timeout (%x)\n",
+ skdev->drive_state);
/*start the queue so we can respond with error to requests */
/* wakeup anyone waiting for startup complete */
- blk_start_queue(skdev->queue);
+ schedule_work(&skdev->start_queue);
skdev->gendisk_on = -1;
wake_up_interruptible(&skdev->waitq);
break;
@@ -1072,29 +797,6 @@ static void skd_timer_tick_not_online(struct skd_device *skdev)
case SKD_DRVR_STATE_PAUSED:
break;
- case SKD_DRVR_STATE_DRAINING_TIMEOUT:
- pr_debug("%s:%s:%d "
- "draining busy [%d] tick[%d] qdb[%d] tmls[%d]\n",
- skdev->name, __func__, __LINE__,
- skdev->timo_slot,
- skdev->timer_countdown,
- skdev->in_flight,
- skdev->timeout_slot[skdev->timo_slot]);
- /* if the slot has cleared we can let the I/O continue */
- if (skdev->timeout_slot[skdev->timo_slot] == 0) {
- pr_debug("%s:%s:%d Slot drained, starting queue.\n",
- skdev->name, __func__, __LINE__);
- skdev->state = SKD_DRVR_STATE_ONLINE;
- blk_start_queue(skdev->queue);
- return;
- }
- if (skdev->timer_countdown > 0) {
- skdev->timer_countdown--;
- return;
- }
- skd_restart_device(skdev);
- break;
-
case SKD_DRVR_STATE_RESTARTING:
if (skdev->timer_countdown > 0) {
skdev->timer_countdown--;
@@ -1103,8 +805,9 @@ static void skd_timer_tick_not_online(struct skd_device *skdev)
/* For now, we fault the drive. Could attempt resets to
* revcover at some point. */
skdev->state = SKD_DRVR_STATE_FAULT;
- pr_err("(%s): DriveFault Reconnect Timeout (%x)\n",
- skd_name(skdev), skdev->drive_state);
+ dev_err(&skdev->pdev->dev,
+ "DriveFault Reconnect Timeout (%x)\n",
+ skdev->drive_state);
/*
* Recovering does two things:
@@ -1124,18 +827,18 @@ static void skd_timer_tick_not_online(struct skd_device *skdev)
/* It never came out of soft reset. Try to
* recover the requests and then let them
* fail. This is to mitigate hung processes. */
- skd_recover_requests(skdev, 0);
+ skd_recover_requests(skdev);
else {
- pr_err("(%s): Disable BusMaster (%x)\n",
- skd_name(skdev), skdev->drive_state);
+ dev_err(&skdev->pdev->dev, "Disable BusMaster (%x)\n",
+ skdev->drive_state);
pci_disable_device(skdev->pdev);
skd_disable_interrupts(skdev);
- skd_recover_requests(skdev, 0);
+ skd_recover_requests(skdev);
}
/*start the queue so we can respond with error to requests */
/* wakeup anyone waiting for startup complete */
- blk_start_queue(skdev->queue);
+ schedule_work(&skdev->start_queue);
skdev->gendisk_on = -1;
wake_up_interruptible(&skdev->waitq);
break;
@@ -1154,13 +857,11 @@ static int skd_start_timer(struct skd_device *skdev)
{
int rc;
- init_timer(&skdev->timer);
setup_timer(&skdev->timer, skd_timer_tick, (ulong)skdev);
rc = mod_timer(&skdev->timer, (jiffies + HZ));
if (rc)
- pr_err("%s: failed to start timer %d\n",
- __func__, rc);
+ dev_err(&skdev->pdev->dev, "failed to start timer %d\n", rc);
return rc;
}
@@ -1171,634 +872,6 @@ static void skd_kill_timer(struct skd_device *skdev)
/*
*****************************************************************************
- * IOCTL
- *****************************************************************************
- */
-static int skd_ioctl_sg_io(struct skd_device *skdev,
- fmode_t mode, void __user *argp);
-static int skd_sg_io_get_and_check_args(struct skd_device *skdev,
- struct skd_sg_io *sksgio);
-static int skd_sg_io_obtain_skspcl(struct skd_device *skdev,
- struct skd_sg_io *sksgio);
-static int skd_sg_io_prep_buffering(struct skd_device *skdev,
- struct skd_sg_io *sksgio);
-static int skd_sg_io_copy_buffer(struct skd_device *skdev,
- struct skd_sg_io *sksgio, int dxfer_dir);
-static int skd_sg_io_send_fitmsg(struct skd_device *skdev,
- struct skd_sg_io *sksgio);
-static int skd_sg_io_await(struct skd_device *skdev, struct skd_sg_io *sksgio);
-static int skd_sg_io_release_skspcl(struct skd_device *skdev,
- struct skd_sg_io *sksgio);
-static int skd_sg_io_put_status(struct skd_device *skdev,
- struct skd_sg_io *sksgio);
-
-static void skd_complete_special(struct skd_device *skdev,
- volatile struct fit_completion_entry_v1
- *skcomp,
- volatile struct fit_comp_error_info *skerr,
- struct skd_special_context *skspcl);
-
-static int skd_bdev_ioctl(struct block_device *bdev, fmode_t mode,
- uint cmd_in, ulong arg)
-{
- static const int sg_version_num = 30527;
- int rc = 0, timeout;
- struct gendisk *disk = bdev->bd_disk;
- struct skd_device *skdev = disk->private_data;
- int __user *p = (int __user *)arg;
-
- pr_debug("%s:%s:%d %s: CMD[%s] ioctl mode 0x%x, cmd 0x%x arg %0lx\n",
- skdev->name, __func__, __LINE__,
- disk->disk_name, current->comm, mode, cmd_in, arg);
-
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- switch (cmd_in) {
- case SG_SET_TIMEOUT:
- rc = get_user(timeout, p);
- if (!rc)
- disk->queue->sg_timeout = clock_t_to_jiffies(timeout);
- break;
- case SG_GET_TIMEOUT:
- rc = jiffies_to_clock_t(disk->queue->sg_timeout);
- break;
- case SG_GET_VERSION_NUM:
- rc = put_user(sg_version_num, p);
- break;
- case SG_IO:
- rc = skd_ioctl_sg_io(skdev, mode, (void __user *)arg);
- break;
-
- default:
- rc = -ENOTTY;
- break;
- }
-
- pr_debug("%s:%s:%d %s: completion rc %d\n",
- skdev->name, __func__, __LINE__, disk->disk_name, rc);
- return rc;
-}
-
-static int skd_ioctl_sg_io(struct skd_device *skdev, fmode_t mode,
- void __user *argp)
-{
- int rc;
- struct skd_sg_io sksgio;
-
- memset(&sksgio, 0, sizeof(sksgio));
- sksgio.mode = mode;
- sksgio.argp = argp;
- sksgio.iov = &sksgio.no_iov_iov;
-
- switch (skdev->state) {
- case SKD_DRVR_STATE_ONLINE:
- case SKD_DRVR_STATE_BUSY_IMMINENT:
- break;
-
- default:
- pr_debug("%s:%s:%d drive not online\n",
- skdev->name, __func__, __LINE__);
- rc = -ENXIO;
- goto out;
- }
-
- rc = skd_sg_io_get_and_check_args(skdev, &sksgio);
- if (rc)
- goto out;
-
- rc = skd_sg_io_obtain_skspcl(skdev, &sksgio);
- if (rc)
- goto out;
-
- rc = skd_sg_io_prep_buffering(skdev, &sksgio);
- if (rc)
- goto out;
-
- rc = skd_sg_io_copy_buffer(skdev, &sksgio, SG_DXFER_TO_DEV);
- if (rc)
- goto out;
-
- rc = skd_sg_io_send_fitmsg(skdev, &sksgio);
- if (rc)
- goto out;
-
- rc = skd_sg_io_await(skdev, &sksgio);
- if (rc)
- goto out;
-
- rc = skd_sg_io_copy_buffer(skdev, &sksgio, SG_DXFER_FROM_DEV);
- if (rc)
- goto out;
-
- rc = skd_sg_io_put_status(skdev, &sksgio);
- if (rc)
- goto out;
-
- rc = 0;
-
-out:
- skd_sg_io_release_skspcl(skdev, &sksgio);
-
- if (sksgio.iov != NULL && sksgio.iov != &sksgio.no_iov_iov)
- kfree(sksgio.iov);
- return rc;
-}
-
-static int skd_sg_io_get_and_check_args(struct skd_device *skdev,
- struct skd_sg_io *sksgio)
-{
- struct sg_io_hdr *sgp = &sksgio->sg;
- int i, acc;
-
- if (!access_ok(VERIFY_WRITE, sksgio->argp, sizeof(sg_io_hdr_t))) {
- pr_debug("%s:%s:%d access sg failed %p\n",
- skdev->name, __func__, __LINE__, sksgio->argp);
- return -EFAULT;
- }
-
- if (__copy_from_user(sgp, sksgio->argp, sizeof(sg_io_hdr_t))) {
- pr_debug("%s:%s:%d copy_from_user sg failed %p\n",
- skdev->name, __func__, __LINE__, sksgio->argp);
- return -EFAULT;
- }
-
- if (sgp->interface_id != SG_INTERFACE_ID_ORIG) {
- pr_debug("%s:%s:%d interface_id invalid 0x%x\n",
- skdev->name, __func__, __LINE__, sgp->interface_id);
- return -EINVAL;
- }
-
- if (sgp->cmd_len > sizeof(sksgio->cdb)) {
- pr_debug("%s:%s:%d cmd_len invalid %d\n",
- skdev->name, __func__, __LINE__, sgp->cmd_len);
- return -EINVAL;
- }
-
- if (sgp->iovec_count > 256) {
- pr_debug("%s:%s:%d iovec_count invalid %d\n",
- skdev->name, __func__, __LINE__, sgp->iovec_count);
- return -EINVAL;
- }
-
- if (sgp->dxfer_len > (PAGE_SIZE * SKD_N_SG_PER_SPECIAL)) {
- pr_debug("%s:%s:%d dxfer_len invalid %d\n",
- skdev->name, __func__, __LINE__, sgp->dxfer_len);
- return -EINVAL;
- }
-
- switch (sgp->dxfer_direction) {
- case SG_DXFER_NONE:
- acc = -1;
- break;
-
- case SG_DXFER_TO_DEV:
- acc = VERIFY_READ;
- break;
-
- case SG_DXFER_FROM_DEV:
- case SG_DXFER_TO_FROM_DEV:
- acc = VERIFY_WRITE;
- break;
-
- default:
- pr_debug("%s:%s:%d dxfer_dir invalid %d\n",
- skdev->name, __func__, __LINE__, sgp->dxfer_direction);
- return -EINVAL;
- }
-
- if (copy_from_user(sksgio->cdb, sgp->cmdp, sgp->cmd_len)) {
- pr_debug("%s:%s:%d copy_from_user cmdp failed %p\n",
- skdev->name, __func__, __LINE__, sgp->cmdp);
- return -EFAULT;
- }
-
- if (sgp->mx_sb_len != 0) {
- if (!access_ok(VERIFY_WRITE, sgp->sbp, sgp->mx_sb_len)) {
- pr_debug("%s:%s:%d access sbp failed %p\n",
- skdev->name, __func__, __LINE__, sgp->sbp);
- return -EFAULT;
- }
- }
-
- if (sgp->iovec_count == 0) {
- sksgio->iov[0].iov_base = sgp->dxferp;
- sksgio->iov[0].iov_len = sgp->dxfer_len;
- sksgio->iovcnt = 1;
- sksgio->dxfer_len = sgp->dxfer_len;
- } else {
- struct sg_iovec *iov;
- uint nbytes = sizeof(*iov) * sgp->iovec_count;
- size_t iov_data_len;
-
- iov = kmalloc(nbytes, GFP_KERNEL);
- if (iov == NULL) {
- pr_debug("%s:%s:%d alloc iovec failed %d\n",
- skdev->name, __func__, __LINE__,
- sgp->iovec_count);
- return -ENOMEM;
- }
- sksgio->iov = iov;
- sksgio->iovcnt = sgp->iovec_count;
-
- if (copy_from_user(iov, sgp->dxferp, nbytes)) {
- pr_debug("%s:%s:%d copy_from_user iovec failed %p\n",
- skdev->name, __func__, __LINE__, sgp->dxferp);
- return -EFAULT;
- }
-
- /*
- * Sum up the vecs, making sure they don't overflow
- */
- iov_data_len = 0;
- for (i = 0; i < sgp->iovec_count; i++) {
- if (iov_data_len + iov[i].iov_len < iov_data_len)
- return -EINVAL;
- iov_data_len += iov[i].iov_len;
- }
-
- /* SG_IO howto says that the shorter of the two wins */
- if (sgp->dxfer_len < iov_data_len) {
- sksgio->iovcnt = iov_shorten((struct iovec *)iov,
- sgp->iovec_count,
- sgp->dxfer_len);
- sksgio->dxfer_len = sgp->dxfer_len;
- } else
- sksgio->dxfer_len = iov_data_len;
- }
-
- if (sgp->dxfer_direction != SG_DXFER_NONE) {
- struct sg_iovec *iov = sksgio->iov;
- for (i = 0; i < sksgio->iovcnt; i++, iov++) {
- if (!access_ok(acc, iov->iov_base, iov->iov_len)) {
- pr_debug("%s:%s:%d access data failed %p/%d\n",
- skdev->name, __func__, __LINE__,
- iov->iov_base, (int)iov->iov_len);
- return -EFAULT;
- }
- }
- }
-
- return 0;
-}
-
-static int skd_sg_io_obtain_skspcl(struct skd_device *skdev,
- struct skd_sg_io *sksgio)
-{
- struct skd_special_context *skspcl = NULL;
- int rc;
-
- for (;;) {
- ulong flags;
-
- spin_lock_irqsave(&skdev->lock, flags);
- skspcl = skdev->skspcl_free_list;
- if (skspcl != NULL) {
- skdev->skspcl_free_list =
- (struct skd_special_context *)skspcl->req.next;
- skspcl->req.id += SKD_ID_INCR;
- skspcl->req.state = SKD_REQ_STATE_SETUP;
- skspcl->orphaned = 0;
- skspcl->req.n_sg = 0;
- }
- spin_unlock_irqrestore(&skdev->lock, flags);
-
- if (skspcl != NULL) {
- rc = 0;
- break;
- }
-
- pr_debug("%s:%s:%d blocking\n",
- skdev->name, __func__, __LINE__);
-
- rc = wait_event_interruptible_timeout(
- skdev->waitq,
- (skdev->skspcl_free_list != NULL),
- msecs_to_jiffies(sksgio->sg.timeout));
-
- pr_debug("%s:%s:%d unblocking, rc=%d\n",
- skdev->name, __func__, __LINE__, rc);
-
- if (rc <= 0) {
- if (rc == 0)
- rc = -ETIMEDOUT;
- else
- rc = -EINTR;
- break;
- }
- /*
- * If we get here rc > 0 meaning the timeout to
- * wait_event_interruptible_timeout() had time left, hence the
- * sought event -- non-empty free list -- happened.
- * Retry the allocation.
- */
- }
- sksgio->skspcl = skspcl;
-
- return rc;
-}
-
-static int skd_skreq_prep_buffering(struct skd_device *skdev,
- struct skd_request_context *skreq,
- u32 dxfer_len)
-{
- u32 resid = dxfer_len;
-
- /*
- * The DMA engine must have aligned addresses and byte counts.
- */
- resid += (-resid) & 3;
- skreq->sg_byte_count = resid;
-
- skreq->n_sg = 0;
-
- while (resid > 0) {
- u32 nbytes = PAGE_SIZE;
- u32 ix = skreq->n_sg;
- struct scatterlist *sg = &skreq->sg[ix];
- struct fit_sg_descriptor *sksg = &skreq->sksg_list[ix];
- struct page *page;
-
- if (nbytes > resid)
- nbytes = resid;
-
- page = alloc_page(GFP_KERNEL);
- if (page == NULL)
- return -ENOMEM;
-
- sg_set_page(sg, page, nbytes, 0);
-
- /* TODO: This should be going through a pci_???()
- * routine to do proper mapping. */
- sksg->control = FIT_SGD_CONTROL_NOT_LAST;
- sksg->byte_count = nbytes;
-
- sksg->host_side_addr = sg_phys(sg);
-
- sksg->dev_side_addr = 0;
- sksg->next_desc_ptr = skreq->sksg_dma_address +
- (ix + 1) * sizeof(*sksg);
-
- skreq->n_sg++;
- resid -= nbytes;
- }
-
- if (skreq->n_sg > 0) {
- u32 ix = skreq->n_sg - 1;
- struct fit_sg_descriptor *sksg = &skreq->sksg_list[ix];
-
- sksg->control = FIT_SGD_CONTROL_LAST;
- sksg->next_desc_ptr = 0;
- }
-
- if (unlikely(skdev->dbg_level > 1)) {
- u32 i;
-
- pr_debug("%s:%s:%d skreq=%x sksg_list=%p sksg_dma=%llx\n",
- skdev->name, __func__, __LINE__,
- skreq->id, skreq->sksg_list, skreq->sksg_dma_address);
- for (i = 0; i < skreq->n_sg; i++) {
- struct fit_sg_descriptor *sgd = &skreq->sksg_list[i];
-
- pr_debug("%s:%s:%d sg[%d] count=%u ctrl=0x%x "
- "addr=0x%llx next=0x%llx\n",
- skdev->name, __func__, __LINE__,
- i, sgd->byte_count, sgd->control,
- sgd->host_side_addr, sgd->next_desc_ptr);
- }
- }
-
- return 0;
-}
-
-static int skd_sg_io_prep_buffering(struct skd_device *skdev,
- struct skd_sg_io *sksgio)
-{
- struct skd_special_context *skspcl = sksgio->skspcl;
- struct skd_request_context *skreq = &skspcl->req;
- u32 dxfer_len = sksgio->dxfer_len;
- int rc;
-
- rc = skd_skreq_prep_buffering(skdev, skreq, dxfer_len);
- /*
- * Eventually, errors or not, skd_release_special() is called
- * to recover allocations including partial allocations.
- */
- return rc;
-}
-
-static int skd_sg_io_copy_buffer(struct skd_device *skdev,
- struct skd_sg_io *sksgio, int dxfer_dir)
-{
- struct skd_special_context *skspcl = sksgio->skspcl;
- u32 iov_ix = 0;
- struct sg_iovec curiov;
- u32 sksg_ix = 0;
- u8 *bufp = NULL;
- u32 buf_len = 0;
- u32 resid = sksgio->dxfer_len;
- int rc;
-
- curiov.iov_len = 0;
- curiov.iov_base = NULL;
-
- if (dxfer_dir != sksgio->sg.dxfer_direction) {
- if (dxfer_dir != SG_DXFER_TO_DEV ||
- sksgio->sg.dxfer_direction != SG_DXFER_TO_FROM_DEV)
- return 0;
- }
-
- while (resid > 0) {
- u32 nbytes = PAGE_SIZE;
-
- if (curiov.iov_len == 0) {
- curiov = sksgio->iov[iov_ix++];
- continue;
- }
-
- if (buf_len == 0) {
- struct page *page;
- page = sg_page(&skspcl->req.sg[sksg_ix++]);
- bufp = page_address(page);
- buf_len = PAGE_SIZE;
- }
-
- nbytes = min_t(u32, nbytes, resid);
- nbytes = min_t(u32, nbytes, curiov.iov_len);
- nbytes = min_t(u32, nbytes, buf_len);
-
- if (dxfer_dir == SG_DXFER_TO_DEV)
- rc = __copy_from_user(bufp, curiov.iov_base, nbytes);
- else
- rc = __copy_to_user(curiov.iov_base, bufp, nbytes);
-
- if (rc)
- return -EFAULT;
-
- resid -= nbytes;
- curiov.iov_len -= nbytes;
- curiov.iov_base += nbytes;
- buf_len -= nbytes;
- }
-
- return 0;
-}
-
-static int skd_sg_io_send_fitmsg(struct skd_device *skdev,
- struct skd_sg_io *sksgio)
-{
- struct skd_special_context *skspcl = sksgio->skspcl;
- struct fit_msg_hdr *fmh = (struct fit_msg_hdr *)skspcl->msg_buf;
- struct skd_scsi_request *scsi_req = (struct skd_scsi_request *)&fmh[1];
-
- memset(skspcl->msg_buf, 0, SKD_N_SPECIAL_FITMSG_BYTES);
-
- /* Initialize the FIT msg header */
- fmh->protocol_id = FIT_PROTOCOL_ID_SOFIT;
- fmh->num_protocol_cmds_coalesced = 1;
-
- /* Initialize the SCSI request */
- if (sksgio->sg.dxfer_direction != SG_DXFER_NONE)
- scsi_req->hdr.sg_list_dma_address =
- cpu_to_be64(skspcl->req.sksg_dma_address);
- scsi_req->hdr.tag = skspcl->req.id;
- scsi_req->hdr.sg_list_len_bytes =
- cpu_to_be32(skspcl->req.sg_byte_count);
- memcpy(scsi_req->cdb, sksgio->cdb, sizeof(scsi_req->cdb));
-
- skspcl->req.state = SKD_REQ_STATE_BUSY;
- skd_send_special_fitmsg(skdev, skspcl);
-
- return 0;
-}
-
-static int skd_sg_io_await(struct skd_device *skdev, struct skd_sg_io *sksgio)
-{
- unsigned long flags;
- int rc;
-
- rc = wait_event_interruptible_timeout(skdev->waitq,
- (sksgio->skspcl->req.state !=
- SKD_REQ_STATE_BUSY),
- msecs_to_jiffies(sksgio->sg.
- timeout));
-
- spin_lock_irqsave(&skdev->lock, flags);
-
- if (sksgio->skspcl->req.state == SKD_REQ_STATE_ABORTED) {
- pr_debug("%s:%s:%d skspcl %p aborted\n",
- skdev->name, __func__, __LINE__, sksgio->skspcl);
-
- /* Build check cond, sense and let command finish. */
- /* For a timeout, we must fabricate completion and sense
- * data to complete the command */
- sksgio->skspcl->req.completion.status =
- SAM_STAT_CHECK_CONDITION;
-
- memset(&sksgio->skspcl->req.err_info, 0,
- sizeof(sksgio->skspcl->req.err_info));
- sksgio->skspcl->req.err_info.type = 0x70;
- sksgio->skspcl->req.err_info.key = ABORTED_COMMAND;
- sksgio->skspcl->req.err_info.code = 0x44;
- sksgio->skspcl->req.err_info.qual = 0;
- rc = 0;
- } else if (sksgio->skspcl->req.state != SKD_REQ_STATE_BUSY)
- /* No longer on the adapter. We finish. */
- rc = 0;
- else {
- /* Something's gone wrong. Still busy. Timeout or
- * user interrupted (control-C). Mark as an orphan
- * so it will be disposed when completed. */
- sksgio->skspcl->orphaned = 1;
- sksgio->skspcl = NULL;
- if (rc == 0) {
- pr_debug("%s:%s:%d timed out %p (%u ms)\n",
- skdev->name, __func__, __LINE__,
- sksgio, sksgio->sg.timeout);
- rc = -ETIMEDOUT;
- } else {
- pr_debug("%s:%s:%d cntlc %p\n",
- skdev->name, __func__, __LINE__, sksgio);
- rc = -EINTR;
- }
- }
-
- spin_unlock_irqrestore(&skdev->lock, flags);
-
- return rc;
-}
-
-static int skd_sg_io_put_status(struct skd_device *skdev,
- struct skd_sg_io *sksgio)
-{
- struct sg_io_hdr *sgp = &sksgio->sg;
- struct skd_special_context *skspcl = sksgio->skspcl;
- int resid = 0;
-
- u32 nb = be32_to_cpu(skspcl->req.completion.num_returned_bytes);
-
- sgp->status = skspcl->req.completion.status;
- resid = sksgio->dxfer_len - nb;
-
- sgp->masked_status = sgp->status & STATUS_MASK;
- sgp->msg_status = 0;
- sgp->host_status = 0;
- sgp->driver_status = 0;
- sgp->resid = resid;
- if (sgp->masked_status || sgp->host_status || sgp->driver_status)
- sgp->info |= SG_INFO_CHECK;
-
- pr_debug("%s:%s:%d status %x masked %x resid 0x%x\n",
- skdev->name, __func__, __LINE__,
- sgp->status, sgp->masked_status, sgp->resid);
-
- if (sgp->masked_status == SAM_STAT_CHECK_CONDITION) {
- if (sgp->mx_sb_len > 0) {
- struct fit_comp_error_info *ei = &skspcl->req.err_info;
- u32 nbytes = sizeof(*ei);
-
- nbytes = min_t(u32, nbytes, sgp->mx_sb_len);
-
- sgp->sb_len_wr = nbytes;
-
- if (__copy_to_user(sgp->sbp, ei, nbytes)) {
- pr_debug("%s:%s:%d copy_to_user sense failed %p\n",
- skdev->name, __func__, __LINE__,
- sgp->sbp);
- return -EFAULT;
- }
- }
- }
-
- if (__copy_to_user(sksgio->argp, sgp, sizeof(sg_io_hdr_t))) {
- pr_debug("%s:%s:%d copy_to_user sg failed %p\n",
- skdev->name, __func__, __LINE__, sksgio->argp);
- return -EFAULT;
- }
-
- return 0;
-}
-
-static int skd_sg_io_release_skspcl(struct skd_device *skdev,
- struct skd_sg_io *sksgio)
-{
- struct skd_special_context *skspcl = sksgio->skspcl;
-
- if (skspcl != NULL) {
- ulong flags;
-
- sksgio->skspcl = NULL;
-
- spin_lock_irqsave(&skdev->lock, flags);
- skd_release_special(skdev, skspcl);
- spin_unlock_irqrestore(&skdev->lock, flags);
- }
-
- return 0;
-}
-
-/*
- *****************************************************************************
* INTERNAL REQUESTS -- generated by driver itself
*****************************************************************************
*/
@@ -1811,14 +884,15 @@ static int skd_format_internal_skspcl(struct skd_device *skdev)
uint64_t dma_address;
struct skd_scsi_request *scsi;
- fmh = (struct fit_msg_hdr *)&skspcl->msg_buf[0];
+ fmh = &skspcl->msg_buf->fmh;
fmh->protocol_id = FIT_PROTOCOL_ID_SOFIT;
fmh->num_protocol_cmds_coalesced = 1;
- scsi = (struct skd_scsi_request *)&skspcl->msg_buf[64];
+ scsi = &skspcl->msg_buf->scsi[0];
memset(scsi, 0, sizeof(*scsi));
dma_address = skspcl->req.sksg_dma_address;
scsi->hdr.sg_list_dma_address = cpu_to_be64(dma_address);
+ skspcl->req.n_sg = 1;
sgd->control = FIT_SGD_CONTROL_LAST;
sgd->byte_count = 0;
sgd->host_side_addr = skspcl->db_dma_address;
@@ -1846,11 +920,9 @@ static void skd_send_internal_skspcl(struct skd_device *skdev,
*/
return;
- SKD_ASSERT((skspcl->req.id & SKD_ID_INCR) == 0);
skspcl->req.state = SKD_REQ_STATE_BUSY;
- skspcl->req.id += SKD_ID_INCR;
- scsi = (struct skd_scsi_request *)&skspcl->msg_buf[64];
+ scsi = &skspcl->msg_buf->scsi[0];
scsi->hdr.tag = skspcl->req.id;
memset(scsi->cdb, 0, sizeof(scsi->cdb));
@@ -1940,32 +1012,35 @@ static void skd_log_check_status(struct skd_device *skdev, u8 status, u8 key,
/* If the check condition is of special interest, log a message */
if ((status == SAM_STAT_CHECK_CONDITION) && (key == 0x02)
&& (code == 0x04) && (qual == 0x06)) {
- pr_err("(%s): *** LOST_WRITE_DATA ERROR *** key/asc/"
- "ascq/fruc %02x/%02x/%02x/%02x\n",
- skd_name(skdev), key, code, qual, fruc);
+ dev_err(&skdev->pdev->dev,
+ "*** LOST_WRITE_DATA ERROR *** key/asc/ascq/fruc %02x/%02x/%02x/%02x\n",
+ key, code, qual, fruc);
}
}
static void skd_complete_internal(struct skd_device *skdev,
- volatile struct fit_completion_entry_v1
- *skcomp,
- volatile struct fit_comp_error_info *skerr,
+ struct fit_completion_entry_v1 *skcomp,
+ struct fit_comp_error_info *skerr,
struct skd_special_context *skspcl)
{
u8 *buf = skspcl->data_buf;
u8 status;
int i;
- struct skd_scsi_request *scsi =
- (struct skd_scsi_request *)&skspcl->msg_buf[64];
+ struct skd_scsi_request *scsi = &skspcl->msg_buf->scsi[0];
+
+ lockdep_assert_held(&skdev->lock);
SKD_ASSERT(skspcl == &skdev->internal_skspcl);
- pr_debug("%s:%s:%d complete internal %x\n",
- skdev->name, __func__, __LINE__, scsi->cdb[0]);
+ dev_dbg(&skdev->pdev->dev, "complete internal %x\n", scsi->cdb[0]);
+
+ dma_sync_single_for_cpu(&skdev->pdev->dev,
+ skspcl->db_dma_address,
+ skspcl->req.sksg_list[0].byte_count,
+ DMA_BIDIRECTIONAL);
skspcl->req.completion = *skcomp;
skspcl->req.state = SKD_REQ_STATE_IDLE;
- skspcl->req.id += SKD_ID_INCR;
status = skspcl->req.completion.status;
@@ -1981,14 +1056,15 @@ static void skd_complete_internal(struct skd_device *skdev,
skd_send_internal_skspcl(skdev, skspcl, WRITE_BUFFER);
else {
if (skdev->state == SKD_DRVR_STATE_STOPPING) {
- pr_debug("%s:%s:%d TUR failed, don't send anymore state 0x%x\n",
- skdev->name, __func__, __LINE__,
- skdev->state);
+ dev_dbg(&skdev->pdev->dev,
+ "TUR failed, don't send anymore state 0x%x\n",
+ skdev->state);
return;
}
- pr_debug("%s:%s:%d **** TUR failed, retry skerr\n",
- skdev->name, __func__, __LINE__);
- skd_send_internal_skspcl(skdev, skspcl, 0x00);
+ dev_dbg(&skdev->pdev->dev,
+ "**** TUR failed, retry skerr\n");
+ skd_send_internal_skspcl(skdev, skspcl,
+ TEST_UNIT_READY);
}
break;
@@ -1997,14 +1073,15 @@ static void skd_complete_internal(struct skd_device *skdev,
skd_send_internal_skspcl(skdev, skspcl, READ_BUFFER);
else {
if (skdev->state == SKD_DRVR_STATE_STOPPING) {
- pr_debug("%s:%s:%d write buffer failed, don't send anymore state 0x%x\n",
- skdev->name, __func__, __LINE__,
- skdev->state);
+ dev_dbg(&skdev->pdev->dev,
+ "write buffer failed, don't send anymore state 0x%x\n",
+ skdev->state);
return;
}
- pr_debug("%s:%s:%d **** write buffer failed, retry skerr\n",
- skdev->name, __func__, __LINE__);
- skd_send_internal_skspcl(skdev, skspcl, 0x00);
+ dev_dbg(&skdev->pdev->dev,
+ "**** write buffer failed, retry skerr\n");
+ skd_send_internal_skspcl(skdev, skspcl,
+ TEST_UNIT_READY);
}
break;
@@ -2014,33 +1091,31 @@ static void skd_complete_internal(struct skd_device *skdev,
skd_send_internal_skspcl(skdev, skspcl,
READ_CAPACITY);
else {
- pr_err(
- "(%s):*** W/R Buffer mismatch %d ***\n",
- skd_name(skdev), skdev->connect_retries);
+ dev_err(&skdev->pdev->dev,
+ "*** W/R Buffer mismatch %d ***\n",
+ skdev->connect_retries);
if (skdev->connect_retries <
SKD_MAX_CONNECT_RETRIES) {
skdev->connect_retries++;
skd_soft_reset(skdev);
} else {
- pr_err(
- "(%s): W/R Buffer Connect Error\n",
- skd_name(skdev));
+ dev_err(&skdev->pdev->dev,
+ "W/R Buffer Connect Error\n");
return;
}
}
} else {
if (skdev->state == SKD_DRVR_STATE_STOPPING) {
- pr_debug("%s:%s:%d "
- "read buffer failed, don't send anymore state 0x%x\n",
- skdev->name, __func__, __LINE__,
- skdev->state);
+ dev_dbg(&skdev->pdev->dev,
+ "read buffer failed, don't send anymore state 0x%x\n",
+ skdev->state);
return;
}
- pr_debug("%s:%s:%d "
- "**** read buffer failed, retry skerr\n",
- skdev->name, __func__, __LINE__);
- skd_send_internal_skspcl(skdev, skspcl, 0x00);
+ dev_dbg(&skdev->pdev->dev,
+ "**** read buffer failed, retry skerr\n");
+ skd_send_internal_skspcl(skdev, skspcl,
+ TEST_UNIT_READY);
}
break;
@@ -2054,10 +1129,9 @@ static void skd_complete_internal(struct skd_device *skdev,
(buf[4] << 24) | (buf[5] << 16) |
(buf[6] << 8) | buf[7];
- pr_debug("%s:%s:%d last lba %d, bs %d\n",
- skdev->name, __func__, __LINE__,
- skdev->read_cap_last_lba,
- skdev->read_cap_blocksize);
+ dev_dbg(&skdev->pdev->dev, "last lba %d, bs %d\n",
+ skdev->read_cap_last_lba,
+ skdev->read_cap_blocksize);
set_capacity(skdev->disk, skdev->read_cap_last_lba + 1);
@@ -2068,13 +1142,10 @@ static void skd_complete_internal(struct skd_device *skdev,
(skerr->key == MEDIUM_ERROR)) {
skdev->read_cap_last_lba = ~0;
set_capacity(skdev->disk, skdev->read_cap_last_lba + 1);
- pr_debug("%s:%s:%d "
- "**** MEDIUM ERROR caused READCAP to fail, ignore failure and continue to inquiry\n",
- skdev->name, __func__, __LINE__);
+ dev_dbg(&skdev->pdev->dev, "**** MEDIUM ERROR caused READCAP to fail, ignore failure and continue to inquiry\n");
skd_send_internal_skspcl(skdev, skspcl, INQUIRY);
} else {
- pr_debug("%s:%s:%d **** READCAP failed, retry TUR\n",
- skdev->name, __func__, __LINE__);
+ dev_dbg(&skdev->pdev->dev, "**** READCAP failed, retry TUR\n");
skd_send_internal_skspcl(skdev, skspcl,
TEST_UNIT_READY);
}
@@ -2091,8 +1162,7 @@ static void skd_complete_internal(struct skd_device *skdev,
}
if (skd_unquiesce_dev(skdev) < 0)
- pr_debug("%s:%s:%d **** failed, to ONLINE device\n",
- skdev->name, __func__, __LINE__);
+ dev_dbg(&skdev->pdev->dev, "**** failed, to ONLINE device\n");
/* connection is complete */
skdev->connect_retries = 0;
break;
@@ -2120,27 +1190,20 @@ static void skd_send_fitmsg(struct skd_device *skdev,
struct skd_fitmsg_context *skmsg)
{
u64 qcmd;
- struct fit_msg_hdr *fmh;
- pr_debug("%s:%s:%d dma address 0x%llx, busy=%d\n",
- skdev->name, __func__, __LINE__,
- skmsg->mb_dma_address, skdev->in_flight);
- pr_debug("%s:%s:%d msg_buf 0x%p, offset %x\n",
- skdev->name, __func__, __LINE__,
- skmsg->msg_buf, skmsg->offset);
+ dev_dbg(&skdev->pdev->dev, "dma address 0x%llx, busy=%d\n",
+ skmsg->mb_dma_address, skd_in_flight(skdev));
+ dev_dbg(&skdev->pdev->dev, "msg_buf %p\n", skmsg->msg_buf);
qcmd = skmsg->mb_dma_address;
qcmd |= FIT_QCMD_QID_NORMAL;
- fmh = (struct fit_msg_hdr *)skmsg->msg_buf;
- skmsg->outstanding = fmh->num_protocol_cmds_coalesced;
-
if (unlikely(skdev->dbg_level > 1)) {
u8 *bp = (u8 *)skmsg->msg_buf;
int i;
for (i = 0; i < skmsg->length; i += 8) {
- pr_debug("%s:%s:%d msg[%2d] %8ph\n",
- skdev->name, __func__, __LINE__, i, &bp[i]);
+ dev_dbg(&skdev->pdev->dev, "msg[%2d] %8ph\n", i,
+ &bp[i]);
if (i == 0)
i = 64 - 8;
}
@@ -2160,6 +1223,12 @@ static void skd_send_fitmsg(struct skd_device *skdev,
*/
qcmd |= FIT_QCMD_MSGSIZE_64;
+ dma_sync_single_for_device(&skdev->pdev->dev, skmsg->mb_dma_address,
+ skmsg->length, DMA_TO_DEVICE);
+
+ /* Make sure skd_msg_buf is written before the doorbell is triggered. */
+ smp_wmb();
+
SKD_WRITEQ(skdev, qcmd, FIT_Q_COMMAND);
}
@@ -2168,30 +1237,31 @@ static void skd_send_special_fitmsg(struct skd_device *skdev,
{
u64 qcmd;
+ WARN_ON_ONCE(skspcl->req.n_sg != 1);
+
if (unlikely(skdev->dbg_level > 1)) {
u8 *bp = (u8 *)skspcl->msg_buf;
int i;
for (i = 0; i < SKD_N_SPECIAL_FITMSG_BYTES; i += 8) {
- pr_debug("%s:%s:%d spcl[%2d] %8ph\n",
- skdev->name, __func__, __LINE__, i, &bp[i]);
+ dev_dbg(&skdev->pdev->dev, " spcl[%2d] %8ph\n", i,
+ &bp[i]);
if (i == 0)
i = 64 - 8;
}
- pr_debug("%s:%s:%d skspcl=%p id=%04x sksg_list=%p sksg_dma=%llx\n",
- skdev->name, __func__, __LINE__,
- skspcl, skspcl->req.id, skspcl->req.sksg_list,
- skspcl->req.sksg_dma_address);
+ dev_dbg(&skdev->pdev->dev,
+ "skspcl=%p id=%04x sksg_list=%p sksg_dma=%llx\n",
+ skspcl, skspcl->req.id, skspcl->req.sksg_list,
+ skspcl->req.sksg_dma_address);
for (i = 0; i < skspcl->req.n_sg; i++) {
struct fit_sg_descriptor *sgd =
&skspcl->req.sksg_list[i];
- pr_debug("%s:%s:%d sg[%d] count=%u ctrl=0x%x "
- "addr=0x%llx next=0x%llx\n",
- skdev->name, __func__, __LINE__,
- i, sgd->byte_count, sgd->control,
- sgd->host_side_addr, sgd->next_desc_ptr);
+ dev_dbg(&skdev->pdev->dev,
+ " sg[%d] count=%u ctrl=0x%x addr=0x%llx next=0x%llx\n",
+ i, sgd->byte_count, sgd->control,
+ sgd->host_side_addr, sgd->next_desc_ptr);
}
}
@@ -2202,6 +1272,20 @@ static void skd_send_special_fitmsg(struct skd_device *skdev,
qcmd = skspcl->mb_dma_address;
qcmd |= FIT_QCMD_QID_NORMAL + FIT_QCMD_MSGSIZE_128;
+ dma_sync_single_for_device(&skdev->pdev->dev, skspcl->mb_dma_address,
+ SKD_N_SPECIAL_FITMSG_BYTES, DMA_TO_DEVICE);
+ dma_sync_single_for_device(&skdev->pdev->dev,
+ skspcl->req.sksg_dma_address,
+ 1 * sizeof(struct fit_sg_descriptor),
+ DMA_TO_DEVICE);
+ dma_sync_single_for_device(&skdev->pdev->dev,
+ skspcl->db_dma_address,
+ skspcl->req.sksg_list[0].byte_count,
+ DMA_BIDIRECTIONAL);
+
+ /* Make sure skd_msg_buf is written before the doorbell is triggered. */
+ smp_wmb();
+
SKD_WRITEQ(skdev, qcmd, FIT_Q_COMMAND);
}
@@ -2212,8 +1296,8 @@ static void skd_send_special_fitmsg(struct skd_device *skdev,
*/
static void skd_complete_other(struct skd_device *skdev,
- volatile struct fit_completion_entry_v1 *skcomp,
- volatile struct fit_comp_error_info *skerr);
+ struct fit_completion_entry_v1 *skcomp,
+ struct fit_comp_error_info *skerr);
struct sns_info {
u8 type;
@@ -2262,21 +1346,20 @@ static struct sns_info skd_chkstat_table[] = {
static enum skd_check_status_action
skd_check_status(struct skd_device *skdev,
- u8 cmp_status, volatile struct fit_comp_error_info *skerr)
+ u8 cmp_status, struct fit_comp_error_info *skerr)
{
- int i, n;
+ int i;
- pr_err("(%s): key/asc/ascq/fruc %02x/%02x/%02x/%02x\n",
- skd_name(skdev), skerr->key, skerr->code, skerr->qual,
- skerr->fruc);
+ dev_err(&skdev->pdev->dev, "key/asc/ascq/fruc %02x/%02x/%02x/%02x\n",
+ skerr->key, skerr->code, skerr->qual, skerr->fruc);
- pr_debug("%s:%s:%d stat: t=%02x stat=%02x k=%02x c=%02x q=%02x fruc=%02x\n",
- skdev->name, __func__, __LINE__, skerr->type, cmp_status,
- skerr->key, skerr->code, skerr->qual, skerr->fruc);
+ dev_dbg(&skdev->pdev->dev,
+ "stat: t=%02x stat=%02x k=%02x c=%02x q=%02x fruc=%02x\n",
+ skerr->type, cmp_status, skerr->key, skerr->code, skerr->qual,
+ skerr->fruc);
/* Does the info match an entry in the good category? */
- n = sizeof(skd_chkstat_table) / sizeof(skd_chkstat_table[0]);
- for (i = 0; i < n; i++) {
+ for (i = 0; i < ARRAY_SIZE(skd_chkstat_table); i++) {
struct sns_info *sns = &skd_chkstat_table[i];
if (sns->mask & 0x10)
@@ -2300,10 +1383,9 @@ skd_check_status(struct skd_device *skdev,
continue;
if (sns->action == SKD_CHECK_STATUS_REPORT_SMART_ALERT) {
- pr_err("(%s): SMART Alert: sense key/asc/ascq "
- "%02x/%02x/%02x\n",
- skd_name(skdev), skerr->key,
- skerr->code, skerr->qual);
+ dev_err(&skdev->pdev->dev,
+ "SMART Alert: sense key/asc/ascq %02x/%02x/%02x\n",
+ skerr->key, skerr->code, skerr->qual);
}
return sns->action;
}
@@ -2312,335 +1394,80 @@ skd_check_status(struct skd_device *skdev,
* zero status means good
*/
if (cmp_status) {
- pr_debug("%s:%s:%d status check: error\n",
- skdev->name, __func__, __LINE__);
+ dev_dbg(&skdev->pdev->dev, "status check: error\n");
return SKD_CHECK_STATUS_REPORT_ERROR;
}
- pr_debug("%s:%s:%d status check good default\n",
- skdev->name, __func__, __LINE__);
+ dev_dbg(&skdev->pdev->dev, "status check good default\n");
return SKD_CHECK_STATUS_REPORT_GOOD;
}
static void skd_resolve_req_exception(struct skd_device *skdev,
- struct skd_request_context *skreq)
+ struct skd_request_context *skreq,
+ struct request *req)
{
u8 cmp_status = skreq->completion.status;
switch (skd_check_status(skdev, cmp_status, &skreq->err_info)) {
case SKD_CHECK_STATUS_REPORT_GOOD:
case SKD_CHECK_STATUS_REPORT_SMART_ALERT:
- skd_end_request(skdev, skreq, BLK_STS_OK);
+ skreq->status = BLK_STS_OK;
+ blk_mq_complete_request(req);
break;
case SKD_CHECK_STATUS_BUSY_IMMINENT:
skd_log_skreq(skdev, skreq, "retry(busy)");
- blk_requeue_request(skdev->queue, skreq->req);
- pr_info("(%s) drive BUSY imminent\n", skd_name(skdev));
+ blk_requeue_request(skdev->queue, req);
+ dev_info(&skdev->pdev->dev, "drive BUSY imminent\n");
skdev->state = SKD_DRVR_STATE_BUSY_IMMINENT;
skdev->timer_countdown = SKD_TIMER_MINUTES(20);
skd_quiesce_dev(skdev);
break;
case SKD_CHECK_STATUS_REQUEUE_REQUEST:
- if ((unsigned long) ++skreq->req->special < SKD_MAX_RETRIES) {
+ if ((unsigned long) ++req->special < SKD_MAX_RETRIES) {
skd_log_skreq(skdev, skreq, "retry");
- blk_requeue_request(skdev->queue, skreq->req);
+ blk_requeue_request(skdev->queue, req);
break;
}
- /* fall through to report error */
+ /* fall through */
case SKD_CHECK_STATUS_REPORT_ERROR:
default:
- skd_end_request(skdev, skreq, BLK_STS_IOERR);
+ skreq->status = BLK_STS_IOERR;
+ blk_mq_complete_request(req);
break;
}
}
-/* assume spinlock is already held */
static void skd_release_skreq(struct skd_device *skdev,
struct skd_request_context *skreq)
{
- u32 msg_slot;
- struct skd_fitmsg_context *skmsg;
-
- u32 timo_slot;
-
- /*
- * Reclaim the FIT msg buffer if this is
- * the first of the requests it carried to
- * be completed. The FIT msg buffer used to
- * send this request cannot be reused until
- * we are sure the s1120 card has copied
- * it to its memory. The FIT msg might have
- * contained several requests. As soon as
- * any of them are completed we know that
- * the entire FIT msg was transferred.
- * Only the first completed request will
- * match the FIT msg buffer id. The FIT
- * msg buffer id is immediately updated.
- * When subsequent requests complete the FIT
- * msg buffer id won't match, so we know
- * quite cheaply that it is already done.
- */
- msg_slot = skreq->fitmsg_id & SKD_ID_SLOT_MASK;
- SKD_ASSERT(msg_slot < skdev->num_fitmsg_context);
-
- skmsg = &skdev->skmsg_table[msg_slot];
- if (skmsg->id == skreq->fitmsg_id) {
- SKD_ASSERT(skmsg->state == SKD_MSG_STATE_BUSY);
- SKD_ASSERT(skmsg->outstanding > 0);
- skmsg->outstanding--;
- if (skmsg->outstanding == 0) {
- skmsg->state = SKD_MSG_STATE_IDLE;
- skmsg->id += SKD_ID_INCR;
- skmsg->next = skdev->skmsg_free_list;
- skdev->skmsg_free_list = skmsg;
- }
- }
-
- /*
- * Decrease the number of active requests.
- * Also decrements the count in the timeout slot.
- */
- SKD_ASSERT(skdev->in_flight > 0);
- skdev->in_flight -= 1;
-
- timo_slot = skreq->timeout_stamp & SKD_TIMEOUT_SLOT_MASK;
- SKD_ASSERT(skdev->timeout_slot[timo_slot] > 0);
- skdev->timeout_slot[timo_slot] -= 1;
-
- /*
- * Reset backpointer
- */
- skreq->req = NULL;
-
/*
* Reclaim the skd_request_context
*/
skreq->state = SKD_REQ_STATE_IDLE;
- skreq->id += SKD_ID_INCR;
- skreq->next = skdev->skreq_free_list;
- skdev->skreq_free_list = skreq;
}
-#define DRIVER_INQ_EVPD_PAGE_CODE 0xDA
-
-static void skd_do_inq_page_00(struct skd_device *skdev,
- volatile struct fit_completion_entry_v1 *skcomp,
- volatile struct fit_comp_error_info *skerr,
- uint8_t *cdb, uint8_t *buf)
-{
- uint16_t insert_pt, max_bytes, drive_pages, drive_bytes, new_size;
-
- /* Caller requested "supported pages". The driver needs to insert
- * its page.
- */
- pr_debug("%s:%s:%d skd_do_driver_inquiry: modify supported pages.\n",
- skdev->name, __func__, __LINE__);
-
- /* If the device rejected the request because the CDB was
- * improperly formed, then just leave.
- */
- if (skcomp->status == SAM_STAT_CHECK_CONDITION &&
- skerr->key == ILLEGAL_REQUEST && skerr->code == 0x24)
- return;
-
- /* Get the amount of space the caller allocated */
- max_bytes = (cdb[3] << 8) | cdb[4];
-
- /* Get the number of pages actually returned by the device */
- drive_pages = (buf[2] << 8) | buf[3];
- drive_bytes = drive_pages + 4;
- new_size = drive_pages + 1;
-
- /* Supported pages must be in numerical order, so find where
- * the driver page needs to be inserted into the list of
- * pages returned by the device.
- */
- for (insert_pt = 4; insert_pt < drive_bytes; insert_pt++) {
- if (buf[insert_pt] == DRIVER_INQ_EVPD_PAGE_CODE)
- return; /* Device using this page code. abort */
- else if (buf[insert_pt] > DRIVER_INQ_EVPD_PAGE_CODE)
- break;
- }
-
- if (insert_pt < max_bytes) {
- uint16_t u;
-
- /* Shift everything up one byte to make room. */
- for (u = new_size + 3; u > insert_pt; u--)
- buf[u] = buf[u - 1];
- buf[insert_pt] = DRIVER_INQ_EVPD_PAGE_CODE;
-
- /* SCSI byte order increment of num_returned_bytes by 1 */
- skcomp->num_returned_bytes =
- be32_to_cpu(skcomp->num_returned_bytes) + 1;
- skcomp->num_returned_bytes =
- be32_to_cpu(skcomp->num_returned_bytes);
- }
-
- /* update page length field to reflect the driver's page too */
- buf[2] = (uint8_t)((new_size >> 8) & 0xFF);
- buf[3] = (uint8_t)((new_size >> 0) & 0xFF);
-}
-
-static void skd_get_link_info(struct pci_dev *pdev, u8 *speed, u8 *width)
-{
- int pcie_reg;
- u16 pci_bus_speed;
- u8 pci_lanes;
-
- pcie_reg = pci_find_capability(pdev, PCI_CAP_ID_EXP);
- if (pcie_reg) {
- u16 linksta;
- pci_read_config_word(pdev, pcie_reg + PCI_EXP_LNKSTA, &linksta);
-
- pci_bus_speed = linksta & 0xF;
- pci_lanes = (linksta & 0x3F0) >> 4;
- } else {
- *speed = STEC_LINK_UNKNOWN;
- *width = 0xFF;
- return;
- }
-
- switch (pci_bus_speed) {
- case 1:
- *speed = STEC_LINK_2_5GTS;
- break;
- case 2:
- *speed = STEC_LINK_5GTS;
- break;
- case 3:
- *speed = STEC_LINK_8GTS;
- break;
- default:
- *speed = STEC_LINK_UNKNOWN;
- break;
- }
-
- if (pci_lanes <= 0x20)
- *width = pci_lanes;
- else
- *width = 0xFF;
-}
-
-static void skd_do_inq_page_da(struct skd_device *skdev,
- volatile struct fit_completion_entry_v1 *skcomp,
- volatile struct fit_comp_error_info *skerr,
- uint8_t *cdb, uint8_t *buf)
-{
- struct pci_dev *pdev = skdev->pdev;
- unsigned max_bytes;
- struct driver_inquiry_data inq;
- u16 val;
-
- pr_debug("%s:%s:%d skd_do_driver_inquiry: return driver page\n",
- skdev->name, __func__, __LINE__);
-
- memset(&inq, 0, sizeof(inq));
-
- inq.page_code = DRIVER_INQ_EVPD_PAGE_CODE;
-
- skd_get_link_info(pdev, &inq.pcie_link_speed, &inq.pcie_link_lanes);
- inq.pcie_bus_number = cpu_to_be16(pdev->bus->number);
- inq.pcie_device_number = PCI_SLOT(pdev->devfn);
- inq.pcie_function_number = PCI_FUNC(pdev->devfn);
-
- pci_read_config_word(pdev, PCI_VENDOR_ID, &val);
- inq.pcie_vendor_id = cpu_to_be16(val);
-
- pci_read_config_word(pdev, PCI_DEVICE_ID, &val);
- inq.pcie_device_id = cpu_to_be16(val);
-
- pci_read_config_word(pdev, PCI_SUBSYSTEM_VENDOR_ID, &val);
- inq.pcie_subsystem_vendor_id = cpu_to_be16(val);
-
- pci_read_config_word(pdev, PCI_SUBSYSTEM_ID, &val);
- inq.pcie_subsystem_device_id = cpu_to_be16(val);
-
- /* Driver version, fixed lenth, padded with spaces on the right */
- inq.driver_version_length = sizeof(inq.driver_version);
- memset(&inq.driver_version, ' ', sizeof(inq.driver_version));
- memcpy(inq.driver_version, DRV_VER_COMPL,
- min(sizeof(inq.driver_version), strlen(DRV_VER_COMPL)));
-
- inq.page_length = cpu_to_be16((sizeof(inq) - 4));
-
- /* Clear the error set by the device */
- skcomp->status = SAM_STAT_GOOD;
- memset((void *)skerr, 0, sizeof(*skerr));
-
- /* copy response into output buffer */
- max_bytes = (cdb[3] << 8) | cdb[4];
- memcpy(buf, &inq, min_t(unsigned, max_bytes, sizeof(inq)));
-
- skcomp->num_returned_bytes =
- be32_to_cpu(min_t(uint16_t, max_bytes, sizeof(inq)));
-}
-
-static void skd_do_driver_inq(struct skd_device *skdev,
- volatile struct fit_completion_entry_v1 *skcomp,
- volatile struct fit_comp_error_info *skerr,
- uint8_t *cdb, uint8_t *buf)
-{
- if (!buf)
- return;
- else if (cdb[0] != INQUIRY)
- return; /* Not an INQUIRY */
- else if ((cdb[1] & 1) == 0)
- return; /* EVPD not set */
- else if (cdb[2] == 0)
- /* Need to add driver's page to supported pages list */
- skd_do_inq_page_00(skdev, skcomp, skerr, cdb, buf);
- else if (cdb[2] == DRIVER_INQ_EVPD_PAGE_CODE)
- /* Caller requested driver's page */
- skd_do_inq_page_da(skdev, skcomp, skerr, cdb, buf);
-}
-
-static unsigned char *skd_sg_1st_page_ptr(struct scatterlist *sg)
-{
- if (!sg)
- return NULL;
- if (!sg_page(sg))
- return NULL;
- return sg_virt(sg);
-}
-
-static void skd_process_scsi_inq(struct skd_device *skdev,
- volatile struct fit_completion_entry_v1
- *skcomp,
- volatile struct fit_comp_error_info *skerr,
- struct skd_special_context *skspcl)
-{
- uint8_t *buf;
- struct fit_msg_hdr *fmh = (struct fit_msg_hdr *)skspcl->msg_buf;
- struct skd_scsi_request *scsi_req = (struct skd_scsi_request *)&fmh[1];
-
- dma_sync_sg_for_cpu(skdev->class_dev, skspcl->req.sg, skspcl->req.n_sg,
- skspcl->req.sg_data_dir);
- buf = skd_sg_1st_page_ptr(skspcl->req.sg);
-
- if (buf)
- skd_do_driver_inq(skdev, skcomp, skerr, scsi_req->cdb, buf);
-}
-
-
static int skd_isr_completion_posted(struct skd_device *skdev,
int limit, int *enqueued)
{
- volatile struct fit_completion_entry_v1 *skcmp = NULL;
- volatile struct fit_comp_error_info *skerr;
+ struct fit_completion_entry_v1 *skcmp;
+ struct fit_comp_error_info *skerr;
u16 req_id;
- u32 req_slot;
+ u32 tag;
+ u16 hwq = 0;
+ struct request *rq;
struct skd_request_context *skreq;
- u16 cmp_cntxt = 0;
- u8 cmp_status = 0;
- u8 cmp_cycle = 0;
- u32 cmp_bytes = 0;
+ u16 cmp_cntxt;
+ u8 cmp_status;
+ u8 cmp_cycle;
+ u32 cmp_bytes;
int rc = 0;
int processed = 0;
+ lockdep_assert_held(&skdev->lock);
+
for (;; ) {
SKD_ASSERT(skdev->skcomp_ix < SKD_N_COMPLETION_ENTRY);
@@ -2652,16 +1479,14 @@ static int skd_isr_completion_posted(struct skd_device *skdev,
skerr = &skdev->skerr_table[skdev->skcomp_ix];
- pr_debug("%s:%s:%d "
- "cycle=%d ix=%d got cycle=%d cmdctxt=0x%x stat=%d "
- "busy=%d rbytes=0x%x proto=%d\n",
- skdev->name, __func__, __LINE__, skdev->skcomp_cycle,
- skdev->skcomp_ix, cmp_cycle, cmp_cntxt, cmp_status,
- skdev->in_flight, cmp_bytes, skdev->proto_ver);
+ dev_dbg(&skdev->pdev->dev,
+ "cycle=%d ix=%d got cycle=%d cmdctxt=0x%x stat=%d busy=%d rbytes=0x%x proto=%d\n",
+ skdev->skcomp_cycle, skdev->skcomp_ix, cmp_cycle,
+ cmp_cntxt, cmp_status, skd_in_flight(skdev),
+ cmp_bytes, skdev->proto_ver);
if (cmp_cycle != skdev->skcomp_cycle) {
- pr_debug("%s:%s:%d end of completions\n",
- skdev->name, __func__, __LINE__);
+ dev_dbg(&skdev->pdev->dev, "end of completions\n");
break;
}
/*
@@ -2680,49 +1505,38 @@ static int skd_isr_completion_posted(struct skd_device *skdev,
* r/w request (see skd_start() above) or a special request.
*/
req_id = cmp_cntxt;
- req_slot = req_id & SKD_ID_SLOT_AND_TABLE_MASK;
+ tag = req_id & SKD_ID_SLOT_AND_TABLE_MASK;
/* Is this other than a r/w request? */
- if (req_slot >= skdev->num_req_context) {
+ if (tag >= skdev->num_req_context) {
/*
* This is not a completion for a r/w request.
*/
+ WARN_ON_ONCE(blk_mq_tag_to_rq(skdev->tag_set.tags[hwq],
+ tag));
skd_complete_other(skdev, skcmp, skerr);
continue;
}
- skreq = &skdev->skreq_table[req_slot];
+ rq = blk_mq_tag_to_rq(skdev->tag_set.tags[hwq], tag);
+ if (WARN(!rq, "No request for tag %#x -> %#x\n", cmp_cntxt,
+ tag))
+ continue;
+ skreq = blk_mq_rq_to_pdu(rq);
/*
* Make sure the request ID for the slot matches.
*/
if (skreq->id != req_id) {
- pr_debug("%s:%s:%d mismatch comp_id=0x%x req_id=0x%x\n",
- skdev->name, __func__, __LINE__,
- req_id, skreq->id);
- {
- u16 new_id = cmp_cntxt;
- pr_err("(%s): Completion mismatch "
- "comp_id=0x%04x skreq=0x%04x new=0x%04x\n",
- skd_name(skdev), req_id,
- skreq->id, new_id);
+ dev_err(&skdev->pdev->dev,
+ "Completion mismatch comp_id=0x%04x skreq=0x%04x new=0x%04x\n",
+ req_id, skreq->id, cmp_cntxt);
- continue;
- }
+ continue;
}
SKD_ASSERT(skreq->state == SKD_REQ_STATE_BUSY);
- if (skreq->state == SKD_REQ_STATE_ABORTED) {
- pr_debug("%s:%s:%d reclaim req %p id=%04x\n",
- skdev->name, __func__, __LINE__,
- skreq, skreq->id);
- /* a previously timed out command can
- * now be cleaned up */
- skd_release_skreq(skdev, skreq);
- continue;
- }
-
skreq->completion = *skcmp;
if (unlikely(cmp_status == SAM_STAT_CHECK_CONDITION)) {
skreq->err_info = *skerr;
@@ -2734,27 +1548,17 @@ static int skd_isr_completion_posted(struct skd_device *skdev,
if (skreq->n_sg > 0)
skd_postop_sg_list(skdev, skreq);
- if (!skreq->req) {
- pr_debug("%s:%s:%d NULL backptr skdreq %p, "
- "req=0x%x req_id=0x%x\n",
- skdev->name, __func__, __LINE__,
- skreq, skreq->id, req_id);
- } else {
- /*
- * Capture the outcome and post it back to the
- * native request.
- */
- if (likely(cmp_status == SAM_STAT_GOOD))
- skd_end_request(skdev, skreq, BLK_STS_OK);
- else
- skd_resolve_req_exception(skdev, skreq);
- }
+ skd_release_skreq(skdev, skreq);
/*
- * Release the skreq, its FIT msg (if one), timeout slot,
- * and queue depth.
+ * Capture the outcome and post it back to the native request.
*/
- skd_release_skreq(skdev, skreq);
+ if (likely(cmp_status == SAM_STAT_GOOD)) {
+ skreq->status = BLK_STS_OK;
+ blk_mq_complete_request(rq);
+ } else {
+ skd_resolve_req_exception(skdev, skreq, rq);
+ }
/* skd_isr_comp_limit equal zero means no limit */
if (limit) {
@@ -2765,8 +1569,8 @@ static int skd_isr_completion_posted(struct skd_device *skdev,
}
}
- if ((skdev->state == SKD_DRVR_STATE_PAUSING)
- && (skdev->in_flight) == 0) {
+ if (skdev->state == SKD_DRVR_STATE_PAUSING &&
+ skd_in_flight(skdev) == 0) {
skdev->state = SKD_DRVR_STATE_PAUSED;
wake_up_interruptible(&skdev->waitq);
}
@@ -2775,21 +1579,22 @@ static int skd_isr_completion_posted(struct skd_device *skdev,
}
static void skd_complete_other(struct skd_device *skdev,
- volatile struct fit_completion_entry_v1 *skcomp,
- volatile struct fit_comp_error_info *skerr)
+ struct fit_completion_entry_v1 *skcomp,
+ struct fit_comp_error_info *skerr)
{
u32 req_id = 0;
u32 req_table;
u32 req_slot;
struct skd_special_context *skspcl;
+ lockdep_assert_held(&skdev->lock);
+
req_id = skcomp->tag;
req_table = req_id & SKD_ID_TABLE_MASK;
req_slot = req_id & SKD_ID_SLOT_MASK;
- pr_debug("%s:%s:%d table=0x%x id=0x%x slot=%d\n",
- skdev->name, __func__, __LINE__,
- req_table, req_id, req_slot);
+ dev_dbg(&skdev->pdev->dev, "table=0x%x id=0x%x slot=%d\n", req_table,
+ req_id, req_slot);
/*
* Based on the request id, determine how to dispatch this completion.
@@ -2799,28 +1604,12 @@ static void skd_complete_other(struct skd_device *skdev,
switch (req_table) {
case SKD_ID_RW_REQUEST:
/*
- * The caller, skd_completion_posted_isr() above,
+ * The caller, skd_isr_completion_posted() above,
* handles r/w requests. The only way we get here
* is if the req_slot is out of bounds.
*/
break;
- case SKD_ID_SPECIAL_REQUEST:
- /*
- * Make sure the req_slot is in bounds and that the id
- * matches.
- */
- if (req_slot < skdev->n_special) {
- skspcl = &skdev->skspcl_table[req_slot];
- if (skspcl->req.id == req_id &&
- skspcl->req.state == SKD_REQ_STATE_BUSY) {
- skd_complete_special(skdev,
- skcomp, skerr, skspcl);
- return;
- }
- }
- break;
-
case SKD_ID_INTERNAL:
if (req_slot == 0) {
skspcl = &skdev->internal_skspcl;
@@ -2851,72 +1640,9 @@ static void skd_complete_other(struct skd_device *skdev,
*/
}
-static void skd_complete_special(struct skd_device *skdev,
- volatile struct fit_completion_entry_v1
- *skcomp,
- volatile struct fit_comp_error_info *skerr,
- struct skd_special_context *skspcl)
-{
- pr_debug("%s:%s:%d completing special request %p\n",
- skdev->name, __func__, __LINE__, skspcl);
- if (skspcl->orphaned) {
- /* Discard orphaned request */
- /* ?: Can this release directly or does it need
- * to use a worker? */
- pr_debug("%s:%s:%d release orphaned %p\n",
- skdev->name, __func__, __LINE__, skspcl);
- skd_release_special(skdev, skspcl);
- return;
- }
-
- skd_process_scsi_inq(skdev, skcomp, skerr, skspcl);
-
- skspcl->req.state = SKD_REQ_STATE_COMPLETED;
- skspcl->req.completion = *skcomp;
- skspcl->req.err_info = *skerr;
-
- skd_log_check_status(skdev, skspcl->req.completion.status, skerr->key,
- skerr->code, skerr->qual, skerr->fruc);
-
- wake_up_interruptible(&skdev->waitq);
-}
-
-/* assume spinlock is already held */
-static void skd_release_special(struct skd_device *skdev,
- struct skd_special_context *skspcl)
-{
- int i, was_depleted;
-
- for (i = 0; i < skspcl->req.n_sg; i++) {
- struct page *page = sg_page(&skspcl->req.sg[i]);
- __free_page(page);
- }
-
- was_depleted = (skdev->skspcl_free_list == NULL);
-
- skspcl->req.state = SKD_REQ_STATE_IDLE;
- skspcl->req.id += SKD_ID_INCR;
- skspcl->req.next =
- (struct skd_request_context *)skdev->skspcl_free_list;
- skdev->skspcl_free_list = (struct skd_special_context *)skspcl;
-
- if (was_depleted) {
- pr_debug("%s:%s:%d skspcl was depleted\n",
- skdev->name, __func__, __LINE__);
- /* Free list was depleted. Their might be waiters. */
- wake_up_interruptible(&skdev->waitq);
- }
-}
-
static void skd_reset_skcomp(struct skd_device *skdev)
{
- u32 nbytes;
- struct fit_completion_entry_v1 *skcomp;
-
- nbytes = sizeof(*skcomp) * SKD_N_COMPLETION_ENTRY;
- nbytes += sizeof(struct fit_comp_error_info) * SKD_N_COMPLETION_ENTRY;
-
- memset(skdev->skcomp_table, 0, nbytes);
+ memset(skdev->skcomp_table, 0, SKD_SKCOMP_SIZE);
skdev->skcomp_ix = 0;
skdev->skcomp_cycle = 1;
@@ -2941,7 +1667,7 @@ static void skd_completion_worker(struct work_struct *work)
* process everything in compq
*/
skd_isr_completion_posted(skdev, 0, &flush_enqueued);
- skd_request_fn(skdev->queue);
+ schedule_work(&skdev->start_queue);
spin_unlock_irqrestore(&skdev->lock, flags);
}
@@ -2951,14 +1677,13 @@ static void skd_isr_msg_from_dev(struct skd_device *skdev);
static irqreturn_t
skd_isr(int irq, void *ptr)
{
- struct skd_device *skdev;
+ struct skd_device *skdev = ptr;
u32 intstat;
u32 ack;
int rc = 0;
int deferred = 0;
int flush_enqueued = 0;
- skdev = (struct skd_device *)ptr;
spin_lock(&skdev->lock);
for (;; ) {
@@ -2967,8 +1692,8 @@ skd_isr(int irq, void *ptr)
ack = FIT_INT_DEF_MASK;
ack &= intstat;
- pr_debug("%s:%s:%d intstat=0x%x ack=0x%x\n",
- skdev->name, __func__, __LINE__, intstat, ack);
+ dev_dbg(&skdev->pdev->dev, "intstat=0x%x ack=0x%x\n", intstat,
+ ack);
/* As long as there is an int pending on device, keep
* running loop. When none, get out, but if we've never
@@ -3018,12 +1743,12 @@ skd_isr(int irq, void *ptr)
}
if (unlikely(flush_enqueued))
- skd_request_fn(skdev->queue);
+ schedule_work(&skdev->start_queue);
if (deferred)
schedule_work(&skdev->completion_worker);
else if (!flush_enqueued)
- skd_request_fn(skdev->queue);
+ schedule_work(&skdev->start_queue);
spin_unlock(&skdev->lock);
@@ -3033,13 +1758,13 @@ skd_isr(int irq, void *ptr)
static void skd_drive_fault(struct skd_device *skdev)
{
skdev->state = SKD_DRVR_STATE_FAULT;
- pr_err("(%s): Drive FAULT\n", skd_name(skdev));
+ dev_err(&skdev->pdev->dev, "Drive FAULT\n");
}
static void skd_drive_disappeared(struct skd_device *skdev)
{
skdev->state = SKD_DRVR_STATE_DISAPPEARED;
- pr_err("(%s): Drive DISAPPEARED\n", skd_name(skdev));
+ dev_err(&skdev->pdev->dev, "Drive DISAPPEARED\n");
}
static void skd_isr_fwstate(struct skd_device *skdev)
@@ -3052,10 +1777,9 @@ static void skd_isr_fwstate(struct skd_device *skdev)
sense = SKD_READL(skdev, FIT_STATUS);
state = sense & FIT_SR_DRIVE_STATE_MASK;
- pr_err("(%s): s1120 state %s(%d)=>%s(%d)\n",
- skd_name(skdev),
- skd_drive_state_to_str(skdev->drive_state), skdev->drive_state,
- skd_drive_state_to_str(state), state);
+ dev_err(&skdev->pdev->dev, "s1120 state %s(%d)=>%s(%d)\n",
+ skd_drive_state_to_str(skdev->drive_state), skdev->drive_state,
+ skd_drive_state_to_str(state), state);
skdev->drive_state = state;
@@ -3066,7 +1790,7 @@ static void skd_isr_fwstate(struct skd_device *skdev)
break;
}
if (skdev->state == SKD_DRVR_STATE_RESTARTING)
- skd_recover_requests(skdev, 0);
+ skd_recover_requests(skdev);
if (skdev->state == SKD_DRVR_STATE_WAIT_BOOT) {
skdev->timer_countdown = SKD_STARTING_TIMO;
skdev->state = SKD_DRVR_STATE_STARTING;
@@ -3087,11 +1811,11 @@ static void skd_isr_fwstate(struct skd_device *skdev)
skdev->cur_max_queue_depth * 2 / 3 + 1;
if (skdev->queue_low_water_mark < 1)
skdev->queue_low_water_mark = 1;
- pr_info(
- "(%s): Queue depth limit=%d dev=%d lowat=%d\n",
- skd_name(skdev),
- skdev->cur_max_queue_depth,
- skdev->dev_max_queue_depth, skdev->queue_low_water_mark);
+ dev_info(&skdev->pdev->dev,
+ "Queue depth limit=%d dev=%d lowat=%d\n",
+ skdev->cur_max_queue_depth,
+ skdev->dev_max_queue_depth,
+ skdev->queue_low_water_mark);
skd_refresh_device_data(skdev);
break;
@@ -3107,7 +1831,7 @@ static void skd_isr_fwstate(struct skd_device *skdev)
*/
skdev->state = SKD_DRVR_STATE_BUSY_SANITIZE;
skdev->timer_countdown = SKD_TIMER_SECONDS(3);
- blk_start_queue(skdev->queue);
+ schedule_work(&skdev->start_queue);
break;
case FIT_SR_DRIVE_BUSY_ERASE:
skdev->state = SKD_DRVR_STATE_BUSY_ERASE;
@@ -3128,8 +1852,7 @@ static void skd_isr_fwstate(struct skd_device *skdev)
}
break;
case FIT_SR_DRIVE_FW_BOOTING:
- pr_debug("%s:%s:%d ISR FIT_SR_DRIVE_FW_BOOTING %s\n",
- skdev->name, __func__, __LINE__, skdev->name);
+ dev_dbg(&skdev->pdev->dev, "ISR FIT_SR_DRIVE_FW_BOOTING\n");
skdev->state = SKD_DRVR_STATE_WAIT_BOOT;
skdev->timer_countdown = SKD_WAIT_BOOT_TIMO;
break;
@@ -3141,17 +1864,17 @@ static void skd_isr_fwstate(struct skd_device *skdev)
case FIT_SR_DRIVE_FAULT:
skd_drive_fault(skdev);
- skd_recover_requests(skdev, 0);
- blk_start_queue(skdev->queue);
+ skd_recover_requests(skdev);
+ schedule_work(&skdev->start_queue);
break;
/* PCIe bus returned all Fs? */
case 0xFF:
- pr_info("(%s): state=0x%x sense=0x%x\n",
- skd_name(skdev), state, sense);
+ dev_info(&skdev->pdev->dev, "state=0x%x sense=0x%x\n", state,
+ sense);
skd_drive_disappeared(skdev);
- skd_recover_requests(skdev, 0);
- blk_start_queue(skdev->queue);
+ skd_recover_requests(skdev);
+ schedule_work(&skdev->start_queue);
break;
default:
/*
@@ -3159,92 +1882,33 @@ static void skd_isr_fwstate(struct skd_device *skdev)
*/
break;
}
- pr_err("(%s): Driver state %s(%d)=>%s(%d)\n",
- skd_name(skdev),
- skd_skdev_state_to_str(prev_driver_state), prev_driver_state,
- skd_skdev_state_to_str(skdev->state), skdev->state);
+ dev_err(&skdev->pdev->dev, "Driver state %s(%d)=>%s(%d)\n",
+ skd_skdev_state_to_str(prev_driver_state), prev_driver_state,
+ skd_skdev_state_to_str(skdev->state), skdev->state);
}
-static void skd_recover_requests(struct skd_device *skdev, int requeue)
+static void skd_recover_request(struct request *req, void *data, bool reserved)
{
- int i;
+ struct skd_device *const skdev = data;
+ struct skd_request_context *skreq = blk_mq_rq_to_pdu(req);
- for (i = 0; i < skdev->num_req_context; i++) {
- struct skd_request_context *skreq = &skdev->skreq_table[i];
+ if (skreq->state != SKD_REQ_STATE_BUSY)
+ return;
- if (skreq->state == SKD_REQ_STATE_BUSY) {
- skd_log_skreq(skdev, skreq, "recover");
+ skd_log_skreq(skdev, skreq, "recover");
- SKD_ASSERT((skreq->id & SKD_ID_INCR) != 0);
- SKD_ASSERT(skreq->req != NULL);
+ /* Release DMA resources for the request. */
+ if (skreq->n_sg > 0)
+ skd_postop_sg_list(skdev, skreq);
- /* Release DMA resources for the request. */
- if (skreq->n_sg > 0)
- skd_postop_sg_list(skdev, skreq);
+ skreq->state = SKD_REQ_STATE_IDLE;
+ skreq->status = BLK_STS_IOERR;
+ blk_mq_complete_request(req);
+}
- if (requeue &&
- (unsigned long) ++skreq->req->special <
- SKD_MAX_RETRIES)
- blk_requeue_request(skdev->queue, skreq->req);
- else
- skd_end_request(skdev, skreq, BLK_STS_IOERR);
-
- skreq->req = NULL;
-
- skreq->state = SKD_REQ_STATE_IDLE;
- skreq->id += SKD_ID_INCR;
- }
- if (i > 0)
- skreq[-1].next = skreq;
- skreq->next = NULL;
- }
- skdev->skreq_free_list = skdev->skreq_table;
-
- for (i = 0; i < skdev->num_fitmsg_context; i++) {
- struct skd_fitmsg_context *skmsg = &skdev->skmsg_table[i];
-
- if (skmsg->state == SKD_MSG_STATE_BUSY) {
- skd_log_skmsg(skdev, skmsg, "salvaged");
- SKD_ASSERT((skmsg->id & SKD_ID_INCR) != 0);
- skmsg->state = SKD_MSG_STATE_IDLE;
- skmsg->id += SKD_ID_INCR;
- }
- if (i > 0)
- skmsg[-1].next = skmsg;
- skmsg->next = NULL;
- }
- skdev->skmsg_free_list = skdev->skmsg_table;
-
- for (i = 0; i < skdev->n_special; i++) {
- struct skd_special_context *skspcl = &skdev->skspcl_table[i];
-
- /* If orphaned, reclaim it because it has already been reported
- * to the process as an error (it was just waiting for
- * a completion that didn't come, and now it will never come)
- * If busy, change to a state that will cause it to error
- * out in the wait routine and let it do the normal
- * reporting and reclaiming
- */
- if (skspcl->req.state == SKD_REQ_STATE_BUSY) {
- if (skspcl->orphaned) {
- pr_debug("%s:%s:%d orphaned %p\n",
- skdev->name, __func__, __LINE__,
- skspcl);
- skd_release_special(skdev, skspcl);
- } else {
- pr_debug("%s:%s:%d not orphaned %p\n",
- skdev->name, __func__, __LINE__,
- skspcl);
- skspcl->req.state = SKD_REQ_STATE_ABORTED;
- }
- }
- }
- skdev->skspcl_free_list = skdev->skspcl_table;
-
- for (i = 0; i < SKD_N_TIMEOUT_SLOT; i++)
- skdev->timeout_slot[i] = 0;
-
- skdev->in_flight = 0;
+static void skd_recover_requests(struct skd_device *skdev)
+{
+ blk_mq_tagset_busy_iter(&skdev->tag_set, skd_recover_request, skdev);
}
static void skd_isr_msg_from_dev(struct skd_device *skdev)
@@ -3255,8 +1919,8 @@ static void skd_isr_msg_from_dev(struct skd_device *skdev)
mfd = SKD_READL(skdev, FIT_MSG_FROM_DEVICE);
- pr_debug("%s:%s:%d mfd=0x%x last_mtd=0x%x\n",
- skdev->name, __func__, __LINE__, mfd, skdev->last_mtd);
+ dev_dbg(&skdev->pdev->dev, "mfd=0x%x last_mtd=0x%x\n", mfd,
+ skdev->last_mtd);
/* ignore any mtd that is an ack for something we didn't send */
if (FIT_MXD_TYPE(mfd) != FIT_MXD_TYPE(skdev->last_mtd))
@@ -3267,13 +1931,10 @@ static void skd_isr_msg_from_dev(struct skd_device *skdev)
skdev->proto_ver = FIT_PROTOCOL_MAJOR_VER(mfd);
if (skdev->proto_ver != FIT_PROTOCOL_VERSION_1) {
- pr_err("(%s): protocol mismatch\n",
- skdev->name);
- pr_err("(%s): got=%d support=%d\n",
- skdev->name, skdev->proto_ver,
- FIT_PROTOCOL_VERSION_1);
- pr_err("(%s): please upgrade driver\n",
- skdev->name);
+ dev_err(&skdev->pdev->dev, "protocol mismatch\n");
+ dev_err(&skdev->pdev->dev, " got=%d support=%d\n",
+ skdev->proto_ver, FIT_PROTOCOL_VERSION_1);
+ dev_err(&skdev->pdev->dev, " please upgrade driver\n");
skdev->state = SKD_DRVR_STATE_PROTOCOL_MISMATCH;
skd_soft_reset(skdev);
break;
@@ -3327,9 +1988,8 @@ static void skd_isr_msg_from_dev(struct skd_device *skdev)
SKD_WRITEL(skdev, mtd, FIT_MSG_TO_DEVICE);
skdev->last_mtd = mtd;
- pr_err("(%s): Time sync driver=0x%x device=0x%x\n",
- skd_name(skdev),
- skdev->connect_time_stamp, skdev->drive_jiffies);
+ dev_err(&skdev->pdev->dev, "Time sync driver=0x%x device=0x%x\n",
+ skdev->connect_time_stamp, skdev->drive_jiffies);
break;
case FIT_MTD_ARM_QUEUE:
@@ -3351,8 +2011,7 @@ static void skd_disable_interrupts(struct skd_device *skdev)
sense = SKD_READL(skdev, FIT_CONTROL);
sense &= ~FIT_CR_ENABLE_INTERRUPTS;
SKD_WRITEL(skdev, sense, FIT_CONTROL);
- pr_debug("%s:%s:%d sense 0x%x\n",
- skdev->name, __func__, __LINE__, sense);
+ dev_dbg(&skdev->pdev->dev, "sense 0x%x\n", sense);
/* Note that the 1s is written. A 1-bit means
* disable, a 0 means enable.
@@ -3371,13 +2030,11 @@ static void skd_enable_interrupts(struct skd_device *skdev)
/* Note that the compliment of mask is written. A 1-bit means
* disable, a 0 means enable. */
SKD_WRITEL(skdev, ~val, FIT_INT_MASK_HOST);
- pr_debug("%s:%s:%d interrupt mask=0x%x\n",
- skdev->name, __func__, __LINE__, ~val);
+ dev_dbg(&skdev->pdev->dev, "interrupt mask=0x%x\n", ~val);
val = SKD_READL(skdev, FIT_CONTROL);
val |= FIT_CR_ENABLE_INTERRUPTS;
- pr_debug("%s:%s:%d control=0x%x\n",
- skdev->name, __func__, __LINE__, val);
+ dev_dbg(&skdev->pdev->dev, "control=0x%x\n", val);
SKD_WRITEL(skdev, val, FIT_CONTROL);
}
@@ -3393,8 +2050,7 @@ static void skd_soft_reset(struct skd_device *skdev)
val = SKD_READL(skdev, FIT_CONTROL);
val |= (FIT_CR_SOFT_RESET);
- pr_debug("%s:%s:%d control=0x%x\n",
- skdev->name, __func__, __LINE__, val);
+ dev_dbg(&skdev->pdev->dev, "control=0x%x\n", val);
SKD_WRITEL(skdev, val, FIT_CONTROL);
}
@@ -3411,8 +2067,7 @@ static void skd_start_device(struct skd_device *skdev)
sense = SKD_READL(skdev, FIT_STATUS);
- pr_debug("%s:%s:%d initial status=0x%x\n",
- skdev->name, __func__, __LINE__, sense);
+ dev_dbg(&skdev->pdev->dev, "initial status=0x%x\n", sense);
state = sense & FIT_SR_DRIVE_STATE_MASK;
skdev->drive_state = state;
@@ -3425,25 +2080,23 @@ static void skd_start_device(struct skd_device *skdev)
switch (skdev->drive_state) {
case FIT_SR_DRIVE_OFFLINE:
- pr_err("(%s): Drive offline...\n", skd_name(skdev));
+ dev_err(&skdev->pdev->dev, "Drive offline...\n");
break;
case FIT_SR_DRIVE_FW_BOOTING:
- pr_debug("%s:%s:%d FIT_SR_DRIVE_FW_BOOTING %s\n",
- skdev->name, __func__, __LINE__, skdev->name);
+ dev_dbg(&skdev->pdev->dev, "FIT_SR_DRIVE_FW_BOOTING\n");
skdev->state = SKD_DRVR_STATE_WAIT_BOOT;
skdev->timer_countdown = SKD_WAIT_BOOT_TIMO;
break;
case FIT_SR_DRIVE_BUSY_SANITIZE:
- pr_info("(%s): Start: BUSY_SANITIZE\n",
- skd_name(skdev));
+ dev_info(&skdev->pdev->dev, "Start: BUSY_SANITIZE\n");
skdev->state = SKD_DRVR_STATE_BUSY_SANITIZE;
skdev->timer_countdown = SKD_STARTED_BUSY_TIMO;
break;
case FIT_SR_DRIVE_BUSY_ERASE:
- pr_info("(%s): Start: BUSY_ERASE\n", skd_name(skdev));
+ dev_info(&skdev->pdev->dev, "Start: BUSY_ERASE\n");
skdev->state = SKD_DRVR_STATE_BUSY_ERASE;
skdev->timer_countdown = SKD_STARTED_BUSY_TIMO;
break;
@@ -3454,14 +2107,13 @@ static void skd_start_device(struct skd_device *skdev)
break;
case FIT_SR_DRIVE_BUSY:
- pr_err("(%s): Drive Busy...\n", skd_name(skdev));
+ dev_err(&skdev->pdev->dev, "Drive Busy...\n");
skdev->state = SKD_DRVR_STATE_BUSY;
skdev->timer_countdown = SKD_STARTED_BUSY_TIMO;
break;
case FIT_SR_DRIVE_SOFT_RESET:
- pr_err("(%s) drive soft reset in prog\n",
- skd_name(skdev));
+ dev_err(&skdev->pdev->dev, "drive soft reset in prog\n");
break;
case FIT_SR_DRIVE_FAULT:
@@ -3471,9 +2123,8 @@ static void skd_start_device(struct skd_device *skdev)
*/
skd_drive_fault(skdev);
/*start the queue so we can respond with error to requests */
- pr_debug("%s:%s:%d starting %s queue\n",
- skdev->name, __func__, __LINE__, skdev->name);
- blk_start_queue(skdev->queue);
+ dev_dbg(&skdev->pdev->dev, "starting queue\n");
+ schedule_work(&skdev->start_queue);
skdev->gendisk_on = -1;
wake_up_interruptible(&skdev->waitq);
break;
@@ -3483,38 +2134,33 @@ static void skd_start_device(struct skd_device *skdev)
* to the BAR1 addresses. */
skd_drive_disappeared(skdev);
/*start the queue so we can respond with error to requests */
- pr_debug("%s:%s:%d starting %s queue to error-out reqs\n",
- skdev->name, __func__, __LINE__, skdev->name);
- blk_start_queue(skdev->queue);
+ dev_dbg(&skdev->pdev->dev,
+ "starting queue to error-out reqs\n");
+ schedule_work(&skdev->start_queue);
skdev->gendisk_on = -1;
wake_up_interruptible(&skdev->waitq);
break;
default:
- pr_err("(%s) Start: unknown state %x\n",
- skd_name(skdev), skdev->drive_state);
+ dev_err(&skdev->pdev->dev, "Start: unknown state %x\n",
+ skdev->drive_state);
break;
}
state = SKD_READL(skdev, FIT_CONTROL);
- pr_debug("%s:%s:%d FIT Control Status=0x%x\n",
- skdev->name, __func__, __LINE__, state);
+ dev_dbg(&skdev->pdev->dev, "FIT Control Status=0x%x\n", state);
state = SKD_READL(skdev, FIT_INT_STATUS_HOST);
- pr_debug("%s:%s:%d Intr Status=0x%x\n",
- skdev->name, __func__, __LINE__, state);
+ dev_dbg(&skdev->pdev->dev, "Intr Status=0x%x\n", state);
state = SKD_READL(skdev, FIT_INT_MASK_HOST);
- pr_debug("%s:%s:%d Intr Mask=0x%x\n",
- skdev->name, __func__, __LINE__, state);
+ dev_dbg(&skdev->pdev->dev, "Intr Mask=0x%x\n", state);
state = SKD_READL(skdev, FIT_MSG_FROM_DEVICE);
- pr_debug("%s:%s:%d Msg from Dev=0x%x\n",
- skdev->name, __func__, __LINE__, state);
+ dev_dbg(&skdev->pdev->dev, "Msg from Dev=0x%x\n", state);
state = SKD_READL(skdev, FIT_HW_VERSION);
- pr_debug("%s:%s:%d HW version=0x%x\n",
- skdev->name, __func__, __LINE__, state);
+ dev_dbg(&skdev->pdev->dev, "HW version=0x%x\n", state);
spin_unlock_irqrestore(&skdev->lock, flags);
}
@@ -3529,14 +2175,12 @@ static void skd_stop_device(struct skd_device *skdev)
spin_lock_irqsave(&skdev->lock, flags);
if (skdev->state != SKD_DRVR_STATE_ONLINE) {
- pr_err("(%s): skd_stop_device not online no sync\n",
- skd_name(skdev));
+ dev_err(&skdev->pdev->dev, "%s not online no sync\n", __func__);
goto stop_out;
}
if (skspcl->req.state != SKD_REQ_STATE_IDLE) {
- pr_err("(%s): skd_stop_device no special\n",
- skd_name(skdev));
+ dev_err(&skdev->pdev->dev, "%s no special\n", __func__);
goto stop_out;
}
@@ -3554,16 +2198,13 @@ static void skd_stop_device(struct skd_device *skdev)
switch (skdev->sync_done) {
case 0:
- pr_err("(%s): skd_stop_device no sync\n",
- skd_name(skdev));
+ dev_err(&skdev->pdev->dev, "%s no sync\n", __func__);
break;
case 1:
- pr_err("(%s): skd_stop_device sync done\n",
- skd_name(skdev));
+ dev_err(&skdev->pdev->dev, "%s sync done\n", __func__);
break;
default:
- pr_err("(%s): skd_stop_device sync error\n",
- skd_name(skdev));
+ dev_err(&skdev->pdev->dev, "%s sync error\n", __func__);
}
stop_out:
@@ -3593,8 +2234,8 @@ static void skd_stop_device(struct skd_device *skdev)
}
if (dev_state != FIT_SR_DRIVE_INIT)
- pr_err("(%s): skd_stop_device state error 0x%02x\n",
- skd_name(skdev), dev_state);
+ dev_err(&skdev->pdev->dev, "%s state error 0x%02x\n", __func__,
+ dev_state);
}
/* assume spinlock is held */
@@ -3607,8 +2248,7 @@ static void skd_restart_device(struct skd_device *skdev)
state = SKD_READL(skdev, FIT_STATUS);
- pr_debug("%s:%s:%d drive status=0x%x\n",
- skdev->name, __func__, __LINE__, state);
+ dev_dbg(&skdev->pdev->dev, "drive status=0x%x\n", state);
state &= FIT_SR_DRIVE_STATE_MASK;
skdev->drive_state = state;
@@ -3628,9 +2268,8 @@ static int skd_quiesce_dev(struct skd_device *skdev)
switch (skdev->state) {
case SKD_DRVR_STATE_BUSY:
case SKD_DRVR_STATE_BUSY_IMMINENT:
- pr_debug("%s:%s:%d stopping %s queue\n",
- skdev->name, __func__, __LINE__, skdev->name);
- blk_stop_queue(skdev->queue);
+ dev_dbg(&skdev->pdev->dev, "stopping queue\n");
+ blk_mq_stop_hw_queues(skdev->queue);
break;
case SKD_DRVR_STATE_ONLINE:
case SKD_DRVR_STATE_STOPPING:
@@ -3642,8 +2281,8 @@ static int skd_quiesce_dev(struct skd_device *skdev)
case SKD_DRVR_STATE_RESUMING:
default:
rc = -EINVAL;
- pr_debug("%s:%s:%d state [%d] not implemented\n",
- skdev->name, __func__, __LINE__, skdev->state);
+ dev_dbg(&skdev->pdev->dev, "state [%d] not implemented\n",
+ skdev->state);
}
return rc;
}
@@ -3655,8 +2294,7 @@ static int skd_unquiesce_dev(struct skd_device *skdev)
skd_log_skdev(skdev, "unquiesce");
if (skdev->state == SKD_DRVR_STATE_ONLINE) {
- pr_debug("%s:%s:%d **** device already ONLINE\n",
- skdev->name, __func__, __LINE__);
+ dev_dbg(&skdev->pdev->dev, "**** device already ONLINE\n");
return 0;
}
if (skdev->drive_state != FIT_SR_DRIVE_ONLINE) {
@@ -3669,8 +2307,7 @@ static int skd_unquiesce_dev(struct skd_device *skdev)
* to become available.
*/
skdev->state = SKD_DRVR_STATE_BUSY;
- pr_debug("%s:%s:%d drive BUSY state\n",
- skdev->name, __func__, __LINE__);
+ dev_dbg(&skdev->pdev->dev, "drive BUSY state\n");
return 0;
}
@@ -3689,26 +2326,24 @@ static int skd_unquiesce_dev(struct skd_device *skdev)
case SKD_DRVR_STATE_IDLE:
case SKD_DRVR_STATE_LOAD:
skdev->state = SKD_DRVR_STATE_ONLINE;
- pr_err("(%s): Driver state %s(%d)=>%s(%d)\n",
- skd_name(skdev),
- skd_skdev_state_to_str(prev_driver_state),
- prev_driver_state, skd_skdev_state_to_str(skdev->state),
- skdev->state);
- pr_debug("%s:%s:%d **** device ONLINE...starting block queue\n",
- skdev->name, __func__, __LINE__);
- pr_debug("%s:%s:%d starting %s queue\n",
- skdev->name, __func__, __LINE__, skdev->name);
- pr_info("(%s): STEC s1120 ONLINE\n", skd_name(skdev));
- blk_start_queue(skdev->queue);
+ dev_err(&skdev->pdev->dev, "Driver state %s(%d)=>%s(%d)\n",
+ skd_skdev_state_to_str(prev_driver_state),
+ prev_driver_state, skd_skdev_state_to_str(skdev->state),
+ skdev->state);
+ dev_dbg(&skdev->pdev->dev,
+ "**** device ONLINE...starting block queue\n");
+ dev_dbg(&skdev->pdev->dev, "starting queue\n");
+ dev_info(&skdev->pdev->dev, "STEC s1120 ONLINE\n");
+ schedule_work(&skdev->start_queue);
skdev->gendisk_on = 1;
wake_up_interruptible(&skdev->waitq);
break;
case SKD_DRVR_STATE_DISAPPEARED:
default:
- pr_debug("%s:%s:%d **** driver state %d, not implemented \n",
- skdev->name, __func__, __LINE__,
- skdev->state);
+ dev_dbg(&skdev->pdev->dev,
+ "**** driver state %d, not implemented\n",
+ skdev->state);
return -EBUSY;
}
return 0;
@@ -3726,11 +2361,10 @@ static irqreturn_t skd_reserved_isr(int irq, void *skd_host_data)
unsigned long flags;
spin_lock_irqsave(&skdev->lock, flags);
- pr_debug("%s:%s:%d MSIX = 0x%x\n",
- skdev->name, __func__, __LINE__,
- SKD_READL(skdev, FIT_INT_STATUS_HOST));
- pr_err("(%s): MSIX reserved irq %d = 0x%x\n", skd_name(skdev),
- irq, SKD_READL(skdev, FIT_INT_STATUS_HOST));
+ dev_dbg(&skdev->pdev->dev, "MSIX = 0x%x\n",
+ SKD_READL(skdev, FIT_INT_STATUS_HOST));
+ dev_err(&skdev->pdev->dev, "MSIX reserved irq %d = 0x%x\n", irq,
+ SKD_READL(skdev, FIT_INT_STATUS_HOST));
SKD_WRITEL(skdev, FIT_INT_RESERVED_MASK, FIT_INT_STATUS_HOST);
spin_unlock_irqrestore(&skdev->lock, flags);
return IRQ_HANDLED;
@@ -3742,9 +2376,8 @@ static irqreturn_t skd_statec_isr(int irq, void *skd_host_data)
unsigned long flags;
spin_lock_irqsave(&skdev->lock, flags);
- pr_debug("%s:%s:%d MSIX = 0x%x\n",
- skdev->name, __func__, __LINE__,
- SKD_READL(skdev, FIT_INT_STATUS_HOST));
+ dev_dbg(&skdev->pdev->dev, "MSIX = 0x%x\n",
+ SKD_READL(skdev, FIT_INT_STATUS_HOST));
SKD_WRITEL(skdev, FIT_ISH_FW_STATE_CHANGE, FIT_INT_STATUS_HOST);
skd_isr_fwstate(skdev);
spin_unlock_irqrestore(&skdev->lock, flags);
@@ -3759,19 +2392,18 @@ static irqreturn_t skd_comp_q(int irq, void *skd_host_data)
int deferred;
spin_lock_irqsave(&skdev->lock, flags);
- pr_debug("%s:%s:%d MSIX = 0x%x\n",
- skdev->name, __func__, __LINE__,
- SKD_READL(skdev, FIT_INT_STATUS_HOST));
+ dev_dbg(&skdev->pdev->dev, "MSIX = 0x%x\n",
+ SKD_READL(skdev, FIT_INT_STATUS_HOST));
SKD_WRITEL(skdev, FIT_ISH_COMPLETION_POSTED, FIT_INT_STATUS_HOST);
deferred = skd_isr_completion_posted(skdev, skd_isr_comp_limit,
&flush_enqueued);
if (flush_enqueued)
- skd_request_fn(skdev->queue);
+ schedule_work(&skdev->start_queue);
if (deferred)
schedule_work(&skdev->completion_worker);
else if (!flush_enqueued)
- skd_request_fn(skdev->queue);
+ schedule_work(&skdev->start_queue);
spin_unlock_irqrestore(&skdev->lock, flags);
@@ -3784,9 +2416,8 @@ static irqreturn_t skd_msg_isr(int irq, void *skd_host_data)
unsigned long flags;
spin_lock_irqsave(&skdev->lock, flags);
- pr_debug("%s:%s:%d MSIX = 0x%x\n",
- skdev->name, __func__, __LINE__,
- SKD_READL(skdev, FIT_INT_STATUS_HOST));
+ dev_dbg(&skdev->pdev->dev, "MSIX = 0x%x\n",
+ SKD_READL(skdev, FIT_INT_STATUS_HOST));
SKD_WRITEL(skdev, FIT_ISH_MSG_FROM_DEV, FIT_INT_STATUS_HOST);
skd_isr_msg_from_dev(skdev);
spin_unlock_irqrestore(&skdev->lock, flags);
@@ -3799,9 +2430,8 @@ static irqreturn_t skd_qfull_isr(int irq, void *skd_host_data)
unsigned long flags;
spin_lock_irqsave(&skdev->lock, flags);
- pr_debug("%s:%s:%d MSIX = 0x%x\n",
- skdev->name, __func__, __LINE__,
- SKD_READL(skdev, FIT_INT_STATUS_HOST));
+ dev_dbg(&skdev->pdev->dev, "MSIX = 0x%x\n",
+ SKD_READL(skdev, FIT_INT_STATUS_HOST));
SKD_WRITEL(skdev, FIT_INT_QUEUE_FULL, FIT_INT_STATUS_HOST);
spin_unlock_irqrestore(&skdev->lock, flags);
return IRQ_HANDLED;
@@ -3850,8 +2480,7 @@ static int skd_acquire_msix(struct skd_device *skdev)
rc = pci_alloc_irq_vectors(pdev, SKD_MAX_MSIX_COUNT, SKD_MAX_MSIX_COUNT,
PCI_IRQ_MSIX);
if (rc < 0) {
- pr_err("(%s): failed to enable MSI-X %d\n",
- skd_name(skdev), rc);
+ dev_err(&skdev->pdev->dev, "failed to enable MSI-X %d\n", rc);
goto out;
}
@@ -3859,8 +2488,7 @@ static int skd_acquire_msix(struct skd_device *skdev)
sizeof(struct skd_msix_entry), GFP_KERNEL);
if (!skdev->msix_entries) {
rc = -ENOMEM;
- pr_err("(%s): msix table allocation error\n",
- skd_name(skdev));
+ dev_err(&skdev->pdev->dev, "msix table allocation error\n");
goto out;
}
@@ -3877,16 +2505,15 @@ static int skd_acquire_msix(struct skd_device *skdev)
msix_entries[i].handler, 0,
qentry->isr_name, skdev);
if (rc) {
- pr_err("(%s): Unable to register(%d) MSI-X "
- "handler %d: %s\n",
- skd_name(skdev), rc, i, qentry->isr_name);
+ dev_err(&skdev->pdev->dev,
+ "Unable to register(%d) MSI-X handler %d: %s\n",
+ rc, i, qentry->isr_name);
goto msix_out;
}
}
- pr_debug("%s:%s:%d %s: <%s> msix %d irq(s) enabled\n",
- skdev->name, __func__, __LINE__,
- pci_name(pdev), skdev->name, SKD_MAX_MSIX_COUNT);
+ dev_dbg(&skdev->pdev->dev, "%d msix irq(s) enabled\n",
+ SKD_MAX_MSIX_COUNT);
return 0;
msix_out:
@@ -3909,8 +2536,8 @@ static int skd_acquire_irq(struct skd_device *skdev)
if (!rc)
return 0;
- pr_err("(%s): failed to enable MSI-X, re-trying with MSI %d\n",
- skd_name(skdev), rc);
+ dev_err(&skdev->pdev->dev,
+ "failed to enable MSI-X, re-trying with MSI %d\n", rc);
}
snprintf(skdev->isr_name, sizeof(skdev->isr_name), "%s%d", DRV_NAME,
@@ -3920,8 +2547,8 @@ static int skd_acquire_irq(struct skd_device *skdev)
irq_flag |= PCI_IRQ_MSI;
rc = pci_alloc_irq_vectors(pdev, 1, 1, irq_flag);
if (rc < 0) {
- pr_err("(%s): failed to allocate the MSI interrupt %d\n",
- skd_name(skdev), rc);
+ dev_err(&skdev->pdev->dev,
+ "failed to allocate the MSI interrupt %d\n", rc);
return rc;
}
@@ -3930,8 +2557,8 @@ static int skd_acquire_irq(struct skd_device *skdev)
skdev->isr_name, skdev);
if (rc) {
pci_free_irq_vectors(pdev);
- pr_err("(%s): failed to allocate interrupt %d\n",
- skd_name(skdev), rc);
+ dev_err(&skdev->pdev->dev, "failed to allocate interrupt %d\n",
+ rc);
return rc;
}
@@ -3965,20 +2592,45 @@ static void skd_release_irq(struct skd_device *skdev)
*****************************************************************************
*/
+static void *skd_alloc_dma(struct skd_device *skdev, struct kmem_cache *s,
+ dma_addr_t *dma_handle, gfp_t gfp,
+ enum dma_data_direction dir)
+{
+ struct device *dev = &skdev->pdev->dev;
+ void *buf;
+
+ buf = kmem_cache_alloc(s, gfp);
+ if (!buf)
+ return NULL;
+ *dma_handle = dma_map_single(dev, buf, s->size, dir);
+ if (dma_mapping_error(dev, *dma_handle)) {
+ kfree(buf);
+ buf = NULL;
+ }
+ return buf;
+}
+
+static void skd_free_dma(struct skd_device *skdev, struct kmem_cache *s,
+ void *vaddr, dma_addr_t dma_handle,
+ enum dma_data_direction dir)
+{
+ if (!vaddr)
+ return;
+
+ dma_unmap_single(&skdev->pdev->dev, dma_handle, s->size, dir);
+ kmem_cache_free(s, vaddr);
+}
+
static int skd_cons_skcomp(struct skd_device *skdev)
{
int rc = 0;
struct fit_completion_entry_v1 *skcomp;
- u32 nbytes;
- nbytes = sizeof(*skcomp) * SKD_N_COMPLETION_ENTRY;
- nbytes += sizeof(struct fit_comp_error_info) * SKD_N_COMPLETION_ENTRY;
+ dev_dbg(&skdev->pdev->dev,
+ "comp pci_alloc, total bytes %zd entries %d\n",
+ SKD_SKCOMP_SIZE, SKD_N_COMPLETION_ENTRY);
- pr_debug("%s:%s:%d comp pci_alloc, total bytes %d entries %d\n",
- skdev->name, __func__, __LINE__,
- nbytes, SKD_N_COMPLETION_ENTRY);
-
- skcomp = pci_zalloc_consistent(skdev->pdev, nbytes,
+ skcomp = pci_zalloc_consistent(skdev->pdev, SKD_SKCOMP_SIZE,
&skdev->cq_dma_address);
if (skcomp == NULL) {
@@ -4000,14 +2652,14 @@ static int skd_cons_skmsg(struct skd_device *skdev)
int rc = 0;
u32 i;
- pr_debug("%s:%s:%d skmsg_table kzalloc, struct %lu, count %u total %lu\n",
- skdev->name, __func__, __LINE__,
- sizeof(struct skd_fitmsg_context),
- skdev->num_fitmsg_context,
- sizeof(struct skd_fitmsg_context) * skdev->num_fitmsg_context);
+ dev_dbg(&skdev->pdev->dev,
+ "skmsg_table kcalloc, struct %lu, count %u total %lu\n",
+ sizeof(struct skd_fitmsg_context), skdev->num_fitmsg_context,
+ sizeof(struct skd_fitmsg_context) * skdev->num_fitmsg_context);
- skdev->skmsg_table = kzalloc(sizeof(struct skd_fitmsg_context)
- *skdev->num_fitmsg_context, GFP_KERNEL);
+ skdev->skmsg_table = kcalloc(skdev->num_fitmsg_context,
+ sizeof(struct skd_fitmsg_context),
+ GFP_KERNEL);
if (skdev->skmsg_table == NULL) {
rc = -ENOMEM;
goto err_out;
@@ -4020,9 +2672,8 @@ static int skd_cons_skmsg(struct skd_device *skdev)
skmsg->id = i + SKD_ID_FIT_MSG;
- skmsg->state = SKD_MSG_STATE_IDLE;
skmsg->msg_buf = pci_alloc_consistent(skdev->pdev,
- SKD_N_FITMSG_BYTES + 64,
+ SKD_N_FITMSG_BYTES,
&skmsg->mb_dma_address);
if (skmsg->msg_buf == NULL) {
@@ -4030,22 +2681,13 @@ static int skd_cons_skmsg(struct skd_device *skdev)
goto err_out;
}
- skmsg->offset = (u32)((u64)skmsg->msg_buf &
- (~FIT_QCMD_BASE_ADDRESS_MASK));
- skmsg->msg_buf += ~FIT_QCMD_BASE_ADDRESS_MASK;
- skmsg->msg_buf = (u8 *)((u64)skmsg->msg_buf &
- FIT_QCMD_BASE_ADDRESS_MASK);
- skmsg->mb_dma_address += ~FIT_QCMD_BASE_ADDRESS_MASK;
- skmsg->mb_dma_address &= FIT_QCMD_BASE_ADDRESS_MASK;
+ WARN(((uintptr_t)skmsg->msg_buf | skmsg->mb_dma_address) &
+ (FIT_QCMD_ALIGN - 1),
+ "not aligned: msg_buf %p mb_dma_address %#llx\n",
+ skmsg->msg_buf, skmsg->mb_dma_address);
memset(skmsg->msg_buf, 0, SKD_N_FITMSG_BYTES);
-
- skmsg->next = &skmsg[1];
}
- /* Free list is in order starting with the 0th entry. */
- skdev->skmsg_table[i - 1].next = NULL;
- skdev->skmsg_free_list = skdev->skmsg_table;
-
err_out:
return rc;
}
@@ -4055,18 +2697,14 @@ static struct fit_sg_descriptor *skd_cons_sg_list(struct skd_device *skdev,
dma_addr_t *ret_dma_addr)
{
struct fit_sg_descriptor *sg_list;
- u32 nbytes;
- nbytes = sizeof(*sg_list) * n_sg;
-
- sg_list = pci_alloc_consistent(skdev->pdev, nbytes, ret_dma_addr);
+ sg_list = skd_alloc_dma(skdev, skdev->sglist_cache, ret_dma_addr,
+ GFP_DMA | __GFP_ZERO, DMA_TO_DEVICE);
if (sg_list != NULL) {
uint64_t dma_address = *ret_dma_addr;
u32 i;
- memset(sg_list, 0, nbytes);
-
for (i = 0; i < n_sg - 1; i++) {
uint64_t ndp_off;
ndp_off = (i + 1) * sizeof(struct fit_sg_descriptor);
@@ -4079,153 +2717,63 @@ static struct fit_sg_descriptor *skd_cons_sg_list(struct skd_device *skdev,
return sg_list;
}
-static int skd_cons_skreq(struct skd_device *skdev)
+static void skd_free_sg_list(struct skd_device *skdev,
+ struct fit_sg_descriptor *sg_list,
+ dma_addr_t dma_addr)
{
- int rc = 0;
- u32 i;
+ if (WARN_ON_ONCE(!sg_list))
+ return;
- pr_debug("%s:%s:%d skreq_table kzalloc, struct %lu, count %u total %lu\n",
- skdev->name, __func__, __LINE__,
- sizeof(struct skd_request_context),
- skdev->num_req_context,
- sizeof(struct skd_request_context) * skdev->num_req_context);
-
- skdev->skreq_table = kzalloc(sizeof(struct skd_request_context)
- * skdev->num_req_context, GFP_KERNEL);
- if (skdev->skreq_table == NULL) {
- rc = -ENOMEM;
- goto err_out;
- }
-
- pr_debug("%s:%s:%d alloc sg_table sg_per_req %u scatlist %lu total %lu\n",
- skdev->name, __func__, __LINE__,
- skdev->sgs_per_request, sizeof(struct scatterlist),
- skdev->sgs_per_request * sizeof(struct scatterlist));
-
- for (i = 0; i < skdev->num_req_context; i++) {
- struct skd_request_context *skreq;
-
- skreq = &skdev->skreq_table[i];
-
- skreq->id = i + SKD_ID_RW_REQUEST;
- skreq->state = SKD_REQ_STATE_IDLE;
-
- skreq->sg = kzalloc(sizeof(struct scatterlist) *
- skdev->sgs_per_request, GFP_KERNEL);
- if (skreq->sg == NULL) {
- rc = -ENOMEM;
- goto err_out;
- }
- sg_init_table(skreq->sg, skdev->sgs_per_request);
-
- skreq->sksg_list = skd_cons_sg_list(skdev,
- skdev->sgs_per_request,
- &skreq->sksg_dma_address);
-
- if (skreq->sksg_list == NULL) {
- rc = -ENOMEM;
- goto err_out;
- }
-
- skreq->next = &skreq[1];
- }
-
- /* Free list is in order starting with the 0th entry. */
- skdev->skreq_table[i - 1].next = NULL;
- skdev->skreq_free_list = skdev->skreq_table;
-
-err_out:
- return rc;
+ skd_free_dma(skdev, skdev->sglist_cache, sg_list, dma_addr,
+ DMA_TO_DEVICE);
}
-static int skd_cons_skspcl(struct skd_device *skdev)
+static int skd_init_request(struct blk_mq_tag_set *set, struct request *rq,
+ unsigned int hctx_idx, unsigned int numa_node)
{
- int rc = 0;
- u32 i, nbytes;
+ struct skd_device *skdev = set->driver_data;
+ struct skd_request_context *skreq = blk_mq_rq_to_pdu(rq);
- pr_debug("%s:%s:%d skspcl_table kzalloc, struct %lu, count %u total %lu\n",
- skdev->name, __func__, __LINE__,
- sizeof(struct skd_special_context),
- skdev->n_special,
- sizeof(struct skd_special_context) * skdev->n_special);
+ skreq->state = SKD_REQ_STATE_IDLE;
+ skreq->sg = (void *)(skreq + 1);
+ sg_init_table(skreq->sg, skd_sgs_per_request);
+ skreq->sksg_list = skd_cons_sg_list(skdev, skd_sgs_per_request,
+ &skreq->sksg_dma_address);
- skdev->skspcl_table = kzalloc(sizeof(struct skd_special_context)
- * skdev->n_special, GFP_KERNEL);
- if (skdev->skspcl_table == NULL) {
- rc = -ENOMEM;
- goto err_out;
- }
+ return skreq->sksg_list ? 0 : -ENOMEM;
+}
- for (i = 0; i < skdev->n_special; i++) {
- struct skd_special_context *skspcl;
+static void skd_exit_request(struct blk_mq_tag_set *set, struct request *rq,
+ unsigned int hctx_idx)
+{
+ struct skd_device *skdev = set->driver_data;
+ struct skd_request_context *skreq = blk_mq_rq_to_pdu(rq);
- skspcl = &skdev->skspcl_table[i];
-
- skspcl->req.id = i + SKD_ID_SPECIAL_REQUEST;
- skspcl->req.state = SKD_REQ_STATE_IDLE;
-
- skspcl->req.next = &skspcl[1].req;
-
- nbytes = SKD_N_SPECIAL_FITMSG_BYTES;
-
- skspcl->msg_buf =
- pci_zalloc_consistent(skdev->pdev, nbytes,
- &skspcl->mb_dma_address);
- if (skspcl->msg_buf == NULL) {
- rc = -ENOMEM;
- goto err_out;
- }
-
- skspcl->req.sg = kzalloc(sizeof(struct scatterlist) *
- SKD_N_SG_PER_SPECIAL, GFP_KERNEL);
- if (skspcl->req.sg == NULL) {
- rc = -ENOMEM;
- goto err_out;
- }
-
- skspcl->req.sksg_list = skd_cons_sg_list(skdev,
- SKD_N_SG_PER_SPECIAL,
- &skspcl->req.
- sksg_dma_address);
- if (skspcl->req.sksg_list == NULL) {
- rc = -ENOMEM;
- goto err_out;
- }
- }
-
- /* Free list is in order starting with the 0th entry. */
- skdev->skspcl_table[i - 1].req.next = NULL;
- skdev->skspcl_free_list = skdev->skspcl_table;
-
- return rc;
-
-err_out:
- return rc;
+ skd_free_sg_list(skdev, skreq->sksg_list, skreq->sksg_dma_address);
}
static int skd_cons_sksb(struct skd_device *skdev)
{
int rc = 0;
struct skd_special_context *skspcl;
- u32 nbytes;
skspcl = &skdev->internal_skspcl;
skspcl->req.id = 0 + SKD_ID_INTERNAL;
skspcl->req.state = SKD_REQ_STATE_IDLE;
- nbytes = SKD_N_INTERNAL_BYTES;
-
- skspcl->data_buf = pci_zalloc_consistent(skdev->pdev, nbytes,
- &skspcl->db_dma_address);
+ skspcl->data_buf = skd_alloc_dma(skdev, skdev->databuf_cache,
+ &skspcl->db_dma_address,
+ GFP_DMA | __GFP_ZERO,
+ DMA_BIDIRECTIONAL);
if (skspcl->data_buf == NULL) {
rc = -ENOMEM;
goto err_out;
}
- nbytes = SKD_N_SPECIAL_FITMSG_BYTES;
- skspcl->msg_buf = pci_zalloc_consistent(skdev->pdev, nbytes,
- &skspcl->mb_dma_address);
+ skspcl->msg_buf = skd_alloc_dma(skdev, skdev->msgbuf_cache,
+ &skspcl->mb_dma_address,
+ GFP_DMA | __GFP_ZERO, DMA_TO_DEVICE);
if (skspcl->msg_buf == NULL) {
rc = -ENOMEM;
goto err_out;
@@ -4247,6 +2795,14 @@ static int skd_cons_sksb(struct skd_device *skdev)
return rc;
}
+static const struct blk_mq_ops skd_mq_ops = {
+ .queue_rq = skd_mq_queue_rq,
+ .complete = skd_complete_rq,
+ .timeout = skd_timed_out,
+ .init_request = skd_init_request,
+ .exit_request = skd_exit_request,
+};
+
static int skd_cons_disk(struct skd_device *skdev)
{
int rc = 0;
@@ -4268,31 +2824,46 @@ static int skd_cons_disk(struct skd_device *skdev)
disk->fops = &skd_blockdev_ops;
disk->private_data = skdev;
- q = blk_init_queue(skd_request_fn, &skdev->lock);
- if (!q) {
- rc = -ENOMEM;
+ memset(&skdev->tag_set, 0, sizeof(skdev->tag_set));
+ skdev->tag_set.ops = &skd_mq_ops;
+ skdev->tag_set.nr_hw_queues = 1;
+ skdev->tag_set.queue_depth = skd_max_queue_depth;
+ skdev->tag_set.cmd_size = sizeof(struct skd_request_context) +
+ skdev->sgs_per_request * sizeof(struct scatterlist);
+ skdev->tag_set.numa_node = NUMA_NO_NODE;
+ skdev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE |
+ BLK_MQ_F_SG_MERGE |
+ BLK_ALLOC_POLICY_TO_MQ_FLAG(BLK_TAG_ALLOC_FIFO);
+ skdev->tag_set.driver_data = skdev;
+ rc = blk_mq_alloc_tag_set(&skdev->tag_set);
+ if (rc)
+ goto err_out;
+ q = blk_mq_init_queue(&skdev->tag_set);
+ if (IS_ERR(q)) {
+ blk_mq_free_tag_set(&skdev->tag_set);
+ rc = PTR_ERR(q);
goto err_out;
}
- blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
+ q->queuedata = skdev;
skdev->queue = q;
disk->queue = q;
- q->queuedata = skdev;
blk_queue_write_cache(q, true, true);
blk_queue_max_segments(q, skdev->sgs_per_request);
blk_queue_max_hw_sectors(q, SKD_N_MAX_SECTORS);
- /* set sysfs ptimal_io_size to 8K */
+ /* set optimal I/O size to 8KB */
blk_queue_io_opt(q, 8192);
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, q);
+ blk_queue_rq_timeout(q, 8 * HZ);
+
spin_lock_irqsave(&skdev->lock, flags);
- pr_debug("%s:%s:%d stopping %s queue\n",
- skdev->name, __func__, __LINE__, skdev->name);
- blk_stop_queue(skdev->queue);
+ dev_dbg(&skdev->pdev->dev, "stopping queue\n");
+ blk_mq_stop_hw_queues(skdev->queue);
spin_unlock_irqrestore(&skdev->lock, flags);
err_out:
@@ -4306,13 +2877,13 @@ static struct skd_device *skd_construct(struct pci_dev *pdev)
{
struct skd_device *skdev;
int blk_major = skd_major;
+ size_t size;
int rc;
skdev = kzalloc(sizeof(*skdev), GFP_KERNEL);
if (!skdev) {
- pr_err(PFX "(%s): memory alloc failure\n",
- pci_name(pdev));
+ dev_err(&pdev->dev, "memory alloc failure\n");
return NULL;
}
@@ -4320,60 +2891,71 @@ static struct skd_device *skd_construct(struct pci_dev *pdev)
skdev->pdev = pdev;
skdev->devno = skd_next_devno++;
skdev->major = blk_major;
- sprintf(skdev->name, DRV_NAME "%d", skdev->devno);
skdev->dev_max_queue_depth = 0;
skdev->num_req_context = skd_max_queue_depth;
skdev->num_fitmsg_context = skd_max_queue_depth;
- skdev->n_special = skd_max_pass_thru;
skdev->cur_max_queue_depth = 1;
skdev->queue_low_water_mark = 1;
skdev->proto_ver = 99;
skdev->sgs_per_request = skd_sgs_per_request;
skdev->dbg_level = skd_dbg_level;
- atomic_set(&skdev->device_count, 0);
-
spin_lock_init(&skdev->lock);
+ INIT_WORK(&skdev->start_queue, skd_start_queue);
INIT_WORK(&skdev->completion_worker, skd_completion_worker);
- pr_debug("%s:%s:%d skcomp\n", skdev->name, __func__, __LINE__);
+ size = max(SKD_N_FITMSG_BYTES, SKD_N_SPECIAL_FITMSG_BYTES);
+ skdev->msgbuf_cache = kmem_cache_create("skd-msgbuf", size, 0,
+ SLAB_HWCACHE_ALIGN, NULL);
+ if (!skdev->msgbuf_cache)
+ goto err_out;
+ WARN_ONCE(kmem_cache_size(skdev->msgbuf_cache) < size,
+ "skd-msgbuf: %d < %zd\n",
+ kmem_cache_size(skdev->msgbuf_cache), size);
+ size = skd_sgs_per_request * sizeof(struct fit_sg_descriptor);
+ skdev->sglist_cache = kmem_cache_create("skd-sglist", size, 0,
+ SLAB_HWCACHE_ALIGN, NULL);
+ if (!skdev->sglist_cache)
+ goto err_out;
+ WARN_ONCE(kmem_cache_size(skdev->sglist_cache) < size,
+ "skd-sglist: %d < %zd\n",
+ kmem_cache_size(skdev->sglist_cache), size);
+ size = SKD_N_INTERNAL_BYTES;
+ skdev->databuf_cache = kmem_cache_create("skd-databuf", size, 0,
+ SLAB_HWCACHE_ALIGN, NULL);
+ if (!skdev->databuf_cache)
+ goto err_out;
+ WARN_ONCE(kmem_cache_size(skdev->databuf_cache) < size,
+ "skd-databuf: %d < %zd\n",
+ kmem_cache_size(skdev->databuf_cache), size);
+
+ dev_dbg(&skdev->pdev->dev, "skcomp\n");
rc = skd_cons_skcomp(skdev);
if (rc < 0)
goto err_out;
- pr_debug("%s:%s:%d skmsg\n", skdev->name, __func__, __LINE__);
+ dev_dbg(&skdev->pdev->dev, "skmsg\n");
rc = skd_cons_skmsg(skdev);
if (rc < 0)
goto err_out;
- pr_debug("%s:%s:%d skreq\n", skdev->name, __func__, __LINE__);
- rc = skd_cons_skreq(skdev);
- if (rc < 0)
- goto err_out;
-
- pr_debug("%s:%s:%d skspcl\n", skdev->name, __func__, __LINE__);
- rc = skd_cons_skspcl(skdev);
- if (rc < 0)
- goto err_out;
-
- pr_debug("%s:%s:%d sksb\n", skdev->name, __func__, __LINE__);
+ dev_dbg(&skdev->pdev->dev, "sksb\n");
rc = skd_cons_sksb(skdev);
if (rc < 0)
goto err_out;
- pr_debug("%s:%s:%d disk\n", skdev->name, __func__, __LINE__);
+ dev_dbg(&skdev->pdev->dev, "disk\n");
rc = skd_cons_disk(skdev);
if (rc < 0)
goto err_out;
- pr_debug("%s:%s:%d VICTORY\n", skdev->name, __func__, __LINE__);
+ dev_dbg(&skdev->pdev->dev, "VICTORY\n");
return skdev;
err_out:
- pr_debug("%s:%s:%d construct failed\n",
- skdev->name, __func__, __LINE__);
+ dev_dbg(&skdev->pdev->dev, "construct failed\n");
skd_destruct(skdev);
return NULL;
}
@@ -4386,14 +2968,9 @@ static struct skd_device *skd_construct(struct pci_dev *pdev)
static void skd_free_skcomp(struct skd_device *skdev)
{
- if (skdev->skcomp_table != NULL) {
- u32 nbytes;
-
- nbytes = sizeof(skdev->skcomp_table[0]) *
- SKD_N_COMPLETION_ENTRY;
- pci_free_consistent(skdev->pdev, nbytes,
+ if (skdev->skcomp_table)
+ pci_free_consistent(skdev->pdev, SKD_SKCOMP_SIZE,
skdev->skcomp_table, skdev->cq_dma_address);
- }
skdev->skcomp_table = NULL;
skdev->cq_dma_address = 0;
@@ -4412,8 +2989,6 @@ static void skd_free_skmsg(struct skd_device *skdev)
skmsg = &skdev->skmsg_table[i];
if (skmsg->msg_buf != NULL) {
- skmsg->msg_buf += skmsg->offset;
- skmsg->mb_dma_address += skmsg->offset;
pci_free_consistent(skdev->pdev, SKD_N_FITMSG_BYTES,
skmsg->msg_buf,
skmsg->mb_dma_address);
@@ -4426,109 +3001,23 @@ static void skd_free_skmsg(struct skd_device *skdev)
skdev->skmsg_table = NULL;
}
-static void skd_free_sg_list(struct skd_device *skdev,
- struct fit_sg_descriptor *sg_list,
- u32 n_sg, dma_addr_t dma_addr)
-{
- if (sg_list != NULL) {
- u32 nbytes;
-
- nbytes = sizeof(*sg_list) * n_sg;
-
- pci_free_consistent(skdev->pdev, nbytes, sg_list, dma_addr);
- }
-}
-
-static void skd_free_skreq(struct skd_device *skdev)
-{
- u32 i;
-
- if (skdev->skreq_table == NULL)
- return;
-
- for (i = 0; i < skdev->num_req_context; i++) {
- struct skd_request_context *skreq;
-
- skreq = &skdev->skreq_table[i];
-
- skd_free_sg_list(skdev, skreq->sksg_list,
- skdev->sgs_per_request,
- skreq->sksg_dma_address);
-
- skreq->sksg_list = NULL;
- skreq->sksg_dma_address = 0;
-
- kfree(skreq->sg);
- }
-
- kfree(skdev->skreq_table);
- skdev->skreq_table = NULL;
-}
-
-static void skd_free_skspcl(struct skd_device *skdev)
-{
- u32 i;
- u32 nbytes;
-
- if (skdev->skspcl_table == NULL)
- return;
-
- for (i = 0; i < skdev->n_special; i++) {
- struct skd_special_context *skspcl;
-
- skspcl = &skdev->skspcl_table[i];
-
- if (skspcl->msg_buf != NULL) {
- nbytes = SKD_N_SPECIAL_FITMSG_BYTES;
- pci_free_consistent(skdev->pdev, nbytes,
- skspcl->msg_buf,
- skspcl->mb_dma_address);
- }
-
- skspcl->msg_buf = NULL;
- skspcl->mb_dma_address = 0;
-
- skd_free_sg_list(skdev, skspcl->req.sksg_list,
- SKD_N_SG_PER_SPECIAL,
- skspcl->req.sksg_dma_address);
-
- skspcl->req.sksg_list = NULL;
- skspcl->req.sksg_dma_address = 0;
-
- kfree(skspcl->req.sg);
- }
-
- kfree(skdev->skspcl_table);
- skdev->skspcl_table = NULL;
-}
-
static void skd_free_sksb(struct skd_device *skdev)
{
- struct skd_special_context *skspcl;
- u32 nbytes;
+ struct skd_special_context *skspcl = &skdev->internal_skspcl;
- skspcl = &skdev->internal_skspcl;
-
- if (skspcl->data_buf != NULL) {
- nbytes = SKD_N_INTERNAL_BYTES;
-
- pci_free_consistent(skdev->pdev, nbytes,
- skspcl->data_buf, skspcl->db_dma_address);
- }
+ skd_free_dma(skdev, skdev->databuf_cache, skspcl->data_buf,
+ skspcl->db_dma_address, DMA_BIDIRECTIONAL);
skspcl->data_buf = NULL;
skspcl->db_dma_address = 0;
- if (skspcl->msg_buf != NULL) {
- nbytes = SKD_N_SPECIAL_FITMSG_BYTES;
- pci_free_consistent(skdev->pdev, nbytes,
- skspcl->msg_buf, skspcl->mb_dma_address);
- }
+ skd_free_dma(skdev, skdev->msgbuf_cache, skspcl->msg_buf,
+ skspcl->mb_dma_address, DMA_TO_DEVICE);
skspcl->msg_buf = NULL;
skspcl->mb_dma_address = 0;
- skd_free_sg_list(skdev, skspcl->req.sksg_list, 1,
+ skd_free_sg_list(skdev, skspcl->req.sksg_list,
skspcl->req.sksg_dma_address);
skspcl->req.sksg_list = NULL;
@@ -4539,15 +3028,20 @@ static void skd_free_disk(struct skd_device *skdev)
{
struct gendisk *disk = skdev->disk;
- if (disk != NULL) {
- struct request_queue *q = disk->queue;
+ if (disk && (disk->flags & GENHD_FL_UP))
+ del_gendisk(disk);
- if (disk->flags & GENHD_FL_UP)
- del_gendisk(disk);
- if (q)
- blk_cleanup_queue(q);
- put_disk(disk);
+ if (skdev->queue) {
+ blk_cleanup_queue(skdev->queue);
+ skdev->queue = NULL;
+ if (disk)
+ disk->queue = NULL;
}
+
+ if (skdev->tag_set.tags)
+ blk_mq_free_tag_set(&skdev->tag_set);
+
+ put_disk(disk);
skdev->disk = NULL;
}
@@ -4556,26 +3050,25 @@ static void skd_destruct(struct skd_device *skdev)
if (skdev == NULL)
return;
+ cancel_work_sync(&skdev->start_queue);
- pr_debug("%s:%s:%d disk\n", skdev->name, __func__, __LINE__);
+ dev_dbg(&skdev->pdev->dev, "disk\n");
skd_free_disk(skdev);
- pr_debug("%s:%s:%d sksb\n", skdev->name, __func__, __LINE__);
+ dev_dbg(&skdev->pdev->dev, "sksb\n");
skd_free_sksb(skdev);
- pr_debug("%s:%s:%d skspcl\n", skdev->name, __func__, __LINE__);
- skd_free_skspcl(skdev);
-
- pr_debug("%s:%s:%d skreq\n", skdev->name, __func__, __LINE__);
- skd_free_skreq(skdev);
-
- pr_debug("%s:%s:%d skmsg\n", skdev->name, __func__, __LINE__);
+ dev_dbg(&skdev->pdev->dev, "skmsg\n");
skd_free_skmsg(skdev);
- pr_debug("%s:%s:%d skcomp\n", skdev->name, __func__, __LINE__);
+ dev_dbg(&skdev->pdev->dev, "skcomp\n");
skd_free_skcomp(skdev);
- pr_debug("%s:%s:%d skdev\n", skdev->name, __func__, __LINE__);
+ kmem_cache_destroy(skdev->databuf_cache);
+ kmem_cache_destroy(skdev->sglist_cache);
+ kmem_cache_destroy(skdev->msgbuf_cache);
+
+ dev_dbg(&skdev->pdev->dev, "skdev\n");
kfree(skdev);
}
@@ -4592,9 +3085,8 @@ static int skd_bdev_getgeo(struct block_device *bdev, struct hd_geometry *geo)
skdev = bdev->bd_disk->private_data;
- pr_debug("%s:%s:%d %s: CMD[%s] getgeo device\n",
- skdev->name, __func__, __LINE__,
- bdev->bd_disk->disk_name, current->comm);
+ dev_dbg(&skdev->pdev->dev, "%s: CMD[%s] getgeo device\n",
+ bdev->bd_disk->disk_name, current->comm);
if (skdev->read_cap_is_valid) {
capacity = get_capacity(skdev->disk);
@@ -4609,18 +3101,16 @@ static int skd_bdev_getgeo(struct block_device *bdev, struct hd_geometry *geo)
static int skd_bdev_attach(struct device *parent, struct skd_device *skdev)
{
- pr_debug("%s:%s:%d add_disk\n", skdev->name, __func__, __LINE__);
+ dev_dbg(&skdev->pdev->dev, "add_disk\n");
device_add_disk(parent, skdev->disk);
return 0;
}
static const struct block_device_operations skd_blockdev_ops = {
.owner = THIS_MODULE,
- .ioctl = skd_bdev_ioctl,
.getgeo = skd_bdev_getgeo,
};
-
/*
*****************************************************************************
* PCIe DRIVER GLUE
@@ -4671,10 +3161,8 @@ static int skd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
char pci_str[32];
struct skd_device *skdev;
- pr_info("STEC s1120 Driver(%s) version %s-b%s\n",
- DRV_NAME, DRV_VERSION, DRV_BUILD_ID);
- pr_info("(skd?:??:[%s]): vendor=%04X device=%04x\n",
- pci_name(pdev), pdev->vendor, pdev->device);
+ dev_dbg(&pdev->dev, "vendor=%04X device=%04x\n", pdev->vendor,
+ pdev->device);
rc = pci_enable_device(pdev);
if (rc)
@@ -4685,16 +3173,13 @@ static int skd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
if (!rc) {
if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64))) {
-
- pr_err("(%s): consistent DMA mask error %d\n",
- pci_name(pdev), rc);
+ dev_err(&pdev->dev, "consistent DMA mask error %d\n",
+ rc);
}
} else {
- (rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)));
+ rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
if (rc) {
-
- pr_err("(%s): DMA mask error %d\n",
- pci_name(pdev), rc);
+ dev_err(&pdev->dev, "DMA mask error %d\n", rc);
goto err_out_regions;
}
}
@@ -4714,19 +3199,17 @@ static int skd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
}
skd_pci_info(skdev, pci_str);
- pr_info("(%s): %s 64bit\n", skd_name(skdev), pci_str);
+ dev_info(&pdev->dev, "%s 64bit\n", pci_str);
pci_set_master(pdev);
rc = pci_enable_pcie_error_reporting(pdev);
if (rc) {
- pr_err(
- "(%s): bad enable of PCIe error reporting rc=%d\n",
- skd_name(skdev), rc);
+ dev_err(&pdev->dev,
+ "bad enable of PCIe error reporting rc=%d\n", rc);
skdev->pcie_error_reporting_is_enabled = 0;
} else
skdev->pcie_error_reporting_is_enabled = 1;
-
pci_set_drvdata(pdev, skdev);
for (i = 0; i < SKD_MAX_BARS; i++) {
@@ -4735,21 +3218,19 @@ static int skd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
skdev->mem_map[i] = ioremap(skdev->mem_phys[i],
skdev->mem_size[i]);
if (!skdev->mem_map[i]) {
- pr_err("(%s): Unable to map adapter memory!\n",
- skd_name(skdev));
+ dev_err(&pdev->dev,
+ "Unable to map adapter memory!\n");
rc = -ENODEV;
goto err_out_iounmap;
}
- pr_debug("%s:%s:%d mem_map=%p, phyd=%016llx, size=%d\n",
- skdev->name, __func__, __LINE__,
- skdev->mem_map[i],
- (uint64_t)skdev->mem_phys[i], skdev->mem_size[i]);
+ dev_dbg(&pdev->dev, "mem_map=%p, phyd=%016llx, size=%d\n",
+ skdev->mem_map[i], (uint64_t)skdev->mem_phys[i],
+ skdev->mem_size[i]);
}
rc = skd_acquire_irq(skdev);
if (rc) {
- pr_err("(%s): interrupt resource error %d\n",
- skd_name(skdev), rc);
+ dev_err(&pdev->dev, "interrupt resource error %d\n", rc);
goto err_out_iounmap;
}
@@ -4771,29 +3252,14 @@ static int skd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
} else {
/* we timed out, something is wrong with the device,
don't add the disk structure */
- pr_err(
- "(%s): error: waiting for s1120 timed out %d!\n",
- skd_name(skdev), rc);
+ dev_err(&pdev->dev, "error: waiting for s1120 timed out %d!\n",
+ rc);
/* in case of no error; we timeout with ENXIO */
if (!rc)
rc = -ENXIO;
goto err_out_timer;
}
-
-#ifdef SKD_VMK_POLL_HANDLER
- if (skdev->irq_type == SKD_IRQ_MSIX) {
- /* MSIX completion handler is being used for coredump */
- vmklnx_scsi_register_poll_handler(skdev->scsi_host,
- skdev->msix_entries[5].vector,
- skd_comp_q, skdev);
- } else {
- vmklnx_scsi_register_poll_handler(skdev->scsi_host,
- skdev->pdev->irq, skd_isr,
- skdev);
- }
-#endif /* SKD_VMK_POLL_HANDLER */
-
return rc;
err_out_timer:
@@ -4826,7 +3292,7 @@ static void skd_pci_remove(struct pci_dev *pdev)
skdev = pci_get_drvdata(pdev);
if (!skdev) {
- pr_err("%s: no device data for PCI\n", pci_name(pdev));
+ dev_err(&pdev->dev, "no device data for PCI\n");
return;
}
skd_stop_device(skdev);
@@ -4834,7 +3300,7 @@ static void skd_pci_remove(struct pci_dev *pdev)
for (i = 0; i < SKD_MAX_BARS; i++)
if (skdev->mem_map[i])
- iounmap((u32 *)skdev->mem_map[i]);
+ iounmap(skdev->mem_map[i]);
if (skdev->pcie_error_reporting_is_enabled)
pci_disable_pcie_error_reporting(pdev);
@@ -4855,7 +3321,7 @@ static int skd_pci_suspend(struct pci_dev *pdev, pm_message_t state)
skdev = pci_get_drvdata(pdev);
if (!skdev) {
- pr_err("%s: no device data for PCI\n", pci_name(pdev));
+ dev_err(&pdev->dev, "no device data for PCI\n");
return -EIO;
}
@@ -4865,7 +3331,7 @@ static int skd_pci_suspend(struct pci_dev *pdev, pm_message_t state)
for (i = 0; i < SKD_MAX_BARS; i++)
if (skdev->mem_map[i])
- iounmap((u32 *)skdev->mem_map[i]);
+ iounmap(skdev->mem_map[i]);
if (skdev->pcie_error_reporting_is_enabled)
pci_disable_pcie_error_reporting(pdev);
@@ -4885,7 +3351,7 @@ static int skd_pci_resume(struct pci_dev *pdev)
skdev = pci_get_drvdata(pdev);
if (!skdev) {
- pr_err("%s: no device data for PCI\n", pci_name(pdev));
+ dev_err(&pdev->dev, "no device data for PCI\n");
return -1;
}
@@ -4903,15 +3369,14 @@ static int skd_pci_resume(struct pci_dev *pdev)
if (!rc) {
if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64))) {
- pr_err("(%s): consistent DMA mask error %d\n",
- pci_name(pdev), rc);
+ dev_err(&pdev->dev, "consistent DMA mask error %d\n",
+ rc);
}
} else {
rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
if (rc) {
- pr_err("(%s): DMA mask error %d\n",
- pci_name(pdev), rc);
+ dev_err(&pdev->dev, "DMA mask error %d\n", rc);
goto err_out_regions;
}
}
@@ -4919,8 +3384,8 @@ static int skd_pci_resume(struct pci_dev *pdev)
pci_set_master(pdev);
rc = pci_enable_pcie_error_reporting(pdev);
if (rc) {
- pr_err("(%s): bad enable of PCIe error reporting rc=%d\n",
- skdev->name, rc);
+ dev_err(&pdev->dev,
+ "bad enable of PCIe error reporting rc=%d\n", rc);
skdev->pcie_error_reporting_is_enabled = 0;
} else
skdev->pcie_error_reporting_is_enabled = 1;
@@ -4932,21 +3397,17 @@ static int skd_pci_resume(struct pci_dev *pdev)
skdev->mem_map[i] = ioremap(skdev->mem_phys[i],
skdev->mem_size[i]);
if (!skdev->mem_map[i]) {
- pr_err("(%s): Unable to map adapter memory!\n",
- skd_name(skdev));
+ dev_err(&pdev->dev, "Unable to map adapter memory!\n");
rc = -ENODEV;
goto err_out_iounmap;
}
- pr_debug("%s:%s:%d mem_map=%p, phyd=%016llx, size=%d\n",
- skdev->name, __func__, __LINE__,
- skdev->mem_map[i],
- (uint64_t)skdev->mem_phys[i], skdev->mem_size[i]);
+ dev_dbg(&pdev->dev, "mem_map=%p, phyd=%016llx, size=%d\n",
+ skdev->mem_map[i], (uint64_t)skdev->mem_phys[i],
+ skdev->mem_size[i]);
}
rc = skd_acquire_irq(skdev);
if (rc) {
-
- pr_err("(%s): interrupt resource error %d\n",
- pci_name(pdev), rc);
+ dev_err(&pdev->dev, "interrupt resource error %d\n", rc);
goto err_out_iounmap;
}
@@ -4984,15 +3445,15 @@ static void skd_pci_shutdown(struct pci_dev *pdev)
{
struct skd_device *skdev;
- pr_err("skd_pci_shutdown called\n");
+ dev_err(&pdev->dev, "%s called\n", __func__);
skdev = pci_get_drvdata(pdev);
if (!skdev) {
- pr_err("%s: no device data for PCI\n", pci_name(pdev));
+ dev_err(&pdev->dev, "no device data for PCI\n");
return;
}
- pr_err("%s: calling stop\n", skd_name(skdev));
+ dev_err(&pdev->dev, "calling stop\n");
skd_stop_device(skdev);
}
@@ -5012,21 +3473,6 @@ static struct pci_driver skd_driver = {
*****************************************************************************
*/
-static const char *skd_name(struct skd_device *skdev)
-{
- memset(skdev->id_str, 0, sizeof(skdev->id_str));
-
- if (skdev->inquiry_is_valid)
- snprintf(skdev->id_str, sizeof(skdev->id_str), "%s:%s:[%s]",
- skdev->name, skdev->inq_serial_num,
- pci_name(skdev->pdev));
- else
- snprintf(skdev->id_str, sizeof(skdev->id_str), "%s:??:[%s]",
- skdev->name, pci_name(skdev->pdev));
-
- return skdev->id_str;
-}
-
const char *skd_drive_state_to_str(int state)
{
switch (state) {
@@ -5078,8 +3524,6 @@ const char *skd_skdev_state_to_str(enum skd_drvr_state state)
return "PAUSING";
case SKD_DRVR_STATE_PAUSED:
return "PAUSED";
- case SKD_DRVR_STATE_DRAINING_TIMEOUT:
- return "DRAINING_TIMEOUT";
case SKD_DRVR_STATE_RESTARTING:
return "RESTARTING";
case SKD_DRVR_STATE_RESUMING:
@@ -5106,18 +3550,6 @@ const char *skd_skdev_state_to_str(enum skd_drvr_state state)
}
}
-static const char *skd_skmsg_state_to_str(enum skd_fit_msg_state state)
-{
- switch (state) {
- case SKD_MSG_STATE_IDLE:
- return "IDLE";
- case SKD_MSG_STATE_BUSY:
- return "BUSY";
- default:
- return "???";
- }
-}
-
static const char *skd_skreq_state_to_str(enum skd_req_state state)
{
switch (state) {
@@ -5131,8 +3563,6 @@ static const char *skd_skreq_state_to_str(enum skd_req_state state)
return "COMPLETED";
case SKD_REQ_STATE_TIMEOUT:
return "TIMEOUT";
- case SKD_REQ_STATE_ABORTED:
- return "ABORTED";
default:
return "???";
}
@@ -5140,58 +3570,34 @@ static const char *skd_skreq_state_to_str(enum skd_req_state state)
static void skd_log_skdev(struct skd_device *skdev, const char *event)
{
- pr_debug("%s:%s:%d (%s) skdev=%p event='%s'\n",
- skdev->name, __func__, __LINE__, skdev->name, skdev, event);
- pr_debug("%s:%s:%d drive_state=%s(%d) driver_state=%s(%d)\n",
- skdev->name, __func__, __LINE__,
- skd_drive_state_to_str(skdev->drive_state), skdev->drive_state,
- skd_skdev_state_to_str(skdev->state), skdev->state);
- pr_debug("%s:%s:%d busy=%d limit=%d dev=%d lowat=%d\n",
- skdev->name, __func__, __LINE__,
- skdev->in_flight, skdev->cur_max_queue_depth,
- skdev->dev_max_queue_depth, skdev->queue_low_water_mark);
- pr_debug("%s:%s:%d timestamp=0x%x cycle=%d cycle_ix=%d\n",
- skdev->name, __func__, __LINE__,
- skdev->timeout_stamp, skdev->skcomp_cycle, skdev->skcomp_ix);
-}
-
-static void skd_log_skmsg(struct skd_device *skdev,
- struct skd_fitmsg_context *skmsg, const char *event)
-{
- pr_debug("%s:%s:%d (%s) skmsg=%p event='%s'\n",
- skdev->name, __func__, __LINE__, skdev->name, skmsg, event);
- pr_debug("%s:%s:%d state=%s(%d) id=0x%04x length=%d\n",
- skdev->name, __func__, __LINE__,
- skd_skmsg_state_to_str(skmsg->state), skmsg->state,
- skmsg->id, skmsg->length);
+ dev_dbg(&skdev->pdev->dev, "skdev=%p event='%s'\n", skdev, event);
+ dev_dbg(&skdev->pdev->dev, " drive_state=%s(%d) driver_state=%s(%d)\n",
+ skd_drive_state_to_str(skdev->drive_state), skdev->drive_state,
+ skd_skdev_state_to_str(skdev->state), skdev->state);
+ dev_dbg(&skdev->pdev->dev, " busy=%d limit=%d dev=%d lowat=%d\n",
+ skd_in_flight(skdev), skdev->cur_max_queue_depth,
+ skdev->dev_max_queue_depth, skdev->queue_low_water_mark);
+ dev_dbg(&skdev->pdev->dev, " cycle=%d cycle_ix=%d\n",
+ skdev->skcomp_cycle, skdev->skcomp_ix);
}
static void skd_log_skreq(struct skd_device *skdev,
struct skd_request_context *skreq, const char *event)
{
- pr_debug("%s:%s:%d (%s) skreq=%p event='%s'\n",
- skdev->name, __func__, __LINE__, skdev->name, skreq, event);
- pr_debug("%s:%s:%d state=%s(%d) id=0x%04x fitmsg=0x%04x\n",
- skdev->name, __func__, __LINE__,
- skd_skreq_state_to_str(skreq->state), skreq->state,
- skreq->id, skreq->fitmsg_id);
- pr_debug("%s:%s:%d timo=0x%x sg_dir=%d n_sg=%d\n",
- skdev->name, __func__, __LINE__,
- skreq->timeout_stamp, skreq->sg_data_dir, skreq->n_sg);
+ struct request *req = blk_mq_rq_from_pdu(skreq);
+ u32 lba = blk_rq_pos(req);
+ u32 count = blk_rq_sectors(req);
- if (skreq->req != NULL) {
- struct request *req = skreq->req;
- u32 lba = (u32)blk_rq_pos(req);
- u32 count = blk_rq_sectors(req);
+ dev_dbg(&skdev->pdev->dev, "skreq=%p event='%s'\n", skreq, event);
+ dev_dbg(&skdev->pdev->dev, " state=%s(%d) id=0x%04x fitmsg=0x%04x\n",
+ skd_skreq_state_to_str(skreq->state), skreq->state, skreq->id,
+ skreq->fitmsg_id);
+ dev_dbg(&skdev->pdev->dev, " sg_dir=%d n_sg=%d\n",
+ skreq->data_dir, skreq->n_sg);
- pr_debug("%s:%s:%d "
- "req=%p lba=%u(0x%x) count=%u(0x%x) dir=%d\n",
- skdev->name, __func__, __LINE__,
- req, lba, lba, count, count,
- (int)rq_data_dir(req));
- } else
- pr_debug("%s:%s:%d req=NULL\n",
- skdev->name, __func__, __LINE__);
+ dev_dbg(&skdev->pdev->dev,
+ "req=%p lba=%u(0x%x) count=%u(0x%x) dir=%d\n", req, lba, lba,
+ count, count, (int)rq_data_dir(req));
}
/*
@@ -5202,7 +3608,14 @@ static void skd_log_skreq(struct skd_device *skdev,
static int __init skd_init(void)
{
- pr_info(PFX " v%s-b%s loaded\n", DRV_VERSION, DRV_BUILD_ID);
+ BUILD_BUG_ON(sizeof(struct fit_completion_entry_v1) != 8);
+ BUILD_BUG_ON(sizeof(struct fit_comp_error_info) != 32);
+ BUILD_BUG_ON(sizeof(struct skd_command_header) != 16);
+ BUILD_BUG_ON(sizeof(struct skd_scsi_request) != 32);
+ BUILD_BUG_ON(sizeof(struct driver_inquiry_data) != 44);
+ BUILD_BUG_ON(offsetof(struct skd_msg_buf, fmh) != 0);
+ BUILD_BUG_ON(offsetof(struct skd_msg_buf, scsi) != 64);
+ BUILD_BUG_ON(sizeof(struct skd_msg_buf) != SKD_N_FITMSG_BYTES);
switch (skd_isr_type) {
case SKD_IRQ_LEGACY:
@@ -5222,7 +3635,8 @@ static int __init skd_init(void)
skd_max_queue_depth = SKD_MAX_QUEUE_DEPTH_DEFAULT;
}
- if (skd_max_req_per_msg < 1 || skd_max_req_per_msg > 14) {
+ if (skd_max_req_per_msg < 1 ||
+ skd_max_req_per_msg > SKD_MAX_REQ_PER_MSG) {
pr_err(PFX "skd_max_req_per_msg %d invalid, re-set to %d\n",
skd_max_req_per_msg, SKD_MAX_REQ_PER_MSG_DEFAULT);
skd_max_req_per_msg = SKD_MAX_REQ_PER_MSG_DEFAULT;
@@ -5246,19 +3660,11 @@ static int __init skd_init(void)
skd_isr_comp_limit = 0;
}
- if (skd_max_pass_thru < 1 || skd_max_pass_thru > 50) {
- pr_err(PFX "skd_max_pass_thru %d invalid, re-set to %d\n",
- skd_max_pass_thru, SKD_N_SPECIAL_CONTEXT);
- skd_max_pass_thru = SKD_N_SPECIAL_CONTEXT;
- }
-
return pci_register_driver(&skd_driver);
}
static void __exit skd_exit(void)
{
- pr_info(PFX " v%s-b%s unloading\n", DRV_VERSION, DRV_BUILD_ID);
-
pci_unregister_driver(&skd_driver);
if (skd_major)
diff --git a/drivers/block/skd_s1120.h b/drivers/block/skd_s1120.h
index 61c757f..de35f47 100644
--- a/drivers/block/skd_s1120.h
+++ b/drivers/block/skd_s1120.h
@@ -1,19 +1,15 @@
-/* Copyright 2012 STEC, Inc.
+/*
+ * Copyright 2012 STEC, Inc.
+ * Copyright (c) 2017 Western Digital Corporation or its affiliates.
*
- * This file is licensed under the terms of the 3-clause
- * BSD License (http://opensource.org/licenses/BSD-3-Clause)
- * or the GNU GPL-2.0 (http://www.gnu.org/licenses/gpl-2.0.html),
- * at your option. Both licenses are also available in the LICENSE file
- * distributed with this project. This file may not be copied, modified,
- * or distributed except in accordance with those terms.
+ * This file is part of the Linux kernel, and is made available under
+ * the terms of the GNU General Public License version 2.
*/
#ifndef SKD_S1120_H
#define SKD_S1120_H
-#pragma pack(push, s1120_h, 1)
-
/*
* Q-channel, 64-bit r/w
*/
@@ -30,7 +26,7 @@
#define FIT_QCMD_MSGSIZE_128 (0x1 << 4)
#define FIT_QCMD_MSGSIZE_256 (0x2 << 4)
#define FIT_QCMD_MSGSIZE_512 (0x3 << 4)
-#define FIT_QCMD_BASE_ADDRESS_MASK (0xFFFFFFFFFFFFFFC0ull)
+#define FIT_QCMD_ALIGN L1_CACHE_BYTES
/*
* Control, 32-bit r/w
@@ -250,7 +246,7 @@ struct fit_msg_hdr {
* 20-23 of the FIT_MTD_FITFW_INIT response.
*/
struct fit_completion_entry_v1 {
- uint32_t num_returned_bytes;
+ __be32 num_returned_bytes;
uint16_t tag;
uint8_t status; /* SCSI status */
uint8_t cycle;
@@ -278,7 +274,7 @@ struct fit_comp_error_info {
uint16_t sks_low; /* 10: Sense Key Specific (LSW) */
uint16_t reserved3; /* 12: Part of additional sense bytes (unused) */
uint16_t uec; /* 14: Additional Sense Bytes */
- uint64_t per; /* 16: Additional Sense Bytes */
+ uint64_t per __packed; /* 16: Additional Sense Bytes */
uint8_t reserved4[2]; /* 1E: Additional Sense Bytes (unused) */
};
@@ -292,11 +288,11 @@ struct fit_comp_error_info {
* Version one has the last 32 bits sg_list_len_bytes;
*/
struct skd_command_header {
- uint64_t sg_list_dma_address;
+ __be64 sg_list_dma_address;
uint16_t tag;
uint8_t attribute;
uint8_t add_cdb_len; /* In 32 bit words */
- uint32_t sg_list_len_bytes;
+ __be32 sg_list_len_bytes;
};
struct skd_scsi_request {
@@ -309,22 +305,20 @@ struct driver_inquiry_data {
uint8_t peripheral_device_type:5;
uint8_t qualifier:3;
uint8_t page_code;
- uint16_t page_length;
- uint16_t pcie_bus_number;
+ __be16 page_length;
+ __be16 pcie_bus_number;
uint8_t pcie_device_number;
uint8_t pcie_function_number;
uint8_t pcie_link_speed;
uint8_t pcie_link_lanes;
- uint16_t pcie_vendor_id;
- uint16_t pcie_device_id;
- uint16_t pcie_subsystem_vendor_id;
- uint16_t pcie_subsystem_device_id;
+ __be16 pcie_vendor_id;
+ __be16 pcie_device_id;
+ __be16 pcie_subsystem_vendor_id;
+ __be16 pcie_subsystem_device_id;
uint8_t reserved1[2];
uint8_t reserved2[3];
uint8_t driver_version_length;
uint8_t driver_version[0x14];
};
-#pragma pack(pop, s1120_h)
-
#endif /* SKD_S1120_H */
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index d3d5523..34e17ee 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -265,7 +265,7 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
}
spin_lock_irqsave(&vblk->vqs[qid].lock, flags);
- if (req_op(req) == REQ_OP_SCSI_IN || req_op(req) == REQ_OP_SCSI_OUT)
+ if (blk_rq_is_scsi(req))
err = virtblk_add_req_scsi(vblk->vqs[qid].vq, vbr, vbr->sg, num);
else
err = virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg, num);
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index fe7cd58..987d665 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -705,9 +705,9 @@ static unsigned int xen_blkbk_unmap_prepare(
GNTMAP_host_map, pages[i]->handle);
pages[i]->handle = BLKBACK_INVALID_HANDLE;
invcount++;
- }
+ }
- return invcount;
+ return invcount;
}
static void xen_blkbk_unmap_and_respond_callback(int result, struct gntab_unmap_queue_data *data)
@@ -1251,6 +1251,7 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
break;
case BLKIF_OP_WRITE_BARRIER:
drain = true;
+ /* fall through */
case BLKIF_OP_FLUSH_DISKCACHE:
ring->st_f_req++;
operation = REQ_OP_WRITE;
@@ -1362,7 +1363,7 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
goto fail_put_bio;
biolist[nbio++] = bio;
- bio->bi_bdev = preq.bdev;
+ bio_set_dev(bio, preq.bdev);
bio->bi_private = pending_req;
bio->bi_end_io = end_block_io_op;
bio->bi_iter.bi_sector = preq.sector_number;
@@ -1381,7 +1382,7 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
goto fail_put_bio;
biolist[nbio++] = bio;
- bio->bi_bdev = preq.bdev;
+ bio_set_dev(bio, preq.bdev);
bio->bi_private = pending_req;
bio->bi_end_io = end_block_io_op;
bio_set_op_attrs(bio, operation, operation_flags);
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 2adb859..21c1be1 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -816,7 +816,8 @@ static void frontend_changed(struct xenbus_device *dev,
xenbus_switch_state(dev, XenbusStateClosed);
if (xenbus_dev_is_online(dev))
break;
- /* fall through if not online */
+ /* fall through */
+ /* if not online */
case XenbusStateUnknown:
/* implies xen_blkif_disconnect() via xen_blkbk_remove() */
device_unregister(&dev->dev);
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 2468c28..891265a 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -2456,7 +2456,7 @@ static void blkback_changed(struct xenbus_device *dev,
case XenbusStateClosed:
if (dev->state == XenbusStateClosed)
break;
- /* Missed the backend's Closing state -- fallthrough */
+ /* fall through */
case XenbusStateClosing:
if (info)
blkfront_closing(info);
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 4a0438c..4063f3f 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -467,7 +467,7 @@ static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec,
return -ENOMEM;
bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
- bio->bi_bdev = zram->bdev;
+ bio_set_dev(bio, zram->bdev);
if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len, bvec->bv_offset)) {
bio_put(bio);
return -EIO;
@@ -561,7 +561,7 @@ static int write_to_bdev(struct zram *zram, struct bio_vec *bvec,
}
bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
- bio->bi_bdev = zram->bdev;
+ bio_set_dev(bio, zram->bdev);
if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len,
bvec->bv_offset)) {
bio_put(bio);
@@ -1171,9 +1171,10 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
{
unsigned long start_time = jiffies;
int rw_acct = is_write ? REQ_OP_WRITE : REQ_OP_READ;
+ struct request_queue *q = zram->disk->queue;
int ret;
- generic_start_io_acct(rw_acct, bvec->bv_len >> SECTOR_SHIFT,
+ generic_start_io_acct(q, rw_acct, bvec->bv_len >> SECTOR_SHIFT,
&zram->disk->part0);
if (!is_write) {
@@ -1185,7 +1186,7 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
ret = zram_bvec_write(zram, bvec, index, offset, bio);
}
- generic_end_io_acct(rw_acct, &zram->disk->part0, start_time);
+ generic_end_io_acct(q, rw_acct, &zram->disk->part0, start_time);
if (unlikely(ret < 0)) {
if (!is_write)
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index 627b1f6..3ddd882 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -72,7 +72,7 @@ static int ide_floppy_callback(ide_drive_t *drive, int dsc)
drive->failed_pc = NULL;
if (pc->c[0] == GPCMD_READ_10 || pc->c[0] == GPCMD_WRITE_10 ||
- (req_op(rq) == REQ_OP_SCSI_IN || req_op(rq) == REQ_OP_SCSI_OUT))
+ blk_rq_is_scsi(rq))
uptodate = 1; /* FIXME */
else if (pc->c[0] == GPCMD_REQUEST_SENSE) {
diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c
index 35a5a72..61076eda 100644
--- a/drivers/md/bcache/debug.c
+++ b/drivers/md/bcache/debug.c
@@ -49,7 +49,7 @@ void bch_btree_verify(struct btree *b)
v->keys.ops = b->keys.ops;
bio = bch_bbio_alloc(b->c);
- bio->bi_bdev = PTR_CACHE(b->c, &b->key, 0)->bdev;
+ bio_set_dev(bio, PTR_CACHE(b->c, &b->key, 0)->bdev);
bio->bi_iter.bi_sector = PTR_OFFSET(&b->key, 0);
bio->bi_iter.bi_size = KEY_SIZE(&v->key) << 9;
bio->bi_opf = REQ_OP_READ | REQ_META;
diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
index 6a9b850..7e871bd 100644
--- a/drivers/md/bcache/io.c
+++ b/drivers/md/bcache/io.c
@@ -34,7 +34,7 @@ void __bch_submit_bbio(struct bio *bio, struct cache_set *c)
struct bbio *b = container_of(bio, struct bbio, bio);
bio->bi_iter.bi_sector = PTR_OFFSET(&b->key, 0);
- bio->bi_bdev = PTR_CACHE(c, &b->key, 0)->bdev;
+ bio_set_dev(bio, PTR_CACHE(c, &b->key, 0)->bdev);
b->submit_time_us = local_clock_us();
closure_bio_submit(bio, bio->bi_private);
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index 0352d05..7e1d1c3 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -53,7 +53,7 @@ reread: left = ca->sb.bucket_size - offset;
bio_reset(bio);
bio->bi_iter.bi_sector = bucket + offset;
- bio->bi_bdev = ca->bdev;
+ bio_set_dev(bio, ca->bdev);
bio->bi_iter.bi_size = len << 9;
bio->bi_end_io = journal_read_endio;
@@ -452,7 +452,7 @@ static void do_journal_discard(struct cache *ca)
bio_set_op_attrs(bio, REQ_OP_DISCARD, 0);
bio->bi_iter.bi_sector = bucket_to_sector(ca->set,
ca->sb.d[ja->discard_idx]);
- bio->bi_bdev = ca->bdev;
+ bio_set_dev(bio, ca->bdev);
bio->bi_iter.bi_size = bucket_bytes(ca);
bio->bi_end_io = journal_discard_endio;
@@ -623,7 +623,7 @@ static void journal_write_unlocked(struct closure *cl)
bio_reset(bio);
bio->bi_iter.bi_sector = PTR_OFFSET(k, i);
- bio->bi_bdev = ca->bdev;
+ bio_set_dev(bio, ca->bdev);
bio->bi_iter.bi_size = sectors << 9;
bio->bi_end_io = journal_write_endio;
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 019b3df..0e1463d 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -607,7 +607,8 @@ static void request_endio(struct bio *bio)
static void bio_complete(struct search *s)
{
if (s->orig_bio) {
- generic_end_io_acct(bio_data_dir(s->orig_bio),
+ struct request_queue *q = s->orig_bio->bi_disk->queue;
+ generic_end_io_acct(q, bio_data_dir(s->orig_bio),
&s->d->disk->part0, s->start_time);
trace_bcache_request_end(s->d, s->orig_bio);
@@ -734,7 +735,7 @@ static void cached_dev_read_done(struct closure *cl)
if (s->iop.bio) {
bio_reset(s->iop.bio);
s->iop.bio->bi_iter.bi_sector = s->cache_miss->bi_iter.bi_sector;
- s->iop.bio->bi_bdev = s->cache_miss->bi_bdev;
+ bio_copy_dev(s->iop.bio, s->cache_miss);
s->iop.bio->bi_iter.bi_size = s->insert_bio_sectors << 9;
bch_bio_map(s->iop.bio, NULL);
@@ -793,7 +794,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
!(bio->bi_opf & REQ_META) &&
s->iop.c->gc_stats.in_use < CUTOFF_CACHE_READA)
reada = min_t(sector_t, dc->readahead >> 9,
- bdev_sectors(bio->bi_bdev) - bio_end_sector(bio));
+ get_capacity(bio->bi_disk) - bio_end_sector(bio));
s->insert_bio_sectors = min(sectors, bio_sectors(bio) + reada);
@@ -819,7 +820,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
goto out_submit;
cache_bio->bi_iter.bi_sector = miss->bi_iter.bi_sector;
- cache_bio->bi_bdev = miss->bi_bdev;
+ bio_copy_dev(cache_bio, miss);
cache_bio->bi_iter.bi_size = s->insert_bio_sectors << 9;
cache_bio->bi_end_io = request_endio;
@@ -918,7 +919,7 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
struct bio *flush = bio_alloc_bioset(GFP_NOIO, 0,
dc->disk.bio_split);
- flush->bi_bdev = bio->bi_bdev;
+ bio_copy_dev(flush, bio);
flush->bi_end_io = request_endio;
flush->bi_private = cl;
flush->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
@@ -955,13 +956,13 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q,
struct bio *bio)
{
struct search *s;
- struct bcache_device *d = bio->bi_bdev->bd_disk->private_data;
+ struct bcache_device *d = bio->bi_disk->private_data;
struct cached_dev *dc = container_of(d, struct cached_dev, disk);
int rw = bio_data_dir(bio);
- generic_start_io_acct(rw, bio_sectors(bio), &d->disk->part0);
+ generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0);
- bio->bi_bdev = dc->bdev;
+ bio_set_dev(bio, dc->bdev);
bio->bi_iter.bi_sector += dc->sb.data_offset;
if (cached_dev_get(dc)) {
@@ -1071,10 +1072,10 @@ static blk_qc_t flash_dev_make_request(struct request_queue *q,
{
struct search *s;
struct closure *cl;
- struct bcache_device *d = bio->bi_bdev->bd_disk->private_data;
+ struct bcache_device *d = bio->bi_disk->private_data;
int rw = bio_data_dir(bio);
- generic_start_io_acct(rw, bio_sectors(bio), &d->disk->part0);
+ generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0);
s = search_alloc(bio, d);
cl = &s->cl;
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 8352fad..974d832 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -257,7 +257,7 @@ void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent)
closure_init(cl, parent);
bio_reset(bio);
- bio->bi_bdev = dc->bdev;
+ bio_set_dev(bio, dc->bdev);
bio->bi_end_io = write_bdev_super_endio;
bio->bi_private = dc;
@@ -303,7 +303,7 @@ void bcache_write_super(struct cache_set *c)
SET_CACHE_SYNC(&ca->sb, CACHE_SYNC(&c->sb));
bio_reset(bio);
- bio->bi_bdev = ca->bdev;
+ bio_set_dev(bio, ca->bdev);
bio->bi_end_io = write_super_endio;
bio->bi_private = ca;
@@ -508,7 +508,7 @@ static void prio_io(struct cache *ca, uint64_t bucket, int op,
closure_init_stack(cl);
bio->bi_iter.bi_sector = bucket * ca->sb.bucket_size;
- bio->bi_bdev = ca->bdev;
+ bio_set_dev(bio, ca->bdev);
bio->bi_iter.bi_size = bucket_bytes(ca);
bio->bi_end_io = prio_endio;
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 42c66e7..c49022a 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -181,7 +181,7 @@ static void write_dirty(struct closure *cl)
dirty_init(w);
bio_set_op_attrs(&io->bio, REQ_OP_WRITE, 0);
io->bio.bi_iter.bi_sector = KEY_START(&w->key);
- io->bio.bi_bdev = io->dc->bdev;
+ bio_set_dev(&io->bio, io->dc->bdev);
io->bio.bi_end_io = dirty_endio;
closure_bio_submit(&io->bio, cl);
@@ -250,8 +250,7 @@ static void read_dirty(struct cached_dev *dc)
dirty_init(w);
bio_set_op_attrs(&io->bio, REQ_OP_READ, 0);
io->bio.bi_iter.bi_sector = PTR_OFFSET(&w->key, 0);
- io->bio.bi_bdev = PTR_CACHE(dc->disk.c,
- &w->key, 0)->bdev;
+ bio_set_dev(&io->bio, PTR_CACHE(dc->disk.c, &w->key, 0)->bdev);
io->bio.bi_end_io = read_dirty_endio;
if (bio_alloc_pages(&io->bio, GFP_KERNEL))
diff --git a/drivers/md/dm-bio-record.h b/drivers/md/dm-bio-record.h
index dd36461..c82578a 100644
--- a/drivers/md/dm-bio-record.h
+++ b/drivers/md/dm-bio-record.h
@@ -18,21 +18,24 @@
*/
struct dm_bio_details {
- struct block_device *bi_bdev;
+ struct gendisk *bi_disk;
+ u8 bi_partno;
unsigned long bi_flags;
struct bvec_iter bi_iter;
};
static inline void dm_bio_record(struct dm_bio_details *bd, struct bio *bio)
{
- bd->bi_bdev = bio->bi_bdev;
+ bd->bi_disk = bio->bi_disk;
+ bd->bi_partno = bio->bi_partno;
bd->bi_flags = bio->bi_flags;
bd->bi_iter = bio->bi_iter;
}
static inline void dm_bio_restore(struct dm_bio_details *bd, struct bio *bio)
{
- bio->bi_bdev = bd->bi_bdev;
+ bio->bi_disk = bd->bi_disk;
+ bio->bi_partno = bd->bi_partno;
bio->bi_flags = bd->bi_flags;
bio->bi_iter = bd->bi_iter;
}
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 44f4a8a..9601225 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -616,7 +616,7 @@ static void use_inline_bio(struct dm_buffer *b, int rw, sector_t sector,
bio_init(&b->bio, b->bio_vec, DM_BUFIO_INLINE_VECS);
b->bio.bi_iter.bi_sector = sector;
- b->bio.bi_bdev = b->c->bdev;
+ bio_set_dev(&b->bio, b->c->bdev);
b->bio.bi_end_io = inline_endio;
/*
* Use of .bi_private isn't a problem here because
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index c5ea03f..dcac25c 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -833,7 +833,7 @@ static bool is_discarded_oblock(struct cache *cache, dm_oblock_t b)
*--------------------------------------------------------------*/
static void remap_to_origin(struct cache *cache, struct bio *bio)
{
- bio->bi_bdev = cache->origin_dev->bdev;
+ bio_set_dev(bio, cache->origin_dev->bdev);
}
static void remap_to_cache(struct cache *cache, struct bio *bio,
@@ -842,7 +842,7 @@ static void remap_to_cache(struct cache *cache, struct bio *bio,
sector_t bi_sector = bio->bi_iter.bi_sector;
sector_t block = from_cblock(cblock);
- bio->bi_bdev = cache->cache_dev->bdev;
+ bio_set_dev(bio, cache->cache_dev->bdev);
if (!block_size_is_power_of_two(cache))
bio->bi_iter.bi_sector =
(block * cache->sectors_per_block) +
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index fa17e54..54aef8e 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -932,9 +932,6 @@ static int dm_crypt_integrity_io_alloc(struct dm_crypt_io *io, struct bio *bio)
bip->bip_iter.bi_size = tag_len;
bip->bip_iter.bi_sector = io->cc->start + io->sector;
- /* We own the metadata, do not let bio_free to release it */
- bip->bip_flags &= ~BIP_BLOCK_INTEGRITY;
-
ret = bio_integrity_add_page(bio, virt_to_page(io->integrity_metadata),
tag_len, offset_in_page(io->integrity_metadata));
if (unlikely(ret != tag_len))
@@ -1546,7 +1543,7 @@ static void clone_init(struct dm_crypt_io *io, struct bio *clone)
clone->bi_private = io;
clone->bi_end_io = crypt_endio;
- clone->bi_bdev = cc->dev->bdev;
+ bio_set_dev(clone, cc->dev->bdev);
clone->bi_opf = io->base_bio->bi_opf;
}
@@ -2795,7 +2792,7 @@ static int crypt_map(struct dm_target *ti, struct bio *bio)
*/
if (unlikely(bio->bi_opf & REQ_PREFLUSH ||
bio_op(bio) == REQ_OP_DISCARD)) {
- bio->bi_bdev = cc->dev->bdev;
+ bio_set_dev(bio, cc->dev->bdev);
if (bio_sectors(bio))
bio->bi_iter.bi_sector = cc->start +
dm_target_offset(ti, bio->bi_iter.bi_sector);
diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
index ae31587..2209a97 100644
--- a/drivers/md/dm-delay.c
+++ b/drivers/md/dm-delay.c
@@ -282,7 +282,7 @@ static int delay_map(struct dm_target *ti, struct bio *bio)
struct delay_c *dc = ti->private;
if ((bio_data_dir(bio) == WRITE) && (dc->dev_write)) {
- bio->bi_bdev = dc->dev_write->bdev;
+ bio_set_dev(bio, dc->dev_write->bdev);
if (bio_sectors(bio))
bio->bi_iter.bi_sector = dc->start_write +
dm_target_offset(ti, bio->bi_iter.bi_sector);
@@ -290,7 +290,7 @@ static int delay_map(struct dm_target *ti, struct bio *bio)
return delay_bio(dc, dc->write_delay, bio);
}
- bio->bi_bdev = dc->dev_read->bdev;
+ bio_set_dev(bio, dc->dev_read->bdev);
bio->bi_iter.bi_sector = dc->start_read +
dm_target_offset(ti, bio->bi_iter.bi_sector);
diff --git a/drivers/md/dm-era-target.c b/drivers/md/dm-era-target.c
index e7ba89f..ba84b8d 100644
--- a/drivers/md/dm-era-target.c
+++ b/drivers/md/dm-era-target.c
@@ -1192,7 +1192,7 @@ static dm_block_t get_block(struct era *era, struct bio *bio)
static void remap_to_origin(struct era *era, struct bio *bio)
{
- bio->bi_bdev = era->origin_dev->bdev;
+ bio_set_dev(bio, era->origin_dev->bdev);
}
/*----------------------------------------------------------------
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index e2c7234..7146c2d 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -274,7 +274,7 @@ static void flakey_map_bio(struct dm_target *ti, struct bio *bio)
{
struct flakey_c *fc = ti->private;
- bio->bi_bdev = fc->dev->bdev;
+ bio_set_dev(bio, fc->dev->bdev);
if (bio_sectors(bio) || bio_op(bio) == REQ_OP_ZONE_RESET)
bio->bi_iter.bi_sector =
flakey_map_sector(ti, bio->bi_iter.bi_sector);
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index 3acce09..27c0f22 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -250,7 +250,8 @@ struct dm_integrity_io {
struct completion *completion;
- struct block_device *orig_bi_bdev;
+ struct gendisk *orig_bi_disk;
+ u8 orig_bi_partno;
bio_end_io_t *orig_bi_end_io;
struct bio_integrity_payload *orig_bi_integrity;
struct bvec_iter orig_bi_iter;
@@ -1164,7 +1165,8 @@ static void integrity_end_io(struct bio *bio)
struct dm_integrity_io *dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io));
bio->bi_iter = dio->orig_bi_iter;
- bio->bi_bdev = dio->orig_bi_bdev;
+ bio->bi_disk = dio->orig_bi_disk;
+ bio->bi_partno = dio->orig_bi_partno;
if (dio->orig_bi_integrity) {
bio->bi_integrity = dio->orig_bi_integrity;
bio->bi_opf |= REQ_INTEGRITY;
@@ -1681,8 +1683,9 @@ static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map
dio->orig_bi_iter = bio->bi_iter;
- dio->orig_bi_bdev = bio->bi_bdev;
- bio->bi_bdev = ic->dev->bdev;
+ dio->orig_bi_disk = bio->bi_disk;
+ dio->orig_bi_partno = bio->bi_partno;
+ bio_set_dev(bio, ic->dev->bdev);
dio->orig_bi_integrity = bio_integrity(bio);
bio->bi_integrity = NULL;
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index 2503960..b4357ed4 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -347,7 +347,7 @@ static void do_region(int op, int op_flags, unsigned region,
bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, io->client->bios);
bio->bi_iter.bi_sector = where->sector + (where->count - remaining);
- bio->bi_bdev = where->bdev;
+ bio_set_dev(bio, where->bdev);
bio->bi_end_io = endio;
bio_set_op_attrs(bio, op, op_flags);
store_io_and_region_in_bio(bio, io, region);
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 41971a0..405eca2 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -88,7 +88,7 @@ static void linear_map_bio(struct dm_target *ti, struct bio *bio)
{
struct linear_c *lc = ti->private;
- bio->bi_bdev = lc->dev->bdev;
+ bio_set_dev(bio, lc->dev->bdev);
if (bio_sectors(bio) || bio_op(bio) == REQ_OP_ZONE_RESET)
bio->bi_iter.bi_sector =
linear_map_sector(ti, bio->bi_iter.bi_sector);
diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c
index a1da0eb..534a254 100644
--- a/drivers/md/dm-log-writes.c
+++ b/drivers/md/dm-log-writes.c
@@ -198,7 +198,7 @@ static int write_metadata(struct log_writes_c *lc, void *entry,
}
bio->bi_iter.bi_size = 0;
bio->bi_iter.bi_sector = sector;
- bio->bi_bdev = lc->logdev->bdev;
+ bio_set_dev(bio, lc->logdev->bdev);
bio->bi_end_io = log_end_io;
bio->bi_private = lc;
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
@@ -263,7 +263,7 @@ static int log_one_block(struct log_writes_c *lc,
}
bio->bi_iter.bi_size = 0;
bio->bi_iter.bi_sector = sector;
- bio->bi_bdev = lc->logdev->bdev;
+ bio_set_dev(bio, lc->logdev->bdev);
bio->bi_end_io = log_end_io;
bio->bi_private = lc;
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
@@ -285,7 +285,7 @@ static int log_one_block(struct log_writes_c *lc,
}
bio->bi_iter.bi_size = 0;
bio->bi_iter.bi_sector = sector;
- bio->bi_bdev = lc->logdev->bdev;
+ bio_set_dev(bio, lc->logdev->bdev);
bio->bi_end_io = log_end_io;
bio->bi_private = lc;
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
@@ -539,7 +539,7 @@ static void normal_map_bio(struct dm_target *ti, struct bio *bio)
{
struct log_writes_c *lc = ti->private;
- bio->bi_bdev = lc->dev->bdev;
+ bio_set_dev(bio, lc->dev->bdev);
}
static int log_writes_map(struct dm_target *ti, struct bio *bio)
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index d24e4b0..96aedaa 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -565,7 +565,7 @@ static int __multipath_map_bio(struct multipath *m, struct bio *bio, struct dm_m
mpio->nr_bytes = nr_bytes;
bio->bi_status = 0;
- bio->bi_bdev = pgpath->path.dev->bdev;
+ bio_set_dev(bio, pgpath->path.dev->bdev);
bio->bi_opf |= REQ_FAILFAST_TRANSPORT;
if (pgpath->pg->ps.type->start_io)
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index a4fbd91..c0b8213 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -145,7 +145,7 @@ static void dispatch_bios(void *context, struct bio_list *bio_list)
struct dm_raid1_bio_record {
struct mirror *m;
- /* if details->bi_bdev == NULL, details were not saved */
+ /* if details->bi_disk == NULL, details were not saved */
struct dm_bio_details details;
region_t write_region;
};
@@ -464,7 +464,7 @@ static sector_t map_sector(struct mirror *m, struct bio *bio)
static void map_bio(struct mirror *m, struct bio *bio)
{
- bio->bi_bdev = m->dev->bdev;
+ bio_set_dev(bio, m->dev->bdev);
bio->bi_iter.bi_sector = map_sector(m, bio);
}
@@ -1199,7 +1199,7 @@ static int mirror_map(struct dm_target *ti, struct bio *bio)
struct dm_raid1_bio_record *bio_record =
dm_per_bio_data(bio, sizeof(struct dm_raid1_bio_record));
- bio_record->details.bi_bdev = NULL;
+ bio_record->details.bi_disk = NULL;
if (rw == WRITE) {
/* Save region for mirror_end_io() handler */
@@ -1266,7 +1266,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio,
goto out;
if (unlikely(*error)) {
- if (!bio_record->details.bi_bdev) {
+ if (!bio_record->details.bi_disk) {
/*
* There wasn't enough memory to record necessary
* information for a retry or there was no other
@@ -1291,7 +1291,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio,
bd = &bio_record->details;
dm_bio_restore(bd, bio);
- bio_record->details.bi_bdev = NULL;
+ bio_record->details.bi_disk = NULL;
bio->bi_status = 0;
queue_bio(ms, bio, rw);
@@ -1301,7 +1301,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio,
}
out:
- bio_record->details.bi_bdev = NULL;
+ bio_record->details.bi_disk = NULL;
return DM_ENDIO_DONE;
}
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 1ba4104..1113b42 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -1663,7 +1663,7 @@ __find_pending_exception(struct dm_snapshot *s,
static void remap_exception(struct dm_snapshot *s, struct dm_exception *e,
struct bio *bio, chunk_t chunk)
{
- bio->bi_bdev = s->cow->bdev;
+ bio_set_dev(bio, s->cow->bdev);
bio->bi_iter.bi_sector =
chunk_to_sector(s->store, dm_chunk_number(e->new_chunk) +
(chunk - e->old_chunk)) +
@@ -1681,7 +1681,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
init_tracked_chunk(bio);
if (bio->bi_opf & REQ_PREFLUSH) {
- bio->bi_bdev = s->cow->bdev;
+ bio_set_dev(bio, s->cow->bdev);
return DM_MAPIO_REMAPPED;
}
@@ -1769,7 +1769,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
goto out;
}
} else {
- bio->bi_bdev = s->origin->bdev;
+ bio_set_dev(bio, s->origin->bdev);
track_chunk(s, bio, chunk);
}
@@ -1802,9 +1802,9 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio)
if (bio->bi_opf & REQ_PREFLUSH) {
if (!dm_bio_get_target_bio_nr(bio))
- bio->bi_bdev = s->origin->bdev;
+ bio_set_dev(bio, s->origin->bdev);
else
- bio->bi_bdev = s->cow->bdev;
+ bio_set_dev(bio, s->cow->bdev);
return DM_MAPIO_REMAPPED;
}
@@ -1824,7 +1824,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio)
chunk >= s->first_merging_chunk &&
chunk < (s->first_merging_chunk +
s->num_merging_chunks)) {
- bio->bi_bdev = s->origin->bdev;
+ bio_set_dev(bio, s->origin->bdev);
bio_list_add(&s->bios_queued_during_merge, bio);
r = DM_MAPIO_SUBMITTED;
goto out_unlock;
@@ -1838,7 +1838,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio)
}
redirect_to_origin:
- bio->bi_bdev = s->origin->bdev;
+ bio_set_dev(bio, s->origin->bdev);
if (bio_data_dir(bio) == WRITE) {
up_write(&s->lock);
@@ -2285,7 +2285,7 @@ static int origin_map(struct dm_target *ti, struct bio *bio)
struct dm_origin *o = ti->private;
unsigned available_sectors;
- bio->bi_bdev = o->dev->bdev;
+ bio_set_dev(bio, o->dev->bdev);
if (unlikely(bio->bi_opf & REQ_PREFLUSH))
return DM_MAPIO_REMAPPED;
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index a037553..ab50d7c 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -270,7 +270,7 @@ static int stripe_map_range(struct stripe_c *sc, struct bio *bio,
stripe_map_range_sector(sc, bio_end_sector(bio),
target_stripe, &end);
if (begin < end) {
- bio->bi_bdev = sc->stripe[target_stripe].dev->bdev;
+ bio_set_dev(bio, sc->stripe[target_stripe].dev->bdev);
bio->bi_iter.bi_sector = begin +
sc->stripe[target_stripe].physical_start;
bio->bi_iter.bi_size = to_bytes(end - begin);
@@ -291,7 +291,7 @@ static int stripe_map(struct dm_target *ti, struct bio *bio)
if (bio->bi_opf & REQ_PREFLUSH) {
target_bio_nr = dm_bio_get_target_bio_nr(bio);
BUG_ON(target_bio_nr >= sc->stripes);
- bio->bi_bdev = sc->stripe[target_bio_nr].dev->bdev;
+ bio_set_dev(bio, sc->stripe[target_bio_nr].dev->bdev);
return DM_MAPIO_REMAPPED;
}
if (unlikely(bio_op(bio) == REQ_OP_DISCARD) ||
@@ -306,7 +306,7 @@ static int stripe_map(struct dm_target *ti, struct bio *bio)
&stripe, &bio->bi_iter.bi_sector);
bio->bi_iter.bi_sector += sc->stripe[stripe].physical_start;
- bio->bi_bdev = sc->stripe[stripe].dev->bdev;
+ bio_set_dev(bio, sc->stripe[stripe].dev->bdev);
return DM_MAPIO_REMAPPED;
}
@@ -430,9 +430,7 @@ static int stripe_end_io(struct dm_target *ti, struct bio *bio,
return DM_ENDIO_DONE;
memset(major_minor, 0, sizeof(major_minor));
- sprintf(major_minor, "%d:%d",
- MAJOR(disk_devt(bio->bi_bdev->bd_disk)),
- MINOR(disk_devt(bio->bi_bdev->bd_disk)));
+ sprintf(major_minor, "%d:%d", MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)));
/*
* Test to see which stripe drive triggered the event
diff --git a/drivers/md/dm-switch.c b/drivers/md/dm-switch.c
index 871c18f..2dcea4c 100644
--- a/drivers/md/dm-switch.c
+++ b/drivers/md/dm-switch.c
@@ -322,7 +322,7 @@ static int switch_map(struct dm_target *ti, struct bio *bio)
sector_t offset = dm_target_offset(ti, bio->bi_iter.bi_sector);
unsigned path_nr = switch_get_path_nr(sctx, offset);
- bio->bi_bdev = sctx->path_list[path_nr].dmdev->bdev;
+ bio_set_dev(bio, sctx->path_list[path_nr].dmdev->bdev);
bio->bi_iter.bi_sector = sctx->path_list[path_nr].start + offset;
return DM_MAPIO_REMAPPED;
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 9dec2f8..69d88ae 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -679,7 +679,7 @@ static void remap(struct thin_c *tc, struct bio *bio, dm_block_t block)
struct pool *pool = tc->pool;
sector_t bi_sector = bio->bi_iter.bi_sector;
- bio->bi_bdev = tc->pool_dev->bdev;
+ bio_set_dev(bio, tc->pool_dev->bdev);
if (block_size_is_power_of_two(pool))
bio->bi_iter.bi_sector =
(block << pool->sectors_per_block_shift) |
@@ -691,7 +691,7 @@ static void remap(struct thin_c *tc, struct bio *bio, dm_block_t block)
static void remap_to_origin(struct thin_c *tc, struct bio *bio)
{
- bio->bi_bdev = tc->origin_dev->bdev;
+ bio_set_dev(bio, tc->origin_dev->bdev);
}
static int bio_triggers_commit(struct thin_c *tc, struct bio *bio)
@@ -3313,7 +3313,7 @@ static int pool_map(struct dm_target *ti, struct bio *bio)
* As this is a singleton target, ti->begin is always zero.
*/
spin_lock_irqsave(&pool->lock, flags);
- bio->bi_bdev = pt->data_dev->bdev;
+ bio_set_dev(bio, pt->data_dev->bdev);
r = DM_MAPIO_REMAPPED;
spin_unlock_irqrestore(&pool->lock, flags);
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index b46705e..1c5b618 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -637,7 +637,7 @@ static int verity_map(struct dm_target *ti, struct bio *bio)
struct dm_verity *v = ti->private;
struct dm_verity_io *io;
- bio->bi_bdev = v->data_dev->bdev;
+ bio_set_dev(bio, v->data_dev->bdev);
bio->bi_iter.bi_sector = verity_map_sector(v, bio->bi_iter.bi_sector);
if (((unsigned)bio->bi_iter.bi_sector | bio_sectors(bio)) &
diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c
index a4fa2ad..70485de 100644
--- a/drivers/md/dm-zoned-metadata.c
+++ b/drivers/md/dm-zoned-metadata.c
@@ -409,7 +409,7 @@ static struct dmz_mblock *dmz_fetch_mblock(struct dmz_metadata *zmd,
}
bio->bi_iter.bi_sector = dmz_blk2sect(block);
- bio->bi_bdev = zmd->dev->bdev;
+ bio_set_dev(bio, zmd->dev->bdev);
bio->bi_private = mblk;
bio->bi_end_io = dmz_mblock_bio_end_io;
bio_set_op_attrs(bio, REQ_OP_READ, REQ_META | REQ_PRIO);
@@ -564,7 +564,7 @@ static void dmz_write_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk,
set_bit(DMZ_META_WRITING, &mblk->state);
bio->bi_iter.bi_sector = dmz_blk2sect(block);
- bio->bi_bdev = zmd->dev->bdev;
+ bio_set_dev(bio, zmd->dev->bdev);
bio->bi_private = mblk;
bio->bi_end_io = dmz_mblock_bio_end_io;
bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_META | REQ_PRIO);
@@ -586,7 +586,7 @@ static int dmz_rdwr_block(struct dmz_metadata *zmd, int op, sector_t block,
return -ENOMEM;
bio->bi_iter.bi_sector = dmz_blk2sect(block);
- bio->bi_bdev = zmd->dev->bdev;
+ bio_set_dev(bio, zmd->dev->bdev);
bio_set_op_attrs(bio, op, REQ_SYNC | REQ_META | REQ_PRIO);
bio_add_page(bio, page, DMZ_BLOCK_SIZE, 0);
ret = submit_bio_wait(bio);
diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c
index b08bbbd..b87c174 100644
--- a/drivers/md/dm-zoned-target.c
+++ b/drivers/md/dm-zoned-target.c
@@ -238,7 +238,7 @@ static void dmz_submit_write_bio(struct dmz_target *dmz, struct dm_zone *zone,
struct dmz_bioctx *bioctx = dm_per_bio_data(bio, sizeof(struct dmz_bioctx));
/* Setup and submit the BIO */
- bio->bi_bdev = dmz->dev->bdev;
+ bio_set_dev(bio, dmz->dev->bdev);
bio->bi_iter.bi_sector = dmz_start_sect(dmz->metadata, zone) + dmz_blk2sect(chunk_block);
atomic_inc(&bioctx->ref);
generic_make_request(bio);
@@ -586,7 +586,7 @@ static int dmz_map(struct dm_target *ti, struct bio *bio)
(unsigned long long)dmz_chunk_block(dmz->dev, dmz_bio_block(bio)),
(unsigned int)dmz_bio_blocks(bio));
- bio->bi_bdev = dev->bdev;
+ bio_set_dev(bio, dev->bdev);
if (!nr_sectors && bio_op(bio) != REQ_OP_WRITE)
return DM_MAPIO_REMAPPED;
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index d669fdd..04ae795 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -510,7 +510,7 @@ static void start_io_acct(struct dm_io *io)
io->start_time = jiffies;
cpu = part_stat_lock();
- part_round_stats(cpu, &dm_disk(md)->part0);
+ part_round_stats(md->queue, cpu, &dm_disk(md)->part0);
part_stat_unlock();
atomic_set(&dm_disk(md)->part0.in_flight[rw],
atomic_inc_return(&md->pending[rw]));
@@ -529,7 +529,7 @@ static void end_io_acct(struct dm_io *io)
int pending;
int rw = bio_data_dir(bio);
- generic_end_io_acct(rw, &dm_disk(md)->part0, io->start_time);
+ generic_end_io_acct(md->queue, rw, &dm_disk(md)->part0, io->start_time);
if (unlikely(dm_stats_used(&md->stats)))
dm_stats_account_io(&md->stats, bio_data_dir(bio),
@@ -841,10 +841,10 @@ static void clone_endio(struct bio *bio)
if (unlikely(error == BLK_STS_TARGET)) {
if (bio_op(bio) == REQ_OP_WRITE_SAME &&
- !bdev_get_queue(bio->bi_bdev)->limits.max_write_same_sectors)
+ !bio->bi_disk->queue->limits.max_write_same_sectors)
disable_write_same(md);
if (bio_op(bio) == REQ_OP_WRITE_ZEROES &&
- !bdev_get_queue(bio->bi_bdev)->limits.max_write_zeroes_sectors)
+ !bio->bi_disk->queue->limits.max_write_zeroes_sectors)
disable_write_zeroes(md);
}
@@ -1205,8 +1205,8 @@ static void __map_bio(struct dm_target_io *tio)
break;
case DM_MAPIO_REMAPPED:
/* the bio has been remapped so dispatch it */
- trace_block_bio_remap(bdev_get_queue(clone->bi_bdev), clone,
- tio->io->bio->bi_bdev->bd_dev, sector);
+ trace_block_bio_remap(clone->bi_disk->queue, clone,
+ bio_dev(tio->io->bio), sector);
generic_make_request(clone);
break;
case DM_MAPIO_KILL:
@@ -1532,7 +1532,7 @@ static blk_qc_t dm_make_request(struct request_queue *q, struct bio *bio)
map = dm_get_live_table(md, &srcu_idx);
- generic_start_io_acct(rw, bio_sectors(bio), &dm_disk(md)->part0);
+ generic_start_io_acct(q, rw, bio_sectors(bio), &dm_disk(md)->part0);
/* if we're suspended, we have to queue this io for later */
if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))) {
@@ -1786,7 +1786,7 @@ static struct mapped_device *alloc_dev(int minor)
goto bad;
bio_init(&md->flush_bio, NULL, 0);
- md->flush_bio.bi_bdev = md->bdev;
+ bio_set_dev(&md->flush_bio, md->bdev);
md->flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC;
dm_stats_init(&md->stats);
diff --git a/drivers/md/faulty.c b/drivers/md/faulty.c
index 06a64d5..38264b3 100644
--- a/drivers/md/faulty.c
+++ b/drivers/md/faulty.c
@@ -216,12 +216,12 @@ static bool faulty_make_request(struct mddev *mddev, struct bio *bio)
if (failit) {
struct bio *b = bio_clone_fast(bio, GFP_NOIO, mddev->bio_set);
- b->bi_bdev = conf->rdev->bdev;
+ bio_set_dev(b, conf->rdev->bdev);
b->bi_private = bio;
b->bi_end_io = faulty_fail;
bio = b;
} else
- bio->bi_bdev = conf->rdev->bdev;
+ bio_set_dev(bio, conf->rdev->bdev);
generic_make_request(bio);
return true;
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 5f1eb91..c464fb4 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -275,17 +275,17 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio)
bio = split;
}
- bio->bi_bdev = tmp_dev->rdev->bdev;
+ bio_set_dev(bio, tmp_dev->rdev->bdev);
bio->bi_iter.bi_sector = bio->bi_iter.bi_sector -
start_sector + data_offset;
if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
- !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) {
+ !blk_queue_discard(bio->bi_disk->queue))) {
/* Just ignore it */
bio_endio(bio);
} else {
if (mddev->gendisk)
- trace_block_bio_remap(bdev_get_queue(bio->bi_bdev),
+ trace_block_bio_remap(bio->bi_disk->queue,
bio, disk_devt(mddev->gendisk),
bio_sector);
mddev_check_writesame(mddev, bio);
diff --git a/drivers/md/md.c b/drivers/md/md.c
index b01e458..078c6f3 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -422,7 +422,7 @@ static void submit_flushes(struct work_struct *ws)
bi = bio_alloc_mddev(GFP_NOIO, 0, mddev);
bi->bi_end_io = md_end_flush;
bi->bi_private = rdev;
- bi->bi_bdev = rdev->bdev;
+ bio_set_dev(bi, rdev->bdev);
bi->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
atomic_inc(&mddev->flush_pending);
submit_bio(bi);
@@ -772,7 +772,7 @@ void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
atomic_inc(&rdev->nr_pending);
- bio->bi_bdev = rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev;
+ bio_set_dev(bio, rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev);
bio->bi_iter.bi_sector = sector;
bio_add_page(bio, page, size, 0);
bio->bi_private = rdev;
@@ -803,8 +803,10 @@ int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
struct bio *bio = md_bio_alloc_sync(rdev->mddev);
int ret;
- bio->bi_bdev = (metadata_op && rdev->meta_bdev) ?
- rdev->meta_bdev : rdev->bdev;
+ if (metadata_op && rdev->meta_bdev)
+ bio_set_dev(bio, rdev->meta_bdev);
+ else
+ bio_set_dev(bio, rdev->bdev);
bio_set_op_attrs(bio, op, op_flags);
if (metadata_op)
bio->bi_iter.bi_sector = sector + rdev->sb_start;
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 09db034..c0d436f 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -509,6 +509,11 @@ static inline void md_sync_acct(struct block_device *bdev, unsigned long nr_sect
atomic_add(nr_sectors, &bdev->bd_contains->bd_disk->sync_io);
}
+static inline void md_sync_acct_bio(struct bio *bio, unsigned long nr_sectors)
+{
+ atomic_add(nr_sectors, &bio->bi_disk->sync_io);
+}
+
struct md_personality
{
char *name;
@@ -721,14 +726,14 @@ static inline void mddev_clear_unsupported_flags(struct mddev *mddev,
static inline void mddev_check_writesame(struct mddev *mddev, struct bio *bio)
{
if (bio_op(bio) == REQ_OP_WRITE_SAME &&
- !bdev_get_queue(bio->bi_bdev)->limits.max_write_same_sectors)
+ !bio->bi_disk->queue->limits.max_write_same_sectors)
mddev->queue->limits.max_write_same_sectors = 0;
}
static inline void mddev_check_write_zeroes(struct mddev *mddev, struct bio *bio)
{
if (bio_op(bio) == REQ_OP_WRITE_ZEROES &&
- !bdev_get_queue(bio->bi_bdev)->limits.max_write_zeroes_sectors)
+ !bio->bi_disk->queue->limits.max_write_zeroes_sectors)
mddev->queue->limits.max_write_zeroes_sectors = 0;
}
#endif /* _MD_MD_H */
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index 23a162b..b68e066 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -134,7 +134,7 @@ static bool multipath_make_request(struct mddev *mddev, struct bio * bio)
__bio_clone_fast(&mp_bh->bio, bio);
mp_bh->bio.bi_iter.bi_sector += multipath->rdev->data_offset;
- mp_bh->bio.bi_bdev = multipath->rdev->bdev;
+ bio_set_dev(&mp_bh->bio, multipath->rdev->bdev);
mp_bh->bio.bi_opf |= REQ_FAILFAST_TRANSPORT;
mp_bh->bio.bi_end_io = multipath_end_request;
mp_bh->bio.bi_private = mp_bh;
@@ -345,17 +345,17 @@ static void multipathd(struct md_thread *thread)
if ((mp_bh->path = multipath_map (conf))<0) {
pr_err("multipath: %s: unrecoverable IO read error for block %llu\n",
- bdevname(bio->bi_bdev,b),
+ bio_devname(bio, b),
(unsigned long long)bio->bi_iter.bi_sector);
multipath_end_bh_io(mp_bh, BLK_STS_IOERR);
} else {
pr_err("multipath: %s: redirecting sector %llu to another IO path\n",
- bdevname(bio->bi_bdev,b),
+ bio_devname(bio, b),
(unsigned long long)bio->bi_iter.bi_sector);
*bio = *(mp_bh->master_bio);
bio->bi_iter.bi_sector +=
conf->multipaths[mp_bh->path].rdev->data_offset;
- bio->bi_bdev = conf->multipaths[mp_bh->path].rdev->bdev;
+ bio_set_dev(bio, conf->multipaths[mp_bh->path].rdev->bdev);
bio->bi_opf |= REQ_FAILFAST_TRANSPORT;
bio->bi_end_io = multipath_end_request;
bio->bi_private = mp_bh;
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 94d9ae9..05a4521 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -588,14 +588,13 @@ static bool raid0_make_request(struct mddev *mddev, struct bio *bio)
zone = find_zone(mddev->private, §or);
tmp_dev = map_sector(mddev, zone, sector, §or);
- bio->bi_bdev = tmp_dev->bdev;
+ bio_set_dev(bio, tmp_dev->bdev);
bio->bi_iter.bi_sector = sector + zone->dev_start +
tmp_dev->data_offset;
if (mddev->gendisk)
- trace_block_bio_remap(bdev_get_queue(bio->bi_bdev),
- bio, disk_devt(mddev->gendisk),
- bio_sector);
+ trace_block_bio_remap(bio->bi_disk->queue, bio,
+ disk_devt(mddev->gendisk), bio_sector);
mddev_check_writesame(mddev, bio);
mddev_check_write_zeroes(mddev, bio);
generic_make_request(bio);
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index f50958d..baf5e35 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -786,13 +786,13 @@ static void flush_bio_list(struct r1conf *conf, struct bio *bio)
while (bio) { /* submit pending writes */
struct bio *next = bio->bi_next;
- struct md_rdev *rdev = (void*)bio->bi_bdev;
+ struct md_rdev *rdev = (void *)bio->bi_disk;
bio->bi_next = NULL;
- bio->bi_bdev = rdev->bdev;
+ bio_set_dev(bio, rdev->bdev);
if (test_bit(Faulty, &rdev->flags)) {
bio_io_error(bio);
} else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
- !blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
+ !blk_queue_discard(bio->bi_disk->queue)))
/* Just ignore it */
bio_endio(bio);
else
@@ -1273,7 +1273,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
read_bio->bi_iter.bi_sector = r1_bio->sector +
mirror->rdev->data_offset;
- read_bio->bi_bdev = mirror->rdev->bdev;
+ bio_set_dev(read_bio, mirror->rdev->bdev);
read_bio->bi_end_io = raid1_end_read_request;
bio_set_op_attrs(read_bio, op, do_sync);
if (test_bit(FailFast, &mirror->rdev->flags) &&
@@ -1282,9 +1282,8 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
read_bio->bi_private = r1_bio;
if (mddev->gendisk)
- trace_block_bio_remap(bdev_get_queue(read_bio->bi_bdev),
- read_bio, disk_devt(mddev->gendisk),
- r1_bio->sector);
+ trace_block_bio_remap(read_bio->bi_disk->queue, read_bio,
+ disk_devt(mddev->gendisk), r1_bio->sector);
generic_make_request(read_bio);
}
@@ -1496,7 +1495,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
mbio->bi_iter.bi_sector = (r1_bio->sector +
conf->mirrors[i].rdev->data_offset);
- mbio->bi_bdev = conf->mirrors[i].rdev->bdev;
+ bio_set_dev(mbio, conf->mirrors[i].rdev->bdev);
mbio->bi_end_io = raid1_end_write_request;
mbio->bi_opf = bio_op(bio) | (bio->bi_opf & (REQ_SYNC | REQ_FUA));
if (test_bit(FailFast, &conf->mirrors[i].rdev->flags) &&
@@ -1508,11 +1507,11 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
atomic_inc(&r1_bio->remaining);
if (mddev->gendisk)
- trace_block_bio_remap(bdev_get_queue(mbio->bi_bdev),
+ trace_block_bio_remap(mbio->bi_disk->queue,
mbio, disk_devt(mddev->gendisk),
r1_bio->sector);
/* flush_pending_writes() needs access to the rdev so...*/
- mbio->bi_bdev = (void*)conf->mirrors[i].rdev;
+ mbio->bi_disk = (void *)conf->mirrors[i].rdev;
cb = blk_check_plugged(raid1_unplug, mddev, sizeof(*plug));
if (cb)
@@ -1990,8 +1989,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio)
* Don't fail devices as that won't really help.
*/
pr_crit_ratelimited("md/raid1:%s: %s: unrecoverable I/O read error for block %llu\n",
- mdname(mddev),
- bdevname(bio->bi_bdev, b),
+ mdname(mddev), bio_devname(bio, b),
(unsigned long long)r1_bio->sector);
for (d = 0; d < conf->raid_disks * 2; d++) {
rdev = conf->mirrors[d].rdev;
@@ -2082,7 +2080,7 @@ static void process_checks(struct r1bio *r1_bio)
b->bi_status = status;
b->bi_iter.bi_sector = r1_bio->sector +
conf->mirrors[i].rdev->data_offset;
- b->bi_bdev = conf->mirrors[i].rdev->bdev;
+ bio_set_dev(b, conf->mirrors[i].rdev->bdev);
b->bi_end_io = end_sync_read;
rp->raid_bio = r1_bio;
b->bi_private = rp;
@@ -2350,7 +2348,7 @@ static int narrow_write_error(struct r1bio *r1_bio, int i)
bio_trim(wbio, sector - r1_bio->sector, sectors);
wbio->bi_iter.bi_sector += rdev->data_offset;
- wbio->bi_bdev = rdev->bdev;
+ bio_set_dev(wbio, rdev->bdev);
if (submit_bio_wait(wbio) < 0)
/* failure! */
@@ -2440,7 +2438,6 @@ static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio)
struct mddev *mddev = conf->mddev;
struct bio *bio;
struct md_rdev *rdev;
- dev_t bio_dev;
sector_t bio_sector;
clear_bit(R1BIO_ReadError, &r1_bio->state);
@@ -2454,7 +2451,6 @@ static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio)
*/
bio = r1_bio->bios[r1_bio->read_disk];
- bio_dev = bio->bi_bdev->bd_dev;
bio_sector = conf->mirrors[r1_bio->read_disk].rdev->data_offset + r1_bio->sector;
bio_put(bio);
r1_bio->bios[r1_bio->read_disk] = NULL;
@@ -2727,7 +2723,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
if (bio->bi_end_io) {
atomic_inc(&rdev->nr_pending);
bio->bi_iter.bi_sector = sector_nr + rdev->data_offset;
- bio->bi_bdev = rdev->bdev;
+ bio_set_dev(bio, rdev->bdev);
if (test_bit(FailFast, &rdev->flags))
bio->bi_opf |= MD_FAILFAST;
}
@@ -2853,7 +2849,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
bio = r1_bio->bios[i];
if (bio->bi_end_io == end_sync_read) {
read_targets--;
- md_sync_acct(bio->bi_bdev, nr_sectors);
+ md_sync_acct_bio(bio, nr_sectors);
if (read_targets == 1)
bio->bi_opf &= ~MD_FAILFAST;
generic_make_request(bio);
@@ -2862,7 +2858,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
} else {
atomic_set(&r1_bio->remaining, 1);
bio = r1_bio->bios[r1_bio->read_disk];
- md_sync_acct(bio->bi_bdev, nr_sectors);
+ md_sync_acct_bio(bio, nr_sectors);
if (read_targets == 1)
bio->bi_opf &= ~MD_FAILFAST;
generic_make_request(bio);
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index f55d4cc..d1f948e 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -901,13 +901,13 @@ static void flush_pending_writes(struct r10conf *conf)
while (bio) { /* submit pending writes */
struct bio *next = bio->bi_next;
- struct md_rdev *rdev = (void*)bio->bi_bdev;
+ struct md_rdev *rdev = (void*)bio->bi_disk;
bio->bi_next = NULL;
- bio->bi_bdev = rdev->bdev;
+ bio_set_dev(bio, rdev->bdev);
if (test_bit(Faulty, &rdev->flags)) {
bio_io_error(bio);
} else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
- !blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
+ !blk_queue_discard(bio->bi_disk->queue)))
/* Just ignore it */
bio_endio(bio);
else
@@ -1085,13 +1085,13 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
while (bio) { /* submit pending writes */
struct bio *next = bio->bi_next;
- struct md_rdev *rdev = (void*)bio->bi_bdev;
+ struct md_rdev *rdev = (void*)bio->bi_disk;
bio->bi_next = NULL;
- bio->bi_bdev = rdev->bdev;
+ bio_set_dev(bio, rdev->bdev);
if (test_bit(Faulty, &rdev->flags)) {
bio_io_error(bio);
} else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
- !blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
+ !blk_queue_discard(bio->bi_disk->queue)))
/* Just ignore it */
bio_endio(bio);
else
@@ -1200,7 +1200,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
read_bio->bi_iter.bi_sector = r10_bio->devs[slot].addr +
choose_data_offset(r10_bio, rdev);
- read_bio->bi_bdev = rdev->bdev;
+ bio_set_dev(read_bio, rdev->bdev);
read_bio->bi_end_io = raid10_end_read_request;
bio_set_op_attrs(read_bio, op, do_sync);
if (test_bit(FailFast, &rdev->flags) &&
@@ -1209,7 +1209,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
read_bio->bi_private = r10_bio;
if (mddev->gendisk)
- trace_block_bio_remap(bdev_get_queue(read_bio->bi_bdev),
+ trace_block_bio_remap(read_bio->bi_disk->queue,
read_bio, disk_devt(mddev->gendisk),
r10_bio->sector);
generic_make_request(read_bio);
@@ -1249,7 +1249,7 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
mbio->bi_iter.bi_sector = (r10_bio->devs[n_copy].addr +
choose_data_offset(r10_bio, rdev));
- mbio->bi_bdev = rdev->bdev;
+ bio_set_dev(mbio, rdev->bdev);
mbio->bi_end_io = raid10_end_write_request;
bio_set_op_attrs(mbio, op, do_sync | do_fua);
if (!replacement && test_bit(FailFast,
@@ -1259,11 +1259,11 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
mbio->bi_private = r10_bio;
if (conf->mddev->gendisk)
- trace_block_bio_remap(bdev_get_queue(mbio->bi_bdev),
+ trace_block_bio_remap(mbio->bi_disk->queue,
mbio, disk_devt(conf->mddev->gendisk),
r10_bio->sector);
/* flush_pending_writes() needs access to the rdev so...*/
- mbio->bi_bdev = (void *)rdev;
+ mbio->bi_disk = (void *)rdev;
atomic_inc(&r10_bio->remaining);
@@ -2094,7 +2094,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
if (test_bit(FailFast, &conf->mirrors[d].rdev->flags))
tbio->bi_opf |= MD_FAILFAST;
tbio->bi_iter.bi_sector += conf->mirrors[d].rdev->data_offset;
- tbio->bi_bdev = conf->mirrors[d].rdev->bdev;
+ bio_set_dev(tbio, conf->mirrors[d].rdev->bdev);
generic_make_request(tbio);
}
@@ -2552,7 +2552,7 @@ static int narrow_write_error(struct r10bio *r10_bio, int i)
wsector = r10_bio->devs[i].addr + (sector - r10_bio->sector);
wbio->bi_iter.bi_sector = wsector +
choose_data_offset(r10_bio, rdev);
- wbio->bi_bdev = rdev->bdev;
+ bio_set_dev(wbio, rdev->bdev);
bio_set_op_attrs(wbio, REQ_OP_WRITE, 0);
if (submit_bio_wait(wbio) < 0)
@@ -2575,7 +2575,6 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio)
struct bio *bio;
struct r10conf *conf = mddev->private;
struct md_rdev *rdev = r10_bio->devs[slot].rdev;
- dev_t bio_dev;
sector_t bio_last_sector;
/* we got a read error. Maybe the drive is bad. Maybe just
@@ -2587,7 +2586,6 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio)
* frozen.
*/
bio = r10_bio->devs[slot].bio;
- bio_dev = bio->bi_bdev->bd_dev;
bio_last_sector = r10_bio->devs[slot].addr + rdev->data_offset + r10_bio->sectors;
bio_put(bio);
r10_bio->devs[slot].bio = NULL;
@@ -2950,7 +2948,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
/* Again, very different code for resync and recovery.
* Both must result in an r10bio with a list of bios that
- * have bi_end_io, bi_sector, bi_bdev set,
+ * have bi_end_io, bi_sector, bi_disk set,
* and bi_private set to the r10bio.
* For recovery, we may actually create several r10bios
* with 2 bios in each, that correspond to the bios in the main one.
@@ -3095,7 +3093,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
from_addr = r10_bio->devs[j].addr;
bio->bi_iter.bi_sector = from_addr +
rdev->data_offset;
- bio->bi_bdev = rdev->bdev;
+ bio_set_dev(bio, rdev->bdev);
atomic_inc(&rdev->nr_pending);
/* and we write to 'i' (if not in_sync) */
@@ -3117,7 +3115,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
bio->bi_iter.bi_sector = to_addr
+ mrdev->data_offset;
- bio->bi_bdev = mrdev->bdev;
+ bio_set_dev(bio, mrdev->bdev);
atomic_inc(&r10_bio->remaining);
} else
r10_bio->devs[1].bio->bi_end_io = NULL;
@@ -3143,7 +3141,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
bio->bi_iter.bi_sector = to_addr +
mreplace->data_offset;
- bio->bi_bdev = mreplace->bdev;
+ bio_set_dev(bio, mreplace->bdev);
atomic_inc(&r10_bio->remaining);
break;
}
@@ -3289,7 +3287,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
if (test_bit(FailFast, &rdev->flags))
bio->bi_opf |= MD_FAILFAST;
bio->bi_iter.bi_sector = sector + rdev->data_offset;
- bio->bi_bdev = rdev->bdev;
+ bio_set_dev(bio, rdev->bdev);
count++;
rdev = rcu_dereference(conf->mirrors[d].replacement);
@@ -3311,7 +3309,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
if (test_bit(FailFast, &rdev->flags))
bio->bi_opf |= MD_FAILFAST;
bio->bi_iter.bi_sector = sector + rdev->data_offset;
- bio->bi_bdev = rdev->bdev;
+ bio_set_dev(bio, rdev->bdev);
count++;
rcu_read_unlock();
}
@@ -3367,7 +3365,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
r10_bio->sectors = nr_sectors;
if (bio->bi_end_io == end_sync_read) {
- md_sync_acct(bio->bi_bdev, nr_sectors);
+ md_sync_acct_bio(bio, nr_sectors);
bio->bi_status = 0;
generic_make_request(bio);
}
@@ -4383,7 +4381,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
read_bio = bio_alloc_mddev(GFP_KERNEL, RESYNC_PAGES, mddev);
- read_bio->bi_bdev = rdev->bdev;
+ bio_set_dev(read_bio, rdev->bdev);
read_bio->bi_iter.bi_sector = (r10_bio->devs[r10_bio->read_slot].addr
+ rdev->data_offset);
read_bio->bi_private = r10_bio;
@@ -4417,7 +4415,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
if (!rdev2 || test_bit(Faulty, &rdev2->flags))
continue;
- b->bi_bdev = rdev2->bdev;
+ bio_set_dev(b, rdev2->bdev);
b->bi_iter.bi_sector = r10_bio->devs[s/2].addr +
rdev2->new_data_offset;
b->bi_end_io = end_reshape_write;
@@ -4449,7 +4447,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
r10_bio->sectors = nr_sectors;
/* Now submit the read */
- md_sync_acct(read_bio->bi_bdev, r10_bio->sectors);
+ md_sync_acct_bio(read_bio, r10_bio->sectors);
atomic_inc(&r10_bio->remaining);
read_bio->bi_next = NULL;
generic_make_request(read_bio);
@@ -4511,7 +4509,7 @@ static void reshape_request_write(struct mddev *mddev, struct r10bio *r10_bio)
}
atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
- md_sync_acct(b->bi_bdev, r10_bio->sectors);
+ md_sync_acct_bio(b, r10_bio->sectors);
atomic_inc(&r10_bio->remaining);
b->bi_next = NULL;
generic_make_request(b);
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 2dcbafa..5d7da1c 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -745,7 +745,7 @@ static struct bio *r5l_bio_alloc(struct r5l_log *log)
struct bio *bio = bio_alloc_bioset(GFP_NOIO, BIO_MAX_PAGES, log->bs);
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
- bio->bi_bdev = log->rdev->bdev;
+ bio_set_dev(bio, log->rdev->bdev);
bio->bi_iter.bi_sector = log->rdev->data_offset + log->log_start;
return bio;
@@ -1313,7 +1313,7 @@ void r5l_flush_stripe_to_raid(struct r5l_log *log)
if (!do_flush)
return;
bio_reset(&log->flush_bio);
- log->flush_bio.bi_bdev = log->rdev->bdev;
+ bio_set_dev(&log->flush_bio, log->rdev->bdev);
log->flush_bio.bi_end_io = r5l_log_flush_endio;
log->flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
submit_bio(&log->flush_bio);
@@ -1691,7 +1691,7 @@ static int r5l_recovery_fetch_ra_pool(struct r5l_log *log,
sector_t offset)
{
bio_reset(ctx->ra_bio);
- ctx->ra_bio->bi_bdev = log->rdev->bdev;
+ bio_set_dev(ctx->ra_bio, log->rdev->bdev);
bio_set_op_attrs(ctx->ra_bio, REQ_OP_READ, 0);
ctx->ra_bio->bi_iter.bi_sector = log->rdev->data_offset + offset;
diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c
index 44ad5ba..1e237c4 100644
--- a/drivers/md/raid5-ppl.c
+++ b/drivers/md/raid5-ppl.c
@@ -415,7 +415,7 @@ static void ppl_submit_iounit_bio(struct ppl_io_unit *io, struct bio *bio)
pr_debug("%s: seq: %llu size: %u sector: %llu dev: %s\n",
__func__, io->seq, bio->bi_iter.bi_size,
(unsigned long long)bio->bi_iter.bi_sector,
- bdevname(bio->bi_bdev, b));
+ bio_devname(bio, b));
submit_bio(bio);
}
@@ -453,7 +453,7 @@ static void ppl_submit_iounit(struct ppl_io_unit *io)
bio->bi_end_io = ppl_log_endio;
bio->bi_opf = REQ_OP_WRITE | REQ_FUA;
- bio->bi_bdev = log->rdev->bdev;
+ bio_set_dev(bio, log->rdev->bdev);
bio->bi_iter.bi_sector = log->rdev->ppl.sector;
bio_add_page(bio, io->header_page, PAGE_SIZE, 0);
@@ -468,7 +468,7 @@ static void ppl_submit_iounit(struct ppl_io_unit *io)
bio = bio_alloc_bioset(GFP_NOIO, BIO_MAX_PAGES,
ppl_conf->bs);
bio->bi_opf = prev->bi_opf;
- bio->bi_bdev = prev->bi_bdev;
+ bio_copy_dev(bio, prev);
bio->bi_iter.bi_sector = bio_end_sector(prev);
bio_add_page(bio, sh->ppl_page, PAGE_SIZE, 0);
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 0fc2748..3ae8bbc 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1096,7 +1096,7 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
set_bit(STRIPE_IO_STARTED, &sh->state);
- bi->bi_bdev = rdev->bdev;
+ bio_set_dev(bi, rdev->bdev);
bio_set_op_attrs(bi, op, op_flags);
bi->bi_end_io = op_is_write(op)
? raid5_end_write_request
@@ -1145,7 +1145,7 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
set_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags);
if (conf->mddev->gendisk)
- trace_block_bio_remap(bdev_get_queue(bi->bi_bdev),
+ trace_block_bio_remap(bi->bi_disk->queue,
bi, disk_devt(conf->mddev->gendisk),
sh->dev[i].sector);
if (should_defer && op_is_write(op))
@@ -1160,7 +1160,7 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
set_bit(STRIPE_IO_STARTED, &sh->state);
- rbi->bi_bdev = rrdev->bdev;
+ bio_set_dev(rbi, rrdev->bdev);
bio_set_op_attrs(rbi, op, op_flags);
BUG_ON(!op_is_write(op));
rbi->bi_end_io = raid5_end_write_request;
@@ -1193,7 +1193,7 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
if (op == REQ_OP_DISCARD)
rbi->bi_vcnt = 0;
if (conf->mddev->gendisk)
- trace_block_bio_remap(bdev_get_queue(rbi->bi_bdev),
+ trace_block_bio_remap(rbi->bi_disk->queue,
rbi, disk_devt(conf->mddev->gendisk),
sh->dev[i].sector);
if (should_defer && op_is_write(op))
@@ -5092,10 +5092,12 @@ static int raid5_congested(struct mddev *mddev, int bits)
static int in_chunk_boundary(struct mddev *mddev, struct bio *bio)
{
struct r5conf *conf = mddev->private;
- sector_t sector = bio->bi_iter.bi_sector + get_start_sect(bio->bi_bdev);
+ sector_t sector = bio->bi_iter.bi_sector;
unsigned int chunk_sectors;
unsigned int bio_sectors = bio_sectors(bio);
+ WARN_ON_ONCE(bio->bi_partno);
+
chunk_sectors = min(conf->chunk_sectors, conf->prev_chunk_sectors);
return chunk_sectors >=
((sector & (chunk_sectors - 1)) + bio_sectors);
@@ -5231,7 +5233,7 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
raid_bio->bi_next = (void*)rdev;
- align_bi->bi_bdev = rdev->bdev;
+ bio_set_dev(align_bi, rdev->bdev);
bio_clear_flag(align_bi, BIO_SEG_VALID);
if (is_badblock(rdev, align_bi->bi_iter.bi_sector,
@@ -5253,7 +5255,7 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
spin_unlock_irq(&conf->device_lock);
if (mddev->gendisk)
- trace_block_bio_remap(bdev_get_queue(align_bi->bi_bdev),
+ trace_block_bio_remap(align_bi->bi_disk->queue,
align_bi, disk_devt(mddev->gendisk),
raid_bio->bi_iter.bi_sector);
generic_make_request(align_bi);
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index e1b5715..a87f793 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -390,21 +390,22 @@ int nd_region_activate(struct nd_region *nd_region);
void __nd_iostat_start(struct bio *bio, unsigned long *start);
static inline bool nd_iostat_start(struct bio *bio, unsigned long *start)
{
- struct gendisk *disk = bio->bi_bdev->bd_disk;
+ struct gendisk *disk = bio->bi_disk;
if (!blk_queue_io_stat(disk->queue))
return false;
*start = jiffies;
- generic_start_io_acct(bio_data_dir(bio),
+ generic_start_io_acct(disk->queue, bio_data_dir(bio),
bio_sectors(bio), &disk->part0);
return true;
}
static inline void nd_iostat_end(struct bio *bio, unsigned long start)
{
- struct gendisk *disk = bio->bi_bdev->bd_disk;
+ struct gendisk *disk = bio->bi_disk;
- generic_end_io_acct(bio_data_dir(bio), &disk->part0, start);
+ generic_end_io_acct(disk->queue, bio_data_dir(bio), &disk->part0,
+ start);
}
static inline bool is_bad_pmem(struct badblocks *bb, sector_t sector,
unsigned int len)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 37046ac..c596dd3c 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -613,11 +613,7 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
if (!disk)
goto submit;
- bio->bi_bdev = bdget_disk(disk, 0);
- if (!bio->bi_bdev) {
- ret = -ENODEV;
- goto out_unmap;
- }
+ bio->bi_disk = disk;
if (meta_buffer && meta_len) {
struct bio_integrity_payload *bip;
@@ -668,11 +664,8 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
out_free_meta:
kfree(meta);
out_unmap:
- if (bio) {
- if (disk && bio->bi_bdev)
- bdput(bio->bi_bdev);
+ if (bio)
blk_rq_unmap_user(bio);
- }
out:
blk_mq_free_request(req);
return ret;
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 5c2a08e..1438be6 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2168,7 +2168,6 @@ static const struct blk_mq_ops nvme_fc_mq_ops = {
.complete = nvme_fc_complete_rq,
.init_request = nvme_fc_init_request,
.exit_request = nvme_fc_exit_request,
- .reinit_request = nvme_fc_reinit_request,
.init_hctx = nvme_fc_init_hctx,
.poll = nvme_fc_poll,
.timeout = nvme_fc_timeout,
@@ -2269,7 +2268,7 @@ nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl)
nvme_fc_init_io_queues(ctrl);
- ret = blk_mq_reinit_tagset(&ctrl->tag_set);
+ ret = blk_mq_reinit_tagset(&ctrl->tag_set, nvme_fc_reinit_request);
if (ret)
goto out_free_io_queues;
@@ -2655,7 +2654,6 @@ static const struct blk_mq_ops nvme_fc_admin_mq_ops = {
.complete = nvme_fc_complete_rq,
.init_request = nvme_fc_init_request,
.exit_request = nvme_fc_exit_request,
- .reinit_request = nvme_fc_reinit_request,
.init_hctx = nvme_fc_init_admin_hctx,
.timeout = nvme_fc_timeout,
};
diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index be85413..c1a2856 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -643,17 +643,9 @@ static int nvme_nvm_submit_user_cmd(struct request_queue *q,
vcmd->ph_rw.metadata = cpu_to_le64(metadata_dma);
}
- if (!disk)
- goto submit;
-
- bio->bi_bdev = bdget_disk(disk, 0);
- if (!bio->bi_bdev) {
- ret = -ENODEV;
- goto err_meta;
- }
+ bio->bi_disk = disk;
}
-submit:
blk_execute_rq(q, NULL, rq, 0);
if (nvme_req(rq)->flags & NVME_REQ_CANCELLED)
@@ -673,11 +665,8 @@ static int nvme_nvm_submit_user_cmd(struct request_queue *q,
if (meta_buf && meta_len)
dma_pool_free(dev->dma_pool, metadata, metadata_dma);
err_map:
- if (bio) {
- if (disk && bio->bi_bdev)
- bdput(bio->bi_bdev);
+ if (bio)
blk_rq_unmap_user(bio);
- }
err_ppa:
if (ppa_buf && ppa_len)
dma_pool_free(dev->dma_pool, ppa_list, ppa_dma);
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index a7f7d0a..bf42d31 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -711,14 +711,16 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
if (ctrl->ctrl.queue_count > 1) {
nvme_rdma_free_io_queues(ctrl);
- ret = blk_mq_reinit_tagset(&ctrl->tag_set);
+ ret = blk_mq_reinit_tagset(&ctrl->tag_set,
+ nvme_rdma_reinit_request);
if (ret)
goto requeue;
}
nvme_rdma_stop_and_free_queue(&ctrl->queues[0]);
- ret = blk_mq_reinit_tagset(&ctrl->admin_tag_set);
+ ret = blk_mq_reinit_tagset(&ctrl->admin_tag_set,
+ nvme_rdma_reinit_request);
if (ret)
goto requeue;
@@ -1521,7 +1523,6 @@ static const struct blk_mq_ops nvme_rdma_mq_ops = {
.complete = nvme_rdma_complete_rq,
.init_request = nvme_rdma_init_request,
.exit_request = nvme_rdma_exit_request,
- .reinit_request = nvme_rdma_reinit_request,
.init_hctx = nvme_rdma_init_hctx,
.poll = nvme_rdma_poll,
.timeout = nvme_rdma_timeout,
@@ -1533,7 +1534,6 @@ static const struct blk_mq_ops nvme_rdma_admin_mq_ops = {
.complete = nvme_rdma_complete_rq,
.init_request = nvme_rdma_init_request,
.exit_request = nvme_rdma_exit_request,
- .reinit_request = nvme_rdma_reinit_request,
.init_hctx = nvme_rdma_init_admin_hctx,
.timeout = nvme_rdma_timeout,
};
@@ -1731,7 +1731,8 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
}
if (ctrl->ctrl.queue_count > 1) {
- ret = blk_mq_reinit_tagset(&ctrl->tag_set);
+ ret = blk_mq_reinit_tagset(&ctrl->tag_set,
+ nvme_rdma_reinit_request);
if (ret)
goto del_dead_ctrl;
diff --git a/drivers/nvme/target/io-cmd.c b/drivers/nvme/target/io-cmd.c
index 3b4d47a..0d4c23d 100644
--- a/drivers/nvme/target/io-cmd.c
+++ b/drivers/nvme/target/io-cmd.c
@@ -68,7 +68,7 @@ static void nvmet_execute_rw(struct nvmet_req *req)
nvmet_inline_bio_init(req);
bio = &req->inline_bio;
- bio->bi_bdev = req->ns->bdev;
+ bio_set_dev(bio, req->ns->bdev);
bio->bi_iter.bi_sector = sector;
bio->bi_private = req;
bio->bi_end_io = nvmet_bio_done;
@@ -80,7 +80,7 @@ static void nvmet_execute_rw(struct nvmet_req *req)
struct bio *prev = bio;
bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES));
- bio->bi_bdev = req->ns->bdev;
+ bio_set_dev(bio, req->ns->bdev);
bio->bi_iter.bi_sector = sector;
bio_set_op_attrs(bio, op, op_flags);
@@ -104,7 +104,7 @@ static void nvmet_execute_flush(struct nvmet_req *req)
nvmet_inline_bio_init(req);
bio = &req->inline_bio;
- bio->bi_bdev = req->ns->bdev;
+ bio_set_dev(bio, req->ns->bdev);
bio->bi_private = req;
bio->bi_end_io = nvmet_bio_done;
bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index 68bae4f..7abb240 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -856,14 +856,14 @@ dcssblk_make_request(struct request_queue *q, struct bio *bio)
blk_queue_split(q, &bio);
bytes_done = 0;
- dev_info = bio->bi_bdev->bd_disk->private_data;
+ dev_info = bio->bi_disk->private_data;
if (dev_info == NULL)
goto fail;
if ((bio->bi_iter.bi_sector & 7) != 0 ||
(bio->bi_iter.bi_size & 4095) != 0)
/* Request is not page-aligned. */
goto fail;
- if (bio_end_sector(bio) > get_capacity(bio->bi_bdev->bd_disk)) {
+ if (bio_end_sector(bio) > get_capacity(bio->bi_disk)) {
/* Request beyond end of DCSS segment. */
goto fail;
}
diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c
index a48f0d4..571a070 100644
--- a/drivers/s390/block/xpram.c
+++ b/drivers/s390/block/xpram.c
@@ -183,7 +183,7 @@ static unsigned long xpram_highest_page_index(void)
*/
static blk_qc_t xpram_make_request(struct request_queue *q, struct bio *bio)
{
- xpram_device_t *xdev = bio->bi_bdev->bd_disk->private_data;
+ xpram_device_t *xdev = bio->bi_disk->private_data;
struct bio_vec bvec;
struct bvec_iter iter;
unsigned int index;
diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
index ee7c7fa..07c814c 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
@@ -338,7 +338,7 @@ iblock_get_bio(struct se_cmd *cmd, sector_t lba, u32 sg_num, int op,
return NULL;
}
- bio->bi_bdev = ib_dev->ibd_bd;
+ bio_set_dev(bio, ib_dev->ibd_bd);
bio->bi_private = cmd;
bio->bi_end_io = &iblock_bio_done;
bio->bi_iter.bi_sector = lba;
@@ -395,7 +395,7 @@ iblock_execute_sync_cache(struct se_cmd *cmd)
bio = bio_alloc(GFP_KERNEL, 0);
bio->bi_end_io = iblock_end_io_flush;
- bio->bi_bdev = ib_dev->ibd_bd;
+ bio_set_dev(bio, ib_dev->ibd_bd);
bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
if (!immed)
bio->bi_private = cmd;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 9941dc8..bb715b2 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -223,7 +223,7 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
}
bio_init(&bio, vecs, nr_pages);
- bio.bi_bdev = bdev;
+ bio_set_dev(&bio, bdev);
bio.bi_iter.bi_sector = pos >> 9;
bio.bi_write_hint = iocb->ki_hint;
bio.bi_private = current;
@@ -362,7 +362,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
blk_start_plug(&plug);
for (;;) {
- bio->bi_bdev = bdev;
+ bio_set_dev(bio, bdev);
bio->bi_iter.bi_sector = pos >> 9;
bio->bi_write_hint = iocb->ki_hint;
bio->bi_private = dio;
@@ -1451,6 +1451,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
bdev->bd_disk = disk;
bdev->bd_queue = disk->queue;
bdev->bd_contains = bdev;
+ bdev->bd_partno = partno;
if (!partno) {
ret = -ENXIO;
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index 11d37c9..fb07e3c 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -296,8 +296,7 @@ static void btrfsic_dev_state_hashtable_add(
struct btrfsic_dev_state *ds,
struct btrfsic_dev_state_hashtable *h);
static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state *ds);
-static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(
- struct block_device *bdev,
+static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(dev_t dev,
struct btrfsic_dev_state_hashtable *h);
static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void);
static void btrfsic_stack_frame_free(struct btrfsic_stack_frame *sf);
@@ -385,8 +384,7 @@ static int btrfsic_process_superblock_dev_mirror(
int superblock_mirror_num,
struct btrfsic_dev_state **selected_dev_state,
struct btrfs_super_block *selected_super);
-static struct btrfsic_dev_state *btrfsic_dev_state_lookup(
- struct block_device *bdev);
+static struct btrfsic_dev_state *btrfsic_dev_state_lookup(dev_t dev);
static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
u64 bytenr,
struct btrfsic_dev_state *dev_state,
@@ -626,17 +624,15 @@ static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state *ds)
list_del(&ds->collision_resolving_node);
}
-static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(
- struct block_device *bdev,
+static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(dev_t dev,
struct btrfsic_dev_state_hashtable *h)
{
const unsigned int hashval =
- (((unsigned int)((uintptr_t)bdev)) &
- (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1));
+ dev & (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1);
struct btrfsic_dev_state *ds;
list_for_each_entry(ds, h->table + hashval, collision_resolving_node) {
- if (ds->bdev == bdev)
+ if (ds->bdev->bd_dev == dev)
return ds;
}
@@ -668,7 +664,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state,
if (!device->bdev || !device->name)
continue;
- dev_state = btrfsic_dev_state_lookup(device->bdev);
+ dev_state = btrfsic_dev_state_lookup(device->bdev->bd_dev);
BUG_ON(NULL == dev_state);
for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
ret = btrfsic_process_superblock_dev_mirror(
@@ -1556,7 +1552,7 @@ static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
}
device = multi->stripes[0].dev;
- block_ctx_out->dev = btrfsic_dev_state_lookup(device->bdev);
+ block_ctx_out->dev = btrfsic_dev_state_lookup(device->bdev->bd_dev);
block_ctx_out->dev_bytenr = multi->stripes[0].physical;
block_ctx_out->start = bytenr;
block_ctx_out->len = len;
@@ -1639,7 +1635,7 @@ static int btrfsic_read_block(struct btrfsic_state *state,
unsigned int j;
bio = btrfs_io_bio_alloc(num_pages - i);
- bio->bi_bdev = block_ctx->dev->bdev;
+ bio_set_dev(bio, block_ctx->dev->bdev);
bio->bi_iter.bi_sector = dev_bytenr >> 9;
bio_set_op_attrs(bio, REQ_OP_READ, 0);
@@ -2654,7 +2650,7 @@ static struct btrfsic_block *btrfsic_block_lookup_or_add(
pr_info("btrfsic: error, kmalloc failed!\n");
return NULL;
}
- dev_state = btrfsic_dev_state_lookup(block_ctx->dev->bdev);
+ dev_state = btrfsic_dev_state_lookup(block_ctx->dev->bdev->bd_dev);
if (NULL == dev_state) {
pr_info("btrfsic: error, lookup dev_state failed!\n");
btrfsic_block_free(block);
@@ -2734,10 +2730,9 @@ static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
}
}
-static struct btrfsic_dev_state *btrfsic_dev_state_lookup(
- struct block_device *bdev)
+static struct btrfsic_dev_state *btrfsic_dev_state_lookup(dev_t dev)
{
- return btrfsic_dev_state_hashtable_lookup(bdev,
+ return btrfsic_dev_state_hashtable_lookup(dev,
&btrfsic_dev_state_hashtable);
}
@@ -2751,7 +2746,7 @@ int btrfsic_submit_bh(int op, int op_flags, struct buffer_head *bh)
mutex_lock(&btrfsic_mutex);
/* since btrfsic_submit_bh() might also be called before
* btrfsic_mount(), this might return NULL */
- dev_state = btrfsic_dev_state_lookup(bh->b_bdev);
+ dev_state = btrfsic_dev_state_lookup(bh->b_bdev->bd_dev);
/* Only called to write the superblock (incl. FLUSH/FUA) */
if (NULL != dev_state &&
@@ -2808,7 +2803,7 @@ static void __btrfsic_submit_bio(struct bio *bio)
mutex_lock(&btrfsic_mutex);
/* since btrfsic_submit_bio() is also called before
* btrfsic_mount(), this might return NULL */
- dev_state = btrfsic_dev_state_lookup(bio->bi_bdev);
+ dev_state = btrfsic_dev_state_lookup(bio_dev(bio));
if (NULL != dev_state &&
(bio_op(bio) == REQ_OP_WRITE) && bio_has_data(bio)) {
unsigned int i = 0;
@@ -2824,10 +2819,10 @@ static void __btrfsic_submit_bio(struct bio *bio)
bio_is_patched = 0;
if (dev_state->state->print_mask &
BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
- pr_info("submit_bio(rw=%d,0x%x, bi_vcnt=%u, bi_sector=%llu (bytenr %llu), bi_bdev=%p)\n",
+ pr_info("submit_bio(rw=%d,0x%x, bi_vcnt=%u, bi_sector=%llu (bytenr %llu), bi_disk=%p)\n",
bio_op(bio), bio->bi_opf, segs,
(unsigned long long)bio->bi_iter.bi_sector,
- dev_bytenr, bio->bi_bdev);
+ dev_bytenr, bio->bi_disk);
mapped_datav = kmalloc_array(segs,
sizeof(*mapped_datav), GFP_NOFS);
@@ -2856,8 +2851,8 @@ static void __btrfsic_submit_bio(struct bio *bio)
} else if (NULL != dev_state && (bio->bi_opf & REQ_PREFLUSH)) {
if (dev_state->state->print_mask &
BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
- pr_info("submit_bio(rw=%d,0x%x FLUSH, bdev=%p)\n",
- bio_op(bio), bio->bi_opf, bio->bi_bdev);
+ pr_info("submit_bio(rw=%d,0x%x FLUSH, disk=%p)\n",
+ bio_op(bio), bio->bi_opf, bio->bi_disk);
if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) {
if ((dev_state->state->print_mask &
(BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
@@ -2998,7 +2993,7 @@ void btrfsic_unmount(struct btrfs_fs_devices *fs_devices)
continue;
ds = btrfsic_dev_state_hashtable_lookup(
- device->bdev,
+ device->bdev->bd_dev,
&btrfsic_dev_state_hashtable);
if (NULL != ds) {
state = ds->state;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index f45b61f..4f428a4 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3499,7 +3499,7 @@ static void write_dev_flush(struct btrfs_device *device)
bio_reset(bio);
bio->bi_end_io = btrfs_end_empty_barrier;
- bio->bi_bdev = device->bdev;
+ bio_set_dev(bio, device->bdev);
bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH;
init_completion(&device->flush_wait);
bio->bi_private = &device->flush_wait;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 0aff9b2..42b12a8 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2033,7 +2033,7 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
bio_put(bio);
return -EIO;
}
- bio->bi_bdev = dev->bdev;
+ bio_set_dev(bio, dev->bdev);
bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
bio_add_page(bio, page, length, pg_offset);
@@ -2335,7 +2335,7 @@ struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio,
bio = btrfs_io_bio_alloc(1);
bio->bi_end_io = endio_func;
bio->bi_iter.bi_sector = failrec->logical >> 9;
- bio->bi_bdev = fs_info->fs_devices->latest_bdev;
+ bio_set_dev(bio, fs_info->fs_devices->latest_bdev);
bio->bi_iter.bi_size = 0;
bio->bi_private = data;
@@ -2675,7 +2675,7 @@ struct bio *btrfs_bio_alloc(struct block_device *bdev, u64 first_byte)
struct bio *bio;
bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, btrfs_bioset);
- bio->bi_bdev = bdev;
+ bio_set_dev(bio, bdev);
bio->bi_iter.bi_sector = first_byte >> 9;
btrfs_io_bio_init(btrfs_io_bio(bio));
return bio;
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 2cf6ba4..24a6222 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -1090,7 +1090,8 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
*/
if (last_end == disk_start && stripe->dev->bdev &&
!last->bi_status &&
- last->bi_bdev == stripe->dev->bdev) {
+ last->bi_disk == stripe->dev->bdev->bd_disk &&
+ last->bi_partno == stripe->dev->bdev->bd_partno) {
ret = bio_add_page(last, page, PAGE_SIZE, 0);
if (ret == PAGE_SIZE)
return 0;
@@ -1100,7 +1101,7 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
/* put a new bio on the list */
bio = btrfs_io_bio_alloc(bio_max_len >> PAGE_SHIFT ?: 1);
bio->bi_iter.bi_size = 0;
- bio->bi_bdev = stripe->dev->bdev;
+ bio_set_dev(bio, stripe->dev->bdev);
bio->bi_iter.bi_sector = disk_start >> 9;
bio_add_page(bio, page, PAGE_SIZE, 0);
@@ -1347,7 +1348,8 @@ static int find_bio_stripe(struct btrfs_raid_bio *rbio,
stripe_start = stripe->physical;
if (physical >= stripe_start &&
physical < stripe_start + rbio->stripe_len &&
- bio->bi_bdev == stripe->dev->bdev) {
+ bio->bi_disk == stripe->dev->bdev->bd_disk &&
+ bio->bi_partno == stripe->dev->bdev->bd_partno) {
return i;
}
}
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 6f1e4c9..b0b71e8 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -1738,7 +1738,7 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
WARN_ON(!page->page);
bio = btrfs_io_bio_alloc(1);
- bio->bi_bdev = page->dev->bdev;
+ bio_set_dev(bio, page->dev->bdev);
bio_add_page(bio, page->page, PAGE_SIZE, 0);
if (!retry_failed_mirror && scrub_is_page_on_raid56(page)) {
@@ -1826,7 +1826,7 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
}
bio = btrfs_io_bio_alloc(1);
- bio->bi_bdev = page_bad->dev->bdev;
+ bio_set_dev(bio, page_bad->dev->bdev);
bio->bi_iter.bi_sector = page_bad->physical >> 9;
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
@@ -1921,7 +1921,7 @@ static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
bio->bi_private = sbio;
bio->bi_end_io = scrub_wr_bio_end_io;
- bio->bi_bdev = sbio->dev->bdev;
+ bio_set_dev(bio, sbio->dev->bdev);
bio->bi_iter.bi_sector = sbio->physical >> 9;
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
sbio->status = 0;
@@ -1964,7 +1964,7 @@ static void scrub_wr_submit(struct scrub_ctx *sctx)
sbio = sctx->wr_curr_bio;
sctx->wr_curr_bio = NULL;
- WARN_ON(!sbio->bio->bi_bdev);
+ WARN_ON(!sbio->bio->bi_disk);
scrub_pending_bio_inc(sctx);
/* process all writes in a single worker thread. Then the block layer
* orders the requests before sending them to the driver which
@@ -2321,7 +2321,7 @@ static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx,
bio->bi_private = sbio;
bio->bi_end_io = scrub_bio_end_io;
- bio->bi_bdev = sbio->dev->bdev;
+ bio_set_dev(bio, sbio->dev->bdev);
bio->bi_iter.bi_sector = sbio->physical >> 9;
bio_set_op_attrs(bio, REQ_OP_READ, 0);
sbio->status = 0;
@@ -4627,7 +4627,7 @@ static int write_page_nocow(struct scrub_ctx *sctx,
bio = btrfs_io_bio_alloc(1);
bio->bi_iter.bi_size = 0;
bio->bi_iter.bi_sector = physical_for_dev_replace >> 9;
- bio->bi_bdev = dev->bdev;
+ bio_set_dev(bio, dev->bdev);
bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
ret = bio_add_page(bio, page, PAGE_SIZE, 0);
if (ret != PAGE_SIZE) {
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index bd679bc..002aa31 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6188,7 +6188,7 @@ static void submit_stripe_bio(struct btrfs_bio *bbio, struct bio *bio,
rcu_read_unlock();
}
#endif
- bio->bi_bdev = dev->bdev;
+ bio_set_dev(bio, dev->bdev);
btrfs_bio_counter_inc_noblocked(fs_info);
diff --git a/fs/buffer.c b/fs/buffer.c
index 50da0e1..170df85 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -3056,7 +3056,7 @@ void guard_bio_eod(int op, struct bio *bio)
struct bio_vec *bvec = &bio->bi_io_vec[bio->bi_vcnt - 1];
unsigned truncated_bytes;
- maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9;
+ maxsector = get_capacity(bio->bi_disk);
if (!maxsector)
return;
@@ -3115,7 +3115,7 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
}
bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
- bio->bi_bdev = bh->b_bdev;
+ bio_set_dev(bio, bh->b_bdev);
bio->bi_write_hint = write_hint;
bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c
index 6181e95..483784d 100644
--- a/fs/crypto/bio.c
+++ b/fs/crypto/bio.c
@@ -115,7 +115,7 @@ int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk,
err = -ENOMEM;
goto errout;
}
- bio->bi_bdev = inode->i_sb->s_bdev;
+ bio_set_dev(bio, inode->i_sb->s_bdev);
bio->bi_iter.bi_sector =
pblk << (inode->i_sb->s_blocksize_bits - 9);
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 08cf278..5fa2211 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -111,7 +111,7 @@ struct dio {
int op;
int op_flags;
blk_qc_t bio_cookie;
- struct block_device *bio_bdev;
+ struct gendisk *bio_disk;
struct inode *inode;
loff_t i_size; /* i_size when submitted */
dio_iodone_t *end_io; /* IO completion function */
@@ -377,7 +377,7 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio,
*/
bio = bio_alloc(GFP_KERNEL, nr_vecs);
- bio->bi_bdev = bdev;
+ bio_set_dev(bio, bdev);
bio->bi_iter.bi_sector = first_sector;
bio_set_op_attrs(bio, dio->op, dio->op_flags);
if (dio->is_async)
@@ -412,7 +412,7 @@ static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio)
if (dio->is_async && dio->op == REQ_OP_READ && dio->should_dirty)
bio_set_pages_dirty(bio);
- dio->bio_bdev = bio->bi_bdev;
+ dio->bio_disk = bio->bi_disk;
if (sdio->submit_io) {
sdio->submit_io(bio, dio->inode, sdio->logical_offset_in_bio);
@@ -458,7 +458,7 @@ static struct bio *dio_await_one(struct dio *dio)
dio->waiter = current;
spin_unlock_irqrestore(&dio->bio_lock, flags);
if (!(dio->iocb->ki_flags & IOCB_HIPRI) ||
- !blk_mq_poll(bdev_get_queue(dio->bio_bdev), dio->bio_cookie))
+ !blk_mq_poll(dio->bio_disk->queue, dio->bio_cookie))
io_schedule();
/* wake up sets us TASK_RUNNING */
spin_lock_irqsave(&dio->bio_lock, flags);
diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c
index 8bb7280..3c6a9c1 100644
--- a/fs/exofs/ore.c
+++ b/fs/exofs/ore.c
@@ -869,7 +869,7 @@ static int _write_mirror(struct ore_io_state *ios, int cur_comp)
goto out;
}
- bio->bi_bdev = NULL;
+ bio->bi_disk = NULL;
bio->bi_next = NULL;
per_dev->offset = master_dev->offset;
per_dev->length = master_dev->length;
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index c2fce44..55ad7dd 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -300,7 +300,7 @@ static void ext4_end_bio(struct bio *bio)
char b[BDEVNAME_SIZE];
if (WARN_ONCE(!io_end, "io_end is NULL: %s: sector %Lu len %u err %d\n",
- bdevname(bio->bi_bdev, b),
+ bio_devname(bio, b),
(long long) bio->bi_iter.bi_sector,
(unsigned) bio_sectors(bio),
bio->bi_status)) {
@@ -375,7 +375,7 @@ static int io_submit_init_bio(struct ext4_io_submit *io,
return -ENOMEM;
wbc_init_bio(io->io_wbc, bio);
bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
- bio->bi_bdev = bh->b_bdev;
+ bio_set_dev(bio, bh->b_bdev);
bio->bi_end_io = ext4_end_bio;
bio->bi_private = ext4_get_io_end(io->io_end);
io->io_bio = bio;
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index 40a5497..04c9064 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -254,7 +254,7 @@ int ext4_mpage_readpages(struct address_space *mapping,
fscrypt_release_ctx(ctx);
goto set_error_page;
}
- bio->bi_bdev = bdev;
+ bio_set_dev(bio, bdev);
bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9);
bio->bi_end_io = mpage_end_io;
bio->bi_private = ctx;
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 87c1f41..a791aac 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -142,7 +142,7 @@ struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
}
}
if (bio) {
- bio->bi_bdev = bdev;
+ bio_set_dev(bio, bdev);
bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr);
}
return bdev;
@@ -161,7 +161,8 @@ int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr)
static bool __same_bdev(struct f2fs_sb_info *sbi,
block_t blk_addr, struct bio *bio)
{
- return f2fs_target_device(sbi, blk_addr, NULL) == bio->bi_bdev;
+ struct block_device *b = f2fs_target_device(sbi, blk_addr, NULL);
+ return bio->bi_disk == b->bd_disk && bio->bi_partno == b->bd_partno;
}
/*
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index f964b68..6f8fc4a 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -447,7 +447,7 @@ static int __submit_flush_wait(struct f2fs_sb_info *sbi,
int ret;
bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH;
- bio->bi_bdev = bdev;
+ bio_set_dev(bio, bdev);
ret = submit_bio_wait(bio);
bio_put(bio);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 7dabbe7..c8ff7b7 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -268,7 +268,7 @@ static struct bio *gfs2_log_alloc_bio(struct gfs2_sbd *sdp, u64 blkno)
bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);
bio->bi_iter.bi_sector = blkno * (sb->s_blocksize >> 9);
- bio->bi_bdev = sb->s_bdev;
+ bio_set_dev(bio, sb->s_bdev);
bio->bi_end_io = gfs2_end_log_write;
bio->bi_private = sdp;
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 61ef6c9..52de103 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -221,7 +221,7 @@ static void gfs2_submit_bhs(int op, int op_flags, struct buffer_head *bhs[],
bio = bio_alloc(GFP_NOIO, num);
bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
- bio->bi_bdev = bh->b_bdev;
+ bio_set_dev(bio, bh->b_bdev);
while (num > 0) {
bh = *bhs;
if (!bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh))) {
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index c0a4b37..8459358 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -242,7 +242,7 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector, int silent)
bio = bio_alloc(GFP_NOFS, 1);
bio->bi_iter.bi_sector = sector * (sb->s_blocksize >> 9);
- bio->bi_bdev = sb->s_bdev;
+ bio_set_dev(bio, sb->s_bdev);
bio_add_page(bio, page, PAGE_SIZE, 0);
bio->bi_end_io = end_bio_io_page;
diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c
index e254fa0..10032b9 100644
--- a/fs/hfsplus/wrapper.c
+++ b/fs/hfsplus/wrapper.c
@@ -65,7 +65,7 @@ int hfsplus_submit_bio(struct super_block *sb, sector_t sector,
bio = bio_alloc(GFP_NOIO, 1);
bio->bi_iter.bi_sector = sector;
- bio->bi_bdev = sb->s_bdev;
+ bio_set_dev(bio, sb->s_bdev);
bio_set_op_attrs(bio, op, op_flags);
if (op != WRITE && data)
diff --git a/fs/iomap.c b/fs/iomap.c
index 8554a8d..269b24a0 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -805,7 +805,7 @@ iomap_dio_zero(struct iomap_dio *dio, struct iomap *iomap, loff_t pos,
struct bio *bio;
bio = bio_alloc(GFP_KERNEL, 1);
- bio->bi_bdev = iomap->bdev;
+ bio_set_dev(bio, iomap->bdev);
bio->bi_iter.bi_sector =
iomap->blkno + ((pos - iomap->offset) >> 9);
bio->bi_private = dio;
@@ -884,7 +884,7 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
return 0;
bio = bio_alloc(GFP_KERNEL, nr_pages);
- bio->bi_bdev = iomap->bdev;
+ bio_set_dev(bio, iomap->bdev);
bio->bi_iter.bi_sector =
iomap->blkno + ((pos - iomap->offset) >> 9);
bio->bi_write_hint = dio->iocb->ki_hint;
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index a21f0e9..0e5d412 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -1995,7 +1995,7 @@ static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp)
bio = bio_alloc(GFP_NOFS, 1);
bio->bi_iter.bi_sector = bp->l_blkno << (log->l2bsize - 9);
- bio->bi_bdev = log->bdev;
+ bio_set_dev(bio, log->bdev);
bio_add_page(bio, bp->l_page, LOGPSIZE, bp->l_offset);
BUG_ON(bio->bi_iter.bi_size != LOGPSIZE);
@@ -2139,7 +2139,7 @@ static void lbmStartIO(struct lbuf * bp)
bio = bio_alloc(GFP_NOFS, 1);
bio->bi_iter.bi_sector = bp->l_blkno << (log->l2bsize - 9);
- bio->bi_bdev = log->bdev;
+ bio_set_dev(bio, log->bdev);
bio_add_page(bio, bp->l_page, LOGPSIZE, bp->l_offset);
BUG_ON(bio->bi_iter.bi_size != LOGPSIZE);
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 65120a4..1c4b9ad 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -430,7 +430,7 @@ static int metapage_writepage(struct page *page, struct writeback_control *wbc)
len = min(xlen, (int)JFS_SBI(inode->i_sb)->nbperpage);
bio = bio_alloc(GFP_NOFS, 1);
- bio->bi_bdev = inode->i_sb->s_bdev;
+ bio_set_dev(bio, inode->i_sb->s_bdev);
bio->bi_iter.bi_sector = pblock << (inode->i_blkbits - 9);
bio->bi_end_io = metapage_write_end_io;
bio->bi_private = page;
@@ -510,7 +510,7 @@ static int metapage_readpage(struct file *fp, struct page *page)
submit_bio(bio);
bio = bio_alloc(GFP_NOFS, 1);
- bio->bi_bdev = inode->i_sb->s_bdev;
+ bio_set_dev(bio, inode->i_sb->s_bdev);
bio->bi_iter.bi_sector =
pblock << (inode->i_blkbits - 9);
bio->bi_end_io = metapage_read_end_io;
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index db5900aaa..89d1dc1 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -21,6 +21,7 @@
DEFINE_MUTEX(kernfs_mutex);
static DEFINE_SPINLOCK(kernfs_rename_lock); /* kn->parent and ->name */
static char kernfs_pr_cont_buf[PATH_MAX]; /* protected by rename_lock */
+static DEFINE_SPINLOCK(kernfs_idr_lock); /* root->ino_idr */
#define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb)
@@ -507,6 +508,10 @@ void kernfs_put(struct kernfs_node *kn)
struct kernfs_node *parent;
struct kernfs_root *root;
+ /*
+ * kernfs_node is freed with ->count 0, kernfs_find_and_get_node_by_ino
+ * depends on this to filter reused stale node
+ */
if (!kn || !atomic_dec_and_test(&kn->count))
return;
root = kernfs_root(kn);
@@ -533,7 +538,9 @@ void kernfs_put(struct kernfs_node *kn)
simple_xattrs_free(&kn->iattr->xattrs);
}
kfree(kn->iattr);
- ida_simple_remove(&root->ino_ida, kn->ino);
+ spin_lock(&kernfs_idr_lock);
+ idr_remove(&root->ino_idr, kn->id.ino);
+ spin_unlock(&kernfs_idr_lock);
kmem_cache_free(kernfs_node_cache, kn);
kn = parent;
@@ -542,7 +549,7 @@ void kernfs_put(struct kernfs_node *kn)
goto repeat;
} else {
/* just released the root kn, free @root too */
- ida_destroy(&root->ino_ida);
+ idr_destroy(&root->ino_idr);
kfree(root);
}
}
@@ -559,7 +566,7 @@ static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags)
if (d_really_is_negative(dentry))
goto out_bad_unlocked;
- kn = dentry->d_fsdata;
+ kn = kernfs_dentry_node(dentry);
mutex_lock(&kernfs_mutex);
/* The kernfs node has been deactivated */
@@ -567,7 +574,7 @@ static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags)
goto out_bad;
/* The kernfs node has been moved? */
- if (dentry->d_parent->d_fsdata != kn->parent)
+ if (kernfs_dentry_node(dentry->d_parent) != kn->parent)
goto out_bad;
/* The kernfs node has been renamed */
@@ -587,14 +594,8 @@ static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags)
return 0;
}
-static void kernfs_dop_release(struct dentry *dentry)
-{
- kernfs_put(dentry->d_fsdata);
-}
-
const struct dentry_operations kernfs_dops = {
.d_revalidate = kernfs_dop_revalidate,
- .d_release = kernfs_dop_release,
};
/**
@@ -610,8 +611,9 @@ const struct dentry_operations kernfs_dops = {
*/
struct kernfs_node *kernfs_node_from_dentry(struct dentry *dentry)
{
- if (dentry->d_sb->s_op == &kernfs_sops)
- return dentry->d_fsdata;
+ if (dentry->d_sb->s_op == &kernfs_sops &&
+ !d_really_is_negative(dentry))
+ return kernfs_dentry_node(dentry);
return NULL;
}
@@ -620,6 +622,8 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
unsigned flags)
{
struct kernfs_node *kn;
+ u32 gen;
+ int cursor;
int ret;
name = kstrdup_const(name, GFP_KERNEL);
@@ -630,11 +634,25 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
if (!kn)
goto err_out1;
- ret = ida_simple_get(&root->ino_ida, 1, 0, GFP_KERNEL);
+ idr_preload(GFP_KERNEL);
+ spin_lock(&kernfs_idr_lock);
+ cursor = idr_get_cursor(&root->ino_idr);
+ ret = idr_alloc_cyclic(&root->ino_idr, kn, 1, 0, GFP_ATOMIC);
+ if (ret >= 0 && ret < cursor)
+ root->next_generation++;
+ gen = root->next_generation;
+ spin_unlock(&kernfs_idr_lock);
+ idr_preload_end();
if (ret < 0)
goto err_out2;
- kn->ino = ret;
+ kn->id.ino = ret;
+ kn->id.generation = gen;
+ /*
+ * set ino first. This barrier is paired with atomic_inc_not_zero in
+ * kernfs_find_and_get_node_by_ino
+ */
+ smp_mb__before_atomic();
atomic_set(&kn->count, 1);
atomic_set(&kn->active, KN_DEACTIVATED_BIAS);
RB_CLEAR_NODE(&kn->rb);
@@ -666,6 +684,54 @@ struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
return kn;
}
+/*
+ * kernfs_find_and_get_node_by_ino - get kernfs_node from inode number
+ * @root: the kernfs root
+ * @ino: inode number
+ *
+ * RETURNS:
+ * NULL on failure. Return a kernfs node with reference counter incremented
+ */
+struct kernfs_node *kernfs_find_and_get_node_by_ino(struct kernfs_root *root,
+ unsigned int ino)
+{
+ struct kernfs_node *kn;
+
+ rcu_read_lock();
+ kn = idr_find(&root->ino_idr, ino);
+ if (!kn)
+ goto out;
+
+ /*
+ * Since kernfs_node is freed in RCU, it's possible an old node for ino
+ * is freed, but reused before RCU grace period. But a freed node (see
+ * kernfs_put) or an incompletedly initialized node (see
+ * __kernfs_new_node) should have 'count' 0. We can use this fact to
+ * filter out such node.
+ */
+ if (!atomic_inc_not_zero(&kn->count)) {
+ kn = NULL;
+ goto out;
+ }
+
+ /*
+ * The node could be a new node or a reused node. If it's a new node,
+ * we are ok. If it's reused because of RCU (because of
+ * SLAB_TYPESAFE_BY_RCU), the __kernfs_new_node always sets its 'ino'
+ * before 'count'. So if 'count' is uptodate, 'ino' should be uptodate,
+ * hence we can use 'ino' to filter stale node.
+ */
+ if (kn->id.ino != ino)
+ goto out;
+ rcu_read_unlock();
+
+ return kn;
+out:
+ rcu_read_unlock();
+ kernfs_put(kn);
+ return NULL;
+}
+
/**
* kernfs_add_one - add kernfs_node to parent without warning
* @kn: kernfs_node to be added
@@ -875,13 +941,14 @@ struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops,
if (!root)
return ERR_PTR(-ENOMEM);
- ida_init(&root->ino_ida);
+ idr_init(&root->ino_idr);
INIT_LIST_HEAD(&root->supers);
+ root->next_generation = 1;
kn = __kernfs_new_node(root, "", S_IFDIR | S_IRUGO | S_IXUGO,
KERNFS_DIR);
if (!kn) {
- ida_destroy(&root->ino_ida);
+ idr_destroy(&root->ino_idr);
kfree(root);
return ERR_PTR(-ENOMEM);
}
@@ -984,7 +1051,7 @@ static struct dentry *kernfs_iop_lookup(struct inode *dir,
unsigned int flags)
{
struct dentry *ret;
- struct kernfs_node *parent = dentry->d_parent->d_fsdata;
+ struct kernfs_node *parent = dir->i_private;
struct kernfs_node *kn;
struct inode *inode;
const void *ns = NULL;
@@ -1001,8 +1068,6 @@ static struct dentry *kernfs_iop_lookup(struct inode *dir,
ret = NULL;
goto out_unlock;
}
- kernfs_get(kn);
- dentry->d_fsdata = kn;
/* attach dentry and inode */
inode = kernfs_get_inode(dir->i_sb, kn);
@@ -1039,7 +1104,7 @@ static int kernfs_iop_mkdir(struct inode *dir, struct dentry *dentry,
static int kernfs_iop_rmdir(struct inode *dir, struct dentry *dentry)
{
- struct kernfs_node *kn = dentry->d_fsdata;
+ struct kernfs_node *kn = kernfs_dentry_node(dentry);
struct kernfs_syscall_ops *scops = kernfs_root(kn)->syscall_ops;
int ret;
@@ -1059,7 +1124,7 @@ static int kernfs_iop_rename(struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry,
unsigned int flags)
{
- struct kernfs_node *kn = old_dentry->d_fsdata;
+ struct kernfs_node *kn = kernfs_dentry_node(old_dentry);
struct kernfs_node *new_parent = new_dir->i_private;
struct kernfs_syscall_ops *scops = kernfs_root(kn)->syscall_ops;
int ret;
@@ -1572,7 +1637,7 @@ static struct kernfs_node *kernfs_dir_next_pos(const void *ns,
static int kernfs_fop_readdir(struct file *file, struct dir_context *ctx)
{
struct dentry *dentry = file->f_path.dentry;
- struct kernfs_node *parent = dentry->d_fsdata;
+ struct kernfs_node *parent = kernfs_dentry_node(dentry);
struct kernfs_node *pos = file->private_data;
const void *ns = NULL;
@@ -1589,7 +1654,7 @@ static int kernfs_fop_readdir(struct file *file, struct dir_context *ctx)
const char *name = pos->name;
unsigned int type = dt_type(pos);
int len = strlen(name);
- ino_t ino = pos->ino;
+ ino_t ino = pos->id.ino;
ctx->pos = pos->hash;
file->private_data = pos;
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index e6c8954..9698e51 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -616,7 +616,7 @@ static void kernfs_put_open_node(struct kernfs_node *kn,
static int kernfs_fop_open(struct inode *inode, struct file *file)
{
- struct kernfs_node *kn = file->f_path.dentry->d_fsdata;
+ struct kernfs_node *kn = inode->i_private;
struct kernfs_root *root = kernfs_root(kn);
const struct kernfs_ops *ops;
struct kernfs_open_file *of;
@@ -768,7 +768,7 @@ static void kernfs_release_file(struct kernfs_node *kn,
static int kernfs_fop_release(struct inode *inode, struct file *filp)
{
- struct kernfs_node *kn = filp->f_path.dentry->d_fsdata;
+ struct kernfs_node *kn = inode->i_private;
struct kernfs_open_file *of = kernfs_of(filp);
if (kn->flags & KERNFS_HAS_RELEASE) {
@@ -835,7 +835,7 @@ void kernfs_drain_open_files(struct kernfs_node *kn)
static unsigned int kernfs_fop_poll(struct file *filp, poll_table *wait)
{
struct kernfs_open_file *of = kernfs_of(filp);
- struct kernfs_node *kn = filp->f_path.dentry->d_fsdata;
+ struct kernfs_node *kn = kernfs_dentry_node(filp->f_path.dentry);
struct kernfs_open_node *on = kn->attr.open;
if (!kernfs_get_active(kn))
@@ -895,7 +895,7 @@ static void kernfs_notify_workfn(struct work_struct *work)
* have the matching @file available. Look up the inodes
* and generate the events manually.
*/
- inode = ilookup(info->sb, kn->ino);
+ inode = ilookup(info->sb, kn->id.ino);
if (!inode)
continue;
@@ -903,7 +903,7 @@ static void kernfs_notify_workfn(struct work_struct *work)
if (parent) {
struct inode *p_inode;
- p_inode = ilookup(info->sb, parent->ino);
+ p_inode = ilookup(info->sb, parent->id.ino);
if (p_inode) {
fsnotify(p_inode, FS_MODIFY | FS_EVENT_ON_CHILD,
inode, FSNOTIFY_EVENT_INODE, kn->name, 0);
diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c
index fb4b4a7..a343039 100644
--- a/fs/kernfs/inode.c
+++ b/fs/kernfs/inode.c
@@ -112,7 +112,7 @@ int kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr)
int kernfs_iop_setattr(struct dentry *dentry, struct iattr *iattr)
{
struct inode *inode = d_inode(dentry);
- struct kernfs_node *kn = dentry->d_fsdata;
+ struct kernfs_node *kn = inode->i_private;
int error;
if (!kn)
@@ -154,7 +154,7 @@ static int kernfs_node_setsecdata(struct kernfs_iattrs *attrs, void **secdata,
ssize_t kernfs_iop_listxattr(struct dentry *dentry, char *buf, size_t size)
{
- struct kernfs_node *kn = dentry->d_fsdata;
+ struct kernfs_node *kn = kernfs_dentry_node(dentry);
struct kernfs_iattrs *attrs;
attrs = kernfs_iattrs(kn);
@@ -203,8 +203,8 @@ static void kernfs_refresh_inode(struct kernfs_node *kn, struct inode *inode)
int kernfs_iop_getattr(const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int query_flags)
{
- struct kernfs_node *kn = path->dentry->d_fsdata;
struct inode *inode = d_inode(path->dentry);
+ struct kernfs_node *kn = inode->i_private;
mutex_lock(&kernfs_mutex);
kernfs_refresh_inode(kn, inode);
@@ -220,6 +220,7 @@ static void kernfs_init_inode(struct kernfs_node *kn, struct inode *inode)
inode->i_private = kn;
inode->i_mapping->a_ops = &kernfs_aops;
inode->i_op = &kernfs_iops;
+ inode->i_generation = kn->id.generation;
set_default_inode_attr(inode, kn->mode);
kernfs_refresh_inode(kn, inode);
@@ -265,7 +266,7 @@ struct inode *kernfs_get_inode(struct super_block *sb, struct kernfs_node *kn)
{
struct inode *inode;
- inode = iget_locked(sb, kn->ino);
+ inode = iget_locked(sb, kn->id.ino);
if (inode && (inode->i_state & I_NEW))
kernfs_init_inode(kn, inode);
diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h
index 2d5144a..0f260dc 100644
--- a/fs/kernfs/kernfs-internal.h
+++ b/fs/kernfs/kernfs-internal.h
@@ -70,6 +70,13 @@ struct kernfs_super_info {
};
#define kernfs_info(SB) ((struct kernfs_super_info *)(SB->s_fs_info))
+static inline struct kernfs_node *kernfs_dentry_node(struct dentry *dentry)
+{
+ if (d_really_is_negative(dentry))
+ return NULL;
+ return d_inode(dentry)->i_private;
+}
+
extern const struct super_operations kernfs_sops;
extern struct kmem_cache *kernfs_node_cache;
@@ -98,6 +105,8 @@ int kernfs_add_one(struct kernfs_node *kn);
struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
const char *name, umode_t mode,
unsigned flags);
+struct kernfs_node *kernfs_find_and_get_node_by_ino(struct kernfs_root *root,
+ unsigned int ino);
/*
* file.c
diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index d5b149a..95a7c88 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -16,6 +16,7 @@
#include <linux/pagemap.h>
#include <linux/namei.h>
#include <linux/seq_file.h>
+#include <linux/exportfs.h>
#include "kernfs-internal.h"
@@ -33,7 +34,7 @@ static int kernfs_sop_remount_fs(struct super_block *sb, int *flags, char *data)
static int kernfs_sop_show_options(struct seq_file *sf, struct dentry *dentry)
{
- struct kernfs_root *root = kernfs_root(dentry->d_fsdata);
+ struct kernfs_root *root = kernfs_root(kernfs_dentry_node(dentry));
struct kernfs_syscall_ops *scops = root->syscall_ops;
if (scops && scops->show_options)
@@ -43,7 +44,7 @@ static int kernfs_sop_show_options(struct seq_file *sf, struct dentry *dentry)
static int kernfs_sop_show_path(struct seq_file *sf, struct dentry *dentry)
{
- struct kernfs_node *node = dentry->d_fsdata;
+ struct kernfs_node *node = kernfs_dentry_node(dentry);
struct kernfs_root *root = kernfs_root(node);
struct kernfs_syscall_ops *scops = root->syscall_ops;
@@ -64,6 +65,78 @@ const struct super_operations kernfs_sops = {
.show_path = kernfs_sop_show_path,
};
+/*
+ * Similar to kernfs_fh_get_inode, this one gets kernfs node from inode
+ * number and generation
+ */
+struct kernfs_node *kernfs_get_node_by_id(struct kernfs_root *root,
+ const union kernfs_node_id *id)
+{
+ struct kernfs_node *kn;
+
+ kn = kernfs_find_and_get_node_by_ino(root, id->ino);
+ if (!kn)
+ return NULL;
+ if (kn->id.generation != id->generation) {
+ kernfs_put(kn);
+ return NULL;
+ }
+ return kn;
+}
+
+static struct inode *kernfs_fh_get_inode(struct super_block *sb,
+ u64 ino, u32 generation)
+{
+ struct kernfs_super_info *info = kernfs_info(sb);
+ struct inode *inode;
+ struct kernfs_node *kn;
+
+ if (ino == 0)
+ return ERR_PTR(-ESTALE);
+
+ kn = kernfs_find_and_get_node_by_ino(info->root, ino);
+ if (!kn)
+ return ERR_PTR(-ESTALE);
+ inode = kernfs_get_inode(sb, kn);
+ kernfs_put(kn);
+ if (!inode)
+ return ERR_PTR(-ESTALE);
+
+ if (generation && inode->i_generation != generation) {
+ /* we didn't find the right inode.. */
+ iput(inode);
+ return ERR_PTR(-ESTALE);
+ }
+ return inode;
+}
+
+static struct dentry *kernfs_fh_to_dentry(struct super_block *sb, struct fid *fid,
+ int fh_len, int fh_type)
+{
+ return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
+ kernfs_fh_get_inode);
+}
+
+static struct dentry *kernfs_fh_to_parent(struct super_block *sb, struct fid *fid,
+ int fh_len, int fh_type)
+{
+ return generic_fh_to_parent(sb, fid, fh_len, fh_type,
+ kernfs_fh_get_inode);
+}
+
+static struct dentry *kernfs_get_parent_dentry(struct dentry *child)
+{
+ struct kernfs_node *kn = kernfs_dentry_node(child);
+
+ return d_obtain_alias(kernfs_get_inode(child->d_sb, kn->parent));
+}
+
+static const struct export_operations kernfs_export_ops = {
+ .fh_to_dentry = kernfs_fh_to_dentry,
+ .fh_to_parent = kernfs_fh_to_parent,
+ .get_parent = kernfs_get_parent_dentry,
+};
+
/**
* kernfs_root_from_sb - determine kernfs_root associated with a super_block
* @sb: the super_block in question
@@ -159,6 +232,8 @@ static int kernfs_fill_super(struct super_block *sb, unsigned long magic)
sb->s_magic = magic;
sb->s_op = &kernfs_sops;
sb->s_xattr = kernfs_xattr_handlers;
+ if (info->root->flags & KERNFS_ROOT_SUPPORT_EXPORTOP)
+ sb->s_export_op = &kernfs_export_ops;
sb->s_time_gran = 1;
/* get root inode, initialize and unlock it */
@@ -176,8 +251,6 @@ static int kernfs_fill_super(struct super_block *sb, unsigned long magic)
pr_debug("%s: could not get root dentry!\n", __func__);
return -ENOMEM;
}
- kernfs_get(info->root->kn);
- root->d_fsdata = info->root->kn;
sb->s_root = root;
sb->s_d_op = &kernfs_dops;
return 0;
@@ -283,7 +356,6 @@ struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags,
void kernfs_kill_sb(struct super_block *sb)
{
struct kernfs_super_info *info = kernfs_info(sb);
- struct kernfs_node *root_kn = sb->s_root->d_fsdata;
mutex_lock(&kernfs_mutex);
list_del(&info->node);
@@ -295,7 +367,6 @@ void kernfs_kill_sb(struct super_block *sb)
*/
kill_anon_super(sb);
kfree(info);
- kernfs_put(root_kn);
}
/**
@@ -330,7 +401,16 @@ struct super_block *kernfs_pin_sb(struct kernfs_root *root, const void *ns)
void __init kernfs_init(void)
{
+
+ /*
+ * the slab is freed in RCU context, so kernfs_find_and_get_node_by_ino
+ * can access the slab lock free. This could introduce stale nodes,
+ * please see how kernfs_find_and_get_node_by_ino filters out stale
+ * nodes.
+ */
kernfs_node_cache = kmem_cache_create("kernfs_node_cache",
sizeof(struct kernfs_node),
- 0, SLAB_PANIC, NULL);
+ 0,
+ SLAB_PANIC | SLAB_TYPESAFE_BY_RCU,
+ NULL);
}
diff --git a/fs/kernfs/symlink.c b/fs/kernfs/symlink.c
index 1684af4..08ccabd 100644
--- a/fs/kernfs/symlink.c
+++ b/fs/kernfs/symlink.c
@@ -98,9 +98,9 @@ static int kernfs_get_target_path(struct kernfs_node *parent,
return 0;
}
-static int kernfs_getlink(struct dentry *dentry, char *path)
+static int kernfs_getlink(struct inode *inode, char *path)
{
- struct kernfs_node *kn = dentry->d_fsdata;
+ struct kernfs_node *kn = inode->i_private;
struct kernfs_node *parent = kn->parent;
struct kernfs_node *target = kn->symlink.target_kn;
int error;
@@ -124,7 +124,7 @@ static const char *kernfs_iop_get_link(struct dentry *dentry,
body = kzalloc(PAGE_SIZE, GFP_KERNEL);
if (!body)
return ERR_PTR(-ENOMEM);
- error = kernfs_getlink(dentry, body);
+ error = kernfs_getlink(inode, body);
if (unlikely(error < 0)) {
kfree(body);
return ERR_PTR(error);
diff --git a/fs/mpage.c b/fs/mpage.c
index 2e4c41c..37bb77c 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -83,7 +83,7 @@ mpage_alloc(struct block_device *bdev,
}
if (bio) {
- bio->bi_bdev = bdev;
+ bio_set_dev(bio, bdev);
bio->bi_iter.bi_sector = first_sector;
}
return bio;
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index d8863a8..995d707 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -130,7 +130,7 @@ bl_alloc_init_bio(int npg, struct block_device *bdev, sector_t disk_sector,
if (bio) {
bio->bi_iter.bi_sector = disk_sector;
- bio->bi_bdev = bdev;
+ bio_set_dev(bio, bdev);
bio->bi_end_io = end_io;
bio->bi_private = par;
}
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index e73c86d..6c5009c 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -400,7 +400,7 @@ static struct bio *nilfs_alloc_seg_bio(struct the_nilfs *nilfs, sector_t start,
bio = bio_alloc(GFP_NOIO, nr_vecs);
}
if (likely(bio)) {
- bio->bi_bdev = nilfs->ns_bdev;
+ bio_set_dev(bio, nilfs->ns_bdev);
bio->bi_iter.bi_sector =
start << (nilfs->ns_blocksize_bits - 9);
}
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 56ac07c..d020604 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -553,7 +553,7 @@ static struct bio *o2hb_setup_one_bio(struct o2hb_region *reg,
/* Must put everything in 512 byte sectors for the bio... */
bio->bi_iter.bi_sector = (reg->hr_start_block + cs) << (bits - 9);
- bio->bi_bdev = reg->hr_bdev;
+ bio_set_dev(bio, reg->hr_bdev);
bio->bi_private = wc;
bio->bi_end_io = o2hb_bio_end_io;
bio_set_op_attrs(bio, op, op_flags);
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index f9efd67..fffae13 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -540,7 +540,7 @@ xfs_init_bio_from_bh(
struct buffer_head *bh)
{
bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
- bio->bi_bdev = bh->b_bdev;
+ bio_set_dev(bio, bh->b_bdev);
}
static struct xfs_ioend *
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 72f0384..b1c9711 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1281,7 +1281,7 @@ xfs_buf_ioapply_map(
nr_pages = min(total_nr_pages, BIO_MAX_PAGES);
bio = bio_alloc(GFP_NOIO, nr_pages);
- bio->bi_bdev = bp->b_target->bt_bdev;
+ bio_set_dev(bio, bp->b_target->bt_bdev);
bio->bi_iter.bi_sector = sector;
bio->bi_end_io = xfs_buf_bio_end_io;
bio->bi_private = bp;
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 1f0720d..275c91c 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -471,10 +471,11 @@ extern struct bio *bio_copy_kern(struct request_queue *, void *, unsigned int,
extern void bio_set_pages_dirty(struct bio *bio);
extern void bio_check_pages_dirty(struct bio *bio);
-void generic_start_io_acct(int rw, unsigned long sectors,
- struct hd_struct *part);
-void generic_end_io_acct(int rw, struct hd_struct *part,
- unsigned long start_time);
+void generic_start_io_acct(struct request_queue *q, int rw,
+ unsigned long sectors, struct hd_struct *part);
+void generic_end_io_acct(struct request_queue *q, int rw,
+ struct hd_struct *part,
+ unsigned long start_time);
#ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
# error "You should define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE for your platform"
@@ -501,6 +502,24 @@ extern struct bio_vec *bvec_alloc(gfp_t, int, unsigned long *, mempool_t *);
extern void bvec_free(mempool_t *, struct bio_vec *, unsigned int);
extern unsigned int bvec_nr_vecs(unsigned short idx);
+#define bio_set_dev(bio, bdev) \
+do { \
+ (bio)->bi_disk = (bdev)->bd_disk; \
+ (bio)->bi_partno = (bdev)->bd_partno; \
+} while (0)
+
+#define bio_copy_dev(dst, src) \
+do { \
+ (dst)->bi_disk = (src)->bi_disk; \
+ (dst)->bi_partno = (src)->bi_partno; \
+} while (0)
+
+#define bio_dev(bio) \
+ disk_devt((bio)->bi_disk)
+
+#define bio_devname(bio, buf) \
+ __bdevname(bio_dev(bio), (buf))
+
#ifdef CONFIG_BLK_CGROUP
int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css);
int bio_associate_current(struct bio *bio);
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 7104bea..9d92153 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -691,6 +691,9 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q,
rcu_read_lock();
blkcg = bio_blkcg(bio);
+ /* associate blkcg if bio hasn't attached one */
+ bio_associate_blkcg(bio, &blkcg->css);
+
blkg = blkg_lookup(blkcg, q);
if (unlikely(!blkg)) {
spin_lock_irq(q->queue_lock);
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 1454230..50c6485 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -97,7 +97,6 @@ typedef int (init_request_fn)(struct blk_mq_tag_set *set, struct request *,
unsigned int, unsigned int);
typedef void (exit_request_fn)(struct blk_mq_tag_set *set, struct request *,
unsigned int);
-typedef int (reinit_request_fn)(void *, struct request *);
typedef void (busy_iter_fn)(struct blk_mq_hw_ctx *, struct request *, void *,
bool);
@@ -143,7 +142,6 @@ struct blk_mq_ops {
*/
init_request_fn *init_request;
exit_request_fn *exit_request;
- reinit_request_fn *reinit_request;
/* Called from inside blk_get_request() */
void (*initialize_rq_fn)(struct request *rq);
@@ -261,7 +259,8 @@ void blk_freeze_queue_start(struct request_queue *q);
void blk_mq_freeze_queue_wait(struct request_queue *q);
int blk_mq_freeze_queue_wait_timeout(struct request_queue *q,
unsigned long timeout);
-int blk_mq_reinit_tagset(struct blk_mq_tag_set *set);
+int blk_mq_reinit_tagset(struct blk_mq_tag_set *set,
+ int (reinit_request)(void *, struct request *));
int blk_mq_map_queues(struct blk_mq_tag_set *set);
void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues);
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index d2eb87c..a2d2aa7 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -48,7 +48,8 @@ struct blk_issue_stat {
*/
struct bio {
struct bio *bi_next; /* request queue link */
- struct block_device *bi_bdev;
+ struct gendisk *bi_disk;
+ u8 bi_partno;
blk_status_t bi_status;
unsigned int bi_opf; /* bottom bits req flags,
* top bits REQ_OP. Use
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 4b99b13..460294bb 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -600,38 +600,36 @@ struct request_queue {
u64 write_hints[BLK_MAX_WRITE_HINTS];
};
-#define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */
-#define QUEUE_FLAG_STOPPED 2 /* queue is stopped */
-#define QUEUE_FLAG_SYNCFULL 3 /* read queue has been filled */
-#define QUEUE_FLAG_ASYNCFULL 4 /* write queue has been filled */
-#define QUEUE_FLAG_DYING 5 /* queue being torn down */
-#define QUEUE_FLAG_BYPASS 6 /* act as dumb FIFO queue */
-#define QUEUE_FLAG_BIDI 7 /* queue supports bidi requests */
-#define QUEUE_FLAG_NOMERGES 8 /* disable merge attempts */
-#define QUEUE_FLAG_SAME_COMP 9 /* complete on same CPU-group */
-#define QUEUE_FLAG_FAIL_IO 10 /* fake timeout */
-#define QUEUE_FLAG_STACKABLE 11 /* supports request stacking */
-#define QUEUE_FLAG_NONROT 12 /* non-rotational device (SSD) */
+#define QUEUE_FLAG_QUEUED 0 /* uses generic tag queueing */
+#define QUEUE_FLAG_STOPPED 1 /* queue is stopped */
+#define QUEUE_FLAG_DYING 2 /* queue being torn down */
+#define QUEUE_FLAG_BYPASS 3 /* act as dumb FIFO queue */
+#define QUEUE_FLAG_BIDI 4 /* queue supports bidi requests */
+#define QUEUE_FLAG_NOMERGES 5 /* disable merge attempts */
+#define QUEUE_FLAG_SAME_COMP 6 /* complete on same CPU-group */
+#define QUEUE_FLAG_FAIL_IO 7 /* fake timeout */
+#define QUEUE_FLAG_STACKABLE 8 /* supports request stacking */
+#define QUEUE_FLAG_NONROT 9 /* non-rotational device (SSD) */
#define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */
-#define QUEUE_FLAG_IO_STAT 13 /* do IO stats */
-#define QUEUE_FLAG_DISCARD 14 /* supports DISCARD */
-#define QUEUE_FLAG_NOXMERGES 15 /* No extended merges */
-#define QUEUE_FLAG_ADD_RANDOM 16 /* Contributes to random pool */
-#define QUEUE_FLAG_SECERASE 17 /* supports secure erase */
-#define QUEUE_FLAG_SAME_FORCE 18 /* force complete on same CPU */
-#define QUEUE_FLAG_DEAD 19 /* queue tear-down finished */
-#define QUEUE_FLAG_INIT_DONE 20 /* queue is initialized */
-#define QUEUE_FLAG_NO_SG_MERGE 21 /* don't attempt to merge SG segments*/
-#define QUEUE_FLAG_POLL 22 /* IO polling enabled if set */
-#define QUEUE_FLAG_WC 23 /* Write back caching */
-#define QUEUE_FLAG_FUA 24 /* device supports FUA writes */
-#define QUEUE_FLAG_FLUSH_NQ 25 /* flush not queueuable */
-#define QUEUE_FLAG_DAX 26 /* device supports DAX */
-#define QUEUE_FLAG_STATS 27 /* track rq completion times */
-#define QUEUE_FLAG_POLL_STATS 28 /* collecting stats for hybrid polling */
-#define QUEUE_FLAG_REGISTERED 29 /* queue has been registered to a disk */
-#define QUEUE_FLAG_SCSI_PASSTHROUGH 30 /* queue supports SCSI commands */
-#define QUEUE_FLAG_QUIESCED 31 /* queue has been quiesced */
+#define QUEUE_FLAG_IO_STAT 10 /* do IO stats */
+#define QUEUE_FLAG_DISCARD 11 /* supports DISCARD */
+#define QUEUE_FLAG_NOXMERGES 12 /* No extended merges */
+#define QUEUE_FLAG_ADD_RANDOM 13 /* Contributes to random pool */
+#define QUEUE_FLAG_SECERASE 14 /* supports secure erase */
+#define QUEUE_FLAG_SAME_FORCE 15 /* force complete on same CPU */
+#define QUEUE_FLAG_DEAD 16 /* queue tear-down finished */
+#define QUEUE_FLAG_INIT_DONE 17 /* queue is initialized */
+#define QUEUE_FLAG_NO_SG_MERGE 18 /* don't attempt to merge SG segments*/
+#define QUEUE_FLAG_POLL 19 /* IO polling enabled if set */
+#define QUEUE_FLAG_WC 20 /* Write back caching */
+#define QUEUE_FLAG_FUA 21 /* device supports FUA writes */
+#define QUEUE_FLAG_FLUSH_NQ 22 /* flush not queueuable */
+#define QUEUE_FLAG_DAX 23 /* device supports DAX */
+#define QUEUE_FLAG_STATS 24 /* track rq completion times */
+#define QUEUE_FLAG_POLL_STATS 25 /* collecting stats for hybrid polling */
+#define QUEUE_FLAG_REGISTERED 26 /* queue has been registered to a disk */
+#define QUEUE_FLAG_SCSI_PASSTHROUGH 27 /* queue supports SCSI commands */
+#define QUEUE_FLAG_QUIESCED 28 /* queue has been quiesced */
#define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \
(1 << QUEUE_FLAG_STACKABLE) | \
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index d2e9085..67b4d4d 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -28,10 +28,12 @@ struct blk_trace {
atomic_t dropped;
};
+struct blkcg;
+
extern int blk_trace_ioctl(struct block_device *, unsigned, char __user *);
extern void blk_trace_shutdown(struct request_queue *);
-extern __printf(2, 3)
-void __trace_note_message(struct blk_trace *, const char *fmt, ...);
+extern __printf(3, 4)
+void __trace_note_message(struct blk_trace *, struct blkcg *blkcg, const char *fmt, ...);
/**
* blk_add_trace_msg - Add a (simple) message to the blktrace stream
@@ -46,12 +48,14 @@ void __trace_note_message(struct blk_trace *, const char *fmt, ...);
* NOTE: Can not use 'static inline' due to presence of var args...
*
**/
-#define blk_add_trace_msg(q, fmt, ...) \
+#define blk_add_cgroup_trace_msg(q, cg, fmt, ...) \
do { \
struct blk_trace *bt = (q)->blk_trace; \
if (unlikely(bt)) \
- __trace_note_message(bt, fmt, ##__VA_ARGS__); \
+ __trace_note_message(bt, cg, fmt, ##__VA_ARGS__);\
} while (0)
+#define blk_add_trace_msg(q, fmt, ...) \
+ blk_add_cgroup_trace_msg(q, NULL, fmt, ##__VA_ARGS__)
#define BLK_TN_MAX_MSG 128
static inline bool blk_trace_note_message_enabled(struct request_queue *q)
@@ -82,6 +86,7 @@ extern struct attribute_group blk_trace_attr_group;
# define blk_trace_startstop(q, start) (-ENOTTY)
# define blk_trace_remove(q) (-ENOTTY)
# define blk_add_trace_msg(q, fmt, ...) do { } while (0)
+# define blk_add_cgroup_trace_msg(q, cg, fmt, ...) do { } while (0)
# define blk_trace_remove_sysfs(dev) do { } while (0)
# define blk_trace_note_message_enabled(q) (false)
static inline int blk_trace_init_sysfs(struct device *dev)
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 085056e..d023ac5 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -578,7 +578,7 @@ static inline bool cgroup_is_populated(struct cgroup *cgrp)
/* returns ino associated with a cgroup */
static inline ino_t cgroup_ino(struct cgroup *cgrp)
{
- return cgrp->kn->ino;
+ return cgrp->kn->id.ino;
}
/* cft/css accessors for cftype->write() operation */
@@ -644,6 +644,13 @@ static inline void cgroup_kthread_ready(void)
current->no_cgroup_migration = 0;
}
+static inline union kernfs_node_id *cgroup_get_kernfs_id(struct cgroup *cgrp)
+{
+ return &cgrp->kn->id;
+}
+
+void cgroup_path_from_kernfs_id(const union kernfs_node_id *id,
+ char *buf, size_t buflen);
#else /* !CONFIG_CGROUPS */
struct cgroup_subsys_state;
@@ -666,12 +673,19 @@ static inline int cgroup_init_early(void) { return 0; }
static inline int cgroup_init(void) { return 0; }
static inline void cgroup_init_kthreadd(void) {}
static inline void cgroup_kthread_ready(void) {}
+static inline union kernfs_node_id *cgroup_get_kernfs_id(struct cgroup *cgrp)
+{
+ return NULL;
+}
static inline bool task_under_cgroup_hierarchy(struct task_struct *task,
struct cgroup *ancestor)
{
return true;
}
+
+static inline void cgroup_path_from_kernfs_id(const union kernfs_node_id *id,
+ char *buf, size_t buflen) {}
#endif /* !CONFIG_CGROUPS */
/*
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index 002611c..2d02593 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -51,7 +51,7 @@
#endif
extern const char *drbd_buildtag(void);
-#define REL_VERSION "8.4.7"
+#define REL_VERSION "8.4.10"
#define API_VERSION 1
#define PRO_VERSION_MIN 86
#define PRO_VERSION_MAX 101
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index 2896f93..4e6d4d4 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -132,7 +132,8 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf,
__flg_field_def(18, DRBD_GENLA_F_MANDATORY, disk_drain, DRBD_DISK_DRAIN_DEF)
__flg_field_def(19, DRBD_GENLA_F_MANDATORY, md_flushes, DRBD_MD_FLUSHES_DEF)
__flg_field_def(23, 0 /* OPTIONAL */, al_updates, DRBD_AL_UPDATES_DEF)
- __flg_field_def(24, 0 /* OPTIONAL */, discard_zeroes_if_aligned, DRBD_DISCARD_ZEROES_IF_ALIGNED)
+ __flg_field_def(24, 0 /* OPTIONAL */, discard_zeroes_if_aligned, DRBD_DISCARD_ZEROES_IF_ALIGNED_DEF)
+ __flg_field_def(26, 0 /* OPTIONAL */, disable_write_same, DRBD_DISABLE_WRITE_SAME_DEF)
)
GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts,
diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h
index ddac684..24ae1b9 100644
--- a/include/linux/drbd_limits.h
+++ b/include/linux/drbd_limits.h
@@ -209,12 +209,18 @@
#define DRBD_MD_FLUSHES_DEF 1
#define DRBD_TCP_CORK_DEF 1
#define DRBD_AL_UPDATES_DEF 1
+
/* We used to ignore the discard_zeroes_data setting.
* To not change established (and expected) behaviour,
* by default assume that, for discard_zeroes_data=0,
* we can make that an effective discard_zeroes_data=1,
* if we only explicitly zero-out unaligned partial chunks. */
-#define DRBD_DISCARD_ZEROES_IF_ALIGNED 1
+#define DRBD_DISCARD_ZEROES_IF_ALIGNED_DEF 1
+
+/* Some backends pretend to support WRITE SAME,
+ * but fail such requests when they are actually submitted.
+ * This is to tell DRBD to not even try. */
+#define DRBD_DISABLE_WRITE_SAME_DEF 0
#define DRBD_ALLOW_TWO_PRIMARIES_DEF 0
#define DRBD_ALWAYS_ASBP_DEF 0
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c57002a..7d6079d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -429,6 +429,7 @@ struct block_device {
#endif
struct block_device * bd_contains;
unsigned bd_block_size;
+ u8 bd_partno;
struct hd_struct * bd_part;
/* number of times partitions within this device have been opened. */
unsigned bd_part_count;
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index e619fae..ea652bf 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -362,24 +362,12 @@ static inline void free_part_stats(struct hd_struct *part)
#define part_stat_sub(cpu, gendiskp, field, subnd) \
part_stat_add(cpu, gendiskp, field, -subnd)
-static inline void part_inc_in_flight(struct hd_struct *part, int rw)
-{
- atomic_inc(&part->in_flight[rw]);
- if (part->partno)
- atomic_inc(&part_to_disk(part)->part0.in_flight[rw]);
-}
-
-static inline void part_dec_in_flight(struct hd_struct *part, int rw)
-{
- atomic_dec(&part->in_flight[rw]);
- if (part->partno)
- atomic_dec(&part_to_disk(part)->part0.in_flight[rw]);
-}
-
-static inline int part_in_flight(struct hd_struct *part)
-{
- return atomic_read(&part->in_flight[0]) + atomic_read(&part->in_flight[1]);
-}
+void part_in_flight(struct request_queue *q, struct hd_struct *part,
+ unsigned int inflight[2]);
+void part_dec_in_flight(struct request_queue *q, struct hd_struct *part,
+ int rw);
+void part_inc_in_flight(struct request_queue *q, struct hd_struct *part,
+ int rw);
static inline struct partition_meta_info *alloc_part_info(struct gendisk *disk)
{
@@ -395,7 +383,7 @@ static inline void free_part_info(struct hd_struct *part)
}
/* block/blk-core.c */
-extern void part_round_stats(int cpu, struct hd_struct *part);
+extern void part_round_stats(struct request_queue *q, int cpu, struct hd_struct *part);
/* block/genhd.c */
extern void device_add_disk(struct device *parent, struct gendisk *disk);
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index a9b11b8..ab25c8b 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -69,6 +69,12 @@ enum kernfs_root_flag {
* following flag enables that behavior.
*/
KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK = 0x0002,
+
+ /*
+ * The filesystem supports exportfs operation, so userspace can use
+ * fhandle to access nodes of the fs.
+ */
+ KERNFS_ROOT_SUPPORT_EXPORTOP = 0x0004,
};
/* type-specific structures for kernfs_node union members */
@@ -95,6 +101,21 @@ struct kernfs_elem_attr {
struct kernfs_node *notify_next; /* for kernfs_notify() */
};
+/* represent a kernfs node */
+union kernfs_node_id {
+ struct {
+ /*
+ * blktrace will export this struct as a simplified 'struct
+ * fid' (which is a big data struction), so userspace can use
+ * it to find kernfs node. The layout must match the first two
+ * fields of 'struct fid' exactly.
+ */
+ u32 ino;
+ u32 generation;
+ };
+ u64 id;
+};
+
/*
* kernfs_node - the building block of kernfs hierarchy. Each and every
* kernfs node is represented by single kernfs_node. Most fields are
@@ -131,9 +152,9 @@ struct kernfs_node {
void *priv;
+ union kernfs_node_id id;
unsigned short flags;
umode_t mode;
- unsigned int ino;
struct kernfs_iattrs *iattr;
};
@@ -163,7 +184,8 @@ struct kernfs_root {
unsigned int flags; /* KERNFS_ROOT_* flags */
/* private fields, do not use outside kernfs proper */
- struct ida ino_ida;
+ struct idr ino_idr;
+ u32 next_generation;
struct kernfs_syscall_ops *syscall_ops;
/* list of kernfs_super_info of this root, protected by kernfs_mutex */
@@ -336,6 +358,8 @@ struct super_block *kernfs_pin_sb(struct kernfs_root *root, const void *ns);
void kernfs_init(void);
+struct kernfs_node *kernfs_get_node_by_id(struct kernfs_root *root,
+ const union kernfs_node_id *id);
#else /* CONFIG_KERNFS */
static inline enum kernfs_node_type kernfs_type(struct kernfs_node *kn)
diff --git a/include/trace/events/bcache.h b/include/trace/events/bcache.h
index df3e9ae..daf7491 100644
--- a/include/trace/events/bcache.h
+++ b/include/trace/events/bcache.h
@@ -21,7 +21,7 @@ DECLARE_EVENT_CLASS(bcache_request,
),
TP_fast_assign(
- __entry->dev = bio->bi_bdev->bd_dev;
+ __entry->dev = bio_dev(bio);
__entry->orig_major = d->disk->major;
__entry->orig_minor = d->disk->first_minor;
__entry->sector = bio->bi_iter.bi_sector;
@@ -98,7 +98,7 @@ DECLARE_EVENT_CLASS(bcache_bio,
),
TP_fast_assign(
- __entry->dev = bio->bi_bdev->bd_dev;
+ __entry->dev = bio_dev(bio);
__entry->sector = bio->bi_iter.bi_sector;
__entry->nr_sector = bio->bi_iter.bi_size >> 9;
blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
@@ -133,7 +133,7 @@ TRACE_EVENT(bcache_read,
),
TP_fast_assign(
- __entry->dev = bio->bi_bdev->bd_dev;
+ __entry->dev = bio_dev(bio);
__entry->sector = bio->bi_iter.bi_sector;
__entry->nr_sector = bio->bi_iter.bi_size >> 9;
blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
diff --git a/include/trace/events/block.h b/include/trace/events/block.h
index d0dbe60..f815aaa 100644
--- a/include/trace/events/block.h
+++ b/include/trace/events/block.h
@@ -236,8 +236,7 @@ TRACE_EVENT(block_bio_bounce,
),
TP_fast_assign(
- __entry->dev = bio->bi_bdev ?
- bio->bi_bdev->bd_dev : 0;
+ __entry->dev = bio_dev(bio);
__entry->sector = bio->bi_iter.bi_sector;
__entry->nr_sector = bio_sectors(bio);
blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
@@ -274,7 +273,7 @@ TRACE_EVENT(block_bio_complete,
),
TP_fast_assign(
- __entry->dev = bio->bi_bdev->bd_dev;
+ __entry->dev = bio_dev(bio);
__entry->sector = bio->bi_iter.bi_sector;
__entry->nr_sector = bio_sectors(bio);
__entry->error = error;
@@ -302,7 +301,7 @@ DECLARE_EVENT_CLASS(block_bio_merge,
),
TP_fast_assign(
- __entry->dev = bio->bi_bdev->bd_dev;
+ __entry->dev = bio_dev(bio);
__entry->sector = bio->bi_iter.bi_sector;
__entry->nr_sector = bio_sectors(bio);
blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
@@ -369,7 +368,7 @@ TRACE_EVENT(block_bio_queue,
),
TP_fast_assign(
- __entry->dev = bio->bi_bdev->bd_dev;
+ __entry->dev = bio_dev(bio);
__entry->sector = bio->bi_iter.bi_sector;
__entry->nr_sector = bio_sectors(bio);
blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
@@ -397,7 +396,8 @@ DECLARE_EVENT_CLASS(block_get_rq,
),
TP_fast_assign(
- __entry->dev = bio ? bio->bi_bdev->bd_dev : 0;
+ __entry->dev = bio ? bio_dev(bio) : 0;
+ __entry->dev = bio_dev(bio);
__entry->sector = bio ? bio->bi_iter.bi_sector : 0;
__entry->nr_sector = bio ? bio_sectors(bio) : 0;
blk_fill_rwbs(__entry->rwbs,
@@ -532,7 +532,7 @@ TRACE_EVENT(block_split,
),
TP_fast_assign(
- __entry->dev = bio->bi_bdev->bd_dev;
+ __entry->dev = bio_dev(bio);
__entry->sector = bio->bi_iter.bi_sector;
__entry->new_sector = new_sector;
blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
@@ -573,7 +573,7 @@ TRACE_EVENT(block_bio_remap,
),
TP_fast_assign(
- __entry->dev = bio->bi_bdev->bd_dev;
+ __entry->dev = bio_dev(bio);
__entry->sector = bio->bi_iter.bi_sector;
__entry->nr_sector = bio_sectors(bio);
__entry->old_dev = dev;
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index 6f77a27..bc4dd78 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -829,7 +829,7 @@ DECLARE_EVENT_CLASS(f2fs__bio,
TP_fast_assign(
__entry->dev = sb->s_dev;
- __entry->target = bio->bi_bdev->bd_dev;
+ __entry->target = bio_dev(bio);
__entry->op = bio_op(bio);
__entry->op_flags = bio->bi_opf;
__entry->type = type;
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index 7bd8783..9b57f01 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -136,7 +136,7 @@ DEFINE_EVENT(writeback_dirty_inode_template, writeback_dirty_inode,
static inline unsigned int __trace_wb_assign_cgroup(struct bdi_writeback *wb)
{
- return wb->memcg_css->cgroup->kn->ino;
+ return wb->memcg_css->cgroup->kn->id.ino;
}
static inline unsigned int __trace_wbc_assign_cgroup(struct writeback_control *wbc)
diff --git a/include/uapi/linux/blktrace_api.h b/include/uapi/linux/blktrace_api.h
index c590ca6..9cdaede 100644
--- a/include/uapi/linux/blktrace_api.h
+++ b/include/uapi/linux/blktrace_api.h
@@ -52,6 +52,7 @@ enum blktrace_act {
__BLK_TA_REMAP, /* bio was remapped */
__BLK_TA_ABORT, /* request aborted */
__BLK_TA_DRV_DATA, /* driver-specific binary data */
+ __BLK_TA_CGROUP = 1 << 8, /* from a cgroup*/
};
/*
@@ -61,6 +62,7 @@ enum blktrace_notify {
__BLK_TN_PROCESS = 0, /* establish pid/name mapping */
__BLK_TN_TIMESTAMP, /* include system clock */
__BLK_TN_MESSAGE, /* Character string message */
+ __BLK_TN_CGROUP = __BLK_TA_CGROUP, /* from a cgroup */
};
@@ -107,6 +109,7 @@ struct blk_io_trace {
__u32 cpu; /* on what cpu did it happen */
__u16 error; /* completion error */
__u16 pdu_len; /* length of data after this trace */
+ /* cgroup id will be stored here if exists */
};
/*
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 4f2196a..d6551cd 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -1879,7 +1879,8 @@ int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask, int ref_flags)
&cgroup_kf_syscall_ops : &cgroup1_kf_syscall_ops;
root->kf_root = kernfs_create_root(kf_sops,
- KERNFS_ROOT_CREATE_DEACTIVATED,
+ KERNFS_ROOT_CREATE_DEACTIVATED |
+ KERNFS_ROOT_SUPPORT_EXPORTOP,
root_cgrp);
if (IS_ERR(root->kf_root)) {
ret = PTR_ERR(root->kf_root);
@@ -5256,6 +5257,18 @@ static int __init cgroup_wq_init(void)
}
core_initcall(cgroup_wq_init);
+void cgroup_path_from_kernfs_id(const union kernfs_node_id *id,
+ char *buf, size_t buflen)
+{
+ struct kernfs_node *kn;
+
+ kn = kernfs_get_node_by_id(cgrp_dfl_root.kf_root, id);
+ if (!kn)
+ return;
+ kernfs_path(kn, buf, buflen);
+ kernfs_put(kn);
+}
+
/*
* proc_cgroup_show()
* - Print task's cgroup paths into seq_file, one line for each hierarchy
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 57d2257..d7cdc42 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -242,8 +242,7 @@ static void hib_end_io(struct bio *bio)
if (bio->bi_status) {
printk(KERN_ALERT "Read-error on swap-device (%u:%u:%Lu)\n",
- imajor(bio->bi_bdev->bd_inode),
- iminor(bio->bi_bdev->bd_inode),
+ MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)),
(unsigned long long)bio->bi_iter.bi_sector);
}
@@ -270,7 +269,7 @@ static int hib_submit_io(int op, int op_flags, pgoff_t page_off, void *addr,
bio = bio_alloc(__GFP_RECLAIM | __GFP_HIGH, 1);
bio->bi_iter.bi_sector = page_off * (PAGE_SIZE >> 9);
- bio->bi_bdev = hib_resume_bdev;
+ bio_set_dev(bio, hib_resume_bdev);
bio_set_op_attrs(bio, op, op_flags);
if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index bc364f8..2a685b4 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -27,6 +27,7 @@
#include <linux/time.h>
#include <linux/uaccess.h>
#include <linux/list.h>
+#include <linux/blk-cgroup.h>
#include "../../block/blk.h"
@@ -46,10 +47,16 @@ static __cacheline_aligned_in_smp DEFINE_SPINLOCK(running_trace_lock);
/* Select an alternative, minimalistic output than the original one */
#define TRACE_BLK_OPT_CLASSIC 0x1
+#define TRACE_BLK_OPT_CGROUP 0x2
+#define TRACE_BLK_OPT_CGNAME 0x4
static struct tracer_opt blk_tracer_opts[] = {
/* Default disable the minimalistic output */
{ TRACER_OPT(blk_classic, TRACE_BLK_OPT_CLASSIC) },
+#ifdef CONFIG_BLK_CGROUP
+ { TRACER_OPT(blk_cgroup, TRACE_BLK_OPT_CGROUP) },
+ { TRACER_OPT(blk_cgname, TRACE_BLK_OPT_CGNAME) },
+#endif
{ }
};
@@ -68,7 +75,8 @@ static void blk_unregister_tracepoints(void);
* Send out a notify message.
*/
static void trace_note(struct blk_trace *bt, pid_t pid, int action,
- const void *data, size_t len)
+ const void *data, size_t len,
+ union kernfs_node_id *cgid)
{
struct blk_io_trace *t;
struct ring_buffer_event *event = NULL;
@@ -76,12 +84,13 @@ static void trace_note(struct blk_trace *bt, pid_t pid, int action,
int pc = 0;
int cpu = smp_processor_id();
bool blk_tracer = blk_tracer_enabled;
+ ssize_t cgid_len = cgid ? sizeof(*cgid) : 0;
if (blk_tracer) {
buffer = blk_tr->trace_buffer.buffer;
pc = preempt_count();
event = trace_buffer_lock_reserve(buffer, TRACE_BLK,
- sizeof(*t) + len,
+ sizeof(*t) + len + cgid_len,
0, pc);
if (!event)
return;
@@ -92,17 +101,19 @@ static void trace_note(struct blk_trace *bt, pid_t pid, int action,
if (!bt->rchan)
return;
- t = relay_reserve(bt->rchan, sizeof(*t) + len);
+ t = relay_reserve(bt->rchan, sizeof(*t) + len + cgid_len);
if (t) {
t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION;
t->time = ktime_to_ns(ktime_get());
record_it:
t->device = bt->dev;
- t->action = action;
+ t->action = action | (cgid ? __BLK_TN_CGROUP : 0);
t->pid = pid;
t->cpu = cpu;
- t->pdu_len = len;
- memcpy((void *) t + sizeof(*t), data, len);
+ t->pdu_len = len + cgid_len;
+ if (cgid)
+ memcpy((void *)t + sizeof(*t), cgid, cgid_len);
+ memcpy((void *) t + sizeof(*t) + cgid_len, data, len);
if (blk_tracer)
trace_buffer_unlock_commit(blk_tr, buffer, event, 0, pc);
@@ -122,7 +133,7 @@ static void trace_note_tsk(struct task_struct *tsk)
spin_lock_irqsave(&running_trace_lock, flags);
list_for_each_entry(bt, &running_trace_list, running_list) {
trace_note(bt, tsk->pid, BLK_TN_PROCESS, tsk->comm,
- sizeof(tsk->comm));
+ sizeof(tsk->comm), NULL);
}
spin_unlock_irqrestore(&running_trace_lock, flags);
}
@@ -139,11 +150,12 @@ static void trace_note_time(struct blk_trace *bt)
words[1] = now.tv_nsec;
local_irq_save(flags);
- trace_note(bt, 0, BLK_TN_TIMESTAMP, words, sizeof(words));
+ trace_note(bt, 0, BLK_TN_TIMESTAMP, words, sizeof(words), NULL);
local_irq_restore(flags);
}
-void __trace_note_message(struct blk_trace *bt, const char *fmt, ...)
+void __trace_note_message(struct blk_trace *bt, struct blkcg *blkcg,
+ const char *fmt, ...)
{
int n;
va_list args;
@@ -167,7 +179,14 @@ void __trace_note_message(struct blk_trace *bt, const char *fmt, ...)
n = vscnprintf(buf, BLK_TN_MAX_MSG, fmt, args);
va_end(args);
- trace_note(bt, 0, BLK_TN_MESSAGE, buf, n);
+ if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP))
+ blkcg = NULL;
+#ifdef CONFIG_BLK_CGROUP
+ trace_note(bt, 0, BLK_TN_MESSAGE, buf, n,
+ blkcg ? cgroup_get_kernfs_id(blkcg->css.cgroup) : NULL);
+#else
+ trace_note(bt, 0, BLK_TN_MESSAGE, buf, n, NULL);
+#endif
local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(__trace_note_message);
@@ -204,7 +223,7 @@ static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ),
*/
static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
int op, int op_flags, u32 what, int error, int pdu_len,
- void *pdu_data)
+ void *pdu_data, union kernfs_node_id *cgid)
{
struct task_struct *tsk = current;
struct ring_buffer_event *event = NULL;
@@ -215,6 +234,7 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
pid_t pid;
int cpu, pc = 0;
bool blk_tracer = blk_tracer_enabled;
+ ssize_t cgid_len = cgid ? sizeof(*cgid) : 0;
if (unlikely(bt->trace_state != Blktrace_running && !blk_tracer))
return;
@@ -229,6 +249,8 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
what |= BLK_TC_ACT(BLK_TC_DISCARD);
if (op == REQ_OP_FLUSH)
what |= BLK_TC_ACT(BLK_TC_FLUSH);
+ if (cgid)
+ what |= __BLK_TA_CGROUP;
pid = tsk->pid;
if (act_log_check(bt, what, sector, pid))
@@ -241,7 +263,7 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
buffer = blk_tr->trace_buffer.buffer;
pc = preempt_count();
event = trace_buffer_lock_reserve(buffer, TRACE_BLK,
- sizeof(*t) + pdu_len,
+ sizeof(*t) + pdu_len + cgid_len,
0, pc);
if (!event)
return;
@@ -258,7 +280,7 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
* from coming in and stepping on our toes.
*/
local_irq_save(flags);
- t = relay_reserve(bt->rchan, sizeof(*t) + pdu_len);
+ t = relay_reserve(bt->rchan, sizeof(*t) + pdu_len + cgid_len);
if (t) {
sequence = per_cpu_ptr(bt->sequence, cpu);
@@ -280,10 +302,12 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
t->action = what;
t->device = bt->dev;
t->error = error;
- t->pdu_len = pdu_len;
+ t->pdu_len = pdu_len + cgid_len;
+ if (cgid_len)
+ memcpy((void *)t + sizeof(*t), cgid, cgid_len);
if (pdu_len)
- memcpy((void *) t + sizeof(*t), pdu_data, pdu_len);
+ memcpy((void *)t + sizeof(*t) + cgid_len, pdu_data, pdu_len);
if (blk_tracer) {
trace_buffer_unlock_commit(blk_tr, buffer, event, 0, pc);
@@ -359,7 +383,7 @@ static ssize_t blk_msg_write(struct file *filp, const char __user *buffer,
return PTR_ERR(msg);
bt = filp->private_data;
- __trace_note_message(bt, "%s", msg);
+ __trace_note_message(bt, NULL, "%s", msg);
kfree(msg);
return count;
@@ -684,6 +708,36 @@ void blk_trace_shutdown(struct request_queue *q)
}
}
+#ifdef CONFIG_BLK_CGROUP
+static union kernfs_node_id *
+blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio)
+{
+ struct blk_trace *bt = q->blk_trace;
+
+ if (!bt || !(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP))
+ return NULL;
+
+ if (!bio->bi_css)
+ return NULL;
+ return cgroup_get_kernfs_id(bio->bi_css->cgroup);
+}
+#else
+static union kernfs_node_id *
+blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio)
+{
+ return NULL;
+}
+#endif
+
+static union kernfs_node_id *
+blk_trace_request_get_cgid(struct request_queue *q, struct request *rq)
+{
+ if (!rq->bio)
+ return NULL;
+ /* Use the first bio */
+ return blk_trace_bio_get_cgid(q, rq->bio);
+}
+
/*
* blktrace probes
*/
@@ -694,13 +748,15 @@ void blk_trace_shutdown(struct request_queue *q)
* @error: return status to log
* @nr_bytes: number of completed bytes
* @what: the action
+ * @cgid: the cgroup info
*
* Description:
* Records an action against a request. Will log the bio offset + size.
*
**/
static void blk_add_trace_rq(struct request *rq, int error,
- unsigned int nr_bytes, u32 what)
+ unsigned int nr_bytes, u32 what,
+ union kernfs_node_id *cgid)
{
struct blk_trace *bt = rq->q->blk_trace;
@@ -713,32 +769,36 @@ static void blk_add_trace_rq(struct request *rq, int error,
what |= BLK_TC_ACT(BLK_TC_FS);
__blk_add_trace(bt, blk_rq_trace_sector(rq), nr_bytes, req_op(rq),
- rq->cmd_flags, what, error, 0, NULL);
+ rq->cmd_flags, what, error, 0, NULL, cgid);
}
static void blk_add_trace_rq_insert(void *ignore,
struct request_queue *q, struct request *rq)
{
- blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_INSERT);
+ blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_INSERT,
+ blk_trace_request_get_cgid(q, rq));
}
static void blk_add_trace_rq_issue(void *ignore,
struct request_queue *q, struct request *rq)
{
- blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_ISSUE);
+ blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_ISSUE,
+ blk_trace_request_get_cgid(q, rq));
}
static void blk_add_trace_rq_requeue(void *ignore,
struct request_queue *q,
struct request *rq)
{
- blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_REQUEUE);
+ blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_REQUEUE,
+ blk_trace_request_get_cgid(q, rq));
}
static void blk_add_trace_rq_complete(void *ignore, struct request *rq,
int error, unsigned int nr_bytes)
{
- blk_add_trace_rq(rq, error, nr_bytes, BLK_TA_COMPLETE);
+ blk_add_trace_rq(rq, error, nr_bytes, BLK_TA_COMPLETE,
+ blk_trace_request_get_cgid(rq->q, rq));
}
/**
@@ -753,7 +813,7 @@ static void blk_add_trace_rq_complete(void *ignore, struct request *rq,
*
**/
static void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
- u32 what, int error)
+ u32 what, int error, union kernfs_node_id *cgid)
{
struct blk_trace *bt = q->blk_trace;
@@ -761,20 +821,22 @@ static void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
return;
__blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size,
- bio_op(bio), bio->bi_opf, what, error, 0, NULL);
+ bio_op(bio), bio->bi_opf, what, error, 0, NULL, cgid);
}
static void blk_add_trace_bio_bounce(void *ignore,
struct request_queue *q, struct bio *bio)
{
- blk_add_trace_bio(q, bio, BLK_TA_BOUNCE, 0);
+ blk_add_trace_bio(q, bio, BLK_TA_BOUNCE, 0,
+ blk_trace_bio_get_cgid(q, bio));
}
static void blk_add_trace_bio_complete(void *ignore,
struct request_queue *q, struct bio *bio,
int error)
{
- blk_add_trace_bio(q, bio, BLK_TA_COMPLETE, error);
+ blk_add_trace_bio(q, bio, BLK_TA_COMPLETE, error,
+ blk_trace_bio_get_cgid(q, bio));
}
static void blk_add_trace_bio_backmerge(void *ignore,
@@ -782,7 +844,8 @@ static void blk_add_trace_bio_backmerge(void *ignore,
struct request *rq,
struct bio *bio)
{
- blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE, 0);
+ blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE, 0,
+ blk_trace_bio_get_cgid(q, bio));
}
static void blk_add_trace_bio_frontmerge(void *ignore,
@@ -790,13 +853,15 @@ static void blk_add_trace_bio_frontmerge(void *ignore,
struct request *rq,
struct bio *bio)
{
- blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE, 0);
+ blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE, 0,
+ blk_trace_bio_get_cgid(q, bio));
}
static void blk_add_trace_bio_queue(void *ignore,
struct request_queue *q, struct bio *bio)
{
- blk_add_trace_bio(q, bio, BLK_TA_QUEUE, 0);
+ blk_add_trace_bio(q, bio, BLK_TA_QUEUE, 0,
+ blk_trace_bio_get_cgid(q, bio));
}
static void blk_add_trace_getrq(void *ignore,
@@ -804,13 +869,14 @@ static void blk_add_trace_getrq(void *ignore,
struct bio *bio, int rw)
{
if (bio)
- blk_add_trace_bio(q, bio, BLK_TA_GETRQ, 0);
+ blk_add_trace_bio(q, bio, BLK_TA_GETRQ, 0,
+ blk_trace_bio_get_cgid(q, bio));
else {
struct blk_trace *bt = q->blk_trace;
if (bt)
__blk_add_trace(bt, 0, 0, rw, 0, BLK_TA_GETRQ, 0, 0,
- NULL);
+ NULL, NULL);
}
}
@@ -820,13 +886,14 @@ static void blk_add_trace_sleeprq(void *ignore,
struct bio *bio, int rw)
{
if (bio)
- blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ, 0);
+ blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ, 0,
+ blk_trace_bio_get_cgid(q, bio));
else {
struct blk_trace *bt = q->blk_trace;
if (bt)
__blk_add_trace(bt, 0, 0, rw, 0, BLK_TA_SLEEPRQ,
- 0, 0, NULL);
+ 0, 0, NULL, NULL);
}
}
@@ -835,7 +902,7 @@ static void blk_add_trace_plug(void *ignore, struct request_queue *q)
struct blk_trace *bt = q->blk_trace;
if (bt)
- __blk_add_trace(bt, 0, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL);
+ __blk_add_trace(bt, 0, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL, NULL);
}
static void blk_add_trace_unplug(void *ignore, struct request_queue *q,
@@ -852,7 +919,7 @@ static void blk_add_trace_unplug(void *ignore, struct request_queue *q,
else
what = BLK_TA_UNPLUG_TIMER;
- __blk_add_trace(bt, 0, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu);
+ __blk_add_trace(bt, 0, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu, NULL);
}
}
@@ -868,7 +935,7 @@ static void blk_add_trace_split(void *ignore,
__blk_add_trace(bt, bio->bi_iter.bi_sector,
bio->bi_iter.bi_size, bio_op(bio), bio->bi_opf,
BLK_TA_SPLIT, bio->bi_status, sizeof(rpdu),
- &rpdu);
+ &rpdu, blk_trace_bio_get_cgid(q, bio));
}
}
@@ -896,12 +963,12 @@ static void blk_add_trace_bio_remap(void *ignore,
return;
r.device_from = cpu_to_be32(dev);
- r.device_to = cpu_to_be32(bio->bi_bdev->bd_dev);
+ r.device_to = cpu_to_be32(bio_dev(bio));
r.sector_from = cpu_to_be64(from);
__blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size,
bio_op(bio), bio->bi_opf, BLK_TA_REMAP, bio->bi_status,
- sizeof(r), &r);
+ sizeof(r), &r, blk_trace_bio_get_cgid(q, bio));
}
/**
@@ -934,7 +1001,7 @@ static void blk_add_trace_rq_remap(void *ignore,
__blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq),
rq_data_dir(rq), 0, BLK_TA_REMAP, 0,
- sizeof(r), &r);
+ sizeof(r), &r, blk_trace_request_get_cgid(q, rq));
}
/**
@@ -958,7 +1025,8 @@ void blk_add_driver_data(struct request_queue *q,
return;
__blk_add_trace(bt, blk_rq_trace_sector(rq), blk_rq_bytes(rq), 0, 0,
- BLK_TA_DRV_DATA, 0, len, data);
+ BLK_TA_DRV_DATA, 0, len, data,
+ blk_trace_request_get_cgid(q, rq));
}
EXPORT_SYMBOL_GPL(blk_add_driver_data);
@@ -1031,7 +1099,7 @@ static void fill_rwbs(char *rwbs, const struct blk_io_trace *t)
int i = 0;
int tc = t->action >> BLK_TC_SHIFT;
- if (t->action == BLK_TN_MESSAGE) {
+ if ((t->action & ~__BLK_TN_CGROUP) == BLK_TN_MESSAGE) {
rwbs[i++] = 'N';
goto out;
}
@@ -1066,9 +1134,21 @@ const struct blk_io_trace *te_blk_io_trace(const struct trace_entry *ent)
return (const struct blk_io_trace *)ent;
}
-static inline const void *pdu_start(const struct trace_entry *ent)
+static inline const void *pdu_start(const struct trace_entry *ent, bool has_cg)
{
- return te_blk_io_trace(ent) + 1;
+ return (void *)(te_blk_io_trace(ent) + 1) +
+ (has_cg ? sizeof(union kernfs_node_id) : 0);
+}
+
+static inline const void *cgid_start(const struct trace_entry *ent)
+{
+ return (void *)(te_blk_io_trace(ent) + 1);
+}
+
+static inline int pdu_real_len(const struct trace_entry *ent, bool has_cg)
+{
+ return te_blk_io_trace(ent)->pdu_len -
+ (has_cg ? sizeof(union kernfs_node_id) : 0);
}
static inline u32 t_action(const struct trace_entry *ent)
@@ -1096,16 +1176,16 @@ static inline __u16 t_error(const struct trace_entry *ent)
return te_blk_io_trace(ent)->error;
}
-static __u64 get_pdu_int(const struct trace_entry *ent)
+static __u64 get_pdu_int(const struct trace_entry *ent, bool has_cg)
{
- const __u64 *val = pdu_start(ent);
+ const __u64 *val = pdu_start(ent, has_cg);
return be64_to_cpu(*val);
}
static void get_pdu_remap(const struct trace_entry *ent,
- struct blk_io_trace_remap *r)
+ struct blk_io_trace_remap *r, bool has_cg)
{
- const struct blk_io_trace_remap *__r = pdu_start(ent);
+ const struct blk_io_trace_remap *__r = pdu_start(ent, has_cg);
__u64 sector_from = __r->sector_from;
r->device_from = be32_to_cpu(__r->device_from);
@@ -1113,9 +1193,11 @@ static void get_pdu_remap(const struct trace_entry *ent,
r->sector_from = be64_to_cpu(sector_from);
}
-typedef void (blk_log_action_t) (struct trace_iterator *iter, const char *act);
+typedef void (blk_log_action_t) (struct trace_iterator *iter, const char *act,
+ bool has_cg);
-static void blk_log_action_classic(struct trace_iterator *iter, const char *act)
+static void blk_log_action_classic(struct trace_iterator *iter, const char *act,
+ bool has_cg)
{
char rwbs[RWBS_LEN];
unsigned long long ts = iter->ts;
@@ -1131,24 +1213,43 @@ static void blk_log_action_classic(struct trace_iterator *iter, const char *act)
secs, nsec_rem, iter->ent->pid, act, rwbs);
}
-static void blk_log_action(struct trace_iterator *iter, const char *act)
+static void blk_log_action(struct trace_iterator *iter, const char *act,
+ bool has_cg)
{
char rwbs[RWBS_LEN];
const struct blk_io_trace *t = te_blk_io_trace(iter->ent);
fill_rwbs(rwbs, t);
- trace_seq_printf(&iter->seq, "%3d,%-3d %2s %3s ",
- MAJOR(t->device), MINOR(t->device), act, rwbs);
+ if (has_cg) {
+ const union kernfs_node_id *id = cgid_start(iter->ent);
+
+ if (blk_tracer_flags.val & TRACE_BLK_OPT_CGNAME) {
+ char blkcg_name_buf[NAME_MAX + 1] = "<...>";
+
+ cgroup_path_from_kernfs_id(id, blkcg_name_buf,
+ sizeof(blkcg_name_buf));
+ trace_seq_printf(&iter->seq, "%3d,%-3d %s %2s %3s ",
+ MAJOR(t->device), MINOR(t->device),
+ blkcg_name_buf, act, rwbs);
+ } else
+ trace_seq_printf(&iter->seq,
+ "%3d,%-3d %x,%-x %2s %3s ",
+ MAJOR(t->device), MINOR(t->device),
+ id->ino, id->generation, act, rwbs);
+ } else
+ trace_seq_printf(&iter->seq, "%3d,%-3d %2s %3s ",
+ MAJOR(t->device), MINOR(t->device), act, rwbs);
}
-static void blk_log_dump_pdu(struct trace_seq *s, const struct trace_entry *ent)
+static void blk_log_dump_pdu(struct trace_seq *s,
+ const struct trace_entry *ent, bool has_cg)
{
const unsigned char *pdu_buf;
int pdu_len;
int i, end;
- pdu_buf = pdu_start(ent);
- pdu_len = te_blk_io_trace(ent)->pdu_len;
+ pdu_buf = pdu_start(ent, has_cg);
+ pdu_len = pdu_real_len(ent, has_cg);
if (!pdu_len)
return;
@@ -1179,7 +1280,7 @@ static void blk_log_dump_pdu(struct trace_seq *s, const struct trace_entry *ent)
trace_seq_puts(s, ") ");
}
-static void blk_log_generic(struct trace_seq *s, const struct trace_entry *ent)
+static void blk_log_generic(struct trace_seq *s, const struct trace_entry *ent, bool has_cg)
{
char cmd[TASK_COMM_LEN];
@@ -1187,7 +1288,7 @@ static void blk_log_generic(struct trace_seq *s, const struct trace_entry *ent)
if (t_action(ent) & BLK_TC_ACT(BLK_TC_PC)) {
trace_seq_printf(s, "%u ", t_bytes(ent));
- blk_log_dump_pdu(s, ent);
+ blk_log_dump_pdu(s, ent, has_cg);
trace_seq_printf(s, "[%s]\n", cmd);
} else {
if (t_sec(ent))
@@ -1199,10 +1300,10 @@ static void blk_log_generic(struct trace_seq *s, const struct trace_entry *ent)
}
static void blk_log_with_error(struct trace_seq *s,
- const struct trace_entry *ent)
+ const struct trace_entry *ent, bool has_cg)
{
if (t_action(ent) & BLK_TC_ACT(BLK_TC_PC)) {
- blk_log_dump_pdu(s, ent);
+ blk_log_dump_pdu(s, ent, has_cg);
trace_seq_printf(s, "[%d]\n", t_error(ent));
} else {
if (t_sec(ent))
@@ -1215,18 +1316,18 @@ static void blk_log_with_error(struct trace_seq *s,
}
}
-static void blk_log_remap(struct trace_seq *s, const struct trace_entry *ent)
+static void blk_log_remap(struct trace_seq *s, const struct trace_entry *ent, bool has_cg)
{
struct blk_io_trace_remap r = { .device_from = 0, };
- get_pdu_remap(ent, &r);
+ get_pdu_remap(ent, &r, has_cg);
trace_seq_printf(s, "%llu + %u <- (%d,%d) %llu\n",
t_sector(ent), t_sec(ent),
MAJOR(r.device_from), MINOR(r.device_from),
(unsigned long long)r.sector_from);
}
-static void blk_log_plug(struct trace_seq *s, const struct trace_entry *ent)
+static void blk_log_plug(struct trace_seq *s, const struct trace_entry *ent, bool has_cg)
{
char cmd[TASK_COMM_LEN];
@@ -1235,30 +1336,31 @@ static void blk_log_plug(struct trace_seq *s, const struct trace_entry *ent)
trace_seq_printf(s, "[%s]\n", cmd);
}
-static void blk_log_unplug(struct trace_seq *s, const struct trace_entry *ent)
+static void blk_log_unplug(struct trace_seq *s, const struct trace_entry *ent, bool has_cg)
{
char cmd[TASK_COMM_LEN];
trace_find_cmdline(ent->pid, cmd);
- trace_seq_printf(s, "[%s] %llu\n", cmd, get_pdu_int(ent));
+ trace_seq_printf(s, "[%s] %llu\n", cmd, get_pdu_int(ent, has_cg));
}
-static void blk_log_split(struct trace_seq *s, const struct trace_entry *ent)
+static void blk_log_split(struct trace_seq *s, const struct trace_entry *ent, bool has_cg)
{
char cmd[TASK_COMM_LEN];
trace_find_cmdline(ent->pid, cmd);
trace_seq_printf(s, "%llu / %llu [%s]\n", t_sector(ent),
- get_pdu_int(ent), cmd);
+ get_pdu_int(ent, has_cg), cmd);
}
-static void blk_log_msg(struct trace_seq *s, const struct trace_entry *ent)
+static void blk_log_msg(struct trace_seq *s, const struct trace_entry *ent,
+ bool has_cg)
{
- const struct blk_io_trace *t = te_blk_io_trace(ent);
- trace_seq_putmem(s, t + 1, t->pdu_len);
+ trace_seq_putmem(s, pdu_start(ent, has_cg),
+ pdu_real_len(ent, has_cg));
trace_seq_putc(s, '\n');
}
@@ -1298,7 +1400,8 @@ static void blk_tracer_reset(struct trace_array *tr)
static const struct {
const char *act[2];
- void (*print)(struct trace_seq *s, const struct trace_entry *ent);
+ void (*print)(struct trace_seq *s, const struct trace_entry *ent,
+ bool has_cg);
} what2act[] = {
[__BLK_TA_QUEUE] = {{ "Q", "queue" }, blk_log_generic },
[__BLK_TA_BACKMERGE] = {{ "M", "backmerge" }, blk_log_generic },
@@ -1326,23 +1429,25 @@ static enum print_line_t print_one_line(struct trace_iterator *iter,
u16 what;
bool long_act;
blk_log_action_t *log_action;
+ bool has_cg;
t = te_blk_io_trace(iter->ent);
- what = t->action & ((1 << BLK_TC_SHIFT) - 1);
+ what = (t->action & ((1 << BLK_TC_SHIFT) - 1)) & ~__BLK_TA_CGROUP;
long_act = !!(tr->trace_flags & TRACE_ITER_VERBOSE);
log_action = classic ? &blk_log_action_classic : &blk_log_action;
+ has_cg = t->action & __BLK_TA_CGROUP;
- if (t->action == BLK_TN_MESSAGE) {
- log_action(iter, long_act ? "message" : "m");
- blk_log_msg(s, iter->ent);
+ if ((t->action & ~__BLK_TN_CGROUP) == BLK_TN_MESSAGE) {
+ log_action(iter, long_act ? "message" : "m", has_cg);
+ blk_log_msg(s, iter->ent, has_cg);
return trace_handle_return(s);
}
if (unlikely(what == 0 || what >= ARRAY_SIZE(what2act)))
trace_seq_printf(s, "Unknown action %x\n", what);
else {
- log_action(iter, what2act[what].act[long_act]);
- what2act[what].print(s, iter->ent);
+ log_action(iter, what2act[what].act[long_act], has_cg);
+ what2act[what].print(s, iter->ent, has_cg);
}
return trace_handle_return(s);
diff --git a/mm/page_io.c b/mm/page_io.c
index 20139b9..21502d3 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -33,7 +33,10 @@ static struct bio *get_swap_bio(gfp_t gfp_flags,
bio = bio_alloc(gfp_flags, nr);
if (bio) {
- bio->bi_iter.bi_sector = map_swap_page(page, &bio->bi_bdev);
+ struct block_device *bdev;
+
+ bio->bi_iter.bi_sector = map_swap_page(page, &bdev);
+ bio_set_dev(bio, bdev);
bio->bi_iter.bi_sector <<= PAGE_SHIFT - 9;
bio->bi_end_io = end_io;
@@ -60,8 +63,7 @@ void end_swap_bio_write(struct bio *bio)
*/
set_page_dirty(page);
pr_alert("Write-error on swap-device (%u:%u:%llu)\n",
- imajor(bio->bi_bdev->bd_inode),
- iminor(bio->bi_bdev->bd_inode),
+ MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)),
(unsigned long long)bio->bi_iter.bi_sector);
ClearPageReclaim(page);
}
@@ -126,8 +128,7 @@ static void end_swap_bio_read(struct bio *bio)
SetPageError(page);
ClearPageUptodate(page);
pr_alert("Read-error on swap-device (%u:%u:%llu)\n",
- imajor(bio->bi_bdev->bd_inode),
- iminor(bio->bi_bdev->bd_inode),
+ MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)),
(unsigned long long)bio->bi_iter.bi_sector);
goto out;
}
@@ -351,7 +352,7 @@ int swap_readpage(struct page *page, bool do_poll)
int ret = 0;
struct swap_info_struct *sis = page_swap_info(page);
blk_qc_t qc;
- struct block_device *bdev;
+ struct gendisk *disk;
VM_BUG_ON_PAGE(!PageSwapCache(page), page);
VM_BUG_ON_PAGE(!PageLocked(page), page);
@@ -390,7 +391,7 @@ int swap_readpage(struct page *page, bool do_poll)
ret = -ENOMEM;
goto out;
}
- bdev = bio->bi_bdev;
+ disk = bio->bi_disk;
/*
* Keep this task valid during swap readpage because the oom killer may
* attempt to access it in the page fault retry time check.
@@ -406,7 +407,7 @@ int swap_readpage(struct page *page, bool do_poll)
if (!READ_ONCE(bio->bi_private))
break;
- if (!blk_mq_poll(bdev_get_queue(bdev), qc))
+ if (!blk_mq_poll(disk->queue, qc))
break;
}
__set_current_state(TASK_RUNNING);