Merge tag 'md-6.13-20241115' of https://git.kernel.org/pub/scm/linux/kernel/git/mdraid/linux into for-6.13/block
Pull MD fixes from Song:
"This set contains a fix for a W=1 warning, by John Garry, and a
MAINTAINERS update."
* tag 'md-6.13-20241115' of https://git.kernel.org/pub/scm/linux/kernel/git/mdraid/linux:
MAINTAINERS: Update git tree for mdraid subsystem
md/raid5: Increase r5conf.cache_name size
diff --git a/block/blk-core.c b/block/blk-core.c
index 0387172..666efe8 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1120,8 +1120,8 @@ void blk_start_plug_nr_ios(struct blk_plug *plug, unsigned short nr_ios)
return;
plug->cur_ktime = 0;
- plug->mq_list = NULL;
- plug->cached_rq = NULL;
+ rq_list_init(&plug->mq_list);
+ rq_list_init(&plug->cached_rqs);
plug->nr_ios = min_t(unsigned short, nr_ios, BLK_MAX_REQUEST_COUNT);
plug->rq_count = 0;
plug->multiple_queues = false;
@@ -1217,7 +1217,7 @@ void __blk_flush_plug(struct blk_plug *plug, bool from_schedule)
* queue for cached requests, we don't want a blocked task holding
* up a queue freeze/quiesce event.
*/
- if (unlikely(!rq_list_empty(plug->cached_rq)))
+ if (unlikely(!rq_list_empty(&plug->cached_rqs)))
blk_mq_free_plug_rqs(plug);
plug->cur_ktime = 0;
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 7b0af83..e0b28e9 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -867,12 +867,13 @@ static struct request *attempt_merge(struct request_queue *q,
if (rq_data_dir(req) != rq_data_dir(next))
return NULL;
- /* Don't merge requests with different write hints. */
- if (req->write_hint != next->write_hint)
- return NULL;
-
- if (req->ioprio != next->ioprio)
- return NULL;
+ if (req->bio && next->bio) {
+ /* Don't merge requests with different write hints. */
+ if (req->bio->bi_write_hint != next->bio->bi_write_hint)
+ return NULL;
+ if (req->bio->bi_ioprio != next->bio->bi_ioprio)
+ return NULL;
+ }
if (!blk_atomic_write_mergeable_rqs(req, next))
return NULL;
@@ -1001,12 +1002,13 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
if (!bio_crypt_rq_ctx_compatible(rq, bio))
return false;
- /* Don't merge requests with different write hints. */
- if (rq->write_hint != bio->bi_write_hint)
- return false;
-
- if (rq->ioprio != bio_prio(bio))
- return false;
+ if (rq->bio) {
+ /* Don't merge requests with different write hints. */
+ if (rq->bio->bi_write_hint != bio->bi_write_hint)
+ return false;
+ if (rq->bio->bi_ioprio != bio->bi_ioprio)
+ return false;
+ }
if (blk_atomic_write_mergeable_rq_bio(rq, bio) == false)
return false;
@@ -1177,7 +1179,7 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
struct blk_plug *plug = current->plug;
struct request *rq;
- if (!plug || rq_list_empty(plug->mq_list))
+ if (!plug || rq_list_empty(&plug->mq_list))
return false;
rq_list_for_each(&plug->mq_list, rq) {
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 5e240a4..270cfd9 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -478,7 +478,7 @@ __blk_mq_alloc_requests_batch(struct blk_mq_alloc_data *data)
prefetch(tags->static_rqs[tag]);
tag_mask &= ~(1UL << i);
rq = blk_mq_rq_ctx_init(data, tags, tag);
- rq_list_add(data->cached_rq, rq);
+ rq_list_add_head(data->cached_rqs, rq);
nr++;
}
if (!(data->rq_flags & RQF_SCHED_TAGS))
@@ -487,7 +487,7 @@ __blk_mq_alloc_requests_batch(struct blk_mq_alloc_data *data)
percpu_ref_get_many(&data->q->q_usage_counter, nr - 1);
data->nr_tags -= nr;
- return rq_list_pop(data->cached_rq);
+ return rq_list_pop(data->cached_rqs);
}
static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data)
@@ -584,7 +584,7 @@ static struct request *blk_mq_rq_cache_fill(struct request_queue *q,
.flags = flags,
.cmd_flags = opf,
.nr_tags = plug->nr_ios,
- .cached_rq = &plug->cached_rq,
+ .cached_rqs = &plug->cached_rqs,
};
struct request *rq;
@@ -609,14 +609,14 @@ static struct request *blk_mq_alloc_cached_request(struct request_queue *q,
if (!plug)
return NULL;
- if (rq_list_empty(plug->cached_rq)) {
+ if (rq_list_empty(&plug->cached_rqs)) {
if (plug->nr_ios == 1)
return NULL;
rq = blk_mq_rq_cache_fill(q, plug, opf, flags);
if (!rq)
return NULL;
} else {
- rq = rq_list_peek(&plug->cached_rq);
+ rq = rq_list_peek(&plug->cached_rqs);
if (!rq || rq->q != q)
return NULL;
@@ -625,7 +625,7 @@ static struct request *blk_mq_alloc_cached_request(struct request_queue *q,
if (op_is_flush(rq->cmd_flags) != op_is_flush(opf))
return NULL;
- plug->cached_rq = rq_list_next(rq);
+ rq_list_pop(&plug->cached_rqs);
blk_mq_rq_time_init(rq, blk_time_get_ns());
}
@@ -802,7 +802,7 @@ void blk_mq_free_plug_rqs(struct blk_plug *plug)
{
struct request *rq;
- while ((rq = rq_list_pop(&plug->cached_rq)) != NULL)
+ while ((rq = rq_list_pop(&plug->cached_rqs)) != NULL)
blk_mq_free_request(rq);
}
@@ -842,7 +842,7 @@ static void blk_print_req_error(struct request *req, blk_status_t status)
blk_op_str(req_op(req)),
(__force u32)(req->cmd_flags & ~REQ_OP_MASK),
req->nr_phys_segments,
- IOPRIO_PRIO_CLASS(req->ioprio));
+ IOPRIO_PRIO_CLASS(req_get_ioprio(req)));
}
/*
@@ -1392,8 +1392,7 @@ static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
*/
if (!plug->has_elevator && (rq->rq_flags & RQF_SCHED_TAGS))
plug->has_elevator = true;
- rq->rq_next = NULL;
- rq_list_add(&plug->mq_list, rq);
+ rq_list_add_tail(&plug->mq_list, rq);
plug->rq_count++;
}
@@ -2660,7 +2659,6 @@ static void blk_mq_bio_to_request(struct request *rq, struct bio *bio,
rq->cmd_flags |= REQ_FAILFAST_MASK;
rq->__sector = bio->bi_iter.bi_sector;
- rq->write_hint = bio->bi_write_hint;
blk_rq_bio_prep(rq, bio, nr_segs);
if (bio_integrity(bio))
rq->nr_integrity_segments = blk_rq_count_integrity_sg(rq->q,
@@ -2786,7 +2784,7 @@ static void blk_mq_plug_issue_direct(struct blk_plug *plug)
blk_status_t ret = BLK_STS_OK;
while ((rq = rq_list_pop(&plug->mq_list))) {
- bool last = rq_list_empty(plug->mq_list);
+ bool last = rq_list_empty(&plug->mq_list);
if (hctx != rq->mq_hctx) {
if (hctx) {
@@ -2829,8 +2827,7 @@ static void blk_mq_dispatch_plug_list(struct blk_plug *plug, bool from_sched)
{
struct blk_mq_hw_ctx *this_hctx = NULL;
struct blk_mq_ctx *this_ctx = NULL;
- struct request *requeue_list = NULL;
- struct request **requeue_lastp = &requeue_list;
+ struct rq_list requeue_list = {};
unsigned int depth = 0;
bool is_passthrough = false;
LIST_HEAD(list);
@@ -2844,12 +2841,12 @@ static void blk_mq_dispatch_plug_list(struct blk_plug *plug, bool from_sched)
is_passthrough = blk_rq_is_passthrough(rq);
} else if (this_hctx != rq->mq_hctx || this_ctx != rq->mq_ctx ||
is_passthrough != blk_rq_is_passthrough(rq)) {
- rq_list_add_tail(&requeue_lastp, rq);
+ rq_list_add_tail(&requeue_list, rq);
continue;
}
- list_add(&rq->queuelist, &list);
+ list_add_tail(&rq->queuelist, &list);
depth++;
- } while (!rq_list_empty(plug->mq_list));
+ } while (!rq_list_empty(&plug->mq_list));
plug->mq_list = requeue_list;
trace_block_unplug(this_hctx->queue, depth, !from_sched);
@@ -2904,19 +2901,19 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
if (q->mq_ops->queue_rqs) {
blk_mq_run_dispatch_ops(q,
__blk_mq_flush_plug_list(q, plug));
- if (rq_list_empty(plug->mq_list))
+ if (rq_list_empty(&plug->mq_list))
return;
}
blk_mq_run_dispatch_ops(q,
blk_mq_plug_issue_direct(plug));
- if (rq_list_empty(plug->mq_list))
+ if (rq_list_empty(&plug->mq_list))
return;
}
do {
blk_mq_dispatch_plug_list(plug, from_schedule);
- } while (!rq_list_empty(plug->mq_list));
+ } while (!rq_list_empty(&plug->mq_list));
}
static void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
@@ -2981,7 +2978,7 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
if (plug) {
data.nr_tags = plug->nr_ios;
plug->nr_ios = 1;
- data.cached_rq = &plug->cached_rq;
+ data.cached_rqs = &plug->cached_rqs;
}
rq = __blk_mq_alloc_requests(&data);
@@ -3004,7 +3001,7 @@ static struct request *blk_mq_peek_cached_request(struct blk_plug *plug,
if (!plug)
return NULL;
- rq = rq_list_peek(&plug->cached_rq);
+ rq = rq_list_peek(&plug->cached_rqs);
if (!rq || rq->q != q)
return NULL;
if (type != rq->mq_hctx->type &&
@@ -3018,14 +3015,14 @@ static struct request *blk_mq_peek_cached_request(struct blk_plug *plug,
static void blk_mq_use_cached_rq(struct request *rq, struct blk_plug *plug,
struct bio *bio)
{
- WARN_ON_ONCE(rq_list_peek(&plug->cached_rq) != rq);
+ if (rq_list_pop(&plug->cached_rqs) != rq)
+ WARN_ON_ONCE(1);
/*
* If any qos ->throttle() end up blocking, we will have flushed the
* plug and hence killed the cached_rq list as well. Pop this entry
* before we throttle.
*/
- plug->cached_rq = rq_list_next(rq);
rq_qos_throttle(rq->q, bio);
blk_mq_rq_time_init(rq, blk_time_get_ns());
@@ -3307,8 +3304,6 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
rq->special_vec = rq_src->special_vec;
}
rq->nr_phys_segments = rq_src->nr_phys_segments;
- rq->ioprio = rq_src->ioprio;
- rq->write_hint = rq_src->write_hint;
if (rq->bio && blk_crypto_rq_bio_prep(rq, rq->bio, gfp_mask) < 0)
goto free_and_out;
diff --git a/block/blk-mq.h b/block/blk-mq.h
index f4ac1af..89a20ff 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -155,7 +155,7 @@ struct blk_mq_alloc_data {
/* allocate multiple requests/tags in one go */
unsigned int nr_tags;
- struct request **cached_rq;
+ struct rq_list *cached_rqs;
/* input & output parameter */
struct blk_mq_ctx *ctx;
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 7d6b296..f1d4dfd 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -222,7 +222,7 @@ static void blk_validate_atomic_write_limits(struct queue_limits *lim)
* Check that the limits in lim are valid, initialize defaults for unset
* values, and cap values based on others where needed.
*/
-static int blk_validate_limits(struct queue_limits *lim)
+int blk_validate_limits(struct queue_limits *lim)
{
unsigned int max_hw_sectors;
unsigned int logical_block_sectors;
@@ -365,6 +365,7 @@ static int blk_validate_limits(struct queue_limits *lim)
return err;
return blk_validate_zoned_limits(lim);
}
+EXPORT_SYMBOL_GPL(blk_validate_limits);
/*
* Set the default limits for a newly allocated queue. @lim contains the
diff --git a/block/genhd.c b/block/genhd.c
index 1971c91..9130e16 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1291,40 +1291,35 @@ static int diskstats_show(struct seq_file *seqf, void *v)
part_stat_unlock();
}
part_stat_read_all(hd, &stat);
- seq_printf(seqf, "%4d %7d %pg "
- "%lu %lu %lu %u "
- "%lu %lu %lu %u "
- "%u %u %u "
- "%lu %lu %lu %u "
- "%lu %u"
- "\n",
- MAJOR(hd->bd_dev), MINOR(hd->bd_dev), hd,
- stat.ios[STAT_READ],
- stat.merges[STAT_READ],
- stat.sectors[STAT_READ],
- (unsigned int)div_u64(stat.nsecs[STAT_READ],
- NSEC_PER_MSEC),
- stat.ios[STAT_WRITE],
- stat.merges[STAT_WRITE],
- stat.sectors[STAT_WRITE],
- (unsigned int)div_u64(stat.nsecs[STAT_WRITE],
- NSEC_PER_MSEC),
- inflight,
- jiffies_to_msecs(stat.io_ticks),
- (unsigned int)div_u64(stat.nsecs[STAT_READ] +
- stat.nsecs[STAT_WRITE] +
- stat.nsecs[STAT_DISCARD] +
- stat.nsecs[STAT_FLUSH],
- NSEC_PER_MSEC),
- stat.ios[STAT_DISCARD],
- stat.merges[STAT_DISCARD],
- stat.sectors[STAT_DISCARD],
- (unsigned int)div_u64(stat.nsecs[STAT_DISCARD],
- NSEC_PER_MSEC),
- stat.ios[STAT_FLUSH],
- (unsigned int)div_u64(stat.nsecs[STAT_FLUSH],
- NSEC_PER_MSEC)
- );
+ seq_put_decimal_ull_width(seqf, "", MAJOR(hd->bd_dev), 4);
+ seq_put_decimal_ull_width(seqf, " ", MINOR(hd->bd_dev), 7);
+ seq_printf(seqf, " %pg", hd);
+ seq_put_decimal_ull(seqf, " ", stat.ios[STAT_READ]);
+ seq_put_decimal_ull(seqf, " ", stat.merges[STAT_READ]);
+ seq_put_decimal_ull(seqf, " ", stat.sectors[STAT_READ]);
+ seq_put_decimal_ull(seqf, " ", (unsigned int)div_u64(stat.nsecs[STAT_READ],
+ NSEC_PER_MSEC));
+ seq_put_decimal_ull(seqf, " ", stat.ios[STAT_WRITE]);
+ seq_put_decimal_ull(seqf, " ", stat.merges[STAT_WRITE]);
+ seq_put_decimal_ull(seqf, " ", stat.sectors[STAT_WRITE]);
+ seq_put_decimal_ull(seqf, " ", (unsigned int)div_u64(stat.nsecs[STAT_WRITE],
+ NSEC_PER_MSEC));
+ seq_put_decimal_ull(seqf, " ", inflight);
+ seq_put_decimal_ull(seqf, " ", jiffies_to_msecs(stat.io_ticks));
+ seq_put_decimal_ull(seqf, " ", (unsigned int)div_u64(stat.nsecs[STAT_READ] +
+ stat.nsecs[STAT_WRITE] +
+ stat.nsecs[STAT_DISCARD] +
+ stat.nsecs[STAT_FLUSH],
+ NSEC_PER_MSEC));
+ seq_put_decimal_ull(seqf, " ", stat.ios[STAT_DISCARD]);
+ seq_put_decimal_ull(seqf, " ", stat.merges[STAT_DISCARD]);
+ seq_put_decimal_ull(seqf, " ", stat.sectors[STAT_DISCARD]);
+ seq_put_decimal_ull(seqf, " ", (unsigned int)div_u64(stat.nsecs[STAT_DISCARD],
+ NSEC_PER_MSEC));
+ seq_put_decimal_ull(seqf, " ", stat.ios[STAT_FLUSH]);
+ seq_put_decimal_ull(seqf, " ", (unsigned int)div_u64(stat.nsecs[STAT_FLUSH],
+ NSEC_PER_MSEC));
+ seq_putc(seqf, '\n');
}
rcu_read_unlock();
diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
index 2f0431e..3c3d8d2 100644
--- a/drivers/block/null_blk/main.c
+++ b/drivers/block/null_blk/main.c
@@ -1638,10 +1638,9 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
return BLK_STS_OK;
}
-static void null_queue_rqs(struct request **rqlist)
+static void null_queue_rqs(struct rq_list *rqlist)
{
- struct request *requeue_list = NULL;
- struct request **requeue_lastp = &requeue_list;
+ struct rq_list requeue_list = {};
struct blk_mq_queue_data bd = { };
blk_status_t ret;
@@ -1651,8 +1650,8 @@ static void null_queue_rqs(struct request **rqlist)
bd.rq = rq;
ret = null_queue_rq(rq->mq_hctx, &bd);
if (ret != BLK_STS_OK)
- rq_list_add_tail(&requeue_lastp, rq);
- } while (!rq_list_empty(*rqlist));
+ rq_list_add_tail(&requeue_list, rq);
+ } while (!rq_list_empty(rqlist));
*rqlist = requeue_list;
}
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 0e99a47..c0cdba7 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -471,18 +471,18 @@ static bool virtblk_prep_rq_batch(struct request *req)
return virtblk_prep_rq(req->mq_hctx, vblk, req, vbr) == BLK_STS_OK;
}
-static bool virtblk_add_req_batch(struct virtio_blk_vq *vq,
- struct request **rqlist)
+static void virtblk_add_req_batch(struct virtio_blk_vq *vq,
+ struct rq_list *rqlist)
{
+ struct request *req;
unsigned long flags;
- int err;
bool kick;
spin_lock_irqsave(&vq->lock, flags);
- while (!rq_list_empty(*rqlist)) {
- struct request *req = rq_list_pop(rqlist);
+ while ((req = rq_list_pop(rqlist))) {
struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
+ int err;
err = virtblk_add_req(vq->vq, vbr);
if (err) {
@@ -495,37 +495,32 @@ static bool virtblk_add_req_batch(struct virtio_blk_vq *vq,
kick = virtqueue_kick_prepare(vq->vq);
spin_unlock_irqrestore(&vq->lock, flags);
- return kick;
+ if (kick)
+ virtqueue_notify(vq->vq);
}
-static void virtio_queue_rqs(struct request **rqlist)
+static void virtio_queue_rqs(struct rq_list *rqlist)
{
- struct request *req, *next, *prev = NULL;
- struct request *requeue_list = NULL;
+ struct rq_list submit_list = { };
+ struct rq_list requeue_list = { };
+ struct virtio_blk_vq *vq = NULL;
+ struct request *req;
- rq_list_for_each_safe(rqlist, req, next) {
- struct virtio_blk_vq *vq = get_virtio_blk_vq(req->mq_hctx);
- bool kick;
+ while ((req = rq_list_pop(rqlist))) {
+ struct virtio_blk_vq *this_vq = get_virtio_blk_vq(req->mq_hctx);
- if (!virtblk_prep_rq_batch(req)) {
- rq_list_move(rqlist, &requeue_list, req, prev);
- req = prev;
- if (!req)
- continue;
- }
+ if (vq && vq != this_vq)
+ virtblk_add_req_batch(vq, &submit_list);
+ vq = this_vq;
- if (!next || req->mq_hctx != next->mq_hctx) {
- req->rq_next = NULL;
- kick = virtblk_add_req_batch(vq, rqlist);
- if (kick)
- virtqueue_notify(vq->vq);
-
- *rqlist = next;
- prev = NULL;
- } else
- prev = req;
+ if (virtblk_prep_rq_batch(req))
+ rq_list_add_tail(&submit_list, req);
+ else
+ rq_list_add_tail(&requeue_list, req);
}
+ if (vq)
+ virtblk_add_req_batch(vq, &submit_list);
*rqlist = requeue_list;
}
diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c
index b1387dc..7cd1102 100644
--- a/drivers/nvme/host/apple.c
+++ b/drivers/nvme/host/apple.c
@@ -649,7 +649,7 @@ static bool apple_nvme_handle_cq(struct apple_nvme_queue *q, bool force)
found = apple_nvme_poll_cq(q, &iob);
- if (!rq_list_empty(iob.req_list))
+ if (!rq_list_empty(&iob.req_list))
apple_nvme_complete_batch(&iob);
return found;
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 3de7555..7360e9c 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -42,6 +42,8 @@ struct nvme_ns_info {
bool is_readonly;
bool is_ready;
bool is_removed;
+ bool is_rotational;
+ bool no_vwc;
};
unsigned int admin_timeout = 60;
@@ -1615,6 +1617,8 @@ static int nvme_ns_info_from_id_cs_indep(struct nvme_ctrl *ctrl,
info->is_shared = id->nmic & NVME_NS_NMIC_SHARED;
info->is_readonly = id->nsattr & NVME_NS_ATTR_RO;
info->is_ready = id->nstat & NVME_NSTAT_NRDY;
+ info->is_rotational = id->nsfeat & NVME_NS_ROTATIONAL;
+ info->no_vwc = id->nsfeat & NVME_NS_VWC_NOT_PRESENT;
}
kfree(id);
return ret;
@@ -2157,11 +2161,14 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
ns->head->ids.csi == NVME_CSI_ZNS)
nvme_update_zone_info(ns, &lim, &zi);
- if (ns->ctrl->vwc & NVME_CTRL_VWC_PRESENT)
+ if ((ns->ctrl->vwc & NVME_CTRL_VWC_PRESENT) && !info->no_vwc)
lim.features |= BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA;
else
lim.features &= ~(BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA);
+ if (info->is_rotational)
+ lim.features |= BLK_FEAT_ROTATIONAL;
+
/*
* Register a metadata profile for PI, or the plain non-integrity NVMe
* metadata masquerading as Type 0 if supported, otherwise reject block
@@ -3608,6 +3615,7 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
head->ns_id = info->nsid;
head->ids = info->ids;
head->shared = info->is_shared;
+ head->rotational = info->is_rotational;
ratelimit_state_init(&head->rs_nuse, 5 * HZ, 1);
ratelimit_set_flags(&head->rs_nuse, RATELIMIT_MSG_ON_RELEASE);
kref_init(&head->ref);
@@ -3988,7 +3996,7 @@ static void nvme_scan_ns(struct nvme_ctrl *ctrl, unsigned nsid)
{
struct nvme_ns_info info = { .nsid = nsid };
struct nvme_ns *ns;
- int ret;
+ int ret = 1;
if (nvme_identify_ns_descs(ctrl, &info))
return;
@@ -4005,9 +4013,10 @@ static void nvme_scan_ns(struct nvme_ctrl *ctrl, unsigned nsid)
* set up a namespace. If not fall back to the legacy version.
*/
if ((ctrl->cap & NVME_CAP_CRMS_CRIMS) ||
- (info.ids.csi != NVME_CSI_NVM && info.ids.csi != NVME_CSI_ZNS))
+ (info.ids.csi != NVME_CSI_NVM && info.ids.csi != NVME_CSI_ZNS) ||
+ ctrl->vs >= NVME_VS(2, 0, 0))
ret = nvme_ns_info_from_id_cs_indep(ctrl, &info);
- else
+ if (ret > 0)
ret = nvme_ns_info_from_identify(ctrl, &info);
if (info.is_removed)
@@ -5006,6 +5015,8 @@ static inline void _nvme_check_size(void)
BUILD_BUG_ON(sizeof(struct nvme_id_ctrl_nvm) != NVME_IDENTIFY_DATA_SIZE);
BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64);
BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512);
+ BUILD_BUG_ON(sizeof(struct nvme_endurance_group_log) != 512);
+ BUILD_BUG_ON(sizeof(struct nvme_rotational_media_log) != 512);
BUILD_BUG_ON(sizeof(struct nvme_dbbuf) != 64);
BUILD_BUG_ON(sizeof(struct nvme_directive_cmd) != 64);
BUILD_BUG_ON(sizeof(struct nvme_feat_host_behavior) != 512);
@@ -5014,22 +5025,20 @@ static inline void _nvme_check_size(void)
static int __init nvme_core_init(void)
{
+ unsigned int wq_flags = WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS;
int result = -ENOMEM;
_nvme_check_size();
- nvme_wq = alloc_workqueue("nvme-wq",
- WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
+ nvme_wq = alloc_workqueue("nvme-wq", wq_flags, 0);
if (!nvme_wq)
goto out;
- nvme_reset_wq = alloc_workqueue("nvme-reset-wq",
- WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
+ nvme_reset_wq = alloc_workqueue("nvme-reset-wq", wq_flags, 0);
if (!nvme_reset_wq)
goto destroy_wq;
- nvme_delete_wq = alloc_workqueue("nvme-delete-wq",
- WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
+ nvme_delete_wq = alloc_workqueue("nvme-delete-wq", wq_flags, 0);
if (!nvme_delete_wq)
goto destroy_reset_wq;
diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index f697d2d..cb7f61e 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -401,7 +401,7 @@ struct nvme_uring_cmd_pdu {
static inline struct nvme_uring_cmd_pdu *nvme_uring_cmd_pdu(
struct io_uring_cmd *ioucmd)
{
- return (struct nvme_uring_cmd_pdu *)&ioucmd->pdu;
+ return io_uring_cmd_to_pdu(ioucmd, struct nvme_uring_cmd_pdu);
}
static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd,
@@ -631,8 +631,6 @@ static int nvme_ns_uring_cmd(struct nvme_ns *ns, struct io_uring_cmd *ioucmd,
struct nvme_ctrl *ctrl = ns->ctrl;
int ret;
- BUILD_BUG_ON(sizeof(struct nvme_uring_cmd_pdu) > sizeof(ioucmd->pdu));
-
ret = nvme_uring_cmd_checks(issue_flags);
if (ret)
return ret;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 093cb423..900719c 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -474,6 +474,7 @@ struct nvme_ns_head {
struct list_head entry;
struct kref ref;
bool shared;
+ bool rotational;
bool passthru_err_log_enabled;
struct nvme_effects_log *effects;
u64 nuse;
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 4b9fda0..5f2e3ad 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -141,6 +141,7 @@ struct nvme_dev {
struct nvme_ctrl ctrl;
u32 last_ps;
bool hmb;
+ struct sg_table *hmb_sgt;
mempool_t *iod_mempool;
@@ -153,6 +154,7 @@ struct nvme_dev {
/* host memory buffer support: */
u64 host_mem_size;
u32 nr_host_mem_descs;
+ u32 host_mem_descs_size;
dma_addr_t host_mem_descs_dma;
struct nvme_host_mem_buf_desc *host_mem_descs;
void **host_mem_desc_bufs;
@@ -902,11 +904,12 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
return BLK_STS_OK;
}
-static void nvme_submit_cmds(struct nvme_queue *nvmeq, struct request **rqlist)
+static void nvme_submit_cmds(struct nvme_queue *nvmeq, struct rq_list *rqlist)
{
+ struct request *req;
+
spin_lock(&nvmeq->sq_lock);
- while (!rq_list_empty(*rqlist)) {
- struct request *req = rq_list_pop(rqlist);
+ while ((req = rq_list_pop(rqlist))) {
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
nvme_sq_copy_cmd(nvmeq, &iod->cmd);
@@ -929,33 +932,26 @@ static bool nvme_prep_rq_batch(struct nvme_queue *nvmeq, struct request *req)
return nvme_prep_rq(nvmeq->dev, req) == BLK_STS_OK;
}
-static void nvme_queue_rqs(struct request **rqlist)
+static void nvme_queue_rqs(struct rq_list *rqlist)
{
- struct request *req, *next, *prev = NULL;
- struct request *requeue_list = NULL;
+ struct rq_list submit_list = { };
+ struct rq_list requeue_list = { };
+ struct nvme_queue *nvmeq = NULL;
+ struct request *req;
- rq_list_for_each_safe(rqlist, req, next) {
- struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
+ while ((req = rq_list_pop(rqlist))) {
+ if (nvmeq && nvmeq != req->mq_hctx->driver_data)
+ nvme_submit_cmds(nvmeq, &submit_list);
+ nvmeq = req->mq_hctx->driver_data;
- if (!nvme_prep_rq_batch(nvmeq, req)) {
- /* detach 'req' and add to remainder list */
- rq_list_move(rqlist, &requeue_list, req, prev);
-
- req = prev;
- if (!req)
- continue;
- }
-
- if (!next || req->mq_hctx != next->mq_hctx) {
- /* detach rest of list, and submit */
- req->rq_next = NULL;
- nvme_submit_cmds(nvmeq, rqlist);
- *rqlist = next;
- prev = NULL;
- } else
- prev = req;
+ if (nvme_prep_rq_batch(nvmeq, req))
+ rq_list_add_tail(&submit_list, req);
+ else
+ rq_list_add_tail(&requeue_list, req);
}
+ if (nvmeq)
+ nvme_submit_cmds(nvmeq, &submit_list);
*rqlist = requeue_list;
}
@@ -1083,7 +1079,7 @@ static irqreturn_t nvme_irq(int irq, void *data)
DEFINE_IO_COMP_BATCH(iob);
if (nvme_poll_cq(nvmeq, &iob)) {
- if (!rq_list_empty(iob.req_list))
+ if (!rq_list_empty(&iob.req_list))
nvme_pci_complete_batch(&iob);
return IRQ_HANDLED;
}
@@ -1951,7 +1947,7 @@ static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits)
return ret;
}
-static void nvme_free_host_mem(struct nvme_dev *dev)
+static void nvme_free_host_mem_multi(struct nvme_dev *dev)
{
int i;
@@ -1966,18 +1962,54 @@ static void nvme_free_host_mem(struct nvme_dev *dev)
kfree(dev->host_mem_desc_bufs);
dev->host_mem_desc_bufs = NULL;
- dma_free_coherent(dev->dev,
- dev->nr_host_mem_descs * sizeof(*dev->host_mem_descs),
+}
+
+static void nvme_free_host_mem(struct nvme_dev *dev)
+{
+ if (dev->hmb_sgt)
+ dma_free_noncontiguous(dev->dev, dev->host_mem_size,
+ dev->hmb_sgt, DMA_BIDIRECTIONAL);
+ else
+ nvme_free_host_mem_multi(dev);
+
+ dma_free_coherent(dev->dev, dev->host_mem_descs_size,
dev->host_mem_descs, dev->host_mem_descs_dma);
dev->host_mem_descs = NULL;
+ dev->host_mem_descs_size = 0;
dev->nr_host_mem_descs = 0;
}
-static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred,
+static int nvme_alloc_host_mem_single(struct nvme_dev *dev, u64 size)
+{
+ dev->hmb_sgt = dma_alloc_noncontiguous(dev->dev, size,
+ DMA_BIDIRECTIONAL, GFP_KERNEL, 0);
+ if (!dev->hmb_sgt)
+ return -ENOMEM;
+
+ dev->host_mem_descs = dma_alloc_coherent(dev->dev,
+ sizeof(*dev->host_mem_descs), &dev->host_mem_descs_dma,
+ GFP_KERNEL);
+ if (!dev->host_mem_descs) {
+ dma_free_noncontiguous(dev->dev, dev->host_mem_size,
+ dev->hmb_sgt, DMA_BIDIRECTIONAL);
+ dev->hmb_sgt = NULL;
+ return -ENOMEM;
+ }
+ dev->host_mem_size = size;
+ dev->host_mem_descs_size = sizeof(*dev->host_mem_descs);
+ dev->nr_host_mem_descs = 1;
+
+ dev->host_mem_descs[0].addr =
+ cpu_to_le64(dev->hmb_sgt->sgl->dma_address);
+ dev->host_mem_descs[0].size = cpu_to_le32(size / NVME_CTRL_PAGE_SIZE);
+ return 0;
+}
+
+static int nvme_alloc_host_mem_multi(struct nvme_dev *dev, u64 preferred,
u32 chunk_size)
{
struct nvme_host_mem_buf_desc *descs;
- u32 max_entries, len;
+ u32 max_entries, len, descs_size;
dma_addr_t descs_dma;
int i = 0;
void **bufs;
@@ -1990,8 +2022,9 @@ static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred,
if (dev->ctrl.hmmaxd && dev->ctrl.hmmaxd < max_entries)
max_entries = dev->ctrl.hmmaxd;
- descs = dma_alloc_coherent(dev->dev, max_entries * sizeof(*descs),
- &descs_dma, GFP_KERNEL);
+ descs_size = max_entries * sizeof(*descs);
+ descs = dma_alloc_coherent(dev->dev, descs_size, &descs_dma,
+ GFP_KERNEL);
if (!descs)
goto out;
@@ -2020,6 +2053,7 @@ static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred,
dev->host_mem_size = size;
dev->host_mem_descs = descs;
dev->host_mem_descs_dma = descs_dma;
+ dev->host_mem_descs_size = descs_size;
dev->host_mem_desc_bufs = bufs;
return 0;
@@ -2034,8 +2068,7 @@ static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred,
kfree(bufs);
out_free_descs:
- dma_free_coherent(dev->dev, max_entries * sizeof(*descs), descs,
- descs_dma);
+ dma_free_coherent(dev->dev, descs_size, descs, descs_dma);
out:
dev->host_mem_descs = NULL;
return -ENOMEM;
@@ -2047,9 +2080,18 @@ static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred)
u64 hmminds = max_t(u32, dev->ctrl.hmminds * 4096, PAGE_SIZE * 2);
u64 chunk_size;
+ /*
+ * If there is an IOMMU that can merge pages, try a virtually
+ * non-contiguous allocation for a single segment first.
+ */
+ if (!(PAGE_SIZE & dma_get_merge_boundary(dev->dev))) {
+ if (!nvme_alloc_host_mem_single(dev, preferred))
+ return 0;
+ }
+
/* start big and work our way down */
for (chunk_size = min_chunk; chunk_size >= hmminds; chunk_size /= 2) {
- if (!__nvme_alloc_host_mem(dev, preferred, chunk_size)) {
+ if (!nvme_alloc_host_mem_multi(dev, preferred, chunk_size)) {
if (!min || dev->host_mem_size >= min)
return 0;
nvme_free_host_mem(dev);
@@ -2097,8 +2139,10 @@ static int nvme_setup_host_mem(struct nvme_dev *dev)
}
dev_info(dev->ctrl.device,
- "allocated %lld MiB host memory buffer.\n",
- dev->host_mem_size >> ilog2(SZ_1M));
+ "allocated %lld MiB host memory buffer (%u segment%s).\n",
+ dev->host_mem_size >> ilog2(SZ_1M),
+ dev->nr_host_mem_descs,
+ str_plural(dev->nr_host_mem_descs));
}
ret = nvme_set_host_mem(dev, enable_bits);
diff --git a/drivers/nvme/host/trace.c b/drivers/nvme/host/trace.c
index 87c437f..ad25ad1 100644
--- a/drivers/nvme/host/trace.c
+++ b/drivers/nvme/host/trace.c
@@ -228,27 +228,61 @@ static const char *nvme_trace_zone_mgmt_recv(struct trace_seq *p, u8 *cdw10)
static const char *nvme_trace_resv_reg(struct trace_seq *p, u8 *cdw10)
{
+ static const char * const rrega_strs[] = {
+ [0x00] = "register",
+ [0x01] = "unregister",
+ [0x02] = "replace",
+ };
const char *ret = trace_seq_buffer_ptr(p);
u8 rrega = cdw10[0] & 0x7;
u8 iekey = (cdw10[0] >> 3) & 0x1;
u8 ptpl = (cdw10[3] >> 6) & 0x3;
+ const char *rrega_str;
- trace_seq_printf(p, "rrega=%u, iekey=%u, ptpl=%u",
- rrega, iekey, ptpl);
+ if (rrega < ARRAY_SIZE(rrega_strs) && rrega_strs[rrega])
+ rrega_str = rrega_strs[rrega];
+ else
+ rrega_str = "reserved";
+
+ trace_seq_printf(p, "rrega=%u:%s, iekey=%u, ptpl=%u",
+ rrega, rrega_str, iekey, ptpl);
trace_seq_putc(p, 0);
return ret;
}
+static const char * const rtype_strs[] = {
+ [0x00] = "reserved",
+ [0x01] = "write exclusive",
+ [0x02] = "exclusive access",
+ [0x03] = "write exclusive registrants only",
+ [0x04] = "exclusive access registrants only",
+ [0x05] = "write exclusive all registrants",
+ [0x06] = "exclusive access all registrants",
+};
+
static const char *nvme_trace_resv_acq(struct trace_seq *p, u8 *cdw10)
{
+ static const char * const racqa_strs[] = {
+ [0x00] = "acquire",
+ [0x01] = "preempt",
+ [0x02] = "preempt and abort",
+ };
const char *ret = trace_seq_buffer_ptr(p);
u8 racqa = cdw10[0] & 0x7;
u8 iekey = (cdw10[0] >> 3) & 0x1;
u8 rtype = cdw10[1];
+ const char *racqa_str = "reserved";
+ const char *rtype_str = "reserved";
- trace_seq_printf(p, "racqa=%u, iekey=%u, rtype=%u",
- racqa, iekey, rtype);
+ if (racqa < ARRAY_SIZE(racqa_strs) && racqa_strs[racqa])
+ racqa_str = racqa_strs[racqa];
+
+ if (rtype < ARRAY_SIZE(rtype_strs) && rtype_strs[rtype])
+ rtype_str = rtype_strs[rtype];
+
+ trace_seq_printf(p, "racqa=%u:%s, iekey=%u, rtype=%u:%s",
+ racqa, racqa_str, iekey, rtype, rtype_str);
trace_seq_putc(p, 0);
return ret;
@@ -256,13 +290,25 @@ static const char *nvme_trace_resv_acq(struct trace_seq *p, u8 *cdw10)
static const char *nvme_trace_resv_rel(struct trace_seq *p, u8 *cdw10)
{
+ static const char * const rrela_strs[] = {
+ [0x00] = "release",
+ [0x01] = "clear",
+ };
const char *ret = trace_seq_buffer_ptr(p);
u8 rrela = cdw10[0] & 0x7;
u8 iekey = (cdw10[0] >> 3) & 0x1;
u8 rtype = cdw10[1];
+ const char *rrela_str = "reserved";
+ const char *rtype_str = "reserved";
- trace_seq_printf(p, "rrela=%u, iekey=%u, rtype=%u",
- rrela, iekey, rtype);
+ if (rrela < ARRAY_SIZE(rrela_strs) && rrela_strs[rrela])
+ rrela_str = rrela_strs[rrela];
+
+ if (rtype < ARRAY_SIZE(rtype_strs) && rtype_strs[rtype])
+ rtype_str = rtype_strs[rtype];
+
+ trace_seq_printf(p, "rrela=%u:%s, iekey=%u, rtype=%u:%s",
+ rrela, rrela_str, iekey, rtype, rtype_str);
trace_seq_putc(p, 0);
return ret;
diff --git a/drivers/nvme/target/Makefile b/drivers/nvme/target/Makefile
index c402c44..f2b025b 100644
--- a/drivers/nvme/target/Makefile
+++ b/drivers/nvme/target/Makefile
@@ -10,7 +10,7 @@
obj-$(CONFIG_NVME_TARGET_TCP) += nvmet-tcp.o
nvmet-y += core.o configfs.o admin-cmd.o fabrics-cmd.o \
- discovery.o io-cmd-file.o io-cmd-bdev.o
+ discovery.o io-cmd-file.o io-cmd-bdev.o pr.o
nvmet-$(CONFIG_NVME_TARGET_DEBUGFS) += debugfs.o
nvmet-$(CONFIG_NVME_TARGET_PASSTHRU) += passthru.o
nvmet-$(CONFIG_BLK_DEV_ZONED) += zns.o
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index 081f047..934b401 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -71,6 +71,35 @@ static void nvmet_execute_get_log_page_error(struct nvmet_req *req)
nvmet_req_complete(req, 0);
}
+static void nvmet_execute_get_supported_log_pages(struct nvmet_req *req)
+{
+ struct nvme_supported_log *logs;
+ u16 status;
+
+ logs = kzalloc(sizeof(*logs), GFP_KERNEL);
+ if (!logs) {
+ status = NVME_SC_INTERNAL;
+ goto out;
+ }
+
+ logs->lids[NVME_LOG_SUPPORTED] = cpu_to_le32(NVME_LIDS_LSUPP);
+ logs->lids[NVME_LOG_ERROR] = cpu_to_le32(NVME_LIDS_LSUPP);
+ logs->lids[NVME_LOG_SMART] = cpu_to_le32(NVME_LIDS_LSUPP);
+ logs->lids[NVME_LOG_FW_SLOT] = cpu_to_le32(NVME_LIDS_LSUPP);
+ logs->lids[NVME_LOG_CHANGED_NS] = cpu_to_le32(NVME_LIDS_LSUPP);
+ logs->lids[NVME_LOG_CMD_EFFECTS] = cpu_to_le32(NVME_LIDS_LSUPP);
+ logs->lids[NVME_LOG_ENDURANCE_GROUP] = cpu_to_le32(NVME_LIDS_LSUPP);
+ logs->lids[NVME_LOG_ANA] = cpu_to_le32(NVME_LIDS_LSUPP);
+ logs->lids[NVME_LOG_FEATURES] = cpu_to_le32(NVME_LIDS_LSUPP);
+ logs->lids[NVME_LOG_RMI] = cpu_to_le32(NVME_LIDS_LSUPP);
+ logs->lids[NVME_LOG_RESERVATION] = cpu_to_le32(NVME_LIDS_LSUPP);
+
+ status = nvmet_copy_to_sgl(req, 0, logs, sizeof(*logs));
+ kfree(logs);
+out:
+ nvmet_req_complete(req, status);
+}
+
static u16 nvmet_get_smart_log_nsid(struct nvmet_req *req,
struct nvme_smart_log *slog)
{
@@ -130,6 +159,45 @@ static u16 nvmet_get_smart_log_all(struct nvmet_req *req,
return NVME_SC_SUCCESS;
}
+static void nvmet_execute_get_log_page_rmi(struct nvmet_req *req)
+{
+ struct nvme_rotational_media_log *log;
+ struct gendisk *disk;
+ u16 status;
+
+ req->cmd->common.nsid = cpu_to_le32(le16_to_cpu(
+ req->cmd->get_log_page.lsi));
+ status = nvmet_req_find_ns(req);
+ if (status)
+ goto out;
+
+ if (!req->ns->bdev || bdev_nonrot(req->ns->bdev)) {
+ status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
+ goto out;
+ }
+
+ if (req->transfer_len != sizeof(*log)) {
+ status = NVME_SC_SGL_INVALID_DATA | NVME_STATUS_DNR;
+ goto out;
+ }
+
+ log = kzalloc(sizeof(*log), GFP_KERNEL);
+ if (!log)
+ goto out;
+
+ log->endgid = req->cmd->get_log_page.lsi;
+ disk = req->ns->bdev->bd_disk;
+ if (disk && disk->ia_ranges)
+ log->numa = cpu_to_le16(disk->ia_ranges->nr_ia_ranges);
+ else
+ log->numa = cpu_to_le16(1);
+
+ status = nvmet_copy_to_sgl(req, 0, log, sizeof(*log));
+ kfree(log);
+out:
+ nvmet_req_complete(req, status);
+}
+
static void nvmet_execute_get_log_page_smart(struct nvmet_req *req)
{
struct nvme_smart_log *log;
@@ -176,6 +244,10 @@ static void nvmet_get_cmd_effects_nvm(struct nvme_effects_log *log)
log->iocs[nvme_cmd_read] =
log->iocs[nvme_cmd_flush] =
log->iocs[nvme_cmd_dsm] =
+ log->iocs[nvme_cmd_resv_acquire] =
+ log->iocs[nvme_cmd_resv_register] =
+ log->iocs[nvme_cmd_resv_release] =
+ log->iocs[nvme_cmd_resv_report] =
cpu_to_le32(NVME_CMD_EFFECTS_CSUPP);
log->iocs[nvme_cmd_write] =
log->iocs[nvme_cmd_write_zeroes] =
@@ -272,6 +344,49 @@ static u32 nvmet_format_ana_group(struct nvmet_req *req, u32 grpid,
return struct_size(desc, nsids, count);
}
+static void nvmet_execute_get_log_page_endgrp(struct nvmet_req *req)
+{
+ u64 host_reads, host_writes, data_units_read, data_units_written;
+ struct nvme_endurance_group_log *log;
+ u16 status;
+
+ /*
+ * The target driver emulates each endurance group as its own
+ * namespace, reusing the nsid as the endurance group identifier.
+ */
+ req->cmd->common.nsid = cpu_to_le32(le16_to_cpu(
+ req->cmd->get_log_page.lsi));
+ status = nvmet_req_find_ns(req);
+ if (status)
+ goto out;
+
+ log = kzalloc(sizeof(*log), GFP_KERNEL);
+ if (!log) {
+ status = NVME_SC_INTERNAL;
+ goto out;
+ }
+
+ if (!req->ns->bdev)
+ goto copy;
+
+ host_reads = part_stat_read(req->ns->bdev, ios[READ]);
+ data_units_read =
+ DIV_ROUND_UP(part_stat_read(req->ns->bdev, sectors[READ]), 1000);
+ host_writes = part_stat_read(req->ns->bdev, ios[WRITE]);
+ data_units_written =
+ DIV_ROUND_UP(part_stat_read(req->ns->bdev, sectors[WRITE]), 1000);
+
+ put_unaligned_le64(host_reads, &log->hrc[0]);
+ put_unaligned_le64(data_units_read, &log->dur[0]);
+ put_unaligned_le64(host_writes, &log->hwc[0]);
+ put_unaligned_le64(data_units_written, &log->duw[0]);
+copy:
+ status = nvmet_copy_to_sgl(req, 0, log, sizeof(*log));
+ kfree(log);
+out:
+ nvmet_req_complete(req, status);
+}
+
static void nvmet_execute_get_log_page_ana(struct nvmet_req *req)
{
struct nvme_ana_rsp_hdr hdr = { 0, };
@@ -317,12 +432,44 @@ static void nvmet_execute_get_log_page_ana(struct nvmet_req *req)
nvmet_req_complete(req, status);
}
+static void nvmet_execute_get_log_page_features(struct nvmet_req *req)
+{
+ struct nvme_supported_features_log *features;
+ u16 status;
+
+ features = kzalloc(sizeof(*features), GFP_KERNEL);
+ if (!features) {
+ status = NVME_SC_INTERNAL;
+ goto out;
+ }
+
+ features->fis[NVME_FEAT_NUM_QUEUES] =
+ cpu_to_le32(NVME_FIS_FSUPP | NVME_FIS_CSCPE);
+ features->fis[NVME_FEAT_KATO] =
+ cpu_to_le32(NVME_FIS_FSUPP | NVME_FIS_CSCPE);
+ features->fis[NVME_FEAT_ASYNC_EVENT] =
+ cpu_to_le32(NVME_FIS_FSUPP | NVME_FIS_CSCPE);
+ features->fis[NVME_FEAT_HOST_ID] =
+ cpu_to_le32(NVME_FIS_FSUPP | NVME_FIS_CSCPE);
+ features->fis[NVME_FEAT_WRITE_PROTECT] =
+ cpu_to_le32(NVME_FIS_FSUPP | NVME_FIS_NSCPE);
+ features->fis[NVME_FEAT_RESV_MASK] =
+ cpu_to_le32(NVME_FIS_FSUPP | NVME_FIS_NSCPE);
+
+ status = nvmet_copy_to_sgl(req, 0, features, sizeof(*features));
+ kfree(features);
+out:
+ nvmet_req_complete(req, status);
+}
+
static void nvmet_execute_get_log_page(struct nvmet_req *req)
{
if (!nvmet_check_transfer_len(req, nvmet_get_log_page_len(req->cmd)))
return;
switch (req->cmd->get_log_page.lid) {
+ case NVME_LOG_SUPPORTED:
+ return nvmet_execute_get_supported_log_pages(req);
case NVME_LOG_ERROR:
return nvmet_execute_get_log_page_error(req);
case NVME_LOG_SMART:
@@ -338,8 +485,16 @@ static void nvmet_execute_get_log_page(struct nvmet_req *req)
return nvmet_execute_get_log_changed_ns(req);
case NVME_LOG_CMD_EFFECTS:
return nvmet_execute_get_log_cmd_effects_ns(req);
+ case NVME_LOG_ENDURANCE_GROUP:
+ return nvmet_execute_get_log_page_endgrp(req);
case NVME_LOG_ANA:
return nvmet_execute_get_log_page_ana(req);
+ case NVME_LOG_FEATURES:
+ return nvmet_execute_get_log_page_features(req);
+ case NVME_LOG_RMI:
+ return nvmet_execute_get_log_page_rmi(req);
+ case NVME_LOG_RESERVATION:
+ return nvmet_execute_get_log_page_resv(req);
}
pr_debug("unhandled lid %d on qid %d\n",
req->cmd->get_log_page.lid, req->sq->qid);
@@ -433,7 +588,8 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
id->nn = cpu_to_le32(NVMET_MAX_NAMESPACES);
id->mnan = cpu_to_le32(NVMET_MAX_NAMESPACES);
id->oncs = cpu_to_le16(NVME_CTRL_ONCS_DSM |
- NVME_CTRL_ONCS_WRITE_ZEROES);
+ NVME_CTRL_ONCS_WRITE_ZEROES |
+ NVME_CTRL_ONCS_RESERVATIONS);
/* XXX: don't report vwc if the underlying device is write through */
id->vwc = NVME_CTRL_VWC_PRESENT;
@@ -467,6 +623,13 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
id->msdbd = ctrl->ops->msdbd;
+ /*
+ * Endurance group identifier is 16 bits, so we can't let namespaces
+ * overflow that since we reuse the nsid
+ */
+ BUILD_BUG_ON(NVMET_MAX_NAMESPACES > USHRT_MAX);
+ id->endgidmax = cpu_to_le16(NVMET_MAX_NAMESPACES);
+
id->anacap = (1 << 0) | (1 << 1) | (1 << 2) | (1 << 3) | (1 << 4);
id->anatt = 10; /* random value */
id->anagrpmax = cpu_to_le32(NVMET_MAX_ANAGRPS);
@@ -551,6 +714,21 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req)
id->nmic = NVME_NS_NMIC_SHARED;
id->anagrpid = cpu_to_le32(req->ns->anagrpid);
+ if (req->ns->pr.enable)
+ id->rescap = NVME_PR_SUPPORT_WRITE_EXCLUSIVE |
+ NVME_PR_SUPPORT_EXCLUSIVE_ACCESS |
+ NVME_PR_SUPPORT_WRITE_EXCLUSIVE_REG_ONLY |
+ NVME_PR_SUPPORT_EXCLUSIVE_ACCESS_REG_ONLY |
+ NVME_PR_SUPPORT_WRITE_EXCLUSIVE_ALL_REGS |
+ NVME_PR_SUPPORT_EXCLUSIVE_ACCESS_ALL_REGS |
+ NVME_PR_SUPPORT_IEKEY_VER_1_3_DEF;
+
+ /*
+ * Since we don't know any better, every namespace is its own endurance
+ * group.
+ */
+ id->endgid = cpu_to_le16(req->ns->nsid);
+
memcpy(&id->nguid, &req->ns->nguid, sizeof(id->nguid));
id->lbaf[0].ds = req->ns->blksize_shift;
@@ -576,7 +754,40 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req)
nvmet_req_complete(req, status);
}
-static void nvmet_execute_identify_nslist(struct nvmet_req *req)
+static void nvmet_execute_identify_endgrp_list(struct nvmet_req *req)
+{
+ u16 min_endgid = le16_to_cpu(req->cmd->identify.cnssid);
+ static const int buf_size = NVME_IDENTIFY_DATA_SIZE;
+ struct nvmet_ctrl *ctrl = req->sq->ctrl;
+ struct nvmet_ns *ns;
+ unsigned long idx;
+ __le16 *list;
+ u16 status;
+ int i = 1;
+
+ list = kzalloc(buf_size, GFP_KERNEL);
+ if (!list) {
+ status = NVME_SC_INTERNAL;
+ goto out;
+ }
+
+ xa_for_each(&ctrl->subsys->namespaces, idx, ns) {
+ if (ns->nsid <= min_endgid)
+ continue;
+
+ list[i++] = cpu_to_le16(ns->nsid);
+ if (i == buf_size / sizeof(__le16))
+ break;
+ }
+
+ list[0] = cpu_to_le16(i - 1);
+ status = nvmet_copy_to_sgl(req, 0, list, buf_size);
+ kfree(list);
+out:
+ nvmet_req_complete(req, status);
+}
+
+static void nvmet_execute_identify_nslist(struct nvmet_req *req, bool match_css)
{
static const int buf_size = NVME_IDENTIFY_DATA_SIZE;
struct nvmet_ctrl *ctrl = req->sq->ctrl;
@@ -606,6 +817,8 @@ static void nvmet_execute_identify_nslist(struct nvmet_req *req)
xa_for_each(&ctrl->subsys->namespaces, idx, ns) {
if (ns->nsid <= min_nsid)
continue;
+ if (match_css && req->ns->csi != req->cmd->identify.csi)
+ continue;
list[i++] = cpu_to_le32(ns->nsid);
if (i == buf_size / sizeof(__le32))
break;
@@ -685,6 +898,56 @@ static void nvmet_execute_identify_ctrl_nvm(struct nvmet_req *req)
nvmet_zero_sgl(req, 0, sizeof(struct nvme_id_ctrl_nvm)));
}
+static void nvme_execute_identify_ns_nvm(struct nvmet_req *req)
+{
+ u16 status;
+
+ status = nvmet_req_find_ns(req);
+ if (status)
+ goto out;
+
+ status = nvmet_copy_to_sgl(req, 0, ZERO_PAGE(0),
+ NVME_IDENTIFY_DATA_SIZE);
+out:
+ nvmet_req_complete(req, status);
+}
+
+static void nvmet_execute_id_cs_indep(struct nvmet_req *req)
+{
+ struct nvme_id_ns_cs_indep *id;
+ u16 status;
+
+ status = nvmet_req_find_ns(req);
+ if (status)
+ goto out;
+
+ id = kzalloc(sizeof(*id), GFP_KERNEL);
+ if (!id) {
+ status = NVME_SC_INTERNAL;
+ goto out;
+ }
+
+ id->nstat = NVME_NSTAT_NRDY;
+ id->anagrpid = cpu_to_le32(req->ns->anagrpid);
+ id->nmic = NVME_NS_NMIC_SHARED;
+ if (req->ns->readonly)
+ id->nsattr |= NVME_NS_ATTR_RO;
+ if (req->ns->bdev && !bdev_nonrot(req->ns->bdev))
+ id->nsfeat |= NVME_NS_ROTATIONAL;
+ /*
+ * We need flush command to flush the file's metadata,
+ * so report supporting vwc if backend is file, even
+ * though buffered_io is disable.
+ */
+ if (req->ns->bdev && !bdev_write_cache(req->ns->bdev))
+ id->nsfeat |= NVME_NS_VWC_NOT_PRESENT;
+
+ status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id));
+ kfree(id);
+out:
+ nvmet_req_complete(req, status);
+}
+
static void nvmet_execute_identify(struct nvmet_req *req)
{
if (!nvmet_check_transfer_len(req, NVME_IDENTIFY_DATA_SIZE))
@@ -698,7 +961,7 @@ static void nvmet_execute_identify(struct nvmet_req *req)
nvmet_execute_identify_ctrl(req);
return;
case NVME_ID_CNS_NS_ACTIVE_LIST:
- nvmet_execute_identify_nslist(req);
+ nvmet_execute_identify_nslist(req, false);
return;
case NVME_ID_CNS_NS_DESC_LIST:
nvmet_execute_identify_desclist(req);
@@ -706,8 +969,8 @@ static void nvmet_execute_identify(struct nvmet_req *req)
case NVME_ID_CNS_CS_NS:
switch (req->cmd->identify.csi) {
case NVME_CSI_NVM:
- /* Not supported */
- break;
+ nvme_execute_identify_ns_nvm(req);
+ return;
case NVME_CSI_ZNS:
if (IS_ENABLED(CONFIG_BLK_DEV_ZONED)) {
nvmet_execute_identify_ns_zns(req);
@@ -729,6 +992,15 @@ static void nvmet_execute_identify(struct nvmet_req *req)
break;
}
break;
+ case NVME_ID_CNS_NS_ACTIVE_LIST_CS:
+ nvmet_execute_identify_nslist(req, true);
+ return;
+ case NVME_ID_CNS_NS_CS_INDEP:
+ nvmet_execute_id_cs_indep(req);
+ return;
+ case NVME_ID_CNS_ENDGRP_LIST:
+ nvmet_execute_identify_endgrp_list(req);
+ return;
}
pr_debug("unhandled identify cns %d on qid %d\n",
@@ -861,6 +1133,9 @@ void nvmet_execute_set_features(struct nvmet_req *req)
case NVME_FEAT_WRITE_PROTECT:
status = nvmet_set_feat_write_protect(req);
break;
+ case NVME_FEAT_RESV_MASK:
+ status = nvmet_set_feat_resv_notif_mask(req, cdw11);
+ break;
default:
req->error_loc = offsetof(struct nvme_common_command, cdw10);
status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
@@ -959,6 +1234,9 @@ void nvmet_execute_get_features(struct nvmet_req *req)
case NVME_FEAT_WRITE_PROTECT:
status = nvmet_get_feat_write_protect(req);
break;
+ case NVME_FEAT_RESV_MASK:
+ status = nvmet_get_feat_resv_notif_mask(req);
+ break;
default:
req->error_loc =
offsetof(struct nvme_common_command, cdw10);
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index 685e89b..eeee9e9 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -769,6 +769,32 @@ static ssize_t nvmet_ns_revalidate_size_store(struct config_item *item,
CONFIGFS_ATTR_WO(nvmet_ns_, revalidate_size);
+static ssize_t nvmet_ns_resv_enable_show(struct config_item *item, char *page)
+{
+ return sysfs_emit(page, "%d\n", to_nvmet_ns(item)->pr.enable);
+}
+
+static ssize_t nvmet_ns_resv_enable_store(struct config_item *item,
+ const char *page, size_t count)
+{
+ struct nvmet_ns *ns = to_nvmet_ns(item);
+ bool val;
+
+ if (kstrtobool(page, &val))
+ return -EINVAL;
+
+ mutex_lock(&ns->subsys->lock);
+ if (ns->enabled) {
+ pr_err("the ns:%d is already enabled.\n", ns->nsid);
+ mutex_unlock(&ns->subsys->lock);
+ return -EINVAL;
+ }
+ ns->pr.enable = val;
+ mutex_unlock(&ns->subsys->lock);
+ return count;
+}
+CONFIGFS_ATTR(nvmet_ns_, resv_enable);
+
static struct configfs_attribute *nvmet_ns_attrs[] = {
&nvmet_ns_attr_device_path,
&nvmet_ns_attr_device_nguid,
@@ -777,6 +803,7 @@ static struct configfs_attribute *nvmet_ns_attrs[] = {
&nvmet_ns_attr_enable,
&nvmet_ns_attr_buffered_io,
&nvmet_ns_attr_revalidate_size,
+ &nvmet_ns_attr_resv_enable,
#ifdef CONFIG_PCI_P2PDMA
&nvmet_ns_attr_p2pmem,
#endif
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index ed2424f..1f4e998 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -611,6 +611,12 @@ int nvmet_ns_enable(struct nvmet_ns *ns)
if (ret)
goto out_restore_subsys_maxnsid;
+ if (ns->pr.enable) {
+ ret = nvmet_pr_init_ns(ns);
+ if (ret)
+ goto out_remove_from_subsys;
+ }
+
subsys->nr_namespaces++;
nvmet_ns_changed(subsys, ns->nsid);
@@ -620,6 +626,8 @@ int nvmet_ns_enable(struct nvmet_ns *ns)
mutex_unlock(&subsys->lock);
return ret;
+out_remove_from_subsys:
+ xa_erase(&subsys->namespaces, ns->nsid);
out_restore_subsys_maxnsid:
subsys->max_nsid = nvmet_max_nsid(subsys);
percpu_ref_exit(&ns->ref);
@@ -663,6 +671,9 @@ void nvmet_ns_disable(struct nvmet_ns *ns)
wait_for_completion(&ns->disable_done);
percpu_ref_exit(&ns->ref);
+ if (ns->pr.enable)
+ nvmet_pr_exit_ns(ns);
+
mutex_lock(&subsys->lock);
subsys->nr_namespaces--;
@@ -754,6 +765,7 @@ static void nvmet_set_error(struct nvmet_req *req, u16 status)
static void __nvmet_req_complete(struct nvmet_req *req, u16 status)
{
struct nvmet_ns *ns = req->ns;
+ struct nvmet_pr_per_ctrl_ref *pc_ref = req->pc_ref;
if (!req->sq->sqhd_disabled)
nvmet_update_sq_head(req);
@@ -766,6 +778,9 @@ static void __nvmet_req_complete(struct nvmet_req *req, u16 status)
trace_nvmet_req_complete(req);
req->ops->queue_response(req);
+
+ if (pc_ref)
+ nvmet_pr_put_ns_pc_ref(pc_ref);
if (ns)
nvmet_put_namespace(ns);
}
@@ -929,18 +944,39 @@ static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
return ret;
}
+ if (req->ns->pr.enable) {
+ ret = nvmet_parse_pr_cmd(req);
+ if (!ret)
+ return ret;
+ }
+
switch (req->ns->csi) {
case NVME_CSI_NVM:
if (req->ns->file)
- return nvmet_file_parse_io_cmd(req);
- return nvmet_bdev_parse_io_cmd(req);
+ ret = nvmet_file_parse_io_cmd(req);
+ else
+ ret = nvmet_bdev_parse_io_cmd(req);
+ break;
case NVME_CSI_ZNS:
if (IS_ENABLED(CONFIG_BLK_DEV_ZONED))
- return nvmet_bdev_zns_parse_io_cmd(req);
- return NVME_SC_INVALID_IO_CMD_SET;
+ ret = nvmet_bdev_zns_parse_io_cmd(req);
+ else
+ ret = NVME_SC_INVALID_IO_CMD_SET;
+ break;
default:
- return NVME_SC_INVALID_IO_CMD_SET;
+ ret = NVME_SC_INVALID_IO_CMD_SET;
}
+ if (ret)
+ return ret;
+
+ if (req->ns->pr.enable) {
+ ret = nvmet_pr_check_cmd_access(req);
+ if (ret)
+ return ret;
+
+ ret = nvmet_pr_get_ns_pc_ref(req);
+ }
+ return ret;
}
bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
@@ -964,6 +1000,7 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
req->ns = NULL;
req->error_loc = NVMET_NO_ERROR_LOC;
req->error_slba = 0;
+ req->pc_ref = NULL;
/* no support for fused commands yet */
if (unlikely(flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND))) {
@@ -1015,6 +1052,8 @@ EXPORT_SYMBOL_GPL(nvmet_req_init);
void nvmet_req_uninit(struct nvmet_req *req)
{
percpu_ref_put(&req->sq->ref);
+ if (req->pc_ref)
+ nvmet_pr_put_ns_pc_ref(req->pc_ref);
if (req->ns)
nvmet_put_namespace(req->ns);
}
@@ -1383,7 +1422,8 @@ static void nvmet_fatal_error_handler(struct work_struct *work)
}
u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
- struct nvmet_req *req, u32 kato, struct nvmet_ctrl **ctrlp)
+ struct nvmet_req *req, u32 kato, struct nvmet_ctrl **ctrlp,
+ uuid_t *hostid)
{
struct nvmet_subsys *subsys;
struct nvmet_ctrl *ctrl;
@@ -1462,6 +1502,8 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
}
ctrl->cntlid = ret;
+ uuid_copy(&ctrl->hostid, hostid);
+
/*
* Discovery controllers may use some arbitrary high value
* in order to cleanup stale discovery sessions
@@ -1478,6 +1520,9 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
nvmet_start_keep_alive_timer(ctrl);
mutex_lock(&subsys->lock);
+ ret = nvmet_ctrl_init_pr(ctrl);
+ if (ret)
+ goto init_pr_fail;
list_add_tail(&ctrl->subsys_entry, &subsys->ctrls);
nvmet_setup_p2p_ns_map(ctrl, req);
nvmet_debugfs_ctrl_setup(ctrl);
@@ -1486,6 +1531,10 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
*ctrlp = ctrl;
return 0;
+init_pr_fail:
+ mutex_unlock(&subsys->lock);
+ nvmet_stop_keep_alive_timer(ctrl);
+ ida_free(&cntlid_ida, ctrl->cntlid);
out_free_sqs:
kfree(ctrl->sqs);
out_free_changed_ns_list:
@@ -1504,6 +1553,7 @@ static void nvmet_ctrl_free(struct kref *ref)
struct nvmet_subsys *subsys = ctrl->subsys;
mutex_lock(&subsys->lock);
+ nvmet_ctrl_destroy_pr(ctrl);
nvmet_release_p2p_ns_map(ctrl);
list_del(&ctrl->subsys_entry);
mutex_unlock(&subsys->lock);
@@ -1717,7 +1767,7 @@ static int __init nvmet_init(void)
goto out_free_zbd_work_queue;
nvmet_wq = alloc_workqueue("nvmet-wq",
- WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
+ WQ_MEM_RECLAIM | WQ_UNBOUND | WQ_SYSFS, 0);
if (!nvmet_wq)
goto out_free_buffered_work_queue;
diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c
index c4b2edd..c49904e 100644
--- a/drivers/nvme/target/fabrics-cmd.c
+++ b/drivers/nvme/target/fabrics-cmd.c
@@ -64,6 +64,9 @@ static void nvmet_execute_prop_get(struct nvmet_req *req)
case NVME_REG_CSTS:
val = ctrl->csts;
break;
+ case NVME_REG_CRTO:
+ val = NVME_CAP_TIMEOUT(ctrl->csts);
+ break;
default:
status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
break;
@@ -245,12 +248,10 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req)
d->subsysnqn[NVMF_NQN_FIELD_LEN - 1] = '\0';
d->hostnqn[NVMF_NQN_FIELD_LEN - 1] = '\0';
status = nvmet_alloc_ctrl(d->subsysnqn, d->hostnqn, req,
- le32_to_cpu(c->kato), &ctrl);
+ le32_to_cpu(c->kato), &ctrl, &d->hostid);
if (status)
goto out;
- uuid_copy(&ctrl->hostid, &d->hostid);
-
dhchap_status = nvmet_setup_auth(ctrl);
if (dhchap_status) {
pr_err("Failed to setup authentication, dhchap status %u\n",
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 190f55e..58328b3 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -20,8 +20,9 @@
#include <linux/blkdev.h>
#include <linux/radix-tree.h>
#include <linux/t10-pi.h>
+#include <linux/kfifo.h>
-#define NVMET_DEFAULT_VS NVME_VS(1, 3, 0)
+#define NVMET_DEFAULT_VS NVME_VS(2, 1, 0)
#define NVMET_ASYNC_EVENTS 4
#define NVMET_ERROR_LOG_SLOTS 128
@@ -30,6 +31,7 @@
#define NVMET_MN_MAX_SIZE 40
#define NVMET_SN_MAX_SIZE 20
#define NVMET_FR_MAX_SIZE 8
+#define NVMET_PR_LOG_QUEUE_SIZE 64
/*
* Supported optional AENs:
@@ -56,6 +58,38 @@
#define IPO_IATTR_CONNECT_SQE(x) \
(cpu_to_le32(offsetof(struct nvmf_connect_command, x)))
+struct nvmet_pr_registrant {
+ u64 rkey;
+ uuid_t hostid;
+ enum nvme_pr_type rtype;
+ struct list_head entry;
+ struct rcu_head rcu;
+};
+
+struct nvmet_pr {
+ bool enable;
+ unsigned long notify_mask;
+ atomic_t generation;
+ struct nvmet_pr_registrant __rcu *holder;
+ /*
+ * During the execution of the reservation command, mutual
+ * exclusion is required throughout the process. However,
+ * while waiting asynchronously for the 'per controller
+ * percpu_ref' to complete before the 'preempt and abort'
+ * command finishes, a semaphore is needed to ensure mutual
+ * exclusion instead of a mutex.
+ */
+ struct semaphore pr_sem;
+ struct list_head registrant_list;
+};
+
+struct nvmet_pr_per_ctrl_ref {
+ struct percpu_ref ref;
+ struct completion free_done;
+ struct completion confirm_done;
+ uuid_t hostid;
+};
+
struct nvmet_ns {
struct percpu_ref ref;
struct file *bdev_file;
@@ -85,6 +119,8 @@ struct nvmet_ns {
int pi_type;
int metadata_size;
u8 csi;
+ struct nvmet_pr pr;
+ struct xarray pr_per_ctrl_refs;
};
static inline struct nvmet_ns *to_nvmet_ns(struct config_item *item)
@@ -191,6 +227,13 @@ static inline bool nvmet_port_secure_channel_required(struct nvmet_port *port)
return nvmet_port_disc_addr_treq_secure_channel(port) == NVMF_TREQ_REQUIRED;
}
+struct nvmet_pr_log_mgr {
+ struct mutex lock;
+ u64 lost_count;
+ u64 counter;
+ DECLARE_KFIFO(log_queue, struct nvme_pr_log, NVMET_PR_LOG_QUEUE_SIZE);
+};
+
struct nvmet_ctrl {
struct nvmet_subsys *subsys;
struct nvmet_sq **sqs;
@@ -246,6 +289,7 @@ struct nvmet_ctrl {
u8 *dh_key;
size_t dh_keysize;
#endif
+ struct nvmet_pr_log_mgr pr_log_mgr;
};
struct nvmet_subsys {
@@ -396,6 +440,9 @@ struct nvmet_req {
struct work_struct zmgmt_work;
} z;
#endif /* CONFIG_BLK_DEV_ZONED */
+ struct {
+ struct work_struct abort_work;
+ } r;
};
int sg_cnt;
int metadata_sg_cnt;
@@ -412,6 +459,7 @@ struct nvmet_req {
struct device *p2p_client;
u16 error_loc;
u64 error_slba;
+ struct nvmet_pr_per_ctrl_ref *pc_ref;
};
#define NVMET_MAX_MPOOL_BVEC 16
@@ -498,7 +546,8 @@ void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl);
void nvmet_update_cc(struct nvmet_ctrl *ctrl, u32 new);
u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
- struct nvmet_req *req, u32 kato, struct nvmet_ctrl **ctrlp);
+ struct nvmet_req *req, u32 kato, struct nvmet_ctrl **ctrlp,
+ uuid_t *hostid);
struct nvmet_ctrl *nvmet_ctrl_find_get(const char *subsysnqn,
const char *hostnqn, u16 cntlid,
struct nvmet_req *req);
@@ -761,4 +810,18 @@ static inline bool nvmet_has_auth(struct nvmet_ctrl *ctrl)
static inline const char *nvmet_dhchap_dhgroup_name(u8 dhgid) { return NULL; }
#endif
+int nvmet_pr_init_ns(struct nvmet_ns *ns);
+u16 nvmet_parse_pr_cmd(struct nvmet_req *req);
+u16 nvmet_pr_check_cmd_access(struct nvmet_req *req);
+int nvmet_ctrl_init_pr(struct nvmet_ctrl *ctrl);
+void nvmet_ctrl_destroy_pr(struct nvmet_ctrl *ctrl);
+void nvmet_pr_exit_ns(struct nvmet_ns *ns);
+void nvmet_execute_get_log_page_resv(struct nvmet_req *req);
+u16 nvmet_set_feat_resv_notif_mask(struct nvmet_req *req, u32 mask);
+u16 nvmet_get_feat_resv_notif_mask(struct nvmet_req *req);
+u16 nvmet_pr_get_ns_pc_ref(struct nvmet_req *req);
+static inline void nvmet_pr_put_ns_pc_ref(struct nvmet_pr_per_ctrl_ref *pc_ref)
+{
+ percpu_ref_put(&pc_ref->ref);
+}
#endif /* _NVMET_H */
diff --git a/drivers/nvme/target/pr.c b/drivers/nvme/target/pr.c
new file mode 100644
index 0000000..25a02b5
--- /dev/null
+++ b/drivers/nvme/target/pr.c
@@ -0,0 +1,1156 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * NVMe over Fabrics Persist Reservation.
+ * Copyright (c) 2024 Guixin Liu, Alibaba Group.
+ * All rights reserved.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/unaligned.h>
+#include "nvmet.h"
+
+#define NVMET_PR_NOTIFI_MASK_ALL \
+ (1 << NVME_PR_NOTIFY_BIT_REG_PREEMPTED | \
+ 1 << NVME_PR_NOTIFY_BIT_RESV_RELEASED | \
+ 1 << NVME_PR_NOTIFY_BIT_RESV_PREEMPTED)
+
+static inline bool nvmet_pr_parse_ignore_key(u32 cdw10)
+{
+ /* Ignore existing key, bit 03. */
+ return (cdw10 >> 3) & 1;
+}
+
+static inline struct nvmet_ns *nvmet_pr_to_ns(struct nvmet_pr *pr)
+{
+ return container_of(pr, struct nvmet_ns, pr);
+}
+
+static struct nvmet_pr_registrant *
+nvmet_pr_find_registrant(struct nvmet_pr *pr, uuid_t *hostid)
+{
+ struct nvmet_pr_registrant *reg;
+
+ list_for_each_entry_rcu(reg, &pr->registrant_list, entry) {
+ if (uuid_equal(®->hostid, hostid))
+ return reg;
+ }
+ return NULL;
+}
+
+u16 nvmet_set_feat_resv_notif_mask(struct nvmet_req *req, u32 mask)
+{
+ u32 nsid = le32_to_cpu(req->cmd->common.nsid);
+ struct nvmet_ctrl *ctrl = req->sq->ctrl;
+ struct nvmet_ns *ns;
+ unsigned long idx;
+ u16 status;
+
+ if (mask & ~(NVMET_PR_NOTIFI_MASK_ALL)) {
+ req->error_loc = offsetof(struct nvme_common_command, cdw11);
+ return NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
+ }
+
+ if (nsid != U32_MAX) {
+ status = nvmet_req_find_ns(req);
+ if (status)
+ return status;
+ if (!req->ns->pr.enable)
+ return NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
+
+ WRITE_ONCE(req->ns->pr.notify_mask, mask);
+ goto success;
+ }
+
+ xa_for_each(&ctrl->subsys->namespaces, idx, ns) {
+ if (ns->pr.enable)
+ WRITE_ONCE(ns->pr.notify_mask, mask);
+ }
+
+success:
+ nvmet_set_result(req, mask);
+ return NVME_SC_SUCCESS;
+}
+
+u16 nvmet_get_feat_resv_notif_mask(struct nvmet_req *req)
+{
+ u16 status;
+
+ status = nvmet_req_find_ns(req);
+ if (status)
+ return status;
+
+ if (!req->ns->pr.enable)
+ return NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
+
+ nvmet_set_result(req, READ_ONCE(req->ns->pr.notify_mask));
+ return status;
+}
+
+void nvmet_execute_get_log_page_resv(struct nvmet_req *req)
+{
+ struct nvmet_pr_log_mgr *log_mgr = &req->sq->ctrl->pr_log_mgr;
+ struct nvme_pr_log next_log = {0};
+ struct nvme_pr_log log = {0};
+ u16 status = NVME_SC_SUCCESS;
+ u64 lost_count;
+ u64 cur_count;
+ u64 next_count;
+
+ mutex_lock(&log_mgr->lock);
+ if (!kfifo_get(&log_mgr->log_queue, &log))
+ goto out;
+
+ /*
+ * We can't get the last in kfifo.
+ * Utilize the current count and the count from the next log to
+ * calculate the number of lost logs, while also addressing cases
+ * of overflow. If there is no subsequent log, the number of lost
+ * logs is equal to the lost_count within the nvmet_pr_log_mgr.
+ */
+ cur_count = le64_to_cpu(log.count);
+ if (kfifo_peek(&log_mgr->log_queue, &next_log)) {
+ next_count = le64_to_cpu(next_log.count);
+ if (next_count > cur_count)
+ lost_count = next_count - cur_count - 1;
+ else
+ lost_count = U64_MAX - cur_count + next_count - 1;
+ } else {
+ lost_count = log_mgr->lost_count;
+ }
+
+ log.count = cpu_to_le64((cur_count + lost_count) == 0 ?
+ 1 : (cur_count + lost_count));
+ log_mgr->lost_count -= lost_count;
+
+ log.nr_pages = kfifo_len(&log_mgr->log_queue);
+
+out:
+ status = nvmet_copy_to_sgl(req, 0, &log, sizeof(log));
+ mutex_unlock(&log_mgr->lock);
+ nvmet_req_complete(req, status);
+}
+
+static void nvmet_pr_add_resv_log(struct nvmet_ctrl *ctrl, u8 log_type,
+ u32 nsid)
+{
+ struct nvmet_pr_log_mgr *log_mgr = &ctrl->pr_log_mgr;
+ struct nvme_pr_log log = {0};
+
+ mutex_lock(&log_mgr->lock);
+ log_mgr->counter++;
+ if (log_mgr->counter == 0)
+ log_mgr->counter = 1;
+
+ log.count = cpu_to_le64(log_mgr->counter);
+ log.type = log_type;
+ log.nsid = cpu_to_le32(nsid);
+
+ if (!kfifo_put(&log_mgr->log_queue, log)) {
+ pr_info("a reservation log lost, cntlid:%d, log_type:%d, nsid:%d\n",
+ ctrl->cntlid, log_type, nsid);
+ log_mgr->lost_count++;
+ }
+
+ mutex_unlock(&log_mgr->lock);
+}
+
+static void nvmet_pr_resv_released(struct nvmet_pr *pr, uuid_t *hostid)
+{
+ struct nvmet_ns *ns = nvmet_pr_to_ns(pr);
+ struct nvmet_subsys *subsys = ns->subsys;
+ struct nvmet_ctrl *ctrl;
+
+ if (test_bit(NVME_PR_NOTIFY_BIT_RESV_RELEASED, &pr->notify_mask))
+ return;
+
+ mutex_lock(&subsys->lock);
+ list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
+ if (!uuid_equal(&ctrl->hostid, hostid) &&
+ nvmet_pr_find_registrant(pr, &ctrl->hostid)) {
+ nvmet_pr_add_resv_log(ctrl,
+ NVME_PR_LOG_RESERVATION_RELEASED, ns->nsid);
+ nvmet_add_async_event(ctrl, NVME_AER_CSS,
+ NVME_AEN_RESV_LOG_PAGE_AVALIABLE,
+ NVME_LOG_RESERVATION);
+ }
+ }
+ mutex_unlock(&subsys->lock);
+}
+
+static void nvmet_pr_send_event_to_host(struct nvmet_pr *pr, uuid_t *hostid,
+ u8 log_type)
+{
+ struct nvmet_ns *ns = nvmet_pr_to_ns(pr);
+ struct nvmet_subsys *subsys = ns->subsys;
+ struct nvmet_ctrl *ctrl;
+
+ mutex_lock(&subsys->lock);
+ list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
+ if (uuid_equal(hostid, &ctrl->hostid)) {
+ nvmet_pr_add_resv_log(ctrl, log_type, ns->nsid);
+ nvmet_add_async_event(ctrl, NVME_AER_CSS,
+ NVME_AEN_RESV_LOG_PAGE_AVALIABLE,
+ NVME_LOG_RESERVATION);
+ }
+ }
+ mutex_unlock(&subsys->lock);
+}
+
+static void nvmet_pr_resv_preempted(struct nvmet_pr *pr, uuid_t *hostid)
+{
+ if (test_bit(NVME_PR_NOTIFY_BIT_RESV_PREEMPTED, &pr->notify_mask))
+ return;
+
+ nvmet_pr_send_event_to_host(pr, hostid,
+ NVME_PR_LOG_RESERVATOIN_PREEMPTED);
+}
+
+static void nvmet_pr_registration_preempted(struct nvmet_pr *pr,
+ uuid_t *hostid)
+{
+ if (test_bit(NVME_PR_NOTIFY_BIT_REG_PREEMPTED, &pr->notify_mask))
+ return;
+
+ nvmet_pr_send_event_to_host(pr, hostid,
+ NVME_PR_LOG_REGISTRATION_PREEMPTED);
+}
+
+static inline void nvmet_pr_set_new_holder(struct nvmet_pr *pr, u8 new_rtype,
+ struct nvmet_pr_registrant *reg)
+{
+ reg->rtype = new_rtype;
+ rcu_assign_pointer(pr->holder, reg);
+}
+
+static u16 nvmet_pr_register(struct nvmet_req *req,
+ struct nvmet_pr_register_data *d)
+{
+ struct nvmet_ctrl *ctrl = req->sq->ctrl;
+ struct nvmet_pr_registrant *new, *reg;
+ struct nvmet_pr *pr = &req->ns->pr;
+ u16 status = NVME_SC_SUCCESS;
+ u64 nrkey = le64_to_cpu(d->nrkey);
+
+ new = kmalloc(sizeof(*new), GFP_KERNEL);
+ if (!new)
+ return NVME_SC_INTERNAL;
+
+ down(&pr->pr_sem);
+ reg = nvmet_pr_find_registrant(pr, &ctrl->hostid);
+ if (reg) {
+ if (reg->rkey != nrkey)
+ status = NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR;
+ kfree(new);
+ goto out;
+ }
+
+ memset(new, 0, sizeof(*new));
+ INIT_LIST_HEAD(&new->entry);
+ new->rkey = nrkey;
+ uuid_copy(&new->hostid, &ctrl->hostid);
+ list_add_tail_rcu(&new->entry, &pr->registrant_list);
+
+out:
+ up(&pr->pr_sem);
+ return status;
+}
+
+static void nvmet_pr_unregister_one(struct nvmet_pr *pr,
+ struct nvmet_pr_registrant *reg)
+{
+ struct nvmet_pr_registrant *first_reg;
+ struct nvmet_pr_registrant *holder;
+ u8 original_rtype;
+
+ list_del_rcu(®->entry);
+
+ holder = rcu_dereference_protected(pr->holder, 1);
+ if (reg != holder)
+ goto out;
+
+ original_rtype = holder->rtype;
+ if (original_rtype == NVME_PR_WRITE_EXCLUSIVE_ALL_REGS ||
+ original_rtype == NVME_PR_EXCLUSIVE_ACCESS_ALL_REGS) {
+ first_reg = list_first_or_null_rcu(&pr->registrant_list,
+ struct nvmet_pr_registrant, entry);
+ if (first_reg)
+ first_reg->rtype = original_rtype;
+ rcu_assign_pointer(pr->holder, first_reg);
+ } else {
+ rcu_assign_pointer(pr->holder, NULL);
+
+ if (original_rtype == NVME_PR_WRITE_EXCLUSIVE_REG_ONLY ||
+ original_rtype == NVME_PR_EXCLUSIVE_ACCESS_REG_ONLY)
+ nvmet_pr_resv_released(pr, ®->hostid);
+ }
+out:
+ kfree_rcu(reg, rcu);
+}
+
+static u16 nvmet_pr_unregister(struct nvmet_req *req,
+ struct nvmet_pr_register_data *d,
+ bool ignore_key)
+{
+ u16 status = NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR;
+ struct nvmet_ctrl *ctrl = req->sq->ctrl;
+ struct nvmet_pr *pr = &req->ns->pr;
+ struct nvmet_pr_registrant *reg;
+
+ down(&pr->pr_sem);
+ list_for_each_entry_rcu(reg, &pr->registrant_list, entry) {
+ if (uuid_equal(®->hostid, &ctrl->hostid)) {
+ if (ignore_key || reg->rkey == le64_to_cpu(d->crkey)) {
+ status = NVME_SC_SUCCESS;
+ nvmet_pr_unregister_one(pr, reg);
+ }
+ break;
+ }
+ }
+ up(&pr->pr_sem);
+
+ return status;
+}
+
+static void nvmet_pr_update_reg_rkey(struct nvmet_pr_registrant *reg,
+ void *attr)
+{
+ reg->rkey = *(u64 *)attr;
+}
+
+static u16 nvmet_pr_update_reg_attr(struct nvmet_pr *pr,
+ struct nvmet_pr_registrant *reg,
+ void (*change_attr)(struct nvmet_pr_registrant *reg,
+ void *attr),
+ void *attr)
+{
+ struct nvmet_pr_registrant *holder;
+ struct nvmet_pr_registrant *new;
+
+ holder = rcu_dereference_protected(pr->holder, 1);
+ if (reg != holder) {
+ change_attr(reg, attr);
+ return NVME_SC_SUCCESS;
+ }
+
+ new = kmalloc(sizeof(*new), GFP_ATOMIC);
+ if (!new)
+ return NVME_SC_INTERNAL;
+
+ new->rkey = holder->rkey;
+ new->rtype = holder->rtype;
+ uuid_copy(&new->hostid, &holder->hostid);
+ INIT_LIST_HEAD(&new->entry);
+
+ change_attr(new, attr);
+ list_replace_rcu(&holder->entry, &new->entry);
+ rcu_assign_pointer(pr->holder, new);
+ kfree_rcu(holder, rcu);
+
+ return NVME_SC_SUCCESS;
+}
+
+static u16 nvmet_pr_replace(struct nvmet_req *req,
+ struct nvmet_pr_register_data *d,
+ bool ignore_key)
+{
+ u16 status = NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR;
+ struct nvmet_ctrl *ctrl = req->sq->ctrl;
+ struct nvmet_pr *pr = &req->ns->pr;
+ struct nvmet_pr_registrant *reg;
+ u64 nrkey = le64_to_cpu(d->nrkey);
+
+ down(&pr->pr_sem);
+ list_for_each_entry_rcu(reg, &pr->registrant_list, entry) {
+ if (uuid_equal(®->hostid, &ctrl->hostid)) {
+ if (ignore_key || reg->rkey == le64_to_cpu(d->crkey))
+ status = nvmet_pr_update_reg_attr(pr, reg,
+ nvmet_pr_update_reg_rkey,
+ &nrkey);
+ break;
+ }
+ }
+ up(&pr->pr_sem);
+ return status;
+}
+
+static void nvmet_execute_pr_register(struct nvmet_req *req)
+{
+ u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10);
+ bool ignore_key = nvmet_pr_parse_ignore_key(cdw10);
+ struct nvmet_pr_register_data *d;
+ u8 reg_act = cdw10 & 0x07; /* Reservation Register Action, bit 02:00 */
+ u16 status;
+
+ d = kmalloc(sizeof(*d), GFP_KERNEL);
+ if (!d) {
+ status = NVME_SC_INTERNAL;
+ goto out;
+ }
+
+ status = nvmet_copy_from_sgl(req, 0, d, sizeof(*d));
+ if (status)
+ goto free_data;
+
+ switch (reg_act) {
+ case NVME_PR_REGISTER_ACT_REG:
+ status = nvmet_pr_register(req, d);
+ break;
+ case NVME_PR_REGISTER_ACT_UNREG:
+ status = nvmet_pr_unregister(req, d, ignore_key);
+ break;
+ case NVME_PR_REGISTER_ACT_REPLACE:
+ status = nvmet_pr_replace(req, d, ignore_key);
+ break;
+ default:
+ req->error_loc = offsetof(struct nvme_common_command, cdw10);
+ status = NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR;
+ break;
+ }
+free_data:
+ kfree(d);
+out:
+ if (!status)
+ atomic_inc(&req->ns->pr.generation);
+ nvmet_req_complete(req, status);
+}
+
+static u16 nvmet_pr_acquire(struct nvmet_req *req,
+ struct nvmet_pr_registrant *reg,
+ u8 rtype)
+{
+ struct nvmet_pr *pr = &req->ns->pr;
+ struct nvmet_pr_registrant *holder;
+
+ holder = rcu_dereference_protected(pr->holder, 1);
+ if (holder && reg != holder)
+ return NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR;
+ if (holder && reg == holder) {
+ if (holder->rtype == rtype)
+ return NVME_SC_SUCCESS;
+ return NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR;
+ }
+
+ nvmet_pr_set_new_holder(pr, rtype, reg);
+ return NVME_SC_SUCCESS;
+}
+
+static void nvmet_pr_confirm_ns_pc_ref(struct percpu_ref *ref)
+{
+ struct nvmet_pr_per_ctrl_ref *pc_ref =
+ container_of(ref, struct nvmet_pr_per_ctrl_ref, ref);
+
+ complete(&pc_ref->confirm_done);
+}
+
+static void nvmet_pr_set_ctrl_to_abort(struct nvmet_req *req, uuid_t *hostid)
+{
+ struct nvmet_pr_per_ctrl_ref *pc_ref;
+ struct nvmet_ns *ns = req->ns;
+ unsigned long idx;
+
+ xa_for_each(&ns->pr_per_ctrl_refs, idx, pc_ref) {
+ if (uuid_equal(&pc_ref->hostid, hostid)) {
+ percpu_ref_kill_and_confirm(&pc_ref->ref,
+ nvmet_pr_confirm_ns_pc_ref);
+ wait_for_completion(&pc_ref->confirm_done);
+ }
+ }
+}
+
+static u16 nvmet_pr_unreg_all_host_by_prkey(struct nvmet_req *req, u64 prkey,
+ uuid_t *send_hostid,
+ bool abort)
+{
+ u16 status = NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR;
+ struct nvmet_pr_registrant *reg, *tmp;
+ struct nvmet_pr *pr = &req->ns->pr;
+ uuid_t hostid;
+
+ list_for_each_entry_safe(reg, tmp, &pr->registrant_list, entry) {
+ if (reg->rkey == prkey) {
+ status = NVME_SC_SUCCESS;
+ uuid_copy(&hostid, ®->hostid);
+ if (abort)
+ nvmet_pr_set_ctrl_to_abort(req, &hostid);
+ nvmet_pr_unregister_one(pr, reg);
+ if (!uuid_equal(&hostid, send_hostid))
+ nvmet_pr_registration_preempted(pr, &hostid);
+ }
+ }
+ return status;
+}
+
+static void nvmet_pr_unreg_all_others_by_prkey(struct nvmet_req *req,
+ u64 prkey,
+ uuid_t *send_hostid,
+ bool abort)
+{
+ struct nvmet_pr_registrant *reg, *tmp;
+ struct nvmet_pr *pr = &req->ns->pr;
+ uuid_t hostid;
+
+ list_for_each_entry_safe(reg, tmp, &pr->registrant_list, entry) {
+ if (reg->rkey == prkey &&
+ !uuid_equal(®->hostid, send_hostid)) {
+ uuid_copy(&hostid, ®->hostid);
+ if (abort)
+ nvmet_pr_set_ctrl_to_abort(req, &hostid);
+ nvmet_pr_unregister_one(pr, reg);
+ nvmet_pr_registration_preempted(pr, &hostid);
+ }
+ }
+}
+
+static void nvmet_pr_unreg_all_others(struct nvmet_req *req,
+ uuid_t *send_hostid,
+ bool abort)
+{
+ struct nvmet_pr_registrant *reg, *tmp;
+ struct nvmet_pr *pr = &req->ns->pr;
+ uuid_t hostid;
+
+ list_for_each_entry_safe(reg, tmp, &pr->registrant_list, entry) {
+ if (!uuid_equal(®->hostid, send_hostid)) {
+ uuid_copy(&hostid, ®->hostid);
+ if (abort)
+ nvmet_pr_set_ctrl_to_abort(req, &hostid);
+ nvmet_pr_unregister_one(pr, reg);
+ nvmet_pr_registration_preempted(pr, &hostid);
+ }
+ }
+}
+
+static void nvmet_pr_update_holder_rtype(struct nvmet_pr_registrant *reg,
+ void *attr)
+{
+ u8 new_rtype = *(u8 *)attr;
+
+ reg->rtype = new_rtype;
+}
+
+static u16 nvmet_pr_preempt(struct nvmet_req *req,
+ struct nvmet_pr_registrant *reg,
+ u8 rtype,
+ struct nvmet_pr_acquire_data *d,
+ bool abort)
+{
+ struct nvmet_ctrl *ctrl = req->sq->ctrl;
+ struct nvmet_pr *pr = &req->ns->pr;
+ struct nvmet_pr_registrant *holder;
+ enum nvme_pr_type original_rtype;
+ u64 prkey = le64_to_cpu(d->prkey);
+ u16 status;
+
+ holder = rcu_dereference_protected(pr->holder, 1);
+ if (!holder)
+ return nvmet_pr_unreg_all_host_by_prkey(req, prkey,
+ &ctrl->hostid, abort);
+
+ original_rtype = holder->rtype;
+ if (original_rtype == NVME_PR_WRITE_EXCLUSIVE_ALL_REGS ||
+ original_rtype == NVME_PR_EXCLUSIVE_ACCESS_ALL_REGS) {
+ if (!prkey) {
+ /*
+ * To prevent possible access from other hosts, and
+ * avoid terminate the holder, set the new holder
+ * first before unregistering.
+ */
+ nvmet_pr_set_new_holder(pr, rtype, reg);
+ nvmet_pr_unreg_all_others(req, &ctrl->hostid, abort);
+ return NVME_SC_SUCCESS;
+ }
+ return nvmet_pr_unreg_all_host_by_prkey(req, prkey,
+ &ctrl->hostid, abort);
+ }
+
+ if (holder == reg) {
+ status = nvmet_pr_update_reg_attr(pr, holder,
+ nvmet_pr_update_holder_rtype, &rtype);
+ if (!status && original_rtype != rtype)
+ nvmet_pr_resv_released(pr, ®->hostid);
+ return status;
+ }
+
+ if (prkey == holder->rkey) {
+ /*
+ * Same as before, set the new holder first.
+ */
+ nvmet_pr_set_new_holder(pr, rtype, reg);
+ nvmet_pr_unreg_all_others_by_prkey(req, prkey, &ctrl->hostid,
+ abort);
+ if (original_rtype != rtype)
+ nvmet_pr_resv_released(pr, ®->hostid);
+ return NVME_SC_SUCCESS;
+ }
+
+ if (prkey)
+ return nvmet_pr_unreg_all_host_by_prkey(req, prkey,
+ &ctrl->hostid, abort);
+ return NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
+}
+
+static void nvmet_pr_do_abort(struct work_struct *w)
+{
+ struct nvmet_req *req = container_of(w, struct nvmet_req, r.abort_work);
+ struct nvmet_pr_per_ctrl_ref *pc_ref;
+ struct nvmet_ns *ns = req->ns;
+ unsigned long idx;
+
+ /*
+ * The target does not support abort, just wait per-controller ref to 0.
+ */
+ xa_for_each(&ns->pr_per_ctrl_refs, idx, pc_ref) {
+ if (percpu_ref_is_dying(&pc_ref->ref)) {
+ wait_for_completion(&pc_ref->free_done);
+ reinit_completion(&pc_ref->confirm_done);
+ reinit_completion(&pc_ref->free_done);
+ percpu_ref_resurrect(&pc_ref->ref);
+ }
+ }
+
+ up(&ns->pr.pr_sem);
+ nvmet_req_complete(req, NVME_SC_SUCCESS);
+}
+
+static u16 __nvmet_execute_pr_acquire(struct nvmet_req *req,
+ struct nvmet_pr_registrant *reg,
+ u8 acquire_act,
+ u8 rtype,
+ struct nvmet_pr_acquire_data *d)
+{
+ u16 status;
+
+ switch (acquire_act) {
+ case NVME_PR_ACQUIRE_ACT_ACQUIRE:
+ status = nvmet_pr_acquire(req, reg, rtype);
+ goto out;
+ case NVME_PR_ACQUIRE_ACT_PREEMPT:
+ status = nvmet_pr_preempt(req, reg, rtype, d, false);
+ goto inc_gen;
+ case NVME_PR_ACQUIRE_ACT_PREEMPT_AND_ABORT:
+ status = nvmet_pr_preempt(req, reg, rtype, d, true);
+ goto inc_gen;
+ default:
+ req->error_loc = offsetof(struct nvme_common_command, cdw10);
+ status = NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR;
+ goto out;
+ }
+inc_gen:
+ if (!status)
+ atomic_inc(&req->ns->pr.generation);
+out:
+ return status;
+}
+
+static void nvmet_execute_pr_acquire(struct nvmet_req *req)
+{
+ u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10);
+ bool ignore_key = nvmet_pr_parse_ignore_key(cdw10);
+ /* Reservation type, bit 15:08 */
+ u8 rtype = (u8)((cdw10 >> 8) & 0xff);
+ /* Reservation acquire action, bit 02:00 */
+ u8 acquire_act = cdw10 & 0x07;
+ struct nvmet_ctrl *ctrl = req->sq->ctrl;
+ struct nvmet_pr_acquire_data *d = NULL;
+ struct nvmet_pr *pr = &req->ns->pr;
+ struct nvmet_pr_registrant *reg;
+ u16 status = NVME_SC_SUCCESS;
+
+ if (ignore_key ||
+ rtype < NVME_PR_WRITE_EXCLUSIVE ||
+ rtype > NVME_PR_EXCLUSIVE_ACCESS_ALL_REGS) {
+ status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
+ goto out;
+ }
+
+ d = kmalloc(sizeof(*d), GFP_KERNEL);
+ if (!d) {
+ status = NVME_SC_INTERNAL;
+ goto out;
+ }
+
+ status = nvmet_copy_from_sgl(req, 0, d, sizeof(*d));
+ if (status)
+ goto free_data;
+
+ status = NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR;
+ down(&pr->pr_sem);
+ list_for_each_entry_rcu(reg, &pr->registrant_list, entry) {
+ if (uuid_equal(®->hostid, &ctrl->hostid) &&
+ reg->rkey == le64_to_cpu(d->crkey)) {
+ status = __nvmet_execute_pr_acquire(req, reg,
+ acquire_act, rtype, d);
+ break;
+ }
+ }
+
+ if (!status && acquire_act == NVME_PR_ACQUIRE_ACT_PREEMPT_AND_ABORT) {
+ kfree(d);
+ INIT_WORK(&req->r.abort_work, nvmet_pr_do_abort);
+ queue_work(nvmet_wq, &req->r.abort_work);
+ return;
+ }
+
+ up(&pr->pr_sem);
+
+free_data:
+ kfree(d);
+out:
+ nvmet_req_complete(req, status);
+}
+
+static u16 nvmet_pr_release(struct nvmet_req *req,
+ struct nvmet_pr_registrant *reg,
+ u8 rtype)
+{
+ struct nvmet_pr *pr = &req->ns->pr;
+ struct nvmet_pr_registrant *holder;
+ u8 original_rtype;
+
+ holder = rcu_dereference_protected(pr->holder, 1);
+ if (!holder || reg != holder)
+ return NVME_SC_SUCCESS;
+
+ original_rtype = holder->rtype;
+ if (original_rtype != rtype)
+ return NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR;
+
+ rcu_assign_pointer(pr->holder, NULL);
+
+ if (original_rtype != NVME_PR_WRITE_EXCLUSIVE &&
+ original_rtype != NVME_PR_EXCLUSIVE_ACCESS)
+ nvmet_pr_resv_released(pr, ®->hostid);
+
+ return NVME_SC_SUCCESS;
+}
+
+static void nvmet_pr_clear(struct nvmet_req *req)
+{
+ struct nvmet_pr_registrant *reg, *tmp;
+ struct nvmet_pr *pr = &req->ns->pr;
+
+ rcu_assign_pointer(pr->holder, NULL);
+
+ list_for_each_entry_safe(reg, tmp, &pr->registrant_list, entry) {
+ list_del_rcu(®->entry);
+ if (!uuid_equal(&req->sq->ctrl->hostid, ®->hostid))
+ nvmet_pr_resv_preempted(pr, ®->hostid);
+ kfree_rcu(reg, rcu);
+ }
+
+ atomic_inc(&pr->generation);
+}
+
+static u16 __nvmet_execute_pr_release(struct nvmet_req *req,
+ struct nvmet_pr_registrant *reg,
+ u8 release_act, u8 rtype)
+{
+ switch (release_act) {
+ case NVME_PR_RELEASE_ACT_RELEASE:
+ return nvmet_pr_release(req, reg, rtype);
+ case NVME_PR_RELEASE_ACT_CLEAR:
+ nvmet_pr_clear(req);
+ return NVME_SC_SUCCESS;
+ default:
+ req->error_loc = offsetof(struct nvme_common_command, cdw10);
+ return NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR;
+ }
+}
+
+static void nvmet_execute_pr_release(struct nvmet_req *req)
+{
+ u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10);
+ bool ignore_key = nvmet_pr_parse_ignore_key(cdw10);
+ u8 rtype = (u8)((cdw10 >> 8) & 0xff); /* Reservation type, bit 15:08 */
+ u8 release_act = cdw10 & 0x07; /* Reservation release action, bit 02:00 */
+ struct nvmet_ctrl *ctrl = req->sq->ctrl;
+ struct nvmet_pr *pr = &req->ns->pr;
+ struct nvmet_pr_release_data *d;
+ struct nvmet_pr_registrant *reg;
+ u16 status;
+
+ if (ignore_key) {
+ status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
+ goto out;
+ }
+
+ d = kmalloc(sizeof(*d), GFP_KERNEL);
+ if (!d) {
+ status = NVME_SC_INTERNAL;
+ goto out;
+ }
+
+ status = nvmet_copy_from_sgl(req, 0, d, sizeof(*d));
+ if (status)
+ goto free_data;
+
+ status = NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR;
+ down(&pr->pr_sem);
+ list_for_each_entry_rcu(reg, &pr->registrant_list, entry) {
+ if (uuid_equal(®->hostid, &ctrl->hostid) &&
+ reg->rkey == le64_to_cpu(d->crkey)) {
+ status = __nvmet_execute_pr_release(req, reg,
+ release_act, rtype);
+ break;
+ }
+ }
+ up(&pr->pr_sem);
+free_data:
+ kfree(d);
+out:
+ nvmet_req_complete(req, status);
+}
+
+static void nvmet_execute_pr_report(struct nvmet_req *req)
+{
+ u32 cdw11 = le32_to_cpu(req->cmd->common.cdw11);
+ u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10);
+ u32 num_bytes = 4 * (cdw10 + 1); /* cdw10 is number of dwords */
+ u8 eds = cdw11 & 1; /* Extended data structure, bit 00 */
+ struct nvme_registered_ctrl_ext *ctrl_eds;
+ struct nvme_reservation_status_ext *data;
+ struct nvmet_pr *pr = &req->ns->pr;
+ struct nvmet_pr_registrant *holder;
+ struct nvmet_pr_registrant *reg;
+ u16 num_ctrls = 0;
+ u16 status;
+ u8 rtype;
+
+ /* nvmet hostid(uuid_t) is 128 bit. */
+ if (!eds) {
+ req->error_loc = offsetof(struct nvme_common_command, cdw11);
+ status = NVME_SC_HOST_ID_INCONSIST | NVME_STATUS_DNR;
+ goto out;
+ }
+
+ if (num_bytes < sizeof(struct nvme_reservation_status_ext)) {
+ req->error_loc = offsetof(struct nvme_common_command, cdw10);
+ status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
+ goto out;
+ }
+
+ data = kmalloc(num_bytes, GFP_KERNEL);
+ if (!data) {
+ status = NVME_SC_INTERNAL;
+ goto out;
+ }
+ memset(data, 0, num_bytes);
+ data->gen = cpu_to_le32(atomic_read(&pr->generation));
+ data->ptpls = 0;
+ ctrl_eds = data->regctl_eds;
+
+ rcu_read_lock();
+ holder = rcu_dereference(pr->holder);
+ rtype = holder ? holder->rtype : 0;
+ data->rtype = rtype;
+
+ list_for_each_entry_rcu(reg, &pr->registrant_list, entry) {
+ num_ctrls++;
+ /*
+ * continue to get the number of all registrans.
+ */
+ if (((void *)ctrl_eds + sizeof(*ctrl_eds)) >
+ ((void *)data + num_bytes))
+ continue;
+ /*
+ * Dynamic controller, set cntlid to 0xffff.
+ */
+ ctrl_eds->cntlid = cpu_to_le16(NVME_CNTLID_DYNAMIC);
+ if (rtype == NVME_PR_WRITE_EXCLUSIVE_ALL_REGS ||
+ rtype == NVME_PR_EXCLUSIVE_ACCESS_ALL_REGS)
+ ctrl_eds->rcsts = 1;
+ if (reg == holder)
+ ctrl_eds->rcsts = 1;
+ uuid_copy((uuid_t *)&ctrl_eds->hostid, ®->hostid);
+ ctrl_eds->rkey = cpu_to_le64(reg->rkey);
+ ctrl_eds++;
+ }
+ rcu_read_unlock();
+
+ put_unaligned_le16(num_ctrls, data->regctl);
+ status = nvmet_copy_to_sgl(req, 0, data, num_bytes);
+ kfree(data);
+out:
+ nvmet_req_complete(req, status);
+}
+
+u16 nvmet_parse_pr_cmd(struct nvmet_req *req)
+{
+ struct nvme_command *cmd = req->cmd;
+
+ switch (cmd->common.opcode) {
+ case nvme_cmd_resv_register:
+ req->execute = nvmet_execute_pr_register;
+ break;
+ case nvme_cmd_resv_acquire:
+ req->execute = nvmet_execute_pr_acquire;
+ break;
+ case nvme_cmd_resv_release:
+ req->execute = nvmet_execute_pr_release;
+ break;
+ case nvme_cmd_resv_report:
+ req->execute = nvmet_execute_pr_report;
+ break;
+ default:
+ return 1;
+ }
+ return NVME_SC_SUCCESS;
+}
+
+static bool nvmet_is_req_write_cmd_group(struct nvmet_req *req)
+{
+ u8 opcode = req->cmd->common.opcode;
+
+ if (req->sq->qid) {
+ switch (opcode) {
+ case nvme_cmd_flush:
+ case nvme_cmd_write:
+ case nvme_cmd_write_zeroes:
+ case nvme_cmd_dsm:
+ case nvme_cmd_zone_append:
+ case nvme_cmd_zone_mgmt_send:
+ return true;
+ default:
+ return false;
+ }
+ }
+ return false;
+}
+
+static bool nvmet_is_req_read_cmd_group(struct nvmet_req *req)
+{
+ u8 opcode = req->cmd->common.opcode;
+
+ if (req->sq->qid) {
+ switch (opcode) {
+ case nvme_cmd_read:
+ case nvme_cmd_zone_mgmt_recv:
+ return true;
+ default:
+ return false;
+ }
+ }
+ return false;
+}
+
+u16 nvmet_pr_check_cmd_access(struct nvmet_req *req)
+{
+ struct nvmet_ctrl *ctrl = req->sq->ctrl;
+ struct nvmet_pr_registrant *holder;
+ struct nvmet_ns *ns = req->ns;
+ struct nvmet_pr *pr = &ns->pr;
+ u16 status = NVME_SC_SUCCESS;
+
+ rcu_read_lock();
+ holder = rcu_dereference(pr->holder);
+ if (!holder)
+ goto unlock;
+ if (uuid_equal(&ctrl->hostid, &holder->hostid))
+ goto unlock;
+
+ /*
+ * The Reservation command group is checked in executing,
+ * allow it here.
+ */
+ switch (holder->rtype) {
+ case NVME_PR_WRITE_EXCLUSIVE:
+ if (nvmet_is_req_write_cmd_group(req))
+ status = NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR;
+ break;
+ case NVME_PR_EXCLUSIVE_ACCESS:
+ if (nvmet_is_req_read_cmd_group(req) ||
+ nvmet_is_req_write_cmd_group(req))
+ status = NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR;
+ break;
+ case NVME_PR_WRITE_EXCLUSIVE_REG_ONLY:
+ case NVME_PR_WRITE_EXCLUSIVE_ALL_REGS:
+ if ((nvmet_is_req_write_cmd_group(req)) &&
+ !nvmet_pr_find_registrant(pr, &ctrl->hostid))
+ status = NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR;
+ break;
+ case NVME_PR_EXCLUSIVE_ACCESS_REG_ONLY:
+ case NVME_PR_EXCLUSIVE_ACCESS_ALL_REGS:
+ if ((nvmet_is_req_read_cmd_group(req) ||
+ nvmet_is_req_write_cmd_group(req)) &&
+ !nvmet_pr_find_registrant(pr, &ctrl->hostid))
+ status = NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR;
+ break;
+ default:
+ pr_warn("the reservation type is set wrong, type:%d\n",
+ holder->rtype);
+ break;
+ }
+
+unlock:
+ rcu_read_unlock();
+ if (status)
+ req->error_loc = offsetof(struct nvme_common_command, opcode);
+ return status;
+}
+
+u16 nvmet_pr_get_ns_pc_ref(struct nvmet_req *req)
+{
+ struct nvmet_pr_per_ctrl_ref *pc_ref;
+
+ pc_ref = xa_load(&req->ns->pr_per_ctrl_refs,
+ req->sq->ctrl->cntlid);
+ if (unlikely(!percpu_ref_tryget_live(&pc_ref->ref)))
+ return NVME_SC_INTERNAL;
+ req->pc_ref = pc_ref;
+ return NVME_SC_SUCCESS;
+}
+
+static void nvmet_pr_ctrl_ns_all_cmds_done(struct percpu_ref *ref)
+{
+ struct nvmet_pr_per_ctrl_ref *pc_ref =
+ container_of(ref, struct nvmet_pr_per_ctrl_ref, ref);
+
+ complete(&pc_ref->free_done);
+}
+
+static int nvmet_pr_alloc_and_insert_pc_ref(struct nvmet_ns *ns,
+ unsigned long idx,
+ uuid_t *hostid)
+{
+ struct nvmet_pr_per_ctrl_ref *pc_ref;
+ int ret;
+
+ pc_ref = kmalloc(sizeof(*pc_ref), GFP_ATOMIC);
+ if (!pc_ref)
+ return -ENOMEM;
+
+ ret = percpu_ref_init(&pc_ref->ref, nvmet_pr_ctrl_ns_all_cmds_done,
+ PERCPU_REF_ALLOW_REINIT, GFP_KERNEL);
+ if (ret)
+ goto free;
+
+ init_completion(&pc_ref->free_done);
+ init_completion(&pc_ref->confirm_done);
+ uuid_copy(&pc_ref->hostid, hostid);
+
+ ret = xa_insert(&ns->pr_per_ctrl_refs, idx, pc_ref, GFP_KERNEL);
+ if (ret)
+ goto exit;
+ return ret;
+exit:
+ percpu_ref_exit(&pc_ref->ref);
+free:
+ kfree(pc_ref);
+ return ret;
+}
+
+int nvmet_ctrl_init_pr(struct nvmet_ctrl *ctrl)
+{
+ struct nvmet_subsys *subsys = ctrl->subsys;
+ struct nvmet_pr_per_ctrl_ref *pc_ref;
+ struct nvmet_ns *ns = NULL;
+ unsigned long idx;
+ int ret;
+
+ ctrl->pr_log_mgr.counter = 0;
+ ctrl->pr_log_mgr.lost_count = 0;
+ mutex_init(&ctrl->pr_log_mgr.lock);
+ INIT_KFIFO(ctrl->pr_log_mgr.log_queue);
+
+ /*
+ * Here we are under subsys lock, if an ns not in subsys->namespaces,
+ * we can make sure that ns is not enabled, and not call
+ * nvmet_pr_init_ns(), see more details in nvmet_ns_enable().
+ * So just check ns->pr.enable.
+ */
+ xa_for_each(&subsys->namespaces, idx, ns) {
+ if (ns->pr.enable) {
+ ret = nvmet_pr_alloc_and_insert_pc_ref(ns, ctrl->cntlid,
+ &ctrl->hostid);
+ if (ret)
+ goto free_per_ctrl_refs;
+ }
+ }
+ return 0;
+
+free_per_ctrl_refs:
+ xa_for_each(&subsys->namespaces, idx, ns) {
+ if (ns->pr.enable) {
+ pc_ref = xa_erase(&ns->pr_per_ctrl_refs, ctrl->cntlid);
+ if (pc_ref)
+ percpu_ref_exit(&pc_ref->ref);
+ kfree(pc_ref);
+ }
+ }
+ return ret;
+}
+
+void nvmet_ctrl_destroy_pr(struct nvmet_ctrl *ctrl)
+{
+ struct nvmet_pr_per_ctrl_ref *pc_ref;
+ struct nvmet_ns *ns;
+ unsigned long idx;
+
+ kfifo_free(&ctrl->pr_log_mgr.log_queue);
+ mutex_destroy(&ctrl->pr_log_mgr.lock);
+
+ xa_for_each(&ctrl->subsys->namespaces, idx, ns) {
+ if (ns->pr.enable) {
+ pc_ref = xa_erase(&ns->pr_per_ctrl_refs, ctrl->cntlid);
+ if (pc_ref)
+ percpu_ref_exit(&pc_ref->ref);
+ kfree(pc_ref);
+ }
+ }
+}
+
+int nvmet_pr_init_ns(struct nvmet_ns *ns)
+{
+ struct nvmet_subsys *subsys = ns->subsys;
+ struct nvmet_pr_per_ctrl_ref *pc_ref;
+ struct nvmet_ctrl *ctrl = NULL;
+ unsigned long idx;
+ int ret;
+
+ ns->pr.holder = NULL;
+ atomic_set(&ns->pr.generation, 0);
+ sema_init(&ns->pr.pr_sem, 1);
+ INIT_LIST_HEAD(&ns->pr.registrant_list);
+ ns->pr.notify_mask = 0;
+
+ xa_init(&ns->pr_per_ctrl_refs);
+
+ list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
+ ret = nvmet_pr_alloc_and_insert_pc_ref(ns, ctrl->cntlid,
+ &ctrl->hostid);
+ if (ret)
+ goto free_per_ctrl_refs;
+ }
+ return 0;
+
+free_per_ctrl_refs:
+ xa_for_each(&ns->pr_per_ctrl_refs, idx, pc_ref) {
+ xa_erase(&ns->pr_per_ctrl_refs, idx);
+ percpu_ref_exit(&pc_ref->ref);
+ kfree(pc_ref);
+ }
+ return ret;
+}
+
+void nvmet_pr_exit_ns(struct nvmet_ns *ns)
+{
+ struct nvmet_pr_registrant *reg, *tmp;
+ struct nvmet_pr_per_ctrl_ref *pc_ref;
+ struct nvmet_pr *pr = &ns->pr;
+ unsigned long idx;
+
+ list_for_each_entry_safe(reg, tmp, &pr->registrant_list, entry) {
+ list_del(®->entry);
+ kfree(reg);
+ }
+
+ xa_for_each(&ns->pr_per_ctrl_refs, idx, pc_ref) {
+ /*
+ * No command on ns here, we can safely free pc_ref.
+ */
+ pc_ref = xa_erase(&ns->pr_per_ctrl_refs, idx);
+ percpu_ref_exit(&pc_ref->ref);
+ kfree(pc_ref);
+ }
+
+ xa_destroy(&ns->pr_per_ctrl_refs);
+}
diff --git a/drivers/nvme/target/trace.c b/drivers/nvme/target/trace.c
index 9a35481..6dbc703 100644
--- a/drivers/nvme/target/trace.c
+++ b/drivers/nvme/target/trace.c
@@ -180,6 +180,106 @@ static const char *nvmet_trace_zone_mgmt_recv(struct trace_seq *p, u8 *cdw10)
return ret;
}
+static const char *nvmet_trace_resv_reg(struct trace_seq *p, u8 *cdw10)
+{
+ static const char * const rrega_strs[] = {
+ [0x00] = "register",
+ [0x01] = "unregister",
+ [0x02] = "replace",
+ };
+ const char *ret = trace_seq_buffer_ptr(p);
+ u8 rrega = cdw10[0] & 0x7;
+ u8 iekey = (cdw10[0] >> 3) & 0x1;
+ u8 ptpl = (cdw10[3] >> 6) & 0x3;
+ const char *rrega_str;
+
+ if (rrega < ARRAY_SIZE(rrega_strs) && rrega_strs[rrega])
+ rrega_str = rrega_strs[rrega];
+ else
+ rrega_str = "reserved";
+
+ trace_seq_printf(p, "rrega=%u:%s, iekey=%u, ptpl=%u",
+ rrega, rrega_str, iekey, ptpl);
+ trace_seq_putc(p, 0);
+
+ return ret;
+}
+
+static const char * const rtype_strs[] = {
+ [0x00] = "reserved",
+ [0x01] = "write exclusive",
+ [0x02] = "exclusive access",
+ [0x03] = "write exclusive registrants only",
+ [0x04] = "exclusive access registrants only",
+ [0x05] = "write exclusive all registrants",
+ [0x06] = "exclusive access all registrants",
+};
+
+static const char *nvmet_trace_resv_acq(struct trace_seq *p, u8 *cdw10)
+{
+ static const char * const racqa_strs[] = {
+ [0x00] = "acquire",
+ [0x01] = "preempt",
+ [0x02] = "preempt and abort",
+ };
+ const char *ret = trace_seq_buffer_ptr(p);
+ u8 racqa = cdw10[0] & 0x7;
+ u8 iekey = (cdw10[0] >> 3) & 0x1;
+ u8 rtype = cdw10[1];
+ const char *racqa_str = "reserved";
+ const char *rtype_str = "reserved";
+
+ if (racqa < ARRAY_SIZE(racqa_strs) && racqa_strs[racqa])
+ racqa_str = racqa_strs[racqa];
+
+ if (rtype < ARRAY_SIZE(rtype_strs) && rtype_strs[rtype])
+ rtype_str = rtype_strs[rtype];
+
+ trace_seq_printf(p, "racqa=%u:%s, iekey=%u, rtype=%u:%s",
+ racqa, racqa_str, iekey, rtype, rtype_str);
+ trace_seq_putc(p, 0);
+
+ return ret;
+}
+
+static const char *nvmet_trace_resv_rel(struct trace_seq *p, u8 *cdw10)
+{
+ static const char * const rrela_strs[] = {
+ [0x00] = "release",
+ [0x01] = "clear",
+ };
+ const char *ret = trace_seq_buffer_ptr(p);
+ u8 rrela = cdw10[0] & 0x7;
+ u8 iekey = (cdw10[0] >> 3) & 0x1;
+ u8 rtype = cdw10[1];
+ const char *rrela_str = "reserved";
+ const char *rtype_str = "reserved";
+
+ if (rrela < ARRAY_SIZE(rrela_strs) && rrela_strs[rrela])
+ rrela_str = rrela_strs[rrela];
+
+ if (rtype < ARRAY_SIZE(rtype_strs) && rtype_strs[rtype])
+ rtype_str = rtype_strs[rtype];
+
+ trace_seq_printf(p, "rrela=%u:%s, iekey=%u, rtype=%u:%s",
+ rrela, rrela_str, iekey, rtype, rtype_str);
+ trace_seq_putc(p, 0);
+
+ return ret;
+}
+
+static const char *nvmet_trace_resv_report(struct trace_seq *p, u8 *cdw10)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+ u32 numd = get_unaligned_le32(cdw10);
+ u8 eds = cdw10[4] & 0x1;
+
+ trace_seq_printf(p, "numd=%u, eds=%u", numd, eds);
+ trace_seq_putc(p, 0);
+
+ return ret;
+}
+
const char *nvmet_trace_parse_nvm_cmd(struct trace_seq *p,
u8 opcode, u8 *cdw10)
{
@@ -195,6 +295,14 @@ const char *nvmet_trace_parse_nvm_cmd(struct trace_seq *p,
return nvmet_trace_zone_mgmt_send(p, cdw10);
case nvme_cmd_zone_mgmt_recv:
return nvmet_trace_zone_mgmt_recv(p, cdw10);
+ case nvme_cmd_resv_register:
+ return nvmet_trace_resv_reg(p, cdw10);
+ case nvme_cmd_resv_acquire:
+ return nvmet_trace_resv_acq(p, cdw10);
+ case nvme_cmd_resv_release:
+ return nvmet_trace_resv_rel(p, cdw10);
+ case nvme_cmd_resv_report:
+ return nvmet_trace_resv_report(p, cdw10);
default:
return nvmet_trace_common(p, cdw10);
}
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index ca4bc0a..8947dab 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1190,8 +1190,8 @@ static u8 sd_group_number(struct scsi_cmnd *cmd)
if (!sdkp->rscs)
return 0;
- return min3((u32)rq->write_hint, (u32)sdkp->permanent_stream_count,
- 0x3fu);
+ return min3((u32)rq->bio->bi_write_hint,
+ (u32)sdkp->permanent_stream_count, 0x3fu);
}
static blk_status_t sd_setup_rw32_cmnd(struct scsi_cmnd *cmd, bool write,
@@ -1389,7 +1389,7 @@ static blk_status_t sd_setup_read_write_cmnd(struct scsi_cmnd *cmd)
ret = sd_setup_rw16_cmnd(cmd, write, lba, nr_blocks,
protect | fua, dld);
} else if ((nr_blocks > 0xff) || (lba > 0x1fffff) ||
- sdp->use_10_for_rw || protect || rq->write_hint) {
+ sdp->use_10_for_rw || protect || rq->bio->bi_write_hint) {
ret = sd_setup_rw10_cmnd(cmd, write, lba, nr_blocks,
protect | fua);
} else {
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 46b9386..64cdae3 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -711,6 +711,12 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
blk_stack_limits(lim, bdev_limits(device->bdev), 0);
}
+ ret = blk_validate_limits(lim);
+ if (ret) {
+ btrfs_err(fs_info, "zoned: failed to validate queue limits");
+ return ret;
+ }
+
/*
* stripe_size is always aligned to BTRFS_STRIPE_LEN in
* btrfs_create_chunk(). Since we want stripe_len == zone_size,
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 2035fad..c596e0e 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -156,9 +156,6 @@ struct request {
struct blk_crypto_keyslot *crypt_keyslot;
#endif
- enum rw_hint write_hint;
- unsigned short ioprio;
-
enum mq_rq_state state;
atomic_t ref;
@@ -222,7 +219,9 @@ static inline bool blk_rq_is_passthrough(struct request *rq)
static inline unsigned short req_get_ioprio(struct request *req)
{
- return req->ioprio;
+ if (req->bio)
+ return req->bio->bi_ioprio;
+ return 0;
}
#define rq_data_dir(rq) (op_is_write(req_op(rq)) ? WRITE : READ)
@@ -230,62 +229,61 @@ static inline unsigned short req_get_ioprio(struct request *req)
#define rq_dma_dir(rq) \
(op_is_write(req_op(rq)) ? DMA_TO_DEVICE : DMA_FROM_DEVICE)
-#define rq_list_add(listptr, rq) do { \
- (rq)->rq_next = *(listptr); \
- *(listptr) = rq; \
-} while (0)
-
-#define rq_list_add_tail(lastpptr, rq) do { \
- (rq)->rq_next = NULL; \
- **(lastpptr) = rq; \
- *(lastpptr) = &rq->rq_next; \
-} while (0)
-
-#define rq_list_pop(listptr) \
-({ \
- struct request *__req = NULL; \
- if ((listptr) && *(listptr)) { \
- __req = *(listptr); \
- *(listptr) = __req->rq_next; \
- } \
- __req; \
-})
-
-#define rq_list_peek(listptr) \
-({ \
- struct request *__req = NULL; \
- if ((listptr) && *(listptr)) \
- __req = *(listptr); \
- __req; \
-})
-
-#define rq_list_for_each(listptr, pos) \
- for (pos = rq_list_peek((listptr)); pos; pos = rq_list_next(pos))
-
-#define rq_list_for_each_safe(listptr, pos, nxt) \
- for (pos = rq_list_peek((listptr)), nxt = rq_list_next(pos); \
- pos; pos = nxt, nxt = pos ? rq_list_next(pos) : NULL)
-
-#define rq_list_next(rq) (rq)->rq_next
-#define rq_list_empty(list) ((list) == (struct request *) NULL)
-
-/**
- * rq_list_move() - move a struct request from one list to another
- * @src: The source list @rq is currently in
- * @dst: The destination list that @rq will be appended to
- * @rq: The request to move
- * @prev: The request preceding @rq in @src (NULL if @rq is the head)
- */
-static inline void rq_list_move(struct request **src, struct request **dst,
- struct request *rq, struct request *prev)
+static inline int rq_list_empty(const struct rq_list *rl)
{
- if (prev)
- prev->rq_next = rq->rq_next;
- else
- *src = rq->rq_next;
- rq_list_add(dst, rq);
+ return rl->head == NULL;
}
+static inline void rq_list_init(struct rq_list *rl)
+{
+ rl->head = NULL;
+ rl->tail = NULL;
+}
+
+static inline void rq_list_add_tail(struct rq_list *rl, struct request *rq)
+{
+ rq->rq_next = NULL;
+ if (rl->tail)
+ rl->tail->rq_next = rq;
+ else
+ rl->head = rq;
+ rl->tail = rq;
+}
+
+static inline void rq_list_add_head(struct rq_list *rl, struct request *rq)
+{
+ rq->rq_next = rl->head;
+ rl->head = rq;
+ if (!rl->tail)
+ rl->tail = rq;
+}
+
+static inline struct request *rq_list_pop(struct rq_list *rl)
+{
+ struct request *rq = rl->head;
+
+ if (rq) {
+ rl->head = rl->head->rq_next;
+ if (!rl->head)
+ rl->tail = NULL;
+ rq->rq_next = NULL;
+ }
+
+ return rq;
+}
+
+static inline struct request *rq_list_peek(struct rq_list *rl)
+{
+ return rl->head;
+}
+
+#define rq_list_for_each(rl, pos) \
+ for (pos = rq_list_peek((rl)); (pos); pos = pos->rq_next)
+
+#define rq_list_for_each_safe(rl, pos, nxt) \
+ for (pos = rq_list_peek((rl)), nxt = pos->rq_next; \
+ pos; pos = nxt, nxt = pos ? pos->rq_next : NULL)
+
/**
* enum blk_eh_timer_return - How the timeout handler should proceed
* @BLK_EH_DONE: The block driver completed the command or will complete it at
@@ -577,7 +575,7 @@ struct blk_mq_ops {
* empty the @rqlist completely, then the rest will be queued
* individually by the block layer upon return.
*/
- void (*queue_rqs)(struct request **rqlist);
+ void (*queue_rqs)(struct rq_list *rqlist);
/**
* @get_budget: Reserve budget before queue request, once .queue_rq is
@@ -886,7 +884,7 @@ static inline bool blk_mq_add_to_batch(struct request *req,
else if (iob->complete != complete)
return false;
iob->need_ts |= blk_mq_need_time_stamp(req);
- rq_list_add(&iob->req_list, req);
+ rq_list_add_tail(&iob->req_list, req);
return true;
}
@@ -985,7 +983,6 @@ static inline void blk_rq_bio_prep(struct request *rq, struct bio *bio,
rq->nr_phys_segments = nr_segs;
rq->__data_len = bio->bi_iter.bi_size;
rq->bio = rq->biotail = bio;
- rq->ioprio = bio_prio(bio);
}
void blk_mq_hctx_set_fq_lock_class(struct blk_mq_hw_ctx *hctx,
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 65f37ae..a1fd0dd 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -948,6 +948,7 @@ queue_limits_start_update(struct request_queue *q)
int queue_limits_commit_update(struct request_queue *q,
struct queue_limits *lim);
int queue_limits_set(struct request_queue *q, struct queue_limits *lim);
+int blk_validate_limits(struct queue_limits *lim);
/**
* queue_limits_cancel_update - cancel an atomic update of queue limits
@@ -1005,6 +1006,11 @@ extern void blk_put_queue(struct request_queue *);
void blk_mark_disk_dead(struct gendisk *disk);
+struct rq_list {
+ struct request *head;
+ struct request *tail;
+};
+
#ifdef CONFIG_BLOCK
/*
* blk_plug permits building a queue of related requests by holding the I/O
@@ -1018,10 +1024,10 @@ void blk_mark_disk_dead(struct gendisk *disk);
* blk_flush_plug() is called.
*/
struct blk_plug {
- struct request *mq_list; /* blk-mq requests */
+ struct rq_list mq_list; /* blk-mq requests */
/* if ios_left is > 1, we can batch tag/rq allocations */
- struct request *cached_rq;
+ struct rq_list cached_rqs;
u64 cur_ktime;
unsigned short nr_ios;
@@ -1683,7 +1689,7 @@ int bdev_thaw(struct block_device *bdev);
void bdev_fput(struct file *bdev_file);
struct io_comp_batch {
- struct request *req_list;
+ struct rq_list req_list;
bool need_ts;
void (*complete)(struct io_comp_batch *);
};
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index b58d940..0a6e220 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -327,7 +327,8 @@ struct nvme_id_ctrl {
__le32 sanicap;
__le32 hmminds;
__le16 hmmaxd;
- __u8 rsvd338[4];
+ __le16 nvmsetidmax;
+ __le16 endgidmax;
__u8 anatt;
__u8 anacap;
__le32 anagrpmax;
@@ -522,6 +523,7 @@ enum {
NVME_ID_CNS_NS_DESC_LIST = 0x03,
NVME_ID_CNS_CS_NS = 0x05,
NVME_ID_CNS_CS_CTRL = 0x06,
+ NVME_ID_CNS_NS_ACTIVE_LIST_CS = 0x07,
NVME_ID_CNS_NS_CS_INDEP = 0x08,
NVME_ID_CNS_NS_PRESENT_LIST = 0x10,
NVME_ID_CNS_NS_PRESENT = 0x11,
@@ -530,6 +532,7 @@ enum {
NVME_ID_CNS_SCNDRY_CTRL_LIST = 0x15,
NVME_ID_CNS_NS_GRANULARITY = 0x16,
NVME_ID_CNS_UUID_LIST = 0x17,
+ NVME_ID_CNS_ENDGRP_LIST = 0x19,
};
enum {
@@ -560,6 +563,8 @@ enum {
NVME_NS_FLBAS_LBA_SHIFT = 1,
NVME_NS_FLBAS_META_EXT = 0x10,
NVME_NS_NMIC_SHARED = 1 << 0,
+ NVME_NS_ROTATIONAL = 1 << 4,
+ NVME_NS_VWC_NOT_PRESENT = 1 << 5,
NVME_LBAF_RP_BEST = 0,
NVME_LBAF_RP_BETTER = 1,
NVME_LBAF_RP_GOOD = 2,
@@ -617,6 +622,40 @@ enum {
NVME_NIDT_CSI = 0x04,
};
+struct nvme_endurance_group_log {
+ __u8 egcw;
+ __u8 egfeat;
+ __u8 rsvd2;
+ __u8 avsp;
+ __u8 avspt;
+ __u8 pused;
+ __le16 did;
+ __u8 rsvd8[24];
+ __u8 ee[16];
+ __u8 dur[16];
+ __u8 duw[16];
+ __u8 muw[16];
+ __u8 hrc[16];
+ __u8 hwc[16];
+ __u8 mdie[16];
+ __u8 neile[16];
+ __u8 tegcap[16];
+ __u8 uegcap[16];
+ __u8 rsvd192[320];
+};
+
+struct nvme_rotational_media_log {
+ __le16 endgid;
+ __le16 numa;
+ __le16 nrs;
+ __u8 rsvd6[2];
+ __le32 spinc;
+ __le32 fspinc;
+ __le32 ldc;
+ __le32 fldc;
+ __u8 rsvd24[488];
+};
+
struct nvme_smart_log {
__u8 critical_warning;
__u8 temperature[2];
@@ -1244,6 +1283,7 @@ enum {
NVME_FEAT_WRITE_PROTECT = 0x84,
NVME_FEAT_VENDOR_START = 0xC0,
NVME_FEAT_VENDOR_END = 0xFF,
+ NVME_LOG_SUPPORTED = 0x00,
NVME_LOG_ERROR = 0x01,
NVME_LOG_SMART = 0x02,
NVME_LOG_FW_SLOT = 0x03,
@@ -1254,6 +1294,8 @@ enum {
NVME_LOG_TELEMETRY_CTRL = 0x08,
NVME_LOG_ENDURANCE_GROUP = 0x09,
NVME_LOG_ANA = 0x0c,
+ NVME_LOG_FEATURES = 0x12,
+ NVME_LOG_RMI = 0x16,
NVME_LOG_DISC = 0x70,
NVME_LOG_RESERVATION = 0x80,
NVME_FWACT_REPL = (0 << 3),
@@ -1261,6 +1303,24 @@ enum {
NVME_FWACT_ACTV = (2 << 3),
};
+struct nvme_supported_log {
+ __le32 lids[256];
+};
+
+enum {
+ NVME_LIDS_LSUPP = 1 << 0,
+};
+
+struct nvme_supported_features_log {
+ __le32 fis[256];
+};
+
+enum {
+ NVME_FIS_FSUPP = 1 << 0,
+ NVME_FIS_NSCPE = 1 << 20,
+ NVME_FIS_CSCPE = 1 << 21,
+};
+
/* NVMe Namespace Write Protect State */
enum {
NVME_NS_NO_WRITE_PROTECT = 0,
@@ -1281,7 +1341,8 @@ struct nvme_identify {
__u8 cns;
__u8 rsvd3;
__le16 ctrlid;
- __u8 rsvd11[3];
+ __le16 cnssid;
+ __u8 rsvd11;
__u8 csi;
__u32 rsvd12[4];
};
@@ -1389,7 +1450,7 @@ struct nvme_get_log_page_command {
__u8 lsp; /* upper 4 bits reserved */
__le16 numdl;
__le16 numdu;
- __u16 rsvd11;
+ __le16 lsi;
union {
struct {
__le32 lpol;
@@ -2037,4 +2098,72 @@ struct nvme_completion {
#define NVME_MINOR(ver) (((ver) >> 8) & 0xff)
#define NVME_TERTIARY(ver) ((ver) & 0xff)
+enum {
+ NVME_AEN_RESV_LOG_PAGE_AVALIABLE = 0x00,
+};
+
+enum {
+ NVME_PR_LOG_EMPTY_LOG_PAGE = 0x00,
+ NVME_PR_LOG_REGISTRATION_PREEMPTED = 0x01,
+ NVME_PR_LOG_RESERVATION_RELEASED = 0x02,
+ NVME_PR_LOG_RESERVATOIN_PREEMPTED = 0x03,
+};
+
+enum {
+ NVME_PR_NOTIFY_BIT_REG_PREEMPTED = 1,
+ NVME_PR_NOTIFY_BIT_RESV_RELEASED = 2,
+ NVME_PR_NOTIFY_BIT_RESV_PREEMPTED = 3,
+};
+
+struct nvme_pr_log {
+ __le64 count;
+ __u8 type;
+ __u8 nr_pages;
+ __u8 rsvd1[2];
+ __le32 nsid;
+ __u8 rsvd2[48];
+};
+
+struct nvmet_pr_register_data {
+ __le64 crkey;
+ __le64 nrkey;
+};
+
+struct nvmet_pr_acquire_data {
+ __le64 crkey;
+ __le64 prkey;
+};
+
+struct nvmet_pr_release_data {
+ __le64 crkey;
+};
+
+enum nvme_pr_capabilities {
+ NVME_PR_SUPPORT_PTPL = 1,
+ NVME_PR_SUPPORT_WRITE_EXCLUSIVE = 1 << 1,
+ NVME_PR_SUPPORT_EXCLUSIVE_ACCESS = 1 << 2,
+ NVME_PR_SUPPORT_WRITE_EXCLUSIVE_REG_ONLY = 1 << 3,
+ NVME_PR_SUPPORT_EXCLUSIVE_ACCESS_REG_ONLY = 1 << 4,
+ NVME_PR_SUPPORT_WRITE_EXCLUSIVE_ALL_REGS = 1 << 5,
+ NVME_PR_SUPPORT_EXCLUSIVE_ACCESS_ALL_REGS = 1 << 6,
+ NVME_PR_SUPPORT_IEKEY_VER_1_3_DEF = 1 << 7,
+};
+
+enum nvme_pr_register_action {
+ NVME_PR_REGISTER_ACT_REG = 0,
+ NVME_PR_REGISTER_ACT_UNREG = 1,
+ NVME_PR_REGISTER_ACT_REPLACE = 1 << 1,
+};
+
+enum nvme_pr_acquire_action {
+ NVME_PR_ACQUIRE_ACT_ACQUIRE = 0,
+ NVME_PR_ACQUIRE_ACT_PREEMPT = 1,
+ NVME_PR_ACQUIRE_ACT_PREEMPT_AND_ABORT = 1 << 1,
+};
+
+enum nvme_pr_release_action {
+ NVME_PR_RELEASE_ACT_RELEASE = 0,
+ NVME_PR_RELEASE_ACT_CLEAR = 1,
+};
+
#endif /* _LINUX_NVME_H */
diff --git a/include/trace/events/block.h b/include/trace/events/block.h
index 1527d5d..bd0ea07 100644
--- a/include/trace/events/block.h
+++ b/include/trace/events/block.h
@@ -99,7 +99,7 @@ TRACE_EVENT(block_rq_requeue,
__entry->dev = rq->q->disk ? disk_devt(rq->q->disk) : 0;
__entry->sector = blk_rq_trace_sector(rq);
__entry->nr_sector = blk_rq_trace_nr_sectors(rq);
- __entry->ioprio = rq->ioprio;
+ __entry->ioprio = req_get_ioprio(rq);
blk_fill_rwbs(__entry->rwbs, rq->cmd_flags);
__get_str(cmd)[0] = '\0';
@@ -136,7 +136,7 @@ DECLARE_EVENT_CLASS(block_rq_completion,
__entry->sector = blk_rq_pos(rq);
__entry->nr_sector = nr_bytes >> 9;
__entry->error = blk_status_to_errno(error);
- __entry->ioprio = rq->ioprio;
+ __entry->ioprio = req_get_ioprio(rq);
blk_fill_rwbs(__entry->rwbs, rq->cmd_flags);
__get_str(cmd)[0] = '\0';
@@ -209,7 +209,7 @@ DECLARE_EVENT_CLASS(block_rq,
__entry->sector = blk_rq_trace_sector(rq);
__entry->nr_sector = blk_rq_trace_nr_sectors(rq);
__entry->bytes = blk_rq_bytes(rq);
- __entry->ioprio = rq->ioprio;
+ __entry->ioprio = req_get_ioprio(rq);
blk_fill_rwbs(__entry->rwbs, rq->cmd_flags);
__get_str(cmd)[0] = '\0';
diff --git a/io_uring/rw.c b/io_uring/rw.c
index 354c4e1..9daef98 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -1160,12 +1160,12 @@ int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
poll_flags |= BLK_POLL_ONESHOT;
/* iopoll may have completed current req */
- if (!rq_list_empty(iob.req_list) ||
+ if (!rq_list_empty(&iob.req_list) ||
READ_ONCE(req->iopoll_completed))
break;
}
- if (!rq_list_empty(iob.req_list))
+ if (!rq_list_empty(&iob.req_list))
iob.complete(&iob);
else if (!pos)
return 0;