// SPDX-License-Identifier: GPL-2.0
/*
 * NVMe over Fabrics RDMA host code.
 * Copyright (c) 2015-2016 HGST, a Western Digital Company.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <rdma/mr_pool.h>
#include <linux/err.h>
#include <linux/string.h>
#include <linux/atomic.h>
#include <linux/blk-mq.h>
#include <linux/blk-integrity.h>
#include <linux/types.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/scatterlist.h>
#include <linux/nvme.h>
#include <asm/unaligned.h>

#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
#include <linux/nvme-rdma.h>

#include "nvme.h"
#include "fabrics.h"

#define NVME_RDMA_CM_TIMEOUT_MS		3000	/* 3 seconds */

#define NVME_RDMA_MAX_SEGMENTS		256

#define NVME_RDMA_MAX_INLINE_SEGMENTS	4

#define NVME_RDMA_DATA_SGL_SIZE \
	(sizeof(struct scatterlist) * NVME_INLINE_SG_CNT)
#define NVME_RDMA_METADATA_SGL_SIZE \
	(sizeof(struct scatterlist) * NVME_INLINE_METADATA_SG_CNT)

struct nvme_rdma_device {
	struct ib_device	*dev;
	struct ib_pd		*pd;
	struct kref		ref;
	struct list_head	entry;
	unsigned int		num_inline_segments;
};

struct nvme_rdma_qe {
	struct ib_cqe		cqe;
	void			*data;
	u64			dma;
};

struct nvme_rdma_sgl {
	int			nents;
	struct sg_table		sg_table;
};

struct nvme_rdma_queue;
struct nvme_rdma_request {
	struct nvme_request	req;
	struct ib_mr		*mr;
	struct nvme_rdma_qe	sqe;
	union nvme_result	result;
	__le16			status;
	refcount_t		ref;
	struct ib_sge		sge[1 + NVME_RDMA_MAX_INLINE_SEGMENTS];
	u32			num_sge;
	struct ib_reg_wr	reg_wr;
	struct ib_cqe		reg_cqe;
	struct nvme_rdma_queue	*queue;
	struct nvme_rdma_sgl	data_sgl;
	struct nvme_rdma_sgl	*metadata_sgl;
	bool			use_sig_mr;
};

enum nvme_rdma_queue_flags {
	NVME_RDMA_Q_ALLOCATED		= 0,
	NVME_RDMA_Q_LIVE		= 1,
	NVME_RDMA_Q_TR_READY		= 2,
};

struct nvme_rdma_queue {
	struct nvme_rdma_qe	*rsp_ring;
	int			queue_size;
	size_t			cmnd_capsule_len;
	struct nvme_rdma_ctrl	*ctrl;
	struct nvme_rdma_device	*device;
	struct ib_cq		*ib_cq;
	struct ib_qp		*qp;

	unsigned long		flags;
	struct rdma_cm_id	*cm_id;
	int			cm_error;
	struct completion	cm_done;
	bool			pi_support;
	int			cq_size;
	struct mutex		queue_lock;
};

struct nvme_rdma_ctrl {
	/* read only in the hot path */
	struct nvme_rdma_queue	*queues;

	/* other member variables */
	struct blk_mq_tag_set	tag_set;
	struct work_struct	err_work;

	struct nvme_rdma_qe	async_event_sqe;

	struct delayed_work	reconnect_work;

	struct list_head	list;

	struct blk_mq_tag_set	admin_tag_set;
	struct nvme_rdma_device	*device;

	u32			max_fr_pages;

	struct sockaddr_storage	addr;
	struct sockaddr_storage	src_addr;

	struct nvme_ctrl	ctrl;
	bool			use_inline_data;
	u32			io_queues[HCTX_MAX_TYPES];
};

static inline struct nvme_rdma_ctrl *to_rdma_ctrl(struct nvme_ctrl *ctrl)
{
	return container_of(ctrl, struct nvme_rdma_ctrl, ctrl);
}

static LIST_HEAD(device_list);
static DEFINE_MUTEX(device_list_mutex);

static LIST_HEAD(nvme_rdma_ctrl_list);
static DEFINE_MUTEX(nvme_rdma_ctrl_mutex);

/*
 * Disabling this option makes small I/O go faster, but is fundamentally
 * unsafe. With it turned off we will have to register a global rkey that
 * allows read and write access to all physical memory.
 */
static bool register_always = true;
module_param(register_always, bool, 0444);
MODULE_PARM_DESC(register_always,
	 "Use memory registration even for contiguous memory regions");

static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
		struct rdma_cm_event *event);
static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc);
static void nvme_rdma_complete_rq(struct request *rq);

static const struct blk_mq_ops nvme_rdma_mq_ops;
static const struct blk_mq_ops nvme_rdma_admin_mq_ops;

static inline int nvme_rdma_queue_idx(struct nvme_rdma_queue *queue)
{
	return queue - queue->ctrl->queues;
}

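/*
 * Queues beyond the default and read sets sit at the tail of the queue
 * index space and are used for polling.
 */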
static bool nvme_rdma_poll_queue(struct nvme_rdma_queue *queue)
{
	return nvme_rdma_queue_idx(queue) >
		queue->ctrl->io_queues[HCTX_TYPE_DEFAULT] +
		queue->ctrl->io_queues[HCTX_TYPE_READ];
}

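/* In-capsule data space: the command capsule minus the 64-byte SQE. */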
static inline size_t nvme_rdma_inline_data_size(struct nvme_rdma_queue *queue)
{
	return queue->cmnd_capsule_len - sizeof(struct nvme_command);
}

static void nvme_rdma_free_qe(struct ib_device *ibdev, struct nvme_rdma_qe *qe,
		size_t capsule_size, enum dma_data_direction dir)
{
	ib_dma_unmap_single(ibdev, qe->dma, capsule_size, dir);
	kfree(qe->data);
}

static int nvme_rdma_alloc_qe(struct ib_device *ibdev, struct nvme_rdma_qe *qe,
		size_t capsule_size, enum dma_data_direction dir)
{
	qe->data = kzalloc(capsule_size, GFP_KERNEL);
	if (!qe->data)
		return -ENOMEM;

	qe->dma = ib_dma_map_single(ibdev, qe->data, capsule_size, dir);
	if (ib_dma_mapping_error(ibdev, qe->dma)) {
		kfree(qe->data);
		qe->data = NULL;
		return -ENOMEM;
	}

	return 0;
}

static void nvme_rdma_free_ring(struct ib_device *ibdev,
		struct nvme_rdma_qe *ring, size_t ib_queue_size,
		size_t capsule_size, enum dma_data_direction dir)
{
	int i;

	for (i = 0; i < ib_queue_size; i++)
		nvme_rdma_free_qe(ibdev, &ring[i], capsule_size, dir);
	kfree(ring);
}

static struct nvme_rdma_qe *nvme_rdma_alloc_ring(struct ib_device *ibdev,
		size_t ib_queue_size, size_t capsule_size,
		enum dma_data_direction dir)
{
	struct nvme_rdma_qe *ring;
	int i;

	ring = kcalloc(ib_queue_size, sizeof(struct nvme_rdma_qe), GFP_KERNEL);
	if (!ring)
		return NULL;

	/*
	 * Bind the CQEs (post recv buffers) DMA mapping to the RDMA queue
	 * lifetime. It's safe, since any change in the underlying RDMA device
	 * will issue error recovery and queue re-creation.
	 */
	for (i = 0; i < ib_queue_size; i++) {
		if (nvme_rdma_alloc_qe(ibdev, &ring[i], capsule_size, dir))
			goto out_free_ring;
	}

	return ring;

out_free_ring:
	nvme_rdma_free_ring(ibdev, ring, i, capsule_size, dir);
	return NULL;
}

static void nvme_rdma_qp_event(struct ib_event *event, void *context)
{
	pr_debug("QP event %s (%d)\n",
		 ib_event_msg(event->event), event->event);

}

static int nvme_rdma_wait_for_cm(struct nvme_rdma_queue *queue)
{
	int ret;

	ret = wait_for_completion_interruptible(&queue->cm_done);
	if (ret)
		return ret;
	WARN_ON_ONCE(queue->cm_error > 0);
	return queue->cm_error;
}

static int nvme_rdma_create_qp(struct nvme_rdma_queue *queue, const int factor)
{
	struct nvme_rdma_device *dev = queue->device;
	struct ib_qp_init_attr init_attr;
	int ret;

	memset(&init_attr, 0, sizeof(init_attr));
	init_attr.event_handler = nvme_rdma_qp_event;
	/* +1 for drain */
	init_attr.cap.max_send_wr = factor * queue->queue_size + 1;
	/* +1 for drain */
	init_attr.cap.max_recv_wr = queue->queue_size + 1;
	init_attr.cap.max_recv_sge = 1;
	init_attr.cap.max_send_sge = 1 + dev->num_inline_segments;
	init_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
	init_attr.qp_type = IB_QPT_RC;
	init_attr.send_cq = queue->ib_cq;
	init_attr.recv_cq = queue->ib_cq;
	if (queue->pi_support)
		init_attr.create_flags |= IB_QP_CREATE_INTEGRITY_EN;
	init_attr.qp_context = queue;

	ret = rdma_create_qp(queue->cm_id, dev->pd, &init_attr);

	queue->qp = queue->cm_id->qp;
	return ret;
}

static void nvme_rdma_exit_request(struct blk_mq_tag_set *set,
		struct request *rq, unsigned int hctx_idx)
{
	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);

	kfree(req->sqe.data);
}

static int nvme_rdma_init_request(struct blk_mq_tag_set *set,
		struct request *rq, unsigned int hctx_idx,
		unsigned int numa_node)
{
	struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(set->driver_data);
	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
	int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0;
	struct nvme_rdma_queue *queue = &ctrl->queues[queue_idx];

	nvme_req(rq)->ctrl = &ctrl->ctrl;
	req->sqe.data = kzalloc(sizeof(struct nvme_command), GFP_KERNEL);
	if (!req->sqe.data)
		return -ENOMEM;

	/* metadata nvme_rdma_sgl struct is located after command's data SGL */
	if (queue->pi_support)
		req->metadata_sgl = (void *)nvme_req(rq) +
			sizeof(struct nvme_rdma_request) +
			NVME_RDMA_DATA_SGL_SIZE;

	req->queue = queue;
	nvme_req(rq)->cmd = req->sqe.data;

	return 0;
}

static int nvme_rdma_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
		unsigned int hctx_idx)
{
	struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(data);
	struct nvme_rdma_queue *queue = &ctrl->queues[hctx_idx + 1];

	BUG_ON(hctx_idx >= ctrl->ctrl.queue_count);

	hctx->driver_data = queue;
	return 0;
}

static int nvme_rdma_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data,
		unsigned int hctx_idx)
{
	struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(data);
	struct nvme_rdma_queue *queue = &ctrl->queues[0];

	BUG_ON(hctx_idx != 0);

	hctx->driver_data = queue;
	return 0;
}

static void nvme_rdma_free_dev(struct kref *ref)
{
	struct nvme_rdma_device *ndev =
		container_of(ref, struct nvme_rdma_device, ref);

	mutex_lock(&device_list_mutex);
	list_del(&ndev->entry);
	mutex_unlock(&device_list_mutex);

	ib_dealloc_pd(ndev->pd);
	kfree(ndev);
}

static void nvme_rdma_dev_put(struct nvme_rdma_device *dev)
{
	kref_put(&dev->ref, nvme_rdma_free_dev);
}

static int nvme_rdma_dev_get(struct nvme_rdma_device *dev)
{
	return kref_get_unless_zero(&dev->ref);
}

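/*
 * Look up the nvme_rdma_device for this CM ID's IB device and take a
 * reference on it, allocating and registering a new entry (with its PD)
 * on first use.
 */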
static struct nvme_rdma_device *
nvme_rdma_find_get_device(struct rdma_cm_id *cm_id)
{
	struct nvme_rdma_device *ndev;

	mutex_lock(&device_list_mutex);
	list_for_each_entry(ndev, &device_list, entry) {
		if (ndev->dev->node_guid == cm_id->device->node_guid &&
		    nvme_rdma_dev_get(ndev))
			goto out_unlock;
	}

	ndev = kzalloc(sizeof(*ndev), GFP_KERNEL);
	if (!ndev)
		goto out_err;

	ndev->dev = cm_id->device;
	kref_init(&ndev->ref);

	ndev->pd = ib_alloc_pd(ndev->dev,
		register_always ? 0 : IB_PD_UNSAFE_GLOBAL_RKEY);
	if (IS_ERR(ndev->pd))
		goto out_free_dev;

	if (!(ndev->dev->attrs.device_cap_flags &
	      IB_DEVICE_MEM_MGT_EXTENSIONS)) {
		dev_err(&ndev->dev->dev,
			"Memory registrations not supported.\n");
		goto out_free_pd;
	}

	ndev->num_inline_segments = min(NVME_RDMA_MAX_INLINE_SEGMENTS,
					ndev->dev->attrs.max_send_sge - 1);
	list_add(&ndev->entry, &device_list);
out_unlock:
	mutex_unlock(&device_list_mutex);
	return ndev;

out_free_pd:
	ib_dealloc_pd(ndev->pd);
out_free_dev:
	kfree(ndev);
out_err:
	mutex_unlock(&device_list_mutex);
	return NULL;
}

static void nvme_rdma_free_cq(struct nvme_rdma_queue *queue)
{
	if (nvme_rdma_poll_queue(queue))
		ib_free_cq(queue->ib_cq);
	else
		ib_cq_pool_put(queue->ib_cq, queue->cq_size);
}

static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
{
	struct nvme_rdma_device *dev;
	struct ib_device *ibdev;

	if (!test_and_clear_bit(NVME_RDMA_Q_TR_READY, &queue->flags))
		return;

	dev = queue->device;
	ibdev = dev->dev;

	if (queue->pi_support)
		ib_mr_pool_destroy(queue->qp, &queue->qp->sig_mrs);
	ib_mr_pool_destroy(queue->qp, &queue->qp->rdma_mrs);

	/*
	 * The cm_id object might have been destroyed during RDMA connection
	 * establishment error flow to avoid getting other cma events, thus
	 * the destruction of the QP shouldn't use rdma_cm API.
	 */
	ib_destroy_qp(queue->qp);
	nvme_rdma_free_cq(queue);

	nvme_rdma_free_ring(ibdev, queue->rsp_ring, queue->queue_size,
			sizeof(struct nvme_completion), DMA_FROM_DEVICE);

	nvme_rdma_dev_put(dev);
}

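/*
 * Cap the per-MR page list at the driver maximum or at what the device
 * supports for (PI) fast registration, whichever is smaller.
 */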
static int nvme_rdma_get_max_fr_pages(struct ib_device *ibdev, bool pi_support)
{
	u32 max_page_list_len;

	if (pi_support)
		max_page_list_len = ibdev->attrs.max_pi_fast_reg_page_list_len;
	else
		max_page_list_len = ibdev->attrs.max_fast_reg_page_list_len;

	return min_t(u32, NVME_RDMA_MAX_SEGMENTS, max_page_list_len - 1);
}

static int nvme_rdma_create_cq(struct ib_device *ibdev,
		struct nvme_rdma_queue *queue)
{
	int ret, comp_vector, idx = nvme_rdma_queue_idx(queue);

	/*
	 * Spread I/O queues completion vectors according to their queue index.
	 * Admin queues can always go on completion vector 0.
	 */
	comp_vector = (idx == 0 ? idx : idx - 1) % ibdev->num_comp_vectors;

	/* Polling queues need direct cq polling context */
	if (nvme_rdma_poll_queue(queue))
		queue->ib_cq = ib_alloc_cq(ibdev, queue, queue->cq_size,
					   comp_vector, IB_POLL_DIRECT);
	else
		queue->ib_cq = ib_cq_pool_get(ibdev, queue->cq_size,
					      comp_vector, IB_POLL_SOFTIRQ);

	if (IS_ERR(queue->ib_cq)) {
		ret = PTR_ERR(queue->ib_cq);
		return ret;
	}

	return 0;
}

static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
{
	struct ib_device *ibdev;
	const int send_wr_factor = 3;			/* MR, SEND, INV */
	const int cq_factor = send_wr_factor + 1;	/* + RECV */
	int ret, pages_per_mr;

	queue->device = nvme_rdma_find_get_device(queue->cm_id);
	if (!queue->device) {
		dev_err(queue->cm_id->device->dev.parent,
			"no client data found!\n");
		return -ECONNREFUSED;
	}
	ibdev = queue->device->dev;

	/* +1 for ib_drain_qp */
	queue->cq_size = cq_factor * queue->queue_size + 1;

	ret = nvme_rdma_create_cq(ibdev, queue);
	if (ret)
		goto out_put_dev;

	ret = nvme_rdma_create_qp(queue, send_wr_factor);
	if (ret)
		goto out_destroy_ib_cq;

	queue->rsp_ring = nvme_rdma_alloc_ring(ibdev, queue->queue_size,
			sizeof(struct nvme_completion), DMA_FROM_DEVICE);
	if (!queue->rsp_ring) {
		ret = -ENOMEM;
		goto out_destroy_qp;
	}

	/*
	 * Currently we don't use SG_GAPS MR's so if the first entry is
	 * misaligned we'll end up using two entries for a single data page,
	 * so one additional entry is required.
	 */
	pages_per_mr = nvme_rdma_get_max_fr_pages(ibdev, queue->pi_support) + 1;
	ret = ib_mr_pool_init(queue->qp, &queue->qp->rdma_mrs,
			      queue->queue_size,
			      IB_MR_TYPE_MEM_REG,
			      pages_per_mr, 0);
	if (ret) {
		dev_err(queue->ctrl->ctrl.device,
			"failed to initialize MR pool sized %d for QID %d\n",
			queue->queue_size, nvme_rdma_queue_idx(queue));
		goto out_destroy_ring;
	}

	if (queue->pi_support) {
		ret = ib_mr_pool_init(queue->qp, &queue->qp->sig_mrs,
				      queue->queue_size, IB_MR_TYPE_INTEGRITY,
				      pages_per_mr, pages_per_mr);
		if (ret) {
			dev_err(queue->ctrl->ctrl.device,
				"failed to initialize PI MR pool sized %d for QID %d\n",
				queue->queue_size, nvme_rdma_queue_idx(queue));
			goto out_destroy_mr_pool;
		}
	}

	set_bit(NVME_RDMA_Q_TR_READY, &queue->flags);

	return 0;

out_destroy_mr_pool:
	ib_mr_pool_destroy(queue->qp, &queue->qp->rdma_mrs);
out_destroy_ring:
	nvme_rdma_free_ring(ibdev, queue->rsp_ring, queue->queue_size,
			    sizeof(struct nvme_completion), DMA_FROM_DEVICE);
out_destroy_qp:
	rdma_destroy_qp(queue->cm_id);
out_destroy_ib_cq:
	nvme_rdma_free_cq(queue);
out_put_dev:
	nvme_rdma_dev_put(queue->device);
	return ret;
}

static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl,
		int idx, size_t queue_size)
{
	struct nvme_rdma_queue *queue;
	struct sockaddr *src_addr = NULL;
	int ret;

	queue = &ctrl->queues[idx];
	mutex_init(&queue->queue_lock);
	queue->ctrl = ctrl;
	if (idx && ctrl->ctrl.max_integrity_segments)
		queue->pi_support = true;
	else
		queue->pi_support = false;
	init_completion(&queue->cm_done);

	if (idx > 0)
		queue->cmnd_capsule_len = ctrl->ctrl.ioccsz * 16;
	else
		queue->cmnd_capsule_len = sizeof(struct nvme_command);

	queue->queue_size = queue_size;

	queue->cm_id = rdma_create_id(&init_net, nvme_rdma_cm_handler, queue,
			RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(queue->cm_id)) {
		dev_info(ctrl->ctrl.device,
			"failed to create CM ID: %ld\n", PTR_ERR(queue->cm_id));
		ret = PTR_ERR(queue->cm_id);
		goto out_destroy_mutex;
	}

	if (ctrl->ctrl.opts->mask & NVMF_OPT_HOST_TRADDR)
		src_addr = (struct sockaddr *)&ctrl->src_addr;

	queue->cm_error = -ETIMEDOUT;
	ret = rdma_resolve_addr(queue->cm_id, src_addr,
			(struct sockaddr *)&ctrl->addr,
			NVME_RDMA_CM_TIMEOUT_MS);
	if (ret) {
		dev_info(ctrl->ctrl.device,
			"rdma_resolve_addr failed (%d).\n", ret);
		goto out_destroy_cm_id;
	}

	ret = nvme_rdma_wait_for_cm(queue);
	if (ret) {
		dev_info(ctrl->ctrl.device,
			"rdma connection establishment failed (%d)\n", ret);
		goto out_destroy_cm_id;
	}

	set_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags);

	return 0;

out_destroy_cm_id:
	rdma_destroy_id(queue->cm_id);
	nvme_rdma_destroy_queue_ib(queue);
out_destroy_mutex:
	mutex_destroy(&queue->queue_lock);
	return ret;
}

static void __nvme_rdma_stop_queue(struct nvme_rdma_queue *queue)
{
	rdma_disconnect(queue->cm_id);
	ib_drain_qp(queue->qp);
}

static void nvme_rdma_stop_queue(struct nvme_rdma_queue *queue)
{
	mutex_lock(&queue->queue_lock);
	if (test_and_clear_bit(NVME_RDMA_Q_LIVE, &queue->flags))
		__nvme_rdma_stop_queue(queue);
	mutex_unlock(&queue->queue_lock);
}

static void nvme_rdma_free_queue(struct nvme_rdma_queue *queue)
{
	if (!test_and_clear_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags))
		return;

	rdma_destroy_id(queue->cm_id);
	nvme_rdma_destroy_queue_ib(queue);
	mutex_destroy(&queue->queue_lock);
}

static void nvme_rdma_free_io_queues(struct nvme_rdma_ctrl *ctrl)
{
	int i;

	for (i = 1; i < ctrl->ctrl.queue_count; i++)
		nvme_rdma_free_queue(&ctrl->queues[i]);
}

static void nvme_rdma_stop_io_queues(struct nvme_rdma_ctrl *ctrl)
{
	int i;

	for (i = 1; i < ctrl->ctrl.queue_count; i++)
		nvme_rdma_stop_queue(&ctrl->queues[i]);
}

static int nvme_rdma_start_queue(struct nvme_rdma_ctrl *ctrl, int idx)
{
	struct nvme_rdma_queue *queue = &ctrl->queues[idx];
	int ret;

	if (idx)
		ret = nvmf_connect_io_queue(&ctrl->ctrl, idx);
	else
		ret = nvmf_connect_admin_queue(&ctrl->ctrl);

	if (!ret) {
		set_bit(NVME_RDMA_Q_LIVE, &queue->flags);
	} else {
		if (test_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags))
			__nvme_rdma_stop_queue(queue);
		dev_info(ctrl->ctrl.device,
			"failed to connect queue: %d ret=%d\n", idx, ret);
	}
	return ret;
}

static int nvme_rdma_start_io_queues(struct nvme_rdma_ctrl *ctrl,
				     int first, int last)
{
	int i, ret = 0;

	for (i = first; i < last; i++) {
		ret = nvme_rdma_start_queue(ctrl, i);
		if (ret)
			goto out_stop_queues;
	}

	return 0;

out_stop_queues:
	for (i--; i >= first; i--)
		nvme_rdma_stop_queue(&ctrl->queues[i]);
	return ret;
}

static int nvme_rdma_alloc_io_queues(struct nvme_rdma_ctrl *ctrl)
{
	struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
	unsigned int nr_io_queues;
	int i, ret;

	nr_io_queues = nvmf_nr_io_queues(opts);
	ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues);
	if (ret)
		return ret;

	if (nr_io_queues == 0) {
		dev_err(ctrl->ctrl.device,
			"unable to set any I/O queues\n");
		return -ENOMEM;
	}

	ctrl->ctrl.queue_count = nr_io_queues + 1;
	dev_info(ctrl->ctrl.device,
		"creating %d I/O queues.\n", nr_io_queues);

	nvmf_set_io_queues(opts, nr_io_queues, ctrl->io_queues);
	for (i = 1; i < ctrl->ctrl.queue_count; i++) {
		ret = nvme_rdma_alloc_queue(ctrl, i,
				ctrl->ctrl.sqsize + 1);
		if (ret)
			goto out_free_queues;
	}

	return 0;

out_free_queues:
	for (i--; i >= 1; i--)
		nvme_rdma_free_queue(&ctrl->queues[i]);

	return ret;
}

static int nvme_rdma_alloc_tag_set(struct nvme_ctrl *ctrl)
{
	unsigned int cmd_size = sizeof(struct nvme_rdma_request) +
				NVME_RDMA_DATA_SGL_SIZE;

	if (ctrl->max_integrity_segments)
		cmd_size += sizeof(struct nvme_rdma_sgl) +
			    NVME_RDMA_METADATA_SGL_SIZE;

	return nvme_alloc_io_tag_set(ctrl, &to_rdma_ctrl(ctrl)->tag_set,
			&nvme_rdma_mq_ops,
			ctrl->opts->nr_poll_queues ? HCTX_MAX_TYPES : 2,
			cmd_size);
}

static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl)
{
	if (ctrl->async_event_sqe.data) {
		cancel_work_sync(&ctrl->ctrl.async_event_work);
		nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe,
				sizeof(struct nvme_command), DMA_TO_DEVICE);
		ctrl->async_event_sqe.data = NULL;
	}
	nvme_rdma_free_queue(&ctrl->queues[0]);
}

static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
		bool new)
{
	bool pi_capable = false;
	int error;

	error = nvme_rdma_alloc_queue(ctrl, 0, NVME_AQ_DEPTH);
	if (error)
		return error;

	ctrl->device = ctrl->queues[0].device;
	ctrl->ctrl.numa_node = ibdev_to_node(ctrl->device->dev);

	/* T10-PI support */
	if (ctrl->device->dev->attrs.kernel_cap_flags &
	    IBK_INTEGRITY_HANDOVER)
		pi_capable = true;

	ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev,
							pi_capable);

	/*
	 * Bind the async event SQE DMA mapping to the admin queue lifetime.
	 * It's safe, since any change in the underlying RDMA device will issue
	 * error recovery and queue re-creation.
	 */
	error = nvme_rdma_alloc_qe(ctrl->device->dev, &ctrl->async_event_sqe,
			sizeof(struct nvme_command), DMA_TO_DEVICE);
	if (error)
		goto out_free_queue;

	if (new) {
		error = nvme_alloc_admin_tag_set(&ctrl->ctrl,
				&ctrl->admin_tag_set, &nvme_rdma_admin_mq_ops,
				sizeof(struct nvme_rdma_request) +
				NVME_RDMA_DATA_SGL_SIZE);
		if (error)
			goto out_free_async_qe;

	}

	error = nvme_rdma_start_queue(ctrl, 0);
	if (error)
		goto out_remove_admin_tag_set;

	error = nvme_enable_ctrl(&ctrl->ctrl);
	if (error)
		goto out_stop_queue;

	ctrl->ctrl.max_segments = ctrl->max_fr_pages;
	ctrl->ctrl.max_hw_sectors = ctrl->max_fr_pages << (ilog2(SZ_4K) - 9);
	if (pi_capable)
		ctrl->ctrl.max_integrity_segments = ctrl->max_fr_pages;
	else
		ctrl->ctrl.max_integrity_segments = 0;

	nvme_unquiesce_admin_queue(&ctrl->ctrl);

	error = nvme_init_ctrl_finish(&ctrl->ctrl, false);
	if (error)
		goto out_quiesce_queue;

	return 0;

out_quiesce_queue:
	nvme_quiesce_admin_queue(&ctrl->ctrl);
	blk_sync_queue(ctrl->ctrl.admin_q);
out_stop_queue:
	nvme_rdma_stop_queue(&ctrl->queues[0]);
	nvme_cancel_admin_tagset(&ctrl->ctrl);
out_remove_admin_tag_set:
	if (new)
		nvme_remove_admin_tag_set(&ctrl->ctrl);
out_free_async_qe:
	if (ctrl->async_event_sqe.data) {
		nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe,
				sizeof(struct nvme_command), DMA_TO_DEVICE);
		ctrl->async_event_sqe.data = NULL;
	}
out_free_queue:
	nvme_rdma_free_queue(&ctrl->queues[0]);
	return error;
}

static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new)
{
	int ret, nr_queues;

	ret = nvme_rdma_alloc_io_queues(ctrl);
	if (ret)
		return ret;

	if (new) {
		ret = nvme_rdma_alloc_tag_set(&ctrl->ctrl);
		if (ret)
			goto out_free_io_queues;
	}

	/*
	 * Only start IO queues for which we have allocated the tagset
	 * and limited it to the available queues. On reconnects, the
	 * queue number might have changed.
	 */
	nr_queues = min(ctrl->tag_set.nr_hw_queues + 1, ctrl->ctrl.queue_count);
	ret = nvme_rdma_start_io_queues(ctrl, 1, nr_queues);
	if (ret)
		goto out_cleanup_tagset;

	if (!new) {
		nvme_start_freeze(&ctrl->ctrl);
		nvme_unquiesce_io_queues(&ctrl->ctrl);
		if (!nvme_wait_freeze_timeout(&ctrl->ctrl, NVME_IO_TIMEOUT)) {
			/*
			 * If we timed out waiting for freeze we are likely to
			 * be stuck. Fail the controller initialization just
			 * to be safe.
			 */
			ret = -ENODEV;
			nvme_unfreeze(&ctrl->ctrl);
			goto out_wait_freeze_timed_out;
		}
		blk_mq_update_nr_hw_queues(ctrl->ctrl.tagset,
			ctrl->ctrl.queue_count - 1);
		nvme_unfreeze(&ctrl->ctrl);
	}

	/*
	 * If the number of queues has increased (reconnect case)
	 * start all new queues now.
	 */
	ret = nvme_rdma_start_io_queues(ctrl, nr_queues,
					ctrl->tag_set.nr_hw_queues + 1);
	if (ret)
		goto out_wait_freeze_timed_out;

	return 0;

out_wait_freeze_timed_out:
	nvme_quiesce_io_queues(&ctrl->ctrl);
	nvme_sync_io_queues(&ctrl->ctrl);
	nvme_rdma_stop_io_queues(ctrl);
out_cleanup_tagset:
	nvme_cancel_tagset(&ctrl->ctrl);
	if (new)
		nvme_remove_io_tag_set(&ctrl->ctrl);
out_free_io_queues:
	nvme_rdma_free_io_queues(ctrl);
	return ret;
}

static void nvme_rdma_teardown_admin_queue(struct nvme_rdma_ctrl *ctrl,
		bool remove)
{
	nvme_quiesce_admin_queue(&ctrl->ctrl);
	blk_sync_queue(ctrl->ctrl.admin_q);
	nvme_rdma_stop_queue(&ctrl->queues[0]);
	nvme_cancel_admin_tagset(&ctrl->ctrl);
	if (remove) {
		nvme_unquiesce_admin_queue(&ctrl->ctrl);
		nvme_remove_admin_tag_set(&ctrl->ctrl);
	}
	nvme_rdma_destroy_admin_queue(ctrl);
}

static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl,
		bool remove)
{
	if (ctrl->ctrl.queue_count > 1) {
		nvme_quiesce_io_queues(&ctrl->ctrl);
		nvme_sync_io_queues(&ctrl->ctrl);
		nvme_rdma_stop_io_queues(ctrl);
		nvme_cancel_tagset(&ctrl->ctrl);
		if (remove) {
			nvme_unquiesce_io_queues(&ctrl->ctrl);
			nvme_remove_io_tag_set(&ctrl->ctrl);
		}
		nvme_rdma_free_io_queues(ctrl);
	}
}

static void nvme_rdma_stop_ctrl(struct nvme_ctrl *nctrl)
{
	struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);

	flush_work(&ctrl->err_work);
	cancel_delayed_work_sync(&ctrl->reconnect_work);
}

static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl)
{
	struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);

	if (list_empty(&ctrl->list))
		goto free_ctrl;

	mutex_lock(&nvme_rdma_ctrl_mutex);
	list_del(&ctrl->list);
	mutex_unlock(&nvme_rdma_ctrl_mutex);

	nvmf_free_options(nctrl->opts);
free_ctrl:
	kfree(ctrl->queues);
	kfree(ctrl);
}

static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl)
{
	/* If we are resetting/deleting then do nothing */
	if (ctrl->ctrl.state != NVME_CTRL_CONNECTING) {
		WARN_ON_ONCE(ctrl->ctrl.state == NVME_CTRL_NEW ||
			ctrl->ctrl.state == NVME_CTRL_LIVE);
		return;
	}

	if (nvmf_should_reconnect(&ctrl->ctrl)) {
		dev_info(ctrl->ctrl.device, "Reconnecting in %d seconds...\n",
			ctrl->ctrl.opts->reconnect_delay);
		queue_delayed_work(nvme_wq, &ctrl->reconnect_work,
				ctrl->ctrl.opts->reconnect_delay * HZ);
	} else {
		nvme_delete_ctrl(&ctrl->ctrl);
	}
}

static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new)
{
	int ret;
	bool changed;

	ret = nvme_rdma_configure_admin_queue(ctrl, new);
	if (ret)
		return ret;

	if (ctrl->ctrl.icdoff) {
		ret = -EOPNOTSUPP;
		dev_err(ctrl->ctrl.device, "icdoff is not supported!\n");
		goto destroy_admin;
	}

	if (!(ctrl->ctrl.sgls & (1 << 2))) {
		ret = -EOPNOTSUPP;
		dev_err(ctrl->ctrl.device,
			"Mandatory keyed sgls are not supported!\n");
		goto destroy_admin;
	}

	if (ctrl->ctrl.opts->queue_size > ctrl->ctrl.sqsize + 1) {
		dev_warn(ctrl->ctrl.device,
			"queue_size %zu > ctrl sqsize %u, clamping down\n",
			ctrl->ctrl.opts->queue_size, ctrl->ctrl.sqsize + 1);
	}

	if (ctrl->ctrl.sqsize + 1 > NVME_RDMA_MAX_QUEUE_SIZE) {
		dev_warn(ctrl->ctrl.device,
			"ctrl sqsize %u > max queue size %u, clamping down\n",
			ctrl->ctrl.sqsize + 1, NVME_RDMA_MAX_QUEUE_SIZE);
		ctrl->ctrl.sqsize = NVME_RDMA_MAX_QUEUE_SIZE - 1;
	}

	if (ctrl->ctrl.sqsize + 1 > ctrl->ctrl.maxcmd) {
		dev_warn(ctrl->ctrl.device,
			"sqsize %u > ctrl maxcmd %u, clamping down\n",
			ctrl->ctrl.sqsize + 1, ctrl->ctrl.maxcmd);
		ctrl->ctrl.sqsize = ctrl->ctrl.maxcmd - 1;
	}

	if (ctrl->ctrl.sgls & (1 << 20))
		ctrl->use_inline_data = true;

	if (ctrl->ctrl.queue_count > 1) {
		ret = nvme_rdma_configure_io_queues(ctrl, new);
		if (ret)
			goto destroy_admin;
	}

	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
	if (!changed) {
		/*
		 * state change failure is ok if we started ctrl delete,
		 * unless we're during creation of a new controller to
		 * avoid races with teardown flow.
		 */
		WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING &&
			     ctrl->ctrl.state != NVME_CTRL_DELETING_NOIO);
		WARN_ON_ONCE(new);
		ret = -EINVAL;
		goto destroy_io;
	}

	nvme_start_ctrl(&ctrl->ctrl);
	return 0;

destroy_io:
	if (ctrl->ctrl.queue_count > 1) {
		nvme_quiesce_io_queues(&ctrl->ctrl);
		nvme_sync_io_queues(&ctrl->ctrl);
		nvme_rdma_stop_io_queues(ctrl);
		nvme_cancel_tagset(&ctrl->ctrl);
		if (new)
			nvme_remove_io_tag_set(&ctrl->ctrl);
		nvme_rdma_free_io_queues(ctrl);
	}
destroy_admin:
	nvme_quiesce_admin_queue(&ctrl->ctrl);
	blk_sync_queue(ctrl->ctrl.admin_q);
	nvme_rdma_stop_queue(&ctrl->queues[0]);
	nvme_cancel_admin_tagset(&ctrl->ctrl);
	if (new)
		nvme_remove_admin_tag_set(&ctrl->ctrl);
	nvme_rdma_destroy_admin_queue(ctrl);
	return ret;
}

static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
{
	struct nvme_rdma_ctrl *ctrl = container_of(to_delayed_work(work),
			struct nvme_rdma_ctrl, reconnect_work);

	++ctrl->ctrl.nr_reconnects;

	if (nvme_rdma_setup_ctrl(ctrl, false))
		goto requeue;

	dev_info(ctrl->ctrl.device, "Successfully reconnected (%d attempts)\n",
			ctrl->ctrl.nr_reconnects);

	ctrl->ctrl.nr_reconnects = 0;

	return;

requeue:
	dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n",
			ctrl->ctrl.nr_reconnects);
	nvme_rdma_reconnect_or_remove(ctrl);
}

static void nvme_rdma_error_recovery_work(struct work_struct *work)
{
	struct nvme_rdma_ctrl *ctrl = container_of(work,
			struct nvme_rdma_ctrl, err_work);

	nvme_stop_keep_alive(&ctrl->ctrl);
	flush_work(&ctrl->ctrl.async_event_work);
	nvme_rdma_teardown_io_queues(ctrl, false);
	nvme_unquiesce_io_queues(&ctrl->ctrl);
	nvme_rdma_teardown_admin_queue(ctrl, false);
	nvme_unquiesce_admin_queue(&ctrl->ctrl);
	nvme_auth_stop(&ctrl->ctrl);

	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
		/* state change failure is ok if we started ctrl delete */
		WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING &&
			     ctrl->ctrl.state != NVME_CTRL_DELETING_NOIO);
		return;
	}

	nvme_rdma_reconnect_or_remove(ctrl);
}

static void nvme_rdma_error_recovery(struct nvme_rdma_ctrl *ctrl)
{
	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING))
		return;

	dev_warn(ctrl->ctrl.device, "starting error recovery\n");
	queue_work(nvme_reset_wq, &ctrl->err_work);
}

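/*
 * Drop one reference on the request and complete it to the block layer
 * once the last reference is gone.
 */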
static void nvme_rdma_end_request(struct nvme_rdma_request *req)
{
	struct request *rq = blk_mq_rq_from_pdu(req);

	if (!refcount_dec_and_test(&req->ref))
		return;
	if (!nvme_try_complete_req(rq, req->status, req->result))
		nvme_rdma_complete_rq(rq);
}

static void nvme_rdma_wr_error(struct ib_cq *cq, struct ib_wc *wc,
		const char *op)
{
	struct nvme_rdma_queue *queue = wc->qp->qp_context;
	struct nvme_rdma_ctrl *ctrl = queue->ctrl;

	if (ctrl->ctrl.state == NVME_CTRL_LIVE)
		dev_info(ctrl->ctrl.device,
			     "%s for CQE 0x%p failed with status %s (%d)\n",
			     op, wc->wr_cqe,
			     ib_wc_status_msg(wc->status), wc->status);
	nvme_rdma_error_recovery(ctrl);
}

static void nvme_rdma_memreg_done(struct ib_cq *cq, struct ib_wc *wc)
{
	if (unlikely(wc->status != IB_WC_SUCCESS))
		nvme_rdma_wr_error(cq, wc, "MEMREG");
}

static void nvme_rdma_inv_rkey_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct nvme_rdma_request *req =
		container_of(wc->wr_cqe, struct nvme_rdma_request, reg_cqe);

	if (unlikely(wc->status != IB_WC_SUCCESS))
		nvme_rdma_wr_error(cq, wc, "LOCAL_INV");
	else
		nvme_rdma_end_request(req);
}

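/*
 * Post a signaled LOCAL_INV work request to invalidate the MR's rkey;
 * request completion resumes from nvme_rdma_inv_rkey_done().
 */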
1186static int nvme_rdma_inv_rkey(struct nvme_rdma_queue *queue,
1187 struct nvme_rdma_request *req)
1188{
Christoph Hellwig71102302016-07-06 21:55:52 +09001189 struct ib_send_wr wr = {
1190 .opcode = IB_WR_LOCAL_INV,
1191 .next = NULL,
1192 .num_sge = 0,
Sagi Grimberg2f122e42017-11-23 17:35:23 +02001193 .send_flags = IB_SEND_SIGNALED,
Christoph Hellwig71102302016-07-06 21:55:52 +09001194 .ex.invalidate_rkey = req->mr->rkey,
1195 };
1196
1197 req->reg_cqe.done = nvme_rdma_inv_rkey_done;
1198 wr.wr_cqe = &req->reg_cqe;
1199
Bart Van Assche45e3cc1a2018-07-18 09:25:23 -07001200 return ib_post_send(queue->qp, &wr, NULL);
Christoph Hellwig71102302016-07-06 21:55:52 +09001201}
1202
Max Gurtovoy4686af82022-02-09 10:54:49 +02001203static void nvme_rdma_dma_unmap_req(struct ib_device *ibdev, struct request *rq)
1204{
1205 struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
1206
1207 if (blk_integrity_rq(rq)) {
1208 ib_dma_unmap_sg(ibdev, req->metadata_sgl->sg_table.sgl,
1209 req->metadata_sgl->nents, rq_dma_dir(rq));
1210 sg_free_table_chained(&req->metadata_sgl->sg_table,
1211 NVME_INLINE_METADATA_SG_CNT);
1212 }
1213
1214 ib_dma_unmap_sg(ibdev, req->data_sgl.sg_table.sgl, req->data_sgl.nents,
1215 rq_dma_dir(rq));
1216 sg_free_table_chained(&req->data_sgl.sg_table, NVME_INLINE_SG_CNT);
1217}
1218
Christoph Hellwig71102302016-07-06 21:55:52 +09001219static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue,
1220 struct request *rq)
1221{
1222 struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
Christoph Hellwig71102302016-07-06 21:55:52 +09001223 struct nvme_rdma_device *dev = queue->device;
1224 struct ib_device *ibdev = dev->dev;
Max Gurtovoy5ec5d3b2020-05-19 17:05:56 +03001225 struct list_head *pool = &queue->qp->rdma_mrs;
Christoph Hellwig71102302016-07-06 21:55:52 +09001226
Chaitanya Kulkarni34e08192019-02-20 20:13:34 -08001227 if (!blk_rq_nr_phys_segments(rq))
Christoph Hellwig71102302016-07-06 21:55:52 +09001228 return;
1229
Max Gurtovoy5ec5d3b2020-05-19 17:05:56 +03001230 if (req->use_sig_mr)
1231 pool = &queue->qp->sig_mrs;
1232
Israel Rukshinf41725b2017-11-26 10:40:55 +00001233 if (req->mr) {
Max Gurtovoy5ec5d3b2020-05-19 17:05:56 +03001234 ib_mr_pool_put(queue->qp, pool, req->mr);
Israel Rukshinf41725b2017-11-26 10:40:55 +00001235 req->mr = NULL;
1236 }
1237
Max Gurtovoy4686af82022-02-09 10:54:49 +02001238 nvme_rdma_dma_unmap_req(ibdev, rq);
Christoph Hellwig71102302016-07-06 21:55:52 +09001239}
1240
1241static int nvme_rdma_set_sg_null(struct nvme_command *c)
1242{
1243 struct nvme_keyed_sgl_desc *sg = &c->common.dptr.ksgl;
1244
1245 sg->addr = 0;
1246 put_unaligned_le24(0, sg->length);
1247 put_unaligned_le32(0, sg->key);
1248 sg->type = NVME_KEY_SGL_FMT_DATA_DESC << 4;
1249 return 0;
1250}
1251
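/*
 * Inline data path: the payload travels in the command capsule itself.
 * Each DMA-mapped data segment becomes an additional send SGE
 * (req->sge[1..]), and the command carries an offset-type SGL descriptor
 * pointing at ICDOFF.
 */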
static int nvme_rdma_map_sg_inline(struct nvme_rdma_queue *queue,
		struct nvme_rdma_request *req, struct nvme_command *c,
		int count)
{
	struct nvme_sgl_desc *sg = &c->common.dptr.sgl;
	struct ib_sge *sge = &req->sge[1];
	struct scatterlist *sgl;
	u32 len = 0;
	int i;

	for_each_sg(req->data_sgl.sg_table.sgl, sgl, count, i) {
		sge->addr = sg_dma_address(sgl);
		sge->length = sg_dma_len(sgl);
		sge->lkey = queue->device->pd->local_dma_lkey;
		len += sge->length;
		sge++;
	}

	sg->addr = cpu_to_le64(queue->ctrl->ctrl.icdoff);
	sg->length = cpu_to_le32(len);
	sg->type = (NVME_SGL_FMT_DATA_DESC << 4) | NVME_SGL_FMT_OFFSET;

	req->num_sge += count;
	return 0;
}

static int nvme_rdma_map_sg_single(struct nvme_rdma_queue *queue,
		struct nvme_rdma_request *req, struct nvme_command *c)
{
	struct nvme_keyed_sgl_desc *sg = &c->common.dptr.ksgl;

	sg->addr = cpu_to_le64(sg_dma_address(req->data_sgl.sg_table.sgl));
	put_unaligned_le24(sg_dma_len(req->data_sgl.sg_table.sgl), sg->length);
	put_unaligned_le32(queue->device->pd->unsafe_global_rkey, sg->key);
	sg->type = NVME_KEY_SGL_FMT_DATA_DESC << 4;
	return 0;
}
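/*
 * Fast-registration path: take an MR from the queue pair's pool, map the
 * data SG list through it, and describe the transfer with a keyed SGL
 * descriptor that asks the target to remotely invalidate the rkey when
 * it is done with the data.
 */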
static int nvme_rdma_map_sg_fr(struct nvme_rdma_queue *queue,
		struct nvme_rdma_request *req, struct nvme_command *c,
		int count)
{
	struct nvme_keyed_sgl_desc *sg = &c->common.dptr.ksgl;
	int nr;

	req->mr = ib_mr_pool_get(queue->qp, &queue->qp->rdma_mrs);
	if (WARN_ON_ONCE(!req->mr))
		return -EAGAIN;

	/*
	 * Align the MR to a 4K page size to match the ctrl page size and
	 * the block virtual boundary.
	 */
	nr = ib_map_mr_sg(req->mr, req->data_sgl.sg_table.sgl, count, NULL,
			  SZ_4K);
	if (unlikely(nr < count)) {
		ib_mr_pool_put(queue->qp, &queue->qp->rdma_mrs, req->mr);
		req->mr = NULL;
		if (nr < 0)
			return nr;
		return -EINVAL;
	}

	ib_update_fast_reg_key(req->mr, ib_inc_rkey(req->mr->rkey));

	req->reg_cqe.done = nvme_rdma_memreg_done;
	memset(&req->reg_wr, 0, sizeof(req->reg_wr));
	req->reg_wr.wr.opcode = IB_WR_REG_MR;
	req->reg_wr.wr.wr_cqe = &req->reg_cqe;
	req->reg_wr.wr.num_sge = 0;
	req->reg_wr.mr = req->mr;
	req->reg_wr.key = req->mr->rkey;
	req->reg_wr.access = IB_ACCESS_LOCAL_WRITE |
			     IB_ACCESS_REMOTE_READ |
			     IB_ACCESS_REMOTE_WRITE;

	sg->addr = cpu_to_le64(req->mr->iova);
	put_unaligned_le24(req->mr->length, sg->length);
	put_unaligned_le32(req->mr->rkey, sg->key);
	sg->type = (NVME_KEY_SGL_FMT_DATA_DESC << 4) |
			NVME_SGL_FMT_INVALIDATE;

	return 0;
}
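/*
 * T10-PI (metadata) support: translate the protection information
 * settings carried in the NVMe command into ib_sig_attrs so the HCA can
 * generate, strip or verify the guard/reference/application tags.
 */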
static void nvme_rdma_set_sig_domain(struct blk_integrity *bi,
		struct nvme_command *cmd, struct ib_sig_domain *domain,
		u16 control, u8 pi_type)
{
	domain->sig_type = IB_SIG_TYPE_T10_DIF;
	domain->sig.dif.bg_type = IB_T10DIF_CRC;
	domain->sig.dif.pi_interval = 1 << bi->interval_exp;
	domain->sig.dif.ref_tag = le32_to_cpu(cmd->rw.reftag);
	if (control & NVME_RW_PRINFO_PRCHK_REF)
		domain->sig.dif.ref_remap = true;

	domain->sig.dif.app_tag = le16_to_cpu(cmd->rw.apptag);
	domain->sig.dif.apptag_check_mask = le16_to_cpu(cmd->rw.appmask);
	domain->sig.dif.app_escape = true;
	if (pi_type == NVME_NS_DPS_PI_TYPE3)
		domain->sig.dif.ref_escape = true;
}

static void nvme_rdma_set_sig_attrs(struct blk_integrity *bi,
		struct nvme_command *cmd, struct ib_sig_attrs *sig_attrs,
		u8 pi_type)
{
	u16 control = le16_to_cpu(cmd->rw.control);

	memset(sig_attrs, 0, sizeof(*sig_attrs));
	if (control & NVME_RW_PRINFO_PRACT) {
		/* for WRITE_INSERT/READ_STRIP no memory domain */
		sig_attrs->mem.sig_type = IB_SIG_TYPE_NONE;
		nvme_rdma_set_sig_domain(bi, cmd, &sig_attrs->wire, control,
					 pi_type);
		/* Clear the PRACT bit since HCA will generate/verify the PI */
		control &= ~NVME_RW_PRINFO_PRACT;
		cmd->rw.control = cpu_to_le16(control);
	} else {
		/* for WRITE_PASS/READ_PASS both wire/memory domains exist */
		nvme_rdma_set_sig_domain(bi, cmd, &sig_attrs->wire, control,
					 pi_type);
		nvme_rdma_set_sig_domain(bi, cmd, &sig_attrs->mem, control,
					 pi_type);
	}
}

static void nvme_rdma_set_prot_checks(struct nvme_command *cmd, u8 *mask)
{
	*mask = 0;
	if (le16_to_cpu(cmd->rw.control) & NVME_RW_PRINFO_PRCHK_REF)
		*mask |= IB_SIG_CHECK_REFTAG;
	if (le16_to_cpu(cmd->rw.control) & NVME_RW_PRINFO_PRCHK_GUARD)
		*mask |= IB_SIG_CHECK_GUARD;
}

static void nvme_rdma_sig_done(struct ib_cq *cq, struct ib_wc *wc)
{
	if (unlikely(wc->status != IB_WC_SUCCESS))
		nvme_rdma_wr_error(cq, wc, "SIG");
}
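/*
 * Protection-information mapping: register the data and metadata SG
 * lists through an integrity-enabled MR (IB_WR_REG_MR_INTEGRITY) and
 * describe the result with a single keyed SGL descriptor.
 */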
static int nvme_rdma_map_sg_pi(struct nvme_rdma_queue *queue,
		struct nvme_rdma_request *req, struct nvme_command *c,
		int count, int pi_count)
{
	struct nvme_rdma_sgl *sgl = &req->data_sgl;
	struct ib_reg_wr *wr = &req->reg_wr;
	struct request *rq = blk_mq_rq_from_pdu(req);
	struct nvme_ns *ns = rq->q->queuedata;
	struct bio *bio = rq->bio;
	struct nvme_keyed_sgl_desc *sg = &c->common.dptr.ksgl;
	int nr;

	req->mr = ib_mr_pool_get(queue->qp, &queue->qp->sig_mrs);
	if (WARN_ON_ONCE(!req->mr))
		return -EAGAIN;

	nr = ib_map_mr_sg_pi(req->mr, sgl->sg_table.sgl, count, NULL,
			     req->metadata_sgl->sg_table.sgl, pi_count, NULL,
			     SZ_4K);
	if (unlikely(nr))
		goto mr_put;

	nvme_rdma_set_sig_attrs(blk_get_integrity(bio->bi_bdev->bd_disk), c,
				req->mr->sig_attrs, ns->pi_type);
	nvme_rdma_set_prot_checks(c, &req->mr->sig_attrs->check_mask);

	ib_update_fast_reg_key(req->mr, ib_inc_rkey(req->mr->rkey));

	req->reg_cqe.done = nvme_rdma_sig_done;
	memset(wr, 0, sizeof(*wr));
	wr->wr.opcode = IB_WR_REG_MR_INTEGRITY;
	wr->wr.wr_cqe = &req->reg_cqe;
	wr->wr.num_sge = 0;
	wr->wr.send_flags = 0;
	wr->mr = req->mr;
	wr->key = req->mr->rkey;
	wr->access = IB_ACCESS_LOCAL_WRITE |
		     IB_ACCESS_REMOTE_READ |
		     IB_ACCESS_REMOTE_WRITE;

	sg->addr = cpu_to_le64(req->mr->iova);
	put_unaligned_le24(req->mr->length, sg->length);
	put_unaligned_le32(req->mr->rkey, sg->key);
	sg->type = NVME_KEY_SGL_FMT_DATA_DESC << 4;

	return 0;

mr_put:
	ib_mr_pool_put(queue->qp, &queue->qp->sig_mrs, req->mr);
	req->mr = NULL;
	if (nr < 0)
		return nr;
	return -EINVAL;
}
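/*
 * Build and DMA-map the scatterlists for a request: always the data SG
 * list, plus the metadata SG list when the request carries a block
 * integrity payload.
 */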
static int nvme_rdma_dma_map_req(struct ib_device *ibdev, struct request *rq,
		int *count, int *pi_count)
{
	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
	int ret;

	req->data_sgl.sg_table.sgl = (struct scatterlist *)(req + 1);
	ret = sg_alloc_table_chained(&req->data_sgl.sg_table,
			blk_rq_nr_phys_segments(rq), req->data_sgl.sg_table.sgl,
			NVME_INLINE_SG_CNT);
	if (ret)
		return -ENOMEM;

	req->data_sgl.nents = blk_rq_map_sg(rq->q, rq,
					    req->data_sgl.sg_table.sgl);

	*count = ib_dma_map_sg(ibdev, req->data_sgl.sg_table.sgl,
			       req->data_sgl.nents, rq_dma_dir(rq));
	if (unlikely(*count <= 0)) {
		ret = -EIO;
		goto out_free_table;
	}

	if (blk_integrity_rq(rq)) {
		req->metadata_sgl->sg_table.sgl =
			(struct scatterlist *)(req->metadata_sgl + 1);
		ret = sg_alloc_table_chained(&req->metadata_sgl->sg_table,
				blk_rq_count_integrity_sg(rq->q, rq->bio),
				req->metadata_sgl->sg_table.sgl,
				NVME_INLINE_METADATA_SG_CNT);
		if (unlikely(ret)) {
			ret = -ENOMEM;
			goto out_unmap_sg;
		}

		req->metadata_sgl->nents = blk_rq_map_integrity_sg(rq->q,
				rq->bio, req->metadata_sgl->sg_table.sgl);
		*pi_count = ib_dma_map_sg(ibdev,
					  req->metadata_sgl->sg_table.sgl,
					  req->metadata_sgl->nents,
					  rq_dma_dir(rq));
		if (unlikely(*pi_count <= 0)) {
			ret = -EIO;
			goto out_free_pi_table;
		}
	}

	return 0;

out_free_pi_table:
	sg_free_table_chained(&req->metadata_sgl->sg_table,
			      NVME_INLINE_METADATA_SG_CNT);
out_unmap_sg:
	ib_dma_unmap_sg(ibdev, req->data_sgl.sg_table.sgl, req->data_sgl.nents,
			rq_dma_dir(rq));
out_free_table:
	sg_free_table_chained(&req->data_sgl.sg_table, NVME_INLINE_SG_CNT);
	return ret;
}
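/*
 * Top-level data mapping: dataless commands get a null SGL; small writes
 * on I/O queues may be sent inline; a single segment can use the PD's
 * unsafe global rkey; everything else goes through memory registration,
 * with the integrity path taken when use_sig_mr is set.
 */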
static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
		struct request *rq, struct nvme_command *c)
{
	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
	struct nvme_rdma_device *dev = queue->device;
	struct ib_device *ibdev = dev->dev;
	int pi_count = 0;
	int count, ret;

	req->num_sge = 1;
	refcount_set(&req->ref, 2); /* send and recv completions */

	c->common.flags |= NVME_CMD_SGL_METABUF;

	if (!blk_rq_nr_phys_segments(rq))
		return nvme_rdma_set_sg_null(c);

	ret = nvme_rdma_dma_map_req(ibdev, rq, &count, &pi_count);
	if (unlikely(ret))
		return ret;

	if (req->use_sig_mr) {
		ret = nvme_rdma_map_sg_pi(queue, req, c, count, pi_count);
		goto out;
	}

	if (count <= dev->num_inline_segments) {
		if (rq_data_dir(rq) == WRITE && nvme_rdma_queue_idx(queue) &&
		    queue->ctrl->use_inline_data &&
		    blk_rq_payload_bytes(rq) <=
				nvme_rdma_inline_data_size(queue)) {
			ret = nvme_rdma_map_sg_inline(queue, req, c, count);
			goto out;
		}

		if (count == 1 && dev->pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) {
			ret = nvme_rdma_map_sg_single(queue, req, c);
			goto out;
		}
	}

	ret = nvme_rdma_map_sg_fr(queue, req, c, count);
out:
	if (unlikely(ret))
		goto out_dma_unmap_req;

	return 0;

out_dma_unmap_req:
	nvme_rdma_dma_unmap_req(ibdev, rq);
	return ret;
}

static void nvme_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct nvme_rdma_qe *qe =
		container_of(wc->wr_cqe, struct nvme_rdma_qe, cqe);
	struct nvme_rdma_request *req =
		container_of(qe, struct nvme_rdma_request, sqe);

	if (unlikely(wc->status != IB_WC_SUCCESS))
		nvme_rdma_wr_error(cq, wc, "SEND");
	else
		nvme_rdma_end_request(req);
}
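/*
 * Post the command capsule as a signalled SEND work request; when a
 * memory registration WR is supplied via @first it is chained in front
 * of the SEND so both are posted together.
 */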
static int nvme_rdma_post_send(struct nvme_rdma_queue *queue,
		struct nvme_rdma_qe *qe, struct ib_sge *sge, u32 num_sge,
		struct ib_send_wr *first)
{
	struct ib_send_wr wr;
	int ret;

	sge->addr   = qe->dma;
	sge->length = sizeof(struct nvme_command);
	sge->lkey   = queue->device->pd->local_dma_lkey;

	wr.next       = NULL;
	wr.wr_cqe     = &qe->cqe;
	wr.sg_list    = sge;
	wr.num_sge    = num_sge;
	wr.opcode     = IB_WR_SEND;
	wr.send_flags = IB_SEND_SIGNALED;

	if (first)
		first->next = &wr;
	else
		first = &wr;

	ret = ib_post_send(queue->qp, first, NULL);
	if (unlikely(ret)) {
		dev_err(queue->ctrl->ctrl.device,
			"%s failed with error code %d\n", __func__, ret);
	}
	return ret;
}

static int nvme_rdma_post_recv(struct nvme_rdma_queue *queue,
		struct nvme_rdma_qe *qe)
{
	struct ib_recv_wr wr;
	struct ib_sge list;
	int ret;

	list.addr   = qe->dma;
	list.length = sizeof(struct nvme_completion);
	list.lkey   = queue->device->pd->local_dma_lkey;

	qe->cqe.done = nvme_rdma_recv_done;

	wr.next     = NULL;
	wr.wr_cqe   = &qe->cqe;
	wr.sg_list  = &list;
	wr.num_sge  = 1;

	ret = ib_post_recv(queue->qp, &wr, NULL);
	if (unlikely(ret)) {
		dev_err(queue->ctrl->ctrl.device,
			"%s failed with error code %d\n", __func__, ret);
	}
	return ret;
}

static struct blk_mq_tags *nvme_rdma_tagset(struct nvme_rdma_queue *queue)
{
	u32 queue_idx = nvme_rdma_queue_idx(queue);

	if (queue_idx == 0)
		return queue->ctrl->admin_tag_set.tags[queue_idx];
	return queue->ctrl->tag_set.tags[queue_idx - 1];
}

static void nvme_rdma_async_done(struct ib_cq *cq, struct ib_wc *wc)
{
	if (unlikely(wc->status != IB_WC_SUCCESS))
		nvme_rdma_wr_error(cq, wc, "ASYNC");
}
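/*
 * AER commands are not tracked as block layer requests; build the async
 * event command directly in the pre-allocated async_event_sqe and post
 * it on the admin queue.
 */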
static void nvme_rdma_submit_async_event(struct nvme_ctrl *arg)
{
	struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(arg);
	struct nvme_rdma_queue *queue = &ctrl->queues[0];
	struct ib_device *dev = queue->device->dev;
	struct nvme_rdma_qe *sqe = &ctrl->async_event_sqe;
	struct nvme_command *cmd = sqe->data;
	struct ib_sge sge;
	int ret;

	ib_dma_sync_single_for_cpu(dev, sqe->dma, sizeof(*cmd), DMA_TO_DEVICE);

	memset(cmd, 0, sizeof(*cmd));
	cmd->common.opcode = nvme_admin_async_event;
	cmd->common.command_id = NVME_AQ_BLK_MQ_DEPTH;
	cmd->common.flags |= NVME_CMD_SGL_METABUF;
	nvme_rdma_set_sg_null(cmd);

	sqe->cqe.done = nvme_rdma_async_done;

	ib_dma_sync_single_for_device(dev, sqe->dma, sizeof(*cmd),
			DMA_TO_DEVICE);

	ret = nvme_rdma_post_send(queue, sqe, &sge, 1, NULL);
	WARN_ON_ONCE(ret);
}
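/*
 * Completion handling: look up the request by command id and finish it.
 * The MR rkey must be invalidated first - either remotely by the target
 * (verified here) or by posting a LOCAL_INV work request, in which case
 * the request is completed from the invalidation's completion handler.
 */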
static void nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
		struct nvme_completion *cqe, struct ib_wc *wc)
{
	struct request *rq;
	struct nvme_rdma_request *req;

	rq = nvme_find_rq(nvme_rdma_tagset(queue), cqe->command_id);
	if (!rq) {
		dev_err(queue->ctrl->ctrl.device,
			"got bad command_id %#x on QP %#x\n",
			cqe->command_id, queue->qp->qp_num);
		nvme_rdma_error_recovery(queue->ctrl);
		return;
	}
	req = blk_mq_rq_to_pdu(rq);

	req->status = cqe->status;
	req->result = cqe->result;

	if (wc->wc_flags & IB_WC_WITH_INVALIDATE) {
		if (unlikely(!req->mr ||
			     wc->ex.invalidate_rkey != req->mr->rkey)) {
			dev_err(queue->ctrl->ctrl.device,
				"Bogus remote invalidation for rkey %#x\n",
				req->mr ? req->mr->rkey : 0);
			nvme_rdma_error_recovery(queue->ctrl);
		}
	} else if (req->mr) {
		int ret;

		ret = nvme_rdma_inv_rkey(queue, req);
		if (unlikely(ret < 0)) {
			dev_err(queue->ctrl->ctrl.device,
				"Queueing INV WR for rkey %#x failed (%d)\n",
				req->mr->rkey, ret);
			nvme_rdma_error_recovery(queue->ctrl);
		}
		/* the local invalidation completion will end the request */
		return;
	}

	nvme_rdma_end_request(req);
}

static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct nvme_rdma_qe *qe =
		container_of(wc->wr_cqe, struct nvme_rdma_qe, cqe);
	struct nvme_rdma_queue *queue = wc->qp->qp_context;
	struct ib_device *ibdev = queue->device->dev;
	struct nvme_completion *cqe = qe->data;
	const size_t len = sizeof(struct nvme_completion);

	if (unlikely(wc->status != IB_WC_SUCCESS)) {
		nvme_rdma_wr_error(cq, wc, "RECV");
		return;
	}

	/* sanity checking for received data length */
	if (unlikely(wc->byte_len < len)) {
		dev_err(queue->ctrl->ctrl.device,
			"Unexpected nvme completion length(%d)\n", wc->byte_len);
		nvme_rdma_error_recovery(queue->ctrl);
		return;
	}

	ib_dma_sync_single_for_cpu(ibdev, qe->dma, len, DMA_FROM_DEVICE);
	/*
	 * AEN requests are special as they don't time out and can
	 * survive any kind of queue freeze and often don't respond to
	 * aborts.  We don't even bother to allocate a struct request
	 * for them but rather special case them here.
	 */
	if (unlikely(nvme_is_aen_req(nvme_rdma_queue_idx(queue),
				     cqe->command_id)))
		nvme_complete_async_event(&queue->ctrl->ctrl, cqe->status,
				&cqe->result);
	else
		nvme_rdma_process_nvme_rsp(queue, cqe, wc);
	ib_dma_sync_single_for_device(ibdev, qe->dma, len, DMA_FROM_DEVICE);

	nvme_rdma_post_recv(queue, qe);
}

static int nvme_rdma_conn_established(struct nvme_rdma_queue *queue)
{
	int ret, i;

	for (i = 0; i < queue->queue_size; i++) {
		ret = nvme_rdma_post_recv(queue, &queue->rsp_ring[i]);
		if (ret)
			return ret;
	}

	return 0;
}

static int nvme_rdma_conn_rejected(struct nvme_rdma_queue *queue,
		struct rdma_cm_event *ev)
{
	struct rdma_cm_id *cm_id = queue->cm_id;
	int status = ev->status;
	const char *rej_msg;
	const struct nvme_rdma_cm_rej *rej_data;
	u8 rej_data_len;

	rej_msg = rdma_reject_msg(cm_id, status);
	rej_data = rdma_consumer_reject_data(cm_id, ev, &rej_data_len);

	if (rej_data && rej_data_len >= sizeof(u16)) {
		u16 sts = le16_to_cpu(rej_data->sts);

		dev_err(queue->ctrl->ctrl.device,
		      "Connect rejected: status %d (%s) nvme status %d (%s).\n",
		      status, rej_msg, sts, nvme_rdma_cm_msg(sts));
	} else {
		dev_err(queue->ctrl->ctrl.device,
			"Connect rejected: status %d (%s).\n", status, rej_msg);
	}

	return -ECONNRESET;
}

static int nvme_rdma_addr_resolved(struct nvme_rdma_queue *queue)
{
	struct nvme_ctrl *ctrl = &queue->ctrl->ctrl;
	int ret;

	ret = nvme_rdma_create_queue_ib(queue);
	if (ret)
		return ret;

	if (ctrl->opts->tos >= 0)
		rdma_set_service_type(queue->cm_id, ctrl->opts->tos);
	ret = rdma_resolve_route(queue->cm_id, NVME_RDMA_CM_TIMEOUT_MS);
	if (ret) {
		dev_err(ctrl->device, "rdma_resolve_route failed (%d).\n",
			queue->cm_error);
		goto out_destroy_queue;
	}

	return 0;

out_destroy_queue:
	nvme_rdma_destroy_queue_ib(queue);
	return ret;
}
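/*
 * Route resolved: fill in the RDMA CM connection parameters and the
 * NVMe/RDMA CM request private data (queue id and HRQ/HSQ sizes), then
 * issue the connect.
 */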
static int nvme_rdma_route_resolved(struct nvme_rdma_queue *queue)
{
	struct nvme_rdma_ctrl *ctrl = queue->ctrl;
	struct rdma_conn_param param = { };
	struct nvme_rdma_cm_req priv = { };
	int ret;

	param.qp_num = queue->qp->qp_num;
	param.flow_control = 1;

	param.responder_resources = queue->device->dev->attrs.max_qp_rd_atom;
	/* maximum retry count */
	param.retry_count = 7;
	param.rnr_retry_count = 7;
	param.private_data = &priv;
	param.private_data_len = sizeof(priv);

	priv.recfmt = cpu_to_le16(NVME_RDMA_CM_FMT_1_0);
	priv.qid = cpu_to_le16(nvme_rdma_queue_idx(queue));
	/*
	 * set the admin queue depth to the minimum size
	 * specified by the Fabrics standard.
	 */
	if (priv.qid == 0) {
		priv.hrqsize = cpu_to_le16(NVME_AQ_DEPTH);
		priv.hsqsize = cpu_to_le16(NVME_AQ_DEPTH - 1);
	} else {
		/*
		 * current interpretation of the fabrics spec
		 * is at minimum you make hrqsize sqsize+1, or a
		 * 1's based representation of sqsize.
		 */
		priv.hrqsize = cpu_to_le16(queue->queue_size);
		priv.hsqsize = cpu_to_le16(queue->ctrl->ctrl.sqsize);
	}

	ret = rdma_connect_locked(queue->cm_id, &param);
	if (ret) {
		dev_err(ctrl->ctrl.device,
			"rdma_connect_locked failed (%d).\n", ret);
		return ret;
	}

	return 0;
}

static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
		struct rdma_cm_event *ev)
{
	struct nvme_rdma_queue *queue = cm_id->context;
	int cm_error = 0;

	dev_dbg(queue->ctrl->ctrl.device, "%s (%d): status %d id %p\n",
		rdma_event_msg(ev->event), ev->event,
		ev->status, cm_id);

	switch (ev->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
		cm_error = nvme_rdma_addr_resolved(queue);
		break;
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		cm_error = nvme_rdma_route_resolved(queue);
		break;
	case RDMA_CM_EVENT_ESTABLISHED:
		queue->cm_error = nvme_rdma_conn_established(queue);
		/* complete cm_done regardless of success/failure */
		complete(&queue->cm_done);
		return 0;
	case RDMA_CM_EVENT_REJECTED:
		cm_error = nvme_rdma_conn_rejected(queue, ev);
		break;
	case RDMA_CM_EVENT_ROUTE_ERROR:
	case RDMA_CM_EVENT_CONNECT_ERROR:
	case RDMA_CM_EVENT_UNREACHABLE:
	case RDMA_CM_EVENT_ADDR_ERROR:
		dev_dbg(queue->ctrl->ctrl.device,
			"CM error event %d\n", ev->event);
		cm_error = -ECONNRESET;
		break;
	case RDMA_CM_EVENT_DISCONNECTED:
	case RDMA_CM_EVENT_ADDR_CHANGE:
	case RDMA_CM_EVENT_TIMEWAIT_EXIT:
		dev_dbg(queue->ctrl->ctrl.device,
			"disconnect received - connection closed\n");
		nvme_rdma_error_recovery(queue->ctrl);
		break;
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
		/* device removal is handled via the ib_client API */
		break;
	default:
		dev_err(queue->ctrl->ctrl.device,
			"Unexpected RDMA CM event (%d)\n", ev->event);
		nvme_rdma_error_recovery(queue->ctrl);
		break;
	}

	if (cm_error) {
		queue->cm_error = cm_error;
		complete(&queue->cm_done);
	}

	return 0;
}

static void nvme_rdma_complete_timed_out(struct request *rq)
{
	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
	struct nvme_rdma_queue *queue = req->queue;

	nvme_rdma_stop_queue(queue);
	nvmf_complete_timed_out_request(rq);
}
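/*
 * Block layer timeout handler: when the controller is not LIVE the
 * request is completed immediately so it cannot block teardown or setup;
 * otherwise error recovery is kicked and the timer is re-armed.
 */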
static enum blk_eh_timer_return nvme_rdma_timeout(struct request *rq)
{
	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
	struct nvme_rdma_queue *queue = req->queue;
	struct nvme_rdma_ctrl *ctrl = queue->ctrl;

	dev_warn(ctrl->ctrl.device, "I/O %d QID %d timeout\n",
		 rq->tag, nvme_rdma_queue_idx(queue));

	if (ctrl->ctrl.state != NVME_CTRL_LIVE) {
		/*
		 * If we are resetting, connecting or deleting we should
		 * complete immediately because we may block controller
		 * teardown or setup sequence
		 * - ctrl disable/shutdown fabrics requests
		 * - connect requests
		 * - initialization admin requests
		 * - I/O requests that entered after unquiescing and
		 *   the controller stopped responding
		 *
		 * All other requests should be cancelled by the error
		 * recovery work, so it's fine that we fail it here.
		 */
		nvme_rdma_complete_timed_out(rq);
		return BLK_EH_DONE;
	}

	/*
	 * LIVE state should trigger the normal error recovery which will
	 * handle completing this request.
	 */
	nvme_rdma_error_recovery(ctrl);
	return BLK_EH_RESET_TIMER;
}
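/*
 * blk-mq ->queue_rq: DMA-map the command buffer, set up the NVMe command,
 * map its data (choosing the signature MR path for PI-capable namespaces)
 * and post the send, chained after a registration WR when one is needed.
 */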
static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
		const struct blk_mq_queue_data *bd)
{
	struct nvme_ns *ns = hctx->queue->queuedata;
	struct nvme_rdma_queue *queue = hctx->driver_data;
	struct request *rq = bd->rq;
	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
	struct nvme_rdma_qe *sqe = &req->sqe;
	struct nvme_command *c = nvme_req(rq)->cmd;
	struct ib_device *dev;
	bool queue_ready = test_bit(NVME_RDMA_Q_LIVE, &queue->flags);
	blk_status_t ret;
	int err;

	WARN_ON_ONCE(rq->tag < 0);

	if (!nvme_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
		return nvme_fail_nonready_command(&queue->ctrl->ctrl, rq);

	dev = queue->device->dev;

	req->sqe.dma = ib_dma_map_single(dev, req->sqe.data,
					 sizeof(struct nvme_command),
					 DMA_TO_DEVICE);
	err = ib_dma_mapping_error(dev, req->sqe.dma);
	if (unlikely(err))
		return BLK_STS_RESOURCE;

	ib_dma_sync_single_for_cpu(dev, sqe->dma,
			sizeof(struct nvme_command), DMA_TO_DEVICE);

	ret = nvme_setup_cmd(ns, rq);
	if (ret)
		goto unmap_qe;

	nvme_start_request(rq);

	if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) &&
	    queue->pi_support &&
	    (c->common.opcode == nvme_cmd_write ||
	     c->common.opcode == nvme_cmd_read) &&
	    nvme_ns_has_pi(ns))
		req->use_sig_mr = true;
	else
		req->use_sig_mr = false;

	err = nvme_rdma_map_data(queue, rq, c);
	if (unlikely(err < 0)) {
		dev_err(queue->ctrl->ctrl.device,
			"Failed to map data (%d)\n", err);
		goto err;
	}

	sqe->cqe.done = nvme_rdma_send_done;

	ib_dma_sync_single_for_device(dev, sqe->dma,
			sizeof(struct nvme_command), DMA_TO_DEVICE);

	err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
			req->mr ? &req->reg_wr.wr : NULL);
	if (unlikely(err))
		goto err_unmap;

	return BLK_STS_OK;

err_unmap:
	nvme_rdma_unmap_data(queue, rq);
err:
	if (err == -EIO)
		ret = nvme_host_path_error(rq);
	else if (err == -ENOMEM || err == -EAGAIN)
		ret = BLK_STS_RESOURCE;
	else
		ret = BLK_STS_IOERR;
	nvme_cleanup_cmd(rq);
unmap_qe:
	ib_dma_unmap_single(dev, req->sqe.dma, sizeof(struct nvme_command),
			    DMA_TO_DEVICE);
	return ret;
}

static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
{
	struct nvme_rdma_queue *queue = hctx->driver_data;

	return ib_process_cq_direct(queue->ib_cq, -1);
}
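/*
 * After a PI request completes, query the signature MR status and convert
 * any guard/reftag/apptag mismatch into the corresponding NVMe status
 * code.
 */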
static void nvme_rdma_check_pi_status(struct nvme_rdma_request *req)
{
	struct request *rq = blk_mq_rq_from_pdu(req);
	struct ib_mr_status mr_status;
	int ret;

	ret = ib_check_mr_status(req->mr, IB_MR_CHECK_SIG_STATUS, &mr_status);
	if (ret) {
		pr_err("ib_check_mr_status failed, ret %d\n", ret);
		nvme_req(rq)->status = NVME_SC_INVALID_PI;
		return;
	}

	if (mr_status.fail_status & IB_MR_CHECK_SIG_STATUS) {
		switch (mr_status.sig_err.err_type) {
		case IB_SIG_BAD_GUARD:
			nvme_req(rq)->status = NVME_SC_GUARD_CHECK;
			break;
		case IB_SIG_BAD_REFTAG:
			nvme_req(rq)->status = NVME_SC_REFTAG_CHECK;
			break;
		case IB_SIG_BAD_APPTAG:
			nvme_req(rq)->status = NVME_SC_APPTAG_CHECK;
			break;
		}
		pr_err("PI error found type %d expected 0x%x vs actual 0x%x\n",
		       mr_status.sig_err.err_type, mr_status.sig_err.expected,
		       mr_status.sig_err.actual);
	}
}

static void nvme_rdma_complete_rq(struct request *rq)
{
	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
	struct nvme_rdma_queue *queue = req->queue;
	struct ib_device *ibdev = queue->device->dev;

	if (req->use_sig_mr)
		nvme_rdma_check_pi_status(req);

	nvme_rdma_unmap_data(queue, rq);
	ib_dma_unmap_single(ibdev, req->sqe.dma, sizeof(struct nvme_command),
			    DMA_TO_DEVICE);
	nvme_complete_rq(rq);
}

static void nvme_rdma_map_queues(struct blk_mq_tag_set *set)
{
	struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(set->driver_data);

	nvmf_map_queues(set, &ctrl->ctrl, ctrl->io_queues);
}

static const struct blk_mq_ops nvme_rdma_mq_ops = {
	.queue_rq	= nvme_rdma_queue_rq,
	.complete	= nvme_rdma_complete_rq,
	.init_request	= nvme_rdma_init_request,
	.exit_request	= nvme_rdma_exit_request,
	.init_hctx	= nvme_rdma_init_hctx,
	.timeout	= nvme_rdma_timeout,
	.map_queues	= nvme_rdma_map_queues,
	.poll		= nvme_rdma_poll,
};

static const struct blk_mq_ops nvme_rdma_admin_mq_ops = {
	.queue_rq	= nvme_rdma_queue_rq,
	.complete	= nvme_rdma_complete_rq,
	.init_request	= nvme_rdma_init_request,
	.exit_request	= nvme_rdma_exit_request,
	.init_hctx	= nvme_rdma_init_admin_hctx,
	.timeout	= nvme_rdma_timeout,
};

static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown)
{
	nvme_rdma_teardown_io_queues(ctrl, shutdown);
	nvme_quiesce_admin_queue(&ctrl->ctrl);
	nvme_disable_ctrl(&ctrl->ctrl, shutdown);
	nvme_rdma_teardown_admin_queue(ctrl, shutdown);
}

static void nvme_rdma_delete_ctrl(struct nvme_ctrl *ctrl)
{
	nvme_rdma_shutdown_ctrl(to_rdma_ctrl(ctrl), true);
}

static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
{
	struct nvme_rdma_ctrl *ctrl =
		container_of(work, struct nvme_rdma_ctrl, ctrl.reset_work);

	nvme_stop_ctrl(&ctrl->ctrl);
	nvme_rdma_shutdown_ctrl(ctrl, false);

	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
		/* state change failure should never happen */
		WARN_ON_ONCE(1);
		return;
	}

	if (nvme_rdma_setup_ctrl(ctrl, false))
		goto out_fail;

	return;

out_fail:
	++ctrl->ctrl.nr_reconnects;
	nvme_rdma_reconnect_or_remove(ctrl);
}

static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
	.name			= "rdma",
	.module			= THIS_MODULE,
	.flags			= NVME_F_FABRICS | NVME_F_METADATA_SUPPORTED,
	.reg_read32		= nvmf_reg_read32,
	.reg_read64		= nvmf_reg_read64,
	.reg_write32		= nvmf_reg_write32,
	.free_ctrl		= nvme_rdma_free_ctrl,
	.submit_async_event	= nvme_rdma_submit_async_event,
	.delete_ctrl		= nvme_rdma_delete_ctrl,
	.get_address		= nvmf_get_address,
	.stop_ctrl		= nvme_rdma_stop_ctrl,
};

/*
 * Fails a connection request if it matches an existing controller
 * (association) with the same tuple:
 * <Host NQN, Host ID, local address, remote address, remote port, SUBSYS NQN>
 *
 * if local address is not specified in the request, it will match an
 * existing controller with all the other parameters the same and no
 * local port address specified as well.
 *
 * The ports don't need to be compared as they are intrinsically
 * already matched by the port pointers supplied.
 */
static bool
nvme_rdma_existing_controller(struct nvmf_ctrl_options *opts)
{
	struct nvme_rdma_ctrl *ctrl;
	bool found = false;

	mutex_lock(&nvme_rdma_ctrl_mutex);
	list_for_each_entry(ctrl, &nvme_rdma_ctrl_list, list) {
		found = nvmf_ip_options_match(&ctrl->ctrl, opts);
		if (found)
			break;
	}
	mutex_unlock(&nvme_rdma_ctrl_mutex);

	return found;
}
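/*
 * Fabrics create_ctrl entry point: parse the target (and optional host)
 * addresses, reject duplicate associations, allocate the controller and
 * its queue array, then run the initial connect sequence.
 */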
static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
		struct nvmf_ctrl_options *opts)
{
	struct nvme_rdma_ctrl *ctrl;
	int ret;
	bool changed;

	ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
	if (!ctrl)
		return ERR_PTR(-ENOMEM);
	ctrl->ctrl.opts = opts;
	INIT_LIST_HEAD(&ctrl->list);

	if (!(opts->mask & NVMF_OPT_TRSVCID)) {
		opts->trsvcid =
			kstrdup(__stringify(NVME_RDMA_IP_PORT), GFP_KERNEL);
		if (!opts->trsvcid) {
			ret = -ENOMEM;
			goto out_free_ctrl;
		}
		opts->mask |= NVMF_OPT_TRSVCID;
	}

	ret = inet_pton_with_scope(&init_net, AF_UNSPEC,
			opts->traddr, opts->trsvcid, &ctrl->addr);
	if (ret) {
		pr_err("malformed address passed: %s:%s\n",
			opts->traddr, opts->trsvcid);
		goto out_free_ctrl;
	}

	if (opts->mask & NVMF_OPT_HOST_TRADDR) {
		ret = inet_pton_with_scope(&init_net, AF_UNSPEC,
			opts->host_traddr, NULL, &ctrl->src_addr);
		if (ret) {
			pr_err("malformed src address passed: %s\n",
			       opts->host_traddr);
			goto out_free_ctrl;
		}
	}

	if (!opts->duplicate_connect && nvme_rdma_existing_controller(opts)) {
		ret = -EALREADY;
		goto out_free_ctrl;
	}

	INIT_DELAYED_WORK(&ctrl->reconnect_work,
			nvme_rdma_reconnect_ctrl_work);
	INIT_WORK(&ctrl->err_work, nvme_rdma_error_recovery_work);
	INIT_WORK(&ctrl->ctrl.reset_work, nvme_rdma_reset_ctrl_work);

	ctrl->ctrl.queue_count = opts->nr_io_queues + opts->nr_write_queues +
				opts->nr_poll_queues + 1;
	ctrl->ctrl.sqsize = opts->queue_size - 1;
	ctrl->ctrl.kato = opts->kato;

	ret = -ENOMEM;
	ctrl->queues = kcalloc(ctrl->ctrl.queue_count, sizeof(*ctrl->queues),
				GFP_KERNEL);
	if (!ctrl->queues)
		goto out_free_ctrl;

	ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_rdma_ctrl_ops,
				0 /* no quirks, we're perfect! */);
	if (ret)
		goto out_kfree_queues;

	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING);
	WARN_ON_ONCE(!changed);

	ret = nvme_rdma_setup_ctrl(ctrl, true);
	if (ret)
		goto out_uninit_ctrl;

	dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISpcs\n",
		nvmf_ctrl_subsysnqn(&ctrl->ctrl), &ctrl->addr);

	mutex_lock(&nvme_rdma_ctrl_mutex);
	list_add_tail(&ctrl->list, &nvme_rdma_ctrl_list);
	mutex_unlock(&nvme_rdma_ctrl_mutex);

	return &ctrl->ctrl;

out_uninit_ctrl:
	nvme_uninit_ctrl(&ctrl->ctrl);
	nvme_put_ctrl(&ctrl->ctrl);
	if (ret > 0)
		ret = -EIO;
	return ERR_PTR(ret);
out_kfree_queues:
	kfree(ctrl->queues);
out_free_ctrl:
	kfree(ctrl);
	return ERR_PTR(ret);
}

static struct nvmf_transport_ops nvme_rdma_transport = {
	.name		= "rdma",
	.module		= THIS_MODULE,
	.required_opts	= NVMF_OPT_TRADDR,
	.allowed_opts	= NVMF_OPT_TRSVCID | NVMF_OPT_RECONNECT_DELAY |
			  NVMF_OPT_HOST_TRADDR | NVMF_OPT_CTRL_LOSS_TMO |
			  NVMF_OPT_NR_WRITE_QUEUES | NVMF_OPT_NR_POLL_QUEUES |
			  NVMF_OPT_TOS,
	.create_ctrl	= nvme_rdma_create_ctrl,
};

static void nvme_rdma_remove_one(struct ib_device *ib_device, void *client_data)
{
	struct nvme_rdma_ctrl *ctrl;
	struct nvme_rdma_device *ndev;
	bool found = false;

	mutex_lock(&device_list_mutex);
	list_for_each_entry(ndev, &device_list, entry) {
		if (ndev->dev == ib_device) {
			found = true;
			break;
		}
	}
	mutex_unlock(&device_list_mutex);

	if (!found)
		return;

	/* Delete all controllers using this device */
	mutex_lock(&nvme_rdma_ctrl_mutex);
	list_for_each_entry(ctrl, &nvme_rdma_ctrl_list, list) {
		if (ctrl->device->dev != ib_device)
			continue;
		nvme_delete_ctrl(&ctrl->ctrl);
	}
	mutex_unlock(&nvme_rdma_ctrl_mutex);

	flush_workqueue(nvme_delete_wq);
}

static struct ib_client nvme_rdma_ib_client = {
	.name   = "nvme_rdma",
	.remove = nvme_rdma_remove_one
};

static int __init nvme_rdma_init_module(void)
{
	int ret;

	ret = ib_register_client(&nvme_rdma_ib_client);
	if (ret)
		return ret;

	ret = nvmf_register_transport(&nvme_rdma_transport);
	if (ret)
		goto err_unreg_client;

	return 0;

err_unreg_client:
	ib_unregister_client(&nvme_rdma_ib_client);
	return ret;
}

static void __exit nvme_rdma_cleanup_module(void)
{
	struct nvme_rdma_ctrl *ctrl;

	nvmf_unregister_transport(&nvme_rdma_transport);
	ib_unregister_client(&nvme_rdma_ib_client);

	mutex_lock(&nvme_rdma_ctrl_mutex);
	list_for_each_entry(ctrl, &nvme_rdma_ctrl_list, list)
		nvme_delete_ctrl(&ctrl->ctrl);
	mutex_unlock(&nvme_rdma_ctrl_mutex);
	flush_workqueue(nvme_delete_wq);
}

module_init(nvme_rdma_init_module);
module_exit(nvme_rdma_cleanup_module);

MODULE_LICENSE("GPL v2");