[SCSI] lpfc 8.3.33: Parallelize SLI-4 Q distribution
Commonize SLI-3/4 Ring/Queue framework, to keep SLI-3 compatibility
Parallelize SLI-4 Q distribution - to use multiple posting/completion queues
Signed-off-by: James Smart <james.smart@emulex.com>
Signed-off-by: James Bottomley <JBottomley@Parallels.com>
diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index a65c05a..a870af1 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -732,7 +732,7 @@
uint32_t hbq_count; /* Count of configured HBQs */
struct hbq_s hbqs[LPFC_MAX_HBQS]; /* local copy of hbq indicies */
- uint32_t fcp_qidx; /* next work queue to post work to */
+ atomic_t fcp_qidx; /* next work queue to post work to */
unsigned long pci_bar0_map; /* Physical address for PCI BAR0 */
unsigned long pci_bar1_map; /* Physical address for PCI BAR1 */
diff --git a/drivers/scsi/lpfc/lpfc_hw.h b/drivers/scsi/lpfc/lpfc_hw.h
index 4ab0e35..d744704 100644
--- a/drivers/scsi/lpfc/lpfc_hw.h
+++ b/drivers/scsi/lpfc/lpfc_hw.h
@@ -1188,8 +1188,8 @@
*/
/* Number of rings currently used and available. */
-#define MAX_CONFIGURED_RINGS 3
-#define MAX_RINGS 4
+#define MAX_SLI3_CONFIGURED_RINGS 3
+#define MAX_SLI3_RINGS 4
/* IOCB / Mailbox is owned by FireFly */
#define OWN_CHIP 1
@@ -2993,7 +2993,7 @@
uint32_t pgpAddrLow;
uint32_t pgpAddrHigh;
- SLI2_RDSC rdsc[MAX_RINGS];
+ SLI2_RDSC rdsc[MAX_SLI3_RINGS];
} PCB_t;
/* NEW_FEATURE */
@@ -3103,18 +3103,18 @@
struct sli2_desc {
uint32_t unused1[16];
- struct lpfc_hgp host[MAX_RINGS];
- struct lpfc_pgp port[MAX_RINGS];
+ struct lpfc_hgp host[MAX_SLI3_RINGS];
+ struct lpfc_pgp port[MAX_SLI3_RINGS];
};
struct sli3_desc {
- struct lpfc_hgp host[MAX_RINGS];
+ struct lpfc_hgp host[MAX_SLI3_RINGS];
uint32_t reserved[8];
uint32_t hbq_put[16];
};
struct sli3_pgp {
- struct lpfc_pgp port[MAX_RINGS];
+ struct lpfc_pgp port[MAX_SLI3_RINGS];
uint32_t hbq_get[16];
};
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 9efe5f8..176302f 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -4551,6 +4551,13 @@
phba->cfg_sg_seg_cnt = LPFC_DEFAULT_MENLO_SG_SEG_CNT;
}
+ if (!phba->sli.ring)
+ phba->sli.ring = (struct lpfc_sli_ring *)
+ kzalloc(LPFC_SLI3_MAX_RING *
+ sizeof(struct lpfc_sli_ring), GFP_KERNEL);
+ if (!phba->sli.ring)
+ return -ENOMEM;
+
/*
* Since the sg_tablesize is module parameter, the sg_dma_buf_size
* used to create the sg_dma_buf_pool must be dynamically calculated.
@@ -4710,6 +4717,16 @@
sges_per_segment = 2;
/*
+ * For SLI4, instead of using ring 0 (LPFC_FCP_RING) for FCP commands
+ * we will associate a new ring, for each FCP fastpath EQ/CQ/WQ tuple.
+ */
+ if (!phba->sli.ring)
+ phba->sli.ring = kzalloc(
+ (LPFC_SLI3_MAX_RING + phba->cfg_fcp_eq_count) *
+ sizeof(struct lpfc_sli_ring), GFP_KERNEL);
+ if (!phba->sli.ring)
+ return -ENOMEM;
+ /*
* Since the sg_tablesize is module parameter, the sg_dma_buf_size
* used to create the sg_dma_buf_pool must be dynamically calculated.
* 2 segments are added since the IOCB needs a command and response bde.
@@ -5555,6 +5572,10 @@
/* Release the driver assigned board number */
idr_remove(&lpfc_hba_index, phba->brd_no);
+ /* Free memory allocated with sli rings */
+ kfree(phba->sli.ring);
+ phba->sli.ring = NULL;
+
kfree(phba);
return;
}
@@ -6924,6 +6945,8 @@
int
lpfc_sli4_queue_setup(struct lpfc_hba *phba)
{
+ struct lpfc_sli *psli = &phba->sli;
+ struct lpfc_sli_ring *pring;
int rc = -ENOMEM;
int fcp_eqidx, fcp_cqidx, fcp_wqidx;
int fcp_cq_index = 0;
@@ -7107,6 +7130,12 @@
"rc = 0x%x\n", rc);
goto out_destroy_mbx_wq;
}
+
+ /* Bind this WQ to the ELS ring */
+ pring = &psli->ring[LPFC_ELS_RING];
+ pring->sli.sli4.wqp = (void *)phba->sli4_hba.els_wq;
+ phba->sli4_hba.els_cq->pring = pring;
+
lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
"2590 ELS WQ setup: wq-id=%d, parent cq-id=%d\n",
phba->sli4_hba.els_wq->queue_id,
@@ -7137,6 +7166,12 @@
"WQ (%d), rc = 0x%x\n", fcp_wqidx, rc);
goto out_destroy_fcp_wq;
}
+
+ /* Bind this WQ to the next FCP ring */
+ pring = &psli->ring[MAX_SLI3_CONFIGURED_RINGS + fcp_wqidx];
+ pring->sli.sli4.wqp = (void *)phba->sli4_hba.fcp_wq[fcp_wqidx];
+ phba->sli4_hba.fcp_cq[fcp_cq_index]->pring = pring;
+
lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
"2591 FCP WQ setup: wq[%d]-id=%d, "
"parent cq[%d]-id=%d\n",
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index d7afd0f..982bd40 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -7796,14 +7796,14 @@
*
* Return: index into SLI4 fast-path FCP queue index.
**/
-static uint32_t
+static inline uint32_t
lpfc_sli4_scmd_to_wqidx_distr(struct lpfc_hba *phba)
{
- ++phba->fcp_qidx;
- if (phba->fcp_qidx >= phba->cfg_fcp_wq_count)
- phba->fcp_qidx = 0;
+ int i;
- return phba->fcp_qidx;
+ i = atomic_add_return(1, &phba->fcp_qidx);
+ i = (i % phba->cfg_fcp_wq_count);
+ return i;
}
/**
@@ -8323,16 +8323,6 @@
if ((piocb->iocb_flag & LPFC_IO_FCP) ||
(piocb->iocb_flag & LPFC_USE_FCPWQIDX)) {
- /*
- * For FCP command IOCB, get a new WQ index to distribute
- * WQE across the WQsr. On the other hand, for abort IOCB,
- * it carries the same WQ index to the original command
- * IOCB.
- */
- if (piocb->iocb_flag & LPFC_IO_FCP)
- piocb->fcp_wqidx = lpfc_sli4_scmd_to_wqidx_distr(phba);
- if (unlikely(!phba->sli4_hba.fcp_wq))
- return IOCB_ERROR;
if (lpfc_sli4_wq_put(phba->sli4_hba.fcp_wq[piocb->fcp_wqidx],
&wqe))
return IOCB_ERROR;
@@ -8413,11 +8403,18 @@
lpfc_sli_issue_iocb(struct lpfc_hba *phba, uint32_t ring_number,
struct lpfc_iocbq *piocb, uint32_t flag)
{
- struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_ELS_RING];
+ struct lpfc_sli_ring *pring;
unsigned long iflags;
- int rc;
+ int rc, idx;
if (phba->sli_rev == LPFC_SLI_REV4) {
+ if (piocb->iocb_flag & LPFC_IO_FCP) {
+ if (unlikely(!phba->sli4_hba.fcp_wq))
+ return IOCB_ERROR;
+ idx = lpfc_sli4_scmd_to_wqidx_distr(phba);
+ piocb->fcp_wqidx = idx;
+ ring_number = MAX_SLI3_CONFIGURED_RINGS + idx;
+ }
pring = &phba->sli.ring[ring_number];
spin_lock_irqsave(&pring->ring_lock, iflags);
rc = __lpfc_sli_issue_iocb(phba, ring_number, piocb, flag);
@@ -8712,7 +8709,9 @@
struct lpfc_sli *psli = &phba->sli;
struct lpfc_sli_ring *pring;
- psli->num_rings = MAX_CONFIGURED_RINGS;
+ psli->num_rings = MAX_SLI3_CONFIGURED_RINGS;
+ if (phba->sli_rev == LPFC_SLI_REV4)
+ psli->num_rings += phba->cfg_fcp_eq_count;
psli->sli_flag = 0;
psli->fcp_ring = LPFC_FCP_RING;
psli->next_ring = LPFC_FCP_NEXT_RING;
@@ -11191,6 +11190,7 @@
/**
* lpfc_sli4_sp_handle_els_wcqe - Handle els work-queue completion event
* @phba: Pointer to HBA context object.
+ * @cq: Pointer to associated CQ
* @wcqe: Pointer to work-queue completion queue entry.
*
* This routine handles an ELS work-queue completion event.
@@ -11198,12 +11198,12 @@
* Return: true if work posted to worker thread, otherwise false.
**/
static bool
-lpfc_sli4_sp_handle_els_wcqe(struct lpfc_hba *phba,
+lpfc_sli4_sp_handle_els_wcqe(struct lpfc_hba *phba, struct lpfc_queue *cq,
struct lpfc_wcqe_complete *wcqe)
{
struct lpfc_iocbq *irspiocbq;
unsigned long iflags;
- struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_ELS_RING];
+ struct lpfc_sli_ring *pring = cq->pring;
/* Get an irspiocbq for later ELS response processing use */
irspiocbq = lpfc_sli_get_iocbq(phba);
@@ -11408,7 +11408,7 @@
case CQE_CODE_COMPL_WQE:
/* Process the WQ/RQ complete event */
phba->last_completion_time = jiffies;
- workposted = lpfc_sli4_sp_handle_els_wcqe(phba,
+ workposted = lpfc_sli4_sp_handle_els_wcqe(phba, cq,
(struct lpfc_wcqe_complete *)&cqevt);
break;
case CQE_CODE_RELEASE_WQE:
@@ -11540,16 +11540,18 @@
/**
* lpfc_sli4_fp_handle_fcp_wcqe - Process fast-path work queue completion entry
- * @eqe: Pointer to fast-path completion queue entry.
+ * @phba: Pointer to HBA context object.
+ * @cq: Pointer to associated CQ
+ * @wcqe: Pointer to work-queue completion queue entry.
*
* This routine process a fast-path work queue completion entry from fast-path
* event queue for FCP command response completion.
**/
static void
-lpfc_sli4_fp_handle_fcp_wcqe(struct lpfc_hba *phba,
+lpfc_sli4_fp_handle_fcp_wcqe(struct lpfc_hba *phba, struct lpfc_queue *cq,
struct lpfc_wcqe_complete *wcqe)
{
- struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_FCP_RING];
+ struct lpfc_sli_ring *pring = cq->pring;
struct lpfc_iocbq *cmdiocbq;
struct lpfc_iocbq irspiocbq;
unsigned long iflags;
@@ -11667,7 +11669,7 @@
cq->CQ_wq++;
/* Process the WQ complete event */
phba->last_completion_time = jiffies;
- lpfc_sli4_fp_handle_fcp_wcqe(phba,
+ lpfc_sli4_fp_handle_fcp_wcqe(phba, cq,
(struct lpfc_wcqe_complete *)&wcqe);
break;
case CQE_CODE_RELEASE_WQE:
diff --git a/drivers/scsi/lpfc/lpfc_sli.h b/drivers/scsi/lpfc/lpfc_sli.h
index 2d64a2b..4b9b44e 100644
--- a/drivers/scsi/lpfc/lpfc_sli.h
+++ b/drivers/scsi/lpfc/lpfc_sli.h
@@ -131,7 +131,9 @@
#define LPFC_MAX_RING_MASK 5 /* max num of rctl/type masks allowed per
ring */
-#define LPFC_MAX_RING 4 /* max num of SLI rings used by driver */
+#define LPFC_SLI3_MAX_RING 4 /* Max num of SLI3 rings used by driver.
+ For SLI4, an additional ring for each
+ FCP WQ will be allocated. */
struct lpfc_sli_ring;
@@ -172,7 +174,7 @@
};
struct lpfc_sli4_ring {
- void *wqp; /* Pointer to associated WQ */
+ struct lpfc_queue *wqp; /* Pointer to associated WQ */
};
@@ -284,7 +286,7 @@
#define LPFC_MENLO_MAINT 0x1000 /* need for menl fw download */
#define LPFC_SLI_ASYNC_MBX_BLK 0x2000 /* Async mailbox is blocked */
- struct lpfc_sli_ring ring[LPFC_MAX_RING];
+ struct lpfc_sli_ring *ring;
int fcp_ring; /* ring used for FCP initiator commands */
int next_ring;
diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h
index f4b5765..e7d8413 100644
--- a/drivers/scsi/lpfc/lpfc_sli4.h
+++ b/drivers/scsi/lpfc/lpfc_sli4.h
@@ -142,6 +142,8 @@
uint32_t host_index; /* The host's index for putting or getting */
uint32_t hba_index; /* The last known hba index for get or put */
+ struct lpfc_sli_ring *pring; /* ptr to io ring associated with q */
+
/* For q stats */
uint32_t q_cnt_1;
uint32_t q_cnt_2;