blob: 81dab9b82f79f98e8b14229647b3b70f6a2a983d [file] [log] [blame]
/*
* This is the Fusion MPT base driver providing common API layer interface
* for access to MPT (Message Passing Technology) firmware.
*
* This code is based on drivers/scsi/mpt3sas/mpt3sas_base.c
* Copyright (C) 2012-2014 LSI Corporation
* Copyright (C) 2013-2014 Avago Technologies
* (mailto: MPT-FusionLinux.pdl@avagotech.com)
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* NO WARRANTY
* THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
* CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
* LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
* solely responsible for determining the appropriateness of using and
* distributing the Program and assumes all risks associated with its
* exercise of rights under this Agreement, including but not limited to
* the risks and costs of program errors, damage to or loss of data,
* programs or equipment, and unavailability or interruption of operations.
* DISCLAIMER OF LIABILITY
* NEITHER RECIPIENT NOR ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
* TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
* USE OR DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
* HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
* USA.
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/pci.h>
#include <linux/kdev_t.h>
#include <linux/blkdev.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/dma-mapping.h>
#include <linux/io.h>
#include <linux/time.h>
#include <linux/ktime.h>
#include <linux/kthread.h>
#include <asm/page.h> /* To get host page size per arch */
#include <linux/aer.h>
#include "mpt3sas_base.h"
/* Table of registered callback handlers, MPT_MAX_CALLBACKS entries. */
static MPT_CALLBACK mpt_callbacks[MPT_MAX_CALLBACKS];
#define FAULT_POLLING_INTERVAL 1000 /* in milliseconds */
/* maximum controller queue depth */
#define MAX_HBA_QUEUE_DEPTH 30000
#define MAX_CHAIN_DEPTH 100000
/*
* Module parameters. Every parameter registered with module_param() below
* uses mode 0444. A value of -1 is the initial value for several of them.
*/
static int max_queue_depth = -1;
module_param(max_queue_depth, int, 0444);
MODULE_PARM_DESC(max_queue_depth, " max controller queue depth ");
static int max_sgl_entries = -1;
module_param(max_sgl_entries, int, 0444);
MODULE_PARM_DESC(max_sgl_entries, " max sg entries ");
/* NOTE(review): description says default=0 while the initializer is -1 — confirm how -1 is interpreted. */
static int msix_disable = -1;
module_param(msix_disable, int, 0444);
MODULE_PARM_DESC(msix_disable, " disable msix routed interrupts (default=0)");
static int smp_affinity_enable = 1;
module_param(smp_affinity_enable, int, 0444);
MODULE_PARM_DESC(smp_affinity_enable, "SMP affinity feature enable/disable Default: enable(1)");
static int max_msix_vectors = -1;
module_param(max_msix_vectors, int, 0444);
MODULE_PARM_DESC(max_msix_vectors,
" max msix vectors");
static int irqpoll_weight = -1;
module_param(irqpoll_weight, int, 0444);
MODULE_PARM_DESC(irqpoll_weight,
"irq poll weight (default= one fourth of HBA queue depth)");
/*
* Not registered with module_param() here; it is hooked up through
* module_param_call() with a custom setter (_scsih_set_fwfault_debug).
*/
static int mpt3sas_fwfault_debug;
MODULE_PARM_DESC(mpt3sas_fwfault_debug,
" enable detection of firmware fault and halt firmware - (default=0)");
/* Initial value -1 corresponds to MPT_PERF_MODE_DEFAULT in enum mpt3sas_perf_mode. */
static int perf_mode = -1;
module_param(perf_mode, int, 0444);
MODULE_PARM_DESC(perf_mode,
"Performance mode (only for Aero/Sea Generation), options:\n\t\t"
"0 - balanced: high iops mode is enabled &\n\t\t"
"interrupt coalescing is enabled only on high iops queues,\n\t\t"
"1 - iops: high iops mode is disabled &\n\t\t"
"interrupt coalescing is enabled on all queues,\n\t\t"
"2 - latency: high iops mode is disabled &\n\t\t"
"interrupt coalescing is enabled on all queues with timeout value 0xA,\n"
"\t\tdefault - default perf_mode is 'balanced'"
);
/* Zero-initialized (file-scope static), i.e. no poll queues unless set. */
static int poll_queues;
module_param(poll_queues, int, 0444);
MODULE_PARM_DESC(poll_queues, "Number of queues to be use for io_uring poll mode.\n\t\t"
"This parameter is effective only if host_tagset_enable=1. &\n\t\t"
"when poll_queues are enabled then &\n\t\t"
"perf_mode is set to latency mode. &\n\t\t"
);
/*
* Symbolic names for the values accepted by the perf_mode module parameter
* (0 balanced, 1 iops, 2 latency per its description); -1 is the parameter's
* initial value.
*/
enum mpt3sas_perf_mode {
MPT_PERF_MODE_DEFAULT = -1,
MPT_PERF_MODE_BALANCED = 0,
MPT_PERF_MODE_IOPS = 1,
MPT_PERF_MODE_LATENCY = 2,
};
/*
* Prototypes for file-local helpers that are referenced before their
* definitions (the definitions are not part of this section of the file).
*/
static int
_base_wait_on_iocstate(struct MPT3SAS_ADAPTER *ioc,
u32 ioc_state, int timeout);
static int
_base_get_ioc_facts(struct MPT3SAS_ADAPTER *ioc);
static void
_base_clear_outstanding_commands(struct MPT3SAS_ADAPTER *ioc);
/**
 * mpt3sas_base_check_cmd_timeout - classify a command that did not complete
 * @ioc: per adapter object
 * @status: status flags of the issued command
 * @mpi_request: request message frame that was issued
 * @sz: length passed on to _debug_dump_mf() when dumping @mpi_request
 *
 * Logs whether the command timed out or was terminated by a host reset
 * (MPT3_CMD_RESET set in @status), then dumps the request frame.
 *
 * Return: 1 when a reset should be issued (plain timeout), 0 when the
 * command was terminated due to a host reset.
 */
u8
mpt3sas_base_check_cmd_timeout(struct MPT3SAS_ADAPTER *ioc,
	u8 status, void *mpi_request, int sz)
{
	const bool host_reset = (status & MPT3_CMD_RESET) != 0;

	ioc_err(ioc, "Command %s\n",
		host_reset ? "terminated due to Host Reset" : "Timeout");
	_debug_dump_mf(mpi_request, sz);

	return host_reset ? 0 : 1;
}
/**
* _scsih_set_fwfault_debug - global setting of ioc->fwfault_debug.
* @val: ?
* @kp: ?
*
* Return: ?
*/
static int
_scsih_set_fwfault_debug(const char *val, const struct kernel_param *kp)
{
int ret = param_set_int(val, kp);
struct MPT3SAS_ADAPTER *ioc;
if (ret)
return ret;
/* global ioc spinlock to protect controller list on list operations */
pr_info("setting fwfault_debug(%d)\n", mpt3sas_fwfault_debug);
spin_lock(&gioc_lock);
list_for_each_entry(ioc, &mpt3sas_ioc_list, list)
ioc->fwfault_debug = mpt3sas_fwfault_debug;
spin_unlock(&gioc_lock);
return 0;
}
module_param_call(mpt3sas_fwfault_debug, _scsih_set_fwfault_debug,
param_get_int, &mpt3sas_fwfault_debug, 0644);
/**
 * _base_readl_aero - readl() with up to three attempts
 * @addr: MPT Fusion system interface register address
 *
 * Reads the register and, while the value read is zero, tries again until
 * three reads have been performed in total.
 *
 * Return: the first non-zero value read, or the result of the third read.
 */
static inline u32
_base_readl_aero(const volatile void __iomem *addr)
{
	u32 attempt;
	u32 value = 0;

	for (attempt = 0; attempt < 3; attempt++) {
		value = readl(addr);
		if (value != 0)
			break;
	}
	return value;
}
/**
 * _base_readl - single readl() of a system interface register
 * @addr: MPT Fusion system interface register address
 *
 * Return: the value read from @addr.
 */
static inline u32
_base_readl(const volatile void __iomem *addr)
{
	u32 value = readl(addr);

	return value;
}
/**
 * _base_clone_reply_to_sys_mem - write a reply address into the reply free
 *	area kept in BAR0 space
 * @ioc: per adapter object
 * @reply: reply message frame (lower 32 bit address)
 * @index: slot index within the reply free area
 *
 * The destination is located after the register block
 * (MPI_FRAME_START_OFFSET) and after (RequestCredit + 1) request frames of
 * ioc->request_sz bytes each; every slot is one u32 wide.
 */
static void
_base_clone_reply_to_sys_mem(struct MPT3SAS_ADAPTER *ioc, u32 reply,
	u32 index)
{
	u16 cmd_credit = ioc->facts.RequestCredit + 1;
	void __iomem *slot = (void __iomem *)ioc->chip;

	/* skip the system registers, then the MPI request frames */
	slot += MPI_FRAME_START_OFFSET;
	slot += (cmd_credit * ioc->request_sz);
	/* select the u32 entry for this index */
	slot += (index * sizeof(u32));

	writel(reply, slot);
}
/**
 * _base_clone_mpi_to_sys_mem - copy an MPI frame into the system/BAR0 region
 * @dst_iomem: destination location in BAR0 space
 * @src: source data
 * @size: number of bytes to copy
 *
 * The copy is done with 32-bit writel() accesses in ascending address
 * order; @size / 4 words are written, so a trailing remainder of fewer
 * than four bytes is not copied.
 */
static void
_base_clone_mpi_to_sys_mem(void *dst_iomem, void *src, u32 size)
{
	void __iomem *dst = (void __iomem *)dst_iomem;
	const u32 *word = (u32 *)src;
	u32 remaining = size / 4;

	while (remaining--) {
		writel((u32)*word, dst);
		word++;
		dst += 4;
	}
}
/**
 * _base_clone_to_sys_mem - copy data into the system/BAR0 region
 * @dst_iomem: destination location in BAR0 space
 * @src: source data
 * @size: number of bytes to copy
 *
 * Writes @size / 4 32-bit words with writel(), lowest address first. Any
 * remainder of fewer than four bytes is left uncopied.
 */
static void
_base_clone_to_sys_mem(void __iomem *dst_iomem, void *src, u32 size)
{
	const u32 *words = (u32 *)(src);
	const u32 nr_words = size / 4;
	u32 idx;

	for (idx = 0; idx < nr_words; idx++) {
		void __iomem *dst = (void __iomem *)dst_iomem + (idx * 4);

		writel((u32)words[idx], dst);
	}
}
/**
 * _base_get_chain - virtual address of a chain frame slot in BAR0 space
 * @ioc: per adapter object
 * @smid: system request message index
 * @sge_chain_count: index of the chain frame within this smid's chain slots
 *
 * The chain area follows the register block, (RequestCredit + 1) request
 * frames and REPLY_FREE_POOL_SIZE bytes. Each smid owns MaxChainDepth
 * slots of ioc->request_sz bytes.
 *
 * Return: the chain address.
 */
static inline void __iomem*
_base_get_chain(struct MPT3SAS_ADAPTER *ioc, u16 smid,
	u8 sge_chain_count)
{
	u16 cmd_credit = ioc->facts.RequestCredit + 1;
	void __iomem *addr = (void __iomem *)ioc->chip;

	/* start of the chain area */
	addr += MPI_FRAME_START_OFFSET;
	addr += (cmd_credit * ioc->request_sz);
	addr += REPLY_FREE_POOL_SIZE;

	/* slot belonging to this smid / chain index */
	addr += (smid * ioc->facts.MaxChainDepth * ioc->request_sz);
	addr += (sge_chain_count * ioc->request_sz);

	return addr;
}
/**
 * _base_get_chain_phys - physical address of a chain frame slot in BAR0
 * @ioc: per adapter object
 * @smid: system request message index
 * @sge_chain_count: index of the chain frame within this smid's chain slots
 *
 * Same layout computation as _base_get_chain(), but based on
 * ioc->chip_phys instead of the mapped ioc->chip pointer.
 *
 * Return: physical chain address.
 */
static inline phys_addr_t
_base_get_chain_phys(struct MPT3SAS_ADAPTER *ioc, u16 smid,
	u8 sge_chain_count)
{
	u16 cmd_credit = ioc->facts.RequestCredit + 1;
	phys_addr_t addr = ioc->chip_phys;

	/* start of the chain area */
	addr += MPI_FRAME_START_OFFSET;
	addr += (cmd_credit * ioc->request_sz);
	addr += REPLY_FREE_POOL_SIZE;

	/* slot belonging to this smid / chain index */
	addr += (smid * ioc->facts.MaxChainDepth * ioc->request_sz);
	addr += (sge_chain_count * ioc->request_sz);

	return addr;
}
/**
 * _base_get_buffer_bar0 - BAR0 mapped host buffer address for an smid
 * @ioc: per adapter object
 * @smid: system request message index
 *
 * The host buffers start at the chain address computed for index
 * (RequestCredit + 2) with chain count MaxChainDepth; consecutive smids
 * are spaced 64 * 1024 bytes apart.
 *
 * Return: pointer to the buffer location in BAR0.
 */
static void __iomem *
_base_get_buffer_bar0(struct MPT3SAS_ADAPTER *ioc, u16 smid)
{
	u16 cmd_credit = ioc->facts.RequestCredit + 1;
	void __iomem *buffers_start;

	/* the extra 1 moves past the last entry to the end of the chains */
	buffers_start = _base_get_chain(ioc, cmd_credit + 1,
					ioc->facts.MaxChainDepth);

	return buffers_start + (smid * 64 * 1024);
}
/**
 * _base_get_buffer_phys_bar0 - BAR0 mapped host buffer physical address
 *	for an smid
 * @ioc: per adapter object
 * @smid: system request message index
 *
 * Physical-address counterpart of _base_get_buffer_bar0(): the base comes
 * from _base_get_chain_phys() for index (RequestCredit + 2) with chain
 * count MaxChainDepth, and consecutive smids are 64 * 1024 bytes apart.
 *
 * Return: physical address of the buffer location in BAR0.
 */
static phys_addr_t
_base_get_buffer_phys_bar0(struct MPT3SAS_ADAPTER *ioc, u16 smid)
{
	u16 cmd_credit = ioc->facts.RequestCredit + 1;
	phys_addr_t buffers_start_phys;

	buffers_start_phys = _base_get_chain_phys(ioc, cmd_credit + 1,
						  ioc->facts.MaxChainDepth);

	return buffers_start_phys + (smid * 64 * 1024);
}
/**
 * _base_get_chain_buffer_dma_to_chain_buffer - map a chain buffer DMA
 *	address back to its chain buffer
 * @ioc: per adapter object
 * @chain_buffer_dma: chain buffer dma address
 *
 * Linearly scans ioc->chain_lookup (scsiio_depth entries, each with
 * chains_needed_per_io trackers) for a tracker whose chain_buffer_dma
 * equals @chain_buffer_dma. An informational message is logged when no
 * tracker matches.
 *
 * Return: pointer to the chain buffer, or NULL when not found.
 */
static void *
_base_get_chain_buffer_dma_to_chain_buffer(struct MPT3SAS_ADAPTER *ioc,
	dma_addr_t chain_buffer_dma)
{
	struct chain_tracker *tracker;
	u16 io_idx, chain_idx;

	for (io_idx = 0; io_idx < ioc->scsiio_depth; io_idx++) {
		for (chain_idx = 0; chain_idx < ioc->chains_needed_per_io;
		     chain_idx++) {
			tracker = &ioc->chain_lookup[io_idx].chains_per_smid[chain_idx];
			if (!tracker ||
			    tracker->chain_buffer_dma != chain_buffer_dma)
				continue;
			return tracker->chain_buffer;
		}
	}

	ioc_info(ioc, "Provided chain_buffer_dma address is not in the lookup list\n");
	return NULL;
}
/**
* _clone_sg_entries - MPI EP's scsiio and config requests
* are handled here. Base function for
* double buffering, before submitting
* the requests.
*
* @ioc: per adapter object.
* @mpi_request: mf request pointer.
* @smid: system request message index.
*
* Only MPI2_FUNCTION_SCSI_IO_REQUEST and MPI2_FUNCTION_CONFIG frames are
* processed; any other function returns immediately. The SGL is walked
* starting at the request's first SGE:
* - chain element: the host chain buffer is looked up from its DMA
* address, the element's Address is rewritten to the BAR0 physical
* address of this smid's chain slot, and the walk continues in the
* host chain buffer;
* - simple element: when the transfer is host-to-IOC, the data is
* copied into this smid's BAR0 buffer and Address is rewritten to the
* BAR0 physical address of that copy.
* After the walk the recorded chain frames are copied into BAR0 (SCSI IO
* requests only).
*/
static void _clone_sg_entries(struct MPT3SAS_ADAPTER *ioc,
void *mpi_request, u16 smid)
{
Mpi2SGESimple32_t *sgel, *sgel_next;
u32 sgl_flags, sge_chain_count = 0;
bool is_write = false;
u16 i = 0;
void __iomem *buffer_iomem;
phys_addr_t buffer_iomem_phys;
void __iomem *buff_ptr;
phys_addr_t buff_ptr_phys;
/* BAR0 destination and host source of each chain frame seen during the walk */
void __iomem *dst_chain_addr[MCPU_MAX_CHAINS_PER_IO];
void *src_chain_addr[MCPU_MAX_CHAINS_PER_IO];
phys_addr_t dst_addr_phys;
MPI2RequestHeader_t *request_hdr;
struct scsi_cmnd *scmd;
struct scatterlist *sg_scmd = NULL;
int is_scsiio_req = 0;
request_hdr = (MPI2RequestHeader_t *) mpi_request;
/* Locate the first SGE according to the request type. */
if (request_hdr->Function == MPI2_FUNCTION_SCSI_IO_REQUEST) {
Mpi25SCSIIORequest_t *scsiio_request =
(Mpi25SCSIIORequest_t *)mpi_request;
sgel = (Mpi2SGESimple32_t *) &scsiio_request->SGL;
is_scsiio_req = 1;
} else if (request_hdr->Function == MPI2_FUNCTION_CONFIG) {
Mpi2ConfigRequest_t *config_req =
(Mpi2ConfigRequest_t *)mpi_request;
sgel = (Mpi2SGESimple32_t *) &config_req->PageBufferSGE;
} else
return;
/* From smid we can get scsi_cmd, once we have sg_scmd,
* we just need to get sg_virt and sg_next to get virtual
* address associated with sgel->Address.
*/
if (is_scsiio_req) {
/* Get scsi_cmd using smid */
scmd = mpt3sas_scsih_scsi_lookup_get(ioc, smid);
if (scmd == NULL) {
ioc_err(ioc, "scmd is NULL\n");
return;
}
/* Get sg_scmd from scmd provided */
sg_scmd = scsi_sglist(scmd);
}
/*
* 0 - 255 System register
* 256 - 4352 MPI Frame. (This is based on maxCredit 32)
* 4352 - 4864 Reply_free pool (512 byte is reserved
* considering maxCredit 32. Reply need extra
* room, for mCPU case kept four times of
* maxCredit).
* 4864 - 17152 SGE chain element. (32cmd * 3 chain of
* 128 byte size = 12288)
* 17152 - x Host buffer mapped with smid.
* (Each smid can have 64K Max IO.)
* BAR0+Last 1K MSIX Addr and Data
* Total size in use 2113664 bytes of 4MB BAR0
*/
buffer_iomem = _base_get_buffer_bar0(ioc, smid);
buffer_iomem_phys = _base_get_buffer_phys_bar0(ioc, smid);
/* Cursors into this smid's BAR0 buffer (mapped and physical view). */
buff_ptr = buffer_iomem;
buff_ptr_phys = buffer_iomem_phys;
/* Addresses are later stored in 32-bit SGE Address fields. */
WARN_ON(buff_ptr_phys > U32_MAX);
/* The direction is taken from the first SGE only and used for the whole list. */
if (le32_to_cpu(sgel->FlagsLength) &
(MPI2_SGE_FLAGS_HOST_TO_IOC << MPI2_SGE_FLAGS_SHIFT))
is_write = true;
/* At most MPT_MIN_PHYS_SEGMENTS + MaxChainDepth iterations are performed. */
for (i = 0; i < MPT_MIN_PHYS_SEGMENTS + ioc->facts.MaxChainDepth; i++) {
sgl_flags =
(le32_to_cpu(sgel->FlagsLength) >> MPI2_SGE_FLAGS_SHIFT);
/* Element types other than chain/simple are not handled and do not advance sgel. */
switch (sgl_flags & MPI2_SGE_FLAGS_ELEMENT_MASK) {
case MPI2_SGE_FLAGS_CHAIN_ELEMENT:
/*
* Helper function which on passing
* chain_buffer_dma returns chain_buffer. Get
* the virtual address for sgel->Address
*/
sgel_next =
_base_get_chain_buffer_dma_to_chain_buffer(ioc,
le32_to_cpu(sgel->Address));
if (sgel_next == NULL)
return;
/*
* This is copying a 128 byte chain
* frame (not a host buffer)
*/
/*
* NOTE(review): sge_chain_count indexes dst_chain_addr[] and
* src_chain_addr[] (MCPU_MAX_CHAINS_PER_IO entries) without an
* explicit bound check in this function — confirm the limit is
* guaranteed elsewhere.
*/
dst_chain_addr[sge_chain_count] =
_base_get_chain(ioc,
smid, sge_chain_count);
src_chain_addr[sge_chain_count] =
(void *) sgel_next;
dst_addr_phys = _base_get_chain_phys(ioc,
smid, sge_chain_count);
WARN_ON(dst_addr_phys > U32_MAX);
/* Point the chain element at the BAR0 copy of the chain frame. */
sgel->Address =
cpu_to_le32(lower_32_bits(dst_addr_phys));
/* Continue the walk inside the host chain buffer. */
sgel = sgel_next;
sge_chain_count++;
break;
case MPI2_SGE_FLAGS_SIMPLE_ELEMENT:
/* The low 24 bits of FlagsLength are used as the element's byte length. */
if (is_write) {
if (is_scsiio_req) {
_base_clone_to_sys_mem(buff_ptr,
sg_virt(sg_scmd),
(le32_to_cpu(sgel->FlagsLength) &
0x00ffffff));
/*
* FIXME: this relies on a zero
* PCI mem_offset.
*/
sgel->Address =
cpu_to_le32((u32)buff_ptr_phys);
} else {
/* Config requests take their payload from ioc->config_vaddr. */
_base_clone_to_sys_mem(buff_ptr,
ioc->config_vaddr,
(le32_to_cpu(sgel->FlagsLength) &
0x00ffffff));
sgel->Address =
cpu_to_le32((u32)buff_ptr_phys);
}
}
/*
* The BAR0 cursors advance by the element length even when
* nothing was copied (is_write false); in that case Address is
* not modified by this function.
*/
buff_ptr += (le32_to_cpu(sgel->FlagsLength) &
0x00ffffff);
buff_ptr_phys += (le32_to_cpu(sgel->FlagsLength) &
0x00ffffff);
if ((le32_to_cpu(sgel->FlagsLength) &
(MPI2_SGE_FLAGS_END_OF_BUFFER
<< MPI2_SGE_FLAGS_SHIFT)))
goto eob_clone_chain;
else {
/*
* Every single element in MPT will have
* associated sg_next. Better to sanity that
* sg_next is not NULL, but it will be a bug
* if it is null.
*/
/*
* NOTE(review): sgel is only advanced for SCSI IO requests.
* For a config request without END_OF_BUFFER the same SGE
* is visited again on the next iteration — presumably config
* SGLs are a single element with END_OF_BUFFER set; confirm.
*/
if (is_scsiio_req) {
sg_scmd = sg_next(sg_scmd);
if (sg_scmd)
sgel++;
else
goto eob_clone_chain;
}
}
break;
}
}
eob_clone_chain:
/* Copy each recorded chain frame (ioc->request_sz bytes) into its BAR0 slot. */
for (i = 0; i < sge_chain_count; i++) {
if (is_scsiio_req)
_base_clone_to_sys_mem(dst_chain_addr[i],
src_chain_addr[i], ioc->request_sz);
}
}
/**
* mpt3sas_remove_dead_ioc_func - kthread context to remove dead ioc
* @arg: input argument, used to derive ioc
*
* Return:
* 0 if controller is removed from pci subsystem.
* -1 for other case.
*/
static int mpt3sas_remove_dead_ioc_func(void *arg)
{
struct MPT3SAS_ADAPTER *ioc = (struct MPT3SAS_ADAPTER *)arg;
struct pci_dev *pdev;
if (!ioc)
return -1;
pdev = ioc->pdev;
if (!pdev)
return -1;
pci_stop_and_remove_bus_device_locked(pdev);
return 0;
}
/**
* _base_sync_drv_fw_timestamp - Sync Drive-Fw TimeStamp.
* @ioc: Per Adapter Object
*
* Return: nothing.
*/
static void _base_sync_drv_fw_timestamp(struct MPT3SAS_ADAPTER *ioc)
{
Mpi26IoUnitControlRequest_t *mpi_request;
Mpi26IoUnitControlReply_t *mpi_reply;
u16 smid;
ktime_t current_time;
u64 TimeStamp = 0;
u8 issue_reset = 0;
mutex_lock(&ioc->scsih_cmds.mutex);
if (ioc->scsih_cmds.status != MPT3_CMD_NOT_USED) {
ioc_err(ioc, "scsih_cmd in use %s\n", __func__);
goto out;
}
ioc->scsih_cmds.status = MPT3_CMD_PENDING;
smid = mpt3sas_base_get_smid(ioc, ioc->scsih_cb_idx);
if (!smid) {
ioc_err(ioc, "Failed obtaining a smid %s\n", __func__);
ioc->scsih_cmds.status = MPT3_CMD_NOT_USED;
goto out;
}
mpi_request = mpt3sas_base_get_msg_frame(ioc, smid);
ioc->scsih_cmds.smid = smid;
memset(mpi_request, 0, sizeof(Mpi26IoUnitControlRequest_t));
mpi_request->Function = MPI2_FUNCTION_IO_UNIT_CONTROL;
mpi_request->Operation = MPI26_CTRL_OP_SET_IOC_PARAMETER;
mpi_request->IOCParameter = MPI26_SET_IOC_PARAMETER_SYNC_TIMESTAMP;
current_time = ktime_get_real();
TimeStamp = ktime_to_ms(current_time);
mpi_request->Reserved7 = cpu_to_le32(TimeStamp >> 32);
mpi_request->IOCParameterValue = cpu_to_le32(TimeStamp & 0xFFFFFFFF);
init_completion(&ioc->scsih_cmds.done);
ioc->put_smid_default(ioc, smid);
dinitprintk(ioc, ioc_info(ioc,
"Io Unit Control Sync TimeStamp (sending), @time %lld ms\n",
TimeStamp));
wait_for_completion_timeout(&ioc->scsih_cmds.done,
MPT3SAS_TIMESYNC_TIMEOUT_SECONDS*HZ);
if (!(ioc->scsih_cmds.status & MPT3_CMD_COMPLETE)) {
mpt3sas_check_cmd_timeout(ioc,
ioc->scsih_cmds.status, mpi_request,
sizeof(Mpi2SasIoUnitControlRequest_t)/4, issue_reset);
goto issue_host_reset;
}
if (ioc->scsih_cmds.status & MPT3_CMD_REPLY_VALID) {
mpi_reply = ioc->scsih_cmds.reply;
dinitprintk(ioc, ioc_info(ioc,
"Io Unit Control sync timestamp (complete): ioc_status(0x%04x), loginfo(0x%08x)\n",
le16_to_cpu(mpi_reply->IOCStatus),
le32_to_cpu(mpi_reply->IOCLogInfo)));
}
issue_host_reset:
if (issue_reset)
mpt3sas_base_hard_reset_handler(ioc, FORCE_BIG_HAMMER);
ioc->scsih_cmds.status = MPT3_CMD_NOT_USED;
out:
mutex_unlock(&ioc->scsih_cmds.mutex);
}
/**
* _base_fault_reset_work - workq handling ioc fault conditions
* @work: input argument, used to derive ioc
*
* Periodic poll of the IOC state. Each run reads the doorbell state and
* - after repeated "non-operational" readings, flushes running commands
* and starts a kthread to remove the adapter (no rearm);
* - while the IOC is in COREDUMP state, waits (by re-arming) until the
* coredump finishes or the timeout expires;
* - if the IOC is not OPERATIONAL, issues a hard reset;
* - otherwise optionally syncs the driver/firmware timestamp.
* Unless one of the early returns is taken, the work re-queues itself
* after FAULT_POLLING_INTERVAL milliseconds.
*
* Locking: the rearm_timer label is always entered with
* ioc->ioc_reset_in_progress_lock held; every goto to it takes the lock
* first.
*
* Context: sleep.
*/
static void
_base_fault_reset_work(struct work_struct *work)
{
struct MPT3SAS_ADAPTER *ioc =
container_of(work, struct MPT3SAS_ADAPTER, fault_reset_work.work);
unsigned long flags;
u32 doorbell;
int rc;
struct task_struct *p;
spin_lock_irqsave(&ioc->ioc_reset_in_progress_lock, flags);
/*
* Skip this poll while host recovery (outside of a coredump wait) or
* PCI error recovery is in progress; just re-arm.
*/
if ((ioc->shost_recovery && (ioc->ioc_coredump_loop == 0)) ||
ioc->pci_error_recovery)
goto rearm_timer;
spin_unlock_irqrestore(&ioc->ioc_reset_in_progress_lock, flags);
doorbell = mpt3sas_base_get_iocstate(ioc, 0);
/* All state-mask bits set is treated as a non-operational host. */
if ((doorbell & MPI2_IOC_STATE_MASK) == MPI2_IOC_STATE_MASK) {
ioc_err(ioc, "SAS host is non-operational !!!!\n");
/* It may be possible that EEH recovery can resolve some of
* pci bus failure issues rather removing the dead ioc function
* by considering controller is in a non-operational state. So
* here priority is given to the EEH recovery. If it does
* not resolve this issue, mpt3sas driver will consider this
* controller to non-operational state and remove the dead ioc
* function.
*/
/* Tolerate the first five consecutive non-operational readings. */
if (ioc->non_operational_loop++ < 5) {
spin_lock_irqsave(&ioc->ioc_reset_in_progress_lock,
flags);
goto rearm_timer;
}
/*
* Call _scsih_flush_pending_cmds callback so that we flush all
* pending commands back to OS. This call is required to avoid
* deadlock at block layer. Dead IOC will fail to do diag reset,
* and this call is safe since dead ioc will never return any
* command back from HW.
*/
mpt3sas_base_pause_mq_polling(ioc);
ioc->schedule_dead_ioc_flush_running_cmds(ioc);
/*
* Set remove_host flag early since kernel thread will
* take some time to execute.
*/
ioc->remove_host = 1;
/* Remove the dead host from a separate kernel thread. */
p = kthread_run(mpt3sas_remove_dead_ioc_func, ioc,
"%s_dead_ioc_%d", ioc->driver_name, ioc->id);
if (IS_ERR(p))
ioc_err(ioc, "%s: Running mpt3sas_dead_ioc thread failed !!!!\n",
__func__);
else
ioc_err(ioc, "%s: Running mpt3sas_dead_ioc thread success !!!!\n",
__func__);
return; /* don't rearm timer */
}
if ((doorbell & MPI2_IOC_STATE_MASK) == MPI2_IOC_STATE_COREDUMP) {
/* Timeout from manufacturing page 11 if non-zero, else the driver default. */
u8 timeout = (ioc->manu_pg11.CoreDumpTOSec) ?
ioc->manu_pg11.CoreDumpTOSec :
MPT3SAS_DEFAULT_COREDUMP_TIMEOUT_SECONDS;
/* Convert the timeout into a number of polling iterations. */
timeout /= (FAULT_POLLING_INTERVAL/1000);
/* First poll that sees COREDUMP: quiesce the host side once. */
if (ioc->ioc_coredump_loop == 0) {
mpt3sas_print_coredump_info(ioc,
doorbell & MPI2_DOORBELL_DATA_MASK);
/* do not accept any IOs and disable the interrupts */
spin_lock_irqsave(
&ioc->ioc_reset_in_progress_lock, flags);
ioc->shost_recovery = 1;
spin_unlock_irqrestore(
&ioc->ioc_reset_in_progress_lock, flags);
mpt3sas_base_mask_interrupts(ioc);
mpt3sas_base_pause_mq_polling(ioc);
_base_clear_outstanding_commands(ioc);
}
ioc_info(ioc, "%s: CoreDump loop %d.",
__func__, ioc->ioc_coredump_loop);
/* Wait until CoreDump completes or times out */
if (ioc->ioc_coredump_loop++ < timeout) {
spin_lock_irqsave(
&ioc->ioc_reset_in_progress_lock, flags);
goto rearm_timer;
}
}
/*
* A coredump wait was in progress: report whether it ended because the
* state changed or because the loop count reached the timeout.
*/
if (ioc->ioc_coredump_loop) {
if ((doorbell & MPI2_IOC_STATE_MASK) != MPI2_IOC_STATE_COREDUMP)
ioc_err(ioc, "%s: CoreDump completed. LoopCount: %d",
__func__, ioc->ioc_coredump_loop);
else
ioc_err(ioc, "%s: CoreDump Timed out. LoopCount: %d",
__func__, ioc->ioc_coredump_loop);
ioc->ioc_coredump_loop = MPT3SAS_COREDUMP_LOOP_DONE;
}
ioc->non_operational_loop = 0;
/* Any state other than OPERATIONAL triggers a hard reset. */
if ((doorbell & MPI2_IOC_STATE_MASK) != MPI2_IOC_STATE_OPERATIONAL) {
rc = mpt3sas_base_hard_reset_handler(ioc, FORCE_BIG_HAMMER);
ioc_warn(ioc, "%s: hard reset: %s\n",
__func__, rc == 0 ? "success" : "failed");
/* Re-read the state after the reset and report a fault/coredump code if present. */
doorbell = mpt3sas_base_get_iocstate(ioc, 0);
if ((doorbell & MPI2_IOC_STATE_MASK) == MPI2_IOC_STATE_FAULT) {
mpt3sas_print_fault_code(ioc, doorbell &
MPI2_DOORBELL_DATA_MASK);
} else if ((doorbell & MPI2_IOC_STATE_MASK) ==
MPI2_IOC_STATE_COREDUMP)
mpt3sas_print_coredump_info(ioc, doorbell &
MPI2_DOORBELL_DATA_MASK);
/* Reset failed and the IOC is still not operational: stop polling. */
if (rc && (doorbell & MPI2_IOC_STATE_MASK) !=
MPI2_IOC_STATE_OPERATIONAL)
return; /* don't rearm timer */
}
ioc->ioc_coredump_loop = 0;
/*
* Timestamp sync: the counter is incremented once per poll reaching this
* point and the sync runs when it reaches time_sync_interval.
*/
if (ioc->time_sync_interval &&
++ioc->timestamp_update_count >= ioc->time_sync_interval) {
ioc->timestamp_update_count = 0;
_base_sync_drv_fw_timestamp(ioc);
}
spin_lock_irqsave(&ioc->ioc_reset_in_progress_lock, flags);
rearm_timer:
/* The queue pointer is checked under the lock; it is not re-armed once it is NULL. */
if (ioc->fault_reset_work_q)
queue_delayed_work(ioc->fault_reset_work_q,
&ioc->fault_reset_work,
msecs_to_jiffies(FAULT_POLLING_INTERVAL));
spin_unlock_irqrestore(&ioc->ioc_reset_in_progress_lock, flags);
}
/**
* mpt3sas_base_start_watchdog - start the fault_reset_work_q
* @ioc: per adapter object
*
* Context: sleep.
*/
void
mpt3sas_base_start_watchdog(struct MPT3SAS_ADAPTER *ioc)
{
unsigned long flags;
if (ioc->fault_reset_work_q)
return;
ioc->timestamp_update_count = 0;
/* initialize fault polling */
INIT_DELAYED_WORK(&ioc->fault_reset_work, _base_fault_reset_work);
snprintf(ioc->fault_reset_work_q_name,
sizeof(ioc->fault_reset_work_q_name), "poll_%s%d_status",
ioc->driver_name, ioc->id);
ioc->fault_reset_work_q =
create_singlethread_workqueue(ioc->fault_reset_work_q_name);
if (!ioc->fault_reset_work_q) {
ioc_err(ioc, "%s: failed (line=%d)\n", __func__, __LINE__);
return;
}
spin_lock_irqsave(&ioc->ioc_reset_in_progress_lock, flags);
if (ioc->fault_reset_work_q)
queue_delayed_work(ioc->fault_reset_work_q,
&ioc->fault_reset_work,
msecs_to_jiffies(FAULT_POLLING_INTERVAL));
spin_unlock_irqrestore(&ioc->ioc_reset_in_progress_lock, flags);
}
/**
 * mpt3sas_base_stop_watchdog - stop and destroy the fault polling work queue
 * @ioc: per adapter object
 *
 * Detaches the work queue pointer from @ioc under
 * ioc_reset_in_progress_lock, then cancels the delayed work (flushing the
 * queue when the cancel reports nothing was pending) and destroys the
 * queue. Does nothing further when no queue was set.
 *
 * Context: sleep.
 */
void
mpt3sas_base_stop_watchdog(struct MPT3SAS_ADAPTER *ioc)
{
	struct workqueue_struct *old_wq;
	unsigned long irq_flags;

	spin_lock_irqsave(&ioc->ioc_reset_in_progress_lock, irq_flags);
	old_wq = ioc->fault_reset_work_q;
	ioc->fault_reset_work_q = NULL;
	spin_unlock_irqrestore(&ioc->ioc_reset_in_progress_lock, irq_flags);

	if (old_wq == NULL)
		return;

	if (!cancel_delayed_work_sync(&ioc->fault_reset_work))
		flush_workqueue(old_wq);
	destroy_workqueue(old_wq);
}
/**
 * mpt3sas_base_fault_info - log the firmware FAULT code
 * @ioc: per adapter object
 * @fault_code: fault code, printed as a 4-digit hex value
 */
void
mpt3sas_base_fault_info(struct MPT3SAS_ADAPTER *ioc, u16 fault_code)
{
	ioc_err(ioc,
		"fault_state(0x%04x)!\n",
		fault_code);
}
/**
 * mpt3sas_base_coredump_info - log the firmware CoreDump state code
 * @ioc: per adapter object
 * @fault_code: code to report, printed as a 4-digit hex value
 *
 * Return: nothing.
 */
void
mpt3sas_base_coredump_info(struct MPT3SAS_ADAPTER *ioc, u16 fault_code)
{
	ioc_err(ioc,
		"coredump_state(0x%04x)!\n",
		fault_code);
}
/**
 * mpt3sas_base_wait_for_coredump_completion - wait for the IOC to reach
 *	FAULT state after a coredump
 * @ioc: per adapter object
 * @caller: caller function name, used as the log message prefix
 *
 * Waits via _base_wait_on_iocstate() for MPI2_IOC_STATE_FAULT. The
 * timeout is manu_pg11.CoreDumpTOSec when that field is non-zero,
 * otherwise MPT3SAS_DEFAULT_COREDUMP_TIMEOUT_SECONDS.
 *
 * Return: the value returned by _base_wait_on_iocstate(); 0 is logged as
 * "completed", non-zero as "timed out".
 */
int
mpt3sas_base_wait_for_coredump_completion(struct MPT3SAS_ADAPTER *ioc,
	const char *caller)
{
	u8 timeout;
	int ioc_state;

	if (ioc->manu_pg11.CoreDumpTOSec)
		timeout = ioc->manu_pg11.CoreDumpTOSec;
	else
		timeout = MPT3SAS_DEFAULT_COREDUMP_TIMEOUT_SECONDS;

	ioc_state = _base_wait_on_iocstate(ioc, MPI2_IOC_STATE_FAULT,
					   timeout);

	if (ioc_state == 0) {
		ioc_info(ioc,
		    "%s: CoreDump completed. (ioc_state=0x%x)\n",
		    caller, ioc_state);
	} else {
		ioc_err(ioc,
		    "%s: CoreDump timed out. (ioc_state=0x%x)\n",
		    caller, ioc_state);
	}

	return ioc_state;
}
/**
 * mpt3sas_halt_firmware - halt the mpt controller firmware
 * @ioc: per adapter object
 *
 * For debugging timeout related issues; only active when
 * ioc->fwfault_debug is non-zero. Unless the IOC is already in FAULT or
 * COREDUMP state (in which case the code is printed), 0xC0FFEE00 is
 * written to the doorbell register to halt the controller firmware. With
 * both driver and firmware stopped, the end user can obtain a ring
 * buffer from the controller UART.
 *
 * This function does not return once past the fwfault_debug check: it
 * spins forever when fwfault_debug is 2 and panics otherwise.
 */
void
mpt3sas_halt_firmware(struct MPT3SAS_ADAPTER *ioc)
{
	u32 doorbell;
	u32 state;

	if (!ioc->fwfault_debug)
		return;

	dump_stack();

	doorbell = ioc->base_readl(&ioc->chip->Doorbell);
	state = doorbell & MPI2_IOC_STATE_MASK;

	if (state == MPI2_IOC_STATE_FAULT) {
		mpt3sas_print_fault_code(ioc, doorbell &
		    MPI2_DOORBELL_DATA_MASK);
	} else if (state == MPI2_IOC_STATE_COREDUMP) {
		mpt3sas_print_coredump_info(ioc, doorbell &
		    MPI2_DOORBELL_DATA_MASK);
	} else {
		writel(0xC0FFEE00, &ioc->chip->Doorbell);
		ioc_err(ioc, "Firmware is halted due to command timeout\n");
	}

	if (ioc->fwfault_debug != 2)
		panic("panic in %s\n", __func__);

	/* fwfault_debug == 2: park this CPU here */
	for (;;)
		;
}
/**
 * _base_sas_ioc_info - log a textual description of a reply's ioc status
 * @ioc: per adapter object
 * @mpi_reply: reply mf payload returned from firmware
 * @request_hdr: request mf
 *
 * Nothing is logged for SCSI IO, RAID passthrough and event notification
 * requests, for the "config invalid page" status, for driver persistent
 * trigger config pages unless MPT_DEBUG_CONFIG logging is enabled, or for
 * any status that has no description in the table below (which includes
 * the SCSI IO reply statuses). Otherwise a warning with the status text
 * and request type is printed and the request frame is dumped.
 */
static void
_base_sas_ioc_info(struct MPT3SAS_ADAPTER *ioc, MPI2DefaultReply_t *mpi_reply,
	MPI2RequestHeader_t *request_hdr)
{
	/* IOCStatus -> description; terminated by an entry with NULL text */
	static const struct {
		u16 status;
		const char *text;
	} status_text[] = {
		/* Common IOCStatus values for all replies */
		{ MPI2_IOCSTATUS_INVALID_FUNCTION, "invalid function" },
		{ MPI2_IOCSTATUS_BUSY, "busy" },
		{ MPI2_IOCSTATUS_INVALID_SGL, "invalid sgl" },
		{ MPI2_IOCSTATUS_INTERNAL_ERROR, "internal error" },
		{ MPI2_IOCSTATUS_INVALID_VPID, "invalid vpid" },
		{ MPI2_IOCSTATUS_INSUFFICIENT_RESOURCES,
		  "insufficient resources" },
		{ MPI2_IOCSTATUS_INSUFFICIENT_POWER, "insufficient power" },
		{ MPI2_IOCSTATUS_INVALID_FIELD, "invalid field" },
		{ MPI2_IOCSTATUS_INVALID_STATE, "invalid state" },
		{ MPI2_IOCSTATUS_OP_STATE_NOT_SUPPORTED,
		  "op state not supported" },
		/* Config IOCStatus values */
		{ MPI2_IOCSTATUS_CONFIG_INVALID_ACTION,
		  "config invalid action" },
		{ MPI2_IOCSTATUS_CONFIG_INVALID_TYPE, "config invalid type" },
		{ MPI2_IOCSTATUS_CONFIG_INVALID_PAGE, "config invalid page" },
		{ MPI2_IOCSTATUS_CONFIG_INVALID_DATA, "config invalid data" },
		{ MPI2_IOCSTATUS_CONFIG_NO_DEFAULTS, "config no defaults" },
		{ MPI2_IOCSTATUS_CONFIG_CANT_COMMIT, "config cant commit" },
		/*
		 * SCSI IO Reply statuses are intentionally absent: they get
		 * no description and therefore produce no output here.
		 */
		/* SCSI Initiator / Target end-to-end data protection */
		{ MPI2_IOCSTATUS_EEDP_GUARD_ERROR, "eedp guard error" },
		{ MPI2_IOCSTATUS_EEDP_REF_TAG_ERROR, "eedp ref tag error" },
		{ MPI2_IOCSTATUS_EEDP_APP_TAG_ERROR, "eedp app tag error" },
		/* SCSI Target values */
		{ MPI2_IOCSTATUS_TARGET_INVALID_IO_INDEX,
		  "target invalid io index" },
		{ MPI2_IOCSTATUS_TARGET_ABORTED, "target aborted" },
		{ MPI2_IOCSTATUS_TARGET_NO_CONN_RETRYABLE,
		  "target no conn retryable" },
		{ MPI2_IOCSTATUS_TARGET_NO_CONNECTION,
		  "target no connection" },
		{ MPI2_IOCSTATUS_TARGET_XFER_COUNT_MISMATCH,
		  "target xfer count mismatch" },
		{ MPI2_IOCSTATUS_TARGET_DATA_OFFSET_ERROR,
		  "target data offset error" },
		{ MPI2_IOCSTATUS_TARGET_TOO_MUCH_WRITE_DATA,
		  "target too much write data" },
		{ MPI2_IOCSTATUS_TARGET_IU_TOO_SHORT, "target iu too short" },
		{ MPI2_IOCSTATUS_TARGET_ACK_NAK_TIMEOUT,
		  "target ack nak timeout" },
		{ MPI2_IOCSTATUS_TARGET_NAK_RECEIVED, "target nak received" },
		/* Serial Attached SCSI values */
		{ MPI2_IOCSTATUS_SAS_SMP_REQUEST_FAILED,
		  "smp request failed" },
		{ MPI2_IOCSTATUS_SAS_SMP_DATA_OVERRUN, "smp data overrun" },
		/* Diagnostic Buffer Post / Diagnostic Release values */
		{ MPI2_IOCSTATUS_DIAGNOSTIC_RELEASED, "diagnostic released" },
		{ 0, NULL }
	};
	u16 ioc_status = le16_to_cpu(mpi_reply->IOCStatus) &
	    MPI2_IOCSTATUS_MASK;
	const char *desc = NULL;
	const char *func_str = NULL;
	u16 frame_sz;
	unsigned int n;

	/* SCSI_IO, RAID_PASS are handled from _scsih_scsi_ioc_info */
	if (request_hdr->Function == MPI2_FUNCTION_SCSI_IO_REQUEST ||
	    request_hdr->Function == MPI2_FUNCTION_RAID_SCSI_IO_PASSTHROUGH ||
	    request_hdr->Function == MPI2_FUNCTION_EVENT_NOTIFICATION)
		return;

	if (ioc_status == MPI2_IOCSTATUS_CONFIG_INVALID_PAGE)
		return;

	/*
	 * Older firmware does not support driver trigger pages, so do not
	 * report errors for them unless config debugging is enabled.
	 */
	if (request_hdr->Function == MPI2_FUNCTION_CONFIG) {
		Mpi2ConfigRequest_t *cfg_req =
		    (Mpi2ConfigRequest_t *)request_hdr;

		if (!(ioc->logging_level & MPT_DEBUG_CONFIG) &&
		    cfg_req->ExtPageType ==
		    MPI2_CONFIG_EXTPAGETYPE_DRIVER_PERSISTENT_TRIGGER)
			return;
	}

	/* translate the status; unknown or undescribed statuses stay silent */
	for (n = 0; status_text[n].text != NULL; n++) {
		if (status_text[n].status == ioc_status) {
			desc = status_text[n].text;
			break;
		}
	}
	if (desc == NULL)
		return;

	/* pick the request name and the number of bytes to dump */
	switch (request_hdr->Function) {
	case MPI2_FUNCTION_CONFIG:
		func_str = "config_page";
		frame_sz = sizeof(Mpi2ConfigRequest_t) + ioc->sge_size;
		break;
	case MPI2_FUNCTION_SCSI_TASK_MGMT:
		func_str = "task_mgmt";
		frame_sz = sizeof(Mpi2SCSITaskManagementRequest_t);
		break;
	case MPI2_FUNCTION_SAS_IO_UNIT_CONTROL:
		func_str = "sas_iounit_ctl";
		frame_sz = sizeof(Mpi2SasIoUnitControlRequest_t);
		break;
	case MPI2_FUNCTION_SCSI_ENCLOSURE_PROCESSOR:
		func_str = "enclosure";
		frame_sz = sizeof(Mpi2SepRequest_t);
		break;
	case MPI2_FUNCTION_IOC_INIT:
		func_str = "ioc_init";
		frame_sz = sizeof(Mpi2IOCInitRequest_t);
		break;
	case MPI2_FUNCTION_PORT_ENABLE:
		func_str = "port_enable";
		frame_sz = sizeof(Mpi2PortEnableRequest_t);
		break;
	case MPI2_FUNCTION_SMP_PASSTHROUGH:
		func_str = "smp_passthru";
		frame_sz = sizeof(Mpi2SmpPassthroughRequest_t) + ioc->sge_size;
		break;
	case MPI2_FUNCTION_NVME_ENCAPSULATED:
		func_str = "nvme_encapsulated";
		frame_sz = sizeof(Mpi26NVMeEncapsulatedRequest_t) +
		    ioc->sge_size;
		break;
	default:
		func_str = "unknown";
		frame_sz = 32;
		break;
	}

	ioc_warn(ioc, "ioc_status: %s(0x%04x), request(0x%p),(%s)\n",
		 desc, ioc_status, request_hdr, func_str);
	_debug_dump_mf(request_hdr, frame_sz/4);
}
/**
 * _base_display_event_data - verbose translation of firmware async events
 * @ioc: per adapter object
 * @mpi_reply: reply mf payload returned from firmware
 *
 * Logs a human readable name for the event carried in @mpi_reply. Nothing is
 * printed unless MPT_DEBUG_EVENTS is set in ioc->logging_level. The IR events
 * and "Log Entry Added" are additionally suppressed when ioc->hide_ir_msg is
 * set. SAS discovery and PCIe enumeration events print their own line
 * (including a status word when it is non-zero) and return directly.
 */
static void
_base_display_event_data(struct MPT3SAS_ADAPTER *ioc,
	Mpi2EventNotificationReply_t *mpi_reply)
{
	char *desc = NULL;
	u16 event;

	if (!(ioc->logging_level & MPT_DEBUG_EVENTS))
		return;

	event = le16_to_cpu(mpi_reply->Event);

	switch (event) {
	case MPI2_EVENT_LOG_DATA:
		desc = "Log Data";
		break;
	case MPI2_EVENT_STATE_CHANGE:
		desc = "Status Change";
		break;
	case MPI2_EVENT_HARD_RESET_RECEIVED:
		desc = "Hard Reset Received";
		break;
	case MPI2_EVENT_EVENT_CHANGE:
		desc = "Event Change";
		break;
	case MPI2_EVENT_SAS_DEVICE_STATUS_CHANGE:
		desc = "Device Status Change";
		break;
	case MPI2_EVENT_IR_OPERATION_STATUS:
		if (!ioc->hide_ir_msg)
			desc = "IR Operation Status";
		break;
	case MPI2_EVENT_SAS_DISCOVERY:
	{
		Mpi2EventDataSasDiscovery_t *event_data =
			(Mpi2EventDataSasDiscovery_t *)mpi_reply->EventData;

		ioc_info(ioc, "Discovery: (%s)",
			 event_data->ReasonCode == MPI2_EVENT_SAS_DISC_RC_STARTED ?
			 "start" : "stop");
		if (event_data->DiscoveryStatus)
			pr_cont(" discovery_status(0x%08x)",
				le32_to_cpu(event_data->DiscoveryStatus));
		pr_cont("\n");
		return;
	}
	case MPI2_EVENT_SAS_BROADCAST_PRIMITIVE:
		desc = "SAS Broadcast Primitive";
		break;
	case MPI2_EVENT_SAS_INIT_DEVICE_STATUS_CHANGE:
		desc = "SAS Init Device Status Change";
		break;
	case MPI2_EVENT_SAS_INIT_TABLE_OVERFLOW:
		desc = "SAS Init Table Overflow";
		break;
	case MPI2_EVENT_SAS_TOPOLOGY_CHANGE_LIST:
		desc = "SAS Topology Change List";
		break;
	case MPI2_EVENT_SAS_ENCL_DEVICE_STATUS_CHANGE:
		desc = "SAS Enclosure Device Status Change";
		break;
	case MPI2_EVENT_IR_VOLUME:
		if (!ioc->hide_ir_msg)
			desc = "IR Volume";
		break;
	case MPI2_EVENT_IR_PHYSICAL_DISK:
		if (!ioc->hide_ir_msg)
			desc = "IR Physical Disk";
		break;
	case MPI2_EVENT_IR_CONFIGURATION_CHANGE_LIST:
		if (!ioc->hide_ir_msg)
			desc = "IR Configuration Change List";
		break;
	case MPI2_EVENT_LOG_ENTRY_ADDED:
		if (!ioc->hide_ir_msg)
			desc = "Log Entry Added";
		break;
	case MPI2_EVENT_TEMP_THRESHOLD:
		desc = "Temperature Threshold";
		break;
	case MPI2_EVENT_ACTIVE_CABLE_EXCEPTION:
		desc = "Cable Event";
		break;
	case MPI2_EVENT_SAS_DEVICE_DISCOVERY_ERROR:
		desc = "SAS Device Discovery Error";
		break;
	case MPI2_EVENT_PCIE_DEVICE_STATUS_CHANGE:
		desc = "PCIE Device Status Change";
		break;
	case MPI2_EVENT_PCIE_ENUMERATION:
	{
		Mpi26EventDataPCIeEnumeration_t *event_data =
			(Mpi26EventDataPCIeEnumeration_t *)mpi_reply->EventData;

		ioc_info(ioc, "PCIE Enumeration: (%s)",
			 event_data->ReasonCode == MPI26_EVENT_PCIE_ENUM_RC_STARTED ?
			 "start" : "stop");
		/*
		 * Leading blank keeps the status separated from the
		 * "(start)"/"(stop)" text, as the discovery case does.
		 */
		if (event_data->EnumerationStatus)
			pr_cont(" enumeration_status(0x%08x)",
				le32_to_cpu(event_data->EnumerationStatus));
		pr_cont("\n");
		return;
	}
	case MPI2_EVENT_PCIE_TOPOLOGY_CHANGE_LIST:
		desc = "PCIE Topology Change List";
		break;
	default:
		/* Unknown events are not logged. */
		break;
	}

	if (!desc)
		return;

	ioc_info(ioc, "%s\n", desc);
}
/**
 * _base_sas_log_info - verbose translation of firmware log info
 * @ioc: per adapter object
 * @log_info: log info
 *
 * Splits @log_info into bus type (bits 31:28), originator (bits 27:24),
 * code (bits 23:16) and sub code (bits 15:0) and prints them as a warning.
 * Only bus type 3 (SAS) is reported. The nexus loss value 0x31170000 is
 * always dropped, and the task abort related values are dropped when
 * ioc->ignore_loginfos is set.
 */
static void
_base_sas_log_info(struct MPT3SAS_ADAPTER *ioc, u32 log_info)
{
	/*
	 * Decode with explicit shifts and masks. The allocation order of
	 * bit-fields inside a storage unit is implementation-defined, so
	 * overlaying a bit-field struct on the u32 only yields these fields
	 * on ABIs that allocate from the least significant bit.
	 */
	const u8 bus_type = (log_info >> 28) & 0xF;
	const u8 originator = (log_info >> 24) & 0xF;
	const u8 code = (log_info >> 16) & 0xFF;
	const u16 subcode = log_info & 0xFFFF;
	/* Fallback so an unrecognised originator never passes NULL to %s. */
	char *originator_str = "unknown";

	if (bus_type != 3 /*SAS*/)
		return;

	/* each nexus loss loginfo */
	if (log_info == 0x31170000)
		return;

	/* eat the loginfos associated with task aborts */
	if (ioc->ignore_loginfos && (log_info == 0x30050000 || log_info ==
	    0x31140000 || log_info == 0x31130000))
		return;

	switch (originator) {
	case 0:
		originator_str = "IOP";
		break;
	case 1:
		originator_str = "PL";
		break;
	case 2:
		if (!ioc->hide_ir_msg)
			originator_str = "IR";
		else
			originator_str = "WarpDrive";
		break;
	default:
		break;
	}

	ioc_warn(ioc, "log_info(0x%08x): originator(%s), code(0x%02x), sub_code(0x%04x)\n",
		 log_info, originator_str, code, subcode);
}
/**
 * _base_display_reply_info - report IOC status / log info found in a reply
 * @ioc: per adapter object
 * @smid: system request message index
 * @msix_index: MSIX table index supplied by the OS
 * @reply: reply message frame (lower 32bit addr)
 *
 * Translates @reply to its virtual address and then:
 *  - dumps the IOC status via _base_sas_ioc_info() when the masked status is
 *    non-zero and MPT_DEBUG_REPLY logging is enabled,
 *  - decodes the IOC log info when the reply flags it as available,
 *  - forwards status and log info to mpt3sas_trigger_mpi() when either one
 *    is non-zero.
 */
static void
_base_display_reply_info(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index,
	u32 reply)
{
	MPI2DefaultReply_t *reply_frame;
	u32 log_info = 0;
	u16 status;

	reply_frame = mpt3sas_base_get_reply_virt_addr(ioc, reply);
	if (unlikely(!reply_frame)) {
		ioc_err(ioc, "mpi_reply not valid at %s:%d/%s()!\n",
			__FILE__, __LINE__, __func__);
		return;
	}

	status = le16_to_cpu(reply_frame->IOCStatus);

	if ((ioc->logging_level & MPT_DEBUG_REPLY) &&
	    (status & MPI2_IOCSTATUS_MASK))
		_base_sas_ioc_info(ioc, reply_frame,
				   mpt3sas_base_get_msg_frame(ioc, smid));

	if (status & MPI2_IOCSTATUS_FLAG_LOG_INFO_AVAILABLE) {
		log_info = le32_to_cpu(reply_frame->IOCLogInfo);
		_base_sas_log_info(ioc, log_info);
	}

	/* Nothing to trigger on when both status and log info are clear. */
	if (!status && !log_info)
		return;

	/* The trigger only sees the status code, not the flag bits. */
	status &= MPI2_IOCSTATUS_MASK;
	mpt3sas_trigger_mpi(ioc, status, log_info);
}
/**
 * mpt3sas_base_done - base internal command completion routine
 * @ioc: per adapter object
 * @smid: system request message index
 * @msix_index: MSIX table index supplied by the OS
 * @reply: reply message frame(lower 32bit addr)
 *
 * Replies to an event ACK are handed to
 * mpt3sas_check_for_pending_internal_cmds(). For anything else, if a base
 * command is in use, the reply (when present) is copied into
 * ioc->base_cmds.reply, the command status is updated and the waiter on
 * ioc->base_cmds.done is completed.
 *
 * Return:
 * 1 meaning mf should be freed from _base_interrupt
 * 0 means the mf is freed from this function.
 * For event ACK replies the value returned by
 * mpt3sas_check_for_pending_internal_cmds() is passed through.
 */
u8
mpt3sas_base_done(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index,
	u32 reply)
{
	MPI2DefaultReply_t *frame =
		mpt3sas_base_get_reply_virt_addr(ioc, reply);

	if (frame && frame->Function == MPI2_FUNCTION_EVENT_ACK)
		return mpt3sas_check_for_pending_internal_cmds(ioc, smid);

	/* No base command outstanding: nothing to complete. */
	if (ioc->base_cmds.status == MPT3_CMD_NOT_USED)
		return 1;

	ioc->base_cmds.status |= MPT3_CMD_COMPLETE;
	if (frame) {
		ioc->base_cmds.status |= MPT3_CMD_REPLY_VALID;
		/* MsgLength is multiplied by 4 to obtain the byte count. */
		memcpy(ioc->base_cmds.reply, frame, frame->MsgLength * 4);
	}
	ioc->base_cmds.status &= ~MPT3_CMD_PENDING;

	complete(&ioc->base_cmds.done);
	return 1;
}
/**
 * _base_async_event - main callback handler for firmware async events
 * @ioc: per adapter object
 * @msix_index: MSIX table index supplied by the OS
 * @reply: reply message frame(lower 32bit addr)
 *
 * Logs the event, acknowledges it when the firmware asks for an ACK and then
 * passes the reply on to the scsih and ctl event callbacks. If no smid is
 * available for the ACK, the event is queued on
 * ioc->delayed_event_ack_list instead; if that allocation fails as well, no
 * ACK is recorded.
 *
 * Return:
 * 1 meaning mf should be freed from _base_interrupt
 * 0 means the mf is freed from this function.
 * This implementation always returns 1.
 */
static u8
_base_async_event(struct MPT3SAS_ADAPTER *ioc, u8 msix_index, u32 reply)
{
	Mpi2EventNotificationReply_t *event_reply;
	Mpi2EventAckRequest_t *ack;
	struct _event_ack_list *delayed;
	u16 smid;

	event_reply = mpt3sas_base_get_reply_virt_addr(ioc, reply);
	if (!event_reply ||
	    event_reply->Function != MPI2_FUNCTION_EVENT_NOTIFICATION)
		return 1;

	_base_display_event_data(ioc, event_reply);

	if (event_reply->AckRequired & MPI2_EVENT_NOTIFICATION_ACK_REQUIRED) {
		smid = mpt3sas_base_get_smid(ioc, ioc->base_cb_idx);
		if (smid) {
			/* Build and post the event ACK right away. */
			ack = mpt3sas_base_get_msg_frame(ioc, smid);
			memset(ack, 0, sizeof(Mpi2EventAckRequest_t));
			ack->Function = MPI2_FUNCTION_EVENT_ACK;
			ack->Event = event_reply->Event;
			ack->EventContext = event_reply->EventContext;
			ack->VF_ID = 0; /* TODO */
			ack->VP_ID = 0;
			ioc->put_smid_default(ioc, smid);
		} else {
			/* No smid free: remember the event for a later ACK. */
			delayed = kzalloc(sizeof(*delayed), GFP_ATOMIC);
			if (delayed) {
				INIT_LIST_HEAD(&delayed->list);
				delayed->Event = event_reply->Event;
				delayed->EventContext =
					event_reply->EventContext;
				list_add_tail(&delayed->list,
					      &ioc->delayed_event_ack_list);
				dewtprintk(ioc,
					   ioc_info(ioc, "DELAYED: EVENT ACK: event (0x%04x)\n",
						    le16_to_cpu(event_reply->Event)));
			}
		}
	}

	/* scsih callback handler */
	mpt3sas_scsih_event_callback(ioc, msix_index, reply);

	/* ctl callback handler */
	mpt3sas_ctl_event_callback(ioc, msix_index, reply);

	return 1;
}
/**
 * _get_st_from_smid - look up the scsiio tracker that belongs to a smid
 * @ioc: per adapter object
 * @smid: system request message index
 *
 * Warns and fails when @smid is zero or not below ioc->hi_priority_smid.
 *
 * Return: the private data of the SCSI command found for @smid, or NULL
 * when the smid is out of range or no command is found.
 */
static struct scsiio_tracker *
_get_st_from_smid(struct MPT3SAS_ADAPTER *ioc, u16 smid)
{
	struct scsi_cmnd *scmd;

	if (WARN_ON(!smid) || WARN_ON(smid >= ioc->hi_priority_smid))
		return NULL;

	scmd = mpt3sas_scsih_scsi_lookup_get(ioc, smid);

	return scmd ? scsi_cmd_priv(scmd) : NULL;
}
/**
 * _base_get_cb_idx - obtain the callback index
 * @ioc: per adapter object
 * @smid: system request message index
 *
 * The smid value selects where the callback index is read from:
 *  - below ioc->hi_priority_smid: the scsiio tracker of the command, or
 *    ioc->ctl_cb_idx for the single ctl smid,
 *  - below ioc->internal_smid: ioc->hpr_lookup[],
 *  - up to and including ioc->hba_queue_depth: ioc->internal_lookup[].
 *
 * Return: callback index, or 0xFF when none is found for @smid.
 */
static u8
_base_get_cb_idx(struct MPT3SAS_ADAPTER *ioc, u16 smid)
{
	const u16 ctl_smid =
		ioc->scsiio_depth - INTERNAL_SCSIIO_CMDS_COUNT + 1;
	struct scsiio_tracker *st;

	if (smid < ioc->hi_priority_smid) {
		if (smid == ctl_smid)
			return ioc->ctl_cb_idx;
		if (smid > ctl_smid)
			return 0xFF;

		st = _get_st_from_smid(ioc, smid);
		return st ? st->cb_idx : 0xFF;
	}

	if (smid < ioc->internal_smid)
		return ioc->hpr_lookup[smid - ioc->hi_priority_smid].cb_idx;

	if (smid <= ioc->hba_queue_depth)
		return ioc->internal_lookup[smid - ioc->internal_smid].cb_idx;

	return 0xFF;
}
/**
 * mpt3sas_base_pause_mq_polling - pause polling on the mq poll queues
 *				when driver is flushing out the IOs.
 * @ioc: per adapter object
 *
 * Sets the pause flag of every mq poll (io uring) queue and then waits until
 * none of them is marked busy any more. Without this, an IO being flushed
 * by the driver could also be completed from the poll path.
 *
 * Returns nothing.
 */
void
mpt3sas_base_pause_mq_polling(struct MPT3SAS_ADAPTER *ioc)
{
	const int nr_poll_queues =
		ioc->reply_queue_count - ioc->iopoll_q_start_index;
	int i;

	/* First stop new polls from starting on any queue ... */
	for (i = 0; i < nr_poll_queues; i++)
		atomic_set(&ioc->io_uring_poll_queues[i].pause, 1);

	/* ... then wait for polls that are already running to finish. */
	for (i = 0; i < nr_poll_queues; i++) {
		while (atomic_read(&ioc->io_uring_poll_queues[i].busy)) {
			cpu_relax();
			udelay(500);
		}
	}
}
/**
 * mpt3sas_base_resume_mq_polling - Resume polling on mq poll queues.
 * @ioc: per adapter object
 *
 * Clears the pause flag of every mq poll queue.
 *
 * Returns nothing.
 */
void
mpt3sas_base_resume_mq_polling(struct MPT3SAS_ADAPTER *ioc)
{
	const int nr_poll_queues =
		ioc->reply_queue_count - ioc->iopoll_q_start_index;
	int i;

	for (i = 0; i < nr_poll_queues; i++)
		atomic_set(&ioc->io_uring_poll_queues[i].pause, 0);
}
/**
 * mpt3sas_base_mask_interrupts - disable interrupts
 * @ioc: per adapter object
 *
 * Disabling ResetIRQ, Reply and Doorbell Interrupts. ioc->mask_interrupts
 * is set before the register is touched.
 */
void
mpt3sas_base_mask_interrupts(struct MPT3SAS_ADAPTER *ioc)
{
	u32 him;

	ioc->mask_interrupts = 1;

	him = ioc->base_readl(&ioc->chip->HostInterruptMask);
	him |= MPI2_HIM_DIM + MPI2_HIM_RIM + MPI2_HIM_RESET_IRQ_MASK;
	writel(him, &ioc->chip->HostInterruptMask);

	/* Read the register back; presumably this flushes the posted write. */
	ioc->base_readl(&ioc->chip->HostInterruptMask);
}
/**
 * mpt3sas_base_unmask_interrupts - enable interrupts
 * @ioc: per adapter object
 *
 * Enabling only Reply Interrupts. ioc->mask_interrupts is cleared after the
 * register has been written.
 */
void
mpt3sas_base_unmask_interrupts(struct MPT3SAS_ADAPTER *ioc)
{
	u32 him = ioc->base_readl(&ioc->chip->HostInterruptMask);

	writel(him & ~MPI2_HIM_RIM, &ioc->chip->HostInterruptMask);
	ioc->mask_interrupts = 0;
}
/*
* View of one 64-bit reply descriptor either as a single word or as two
* 32-bit halves. _base_process_reply_queue() loads a descriptor into @word
* and compares both @u.low and @u.high against UINT_MAX.
*
* Which half of @word each member aliases depends on host byte order; the
* user in this file applies the same test to both halves.
*/
union reply_descriptor {
/* whole descriptor, in CPU byte order as stored by the reader */
u64 word;
struct {
/* first 32 bits of @word in memory */
u32 low;
/* second 32 bits of @word in memory */
u32 high;
} u;
};
/**
 * base_mod64 - remainder of a 64-bit value divided by a 32-bit value
 * @dividend: 64-bit dividend
 * @divisor: 32-bit divisor
 *
 * A zero @divisor is reported with pr_err() and yields 0 instead of being
 * passed on to do_div().
 *
 * Return: @dividend modulo @divisor, or 0 when @divisor is zero.
 */
static u32 base_mod64(u64 dividend, u32 divisor)
{
	if (!divisor) {
		pr_err("mpt3sas: DIVISOR is zero, in div fn\n");
		/* Do not fall through into a division by zero. */
		return 0;
	}

	/* do_div() leaves the quotient in dividend and returns the remainder */
	return do_div(dividend, divisor);
}
/**
* _base_process_reply_queue - Process reply descriptors from reply
* descriptor post queue.
* @reply_q: per IRQ's reply queue object.
*
* reply_q->busy is used as a try-lock: it is taken with atomic_add_unless()
* on entry and dropped on every exit path, so a caller that finds the queue
* already being processed returns 0 immediately.
*
* Descriptors are consumed starting at reply_q->reply_post_host_index until
* an unused descriptor is found, a descriptor half still reads as all ones,
* or ioc->thresh_hold descriptors have been handled in this call. In the
* threshold case the host index is written to the chip and, for queues that
* are not io_uring poll queues, irq polling is scheduled.
*
* Return: number of reply descriptors processed from reply
* descriptor queue. (Counted in a u64 and returned as int.)
*/
static int
_base_process_reply_queue(struct adapter_reply_queue *reply_q)
{
union reply_descriptor rd;
u64 completed_cmds;
u8 request_descript_type;
u16 smid;
u8 cb_idx;
u32 reply;
u8 msix_index = reply_q->msix_index;
struct MPT3SAS_ADAPTER *ioc = reply_q->ioc;
Mpi2ReplyDescriptorsUnion_t *rpf;
u8 rc;
completed_cmds = 0;
/* Try-lock: give up if another context is already draining this queue. */
if (!atomic_add_unless(&reply_q->busy, 1, 1))
return completed_cmds;
/* Start at the descriptor the host index currently points to. */
rpf = &reply_q->reply_post_free[reply_q->reply_post_host_index];
request_descript_type = rpf->Default.ReplyFlags
& MPI2_RPY_DESCRIPT_FLAGS_TYPE_MASK;
/* Nothing posted at the current index: release the lock and leave. */
if (request_descript_type == MPI2_RPY_DESCRIPT_FLAGS_UNUSED) {
atomic_dec(&reply_q->busy);
return completed_cmds;
}
cb_idx = 0xFF;
do {
/*
* Consumed slots are overwritten with all ones further down, so a
* descriptor whose low or high half still reads as all ones is not
* processed (presumably the firmware has not finished writing it).
*/
rd.word = le64_to_cpu(rpf->Words);
if (rd.u.low == UINT_MAX || rd.u.high == UINT_MAX)
goto out;
reply = 0;
smid = le16_to_cpu(rpf->Default.DescriptorTypeDependent1);
/* Success descriptors: no reply frame, callback is invoked with reply 0. */
if (request_descript_type ==
MPI25_RPY_DESCRIPT_FLAGS_FAST_PATH_SCSI_IO_SUCCESS ||
request_descript_type ==
MPI2_RPY_DESCRIPT_FLAGS_SCSI_IO_SUCCESS ||
request_descript_type ==
MPI26_RPY_DESCRIPT_FLAGS_PCIE_ENCAPSULATED_SUCCESS) {
cb_idx = _base_get_cb_idx(ioc, smid);
if ((likely(cb_idx < MPT_MAX_CALLBACKS)) &&
(likely(mpt_callbacks[cb_idx] != NULL))) {
rc = mpt_callbacks[cb_idx](ioc, smid,
msix_index, 0);
/* A non-zero return asks us to free the smid here. */
if (rc)
mpt3sas_base_free_smid(ioc, smid);
}
} else if (request_descript_type ==
MPI2_RPY_DESCRIPT_FLAGS_ADDRESS_REPLY) {
/* Address reply: the descriptor carries a reply frame address. */
reply = le32_to_cpu(
rpf->AddressReply.ReplyFrameAddress);
/* An address outside the reply DMA range is treated as "no reply". */
if (reply > ioc->reply_dma_max_address ||
reply < ioc->reply_dma_min_address)
reply = 0;
if (smid) {
cb_idx = _base_get_cb_idx(ioc, smid);
if ((likely(cb_idx < MPT_MAX_CALLBACKS)) &&
(likely(mpt_callbacks[cb_idx] != NULL))) {
rc = mpt_callbacks[cb_idx](ioc, smid,
msix_index, reply);
if (reply)
_base_display_reply_info(ioc,
smid, msix_index, reply);
if (rc)
mpt3sas_base_free_smid(ioc,
smid);
}
} else {
/* smid 0: not tied to a request, handled as an async event. */
_base_async_event(ioc, msix_index, reply);
}
/* reply free queue handling */
if (reply) {
/* Advance the reply free host index, wrapping at queue depth. */
ioc->reply_free_host_index =
(ioc->reply_free_host_index ==
(ioc->reply_free_queue_depth - 1)) ?
0 : ioc->reply_free_host_index + 1;
/* Hand the reply frame back through the reply free queue. */
ioc->reply_free[ioc->reply_free_host_index] =
cpu_to_le32(reply);
if (ioc->is_mcpu_endpoint)
_base_clone_reply_to_sys_mem(ioc,
reply,
ioc->reply_free_host_index);
writel(ioc->reply_free_host_index,
&ioc->chip->ReplyFreeHostIndex);
}
}
/* Mark this descriptor as consumed (all ones). */
rpf->Words = cpu_to_le64(ULLONG_MAX);
/* Advance the reply post host index, wrapping at queue depth. */
reply_q->reply_post_host_index =
(reply_q->reply_post_host_index ==
(ioc->reply_post_queue_depth - 1)) ? 0 :
reply_q->reply_post_host_index + 1;
/* Peek at the type of the next descriptor. */
request_descript_type =
reply_q->reply_post_free[reply_q->reply_post_host_index].
Default.ReplyFlags & MPI2_RPY_DESCRIPT_FLAGS_TYPE_MASK;
completed_cmds++;
/* Update the reply post host index after continuously
* processing the threshold number of Reply Descriptors.
* So that FW can find enough entries to post the Reply
* Descriptors in the reply descriptor post queue.
*/
if (completed_cmds >= ioc->thresh_hold) {
if (ioc->combined_reply_queue) {
writel(reply_q->reply_post_host_index |
((msix_index & 7) <<
MPI2_RPHI_MSIX_INDEX_SHIFT),
ioc->replyPostRegisterIndex[msix_index/8]);
} else {
writel(reply_q->reply_post_host_index |
(msix_index <<
MPI2_RPHI_MSIX_INDEX_SHIFT),
&ioc->chip->ReplyPostHostIndex);
}
/* Schedule irq polling unless this is an io_uring poll queue or polling is already scheduled. */
if (!reply_q->is_iouring_poll_q &&
!reply_q->irq_poll_scheduled) {
reply_q->irq_poll_scheduled = true;
irq_poll_sched(&reply_q->irqpoll);
}
atomic_dec(&reply_q->busy);
return completed_cmds;
}
if (request_descript_type == MPI2_RPY_DESCRIPT_FLAGS_UNUSED)
goto out;
/* Follow the host index: back to the first descriptor after a wrap. */
if (!reply_q->reply_post_host_index)
rpf = reply_q->reply_post_free;
else
rpf++;
} while (1);
out:
/* Nothing was consumed, so there is no host index to publish. */
if (!completed_cmds) {
atomic_dec(&reply_q->busy);
return completed_cmds;
}
/* WarpDrive: per-vector host index register, written without the MSI-X index field. */
if (ioc->is_warpdrive) {
writel(reply_q->reply_post_host_index,
ioc->reply_post_host_index[msix_index]);
atomic_dec(&reply_q->busy);
return completed_cmds;
}
/* Update Reply Post Host Index.
* For those HBA's which support combined reply queue feature
* 1. Get the correct Supplemental Reply Post Host Index Register.
* i.e. (msix_index / 8)th entry from Supplemental Reply Post Host
* Index Register address bank i.e replyPostRegisterIndex[],
* 2. Then update this register with new reply host index value
* in ReplyPostIndex field and the MSIxIndex field with
* msix_index value reduced to a value between 0 and 7,
* using a modulo 8 operation. Since each Supplemental Reply Post
* Host Index Register supports 8 MSI-X vectors.
*
* For other HBA's just update the Reply Post Host Index register with
* new reply host index value in ReplyPostIndex Field and msix_index
* value in MSIxIndex field.
*/
if (ioc->combined_reply_queue)
writel(reply_q->reply_post_host_index | ((msix_index & 7) <<
MPI2_RPHI_MSIX_INDEX_SHIFT),
ioc->replyPostRegisterIndex[msix_index/8]);
else
writel(reply_q->reply_post_host_index | (msix_index <<
MPI2_RPHI_MSIX_INDEX_SHIFT),
&ioc->chip->ReplyPostHostIndex);
atomic_dec(&reply_q->busy);
return completed_cmds;
}
/**
 * mpt3sas_blk_mq_poll - poll the blk mq poll queue
 * @shost: Scsi_Host object
 * @queue_num: hw ctx queue number
 *
 * The poll queue is selected by subtracting ioc->iopoll_q_start_index from
 * @queue_num. Nothing is processed while the queue is paused or while
 * another poll of the same queue is still running.
 *
 * Return number of entries that has been processed from poll queue.
 */
int mpt3sas_blk_mq_poll(struct Scsi_Host *shost, unsigned int queue_num)
{
	struct MPT3SAS_ADAPTER *ioc =
		(struct MPT3SAS_ADAPTER *)shost->hostdata;
	int qid = queue_num - ioc->iopoll_q_start_index;
	int processed;

	if (atomic_read(&ioc->io_uring_poll_queues[qid].pause))
		return 0;

	/* busy goes 0 -> 1 only; if it is already 1 someone else is polling */
	if (!atomic_add_unless(&ioc->io_uring_poll_queues[qid].busy, 1, 1))
		return 0;

	processed = _base_process_reply_queue(
			ioc->io_uring_poll_queues[qid].reply_q);
	atomic_dec(&ioc->io_uring_poll_queues[qid].busy);

	return processed;
}
/**
 * _base_interrupt - MPT adapter (IOC) specific interrupt handler.
 * @irq: irq number (not used)
 * @bus_id: cookie registered with the IRQ; used here as the reply queue
 *	(struct adapter_reply_queue) the interrupt belongs to
 *
 * Return: IRQ_NONE while interrupts are masked or when no reply descriptor
 * was processed; IRQ_HANDLED when irq polling is already scheduled for the
 * queue or at least one descriptor was processed.
 */
static irqreturn_t
_base_interrupt(int irq, void *bus_id)
{
	struct adapter_reply_queue *reply_q = bus_id;

	if (reply_q->ioc->mask_interrupts)
		return IRQ_NONE;

	/* The irq poll callback will take care of this queue. */
	if (reply_q->irq_poll_scheduled)
		return IRQ_HANDLED;

	if (_base_process_reply_queue(reply_q) > 0)
		return IRQ_HANDLED;

	return IRQ_NONE;
}
/**
 * _base_irqpoll - IRQ poll callback handler
 * @irqpoll: irq_poll object
 * @budget: irq poll weight
 *
 * Disables the queue's IRQ line (if still enabled) and processes the reply
 * queue. When fewer than @budget descriptors were processed, polling is
 * completed, the IRQ line is enabled again and the queue is processed once
 * more.
 *
 * Return: number of reply descriptors processed by the first pass
 */
static int
_base_irqpoll(struct irq_poll *irqpoll, int budget)
{
	struct adapter_reply_queue *reply_q =
		container_of(irqpoll, struct adapter_reply_queue, irqpoll);
	int processed;

	if (reply_q->irq_line_enable) {
		disable_irq_nosync(reply_q->os_irq);
		reply_q->irq_line_enable = false;
	}

	processed = _base_process_reply_queue(reply_q);
	if (processed >= budget)
		return processed;

	irq_poll_complete(irqpoll);
	reply_q->irq_poll_scheduled = false;
	reply_q->irq_line_enable = true;
	enable_irq(reply_q->os_irq);

	/*
	 * Go for one more round of processing the reply descriptor post
	 * queue in case the HBA Firmware has posted some reply descriptors
	 * while reenabling the IRQ.
	 */
	_base_process_reply_queue(reply_q);

	return processed;
}
/**
 * _base_init_irqpolls - initialize IRQ polls
 * @ioc: per adapter object
 *
 * For every reply queue that is not an io_uring poll queue: set up its
 * irq_poll instance with a weight of hba_queue_depth / 4, reset the polling
 * state flags and record the OS IRQ number of its MSI-X vector.
 *
 * Return: nothing
 */
static void
_base_init_irqpolls(struct MPT3SAS_ADAPTER *ioc)
{
	struct adapter_reply_queue *reply_q, *tmp;

	if (list_empty(&ioc->reply_queue_list))
		return;

	list_for_each_entry_safe(reply_q, tmp, &ioc->reply_queue_list, list) {
		if (!reply_q->is_iouring_poll_q) {
			irq_poll_init(&reply_q->irqpoll,
				      ioc->hba_queue_depth / 4, _base_irqpoll);
			reply_q->irq_poll_scheduled = false;
			reply_q->irq_line_enable = true;
			reply_q->os_irq = pci_irq_vector(ioc->pdev,
							 reply_q->msix_index);
		}
	}
}
/**
 * _base_is_controller_msix_enabled - does the controller use multiple
 *	reply queues
 * @ioc: per adapter object
 *
 * Return: 1 when the IOC facts advertise the MSI-X index capability and
 * ioc->msix_enable is set, 0 otherwise.
 */
static inline int
_base_is_controller_msix_enabled(struct MPT3SAS_ADAPTER *ioc)
{
	if (!(ioc->facts.IOCCapabilities &
	      MPI2_IOCFACTS_CAPABILITY_MSI_X_INDEX))
		return 0;

	return ioc->msix_enable ? 1 : 0;
}
/**
 * mpt3sas_base_sync_reply_irqs - flush pending MSIX interrupts
 * @ioc: per adapter object
 * @poll: poll over reply descriptor pools in case the interrupt for a
 *	timed-out SCSI command got delayed
 * Context: non-ISR context
 *
 * Called when a Task Management request has completed.
 *
 * Walks every reply queue except msix_index 0. io_uring poll queues are
 * simply processed; for all other queues the IRQ is synchronized, a
 * scheduled irq poll is waited for, and - when @poll is set - the queue is
 * processed once more. The walk stops early as soon as host recovery, host
 * removal or PCI error recovery is flagged on @ioc.
 */
void
mpt3sas_base_sync_reply_irqs(struct MPT3SAS_ADAPTER *ioc, u8 poll)
{
	struct adapter_reply_queue *reply_q;

	/* If MSIX capability is turned off
	 * then multi-queues are not enabled
	 */
	if (!_base_is_controller_msix_enabled(ioc))
		return;

	list_for_each_entry(reply_q, &ioc->reply_queue_list, list) {
		if (ioc->shost_recovery || ioc->remove_host ||
		    ioc->pci_error_recovery)
			return;

		/* TMs are on msix_index == 0 */
		if (reply_q->msix_index == 0)
			continue;

		if (reply_q->is_iouring_poll_q) {
			_base_process_reply_queue(reply_q);
			continue;
		}

		synchronize_irq(pci_irq_vector(ioc->pdev, reply_q->msix_index));
		if (reply_q->irq_poll_scheduled) {
			/* Calling irq_poll_disable will wait for any pending
			 * callbacks to have completed.
			 */
			irq_poll_disable(&reply_q->irqpoll);
			irq_poll_enable(&reply_q->irqpoll);
			/* check how the scheduled poll has ended,
			 * clean up only if necessary
			 */
			if (reply_q->irq_poll_scheduled) {
				reply_q->irq_poll_scheduled = false;
				reply_q->irq_line_enable = true;
				enable_irq(reply_q->os_irq);
			}
		}

		/*
		 * Poll per queue, inside the loop: once list_for_each_entry()
		 * has run to completion the cursor no longer refers to a real
		 * reply queue and must not be dereferenced.
		 */
		if (poll)
			_base_process_reply_queue(reply_q);
	}
}
/**
 * mpt3sas_base_release_callback_handler - clear interrupt callback handler
 * @cb_idx: callback index
 *
 * Resets slot @cb_idx of mpt_callbacks[] to NULL. The index is not range
 * checked here.
 */
void
mpt3sas_base_release_callback_handler(u8 cb_idx)
{
	MPT_CALLBACK *slot = &mpt_callbacks[cb_idx];

	*slot = NULL;
}
/**
 * mpt3sas_base_register_callback_handler - obtain index for the interrupt callback handler
 * @cb_func: callback function
 *
 * Searches mpt_callbacks[] from the highest index downwards for a free
 * (NULL) slot and stores @cb_func there. Index 0 is never tested: if no
 * slot with an index of 1 or above is free, @cb_func is stored at index 0
 * regardless of what that slot held.
 *
 * Return: Index of @cb_func.
 */
u8
mpt3sas_base_register_callback_handler(MPT_CALLBACK cb_func)
{
	u8 slot = MPT_MAX_CALLBACKS - 1;

	while (slot && mpt_callbacks[slot] != NULL)
		slot--;

	mpt_callbacks[slot] = cb_func;
	return slot;
}
/**
 * mpt3sas_base_initialize_callback_handler - initialize the interrupt callback handler
 *
 * Releases every one of the MPT_MAX_CALLBACKS callback slots.
 */
void
mpt3sas_base_initialize_callback_handler(void)
{
	u8 slot = 0;

	while (slot < MPT_MAX_CALLBACKS) {
		mpt3sas_base_release_callback_handler(slot);
		slot++;
	}
}
/**
 * _base_build_zero_len_sge - build zero length sg entry
 * @ioc: per adapter object
 * @paddr: virtual address for SGE
 *
 * Writes a single simple SGE with length zero, flagged as last element, end
 * of buffer and end of list, through ioc->base_add_sg_single() with an
 * address argument of -1. This gives the IOC hardware something to use if
 * the target device misbehaves and tries to send data even though none was
 * asked for.
 */
static void
_base_build_zero_len_sge(struct MPT3SAS_ADAPTER *ioc, void *paddr)
{
	u32 zero_len_flags;

	zero_len_flags = (u32)((MPI2_SGE_FLAGS_SIMPLE_ELEMENT |
				MPI2_SGE_FLAGS_LAST_ELEMENT |
				MPI2_SGE_FLAGS_END_OF_BUFFER |
				MPI2_SGE_FLAGS_END_OF_LIST) <<
			       MPI2_SGE_FLAGS_SHIFT);

	ioc->base_add_sg_single(paddr, zero_len_flags, -1);
}
/**
 * _base_add_sg_single_32 - Place a simple 32 bit SGE at address pAddr.
 * @paddr: virtual address for SGE
 * @flags_length: SGE flags and data transfer length
 * @dma_addr: Physical address
 *
 * The 32-bit addressing and system address flags are added to
 * @flags_length; both fields are stored little endian.
 */
static void
_base_add_sg_single_32(void *paddr, u32 flags_length, dma_addr_t dma_addr)
{
	Mpi2SGESimple32_t *sge = paddr;
	u32 addressing = (MPI2_SGE_FLAGS_32_BIT_ADDRESSING |
			  MPI2_SGE_FLAGS_SYSTEM_ADDRESS) <<
			 MPI2_SGE_FLAGS_SHIFT;

	sge->FlagsLength = cpu_to_le32(flags_length | addressing);
	sge->Address = cpu_to_le32(dma_addr);
}
/**
 * _base_add_sg_single_64 - Place a simple 64 bit SGE at address pAddr.
 * @paddr: virtual address for SGE
 * @flags_length: SGE flags and data transfer length
 * @dma_addr: Physical address
 *
 * The 64-bit addressing and system address flags are added to
 * @flags_length; both fields are stored little endian.
 */
static void
_base_add_sg_single_64(void *paddr, u32 flags_length, dma_addr_t dma_addr)
{
	Mpi2SGESimple64_t *sge = paddr;
	u32 addressing = (MPI2_SGE_FLAGS_64_BIT_ADDRESSING |
			  MPI2_SGE_FLAGS_SYSTEM_ADDRESS) <<
			 MPI2_SGE_FLAGS_SHIFT;

	sge->FlagsLength = cpu_to_le32(flags_length | addressing);
	sge->Address = cpu_to_le64(dma_addr);
}
/**
 * _base_get_chain_buffer_tracker - obtain chain tracker
 * @ioc: per adapter object
 * @scmd: SCSI commands of the IO request
 *
 * The smid is taken from the scsiio tracker stored in @scmd's private data.
 * Each successful call hands out the next chain tracker of that smid and
 * advances the smid's chain_offset by one.
 *
 * Return: chain tracker from chain_lookup table using key as
 * smid and smid's chain_offset, or NULL once chain_offset has reached
 * ioc->chains_needed_per_io.
 */
static struct chain_tracker *
_base_get_chain_buffer_tracker(struct MPT3SAS_ADAPTER *ioc,
			       struct scsi_cmnd *scmd)
{
	struct scsiio_tracker *st = scsi_cmd_priv(scmd);
	u16 smid = st->smid;
	int slot = smid - 1;	/* chain_lookup[] is indexed by smid - 1 */
	u8 next = atomic_read(&ioc->chain_lookup[slot].chain_offset);

	if (next == ioc->chains_needed_per_io)
		return NULL;

	atomic_inc(&ioc->chain_lookup[slot].chain_offset);

	return &ioc->chain_lookup[slot].chains_per_smid[next];
}
/**
 * _base_build_sg - build generic sg
 * @ioc: per adapter object
 * @psge: virtual address for SGE
 * @data_out_dma: physical address for WRITES
 * @data_out_sz: data xfer size for WRITES
 * @data_in_dma: physical address for READS
 * @data_in_sz: data xfer size for READS
 *
 * Builds at most two simple SGEs starting at @psge: a WRITE (host to IOC)
 * element when @data_out_sz is non-zero, followed by a READ element when
 * @data_in_sz is non-zero. Whichever element comes last carries the last
 * element / end of list flags. With both sizes zero a zero length SGE is
 * built instead.
 */
static void
_base_build_sg(struct MPT3SAS_ADAPTER *ioc, void *psge,
	dma_addr_t data_out_dma, size_t data_out_sz, dma_addr_t data_in_dma,
	size_t data_in_sz)
{
	u32 flags;

	if (!data_out_sz && !data_in_sz) {
		_base_build_zero_len_sge(ioc, psge);
		return;
	}

	if (data_out_sz) {
		/* WRITE sgel first */
		flags = MPI2_SGE_FLAGS_SIMPLE_ELEMENT |
			MPI2_SGE_FLAGS_END_OF_BUFFER |
			MPI2_SGE_FLAGS_HOST_TO_IOC;
		/* It is also the final element when there is no READ part. */
		if (!data_in_sz)
			flags |= MPI2_SGE_FLAGS_LAST_ELEMENT |
				 MPI2_SGE_FLAGS_END_OF_LIST;
		flags <<= MPI2_SGE_FLAGS_SHIFT;
		ioc->base_add_sg_single(psge, flags | data_out_sz,
					data_out_dma);

		if (!data_in_sz)
			return;

		/* incr sgel */
		psge += ioc->sge_size;
	}

	/* READ sgel last */
	flags = MPI2_SGE_FLAGS_SIMPLE_ELEMENT |
		MPI2_SGE_FLAGS_LAST_ELEMENT |
		MPI2_SGE_FLAGS_END_OF_BUFFER |
		MPI2_SGE_FLAGS_END_OF_LIST;
	flags <<= MPI2_SGE_FLAGS_SHIFT;
	ioc->base_add_sg_single(psge, flags | data_in_sz, data_in_dma);
}
/* IEEE format sgls */
/**
* _base_build_nvme_prp - This function is called for NVMe end devices to build
* a native SGL (NVMe PRP).
* @ioc: per adapter object
* @smid: system request message index for getting associated SGL
* @nvme_encap_request: the NVMe request msg frame pointer
* @data_out_dma: physical address for WRITES
* @data_out_sz: data xfer size for WRITES
* @data_in_dma: physical address for READS
* @data_in_sz: data xfer size for READS
*
* The native SGL is built starting in the first PRP
* entry of the NVMe message (PRP1). If the data buffer is small enough to be
* described entirely using PRP1, then PRP2 is not used. If needed, PRP2 is
* used to describe a larger data buffer. If the data buffer is too large to
* describe using the two PRP entries inside the NVMe message, then PRP1
* describes the first data memory segment, and PRP2 contains a pointer to a PRP
* list located elsewhere in memory to describe the remaining data memory
* segments. The PRP list will be contiguous.
*
* The native SGL for NVMe devices is a Physical Region Page (PRP). A PRP
* consists of a list of PRP entries to describe a number of noncontiguous
* physical memory segments as a single memory buffer, just as a SGL does. Note
* however, that this function is only used by the IOCTL call, so the memory
* given will be guaranteed to be contiguous. There is no need to translate
* non-contiguous SGL into a PRP in this case. All PRPs will describe
* contiguous space that is one page size each.
*
* Each NVMe message contains two PRP entries. The first (PRP1) either contains
* a PRP list pointer or a PRP element, depending upon the command. PRP2
* contains the second PRP element if the memory being described fits within 2
* PRP entries, or a PRP list pointer if the PRP spans more than two entries.
*
* A PRP list pointer contains the address of a PRP list, structured as a linear
* array of PRP entries. Each PRP entry in this list describes a segment of
* physical memory.
*
* Each 64-bit PRP entry comprises an address and an offset field. The address
* always points at the beginning of a 4KB physical memory page, and the offset
* describes where within that 4KB page the memory segment begins. Only the
* first element in a PRP list may contain a non-zero offset, implying that all
* memory segments following the first begin at the start of a 4KB page.
*
* Each PRP element normally describes 4KB of physical memory, with exceptions
* for the first and last elements in the list. If the memory being described
* by the list begins at a non-zero offset within the first 4KB page, then the
* first PRP element will contain a non-zero offset indicating where the region
* begins within the 4KB page. The last memory segment may end before the end
* of the 4KB segment, depending upon the overall size of the memory being
* described by the PRP list.
*
* Since PRP entries lack any indication of size, the overall data buffer length
* is used to determine where the end of the data memory buffer is located, and
* how many PRP entries are required to describe it.
*/
static void
_base_build_nvme_prp(struct MPT3SAS_ADAPTER *ioc, u16 smid,
	Mpi26NVMeEncapsulatedRequest_t *nvme_encap_request,
	dma_addr_t data_out_dma, size_t data_out_sz, dma_addr_t data_in_dma,
	size_t data_in_sz)
{
	int prp_size = NVME_PRP_SIZE;
	__le64 *prp_entry, *prp1_entry, *prp2_entry;
	__le64 *prp_page;
	dma_addr_t prp_entry_dma, prp_page_dma, dma_addr;
	u32 offset, entry_len;
	u32 page_mask_result, page_mask;
	size_t length;
	struct mpt3sas_nvme_cmd *nvme_cmd =
		(void *)nvme_encap_request->NVMe_Command;

	/*
	 * Not all commands require a data transfer. If no data, just return
	 * without constructing any PRP.
	 */
	if (!data_in_sz && !data_out_sz)
		return;

	prp1_entry = &nvme_cmd->prp1;
	prp2_entry = &nvme_cmd->prp2;
	prp_entry = prp1_entry;

	/*
	 * For the PRP entries, use the specially allocated buffer of
	 * contiguous memory.
	 */
	prp_page = (__le64 *)mpt3sas_base_get_pcie_sgl(ioc, smid);
	prp_page_dma = mpt3sas_base_get_pcie_sgl_dma(ioc, smid);

	/*
	 * Check if we are within 1 entry of a page boundary we don't
	 * want our first entry to be a PRP List entry.
	 */
	page_mask = ioc->page_size - 1;
	page_mask_result = (uintptr_t)((u8 *)prp_page + prp_size) & page_mask;
	if (!page_mask_result) {
		/* Bump up to next page boundary. */
		prp_page = (__le64 *)((u8 *)prp_page + prp_size);
		prp_page_dma = prp_page_dma + prp_size;
	}

	/*
	 * Set PRP physical pointer, which initially points to the current PRP
	 * DMA memory page.
	 */
	prp_entry_dma = prp_page_dma;

	/* Get physical address and length of the data buffer. */
	if (data_in_sz) {
		dma_addr = data_in_dma;
		length = data_in_sz;
	} else {
		dma_addr = data_out_dma;
		length = data_out_sz;
	}

	/* Loop while the length is not zero. */
	while (length) {
		/*
		 * Check if we need to put a list pointer here if we are at
		 * page boundary - prp_size (8 bytes).
		 */
		page_mask_result = (prp_entry_dma + prp_size) & page_mask;
		if (!page_mask_result) {
			/*
			 * This is the last entry in a PRP List, so we need to
			 * put a PRP list pointer here. What this does is:
			 * - bump the current memory pointer to the next
			 * address, which will be the next full page.
			 * - set the PRP Entry to point to that page. This
			 * is now the PRP List pointer.
			 * - bump the PRP Entry pointer the start of the
			 * next page. Since all of this PRP memory is
			 * contiguous, no need to get a new page - it's
			 * just the next address.
			 *
			 * prp_entry_dma is a byte address, so it has to move
			 * by one whole entry (prp_size bytes) to stay in step
			 * with prp_entry, which moves by one __le64.
			 */
			prp_entry_dma += prp_size;
			*prp_entry = cpu_to_le64(prp_entry_dma);
			prp_entry++;
		}

		/* Need to handle if entry will be part of a page. */
		offset = dma_addr & page_mask;
		entry_len = ioc->page_size - offset;

		if (prp_entry == prp1_entry) {
			/*
			 * Must fill in the first PRP pointer (PRP1) before
			 * moving on.
			 */
			*prp1_entry = cpu_to_le64(dma_addr);

			/*
			 * Now point to the second PRP entry within the
			 * command (PRP2).
			 */
			prp_entry = prp2_entry;
		} else if (prp_entry == prp2_entry) {
			/*
			 * Should the PRP2 entry be a PRP List pointer or just
			 * a regular PRP pointer? If there is more than one
			 * more page of data, must use a PRP List pointer.
			 */
			if (length > ioc->page_size) {
				/*
				 * PRP2 will contain a PRP List pointer because
				 * more PRP's are needed with this command. The
				 * list will start at the beginning of the
				 * contiguous buffer.
				 */
				*prp2_entry = cpu_to_le64(prp_entry_dma);

				/*
				 * The next PRP Entry will be the start of the
				 * first PRP List.
				 */
				prp_entry = prp_page;
			} else {
				/*
				 * After this, the PRP Entries are complete.
				 * This command uses 2 PRP's and no PRP list.
				 */
				*prp2_entry = cpu_to_le64(dma_addr);
			}
		} else {
			/*
			 * Put entry in list and bump the addresses.
			 *
			 * After PRP1 and PRP2 are filled in, this will fill in
			 * all remaining PRP entries in a PRP List, one per
			 * each time through the loop.
			 */
			*prp_entry = cpu_to_le64(dma_addr);
			prp_entry++;
			/* Advance the DMA cursor by one entry, in bytes. */
			prp_entry_dma += prp_size;
		}

		/*
		 * Bump the phys address of the command's data buffer by the
		 * entry_len.
		 */
		dma_addr += entry_len;

		/* Decrement length accounting for last partial page. */
		if (entry_len > length)
			length = 0;
		else
			length -= entry_len;
	}
}
/**
* base_make_prp_nvme - Prepare PRPs (Physical Region Page) -
* SGLs specific to NVMe drives only
*
* @ioc: per adapter object
* @scmd: SCSI command from the mid-layer
* @mpi_request: mpi request
* @smid: msg Index
* @sge_count: scatter gather element count.
*
* Return: nothing. The function is declared void, so it does not report
* whether PRPs were built or IEEE SGLs still need to be built.
*/
static void
base_make_prp_nvme(struct MPT3SAS_ADAPTER *ioc,
struct scsi_cmnd *scmd,
Mpi25SCSIIORequest_t *mpi_request,
u16 smid, int sge_count)
{
int sge_len, num_prp_in_chain = 0;
Mpi25IeeeSgeChain64_t *main_chain_element, *ptr_first_sgl;
__le64 *curr_buff;
dma_addr_t msg_dma, sge_addr, offset;
u32 page_mask, page_mask_result;
struct scatterlist *sg_scmd;
u32 first_prp_len;
int data_len = scsi_bufflen(scmd);
u32 nvme_pg_size;
nvme_pg_size = max_t(u32, ioc->page_size, NVME_PRP_PAGE_SIZE);
/*
* Nvme has a very convoluted prp format. One prp is required
* for each page or partial page. Driver need to split up OS sg_list
* entries if it is longer than one page or cross a page
* boundary. Driver also have to insert a PRP list pointer entry as
* the last entry in each physical page of the PRP list.
*
* NOTE: The first PRP "entry" is actually placed in the first
* SGL entry in the main message as IEEE 64 format. The 2nd
* entry in the main message is the chain element, and the rest
* of the PRP entries are built in the contiguous pcie buffer.
*/
page_mask = nvme_pg_size - 1;
/*
* Native SGL is needed.
* Put a chain element in main message frame that points to the first
* chain buffer.
*
* NOTE: The ChainOffset field must be 0 when using a chain pointer to
* a native SGL.
*/
/* Set main message chain element pointer */
main_chain_element = (pMpi25IeeeSgeChain64_t)&mpi_request->SGL;
/*
* For NVMe the chain element needs to be the 2nd SG entry in the main
* message.
*/
main_chain_element = (Mpi25IeeeSgeChain64_t *)
((u8 *)main_chain_element + sizeof(MPI25_IEEE_SGE_CHAIN64));
/*
* For the PRP entries, use the specially allocated buffer of
* contiguous memory. Normal chain buffers can't be used
* because each chain buffer would need to be the size of an OS
* page (4k).
*/
curr_buff = mpt3sas_base_get_pcie_sgl(ioc, smid);
msg_dma = mpt3sas_base_get_pcie_sgl_dma(ioc, smid);
main_chain_element->Address = cpu_to_le64(msg_dma);
main_chain_element->NextChainOffset = 0;
main_chain_element->Flags = MPI2_IEEE_SGE_FLAGS_CHAIN_ELEMENT |
MPI2_IEEE_SGE_FLAGS_SYSTEM_ADDR |
MPI26_IEEE_SGE_FLAGS_NSF_NVME_PRP;
/* Build first prp, sge need not to be page aligned*/
ptr_first_sgl = (pMpi25IeeeSgeChain64_t)&mpi_request->SGL;
sg_scmd = scsi_sglist(scmd);
sge_addr = sg_dma_address(sg_scmd);
sge_len = sg_dma_len(sg_scmd);
offset = sge_addr & page_mask;
first_prp_len = nvme_pg_size - offset;
ptr_first_sgl->Address = cpu_to_le64(sge_addr);
ptr_first_sgl->Length = cpu_to_le32(first_prp_len);
data_len -= first_prp_len;
if (sge_len > first_prp_len) {
sge_addr += first_prp_len;
sge_len -= first_prp_len;
} else if (data_len && (sge_len == first_prp_len)) {
sg_scmd = sg_next(sg_scmd);
sge_addr = sg_dma_address(sg_scmd);
sge_len = sg_dma_len(sg_scmd);
}
for (;;) {
offset = sge_addr & page_mask;
/* Put PRP pointer due to page boundary*/
page_mask_result = (uintptr_t)(curr_buff + 1) & page_mask;
if (unlikely(!page_mask_result)) {
scmd_printk(KERN_NOTICE,
scmd, "page boundary curr_buff: 0x%p\n",
curr_buff);
msg_dma += 8;
*curr_buff = cpu_to_le64(msg_dma);
curr_buff++;
num_prp_in_chain++;
}
*curr_buff = cpu_to_le64(sge_addr);
curr_buff++;
msg_dma += 8;
num_prp_in_chain++;
sge_addr += nvme_pg_size;
sge_len -= nvme_pg_size;
data_len -= nvme_pg_size;
if (data_len <= 0)
break;
if (sge_len > 0)
continue;
sg_scmd = sg_next(sg_scmd);
sge_addr = sg_dma_address(sg_scmd);
sge_len = sg_dma_len(sg_scmd);
}
main_chain_element->Length =
cpu_to_le32(num_prp_in_chain * sizeof(u64));
return;
}
/**
 * base_is_prp_possible - decide whether a native NVMe PRP list should be built
 * @ioc: per adapter object (not referenced in the body)
 * @pcie_device: PCIe device the command is addressed to, may be NULL
 * @scmd: SCSI command whose transfer length is inspected
 * @sge_count: number of scatter gather elements of the command
 *
 * Return: true when PRPs should be built, false when an IEEE SGL should be
 * built instead.
 */
static bool
base_is_prp_possible(struct MPT3SAS_ADAPTER *ioc,
	struct _pcie_device *pcie_device, struct scsi_cmnd *scmd, int sge_count)
{
	const u32 xfer_len = scsi_bufflen(scmd);

	/*
	 * Devices reported by mpt3sas_scsih_is_pcie_scsi_device() never take
	 * the PRP path.
	 */
	if (pcie_device &&
	    (mpt3sas_scsih_is_pcie_scsi_device(pcie_device->device_info)))
		return false;

	/*
	 * A transfer of at most four NVMe PRP pages that is described by no
	 * more than two SGEs is sent as an IEEE SGL.
	 */
	if ((xfer_len <= NVME_PRP_PAGE_SIZE*4) && (sge_count <= 2))
		return false;

	return true;
}
/**
 * _base_check_pcie_native_sgl - build a native PCIe SGL when one is needed
 * @ioc: per adapter object
 * @mpi_request: mf request pointer
 * @smid: system request message index
 * @scmd: scsi command
 * @pcie_device: points to the PCIe device's info
 *
 * Called for PCIe end devices. The command is DMA mapped and, if
 * base_is_prp_possible() agrees, the native NVMe PRP list is built in the
 * contiguous buffer reserved for PCIe SGL creation. Otherwise the mapping
 * is released again and the caller is expected to build a normal IEEE SGL.
 *
 * Return: 0 if native SGL was built, 1 if no SGL was built
 */
static int
_base_check_pcie_native_sgl(struct MPT3SAS_ADAPTER *ioc,
	Mpi25SCSIIORequest_t *mpi_request, u16 smid, struct scsi_cmnd *scmd,
	struct _pcie_device *pcie_device)
{
	/* Map the command to learn how many SG elements it needs. */
	int mapped_sges = scsi_dma_map(scmd);

	if (mapped_sges < 0) {
		sdev_printk(KERN_ERR, scmd->device,
			"scsi_dma_map failed: request for %d bytes!\n",
			scsi_bufflen(scmd));
		return 1;
	}

	if (base_is_prp_possible(ioc, pcie_device, scmd, mapped_sges)) {
		/* Build native NVMe PRP; the mapping stays in place. */
		base_make_prp_nvme(ioc, scmd, mpi_request, smid, mapped_sges);
		return 0;
	}

	/* No native SGL for this command: drop the mapping taken above. */
	scsi_dma_unmap(scmd);
	return 1;
}
/**
 * _base_add_sg_single_ieee - write one IEEE format sg element
 * @paddr: virtual address of the element to fill in
 * @flags: value for the element's Flags field
 * @chain_offset: value for the element's NextChainOffset field
 * @length: data transfer length, stored little endian
 * @dma_addr: physical address, stored little endian
 */
static void
_base_add_sg_single_ieee(void *paddr, u8 flags, u8 chain_offset, u32 length,
	dma_addr_t dma_addr)
{
	Mpi25IeeeSgeChain64_t *element = paddr;

	element->Address = cpu_to_le64(dma_addr);
	element->Length = cpu_to_le32(length);
	element->NextChainOffset = chain_offset;
	element->Flags = flags;
}
/**
 * _base_build_zero_len_sge_ieee - build zero length sg entry for IEEE format
 * @ioc: per adapter object (not referenced in the body)
 * @paddr: virtual address for SGE
 *
 * Writes a single end-of-list simple element with length 0 and an
 * all-ones address, so the IOC hardware has something to use if the
 * target device tries to send data even though none was asked for.
 */
static void
_base_build_zero_len_sge_ieee(struct MPT3SAS_ADAPTER *ioc, void *paddr)
{
	const u8 zero_len_flags = MPI2_IEEE_SGE_FLAGS_SIMPLE_ELEMENT |
				  MPI2_IEEE_SGE_FLAGS_SYSTEM_ADDR |
				  MPI25_IEEE_SGE_FLAGS_END_OF_LIST;

	_base_add_sg_single_ieee(paddr, zero_len_flags, 0, 0, -1);
}
/**
* _base_build_sg_scmd - main sg creation routine (MPI2 SGE format)
* @ioc: per adapter object
* @scmd: scsi command
* @smid: system request message index
* @unused: unused pcie_device pointer
* Context: none.
*
* The main routine that builds the scatter gather table from a given
* scsi request sent via the .queuecommand main handler. The command is
* DMA mapped here. Elements that do not fit into the main message frame
* are written to chain buffers taken from _base_get_chain_buffer_tracker().
*
* Return: 0 success, -ENOMEM if scsi_dma_map() fails, -1 if no chain
* buffer tracker could be obtained
*/
static int
_base_build_sg_scmd(struct MPT3SAS_ADAPTER *ioc,
struct scsi_cmnd *scmd, u16 smid, struct _pcie_device *unused)
{
Mpi2SCSIIORequest_t *mpi_request;
dma_addr_t chain_dma;
struct scatterlist *sg_scmd;
void *sg_local, *chain;
u32 chain_offset;
u32 chain_length;
u32 chain_flags;
int sges_left;
u32 sges_in_segment;
u32 sgl_flags;
u32 sgl_flags_last_element;
u32 sgl_flags_end_buffer;
struct chain_tracker *chain_req;
mpi_request = mpt3sas_base_get_msg_frame(ioc, smid);
/*
* init scatter gather flags: a plain element, the last element of a
* segment, and the very last element of the list. Each variant is
* shifted by MPI2_SGE_FLAGS_SHIFT so it can be OR-ed with a length.
*/
sgl_flags = MPI2_SGE_FLAGS_SIMPLE_ELEMENT;
if (scmd->sc_data_direction == DMA_TO_DEVICE)
sgl_flags |= MPI2_SGE_FLAGS_HOST_TO_IOC;
sgl_flags_last_element = (sgl_flags | MPI2_SGE_FLAGS_LAST_ELEMENT)
<< MPI2_SGE_FLAGS_SHIFT;
sgl_flags_end_buffer = (sgl_flags | MPI2_SGE_FLAGS_LAST_ELEMENT |
MPI2_SGE_FLAGS_END_OF_BUFFER | MPI2_SGE_FLAGS_END_OF_LIST)
<< MPI2_SGE_FLAGS_SHIFT;
sgl_flags = sgl_flags << MPI2_SGE_FLAGS_SHIFT;
sg_scmd = scsi_sglist(scmd);
sges_left = scsi_dma_map(scmd);
if (sges_left < 0) {
sdev_printk(KERN_ERR, scmd->device,
"scsi_dma_map failed: request for %d bytes!\n",
scsi_bufflen(scmd));
return -ENOMEM;
}
sg_local = &mpi_request->SGL;
sges_in_segment = ioc->max_sges_in_main_message;
/* Everything fits into the main message: no chain element needed. */
if (sges_left <= sges_in_segment)
goto fill_in_last_segment;
/* Byte offset of the chain element in the frame, divided by 4. */
mpi_request->ChainOffset = (offsetof(Mpi2SCSIIORequest_t, SGL) +
(sges_in_segment * ioc->sge_size))/4;
/* fill in main message segment when there is a chain following */
while (sges_in_segment) {
if (sges_in_segment == 1)
ioc->base_add_sg_single(sg_local,
sgl_flags_last_element | sg_dma_len(sg_scmd),
sg_dma_address(sg_scmd));
else
ioc->base_add_sg_single(sg_local, sgl_flags |
sg_dma_len(sg_scmd), sg_dma_address(sg_scmd));
sg_scmd = sg_next(sg_scmd);
sg_local += ioc->sge_size;
sges_left--;
sges_in_segment--;
}
/* initializing the chain flags and pointers */
chain_flags = MPI2_SGE_FLAGS_CHAIN_ELEMENT << MPI2_SGE_FLAGS_SHIFT;
chain_req = _base_get_chain_buffer_tracker(ioc, scmd);
/*
* NOTE(review): this and the later "return -1" leave without calling
* scsi_dma_unmap() - presumably the caller cleans up; confirm.
*/
if (!chain_req)
return -1;
chain = chain_req->chain_buffer;
chain_dma = chain_req->chain_buffer_dma;
/*
* One iteration per chain buffer: write the chain element that points
* at the buffer, then either fill the buffer completely and fetch the
* next one, or jump out to fill the final, partially used buffer.
*/
do {
sges_in_segment = (sges_left <=
ioc->max_sges_in_chain_message) ? sges_left :
ioc->max_sges_in_chain_message;
/* 0 marks the final chain buffer (all remaining SGEs fit). */
chain_offset = (sges_left == sges_in_segment) ?
0 : (sges_in_segment * ioc->sge_size)/4;
chain_length = sges_in_segment * ioc->sge_size;
if (chain_offset) {
chain_offset = chain_offset <<
MPI2_SGE_CHAIN_OFFSET_SHIFT;
/* Leave room for the next chain element in this buffer. */
chain_length += ioc->sge_size;
}
ioc->base_add_sg_single(sg_local, chain_flags | chain_offset |
chain_length, chain_dma);
sg_local = chain;
if (!chain_offset)
goto fill_in_last_segment;
/* fill in chain segments */
while (sges_in_segment) {
if (sges_in_segment == 1)
ioc->base_add_sg_single(sg_local,
sgl_flags_last_element |
sg_dma_len(sg_scmd),
sg_dma_address(sg_scmd));
else
ioc->base_add_sg_single(sg_local, sgl_flags |
sg_dma_len(sg_scmd),
sg_dma_address(sg_scmd));
sg_scmd = sg_next(sg_scmd);
sg_local += ioc->sge_size;
sges_left--;
sges_in_segment--;
}
chain_req = _base_get_chain_buffer_tracker(ioc, scmd);
if (!chain_req)
return -1;
chain = chain_req->chain_buffer;
chain_dma = chain_req->chain_buffer_dma;
} while (1);
fill_in_last_segment:
/* fill the last segment; its final element carries the end flags */
while (sges_left) {
if (sges_left == 1)
ioc->base_add_sg_single(sg_local, sgl_flags_end_buffer |
sg_dma_len(sg_scmd), sg_dma_address(sg_scmd));
else
ioc->base_add_sg_single(sg_local, sgl_flags |
sg_dma_len(sg_scmd), sg_dma_address(sg_scmd));
sg_scmd = sg_next(sg_scmd);
sg_local += ioc->sge_size;
sges_left--;
}
return 0;
}
/**
* _base_build_sg_scmd_ieee - main sg creation routine for IEEE format
* @ioc: per adapter object
* @scmd: scsi command
* @smid: system request message index
* @pcie_device: Pointer to pcie_device. If set, the pcie native sgl will be
* constructed on need.
* Context: none.
*
* The main routine that builds the scatter gather table from a given
* scsi request sent via the .queuecommand main handler. When @pcie_device
* is set and _base_check_pcie_native_sgl() builds a native SGL, nothing
* else is done here. Otherwise the command is DMA mapped and an IEEE SGL
* is written, using chain buffers for elements that do not fit into the
* main message frame.
*
* Return: 0 success (native or IEEE SGL built), -ENOMEM if scsi_dma_map()
* fails, -1 if no chain buffer tracker could be obtained
*/
static int
_base_build_sg_scmd_ieee(struct MPT3SAS_ADAPTER *ioc,
struct scsi_cmnd *scmd, u16 smid, struct _pcie_device *pcie_device)
{
Mpi25SCSIIORequest_t *mpi_request;
dma_addr_t chain_dma;
struct scatterlist *sg_scmd;
void *sg_local, *chain;
u32 chain_offset;
u32 chain_length;
int sges_left;
u32 sges_in_segment;
u8 simple_sgl_flags;
u8 simple_sgl_flags_last;
u8 chain_sgl_flags;
struct chain_tracker *chain_req;
mpi_request = mpt3sas_base_get_msg_frame(ioc, smid);
/* init scatter gather flags */
simple_sgl_flags = MPI2_IEEE_SGE_FLAGS_SIMPLE_ELEMENT |
MPI2_IEEE_SGE_FLAGS_SYSTEM_ADDR;
simple_sgl_flags_last = simple_sgl_flags |
MPI25_IEEE_SGE_FLAGS_END_OF_LIST;
chain_sgl_flags = MPI2_IEEE_SGE_FLAGS_CHAIN_ELEMENT |
MPI2_IEEE_SGE_FLAGS_SYSTEM_ADDR;
/* Check if we need to build a native SG list. */
if ((pcie_device) && (_base_check_pcie_native_sgl(ioc, mpi_request,
smid, scmd, pcie_device) == 0)) {
/* We built a native SG list, just return. */
return 0;
}
/* No native SGL was built: map the command and build an IEEE SGL. */
sg_scmd = scsi_sglist(scmd);
sges_left = scsi_dma_map(scmd);
if (sges_left < 0) {
sdev_printk(KERN_ERR, scmd->device,
"scsi_dma_map failed: request for %d bytes!\n",
scsi_bufflen(scmd));
return -ENOMEM;
}
sg_local = &mpi_request->SGL;
/* Number of IEEE elements that fit between SGL and the frame end. */
sges_in_segment = (ioc->request_sz -
offsetof(Mpi25SCSIIORequest_t, SGL))/ioc->sge_size_ieee;
/* Everything fits into the main message: no chain element needed. */
if (sges_left <= sges_in_segment)
goto fill_in_last_segment;
/* Position of the chain element, counted in sge_size_ieee units. */
mpi_request->ChainOffset = (sges_in_segment - 1 /* chain element */) +
(offsetof(Mpi25SCSIIORequest_t, SGL)/ioc->sge_size_ieee);
/*
* fill in main message segment when there is a chain following;
* the last slot is left free for the chain element
*/
while (sges_in_segment > 1) {
_base_add_sg_single_ieee(sg_local, simple_sgl_flags, 0,
sg_dma_len(sg_scmd), sg_dma_address(sg_scmd));
sg_scmd = sg_next(sg_scmd);
sg_local += ioc->sge_size_ieee;
sges_left--;
sges_in_segment--;
}
/* initializing the pointers */
chain_req = _base_get_chain_buffer_tracker(ioc, scmd);
/*
* NOTE(review): this and the later "return -1" leave without calling
* scsi_dma_unmap() - presumably the caller cleans up; confirm.
*/
if (!chain_req)
return -1;
chain = chain_req->chain_buffer;
chain_dma = chain_req->chain_buffer_dma;
/*
* One iteration per chain buffer: write the chain element that points
* at the buffer, then either fill the buffer completely and fetch the
* next one, or jump out to fill the final, partially used buffer.
*/
do {
sges_in_segment = (sges_left <=
ioc->max_sges_in_chain_message) ? sges_left :
ioc->max_sges_in_chain_message;
/* 0 marks the final chain buffer (all remaining SGEs fit). */
chain_offset = (sges_left == sges_in_segment) ?
0 : sges_in_segment;
chain_length = sges_in_segment * ioc->sge_size_ieee;
/* Leave room for the next chain element in this buffer. */
if (chain_offset)
chain_length += ioc->sge_size_ieee;
_base_add_sg_single_ieee(sg_local, chain_sgl_flags,
chain_offset, chain_length, chain_dma);
sg_local = chain;
if (!chain_offset)
goto fill_in_last_segment;
/* fill in chain segments */
while (sges_in_segment) {
_base_add_sg_single_ieee(sg_local, simple_sgl_flags, 0,
sg_dma_len(sg_scmd), sg_dma_address(sg_scmd));
sg_scmd = sg_next(sg_scmd);
sg_local += ioc->sge_size_ieee;
sges_left--;
sges_in_segment--;
}
chain_req = _base_get_chain_buffer_tracker(ioc, scmd);
if (!chain_req)
return -1;
chain = chain_req->chain_buffer;
chain_dma = chain_req->chain_buffer_dma;
} while (1);
fill_in_last_segment:
/* fill the last segment; only the final element is END_OF_LIST */
while (sges_left > 0) {
if (sges_left == 1)
_base_add_sg_single_ieee(sg_local,
simple_sgl_flags_last, 0, sg_dma_len(sg_scmd),
sg_dma_address(sg_scmd));
else
_base_add_sg_single_ieee(sg_local, simple_sgl_flags, 0,
sg_dma_len(sg_scmd), sg_dma_address(sg_scmd));
sg_scmd = sg_next(sg_scmd);
sg_local += ioc->sge_size_ieee;
sges_left--;
}
return 0;
}
/**
 * _base_build_sg_ieee - build generic sg for IEEE format
 * @ioc: per adapter object
 * @psge: virtual address for SGE
 * @data_out_dma: physical address for WRITES
 * @data_out_sz: data xfer size for WRITES
 * @data_in_dma: physical address for READS
 * @data_in_sz: data xfer size for READS
 *
 * Emits a zero length element when both sizes are 0, one end-of-list
 * element when only one direction carries data, and a WRITE element
 * followed by an end-of-list READ element when both do.
 */
static void
_base_build_sg_ieee(struct MPT3SAS_ADAPTER *ioc, void *psge,
	dma_addr_t data_out_dma, size_t data_out_sz, dma_addr_t data_in_dma,
	size_t data_in_sz)
{
	const u8 simple_flags = MPI2_IEEE_SGE_FLAGS_SIMPLE_ELEMENT |
				MPI2_IEEE_SGE_FLAGS_SYSTEM_ADDR;
	const u8 last_flags = simple_flags | MPI25_IEEE_SGE_FLAGS_END_OF_LIST;

	if (!data_out_sz && !data_in_sz) {
		_base_build_zero_len_sge_ieee(ioc, psge);
		return;
	}

	if (data_out_sz) {
		/* WRITE sgel first; it ends the list only if no READ follows */
		_base_add_sg_single_ieee(psge,
			data_in_sz ? simple_flags : last_flags, 0,
			data_out_sz, data_out_dma);
		if (!data_in_sz)
			return;

		/* incr sgel */
		psge += ioc->sge_size_ieee;
	}

	/* READ sgel last */
	_base_add_sg_single_ieee(psge, last_flags, 0, data_in_sz,
		data_in_dma);
}
#define convert_to_kb(x) ((x) << (PAGE_SHIFT - 10))

/**
 * _base_config_dma_addressing - set dma addressing
 * @ioc: per adapter object
 * @pdev: PCI device struct
 *
 * Picks the DMA mask width (32, 63 or 64 bits), applies it as both the
 * streaming and the coherent mask, and selects the matching simple SGE
 * writer and element size.
 *
 * Return: 0 for success, -ENODEV if the DMA mask cannot be set.
 */
static int
_base_config_dma_addressing(struct MPT3SAS_ADAPTER *ioc, struct pci_dev *pdev)
{
	struct sysinfo s;

	/*
	 * dma_get_required_mask() returns an address bit mask, not a number
	 * of bits, so it must be compared against DMA_BIT_MASK(32) rather
	 * than against the integer 32.
	 */
	if (ioc->is_mcpu_endpoint ||
	    sizeof(dma_addr_t) == 4 || ioc->use_32bit_dma ||
	    dma_get_required_mask(&pdev->dev) <= DMA_BIT_MASK(32))
		ioc->dma_mask = 32;
	/* Set 63 bit DMA mask for all SAS3 and SAS35 controllers */
	else if (ioc->hba_mpi_version_belonged > MPI2_VERSION)
		ioc->dma_mask = 63;
	else
		ioc->dma_mask = 64;

	if (dma_set_mask(&pdev->dev, DMA_BIT_MASK(ioc->dma_mask)) ||
	    dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(ioc->dma_mask)))
		return -ENODEV;

	/* Masks wider than 32 bits need the 64 bit simple SGE layout. */
	if (ioc->dma_mask > 32) {
		ioc->base_add_sg_single = &_base_add_sg_single_64;
		ioc->sge_size = sizeof(Mpi2SGESimple64_t);
	} else {
		ioc->base_add_sg_single = &_base_add_sg_single_32;
		ioc->sge_size = sizeof(Mpi2SGESimple32_t);
	}

	si_meminfo(&s);
	ioc_info(ioc, "%d BIT PCI BUS DMA ADDRESSING SUPPORTED, total mem (%ld kB)\n",
		 ioc->dma_mask, convert_to_kb(s.totalram));

	return 0;
}
/**
* _base_check_enable_msix - checks MSIX capabable.
* @ioc: per adapter object
*
* Check to see if card is capable of MSIX, and set number
* of available msix vectors
*/
static int
_base_check_enable_msix(struct MPT3SAS_ADAPTER *ioc)
{
int base;
u16 message_control;
/* Check whether controller SAS2008 B0 controller,
* if it is SAS2008 B0 controller use IO-APIC instead of MSIX
*/
if (ioc->pdev->device == MPI2_MFGPAGE_DEVID_SAS2008 &&
ioc->pdev->revision == SAS2_PCI_DEVICE_B0_REVISION) {
return -EINVAL;
}
base = pci_find_capability(ioc->pdev, PCI_CAP_ID_MSIX);
if (!base) {
dfailprintk(ioc, ioc_info(ioc, "msix not supported\n"));
return -EINVAL;
}
/* get msix vector count */
/* NUMA_IO not supported for older controllers */
if (ioc->pdev->device == MPI2_MFGPAGE_DEVID_SAS2004 ||
ioc->pdev->device == MPI2_MFGPAGE_DEVID_SAS2008 ||
ioc->pdev->device == MPI2_MFGPAGE_DEVID_SAS2108_1 ||
ioc->pdev->device == MPI2_MFGPAGE_DEVID_SAS2108_2 ||
ioc->pdev->device == MPI2_MFGPAGE_DEVID_SAS2108_3 ||
ioc->pdev->device == MPI2_MFGPAGE_DEVID_SAS2116_1 ||
ioc->pdev->device == MPI2_MFGPAGE_DEVID_SAS2116_2)
ioc->msix_vector_count = 1;
else {
pci_read_config_word(ioc->pdev, base + 2, &message_control);
ioc->msix_vector_count = (message_control & 0x3FF) + 1;
}
dinitprintk(ioc, ioc_info(ioc, "msix is supported, vector_count(%d)\n",
ioc->msix_vector_count));
return 0;
}
/**
 * mpt3sas_base_free_irq - free irq
 * @ioc: per adapter object
 *
 * Removes every reply queue from ioc->reply_queue_list and frees it.
 * Queues that are not io_uring poll queues also get their affinity hint
 * cleared (when smp affinity is enabled) and their irq released.
 */
void
mpt3sas_base_free_irq(struct MPT3SAS_ADAPTER *ioc)
{
	struct adapter_reply_queue *rq, *tmp;

	if (list_empty(&ioc->reply_queue_list))
		return;

	list_for_each_entry_safe(rq, tmp, &ioc->reply_queue_list, list) {
		list_del(&rq->list);

		/* Only queues that are not poll queues have an irq to drop. */
		if (!rq->is_iouring_poll_q) {
			const int irq = pci_irq_vector(ioc->pdev,
						       rq->msix_index);

			if (ioc->smp_affinity_enable)
				irq_set_affinity_hint(irq, NULL);
			free_irq(irq, rq);
		}

		kfree(rq);
	}
}
/**
 * _base_request_irq - request irq
 * @ioc: per adapter object
 * @index: msix index into vector table
 *
 * Allocates a reply queue for @index and appends it to
 * ioc->reply_queue_list. An index at or above ioc->iopoll_q_start_index
 * becomes an io_uring poll queue and gets no interrupt; any other index
 * has _base_interrupt registered as a shared handler for its vector.
 *
 * Return: 0 on success, -ENOMEM if the queue cannot be allocated, -EBUSY
 * if request_irq() fails.
 */
static int
_base_request_irq(struct MPT3SAS_ADAPTER *ioc, u8 index)
{
	struct pci_dev *pdev = ioc->pdev;
	struct adapter_reply_queue *rq;
	int rc, poll_qid;

	rq = kzalloc(sizeof(*rq), GFP_KERNEL);
	if (!rq) {
		ioc_err(ioc, "unable to allocate memory %zu!\n",
			sizeof(struct adapter_reply_queue));
		return -ENOMEM;
	}

	rq->ioc = ioc;
	rq->msix_index = index;
	atomic_set(&rq->busy, 0);

	if (index >= ioc->iopoll_q_start_index) {
		/* Poll queue: name it and record it, but request no irq. */
		poll_qid = index - ioc->iopoll_q_start_index;
		snprintf(rq->name, MPT_NAME_LENGTH, "%s%d-mq-poll%d",
			 ioc->driver_name, ioc->id, poll_qid);
		rq->is_iouring_poll_q = 1;
		ioc->io_uring_poll_queues[poll_qid].reply_q = rq;
	} else {
		if (ioc->msix_enable)
			snprintf(rq->name, MPT_NAME_LENGTH, "%s%d-msix%d",
				 ioc->driver_name, ioc->id, index);
		else
			snprintf(rq->name, MPT_NAME_LENGTH, "%s%d",
				 ioc->driver_name, ioc->id);

		rc = request_irq(pci_irq_vector(pdev, index), _base_interrupt,
				 IRQF_SHARED, rq->name, rq);
		if (rc) {
			pr_err("%s: unable to allocate interrupt %d!\n",
			       rq->name, pci_irq_vector(pdev, index));
			kfree(rq);
			return -EBUSY;
		}
	}

	INIT_LIST_HEAD(&rq->list);
	list_add_tail(&rq->list, &ioc->reply_queue_list);
	return 0;
}
/**
* _base_assign_reply_queues - assigning msix index for each cpu
* @ioc: per adapter object
*
* Fills ioc->cpu_msix_table with the msix index of the reply queue each
* online cpu should use. Nothing is done when msix is not enabled on the
* controller or when msix load balancing is active. With smp affinity
* enabled the table follows the affinity mask of each irq; otherwise (or
* when an irq has no affinity mask) the online cpus are spread evenly
* over the queues. High iops queues and io_uring poll queues are skipped
* in both cases.
*
* The enduser would need to set the affinity via /proc/irq/#/smp_affinity
*
* It would be nice if we could call irq_set_affinity, however it is not
* an exported symbol
*/
static void
_base_assign_reply_queues(struct MPT3SAS_ADAPTER *ioc)
{
unsigned int cpu, nr_cpus, nr_msix, index = 0;
struct adapter_reply_queue *reply_q;
int local_numa_node;
/*
* Queues from iopoll_q_start_index upwards; presumably the number of
* io_uring poll queues.
* NOTE(review): taken before reply_queue_count is clamped to
* facts.MaxMSIxVectors below - confirm this is intended.
*/
int iopoll_q_count = ioc->reply_queue_count -
ioc->iopoll_q_start_index;
if (!_base_is_controller_msix_enabled(ioc))
return;
if (ioc->msix_load_balance)
return;
memset(ioc->cpu_msix_table, 0, ioc->cpu_msix_table_sz);
nr_cpus = num_online_cpus();
/* Never use more reply queues than facts.MaxMSIxVectors. */
nr_msix = ioc->reply_queue_count = min(ioc->reply_queue_count,
ioc->facts.MaxMSIxVectors);
if (!nr_msix)
return;
if (ioc->smp_affinity_enable) {
/*
* set irq affinity to local numa node for those irqs
* corresponding to high iops queues.
*/
if (ioc->high_iops_queues) {
local_numa_node = dev_to_node(&ioc->pdev->dev);
for (index = 0; index < ioc->high_iops_queues;
index++) {
irq_set_affinity_hint(pci_irq_vector(ioc->pdev,
index), cpumask_of_node(local_numa_node));
}
}
/* Map each online cpu in an irq's affinity mask to that queue. */
list_for_each_entry(reply_q, &ioc->reply_queue_list, list) {
const cpumask_t *mask;
if (reply_q->msix_index < ioc->high_iops_queues ||
reply_q->msix_index >= ioc->iopoll_q_start_index)
continue;
mask = pci_irq_get_affinity(ioc->pdev,
reply_q->msix_index);
if (!mask) {
ioc_warn(ioc, "no affinity for msi %x\n",
reply_q->msix_index);
goto fall_back;
}
for_each_cpu_and(cpu, mask, cpu_online_mask) {
if (cpu >= ioc->cpu_msix_table_sz)
break;
ioc->cpu_msix_table[cpu] = reply_q->msix_index;
}
}
return;
}
fall_back:
/*
* Hand out the online cpus in consecutive groups, one group per queue.
* NOTE(review): the adjustment below subtracts high_iops_queues but
* adds iopoll_q_count back - confirm the sign for poll queues.
*/
cpu = cpumask_first(cpu_online_mask);
nr_msix -= (ioc->high_iops_queues - iopoll_q_count);
index = 0;
list_for_each_entry(reply_q, &ioc->reply_queue_list, list) {
unsigned int i, group = nr_cpus / nr_msix;
if (reply_q->msix_index < ioc->high_iops_queues ||
reply_q->msix_index >= ioc->iopoll_q_start_index)
continue;
if (cpu >= nr_cpus)
break;
/* The first (nr_cpus % nr_msix) queues take one extra cpu. */
if (index < nr_cpus % nr_msix)
group++;
for (i = 0 ; i < group ; i++) {
ioc->cpu_msix_table[cpu] = reply_q->msix_index;
cpu = cpumask_next(cpu, cpu_online_mask);
}
index++;
}
}
/**
* _base_check_and_enable_high_iops_queues - enable high iops mode
* @ioc: per adapter object
* @hba_msix_vector_count: msix vectors supported by HBA
*
* Enable high iops queues only if
* - HBA is a SEA/AERO controller and
* - MSI-Xs vector supported by the HBA is 128 and
* - total CPU count in the system >=16 and
* - loaded driver with default max_msix_vectors module parameter and
* - system booted in non kdump mode
*
* Return: nothing.
*/
static void
_base_check_and_enable_high_iops_queues(struct MPT3SAS_ADAPTER *ioc,
int hba_msix_vector_count)
{
u16 lnksta, speed;
/*
* Disable high iops queues if io uring poll queues are enabled.
*/
if (perf_mode == MPT_PERF_MODE_IOPS ||
perf_mode == MPT_PERF_MODE_LATENCY ||
ioc->io_uring_poll_queues) {
ioc->high_iops_queues = 0;
return;
}
if (perf_mode == MPT_PERF_MODE_DEFAULT) {
pcie_capability_read_word(ioc->pdev, PCI_EXP_LNKSTA, &lnksta);
speed = lnksta & PCI_EXP_LNKSTA_CLS;
if (speed < 0x4) {