Merge branches 'core', 'cxgb4', 'ipath', 'iser', 'lockdep', 'mlx4', 'nes', 'ocrdma', 'qib' and 'raw-qp' into for-linus
diff --git a/MAINTAINERS b/MAINTAINERS
index 7071633..5e8a932 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3631,7 +3631,7 @@
 F:	drivers/net/ethernet/icplus/ipg.*
 
 IPATH DRIVER
-M:	Mike Marciniszyn <infinipath@qlogic.com>
+M:	Mike Marciniszyn <infinipath@intel.com>
 L:	linux-rdma@vger.kernel.org
 S:	Maintained
 F:	drivers/infiniband/hw/ipath/
@@ -5455,7 +5455,7 @@
 S:	Maintained
 
 QIB DRIVER
-M:	Mike Marciniszyn <infinipath@qlogic.com>
+M:	Mike Marciniszyn <infinipath@intel.com>
 L:	linux-rdma@vger.kernel.org
 S:	Supported
 F:	drivers/infiniband/hw/qib/
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index eb0add31..a0f29c1 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -51,6 +51,7 @@
 source "drivers/infiniband/hw/cxgb4/Kconfig"
 source "drivers/infiniband/hw/mlx4/Kconfig"
 source "drivers/infiniband/hw/nes/Kconfig"
+source "drivers/infiniband/hw/ocrdma/Kconfig"
 
 source "drivers/infiniband/ulp/ipoib/Kconfig"
 
diff --git a/drivers/infiniband/Makefile b/drivers/infiniband/Makefile
index a3b2d8e..bf846a1 100644
--- a/drivers/infiniband/Makefile
+++ b/drivers/infiniband/Makefile
@@ -8,6 +8,7 @@
 obj-$(CONFIG_INFINIBAND_CXGB4)		+= hw/cxgb4/
 obj-$(CONFIG_MLX4_INFINIBAND)		+= hw/mlx4/
 obj-$(CONFIG_INFINIBAND_NES)		+= hw/nes/
+obj-$(CONFIG_INFINIBAND_OCRDMA)		+= hw/ocrdma/
 obj-$(CONFIG_INFINIBAND_IPOIB)		+= ulp/ipoib/
 obj-$(CONFIG_INFINIBAND_SRP)		+= ulp/srp/
 obj-$(CONFIG_INFINIBAND_SRPT)		+= ulp/srpt/
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index e3e470f..79c7eeb 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -1218,13 +1218,13 @@
 	}
 	if (!conn_id) {
 		ret = -ENOMEM;
-		goto out;
+		goto err1;
 	}
 
 	mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
 	ret = cma_acquire_dev(conn_id);
 	if (ret)
-		goto release_conn_id;
+		goto err2;
 
 	conn_id->cm_id.ib = cm_id;
 	cm_id->context = conn_id;
@@ -1236,31 +1236,33 @@
 	 */
 	atomic_inc(&conn_id->refcount);
 	ret = conn_id->id.event_handler(&conn_id->id, &event);
-	if (!ret) {
-		/*
-		 * Acquire mutex to prevent user executing rdma_destroy_id()
-		 * while we're accessing the cm_id.
-		 */
-		mutex_lock(&lock);
-		if (cma_comp(conn_id, RDMA_CM_CONNECT) && (conn_id->id.qp_type != IB_QPT_UD))
-			ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
-		mutex_unlock(&lock);
-		mutex_unlock(&conn_id->handler_mutex);
-		cma_deref_id(conn_id);
-		goto out;
-	}
-	cma_deref_id(conn_id);
+	if (ret)
+		goto err3;
 
+	/*
+	 * Acquire mutex to prevent user executing rdma_destroy_id()
+	 * while we're accessing the cm_id.
+	 */
+	mutex_lock(&lock);
+	if (cma_comp(conn_id, RDMA_CM_CONNECT) && (conn_id->id.qp_type != IB_QPT_UD))
+		ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
+	mutex_unlock(&lock);
+	mutex_unlock(&conn_id->handler_mutex);
+	mutex_unlock(&listen_id->handler_mutex);
+	cma_deref_id(conn_id);
+	return 0;
+
+err3:
+	cma_deref_id(conn_id);
 	/* Destroy the CM ID by returning a non-zero value. */
 	conn_id->cm_id.ib = NULL;
-
-release_conn_id:
+err2:
 	cma_exch(conn_id, RDMA_CM_DESTROYING);
 	mutex_unlock(&conn_id->handler_mutex);
-	rdma_destroy_id(&conn_id->id);
-
-out:
+err1:
 	mutex_unlock(&listen_id->handler_mutex);
+	if (conn_id)
+		rdma_destroy_id(&conn_id->id);
 	return ret;
 }
 
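
The cma.c rework above straightens the request handler: the success path now
runs linearly and returns 0, while failures unwind through numbered labels
(err1..err3) in reverse order of acquisition, with rdma_destroy_id() deferred
until no handler mutex is held.  A hedged sketch of the same ladder in plain
C; the two malloc()s stand in for conn_id allocation and cma_acquire_dev():

#include <errno.h>
#include <stdlib.h>

static int demo_setup(char **a_out, char **b_out)
{
	char *a, *b;
	int ret;

	a = malloc(16);
	if (!a) {
		ret = -ENOMEM;
		goto err1;		/* nothing acquired yet */
	}
	b = malloc(16);
	if (!b) {
		ret = -ENOMEM;
		goto err2;		/* undo only what 'a' needs */
	}
	*a_out = a;			/* success: caller owns both */
	*b_out = b;
	return 0;

err2:
	free(a);
err1:
	return ret;
}
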
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 71f0c0f..a841123 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -269,7 +269,7 @@
 	} else
 		down_write(&mm->mmap_sem);
 
-	current->mm->locked_vm -= diff;
+	current->mm->pinned_vm -= diff;
 	up_write(&mm->mmap_sem);
 	mmput(mm);
 	kfree(umem);
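
The umem change belongs to the locked_vm/pinned_vm split: pages pinned with
get_user_pages() are accounted in mm->pinned_vm, separately from mlock()ed
pages in mm->locked_vm, so the release path must decrement the counter the
pin path incremented.  A hedged kernel-context sketch (not a complete driver;
uses the era's mmap_sem API):

#include <linux/mm_types.h>
#include <linux/rwsem.h>

/* Balance the accounting done when 'npages' pages were pinned. */
static void demo_unaccount_pinned(struct mm_struct *mm, unsigned long npages)
{
	down_write(&mm->mmap_sem);	/* mmap_sem guards the vm counters */
	mm->pinned_vm -= npages;
	up_write(&mm->mmap_sem);
}
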
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 4d27e4c3..f9d0d7c 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -41,13 +41,18 @@
 
 #include "uverbs.h"
 
-static struct lock_class_key pd_lock_key;
-static struct lock_class_key mr_lock_key;
-static struct lock_class_key cq_lock_key;
-static struct lock_class_key qp_lock_key;
-static struct lock_class_key ah_lock_key;
-static struct lock_class_key srq_lock_key;
-static struct lock_class_key xrcd_lock_key;
+struct uverbs_lock_class {
+	struct lock_class_key	key;
+	char			name[16];
+};
+
+static struct uverbs_lock_class pd_lock_class	= { .name = "PD-uobj" };
+static struct uverbs_lock_class mr_lock_class	= { .name = "MR-uobj" };
+static struct uverbs_lock_class cq_lock_class	= { .name = "CQ-uobj" };
+static struct uverbs_lock_class qp_lock_class	= { .name = "QP-uobj" };
+static struct uverbs_lock_class ah_lock_class	= { .name = "AH-uobj" };
+static struct uverbs_lock_class srq_lock_class	= { .name = "SRQ-uobj" };
+static struct uverbs_lock_class xrcd_lock_class = { .name = "XRCD-uobj" };
 
 #define INIT_UDATA(udata, ibuf, obuf, ilen, olen)			\
 	do {								\
@@ -83,13 +88,13 @@
  */
 
 static void init_uobj(struct ib_uobject *uobj, u64 user_handle,
-		      struct ib_ucontext *context, struct lock_class_key *key)
+		      struct ib_ucontext *context, struct uverbs_lock_class *c)
 {
 	uobj->user_handle = user_handle;
 	uobj->context     = context;
 	kref_init(&uobj->ref);
 	init_rwsem(&uobj->mutex);
-	lockdep_set_class(&uobj->mutex, key);
+	lockdep_set_class_and_name(&uobj->mutex, &c->key, c->name);
 	uobj->live        = 0;
 }
 
@@ -522,7 +527,7 @@
 	if (!uobj)
 		return -ENOMEM;
 
-	init_uobj(uobj, 0, file->ucontext, &pd_lock_key);
+	init_uobj(uobj, 0, file->ucontext, &pd_lock_class);
 	down_write(&uobj->mutex);
 
 	pd = file->device->ib_dev->alloc_pd(file->device->ib_dev,
@@ -750,7 +755,7 @@
 		goto err_tree_mutex_unlock;
 	}
 
-	init_uobj(&obj->uobject, 0, file->ucontext, &xrcd_lock_key);
+	init_uobj(&obj->uobject, 0, file->ucontext, &xrcd_lock_class);
 
 	down_write(&obj->uobject.mutex);
 
@@ -947,7 +952,7 @@
 	if (!uobj)
 		return -ENOMEM;
 
-	init_uobj(uobj, 0, file->ucontext, &mr_lock_key);
+	init_uobj(uobj, 0, file->ucontext, &mr_lock_class);
 	down_write(&uobj->mutex);
 
 	pd = idr_read_pd(cmd.pd_handle, file->ucontext);
@@ -1115,7 +1120,7 @@
 	if (!obj)
 		return -ENOMEM;
 
-	init_uobj(&obj->uobject, cmd.user_handle, file->ucontext, &cq_lock_key);
+	init_uobj(&obj->uobject, cmd.user_handle, file->ucontext, &cq_lock_class);
 	down_write(&obj->uobject.mutex);
 
 	if (cmd.comp_channel >= 0) {
@@ -1399,6 +1404,9 @@
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
+	if (cmd.qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
+		return -EPERM;
+
 	INIT_UDATA(&udata, buf + sizeof cmd,
 		   (unsigned long) cmd.response + sizeof resp,
 		   in_len - sizeof cmd, out_len - sizeof resp);
@@ -1407,7 +1415,7 @@
 	if (!obj)
 		return -ENOMEM;
 
-	init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_key);
+	init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_class);
 	down_write(&obj->uevent.uobject.mutex);
 
 	if (cmd.qp_type == IB_QPT_XRC_TGT) {
@@ -1418,13 +1426,6 @@
 		}
 		device = xrcd->device;
 	} else {
-		pd  = idr_read_pd(cmd.pd_handle, file->ucontext);
-		scq = idr_read_cq(cmd.send_cq_handle, file->ucontext, 0);
-		if (!pd || !scq) {
-			ret = -EINVAL;
-			goto err_put;
-		}
-
 		if (cmd.qp_type == IB_QPT_XRC_INI) {
 			cmd.max_recv_wr = cmd.max_recv_sge = 0;
 		} else {
@@ -1435,13 +1436,24 @@
 					goto err_put;
 				}
 			}
-			rcq = (cmd.recv_cq_handle == cmd.send_cq_handle) ?
-			       scq : idr_read_cq(cmd.recv_cq_handle, file->ucontext, 1);
-			if (!rcq) {
-				ret = -EINVAL;
-				goto err_put;
+
+			if (cmd.recv_cq_handle != cmd.send_cq_handle) {
+				rcq = idr_read_cq(cmd.recv_cq_handle, file->ucontext, 0);
+				if (!rcq) {
+					ret = -EINVAL;
+					goto err_put;
+				}
 			}
 		}
+
+		scq = idr_read_cq(cmd.send_cq_handle, file->ucontext, !!rcq);
+		rcq = rcq ?: scq;
+		pd  = idr_read_pd(cmd.pd_handle, file->ucontext);
+		if (!pd || !scq) {
+			ret = -EINVAL;
+			goto err_put;
+		}
+
 		device = pd->device;
 	}
 
 
@@ -1585,7 +1597,7 @@
 	if (!obj)
 		return -ENOMEM;
 
-	init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_key);
+	init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_class);
 	down_write(&obj->uevent.uobject.mutex);
 
 	xrcd = idr_read_xrcd(cmd.pd_handle, file->ucontext, &xrcd_uobj);
@@ -2272,7 +2284,7 @@
 	if (!uobj)
 		return -ENOMEM;
 
-	init_uobj(uobj, cmd.user_handle, file->ucontext, &ah_lock_key);
+	init_uobj(uobj, cmd.user_handle, file->ucontext, &ah_lock_class);
 	down_write(&uobj->mutex);
 
 	pd = idr_read_pd(cmd.pd_handle, file->ucontext);
@@ -2476,30 +2488,30 @@
 	if (!obj)
 		return -ENOMEM;
 
-	init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext, &srq_lock_key);
+	init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext, &srq_lock_class);
 	down_write(&obj->uevent.uobject.mutex);
 
-	pd  = idr_read_pd(cmd->pd_handle, file->ucontext);
-	if (!pd) {
-		ret = -EINVAL;
-		goto err;
-	}
-
 	if (cmd->srq_type == IB_SRQT_XRC) {
-		attr.ext.xrc.cq  = idr_read_cq(cmd->cq_handle, file->ucontext, 0);
-		if (!attr.ext.xrc.cq) {
-			ret = -EINVAL;
-			goto err_put_pd;
-		}
-
 		attr.ext.xrc.xrcd  = idr_read_xrcd(cmd->xrcd_handle, file->ucontext, &xrcd_uobj);
 		if (!attr.ext.xrc.xrcd) {
 			ret = -EINVAL;
-			goto err_put_cq;
+			goto err;
 		}
 
 		obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject);
 		atomic_inc(&obj->uxrcd->refcnt);
+
+		attr.ext.xrc.cq  = idr_read_cq(cmd->cq_handle, file->ucontext, 0);
+		if (!attr.ext.xrc.cq) {
+			ret = -EINVAL;
+			goto err_put_xrcd;
+		}
+	}
+
+	pd  = idr_read_pd(cmd->pd_handle, file->ucontext);
+	if (!pd) {
+		ret = -EINVAL;
+		goto err_put_cq;
 	}
 
 	attr.event_handler  = ib_uverbs_srq_event_handler;
@@ -2576,17 +2588,17 @@
 	ib_destroy_srq(srq);
 
 err_put:
-	if (cmd->srq_type == IB_SRQT_XRC) {
-		atomic_dec(&obj->uxrcd->refcnt);
-		put_uobj_read(xrcd_uobj);
-	}
+	put_pd_read(pd);
 
 err_put_cq:
 	if (cmd->srq_type == IB_SRQT_XRC)
 		put_cq_read(attr.ext.xrc.cq);
 
-err_put_pd:
-	put_pd_read(pd);
+err_put_xrcd:
+	if (cmd->srq_type == IB_SRQT_XRC) {
+		atomic_dec(&obj->uxrcd->refcnt);
+		put_uobj_read(xrcd_uobj);
+	}
 
 err:
 	put_uobj_write(&obj->uevent.uobject);
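
The payoff of uverbs_lock_class shows up in init_uobj():
lockdep_set_class_and_name() attaches a distinct class and a printable name
to each object mutex, so lockdep splats read "QP-uobj" instead of an
anonymous "&uobj->mutex" for every object type.  A hedged sketch of the same
pattern on a standalone rwsem (the demo names are hypothetical):

#include <linux/lockdep.h>
#include <linux/rwsem.h>

struct demo_lock_class {
	struct lock_class_key	key;
	char			name[16];
};

static struct demo_lock_class demo_class = { .name = "DEMO-uobj" };

/* Give each object's semaphore a named lockdep class at init time. */
static void demo_init_obj_lock(struct rw_semaphore *sem)
{
	init_rwsem(sem);
	lockdep_set_class_and_name(sem, &demo_class.key, demo_class.name);
}
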
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 575b780..30f199e 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -479,6 +479,7 @@
 				[IB_QPT_UD]  = (IB_QP_PKEY_INDEX		|
 						IB_QP_PORT			|
 						IB_QP_QKEY),
+				[IB_QPT_RAW_PACKET] = IB_QP_PORT,
 				[IB_QPT_UC]  = (IB_QP_PKEY_INDEX		|
 						IB_QP_PORT			|
 						IB_QP_ACCESS_FLAGS),
@@ -1183,23 +1184,33 @@
 
 int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
 {
+	int ret;
+
 	if (!qp->device->attach_mcast)
 		return -ENOSYS;
 	if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD)
 		return -EINVAL;
 
-	return qp->device->attach_mcast(qp, gid, lid);
+	ret = qp->device->attach_mcast(qp, gid, lid);
+	if (!ret)
+		atomic_inc(&qp->usecnt);
+	return ret;
 }
 EXPORT_SYMBOL(ib_attach_mcast);
 
 int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
 {
+	int ret;
+
 	if (!qp->device->detach_mcast)
 		return -ENOSYS;
 	if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD)
 		return -EINVAL;
 
-	return qp->device->detach_mcast(qp, gid, lid);
+	ret = qp->device->detach_mcast(qp, gid, lid);
+	if (!ret)
+		atomic_dec(&qp->usecnt);
+	return ret;
 }
 EXPORT_SYMBOL(ib_detach_mcast);
 
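
Bumping qp->usecnt on multicast attach (and dropping it on detach) gives
destroy paths something to veto on; ib_destroy_qp() already refuses a QP
whose usecnt is nonzero.  A hedged illustration of that refcount-guard idea
in standalone C11:

#include <errno.h>
#include <stdatomic.h>

struct demo_qp {
	atomic_int usecnt;	/* attachments still outstanding */
};

/* Refuse to tear down an object other code still references. */
static int demo_destroy_qp(struct demo_qp *qp)
{
	if (atomic_load(&qp->usecnt))
		return -EBUSY;
	/* ... actually free the QP here ... */
	return 0;
}
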
diff --git a/drivers/infiniband/hw/cxgb4/Makefile b/drivers/infiniband/hw/cxgb4/Makefile
index 46b878c..e11cf72 100644
--- a/drivers/infiniband/hw/cxgb4/Makefile
+++ b/drivers/infiniband/hw/cxgb4/Makefile
@@ -2,4 +2,4 @@
 
 obj-$(CONFIG_INFINIBAND_CXGB4) += iw_cxgb4.o
 
-iw_cxgb4-y :=  device.o cm.o provider.o mem.o cq.o qp.o resource.o ev.o
+iw_cxgb4-y :=  device.o cm.o provider.o mem.o cq.o qp.o resource.o ev.o id_table.o
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 92b4c2b..55ab284e 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -1362,7 +1362,10 @@
 
 	ep = lookup_tid(t, tid);
-	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
-	BUG_ON(!ep);
+	if (!ep) {
+		printk(KERN_WARNING MOD "Abort rpl to freed endpoint\n");
+		return 0;
+	}
+	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
 	mutex_lock(&ep->com.mutex);
 	switch (ep->com.state) {
 	case ABORTING:
@@ -1410,6 +1413,24 @@
 		return 0;
 	}
 
+	/*
+	 * Log interesting failures.
+	 */
+	switch (status) {
+	case CPL_ERR_CONN_RESET:
+	case CPL_ERR_CONN_TIMEDOUT:
+		break;
+	default:
+		printk(KERN_INFO MOD "Active open failure - "
+		       "atid %u status %u errno %d %pI4:%u->%pI4:%u\n",
+		       atid, status, status2errno(status),
+		       &ep->com.local_addr.sin_addr.s_addr,
+		       ntohs(ep->com.local_addr.sin_port),
+		       &ep->com.remote_addr.sin_addr.s_addr,
+		       ntohs(ep->com.remote_addr.sin_port));
+		break;
+	}
+
 	connect_reply_upcall(ep, status2errno(status));
 	state_set(&ep->com, DEAD);
 
@@ -1593,7 +1614,7 @@
 					n, n->dev, 0);
 		if (!ep->l2t)
 			goto out;
-		ep->mtu = dst_mtu(ep->dst);
+		ep->mtu = dst_mtu(dst);
 		ep->tx_chan = cxgb4_port_chan(n->dev);
 		ep->smac_idx = (cxgb4_port_viid(n->dev) & 0x7F) << 1;
 		step = cdev->rdev.lldi.ntxq /
@@ -2656,6 +2677,12 @@
 	unsigned int tid = GET_TID(req);
 
 	ep = lookup_tid(t, tid);
+	if (!ep) {
+		printk(KERN_WARNING MOD
+	       "Abort on non-existent endpoint, tid %u\n", tid);
+		kfree_skb(skb);
+		return 0;
+	}
 	if (is_neg_adv_abort(req->status)) {
 		PDBG("%s neg_adv_abort ep %p tid %u\n", __func__, ep,
 		     ep->hwtid);
@@ -2667,11 +2694,8 @@
 
 	/*
 	 * Wake up any threads in rdma_init() or rdma_fini().
-	 * However, this is not needed if com state is just
-	 * MPA_REQ_SENT
 	 */
-	if (ep->com.state != MPA_REQ_SENT)
-		c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
+	c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
 	sched(dev, skb);
 	return 0;
 }
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index 6d0df6e..cb4ecd7 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -32,6 +32,7 @@
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/debugfs.h>
+#include <linux/vmalloc.h>
 
 #include <rdma/ib_verbs.h>
 
@@ -44,6 +45,12 @@
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_VERSION(DRV_VERSION);
 
+struct uld_ctx {
+	struct list_head entry;
+	struct cxgb4_lld_info lldi;
+	struct c4iw_dev *dev;
+};
+
 static LIST_HEAD(uld_ctx_list);
 static DEFINE_MUTEX(dev_mutex);
 
@@ -115,7 +122,7 @@
 		printk(KERN_INFO "%s null qpd?\n", __func__);
 		return 0;
 	}
-	kfree(qpd->buf);
+	vfree(qpd->buf);
 	kfree(qpd);
 	return 0;
 }
@@ -139,7 +146,7 @@
 	spin_unlock_irq(&qpd->devp->lock);
 
 	qpd->bufsize = count * 128;
-	qpd->buf = kmalloc(qpd->bufsize, GFP_KERNEL);
+	qpd->buf = vmalloc(qpd->bufsize);
 	if (!qpd->buf) {
 		ret = -ENOMEM;
 		goto err1;
@@ -240,6 +247,81 @@
 	.llseek  = default_llseek,
 };
 
+static char *db_state_str[] = {"NORMAL", "FLOW_CONTROL", "RECOVERY"};
+
+static int stats_show(struct seq_file *seq, void *v)
+{
+	struct c4iw_dev *dev = seq->private;
+
+	seq_printf(seq, "   Object: %10s %10s %10s %10s\n", "Total", "Current",
+		   "Max", "Fail");
+	seq_printf(seq, "     PDID: %10llu %10llu %10llu %10llu\n",
+			dev->rdev.stats.pd.total, dev->rdev.stats.pd.cur,
+			dev->rdev.stats.pd.max, dev->rdev.stats.pd.fail);
+	seq_printf(seq, "      QID: %10llu %10llu %10llu %10llu\n",
+			dev->rdev.stats.qid.total, dev->rdev.stats.qid.cur,
+			dev->rdev.stats.qid.max, dev->rdev.stats.qid.fail);
+	seq_printf(seq, "   TPTMEM: %10llu %10llu %10llu %10llu\n",
+			dev->rdev.stats.stag.total, dev->rdev.stats.stag.cur,
+			dev->rdev.stats.stag.max, dev->rdev.stats.stag.fail);
+	seq_printf(seq, "   PBLMEM: %10llu %10llu %10llu %10llu\n",
+			dev->rdev.stats.pbl.total, dev->rdev.stats.pbl.cur,
+			dev->rdev.stats.pbl.max, dev->rdev.stats.pbl.fail);
+	seq_printf(seq, "   RQTMEM: %10llu %10llu %10llu %10llu\n",
+			dev->rdev.stats.rqt.total, dev->rdev.stats.rqt.cur,
+			dev->rdev.stats.rqt.max, dev->rdev.stats.rqt.fail);
+	seq_printf(seq, "  OCQPMEM: %10llu %10llu %10llu %10llu\n",
+			dev->rdev.stats.ocqp.total, dev->rdev.stats.ocqp.cur,
+			dev->rdev.stats.ocqp.max, dev->rdev.stats.ocqp.fail);
+	seq_printf(seq, "  DB FULL: %10llu\n", dev->rdev.stats.db_full);
+	seq_printf(seq, " DB EMPTY: %10llu\n", dev->rdev.stats.db_empty);
+	seq_printf(seq, "  DB DROP: %10llu\n", dev->rdev.stats.db_drop);
+	seq_printf(seq, " DB State: %s Transitions %llu\n",
+		   db_state_str[dev->db_state],
+		   dev->rdev.stats.db_state_transitions);
+	return 0;
+}
+
+static int stats_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, stats_show, inode->i_private);
+}
+
+static ssize_t stats_clear(struct file *file, const char __user *buf,
+		size_t count, loff_t *pos)
+{
+	struct c4iw_dev *dev = ((struct seq_file *)file->private_data)->private;
+
+	mutex_lock(&dev->rdev.stats.lock);
+	dev->rdev.stats.pd.max = 0;
+	dev->rdev.stats.pd.fail = 0;
+	dev->rdev.stats.qid.max = 0;
+	dev->rdev.stats.qid.fail = 0;
+	dev->rdev.stats.stag.max = 0;
+	dev->rdev.stats.stag.fail = 0;
+	dev->rdev.stats.pbl.max = 0;
+	dev->rdev.stats.pbl.fail = 0;
+	dev->rdev.stats.rqt.max = 0;
+	dev->rdev.stats.rqt.fail = 0;
+	dev->rdev.stats.ocqp.max = 0;
+	dev->rdev.stats.ocqp.fail = 0;
+	dev->rdev.stats.db_full = 0;
+	dev->rdev.stats.db_empty = 0;
+	dev->rdev.stats.db_drop = 0;
+	dev->rdev.stats.db_state_transitions = 0;
+	mutex_unlock(&dev->rdev.stats.lock);
+	return count;
+}
+
+static const struct file_operations stats_debugfs_fops = {
+	.owner   = THIS_MODULE,
+	.open    = stats_open,
+	.release = single_release,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.write   = stats_clear,
+};
+
 static int setup_debugfs(struct c4iw_dev *devp)
 {
 	struct dentry *de;
@@ -256,6 +338,12 @@
 				 (void *)devp, &stag_debugfs_fops);
 	if (de && de->d_inode)
 		de->d_inode->i_size = 4096;
+
+	de = debugfs_create_file("stats", S_IWUSR, devp->debugfs_root,
+			(void *)devp, &stats_debugfs_fops);
+	if (de && de->d_inode)
+		de->d_inode->i_size = 4096;
+
 	return 0;
 }
 
@@ -269,9 +357,13 @@
 	list_for_each_safe(pos, nxt, &uctx->qpids) {
 		entry = list_entry(pos, struct c4iw_qid_list, entry);
 		list_del_init(&entry->entry);
-		if (!(entry->qid & rdev->qpmask))
-			c4iw_put_resource(&rdev->resource.qid_fifo, entry->qid,
-					  &rdev->resource.qid_fifo_lock);
+		if (!(entry->qid & rdev->qpmask)) {
+			c4iw_put_resource(&rdev->resource.qid_table,
+					  entry->qid);
+			mutex_lock(&rdev->stats.lock);
+			rdev->stats.qid.cur -= rdev->qpmask + 1;
+			mutex_unlock(&rdev->stats.lock);
+		}
 		kfree(entry);
 	}
 
@@ -332,6 +424,13 @@
 		goto err1;
 	}
 
+	rdev->stats.pd.total = T4_MAX_NUM_PD;
+	rdev->stats.stag.total = rdev->lldi.vr->stag.size;
+	rdev->stats.pbl.total = rdev->lldi.vr->pbl.size;
+	rdev->stats.rqt.total = rdev->lldi.vr->rq.size;
+	rdev->stats.ocqp.total = rdev->lldi.vr->ocq.size;
+	rdev->stats.qid.total = rdev->lldi.vr->qp.size;
+
 	err = c4iw_init_resource(rdev, c4iw_num_stags(rdev), T4_MAX_NUM_PD);
 	if (err) {
 		printk(KERN_ERR MOD "error %d initializing resources\n", err);
@@ -370,12 +469,6 @@
 	c4iw_destroy_resource(&rdev->resource);
 }
 
-struct uld_ctx {
-	struct list_head entry;
-	struct cxgb4_lld_info lldi;
-	struct c4iw_dev *dev;
-};
-
 static void c4iw_dealloc(struct uld_ctx *ctx)
 {
 	c4iw_rdev_close(&ctx->dev->rdev);
@@ -440,6 +533,8 @@
 	idr_init(&devp->qpidr);
 	idr_init(&devp->mmidr);
 	spin_lock_init(&devp->lock);
+	mutex_init(&devp->rdev.stats.lock);
+	mutex_init(&devp->db_mutex);
 
 	if (c4iw_debugfs_root) {
 		devp->debugfs_root = debugfs_create_dir(
@@ -585,11 +680,234 @@
 	return 0;
 }
 
+static int disable_qp_db(int id, void *p, void *data)
+{
+	struct c4iw_qp *qp = p;
+
+	t4_disable_wq_db(&qp->wq);
+	return 0;
+}
+
+static void stop_queues(struct uld_ctx *ctx)
+{
+	spin_lock_irq(&ctx->dev->lock);
+	if (ctx->dev->db_state == NORMAL) {
+		ctx->dev->rdev.stats.db_state_transitions++;
+		ctx->dev->db_state = FLOW_CONTROL;
+		idr_for_each(&ctx->dev->qpidr, disable_qp_db, NULL);
+	}
+	spin_unlock_irq(&ctx->dev->lock);
+}
+
+static int enable_qp_db(int id, void *p, void *data)
+{
+	struct c4iw_qp *qp = p;
+
+	t4_enable_wq_db(&qp->wq);
+	return 0;
+}
+
+static void resume_queues(struct uld_ctx *ctx)
+{
+	spin_lock_irq(&ctx->dev->lock);
+	if (ctx->dev->qpcnt <= db_fc_threshold &&
+	    ctx->dev->db_state == FLOW_CONTROL) {
+		ctx->dev->db_state = NORMAL;
+		ctx->dev->rdev.stats.db_state_transitions++;
+		idr_for_each(&ctx->dev->qpidr, enable_qp_db, NULL);
+	}
+	spin_unlock_irq(&ctx->dev->lock);
+}
+
+struct qp_list {
+	unsigned idx;
+	struct c4iw_qp **qps;
+};
+
+static int add_and_ref_qp(int id, void *p, void *data)
+{
+	struct qp_list *qp_listp = data;
+	struct c4iw_qp *qp = p;
+
+	c4iw_qp_add_ref(&qp->ibqp);
+	qp_listp->qps[qp_listp->idx++] = qp;
+	return 0;
+}
+
+static int count_qps(int id, void *p, void *data)
+{
+	unsigned *countp = data;
+	(*countp)++;
+	return 0;
+}
+
+static void deref_qps(struct qp_list qp_list)
+{
+	int idx;
+
+	for (idx = 0; idx < qp_list.idx; idx++)
+		c4iw_qp_rem_ref(&qp_list.qps[idx]->ibqp);
+}
+
+static void recover_lost_dbs(struct uld_ctx *ctx, struct qp_list *qp_list)
+{
+	int idx;
+	int ret;
+
+	for (idx = 0; idx < qp_list->idx; idx++) {
+		struct c4iw_qp *qp = qp_list->qps[idx];
+
+		ret = cxgb4_sync_txq_pidx(qp->rhp->rdev.lldi.ports[0],
+					  qp->wq.sq.qid,
+					  t4_sq_host_wq_pidx(&qp->wq),
+					  t4_sq_wq_size(&qp->wq));
+		if (ret) {
+			printk(KERN_ERR MOD "%s: Fatal error - "
+			       "DB overflow recovery failed - "
+			       "error syncing SQ qid %u\n",
+			       pci_name(ctx->lldi.pdev), qp->wq.sq.qid);
+			return;
+		}
+
+		ret = cxgb4_sync_txq_pidx(qp->rhp->rdev.lldi.ports[0],
+					  qp->wq.rq.qid,
+					  t4_rq_host_wq_pidx(&qp->wq),
+					  t4_rq_wq_size(&qp->wq));
+
+		if (ret) {
+			printk(KERN_ERR MOD "%s: Fatal error - "
+			       "DB overflow recovery failed - "
+			       "error syncing RQ qid %u\n",
+			       pci_name(ctx->lldi.pdev), qp->wq.rq.qid);
+			return;
+		}
+
+		/* Wait for the dbfifo to drain */
+		while (cxgb4_dbfifo_count(qp->rhp->rdev.lldi.ports[0], 1) > 0) {
+			set_current_state(TASK_UNINTERRUPTIBLE);
+			schedule_timeout(usecs_to_jiffies(10));
+		}
+	}
+}
+
+static void recover_queues(struct uld_ctx *ctx)
+{
+	int count = 0;
+	struct qp_list qp_list;
+	int ret;
+
+	/* lock out kernel db ringers */
+	mutex_lock(&ctx->dev->db_mutex);
+
+	/* put all queues in to recovery mode */
+	spin_lock_irq(&ctx->dev->lock);
+	ctx->dev->db_state = RECOVERY;
+	ctx->dev->rdev.stats.db_state_transitions++;
+	idr_for_each(&ctx->dev->qpidr, disable_qp_db, NULL);
+	spin_unlock_irq(&ctx->dev->lock);
+
+	/* slow everybody down */
+	set_current_state(TASK_UNINTERRUPTIBLE);
+	schedule_timeout(usecs_to_jiffies(1000));
+
+	/* Wait for the dbfifo to completely drain. */
+	while (cxgb4_dbfifo_count(ctx->dev->rdev.lldi.ports[0], 1) > 0) {
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		schedule_timeout(usecs_to_jiffies(10));
+	}
+
+	/* flush the SGE contexts */
+	ret = cxgb4_flush_eq_cache(ctx->dev->rdev.lldi.ports[0]);
+	if (ret) {
+		printk(KERN_ERR MOD "%s: Fatal error - DB overflow recovery failed\n",
+		       pci_name(ctx->lldi.pdev));
+		goto out;
+	}
+
+	/* Count active queues so we can build a list of queues to recover */
+	spin_lock_irq(&ctx->dev->lock);
+	idr_for_each(&ctx->dev->qpidr, count_qps, &count);
+
+	qp_list.qps = kzalloc(count * sizeof *qp_list.qps, GFP_ATOMIC);
+	if (!qp_list.qps) {
+		printk(KERN_ERR MOD "%s: Fatal error - DB overflow recovery failed\n",
+		       pci_name(ctx->lldi.pdev));
+		spin_unlock_irq(&ctx->dev->lock);
+		goto out;
+	}
+	qp_list.idx = 0;
+
+	/* add and ref each qp so it doesn't get freed */
+	idr_for_each(&ctx->dev->qpidr, add_and_ref_qp, &qp_list);
+
+	spin_unlock_irq(&ctx->dev->lock);
+
+	/* now traverse the list in a safe context to recover the db state */
+	recover_lost_dbs(ctx, &qp_list);
+
+	/* we're almost done!  deref the qps and clean up */
+	deref_qps(qp_list);
+	kfree(qp_list.qps);
+
+	/* Wait for the dbfifo to completely drain again */
+	while (cxgb4_dbfifo_count(ctx->dev->rdev.lldi.ports[0], 1) > 0) {
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		schedule_timeout(usecs_to_jiffies(10));
+	}
+
+	/* resume the queues */
+	spin_lock_irq(&ctx->dev->lock);
+	if (ctx->dev->qpcnt > db_fc_threshold)
+		ctx->dev->db_state = FLOW_CONTROL;
+	else {
+		ctx->dev->db_state = NORMAL;
+		idr_for_each(&ctx->dev->qpidr, enable_qp_db, NULL);
+	}
+	ctx->dev->rdev.stats.db_state_transitions++;
+	spin_unlock_irq(&ctx->dev->lock);
+
+out:
+	/* start up kernel db ringers again */
+	mutex_unlock(&ctx->dev->db_mutex);
+}
+
+static int c4iw_uld_control(void *handle, enum cxgb4_control control, ...)
+{
+	struct uld_ctx *ctx = handle;
+
+	switch (control) {
+	case CXGB4_CONTROL_DB_FULL:
+		stop_queues(ctx);
+		mutex_lock(&ctx->dev->rdev.stats.lock);
+		ctx->dev->rdev.stats.db_full++;
+		mutex_unlock(&ctx->dev->rdev.stats.lock);
+		break;
+	case CXGB4_CONTROL_DB_EMPTY:
+		resume_queues(ctx);
+		mutex_lock(&ctx->dev->rdev.stats.lock);
+		ctx->dev->rdev.stats.db_empty++;
+		mutex_unlock(&ctx->dev->rdev.stats.lock);
+		break;
+	case CXGB4_CONTROL_DB_DROP:
+		recover_queues(ctx);
+		mutex_lock(&ctx->dev->rdev.stats.lock);
+		ctx->dev->rdev.stats.db_drop++;
+		mutex_unlock(&ctx->dev->rdev.stats.lock);
+		break;
+	default:
+		printk(KERN_WARNING MOD "%s: unknown control cmd %u\n",
+		       pci_name(ctx->lldi.pdev), control);
+		break;
+	}
+	return 0;
+}
+
 static struct cxgb4_uld_info c4iw_uld_info = {
 	.name = DRV_NAME,
 	.add = c4iw_uld_add,
 	.rx_handler = c4iw_uld_rx_handler,
 	.state_change = c4iw_uld_state_change,
+	.control = c4iw_uld_control,
 };
 
 static int __init c4iw_init_module(void)
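
The new .control hook drives a three-state doorbell machine: DB_FULL moves
NORMAL to FLOW_CONTROL (user doorbells disabled), DB_EMPTY resumes once the
QP count is back at or under db_fc_threshold, and DB_DROP forces the
recover_queues() pass.  A hedged, lock-free sketch of just the transitions:

enum demo_db_state { DEMO_NORMAL, DEMO_FLOW_CONTROL, DEMO_RECOVERY };
enum demo_db_event { DEMO_DB_FULL, DEMO_DB_EMPTY, DEMO_DB_DROP };

/* Next-state function mirroring stop_queues()/resume_queues(); qpcnt
 * and threshold model the db_fc_threshold comparison above. */
static enum demo_db_state demo_db_next(enum demo_db_state s,
				       enum demo_db_event e,
				       int qpcnt, int threshold)
{
	switch (e) {
	case DEMO_DB_FULL:
		return s == DEMO_NORMAL ? DEMO_FLOW_CONTROL : s;
	case DEMO_DB_EMPTY:
		return (s == DEMO_FLOW_CONTROL && qpcnt <= threshold) ?
			DEMO_NORMAL : s;
	case DEMO_DB_DROP:
		return DEMO_RECOVERY;	/* recovery later settles back to
					 * NORMAL or FLOW_CONTROL by qpcnt */
	}
	return s;
}
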
diff --git a/drivers/infiniband/hw/cxgb4/ev.c b/drivers/infiniband/hw/cxgb4/ev.c
index 397cb36..cf2f6b4 100644
--- a/drivers/infiniband/hw/cxgb4/ev.c
+++ b/drivers/infiniband/hw/cxgb4/ev.c
@@ -84,7 +84,7 @@
 	struct c4iw_qp *qhp;
 	u32 cqid;
 
-	spin_lock(&dev->lock);
+	spin_lock_irq(&dev->lock);
 	qhp = get_qhp(dev, CQE_QPID(err_cqe));
 	if (!qhp) {
 		printk(KERN_ERR MOD "BAD AE qpid 0x%x opcode %d "
@@ -93,7 +93,7 @@
 		       CQE_OPCODE(err_cqe), CQE_STATUS(err_cqe),
 		       CQE_TYPE(err_cqe), CQE_WRID_HI(err_cqe),
 		       CQE_WRID_LOW(err_cqe));
-		spin_unlock(&dev->lock);
+		spin_unlock_irq(&dev->lock);
 		goto out;
 	}
 
@@ -109,13 +109,13 @@
 		       CQE_OPCODE(err_cqe), CQE_STATUS(err_cqe),
 		       CQE_TYPE(err_cqe), CQE_WRID_HI(err_cqe),
 		       CQE_WRID_LOW(err_cqe));
-		spin_unlock(&dev->lock);
+		spin_unlock_irq(&dev->lock);
 		goto out;
 	}
 
 	c4iw_qp_add_ref(&qhp->ibqp);
 	atomic_inc(&chp->refcnt);
-	spin_unlock(&dev->lock);
+	spin_unlock_irq(&dev->lock);
 
 	/* Bad incoming write */
 	if (RQ_TYPE(err_cqe) &&
diff --git a/drivers/infiniband/hw/cxgb4/id_table.c b/drivers/infiniband/hw/cxgb4/id_table.c
new file mode 100644
index 0000000..f95e5df
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb4/id_table.c
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2011 Chelsio Communications.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/kernel.h>
+#include <linux/random.h>
+#include "iw_cxgb4.h"
+
+#define RANDOM_SKIP 16
+
+/*
+ * Trivial bitmap-based allocator. If the random flag is set, the
+ * allocator is designed to:
+ * - pseudo-randomize the id returned such that it is not trivially predictable.
+ * - avoid reuse of recently used id (at the expense of predictability)
+ */
+u32 c4iw_id_alloc(struct c4iw_id_table *alloc)
+{
+	unsigned long flags;
+	u32 obj;
+
+	spin_lock_irqsave(&alloc->lock, flags);
+
+	obj = find_next_zero_bit(alloc->table, alloc->max, alloc->last);
+	if (obj >= alloc->max)
+		obj = find_first_zero_bit(alloc->table, alloc->max);
+
+	if (obj < alloc->max) {
+		if (alloc->flags & C4IW_ID_TABLE_F_RANDOM)
+			alloc->last += random32() % RANDOM_SKIP;
+		else
+			alloc->last = obj + 1;
+		if (alloc->last >= alloc->max)
+			alloc->last = 0;
+		set_bit(obj, alloc->table);
+		obj += alloc->start;
+	} else
+		obj = -1;
+
+	spin_unlock_irqrestore(&alloc->lock, flags);
+	return obj;
+}
+
+void c4iw_id_free(struct c4iw_id_table *alloc, u32 obj)
+{
+	unsigned long flags;
+
+	obj -= alloc->start;
+	BUG_ON((int)obj < 0);
+
+	spin_lock_irqsave(&alloc->lock, flags);
+	clear_bit(obj, alloc->table);
+	spin_unlock_irqrestore(&alloc->lock, flags);
+}
+
+int c4iw_id_table_alloc(struct c4iw_id_table *alloc, u32 start, u32 num,
+			u32 reserved, u32 flags)
+{
+	int i;
+
+	alloc->start = start;
+	alloc->flags = flags;
+	if (flags & C4IW_ID_TABLE_F_RANDOM)
+		alloc->last = random32() % RANDOM_SKIP;
+	else
+		alloc->last = 0;
+	alloc->max  = num;
+	spin_lock_init(&alloc->lock);
+	alloc->table = kmalloc(BITS_TO_LONGS(num) * sizeof(long),
+				GFP_KERNEL);
+	if (!alloc->table)
+		return -ENOMEM;
+
+	bitmap_zero(alloc->table, num);
+	if (!(alloc->flags & C4IW_ID_TABLE_F_EMPTY))
+		for (i = 0; i < reserved; ++i)
+			set_bit(i, alloc->table);
+
+	return 0;
+}
+
+void c4iw_id_table_free(struct c4iw_id_table *alloc)
+{
+	kfree(alloc->table);
+}
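
For reference, a hedged usage sketch of the new allocator, using only the
signatures this patch adds: the table covers ids [start, start + num), the
first 'reserved' ids are pre-marked unless C4IW_ID_TABLE_F_EMPTY is set, and
c4iw_id_alloc() returns (u32)-1 on exhaustion:

static int demo_id_table_roundtrip(void)
{
	struct c4iw_id_table tbl;
	u32 id;
	int ret;

	/* ids 0x100..0x1ff, first 8 reserved, randomized hint */
	ret = c4iw_id_table_alloc(&tbl, 0x100, 256, 8,
				  C4IW_ID_TABLE_F_RANDOM);
	if (ret)
		return ret;

	id = c4iw_id_alloc(&tbl);
	if (id != (u32)-1)
		c4iw_id_free(&tbl, id);

	c4iw_id_table_free(&tbl);
	return 0;
}
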
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index 1357c5b..9beb3a9 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -45,7 +45,6 @@
 #include <linux/kref.h>
 #include <linux/timer.h>
 #include <linux/io.h>
-#include <linux/kfifo.h>
 
 #include <asm/byteorder.h>
 
@@ -79,13 +78,22 @@
 	return skb->data;
 }
 
+#define C4IW_ID_TABLE_F_RANDOM 1       /* Pseudo-randomize the id's returned */
+#define C4IW_ID_TABLE_F_EMPTY  2       /* Table is initially empty */
+
+struct c4iw_id_table {
+	u32 flags;
+	u32 start;              /* logical minimal id */
+	u32 last;               /* hint for find */
+	u32 max;
+	spinlock_t lock;
+	unsigned long *table;
+};
+
 struct c4iw_resource {
-	struct kfifo tpt_fifo;
-	spinlock_t tpt_fifo_lock;
-	struct kfifo qid_fifo;
-	spinlock_t qid_fifo_lock;
-	struct kfifo pdid_fifo;
-	spinlock_t pdid_fifo_lock;
+	struct c4iw_id_table tpt_table;
+	struct c4iw_id_table qid_table;
+	struct c4iw_id_table pdid_table;
 };
 
 struct c4iw_qid_list {
@@ -103,6 +111,27 @@
 	T4_FATAL_ERROR = (1<<0),
 };
 
+struct c4iw_stat {
+	u64 total;
+	u64 cur;
+	u64 max;
+	u64 fail;
+};
+
+struct c4iw_stats {
+	struct mutex lock;
+	struct c4iw_stat qid;
+	struct c4iw_stat pd;
+	struct c4iw_stat stag;
+	struct c4iw_stat pbl;
+	struct c4iw_stat rqt;
+	struct c4iw_stat ocqp;
+	u64  db_full;
+	u64  db_empty;
+	u64  db_drop;
+	u64  db_state_transitions;
+};
+
 struct c4iw_rdev {
 	struct c4iw_resource resource;
 	unsigned long qpshift;
@@ -117,6 +146,7 @@
 	struct cxgb4_lld_info lldi;
 	unsigned long oc_mw_pa;
 	void __iomem *oc_mw_kva;
+	struct c4iw_stats stats;
 };
 
 static inline int c4iw_fatal_error(struct c4iw_rdev *rdev)
@@ -175,6 +205,12 @@
 	return wr_waitp->ret;
 }
 
+enum db_state {
+	NORMAL = 0,
+	FLOW_CONTROL = 1,
+	RECOVERY = 2
+};
+
 struct c4iw_dev {
 	struct ib_device ibdev;
 	struct c4iw_rdev rdev;
@@ -183,7 +219,10 @@
 	struct idr qpidr;
 	struct idr mmidr;
 	spinlock_t lock;
+	struct mutex db_mutex;
 	struct dentry *debugfs_root;
+	enum db_state db_state;
+	int qpcnt;
 };
 
 static inline struct c4iw_dev *to_c4iw_dev(struct ib_device *ibdev)
@@ -211,29 +250,57 @@
 	return idr_find(&rhp->mmidr, mmid);
 }
 
-static inline int insert_handle(struct c4iw_dev *rhp, struct idr *idr,
-				void *handle, u32 id)
+static inline int _insert_handle(struct c4iw_dev *rhp, struct idr *idr,
+				 void *handle, u32 id, int lock)
 {
 	int ret;
 	int newid;
 
 	do {
-		if (!idr_pre_get(idr, GFP_KERNEL))
+		if (!idr_pre_get(idr, lock ? GFP_KERNEL : GFP_ATOMIC))
 			return -ENOMEM;
-		spin_lock_irq(&rhp->lock);
+		if (lock)
+			spin_lock_irq(&rhp->lock);
 		ret = idr_get_new_above(idr, handle, id, &newid);
-		BUG_ON(newid != id);
-		spin_unlock_irq(&rhp->lock);
+		BUG_ON(!ret && newid != id);
+		if (lock)
+			spin_unlock_irq(&rhp->lock);
 	} while (ret == -EAGAIN);
 
 	return ret;
 }
 
+static inline int insert_handle(struct c4iw_dev *rhp, struct idr *idr,
+				void *handle, u32 id)
+{
+	return _insert_handle(rhp, idr, handle, id, 1);
+}
+
+static inline int insert_handle_nolock(struct c4iw_dev *rhp, struct idr *idr,
+				       void *handle, u32 id)
+{
+	return _insert_handle(rhp, idr, handle, id, 0);
+}
+
+static inline void _remove_handle(struct c4iw_dev *rhp, struct idr *idr,
+				   u32 id, int lock)
+{
+	if (lock)
+		spin_lock_irq(&rhp->lock);
+	idr_remove(idr, id);
+	if (lock)
+		spin_unlock_irq(&rhp->lock);
+}
+
 static inline void remove_handle(struct c4iw_dev *rhp, struct idr *idr, u32 id)
 {
-	spin_lock_irq(&rhp->lock);
-	idr_remove(idr, id);
-	spin_unlock_irq(&rhp->lock);
+	_remove_handle(rhp, idr, id, 1);
+}
+
+static inline void remove_handle_nolock(struct c4iw_dev *rhp,
+					 struct idr *idr, u32 id)
+{
+	_remove_handle(rhp, idr, id, 0);
 }
 
 struct c4iw_pd {
@@ -353,6 +420,8 @@
 	struct c4iw_ep *llp_stream_handle;
 	u8 layer_etype;
 	u8 ecode;
+	u16 sq_db_inc;
+	u16 rq_db_inc;
 };
 
 struct c4iw_qp {
@@ -427,6 +496,8 @@
 
 enum c4iw_qp_attr_mask {
 	C4IW_QP_ATTR_NEXT_STATE = 1 << 0,
+	C4IW_QP_ATTR_SQ_DB = 1<<1,
+	C4IW_QP_ATTR_RQ_DB = 1<<2,
 	C4IW_QP_ATTR_ENABLE_RDMA_READ = 1 << 7,
 	C4IW_QP_ATTR_ENABLE_RDMA_WRITE = 1 << 8,
 	C4IW_QP_ATTR_ENABLE_RDMA_BIND = 1 << 9,
@@ -480,6 +551,23 @@
 	}
 }
 
+static inline int to_ib_qp_state(int c4iw_qp_state)
+{
+	switch (c4iw_qp_state) {
+	case C4IW_QP_STATE_IDLE:
+		return IB_QPS_INIT;
+	case C4IW_QP_STATE_RTS:
+		return IB_QPS_RTS;
+	case C4IW_QP_STATE_CLOSING:
+		return IB_QPS_SQD;
+	case C4IW_QP_STATE_TERMINATE:
+		return IB_QPS_SQE;
+	case C4IW_QP_STATE_ERROR:
+		return IB_QPS_ERR;
+	}
+	return IB_QPS_ERR;
+}
+
 static inline u32 c4iw_ib_to_tpt_access(int a)
 {
 	return (a & IB_ACCESS_REMOTE_WRITE ? FW_RI_MEM_ACCESS_REM_WRITE : 0) |
@@ -693,14 +781,20 @@
 	return wscale;
 }
 
+u32 c4iw_id_alloc(struct c4iw_id_table *alloc);
+void c4iw_id_free(struct c4iw_id_table *alloc, u32 obj);
+int c4iw_id_table_alloc(struct c4iw_id_table *alloc, u32 start, u32 num,
+			u32 reserved, u32 flags);
+void c4iw_id_table_free(struct c4iw_id_table *alloc);
+
 typedef int (*c4iw_handler_func)(struct c4iw_dev *dev, struct sk_buff *skb);
 
 int c4iw_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new,
 		     struct l2t_entry *l2t);
 void c4iw_put_qpid(struct c4iw_rdev *rdev, u32 qpid,
 		   struct c4iw_dev_ucontext *uctx);
-u32 c4iw_get_resource(struct kfifo *fifo, spinlock_t *lock);
-void c4iw_put_resource(struct kfifo *fifo, u32 entry, spinlock_t *lock);
+u32 c4iw_get_resource(struct c4iw_id_table *id_table);
+void c4iw_put_resource(struct c4iw_id_table *id_table, u32 entry);
 int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt, u32 nr_pdid);
 int c4iw_init_ctrl_qp(struct c4iw_rdev *rdev);
 int c4iw_pblpool_create(struct c4iw_rdev *rdev);
@@ -769,6 +863,8 @@
 			     struct ib_udata *udata);
 int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 				 int attr_mask, struct ib_udata *udata);
+int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+		     int attr_mask, struct ib_qp_init_attr *init_attr);
 struct ib_qp *c4iw_get_qp(struct ib_device *dev, int qpn);
 u32 c4iw_rqtpool_alloc(struct c4iw_rdev *rdev, int size);
 void c4iw_rqtpool_free(struct c4iw_rdev *rdev, u32 addr, int size);
@@ -797,5 +893,7 @@
 extern struct cxgb4_client t4c_client;
 extern c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS];
 extern int c4iw_max_read_depth;
+extern int db_fc_threshold;
+
 
 #endif
diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c
index 40c8353..57e07c6 100644
--- a/drivers/infiniband/hw/cxgb4/mem.c
+++ b/drivers/infiniband/hw/cxgb4/mem.c
@@ -131,10 +131,14 @@
 	stag_idx = (*stag) >> 8;
 
 	if ((!reset_tpt_entry) && (*stag == T4_STAG_UNSET)) {
-		stag_idx = c4iw_get_resource(&rdev->resource.tpt_fifo,
-					     &rdev->resource.tpt_fifo_lock);
+		stag_idx = c4iw_get_resource(&rdev->resource.tpt_table);
 		if (!stag_idx)
 			return -ENOMEM;
+		mutex_lock(&rdev->stats.lock);
+		rdev->stats.stag.cur += 32;
+		if (rdev->stats.stag.cur > rdev->stats.stag.max)
+			rdev->stats.stag.max = rdev->stats.stag.cur;
+		mutex_unlock(&rdev->stats.lock);
 		*stag = (stag_idx << 8) | (atomic_inc_return(&key) & 0xff);
 	}
 	PDBG("%s stag_state 0x%0x type 0x%0x pdid 0x%0x, stag_idx 0x%x\n",
@@ -165,9 +169,12 @@
 				(rdev->lldi.vr->stag.start >> 5),
 				sizeof(tpt), &tpt);
 
-	if (reset_tpt_entry)
-		c4iw_put_resource(&rdev->resource.tpt_fifo, stag_idx,
-				  &rdev->resource.tpt_fifo_lock);
+	if (reset_tpt_entry) {
+		c4iw_put_resource(&rdev->resource.tpt_table, stag_idx);
+		mutex_lock(&rdev->stats.lock);
+		rdev->stats.stag.cur -= 32;
+		mutex_unlock(&rdev->stats.lock);
+	}
 	return err;
 }
 
@@ -686,8 +693,8 @@
 	mhp = to_c4iw_mw(mw);
 	rhp = mhp->rhp;
 	mmid = (mw->rkey) >> 8;
-	deallocate_window(&rhp->rdev, mhp->attr.stag);
 	remove_handle(rhp, &rhp->mmidr, mmid);
+	deallocate_window(&rhp->rdev, mhp->attr.stag);
 	kfree(mhp);
 	PDBG("%s ib_mw %p mmid 0x%x ptr %p\n", __func__, mw, mmid, mhp);
 	return 0;
@@ -789,12 +796,12 @@
 	mhp = to_c4iw_mr(ib_mr);
 	rhp = mhp->rhp;
 	mmid = mhp->attr.stag >> 8;
+	remove_handle(rhp, &rhp->mmidr, mmid);
 	dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
 		       mhp->attr.pbl_addr);
 	if (mhp->attr.pbl_size)
 		c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr,
 				  mhp->attr.pbl_size << 3);
-	remove_handle(rhp, &rhp->mmidr, mmid);
 	if (mhp->kva)
 		kfree((void *) (unsigned long) mhp->kva);
 	if (mhp->umem)
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index be1c18f..e084fdc 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -188,8 +188,10 @@
 	php = to_c4iw_pd(pd);
 	rhp = php->rhp;
 	PDBG("%s ibpd %p pdid 0x%x\n", __func__, pd, php->pdid);
-	c4iw_put_resource(&rhp->rdev.resource.pdid_fifo, php->pdid,
-			  &rhp->rdev.resource.pdid_fifo_lock);
+	c4iw_put_resource(&rhp->rdev.resource.pdid_table, php->pdid);
+	mutex_lock(&rhp->rdev.stats.lock);
+	rhp->rdev.stats.pd.cur--;
+	mutex_unlock(&rhp->rdev.stats.lock);
 	kfree(php);
 	return 0;
 }
@@ -204,14 +206,12 @@
 
 	PDBG("%s ibdev %p\n", __func__, ibdev);
 	rhp = (struct c4iw_dev *) ibdev;
-	pdid =  c4iw_get_resource(&rhp->rdev.resource.pdid_fifo,
-				  &rhp->rdev.resource.pdid_fifo_lock);
+	pdid =  c4iw_get_resource(&rhp->rdev.resource.pdid_table);
 	if (!pdid)
 		return ERR_PTR(-EINVAL);
 	php = kzalloc(sizeof(*php), GFP_KERNEL);
 	if (!php) {
-		c4iw_put_resource(&rhp->rdev.resource.pdid_fifo, pdid,
-				  &rhp->rdev.resource.pdid_fifo_lock);
+		c4iw_put_resource(&rhp->rdev.resource.pdid_table, pdid);
 		return ERR_PTR(-ENOMEM);
 	}
 	php->pdid = pdid;
@@ -222,6 +222,11 @@
 			return ERR_PTR(-EFAULT);
 		}
 	}
+	mutex_lock(&rhp->rdev.stats.lock);
+	rhp->rdev.stats.pd.cur++;
+	if (rhp->rdev.stats.pd.cur > rhp->rdev.stats.pd.max)
+		rhp->rdev.stats.pd.max = rhp->rdev.stats.pd.cur;
+	mutex_unlock(&rhp->rdev.stats.lock);
 	PDBG("%s pdid 0x%0x ptr 0x%p\n", __func__, pdid, php);
 	return &php->ibpd;
 }
@@ -438,6 +443,7 @@
 	    (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
 	    (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
 	    (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
+	    (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
 	    (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
 	    (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
 	    (1ull << IB_USER_VERBS_CMD_POST_SEND) |
@@ -460,6 +466,7 @@
 	dev->ibdev.destroy_ah = c4iw_ah_destroy;
 	dev->ibdev.create_qp = c4iw_create_qp;
 	dev->ibdev.modify_qp = c4iw_ib_modify_qp;
+	dev->ibdev.query_qp = c4iw_ib_query_qp;
 	dev->ibdev.destroy_qp = c4iw_destroy_qp;
 	dev->ibdev.create_cq = c4iw_create_cq;
 	dev->ibdev.destroy_cq = c4iw_destroy_cq;
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index 5f940ae..45aedf1 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -34,10 +34,19 @@
 
 #include "iw_cxgb4.h"
 
+static int db_delay_usecs = 1;
+module_param(db_delay_usecs, int, 0644);
+MODULE_PARM_DESC(db_delay_usecs, "Usecs to delay awaiting db fifo to drain");
+
 static int ocqp_support = 1;
 module_param(ocqp_support, int, 0644);
 MODULE_PARM_DESC(ocqp_support, "Support on-chip SQs (default=1)");
 
+int db_fc_threshold = 2000;
+module_param(db_fc_threshold, int, 0644);
+MODULE_PARM_DESC(db_fc_threshold, "QP count/threshold that triggers automatic "
+		 "db flow control mode (default = 2000)");
+
 static void set_state(struct c4iw_qp *qhp, enum c4iw_qp_state state)
 {
 	unsigned long flag;
@@ -1128,6 +1137,35 @@
 	return ret;
 }
 
+/*
+ * Called by the library when the qp has user dbs disabled due to
+ * a DB_FULL condition.  This function will single-thread all user
+ * DB rings to avoid overflowing the hw db-fifo.
+ */
+static int ring_kernel_db(struct c4iw_qp *qhp, u32 qid, u16 inc)
+{
+	int delay = db_delay_usecs;
+
+	mutex_lock(&qhp->rhp->db_mutex);
+	do {
+
+		/*
+		 * The interrupt threshold is dbfifo_int_thresh << 6. So
+		 * make sure we don't cross that and generate an interrupt.
+		 */
+		if (cxgb4_dbfifo_count(qhp->rhp->rdev.lldi.ports[0], 1) <
+		    (qhp->rhp->rdev.lldi.dbfifo_int_thresh << 5)) {
+			writel(V_QID(qid) | V_PIDX(inc), qhp->wq.db);
+			break;
+		}
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		schedule_timeout(usecs_to_jiffies(delay));
+		delay = min(delay << 1, 2000);
+	} while (1);
+	mutex_unlock(&qhp->rhp->db_mutex);
+	return 0;
+}
+
 int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
 		   enum c4iw_qp_attr_mask mask,
 		   struct c4iw_qp_attributes *attrs,
@@ -1176,6 +1214,15 @@
 		qhp->attr = newattr;
 	}
 
+	if (mask & C4IW_QP_ATTR_SQ_DB) {
+		ret = ring_kernel_db(qhp, qhp->wq.sq.qid, attrs->sq_db_inc);
+		goto out;
+	}
+	if (mask & C4IW_QP_ATTR_RQ_DB) {
+		ret = ring_kernel_db(qhp, qhp->wq.rq.qid, attrs->rq_db_inc);
+		goto out;
+	}
+
 	if (!(mask & C4IW_QP_ATTR_NEXT_STATE))
 		goto out;
 	if (qhp->attr.state == attrs->next_state)
@@ -1352,6 +1399,14 @@
 	return ret;
 }
 
+static int enable_qp_db(int id, void *p, void *data)
+{
+	struct c4iw_qp *qp = p;
+
+	t4_enable_wq_db(&qp->wq);
+	return 0;
+}
+
 int c4iw_destroy_qp(struct ib_qp *ib_qp)
 {
 	struct c4iw_dev *rhp;
@@ -1369,7 +1424,16 @@
 		c4iw_modify_qp(rhp, qhp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
 	wait_event(qhp->wait, !qhp->ep);
 
-	remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid);
+	spin_lock_irq(&rhp->lock);
+	remove_handle_nolock(rhp, &rhp->qpidr, qhp->wq.sq.qid);
+	rhp->qpcnt--;
+	BUG_ON(rhp->qpcnt < 0);
+	if (rhp->qpcnt <= db_fc_threshold && rhp->db_state == FLOW_CONTROL) {
+		rhp->rdev.stats.db_state_transitions++;
+		rhp->db_state = NORMAL;
+		idr_for_each(&rhp->qpidr, enable_qp_db, NULL);
+	}
+	spin_unlock_irq(&rhp->lock);
 	atomic_dec(&qhp->refcnt);
 	wait_event(qhp->wait, !atomic_read(&qhp->refcnt));
 
@@ -1383,6 +1447,14 @@
 	return 0;
 }
 
+static int disable_qp_db(int id, void *p, void *data)
+{
+	struct c4iw_qp *qp = p;
+
+	t4_disable_wq_db(&qp->wq);
+	return 0;
+}
+
 struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
 			     struct ib_udata *udata)
 {
@@ -1469,7 +1541,16 @@
 	init_waitqueue_head(&qhp->wait);
 	atomic_set(&qhp->refcnt, 1);
 
-	ret = insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid);
+	spin_lock_irq(&rhp->lock);
+	if (rhp->db_state != NORMAL)
+		t4_disable_wq_db(&qhp->wq);
+	if (++rhp->qpcnt > db_fc_threshold && rhp->db_state == NORMAL) {
+		rhp->rdev.stats.db_state_transitions++;
+		rhp->db_state = FLOW_CONTROL;
+		idr_for_each(&rhp->qpidr, disable_qp_db, NULL);
+	}
+	ret = insert_handle_nolock(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid);
+	spin_unlock_irq(&rhp->lock);
 	if (ret)
 		goto err2;
 
@@ -1613,6 +1694,15 @@
 			 C4IW_QP_ATTR_ENABLE_RDMA_WRITE |
 			 C4IW_QP_ATTR_ENABLE_RDMA_BIND) : 0;
 
+	/*
+	 * Use SQ_PSN and RQ_PSN to pass in IDX_INC values for
+	 * ringing the queue db when we're in DB_FULL mode.
+	 */
+	attrs.sq_db_inc = attr->sq_psn;
+	attrs.rq_db_inc = attr->rq_psn;
+	mask |= (attr_mask & IB_QP_SQ_PSN) ? C4IW_QP_ATTR_SQ_DB : 0;
+	mask |= (attr_mask & IB_QP_RQ_PSN) ? C4IW_QP_ATTR_RQ_DB : 0;
+
 	return c4iw_modify_qp(rhp, qhp, mask, &attrs, 0);
 }
 
@@ -1621,3 +1711,14 @@
 	PDBG("%s ib_dev %p qpn 0x%x\n", __func__, dev, qpn);
 	return (struct ib_qp *)get_qhp(to_c4iw_dev(dev), qpn);
 }
+
+int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+		     int attr_mask, struct ib_qp_init_attr *init_attr)
+{
+	struct c4iw_qp *qhp = to_c4iw_qp(ibqp);
+
+	memset(attr, 0, sizeof *attr);
+	memset(init_attr, 0, sizeof *init_attr);
+	attr->qp_state = to_ib_qp_state(qhp->attr.state);
+	return 0;
+}
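
ring_kernel_db() above serializes kernel doorbell rings behind db_mutex and
backs off exponentially, capped at 2000 us, while the FIFO sits above the
interrupt threshold.  The same bounded-backoff polling loop as a hedged
standalone sketch:

#include <unistd.h>

/* Poll 'ready' with doubling sleeps; the cap mirrors min(delay << 1, 2000)
 * in ring_kernel_db() so waits never degenerate into long stalls. */
static void demo_backoff_wait(int (*ready)(void), int delay_us)
{
	while (!ready()) {
		usleep(delay_us);
		delay_us = delay_us * 2 > 2000 ? 2000 : delay_us * 2;
	}
}
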
diff --git a/drivers/infiniband/hw/cxgb4/resource.c b/drivers/infiniband/hw/cxgb4/resource.c
index 407ff39..cdef4d7f 100644
--- a/drivers/infiniband/hw/cxgb4/resource.c
+++ b/drivers/infiniband/hw/cxgb4/resource.c
@@ -30,96 +30,25 @@
  * SOFTWARE.
  */
 /* Crude resource management */
-#include <linux/kernel.h>
-#include <linux/random.h>
-#include <linux/slab.h>
-#include <linux/kfifo.h>
 #include <linux/spinlock.h>
-#include <linux/errno.h>
 #include <linux/genalloc.h>
 #include <linux/ratelimit.h>
 #include "iw_cxgb4.h"
 
-#define RANDOM_SIZE 16
-
-static int __c4iw_init_resource_fifo(struct kfifo *fifo,
-				   spinlock_t *fifo_lock,
-				   u32 nr, u32 skip_low,
-				   u32 skip_high,
-				   int random)
-{
-	u32 i, j, entry = 0, idx;
-	u32 random_bytes;
-	u32 rarray[16];
-	spin_lock_init(fifo_lock);
-
-	if (kfifo_alloc(fifo, nr * sizeof(u32), GFP_KERNEL))
-		return -ENOMEM;
-
-	for (i = 0; i < skip_low + skip_high; i++)
-		kfifo_in(fifo, (unsigned char *) &entry, sizeof(u32));
-	if (random) {
-		j = 0;
-		random_bytes = random32();
-		for (i = 0; i < RANDOM_SIZE; i++)
-			rarray[i] = i + skip_low;
-		for (i = skip_low + RANDOM_SIZE; i < nr - skip_high; i++) {
-			if (j >= RANDOM_SIZE) {
-				j = 0;
-				random_bytes = random32();
-			}
-			idx = (random_bytes >> (j * 2)) & 0xF;
-			kfifo_in(fifo,
-				(unsigned char *) &rarray[idx],
-				sizeof(u32));
-			rarray[idx] = i;
-			j++;
-		}
-		for (i = 0; i < RANDOM_SIZE; i++)
-			kfifo_in(fifo,
-				(unsigned char *) &rarray[i],
-				sizeof(u32));
-	} else
-		for (i = skip_low; i < nr - skip_high; i++)
-			kfifo_in(fifo, (unsigned char *) &i, sizeof(u32));
-
-	for (i = 0; i < skip_low + skip_high; i++)
-		if (kfifo_out_locked(fifo, (unsigned char *) &entry,
-				     sizeof(u32), fifo_lock))
-			break;
-	return 0;
-}
-
-static int c4iw_init_resource_fifo(struct kfifo *fifo, spinlock_t * fifo_lock,
-				   u32 nr, u32 skip_low, u32 skip_high)
-{
-	return __c4iw_init_resource_fifo(fifo, fifo_lock, nr, skip_low,
-					  skip_high, 0);
-}
-
-static int c4iw_init_resource_fifo_random(struct kfifo *fifo,
-				   spinlock_t *fifo_lock,
-				   u32 nr, u32 skip_low, u32 skip_high)
-{
-	return __c4iw_init_resource_fifo(fifo, fifo_lock, nr, skip_low,
-					  skip_high, 1);
-}
-
-static int c4iw_init_qid_fifo(struct c4iw_rdev *rdev)
+static int c4iw_init_qid_table(struct c4iw_rdev *rdev)
 {
 	u32 i;
 
-	spin_lock_init(&rdev->resource.qid_fifo_lock);
-
-	if (kfifo_alloc(&rdev->resource.qid_fifo, rdev->lldi.vr->qp.size *
-			sizeof(u32), GFP_KERNEL))
+	if (c4iw_id_table_alloc(&rdev->resource.qid_table,
+				rdev->lldi.vr->qp.start,
+				rdev->lldi.vr->qp.size,
+				rdev->lldi.vr->qp.size, 0))
 		return -ENOMEM;
 
 	for (i = rdev->lldi.vr->qp.start;
-	     i < rdev->lldi.vr->qp.start + rdev->lldi.vr->qp.size; i++)
+		i < rdev->lldi.vr->qp.start + rdev->lldi.vr->qp.size; i++)
 		if (!(i & rdev->qpmask))
-			kfifo_in(&rdev->resource.qid_fifo,
-				    (unsigned char *) &i, sizeof(u32));
+			c4iw_id_free(&rdev->resource.qid_table, i);
 	return 0;
 }
 
@@ -127,44 +56,42 @@
 int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt, u32 nr_pdid)
 {
 	int err = 0;
-	err = c4iw_init_resource_fifo_random(&rdev->resource.tpt_fifo,
-					     &rdev->resource.tpt_fifo_lock,
-					     nr_tpt, 1, 0);
+	err = c4iw_id_table_alloc(&rdev->resource.tpt_table, 0, nr_tpt, 1,
+					C4IW_ID_TABLE_F_RANDOM);
 	if (err)
 		goto tpt_err;
-	err = c4iw_init_qid_fifo(rdev);
+	err = c4iw_init_qid_table(rdev);
 	if (err)
 		goto qid_err;
-	err = c4iw_init_resource_fifo(&rdev->resource.pdid_fifo,
-				      &rdev->resource.pdid_fifo_lock,
-				      nr_pdid, 1, 0);
+	err = c4iw_id_table_alloc(&rdev->resource.pdid_table, 0,
+					nr_pdid, 1, 0);
 	if (err)
 		goto pdid_err;
 	return 0;
-pdid_err:
-	kfifo_free(&rdev->resource.qid_fifo);
-qid_err:
-	kfifo_free(&rdev->resource.tpt_fifo);
-tpt_err:
+ pdid_err:
+	c4iw_id_table_free(&rdev->resource.qid_table);
+ qid_err:
+	c4iw_id_table_free(&rdev->resource.tpt_table);
+ tpt_err:
 	return -ENOMEM;
 }
 
 /*
  * returns 0 if no resource available
  */
-u32 c4iw_get_resource(struct kfifo *fifo, spinlock_t *lock)
+u32 c4iw_get_resource(struct c4iw_id_table *id_table)
 {
 	u32 entry;
-	if (kfifo_out_locked(fifo, (unsigned char *) &entry, sizeof(u32), lock))
-		return entry;
-	else
+	entry = c4iw_id_alloc(id_table);
+	if (entry == (u32)(-1))
 		return 0;
+	return entry;
 }
 
-void c4iw_put_resource(struct kfifo *fifo, u32 entry, spinlock_t *lock)
+void c4iw_put_resource(struct c4iw_id_table *id_table, u32 entry)
 {
 	PDBG("%s entry 0x%x\n", __func__, entry);
-	kfifo_in_locked(fifo, (unsigned char *) &entry, sizeof(u32), lock);
+	c4iw_id_free(id_table, entry);
 }
 
 u32 c4iw_get_cqid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx)
@@ -181,10 +108,12 @@
 		qid = entry->qid;
 		kfree(entry);
 	} else {
-		qid = c4iw_get_resource(&rdev->resource.qid_fifo,
-					&rdev->resource.qid_fifo_lock);
+		qid = c4iw_get_resource(&rdev->resource.qid_table);
 		if (!qid)
 			goto out;
+		mutex_lock(&rdev->stats.lock);
+		rdev->stats.qid.cur += rdev->qpmask + 1;
+		mutex_unlock(&rdev->stats.lock);
 		for (i = qid+1; i & rdev->qpmask; i++) {
 			entry = kmalloc(sizeof *entry, GFP_KERNEL);
 			if (!entry)
@@ -213,6 +142,10 @@
 out:
 	mutex_unlock(&uctx->lock);
 	PDBG("%s qid 0x%x\n", __func__, qid);
+	mutex_lock(&rdev->stats.lock);
+	if (rdev->stats.qid.cur > rdev->stats.qid.max)
+		rdev->stats.qid.max = rdev->stats.qid.cur;
+	mutex_unlock(&rdev->stats.lock);
 	return qid;
 }
 
@@ -245,10 +178,12 @@
 		qid = entry->qid;
 		kfree(entry);
 	} else {
-		qid = c4iw_get_resource(&rdev->resource.qid_fifo,
-					&rdev->resource.qid_fifo_lock);
+		qid = c4iw_get_resource(&rdev->resource.qid_table);
 		if (!qid)
 			goto out;
+		mutex_lock(&rdev->stats.lock);
+		rdev->stats.qid.cur += rdev->qpmask + 1;
+		mutex_unlock(&rdev->stats.lock);
 		for (i = qid+1; i & rdev->qpmask; i++) {
 			entry = kmalloc(sizeof *entry, GFP_KERNEL);
 			if (!entry)
@@ -277,6 +212,10 @@
 out:
 	mutex_unlock(&uctx->lock);
 	PDBG("%s qid 0x%x\n", __func__, qid);
+	mutex_lock(&rdev->stats.lock);
+	if (rdev->stats.qid.cur > rdev->stats.qid.max)
+		rdev->stats.qid.max = rdev->stats.qid.cur;
+	mutex_unlock(&rdev->stats.lock);
 	return qid;
 }
 
@@ -297,9 +236,9 @@
 
 void c4iw_destroy_resource(struct c4iw_resource *rscp)
 {
-	kfifo_free(&rscp->tpt_fifo);
-	kfifo_free(&rscp->qid_fifo);
-	kfifo_free(&rscp->pdid_fifo);
+	c4iw_id_table_free(&rscp->tpt_table);
+	c4iw_id_table_free(&rscp->qid_table);
+	c4iw_id_table_free(&rscp->pdid_table);
 }
 
 /*
@@ -312,15 +251,23 @@
 {
 	unsigned long addr = gen_pool_alloc(rdev->pbl_pool, size);
 	PDBG("%s addr 0x%x size %d\n", __func__, (u32)addr, size);
-	if (!addr)
-		printk_ratelimited(KERN_WARNING MOD "%s: Out of PBL memory\n",
-		       pci_name(rdev->lldi.pdev));
+	mutex_lock(&rdev->stats.lock);
+	if (addr) {
+		rdev->stats.pbl.cur += roundup(size, 1 << MIN_PBL_SHIFT);
+		if (rdev->stats.pbl.cur > rdev->stats.pbl.max)
+			rdev->stats.pbl.max = rdev->stats.pbl.cur;
+	} else
+		rdev->stats.pbl.fail++;
+	mutex_unlock(&rdev->stats.lock);
 	return (u32)addr;
 }
 
 void c4iw_pblpool_free(struct c4iw_rdev *rdev, u32 addr, int size)
 {
 	PDBG("%s addr 0x%x size %d\n", __func__, addr, size);
+	mutex_lock(&rdev->stats.lock);
+	rdev->stats.pbl.cur -= roundup(size, 1 << MIN_PBL_SHIFT);
+	mutex_unlock(&rdev->stats.lock);
 	gen_pool_free(rdev->pbl_pool, (unsigned long)addr, size);
 }
 
@@ -377,12 +324,23 @@
 	if (!addr)
 		printk_ratelimited(KERN_WARNING MOD "%s: Out of RQT memory\n",
 		       pci_name(rdev->lldi.pdev));
+	mutex_lock(&rdev->stats.lock);
+	if (addr) {
+		rdev->stats.rqt.cur += roundup(size << 6, 1 << MIN_RQT_SHIFT);
+		if (rdev->stats.rqt.cur > rdev->stats.rqt.max)
+			rdev->stats.rqt.max = rdev->stats.rqt.cur;
+	} else
+		rdev->stats.rqt.fail++;
+	mutex_unlock(&rdev->stats.lock);
 	return (u32)addr;
 }
 
 void c4iw_rqtpool_free(struct c4iw_rdev *rdev, u32 addr, int size)
 {
 	PDBG("%s addr 0x%x size %d\n", __func__, addr, size << 6);
+	mutex_lock(&rdev->stats.lock);
+	rdev->stats.rqt.cur -= roundup(size << 6, 1 << MIN_RQT_SHIFT);
+	mutex_unlock(&rdev->stats.lock);
 	gen_pool_free(rdev->rqt_pool, (unsigned long)addr, size << 6);
 }
 
@@ -433,12 +391,22 @@
 {
 	unsigned long addr = gen_pool_alloc(rdev->ocqp_pool, size);
 	PDBG("%s addr 0x%x size %d\n", __func__, (u32)addr, size);
+	if (addr) {
+		mutex_lock(&rdev->stats.lock);
+		rdev->stats.ocqp.cur += roundup(size, 1 << MIN_OCQP_SHIFT);
+		if (rdev->stats.ocqp.cur > rdev->stats.ocqp.max)
+			rdev->stats.ocqp.max = rdev->stats.ocqp.cur;
+		mutex_unlock(&rdev->stats.lock);
+	}
 	return (u32)addr;
 }
 
 void c4iw_ocqp_pool_free(struct c4iw_rdev *rdev, u32 addr, int size)
 {
 	PDBG("%s addr 0x%x size %d\n", __func__, addr, size);
+	mutex_lock(&rdev->stats.lock);
+	rdev->stats.ocqp.cur -= roundup(size, 1 << MIN_OCQP_SHIFT);
+	mutex_unlock(&rdev->stats.lock);
 	gen_pool_free(rdev->ocqp_pool, (unsigned long)addr, size);
 }
 
diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
index c0221ee..16f26ab 100644
--- a/drivers/infiniband/hw/cxgb4/t4.h
+++ b/drivers/infiniband/hw/cxgb4/t4.h
@@ -62,6 +62,10 @@
 	__be16 pidx;
 	u8 qp_err;	/* flit 1 - sw owns */
 	u8 db_off;
+	u8 pad;
+	u16 host_wq_pidx;
+	u16 host_cidx;
+	u16 host_pidx;
 };
 
 #define T4_EQ_ENTRY_SIZE 64
@@ -375,6 +379,16 @@
 		wq->rq.cidx = 0;
 }
 
+static inline u16 t4_rq_host_wq_pidx(struct t4_wq *wq)
+{
+	return wq->rq.queue[wq->rq.size].status.host_wq_pidx;
+}
+
+static inline u16 t4_rq_wq_size(struct t4_wq *wq)
+{
+	return wq->rq.size * T4_RQ_NUM_SLOTS;
+}
+
 static inline int t4_sq_onchip(struct t4_sq *sq)
 {
 	return sq->flags & T4_SQ_ONCHIP;
@@ -412,6 +426,16 @@
 		wq->sq.cidx = 0;
 }
 
+static inline u16 t4_sq_host_wq_pidx(struct t4_wq *wq)
+{
+	return wq->sq.queue[wq->sq.size].status.host_wq_pidx;
+}
+
+static inline u16 t4_sq_wq_size(struct t4_wq *wq)
+{
+	return wq->sq.size * T4_SQ_NUM_SLOTS;
+}
+
 static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc)
 {
 	wmb();
diff --git a/drivers/infiniband/hw/cxgb4/user.h b/drivers/infiniband/hw/cxgb4/user.h
index e6669d5..32b754c 100644
--- a/drivers/infiniband/hw/cxgb4/user.h
+++ b/drivers/infiniband/hw/cxgb4/user.h
@@ -32,7 +32,7 @@
 #ifndef __C4IW_USER_H__
 #define __C4IW_USER_H__
 
-#define C4IW_UVERBS_ABI_VERSION	1
+#define C4IW_UVERBS_ABI_VERSION	2
 
 /*
  * Make sure that all structs defined in this file remain laid out so
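
Bumping C4IW_UVERBS_ABI_VERSION to 2 advertises the doorbell-related
additions to userspace.  A hedged sketch of the kind of check a provider
library performs when it opens the device (the exact libcxgb4 logic may
differ):

#include <stdio.h>

#define DEMO_UVERBS_ABI_VERSION 2	/* ABI this library was built for */

static int demo_check_abi(int kernel_abi)
{
	if (kernel_abi > DEMO_UVERBS_ABI_VERSION) {
		fprintf(stderr, "kernel ABI %d newer than library ABI %d\n",
			kernel_abi, DEMO_UVERBS_ABI_VERSION);
		return -1;
	}
	return 0;
}
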
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c
index 1d7aea1..7cc3054 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba6110.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c
@@ -596,8 +596,7 @@
 
 	ipath_format_hwerrors(hwerrs,
 			      ipath_6110_hwerror_msgs,
-			      sizeof(ipath_6110_hwerror_msgs) /
-			      sizeof(ipath_6110_hwerror_msgs[0]),
+			      ARRAY_SIZE(ipath_6110_hwerror_msgs),
 			      msg, msgl);
 
 	if (hwerrs & (_IPATH_HTLINK0_CRCBITS | _IPATH_HTLINK1_CRCBITS))
diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c
index c0a03ac..26dfbc8 100644
--- a/drivers/infiniband/hw/ipath/ipath_intr.c
+++ b/drivers/infiniband/hw/ipath/ipath_intr.c
@@ -209,8 +209,7 @@
 {
 	int i;
 	const int glen =
-	    sizeof(ipath_generic_hwerror_msgs) /
-	    sizeof(ipath_generic_hwerror_msgs[0]);
+	    ARRAY_SIZE(ipath_generic_hwerror_msgs);
 
 	for (i=0; i<glen; i++) {
 		if (hwerrs & ipath_generic_hwerror_msgs[i].mask) {
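
Both ipath hunks above replace the open-coded element count with the
kernel's ARRAY_SIZE() macro, which is the same division plus a
build-time check that its argument really is an array (roughly, from
include/linux/kernel.h):

#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr))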
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 1fca0af..ceb3332 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -84,6 +84,11 @@
 	MLX4_IB_CACHE_LINE_SIZE	= 64,
 };
 
+enum {
+	MLX4_RAW_QP_MTU		= 7,
+	MLX4_RAW_QP_MSGMAX	= 31,
+};
+
 static const __be32 mlx4_ib_opcode[] = {
 	[IB_WR_SEND]				= cpu_to_be32(MLX4_OPCODE_SEND),
 	[IB_WR_LSO]				= cpu_to_be32(MLX4_OPCODE_LSO),
@@ -573,7 +578,12 @@
 	if (sqpn) {
 		qpn = sqpn;
 	} else {
-		err = mlx4_qp_reserve_range(dev->dev, 1, 1, &qpn);
+		/* Raw packet QPNs must have their 8 low-order bits clear
+		 * (256-aligned); otherwise the WQE BlueFlame setup flow
+		 * wrongly causes VLAN insertion. */
+		if (init_attr->qp_type == IB_QPT_RAW_PACKET)
+			err = mlx4_qp_reserve_range(dev->dev, 1, 1 << 8, &qpn);
+		else
+			err = mlx4_qp_reserve_range(dev->dev, 1, 1, &qpn);
 		if (err)
 			goto err_wrid;
 	}
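
For reference, mlx4_qp_reserve_range(dev, cnt, align, &base) hands back
a base QPN aligned to 'align', so the raw-packet branch above
guarantees the low byte of the reserved QPN is zero. Illustration
only:

	int qpn, err;

	err = mlx4_qp_reserve_range(dev->dev, 1, 1 << 8, &qpn);
	if (!err)
		WARN_ON(qpn & 0xff);	/* 256-aligned: low 8 bits clear */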
@@ -791,6 +801,7 @@
 	case IB_QPT_RC:
 	case IB_QPT_UC:
 	case IB_QPT_UD:
+	case IB_QPT_RAW_PACKET:
 	{
 		qp = kzalloc(sizeof *qp, GFP_KERNEL);
 		if (!qp)
@@ -872,7 +883,8 @@
 	case IB_QPT_XRC_INI:
 	case IB_QPT_XRC_TGT:	return MLX4_QP_ST_XRC;
 	case IB_QPT_SMI:
-	case IB_QPT_GSI:	return MLX4_QP_ST_MLX;
+	case IB_QPT_GSI:
+	case IB_QPT_RAW_PACKET:	return MLX4_QP_ST_MLX;
 	default:		return -1;
 	}
 }
@@ -1042,6 +1054,8 @@
 
 	if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI)
 		context->mtu_msgmax = (IB_MTU_4096 << 5) | 11;
+	else if (ibqp->qp_type == IB_QPT_RAW_PACKET)
+		context->mtu_msgmax = (MLX4_RAW_QP_MTU << 5) | MLX4_RAW_QP_MSGMAX;
 	else if (ibqp->qp_type == IB_QPT_UD) {
 		if (qp->flags & MLX4_IB_QP_LSO)
 			context->mtu_msgmax = (IB_MTU_4096 << 5) |
@@ -1200,7 +1214,8 @@
 	if (cur_state == IB_QPS_INIT &&
 	    new_state == IB_QPS_RTR  &&
 	    (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI ||
-	     ibqp->qp_type == IB_QPT_UD)) {
+	     ibqp->qp_type == IB_QPT_UD ||
+	     ibqp->qp_type == IB_QPT_RAW_PACKET)) {
 		context->pri_path.sched_queue = (qp->port - 1) << 6;
 		if (is_qp0(dev, qp))
 			context->pri_path.sched_queue |= MLX4_IB_DEFAULT_QP0_SCHED_QUEUE;
@@ -1319,6 +1334,11 @@
 		goto out;
 	}
 
+	if ((attr_mask & IB_QP_PORT) && (ibqp->qp_type == IB_QPT_RAW_PACKET) &&
+	    (rdma_port_get_link_layer(&dev->ib_dev, attr->port_num) !=
+	     IB_LINK_LAYER_ETHERNET))
+		goto out;
+
 	if (attr_mask & IB_QP_PKEY_INDEX) {
 		int p = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
 		if (attr->pkey_index >= dev->dev->caps.pkey_table_len[p])
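
The mtu_msgmax byte set in this file packs the MTU code into bits 7:5
and log2 of the maximum message size into bits 4:0, so the raw-packet
case above encodes as (MLX4_RAW_QP_MTU << 5) | MLX4_RAW_QP_MSGMAX =
(7 << 5) | 31 = 0xff. As a worked helper (illustrative, not driver
code):

static u8 pack_mtu_msgmax(u8 mtu_code, u8 log_max_msg)
{
	return (mtu_code << 5) | (log_max_msg & 0x1f);
}

/* GSI/SMI:     pack_mtu_msgmax(IB_MTU_4096, 11)			*/
/* raw packet:  pack_mtu_msgmax(MLX4_RAW_QP_MTU, MLX4_RAW_QP_MSGMAX)	*/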
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index 71edfbb..020e95c 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -2884,7 +2884,8 @@
 			ibevent.device = nesqp->ibqp.device;
 			ibevent.event = nesqp->terminate_eventtype;
 			ibevent.element.qp = &nesqp->ibqp;
-			nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
+			if (nesqp->ibqp.event_handler)
+				nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
 		}
 	}
 
@@ -3320,6 +3321,10 @@
 
 	nesqp->private_data_len = conn_param->private_data_len;
 	nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32((u32)conn_param->ord);
+	/* space for rdma0 read msg */
+	if (conn_param->ord == 0)
+		nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32(1);
+
 	nes_debug(NES_DBG_CM, "requested ord = 0x%08X.\n", (u32)conn_param->ord);
 	nes_debug(NES_DBG_CM, "mpa private data len =%u\n",
 		  conn_param->private_data_len);
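
The nes hunk above guarantees at least one outbound read credit; the
"rdma0 read" in the comment appears to refer to the zero-length RDMA
read used at connection setup. Written as a plain clamp, the same
effect would be (sketch):

	u32 ord = max_t(u32, (u32)conn_param->ord, 1);	/* ensure >= 1 */

	nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32(ord);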
diff --git a/drivers/infiniband/hw/ocrdma/Kconfig b/drivers/infiniband/hw/ocrdma/Kconfig
new file mode 100644
index 0000000..b5b6056
--- /dev/null
+++ b/drivers/infiniband/hw/ocrdma/Kconfig
@@ -0,0 +1,8 @@
+config INFINIBAND_OCRDMA
+	tristate "Emulex One Connect HCA support"
+	depends on ETHERNET && NETDEVICES && PCI && (IPV6 || IPV6=n)
+	select NET_VENDOR_EMULEX
+	select BE2NET
+	---help---
+	  This driver provides low-level InfiniBand over Ethernet
+	  support for Emulex One Connect host channel adapters (HCAs).
diff --git a/drivers/infiniband/hw/ocrdma/Makefile b/drivers/infiniband/hw/ocrdma/Makefile
new file mode 100644
index 0000000..06a5bed
--- /dev/null
+++ b/drivers/infiniband/hw/ocrdma/Makefile
@@ -0,0 +1,5 @@
+ccflags-y := -Idrivers/net/ethernet/emulex/benet
+
+obj-$(CONFIG_INFINIBAND_OCRDMA)	+= ocrdma.o
+
+ocrdma-y :=	ocrdma_main.o ocrdma_verbs.o ocrdma_hw.o ocrdma_ah.o
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h
new file mode 100644
index 0000000..85a69c9
--- /dev/null
+++ b/drivers/infiniband/hw/ocrdma/ocrdma.h
@@ -0,0 +1,393 @@
+/*******************************************************************
+ * This file is part of the Emulex RoCE Device Driver for          *
+ * RoCE (RDMA over Converged Ethernet) adapters.                   *
+ * Copyright (C) 2008-2012 Emulex. All rights reserved.            *
+ * EMULEX and SLI are trademarks of Emulex.                        *
+ * www.emulex.com                                                  *
+ *                                                                 *
+ * This program is free software; you can redistribute it and/or   *
+ * modify it under the terms of version 2 of the GNU General       *
+ * Public License as published by the Free Software Foundation.    *
+ * This program is distributed in the hope that it will be useful. *
+ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND          *
+ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY,  *
+ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE      *
+ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
+ * TO BE LEGALLY INVALID.  See the GNU General Public License for  *
+ * more details, a copy of which can be found in the file COPYING  *
+ * included with this package.                                     *
+ *
+ * Contact Information:
+ * linux-drivers@emulex.com
+ *
+ * Emulex
+ * 3333 Susan Street
+ * Costa Mesa, CA 92626
+ *******************************************************************/
+
+#ifndef __OCRDMA_H__
+#define __OCRDMA_H__
+
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/pci.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_user_verbs.h>
+
+#include <be_roce.h>
+#include "ocrdma_sli.h"
+
+#define OCRDMA_ROCE_DEV_VERSION "1.0.0"
+#define OCRDMA_NODE_DESC "Emulex OneConnect RoCE HCA"
+
+#define ocrdma_err(format, arg...) printk(KERN_ERR format, ##arg)
+
+#define OCRDMA_MAX_AH 512
+
+#define OCRDMA_UVERBS(CMD_NAME) (1ull << IB_USER_VERBS_CMD_##CMD_NAME)
+
+struct ocrdma_dev_attr {
+	u8 fw_ver[32];
+	u32 vendor_id;
+	u32 device_id;
+	u16 max_pd;
+	u16 max_cq;
+	u16 max_cqe;
+	u16 max_qp;
+	u16 max_wqe;
+	u16 max_rqe;
+	u32 max_inline_data;
+	int max_send_sge;
+	int max_recv_sge;
+	int max_mr;
+	u64 max_mr_size;
+	u32 max_num_mr_pbl;
+	int max_fmr;
+	int max_map_per_fmr;
+	int max_pages_per_frmr;
+	u16 max_ord_per_qp;
+	u16 max_ird_per_qp;
+
+	int device_cap_flags;
+	u8 cq_overflow_detect;
+	u8 srq_supported;
+
+	u32 wqe_size;
+	u32 rqe_size;
+	u32 ird_page_size;
+	u8 local_ca_ack_delay;
+	u8 ird;
+	u8 num_ird_pages;
+};
+
+struct ocrdma_pbl {
+	void *va;
+	dma_addr_t pa;
+};
+
+struct ocrdma_queue_info {
+	void *va;
+	dma_addr_t dma;
+	u32 size;
+	u16 len;
+	u16 entry_size;		/* Size of an element in the queue */
+	u16 id;			/* qid, where to ring the doorbell. */
+	u16 head, tail;
+	bool created;
+	atomic_t used;		/* Number of valid elements in the queue */
+};
+
+struct ocrdma_eq {
+	struct ocrdma_queue_info q;
+	u32 vector;
+	int cq_cnt;
+	struct ocrdma_dev *dev;
+	char irq_name[32];
+};
+
+struct ocrdma_mq {
+	struct ocrdma_queue_info sq;
+	struct ocrdma_queue_info cq;
+	bool rearm_cq;
+};
+
+struct mqe_ctx {
+	struct mutex lock; /* for serializing mailbox commands on MQ */
+	wait_queue_head_t cmd_wait;
+	u32 tag;
+	u16 cqe_status;
+	u16 ext_status;
+	bool cmd_done;
+};
+
+struct ocrdma_dev {
+	struct ib_device ibdev;
+	struct ocrdma_dev_attr attr;
+
+	struct mutex dev_lock; /* provides synchronized access to device data */
+	spinlock_t flush_q_lock ____cacheline_aligned;
+
+	struct ocrdma_cq **cq_tbl;
+	struct ocrdma_qp **qp_tbl;
+
+	struct ocrdma_eq meq;
+	struct ocrdma_eq *qp_eq_tbl;
+	int eq_cnt;
+	u16 base_eqid;
+	u16 max_eq;
+
+	union ib_gid *sgid_tbl;
+	/* provides synchronization for the sgid table when
+	 * gid entries are updated by the notifier.
+	 */
+	spinlock_t sgid_lock;
+
+	int gsi_qp_created;
+	struct ocrdma_cq *gsi_sqcq;
+	struct ocrdma_cq *gsi_rqcq;
+
+	struct {
+		struct ocrdma_av *va;
+		dma_addr_t pa;
+		u32 size;
+		u32 num_ah;
+		/* provide synchronization for av
+		 * entry allocations.
+		 */
+		spinlock_t lock;
+		u32 ahid;
+		struct ocrdma_pbl pbl;
+	} av_tbl;
+
+	void *mbx_cmd;
+	struct ocrdma_mq mq;
+	struct mqe_ctx mqe_ctx;
+
+	struct be_dev_info nic_info;
+
+	struct list_head entry;
+	struct rcu_head rcu;
+	int id;
+};
+
+struct ocrdma_cq {
+	struct ib_cq ibcq;
+	struct ocrdma_dev *dev;
+	struct ocrdma_cqe *va;
+	u32 phase;
+	u32 getp;	/* get pointer: index of the next CQE
+			 * to return to the stack; wraps around
+			 * at max_hw_cqe
+			 */
+	u32 max_hw_cqe;
+	bool phase_change;
+	bool armed, solicited;
+	bool arm_needed;
+
+	spinlock_t cq_lock ____cacheline_aligned; /* provides synchronization
+						   * for cq polling
+						   */
+	/* synchronizes the cq completion handler invoked from multiple contexts */
+	spinlock_t comp_handler_lock ____cacheline_aligned;
+	u16 id;
+	u16 eqn;
+
+	struct ocrdma_ucontext *ucontext;
+	dma_addr_t pa;
+	u32 len;
+	atomic_t use_cnt;
+
+	/* head of all qp's sq and rq for which cqes need to be flushed
+	 * by the software.
+	 */
+	struct list_head sq_head, rq_head;
+};
+
+struct ocrdma_pd {
+	struct ib_pd ibpd;
+	struct ocrdma_dev *dev;
+	struct ocrdma_ucontext *uctx;
+	atomic_t use_cnt;
+	u32 id;
+	int num_dpp_qp;
+	u32 dpp_page;
+	bool dpp_enabled;
+};
+
+struct ocrdma_ah {
+	struct ib_ah ibah;
+	struct ocrdma_dev *dev;
+	struct ocrdma_av *av;
+	u16 sgid_index;
+	u32 id;
+};
+
+struct ocrdma_qp_hwq_info {
+	u8 *va;			/* virtual address */
+	u32 max_sges;
+	u32 head, tail;
+	u32 entry_size;
+	u32 max_cnt;
+	u32 max_wqe_idx;
+	u32 free_delta;
+	u16 dbid;		/* qid, where to ring the doorbell. */
+	u32 len;
+	dma_addr_t pa;
+};
+
+struct ocrdma_srq {
+	struct ib_srq ibsrq;
+	struct ocrdma_dev *dev;
+	u8 __iomem *db;
+	/* serializes multiple contexts posting rqes */
+	spinlock_t q_lock ____cacheline_aligned;
+
+	struct ocrdma_qp_hwq_info rq;
+	struct ocrdma_pd *pd;
+	atomic_t use_cnt;
+	u32 id;
+	u64 *rqe_wr_id_tbl;
+	u32 *idx_bit_fields;
+	u32 bit_fields_len;
+};
+
+struct ocrdma_qp {
+	struct ib_qp ibqp;
+	struct ocrdma_dev *dev;
+
+	u8 __iomem *sq_db;
+	/* serializes multiple contexts posting wqes and rqes */
+	spinlock_t q_lock ____cacheline_aligned;
+	struct ocrdma_qp_hwq_info sq;
+	struct {
+		uint64_t wrid;
+		uint16_t dpp_wqe_idx;
+		uint16_t dpp_wqe;
+		uint8_t  signaled;
+		uint8_t  rsvd[3];
+	} *wqe_wr_id_tbl;
+	u32 max_inline_data;
+	struct ocrdma_cq *sq_cq;
+	/* list maintained per CQ to flush SQ errors */
+	struct list_head sq_entry;
+
+	u8 __iomem *rq_db;
+	struct ocrdma_qp_hwq_info rq;
+	u64 *rqe_wr_id_tbl;
+	struct ocrdma_cq *rq_cq;
+	struct ocrdma_srq *srq;
+	/* list maintained per CQ to flush RQ errors */
+	struct list_head rq_entry;
+
+	enum ocrdma_qp_state state;	/*  QP state */
+	int cap_flags;
+	u32 max_ord, max_ird;
+
+	u32 id;
+	struct ocrdma_pd *pd;
+
+	enum ib_qp_type qp_type;
+
+	int sgid_idx;
+	u32 qkey;
+	bool dpp_enabled;
+	u8 *ird_q_va;
+};
+
+#define OCRDMA_GET_NUM_POSTED_SHIFT_VAL(qp) \
+	(((qp->dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) && \
+		(qp->id < 64)) ? 24 : 16)
+
+struct ocrdma_hw_mr {
+	struct ocrdma_dev *dev;
+	u32 lkey;
+	u8 fr_mr;
+	u8 remote_atomic;
+	u8 remote_rd;
+	u8 remote_wr;
+	u8 local_rd;
+	u8 local_wr;
+	u8 mw_bind;
+	u8 rsvd;
+	u64 len;
+	struct ocrdma_pbl *pbl_table;
+	u32 num_pbls;
+	u32 num_pbes;
+	u32 pbl_size;
+	u32 pbe_size;
+	u64 fbo;
+	u64 va;
+};
+
+struct ocrdma_mr {
+	struct ib_mr ibmr;
+	struct ib_umem *umem;
+	struct ocrdma_hw_mr hwmr;
+	struct ocrdma_pd *pd;
+};
+
+struct ocrdma_ucontext {
+	struct ib_ucontext ibucontext;
+	struct ocrdma_dev *dev;
+
+	struct list_head mm_head;
+	struct mutex mm_list_lock; /* protects list entries of mm type */
+	struct {
+		u32 *va;
+		dma_addr_t pa;
+		u32 len;
+	} ah_tbl;
+};
+
+struct ocrdma_mm {
+	struct {
+		u64 phy_addr;
+		unsigned long len;
+	} key;
+	struct list_head entry;
+};
+
+static inline struct ocrdma_dev *get_ocrdma_dev(struct ib_device *ibdev)
+{
+	return container_of(ibdev, struct ocrdma_dev, ibdev);
+}
+
+static inline struct ocrdma_ucontext *get_ocrdma_ucontext(struct ib_ucontext
+							  *ibucontext)
+{
+	return container_of(ibucontext, struct ocrdma_ucontext, ibucontext);
+}
+
+static inline struct ocrdma_pd *get_ocrdma_pd(struct ib_pd *ibpd)
+{
+	return container_of(ibpd, struct ocrdma_pd, ibpd);
+}
+
+static inline struct ocrdma_cq *get_ocrdma_cq(struct ib_cq *ibcq)
+{
+	return container_of(ibcq, struct ocrdma_cq, ibcq);
+}
+
+static inline struct ocrdma_qp *get_ocrdma_qp(struct ib_qp *ibqp)
+{
+	return container_of(ibqp, struct ocrdma_qp, ibqp);
+}
+
+static inline struct ocrdma_mr *get_ocrdma_mr(struct ib_mr *ibmr)
+{
+	return container_of(ibmr, struct ocrdma_mr, ibmr);
+}
+
+static inline struct ocrdma_ah *get_ocrdma_ah(struct ib_ah *ibah)
+{
+	return container_of(ibah, struct ocrdma_ah, ibah);
+}
+
+static inline struct ocrdma_srq *get_ocrdma_srq(struct ib_srq *ibsrq)
+{
+	return container_of(ibsrq, struct ocrdma_srq, ibsrq);
+}
+
+#endif
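
All of the get_ocrdma_*() accessors above rely on the ib_* core object
being embedded inside the driver object, so container_of() can recover
the enclosing struct with pointer arithmetic alone, no lookup table
needed. Roughly:

	/* container_of(ptr, type, member) expands to approximately */
	#define container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))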
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_abi.h b/drivers/infiniband/hw/ocrdma/ocrdma_abi.h
new file mode 100644
index 0000000..a411a4e
--- /dev/null
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_abi.h
@@ -0,0 +1,134 @@
+/*******************************************************************
+ * This file is part of the Emulex RoCE Device Driver for          *
+ * RoCE (RDMA over Converged Ethernet) adapters.                   *
+ * Copyright (C) 2008-2012 Emulex. All rights reserved.            *
+ * EMULEX and SLI are trademarks of Emulex.                        *
+ * www.emulex.com                                                  *
+ *                                                                 *
+ * This program is free software; you can redistribute it and/or   *
+ * modify it under the terms of version 2 of the GNU General       *
+ * Public License as published by the Free Software Foundation.    *
+ * This program is distributed in the hope that it will be useful. *
+ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND          *
+ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY,  *
+ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE      *
+ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
+ * TO BE LEGALLY INVALID.  See the GNU General Public License for  *
+ * more details, a copy of which can be found in the file COPYING  *
+ * included with this package.                                     *
+ *
+ * Contact Information:
+ * linux-drivers@emulex.com
+ *
+ * Emulex
+ * 3333 Susan Street
+ * Costa Mesa, CA 92626
+ *******************************************************************/
+
+#ifndef __OCRDMA_ABI_H__
+#define __OCRDMA_ABI_H__
+
+struct ocrdma_alloc_ucontext_resp {
+	u32 dev_id;
+	u32 wqe_size;
+	u32 max_inline_data;
+	u32 dpp_wqe_size;
+	u64 ah_tbl_page;
+	u32 ah_tbl_len;
+	u32 rsvd;
+	u8 fw_ver[32];
+	u32 rqe_size;
+	u64 rsvd1;
+} __packed;
+
+/* user/kernel communication data structures */
+struct ocrdma_alloc_pd_ureq {
+	u64 rsvd1;
+} __packed;
+
+struct ocrdma_alloc_pd_uresp {
+	u32 id;
+	u32 dpp_enabled;
+	u32 dpp_page_addr_hi;
+	u32 dpp_page_addr_lo;
+	u64 rsvd1;
+} __packed;
+
+struct ocrdma_create_cq_ureq {
+	u32 dpp_cq;
+	u32 rsvd;
+} __packed;
+
+#define MAX_CQ_PAGES 8
+struct ocrdma_create_cq_uresp {
+	u32 cq_id;
+	u32 page_size;
+	u32 num_pages;
+	u32 max_hw_cqe;
+	u64 page_addr[MAX_CQ_PAGES];
+	u64 db_page_addr;
+	u32 db_page_size;
+	u32 phase_change;
+	u64 rsvd1;
+	u64 rsvd2;
+} __packed;
+
+#define MAX_QP_PAGES 8
+#define MAX_UD_AV_PAGES 8
+
+struct ocrdma_create_qp_ureq {
+	u8 enable_dpp_cq;
+	u8 rsvd;
+	u16 dpp_cq_id;
+	u32 rsvd1;
+};
+
+struct ocrdma_create_qp_uresp {
+	u16 qp_id;
+	u16 sq_dbid;
+	u16 rq_dbid;
+	u16 resv0;
+	u32 sq_page_size;
+	u32 rq_page_size;
+	u32 num_sq_pages;
+	u32 num_rq_pages;
+	u64 sq_page_addr[MAX_QP_PAGES];
+	u64 rq_page_addr[MAX_QP_PAGES];
+	u64 db_page_addr;
+	u32 db_page_size;
+	u32 dpp_credit;
+	u32 dpp_offset;
+	u32 rsvd1;
+	u32 num_wqe_allocated;
+	u32 num_rqe_allocated;
+	u32 free_wqe_delta;
+	u32 free_rqe_delta;
+	u32 db_sq_offset;
+	u32 db_rq_offset;
+	u32 db_shift;
+	u64 rsvd2;
+	u64 rsvd3;
+} __packed;
+
+struct ocrdma_create_srq_uresp {
+	u16 rq_dbid;
+	u16 resv0;
+	u32 resv1;
+
+	u32 rq_page_size;
+	u32 num_rq_pages;
+
+	u64 rq_page_addr[MAX_QP_PAGES];
+	u64 db_page_addr;
+
+	u32 db_page_size;
+	u32 num_rqe_allocated;
+	u32 db_rq_offset;
+	u32 db_shift;
+
+	u32 free_rqe_delta;
+	u32 rsvd2;
+	u64 rsvd3;
+} __packed;
+
+#endif				/* __OCRDMA_ABI_H__ */
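
These __packed structs are the user/kernel ABI; a typical consumer
(sketch only, not part of this patch) fills a uresp on the kernel side
and copies it back through ib_copy_to_udata():

	struct ocrdma_alloc_pd_uresp rsp;

	memset(&rsp, 0, sizeof(rsp));
	rsp.id = pd->id;
	rsp.dpp_enabled = pd->dpp_enabled;
	if (ib_copy_to_udata(udata, &rsp, sizeof(rsp)))
		return -EFAULT;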
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
new file mode 100644
index 0000000..a877a8e
--- /dev/null
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
@@ -0,0 +1,172 @@
+/*******************************************************************
+ * This file is part of the Emulex RoCE Device Driver for          *
+ * RoCE (RDMA over Converged Ethernet) adapters.                   *
+ * Copyright (C) 2008-2012 Emulex. All rights reserved.            *
+ * EMULEX and SLI are trademarks of Emulex.                        *
+ * www.emulex.com                                                  *
+ *                                                                 *
+ * This program is free software; you can redistribute it and/or   *
+ * modify it under the terms of version 2 of the GNU General       *
+ * Public License as published by the Free Software Foundation.    *
+ * This program is distributed in the hope that it will be useful. *
+ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND          *
+ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY,  *
+ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE      *
+ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
+ * TO BE LEGALLY INVALID.  See the GNU General Public License for  *
+ * more details, a copy of which can be found in the file COPYING  *
+ * included with this package.                                     *
+ *
+ * Contact Information:
+ * linux-drivers@emulex.com
+ *
+ * Emulex
+ * 3333 Susan Street
+ * Costa Mesa, CA 92626
+ *******************************************************************/
+
+#include <net/neighbour.h>
+#include <net/netevent.h>
+
+#include <rdma/ib_addr.h>
+#include <rdma/ib_cache.h>
+
+#include "ocrdma.h"
+#include "ocrdma_verbs.h"
+#include "ocrdma_ah.h"
+#include "ocrdma_hw.h"
+
+static inline int set_av_attr(struct ocrdma_ah *ah,
+				struct ib_ah_attr *attr, int pdid)
+{
+	int status = 0;
+	u16 vlan_tag;
+	bool vlan_enabled = false;
+	struct ocrdma_dev *dev = ah->dev;
+	struct ocrdma_eth_vlan eth;
+	struct ocrdma_grh grh;
+	int eth_sz;
+
+	memset(&eth, 0, sizeof(eth));
+	memset(&grh, 0, sizeof(grh));
+
+	ah->sgid_index = attr->grh.sgid_index;
+
+	vlan_tag = rdma_get_vlan_id(&attr->grh.dgid);
+	if (vlan_tag && (vlan_tag < 0x1000)) {
+		eth.eth_type = cpu_to_be16(0x8100);
+		eth.roce_eth_type = cpu_to_be16(OCRDMA_ROCE_ETH_TYPE);
+		vlan_tag |= (attr->sl & 7) << 13;
+		eth.vlan_tag = cpu_to_be16(vlan_tag);
+		eth_sz = sizeof(struct ocrdma_eth_vlan);
+		vlan_enabled = true;
+	} else {
+		eth.eth_type = cpu_to_be16(OCRDMA_ROCE_ETH_TYPE);
+		eth_sz = sizeof(struct ocrdma_eth_basic);
+	}
+	memcpy(&eth.smac[0], &dev->nic_info.mac_addr[0], ETH_ALEN);
+	status = ocrdma_resolve_dgid(dev, &attr->grh.dgid, &eth.dmac[0]);
+	if (status)
+		return status;
+	status = ocrdma_query_gid(&dev->ibdev, 1, attr->grh.sgid_index,
+			(union ib_gid *)&grh.sgid[0]);
+	if (status)
+		return status;
+
+	grh.tclass_flow = cpu_to_be32((6 << 28) |
+			(attr->grh.traffic_class << 24) |
+			attr->grh.flow_label);
+	/* 0x1b is the next-header value in the GRH */
+	grh.pdid_hoplimit = cpu_to_be32((pdid << 16) |
+			(0x1b << 8) | attr->grh.hop_limit);
+
+	memcpy(&grh.dgid[0], attr->grh.dgid.raw, sizeof(attr->grh.dgid.raw));
+	memcpy(&ah->av->eth_hdr, &eth, eth_sz);
+	memcpy((u8 *)ah->av + eth_sz, &grh, sizeof(struct ocrdma_grh));
+	if (vlan_enabled)
+		ah->av->valid |= OCRDMA_AV_VLAN_VALID;
+	return status;
+}
+
+struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
+{
+	u32 *ahid_addr;
+	int status;
+	struct ocrdma_ah *ah;
+	struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
+	struct ocrdma_dev *dev = pd->dev;
+
+	if (!(attr->ah_flags & IB_AH_GRH))
+		return ERR_PTR(-EINVAL);
+
+	ah = kzalloc(sizeof *ah, GFP_ATOMIC);
+	if (!ah)
+		return ERR_PTR(-ENOMEM);
+	ah->dev = pd->dev;
+
+	status = ocrdma_alloc_av(dev, ah);
+	if (status)
+		goto av_err;
+	status = set_av_attr(ah, attr, pd->id);
+	if (status)
+		goto av_conf_err;
+
+	/* if pd is for the user process, pass the ah_id to user space */
+	if ((pd->uctx) && (pd->uctx->ah_tbl.va)) {
+		ahid_addr = pd->uctx->ah_tbl.va + attr->dlid;
+		*ahid_addr = ah->id;
+	}
+	return &ah->ibah;
+
+av_conf_err:
+	ocrdma_free_av(dev, ah);
+av_err:
+	kfree(ah);
+	return ERR_PTR(status);
+}
+
+int ocrdma_destroy_ah(struct ib_ah *ibah)
+{
+	struct ocrdma_ah *ah = get_ocrdma_ah(ibah);
+	ocrdma_free_av(ah->dev, ah);
+	kfree(ah);
+	return 0;
+}
+
+int ocrdma_query_ah(struct ib_ah *ibah, struct ib_ah_attr *attr)
+{
+	struct ocrdma_ah *ah = get_ocrdma_ah(ibah);
+	struct ocrdma_av *av = ah->av;
+	struct ocrdma_grh *grh;
+	attr->ah_flags |= IB_AH_GRH;
+	if (ah->av->valid & Bit(1)) {
+		grh = (struct ocrdma_grh *)((u8 *)ah->av +
+				sizeof(struct ocrdma_eth_vlan));
+		attr->sl = be16_to_cpu(av->eth_hdr.vlan_tag) >> 13;
+	} else {
+		grh = (struct ocrdma_grh *)((u8 *)ah->av +
+					sizeof(struct ocrdma_eth_basic));
+		attr->sl = 0;
+	}
+	memcpy(&attr->grh.dgid.raw[0], &grh->dgid[0], sizeof(grh->dgid));
+	attr->grh.sgid_index = ah->sgid_index;
+	attr->grh.hop_limit = be32_to_cpu(grh->pdid_hoplimit) & 0xff;
+	attr->grh.traffic_class = be32_to_cpu(grh->tclass_flow) >> 24;
+	attr->grh.flow_label = be32_to_cpu(grh->tclass_flow) & 0x00ffffff;
+	return 0;
+}
+
+int ocrdma_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *attr)
+{
+	/* modify_ah is unsupported */
+	return -ENOSYS;
+}
+
+int ocrdma_process_mad(struct ib_device *ibdev,
+		       int process_mad_flags,
+		       u8 port_num,
+		       struct ib_wc *in_wc,
+		       struct ib_grh *in_grh,
+		       struct ib_mad *in_mad, struct ib_mad *out_mad)
+{
+	return IB_MAD_RESULT_SUCCESS;
+}
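
set_av_attr() above builds the VLAN TCI by placing the service level in
the three PCP bits (15:13) above the 12-bit VID, and ocrdma_query_ah()
reverses it with the >> 13. A worked example (helper name invented):

	static u16 build_vlan_tci(u16 vid, u8 sl)
	{
		return (vid & 0xfff) | ((u16)(sl & 7) << 13);
	}

	/* vid = 100, sl = 3  ->  0x0064 | 0x6000 = 0x6064 */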
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h
new file mode 100644
index 0000000..8ac49e7
--- /dev/null
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h
@@ -0,0 +1,42 @@
+/*******************************************************************
+ * This file is part of the Emulex RoCE Device Driver for          *
+ * RoCE (RDMA over Converged Ethernet) adapters.                   *
+ * Copyright (C) 2008-2012 Emulex. All rights reserved.            *
+ * EMULEX and SLI are trademarks of Emulex.                        *
+ * www.emulex.com                                                  *
+ *                                                                 *
+ * This program is free software; you can redistribute it and/or   *
+ * modify it under the terms of version 2 of the GNU General       *
+ * Public License as published by the Free Software Foundation.    *
+ * This program is distributed in the hope that it will be useful. *
+ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND          *
+ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY,  *
+ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE      *
+ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
+ * TO BE LEGALLY INVALID.  See the GNU General Public License for  *
+ * more details, a copy of which can be found in the file COPYING  *
+ * included with this package.                                     *
+ *
+ * Contact Information:
+ * linux-drivers@emulex.com
+ *
+ * Emulex
+ * 3333 Susan Street
+ * Costa Mesa, CA 92626
+ *******************************************************************/
+
+#ifndef __OCRDMA_AH_H__
+#define __OCRDMA_AH_H__
+
+struct ib_ah *ocrdma_create_ah(struct ib_pd *, struct ib_ah_attr *);
+int ocrdma_destroy_ah(struct ib_ah *);
+int ocrdma_query_ah(struct ib_ah *, struct ib_ah_attr *);
+int ocrdma_modify_ah(struct ib_ah *, struct ib_ah_attr *);
+
+int ocrdma_process_mad(struct ib_device *,
+		       int process_mad_flags,
+		       u8 port_num,
+		       struct ib_wc *in_wc,
+		       struct ib_grh *in_grh,
+		       struct ib_mad *in_mad, struct ib_mad *out_mad);
+#endif				/* __OCRDMA_AH_H__ */
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
new file mode 100644
index 0000000..9b204b1
--- /dev/null
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
@@ -0,0 +1,2640 @@
+/*******************************************************************
+ * This file is part of the Emulex RoCE Device Driver for          *
+ * RoCE (RDMA over Converged Ethernet) CNA adapters.               *
+ * Copyright (C) 2008-2012 Emulex. All rights reserved.            *
+ * EMULEX and SLI are trademarks of Emulex.                        *
+ * www.emulex.com                                                  *
+ *                                                                 *
+ * This program is free software; you can redistribute it and/or   *
+ * modify it under the terms of version 2 of the GNU General       *
+ * Public License as published by the Free Software Foundation.    *
+ * This program is distributed in the hope that it will be useful. *
+ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND          *
+ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY,  *
+ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE      *
+ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
+ * TO BE LEGALLY INVALID.  See the GNU General Public License for  *
+ * more details, a copy of which can be found in the file COPYING  *
+ * included with this package.                                     *
+ *
+ * Contact Information:
+ * linux-drivers@emulex.com
+ *
+ * Emulex
+ * 3333 Susan Street
+ * Costa Mesa, CA 92626
+ *******************************************************************/
+
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/log2.h>
+#include <linux/dma-mapping.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_addr.h>
+
+#include "ocrdma.h"
+#include "ocrdma_hw.h"
+#include "ocrdma_verbs.h"
+#include "ocrdma_ah.h"
+
+enum mbx_status {
+	OCRDMA_MBX_STATUS_FAILED		= 1,
+	OCRDMA_MBX_STATUS_ILLEGAL_FIELD		= 3,
+	OCRDMA_MBX_STATUS_OOR			= 100,
+	OCRDMA_MBX_STATUS_INVALID_PD		= 101,
+	OCRDMA_MBX_STATUS_PD_INUSE		= 102,
+	OCRDMA_MBX_STATUS_INVALID_CQ		= 103,
+	OCRDMA_MBX_STATUS_INVALID_QP		= 104,
+	OCRDMA_MBX_STATUS_INVALID_LKEY		= 105,
+	OCRDMA_MBX_STATUS_ORD_EXCEEDS		= 106,
+	OCRDMA_MBX_STATUS_IRD_EXCEEDS		= 107,
+	OCRDMA_MBX_STATUS_SENDQ_WQE_EXCEEDS	= 108,
+	OCRDMA_MBX_STATUS_RECVQ_RQE_EXCEEDS	= 109,
+	OCRDMA_MBX_STATUS_SGE_SEND_EXCEEDS	= 110,
+	OCRDMA_MBX_STATUS_SGE_WRITE_EXCEEDS	= 111,
+	OCRDMA_MBX_STATUS_SGE_RECV_EXCEEDS	= 112,
+	OCRDMA_MBX_STATUS_INVALID_STATE_CHANGE	= 113,
+	OCRDMA_MBX_STATUS_MW_BOUND		= 114,
+	OCRDMA_MBX_STATUS_INVALID_VA		= 115,
+	OCRDMA_MBX_STATUS_INVALID_LENGTH	= 116,
+	OCRDMA_MBX_STATUS_INVALID_FBO		= 117,
+	OCRDMA_MBX_STATUS_INVALID_ACC_RIGHTS	= 118,
+	OCRDMA_MBX_STATUS_INVALID_PBE_SIZE	= 119,
+	OCRDMA_MBX_STATUS_INVALID_PBL_ENTRY	= 120,
+	OCRDMA_MBX_STATUS_INVALID_PBL_SHIFT	= 121,
+	OCRDMA_MBX_STATUS_INVALID_SRQ_ID	= 129,
+	OCRDMA_MBX_STATUS_SRQ_ERROR		= 133,
+	OCRDMA_MBX_STATUS_RQE_EXCEEDS		= 134,
+	OCRDMA_MBX_STATUS_MTU_EXCEEDS		= 135,
+	OCRDMA_MBX_STATUS_MAX_QP_EXCEEDS	= 136,
+	OCRDMA_MBX_STATUS_SRQ_LIMIT_EXCEEDS	= 137,
+	OCRDMA_MBX_STATUS_SRQ_SIZE_UNDERUNS	= 138,
+	OCRDMA_MBX_STATUS_QP_BOUND		= 130,
+	OCRDMA_MBX_STATUS_INVALID_CHANGE	= 139,
+	OCRDMA_MBX_STATUS_ATOMIC_OPS_UNSUP	= 140,
+	OCRDMA_MBX_STATUS_INVALID_RNR_NAK_TIMER	= 141,
+	OCRDMA_MBX_STATUS_MW_STILL_BOUND	= 142,
+	OCRDMA_MBX_STATUS_PKEY_INDEX_INVALID	= 143,
+	OCRDMA_MBX_STATUS_PKEY_INDEX_EXCEEDS	= 144
+};
+
+enum additional_status {
+	OCRDMA_MBX_ADDI_STATUS_INSUFFICIENT_RESOURCES = 22
+};
+
+enum cqe_status {
+	OCRDMA_MBX_CQE_STATUS_INSUFFICIENT_PRIVILEDGES	= 1,
+	OCRDMA_MBX_CQE_STATUS_INVALID_PARAMETER		= 2,
+	OCRDMA_MBX_CQE_STATUS_INSUFFICIENT_RESOURCES	= 3,
+	OCRDMA_MBX_CQE_STATUS_QUEUE_FLUSHING		= 4,
+	OCRDMA_MBX_CQE_STATUS_DMA_FAILED		= 5
+};
+
+static inline void *ocrdma_get_eqe(struct ocrdma_eq *eq)
+{
+	return (u8 *)eq->q.va + (eq->q.tail * sizeof(struct ocrdma_eqe));
+}
+
+static inline void ocrdma_eq_inc_tail(struct ocrdma_eq *eq)
+{
+	eq->q.tail = (eq->q.tail + 1) & (OCRDMA_EQ_LEN - 1);
+}
+
+static inline void *ocrdma_get_mcqe(struct ocrdma_dev *dev)
+{
+	struct ocrdma_mcqe *cqe = (struct ocrdma_mcqe *)
+	    ((u8 *) dev->mq.cq.va +
+	     (dev->mq.cq.tail * sizeof(struct ocrdma_mcqe)));
+
+	if (!(le32_to_cpu(cqe->valid_ae_cmpl_cons) & OCRDMA_MCQE_VALID_MASK))
+		return NULL;
+	return cqe;
+}
+
+static inline void ocrdma_mcq_inc_tail(struct ocrdma_dev *dev)
+{
+	dev->mq.cq.tail = (dev->mq.cq.tail + 1) & (OCRDMA_MQ_CQ_LEN - 1);
+}
+
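+/*
+ * The tail updates above rely on the ring lengths being powers of
+ * two: (i + 1) & (len - 1) is the cheap form of (i + 1) % len; on a
+ * 16-entry ring, for example, tail 15 wraps back to 16 & 15 = 0.
+ */
+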
+static inline struct ocrdma_mqe *ocrdma_get_mqe(struct ocrdma_dev *dev)
+{
+	return (struct ocrdma_mqe *)((u8 *) dev->mq.sq.va +
+				     (dev->mq.sq.head *
+				      sizeof(struct ocrdma_mqe)));
+}
+
+static inline void ocrdma_mq_inc_head(struct ocrdma_dev *dev)
+{
+	dev->mq.sq.head = (dev->mq.sq.head + 1) & (OCRDMA_MQ_LEN - 1);
+	atomic_inc(&dev->mq.sq.used);
+}
+
+static inline void *ocrdma_get_mqe_rsp(struct ocrdma_dev *dev)
+{
+	return (void *)((u8 *) dev->mq.sq.va +
+			(dev->mqe_ctx.tag * sizeof(struct ocrdma_mqe)));
+}
+
+enum ib_qp_state get_ibqp_state(enum ocrdma_qp_state qps)
+{
+	switch (qps) {
+	case OCRDMA_QPS_RST:
+		return IB_QPS_RESET;
+	case OCRDMA_QPS_INIT:
+		return IB_QPS_INIT;
+	case OCRDMA_QPS_RTR:
+		return IB_QPS_RTR;
+	case OCRDMA_QPS_RTS:
+		return IB_QPS_RTS;
+	case OCRDMA_QPS_SQD:
+	case OCRDMA_QPS_SQ_DRAINING:
+		return IB_QPS_SQD;
+	case OCRDMA_QPS_SQE:
+		return IB_QPS_SQE;
+	case OCRDMA_QPS_ERR:
+		return IB_QPS_ERR;
+	}
+	return IB_QPS_ERR;
+}
+
+static enum ocrdma_qp_state get_ocrdma_qp_state(enum ib_qp_state qps)
+{
+	switch (qps) {
+	case IB_QPS_RESET:
+		return OCRDMA_QPS_RST;
+	case IB_QPS_INIT:
+		return OCRDMA_QPS_INIT;
+	case IB_QPS_RTR:
+		return OCRDMA_QPS_RTR;
+	case IB_QPS_RTS:
+		return OCRDMA_QPS_RTS;
+	case IB_QPS_SQD:
+		return OCRDMA_QPS_SQD;
+	case IB_QPS_SQE:
+		return OCRDMA_QPS_SQE;
+	case IB_QPS_ERR:
+		return OCRDMA_QPS_ERR;
+	}
+	return OCRDMA_QPS_ERR;
+}
+
+static int ocrdma_get_mbx_errno(u32 status)
+{
+	int err_num = -EFAULT;
+	u8 mbox_status = (status & OCRDMA_MBX_RSP_STATUS_MASK) >>
+					OCRDMA_MBX_RSP_STATUS_SHIFT;
+	u8 add_status = (status & OCRDMA_MBX_RSP_ASTATUS_MASK) >>
+					OCRDMA_MBX_RSP_ASTATUS_SHIFT;
+
+	switch (mbox_status) {
+	case OCRDMA_MBX_STATUS_OOR:
+	case OCRDMA_MBX_STATUS_MAX_QP_EXCEEDS:
+		err_num = -EAGAIN;
+		break;
+
+	case OCRDMA_MBX_STATUS_INVALID_PD:
+	case OCRDMA_MBX_STATUS_INVALID_CQ:
+	case OCRDMA_MBX_STATUS_INVALID_SRQ_ID:
+	case OCRDMA_MBX_STATUS_INVALID_QP:
+	case OCRDMA_MBX_STATUS_INVALID_CHANGE:
+	case OCRDMA_MBX_STATUS_MTU_EXCEEDS:
+	case OCRDMA_MBX_STATUS_INVALID_RNR_NAK_TIMER:
+	case OCRDMA_MBX_STATUS_PKEY_INDEX_INVALID:
+	case OCRDMA_MBX_STATUS_PKEY_INDEX_EXCEEDS:
+	case OCRDMA_MBX_STATUS_ILLEGAL_FIELD:
+	case OCRDMA_MBX_STATUS_INVALID_PBL_ENTRY:
+	case OCRDMA_MBX_STATUS_INVALID_LKEY:
+	case OCRDMA_MBX_STATUS_INVALID_VA:
+	case OCRDMA_MBX_STATUS_INVALID_LENGTH:
+	case OCRDMA_MBX_STATUS_INVALID_FBO:
+	case OCRDMA_MBX_STATUS_INVALID_ACC_RIGHTS:
+	case OCRDMA_MBX_STATUS_INVALID_PBE_SIZE:
+	case OCRDMA_MBX_STATUS_ATOMIC_OPS_UNSUP:
+	case OCRDMA_MBX_STATUS_SRQ_ERROR:
+	case OCRDMA_MBX_STATUS_SRQ_SIZE_UNDERUNS:
+		err_num = -EINVAL;
+		break;
+
+	case OCRDMA_MBX_STATUS_PD_INUSE:
+	case OCRDMA_MBX_STATUS_QP_BOUND:
+	case OCRDMA_MBX_STATUS_MW_STILL_BOUND:
+	case OCRDMA_MBX_STATUS_MW_BOUND:
+		err_num = -EBUSY;
+		break;
+
+	case OCRDMA_MBX_STATUS_RECVQ_RQE_EXCEEDS:
+	case OCRDMA_MBX_STATUS_SGE_RECV_EXCEEDS:
+	case OCRDMA_MBX_STATUS_RQE_EXCEEDS:
+	case OCRDMA_MBX_STATUS_SRQ_LIMIT_EXCEEDS:
+	case OCRDMA_MBX_STATUS_ORD_EXCEEDS:
+	case OCRDMA_MBX_STATUS_IRD_EXCEEDS:
+	case OCRDMA_MBX_STATUS_SENDQ_WQE_EXCEEDS:
+	case OCRDMA_MBX_STATUS_SGE_SEND_EXCEEDS:
+	case OCRDMA_MBX_STATUS_SGE_WRITE_EXCEEDS:
+		err_num = -ENOBUFS;
+		break;
+
+	case OCRDMA_MBX_STATUS_FAILED:
+		switch (add_status) {
+		case OCRDMA_MBX_ADDI_STATUS_INSUFFICIENT_RESOURCES:
+			err_num = -EAGAIN;
+			break;
+		}
+		break;
+	default:
+		err_num = -EFAULT;
+	}
+	return err_num;
+}
+
+static int ocrdma_get_mbx_cqe_errno(u16 cqe_status)
+{
+	int err_num = -EINVAL;
+
+	switch (cqe_status) {
+	case OCRDMA_MBX_CQE_STATUS_INSUFFICIENT_PRIVILEDGES:
+		err_num = -EPERM;
+		break;
+	case OCRDMA_MBX_CQE_STATUS_INVALID_PARAMETER:
+		err_num = -EINVAL;
+		break;
+	case OCRDMA_MBX_CQE_STATUS_INSUFFICIENT_RESOURCES:
+	case OCRDMA_MBX_CQE_STATUS_QUEUE_FLUSHING:
+		err_num = -EAGAIN;
+		break;
+	case OCRDMA_MBX_CQE_STATUS_DMA_FAILED:
+		err_num = -EIO;
+		break;
+	}
+	return err_num;
+}
+
+void ocrdma_ring_cq_db(struct ocrdma_dev *dev, u16 cq_id, bool armed,
+		       bool solicited, u16 cqe_popped)
+{
+	u32 val = cq_id & OCRDMA_DB_CQ_RING_ID_MASK;
+
+	val |= ((cq_id & OCRDMA_DB_CQ_RING_ID_EXT_MASK) <<
+	     OCRDMA_DB_CQ_RING_ID_EXT_MASK_SHIFT);
+
+	if (armed)
+		val |= (1 << OCRDMA_DB_CQ_REARM_SHIFT);
+	if (solicited)
+		val |= (1 << OCRDMA_DB_CQ_SOLICIT_SHIFT);
+	val |= (cqe_popped << OCRDMA_DB_CQ_NUM_POPPED_SHIFT);
+	iowrite32(val, dev->nic_info.db + OCRDMA_DB_CQ_OFFSET);
+}
+
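+/*
+ * ocrdma_ring_cq_db() above packs one 32-bit doorbell write: the CQ
+ * id (plus its extension bits), the single-bit rearm and solicit
+ * flags, and the count of CQEs consumed in the num_popped field.
+ */
+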
+static void ocrdma_ring_mq_db(struct ocrdma_dev *dev)
+{
+	u32 val = 0;
+
+	val |= dev->mq.sq.id & OCRDMA_MQ_ID_MASK;
+	val |= 1 << OCRDMA_MQ_NUM_MQE_SHIFT;
+	iowrite32(val, dev->nic_info.db + OCRDMA_DB_MQ_OFFSET);
+}
+
+static void ocrdma_ring_eq_db(struct ocrdma_dev *dev, u16 eq_id,
+			      bool arm, bool clear_int, u16 num_eqe)
+{
+	u32 val = 0;
+
+	val |= eq_id & OCRDMA_EQ_ID_MASK;
+	val |= ((eq_id & OCRDMA_EQ_ID_EXT_MASK) << OCRDMA_EQ_ID_EXT_MASK_SHIFT);
+	if (arm)
+		val |= (1 << OCRDMA_REARM_SHIFT);
+	if (clear_int)
+		val |= (1 << OCRDMA_EQ_CLR_SHIFT);
+	val |= (1 << OCRDMA_EQ_TYPE_SHIFT);
+	val |= (num_eqe << OCRDMA_NUM_EQE_SHIFT);
+	iowrite32(val, dev->nic_info.db + OCRDMA_DB_EQ_OFFSET);
+}
+
+static void ocrdma_init_mch(struct ocrdma_mbx_hdr *cmd_hdr,
+			    u8 opcode, u8 subsys, u32 cmd_len)
+{
+	cmd_hdr->subsys_op = (opcode | (subsys << OCRDMA_MCH_SUBSYS_SHIFT));
+	cmd_hdr->timeout = 20; /* seconds */
+	cmd_hdr->cmd_len = cmd_len - sizeof(struct ocrdma_mbx_hdr);
+}
+
+static void *ocrdma_init_emb_mqe(u8 opcode, u32 cmd_len)
+{
+	struct ocrdma_mqe *mqe;
+
+	mqe = kzalloc(sizeof(struct ocrdma_mqe), GFP_KERNEL);
+	if (!mqe)
+		return NULL;
+	mqe->hdr.spcl_sge_cnt_emb |=
+		(OCRDMA_MQE_EMBEDDED << OCRDMA_MQE_HDR_EMB_SHIFT) &
+					OCRDMA_MQE_HDR_EMB_MASK;
+	mqe->hdr.pyld_len = cmd_len - sizeof(struct ocrdma_mqe_hdr);
+
+	ocrdma_init_mch(&mqe->u.emb_req.mch, opcode, OCRDMA_SUBSYS_ROCE,
+			mqe->hdr.pyld_len);
+	return mqe;
+}
+
+static void ocrdma_free_q(struct ocrdma_dev *dev, struct ocrdma_queue_info *q)
+{
+	dma_free_coherent(&dev->nic_info.pdev->dev, q->size, q->va, q->dma);
+}
+
+static int ocrdma_alloc_q(struct ocrdma_dev *dev,
+			  struct ocrdma_queue_info *q, u16 len, u16 entry_size)
+{
+	memset(q, 0, sizeof(*q));
+	q->len = len;
+	q->entry_size = entry_size;
+	q->size = len * entry_size;
+	q->va = dma_alloc_coherent(&dev->nic_info.pdev->dev, q->size,
+				   &q->dma, GFP_KERNEL);
+	if (!q->va)
+		return -ENOMEM;
+	memset(q->va, 0, q->size);
+	return 0;
+}
+
+static void ocrdma_build_q_pages(struct ocrdma_pa *q_pa, int cnt,
+					dma_addr_t host_pa, int hw_page_size)
+{
+	int i;
+
+	for (i = 0; i < cnt; i++) {
+		q_pa[i].lo = (u32) (host_pa & 0xffffffff);
+		q_pa[i].hi = (u32) upper_32_bits(host_pa);
+		host_pa += hw_page_size;
+	}
+}
+
+static void ocrdma_assign_eq_vect_gen2(struct ocrdma_dev *dev,
+				       struct ocrdma_eq *eq)
+{
+	/* assign vector and update vector id for next EQ */
+	eq->vector = dev->nic_info.msix.start_vector;
+	dev->nic_info.msix.start_vector += 1;
+}
+
+static void ocrdma_free_eq_vect_gen2(struct ocrdma_dev *dev)
+{
+	/* this assumes that EQs are freed in exactly the reverse
+	 * order of their allocation.
+	 */
+	dev->nic_info.msix.start_vector -= 1;
+}
+
+static int ocrdma_mbx_delete_q(struct ocrdma_dev *dev, struct ocrdma_queue_info *q,
+			       int queue_type)
+{
+	u8 opcode = 0;
+	int status;
+	struct ocrdma_delete_q_req *cmd = dev->mbx_cmd;
+
+	switch (queue_type) {
+	case QTYPE_MCCQ:
+		opcode = OCRDMA_CMD_DELETE_MQ;
+		break;
+	case QTYPE_CQ:
+		opcode = OCRDMA_CMD_DELETE_CQ;
+		break;
+	case QTYPE_EQ:
+		opcode = OCRDMA_CMD_DELETE_EQ;
+		break;
+	default:
+		BUG();
+	}
+	memset(cmd, 0, sizeof(*cmd));
+	ocrdma_init_mch(&cmd->req, opcode, OCRDMA_SUBSYS_COMMON, sizeof(*cmd));
+	cmd->id = q->id;
+
+	status = be_roce_mcc_cmd(dev->nic_info.netdev,
+				 cmd, sizeof(*cmd), NULL, NULL);
+	if (!status)
+		q->created = false;
+	return status;
+}
+
+static int ocrdma_mbx_create_eq(struct ocrdma_dev *dev, struct ocrdma_eq *eq)
+{
+	int status;
+	struct ocrdma_create_eq_req *cmd = dev->mbx_cmd;
+	struct ocrdma_create_eq_rsp *rsp = dev->mbx_cmd;
+
+	memset(cmd, 0, sizeof(*cmd));
+	ocrdma_init_mch(&cmd->req, OCRDMA_CMD_CREATE_EQ, OCRDMA_SUBSYS_COMMON,
+			sizeof(*cmd));
+	if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY)
+		cmd->req.rsvd_version = 0;
+	else
+		cmd->req.rsvd_version = 2;
+
+	cmd->num_pages = 4;
+	cmd->valid = OCRDMA_CREATE_EQ_VALID;
+	cmd->cnt = 4 << OCRDMA_CREATE_EQ_CNT_SHIFT;
+
+	ocrdma_build_q_pages(&cmd->pa[0], cmd->num_pages, eq->q.dma,
+			     PAGE_SIZE_4K);
+	status = be_roce_mcc_cmd(dev->nic_info.netdev, cmd, sizeof(*cmd), NULL,
+				 NULL);
+	if (!status) {
+		eq->q.id = rsp->vector_eqid & 0xffff;
+		if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY)
+			ocrdma_assign_eq_vect_gen2(dev, eq);
+		else {
+			eq->vector = (rsp->vector_eqid >> 16) & 0xffff;
+			dev->nic_info.msix.start_vector += 1;
+		}
+		eq->q.created = true;
+	}
+	return status;
+}
+
+static int ocrdma_create_eq(struct ocrdma_dev *dev,
+			    struct ocrdma_eq *eq, u16 q_len)
+{
+	int status;
+
+	status = ocrdma_alloc_q(dev, &eq->q, OCRDMA_EQ_LEN,
+				sizeof(struct ocrdma_eqe));
+	if (status)
+		return status;
+
+	status = ocrdma_mbx_create_eq(dev, eq);
+	if (status)
+		goto mbx_err;
+	eq->dev = dev;
+	ocrdma_ring_eq_db(dev, eq->q.id, true, true, 0);
+
+	return 0;
+mbx_err:
+	ocrdma_free_q(dev, &eq->q);
+	return status;
+}
+
+static int ocrdma_get_irq(struct ocrdma_dev *dev, struct ocrdma_eq *eq)
+{
+	int irq;
+
+	if (dev->nic_info.intr_mode == BE_INTERRUPT_MODE_INTX)
+		irq = dev->nic_info.pdev->irq;
+	else
+		irq = dev->nic_info.msix.vector_list[eq->vector];
+	return irq;
+}
+
+static void _ocrdma_destroy_eq(struct ocrdma_dev *dev, struct ocrdma_eq *eq)
+{
+	if (eq->q.created) {
+		ocrdma_mbx_delete_q(dev, &eq->q, QTYPE_EQ);
+		if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY)
+			ocrdma_free_eq_vect_gen2(dev);
+		ocrdma_free_q(dev, &eq->q);
+	}
+}
+
+static void ocrdma_destroy_eq(struct ocrdma_dev *dev, struct ocrdma_eq *eq)
+{
+	int irq;
+
+	/* disarm EQ so that interrupts are not generated
+	 * during freeing and EQ delete is in progress.
+	 */
+	ocrdma_ring_eq_db(dev, eq->q.id, false, false, 0);
+
+	irq = ocrdma_get_irq(dev, eq);
+	free_irq(irq, eq);
+	_ocrdma_destroy_eq(dev, eq);
+}
+
+static void ocrdma_destroy_qp_eqs(struct ocrdma_dev *dev)
+{
+	int i;
+
+	/* deallocate the data path eqs */
+	for (i = 0; i < dev->eq_cnt; i++)
+		ocrdma_destroy_eq(dev, &dev->qp_eq_tbl[i]);
+}
+
+static int ocrdma_mbx_mq_cq_create(struct ocrdma_dev *dev,
+				   struct ocrdma_queue_info *cq,
+				   struct ocrdma_queue_info *eq)
+{
+	struct ocrdma_create_cq_cmd *cmd = dev->mbx_cmd;
+	struct ocrdma_create_cq_cmd_rsp *rsp = dev->mbx_cmd;
+	int status;
+
+	memset(cmd, 0, sizeof(*cmd));
+	ocrdma_init_mch(&cmd->req, OCRDMA_CMD_CREATE_CQ,
+			OCRDMA_SUBSYS_COMMON, sizeof(*cmd));
+
+	cmd->pgsz_pgcnt = PAGES_4K_SPANNED(cq->va, cq->size);
+	cmd->ev_cnt_flags = OCRDMA_CREATE_CQ_DEF_FLAGS;
+	cmd->eqn = (eq->id << OCRDMA_CREATE_CQ_EQID_SHIFT);
+
+	ocrdma_build_q_pages(&cmd->pa[0], cmd->pgsz_pgcnt,
+			     cq->dma, PAGE_SIZE_4K);
+	status = be_roce_mcc_cmd(dev->nic_info.netdev,
+				 cmd, sizeof(*cmd), NULL, NULL);
+	if (!status) {
+		cq->id = (rsp->cq_id & OCRDMA_CREATE_CQ_RSP_CQ_ID_MASK);
+		cq->created = true;
+	}
+	return status;
+}
+
+static u32 ocrdma_encoded_q_len(int q_len)
+{
+	u32 len_encoded = fls(q_len);	/* log2(len) + 1 */
+
+	if (len_encoded == 16)
+		len_encoded = 0;
+	return len_encoded;
+}
+
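+/*
+ * For power-of-two lengths fls() returns log2(q_len) + 1, so a
+ * 128-entry queue encodes as fls(128) = 8; the fls() == 16 case
+ * (a 32K-entry queue) wraps to the special encoding 0.
+ */
+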
+static int ocrdma_mbx_create_mq(struct ocrdma_dev *dev,
+				struct ocrdma_queue_info *mq,
+				struct ocrdma_queue_info *cq)
+{
+	int num_pages, status;
+	struct ocrdma_create_mq_req *cmd = dev->mbx_cmd;
+	struct ocrdma_create_mq_rsp *rsp = dev->mbx_cmd;
+	struct ocrdma_pa *pa;
+
+	memset(cmd, 0, sizeof(*cmd));
+	num_pages = PAGES_4K_SPANNED(mq->va, mq->size);
+
+	if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) {
+		ocrdma_init_mch(&cmd->req, OCRDMA_CMD_CREATE_MQ,
+				OCRDMA_SUBSYS_COMMON, sizeof(*cmd));
+		cmd->v0.pages = num_pages;
+		cmd->v0.async_cqid_valid = OCRDMA_CREATE_MQ_ASYNC_CQ_VALID;
+		cmd->v0.async_cqid_valid |= (cq->id << 1);
+		cmd->v0.cqid_ringsize |= (ocrdma_encoded_q_len(mq->len) <<
+					     OCRDMA_CREATE_MQ_RING_SIZE_SHIFT);
+		cmd->v0.cqid_ringsize |=
+			(cq->id << OCRDMA_CREATE_MQ_V0_CQ_ID_SHIFT);
+		cmd->v0.valid = OCRDMA_CREATE_MQ_VALID;
+		pa = &cmd->v0.pa[0];
+	} else {
+		ocrdma_init_mch(&cmd->req, OCRDMA_CMD_CREATE_MQ_EXT,
+				OCRDMA_SUBSYS_COMMON, sizeof(*cmd));
+		cmd->req.rsvd_version = 1;
+		cmd->v1.cqid_pages = num_pages;
+		cmd->v1.cqid_pages |= (cq->id << OCRDMA_CREATE_MQ_CQ_ID_SHIFT);
+		cmd->v1.async_cqid_valid = OCRDMA_CREATE_MQ_ASYNC_CQ_VALID;
+		cmd->v1.async_event_bitmap = Bit(20);
+		cmd->v1.async_cqid_ringsize = cq->id;
+		cmd->v1.async_cqid_ringsize |= (ocrdma_encoded_q_len(mq->len) <<
+					     OCRDMA_CREATE_MQ_RING_SIZE_SHIFT);
+		cmd->v1.valid = OCRDMA_CREATE_MQ_VALID;
+		pa = &cmd->v1.pa[0];
+	}
+	ocrdma_build_q_pages(pa, num_pages, mq->dma, PAGE_SIZE_4K);
+	status = be_roce_mcc_cmd(dev->nic_info.netdev,
+				 cmd, sizeof(*cmd), NULL, NULL);
+	if (!status) {
+		mq->id = rsp->id;
+		mq->created = true;
+	}
+	return status;
+}
+
+static int ocrdma_create_mq(struct ocrdma_dev *dev)
+{
+	int status;
+
+	/* Alloc completion queue for Mailbox queue */
+	status = ocrdma_alloc_q(dev, &dev->mq.cq, OCRDMA_MQ_CQ_LEN,
+				sizeof(struct ocrdma_mcqe));
+	if (status)
+		goto alloc_err;
+
+	status = ocrdma_mbx_mq_cq_create(dev, &dev->mq.cq, &dev->meq.q);
+	if (status)
+		goto mbx_cq_free;
+
+	memset(&dev->mqe_ctx, 0, sizeof(dev->mqe_ctx));
+	init_waitqueue_head(&dev->mqe_ctx.cmd_wait);
+	mutex_init(&dev->mqe_ctx.lock);
+
+	/* Alloc Mailbox queue */
+	status = ocrdma_alloc_q(dev, &dev->mq.sq, OCRDMA_MQ_LEN,
+				sizeof(struct ocrdma_mqe));
+	if (status)
+		goto mbx_cq_destroy;
+	status = ocrdma_mbx_create_mq(dev, &dev->mq.sq, &dev->mq.cq);
+	if (status)
+		goto mbx_q_free;
+	ocrdma_ring_cq_db(dev, dev->mq.cq.id, true, false, 0);
+	return 0;
+
+mbx_q_free:
+	ocrdma_free_q(dev, &dev->mq.sq);
+mbx_cq_destroy:
+	ocrdma_mbx_delete_q(dev, &dev->mq.cq, QTYPE_CQ);
+mbx_cq_free:
+	ocrdma_free_q(dev, &dev->mq.cq);
+alloc_err:
+	return status;
+}
+
+static void ocrdma_destroy_mq(struct ocrdma_dev *dev)
+{
+	struct ocrdma_queue_info *mbxq, *cq;
+
+	/* mqe_ctx lock synchronizes with any other pending cmds. */
+	mutex_lock(&dev->mqe_ctx.lock);
+	mbxq = &dev->mq.sq;
+	if (mbxq->created) {
+		ocrdma_mbx_delete_q(dev, mbxq, QTYPE_MCCQ);
+		ocrdma_free_q(dev, mbxq);
+	}
+	mutex_unlock(&dev->mqe_ctx.lock);
+
+	cq = &dev->mq.cq;
+	if (cq->created) {
+		ocrdma_mbx_delete_q(dev, cq, QTYPE_CQ);
+		ocrdma_free_q(dev, cq);
+	}
+}
+
+static void ocrdma_process_qpcat_error(struct ocrdma_dev *dev,
+				       struct ocrdma_qp *qp)
+{
+	enum ib_qp_state new_ib_qps = IB_QPS_ERR;
+	enum ib_qp_state old_ib_qps;
+
+	BUG_ON(qp == NULL);
+	ocrdma_qp_state_machine(qp, new_ib_qps, &old_ib_qps);
+}
+
+static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev,
+				    struct ocrdma_ae_mcqe *cqe)
+{
+	struct ocrdma_qp *qp = NULL;
+	struct ocrdma_cq *cq = NULL;
+	struct ib_event ib_evt;
+	int cq_event = 0;
+	int qp_event = 1;
+	int srq_event = 0;
+	int dev_event = 0;
+	int type = (cqe->valid_ae_event & OCRDMA_AE_MCQE_EVENT_TYPE_MASK) >>
+	    OCRDMA_AE_MCQE_EVENT_TYPE_SHIFT;
+
+	if (cqe->qpvalid_qpid & OCRDMA_AE_MCQE_QPVALID)
+		qp = dev->qp_tbl[cqe->qpvalid_qpid & OCRDMA_AE_MCQE_QPID_MASK];
+	if (cqe->cqvalid_cqid & OCRDMA_AE_MCQE_CQVALID)
+		cq = dev->cq_tbl[cqe->cqvalid_cqid & OCRDMA_AE_MCQE_CQID_MASK];
+
+	ib_evt.device = &dev->ibdev;
+
+	switch (type) {
+	case OCRDMA_CQ_ERROR:
+		ib_evt.element.cq = &cq->ibcq;
+		ib_evt.event = IB_EVENT_CQ_ERR;
+		cq_event = 1;
+		qp_event = 0;
+		break;
+	case OCRDMA_CQ_OVERRUN_ERROR:
+		ib_evt.element.cq = &cq->ibcq;
+		ib_evt.event = IB_EVENT_CQ_ERR;
+		break;
+	case OCRDMA_CQ_QPCAT_ERROR:
+		ib_evt.element.qp = &qp->ibqp;
+		ib_evt.event = IB_EVENT_QP_FATAL;
+		ocrdma_process_qpcat_error(dev, qp);
+		break;
+	case OCRDMA_QP_ACCESS_ERROR:
+		ib_evt.element.qp = &qp->ibqp;
+		ib_evt.event = IB_EVENT_QP_ACCESS_ERR;
+		break;
+	case OCRDMA_QP_COMM_EST_EVENT:
+		ib_evt.element.qp = &qp->ibqp;
+		ib_evt.event = IB_EVENT_COMM_EST;
+		break;
+	case OCRDMA_SQ_DRAINED_EVENT:
+		ib_evt.element.qp = &qp->ibqp;
+		ib_evt.event = IB_EVENT_SQ_DRAINED;
+		break;
+	case OCRDMA_DEVICE_FATAL_EVENT:
+		ib_evt.element.port_num = 1;
+		ib_evt.event = IB_EVENT_DEVICE_FATAL;
+		qp_event = 0;
+		dev_event = 1;
+		break;
+	case OCRDMA_SRQCAT_ERROR:
+		ib_evt.element.srq = &qp->srq->ibsrq;
+		ib_evt.event = IB_EVENT_SRQ_ERR;
+		srq_event = 1;
+		qp_event = 0;
+		break;
+	case OCRDMA_SRQ_LIMIT_EVENT:
+		ib_evt.element.srq = &qp->srq->ibsrq;
+		ib_evt.event = IB_EVENT_QP_LAST_WQE_REACHED;
+		srq_event = 1;
+		qp_event = 0;
+		break;
+	case OCRDMA_QP_LAST_WQE_EVENT:
+		ib_evt.element.qp = &qp->ibqp;
+		ib_evt.event = IB_EVENT_QP_LAST_WQE_REACHED;
+		break;
+	default:
+		cq_event = 0;
+		qp_event = 0;
+		srq_event = 0;
+		dev_event = 0;
+		ocrdma_err("%s() unknown type=0x%x\n", __func__, type);
+		break;
+	}
+
+	if (qp_event) {
+		if (qp->ibqp.event_handler)
+			qp->ibqp.event_handler(&ib_evt, qp->ibqp.qp_context);
+	} else if (cq_event) {
+		if (cq->ibcq.event_handler)
+			cq->ibcq.event_handler(&ib_evt, cq->ibcq.cq_context);
+	} else if (srq_event) {
+		if (qp->srq->ibsrq.event_handler)
+			qp->srq->ibsrq.event_handler(&ib_evt,
+						     qp->srq->ibsrq.
+						     srq_context);
+	} else if (dev_event) {
+		ib_dispatch_event(&ib_evt);
+	}
+
+}
+
+static void ocrdma_process_acqe(struct ocrdma_dev *dev, void *ae_cqe)
+{
+	/* async CQE processing */
+	struct ocrdma_ae_mcqe *cqe = ae_cqe;
+	u32 evt_code = (cqe->valid_ae_event & OCRDMA_AE_MCQE_EVENT_CODE_MASK) >>
+			OCRDMA_AE_MCQE_EVENT_CODE_SHIFT;
+
+	if (evt_code == OCRDMA_ASYNC_EVE_CODE)
+		ocrdma_dispatch_ibevent(dev, cqe);
+	else
+		ocrdma_err("%s(%d) invalid evt code=0x%x\n",
+			   __func__, dev->id, evt_code);
+}
+
+static void ocrdma_process_mcqe(struct ocrdma_dev *dev, struct ocrdma_mcqe *cqe)
+{
+	if (dev->mqe_ctx.tag == cqe->tag_lo && !dev->mqe_ctx.cmd_done) {
+		dev->mqe_ctx.cqe_status = (cqe->status &
+		     OCRDMA_MCQE_STATUS_MASK) >> OCRDMA_MCQE_STATUS_SHIFT;
+		dev->mqe_ctx.ext_status =
+		    (cqe->status & OCRDMA_MCQE_ESTATUS_MASK)
+		    >> OCRDMA_MCQE_ESTATUS_SHIFT;
+		dev->mqe_ctx.cmd_done = true;
+		wake_up(&dev->mqe_ctx.cmd_wait);
+	} else {
+		ocrdma_err("%s() cqe for invalid tag 0x%x, expected 0x%x\n",
+			   __func__, cqe->tag_lo, dev->mqe_ctx.tag);
+	}
+}
+
+static int ocrdma_mq_cq_handler(struct ocrdma_dev *dev, u16 cq_id)
+{
+	u16 cqe_popped = 0;
+	struct ocrdma_mcqe *cqe;
+
+	while (1) {
+		cqe = ocrdma_get_mcqe(dev);
+		if (cqe == NULL)
+			break;
+		ocrdma_le32_to_cpu(cqe, sizeof(*cqe));
+		cqe_popped += 1;
+		if (cqe->valid_ae_cmpl_cons & OCRDMA_MCQE_AE_MASK)
+			ocrdma_process_acqe(dev, cqe);
+		else if (cqe->valid_ae_cmpl_cons & OCRDMA_MCQE_CMPL_MASK)
+			ocrdma_process_mcqe(dev, cqe);
+		else
+			ocrdma_err("%s() cqe->compl is not set.\n", __func__);
+		memset(cqe, 0, sizeof(struct ocrdma_mcqe));
+		ocrdma_mcq_inc_tail(dev);
+	}
+	ocrdma_ring_cq_db(dev, dev->mq.cq.id, true, false, cqe_popped);
+	return 0;
+}
+
+static void ocrdma_qp_buddy_cq_handler(struct ocrdma_dev *dev,
+				       struct ocrdma_cq *cq)
+{
+	unsigned long flags;
+	struct ocrdma_qp *qp;
+	bool buddy_cq_found = false;
+	/* Go through the list of QPs in error state that are using this CQ
+	 * and invoke their callback handlers to trigger CQE processing for
+	 * error/flushed CQEs. It is rare to find more than a few entries in
+	 * this list, as most consumers stop after getting an error CQE.
+	 * The list is traversed only until a matching buddy cq is found.
+	 */
+	spin_lock_irqsave(&dev->flush_q_lock, flags);
+	list_for_each_entry(qp, &cq->sq_head, sq_entry) {
+		if (qp->srq)
+			continue;
+		/* if wq and rq share the same cq, then the comp_handler
+		 * has already been invoked.
+		 */
+		if (qp->sq_cq == qp->rq_cq)
+			continue;
+		/* if completion came on sq, rq's cq is buddy cq.
+		 * if completion came on rq, sq's cq is buddy cq.
+		 */
+		if (qp->sq_cq == cq)
+			cq = qp->rq_cq;
+		else
+			cq = qp->sq_cq;
+		buddy_cq_found = true;
+		break;
+	}
+	spin_unlock_irqrestore(&dev->flush_q_lock, flags);
+	if (!buddy_cq_found)
+		return;
+	if (cq->ibcq.comp_handler) {
+		spin_lock_irqsave(&cq->comp_handler_lock, flags);
+		(*cq->ibcq.comp_handler) (&cq->ibcq, cq->ibcq.cq_context);
+		spin_unlock_irqrestore(&cq->comp_handler_lock, flags);
+	}
+}
+
+static void ocrdma_qp_cq_handler(struct ocrdma_dev *dev, u16 cq_idx)
+{
+	unsigned long flags;
+	struct ocrdma_cq *cq;
+
+	if (cq_idx >= OCRDMA_MAX_CQ)
+		BUG();
+
+	cq = dev->cq_tbl[cq_idx];
+	if (cq == NULL) {
+		ocrdma_err("%s%d invalid id=0x%x\n", __func__, dev->id, cq_idx);
+		return;
+	}
+	spin_lock_irqsave(&cq->cq_lock, flags);
+	cq->armed = false;
+	cq->solicited = false;
+	spin_unlock_irqrestore(&cq->cq_lock, flags);
+
+	ocrdma_ring_cq_db(dev, cq->id, false, false, 0);
+
+	if (cq->ibcq.comp_handler) {
+		spin_lock_irqsave(&cq->comp_handler_lock, flags);
+		(*cq->ibcq.comp_handler) (&cq->ibcq, cq->ibcq.cq_context);
+		spin_unlock_irqrestore(&cq->comp_handler_lock, flags);
+	}
+	ocrdma_qp_buddy_cq_handler(dev, cq);
+}
+
+static void ocrdma_cq_handler(struct ocrdma_dev *dev, u16 cq_id)
+{
+	/* process the MQ-CQE. */
+	if (cq_id == dev->mq.cq.id)
+		ocrdma_mq_cq_handler(dev, cq_id);
+	else
+		ocrdma_qp_cq_handler(dev, cq_id);
+}
+
+static irqreturn_t ocrdma_irq_handler(int irq, void *handle)
+{
+	struct ocrdma_eq *eq = handle;
+	struct ocrdma_dev *dev = eq->dev;
+	struct ocrdma_eqe eqe;
+	struct ocrdma_eqe *ptr;
+	u16 eqe_popped = 0;
+	u16 cq_id;
+	while (1) {
+		ptr = ocrdma_get_eqe(eq);
+		eqe = *ptr;
+		ocrdma_le32_to_cpu(&eqe, sizeof(eqe));
+		if ((eqe.id_valid & OCRDMA_EQE_VALID_MASK) == 0)
+			break;
+		eqe_popped += 1;
+		ptr->id_valid = 0;
+		/* check whether it is a CQE or not. */
+		if ((eqe.id_valid & OCRDMA_EQE_FOR_CQE_MASK) == 0) {
+			cq_id = eqe.id_valid >> OCRDMA_EQE_RESOURCE_ID_SHIFT;
+			ocrdma_cq_handler(dev, cq_id);
+		}
+		ocrdma_eq_inc_tail(eq);
+	}
+	ocrdma_ring_eq_db(dev, eq->q.id, true, true, eqe_popped);
+	/* Ring the EQ doorbell with num_popped = 0 to re-enable interrupts. */
+	if (dev->nic_info.intr_mode == BE_INTERRUPT_MODE_INTX)
+		ocrdma_ring_eq_db(dev, eq->q.id, true, true, 0);
+	return IRQ_HANDLED;
+}
+
+static void ocrdma_post_mqe(struct ocrdma_dev *dev, struct ocrdma_mqe *cmd)
+{
+	struct ocrdma_mqe *mqe;
+
+	dev->mqe_ctx.tag = dev->mq.sq.head;
+	dev->mqe_ctx.cmd_done = false;
+	mqe = ocrdma_get_mqe(dev);
+	cmd->hdr.tag_lo = dev->mq.sq.head;
+	ocrdma_copy_cpu_to_le32(mqe, cmd, sizeof(*mqe));
+	/* make sure descriptor is written before ringing doorbell */
+	wmb();
+	ocrdma_mq_inc_head(dev);
+	ocrdma_ring_mq_db(dev);
+}
+
+static int ocrdma_wait_mqe_cmpl(struct ocrdma_dev *dev)
+{
+	long status;
+	/* 30 sec timeout */
+	status = wait_event_timeout(dev->mqe_ctx.cmd_wait,
+				    dev->mqe_ctx.cmd_done,
+				    msecs_to_jiffies(30000));
+	return status ? 0 : -1;
+}
+
+/* issue a mailbox command on the MQ */
+static int ocrdma_mbx_cmd(struct ocrdma_dev *dev, struct ocrdma_mqe *mqe)
+{
+	int status = 0;
+	u16 cqe_status, ext_status;
+	struct ocrdma_mqe *rsp;
+
+	mutex_lock(&dev->mqe_ctx.lock);
+	ocrdma_post_mqe(dev, mqe);
+	status = ocrdma_wait_mqe_cmpl(dev);
+	if (status)
+		goto mbx_err;
+	cqe_status = dev->mqe_ctx.cqe_status;
+	ext_status = dev->mqe_ctx.ext_status;
+	rsp = ocrdma_get_mqe_rsp(dev);
+	ocrdma_copy_le32_to_cpu(mqe, rsp, (sizeof(*mqe)));
+	if (cqe_status || ext_status) {
+		ocrdma_err
+		    ("%s() opcode=0x%x, cqe_status=0x%x, ext_status=0x%x\n",
+		     __func__,
+		     (rsp->u.rsp.subsys_op & OCRDMA_MBX_RSP_OPCODE_MASK) >>
+		     OCRDMA_MBX_RSP_OPCODE_SHIFT, cqe_status, ext_status);
+		status = ocrdma_get_mbx_cqe_errno(cqe_status);
+		goto mbx_err;
+	}
+	if (mqe->u.rsp.status & OCRDMA_MBX_RSP_STATUS_MASK)
+		status = ocrdma_get_mbx_errno(mqe->u.rsp.status);
+mbx_err:
+	mutex_unlock(&dev->mqe_ctx.lock);
+	return status;
+}
+
+static void ocrdma_get_attr(struct ocrdma_dev *dev,
+			      struct ocrdma_dev_attr *attr,
+			      struct ocrdma_mbx_query_config *rsp)
+{
+	int max_q_mem;
+
+	attr->max_pd =
+	    (rsp->max_pd_ca_ack_delay & OCRDMA_MBX_QUERY_CFG_MAX_PD_MASK) >>
+	    OCRDMA_MBX_QUERY_CFG_MAX_PD_SHIFT;
+	attr->max_qp =
+	    (rsp->qp_srq_cq_ird_ord & OCRDMA_MBX_QUERY_CFG_MAX_QP_MASK) >>
+	    OCRDMA_MBX_QUERY_CFG_MAX_QP_SHIFT;
+	attr->max_send_sge = ((rsp->max_write_send_sge &
+			       OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_MASK) >>
+			      OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_SHIFT);
+	attr->max_recv_sge = (rsp->max_write_send_sge &
+			      OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_MASK) >>
+	    OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_SHIFT;
+	attr->max_ord_per_qp = (rsp->max_ird_ord_per_qp &
+				OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_MASK) >>
+	    OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_SHIFT;
+	attr->max_ird_per_qp = (rsp->max_ird_ord_per_qp &
+				OCRDMA_MBX_QUERY_CFG_MAX_IRD_PER_QP_MASK) >>
+	    OCRDMA_MBX_QUERY_CFG_MAX_IRD_PER_QP_SHIFT;
+	attr->cq_overflow_detect = (rsp->qp_srq_cq_ird_ord &
+				    OCRDMA_MBX_QUERY_CFG_CQ_OVERFLOW_MASK) >>
+	    OCRDMA_MBX_QUERY_CFG_CQ_OVERFLOW_SHIFT;
+	attr->srq_supported = (rsp->qp_srq_cq_ird_ord &
+			       OCRDMA_MBX_QUERY_CFG_SRQ_SUPPORTED_MASK) >>
+	    OCRDMA_MBX_QUERY_CFG_SRQ_SUPPORTED_SHIFT;
+	attr->local_ca_ack_delay = (rsp->max_pd_ca_ack_delay &
+				    OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_MASK) >>
+	    OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_SHIFT;
+	attr->max_mr = rsp->max_mr;
+	attr->max_mr_size = ~0ull;
+	attr->max_fmr = 0;
+	attr->max_pages_per_frmr = rsp->max_pages_per_frmr;
+	attr->max_num_mr_pbl = rsp->max_num_mr_pbl;
+	attr->max_cqe = rsp->max_cq_cqes_per_cq &
+			OCRDMA_MBX_QUERY_CFG_MAX_CQES_PER_CQ_MASK;
+	attr->wqe_size = ((rsp->wqe_rqe_stride_max_dpp_cqs &
+		OCRDMA_MBX_QUERY_CFG_MAX_WQE_SIZE_MASK) >>
+		OCRDMA_MBX_QUERY_CFG_MAX_WQE_SIZE_OFFSET) *
+		OCRDMA_WQE_STRIDE;
+	attr->rqe_size = ((rsp->wqe_rqe_stride_max_dpp_cqs &
+		OCRDMA_MBX_QUERY_CFG_MAX_RQE_SIZE_MASK) >>
+		OCRDMA_MBX_QUERY_CFG_MAX_RQE_SIZE_OFFSET) *
+		OCRDMA_WQE_STRIDE;
+	attr->max_inline_data =
+	    attr->wqe_size - (sizeof(struct ocrdma_hdr_wqe) +
+			      sizeof(struct ocrdma_sge));
+	max_q_mem = OCRDMA_Q_PAGE_BASE_SIZE << (OCRDMA_MAX_Q_PAGE_SIZE_CNT - 1);
+	/* the hardware can queue one entry less than the configured size,
+	 * so publish one less to the stack.
+	 */
+	if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) {
+		dev->attr.max_wqe = max_q_mem / dev->attr.wqe_size;
+		attr->ird = 1;
+		attr->ird_page_size = OCRDMA_MIN_Q_PAGE_SIZE;
+		attr->num_ird_pages = MAX_OCRDMA_IRD_PAGES;
+	} else {
+		dev->attr.max_wqe = (max_q_mem / dev->attr.wqe_size) - 1;
+	}
+	dev->attr.max_rqe = (max_q_mem / dev->attr.rqe_size) - 1;
+}
+
+static int ocrdma_check_fw_config(struct ocrdma_dev *dev,
+				   struct ocrdma_fw_conf_rsp *conf)
+{
+	u32 fn_mode;
+
+	fn_mode = conf->fn_mode & OCRDMA_FN_MODE_RDMA;
+	if (fn_mode != OCRDMA_FN_MODE_RDMA)
+		return -EINVAL;
+	dev->base_eqid = conf->base_eqid;
+	dev->max_eq = conf->max_eq;
+	dev->attr.max_cq = OCRDMA_MAX_CQ - 1;
+	return 0;
+}
+
+/* can be issued only during initialization. */
+static int ocrdma_mbx_query_fw_ver(struct ocrdma_dev *dev)
+{
+	int status = -ENOMEM;
+	struct ocrdma_mqe *cmd;
+	struct ocrdma_fw_ver_rsp *rsp;
+
+	cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_GET_FW_VER, sizeof(*cmd));
+	if (!cmd)
+		return -ENOMEM;
+	ocrdma_init_mch((struct ocrdma_mbx_hdr *)&cmd->u.cmd[0],
+			OCRDMA_CMD_GET_FW_VER,
+			OCRDMA_SUBSYS_COMMON, sizeof(*cmd));
+
+	status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
+	if (status)
+		goto mbx_err;
+	rsp = (struct ocrdma_fw_ver_rsp *)cmd;
+	memset(&dev->attr.fw_ver[0], 0, sizeof(dev->attr.fw_ver));
+	memcpy(&dev->attr.fw_ver[0], &rsp->running_ver[0],
+	       sizeof(rsp->running_ver));
+	ocrdma_le32_to_cpu(dev->attr.fw_ver, sizeof(rsp->running_ver));
+mbx_err:
+	kfree(cmd);
+	return status;
+}
+
+/* can be issued only during initialization. */
+static int ocrdma_mbx_query_fw_config(struct ocrdma_dev *dev)
+{
+	int status = -ENOMEM;
+	struct ocrdma_mqe *cmd;
+	struct ocrdma_fw_conf_rsp *rsp;
+
+	cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_GET_FW_CONFIG, sizeof(*cmd));
+	if (!cmd)
+		return -ENOMEM;
+	ocrdma_init_mch((struct ocrdma_mbx_hdr *)&cmd->u.cmd[0],
+			OCRDMA_CMD_GET_FW_CONFIG,
+			OCRDMA_SUBSYS_COMMON, sizeof(*cmd));
+	status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
+	if (status)
+		goto mbx_err;
+	rsp = (struct ocrdma_fw_conf_rsp *)cmd;
+	status = ocrdma_check_fw_config(dev, rsp);
+mbx_err:
+	kfree(cmd);
+	return status;
+}
+
+static int ocrdma_mbx_query_dev(struct ocrdma_dev *dev)
+{
+	int status = -ENOMEM;
+	struct ocrdma_mbx_query_config *rsp;
+	struct ocrdma_mqe *cmd;
+
+	cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_QUERY_CONFIG, sizeof(*cmd));
+	if (!cmd)
+		return status;
+	status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
+	if (status)
+		goto mbx_err;
+	rsp = (struct ocrdma_mbx_query_config *)cmd;
+	ocrdma_get_attr(dev, &dev->attr, rsp);
+mbx_err:
+	kfree(cmd);
+	return status;
+}
+
+int ocrdma_mbx_alloc_pd(struct ocrdma_dev *dev, struct ocrdma_pd *pd)
+{
+	int status = -ENOMEM;
+	struct ocrdma_alloc_pd *cmd;
+	struct ocrdma_alloc_pd_rsp *rsp;
+
+	cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_ALLOC_PD, sizeof(*cmd));
+	if (!cmd)
+		return status;
+	if (pd->dpp_enabled)
+		cmd->enable_dpp_rsvd |= OCRDMA_ALLOC_PD_ENABLE_DPP;
+	status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
+	if (status)
+		goto mbx_err;
+	rsp = (struct ocrdma_alloc_pd_rsp *)cmd;
+	pd->id = rsp->dpp_page_pdid & OCRDMA_ALLOC_PD_RSP_PDID_MASK;
+	if (rsp->dpp_page_pdid & OCRDMA_ALLOC_PD_RSP_DPP) {
+		pd->dpp_enabled = true;
+		pd->dpp_page = rsp->dpp_page_pdid >>
+				OCRDMA_ALLOC_PD_RSP_DPP_PAGE_SHIFT;
+	} else {
+		pd->dpp_enabled = false;
+		pd->num_dpp_qp = 0;
+	}
+mbx_err:
+	kfree(cmd);
+	return status;
+}
+
+int ocrdma_mbx_dealloc_pd(struct ocrdma_dev *dev, struct ocrdma_pd *pd)
+{
+	int status = -ENOMEM;
+	struct ocrdma_dealloc_pd *cmd;
+
+	cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_DEALLOC_PD, sizeof(*cmd));
+	if (!cmd)
+		return status;
+	cmd->id = pd->id;
+	status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
+	kfree(cmd);
+	return status;
+}
+
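+/* Compute a queue configuration for the requested number of entries:
+ * pick the smallest supported queue page size that can hold the
+ * rounded-up queue, then derive the page count, page size and the
+ * final (possibly larger) entry count.
+ */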
+static int ocrdma_build_q_conf(u32 *num_entries, int entry_size,
+			       int *num_pages, int *page_size)
+{
+	int i;
+	int mem_size;
+
+	*num_entries = roundup_pow_of_two(*num_entries);
+	mem_size = *num_entries * entry_size;
+	/* find the lowest possible page size multiplier */
+	for (i = 0; i < OCRDMA_MAX_Q_PAGE_SIZE_CNT; i++) {
+		if (mem_size <= (OCRDMA_Q_PAGE_BASE_SIZE << i))
+			break;
+	}
+	if (i >= OCRDMA_MAX_Q_PAGE_SIZE_CNT)
+		return -EINVAL;
+	mem_size = roundup(mem_size,
+		       ((OCRDMA_Q_PAGE_BASE_SIZE << i) / OCRDMA_MAX_Q_PAGES));
+	*num_pages =
+	    mem_size / ((OCRDMA_Q_PAGE_BASE_SIZE << i) / OCRDMA_MAX_Q_PAGES);
+	*page_size = ((OCRDMA_Q_PAGE_BASE_SIZE << i) / OCRDMA_MAX_Q_PAGES);
+	*num_entries = mem_size / entry_size;
+	return 0;
+}
+
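+/* Allocate the address handle (AV) table and register it with the
+ * device: a single PBL page holds the PBEs that point at the AV
+ * table pages.
+ */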
+static int ocrdma_mbx_create_ah_tbl(struct ocrdma_dev *dev)
+{
+	int i;
+	int status = 0;
+	int max_ah;
+	struct ocrdma_create_ah_tbl *cmd;
+	struct ocrdma_create_ah_tbl_rsp *rsp;
+	struct pci_dev *pdev = dev->nic_info.pdev;
+	dma_addr_t pa;
+	struct ocrdma_pbe *pbes;
+
+	cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_CREATE_AH_TBL, sizeof(*cmd));
+	if (!cmd)
+		return -ENOMEM;
+
+	max_ah = OCRDMA_MAX_AH;
+	dev->av_tbl.size = sizeof(struct ocrdma_av) * max_ah;
+
+	/* number of PBEs in PBL */
+	cmd->ah_conf = (OCRDMA_AH_TBL_PAGES <<
+				OCRDMA_CREATE_AH_NUM_PAGES_SHIFT) &
+				OCRDMA_CREATE_AH_NUM_PAGES_MASK;
+
+	/* page size */
+	for (i = 0; i < OCRDMA_MAX_Q_PAGE_SIZE_CNT; i++) {
+		if (PAGE_SIZE == (OCRDMA_MIN_Q_PAGE_SIZE << i))
+			break;
+	}
+	cmd->ah_conf |= (i << OCRDMA_CREATE_AH_PAGE_SIZE_SHIFT) &
+				OCRDMA_CREATE_AH_PAGE_SIZE_MASK;
+
+	/* ah_entry size */
+	cmd->ah_conf |= (sizeof(struct ocrdma_av) <<
+				OCRDMA_CREATE_AH_ENTRY_SIZE_SHIFT) &
+				OCRDMA_CREATE_AH_ENTRY_SIZE_MASK;
+
+	dev->av_tbl.pbl.va = dma_alloc_coherent(&pdev->dev, PAGE_SIZE,
+						&dev->av_tbl.pbl.pa,
+						GFP_KERNEL);
+	if (dev->av_tbl.pbl.va == NULL) {
+		status = -ENOMEM;
+		goto mem_err;
+	}
+
+	dev->av_tbl.va = dma_alloc_coherent(&pdev->dev, dev->av_tbl.size,
+					    &pa, GFP_KERNEL);
+	if (dev->av_tbl.va == NULL) {
+		status = -ENOMEM;
+		goto mem_err_ah;
+	}
+	dev->av_tbl.pa = pa;
+	dev->av_tbl.num_ah = max_ah;
+	memset(dev->av_tbl.va, 0, dev->av_tbl.size);
+
+	pbes = (struct ocrdma_pbe *)dev->av_tbl.pbl.va;
+	for (i = 0; i < dev->av_tbl.size / OCRDMA_MIN_Q_PAGE_SIZE; i++) {
+		pbes[i].pa_lo = (u32) (pa & 0xffffffff);
+		pbes[i].pa_hi = (u32) upper_32_bits(pa);
+		pa += PAGE_SIZE;
+	}
+	cmd->tbl_addr[0].lo = (u32)(dev->av_tbl.pbl.pa & 0xFFFFFFFF);
+	cmd->tbl_addr[0].hi = (u32)upper_32_bits(dev->av_tbl.pbl.pa);
+	status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
+	if (status)
+		goto mbx_err;
+	rsp = (struct ocrdma_create_ah_tbl_rsp *)cmd;
+	dev->av_tbl.ahid = rsp->ahid & 0xFFFF;
+	kfree(cmd);
+	return 0;
+
+mbx_err:
+	dma_free_coherent(&pdev->dev, dev->av_tbl.size, dev->av_tbl.va,
+			  dev->av_tbl.pa);
+	dev->av_tbl.va = NULL;
+mem_err_ah:
+	dma_free_coherent(&pdev->dev, PAGE_SIZE, dev->av_tbl.pbl.va,
+			  dev->av_tbl.pbl.pa);
+	dev->av_tbl.pbl.va = NULL;
+	dev->av_tbl.size = 0;
+mem_err:
+	kfree(cmd);
+	return status;
+}
+
+static void ocrdma_mbx_delete_ah_tbl(struct ocrdma_dev *dev)
+{
+	struct ocrdma_delete_ah_tbl *cmd;
+	struct pci_dev *pdev = dev->nic_info.pdev;
+
+	if (dev->av_tbl.va == NULL)
+		return;
+
+	cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_DELETE_AH_TBL, sizeof(*cmd));
+	if (!cmd)
+		return;
+	cmd->ahid = dev->av_tbl.ahid;
+
+	ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
+	dma_free_coherent(&pdev->dev, dev->av_tbl.size, dev->av_tbl.va,
+			  dev->av_tbl.pa);
+	dma_free_coherent(&pdev->dev, PAGE_SIZE, dev->av_tbl.pbl.va,
+			  dev->av_tbl.pbl.pa);
+	kfree(cmd);
+}
+
+/* Multiple CQs share an EQ. This routine returns the least-used EQ to
+ * associate with a new CQ, distributing the interrupt processing and
+ * CPU load across the EQs, their vectors and hence the CPUs.
+ */
+static u16 ocrdma_bind_eq(struct ocrdma_dev *dev)
+{
+	int i, selected_eq = 0, cq_cnt = 0;
+	u16 eq_id;
+
+	mutex_lock(&dev->dev_lock);
+	cq_cnt = dev->qp_eq_tbl[0].cq_cnt;
+	eq_id = dev->qp_eq_tbl[0].q.id;
+	/* find the EQ which has the least number of
+	 * CQs associated with it.
+	 */
+	for (i = 0; i < dev->eq_cnt; i++) {
+		if (dev->qp_eq_tbl[i].cq_cnt < cq_cnt) {
+			cq_cnt = dev->qp_eq_tbl[i].cq_cnt;
+			eq_id = dev->qp_eq_tbl[i].q.id;
+			selected_eq = i;
+		}
+	}
+	dev->qp_eq_tbl[selected_eq].cq_cnt += 1;
+	mutex_unlock(&dev->dev_lock);
+	return eq_id;
+}
+
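+/* drop the CQ reference taken on the EQ in ocrdma_bind_eq() */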
+static void ocrdma_unbind_eq(struct ocrdma_dev *dev, u16 eq_id)
+{
+	int i;
+
+	mutex_lock(&dev->dev_lock);
+	for (i = 0; i < dev->eq_cnt; i++) {
+		if (dev->qp_eq_tbl[i].q.id != eq_id)
+			continue;
+		dev->qp_eq_tbl[i].cq_cnt -= 1;
+		break;
+	}
+	mutex_unlock(&dev->dev_lock);
+}
+
+int ocrdma_mbx_create_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq,
+			 int entries, int dpp_cq)
+{
+	int status = -ENOMEM;
+	int max_hw_cqe;
+	struct pci_dev *pdev = dev->nic_info.pdev;
+	struct ocrdma_create_cq *cmd;
+	struct ocrdma_create_cq_rsp *rsp;
+	u32 hw_pages, cqe_size, page_size, cqe_count;
+
+	if (dpp_cq)
+		return -EINVAL;
+	if (entries > dev->attr.max_cqe) {
+		ocrdma_err("%s(%d) max_cqe=0x%x, requester_cqe=0x%x\n",
+			   __func__, dev->id, dev->attr.max_cqe, entries);
+		return -EINVAL;
+	}
+	if (dpp_cq && (dev->nic_info.dev_family != OCRDMA_GEN2_FAMILY))
+		return -EINVAL;
+
+	if (dpp_cq) {
+		cq->max_hw_cqe = 1;
+		max_hw_cqe = 1;
+		cqe_size = OCRDMA_DPP_CQE_SIZE;
+		hw_pages = 1;
+	} else {
+		cq->max_hw_cqe = dev->attr.max_cqe;
+		max_hw_cqe = dev->attr.max_cqe;
+		cqe_size = sizeof(struct ocrdma_cqe);
+		hw_pages = OCRDMA_CREATE_CQ_MAX_PAGES;
+	}
+
+	cq->len = roundup(max_hw_cqe * cqe_size, OCRDMA_MIN_Q_PAGE_SIZE);
+
+	cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_CREATE_CQ, sizeof(*cmd));
+	if (!cmd)
+		return -ENOMEM;
+	ocrdma_init_mch(&cmd->cmd.req, OCRDMA_CMD_CREATE_CQ,
+			OCRDMA_SUBSYS_COMMON, sizeof(*cmd));
+	cq->va = dma_alloc_coherent(&pdev->dev, cq->len, &cq->pa, GFP_KERNEL);
+	if (!cq->va) {
+		status = -ENOMEM;
+		goto mem_err;
+	}
+	memset(cq->va, 0, cq->len);
+	page_size = cq->len / hw_pages;
+	cmd->cmd.pgsz_pgcnt = (page_size / OCRDMA_MIN_Q_PAGE_SIZE) <<
+					OCRDMA_CREATE_CQ_PAGE_SIZE_SHIFT;
+	cmd->cmd.pgsz_pgcnt |= hw_pages;
+	cmd->cmd.ev_cnt_flags = OCRDMA_CREATE_CQ_DEF_FLAGS;
+
+	if (!dev->eq_cnt)
+		goto eq_err;
+	cq->eqn = ocrdma_bind_eq(dev);
+	cmd->cmd.req.rsvd_version = OCRDMA_CREATE_CQ_VER2;
+	cqe_count = cq->len / cqe_size;
+	if (cqe_count > 1024) {
+		/* Set cnt to 3 to indicate more than 1024 cq entries */
+		cmd->cmd.ev_cnt_flags |= (0x3 << OCRDMA_CREATE_CQ_CNT_SHIFT);
+	} else {
+		u8 count = 0;
+		switch (cqe_count) {
+		case 256:
+			count = 0;
+			break;
+		case 512:
+			count = 1;
+			break;
+		case 1024:
+			count = 2;
+			break;
+		default:
+			goto mbx_err;
+		}
+		cmd->cmd.ev_cnt_flags |= (count << OCRDMA_CREATE_CQ_CNT_SHIFT);
+	}
+	/* shared eq between all the consumer cqs. */
+	cmd->cmd.eqn = cq->eqn;
+	if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) {
+		if (dpp_cq)
+			cmd->cmd.pgsz_pgcnt |= OCRDMA_CREATE_CQ_DPP <<
+				OCRDMA_CREATE_CQ_TYPE_SHIFT;
+		cq->phase_change = false;
+		cmd->cmd.cqe_count = (cq->len / cqe_size);
+	} else {
+		cmd->cmd.cqe_count = (cq->len / cqe_size) - 1;
+		cmd->cmd.ev_cnt_flags |= OCRDMA_CREATE_CQ_FLAGS_AUTO_VALID;
+		cq->phase_change = true;
+	}
+
+	ocrdma_build_q_pages(&cmd->cmd.pa[0], hw_pages, cq->pa, page_size);
+	status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
+	if (status)
+		goto mbx_err;
+
+	rsp = (struct ocrdma_create_cq_rsp *)cmd;
+	cq->id = (u16) (rsp->rsp.cq_id & OCRDMA_CREATE_CQ_RSP_CQ_ID_MASK);
+	kfree(cmd);
+	return 0;
+mbx_err:
+	ocrdma_unbind_eq(dev, cq->eqn);
+eq_err:
+	dma_free_coherent(&pdev->dev, cq->len, cq->va, cq->pa);
+mem_err:
+	kfree(cmd);
+	return status;
+}
+
+int ocrdma_mbx_destroy_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq)
+{
+	int status = -ENOMEM;
+	struct ocrdma_destroy_cq *cmd;
+
+	cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_DELETE_CQ, sizeof(*cmd));
+	if (!cmd)
+		return status;
+	ocrdma_init_mch(&cmd->req, OCRDMA_CMD_DELETE_CQ,
+			OCRDMA_SUBSYS_COMMON, sizeof(*cmd));
+
+	cmd->bypass_flush_qid |=
+	    (cq->id << OCRDMA_DESTROY_CQ_QID_SHIFT) &
+	    OCRDMA_DESTROY_CQ_QID_MASK;
+
+	ocrdma_unbind_eq(dev, cq->eqn);
+	status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
+	if (status)
+		goto mbx_err;
+	dma_free_coherent(&dev->nic_info.pdev->dev, cq->len, cq->va, cq->pa);
+mbx_err:
+	kfree(cmd);
+	return status;
+}
+
+int ocrdma_mbx_alloc_lkey(struct ocrdma_dev *dev, struct ocrdma_hw_mr *hwmr,
+			  u32 pdid, int addr_check)
+{
+	int status = -ENOMEM;
+	struct ocrdma_alloc_lkey *cmd;
+	struct ocrdma_alloc_lkey_rsp *rsp;
+
+	cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_ALLOC_LKEY, sizeof(*cmd));
+	if (!cmd)
+		return status;
+	cmd->pdid = pdid;
+	cmd->pbl_sz_flags |= addr_check;
+	cmd->pbl_sz_flags |= (hwmr->fr_mr << OCRDMA_ALLOC_LKEY_FMR_SHIFT);
+	cmd->pbl_sz_flags |=
+	    (hwmr->remote_wr << OCRDMA_ALLOC_LKEY_REMOTE_WR_SHIFT);
+	cmd->pbl_sz_flags |=
+	    (hwmr->remote_rd << OCRDMA_ALLOC_LKEY_REMOTE_RD_SHIFT);
+	cmd->pbl_sz_flags |=
+	    (hwmr->local_wr << OCRDMA_ALLOC_LKEY_LOCAL_WR_SHIFT);
+	cmd->pbl_sz_flags |=
+	    (hwmr->remote_atomic << OCRDMA_ALLOC_LKEY_REMOTE_ATOMIC_SHIFT);
+	cmd->pbl_sz_flags |=
+	    (hwmr->num_pbls << OCRDMA_ALLOC_LKEY_PBL_SIZE_SHIFT);
+
+	status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
+	if (status)
+		goto mbx_err;
+	rsp = (struct ocrdma_alloc_lkey_rsp *)cmd;
+	hwmr->lkey = rsp->lrkey;
+mbx_err:
+	kfree(cmd);
+	return status;
+}
+
+int ocrdma_mbx_dealloc_lkey(struct ocrdma_dev *dev, int fr_mr, u32 lkey)
+{
+	int status = -ENOMEM;
+	struct ocrdma_dealloc_lkey *cmd;
+
+	cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_DEALLOC_LKEY, sizeof(*cmd));
+	if (!cmd)
+		return -ENOMEM;
+	cmd->lkey = lkey;
+	cmd->rsvd_frmr = fr_mr ? 1 : 0;
+	status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
+	kfree(cmd);
+	return status;
+}
+
+static int ocrdma_mbx_reg_mr(struct ocrdma_dev *dev, struct ocrdma_hw_mr *hwmr,
+			     u32 pdid, u32 pbl_cnt, u32 pbe_size, u32 last)
+{
+	int status = -ENOMEM;
+	int i;
+	struct ocrdma_reg_nsmr *cmd;
+	struct ocrdma_reg_nsmr_rsp *rsp;
+
+	cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_REGISTER_NSMR, sizeof(*cmd));
+	if (!cmd)
+		return -ENOMEM;
+	cmd->num_pbl_pdid =
+	    pdid | (hwmr->num_pbls << OCRDMA_REG_NSMR_NUM_PBL_SHIFT);
+
+	cmd->flags_hpage_pbe_sz |= (hwmr->remote_wr <<
+				    OCRDMA_REG_NSMR_REMOTE_WR_SHIFT);
+	cmd->flags_hpage_pbe_sz |= (hwmr->remote_rd <<
+				    OCRDMA_REG_NSMR_REMOTE_RD_SHIFT);
+	cmd->flags_hpage_pbe_sz |= (hwmr->local_wr <<
+				    OCRDMA_REG_NSMR_LOCAL_WR_SHIFT);
+	cmd->flags_hpage_pbe_sz |= (hwmr->remote_atomic <<
+				    OCRDMA_REG_NSMR_REMOTE_ATOMIC_SHIFT);
+	cmd->flags_hpage_pbe_sz |= (hwmr->mw_bind <<
+				    OCRDMA_REG_NSMR_BIND_MEMWIN_SHIFT);
+	cmd->flags_hpage_pbe_sz |= (last << OCRDMA_REG_NSMR_LAST_SHIFT);
+
+	cmd->flags_hpage_pbe_sz |= (hwmr->pbe_size / OCRDMA_MIN_HPAGE_SIZE);
+	cmd->flags_hpage_pbe_sz |= (hwmr->pbl_size / OCRDMA_MIN_HPAGE_SIZE) <<
+					OCRDMA_REG_NSMR_HPAGE_SIZE_SHIFT;
+	cmd->totlen_low = hwmr->len;
+	cmd->totlen_high = upper_32_bits(hwmr->len);
+	cmd->fbo_low = (u32) (hwmr->fbo & 0xffffffff);
+	cmd->fbo_high = (u32) upper_32_bits(hwmr->fbo);
+	cmd->va_loaddr = (u32) hwmr->va;
+	cmd->va_hiaddr = (u32) upper_32_bits(hwmr->va);
+
+	for (i = 0; i < pbl_cnt; i++) {
+		cmd->pbl[i].lo = (u32) (hwmr->pbl_table[i].pa & 0xffffffff);
+		cmd->pbl[i].hi = upper_32_bits(hwmr->pbl_table[i].pa);
+	}
+	status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
+	if (status)
+		goto mbx_err;
+	rsp = (struct ocrdma_reg_nsmr_rsp *)cmd;
+	hwmr->lkey = rsp->lrkey;
+mbx_err:
+	kfree(cmd);
+	return status;
+}
+
+static int ocrdma_mbx_reg_mr_cont(struct ocrdma_dev *dev,
+				  struct ocrdma_hw_mr *hwmr, u32 pbl_cnt,
+				  u32 pbl_offset, u32 last)
+{
+	int status = -ENOMEM;
+	int i;
+	struct ocrdma_reg_nsmr_cont *cmd;
+
+	cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_REGISTER_NSMR_CONT, sizeof(*cmd));
+	if (!cmd)
+		return -ENOMEM;
+	cmd->lrkey = hwmr->lkey;
+	cmd->num_pbl_offset = (pbl_cnt << OCRDMA_REG_NSMR_CONT_NUM_PBL_SHIFT) |
+	    (pbl_offset & OCRDMA_REG_NSMR_CONT_PBL_SHIFT_MASK);
+	cmd->last = last << OCRDMA_REG_NSMR_CONT_LAST_SHIFT;
+
+	for (i = 0; i < pbl_cnt; i++) {
+		cmd->pbl[i].lo =
+		    (u32) (hwmr->pbl_table[i + pbl_offset].pa & 0xffffffff);
+		cmd->pbl[i].hi =
+		    upper_32_bits(hwmr->pbl_table[i + pbl_offset].pa);
+	}
+	status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
+	kfree(cmd);
+	return status;
+}
+
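+/* Register an MR in chunks: the first REGISTER_NSMR command carries up
+ * to MAX_OCRDMA_NSMR_PBL PBL addresses; any remaining PBLs are posted
+ * with REGISTER_NSMR_CONT commands until the last bit is set.
+ */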
+int ocrdma_reg_mr(struct ocrdma_dev *dev,
+		  struct ocrdma_hw_mr *hwmr, u32 pdid, int acc)
+{
+	int status;
+	u32 last = 0;
+	u32 cur_pbl_cnt, pbl_offset;
+	u32 pending_pbl_cnt = hwmr->num_pbls;
+
+	pbl_offset = 0;
+	cur_pbl_cnt = min(pending_pbl_cnt, MAX_OCRDMA_NSMR_PBL);
+	if (cur_pbl_cnt == pending_pbl_cnt)
+		last = 1;
+
+	status = ocrdma_mbx_reg_mr(dev, hwmr, pdid,
+				   cur_pbl_cnt, hwmr->pbe_size, last);
+	if (status) {
+		ocrdma_err("%s() status=%d\n", __func__, status);
+		return status;
+	}
+	/* if there are no more pbls to register then exit. */
+	if (last)
+		return 0;
+
+	while (!last) {
+		pbl_offset += cur_pbl_cnt;
+		pending_pbl_cnt -= cur_pbl_cnt;
+		cur_pbl_cnt = min(pending_pbl_cnt, MAX_OCRDMA_NSMR_PBL);
+		/* if we reach the end of the pbls, then we need to set the last
+		 * bit, indicating no more pbls to register for this memory key.
+		 */
+		if (cur_pbl_cnt == pending_pbl_cnt)
+			last = 1;
+
+		status = ocrdma_mbx_reg_mr_cont(dev, hwmr, cur_pbl_cnt,
+						pbl_offset, last);
+		if (status)
+			break;
+	}
+	if (status)
+		ocrdma_err("%s() err. status=%d\n", __func__, status);
+
+	return status;
+}
+
+bool ocrdma_is_qp_in_sq_flushlist(struct ocrdma_cq *cq, struct ocrdma_qp *qp)
+{
+	struct ocrdma_qp *tmp;
+	bool found = false;
+	list_for_each_entry(tmp, &cq->sq_head, sq_entry) {
+		if (qp == tmp) {
+			found = true;
+			break;
+		}
+	}
+	return found;
+}
+
+bool ocrdma_is_qp_in_rq_flushlist(struct ocrdma_cq *cq, struct ocrdma_qp *qp)
+{
+	struct ocrdma_qp *tmp;
+	bool found = false;
+	list_for_each_entry(tmp, &cq->rq_head, rq_entry) {
+		if (qp == tmp) {
+			found = true;
+			break;
+		}
+	}
+	return found;
+}
+
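+/* Queue the QP on its CQs' flush lists (under flush_q_lock) so pending
+ * WQEs can be completed with flush errors; the RQ list is skipped when
+ * the QP uses an SRQ.
+ */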
+void ocrdma_flush_qp(struct ocrdma_qp *qp)
+{
+	bool found;
+	unsigned long flags;
+
+	spin_lock_irqsave(&qp->dev->flush_q_lock, flags);
+	found = ocrdma_is_qp_in_sq_flushlist(qp->sq_cq, qp);
+	if (!found)
+		list_add_tail(&qp->sq_entry, &qp->sq_cq->sq_head);
+	if (!qp->srq) {
+		found = ocrdma_is_qp_in_rq_flushlist(qp->rq_cq, qp);
+		if (!found)
+			list_add_tail(&qp->rq_entry, &qp->rq_cq->rq_head);
+	}
+	spin_unlock_irqrestore(&qp->dev->flush_q_lock, flags);
+}
+
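+/* Validate and apply a QP state transition under q_lock.
+ * Returns 0 on a legal transition, 1 if the QP is already in the
+ * requested state and -EINVAL for an illegal transition.
+ */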
+int ocrdma_qp_state_machine(struct ocrdma_qp *qp, enum ib_qp_state new_ib_state,
+			    enum ib_qp_state *old_ib_state)
+{
+	unsigned long flags;
+	int status = 0;
+	enum ocrdma_qp_state new_state;
+	new_state = get_ocrdma_qp_state(new_ib_state);
+
+	/* sync with wqe and rqe posting */
+	spin_lock_irqsave(&qp->q_lock, flags);
+
+	if (old_ib_state)
+		*old_ib_state = get_ibqp_state(qp->state);
+	if (new_state == qp->state) {
+		spin_unlock_irqrestore(&qp->q_lock, flags);
+		return 1;
+	}
+
+	switch (qp->state) {
+	case OCRDMA_QPS_RST:
+		switch (new_state) {
+		case OCRDMA_QPS_RST:
+		case OCRDMA_QPS_INIT:
+			break;
+		default:
+			status = -EINVAL;
+			break;
+		}
+		break;
+	case OCRDMA_QPS_INIT:
+		/* qps: INIT->XXX */
+		switch (new_state) {
+		case OCRDMA_QPS_INIT:
+		case OCRDMA_QPS_RTR:
+			break;
+		case OCRDMA_QPS_ERR:
+			ocrdma_flush_qp(qp);
+			break;
+		default:
+			status = -EINVAL;
+			break;
+		}
+		break;
+	case OCRDMA_QPS_RTR:
+		/* qps: RTR->XXX */
+		switch (new_state) {
+		case OCRDMA_QPS_RTS:
+			break;
+		case OCRDMA_QPS_ERR:
+			ocrdma_flush_qp(qp);
+			break;
+		default:
+			status = -EINVAL;
+			break;
+		}
+		break;
+	case OCRDMA_QPS_RTS:
+		/* qps: RTS->XXX */
+		switch (new_state) {
+		case OCRDMA_QPS_SQD:
+		case OCRDMA_QPS_SQE:
+			break;
+		case OCRDMA_QPS_ERR:
+			ocrdma_flush_qp(qp);
+			break;
+		default:
+			status = -EINVAL;
+			break;
+		}
+		break;
+	case OCRDMA_QPS_SQD:
+		/* qps: SQD->XXX */
+		switch (new_state) {
+		case OCRDMA_QPS_RTS:
+		case OCRDMA_QPS_SQE:
+		case OCRDMA_QPS_ERR:
+			break;
+		default:
+			status = -EINVAL;
+			break;
+		}
+		break;
+	case OCRDMA_QPS_SQE:
+		switch (new_state) {
+		case OCRDMA_QPS_RTS:
+		case OCRDMA_QPS_ERR:
+			break;
+		default:
+			status = -EINVAL;
+			break;
+		}
+		break;
+	case OCRDMA_QPS_ERR:
+		/* qps: ERR->XXX */
+		switch (new_state) {
+		case OCRDMA_QPS_RST:
+			break;
+		default:
+			status = -EINVAL;
+			break;
+		}
+		break;
+	default:
+		status = -EINVAL;
+		break;
+	}
+	if (!status)
+		qp->state = new_state;
+
+	spin_unlock_irqrestore(&qp->q_lock, flags);
+	return status;
+}
+
+static u32 ocrdma_set_create_qp_mbx_access_flags(struct ocrdma_qp *qp)
+{
+	u32 flags = 0;
+	if (qp->cap_flags & OCRDMA_QP_INB_RD)
+		flags |= OCRDMA_CREATE_QP_REQ_INB_RDEN_MASK;
+	if (qp->cap_flags & OCRDMA_QP_INB_WR)
+		flags |= OCRDMA_CREATE_QP_REQ_INB_WREN_MASK;
+	if (qp->cap_flags & OCRDMA_QP_MW_BIND)
+		flags |= OCRDMA_CREATE_QP_REQ_BIND_MEMWIN_MASK;
+	if (qp->cap_flags & OCRDMA_QP_LKEY0)
+		flags |= OCRDMA_CREATE_QP_REQ_ZERO_LKEYEN_MASK;
+	if (qp->cap_flags & OCRDMA_QP_FAST_REG)
+		flags |= OCRDMA_CREATE_QP_REQ_FMR_EN_MASK;
+	return flags;
+}
+
+static int ocrdma_set_create_qp_sq_cmd(struct ocrdma_create_qp_req *cmd,
+					struct ib_qp_init_attr *attrs,
+					struct ocrdma_qp *qp)
+{
+	int status;
+	u32 len, hw_pages, hw_page_size;
+	dma_addr_t pa;
+	struct ocrdma_dev *dev = qp->dev;
+	struct pci_dev *pdev = dev->nic_info.pdev;
+	u32 max_wqe_allocated;
+	u32 max_sges = attrs->cap.max_send_sge;
+
+	max_wqe_allocated = attrs->cap.max_send_wr;
+	/* need to allocate one extra wqe for the GEN1 family */
+	if (dev->nic_info.dev_family != OCRDMA_GEN2_FAMILY)
+		max_wqe_allocated += 1;
+
+	status = ocrdma_build_q_conf(&max_wqe_allocated,
+		dev->attr.wqe_size, &hw_pages, &hw_page_size);
+	if (status) {
+		ocrdma_err("%s() req. max_send_wr=0x%x\n", __func__,
+			   max_wqe_allocated);
+		return -EINVAL;
+	}
+	qp->sq.max_cnt = max_wqe_allocated;
+	len = (hw_pages * hw_page_size);
+
+	qp->sq.va = dma_alloc_coherent(&pdev->dev, len, &pa, GFP_KERNEL);
+	if (!qp->sq.va)
+		return -ENOMEM;
+	memset(qp->sq.va, 0, len);
+	qp->sq.len = len;
+	qp->sq.pa = pa;
+	qp->sq.entry_size = dev->attr.wqe_size;
+	ocrdma_build_q_pages(&cmd->wq_addr[0], hw_pages, pa, hw_page_size);
+
+	cmd->type_pgsz_pdn |= (ilog2(hw_page_size / OCRDMA_MIN_Q_PAGE_SIZE)
+				<< OCRDMA_CREATE_QP_REQ_SQ_PAGE_SIZE_SHIFT);
+	cmd->num_wq_rq_pages |= (hw_pages <<
+				 OCRDMA_CREATE_QP_REQ_NUM_WQ_PAGES_SHIFT) &
+	    OCRDMA_CREATE_QP_REQ_NUM_WQ_PAGES_MASK;
+	cmd->max_sge_send_write |= (max_sges <<
+				    OCRDMA_CREATE_QP_REQ_MAX_SGE_SEND_SHIFT) &
+	    OCRDMA_CREATE_QP_REQ_MAX_SGE_SEND_MASK;
+	cmd->max_sge_send_write |= (max_sges <<
+				    OCRDMA_CREATE_QP_REQ_MAX_SGE_WRITE_SHIFT) &
+					OCRDMA_CREATE_QP_REQ_MAX_SGE_WRITE_MASK;
+	cmd->max_wqe_rqe |= (ilog2(qp->sq.max_cnt) <<
+			     OCRDMA_CREATE_QP_REQ_MAX_WQE_SHIFT) &
+				OCRDMA_CREATE_QP_REQ_MAX_WQE_MASK;
+	cmd->wqe_rqe_size |= (dev->attr.wqe_size <<
+			      OCRDMA_CREATE_QP_REQ_WQE_SIZE_SHIFT) &
+				OCRDMA_CREATE_QP_REQ_WQE_SIZE_MASK;
+	return 0;
+}
+
+static int ocrdma_set_create_qp_rq_cmd(struct ocrdma_create_qp_req *cmd,
+					struct ib_qp_init_attr *attrs,
+					struct ocrdma_qp *qp)
+{
+	int status;
+	u32 len, hw_pages, hw_page_size;
+	dma_addr_t pa = 0;
+	struct ocrdma_dev *dev = qp->dev;
+	struct pci_dev *pdev = dev->nic_info.pdev;
+	u32 max_rqe_allocated = attrs->cap.max_recv_wr + 1;
+
+	status = ocrdma_build_q_conf(&max_rqe_allocated, dev->attr.rqe_size,
+				     &hw_pages, &hw_page_size);
+	if (status) {
+		ocrdma_err("%s() req. max_recv_wr=0x%x\n", __func__,
+			   attrs->cap.max_recv_wr + 1);
+		return status;
+	}
+	qp->rq.max_cnt = max_rqe_allocated;
+	len = (hw_pages * hw_page_size);
+
+	qp->rq.va = dma_alloc_coherent(&pdev->dev, len, &pa, GFP_KERNEL);
+	if (!qp->rq.va)
+		return -ENOMEM;
+	memset(qp->rq.va, 0, len);
+	qp->rq.pa = pa;
+	qp->rq.len = len;
+	qp->rq.entry_size = dev->attr.rqe_size;
+
+	ocrdma_build_q_pages(&cmd->rq_addr[0], hw_pages, pa, hw_page_size);
+	cmd->type_pgsz_pdn |= (ilog2(hw_page_size / OCRDMA_MIN_Q_PAGE_SIZE) <<
+		OCRDMA_CREATE_QP_REQ_RQ_PAGE_SIZE_SHIFT);
+	cmd->num_wq_rq_pages |=
+	    (hw_pages << OCRDMA_CREATE_QP_REQ_NUM_RQ_PAGES_SHIFT) &
+	    OCRDMA_CREATE_QP_REQ_NUM_RQ_PAGES_MASK;
+	cmd->max_sge_recv_flags |= (attrs->cap.max_recv_sge <<
+				OCRDMA_CREATE_QP_REQ_MAX_SGE_RECV_SHIFT) &
+				OCRDMA_CREATE_QP_REQ_MAX_SGE_RECV_MASK;
+	cmd->max_wqe_rqe |= (ilog2(qp->rq.max_cnt) <<
+				OCRDMA_CREATE_QP_REQ_MAX_RQE_SHIFT) &
+				OCRDMA_CREATE_QP_REQ_MAX_RQE_MASK;
+	cmd->wqe_rqe_size |= (dev->attr.rqe_size <<
+			OCRDMA_CREATE_QP_REQ_RQE_SIZE_SHIFT) &
+			OCRDMA_CREATE_QP_REQ_RQE_SIZE_MASK;
+	return 0;
+}
+
+static void ocrdma_set_create_qp_dpp_cmd(struct ocrdma_create_qp_req *cmd,
+					 struct ocrdma_pd *pd,
+					 struct ocrdma_qp *qp,
+					 u8 enable_dpp_cq, u16 dpp_cq_id)
+{
+	pd->num_dpp_qp--;
+	qp->dpp_enabled = true;
+	cmd->max_sge_recv_flags |= OCRDMA_CREATE_QP_REQ_ENABLE_DPP_MASK;
+	if (!enable_dpp_cq)
+		return;
+	cmd->dpp_credits_cqid = dpp_cq_id;
+	cmd->dpp_credits_cqid |= OCRDMA_CREATE_QP_REQ_DPP_CREDIT_LIMIT <<
+					OCRDMA_CREATE_QP_REQ_DPP_CREDIT_SHIFT;
+}
+
+static int ocrdma_set_create_qp_ird_cmd(struct ocrdma_create_qp_req *cmd,
+					struct ocrdma_qp *qp)
+{
+	struct ocrdma_dev *dev = qp->dev;
+	struct pci_dev *pdev = dev->nic_info.pdev;
+	dma_addr_t pa = 0;
+	int ird_page_size = dev->attr.ird_page_size;
+	int ird_q_len = dev->attr.num_ird_pages * ird_page_size;
+
+	if (dev->attr.ird == 0)
+		return 0;
+
+	qp->ird_q_va = dma_alloc_coherent(&pdev->dev, ird_q_len,
+					&pa, GFP_KERNEL);
+	if (!qp->ird_q_va)
+		return -ENOMEM;
+	memset(qp->ird_q_va, 0, ird_q_len);
+	ocrdma_build_q_pages(&cmd->ird_addr[0], dev->attr.num_ird_pages,
+			     pa, ird_page_size);
+	return 0;
+}
+
+static void ocrdma_get_create_qp_rsp(struct ocrdma_create_qp_rsp *rsp,
+				     struct ocrdma_qp *qp,
+				     struct ib_qp_init_attr *attrs,
+				     u16 *dpp_offset, u16 *dpp_credit_lmt)
+{
+	u32 max_wqe_allocated, max_rqe_allocated;
+	qp->id = rsp->qp_id & OCRDMA_CREATE_QP_RSP_QP_ID_MASK;
+	qp->rq.dbid = rsp->sq_rq_id & OCRDMA_CREATE_QP_RSP_RQ_ID_MASK;
+	qp->sq.dbid = rsp->sq_rq_id >> OCRDMA_CREATE_QP_RSP_SQ_ID_SHIFT;
+	qp->max_ird = rsp->max_ord_ird & OCRDMA_CREATE_QP_RSP_MAX_IRD_MASK;
+	qp->max_ord = (rsp->max_ord_ird >> OCRDMA_CREATE_QP_RSP_MAX_ORD_SHIFT);
+	qp->dpp_enabled = false;
+	if (rsp->dpp_response & OCRDMA_CREATE_QP_RSP_DPP_ENABLED_MASK) {
+		qp->dpp_enabled = true;
+		*dpp_credit_lmt = (rsp->dpp_response &
+				OCRDMA_CREATE_QP_RSP_DPP_CREDITS_MASK) >>
+				OCRDMA_CREATE_QP_RSP_DPP_CREDITS_SHIFT;
+		*dpp_offset = (rsp->dpp_response &
+				OCRDMA_CREATE_QP_RSP_DPP_PAGE_OFFSET_MASK) >>
+				OCRDMA_CREATE_QP_RSP_DPP_PAGE_OFFSET_SHIFT;
+	}
+	max_wqe_allocated =
+		rsp->max_wqe_rqe >> OCRDMA_CREATE_QP_RSP_MAX_WQE_SHIFT;
+	max_wqe_allocated = 1 << max_wqe_allocated;
+	max_rqe_allocated = 1 << ((u16)rsp->max_wqe_rqe);
+
+	if (qp->dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) {
+		qp->sq.free_delta = 0;
+		qp->rq.free_delta = 1;
+	} else {
+		qp->sq.free_delta = 1;
+	}
+
+	qp->sq.max_cnt = max_wqe_allocated;
+	qp->sq.max_wqe_idx = max_wqe_allocated - 1;
+
+	if (!attrs->srq) {
+		qp->rq.max_cnt = max_rqe_allocated;
+		qp->rq.max_wqe_idx = max_rqe_allocated - 1;
+		qp->rq.free_delta = 1;
+	}
+}
+
+int ocrdma_mbx_create_qp(struct ocrdma_qp *qp, struct ib_qp_init_attr *attrs,
+			 u8 enable_dpp_cq, u16 dpp_cq_id, u16 *dpp_offset,
+			 u16 *dpp_credit_lmt)
+{
+	int status = -ENOMEM;
+	u32 flags = 0;
+	struct ocrdma_dev *dev = qp->dev;
+	struct ocrdma_pd *pd = qp->pd;
+	struct pci_dev *pdev = dev->nic_info.pdev;
+	struct ocrdma_cq *cq;
+	struct ocrdma_create_qp_req *cmd;
+	struct ocrdma_create_qp_rsp *rsp;
+	int qptype;
+
+	switch (attrs->qp_type) {
+	case IB_QPT_GSI:
+		qptype = OCRDMA_QPT_GSI;
+		break;
+	case IB_QPT_RC:
+		qptype = OCRDMA_QPT_RC;
+		break;
+	case IB_QPT_UD:
+		qptype = OCRDMA_QPT_UD;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_CREATE_QP, sizeof(*cmd));
+	if (!cmd)
+		return status;
+	cmd->type_pgsz_pdn |= (qptype << OCRDMA_CREATE_QP_REQ_QPT_SHIFT) &
+						OCRDMA_CREATE_QP_REQ_QPT_MASK;
+	status = ocrdma_set_create_qp_sq_cmd(cmd, attrs, qp);
+	if (status)
+		goto sq_err;
+
+	if (attrs->srq) {
+		struct ocrdma_srq *srq = get_ocrdma_srq(attrs->srq);
+		cmd->max_sge_recv_flags |= OCRDMA_CREATE_QP_REQ_USE_SRQ_MASK;
+		cmd->rq_addr[0].lo = srq->id;
+		qp->srq = srq;
+	} else {
+		status = ocrdma_set_create_qp_rq_cmd(cmd, attrs, qp);
+		if (status)
+			goto rq_err;
+	}
+
+	status = ocrdma_set_create_qp_ird_cmd(cmd, qp);
+	if (status)
+		goto mbx_err;
+
+	cmd->type_pgsz_pdn |= (pd->id << OCRDMA_CREATE_QP_REQ_PD_ID_SHIFT) &
+				OCRDMA_CREATE_QP_REQ_PD_ID_MASK;
+
+	flags = ocrdma_set_create_qp_mbx_access_flags(qp);
+
+	cmd->max_sge_recv_flags |= flags;
+	cmd->max_ord_ird |= (dev->attr.max_ord_per_qp <<
+			     OCRDMA_CREATE_QP_REQ_MAX_ORD_SHIFT) &
+				OCRDMA_CREATE_QP_REQ_MAX_ORD_MASK;
+	cmd->max_ord_ird |= (dev->attr.max_ird_per_qp <<
+			     OCRDMA_CREATE_QP_REQ_MAX_IRD_SHIFT) &
+				OCRDMA_CREATE_QP_REQ_MAX_IRD_MASK;
+	cq = get_ocrdma_cq(attrs->send_cq);
+	cmd->wq_rq_cqid |= (cq->id << OCRDMA_CREATE_QP_REQ_WQ_CQID_SHIFT) &
+				OCRDMA_CREATE_QP_REQ_WQ_CQID_MASK;
+	qp->sq_cq = cq;
+	cq = get_ocrdma_cq(attrs->recv_cq);
+	cmd->wq_rq_cqid |= (cq->id << OCRDMA_CREATE_QP_REQ_RQ_CQID_SHIFT) &
+				OCRDMA_CREATE_QP_REQ_RQ_CQID_MASK;
+	qp->rq_cq = cq;
+
+	if (pd->dpp_enabled && attrs->cap.max_inline_data && pd->num_dpp_qp &&
+	    (attrs->cap.max_inline_data <= dev->attr.max_inline_data))
+		ocrdma_set_create_qp_dpp_cmd(cmd, pd, qp, enable_dpp_cq,
+					     dpp_cq_id);
+
+	status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
+	if (status)
+		goto mbx_err;
+	rsp = (struct ocrdma_create_qp_rsp *)cmd;
+	ocrdma_get_create_qp_rsp(rsp, qp, attrs, dpp_offset, dpp_credit_lmt);
+	qp->state = OCRDMA_QPS_RST;
+	kfree(cmd);
+	return 0;
+mbx_err:
+	if (qp->rq.va)
+		dma_free_coherent(&pdev->dev, qp->rq.len, qp->rq.va, qp->rq.pa);
+rq_err:
+	ocrdma_err("%s(%d) rq_err\n", __func__, dev->id);
+	dma_free_coherent(&pdev->dev, qp->sq.len, qp->sq.va, qp->sq.pa);
+sq_err:
+	ocrdma_err("%s(%d) sq_err\n", __func__, dev->id);
+	kfree(cmd);
+	return status;
+}
+
+int ocrdma_mbx_query_qp(struct ocrdma_dev *dev, struct ocrdma_qp *qp,
+			struct ocrdma_qp_params *param)
+{
+	int status = -ENOMEM;
+	struct ocrdma_query_qp *cmd;
+	struct ocrdma_query_qp_rsp *rsp;
+
+	cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_QUERY_QP, sizeof(*cmd));
+	if (!cmd)
+		return status;
+	cmd->qp_id = qp->id;
+	status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
+	if (status)
+		goto mbx_err;
+	rsp = (struct ocrdma_query_qp_rsp *)cmd;
+	memcpy(param, &rsp->params, sizeof(struct ocrdma_qp_params));
+mbx_err:
+	kfree(cmd);
+	return status;
+}
+
+int ocrdma_resolve_dgid(struct ocrdma_dev *dev, union ib_gid *dgid,
+			u8 *mac_addr)
+{
+	struct in6_addr in6;
+
+	memcpy(&in6, dgid, sizeof(in6));
+	if (rdma_is_multicast_addr(&in6))
+		rdma_get_mcast_mac(&in6, mac_addr);
+	else if (rdma_link_local_addr(&in6))
+		rdma_get_ll_mac(&in6, mac_addr);
+	else {
+		ocrdma_err("%s() failed to resolve mac_addr.\n", __func__);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static void ocrdma_set_av_params(struct ocrdma_qp *qp,
+				struct ocrdma_modify_qp *cmd,
+				struct ib_qp_attr *attrs)
+{
+	struct ib_ah_attr *ah_attr = &attrs->ah_attr;
+	union ib_gid sgid;
+	u32 vlan_id;
+	u8 mac_addr[6];
+	if ((ah_attr->ah_flags & IB_AH_GRH) == 0)
+		return;
+	cmd->params.tclass_sq_psn |=
+	    (ah_attr->grh.traffic_class << OCRDMA_QP_PARAMS_TCLASS_SHIFT);
+	cmd->params.rnt_rc_sl_fl |=
+	    (ah_attr->grh.flow_label & OCRDMA_QP_PARAMS_FLOW_LABEL_MASK);
+	cmd->params.hop_lmt_rq_psn |=
+	    (ah_attr->grh.hop_limit << OCRDMA_QP_PARAMS_HOP_LMT_SHIFT);
+	cmd->flags |= OCRDMA_QP_PARA_FLOW_LBL_VALID;
+	memcpy(&cmd->params.dgid[0], &ah_attr->grh.dgid.raw[0],
+	       sizeof(cmd->params.dgid));
+	ocrdma_query_gid(&qp->dev->ibdev, 1,
+			 ah_attr->grh.sgid_index, &sgid);
+	qp->sgid_idx = ah_attr->grh.sgid_index;
+	memcpy(&cmd->params.sgid[0], &sgid.raw[0], sizeof(cmd->params.sgid));
+	ocrdma_resolve_dgid(qp->dev, &ah_attr->grh.dgid, &mac_addr[0]);
+	cmd->params.dmac_b0_to_b3 = mac_addr[0] | (mac_addr[1] << 8) |
+				(mac_addr[2] << 16) | (mac_addr[3] << 24);
+	/* convert them to LE format. */
+	ocrdma_cpu_to_le32(&cmd->params.dgid[0], sizeof(cmd->params.dgid));
+	ocrdma_cpu_to_le32(&cmd->params.sgid[0], sizeof(cmd->params.sgid));
+	cmd->params.vlan_dmac_b4_to_b5 = mac_addr[4] | (mac_addr[5] << 8);
+	vlan_id = rdma_get_vlan_id(&sgid);
+	if (vlan_id && (vlan_id < 0x1000)) {
+		cmd->params.vlan_dmac_b4_to_b5 |=
+		    vlan_id << OCRDMA_QP_PARAMS_VLAN_SHIFT;
+		cmd->flags |= OCRDMA_QP_PARA_VLAN_EN_VALID;
+	}
+}
+
+static int ocrdma_set_qp_params(struct ocrdma_qp *qp,
+				struct ocrdma_modify_qp *cmd,
+				struct ib_qp_attr *attrs, int attr_mask,
+				enum ib_qp_state old_qps)
+{
+	int status = 0;
+	struct net_device *netdev = qp->dev->nic_info.netdev;
+	int eth_mtu = iboe_get_mtu(netdev->mtu);
+
+	if (attr_mask & IB_QP_PKEY_INDEX) {
+		cmd->params.path_mtu_pkey_indx |= (attrs->pkey_index &
+					    OCRDMA_QP_PARAMS_PKEY_INDEX_MASK);
+		cmd->flags |= OCRDMA_QP_PARA_PKEY_VALID;
+	}
+	if (attr_mask & IB_QP_QKEY) {
+		qp->qkey = attrs->qkey;
+		cmd->params.qkey = attrs->qkey;
+		cmd->flags |= OCRDMA_QP_PARA_QKEY_VALID;
+	}
+	if (attr_mask & IB_QP_AV)
+		ocrdma_set_av_params(qp, cmd, attrs);
+	else if (qp->qp_type == IB_QPT_GSI || qp->qp_type == IB_QPT_UD) {
+		/* set the default mac address for UD, GSI QPs */
+		cmd->params.dmac_b0_to_b3 = qp->dev->nic_info.mac_addr[0] |
+			(qp->dev->nic_info.mac_addr[1] << 8) |
+			(qp->dev->nic_info.mac_addr[2] << 16) |
+			(qp->dev->nic_info.mac_addr[3] << 24);
+		cmd->params.vlan_dmac_b4_to_b5 = qp->dev->nic_info.mac_addr[4] |
+					(qp->dev->nic_info.mac_addr[5] << 8);
+	}
+	if ((attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) &&
+	    attrs->en_sqd_async_notify) {
+		cmd->params.max_sge_recv_flags |=
+			OCRDMA_QP_PARAMS_FLAGS_SQD_ASYNC;
+		cmd->flags |= OCRDMA_QP_PARA_DST_QPN_VALID;
+	}
+	if (attr_mask & IB_QP_DEST_QPN) {
+		cmd->params.ack_to_rnr_rtc_dest_qpn |= (attrs->dest_qp_num &
+				OCRDMA_QP_PARAMS_DEST_QPN_MASK);
+		cmd->flags |= OCRDMA_QP_PARA_DST_QPN_VALID;
+	}
+	if (attr_mask & IB_QP_PATH_MTU) {
+		if (ib_mtu_enum_to_int(eth_mtu) <
+		    ib_mtu_enum_to_int(attrs->path_mtu)) {
+			status = -EINVAL;
+			goto pmtu_err;
+		}
+		cmd->params.path_mtu_pkey_indx |=
+		    (ib_mtu_enum_to_int(attrs->path_mtu) <<
+		     OCRDMA_QP_PARAMS_PATH_MTU_SHIFT) &
+		    OCRDMA_QP_PARAMS_PATH_MTU_MASK;
+		cmd->flags |= OCRDMA_QP_PARA_PMTU_VALID;
+	}
+	if (attr_mask & IB_QP_TIMEOUT) {
+		cmd->params.ack_to_rnr_rtc_dest_qpn |= attrs->timeout <<
+		    OCRDMA_QP_PARAMS_ACK_TIMEOUT_SHIFT;
+		cmd->flags |= OCRDMA_QP_PARA_ACK_TO_VALID;
+	}
+	if (attr_mask & IB_QP_RETRY_CNT) {
+		cmd->params.rnt_rc_sl_fl |= (attrs->retry_cnt <<
+				      OCRDMA_QP_PARAMS_RETRY_CNT_SHIFT) &
+		    OCRDMA_QP_PARAMS_RETRY_CNT_MASK;
+		cmd->flags |= OCRDMA_QP_PARA_RETRY_CNT_VALID;
+	}
+	if (attr_mask & IB_QP_MIN_RNR_TIMER) {
+		cmd->params.rnt_rc_sl_fl |= (attrs->min_rnr_timer <<
+				      OCRDMA_QP_PARAMS_RNR_NAK_TIMER_SHIFT) &
+		    OCRDMA_QP_PARAMS_RNR_NAK_TIMER_MASK;
+		cmd->flags |= OCRDMA_QP_PARA_RNT_VALID;
+	}
+	if (attr_mask & IB_QP_RNR_RETRY) {
+		cmd->params.ack_to_rnr_rtc_dest_qpn |= (attrs->rnr_retry <<
+			OCRDMA_QP_PARAMS_RNR_RETRY_CNT_SHIFT)
+			& OCRDMA_QP_PARAMS_RNR_RETRY_CNT_MASK;
+		cmd->flags |= OCRDMA_QP_PARA_RRC_VALID;
+	}
+	if (attr_mask & IB_QP_SQ_PSN) {
+		cmd->params.tclass_sq_psn |= (attrs->sq_psn & 0x00ffffff);
+		cmd->flags |= OCRDMA_QP_PARA_SQPSN_VALID;
+	}
+	if (attr_mask & IB_QP_RQ_PSN) {
+		cmd->params.hop_lmt_rq_psn |= (attrs->rq_psn & 0x00ffffff);
+		cmd->flags |= OCRDMA_QP_PARA_RQPSN_VALID;
+	}
+	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
+		if (attrs->max_rd_atomic > qp->dev->attr.max_ord_per_qp) {
+			status = -EINVAL;
+			goto pmtu_err;
+		}
+		qp->max_ord = attrs->max_rd_atomic;
+		cmd->flags |= OCRDMA_QP_PARA_MAX_ORD_VALID;
+	}
+	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
+		if (attrs->max_dest_rd_atomic > qp->dev->attr.max_ird_per_qp) {
+			status = -EINVAL;
+			goto pmtu_err;
+		}
+		qp->max_ird = attrs->max_dest_rd_atomic;
+		cmd->flags |= OCRDMA_QP_PARA_MAX_IRD_VALID;
+	}
+	cmd->params.max_ord_ird = (qp->max_ord <<
+				OCRDMA_QP_PARAMS_MAX_ORD_SHIFT) |
+				(qp->max_ird & OCRDMA_QP_PARAMS_MAX_IRD_MASK);
+pmtu_err:
+	return status;
+}
+
+int ocrdma_mbx_modify_qp(struct ocrdma_dev *dev, struct ocrdma_qp *qp,
+			 struct ib_qp_attr *attrs, int attr_mask,
+			 enum ib_qp_state old_qps)
+{
+	int status = -ENOMEM;
+	struct ocrdma_modify_qp *cmd;
+
+	cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_MODIFY_QP, sizeof(*cmd));
+	if (!cmd)
+		return status;
+
+	cmd->params.id = qp->id;
+	cmd->flags = 0;
+	if (attr_mask & IB_QP_STATE) {
+		cmd->params.max_sge_recv_flags |=
+		    (get_ocrdma_qp_state(attrs->qp_state) <<
+		     OCRDMA_QP_PARAMS_STATE_SHIFT) &
+		    OCRDMA_QP_PARAMS_STATE_MASK;
+		cmd->flags |= OCRDMA_QP_PARA_QPS_VALID;
+	} else {
+		cmd->params.max_sge_recv_flags |=
+		    (qp->state << OCRDMA_QP_PARAMS_STATE_SHIFT) &
+		    OCRDMA_QP_PARAMS_STATE_MASK;
+	}
+	status = ocrdma_set_qp_params(qp, cmd, attrs, attr_mask, old_qps);
+	if (status)
+		goto mbx_err;
+	status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
+mbx_err:
+	kfree(cmd);
+	return status;
+}
+
+int ocrdma_mbx_destroy_qp(struct ocrdma_dev *dev, struct ocrdma_qp *qp)
+{
+	int status = -ENOMEM;
+	struct ocrdma_destroy_qp *cmd;
+	struct pci_dev *pdev = dev->nic_info.pdev;
+
+	cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_DELETE_QP, sizeof(*cmd));
+	if (!cmd)
+		return status;
+	cmd->qp_id = qp->id;
+	status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
+	kfree(cmd);
+	if (qp->sq.va)
+		dma_free_coherent(&pdev->dev, qp->sq.len, qp->sq.va, qp->sq.pa);
+	if (!qp->srq && qp->rq.va)
+		dma_free_coherent(&pdev->dev, qp->rq.len, qp->rq.va, qp->rq.pa);
+	if (qp->dpp_enabled)
+		qp->pd->num_dpp_qp++;
+	return status;
+}
+
+int ocrdma_mbx_create_srq(struct ocrdma_srq *srq,
+			  struct ib_srq_init_attr *srq_attr,
+			  struct ocrdma_pd *pd)
+{
+	int status = -ENOMEM;
+	int hw_pages, hw_page_size;
+	int len;
+	struct ocrdma_create_srq_rsp *rsp;
+	struct ocrdma_create_srq *cmd;
+	dma_addr_t pa;
+	struct ocrdma_dev *dev = srq->dev;
+	struct pci_dev *pdev = dev->nic_info.pdev;
+	u32 max_rqe_allocated;
+
+	cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_CREATE_SRQ, sizeof(*cmd));
+	if (!cmd)
+		return status;
+
+	cmd->pgsz_pdid = pd->id & OCRDMA_CREATE_SRQ_PD_ID_MASK;
+	max_rqe_allocated = srq_attr->attr.max_wr + 1;
+	status = ocrdma_build_q_conf(&max_rqe_allocated,
+				dev->attr.rqe_size,
+				&hw_pages, &hw_page_size);
+	if (status) {
+		ocrdma_err("%s() req. max_wr=0x%x\n", __func__,
+			   srq_attr->attr.max_wr);
+		status = -EINVAL;
+		goto ret;
+	}
+	len = hw_pages * hw_page_size;
+	srq->rq.va = dma_alloc_coherent(&pdev->dev, len, &pa, GFP_KERNEL);
+	if (!srq->rq.va) {
+		status = -ENOMEM;
+		goto ret;
+	}
+	ocrdma_build_q_pages(&cmd->rq_addr[0], hw_pages, pa, hw_page_size);
+
+	srq->rq.entry_size = dev->attr.rqe_size;
+	srq->rq.pa = pa;
+	srq->rq.len = len;
+	srq->rq.max_cnt = max_rqe_allocated;
+
+	cmd->max_sge_rqe = ilog2(max_rqe_allocated);
+	cmd->max_sge_rqe |= srq_attr->attr.max_sge <<
+				OCRDMA_CREATE_SRQ_MAX_SGE_RECV_SHIFT;
+
+	cmd->pgsz_pdid |= (ilog2(hw_page_size / OCRDMA_MIN_Q_PAGE_SIZE)
+		<< OCRDMA_CREATE_SRQ_PG_SZ_SHIFT);
+	cmd->pages_rqe_sz |= (dev->attr.rqe_size
+		<< OCRDMA_CREATE_SRQ_RQE_SIZE_SHIFT)
+		& OCRDMA_CREATE_SRQ_RQE_SIZE_MASK;
+	cmd->pages_rqe_sz |= hw_pages << OCRDMA_CREATE_SRQ_NUM_RQ_PAGES_SHIFT;
+
+	status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
+	if (status)
+		goto mbx_err;
+	rsp = (struct ocrdma_create_srq_rsp *)cmd;
+	srq->id = rsp->id;
+	srq->rq.dbid = rsp->id;
+	max_rqe_allocated = ((rsp->max_sge_rqe_allocated &
+		OCRDMA_CREATE_SRQ_RSP_MAX_RQE_ALLOCATED_MASK) >>
+		OCRDMA_CREATE_SRQ_RSP_MAX_RQE_ALLOCATED_SHIFT);
+	max_rqe_allocated = (1 << max_rqe_allocated);
+	srq->rq.max_cnt = max_rqe_allocated;
+	srq->rq.max_wqe_idx = max_rqe_allocated - 1;
+	srq->rq.max_sges = (rsp->max_sge_rqe_allocated &
+		OCRDMA_CREATE_SRQ_RSP_MAX_SGE_RECV_ALLOCATED_MASK) >>
+		OCRDMA_CREATE_SRQ_RSP_MAX_SGE_RECV_ALLOCATED_SHIFT;
+	goto ret;
+mbx_err:
+	dma_free_coherent(&pdev->dev, srq->rq.len, srq->rq.va, pa);
+ret:
+	kfree(cmd);
+	return status;
+}
+
+int ocrdma_mbx_modify_srq(struct ocrdma_srq *srq, struct ib_srq_attr *srq_attr)
+{
+	int status = -ENOMEM;
+	struct ocrdma_modify_srq *cmd;
+	cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_MODIFY_SRQ, sizeof(*cmd));
+	if (!cmd)
+		return status;
+	cmd->id = srq->id;
+	cmd->limit_max_rqe |= srq_attr->srq_limit <<
+	    OCRDMA_MODIFY_SRQ_LIMIT_SHIFT;
+	status = ocrdma_mbx_cmd(srq->dev, (struct ocrdma_mqe *)cmd);
+	kfree(cmd);
+	return status;
+}
+
+int ocrdma_mbx_query_srq(struct ocrdma_srq *srq, struct ib_srq_attr *srq_attr)
+{
+	int status = -ENOMEM;
+	struct ocrdma_query_srq *cmd;
+	cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_QUERY_SRQ, sizeof(*cmd));
+	if (!cmd)
+		return status;
+	cmd->id = srq->rq.dbid;
+	status = ocrdma_mbx_cmd(srq->dev, (struct ocrdma_mqe *)cmd);
+	if (status == 0) {
+		struct ocrdma_query_srq_rsp *rsp =
+		    (struct ocrdma_query_srq_rsp *)cmd;
+		srq_attr->max_sge =
+		    rsp->srq_lmt_max_sge &
+		    OCRDMA_QUERY_SRQ_RSP_MAX_SGE_RECV_MASK;
+		srq_attr->max_wr =
+		    rsp->max_rqe_pdid >> OCRDMA_QUERY_SRQ_RSP_MAX_RQE_SHIFT;
+		srq_attr->srq_limit = rsp->srq_lmt_max_sge >>
+		    OCRDMA_QUERY_SRQ_RSP_SRQ_LIMIT_SHIFT;
+	}
+	kfree(cmd);
+	return status;
+}
+
+int ocrdma_mbx_destroy_srq(struct ocrdma_dev *dev, struct ocrdma_srq *srq)
+{
+	int status = -ENOMEM;
+	struct ocrdma_destroy_srq *cmd;
+	struct pci_dev *pdev = dev->nic_info.pdev;
+	cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_DELETE_SRQ, sizeof(*cmd));
+	if (!cmd)
+		return status;
+	cmd->id = srq->id;
+	status = ocrdma_mbx_cmd(srq->dev, (struct ocrdma_mqe *)cmd);
+	if (srq->rq.va)
+		dma_free_coherent(&pdev->dev, srq->rq.len,
+				  srq->rq.va, srq->rq.pa);
+	kfree(cmd);
+	return status;
+}
+
+int ocrdma_alloc_av(struct ocrdma_dev *dev, struct ocrdma_ah *ah)
+{
+	int i;
+	int status = -EINVAL;
+	struct ocrdma_av *av;
+	unsigned long flags;
+
+	av = dev->av_tbl.va;
+	spin_lock_irqsave(&dev->av_tbl.lock, flags);
+	for (i = 0; i < dev->av_tbl.num_ah; i++) {
+		if (av->valid == 0) {
+			av->valid = OCRDMA_AV_VALID;
+			ah->av = av;
+			ah->id = i;
+			status = 0;
+			break;
+		}
+		av++;
+	}
+	if (i == dev->av_tbl.num_ah)
+		status = -EAGAIN;
+	spin_unlock_irqrestore(&dev->av_tbl.lock, flags);
+	return status;
+}
+
+int ocrdma_free_av(struct ocrdma_dev *dev, struct ocrdma_ah *ah)
+{
+	unsigned long flags;
+	spin_lock_irqsave(&dev->av_tbl.lock, flags);
+	ah->av->valid = 0;
+	spin_unlock_irqrestore(&dev->av_tbl.lock, flags);
+	return 0;
+}
+
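+/* Create the control path (mailbox) EQ and request its interrupt. */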
+static int ocrdma_create_mq_eq(struct ocrdma_dev *dev)
+{
+	int status;
+	int irq;
+	unsigned long flags = 0;
+	int num_eq = 0;
+
+	if (dev->nic_info.intr_mode == BE_INTERRUPT_MODE_INTX) {
+		flags = IRQF_SHARED;
+	} else {
+		num_eq = dev->nic_info.msix.num_vectors -
+				dev->nic_info.msix.start_vector;
+		/* a minimum of two vectors/EQs is required for RDMA to
+		 * work: one for the control path and one for the data path.
+		 */
+		if (num_eq < 2)
+			return -EBUSY;
+	}
+
+	status = ocrdma_create_eq(dev, &dev->meq, OCRDMA_EQ_LEN);
+	if (status)
+		return status;
+	sprintf(dev->meq.irq_name, "ocrdma_mq%d", dev->id);
+	irq = ocrdma_get_irq(dev, &dev->meq);
+	status = request_irq(irq, ocrdma_irq_handler, flags, dev->meq.irq_name,
+			     &dev->meq);
+	if (status)
+		_ocrdma_destroy_eq(dev, &dev->meq);
+	return status;
+}
+
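+/* Create the data path EQs: one per available MSI-X vector, capped at
+ * the number of online CPUs, or a single shared EQ in INTx mode.
+ */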
+static int ocrdma_create_qp_eqs(struct ocrdma_dev *dev)
+{
+	int num_eq, i, status = 0;
+	int irq;
+	unsigned long flags = 0;
+
+	num_eq = dev->nic_info.msix.num_vectors -
+			dev->nic_info.msix.start_vector;
+	if (dev->nic_info.intr_mode == BE_INTERRUPT_MODE_INTX) {
+		num_eq = 1;
+		flags = IRQF_SHARED;
+	} else {
+		num_eq = min_t(u32, num_eq, num_online_cpus());
+	}
+	dev->qp_eq_tbl = kzalloc(sizeof(struct ocrdma_eq) * num_eq, GFP_KERNEL);
+	if (!dev->qp_eq_tbl)
+		return -ENOMEM;
+
+	for (i = 0; i < num_eq; i++) {
+		status = ocrdma_create_eq(dev, &dev->qp_eq_tbl[i],
+					  OCRDMA_EQ_LEN);
+		if (status) {
+			status = -EINVAL;
+			break;
+		}
+		sprintf(dev->qp_eq_tbl[i].irq_name, "ocrdma_qp%d-%d",
+			dev->id, i);
+		irq = ocrdma_get_irq(dev, &dev->qp_eq_tbl[i]);
+		status = request_irq(irq, ocrdma_irq_handler, flags,
+				     dev->qp_eq_tbl[i].irq_name,
+				     &dev->qp_eq_tbl[i]);
+		if (status) {
+			_ocrdma_destroy_eq(dev, &dev->qp_eq_tbl[i]);
+			status = -EINVAL;
+			break;
+		}
+		dev->eq_cnt += 1;
+	}
+	/* one EQ is sufficient for the data path to work */
+	if (dev->eq_cnt >= 1)
+		return 0;
+	if (status)
+		ocrdma_destroy_qp_eqs(dev);
+	return status;
+}
+
+int ocrdma_init_hw(struct ocrdma_dev *dev)
+{
+	int status;
+	/* set up control path eq */
+	status = ocrdma_create_mq_eq(dev);
+	if (status)
+		return status;
+	/* set up data path eq */
+	status = ocrdma_create_qp_eqs(dev);
+	if (status)
+		goto qpeq_err;
+	status = ocrdma_create_mq(dev);
+	if (status)
+		goto mq_err;
+	status = ocrdma_mbx_query_fw_config(dev);
+	if (status)
+		goto conf_err;
+	status = ocrdma_mbx_query_dev(dev);
+	if (status)
+		goto conf_err;
+	status = ocrdma_mbx_query_fw_ver(dev);
+	if (status)
+		goto conf_err;
+	status = ocrdma_mbx_create_ah_tbl(dev);
+	if (status)
+		goto conf_err;
+	return 0;
+
+conf_err:
+	ocrdma_destroy_mq(dev);
+mq_err:
+	ocrdma_destroy_qp_eqs(dev);
+qpeq_err:
+	ocrdma_destroy_eq(dev, &dev->meq);
+	ocrdma_err("%s() status=%d\n", __func__, status);
+	return status;
+}
+
+void ocrdma_cleanup_hw(struct ocrdma_dev *dev)
+{
+	ocrdma_mbx_delete_ah_tbl(dev);
+
+	/* cleanup the data path eqs */
+	ocrdma_destroy_qp_eqs(dev);
+
+	/* cleanup the control path */
+	ocrdma_destroy_mq(dev);
+	ocrdma_destroy_eq(dev, &dev->meq);
+}
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h
new file mode 100644
index 0000000..be5db77
--- /dev/null
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h
@@ -0,0 +1,132 @@
+/*******************************************************************
+ * This file is part of the Emulex RoCE Device Driver for          *
+ * RoCE (RDMA over Converged Ethernet) CNA Adapters.              *
+ * Copyright (C) 2008-2012 Emulex. All rights reserved.            *
+ * EMULEX and SLI are trademarks of Emulex.                        *
+ * www.emulex.com                                                  *
+ *                                                                 *
+ * This program is free software; you can redistribute it and/or   *
+ * modify it under the terms of version 2 of the GNU General       *
+ * Public License as published by the Free Software Foundation.    *
+ * This program is distributed in the hope that it will be useful. *
+ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND          *
+ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY,  *
+ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE      *
+ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
+ * TO BE LEGALLY INVALID.  See the GNU General Public License for  *
+ * more details, a copy of which can be found in the file COPYING  *
+ * included with this package.                                     *
+ *
+ * Contact Information:
+ * linux-drivers@emulex.com
+ *
+ * Emulex
+ * 3333 Susan Street
+ * Costa Mesa, CA 92626
+ *******************************************************************/
+
+#ifndef __OCRDMA_HW_H__
+#define __OCRDMA_HW_H__
+
+#include "ocrdma_sli.h"
+
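+/* Mailbox payloads are little-endian; the helpers below are no-ops on
+ * little-endian hosts and byte-swap the buffer in place (or while
+ * copying) on big-endian hosts.
+ */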
+static inline void ocrdma_cpu_to_le32(void *dst, u32 len)
+{
+#ifdef __BIG_ENDIAN
+	int i = 0;
+	u32 *src_ptr = dst;
+	u32 *dst_ptr = dst;
+	for (; i < (len / sizeof(u32)); i++)
+		*(dst_ptr + i) = cpu_to_le32p(src_ptr + i);
+#endif
+}
+
+static inline void ocrdma_le32_to_cpu(void *dst, u32 len)
+{
+#ifdef __BIG_ENDIAN
+	int i = 0;
+	u32 *src_ptr = dst;
+	u32 *dst_ptr = dst;
+	for (; i < (len / sizeof(u32)); i++)
+		*(dst_ptr + i) = le32_to_cpu(*(src_ptr + i));
+#endif
+}
+
+static inline void ocrdma_copy_cpu_to_le32(void *dst, void *src, u32 len)
+{
+#ifdef __BIG_ENDIAN
+	int i = 0;
+	u32 *src_ptr = src;
+	u32 *dst_ptr = dst;
+	for (; i < (len / sizeof(u32)); i++)
+		*(dst_ptr + i) = cpu_to_le32p(src_ptr + i);
+#else
+	memcpy(dst, src, len);
+#endif
+}
+
+static inline void ocrdma_copy_le32_to_cpu(void *dst, void *src, u32 len)
+{
+#ifdef __BIG_ENDIAN
+	int i = 0;
+	u32 *src_ptr = src;
+	u32 *dst_ptr = dst;
+	for (; i < len / sizeof(u32); i++)
+		*(dst_ptr + i) = le32_to_cpu(*(src_ptr + i));
+#else
+	memcpy(dst, src, len);
+#endif
+}
+
+int ocrdma_init_hw(struct ocrdma_dev *);
+void ocrdma_cleanup_hw(struct ocrdma_dev *);
+
+enum ib_qp_state get_ibqp_state(enum ocrdma_qp_state qps);
+void ocrdma_ring_cq_db(struct ocrdma_dev *, u16 cq_id, bool armed,
+		       bool solicited, u16 cqe_popped);
+
+/* verbs specific mailbox commands */
+int ocrdma_query_config(struct ocrdma_dev *,
+			struct ocrdma_mbx_query_config *config);
+int ocrdma_resolve_dgid(struct ocrdma_dev *, union ib_gid *dgid, u8 *mac_addr);
+
+int ocrdma_mbx_alloc_pd(struct ocrdma_dev *, struct ocrdma_pd *);
+int ocrdma_mbx_dealloc_pd(struct ocrdma_dev *, struct ocrdma_pd *);
+
+int ocrdma_mbx_alloc_lkey(struct ocrdma_dev *, struct ocrdma_hw_mr *hwmr,
+			  u32 pd_id, int addr_check);
+int ocrdma_mbx_dealloc_lkey(struct ocrdma_dev *, int fmr, u32 lkey);
+
+int ocrdma_reg_mr(struct ocrdma_dev *, struct ocrdma_hw_mr *hwmr,
+			u32 pd_id, int acc);
+int ocrdma_mbx_create_cq(struct ocrdma_dev *, struct ocrdma_cq *,
+				int entries, int dpp_cq);
+int ocrdma_mbx_destroy_cq(struct ocrdma_dev *, struct ocrdma_cq *);
+
+int ocrdma_mbx_create_qp(struct ocrdma_qp *, struct ib_qp_init_attr *attrs,
+			 u8 enable_dpp_cq, u16 dpp_cq_id, u16 *dpp_offset,
+			 u16 *dpp_credit_lmt);
+int ocrdma_mbx_modify_qp(struct ocrdma_dev *, struct ocrdma_qp *,
+			 struct ib_qp_attr *attrs, int attr_mask,
+			 enum ib_qp_state old_qps);
+int ocrdma_mbx_query_qp(struct ocrdma_dev *, struct ocrdma_qp *,
+			struct ocrdma_qp_params *param);
+int ocrdma_mbx_destroy_qp(struct ocrdma_dev *, struct ocrdma_qp *);
+
+int ocrdma_mbx_create_srq(struct ocrdma_srq *,
+			  struct ib_srq_init_attr *,
+			  struct ocrdma_pd *);
+int ocrdma_mbx_modify_srq(struct ocrdma_srq *, struct ib_srq_attr *);
+int ocrdma_mbx_query_srq(struct ocrdma_srq *, struct ib_srq_attr *);
+int ocrdma_mbx_destroy_srq(struct ocrdma_dev *, struct ocrdma_srq *);
+
+int ocrdma_alloc_av(struct ocrdma_dev *, struct ocrdma_ah *);
+int ocrdma_free_av(struct ocrdma_dev *, struct ocrdma_ah *);
+
+int ocrdma_qp_state_machine(struct ocrdma_qp *, enum ib_qp_state new_state,
+			    enum ib_qp_state *old_ib_state);
+bool ocrdma_is_qp_in_sq_flushlist(struct ocrdma_cq *, struct ocrdma_qp *);
+bool ocrdma_is_qp_in_rq_flushlist(struct ocrdma_cq *, struct ocrdma_qp *);
+void ocrdma_flush_qp(struct ocrdma_qp *);
+
+#endif				/* __OCRDMA_HW_H__ */
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
new file mode 100644
index 0000000..a20d16ea
--- /dev/null
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -0,0 +1,577 @@
+/*******************************************************************
+ * This file is part of the Emulex RoCE Device Driver for          *
+ * RoCE (RDMA over Converged Ethernet) adapters.                   *
+ * Copyright (C) 2008-2012 Emulex. All rights reserved.            *
+ * EMULEX and SLI are trademarks of Emulex.                        *
+ * www.emulex.com                                                  *
+ *                                                                 *
+ * This program is free software; you can redistribute it and/or   *
+ * modify it under the terms of version 2 of the GNU General       *
+ * Public License as published by the Free Software Foundation.    *
+ * This program is distributed in the hope that it will be useful. *
+ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND          *
+ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY,  *
+ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE      *
+ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
+ * TO BE LEGALLY INVALID.  See the GNU General Public License for  *
+ * more details, a copy of which can be found in the file COPYING  *
+ * included with this package.                                     *
+ *
+ * Contact Information:
+ * linux-drivers@emulex.com
+ *
+ * Emulex
+ * 3333 Susan Street
+ * Costa Mesa, CA 92626
+ *******************************************************************/
+
+#include <linux/module.h>
+#include <linux/version.h>
+#include <linux/idr.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_addr.h>
+
+#include <linux/netdevice.h>
+#include <net/addrconf.h>
+
+#include "ocrdma.h"
+#include "ocrdma_verbs.h"
+#include "ocrdma_ah.h"
+#include "be_roce.h"
+#include "ocrdma_hw.h"
+
+MODULE_VERSION(OCRDMA_ROCE_DEV_VERSION);
+MODULE_DESCRIPTION("Emulex RoCE HCA Driver");
+MODULE_AUTHOR("Emulex Corporation");
+MODULE_LICENSE("GPL");
+
+static LIST_HEAD(ocrdma_dev_list);
+static DEFINE_SPINLOCK(ocrdma_devlist_lock);
+static DEFINE_IDR(ocrdma_dev_id);
+
+static union ib_gid ocrdma_zero_sgid;
+
+static int ocrdma_get_instance(void)
+{
+	int instance = 0;
+
+	/* Assign an unused number */
+	if (!idr_pre_get(&ocrdma_dev_id, GFP_KERNEL))
+		return -1;
+	if (idr_get_new(&ocrdma_dev_id, NULL, &instance))
+		return -1;
+	return instance;
+}
+
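+/* Derive the 64-bit node GUID from the port MAC address using the
+ * standard EUI-64 expansion: flip the locally administered bit and
+ * insert 0xFFFE in the middle.
+ */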
+void ocrdma_get_guid(struct ocrdma_dev *dev, u8 *guid)
+{
+	u8 mac_addr[6];
+
+	memcpy(&mac_addr[0], &dev->nic_info.mac_addr[0], ETH_ALEN);
+	guid[0] = mac_addr[0] ^ 2;
+	guid[1] = mac_addr[1];
+	guid[2] = mac_addr[2];
+	guid[3] = 0xff;
+	guid[4] = 0xfe;
+	guid[5] = mac_addr[3];
+	guid[6] = mac_addr[4];
+	guid[7] = mac_addr[5];
+}
+
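+/* Build a link-local SGID from a MAC address: for VLAN interfaces the
+ * VLAN id is embedded in bytes 11-12 in place of the usual 0xFFFE.
+ */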
+static void ocrdma_build_sgid_mac(union ib_gid *sgid, unsigned char *mac_addr,
+				  bool is_vlan, u16 vlan_id)
+{
+	sgid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
+	sgid->raw[8] = mac_addr[0] ^ 2;
+	sgid->raw[9] = mac_addr[1];
+	sgid->raw[10] = mac_addr[2];
+	if (is_vlan) {
+		sgid->raw[11] = vlan_id >> 8;
+		sgid->raw[12] = vlan_id & 0xff;
+	} else {
+		sgid->raw[11] = 0xff;
+		sgid->raw[12] = 0xfe;
+	}
+	sgid->raw[13] = mac_addr[3];
+	sgid->raw[14] = mac_addr[4];
+	sgid->raw[15] = mac_addr[5];
+}
+
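+/* Add an SGID for the given MAC/VLAN to the first free slot in the
+ * SGID table; adding an already-present entry is a no-op.
+ */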
+static void ocrdma_add_sgid(struct ocrdma_dev *dev, unsigned char *mac_addr,
+			    bool is_vlan, u16 vlan_id)
+{
+	int i;
+	bool found = false;
+	union ib_gid new_sgid;
+	int free_idx = OCRDMA_MAX_SGID;
+	unsigned long flags;
+
+	memset(&ocrdma_zero_sgid, 0, sizeof(union ib_gid));
+
+	ocrdma_build_sgid_mac(&new_sgid, mac_addr, is_vlan, vlan_id);
+
+	spin_lock_irqsave(&dev->sgid_lock, flags);
+	for (i = 0; i < OCRDMA_MAX_SGID; i++) {
+		if (!memcmp(&dev->sgid_tbl[i], &ocrdma_zero_sgid,
+			    sizeof(union ib_gid))) {
+			/* found free entry */
+			if (!found) {
+				free_idx = i;
+				found = true;
+				break;
+			}
+		} else if (!memcmp(&dev->sgid_tbl[i], &new_sgid,
+				   sizeof(union ib_gid))) {
+			/* entry already present, no addition is required. */
+			spin_unlock_irqrestore(&dev->sgid_lock, flags);
+			return;
+		}
+	}
+	/* if the entry doesn't exist and the table has free space, add it */
+	if (found)
+		memcpy(&dev->sgid_tbl[free_idx], &new_sgid,
+		       sizeof(union ib_gid));
+	spin_unlock_irqrestore(&dev->sgid_lock, flags);
+}
+
+static bool ocrdma_del_sgid(struct ocrdma_dev *dev, unsigned char *mac_addr,
+			    bool is_vlan, u16 vlan_id)
+{
+	bool found = false;
+	int i;
+	union ib_gid sgid;
+	unsigned long flags;
+
+	ocrdma_build_sgid_mac(&sgid, mac_addr, is_vlan, vlan_id);
+
+	spin_lock_irqsave(&dev->sgid_lock, flags);
+	/* the first entry is the default sgid; it cannot be deleted. */
+	for (i = 1; i < OCRDMA_MAX_SGID; i++) {
+		if (!memcmp(&dev->sgid_tbl[i], &sgid, sizeof(union ib_gid))) {
+			/* found matching entry */
+			memset(&dev->sgid_tbl[i], 0, sizeof(union ib_gid));
+			found = true;
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&dev->sgid_lock, flags);
+	return found;
+}
+
+static void ocrdma_add_default_sgid(struct ocrdma_dev *dev)
+{
+	/* GID Index 0 - Invariant manufacturer-assigned EUI-64 */
+	union ib_gid *sgid = &dev->sgid_tbl[0];
+
+	sgid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
+	ocrdma_get_guid(dev, &sgid->raw[8]);
+}
+
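+/* Populate the SGID table at device-add time: entry 0 is the default
+ * MAC-derived GID; the netdev list is then scanned for this device and
+ * any vlans stacked on it that are up, adding a GID for each
+ * (ocrdma_add_sgid() skips duplicates).
+ */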
+static int ocrdma_build_sgid_tbl(struct ocrdma_dev *dev)
+{
+	struct net_device *netdev, *tmp;
+	u16 vlan_id;
+	bool is_vlan;
+
+	netdev = dev->nic_info.netdev;
+
+	ocrdma_add_default_sgid(dev);
+
+	rcu_read_lock();
+	for_each_netdev_rcu(&init_net, tmp) {
+		if (netdev == tmp || vlan_dev_real_dev(tmp) == netdev) {
+			if (!netif_running(tmp) || !netif_oper_up(tmp))
+				continue;
+			if (netdev != tmp) {
+				vlan_id = vlan_dev_vlan_id(tmp);
+				is_vlan = true;
+			} else {
+				is_vlan = false;
+				vlan_id = 0;
+			}
+			ocrdma_add_sgid(dev, tmp->dev_addr, is_vlan, vlan_id);
+		}
+	}
+	rcu_read_unlock();
+	return 0;
+}
+
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+
+static int ocrdma_inet6addr_event(struct notifier_block *notifier,
+				  unsigned long event, void *ptr)
+{
+	struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr;
+	struct net_device *event_netdev = ifa->idev->dev;
+	struct net_device *netdev = NULL;
+	struct ib_event gid_event;
+	struct ocrdma_dev *dev;
+	bool found = false;
+	bool is_vlan = false;
+	u16 vid = 0;
+
+	netdev = vlan_dev_real_dev(event_netdev);
+	if (netdev != event_netdev) {
+		is_vlan = true;
+		vid = vlan_dev_vlan_id(event_netdev);
+	}
+	rcu_read_lock();
+	list_for_each_entry_rcu(dev, &ocrdma_dev_list, entry) {
+		if (dev->nic_info.netdev == netdev) {
+			found = true;
+			break;
+		}
+	}
+	rcu_read_unlock();
+
+	if (!found)
+		return NOTIFY_DONE;
+	if (!rdma_link_local_addr((struct in6_addr *)&ifa->addr))
+		return NOTIFY_DONE;
+
+	mutex_lock(&dev->dev_lock);
+	switch (event) {
+	case NETDEV_UP:
+		ocrdma_add_sgid(dev, netdev->dev_addr, is_vlan, vid);
+		break;
+	case NETDEV_DOWN:
+		found = ocrdma_del_sgid(dev, netdev->dev_addr, is_vlan, vid);
+		if (found) {
+			/* found the matching entry, notify
+			 * the consumers about it
+			 */
+			gid_event.device = &dev->ibdev;
+			gid_event.element.port_num = 1;
+			gid_event.event = IB_EVENT_GID_CHANGE;
+			ib_dispatch_event(&gid_event);
+		}
+		break;
+	default:
+		break;
+	}
+	mutex_unlock(&dev->dev_lock);
+	return NOTIFY_OK;
+}
+
+static struct notifier_block ocrdma_inet6addr_notifier = {
+	.notifier_call = ocrdma_inet6addr_event
+};
+
+#endif /* IPV6 */
+
+static enum rdma_link_layer ocrdma_link_layer(struct ib_device *device,
+					      u8 port_num)
+{
+	return IB_LINK_LAYER_ETHERNET;
+}
+
+static int ocrdma_register_device(struct ocrdma_dev *dev)
+{
+	strlcpy(dev->ibdev.name, "ocrdma%d", IB_DEVICE_NAME_MAX);
+	ocrdma_get_guid(dev, (u8 *)&dev->ibdev.node_guid);
+	memcpy(dev->ibdev.node_desc, OCRDMA_NODE_DESC,
+	       sizeof(OCRDMA_NODE_DESC));
+	dev->ibdev.owner = THIS_MODULE;
+	dev->ibdev.uverbs_cmd_mask =
+	    OCRDMA_UVERBS(GET_CONTEXT) |
+	    OCRDMA_UVERBS(QUERY_DEVICE) |
+	    OCRDMA_UVERBS(QUERY_PORT) |
+	    OCRDMA_UVERBS(ALLOC_PD) |
+	    OCRDMA_UVERBS(DEALLOC_PD) |
+	    OCRDMA_UVERBS(REG_MR) |
+	    OCRDMA_UVERBS(DEREG_MR) |
+	    OCRDMA_UVERBS(CREATE_COMP_CHANNEL) |
+	    OCRDMA_UVERBS(CREATE_CQ) |
+	    OCRDMA_UVERBS(RESIZE_CQ) |
+	    OCRDMA_UVERBS(DESTROY_CQ) |
+	    OCRDMA_UVERBS(REQ_NOTIFY_CQ) |
+	    OCRDMA_UVERBS(CREATE_QP) |
+	    OCRDMA_UVERBS(MODIFY_QP) |
+	    OCRDMA_UVERBS(QUERY_QP) |
+	    OCRDMA_UVERBS(DESTROY_QP) |
+	    OCRDMA_UVERBS(POLL_CQ) |
+	    OCRDMA_UVERBS(POST_SEND) |
+	    OCRDMA_UVERBS(POST_RECV);
+
+	dev->ibdev.uverbs_cmd_mask |=
+	    OCRDMA_UVERBS(CREATE_AH) |
+	    OCRDMA_UVERBS(MODIFY_AH) |
+	    OCRDMA_UVERBS(QUERY_AH) |
+	    OCRDMA_UVERBS(DESTROY_AH);
+
+	dev->ibdev.node_type = RDMA_NODE_IB_CA;
+	dev->ibdev.phys_port_cnt = 1;
+	dev->ibdev.num_comp_vectors = 1;
+
+	/* mandatory verbs. */
+	dev->ibdev.query_device = ocrdma_query_device;
+	dev->ibdev.query_port = ocrdma_query_port;
+	dev->ibdev.modify_port = ocrdma_modify_port;
+	dev->ibdev.query_gid = ocrdma_query_gid;
+	dev->ibdev.get_link_layer = ocrdma_link_layer;
+	dev->ibdev.alloc_pd = ocrdma_alloc_pd;
+	dev->ibdev.dealloc_pd = ocrdma_dealloc_pd;
+
+	dev->ibdev.create_cq = ocrdma_create_cq;
+	dev->ibdev.destroy_cq = ocrdma_destroy_cq;
+	dev->ibdev.resize_cq = ocrdma_resize_cq;
+
+	dev->ibdev.create_qp = ocrdma_create_qp;
+	dev->ibdev.modify_qp = ocrdma_modify_qp;
+	dev->ibdev.query_qp = ocrdma_query_qp;
+	dev->ibdev.destroy_qp = ocrdma_destroy_qp;
+
+	dev->ibdev.query_pkey = ocrdma_query_pkey;
+	dev->ibdev.create_ah = ocrdma_create_ah;
+	dev->ibdev.destroy_ah = ocrdma_destroy_ah;
+	dev->ibdev.query_ah = ocrdma_query_ah;
+	dev->ibdev.modify_ah = ocrdma_modify_ah;
+
+	dev->ibdev.poll_cq = ocrdma_poll_cq;
+	dev->ibdev.post_send = ocrdma_post_send;
+	dev->ibdev.post_recv = ocrdma_post_recv;
+	dev->ibdev.req_notify_cq = ocrdma_arm_cq;
+
+	dev->ibdev.get_dma_mr = ocrdma_get_dma_mr;
+	dev->ibdev.dereg_mr = ocrdma_dereg_mr;
+	dev->ibdev.reg_user_mr = ocrdma_reg_user_mr;
+
+	/* mandatory to support user-space verbs consumers. */
+	dev->ibdev.alloc_ucontext = ocrdma_alloc_ucontext;
+	dev->ibdev.dealloc_ucontext = ocrdma_dealloc_ucontext;
+	dev->ibdev.mmap = ocrdma_mmap;
+	dev->ibdev.dma_device = &dev->nic_info.pdev->dev;
+
+	dev->ibdev.process_mad = ocrdma_process_mad;
+
+	if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) {
+		dev->ibdev.uverbs_cmd_mask |=
+		     OCRDMA_UVERBS(CREATE_SRQ) |
+		     OCRDMA_UVERBS(MODIFY_SRQ) |
+		     OCRDMA_UVERBS(QUERY_SRQ) |
+		     OCRDMA_UVERBS(DESTROY_SRQ) |
+		     OCRDMA_UVERBS(POST_SRQ_RECV);
+
+		dev->ibdev.create_srq = ocrdma_create_srq;
+		dev->ibdev.modify_srq = ocrdma_modify_srq;
+		dev->ibdev.query_srq = ocrdma_query_srq;
+		dev->ibdev.destroy_srq = ocrdma_destroy_srq;
+		dev->ibdev.post_srq_recv = ocrdma_post_srq_recv;
+	}
+	return ib_register_device(&dev->ibdev, NULL);
+}
+
+static int ocrdma_alloc_resources(struct ocrdma_dev *dev)
+{
+	mutex_init(&dev->dev_lock);
+	dev->sgid_tbl = kcalloc(OCRDMA_MAX_SGID, sizeof(union ib_gid),
+				GFP_KERNEL);
+	if (!dev->sgid_tbl)
+		goto alloc_err;
+	spin_lock_init(&dev->sgid_lock);
+
+	dev->cq_tbl = kcalloc(OCRDMA_MAX_CQ, sizeof(struct ocrdma_cq *),
+			      GFP_KERNEL);
+	if (!dev->cq_tbl)
+		goto alloc_err;
+
+	if (dev->attr.max_qp) {
+		dev->qp_tbl = kcalloc(OCRDMA_MAX_QP, sizeof(struct ocrdma_qp *),
+				      GFP_KERNEL);
+		if (!dev->qp_tbl)
+			goto alloc_err;
+	}
+	spin_lock_init(&dev->av_tbl.lock);
+	spin_lock_init(&dev->flush_q_lock);
+	return 0;
+alloc_err:
+	ocrdma_err("%s(%d) error.\n", __func__, dev->id);
+	return -ENOMEM;
+}
+
+static void ocrdma_free_resources(struct ocrdma_dev *dev)
+{
+	kfree(dev->qp_tbl);
+	kfree(dev->cq_tbl);
+	kfree(dev->sgid_tbl);
+}
+
+static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info)
+{
+	int status = 0;
+	struct ocrdma_dev *dev;
+
+	dev = (struct ocrdma_dev *)ib_alloc_device(sizeof(struct ocrdma_dev));
+	if (!dev) {
+		ocrdma_err("Unable to allocate ib device\n");
+		return NULL;
+	}
+	dev->mbx_cmd = kzalloc(sizeof(struct ocrdma_mqe_emb_cmd), GFP_KERNEL);
+	if (!dev->mbx_cmd)
+		goto idr_err;
+
+	memcpy(&dev->nic_info, dev_info, sizeof(*dev_info));
+	dev->id = ocrdma_get_instance();
+	if (dev->id < 0)
+		goto idr_err;
+
+	status = ocrdma_init_hw(dev);
+	if (status)
+		goto init_err;
+
+	status = ocrdma_alloc_resources(dev);
+	if (status)
+		goto alloc_err;
+
+	status = ocrdma_build_sgid_tbl(dev);
+	if (status)
+		goto alloc_err;
+
+	status = ocrdma_register_device(dev);
+	if (status)
+		goto alloc_err;
+
+	spin_lock(&ocrdma_devlist_lock);
+	list_add_tail_rcu(&dev->entry, &ocrdma_dev_list);
+	spin_unlock(&ocrdma_devlist_lock);
+	return dev;
+
+alloc_err:
+	ocrdma_free_resources(dev);
+	ocrdma_cleanup_hw(dev);
+init_err:
+	idr_remove(&ocrdma_dev_id, dev->id);
+idr_err:
+	kfree(dev->mbx_cmd);
+	ib_dealloc_device(&dev->ibdev);
+	ocrdma_err("%s() leaving. ret=%d\n", __func__, status);
+	return NULL;
+}
+
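+/* The inet6 notifier walks ocrdma_dev_list under rcu_read_lock(), so
+ * the device must not be freed until a grace period has elapsed;
+ * ocrdma_remove() therefore defers the actual teardown via call_rcu().
+ */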
+static void ocrdma_remove_free(struct rcu_head *rcu)
+{
+	struct ocrdma_dev *dev = container_of(rcu, struct ocrdma_dev, rcu);
+
+	ocrdma_free_resources(dev);
+	ocrdma_cleanup_hw(dev);
+
+	idr_remove(&ocrdma_dev_id, dev->id);
+	kfree(dev->mbx_cmd);
+	ib_dealloc_device(&dev->ibdev);
+}
+
+static void ocrdma_remove(struct ocrdma_dev *dev)
+{
+	/* Unregister with the IB stack first, so that all active traffic
+	 * from registered consumers is stopped before teardown.
+	 */
+	ib_unregister_device(&dev->ibdev);
+
+	spin_lock(&ocrdma_devlist_lock);
+	list_del_rcu(&dev->entry);
+	spin_unlock(&ocrdma_devlist_lock);
+	call_rcu(&dev->rcu, ocrdma_remove_free);
+}
+
+static int ocrdma_open(struct ocrdma_dev *dev)
+{
+	struct ib_event port_event;
+
+	port_event.event = IB_EVENT_PORT_ACTIVE;
+	port_event.element.port_num = 1;
+	port_event.device = &dev->ibdev;
+	ib_dispatch_event(&port_event);
+	return 0;
+}
+
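+/* On link down, move every active QP to the error state and raise a
+ * fatal event per QP, followed by a port error event, so consumers can
+ * flush their outstanding work requests.
+ */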
+static int ocrdma_close(struct ocrdma_dev *dev)
+{
+	int i;
+	struct ocrdma_qp *qp, **cur_qp;
+	struct ib_event err_event;
+	struct ib_qp_attr attrs;
+	int attr_mask = IB_QP_STATE;
+
+	attrs.qp_state = IB_QPS_ERR;
+	mutex_lock(&dev->dev_lock);
+	if (dev->qp_tbl) {
+		cur_qp = dev->qp_tbl;
+		for (i = 0; i < OCRDMA_MAX_QP; i++) {
+			qp = cur_qp[i];
+			if (qp) {
+				/* change the QP state to ERROR */
+				_ocrdma_modify_qp(&qp->ibqp, &attrs, attr_mask);
+
+				err_event.event = IB_EVENT_QP_FATAL;
+				err_event.element.qp = &qp->ibqp;
+				err_event.device = &dev->ibdev;
+				ib_dispatch_event(&err_event);
+			}
+		}
+	}
+	mutex_unlock(&dev->dev_lock);
+
+	err_event.event = IB_EVENT_PORT_ERR;
+	err_event.element.port_num = 1;
+	err_event.device = &dev->ibdev;
+	ib_dispatch_event(&err_event);
+	return 0;
+}
+
+/* Event handling via the NIC driver guarantees that all NIC-specific
+ * initialization has completed before the RoCE driver notifies the
+ * event to the stack.
+ */
+static void ocrdma_event_handler(struct ocrdma_dev *dev, u32 event)
+{
+	switch (event) {
+	case BE_DEV_UP:
+		ocrdma_open(dev);
+		break;
+	case BE_DEV_DOWN:
+		ocrdma_close(dev);
+		break;
+	}
+}
+
+static struct ocrdma_driver ocrdma_drv = {
+	.name			= "ocrdma_driver",
+	.add			= ocrdma_add,
+	.remove			= ocrdma_remove,
+	.state_change_handler	= ocrdma_event_handler,
+};
+
+static void ocrdma_unregister_inet6addr_notifier(void)
+{
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	unregister_inet6addr_notifier(&ocrdma_inet6addr_notifier);
+#endif
+}
+
+static int __init ocrdma_init_module(void)
+{
+	int status;
+
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	status = register_inet6addr_notifier(&ocrdma_inet6addr_notifier);
+	if (status)
+		return status;
+#endif
+
+	status = be_roce_register_driver(&ocrdma_drv);
+	if (status)
+		ocrdma_unregister_inet6addr_notifier();
+
+	return status;
+}
+
+static void __exit ocrdma_exit_module(void)
+{
+	be_roce_unregister_driver(&ocrdma_drv);
+	ocrdma_unregister_inet6addr_notifier();
+}
+
+module_init(ocrdma_init_module);
+module_exit(ocrdma_exit_module);
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
new file mode 100644
index 0000000..7fd80cc
--- /dev/null
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
@@ -0,0 +1,1672 @@
+/*******************************************************************
+ * This file is part of the Emulex RoCE Device Driver for          *
+ * RoCE (RDMA over Converged Ethernet) adapters.                   *
+ * Copyright (C) 2008-2012 Emulex. All rights reserved.            *
+ * EMULEX and SLI are trademarks of Emulex.                        *
+ * www.emulex.com                                                  *
+ *                                                                 *
+ * This program is free software; you can redistribute it and/or   *
+ * modify it under the terms of version 2 of the GNU General       *
+ * Public License as published by the Free Software Foundation.    *
+ * This program is distributed in the hope that it will be useful. *
+ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND          *
+ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY,  *
+ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE      *
+ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
+ * TO BE LEGALLY INVALID.  See the GNU General Public License for  *
+ * more details, a copy of which can be found in the file COPYING  *
+ * included with this package.                                     *
+ *
+ * Contact Information:
+ * linux-drivers@emulex.com
+ *
+ * Emulex
+ * 3333 Susan Street
+ * Costa Mesa, CA 92626
+ *******************************************************************/
+
+#ifndef __OCRDMA_SLI_H__
+#define __OCRDMA_SLI_H__
+
+#define Bit(_b) (1 << (_b))
+
+#define OCRDMA_GEN1_FAMILY	0xB
+#define OCRDMA_GEN2_FAMILY	0x2
+
+#define OCRDMA_SUBSYS_ROCE 10
+enum {
+	OCRDMA_CMD_QUERY_CONFIG = 1,
+	OCRDMA_CMD_ALLOC_PD,
+	OCRDMA_CMD_DEALLOC_PD,
+
+	OCRDMA_CMD_CREATE_AH_TBL,
+	OCRDMA_CMD_DELETE_AH_TBL,
+
+	OCRDMA_CMD_CREATE_QP,
+	OCRDMA_CMD_QUERY_QP,
+	OCRDMA_CMD_MODIFY_QP,
+	OCRDMA_CMD_DELETE_QP,
+
+	OCRDMA_CMD_RSVD1,
+	OCRDMA_CMD_ALLOC_LKEY,
+	OCRDMA_CMD_DEALLOC_LKEY,
+	OCRDMA_CMD_REGISTER_NSMR,
+	OCRDMA_CMD_REREGISTER_NSMR,
+	OCRDMA_CMD_REGISTER_NSMR_CONT,
+	OCRDMA_CMD_QUERY_NSMR,
+	OCRDMA_CMD_ALLOC_MW,
+	OCRDMA_CMD_QUERY_MW,
+
+	OCRDMA_CMD_CREATE_SRQ,
+	OCRDMA_CMD_QUERY_SRQ,
+	OCRDMA_CMD_MODIFY_SRQ,
+	OCRDMA_CMD_DELETE_SRQ,
+
+	OCRDMA_CMD_ATTACH_MCAST,
+	OCRDMA_CMD_DETACH_MCAST,
+
+	OCRDMA_CMD_MAX
+};
+
+#define OCRDMA_SUBSYS_COMMON 1
+enum {
+	OCRDMA_CMD_CREATE_CQ		= 12,
+	OCRDMA_CMD_CREATE_EQ		= 13,
+	OCRDMA_CMD_CREATE_MQ		= 21,
+	OCRDMA_CMD_GET_FW_VER		= 35,
+	OCRDMA_CMD_DELETE_MQ		= 53,
+	OCRDMA_CMD_DELETE_CQ		= 54,
+	OCRDMA_CMD_DELETE_EQ		= 55,
+	OCRDMA_CMD_GET_FW_CONFIG	= 58,
+	OCRDMA_CMD_CREATE_MQ_EXT	= 90
+};
+
+enum {
+	QTYPE_EQ	= 1,
+	QTYPE_CQ	= 2,
+	QTYPE_MCCQ	= 3
+};
+
+#define OCRDMA_MAX_SGID (8)
+
+#define OCRDMA_MAX_QP    2048
+#define OCRDMA_MAX_CQ    2048
+
+enum {
+	OCRDMA_DB_RQ_OFFSET		= 0xE0,
+	OCRDMA_DB_GEN2_RQ1_OFFSET	= 0x100,
+	OCRDMA_DB_GEN2_RQ2_OFFSET	= 0xC0,
+	OCRDMA_DB_SQ_OFFSET		= 0x60,
+	OCRDMA_DB_GEN2_SQ_OFFSET	= 0x1C0,
+	OCRDMA_DB_SRQ_OFFSET		= OCRDMA_DB_RQ_OFFSET,
+	OCRDMA_DB_GEN2_SRQ_OFFSET	= OCRDMA_DB_GEN2_RQ1_OFFSET,
+	OCRDMA_DB_CQ_OFFSET		= 0x120,
+	OCRDMA_DB_EQ_OFFSET		= OCRDMA_DB_CQ_OFFSET,
+	OCRDMA_DB_MQ_OFFSET		= 0x140
+};
+
+#define OCRDMA_DB_CQ_RING_ID_MASK       0x3FF	/* bits 0 - 9 */
+#define OCRDMA_DB_CQ_RING_ID_EXT_MASK  0x0C00	/* qid bits 10-11 */
+/* qid bits 10-11 are shifted up by 1 to land at doorbell bits 11-12 */
+#define OCRDMA_DB_CQ_RING_ID_EXT_MASK_SHIFT  0x1
+#define OCRDMA_DB_CQ_NUM_POPPED_SHIFT       (16)	/* bits 16 - 28 */
+/* Rearm bit */
+#define OCRDMA_DB_CQ_REARM_SHIFT        (29)	/* bit 29 */
+/* solicited bit */
+#define OCRDMA_DB_CQ_SOLICIT_SHIFT   (31)	/* bit 31 */
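+/*
+ * Illustrative only (variable names are hypothetical): a CQ doorbell
+ * word would presumably be composed from the fields above as
+ *	val  = cq_id & OCRDMA_DB_CQ_RING_ID_MASK;
+ *	val |= (cq_id & OCRDMA_DB_CQ_RING_ID_EXT_MASK) <<
+ *			OCRDMA_DB_CQ_RING_ID_EXT_MASK_SHIFT;
+ *	val |= num_popped << OCRDMA_DB_CQ_NUM_POPPED_SHIFT;
+ * with the rearm/solicit bits OR-ed in as needed.
+ */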
+
+#define OCRDMA_EQ_ID_MASK		0x1FF	/* bits 0 - 8 */
+#define OCRDMA_EQ_ID_EXT_MASK		0x3e00	/* bits 9-13 */
+#define OCRDMA_EQ_ID_EXT_MASK_SHIFT	(2)	/* qid bits 9-13 at 11-15 */
+
+/* Clear the interrupt for this eq */
+#define OCRDMA_EQ_CLR_SHIFT			(9)	/* bit 9 */
+/* Must be 1 */
+#define OCRDMA_EQ_TYPE_SHIFT		(10)	/* bit 10 */
+/* Number of event entries processed */
+#define OCRDMA_NUM_EQE_SHIFT		(16)	/* bits 16 - 28 */
+/* Rearm bit */
+#define OCRDMA_REARM_SHIFT		(29)	/* bit 29 */
+
+#define OCRDMA_MQ_ID_MASK		0x7FF	/* bits 0 - 10 */
+/* Number of entries posted */
+#define OCRDMA_MQ_NUM_MQE_SHIFT	(16)	/* bits 16 - 29 */
+
+#define OCRDMA_MIN_HPAGE_SIZE (4096)
+
+#define OCRDMA_MIN_Q_PAGE_SIZE (4096)
+#define OCRDMA_MAX_Q_PAGES     (8)
+
+/*
+ * Queue page size encoding:
+ * 0: 4K Bytes
+ * 1: 8K Bytes
+ * 2: 16K Bytes
+ * 3: 32K Bytes
+ * 4: 64K Bytes
+ */
+#define OCRDMA_MAX_Q_PAGE_SIZE_CNT (5)
+#define OCRDMA_Q_PAGE_BASE_SIZE (OCRDMA_MIN_Q_PAGE_SIZE * OCRDMA_MAX_Q_PAGES)
+
+#define MAX_OCRDMA_QP_PAGES      (8)
+#define OCRDMA_MAX_WQE_MEM_SIZE (MAX_OCRDMA_QP_PAGES * OCRDMA_MIN_HPAGE_SIZE)
+
+#define OCRDMA_CREATE_CQ_MAX_PAGES (4)
+#define OCRDMA_DPP_CQE_SIZE (4)
+
+#define OCRDMA_GEN2_MAX_CQE 1024
+#define OCRDMA_GEN2_CQ_PAGE_SIZE 4096
+#define OCRDMA_GEN2_WQE_SIZE 256
+#define OCRDMA_MAX_CQE  4095
+#define OCRDMA_CQ_PAGE_SIZE 16384
+#define OCRDMA_WQE_SIZE 128
+#define OCRDMA_WQE_STRIDE 8
+#define OCRDMA_WQE_ALIGN_BYTES 16
+
+#define MAX_OCRDMA_SRQ_PAGES MAX_OCRDMA_QP_PAGES
+
+enum {
+	OCRDMA_MCH_OPCODE_SHIFT	= 0,
+	OCRDMA_MCH_OPCODE_MASK	= 0xFF,
+	OCRDMA_MCH_SUBSYS_SHIFT	= 8,
+	OCRDMA_MCH_SUBSYS_MASK	= 0xFF00
+};
+
+/* mailbox cmd header */
+struct ocrdma_mbx_hdr {
+	u32 subsys_op;
+	u32 timeout;		/* in seconds */
+	u32 cmd_len;
+	u32 rsvd_version;
+} __packed;
+
+enum {
+	OCRDMA_MBX_RSP_OPCODE_SHIFT	= 0,
+	OCRDMA_MBX_RSP_OPCODE_MASK	= 0xFF,
+	OCRDMA_MBX_RSP_SUBSYS_SHIFT	= 8,
+	OCRDMA_MBX_RSP_SUBSYS_MASK	= 0xFF << OCRDMA_MBX_RSP_SUBSYS_SHIFT,
+
+	OCRDMA_MBX_RSP_STATUS_SHIFT	= 0,
+	OCRDMA_MBX_RSP_STATUS_MASK	= 0xFF,
+	OCRDMA_MBX_RSP_ASTATUS_SHIFT	= 8,
+	OCRDMA_MBX_RSP_ASTATUS_MASK	= 0xFF << OCRDMA_MBX_RSP_ASTATUS_SHIFT
+};
+
+/* mailbox cmd response */
+struct ocrdma_mbx_rsp {
+	u32 subsys_op;
+	u32 status;
+	u32 rsp_len;
+	u32 add_rsp_len;
+} __packed;
+
+enum {
+	OCRDMA_MQE_EMBEDDED	= 1,
+	OCRDMA_MQE_NONEMBEDDED	= 0
+};
+
+struct ocrdma_mqe_sge {
+	u32 pa_lo;
+	u32 pa_hi;
+	u32 len;
+} __packed;
+
+enum {
+	OCRDMA_MQE_HDR_EMB_SHIFT	= 0,
+	OCRDMA_MQE_HDR_EMB_MASK		= Bit(0),
+	OCRDMA_MQE_HDR_SGE_CNT_SHIFT	= 3,
+	OCRDMA_MQE_HDR_SGE_CNT_MASK	= 0x1F << OCRDMA_MQE_HDR_SGE_CNT_SHIFT,
+	OCRDMA_MQE_HDR_SPECIAL_SHIFT	= 24,
+	OCRDMA_MQE_HDR_SPECIAL_MASK	= 0xFF << OCRDMA_MQE_HDR_SPECIAL_SHIFT
+};
+
+struct ocrdma_mqe_hdr {
+	u32 spcl_sge_cnt_emb;
+	u32 pyld_len;
+	u32 tag_lo;
+	u32 tag_hi;
+	u32 rsvd3;
+} __packed;
+
+struct ocrdma_mqe_emb_cmd {
+	struct ocrdma_mbx_hdr mch;
+	u8 pyld[220];
+} __packed;
+
+struct ocrdma_mqe {
+	struct ocrdma_mqe_hdr hdr;
+	union {
+		struct ocrdma_mqe_emb_cmd emb_req;
+		struct {
+			struct ocrdma_mqe_sge sge[19];
+		} nonemb_req;
+		u8 cmd[236];
+		struct ocrdma_mbx_rsp rsp;
+	} u;
+} __packed;
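+
+/* Each MQE is a fixed 256 bytes: a 20-byte header plus a 236-byte
+ * payload holding either an embedded command, up to 19 SGEs that
+ * describe a non-embedded payload, or (on completion) a response.
+ */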
+
+#define OCRDMA_EQ_LEN       4096
+#define OCRDMA_MQ_CQ_LEN    256
+#define OCRDMA_MQ_LEN       128
+
+#define PAGE_SHIFT_4K		12
+#define PAGE_SIZE_4K		(1 << PAGE_SHIFT_4K)
+
+/* Returns number of pages spanned by the data starting at the given addr */
+#define PAGES_4K_SPANNED(_address, size) \
+	((u32)((((size_t)(_address) & (PAGE_SIZE_4K - 1)) +	\
+			(size) + (PAGE_SIZE_4K - 1)) >> PAGE_SHIFT_4K))
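+/* e.g. an 8192-byte buffer starting at page offset 4092 touches bytes
+ * 4092..12283 of the region and so spans three 4K pages:
+ * (4092 + 8192 + 4095) >> 12 = 3.
+ */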
+
+struct ocrdma_delete_q_req {
+	struct ocrdma_mbx_hdr req;
+	u32 id;
+} __packed;
+
+struct ocrdma_pa {
+	u32 lo;
+	u32 hi;
+} __packed;
+
+#define MAX_OCRDMA_EQ_PAGES (8)
+struct ocrdma_create_eq_req {
+	struct ocrdma_mbx_hdr req;
+	u32 num_pages;
+	u32 valid;
+	u32 cnt;
+	u32 delay;
+	u32 rsvd;
+	struct ocrdma_pa pa[MAX_OCRDMA_EQ_PAGES];
+} __packed;
+
+enum {
+	OCRDMA_CREATE_EQ_VALID	= Bit(29),
+	OCRDMA_CREATE_EQ_CNT_SHIFT	= 26,
+	OCRDMA_CREATE_CQ_DELAY_SHIFT	= 13,
+};
+
+struct ocrdma_create_eq_rsp {
+	struct ocrdma_mbx_rsp rsp;
+	u32 vector_eqid;
+};
+
+#define OCRDMA_EQ_MINOR_OTHER (0x1)
+
+enum {
+	OCRDMA_MCQE_STATUS_SHIFT	= 0,
+	OCRDMA_MCQE_STATUS_MASK		= 0xFFFF,
+	OCRDMA_MCQE_ESTATUS_SHIFT	= 16,
+	OCRDMA_MCQE_ESTATUS_MASK	= 0xFFFF << OCRDMA_MCQE_ESTATUS_SHIFT,
+	OCRDMA_MCQE_CONS_SHIFT		= 27,
+	OCRDMA_MCQE_CONS_MASK		= Bit(27),
+	OCRDMA_MCQE_CMPL_SHIFT		= 28,
+	OCRDMA_MCQE_CMPL_MASK		= Bit(28),
+	OCRDMA_MCQE_AE_SHIFT		= 30,
+	OCRDMA_MCQE_AE_MASK		= Bit(30),
+	OCRDMA_MCQE_VALID_SHIFT		= 31,
+	OCRDMA_MCQE_VALID_MASK		= Bit(31)
+};
+
+struct ocrdma_mcqe {
+	u32 status;
+	u32 tag_lo;
+	u32 tag_hi;
+	u32 valid_ae_cmpl_cons;
+} __packed;
+
+enum {
+	OCRDMA_AE_MCQE_QPVALID		= Bit(31),
+	OCRDMA_AE_MCQE_QPID_MASK	= 0xFFFF,
+
+	OCRDMA_AE_MCQE_CQVALID		= Bit(31),
+	OCRDMA_AE_MCQE_CQID_MASK	= 0xFFFF,
+	OCRDMA_AE_MCQE_VALID		= Bit(31),
+	OCRDMA_AE_MCQE_AE		= Bit(30),
+	OCRDMA_AE_MCQE_EVENT_TYPE_SHIFT	= 16,
+	OCRDMA_AE_MCQE_EVENT_TYPE_MASK	=
+					0xFF << OCRDMA_AE_MCQE_EVENT_TYPE_SHIFT,
+	OCRDMA_AE_MCQE_EVENT_CODE_SHIFT	= 8,
+	OCRDMA_AE_MCQE_EVENT_CODE_MASK	=
+					0xFF << OCRDMA_AE_MCQE_EVENT_CODE_SHIFT
+};
+
+struct ocrdma_ae_mcqe {
+	u32 qpvalid_qpid;
+	u32 cqvalid_cqid;
+	u32 evt_tag;
+	u32 valid_ae_event;
+} __packed;
+
+enum {
+	OCRDMA_AE_MPA_MCQE_REQ_ID_SHIFT		= 16,
+	OCRDMA_AE_MPA_MCQE_REQ_ID_MASK		= 0xFFFF <<
+					OCRDMA_AE_MPA_MCQE_REQ_ID_SHIFT,
+
+	OCRDMA_AE_MPA_MCQE_EVENT_CODE_SHIFT	= 8,
+	OCRDMA_AE_MPA_MCQE_EVENT_CODE_MASK	= 0xFF <<
+					OCRDMA_AE_MPA_MCQE_EVENT_CODE_SHIFT,
+	OCRDMA_AE_MPA_MCQE_EVENT_TYPE_SHIFT	= 16,
+	OCRDMA_AE_MPA_MCQE_EVENT_TYPE_MASK	= 0xFF <<
+					OCRDMA_AE_MPA_MCQE_EVENT_TYPE_SHIFT,
+	OCRDMA_AE_MPA_MCQE_EVENT_AE_SHIFT	= 30,
+	OCRDMA_AE_MPA_MCQE_EVENT_AE_MASK	= Bit(30),
+	OCRDMA_AE_MPA_MCQE_EVENT_VALID_SHIFT	= 31,
+	OCRDMA_AE_MPA_MCQE_EVENT_VALID_MASK	= Bit(31)
+};
+
+struct ocrdma_ae_mpa_mcqe {
+	u32 req_id;
+	u32 w1;
+	u32 w2;
+	u32 valid_ae_event;
+} __packed;
+
+enum {
+	OCRDMA_AE_QP_MCQE_NEW_QP_STATE_SHIFT	= 0,
+	OCRDMA_AE_QP_MCQE_NEW_QP_STATE_MASK	= 0xFFFF,
+	OCRDMA_AE_QP_MCQE_QP_ID_SHIFT		= 16,
+	OCRDMA_AE_QP_MCQE_QP_ID_MASK		= 0xFFFF <<
+						OCRDMA_AE_QP_MCQE_QP_ID_SHIFT,
+
+	OCRDMA_AE_QP_MCQE_EVENT_CODE_SHIFT	= 8,
+	OCRDMA_AE_QP_MCQE_EVENT_CODE_MASK	= 0xFF <<
+				OCRDMA_AE_QP_MCQE_EVENT_CODE_SHIFT,
+	OCRDMA_AE_QP_MCQE_EVENT_TYPE_SHIFT	= 16,
+	OCRDMA_AE_QP_MCQE_EVENT_TYPE_MASK	= 0xFF <<
+				OCRDMA_AE_QP_MCQE_EVENT_TYPE_SHIFT,
+	OCRDMA_AE_QP_MCQE_EVENT_AE_SHIFT	= 30,
+	OCRDMA_AE_QP_MCQE_EVENT_AE_MASK		= Bit(30),
+	OCRDMA_AE_QP_MCQE_EVENT_VALID_SHIFT	= 31,
+	OCRDMA_AE_QP_MCQE_EVENT_VALID_MASK	= Bit(31)
+};
+
+struct ocrdma_ae_qp_mcqe {
+	u32 qp_id_state;
+	u32 w1;
+	u32 w2;
+	u32 valid_ae_event;
+} __packed;
+
+#define OCRDMA_ASYNC_EVE_CODE 0x14
+
+enum OCRDMA_ASYNC_EVENT_TYPE {
+	OCRDMA_CQ_ERROR			= 0x00,
+	OCRDMA_CQ_OVERRUN_ERROR		= 0x01,
+	OCRDMA_CQ_QPCAT_ERROR		= 0x02,
+	OCRDMA_QP_ACCESS_ERROR		= 0x03,
+	OCRDMA_QP_COMM_EST_EVENT	= 0x04,
+	OCRDMA_SQ_DRAINED_EVENT		= 0x05,
+	OCRDMA_DEVICE_FATAL_EVENT	= 0x08,
+	OCRDMA_SRQCAT_ERROR		= 0x0E,
+	OCRDMA_SRQ_LIMIT_EVENT		= 0x0F,
+	OCRDMA_QP_LAST_WQE_EVENT	= 0x10
+};
+
+/* mailbox command request and responses */
+enum {
+	OCRDMA_MBX_QUERY_CFG_CQ_OVERFLOW_SHIFT		= 2,
+	OCRDMA_MBX_QUERY_CFG_CQ_OVERFLOW_MASK		= Bit(2),
+	OCRDMA_MBX_QUERY_CFG_SRQ_SUPPORTED_SHIFT	= 3,
+	OCRDMA_MBX_QUERY_CFG_SRQ_SUPPORTED_MASK		= Bit(3),
+	OCRDMA_MBX_QUERY_CFG_MAX_QP_SHIFT		= 8,
+	OCRDMA_MBX_QUERY_CFG_MAX_QP_MASK		= 0xFFFFFF <<
+				OCRDMA_MBX_QUERY_CFG_MAX_QP_SHIFT,
+
+	OCRDMA_MBX_QUERY_CFG_MAX_PD_SHIFT		= 16,
+	OCRDMA_MBX_QUERY_CFG_MAX_PD_MASK		= 0xFFFF <<
+					OCRDMA_MBX_QUERY_CFG_MAX_PD_SHIFT,
+	OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_SHIFT		= 8,
+	OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_MASK		= 0xFF <<
+				OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_SHIFT,
+
+	OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_SHIFT		= 0,
+	OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_MASK		= 0xFFFF,
+
+	OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_SHIFT	= 0,
+	OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_MASK	= 0xFFFF,
+	OCRDMA_MBX_QUERY_CFG_MAX_IRD_PER_QP_SHIFT	= 16,
+	OCRDMA_MBX_QUERY_CFG_MAX_IRD_PER_QP_MASK	= 0xFFFF <<
+				OCRDMA_MBX_QUERY_CFG_MAX_IRD_PER_QP_SHIFT,
+
+	OCRDMA_MBX_QUERY_CFG_MAX_WQE_SIZE_OFFSET	= 24,
+	OCRDMA_MBX_QUERY_CFG_MAX_WQE_SIZE_MASK		= 0xFF <<
+				OCRDMA_MBX_QUERY_CFG_MAX_WQE_SIZE_OFFSET,
+	OCRDMA_MBX_QUERY_CFG_MAX_RQE_SIZE_OFFSET	= 16,
+	OCRDMA_MBX_QUERY_CFG_MAX_RQE_SIZE_MASK		= 0xFF <<
+				OCRDMA_MBX_QUERY_CFG_MAX_RQE_SIZE_OFFSET,
+	OCRDMA_MBX_QUERY_CFG_MAX_DPP_CQES_OFFSET	= 0,
+	OCRDMA_MBX_QUERY_CFG_MAX_DPP_CQES_MASK		= 0xFFFF <<
+				OCRDMA_MBX_QUERY_CFG_MAX_DPP_CQES_OFFSET,
+
+	OCRDMA_MBX_QUERY_CFG_MAX_SRQ_OFFSET		= 16,
+	OCRDMA_MBX_QUERY_CFG_MAX_SRQ_MASK		= 0xFFFF <<
+				OCRDMA_MBX_QUERY_CFG_MAX_SRQ_OFFSET,
+	OCRDMA_MBX_QUERY_CFG_MAX_RPIR_QPS_OFFSET	= 0,
+	OCRDMA_MBX_QUERY_CFG_MAX_RPIR_QPS_MASK		= 0xFFFF <<
+				OCRDMA_MBX_QUERY_CFG_MAX_RPIR_QPS_OFFSET,
+
+	OCRDMA_MBX_QUERY_CFG_MAX_DPP_PDS_OFFSET		= 16,
+	OCRDMA_MBX_QUERY_CFG_MAX_DPP_PDS_MASK		= 0xFFFF <<
+				OCRDMA_MBX_QUERY_CFG_MAX_DPP_PDS_OFFSET,
+	OCRDMA_MBX_QUERY_CFG_MAX_DPP_CREDITS_OFFSET	= 0,
+	OCRDMA_MBX_QUERY_CFG_MAX_DPP_CREDITS_MASK	= 0xFFFF <<
+				OCRDMA_MBX_QUERY_CFG_MAX_DPP_CREDITS_OFFSET,
+
+	OCRDMA_MBX_QUERY_CFG_MAX_DPP_QPS_OFFSET		= 0,
+	OCRDMA_MBX_QUERY_CFG_MAX_DPP_QPS_MASK		= 0xFFFF <<
+				OCRDMA_MBX_QUERY_CFG_MAX_DPP_QPS_OFFSET,
+
+	OCRDMA_MBX_QUERY_CFG_MAX_WQES_PER_WQ_OFFSET	= 16,
+	OCRDMA_MBX_QUERY_CFG_MAX_WQES_PER_WQ_MASK	= 0xFFFF <<
+				OCRDMA_MBX_QUERY_CFG_MAX_WQES_PER_WQ_OFFSET,
+	OCRDMA_MBX_QUERY_CFG_MAX_RQES_PER_RQ_OFFSET	= 0,
+	OCRDMA_MBX_QUERY_CFG_MAX_RQES_PER_RQ_MASK	= 0xFFFF <<
+				OCRDMA_MBX_QUERY_CFG_MAX_RQES_PER_RQ_OFFSET,
+
+	OCRDMA_MBX_QUERY_CFG_MAX_CQ_OFFSET		= 16,
+	OCRDMA_MBX_QUERY_CFG_MAX_CQ_MASK		= 0xFFFF <<
+				OCRDMA_MBX_QUERY_CFG_MAX_CQ_OFFSET,
+	OCRDMA_MBX_QUERY_CFG_MAX_CQES_PER_CQ_OFFSET	= 0,
+	OCRDMA_MBX_QUERY_CFG_MAX_CQES_PER_CQ_MASK	= 0xFFFF <<
+				OCRDMA_MBX_QUERY_CFG_MAX_CQES_PER_CQ_OFFSET,
+
+	OCRDMA_MBX_QUERY_CFG_MAX_SRQ_RQE_OFFSET		= 16,
+	OCRDMA_MBX_QUERY_CFG_MAX_SRQ_RQE_MASK		= 0xFFFF <<
+				OCRDMA_MBX_QUERY_CFG_MAX_SRQ_RQE_OFFSET,
+	OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_OFFSET		= 0,
+	OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_MASK		= 0xFFFF <<
+				OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_OFFSET,
+};
+
+struct ocrdma_mbx_query_config {
+	struct ocrdma_mqe_hdr hdr;
+	struct ocrdma_mbx_rsp rsp;
+	u32 qp_srq_cq_ird_ord;
+	u32 max_pd_ca_ack_delay;
+	u32 max_write_send_sge;
+	u32 max_ird_ord_per_qp;
+	u32 max_shared_ird_ord;
+	u32 max_mr;
+	u64 max_mr_size;
+	u32 max_num_mr_pbl;
+	u32 max_mw;
+	u32 max_fmr;
+	u32 max_pages_per_frmr;
+	u32 max_mcast_group;
+	u32 max_mcast_qp_attach;
+	u32 max_total_mcast_qp_attach;
+	u32 wqe_rqe_stride_max_dpp_cqs;
+	u32 max_srq_rpir_qps;
+	u32 max_dpp_pds_credits;
+	u32 max_dpp_credits_pds_per_pd;
+	u32 max_wqes_rqes_per_q;
+	u32 max_cq_cqes_per_cq;
+	u32 max_srq_rqe_sge;
+} __packed;
+
+struct ocrdma_fw_ver_rsp {
+	struct ocrdma_mqe_hdr hdr;
+	struct ocrdma_mbx_rsp rsp;
+
+	u8 running_ver[32];
+} __packed;
+
+struct ocrdma_fw_conf_rsp {
+	struct ocrdma_mqe_hdr hdr;
+	struct ocrdma_mbx_rsp rsp;
+
+	u32 config_num;
+	u32 asic_revision;
+	u32 phy_port;
+	u32 fn_mode;
+	struct {
+		u32 mode;
+		u32 nic_wqid_base;
+		u32 nic_wq_tot;
+		u32 prot_wqid_base;
+		u32 prot_wq_tot;
+		u32 prot_rqid_base;
+		u32 prot_rqid_tot;
+		u32 rsvd[6];
+	} ulp[2];
+	u32 fn_capabilities;
+	u32 rsvd1;
+	u32 rsvd2;
+	u32 base_eqid;
+	u32 max_eq;
+} __packed;
+
+enum {
+	OCRDMA_FN_MODE_RDMA	= 0x4
+};
+
+enum {
+	OCRDMA_CREATE_CQ_VER2			= 2,
+
+	OCRDMA_CREATE_CQ_PAGE_CNT_MASK		= 0xFFFF,
+	OCRDMA_CREATE_CQ_PAGE_SIZE_SHIFT	= 16,
+	OCRDMA_CREATE_CQ_PAGE_SIZE_MASK		= 0xFF,
+
+	OCRDMA_CREATE_CQ_COALESCWM_SHIFT	= 12,
+	OCRDMA_CREATE_CQ_COALESCWM_MASK		= Bit(13) | Bit(12),
+	OCRDMA_CREATE_CQ_FLAGS_NODELAY		= Bit(14),
+	OCRDMA_CREATE_CQ_FLAGS_AUTO_VALID	= Bit(15),
+
+	OCRDMA_CREATE_CQ_EQ_ID_MASK		= 0xFFFF,
+	OCRDMA_CREATE_CQ_CQE_COUNT_MASK		= 0xFFFF
+};
+
+enum {
+	OCRDMA_CREATE_CQ_VER0			= 0,
+	OCRDMA_CREATE_CQ_DPP			= 1,
+	OCRDMA_CREATE_CQ_TYPE_SHIFT		= 24,
+	OCRDMA_CREATE_CQ_EQID_SHIFT		= 22,
+
+	OCRDMA_CREATE_CQ_CNT_SHIFT		= 27,
+	OCRDMA_CREATE_CQ_FLAGS_VALID		= Bit(29),
+	OCRDMA_CREATE_CQ_FLAGS_EVENTABLE	= Bit(31),
+	OCRDMA_CREATE_CQ_DEF_FLAGS		= OCRDMA_CREATE_CQ_FLAGS_VALID |
+					OCRDMA_CREATE_CQ_FLAGS_EVENTABLE |
+					OCRDMA_CREATE_CQ_FLAGS_NODELAY
+};
+
+struct ocrdma_create_cq_cmd {
+	struct ocrdma_mbx_hdr req;
+	u32 pgsz_pgcnt;
+	u32 ev_cnt_flags;
+	u32 eqn;
+	u32 cqe_count;
+	u32 rsvd6;
+	struct ocrdma_pa pa[OCRDMA_CREATE_CQ_MAX_PAGES];
+};
+
+struct ocrdma_create_cq {
+	struct ocrdma_mqe_hdr hdr;
+	struct ocrdma_create_cq_cmd cmd;
+} __packed;
+
+enum {
+	OCRDMA_CREATE_CQ_RSP_CQ_ID_MASK	= 0xFFFF
+};
+
+struct ocrdma_create_cq_cmd_rsp {
+	struct ocrdma_mbx_rsp rsp;
+	u32 cq_id;
+} __packed;
+
+struct ocrdma_create_cq_rsp {
+	struct ocrdma_mqe_hdr hdr;
+	struct ocrdma_create_cq_cmd_rsp rsp;
+} __packed;
+
+enum {
+	OCRDMA_CREATE_MQ_V0_CQ_ID_SHIFT		= 22,
+	OCRDMA_CREATE_MQ_CQ_ID_SHIFT		= 16,
+	OCRDMA_CREATE_MQ_RING_SIZE_SHIFT	= 16,
+	OCRDMA_CREATE_MQ_VALID			= Bit(31),
+	OCRDMA_CREATE_MQ_ASYNC_CQ_VALID		= Bit(0)
+};
+
+struct ocrdma_create_mq_v0 {
+	u32 pages;
+	u32 cqid_ringsize;
+	u32 valid;
+	u32 async_cqid_valid;
+	u32 rsvd;
+	struct ocrdma_pa pa[8];
+} __packed;
+
+struct ocrdma_create_mq_v1 {
+	u32 cqid_pages;
+	u32 async_event_bitmap;
+	u32 async_cqid_ringsize;
+	u32 valid;
+	u32 async_cqid_valid;
+	u32 rsvd;
+	struct ocrdma_pa pa[8];
+} __packed;
+
+struct ocrdma_create_mq_req {
+	struct ocrdma_mbx_hdr req;
+	union {
+		struct ocrdma_create_mq_v0 v0;
+		struct ocrdma_create_mq_v1 v1;
+	};
+} __packed;
+
+struct ocrdma_create_mq_rsp {
+	struct ocrdma_mbx_rsp rsp;
+	u32 id;
+} __packed;
+
+enum {
+	OCRDMA_DESTROY_CQ_QID_SHIFT			= 0,
+	OCRDMA_DESTROY_CQ_QID_MASK			= 0xFFFF,
+	OCRDMA_DESTROY_CQ_QID_BYPASS_FLUSH_SHIFT	= 16,
+	OCRDMA_DESTROY_CQ_QID_BYPASS_FLUSH_MASK		= 0xFFFF <<
+				OCRDMA_DESTROY_CQ_QID_BYPASS_FLUSH_SHIFT
+};
+
+struct ocrdma_destroy_cq {
+	struct ocrdma_mqe_hdr hdr;
+	struct ocrdma_mbx_hdr req;
+
+	u32 bypass_flush_qid;
+} __packed;
+
+struct ocrdma_destroy_cq_rsp {
+	struct ocrdma_mqe_hdr hdr;
+	struct ocrdma_mbx_rsp rsp;
+} __packed;
+
+enum {
+	OCRDMA_QPT_GSI	= 1,
+	OCRDMA_QPT_RC	= 2,
+	OCRDMA_QPT_UD	= 4,
+};
+
+enum {
+	OCRDMA_CREATE_QP_REQ_PD_ID_SHIFT	= 0,
+	OCRDMA_CREATE_QP_REQ_PD_ID_MASK		= 0xFFFF,
+	OCRDMA_CREATE_QP_REQ_SQ_PAGE_SIZE_SHIFT	= 16,
+	OCRDMA_CREATE_QP_REQ_RQ_PAGE_SIZE_SHIFT	= 19,
+	OCRDMA_CREATE_QP_REQ_QPT_SHIFT		= 29,
+	OCRDMA_CREATE_QP_REQ_QPT_MASK		= Bit(31) | Bit(30) | Bit(29),
+
+	OCRDMA_CREATE_QP_REQ_MAX_RQE_SHIFT	= 0,
+	OCRDMA_CREATE_QP_REQ_MAX_RQE_MASK	= 0xFFFF,
+	OCRDMA_CREATE_QP_REQ_MAX_WQE_SHIFT	= 16,
+	OCRDMA_CREATE_QP_REQ_MAX_WQE_MASK	= 0xFFFF <<
+					OCRDMA_CREATE_QP_REQ_MAX_WQE_SHIFT,
+
+	OCRDMA_CREATE_QP_REQ_MAX_SGE_WRITE_SHIFT	= 0,
+	OCRDMA_CREATE_QP_REQ_MAX_SGE_WRITE_MASK		= 0xFFFF,
+	OCRDMA_CREATE_QP_REQ_MAX_SGE_SEND_SHIFT		= 16,
+	OCRDMA_CREATE_QP_REQ_MAX_SGE_SEND_MASK		= 0xFFFF <<
+					OCRDMA_CREATE_QP_REQ_MAX_SGE_SEND_SHIFT,
+
+	OCRDMA_CREATE_QP_REQ_FMR_EN_SHIFT		= 0,
+	OCRDMA_CREATE_QP_REQ_FMR_EN_MASK		= Bit(0),
+	OCRDMA_CREATE_QP_REQ_ZERO_LKEYEN_SHIFT		= 1,
+	OCRDMA_CREATE_QP_REQ_ZERO_LKEYEN_MASK		= Bit(1),
+	OCRDMA_CREATE_QP_REQ_BIND_MEMWIN_SHIFT		= 2,
+	OCRDMA_CREATE_QP_REQ_BIND_MEMWIN_MASK		= Bit(2),
+	OCRDMA_CREATE_QP_REQ_INB_WREN_SHIFT		= 3,
+	OCRDMA_CREATE_QP_REQ_INB_WREN_MASK		= Bit(3),
+	OCRDMA_CREATE_QP_REQ_INB_RDEN_SHIFT		= 4,
+	OCRDMA_CREATE_QP_REQ_INB_RDEN_MASK		= Bit(4),
+	OCRDMA_CREATE_QP_REQ_USE_SRQ_SHIFT		= 5,
+	OCRDMA_CREATE_QP_REQ_USE_SRQ_MASK		= Bit(5),
+	OCRDMA_CREATE_QP_REQ_ENABLE_RPIR_SHIFT		= 6,
+	OCRDMA_CREATE_QP_REQ_ENABLE_RPIR_MASK		= Bit(6),
+	OCRDMA_CREATE_QP_REQ_ENABLE_DPP_SHIFT		= 7,
+	OCRDMA_CREATE_QP_REQ_ENABLE_DPP_MASK		= Bit(7),
+	OCRDMA_CREATE_QP_REQ_ENABLE_DPP_CQ_SHIFT	= 8,
+	OCRDMA_CREATE_QP_REQ_ENABLE_DPP_CQ_MASK		= Bit(8),
+	OCRDMA_CREATE_QP_REQ_MAX_SGE_RECV_SHIFT		= 16,
+	OCRDMA_CREATE_QP_REQ_MAX_SGE_RECV_MASK		= 0xFFFF <<
+				OCRDMA_CREATE_QP_REQ_MAX_SGE_RECV_SHIFT,
+
+	OCRDMA_CREATE_QP_REQ_MAX_IRD_SHIFT		= 0,
+	OCRDMA_CREATE_QP_REQ_MAX_IRD_MASK		= 0xFFFF,
+	OCRDMA_CREATE_QP_REQ_MAX_ORD_SHIFT		= 16,
+	OCRDMA_CREATE_QP_REQ_MAX_ORD_MASK		= 0xFFFF <<
+				OCRDMA_CREATE_QP_REQ_MAX_ORD_SHIFT,
+
+	OCRDMA_CREATE_QP_REQ_NUM_RQ_PAGES_SHIFT		= 0,
+	OCRDMA_CREATE_QP_REQ_NUM_RQ_PAGES_MASK		= 0xFFFF,
+	OCRDMA_CREATE_QP_REQ_NUM_WQ_PAGES_SHIFT		= 16,
+	OCRDMA_CREATE_QP_REQ_NUM_WQ_PAGES_MASK		= 0xFFFF <<
+				OCRDMA_CREATE_QP_REQ_NUM_WQ_PAGES_SHIFT,
+
+	OCRDMA_CREATE_QP_REQ_RQE_SIZE_SHIFT		= 0,
+	OCRDMA_CREATE_QP_REQ_RQE_SIZE_MASK		= 0xFFFF,
+	OCRDMA_CREATE_QP_REQ_WQE_SIZE_SHIFT		= 16,
+	OCRDMA_CREATE_QP_REQ_WQE_SIZE_MASK		= 0xFFFF <<
+				OCRDMA_CREATE_QP_REQ_WQE_SIZE_SHIFT,
+
+	OCRDMA_CREATE_QP_REQ_RQ_CQID_SHIFT		= 0,
+	OCRDMA_CREATE_QP_REQ_RQ_CQID_MASK		= 0xFFFF,
+	OCRDMA_CREATE_QP_REQ_WQ_CQID_SHIFT		= 16,
+	OCRDMA_CREATE_QP_REQ_WQ_CQID_MASK		= 0xFFFF <<
+				OCRDMA_CREATE_QP_REQ_WQ_CQID_SHIFT,
+
+	OCRDMA_CREATE_QP_REQ_DPP_CQPID_SHIFT		= 0,
+	OCRDMA_CREATE_QP_REQ_DPP_CQPID_MASK		= 0xFFFF,
+	OCRDMA_CREATE_QP_REQ_DPP_CREDIT_SHIFT		= 16,
+	OCRDMA_CREATE_QP_REQ_DPP_CREDIT_MASK		= 0xFFFF <<
+				OCRDMA_CREATE_QP_REQ_DPP_CREDIT_SHIFT
+};
+
+enum {
+	OCRDMA_CREATE_QP_REQ_DPP_CREDIT_LIMIT	= 16,
+	OCRDMA_CREATE_QP_RSP_DPP_PAGE_SHIFT	= 1
+};
+
+#define MAX_OCRDMA_IRD_PAGES 4
+
+enum ocrdma_qp_flags {
+	OCRDMA_QP_MW_BIND	= 1,
+	OCRDMA_QP_LKEY0		= (1 << 1),
+	OCRDMA_QP_FAST_REG	= (1 << 2),
+	OCRDMA_QP_INB_RD	= (1 << 6),
+	OCRDMA_QP_INB_WR	= (1 << 7),
+};
+
+enum ocrdma_qp_state {
+	OCRDMA_QPS_RST		= 0,
+	OCRDMA_QPS_INIT		= 1,
+	OCRDMA_QPS_RTR		= 2,
+	OCRDMA_QPS_RTS		= 3,
+	OCRDMA_QPS_SQE		= 4,
+	OCRDMA_QPS_SQ_DRAINING	= 5,
+	OCRDMA_QPS_ERR		= 6,
+	OCRDMA_QPS_SQD		= 7
+};
+
+struct ocrdma_create_qp_req {
+	struct ocrdma_mqe_hdr hdr;
+	struct ocrdma_mbx_hdr req;
+
+	u32 type_pgsz_pdn;
+	u32 max_wqe_rqe;
+	u32 max_sge_send_write;
+	u32 max_sge_recv_flags;
+	u32 max_ord_ird;
+	u32 num_wq_rq_pages;
+	u32 wqe_rqe_size;
+	u32 wq_rq_cqid;
+	struct ocrdma_pa wq_addr[MAX_OCRDMA_QP_PAGES];
+	struct ocrdma_pa rq_addr[MAX_OCRDMA_QP_PAGES];
+	u32 dpp_credits_cqid;
+	u32 rpir_lkey;
+	struct ocrdma_pa ird_addr[MAX_OCRDMA_IRD_PAGES];
+} __packed;
+
+enum {
+	OCRDMA_CREATE_QP_RSP_QP_ID_SHIFT		= 0,
+	OCRDMA_CREATE_QP_RSP_QP_ID_MASK			= 0xFFFF,
+
+	OCRDMA_CREATE_QP_RSP_MAX_RQE_SHIFT		= 0,
+	OCRDMA_CREATE_QP_RSP_MAX_RQE_MASK		= 0xFFFF,
+	OCRDMA_CREATE_QP_RSP_MAX_WQE_SHIFT		= 16,
+	OCRDMA_CREATE_QP_RSP_MAX_WQE_MASK		= 0xFFFF <<
+				OCRDMA_CREATE_QP_RSP_MAX_WQE_SHIFT,
+
+	OCRDMA_CREATE_QP_RSP_MAX_SGE_WRITE_SHIFT	= 0,
+	OCRDMA_CREATE_QP_RSP_MAX_SGE_WRITE_MASK		= 0xFFFF,
+	OCRDMA_CREATE_QP_RSP_MAX_SGE_SEND_SHIFT		= 16,
+	OCRDMA_CREATE_QP_RSP_MAX_SGE_SEND_MASK		= 0xFFFF <<
+				OCRDMA_CREATE_QP_RSP_MAX_SGE_SEND_SHIFT,
+
+	OCRDMA_CREATE_QP_RSP_MAX_SGE_RECV_SHIFT		= 16,
+	OCRDMA_CREATE_QP_RSP_MAX_SGE_RECV_MASK		= 0xFFFF <<
+				OCRDMA_CREATE_QP_RSP_MAX_SGE_RECV_SHIFT,
+
+	OCRDMA_CREATE_QP_RSP_MAX_IRD_SHIFT		= 0,
+	OCRDMA_CREATE_QP_RSP_MAX_IRD_MASK		= 0xFFFF,
+	OCRDMA_CREATE_QP_RSP_MAX_ORD_SHIFT		= 16,
+	OCRDMA_CREATE_QP_RSP_MAX_ORD_MASK		= 0xFFFF <<
+				OCRDMA_CREATE_QP_RSP_MAX_ORD_SHIFT,
+
+	OCRDMA_CREATE_QP_RSP_RQ_ID_SHIFT		= 0,
+	OCRDMA_CREATE_QP_RSP_RQ_ID_MASK			= 0xFFFF,
+	OCRDMA_CREATE_QP_RSP_SQ_ID_SHIFT		= 16,
+	OCRDMA_CREATE_QP_RSP_SQ_ID_MASK			= 0xFFFF <<
+				OCRDMA_CREATE_QP_RSP_SQ_ID_SHIFT,
+
+	OCRDMA_CREATE_QP_RSP_DPP_ENABLED_MASK		= Bit(0),
+	OCRDMA_CREATE_QP_RSP_DPP_PAGE_OFFSET_SHIFT	= 1,
+	OCRDMA_CREATE_QP_RSP_DPP_PAGE_OFFSET_MASK	= 0x7FFF <<
+				OCRDMA_CREATE_QP_RSP_DPP_PAGE_OFFSET_SHIFT,
+	OCRDMA_CREATE_QP_RSP_DPP_CREDITS_SHIFT		= 16,
+	OCRDMA_CREATE_QP_RSP_DPP_CREDITS_MASK		= 0xFFFF <<
+				OCRDMA_CREATE_QP_RSP_DPP_CREDITS_SHIFT,
+};
+
+struct ocrdma_create_qp_rsp {
+	struct ocrdma_mqe_hdr hdr;
+	struct ocrdma_mbx_rsp rsp;
+
+	u32 qp_id;
+	u32 max_wqe_rqe;
+	u32 max_sge_send_write;
+	u32 max_sge_recv;
+	u32 max_ord_ird;
+	u32 sq_rq_id;
+	u32 dpp_response;
+} __packed;
+
+struct ocrdma_destroy_qp {
+	struct ocrdma_mqe_hdr hdr;
+	struct ocrdma_mbx_hdr req;
+	u32 qp_id;
+} __packed;
+
+struct ocrdma_destroy_qp_rsp {
+	struct ocrdma_mqe_hdr hdr;
+	struct ocrdma_mbx_rsp rsp;
+} __packed;
+
+enum {
+	OCRDMA_MODIFY_QP_ID_SHIFT	= 0,
+	OCRDMA_MODIFY_QP_ID_MASK	= 0xFFFF,
+
+	OCRDMA_QP_PARA_QPS_VALID	= Bit(0),
+	OCRDMA_QP_PARA_SQD_ASYNC_VALID	= Bit(1),
+	OCRDMA_QP_PARA_PKEY_VALID	= Bit(2),
+	OCRDMA_QP_PARA_QKEY_VALID	= Bit(3),
+	OCRDMA_QP_PARA_PMTU_VALID	= Bit(4),
+	OCRDMA_QP_PARA_ACK_TO_VALID	= Bit(5),
+	OCRDMA_QP_PARA_RETRY_CNT_VALID	= Bit(6),
+	OCRDMA_QP_PARA_RRC_VALID	= Bit(7),
+	OCRDMA_QP_PARA_RQPSN_VALID	= Bit(8),
+	OCRDMA_QP_PARA_MAX_IRD_VALID	= Bit(9),
+	OCRDMA_QP_PARA_MAX_ORD_VALID	= Bit(10),
+	OCRDMA_QP_PARA_RNT_VALID	= Bit(11),
+	OCRDMA_QP_PARA_SQPSN_VALID	= Bit(12),
+	OCRDMA_QP_PARA_DST_QPN_VALID	= Bit(13),
+	OCRDMA_QP_PARA_MAX_WQE_VALID	= Bit(14),
+	OCRDMA_QP_PARA_MAX_RQE_VALID	= Bit(15),
+	OCRDMA_QP_PARA_SGE_SEND_VALID	= Bit(16),
+	OCRDMA_QP_PARA_SGE_RECV_VALID	= Bit(17),
+	OCRDMA_QP_PARA_SGE_WR_VALID	= Bit(18),
+	OCRDMA_QP_PARA_INB_RDEN_VALID	= Bit(19),
+	OCRDMA_QP_PARA_INB_WREN_VALID	= Bit(20),
+	OCRDMA_QP_PARA_FLOW_LBL_VALID	= Bit(21),
+	OCRDMA_QP_PARA_BIND_EN_VALID	= Bit(22),
+	OCRDMA_QP_PARA_ZLKEY_EN_VALID	= Bit(23),
+	OCRDMA_QP_PARA_FMR_EN_VALID	= Bit(24),
+	OCRDMA_QP_PARA_INBAT_EN_VALID	= Bit(25),
+	OCRDMA_QP_PARA_VLAN_EN_VALID	= Bit(26),
+
+	OCRDMA_MODIFY_QP_FLAGS_RD	= Bit(0),
+	OCRDMA_MODIFY_QP_FLAGS_WR	= Bit(1),
+	OCRDMA_MODIFY_QP_FLAGS_SEND	= Bit(2),
+	OCRDMA_MODIFY_QP_FLAGS_ATOMIC	= Bit(3)
+};
+
+enum {
+	OCRDMA_QP_PARAMS_SRQ_ID_SHIFT		= 0,
+	OCRDMA_QP_PARAMS_SRQ_ID_MASK		= 0xFFFF,
+
+	OCRDMA_QP_PARAMS_MAX_RQE_SHIFT		= 0,
+	OCRDMA_QP_PARAMS_MAX_RQE_MASK		= 0xFFFF,
+	OCRDMA_QP_PARAMS_MAX_WQE_SHIFT		= 16,
+	OCRDMA_QP_PARAMS_MAX_WQE_MASK		= 0xFFFF <<
+	    OCRDMA_QP_PARAMS_MAX_WQE_SHIFT,
+
+	OCRDMA_QP_PARAMS_MAX_SGE_WRITE_SHIFT	= 0,
+	OCRDMA_QP_PARAMS_MAX_SGE_WRITE_MASK	= 0xFFFF,
+	OCRDMA_QP_PARAMS_MAX_SGE_SEND_SHIFT	= 16,
+	OCRDMA_QP_PARAMS_MAX_SGE_SEND_MASK	= 0xFFFF <<
+					OCRDMA_QP_PARAMS_MAX_SGE_SEND_SHIFT,
+
+	OCRDMA_QP_PARAMS_FLAGS_FMR_EN		= Bit(0),
+	OCRDMA_QP_PARAMS_FLAGS_LKEY_0_EN	= Bit(1),
+	OCRDMA_QP_PARAMS_FLAGS_BIND_MW_EN	= Bit(2),
+	OCRDMA_QP_PARAMS_FLAGS_INBWR_EN		= Bit(3),
+	OCRDMA_QP_PARAMS_FLAGS_INBRD_EN		= Bit(4),
+	OCRDMA_QP_PARAMS_STATE_SHIFT		= 5,
+	OCRDMA_QP_PARAMS_STATE_MASK		= Bit(5) | Bit(6) | Bit(7),
+	OCRDMA_QP_PARAMS_FLAGS_SQD_ASYNC	= Bit(8),
+	OCRDMA_QP_PARAMS_FLAGS_INB_ATEN		= Bit(9),
+	OCRDMA_QP_PARAMS_MAX_SGE_RECV_SHIFT	= 16,
+	OCRDMA_QP_PARAMS_MAX_SGE_RECV_MASK	= 0xFFFF <<
+					OCRDMA_QP_PARAMS_MAX_SGE_RECV_SHIFT,
+
+	OCRDMA_QP_PARAMS_MAX_IRD_SHIFT		= 0,
+	OCRDMA_QP_PARAMS_MAX_IRD_MASK		= 0xFFFF,
+	OCRDMA_QP_PARAMS_MAX_ORD_SHIFT		= 16,
+	OCRDMA_QP_PARAMS_MAX_ORD_MASK		= 0xFFFF <<
+					OCRDMA_QP_PARAMS_MAX_ORD_SHIFT,
+
+	OCRDMA_QP_PARAMS_RQ_CQID_SHIFT		= 0,
+	OCRDMA_QP_PARAMS_RQ_CQID_MASK		= 0xFFFF,
+	OCRDMA_QP_PARAMS_WQ_CQID_SHIFT		= 16,
+	OCRDMA_QP_PARAMS_WQ_CQID_MASK		= 0xFFFF <<
+					OCRDMA_QP_PARAMS_WQ_CQID_SHIFT,
+
+	OCRDMA_QP_PARAMS_RQ_PSN_SHIFT		= 0,
+	OCRDMA_QP_PARAMS_RQ_PSN_MASK		= 0xFFFFFF,
+	OCRDMA_QP_PARAMS_HOP_LMT_SHIFT		= 24,
+	OCRDMA_QP_PARAMS_HOP_LMT_MASK		= 0xFF <<
+					OCRDMA_QP_PARAMS_HOP_LMT_SHIFT,
+
+	OCRDMA_QP_PARAMS_SQ_PSN_SHIFT		= 0,
+	OCRDMA_QP_PARAMS_SQ_PSN_MASK		= 0xFFFFFF,
+	OCRDMA_QP_PARAMS_TCLASS_SHIFT		= 24,
+	OCRDMA_QP_PARAMS_TCLASS_MASK		= 0xFF <<
+					OCRDMA_QP_PARAMS_TCLASS_SHIFT,
+
+	OCRDMA_QP_PARAMS_DEST_QPN_SHIFT		= 0,
+	OCRDMA_QP_PARAMS_DEST_QPN_MASK		= 0xFFFFFF,
+	OCRDMA_QP_PARAMS_RNR_RETRY_CNT_SHIFT	= 24,
+	OCRDMA_QP_PARAMS_RNR_RETRY_CNT_MASK	= 0x7 <<
+					OCRDMA_QP_PARAMS_RNR_RETRY_CNT_SHIFT,
+	OCRDMA_QP_PARAMS_ACK_TIMEOUT_SHIFT	= 27,
+	OCRDMA_QP_PARAMS_ACK_TIMEOUT_MASK	= 0x1F <<
+					OCRDMA_QP_PARAMS_ACK_TIMEOUT_SHIFT,
+
+	OCRDMA_QP_PARAMS_PKEY_INDEX_SHIFT	= 0,
+	OCRDMA_QP_PARAMS_PKEY_INDEX_MASK	= 0xFFFF,
+	OCRDMA_QP_PARAMS_PATH_MTU_SHIFT		= 18,
+	OCRDMA_QP_PARAMS_PATH_MTU_MASK		= 0x3FFF <<
+					OCRDMA_QP_PARAMS_PATH_MTU_SHIFT,
+
+	OCRDMA_QP_PARAMS_FLOW_LABEL_SHIFT	= 0,
+	OCRDMA_QP_PARAMS_FLOW_LABEL_MASK	= 0xFFFFF,
+	OCRDMA_QP_PARAMS_SL_SHIFT		= 20,
+	OCRDMA_QP_PARAMS_SL_MASK		= 0xF <<
+					OCRDMA_QP_PARAMS_SL_SHIFT,
+	OCRDMA_QP_PARAMS_RETRY_CNT_SHIFT	= 24,
+	OCRDMA_QP_PARAMS_RETRY_CNT_MASK		= 0x7 <<
+					OCRDMA_QP_PARAMS_RETRY_CNT_SHIFT,
+	OCRDMA_QP_PARAMS_RNR_NAK_TIMER_SHIFT	= 27,
+	OCRDMA_QP_PARAMS_RNR_NAK_TIMER_MASK	= 0x1F <<
+					OCRDMA_QP_PARAMS_RNR_NAK_TIMER_SHIFT,
+
+	OCRDMA_QP_PARAMS_DMAC_B4_TO_B5_SHIFT	= 0,
+	OCRDMA_QP_PARAMS_DMAC_B4_TO_B5_MASK	= 0xFFFF,
+	OCRDMA_QP_PARAMS_VLAN_SHIFT		= 16,
+	OCRDMA_QP_PARAMS_VLAN_MASK		= 0xFFFF <<
+					OCRDMA_QP_PARAMS_VLAN_SHIFT
+};
+
+struct ocrdma_qp_params {
+	u32 id;
+	u32 max_wqe_rqe;
+	u32 max_sge_send_write;
+	u32 max_sge_recv_flags;
+	u32 max_ord_ird;
+	u32 wq_rq_cqid;
+	u32 hop_lmt_rq_psn;
+	u32 tclass_sq_psn;
+	u32 ack_to_rnr_rtc_dest_qpn;
+	u32 path_mtu_pkey_indx;
+	u32 rnt_rc_sl_fl;
+	u8 sgid[16];
+	u8 dgid[16];
+	u32 dmac_b0_to_b3;
+	u32 vlan_dmac_b4_to_b5;
+	u32 qkey;
+} __packed;
+
+struct ocrdma_modify_qp {
+	struct ocrdma_mqe_hdr hdr;
+	struct ocrdma_mbx_hdr req;
+
+	struct ocrdma_qp_params params;
+	u32 flags;
+	u32 rdma_flags;
+	u32 num_outstanding_atomic_rd;
+} __packed;
+
+enum {
+	OCRDMA_MODIFY_QP_RSP_MAX_RQE_SHIFT	= 0,
+	OCRDMA_MODIFY_QP_RSP_MAX_RQE_MASK	= 0xFFFF,
+	OCRDMA_MODIFY_QP_RSP_MAX_WQE_SHIFT	= 16,
+	OCRDMA_MODIFY_QP_RSP_MAX_WQE_MASK	= 0xFFFF <<
+					OCRDMA_MODIFY_QP_RSP_MAX_WQE_SHIFT,
+
+	OCRDMA_MODIFY_QP_RSP_MAX_IRD_SHIFT	= 0,
+	OCRDMA_MODIFY_QP_RSP_MAX_IRD_MASK	= 0xFFFF,
+	OCRDMA_MODIFY_QP_RSP_MAX_ORD_SHIFT	= 16,
+	OCRDMA_MODIFY_QP_RSP_MAX_ORD_MASK	= 0xFFFF <<
+					OCRDMA_MODIFY_QP_RSP_MAX_ORD_SHIFT
+};
+
+struct ocrdma_modify_qp_rsp {
+	struct ocrdma_mqe_hdr hdr;
+	struct ocrdma_mbx_rsp rsp;
+
+	u32 max_wqe_rqe;
+	u32 max_ord_ird;
+} __packed;
+
+struct ocrdma_query_qp {
+	struct ocrdma_mqe_hdr hdr;
+	struct ocrdma_mbx_hdr req;
+
+#define OCRDMA_QUERY_UP_QP_ID_SHIFT 0
+#define OCRDMA_QUERY_UP_QP_ID_MASK   0xFFFFFF
+	u32 qp_id;
+} __packed;
+
+struct ocrdma_query_qp_rsp {
+	struct ocrdma_mqe_hdr hdr;
+	struct ocrdma_mbx_rsp rsp;
+	struct ocrdma_qp_params params;
+} __packed;
+
+enum {
+	OCRDMA_CREATE_SRQ_PD_ID_SHIFT		= 0,
+	OCRDMA_CREATE_SRQ_PD_ID_MASK		= 0xFFFF,
+	OCRDMA_CREATE_SRQ_PG_SZ_SHIFT		= 16,
+	OCRDMA_CREATE_SRQ_PG_SZ_MASK		= 0x3 <<
+					OCRDMA_CREATE_SRQ_PG_SZ_SHIFT,
+
+	OCRDMA_CREATE_SRQ_MAX_RQE_SHIFT		= 0,
+	OCRDMA_CREATE_SRQ_MAX_SGE_RECV_SHIFT	= 16,
+	OCRDMA_CREATE_SRQ_MAX_SGE_RECV_MASK	= 0xFFFF <<
+					OCRDMA_CREATE_SRQ_MAX_SGE_RECV_SHIFT,
+
+	OCRDMA_CREATE_SRQ_RQE_SIZE_SHIFT	= 0,
+	OCRDMA_CREATE_SRQ_RQE_SIZE_MASK		= 0xFFFF,
+	OCRDMA_CREATE_SRQ_NUM_RQ_PAGES_SHIFT	= 16,
+	OCRDMA_CREATE_SRQ_NUM_RQ_PAGES_MASK	= 0xFFFF <<
+					OCRDMA_CREATE_SRQ_NUM_RQ_PAGES_SHIFT
+};
+
+struct ocrdma_create_srq {
+	struct ocrdma_mqe_hdr hdr;
+	struct ocrdma_mbx_hdr req;
+
+	u32 pgsz_pdid;
+	u32 max_sge_rqe;
+	u32 pages_rqe_sz;
+	struct ocrdma_pa rq_addr[MAX_OCRDMA_SRQ_PAGES];
+} __packed;
+
+enum {
+	OCRDMA_CREATE_SRQ_RSP_SRQ_ID_SHIFT			= 0,
+	OCRDMA_CREATE_SRQ_RSP_SRQ_ID_MASK			= 0xFFFFFF,
+
+	OCRDMA_CREATE_SRQ_RSP_MAX_RQE_ALLOCATED_SHIFT		= 0,
+	OCRDMA_CREATE_SRQ_RSP_MAX_RQE_ALLOCATED_MASK		= 0xFFFF,
+	OCRDMA_CREATE_SRQ_RSP_MAX_SGE_RECV_ALLOCATED_SHIFT	= 16,
+	OCRDMA_CREATE_SRQ_RSP_MAX_SGE_RECV_ALLOCATED_MASK	= 0xFFFF <<
+			OCRDMA_CREATE_SRQ_RSP_MAX_SGE_RECV_ALLOCATED_SHIFT
+};
+
+struct ocrdma_create_srq_rsp {
+	struct ocrdma_mqe_hdr hdr;
+	struct ocrdma_mbx_rsp rsp;
+
+	u32 id;
+	u32 max_sge_rqe_allocated;
+} __packed;
+
+enum {
+	OCRDMA_MODIFY_SRQ_ID_SHIFT	= 0,
+	OCRDMA_MODIFY_SRQ_ID_MASK	= 0xFFFFFF,
+
+	OCRDMA_MODIFY_SRQ_MAX_RQE_SHIFT	= 0,
+	OCRDMA_MODIFY_SRQ_MAX_RQE_MASK	= 0xFFFF,
+	OCRDMA_MODIFY_SRQ_LIMIT_SHIFT	= 16,
+	OCRDMA_MODIFY_SRQ_LIMIT_MASK	= 0xFFFF <<
+					OCRDMA_MODIFY_SRQ_LIMIT_SHIFT
+};
+
+struct ocrdma_modify_srq {
+	struct ocrdma_mqe_hdr hdr;
+	struct ocrdma_mbx_hdr req;
+
+	u32 id;
+	u32 limit_max_rqe;
+} __packed;
+
+enum {
+	OCRDMA_QUERY_SRQ_ID_SHIFT	= 0,
+	OCRDMA_QUERY_SRQ_ID_MASK	= 0xFFFFFF
+};
+
+struct ocrdma_query_srq {
+	struct ocrdma_mqe_hdr hdr;
+	struct ocrdma_mbx_hdr req;
+
+	u32 id;
+} __packed;
+
+enum {
+	OCRDMA_QUERY_SRQ_RSP_PD_ID_SHIFT	= 0,
+	OCRDMA_QUERY_SRQ_RSP_PD_ID_MASK		= 0xFFFF,
+	OCRDMA_QUERY_SRQ_RSP_MAX_RQE_SHIFT	= 16,
+	OCRDMA_QUERY_SRQ_RSP_MAX_RQE_MASK	= 0xFFFF <<
+					OCRDMA_QUERY_SRQ_RSP_MAX_RQE_SHIFT,
+
+	OCRDMA_QUERY_SRQ_RSP_MAX_SGE_RECV_SHIFT	= 0,
+	OCRDMA_QUERY_SRQ_RSP_MAX_SGE_RECV_MASK	= 0xFFFF,
+	OCRDMA_QUERY_SRQ_RSP_SRQ_LIMIT_SHIFT	= 16,
+	OCRDMA_QUERY_SRQ_RSP_SRQ_LIMIT_MASK	= 0xFFFF <<
+					OCRDMA_QUERY_SRQ_RSP_SRQ_LIMIT_SHIFT
+};
+
+struct ocrdma_query_srq_rsp {
+	struct ocrdma_mqe_hdr hdr;
+	struct ocrdma_mbx_rsp rsp;
+
+	u32 max_rqe_pdid;
+	u32 srq_lmt_max_sge;
+} __packed;
+
+enum {
+	OCRDMA_DESTROY_SRQ_ID_SHIFT	= 0,
+	OCRDMA_DESTROY_SRQ_ID_MASK	= 0xFFFFFF
+};
+
+struct ocrdma_destroy_srq {
+	struct ocrdma_mqe_hdr hdr;
+	struct ocrdma_mbx_hdr req;
+
+	u32 id;
+} __packed;
+
+enum {
+	OCRDMA_ALLOC_PD_ENABLE_DPP	= Bit(16),
+	OCRDMA_PD_MAX_DPP_ENABLED_QP	= 8,
+	OCRDMA_DPP_PAGE_SIZE		= 4096
+};
+
+struct ocrdma_alloc_pd {
+	struct ocrdma_mqe_hdr hdr;
+	struct ocrdma_mbx_hdr req;
+	u32 enable_dpp_rsvd;
+} __packed;
+
+enum {
+	OCRDMA_ALLOC_PD_RSP_DPP			= Bit(16),
+	OCRDMA_ALLOC_PD_RSP_DPP_PAGE_SHIFT	= 20,
+	OCRDMA_ALLOC_PD_RSP_PDID_MASK		= 0xFFFF,
+};
+
+struct ocrdma_alloc_pd_rsp {
+	struct ocrdma_mqe_hdr hdr;
+	struct ocrdma_mbx_rsp rsp;
+	u32 dpp_page_pdid;
+} __packed;
+
+struct ocrdma_dealloc_pd {
+	struct ocrdma_mqe_hdr hdr;
+	struct ocrdma_mbx_hdr req;
+	u32 id;
+} __packed;
+
+struct ocrdma_dealloc_pd_rsp {
+	struct ocrdma_mqe_hdr hdr;
+	struct ocrdma_mbx_rsp rsp;
+} __packed;
+
+enum {
+	OCRDMA_ADDR_CHECK_ENABLE	= 1,
+	OCRDMA_ADDR_CHECK_DISABLE	= 0
+};
+
+enum {
+	OCRDMA_ALLOC_LKEY_PD_ID_SHIFT		= 0,
+	OCRDMA_ALLOC_LKEY_PD_ID_MASK		= 0xFFFF,
+
+	OCRDMA_ALLOC_LKEY_ADDR_CHECK_SHIFT	= 0,
+	OCRDMA_ALLOC_LKEY_ADDR_CHECK_MASK	= Bit(0),
+	OCRDMA_ALLOC_LKEY_FMR_SHIFT		= 1,
+	OCRDMA_ALLOC_LKEY_FMR_MASK		= Bit(1),
+	OCRDMA_ALLOC_LKEY_REMOTE_INV_SHIFT	= 2,
+	OCRDMA_ALLOC_LKEY_REMOTE_INV_MASK	= Bit(2),
+	OCRDMA_ALLOC_LKEY_REMOTE_WR_SHIFT	= 3,
+	OCRDMA_ALLOC_LKEY_REMOTE_WR_MASK	= Bit(3),
+	OCRDMA_ALLOC_LKEY_REMOTE_RD_SHIFT	= 4,
+	OCRDMA_ALLOC_LKEY_REMOTE_RD_MASK	= Bit(4),
+	OCRDMA_ALLOC_LKEY_LOCAL_WR_SHIFT	= 5,
+	OCRDMA_ALLOC_LKEY_LOCAL_WR_MASK		= Bit(5),
+	OCRDMA_ALLOC_LKEY_REMOTE_ATOMIC_MASK	= Bit(6),
+	OCRDMA_ALLOC_LKEY_REMOTE_ATOMIC_SHIFT	= 6,
+	OCRDMA_ALLOC_LKEY_PBL_SIZE_SHIFT	= 16,
+	OCRDMA_ALLOC_LKEY_PBL_SIZE_MASK		= 0xFFFF <<
+						OCRDMA_ALLOC_LKEY_PBL_SIZE_SHIFT
+};
+
+struct ocrdma_alloc_lkey {
+	struct ocrdma_mqe_hdr hdr;
+	struct ocrdma_mbx_hdr req;
+
+	u32 pdid;
+	u32 pbl_sz_flags;
+} __packed;
+
+struct ocrdma_alloc_lkey_rsp {
+	struct ocrdma_mqe_hdr hdr;
+	struct ocrdma_mbx_rsp rsp;
+
+	u32 lrkey;
+	u32 num_pbl_rsvd;
+} __packed;
+
+struct ocrdma_dealloc_lkey {
+	struct ocrdma_mqe_hdr hdr;
+	struct ocrdma_mbx_hdr req;
+
+	u32 lkey;
+	u32 rsvd_frmr;
+} __packed;
+
+struct ocrdma_dealloc_lkey_rsp {
+	struct ocrdma_mqe_hdr hdr;
+	struct ocrdma_mbx_rsp rsp;
+} __packed;
+
+#define MAX_OCRDMA_NSMR_PBL    (u32)22
+#define MAX_OCRDMA_PBL_SIZE     65536
+#define MAX_OCRDMA_PBL_PER_LKEY	32767
+
+enum {
+	OCRDMA_REG_NSMR_LRKEY_INDEX_SHIFT	= 0,
+	OCRDMA_REG_NSMR_LRKEY_INDEX_MASK	= 0xFFFFFF,
+	OCRDMA_REG_NSMR_LRKEY_SHIFT		= 24,
+	OCRDMA_REG_NSMR_LRKEY_MASK		= 0xFF <<
+					OCRDMA_REG_NSMR_LRKEY_SHIFT,
+
+	OCRDMA_REG_NSMR_PD_ID_SHIFT		= 0,
+	OCRDMA_REG_NSMR_PD_ID_MASK		= 0xFFFF,
+	OCRDMA_REG_NSMR_NUM_PBL_SHIFT		= 16,
+	OCRDMA_REG_NSMR_NUM_PBL_MASK		= 0xFFFF <<
+					OCRDMA_REG_NSMR_NUM_PBL_SHIFT,
+
+	OCRDMA_REG_NSMR_PBE_SIZE_SHIFT		= 0,
+	OCRDMA_REG_NSMR_PBE_SIZE_MASK		= 0xFFFF,
+	OCRDMA_REG_NSMR_HPAGE_SIZE_SHIFT	= 16,
+	OCRDMA_REG_NSMR_HPAGE_SIZE_MASK		= 0xFF <<
+					OCRDMA_REG_NSMR_HPAGE_SIZE_SHIFT,
+	OCRDMA_REG_NSMR_BIND_MEMWIN_SHIFT	= 24,
+	OCRDMA_REG_NSMR_BIND_MEMWIN_MASK	= Bit(24),
+	OCRDMA_REG_NSMR_ZB_SHIFT		= 25,
+	OCRDMA_REG_NSMR_ZB_MASK			= Bit(25),
+	OCRDMA_REG_NSMR_REMOTE_INV_SHIFT	= 26,
+	OCRDMA_REG_NSMR_REMOTE_INV_MASK		= Bit(26),
+	OCRDMA_REG_NSMR_REMOTE_WR_SHIFT		= 27,
+	OCRDMA_REG_NSMR_REMOTE_WR_MASK		= Bit(27),
+	OCRDMA_REG_NSMR_REMOTE_RD_SHIFT		= 28,
+	OCRDMA_REG_NSMR_REMOTE_RD_MASK		= Bit(28),
+	OCRDMA_REG_NSMR_LOCAL_WR_SHIFT		= 29,
+	OCRDMA_REG_NSMR_LOCAL_WR_MASK		= Bit(29),
+	OCRDMA_REG_NSMR_REMOTE_ATOMIC_SHIFT	= 30,
+	OCRDMA_REG_NSMR_REMOTE_ATOMIC_MASK	= Bit(30),
+	OCRDMA_REG_NSMR_LAST_SHIFT		= 31,
+	OCRDMA_REG_NSMR_LAST_MASK		= Bit(31)
+};
+
+struct ocrdma_reg_nsmr {
+	struct ocrdma_mqe_hdr hdr;
+	struct ocrdma_mbx_hdr cmd;
+
+	u32 lrkey_key_index;
+	u32 num_pbl_pdid;
+	u32 flags_hpage_pbe_sz;
+	u32 totlen_low;
+	u32 totlen_high;
+	u32 fbo_low;
+	u32 fbo_high;
+	u32 va_loaddr;
+	u32 va_hiaddr;
+	struct ocrdma_pa pbl[MAX_OCRDMA_NSMR_PBL];
+} __packed;
+
+enum {
+	OCRDMA_REG_NSMR_CONT_PBL_SHIFT		= 0,
+	OCRDMA_REG_NSMR_CONT_PBL_MASK		= 0xFFFF,
+	OCRDMA_REG_NSMR_CONT_NUM_PBL_SHIFT	= 16,
+	OCRDMA_REG_NSMR_CONT_NUM_PBL_MASK	= 0xFFFF <<
+					OCRDMA_REG_NSMR_CONT_NUM_PBL_SHIFT,
+
+	OCRDMA_REG_NSMR_CONT_LAST_SHIFT		= 31,
+	OCRDMA_REG_NSMR_CONT_LAST_MASK		= Bit(31)
+};
+
+struct ocrdma_reg_nsmr_cont {
+	struct ocrdma_mqe_hdr hdr;
+	struct ocrdma_mbx_hdr cmd;
+
+	u32 lrkey;
+	u32 num_pbl_offset;
+	u32 last;
+
+	struct ocrdma_pa pbl[MAX_OCRDMA_NSMR_PBL];
+} __packed;
+
+struct ocrdma_pbe {
+	u32 pa_hi;
+	u32 pa_lo;
+} __packed;
+
+enum {
+	OCRDMA_REG_NSMR_RSP_NUM_PBL_SHIFT	= 16,
+	OCRDMA_REG_NSMR_RSP_NUM_PBL_MASK	= 0xFFFF0000
+};
+
+struct ocrdma_reg_nsmr_rsp {
+	struct ocrdma_mqe_hdr hdr;
+	struct ocrdma_mbx_rsp rsp;
+
+	u32 lrkey;
+	u32 num_pbl;
+} __packed;
+
+enum {
+	OCRDMA_REG_NSMR_CONT_RSP_LRKEY_INDEX_SHIFT	= 0,
+	OCRDMA_REG_NSMR_CONT_RSP_LRKEY_INDEX_MASK	= 0xFFFFFF,
+	OCRDMA_REG_NSMR_CONT_RSP_LRKEY_SHIFT		= 24,
+	OCRDMA_REG_NSMR_CONT_RSP_LRKEY_MASK		= 0xFF <<
+					OCRDMA_REG_NSMR_CONT_RSP_LRKEY_SHIFT,
+
+	OCRDMA_REG_NSMR_CONT_RSP_NUM_PBL_SHIFT		= 16,
+	OCRDMA_REG_NSMR_CONT_RSP_NUM_PBL_MASK		= 0xFFFF <<
+					OCRDMA_REG_NSMR_CONT_RSP_NUM_PBL_SHIFT
+};
+
+struct ocrdma_reg_nsmr_cont_rsp {
+	struct ocrdma_mqe_hdr hdr;
+	struct ocrdma_mbx_rsp rsp;
+
+	u32 lrkey_key_index;
+	u32 num_pbl;
+} __packed;
+
+enum {
+	OCRDMA_ALLOC_MW_PD_ID_SHIFT	= 0,
+	OCRDMA_ALLOC_MW_PD_ID_MASK	= 0xFFFF
+};
+
+struct ocrdma_alloc_mw {
+	struct ocrdma_mqe_hdr hdr;
+	struct ocrdma_mbx_hdr req;
+
+	u32 pdid;
+} __packed;
+
+enum {
+	OCRDMA_ALLOC_MW_RSP_LRKEY_INDEX_SHIFT	= 0,
+	OCRDMA_ALLOC_MW_RSP_LRKEY_INDEX_MASK	= 0xFFFFFF
+};
+
+struct ocrdma_alloc_mw_rsp {
+	struct ocrdma_mqe_hdr hdr;
+	struct ocrdma_mbx_rsp rsp;
+
+	u32 lrkey_index;
+} __packed;
+
+struct ocrdma_attach_mcast {
+	struct ocrdma_mqe_hdr hdr;
+	struct ocrdma_mbx_hdr req;
+	u32 qp_id;
+	u8 mgid[16];
+	u32 mac_b0_to_b3;
+	u32 vlan_mac_b4_to_b5;
+} __packed;
+
+struct ocrdma_attach_mcast_rsp {
+	struct ocrdma_mqe_hdr hdr;
+	struct ocrdma_mbx_rsp rsp;
+} __packed;
+
+struct ocrdma_detach_mcast {
+	struct ocrdma_mqe_hdr hdr;
+	struct ocrdma_mbx_hdr req;
+	u32 qp_id;
+	u8 mgid[16];
+	u32 mac_b0_to_b3;
+	u32 vlan_mac_b4_to_b5;
+} __packed;
+
+struct ocrdma_detach_mcast_rsp {
+	struct ocrdma_mqe_hdr hdr;
+	struct ocrdma_mbx_rsp rsp;
+} __packed;
+
+enum {
+	OCRDMA_CREATE_AH_NUM_PAGES_SHIFT	= 19,
+	OCRDMA_CREATE_AH_NUM_PAGES_MASK		= 0xF <<
+					OCRDMA_CREATE_AH_NUM_PAGES_SHIFT,
+
+	OCRDMA_CREATE_AH_PAGE_SIZE_SHIFT	= 16,
+	OCRDMA_CREATE_AH_PAGE_SIZE_MASK		= 0x7 <<
+					OCRDMA_CREATE_AH_PAGE_SIZE_SHIFT,
+
+	OCRDMA_CREATE_AH_ENTRY_SIZE_SHIFT	= 23,
+	OCRDMA_CREATE_AH_ENTRY_SIZE_MASK	= 0x1FF <<
+					OCRDMA_CREATE_AH_ENTRY_SIZE_SHIFT,
+};
+
+#define OCRDMA_AH_TBL_PAGES 8
+
+struct ocrdma_create_ah_tbl {
+	struct ocrdma_mqe_hdr hdr;
+	struct ocrdma_mbx_hdr req;
+
+	u32 ah_conf;
+	struct ocrdma_pa tbl_addr[8];
+} __packed;
+
+struct ocrdma_create_ah_tbl_rsp {
+	struct ocrdma_mqe_hdr hdr;
+	struct ocrdma_mbx_rsp rsp;
+	u32 ahid;
+} __packed;
+
+struct ocrdma_delete_ah_tbl {
+	struct ocrdma_mqe_hdr hdr;
+	struct ocrdma_mbx_hdr req;
+	u32 ahid;
+} __packed;
+
+struct ocrdma_delete_ah_tbl_rsp {
+	struct ocrdma_mqe_hdr hdr;
+	struct ocrdma_mbx_rsp rsp;
+} __packed;
+
+enum {
+	OCRDMA_EQE_VALID_SHIFT		= 0,
+	OCRDMA_EQE_VALID_MASK		= Bit(0),
+	OCRDMA_EQE_FOR_CQE_MASK		= 0xFFFE,
+	OCRDMA_EQE_RESOURCE_ID_SHIFT	= 16,
+	OCRDMA_EQE_RESOURCE_ID_MASK	= 0xFFFF <<
+				OCRDMA_EQE_RESOURCE_ID_SHIFT,
+};
+
+struct ocrdma_eqe {
+	u32 id_valid;
+} __packed;
+
+enum OCRDMA_CQE_STATUS {
+	OCRDMA_CQE_SUCCESS = 0,
+	OCRDMA_CQE_LOC_LEN_ERR,
+	OCRDMA_CQE_LOC_QP_OP_ERR,
+	OCRDMA_CQE_LOC_EEC_OP_ERR,
+	OCRDMA_CQE_LOC_PROT_ERR,
+	OCRDMA_CQE_WR_FLUSH_ERR,
+	OCRDMA_CQE_MW_BIND_ERR,
+	OCRDMA_CQE_BAD_RESP_ERR,
+	OCRDMA_CQE_LOC_ACCESS_ERR,
+	OCRDMA_CQE_REM_INV_REQ_ERR,
+	OCRDMA_CQE_REM_ACCESS_ERR,
+	OCRDMA_CQE_REM_OP_ERR,
+	OCRDMA_CQE_RETRY_EXC_ERR,
+	OCRDMA_CQE_RNR_RETRY_EXC_ERR,
+	OCRDMA_CQE_LOC_RDD_VIOL_ERR,
+	OCRDMA_CQE_REM_INV_RD_REQ_ERR,
+	OCRDMA_CQE_REM_ABORT_ERR,
+	OCRDMA_CQE_INV_EECN_ERR,
+	OCRDMA_CQE_INV_EEC_STATE_ERR,
+	OCRDMA_CQE_FATAL_ERR,
+	OCRDMA_CQE_RESP_TIMEOUT_ERR,
+	OCRDMA_CQE_GENERAL_ERR
+};
+
+enum {
+	/* w0 */
+	OCRDMA_CQE_WQEIDX_SHIFT		= 0,
+	OCRDMA_CQE_WQEIDX_MASK		= 0xFFFF,
+
+	/* w1 */
+	OCRDMA_CQE_UD_XFER_LEN_SHIFT	= 16,
+	OCRDMA_CQE_PKEY_SHIFT		= 0,
+	OCRDMA_CQE_PKEY_MASK		= 0xFFFF,
+
+	/* w2 */
+	OCRDMA_CQE_QPN_SHIFT		= 0,
+	OCRDMA_CQE_QPN_MASK		= 0x0000FFFF,
+
+	OCRDMA_CQE_BUFTAG_SHIFT		= 16,
+	OCRDMA_CQE_BUFTAG_MASK		= 0xFFFF << OCRDMA_CQE_BUFTAG_SHIFT,
+
+	/* w3 */
+	OCRDMA_CQE_UD_STATUS_SHIFT	= 24,
+	OCRDMA_CQE_UD_STATUS_MASK	= 0x7 << OCRDMA_CQE_UD_STATUS_SHIFT,
+	OCRDMA_CQE_STATUS_SHIFT		= 16,
+	OCRDMA_CQE_STATUS_MASK		= 0xFF << OCRDMA_CQE_STATUS_SHIFT,
+	OCRDMA_CQE_VALID		= Bit(31),
+	OCRDMA_CQE_INVALIDATE		= Bit(30),
+	OCRDMA_CQE_QTYPE		= Bit(29),
+	OCRDMA_CQE_IMM			= Bit(28),
+	OCRDMA_CQE_WRITE_IMM		= Bit(27),
+	OCRDMA_CQE_QTYPE_SQ		= 0,
+	OCRDMA_CQE_QTYPE_RQ		= 1,
+	OCRDMA_CQE_SRCQP_MASK		= 0xFFFFFF
+};
+
+struct ocrdma_cqe {
+	union {
+		/* w0 to w2 */
+		struct {
+			u32 wqeidx;
+			u32 bytes_xfered;
+			u32 qpn;
+		} wq;
+		struct {
+			u32 lkey_immdt;
+			u32 rxlen;
+			u32 buftag_qpn;
+		} rq;
+		struct {
+			u32 lkey_immdt;
+			u32 rxlen_pkey;
+			u32 buftag_qpn;
+		} ud;
+		struct {
+			u32 word_0;
+			u32 word_1;
+			u32 qpn;
+		} cmn;
+	};
+	u32 flags_status_srcqpn;	/* w3 */
+} __packed;
+
+#define is_cqe_valid(cq, cqe) \
+	(((le32_to_cpu(cqe->flags_status_srcqpn) & OCRDMA_CQE_VALID)\
+	== cq->phase) ? 1 : 0)
+#define is_cqe_for_sq(cqe) \
+	((le32_to_cpu(cqe->flags_status_srcqpn) & OCRDMA_CQE_QTYPE) ? 0 : 1)
+#define is_cqe_for_rq(cqe) \
+	((le32_to_cpu(cqe->flags_status_srcqpn) & OCRDMA_CQE_QTYPE) ? 1 : 0)
+#define is_cqe_invalidated(cqe) \
+	((le32_to_cpu(cqe->flags_status_srcqpn) & OCRDMA_CQE_INVALIDATE) ? \
+	1 : 0)
+#define is_cqe_imm(cqe) \
+	((le32_to_cpu(cqe->flags_status_srcqpn) & OCRDMA_CQE_IMM) ? 1 : 0)
+#define is_cqe_wr_imm(cqe) \
+	((le32_to_cpu(cqe->flags_status_srcqpn) & OCRDMA_CQE_WRITE_IMM) ? 1 : 0)
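+
+/*
+ * Illustrative polling loop (index and ring-size names are
+ * hypothetical):
+ *
+ *	while (is_cqe_valid(cq, cqe)) {
+ *		... consume cqe ...
+ *		if (++idx == max_cqe) {
+ *			idx = 0;
+ *			cq->phase ^= OCRDMA_CQE_VALID;
+ *		}
+ *	}
+ *
+ * The device writes each CQE with the VALID bit reflecting its current
+ * pass over the ring, so comparing it against the software phase
+ * detects new entries without a consumer index shared with hardware.
+ */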
+
+struct ocrdma_sge {
+	u32 addr_hi;
+	u32 addr_lo;
+	u32 lrkey;
+	u32 len;
+} __packed;
+
+enum {
+	OCRDMA_FLAG_SIG		= 0x1,
+	OCRDMA_FLAG_INV		= 0x2,
+	OCRDMA_FLAG_FENCE_L	= 0x4,
+	OCRDMA_FLAG_FENCE_R	= 0x8,
+	OCRDMA_FLAG_SOLICIT	= 0x10,
+	OCRDMA_FLAG_IMM		= 0x20,
+
+	/* Stag flags */
+	OCRDMA_LKEY_FLAG_LOCAL_WR	= 0x1,
+	OCRDMA_LKEY_FLAG_REMOTE_RD	= 0x2,
+	OCRDMA_LKEY_FLAG_REMOTE_WR	= 0x4,
+	OCRDMA_LKEY_FLAG_VATO		= 0x8,
+};
+
+enum OCRDMA_WQE_OPCODE {
+	OCRDMA_WRITE		= 0x06,
+	OCRDMA_READ		= 0x0C,
+	OCRDMA_RESV0		= 0x02,
+	OCRDMA_SEND		= 0x00,
+	OCRDMA_CMP_SWP		= 0x14,
+	OCRDMA_BIND_MW		= 0x10,
+	OCRDMA_RESV1		= 0x0A,
+	OCRDMA_LKEY_INV		= 0x15,
+	OCRDMA_FETCH_ADD	= 0x13,
+	OCRDMA_POST_RQ		= 0x12
+};
+
+enum {
+	OCRDMA_TYPE_INLINE	= 0x0,
+	OCRDMA_TYPE_LKEY	= 0x1,
+};
+
+enum {
+	OCRDMA_WQE_OPCODE_SHIFT		= 0,
+	OCRDMA_WQE_OPCODE_MASK		= 0x0000001F,
+	OCRDMA_WQE_FLAGS_SHIFT		= 5,
+	OCRDMA_WQE_TYPE_SHIFT		= 16,
+	OCRDMA_WQE_TYPE_MASK		= 0x00030000,
+	OCRDMA_WQE_SIZE_SHIFT		= 18,
+	OCRDMA_WQE_SIZE_MASK		= 0xFF,
+	OCRDMA_WQE_NXT_WQE_SIZE_SHIFT	= 25,
+
+	OCRDMA_WQE_LKEY_FLAGS_SHIFT	= 0,
+	OCRDMA_WQE_LKEY_FLAGS_MASK	= 0xF
+};
+
+/* header WQE for all the SQ and RQ operations */
+struct ocrdma_hdr_wqe {
+	u32 cw;
+	union {
+		u32 rsvd_tag;
+		u32 rsvd_lkey_flags;
+	};
+	union {
+		u32 immdt;
+		u32 lkey;
+	};
+	u32 total_len;
+} __packed;
+
+struct ocrdma_ewqe_ud_hdr {
+	u32 rsvd_dest_qpn;
+	u32 qkey;
+	u32 rsvd_ahid;
+	u32 rsvd;
+} __packed;
+
+struct ocrdma_eth_basic {
+	u8 dmac[6];
+	u8 smac[6];
+	__be16 eth_type;
+} __packed;
+
+struct ocrdma_eth_vlan {
+	u8 dmac[6];
+	u8 smac[6];
+	__be16 eth_type;
+	__be16 vlan_tag;
+#define OCRDMA_ROCE_ETH_TYPE 0x8915
+	__be16 roce_eth_type;
+} __packed;
+
+struct ocrdma_grh {
+	__be32	tclass_flow;
+	__be32	pdid_hoplimit;
+	u8	sgid[16];
+	u8	dgid[16];
+	u16	rsvd;
+} __packed;
+
+#define OCRDMA_AV_VALID		Bit(0)
+#define OCRDMA_AV_VLAN_VALID	Bit(1)
+
+struct ocrdma_av {
+	struct ocrdma_eth_vlan eth_hdr;
+	struct ocrdma_grh grh;
+	u32 valid;
+} __packed;
+
+#endif				/* __OCRDMA_SLI_H__ */
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
new file mode 100644
index 0000000..e9f74d1
--- /dev/null
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -0,0 +1,2537 @@
+/*******************************************************************
+ * This file is part of the Emulex RoCE Device Driver for          *
+ * RoCE (RDMA over Converged Ethernet) adapters.                   *
+ * Copyright (C) 2008-2012 Emulex. All rights reserved.            *
+ * EMULEX and SLI are trademarks of Emulex.                        *
+ * www.emulex.com                                                  *
+ *                                                                 *
+ * This program is free software; you can redistribute it and/or   *
+ * modify it under the terms of version 2 of the GNU General       *
+ * Public License as published by the Free Software Foundation.    *
+ * This program is distributed in the hope that it will be useful. *
+ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND          *
+ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY,  *
+ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE      *
+ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
+ * TO BE LEGALLY INVALID.  See the GNU General Public License for  *
+ * more details, a copy of which can be found in the file COPYING  *
+ * included with this package.                                     *
+ *
+ * Contact Information:
+ * linux-drivers@emulex.com
+ *
+ * Emulex
+ * 3333 Susan Street
+ * Costa Mesa, CA 92626
+ *******************************************************************/
+
+#include <linux/dma-mapping.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_user_verbs.h>
+#include <rdma/iw_cm.h>
+#include <rdma/ib_umem.h>
+#include <rdma/ib_addr.h>
+
+#include "ocrdma.h"
+#include "ocrdma_hw.h"
+#include "ocrdma_verbs.h"
+#include "ocrdma_abi.h"
+
+int ocrdma_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
+{
+	if (index > 1)
+		return -EINVAL;
+
+	*pkey = 0xffff;
+	return 0;
+}
+
+int ocrdma_query_gid(struct ib_device *ibdev, u8 port,
+		     int index, union ib_gid *sgid)
+{
+	struct ocrdma_dev *dev;
+
+	dev = get_ocrdma_dev(ibdev);
+	memset(sgid, 0, sizeof(*sgid));
+	if (index >= OCRDMA_MAX_SGID)
+		return -EINVAL;
+
+	memcpy(sgid, &dev->sgid_tbl[index], sizeof(*sgid));
+
+	return 0;
+}
+
+int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr)
+{
+	struct ocrdma_dev *dev = get_ocrdma_dev(ibdev);
+
+	memset(attr, 0, sizeof *attr);
+	memcpy(&attr->fw_ver, &dev->attr.fw_ver[0],
+	       min(sizeof(dev->attr.fw_ver), sizeof(attr->fw_ver)));
+	ocrdma_get_guid(dev, (u8 *)&attr->sys_image_guid);
+	attr->max_mr_size = ~0ull;
+	attr->page_size_cap = 0xffff000;
+	attr->vendor_id = dev->nic_info.pdev->vendor;
+	attr->vendor_part_id = dev->nic_info.pdev->device;
+	attr->hw_ver = 0;
+	attr->max_qp = dev->attr.max_qp;
+	attr->max_ah = dev->attr.max_qp;
+	attr->max_qp_wr = dev->attr.max_wqe;
+
+	attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD |
+					IB_DEVICE_RC_RNR_NAK_GEN |
+					IB_DEVICE_SHUTDOWN_PORT |
+					IB_DEVICE_SYS_IMAGE_GUID |
+					IB_DEVICE_LOCAL_DMA_LKEY;
+	attr->max_sge = dev->attr.max_send_sge;
+	attr->max_sge_rd = dev->attr.max_send_sge;
+	attr->max_cq = dev->attr.max_cq;
+	attr->max_cqe = dev->attr.max_cqe;
+	attr->max_mr = dev->attr.max_mr;
+	attr->max_mw = 0;
+	attr->max_pd = dev->attr.max_pd;
+	attr->atomic_cap = 0;
+	attr->max_fmr = 0;
+	attr->max_map_per_fmr = 0;
+	attr->max_qp_rd_atom =
+	    min(dev->attr.max_ord_per_qp, dev->attr.max_ird_per_qp);
+	attr->max_qp_init_rd_atom = dev->attr.max_ord_per_qp;
+	attr->max_srq = (dev->attr.max_qp - 1);
+	attr->max_srq_sge = attr->max_sge;
+	attr->max_srq_wr = dev->attr.max_rqe;
+	attr->local_ca_ack_delay = dev->attr.local_ca_ack_delay;
+	attr->max_fast_reg_page_list_len = 0;
+	attr->max_pkeys = 1;
+	return 0;
+}
+
+int ocrdma_query_port(struct ib_device *ibdev,
+		      u8 port, struct ib_port_attr *props)
+{
+	enum ib_port_state port_state;
+	struct ocrdma_dev *dev;
+	struct net_device *netdev;
+
+	dev = get_ocrdma_dev(ibdev);
+	if (port > 1) {
+		ocrdma_err("%s(%d) invalid_port=0x%x\n", __func__,
+			   dev->id, port);
+		return -EINVAL;
+	}
+	netdev = dev->nic_info.netdev;
+	if (netif_running(netdev) && netif_oper_up(netdev)) {
+		port_state = IB_PORT_ACTIVE;
+		props->phys_state = 5;
+	} else {
+		port_state = IB_PORT_DOWN;
+		props->phys_state = 3;
+	}
+	props->max_mtu = IB_MTU_4096;
+	props->active_mtu = iboe_get_mtu(netdev->mtu);
+	props->lid = 0;
+	props->lmc = 0;
+	props->sm_lid = 0;
+	props->sm_sl = 0;
+	props->state = port_state;
+	props->port_cap_flags =
+	    IB_PORT_CM_SUP |
+	    IB_PORT_REINIT_SUP |
+	    IB_PORT_DEVICE_MGMT_SUP | IB_PORT_VENDOR_CLASS_SUP;
+	props->gid_tbl_len = OCRDMA_MAX_SGID;
+	props->pkey_tbl_len = 1;
+	props->bad_pkey_cntr = 0;
+	props->qkey_viol_cntr = 0;
+	props->active_width = IB_WIDTH_1X;
+	props->active_speed = 4;
+	props->max_msg_sz = 0x80000000;
+	props->max_vl_num = 4;
+	return 0;
+}
+
+int ocrdma_modify_port(struct ib_device *ibdev, u8 port, int mask,
+		       struct ib_port_modify *props)
+{
+	struct ocrdma_dev *dev;
+
+	dev = get_ocrdma_dev(ibdev);
+	if (port > 1) {
+		ocrdma_err("%s(%d) invalid_port=0x%x\n", __func__,
+			   dev->id, port);
+		return -EINVAL;
+	}
+	return 0;
+}
+
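+/* remember a (phy_addr, len) pair that this ucontext is allowed to
+ * mmap(); ocrdma_mmap() validates requests against this list.
+ */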
+static int ocrdma_add_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr,
+			   unsigned long len)
+{
+	struct ocrdma_mm *mm;
+
+	mm = kzalloc(sizeof(*mm), GFP_KERNEL);
+	if (mm == NULL)
+		return -ENOMEM;
+	mm->key.phy_addr = phy_addr;
+	mm->key.len = len;
+	INIT_LIST_HEAD(&mm->entry);
+
+	mutex_lock(&uctx->mm_list_lock);
+	list_add_tail(&mm->entry, &uctx->mm_head);
+	mutex_unlock(&uctx->mm_list_lock);
+	return 0;
+}
+
+static void ocrdma_del_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr,
+			    unsigned long len)
+{
+	struct ocrdma_mm *mm, *tmp;
+
+	mutex_lock(&uctx->mm_list_lock);
+	list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) {
+		if (len != mm->key.len || phy_addr != mm->key.phy_addr)
+			continue;
+
+		list_del(&mm->entry);
+		kfree(mm);
+		break;
+	}
+	mutex_unlock(&uctx->mm_list_lock);
+}
+
+static bool ocrdma_search_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr,
+			      unsigned long len)
+{
+	bool found = false;
+	struct ocrdma_mm *mm;
+
+	mutex_lock(&uctx->mm_list_lock);
+	list_for_each_entry(mm, &uctx->mm_head, entry) {
+		if (len != mm->key.len || phy_addr != mm->key.phy_addr)
+			continue;
+
+		found = true;
+		break;
+	}
+	mutex_unlock(&uctx->mm_list_lock);
+	return found;
+}
+
+struct ib_ucontext *ocrdma_alloc_ucontext(struct ib_device *ibdev,
+					  struct ib_udata *udata)
+{
+	int status;
+	struct ocrdma_ucontext *ctx;
+	struct ocrdma_alloc_ucontext_resp resp;
+	struct ocrdma_dev *dev = get_ocrdma_dev(ibdev);
+	struct pci_dev *pdev = dev->nic_info.pdev;
+	u32 map_len = roundup(sizeof(u32) * 2048, PAGE_SIZE);
+
+	if (!udata)
+		return ERR_PTR(-EFAULT);
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return ERR_PTR(-ENOMEM);
+	ctx->dev = dev;
+	INIT_LIST_HEAD(&ctx->mm_head);
+	mutex_init(&ctx->mm_list_lock);
+
+	ctx->ah_tbl.va = dma_alloc_coherent(&pdev->dev, map_len,
+					    &ctx->ah_tbl.pa, GFP_KERNEL);
+	if (!ctx->ah_tbl.va) {
+		kfree(ctx);
+		return ERR_PTR(-ENOMEM);
+	}
+	memset(ctx->ah_tbl.va, 0, map_len);
+	ctx->ah_tbl.len = map_len;
+
+	resp.ah_tbl_len = ctx->ah_tbl.len;
+	resp.ah_tbl_page = ctx->ah_tbl.pa;
+
+	status = ocrdma_add_mmap(ctx, resp.ah_tbl_page, resp.ah_tbl_len);
+	if (status)
+		goto map_err;
+	resp.dev_id = dev->id;
+	resp.max_inline_data = dev->attr.max_inline_data;
+	resp.wqe_size = dev->attr.wqe_size;
+	resp.rqe_size = dev->attr.rqe_size;
+	resp.dpp_wqe_size = dev->attr.wqe_size;
+	resp.rsvd = 0;
+
+	memcpy(resp.fw_ver, dev->attr.fw_ver, sizeof(resp.fw_ver));
+	status = ib_copy_to_udata(udata, &resp, sizeof(resp));
+	if (status)
+		goto cpy_err;
+	return &ctx->ibucontext;
+
+cpy_err:
+	ocrdma_del_mmap(ctx, ctx->ah_tbl.pa, ctx->ah_tbl.len);
+map_err:
+	dma_free_coherent(&pdev->dev, ctx->ah_tbl.len, ctx->ah_tbl.va,
+			  ctx->ah_tbl.pa);
+	kfree(ctx);
+	return ERR_PTR(status);
+}
+
+int ocrdma_dealloc_ucontext(struct ib_ucontext *ibctx)
+{
+	struct ocrdma_mm *mm, *tmp;
+	struct ocrdma_ucontext *uctx = get_ocrdma_ucontext(ibctx);
+	struct pci_dev *pdev = uctx->dev->nic_info.pdev;
+
+	ocrdma_del_mmap(uctx, uctx->ah_tbl.pa, uctx->ah_tbl.len);
+	dma_free_coherent(&pdev->dev, uctx->ah_tbl.len, uctx->ah_tbl.va,
+			  uctx->ah_tbl.pa);
+
+	list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) {
+		list_del(&mm->entry);
+		kfree(mm);
+	}
+	kfree(uctx);
+	return 0;
+}
+
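+/* validate and service userspace mmap() requests. three regions are
+ * recognized: doorbell pages, the write-combined DPP (direct packet
+ * push) area and ordinary queue memory; anything not registered via
+ * ocrdma_add_mmap() is rejected.
+ */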
+int ocrdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
+{
+	struct ocrdma_ucontext *ucontext = get_ocrdma_ucontext(context);
+	struct ocrdma_dev *dev = ucontext->dev;
+	unsigned long vm_page = vma->vm_pgoff << PAGE_SHIFT;
+	u64 unmapped_db = (u64) dev->nic_info.unmapped_db;
+	unsigned long len = (vma->vm_end - vma->vm_start);
+	int status = 0;
+	bool found;
+
+	if (vma->vm_start & (PAGE_SIZE - 1))
+		return -EINVAL;
+	found = ocrdma_search_mmap(ucontext, vma->vm_pgoff << PAGE_SHIFT, len);
+	if (!found)
+		return -EINVAL;
+
+	if ((vm_page >= unmapped_db) && (vm_page <= (unmapped_db +
+		dev->nic_info.db_total_size)) &&
+		(len <=	dev->nic_info.db_page_size)) {
+		/* doorbell mapping */
+		status = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
+					    len, vma->vm_page_prot);
+	} else if (dev->nic_info.dpp_unmapped_len &&
+		(vm_page >= (u64) dev->nic_info.dpp_unmapped_addr) &&
+		(vm_page <= (u64) (dev->nic_info.dpp_unmapped_addr +
+			dev->nic_info.dpp_unmapped_len)) &&
+		(len <= dev->nic_info.dpp_unmapped_len)) {
+		/* dpp area mapping */
+		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+		status = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
+					    len, vma->vm_page_prot);
+	} else {
+		/* queue memory mapping */
+		status = remap_pfn_range(vma, vma->vm_start,
+					 vma->vm_pgoff, len, vma->vm_page_prot);
+	}
+	return status;
+}
+
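+/* return the PD's doorbell (and optional DPP) page details to
+ * userspace and register those pages as mmap()-able for this context.
+ */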
+static int ocrdma_copy_pd_uresp(struct ocrdma_pd *pd,
+				struct ib_ucontext *ib_ctx,
+				struct ib_udata *udata)
+{
+	int status;
+	u64 db_page_addr;
+	u64 dpp_page_addr = 0;
+	u32 db_page_size;
+	struct ocrdma_alloc_pd_uresp rsp;
+	struct ocrdma_ucontext *uctx = get_ocrdma_ucontext(ib_ctx);
+
+	rsp.id = pd->id;
+	rsp.dpp_enabled = pd->dpp_enabled;
+	db_page_addr = pd->dev->nic_info.unmapped_db +
+			(pd->id * pd->dev->nic_info.db_page_size);
+	db_page_size = pd->dev->nic_info.db_page_size;
+
+	status = ocrdma_add_mmap(uctx, db_page_addr, db_page_size);
+	if (status)
+		return status;
+
+	if (pd->dpp_enabled) {
+		dpp_page_addr = pd->dev->nic_info.dpp_unmapped_addr +
+				(pd->id * OCRDMA_DPP_PAGE_SIZE);
+		status = ocrdma_add_mmap(uctx, dpp_page_addr,
+				 OCRDMA_DPP_PAGE_SIZE);
+		if (status)
+			goto dpp_map_err;
+		rsp.dpp_page_addr_hi = upper_32_bits(dpp_page_addr);
+		rsp.dpp_page_addr_lo = dpp_page_addr;
+	}
+
+	status = ib_copy_to_udata(udata, &rsp, sizeof(rsp));
+	if (status)
+		goto ucopy_err;
+
+	pd->uctx = uctx;
+	return 0;
+
+ucopy_err:
+	if (pd->dpp_enabled)
+		ocrdma_del_mmap(pd->uctx, dpp_page_addr, OCRDMA_DPP_PAGE_SIZE);
+dpp_map_err:
+	ocrdma_del_mmap(pd->uctx, db_page_addr, db_page_size);
+	return status;
+}
+
+struct ib_pd *ocrdma_alloc_pd(struct ib_device *ibdev,
+			      struct ib_ucontext *context,
+			      struct ib_udata *udata)
+{
+	struct ocrdma_dev *dev = get_ocrdma_dev(ibdev);
+	struct ocrdma_pd *pd;
+	int status;
+
+	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
+	if (!pd)
+		return ERR_PTR(-ENOMEM);
+	pd->dev = dev;
+	if (udata && context) {
+		pd->dpp_enabled =
+			(dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY);
+		pd->num_dpp_qp =
+			pd->dpp_enabled ? OCRDMA_PD_MAX_DPP_ENABLED_QP : 0;
+	}
+	status = ocrdma_mbx_alloc_pd(dev, pd);
+	if (status) {
+		kfree(pd);
+		return ERR_PTR(status);
+	}
+	atomic_set(&pd->use_cnt, 0);
+
+	if (udata && context) {
+		status = ocrdma_copy_pd_uresp(pd, context, udata);
+		if (status)
+			goto err;
+	}
+	return &pd->ibpd;
+
+err:
+	ocrdma_dealloc_pd(&pd->ibpd);
+	return ERR_PTR(status);
+}
+
+int ocrdma_dealloc_pd(struct ib_pd *ibpd)
+{
+	struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
+	struct ocrdma_dev *dev = pd->dev;
+	int status;
+	u64 usr_db;
+
+	if (atomic_read(&pd->use_cnt)) {
+		ocrdma_err("%s(%d) pd=0x%x is in use.\n",
+			   __func__, dev->id, pd->id);
+		status = -EFAULT;
+		goto dealloc_err;
+	}
+	status = ocrdma_mbx_dealloc_pd(dev, pd);
+	if (pd->uctx) {
+		u64 dpp_db = dev->nic_info.dpp_unmapped_addr +
+		    (pd->id * OCRDMA_DPP_PAGE_SIZE);
+		if (pd->dpp_enabled)
+			ocrdma_del_mmap(pd->uctx, dpp_db, OCRDMA_DPP_PAGE_SIZE);
+		usr_db = dev->nic_info.unmapped_db +
+		    (pd->id * dev->nic_info.db_page_size);
+		ocrdma_del_mmap(pd->uctx, usr_db, dev->nic_info.db_page_size);
+	}
+	kfree(pd);
+dealloc_err:
+	return status;
+}
+
+static struct ocrdma_mr *ocrdma_alloc_lkey(struct ib_pd *ibpd,
+					   int acc, u32 num_pbls,
+					   u32 addr_check)
+{
+	int status;
+	struct ocrdma_mr *mr;
+	struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
+	struct ocrdma_dev *dev = pd->dev;
+
+	if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE)) {
+		ocrdma_err("%s(%d) leaving err, invalid access rights\n",
+			   __func__, dev->id);
+		return ERR_PTR(-EINVAL);
+	}
+
+	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+	if (!mr)
+		return ERR_PTR(-ENOMEM);
+	mr->hwmr.dev = dev;
+	mr->hwmr.fr_mr = 0;
+	mr->hwmr.local_rd = 1;
+	mr->hwmr.remote_rd = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
+	mr->hwmr.remote_wr = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
+	mr->hwmr.local_wr = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
+	mr->hwmr.mw_bind = (acc & IB_ACCESS_MW_BIND) ? 1 : 0;
+	mr->hwmr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
+	mr->hwmr.num_pbls = num_pbls;
+
+	status = ocrdma_mbx_alloc_lkey(dev, &mr->hwmr, pd->id, addr_check);
+	if (status) {
+		kfree(mr);
+		return ERR_PTR(status);
+	}
+	mr->pd = pd;
+	atomic_inc(&pd->use_cnt);
+	mr->ibmr.lkey = mr->hwmr.lkey;
+	if (mr->hwmr.remote_wr || mr->hwmr.remote_rd)
+		mr->ibmr.rkey = mr->hwmr.lkey;
+	return mr;
+}
+
+struct ib_mr *ocrdma_get_dma_mr(struct ib_pd *ibpd, int acc)
+{
+	struct ocrdma_mr *mr;
+
+	mr = ocrdma_alloc_lkey(ibpd, acc, 0, OCRDMA_ADDR_CHECK_DISABLE);
+	if (IS_ERR(mr))
+		return ERR_CAST(mr);
+
+	return &mr->ibmr;
+}
+
+static void ocrdma_free_mr_pbl_tbl(struct ocrdma_dev *dev,
+				   struct ocrdma_hw_mr *mr)
+{
+	struct pci_dev *pdev = dev->nic_info.pdev;
+	int i = 0;
+
+	if (mr->pbl_table) {
+		for (i = 0; i < mr->num_pbls; i++) {
+			if (!mr->pbl_table[i].va)
+				continue;
+			dma_free_coherent(&pdev->dev, mr->pbl_size,
+					  mr->pbl_table[i].va,
+					  mr->pbl_table[i].pa);
+		}
+		kfree(mr->pbl_table);
+		mr->pbl_table = NULL;
+	}
+}
+
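+/* pick the smallest pbl size (a power-of-two multiple of
+ * OCRDMA_MIN_HPAGE_SIZE) for which the number of pbls needed to hold
+ * num_pbes 64-bit entries stays below the device limit.
+ */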
+static int ocrdma_get_pbl_info(struct ocrdma_mr *mr, u32 num_pbes)
+{
+	u32 num_pbls = 0;
+	u32 idx = 0;
+	int status = 0;
+	u32 pbl_size;
+
+	do {
+		pbl_size = OCRDMA_MIN_HPAGE_SIZE * (1 << idx);
+		if (pbl_size > MAX_OCRDMA_PBL_SIZE) {
+			status = -EFAULT;
+			break;
+		}
+		num_pbls = DIV_ROUND_UP(num_pbes, pbl_size / sizeof(u64));
+		idx++;
+	} while (num_pbls >= mr->hwmr.dev->attr.max_num_mr_pbl);
+
+	mr->hwmr.num_pbes = num_pbes;
+	mr->hwmr.num_pbls = num_pbls;
+	mr->hwmr.pbl_size = pbl_size;
+	return status;
+}
+
+static int ocrdma_build_pbl_tbl(struct ocrdma_dev *dev, struct ocrdma_hw_mr *mr)
+{
+	int status = 0;
+	int i;
+	u32 dma_len = mr->pbl_size;
+	struct pci_dev *pdev = dev->nic_info.pdev;
+	void *va;
+	dma_addr_t pa;
+
+	mr->pbl_table = kzalloc(sizeof(struct ocrdma_pbl) *
+				mr->num_pbls, GFP_KERNEL);
+
+	if (!mr->pbl_table)
+		return -ENOMEM;
+
+	for (i = 0; i < mr->num_pbls; i++) {
+		va = dma_alloc_coherent(&pdev->dev, dma_len, &pa, GFP_KERNEL);
+		if (!va) {
+			ocrdma_free_mr_pbl_tbl(dev, mr);
+			status = -ENOMEM;
+			break;
+		}
+		memset(va, 0, dma_len);
+		mr->pbl_table[i].va = va;
+		mr->pbl_table[i].pa = pa;
+	}
+	return status;
+}
+
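+/* walk the umem chunk list and store the dma address of each page as a
+ * little-endian lo/hi pair (pbe) in the pbl pages, moving to the next
+ * pbl whenever the current one fills up.
+ */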
+static void build_user_pbes(struct ocrdma_dev *dev, struct ocrdma_mr *mr,
+			    u32 num_pbes)
+{
+	struct ocrdma_pbe *pbe;
+	struct ib_umem_chunk *chunk;
+	struct ocrdma_pbl *pbl_tbl = mr->hwmr.pbl_table;
+	struct ib_umem *umem = mr->umem;
+	int i, shift, pg_cnt, pages, pbe_cnt, total_num_pbes = 0;
+
+	if (!mr->hwmr.num_pbes)
+		return;
+
+	pbe = (struct ocrdma_pbe *)pbl_tbl->va;
+	pbe_cnt = 0;
+
+	shift = ilog2(umem->page_size);
+
+	list_for_each_entry(chunk, &umem->chunk_list, list) {
+		/* get all the dma regions from the chunk. */
+		for (i = 0; i < chunk->nmap; i++) {
+			pages = sg_dma_len(&chunk->page_list[i]) >> shift;
+			for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) {
+				/* store the page address in pbe */
+				pbe->pa_lo =
+				    cpu_to_le32(sg_dma_address
+						(&chunk->page_list[i]) +
+						(umem->page_size * pg_cnt));
+				pbe->pa_hi =
+				    cpu_to_le32(upper_32_bits
+						((sg_dma_address
+						  (&chunk->page_list[i]) +
+						  umem->page_size * pg_cnt)));
+				pbe_cnt += 1;
+				total_num_pbes += 1;
+				pbe++;
+
+				/* done building pbes; caller issues mbx cmd. */
+				if (total_num_pbes == num_pbes)
+					return;
+
+				/* if the given pbl is full storing the pbes,
+				 * move to next pbl.
+				 */
+				if (pbe_cnt ==
+					(mr->hwmr.pbl_size / sizeof(u64))) {
+					pbl_tbl++;
+					pbe = (struct ocrdma_pbe *)pbl_tbl->va;
+					pbe_cnt = 0;
+				}
+			}
+		}
+	}
+}
+
+struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
+				 u64 usr_addr, int acc, struct ib_udata *udata)
+{
+	int status = -ENOMEM;
+	struct ocrdma_dev *dev;
+	struct ocrdma_mr *mr;
+	struct ocrdma_pd *pd;
+	u32 num_pbes;
+
+	pd = get_ocrdma_pd(ibpd);
+	dev = pd->dev;
+
+	if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE))
+		return ERR_PTR(-EINVAL);
+
+	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+	if (!mr)
+		return ERR_PTR(status);
+	mr->hwmr.dev = dev;
+	mr->umem = ib_umem_get(ibpd->uobject->context, start, len, acc, 0);
+	if (IS_ERR(mr->umem)) {
+		status = -EFAULT;
+		goto umem_err;
+	}
+	num_pbes = ib_umem_page_count(mr->umem);
+	status = ocrdma_get_pbl_info(mr, num_pbes);
+	if (status)
+		goto umem_err;
+
+	mr->hwmr.pbe_size = mr->umem->page_size;
+	mr->hwmr.fbo = mr->umem->offset;
+	mr->hwmr.va = usr_addr;
+	mr->hwmr.len = len;
+	mr->hwmr.remote_wr = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
+	mr->hwmr.remote_rd = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
+	mr->hwmr.local_wr = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
+	mr->hwmr.local_rd = 1;
+	mr->hwmr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
+	status = ocrdma_build_pbl_tbl(dev, &mr->hwmr);
+	if (status)
+		goto umem_err;
+	build_user_pbes(dev, mr, num_pbes);
+	status = ocrdma_reg_mr(dev, &mr->hwmr, pd->id, acc);
+	if (status)
+		goto mbx_err;
+	mr->pd = pd;
+	atomic_inc(&pd->use_cnt);
+	mr->ibmr.lkey = mr->hwmr.lkey;
+	if (mr->hwmr.remote_wr || mr->hwmr.remote_rd)
+		mr->ibmr.rkey = mr->hwmr.lkey;
+
+	return &mr->ibmr;
+
+mbx_err:
+	ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
+umem_err:
+	kfree(mr);
+	return ERR_PTR(status);
+}
+
+int ocrdma_dereg_mr(struct ib_mr *ib_mr)
+{
+	struct ocrdma_mr *mr = get_ocrdma_mr(ib_mr);
+	struct ocrdma_dev *dev = mr->hwmr.dev;
+	int status;
+
+	status = ocrdma_mbx_dealloc_lkey(dev, mr->hwmr.fr_mr, mr->hwmr.lkey);
+
+	if (mr->hwmr.fr_mr == 0)
+		ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
+
+	atomic_dec(&mr->pd->use_cnt);
+	/* it could be user registered memory. */
+	if (mr->umem)
+		ib_umem_release(mr->umem);
+	kfree(mr);
+	return status;
+}
+
+static int ocrdma_copy_cq_uresp(struct ocrdma_cq *cq, struct ib_udata *udata,
+				struct ib_ucontext *ib_ctx)
+{
+	int status;
+	struct ocrdma_ucontext *uctx;
+	struct ocrdma_create_cq_uresp uresp;
+
+	uresp.cq_id = cq->id;
+	uresp.page_size = cq->len;
+	uresp.num_pages = 1;
+	uresp.max_hw_cqe = cq->max_hw_cqe;
+	uresp.page_addr[0] = cq->pa;
+	uresp.db_page_addr = cq->dev->nic_info.unmapped_db;
+	uresp.db_page_size = cq->dev->nic_info.db_page_size;
+	uresp.phase_change = cq->phase_change ? 1 : 0;
+	status = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+	if (status) {
+		ocrdma_err("%s(%d) copy error cqid=0x%x.\n",
+			   __func__, cq->dev->id, cq->id);
+		goto err;
+	}
+	uctx = get_ocrdma_ucontext(ib_ctx);
+	status = ocrdma_add_mmap(uctx, uresp.db_page_addr, uresp.db_page_size);
+	if (status)
+		goto err;
+	status = ocrdma_add_mmap(uctx, uresp.page_addr[0], uresp.page_size);
+	if (status) {
+		ocrdma_del_mmap(uctx, uresp.db_page_addr, uresp.db_page_size);
+		goto err;
+	}
+	cq->ucontext = uctx;
+err:
+	return status;
+}
+
+struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, int entries, int vector,
+			       struct ib_ucontext *ib_ctx,
+			       struct ib_udata *udata)
+{
+	struct ocrdma_cq *cq;
+	struct ocrdma_dev *dev = get_ocrdma_dev(ibdev);
+	int status;
+	struct ocrdma_create_cq_ureq ureq;
+
+	if (udata) {
+		if (ib_copy_from_udata(&ureq, udata, sizeof(ureq)))
+			return ERR_PTR(-EFAULT);
+	} else
+		ureq.dpp_cq = 0;
+	cq = kzalloc(sizeof(*cq), GFP_KERNEL);
+	if (!cq)
+		return ERR_PTR(-ENOMEM);
+
+	spin_lock_init(&cq->cq_lock);
+	spin_lock_init(&cq->comp_handler_lock);
+	atomic_set(&cq->use_cnt, 0);
+	INIT_LIST_HEAD(&cq->sq_head);
+	INIT_LIST_HEAD(&cq->rq_head);
+	cq->dev = dev;
+
+	status = ocrdma_mbx_create_cq(dev, cq, entries, ureq.dpp_cq);
+	if (status) {
+		kfree(cq);
+		return ERR_PTR(status);
+	}
+	if (ib_ctx) {
+		status = ocrdma_copy_cq_uresp(cq, udata, ib_ctx);
+		if (status)
+			goto ctx_err;
+	}
+	cq->phase = OCRDMA_CQE_VALID;
+	cq->arm_needed = true;
+	dev->cq_tbl[cq->id] = cq;
+
+	return &cq->ibcq;
+
+ctx_err:
+	ocrdma_mbx_destroy_cq(dev, cq);
+	kfree(cq);
+	return ERR_PTR(status);
+}
+
+int ocrdma_resize_cq(struct ib_cq *ibcq, int new_cnt,
+		     struct ib_udata *udata)
+{
+	int status = 0;
+	struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
+
+	if (new_cnt < 1 || new_cnt > cq->max_hw_cqe) {
+		status = -EINVAL;
+		return status;
+	}
+	ibcq->cqe = new_cnt;
+	return status;
+}
+
+int ocrdma_destroy_cq(struct ib_cq *ibcq)
+{
+	int status;
+	struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
+	struct ocrdma_dev *dev = cq->dev;
+
+	if (atomic_read(&cq->use_cnt))
+		return -EINVAL;
+
+	status = ocrdma_mbx_destroy_cq(dev, cq);
+
+	if (cq->ucontext) {
+		ocrdma_del_mmap(cq->ucontext, (u64) cq->pa, cq->len);
+		ocrdma_del_mmap(cq->ucontext, dev->nic_info.unmapped_db,
+				dev->nic_info.db_page_size);
+	}
+	dev->cq_tbl[cq->id] = NULL;
+
+	kfree(cq);
+	return status;
+}
+
+static int ocrdma_add_qpn_map(struct ocrdma_dev *dev, struct ocrdma_qp *qp)
+{
+	int status = -EINVAL;
+
+	if (qp->id < OCRDMA_MAX_QP && dev->qp_tbl[qp->id] == NULL) {
+		dev->qp_tbl[qp->id] = qp;
+		status = 0;
+	}
+	return status;
+}
+
+static void ocrdma_del_qpn_map(struct ocrdma_dev *dev, struct ocrdma_qp *qp)
+{
+	dev->qp_tbl[qp->id] = NULL;
+}
+
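+/* sanity check create-QP attributes against device limits and the
+ * special (GSI) QP rules.
+ */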
+static int ocrdma_check_qp_params(struct ib_pd *ibpd, struct ocrdma_dev *dev,
+				  struct ib_qp_init_attr *attrs)
+{
+	if (attrs->qp_type != IB_QPT_GSI &&
+	    attrs->qp_type != IB_QPT_RC &&
+	    attrs->qp_type != IB_QPT_UD) {
+		ocrdma_err("%s(%d) unsupported qp type=0x%x requested\n",
+			   __func__, dev->id, attrs->qp_type);
+		return -EINVAL;
+	}
+	if (attrs->cap.max_send_wr > dev->attr.max_wqe) {
+		ocrdma_err("%s(%d) unsupported send_wr=0x%x requested\n",
+			   __func__, dev->id, attrs->cap.max_send_wr);
+		ocrdma_err("%s(%d) supported send_wr=0x%x\n",
+			   __func__, dev->id, dev->attr.max_wqe);
+		return -EINVAL;
+	}
+	if (!attrs->srq && (attrs->cap.max_recv_wr > dev->attr.max_rqe)) {
+		ocrdma_err("%s(%d) unsupported recv_wr=0x%x requested\n",
+			   __func__, dev->id, attrs->cap.max_recv_wr);
+		ocrdma_err("%s(%d) supported recv_wr=0x%x\n",
+			   __func__, dev->id, dev->attr.max_rqe);
+		return -EINVAL;
+	}
+	if (attrs->cap.max_inline_data > dev->attr.max_inline_data) {
+		ocrdma_err("%s(%d) unsupported inline data size=0x%x"
+			   " requested\n", __func__, dev->id,
+			   attrs->cap.max_inline_data);
+		ocrdma_err("%s(%d) supported inline data size=0x%x\n",
+			   __func__, dev->id, dev->attr.max_inline_data);
+		return -EINVAL;
+	}
+	if (attrs->cap.max_send_sge > dev->attr.max_send_sge) {
+		ocrdma_err("%s(%d) unsupported send_sge=0x%x requested\n",
+			   __func__, dev->id, attrs->cap.max_send_sge);
+		ocrdma_err("%s(%d) supported send_sge=0x%x\n",
+			   __func__, dev->id, dev->attr.max_send_sge);
+		return -EINVAL;
+	}
+	if (attrs->cap.max_recv_sge > dev->attr.max_recv_sge) {
+		ocrdma_err("%s(%d) unsupported recv_sge=0x%x requested\n",
+			   __func__, dev->id, attrs->cap.max_recv_sge);
+		ocrdma_err("%s(%d) supported recv_sge=0x%x\n",
+			   __func__, dev->id, dev->attr.max_recv_sge);
+		return -EINVAL;
+	}
+	/* unprivileged user space cannot create special QP */
+	if (ibpd->uobject && attrs->qp_type == IB_QPT_GSI) {
+		ocrdma_err
+		    ("%s(%d) Userspace can't create special QPs of type=0x%x\n",
+		     __func__, dev->id, attrs->qp_type);
+		return -EINVAL;
+	}
+	/* allow creating only one GSI type of QP */
+	if (attrs->qp_type == IB_QPT_GSI && dev->gsi_qp_created) {
+		ocrdma_err("%s(%d) GSI special QPs already created.\n",
+			   __func__, dev->id);
+		return -EINVAL;
+	}
+	/* verify consumer QPs are not trying to use GSI QP's CQ */
+	if ((attrs->qp_type != IB_QPT_GSI) && (dev->gsi_qp_created)) {
+		if ((dev->gsi_sqcq == get_ocrdma_cq(attrs->send_cq)) ||
+		    (dev->gsi_rqcq == get_ocrdma_cq(attrs->recv_cq))) {
+			ocrdma_err("%s(%d) Consumer QP cannot use GSI CQs.\n",
+				   __func__, dev->id);
+			return -EINVAL;
+		}
+	}
+	return 0;
+}
+
+static int ocrdma_copy_qp_uresp(struct ocrdma_qp *qp,
+				struct ib_udata *udata, int dpp_offset,
+				int dpp_credit_lmt, int srq)
+{
+	int status = 0;
+	u64 usr_db;
+	struct ocrdma_create_qp_uresp uresp;
+	struct ocrdma_dev *dev = qp->dev;
+	struct ocrdma_pd *pd = qp->pd;
+
+	memset(&uresp, 0, sizeof(uresp));
+	usr_db = dev->nic_info.unmapped_db +
+			(pd->id * dev->nic_info.db_page_size);
+	uresp.qp_id = qp->id;
+	uresp.sq_dbid = qp->sq.dbid;
+	uresp.num_sq_pages = 1;
+	uresp.sq_page_size = qp->sq.len;
+	uresp.sq_page_addr[0] = qp->sq.pa;
+	uresp.num_wqe_allocated = qp->sq.max_cnt;
+	if (!srq) {
+		uresp.rq_dbid = qp->rq.dbid;
+		uresp.num_rq_pages = 1;
+		uresp.rq_page_size = qp->rq.len;
+		uresp.rq_page_addr[0] = qp->rq.pa;
+		uresp.num_rqe_allocated = qp->rq.max_cnt;
+	}
+	uresp.db_page_addr = usr_db;
+	uresp.db_page_size = dev->nic_info.db_page_size;
+	if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) {
+		uresp.db_sq_offset = OCRDMA_DB_GEN2_SQ_OFFSET;
+		uresp.db_rq_offset = ((qp->id & 0xFFFF) < 128) ?
+			OCRDMA_DB_GEN2_RQ1_OFFSET : OCRDMA_DB_GEN2_RQ2_OFFSET;
+		uresp.db_shift = (qp->id < 128) ? 24 : 16;
+	} else {
+		uresp.db_sq_offset = OCRDMA_DB_SQ_OFFSET;
+		uresp.db_rq_offset = OCRDMA_DB_RQ_OFFSET;
+		uresp.db_shift = 16;
+	}
+	uresp.free_wqe_delta = qp->sq.free_delta;
+	uresp.free_rqe_delta = qp->rq.free_delta;
+
+	if (qp->dpp_enabled) {
+		uresp.dpp_credit = dpp_credit_lmt;
+		uresp.dpp_offset = dpp_offset;
+	}
+	status = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+	if (status) {
+		ocrdma_err("%s(%d) user copy error.\n", __func__, dev->id);
+		goto err;
+	}
+	status = ocrdma_add_mmap(pd->uctx, uresp.sq_page_addr[0],
+				 uresp.sq_page_size);
+	if (status)
+		goto err;
+
+	if (!srq) {
+		status = ocrdma_add_mmap(pd->uctx, uresp.rq_page_addr[0],
+					 uresp.rq_page_size);
+		if (status)
+			goto rq_map_err;
+	}
+	return status;
+rq_map_err:
+	ocrdma_del_mmap(pd->uctx, uresp.sq_page_addr[0], uresp.sq_page_size);
+err:
+	return status;
+}
+
+static void ocrdma_set_qp_db(struct ocrdma_dev *dev, struct ocrdma_qp *qp,
+			     struct ocrdma_pd *pd)
+{
+	if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) {
+		qp->sq_db = dev->nic_info.db +
+			(pd->id * dev->nic_info.db_page_size) +
+			OCRDMA_DB_GEN2_SQ_OFFSET;
+		qp->rq_db = dev->nic_info.db +
+			(pd->id * dev->nic_info.db_page_size) +
+			((qp->id < 128) ?
+			OCRDMA_DB_GEN2_RQ1_OFFSET : OCRDMA_DB_GEN2_RQ2_OFFSET);
+	} else {
+		qp->sq_db = dev->nic_info.db +
+			(pd->id * dev->nic_info.db_page_size) +
+			OCRDMA_DB_SQ_OFFSET;
+		qp->rq_db = dev->nic_info.db +
+			(pd->id * dev->nic_info.db_page_size) +
+			OCRDMA_DB_RQ_OFFSET;
+	}
+}
+
+static int ocrdma_alloc_wr_id_tbl(struct ocrdma_qp *qp)
+{
+	qp->wqe_wr_id_tbl =
+	    kzalloc(sizeof(*(qp->wqe_wr_id_tbl)) * qp->sq.max_cnt,
+		    GFP_KERNEL);
+	if (qp->wqe_wr_id_tbl == NULL)
+		return -ENOMEM;
+	qp->rqe_wr_id_tbl =
+	    kzalloc(sizeof(u64) * qp->rq.max_cnt, GFP_KERNEL);
+	if (qp->rqe_wr_id_tbl == NULL)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void ocrdma_set_qp_init_params(struct ocrdma_qp *qp,
+				      struct ocrdma_pd *pd,
+				      struct ib_qp_init_attr *attrs)
+{
+	qp->pd = pd;
+	spin_lock_init(&qp->q_lock);
+	INIT_LIST_HEAD(&qp->sq_entry);
+	INIT_LIST_HEAD(&qp->rq_entry);
+
+	qp->qp_type = attrs->qp_type;
+	qp->cap_flags = OCRDMA_QP_INB_RD | OCRDMA_QP_INB_WR;
+	qp->max_inline_data = attrs->cap.max_inline_data;
+	qp->sq.max_sges = attrs->cap.max_send_sge;
+	qp->rq.max_sges = attrs->cap.max_recv_sge;
+	qp->state = OCRDMA_QPS_RST;
+}
+
+static void ocrdma_set_qp_use_cnt(struct ocrdma_qp *qp, struct ocrdma_pd *pd)
+{
+	atomic_inc(&pd->use_cnt);
+	atomic_inc(&qp->sq_cq->use_cnt);
+	atomic_inc(&qp->rq_cq->use_cnt);
+	if (qp->srq)
+		atomic_inc(&qp->srq->use_cnt);
+	qp->ibqp.qp_num = qp->id;
+}
+
+static void ocrdma_store_gsi_qp_cq(struct ocrdma_dev *dev,
+				   struct ib_qp_init_attr *attrs)
+{
+	if (attrs->qp_type == IB_QPT_GSI) {
+		dev->gsi_qp_created = 1;
+		dev->gsi_sqcq = get_ocrdma_cq(attrs->send_cq);
+		dev->gsi_rqcq = get_ocrdma_cq(attrs->recv_cq);
+	}
+}
+
+struct ib_qp *ocrdma_create_qp(struct ib_pd *ibpd,
+			       struct ib_qp_init_attr *attrs,
+			       struct ib_udata *udata)
+{
+	int status;
+	struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
+	struct ocrdma_qp *qp;
+	struct ocrdma_dev *dev = pd->dev;
+	struct ocrdma_create_qp_ureq ureq;
+	u16 dpp_credit_lmt, dpp_offset;
+
+	status = ocrdma_check_qp_params(ibpd, dev, attrs);
+	if (status)
+		goto gen_err;
+
+	memset(&ureq, 0, sizeof(ureq));
+	if (udata) {
+		if (ib_copy_from_udata(&ureq, udata, sizeof(ureq)))
+			return ERR_PTR(-EFAULT);
+	}
+	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
+	if (!qp) {
+		status = -ENOMEM;
+		goto gen_err;
+	}
+	qp->dev = dev;
+	ocrdma_set_qp_init_params(qp, pd, attrs);
+
+	mutex_lock(&dev->dev_lock);
+	status = ocrdma_mbx_create_qp(qp, attrs, ureq.enable_dpp_cq,
+					ureq.dpp_cq_id,
+					&dpp_offset, &dpp_credit_lmt);
+	if (status)
+		goto mbx_err;
+
+	/* userspace QPs' wr_id tables are managed by the library */
+	if (udata == NULL) {
+		qp->cap_flags |= (OCRDMA_QP_MW_BIND | OCRDMA_QP_LKEY0 |
+				  OCRDMA_QP_FAST_REG);
+		status = ocrdma_alloc_wr_id_tbl(qp);
+		if (status)
+			goto map_err;
+	}
+
+	status = ocrdma_add_qpn_map(dev, qp);
+	if (status)
+		goto map_err;
+	ocrdma_set_qp_db(dev, qp, pd);
+	if (udata) {
+		status = ocrdma_copy_qp_uresp(qp, udata, dpp_offset,
+					      dpp_credit_lmt,
+					      (attrs->srq != NULL));
+		if (status)
+			goto cpy_err;
+	}
+	ocrdma_store_gsi_qp_cq(dev, attrs);
+	ocrdma_set_qp_use_cnt(qp, pd);
+	mutex_unlock(&dev->dev_lock);
+	return &qp->ibqp;
+
+cpy_err:
+	ocrdma_del_qpn_map(dev, qp);
+map_err:
+	ocrdma_mbx_destroy_qp(dev, qp);
+mbx_err:
+	mutex_unlock(&dev->dev_lock);
+	kfree(qp->wqe_wr_id_tbl);
+	kfree(qp->rqe_wr_id_tbl);
+	kfree(qp);
+	ocrdma_err("%s(%d) error=%d\n", __func__, dev->id, status);
+gen_err:
+	return ERR_PTR(status);
+}
+
+int _ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+		      int attr_mask)
+{
+	int status = 0;
+	struct ocrdma_qp *qp;
+	struct ocrdma_dev *dev;
+	enum ib_qp_state old_qps;
+
+	qp = get_ocrdma_qp(ibqp);
+	dev = qp->dev;
+	if (attr_mask & IB_QP_STATE)
+		status = ocrdma_qp_state_machine(qp, attr->qp_state, &old_qps);
+	/* if the new and previous states are the same, the hw doesn't
+	 * need to be informed.
+	 */
+	if (status < 0)
+		return status;
+	status = ocrdma_mbx_modify_qp(dev, qp, attr, attr_mask, old_qps);
+	return status;
+}
+
+int ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+		     int attr_mask, struct ib_udata *udata)
+{
+	unsigned long flags;
+	int status = -EINVAL;
+	struct ocrdma_qp *qp;
+	struct ocrdma_dev *dev;
+	enum ib_qp_state old_qps, new_qps;
+
+	qp = get_ocrdma_qp(ibqp);
+	dev = qp->dev;
+
+	/* synchronize with multiple contexts changing/retrieving qp state */
+	mutex_lock(&dev->dev_lock);
+	/* synchronize with wqe/rqe posting and cqe processing contexts */
+	spin_lock_irqsave(&qp->q_lock, flags);
+	old_qps = get_ibqp_state(qp->state);
+	if (attr_mask & IB_QP_STATE)
+		new_qps = attr->qp_state;
+	else
+		new_qps = old_qps;
+	spin_unlock_irqrestore(&qp->q_lock, flags);
+
+	if (!ib_modify_qp_is_ok(old_qps, new_qps, ibqp->qp_type, attr_mask)) {
+		ocrdma_err("%s(%d) invalid attribute mask=0x%x specified for "
+			   "qpn=0x%x of type=0x%x old_qps=0x%x, new_qps=0x%x\n",
+			   __func__, dev->id, attr_mask, qp->id, ibqp->qp_type,
+			   old_qps, new_qps);
+		goto param_err;
+	}
+
+	status = _ocrdma_modify_qp(ibqp, attr, attr_mask);
+	if (status > 0)
+		status = 0;
+param_err:
+	mutex_unlock(&dev->dev_lock);
+	return status;
+}
+
+static enum ib_mtu ocrdma_mtu_int_to_enum(u16 mtu)
+{
+	switch (mtu) {
+	case 256:
+		return IB_MTU_256;
+	case 512:
+		return IB_MTU_512;
+	case 1024:
+		return IB_MTU_1024;
+	case 2048:
+		return IB_MTU_2048;
+	case 4096:
+		return IB_MTU_4096;
+	default:
+		return IB_MTU_1024;
+	}
+}
+
+static int ocrdma_to_ib_qp_acc_flags(int qp_cap_flags)
+{
+	int ib_qp_acc_flags = 0;
+
+	if (qp_cap_flags & OCRDMA_QP_INB_WR)
+		ib_qp_acc_flags |= IB_ACCESS_REMOTE_WRITE;
+	if (qp_cap_flags & OCRDMA_QP_INB_RD)
+		ib_qp_acc_flags |= IB_ACCESS_LOCAL_WRITE;
+	return ib_qp_acc_flags;
+}
+
+int ocrdma_query_qp(struct ib_qp *ibqp,
+		    struct ib_qp_attr *qp_attr,
+		    int attr_mask, struct ib_qp_init_attr *qp_init_attr)
+{
+	int status;
+	u32 qp_state;
+	struct ocrdma_qp_params params;
+	struct ocrdma_qp *qp = get_ocrdma_qp(ibqp);
+	struct ocrdma_dev *dev = qp->dev;
+
+	memset(&params, 0, sizeof(params));
+	mutex_lock(&dev->dev_lock);
+	status = ocrdma_mbx_query_qp(dev, qp, &params);
+	mutex_unlock(&dev->dev_lock);
+	if (status)
+		goto mbx_err;
+	qp_attr->qp_state = get_ibqp_state(IB_QPS_INIT);
+	qp_attr->cur_qp_state = get_ibqp_state(IB_QPS_INIT);
+	qp_attr->path_mtu =
+		ocrdma_mtu_int_to_enum(params.path_mtu_pkey_indx &
+				OCRDMA_QP_PARAMS_PATH_MTU_MASK) >>
+				OCRDMA_QP_PARAMS_PATH_MTU_SHIFT;
+	qp_attr->path_mig_state = IB_MIG_MIGRATED;
+	qp_attr->rq_psn = params.hop_lmt_rq_psn & OCRDMA_QP_PARAMS_RQ_PSN_MASK;
+	qp_attr->sq_psn = params.tclass_sq_psn & OCRDMA_QP_PARAMS_SQ_PSN_MASK;
+	qp_attr->dest_qp_num =
+	    params.ack_to_rnr_rtc_dest_qpn & OCRDMA_QP_PARAMS_DEST_QPN_MASK;
+
+	qp_attr->qp_access_flags = ocrdma_to_ib_qp_acc_flags(qp->cap_flags);
+	qp_attr->cap.max_send_wr = qp->sq.max_cnt - 1;
+	qp_attr->cap.max_recv_wr = qp->rq.max_cnt - 1;
+	qp_attr->cap.max_send_sge = qp->sq.max_sges;
+	qp_attr->cap.max_recv_sge = qp->rq.max_sges;
+	qp_attr->cap.max_inline_data = dev->attr.max_inline_data;
+	qp_init_attr->cap = qp_attr->cap;
+	memcpy(&qp_attr->ah_attr.grh.dgid, &params.dgid[0],
+	       sizeof(params.dgid));
+	qp_attr->ah_attr.grh.flow_label = params.rnt_rc_sl_fl &
+	    OCRDMA_QP_PARAMS_FLOW_LABEL_MASK;
+	qp_attr->ah_attr.grh.sgid_index = qp->sgid_idx;
+	qp_attr->ah_attr.grh.hop_limit = (params.hop_lmt_rq_psn &
+					  OCRDMA_QP_PARAMS_HOP_LMT_MASK) >>
+						OCRDMA_QP_PARAMS_HOP_LMT_SHIFT;
+	qp_attr->ah_attr.grh.traffic_class = (params.tclass_sq_psn &
+					      OCRDMA_QP_PARAMS_SQ_PSN_MASK) >>
+						OCRDMA_QP_PARAMS_TCLASS_SHIFT;
+
+	qp_attr->ah_attr.ah_flags = IB_AH_GRH;
+	qp_attr->ah_attr.port_num = 1;
+	qp_attr->ah_attr.sl = (params.rnt_rc_sl_fl &
+			       OCRDMA_QP_PARAMS_SL_MASK) >>
+				OCRDMA_QP_PARAMS_SL_SHIFT;
+	qp_attr->timeout = (params.ack_to_rnr_rtc_dest_qpn &
+			    OCRDMA_QP_PARAMS_ACK_TIMEOUT_MASK) >>
+				OCRDMA_QP_PARAMS_ACK_TIMEOUT_SHIFT;
+	qp_attr->rnr_retry = (params.ack_to_rnr_rtc_dest_qpn &
+			      OCRDMA_QP_PARAMS_RNR_RETRY_CNT_MASK) >>
+				OCRDMA_QP_PARAMS_RNR_RETRY_CNT_SHIFT;
+	qp_attr->retry_cnt =
+	    (params.rnt_rc_sl_fl & OCRDMA_QP_PARAMS_RETRY_CNT_MASK) >>
+		OCRDMA_QP_PARAMS_RETRY_CNT_SHIFT;
+	qp_attr->min_rnr_timer = 0;
+	qp_attr->pkey_index = 0;
+	qp_attr->port_num = 1;
+	qp_attr->ah_attr.src_path_bits = 0;
+	qp_attr->ah_attr.static_rate = 0;
+	qp_attr->alt_pkey_index = 0;
+	qp_attr->alt_port_num = 0;
+	qp_attr->alt_timeout = 0;
+	memset(&qp_attr->alt_ah_attr, 0, sizeof(qp_attr->alt_ah_attr));
+	qp_state = (params.max_sge_recv_flags & OCRDMA_QP_PARAMS_STATE_MASK) >>
+		    OCRDMA_QP_PARAMS_STATE_SHIFT;
+	qp_attr->sq_draining = (qp_state == OCRDMA_QPS_SQ_DRAINING) ? 1 : 0;
+	qp_attr->max_dest_rd_atomic =
+	    params.max_ord_ird >> OCRDMA_QP_PARAMS_MAX_ORD_SHIFT;
+	qp_attr->max_rd_atomic =
+	    params.max_ord_ird & OCRDMA_QP_PARAMS_MAX_IRD_MASK;
+	qp_attr->en_sqd_async_notify = (params.max_sge_recv_flags &
+				OCRDMA_QP_PARAMS_FLAGS_SQD_ASYNC) ? 1 : 0;
+mbx_err:
+	return status;
+}
+
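+/* flip the free/busy bit of the given SRQ shadow table index. */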
+static void ocrdma_srq_toggle_bit(struct ocrdma_srq *srq, int idx)
+{
+	int i = idx / 32;
+	unsigned int mask = (1 << (idx % 32));
+
+	if (srq->idx_bit_fields[i] & mask)
+		srq->idx_bit_fields[i] &= ~mask;
+	else
+		srq->idx_bit_fields[i] |= mask;
+}
+
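+/* number of free slots in the circular work queue; free_delta, when
+ * set, reserves entries that must stay unused.
+ */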
+static int ocrdma_hwq_free_cnt(struct ocrdma_qp_hwq_info *q)
+{
+	int free_cnt;
+	if (q->head >= q->tail)
+		free_cnt = (q->max_cnt - q->head) + q->tail;
+	else
+		free_cnt = q->tail - q->head;
+	if (q->free_delta)
+		free_cnt -= q->free_delta;
+	return free_cnt;
+}
+
+static int is_hw_sq_empty(struct ocrdma_qp *qp)
+{
+	return (qp->sq.tail == qp->sq.head &&
+		ocrdma_hwq_free_cnt(&qp->sq)) ? 1 : 0;
+}
+
+static int is_hw_rq_empty(struct ocrdma_qp *qp)
+{
+	return (qp->rq.tail == qp->rq.head) ? 1 : 0;
+}
+
+static void *ocrdma_hwq_head(struct ocrdma_qp_hwq_info *q)
+{
+	return q->va + (q->head * q->entry_size);
+}
+
+static void *ocrdma_hwq_head_from_idx(struct ocrdma_qp_hwq_info *q,
+				      u32 idx)
+{
+	return q->va + (idx * q->entry_size);
+}
+
+static void ocrdma_hwq_inc_head(struct ocrdma_qp_hwq_info *q)
+{
+	q->head = (q->head + 1) & q->max_wqe_idx;
+}
+
+static void ocrdma_hwq_inc_tail(struct ocrdma_qp_hwq_info *q)
+{
+	q->tail = (q->tail + 1) & q->max_wqe_idx;
+}
+
+/* discard the cqe for a given QP */
+static void ocrdma_discard_cqes(struct ocrdma_qp *qp, struct ocrdma_cq *cq)
+{
+	unsigned long cq_flags;
+	unsigned long flags;
+	int discard_cnt = 0;
+	u32 cur_getp, stop_getp;
+	struct ocrdma_cqe *cqe;
+	u32 qpn = 0;
+
+	spin_lock_irqsave(&cq->cq_lock, cq_flags);
+
+	/* traverse the CQEs in the hw CQ, find CQEs matching the given
+	 * qp and mark them discarded by clearing the qpn. the doorbell
+	 * is rung in poll_cq() since we don't complete cqes out of
+	 * order.
+	 */
+
+	cur_getp = cq->getp;
+	/* find up to what point we reap the cq. */
+	stop_getp = cur_getp;
+	do {
+		if (is_hw_sq_empty(qp) && (!qp->srq && is_hw_rq_empty(qp)))
+			break;
+
+		cqe = cq->va + cur_getp;
+		/* exit when (a) the whole hw cq has been reaped, or
+		 * (b) the qp's sq/rq becomes empty.
+		 */
+		qpn = cqe->cmn.qpn & OCRDMA_CQE_QPN_MASK;
+		/* if previously discarded cqe found, skip that too. */
+		/* check for matching qp */
+		if (qpn == 0 || qpn != qp->id)
+			goto skip_cqe;
+
+		/* mark cqe discarded so that it is not picked up later
+		 * in the poll_cq().
+		 */
+		discard_cnt += 1;
+		cqe->cmn.qpn = 0;
+		if (is_cqe_for_sq(cqe))
+			ocrdma_hwq_inc_tail(&qp->sq);
+		else {
+			if (qp->srq) {
+				spin_lock_irqsave(&qp->srq->q_lock, flags);
+				ocrdma_hwq_inc_tail(&qp->srq->rq);
+				ocrdma_srq_toggle_bit(qp->srq, cur_getp);
+				spin_unlock_irqrestore(&qp->srq->q_lock, flags);
+
+			} else
+				ocrdma_hwq_inc_tail(&qp->rq);
+		}
+skip_cqe:
+		cur_getp = (cur_getp + 1) % cq->max_hw_cqe;
+	} while (cur_getp != stop_getp);
+	spin_unlock_irqrestore(&cq->cq_lock, cq_flags);
+}
+
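+/* take the QP off the CQ flush lists, synchronized against concurrent
+ * CQ polling via flush_q_lock.
+ */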
+static void ocrdma_del_flush_qp(struct ocrdma_qp *qp)
+{
+	int found = false;
+	unsigned long flags;
+	struct ocrdma_dev *dev = qp->dev;
+	/* sync with any active CQ poll */
+
+	spin_lock_irqsave(&dev->flush_q_lock, flags);
+	found = ocrdma_is_qp_in_sq_flushlist(qp->sq_cq, qp);
+	if (found)
+		list_del(&qp->sq_entry);
+	if (!qp->srq) {
+		found = ocrdma_is_qp_in_rq_flushlist(qp->rq_cq, qp);
+		if (found)
+			list_del(&qp->rq_entry);
+	}
+	spin_unlock_irqrestore(&dev->flush_q_lock, flags);
+}
+
+int ocrdma_destroy_qp(struct ib_qp *ibqp)
+{
+	int status;
+	struct ocrdma_pd *pd;
+	struct ocrdma_qp *qp;
+	struct ocrdma_dev *dev;
+	struct ib_qp_attr attrs;
+	int attr_mask = IB_QP_STATE;
+	unsigned long flags;
+
+	qp = get_ocrdma_qp(ibqp);
+	dev = qp->dev;
+
+	attrs.qp_state = IB_QPS_ERR;
+	pd = qp->pd;
+
+	/* change the QP state to ERROR */
+	_ocrdma_modify_qp(ibqp, &attrs, attr_mask);
+
+	/* ensure that CQEs for a newly created QP (whose id may be the
+	 * same as that of the QP just being destroyed) don't get
+	 * discarded until the old CQEs are discarded.
+	 */
+	mutex_lock(&dev->dev_lock);
+	status = ocrdma_mbx_destroy_qp(dev, qp);
+
+	/*
+	 * acquire CQ lock while destroy is in progress, in order to
+	 * protect against processing in-flight CQEs for this QP.
+	 */
+	spin_lock_irqsave(&qp->sq_cq->cq_lock, flags);
+	if (qp->rq_cq && (qp->rq_cq != qp->sq_cq))
+		spin_lock(&qp->rq_cq->cq_lock);
+
+	ocrdma_del_qpn_map(dev, qp);
+
+	if (qp->rq_cq && (qp->rq_cq != qp->sq_cq))
+		spin_unlock(&qp->rq_cq->cq_lock);
+	spin_unlock_irqrestore(&qp->sq_cq->cq_lock, flags);
+
+	if (!pd->uctx) {
+		ocrdma_discard_cqes(qp, qp->sq_cq);
+		ocrdma_discard_cqes(qp, qp->rq_cq);
+	}
+	mutex_unlock(&dev->dev_lock);
+
+	if (pd->uctx) {
+		ocrdma_del_mmap(pd->uctx, (u64) qp->sq.pa, qp->sq.len);
+		if (!qp->srq)
+			ocrdma_del_mmap(pd->uctx, (u64) qp->rq.pa, qp->rq.len);
+	}
+
+	ocrdma_del_flush_qp(qp);
+
+	atomic_dec(&qp->pd->use_cnt);
+	atomic_dec(&qp->sq_cq->use_cnt);
+	atomic_dec(&qp->rq_cq->use_cnt);
+	if (qp->srq)
+		atomic_dec(&qp->srq->use_cnt);
+	kfree(qp->wqe_wr_id_tbl);
+	kfree(qp->rqe_wr_id_tbl);
+	kfree(qp);
+	return status;
+}
+
+static int ocrdma_copy_srq_uresp(struct ocrdma_srq *srq, struct ib_udata *udata)
+{
+	int status;
+	struct ocrdma_create_srq_uresp uresp;
+
+	uresp.rq_dbid = srq->rq.dbid;
+	uresp.num_rq_pages = 1;
+	uresp.rq_page_addr[0] = srq->rq.pa;
+	uresp.rq_page_size = srq->rq.len;
+	uresp.db_page_addr = srq->dev->nic_info.unmapped_db +
+	    (srq->pd->id * srq->dev->nic_info.db_page_size);
+	uresp.db_page_size = srq->dev->nic_info.db_page_size;
+	uresp.num_rqe_allocated = srq->rq.max_cnt;
+	uresp.free_rqe_delta = 1;
+	if (srq->dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) {
+		uresp.db_rq_offset = OCRDMA_DB_GEN2_RQ1_OFFSET;
+		uresp.db_shift = 24;
+	} else {
+		uresp.db_rq_offset = OCRDMA_DB_RQ_OFFSET;
+		uresp.db_shift = 16;
+	}
+
+	status = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+	if (status)
+		return status;
+	status = ocrdma_add_mmap(srq->pd->uctx, uresp.rq_page_addr[0],
+				 uresp.rq_page_size);
+	return status;
+}
+
+struct ib_srq *ocrdma_create_srq(struct ib_pd *ibpd,
+				 struct ib_srq_init_attr *init_attr,
+				 struct ib_udata *udata)
+{
+	int status = -ENOMEM;
+	struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
+	struct ocrdma_dev *dev = pd->dev;
+	struct ocrdma_srq *srq;
+
+	if (init_attr->attr.max_sge > dev->attr.max_recv_sge)
+		return ERR_PTR(-EINVAL);
+	if (init_attr->attr.max_wr > dev->attr.max_rqe)
+		return ERR_PTR(-EINVAL);
+
+	srq = kzalloc(sizeof(*srq), GFP_KERNEL);
+	if (!srq)
+		return ERR_PTR(status);
+
+	spin_lock_init(&srq->q_lock);
+	srq->dev = dev;
+	srq->pd = pd;
+	srq->db = dev->nic_info.db + (pd->id * dev->nic_info.db_page_size);
+	status = ocrdma_mbx_create_srq(srq, init_attr, pd);
+	if (status)
+		goto err;
+
+	if (udata == NULL) {
+		srq->rqe_wr_id_tbl = kzalloc(sizeof(u64) * srq->rq.max_cnt,
+			    GFP_KERNEL);
+		if (srq->rqe_wr_id_tbl == NULL) {
+			status = -ENOMEM;
+			goto arm_err;
+		}
+
+		srq->bit_fields_len = (srq->rq.max_cnt / 32) +
+		    (srq->rq.max_cnt % 32 ? 1 : 0);
+		srq->idx_bit_fields =
+		    kmalloc(srq->bit_fields_len * sizeof(u32), GFP_KERNEL);
+		if (srq->idx_bit_fields == NULL) {
+			status = -ENOMEM;
+			goto arm_err;
+		}
+		memset(srq->idx_bit_fields, 0xff,
+		       srq->bit_fields_len * sizeof(u32));
+	}
+
+	if (init_attr->attr.srq_limit) {
+		status = ocrdma_mbx_modify_srq(srq, &init_attr->attr);
+		if (status)
+			goto arm_err;
+	}
+
+	atomic_set(&srq->use_cnt, 0);
+	if (udata) {
+		status = ocrdma_copy_srq_uresp(srq, udata);
+		if (status)
+			goto arm_err;
+	}
+
+	atomic_inc(&pd->use_cnt);
+	return &srq->ibsrq;
+
+arm_err:
+	ocrdma_mbx_destroy_srq(dev, srq);
+err:
+	kfree(srq->rqe_wr_id_tbl);
+	kfree(srq->idx_bit_fields);
+	kfree(srq);
+	return ERR_PTR(status);
+}
+
+int ocrdma_modify_srq(struct ib_srq *ibsrq,
+		      struct ib_srq_attr *srq_attr,
+		      enum ib_srq_attr_mask srq_attr_mask,
+		      struct ib_udata *udata)
+{
+	int status = 0;
+	struct ocrdma_srq *srq;
+
+	srq = get_ocrdma_srq(ibsrq);
+	if (srq_attr_mask & IB_SRQ_MAX_WR)
+		status = -EINVAL;
+	else
+		status = ocrdma_mbx_modify_srq(srq, srq_attr);
+	return status;
+}
+
+int ocrdma_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
+{
+	int status;
+	struct ocrdma_srq *srq;
+
+	srq = get_ocrdma_srq(ibsrq);
+	status = ocrdma_mbx_query_srq(srq, srq_attr);
+	return status;
+}
+
+int ocrdma_destroy_srq(struct ib_srq *ibsrq)
+{
+	int status;
+	struct ocrdma_srq *srq;
+	struct ocrdma_dev *dev;
+
+	srq = get_ocrdma_srq(ibsrq);
+	dev = srq->dev;
+	if (atomic_read(&srq->use_cnt)) {
+		ocrdma_err("%s(%d) err, srq=0x%x in use\n",
+			   __func__, dev->id, srq->id);
+		return -EAGAIN;
+	}
+
+	status = ocrdma_mbx_destroy_srq(dev, srq);
+
+	if (srq->pd->uctx)
+		ocrdma_del_mmap(srq->pd->uctx, (u64) srq->rq.pa, srq->rq.len);
+
+	atomic_dec(&srq->pd->use_cnt);
+	kfree(srq->idx_bit_fields);
+	kfree(srq->rqe_wr_id_tbl);
+	kfree(srq);
+	return status;
+}
+
+/* unprivileged verbs and their support functions. */
+static void ocrdma_build_ud_hdr(struct ocrdma_qp *qp,
+				struct ocrdma_hdr_wqe *hdr,
+				struct ib_send_wr *wr)
+{
+	struct ocrdma_ewqe_ud_hdr *ud_hdr =
+		(struct ocrdma_ewqe_ud_hdr *)(hdr + 1);
+	struct ocrdma_ah *ah = get_ocrdma_ah(wr->wr.ud.ah);
+
+	ud_hdr->rsvd_dest_qpn = wr->wr.ud.remote_qpn;
+	if (qp->qp_type == IB_QPT_GSI)
+		ud_hdr->qkey = qp->qkey;
+	else
+		ud_hdr->qkey = wr->wr.ud.remote_qkey;
+	ud_hdr->rsvd_ahid = ah->id;
+}
+
+static void ocrdma_build_sges(struct ocrdma_hdr_wqe *hdr,
+			      struct ocrdma_sge *sge, int num_sge,
+			      struct ib_sge *sg_list)
+{
+	int i;
+
+	for (i = 0; i < num_sge; i++) {
+		sge[i].lrkey = sg_list[i].lkey;
+		sge[i].addr_lo = sg_list[i].addr;
+		sge[i].addr_hi = upper_32_bits(sg_list[i].addr);
+		sge[i].len = sg_list[i].length;
+		hdr->total_len += sg_list[i].length;
+	}
+	if (num_sge == 0)
+		memset(sge, 0, sizeof(*sge));
+}
+
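+/* encode either an inline payload (copied directly into the wqe) or a
+ * list of lkey SGEs, and record the resulting wqe size (in
+ * OCRDMA_WQE_STRIDE units) in the header control word.
+ */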
+static int ocrdma_build_inline_sges(struct ocrdma_qp *qp,
+				    struct ocrdma_hdr_wqe *hdr,
+				    struct ocrdma_sge *sge,
+				    struct ib_send_wr *wr, u32 wqe_size)
+{
+	if (wr->send_flags & IB_SEND_INLINE) {
+		if (wr->sg_list[0].length > qp->max_inline_data) {
+			ocrdma_err("%s() supported_len=0x%x,"
+				" unsupported len req=0x%x\n", __func__,
+				qp->max_inline_data, wr->sg_list[0].length);
+			return -EINVAL;
+		}
+		memcpy(sge,
+		       (void *)(unsigned long)wr->sg_list[0].addr,
+		       wr->sg_list[0].length);
+		hdr->total_len = wr->sg_list[0].length;
+		wqe_size += roundup(hdr->total_len, OCRDMA_WQE_ALIGN_BYTES);
+		hdr->cw |= (OCRDMA_TYPE_INLINE << OCRDMA_WQE_TYPE_SHIFT);
+	} else {
+		ocrdma_build_sges(hdr, sge, wr->num_sge, wr->sg_list);
+		if (wr->num_sge)
+			wqe_size += (wr->num_sge * sizeof(struct ocrdma_sge));
+		else
+			wqe_size += sizeof(struct ocrdma_sge);
+		hdr->cw |= (OCRDMA_TYPE_LKEY << OCRDMA_WQE_TYPE_SHIFT);
+	}
+	hdr->cw |= ((wqe_size / OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT);
+	return 0;
+}
+
+static int ocrdma_build_send(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
+			     struct ib_send_wr *wr)
+{
+	int status;
+	struct ocrdma_sge *sge;
+	u32 wqe_size = sizeof(*hdr);
+
+	if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI) {
+		ocrdma_build_ud_hdr(qp, hdr, wr);
+		sge = (struct ocrdma_sge *)(hdr + 2);
+		wqe_size += sizeof(struct ocrdma_ewqe_ud_hdr);
+	} else
+		sge = (struct ocrdma_sge *)(hdr + 1);
+
+	status = ocrdma_build_inline_sges(qp, hdr, sge, wr, wqe_size);
+	return status;
+}
+
+static int ocrdma_build_write(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
+			      struct ib_send_wr *wr)
+{
+	int status;
+	struct ocrdma_sge *ext_rw = (struct ocrdma_sge *)(hdr + 1);
+	struct ocrdma_sge *sge = ext_rw + 1;
+	u32 wqe_size = sizeof(*hdr) + sizeof(*ext_rw);
+
+	status = ocrdma_build_inline_sges(qp, hdr, sge, wr, wqe_size);
+	if (status)
+		return status;
+	ext_rw->addr_lo = wr->wr.rdma.remote_addr;
+	ext_rw->addr_hi = upper_32_bits(wr->wr.rdma.remote_addr);
+	ext_rw->lrkey = wr->wr.rdma.rkey;
+	ext_rw->len = hdr->total_len;
+	return 0;
+}
+
+static void ocrdma_build_read(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
+			      struct ib_send_wr *wr)
+{
+	struct ocrdma_sge *ext_rw = (struct ocrdma_sge *)(hdr + 1);
+	struct ocrdma_sge *sge = ext_rw + 1;
+	u32 wqe_size = ((wr->num_sge + 1) * sizeof(struct ocrdma_sge)) +
+	    sizeof(struct ocrdma_hdr_wqe);
+
+	ocrdma_build_sges(hdr, sge, wr->num_sge, wr->sg_list);
+	hdr->cw |= ((wqe_size / OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT);
+	hdr->cw |= (OCRDMA_READ << OCRDMA_WQE_OPCODE_SHIFT);
+	hdr->cw |= (OCRDMA_TYPE_LKEY << OCRDMA_WQE_TYPE_SHIFT);
+
+	ext_rw->addr_lo = wr->wr.rdma.remote_addr;
+	ext_rw->addr_hi = upper_32_bits(wr->wr.rdma.remote_addr);
+	ext_rw->lrkey = wr->wr.rdma.rkey;
+	ext_rw->len = hdr->total_len;
+}
+
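+/* ring the SQ doorbell: the low 16 bits carry the queue id and bit 16
+ * the number of wqes posted (always one per call here).
+ */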
+static void ocrdma_ring_sq_db(struct ocrdma_qp *qp)
+{
+	u32 val = qp->sq.dbid | (1 << 16);
+
+	iowrite32(val, qp->sq_db);
+}
+
+int ocrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+		     struct ib_send_wr **bad_wr)
+{
+	int status = 0;
+	struct ocrdma_qp *qp = get_ocrdma_qp(ibqp);
+	struct ocrdma_hdr_wqe *hdr;
+	unsigned long flags;
+
+	spin_lock_irqsave(&qp->q_lock, flags);
+	if (qp->state != OCRDMA_QPS_RTS && qp->state != OCRDMA_QPS_SQD) {
+		spin_unlock_irqrestore(&qp->q_lock, flags);
+		return -EINVAL;
+	}
+
+	while (wr) {
+		if (ocrdma_hwq_free_cnt(&qp->sq) == 0 ||
+		    wr->num_sge > qp->sq.max_sges) {
+			status = -ENOMEM;
+			break;
+		}
+		hdr = ocrdma_hwq_head(&qp->sq);
+		hdr->cw = 0;
+		if (wr->send_flags & IB_SEND_SIGNALED)
+			hdr->cw |= (OCRDMA_FLAG_SIG << OCRDMA_WQE_FLAGS_SHIFT);
+		if (wr->send_flags & IB_SEND_FENCE)
+			hdr->cw |=
+			    (OCRDMA_FLAG_FENCE_L << OCRDMA_WQE_FLAGS_SHIFT);
+		if (wr->send_flags & IB_SEND_SOLICITED)
+			hdr->cw |=
+			    (OCRDMA_FLAG_SOLICIT << OCRDMA_WQE_FLAGS_SHIFT);
+		hdr->total_len = 0;
+		switch (wr->opcode) {
+		case IB_WR_SEND_WITH_IMM:
+			hdr->cw |= (OCRDMA_FLAG_IMM << OCRDMA_WQE_FLAGS_SHIFT);
+			hdr->immdt = ntohl(wr->ex.imm_data);
+			/* fall through */
+		case IB_WR_SEND:
+			hdr->cw |= (OCRDMA_SEND << OCRDMA_WQE_OPCODE_SHIFT);
+			status = ocrdma_build_send(qp, hdr, wr);
+			break;
+		case IB_WR_SEND_WITH_INV:
+			hdr->cw |= (OCRDMA_FLAG_INV << OCRDMA_WQE_FLAGS_SHIFT);
+			hdr->cw |= (OCRDMA_SEND << OCRDMA_WQE_OPCODE_SHIFT);
+			hdr->lkey = wr->ex.invalidate_rkey;
+			status = ocrdma_build_send(qp, hdr, wr);
+			break;
+		case IB_WR_RDMA_WRITE_WITH_IMM:
+			hdr->cw |= (OCRDMA_FLAG_IMM << OCRDMA_WQE_FLAGS_SHIFT);
+			hdr->immdt = ntohl(wr->ex.imm_data);
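+			/* fall through */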
+		case IB_WR_RDMA_WRITE:
+			hdr->cw |= (OCRDMA_WRITE << OCRDMA_WQE_OPCODE_SHIFT);
+			status = ocrdma_build_write(qp, hdr, wr);
+			break;
+		case IB_WR_RDMA_READ_WITH_INV:
+			hdr->cw |= (OCRDMA_FLAG_INV << OCRDMA_WQE_FLAGS_SHIFT);
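+			/* fall through */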
+		case IB_WR_RDMA_READ:
+			ocrdma_build_read(qp, hdr, wr);
+			break;
+		case IB_WR_LOCAL_INV:
+			hdr->cw |=
+			    (OCRDMA_LKEY_INV << OCRDMA_WQE_OPCODE_SHIFT);
+			hdr->cw |= (sizeof(struct ocrdma_hdr_wqe) /
+				OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT;
+			hdr->lkey = wr->ex.invalidate_rkey;
+			break;
+		default:
+			status = -EINVAL;
+			break;
+		}
+		if (status) {
+			*bad_wr = wr;
+			break;
+		}
+		if (wr->send_flags & IB_SEND_SIGNALED)
+			qp->wqe_wr_id_tbl[qp->sq.head].signaled = 1;
+		else
+			qp->wqe_wr_id_tbl[qp->sq.head].signaled = 0;
+		qp->wqe_wr_id_tbl[qp->sq.head].wrid = wr->wr_id;
+		ocrdma_cpu_to_le32(hdr, ((hdr->cw >> OCRDMA_WQE_SIZE_SHIFT) &
+				   OCRDMA_WQE_SIZE_MASK) * OCRDMA_WQE_STRIDE);
+		/* make sure wqe is written before adapter can access it */
+		wmb();
+		/* inform hw to start processing it */
+		ocrdma_ring_sq_db(qp);
+
+		/* update pointer, counter for next wr */
+		ocrdma_hwq_inc_head(&qp->sq);
+		wr = wr->next;
+	}
+	spin_unlock_irqrestore(&qp->q_lock, flags);
+	return status;
+}
+
+static void ocrdma_ring_rq_db(struct ocrdma_qp *qp)
+{
+	u32 val = qp->rq.dbid | (1 << OCRDMA_GET_NUM_POSTED_SHIFT_VAL(qp));
+
+	iowrite32(val, qp->rq_db);
+}
+
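+/* fill in an rqe header and its scatter list; 'tag' indexes the SRQ
+ * shadow table and is echoed back in the cqe (0 for ordinary RQs).
+ */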
+static void ocrdma_build_rqe(struct ocrdma_hdr_wqe *rqe, struct ib_recv_wr *wr,
+			     u16 tag)
+{
+	u32 wqe_size = 0;
+	struct ocrdma_sge *sge;
+	if (wr->num_sge)
+		wqe_size = (wr->num_sge * sizeof(*sge)) + sizeof(*rqe);
+	else
+		wqe_size = sizeof(*sge) + sizeof(*rqe);
+
+	rqe->cw = ((wqe_size / OCRDMA_WQE_STRIDE) <<
+				OCRDMA_WQE_SIZE_SHIFT);
+	rqe->cw |= (OCRDMA_FLAG_SIG << OCRDMA_WQE_FLAGS_SHIFT);
+	rqe->cw |= (OCRDMA_TYPE_LKEY << OCRDMA_WQE_TYPE_SHIFT);
+	rqe->total_len = 0;
+	rqe->rsvd_tag = tag;
+	sge = (struct ocrdma_sge *)(rqe + 1);
+	ocrdma_build_sges(rqe, sge, wr->num_sge, wr->sg_list);
+	ocrdma_cpu_to_le32(rqe, wqe_size);
+}
+
+int ocrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+		     struct ib_recv_wr **bad_wr)
+{
+	int status = 0;
+	unsigned long flags;
+	struct ocrdma_qp *qp = get_ocrdma_qp(ibqp);
+	struct ocrdma_hdr_wqe *rqe;
+
+	spin_lock_irqsave(&qp->q_lock, flags);
+	if (qp->state == OCRDMA_QPS_RST || qp->state == OCRDMA_QPS_ERR) {
+		spin_unlock_irqrestore(&qp->q_lock, flags);
+		*bad_wr = wr;
+		return -EINVAL;
+	}
+	while (wr) {
+		if (ocrdma_hwq_free_cnt(&qp->rq) == 0 ||
+		    wr->num_sge > qp->rq.max_sges) {
+			*bad_wr = wr;
+			status = -ENOMEM;
+			break;
+		}
+		rqe = ocrdma_hwq_head(&qp->rq);
+		ocrdma_build_rqe(rqe, wr, 0);
+
+		qp->rqe_wr_id_tbl[qp->rq.head] = wr->wr_id;
+		/* make sure rqe is written before adapter can access it */
+		wmb();
+
+		/* inform hw to start processing it */
+		ocrdma_ring_rq_db(qp);
+
+		/* update pointer, counter for next wr */
+		ocrdma_hwq_inc_head(&qp->rq);
+		wr = wr->next;
+	}
+	spin_unlock_irqrestore(&qp->q_lock, flags);
+	return status;
+}
+
+/* cqes for an srq's rqes can potentially arrive out of order.
+ * the index gives the entry in the shadow table where the wr_id is
+ * stored; the tag/index is returned in the cqe to refer back to a
+ * given rqe.
+ */
+static int ocrdma_srq_get_idx(struct ocrdma_srq *srq)
+{
+	int row = 0;
+	int indx = 0;
+
+	for (row = 0; row < srq->bit_fields_len; row++) {
+		if (srq->idx_bit_fields[row]) {
+			indx = ffs(srq->idx_bit_fields[row]);
+			indx = (row * 32) + (indx - 1);
+			if (indx >= srq->rq.max_cnt)
+				BUG();
+			ocrdma_srq_toggle_bit(srq, indx);
+			break;
+		}
+	}
+
+	if (row == srq->bit_fields_len)
+		BUG();
+	return indx;
+}
+
+static void ocrdma_ring_srq_db(struct ocrdma_srq *srq)
+{
+	u32 val = srq->rq.dbid | (1 << 16);
+
+	iowrite32(val, srq->db + OCRDMA_DB_GEN2_SRQ_OFFSET);
+}
+
+int ocrdma_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
+			 struct ib_recv_wr **bad_wr)
+{
+	int status = 0;
+	unsigned long flags;
+	struct ocrdma_srq *srq;
+	struct ocrdma_hdr_wqe *rqe;
+	u16 tag;
+
+	srq = get_ocrdma_srq(ibsrq);
+
+	spin_lock_irqsave(&srq->q_lock, flags);
+	while (wr) {
+		if (ocrdma_hwq_free_cnt(&srq->rq) == 0 ||
+		    wr->num_sge > srq->rq.max_sges) {
+			status = -ENOMEM;
+			*bad_wr = wr;
+			break;
+		}
+		tag = ocrdma_srq_get_idx(srq);
+		rqe = ocrdma_hwq_head(&srq->rq);
+		ocrdma_build_rqe(rqe, wr, tag);
+
+		srq->rqe_wr_id_tbl[tag] = wr->wr_id;
+		/* make sure rqe is written before adapter can perform DMA */
+		wmb();
+		/* inform hw to start processing it */
+		ocrdma_ring_srq_db(srq);
+		/* update pointer, counter for next wr */
+		ocrdma_hwq_inc_head(&srq->rq);
+		wr = wr->next;
+	}
+	spin_unlock_irqrestore(&srq->q_lock, flags);
+	return status;
+}
+
+static enum ib_wc_status ocrdma_to_ibwc_err(u16 status)
+{
+	enum ib_wc_status ibwc_status = IB_WC_GENERAL_ERR;
+
+	switch (status) {
+	case OCRDMA_CQE_GENERAL_ERR:
+		ibwc_status = IB_WC_GENERAL_ERR;
+		break;
+	case OCRDMA_CQE_LOC_LEN_ERR:
+		ibwc_status = IB_WC_LOC_LEN_ERR;
+		break;
+	case OCRDMA_CQE_LOC_QP_OP_ERR:
+		ibwc_status = IB_WC_LOC_QP_OP_ERR;
+		break;
+	case OCRDMA_CQE_LOC_EEC_OP_ERR:
+		ibwc_status = IB_WC_LOC_EEC_OP_ERR;
+		break;
+	case OCRDMA_CQE_LOC_PROT_ERR:
+		ibwc_status = IB_WC_LOC_PROT_ERR;
+		break;
+	case OCRDMA_CQE_WR_FLUSH_ERR:
+		ibwc_status = IB_WC_WR_FLUSH_ERR;
+		break;
+	case OCRDMA_CQE_MW_BIND_ERR:
+		ibwc_status = IB_WC_MW_BIND_ERR;
+		break;
+	case OCRDMA_CQE_BAD_RESP_ERR:
+		ibwc_status = IB_WC_BAD_RESP_ERR;
+		break;
+	case OCRDMA_CQE_LOC_ACCESS_ERR:
+		ibwc_status = IB_WC_LOC_ACCESS_ERR;
+		break;
+	case OCRDMA_CQE_REM_INV_REQ_ERR:
+		ibwc_status = IB_WC_REM_INV_REQ_ERR;
+		break;
+	case OCRDMA_CQE_REM_ACCESS_ERR:
+		ibwc_status = IB_WC_REM_ACCESS_ERR;
+		break;
+	case OCRDMA_CQE_REM_OP_ERR:
+		ibwc_status = IB_WC_REM_OP_ERR;
+		break;
+	case OCRDMA_CQE_RETRY_EXC_ERR:
+		ibwc_status = IB_WC_RETRY_EXC_ERR;
+		break;
+	case OCRDMA_CQE_RNR_RETRY_EXC_ERR:
+		ibwc_status = IB_WC_RNR_RETRY_EXC_ERR;
+		break;
+	case OCRDMA_CQE_LOC_RDD_VIOL_ERR:
+		ibwc_status = IB_WC_LOC_RDD_VIOL_ERR;
+		break;
+	case OCRDMA_CQE_REM_INV_RD_REQ_ERR:
+		ibwc_status = IB_WC_REM_INV_RD_REQ_ERR;
+		break;
+	case OCRDMA_CQE_REM_ABORT_ERR:
+		ibwc_status = IB_WC_REM_ABORT_ERR;
+		break;
+	case OCRDMA_CQE_INV_EECN_ERR:
+		ibwc_status = IB_WC_INV_EECN_ERR;
+		break;
+	case OCRDMA_CQE_INV_EEC_STATE_ERR:
+		ibwc_status = IB_WC_INV_EEC_STATE_ERR;
+		break;
+	case OCRDMA_CQE_FATAL_ERR:
+		ibwc_status = IB_WC_FATAL_ERR;
+		break;
+	case OCRDMA_CQE_RESP_TIMEOUT_ERR:
+		ibwc_status = IB_WC_RESP_TIMEOUT_ERR;
+		break;
+	default:
+		ibwc_status = IB_WC_GENERAL_ERR;
+		break;
+	}
+	return ibwc_status;
+}
+
+static void ocrdma_update_wc(struct ocrdma_qp *qp, struct ib_wc *ibwc,
+		      u32 wqe_idx)
+{
+	struct ocrdma_hdr_wqe *hdr;
+	struct ocrdma_sge *rw;
+	int opcode;
+
+	hdr = ocrdma_hwq_head_from_idx(&qp->sq, wqe_idx);
+
+	ibwc->wr_id = qp->wqe_wr_id_tbl[wqe_idx].wrid;
+	/* Undo the hdr->cw swap */
+	opcode = le32_to_cpu(hdr->cw) & OCRDMA_WQE_OPCODE_MASK;
+	switch (opcode) {
+	case OCRDMA_WRITE:
+		ibwc->opcode = IB_WC_RDMA_WRITE;
+		break;
+	case OCRDMA_READ:
+		rw = (struct ocrdma_sge *)(hdr + 1);
+		ibwc->opcode = IB_WC_RDMA_READ;
+		ibwc->byte_len = rw->len;
+		break;
+	case OCRDMA_SEND:
+		ibwc->opcode = IB_WC_SEND;
+		break;
+	case OCRDMA_LKEY_INV:
+		ibwc->opcode = IB_WC_LOCAL_INV;
+		break;
+	default:
+		ibwc->status = IB_WC_GENERAL_ERR;
+		ocrdma_err("%s() invalid opcode received = 0x%x\n",
+			   __func__, opcode);
+		break;
+	}
+}
+
+static void ocrdma_set_cqe_status_flushed(struct ocrdma_qp *qp,
+						struct ocrdma_cqe *cqe)
+{
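+	/* rewrite only the status field of the cqe in place, leaving
+	 * the other flag bits intact
+	 */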
+	if (is_cqe_for_sq(cqe)) {
+		cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
+				cqe->flags_status_srcqpn) &
+					~OCRDMA_CQE_STATUS_MASK);
+		cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
+				cqe->flags_status_srcqpn) |
+				(OCRDMA_CQE_WR_FLUSH_ERR <<
+					OCRDMA_CQE_STATUS_SHIFT));
+	} else {
+		if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI) {
+			cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
+					cqe->flags_status_srcqpn) &
+						~OCRDMA_CQE_UD_STATUS_MASK);
+			cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
+					cqe->flags_status_srcqpn) |
+					(OCRDMA_CQE_WR_FLUSH_ERR <<
+						OCRDMA_CQE_UD_STATUS_SHIFT));
+		} else {
+			cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
+					cqe->flags_status_srcqpn) &
+						~OCRDMA_CQE_STATUS_MASK);
+			cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
+					cqe->flags_status_srcqpn) |
+					(OCRDMA_CQE_WR_FLUSH_ERR <<
+						OCRDMA_CQE_STATUS_SHIFT));
+		}
+	}
+}
+
+static bool ocrdma_update_err_cqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe,
+				  struct ocrdma_qp *qp, int status)
+{
+	bool expand = false;
+
+	ibwc->byte_len = 0;
+	ibwc->qp = &qp->ibqp;
+	ibwc->status = ocrdma_to_ibwc_err(status);
+
+	ocrdma_flush_qp(qp);
+	ocrdma_qp_state_machine(qp, IB_QPS_ERR, NULL);
+
+	/* if a wqe/rqe is pending for which a cqe still needs to be
+	 * returned, trigger expanding it.
+	 */
+	if (!is_hw_rq_empty(qp) || !is_hw_sq_empty(qp)) {
+		expand = true;
+		ocrdma_set_cqe_status_flushed(qp, cqe);
+	}
+	return expand;
+}
+
+static int ocrdma_update_err_rcqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe,
+				  struct ocrdma_qp *qp, int status)
+{
+	ibwc->opcode = IB_WC_RECV;
+	ibwc->wr_id = qp->rqe_wr_id_tbl[qp->rq.tail];
+	ocrdma_hwq_inc_tail(&qp->rq);
+
+	return ocrdma_update_err_cqe(ibwc, cqe, qp, status);
+}
+
+static int ocrdma_update_err_scqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe,
+				  struct ocrdma_qp *qp, int status)
+{
+	ocrdma_update_wc(qp, ibwc, qp->sq.tail);
+	ocrdma_hwq_inc_tail(&qp->sq);
+
+	return ocrdma_update_err_cqe(ibwc, cqe, qp, status);
+}
+
+static bool ocrdma_poll_err_scqe(struct ocrdma_qp *qp,
+				 struct ocrdma_cqe *cqe, struct ib_wc *ibwc,
+				 bool *polled, bool *stop)
+{
+	bool expand;
+	int status = (le32_to_cpu(cqe->flags_status_srcqpn) &
+		OCRDMA_CQE_STATUS_MASK) >> OCRDMA_CQE_STATUS_SHIFT;
+
+	/* when the hw sq is empty but the rq is not, keep the cqe
+	 * so that the cq event is raised again.
+	 */
+	if (is_hw_sq_empty(qp) && !is_hw_rq_empty(qp)) {
+		/* when cq for rq and sq is same, it is safe to return
+		 * flush cqe for RQEs.
+		 */
+		if (!qp->srq && (qp->sq_cq == qp->rq_cq)) {
+			*polled = true;
+			status = OCRDMA_CQE_WR_FLUSH_ERR;
+			expand = ocrdma_update_err_rcqe(ibwc, cqe, qp, status);
+		} else {
+			/* stop processing further cqe as this cqe is used for
+			 * triggering cq event on buddy cq of RQ.
+			 * When QP is destroyed, this cqe will be removed
+			 * from the cq's hardware q.
+			 */
+			*polled = false;
+			*stop = true;
+			expand = false;
+		}
+	} else {
+		*polled = true;
+		expand = ocrdma_update_err_scqe(ibwc, cqe, qp, status);
+	}
+	return expand;
+}
+
+static bool ocrdma_poll_success_scqe(struct ocrdma_qp *qp,
+				     struct ocrdma_cqe *cqe,
+				     struct ib_wc *ibwc, bool *polled)
+{
+	bool expand = false;
+	int tail = qp->sq.tail;
+	u32 wqe_idx;
+
+	if (!qp->wqe_wr_id_tbl[tail].signaled) {
+		expand = true;	/* CQE cannot be consumed yet */
+		*polled = false;    /* WC cannot be consumed yet */
+	} else {
+		ibwc->status = IB_WC_SUCCESS;
+		ibwc->wc_flags = 0;
+		ibwc->qp = &qp->ibqp;
+		ocrdma_update_wc(qp, ibwc, tail);
+		*polled = true;
+		wqe_idx = le32_to_cpu(cqe->wq.wqeidx) & OCRDMA_CQE_WQEIDX_MASK;
+		if (tail != wqe_idx)
+			expand = true; /* Coalesced CQE can't be consumed yet */
+	}
+	ocrdma_hwq_inc_tail(&qp->sq);
+	return expand;
+}
+
+static bool ocrdma_poll_scqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe,
+			     struct ib_wc *ibwc, bool *polled, bool *stop)
+{
+	int status;
+	bool expand;
+
+	status = (le32_to_cpu(cqe->flags_status_srcqpn) &
+		OCRDMA_CQE_STATUS_MASK) >> OCRDMA_CQE_STATUS_SHIFT;
+
+	if (status == OCRDMA_CQE_SUCCESS)
+		expand = ocrdma_poll_success_scqe(qp, cqe, ibwc, polled);
+	else
+		expand = ocrdma_poll_err_scqe(qp, cqe, ibwc, polled, stop);
+	return expand;
+}
+
+static int ocrdma_update_ud_rcqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe)
+{
+	int status;
+
+	status = (le32_to_cpu(cqe->flags_status_srcqpn) &
+		OCRDMA_CQE_UD_STATUS_MASK) >> OCRDMA_CQE_UD_STATUS_SHIFT;
+	ibwc->src_qp = le32_to_cpu(cqe->flags_status_srcqpn) &
+						OCRDMA_CQE_SRCQP_MASK;
+	ibwc->pkey_index = le32_to_cpu(cqe->ud.rxlen_pkey) &
+						OCRDMA_CQE_PKEY_MASK;
+	ibwc->wc_flags = IB_WC_GRH;
+	ibwc->byte_len = (le32_to_cpu(cqe->ud.rxlen_pkey) >>
+					OCRDMA_CQE_UD_XFER_LEN_SHIFT);
+	return status;
+}
+
+static void ocrdma_update_free_srq_cqe(struct ib_wc *ibwc,
+				       struct ocrdma_cqe *cqe,
+				       struct ocrdma_qp *qp)
+{
+	unsigned long flags;
+	struct ocrdma_srq *srq;
+	u32 wqe_idx;
+
+	srq = get_ocrdma_srq(qp->ibqp.srq);
+	wqe_idx = le32_to_cpu(cqe->rq.buftag_qpn) >> OCRDMA_CQE_BUFTAG_SHIFT;
+	ibwc->wr_id = srq->rqe_wr_id_tbl[wqe_idx];
+	spin_lock_irqsave(&srq->q_lock, flags);
+	ocrdma_srq_toggle_bit(srq, wqe_idx);
+	spin_unlock_irqrestore(&srq->q_lock, flags);
+	ocrdma_hwq_inc_tail(&srq->rq);
+}
+
+static bool ocrdma_poll_err_rcqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe,
+				struct ib_wc *ibwc, bool *polled, bool *stop,
+				int status)
+{
+	bool expand;
+
+	/* when the hw rq is empty but the sq is not, keep the cqe
+	 * so that the cq event is raised again.
+	 */
+	if (is_hw_rq_empty(qp) && !is_hw_sq_empty(qp)) {
+		if (!qp->srq && (qp->sq_cq == qp->rq_cq)) {
+			*polled = true;
+			status = OCRDMA_CQE_WR_FLUSH_ERR;
+			expand = ocrdma_update_err_scqe(ibwc, cqe, qp, status);
+		} else {
+			*polled = false;
+			*stop = true;
+			expand = false;
+		}
+	} else
+		expand = ocrdma_update_err_rcqe(ibwc, cqe, qp, status);
+	return expand;
+}
+
+static void ocrdma_poll_success_rcqe(struct ocrdma_qp *qp,
+				     struct ocrdma_cqe *cqe, struct ib_wc *ibwc)
+{
+	ibwc->opcode = IB_WC_RECV;
+	ibwc->qp = &qp->ibqp;
+	ibwc->status = IB_WC_SUCCESS;
+
+	if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI)
+		ocrdma_update_ud_rcqe(ibwc, cqe);
+	else
+		ibwc->byte_len = le32_to_cpu(cqe->rq.rxlen);
+
+	if (is_cqe_imm(cqe)) {
+		ibwc->ex.imm_data = htonl(le32_to_cpu(cqe->rq.lkey_immdt));
+		ibwc->wc_flags |= IB_WC_WITH_IMM;
+	} else if (is_cqe_wr_imm(cqe)) {
+		ibwc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
+		ibwc->ex.imm_data = htonl(le32_to_cpu(cqe->rq.lkey_immdt));
+		ibwc->wc_flags |= IB_WC_WITH_IMM;
+	} else if (is_cqe_invalidated(cqe)) {
+		ibwc->ex.invalidate_rkey = le32_to_cpu(cqe->rq.lkey_immdt);
+		ibwc->wc_flags |= IB_WC_WITH_INVALIDATE;
+	}
+	if (qp->ibqp.srq)
+		ocrdma_update_free_srq_cqe(ibwc, cqe, qp);
+	else {
+		ibwc->wr_id = qp->rqe_wr_id_tbl[qp->rq.tail];
+		ocrdma_hwq_inc_tail(&qp->rq);
+	}
+}
+
+static bool ocrdma_poll_rcqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe,
+			     struct ib_wc *ibwc, bool *polled, bool *stop)
+{
+	int status;
+	bool expand = false;
+
+	ibwc->wc_flags = 0;
+	if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI)
+		status = (le32_to_cpu(cqe->flags_status_srcqpn) &
+					OCRDMA_CQE_UD_STATUS_MASK) >>
+					OCRDMA_CQE_UD_STATUS_SHIFT;
+	else
+		status = (le32_to_cpu(cqe->flags_status_srcqpn) &
+			     OCRDMA_CQE_STATUS_MASK) >> OCRDMA_CQE_STATUS_SHIFT;
+
+	if (status == OCRDMA_CQE_SUCCESS) {
+		*polled = true;
+		ocrdma_poll_success_rcqe(qp, cqe, ibwc);
+	} else {
+		expand = ocrdma_poll_err_rcqe(qp, cqe, ibwc, polled, stop,
+					      status);
+	}
+	return expand;
+}
+
+static void ocrdma_change_cq_phase(struct ocrdma_cq *cq, struct ocrdma_cqe *cqe,
+				   u16 cur_getp)
+{
+	if (cq->phase_change) {
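+		/* the expected phase flips on every queue wrap, so stale
+		 * entries from the previous pass read as invalid
+		 */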
+		if (cur_getp == 0)
+			cq->phase = (~cq->phase & OCRDMA_CQE_VALID);
+	} else
+		/* clear valid bit */
+		cqe->flags_status_srcqpn = 0;
+}
+
+static int ocrdma_poll_hwcq(struct ocrdma_cq *cq, int num_entries,
+			    struct ib_wc *ibwc)
+{
+	u16 qpn = 0;
+	int i = 0;
+	bool expand = false;
+	int polled_hw_cqes = 0;
+	struct ocrdma_qp *qp = NULL;
+	struct ocrdma_dev *dev = cq->dev;
+	struct ocrdma_cqe *cqe;
+	u16 cur_getp;
+	bool polled = false;
+	bool stop = false;
+
+	cur_getp = cq->getp;
+	while (num_entries) {
+		cqe = cq->va + cur_getp;
+		/* check whether valid cqe or not */
+		if (!is_cqe_valid(cq, cqe))
+			break;
+		qpn = (le32_to_cpu(cqe->cmn.qpn) & OCRDMA_CQE_QPN_MASK);
+		/* ignore discarded cqe */
+		if (qpn == 0)
+			goto skip_cqe;
+		qp = dev->qp_tbl[qpn];
+		BUG_ON(qp == NULL);
+
+		if (is_cqe_for_sq(cqe)) {
+			expand = ocrdma_poll_scqe(qp, cqe, ibwc, &polled,
+						  &stop);
+		} else {
+			expand = ocrdma_poll_rcqe(qp, cqe, ibwc, &polled,
+						  &stop);
+		}
+		if (expand)
+			goto expand_cqe;
+		if (stop)
+			goto stop_cqe;
+		/* clear qpn to avoid duplicate processing by discard_cqe() */
+		cqe->cmn.qpn = 0;
+skip_cqe:
+		polled_hw_cqes += 1;
+		cur_getp = (cur_getp + 1) % cq->max_hw_cqe;
+		ocrdma_change_cq_phase(cq, cqe, cur_getp);
+expand_cqe:
+		if (polled) {
+			num_entries -= 1;
+			i += 1;
+			ibwc = ibwc + 1;
+			polled = false;
+		}
+	}
+stop_cqe:
+	cq->getp = cur_getp;
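+	/* return the number of hw CQEs consumed so the adapter can
+	 * reuse those slots
+	 */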
+	if (polled_hw_cqes || expand || stop) {
+		ocrdma_ring_cq_db(dev, cq->id, cq->armed, cq->solicited,
+				  polled_hw_cqes);
+	}
+	return i;
+}
+
+/* insert error cqe if the QP's SQ or RQ's CQ matches the CQ under poll. */
+static int ocrdma_add_err_cqe(struct ocrdma_cq *cq, int num_entries,
+			      struct ocrdma_qp *qp, struct ib_wc *ibwc)
+{
+	int err_cqes = 0;
+
+	while (num_entries) {
+		if (is_hw_sq_empty(qp) && is_hw_rq_empty(qp))
+			break;
+		if (!is_hw_sq_empty(qp) && qp->sq_cq == cq) {
+			ocrdma_update_wc(qp, ibwc, qp->sq.tail);
+			ocrdma_hwq_inc_tail(&qp->sq);
+		} else if (!is_hw_rq_empty(qp) && qp->rq_cq == cq) {
+			ibwc->wr_id = qp->rqe_wr_id_tbl[qp->rq.tail];
+			ocrdma_hwq_inc_tail(&qp->rq);
+		} else
+			return err_cqes;
+		ibwc->byte_len = 0;
+		ibwc->status = IB_WC_WR_FLUSH_ERR;
+		ibwc = ibwc + 1;
+		err_cqes += 1;
+		num_entries -= 1;
+	}
+	return err_cqes;
+}
+
+int ocrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
+{
+	int cqes_to_poll = num_entries;
+	struct ocrdma_cq *cq = NULL;
+	unsigned long flags;
+	struct ocrdma_dev *dev;
+	int num_os_cqe = 0, err_cqes = 0;
+	struct ocrdma_qp *qp;
+
+	cq = get_ocrdma_cq(ibcq);
+	dev = cq->dev;
+
+	/* poll cqes from adapter CQ */
+	spin_lock_irqsave(&cq->cq_lock, flags);
+	num_os_cqe = ocrdma_poll_hwcq(cq, cqes_to_poll, wc);
+	spin_unlock_irqrestore(&cq->cq_lock, flags);
+	cqes_to_poll -= num_os_cqe;
+
+	if (cqes_to_poll) {
+		wc = wc + num_os_cqe;
+		/* the adapter returns a single error cqe when the qp moves
+		 * to the error state. So insert error cqes, with wc_status
+		 * set to FLUSHED, for the pending WQEs and RQEs of the QP's
+		 * SQ and RQ respectively that use this CQ.
+		 */
+		spin_lock_irqsave(&dev->flush_q_lock, flags);
+		list_for_each_entry(qp, &cq->sq_head, sq_entry) {
+			if (cqes_to_poll == 0)
+				break;
+			err_cqes = ocrdma_add_err_cqe(cq, cqes_to_poll, qp, wc);
+			cqes_to_poll -= err_cqes;
+			num_os_cqe += err_cqes;
+			wc = wc + err_cqes;
+		}
+		spin_unlock_irqrestore(&dev->flush_q_lock, flags);
+	}
+	return num_os_cqe;
+}
+
+int ocrdma_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags cq_flags)
+{
+	struct ocrdma_cq *cq;
+	unsigned long flags;
+	struct ocrdma_dev *dev;
+	u16 cq_id;
+	u16 cur_getp;
+	struct ocrdma_cqe *cqe;
+
+	cq = get_ocrdma_cq(ibcq);
+	cq_id = cq->id;
+	dev = cq->dev;
+
+	spin_lock_irqsave(&cq->cq_lock, flags);
+	if (cq_flags & IB_CQ_NEXT_COMP || cq_flags & IB_CQ_SOLICITED)
+		cq->armed = true;
+	if (cq_flags & IB_CQ_SOLICITED)
+		cq->solicited = true;
+
+	cur_getp = cq->getp;
+	cqe = cq->va + cur_getp;
+
+	/* check whether any valid cqe exists; if not, it is safe to arm.
+	 * If a cqe is not yet consumed, let it be consumed first and arm
+	 * afterwards to avoid false interrupts.
+	 */
+	if (!is_cqe_valid(cq, cqe) || cq->arm_needed) {
+		cq->arm_needed = false;
+		ocrdma_ring_cq_db(dev, cq_id, cq->armed, cq->solicited, 0);
+	}
+	spin_unlock_irqrestore(&cq->cq_lock, flags);
+	return 0;
+}
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
new file mode 100644
index 0000000..e648343
--- /dev/null
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
@@ -0,0 +1,94 @@
+/*******************************************************************
+ * This file is part of the Emulex RoCE Device Driver for          *
+ * RoCE (RDMA over Converged Ethernet) adapters.                   *
+ * Copyright (C) 2008-2012 Emulex. All rights reserved.            *
+ * EMULEX and SLI are trademarks of Emulex.                        *
+ * www.emulex.com                                                  *
+ *                                                                 *
+ * This program is free software; you can redistribute it and/or   *
+ * modify it under the terms of version 2 of the GNU General       *
+ * Public License as published by the Free Software Foundation.    *
+ * This program is distributed in the hope that it will be useful. *
+ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND          *
+ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY,  *
+ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE      *
+ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
+ * TO BE LEGALLY INVALID.  See the GNU General Public License for  *
+ * more details, a copy of which can be found in the file COPYING  *
+ * included with this package.                                     *
+ *
+ * Contact Information:
+ * linux-drivers@emulex.com
+ *
+ * Emulex
+ * 3333 Susan Street
+ * Costa Mesa, CA 92626
+ *******************************************************************/
+
+#ifndef __OCRDMA_VERBS_H__
+#define __OCRDMA_VERBS_H__
+
+#include <linux/version.h>
+int ocrdma_post_send(struct ib_qp *, struct ib_send_wr *,
+		     struct ib_send_wr **bad_wr);
+int ocrdma_post_recv(struct ib_qp *, struct ib_recv_wr *,
+		     struct ib_recv_wr **bad_wr);
+
+int ocrdma_poll_cq(struct ib_cq *, int num_entries, struct ib_wc *wc);
+int ocrdma_arm_cq(struct ib_cq *, enum ib_cq_notify_flags flags);
+
+int ocrdma_query_device(struct ib_device *, struct ib_device_attr *props);
+int ocrdma_query_port(struct ib_device *, u8 port, struct ib_port_attr *props);
+int ocrdma_modify_port(struct ib_device *, u8 port, int mask,
+		       struct ib_port_modify *props);
+
+void ocrdma_get_guid(struct ocrdma_dev *, u8 *guid);
+int ocrdma_query_gid(struct ib_device *, u8 port,
+		     int index, union ib_gid *gid);
+int ocrdma_query_pkey(struct ib_device *, u8 port, u16 index, u16 *pkey);
+
+struct ib_ucontext *ocrdma_alloc_ucontext(struct ib_device *,
+					  struct ib_udata *);
+int ocrdma_dealloc_ucontext(struct ib_ucontext *);
+
+int ocrdma_mmap(struct ib_ucontext *, struct vm_area_struct *vma);
+
+struct ib_pd *ocrdma_alloc_pd(struct ib_device *,
+			      struct ib_ucontext *, struct ib_udata *);
+int ocrdma_dealloc_pd(struct ib_pd *pd);
+
+struct ib_cq *ocrdma_create_cq(struct ib_device *, int entries, int vector,
+			       struct ib_ucontext *, struct ib_udata *);
+int ocrdma_resize_cq(struct ib_cq *, int cqe, struct ib_udata *);
+int ocrdma_destroy_cq(struct ib_cq *);
+
+struct ib_qp *ocrdma_create_qp(struct ib_pd *,
+			       struct ib_qp_init_attr *attrs,
+			       struct ib_udata *);
+int _ocrdma_modify_qp(struct ib_qp *, struct ib_qp_attr *attr,
+		      int attr_mask);
+int ocrdma_modify_qp(struct ib_qp *, struct ib_qp_attr *attr,
+		     int attr_mask, struct ib_udata *udata);
+int ocrdma_query_qp(struct ib_qp *,
+		    struct ib_qp_attr *qp_attr,
+		    int qp_attr_mask, struct ib_qp_init_attr *);
+int ocrdma_destroy_qp(struct ib_qp *);
+
+struct ib_srq *ocrdma_create_srq(struct ib_pd *, struct ib_srq_init_attr *,
+				 struct ib_udata *);
+int ocrdma_modify_srq(struct ib_srq *, struct ib_srq_attr *,
+		      enum ib_srq_attr_mask, struct ib_udata *);
+int ocrdma_query_srq(struct ib_srq *, struct ib_srq_attr *);
+int ocrdma_destroy_srq(struct ib_srq *);
+int ocrdma_post_srq_recv(struct ib_srq *, struct ib_recv_wr *,
+			 struct ib_recv_wr **bad_recv_wr);
+
+int ocrdma_dereg_mr(struct ib_mr *);
+struct ib_mr *ocrdma_get_dma_mr(struct ib_pd *, int acc);
+struct ib_mr *ocrdma_reg_kernel_mr(struct ib_pd *,
+				   struct ib_phys_buf *buffer_list,
+				   int num_phys_buf, int acc, u64 *iova_start);
+struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *, u64 start, u64 length,
+				 u64 virt, int acc, struct ib_udata *);
+
+#endif				/* __OCRDMA_VERBS_H__ */
diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h
index 6b811e3..7e62f41 100644
--- a/drivers/infiniband/hw/qib/qib.h
+++ b/drivers/infiniband/hw/qib/qib.h
@@ -530,8 +530,6 @@
 	/* qib_lflags driver is waiting for */
 	u32 state_wanted;
 	spinlock_t lflags_lock;
-	/* number of (port-specific) interrupts for this port -- saturates... */
-	u32 int_counter;
 
 	/* ref count for each pkey */
 	atomic_t pkeyrefs[4];
@@ -543,24 +541,26 @@
 	u64 *statusp;
 
 	/* SendDMA related entries */
-	spinlock_t            sdma_lock;
-	struct qib_sdma_state sdma_state;
-	unsigned long         sdma_buf_jiffies;
-	struct qib_sdma_desc *sdma_descq;
-	u64                   sdma_descq_added;
-	u64                   sdma_descq_removed;
-	u16                   sdma_descq_cnt;
-	u16                   sdma_descq_tail;
-	u16                   sdma_descq_head;
-	u16                   sdma_next_intr;
-	u16                   sdma_reset_wait;
-	u8                    sdma_generation;
-	struct tasklet_struct sdma_sw_clean_up_task;
-	struct list_head      sdma_activelist;
 
+	/* read mostly */
+	struct qib_sdma_desc *sdma_descq;
+	struct qib_sdma_state sdma_state;
 	dma_addr_t       sdma_descq_phys;
 	volatile __le64 *sdma_head_dma; /* DMA'ed by chip */
 	dma_addr_t       sdma_head_phys;
+	u16                   sdma_descq_cnt;
+
+	/* read/write using lock */
+	spinlock_t            sdma_lock ____cacheline_aligned_in_smp;
+	struct list_head      sdma_activelist;
+	u64                   sdma_descq_added;
+	u64                   sdma_descq_removed;
+	u16                   sdma_descq_tail;
+	u16                   sdma_descq_head;
+	u8                    sdma_generation;
+
+	struct tasklet_struct sdma_sw_clean_up_task
+		____cacheline_aligned_in_smp;
 
 	wait_queue_head_t state_wait; /* for state_wanted */
 
@@ -873,7 +873,14 @@
 	 * pio_writing.
 	 */
 	spinlock_t pioavail_lock;
-
+	/*
+	 * index of last buffer to optimize search for next
+	 */
+	u32 last_pio;
+	/*
+	 * min kernel pio buffer to optimize search
+	 */
+	u32 min_kernel_pio;
 	/*
 	 * Shadow copies of registers; size indicates read access size.
 	 * Most of them are readonly, but some are write-only register,
diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c
index 6fc9365..8895cfe 100644
--- a/drivers/infiniband/hw/qib/qib_driver.c
+++ b/drivers/infiniband/hw/qib/qib_driver.c
@@ -38,6 +38,7 @@
 #include <linux/netdevice.h>
 #include <linux/vmalloc.h>
 #include <linux/module.h>
+#include <linux/prefetch.h>
 
 #include "qib.h"
 
@@ -481,8 +482,10 @@
 			etail = qib_hdrget_index(rhf_addr);
 			updegr = 1;
 			if (tlen > sizeof(*hdr) ||
-			    etype >= RCVHQ_RCV_TYPE_NON_KD)
+			    etype >= RCVHQ_RCV_TYPE_NON_KD) {
 				ebuf = qib_get_egrbuf(rcd, etail);
+				prefetch_range(ebuf, tlen - sizeof(*hdr));
+			}
 		}
 		if (!eflags) {
 			u16 lrh_len = be16_to_cpu(hdr->lrh[2]) << 2;
diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c
index d0c64d5..4d352b9 100644
--- a/drivers/infiniband/hw/qib/qib_iba6120.c
+++ b/drivers/infiniband/hw/qib/qib_iba6120.c
@@ -3132,6 +3132,7 @@
 	val = qib_read_kreg64(dd, kr_sendpiobufcnt);
 	dd->piobcnt2k = val & ~0U;
 	dd->piobcnt4k = val >> 32;
+	dd->last_pio = dd->piobcnt4k + dd->piobcnt2k - 1;
 	/* these may be adjusted in init_chip_wc_pat() */
 	dd->pio2kbase = (u32 __iomem *)
 		(((char __iomem *)dd->kregbase) + dd->pio2k_bufbase);
diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c
index 3c722f7..86a0ba7 100644
--- a/drivers/infiniband/hw/qib/qib_iba7220.c
+++ b/drivers/infiniband/hw/qib/qib_iba7220.c
@@ -4157,6 +4157,7 @@
 		dd->cspec->sdmabufcnt;
 	dd->lastctxt_piobuf = dd->cspec->lastbuf_for_pio - sbufs;
 	dd->cspec->lastbuf_for_pio--; /* range is <= , not < */
+	dd->last_pio = dd->cspec->lastbuf_for_pio;
 	dd->pbufsctxt = dd->lastctxt_piobuf /
 		(dd->cfgctxts - dd->first_user_ctxt);
 
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index 060b960..c881e74 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -6379,6 +6379,7 @@
 		dd->cspec->sdmabufcnt;
 	dd->lastctxt_piobuf = dd->cspec->lastbuf_for_pio - sbufs;
 	dd->cspec->lastbuf_for_pio--; /* range is <= , not < */
+	dd->last_pio = dd->cspec->lastbuf_for_pio;
 	dd->pbufsctxt = (dd->cfgctxts > dd->first_user_ctxt) ?
 		dd->lastctxt_piobuf / (dd->cfgctxts - dd->first_user_ctxt) : 0;
 
@@ -7708,7 +7709,7 @@
 	ibsd_wr_allchans(ppd, 5, 0, BMASK(0, 0));
 	msleep(20);
 	/*       Set Frequency Loop Bandwidth */
-	ibsd_wr_allchans(ppd, 2, (7 << 5), BMASK(8, 5));
+	ibsd_wr_allchans(ppd, 2, (15 << 5), BMASK(8, 5));
 	/*       Enable Frequency Loop */
 	ibsd_wr_allchans(ppd, 2, (1 << 4), BMASK(4, 4));
 	/*       Set Timing Loop Bandwidth */
diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
index cf0cd30..dc14e10 100644
--- a/drivers/infiniband/hw/qib/qib_init.c
+++ b/drivers/infiniband/hw/qib/qib_init.c
@@ -102,6 +102,8 @@
 		dd->cfgctxts = qib_cfgctxts;
 	else
 		dd->cfgctxts = dd->ctxtcnt;
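+	/* cfgctxts may end up below first_user_ctxt (e.g. cfgctxts set
+	 * to 1 on a single-port board), leaving no user contexts
+	 */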
+	dd->freectxts = (dd->first_user_ctxt > dd->cfgctxts) ? 0 :
+		dd->cfgctxts - dd->first_user_ctxt;
 }
 
 /*
@@ -402,7 +404,6 @@
 		if (rcd)
 			dd->f_rcvctrl(rcd->ppd, rcvmask, i);
 	}
-	dd->freectxts = dd->cfgctxts - dd->first_user_ctxt;
 }
 
 static void verify_interrupt(unsigned long opaque)
diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c
index c4ff788..4339021 100644
--- a/drivers/infiniband/hw/qib/qib_mad.c
+++ b/drivers/infiniband/hw/qib/qib_mad.c
@@ -396,6 +396,7 @@
 
 static int check_mkey(struct qib_ibport *ibp, struct ib_smp *smp, int mad_flags)
 {
+	int valid_mkey = 0;
 	int ret = 0;
 
 	/* Is the mkey in the process of expiring? */
@@ -406,23 +407,36 @@
 		ibp->mkeyprot = 0;
 	}
 
-	/* M_Key checking depends on Portinfo:M_Key_protect_bits */
-	if ((mad_flags & IB_MAD_IGNORE_MKEY) == 0 && ibp->mkey != 0 &&
-	    ibp->mkey != smp->mkey &&
-	    (smp->method == IB_MGMT_METHOD_SET ||
-	     smp->method == IB_MGMT_METHOD_TRAP_REPRESS ||
-	     (smp->method == IB_MGMT_METHOD_GET && ibp->mkeyprot >= 2))) {
-		if (ibp->mkey_violations != 0xFFFF)
-			++ibp->mkey_violations;
-		if (!ibp->mkey_lease_timeout && ibp->mkey_lease_period)
-			ibp->mkey_lease_timeout = jiffies +
-				ibp->mkey_lease_period * HZ;
-		/* Generate a trap notice. */
-		qib_bad_mkey(ibp, smp);
-		ret = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
-	} else if (ibp->mkey_lease_timeout)
+	if ((mad_flags & IB_MAD_IGNORE_MKEY) || ibp->mkey == 0 ||
+	    ibp->mkey == smp->mkey)
+		valid_mkey = 1;
+
+	/* Unset lease timeout on any valid Get/Set/TrapRepress */
+	if (valid_mkey && ibp->mkey_lease_timeout &&
+	    (smp->method == IB_MGMT_METHOD_GET ||
+	     smp->method == IB_MGMT_METHOD_SET ||
+	     smp->method == IB_MGMT_METHOD_TRAP_REPRESS))
 		ibp->mkey_lease_timeout = 0;
 
+	if (!valid_mkey) {
+		switch (smp->method) {
+		case IB_MGMT_METHOD_GET:
+			/* Bad mkey not a violation below level 2 */
+			if (ibp->mkeyprot < 2)
+				break;
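+			/* fall through: with mkeyprot >= 2 a bad mkey on a
+			 * Get counts as a violation too
+			 */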
+		case IB_MGMT_METHOD_SET:
+		case IB_MGMT_METHOD_TRAP_REPRESS:
+			if (ibp->mkey_violations != 0xFFFF)
+				++ibp->mkey_violations;
+			if (!ibp->mkey_lease_timeout && ibp->mkey_lease_period)
+				ibp->mkey_lease_timeout = jiffies +
+					ibp->mkey_lease_period * HZ;
+			/* Generate a trap notice. */
+			qib_bad_mkey(ibp, smp);
+			ret = 1;
+		}
+	}
+
 	return ret;
 }
 
@@ -450,6 +464,7 @@
 			ibp = to_iport(ibdev, port_num);
 			ret = check_mkey(ibp, smp, 0);
-			if (ret)
+			if (ret) {
+				ret = IB_MAD_RESULT_FAILURE;
 				goto bail;
+			}
 		}
 	}
@@ -631,7 +646,7 @@
 	struct qib_devdata *dd;
 	struct qib_pportdata *ppd;
 	struct qib_ibport *ibp;
-	char clientrereg = 0;
+	u8 clientrereg = (pip->clientrereg_resv_subnetto & 0x80);
 	unsigned long flags;
 	u16 lid, smlid;
 	u8 lwe;
@@ -781,12 +796,6 @@
 
 	ibp->subnet_timeout = pip->clientrereg_resv_subnetto & 0x1F;
 
-	if (pip->clientrereg_resv_subnetto & 0x80) {
-		clientrereg = 1;
-		event.event = IB_EVENT_CLIENT_REREGISTER;
-		ib_dispatch_event(&event);
-	}
-
 	/*
 	 * Do the port state change now that the other link parameters
 	 * have been set.
@@ -844,10 +853,15 @@
 		smp->status |= IB_SMP_INVALID_FIELD;
 	}
 
+	if (clientrereg) {
+		event.event = IB_EVENT_CLIENT_REREGISTER;
+		ib_dispatch_event(&event);
+	}
+
 	ret = subn_get_portinfo(smp, ibdev, port);
 
-	if (clientrereg)
-		pip->clientrereg_resv_subnetto |= 0x80;
+	/* restore re-reg bit per o14-12.2.1 */
+	pip->clientrereg_resv_subnetto |= clientrereg;
 
 	goto get_only;
 
@@ -1835,6 +1849,7 @@
 		    port_num && port_num <= ibdev->phys_port_cnt &&
 		    port != port_num)
 			(void) check_mkey(to_iport(ibdev, port_num), smp, 0);
+		ret = IB_MAD_RESULT_FAILURE;
 		goto bail;
 	}
 
diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c
index 7e7e16f..1ce56b5 100644
--- a/drivers/infiniband/hw/qib/qib_qp.c
+++ b/drivers/infiniband/hw/qib/qib_qp.c
@@ -1038,6 +1038,11 @@
 			goto bail_swq;
 		}
 		RCU_INIT_POINTER(qp->next, NULL);
+		qp->s_hdr = kzalloc(sizeof(*qp->s_hdr), GFP_KERNEL);
+		if (!qp->s_hdr) {
+			ret = ERR_PTR(-ENOMEM);
+			goto bail_qp;
+		}
 		qp->timeout_jiffies =
 			usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
 				1000UL);
@@ -1159,6 +1164,7 @@
 		vfree(qp->r_rq.wq);
 	free_qpn(&dev->qpn_table, qp->ibqp.qp_num);
 bail_qp:
+	kfree(qp->s_hdr);
 	kfree(qp);
 bail_swq:
 	vfree(swq);
@@ -1214,6 +1220,7 @@
 	else
 		vfree(qp->r_rq.wq);
 	vfree(qp->s_wq);
+	kfree(qp->s_hdr);
 	kfree(qp);
 	return 0;
 }
diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c
index 765b4cb..b641416 100644
--- a/drivers/infiniband/hw/qib/qib_rc.c
+++ b/drivers/infiniband/hw/qib/qib_rc.c
@@ -244,9 +244,9 @@
 	int ret = 0;
 	int delta;
 
-	ohdr = &qp->s_hdr.u.oth;
+	ohdr = &qp->s_hdr->u.oth;
 	if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
-		ohdr = &qp->s_hdr.u.l.oth;
+		ohdr = &qp->s_hdr->u.l.oth;
 
 	/*
 	 * The lock is needed to synchronize between the sending tasklet,
diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c
index b4b37e4..c0ee7e0 100644
--- a/drivers/infiniband/hw/qib/qib_ruc.c
+++ b/drivers/infiniband/hw/qib/qib_ruc.c
@@ -688,17 +688,17 @@
 	nwords = (qp->s_cur_size + extra_bytes) >> 2;
 	lrh0 = QIB_LRH_BTH;
 	if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
-		qp->s_hdrwords += qib_make_grh(ibp, &qp->s_hdr.u.l.grh,
+		qp->s_hdrwords += qib_make_grh(ibp, &qp->s_hdr->u.l.grh,
 					       &qp->remote_ah_attr.grh,
 					       qp->s_hdrwords, nwords);
 		lrh0 = QIB_LRH_GRH;
 	}
 	lrh0 |= ibp->sl_to_vl[qp->remote_ah_attr.sl] << 12 |
 		qp->remote_ah_attr.sl << 4;
-	qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
-	qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
-	qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
-	qp->s_hdr.lrh[3] = cpu_to_be16(ppd_from_ibp(ibp)->lid |
+	qp->s_hdr->lrh[0] = cpu_to_be16(lrh0);
+	qp->s_hdr->lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
+	qp->s_hdr->lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
+	qp->s_hdr->lrh[3] = cpu_to_be16(ppd_from_ibp(ibp)->lid |
 				       qp->remote_ah_attr.src_path_bits);
 	bth0 |= qib_get_pkey(ibp, qp->s_pkey_index);
 	bth0 |= extra_bytes << 20;
@@ -758,7 +758,7 @@
 			 * If the packet cannot be sent now, return and
 			 * the send tasklet will be woken up later.
 			 */
-			if (qib_verbs_send(qp, &qp->s_hdr, qp->s_hdrwords,
+			if (qib_verbs_send(qp, qp->s_hdr, qp->s_hdrwords,
 					   qp->s_cur_sge, qp->s_cur_size))
 				break;
 			/* Record that s_hdr is empty. */
diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c
index dae5160..dd9cd49 100644
--- a/drivers/infiniband/hw/qib/qib_sysfs.c
+++ b/drivers/infiniband/hw/qib/qib_sysfs.c
@@ -503,8 +503,11 @@
 	struct qib_devdata *dd = dd_from_dev(dev);
 
 	/* Return the number of user ports (contexts) available. */
-	return scnprintf(buf, PAGE_SIZE, "%u\n", dd->cfgctxts -
-		dd->first_user_ctxt);
+	/* The calculation below deals with a special case where
+	 * cfgctxts is set to 1 on a single-port board. */
+	return scnprintf(buf, PAGE_SIZE, "%u\n",
+			(dd->first_user_ctxt > dd->cfgctxts) ? 0 :
+			(dd->cfgctxts - dd->first_user_ctxt));
 }
 
 static ssize_t show_nfreectxts(struct device *device,
diff --git a/drivers/infiniband/hw/qib/qib_tx.c b/drivers/infiniband/hw/qib/qib_tx.c
index 1bf626c..31d3561 100644
--- a/drivers/infiniband/hw/qib/qib_tx.c
+++ b/drivers/infiniband/hw/qib/qib_tx.c
@@ -295,6 +295,7 @@
 
 	nbufs = last - first + 1; /* number in range to check */
 	if (dd->upd_pio_shadow) {
+update_shadow:
 		/*
 		 * Minor optimization.  If we had no buffers on last call,
 		 * start out by doing the update; continue and do scan even
@@ -304,37 +305,39 @@
 		updated++;
 	}
 	i = first;
-rescan:
 	/*
 	 * While test_and_set_bit() is atomic, we do that and then the
 	 * change_bit(), and the pair is not.  See if this is the cause
 	 * of the remaining armlaunch errors.
 	 */
 	spin_lock_irqsave(&dd->pioavail_lock, flags);
+	if (dd->last_pio >= first && dd->last_pio <= last)
+		i = dd->last_pio + 1;
+	if (!first)
+		/* adjust the scan length so it can wrap down to min_kernel_pio */
+		nbufs = last - dd->min_kernel_pio + 1;
 	for (j = 0; j < nbufs; j++, i++) {
 		if (i > last)
-			i = first;
+			i = !first ? dd->min_kernel_pio : first;
 		if (__test_and_set_bit((2 * i) + 1, shadow))
 			continue;
 		/* flip generation bit */
 		__change_bit(2 * i, shadow);
 		/* remember that the buffer can be written to now */
 		__set_bit(i, dd->pio_writing);
+		if (!first && first != last) /* first == last on VL15, avoid */
+			dd->last_pio = i;
 		break;
 	}
 	spin_unlock_irqrestore(&dd->pioavail_lock, flags);
 
 	if (j == nbufs) {
-		if (!updated) {
+		if (!updated)
 			/*
 			 * First time through; shadow exhausted, but may be
 			 * buffers available, try an update and then rescan.
 			 */
-			update_send_bufs(dd);
-			updated++;
-			i = first;
-			goto rescan;
-		}
+			goto update_shadow;
 		no_send_bufs(dd);
 		buf = NULL;
 	} else {
@@ -422,14 +425,20 @@
 				__clear_bit(QLOGIC_IB_SENDPIOAVAIL_CHECK_SHIFT
 					    + start, dd->pioavailshadow);
 			__set_bit(start, dd->pioavailkernel);
+			if ((start >> 1) < dd->min_kernel_pio)
+				dd->min_kernel_pio = start >> 1;
 		} else {
 			__set_bit(start + QLOGIC_IB_SENDPIOAVAIL_BUSY_SHIFT,
 				  dd->pioavailshadow);
 			__clear_bit(start, dd->pioavailkernel);
+			if ((start >> 1) > dd->min_kernel_pio)
+				dd->min_kernel_pio = start >> 1;
 		}
 		start += 2;
 	}
 
+	if (dd->min_kernel_pio > 0 && dd->last_pio < dd->min_kernel_pio - 1)
+		dd->last_pio = dd->min_kernel_pio - 1;
 	spin_unlock_irqrestore(&dd->pioavail_lock, flags);
 
 	dd->f_txchk_change(dd, ostart, len, avail, rcd);
diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c
index 7ce2ac2..ce7387f 100644
--- a/drivers/infiniband/hw/qib/qib_uc.c
+++ b/drivers/infiniband/hw/qib/qib_uc.c
@@ -72,9 +72,9 @@
 		goto done;
 	}
 
-	ohdr = &qp->s_hdr.u.oth;
+	ohdr = &qp->s_hdr->u.oth;
 	if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
-		ohdr = &qp->s_hdr.u.l.oth;
+		ohdr = &qp->s_hdr->u.l.oth;
 
 	/* header size in 32-bit words LRH+BTH = (8+12)/4. */
 	hwords = 5;
diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c
index 828609f..a468bf2 100644
--- a/drivers/infiniband/hw/qib/qib_ud.c
+++ b/drivers/infiniband/hw/qib/qib_ud.c
@@ -321,11 +321,11 @@
 
 	if (ah_attr->ah_flags & IB_AH_GRH) {
 		/* Header size in 32-bit words. */
-		qp->s_hdrwords += qib_make_grh(ibp, &qp->s_hdr.u.l.grh,
+		qp->s_hdrwords += qib_make_grh(ibp, &qp->s_hdr->u.l.grh,
 					       &ah_attr->grh,
 					       qp->s_hdrwords, nwords);
 		lrh0 = QIB_LRH_GRH;
-		ohdr = &qp->s_hdr.u.l.oth;
+		ohdr = &qp->s_hdr->u.l.oth;
 		/*
 		 * Don't worry about sending to locally attached multicast
 		 * QPs.  It is unspecified by the spec. what happens.
@@ -333,7 +333,7 @@
 	} else {
 		/* Header size in 32-bit words. */
 		lrh0 = QIB_LRH_BTH;
-		ohdr = &qp->s_hdr.u.oth;
+		ohdr = &qp->s_hdr->u.oth;
 	}
 	if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
 		qp->s_hdrwords++;
@@ -346,15 +346,15 @@
 		lrh0 |= 0xF000; /* Set VL (see ch. 13.5.3.1) */
 	else
 		lrh0 |= ibp->sl_to_vl[ah_attr->sl] << 12;
-	qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
-	qp->s_hdr.lrh[1] = cpu_to_be16(ah_attr->dlid);  /* DEST LID */
-	qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
+	qp->s_hdr->lrh[0] = cpu_to_be16(lrh0);
+	qp->s_hdr->lrh[1] = cpu_to_be16(ah_attr->dlid);  /* DEST LID */
+	qp->s_hdr->lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
 	lid = ppd->lid;
 	if (lid) {
 		lid |= ah_attr->src_path_bits & ((1 << ppd->lmc) - 1);
-		qp->s_hdr.lrh[3] = cpu_to_be16(lid);
+		qp->s_hdr->lrh[3] = cpu_to_be16(lid);
 	} else
-		qp->s_hdr.lrh[3] = IB_LID_PERMISSIVE;
+		qp->s_hdr->lrh[3] = IB_LID_PERMISSIVE;
 	if (wqe->wr.send_flags & IB_SEND_SOLICITED)
 		bth0 |= IB_BTH_SOLICITED;
 	bth0 |= extra_bytes << 20;
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index 0c19ef0..4876060 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -367,9 +367,10 @@
 
 struct qib_rq {
 	struct qib_rwq *wq;
-	spinlock_t lock; /* protect changes in this struct */
 	u32 size;               /* size of RWQE array */
 	u8 max_sge;
+	spinlock_t lock /* protect changes in this struct */
+		____cacheline_aligned_in_smp;
 };
 
 struct qib_srq {
@@ -412,31 +413,75 @@
  */
 struct qib_qp {
 	struct ib_qp ibqp;
-	struct qib_qp *next;            /* link list for QPN hash table */
-	struct qib_qp *timer_next;      /* link list for qib_ib_timer() */
-	struct list_head iowait;        /* link for wait PIO buf */
-	struct list_head rspwait;       /* link for waititing to respond */
+	/* read mostly fields above and below */
 	struct ib_ah_attr remote_ah_attr;
 	struct ib_ah_attr alt_ah_attr;
-	struct qib_ib_header s_hdr;     /* next packet header to send */
-	atomic_t refcount;
-	wait_queue_head_t wait;
-	wait_queue_head_t wait_dma;
-	struct timer_list s_timer;
-	struct work_struct s_work;
+	struct qib_qp *next;            /* link list for QPN hash table */
+	struct qib_swqe *s_wq;  /* send work queue */
 	struct qib_mmap_info *ip;
-	struct qib_sge_state *s_cur_sge;
-	struct qib_verbs_txreq *s_tx;
-	struct qib_mregion *s_rdma_mr;
-	struct qib_sge_state s_sge;     /* current send request data */
-	struct qib_ack_entry s_ack_queue[QIB_MAX_RDMA_ATOMIC + 1];
-	struct qib_sge_state s_ack_rdma_sge;
+	struct qib_ib_header *s_hdr;     /* next packet header to send */
+	unsigned long timeout_jiffies;  /* computed from timeout */
+
+	enum ib_mtu path_mtu;
+	u32 remote_qpn;
+	u32 pmtu;		/* decoded from path_mtu */
+	u32 qkey;               /* QKEY for this QP (for UD or RD) */
+	u32 s_size;             /* send work queue size */
+	u32 s_rnr_timeout;      /* number of milliseconds for RNR timeout */
+
+	u8 state;               /* QP state */
+	u8 qp_access_flags;
+	u8 alt_timeout;         /* Alternate path timeout for this QP */
+	u8 timeout;             /* Timeout for this QP */
+	u8 s_srate;
+	u8 s_mig_state;
+	u8 port_num;
+	u8 s_pkey_index;        /* PKEY index to use */
+	u8 s_alt_pkey_index;    /* Alternate path PKEY index to use */
+	u8 r_max_rd_atomic;     /* max number of RDMA read/atomic to receive */
+	u8 s_max_rd_atomic;     /* max number of RDMA read/atomic to send */
+	u8 s_retry_cnt;         /* number of times to retry */
+	u8 s_rnr_retry_cnt;
+	u8 r_min_rnr_timer;     /* retry timeout value for RNR NAKs */
+	u8 s_max_sge;           /* size of s_wq->sg_list */
+	u8 s_draining;
+
+	/* start of read/write fields */
+
+	atomic_t refcount ____cacheline_aligned_in_smp;
+	wait_queue_head_t wait;
+
+	struct qib_ack_entry s_ack_queue[QIB_MAX_RDMA_ATOMIC + 1]
+		____cacheline_aligned_in_smp;
 	struct qib_sge_state s_rdma_read_sge;
+
+	spinlock_t r_lock ____cacheline_aligned_in_smp;      /* used for APM */
+	unsigned long r_aflags;
+	u64 r_wr_id;            /* ID for current receive WQE */
+	u32 r_ack_psn;          /* PSN for next ACK or atomic ACK */
+	u32 r_len;              /* total length of r_sge */
+	u32 r_rcv_len;          /* receive data len processed */
+	u32 r_psn;              /* expected rcv packet sequence number */
+	u32 r_msn;              /* message sequence number */
+
+	u8 r_state;             /* opcode of last packet received */
+	u8 r_flags;
+	u8 r_head_ack_queue;    /* index into s_ack_queue[] */
+
+	struct list_head rspwait;       /* link for waiting to respond */
+
 	struct qib_sge_state r_sge;     /* current receive data */
-	spinlock_t r_lock;      /* used for APM */
-	spinlock_t s_lock;
-	atomic_t s_dma_busy;
+	struct qib_rq r_rq;             /* receive work queue */
+
+	spinlock_t s_lock ____cacheline_aligned_in_smp;
+	struct qib_sge_state *s_cur_sge;
 	u32 s_flags;
+	struct qib_verbs_txreq *s_tx;
+	struct qib_swqe *s_wqe;
+	struct qib_sge_state s_sge;     /* current send request data */
+	struct qib_mregion *s_rdma_mr;
+	atomic_t s_dma_busy;
 	u32 s_cur_size;         /* size of send packet in bytes */
 	u32 s_len;              /* total length of s_sge */
 	u32 s_rdma_read_len;    /* total length of s_rdma_read_sge */
@@ -447,48 +492,6 @@
 	u32 s_psn;              /* current packet sequence number */
 	u32 s_ack_rdma_psn;     /* PSN for sending RDMA read responses */
 	u32 s_ack_psn;          /* PSN for acking sends and RDMA writes */
-	u32 s_rnr_timeout;      /* number of milliseconds for RNR timeout */
-	u32 r_ack_psn;          /* PSN for next ACK or atomic ACK */
-	u64 r_wr_id;            /* ID for current receive WQE */
-	unsigned long r_aflags;
-	u32 r_len;              /* total length of r_sge */
-	u32 r_rcv_len;          /* receive data len processed */
-	u32 r_psn;              /* expected rcv packet sequence number */
-	u32 r_msn;              /* message sequence number */
-	u16 s_hdrwords;         /* size of s_hdr in 32 bit words */
-	u16 s_rdma_ack_cnt;
-	u8 state;               /* QP state */
-	u8 s_state;             /* opcode of last packet sent */
-	u8 s_ack_state;         /* opcode of packet to ACK */
-	u8 s_nak_state;         /* non-zero if NAK is pending */
-	u8 r_state;             /* opcode of last packet received */
-	u8 r_nak_state;         /* non-zero if NAK is pending */
-	u8 r_min_rnr_timer;     /* retry timeout value for RNR NAKs */
-	u8 r_flags;
-	u8 r_max_rd_atomic;     /* max number of RDMA read/atomic to receive */
-	u8 r_head_ack_queue;    /* index into s_ack_queue[] */
-	u8 qp_access_flags;
-	u8 s_max_sge;           /* size of s_wq->sg_list */
-	u8 s_retry_cnt;         /* number of times to retry */
-	u8 s_rnr_retry_cnt;
-	u8 s_retry;             /* requester retry counter */
-	u8 s_rnr_retry;         /* requester RNR retry counter */
-	u8 s_pkey_index;        /* PKEY index to use */
-	u8 s_alt_pkey_index;    /* Alternate path PKEY index to use */
-	u8 s_max_rd_atomic;     /* max number of RDMA read/atomic to send */
-	u8 s_num_rd_atomic;     /* number of RDMA read/atomic pending */
-	u8 s_tail_ack_queue;    /* index into s_ack_queue[] */
-	u8 s_srate;
-	u8 s_draining;
-	u8 s_mig_state;
-	u8 timeout;             /* Timeout for this QP */
-	u8 alt_timeout;         /* Alternate path timeout for this QP */
-	u8 port_num;
-	enum ib_mtu path_mtu;
-	u32 pmtu;		/* decoded from path_mtu */
-	u32 remote_qpn;
-	u32 qkey;               /* QKEY for this QP (for UD or RD) */
-	u32 s_size;             /* send work queue size */
 	u32 s_head;             /* new entries added here */
 	u32 s_tail;             /* next entry to process */
 	u32 s_cur;              /* current work queue entry */
@@ -496,11 +499,27 @@
 	u32 s_last;             /* last completed entry */
 	u32 s_ssn;              /* SSN of tail entry */
 	u32 s_lsn;              /* limit sequence number (credit) */
-	unsigned long timeout_jiffies;  /* computed from timeout */
-	struct qib_swqe *s_wq;  /* send work queue */
-	struct qib_swqe *s_wqe;
-	struct qib_rq r_rq;             /* receive work queue */
-	struct qib_sge r_sg_list[0];    /* verified SGEs */
+	u16 s_hdrwords;         /* size of s_hdr in 32 bit words */
+	u16 s_rdma_ack_cnt;
+	u8 s_state;             /* opcode of last packet sent */
+	u8 s_ack_state;         /* opcode of packet to ACK */
+	u8 s_nak_state;         /* non-zero if NAK is pending */
+	u8 r_nak_state;         /* non-zero if NAK is pending */
+	u8 s_retry;             /* requester retry counter */
+	u8 s_rnr_retry;         /* requester RNR retry counter */
+	u8 s_num_rd_atomic;     /* number of RDMA read/atomic pending */
+	u8 s_tail_ack_queue;    /* index into s_ack_queue[] */
+
+	struct qib_sge_state s_ack_rdma_sge;
+	struct timer_list s_timer;
+	struct list_head iowait;        /* link for wait PIO buf */
+
+	struct work_struct s_work;
+
+	wait_queue_head_t wait_dma;
+
+	struct qib_sge r_sg_list[0] /* verified SGEs */
+		____cacheline_aligned_in_smp;
 };
 
 /*
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index db43b31..0ab8c9c 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -573,10 +573,9 @@
 
 	err = iser_connect(ib_conn, NULL, (struct sockaddr_in *)dst_addr,
 			   non_blocking);
-	if (err) {
-		iscsi_destroy_endpoint(ep);
+	if (err)
 		return ERR_PTR(err);
-	}
+
 	return ep;
 }
 
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 14224ba..2dddabd 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -613,8 +613,9 @@
 	ib_conn->cma_id = NULL;
 addr_failure:
 	ib_conn->state = ISER_CONN_DOWN;
+	iser_conn_put(ib_conn, 1); /* deref ib conn's cma id */
 connect_failure:
-	iser_conn_release(ib_conn, 1);
+	iser_conn_put(ib_conn, 1); /* deref ib conn; may deallocate it */
 	return err;
 }
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index 0fe1885..ec2dafe 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -51,6 +51,8 @@
 #define FW_VERSION_MINOR 1
 #define FW_VERSION_MICRO 0
 
+#define CH_WARN(adap, fmt, ...) dev_warn(adap->pdev_dev, fmt, ## __VA_ARGS__)
+
 enum {
 	MAX_NPORTS = 4,     /* max # of ports */
 	SERNUM_LEN = 24,    /* Serial # length */
@@ -64,6 +66,15 @@
 	MEM_MC
 };
 
+enum {
+	MEMWIN0_APERTURE = 65536,
+	MEMWIN0_BASE     = 0x30000,
+	MEMWIN1_APERTURE = 32768,
+	MEMWIN1_BASE     = 0x28000,
+	MEMWIN2_APERTURE = 2048,
+	MEMWIN2_BASE     = 0x1b800,
+};
+
 enum dev_master {
 	MASTER_CANT,
 	MASTER_MAY,
@@ -403,6 +414,9 @@
 	struct tx_sw_desc *sdesc;   /* address of SW Tx descriptor ring */
 	struct sge_qstat *stat;     /* queue status entry */
 	dma_addr_t    phys_addr;    /* physical address of the ring */
+	spinlock_t db_lock;
+	int db_disabled;
+	unsigned short db_pidx;
 };
 
 struct sge_eth_txq {                /* state for an SGE Ethernet Tx queue */
@@ -475,6 +489,7 @@
 	void __iomem *regs;
 	struct pci_dev *pdev;
 	struct device *pdev_dev;
+	unsigned int mbox;
 	unsigned int fn;
 	unsigned int flags;
 
@@ -504,6 +519,8 @@
 	void **tid_release_head;
 	spinlock_t tid_release_lock;
 	struct work_struct tid_release_task;
+	struct work_struct db_full_task;
+	struct work_struct db_drop_task;
 	bool tid_release_task_busy;
 
 	struct dentry *debugfs_root;
@@ -605,6 +622,7 @@
 void t4_sge_init(struct adapter *adap);
 void t4_sge_start(struct adapter *adap);
 void t4_sge_stop(struct adapter *adap);
+extern int dbfifo_int_thresh;
 
 #define for_each_port(adapter, iter) \
 	for (iter = 0; iter < (adapter)->params.nports; ++iter)
@@ -719,4 +737,9 @@
 int t4_ofld_eq_free(struct adapter *adap, unsigned int mbox, unsigned int pf,
 		    unsigned int vf, unsigned int eqid);
 int t4_handle_fw_rpl(struct adapter *adap, const __be64 *rpl);
+void t4_db_full(struct adapter *adapter);
+void t4_db_dropped(struct adapter *adapter);
+int t4_mem_win_read_len(struct adapter *adap, u32 addr, __be32 *data, int len);
+int t4_fwaddrspace_write(struct adapter *adap, unsigned int mbox,
+			 u32 addr, u32 val);
 #endif /* __CXGB4_H__ */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index b126b98..e1f96fb 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -149,15 +149,6 @@
 #endif
 
 enum {
-	MEMWIN0_APERTURE = 65536,
-	MEMWIN0_BASE     = 0x30000,
-	MEMWIN1_APERTURE = 32768,
-	MEMWIN1_BASE     = 0x28000,
-	MEMWIN2_APERTURE = 2048,
-	MEMWIN2_BASE     = 0x1b800,
-};
-
-enum {
 	MAX_TXQ_ENTRIES      = 16384,
 	MAX_CTRL_TXQ_ENTRIES = 1024,
 	MAX_RSPQ_ENTRIES     = 16384,
@@ -371,6 +362,15 @@
 				uhash | mhash, sleep);
 }
 
+int dbfifo_int_thresh = 10; /* 10 == 640 entry threshold */
+module_param(dbfifo_int_thresh, int, 0644);
+MODULE_PARM_DESC(dbfifo_int_thresh, "doorbell fifo interrupt threshold");
+
+int dbfifo_drain_delay = 1000; /* usecs to sleep while draining the dbfifo */
+module_param(dbfifo_drain_delay, int, 0644);
+MODULE_PARM_DESC(dbfifo_drain_delay,
+		 "usecs to sleep while draining the dbfifo");
+
 /*
  * Set Rx properties of a port, such as promiscruity, address filters, and MTU.
  * If @mtu is -1 it is left unchanged.
@@ -389,6 +389,8 @@
 	return ret;
 }
 
+static struct workqueue_struct *workq;
+
 /**
  *	link_start - enable a port
  *	@dev: the port to enable
@@ -2196,7 +2198,7 @@
 	adap->tid_release_head = (void **)((uintptr_t)p | chan);
 	if (!adap->tid_release_task_busy) {
 		adap->tid_release_task_busy = true;
-		schedule_work(&adap->tid_release_task);
+		queue_work(workq, &adap->tid_release_task);
 	}
 	spin_unlock_bh(&adap->tid_release_lock);
 }
@@ -2366,6 +2368,16 @@
 }
 EXPORT_SYMBOL(cxgb4_port_chan);
 
+unsigned int cxgb4_dbfifo_count(const struct net_device *dev, int lpfifo)
+{
+	struct adapter *adap = netdev2adap(dev);
+	u32 v;
+
+	v = t4_read_reg(adap, A_SGE_DBFIFO_STATUS);
+	return lpfifo ? G_LP_COUNT(v) : G_HP_COUNT(v);
+}
+EXPORT_SYMBOL(cxgb4_dbfifo_count);
+
 /**
  *	cxgb4_port_viid - get the VI id of a port
  *	@dev: the net device for the port
@@ -2413,6 +2425,59 @@
 }
 EXPORT_SYMBOL(cxgb4_iscsi_init);
 
+int cxgb4_flush_eq_cache(struct net_device *dev)
+{
+	struct adapter *adap = netdev2adap(dev);
+	int ret;
+
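+	/* a write through the firmware address space to the SGE context
+	 * command register flushes the egress-queue context cache
+	 */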
+	ret = t4_fwaddrspace_write(adap, adap->mbox,
+				   0xe1000000 + A_SGE_CTXT_CMD, 0x20000000);
+	return ret;
+}
+EXPORT_SYMBOL(cxgb4_flush_eq_cache);
+
+static int read_eq_indices(struct adapter *adap, u16 qid, u16 *pidx, u16 *cidx)
+{
+	u32 addr = t4_read_reg(adap, A_SGE_DBQ_CTXT_BADDR) + 24 * qid + 8;
+	__be64 indices;
+	int ret;
+
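+	/* each DBQ context entry is 24 bytes; the qword at offset 8
+	 * packs cidx at bit 25 and pidx at bit 9, 16 bits each
+	 */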
+	ret = t4_mem_win_read_len(adap, addr, (__be32 *)&indices, 8);
+	if (!ret) {
+		indices = be64_to_cpu(indices);
+		*cidx = (indices >> 25) & 0xffff;
+		*pidx = (indices >> 9) & 0xffff;
+	}
+	return ret;
+}
+
+int cxgb4_sync_txq_pidx(struct net_device *dev, u16 qid, u16 pidx,
+			u16 size)
+{
+	struct adapter *adap = netdev2adap(dev);
+	u16 hw_pidx, hw_cidx;
+	int ret;
+
+	ret = read_eq_indices(adap, qid, &hw_pidx, &hw_cidx);
+	if (ret)
+		goto out;
+
+	if (pidx != hw_pidx) {
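+		/* hw missed doorbell(s): ring in the delta, allowing for
+		 * ring wrap
+		 */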
+		u16 delta;
+
+		if (pidx >= hw_pidx)
+			delta = pidx - hw_pidx;
+		else
+			delta = size - hw_pidx + pidx;
+		wmb();
+		t4_write_reg(adap, MYPF_REG(A_SGE_PF_KDOORBELL),
+			     V_QID(qid) | V_PIDX(delta));
+	}
+out:
+	return ret;
+}
+EXPORT_SYMBOL(cxgb4_sync_txq_pidx);
+
 static struct pci_driver cxgb4_driver;
 
 static void check_neigh_update(struct neighbour *neigh)
@@ -2446,6 +2511,144 @@
 	.notifier_call = netevent_cb
 };
 
+static void drain_db_fifo(struct adapter *adap, int usecs)
+{
+	u32 v;
+
+	do {
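+		/* sleep briefly, then re-check the occupancy counters
+		 * until both the LP and HP FIFOs have drained
+		 */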
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		schedule_timeout(usecs_to_jiffies(usecs));
+		v = t4_read_reg(adap, A_SGE_DBFIFO_STATUS);
+		if (G_LP_COUNT(v) == 0 && G_HP_COUNT(v) == 0)
+			break;
+	} while (1);
+}
+
+static void disable_txq_db(struct sge_txq *q)
+{
+	spin_lock_irq(&q->db_lock);
+	q->db_disabled = 1;
+	spin_unlock_irq(&q->db_lock);
+}
+
+static void enable_txq_db(struct sge_txq *q)
+{
+	spin_lock_irq(&q->db_lock);
+	q->db_disabled = 0;
+	spin_unlock_irq(&q->db_lock);
+}
+
+static void disable_dbs(struct adapter *adap)
+{
+	int i;
+
+	for_each_ethrxq(&adap->sge, i)
+		disable_txq_db(&adap->sge.ethtxq[i].q);
+	for_each_ofldrxq(&adap->sge, i)
+		disable_txq_db(&adap->sge.ofldtxq[i].q);
+	for_each_port(adap, i)
+		disable_txq_db(&adap->sge.ctrlq[i].q);
+}
+
+static void enable_dbs(struct adapter *adap)
+{
+	int i;
+
+	for_each_ethrxq(&adap->sge, i)
+		enable_txq_db(&adap->sge.ethtxq[i].q);
+	for_each_ofldrxq(&adap->sge, i)
+		enable_txq_db(&adap->sge.ofldtxq[i].q);
+	for_each_port(adap, i)
+		enable_txq_db(&adap->sge.ctrlq[i].q);
+}
+
+static void sync_txq_pidx(struct adapter *adap, struct sge_txq *q)
+{
+	u16 hw_pidx, hw_cidx;
+	int ret;
+
+	spin_lock_bh(&q->db_lock);
+	ret = read_eq_indices(adap, (u16)q->cntxt_id, &hw_pidx, &hw_cidx);
+	if (ret)
+		goto out;
+	if (q->db_pidx != hw_pidx) {
+		u16 delta;
+
+		if (q->db_pidx >= hw_pidx)
+			delta = q->db_pidx - hw_pidx;
+		else
+			delta = q->size - hw_pidx + q->db_pidx;
+		wmb();
+		t4_write_reg(adap, MYPF_REG(A_SGE_PF_KDOORBELL),
+				V_QID(q->cntxt_id) | V_PIDX(delta));
+	}
+out:
+	q->db_disabled = 0;
+	spin_unlock_bh(&q->db_lock);
+	if (ret)
+		CH_WARN(adap, "DB drop recovery failed.\n");
+}
+
+static void recover_all_queues(struct adapter *adap)
+{
+	int i;
+
+	for_each_ethrxq(&adap->sge, i)
+		sync_txq_pidx(adap, &adap->sge.ethtxq[i].q);
+	for_each_ofldrxq(&adap->sge, i)
+		sync_txq_pidx(adap, &adap->sge.ofldtxq[i].q);
+	for_each_port(adap, i)
+		sync_txq_pidx(adap, &adap->sge.ctrlq[i].q);
+}
+
+static void notify_rdma_uld(struct adapter *adap, enum cxgb4_control cmd)
+{
+	mutex_lock(&uld_mutex);
+	if (adap->uld_handle[CXGB4_ULD_RDMA])
+		ulds[CXGB4_ULD_RDMA].control(adap->uld_handle[CXGB4_ULD_RDMA],
+				cmd);
+	mutex_unlock(&uld_mutex);
+}
+
+static void process_db_full(struct work_struct *work)
+{
+	struct adapter *adap;
+
+	adap = container_of(work, struct adapter, db_full_task);
+
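+	/* back-pressure the RDMA ULD, wait for the FIFO to drain, then
+	 * re-enable the threshold interrupts masked in t4_db_full()
+	 */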
+	notify_rdma_uld(adap, CXGB4_CONTROL_DB_FULL);
+	drain_db_fifo(adap, dbfifo_drain_delay);
+	t4_set_reg_field(adap, A_SGE_INT_ENABLE3,
+			F_DBFIFO_HP_INT | F_DBFIFO_LP_INT,
+			F_DBFIFO_HP_INT | F_DBFIFO_LP_INT);
+	notify_rdma_uld(adap, CXGB4_CONTROL_DB_EMPTY);
+}
+
+static void process_db_drop(struct work_struct *work)
+{
+	struct adapter *adap;
+
+	adap = container_of(work, struct adapter, db_drop_task);
+
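+	/* clear the dropped-doorbell status, stop sw doorbells, resync
+	 * every queue's pidx with hw, then resume normal operation
+	 */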
+	t4_set_reg_field(adap, A_SGE_DOORBELL_CONTROL, F_DROPPED_DB, 0);
+	disable_dbs(adap);
+	notify_rdma_uld(adap, CXGB4_CONTROL_DB_DROP);
+	drain_db_fifo(adap, 1);
+	recover_all_queues(adap);
+	enable_dbs(adap);
+}
+
+void t4_db_full(struct adapter *adap)
+{
+	t4_set_reg_field(adap, A_SGE_INT_ENABLE3,
+			F_DBFIFO_HP_INT | F_DBFIFO_LP_INT, 0);
+	queue_work(workq, &adap->db_full_task);
+}
+
+void t4_db_dropped(struct adapter *adap)
+{
+	queue_work(workq, &adap->db_drop_task);
+}
+
 static void uld_attach(struct adapter *adap, unsigned int uld)
 {
 	void *handle;
@@ -2479,6 +2682,7 @@
 	lli.gts_reg = adap->regs + MYPF_REG(SGE_PF_GTS);
 	lli.db_reg = adap->regs + MYPF_REG(SGE_PF_KDOORBELL);
 	lli.fw_vers = adap->params.fw_vers;
+	lli.dbfifo_int_thresh = dbfifo_int_thresh;
 
 	handle = ulds[uld].add(&lli);
 	if (IS_ERR(handle)) {
@@ -2649,6 +2853,8 @@
 {
 	t4_intr_disable(adapter);
 	cancel_work_sync(&adapter->tid_release_task);
+	cancel_work_sync(&adapter->db_full_task);
+	cancel_work_sync(&adapter->db_drop_task);
 	adapter->tid_release_task_busy = false;
 	adapter->tid_release_head = NULL;
 
@@ -3593,6 +3799,7 @@
 
 	adapter->pdev = pdev;
 	adapter->pdev_dev = &pdev->dev;
+	adapter->mbox = func;
 	adapter->fn = func;
 	adapter->msg_enable = dflt_msg_enable;
 	memset(adapter->chan_map, 0xff, sizeof(adapter->chan_map));
@@ -3601,6 +3808,8 @@
 	spin_lock_init(&adapter->tid_release_lock);
 
 	INIT_WORK(&adapter->tid_release_task, process_tid_release_list);
+	INIT_WORK(&adapter->db_full_task, process_db_full);
+	INIT_WORK(&adapter->db_drop_task, process_db_drop);
 
 	err = t4_prep_adapter(adapter);
 	if (err)
@@ -3788,6 +3997,10 @@
 {
 	int ret;
 
+	workq = create_singlethread_workqueue("cxgb4");
+	if (!workq)
+		return -ENOMEM;
+
 	/* Debugfs support is optional, just warn if this fails */
 	cxgb4_debugfs_root = debugfs_create_dir(KBUILD_MODNAME, NULL);
 	if (!cxgb4_debugfs_root)
@@ -3803,6 +4016,8 @@
 {
 	pci_unregister_driver(&cxgb4_driver);
 	debugfs_remove(cxgb4_debugfs_root);  /* NULL ok */
+	flush_workqueue(workq);
+	destroy_workqueue(workq);
 }
 
 module_init(cxgb4_init_module);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
index b1d39b8..d79980c 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
@@ -163,6 +163,12 @@
 	CXGB4_STATE_DETACH
 };
 
+enum cxgb4_control {
+	CXGB4_CONTROL_DB_FULL,
+	CXGB4_CONTROL_DB_EMPTY,
+	CXGB4_CONTROL_DB_DROP,
+};
+
 struct pci_dev;
 struct l2t_data;
 struct net_device;
@@ -212,6 +218,7 @@
 	unsigned short ucq_density;          /* # of user CQs/page */
 	void __iomem *gts_reg;               /* address of GTS register */
 	void __iomem *db_reg;                /* address of kernel doorbell */
+	int dbfifo_int_thresh;		     /* doorbell fifo int threshold */
 };
 
 struct cxgb4_uld_info {
@@ -220,11 +227,13 @@
 	int (*rx_handler)(void *handle, const __be64 *rsp,
 			  const struct pkt_gl *gl);
 	int (*state_change)(void *handle, enum cxgb4_state new_state);
+	int (*control)(void *handle, enum cxgb4_control control, ...);
 };
 
 int cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p);
 int cxgb4_unregister_uld(enum cxgb4_uld type);
 int cxgb4_ofld_send(struct net_device *dev, struct sk_buff *skb);
+unsigned int cxgb4_dbfifo_count(const struct net_device *dev, int lpfifo);
 unsigned int cxgb4_port_chan(const struct net_device *dev);
 unsigned int cxgb4_port_viid(const struct net_device *dev);
 unsigned int cxgb4_port_idx(const struct net_device *dev);
@@ -236,4 +245,6 @@
 		      const unsigned int *pgsz_order);
 struct sk_buff *cxgb4_pktgl_to_skb(const struct pkt_gl *gl,
 				   unsigned int skb_len, unsigned int pull_len);
+int cxgb4_sync_txq_pidx(struct net_device *dev, u16 qid, u16 pidx, u16 size);
+int cxgb4_flush_eq_cache(struct net_device *dev);
 #endif  /* !__CXGB4_OFLD_H */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index 2dae795..e111d97 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -767,8 +767,13 @@
 static inline void ring_tx_db(struct adapter *adap, struct sge_txq *q, int n)
 {
 	wmb();            /* write descriptors before telling HW */
-	t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL),
-		     QID(q->cntxt_id) | PIDX(n));
+	spin_lock(&q->db_lock);
+	if (!q->db_disabled) {
+		t4_write_reg(adap, MYPF_REG(A_SGE_PF_KDOORBELL),
+			     V_QID(q->cntxt_id) | V_PIDX(n));
+	}
+	q->db_pidx = q->pidx;
+	spin_unlock(&q->db_lock);
 }
 
 /**
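For context on the db_lock/db_disabled protocol introduced in ring_tx_db() above: while doorbells are disabled the queue only records its producer index in db_pidx, and a recovery path later replays the missed doorbell. The patch's actual recovery runs through process_db_full()/process_db_drop() and cxgb4_sync_txq_pidx(); the helper below is only a sketch of the replay arithmetic:

/* Illustrative replay of a doorbell suppressed while q->db_disabled
 * was set; assumes the doorbell FIFO has been re-enabled.
 */
static void replay_tx_db(struct adapter *adap, struct sge_txq *q)
{
	unsigned int delta;

	spin_lock(&q->db_lock);
	q->db_disabled = 0;
	/* descriptors written since the last doorbell that reached HW */
	delta = (q->pidx >= q->db_pidx) ? q->pidx - q->db_pidx
					: q->pidx + q->size - q->db_pidx;
	if (delta)
		t4_write_reg(adap, MYPF_REG(A_SGE_PF_KDOORBELL),
			     V_QID(q->cntxt_id) | V_PIDX(delta));
	q->db_pidx = q->pidx;
	spin_unlock(&q->db_lock);
}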
@@ -2081,6 +2086,7 @@
 	q->stops = q->restarts = 0;
 	q->stat = (void *)&q->desc[q->size];
 	q->cntxt_id = id;
+	spin_lock_init(&q->db_lock);
 	adap->sge.egr_map[id - adap->sge.egr_start] = q;
 }
 
@@ -2415,6 +2421,18 @@
 			 RXPKTCPLMODE |
 			 (STAT_LEN == 128 ? EGRSTATUSPAGESIZE : 0));
 
+	/*
+	 * Set up to drop DOORBELL writes when the DOORBELL FIFO overflows
+	 * and generate an interrupt when this occurs so we can recover.
+	 */
+	t4_set_reg_field(adap, A_SGE_DBFIFO_STATUS,
+			V_HP_INT_THRESH(M_HP_INT_THRESH) |
+			V_LP_INT_THRESH(M_LP_INT_THRESH),
+			V_HP_INT_THRESH(dbfifo_int_thresh) |
+			V_LP_INT_THRESH(dbfifo_int_thresh));
+	t4_set_reg_field(adap, A_SGE_DOORBELL_CONTROL, F_ENABLE_DROP,
+			F_ENABLE_DROP);
+
 	for (i = v = 0; i < 32; i += 4)
 		v |= (PAGE_SHIFT - 10) << i;
 	t4_write_reg(adap, SGE_HOST_PAGE_SIZE, v);
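The t4_set_reg_field() calls above are read-modify-write updates: the second argument selects the field bits to clear and the third supplies the new value. A sketch of the equivalent open-coded update for the threshold write (same register and macro names):

u32 v = t4_read_reg(adap, A_SGE_DBFIFO_STATUS);

v &= ~(V_HP_INT_THRESH(M_HP_INT_THRESH) |
       V_LP_INT_THRESH(M_LP_INT_THRESH));	/* clear both fields */
v |= V_HP_INT_THRESH(dbfifo_int_thresh) |
     V_LP_INT_THRESH(dbfifo_int_thresh);	/* set the new thresholds */
t4_write_reg(adap, A_SGE_DBFIFO_STATUS, v);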
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index d1ec111..32e1dd5 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -868,11 +868,14 @@
 	return t4_wr_mbox(adap, mbox, &c, sizeof(c), NULL);
 }
 
+typedef void (*int_handler_t)(struct adapter *adap);
+
 struct intr_info {
 	unsigned int mask;       /* bits to check in interrupt status */
 	const char *msg;         /* message to print or NULL */
 	short stat_idx;          /* stat counter to increment or -1 */
 	unsigned short fatal;    /* whether the condition reported is fatal */
+	int_handler_t int_handler; /* platform-specific int handler */
 };
 
 /**
@@ -905,6 +908,8 @@
 		} else if (acts->msg && printk_ratelimit())
 			dev_warn(adapter->pdev_dev, "%s (0x%x)\n", acts->msg,
 				 status & acts->mask);
+		if (acts->int_handler)
+			acts->int_handler(adapter);
 		mask |= acts->mask;
 	}
 	status &= mask;
@@ -1013,7 +1018,9 @@
 		{ ERR_INVALID_CIDX_INC,
 		  "SGE GTS CIDX increment too large", -1, 0 },
 		{ ERR_CPL_OPCODE_0, "SGE received 0-length CPL", -1, 0 },
-		{ ERR_DROPPED_DB, "SGE doorbell dropped", -1, 0 },
+		{ F_DBFIFO_LP_INT, NULL, -1, 0, t4_db_full },
+		{ F_DBFIFO_HP_INT, NULL, -1, 0, t4_db_full },
+		{ F_ERR_DROPPED_DB, NULL, -1, 0, t4_db_dropped },
 		{ ERR_DATA_CPL_ON_HIGH_QID1 | ERR_DATA_CPL_ON_HIGH_QID0,
 		  "SGE IQID > 1023 received CPL for FL", -1, 0 },
 		{ ERR_BAD_DB_PIDX3, "SGE DBP 3 pidx increment too large", -1,
@@ -1034,10 +1041,10 @@
 	};
 
 	v = (u64)t4_read_reg(adapter, SGE_INT_CAUSE1) |
-	    ((u64)t4_read_reg(adapter, SGE_INT_CAUSE2) << 32);
+		((u64)t4_read_reg(adapter, SGE_INT_CAUSE2) << 32);
 	if (v) {
 		dev_alert(adapter->pdev_dev, "SGE parity error (%#llx)\n",
-			 (unsigned long long)v);
+				(unsigned long long)v);
 		t4_write_reg(adapter, SGE_INT_CAUSE1, v);
 		t4_write_reg(adapter, SGE_INT_CAUSE2, v >> 32);
 	}
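The t4_db_full/t4_db_dropped handlers wired into the interrupt table above are defined elsewhere in this patch; in outline they mask the doorbell-FIFO interrupts and defer recovery to the "cxgb4" workqueue created in cxgb4_init_module(). A rough sketch of that shape, not necessarily the exact code:

void t4_db_full(struct adapter *adap)
{
	/* mask further FIFO-threshold interrupts until recovery is done */
	t4_set_reg_field(adap, A_SGE_INT_ENABLE3,
			 F_DBFIFO_HP_INT | F_DBFIFO_LP_INT, 0);
	/* heavy lifting happens in process_db_full() on the workqueue */
	queue_work(workq, &adap->db_full_task);
}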
@@ -1513,6 +1520,7 @@
 		     ERR_BAD_DB_PIDX2 | ERR_BAD_DB_PIDX1 |
 		     ERR_BAD_DB_PIDX0 | ERR_ING_CTXT_PRIO |
 		     ERR_EGR_CTXT_PRIO | INGRESS_SIZE_ERR |
+		     F_DBFIFO_HP_INT | F_DBFIFO_LP_INT |
 		     EGRESS_SIZE_ERR);
 	t4_write_reg(adapter, MYPF_REG(PL_PF_INT_ENABLE), PF_INTR_MASK);
 	t4_set_reg_field(adapter, PL_INT_MAP0, 0, 1 << pf);
@@ -1986,6 +1994,54 @@
 	(var).retval_len16 = htonl(FW_LEN16(var)); \
 } while (0)
 
+int t4_fwaddrspace_write(struct adapter *adap, unsigned int mbox,
+			  u32 addr, u32 val)
+{
+	struct fw_ldst_cmd c;
+
+	memset(&c, 0, sizeof(c));
+	c.op_to_addrspace = htonl(V_FW_CMD_OP(FW_LDST_CMD) | F_FW_CMD_REQUEST |
+			    F_FW_CMD_WRITE |
+			    V_FW_LDST_CMD_ADDRSPACE(FW_LDST_ADDRSPC_FIRMWARE));
+	c.cycles_to_len16 = htonl(FW_LEN16(c));
+	c.u.addrval.addr = htonl(addr);
+	c.u.addrval.val = htonl(val);
+
+	return t4_wr_mbox(adap, mbox, &c, sizeof(c), NULL);
+}
+
+/*
+ *     t4_mem_win_read_len - read memory through PCIE memory window
+ *     @adap: the adapter
+ *     @addr: address of the first byte requested, 32-bit aligned
+ *     @data: buffer of at least len bytes to hold the data read
+ *     @len: amount of data to read from the window.  Must be <=
+ *            MEMWIN0_APERTURE after adjusting for the 16B alignment
+ *            requirements of the memory window.
+ *
+ *     Read len bytes of data from MC starting at @addr.
+ */
+int t4_mem_win_read_len(struct adapter *adap, u32 addr, __be32 *data, int len)
+{
+	int i;
+	int off;
+
+	/*
+	 * Align on a 16B boundary.
+	 */
+	off = addr & 15;
+	if ((addr & 3) || (len + off) > MEMWIN0_APERTURE)
+		return -EINVAL;
+
+	t4_write_reg(adap, A_PCIE_MEM_ACCESS_OFFSET, addr & ~15);
+	t4_read_reg(adap, A_PCIE_MEM_ACCESS_OFFSET);
+
+	for (i = 0; i < len; i += 4)
+		*data++ = t4_read_reg(adap, (MEMWIN0_BASE + off + i));
+
+	return 0;
+}
+
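A hedged usage sketch for t4_mem_win_read_len() above: read 64 bytes of adapter memory through window 0. The address and buffer are illustrative; the constraints are those given in the comment (32-bit aligned address, length within MEMWIN0_APERTURE after the 16B adjustment):

__be32 buf[16];		/* 64 bytes, fetched as 32-bit register reads */
int ret;

ret = t4_mem_win_read_len(adap, 0x1000, buf, sizeof(buf));
if (ret)
	dev_err(adap->pdev_dev, "memory window read failed: %d\n", ret);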
 /**
  *	t4_mdio_rd - read a PHY register through MDIO
  *	@adap: the adapter
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h
index 0adc5bc..111fc32 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h
@@ -190,6 +190,59 @@
 #define SGE_DEBUG_DATA_LOW 0x10d4
 #define SGE_INGRESS_QUEUES_PER_PAGE_PF 0x10f4
 
+#define S_LP_INT_THRESH    12
+#define V_LP_INT_THRESH(x) ((x) << S_LP_INT_THRESH)
+#define S_HP_INT_THRESH    28
+#define V_HP_INT_THRESH(x) ((x) << S_HP_INT_THRESH)
+#define A_SGE_DBFIFO_STATUS 0x10a4
+
+#define S_ENABLE_DROP    13
+#define V_ENABLE_DROP(x) ((x) << S_ENABLE_DROP)
+#define F_ENABLE_DROP    V_ENABLE_DROP(1U)
+#define A_SGE_DOORBELL_CONTROL 0x10a8
+
+#define A_SGE_CTXT_CMD 0x11fc
+#define A_SGE_DBQ_CTXT_BADDR 0x1084
+
+#define A_SGE_PF_KDOORBELL 0x0
+
+#define S_QID 15
+#define V_QID(x) ((x) << S_QID)
+
+#define S_PIDX 0
+#define V_PIDX(x) ((x) << S_PIDX)
+
+#define M_LP_COUNT 0x7ffU
+#define S_LP_COUNT 0
+#define G_LP_COUNT(x) (((x) >> S_LP_COUNT) & M_LP_COUNT)
+
+#define M_HP_COUNT 0x7ffU
+#define S_HP_COUNT 16
+#define G_HP_COUNT(x) (((x) >> S_HP_COUNT) & M_HP_COUNT)
+
+#define A_SGE_INT_ENABLE3 0x1040
+
+#define S_DBFIFO_HP_INT 8
+#define V_DBFIFO_HP_INT(x) ((x) << S_DBFIFO_HP_INT)
+#define F_DBFIFO_HP_INT V_DBFIFO_HP_INT(1U)
+
+#define S_DBFIFO_LP_INT 7
+#define V_DBFIFO_LP_INT(x) ((x) << S_DBFIFO_LP_INT)
+#define F_DBFIFO_LP_INT V_DBFIFO_LP_INT(1U)
+
+#define S_DROPPED_DB 0
+#define V_DROPPED_DB(x) ((x) << S_DROPPED_DB)
+#define F_DROPPED_DB V_DROPPED_DB(1U)
+
+#define S_ERR_DROPPED_DB 18
+#define V_ERR_DROPPED_DB(x) ((x) << S_ERR_DROPPED_DB)
+#define F_ERR_DROPPED_DB V_ERR_DROPPED_DB(1U)
+
+#define A_PCIE_MEM_ACCESS_OFFSET 0x306c
+
+#define M_HP_INT_THRESH 0xfU
+#define M_LP_INT_THRESH 0xfU
+
 #define PCIE_PF_CLI 0x44
 #define PCIE_INT_CAUSE 0x3004
 #define  UNXSPLCPLERR  0x20000000U
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
index edcfd7e..ad53f79 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
@@ -1620,4 +1620,19 @@
 #define FW_HDR_FW_VER_MINOR_GET(x) (((x) >> 16) & 0xff)
 #define FW_HDR_FW_VER_MICRO_GET(x) (((x) >> 8) & 0xff)
 #define FW_HDR_FW_VER_BUILD_GET(x) (((x) >> 0) & 0xff)
+
+#define S_FW_CMD_OP 24
+#define V_FW_CMD_OP(x) ((x) << S_FW_CMD_OP)
+
+#define S_FW_CMD_REQUEST 23
+#define V_FW_CMD_REQUEST(x) ((x) << S_FW_CMD_REQUEST)
+#define F_FW_CMD_REQUEST V_FW_CMD_REQUEST(1U)
+
+#define S_FW_CMD_WRITE 21
+#define V_FW_CMD_WRITE(x) ((x) << S_FW_CMD_WRITE)
+#define F_FW_CMD_WRITE V_FW_CMD_WRITE(1U)
+
+#define S_FW_LDST_CMD_ADDRSPACE 0
+#define V_FW_LDST_CMD_ADDRSPACE(x) ((x) << S_FW_LDST_CMD_ADDRSPACE)
+
 #endif /* _T4FW_INTERFACE_H_ */
diff --git a/drivers/net/ethernet/emulex/benet/Makefile b/drivers/net/ethernet/emulex/benet/Makefile
index a60cd80..1a91b27 100644
--- a/drivers/net/ethernet/emulex/benet/Makefile
+++ b/drivers/net/ethernet/emulex/benet/Makefile
@@ -4,4 +4,4 @@
 
 obj-$(CONFIG_BE2NET) += be2net.o
 
-be2net-y :=  be_main.o be_cmds.o be_ethtool.o
+be2net-y :=  be_main.o be_cmds.o be_ethtool.o be_roce.o
diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h
index 9576ac0..7bb2e97 100644
--- a/drivers/net/ethernet/emulex/benet/be.h
+++ b/drivers/net/ethernet/emulex/benet/be.h
@@ -32,6 +32,7 @@
 #include <linux/u64_stats_sync.h>
 
 #include "be_hw.h"
+#include "be_roce.h"
 
 #define DRV_VER			"4.2.116u"
 #define DRV_NAME		"be2net"
@@ -102,7 +103,8 @@
 #define MAX_RX_QS		(MAX_RSS_QS + 1) /* RSS qs + 1 def Rx */
 
 #define MAX_TX_QS		8
-#define MAX_MSIX_VECTORS	MAX_RSS_QS
+#define MAX_ROCE_EQS		5
+#define MAX_MSIX_VECTORS	(MAX_RSS_QS + MAX_ROCE_EQS) /* RSS qs + RoCE */
 #define BE_TX_BUDGET		256
 #define BE_NAPI_WEIGHT		64
 #define MAX_RX_POST		BE_NAPI_WEIGHT /* Frags posted at a time */
@@ -382,6 +384,17 @@
 	u8 transceiver;
 	u8 autoneg;
 	u8 generation;		/* BladeEngine ASIC generation */
+	u32 if_type;
+	struct {
+		u8 __iomem *base;	/* Doorbell */
+		u32 size;
+		u32 total_size;
+		u64 io_addr;
+	} roce_db;
+	u32 num_msix_roce_vec;
+	struct ocrdma_dev *ocrdma_dev;
+	struct list_head entry;
+
 	u32 flash_status;
 	struct completion flash_compl;
 
@@ -413,6 +426,10 @@
 #define lancer_chip(adapter)	((adapter->pdev->device == OC_DEVICE_ID3) || \
 				 (adapter->pdev->device == OC_DEVICE_ID4))
 
+#define be_roce_supported(adapter) ((adapter->if_type == SLI_INTF_TYPE_3 || \
+				adapter->sli_family == SKYHAWK_SLI_FAMILY) && \
+				(adapter->function_mode & RDMA_ENABLED))
+
 extern const struct ethtool_ops be_ethtool_ops;
 
 #define msix_enabled(adapter)		(adapter->num_msix_vec > 0)
@@ -577,10 +594,29 @@
 	}
 }
 
+static inline bool be_type_2_3(struct be_adapter *adapter)
+{
+	return adapter->if_type == SLI_INTF_TYPE_2 ||
+	       adapter->if_type == SLI_INTF_TYPE_3;
+}
+
 extern void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm,
 		u16 num_popped);
 extern void be_link_status_update(struct be_adapter *adapter, u8 link_status);
 extern void be_parse_stats(struct be_adapter *adapter);
 extern int be_load_fw(struct be_adapter *adapter, u8 *func);
 extern bool be_is_wol_supported(struct be_adapter *adapter);
+
+/*
+ * internal functions to initialize and clean up the roce device.
+ */
+extern void be_roce_dev_add(struct be_adapter *);
+extern void be_roce_dev_remove(struct be_adapter *);
+
+/*
+ * internal functions to open and close the roce device during ifup/ifdown.
+ */
+extern void be_roce_dev_open(struct be_adapter *);
+extern void be_roce_dev_close(struct be_adapter *);
+
 #endif				/* BE_H */
diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c
index 67b030d..1c7c7d0 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.c
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.c
@@ -15,6 +15,7 @@
  * Costa Mesa, CA 92626
  */
 
+#include <linux/module.h>
 #include "be.h"
 #include "be_cmds.h"
 
@@ -2556,3 +2557,41 @@
 	pci_free_consistent(adapter->pdev, cmd.size, cmd.va, cmd.dma);
 	return status;
 }
+
+int be_roce_mcc_cmd(void *netdev_handle, void *wrb_payload,
+			int wrb_payload_size, u16 *cmd_status, u16 *ext_status)
+{
+	struct be_adapter *adapter = netdev_priv(netdev_handle);
+	struct be_mcc_wrb *wrb;
+	struct be_cmd_req_hdr *hdr = (struct be_cmd_req_hdr *) wrb_payload;
+	struct be_cmd_req_hdr *req;
+	struct be_cmd_resp_hdr *resp;
+	int status;
+
+	spin_lock_bh(&adapter->mcc_lock);
+
+	wrb = wrb_from_mccq(adapter);
+	if (!wrb) {
+		status = -EBUSY;
+		goto err;
+	}
+	req = embedded_payload(wrb);
+	resp = embedded_payload(wrb);
+
+	be_wrb_cmd_hdr_prepare(req, hdr->subsystem,
+			       hdr->opcode, wrb_payload_size, wrb, NULL);
+	memcpy(req, wrb_payload, wrb_payload_size);
+	be_dws_cpu_to_le(req, wrb_payload_size);
+
+	status = be_mcc_notify_wait(adapter);
+	if (cmd_status)
+		*cmd_status = (status & 0xffff);
+	if (ext_status)
+		*ext_status = 0;
+	memcpy(wrb_payload, resp, sizeof(*resp) + resp->response_length);
+	be_dws_le_to_cpu(wrb_payload, sizeof(*resp) + resp->response_length);
+err:
+	spin_unlock_bh(&adapter->mcc_lock);
+	return status;
+}
+EXPORT_SYMBOL(be_roce_mcc_cmd);
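A sketch of how a RoCE driver might call the new pass-through above; 'netdev' is the be2net net_device and 'req' stands for any firmware request beginning with struct be_cmd_req_hdr that fits in an embedded WRB payload (both are illustrative assumptions):

u16 cmd_status = 0, ext_status = 0;
int rc;

rc = be_roce_mcc_cmd(netdev, &req, sizeof(req), &cmd_status, &ext_status);
if (rc || cmd_status)
	pr_err("roce mailbox cmd failed: rc=%d status=%u\n",
	       rc, cmd_status);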
diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h
index d5b680c..54eabd8f 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.h
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.h
@@ -1056,6 +1056,7 @@
 /* The HW can come up in either of the following multi-channel modes
  * based on the skew/IPL.
  */
+#define RDMA_ENABLED				0x4
 #define FLEX10_MODE				0x400
 #define VNIC_MODE				0x20000
 #define UMC_ENABLED				0x1000000
diff --git a/drivers/net/ethernet/emulex/benet/be_hw.h b/drivers/net/ethernet/emulex/benet/be_hw.h
index f2c89e3..23a3454 100644
--- a/drivers/net/ethernet/emulex/benet/be_hw.h
+++ b/drivers/net/ethernet/emulex/benet/be_hw.h
@@ -98,11 +98,13 @@
 #define SLI_INTF_REV_SHIFT			4
 #define SLI_INTF_FT_MASK			0x00000001
 
+#define SLI_INTF_TYPE_2		2
+#define SLI_INTF_TYPE_3		3
 
 /* SLI family */
 #define BE_SLI_FAMILY		0x0
 #define LANCER_A0_SLI_FAMILY	0xA
-
+#define SKYHAWK_SLI_FAMILY      0x2
 
 /********* ISR0 Register offset **********/
 #define CEV_ISR0_OFFSET 			0xC18
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 528a886..fcc15e7 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -2103,10 +2103,17 @@
 static void be_msix_enable(struct be_adapter *adapter)
 {
 #define BE_MIN_MSIX_VECTORS		1
-	int i, status, num_vec;
+	int i, status, num_vec, num_roce_vec = 0;
 
 	/* If RSS queues are not used, need a vec for default RX Q */
 	num_vec = min(be_num_rss_want(adapter), num_online_cpus());
+	if (be_roce_supported(adapter)) {
+		num_roce_vec = min_t(u32, MAX_ROCE_MSIX_VECTORS,
+					(num_online_cpus() + 1));
+		num_roce_vec = min(num_roce_vec, MAX_ROCE_EQS);
+		num_vec += num_roce_vec;
+		num_vec = min(num_vec, MAX_MSIX_VECTORS);
+	}
 	num_vec = max(num_vec, BE_MIN_MSIX_VECTORS);
 
 	for (i = 0; i < num_vec; i++)
@@ -2123,7 +2130,17 @@
 	}
 	return;
 done:
-	adapter->num_msix_vec = num_vec;
+	if (be_roce_supported(adapter)) {
+		if (num_vec > num_roce_vec) {
+			adapter->num_msix_vec = num_vec - num_roce_vec;
+			adapter->num_msix_roce_vec =
+				num_vec - adapter->num_msix_vec;
+		} else {
+			adapter->num_msix_vec = num_vec;
+			adapter->num_msix_roce_vec = 0;
+		}
+	} else
+		adapter->num_msix_vec = num_vec;
 	return;
 }
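A worked example of the split above, with illustrative numbers: on an 8-CPU host wanting 8 RSS queues, num_roce_vec = min(min(16, 8 + 1), MAX_ROCE_EQS) = 5, so 8 + 5 = 13 vectors are requested (assuming MAX_MSIX_VECTORS allows it). If all 13 are granted, the NIC keeps 13 - 5 = 8 and RoCE gets the remaining 5; if the grant shrinks to num_roce_vec or fewer, the NIC keeps everything and num_msix_roce_vec stays 0.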
 
@@ -2282,6 +2299,8 @@
 	struct be_eq_obj *eqo;
 	int i;
 
+	be_roce_dev_close(adapter);
+
 	be_async_mcc_disable(adapter);
 
 	if (!lancer_chip(adapter))
@@ -2390,6 +2409,7 @@
 	if (!status)
 		be_link_status_update(adapter, link_status);
 
+	be_roce_dev_open(adapter);
 	return 0;
 err:
 	be_close(adapter->netdev);
@@ -3122,6 +3142,24 @@
 		iounmap(adapter->csr);
 	if (adapter->db)
 		iounmap(adapter->db);
+	if (adapter->roce_db.base)
+		pci_iounmap(adapter->pdev, adapter->roce_db.base);
+}
+
+static int lancer_roce_map_pci_bars(struct be_adapter *adapter)
+{
+	struct pci_dev *pdev = adapter->pdev;
+	u8 __iomem *addr;
+
+	addr = pci_iomap(pdev, 2, 0);
+	if (addr == NULL)
+		return -ENOMEM;
+
+	adapter->roce_db.base = addr;
+	adapter->roce_db.io_addr = pci_resource_start(pdev, 2);
+	adapter->roce_db.size = 8192;
+	adapter->roce_db.total_size = pci_resource_len(pdev, 2);
+	return 0;
 }
 
 static int be_map_pci_bars(struct be_adapter *adapter)
@@ -3130,11 +3168,18 @@
 	int db_reg;
 
 	if (lancer_chip(adapter)) {
-		addr = ioremap_nocache(pci_resource_start(adapter->pdev, 0),
-			pci_resource_len(adapter->pdev, 0));
-		if (addr == NULL)
-			return -ENOMEM;
-		adapter->db = addr;
+		if (be_type_2_3(adapter)) {
+			addr = ioremap_nocache(
+					pci_resource_start(adapter->pdev, 0),
+					pci_resource_len(adapter->pdev, 0));
+			if (addr == NULL)
+				return -ENOMEM;
+			adapter->db = addr;
+		}
+		if (adapter->if_type == SLI_INTF_TYPE_3) {
+			if (lancer_roce_map_pci_bars(adapter))
+				goto pci_map_err;
+		}
 		return 0;
 	}
 
@@ -3159,14 +3204,19 @@
 	if (addr == NULL)
 		goto pci_map_err;
 	adapter->db = addr;
-
+	if (adapter->sli_family == SKYHAWK_SLI_FAMILY) {
+		adapter->roce_db.size = 4096;
+		adapter->roce_db.io_addr =
+				pci_resource_start(adapter->pdev, db_reg);
+		adapter->roce_db.total_size =
+				pci_resource_len(adapter->pdev, db_reg);
+	}
 	return 0;
 pci_map_err:
 	be_unmap_pci_bars(adapter);
 	return -ENOMEM;
 }
 
-
 static void be_ctrl_cleanup(struct be_adapter *adapter)
 {
 	struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
@@ -3272,6 +3322,8 @@
 	if (!adapter)
 		return;
 
+	be_roce_dev_remove(adapter);
+
 	unregister_netdev(adapter->netdev);
 
 	be_clear(adapter);
@@ -3350,17 +3402,27 @@
 		break;
 	case BE_DEVICE_ID2:
 	case OC_DEVICE_ID2:
-	case OC_DEVICE_ID5:
 		adapter->generation = BE_GEN3;
 		break;
 	case OC_DEVICE_ID3:
 	case OC_DEVICE_ID4:
 		pci_read_config_dword(pdev, SLI_INTF_REG_OFFSET, &sli_intf);
+		adapter->if_type = (sli_intf & SLI_INTF_IF_TYPE_MASK) >>
+						SLI_INTF_IF_TYPE_SHIFT;
 		if_type = (sli_intf & SLI_INTF_IF_TYPE_MASK) >>
 						SLI_INTF_IF_TYPE_SHIFT;
-
 		if (((sli_intf & SLI_INTF_VALID_MASK) != SLI_INTF_VALID) ||
-			if_type != 0x02) {
+			!be_type_2_3(adapter)) {
+			dev_err(&pdev->dev, "SLI_INTF reg val is not valid\n");
+			return -EINVAL;
+		}
+		adapter->sli_family = ((sli_intf & SLI_INTF_FAMILY_MASK) >>
+					 SLI_INTF_FAMILY_SHIFT);
+		adapter->generation = BE_GEN3;
+		break;
+	case OC_DEVICE_ID5:
+		pci_read_config_dword(pdev, SLI_INTF_REG_OFFSET, &sli_intf);
+		if ((sli_intf & SLI_INTF_VALID_MASK) != SLI_INTF_VALID) {
 			dev_err(&pdev->dev, "SLI_INTF reg val is not valid\n");
 			return -EINVAL;
 		}
@@ -3620,6 +3682,8 @@
 	if (status != 0)
 		goto unsetup;
 
+	be_roce_dev_add(adapter);
+
 	dev_info(&pdev->dev, "%s: %s port %d\n", netdev->name, nic_name(pdev),
 		adapter->port_num);
 
diff --git a/drivers/net/ethernet/emulex/benet/be_roce.c b/drivers/net/ethernet/emulex/benet/be_roce.c
new file mode 100644
index 0000000..deecc44
--- /dev/null
+++ b/drivers/net/ethernet/emulex/benet/be_roce.c
@@ -0,0 +1,182 @@
+/*
+ * Copyright (C) 2005 - 2011 Emulex
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation. The full GNU General
+ * Public License is included in this distribution in the file called COPYING.
+ *
+ * Contact Information:
+ * linux-drivers@emulex.com
+ *
+ * Emulex
+ * 3333 Susan Street
+ * Costa Mesa, CA 92626
+ */
+
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <linux/module.h>
+
+#include "be.h"
+#include "be_cmds.h"
+
+static struct ocrdma_driver *ocrdma_drv;
+static LIST_HEAD(be_adapter_list);
+static DEFINE_MUTEX(be_adapter_list_lock);
+
+static void _be_roce_dev_add(struct be_adapter *adapter)
+{
+	struct be_dev_info dev_info;
+	int i, num_vec;
+	struct pci_dev *pdev = adapter->pdev;
+
+	if (!ocrdma_drv)
+		return;
+	if (pdev->device == OC_DEVICE_ID5) {
+		/* only msix is supported on these devices */
+		if (!msix_enabled(adapter))
+			return;
+		/* DPP region address and length */
+		dev_info.dpp_unmapped_addr = pci_resource_start(pdev, 2);
+		dev_info.dpp_unmapped_len = pci_resource_len(pdev, 2);
+	} else {
+		dev_info.dpp_unmapped_addr = 0;
+		dev_info.dpp_unmapped_len = 0;
+	}
+	dev_info.pdev = adapter->pdev;
+	if (adapter->sli_family == SKYHAWK_SLI_FAMILY)
+		dev_info.db = adapter->db;
+	else
+		dev_info.db = adapter->roce_db.base;
+	dev_info.unmapped_db = adapter->roce_db.io_addr;
+	dev_info.db_page_size = adapter->roce_db.size;
+	dev_info.db_total_size = adapter->roce_db.total_size;
+	dev_info.netdev = adapter->netdev;
+	memcpy(dev_info.mac_addr, adapter->netdev->dev_addr, ETH_ALEN);
+	dev_info.dev_family = adapter->sli_family;
+	if (msix_enabled(adapter)) {
+		/* Provide all the vectors, so that the EQ creation
+		 * response can decide which ones to use.
+		 */
+		num_vec = adapter->num_msix_vec + adapter->num_msix_roce_vec;
+		dev_info.intr_mode = BE_INTERRUPT_MODE_MSIX;
+		dev_info.msix.num_vectors = min(num_vec, MAX_ROCE_MSIX_VECTORS);
+		/* Provide the starting index of the vectors, so that a
+		 * consumer using them linearly can take the base as its
+		 * starting point.
+		 */
+		dev_info.msix.start_vector = adapter->num_evt_qs;
+		for (i = 0; i < dev_info.msix.num_vectors; i++) {
+			dev_info.msix.vector_list[i] =
+			    adapter->msix_entries[i].vector;
+		}
+	} else {
+		dev_info.msix.num_vectors = 0;
+		dev_info.intr_mode = BE_INTERRUPT_MODE_INTX;
+	}
+	adapter->ocrdma_dev = ocrdma_drv->add(&dev_info);
+}
+
+void be_roce_dev_add(struct be_adapter *adapter)
+{
+	if (be_roce_supported(adapter)) {
+		INIT_LIST_HEAD(&adapter->entry);
+		mutex_lock(&be_adapter_list_lock);
+		list_add_tail(&adapter->entry, &be_adapter_list);
+
+		/* Invoke the roce driver's add() routine only if a valid
+		 * driver with an add method is registered and add() has
+		 * not yet been invoked on this adapter.
+		 */
+		_be_roce_dev_add(adapter);
+		mutex_unlock(&be_adapter_list_lock);
+	}
+}
+
+void _be_roce_dev_remove(struct be_adapter *adapter)
+{
+	if (ocrdma_drv && ocrdma_drv->remove && adapter->ocrdma_dev)
+		ocrdma_drv->remove(adapter->ocrdma_dev);
+	adapter->ocrdma_dev = NULL;
+}
+
+void be_roce_dev_remove(struct be_adapter *adapter)
+{
+	if (be_roce_supported(adapter)) {
+		mutex_lock(&be_adapter_list_lock);
+		_be_roce_dev_remove(adapter);
+		list_del(&adapter->entry);
+		mutex_unlock(&be_adapter_list_lock);
+	}
+}
+
+void _be_roce_dev_open(struct be_adapter *adapter)
+{
+	if (ocrdma_drv && adapter->ocrdma_dev &&
+	    ocrdma_drv->state_change_handler)
+		ocrdma_drv->state_change_handler(adapter->ocrdma_dev, 0);
+}
+
+void be_roce_dev_open(struct be_adapter *adapter)
+{
+	if (be_roce_supported(adapter)) {
+		mutex_lock(&be_adapter_list_lock);
+		_be_roce_dev_open(adapter);
+		mutex_unlock(&be_adapter_list_lock);
+	}
+}
+
+void _be_roce_dev_close(struct be_adapter *adapter)
+{
+	if (ocrdma_drv && adapter->ocrdma_dev &&
+	    ocrdma_drv->state_change_handler)
+		ocrdma_drv->state_change_handler(adapter->ocrdma_dev, 1);
+}
+
+void be_roce_dev_close(struct be_adapter *adapter)
+{
+	if (be_roce_supported(adapter)) {
+		mutex_lock(&be_adapter_list_lock);
+		_be_roce_dev_close(adapter);
+		mutex_unlock(&be_adapter_list_lock);
+	}
+}
+
+int be_roce_register_driver(struct ocrdma_driver *drv)
+{
+	struct be_adapter *dev;
+
+	mutex_lock(&be_adapter_list_lock);
+	if (ocrdma_drv) {
+		mutex_unlock(&be_adapter_list_lock);
+		return -EINVAL;
+	}
+	ocrdma_drv = drv;
+	list_for_each_entry(dev, &be_adapter_list, entry) {
+		struct net_device *netdev;
+		_be_roce_dev_add(dev);
+		netdev = dev->netdev;
+		if (netif_running(netdev) && netif_oper_up(netdev))
+			_be_roce_dev_open(dev);
+	}
+	mutex_unlock(&be_adapter_list_lock);
+	return 0;
+}
+EXPORT_SYMBOL(be_roce_register_driver);
+
+void be_roce_unregister_driver(struct ocrdma_driver *drv)
+{
+	struct be_adapter *dev;
+
+	mutex_lock(&be_adapter_list_lock);
+	list_for_each_entry(dev, &be_adapter_list, entry) {
+		if (dev->ocrdma_dev)
+			_be_roce_dev_remove(dev);
+	}
+	ocrdma_drv = NULL;
+	mutex_unlock(&be_adapter_list_lock);
+}
+EXPORT_SYMBOL(be_roce_unregister_driver);
diff --git a/drivers/net/ethernet/emulex/benet/be_roce.h b/drivers/net/ethernet/emulex/benet/be_roce.h
new file mode 100644
index 0000000..db4ea80
--- /dev/null
+++ b/drivers/net/ethernet/emulex/benet/be_roce.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (C) 2005 - 2011 Emulex
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation. The full GNU General
+ * Public License is included in this distribution in the file called COPYING.
+ *
+ * Contact Information:
+ * linux-drivers@emulex.com
+ *
+ * Emulex
+ * 3333 Susan Street
+ * Costa Mesa, CA 92626
+ */
+
+#ifndef BE_ROCE_H
+#define BE_ROCE_H
+
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+
+struct ocrdma_dev;
+
+enum be_interrupt_mode {
+	BE_INTERRUPT_MODE_MSIX	= 0,
+	BE_INTERRUPT_MODE_INTX	= 1,
+	BE_INTERRUPT_MODE_MSI	= 2,
+};
+
+#define MAX_ROCE_MSIX_VECTORS   16
+struct be_dev_info {
+	u8 __iomem *db;
+	u64 unmapped_db;
+	u32 db_page_size;
+	u32 db_total_size;
+	u64 dpp_unmapped_addr;
+	u32 dpp_unmapped_len;
+	struct pci_dev *pdev;
+	struct net_device *netdev;
+	u8 mac_addr[ETH_ALEN];
+	u32 dev_family;
+	enum be_interrupt_mode intr_mode;
+	struct {
+		int num_vectors;
+		int start_vector;
+		u32 vector_list[MAX_ROCE_MSIX_VECTORS];
+	} msix;
+};
+
+/* The ocrdma driver registers these callback functions with the nic driver. */
+struct ocrdma_driver {
+	unsigned char name[32];
+	struct ocrdma_dev *(*add) (struct be_dev_info *dev_info);
+	void (*remove) (struct ocrdma_dev *);
+	void (*state_change_handler) (struct ocrdma_dev *, u32 new_state);
+};
+
+enum {
+	BE_DEV_UP	= 0,
+	BE_DEV_DOWN	= 1
+};
+
+/* APIs for the RoCE driver to register callback handlers, which are
+ * invoked when the device is added or removed, or on ifup/ifdown.
+ */
+int be_roce_register_driver(struct ocrdma_driver *drv);
+void be_roce_unregister_driver(struct ocrdma_driver *drv);
+
+/* API for RoCE driver to issue mailbox commands */
+int be_roce_mcc_cmd(void *netdev_handle, void *wrb_payload,
+		    int wrb_payload_size, u16 *cmd_status, u16 *ext_status);
+
+#endif /* BE_ROCE_H */
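To illustrate the registration contract above, here is a minimal sketch of a consumer module wiring up the three callbacks. All demo_* names are placeholders, not the real ocrdma driver:

static struct ocrdma_dev *demo_add(struct be_dev_info *dev_info)
{
	/* allocate an ocrdma_dev and set up EQs from dev_info->msix */
	return NULL;	/* placeholder */
}

static void demo_remove(struct ocrdma_dev *dev)
{
	/* tear the device down */
}

static void demo_state_change(struct ocrdma_dev *dev, u32 new_state)
{
	/* new_state is BE_DEV_UP on ifup, BE_DEV_DOWN on ifdown */
}

static struct ocrdma_driver demo_drv = {
	.name			= "demo_roce",
	.add			= demo_add,
	.remove			= demo_remove,
	.state_change_handler	= demo_state_change,
};

static int __init demo_init(void)
{
	/* also replays add()/open() for adapters probed before this point */
	return be_roce_register_driver(&demo_drv);
}

static void __exit demo_exit(void)
{
	be_roce_unregister_driver(&demo_drv);
}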
diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h
index b513f57..3d81b90 100644
--- a/include/rdma/ib_mad.h
+++ b/include/rdma/ib_mad.h
@@ -160,7 +160,7 @@
 
 typedef u64 __bitwise ib_sa_comp_mask;
 
-#define IB_SA_COMP_MASK(n) ((__force ib_sa_comp_mask) cpu_to_be64(1ull << n))
+#define IB_SA_COMP_MASK(n) ((__force ib_sa_comp_mask) cpu_to_be64(1ull << (n)))
 
 /*
  * ib_sa_hdr and ib_sa_mad structures must be packed because they have
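The added parentheses around n matter once the macro argument is an expression using an operator of lower precedence than <<. For example:

	IB_SA_COMP_MASK(x | y)
	/* old: cpu_to_be64(1ull << x | y) == cpu_to_be64((1ull << x) | y) */
	/* new: cpu_to_be64(1ull << (x | y)) -- the intended mask bit */

i.e. the old expansion set bit x and then ORed in y directly instead of selecting the mask bit the caller asked for.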
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index c3cca5a..07996af 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -605,7 +605,7 @@
 	IB_QPT_UD,
 	IB_QPT_RAW_IPV6,
 	IB_QPT_RAW_ETHERTYPE,
-	/* Save 8 for RAW_PACKET */
+	IB_QPT_RAW_PACKET = 8,
 	IB_QPT_XRC_INI = 9,
 	IB_QPT_XRC_TGT,
 	IB_QPT_MAX
@@ -964,7 +964,7 @@
 	struct ib_srq	       *srq;
 	struct ib_xrcd	       *xrcd; /* XRC TGT QPs only */
 	struct list_head	xrcd_list;
-	atomic_t		usecnt; /* count times opened */
+	atomic_t		usecnt; /* count times opened, mcast attaches */
 	struct list_head	open_list;
 	struct ib_qp           *real_qp;
 	struct ib_uobject      *uobject;