Merge branch 'for-linus' of master.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index aed5ca23..5ea741f 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -31,7 +31,7 @@
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  *
- * $Id: user_mad.c 2814 2005-07-06 19:14:09Z halr $
+ * $Id: user_mad.c 4010 2005-11-09 23:11:56Z roland $
  */
 
 #include <linux/module.h>
@@ -110,13 +110,13 @@
 };
 
 struct ib_umad_file {
-	struct ib_umad_port *port;
-	struct list_head     recv_list;
-	struct list_head     port_list;
-	spinlock_t           recv_lock;
-	wait_queue_head_t    recv_wait;
-	struct ib_mad_agent *agent[IB_UMAD_MAX_AGENTS];
-	struct ib_mr        *mr[IB_UMAD_MAX_AGENTS];
+	struct ib_umad_port    *port;
+	struct list_head	recv_list;	/* received packets, under recv_lock */
+	struct list_head	port_list;	/* entry in port->file_list */
+	spinlock_t		recv_lock;
+	wait_queue_head_t	recv_wait;
+	struct ib_mad_agent    *agent[IB_UMAD_MAX_AGENTS];
+	int			agents_dead;	/* set on close or port teardown; see __get_agent() */
 };
 
 struct ib_umad_packet {
@@ -145,6 +145,12 @@
 	kfree(dev);
 }
 
+/* agent[id] or NULL if agents_dead; caller holds port->mutex (read suffices) */
+static struct ib_mad_agent *__get_agent(struct ib_umad_file *file, int id)
+{
+	return file->agents_dead ? NULL : file->agent[id];
+}
+
 static int queue_packet(struct ib_umad_file *file,
 			struct ib_mad_agent *agent,
 			struct ib_umad_packet *packet)
@@ -152,10 +158,11 @@
 	int ret = 1;
 
 	down_read(&file->port->mutex);
+
 	for (packet->mad.hdr.id = 0;
 	     packet->mad.hdr.id < IB_UMAD_MAX_AGENTS;
 	     packet->mad.hdr.id++)
-		if (agent == file->agent[packet->mad.hdr.id]) {
+		if (agent == __get_agent(file, packet->mad.hdr.id)) {
 			spin_lock_irq(&file->recv_lock);
 			list_add_tail(&packet->list, &file->recv_list);
 			spin_unlock_irq(&file->recv_lock);
@@ -327,7 +334,7 @@
 
 	down_read(&file->port->mutex);
 
-	agent = file->agent[packet->mad.hdr.id];
+	agent = __get_agent(file, packet->mad.hdr.id);
 	if (!agent) {
 		ret = -EINVAL;
 		goto err_up;
@@ -481,7 +488,7 @@
 	}
 
 	for (agent_id = 0; agent_id < IB_UMAD_MAX_AGENTS; ++agent_id)
-		if (!file->agent[agent_id])
+		if (!__get_agent(file, agent_id))
 			goto found;
 
 	ret = -ENOMEM;
@@ -505,29 +512,15 @@
 		goto out;
 	}
 
-	file->agent[agent_id] = agent;
-
-	file->mr[agent_id] = ib_get_dma_mr(agent->qp->pd, IB_ACCESS_LOCAL_WRITE);
-	if (IS_ERR(file->mr[agent_id])) {
-		ret = -ENOMEM;
-		goto err;
-	}
-
 	if (put_user(agent_id,
 		     (u32 __user *) (arg + offsetof(struct ib_user_mad_reg_req, id)))) {
 		ret = -EFAULT;
-		goto err_mr;
+		ib_unregister_mad_agent(agent);
+		goto out;
 	}
 
+	file->agent[agent_id] = agent;
 	ret = 0;
-	goto out;
-
-err_mr:
-	ib_dereg_mr(file->mr[agent_id]);
-
-err:
-	file->agent[agent_id] = NULL;
-	ib_unregister_mad_agent(agent);
 
 out:
 	up_write(&file->port->mutex);
@@ -536,27 +529,29 @@
 
 static int ib_umad_unreg_agent(struct ib_umad_file *file, unsigned long arg)
 {
+	struct ib_mad_agent *agent = NULL;	/* agent to unregister, if any */
 	u32 id;
 	int ret = 0;
 
+	if (get_user(id, (u32 __user *) arg))
+		return -EFAULT;
+
 	down_write(&file->port->mutex);
 
-	if (get_user(id, (u32 __user *) arg)) {
-		ret = -EFAULT;
-		goto out;
-	}
-
-	if (id < 0 || id >= IB_UMAD_MAX_AGENTS || !file->agent[id]) {
+	if (id >= IB_UMAD_MAX_AGENTS || !__get_agent(file, id)) {
 		ret = -EINVAL;
 		goto out;
 	}
 
-	ib_dereg_mr(file->mr[id]);
-	ib_unregister_mad_agent(file->agent[id]);
+	agent = file->agent[id];
 	file->agent[id] = NULL;
 
 out:
 	up_write(&file->port->mutex);
+	/* Not under the write lock: unregistering may call back into queue_packet() */
+	if (agent)
+		ib_unregister_mad_agent(agent);
+
 	return ret;
 }
 
@@ -621,23 +616,29 @@
 	struct ib_umad_file *file = filp->private_data;
 	struct ib_umad_device *dev = file->port->umad_dev;
 	struct ib_umad_packet *packet, *tmp;
+	int already_dead;
 	int i;
 
 	down_write(&file->port->mutex);
-	for (i = 0; i < IB_UMAD_MAX_AGENTS; ++i)
-		if (file->agent[i]) {
-			ib_dereg_mr(file->mr[i]);
-			ib_unregister_mad_agent(file->agent[i]);
-		}
+
+	already_dead = file->agents_dead;
+	file->agents_dead = 1;
 
 	list_for_each_entry_safe(packet, tmp, &file->recv_list, list)
 		kfree(packet);
 
 	list_del(&file->port_list);
-	up_write(&file->port->mutex);
+
+	downgrade_write(&file->port->mutex);
+
+	if (!already_dead)
+		for (i = 0; i < IB_UMAD_MAX_AGENTS; ++i)
+			if (file->agent[i])
+				ib_unregister_mad_agent(file->agent[i]);
+
+	up_read(&file->port->mutex);
 
 	kfree(file);
-
 	kref_put(&dev->ref, ib_umad_release_dev);
 
 	return 0;
@@ -801,7 +802,7 @@
 		goto err_class;
 	port->sm_dev->owner = THIS_MODULE;
 	port->sm_dev->ops   = &umad_sm_fops;
-	kobject_set_name(&port->dev->kobj, "issm%d", port->dev_num);
+	kobject_set_name(&port->sm_dev->kobj, "issm%d", port->dev_num);
 	if (cdev_add(port->sm_dev, base_dev + port->dev_num + IB_UMAD_MAX_PORTS, 1))
 		goto err_sm_cdev;
 
@@ -863,14 +864,36 @@
 
 	port->ib_dev = NULL;
 
-	list_for_each_entry(file, &port->file_list, port_list)
-		for (id = 0; id < IB_UMAD_MAX_AGENTS; ++id) {
-			if (!file->agent[id])
-				continue;
-			ib_dereg_mr(file->mr[id]);
-			ib_unregister_mad_agent(file->agent[id]);
-			file->agent[id] = NULL;
-		}
+	/*
+	 * Now go through the list of files attached to this port and
+	 * unregister all of their MAD agents.  We need to hold
+	 * port->mutex while doing this to avoid racing with
+	 * ib_umad_close(), but we can't hold the mutex for writing
+	 * while calling ib_unregister_mad_agent(), since that might
+	 * deadlock by calling back into queue_packet().  So we
+	 * downgrade our lock to a read lock, and then drop and
+	 * reacquire the write lock for the next iteration.
+	 *
+	 * We do list_del_init() on the file's list_head so that the
+	 * list_del in ib_umad_close() is still OK, even after the
+	 * file is removed from the list.
+	 */
+	while (!list_empty(&port->file_list)) {
+		file = list_entry(port->file_list.next, struct ib_umad_file,
+				  port_list);
+
+		file->agents_dead = 1;
+		list_del_init(&file->port_list);
+
+		downgrade_write(&port->mutex);
+
+		for (id = 0; id < IB_UMAD_MAX_AGENTS; ++id)
+			if (file->agent[id])
+				ib_unregister_mad_agent(file->agent[id]);
+
+		up_read(&port->mutex);
+		down_write(&port->mutex);
+	}
 
 	up_write(&port->mutex);
 
@@ -913,7 +936,7 @@
 
 err:
 	while (--i >= s)
-		ib_umad_kill_port(&umad_dev->port[i]);
+		ib_umad_kill_port(&umad_dev->port[i - s]);
 
 	kref_put(&umad_dev->ref, ib_umad_release_dev);
 }
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 63a7415..ed45da8 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -708,7 +708,7 @@
 		resp->wc[i].opcode 	   = wc[i].opcode;
 		resp->wc[i].vendor_err 	   = wc[i].vendor_err;
 		resp->wc[i].byte_len 	   = wc[i].byte_len;
-		resp->wc[i].imm_data 	   = wc[i].imm_data;
+		resp->wc[i].imm_data 	   = (__u32 __force) wc[i].imm_data;
 		resp->wc[i].qp_num 	   = wc[i].qp_num;
 		resp->wc[i].src_qp 	   = wc[i].src_qp;
 		resp->wc[i].wc_flags 	   = wc[i].wc_flags;
@@ -908,7 +908,12 @@
 	if (ret)
 		goto err_destroy;
 
-	resp.qp_handle = uobj->uobject.id;
+	resp.qp_handle       = uobj->uobject.id;
+	resp.max_recv_sge    = attr.cap.max_recv_sge;
+	resp.max_send_sge    = attr.cap.max_send_sge;
+	resp.max_recv_wr     = attr.cap.max_recv_wr;
+	resp.max_send_wr     = attr.cap.max_send_wr;
+	resp.max_inline_data = attr.cap.max_inline_data;
 
 	if (copy_to_user((void __user *) (unsigned long) cmd.response,
 			 &resp, sizeof resp)) {
@@ -1135,7 +1140,7 @@
 		next->num_sge    = user_wr->num_sge;
 		next->opcode     = user_wr->opcode;
 		next->send_flags = user_wr->send_flags;
-		next->imm_data   = user_wr->imm_data;
+		next->imm_data   = (__be32 __force) user_wr->imm_data;
 
 		if (qp->qp_type == IB_QPT_UD) {
 			next->wr.ud.ah = idr_find(&ib_uverbs_ah_idr,
@@ -1701,7 +1706,6 @@
 	}
 
 	attr.max_wr    = cmd.max_wr;
-	attr.max_sge   = cmd.max_sge;
 	attr.srq_limit = cmd.srq_limit;
 
 	ret = ib_modify_srq(srq, &attr, cmd.attr_mask);
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 4186cc8..4c15e11 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -325,16 +325,8 @@
 int ib_resize_cq(struct ib_cq *cq,
                  int           cqe)
 {
-	int ret;
-
-	if (!cq->device->resize_cq)
-		return -ENOSYS;
-
-	ret = cq->device->resize_cq(cq, &cqe);
-	if (!ret)
-		cq->cqe = cqe;
-
-	return ret;
+	return cq->device->resize_cq ?
+		cq->device->resize_cq(cq, cqe) : -ENOSYS;
 }
 EXPORT_SYMBOL(ib_resize_cq);
 
diff --git a/drivers/infiniband/hw/mthca/mthca_catas.c b/drivers/infiniband/hw/mthca/mthca_catas.c
index 25ebab6..c3bec74 100644
--- a/drivers/infiniband/hw/mthca/mthca_catas.c
+++ b/drivers/infiniband/hw/mthca/mthca_catas.c
@@ -97,7 +97,7 @@
 		}
 
 	spin_lock_irqsave(&catas_lock, flags);
-	if (dev->catas_err.stop)
+	if (!dev->catas_err.stop)
 		mod_timer(&dev->catas_err.timer,
 			  jiffies + MTHCA_CATAS_POLL_INTERVAL);
 	spin_unlock_irqrestore(&catas_lock, flags);
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c
index 49f211d..9ed3458 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.c
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.c
@@ -1060,6 +1060,8 @@
 		dev_lim->hca.arbel.resize_srq = field & 1;
 		MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SG_RQ_OFFSET);
 		dev_lim->max_sg = min_t(int, field, dev_lim->max_sg);
+		MTHCA_GET(size, outbox, QUERY_DEV_LIM_MAX_DESC_SZ_RQ_OFFSET);
+		dev_lim->max_desc_sz = min_t(int, size, dev_lim->max_desc_sz);
 		MTHCA_GET(size, outbox, QUERY_DEV_LIM_MPT_ENTRY_SZ_OFFSET);
 		dev_lim->mpt_entry_sz = size;
 		MTHCA_GET(field, outbox, QUERY_DEV_LIM_PBL_SZ_OFFSET);
diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c
index f98e235..4a8adce 100644
--- a/drivers/infiniband/hw/mthca/mthca_cq.c
+++ b/drivers/infiniband/hw/mthca/mthca_cq.c
@@ -258,7 +258,7 @@
 {
 	struct mthca_cq *cq;
 	struct mthca_cqe *cqe;
-	int prod_index;
+	u32 prod_index;
 	int nfreed = 0;
 
 	spin_lock_irq(&dev->cq_table.lock);
@@ -293,19 +293,15 @@
 	 * Now sweep backwards through the CQ, removing CQ entries
 	 * that match our QP by copying older entries on top of them.
 	 */
-	while (prod_index > cq->cons_index) {
-		cqe = get_cqe(cq, (prod_index - 1) & cq->ibcq.cqe);
+	while ((int) --prod_index - (int) cq->cons_index >= 0) {
+		cqe = get_cqe(cq, prod_index & cq->ibcq.cqe);
 		if (cqe->my_qpn == cpu_to_be32(qpn)) {
 			if (srq)
 				mthca_free_srq_wqe(srq, be32_to_cpu(cqe->wqe));
 			++nfreed;
-		}
-		else if (nfreed)
-			memcpy(get_cqe(cq, (prod_index - 1 + nfreed) &
-				       cq->ibcq.cqe),
-			       cqe,
-			       MTHCA_CQ_ENTRY_SIZE);
-		--prod_index;
+		} else if (nfreed)
+			memcpy(get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe),
+			       cqe, MTHCA_CQ_ENTRY_SIZE);
 	}
 
 	if (nfreed) {
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h
index e7e5d3b..497ff79 100644
--- a/drivers/infiniband/hw/mthca/mthca_dev.h
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -131,6 +131,7 @@
 	int      max_sg;
 	int      num_qps;
 	int      max_wqes;
+	int	 max_desc_sz;
 	int	 max_qp_init_rdma;
 	int      reserved_qps;
 	int      num_srqs;
@@ -154,6 +155,7 @@
 	int      reserved_mcgs;
 	int      num_pds;
 	int      reserved_pds;
+	u32      page_size_cap;
 	u32      flags;
 	u8       port_width_cap;
 };
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index 45c6328..147f248 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -168,6 +168,7 @@
 	mdev->limits.max_srq_wqes       = dev_lim->max_srq_sz;
 	mdev->limits.reserved_srqs      = dev_lim->reserved_srqs;
 	mdev->limits.reserved_eecs      = dev_lim->reserved_eecs;
+	mdev->limits.max_desc_sz        = dev_lim->max_desc_sz;
 	/*
 	 * Subtract 1 from the limit because we need to allocate a
 	 * spare CQE so the HCA HW can tell the difference between an
@@ -181,6 +182,7 @@
 	mdev->limits.reserved_uars      = dev_lim->reserved_uars;
 	mdev->limits.reserved_pds       = dev_lim->reserved_pds;
 	mdev->limits.port_width_cap     = dev_lim->max_port_width;
+	mdev->limits.page_size_cap      = ~(u32) (dev_lim->min_page_sz - 1);
 	mdev->limits.flags              = dev_lim->flags;
 
 	/* IB_DEVICE_RESIZE_MAX_WR not supported by driver.
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 6b01666..4cc7e28 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -90,6 +90,7 @@
 	memcpy(&props->node_guid,      out_mad->data + 12, 8);
 
 	props->max_mr_size         = ~0ull;
+	props->page_size_cap       = mdev->limits.page_size_cap;
 	props->max_qp              = mdev->limits.num_qps - mdev->limits.reserved_qps;
 	props->max_qp_wr           = mdev->limits.max_wqes;
 	props->max_sge             = mdev->limits.max_sg;
@@ -615,11 +616,11 @@
 		return ERR_PTR(err);
 	}
 
-	init_attr->cap.max_inline_data = 0;
 	init_attr->cap.max_send_wr     = qp->sq.max;
 	init_attr->cap.max_recv_wr     = qp->rq.max;
 	init_attr->cap.max_send_sge    = qp->sq.max_gs;
 	init_attr->cap.max_recv_sge    = qp->rq.max_gs;
+	init_attr->cap.max_inline_data = qp->max_inline_data;
 
 	return &qp->ibqp;
 }
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h
index bcd4b01..1e73947 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.h
+++ b/drivers/infiniband/hw/mthca/mthca_provider.h
@@ -251,6 +251,7 @@
 	struct mthca_wq        sq;
 	enum ib_sig_type       sq_policy;
 	int                    send_wqe_offset;
+	int                    max_inline_data;
 
 	u64                   *wrid;
 	union mthca_buf	       queue;
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index 8852ea4..760c418d 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -885,6 +885,48 @@
 	return err;
 }
 
+static void mthca_adjust_qp_caps(struct mthca_dev *dev,
+				 struct mthca_pd *pd,
+				 struct mthca_qp *qp)
+{
+	int max_data_size;
+
+	/*
+	 * Calculate the maximum size of WQE s/g segments, excluding
+	 * the next segment and other non-data segments.
+	 */
+	max_data_size = min(dev->limits.max_desc_sz, 1 << qp->sq.wqe_shift) -
+		sizeof (struct mthca_next_seg);
+
+	switch (qp->transport) {
+	case MLX:
+		max_data_size -= 2 * sizeof (struct mthca_data_seg);
+		break;
+
+	case UD:
+		if (mthca_is_memfree(dev))
+			max_data_size -= sizeof (struct mthca_arbel_ud_seg);
+		else
+			max_data_size -= sizeof (struct mthca_tavor_ud_seg);
+		break;
+
+	default:
+		max_data_size -= sizeof (struct mthca_raddr_seg);
+		break;
+	}
+
+	/* We don't support inline data for kernel QPs (yet). */
+	if (!pd->ibpd.uobject)
+		qp->max_inline_data = 0;
+	else
+		qp->max_inline_data = max_data_size - MTHCA_INLINE_HEADER_SIZE;
+	/* Derive the s/g entry counts from the space left in each WQE */
+	qp->sq.max_gs = max_data_size / sizeof (struct mthca_data_seg);
+	qp->rq.max_gs = (min(dev->limits.max_desc_sz, 1 << qp->rq.wqe_shift) -
+			sizeof (struct mthca_next_seg)) /
+			sizeof (struct mthca_data_seg);
+}
+
 /*
  * Allocate and register buffer for WQEs.  qp->rq.max, sq.max,
  * rq.max_gs and sq.max_gs must all be assigned.
@@ -902,27 +944,53 @@
 	size = sizeof (struct mthca_next_seg) +
 		qp->rq.max_gs * sizeof (struct mthca_data_seg);
 
+	if (size > dev->limits.max_desc_sz)
+		return -EINVAL;
+
 	for (qp->rq.wqe_shift = 6; 1 << qp->rq.wqe_shift < size;
 	     qp->rq.wqe_shift++)
 		; /* nothing */
 
-	size = sizeof (struct mthca_next_seg) +
-		qp->sq.max_gs * sizeof (struct mthca_data_seg);
+	size = qp->sq.max_gs * sizeof (struct mthca_data_seg);
 	switch (qp->transport) {
 	case MLX:
 		size += 2 * sizeof (struct mthca_data_seg);
 		break;
+
 	case UD:
-		if (mthca_is_memfree(dev))
-			size += sizeof (struct mthca_arbel_ud_seg);
-		else
-			size += sizeof (struct mthca_tavor_ud_seg);
+		size += mthca_is_memfree(dev) ?
+			sizeof (struct mthca_arbel_ud_seg) :
+			sizeof (struct mthca_tavor_ud_seg);
 		break;
+
+	case UC:
+		size += sizeof (struct mthca_raddr_seg);
+		break;
+
+	case RC:
+		size += sizeof (struct mthca_raddr_seg);
+		/*
+		 * An atomic op will require an atomic segment, a
+		 * remote address segment and one scatter entry.
+		 */
+		size = max_t(int, size,
+			     sizeof (struct mthca_atomic_seg) +
+			     sizeof (struct mthca_raddr_seg) +
+			     sizeof (struct mthca_data_seg));
+		break;
+
 	default:
-		/* bind seg is as big as atomic + raddr segs */
-		size += sizeof (struct mthca_bind_seg);
+		break;
 	}
 
+	/* Make sure that we have enough space for a bind request */
+	size = max_t(int, size, sizeof (struct mthca_bind_seg));
+
+	size += sizeof (struct mthca_next_seg);
+
+	if (size > dev->limits.max_desc_sz)
+		return -EINVAL;
+
 	for (qp->sq.wqe_shift = 6; 1 << qp->sq.wqe_shift < size;
 	     qp->sq.wqe_shift++)
 		; /* nothing */
@@ -1066,6 +1134,8 @@
 		return ret;
 	}
 
+	mthca_adjust_qp_caps(dev, pd, qp);
+
 	/*
 	 * If this is a userspace QP, we're done now.  The doorbells
 	 * will be allocated and buffers will be initialized in
@@ -1486,8 +1556,8 @@
 				}
 
 				wqe += sizeof (struct mthca_atomic_seg);
-				size += sizeof (struct mthca_raddr_seg) / 16 +
-					sizeof (struct mthca_atomic_seg);
+				size += (sizeof (struct mthca_raddr_seg) +
+					 sizeof (struct mthca_atomic_seg)) / 16;
 				break;
 
 			case IB_WR_RDMA_WRITE:
@@ -1637,6 +1707,7 @@
 {
 	struct mthca_dev *dev = to_mdev(ibqp->device);
 	struct mthca_qp *qp = to_mqp(ibqp);
+	__be32 doorbell[2];
 	unsigned long flags;
 	int err = 0;
 	int nreq;
@@ -1654,6 +1725,22 @@
 	ind = qp->rq.next_ind;
 
 	for (nreq = 0; wr; ++nreq, wr = wr->next) {
+		if (unlikely(nreq == MTHCA_TAVOR_MAX_WQES_PER_RECV_DB)) {
+			nreq = 0;
+
+			doorbell[0] = cpu_to_be32((qp->rq.next_ind << qp->rq.wqe_shift) | size0);
+			doorbell[1] = cpu_to_be32(qp->qpn << 8);
+
+			wmb();
+
+			mthca_write64(doorbell,
+				      dev->kar + MTHCA_RECEIVE_DOORBELL,
+				      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
+
+			qp->rq.head += MTHCA_TAVOR_MAX_WQES_PER_RECV_DB;
+			size0 = 0;
+		}
+
 		if (mthca_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
 			mthca_err(dev, "RQ %06x full (%u head, %u tail,"
 					" %d max, %d nreq)\n", qp->qpn,
@@ -1711,8 +1798,6 @@
 
 out:
 	if (likely(nreq)) {
-		__be32 doorbell[2];
-
 		doorbell[0] = cpu_to_be32((qp->rq.next_ind << qp->rq.wqe_shift) | size0);
 		doorbell[1] = cpu_to_be32((qp->qpn << 8) | nreq);
 
@@ -1806,8 +1891,8 @@
 				}
 
 				wqe += sizeof (struct mthca_atomic_seg);
-				size += sizeof (struct mthca_raddr_seg) / 16 +
-					sizeof (struct mthca_atomic_seg);
+				size += (sizeof (struct mthca_raddr_seg) +
+					 sizeof (struct mthca_atomic_seg)) / 16;
 				break;
 
 			case IB_WR_RDMA_READ:
diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c
index 26d5161..f7d2342 100644
--- a/drivers/infiniband/hw/mthca/mthca_srq.c
+++ b/drivers/infiniband/hw/mthca/mthca_srq.c
@@ -417,6 +417,7 @@
 {
 	struct mthca_dev *dev = to_mdev(ibsrq->device);
 	struct mthca_srq *srq = to_msrq(ibsrq);
+	__be32 doorbell[2];
 	unsigned long flags;
 	int err = 0;
 	int first_ind;
@@ -432,6 +433,25 @@
 	first_ind = srq->first_free;
 
 	for (nreq = 0; wr; ++nreq, wr = wr->next) {
+		if (unlikely(nreq == MTHCA_TAVOR_MAX_WQES_PER_RECV_DB)) {
+			nreq = 0;
+
+			doorbell[0] = cpu_to_be32(first_ind << srq->wqe_shift);
+			doorbell[1] = cpu_to_be32(srq->srqn << 8);
+
+			/*
+			 * Make sure that descriptors are written
+			 * before doorbell is rung.
+			 */
+			wmb();
+
+			mthca_write64(doorbell,
+				      dev->kar + MTHCA_RECEIVE_DOORBELL,
+				      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
+
+			first_ind = srq->first_free;
+		}
+
 		ind = srq->first_free;
 
 		if (ind < 0) {
@@ -494,8 +514,6 @@
 	}
 
 	if (likely(nreq)) {
-		__be32 doorbell[2];
-
 		doorbell[0] = cpu_to_be32(first_ind << srq->wqe_shift);
 		doorbell[1] = cpu_to_be32((srq->srqn << 8) | nreq);
 
diff --git a/drivers/infiniband/hw/mthca/mthca_wqe.h b/drivers/infiniband/hw/mthca/mthca_wqe.h
index 1f4c0ff..73f1c0b 100644
--- a/drivers/infiniband/hw/mthca/mthca_wqe.h
+++ b/drivers/infiniband/hw/mthca/mthca_wqe.h
@@ -49,7 +49,8 @@
 };
 
 enum {
-	MTHCA_INVAL_LKEY = 0x100
+	MTHCA_INVAL_LKEY			= 0x100,
+	MTHCA_TAVOR_MAX_WQES_PER_RECV_DB	= 256
 };
 
 struct mthca_next_seg {
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 0095acc..9923a15 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -179,6 +179,7 @@
 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
 	struct list_head fs_list;
 	struct dentry *mcg_dentry;
+	struct dentry *path_dentry;
 #endif
 };
 
@@ -270,7 +271,6 @@
 
 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
 struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct net_device *dev);
-void ipoib_mcast_iter_free(struct ipoib_mcast_iter *iter);
 int ipoib_mcast_iter_next(struct ipoib_mcast_iter *iter);
 void ipoib_mcast_iter_read(struct ipoib_mcast_iter *iter,
 				  union ib_gid *gid,
@@ -278,6 +278,11 @@
 				  unsigned int *queuelen,
 				  unsigned int *complete,
 				  unsigned int *send_only);
+
+struct ipoib_path_iter *ipoib_path_iter_init(struct net_device *dev);
+int ipoib_path_iter_next(struct ipoib_path_iter *iter);
+void ipoib_path_iter_read(struct ipoib_path_iter *iter,
+			  struct ipoib_path *path);
 #endif
 
 int ipoib_mcast_attach(struct net_device *dev, u16 mlid,
@@ -299,13 +304,13 @@
 int ipoib_pkey_dev_delay_open(struct net_device *dev);
 
 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
-int ipoib_create_debug_file(struct net_device *dev);
-void ipoib_delete_debug_file(struct net_device *dev);
+void ipoib_create_debug_files(struct net_device *dev);
+void ipoib_delete_debug_files(struct net_device *dev);
 int ipoib_register_debugfs(void);
 void ipoib_unregister_debugfs(void);
 #else
-static inline int ipoib_create_debug_file(struct net_device *dev) { return 0; }
-static inline void ipoib_delete_debug_file(struct net_device *dev) { }
+static inline void ipoib_create_debug_files(struct net_device *dev) { }
+static inline void ipoib_delete_debug_files(struct net_device *dev) { }
 static inline int ipoib_register_debugfs(void) { return 0; }
 static inline void ipoib_unregister_debugfs(void) { }
 #endif
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
index 38b150f..685258e 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
@@ -43,6 +43,18 @@
 
 static struct dentry *ipoib_root;
 
+static void format_gid(union ib_gid *gid, char *buf)
+{
+	int i, n;
+	/* 8 hex words joined by ':'; buf needs room for 39 chars plus the NUL */
+	for (n = 0, i = 0; i < 8; ++i) {
+		n += sprintf(buf + n, "%x",
+			     be16_to_cpu(((__be16 *) gid->raw)[i]));
+		if (i < 7)
+			buf[n++] = ':';
+	}
+}
+
 static void *ipoib_mcg_seq_start(struct seq_file *file, loff_t *pos)
 {
 	struct ipoib_mcast_iter *iter;
@@ -54,7 +66,7 @@
 
 	while (n--) {
 		if (ipoib_mcast_iter_next(iter)) {
-			ipoib_mcast_iter_free(iter);
+			kfree(iter);
 			return NULL;
 		}
 	}
@@ -70,7 +82,7 @@
 	(*pos)++;
 
 	if (ipoib_mcast_iter_next(iter)) {
-		ipoib_mcast_iter_free(iter);
+		kfree(iter);
 		return NULL;
 	}
 
@@ -87,32 +99,32 @@
 	struct ipoib_mcast_iter *iter = iter_ptr;
 	char gid_buf[sizeof "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff"];
 	union ib_gid mgid;
-	int i, n;
 	unsigned long created;
 	unsigned int queuelen, complete, send_only;
 
-	if (iter) {
-		ipoib_mcast_iter_read(iter, &mgid, &created, &queuelen,
-				      &complete, &send_only);
+	if (!iter)
+		return 0;
 
-		for (n = 0, i = 0; i < sizeof mgid / 2; ++i) {
-			n += sprintf(gid_buf + n, "%x",
-				     be16_to_cpu(((__be16 *) mgid.raw)[i]));
-			if (i < sizeof mgid / 2 - 1)
-				gid_buf[n++] = ':';
-		}
-	}
+	ipoib_mcast_iter_read(iter, &mgid, &created, &queuelen,
+			      &complete, &send_only);
 
-	seq_printf(file, "GID: %*s", -(1 + (int) sizeof gid_buf), gid_buf);
+	format_gid(&mgid, gid_buf);
 
 	seq_printf(file,
-		   " created: %10ld queuelen: %4d complete: %d send_only: %d\n",
-		   created, queuelen, complete, send_only);
+		   "GID: %s\n"
+		   "  created: %10ld\n"
+		   "  queuelen: %9d\n"
+		   "  complete: %9s\n"
+		   "  send_only: %8s\n"
+		   "\n",
+		   gid_buf, created, queuelen,
+		   complete ? "yes" : "no",
+		   send_only ? "yes" : "no");
 
 	return 0;
 }
 
-static struct seq_operations ipoib_seq_ops = {
+static struct seq_operations ipoib_mcg_seq_ops = {
 	.start = ipoib_mcg_seq_start,
 	.next  = ipoib_mcg_seq_next,
 	.stop  = ipoib_mcg_seq_stop,
@@ -124,7 +136,7 @@
 	struct seq_file *seq;
 	int ret;
 
-	ret = seq_open(file, &ipoib_seq_ops);
+	ret = seq_open(file, &ipoib_mcg_seq_ops);
 	if (ret)
 		return ret;
 
@@ -134,7 +146,7 @@
 	return 0;
 }
 
-static struct file_operations ipoib_fops = {
+static struct file_operations ipoib_mcg_fops = {
 	.owner   = THIS_MODULE,
 	.open    = ipoib_mcg_open,
 	.read    = seq_read,
@@ -142,25 +154,138 @@
 	.release = seq_release
 };
 
-int ipoib_create_debug_file(struct net_device *dev)
+static void *ipoib_path_seq_start(struct seq_file *file, loff_t *pos)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
-	char name[IFNAMSIZ + sizeof "_mcg"];
+	struct ipoib_path_iter *iter;
+	loff_t n = *pos;
 
-	snprintf(name, sizeof name, "%s_mcg", dev->name);
+	iter = ipoib_path_iter_init(file->private);
+	if (!iter)
+		return NULL;
 
-	priv->mcg_dentry = debugfs_create_file(name, S_IFREG | S_IRUGO,
-					       ipoib_root, dev, &ipoib_fops);
+	while (n--) {
+		if (ipoib_path_iter_next(iter)) {
+			kfree(iter);
+			return NULL;
+		}
+	}
 
-	return priv->mcg_dentry ? 0 : -ENOMEM;
+	return iter;
 }
 
-void ipoib_delete_debug_file(struct net_device *dev)
+static void *ipoib_path_seq_next(struct seq_file *file, void *iter_ptr,
+				   loff_t *pos)
+{
+	struct ipoib_path_iter *iter = iter_ptr;
+
+	(*pos)++;
+
+	if (ipoib_path_iter_next(iter)) {
+		kfree(iter);
+		return NULL;
+	}
+
+	return iter;
+}
+
+static void ipoib_path_seq_stop(struct seq_file *file, void *iter_ptr)
+{
+	/* nothing for now; NOTE(review): is iter leaked if reading stops early? */
+}
+
+static int ipoib_path_seq_show(struct seq_file *file, void *iter_ptr)
+{
+	struct ipoib_path_iter *iter = iter_ptr;
+	char gid_buf[sizeof "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff"];
+	struct ipoib_path path;
+	int rate;
+
+	if (!iter)
+		return 0;
+	/* Work on a private copy of the path the iterator is parked on */
+	ipoib_path_iter_read(iter, &path);
+
+	format_gid(&path.pathrec.dgid, gid_buf);
+
+	seq_printf(file,
+		   "GID: %s\n"
+		   "  complete: %6s\n",
+		   gid_buf, path.pathrec.dlid ? "yes" : "no");
+	/* A zero dlid is reported as an incomplete path: skip the details */
+	if (path.pathrec.dlid) {
+		rate = ib_sa_rate_enum_to_int(path.pathrec.rate) * 25;
+		/* rate: tenths of Gb/sec; narrower field when ".5" is appended */
+		seq_printf(file,
+			   "  DLID:     0x%04x\n"
+			   "  SL: %12d\n"
+			   "  rate: %*d%s Gb/sec\n",
+			   be16_to_cpu(path.pathrec.dlid),
+			   path.pathrec.sl,
+			   10 - ((rate % 10) ? 2 : 0),
+			   rate / 10, rate % 10 ? ".5" : "");
+	}
+
+	seq_putc(file, '\n');
+
+	return 0;
+}
+
+static struct seq_operations ipoib_path_seq_ops = {
+	.start = ipoib_path_seq_start,
+	.next  = ipoib_path_seq_next,
+	.stop  = ipoib_path_seq_stop,
+	.show  = ipoib_path_seq_show,
+};
+
+static int ipoib_path_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq;
+	int ret;
+
+	ret = seq_open(file, &ipoib_path_seq_ops);
+	if (ret)
+		return ret;
+
+	seq = file->private_data;
+	seq->private = inode->u.generic_ip;
+
+	return 0;
+}
+
+static struct file_operations ipoib_path_fops = {
+	.owner   = THIS_MODULE,
+	.open    = ipoib_path_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release
+};
+
+void ipoib_create_debug_files(struct net_device *dev)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	char name[IFNAMSIZ + sizeof "_path"];
+
+	snprintf(name, sizeof name, "%s_mcg", dev->name);
+	priv->mcg_dentry = debugfs_create_file(name, S_IFREG | S_IRUGO,
+					       ipoib_root, dev, &ipoib_mcg_fops);
+	if (!priv->mcg_dentry)
+		ipoib_warn(priv, "failed to create mcg debug file\n");
+
+	snprintf(name, sizeof name, "%s_path", dev->name);
+	priv->path_dentry = debugfs_create_file(name, S_IFREG | S_IRUGO,
+						ipoib_root, dev, &ipoib_path_fops);
+	if (!priv->path_dentry)
+		ipoib_warn(priv, "failed to create path debug file\n");
+}
+
+void ipoib_delete_debug_files(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 
 	if (priv->mcg_dentry)
 		debugfs_remove(priv->mcg_dentry);
+	if (priv->path_dentry)
+		debugfs_remove(priv->path_dentry);
 }
 
 int ipoib_register_debugfs(void)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index ce02962..2fa3075 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -58,6 +58,11 @@
 MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");
 #endif
 
+struct ipoib_path_iter {
+	struct net_device *dev;
+	struct ipoib_path  path;
+};
+
 static const u8 ipv4_bcast_addr[] = {
 	0x00, 0xff, 0xff, 0xff,
 	0xff, 0x12, 0x40, 0x1b,	0x00, 0x00, 0x00, 0x00,
@@ -250,6 +255,64 @@
 	kfree(path);
 }
 
+#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
+
+struct ipoib_path_iter *ipoib_path_iter_init(struct net_device *dev)
+{
+	struct ipoib_path_iter *iter;
+
+	iter = kmalloc(sizeof *iter, GFP_KERNEL);
+	if (!iter)
+		return NULL;
+
+	iter->dev = dev;
+	memset(iter->path.pathrec.dgid.raw, 0, sizeof (union ib_gid));
+	/* The zero dgid sorts lowest, so this lands on the first path, if any */
+	if (ipoib_path_iter_next(iter)) {
+		kfree(iter);
+		return NULL;
+	}
+	/* Ownership passes to the caller, which kfree()s the iterator */
+	return iter;
+}
+
+int ipoib_path_iter_next(struct ipoib_path_iter *iter)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(iter->dev);
+	struct rb_node *n;
+	struct ipoib_path *path;
+	int ret = 1;	/* 1 means no further path was found */
+
+	spin_lock_irq(&priv->lock);
+
+	n = rb_first(&priv->path_tree);
+	/* Walk from the start for the first path whose dgid is above iter's */
+	while (n) {
+		path = rb_entry(n, struct ipoib_path, rb_node);
+
+		if (memcmp(iter->path.pathrec.dgid.raw, path->pathrec.dgid.raw,
+			   sizeof (union ib_gid)) < 0) {
+			iter->path = *path;	/* snapshot taken under priv->lock */
+			ret = 0;
+			break;
+		}
+
+		n = rb_next(n);
+	}
+
+	spin_unlock_irq(&priv->lock);
+
+	return ret;
+}
+
+void ipoib_path_iter_read(struct ipoib_path_iter *iter,
+			  struct ipoib_path *path)
+{
+	*path = iter->path;
+}
+
+#endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */
+
 void ipoib_flush_paths(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -763,7 +826,7 @@
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev), *cpriv, *tcpriv;
 
-	ipoib_delete_debug_file(dev);
+	ipoib_delete_debug_files(dev);
 
 	/* Delete any child interfaces first */
 	list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) {
@@ -972,8 +1035,7 @@
 		goto register_failed;
 	}
 
-	if (ipoib_create_debug_file(priv->dev))
-		goto debug_failed;
+	ipoib_create_debug_files(priv->dev);
 
 	if (ipoib_add_pkey_attr(priv->dev))
 		goto sysfs_failed;
@@ -987,9 +1049,7 @@
 	return priv->dev;
 
 sysfs_failed:
-	ipoib_delete_debug_file(priv->dev);
-
-debug_failed:
+	ipoib_delete_debug_files(priv->dev);
 	unregister_netdev(priv->dev);
 
 register_failed:
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 3ecf78a..c33ed87 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -120,12 +120,8 @@
 	if (mcast->ah)
 		ipoib_put_ah(mcast->ah);
 
-	while (!skb_queue_empty(&mcast->pkt_queue)) {
-		struct sk_buff *skb = skb_dequeue(&mcast->pkt_queue);
-
-		skb->dev = dev;
-		dev_kfree_skb_any(skb);
-	}
+	while (!skb_queue_empty(&mcast->pkt_queue))
+		dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue));
 
 	kfree(mcast);
 }
@@ -317,13 +313,8 @@
 					IPOIB_GID_ARG(mcast->mcmember.mgid), status);
 
 		/* Flush out any queued packets */
-		while (!skb_queue_empty(&mcast->pkt_queue)) {
-			struct sk_buff *skb = skb_dequeue(&mcast->pkt_queue);
-
-			skb->dev = dev;
-
-			dev_kfree_skb_any(skb);
-		}
+		while (!skb_queue_empty(&mcast->pkt_queue))
+			dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue));
 
 		/* Clear the busy flag so we try again */
 		clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
@@ -928,21 +919,16 @@
 		return NULL;
 
 	iter->dev = dev;
-	memset(iter->mgid.raw, 0, sizeof iter->mgid);
+	memset(iter->mgid.raw, 0, 16);
 
 	if (ipoib_mcast_iter_next(iter)) {
-		ipoib_mcast_iter_free(iter);
+		kfree(iter);
 		return NULL;
 	}
 
 	return iter;
 }
 
-void ipoib_mcast_iter_free(struct ipoib_mcast_iter *iter)
-{
-	kfree(iter);
-}
-
 int ipoib_mcast_iter_next(struct ipoib_mcast_iter *iter)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(iter->dev);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
index 332d730..d280b34 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
@@ -113,8 +113,7 @@
 
 	priv->parent = ppriv->dev;
 
-	if (ipoib_create_debug_file(priv->dev))
-		goto debug_failed;
+	ipoib_create_debug_files(priv->dev);
 
 	if (ipoib_add_pkey_attr(priv->dev))
 		goto sysfs_failed;
@@ -130,9 +129,7 @@
 	return 0;
 
 sysfs_failed:
-	ipoib_delete_debug_file(priv->dev);
-
-debug_failed:
+	ipoib_delete_debug_files(priv->dev);
 	unregister_netdev(priv->dev);
 
 register_failed:
diff --git a/include/rdma/ib_user_verbs.h b/include/rdma/ib_user_verbs.h
index 072f3a2..5ff1490 100644
--- a/include/rdma/ib_user_verbs.h
+++ b/include/rdma/ib_user_verbs.h
@@ -43,7 +43,7 @@
  * Increment this value if any changes that break userspace ABI
  * compatibility are made.
  */
-#define IB_USER_VERBS_ABI_VERSION	3
+#define IB_USER_VERBS_ABI_VERSION	4
 
 enum {
 	IB_USER_VERBS_CMD_GET_CONTEXT,
@@ -333,6 +333,11 @@
 struct ib_uverbs_create_qp_resp {
 	__u32 qp_handle;
 	__u32 qpn;
+	__u32 max_send_wr;
+	__u32 max_recv_wr;
+	__u32 max_send_sge;
+	__u32 max_recv_sge;
+	__u32 max_inline_data;
 };
 
 /*
@@ -552,9 +557,7 @@
 	__u32 srq_handle;
 	__u32 attr_mask;
 	__u32 max_wr;
-	__u32 max_sge;
 	__u32 srq_limit;
-	__u32 reserved;
 	__u64 driver_data[0];
 };
 
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index f72d46d..a7f4c35 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -881,7 +881,7 @@
 						struct ib_ucontext *context,
 						struct ib_udata *udata);
 	int                        (*destroy_cq)(struct ib_cq *cq);
-	int                        (*resize_cq)(struct ib_cq *cq, int *cqe);
+	int                        (*resize_cq)(struct ib_cq *cq, int cqe);
 	int                        (*poll_cq)(struct ib_cq *cq, int num_entries,
 					      struct ib_wc *wc);
 	int                        (*peek_cq)(struct ib_cq *cq, int wc_cnt);