net/smc: use separate memory regions for RMBs

SMC currently uses the unsafe_global_rkey of the protection domain,
which exposes all memory for remote reads and writes once a connection
is established. This patch introduces separate memory regions with
separate rkeys for every RMB. Now the unsafe_global_rkey of the
protection domain is no longer needed.

Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index 15cb760..3934913 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -204,7 +204,7 @@ int smc_clc_send_confirm(struct smc_sock *smc)
 	memcpy(&cclc.lcl.mac, &link->smcibdev->mac[link->ibport - 1], ETH_ALEN);
 	hton24(cclc.qpn, link->roce_qp->qp_num);
 	cclc.rmb_rkey =
-		htonl(link->roce_pd->unsafe_global_rkey);
+		htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
 	cclc.conn_idx = 1; /* for now: 1 RMB = 1 RMBE */
 	cclc.rmbe_alert_token = htonl(conn->alert_token_local);
 	cclc.qp_mtu = min(link->path_mtu, link->peer_mtu);
@@ -256,7 +256,7 @@ int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact)
 	memcpy(&aclc.lcl.mac, link->smcibdev->mac[link->ibport - 1], ETH_ALEN);
 	hton24(aclc.qpn, link->roce_qp->qp_num);
 	aclc.rmb_rkey =
-		htonl(link->roce_pd->unsafe_global_rkey);
+		htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
 	aclc.conn_idx = 1;			/* as long as 1 RMB = 1 RMBE */
 	aclc.rmbe_alert_token = htonl(conn->alert_token_local);
 	aclc.qp_mtu = link->path_mtu;
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index bfdbda7..f1dd4e1 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -218,6 +218,7 @@ static void smc_sndbuf_unuse(struct smc_connection *conn)
 static void smc_rmb_unuse(struct smc_connection *conn)
 {
 	if (conn->rmb_desc) {
+		conn->rmb_desc->reused = true;
 		conn->rmb_desc->used = 0;
 		conn->rmbe_size = 0;
 	}
@@ -274,6 +275,8 @@ static void smc_lgr_free_rmbs(struct smc_link_group *lgr)
 		list_for_each_entry_safe(rmb_desc, bf_desc, &lgr->rmbs[i],
 					 list) {
 			list_del(&rmb_desc->list);
+			smc_ib_put_memory_region(
+					rmb_desc->mr_rx[SMC_SINGLE_LINK]);
 			smc_ib_buf_unmap_sg(lnk->smcibdev, rmb_desc,
 					    DMA_FROM_DEVICE);
 			kfree(rmb_desc->cpu_addr);
@@ -627,6 +630,21 @@ int smc_rmb_create(struct smc_sock *smc)
 			rmb_desc = NULL;
 			continue; /* if mapping failed, try smaller one */
 		}
+		rc = smc_ib_get_memory_region(lgr->lnk[SMC_SINGLE_LINK].roce_pd,
+					      IB_ACCESS_REMOTE_WRITE |
+					      IB_ACCESS_LOCAL_WRITE,
+					      rmb_desc);
+		if (rc) {
+			smc_ib_buf_unmap_sg(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
+					    rmb_desc, DMA_FROM_DEVICE);
+			sg_free_table(&rmb_desc->sgt[SMC_SINGLE_LINK]);
+			free_pages((unsigned long)rmb_desc->cpu_addr,
+				   rmb_desc->order);
+			kfree(rmb_desc);
+			rmb_desc = NULL;
+			continue;
+		}
+
 		rmb_desc->used = 1;
 		write_lock_bh(&lgr->rmbs_lock);
 		list_add(&rmb_desc->list, &lgr->rmbs[bufsize_short]);
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 0ee450d..17b5fea 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -94,8 +94,13 @@ struct smc_buf_desc {
 						/* mapped address of buffer */
 	void			*cpu_addr;	/* virtual address of buffer */
 	struct sg_table		sgt[SMC_LINKS_PER_LGR_MAX];/* virtual buffer */
+	struct ib_mr		*mr_rx[SMC_LINKS_PER_LGR_MAX];
+						/* for rmb only: memory region
+						 * incl. rkey provided to peer
+						 */
 	u32			order;		/* allocation order */
 	u32			used;		/* currently used / unused */
+	bool			reused;		/* new created / reused */
 };
 
 struct smc_rtoken {				/* address/key of remote RMB */
@@ -175,5 +180,4 @@ int smc_sndbuf_create(struct smc_sock *smc);
 int smc_rmb_create(struct smc_sock *smc);
 int smc_rmb_rtoken_handling(struct smc_connection *conn,
 			    struct smc_clc_msg_accept_confirm *clc);
-
 #endif
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index fcfeb89..0823349 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -192,8 +192,7 @@ int smc_ib_create_protection_domain(struct smc_link *lnk)
 {
 	int rc;
 
-	lnk->roce_pd = ib_alloc_pd(lnk->smcibdev->ibdev,
-				   IB_PD_UNSAFE_GLOBAL_RKEY);
+	lnk->roce_pd = ib_alloc_pd(lnk->smcibdev->ibdev, 0);
 	rc = PTR_ERR_OR_ZERO(lnk->roce_pd);
 	if (IS_ERR(lnk->roce_pd))
 		lnk->roce_pd = NULL;
@@ -254,6 +253,48 @@ int smc_ib_create_queue_pair(struct smc_link *lnk)
 	return rc;
 }
 
+void smc_ib_put_memory_region(struct ib_mr *mr)
+{
+	ib_dereg_mr(mr);
+}
+
+static int smc_ib_map_mr_sg(struct smc_buf_desc *buf_slot)
+{
+	unsigned int offset = 0;
+	int sg_num;
+
+	/* map the largest prefix of a dma mapped SG list */
+	sg_num = ib_map_mr_sg(buf_slot->mr_rx[SMC_SINGLE_LINK],
+			      buf_slot->sgt[SMC_SINGLE_LINK].sgl,
+			      buf_slot->sgt[SMC_SINGLE_LINK].orig_nents,
+			      &offset, PAGE_SIZE);
+
+	return sg_num;
+}
+
+/* Allocate a memory region and map the dma mapped SG list of buf_slot */
+int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags,
+			     struct smc_buf_desc *buf_slot)
+{
+	if (buf_slot->mr_rx[SMC_SINGLE_LINK])
+		return 0; /* already done */
+
+	buf_slot->mr_rx[SMC_SINGLE_LINK] =
+		ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, 1 << buf_slot->order);
+	if (IS_ERR(buf_slot->mr_rx[SMC_SINGLE_LINK])) {
+		int rc;
+
+		rc = PTR_ERR(buf_slot->mr_rx[SMC_SINGLE_LINK]);
+		buf_slot->mr_rx[SMC_SINGLE_LINK] = NULL;
+		return rc;
+	}
+
+	if (smc_ib_map_mr_sg(buf_slot) != 1)
+		return -EINVAL;
+
+	return 0;
+}
+
 /* map a new TX or RX buffer to DMA */
 int smc_ib_buf_map(struct smc_ib_device *smcibdev, int buf_size,
 		   struct smc_buf_desc *buf_slot,
diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h
index b30e387..b57d29f 100644
--- a/net/smc/smc_ib.h
+++ b/net/smc/smc_ib.h
@@ -71,6 +71,7 @@ int smc_ib_ready_link(struct smc_link *lnk);
 int smc_ib_modify_qp_rts(struct smc_link *lnk);
 int smc_ib_modify_qp_reset(struct smc_link *lnk);
 long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev);
-
-
+int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags,
+			     struct smc_buf_desc *buf_slot);
+void smc_ib_put_memory_region(struct ib_mr *mr);
 #endif