net/smc: add SMC-D support in af_smc

This patch ties together the previous SMC-D patches. It adds support for
SMC-D to the listen and connect functions and, thus, enables SMC-D
support in the SMC code. If a connection supports both SMC-R and SMC-D,
SMC-D is preferred.

Signed-off-by: Hans Wippel <hwippel@linux.ibm.com>
Signed-off-by: Ursula Braun <ubraun@linux.ibm.com>
Suggested-by: Thomas Richter <tmricht@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 20afa94..cbbb947 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -23,6 +23,7 @@
 #include <linux/workqueue.h>
 #include <linux/in.h>
 #include <linux/sched/signal.h>
+#include <linux/if_vlan.h>
 
 #include <net/sock.h>
 #include <net/tcp.h>
@@ -35,6 +36,7 @@
 #include "smc_cdc.h"
 #include "smc_core.h"
 #include "smc_ib.h"
+#include "smc_ism.h"
 #include "smc_pnet.h"
 #include "smc_tx.h"
 #include "smc_rx.h"
@@ -372,8 +374,8 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc)
 	return 0;
 }
 
-static void smc_conn_save_peer_info(struct smc_sock *smc,
-				    struct smc_clc_msg_accept_confirm *clc)
+static void smcr_conn_save_peer_info(struct smc_sock *smc,
+				     struct smc_clc_msg_accept_confirm *clc)
 {
 	int bufsize = smc_uncompress_bufsize(clc->rmbe_size);
 
@@ -384,6 +386,28 @@ static void smc_conn_save_peer_info(struct smc_sock *smc,
 	smc->conn.tx_off = bufsize * (smc->conn.peer_rmbe_idx - 1);
 }
 
+static void smcd_conn_save_peer_info(struct smc_sock *smc,
+				     struct smc_clc_msg_accept_confirm *clc)
+{
+	int bufsize = smc_uncompress_bufsize(clc->dmbe_size);
+
+	smc->conn.peer_rmbe_idx = clc->dmbe_idx;
+	smc->conn.peer_token = clc->token;
+	/* msg header takes up space in the buffer */
+	smc->conn.peer_rmbe_size = bufsize - sizeof(struct smcd_cdc_msg);
+	atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
+	smc->conn.tx_off = bufsize * smc->conn.peer_rmbe_idx;
+}
+
+static void smc_conn_save_peer_info(struct smc_sock *smc,
+				    struct smc_clc_msg_accept_confirm *clc)
+{
+	if (smc->conn.lgr->is_smcd)
+		smcd_conn_save_peer_info(smc, clc);
+	else
+		smcr_conn_save_peer_info(smc, clc);
+}
+
 static void smc_link_save_peer_info(struct smc_link *link,
 				    struct smc_clc_msg_accept_confirm *clc)
 {
@@ -450,15 +474,51 @@ static int smc_check_rdma(struct smc_sock *smc, struct smc_ib_device **ibdev,
 	return reason_code;
 }
 
+/* check if there is an ISM device available for this connection. */
+/* called for connect and listen */
+static int smc_check_ism(struct smc_sock *smc, struct smcd_dev **ismdev)
+{
+	/* Find ISM device with same PNETID as connecting interface  */
+	smc_pnet_find_ism_resource(smc->clcsock->sk, ismdev);
+	if (!(*ismdev))
+		return SMC_CLC_DECL_CNFERR; /* configuration error */
+	return 0;
+}
+
+/* Check for VLAN ID and register it on ISM device just for CLC handshake */
+static int smc_connect_ism_vlan_setup(struct smc_sock *smc,
+				      struct smcd_dev *ismdev,
+				      unsigned short vlan_id)
+{
+	if (vlan_id && smc_ism_get_vlan(ismdev, vlan_id))
+		return SMC_CLC_DECL_CNFERR;
+	return 0;
+}
+
+/* cleanup temporary VLAN ID registration used for CLC handshake. If ISM is
+ * used, the VLAN ID will be registered again during the connection setup.
+ */
+static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc, bool is_smcd,
+					struct smcd_dev *ismdev,
+					unsigned short vlan_id)
+{
+	if (!is_smcd)
+		return 0;
+	if (vlan_id && smc_ism_put_vlan(ismdev, vlan_id))
+		return SMC_CLC_DECL_CNFERR;
+	return 0;
+}
+
 /* CLC handshake during connect */
 static int smc_connect_clc(struct smc_sock *smc, int smc_type,
 			   struct smc_clc_msg_accept_confirm *aclc,
-			   struct smc_ib_device *ibdev, u8 ibport)
+			   struct smc_ib_device *ibdev, u8 ibport,
+			   struct smcd_dev *ismdev)
 {
 	int rc = 0;
 
 	/* do inband token exchange */
-	rc = smc_clc_send_proposal(smc, smc_type, ibdev, ibport, NULL);
+	rc = smc_clc_send_proposal(smc, smc_type, ibdev, ibport, ismdev);
 	if (rc)
 		return rc;
 	/* receive SMC Accept CLC message */
@@ -538,11 +598,50 @@ static int smc_connect_rdma(struct smc_sock *smc,
 	return 0;
 }
 
+/* setup for ISM connection of client */
+static int smc_connect_ism(struct smc_sock *smc,
+			   struct smc_clc_msg_accept_confirm *aclc,
+			   struct smcd_dev *ismdev)
+{
+	int local_contact = SMC_FIRST_CONTACT;
+	int rc = 0;
+
+	mutex_lock(&smc_create_lgr_pending);
+	local_contact = smc_conn_create(smc, true, aclc->hdr.flag, NULL, 0,
+					NULL, ismdev, aclc->gid);
+	if (local_contact < 0)
+		return smc_connect_abort(smc, SMC_CLC_DECL_MEM, 0);
+
+	/* Create send and receive buffers */
+	if (smc_buf_create(smc, true))
+		return smc_connect_abort(smc, SMC_CLC_DECL_MEM, local_contact);
+
+	smc_conn_save_peer_info(smc, aclc);
+	smc_close_init(smc);
+	smc_rx_init(smc);
+	smc_tx_init(smc);
+
+	rc = smc_clc_send_confirm(smc);
+	if (rc)
+		return smc_connect_abort(smc, rc, local_contact);
+	mutex_unlock(&smc_create_lgr_pending);
+
+	smc_copy_sock_settings_to_clc(smc);
+	if (smc->sk.sk_state == SMC_INIT)
+		smc->sk.sk_state = SMC_ACTIVE;
+
+	return 0;
+}
+
 /* perform steps before actually connecting */
 static int __smc_connect(struct smc_sock *smc)
 {
+	bool ism_supported = false, rdma_supported = false;
 	struct smc_clc_msg_accept_confirm aclc;
 	struct smc_ib_device *ibdev;
+	struct smcd_dev *ismdev;
+	unsigned short vlan;
+	int smc_type;
 	int rc = 0;
 	u8 ibport;
 
@@ -559,20 +658,52 @@ static int __smc_connect(struct smc_sock *smc)
 	if (using_ipsec(smc))
 		return smc_connect_decline_fallback(smc, SMC_CLC_DECL_IPSEC);
 
-	/* check if a RDMA device is available; if not, fall back */
-	if (smc_check_rdma(smc, &ibdev, &ibport))
+	/* check for VLAN ID */
+	if (smc_vlan_by_tcpsk(smc->clcsock, &vlan))
+		return smc_connect_decline_fallback(smc, SMC_CLC_DECL_CNFERR);
+
+	/* check if there is an ism device available */
+	if (!smc_check_ism(smc, &ismdev) &&
+	    !smc_connect_ism_vlan_setup(smc, ismdev, vlan)) {
+		/* ISM is supported for this connection */
+		ism_supported = true;
+		smc_type = SMC_TYPE_D;
+	}
+
+	/* check if there is a rdma device available */
+	if (!smc_check_rdma(smc, &ibdev, &ibport)) {
+		/* RDMA is supported for this connection */
+		rdma_supported = true;
+		if (ism_supported)
+			smc_type = SMC_TYPE_B; /* both */
+		else
+			smc_type = SMC_TYPE_R; /* only RDMA */
+	}
+
+	/* if neither ISM nor RDMA are supported, fallback */
+	if (!rdma_supported && !ism_supported)
 		return smc_connect_decline_fallback(smc, SMC_CLC_DECL_CNFERR);
 
 	/* perform CLC handshake */
-	rc = smc_connect_clc(smc, SMC_TYPE_R, &aclc, ibdev, ibport);
-	if (rc)
+	rc = smc_connect_clc(smc, smc_type, &aclc, ibdev, ibport, ismdev);
+	if (rc) {
+		smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan);
 		return smc_connect_decline_fallback(smc, rc);
+	}
 
-	/* connect using rdma */
-	rc = smc_connect_rdma(smc, &aclc, ibdev, ibport);
-	if (rc)
+	/* depending on previous steps, connect using rdma or ism */
+	if (rdma_supported && aclc.hdr.path == SMC_TYPE_R)
+		rc = smc_connect_rdma(smc, &aclc, ibdev, ibport);
+	else if (ism_supported && aclc.hdr.path == SMC_TYPE_D)
+		rc = smc_connect_ism(smc, &aclc, ismdev);
+	else
+		rc = SMC_CLC_DECL_CNFERR;
+	if (rc) {
+		smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan);
 		return smc_connect_decline_fallback(smc, rc);
+	}
 
+	smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan);
 	return 0;
 }
 
@@ -909,6 +1040,44 @@ static int smc_listen_rdma_init(struct smc_sock *new_smc,
 	return 0;
 }
 
+/* listen worker: initialize connection and buffers for SMC-D */
+static int smc_listen_ism_init(struct smc_sock *new_smc,
+			       struct smc_clc_msg_proposal *pclc,
+			       struct smcd_dev *ismdev,
+			       int *local_contact)
+{
+	struct smc_clc_msg_smcd *pclc_smcd;
+
+	pclc_smcd = smc_get_clc_msg_smcd(pclc);
+	*local_contact = smc_conn_create(new_smc, true, 0, NULL, 0, NULL,
+					 ismdev, pclc_smcd->gid);
+	if (*local_contact < 0) {
+		if (*local_contact == -ENOMEM)
+			return SMC_CLC_DECL_MEM;/* insufficient memory*/
+		return SMC_CLC_DECL_INTERR; /* other error */
+	}
+
+	/* Check if peer can be reached via ISM device */
+	if (smc_ism_cantalk(new_smc->conn.lgr->peer_gid,
+			    new_smc->conn.lgr->vlan_id,
+			    new_smc->conn.lgr->smcd)) {
+		if (*local_contact == SMC_FIRST_CONTACT)
+			smc_lgr_forget(new_smc->conn.lgr);
+		smc_conn_free(&new_smc->conn);
+		return SMC_CLC_DECL_CNFERR;
+	}
+
+	/* Create send and receive buffers */
+	if (smc_buf_create(new_smc, true)) {
+		if (*local_contact == SMC_FIRST_CONTACT)
+			smc_lgr_forget(new_smc->conn.lgr);
+		smc_conn_free(&new_smc->conn);
+		return SMC_CLC_DECL_MEM;
+	}
+
+	return 0;
+}
+
 /* listen worker: register buffers */
 static int smc_listen_rdma_reg(struct smc_sock *new_smc, int local_contact)
 {
@@ -967,6 +1136,8 @@ static void smc_listen_work(struct work_struct *work)
 	struct smc_clc_msg_accept_confirm cclc;
 	struct smc_clc_msg_proposal *pclc;
 	struct smc_ib_device *ibdev;
+	bool ism_supported = false;
+	struct smcd_dev *ismdev;
 	u8 buf[SMC_CLC_MAX_LEN];
 	int local_contact = 0;
 	int reason_code = 0;
@@ -1007,13 +1178,21 @@ static void smc_listen_work(struct work_struct *work)
 	smc_rx_init(new_smc);
 	smc_tx_init(new_smc);
 
+	/* check if ISM is available */
+	if ((pclc->hdr.path == SMC_TYPE_D || pclc->hdr.path == SMC_TYPE_B) &&
+	    !smc_check_ism(new_smc, &ismdev) &&
+	    !smc_listen_ism_init(new_smc, pclc, ismdev, &local_contact)) {
+		ism_supported = true;
+	}
+
 	/* check if RDMA is available */
-	if ((pclc->hdr.path != SMC_TYPE_R && pclc->hdr.path != SMC_TYPE_B) ||
-	    smc_check_rdma(new_smc, &ibdev, &ibport) ||
-	    smc_listen_rdma_check(new_smc, pclc) ||
-	    smc_listen_rdma_init(new_smc, pclc, ibdev, ibport,
-				 &local_contact) ||
-	    smc_listen_rdma_reg(new_smc, local_contact)) {
+	if (!ism_supported &&
+	    ((pclc->hdr.path != SMC_TYPE_R && pclc->hdr.path != SMC_TYPE_B) ||
+	     smc_check_rdma(new_smc, &ibdev, &ibport) ||
+	     smc_listen_rdma_check(new_smc, pclc) ||
+	     smc_listen_rdma_init(new_smc, pclc, ibdev, ibport,
+				  &local_contact) ||
+	     smc_listen_rdma_reg(new_smc, local_contact))) {
 		/* SMC not supported, decline */
 		mutex_unlock(&smc_create_lgr_pending);
 		smc_listen_decline(new_smc, SMC_CLC_DECL_CNFERR, local_contact);
@@ -1038,7 +1217,8 @@ static void smc_listen_work(struct work_struct *work)
 	}
 
 	/* finish worker */
-	smc_listen_rdma_finish(new_smc, &cclc, local_contact);
+	if (!ism_supported)
+		smc_listen_rdma_finish(new_smc, &cclc, local_contact);
 	smc_conn_save_peer_info(new_smc, &cclc);
 	mutex_unlock(&smc_create_lgr_pending);
 	smc_listen_out_connected(new_smc);
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 434c028..66741e6 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -478,7 +478,7 @@ void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid)
 /* Determine vlan of internal TCP socket.
  * @vlan_id: address to store the determined vlan id into
  */
-static int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id)
+int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id)
 {
 	struct dst_entry *dst = sk_dst_get(clcsock->sk);
 	struct net_device *ndev;
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index cd9268a..8b47e01 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -257,6 +257,7 @@ void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn);
 void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn);
 void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn);
 void smc_rmb_sync_sg_for_device(struct smc_connection *conn);
+int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id);
 
 void smc_conn_free(struct smc_connection *conn);
 int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact,