Sridhar Samudrala | cfc80d9 | 2018-05-24 09:55:15 -0700 | [diff] [blame] | 1 | // SPDX-License-Identifier: GPL-2.0 |
| 2 | /* Copyright (c) 2018, Intel Corporation. */ |
| 3 | |
/* This provides a net_failover interface for paravirtual drivers to
 * provide an alternate datapath by exporting APIs to create and
 * destroy an upper 'net_failover' netdev. The upper dev manages the
 * original paravirtual interface as a 'standby' netdev and uses the
 * generic failover infrastructure to register and manage a directly
 * attached VF as a 'primary' netdev. This enables live migration of
 * a VM with a directly attached VF by failing over to the paravirtual
 * datapath when the VF is unplugged.
 *
 * Some of the netdev management routines are based on the bond/team
 * drivers, as this driver provides active-backup functionality similar
 * to those drivers.
 */
| 16 | |
| 17 | #include <linux/netdevice.h> |
| 18 | #include <linux/etherdevice.h> |
| 19 | #include <linux/ethtool.h> |
| 20 | #include <linux/module.h> |
| 21 | #include <linux/slab.h> |
Sridhar Samudrala | cfc80d9 | 2018-05-24 09:55:15 -0700 | [diff] [blame] | 22 | #include <linux/netpoll.h> |
| 23 | #include <linux/rtnetlink.h> |
| 24 | #include <linux/if_vlan.h> |
| 25 | #include <linux/pci.h> |
| 26 | #include <net/sch_generic.h> |
| 27 | #include <uapi/linux/if_arp.h> |
| 28 | #include <net/net_failover.h> |
| 29 | |
| 30 | static bool net_failover_xmit_ready(struct net_device *dev) |
| 31 | { |
| 32 | return netif_running(dev) && netif_carrier_ok(dev); |
| 33 | } |
| 34 | |
| 35 | static int net_failover_open(struct net_device *dev) |
| 36 | { |
| 37 | struct net_failover_info *nfo_info = netdev_priv(dev); |
| 38 | struct net_device *primary_dev, *standby_dev; |
| 39 | int err; |
| 40 | |
| 41 | primary_dev = rtnl_dereference(nfo_info->primary_dev); |
| 42 | if (primary_dev) { |
Petr Machata | 00f54e6 | 2018-12-06 17:05:36 +0000 | [diff] [blame] | 43 | err = dev_open(primary_dev, NULL); |
Sridhar Samudrala | cfc80d9 | 2018-05-24 09:55:15 -0700 | [diff] [blame] | 44 | if (err) |
| 45 | goto err_primary_open; |
| 46 | } |
| 47 | |
| 48 | standby_dev = rtnl_dereference(nfo_info->standby_dev); |
| 49 | if (standby_dev) { |
Petr Machata | 00f54e6 | 2018-12-06 17:05:36 +0000 | [diff] [blame] | 50 | err = dev_open(standby_dev, NULL); |
Sridhar Samudrala | cfc80d9 | 2018-05-24 09:55:15 -0700 | [diff] [blame] | 51 | if (err) |
| 52 | goto err_standby_open; |
| 53 | } |
| 54 | |
| 55 | if ((primary_dev && net_failover_xmit_ready(primary_dev)) || |
| 56 | (standby_dev && net_failover_xmit_ready(standby_dev))) { |
| 57 | netif_carrier_on(dev); |
| 58 | netif_tx_wake_all_queues(dev); |
| 59 | } |
| 60 | |
| 61 | return 0; |
| 62 | |
| 63 | err_standby_open: |
| 64 | dev_close(primary_dev); |
| 65 | err_primary_open: |
| 66 | netif_tx_disable(dev); |
| 67 | return err; |
| 68 | } |
| 69 | |
| 70 | static int net_failover_close(struct net_device *dev) |
| 71 | { |
| 72 | struct net_failover_info *nfo_info = netdev_priv(dev); |
| 73 | struct net_device *slave_dev; |
| 74 | |
| 75 | netif_tx_disable(dev); |
| 76 | |
| 77 | slave_dev = rtnl_dereference(nfo_info->primary_dev); |
| 78 | if (slave_dev) |
| 79 | dev_close(slave_dev); |
| 80 | |
| 81 | slave_dev = rtnl_dereference(nfo_info->standby_dev); |
| 82 | if (slave_dev) |
| 83 | dev_close(slave_dev); |
| 84 | |
| 85 | return 0; |
| 86 | } |
| 87 | |
| 88 | static netdev_tx_t net_failover_drop_xmit(struct sk_buff *skb, |
| 89 | struct net_device *dev) |
| 90 | { |
| 91 | atomic_long_inc(&dev->tx_dropped); |
| 92 | dev_kfree_skb_any(skb); |
| 93 | return NETDEV_TX_OK; |
| 94 | } |
| 95 | |
| 96 | static netdev_tx_t net_failover_start_xmit(struct sk_buff *skb, |
| 97 | struct net_device *dev) |
| 98 | { |
| 99 | struct net_failover_info *nfo_info = netdev_priv(dev); |
| 100 | struct net_device *xmit_dev; |
| 101 | |
| 102 | /* Try xmit via primary netdev followed by standby netdev */ |
| 103 | xmit_dev = rcu_dereference_bh(nfo_info->primary_dev); |
| 104 | if (!xmit_dev || !net_failover_xmit_ready(xmit_dev)) { |
| 105 | xmit_dev = rcu_dereference_bh(nfo_info->standby_dev); |
| 106 | if (!xmit_dev || !net_failover_xmit_ready(xmit_dev)) |
| 107 | return net_failover_drop_xmit(skb, dev); |
| 108 | } |
| 109 | |
| 110 | skb->dev = xmit_dev; |
| 111 | skb->queue_mapping = qdisc_skb_cb(skb)->slave_dev_queue_mapping; |
| 112 | |
| 113 | return dev_queue_xmit(skb); |
| 114 | } |
| 115 | |
| 116 | static u16 net_failover_select_queue(struct net_device *dev, |
Alexander Duyck | 4f49dec | 2018-07-09 12:19:59 -0400 | [diff] [blame] | 117 | struct sk_buff *skb, |
Paolo Abeni | a350ecc | 2019-03-20 11:02:06 +0100 | [diff] [blame] | 118 | struct net_device *sb_dev) |
Sridhar Samudrala | cfc80d9 | 2018-05-24 09:55:15 -0700 | [diff] [blame] | 119 | { |
| 120 | struct net_failover_info *nfo_info = netdev_priv(dev); |
| 121 | struct net_device *primary_dev; |
| 122 | u16 txq; |
| 123 | |
| 124 | primary_dev = rcu_dereference(nfo_info->primary_dev); |
| 125 | if (primary_dev) { |
| 126 | const struct net_device_ops *ops = primary_dev->netdev_ops; |
| 127 | |
| 128 | if (ops->ndo_select_queue) |
Paolo Abeni | a350ecc | 2019-03-20 11:02:06 +0100 | [diff] [blame] | 129 | txq = ops->ndo_select_queue(primary_dev, skb, sb_dev); |
Sridhar Samudrala | cfc80d9 | 2018-05-24 09:55:15 -0700 | [diff] [blame] | 130 | else |
Paolo Abeni | a350ecc | 2019-03-20 11:02:06 +0100 | [diff] [blame] | 131 | txq = netdev_pick_tx(primary_dev, skb, NULL); |
Sridhar Samudrala | cfc80d9 | 2018-05-24 09:55:15 -0700 | [diff] [blame] | 132 | |
| 133 | qdisc_skb_cb(skb)->slave_dev_queue_mapping = skb->queue_mapping; |
| 134 | |
| 135 | return txq; |
| 136 | } |
| 137 | |
| 138 | txq = skb_rx_queue_recorded(skb) ? skb_get_rx_queue(skb) : 0; |
| 139 | |
| 140 | /* Save the original txq to restore before passing to the driver */ |
| 141 | qdisc_skb_cb(skb)->slave_dev_queue_mapping = skb->queue_mapping; |
| 142 | |
| 143 | if (unlikely(txq >= dev->real_num_tx_queues)) { |
| 144 | do { |
| 145 | txq -= dev->real_num_tx_queues; |
| 146 | } while (txq >= dev->real_num_tx_queues); |
| 147 | } |
| 148 | |
| 149 | return txq; |
| 150 | } |
| 151 | |
| 152 | /* fold stats, assuming all rtnl_link_stats64 fields are u64, but |
| 153 | * that some drivers can provide 32bit values only. |
| 154 | */ |
| 155 | static void net_failover_fold_stats(struct rtnl_link_stats64 *_res, |
| 156 | const struct rtnl_link_stats64 *_new, |
| 157 | const struct rtnl_link_stats64 *_old) |
| 158 | { |
| 159 | const u64 *new = (const u64 *)_new; |
| 160 | const u64 *old = (const u64 *)_old; |
| 161 | u64 *res = (u64 *)_res; |
| 162 | int i; |
| 163 | |
| 164 | for (i = 0; i < sizeof(*_res) / sizeof(u64); i++) { |
| 165 | u64 nv = new[i]; |
| 166 | u64 ov = old[i]; |
| 167 | s64 delta = nv - ov; |
| 168 | |
| 169 | /* detects if this particular field is 32bit only */ |
| 170 | if (((nv | ov) >> 32) == 0) |
| 171 | delta = (s64)(s32)((u32)nv - (u32)ov); |
| 172 | |
| 173 | /* filter anomalies, some drivers reset their stats |
| 174 | * at down/up events. |
| 175 | */ |
| 176 | if (delta > 0) |
| 177 | res[i] += delta; |
| 178 | } |
| 179 | } |
| 180 | |
| 181 | static void net_failover_get_stats(struct net_device *dev, |
| 182 | struct rtnl_link_stats64 *stats) |
| 183 | { |
| 184 | struct net_failover_info *nfo_info = netdev_priv(dev); |
| 185 | const struct rtnl_link_stats64 *new; |
| 186 | struct rtnl_link_stats64 temp; |
| 187 | struct net_device *slave_dev; |
| 188 | |
| 189 | spin_lock(&nfo_info->stats_lock); |
| 190 | memcpy(stats, &nfo_info->failover_stats, sizeof(*stats)); |
| 191 | |
| 192 | rcu_read_lock(); |
| 193 | |
| 194 | slave_dev = rcu_dereference(nfo_info->primary_dev); |
| 195 | if (slave_dev) { |
| 196 | new = dev_get_stats(slave_dev, &temp); |
| 197 | net_failover_fold_stats(stats, new, &nfo_info->primary_stats); |
| 198 | memcpy(&nfo_info->primary_stats, new, sizeof(*new)); |
| 199 | } |
| 200 | |
| 201 | slave_dev = rcu_dereference(nfo_info->standby_dev); |
| 202 | if (slave_dev) { |
| 203 | new = dev_get_stats(slave_dev, &temp); |
| 204 | net_failover_fold_stats(stats, new, &nfo_info->standby_stats); |
| 205 | memcpy(&nfo_info->standby_stats, new, sizeof(*new)); |
| 206 | } |
| 207 | |
| 208 | rcu_read_unlock(); |
| 209 | |
| 210 | memcpy(&nfo_info->failover_stats, stats, sizeof(*stats)); |
| 211 | spin_unlock(&nfo_info->stats_lock); |
| 212 | } |
| 213 | |
| 214 | static int net_failover_change_mtu(struct net_device *dev, int new_mtu) |
| 215 | { |
| 216 | struct net_failover_info *nfo_info = netdev_priv(dev); |
| 217 | struct net_device *primary_dev, *standby_dev; |
| 218 | int ret = 0; |
| 219 | |
Stephen Hemminger | 3260155 | 2018-07-27 13:43:21 -0700 | [diff] [blame] | 220 | primary_dev = rtnl_dereference(nfo_info->primary_dev); |
Sridhar Samudrala | cfc80d9 | 2018-05-24 09:55:15 -0700 | [diff] [blame] | 221 | if (primary_dev) { |
| 222 | ret = dev_set_mtu(primary_dev, new_mtu); |
| 223 | if (ret) |
| 224 | return ret; |
| 225 | } |
| 226 | |
Stephen Hemminger | 3260155 | 2018-07-27 13:43:21 -0700 | [diff] [blame] | 227 | standby_dev = rtnl_dereference(nfo_info->standby_dev); |
Sridhar Samudrala | cfc80d9 | 2018-05-24 09:55:15 -0700 | [diff] [blame] | 228 | if (standby_dev) { |
| 229 | ret = dev_set_mtu(standby_dev, new_mtu); |
| 230 | if (ret) { |
| 231 | if (primary_dev) |
| 232 | dev_set_mtu(primary_dev, dev->mtu); |
| 233 | return ret; |
| 234 | } |
| 235 | } |
| 236 | |
| 237 | dev->mtu = new_mtu; |
| 238 | |
| 239 | return 0; |
| 240 | } |
| 241 | |
| 242 | static void net_failover_set_rx_mode(struct net_device *dev) |
| 243 | { |
| 244 | struct net_failover_info *nfo_info = netdev_priv(dev); |
| 245 | struct net_device *slave_dev; |
| 246 | |
| 247 | rcu_read_lock(); |
| 248 | |
| 249 | slave_dev = rcu_dereference(nfo_info->primary_dev); |
| 250 | if (slave_dev) { |
| 251 | dev_uc_sync_multiple(slave_dev, dev); |
| 252 | dev_mc_sync_multiple(slave_dev, dev); |
| 253 | } |
| 254 | |
| 255 | slave_dev = rcu_dereference(nfo_info->standby_dev); |
| 256 | if (slave_dev) { |
| 257 | dev_uc_sync_multiple(slave_dev, dev); |
| 258 | dev_mc_sync_multiple(slave_dev, dev); |
| 259 | } |
| 260 | |
| 261 | rcu_read_unlock(); |
| 262 | } |
| 263 | |
| 264 | static int net_failover_vlan_rx_add_vid(struct net_device *dev, __be16 proto, |
| 265 | u16 vid) |
| 266 | { |
| 267 | struct net_failover_info *nfo_info = netdev_priv(dev); |
| 268 | struct net_device *primary_dev, *standby_dev; |
| 269 | int ret = 0; |
| 270 | |
| 271 | primary_dev = rcu_dereference(nfo_info->primary_dev); |
| 272 | if (primary_dev) { |
| 273 | ret = vlan_vid_add(primary_dev, proto, vid); |
| 274 | if (ret) |
| 275 | return ret; |
| 276 | } |
| 277 | |
| 278 | standby_dev = rcu_dereference(nfo_info->standby_dev); |
| 279 | if (standby_dev) { |
| 280 | ret = vlan_vid_add(standby_dev, proto, vid); |
| 281 | if (ret) |
| 282 | if (primary_dev) |
| 283 | vlan_vid_del(primary_dev, proto, vid); |
| 284 | } |
| 285 | |
| 286 | return ret; |
| 287 | } |
| 288 | |
| 289 | static int net_failover_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, |
| 290 | u16 vid) |
| 291 | { |
| 292 | struct net_failover_info *nfo_info = netdev_priv(dev); |
| 293 | struct net_device *slave_dev; |
| 294 | |
| 295 | slave_dev = rcu_dereference(nfo_info->primary_dev); |
| 296 | if (slave_dev) |
| 297 | vlan_vid_del(slave_dev, proto, vid); |
| 298 | |
| 299 | slave_dev = rcu_dereference(nfo_info->standby_dev); |
| 300 | if (slave_dev) |
| 301 | vlan_vid_del(slave_dev, proto, vid); |
| 302 | |
| 303 | return 0; |
| 304 | } |
| 305 | |
| 306 | static const struct net_device_ops failover_dev_ops = { |
| 307 | .ndo_open = net_failover_open, |
| 308 | .ndo_stop = net_failover_close, |
| 309 | .ndo_start_xmit = net_failover_start_xmit, |
| 310 | .ndo_select_queue = net_failover_select_queue, |
| 311 | .ndo_get_stats64 = net_failover_get_stats, |
| 312 | .ndo_change_mtu = net_failover_change_mtu, |
| 313 | .ndo_set_rx_mode = net_failover_set_rx_mode, |
| 314 | .ndo_vlan_rx_add_vid = net_failover_vlan_rx_add_vid, |
| 315 | .ndo_vlan_rx_kill_vid = net_failover_vlan_rx_kill_vid, |
| 316 | .ndo_validate_addr = eth_validate_addr, |
| 317 | .ndo_features_check = passthru_features_check, |
| 318 | }; |
| 319 | |
| 320 | #define FAILOVER_NAME "net_failover" |
| 321 | #define FAILOVER_VERSION "0.1" |
| 322 | |
| 323 | static void nfo_ethtool_get_drvinfo(struct net_device *dev, |
| 324 | struct ethtool_drvinfo *drvinfo) |
| 325 | { |
| 326 | strlcpy(drvinfo->driver, FAILOVER_NAME, sizeof(drvinfo->driver)); |
| 327 | strlcpy(drvinfo->version, FAILOVER_VERSION, sizeof(drvinfo->version)); |
| 328 | } |
| 329 | |
| 330 | static int nfo_ethtool_get_link_ksettings(struct net_device *dev, |
| 331 | struct ethtool_link_ksettings *cmd) |
| 332 | { |
| 333 | struct net_failover_info *nfo_info = netdev_priv(dev); |
| 334 | struct net_device *slave_dev; |
| 335 | |
| 336 | slave_dev = rtnl_dereference(nfo_info->primary_dev); |
| 337 | if (!slave_dev || !net_failover_xmit_ready(slave_dev)) { |
| 338 | slave_dev = rtnl_dereference(nfo_info->standby_dev); |
| 339 | if (!slave_dev || !net_failover_xmit_ready(slave_dev)) { |
| 340 | cmd->base.duplex = DUPLEX_UNKNOWN; |
| 341 | cmd->base.port = PORT_OTHER; |
| 342 | cmd->base.speed = SPEED_UNKNOWN; |
| 343 | |
| 344 | return 0; |
| 345 | } |
| 346 | } |
| 347 | |
| 348 | return __ethtool_get_link_ksettings(slave_dev, cmd); |
| 349 | } |
| 350 | |
| 351 | static const struct ethtool_ops failover_ethtool_ops = { |
| 352 | .get_drvinfo = nfo_ethtool_get_drvinfo, |
| 353 | .get_link = ethtool_op_get_link, |
| 354 | .get_link_ksettings = nfo_ethtool_get_link_ksettings, |
| 355 | }; |
| 356 | |
| 357 | /* Called when slave dev is injecting data into network stack. |
| 358 | * Change the associated network device from lower dev to failover dev. |
| 359 | * note: already called with rcu_read_lock |
| 360 | */ |
| 361 | static rx_handler_result_t net_failover_handle_frame(struct sk_buff **pskb) |
| 362 | { |
| 363 | struct sk_buff *skb = *pskb; |
| 364 | struct net_device *dev = rcu_dereference(skb->dev->rx_handler_data); |
| 365 | struct net_failover_info *nfo_info = netdev_priv(dev); |
| 366 | struct net_device *primary_dev, *standby_dev; |
| 367 | |
| 368 | primary_dev = rcu_dereference(nfo_info->primary_dev); |
| 369 | standby_dev = rcu_dereference(nfo_info->standby_dev); |
| 370 | |
| 371 | if (primary_dev && skb->dev == standby_dev) |
| 372 | return RX_HANDLER_EXACT; |
| 373 | |
| 374 | skb->dev = dev; |
| 375 | |
| 376 | return RX_HANDLER_ANOTHER; |
| 377 | } |
| 378 | |
| 379 | static void net_failover_compute_features(struct net_device *dev) |
| 380 | { |
Dan Carpenter | a746407 | 2018-06-04 17:43:21 +0300 | [diff] [blame] | 381 | netdev_features_t vlan_features = FAILOVER_VLAN_FEATURES & |
| 382 | NETIF_F_ALL_FOR_ALL; |
Sridhar Samudrala | cfc80d9 | 2018-05-24 09:55:15 -0700 | [diff] [blame] | 383 | netdev_features_t enc_features = FAILOVER_ENC_FEATURES; |
| 384 | unsigned short max_hard_header_len = ETH_HLEN; |
| 385 | unsigned int dst_release_flag = IFF_XMIT_DST_RELEASE | |
| 386 | IFF_XMIT_DST_RELEASE_PERM; |
| 387 | struct net_failover_info *nfo_info = netdev_priv(dev); |
| 388 | struct net_device *primary_dev, *standby_dev; |
| 389 | |
| 390 | primary_dev = rcu_dereference(nfo_info->primary_dev); |
| 391 | if (primary_dev) { |
| 392 | vlan_features = |
| 393 | netdev_increment_features(vlan_features, |
| 394 | primary_dev->vlan_features, |
| 395 | FAILOVER_VLAN_FEATURES); |
| 396 | enc_features = |
| 397 | netdev_increment_features(enc_features, |
| 398 | primary_dev->hw_enc_features, |
| 399 | FAILOVER_ENC_FEATURES); |
| 400 | |
| 401 | dst_release_flag &= primary_dev->priv_flags; |
| 402 | if (primary_dev->hard_header_len > max_hard_header_len) |
| 403 | max_hard_header_len = primary_dev->hard_header_len; |
| 404 | } |
| 405 | |
| 406 | standby_dev = rcu_dereference(nfo_info->standby_dev); |
| 407 | if (standby_dev) { |
| 408 | vlan_features = |
| 409 | netdev_increment_features(vlan_features, |
| 410 | standby_dev->vlan_features, |
| 411 | FAILOVER_VLAN_FEATURES); |
| 412 | enc_features = |
| 413 | netdev_increment_features(enc_features, |
| 414 | standby_dev->hw_enc_features, |
| 415 | FAILOVER_ENC_FEATURES); |
| 416 | |
| 417 | dst_release_flag &= standby_dev->priv_flags; |
| 418 | if (standby_dev->hard_header_len > max_hard_header_len) |
| 419 | max_hard_header_len = standby_dev->hard_header_len; |
| 420 | } |
| 421 | |
| 422 | dev->vlan_features = vlan_features; |
| 423 | dev->hw_enc_features = enc_features | NETIF_F_GSO_ENCAP_ALL; |
| 424 | dev->hard_header_len = max_hard_header_len; |
| 425 | |
| 426 | dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; |
| 427 | if (dst_release_flag == (IFF_XMIT_DST_RELEASE | |
| 428 | IFF_XMIT_DST_RELEASE_PERM)) |
| 429 | dev->priv_flags |= IFF_XMIT_DST_RELEASE; |
| 430 | |
| 431 | netdev_change_features(dev); |
| 432 | } |
| 433 | |
| 434 | static void net_failover_lower_state_changed(struct net_device *slave_dev, |
| 435 | struct net_device *primary_dev, |
| 436 | struct net_device *standby_dev) |
| 437 | { |
| 438 | struct netdev_lag_lower_state_info info; |
| 439 | |
| 440 | if (netif_carrier_ok(slave_dev)) |
| 441 | info.link_up = true; |
| 442 | else |
| 443 | info.link_up = false; |
| 444 | |
| 445 | if (slave_dev == primary_dev) { |
| 446 | if (netif_running(primary_dev)) |
| 447 | info.tx_enabled = true; |
| 448 | else |
| 449 | info.tx_enabled = false; |
| 450 | } else { |
| 451 | if ((primary_dev && netif_running(primary_dev)) || |
| 452 | (!netif_running(standby_dev))) |
| 453 | info.tx_enabled = false; |
| 454 | else |
| 455 | info.tx_enabled = true; |
| 456 | } |
| 457 | |
| 458 | netdev_lower_state_changed(slave_dev, &info); |
| 459 | } |
| 460 | |
| 461 | static int net_failover_slave_pre_register(struct net_device *slave_dev, |
| 462 | struct net_device *failover_dev) |
| 463 | { |
| 464 | struct net_device *standby_dev, *primary_dev; |
| 465 | struct net_failover_info *nfo_info; |
| 466 | bool slave_is_standby; |
| 467 | |
| 468 | nfo_info = netdev_priv(failover_dev); |
| 469 | standby_dev = rtnl_dereference(nfo_info->standby_dev); |
| 470 | primary_dev = rtnl_dereference(nfo_info->primary_dev); |
| 471 | slave_is_standby = slave_dev->dev.parent == failover_dev->dev.parent; |
| 472 | if (slave_is_standby ? standby_dev : primary_dev) { |
| 473 | netdev_err(failover_dev, "%s attempting to register as slave dev when %s already present\n", |
| 474 | slave_dev->name, |
| 475 | slave_is_standby ? "standby" : "primary"); |
| 476 | return -EINVAL; |
| 477 | } |
| 478 | |
| 479 | /* We want to allow only a direct attached VF device as a primary |
| 480 | * netdev. As there is no easy way to check for a VF device, restrict |
| 481 | * this to a pci device. |
| 482 | */ |
| 483 | if (!slave_is_standby && (!slave_dev->dev.parent || |
| 484 | !dev_is_pci(slave_dev->dev.parent))) |
| 485 | return -EINVAL; |
| 486 | |
| 487 | if (failover_dev->features & NETIF_F_VLAN_CHALLENGED && |
| 488 | vlan_uses_dev(failover_dev)) { |
| 489 | netdev_err(failover_dev, "Device %s is VLAN challenged and failover device has VLAN set up\n", |
| 490 | failover_dev->name); |
| 491 | return -EINVAL; |
| 492 | } |
| 493 | |
| 494 | return 0; |
| 495 | } |
| 496 | |
| 497 | static int net_failover_slave_register(struct net_device *slave_dev, |
| 498 | struct net_device *failover_dev) |
| 499 | { |
| 500 | struct net_device *standby_dev, *primary_dev; |
| 501 | struct net_failover_info *nfo_info; |
| 502 | bool slave_is_standby; |
| 503 | u32 orig_mtu; |
| 504 | int err; |
| 505 | |
| 506 | /* Align MTU of slave with failover dev */ |
| 507 | orig_mtu = slave_dev->mtu; |
| 508 | err = dev_set_mtu(slave_dev, failover_dev->mtu); |
| 509 | if (err) { |
| 510 | netdev_err(failover_dev, "unable to change mtu of %s to %u register failed\n", |
| 511 | slave_dev->name, failover_dev->mtu); |
| 512 | goto done; |
| 513 | } |
| 514 | |
| 515 | dev_hold(slave_dev); |
| 516 | |
| 517 | if (netif_running(failover_dev)) { |
Petr Machata | 00f54e6 | 2018-12-06 17:05:36 +0000 | [diff] [blame] | 518 | err = dev_open(slave_dev, NULL); |
Sridhar Samudrala | cfc80d9 | 2018-05-24 09:55:15 -0700 | [diff] [blame] | 519 | if (err && (err != -EBUSY)) { |
| 520 | netdev_err(failover_dev, "Opening slave %s failed err:%d\n", |
| 521 | slave_dev->name, err); |
| 522 | goto err_dev_open; |
| 523 | } |
| 524 | } |
| 525 | |
| 526 | netif_addr_lock_bh(failover_dev); |
| 527 | dev_uc_sync_multiple(slave_dev, failover_dev); |
Liran Alon | e522343 | 2018-06-18 15:04:05 +0300 | [diff] [blame] | 528 | dev_mc_sync_multiple(slave_dev, failover_dev); |
Sridhar Samudrala | cfc80d9 | 2018-05-24 09:55:15 -0700 | [diff] [blame] | 529 | netif_addr_unlock_bh(failover_dev); |
| 530 | |
| 531 | err = vlan_vids_add_by_dev(slave_dev, failover_dev); |
| 532 | if (err) { |
| 533 | netdev_err(failover_dev, "Failed to add vlan ids to device %s err:%d\n", |
| 534 | slave_dev->name, err); |
| 535 | goto err_vlan_add; |
| 536 | } |
| 537 | |
| 538 | nfo_info = netdev_priv(failover_dev); |
| 539 | standby_dev = rtnl_dereference(nfo_info->standby_dev); |
| 540 | primary_dev = rtnl_dereference(nfo_info->primary_dev); |
| 541 | slave_is_standby = slave_dev->dev.parent == failover_dev->dev.parent; |
| 542 | |
| 543 | if (slave_is_standby) { |
| 544 | rcu_assign_pointer(nfo_info->standby_dev, slave_dev); |
| 545 | standby_dev = slave_dev; |
| 546 | dev_get_stats(standby_dev, &nfo_info->standby_stats); |
| 547 | } else { |
| 548 | rcu_assign_pointer(nfo_info->primary_dev, slave_dev); |
| 549 | primary_dev = slave_dev; |
| 550 | dev_get_stats(primary_dev, &nfo_info->primary_stats); |
| 551 | failover_dev->min_mtu = slave_dev->min_mtu; |
| 552 | failover_dev->max_mtu = slave_dev->max_mtu; |
| 553 | } |
| 554 | |
| 555 | net_failover_lower_state_changed(slave_dev, primary_dev, standby_dev); |
| 556 | net_failover_compute_features(failover_dev); |
| 557 | |
| 558 | call_netdevice_notifiers(NETDEV_JOIN, slave_dev); |
| 559 | |
| 560 | netdev_info(failover_dev, "failover %s slave:%s registered\n", |
| 561 | slave_is_standby ? "standby" : "primary", slave_dev->name); |
| 562 | |
| 563 | return 0; |
| 564 | |
| 565 | err_vlan_add: |
| 566 | dev_uc_unsync(slave_dev, failover_dev); |
| 567 | dev_mc_unsync(slave_dev, failover_dev); |
| 568 | dev_close(slave_dev); |
| 569 | err_dev_open: |
| 570 | dev_put(slave_dev); |
| 571 | dev_set_mtu(slave_dev, orig_mtu); |
| 572 | done: |
| 573 | return err; |
| 574 | } |
| 575 | |
| 576 | static int net_failover_slave_pre_unregister(struct net_device *slave_dev, |
| 577 | struct net_device *failover_dev) |
| 578 | { |
| 579 | struct net_device *standby_dev, *primary_dev; |
| 580 | struct net_failover_info *nfo_info; |
| 581 | |
| 582 | nfo_info = netdev_priv(failover_dev); |
| 583 | primary_dev = rtnl_dereference(nfo_info->primary_dev); |
| 584 | standby_dev = rtnl_dereference(nfo_info->standby_dev); |
| 585 | |
| 586 | if (slave_dev != primary_dev && slave_dev != standby_dev) |
| 587 | return -ENODEV; |
| 588 | |
| 589 | return 0; |
| 590 | } |
| 591 | |
| 592 | static int net_failover_slave_unregister(struct net_device *slave_dev, |
| 593 | struct net_device *failover_dev) |
| 594 | { |
| 595 | struct net_device *standby_dev, *primary_dev; |
| 596 | struct net_failover_info *nfo_info; |
| 597 | bool slave_is_standby; |
| 598 | |
| 599 | nfo_info = netdev_priv(failover_dev); |
| 600 | primary_dev = rtnl_dereference(nfo_info->primary_dev); |
| 601 | standby_dev = rtnl_dereference(nfo_info->standby_dev); |
| 602 | |
YueHaibing | 9e7e6ca | 2018-09-04 02:56:26 +0000 | [diff] [blame] | 603 | if (WARN_ON_ONCE(slave_dev != primary_dev && slave_dev != standby_dev)) |
| 604 | return -ENODEV; |
| 605 | |
Sridhar Samudrala | cfc80d9 | 2018-05-24 09:55:15 -0700 | [diff] [blame] | 606 | vlan_vids_del_by_dev(slave_dev, failover_dev); |
| 607 | dev_uc_unsync(slave_dev, failover_dev); |
| 608 | dev_mc_unsync(slave_dev, failover_dev); |
| 609 | dev_close(slave_dev); |
| 610 | |
| 611 | nfo_info = netdev_priv(failover_dev); |
| 612 | dev_get_stats(failover_dev, &nfo_info->failover_stats); |
| 613 | |
| 614 | slave_is_standby = slave_dev->dev.parent == failover_dev->dev.parent; |
| 615 | if (slave_is_standby) { |
| 616 | RCU_INIT_POINTER(nfo_info->standby_dev, NULL); |
| 617 | } else { |
| 618 | RCU_INIT_POINTER(nfo_info->primary_dev, NULL); |
| 619 | if (standby_dev) { |
| 620 | failover_dev->min_mtu = standby_dev->min_mtu; |
| 621 | failover_dev->max_mtu = standby_dev->max_mtu; |
| 622 | } |
| 623 | } |
| 624 | |
| 625 | dev_put(slave_dev); |
| 626 | |
| 627 | net_failover_compute_features(failover_dev); |
| 628 | |
| 629 | netdev_info(failover_dev, "failover %s slave:%s unregistered\n", |
| 630 | slave_is_standby ? "standby" : "primary", slave_dev->name); |
| 631 | |
| 632 | return 0; |
| 633 | } |
| 634 | |
| 635 | static int net_failover_slave_link_change(struct net_device *slave_dev, |
| 636 | struct net_device *failover_dev) |
| 637 | { |
| 638 | struct net_device *primary_dev, *standby_dev; |
| 639 | struct net_failover_info *nfo_info; |
| 640 | |
| 641 | nfo_info = netdev_priv(failover_dev); |
| 642 | |
| 643 | primary_dev = rtnl_dereference(nfo_info->primary_dev); |
| 644 | standby_dev = rtnl_dereference(nfo_info->standby_dev); |
| 645 | |
| 646 | if (slave_dev != primary_dev && slave_dev != standby_dev) |
| 647 | return -ENODEV; |
| 648 | |
| 649 | if ((primary_dev && net_failover_xmit_ready(primary_dev)) || |
| 650 | (standby_dev && net_failover_xmit_ready(standby_dev))) { |
| 651 | netif_carrier_on(failover_dev); |
| 652 | netif_tx_wake_all_queues(failover_dev); |
| 653 | } else { |
| 654 | dev_get_stats(failover_dev, &nfo_info->failover_stats); |
| 655 | netif_carrier_off(failover_dev); |
| 656 | netif_tx_stop_all_queues(failover_dev); |
| 657 | } |
| 658 | |
| 659 | net_failover_lower_state_changed(slave_dev, primary_dev, standby_dev); |
| 660 | |
| 661 | return 0; |
| 662 | } |
| 663 | |
| 664 | static int net_failover_slave_name_change(struct net_device *slave_dev, |
| 665 | struct net_device *failover_dev) |
| 666 | { |
| 667 | struct net_device *primary_dev, *standby_dev; |
| 668 | struct net_failover_info *nfo_info; |
| 669 | |
| 670 | nfo_info = netdev_priv(failover_dev); |
| 671 | |
| 672 | primary_dev = rtnl_dereference(nfo_info->primary_dev); |
| 673 | standby_dev = rtnl_dereference(nfo_info->standby_dev); |
| 674 | |
| 675 | if (slave_dev != primary_dev && slave_dev != standby_dev) |
| 676 | return -ENODEV; |
| 677 | |
| 678 | /* We need to bring up the slave after the rename by udev in case |
| 679 | * open failed with EBUSY when it was registered. |
| 680 | */ |
Petr Machata | 00f54e6 | 2018-12-06 17:05:36 +0000 | [diff] [blame] | 681 | dev_open(slave_dev, NULL); |
Sridhar Samudrala | cfc80d9 | 2018-05-24 09:55:15 -0700 | [diff] [blame] | 682 | |
| 683 | return 0; |
| 684 | } |
| 685 | |
| 686 | static struct failover_ops net_failover_ops = { |
| 687 | .slave_pre_register = net_failover_slave_pre_register, |
| 688 | .slave_register = net_failover_slave_register, |
| 689 | .slave_pre_unregister = net_failover_slave_pre_unregister, |
| 690 | .slave_unregister = net_failover_slave_unregister, |
| 691 | .slave_link_change = net_failover_slave_link_change, |
| 692 | .slave_name_change = net_failover_slave_name_change, |
| 693 | .slave_handle_frame = net_failover_handle_frame, |
| 694 | }; |
| 695 | |
| 696 | /** |
| 697 | * net_failover_create - Create and register a failover instance |
| 698 | * |
| 699 | * @dev: standby netdev |
| 700 | * |
| 701 | * Creates a failover netdev and registers a failover instance for a standby |
| 702 | * netdev. Used by paravirtual drivers that use 3-netdev model. |
| 703 | * The failover netdev acts as a master device and controls 2 slave devices - |
| 704 | * the original standby netdev and a VF netdev with the same MAC gets |
| 705 | * registered as primary netdev. |
| 706 | * |
| 707 | * Return: pointer to failover instance |
| 708 | */ |
| 709 | struct failover *net_failover_create(struct net_device *standby_dev) |
| 710 | { |
| 711 | struct device *dev = standby_dev->dev.parent; |
| 712 | struct net_device *failover_dev; |
| 713 | struct failover *failover; |
| 714 | int err; |
| 715 | |
| 716 | /* Alloc at least 2 queues, for now we are going with 16 assuming |
| 717 | * that VF devices being enslaved won't have too many queues. |
| 718 | */ |
| 719 | failover_dev = alloc_etherdev_mq(sizeof(struct net_failover_info), 16); |
| 720 | if (!failover_dev) { |
| 721 | dev_err(dev, "Unable to allocate failover_netdev!\n"); |
| 722 | return ERR_PTR(-ENOMEM); |
| 723 | } |
| 724 | |
| 725 | dev_net_set(failover_dev, dev_net(standby_dev)); |
| 726 | SET_NETDEV_DEV(failover_dev, dev); |
| 727 | |
| 728 | failover_dev->netdev_ops = &failover_dev_ops; |
| 729 | failover_dev->ethtool_ops = &failover_ethtool_ops; |
| 730 | |
| 731 | /* Initialize the device options */ |
| 732 | failover_dev->priv_flags |= IFF_UNICAST_FLT | IFF_NO_QUEUE; |
| 733 | failover_dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | |
| 734 | IFF_TX_SKB_SHARING); |
| 735 | |
| 736 | /* don't acquire failover netdev's netif_tx_lock when transmitting */ |
| 737 | failover_dev->features |= NETIF_F_LLTX; |
| 738 | |
| 739 | /* Don't allow failover devices to change network namespaces. */ |
| 740 | failover_dev->features |= NETIF_F_NETNS_LOCAL; |
| 741 | |
| 742 | failover_dev->hw_features = FAILOVER_VLAN_FEATURES | |
| 743 | NETIF_F_HW_VLAN_CTAG_TX | |
| 744 | NETIF_F_HW_VLAN_CTAG_RX | |
| 745 | NETIF_F_HW_VLAN_CTAG_FILTER; |
| 746 | |
| 747 | failover_dev->hw_features |= NETIF_F_GSO_ENCAP_ALL; |
| 748 | failover_dev->features |= failover_dev->hw_features; |
| 749 | |
| 750 | memcpy(failover_dev->dev_addr, standby_dev->dev_addr, |
| 751 | failover_dev->addr_len); |
| 752 | |
| 753 | failover_dev->min_mtu = standby_dev->min_mtu; |
| 754 | failover_dev->max_mtu = standby_dev->max_mtu; |
| 755 | |
| 756 | err = register_netdev(failover_dev); |
| 757 | if (err) { |
| 758 | dev_err(dev, "Unable to register failover_dev!\n"); |
| 759 | goto err_register_netdev; |
| 760 | } |
| 761 | |
| 762 | netif_carrier_off(failover_dev); |
| 763 | |
| 764 | failover = failover_register(failover_dev, &net_failover_ops); |
YueHaibing | 09317da | 2018-09-06 21:04:12 +0800 | [diff] [blame] | 765 | if (IS_ERR(failover)) { |
| 766 | err = PTR_ERR(failover); |
Sridhar Samudrala | cfc80d9 | 2018-05-24 09:55:15 -0700 | [diff] [blame] | 767 | goto err_failover_register; |
YueHaibing | 09317da | 2018-09-06 21:04:12 +0800 | [diff] [blame] | 768 | } |
Sridhar Samudrala | cfc80d9 | 2018-05-24 09:55:15 -0700 | [diff] [blame] | 769 | |
| 770 | return failover; |
| 771 | |
| 772 | err_failover_register: |
| 773 | unregister_netdev(failover_dev); |
| 774 | err_register_netdev: |
| 775 | free_netdev(failover_dev); |
| 776 | |
| 777 | return ERR_PTR(err); |
| 778 | } |
| 779 | EXPORT_SYMBOL_GPL(net_failover_create); |
| 780 | |
| 781 | /** |
| 782 | * net_failover_destroy - Destroy a failover instance |
| 783 | * |
| 784 | * @failover: pointer to failover instance |
| 785 | * |
| 786 | * Unregisters any slave netdevs associated with the failover instance by |
| 787 | * calling failover_slave_unregister(). |
| 788 | * unregisters the failover instance itself and finally frees the failover |
| 789 | * netdev. Used by paravirtual drivers that use 3-netdev model. |
| 790 | * |
| 791 | */ |
| 792 | void net_failover_destroy(struct failover *failover) |
| 793 | { |
| 794 | struct net_failover_info *nfo_info; |
| 795 | struct net_device *failover_dev; |
| 796 | struct net_device *slave_dev; |
| 797 | |
| 798 | if (!failover) |
| 799 | return; |
| 800 | |
| 801 | failover_dev = rcu_dereference(failover->failover_dev); |
| 802 | nfo_info = netdev_priv(failover_dev); |
| 803 | |
| 804 | netif_device_detach(failover_dev); |
| 805 | |
| 806 | rtnl_lock(); |
| 807 | |
| 808 | slave_dev = rtnl_dereference(nfo_info->primary_dev); |
| 809 | if (slave_dev) |
| 810 | failover_slave_unregister(slave_dev); |
| 811 | |
| 812 | slave_dev = rtnl_dereference(nfo_info->standby_dev); |
| 813 | if (slave_dev) |
| 814 | failover_slave_unregister(slave_dev); |
| 815 | |
| 816 | failover_unregister(failover); |
| 817 | |
| 818 | unregister_netdevice(failover_dev); |
| 819 | |
| 820 | rtnl_unlock(); |
| 821 | |
| 822 | free_netdev(failover_dev); |
| 823 | } |
| 824 | EXPORT_SYMBOL_GPL(net_failover_destroy); |
| 825 | |
| 826 | static __init int |
| 827 | net_failover_init(void) |
| 828 | { |
| 829 | return 0; |
| 830 | } |
| 831 | module_init(net_failover_init); |
| 832 | |
| 833 | static __exit |
| 834 | void net_failover_exit(void) |
| 835 | { |
| 836 | } |
| 837 | module_exit(net_failover_exit); |
| 838 | |
| 839 | MODULE_DESCRIPTION("Failover driver for Paravirtual drivers"); |
| 840 | MODULE_LICENSE("GPL v2"); |