| // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB |
| /* |
| * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved |
| */ |
| |
| #include "cmd.h" |
| |
| enum { CQ_OK = 0, CQ_EMPTY = -1, CQ_POLL_ERR = -2 }; |
| |
| static int mlx5vf_cmd_get_vhca_id(struct mlx5_core_dev *mdev, u16 function_id, |
| u16 *vhca_id); |
| static void |
| _mlx5vf_free_page_tracker_resources(struct mlx5vf_pci_core_device *mvdev); |
| |
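| /* |
| * Quiesce the VF by issuing SUSPEND_VHCA; op_mod selects the suspend |
| * variant requested by the caller. Callers must hold state_mutex. |
| */ |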
| int mlx5vf_cmd_suspend_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod) |
| { |
| u32 out[MLX5_ST_SZ_DW(suspend_vhca_out)] = {}; |
| u32 in[MLX5_ST_SZ_DW(suspend_vhca_in)] = {}; |
| |
| lockdep_assert_held(&mvdev->state_mutex); |
| if (mvdev->mdev_detach) |
| return -ENOTCONN; |
| |
| MLX5_SET(suspend_vhca_in, in, opcode, MLX5_CMD_OP_SUSPEND_VHCA); |
| MLX5_SET(suspend_vhca_in, in, vhca_id, mvdev->vhca_id); |
| MLX5_SET(suspend_vhca_in, in, op_mod, op_mod); |
| |
| return mlx5_cmd_exec_inout(mvdev->mdev, suspend_vhca, in, out); |
| } |
| |
| int mlx5vf_cmd_resume_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod) |
| { |
| u32 out[MLX5_ST_SZ_DW(resume_vhca_out)] = {}; |
| u32 in[MLX5_ST_SZ_DW(resume_vhca_in)] = {}; |
| |
| lockdep_assert_held(&mvdev->state_mutex); |
| if (mvdev->mdev_detach) |
| return -ENOTCONN; |
| |
| MLX5_SET(resume_vhca_in, in, opcode, MLX5_CMD_OP_RESUME_VHCA); |
| MLX5_SET(resume_vhca_in, in, vhca_id, mvdev->vhca_id); |
| MLX5_SET(resume_vhca_in, in, op_mod, op_mod); |
| |
| return mlx5_cmd_exec_inout(mvdev->mdev, resume_vhca, in, out); |
| } |
| |
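| /* |
| * Query how much memory is currently required to hold this VHCA's |
| * migration image; the required size is returned through state_size. |
| */ |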
| int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev, |
| size_t *state_size) |
| { |
| u32 out[MLX5_ST_SZ_DW(query_vhca_migration_state_out)] = {}; |
| u32 in[MLX5_ST_SZ_DW(query_vhca_migration_state_in)] = {}; |
| int ret; |
| |
| lockdep_assert_held(&mvdev->state_mutex); |
| if (mvdev->mdev_detach) |
| return -ENOTCONN; |
| |
| MLX5_SET(query_vhca_migration_state_in, in, opcode, |
| MLX5_CMD_OP_QUERY_VHCA_MIGRATION_STATE); |
| MLX5_SET(query_vhca_migration_state_in, in, vhca_id, mvdev->vhca_id); |
| MLX5_SET(query_vhca_migration_state_in, in, op_mod, 0); |
| |
| ret = mlx5_cmd_exec_inout(mvdev->mdev, query_vhca_migration_state, in, |
| out); |
| if (ret) |
| return ret; |
| |
| *state_size = MLX5_GET(query_vhca_migration_state_out, out, |
| required_umem_size); |
| return 0; |
| } |
| |
| static void set_tracker_error(struct mlx5vf_pci_core_device *mvdev) |
| { |
| /* Mark the tracker as being in error and wake it up if it's running */ |
| mvdev->tracker.is_err = true; |
| complete(&mvdev->tracker_comp); |
| } |
| |
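| /* |
| * SR-IOV blocking notifier: track whether the PF has this VF enabled so |
| * that commands are only issued while the mlx5 core device is attached. |
| */ |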
| static int mlx5fv_vf_event(struct notifier_block *nb, |
| unsigned long event, void *data) |
| { |
| struct mlx5vf_pci_core_device *mvdev = |
| container_of(nb, struct mlx5vf_pci_core_device, nb); |
| |
| switch (event) { |
| case MLX5_PF_NOTIFY_ENABLE_VF: |
| mutex_lock(&mvdev->state_mutex); |
| mvdev->mdev_detach = false; |
| mlx5vf_state_mutex_unlock(mvdev); |
| break; |
| case MLX5_PF_NOTIFY_DISABLE_VF: |
| mlx5vf_cmd_close_migratable(mvdev); |
| mutex_lock(&mvdev->state_mutex); |
| mvdev->mdev_detach = true; |
| mlx5vf_state_mutex_unlock(mvdev); |
| break; |
| default: |
| break; |
| } |
| |
| return 0; |
| } |
| |
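| /* |
| * Stop any in-flight migration: error the dirty page tracker, disable |
| * the migration file descriptors and free the tracker resources. |
| */ |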
| void mlx5vf_cmd_close_migratable(struct mlx5vf_pci_core_device *mvdev) |
| { |
| if (!mvdev->migrate_cap) |
| return; |
| |
| /* Must be done outside the lock to let it progress */ |
| set_tracker_error(mvdev); |
| mutex_lock(&mvdev->state_mutex); |
| mlx5vf_disable_fds(mvdev); |
| _mlx5vf_free_page_tracker_resources(mvdev); |
| mlx5vf_state_mutex_unlock(mvdev); |
| } |
| |
| void mlx5vf_cmd_remove_migratable(struct mlx5vf_pci_core_device *mvdev) |
| { |
| if (!mvdev->migrate_cap) |
| return; |
| |
| mlx5_sriov_blocking_notifier_unregister(mvdev->mdev, mvdev->vf_id, |
| &mvdev->nb); |
| destroy_workqueue(mvdev->cb_wq); |
| } |
| |
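| /* |
| * Detect migration support for this VF and, if present, register the |
| * SR-IOV notifier and expose the VFIO migration (and optionally dirty |
| * tracking) ops. |
| */ |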
| void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev, |
| const struct vfio_migration_ops *mig_ops, |
| const struct vfio_log_ops *log_ops) |
| { |
| struct pci_dev *pdev = mvdev->core_device.pdev; |
| int ret; |
| |
| if (!pdev->is_virtfn) |
| return; |
| |
| mvdev->mdev = mlx5_vf_get_core_dev(pdev); |
| if (!mvdev->mdev) |
| return; |
| |
| if (!MLX5_CAP_GEN(mvdev->mdev, migration)) |
| goto end; |
| |
| mvdev->vf_id = pci_iov_vf_id(pdev); |
| if (mvdev->vf_id < 0) |
| goto end; |
| |
| if (mlx5vf_cmd_get_vhca_id(mvdev->mdev, mvdev->vf_id + 1, |
| &mvdev->vhca_id)) |
| goto end; |
| |
| mvdev->cb_wq = alloc_ordered_workqueue("mlx5vf_wq", 0); |
| if (!mvdev->cb_wq) |
| goto end; |
| |
| mutex_init(&mvdev->state_mutex); |
| spin_lock_init(&mvdev->reset_lock); |
| mvdev->nb.notifier_call = mlx5fv_vf_event; |
| ret = mlx5_sriov_blocking_notifier_register(mvdev->mdev, mvdev->vf_id, |
| &mvdev->nb); |
| if (ret) { |
| destroy_workqueue(mvdev->cb_wq); |
| goto end; |
| } |
| |
| mvdev->migrate_cap = 1; |
| mvdev->core_device.vdev.migration_flags = |
| VFIO_MIGRATION_STOP_COPY | |
| VFIO_MIGRATION_P2P; |
| mvdev->core_device.vdev.mig_ops = mig_ops; |
| init_completion(&mvdev->tracker_comp); |
| if (MLX5_CAP_GEN(mvdev->mdev, adv_virtualization)) |
| mvdev->core_device.vdev.log_ops = log_ops; |
| |
| end: |
| mlx5_vf_put_core_dev(mvdev->mdev); |
| } |
| |
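| /* |
| * Resolve the VHCA id of another function (function_id = VF index + 1) |
| * via QUERY_HCA_CAP with other_function set. |
| */ |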
| static int mlx5vf_cmd_get_vhca_id(struct mlx5_core_dev *mdev, u16 function_id, |
| u16 *vhca_id) |
| { |
| u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {}; |
| int out_size; |
| void *out; |
| int ret; |
| |
| out_size = MLX5_ST_SZ_BYTES(query_hca_cap_out); |
| out = kzalloc(out_size, GFP_KERNEL); |
| if (!out) |
| return -ENOMEM; |
| |
| MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP); |
| MLX5_SET(query_hca_cap_in, in, other_function, 1); |
| MLX5_SET(query_hca_cap_in, in, function_id, function_id); |
| MLX5_SET(query_hca_cap_in, in, op_mod, |
| MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1 | |
| HCA_CAP_OPMOD_GET_CUR); |
| |
| ret = mlx5_cmd_exec_inout(mdev, query_hca_cap, in, out); |
| if (ret) |
| goto err_exec; |
| |
| *vhca_id = MLX5_GET(query_hca_cap_out, out, |
| capability.cmd_hca_cap.vhca_id); |
| |
| err_exec: |
| kfree(out); |
| return ret; |
| } |
| |
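| /* |
| * Create an MTT mkey covering either the migration file's sg table |
| * (migf != NULL) or the tracker receive buffer pages (recv_buf). |
| */ |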
| static int _create_mkey(struct mlx5_core_dev *mdev, u32 pdn, |
| struct mlx5_vf_migration_file *migf, |
| struct mlx5_vhca_recv_buf *recv_buf, |
| u32 *mkey) |
| { |
| size_t npages = migf ? DIV_ROUND_UP(migf->total_length, PAGE_SIZE) : |
| recv_buf->npages; |
| int err = 0, inlen; |
| __be64 *mtt; |
| void *mkc; |
| u32 *in; |
| |
| inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + |
| sizeof(*mtt) * round_up(npages, 2); |
| |
| in = kvzalloc(inlen, GFP_KERNEL); |
| if (!in) |
| return -ENOMEM; |
| |
| MLX5_SET(create_mkey_in, in, translations_octword_actual_size, |
| DIV_ROUND_UP(npages, 2)); |
| mtt = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); |
| |
| if (migf) { |
| struct sg_dma_page_iter dma_iter; |
| |
| for_each_sgtable_dma_page(&migf->table.sgt, &dma_iter, 0) |
| *mtt++ = cpu_to_be64(sg_page_iter_dma_address(&dma_iter)); |
| } else { |
| int i; |
| |
| for (i = 0; i < npages; i++) |
| *mtt++ = cpu_to_be64(recv_buf->dma_addrs[i]); |
| } |
| |
| mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); |
| MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT); |
| MLX5_SET(mkc, mkc, lr, 1); |
| MLX5_SET(mkc, mkc, lw, 1); |
| MLX5_SET(mkc, mkc, rr, 1); |
| MLX5_SET(mkc, mkc, rw, 1); |
| MLX5_SET(mkc, mkc, pd, pdn); |
| MLX5_SET(mkc, mkc, bsf_octword_size, 0); |
| MLX5_SET(mkc, mkc, qpn, 0xffffff); |
| MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT); |
| MLX5_SET(mkc, mkc, translations_octword_size, DIV_ROUND_UP(npages, 2)); |
| MLX5_SET64(mkc, mkc, len, |
| migf ? migf->total_length : (npages * PAGE_SIZE)); |
| err = mlx5_core_create_mkey(mdev, mkey, in, inlen); |
| kvfree(in); |
| return err; |
| } |
| |
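| /* |
| * Workqueue callback: release the PD, mkey and DMA mapping taken by |
| * mlx5vf_cmd_save_vhca_state() once the async command has completed. |
| */ |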
| void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work) |
| { |
| struct mlx5vf_async_data *async_data = container_of(_work, |
| struct mlx5vf_async_data, work); |
| struct mlx5_vf_migration_file *migf = container_of(async_data, |
| struct mlx5_vf_migration_file, async_data); |
| struct mlx5_core_dev *mdev = migf->mvdev->mdev; |
| |
| mutex_lock(&migf->lock); |
| if (async_data->status) { |
| migf->is_err = true; |
| wake_up_interruptible(&migf->poll_wait); |
| } |
| mutex_unlock(&migf->lock); |
| |
| mlx5_core_destroy_mkey(mdev, async_data->mkey); |
| dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE, 0); |
| mlx5_core_dealloc_pd(mdev, async_data->pdn); |
| kvfree(async_data->out); |
| fput(migf->filp); |
| } |
| |
| static void mlx5vf_save_callback(int status, struct mlx5_async_work *context) |
| { |
| struct mlx5vf_async_data *async_data = container_of(context, |
| struct mlx5vf_async_data, cb_work); |
| struct mlx5_vf_migration_file *migf = container_of(async_data, |
| struct mlx5_vf_migration_file, async_data); |
| |
| if (!status) { |
| WRITE_ONCE(migf->total_length, |
| MLX5_GET(save_vhca_state_out, async_data->out, |
| actual_image_size)); |
| wake_up_interruptible(&migf->poll_wait); |
| } |
| |
| /* |
| * The error and the cleanup flows can't run from an |
| * interrupt context, so defer them to the callback workqueue. |
| */ |
| async_data->status = status; |
| queue_work(migf->mvdev->cb_wq, &async_data->work); |
| } |
| |
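| /* |
| * Start an asynchronous SAVE_VHCA_STATE: map the migration file buffer, |
| * create an mkey over it and let the device write its state into it. |
| * Completion is reported through mlx5vf_save_callback(). |
| */ |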
| int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev, |
| struct mlx5_vf_migration_file *migf) |
| { |
| u32 out_size = MLX5_ST_SZ_BYTES(save_vhca_state_out); |
| u32 in[MLX5_ST_SZ_DW(save_vhca_state_in)] = {}; |
| struct mlx5vf_async_data *async_data; |
| struct mlx5_core_dev *mdev; |
| u32 pdn, mkey; |
| int err; |
| |
| lockdep_assert_held(&mvdev->state_mutex); |
| if (mvdev->mdev_detach) |
| return -ENOTCONN; |
| |
| mdev = mvdev->mdev; |
| err = mlx5_core_alloc_pd(mdev, &pdn); |
| if (err) |
| return err; |
| |
| err = dma_map_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE, |
| 0); |
| if (err) |
| goto err_dma_map; |
| |
| err = _create_mkey(mdev, pdn, migf, NULL, &mkey); |
| if (err) |
| goto err_create_mkey; |
| |
| MLX5_SET(save_vhca_state_in, in, opcode, |
| MLX5_CMD_OP_SAVE_VHCA_STATE); |
| MLX5_SET(save_vhca_state_in, in, op_mod, 0); |
| MLX5_SET(save_vhca_state_in, in, vhca_id, mvdev->vhca_id); |
| MLX5_SET(save_vhca_state_in, in, mkey, mkey); |
| MLX5_SET(save_vhca_state_in, in, size, migf->total_length); |
| |
| async_data = &migf->async_data; |
| async_data->out = kvzalloc(out_size, GFP_KERNEL); |
| if (!async_data->out) { |
| err = -ENOMEM; |
| goto err_out; |
| } |
| |
| /* No data exists until the callback completes */ |
| migf->total_length = 0; |
| get_file(migf->filp); |
| async_data->mkey = mkey; |
| async_data->pdn = pdn; |
| err = mlx5_cmd_exec_cb(&migf->async_ctx, in, sizeof(in), |
| async_data->out, |
| out_size, mlx5vf_save_callback, |
| &async_data->cb_work); |
| if (err) |
| goto err_exec; |
| |
| return 0; |
| |
| err_exec: |
| fput(migf->filp); |
| kvfree(async_data->out); |
| err_out: |
| mlx5_core_destroy_mkey(mdev, mkey); |
| err_create_mkey: |
| dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE, 0); |
| err_dma_map: |
| mlx5_core_dealloc_pd(mdev, pdn); |
| return err; |
| } |
| |
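| /* Load a previously saved device image from the migration file into the VHCA. */ |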
| int mlx5vf_cmd_load_vhca_state(struct mlx5vf_pci_core_device *mvdev, |
| struct mlx5_vf_migration_file *migf) |
| { |
| struct mlx5_core_dev *mdev; |
| u32 out[MLX5_ST_SZ_DW(load_vhca_state_out)] = {}; |
| u32 in[MLX5_ST_SZ_DW(load_vhca_state_in)] = {}; |
| u32 pdn, mkey; |
| int err; |
| |
| lockdep_assert_held(&mvdev->state_mutex); |
| if (mvdev->mdev_detach) |
| return -ENOTCONN; |
| |
| mutex_lock(&migf->lock); |
| if (!migf->total_length) { |
| err = -EINVAL; |
| goto end; |
| } |
| |
| mdev = mvdev->mdev; |
| err = mlx5_core_alloc_pd(mdev, &pdn); |
| if (err) |
| goto end; |
| |
| err = dma_map_sgtable(mdev->device, &migf->table.sgt, DMA_TO_DEVICE, 0); |
| if (err) |
| goto err_reg; |
| |
| err = _create_mkey(mdev, pdn, migf, NULL, &mkey); |
| if (err) |
| goto err_mkey; |
| |
| MLX5_SET(load_vhca_state_in, in, opcode, |
| MLX5_CMD_OP_LOAD_VHCA_STATE); |
| MLX5_SET(load_vhca_state_in, in, op_mod, 0); |
| MLX5_SET(load_vhca_state_in, in, vhca_id, mvdev->vhca_id); |
| MLX5_SET(load_vhca_state_in, in, mkey, mkey); |
| MLX5_SET(load_vhca_state_in, in, size, migf->total_length); |
| |
| err = mlx5_cmd_exec_inout(mdev, load_vhca_state, in, out); |
| |
| mlx5_core_destroy_mkey(mdev, mkey); |
| err_mkey: |
| dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_TO_DEVICE, 0); |
| err_reg: |
| mlx5_core_dealloc_pd(mdev, pdn); |
| end: |
| mutex_unlock(&migf->lock); |
| return err; |
| } |
| |
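| /* |
| * Shrink the interval tree to at most req_nodes ranges by merging the |
| * ranges that are closest to each other. |
| */ |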
| static void combine_ranges(struct rb_root_cached *root, u32 cur_nodes, |
| u32 req_nodes) |
| { |
| struct interval_tree_node *prev, *curr, *comb_start, *comb_end; |
| unsigned long min_gap; |
| unsigned long curr_gap; |
| |
| /* Special shortcut when a single range is required */ |
| if (req_nodes == 1) { |
| unsigned long last; |
| |
| curr = comb_start = interval_tree_iter_first(root, 0, ULONG_MAX); |
| while (curr) { |
| last = curr->last; |
| prev = curr; |
| curr = interval_tree_iter_next(curr, 0, ULONG_MAX); |
| if (prev != comb_start) |
| interval_tree_remove(prev, root); |
| } |
| comb_start->last = last; |
| return; |
| } |
| |
| /* Repeatedly merge the two ranges with the smallest gap between them */ |
| while (cur_nodes > req_nodes) { |
| prev = NULL; |
| min_gap = ULONG_MAX; |
| curr = interval_tree_iter_first(root, 0, ULONG_MAX); |
| while (curr) { |
| if (prev) { |
| curr_gap = curr->start - prev->last; |
| if (curr_gap < min_gap) { |
| min_gap = curr_gap; |
| comb_start = prev; |
| comb_end = curr; |
| } |
| } |
| prev = curr; |
| curr = interval_tree_iter_next(curr, 0, ULONG_MAX); |
| } |
| comb_start->last = comb_end->last; |
| interval_tree_remove(comb_end, root); |
| cur_nodes--; |
| } |
| } |
| |
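| /* |
| * Create the device page-tracker object: clamp the number of ranges to |
| * the device limit, build the range list and validate the total address |
| * space size against the reported capabilities. |
| */ |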
| static int mlx5vf_create_tracker(struct mlx5_core_dev *mdev, |
| struct mlx5vf_pci_core_device *mvdev, |
| struct rb_root_cached *ranges, u32 nnodes) |
| { |
| int max_num_range = |
| MLX5_CAP_ADV_VIRTUALIZATION(mdev, pg_track_max_num_range); |
| struct mlx5_vhca_page_tracker *tracker = &mvdev->tracker; |
| int record_size = MLX5_ST_SZ_BYTES(page_track_range); |
| u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {}; |
| struct interval_tree_node *node = NULL; |
| u64 total_ranges_len = 0; |
| u32 num_ranges = nnodes; |
| u8 log_addr_space_size; |
| void *range_list_ptr; |
| void *obj_context; |
| void *cmd_hdr; |
| int inlen; |
| void *in; |
| int err; |
| int i; |
| |
| if (num_ranges > max_num_range) { |
| combine_ranges(ranges, nnodes, max_num_range); |
| num_ranges = max_num_range; |
| } |
| |
| inlen = MLX5_ST_SZ_BYTES(create_page_track_obj_in) + |
| record_size * num_ranges; |
| in = kzalloc(inlen, GFP_KERNEL); |
| if (!in) |
| return -ENOMEM; |
| |
| cmd_hdr = MLX5_ADDR_OF(create_page_track_obj_in, in, |
| general_obj_in_cmd_hdr); |
| MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, |
| MLX5_CMD_OP_CREATE_GENERAL_OBJECT); |
| MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, |
| MLX5_OBJ_TYPE_PAGE_TRACK); |
| obj_context = MLX5_ADDR_OF(create_page_track_obj_in, in, obj_context); |
| MLX5_SET(page_track, obj_context, vhca_id, mvdev->vhca_id); |
| MLX5_SET(page_track, obj_context, track_type, 1); |
| MLX5_SET(page_track, obj_context, log_page_size, |
| ilog2(tracker->host_qp->tracked_page_size)); |
| MLX5_SET(page_track, obj_context, log_msg_size, |
| ilog2(tracker->host_qp->max_msg_size)); |
| MLX5_SET(page_track, obj_context, reporting_qpn, tracker->fw_qp->qpn); |
| MLX5_SET(page_track, obj_context, num_ranges, num_ranges); |
| |
| range_list_ptr = MLX5_ADDR_OF(page_track, obj_context, track_range); |
| node = interval_tree_iter_first(ranges, 0, ULONG_MAX); |
| for (i = 0; i < num_ranges; i++) { |
| void *addr_range_i_base = range_list_ptr + record_size * i; |
| unsigned long length = node->last - node->start; |
| |
| MLX5_SET64(page_track_range, addr_range_i_base, start_address, |
| node->start); |
| MLX5_SET64(page_track_range, addr_range_i_base, length, length); |
| total_ranges_len += length; |
| node = interval_tree_iter_next(node, 0, ULONG_MAX); |
| } |
| |
| WARN_ON(node); |
| log_addr_space_size = ilog2(total_ranges_len); |
| if (log_addr_space_size < |
| (MLX5_CAP_ADV_VIRTUALIZATION(mdev, pg_track_log_min_addr_space)) || |
| log_addr_space_size > |
| (MLX5_CAP_ADV_VIRTUALIZATION(mdev, pg_track_log_max_addr_space))) { |
| err = -EOPNOTSUPP; |
| goto out; |
| } |
| |
| MLX5_SET(page_track, obj_context, log_addr_space_size, |
| log_addr_space_size); |
| err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out)); |
| if (err) |
| goto out; |
| |
| tracker->id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); |
| out: |
| kfree(in); |
| return err; |
| } |
| |
| static int mlx5vf_cmd_destroy_tracker(struct mlx5_core_dev *mdev, |
| u32 tracker_id) |
| { |
| u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {}; |
| u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {}; |
| |
| MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); |
| MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_PAGE_TRACK); |
| MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, tracker_id); |
| |
| return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); |
| } |
| |
| static int mlx5vf_cmd_modify_tracker(struct mlx5_core_dev *mdev, |
| u32 tracker_id, unsigned long iova, |
| unsigned long length, u32 tracker_state) |
| { |
| u32 in[MLX5_ST_SZ_DW(modify_page_track_obj_in)] = {}; |
| u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {}; |
| void *obj_context; |
| void *cmd_hdr; |
| |
| cmd_hdr = MLX5_ADDR_OF(modify_page_track_obj_in, in, general_obj_in_cmd_hdr); |
| MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT); |
| MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_PAGE_TRACK); |
| MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, tracker_id); |
| |
| obj_context = MLX5_ADDR_OF(modify_page_track_obj_in, in, obj_context); |
| MLX5_SET64(page_track, obj_context, modify_field_select, 0x3); |
| MLX5_SET64(page_track, obj_context, range_start_address, iova); |
| MLX5_SET64(page_track, obj_context, length, length); |
| MLX5_SET(page_track, obj_context, state, tracker_state); |
| |
| return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); |
| } |
| |
| static int alloc_cq_frag_buf(struct mlx5_core_dev *mdev, |
| struct mlx5_vhca_cq_buf *buf, int nent, |
| int cqe_size) |
| { |
| struct mlx5_frag_buf *frag_buf = &buf->frag_buf; |
| u8 log_wq_stride = 6 + (cqe_size == 128 ? 1 : 0); |
| u8 log_wq_sz = ilog2(cqe_size); |
| int err; |
| |
| err = mlx5_frag_buf_alloc_node(mdev, nent * cqe_size, frag_buf, |
| mdev->priv.numa_node); |
| if (err) |
| return err; |
| |
| mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc); |
| buf->cqe_size = cqe_size; |
| buf->nent = nent; |
| return 0; |
| } |
| |
| static void init_cq_frag_buf(struct mlx5_vhca_cq_buf *buf) |
| { |
| struct mlx5_cqe64 *cqe64; |
| void *cqe; |
| int i; |
| |
| for (i = 0; i < buf->nent; i++) { |
| cqe = mlx5_frag_buf_get_wqe(&buf->fbc, i); |
| cqe64 = buf->cqe_size == 64 ? cqe : cqe + 64; |
| cqe64->op_own = MLX5_CQE_INVALID << 4; |
| } |
| } |
| |
| static void mlx5vf_destroy_cq(struct mlx5_core_dev *mdev, |
| struct mlx5_vhca_cq *cq) |
| { |
| mlx5_core_destroy_cq(mdev, &cq->mcq); |
| mlx5_frag_buf_free(mdev, &cq->buf.frag_buf); |
| mlx5_db_free(mdev, &cq->db); |
| } |
| |
| static void mlx5vf_cq_event(struct mlx5_core_cq *mcq, enum mlx5_event type) |
| { |
| if (type != MLX5_EVENT_TYPE_CQ_ERROR) |
| return; |
| |
| set_tracker_error(container_of(mcq, struct mlx5vf_pci_core_device, |
| tracker.cq.mcq)); |
| } |
| |
| static int mlx5vf_event_notifier(struct notifier_block *nb, unsigned long type, |
| void *data) |
| { |
| struct mlx5_vhca_page_tracker *tracker = |
| mlx5_nb_cof(nb, struct mlx5_vhca_page_tracker, nb); |
| struct mlx5vf_pci_core_device *mvdev = container_of( |
| tracker, struct mlx5vf_pci_core_device, tracker); |
| struct mlx5_eqe *eqe = data; |
| u8 event_type = (u8)type; |
| u8 queue_type; |
| int qp_num; |
| |
| switch (event_type) { |
| case MLX5_EVENT_TYPE_WQ_CATAS_ERROR: |
| case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR: |
| case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR: |
| queue_type = eqe->data.qp_srq.type; |
| if (queue_type != MLX5_EVENT_QUEUE_TYPE_QP) |
| break; |
| qp_num = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff; |
| if (qp_num != tracker->host_qp->qpn && |
| qp_num != tracker->fw_qp->qpn) |
| break; |
| set_tracker_error(mvdev); |
| break; |
| default: |
| break; |
| } |
| |
| return NOTIFY_OK; |
| } |
| |
| static void mlx5vf_cq_complete(struct mlx5_core_cq *mcq, |
| struct mlx5_eqe *eqe) |
| { |
| struct mlx5vf_pci_core_device *mvdev = |
| container_of(mcq, struct mlx5vf_pci_core_device, |
| tracker.cq.mcq); |
| |
| complete(&mvdev->tracker_comp); |
| } |
| |
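| /* Create and arm the CQ used to consume dirty page report completions. */ |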
| static int mlx5vf_create_cq(struct mlx5_core_dev *mdev, |
| struct mlx5_vhca_page_tracker *tracker, |
| size_t ncqe) |
| { |
| int cqe_size = cache_line_size() == 128 ? 128 : 64; |
| u32 out[MLX5_ST_SZ_DW(create_cq_out)]; |
| struct mlx5_vhca_cq *cq; |
| int inlen, err, eqn; |
| void *cqc, *in; |
| __be64 *pas; |
| int vector; |
| |
| cq = &tracker->cq; |
| ncqe = roundup_pow_of_two(ncqe); |
| err = mlx5_db_alloc_node(mdev, &cq->db, mdev->priv.numa_node); |
| if (err) |
| return err; |
| |
| cq->ncqe = ncqe; |
| cq->mcq.set_ci_db = cq->db.db; |
| cq->mcq.arm_db = cq->db.db + 1; |
| cq->mcq.cqe_sz = cqe_size; |
| err = alloc_cq_frag_buf(mdev, &cq->buf, ncqe, cqe_size); |
| if (err) |
| goto err_db_free; |
| |
| init_cq_frag_buf(&cq->buf); |
| inlen = MLX5_ST_SZ_BYTES(create_cq_in) + |
| MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * |
| cq->buf.frag_buf.npages; |
| in = kvzalloc(inlen, GFP_KERNEL); |
| if (!in) { |
| err = -ENOMEM; |
| goto err_buff; |
| } |
| |
| vector = raw_smp_processor_id() % mlx5_comp_vectors_count(mdev); |
| err = mlx5_vector2eqn(mdev, vector, &eqn); |
| if (err) |
| goto err_vec; |
| |
| cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); |
| MLX5_SET(cqc, cqc, log_cq_size, ilog2(ncqe)); |
| MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn); |
| MLX5_SET(cqc, cqc, uar_page, tracker->uar->index); |
| MLX5_SET(cqc, cqc, log_page_size, cq->buf.frag_buf.page_shift - |
| MLX5_ADAPTER_PAGE_SHIFT); |
| MLX5_SET64(cqc, cqc, dbr_addr, cq->db.dma); |
| pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas); |
| mlx5_fill_page_frag_array(&cq->buf.frag_buf, pas); |
| cq->mcq.comp = mlx5vf_cq_complete; |
| cq->mcq.event = mlx5vf_cq_event; |
| err = mlx5_core_create_cq(mdev, &cq->mcq, in, inlen, out, sizeof(out)); |
| if (err) |
| goto err_vec; |
| |
| mlx5_cq_arm(&cq->mcq, MLX5_CQ_DB_REQ_NOT, tracker->uar->map, |
| cq->mcq.cons_index); |
| kvfree(in); |
| return 0; |
| |
| err_vec: |
| kvfree(in); |
| err_buff: |
| mlx5_frag_buf_free(mdev, &cq->buf.frag_buf); |
| err_db_free: |
| mlx5_db_free(mdev, &cq->db); |
| return err; |
| } |
| |
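| /* |
| * Create an RC QP. A non-zero max_recv_wr creates the host QP with a |
| * real receive queue; max_recv_wr == 0 creates the firmware-side QP |
| * with a zero length RQ. |
| */ |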
| static struct mlx5_vhca_qp * |
| mlx5vf_create_rc_qp(struct mlx5_core_dev *mdev, |
| struct mlx5_vhca_page_tracker *tracker, u32 max_recv_wr) |
| { |
| u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {}; |
| struct mlx5_vhca_qp *qp; |
| u8 log_rq_stride; |
| u8 log_rq_sz; |
| void *qpc; |
| int inlen; |
| void *in; |
| int err; |
| |
| qp = kzalloc(sizeof(*qp), GFP_KERNEL); |
| if (!qp) |
| return ERR_PTR(-ENOMEM); |
| |
| qp->rq.wqe_cnt = roundup_pow_of_two(max_recv_wr); |
| log_rq_stride = ilog2(MLX5_SEND_WQE_DS); |
| log_rq_sz = ilog2(qp->rq.wqe_cnt); |
| err = mlx5_db_alloc_node(mdev, &qp->db, mdev->priv.numa_node); |
| if (err) |
| goto err_free; |
| |
| if (max_recv_wr) { |
| err = mlx5_frag_buf_alloc_node(mdev, |
| wq_get_byte_sz(log_rq_sz, log_rq_stride), |
| &qp->buf, mdev->priv.numa_node); |
| if (err) |
| goto err_db_free; |
| mlx5_init_fbc(qp->buf.frags, log_rq_stride, log_rq_sz, &qp->rq.fbc); |
| } |
| |
| qp->rq.db = &qp->db.db[MLX5_RCV_DBR]; |
| inlen = MLX5_ST_SZ_BYTES(create_qp_in) + |
| MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) * |
| qp->buf.npages; |
| in = kvzalloc(inlen, GFP_KERNEL); |
| if (!in) { |
| err = -ENOMEM; |
| goto err_in; |
| } |
| |
| qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); |
| MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC); |
| MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); |
| MLX5_SET(qpc, qpc, pd, tracker->pdn); |
| MLX5_SET(qpc, qpc, uar_page, tracker->uar->index); |
| MLX5_SET(qpc, qpc, log_page_size, |
| qp->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); |
| MLX5_SET(qpc, qpc, ts_format, mlx5_get_qp_default_ts(mdev)); |
| if (MLX5_CAP_GEN(mdev, cqe_version) == 1) |
| MLX5_SET(qpc, qpc, user_index, 0xFFFFFF); |
| MLX5_SET(qpc, qpc, no_sq, 1); |
| if (max_recv_wr) { |
| MLX5_SET(qpc, qpc, cqn_rcv, tracker->cq.mcq.cqn); |
| MLX5_SET(qpc, qpc, log_rq_stride, log_rq_stride - 4); |
| MLX5_SET(qpc, qpc, log_rq_size, log_rq_sz); |
| MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ); |
| MLX5_SET64(qpc, qpc, dbr_addr, qp->db.dma); |
| mlx5_fill_page_frag_array(&qp->buf, |
| (__be64 *)MLX5_ADDR_OF(create_qp_in, |
| in, pas)); |
| } else { |
| MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ); |
| } |
| |
| MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP); |
| err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out)); |
| kvfree(in); |
| if (err) |
| goto err_in; |
| |
| qp->qpn = MLX5_GET(create_qp_out, out, qpn); |
| return qp; |
| |
| err_in: |
| if (max_recv_wr) |
| mlx5_frag_buf_free(mdev, &qp->buf); |
| err_db_free: |
| mlx5_db_free(mdev, &qp->db); |
| err_free: |
| kfree(qp); |
| return ERR_PTR(err); |
| } |
| |
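| /* Post a single receive WQE and ring the RQ doorbell. */ |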
| static void mlx5vf_post_recv(struct mlx5_vhca_qp *qp) |
| { |
| struct mlx5_wqe_data_seg *data; |
| unsigned int ix; |
| |
| WARN_ON(qp->rq.pc - qp->rq.cc >= qp->rq.wqe_cnt); |
| ix = qp->rq.pc & (qp->rq.wqe_cnt - 1); |
| data = mlx5_frag_buf_get_wqe(&qp->rq.fbc, ix); |
| data->byte_count = cpu_to_be32(qp->max_msg_size); |
| data->lkey = cpu_to_be32(qp->recv_buf.mkey); |
| data->addr = cpu_to_be64(qp->recv_buf.next_rq_offset); |
| qp->rq.pc++; |
| /* Make sure that descriptors are written before doorbell record. */ |
| dma_wmb(); |
| *qp->rq.db = cpu_to_be32(qp->rq.pc & 0xffff); |
| } |
| |
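| /* |
| * Move a QP to its operational state: RST->INIT->RTR for the host QP |
| * (which only receives, and gets its receive ring pre-posted here) and |
| * all the way to RTS for the firmware QP. |
| */ |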
| static int mlx5vf_activate_qp(struct mlx5_core_dev *mdev, |
| struct mlx5_vhca_qp *qp, u32 remote_qpn, |
| bool host_qp) |
| { |
| u32 init_in[MLX5_ST_SZ_DW(rst2init_qp_in)] = {}; |
| u32 rtr_in[MLX5_ST_SZ_DW(init2rtr_qp_in)] = {}; |
| u32 rts_in[MLX5_ST_SZ_DW(rtr2rts_qp_in)] = {}; |
| void *qpc; |
| int ret; |
| |
| /* Init */ |
| qpc = MLX5_ADDR_OF(rst2init_qp_in, init_in, qpc); |
| MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, 1); |
| MLX5_SET(qpc, qpc, pm_state, MLX5_QPC_PM_STATE_MIGRATED); |
| MLX5_SET(qpc, qpc, rre, 1); |
| MLX5_SET(qpc, qpc, rwe, 1); |
| MLX5_SET(rst2init_qp_in, init_in, opcode, MLX5_CMD_OP_RST2INIT_QP); |
| MLX5_SET(rst2init_qp_in, init_in, qpn, qp->qpn); |
| ret = mlx5_cmd_exec_in(mdev, rst2init_qp, init_in); |
| if (ret) |
| return ret; |
| |
| if (host_qp) { |
| struct mlx5_vhca_recv_buf *recv_buf = &qp->recv_buf; |
| int i; |
| |
| for (i = 0; i < qp->rq.wqe_cnt; i++) { |
| mlx5vf_post_recv(qp); |
| recv_buf->next_rq_offset += qp->max_msg_size; |
| } |
| } |
| |
| /* RTR */ |
| qpc = MLX5_ADDR_OF(init2rtr_qp_in, rtr_in, qpc); |
| MLX5_SET(init2rtr_qp_in, rtr_in, qpn, qp->qpn); |
| MLX5_SET(qpc, qpc, mtu, IB_MTU_4096); |
| MLX5_SET(qpc, qpc, log_msg_max, MLX5_CAP_GEN(mdev, log_max_msg)); |
| MLX5_SET(qpc, qpc, remote_qpn, remote_qpn); |
| MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, 1); |
| MLX5_SET(qpc, qpc, primary_address_path.fl, 1); |
| MLX5_SET(qpc, qpc, min_rnr_nak, 1); |
| MLX5_SET(init2rtr_qp_in, rtr_in, opcode, MLX5_CMD_OP_INIT2RTR_QP); |
| MLX5_SET(init2rtr_qp_in, rtr_in, qpn, qp->qpn); |
| ret = mlx5_cmd_exec_in(mdev, init2rtr_qp, rtr_in); |
| if (ret || host_qp) |
| return ret; |
| |
| /* RTS */ |
| qpc = MLX5_ADDR_OF(rtr2rts_qp_in, rts_in, qpc); |
| MLX5_SET(rtr2rts_qp_in, rts_in, qpn, qp->qpn); |
| MLX5_SET(qpc, qpc, retry_count, 7); |
| MLX5_SET(qpc, qpc, rnr_retry, 7); /* Infinite retry if RNR NACK */ |
| MLX5_SET(qpc, qpc, primary_address_path.ack_timeout, 0x8); /* ~1ms */ |
| MLX5_SET(rtr2rts_qp_in, rts_in, opcode, MLX5_CMD_OP_RTR2RTS_QP); |
| MLX5_SET(rtr2rts_qp_in, rts_in, qpn, qp->qpn); |
| |
| return mlx5_cmd_exec_in(mdev, rtr2rts_qp, rts_in); |
| } |
| |
| static void mlx5vf_destroy_qp(struct mlx5_core_dev *mdev, |
| struct mlx5_vhca_qp *qp) |
| { |
| u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {}; |
| |
| MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP); |
| MLX5_SET(destroy_qp_in, in, qpn, qp->qpn); |
| mlx5_cmd_exec_in(mdev, destroy_qp, in); |
| |
| mlx5_frag_buf_free(mdev, &qp->buf); |
| mlx5_db_free(mdev, &qp->db); |
| kfree(qp); |
| } |
| |
| static void free_recv_pages(struct mlx5_vhca_recv_buf *recv_buf) |
| { |
| int i; |
| |
| /* Undo alloc_pages_bulk_array() */ |
| for (i = 0; i < recv_buf->npages; i++) |
| __free_page(recv_buf->page_list[i]); |
| |
| kvfree(recv_buf->page_list); |
| } |
| |
| static int alloc_recv_pages(struct mlx5_vhca_recv_buf *recv_buf, |
| unsigned int npages) |
| { |
| unsigned int filled = 0, done = 0; |
| int i; |
| |
| recv_buf->page_list = kvcalloc(npages, sizeof(*recv_buf->page_list), |
| GFP_KERNEL); |
| if (!recv_buf->page_list) |
| return -ENOMEM; |
| |
| for (;;) { |
| filled = alloc_pages_bulk_array(GFP_KERNEL, npages - done, |
| recv_buf->page_list + done); |
| if (!filled) |
| goto err; |
| |
| done += filled; |
| if (done == npages) |
| break; |
| } |
| |
| recv_buf->npages = npages; |
| return 0; |
| |
| err: |
| for (i = 0; i < npages; i++) { |
| if (recv_buf->page_list[i]) |
| __free_page(recv_buf->page_list[i]); |
| } |
| |
| kvfree(recv_buf->page_list); |
| return -ENOMEM; |
| } |
| |
| static int register_dma_recv_pages(struct mlx5_core_dev *mdev, |
| struct mlx5_vhca_recv_buf *recv_buf) |
| { |
| int i, j; |
| |
| recv_buf->dma_addrs = kvcalloc(recv_buf->npages, |
| sizeof(*recv_buf->dma_addrs), |
| GFP_KERNEL); |
| if (!recv_buf->dma_addrs) |
| return -ENOMEM; |
| |
| for (i = 0; i < recv_buf->npages; i++) { |
| recv_buf->dma_addrs[i] = dma_map_page(mdev->device, |
| recv_buf->page_list[i], |
| 0, PAGE_SIZE, |
| DMA_FROM_DEVICE); |
| if (dma_mapping_error(mdev->device, recv_buf->dma_addrs[i])) |
| goto error; |
| } |
| return 0; |
| |
| error: |
| for (j = 0; j < i; j++) |
| dma_unmap_single(mdev->device, recv_buf->dma_addrs[j], |
| PAGE_SIZE, DMA_FROM_DEVICE); |
| |
| kvfree(recv_buf->dma_addrs); |
| return -ENOMEM; |
| } |
| |
| static void unregister_dma_recv_pages(struct mlx5_core_dev *mdev, |
| struct mlx5_vhca_recv_buf *recv_buf) |
| { |
| int i; |
| |
| for (i = 0; i < recv_buf->npages; i++) |
| dma_unmap_single(mdev->device, recv_buf->dma_addrs[i], |
| PAGE_SIZE, DMA_FROM_DEVICE); |
| |
| kvfree(recv_buf->dma_addrs); |
| } |
| |
| static void mlx5vf_free_qp_recv_resources(struct mlx5_core_dev *mdev, |
| struct mlx5_vhca_qp *qp) |
| { |
| struct mlx5_vhca_recv_buf *recv_buf = &qp->recv_buf; |
| |
| mlx5_core_destroy_mkey(mdev, recv_buf->mkey); |
| unregister_dma_recv_pages(mdev, recv_buf); |
| free_recv_pages(&qp->recv_buf); |
| } |
| |
| static int mlx5vf_alloc_qp_recv_resources(struct mlx5_core_dev *mdev, |
| struct mlx5_vhca_qp *qp, u32 pdn, |
| u64 rq_size) |
| { |
| unsigned int npages = DIV_ROUND_UP_ULL(rq_size, PAGE_SIZE); |
| struct mlx5_vhca_recv_buf *recv_buf = &qp->recv_buf; |
| int err; |
| |
| err = alloc_recv_pages(recv_buf, npages); |
| if (err < 0) |
| return err; |
| |
| err = register_dma_recv_pages(mdev, recv_buf); |
| if (err) |
| goto end; |
| |
| err = _create_mkey(mdev, pdn, NULL, recv_buf, &recv_buf->mkey); |
| if (err) |
| goto err_create_mkey; |
| |
| return 0; |
| |
| err_create_mkey: |
| unregister_dma_recv_pages(mdev, recv_buf); |
| end: |
| free_recv_pages(recv_buf); |
| return err; |
| } |
| |
| static void |
| _mlx5vf_free_page_tracker_resources(struct mlx5vf_pci_core_device *mvdev) |
| { |
| struct mlx5_vhca_page_tracker *tracker = &mvdev->tracker; |
| struct mlx5_core_dev *mdev = mvdev->mdev; |
| |
| lockdep_assert_held(&mvdev->state_mutex); |
| |
| if (!mvdev->log_active) |
| return; |
| |
| WARN_ON(mvdev->mdev_detach); |
| |
| mlx5_eq_notifier_unregister(mdev, &tracker->nb); |
| mlx5vf_cmd_destroy_tracker(mdev, tracker->id); |
| mlx5vf_destroy_qp(mdev, tracker->fw_qp); |
| mlx5vf_free_qp_recv_resources(mdev, tracker->host_qp); |
| mlx5vf_destroy_qp(mdev, tracker->host_qp); |
| mlx5vf_destroy_cq(mdev, &tracker->cq); |
| mlx5_core_dealloc_pd(mdev, tracker->pdn); |
| mlx5_put_uars_page(mdev, tracker->uar); |
| mvdev->log_active = false; |
| } |
| |
| int mlx5vf_stop_page_tracker(struct vfio_device *vdev) |
| { |
| struct mlx5vf_pci_core_device *mvdev = container_of( |
| vdev, struct mlx5vf_pci_core_device, core_device.vdev); |
| |
| mutex_lock(&mvdev->state_mutex); |
| if (!mvdev->log_active) |
| goto end; |
| |
| _mlx5vf_free_page_tracker_resources(mvdev); |
| mvdev->log_active = false; |
| end: |
| mlx5vf_state_mutex_unlock(mvdev); |
| return 0; |
| } |
| |
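| /* |
| * VFIO log_ops start callback: allocate the UAR, PD, CQ and QPs, clamp |
| * the tracked page size to the device limits and create the tracker. |
| */ |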
| int mlx5vf_start_page_tracker(struct vfio_device *vdev, |
| struct rb_root_cached *ranges, u32 nnodes, |
| u64 *page_size) |
| { |
| struct mlx5vf_pci_core_device *mvdev = container_of( |
| vdev, struct mlx5vf_pci_core_device, core_device.vdev); |
| struct mlx5_vhca_page_tracker *tracker = &mvdev->tracker; |
| u8 log_tracked_page = ilog2(*page_size); |
| struct mlx5_vhca_qp *host_qp; |
| struct mlx5_vhca_qp *fw_qp; |
| struct mlx5_core_dev *mdev; |
| u32 max_msg_size = PAGE_SIZE; |
| u64 rq_size = SZ_2M; |
| u32 max_recv_wr; |
| int err; |
| |
| mutex_lock(&mvdev->state_mutex); |
| if (mvdev->mdev_detach) { |
| err = -ENOTCONN; |
| goto end; |
| } |
| |
| if (mvdev->log_active) { |
| err = -EINVAL; |
| goto end; |
| } |
| |
| mdev = mvdev->mdev; |
| memset(tracker, 0, sizeof(*tracker)); |
| tracker->uar = mlx5_get_uars_page(mdev); |
| if (IS_ERR(tracker->uar)) { |
| err = PTR_ERR(tracker->uar); |
| goto end; |
| } |
| |
| err = mlx5_core_alloc_pd(mdev, &tracker->pdn); |
| if (err) |
| goto err_uar; |
| |
| max_recv_wr = DIV_ROUND_UP_ULL(rq_size, max_msg_size); |
| err = mlx5vf_create_cq(mdev, tracker, max_recv_wr); |
| if (err) |
| goto err_dealloc_pd; |
| |
| host_qp = mlx5vf_create_rc_qp(mdev, tracker, max_recv_wr); |
| if (IS_ERR(host_qp)) { |
| err = PTR_ERR(host_qp); |
| goto err_cq; |
| } |
| |
| host_qp->max_msg_size = max_msg_size; |
| if (log_tracked_page < MLX5_CAP_ADV_VIRTUALIZATION(mdev, |
| pg_track_log_min_page_size)) { |
| log_tracked_page = MLX5_CAP_ADV_VIRTUALIZATION(mdev, |
| pg_track_log_min_page_size); |
| } else if (log_tracked_page > MLX5_CAP_ADV_VIRTUALIZATION(mdev, |
| pg_track_log_max_page_size)) { |
| log_tracked_page = MLX5_CAP_ADV_VIRTUALIZATION(mdev, |
| pg_track_log_max_page_size); |
| } |
| |
| host_qp->tracked_page_size = (1ULL << log_tracked_page); |
| err = mlx5vf_alloc_qp_recv_resources(mdev, host_qp, tracker->pdn, |
| rq_size); |
| if (err) |
| goto err_host_qp; |
| |
| fw_qp = mlx5vf_create_rc_qp(mdev, tracker, 0); |
| if (IS_ERR(fw_qp)) { |
| err = PTR_ERR(fw_qp); |
| goto err_recv_resources; |
| } |
| |
| err = mlx5vf_activate_qp(mdev, host_qp, fw_qp->qpn, true); |
| if (err) |
| goto err_activate; |
| |
| err = mlx5vf_activate_qp(mdev, fw_qp, host_qp->qpn, false); |
| if (err) |
| goto err_activate; |
| |
| tracker->host_qp = host_qp; |
| tracker->fw_qp = fw_qp; |
| err = mlx5vf_create_tracker(mdev, mvdev, ranges, nnodes); |
| if (err) |
| goto err_activate; |
| |
| MLX5_NB_INIT(&tracker->nb, mlx5vf_event_notifier, NOTIFY_ANY); |
| mlx5_eq_notifier_register(mdev, &tracker->nb); |
| *page_size = host_qp->tracked_page_size; |
| mvdev->log_active = true; |
| mlx5vf_state_mutex_unlock(mvdev); |
| return 0; |
| |
| err_activate: |
| mlx5vf_destroy_qp(mdev, fw_qp); |
| err_recv_resources: |
| mlx5vf_free_qp_recv_resources(mdev, host_qp); |
| err_host_qp: |
| mlx5vf_destroy_qp(mdev, host_qp); |
| err_cq: |
| mlx5vf_destroy_cq(mdev, &tracker->cq); |
| err_dealloc_pd: |
| mlx5_core_dealloc_pd(mdev, tracker->pdn); |
| err_uar: |
| mlx5_put_uars_page(mdev, tracker->uar); |
| end: |
| mlx5vf_state_mutex_unlock(mvdev); |
| return err; |
| } |
| |
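| /* |
| * Decode one dirty page report message from the receive buffer and set |
| * the corresponding bits in the IOVA bitmap. |
| */ |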
| static void |
| set_report_output(u32 size, int index, struct mlx5_vhca_qp *qp, |
| struct iova_bitmap *dirty) |
| { |
| u32 entry_size = MLX5_ST_SZ_BYTES(page_track_report_entry); |
| u32 nent = size / entry_size; |
| struct page *page; |
| u64 addr; |
| u64 *buf; |
| int i; |
| |
| if (WARN_ON(index >= qp->recv_buf.npages || |
| (nent > qp->max_msg_size / entry_size))) |
| return; |
| |
| page = qp->recv_buf.page_list[index]; |
| buf = kmap_local_page(page); |
| for (i = 0; i < nent; i++) { |
| addr = MLX5_GET(page_track_report_entry, buf + i, |
| dirty_address_low); |
| addr |= (u64)MLX5_GET(page_track_report_entry, buf + i, |
| dirty_address_high) << 32; |
| iova_bitmap_set(dirty, addr, qp->tracked_page_size); |
| } |
| kunmap_local(buf); |
| } |
| |
| static void |
| mlx5vf_rq_cqe(struct mlx5_vhca_qp *qp, struct mlx5_cqe64 *cqe, |
| struct iova_bitmap *dirty, int *tracker_status) |
| { |
| u32 size; |
| int ix; |
| |
| qp->rq.cc++; |
| *tracker_status = be32_to_cpu(cqe->immediate) >> 28; |
| size = be32_to_cpu(cqe->byte_cnt); |
| ix = be16_to_cpu(cqe->wqe_counter) & (qp->rq.wqe_cnt - 1); |
| |
| /* A zero length CQE carries no data and is unexpected while still reporting */ |
| WARN_ON(!size && *tracker_status == MLX5_PAGE_TRACK_STATE_REPORTING); |
| if (size) |
| set_report_output(size, ix, qp, dirty); |
| |
| qp->recv_buf.next_rq_offset = ix * qp->max_msg_size; |
| mlx5vf_post_recv(qp); |
| } |
| |
| static void *get_cqe(struct mlx5_vhca_cq *cq, int n) |
| { |
| return mlx5_frag_buf_get_wqe(&cq->buf.fbc, n); |
| } |
| |
| static struct mlx5_cqe64 *get_sw_cqe(struct mlx5_vhca_cq *cq, int n) |
| { |
| void *cqe = get_cqe(cq, n & (cq->ncqe - 1)); |
| struct mlx5_cqe64 *cqe64; |
| |
| cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64; |
| |
| if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) && |
| !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & (cq->ncqe)))) { |
| return cqe64; |
| } else { |
| return NULL; |
| } |
| } |
| |
| static int |
| mlx5vf_cq_poll_one(struct mlx5_vhca_cq *cq, struct mlx5_vhca_qp *qp, |
| struct iova_bitmap *dirty, int *tracker_status) |
| { |
| struct mlx5_cqe64 *cqe; |
| u8 opcode; |
| |
| cqe = get_sw_cqe(cq, cq->mcq.cons_index); |
| if (!cqe) |
| return CQ_EMPTY; |
| |
| ++cq->mcq.cons_index; |
| /* |
| * Make sure we read CQ entry contents after we've checked the |
| * ownership bit. |
| */ |
| rmb(); |
| opcode = get_cqe_opcode(cqe); |
| switch (opcode) { |
| case MLX5_CQE_RESP_SEND_IMM: |
| mlx5vf_rq_cqe(qp, cqe, dirty, tracker_status); |
| return CQ_OK; |
| default: |
| return CQ_POLL_ERR; |
| } |
| } |
| |
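| /* |
| * VFIO log_ops read_and_clear callback: ask the device to report dirty |
| * pages for the given range and poll the CQ until the tracker leaves |
| * the REPORTING state (or an error occurs). |
| */ |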
| int mlx5vf_tracker_read_and_clear(struct vfio_device *vdev, unsigned long iova, |
| unsigned long length, |
| struct iova_bitmap *dirty) |
| { |
| struct mlx5vf_pci_core_device *mvdev = container_of( |
| vdev, struct mlx5vf_pci_core_device, core_device.vdev); |
| struct mlx5_vhca_page_tracker *tracker = &mvdev->tracker; |
| struct mlx5_vhca_cq *cq = &tracker->cq; |
| struct mlx5_core_dev *mdev; |
| int poll_err, err; |
| |
| mutex_lock(&mvdev->state_mutex); |
| if (!mvdev->log_active) { |
| err = -EINVAL; |
| goto end; |
| } |
| |
| if (mvdev->mdev_detach) { |
| err = -ENOTCONN; |
| goto end; |
| } |
| |
| mdev = mvdev->mdev; |
| err = mlx5vf_cmd_modify_tracker(mdev, tracker->id, iova, length, |
| MLX5_PAGE_TRACK_STATE_REPORTING); |
| if (err) |
| goto end; |
| |
| tracker->status = MLX5_PAGE_TRACK_STATE_REPORTING; |
| while (tracker->status == MLX5_PAGE_TRACK_STATE_REPORTING && |
| !tracker->is_err) { |
| poll_err = mlx5vf_cq_poll_one(cq, tracker->host_qp, dirty, |
| &tracker->status); |
| if (poll_err == CQ_EMPTY) { |
| mlx5_cq_arm(&cq->mcq, MLX5_CQ_DB_REQ_NOT, tracker->uar->map, |
| cq->mcq.cons_index); |
| poll_err = mlx5vf_cq_poll_one(cq, tracker->host_qp, |
| dirty, &tracker->status); |
| if (poll_err == CQ_EMPTY) { |
| wait_for_completion(&mvdev->tracker_comp); |
| continue; |
| } |
| } |
| if (poll_err == CQ_POLL_ERR) { |
| err = -EIO; |
| goto end; |
| } |
| mlx5_cq_set_ci(&cq->mcq); |
| } |
| |
| if (tracker->status == MLX5_PAGE_TRACK_STATE_ERROR) |
| tracker->is_err = true; |
| |
| if (tracker->is_err) |
| err = -EIO; |
| end: |
| mlx5vf_state_mutex_unlock(mvdev); |
| return err; |
| } |