| // SPDX-License-Identifier: GPL-2.0 |
| /* |
| * NVMe over Fabrics Persistent Reservation. |
| * Copyright (c) 2024 Guixin Liu, Alibaba Group. |
| * All rights reserved. |
| */ |
| #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
| #include <linux/unaligned.h> |
| #include "nvmet.h" |
| |
| #define NVMET_PR_NOTIFI_MASK_ALL \ |
| (1 << NVME_PR_NOTIFY_BIT_REG_PREEMPTED | \ |
| 1 << NVME_PR_NOTIFY_BIT_RESV_RELEASED | \ |
| 1 << NVME_PR_NOTIFY_BIT_RESV_PREEMPTED) |
| |
| static inline bool nvmet_pr_parse_ignore_key(u32 cdw10) |
| { |
| /* Ignore existing key, bit 03. */ |
| return (cdw10 >> 3) & 1; |
| } |
| |
| static inline struct nvmet_ns *nvmet_pr_to_ns(struct nvmet_pr *pr) |
| { |
| return container_of(pr, struct nvmet_ns, pr); |
| } |
| |
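| /* |
| * Look up the registrant for @hostid. Callers must hold either the RCU |
| * read lock or pr->pr_sem. |
| */ |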
| static struct nvmet_pr_registrant * |
| nvmet_pr_find_registrant(struct nvmet_pr *pr, uuid_t *hostid) |
| { |
| struct nvmet_pr_registrant *reg; |
| |
| list_for_each_entry_rcu(reg, &pr->registrant_list, entry) { |
| if (uuid_equal(®->hostid, hostid)) |
| return reg; |
| } |
| return NULL; |
| } |
| |
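| /* |
| * Set Features, Reservation Notification Mask: an NSID of 0xffffffff |
| * applies the mask to every PR-enabled namespace in the subsystem, |
| * otherwise only to the addressed namespace. |
| */ |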
| u16 nvmet_set_feat_resv_notif_mask(struct nvmet_req *req, u32 mask) |
| { |
| u32 nsid = le32_to_cpu(req->cmd->common.nsid); |
| struct nvmet_ctrl *ctrl = req->sq->ctrl; |
| struct nvmet_ns *ns; |
| unsigned long idx; |
| u16 status; |
| |
| if (mask & ~(NVMET_PR_NOTIFI_MASK_ALL)) { |
| req->error_loc = offsetof(struct nvme_common_command, cdw11); |
| return NVME_SC_INVALID_FIELD | NVME_STATUS_DNR; |
| } |
| |
| if (nsid != U32_MAX) { |
| status = nvmet_req_find_ns(req); |
| if (status) |
| return status; |
| if (!req->ns->pr.enable) |
| return NVME_SC_INVALID_FIELD | NVME_STATUS_DNR; |
| |
| WRITE_ONCE(req->ns->pr.notify_mask, mask); |
| goto success; |
| } |
| |
| xa_for_each(&ctrl->subsys->namespaces, idx, ns) { |
| if (ns->pr.enable) |
| WRITE_ONCE(ns->pr.notify_mask, mask); |
| } |
| |
| success: |
| nvmet_set_result(req, mask); |
| return NVME_SC_SUCCESS; |
| } |
| |
| u16 nvmet_get_feat_resv_notif_mask(struct nvmet_req *req) |
| { |
| u16 status; |
| |
| status = nvmet_req_find_ns(req); |
| if (status) |
| return status; |
| |
| if (!req->ns->pr.enable) |
| return NVME_SC_INVALID_FIELD | NVME_STATUS_DNR; |
| |
| nvmet_set_result(req, READ_ONCE(req->ns->pr.notify_mask)); |
| return status; |
| } |
| |
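| /* |
| * Get Log Page, Reservation Notification: return the oldest entry from |
| * the per-controller log queue and fold the number of lost entries into |
| * the reported log page count. |
| */ |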
| void nvmet_execute_get_log_page_resv(struct nvmet_req *req) |
| { |
| struct nvmet_pr_log_mgr *log_mgr = &req->sq->ctrl->pr_log_mgr; |
| struct nvme_pr_log next_log = {0}; |
| struct nvme_pr_log log = {0}; |
| u16 status = NVME_SC_SUCCESS; |
| u64 lost_count; |
| u64 cur_count; |
| u64 next_count; |
| |
| mutex_lock(&log_mgr->lock); |
| if (!kfifo_get(&log_mgr->log_queue, &log)) |
| goto out; |
| |
| /* |
| * We cannot peek at the last entry in the kfifo, so use the count of |
| * the current log and the count of the next log to work out how many |
| * logs were lost, taking counter wrap-around into account. For example, |
| * cur_count == U64_MAX - 1 and next_count == 2 means the logs numbered |
| * U64_MAX and 1 were lost (the counter skips 0), so lost_count == 2. |
| * If there is no subsequent log, the number of lost logs equals the |
| * lost_count tracked in nvmet_pr_log_mgr. |
| */ |
| cur_count = le64_to_cpu(log.count); |
| if (kfifo_peek(&log_mgr->log_queue, &next_log)) { |
| next_count = le64_to_cpu(next_log.count); |
| if (next_count > cur_count) |
| lost_count = next_count - cur_count - 1; |
| else |
| lost_count = U64_MAX - cur_count + next_count - 1; |
| } else { |
| lost_count = log_mgr->lost_count; |
| } |
| |
| log.count = cpu_to_le64((cur_count + lost_count) == 0 ? |
| 1 : (cur_count + lost_count)); |
| log_mgr->lost_count -= lost_count; |
| |
| log.nr_pages = kfifo_len(&log_mgr->log_queue); |
| |
| out: |
| status = nvmet_copy_to_sgl(req, 0, &log, sizeof(log)); |
| mutex_unlock(&log_mgr->lock); |
| nvmet_req_complete(req, status); |
| } |
| |
| static void nvmet_pr_add_resv_log(struct nvmet_ctrl *ctrl, u8 log_type, |
| u32 nsid) |
| { |
| struct nvmet_pr_log_mgr *log_mgr = &ctrl->pr_log_mgr; |
| struct nvme_pr_log log = {0}; |
| |
| mutex_lock(&log_mgr->lock); |
| log_mgr->counter++; |
| if (log_mgr->counter == 0) |
| log_mgr->counter = 1; |
| |
| log.count = cpu_to_le64(log_mgr->counter); |
| log.type = log_type; |
| log.nsid = cpu_to_le32(nsid); |
| |
| if (!kfifo_put(&log_mgr->log_queue, log)) { |
| pr_info("a reservation log lost, cntlid:%d, log_type:%d, nsid:%d\n", |
| ctrl->cntlid, log_type, nsid); |
| log_mgr->lost_count++; |
| } |
| |
| mutex_unlock(&log_mgr->lock); |
| } |
| |
| static void nvmet_pr_resv_released(struct nvmet_pr *pr, uuid_t *hostid) |
| { |
| struct nvmet_ns *ns = nvmet_pr_to_ns(pr); |
| struct nvmet_subsys *subsys = ns->subsys; |
| struct nvmet_ctrl *ctrl; |
| |
| if (test_bit(NVME_PR_NOTIFY_BIT_RESV_RELEASED, &pr->notify_mask)) |
| return; |
| |
| mutex_lock(&subsys->lock); |
| list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) { |
| if (!uuid_equal(&ctrl->hostid, hostid) && |
| nvmet_pr_find_registrant(pr, &ctrl->hostid)) { |
| nvmet_pr_add_resv_log(ctrl, |
| NVME_PR_LOG_RESERVATION_RELEASED, ns->nsid); |
| nvmet_add_async_event(ctrl, NVME_AER_CSS, |
| NVME_AEN_RESV_LOG_PAGE_AVALIABLE, |
| NVME_LOG_RESERVATION); |
| } |
| } |
| mutex_unlock(&subsys->lock); |
| } |
| |
| static void nvmet_pr_send_event_to_host(struct nvmet_pr *pr, uuid_t *hostid, |
| u8 log_type) |
| { |
| struct nvmet_ns *ns = nvmet_pr_to_ns(pr); |
| struct nvmet_subsys *subsys = ns->subsys; |
| struct nvmet_ctrl *ctrl; |
| |
| mutex_lock(&subsys->lock); |
| list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) { |
| if (uuid_equal(hostid, &ctrl->hostid)) { |
| nvmet_pr_add_resv_log(ctrl, log_type, ns->nsid); |
| nvmet_add_async_event(ctrl, NVME_AER_CSS, |
| NVME_AEN_RESV_LOG_PAGE_AVALIABLE, |
| NVME_LOG_RESERVATION); |
| } |
| } |
| mutex_unlock(&subsys->lock); |
| } |
| |
| static void nvmet_pr_resv_preempted(struct nvmet_pr *pr, uuid_t *hostid) |
| { |
| if (test_bit(NVME_PR_NOTIFY_BIT_RESV_PREEMPTED, &pr->notify_mask)) |
| return; |
| |
| nvmet_pr_send_event_to_host(pr, hostid, |
| NVME_PR_LOG_RESERVATOIN_PREEMPTED); |
| } |
| |
| static void nvmet_pr_registration_preempted(struct nvmet_pr *pr, |
| uuid_t *hostid) |
| { |
| if (test_bit(NVME_PR_NOTIFY_BIT_REG_PREEMPTED, &pr->notify_mask)) |
| return; |
| |
| nvmet_pr_send_event_to_host(pr, hostid, |
| NVME_PR_LOG_REGISTRATION_PREEMPTED); |
| } |
| |
| static inline void nvmet_pr_set_new_holder(struct nvmet_pr *pr, u8 new_rtype, |
| struct nvmet_pr_registrant *reg) |
| { |
| reg->rtype = new_rtype; |
| rcu_assign_pointer(pr->holder, reg); |
| } |
| |
| static u16 nvmet_pr_register(struct nvmet_req *req, |
| struct nvmet_pr_register_data *d) |
| { |
| struct nvmet_ctrl *ctrl = req->sq->ctrl; |
| struct nvmet_pr_registrant *new, *reg; |
| struct nvmet_pr *pr = &req->ns->pr; |
| u16 status = NVME_SC_SUCCESS; |
| u64 nrkey = le64_to_cpu(d->nrkey); |
| |
| new = kmalloc(sizeof(*new), GFP_KERNEL); |
| if (!new) |
| return NVME_SC_INTERNAL; |
| |
| down(&pr->pr_sem); |
| reg = nvmet_pr_find_registrant(pr, &ctrl->hostid); |
| if (reg) { |
| if (reg->rkey != nrkey) |
| status = NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR; |
| kfree(new); |
| goto out; |
| } |
| |
| memset(new, 0, sizeof(*new)); |
| INIT_LIST_HEAD(&new->entry); |
| new->rkey = nrkey; |
| uuid_copy(&new->hostid, &ctrl->hostid); |
| list_add_tail_rcu(&new->entry, &pr->registrant_list); |
| |
| out: |
| up(&pr->pr_sem); |
| return status; |
| } |
| |
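| /* |
| * Remove @reg from the registrant list. If @reg holds the reservation, |
| * hand it to the first remaining registrant for the all-registrants |
| * types, otherwise drop it and, for the registrants-only types, notify |
| * the other registered hosts that the reservation was released. |
| * Must be called with pr->pr_sem held. |
| */ |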
| static void nvmet_pr_unregister_one(struct nvmet_pr *pr, |
| struct nvmet_pr_registrant *reg) |
| { |
| struct nvmet_pr_registrant *first_reg; |
| struct nvmet_pr_registrant *holder; |
| u8 original_rtype; |
| |
| list_del_rcu(®->entry); |
| |
| holder = rcu_dereference_protected(pr->holder, 1); |
| if (reg != holder) |
| goto out; |
| |
| original_rtype = holder->rtype; |
| if (original_rtype == NVME_PR_WRITE_EXCLUSIVE_ALL_REGS || |
| original_rtype == NVME_PR_EXCLUSIVE_ACCESS_ALL_REGS) { |
| first_reg = list_first_or_null_rcu(&pr->registrant_list, |
| struct nvmet_pr_registrant, entry); |
| if (first_reg) |
| first_reg->rtype = original_rtype; |
| rcu_assign_pointer(pr->holder, first_reg); |
| } else { |
| rcu_assign_pointer(pr->holder, NULL); |
| |
| if (original_rtype == NVME_PR_WRITE_EXCLUSIVE_REG_ONLY || |
| original_rtype == NVME_PR_EXCLUSIVE_ACCESS_REG_ONLY) |
| nvmet_pr_resv_released(pr, ®->hostid); |
| } |
| out: |
| kfree_rcu(reg, rcu); |
| } |
| |
| static u16 nvmet_pr_unregister(struct nvmet_req *req, |
| struct nvmet_pr_register_data *d, |
| bool ignore_key) |
| { |
| u16 status = NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR; |
| struct nvmet_ctrl *ctrl = req->sq->ctrl; |
| struct nvmet_pr *pr = &req->ns->pr; |
| struct nvmet_pr_registrant *reg; |
| |
| down(&pr->pr_sem); |
| list_for_each_entry_rcu(reg, &pr->registrant_list, entry) { |
| if (uuid_equal(®->hostid, &ctrl->hostid)) { |
| if (ignore_key || reg->rkey == le64_to_cpu(d->crkey)) { |
| status = NVME_SC_SUCCESS; |
| nvmet_pr_unregister_one(pr, reg); |
| } |
| break; |
| } |
| } |
| up(&pr->pr_sem); |
| |
| return status; |
| } |
| |
| static void nvmet_pr_update_reg_rkey(struct nvmet_pr_registrant *reg, |
| void *attr) |
| { |
| reg->rkey = *(u64 *)attr; |
| } |
| |
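| /* |
| * Update an attribute of @reg. A registrant that does not hold the |
| * reservation is updated in place; the holder is replaced by an RCU |
| * copy so that readers of pr->holder never see a half-updated entry. |
| */ |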
| static u16 nvmet_pr_update_reg_attr(struct nvmet_pr *pr, |
| struct nvmet_pr_registrant *reg, |
| void (*change_attr)(struct nvmet_pr_registrant *reg, |
| void *attr), |
| void *attr) |
| { |
| struct nvmet_pr_registrant *holder; |
| struct nvmet_pr_registrant *new; |
| |
| holder = rcu_dereference_protected(pr->holder, 1); |
| if (reg != holder) { |
| change_attr(reg, attr); |
| return NVME_SC_SUCCESS; |
| } |
| |
| new = kmalloc(sizeof(*new), GFP_ATOMIC); |
| if (!new) |
| return NVME_SC_INTERNAL; |
| |
| new->rkey = holder->rkey; |
| new->rtype = holder->rtype; |
| uuid_copy(&new->hostid, &holder->hostid); |
| INIT_LIST_HEAD(&new->entry); |
| |
| change_attr(new, attr); |
| list_replace_rcu(&holder->entry, &new->entry); |
| rcu_assign_pointer(pr->holder, new); |
| kfree_rcu(holder, rcu); |
| |
| return NVME_SC_SUCCESS; |
| } |
| |
| static u16 nvmet_pr_replace(struct nvmet_req *req, |
| struct nvmet_pr_register_data *d, |
| bool ignore_key) |
| { |
| u16 status = NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR; |
| struct nvmet_ctrl *ctrl = req->sq->ctrl; |
| struct nvmet_pr *pr = &req->ns->pr; |
| struct nvmet_pr_registrant *reg; |
| u64 nrkey = le64_to_cpu(d->nrkey); |
| |
| down(&pr->pr_sem); |
| list_for_each_entry_rcu(reg, &pr->registrant_list, entry) { |
| if (uuid_equal(®->hostid, &ctrl->hostid)) { |
| if (ignore_key || reg->rkey == le64_to_cpu(d->crkey)) |
| status = nvmet_pr_update_reg_attr(pr, reg, |
| nvmet_pr_update_reg_rkey, |
| &nrkey); |
| break; |
| } |
| } |
| up(&pr->pr_sem); |
| return status; |
| } |
| |
| static void nvmet_execute_pr_register(struct nvmet_req *req) |
| { |
| u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10); |
| bool ignore_key = nvmet_pr_parse_ignore_key(cdw10); |
| struct nvmet_pr_register_data *d; |
| u8 reg_act = cdw10 & 0x07; /* Reservation Register Action, bit 02:00 */ |
| u16 status; |
| |
| d = kmalloc(sizeof(*d), GFP_KERNEL); |
| if (!d) { |
| status = NVME_SC_INTERNAL; |
| goto out; |
| } |
| |
| status = nvmet_copy_from_sgl(req, 0, d, sizeof(*d)); |
| if (status) |
| goto free_data; |
| |
| switch (reg_act) { |
| case NVME_PR_REGISTER_ACT_REG: |
| status = nvmet_pr_register(req, d); |
| break; |
| case NVME_PR_REGISTER_ACT_UNREG: |
| status = nvmet_pr_unregister(req, d, ignore_key); |
| break; |
| case NVME_PR_REGISTER_ACT_REPLACE: |
| status = nvmet_pr_replace(req, d, ignore_key); |
| break; |
| default: |
| req->error_loc = offsetof(struct nvme_common_command, cdw10); |
| status = NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR; |
| break; |
| } |
| free_data: |
| kfree(d); |
| out: |
| if (!status) |
| atomic_inc(&req->ns->pr.generation); |
| nvmet_req_complete(req, status); |
| } |
| |
| static u16 nvmet_pr_acquire(struct nvmet_req *req, |
| struct nvmet_pr_registrant *reg, |
| u8 rtype) |
| { |
| struct nvmet_pr *pr = &req->ns->pr; |
| struct nvmet_pr_registrant *holder; |
| |
| holder = rcu_dereference_protected(pr->holder, 1); |
| if (holder && reg != holder) |
| return NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR; |
| if (holder && reg == holder) { |
| if (holder->rtype == rtype) |
| return NVME_SC_SUCCESS; |
| return NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR; |
| } |
| |
| nvmet_pr_set_new_holder(pr, rtype, reg); |
| return NVME_SC_SUCCESS; |
| } |
| |
| static void nvmet_pr_confirm_ns_pc_ref(struct percpu_ref *ref) |
| { |
| struct nvmet_pr_per_ctrl_ref *pc_ref = |
| container_of(ref, struct nvmet_pr_per_ctrl_ref, ref); |
| |
| complete(&pc_ref->confirm_done); |
| } |
| |
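| /* |
| * Kill the per-controller reference(s) matching @hostid so that new |
| * commands from that host fail until the reference is resurrected, and |
| * wait until the kill has been confirmed. |
| */ |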
| static void nvmet_pr_set_ctrl_to_abort(struct nvmet_req *req, uuid_t *hostid) |
| { |
| struct nvmet_pr_per_ctrl_ref *pc_ref; |
| struct nvmet_ns *ns = req->ns; |
| unsigned long idx; |
| |
| xa_for_each(&ns->pr_per_ctrl_refs, idx, pc_ref) { |
| if (uuid_equal(&pc_ref->hostid, hostid)) { |
| percpu_ref_kill_and_confirm(&pc_ref->ref, |
| nvmet_pr_confirm_ns_pc_ref); |
| wait_for_completion(&pc_ref->confirm_done); |
| } |
| } |
| } |
| |
| static u16 nvmet_pr_unreg_all_host_by_prkey(struct nvmet_req *req, u64 prkey, |
| uuid_t *send_hostid, |
| bool abort) |
| { |
| u16 status = NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR; |
| struct nvmet_pr_registrant *reg, *tmp; |
| struct nvmet_pr *pr = &req->ns->pr; |
| uuid_t hostid; |
| |
| list_for_each_entry_safe(reg, tmp, &pr->registrant_list, entry) { |
| if (reg->rkey == prkey) { |
| status = NVME_SC_SUCCESS; |
| uuid_copy(&hostid, ®->hostid); |
| if (abort) |
| nvmet_pr_set_ctrl_to_abort(req, &hostid); |
| nvmet_pr_unregister_one(pr, reg); |
| if (!uuid_equal(&hostid, send_hostid)) |
| nvmet_pr_registration_preempted(pr, &hostid); |
| } |
| } |
| return status; |
| } |
| |
| static void nvmet_pr_unreg_all_others_by_prkey(struct nvmet_req *req, |
| u64 prkey, |
| uuid_t *send_hostid, |
| bool abort) |
| { |
| struct nvmet_pr_registrant *reg, *tmp; |
| struct nvmet_pr *pr = &req->ns->pr; |
| uuid_t hostid; |
| |
| list_for_each_entry_safe(reg, tmp, &pr->registrant_list, entry) { |
| if (reg->rkey == prkey && |
| !uuid_equal(®->hostid, send_hostid)) { |
| uuid_copy(&hostid, ®->hostid); |
| if (abort) |
| nvmet_pr_set_ctrl_to_abort(req, &hostid); |
| nvmet_pr_unregister_one(pr, reg); |
| nvmet_pr_registration_preempted(pr, &hostid); |
| } |
| } |
| } |
| |
| static void nvmet_pr_unreg_all_others(struct nvmet_req *req, |
| uuid_t *send_hostid, |
| bool abort) |
| { |
| struct nvmet_pr_registrant *reg, *tmp; |
| struct nvmet_pr *pr = &req->ns->pr; |
| uuid_t hostid; |
| |
| list_for_each_entry_safe(reg, tmp, &pr->registrant_list, entry) { |
| if (!uuid_equal(®->hostid, send_hostid)) { |
| uuid_copy(&hostid, ®->hostid); |
| if (abort) |
| nvmet_pr_set_ctrl_to_abort(req, &hostid); |
| nvmet_pr_unregister_one(pr, reg); |
| nvmet_pr_registration_preempted(pr, &hostid); |
| } |
| } |
| } |
| |
| static void nvmet_pr_update_holder_rtype(struct nvmet_pr_registrant *reg, |
| void *attr) |
| { |
| u8 new_rtype = *(u8 *)attr; |
| |
| reg->rtype = new_rtype; |
| } |
| |
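| /* |
| * Reservation Acquire with the Preempt or Preempt and Abort action: |
| * registrations whose key matches PRKEY are removed, and when the |
| * current holder is preempted the reservation is handed to the |
| * preempting registrant before the other registrations are dropped. |
| */ |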
| static u16 nvmet_pr_preempt(struct nvmet_req *req, |
| struct nvmet_pr_registrant *reg, |
| u8 rtype, |
| struct nvmet_pr_acquire_data *d, |
| bool abort) |
| { |
| struct nvmet_ctrl *ctrl = req->sq->ctrl; |
| struct nvmet_pr *pr = &req->ns->pr; |
| struct nvmet_pr_registrant *holder; |
| enum nvme_pr_type original_rtype; |
| u64 prkey = le64_to_cpu(d->prkey); |
| u16 status; |
| |
| holder = rcu_dereference_protected(pr->holder, 1); |
| if (!holder) |
| return nvmet_pr_unreg_all_host_by_prkey(req, prkey, |
| &ctrl->hostid, abort); |
| |
| original_rtype = holder->rtype; |
| if (original_rtype == NVME_PR_WRITE_EXCLUSIVE_ALL_REGS || |
| original_rtype == NVME_PR_EXCLUSIVE_ACCESS_ALL_REGS) { |
| if (!prkey) { |
| /* |
| * To prevent possible access from other hosts, and to |
| * avoid terminating the holder, set the new holder |
| * first, before unregistering the others. |
| */ |
| nvmet_pr_set_new_holder(pr, rtype, reg); |
| nvmet_pr_unreg_all_others(req, &ctrl->hostid, abort); |
| return NVME_SC_SUCCESS; |
| } |
| return nvmet_pr_unreg_all_host_by_prkey(req, prkey, |
| &ctrl->hostid, abort); |
| } |
| |
| if (holder == reg) { |
| status = nvmet_pr_update_reg_attr(pr, holder, |
| nvmet_pr_update_holder_rtype, &rtype); |
| if (!status && original_rtype != rtype) |
| nvmet_pr_resv_released(pr, ®->hostid); |
| return status; |
| } |
| |
| if (prkey == holder->rkey) { |
| /* |
| * Same as before, set the new holder first. |
| */ |
| nvmet_pr_set_new_holder(pr, rtype, reg); |
| nvmet_pr_unreg_all_others_by_prkey(req, prkey, &ctrl->hostid, |
| abort); |
| if (original_rtype != rtype) |
| nvmet_pr_resv_released(pr, ®->hostid); |
| return NVME_SC_SUCCESS; |
| } |
| |
| if (prkey) |
| return nvmet_pr_unreg_all_host_by_prkey(req, prkey, |
| &ctrl->hostid, abort); |
| return NVME_SC_INVALID_FIELD | NVME_STATUS_DNR; |
| } |
| |
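| /* |
| * Work item for Preempt and Abort: wait for outstanding commands from |
| * the preempted hosts to finish, then resurrect their per-controller |
| * references and complete the request. |
| */ |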
| static void nvmet_pr_do_abort(struct work_struct *w) |
| { |
| struct nvmet_req *req = container_of(w, struct nvmet_req, r.abort_work); |
| struct nvmet_pr_per_ctrl_ref *pc_ref; |
| struct nvmet_ns *ns = req->ns; |
| unsigned long idx; |
| |
| /* |
| * The target does not support aborting commands, so just wait for each |
| * per-controller ref to drop to zero. |
| */ |
| xa_for_each(&ns->pr_per_ctrl_refs, idx, pc_ref) { |
| if (percpu_ref_is_dying(&pc_ref->ref)) { |
| wait_for_completion(&pc_ref->free_done); |
| reinit_completion(&pc_ref->confirm_done); |
| reinit_completion(&pc_ref->free_done); |
| percpu_ref_resurrect(&pc_ref->ref); |
| } |
| } |
| |
| up(&ns->pr.pr_sem); |
| nvmet_req_complete(req, NVME_SC_SUCCESS); |
| } |
| |
| static u16 __nvmet_execute_pr_acquire(struct nvmet_req *req, |
| struct nvmet_pr_registrant *reg, |
| u8 acquire_act, |
| u8 rtype, |
| struct nvmet_pr_acquire_data *d) |
| { |
| u16 status; |
| |
| switch (acquire_act) { |
| case NVME_PR_ACQUIRE_ACT_ACQUIRE: |
| status = nvmet_pr_acquire(req, reg, rtype); |
| goto out; |
| case NVME_PR_ACQUIRE_ACT_PREEMPT: |
| status = nvmet_pr_preempt(req, reg, rtype, d, false); |
| goto inc_gen; |
| case NVME_PR_ACQUIRE_ACT_PREEMPT_AND_ABORT: |
| status = nvmet_pr_preempt(req, reg, rtype, d, true); |
| goto inc_gen; |
| default: |
| req->error_loc = offsetof(struct nvme_common_command, cdw10); |
| status = NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR; |
| goto out; |
| } |
| inc_gen: |
| if (!status) |
| atomic_inc(&req->ns->pr.generation); |
| out: |
| return status; |
| } |
| |
| static void nvmet_execute_pr_acquire(struct nvmet_req *req) |
| { |
| u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10); |
| bool ignore_key = nvmet_pr_parse_ignore_key(cdw10); |
| /* Reservation type, bit 15:08 */ |
| u8 rtype = (u8)((cdw10 >> 8) & 0xff); |
| /* Reservation acquire action, bit 02:00 */ |
| u8 acquire_act = cdw10 & 0x07; |
| struct nvmet_ctrl *ctrl = req->sq->ctrl; |
| struct nvmet_pr_acquire_data *d = NULL; |
| struct nvmet_pr *pr = &req->ns->pr; |
| struct nvmet_pr_registrant *reg; |
| u16 status = NVME_SC_SUCCESS; |
| |
| if (ignore_key || |
| rtype < NVME_PR_WRITE_EXCLUSIVE || |
| rtype > NVME_PR_EXCLUSIVE_ACCESS_ALL_REGS) { |
| status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR; |
| goto out; |
| } |
| |
| d = kmalloc(sizeof(*d), GFP_KERNEL); |
| if (!d) { |
| status = NVME_SC_INTERNAL; |
| goto out; |
| } |
| |
| status = nvmet_copy_from_sgl(req, 0, d, sizeof(*d)); |
| if (status) |
| goto free_data; |
| |
| status = NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR; |
| down(&pr->pr_sem); |
| list_for_each_entry_rcu(reg, &pr->registrant_list, entry) { |
| if (uuid_equal(®->hostid, &ctrl->hostid) && |
| reg->rkey == le64_to_cpu(d->crkey)) { |
| status = __nvmet_execute_pr_acquire(req, reg, |
| acquire_act, rtype, d); |
| break; |
| } |
| } |
| |
| if (!status && acquire_act == NVME_PR_ACQUIRE_ACT_PREEMPT_AND_ABORT) { |
| kfree(d); |
| INIT_WORK(&req->r.abort_work, nvmet_pr_do_abort); |
| queue_work(nvmet_wq, &req->r.abort_work); |
| return; |
| } |
| |
| up(&pr->pr_sem); |
| |
| free_data: |
| kfree(d); |
| out: |
| nvmet_req_complete(req, status); |
| } |
| |
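| /* |
| * Release the reservation if @reg is the current holder and the |
| * requested type matches. For types other than Write Exclusive and |
| * Exclusive Access, notify the other registered hosts that the |
| * reservation was released. |
| */ |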
| static u16 nvmet_pr_release(struct nvmet_req *req, |
| struct nvmet_pr_registrant *reg, |
| u8 rtype) |
| { |
| struct nvmet_pr *pr = &req->ns->pr; |
| struct nvmet_pr_registrant *holder; |
| u8 original_rtype; |
| |
| holder = rcu_dereference_protected(pr->holder, 1); |
| if (!holder || reg != holder) |
| return NVME_SC_SUCCESS; |
| |
| original_rtype = holder->rtype; |
| if (original_rtype != rtype) |
| return NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR; |
| |
| rcu_assign_pointer(pr->holder, NULL); |
| |
| if (original_rtype != NVME_PR_WRITE_EXCLUSIVE && |
| original_rtype != NVME_PR_EXCLUSIVE_ACCESS) |
| nvmet_pr_resv_released(pr, ®->hostid); |
| |
| return NVME_SC_SUCCESS; |
| } |
| |
| static void nvmet_pr_clear(struct nvmet_req *req) |
| { |
| struct nvmet_pr_registrant *reg, *tmp; |
| struct nvmet_pr *pr = &req->ns->pr; |
| |
| rcu_assign_pointer(pr->holder, NULL); |
| |
| list_for_each_entry_safe(reg, tmp, &pr->registrant_list, entry) { |
| list_del_rcu(®->entry); |
| if (!uuid_equal(&req->sq->ctrl->hostid, ®->hostid)) |
| nvmet_pr_resv_preempted(pr, ®->hostid); |
| kfree_rcu(reg, rcu); |
| } |
| |
| atomic_inc(&pr->generation); |
| } |
| |
| static u16 __nvmet_execute_pr_release(struct nvmet_req *req, |
| struct nvmet_pr_registrant *reg, |
| u8 release_act, u8 rtype) |
| { |
| switch (release_act) { |
| case NVME_PR_RELEASE_ACT_RELEASE: |
| return nvmet_pr_release(req, reg, rtype); |
| case NVME_PR_RELEASE_ACT_CLEAR: |
| nvmet_pr_clear(req); |
| return NVME_SC_SUCCESS; |
| default: |
| req->error_loc = offsetof(struct nvme_common_command, cdw10); |
| return NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR; |
| } |
| } |
| |
| static void nvmet_execute_pr_release(struct nvmet_req *req) |
| { |
| u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10); |
| bool ignore_key = nvmet_pr_parse_ignore_key(cdw10); |
| u8 rtype = (u8)((cdw10 >> 8) & 0xff); /* Reservation type, bit 15:08 */ |
| u8 release_act = cdw10 & 0x07; /* Reservation release action, bit 02:00 */ |
| struct nvmet_ctrl *ctrl = req->sq->ctrl; |
| struct nvmet_pr *pr = &req->ns->pr; |
| struct nvmet_pr_release_data *d; |
| struct nvmet_pr_registrant *reg; |
| u16 status; |
| |
| if (ignore_key) { |
| status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR; |
| goto out; |
| } |
| |
| d = kmalloc(sizeof(*d), GFP_KERNEL); |
| if (!d) { |
| status = NVME_SC_INTERNAL; |
| goto out; |
| } |
| |
| status = nvmet_copy_from_sgl(req, 0, d, sizeof(*d)); |
| if (status) |
| goto free_data; |
| |
| status = NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR; |
| down(&pr->pr_sem); |
| list_for_each_entry_rcu(reg, &pr->registrant_list, entry) { |
| if (uuid_equal(®->hostid, &ctrl->hostid) && |
| reg->rkey == le64_to_cpu(d->crkey)) { |
| status = __nvmet_execute_pr_release(req, reg, |
| release_act, rtype); |
| break; |
| } |
| } |
| up(&pr->pr_sem); |
| free_data: |
| kfree(d); |
| out: |
| nvmet_req_complete(req, status); |
| } |
| |
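| /* |
| * Reservation Report: only the extended data structure is supported |
| * because the fabrics host identifier is 128 bits. The output is |
| * truncated to the length requested in cdw10, but the returned |
| * registrant count always reflects every registrant. |
| */ |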
| static void nvmet_execute_pr_report(struct nvmet_req *req) |
| { |
| u32 cdw11 = le32_to_cpu(req->cmd->common.cdw11); |
| u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10); |
| u32 num_bytes = 4 * (cdw10 + 1); /* cdw10 is the 0's based number of dwords */ |
| u8 eds = cdw11 & 1; /* Extended data structure, bit 00 */ |
| struct nvme_registered_ctrl_ext *ctrl_eds; |
| struct nvme_reservation_status_ext *data; |
| struct nvmet_pr *pr = &req->ns->pr; |
| struct nvmet_pr_registrant *holder; |
| struct nvmet_pr_registrant *reg; |
| u16 num_ctrls = 0; |
| u16 status; |
| u8 rtype; |
| |
| /* |
| * The nvmet hostid (uuid_t) is 128 bits, so only the extended data |
| * structure can represent it. |
| */ |
| if (!eds) { |
| req->error_loc = offsetof(struct nvme_common_command, cdw11); |
| status = NVME_SC_HOST_ID_INCONSIST | NVME_STATUS_DNR; |
| goto out; |
| } |
| |
| if (num_bytes < sizeof(struct nvme_reservation_status_ext)) { |
| req->error_loc = offsetof(struct nvme_common_command, cdw10); |
| status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR; |
| goto out; |
| } |
| |
| data = kzalloc(num_bytes, GFP_KERNEL); |
| if (!data) { |
| status = NVME_SC_INTERNAL; |
| goto out; |
| } |
| data->gen = cpu_to_le32(atomic_read(&pr->generation)); |
| data->ptpls = 0; |
| ctrl_eds = data->regctl_eds; |
| |
| rcu_read_lock(); |
| holder = rcu_dereference(pr->holder); |
| rtype = holder ? holder->rtype : 0; |
| data->rtype = rtype; |
| |
| list_for_each_entry_rcu(reg, &pr->registrant_list, entry) { |
| num_ctrls++; |
| /* |
| * Keep iterating so that the total number of registrants is |
| * counted even when they do not all fit in the buffer. |
| */ |
| if (((void *)ctrl_eds + sizeof(*ctrl_eds)) > |
| ((void *)data + num_bytes)) |
| continue; |
| /* |
| * The target uses dynamic controllers, so report the controller |
| * ID as 0xffff (NVME_CNTLID_DYNAMIC). |
| */ |
| ctrl_eds->cntlid = cpu_to_le16(NVME_CNTLID_DYNAMIC); |
| if (rtype == NVME_PR_WRITE_EXCLUSIVE_ALL_REGS || |
| rtype == NVME_PR_EXCLUSIVE_ACCESS_ALL_REGS) |
| ctrl_eds->rcsts = 1; |
| if (reg == holder) |
| ctrl_eds->rcsts = 1; |
| uuid_copy((uuid_t *)&ctrl_eds->hostid, ®->hostid); |
| ctrl_eds->rkey = cpu_to_le64(reg->rkey); |
| ctrl_eds++; |
| } |
| rcu_read_unlock(); |
| |
| put_unaligned_le16(num_ctrls, data->regctl); |
| status = nvmet_copy_to_sgl(req, 0, data, num_bytes); |
| kfree(data); |
| out: |
| nvmet_req_complete(req, status); |
| } |
| |
| u16 nvmet_parse_pr_cmd(struct nvmet_req *req) |
| { |
| struct nvme_command *cmd = req->cmd; |
| |
| switch (cmd->common.opcode) { |
| case nvme_cmd_resv_register: |
| req->execute = nvmet_execute_pr_register; |
| break; |
| case nvme_cmd_resv_acquire: |
| req->execute = nvmet_execute_pr_acquire; |
| break; |
| case nvme_cmd_resv_release: |
| req->execute = nvmet_execute_pr_release; |
| break; |
| case nvme_cmd_resv_report: |
| req->execute = nvmet_execute_pr_report; |
| break; |
| default: |
| return 1; |
| } |
| return NVME_SC_SUCCESS; |
| } |
| |
| static bool nvmet_is_req_write_cmd_group(struct nvmet_req *req) |
| { |
| u8 opcode = req->cmd->common.opcode; |
| |
| if (req->sq->qid) { |
| switch (opcode) { |
| case nvme_cmd_flush: |
| case nvme_cmd_write: |
| case nvme_cmd_write_zeroes: |
| case nvme_cmd_dsm: |
| case nvme_cmd_zone_append: |
| case nvme_cmd_zone_mgmt_send: |
| return true; |
| default: |
| return false; |
| } |
| } |
| return false; |
| } |
| |
| static bool nvmet_is_req_read_cmd_group(struct nvmet_req *req) |
| { |
| u8 opcode = req->cmd->common.opcode; |
| |
| if (req->sq->qid) { |
| switch (opcode) { |
| case nvme_cmd_read: |
| case nvme_cmd_zone_mgmt_recv: |
| return true; |
| default: |
| return false; |
| } |
| } |
| return false; |
| } |
| |
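| /* |
| * Called in the I/O path to enforce the reservation holder's access |
| * rules on commands issued by hosts other than the holder. |
| */ |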
| u16 nvmet_pr_check_cmd_access(struct nvmet_req *req) |
| { |
| struct nvmet_ctrl *ctrl = req->sq->ctrl; |
| struct nvmet_pr_registrant *holder; |
| struct nvmet_ns *ns = req->ns; |
| struct nvmet_pr *pr = &ns->pr; |
| u16 status = NVME_SC_SUCCESS; |
| |
| rcu_read_lock(); |
| holder = rcu_dereference(pr->holder); |
| if (!holder) |
| goto unlock; |
| if (uuid_equal(&ctrl->hostid, &holder->hostid)) |
| goto unlock; |
| |
| /* |
| * Commands in the Reservation command group are checked when they |
| * are executed, so allow them through here. |
| */ |
| switch (holder->rtype) { |
| case NVME_PR_WRITE_EXCLUSIVE: |
| if (nvmet_is_req_write_cmd_group(req)) |
| status = NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR; |
| break; |
| case NVME_PR_EXCLUSIVE_ACCESS: |
| if (nvmet_is_req_read_cmd_group(req) || |
| nvmet_is_req_write_cmd_group(req)) |
| status = NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR; |
| break; |
| case NVME_PR_WRITE_EXCLUSIVE_REG_ONLY: |
| case NVME_PR_WRITE_EXCLUSIVE_ALL_REGS: |
| if ((nvmet_is_req_write_cmd_group(req)) && |
| !nvmet_pr_find_registrant(pr, &ctrl->hostid)) |
| status = NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR; |
| break; |
| case NVME_PR_EXCLUSIVE_ACCESS_REG_ONLY: |
| case NVME_PR_EXCLUSIVE_ACCESS_ALL_REGS: |
| if ((nvmet_is_req_read_cmd_group(req) || |
| nvmet_is_req_write_cmd_group(req)) && |
| !nvmet_pr_find_registrant(pr, &ctrl->hostid)) |
| status = NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR; |
| break; |
| default: |
| pr_warn("the reservation type is set wrong, type:%d\n", |
| holder->rtype); |
| break; |
| } |
| |
| unlock: |
| rcu_read_unlock(); |
| if (status) |
| req->error_loc = offsetof(struct nvme_common_command, opcode); |
| return status; |
| } |
| |
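| /* |
| * Take a reference on the per-controller/per-namespace ref for this |
| * command. This fails while the ref has been killed by a Preempt and |
| * Abort from another host. |
| */ |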
| u16 nvmet_pr_get_ns_pc_ref(struct nvmet_req *req) |
| { |
| struct nvmet_pr_per_ctrl_ref *pc_ref; |
| |
| pc_ref = xa_load(&req->ns->pr_per_ctrl_refs, |
| req->sq->ctrl->cntlid); |
| if (unlikely(!percpu_ref_tryget_live(&pc_ref->ref))) |
| return NVME_SC_INTERNAL; |
| req->pc_ref = pc_ref; |
| return NVME_SC_SUCCESS; |
| } |
| |
| static void nvmet_pr_ctrl_ns_all_cmds_done(struct percpu_ref *ref) |
| { |
| struct nvmet_pr_per_ctrl_ref *pc_ref = |
| container_of(ref, struct nvmet_pr_per_ctrl_ref, ref); |
| |
| complete(&pc_ref->free_done); |
| } |
| |
| static int nvmet_pr_alloc_and_insert_pc_ref(struct nvmet_ns *ns, |
| unsigned long idx, |
| uuid_t *hostid) |
| { |
| struct nvmet_pr_per_ctrl_ref *pc_ref; |
| int ret; |
| |
| pc_ref = kmalloc(sizeof(*pc_ref), GFP_ATOMIC); |
| if (!pc_ref) |
| return -ENOMEM; |
| |
| ret = percpu_ref_init(&pc_ref->ref, nvmet_pr_ctrl_ns_all_cmds_done, |
| PERCPU_REF_ALLOW_REINIT, GFP_KERNEL); |
| if (ret) |
| goto free; |
| |
| init_completion(&pc_ref->free_done); |
| init_completion(&pc_ref->confirm_done); |
| uuid_copy(&pc_ref->hostid, hostid); |
| |
| ret = xa_insert(&ns->pr_per_ctrl_refs, idx, pc_ref, GFP_KERNEL); |
| if (ret) |
| goto exit; |
| return ret; |
| exit: |
| percpu_ref_exit(&pc_ref->ref); |
| free: |
| kfree(pc_ref); |
| return ret; |
| } |
| |
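| /* |
| * Set up per-controller reservation state: the reservation log FIFO and |
| * a per-controller reference on every PR-enabled namespace. Called with |
| * the subsys lock held. |
| */ |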
| int nvmet_ctrl_init_pr(struct nvmet_ctrl *ctrl) |
| { |
| struct nvmet_subsys *subsys = ctrl->subsys; |
| struct nvmet_pr_per_ctrl_ref *pc_ref; |
| struct nvmet_ns *ns = NULL; |
| unsigned long idx; |
| int ret; |
| |
| ctrl->pr_log_mgr.counter = 0; |
| ctrl->pr_log_mgr.lost_count = 0; |
| mutex_init(&ctrl->pr_log_mgr.lock); |
| INIT_KFIFO(ctrl->pr_log_mgr.log_queue); |
| |
| /* |
| * We hold the subsys lock here, so a namespace that is not yet in |
| * subsys->namespaces cannot be enabled and cannot have called |
| * nvmet_pr_init_ns(); see nvmet_ns_enable() for details. Checking |
| * ns->pr.enable is therefore sufficient. |
| */ |
| xa_for_each(&subsys->namespaces, idx, ns) { |
| if (ns->pr.enable) { |
| ret = nvmet_pr_alloc_and_insert_pc_ref(ns, ctrl->cntlid, |
| &ctrl->hostid); |
| if (ret) |
| goto free_per_ctrl_refs; |
| } |
| } |
| return 0; |
| |
| free_per_ctrl_refs: |
| xa_for_each(&subsys->namespaces, idx, ns) { |
| if (ns->pr.enable) { |
| pc_ref = xa_erase(&ns->pr_per_ctrl_refs, ctrl->cntlid); |
| if (pc_ref) |
| percpu_ref_exit(&pc_ref->ref); |
| kfree(pc_ref); |
| } |
| } |
| return ret; |
| } |
| |
| void nvmet_ctrl_destroy_pr(struct nvmet_ctrl *ctrl) |
| { |
| struct nvmet_pr_per_ctrl_ref *pc_ref; |
| struct nvmet_ns *ns; |
| unsigned long idx; |
| |
| kfifo_free(&ctrl->pr_log_mgr.log_queue); |
| mutex_destroy(&ctrl->pr_log_mgr.lock); |
| |
| xa_for_each(&ctrl->subsys->namespaces, idx, ns) { |
| if (ns->pr.enable) { |
| pc_ref = xa_erase(&ns->pr_per_ctrl_refs, ctrl->cntlid); |
| if (pc_ref) |
| percpu_ref_exit(&pc_ref->ref); |
| kfree(pc_ref); |
| } |
| } |
| } |
| |
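| /* |
| * Initialize reservation state when a namespace is enabled and allocate |
| * a per-controller reference for every existing controller; see |
| * nvmet_ns_enable(). |
| */ |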
| int nvmet_pr_init_ns(struct nvmet_ns *ns) |
| { |
| struct nvmet_subsys *subsys = ns->subsys; |
| struct nvmet_pr_per_ctrl_ref *pc_ref; |
| struct nvmet_ctrl *ctrl = NULL; |
| unsigned long idx; |
| int ret; |
| |
| ns->pr.holder = NULL; |
| atomic_set(&ns->pr.generation, 0); |
| sema_init(&ns->pr.pr_sem, 1); |
| INIT_LIST_HEAD(&ns->pr.registrant_list); |
| ns->pr.notify_mask = 0; |
| |
| xa_init(&ns->pr_per_ctrl_refs); |
| |
| list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) { |
| ret = nvmet_pr_alloc_and_insert_pc_ref(ns, ctrl->cntlid, |
| &ctrl->hostid); |
| if (ret) |
| goto free_per_ctrl_refs; |
| } |
| return 0; |
| |
| free_per_ctrl_refs: |
| xa_for_each(&ns->pr_per_ctrl_refs, idx, pc_ref) { |
| xa_erase(&ns->pr_per_ctrl_refs, idx); |
| percpu_ref_exit(&pc_ref->ref); |
| kfree(pc_ref); |
| } |
| return ret; |
| } |
| |
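| /* |
| * Tear down reservation state when a namespace is disabled. No commands |
| * can be in flight on the namespace at this point, so the registrants |
| * and per-controller references are freed directly. |
| */ |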
| void nvmet_pr_exit_ns(struct nvmet_ns *ns) |
| { |
| struct nvmet_pr_registrant *reg, *tmp; |
| struct nvmet_pr_per_ctrl_ref *pc_ref; |
| struct nvmet_pr *pr = &ns->pr; |
| unsigned long idx; |
| |
| list_for_each_entry_safe(reg, tmp, &pr->registrant_list, entry) { |
| list_del(®->entry); |
| kfree(reg); |
| } |
| |
| xa_for_each(&ns->pr_per_ctrl_refs, idx, pc_ref) { |
| /* |
| * No commands are outstanding on this namespace, so pc_ref can be |
| * freed safely. |
| */ |
| pc_ref = xa_erase(&ns->pr_per_ctrl_refs, idx); |
| percpu_ref_exit(&pc_ref->ref); |
| kfree(pc_ref); |
| } |
| |
| xa_destroy(&ns->pr_per_ctrl_refs); |
| } |