| // SPDX-License-Identifier: GPL-2.0-or-later |
| /* vnode and volume validity verification. |
| * |
| * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved. |
| * Written by David Howells (dhowells@redhat.com) |
| */ |
| |
| #include <linux/kernel.h> |
| #include <linux/module.h> |
| #include <linux/sched.h> |
| #include "internal.h" |
| |
| /* |
| * See if the server we've just talked to is currently excluded. |
| */ |
| static bool __afs_is_server_excluded(struct afs_operation *op, struct afs_volume *volume) |
| { |
| const struct afs_server_entry *se; |
| const struct afs_server_list *slist; |
| bool is_excluded = true; |
| int i; |
| |
| rcu_read_lock(); |
| |
| slist = rcu_dereference(volume->servers); |
| for (i = 0; i < slist->nr_servers; i++) { |
| se = &slist->servers[i]; |
| if (op->server == se->server) { |
| is_excluded = test_bit(AFS_SE_EXCLUDED, &se->flags); |
| break; |
| } |
| } |
| |
| rcu_read_unlock(); |
| return is_excluded; |
| } |
| |
| /* |
| * Update the volume's server list when the creation time changes and see if |
| * the server we've just talked to is currently excluded. |
| */ |
| static int afs_is_server_excluded(struct afs_operation *op, struct afs_volume *volume) |
| { |
| int ret; |
| |
| if (__afs_is_server_excluded(op, volume)) |
| return 1; |
| |
| set_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags); |
| ret = afs_check_volume_status(op->volume, op); |
| if (ret < 0) |
| return ret; |
| |
| return __afs_is_server_excluded(op, volume); |
| } |
| |
| /* |
| * Handle a change to the volume creation time in the VolSync record. |
| */ |
| static int afs_update_volume_creation_time(struct afs_operation *op, struct afs_volume *volume) |
| { |
| unsigned int snap; |
| time64_t cur = volume->creation_time; |
| time64_t old = op->pre_volsync.creation; |
| time64_t new = op->volsync.creation; |
| int ret; |
| |
| _enter("%llx,%llx,%llx->%llx", volume->vid, cur, old, new); |
| |
| if (cur == TIME64_MIN) { |
| volume->creation_time = new; |
| return 0; |
| } |
| |
| if (new == cur) |
| return 0; |
| |
| /* Try to advance the creation timestamp from what we had before the |
| * operation to what we got back from the server. This should |
| * hopefully ensure that in a race between multiple operations only one |
| * of them will do this. |
| */ |
| if (cur != old) |
| return 0; |
| |
| /* If the creation time changes in an unexpected way, we need to scrub |
| * our caches. For a RW vol, this will only change if the volume is |
| * restored from a backup; for a RO/Backup vol, this will advance when |
| * the volume is updated to a new snapshot (eg. "vos release"). |
| */ |
| if (volume->type == AFSVL_RWVOL) |
| goto regressed; |
| if (volume->type == AFSVL_BACKVOL) { |
| if (new < old) |
| goto regressed; |
| goto advance; |
| } |
| |
| /* We have an RO volume, we need to query the VL server and look at the |
| * server flags to see if RW->RO replication is in progress. |
| */ |
| ret = afs_is_server_excluded(op, volume); |
| if (ret < 0) |
| return ret; |
| if (ret > 0) { |
| snap = atomic_read(&volume->cb_ro_snapshot); |
| trace_afs_cb_v_break(volume->vid, snap, afs_cb_break_volume_excluded); |
| return ret; |
| } |
| |
| advance: |
| snap = atomic_inc_return(&volume->cb_ro_snapshot); |
| trace_afs_cb_v_break(volume->vid, snap, afs_cb_break_for_vos_release); |
| volume->creation_time = new; |
| return 0; |
| |
| regressed: |
| atomic_inc(&volume->cb_scrub); |
| trace_afs_cb_v_break(volume->vid, 0, afs_cb_break_for_creation_regress); |
| volume->creation_time = new; |
| return 0; |
| } |
| |
| /* |
| * Handle a change to the volume update time in the VolSync record. |
| */ |
| static void afs_update_volume_update_time(struct afs_operation *op, struct afs_volume *volume) |
| { |
| enum afs_cb_break_reason reason = afs_cb_break_no_break; |
| time64_t cur = volume->update_time; |
| time64_t old = op->pre_volsync.update; |
| time64_t new = op->volsync.update; |
| |
| _enter("%llx,%llx,%llx->%llx", volume->vid, cur, old, new); |
| |
| if (cur == TIME64_MIN) { |
| volume->update_time = new; |
| return; |
| } |
| |
| if (new == cur) |
| return; |
| |
| /* If the volume update time changes in an unexpected way, we need to |
| * scrub our caches. For a RW vol, this will advance on every |
| * modification op; for a RO/Backup vol, this will advance when the |
| * volume is updated to a new snapshot (eg. "vos release"). |
| */ |
| if (new < old) |
| reason = afs_cb_break_for_update_regress; |
| |
| /* Try to advance the update timestamp from what we had before the |
| * operation to what we got back from the server. This should |
| * hopefully ensure that in a race between multiple operations only one |
| * of them will do this. |
| */ |
| if (cur == old) { |
| if (reason == afs_cb_break_for_update_regress) { |
| atomic_inc(&volume->cb_scrub); |
| trace_afs_cb_v_break(volume->vid, 0, reason); |
| } |
| volume->update_time = new; |
| } |
| } |
| |
| static int afs_update_volume_times(struct afs_operation *op, struct afs_volume *volume) |
| { |
| int ret = 0; |
| |
| if (likely(op->volsync.creation == volume->creation_time && |
| op->volsync.update == volume->update_time)) |
| return 0; |
| |
| mutex_lock(&volume->volsync_lock); |
| if (op->volsync.creation != volume->creation_time) { |
| ret = afs_update_volume_creation_time(op, volume); |
| if (ret < 0) |
| goto out; |
| } |
| if (op->volsync.update != volume->update_time) |
| afs_update_volume_update_time(op, volume); |
| out: |
| mutex_unlock(&volume->volsync_lock); |
| return ret; |
| } |
| |
| /* |
| * Update the state of a volume. Returns 1 to redo the operation from the start. |
| */ |
| int afs_update_volume_state(struct afs_operation *op) |
| { |
| struct afs_volume *volume = op->volume; |
| int ret; |
| |
| _enter("%llx", op->volume->vid); |
| |
| if (op->volsync.creation != TIME64_MIN || op->volsync.update != TIME64_MIN) { |
| ret = afs_update_volume_times(op, volume); |
| if (ret != 0) { |
| _leave(" = %d", ret); |
| return ret; |
| } |
| } |
| |
| return 0; |
| } |
| |
| /* |
| * mark the data attached to an inode as obsolete due to a write on the server |
| * - might also want to ditch all the outstanding writes and dirty pages |
| */ |
| static void afs_zap_data(struct afs_vnode *vnode) |
| { |
| _enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode); |
| |
| afs_invalidate_cache(vnode, 0); |
| |
| /* nuke all the non-dirty pages that aren't locked, mapped or being |
| * written back in a regular file and completely discard the pages in a |
| * directory or symlink */ |
| if (S_ISREG(vnode->netfs.inode.i_mode)) |
| invalidate_remote_inode(&vnode->netfs.inode); |
| else |
| invalidate_inode_pages2(vnode->netfs.inode.i_mapping); |
| } |
| |
| /* |
| * Check to see if we have a server currently serving this volume and that it |
| * hasn't been reinitialised or dropped from the list. |
| */ |
| static bool afs_check_server_good(struct afs_vnode *vnode) |
| { |
| struct afs_server_list *slist; |
| struct afs_server *server; |
| bool good; |
| int i; |
| |
| if (vnode->cb_fs_s_break == atomic_read(&vnode->volume->cell->fs_s_break)) |
| return true; |
| |
| rcu_read_lock(); |
| |
| slist = rcu_dereference(vnode->volume->servers); |
| for (i = 0; i < slist->nr_servers; i++) { |
| server = slist->servers[i].server; |
| if (server == vnode->cb_server) { |
| good = (vnode->cb_s_break == server->cb_s_break); |
| rcu_read_unlock(); |
| return good; |
| } |
| } |
| |
| rcu_read_unlock(); |
| return false; |
| } |
| |
| /* |
| * Check the validity of a vnode/inode. |
| */ |
| bool afs_check_validity(struct afs_vnode *vnode) |
| { |
| enum afs_cb_break_reason need_clear = afs_cb_break_no_break; |
| time64_t now = ktime_get_real_seconds(); |
| unsigned int cb_break; |
| int seq; |
| |
| do { |
| seq = read_seqbegin(&vnode->cb_lock); |
| cb_break = vnode->cb_break; |
| |
| if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) { |
| if (vnode->cb_v_break != atomic_read(&vnode->volume->cb_v_break)) |
| need_clear = afs_cb_break_for_v_break; |
| else if (!afs_check_server_good(vnode)) |
| need_clear = afs_cb_break_for_s_reinit; |
| else if (test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) |
| need_clear = afs_cb_break_for_zap; |
| else if (vnode->cb_expires_at - 10 <= now) |
| need_clear = afs_cb_break_for_lapsed; |
| } else if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) { |
| ; |
| } else { |
| need_clear = afs_cb_break_no_promise; |
| } |
| |
| } while (read_seqretry(&vnode->cb_lock, seq)); |
| |
| if (need_clear == afs_cb_break_no_break) |
| return true; |
| |
| write_seqlock(&vnode->cb_lock); |
| if (need_clear == afs_cb_break_no_promise) |
| vnode->cb_v_break = atomic_read(&vnode->volume->cb_v_break); |
| else if (cb_break == vnode->cb_break) |
| __afs_break_callback(vnode, need_clear); |
| else |
| trace_afs_cb_miss(&vnode->fid, need_clear); |
| write_sequnlock(&vnode->cb_lock); |
| return false; |
| } |
| |
| /* |
| * Returns true if the pagecache is still valid. Does not sleep. |
| */ |
| bool afs_pagecache_valid(struct afs_vnode *vnode) |
| { |
| if (unlikely(test_bit(AFS_VNODE_DELETED, &vnode->flags))) { |
| if (vnode->netfs.inode.i_nlink) |
| clear_nlink(&vnode->netfs.inode); |
| return true; |
| } |
| |
| if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags) && |
| afs_check_validity(vnode)) |
| return true; |
| |
| return false; |
| } |
| |
| /* |
| * validate a vnode/inode |
| * - there are several things we need to check |
| * - parent dir data changes (rm, rmdir, rename, mkdir, create, link, |
| * symlink) |
| * - parent dir metadata changed (security changes) |
| * - dentry data changed (write, truncate) |
| * - dentry metadata changed (security changes) |
| */ |
| int afs_validate(struct afs_vnode *vnode, struct key *key) |
| { |
| int ret; |
| |
| _enter("{v={%llx:%llu} fl=%lx},%x", |
| vnode->fid.vid, vnode->fid.vnode, vnode->flags, |
| key_serial(key)); |
| |
| if (afs_pagecache_valid(vnode)) |
| goto valid; |
| |
| down_write(&vnode->validate_lock); |
| |
| /* if the promise has expired, we need to check the server again to get |
| * a new promise - note that if the (parent) directory's metadata was |
| * changed then the security may be different and we may no longer have |
| * access */ |
| if (!test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) { |
| _debug("not promised"); |
| ret = afs_fetch_status(vnode, key, false, NULL); |
| if (ret < 0) { |
| if (ret == -ENOENT) { |
| set_bit(AFS_VNODE_DELETED, &vnode->flags); |
| ret = -ESTALE; |
| } |
| goto error_unlock; |
| } |
| _debug("new promise [fl=%lx]", vnode->flags); |
| } |
| |
| if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) { |
| _debug("file already deleted"); |
| ret = -ESTALE; |
| goto error_unlock; |
| } |
| |
| /* if the vnode's data version number changed then its contents are |
| * different */ |
| if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) |
| afs_zap_data(vnode); |
| up_write(&vnode->validate_lock); |
| valid: |
| _leave(" = 0"); |
| return 0; |
| |
| error_unlock: |
| up_write(&vnode->validate_lock); |
| _leave(" = %d", ret); |
| return ret; |
| } |