| // SPDX-License-Identifier: GPL-2.0-or-later |
| /* |
| * Copyright (c) 2021-2024 Oracle. All Rights Reserved. |
| * Author: Darrick J. Wong <djwong@kernel.org> |
| */ |
| #include "xfs.h" |
| #include "xfs_fs.h" |
| #include "xfs_shared.h" |
| #include "xfs_format.h" |
| #include "xfs_trans_resv.h" |
| #include "xfs_mount.h" |
| #include "xfs_log_format.h" |
| #include "xfs_trans.h" |
| #include "xfs_inode.h" |
| #include "xfs_icache.h" |
| #include "xfs_iwalk.h" |
| #include "xfs_ialloc.h" |
| #include "xfs_dir2.h" |
| #include "xfs_dir2_priv.h" |
| #include "xfs_ag.h" |
| #include "xfs_parent.h" |
| #include "scrub/scrub.h" |
| #include "scrub/common.h" |
| #include "scrub/repair.h" |
| #include "scrub/xfile.h" |
| #include "scrub/xfarray.h" |
| #include "scrub/iscan.h" |
| #include "scrub/orphanage.h" |
| #include "scrub/nlinks.h" |
| #include "scrub/trace.h" |
| #include "scrub/readdir.h" |
| #include "scrub/tempfile.h" |
| #include "scrub/listxattr.h" |
| |
| /* |
| * Live Inode Link Count Checking |
| * ============================== |
| * |
| * Inode link counts are "summary" metadata, in the sense that they are |
| * computed as the number of directory entries referencing each file on the |
| * filesystem. Therefore, we compute the correct link counts by creating a |
| * shadow link count structure and walking every inode. |
| */ |
| |
| /* Set us up to scrub inode link counts. */ |
| int |
| xchk_setup_nlinks( |
| struct xfs_scrub *sc) |
| { |
| struct xchk_nlink_ctrs *xnc; |
| int error; |
| |
| xchk_fsgates_enable(sc, XCHK_FSGATES_DIRENTS); |
| |
| if (xchk_could_repair(sc)) { |
| error = xrep_setup_nlinks(sc); |
| if (error) |
| return error; |
| } |
| |
| xnc = kvzalloc(sizeof(struct xchk_nlink_ctrs), XCHK_GFP_FLAGS); |
| if (!xnc) |
| return -ENOMEM; |
| xnc->xname.name = xnc->namebuf; |
| xnc->sc = sc; |
| sc->buf = xnc; |
| |
| return xchk_setup_fs(sc); |
| } |
| |
| /* |
| * Part 1: Collecting file link counts. For each file, we create a shadow link |
| * counting structure, then walk the entire directory tree, incrementing parent |
| * and child link counts for each directory entry seen. |
| * |
| * To avoid false corruption reports in part 2, any failure in this part must |
| * set the INCOMPLETE flag even when a negative errno is returned. This care |
| * must be taken with certain errno values (i.e. EFSBADCRC, EFSCORRUPTED, |
| * ECANCELED) that are absorbed into a scrub state flag update by |
| * xchk_*_process_error. Scrub and repair share the same incore data |
| * structures, so the INCOMPLETE flag is critical to prevent a repair based on |
| * insufficient information. |
| * |
| * Because we are scanning a live filesystem, it's possible that another thread |
| * will try to update the link counts for an inode that we've already scanned. |
| * This will cause our counts to be incorrect. Therefore, we hook all |
| * directory entry updates because that is when link count updates occur. By |
| * shadowing transaction updates in this manner, live nlink check can ensure by |
| * locking the inode and the shadow structure that its own copies are not out |
| * of date. Because the hook code runs in a different process context from the |
| * scrub code and the scrub state flags are not accessed atomically, failures |
| * in the hook code must abort the iscan and the scrubber must notice the |
| * aborted scan and set the incomplete flag. |
| * |
| * Note that we use jump labels and srcu notifier hooks to minimize the |
| * overhead when live nlinks is /not/ running. Locking order for nlink |
| * observations is inode ILOCK -> iscan_lock/xchk_nlink_ctrs lock. |
| */ |
| |
| /* |
| * Add a delta to an nlink counter, clamping the value to U32_MAX. Because |
| * XFS_MAXLINK < U32_MAX, the checking code will produce the correct results |
| * even if we lose some precision. |
| */ |
| static inline void |
| careful_add( |
| xfs_nlink_t *nlinkp, |
| int delta) |
| { |
| uint64_t new_value = (uint64_t)(*nlinkp) + delta; |
| |
| BUILD_BUG_ON(XFS_MAXLINK > U32_MAX); |
| *nlinkp = min_t(uint64_t, new_value, U32_MAX); |
| } |
| |
| /* Update incore link count information. Caller must hold the nlinks lock. */ |
| STATIC int |
| xchk_nlinks_update_incore( |
| struct xchk_nlink_ctrs *xnc, |
| xfs_ino_t ino, |
| int parents_delta, |
| int backrefs_delta, |
| int children_delta) |
| { |
| struct xchk_nlink nl; |
| int error; |
| |
| if (!xnc->nlinks) |
| return 0; |
| |
| error = xfarray_load_sparse(xnc->nlinks, ino, &nl); |
| if (error) |
| return error; |
| |
| trace_xchk_nlinks_update_incore(xnc->sc->mp, ino, &nl, parents_delta, |
| backrefs_delta, children_delta); |
| |
| careful_add(&nl.parents, parents_delta); |
| careful_add(&nl.backrefs, backrefs_delta); |
| careful_add(&nl.children, children_delta); |
| |
| nl.flags |= XCHK_NLINK_WRITTEN; |
| error = xfarray_store(xnc->nlinks, ino, &nl); |
| if (error == -EFBIG) { |
| /* |
| * EFBIG means we tried to store data at too high a byte offset |
| * in the sparse array. IOWs, we cannot complete the check and |
| * must notify userspace that the check was incomplete. |
| */ |
| error = -ECANCELED; |
| } |
| return error; |
| } |
| |
| /* |
| * Apply a link count change from the regular filesystem into our shadow link |
| * count structure based on a directory update in progress. |
| */ |
| STATIC int |
| xchk_nlinks_live_update( |
| struct notifier_block *nb, |
| unsigned long action, |
| void *data) |
| { |
| struct xfs_dir_update_params *p = data; |
| struct xchk_nlink_ctrs *xnc; |
| int error; |
| |
| xnc = container_of(nb, struct xchk_nlink_ctrs, dhook.dirent_hook.nb); |
| |
| /* |
| * Ignore temporary directories being used to stage dir repairs, since |
| * we don't bump the link counts of the children. |
| */ |
| if (xrep_is_tempfile(p->dp)) |
| return NOTIFY_DONE; |
| |
| trace_xchk_nlinks_live_update(xnc->sc->mp, p->dp, action, p->ip->i_ino, |
| p->delta, p->name->name, p->name->len); |
| |
| /* |
| * If we've already scanned @dp, update the number of parents that link |
| * to @ip. If @ip is a subdirectory, update the number of child links |
| * going out of @dp. |
| */ |
| if (xchk_iscan_want_live_update(&xnc->collect_iscan, p->dp->i_ino)) { |
| mutex_lock(&xnc->lock); |
| error = xchk_nlinks_update_incore(xnc, p->ip->i_ino, p->delta, |
| 0, 0); |
| if (!error && S_ISDIR(VFS_IC(p->ip)->i_mode)) |
| error = xchk_nlinks_update_incore(xnc, p->dp->i_ino, 0, |
| 0, p->delta); |
| mutex_unlock(&xnc->lock); |
| if (error) |
| goto out_abort; |
| } |
| |
| /* |
| * If @ip is a subdirectory and we've already scanned it, update the |
| * number of backrefs pointing to @dp. |
| */ |
| if (S_ISDIR(VFS_IC(p->ip)->i_mode) && |
| xchk_iscan_want_live_update(&xnc->collect_iscan, p->ip->i_ino)) { |
| mutex_lock(&xnc->lock); |
| error = xchk_nlinks_update_incore(xnc, p->dp->i_ino, 0, |
| p->delta, 0); |
| mutex_unlock(&xnc->lock); |
| if (error) |
| goto out_abort; |
| } |
| |
| return NOTIFY_DONE; |
| |
| out_abort: |
| xchk_iscan_abort(&xnc->collect_iscan); |
| return NOTIFY_DONE; |
| } |
| |
| /* Bump the observed link count for the inode referenced by this entry. */ |
| STATIC int |
| xchk_nlinks_collect_dirent( |
| struct xfs_scrub *sc, |
| struct xfs_inode *dp, |
| xfs_dir2_dataptr_t dapos, |
| const struct xfs_name *name, |
| xfs_ino_t ino, |
| void *priv) |
| { |
| struct xchk_nlink_ctrs *xnc = priv; |
| bool dot = false, dotdot = false; |
| int error; |
| |
| /* Does this name make sense? */ |
| if (name->len == 0 || !xfs_dir2_namecheck(name->name, name->len)) { |
| error = -ECANCELED; |
| goto out_abort; |
| } |
| |
| if (name->len == 1 && name->name[0] == '.') |
| dot = true; |
| else if (name->len == 2 && name->name[0] == '.' && |
| name->name[1] == '.') |
| dotdot = true; |
| |
| /* Don't accept a '.' entry that points somewhere else. */ |
| if (dot && ino != dp->i_ino) { |
| error = -ECANCELED; |
| goto out_abort; |
| } |
| |
| /* Don't accept an invalid inode number. */ |
| if (!xfs_verify_dir_ino(sc->mp, ino)) { |
| error = -ECANCELED; |
| goto out_abort; |
| } |
| |
| /* Update the shadow link counts if we haven't already failed. */ |
| |
| if (xchk_iscan_aborted(&xnc->collect_iscan)) { |
| error = -ECANCELED; |
| goto out_incomplete; |
| } |
| |
| trace_xchk_nlinks_collect_dirent(sc->mp, dp, ino, name); |
| |
| mutex_lock(&xnc->lock); |
| |
| /* |
| * If this is a dotdot entry, it is a back link from dp to ino. How |
| * we handle this depends on whether or not dp is the root directory. |
| * |
| * The root directory is its own parent, so we pretend the dotdot entry |
| * establishes the "parent" of the root directory. Increment the |
| * number of parents of the root directory. |
| * |
| * Otherwise, increment the number of backrefs pointing back to ino. |
| * |
| * If the filesystem has parent pointers, we walk the pptrs to |
| * determine the backref count. |
| */ |
| if (dotdot) { |
| if (dp == sc->mp->m_rootip) |
| error = xchk_nlinks_update_incore(xnc, ino, 1, 0, 0); |
| else if (!xfs_has_parent(sc->mp)) |
| error = xchk_nlinks_update_incore(xnc, ino, 0, 1, 0); |
| else |
| error = 0; |
| if (error) |
| goto out_unlock; |
| } |
| |
| /* |
| * If this dirent is a forward link from dp to ino, increment the |
| * number of parents linking into ino. |
| */ |
| if (!dot && !dotdot) { |
| error = xchk_nlinks_update_incore(xnc, ino, 1, 0, 0); |
| if (error) |
| goto out_unlock; |
| } |
| |
| /* |
| * If this dirent is a forward link to a subdirectory, increment the |
| * number of child links of dp. |
| */ |
| if (!dot && !dotdot && name->type == XFS_DIR3_FT_DIR) { |
| error = xchk_nlinks_update_incore(xnc, dp->i_ino, 0, 0, 1); |
| if (error) |
| goto out_unlock; |
| } |
| |
| mutex_unlock(&xnc->lock); |
| return 0; |
| |
| out_unlock: |
| mutex_unlock(&xnc->lock); |
| out_abort: |
| xchk_iscan_abort(&xnc->collect_iscan); |
| out_incomplete: |
| xchk_set_incomplete(sc); |
| return error; |
| } |
| |
| /* Bump the backref count for the inode referenced by this parent pointer. */ |
| STATIC int |
| xchk_nlinks_collect_pptr( |
| struct xfs_scrub *sc, |
| struct xfs_inode *ip, |
| unsigned int attr_flags, |
| const unsigned char *name, |
| unsigned int namelen, |
| const void *value, |
| unsigned int valuelen, |
| void *priv) |
| { |
| struct xfs_name xname = { |
| .name = name, |
| .len = namelen, |
| }; |
| struct xchk_nlink_ctrs *xnc = priv; |
| const struct xfs_parent_rec *pptr_rec = value; |
| xfs_ino_t parent_ino; |
| int error; |
| |
| /* Update the shadow link counts if we haven't already failed. */ |
| |
| if (xchk_iscan_aborted(&xnc->collect_iscan)) { |
| error = -ECANCELED; |
| goto out_incomplete; |
| } |
| |
| if (!(attr_flags & XFS_ATTR_PARENT)) |
| return 0; |
| |
| error = xfs_parent_from_attr(sc->mp, attr_flags, name, namelen, value, |
| valuelen, &parent_ino, NULL); |
| if (error) |
| return error; |
| |
| trace_xchk_nlinks_collect_pptr(sc->mp, ip, &xname, pptr_rec); |
| |
| mutex_lock(&xnc->lock); |
| |
| error = xchk_nlinks_update_incore(xnc, parent_ino, 0, 1, 0); |
| if (error) |
| goto out_unlock; |
| |
| mutex_unlock(&xnc->lock); |
| return 0; |
| |
| out_unlock: |
| mutex_unlock(&xnc->lock); |
| xchk_iscan_abort(&xnc->collect_iscan); |
| out_incomplete: |
| xchk_set_incomplete(sc); |
| return error; |
| } |
| |
| /* Walk a directory to bump the observed link counts of the children. */ |
| STATIC int |
| xchk_nlinks_collect_dir( |
| struct xchk_nlink_ctrs *xnc, |
| struct xfs_inode *dp) |
| { |
| struct xfs_scrub *sc = xnc->sc; |
| unsigned int lock_mode; |
| int error = 0; |
| |
| /* |
| * Ignore temporary directories being used to stage dir repairs, since |
| * we don't bump the link counts of the children. |
| */ |
| if (xrep_is_tempfile(dp)) |
| return 0; |
| |
| /* Prevent anyone from changing this directory while we walk it. */ |
| xfs_ilock(dp, XFS_IOLOCK_SHARED); |
| lock_mode = xfs_ilock_data_map_shared(dp); |
| |
| /* |
| * The dotdot entry of an unlinked directory still points to the last |
| * parent, but the parent no longer links to this directory. Skip the |
| * directory to avoid overcounting. |
| */ |
| if (VFS_I(dp)->i_nlink == 0) |
| goto out_unlock; |
| |
| /* |
| * We cannot count file links if the directory looks as though it has |
| * been zapped by the inode record repair code. |
| */ |
| if (xchk_dir_looks_zapped(dp)) { |
| error = -EBUSY; |
| goto out_abort; |
| } |
| |
| error = xchk_dir_walk(sc, dp, xchk_nlinks_collect_dirent, xnc); |
| if (error == -ECANCELED) { |
| error = 0; |
| goto out_unlock; |
| } |
| if (error) |
| goto out_abort; |
| |
| /* Walk the parent pointers to get real backref counts. */ |
| if (xfs_has_parent(sc->mp)) { |
| /* |
| * If the extended attributes look as though they has been |
| * zapped by the inode record repair code, we cannot scan for |
| * parent pointers. |
| */ |
| if (xchk_pptr_looks_zapped(dp)) { |
| error = -EBUSY; |
| goto out_unlock; |
| } |
| |
| error = xchk_xattr_walk(sc, dp, xchk_nlinks_collect_pptr, NULL, |
| xnc); |
| if (error == -ECANCELED) { |
| error = 0; |
| goto out_unlock; |
| } |
| if (error) |
| goto out_abort; |
| } |
| |
| xchk_iscan_mark_visited(&xnc->collect_iscan, dp); |
| goto out_unlock; |
| |
| out_abort: |
| xchk_set_incomplete(sc); |
| xchk_iscan_abort(&xnc->collect_iscan); |
| out_unlock: |
| xfs_iunlock(dp, lock_mode); |
| xfs_iunlock(dp, XFS_IOLOCK_SHARED); |
| return error; |
| } |
| |
| /* If this looks like a valid pointer, count it. */ |
| static inline int |
| xchk_nlinks_collect_metafile( |
| struct xchk_nlink_ctrs *xnc, |
| xfs_ino_t ino) |
| { |
| if (!xfs_verify_ino(xnc->sc->mp, ino)) |
| return 0; |
| |
| trace_xchk_nlinks_collect_metafile(xnc->sc->mp, ino); |
| return xchk_nlinks_update_incore(xnc, ino, 1, 0, 0); |
| } |
| |
| /* Bump the link counts of metadata files rooted in the superblock. */ |
| STATIC int |
| xchk_nlinks_collect_metafiles( |
| struct xchk_nlink_ctrs *xnc) |
| { |
| struct xfs_mount *mp = xnc->sc->mp; |
| int error = -ECANCELED; |
| |
| |
| if (xchk_iscan_aborted(&xnc->collect_iscan)) |
| goto out_incomplete; |
| |
| mutex_lock(&xnc->lock); |
| error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_rbmino); |
| if (error) |
| goto out_abort; |
| |
| error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_rsumino); |
| if (error) |
| goto out_abort; |
| |
| error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_uquotino); |
| if (error) |
| goto out_abort; |
| |
| error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_gquotino); |
| if (error) |
| goto out_abort; |
| |
| error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_pquotino); |
| if (error) |
| goto out_abort; |
| mutex_unlock(&xnc->lock); |
| |
| return 0; |
| |
| out_abort: |
| mutex_unlock(&xnc->lock); |
| xchk_iscan_abort(&xnc->collect_iscan); |
| out_incomplete: |
| xchk_set_incomplete(xnc->sc); |
| return error; |
| } |
| |
| /* Advance the collection scan cursor for this non-directory file. */ |
| static inline int |
| xchk_nlinks_collect_file( |
| struct xchk_nlink_ctrs *xnc, |
| struct xfs_inode *ip) |
| { |
| xfs_ilock(ip, XFS_IOLOCK_SHARED); |
| xchk_iscan_mark_visited(&xnc->collect_iscan, ip); |
| xfs_iunlock(ip, XFS_IOLOCK_SHARED); |
| return 0; |
| } |
| |
| /* Walk all directories and count inode links. */ |
| STATIC int |
| xchk_nlinks_collect( |
| struct xchk_nlink_ctrs *xnc) |
| { |
| struct xfs_scrub *sc = xnc->sc; |
| struct xfs_inode *ip; |
| int error; |
| |
| /* Count the rt and quota files that are rooted in the superblock. */ |
| error = xchk_nlinks_collect_metafiles(xnc); |
| if (error) |
| return error; |
| |
| /* |
| * Set up for a potentially lengthy filesystem scan by reducing our |
| * transaction resource usage for the duration. Specifically: |
| * |
| * Cancel the transaction to release the log grant space while we scan |
| * the filesystem. |
| * |
| * Create a new empty transaction to eliminate the possibility of the |
| * inode scan deadlocking on cyclical metadata. |
| * |
| * We pass the empty transaction to the file scanning function to avoid |
| * repeatedly cycling empty transactions. This can be done even though |
| * we take the IOLOCK to quiesce the file because empty transactions |
| * do not take sb_internal. |
| */ |
| xchk_trans_cancel(sc); |
| error = xchk_trans_alloc_empty(sc); |
| if (error) |
| return error; |
| |
| while ((error = xchk_iscan_iter(&xnc->collect_iscan, &ip)) == 1) { |
| if (S_ISDIR(VFS_I(ip)->i_mode)) |
| error = xchk_nlinks_collect_dir(xnc, ip); |
| else |
| error = xchk_nlinks_collect_file(xnc, ip); |
| xchk_irele(sc, ip); |
| if (error) |
| break; |
| |
| if (xchk_should_terminate(sc, &error)) |
| break; |
| } |
| xchk_iscan_iter_finish(&xnc->collect_iscan); |
| if (error) { |
| xchk_set_incomplete(sc); |
| /* |
| * If we couldn't grab an inode that was busy with a state |
| * change, change the error code so that we exit to userspace |
| * as quickly as possible. |
| */ |
| if (error == -EBUSY) |
| return -ECANCELED; |
| return error; |
| } |
| |
| /* |
| * Switch out for a real transaction in preparation for building a new |
| * tree. |
| */ |
| xchk_trans_cancel(sc); |
| return xchk_setup_fs(sc); |
| } |
| |
| /* |
| * Part 2: Comparing file link counters. Walk each inode and compare the link |
| * counts against our shadow information; and then walk each shadow link count |
| * structure (that wasn't covered in the first part), comparing it against the |
| * file. |
| */ |
| |
| /* Read the observed link count for comparison with the actual inode. */ |
| STATIC int |
| xchk_nlinks_comparison_read( |
| struct xchk_nlink_ctrs *xnc, |
| xfs_ino_t ino, |
| struct xchk_nlink *obs) |
| { |
| struct xchk_nlink nl; |
| int error; |
| |
| error = xfarray_load_sparse(xnc->nlinks, ino, &nl); |
| if (error) |
| return error; |
| |
| nl.flags |= (XCHK_NLINK_COMPARE_SCANNED | XCHK_NLINK_WRITTEN); |
| |
| error = xfarray_store(xnc->nlinks, ino, &nl); |
| if (error == -EFBIG) { |
| /* |
| * EFBIG means we tried to store data at too high a byte offset |
| * in the sparse array. IOWs, we cannot complete the check and |
| * must notify userspace that the check was incomplete. This |
| * shouldn't really happen outside of the collection phase. |
| */ |
| xchk_set_incomplete(xnc->sc); |
| return -ECANCELED; |
| } |
| if (error) |
| return error; |
| |
| /* Copy the counters, but do not expose the internal state. */ |
| obs->parents = nl.parents; |
| obs->backrefs = nl.backrefs; |
| obs->children = nl.children; |
| obs->flags = 0; |
| return 0; |
| } |
| |
| /* Check our link count against an inode. */ |
| STATIC int |
| xchk_nlinks_compare_inode( |
| struct xchk_nlink_ctrs *xnc, |
| struct xfs_inode *ip) |
| { |
| struct xchk_nlink obs; |
| struct xfs_scrub *sc = xnc->sc; |
| uint64_t total_links; |
| unsigned int actual_nlink; |
| int error; |
| |
| /* |
| * Ignore temporary files being used to stage repairs, since we assume |
| * they're correct for non-directories, and the directory repair code |
| * doesn't bump the link counts for the children. |
| */ |
| if (xrep_is_tempfile(ip)) |
| return 0; |
| |
| xfs_ilock(ip, XFS_ILOCK_SHARED); |
| mutex_lock(&xnc->lock); |
| |
| if (xchk_iscan_aborted(&xnc->collect_iscan)) { |
| xchk_set_incomplete(xnc->sc); |
| error = -ECANCELED; |
| goto out_scanlock; |
| } |
| |
| error = xchk_nlinks_comparison_read(xnc, ip->i_ino, &obs); |
| if (error) |
| goto out_scanlock; |
| |
| /* |
| * If we don't have ftype to get an accurate count of the subdirectory |
| * entries in this directory, take advantage of the fact that on a |
| * consistent ftype=0 filesystem, the number of subdirectory |
| * backreferences (dotdot entries) pointing towards this directory |
| * should be equal to the number of subdirectory entries in the |
| * directory. |
| */ |
| if (!xfs_has_ftype(sc->mp) && S_ISDIR(VFS_I(ip)->i_mode)) |
| obs.children = obs.backrefs; |
| |
| total_links = xchk_nlink_total(ip, &obs); |
| actual_nlink = VFS_I(ip)->i_nlink; |
| |
| trace_xchk_nlinks_compare_inode(sc->mp, ip, &obs); |
| |
| /* |
| * If we found so many parents that we'd overflow i_nlink, we must flag |
| * this as a corruption. The VFS won't let users increase the link |
| * count, but it will let them decrease it. |
| */ |
| if (total_links > XFS_NLINK_PINNED) { |
| xchk_ino_set_corrupt(sc, ip->i_ino); |
| goto out_corrupt; |
| } else if (total_links > XFS_MAXLINK) { |
| xchk_ino_set_warning(sc, ip->i_ino); |
| } |
| |
| /* Link counts should match. */ |
| if (total_links != actual_nlink) { |
| xchk_ino_set_corrupt(sc, ip->i_ino); |
| goto out_corrupt; |
| } |
| |
| if (S_ISDIR(VFS_I(ip)->i_mode) && actual_nlink > 0) { |
| /* |
| * The collection phase ignores directories with zero link |
| * count, so we ignore them here too. |
| * |
| * The number of subdirectory backreferences (dotdot entries) |
| * pointing towards this directory should be equal to the |
| * number of subdirectory entries in the directory. |
| */ |
| if (obs.children != obs.backrefs) |
| xchk_ino_xref_set_corrupt(sc, ip->i_ino); |
| } else { |
| /* |
| * Non-directories and unlinked directories should not have |
| * back references. |
| */ |
| if (obs.backrefs != 0) { |
| xchk_ino_set_corrupt(sc, ip->i_ino); |
| goto out_corrupt; |
| } |
| |
| /* |
| * Non-directories and unlinked directories should not have |
| * children. |
| */ |
| if (obs.children != 0) { |
| xchk_ino_set_corrupt(sc, ip->i_ino); |
| goto out_corrupt; |
| } |
| } |
| |
| if (ip == sc->mp->m_rootip) { |
| /* |
| * For the root of a directory tree, both the '.' and '..' |
| * entries should point to the root directory. The dotdot |
| * entry is counted as a parent of the root /and/ a backref of |
| * the root directory. |
| */ |
| if (obs.parents != 1) { |
| xchk_ino_set_corrupt(sc, ip->i_ino); |
| goto out_corrupt; |
| } |
| } else if (actual_nlink > 0) { |
| /* |
| * Linked files that are not the root directory should have at |
| * least one parent. |
| */ |
| if (obs.parents == 0) { |
| xchk_ino_set_corrupt(sc, ip->i_ino); |
| goto out_corrupt; |
| } |
| } |
| |
| out_corrupt: |
| if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) |
| error = -ECANCELED; |
| out_scanlock: |
| mutex_unlock(&xnc->lock); |
| xfs_iunlock(ip, XFS_ILOCK_SHARED); |
| return error; |
| } |
| |
| /* |
| * Check our link count against an inode that wasn't checked previously. This |
| * is intended to catch directories with dangling links, though we could be |
| * racing with inode allocation in other threads. |
| */ |
| STATIC int |
| xchk_nlinks_compare_inum( |
| struct xchk_nlink_ctrs *xnc, |
| xfs_ino_t ino) |
| { |
| struct xchk_nlink obs; |
| struct xfs_mount *mp = xnc->sc->mp; |
| struct xfs_trans *tp = xnc->sc->tp; |
| struct xfs_buf *agi_bp; |
| struct xfs_inode *ip; |
| int error; |
| |
| /* |
| * The first iget failed, so try again with the variant that returns |
| * either an incore inode or the AGI buffer. If the function returns |
| * EINVAL/ENOENT, it should have passed us the AGI buffer so that we |
| * can guarantee that the inode won't be allocated while we check for |
| * a zero link count in the observed link count data. |
| */ |
| error = xchk_iget_agi(xnc->sc, ino, &agi_bp, &ip); |
| if (!error) { |
| /* Actually got an inode, so use the inode compare. */ |
| error = xchk_nlinks_compare_inode(xnc, ip); |
| xchk_irele(xnc->sc, ip); |
| return error; |
| } |
| if (error == -ENOENT || error == -EINVAL) { |
| /* No inode was found. Check for zero link count below. */ |
| error = 0; |
| } |
| if (error) |
| goto out_agi; |
| |
| /* Ensure that we have protected against inode allocation/freeing. */ |
| if (agi_bp == NULL) { |
| ASSERT(agi_bp != NULL); |
| xchk_set_incomplete(xnc->sc); |
| return -ECANCELED; |
| } |
| |
| if (xchk_iscan_aborted(&xnc->collect_iscan)) { |
| xchk_set_incomplete(xnc->sc); |
| error = -ECANCELED; |
| goto out_agi; |
| } |
| |
| mutex_lock(&xnc->lock); |
| error = xchk_nlinks_comparison_read(xnc, ino, &obs); |
| if (error) |
| goto out_scanlock; |
| |
| trace_xchk_nlinks_check_zero(mp, ino, &obs); |
| |
| /* |
| * If we can't grab the inode, the link count had better be zero. We |
| * still hold the AGI to prevent inode allocation/freeing. |
| */ |
| if (xchk_nlink_total(NULL, &obs) != 0) { |
| xchk_ino_set_corrupt(xnc->sc, ino); |
| error = -ECANCELED; |
| } |
| |
| out_scanlock: |
| mutex_unlock(&xnc->lock); |
| out_agi: |
| if (agi_bp) |
| xfs_trans_brelse(tp, agi_bp); |
| return error; |
| } |
| |
| /* |
| * Try to visit every inode in the filesystem to compare the link count. Move |
| * on if we can't grab an inode, since we'll revisit unchecked nlink records in |
| * the second part. |
| */ |
| static int |
| xchk_nlinks_compare_iter( |
| struct xchk_nlink_ctrs *xnc, |
| struct xfs_inode **ipp) |
| { |
| int error; |
| |
| do { |
| error = xchk_iscan_iter(&xnc->compare_iscan, ipp); |
| } while (error == -EBUSY); |
| |
| return error; |
| } |
| |
| /* Compare the link counts we observed against the live information. */ |
| STATIC int |
| xchk_nlinks_compare( |
| struct xchk_nlink_ctrs *xnc) |
| { |
| struct xchk_nlink nl; |
| struct xfs_scrub *sc = xnc->sc; |
| struct xfs_inode *ip; |
| xfarray_idx_t cur = XFARRAY_CURSOR_INIT; |
| int error; |
| |
| if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) |
| return 0; |
| |
| /* |
| * Create a new empty transaction so that we can advance the iscan |
| * cursor without deadlocking if the inobt has a cycle and push on the |
| * inactivation workqueue. |
| */ |
| xchk_trans_cancel(sc); |
| error = xchk_trans_alloc_empty(sc); |
| if (error) |
| return error; |
| |
| /* |
| * Use the inobt to walk all allocated inodes to compare the link |
| * counts. Inodes skipped by _compare_iter will be tried again in the |
| * next phase of the scan. |
| */ |
| xchk_iscan_start(sc, 0, 0, &xnc->compare_iscan); |
| while ((error = xchk_nlinks_compare_iter(xnc, &ip)) == 1) { |
| error = xchk_nlinks_compare_inode(xnc, ip); |
| xchk_iscan_mark_visited(&xnc->compare_iscan, ip); |
| xchk_irele(sc, ip); |
| if (error) |
| break; |
| |
| if (xchk_should_terminate(sc, &error)) |
| break; |
| } |
| xchk_iscan_iter_finish(&xnc->compare_iscan); |
| xchk_iscan_teardown(&xnc->compare_iscan); |
| if (error) |
| return error; |
| |
| if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) |
| return 0; |
| |
| /* |
| * Walk all the non-null nlink observations that weren't checked in the |
| * previous step. |
| */ |
| mutex_lock(&xnc->lock); |
| while ((error = xfarray_iter(xnc->nlinks, &cur, &nl)) == 1) { |
| xfs_ino_t ino = cur - 1; |
| |
| if (nl.flags & XCHK_NLINK_COMPARE_SCANNED) |
| continue; |
| |
| mutex_unlock(&xnc->lock); |
| |
| error = xchk_nlinks_compare_inum(xnc, ino); |
| if (error) |
| return error; |
| |
| if (xchk_should_terminate(xnc->sc, &error)) |
| return error; |
| |
| mutex_lock(&xnc->lock); |
| } |
| mutex_unlock(&xnc->lock); |
| |
| return error; |
| } |
| |
| /* Tear down everything associated with a nlinks check. */ |
| static void |
| xchk_nlinks_teardown_scan( |
| void *priv) |
| { |
| struct xchk_nlink_ctrs *xnc = priv; |
| |
| /* Discourage any hook functions that might be running. */ |
| xchk_iscan_abort(&xnc->collect_iscan); |
| |
| xfs_dir_hook_del(xnc->sc->mp, &xnc->dhook); |
| |
| xfarray_destroy(xnc->nlinks); |
| xnc->nlinks = NULL; |
| |
| xchk_iscan_teardown(&xnc->collect_iscan); |
| mutex_destroy(&xnc->lock); |
| xnc->sc = NULL; |
| } |
| |
| /* |
| * Scan all inodes in the entire filesystem to generate link count data. If |
| * the scan is successful, the counts will be left alive for a repair. If any |
| * error occurs, we'll tear everything down. |
| */ |
| STATIC int |
| xchk_nlinks_setup_scan( |
| struct xfs_scrub *sc, |
| struct xchk_nlink_ctrs *xnc) |
| { |
| struct xfs_mount *mp = sc->mp; |
| char *descr; |
| unsigned long long max_inos; |
| xfs_agnumber_t last_agno = mp->m_sb.sb_agcount - 1; |
| xfs_agino_t first_agino, last_agino; |
| int error; |
| |
| mutex_init(&xnc->lock); |
| |
| /* Retry iget every tenth of a second for up to 30 seconds. */ |
| xchk_iscan_start(sc, 30000, 100, &xnc->collect_iscan); |
| |
| /* |
| * Set up enough space to store an nlink record for the highest |
| * possible inode number in this system. |
| */ |
| xfs_agino_range(mp, last_agno, &first_agino, &last_agino); |
| max_inos = XFS_AGINO_TO_INO(mp, last_agno, last_agino) + 1; |
| descr = xchk_xfile_descr(sc, "file link counts"); |
| error = xfarray_create(descr, min(XFS_MAXINUMBER + 1, max_inos), |
| sizeof(struct xchk_nlink), &xnc->nlinks); |
| kfree(descr); |
| if (error) |
| goto out_teardown; |
| |
| /* |
| * Hook into the directory entry code so that we can capture updates to |
| * file link counts. The hook only triggers for inodes that were |
| * already scanned, and the scanner thread takes each inode's ILOCK, |
| * which means that any in-progress inode updates will finish before we |
| * can scan the inode. |
| */ |
| ASSERT(sc->flags & XCHK_FSGATES_DIRENTS); |
| xfs_dir_hook_setup(&xnc->dhook, xchk_nlinks_live_update); |
| error = xfs_dir_hook_add(mp, &xnc->dhook); |
| if (error) |
| goto out_teardown; |
| |
| /* Use deferred cleanup to pass the inode link count data to repair. */ |
| sc->buf_cleanup = xchk_nlinks_teardown_scan; |
| return 0; |
| |
| out_teardown: |
| xchk_nlinks_teardown_scan(xnc); |
| return error; |
| } |
| |
| /* Scrub the link count of all inodes on the filesystem. */ |
| int |
| xchk_nlinks( |
| struct xfs_scrub *sc) |
| { |
| struct xchk_nlink_ctrs *xnc = sc->buf; |
| int error = 0; |
| |
| /* Set ourselves up to check link counts on the live filesystem. */ |
| error = xchk_nlinks_setup_scan(sc, xnc); |
| if (error) |
| return error; |
| |
| /* Walk all inodes, picking up link count information. */ |
| error = xchk_nlinks_collect(xnc); |
| if (!xchk_xref_process_error(sc, 0, 0, &error)) |
| return error; |
| |
| /* Fail fast if we're not playing with a full dataset. */ |
| if (xchk_iscan_aborted(&xnc->collect_iscan)) |
| xchk_set_incomplete(sc); |
| if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE) |
| return 0; |
| |
| /* Compare link counts. */ |
| error = xchk_nlinks_compare(xnc); |
| if (!xchk_xref_process_error(sc, 0, 0, &error)) |
| return error; |
| |
| /* Check one last time for an incomplete dataset. */ |
| if (xchk_iscan_aborted(&xnc->collect_iscan)) |
| xchk_set_incomplete(sc); |
| |
| return 0; |
| } |