| // SPDX-License-Identifier: GPL-2.0-or-later |
| /* |
| * Copyright (c) 2021-2024 Oracle. All Rights Reserved. |
| * Author: Darrick J. Wong <djwong@kernel.org> |
| */ |
| #include "xfs.h" |
| #include "xfs_fs.h" |
| #include "xfs_shared.h" |
| #include "xfs_format.h" |
| #include "xfs_trans_resv.h" |
| #include "xfs_mount.h" |
| #include "xfs_log_format.h" |
| #include "xfs_trans.h" |
| #include "xfs_inode.h" |
| #include "xfs_btree.h" |
| #include "xfs_ialloc.h" |
| #include "xfs_ialloc_btree.h" |
| #include "xfs_ag.h" |
| #include "xfs_error.h" |
| #include "xfs_bit.h" |
| #include "xfs_icache.h" |
| #include "scrub/scrub.h" |
| #include "scrub/iscan.h" |
| #include "scrub/common.h" |
| #include "scrub/trace.h" |
| |
| /* |
| * Live File Scan |
| * ============== |
| * |
| * Live file scans walk every inode in a live filesystem. This is more or |
| * less like a regular iwalk, except that when we're advancing the scan cursor, |
| * we must ensure that inodes cannot be added or deleted anywhere between the |
| * old cursor value and the new cursor value. If we're advancing the cursor |
| * by one inode, the caller must hold that inode; if we're finding the next |
| * inode to scan, we must grab the AGI and hold it until we've updated the |
| * scan cursor. |
| * |
| * Callers are expected to use this code to scan all files in the filesystem to |
| * construct a new metadata index of some kind. The scan races against other |
| * live updates, which means there must be a provision to update the new index |
| * when updates are made to inodes that have already been scanned. The iscan |
| * lock can be used in live update hook code to stop the scan and protect this |
| * data structure. |
| * |
| * To keep the new index up to date with other metadata updates being made to |
| * the live filesystem, it is assumed that the caller will add hooks as needed |
| * to be notified when a metadata update occurs. The inode scanner must tell |
| * the hook code when an inode has been visited with xchk_iscan_mark_visited. |
| * Hook functions can use xchk_iscan_want_live_update to decide if the |
| * scanner's observations must be updated. |
| */ |
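| |
| /* |
| * A minimal sketch of the caller-side scan loop, assuming a scrubber that |
| * only needs to record each inode once. The timeout/retry values and the |
| * record_inode() helper are illustrative only, and error handling and hook |
| * setup are elided: |
| * |
| * xchk_iscan_start(sc, 5000, 100, &iscan); |
| * while ((error = xchk_iscan_iter(&iscan, &ip)) == 1) { |
| * xfs_ilock(ip, XFS_ILOCK_SHARED); |
| * error = record_inode(sc, ip); |
| * xchk_iscan_mark_visited(&iscan, ip); |
| * xfs_iunlock(ip, XFS_ILOCK_SHARED); |
| * xchk_irele(sc, ip); |
| * if (error) |
| * break; |
| * } |
| * xchk_iscan_iter_finish(&iscan); |
| * xchk_iscan_teardown(&iscan); |
| */ |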
| |
| /* |
| * If the inobt record @rec covers @iscan->skip_ino, mark the inode free so |
| * that the scan ignores that inode. |
| */ |
| STATIC void |
| xchk_iscan_mask_skipino( |
| struct xchk_iscan *iscan, |
| struct xfs_perag *pag, |
| struct xfs_inobt_rec_incore *rec, |
| xfs_agino_t lastrecino) |
| { |
| struct xfs_scrub *sc = iscan->sc; |
| struct xfs_mount *mp = sc->mp; |
| xfs_agnumber_t skip_agno = XFS_INO_TO_AGNO(mp, iscan->skip_ino); |
| xfs_agino_t skip_agino = XFS_INO_TO_AGINO(mp, iscan->skip_ino); |
| |
| if (pag->pag_agno != skip_agno) |
| return; |
| if (skip_agino < rec->ir_startino) |
| return; |
| if (skip_agino > lastrecino) |
| return; |
| |
| rec->ir_free |= xfs_inobt_maskn(skip_agino - rec->ir_startino, 1); |
| } |
| |
| /* |
| * Set *cursor to the next allocated inode after whatever it's set to now. |
| * If there are no more inodes in this AG, cursor is set to NULLAGINO. |
| */ |
| STATIC int |
| xchk_iscan_find_next( |
| struct xchk_iscan *iscan, |
| struct xfs_buf *agi_bp, |
| struct xfs_perag *pag, |
| xfs_inofree_t *allocmaskp, |
| xfs_agino_t *cursor, |
| uint8_t *nr_inodesp) |
| { |
| struct xfs_scrub *sc = iscan->sc; |
| struct xfs_inobt_rec_incore rec; |
| struct xfs_btree_cur *cur; |
| struct xfs_mount *mp = sc->mp; |
| struct xfs_trans *tp = sc->tp; |
| xfs_agnumber_t agno = pag->pag_agno; |
| xfs_agino_t lastino = NULLAGINO; |
| xfs_agino_t first, last; |
| xfs_agino_t agino = *cursor; |
| int has_rec; |
| int error; |
| |
| /* If the cursor is beyond the end of this AG, move to the next one. */ |
| xfs_agino_range(mp, agno, &first, &last); |
| if (agino > last) { |
| *cursor = NULLAGINO; |
| return 0; |
| } |
| |
| /* |
| * Look up the inode chunk for the current cursor position. If there |
| * is no chunk here, we want the next one. |
| */ |
| cur = xfs_inobt_init_cursor(pag, tp, agi_bp); |
| error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &has_rec); |
| if (!error && !has_rec) |
| error = xfs_btree_increment(cur, 0, &has_rec); |
| for (; !error; error = xfs_btree_increment(cur, 0, &has_rec)) { |
| xfs_inofree_t allocmask; |
| |
| /* |
| * If we've run out of inobt records in this AG, move the |
| * cursor on to the next AG and exit. The caller can try |
| * again with the next AG. |
| */ |
| if (!has_rec) { |
| *cursor = NULLAGINO; |
| break; |
| } |
| |
| error = xfs_inobt_get_rec(cur, &rec, &has_rec); |
| if (error) |
| break; |
| if (!has_rec) { |
| error = -EFSCORRUPTED; |
| break; |
| } |
| |
| /* Make sure that we always move forward. */ |
| if (lastino != NULLAGINO && |
| XFS_IS_CORRUPT(mp, lastino >= rec.ir_startino)) { |
| error = -EFSCORRUPTED; |
| break; |
| } |
| lastino = rec.ir_startino + XFS_INODES_PER_CHUNK - 1; |
| |
| /* |
| * If this record only covers inodes that come before the |
| * cursor, advance to the next record. |
| */ |
| if (rec.ir_startino + XFS_INODES_PER_CHUNK <= agino) |
| continue; |
| |
| if (iscan->skip_ino) |
| xchk_iscan_mask_skipino(iscan, pag, &rec, lastino); |
| |
| /* |
| * If the incoming lookup put us in the middle of an inobt |
| * record, mark it and the previous inodes "free" so that the |
| * search for allocated inodes will start at the cursor. |
| * We don't care about ir_freecount here. |
| */ |
| if (agino >= rec.ir_startino) |
| rec.ir_free |= xfs_inobt_maskn(0, |
| agino + 1 - rec.ir_startino); |
| |
| /* |
| * If there are allocated inodes in this chunk, find them |
| * and update the scan cursor. |
| */ |
| allocmask = ~rec.ir_free; |
| if (hweight64(allocmask) > 0) { |
| int next = xfs_lowbit64(allocmask); |
| |
| ASSERT(next >= 0); |
| *cursor = rec.ir_startino + next; |
| *allocmaskp = allocmask >> next; |
| *nr_inodesp = XFS_INODES_PER_CHUNK - next; |
| break; |
| } |
| } |
| |
| xfs_btree_del_cursor(cur, error); |
| return error; |
| } |
| |
| /* |
| * Advance both the scan and the visited cursors. |
| * |
| * The inumber address space for a given filesystem is sparse, which means that |
| * the scan cursor can jump a long ways in a single iter() call. There are no |
| * inodes in these sparse areas, so we must move the visited cursor forward at |
| * the same time so that the scan user can receive live updates for inodes that |
| * may get created once we release the AGI buffer. |
| */ |
| static inline void |
| xchk_iscan_move_cursor( |
| struct xchk_iscan *iscan, |
| xfs_agnumber_t agno, |
| xfs_agino_t agino) |
| { |
| struct xfs_scrub *sc = iscan->sc; |
| struct xfs_mount *mp = sc->mp; |
| xfs_ino_t cursor, visited; |
| |
| BUILD_BUG_ON(XFS_MAXINUMBER == NULLFSINO); |
| |
| /* |
| * Special-case ino == 0 here so that we never set visited_ino to |
| * NULLFSINO when wrapping around EOFS, for that will let through all |
| * live updates. |
| */ |
| cursor = XFS_AGINO_TO_INO(mp, agno, agino); |
| if (cursor == 0) |
| visited = XFS_MAXINUMBER; |
| else |
| visited = cursor - 1; |
| |
| mutex_lock(&iscan->lock); |
| iscan->cursor_ino = cursor; |
| iscan->__visited_ino = visited; |
| trace_xchk_iscan_move_cursor(iscan); |
| mutex_unlock(&iscan->lock); |
| } |
| |
| /* |
| * Mark the inode scan finished across the entire filesystem. Once both |
| * cursors are set to NULLFSINO, the live update predicate will pass all |
| * subsequent updates through to the caller. |
| */ |
| static inline void |
| xchk_iscan_finish( |
| struct xchk_iscan *iscan) |
| { |
| mutex_lock(&iscan->lock); |
| iscan->cursor_ino = NULLFSINO; |
| |
| /* All live updates will be applied from now on */ |
| iscan->__visited_ino = NULLFSINO; |
| |
| mutex_unlock(&iscan->lock); |
| } |
| |
| /* Mark an inode scan finished before we actually scan anything. */ |
| void |
| xchk_iscan_finish_early( |
| struct xchk_iscan *iscan) |
| { |
| ASSERT(iscan->cursor_ino == iscan->scan_start_ino); |
| ASSERT(iscan->__visited_ino == iscan->scan_start_ino); |
| |
| xchk_iscan_finish(iscan); |
| } |
| |
| /* |
| * Grab the AGI to advance the inode scan. Returns 0 if *agi_bpp is now set, |
| * -ECANCELED if the live scan aborted, -EBUSY if the AGI could not be grabbed, |
| * or the usual negative errno. |
| */ |
| STATIC int |
| xchk_iscan_read_agi( |
| struct xchk_iscan *iscan, |
| struct xfs_perag *pag, |
| struct xfs_buf **agi_bpp) |
| { |
| struct xfs_scrub *sc = iscan->sc; |
| unsigned long relax; |
| int ret; |
| |
| if (!xchk_iscan_agi_needs_trylock(iscan)) |
| return xfs_ialloc_read_agi(pag, sc->tp, 0, agi_bpp); |
| |
| relax = msecs_to_jiffies(iscan->iget_retry_delay); |
| do { |
| ret = xfs_ialloc_read_agi(pag, sc->tp, XFS_IALLOC_FLAG_TRYLOCK, |
| agi_bpp); |
| if (ret != -EAGAIN) |
| return ret; |
| if (!iscan->iget_timeout || |
| time_is_before_jiffies(iscan->__iget_deadline)) |
| return -EBUSY; |
| |
| trace_xchk_iscan_agi_retry_wait(iscan); |
| } while (!schedule_timeout_killable(relax) && |
| !xchk_iscan_aborted(iscan)); |
| return -ECANCELED; |
| } |
| |
| /* |
| * Advance ino to the next inode that the inobt thinks is allocated, being |
| * careful to jump to the next AG if we've reached the right end of this AG's |
| * inode btree. Advancing ino effectively means that we've pushed the inode |
| * scan forward, so set the visited cursor to (ino - 1) so that our live update |
| * predicates will track inode allocations in that part of the inode number |
| * key space once we release the AGI buffer. |
| * |
| * Returns 1 if there's a new inode to examine, 0 if we've run out of inodes, |
| * -ECANCELED if the live scan aborted, or the usual negative errno. |
| */ |
| STATIC int |
| xchk_iscan_advance( |
| struct xchk_iscan *iscan, |
| struct xfs_perag **pagp, |
| struct xfs_buf **agi_bpp, |
| xfs_inofree_t *allocmaskp, |
| uint8_t *nr_inodesp) |
| { |
| struct xfs_scrub *sc = iscan->sc; |
| struct xfs_mount *mp = sc->mp; |
| struct xfs_buf *agi_bp; |
| struct xfs_perag *pag; |
| xfs_agnumber_t agno; |
| xfs_agino_t agino; |
| int ret; |
| |
| ASSERT(iscan->cursor_ino >= iscan->__visited_ino); |
| |
| do { |
| if (xchk_iscan_aborted(iscan)) |
| return -ECANCELED; |
| |
| agno = XFS_INO_TO_AGNO(mp, iscan->cursor_ino); |
| pag = xfs_perag_get(mp, agno); |
| if (!pag) |
| return -ECANCELED; |
| |
| ret = xchk_iscan_read_agi(iscan, pag, &agi_bp); |
| if (ret) |
| goto out_pag; |
| |
| agino = XFS_INO_TO_AGINO(mp, iscan->cursor_ino); |
| ret = xchk_iscan_find_next(iscan, agi_bp, pag, allocmaskp, |
| &agino, nr_inodesp); |
| if (ret) |
| goto out_buf; |
| |
| if (agino != NULLAGINO) { |
| /* |
| * Found the next inode in this AG, so return it along |
| * with the AGI buffer and the perag structure to |
| * ensure it cannot go away. |
| */ |
| xchk_iscan_move_cursor(iscan, agno, agino); |
| *agi_bpp = agi_bp; |
| *pagp = pag; |
| return 1; |
| } |
| |
| /* |
| * Did not find any more inodes in this AG, move on to the next |
| * AG. |
| */ |
| agno = (agno + 1) % mp->m_sb.sb_agcount; |
| xchk_iscan_move_cursor(iscan, agno, 0); |
| xfs_trans_brelse(sc->tp, agi_bp); |
| xfs_perag_put(pag); |
| |
| trace_xchk_iscan_advance_ag(iscan); |
| } while (iscan->cursor_ino != iscan->scan_start_ino); |
| |
| xchk_iscan_finish(iscan); |
| return 0; |
| |
| out_buf: |
| xfs_trans_brelse(sc->tp, agi_bp); |
| out_pag: |
| xfs_perag_put(pag); |
| return ret; |
| } |
| |
| /* |
| * Grabbing the inode failed, so we need to back up the scan and ask the caller |
| * to try to _advance the scan again. Returns -EBUSY if we've run out of retry |
| * opportunities, -ECANCELED if the process has a fatal signal pending, or |
| * -EAGAIN if we should try again. |
| */ |
| STATIC int |
| xchk_iscan_iget_retry( |
| struct xchk_iscan *iscan, |
| bool wait) |
| { |
| ASSERT(iscan->cursor_ino == iscan->__visited_ino + 1); |
| |
| if (!iscan->iget_timeout || |
| time_is_before_jiffies(iscan->__iget_deadline)) |
| return -EBUSY; |
| |
| if (wait) { |
| unsigned long relax; |
| |
| /* |
| * Sleep for a period of time to let the rest of the system |
| * catch up. If we return early, someone sent a kill signal to |
| * the calling process. |
| */ |
| relax = msecs_to_jiffies(iscan->iget_retry_delay); |
| trace_xchk_iscan_iget_retry_wait(iscan); |
| |
| if (schedule_timeout_killable(relax) || |
| xchk_iscan_aborted(iscan)) |
| return -ECANCELED; |
| } |
| |
| iscan->cursor_ino--; |
| return -EAGAIN; |
| } |
| |
| /* |
| * For an inode scan, we hold the AGI and want to try to grab a batch of |
| * inodes. Holding the AGI prevents inodegc from clearing freed inodes, |
| * so we must use noretry here. For every inode after the first one in the |
| * batch, we don't want to wait either, so we use noretry there too. |
| * Finally, use dontcache to avoid polluting the cache. |
| */ |
| #define ISCAN_IGET_FLAGS (XFS_IGET_NORETRY | XFS_IGET_DONTCACHE) |
| |
| /* |
| * Grab an inode as part of an inode scan. While scanning this inode, the |
| * caller must ensure that no other threads can modify the inode until a call |
| * to xchk_iscan_visit succeeds. |
| * |
| * Returns the number of incore inodes grabbed; -EAGAIN if the caller should |
| * call xchk_iscan_advance again; -EBUSY if we couldn't grab an inode; |
| * -ECANCELED if there's a fatal signal pending; or some other negative errno. |
| */ |
| STATIC int |
| xchk_iscan_iget( |
| struct xchk_iscan *iscan, |
| struct xfs_perag *pag, |
| struct xfs_buf *agi_bp, |
| xfs_inofree_t allocmask, |
| uint8_t nr_inodes) |
| { |
| struct xfs_scrub *sc = iscan->sc; |
| struct xfs_mount *mp = sc->mp; |
| xfs_ino_t ino = iscan->cursor_ino; |
| unsigned int idx = 0; |
| unsigned int i; |
| int error; |
| |
| ASSERT(iscan->__inodes[0] == NULL); |
| |
| /* Fill the first slot in the inode array. */ |
| error = xfs_iget(sc->mp, sc->tp, ino, ISCAN_IGET_FLAGS, 0, |
| &iscan->__inodes[idx]); |
| |
| trace_xchk_iscan_iget(iscan, error); |
| |
| if (error == -ENOENT || error == -EAGAIN) { |
| xfs_trans_brelse(sc->tp, agi_bp); |
| xfs_perag_put(pag); |
| |
| /* |
| * It's possible that this inode has lost all of its links but |
| * hasn't yet been inactivated. If we don't have a transaction |
| * or it's not writable, flush the inodegc workers and wait. |
| * If we have a non-empty transaction, we must not block on |
| * inodegc, which allocates its own transactions. |
| */ |
| if (sc->tp && !(sc->tp->t_flags & XFS_TRANS_NO_WRITECOUNT)) |
| xfs_inodegc_push(mp); |
| else |
| xfs_inodegc_flush(mp); |
| return xchk_iscan_iget_retry(iscan, true); |
| } |
| |
| if (error == -EINVAL) { |
| xfs_trans_brelse(sc->tp, agi_bp); |
| xfs_perag_put(pag); |
| |
| /* |
| * We thought the inode was allocated, but the inode btree |
| * lookup failed, which means that it was freed since the last |
| * time we advanced the cursor. Back up and try again. This |
| * should never happen since we still hold the AGI buffer from the |
| * inobt check, but we need to be careful about infinite loops. |
| */ |
| return xchk_iscan_iget_retry(iscan, false); |
| } |
| |
| if (error) { |
| xfs_trans_brelse(sc->tp, agi_bp); |
| xfs_perag_put(pag); |
| return error; |
| } |
| idx++; |
| ino++; |
| allocmask >>= 1; |
| |
| /* |
| * Now that we've filled the first slot in __inodes, try to fill the |
| * rest of the batch with consecutively ordered inodes to reduce the |
| * number of _iter calls. Make a bitmap of unallocated inodes from the |
| * zeroes in the inuse bitmap; these inodes will not be scanned, but |
| * the _want_live_update predicate will pass through all live updates. |
| * |
| * If we can't iget an allocated inode, stop and return what we have. |
| */ |
| mutex_lock(&iscan->lock); |
| iscan->__batch_ino = ino - 1; |
| iscan->__skipped_inomask = 0; |
| mutex_unlock(&iscan->lock); |
| |
| for (i = 1; i < nr_inodes; i++, ino++, allocmask >>= 1) { |
| if (!(allocmask & 1)) { |
| ASSERT(!(iscan->__skipped_inomask & (1ULL << i))); |
| |
| mutex_lock(&iscan->lock); |
| iscan->cursor_ino = ino; |
| iscan->__skipped_inomask |= (1ULL << i); |
| mutex_unlock(&iscan->lock); |
| continue; |
| } |
| |
| ASSERT(iscan->__inodes[idx] == NULL); |
| |
| error = xfs_iget(sc->mp, sc->tp, ino, ISCAN_IGET_FLAGS, 0, |
| &iscan->__inodes[idx]); |
| if (error) |
| break; |
| |
| mutex_lock(&iscan->lock); |
| iscan->cursor_ino = ino; |
| mutex_unlock(&iscan->lock); |
| idx++; |
| } |
| |
| trace_xchk_iscan_iget_batch(sc->mp, iscan, nr_inodes, idx); |
| xfs_trans_brelse(sc->tp, agi_bp); |
| xfs_perag_put(pag); |
| return idx; |
| } |
| |
| /* |
| * Advance the visit cursor to reflect skipped inodes beyond whatever we |
| * scanned. |
| */ |
| STATIC void |
| xchk_iscan_finish_batch( |
| struct xchk_iscan *iscan) |
| { |
| xfs_ino_t highest_skipped; |
| |
| mutex_lock(&iscan->lock); |
| |
| if (iscan->__batch_ino != NULLFSINO) { |
| highest_skipped = iscan->__batch_ino + |
| xfs_highbit64(iscan->__skipped_inomask); |
| iscan->__visited_ino = max(iscan->__visited_ino, |
| highest_skipped); |
| |
| trace_xchk_iscan_skip(iscan); |
| } |
| |
| iscan->__batch_ino = NULLFSINO; |
| iscan->__skipped_inomask = 0; |
| |
| mutex_unlock(&iscan->lock); |
| } |
| |
| /* |
| * Advance the inode scan cursor to the next allocated inode and return up to |
| * 64 consecutive allocated inodes starting with the cursor position. |
| */ |
| STATIC int |
| xchk_iscan_iter_batch( |
| struct xchk_iscan *iscan) |
| { |
| struct xfs_scrub *sc = iscan->sc; |
| int ret; |
| |
| xchk_iscan_finish_batch(iscan); |
| |
| if (iscan->iget_timeout) |
| iscan->__iget_deadline = jiffies + |
| msecs_to_jiffies(iscan->iget_timeout); |
| |
| do { |
| struct xfs_buf *agi_bp = NULL; |
| struct xfs_perag *pag = NULL; |
| xfs_inofree_t allocmask = 0; |
| uint8_t nr_inodes = 0; |
| |
| ret = xchk_iscan_advance(iscan, &pag, &agi_bp, &allocmask, |
| &nr_inodes); |
| if (ret != 1) |
| return ret; |
| |
| if (xchk_iscan_aborted(iscan)) { |
| xfs_trans_brelse(sc->tp, agi_bp); |
| xfs_perag_put(pag); |
| ret = -ECANCELED; |
| break; |
| } |
| |
| ret = xchk_iscan_iget(iscan, pag, agi_bp, allocmask, nr_inodes); |
| } while (ret == -EAGAIN); |
| |
| return ret; |
| } |
| |
| /* |
| * Advance the inode scan cursor to the next allocated inode and return the |
| * incore inode structure associated with it. |
| * |
| * Returns 1 if there's a new inode to examine, 0 if we've run out of inodes, |
| * -ECANCELED if the live scan aborted, -EBUSY if the incore inode could not be |
| * grabbed, or the usual negative errno. |
| * |
| * If the function returns -EBUSY and the caller can handle skipping an inode, |
| * it may call this function again to continue the scan with the next allocated |
| * inode. |
| */ |
| int |
| xchk_iscan_iter( |
| struct xchk_iscan *iscan, |
| struct xfs_inode **ipp) |
| { |
| unsigned int i; |
| int error; |
| |
| /* Find a cached inode, or go get another batch. */ |
| for (i = 0; i < XFS_INODES_PER_CHUNK; i++) { |
| if (iscan->__inodes[i]) |
| goto foundit; |
| } |
| |
| error = xchk_iscan_iter_batch(iscan); |
| if (error <= 0) |
| return error; |
| |
| ASSERT(iscan->__inodes[0] != NULL); |
| i = 0; |
| |
| foundit: |
| /* Give the caller our reference. */ |
| *ipp = iscan->__inodes[i]; |
| iscan->__inodes[i] = NULL; |
| return 1; |
| } |
| |
| /* Clean up an xchk_iscan_iter call by dropping any inodes that we still hold. */ |
| void |
| xchk_iscan_iter_finish( |
| struct xchk_iscan *iscan) |
| { |
| struct xfs_scrub *sc = iscan->sc; |
| unsigned int i; |
| |
| for (i = 0; i < XFS_INODES_PER_CHUNK; i++) { |
| if (iscan->__inodes[i]) { |
| xchk_irele(sc, iscan->__inodes[i]); |
| iscan->__inodes[i] = NULL; |
| } |
| } |
| } |
| |
| /* Mark this inode scan finished and release resources. */ |
| void |
| xchk_iscan_teardown( |
| struct xchk_iscan *iscan) |
| { |
| xchk_iscan_iter_finish(iscan); |
| xchk_iscan_finish(iscan); |
| mutex_destroy(&iscan->lock); |
| } |
| |
| /* Pick an AG from which to start a scan. */ |
| static inline xfs_ino_t |
| xchk_iscan_rotor( |
| struct xfs_mount *mp) |
| { |
| static atomic_t agi_rotor; |
| unsigned int r = atomic_inc_return(&agi_rotor) - 1; |
| |
| /* |
| * Rotoring *backwards* through the AGs, so we add one here before |
| * subtracting from the agcount to arrive at an AG number. |
| */ |
| r = (r % mp->m_sb.sb_agcount) + 1; |
| |
| return XFS_AGINO_TO_INO(mp, mp->m_sb.sb_agcount - r, 0); |
| } |
| |
| /* |
| * Set ourselves up to start an inode scan. If the @iget_timeout and |
| * @iget_retry_delay parameters are set, the scan will try to iget each inode |
| * for @iget_timeout milliseconds. If an iget call indicates that the inode is |
| * waiting to be inactivated, the CPU will relax for @iget_retry_delay |
| * milliseconds after pushing the inactivation workers. |
| */ |
| void |
| xchk_iscan_start( |
| struct xfs_scrub *sc, |
| unsigned int iget_timeout, |
| unsigned int iget_retry_delay, |
| struct xchk_iscan *iscan) |
| { |
| xfs_ino_t start_ino; |
| |
| start_ino = xchk_iscan_rotor(sc->mp); |
| |
| iscan->__batch_ino = NULLFSINO; |
| iscan->__skipped_inomask = 0; |
| |
| iscan->sc = sc; |
| clear_bit(XCHK_ISCAN_OPSTATE_ABORTED, &iscan->__opstate); |
| iscan->iget_timeout = iget_timeout; |
| iscan->iget_retry_delay = iget_retry_delay; |
| iscan->__visited_ino = start_ino; |
| iscan->cursor_ino = start_ino; |
| iscan->scan_start_ino = start_ino; |
| mutex_init(&iscan->lock); |
| memset(iscan->__inodes, 0, sizeof(iscan->__inodes)); |
| |
| trace_xchk_iscan_start(iscan, start_ino); |
| } |
| |
| /* |
| * Mark this inode as having been visited. Callers must hold a sufficiently |
| * exclusive lock on the inode to prevent concurrent modifications. |
| */ |
| void |
| xchk_iscan_mark_visited( |
| struct xchk_iscan *iscan, |
| struct xfs_inode *ip) |
| { |
| mutex_lock(&iscan->lock); |
| iscan->__visited_ino = ip->i_ino; |
| trace_xchk_iscan_visit(iscan); |
| mutex_unlock(&iscan->lock); |
| } |
| |
| /* |
| * Did we skip this inode because it wasn't allocated when we loaded the batch? |
| * If so, it is newly allocated and will not be scanned. All live updates to |
| * this inode must be passed to the caller to maintain scan correctness. |
| */ |
| static inline bool |
| xchk_iscan_skipped( |
| const struct xchk_iscan *iscan, |
| xfs_ino_t ino) |
| { |
| if (iscan->__batch_ino == NULLFSINO) |
| return false; |
| if (ino < iscan->__batch_ino) |
| return false; |
| if (ino >= iscan->__batch_ino + XFS_INODES_PER_CHUNK) |
| return false; |
| |
| return iscan->__skipped_inomask & (1ULL << (ino - iscan->__batch_ino)); |
| } |
| |
| /* |
| * Do we need a live update for this inode? This is true if the scanner thread |
| * has visited this inode and the scan hasn't been aborted due to errors. |
| * Callers must hold a sufficiently exclusive lock on the inode to prevent |
| * scanners from reading any inode metadata. |
| */ |
| bool |
| xchk_iscan_want_live_update( |
| struct xchk_iscan *iscan, |
| xfs_ino_t ino) |
| { |
| bool ret = false; |
| |
| if (xchk_iscan_aborted(iscan)) |
| return false; |
| |
| mutex_lock(&iscan->lock); |
| |
| trace_xchk_iscan_want_live_update(iscan, ino); |
| |
| /* Scan is finished, caller should receive all updates. */ |
| if (iscan->__visited_ino == NULLFSINO) { |
| ret = true; |
| goto unlock; |
| } |
| |
| /* |
| * No inodes have been visited yet, so the visited cursor points at the |
| * start of the scan range. The caller should not receive any updates. |
| */ |
| if (iscan->scan_start_ino == iscan->__visited_ino) { |
| ret = false; |
| goto unlock; |
| } |
| |
| /* |
| * This inode was not allocated at the time of the iscan batch. |
| * The caller should receive all updates. |
| */ |
| if (xchk_iscan_skipped(iscan, ino)) { |
| ret = true; |
| goto unlock; |
| } |
| |
| /* |
| * The visited cursor hasn't yet wrapped around the end of the FS. If |
| * @ino is inside the starred range, the caller should receive updates: |
| * |
| * 0 ------------ S ************ V ------------ EOFS |
| */ |
| if (iscan->scan_start_ino <= iscan->__visited_ino) { |
| if (ino >= iscan->scan_start_ino && |
| ino <= iscan->__visited_ino) |
| ret = true; |
| |
| goto unlock; |
| } |
| |
| /* |
| * The visited cursor wrapped around the end of the FS. If @ino is |
| * inside the starred range, the caller should receive updates: |
| * |
| * 0 ************ V ------------ S ************ EOFS |
| */ |
| if (ino >= iscan->scan_start_ino || ino <= iscan->__visited_ino) |
| ret = true; |
| |
| unlock: |
| mutex_unlock(&iscan->lock); |
| return ret; |
| } |
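| |
| /* |
| * A hedged sketch of how a live update hook might gate on the predicate |
| * above, assuming a notifier-style hook that returns NOTIFY_DONE. The rf |
| * context structure, its fields, and xrep_foo_update_index() are |
| * illustrative names, not taken from a real scrubber: |
| * |
| * if (!xchk_iscan_want_live_update(&rf->iscan, ip->i_ino)) |
| * return NOTIFY_DONE; |
| * mutex_lock(&rf->lock); |
| * error = xrep_foo_update_index(rf, ip); |
| * mutex_unlock(&rf->lock); |
| * if (error) |
| * ... mark the scan aborted so it stops ... |
| * return NOTIFY_DONE; |
| */ |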