| // SPDX-License-Identifier: GPL-2.0-or-later |
| /* |
| * Copyright (C) 2017-2023 Oracle. All Rights Reserved. |
| * Author: Darrick J. Wong <djwong@kernel.org> |
| */ |
| #include "xfs.h" |
| #include "xfs_fs.h" |
| #include "xfs_shared.h" |
| #include "xfs_format.h" |
| #include "xfs_trans_resv.h" |
| #include "xfs_mount.h" |
| #include "xfs_btree.h" |
| #include "xfs_bit.h" |
| #include "xfs_log_format.h" |
| #include "xfs_trans.h" |
| #include "xfs_inode.h" |
| #include "xfs_alloc.h" |
| #include "xfs_bmap.h" |
| #include "xfs_bmap_btree.h" |
| #include "xfs_rmap.h" |
| #include "xfs_rmap_btree.h" |
| #include "xfs_health.h" |
| #include "scrub/scrub.h" |
| #include "scrub/common.h" |
| #include "scrub/btree.h" |
| #include "scrub/health.h" |
| #include "xfs_ag.h" |
| |
| /* Set us up with an inode's bmap. */ |
| int |
| xchk_setup_inode_bmap( |
| struct xfs_scrub *sc) |
| { |
| int error; |
| |
| if (xchk_need_intent_drain(sc)) |
| xchk_fsgates_enable(sc, XCHK_FSGATES_DRAIN); |
| |
| error = xchk_iget_for_scrubbing(sc); |
| if (error) |
| goto out; |
| |
| xchk_ilock(sc, XFS_IOLOCK_EXCL); |
| |
| /* |
| * We don't want any ephemeral data/cow fork updates sitting around |
| * while we inspect block mappings, so wait for directio to finish |
| * and flush dirty data if we have delalloc reservations. |
| */ |
| if (S_ISREG(VFS_I(sc->ip)->i_mode) && |
| sc->sm->sm_type != XFS_SCRUB_TYPE_BMBTA) { |
| struct address_space *mapping = VFS_I(sc->ip)->i_mapping; |
| bool is_repair = xchk_could_repair(sc); |
| |
| xchk_ilock(sc, XFS_MMAPLOCK_EXCL); |
| |
| /* Break all our leases, we're going to mess with things. */ |
| if (is_repair) { |
| error = xfs_break_layouts(VFS_I(sc->ip), |
| &sc->ilock_flags, BREAK_WRITE); |
| if (error) |
| goto out; |
| } |
| |
| inode_dio_wait(VFS_I(sc->ip)); |
| |
| /* |
| * Try to flush all incore state to disk before we examine the |
| * space mappings for the data fork. Leave accumulated errors |
| * in the mapping for the writer threads to consume. |
| * |
| * On ENOSPC or EIO writeback errors, we continue into the |
| * extent mapping checks because write failures do not |
| * necessarily imply anything about the correctness of the file |
| * metadata. The metadata and the file data could be on |
| * completely separate devices; a media failure might only |
| * affect a subset of the disk, etc. We can handle delalloc |
| * extents in the scrubber, so leaving them in memory is fine. |
| */ |
| error = filemap_fdatawrite(mapping); |
| if (!error) |
| error = filemap_fdatawait_keep_errors(mapping); |
| if (error && (error != -ENOSPC && error != -EIO)) |
| goto out; |
| |
| /* Drop the page cache if we're repairing block mappings. */ |
| if (is_repair) { |
| error = invalidate_inode_pages2( |
| VFS_I(sc->ip)->i_mapping); |
| if (error) |
| goto out; |
| } |
| |
| } |
| |
| /* Got the inode, lock it and we're ready to go. */ |
| error = xchk_trans_alloc(sc, 0); |
| if (error) |
| goto out; |
| |
| error = xchk_ino_dqattach(sc); |
| if (error) |
| goto out; |
| |
| xchk_ilock(sc, XFS_ILOCK_EXCL); |
| out: |
| /* scrub teardown will unlock and release the inode */ |
| return error; |
| } |
| |
| /* |
| * Inode fork block mapping (BMBT) scrubber. |
| * More complex than the others because we have to scrub |
| * all the extents regardless of whether or not the fork |
| * is in btree format. |
| */ |
| |
| struct xchk_bmap_info { |
| struct xfs_scrub *sc; |
| |
| /* Incore extent tree cursor */ |
| struct xfs_iext_cursor icur; |
| |
| /* Previous fork mapping that we examined */ |
| struct xfs_bmbt_irec prev_rec; |
| |
| /* Is this a realtime fork? */ |
| bool is_rt; |
| |
| /* May mappings point to shared space? */ |
| bool is_shared; |
| |
| /* Was the incore extent tree loaded? */ |
| bool was_loaded; |
| |
| /* Which inode fork are we checking? */ |
| int whichfork; |
| }; |
| |
| /* Look for a corresponding rmap for this irec. */ |
| static inline bool |
| xchk_bmap_get_rmap( |
| struct xchk_bmap_info *info, |
| struct xfs_bmbt_irec *irec, |
| xfs_agblock_t agbno, |
| uint64_t owner, |
| struct xfs_rmap_irec *rmap) |
| { |
| xfs_fileoff_t offset; |
| unsigned int rflags = 0; |
| int has_rmap; |
| int error; |
| |
| if (info->whichfork == XFS_ATTR_FORK) |
| rflags |= XFS_RMAP_ATTR_FORK; |
| if (irec->br_state == XFS_EXT_UNWRITTEN) |
| rflags |= XFS_RMAP_UNWRITTEN; |
| |
| /* |
| * CoW staging extents are owned (on disk) by the refcountbt, so |
| * their rmaps do not have offsets. |
| */ |
| if (info->whichfork == XFS_COW_FORK) |
| offset = 0; |
| else |
| offset = irec->br_startoff; |
| |
| /* |
| * If the caller thinks this could be a shared bmbt extent (IOWs, |
| * any data fork extent of a reflink inode) then we have to use the |
| * range rmap lookup to make sure we get the correct owner/offset. |
| */ |
| if (info->is_shared) { |
| error = xfs_rmap_lookup_le_range(info->sc->sa.rmap_cur, agbno, |
| owner, offset, rflags, rmap, &has_rmap); |
| } else { |
| error = xfs_rmap_lookup_le(info->sc->sa.rmap_cur, agbno, |
| owner, offset, rflags, rmap, &has_rmap); |
| } |
| if (!xchk_should_check_xref(info->sc, &error, &info->sc->sa.rmap_cur)) |
| return false; |
| |
| if (!has_rmap) |
| xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, |
| irec->br_startoff); |
| return has_rmap; |
| } |
| |
| /* Make sure that we have rmapbt records for this data/attr fork extent. */ |
| STATIC void |
| xchk_bmap_xref_rmap( |
| struct xchk_bmap_info *info, |
| struct xfs_bmbt_irec *irec, |
| xfs_agblock_t agbno) |
| { |
| struct xfs_rmap_irec rmap; |
| unsigned long long rmap_end; |
| uint64_t owner = info->sc->ip->i_ino; |
| |
| if (!info->sc->sa.rmap_cur || xchk_skip_xref(info->sc->sm)) |
| return; |
| |
| /* Find the rmap record for this irec. */ |
| if (!xchk_bmap_get_rmap(info, irec, agbno, owner, &rmap)) |
| return; |
| |
| /* |
| * The rmap must be an exact match for this incore file mapping record, |
| * which may have arisen from multiple ondisk records. |
| */ |
| if (rmap.rm_startblock != agbno) |
| xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, |
| irec->br_startoff); |
| |
| rmap_end = (unsigned long long)rmap.rm_startblock + rmap.rm_blockcount; |
| if (rmap_end != agbno + irec->br_blockcount) |
| xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, |
| irec->br_startoff); |
| |
| /* Check the logical offsets. */ |
| if (rmap.rm_offset != irec->br_startoff) |
| xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, |
| irec->br_startoff); |
| |
| rmap_end = (unsigned long long)rmap.rm_offset + rmap.rm_blockcount; |
| if (rmap_end != irec->br_startoff + irec->br_blockcount) |
| xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, |
| irec->br_startoff); |
| |
| /* Check the owner */ |
| if (rmap.rm_owner != owner) |
| xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, |
| irec->br_startoff); |
| |
| /* |
| * Check for discrepancies between the unwritten flag in the irec and |
| * the rmap. Note that the (in-memory) CoW fork distinguishes between |
| * unwritten and written extents, but we don't track that in the rmap |
| * records because the blocks are owned (on-disk) by the refcountbt, |
| * which doesn't track unwritten state. |
| */ |
| if (!!(irec->br_state == XFS_EXT_UNWRITTEN) != |
| !!(rmap.rm_flags & XFS_RMAP_UNWRITTEN)) |
| xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, |
| irec->br_startoff); |
| |
| if (!!(info->whichfork == XFS_ATTR_FORK) != |
| !!(rmap.rm_flags & XFS_RMAP_ATTR_FORK)) |
| xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, |
| irec->br_startoff); |
| if (rmap.rm_flags & XFS_RMAP_BMBT_BLOCK) |
| xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, |
| irec->br_startoff); |
| } |
| |
| /* Make sure that we have rmapbt records for this COW fork extent. */ |
| STATIC void |
| xchk_bmap_xref_rmap_cow( |
| struct xchk_bmap_info *info, |
| struct xfs_bmbt_irec *irec, |
| xfs_agblock_t agbno) |
| { |
| struct xfs_rmap_irec rmap; |
| unsigned long long rmap_end; |
| uint64_t owner = XFS_RMAP_OWN_COW; |
| |
| if (!info->sc->sa.rmap_cur || xchk_skip_xref(info->sc->sm)) |
| return; |
| |
| /* Find the rmap record for this irec. */ |
| if (!xchk_bmap_get_rmap(info, irec, agbno, owner, &rmap)) |
| return; |
| |
| /* |
| * CoW staging extents are owned by the refcount btree, so the rmap |
| * can start before and end after the physical space allocated to this |
| * mapping. There are no offsets to check. |
| */ |
| if (rmap.rm_startblock > agbno) |
| xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, |
| irec->br_startoff); |
| |
| rmap_end = (unsigned long long)rmap.rm_startblock + rmap.rm_blockcount; |
| if (rmap_end < agbno + irec->br_blockcount) |
| xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, |
| irec->br_startoff); |
| |
| /* Check the owner */ |
| if (rmap.rm_owner != owner) |
| xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, |
| irec->br_startoff); |
| |
| /* |
| * No flags allowed. Note that the (in-memory) CoW fork distinguishes |
| * between unwritten and written extents, but we don't track that in |
| * the rmap records because the blocks are owned (on-disk) by the |
| * refcountbt, which doesn't track unwritten state. |
| */ |
| if (rmap.rm_flags & XFS_RMAP_ATTR_FORK) |
| xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, |
| irec->br_startoff); |
| if (rmap.rm_flags & XFS_RMAP_BMBT_BLOCK) |
| xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, |
| irec->br_startoff); |
| if (rmap.rm_flags & XFS_RMAP_UNWRITTEN) |
| xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, |
| irec->br_startoff); |
| } |
| |
| /* Cross-reference a single rtdev extent record. */ |
| STATIC void |
| xchk_bmap_rt_iextent_xref( |
| struct xfs_inode *ip, |
| struct xchk_bmap_info *info, |
| struct xfs_bmbt_irec *irec) |
| { |
| xchk_xref_is_used_rt_space(info->sc, irec->br_startblock, |
| irec->br_blockcount); |
| } |
| |
| /* Cross-reference a single datadev extent record. */ |
| STATIC void |
| xchk_bmap_iextent_xref( |
| struct xfs_inode *ip, |
| struct xchk_bmap_info *info, |
| struct xfs_bmbt_irec *irec) |
| { |
| struct xfs_owner_info oinfo; |
| struct xfs_mount *mp = info->sc->mp; |
| xfs_agnumber_t agno; |
| xfs_agblock_t agbno; |
| xfs_extlen_t len; |
| int error; |
| |
| agno = XFS_FSB_TO_AGNO(mp, irec->br_startblock); |
| agbno = XFS_FSB_TO_AGBNO(mp, irec->br_startblock); |
| len = irec->br_blockcount; |
| |
| error = xchk_ag_init_existing(info->sc, agno, &info->sc->sa); |
| if (!xchk_fblock_process_error(info->sc, info->whichfork, |
| irec->br_startoff, &error)) |
| goto out_free; |
| |
| xchk_xref_is_used_space(info->sc, agbno, len); |
| xchk_xref_is_not_inode_chunk(info->sc, agbno, len); |
| switch (info->whichfork) { |
| case XFS_DATA_FORK: |
| xchk_bmap_xref_rmap(info, irec, agbno); |
| if (!xfs_is_reflink_inode(info->sc->ip)) { |
| xfs_rmap_ino_owner(&oinfo, info->sc->ip->i_ino, |
| info->whichfork, irec->br_startoff); |
| xchk_xref_is_only_owned_by(info->sc, agbno, |
| irec->br_blockcount, &oinfo); |
| xchk_xref_is_not_shared(info->sc, agbno, |
| irec->br_blockcount); |
| } |
| xchk_xref_is_not_cow_staging(info->sc, agbno, |
| irec->br_blockcount); |
| break; |
| case XFS_ATTR_FORK: |
| xchk_bmap_xref_rmap(info, irec, agbno); |
| xfs_rmap_ino_owner(&oinfo, info->sc->ip->i_ino, |
| info->whichfork, irec->br_startoff); |
| xchk_xref_is_only_owned_by(info->sc, agbno, irec->br_blockcount, |
| &oinfo); |
| xchk_xref_is_not_shared(info->sc, agbno, |
| irec->br_blockcount); |
| xchk_xref_is_not_cow_staging(info->sc, agbno, |
| irec->br_blockcount); |
| break; |
| case XFS_COW_FORK: |
| xchk_bmap_xref_rmap_cow(info, irec, agbno); |
| xchk_xref_is_only_owned_by(info->sc, agbno, irec->br_blockcount, |
| &XFS_RMAP_OINFO_COW); |
| xchk_xref_is_cow_staging(info->sc, agbno, |
| irec->br_blockcount); |
| xchk_xref_is_not_shared(info->sc, agbno, |
| irec->br_blockcount); |
| break; |
| } |
| |
| out_free: |
| xchk_ag_free(info->sc, &info->sc->sa); |
| } |
| |
| /* |
| * Directories and attr forks should never have blocks that can't be addressed |
| * by a xfs_dablk_t. |
| */ |
| STATIC void |
| xchk_bmap_dirattr_extent( |
| struct xfs_inode *ip, |
| struct xchk_bmap_info *info, |
| struct xfs_bmbt_irec *irec) |
| { |
| struct xfs_mount *mp = ip->i_mount; |
| xfs_fileoff_t off; |
| |
| if (!S_ISDIR(VFS_I(ip)->i_mode) && info->whichfork != XFS_ATTR_FORK) |
| return; |
| |
| if (!xfs_verify_dablk(mp, irec->br_startoff)) |
| xchk_fblock_set_corrupt(info->sc, info->whichfork, |
| irec->br_startoff); |
| |
| off = irec->br_startoff + irec->br_blockcount - 1; |
| if (!xfs_verify_dablk(mp, off)) |
| xchk_fblock_set_corrupt(info->sc, info->whichfork, off); |
| } |
| |
| /* Scrub a single extent record. */ |
| STATIC void |
| xchk_bmap_iextent( |
| struct xfs_inode *ip, |
| struct xchk_bmap_info *info, |
| struct xfs_bmbt_irec *irec) |
| { |
| struct xfs_mount *mp = info->sc->mp; |
| |
| /* |
| * Check for out-of-order extents. This record could have come |
| * from the incore list, for which there is no ordering check. |
| */ |
| if (irec->br_startoff < info->prev_rec.br_startoff + |
| info->prev_rec.br_blockcount) |
| xchk_fblock_set_corrupt(info->sc, info->whichfork, |
| irec->br_startoff); |
| |
| if (!xfs_verify_fileext(mp, irec->br_startoff, irec->br_blockcount)) |
| xchk_fblock_set_corrupt(info->sc, info->whichfork, |
| irec->br_startoff); |
| |
| xchk_bmap_dirattr_extent(ip, info, irec); |
| |
| /* Make sure the extent points to a valid place. */ |
| if (info->is_rt && |
| !xfs_verify_rtbext(mp, irec->br_startblock, irec->br_blockcount)) |
| xchk_fblock_set_corrupt(info->sc, info->whichfork, |
| irec->br_startoff); |
| if (!info->is_rt && |
| !xfs_verify_fsbext(mp, irec->br_startblock, irec->br_blockcount)) |
| xchk_fblock_set_corrupt(info->sc, info->whichfork, |
| irec->br_startoff); |
| |
| /* We don't allow unwritten extents on attr forks. */ |
| if (irec->br_state == XFS_EXT_UNWRITTEN && |
| info->whichfork == XFS_ATTR_FORK) |
| xchk_fblock_set_corrupt(info->sc, info->whichfork, |
| irec->br_startoff); |
| |
| if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) |
| return; |
| |
| if (info->is_rt) |
| xchk_bmap_rt_iextent_xref(ip, info, irec); |
| else |
| xchk_bmap_iextent_xref(ip, info, irec); |
| } |
| |
| /* Scrub a bmbt record. */ |
| STATIC int |
| xchk_bmapbt_rec( |
| struct xchk_btree *bs, |
| const union xfs_btree_rec *rec) |
| { |
| struct xfs_bmbt_irec irec; |
| struct xfs_bmbt_irec iext_irec; |
| struct xfs_iext_cursor icur; |
| struct xchk_bmap_info *info = bs->private; |
| struct xfs_inode *ip = bs->cur->bc_ino.ip; |
| struct xfs_buf *bp = NULL; |
| struct xfs_btree_block *block; |
| struct xfs_ifork *ifp = xfs_ifork_ptr(ip, info->whichfork); |
| uint64_t owner; |
| int i; |
| |
| /* |
| * Check the owners of the btree blocks up to the level below |
| * the root since the verifiers don't do that. |
| */ |
| if (xfs_has_crc(bs->cur->bc_mp) && |
| bs->cur->bc_levels[0].ptr == 1) { |
| for (i = 0; i < bs->cur->bc_nlevels - 1; i++) { |
| block = xfs_btree_get_block(bs->cur, i, &bp); |
| owner = be64_to_cpu(block->bb_u.l.bb_owner); |
| if (owner != ip->i_ino) |
| xchk_fblock_set_corrupt(bs->sc, |
| info->whichfork, 0); |
| } |
| } |
| |
| /* |
| * Check that the incore extent tree contains an extent that matches |
| * this one exactly. We validate those cached bmaps later, so we don't |
| * need to check them here. If the incore extent tree was just loaded |
| * from disk by the scrubber, we assume that its contents match what's |
| * on disk (we still hold the ILOCK) and skip the equivalence check. |
| */ |
| if (!info->was_loaded) |
| return 0; |
| |
| xfs_bmbt_disk_get_all(&rec->bmbt, &irec); |
| if (xfs_bmap_validate_extent(ip, info->whichfork, &irec) != NULL) { |
| xchk_fblock_set_corrupt(bs->sc, info->whichfork, |
| irec.br_startoff); |
| return 0; |
| } |
| |
| if (!xfs_iext_lookup_extent(ip, ifp, irec.br_startoff, &icur, |
| &iext_irec) || |
| irec.br_startoff != iext_irec.br_startoff || |
| irec.br_startblock != iext_irec.br_startblock || |
| irec.br_blockcount != iext_irec.br_blockcount || |
| irec.br_state != iext_irec.br_state) |
| xchk_fblock_set_corrupt(bs->sc, info->whichfork, |
| irec.br_startoff); |
| return 0; |
| } |
| |
| /* Scan the btree records. */ |
| STATIC int |
| xchk_bmap_btree( |
| struct xfs_scrub *sc, |
| int whichfork, |
| struct xchk_bmap_info *info) |
| { |
| struct xfs_owner_info oinfo; |
| struct xfs_ifork *ifp = xfs_ifork_ptr(sc->ip, whichfork); |
| struct xfs_mount *mp = sc->mp; |
| struct xfs_inode *ip = sc->ip; |
| struct xfs_btree_cur *cur; |
| int error; |
| |
| /* Load the incore bmap cache if it's not loaded. */ |
| info->was_loaded = !xfs_need_iread_extents(ifp); |
| |
| error = xfs_iread_extents(sc->tp, ip, whichfork); |
| if (!xchk_fblock_process_error(sc, whichfork, 0, &error)) |
| goto out; |
| |
| /* Check the btree structure. */ |
| cur = xfs_bmbt_init_cursor(mp, sc->tp, ip, whichfork); |
| xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork); |
| error = xchk_btree(sc, cur, xchk_bmapbt_rec, &oinfo, info); |
| xfs_btree_del_cursor(cur, error); |
| out: |
| return error; |
| } |
| |
| struct xchk_bmap_check_rmap_info { |
| struct xfs_scrub *sc; |
| int whichfork; |
| struct xfs_iext_cursor icur; |
| }; |
| |
| /* Can we find bmaps that fit this rmap? */ |
| STATIC int |
| xchk_bmap_check_rmap( |
| struct xfs_btree_cur *cur, |
| const struct xfs_rmap_irec *rec, |
| void *priv) |
| { |
| struct xfs_bmbt_irec irec; |
| struct xfs_rmap_irec check_rec; |
| struct xchk_bmap_check_rmap_info *sbcri = priv; |
| struct xfs_ifork *ifp; |
| struct xfs_scrub *sc = sbcri->sc; |
| bool have_map; |
| |
| /* Is this even the right fork? */ |
| if (rec->rm_owner != sc->ip->i_ino) |
| return 0; |
| if ((sbcri->whichfork == XFS_ATTR_FORK) ^ |
| !!(rec->rm_flags & XFS_RMAP_ATTR_FORK)) |
| return 0; |
| if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK) |
| return 0; |
| |
| /* Now look up the bmbt record. */ |
| ifp = xfs_ifork_ptr(sc->ip, sbcri->whichfork); |
| if (!ifp) { |
| xchk_fblock_set_corrupt(sc, sbcri->whichfork, |
| rec->rm_offset); |
| goto out; |
| } |
| have_map = xfs_iext_lookup_extent(sc->ip, ifp, rec->rm_offset, |
| &sbcri->icur, &irec); |
| if (!have_map) |
| xchk_fblock_set_corrupt(sc, sbcri->whichfork, |
| rec->rm_offset); |
| /* |
| * bmap extent record lengths are constrained to 2^21 blocks in length |
| * because of space constraints in the on-disk metadata structure. |
| * However, rmap extent record lengths are constrained only by AG |
| * length, so we have to loop through the bmbt to make sure that the |
| * entire rmap is covered by bmbt records. |
| */ |
| check_rec = *rec; |
| while (have_map) { |
| if (irec.br_startoff != check_rec.rm_offset) |
| xchk_fblock_set_corrupt(sc, sbcri->whichfork, |
| check_rec.rm_offset); |
| if (irec.br_startblock != XFS_AGB_TO_FSB(sc->mp, |
| cur->bc_ag.pag->pag_agno, |
| check_rec.rm_startblock)) |
| xchk_fblock_set_corrupt(sc, sbcri->whichfork, |
| check_rec.rm_offset); |
| if (irec.br_blockcount > check_rec.rm_blockcount) |
| xchk_fblock_set_corrupt(sc, sbcri->whichfork, |
| check_rec.rm_offset); |
| if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) |
| break; |
| check_rec.rm_startblock += irec.br_blockcount; |
| check_rec.rm_offset += irec.br_blockcount; |
| check_rec.rm_blockcount -= irec.br_blockcount; |
| if (check_rec.rm_blockcount == 0) |
| break; |
| have_map = xfs_iext_next_extent(ifp, &sbcri->icur, &irec); |
| if (!have_map) |
| xchk_fblock_set_corrupt(sc, sbcri->whichfork, |
| check_rec.rm_offset); |
| } |
| |
| out: |
| if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) |
| return -ECANCELED; |
| return 0; |
| } |
| |
| /* Make sure each rmap has a corresponding bmbt entry. */ |
| STATIC int |
| xchk_bmap_check_ag_rmaps( |
| struct xfs_scrub *sc, |
| int whichfork, |
| struct xfs_perag *pag) |
| { |
| struct xchk_bmap_check_rmap_info sbcri; |
| struct xfs_btree_cur *cur; |
| struct xfs_buf *agf; |
| int error; |
| |
| error = xfs_alloc_read_agf(pag, sc->tp, 0, &agf); |
| if (error) |
| return error; |
| |
| cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, agf, pag); |
| |
| sbcri.sc = sc; |
| sbcri.whichfork = whichfork; |
| error = xfs_rmap_query_all(cur, xchk_bmap_check_rmap, &sbcri); |
| if (error == -ECANCELED) |
| error = 0; |
| |
| xfs_btree_del_cursor(cur, error); |
| xfs_trans_brelse(sc->tp, agf); |
| return error; |
| } |
| |
| /* |
| * Decide if we want to scan the reverse mappings to determine if the attr |
| * fork /really/ has zero space mappings. |
| */ |
| STATIC bool |
| xchk_bmap_check_empty_attrfork( |
| struct xfs_inode *ip) |
| { |
| struct xfs_ifork *ifp = &ip->i_af; |
| |
| /* |
| * If the dinode repair found a bad attr fork, it will reset the fork |
| * to extents format with zero records and wait for the this scrubber |
| * to reconstruct the block mappings. If the fork is not in this |
| * state, then the fork cannot have been zapped. |
| */ |
| if (ifp->if_format != XFS_DINODE_FMT_EXTENTS || ifp->if_nextents != 0) |
| return false; |
| |
| /* |
| * Files can have an attr fork in EXTENTS format with zero records for |
| * several reasons: |
| * |
| * a) an attr set created a fork but ran out of space |
| * b) attr replace deleted an old attr but failed during the set step |
| * c) the data fork was in btree format when all attrs were deleted, so |
| * the fork was left in place |
| * d) the inode repair code zapped the fork |
| * |
| * Only in case (d) do we want to scan the rmapbt to see if we need to |
| * rebuild the attr fork. The fork zap code clears all DAC permission |
| * bits and zeroes the uid and gid, so avoid the scan if any of those |
| * three conditions are not met. |
| */ |
| if ((VFS_I(ip)->i_mode & 0777) != 0) |
| return false; |
| if (!uid_eq(VFS_I(ip)->i_uid, GLOBAL_ROOT_UID)) |
| return false; |
| if (!gid_eq(VFS_I(ip)->i_gid, GLOBAL_ROOT_GID)) |
| return false; |
| |
| return true; |
| } |
| |
| /* |
| * Decide if we want to scan the reverse mappings to determine if the data |
| * fork /really/ has zero space mappings. |
| */ |
| STATIC bool |
| xchk_bmap_check_empty_datafork( |
| struct xfs_inode *ip) |
| { |
| struct xfs_ifork *ifp = &ip->i_df; |
| |
| /* Don't support realtime rmap checks yet. */ |
| if (XFS_IS_REALTIME_INODE(ip)) |
| return false; |
| |
| /* |
| * If the dinode repair found a bad data fork, it will reset the fork |
| * to extents format with zero records and wait for the this scrubber |
| * to reconstruct the block mappings. If the fork is not in this |
| * state, then the fork cannot have been zapped. |
| */ |
| if (ifp->if_format != XFS_DINODE_FMT_EXTENTS || ifp->if_nextents != 0) |
| return false; |
| |
| /* |
| * If we encounter an empty data fork along with evidence that the fork |
| * might not really be empty, we need to scan the reverse mappings to |
| * decide if we're going to rebuild the fork. Data forks with nonzero |
| * file size are scanned. |
| */ |
| return i_size_read(VFS_I(ip)) != 0; |
| } |
| |
| /* |
| * Decide if we want to walk every rmap btree in the fs to make sure that each |
| * rmap for this file fork has corresponding bmbt entries. |
| */ |
| static bool |
| xchk_bmap_want_check_rmaps( |
| struct xchk_bmap_info *info) |
| { |
| struct xfs_scrub *sc = info->sc; |
| |
| if (!xfs_has_rmapbt(sc->mp)) |
| return false; |
| if (info->whichfork == XFS_COW_FORK) |
| return false; |
| if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) |
| return false; |
| |
| if (info->whichfork == XFS_ATTR_FORK) |
| return xchk_bmap_check_empty_attrfork(sc->ip); |
| |
| return xchk_bmap_check_empty_datafork(sc->ip); |
| } |
| |
| /* Make sure each rmap has a corresponding bmbt entry. */ |
| STATIC int |
| xchk_bmap_check_rmaps( |
| struct xfs_scrub *sc, |
| int whichfork) |
| { |
| struct xfs_perag *pag; |
| xfs_agnumber_t agno; |
| int error; |
| |
| for_each_perag(sc->mp, agno, pag) { |
| error = xchk_bmap_check_ag_rmaps(sc, whichfork, pag); |
| if (error || |
| (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) { |
| xfs_perag_rele(pag); |
| return error; |
| } |
| } |
| |
| return 0; |
| } |
| |
| /* Scrub a delalloc reservation from the incore extent map tree. */ |
| STATIC void |
| xchk_bmap_iextent_delalloc( |
| struct xfs_inode *ip, |
| struct xchk_bmap_info *info, |
| struct xfs_bmbt_irec *irec) |
| { |
| struct xfs_mount *mp = info->sc->mp; |
| |
| /* |
| * Check for out-of-order extents. This record could have come |
| * from the incore list, for which there is no ordering check. |
| */ |
| if (irec->br_startoff < info->prev_rec.br_startoff + |
| info->prev_rec.br_blockcount) |
| xchk_fblock_set_corrupt(info->sc, info->whichfork, |
| irec->br_startoff); |
| |
| if (!xfs_verify_fileext(mp, irec->br_startoff, irec->br_blockcount)) |
| xchk_fblock_set_corrupt(info->sc, info->whichfork, |
| irec->br_startoff); |
| |
| /* Make sure the extent points to a valid place. */ |
| if (irec->br_blockcount > XFS_MAX_BMBT_EXTLEN) |
| xchk_fblock_set_corrupt(info->sc, info->whichfork, |
| irec->br_startoff); |
| } |
| |
| /* Decide if this individual fork mapping is ok. */ |
| static bool |
| xchk_bmap_iext_mapping( |
| struct xchk_bmap_info *info, |
| const struct xfs_bmbt_irec *irec) |
| { |
| /* There should never be a "hole" extent in either extent list. */ |
| if (irec->br_startblock == HOLESTARTBLOCK) |
| return false; |
| if (irec->br_blockcount > XFS_MAX_BMBT_EXTLEN) |
| return false; |
| return true; |
| } |
| |
| /* Are these two mappings contiguous with each other? */ |
| static inline bool |
| xchk_are_bmaps_contiguous( |
| const struct xfs_bmbt_irec *b1, |
| const struct xfs_bmbt_irec *b2) |
| { |
| /* Don't try to combine unallocated mappings. */ |
| if (!xfs_bmap_is_real_extent(b1)) |
| return false; |
| if (!xfs_bmap_is_real_extent(b2)) |
| return false; |
| |
| /* Does b2 come right after b1 in the logical and physical range? */ |
| if (b1->br_startoff + b1->br_blockcount != b2->br_startoff) |
| return false; |
| if (b1->br_startblock + b1->br_blockcount != b2->br_startblock) |
| return false; |
| if (b1->br_state != b2->br_state) |
| return false; |
| return true; |
| } |
| |
| /* |
| * Walk the incore extent records, accumulating consecutive contiguous records |
| * into a single incore mapping. Returns true if @irec has been set to a |
| * mapping or false if there are no more mappings. Caller must ensure that |
| * @info.icur is zeroed before the first call. |
| */ |
| static bool |
| xchk_bmap_iext_iter( |
| struct xchk_bmap_info *info, |
| struct xfs_bmbt_irec *irec) |
| { |
| struct xfs_bmbt_irec got; |
| struct xfs_ifork *ifp; |
| unsigned int nr = 0; |
| |
| ifp = xfs_ifork_ptr(info->sc->ip, info->whichfork); |
| |
| /* Advance to the next iextent record and check the mapping. */ |
| xfs_iext_next(ifp, &info->icur); |
| if (!xfs_iext_get_extent(ifp, &info->icur, irec)) |
| return false; |
| |
| if (!xchk_bmap_iext_mapping(info, irec)) { |
| xchk_fblock_set_corrupt(info->sc, info->whichfork, |
| irec->br_startoff); |
| return false; |
| } |
| nr++; |
| |
| /* |
| * Iterate subsequent iextent records and merge them with the one |
| * that we just read, if possible. |
| */ |
| while (xfs_iext_peek_next_extent(ifp, &info->icur, &got)) { |
| if (!xchk_are_bmaps_contiguous(irec, &got)) |
| break; |
| |
| if (!xchk_bmap_iext_mapping(info, &got)) { |
| xchk_fblock_set_corrupt(info->sc, info->whichfork, |
| got.br_startoff); |
| return false; |
| } |
| nr++; |
| |
| irec->br_blockcount += got.br_blockcount; |
| xfs_iext_next(ifp, &info->icur); |
| } |
| |
| /* |
| * If the merged mapping could be expressed with fewer bmbt records |
| * than we actually found, notify the user that this fork could be |
| * optimized. CoW forks only exist in memory so we ignore them. |
| */ |
| if (nr > 1 && info->whichfork != XFS_COW_FORK && |
| howmany_64(irec->br_blockcount, XFS_MAX_BMBT_EXTLEN) < nr) |
| xchk_ino_set_preen(info->sc, info->sc->ip->i_ino); |
| |
| return true; |
| } |
| |
| /* |
| * Scrub an inode fork's block mappings. |
| * |
| * First we scan every record in every btree block, if applicable. |
| * Then we unconditionally scan the incore extent cache. |
| */ |
| STATIC int |
| xchk_bmap( |
| struct xfs_scrub *sc, |
| int whichfork) |
| { |
| struct xfs_bmbt_irec irec; |
| struct xchk_bmap_info info = { NULL }; |
| struct xfs_mount *mp = sc->mp; |
| struct xfs_inode *ip = sc->ip; |
| struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork); |
| xfs_fileoff_t endoff; |
| int error = 0; |
| |
| /* Non-existent forks can be ignored. */ |
| if (!ifp) |
| return -ENOENT; |
| |
| info.is_rt = xfs_ifork_is_realtime(ip, whichfork); |
| info.whichfork = whichfork; |
| info.is_shared = whichfork == XFS_DATA_FORK && xfs_is_reflink_inode(ip); |
| info.sc = sc; |
| |
| switch (whichfork) { |
| case XFS_COW_FORK: |
| /* No CoW forks on non-reflink filesystems. */ |
| if (!xfs_has_reflink(mp)) { |
| xchk_ino_set_corrupt(sc, sc->ip->i_ino); |
| return 0; |
| } |
| break; |
| case XFS_ATTR_FORK: |
| if (!xfs_has_attr(mp) && !xfs_has_attr2(mp)) |
| xchk_ino_set_corrupt(sc, sc->ip->i_ino); |
| break; |
| default: |
| ASSERT(whichfork == XFS_DATA_FORK); |
| break; |
| } |
| |
| /* Check the fork values */ |
| switch (ifp->if_format) { |
| case XFS_DINODE_FMT_UUID: |
| case XFS_DINODE_FMT_DEV: |
| case XFS_DINODE_FMT_LOCAL: |
| /* No mappings to check. */ |
| if (whichfork == XFS_COW_FORK) |
| xchk_fblock_set_corrupt(sc, whichfork, 0); |
| return 0; |
| case XFS_DINODE_FMT_EXTENTS: |
| break; |
| case XFS_DINODE_FMT_BTREE: |
| if (whichfork == XFS_COW_FORK) { |
| xchk_fblock_set_corrupt(sc, whichfork, 0); |
| return 0; |
| } |
| |
| error = xchk_bmap_btree(sc, whichfork, &info); |
| if (error) |
| return error; |
| break; |
| default: |
| xchk_fblock_set_corrupt(sc, whichfork, 0); |
| return 0; |
| } |
| |
| if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) |
| return 0; |
| |
| /* Find the offset of the last extent in the mapping. */ |
| error = xfs_bmap_last_offset(ip, &endoff, whichfork); |
| if (!xchk_fblock_process_error(sc, whichfork, 0, &error)) |
| return error; |
| |
| /* |
| * Scrub extent records. We use a special iterator function here that |
| * combines adjacent mappings if they are logically and physically |
| * contiguous. For large allocations that require multiple bmbt |
| * records, this reduces the number of cross-referencing calls, which |
| * reduces runtime. Cross referencing with the rmap is simpler because |
| * the rmap must match the combined mapping exactly. |
| */ |
| while (xchk_bmap_iext_iter(&info, &irec)) { |
| if (xchk_should_terminate(sc, &error) || |
| (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) |
| return 0; |
| |
| if (irec.br_startoff >= endoff) { |
| xchk_fblock_set_corrupt(sc, whichfork, |
| irec.br_startoff); |
| return 0; |
| } |
| |
| if (isnullstartblock(irec.br_startblock)) |
| xchk_bmap_iextent_delalloc(ip, &info, &irec); |
| else |
| xchk_bmap_iextent(ip, &info, &irec); |
| memcpy(&info.prev_rec, &irec, sizeof(struct xfs_bmbt_irec)); |
| } |
| |
| if (xchk_bmap_want_check_rmaps(&info)) { |
| error = xchk_bmap_check_rmaps(sc, whichfork); |
| if (!xchk_fblock_xref_process_error(sc, whichfork, 0, &error)) |
| return error; |
| } |
| |
| return 0; |
| } |
| |
| /* Scrub an inode's data fork. */ |
| int |
| xchk_bmap_data( |
| struct xfs_scrub *sc) |
| { |
| int error; |
| |
| if (xchk_file_looks_zapped(sc, XFS_SICK_INO_BMBTD_ZAPPED)) { |
| xchk_ino_set_corrupt(sc, sc->ip->i_ino); |
| return 0; |
| } |
| |
| error = xchk_bmap(sc, XFS_DATA_FORK); |
| if (error) |
| return error; |
| |
| /* If the data fork is clean, it is clearly not zapped. */ |
| xchk_mark_healthy_if_clean(sc, XFS_SICK_INO_BMBTD_ZAPPED); |
| return 0; |
| } |
| |
| /* Scrub an inode's attr fork. */ |
| int |
| xchk_bmap_attr( |
| struct xfs_scrub *sc) |
| { |
| int error; |
| |
| /* |
| * If the attr fork has been zapped, it's possible that forkoff was |
| * reset to zero and hence sc->ip->i_afp is NULL. We don't want the |
| * NULL ifp check in xchk_bmap to conclude that the attr fork is ok, |
| * so short circuit that logic by setting the corruption flag and |
| * returning immediately. |
| */ |
| if (xchk_file_looks_zapped(sc, XFS_SICK_INO_BMBTA_ZAPPED)) { |
| xchk_ino_set_corrupt(sc, sc->ip->i_ino); |
| return 0; |
| } |
| |
| error = xchk_bmap(sc, XFS_ATTR_FORK); |
| if (error) |
| return error; |
| |
| /* If the attr fork is clean, it is clearly not zapped. */ |
| xchk_mark_healthy_if_clean(sc, XFS_SICK_INO_BMBTA_ZAPPED); |
| return 0; |
| } |
| |
| /* Scrub an inode's CoW fork. */ |
| int |
| xchk_bmap_cow( |
| struct xfs_scrub *sc) |
| { |
| return xchk_bmap(sc, XFS_COW_FORK); |
| } |