| // SPDX-License-Identifier: GPL-2.0+ |
| /* |
| * Copyright (C) 2017 Oracle. All Rights Reserved. |
| * Author: Darrick J. Wong <darrick.wong@oracle.com> |
| */ |
| #include "xfs.h" |
| #include "xfs_fs.h" |
| #include "xfs_shared.h" |
| #include "xfs_format.h" |
| #include "xfs_trans_resv.h" |
| #include "xfs_mount.h" |
| #include "xfs_log_format.h" |
| #include "xfs_trans.h" |
| #include "xfs_inode.h" |
| #include "xfs_dir2.h" |
| #include "xfs_dir2_priv.h" |
| #include "xfs_attr_leaf.h" |
| #include "scrub/scrub.h" |
| #include "scrub/common.h" |
| #include "scrub/trace.h" |
| #include "scrub/dabtree.h" |
| |
| /* Directory/Attribute Btree */ |
| |
| /* |
| * Check for da btree operation errors. See the section about handling |
| * operational errors in common.c. |
| */ |
| bool |
| xchk_da_process_error( |
| struct xchk_da_btree *ds, |
| int level, |
| int *error) |
| { |
| struct xfs_scrub *sc = ds->sc; |
| |
| if (*error == 0) |
| return true; |
| |
| switch (*error) { |
| case -EDEADLOCK: |
| /* Used to restart an op with deadlock avoidance. */ |
| trace_xchk_deadlock_retry(sc->ip, sc->sm, *error); |
| break; |
| case -EFSBADCRC: |
| case -EFSCORRUPTED: |
| /* Note the badness but don't abort. */ |
| sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; |
| *error = 0; |
| fallthrough; |
| default: |
| trace_xchk_file_op_error(sc, ds->dargs.whichfork, |
| xfs_dir2_da_to_db(ds->dargs.geo, |
| ds->state->path.blk[level].blkno), |
| *error, __return_address); |
| break; |
| } |
| return false; |
| } |
| |
| /* |
| * Check for da btree corruption. See the section about handling |
| * operational errors in common.c. |
| */ |
| void |
| xchk_da_set_corrupt( |
| struct xchk_da_btree *ds, |
| int level) |
| { |
| struct xfs_scrub *sc = ds->sc; |
| |
| sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; |
| |
| trace_xchk_fblock_error(sc, ds->dargs.whichfork, |
| xfs_dir2_da_to_db(ds->dargs.geo, |
| ds->state->path.blk[level].blkno), |
| __return_address); |
| } |
| |
| static struct xfs_da_node_entry * |
| xchk_da_btree_node_entry( |
| struct xchk_da_btree *ds, |
| int level) |
| { |
| struct xfs_da_state_blk *blk = &ds->state->path.blk[level]; |
| struct xfs_da3_icnode_hdr hdr; |
| |
| ASSERT(blk->magic == XFS_DA_NODE_MAGIC); |
| |
| xfs_da3_node_hdr_from_disk(ds->sc->mp, &hdr, blk->bp->b_addr); |
| return hdr.btree + blk->index; |
| } |
| |
| /* Scrub a da btree hash (key). */ |
| int |
| xchk_da_btree_hash( |
| struct xchk_da_btree *ds, |
| int level, |
| __be32 *hashp) |
| { |
| struct xfs_da_node_entry *entry; |
| xfs_dahash_t hash; |
| xfs_dahash_t parent_hash; |
| |
| /* Is this hash in order? */ |
| hash = be32_to_cpu(*hashp); |
| if (hash < ds->hashes[level]) |
| xchk_da_set_corrupt(ds, level); |
| ds->hashes[level] = hash; |
| |
| if (level == 0) |
| return 0; |
| |
| /* Is this hash no larger than the parent hash? */ |
| entry = xchk_da_btree_node_entry(ds, level - 1); |
| parent_hash = be32_to_cpu(entry->hashval); |
| if (parent_hash < hash) |
| xchk_da_set_corrupt(ds, level); |
| |
| return 0; |
| } |
| |
| /* |
| * Check a da btree pointer. Returns true if it's ok to use this |
| * pointer. |
| */ |
| STATIC bool |
| xchk_da_btree_ptr_ok( |
| struct xchk_da_btree *ds, |
| int level, |
| xfs_dablk_t blkno) |
| { |
| if (blkno < ds->lowest || (ds->highest != 0 && blkno >= ds->highest)) { |
| xchk_da_set_corrupt(ds, level); |
| return false; |
| } |
| |
| return true; |
| } |
| |
| /* |
| * The da btree scrubber can handle leaf1 blocks as a degenerate |
| * form of leafn blocks. Since the regular da code doesn't handle |
| * leaf1, we must multiplex the verifiers. |
| */ |
| static void |
| xchk_da_btree_read_verify( |
| struct xfs_buf *bp) |
| { |
| struct xfs_da_blkinfo *info = bp->b_addr; |
| |
| switch (be16_to_cpu(info->magic)) { |
| case XFS_DIR2_LEAF1_MAGIC: |
| case XFS_DIR3_LEAF1_MAGIC: |
| bp->b_ops = &xfs_dir3_leaf1_buf_ops; |
| bp->b_ops->verify_read(bp); |
| return; |
| default: |
| /* |
| * xfs_da3_node_buf_ops already know how to handle |
| * DA*_NODE, ATTR*_LEAF, and DIR*_LEAFN blocks. |
| */ |
| bp->b_ops = &xfs_da3_node_buf_ops; |
| bp->b_ops->verify_read(bp); |
| return; |
| } |
| } |
| static void |
| xchk_da_btree_write_verify( |
| struct xfs_buf *bp) |
| { |
| struct xfs_da_blkinfo *info = bp->b_addr; |
| |
| switch (be16_to_cpu(info->magic)) { |
| case XFS_DIR2_LEAF1_MAGIC: |
| case XFS_DIR3_LEAF1_MAGIC: |
| bp->b_ops = &xfs_dir3_leaf1_buf_ops; |
| bp->b_ops->verify_write(bp); |
| return; |
| default: |
| /* |
| * xfs_da3_node_buf_ops already know how to handle |
| * DA*_NODE, ATTR*_LEAF, and DIR*_LEAFN blocks. |
| */ |
| bp->b_ops = &xfs_da3_node_buf_ops; |
| bp->b_ops->verify_write(bp); |
| return; |
| } |
| } |
| static void * |
| xchk_da_btree_verify( |
| struct xfs_buf *bp) |
| { |
| struct xfs_da_blkinfo *info = bp->b_addr; |
| |
| switch (be16_to_cpu(info->magic)) { |
| case XFS_DIR2_LEAF1_MAGIC: |
| case XFS_DIR3_LEAF1_MAGIC: |
| bp->b_ops = &xfs_dir3_leaf1_buf_ops; |
| return bp->b_ops->verify_struct(bp); |
| default: |
| bp->b_ops = &xfs_da3_node_buf_ops; |
| return bp->b_ops->verify_struct(bp); |
| } |
| } |
| |
| static const struct xfs_buf_ops xchk_da_btree_buf_ops = { |
| .name = "xchk_da_btree", |
| .verify_read = xchk_da_btree_read_verify, |
| .verify_write = xchk_da_btree_write_verify, |
| .verify_struct = xchk_da_btree_verify, |
| }; |
| |
| /* Check a block's sibling. */ |
| STATIC int |
| xchk_da_btree_block_check_sibling( |
| struct xchk_da_btree *ds, |
| int level, |
| int direction, |
| xfs_dablk_t sibling) |
| { |
| struct xfs_da_state_path *path = &ds->state->path; |
| struct xfs_da_state_path *altpath = &ds->state->altpath; |
| int retval; |
| int plevel; |
| int error; |
| |
| memcpy(altpath, path, sizeof(ds->state->altpath)); |
| |
| /* |
| * If the pointer is null, we shouldn't be able to move the upper |
| * level pointer anywhere. |
| */ |
| if (sibling == 0) { |
| error = xfs_da3_path_shift(ds->state, altpath, direction, |
| false, &retval); |
| if (error == 0 && retval == 0) |
| xchk_da_set_corrupt(ds, level); |
| error = 0; |
| goto out; |
| } |
| |
| /* Move the alternate cursor one block in the direction given. */ |
| error = xfs_da3_path_shift(ds->state, altpath, direction, false, |
| &retval); |
| if (!xchk_da_process_error(ds, level, &error)) |
| goto out; |
| if (retval) { |
| xchk_da_set_corrupt(ds, level); |
| goto out; |
| } |
| if (altpath->blk[level].bp) |
| xchk_buffer_recheck(ds->sc, altpath->blk[level].bp); |
| |
| /* Compare upper level pointer to sibling pointer. */ |
| if (altpath->blk[level].blkno != sibling) |
| xchk_da_set_corrupt(ds, level); |
| |
| out: |
| /* Free all buffers in the altpath that aren't referenced from path. */ |
| for (plevel = 0; plevel < altpath->active; plevel++) { |
| if (altpath->blk[plevel].bp == NULL || |
| (plevel < path->active && |
| altpath->blk[plevel].bp == path->blk[plevel].bp)) |
| continue; |
| |
| xfs_trans_brelse(ds->dargs.trans, altpath->blk[plevel].bp); |
| altpath->blk[plevel].bp = NULL; |
| } |
| |
| return error; |
| } |
| |
| /* Check a block's sibling pointers. */ |
| STATIC int |
| xchk_da_btree_block_check_siblings( |
| struct xchk_da_btree *ds, |
| int level, |
| struct xfs_da_blkinfo *hdr) |
| { |
| xfs_dablk_t forw; |
| xfs_dablk_t back; |
| int error = 0; |
| |
| forw = be32_to_cpu(hdr->forw); |
| back = be32_to_cpu(hdr->back); |
| |
| /* Top level blocks should not have sibling pointers. */ |
| if (level == 0) { |
| if (forw != 0 || back != 0) |
| xchk_da_set_corrupt(ds, level); |
| return 0; |
| } |
| |
| /* |
| * Check back (left) and forw (right) pointers. These functions |
| * absorb error codes for us. |
| */ |
| error = xchk_da_btree_block_check_sibling(ds, level, 0, back); |
| if (error) |
| goto out; |
| error = xchk_da_btree_block_check_sibling(ds, level, 1, forw); |
| |
| out: |
| memset(&ds->state->altpath, 0, sizeof(ds->state->altpath)); |
| return error; |
| } |
| |
| /* Load a dir/attribute block from a btree. */ |
| STATIC int |
| xchk_da_btree_block( |
| struct xchk_da_btree *ds, |
| int level, |
| xfs_dablk_t blkno) |
| { |
| struct xfs_da_state_blk *blk; |
| struct xfs_da_intnode *node; |
| struct xfs_da_node_entry *btree; |
| struct xfs_da3_blkinfo *hdr3; |
| struct xfs_da_args *dargs = &ds->dargs; |
| struct xfs_inode *ip = ds->dargs.dp; |
| xfs_ino_t owner; |
| int *pmaxrecs; |
| struct xfs_da3_icnode_hdr nodehdr; |
| int error = 0; |
| |
| blk = &ds->state->path.blk[level]; |
| ds->state->path.active = level + 1; |
| |
| /* Release old block. */ |
| if (blk->bp) { |
| xfs_trans_brelse(dargs->trans, blk->bp); |
| blk->bp = NULL; |
| } |
| |
| /* Check the pointer. */ |
| blk->blkno = blkno; |
| if (!xchk_da_btree_ptr_ok(ds, level, blkno)) |
| goto out_nobuf; |
| |
| /* Read the buffer. */ |
| error = xfs_da_read_buf(dargs->trans, dargs->dp, blk->blkno, |
| XFS_DABUF_MAP_HOLE_OK, &blk->bp, dargs->whichfork, |
| &xchk_da_btree_buf_ops); |
| if (!xchk_da_process_error(ds, level, &error)) |
| goto out_nobuf; |
| if (blk->bp) |
| xchk_buffer_recheck(ds->sc, blk->bp); |
| |
| /* |
| * We didn't find a dir btree root block, which means that |
| * there's no LEAF1/LEAFN tree (at least not where it's supposed |
| * to be), so jump out now. |
| */ |
| if (ds->dargs.whichfork == XFS_DATA_FORK && level == 0 && |
| blk->bp == NULL) |
| goto out_nobuf; |
| |
| /* It's /not/ ok for attr trees not to have a da btree. */ |
| if (blk->bp == NULL) { |
| xchk_da_set_corrupt(ds, level); |
| goto out_nobuf; |
| } |
| |
| hdr3 = blk->bp->b_addr; |
| blk->magic = be16_to_cpu(hdr3->hdr.magic); |
| pmaxrecs = &ds->maxrecs[level]; |
| |
| /* We only started zeroing the header on v5 filesystems. */ |
| if (xfs_has_crc(ds->sc->mp) && hdr3->hdr.pad) |
| xchk_da_set_corrupt(ds, level); |
| |
| /* Check the owner. */ |
| if (xfs_has_crc(ip->i_mount)) { |
| owner = be64_to_cpu(hdr3->owner); |
| if (owner != ip->i_ino) |
| xchk_da_set_corrupt(ds, level); |
| } |
| |
| /* Check the siblings. */ |
| error = xchk_da_btree_block_check_siblings(ds, level, &hdr3->hdr); |
| if (error) |
| goto out; |
| |
| /* Interpret the buffer. */ |
| switch (blk->magic) { |
| case XFS_ATTR_LEAF_MAGIC: |
| case XFS_ATTR3_LEAF_MAGIC: |
| xfs_trans_buf_set_type(dargs->trans, blk->bp, |
| XFS_BLFT_ATTR_LEAF_BUF); |
| blk->magic = XFS_ATTR_LEAF_MAGIC; |
| blk->hashval = xfs_attr_leaf_lasthash(blk->bp, pmaxrecs); |
| if (ds->tree_level != 0) |
| xchk_da_set_corrupt(ds, level); |
| break; |
| case XFS_DIR2_LEAFN_MAGIC: |
| case XFS_DIR3_LEAFN_MAGIC: |
| xfs_trans_buf_set_type(dargs->trans, blk->bp, |
| XFS_BLFT_DIR_LEAFN_BUF); |
| blk->magic = XFS_DIR2_LEAFN_MAGIC; |
| blk->hashval = xfs_dir2_leaf_lasthash(ip, blk->bp, pmaxrecs); |
| if (ds->tree_level != 0) |
| xchk_da_set_corrupt(ds, level); |
| break; |
| case XFS_DIR2_LEAF1_MAGIC: |
| case XFS_DIR3_LEAF1_MAGIC: |
| xfs_trans_buf_set_type(dargs->trans, blk->bp, |
| XFS_BLFT_DIR_LEAF1_BUF); |
| blk->magic = XFS_DIR2_LEAF1_MAGIC; |
| blk->hashval = xfs_dir2_leaf_lasthash(ip, blk->bp, pmaxrecs); |
| if (ds->tree_level != 0) |
| xchk_da_set_corrupt(ds, level); |
| break; |
| case XFS_DA_NODE_MAGIC: |
| case XFS_DA3_NODE_MAGIC: |
| xfs_trans_buf_set_type(dargs->trans, blk->bp, |
| XFS_BLFT_DA_NODE_BUF); |
| blk->magic = XFS_DA_NODE_MAGIC; |
| node = blk->bp->b_addr; |
| xfs_da3_node_hdr_from_disk(ip->i_mount, &nodehdr, node); |
| btree = nodehdr.btree; |
| *pmaxrecs = nodehdr.count; |
| blk->hashval = be32_to_cpu(btree[*pmaxrecs - 1].hashval); |
| if (level == 0) { |
| if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) { |
| xchk_da_set_corrupt(ds, level); |
| goto out_freebp; |
| } |
| ds->tree_level = nodehdr.level; |
| } else { |
| if (ds->tree_level != nodehdr.level) { |
| xchk_da_set_corrupt(ds, level); |
| goto out_freebp; |
| } |
| } |
| |
| /* XXX: Check hdr3.pad32 once we know how to fix it. */ |
| break; |
| default: |
| xchk_da_set_corrupt(ds, level); |
| goto out_freebp; |
| } |
| |
| /* |
| * If we've been handed a block that is below the dabtree root, does |
| * its hashval match what the parent block expected to see? |
| */ |
| if (level > 0) { |
| struct xfs_da_node_entry *key; |
| |
| key = xchk_da_btree_node_entry(ds, level - 1); |
| if (be32_to_cpu(key->hashval) != blk->hashval) { |
| xchk_da_set_corrupt(ds, level); |
| goto out_freebp; |
| } |
| } |
| |
| out: |
| return error; |
| out_freebp: |
| xfs_trans_brelse(dargs->trans, blk->bp); |
| blk->bp = NULL; |
| out_nobuf: |
| blk->blkno = 0; |
| return error; |
| } |
| |
| /* Visit all nodes and leaves of a da btree. */ |
| int |
| xchk_da_btree( |
| struct xfs_scrub *sc, |
| int whichfork, |
| xchk_da_btree_rec_fn scrub_fn, |
| void *private) |
| { |
| struct xchk_da_btree *ds; |
| struct xfs_mount *mp = sc->mp; |
| struct xfs_da_state_blk *blks; |
| struct xfs_da_node_entry *key; |
| xfs_dablk_t blkno; |
| int level; |
| int error; |
| |
| /* Skip short format data structures; no btree to scan. */ |
| if (!xfs_ifork_has_extents(xfs_ifork_ptr(sc->ip, whichfork))) |
| return 0; |
| |
| /* Set up initial da state. */ |
| ds = kmem_zalloc(sizeof(struct xchk_da_btree), KM_NOFS | KM_MAYFAIL); |
| if (!ds) |
| return -ENOMEM; |
| ds->dargs.dp = sc->ip; |
| ds->dargs.whichfork = whichfork; |
| ds->dargs.trans = sc->tp; |
| ds->dargs.op_flags = XFS_DA_OP_OKNOENT; |
| ds->state = xfs_da_state_alloc(&ds->dargs); |
| ds->sc = sc; |
| ds->private = private; |
| if (whichfork == XFS_ATTR_FORK) { |
| ds->dargs.geo = mp->m_attr_geo; |
| ds->lowest = 0; |
| ds->highest = 0; |
| } else { |
| ds->dargs.geo = mp->m_dir_geo; |
| ds->lowest = ds->dargs.geo->leafblk; |
| ds->highest = ds->dargs.geo->freeblk; |
| } |
| blkno = ds->lowest; |
| level = 0; |
| |
| /* Find the root of the da tree, if present. */ |
| blks = ds->state->path.blk; |
| error = xchk_da_btree_block(ds, level, blkno); |
| if (error) |
| goto out_state; |
| /* |
| * We didn't find a block at ds->lowest, which means that there's |
| * no LEAF1/LEAFN tree (at least not where it's supposed to be), |
| * so jump out now. |
| */ |
| if (blks[level].bp == NULL) |
| goto out_state; |
| |
| blks[level].index = 0; |
| while (level >= 0 && level < XFS_DA_NODE_MAXDEPTH) { |
| /* Handle leaf block. */ |
| if (blks[level].magic != XFS_DA_NODE_MAGIC) { |
| /* End of leaf, pop back towards the root. */ |
| if (blks[level].index >= ds->maxrecs[level]) { |
| if (level > 0) |
| blks[level - 1].index++; |
| ds->tree_level++; |
| level--; |
| continue; |
| } |
| |
| /* Dispatch record scrubbing. */ |
| error = scrub_fn(ds, level); |
| if (error) |
| break; |
| if (xchk_should_terminate(sc, &error) || |
| (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) |
| break; |
| |
| blks[level].index++; |
| continue; |
| } |
| |
| |
| /* End of node, pop back towards the root. */ |
| if (blks[level].index >= ds->maxrecs[level]) { |
| if (level > 0) |
| blks[level - 1].index++; |
| ds->tree_level++; |
| level--; |
| continue; |
| } |
| |
| /* Hashes in order for scrub? */ |
| key = xchk_da_btree_node_entry(ds, level); |
| error = xchk_da_btree_hash(ds, level, &key->hashval); |
| if (error) |
| goto out; |
| |
| /* Drill another level deeper. */ |
| blkno = be32_to_cpu(key->before); |
| level++; |
| if (level >= XFS_DA_NODE_MAXDEPTH) { |
| /* Too deep! */ |
| xchk_da_set_corrupt(ds, level - 1); |
| break; |
| } |
| ds->tree_level--; |
| error = xchk_da_btree_block(ds, level, blkno); |
| if (error) |
| goto out; |
| if (blks[level].bp == NULL) |
| goto out; |
| |
| blks[level].index = 0; |
| } |
| |
| out: |
| /* Release all the buffers we're tracking. */ |
| for (level = 0; level < XFS_DA_NODE_MAXDEPTH; level++) { |
| if (blks[level].bp == NULL) |
| continue; |
| xfs_trans_brelse(sc->tp, blks[level].bp); |
| blks[level].bp = NULL; |
| } |
| |
| out_state: |
| xfs_da_state_free(ds->state); |
| kmem_free(ds); |
| return error; |
| } |