// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2018-2023 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_btree.h"
#include "xfs_btree_staging.h"
#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_inode.h"
#include "xfs_alloc.h"
#include "xfs_ialloc.h"
#include "xfs_ialloc_btree.h"
#include "xfs_icache.h"
#include "xfs_rmap.h"
#include "xfs_rmap_btree.h"
#include "xfs_log.h"
#include "xfs_trans_priv.h"
#include "xfs_error.h"
#include "xfs_health.h"
#include "xfs_ag.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/btree.h"
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/bitmap.h"
#include "scrub/agb_bitmap.h"
#include "scrub/xfile.h"
#include "scrub/xfarray.h"
#include "scrub/newbt.h"
#include "scrub/reap.h"

/*
 * Inode Btree Repair
 * ==================
 *
 * A quick refresher of inode btrees on a v5 filesystem:
 *
 * - Inode records are read into memory in units of 'inode clusters'.  The
 *   number of inodes that fit in a cluster buffer is the smallest number of
 *   inodes that can be allocated or freed.  Clusters are never smaller than
 *   one fs block, though they can span multiple blocks.  The size (in fs
 *   blocks) is computed with xfs_icluster_size_fsb().  The fs block
 *   alignment of a cluster is computed with xfs_ialloc_cluster_alignment().
 *
 * - Each inode btree record can describe a single 'inode chunk'.  The chunk
 *   size is defined to be 64 inodes.  If sparse inodes are enabled, every
 *   inobt record must be aligned to the chunk size; if not, every record
 *   must be aligned to the start of a cluster.  It is possible to construct
 *   an XFS geometry where one inobt record maps to multiple inode clusters;
 *   it is also possible to construct a geometry where multiple inobt records
 *   map to different parts of one inode cluster.
 *
 * - If sparse inodes are not enabled, the smallest unit of allocation for
 *   inode records is enough blocks to contain one inode chunk's worth of
 *   inodes.
 *
 * - If sparse inodes are enabled, the holemask field will be active.  Each
 *   bit of the holemask represents 4 potential inodes; if set, the
 *   corresponding space does *not* contain inodes and must be left alone.
 *   Clusters cannot be smaller than 4 inodes.  The smallest unit of
 *   allocation of inode records is one inode cluster.
 *
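 * As a worked example (a hypothetical geometry chosen purely for
 * illustration): with 4k fs blocks, 512-byte inodes, and 8k inode cluster
 * buffers, each cluster holds 16 inodes and spans 2 fs blocks, so one
 * 64-inode inobt record maps 4 clusters; each of the record's 16 holemask
 * bits covers 4 inodes, i.e. one quarter of a cluster.  Conversely, with
 * 64k fs blocks and 512-byte inodes, a single-block cluster holds 128
 * inodes, so two inobt records map to different halves of one cluster.
 *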
 * So what's the rebuild algorithm?
 *
 * Iterate the reverse mapping records looking for OWN_INODES and OWN_INOBT
 * records.  The OWN_INOBT records are the old inode btree blocks and will be
 * cleared out after we've rebuilt the tree.  Each possible inode cluster
 * within an OWN_INODES record will be read in; for each possible inobt
 * record associated with that cluster, compute the freemask from the i_mode
 * data in the inode chunk.  For sparse inodes the holemask will be
 * calculated by creating the properly aligned inobt record and punching out
 * any chunk that's missing.  Inode allocations and frees grab the AGI first,
 * so repair protects itself from concurrent access by locking the AGI.
 *
 * Once we've reconstructed all the inode records, we can create new inode
 * btree roots and reload the btrees.  We rebuild both inode trees at the
 * same time because they have the same rmap owner and it would be more
 * complex to figure out whether the other tree needs a rebuild and which
 * OWN_INOBT blocks each tree owns.  We have all the data we need to build
 * both, so dump everything and start over.
 *
 * We use the prefix 'xrep_ibt' because we rebuild both inode btrees at once.
 */

struct xrep_ibt {
	/* Record under construction. */
	struct xfs_inobt_rec_incore	rie;

	/* new inobt information */
	struct xrep_newbt	new_inobt;

	/* new finobt information */
	struct xrep_newbt	new_finobt;

	/* Old inode btree blocks we found in the rmap. */
	struct xagb_bitmap	old_iallocbt_blocks;

	/* Reconstructed inode records. */
	struct xfarray		*inode_records;

	struct xfs_scrub	*sc;

	/* Number of inodes assigned disk space. */
	unsigned int		icount;

	/* Number of inodes in use. */
	unsigned int		iused;

	/* Number of finobt records needed. */
	unsigned int		finobt_recs;

	/* get_records()'s position in the inode record array. */
	xfarray_idx_t		array_cur;
};

/*
 * Is this inode in use?  If the inode is in memory we can tell from i_mode,
 * otherwise we have to check di_mode in the on-disk buffer.  We only care
 * that the high (i.e. non-permission) bits of _mode are zero.  This should
 * be safe because repair keeps all AG headers locked until the end, and any
 * process trying to perform an inode allocation/free must lock the AGI.
 *
 * @cluster_ag_base is the inode offset of the cluster within the AG.
 * @cluster_bp is the cluster buffer.
 * @cluster_index is the inode offset within the inode cluster.
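 *
 * As a worked example (hypothetical 512-byte inodes, cluster starting on a
 * block boundary so cluster_buf_base is 0): for cluster_index 5, the
 * on-disk inode examined below lives at byte offset 5 * 512 = 2560 of the
 * cluster buffer.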
 */
STATIC int
xrep_ibt_check_ifree(
	struct xrep_ibt		*ri,
	xfs_agino_t		cluster_ag_base,
	struct xfs_buf		*cluster_bp,
	unsigned int		cluster_index,
	bool			*inuse)
{
	struct xfs_scrub	*sc = ri->sc;
	struct xfs_mount	*mp = sc->mp;
	struct xfs_dinode	*dip;
	xfs_ino_t		fsino;
	xfs_agino_t		agino;
	xfs_agnumber_t		agno = ri->sc->sa.pag->pag_agno;
	unsigned int		cluster_buf_base;
	unsigned int		offset;
	int			error;

	agino = cluster_ag_base + cluster_index;
	fsino = XFS_AGINO_TO_INO(mp, agno, agino);

	/* Inode uncached or half assembled, read disk buffer */
	cluster_buf_base = XFS_INO_TO_OFFSET(mp, cluster_ag_base);
	offset = (cluster_buf_base + cluster_index) * mp->m_sb.sb_inodesize;
	if (offset >= BBTOB(cluster_bp->b_length))
		return -EFSCORRUPTED;
	dip = xfs_buf_offset(cluster_bp, offset);
	if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC)
		return -EFSCORRUPTED;

	if (dip->di_version >= 3 && be64_to_cpu(dip->di_ino) != fsino)
		return -EFSCORRUPTED;

	/* Will the in-core inode tell us if it's in use? */
	error = xchk_inode_is_allocated(sc, agino, inuse);
	if (!error)
		return 0;

	*inuse = dip->di_mode != 0;
	return 0;
}

/* Stash the accumulated inobt record for rebuilding. */
STATIC int
xrep_ibt_stash(
	struct xrep_ibt		*ri)
{
	int			error = 0;

	if (xchk_should_terminate(ri->sc, &error))
		return error;

	ri->rie.ir_freecount = xfs_inobt_rec_freecount(&ri->rie);
	if (xfs_inobt_check_irec(ri->sc->sa.pag, &ri->rie) != NULL)
		return -EFSCORRUPTED;

	if (ri->rie.ir_freecount > 0)
		ri->finobt_recs++;

	trace_xrep_ibt_found(ri->sc->mp, ri->sc->sa.pag->pag_agno, &ri->rie);

	error = xfarray_append(ri->inode_records, &ri->rie);
	if (error)
		return error;

	ri->rie.ir_startino = NULLAGINO;
	return 0;
}

/*
 * Given an extent of inodes and an inode cluster buffer, calculate the
 * location of the corresponding inobt record (creating it if necessary),
 * then update the parts of the holemask and freemask of that record that
 * correspond to the inode extent we were given.
 *
 * @cluster_ir_startino is the AG inode number of an inobt record that we're
 * proposing to create for this inode cluster.  If sparse inodes are enabled,
 * we must round down to a chunk boundary to find the actual sparse record.
 * @cluster_bp is the buffer of the inode cluster.
 * @nr_inodes is the number of inodes to check from the cluster.
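 *
 * For example (a hypothetical geometry with sparse inodes enabled and
 * 32-inode clusters): a cluster at @cluster_ir_startino 96 belongs to the
 * chunk-aligned record at ir_startino 64, so cluster_base below becomes 32
 * and this cluster updates the upper half of that record's masks.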
 */
STATIC int
xrep_ibt_cluster_record(
	struct xrep_ibt		*ri,
	xfs_agino_t		cluster_ir_startino,
	struct xfs_buf		*cluster_bp,
	unsigned int		nr_inodes)
{
	struct xfs_scrub	*sc = ri->sc;
	struct xfs_mount	*mp = sc->mp;
	xfs_agino_t		ir_startino;
	unsigned int		cluster_base;
	unsigned int		cluster_index;
	int			error = 0;

	ir_startino = cluster_ir_startino;
	if (xfs_has_sparseinodes(mp))
		ir_startino = rounddown(ir_startino, XFS_INODES_PER_CHUNK);
	cluster_base = cluster_ir_startino - ir_startino;

	/*
	 * If the accumulated inobt record doesn't map this cluster, add it to
	 * the list and reset it.
	 */
	if (ri->rie.ir_startino != NULLAGINO &&
	    ri->rie.ir_startino + XFS_INODES_PER_CHUNK <= ir_startino) {
		error = xrep_ibt_stash(ri);
		if (error)
			return error;
	}

	if (ri->rie.ir_startino == NULLAGINO) {
		ri->rie.ir_startino = ir_startino;
		ri->rie.ir_free = XFS_INOBT_ALL_FREE;
		ri->rie.ir_holemask = 0xFFFF;
		ri->rie.ir_count = 0;
	}

	/* Record the whole cluster. */
	ri->icount += nr_inodes;
	ri->rie.ir_count += nr_inodes;
	ri->rie.ir_holemask &= ~xfs_inobt_maskn(
				cluster_base / XFS_INODES_PER_HOLEMASK_BIT,
				nr_inodes / XFS_INODES_PER_HOLEMASK_BIT);
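	/*
	 * For instance (hypothetical 16-inode clusters): a fully present
	 * cluster at cluster_base 16 clears holemask bits 4-7 here, marking
	 * inodes 16-31 of this chunk as present.
	 */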

	/* Which inodes within this cluster are free? */
	for (cluster_index = 0; cluster_index < nr_inodes; cluster_index++) {
		bool		inuse = false;

		error = xrep_ibt_check_ifree(ri, cluster_ir_startino,
				cluster_bp, cluster_index, &inuse);
		if (error)
			return error;
		if (!inuse)
			continue;
		ri->iused++;
		ri->rie.ir_free &= ~XFS_INOBT_MASK(cluster_base +
						   cluster_index);
	}
	return 0;
}

/*
 * For each inode cluster covering the physical extent recorded by the
 * rmapbt, we must calculate the properly aligned startino of that cluster,
 * then iterate each cluster to fill in used and filled masks appropriately.
 * We then use the (startino, used, filled) information to construct the
 * appropriate inode records.
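 *
 * (With a hypothetical 128-inode cluster, for instance, the loop below
 * visits the two possible inobt records at cluster_ag_base + 0 and
 * cluster_ag_base + 64.)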
 */
STATIC int
xrep_ibt_process_cluster(
	struct xrep_ibt		*ri,
	xfs_agblock_t		cluster_bno)
{
	struct xfs_imap		imap;
	struct xfs_buf		*cluster_bp;
	struct xfs_scrub	*sc = ri->sc;
	struct xfs_mount	*mp = sc->mp;
	struct xfs_ino_geometry	*igeo = M_IGEO(mp);
	xfs_agino_t		cluster_ag_base;
	xfs_agino_t		irec_index;
	unsigned int		nr_inodes;
	int			error;

	nr_inodes = min_t(unsigned int, igeo->inodes_per_cluster,
			XFS_INODES_PER_CHUNK);

	/*
	 * Grab the inode cluster buffer.  This is safe to do with a broken
	 * inobt because imap_to_bp directly maps the buffer without touching
	 * either inode btree.
	 */
	imap.im_blkno = XFS_AGB_TO_DADDR(mp, sc->sa.pag->pag_agno, cluster_bno);
	imap.im_len = XFS_FSB_TO_BB(mp, igeo->blocks_per_cluster);
	imap.im_boffset = 0;
	error = xfs_imap_to_bp(mp, sc->tp, &imap, &cluster_bp);
	if (error)
		return error;

	/*
	 * Record the contents of each possible inobt record mapping this
	 * cluster.
	 */
	cluster_ag_base = XFS_AGB_TO_AGINO(mp, cluster_bno);
	for (irec_index = 0;
	     irec_index < igeo->inodes_per_cluster;
	     irec_index += XFS_INODES_PER_CHUNK) {
		error = xrep_ibt_cluster_record(ri,
				cluster_ag_base + irec_index, cluster_bp,
				nr_inodes);
		if (error)
			break;
	}

	xfs_trans_brelse(sc->tp, cluster_bp);
	return error;
}

/* Check for any obvious conflicts in the inode chunk extent. */
STATIC int
xrep_ibt_check_inode_ext(
	struct xfs_scrub	*sc,
	xfs_agblock_t		agbno,
	xfs_extlen_t		len)
{
	struct xfs_mount	*mp = sc->mp;
	struct xfs_ino_geometry	*igeo = M_IGEO(mp);
	xfs_agino_t		agino;
	enum xbtree_recpacking	outcome;
	int			error;

	/* Inode records must be within the AG. */
	if (!xfs_verify_agbext(sc->sa.pag, agbno, len))
		return -EFSCORRUPTED;

	/* The entire record must align to the inode cluster size. */
	if (!IS_ALIGNED(agbno, igeo->blocks_per_cluster) ||
	    !IS_ALIGNED(agbno + len, igeo->blocks_per_cluster))
		return -EFSCORRUPTED;

	/*
	 * The entire record must also adhere to the inode cluster alignment
	 * size if sparse inodes are not enabled.
	 */
	if (!xfs_has_sparseinodes(mp) &&
	    (!IS_ALIGNED(agbno, igeo->cluster_align) ||
	     !IS_ALIGNED(agbno + len, igeo->cluster_align)))
		return -EFSCORRUPTED;

	/*
	 * On a sparse inode fs, this cluster could be part of a sparse
	 * chunk.  Sparse clusters must be aligned to sparse chunk alignment.
	 */
	if (xfs_has_sparseinodes(mp) && mp->m_sb.sb_spino_align &&
	    (!IS_ALIGNED(agbno, mp->m_sb.sb_spino_align) ||
	     !IS_ALIGNED(agbno + len, mp->m_sb.sb_spino_align)))
		return -EFSCORRUPTED;

	/* Make sure the entire range of blocks are valid AG inodes. */
	agino = XFS_AGB_TO_AGINO(mp, agbno);
	if (!xfs_verify_agino(sc->sa.pag, agino))
		return -EFSCORRUPTED;

	agino = XFS_AGB_TO_AGINO(mp, agbno + len) - 1;
	if (!xfs_verify_agino(sc->sa.pag, agino))
		return -EFSCORRUPTED;

	/* Make sure this isn't free space. */
	error = xfs_alloc_has_records(sc->sa.bno_cur, agbno, len, &outcome);
	if (error)
		return error;
	if (outcome != XBTREE_RECPACKING_EMPTY)
		return -EFSCORRUPTED;

	return 0;
}

/* Found a fragment of the old inode btrees; dispose of them later. */
STATIC int
xrep_ibt_record_old_btree_blocks(
	struct xrep_ibt			*ri,
	const struct xfs_rmap_irec	*rec)
{
	if (!xfs_verify_agbext(ri->sc->sa.pag, rec->rm_startblock,
				rec->rm_blockcount))
		return -EFSCORRUPTED;

	return xagb_bitmap_set(&ri->old_iallocbt_blocks, rec->rm_startblock,
			rec->rm_blockcount);
}

/* Record extents that belong to inode cluster blocks. */
STATIC int
xrep_ibt_record_inode_blocks(
	struct xrep_ibt			*ri,
	const struct xfs_rmap_irec	*rec)
{
	struct xfs_mount	*mp = ri->sc->mp;
	struct xfs_ino_geometry	*igeo = M_IGEO(mp);
	xfs_agblock_t		cluster_base;
	int			error;

	error = xrep_ibt_check_inode_ext(ri->sc, rec->rm_startblock,
			rec->rm_blockcount);
	if (error)
		return error;

	trace_xrep_ibt_walk_rmap(mp, ri->sc->sa.pag->pag_agno,
			rec->rm_startblock, rec->rm_blockcount, rec->rm_owner,
			rec->rm_offset, rec->rm_flags);

	/*
	 * Record the free/hole masks for each inode cluster that could be
	 * mapped by this rmap record.
	 */
	for (cluster_base = 0;
	     cluster_base < rec->rm_blockcount;
	     cluster_base += igeo->blocks_per_cluster) {
		error = xrep_ibt_process_cluster(ri,
				rec->rm_startblock + cluster_base);
		if (error)
			return error;
	}

	return 0;
}

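/* Figure out what to do with an rmap record based on its owner. */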
STATIC int
xrep_ibt_walk_rmap(
	struct xfs_btree_cur		*cur,
	const struct xfs_rmap_irec	*rec,
	void				*priv)
{
	struct xrep_ibt		*ri = priv;
	int			error = 0;

	if (xchk_should_terminate(ri->sc, &error))
		return error;

	switch (rec->rm_owner) {
	case XFS_RMAP_OWN_INOBT:
		return xrep_ibt_record_old_btree_blocks(ri, rec);
	case XFS_RMAP_OWN_INODES:
		return xrep_ibt_record_inode_blocks(ri, rec);
	}
	return 0;
}

/*
 * Iterate all reverse mappings to find the inodes (OWN_INODES) and the inode
 * btrees (OWN_INOBT).  Figure out if we have enough free space to
 * reconstruct the inode btrees.  The caller must clean up the lists if
 * anything goes wrong.
 */
STATIC int
xrep_ibt_find_inodes(
	struct xrep_ibt		*ri)
{
	struct xfs_scrub	*sc = ri->sc;
	int			error;

	ri->rie.ir_startino = NULLAGINO;

	/* Collect all reverse mappings for inode blocks. */
	xrep_ag_btcur_init(sc, &sc->sa);
	error = xfs_rmap_query_all(sc->sa.rmap_cur, xrep_ibt_walk_rmap, ri);
	xchk_ag_btcur_free(&sc->sa);
	if (error)
		return error;

	/* If we have a record ready to go, add it to the array. */
	if (ri->rie.ir_startino != NULLAGINO)
		return xrep_ibt_stash(ri);

	return 0;
}

/* Update the AGI counters. */
STATIC int
xrep_ibt_reset_counters(
	struct xrep_ibt		*ri)
{
	struct xfs_scrub	*sc = ri->sc;
	struct xfs_agi		*agi = sc->sa.agi_bp->b_addr;
	unsigned int		freecount = ri->icount - ri->iused;

	/* Trigger inode count recalculation */
	xfs_force_summary_recalc(sc->mp);

	/*
	 * The AGI header contains extra information related to the inode
	 * btrees, so we must update those fields here.
	 */
	agi->agi_count = cpu_to_be32(ri->icount);
	agi->agi_freecount = cpu_to_be32(freecount);
	xfs_ialloc_log_agi(sc->tp, sc->sa.agi_bp,
			   XFS_AGI_COUNT | XFS_AGI_FREECOUNT);

	/* Reinitialize with the values we just logged. */
	return xrep_reinit_pagi(sc);
}

/* Retrieve finobt data for bulk load. */
STATIC int
xrep_fibt_get_records(
	struct xfs_btree_cur	*cur,
	unsigned int		idx,
	struct xfs_btree_block	*block,
	unsigned int		nr_wanted,
	void			*priv)
{
	struct xfs_inobt_rec_incore	*irec = &cur->bc_rec.i;
	struct xrep_ibt		*ri = priv;
	union xfs_btree_rec	*block_rec;
	unsigned int		loaded;
	int			error;

	for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
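		/*
		 * Only inode chunks with at least one free inode appear in
		 * the finobt, so skip fully allocated records here.
		 */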
		do {
			error = xfarray_load(ri->inode_records,
					ri->array_cur++, irec);
		} while (error == 0 && xfs_inobt_rec_freecount(irec) == 0);
		if (error)
			return error;

		block_rec = xfs_btree_rec_addr(cur, idx, block);
		cur->bc_ops->init_rec_from_cur(cur, block_rec);
	}

	return loaded;
}

/* Retrieve inobt data for bulk load. */
STATIC int
xrep_ibt_get_records(
	struct xfs_btree_cur	*cur,
	unsigned int		idx,
	struct xfs_btree_block	*block,
	unsigned int		nr_wanted,
	void			*priv)
{
	struct xfs_inobt_rec_incore	*irec = &cur->bc_rec.i;
	struct xrep_ibt		*ri = priv;
	union xfs_btree_rec	*block_rec;
	unsigned int		loaded;
	int			error;

	for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
		error = xfarray_load(ri->inode_records, ri->array_cur++, irec);
		if (error)
			return error;

		block_rec = xfs_btree_rec_addr(cur, idx, block);
		cur->bc_ops->init_rec_from_cur(cur, block_rec);
	}

	return loaded;
}

/* Feed one of the new inobt blocks to the bulk loader. */
STATIC int
xrep_ibt_claim_block(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr,
	void			*priv)
{
	struct xrep_ibt		*ri = priv;

	return xrep_newbt_claim_block(cur, &ri->new_inobt, ptr);
}

/* Feed one of the new finobt blocks to the bulk loader. */
STATIC int
xrep_fibt_claim_block(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr,
	void			*priv)
{
	struct xrep_ibt		*ri = priv;

	return xrep_newbt_claim_block(cur, &ri->new_finobt, ptr);
}

/* Make sure the records do not overlap in inumber address space. */
STATIC int
xrep_ibt_check_overlap(
	struct xrep_ibt		*ri)
{
	struct xfs_inobt_rec_incore	irec;
	xfarray_idx_t		cur;
	xfs_agino_t		next_agino = 0;
	int			error = 0;

	foreach_xfarray_idx(ri->inode_records, cur) {
		if (xchk_should_terminate(ri->sc, &error))
			return error;

		error = xfarray_load(ri->inode_records, cur, &irec);
		if (error)
			return error;

		if (irec.ir_startino < next_agino)
			return -EFSCORRUPTED;

		next_agino = irec.ir_startino + XFS_INODES_PER_CHUNK;
	}

	return error;
}

/* Build new inode btrees and dispose of the old one. */
STATIC int
xrep_ibt_build_new_trees(
	struct xrep_ibt		*ri)
{
	struct xfs_scrub	*sc = ri->sc;
	struct xfs_btree_cur	*ino_cur;
	struct xfs_btree_cur	*fino_cur = NULL;
	xfs_fsblock_t		fsbno;
	bool			need_finobt;
	int			error;

	need_finobt = xfs_has_finobt(sc->mp);

	/*
	 * Create new btrees for staging all the inobt records we collected
	 * earlier.  The records were collected in order of increasing agino,
	 * so we do not have to sort them.  Ensure there are no overlapping
	 * records.
	 */
	error = xrep_ibt_check_overlap(ri);
	if (error)
		return error;

	/*
	 * The new inode btrees will not be rooted in the AGI until we've
	 * successfully rebuilt the tree.
	 *
	 * Start by setting up the inobt staging cursor.
	 */
	fsbno = XFS_AGB_TO_FSB(sc->mp, sc->sa.pag->pag_agno,
			XFS_IBT_BLOCK(sc->mp));
	xrep_newbt_init_ag(&ri->new_inobt, sc, &XFS_RMAP_OINFO_INOBT, fsbno,
			XFS_AG_RESV_NONE);
	ri->new_inobt.bload.claim_block = xrep_ibt_claim_block;
	ri->new_inobt.bload.get_records = xrep_ibt_get_records;

	ino_cur = xfs_inobt_init_cursor(sc->sa.pag, NULL, NULL);
	xfs_btree_stage_afakeroot(ino_cur, &ri->new_inobt.afake);
	error = xfs_btree_bload_compute_geometry(ino_cur, &ri->new_inobt.bload,
			xfarray_length(ri->inode_records));
	if (error)
		goto err_inocur;

	/* Set up finobt staging cursor. */
	if (need_finobt) {
		enum xfs_ag_resv_type	resv = XFS_AG_RESV_METADATA;

		if (sc->mp->m_finobt_nores)
			resv = XFS_AG_RESV_NONE;

		fsbno = XFS_AGB_TO_FSB(sc->mp, sc->sa.pag->pag_agno,
				XFS_FIBT_BLOCK(sc->mp));
		xrep_newbt_init_ag(&ri->new_finobt, sc, &XFS_RMAP_OINFO_INOBT,
				fsbno, resv);
		ri->new_finobt.bload.claim_block = xrep_fibt_claim_block;
		ri->new_finobt.bload.get_records = xrep_fibt_get_records;

		fino_cur = xfs_finobt_init_cursor(sc->sa.pag, NULL, NULL);
		xfs_btree_stage_afakeroot(fino_cur, &ri->new_finobt.afake);
		error = xfs_btree_bload_compute_geometry(fino_cur,
				&ri->new_finobt.bload, ri->finobt_recs);
		if (error)
			goto err_finocur;
	}

	/* Last chance to abort before we start committing fixes. */
	if (xchk_should_terminate(sc, &error))
		goto err_finocur;

	/* Reserve all the space we need to build the new btrees. */
	error = xrep_newbt_alloc_blocks(&ri->new_inobt,
			ri->new_inobt.bload.nr_blocks);
	if (error)
		goto err_finocur;

	if (need_finobt) {
		error = xrep_newbt_alloc_blocks(&ri->new_finobt,
				ri->new_finobt.bload.nr_blocks);
		if (error)
			goto err_finocur;
	}

	/* Add all inobt records. */
	ri->array_cur = XFARRAY_CURSOR_INIT;
	error = xfs_btree_bload(ino_cur, &ri->new_inobt.bload, ri);
	if (error)
		goto err_finocur;

	/* Add all finobt records. */
	if (need_finobt) {
		ri->array_cur = XFARRAY_CURSOR_INIT;
		error = xfs_btree_bload(fino_cur, &ri->new_finobt.bload, ri);
		if (error)
			goto err_finocur;
	}

	/*
	 * Install the new btrees in the AG header.  After this point the old
	 * btrees are no longer accessible and the new trees are live.
	 */
	xfs_inobt_commit_staged_btree(ino_cur, sc->tp, sc->sa.agi_bp);
	xfs_btree_del_cursor(ino_cur, 0);

	if (fino_cur) {
		xfs_inobt_commit_staged_btree(fino_cur, sc->tp, sc->sa.agi_bp);
		xfs_btree_del_cursor(fino_cur, 0);
	}

	/* Reset the AGI counters now that we've changed the inode roots. */
	error = xrep_ibt_reset_counters(ri);
	if (error)
		goto err_finobt;

	/* Free unused blocks and bitmap. */
	if (need_finobt) {
		error = xrep_newbt_commit(&ri->new_finobt);
		if (error)
			goto err_inobt;
	}
	error = xrep_newbt_commit(&ri->new_inobt);
	if (error)
		return error;

	return xrep_roll_ag_trans(sc);

err_finocur:
	if (need_finobt)
		xfs_btree_del_cursor(fino_cur, error);
err_inocur:
	xfs_btree_del_cursor(ino_cur, error);
err_finobt:
	if (need_finobt)
		xrep_newbt_cancel(&ri->new_finobt);
err_inobt:
	xrep_newbt_cancel(&ri->new_inobt);
	return error;
}

/*
 * Now that we've logged the roots of the new btrees, invalidate all of the
 * old blocks and free them.
 */
STATIC int
xrep_ibt_remove_old_trees(
	struct xrep_ibt		*ri)
{
	struct xfs_scrub	*sc = ri->sc;
	int			error;

	/*
	 * Free the old inode btree blocks if they're not in use.  It's ok to
	 * reap with XFS_AG_RESV_NONE even if the finobt had a per-AG
	 * reservation because we reset the reservation before releasing the
	 * AGI and AGF header buffer locks.
	 */
	error = xrep_reap_agblocks(sc, &ri->old_iallocbt_blocks,
			&XFS_RMAP_OINFO_INOBT, XFS_AG_RESV_NONE);
	if (error)
		return error;

	/*
	 * If the finobt is enabled and has a per-AG reservation, make sure
	 * we reinitialize the per-AG reservations.
	 */
	if (xfs_has_finobt(sc->mp) && !sc->mp->m_finobt_nores)
		sc->flags |= XREP_RESET_PERAG_RESV;

	return 0;
}

/* Repair both inode btrees. */
int
xrep_iallocbt(
	struct xfs_scrub	*sc)
{
	struct xrep_ibt		*ri;
	struct xfs_mount	*mp = sc->mp;
	char			*descr;
	xfs_agino_t		first_agino, last_agino;
	int			error = 0;

	/* We require the rmapbt to rebuild anything. */
	if (!xfs_has_rmapbt(mp))
		return -EOPNOTSUPP;

	ri = kzalloc(sizeof(struct xrep_ibt), XCHK_GFP_FLAGS);
	if (!ri)
		return -ENOMEM;
	ri->sc = sc;

	/* We rebuild both inode btrees. */
	sc->sick_mask = XFS_SICK_AG_INOBT | XFS_SICK_AG_FINOBT;

	/* Set up enough storage to handle an AG with nothing but inodes. */
	xfs_agino_range(mp, sc->sa.pag->pag_agno, &first_agino, &last_agino);
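	/*
	 * At most one inobt record exists per 64-inode chunk, so the chunk
	 * count computed below is an upper bound on the number of records.
	 */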
	last_agino /= XFS_INODES_PER_CHUNK;
	descr = xchk_xfile_ag_descr(sc, "inode index records");
	error = xfarray_create(descr, last_agino,
			sizeof(struct xfs_inobt_rec_incore),
			&ri->inode_records);
	kfree(descr);
	if (error)
		goto out_ri;

	/* Collect the inode data and find the old btree blocks. */
	xagb_bitmap_init(&ri->old_iallocbt_blocks);
	error = xrep_ibt_find_inodes(ri);
	if (error)
		goto out_bitmap;

	/* Rebuild the inode indexes. */
	error = xrep_ibt_build_new_trees(ri);
	if (error)
		goto out_bitmap;

	/* Kill the old tree. */
	error = xrep_ibt_remove_old_trees(ri);
	if (error)
		goto out_bitmap;

out_bitmap:
	xagb_bitmap_destroy(&ri->old_iallocbt_blocks);
	xfarray_destroy(ri->inode_records);
out_ri:
	kfree(ri);
	return error;
}

/* Make sure both btrees are ok after we've rebuilt them. */
int
xrep_revalidate_iallocbt(
	struct xfs_scrub	*sc)
{
	__u32			old_type = sc->sm->sm_type;
	int			error;

	/*
	 * We must update sm_type temporarily so that the tree-to-tree cross
	 * reference checks will work in the correct direction, and also so
	 * that tracing will report correctly if there are more errors.
	 */
	sc->sm->sm_type = XFS_SCRUB_TYPE_INOBT;
	error = xchk_iallocbt(sc);
	if (error)
		goto out;

	if (xfs_has_finobt(sc->mp)) {
		sc->sm->sm_type = XFS_SCRUB_TYPE_FINOBT;
		error = xchk_iallocbt(sc);
	}

out:
	sc->sm->sm_type = old_type;
	return error;
}