| // SPDX-License-Identifier: GPL-2.0-or-later |
| /* |
| * Copyright (c) 2020-2024 Oracle. All Rights Reserved. |
| * Author: Darrick J. Wong <djwong@kernel.org> |
| */ |
| #include "xfs.h" |
| #include "xfs_fs.h" |
| #include "xfs_shared.h" |
| #include "xfs_format.h" |
| #include "xfs_trans_resv.h" |
| #include "xfs_mount.h" |
| #include "xfs_defer.h" |
| #include "xfs_bit.h" |
| #include "xfs_log_format.h" |
| #include "xfs_trans.h" |
| #include "xfs_sb.h" |
| #include "xfs_inode.h" |
| #include "xfs_icache.h" |
| #include "xfs_da_format.h" |
| #include "xfs_da_btree.h" |
| #include "xfs_dir2.h" |
| #include "xfs_dir2_priv.h" |
| #include "xfs_bmap.h" |
| #include "xfs_quota.h" |
| #include "xfs_bmap_btree.h" |
| #include "xfs_trans_space.h" |
| #include "xfs_bmap_util.h" |
| #include "xfs_exchmaps.h" |
| #include "xfs_exchrange.h" |
| #include "xfs_ag.h" |
| #include "xfs_parent.h" |
| #include "scrub/xfs_scrub.h" |
| #include "scrub/scrub.h" |
| #include "scrub/common.h" |
| #include "scrub/trace.h" |
| #include "scrub/repair.h" |
| #include "scrub/tempfile.h" |
| #include "scrub/tempexch.h" |
| #include "scrub/xfile.h" |
| #include "scrub/xfarray.h" |
| #include "scrub/xfblob.h" |
| #include "scrub/iscan.h" |
| #include "scrub/readdir.h" |
| #include "scrub/reap.h" |
| #include "scrub/findparent.h" |
| #include "scrub/orphanage.h" |
| #include "scrub/listxattr.h" |
| |
| /* |
| * Directory Repair |
| * ================ |
| * |
| * We repair directories by reading the directory data blocks looking for |
| * directory entries that look salvageable (name passes verifiers, entry points |
| * to a valid allocated inode, etc). Each entry worth salvaging is stashed in |
| * memory, and the stashed entries are periodically replayed into a temporary |
| * directory to constrain memory use. Batching the construction of the |
| * temporary directory in this fashion reduces lock cycling of the directory |
| * being repaired and the temporary directory, and will later become important |
| * for parent pointer scanning. |
| * |
| * If parent pointers are enabled on this filesystem, we instead reconstruct |
| * the directory by visiting each parent pointer of each file in the filesystem |
| * and translating the relevant parent pointer records into dirents. In this |
| * case, it is advantageous to stash all directory entries created from parent |
| * pointers for a single child file before replaying them into the temporary |
| * directory. To save memory, the live filesystem scan reuses the findparent |
| * fields. Directory repair chooses either parent pointer scanning or |
| * directory entry salvaging, but not both. |
| * |
| * Directory entries added to the temporary directory do not elevate the link |
| * counts of the inodes found. When salvaging completes, the remaining stashed |
| * entries are replayed to the temporary directory. An atomic mapping exchange |
| * is used to commit the new directory blocks to the directory being repaired. |
| * This will disrupt readdir cursors. |
| * |
| * Locking Issues |
| * -------------- |
| * |
| * If /a, /a/b, and /c are all directories, the VFS does not take i_rwsem on |
| * /a/b for a "mv /a/b /c/" operation. This means that only b's ILOCK protects |
| * b's dotdot update. This is in contrast to every other dotdot update (link, |
| * remove, mkdir). If the repair code drops the ILOCK, it must either |
| * revalidate the dotdot entry or use dirent hooks to capture updates from |
| * other threads. |
| */ |
| |
| /* Create a dirent in the tempdir. */ |
| #define XREP_DIRENT_ADD (1) |
| |
| /* Remove a dirent from the tempdir. */ |
| #define XREP_DIRENT_REMOVE (2) |
| |
| /* Directory entry to be restored in the new directory. */ |
| struct xrep_dirent { |
| /* Cookie for retrieval of the dirent name. */ |
| xfblob_cookie name_cookie; |
| |
| /* Target inode number. */ |
| xfs_ino_t ino; |
| |
| /* Length of the dirent name. */ |
| uint8_t namelen; |
| |
| /* File type of the dirent. */ |
| uint8_t ftype; |
| |
| /* XREP_DIRENT_{ADD,REMOVE} */ |
| uint8_t action; |
| }; |
| |
| /* |
| * Stash up to 8 pages of recovered dirent data in dir_entries and dir_names |
| * before we write them to the temp dir. |
| */ |
| #define XREP_DIR_MAX_STASH_BYTES (PAGE_SIZE * 8) |
| |
| struct xrep_dir { |
| struct xfs_scrub *sc; |
| |
| /* Fixed-size array of xrep_dirent structures. */ |
| struct xfarray *dir_entries; |
| |
| /* Blobs containing directory entry names. */ |
| struct xfblob *dir_names; |
| |
| /* Information for exchanging data forks at the end. */ |
| struct xrep_tempexch tx; |
| |
| /* Preallocated args struct for performing dir operations */ |
| struct xfs_da_args args; |
| |
| /* |
| * Information used to scan the filesystem to find the inumber of the |
| * dotdot entry for this directory. For directory salvaging when |
| * parent pointers are not enabled, we use the findparent_* functions |
| * on this object and access only the parent_ino field directly. |
| * |
| * When parent pointers are enabled, however, the pptr scanner uses the |
| * iscan, hooks, lock, and parent_ino fields of this object directly. |
| * @pscan.lock coordinates access to dir_entries, dir_names, |
| * parent_ino, subdirs, dirents, and args. This reduces the memory |
| * requirements of this structure. |
| */ |
| struct xrep_parent_scan_info pscan; |
| |
| /* |
| * Context information for attaching this directory to the lost+found |
| * if this directory does not have a parent. |
| */ |
| struct xrep_adoption adoption; |
| |
| /* How many subdirectories did we find? */ |
| uint64_t subdirs; |
| |
| /* How many dirents did we find? */ |
| unsigned int dirents; |
| |
| /* Should we move this directory to the orphanage? */ |
| bool needs_adoption; |
| |
| /* Directory entry name, plus the trailing null. */ |
| struct xfs_name xname; |
| unsigned char namebuf[MAXNAMELEN]; |
| }; |
| |
| /* Tear down all the incore stuff we created. */ |
| static void |
| xrep_dir_teardown( |
| struct xfs_scrub *sc) |
| { |
| struct xrep_dir *rd = sc->buf; |
| |
| xrep_findparent_scan_teardown(&rd->pscan); |
| xfblob_destroy(rd->dir_names); |
| xfarray_destroy(rd->dir_entries); |
| } |
| |
| /* Set up for a directory repair. */ |
| int |
| xrep_setup_directory( |
| struct xfs_scrub *sc) |
| { |
| struct xrep_dir *rd; |
| int error; |
| |
| xchk_fsgates_enable(sc, XCHK_FSGATES_DIRENTS); |
| |
| error = xrep_orphanage_try_create(sc); |
| if (error) |
| return error; |
| |
| error = xrep_tempfile_create(sc, S_IFDIR); |
| if (error) |
| return error; |
| |
| rd = kvzalloc(sizeof(struct xrep_dir), XCHK_GFP_FLAGS); |
| if (!rd) |
| return -ENOMEM; |
| rd->sc = sc; |
| rd->xname.name = rd->namebuf; |
| sc->buf = rd; |
| |
| return 0; |
| } |
| |
| /* |
| * Look up the dotdot entry and confirm that it's really the parent. |
| * Returns NULLFSINO if we don't know what to do. |
| */ |
| static inline xfs_ino_t |
| xrep_dir_lookup_parent( |
| struct xrep_dir *rd) |
| { |
| struct xfs_scrub *sc = rd->sc; |
| xfs_ino_t ino; |
| int error; |
| |
| error = xfs_dir_lookup(sc->tp, sc->ip, &xfs_name_dotdot, &ino, NULL); |
| if (error) |
| return NULLFSINO; |
| if (!xfs_verify_dir_ino(sc->mp, ino)) |
| return NULLFSINO; |
| |
| error = xrep_findparent_confirm(sc, &ino); |
| if (error) |
| return NULLFSINO; |
| |
| return ino; |
| } |
| |
| /* |
| * Look up '..' in the dentry cache and confirm that it's really the parent. |
| * Returns NULLFSINO if the dcache misses or if the hit is implausible. |
| */ |
| static inline xfs_ino_t |
| xrep_dir_dcache_parent( |
| struct xrep_dir *rd) |
| { |
| struct xfs_scrub *sc = rd->sc; |
| xfs_ino_t parent_ino; |
| int error; |
| |
| parent_ino = xrep_findparent_from_dcache(sc); |
| if (parent_ino == NULLFSINO) |
| return parent_ino; |
| |
| error = xrep_findparent_confirm(sc, &parent_ino); |
| if (error) |
| return NULLFSINO; |
| |
| return parent_ino; |
| } |
| |
| /* Try to find the parent of the directory being repaired. */ |
| STATIC int |
| xrep_dir_find_parent( |
| struct xrep_dir *rd) |
| { |
| xfs_ino_t ino; |
| |
| ino = xrep_findparent_self_reference(rd->sc); |
| if (ino != NULLFSINO) { |
| xrep_findparent_scan_finish_early(&rd->pscan, ino); |
| return 0; |
| } |
| |
| ino = xrep_dir_dcache_parent(rd); |
| if (ino != NULLFSINO) { |
| xrep_findparent_scan_finish_early(&rd->pscan, ino); |
| return 0; |
| } |
| |
| ino = xrep_dir_lookup_parent(rd); |
| if (ino != NULLFSINO) { |
| xrep_findparent_scan_finish_early(&rd->pscan, ino); |
| return 0; |
| } |
| |
| /* |
| * A full filesystem scan is the last resort. On a busy filesystem, |
| * the scan can fail with -EBUSY if we cannot grab IOLOCKs. That means |
| * that we don't know what who the parent is, so we should return to |
| * userspace. |
| */ |
| return xrep_findparent_scan(&rd->pscan); |
| } |
| |
| /* |
| * Decide if we want to salvage this entry. We don't bother with oversized |
| * names or the dot entry. |
| */ |
| STATIC int |
| xrep_dir_want_salvage( |
| struct xrep_dir *rd, |
| const char *name, |
| int namelen, |
| xfs_ino_t ino) |
| { |
| struct xfs_mount *mp = rd->sc->mp; |
| |
| /* No pointers to ourselves or to garbage. */ |
| if (ino == rd->sc->ip->i_ino) |
| return false; |
| if (!xfs_verify_dir_ino(mp, ino)) |
| return false; |
| |
| /* No weird looking names or dot entries. */ |
| if (namelen >= MAXNAMELEN || namelen <= 0) |
| return false; |
| if (namelen == 1 && name[0] == '.') |
| return false; |
| if (!xfs_dir2_namecheck(name, namelen)) |
| return false; |
| |
| return true; |
| } |
| |
| /* |
| * Remember that we want to create a dirent in the tempdir. These stashed |
| * actions will be replayed later. |
| */ |
| STATIC int |
| xrep_dir_stash_createname( |
| struct xrep_dir *rd, |
| const struct xfs_name *name, |
| xfs_ino_t ino) |
| { |
| struct xrep_dirent dirent = { |
| .action = XREP_DIRENT_ADD, |
| .ino = ino, |
| .namelen = name->len, |
| .ftype = name->type, |
| }; |
| int error; |
| |
| trace_xrep_dir_stash_createname(rd->sc->tempip, name, ino); |
| |
| error = xfblob_storename(rd->dir_names, &dirent.name_cookie, name); |
| if (error) |
| return error; |
| |
| return xfarray_append(rd->dir_entries, &dirent); |
| } |
| |
| /* |
| * Remember that we want to remove a dirent from the tempdir. These stashed |
| * actions will be replayed later. |
| */ |
| STATIC int |
| xrep_dir_stash_removename( |
| struct xrep_dir *rd, |
| const struct xfs_name *name, |
| xfs_ino_t ino) |
| { |
| struct xrep_dirent dirent = { |
| .action = XREP_DIRENT_REMOVE, |
| .ino = ino, |
| .namelen = name->len, |
| .ftype = name->type, |
| }; |
| int error; |
| |
| trace_xrep_dir_stash_removename(rd->sc->tempip, name, ino); |
| |
| error = xfblob_storename(rd->dir_names, &dirent.name_cookie, name); |
| if (error) |
| return error; |
| |
| return xfarray_append(rd->dir_entries, &dirent); |
| } |
| |
| /* Allocate an in-core record to hold entries while we rebuild the dir data. */ |
| STATIC int |
| xrep_dir_salvage_entry( |
| struct xrep_dir *rd, |
| unsigned char *name, |
| unsigned int namelen, |
| xfs_ino_t ino) |
| { |
| struct xfs_name xname = { |
| .name = name, |
| }; |
| struct xfs_scrub *sc = rd->sc; |
| struct xfs_inode *ip; |
| unsigned int i = 0; |
| int error = 0; |
| |
| if (xchk_should_terminate(sc, &error)) |
| return error; |
| |
| /* |
| * Truncate the name to the first character that would trip namecheck. |
| * If we no longer have a name after that, ignore this entry. |
| */ |
| while (i < namelen && name[i] != 0 && name[i] != '/') |
| i++; |
| if (i == 0) |
| return 0; |
| xname.len = i; |
| |
| /* Ignore '..' entries; we already picked the new parent. */ |
| if (xname.len == 2 && name[0] == '.' && name[1] == '.') { |
| trace_xrep_dir_salvaged_parent(sc->ip, ino); |
| return 0; |
| } |
| |
| trace_xrep_dir_salvage_entry(sc->ip, &xname, ino); |
| |
| /* |
| * Compute the ftype or dump the entry if we can't. We don't lock the |
| * inode because inodes can't change type while we have a reference. |
| */ |
| error = xchk_iget(sc, ino, &ip); |
| if (error) |
| return 0; |
| |
| /* Don't mix metadata and regular directory trees. */ |
| if (xfs_is_metadir_inode(ip) != xfs_is_metadir_inode(rd->sc->ip)) { |
| xchk_irele(sc, ip); |
| return 0; |
| } |
| |
| xname.type = xfs_mode_to_ftype(VFS_I(ip)->i_mode); |
| xchk_irele(sc, ip); |
| |
| return xrep_dir_stash_createname(rd, &xname, ino); |
| } |
| |
| /* Record a shortform directory entry for later reinsertion. */ |
| STATIC int |
| xrep_dir_salvage_sf_entry( |
| struct xrep_dir *rd, |
| struct xfs_dir2_sf_hdr *sfp, |
| struct xfs_dir2_sf_entry *sfep) |
| { |
| xfs_ino_t ino; |
| |
| ino = xfs_dir2_sf_get_ino(rd->sc->mp, sfp, sfep); |
| if (!xrep_dir_want_salvage(rd, sfep->name, sfep->namelen, ino)) |
| return 0; |
| |
| return xrep_dir_salvage_entry(rd, sfep->name, sfep->namelen, ino); |
| } |
| |
| /* Record a regular directory entry for later reinsertion. */ |
| STATIC int |
| xrep_dir_salvage_data_entry( |
| struct xrep_dir *rd, |
| struct xfs_dir2_data_entry *dep) |
| { |
| xfs_ino_t ino; |
| |
| ino = be64_to_cpu(dep->inumber); |
| if (!xrep_dir_want_salvage(rd, dep->name, dep->namelen, ino)) |
| return 0; |
| |
| return xrep_dir_salvage_entry(rd, dep->name, dep->namelen, ino); |
| } |
| |
| /* Try to recover block/data format directory entries. */ |
| STATIC int |
| xrep_dir_recover_data( |
| struct xrep_dir *rd, |
| struct xfs_buf *bp) |
| { |
| struct xfs_da_geometry *geo = rd->sc->mp->m_dir_geo; |
| unsigned int offset; |
| unsigned int end; |
| int error = 0; |
| |
| /* |
| * Loop over the data portion of the block. |
| * Each object is a real entry (dep) or an unused one (dup). |
| */ |
| offset = geo->data_entry_offset; |
| end = min_t(unsigned int, BBTOB(bp->b_length), |
| xfs_dir3_data_end_offset(geo, bp->b_addr)); |
| |
| while (offset < end) { |
| struct xfs_dir2_data_unused *dup = bp->b_addr + offset; |
| struct xfs_dir2_data_entry *dep = bp->b_addr + offset; |
| |
| if (xchk_should_terminate(rd->sc, &error)) |
| return error; |
| |
| /* Skip unused entries. */ |
| if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { |
| offset += be16_to_cpu(dup->length); |
| continue; |
| } |
| |
| /* Don't walk off the end of the block. */ |
| offset += xfs_dir2_data_entsize(rd->sc->mp, dep->namelen); |
| if (offset > end) |
| break; |
| |
| /* Ok, let's save this entry. */ |
| error = xrep_dir_salvage_data_entry(rd, dep); |
| if (error) |
| return error; |
| |
| } |
| |
| return 0; |
| } |
| |
| /* Try to recover shortform directory entries. */ |
| STATIC int |
| xrep_dir_recover_sf( |
| struct xrep_dir *rd) |
| { |
| struct xfs_dir2_sf_hdr *hdr; |
| struct xfs_dir2_sf_entry *sfep; |
| struct xfs_dir2_sf_entry *next; |
| struct xfs_ifork *ifp; |
| xfs_ino_t ino; |
| unsigned char *end; |
| int error = 0; |
| |
| ifp = xfs_ifork_ptr(rd->sc->ip, XFS_DATA_FORK); |
| hdr = ifp->if_data; |
| end = (unsigned char *)ifp->if_data + ifp->if_bytes; |
| |
| ino = xfs_dir2_sf_get_parent_ino(hdr); |
| trace_xrep_dir_salvaged_parent(rd->sc->ip, ino); |
| |
| sfep = xfs_dir2_sf_firstentry(hdr); |
| while ((unsigned char *)sfep < end) { |
| if (xchk_should_terminate(rd->sc, &error)) |
| return error; |
| |
| next = xfs_dir2_sf_nextentry(rd->sc->mp, hdr, sfep); |
| if ((unsigned char *)next > end) |
| break; |
| |
| /* Ok, let's save this entry. */ |
| error = xrep_dir_salvage_sf_entry(rd, hdr, sfep); |
| if (error) |
| return error; |
| |
| sfep = next; |
| } |
| |
| return 0; |
| } |
| |
| /* |
| * Try to figure out the format of this directory from the data fork mappings |
| * and the directory size. If we can be reasonably sure of format, we can be |
| * more aggressive in salvaging directory entries. On return, @magic_guess |
| * will be set to DIR3_BLOCK_MAGIC if we think this is a "block format" |
| * directory; DIR3_DATA_MAGIC if we think this is a "data format" directory, |
| * and 0 if we can't tell. |
| */ |
| STATIC void |
| xrep_dir_guess_format( |
| struct xrep_dir *rd, |
| __be32 *magic_guess) |
| { |
| struct xfs_inode *dp = rd->sc->ip; |
| struct xfs_mount *mp = rd->sc->mp; |
| struct xfs_da_geometry *geo = mp->m_dir_geo; |
| xfs_fileoff_t last; |
| int error; |
| |
| ASSERT(xfs_has_crc(mp)); |
| |
| *magic_guess = 0; |
| |
| /* |
| * If there's a single directory block and the directory size is |
| * exactly one block, this has to be a single block format directory. |
| */ |
| error = xfs_bmap_last_offset(dp, &last, XFS_DATA_FORK); |
| if (!error && XFS_FSB_TO_B(mp, last) == geo->blksize && |
| dp->i_disk_size == geo->blksize) { |
| *magic_guess = cpu_to_be32(XFS_DIR3_BLOCK_MAGIC); |
| return; |
| } |
| |
| /* |
| * If the last extent before the leaf offset matches the directory |
| * size and the directory size is larger than 1 block, this is a |
| * data format directory. |
| */ |
| last = geo->leafblk; |
| error = xfs_bmap_last_before(rd->sc->tp, dp, &last, XFS_DATA_FORK); |
| if (!error && |
| XFS_FSB_TO_B(mp, last) > geo->blksize && |
| XFS_FSB_TO_B(mp, last) == dp->i_disk_size) { |
| *magic_guess = cpu_to_be32(XFS_DIR3_DATA_MAGIC); |
| return; |
| } |
| } |
| |
| /* Recover directory entries from a specific directory block. */ |
| STATIC int |
| xrep_dir_recover_dirblock( |
| struct xrep_dir *rd, |
| __be32 magic_guess, |
| xfs_dablk_t dabno) |
| { |
| struct xfs_dir2_data_hdr *hdr; |
| struct xfs_buf *bp; |
| __be32 oldmagic; |
| int error; |
| |
| /* |
| * Try to read buffer. We invalidate them in the next step so we don't |
| * bother to set a buffer type or ops. |
| */ |
| error = xfs_da_read_buf(rd->sc->tp, rd->sc->ip, dabno, |
| XFS_DABUF_MAP_HOLE_OK, &bp, XFS_DATA_FORK, NULL); |
| if (error || !bp) |
| return error; |
| |
| hdr = bp->b_addr; |
| oldmagic = hdr->magic; |
| |
| trace_xrep_dir_recover_dirblock(rd->sc->ip, dabno, |
| be32_to_cpu(hdr->magic), be32_to_cpu(magic_guess)); |
| |
| /* |
| * If we're sure of the block's format, proceed with the salvage |
| * operation using the specified magic number. |
| */ |
| if (magic_guess) { |
| hdr->magic = magic_guess; |
| goto recover; |
| } |
| |
| /* |
| * If we couldn't guess what type of directory this is, then we will |
| * only salvage entries from directory blocks that match the magic |
| * number and pass verifiers. |
| */ |
| switch (hdr->magic) { |
| case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC): |
| case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC): |
| if (!xrep_buf_verify_struct(bp, &xfs_dir3_block_buf_ops)) |
| goto out; |
| if (xfs_dir3_block_header_check(bp, rd->sc->ip->i_ino) != NULL) |
| goto out; |
| break; |
| case cpu_to_be32(XFS_DIR2_DATA_MAGIC): |
| case cpu_to_be32(XFS_DIR3_DATA_MAGIC): |
| if (!xrep_buf_verify_struct(bp, &xfs_dir3_data_buf_ops)) |
| goto out; |
| if (xfs_dir3_data_header_check(bp, rd->sc->ip->i_ino) != NULL) |
| goto out; |
| break; |
| default: |
| goto out; |
| } |
| |
| recover: |
| error = xrep_dir_recover_data(rd, bp); |
| |
| out: |
| hdr->magic = oldmagic; |
| xfs_trans_brelse(rd->sc->tp, bp); |
| return error; |
| } |
| |
| static inline void |
| xrep_dir_init_args( |
| struct xrep_dir *rd, |
| struct xfs_inode *dp, |
| const struct xfs_name *name) |
| { |
| memset(&rd->args, 0, sizeof(struct xfs_da_args)); |
| rd->args.geo = rd->sc->mp->m_dir_geo; |
| rd->args.whichfork = XFS_DATA_FORK; |
| rd->args.owner = rd->sc->ip->i_ino; |
| rd->args.trans = rd->sc->tp; |
| rd->args.dp = dp; |
| if (!name) |
| return; |
| rd->args.name = name->name; |
| rd->args.namelen = name->len; |
| rd->args.filetype = name->type; |
| rd->args.hashval = xfs_dir2_hashname(rd->sc->mp, name); |
| } |
| |
| /* Replay a stashed createname into the temporary directory. */ |
| STATIC int |
| xrep_dir_replay_createname( |
| struct xrep_dir *rd, |
| const struct xfs_name *name, |
| xfs_ino_t inum, |
| xfs_extlen_t total) |
| { |
| struct xfs_scrub *sc = rd->sc; |
| struct xfs_inode *dp = rd->sc->tempip; |
| int error; |
| |
| ASSERT(S_ISDIR(VFS_I(dp)->i_mode)); |
| |
| error = xfs_dir_ino_validate(sc->mp, inum); |
| if (error) |
| return error; |
| |
| trace_xrep_dir_replay_createname(dp, name, inum); |
| |
| xrep_dir_init_args(rd, dp, name); |
| rd->args.inumber = inum; |
| rd->args.total = total; |
| rd->args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT; |
| return xfs_dir_createname_args(&rd->args); |
| } |
| |
| /* Replay a stashed removename onto the temporary directory. */ |
| STATIC int |
| xrep_dir_replay_removename( |
| struct xrep_dir *rd, |
| const struct xfs_name *name, |
| xfs_extlen_t total) |
| { |
| struct xfs_inode *dp = rd->args.dp; |
| |
| ASSERT(S_ISDIR(VFS_I(dp)->i_mode)); |
| |
| xrep_dir_init_args(rd, dp, name); |
| rd->args.op_flags = 0; |
| rd->args.total = total; |
| |
| trace_xrep_dir_replay_removename(dp, name, 0); |
| return xfs_dir_removename_args(&rd->args); |
| } |
| |
| /* |
| * Add this stashed incore directory entry to the temporary directory. |
| * The caller must hold the tempdir's IOLOCK, must not hold any ILOCKs, and |
| * must not be in transaction context. |
| */ |
| STATIC int |
| xrep_dir_replay_update( |
| struct xrep_dir *rd, |
| const struct xfs_name *xname, |
| const struct xrep_dirent *dirent) |
| { |
| struct xfs_mount *mp = rd->sc->mp; |
| #ifdef DEBUG |
| xfs_ino_t ino; |
| #endif |
| uint resblks; |
| int error; |
| |
| resblks = xfs_link_space_res(mp, xname->len); |
| error = xchk_trans_alloc(rd->sc, resblks); |
| if (error) |
| return error; |
| |
| /* Lock the temporary directory and join it to the transaction */ |
| xrep_tempfile_ilock(rd->sc); |
| xfs_trans_ijoin(rd->sc->tp, rd->sc->tempip, 0); |
| |
| switch (dirent->action) { |
| case XREP_DIRENT_ADD: |
| /* |
| * Create a replacement dirent in the temporary directory. |
| * Note that _createname doesn't check for existing entries. |
| * There shouldn't be any in the temporary dir, but we'll |
| * verify this in debug mode. |
| */ |
| #ifdef DEBUG |
| error = xchk_dir_lookup(rd->sc, rd->sc->tempip, xname, &ino); |
| if (error != -ENOENT) { |
| ASSERT(error != -ENOENT); |
| goto out_cancel; |
| } |
| #endif |
| |
| error = xrep_dir_replay_createname(rd, xname, dirent->ino, |
| resblks); |
| if (error) |
| goto out_cancel; |
| |
| if (xname->type == XFS_DIR3_FT_DIR) |
| rd->subdirs++; |
| rd->dirents++; |
| break; |
| case XREP_DIRENT_REMOVE: |
| /* |
| * Remove a dirent from the temporary directory. Note that |
| * _removename doesn't check the inode target of the exist |
| * entry. There should be a perfect match in the temporary |
| * dir, but we'll verify this in debug mode. |
| */ |
| #ifdef DEBUG |
| error = xchk_dir_lookup(rd->sc, rd->sc->tempip, xname, &ino); |
| if (error) { |
| ASSERT(error != 0); |
| goto out_cancel; |
| } |
| if (ino != dirent->ino) { |
| ASSERT(ino == dirent->ino); |
| error = -EIO; |
| goto out_cancel; |
| } |
| #endif |
| |
| error = xrep_dir_replay_removename(rd, xname, resblks); |
| if (error) |
| goto out_cancel; |
| |
| if (xname->type == XFS_DIR3_FT_DIR) |
| rd->subdirs--; |
| rd->dirents--; |
| break; |
| default: |
| ASSERT(0); |
| error = -EIO; |
| goto out_cancel; |
| } |
| |
| /* Commit and unlock. */ |
| error = xrep_trans_commit(rd->sc); |
| if (error) |
| return error; |
| |
| xrep_tempfile_iunlock(rd->sc); |
| return 0; |
| out_cancel: |
| xchk_trans_cancel(rd->sc); |
| xrep_tempfile_iunlock(rd->sc); |
| return error; |
| } |
| |
| /* |
| * Flush stashed incore dirent updates that have been recorded by the scanner. |
| * This is done to reduce the memory requirements of the directory rebuild, |
| * since directories can contain up to 32GB of directory data. |
| * |
| * Caller must not hold transactions or ILOCKs. Caller must hold the tempdir |
| * IOLOCK. |
| */ |
| STATIC int |
| xrep_dir_replay_updates( |
| struct xrep_dir *rd) |
| { |
| xfarray_idx_t array_cur; |
| int error; |
| |
| /* Add all the salvaged dirents to the temporary directory. */ |
| mutex_lock(&rd->pscan.lock); |
| foreach_xfarray_idx(rd->dir_entries, array_cur) { |
| struct xrep_dirent dirent; |
| |
| error = xfarray_load(rd->dir_entries, array_cur, &dirent); |
| if (error) |
| goto out_unlock; |
| |
| error = xfblob_loadname(rd->dir_names, dirent.name_cookie, |
| &rd->xname, dirent.namelen); |
| if (error) |
| goto out_unlock; |
| rd->xname.type = dirent.ftype; |
| mutex_unlock(&rd->pscan.lock); |
| |
| error = xrep_dir_replay_update(rd, &rd->xname, &dirent); |
| if (error) |
| return error; |
| mutex_lock(&rd->pscan.lock); |
| } |
| |
| /* Empty out both arrays now that we've added the entries. */ |
| xfarray_truncate(rd->dir_entries); |
| xfblob_truncate(rd->dir_names); |
| mutex_unlock(&rd->pscan.lock); |
| return 0; |
| out_unlock: |
| mutex_unlock(&rd->pscan.lock); |
| return error; |
| } |
| |
| /* |
| * Periodically flush stashed directory entries to the temporary dir. This |
| * is done to reduce the memory requirements of the directory rebuild, since |
| * directories can contain up to 32GB of directory data. |
| */ |
| STATIC int |
| xrep_dir_flush_stashed( |
| struct xrep_dir *rd) |
| { |
| int error; |
| |
| /* |
| * Entering this function, the scrub context has a reference to the |
| * inode being repaired, the temporary file, and a scrub transaction |
| * that we use during dirent salvaging to avoid livelocking if there |
| * are cycles in the directory structures. We hold ILOCK_EXCL on both |
| * the inode being repaired and the temporary file, though they are |
| * not ijoined to the scrub transaction. |
| * |
| * To constrain kernel memory use, we occasionally write salvaged |
| * dirents from the xfarray and xfblob structures into the temporary |
| * directory in preparation for exchanging the directory structures at |
| * the end. Updating the temporary file requires a transaction, so we |
| * commit the scrub transaction and drop the two ILOCKs so that |
| * we can allocate whatever transaction we want. |
| * |
| * We still hold IOLOCK_EXCL on the inode being repaired, which |
| * prevents anyone from accessing the damaged directory data while we |
| * repair it. |
| */ |
| error = xrep_trans_commit(rd->sc); |
| if (error) |
| return error; |
| xchk_iunlock(rd->sc, XFS_ILOCK_EXCL); |
| |
| /* |
| * Take the IOLOCK of the temporary file while we modify dirents. This |
| * isn't strictly required because the temporary file is never revealed |
| * to userspace, but we follow the same locking rules. We still hold |
| * sc->ip's IOLOCK. |
| */ |
| error = xrep_tempfile_iolock_polled(rd->sc); |
| if (error) |
| return error; |
| |
| /* Write to the tempdir all the updates that we've stashed. */ |
| error = xrep_dir_replay_updates(rd); |
| xrep_tempfile_iounlock(rd->sc); |
| if (error) |
| return error; |
| |
| /* |
| * Recreate the salvage transaction and relock the dir we're salvaging. |
| */ |
| error = xchk_trans_alloc(rd->sc, 0); |
| if (error) |
| return error; |
| xchk_ilock(rd->sc, XFS_ILOCK_EXCL); |
| return 0; |
| } |
| |
| /* Decide if we've stashed too much dirent data in memory. */ |
| static inline bool |
| xrep_dir_want_flush_stashed( |
| struct xrep_dir *rd) |
| { |
| unsigned long long bytes; |
| |
| bytes = xfarray_bytes(rd->dir_entries) + xfblob_bytes(rd->dir_names); |
| return bytes > XREP_DIR_MAX_STASH_BYTES; |
| } |
| |
| /* Extract as many directory entries as we can. */ |
| STATIC int |
| xrep_dir_recover( |
| struct xrep_dir *rd) |
| { |
| struct xfs_bmbt_irec got; |
| struct xfs_scrub *sc = rd->sc; |
| struct xfs_da_geometry *geo = sc->mp->m_dir_geo; |
| xfs_fileoff_t offset; |
| xfs_dablk_t dabno; |
| __be32 magic_guess; |
| int nmap; |
| int error; |
| |
| xrep_dir_guess_format(rd, &magic_guess); |
| |
| /* Iterate each directory data block in the data fork. */ |
| for (offset = 0; |
| offset < geo->leafblk; |
| offset = got.br_startoff + got.br_blockcount) { |
| nmap = 1; |
| error = xfs_bmapi_read(sc->ip, offset, geo->leafblk - offset, |
| &got, &nmap, 0); |
| if (error) |
| return error; |
| if (nmap != 1) |
| return -EFSCORRUPTED; |
| if (!xfs_bmap_is_written_extent(&got)) |
| continue; |
| |
| for (dabno = round_up(got.br_startoff, geo->fsbcount); |
| dabno < got.br_startoff + got.br_blockcount; |
| dabno += geo->fsbcount) { |
| if (xchk_should_terminate(rd->sc, &error)) |
| return error; |
| |
| error = xrep_dir_recover_dirblock(rd, |
| magic_guess, dabno); |
| if (error) |
| return error; |
| |
| /* Flush dirents to constrain memory usage. */ |
| if (xrep_dir_want_flush_stashed(rd)) { |
| error = xrep_dir_flush_stashed(rd); |
| if (error) |
| return error; |
| } |
| } |
| } |
| |
| return 0; |
| } |
| |
| /* |
| * Find all the directory entries for this inode by scraping them out of the |
| * directory leaf blocks by hand, and flushing them into the temp dir. |
| */ |
| STATIC int |
| xrep_dir_find_entries( |
| struct xrep_dir *rd) |
| { |
| struct xfs_inode *dp = rd->sc->ip; |
| int error; |
| |
| /* |
| * Salvage directory entries from the old directory, and write them to |
| * the temporary directory. |
| */ |
| if (dp->i_df.if_format == XFS_DINODE_FMT_LOCAL) { |
| error = xrep_dir_recover_sf(rd); |
| } else { |
| error = xfs_iread_extents(rd->sc->tp, dp, XFS_DATA_FORK); |
| if (error) |
| return error; |
| |
| error = xrep_dir_recover(rd); |
| } |
| if (error) |
| return error; |
| |
| return xrep_dir_flush_stashed(rd); |
| } |
| |
| /* Scan all files in the filesystem for dirents. */ |
| STATIC int |
| xrep_dir_salvage_entries( |
| struct xrep_dir *rd) |
| { |
| struct xfs_scrub *sc = rd->sc; |
| int error; |
| |
| /* |
| * Drop the ILOCK on this directory so that we can scan for this |
| * directory's parent. Figure out who is going to be the parent of |
| * this directory, then retake the ILOCK so that we can salvage |
| * directory entries. |
| */ |
| xchk_iunlock(sc, XFS_ILOCK_EXCL); |
| error = xrep_dir_find_parent(rd); |
| xchk_ilock(sc, XFS_ILOCK_EXCL); |
| if (error) |
| return error; |
| |
| /* |
| * Collect directory entries by parsing raw leaf blocks to salvage |
| * whatever we can. When we're done, free the staging memory before |
| * exchanging the directories to reduce memory usage. |
| */ |
| error = xrep_dir_find_entries(rd); |
| if (error) |
| return error; |
| |
| /* |
| * Cancel the repair transaction and drop the ILOCK so that we can |
| * (later) use the atomic mapping exchange functions to compute the |
| * correct block reservations and re-lock the inodes. |
| * |
| * We still hold IOLOCK_EXCL (aka i_rwsem) which will prevent directory |
| * modifications, but there's nothing to prevent userspace from reading |
| * the directory until we're ready for the exchange operation. Reads |
| * will return -EIO without shutting down the fs, so we're ok with |
| * that. |
| * |
| * The VFS can change dotdot on us, but the findparent scan will keep |
| * our incore parent inode up to date. See the note on locking issues |
| * for more details. |
| */ |
| error = xrep_trans_commit(sc); |
| if (error) |
| return error; |
| |
| xchk_iunlock(sc, XFS_ILOCK_EXCL); |
| return 0; |
| } |
| |
| |
| /* |
| * Examine a parent pointer of a file. If it leads us back to the directory |
| * that we're rebuilding, create an incore dirent from the parent pointer and |
| * stash it. |
| */ |
| STATIC int |
| xrep_dir_scan_pptr( |
| struct xfs_scrub *sc, |
| struct xfs_inode *ip, |
| unsigned int attr_flags, |
| const unsigned char *name, |
| unsigned int namelen, |
| const void *value, |
| unsigned int valuelen, |
| void *priv) |
| { |
| struct xfs_name xname = { |
| .name = name, |
| .len = namelen, |
| .type = xfs_mode_to_ftype(VFS_I(ip)->i_mode), |
| }; |
| xfs_ino_t parent_ino; |
| uint32_t parent_gen; |
| struct xrep_dir *rd = priv; |
| int error; |
| |
| if (!(attr_flags & XFS_ATTR_PARENT)) |
| return 0; |
| |
| /* |
| * Ignore parent pointers that point back to a different dir, list the |
| * wrong generation number, or are invalid. |
| */ |
| error = xfs_parent_from_attr(sc->mp, attr_flags, name, namelen, value, |
| valuelen, &parent_ino, &parent_gen); |
| if (error) |
| return error; |
| |
| if (parent_ino != sc->ip->i_ino || |
| parent_gen != VFS_I(sc->ip)->i_generation) |
| return 0; |
| |
| mutex_lock(&rd->pscan.lock); |
| error = xrep_dir_stash_createname(rd, &xname, ip->i_ino); |
| mutex_unlock(&rd->pscan.lock); |
| return error; |
| } |
| |
| /* |
| * If this child dirent points to the directory being repaired, remember that |
| * fact so that we can reset the dotdot entry if necessary. |
| */ |
| STATIC int |
| xrep_dir_scan_dirent( |
| struct xfs_scrub *sc, |
| struct xfs_inode *dp, |
| xfs_dir2_dataptr_t dapos, |
| const struct xfs_name *name, |
| xfs_ino_t ino, |
| void *priv) |
| { |
| struct xrep_dir *rd = priv; |
| |
| /* Dirent doesn't point to this directory. */ |
| if (ino != rd->sc->ip->i_ino) |
| return 0; |
| |
| /* Ignore garbage inum. */ |
| if (!xfs_verify_dir_ino(rd->sc->mp, ino)) |
| return 0; |
| |
| /* No weird looking names. */ |
| if (name->len >= MAXNAMELEN || name->len <= 0) |
| return 0; |
| |
| /* Don't pick up dot or dotdot entries; we only want child dirents. */ |
| if (xfs_dir2_samename(name, &xfs_name_dotdot) || |
| xfs_dir2_samename(name, &xfs_name_dot)) |
| return 0; |
| |
| trace_xrep_dir_stash_createname(sc->tempip, &xfs_name_dotdot, |
| dp->i_ino); |
| |
| xrep_findparent_scan_found(&rd->pscan, dp->i_ino); |
| return 0; |
| } |
| |
| /* |
| * Decide if we want to look for child dirents or parent pointers in this file. |
| * Skip the dir being repaired and any files being used to stage repairs. |
| */ |
| static inline bool |
| xrep_dir_want_scan( |
| struct xrep_dir *rd, |
| const struct xfs_inode *ip) |
| { |
| return ip != rd->sc->ip && !xrep_is_tempfile(ip); |
| } |
| |
| /* |
| * Take ILOCK on a file that we want to scan. |
| * |
| * Select ILOCK_EXCL if the file is a directory with an unloaded data bmbt or |
| * has an unloaded attr bmbt. Otherwise, take ILOCK_SHARED. |
| */ |
| static inline unsigned int |
| xrep_dir_scan_ilock( |
| struct xrep_dir *rd, |
| struct xfs_inode *ip) |
| { |
| uint lock_mode = XFS_ILOCK_SHARED; |
| |
| /* Need to take the shared ILOCK to advance the iscan cursor. */ |
| if (!xrep_dir_want_scan(rd, ip)) |
| goto lock; |
| |
| if (S_ISDIR(VFS_I(ip)->i_mode) && xfs_need_iread_extents(&ip->i_df)) { |
| lock_mode = XFS_ILOCK_EXCL; |
| goto lock; |
| } |
| |
| if (xfs_inode_has_attr_fork(ip) && xfs_need_iread_extents(&ip->i_af)) |
| lock_mode = XFS_ILOCK_EXCL; |
| |
| lock: |
| xfs_ilock(ip, lock_mode); |
| return lock_mode; |
| } |
| |
| /* |
| * Scan this file for relevant child dirents or parent pointers that point to |
| * the directory we're rebuilding. |
| */ |
| STATIC int |
| xrep_dir_scan_file( |
| struct xrep_dir *rd, |
| struct xfs_inode *ip) |
| { |
| unsigned int lock_mode; |
| int error = 0; |
| |
| lock_mode = xrep_dir_scan_ilock(rd, ip); |
| |
| if (!xrep_dir_want_scan(rd, ip)) |
| goto scan_done; |
| |
| /* |
| * If the extended attributes look as though they has been zapped by |
| * the inode record repair code, we cannot scan for parent pointers. |
| */ |
| if (xchk_pptr_looks_zapped(ip)) { |
| error = -EBUSY; |
| goto scan_done; |
| } |
| |
| error = xchk_xattr_walk(rd->sc, ip, xrep_dir_scan_pptr, NULL, rd); |
| if (error) |
| goto scan_done; |
| |
| if (S_ISDIR(VFS_I(ip)->i_mode)) { |
| /* |
| * If the directory looks as though it has been zapped by the |
| * inode record repair code, we cannot scan for child dirents. |
| */ |
| if (xchk_dir_looks_zapped(ip)) { |
| error = -EBUSY; |
| goto scan_done; |
| } |
| |
| error = xchk_dir_walk(rd->sc, ip, xrep_dir_scan_dirent, rd); |
| if (error) |
| goto scan_done; |
| } |
| |
| scan_done: |
| xchk_iscan_mark_visited(&rd->pscan.iscan, ip); |
| xfs_iunlock(ip, lock_mode); |
| return error; |
| } |
| |
| /* |
| * Scan all files in the filesystem for parent pointers that we can turn into |
| * replacement dirents, and a dirent that we can use to set the dotdot pointer. |
| */ |
| STATIC int |
| xrep_dir_scan_dirtree( |
| struct xrep_dir *rd) |
| { |
| struct xfs_scrub *sc = rd->sc; |
| struct xfs_inode *ip; |
| int error; |
| |
| /* Roots of directory trees are their own parents. */ |
| if (xchk_inode_is_dirtree_root(sc->ip)) |
| xrep_findparent_scan_found(&rd->pscan, sc->ip->i_ino); |
| |
| /* |
| * Filesystem scans are time consuming. Drop the directory ILOCK and |
| * all other resources for the duration of the scan and hope for the |
| * best. The live update hooks will keep our scan information up to |
| * date even though we've dropped the locks. |
| */ |
| xchk_trans_cancel(sc); |
| if (sc->ilock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) |
| xchk_iunlock(sc, sc->ilock_flags & (XFS_ILOCK_SHARED | |
| XFS_ILOCK_EXCL)); |
| error = xchk_trans_alloc_empty(sc); |
| if (error) |
| return error; |
| |
| while ((error = xchk_iscan_iter(&rd->pscan.iscan, &ip)) == 1) { |
| bool flush; |
| |
| error = xrep_dir_scan_file(rd, ip); |
| xchk_irele(sc, ip); |
| if (error) |
| break; |
| |
| /* Flush stashed dirent updates to constrain memory usage. */ |
| mutex_lock(&rd->pscan.lock); |
| flush = xrep_dir_want_flush_stashed(rd); |
| mutex_unlock(&rd->pscan.lock); |
| if (flush) { |
| xchk_trans_cancel(sc); |
| |
| error = xrep_tempfile_iolock_polled(sc); |
| if (error) |
| break; |
| |
| error = xrep_dir_replay_updates(rd); |
| xrep_tempfile_iounlock(sc); |
| if (error) |
| break; |
| |
| error = xchk_trans_alloc_empty(sc); |
| if (error) |
| break; |
| } |
| |
| if (xchk_should_terminate(sc, &error)) |
| break; |
| } |
| xchk_iscan_iter_finish(&rd->pscan.iscan); |
| if (error) { |
| /* |
| * If we couldn't grab an inode that was busy with a state |
| * change, change the error code so that we exit to userspace |
| * as quickly as possible. |
| */ |
| if (error == -EBUSY) |
| return -ECANCELED; |
| return error; |
| } |
| |
| /* |
| * Cancel the empty transaction so that we can (later) use the atomic |
| * file mapping exchange functions to lock files and commit the new |
| * directory. |
| */ |
| xchk_trans_cancel(rd->sc); |
| return 0; |
| } |
| |
| /* |
| * Capture dirent updates being made by other threads which are relevant to the |
| * directory being repaired. |
| */ |
| STATIC int |
| xrep_dir_live_update( |
| struct notifier_block *nb, |
| unsigned long action, |
| void *data) |
| { |
| struct xfs_dir_update_params *p = data; |
| struct xrep_dir *rd; |
| struct xfs_scrub *sc; |
| int error = 0; |
| |
| rd = container_of(nb, struct xrep_dir, pscan.dhook.dirent_hook.nb); |
| sc = rd->sc; |
| |
| /* |
| * This thread updated a child dirent in the directory that we're |
| * rebuilding. Stash the update for replay against the temporary |
| * directory. |
| */ |
| if (p->dp->i_ino == sc->ip->i_ino && |
| xchk_iscan_want_live_update(&rd->pscan.iscan, p->ip->i_ino)) { |
| mutex_lock(&rd->pscan.lock); |
| if (p->delta > 0) |
| error = xrep_dir_stash_createname(rd, p->name, |
| p->ip->i_ino); |
| else |
| error = xrep_dir_stash_removename(rd, p->name, |
| p->ip->i_ino); |
| mutex_unlock(&rd->pscan.lock); |
| if (error) |
| goto out_abort; |
| } |
| |
| /* |
| * This thread updated another directory's child dirent that points to |
| * the directory that we're rebuilding, so remember the new dotdot |
| * target. |
| */ |
| if (p->ip->i_ino == sc->ip->i_ino && |
| xchk_iscan_want_live_update(&rd->pscan.iscan, p->dp->i_ino)) { |
| if (p->delta > 0) { |
| trace_xrep_dir_stash_createname(sc->tempip, |
| &xfs_name_dotdot, |
| p->dp->i_ino); |
| |
| xrep_findparent_scan_found(&rd->pscan, p->dp->i_ino); |
| } else { |
| trace_xrep_dir_stash_removename(sc->tempip, |
| &xfs_name_dotdot, |
| rd->pscan.parent_ino); |
| |
| xrep_findparent_scan_found(&rd->pscan, NULLFSINO); |
| } |
| } |
| |
| return NOTIFY_DONE; |
| out_abort: |
| xchk_iscan_abort(&rd->pscan.iscan); |
| return NOTIFY_DONE; |
| } |
| |
| /* |
| * Free all the directory blocks and reset the data fork. The caller must |
| * join the inode to the transaction. This function returns with the inode |
| * joined to a clean scrub transaction. |
| */ |
| STATIC int |
| xrep_dir_reset_fork( |
| struct xrep_dir *rd, |
| xfs_ino_t parent_ino) |
| { |
| struct xfs_scrub *sc = rd->sc; |
| struct xfs_ifork *ifp = xfs_ifork_ptr(sc->tempip, XFS_DATA_FORK); |
| int error; |
| |
| /* Unmap all the directory buffers. */ |
| if (xfs_ifork_has_extents(ifp)) { |
| error = xrep_reap_ifork(sc, sc->tempip, XFS_DATA_FORK); |
| if (error) |
| return error; |
| } |
| |
| trace_xrep_dir_reset_fork(sc->tempip, parent_ino); |
| |
| /* Reset the data fork to an empty data fork. */ |
| xfs_idestroy_fork(ifp); |
| ifp->if_bytes = 0; |
| sc->tempip->i_disk_size = 0; |
| |
| /* Reinitialize the short form directory. */ |
| xrep_dir_init_args(rd, sc->tempip, NULL); |
| return xfs_dir2_sf_create(&rd->args, parent_ino); |
| } |
| |
| /* |
| * Prepare both inodes' directory forks for exchanging mappings. Promote the |
| * tempfile from short format to leaf format, and if the file being repaired |
| * has a short format data fork, turn it into an empty extent list. |
| */ |
| STATIC int |
| xrep_dir_swap_prep( |
| struct xfs_scrub *sc, |
| bool temp_local, |
| bool ip_local) |
| { |
| int error; |
| |
| /* |
| * If the tempfile's directory is in shortform format, convert that to |
| * a single leaf extent so that we can use the atomic mapping exchange. |
| */ |
| if (temp_local) { |
| struct xfs_da_args args = { |
| .dp = sc->tempip, |
| .geo = sc->mp->m_dir_geo, |
| .whichfork = XFS_DATA_FORK, |
| .trans = sc->tp, |
| .total = 1, |
| .owner = sc->ip->i_ino, |
| }; |
| |
| error = xfs_dir2_sf_to_block(&args); |
| if (error) |
| return error; |
| |
| /* |
| * Roll the deferred log items to get us back to a clean |
| * transaction. |
| */ |
| error = xfs_defer_finish(&sc->tp); |
| if (error) |
| return error; |
| } |
| |
| /* |
| * If the file being repaired had a shortform data fork, convert that |
| * to an empty extent list in preparation for the atomic mapping |
| * exchange. |
| */ |
| if (ip_local) { |
| struct xfs_ifork *ifp; |
| |
| ifp = xfs_ifork_ptr(sc->ip, XFS_DATA_FORK); |
| xfs_idestroy_fork(ifp); |
| ifp->if_format = XFS_DINODE_FMT_EXTENTS; |
| ifp->if_nextents = 0; |
| ifp->if_bytes = 0; |
| ifp->if_data = NULL; |
| ifp->if_height = 0; |
| |
| xfs_trans_log_inode(sc->tp, sc->ip, |
| XFS_ILOG_CORE | XFS_ILOG_DDATA); |
| } |
| |
| return 0; |
| } |
| |
| /* |
| * Replace the inode number of a directory entry. |
| */ |
| static int |
| xrep_dir_replace( |
| struct xrep_dir *rd, |
| struct xfs_inode *dp, |
| const struct xfs_name *name, |
| xfs_ino_t inum, |
| xfs_extlen_t total) |
| { |
| struct xfs_scrub *sc = rd->sc; |
| int error; |
| |
| ASSERT(S_ISDIR(VFS_I(dp)->i_mode)); |
| |
| error = xfs_dir_ino_validate(sc->mp, inum); |
| if (error) |
| return error; |
| |
| xrep_dir_init_args(rd, dp, name); |
| rd->args.inumber = inum; |
| rd->args.total = total; |
| return xfs_dir_replace_args(&rd->args); |
| } |
| |
| /* |
| * Reset the link count of this directory and adjust the unlinked list pointers |
| * as needed. |
| */ |
| STATIC int |
| xrep_dir_set_nlink( |
| struct xrep_dir *rd) |
| { |
| struct xfs_scrub *sc = rd->sc; |
| struct xfs_inode *dp = sc->ip; |
| struct xfs_perag *pag; |
| unsigned int new_nlink = min_t(unsigned long long, |
| rd->subdirs + 2, |
| XFS_NLINK_PINNED); |
| int error; |
| |
| /* |
| * The directory is not on the incore unlinked list, which means that |
| * it needs to be reachable via the directory tree. Update the nlink |
| * with our observed link count. If the directory has no parent, it |
| * will be moved to the orphanage. |
| */ |
| if (!xfs_inode_on_unlinked_list(dp)) |
| goto reset_nlink; |
| |
| /* |
| * The directory is on the unlinked list and we did not find any |
| * dirents. Set the link count to zero and let the directory |
| * inactivate when the last reference drops. |
| */ |
| if (rd->dirents == 0) { |
| rd->needs_adoption = false; |
| new_nlink = 0; |
| goto reset_nlink; |
| } |
| |
| /* |
| * The directory is on the unlinked list and we found dirents. This |
| * directory needs to be reachable via the directory tree. Remove the |
| * dir from the unlinked list and update nlink with the observed link |
| * count. If the directory has no parent, it will be moved to the |
| * orphanage. |
| */ |
| pag = xfs_perag_get(sc->mp, XFS_INO_TO_AGNO(sc->mp, dp->i_ino)); |
| if (!pag) { |
| ASSERT(0); |
| return -EFSCORRUPTED; |
| } |
| |
| error = xfs_iunlink_remove(sc->tp, pag, dp); |
| xfs_perag_put(pag); |
| if (error) |
| return error; |
| |
| reset_nlink: |
| if (VFS_I(dp)->i_nlink != new_nlink) |
| set_nlink(VFS_I(dp), new_nlink); |
| return 0; |
| } |
| |
| /* |
| * Finish replaying stashed dirent updates, allocate a transaction for |
| * exchanging data fork mappings, and take the ILOCKs of both directories |
| * before we commit the new directory structure. |
| */ |
| STATIC int |
| xrep_dir_finalize_tempdir( |
| struct xrep_dir *rd) |
| { |
| struct xfs_scrub *sc = rd->sc; |
| int error; |
| |
| if (!xfs_has_parent(sc->mp)) |
| return xrep_tempexch_trans_alloc(sc, XFS_DATA_FORK, &rd->tx); |
| |
| /* |
| * Repair relies on the ILOCK to quiesce all possible dirent updates. |
| * Replay all queued dirent updates into the tempdir before exchanging |
| * the contents, even if that means dropping the ILOCKs and the |
| * transaction. |
| */ |
| do { |
| error = xrep_dir_replay_updates(rd); |
| if (error) |
| return error; |
| |
| error = xrep_tempexch_trans_alloc(sc, XFS_DATA_FORK, &rd->tx); |
| if (error) |
| return error; |
| |
| if (xfarray_length(rd->dir_entries) == 0) |
| break; |
| |
| xchk_trans_cancel(sc); |
| xrep_tempfile_iunlock_both(sc); |
| } while (!xchk_should_terminate(sc, &error)); |
| return error; |
| } |
| |
| /* Exchange the temporary directory's data fork with the one being repaired. */ |
| STATIC int |
| xrep_dir_swap( |
| struct xrep_dir *rd) |
| { |
| struct xfs_scrub *sc = rd->sc; |
| xfs_ino_t ino; |
| bool ip_local, temp_local; |
| int error = 0; |
| |
| /* |
| * If we never found the parent for this directory, temporarily assign |
| * the root dir as the parent; we'll move this to the orphanage after |
| * exchanging the dir contents. We hold the ILOCK of the dir being |
| * repaired, so we're not worried about racy updates of dotdot. |
| */ |
| ASSERT(sc->ilock_flags & XFS_ILOCK_EXCL); |
| if (rd->pscan.parent_ino == NULLFSINO) { |
| rd->needs_adoption = true; |
| rd->pscan.parent_ino = rd->sc->mp->m_sb.sb_rootino; |
| } |
| |
| /* |
| * Reset the temporary directory's '..' entry to point to the parent |
| * that we found. The dirent replace code asserts if the dirent |
| * already points at the new inumber, so we look it up here. |
| * |
| * It's also possible that this replacement could also expand a sf |
| * tempdir into block format. |
| */ |
| error = xchk_dir_lookup(sc, rd->sc->tempip, &xfs_name_dotdot, &ino); |
| if (error) |
| return error; |
| |
| if (rd->pscan.parent_ino != ino) { |
| error = xrep_dir_replace(rd, rd->sc->tempip, &xfs_name_dotdot, |
| rd->pscan.parent_ino, rd->tx.req.resblks); |
| if (error) |
| return error; |
| } |
| |
| /* |
| * Changing the dot and dotdot entries could have changed the shape of |
| * the directory, so we recompute these. |
| */ |
| ip_local = sc->ip->i_df.if_format == XFS_DINODE_FMT_LOCAL; |
| temp_local = sc->tempip->i_df.if_format == XFS_DINODE_FMT_LOCAL; |
| |
| /* |
| * If the both files have a local format data fork and the rebuilt |
| * directory data would fit in the repaired file's data fork, copy |
| * the contents from the tempfile and update the directory link count. |
| * We're done now. |
| */ |
| if (ip_local && temp_local && |
| sc->tempip->i_disk_size <= xfs_inode_data_fork_size(sc->ip)) { |
| xrep_tempfile_copyout_local(sc, XFS_DATA_FORK); |
| return xrep_dir_set_nlink(rd); |
| } |
| |
| /* |
| * Clean the transaction before we start working on exchanging |
| * directory contents. |
| */ |
| error = xrep_tempfile_roll_trans(rd->sc); |
| if (error) |
| return error; |
| |
| /* Otherwise, make sure both data forks are in block-mapping mode. */ |
| error = xrep_dir_swap_prep(sc, temp_local, ip_local); |
| if (error) |
| return error; |
| |
| /* |
| * Set nlink of the directory in the same transaction sequence that |
| * (atomically) commits the new directory data. |
| */ |
| error = xrep_dir_set_nlink(rd); |
| if (error) |
| return error; |
| |
| return xrep_tempexch_contents(sc, &rd->tx); |
| } |
| |
| /* |
| * Exchange the new directory contents (which we created in the tempfile) with |
| * the directory being repaired. |
| */ |
| STATIC int |
| xrep_dir_rebuild_tree( |
| struct xrep_dir *rd) |
| { |
| struct xfs_scrub *sc = rd->sc; |
| int error; |
| |
| trace_xrep_dir_rebuild_tree(sc->ip, rd->pscan.parent_ino); |
| |
| /* |
| * Take the IOLOCK on the temporary file so that we can run dir |
| * operations with the same locks held as we would for a normal file. |
| * We still hold sc->ip's IOLOCK. |
| */ |
| error = xrep_tempfile_iolock_polled(rd->sc); |
| if (error) |
| return error; |
| |
| /* |
| * Allocate transaction, lock inodes, and make sure that we've replayed |
| * all the stashed dirent updates to the tempdir. After this point, |
| * we're ready to exchange data fork mappings. |
| */ |
| error = xrep_dir_finalize_tempdir(rd); |
| if (error) |
| return error; |
| |
| if (xchk_iscan_aborted(&rd->pscan.iscan)) |
| return -ECANCELED; |
| |
| /* |
| * Exchange the tempdir's data fork with the file being repaired. This |
| * recreates the transaction and re-takes the ILOCK in the scrub |
| * context. |
| */ |
| error = xrep_dir_swap(rd); |
| if (error) |
| return error; |
| |
| /* |
| * Release the old directory blocks and reset the data fork of the temp |
| * directory to an empty shortform directory because inactivation does |
| * nothing for directories. |
| */ |
| error = xrep_dir_reset_fork(rd, sc->mp->m_rootip->i_ino); |
| if (error) |
| return error; |
| |
| /* |
| * Roll to get a transaction without any inodes joined to it. Then we |
| * can drop the tempfile's ILOCK and IOLOCK before doing more work on |
| * the scrub target directory. |
| */ |
| error = xfs_trans_roll(&sc->tp); |
| if (error) |
| return error; |
| |
| xrep_tempfile_iunlock(sc); |
| xrep_tempfile_iounlock(sc); |
| return 0; |
| } |
| |
| /* Set up the filesystem scan so we can regenerate directory entries. */ |
| STATIC int |
| xrep_dir_setup_scan( |
| struct xrep_dir *rd) |
| { |
| struct xfs_scrub *sc = rd->sc; |
| char *descr; |
| int error; |
| |
| /* Set up some staging memory for salvaging dirents. */ |
| descr = xchk_xfile_ino_descr(sc, "directory entries"); |
| error = xfarray_create(descr, 0, sizeof(struct xrep_dirent), |
| &rd->dir_entries); |
| kfree(descr); |
| if (error) |
| return error; |
| |
| descr = xchk_xfile_ino_descr(sc, "directory entry names"); |
| error = xfblob_create(descr, &rd->dir_names); |
| kfree(descr); |
| if (error) |
| goto out_xfarray; |
| |
| if (xfs_has_parent(sc->mp)) |
| error = __xrep_findparent_scan_start(sc, &rd->pscan, |
| xrep_dir_live_update); |
| else |
| error = xrep_findparent_scan_start(sc, &rd->pscan); |
| if (error) |
| goto out_xfblob; |
| |
| return 0; |
| |
| out_xfblob: |
| xfblob_destroy(rd->dir_names); |
| rd->dir_names = NULL; |
| out_xfarray: |
| xfarray_destroy(rd->dir_entries); |
| rd->dir_entries = NULL; |
| return error; |
| } |
| |
| /* |
| * Move the current file to the orphanage. |
| * |
| * Caller must hold IOLOCK_EXCL on @sc->ip, and no other inode locks. Upon |
| * successful return, the scrub transaction will have enough extra reservation |
| * to make the move; it will hold IOLOCK_EXCL and ILOCK_EXCL of @sc->ip and the |
| * orphanage; and both inodes will be ijoined. |
| */ |
| STATIC int |
| xrep_dir_move_to_orphanage( |
| struct xrep_dir *rd) |
| { |
| struct xfs_scrub *sc = rd->sc; |
| xfs_ino_t orig_parent, new_parent; |
| int error; |
| |
| /* |
| * We are about to drop the ILOCK on sc->ip to lock the orphanage and |
| * prepare for the adoption. Therefore, look up the old dotdot entry |
| * for sc->ip so that we can compare it after we re-lock sc->ip. |
| */ |
| error = xchk_dir_lookup(sc, sc->ip, &xfs_name_dotdot, &orig_parent); |
| if (error) |
| return error; |
| |
| /* |
| * Drop the ILOCK on the scrub target and commit the transaction. |
| * Adoption computes its own resource requirements and gathers the |
| * necessary components. |
| */ |
| error = xrep_trans_commit(sc); |
| if (error) |
| return error; |
| xchk_iunlock(sc, XFS_ILOCK_EXCL); |
| |
| /* If we can take the orphanage's iolock then we're ready to move. */ |
| if (!xrep_orphanage_ilock_nowait(sc, XFS_IOLOCK_EXCL)) { |
| xchk_iunlock(sc, sc->ilock_flags); |
| error = xrep_orphanage_iolock_two(sc); |
| if (error) |
| return error; |
| } |
| |
| /* Grab transaction and ILOCK the two files. */ |
| error = xrep_adoption_trans_alloc(sc, &rd->adoption); |
| if (error) |
| return error; |
| |
| error = xrep_adoption_compute_name(&rd->adoption, &rd->xname); |
| if (error) |
| return error; |
| |
| /* |
| * Now that we've reacquired the ILOCK on sc->ip, look up the dotdot |
| * entry again. If the parent changed or the child was unlinked while |
| * the child directory was unlocked, we don't need to move the child to |
| * the orphanage after all. |
| */ |
| error = xchk_dir_lookup(sc, sc->ip, &xfs_name_dotdot, &new_parent); |
| if (error) |
| return error; |
| |
| /* |
| * Attach to the orphanage if we still have a linked directory and it |
| * hasn't been moved. |
| */ |
| if (orig_parent == new_parent && VFS_I(sc->ip)->i_nlink > 0) { |
| error = xrep_adoption_move(&rd->adoption); |
| if (error) |
| return error; |
| } |
| |
| /* |
| * Launder the scrub transaction so we can drop the orphanage ILOCK |
| * and IOLOCK. Return holding the scrub target's ILOCK and IOLOCK. |
| */ |
| error = xrep_adoption_trans_roll(&rd->adoption); |
| if (error) |
| return error; |
| |
| xrep_orphanage_iunlock(sc, XFS_ILOCK_EXCL); |
| xrep_orphanage_iunlock(sc, XFS_IOLOCK_EXCL); |
| return 0; |
| } |
| |
| /* |
| * Repair the directory metadata. |
| * |
| * XXX: Directory entry buffers can be multiple fsblocks in size. The buffer |
| * cache in XFS can't handle aliased multiblock buffers, so this might |
| * misbehave if the directory blocks are crosslinked with other filesystem |
| * metadata. |
| * |
| * XXX: Is it necessary to check the dcache for this directory to make sure |
| * that we always recreate every cached entry? |
| */ |
| int |
| xrep_directory( |
| struct xfs_scrub *sc) |
| { |
| struct xrep_dir *rd = sc->buf; |
| int error; |
| |
| /* The rmapbt is required to reap the old data fork. */ |
| if (!xfs_has_rmapbt(sc->mp)) |
| return -EOPNOTSUPP; |
| /* We require atomic file exchange range to rebuild anything. */ |
| if (!xfs_has_exchange_range(sc->mp)) |
| return -EOPNOTSUPP; |
| |
| error = xrep_dir_setup_scan(rd); |
| if (error) |
| return error; |
| |
| if (xfs_has_parent(sc->mp)) |
| error = xrep_dir_scan_dirtree(rd); |
| else |
| error = xrep_dir_salvage_entries(rd); |
| if (error) |
| goto out_teardown; |
| |
| /* Last chance to abort before we start committing fixes. */ |
| if (xchk_should_terminate(sc, &error)) |
| goto out_teardown; |
| |
| error = xrep_dir_rebuild_tree(rd); |
| if (error) |
| goto out_teardown; |
| |
| if (rd->needs_adoption) { |
| if (!xrep_orphanage_can_adopt(rd->sc)) |
| error = -EFSCORRUPTED; |
| else |
| error = xrep_dir_move_to_orphanage(rd); |
| if (error) |
| goto out_teardown; |
| } |
| |
| out_teardown: |
| xrep_dir_teardown(sc); |
| return error; |
| } |