| /* SPDX-License-Identifier: GPL-2.0 */ |
| /* |
| * Copyright (c) 2000-2005 Silicon Graphics, Inc. |
| * Copyright (c) 2018 Red Hat, Inc. |
| * All rights reserved. |
| */ |
| |
| #include "xfs.h" |
| #include "xfs_fs.h" |
| #include "xfs_shared.h" |
| #include "xfs_format.h" |
| #include "xfs_trans_resv.h" |
| #include "xfs_bit.h" |
| #include "xfs_sb.h" |
| #include "xfs_mount.h" |
| #include "xfs_btree.h" |
| #include "xfs_alloc_btree.h" |
| #include "xfs_rmap_btree.h" |
| #include "xfs_alloc.h" |
| #include "xfs_ialloc.h" |
| #include "xfs_rmap.h" |
| #include "xfs_ag.h" |
| #include "xfs_ag_resv.h" |
| #include "xfs_health.h" |
| #include "xfs_error.h" |
| #include "xfs_bmap.h" |
| #include "xfs_defer.h" |
| #include "xfs_log_format.h" |
| #include "xfs_trans.h" |
| #include "xfs_trace.h" |
| #include "xfs_inode.h" |
| #include "xfs_icache.h" |
| |
| |
| /* |
| * Passive reference counting access wrappers to the perag structures. If the |
| * per-ag structure is to be freed, the freeing code is responsible for cleaning |
| * up objects with passive references before freeing the structure. This is |
| * things like cached buffers. |
| */ |
| struct xfs_perag * |
| xfs_perag_get( |
| struct xfs_mount *mp, |
| xfs_agnumber_t agno) |
| { |
| struct xfs_perag *pag; |
| |
| rcu_read_lock(); |
| pag = xa_load(&mp->m_perags, agno); |
| if (pag) { |
| trace_xfs_perag_get(pag, _RET_IP_); |
| ASSERT(atomic_read(&pag->pag_ref) >= 0); |
| atomic_inc(&pag->pag_ref); |
| } |
| rcu_read_unlock(); |
| return pag; |
| } |
| |
| /* Get a passive reference to the given perag. */ |
| struct xfs_perag * |
| xfs_perag_hold( |
| struct xfs_perag *pag) |
| { |
| ASSERT(atomic_read(&pag->pag_ref) > 0 || |
| atomic_read(&pag->pag_active_ref) > 0); |
| |
| trace_xfs_perag_hold(pag, _RET_IP_); |
| atomic_inc(&pag->pag_ref); |
| return pag; |
| } |
| |
| void |
| xfs_perag_put( |
| struct xfs_perag *pag) |
| { |
| trace_xfs_perag_put(pag, _RET_IP_); |
| ASSERT(atomic_read(&pag->pag_ref) > 0); |
| atomic_dec(&pag->pag_ref); |
| } |
| |
| /* |
| * Active references for perag structures. This is for short term access to the |
| * per ag structures for walking trees or accessing state. If an AG is being |
| * shrunk or is offline, then this will fail to find that AG and return NULL |
| * instead. |
| */ |
| struct xfs_perag * |
| xfs_perag_grab( |
| struct xfs_mount *mp, |
| xfs_agnumber_t agno) |
| { |
| struct xfs_perag *pag; |
| |
| rcu_read_lock(); |
| pag = xa_load(&mp->m_perags, agno); |
| if (pag) { |
| trace_xfs_perag_grab(pag, _RET_IP_); |
| if (!atomic_inc_not_zero(&pag->pag_active_ref)) |
| pag = NULL; |
| } |
| rcu_read_unlock(); |
| return pag; |
| } |
| |
| void |
| xfs_perag_rele( |
| struct xfs_perag *pag) |
| { |
| trace_xfs_perag_rele(pag, _RET_IP_); |
| if (atomic_dec_and_test(&pag->pag_active_ref)) |
| wake_up(&pag->pag_active_wq); |
| } |
| |
| /* |
| * xfs_initialize_perag_data |
| * |
| * Read in each per-ag structure so we can count up the number of |
| * allocated inodes, free inodes and used filesystem blocks as this |
| * information is no longer persistent in the superblock. Once we have |
| * this information, write it into the in-core superblock structure. |
| */ |
| int |
| xfs_initialize_perag_data( |
| struct xfs_mount *mp, |
| xfs_agnumber_t agcount) |
| { |
| xfs_agnumber_t index; |
| struct xfs_perag *pag; |
| struct xfs_sb *sbp = &mp->m_sb; |
| uint64_t ifree = 0; |
| uint64_t ialloc = 0; |
| uint64_t bfree = 0; |
| uint64_t bfreelst = 0; |
| uint64_t btree = 0; |
| uint64_t fdblocks; |
| int error = 0; |
| |
| for (index = 0; index < agcount; index++) { |
| /* |
| * Read the AGF and AGI buffers to populate the per-ag |
| * structures for us. |
| */ |
| pag = xfs_perag_get(mp, index); |
| error = xfs_alloc_read_agf(pag, NULL, 0, NULL); |
| if (!error) |
| error = xfs_ialloc_read_agi(pag, NULL, 0, NULL); |
| if (error) { |
| xfs_perag_put(pag); |
| return error; |
| } |
| |
| ifree += pag->pagi_freecount; |
| ialloc += pag->pagi_count; |
| bfree += pag->pagf_freeblks; |
| bfreelst += pag->pagf_flcount; |
| btree += pag->pagf_btreeblks; |
| xfs_perag_put(pag); |
| } |
| fdblocks = bfree + bfreelst + btree; |
| |
| /* |
| * If the new summary counts are obviously incorrect, fail the |
| * mount operation because that implies the AGFs are also corrupt. |
| * Clear FS_COUNTERS so that we don't unmount with a dirty log, which |
| * will prevent xfs_repair from fixing anything. |
| */ |
| if (fdblocks > sbp->sb_dblocks || ifree > ialloc) { |
| xfs_alert(mp, "AGF corruption. Please run xfs_repair."); |
| xfs_fs_mark_sick(mp, XFS_SICK_FS_COUNTERS); |
| error = -EFSCORRUPTED; |
| goto out; |
| } |
| |
| /* Overwrite incore superblock counters with just-read data */ |
| spin_lock(&mp->m_sb_lock); |
| sbp->sb_ifree = ifree; |
| sbp->sb_icount = ialloc; |
| sbp->sb_fdblocks = fdblocks; |
| spin_unlock(&mp->m_sb_lock); |
| |
| xfs_reinit_percpu_counters(mp); |
| out: |
| xfs_fs_mark_healthy(mp, XFS_SICK_FS_COUNTERS); |
| return error; |
| } |
| |
| /* |
| * Free up the per-ag resources associated with the mount structure. |
| */ |
| void |
| xfs_free_perag( |
| struct xfs_mount *mp) |
| { |
| struct xfs_perag *pag; |
| xfs_agnumber_t agno; |
| |
| for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { |
| pag = xa_erase(&mp->m_perags, agno); |
| ASSERT(pag); |
| XFS_IS_CORRUPT(pag->pag_mount, atomic_read(&pag->pag_ref) != 0); |
| xfs_defer_drain_free(&pag->pag_intents_drain); |
| |
| cancel_delayed_work_sync(&pag->pag_blockgc_work); |
| xfs_buf_cache_destroy(&pag->pag_bcache); |
| |
| /* drop the mount's active reference */ |
| xfs_perag_rele(pag); |
| XFS_IS_CORRUPT(pag->pag_mount, |
| atomic_read(&pag->pag_active_ref) != 0); |
| kfree_rcu_mightsleep(pag); |
| } |
| } |
| |
| /* Find the size of the AG, in blocks. */ |
| static xfs_agblock_t |
| __xfs_ag_block_count( |
| struct xfs_mount *mp, |
| xfs_agnumber_t agno, |
| xfs_agnumber_t agcount, |
| xfs_rfsblock_t dblocks) |
| { |
| ASSERT(agno < agcount); |
| |
| if (agno < agcount - 1) |
| return mp->m_sb.sb_agblocks; |
| return dblocks - (agno * mp->m_sb.sb_agblocks); |
| } |
| |
| xfs_agblock_t |
| xfs_ag_block_count( |
| struct xfs_mount *mp, |
| xfs_agnumber_t agno) |
| { |
| return __xfs_ag_block_count(mp, agno, mp->m_sb.sb_agcount, |
| mp->m_sb.sb_dblocks); |
| } |
| |
| /* Calculate the first and last possible inode number in an AG. */ |
| static void |
| __xfs_agino_range( |
| struct xfs_mount *mp, |
| xfs_agblock_t eoag, |
| xfs_agino_t *first, |
| xfs_agino_t *last) |
| { |
| xfs_agblock_t bno; |
| |
| /* |
| * Calculate the first inode, which will be in the first |
| * cluster-aligned block after the AGFL. |
| */ |
| bno = round_up(XFS_AGFL_BLOCK(mp) + 1, M_IGEO(mp)->cluster_align); |
| *first = XFS_AGB_TO_AGINO(mp, bno); |
| |
| /* |
| * Calculate the last inode, which will be at the end of the |
| * last (aligned) cluster that can be allocated in the AG. |
| */ |
| bno = round_down(eoag, M_IGEO(mp)->cluster_align); |
| *last = XFS_AGB_TO_AGINO(mp, bno) - 1; |
| } |
| |
| void |
| xfs_agino_range( |
| struct xfs_mount *mp, |
| xfs_agnumber_t agno, |
| xfs_agino_t *first, |
| xfs_agino_t *last) |
| { |
| return __xfs_agino_range(mp, xfs_ag_block_count(mp, agno), first, last); |
| } |
| |
| /* |
| * Free perag within the specified AG range, it is only used to free unused |
| * perags under the error handling path. |
| */ |
| void |
| xfs_free_unused_perag_range( |
| struct xfs_mount *mp, |
| xfs_agnumber_t agstart, |
| xfs_agnumber_t agend) |
| { |
| struct xfs_perag *pag; |
| xfs_agnumber_t index; |
| |
| for (index = agstart; index < agend; index++) { |
| pag = xa_erase(&mp->m_perags, index); |
| if (!pag) |
| break; |
| xfs_buf_cache_destroy(&pag->pag_bcache); |
| xfs_defer_drain_free(&pag->pag_intents_drain); |
| kfree(pag); |
| } |
| } |
| |
| int |
| xfs_initialize_perag( |
| struct xfs_mount *mp, |
| xfs_agnumber_t agcount, |
| xfs_rfsblock_t dblocks, |
| xfs_agnumber_t *maxagi) |
| { |
| struct xfs_perag *pag; |
| xfs_agnumber_t index; |
| xfs_agnumber_t first_initialised = NULLAGNUMBER; |
| int error; |
| |
| /* |
| * Walk the current per-ag tree so we don't try to initialise AGs |
| * that already exist (growfs case). Allocate and insert all the |
| * AGs we don't find ready for initialisation. |
| */ |
| for (index = 0; index < agcount; index++) { |
| pag = xfs_perag_get(mp, index); |
| if (pag) { |
| xfs_perag_put(pag); |
| continue; |
| } |
| |
| pag = kzalloc(sizeof(*pag), GFP_KERNEL | __GFP_RETRY_MAYFAIL); |
| if (!pag) { |
| error = -ENOMEM; |
| goto out_unwind_new_pags; |
| } |
| pag->pag_agno = index; |
| pag->pag_mount = mp; |
| |
| error = xa_insert(&mp->m_perags, index, pag, GFP_KERNEL); |
| if (error) { |
| WARN_ON_ONCE(error == -EBUSY); |
| goto out_free_pag; |
| } |
| |
| #ifdef __KERNEL__ |
| /* Place kernel structure only init below this point. */ |
| spin_lock_init(&pag->pag_ici_lock); |
| spin_lock_init(&pag->pagb_lock); |
| spin_lock_init(&pag->pag_state_lock); |
| INIT_DELAYED_WORK(&pag->pag_blockgc_work, xfs_blockgc_worker); |
| INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC); |
| xfs_defer_drain_init(&pag->pag_intents_drain); |
| init_waitqueue_head(&pag->pagb_wait); |
| init_waitqueue_head(&pag->pag_active_wq); |
| pag->pagb_count = 0; |
| pag->pagb_tree = RB_ROOT; |
| xfs_hooks_init(&pag->pag_rmap_update_hooks); |
| #endif /* __KERNEL__ */ |
| |
| error = xfs_buf_cache_init(&pag->pag_bcache); |
| if (error) |
| goto out_remove_pag; |
| |
| /* Active ref owned by mount indicates AG is online. */ |
| atomic_set(&pag->pag_active_ref, 1); |
| |
| /* first new pag is fully initialized */ |
| if (first_initialised == NULLAGNUMBER) |
| first_initialised = index; |
| |
| /* |
| * Pre-calculated geometry |
| */ |
| pag->block_count = __xfs_ag_block_count(mp, index, agcount, |
| dblocks); |
| pag->min_block = XFS_AGFL_BLOCK(mp); |
| __xfs_agino_range(mp, pag->block_count, &pag->agino_min, |
| &pag->agino_max); |
| } |
| |
| index = xfs_set_inode_alloc(mp, agcount); |
| |
| if (maxagi) |
| *maxagi = index; |
| |
| mp->m_ag_prealloc_blocks = xfs_prealloc_blocks(mp); |
| return 0; |
| |
| out_remove_pag: |
| xfs_defer_drain_free(&pag->pag_intents_drain); |
| pag = xa_erase(&mp->m_perags, index); |
| out_free_pag: |
| kfree(pag); |
| out_unwind_new_pags: |
| /* unwind any prior newly initialized pags */ |
| xfs_free_unused_perag_range(mp, first_initialised, agcount); |
| return error; |
| } |
| |
| static int |
| xfs_get_aghdr_buf( |
| struct xfs_mount *mp, |
| xfs_daddr_t blkno, |
| size_t numblks, |
| struct xfs_buf **bpp, |
| const struct xfs_buf_ops *ops) |
| { |
| struct xfs_buf *bp; |
| int error; |
| |
| error = xfs_buf_get_uncached(mp->m_ddev_targp, numblks, 0, &bp); |
| if (error) |
| return error; |
| |
| bp->b_maps[0].bm_bn = blkno; |
| bp->b_ops = ops; |
| |
| *bpp = bp; |
| return 0; |
| } |
| |
| /* |
| * Generic btree root block init function |
| */ |
| static void |
| xfs_btroot_init( |
| struct xfs_mount *mp, |
| struct xfs_buf *bp, |
| struct aghdr_init_data *id) |
| { |
| xfs_btree_init_buf(mp, bp, id->bc_ops, 0, 0, id->agno); |
| } |
| |
| /* Finish initializing a free space btree. */ |
| static void |
| xfs_freesp_init_recs( |
| struct xfs_mount *mp, |
| struct xfs_buf *bp, |
| struct aghdr_init_data *id) |
| { |
| struct xfs_alloc_rec *arec; |
| struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); |
| |
| arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1); |
| arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks); |
| |
| if (xfs_ag_contains_log(mp, id->agno)) { |
| struct xfs_alloc_rec *nrec; |
| xfs_agblock_t start = XFS_FSB_TO_AGBNO(mp, |
| mp->m_sb.sb_logstart); |
| |
| ASSERT(start >= mp->m_ag_prealloc_blocks); |
| if (start != mp->m_ag_prealloc_blocks) { |
| /* |
| * Modify first record to pad stripe align of log and |
| * bump the record count. |
| */ |
| arec->ar_blockcount = cpu_to_be32(start - |
| mp->m_ag_prealloc_blocks); |
| be16_add_cpu(&block->bb_numrecs, 1); |
| nrec = arec + 1; |
| |
| /* |
| * Insert second record at start of internal log |
| * which then gets trimmed. |
| */ |
| nrec->ar_startblock = cpu_to_be32( |
| be32_to_cpu(arec->ar_startblock) + |
| be32_to_cpu(arec->ar_blockcount)); |
| arec = nrec; |
| } |
| /* |
| * Change record start to after the internal log |
| */ |
| be32_add_cpu(&arec->ar_startblock, mp->m_sb.sb_logblocks); |
| } |
| |
| /* |
| * Calculate the block count of this record; if it is nonzero, |
| * increment the record count. |
| */ |
| arec->ar_blockcount = cpu_to_be32(id->agsize - |
| be32_to_cpu(arec->ar_startblock)); |
| if (arec->ar_blockcount) |
| be16_add_cpu(&block->bb_numrecs, 1); |
| } |
| |
| /* |
| * bnobt/cntbt btree root block init functions |
| */ |
| static void |
| xfs_bnoroot_init( |
| struct xfs_mount *mp, |
| struct xfs_buf *bp, |
| struct aghdr_init_data *id) |
| { |
| xfs_btree_init_buf(mp, bp, id->bc_ops, 0, 0, id->agno); |
| xfs_freesp_init_recs(mp, bp, id); |
| } |
| |
| /* |
| * Reverse map root block init |
| */ |
| static void |
| xfs_rmaproot_init( |
| struct xfs_mount *mp, |
| struct xfs_buf *bp, |
| struct aghdr_init_data *id) |
| { |
| struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); |
| struct xfs_rmap_rec *rrec; |
| |
| xfs_btree_init_buf(mp, bp, id->bc_ops, 0, 4, id->agno); |
| |
| /* |
| * mark the AG header regions as static metadata The BNO |
| * btree block is the first block after the headers, so |
| * it's location defines the size of region the static |
| * metadata consumes. |
| * |
| * Note: unlike mkfs, we never have to account for log |
| * space when growing the data regions |
| */ |
| rrec = XFS_RMAP_REC_ADDR(block, 1); |
| rrec->rm_startblock = 0; |
| rrec->rm_blockcount = cpu_to_be32(XFS_BNO_BLOCK(mp)); |
| rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_FS); |
| rrec->rm_offset = 0; |
| |
| /* account freespace btree root blocks */ |
| rrec = XFS_RMAP_REC_ADDR(block, 2); |
| rrec->rm_startblock = cpu_to_be32(XFS_BNO_BLOCK(mp)); |
| rrec->rm_blockcount = cpu_to_be32(2); |
| rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_AG); |
| rrec->rm_offset = 0; |
| |
| /* account inode btree root blocks */ |
| rrec = XFS_RMAP_REC_ADDR(block, 3); |
| rrec->rm_startblock = cpu_to_be32(XFS_IBT_BLOCK(mp)); |
| rrec->rm_blockcount = cpu_to_be32(XFS_RMAP_BLOCK(mp) - |
| XFS_IBT_BLOCK(mp)); |
| rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_INOBT); |
| rrec->rm_offset = 0; |
| |
| /* account for rmap btree root */ |
| rrec = XFS_RMAP_REC_ADDR(block, 4); |
| rrec->rm_startblock = cpu_to_be32(XFS_RMAP_BLOCK(mp)); |
| rrec->rm_blockcount = cpu_to_be32(1); |
| rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_AG); |
| rrec->rm_offset = 0; |
| |
| /* account for refc btree root */ |
| if (xfs_has_reflink(mp)) { |
| rrec = XFS_RMAP_REC_ADDR(block, 5); |
| rrec->rm_startblock = cpu_to_be32(xfs_refc_block(mp)); |
| rrec->rm_blockcount = cpu_to_be32(1); |
| rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_REFC); |
| rrec->rm_offset = 0; |
| be16_add_cpu(&block->bb_numrecs, 1); |
| } |
| |
| /* account for the log space */ |
| if (xfs_ag_contains_log(mp, id->agno)) { |
| rrec = XFS_RMAP_REC_ADDR(block, |
| be16_to_cpu(block->bb_numrecs) + 1); |
| rrec->rm_startblock = cpu_to_be32( |
| XFS_FSB_TO_AGBNO(mp, mp->m_sb.sb_logstart)); |
| rrec->rm_blockcount = cpu_to_be32(mp->m_sb.sb_logblocks); |
| rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_LOG); |
| rrec->rm_offset = 0; |
| be16_add_cpu(&block->bb_numrecs, 1); |
| } |
| } |
| |
| /* |
| * Initialise new secondary superblocks with the pre-grow geometry, but mark |
| * them as "in progress" so we know they haven't yet been activated. This will |
| * get cleared when the update with the new geometry information is done after |
| * changes to the primary are committed. This isn't strictly necessary, but we |
| * get it for free with the delayed buffer write lists and it means we can tell |
| * if a grow operation didn't complete properly after the fact. |
| */ |
| static void |
| xfs_sbblock_init( |
| struct xfs_mount *mp, |
| struct xfs_buf *bp, |
| struct aghdr_init_data *id) |
| { |
| struct xfs_dsb *dsb = bp->b_addr; |
| |
| xfs_sb_to_disk(dsb, &mp->m_sb); |
| dsb->sb_inprogress = 1; |
| } |
| |
| static void |
| xfs_agfblock_init( |
| struct xfs_mount *mp, |
| struct xfs_buf *bp, |
| struct aghdr_init_data *id) |
| { |
| struct xfs_agf *agf = bp->b_addr; |
| xfs_extlen_t tmpsize; |
| |
| agf->agf_magicnum = cpu_to_be32(XFS_AGF_MAGIC); |
| agf->agf_versionnum = cpu_to_be32(XFS_AGF_VERSION); |
| agf->agf_seqno = cpu_to_be32(id->agno); |
| agf->agf_length = cpu_to_be32(id->agsize); |
| agf->agf_bno_root = cpu_to_be32(XFS_BNO_BLOCK(mp)); |
| agf->agf_cnt_root = cpu_to_be32(XFS_CNT_BLOCK(mp)); |
| agf->agf_bno_level = cpu_to_be32(1); |
| agf->agf_cnt_level = cpu_to_be32(1); |
| if (xfs_has_rmapbt(mp)) { |
| agf->agf_rmap_root = cpu_to_be32(XFS_RMAP_BLOCK(mp)); |
| agf->agf_rmap_level = cpu_to_be32(1); |
| agf->agf_rmap_blocks = cpu_to_be32(1); |
| } |
| |
| agf->agf_flfirst = cpu_to_be32(1); |
| agf->agf_fllast = 0; |
| agf->agf_flcount = 0; |
| tmpsize = id->agsize - mp->m_ag_prealloc_blocks; |
| agf->agf_freeblks = cpu_to_be32(tmpsize); |
| agf->agf_longest = cpu_to_be32(tmpsize); |
| if (xfs_has_crc(mp)) |
| uuid_copy(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid); |
| if (xfs_has_reflink(mp)) { |
| agf->agf_refcount_root = cpu_to_be32( |
| xfs_refc_block(mp)); |
| agf->agf_refcount_level = cpu_to_be32(1); |
| agf->agf_refcount_blocks = cpu_to_be32(1); |
| } |
| |
| if (xfs_ag_contains_log(mp, id->agno)) { |
| int64_t logblocks = mp->m_sb.sb_logblocks; |
| |
| be32_add_cpu(&agf->agf_freeblks, -logblocks); |
| agf->agf_longest = cpu_to_be32(id->agsize - |
| XFS_FSB_TO_AGBNO(mp, mp->m_sb.sb_logstart) - logblocks); |
| } |
| } |
| |
| static void |
| xfs_agflblock_init( |
| struct xfs_mount *mp, |
| struct xfs_buf *bp, |
| struct aghdr_init_data *id) |
| { |
| struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp); |
| __be32 *agfl_bno; |
| int bucket; |
| |
| if (xfs_has_crc(mp)) { |
| agfl->agfl_magicnum = cpu_to_be32(XFS_AGFL_MAGIC); |
| agfl->agfl_seqno = cpu_to_be32(id->agno); |
| uuid_copy(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid); |
| } |
| |
| agfl_bno = xfs_buf_to_agfl_bno(bp); |
| for (bucket = 0; bucket < xfs_agfl_size(mp); bucket++) |
| agfl_bno[bucket] = cpu_to_be32(NULLAGBLOCK); |
| } |
| |
| static void |
| xfs_agiblock_init( |
| struct xfs_mount *mp, |
| struct xfs_buf *bp, |
| struct aghdr_init_data *id) |
| { |
| struct xfs_agi *agi = bp->b_addr; |
| int bucket; |
| |
| agi->agi_magicnum = cpu_to_be32(XFS_AGI_MAGIC); |
| agi->agi_versionnum = cpu_to_be32(XFS_AGI_VERSION); |
| agi->agi_seqno = cpu_to_be32(id->agno); |
| agi->agi_length = cpu_to_be32(id->agsize); |
| agi->agi_count = 0; |
| agi->agi_root = cpu_to_be32(XFS_IBT_BLOCK(mp)); |
| agi->agi_level = cpu_to_be32(1); |
| agi->agi_freecount = 0; |
| agi->agi_newino = cpu_to_be32(NULLAGINO); |
| agi->agi_dirino = cpu_to_be32(NULLAGINO); |
| if (xfs_has_crc(mp)) |
| uuid_copy(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid); |
| if (xfs_has_finobt(mp)) { |
| agi->agi_free_root = cpu_to_be32(XFS_FIBT_BLOCK(mp)); |
| agi->agi_free_level = cpu_to_be32(1); |
| } |
| for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) |
| agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO); |
| if (xfs_has_inobtcounts(mp)) { |
| agi->agi_iblocks = cpu_to_be32(1); |
| if (xfs_has_finobt(mp)) |
| agi->agi_fblocks = cpu_to_be32(1); |
| } |
| } |
| |
| typedef void (*aghdr_init_work_f)(struct xfs_mount *mp, struct xfs_buf *bp, |
| struct aghdr_init_data *id); |
| static int |
| xfs_ag_init_hdr( |
| struct xfs_mount *mp, |
| struct aghdr_init_data *id, |
| aghdr_init_work_f work, |
| const struct xfs_buf_ops *ops) |
| { |
| struct xfs_buf *bp; |
| int error; |
| |
| error = xfs_get_aghdr_buf(mp, id->daddr, id->numblks, &bp, ops); |
| if (error) |
| return error; |
| |
| (*work)(mp, bp, id); |
| |
| xfs_buf_delwri_queue(bp, &id->buffer_list); |
| xfs_buf_relse(bp); |
| return 0; |
| } |
| |
| struct xfs_aghdr_grow_data { |
| xfs_daddr_t daddr; |
| size_t numblks; |
| const struct xfs_buf_ops *ops; |
| aghdr_init_work_f work; |
| const struct xfs_btree_ops *bc_ops; |
| bool need_init; |
| }; |
| |
| /* |
| * Prepare new AG headers to be written to disk. We use uncached buffers here, |
| * as it is assumed these new AG headers are currently beyond the currently |
| * valid filesystem address space. Using cached buffers would trip over EOFS |
| * corruption detection alogrithms in the buffer cache lookup routines. |
| * |
| * This is a non-transactional function, but the prepared buffers are added to a |
| * delayed write buffer list supplied by the caller so they can submit them to |
| * disk and wait on them as required. |
| */ |
| int |
| xfs_ag_init_headers( |
| struct xfs_mount *mp, |
| struct aghdr_init_data *id) |
| |
| { |
| struct xfs_aghdr_grow_data aghdr_data[] = { |
| { /* SB */ |
| .daddr = XFS_AG_DADDR(mp, id->agno, XFS_SB_DADDR), |
| .numblks = XFS_FSS_TO_BB(mp, 1), |
| .ops = &xfs_sb_buf_ops, |
| .work = &xfs_sbblock_init, |
| .need_init = true |
| }, |
| { /* AGF */ |
| .daddr = XFS_AG_DADDR(mp, id->agno, XFS_AGF_DADDR(mp)), |
| .numblks = XFS_FSS_TO_BB(mp, 1), |
| .ops = &xfs_agf_buf_ops, |
| .work = &xfs_agfblock_init, |
| .need_init = true |
| }, |
| { /* AGFL */ |
| .daddr = XFS_AG_DADDR(mp, id->agno, XFS_AGFL_DADDR(mp)), |
| .numblks = XFS_FSS_TO_BB(mp, 1), |
| .ops = &xfs_agfl_buf_ops, |
| .work = &xfs_agflblock_init, |
| .need_init = true |
| }, |
| { /* AGI */ |
| .daddr = XFS_AG_DADDR(mp, id->agno, XFS_AGI_DADDR(mp)), |
| .numblks = XFS_FSS_TO_BB(mp, 1), |
| .ops = &xfs_agi_buf_ops, |
| .work = &xfs_agiblock_init, |
| .need_init = true |
| }, |
| { /* BNO root block */ |
| .daddr = XFS_AGB_TO_DADDR(mp, id->agno, XFS_BNO_BLOCK(mp)), |
| .numblks = BTOBB(mp->m_sb.sb_blocksize), |
| .ops = &xfs_bnobt_buf_ops, |
| .work = &xfs_bnoroot_init, |
| .bc_ops = &xfs_bnobt_ops, |
| .need_init = true |
| }, |
| { /* CNT root block */ |
| .daddr = XFS_AGB_TO_DADDR(mp, id->agno, XFS_CNT_BLOCK(mp)), |
| .numblks = BTOBB(mp->m_sb.sb_blocksize), |
| .ops = &xfs_cntbt_buf_ops, |
| .work = &xfs_bnoroot_init, |
| .bc_ops = &xfs_cntbt_ops, |
| .need_init = true |
| }, |
| { /* INO root block */ |
| .daddr = XFS_AGB_TO_DADDR(mp, id->agno, XFS_IBT_BLOCK(mp)), |
| .numblks = BTOBB(mp->m_sb.sb_blocksize), |
| .ops = &xfs_inobt_buf_ops, |
| .work = &xfs_btroot_init, |
| .bc_ops = &xfs_inobt_ops, |
| .need_init = true |
| }, |
| { /* FINO root block */ |
| .daddr = XFS_AGB_TO_DADDR(mp, id->agno, XFS_FIBT_BLOCK(mp)), |
| .numblks = BTOBB(mp->m_sb.sb_blocksize), |
| .ops = &xfs_finobt_buf_ops, |
| .work = &xfs_btroot_init, |
| .bc_ops = &xfs_finobt_ops, |
| .need_init = xfs_has_finobt(mp) |
| }, |
| { /* RMAP root block */ |
| .daddr = XFS_AGB_TO_DADDR(mp, id->agno, XFS_RMAP_BLOCK(mp)), |
| .numblks = BTOBB(mp->m_sb.sb_blocksize), |
| .ops = &xfs_rmapbt_buf_ops, |
| .work = &xfs_rmaproot_init, |
| .bc_ops = &xfs_rmapbt_ops, |
| .need_init = xfs_has_rmapbt(mp) |
| }, |
| { /* REFC root block */ |
| .daddr = XFS_AGB_TO_DADDR(mp, id->agno, xfs_refc_block(mp)), |
| .numblks = BTOBB(mp->m_sb.sb_blocksize), |
| .ops = &xfs_refcountbt_buf_ops, |
| .work = &xfs_btroot_init, |
| .bc_ops = &xfs_refcountbt_ops, |
| .need_init = xfs_has_reflink(mp) |
| }, |
| { /* NULL terminating block */ |
| .daddr = XFS_BUF_DADDR_NULL, |
| } |
| }; |
| struct xfs_aghdr_grow_data *dp; |
| int error = 0; |
| |
| /* Account for AG free space in new AG */ |
| id->nfree += id->agsize - mp->m_ag_prealloc_blocks; |
| for (dp = &aghdr_data[0]; dp->daddr != XFS_BUF_DADDR_NULL; dp++) { |
| if (!dp->need_init) |
| continue; |
| |
| id->daddr = dp->daddr; |
| id->numblks = dp->numblks; |
| id->bc_ops = dp->bc_ops; |
| error = xfs_ag_init_hdr(mp, id, dp->work, dp->ops); |
| if (error) |
| break; |
| } |
| return error; |
| } |
| |
| int |
| xfs_ag_shrink_space( |
| struct xfs_perag *pag, |
| struct xfs_trans **tpp, |
| xfs_extlen_t delta) |
| { |
| struct xfs_mount *mp = pag->pag_mount; |
| struct xfs_alloc_arg args = { |
| .tp = *tpp, |
| .mp = mp, |
| .pag = pag, |
| .minlen = delta, |
| .maxlen = delta, |
| .oinfo = XFS_RMAP_OINFO_SKIP_UPDATE, |
| .resv = XFS_AG_RESV_NONE, |
| .prod = 1 |
| }; |
| struct xfs_buf *agibp, *agfbp; |
| struct xfs_agi *agi; |
| struct xfs_agf *agf; |
| xfs_agblock_t aglen; |
| int error, err2; |
| |
| ASSERT(pag->pag_agno == mp->m_sb.sb_agcount - 1); |
| error = xfs_ialloc_read_agi(pag, *tpp, 0, &agibp); |
| if (error) |
| return error; |
| |
| agi = agibp->b_addr; |
| |
| error = xfs_alloc_read_agf(pag, *tpp, 0, &agfbp); |
| if (error) |
| return error; |
| |
| agf = agfbp->b_addr; |
| aglen = be32_to_cpu(agi->agi_length); |
| /* some extra paranoid checks before we shrink the ag */ |
| if (XFS_IS_CORRUPT(mp, agf->agf_length != agi->agi_length)) { |
| xfs_ag_mark_sick(pag, XFS_SICK_AG_AGF); |
| return -EFSCORRUPTED; |
| } |
| if (delta >= aglen) |
| return -EINVAL; |
| |
| /* |
| * Make sure that the last inode cluster cannot overlap with the new |
| * end of the AG, even if it's sparse. |
| */ |
| error = xfs_ialloc_check_shrink(pag, *tpp, agibp, aglen - delta); |
| if (error) |
| return error; |
| |
| /* |
| * Disable perag reservations so it doesn't cause the allocation request |
| * to fail. We'll reestablish reservation before we return. |
| */ |
| xfs_ag_resv_free(pag); |
| |
| /* internal log shouldn't also show up in the free space btrees */ |
| error = xfs_alloc_vextent_exact_bno(&args, |
| XFS_AGB_TO_FSB(mp, pag->pag_agno, aglen - delta)); |
| if (!error && args.agbno == NULLAGBLOCK) |
| error = -ENOSPC; |
| |
| if (error) { |
| /* |
| * If extent allocation fails, need to roll the transaction to |
| * ensure that the AGFL fixup has been committed anyway. |
| * |
| * We need to hold the AGF across the roll to ensure nothing can |
| * access the AG for allocation until the shrink is fully |
| * cleaned up. And due to the resetting of the AG block |
| * reservation space needing to lock the AGI, we also have to |
| * hold that so we don't get AGI/AGF lock order inversions in |
| * the error handling path. |
| */ |
| xfs_trans_bhold(*tpp, agfbp); |
| xfs_trans_bhold(*tpp, agibp); |
| err2 = xfs_trans_roll(tpp); |
| if (err2) |
| return err2; |
| xfs_trans_bjoin(*tpp, agfbp); |
| xfs_trans_bjoin(*tpp, agibp); |
| goto resv_init_out; |
| } |
| |
| /* |
| * if successfully deleted from freespace btrees, need to confirm |
| * per-AG reservation works as expected. |
| */ |
| be32_add_cpu(&agi->agi_length, -delta); |
| be32_add_cpu(&agf->agf_length, -delta); |
| |
| err2 = xfs_ag_resv_init(pag, *tpp); |
| if (err2) { |
| be32_add_cpu(&agi->agi_length, delta); |
| be32_add_cpu(&agf->agf_length, delta); |
| if (err2 != -ENOSPC) |
| goto resv_err; |
| |
| err2 = xfs_free_extent_later(*tpp, args.fsbno, delta, NULL, |
| XFS_AG_RESV_NONE, XFS_FREE_EXTENT_SKIP_DISCARD); |
| if (err2) |
| goto resv_err; |
| |
| /* |
| * Roll the transaction before trying to re-init the per-ag |
| * reservation. The new transaction is clean so it will cancel |
| * without any side effects. |
| */ |
| error = xfs_defer_finish(tpp); |
| if (error) |
| return error; |
| |
| error = -ENOSPC; |
| goto resv_init_out; |
| } |
| |
| /* Update perag geometry */ |
| pag->block_count -= delta; |
| __xfs_agino_range(pag->pag_mount, pag->block_count, &pag->agino_min, |
| &pag->agino_max); |
| |
| xfs_ialloc_log_agi(*tpp, agibp, XFS_AGI_LENGTH); |
| xfs_alloc_log_agf(*tpp, agfbp, XFS_AGF_LENGTH); |
| return 0; |
| |
| resv_init_out: |
| err2 = xfs_ag_resv_init(pag, *tpp); |
| if (!err2) |
| return error; |
| resv_err: |
| xfs_warn(mp, "Error %d reserving per-AG metadata reserve pool.", err2); |
| xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); |
| return err2; |
| } |
| |
| /* |
| * Extent the AG indicated by the @id by the length passed in |
| */ |
| int |
| xfs_ag_extend_space( |
| struct xfs_perag *pag, |
| struct xfs_trans *tp, |
| xfs_extlen_t len) |
| { |
| struct xfs_buf *bp; |
| struct xfs_agi *agi; |
| struct xfs_agf *agf; |
| int error; |
| |
| ASSERT(pag->pag_agno == pag->pag_mount->m_sb.sb_agcount - 1); |
| |
| error = xfs_ialloc_read_agi(pag, tp, 0, &bp); |
| if (error) |
| return error; |
| |
| agi = bp->b_addr; |
| be32_add_cpu(&agi->agi_length, len); |
| xfs_ialloc_log_agi(tp, bp, XFS_AGI_LENGTH); |
| |
| /* |
| * Change agf length. |
| */ |
| error = xfs_alloc_read_agf(pag, tp, 0, &bp); |
| if (error) |
| return error; |
| |
| agf = bp->b_addr; |
| be32_add_cpu(&agf->agf_length, len); |
| ASSERT(agf->agf_length == agi->agi_length); |
| xfs_alloc_log_agf(tp, bp, XFS_AGF_LENGTH); |
| |
| /* |
| * Free the new space. |
| * |
| * XFS_RMAP_OINFO_SKIP_UPDATE is used here to tell the rmap btree that |
| * this doesn't actually exist in the rmap btree. |
| */ |
| error = xfs_rmap_free(tp, bp, pag, be32_to_cpu(agf->agf_length) - len, |
| len, &XFS_RMAP_OINFO_SKIP_UPDATE); |
| if (error) |
| return error; |
| |
| error = xfs_free_extent(tp, pag, be32_to_cpu(agf->agf_length) - len, |
| len, &XFS_RMAP_OINFO_SKIP_UPDATE, XFS_AG_RESV_NONE); |
| if (error) |
| return error; |
| |
| /* Update perag geometry */ |
| pag->block_count = be32_to_cpu(agf->agf_length); |
| __xfs_agino_range(pag->pag_mount, pag->block_count, &pag->agino_min, |
| &pag->agino_max); |
| return 0; |
| } |
| |
| /* Retrieve AG geometry. */ |
| int |
| xfs_ag_get_geometry( |
| struct xfs_perag *pag, |
| struct xfs_ag_geometry *ageo) |
| { |
| struct xfs_buf *agi_bp; |
| struct xfs_buf *agf_bp; |
| struct xfs_agi *agi; |
| struct xfs_agf *agf; |
| unsigned int freeblks; |
| int error; |
| |
| /* Lock the AG headers. */ |
| error = xfs_ialloc_read_agi(pag, NULL, 0, &agi_bp); |
| if (error) |
| return error; |
| error = xfs_alloc_read_agf(pag, NULL, 0, &agf_bp); |
| if (error) |
| goto out_agi; |
| |
| /* Fill out form. */ |
| memset(ageo, 0, sizeof(*ageo)); |
| ageo->ag_number = pag->pag_agno; |
| |
| agi = agi_bp->b_addr; |
| ageo->ag_icount = be32_to_cpu(agi->agi_count); |
| ageo->ag_ifree = be32_to_cpu(agi->agi_freecount); |
| |
| agf = agf_bp->b_addr; |
| ageo->ag_length = be32_to_cpu(agf->agf_length); |
| freeblks = pag->pagf_freeblks + |
| pag->pagf_flcount + |
| pag->pagf_btreeblks - |
| xfs_ag_resv_needed(pag, XFS_AG_RESV_NONE); |
| ageo->ag_freeblks = freeblks; |
| xfs_ag_geom_health(pag, ageo); |
| |
| /* Release resources. */ |
| xfs_buf_relse(agf_bp); |
| out_agi: |
| xfs_buf_relse(agi_bp); |
| return error; |
| } |