fs/xfs/libxfs/xfs_metafile.c - linux - Git at Google

 // SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright (c) 2018-2024 Oracle.  All Rights Reserved.
  * Author: Darrick J. Wong <djwong@kernel.org>
  */
 #include "xfs.h"
 #include "xfs_fs.h"
 #include "xfs_shared.h"
 #include "xfs_format.h"
 #include "xfs_log_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_bit.h"
 #include "xfs_sb.h"
 #include "xfs_mount.h"
 #include "xfs_defer.h"
 #include "xfs_trans.h"
 #include "xfs_metafile.h"
 #include "xfs_trace.h"
 #include "xfs_inode.h"
 #include "xfs_quota.h"
 #include "xfs_errortag.h"
 #include "xfs_error.h"
 #include "xfs_alloc.h"

 static const struct {
 	enum xfs_metafile_type	mtype;
 	const char		*name;
 } xfs_metafile_type_strs[] = { XFS_METAFILE_TYPE_STR };

 const char *
 xfs_metafile_type_str(enum xfs_metafile_type metatype)
 {
 	unsigned int	i;

 	for (i = 0; i < ARRAY_SIZE(xfs_metafile_type_strs); i++) {
 		if (xfs_metafile_type_strs[i].mtype == metatype)
 			return xfs_metafile_type_strs[i].name;
 	}

 	return NULL;
 }

 /* Set up an inode to be recognized as a metadata directory inode. */
 void
 xfs_metafile_set_iflag(
 	struct xfs_trans	*tp,
 	struct xfs_inode	*ip,
 	enum xfs_metafile_type	metafile_type)
 {
 	VFS_I(ip)->i_mode &= ~0777;
 	VFS_I(ip)->i_uid = GLOBAL_ROOT_UID;
 	VFS_I(ip)->i_gid = GLOBAL_ROOT_GID;
 	if (S_ISDIR(VFS_I(ip)->i_mode))
 		ip->i_diflags |= XFS_METADIR_DIFLAGS;
 	else
 		ip->i_diflags |= XFS_METAFILE_DIFLAGS;
 	ip->i_diflags2 &= ~XFS_DIFLAG2_DAX;
 	ip->i_diflags2 |= XFS_DIFLAG2_METADATA;
 	ip->i_metatype = metafile_type;
 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 }

 /* Clear the metadata directory inode flag. */
 void
 xfs_metafile_clear_iflag(
 	struct xfs_trans	*tp,
 	struct xfs_inode	*ip)
 {
 	ASSERT(xfs_is_metadir_inode(ip));
 	ASSERT(VFS_I(ip)->i_nlink == 0);

 	ip->i_diflags2 &= ~XFS_DIFLAG2_METADATA;
 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 }

 /*
  * Is the amount of space that could be allocated towards a given metadata
  * file at or beneath a certain threshold?
  */
 static inline bool
 xfs_metafile_resv_can_cover(
 	struct xfs_inode	*ip,
 	int64_t			rhs)
 {
 	/*
 	 * The amount of space that can be allocated to this metadata file is
 	 * the remaining reservation for the particular metadata file + the
 	 * global free block count.  Take care of the first case to avoid
 	 * touching the per-cpu counter.
 	 */
 	if (ip->i_delayed_blks >= rhs)
 		return true;

 	/*
 	 * There aren't enough blocks left in the inode's reservation, but it
 	 * isn't critical unless there also isn't enough free space.
 	 */
 	return __percpu_counter_compare(&ip->i_mount->m_fdblocks,
 			rhs - ip->i_delayed_blks, 2048) >= 0;
 }

 /*
  * Is this metadata file critically low on blocks?  For now we'll define that
  * as the number of blocks we can get our hands on being less than 10% of what
  * we reserved or less than some arbitrary number (maximum btree height).
  */
 bool
 xfs_metafile_resv_critical(
 	struct xfs_inode	*ip)
 {
 	uint64_t		asked_low_water;

 	if (!ip)
 		return false;

 	ASSERT(xfs_is_metadir_inode(ip));
 	trace_xfs_metafile_resv_critical(ip, 0);

 	if (!xfs_metafile_resv_can_cover(ip, ip->i_mount->m_rtbtree_maxlevels))
 		return true;

 	asked_low_water = div_u64(ip->i_meta_resv_asked, 10);
 	if (!xfs_metafile_resv_can_cover(ip, asked_low_water))
 		return true;

 	return XFS_TEST_ERROR(false, ip->i_mount,
 			XFS_ERRTAG_METAFILE_RESV_CRITICAL);
 }

 /* Allocate a block from the metadata file's reservation. */
 void
 xfs_metafile_resv_alloc_space(
 	struct xfs_inode	*ip,
 	struct xfs_alloc_arg	*args)
 {
 	int64_t			len = args->len;

 	ASSERT(xfs_is_metadir_inode(ip));
 	ASSERT(args->resv == XFS_AG_RESV_METAFILE);

 	trace_xfs_metafile_resv_alloc_space(ip, args->len);

 	/*
 	 * Allocate the blocks from the metadata inode's block reservation
 	 * and update the ondisk sb counter.
 	 */
 	if (ip->i_delayed_blks > 0) {
 		int64_t		from_resv;

 		from_resv = min_t(int64_t, len, ip->i_delayed_blks);
 		ip->i_delayed_blks -= from_resv;
 		xfs_mod_delalloc(ip, 0, -from_resv);
 		xfs_trans_mod_sb(args->tp, XFS_TRANS_SB_RES_FDBLOCKS,
 				-from_resv);
 		len -= from_resv;
 	}

 	/*
 	 * Any allocation in excess of the reservation requires in-core and
 	 * on-disk fdblocks updates.  If we can grab @len blocks from the
 	 * in-core fdblocks then all we need to do is update the on-disk
 	 * superblock; if not, then try to steal some from the transaction's
 	 * block reservation.  Overruns are only expected for rmap btrees.
 	 */
 	if (len) {
 		unsigned int	field;
 		int		error;

 		error = xfs_dec_fdblocks(ip->i_mount, len, true);
 		if (error)
 			field = XFS_TRANS_SB_FDBLOCKS;
 		else
 			field = XFS_TRANS_SB_RES_FDBLOCKS;

 		xfs_trans_mod_sb(args->tp, field, -len);
 	}

 	ip->i_nblocks += args->len;
 	xfs_trans_log_inode(args->tp, ip, XFS_ILOG_CORE);
 }

 /* Free a block to the metadata file's reservation. */
 void
 xfs_metafile_resv_free_space(
 	struct xfs_inode	*ip,
 	struct xfs_trans	*tp,
 	xfs_filblks_t		len)
 {
 	int64_t			to_resv;

 	ASSERT(xfs_is_metadir_inode(ip));
 	trace_xfs_metafile_resv_free_space(ip, len);

 	ip->i_nblocks -= len;
 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

 	/*
 	 * Add the freed blocks back into the inode's delalloc reservation
 	 * until it reaches the maximum size.  Update the ondisk fdblocks only.
 	 */
 	to_resv = ip->i_meta_resv_asked - (ip->i_nblocks + ip->i_delayed_blks);
 	if (to_resv > 0) {
 		to_resv = min_t(int64_t, to_resv, len);
 		ip->i_delayed_blks += to_resv;
 		xfs_mod_delalloc(ip, 0, to_resv);
 		xfs_trans_mod_sb(tp, XFS_TRANS_SB_RES_FDBLOCKS, to_resv);
 		len -= to_resv;
 	}

 	/*
 	 * Everything else goes back to the filesystem, so update the in-core
 	 * and on-disk counters.
 	 */
 	if (len)
 		xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, len);
 }

 /* Release a metadata file's space reservation. */
 void
 xfs_metafile_resv_free(
 	struct xfs_inode	*ip)
 {
 	/* Non-btree metadata inodes don't need space reservations. */
 	if (!ip || !ip->i_meta_resv_asked)
 		return;

 	ASSERT(xfs_is_metadir_inode(ip));
 	trace_xfs_metafile_resv_free(ip, 0);

 	if (ip->i_delayed_blks) {
 		xfs_mod_delalloc(ip, 0, -ip->i_delayed_blks);
 		xfs_add_fdblocks(ip->i_mount, ip->i_delayed_blks);
 		ip->i_delayed_blks = 0;
 	}
 	ip->i_meta_resv_asked = 0;
 }

 /* Set up a metadata file's space reservation. */
 int
 xfs_metafile_resv_init(
 	struct xfs_inode	*ip,
 	xfs_filblks_t		ask)
 {
 	xfs_filblks_t		hidden_space;
 	xfs_filblks_t		used;
 	int			error;

 	if (!ip || ip->i_meta_resv_asked > 0)
 		return 0;

 	ASSERT(xfs_is_metadir_inode(ip));

 	/*
 	 * Space taken by all other metadata btrees are accounted on-disk as
 	 * used space.  We therefore only hide the space that is reserved but
 	 * not used by the trees.
 	 */
 	used = ip->i_nblocks;
 	if (used > ask)
 		ask = used;
 	hidden_space = ask - used;

 	error = xfs_dec_fdblocks(ip->i_mount, hidden_space, true);
 	if (error) {
 		trace_xfs_metafile_resv_init_error(ip, error, _RET_IP_);
 		return error;
 	}

 	xfs_mod_delalloc(ip, 0, hidden_space);
 	ip->i_delayed_blks = hidden_space;
 	ip->i_meta_resv_asked = ask;

 	trace_xfs_metafile_resv_init(ip, ask);
 	return 0;
 }
	// SPDX-License-Identifier: GPL-2.0-or-later
	/*
	* Copyright (c) 2018-2024 Oracle. All Rights Reserved.
	* Author: Darrick J. Wong <djwong@kernel.org>
	*/
	#include "xfs.h"
	#include "xfs_fs.h"
	#include "xfs_shared.h"
	#include "xfs_format.h"
	#include "xfs_log_format.h"
	#include "xfs_trans_resv.h"
	#include "xfs_bit.h"
	#include "xfs_sb.h"
	#include "xfs_mount.h"
	#include "xfs_defer.h"
	#include "xfs_trans.h"
	#include "xfs_metafile.h"
	#include "xfs_trace.h"
	#include "xfs_inode.h"
	#include "xfs_quota.h"
	#include "xfs_errortag.h"
	#include "xfs_error.h"
	#include "xfs_alloc.h"

	static const struct {
	enum xfs_metafile_type mtype;
	const char *name;
	} xfs_metafile_type_strs[] = { XFS_METAFILE_TYPE_STR };

	const char *
	xfs_metafile_type_str(enum xfs_metafile_type metatype)
	{
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(xfs_metafile_type_strs); i++) {
	if (xfs_metafile_type_strs[i].mtype == metatype)
	return xfs_metafile_type_strs[i].name;
	}

	return NULL;
	}

	/* Set up an inode to be recognized as a metadata directory inode. */
	void
	xfs_metafile_set_iflag(
	struct xfs_trans *tp,
	struct xfs_inode *ip,
	enum xfs_metafile_type metafile_type)
	{
	VFS_I(ip)->i_mode &= ~0777;
	VFS_I(ip)->i_uid = GLOBAL_ROOT_UID;
	VFS_I(ip)->i_gid = GLOBAL_ROOT_GID;
	if (S_ISDIR(VFS_I(ip)->i_mode))
	ip->i_diflags \|= XFS_METADIR_DIFLAGS;
	else
	ip->i_diflags \|= XFS_METAFILE_DIFLAGS;
	ip->i_diflags2 &= ~XFS_DIFLAG2_DAX;
	ip->i_diflags2 \|= XFS_DIFLAG2_METADATA;
	ip->i_metatype = metafile_type;
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
	}

	/* Clear the metadata directory inode flag. */
	void
	xfs_metafile_clear_iflag(
	struct xfs_trans *tp,
	struct xfs_inode *ip)
	{
	ASSERT(xfs_is_metadir_inode(ip));
	ASSERT(VFS_I(ip)->i_nlink == 0);

	ip->i_diflags2 &= ~XFS_DIFLAG2_METADATA;
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
	}

	/*
	* Is the amount of space that could be allocated towards a given metadata
	* file at or beneath a certain threshold?
	*/
	static inline bool
	xfs_metafile_resv_can_cover(
	struct xfs_inode *ip,
	int64_t rhs)
	{
	/*
	* The amount of space that can be allocated to this metadata file is
	* the remaining reservation for the particular metadata file + the
	* global free block count. Take care of the first case to avoid
	* touching the per-cpu counter.
	*/
	if (ip->i_delayed_blks >= rhs)
	return true;

	/*
	* There aren't enough blocks left in the inode's reservation, but it
	* isn't critical unless there also isn't enough free space.
	*/
	return __percpu_counter_compare(&ip->i_mount->m_fdblocks,
	rhs - ip->i_delayed_blks, 2048) >= 0;
	}

	/*
	* Is this metadata file critically low on blocks? For now we'll define that
	* as the number of blocks we can get our hands on being less than 10% of what
	* we reserved or less than some arbitrary number (maximum btree height).
	*/
	bool
	xfs_metafile_resv_critical(
	struct xfs_inode *ip)
	{
	uint64_t asked_low_water;

	if (!ip)
	return false;

	ASSERT(xfs_is_metadir_inode(ip));
	trace_xfs_metafile_resv_critical(ip, 0);

	if (!xfs_metafile_resv_can_cover(ip, ip->i_mount->m_rtbtree_maxlevels))
	return true;

	asked_low_water = div_u64(ip->i_meta_resv_asked, 10);
	if (!xfs_metafile_resv_can_cover(ip, asked_low_water))
	return true;

	return XFS_TEST_ERROR(false, ip->i_mount,
	XFS_ERRTAG_METAFILE_RESV_CRITICAL);
	}

	/* Allocate a block from the metadata file's reservation. */
	void
	xfs_metafile_resv_alloc_space(
	struct xfs_inode *ip,
	struct xfs_alloc_arg *args)
	{
	int64_t len = args->len;

	ASSERT(xfs_is_metadir_inode(ip));
	ASSERT(args->resv == XFS_AG_RESV_METAFILE);

	trace_xfs_metafile_resv_alloc_space(ip, args->len);

	/*
	* Allocate the blocks from the metadata inode's block reservation
	* and update the ondisk sb counter.
	*/
	if (ip->i_delayed_blks > 0) {
	int64_t from_resv;

	from_resv = min_t(int64_t, len, ip->i_delayed_blks);
	ip->i_delayed_blks -= from_resv;
	xfs_mod_delalloc(ip, 0, -from_resv);
	xfs_trans_mod_sb(args->tp, XFS_TRANS_SB_RES_FDBLOCKS,
	-from_resv);
	len -= from_resv;
	}

	/*
	* Any allocation in excess of the reservation requires in-core and
	* on-disk fdblocks updates. If we can grab @len blocks from the
	* in-core fdblocks then all we need to do is update the on-disk
	* superblock; if not, then try to steal some from the transaction's
	* block reservation. Overruns are only expected for rmap btrees.
	*/
	if (len) {
	unsigned int field;
	int error;

	error = xfs_dec_fdblocks(ip->i_mount, len, true);
	if (error)
	field = XFS_TRANS_SB_FDBLOCKS;
	else
	field = XFS_TRANS_SB_RES_FDBLOCKS;

	xfs_trans_mod_sb(args->tp, field, -len);
	}

	ip->i_nblocks += args->len;
	xfs_trans_log_inode(args->tp, ip, XFS_ILOG_CORE);
	}

	/* Free a block to the metadata file's reservation. */
	void
	xfs_metafile_resv_free_space(
	struct xfs_inode *ip,
	struct xfs_trans *tp,
	xfs_filblks_t len)
	{
	int64_t to_resv;

	ASSERT(xfs_is_metadir_inode(ip));
	trace_xfs_metafile_resv_free_space(ip, len);

	ip->i_nblocks -= len;
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

	/*
	* Add the freed blocks back into the inode's delalloc reservation
	* until it reaches the maximum size. Update the ondisk fdblocks only.
	*/
	to_resv = ip->i_meta_resv_asked - (ip->i_nblocks + ip->i_delayed_blks);
	if (to_resv > 0) {
	to_resv = min_t(int64_t, to_resv, len);
	ip->i_delayed_blks += to_resv;
	xfs_mod_delalloc(ip, 0, to_resv);
	xfs_trans_mod_sb(tp, XFS_TRANS_SB_RES_FDBLOCKS, to_resv);
	len -= to_resv;
	}

	/*
	* Everything else goes back to the filesystem, so update the in-core
	* and on-disk counters.
	*/
	if (len)
	xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, len);
	}

	/* Release a metadata file's space reservation. */
	void
	xfs_metafile_resv_free(
	struct xfs_inode *ip)
	{
	/* Non-btree metadata inodes don't need space reservations. */
	if (!ip \|\| !ip->i_meta_resv_asked)
	return;

	ASSERT(xfs_is_metadir_inode(ip));
	trace_xfs_metafile_resv_free(ip, 0);

	if (ip->i_delayed_blks) {
	xfs_mod_delalloc(ip, 0, -ip->i_delayed_blks);
	xfs_add_fdblocks(ip->i_mount, ip->i_delayed_blks);
	ip->i_delayed_blks = 0;
	}
	ip->i_meta_resv_asked = 0;
	}

	/* Set up a metadata file's space reservation. */
	int
	xfs_metafile_resv_init(
	struct xfs_inode *ip,
	xfs_filblks_t ask)
	{
	xfs_filblks_t hidden_space;
	xfs_filblks_t used;
	int error;

	if (!ip \|\| ip->i_meta_resv_asked > 0)
	return 0;

	ASSERT(xfs_is_metadir_inode(ip));

	/*
	* Space taken by all other metadata btrees are accounted on-disk as
	* used space. We therefore only hide the space that is reserved but
	* not used by the trees.
	*/
	used = ip->i_nblocks;
	if (used > ask)
	ask = used;
	hidden_space = ask - used;

	error = xfs_dec_fdblocks(ip->i_mount, hidden_space, true);
	if (error) {
	trace_xfs_metafile_resv_init_error(ip, error, _RET_IP_);
	return error;
	}

	xfs_mod_delalloc(ip, 0, hidden_space);
	ip->i_delayed_blks = hidden_space;
	ip->i_meta_resv_asked = ask;

	trace_xfs_metafile_resv_init(ip, ask);
	return 0;
	}