bcachefs: Better inlining for bch2_alloc_to_v4_mut

This splits the slowpath out into a separate function,
__bch2_alloc_to_v4_mut(), and inlines bch2_alloc_to_v4_mut() into
bch2_trans_start_alloc_update(), its main caller. The inlined fastpath
handles keys that are already in the current alloc_v4 format, with
backpointers starting at BCH_ALLOC_V4_U64s; everything else takes the
out-of-line conversion path.

bch2_alloc_to_v4() is reworked along the same lines: it becomes an
inline wrapper that returns a const pointer directly into the key in
the common case, and only falls back to __bch2_alloc_to_v4(), which
converts into a caller-supplied buffer, for older key versions.
Callers are updated to use the returned pointer.
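
In outline, the resulting split looks like this (a simplified sketch of
the code added in the diff below; the real slowpath also normalizes the
backpointers start offset and handles pre-v4 key types):

  /* out of line: pre-v4 keys and non-canonical backpointer layouts */
  static noinline struct bkey_i_alloc_v4 *
  __bch2_alloc_to_v4_mut(struct btree_trans *trans, struct bkey_s_c k);

  /* inlined fastpath: key is already alloc_v4 in the current layout,
   * so just copy it into transaction memory: */
  static inline struct bkey_i_alloc_v4 *
  bch2_alloc_to_v4_mut_inlined(struct btree_trans *trans, struct bkey_s_c k)
  {
  	if (likely(k.k->type == KEY_TYPE_alloc_v4) &&
  	    BCH_ALLOC_V4_BACKPOINTERS_START(bkey_s_c_to_alloc_v4(k).v) ==
  	    BCH_ALLOC_V4_U64s) {
  		struct bkey_i_alloc_v4 *ret =
  			bch2_trans_kmalloc_nomemzero(trans, bkey_bytes(k.k));
  		if (!IS_ERR(ret))
  			bkey_reassemble(&ret->k_i, k);
  		return ret;
  	}

  	return __bch2_alloc_to_v4_mut(trans, k);
  }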

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index cbde19b..d757381 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -37,8 +37,6 @@ static const unsigned BCH_ALLOC_V1_FIELD_BYTES[] = {
 
 struct bkey_alloc_unpacked {
 	u64		journal_seq;
-	u64		bucket;
-	u8		dev;
 	u8		gen;
 	u8		oldest_gen;
 	u8		data_type;
@@ -194,11 +192,7 @@ static int bch2_alloc_unpack_v3(struct bkey_alloc_unpacked *out,
 
 static struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c k)
 {
-	struct bkey_alloc_unpacked ret = {
-		.dev	= k.k->p.inode,
-		.bucket	= k.k->p.offset,
-		.gen	= 0,
-	};
+	struct bkey_alloc_unpacked ret = { .gen	= 0 };
 
 	switch (k.k->type) {
 	case KEY_TYPE_alloc:
@@ -215,73 +209,6 @@ static struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c k)
 	return ret;
 }
 
-void bch2_alloc_to_v4(struct bkey_s_c k, struct bch_alloc_v4 *out)
-{
-	if (k.k->type == KEY_TYPE_alloc_v4) {
-		*out = *bkey_s_c_to_alloc_v4(k).v;
-	} else {
-		struct bkey_alloc_unpacked u = bch2_alloc_unpack(k);
-
-		*out = (struct bch_alloc_v4) {
-			.journal_seq		= u.journal_seq,
-			.flags			= u.need_discard,
-			.gen			= u.gen,
-			.oldest_gen		= u.oldest_gen,
-			.data_type		= u.data_type,
-			.stripe_redundancy	= u.stripe_redundancy,
-			.dirty_sectors		= u.dirty_sectors,
-			.cached_sectors		= u.cached_sectors,
-			.io_time[READ]		= u.read_time,
-			.io_time[WRITE]		= u.write_time,
-			.stripe			= u.stripe,
-		};
-	}
-}
-
-struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *trans, struct bkey_s_c k)
-{
-	struct bkey_i_alloc_v4 *ret;
-
-	if (k.k->type == KEY_TYPE_alloc_v4) {
-		ret = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
-		if (!IS_ERR(ret))
-			bkey_reassemble(&ret->k_i, k);
-	} else {
-		ret = bch2_trans_kmalloc(trans, sizeof(*ret));
-		if (!IS_ERR(ret)) {
-			bkey_alloc_v4_init(&ret->k_i);
-			ret->k.p = k.k->p;
-			bch2_alloc_to_v4(k, &ret->v);
-		}
-	}
-	return ret;
-}
-
-struct bkey_i_alloc_v4 *
-bch2_trans_start_alloc_update(struct btree_trans *trans, struct btree_iter *iter,
-			      struct bpos pos)
-{
-	struct bkey_s_c k;
-	struct bkey_i_alloc_v4 *a;
-	int ret;
-
-	bch2_trans_iter_init(trans, iter, BTREE_ID_alloc, pos,
-			     BTREE_ITER_WITH_UPDATES|
-			     BTREE_ITER_CACHED|
-			     BTREE_ITER_INTENT);
-	k = bch2_btree_iter_peek_slot(iter);
-	ret = bkey_err(k);
-	if (ret) {
-		bch2_trans_iter_exit(trans, iter);
-		return ERR_PTR(ret);
-	}
-
-	a = bch2_alloc_to_v4_mut(trans, k);
-	if (IS_ERR(a))
-		bch2_trans_iter_exit(trans, iter);
-	return a;
-}
-
 static unsigned bch_alloc_v1_val_u64s(const struct bch_alloc *a)
 {
 	unsigned i, bytes = offsetof(struct bch_alloc, data);
@@ -417,21 +344,154 @@ void bch2_alloc_v4_swab(struct bkey_s k)
 
 void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
 {
-	struct bch_alloc_v4 a;
+	struct bch_alloc_v4 _a;
+	const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &_a);
 
-	bch2_alloc_to_v4(k, &a);
+	prt_newline(out);
+	printbuf_indent_add(out, 2);
 
-	prt_printf(out, "gen %u oldest_gen %u data_type %s journal_seq %llu need_discard %llu need_inc_gen %llu",
-	       a.gen, a.oldest_gen, bch2_data_types[a.data_type],
-	       a.journal_seq,
-	       BCH_ALLOC_V4_NEED_DISCARD(&a),
-	       BCH_ALLOC_V4_NEED_INC_GEN(&a));
-	prt_printf(out, " dirty_sectors %u",	a.dirty_sectors);
-	prt_printf(out, " cached_sectors %u",	a.cached_sectors);
-	prt_printf(out, " stripe %u",		a.stripe);
-	prt_printf(out, " stripe_redundancy %u",	a.stripe_redundancy);
-	prt_printf(out, " read_time %llu",		a.io_time[READ]);
-	prt_printf(out, " write_time %llu",		a.io_time[WRITE]);
+	prt_printf(out, "gen %u oldest_gen %u data_type %s",
+	       a->gen, a->oldest_gen, bch2_data_types[a->data_type]);
+	prt_newline(out);
+	prt_printf(out, "journal_seq       %llu",	a->journal_seq);
+	prt_newline(out);
+	prt_printf(out, "need_discard      %llu",	BCH_ALLOC_V4_NEED_DISCARD(a));
+	prt_newline(out);
+	prt_printf(out, "need_inc_gen      %llu",	BCH_ALLOC_V4_NEED_INC_GEN(a));
+	prt_newline(out);
+	prt_printf(out, "dirty_sectors     %u",	a->dirty_sectors);
+	prt_newline(out);
+	prt_printf(out, "cached_sectors    %u",	a->cached_sectors);
+	prt_newline(out);
+	prt_printf(out, "stripe            %u",	a->stripe);
+	prt_newline(out);
+	prt_printf(out, "stripe_redundancy %u",	a->stripe_redundancy);
+	prt_newline(out);
+	prt_printf(out, "io_time[READ]     %llu",	a->io_time[READ]);
+	prt_newline(out);
+	prt_printf(out, "io_time[WRITE]    %llu",	a->io_time[WRITE]);
+	prt_newline(out);
+	prt_printf(out, "backpointers:     %llu",	BCH_ALLOC_V4_NR_BACKPOINTERS(a));
+
+	printbuf_indent_sub(out, 2);
+}
+
+static inline void *alloc_v4_backpointers(struct bch_alloc_v4 *a)
+{
+	return (void *) ((u64 *) &a->v + BCH_ALLOC_V4_BACKPOINTERS_START(a));
+}
+
+void __bch2_alloc_to_v4(struct bkey_s_c k, struct bch_alloc_v4 *out)
+{
+	if (k.k->type == KEY_TYPE_alloc_v4) {
+		void *src, *dst;
+
+		*out = *bkey_s_c_to_alloc_v4(k).v;
+
+		src = alloc_v4_backpointers(out);
+		SET_BCH_ALLOC_V4_BACKPOINTERS_START(out, BCH_ALLOC_V4_U64s);
+		dst = alloc_v4_backpointers(out);
+
+		if (src < dst)
+			memset(src, 0, dst - src);
+	} else {
+		struct bkey_alloc_unpacked u = bch2_alloc_unpack(k);
+
+		*out = (struct bch_alloc_v4) {
+			.journal_seq		= u.journal_seq,
+			.flags			= u.need_discard,
+			.gen			= u.gen,
+			.oldest_gen		= u.oldest_gen,
+			.data_type		= u.data_type,
+			.stripe_redundancy	= u.stripe_redundancy,
+			.dirty_sectors		= u.dirty_sectors,
+			.cached_sectors		= u.cached_sectors,
+			.io_time[READ]		= u.read_time,
+			.io_time[WRITE]		= u.write_time,
+			.stripe			= u.stripe,
+		};
+
+		SET_BCH_ALLOC_V4_BACKPOINTERS_START(out, BCH_ALLOC_V4_U64s);
+	}
+}
+
+static noinline struct bkey_i_alloc_v4 *
+__bch2_alloc_to_v4_mut(struct btree_trans *trans, struct bkey_s_c k)
+{
+	struct bkey_i_alloc_v4 *ret;
+
+	if (k.k->type == KEY_TYPE_alloc_v4) {
+		unsigned bytes = min(sizeof(struct bkey_i_alloc_v4), bkey_bytes(k.k));
+		void *src, *dst;
+
+		ret = bch2_trans_kmalloc(trans, bytes);
+		if (IS_ERR(ret))
+			return ret;
+
+		bkey_reassemble(&ret->k_i, k);
+
+		src = alloc_v4_backpointers(&ret->v);
+		SET_BCH_ALLOC_V4_BACKPOINTERS_START(&ret->v, BCH_ALLOC_V4_U64s);
+		dst = alloc_v4_backpointers(&ret->v);
+
+		if (src < dst)
+			memset(src, 0, dst - src);
+		set_alloc_v4_u64s(ret);
+	} else {
+		ret = bch2_trans_kmalloc(trans, sizeof(*ret));
+		if (!IS_ERR(ret)) {
+			bkey_alloc_v4_init(&ret->k_i);
+			ret->k.p = k.k->p;
+			bch2_alloc_to_v4(k, &ret->v);
+		}
+	}
+	return ret;
+}
+
+static inline struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut_inlined(struct btree_trans *trans, struct bkey_s_c k)
+{
+	if (likely(k.k->type == KEY_TYPE_alloc_v4) &&
+	    BCH_ALLOC_V4_BACKPOINTERS_START(bkey_s_c_to_alloc_v4(k).v) == BCH_ALLOC_V4_U64s) {
+		struct bkey_i_alloc_v4 *ret =
+			bch2_trans_kmalloc_nomemzero(trans, bkey_bytes(k.k));
+		if (!IS_ERR(ret))
+			bkey_reassemble(&ret->k_i, k);
+		return ret;
+	}
+
+	return __bch2_alloc_to_v4_mut(trans, k);
+}
+
+struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *trans, struct bkey_s_c k)
+{
+	return bch2_alloc_to_v4_mut_inlined(trans, k);
+}
+
+struct bkey_i_alloc_v4 *
+bch2_trans_start_alloc_update(struct btree_trans *trans, struct btree_iter *iter,
+			      struct bpos pos)
+{
+	struct bkey_s_c k;
+	struct bkey_i_alloc_v4 *a;
+	int ret;
+
+	bch2_trans_iter_init(trans, iter, BTREE_ID_alloc, pos,
+			     BTREE_ITER_WITH_UPDATES|
+			     BTREE_ITER_CACHED|
+			     BTREE_ITER_INTENT);
+	k = bch2_btree_iter_peek_slot(iter);
+	ret = bkey_err(k);
+	if (unlikely(ret))
+		goto err;
+
+	a = bch2_alloc_to_v4_mut_inlined(trans, k);
+	ret = PTR_ERR_OR_ZERO(a);
+	if (unlikely(ret))
+		goto err;
+	return a;
+err:
+	bch2_trans_iter_exit(trans, iter);
+	return ERR_PTR(ret);
 }
 
 int bch2_alloc_read(struct bch_fs *c)
@@ -455,9 +515,8 @@ int bch2_alloc_read(struct bch_fs *c)
 			continue;
 
 		ca = bch_dev_bkey_exists(c, k.k->p.inode);
-		bch2_alloc_to_v4(k, &a);
 
-		*bucket_gen(ca, k.k->p.offset) = a.gen;
+		*bucket_gen(ca, k.k->p.offset) = bch2_alloc_to_v4(k, &a)->gen;
 	}
 	bch2_trans_iter_exit(&trans, &iter);
 
@@ -546,7 +605,8 @@ int bch2_trans_mark_alloc(struct btree_trans *trans,
 			  unsigned flags)
 {
 	struct bch_fs *c = trans->c;
-	struct bch_alloc_v4 old_a, *new_a;
+	struct bch_alloc_v4 old_a_convert, *new_a;
+	const struct bch_alloc_v4 *old_a;
 	u64 old_lru, new_lru;
 	int ret = 0;
 
@@ -556,13 +616,13 @@ int bch2_trans_mark_alloc(struct btree_trans *trans,
 	 */
 	BUG_ON(new->k.type != KEY_TYPE_alloc_v4);
 
-	bch2_alloc_to_v4(old, &old_a);
+	old_a = bch2_alloc_to_v4(old, &old_a_convert);
 	new_a = &bkey_i_to_alloc_v4(new)->v;
 
 	new_a->data_type = alloc_data_type(*new_a, new_a->data_type);
 
-	if (new_a->dirty_sectors > old_a.dirty_sectors ||
-	    new_a->cached_sectors > old_a.cached_sectors) {
+	if (new_a->dirty_sectors > old_a->dirty_sectors ||
+	    new_a->cached_sectors > old_a->cached_sectors) {
 		new_a->io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now));
 		new_a->io_time[WRITE]= max_t(u64, 1, atomic64_read(&c->io_clock[WRITE].now));
 		SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, true);
@@ -576,10 +636,10 @@ int bch2_trans_mark_alloc(struct btree_trans *trans,
 		SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, false);
 	}
 
-	if (old_a.data_type != new_a->data_type ||
+	if (old_a->data_type != new_a->data_type ||
 	    (new_a->data_type == BCH_DATA_free &&
-	     alloc_freespace_genbits(old_a) != alloc_freespace_genbits(*new_a))) {
-		ret =   bch2_bucket_do_index(trans, old, &old_a, false) ?:
+	     alloc_freespace_genbits(*old_a) != alloc_freespace_genbits(*new_a))) {
+		ret =   bch2_bucket_do_index(trans, old, old_a, false) ?:
 			bch2_bucket_do_index(trans, bkey_i_to_s_c(new), new_a, true);
 		if (ret)
 			return ret;
@@ -589,7 +649,7 @@ int bch2_trans_mark_alloc(struct btree_trans *trans,
 	    !new_a->io_time[READ])
 		new_a->io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now));
 
-	old_lru = alloc_lru_idx(old_a);
+	old_lru = alloc_lru_idx(*old_a);
 	new_lru = alloc_lru_idx(*new_a);
 
 	if (old_lru != new_lru) {
@@ -612,7 +672,8 @@ static int bch2_check_alloc_key(struct btree_trans *trans,
 {
 	struct bch_fs *c = trans->c;
 	struct bch_dev *ca;
-	struct bch_alloc_v4 a;
+	struct bch_alloc_v4 a_convert;
+	const struct bch_alloc_v4 *a;
 	unsigned discard_key_type, freespace_key_type;
 	struct bkey_s_c alloc_k, k;
 	struct printbuf buf = PRINTBUF;
@@ -637,15 +698,15 @@ static int bch2_check_alloc_key(struct btree_trans *trans,
 	if (!ca->mi.freespace_initialized)
 		return 0;
 
-	bch2_alloc_to_v4(alloc_k, &a);
+	a = bch2_alloc_to_v4(alloc_k, &a_convert);
 
-	discard_key_type = a.data_type == BCH_DATA_need_discard
+	discard_key_type = a->data_type == BCH_DATA_need_discard
 		? KEY_TYPE_set : 0;
-	freespace_key_type = a.data_type == BCH_DATA_free
+	freespace_key_type = a->data_type == BCH_DATA_free
 		? KEY_TYPE_set : 0;
 
 	bch2_btree_iter_set_pos(discard_iter, alloc_k.k->p);
-	bch2_btree_iter_set_pos(freespace_iter, alloc_freespace_pos(alloc_k.k->p, a));
+	bch2_btree_iter_set_pos(freespace_iter, alloc_freespace_pos(alloc_k.k->p, *a));
 
 	k = bch2_btree_iter_peek_slot(discard_iter);
 	ret = bkey_err(k);
@@ -716,7 +777,8 @@ static int bch2_check_discard_freespace_key(struct btree_trans *trans,
 	struct bch_fs *c = trans->c;
 	struct btree_iter alloc_iter;
 	struct bkey_s_c alloc_k;
-	struct bch_alloc_v4 a;
+	struct bch_alloc_v4 a_convert;
+	const struct bch_alloc_v4 *a;
 	u64 genbits;
 	struct bpos pos;
 	enum bch_data_type state = iter->btree_id == BTREE_ID_need_discard
@@ -741,16 +803,16 @@ static int bch2_check_discard_freespace_key(struct btree_trans *trans,
 	if (ret)
 		goto err;
 
-	bch2_alloc_to_v4(alloc_k, &a);
+	a = bch2_alloc_to_v4(alloc_k, &a_convert);
 
-	if (fsck_err_on(a.data_type != state ||
+	if (fsck_err_on(a->data_type != state ||
 			(state == BCH_DATA_free &&
-			 genbits != alloc_freespace_genbits(a)), c,
+			 genbits != alloc_freespace_genbits(*a)), c,
 			"%s\n  incorrectly set in %s index (free %u, genbits %llu should be %llu)",
 			(bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf),
 			bch2_btree_ids[iter->btree_id],
-			a.data_type == state,
-			genbits >> 56, alloc_freespace_genbits(a) >> 56))
+			a->data_type == state,
+			genbits >> 56, alloc_freespace_genbits(*a) >> 56))
 		goto delete;
 out:
 err:
@@ -818,7 +880,8 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
 {
 	struct bch_fs *c = trans->c;
 	struct btree_iter lru_iter;
-	struct bch_alloc_v4 a;
+	struct bch_alloc_v4 a_convert;
+	const struct bch_alloc_v4 *a;
 	struct bkey_s_c alloc_k, k;
 	struct printbuf buf = PRINTBUF;
 	struct printbuf buf2 = PRINTBUF;
@@ -832,20 +895,20 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
 	if (ret)
 		return ret;
 
-	bch2_alloc_to_v4(alloc_k, &a);
+	a = bch2_alloc_to_v4(alloc_k, &a_convert);
 
-	if (a.data_type != BCH_DATA_cached)
+	if (a->data_type != BCH_DATA_cached)
 		return 0;
 
 	bch2_trans_iter_init(trans, &lru_iter, BTREE_ID_lru,
-			     POS(alloc_k.k->p.inode, a.io_time[READ]), 0);
+			     POS(alloc_k.k->p.inode, a->io_time[READ]), 0);
 
 	k = bch2_btree_iter_peek_slot(&lru_iter);
 	ret = bkey_err(k);
 	if (ret)
 		goto err;
 
-	if (fsck_err_on(!a.io_time[READ], c,
+	if (fsck_err_on(!a->io_time[READ], c,
 			"cached bucket with read_time 0\n"
 			"  %s",
 		(printbuf_reset(&buf),
@@ -858,26 +921,24 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
 			(printbuf_reset(&buf),
 			 bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf),
 			(bch2_bkey_val_to_text(&buf2, c, k), buf2.buf))) {
-		u64 read_time = a.io_time[READ];
-
-		if (!a.io_time[READ])
-			a.io_time[READ] = atomic64_read(&c->io_clock[READ].now);
+		u64 read_time = a->io_time[READ] ?:
+			atomic64_read(&c->io_clock[READ].now);
 
 		ret = bch2_lru_set(trans,
 				   alloc_k.k->p.inode,
 				   alloc_k.k->p.offset,
-				   &a.io_time[READ]);
+				   &read_time);
 		if (ret)
 			goto err;
 
-		if (a.io_time[READ] != read_time) {
+		if (a->io_time[READ] != read_time) {
 			struct bkey_i_alloc_v4 *a_mut =
 				bch2_alloc_to_v4_mut(trans, alloc_k);
 			ret = PTR_ERR_OR_ZERO(a_mut);
 			if (ret)
 				goto err;
 
-			a_mut->v.io_time[READ] = a.io_time[READ];
+			a_mut->v.io_time[READ] = read_time;
 			ret = bch2_trans_update(trans, alloc_iter,
 						&a_mut->k_i, BTREE_TRIGGER_NORUN);
 			if (ret)
@@ -1182,13 +1243,14 @@ void bch2_do_invalidates(struct bch_fs *c)
 static int bucket_freespace_init(struct btree_trans *trans, struct btree_iter *iter,
 				 struct bkey_s_c k, struct bch_dev *ca)
 {
-	struct bch_alloc_v4 a;
+	struct bch_alloc_v4 a_convert;
+	const struct bch_alloc_v4 *a;
 
 	if (iter->pos.offset >= ca->mi.nbuckets)
 		return 1;
 
-	bch2_alloc_to_v4(k, &a);
-	return bch2_bucket_do_index(trans, k, &a, true);
+	a = bch2_alloc_to_v4(k, &a_convert);
+	return bch2_bucket_do_index(trans, k, a, true);
 }
 
 static int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca)
diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h
index 318beb5..c562aff 100644
--- a/fs/bcachefs/alloc_background.h
+++ b/fs/bcachefs/alloc_background.h
@@ -70,16 +70,46 @@ static inline struct bpos alloc_freespace_pos(struct bpos pos, struct bch_alloc_
 	return pos;
 }
 
+static inline unsigned alloc_v4_u64s(const struct bch_alloc_v4 *a)
+{
+	unsigned ret = (BCH_ALLOC_V4_BACKPOINTERS_START(a) ?:
+			BCH_ALLOC_V4_U64s_V0);
+
+	BUG_ON(ret > U8_MAX - BKEY_U64s);
+	return ret;
+}
+
+static inline void set_alloc_v4_u64s(struct bkey_i_alloc_v4 *a)
+{
+	set_bkey_val_u64s(&a->k, alloc_v4_u64s(&a->v));
+}
+
 struct bkey_i_alloc_v4 *
 bch2_trans_start_alloc_update(struct btree_trans *, struct btree_iter *, struct bpos);
 
-void bch2_alloc_to_v4(struct bkey_s_c, struct bch_alloc_v4 *);
+void __bch2_alloc_to_v4(struct bkey_s_c, struct bch_alloc_v4 *);
+
+static inline const struct bch_alloc_v4 *bch2_alloc_to_v4(struct bkey_s_c k, struct bch_alloc_v4 *convert)
+{
+	const struct bch_alloc_v4 *ret;
+
+	if (unlikely(k.k->type != KEY_TYPE_alloc_v4))
+		goto slowpath;
+
+	ret = bkey_s_c_to_alloc_v4(k).v;
+	if (BCH_ALLOC_V4_BACKPOINTERS_START(ret) != BCH_ALLOC_V4_U64s)
+		goto slowpath;
+
+	return ret;
+slowpath:
+	__bch2_alloc_to_v4(k, convert);
+	return convert;
+}
+
 struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *, struct bkey_s_c);
 
 int bch2_bucket_io_time_reset(struct btree_trans *, unsigned, size_t, int);
 
-#define ALLOC_SCAN_BATCH(ca)		max_t(size_t, 1, (ca)->mi.nbuckets >> 9)
-
 int bch2_alloc_v1_invalid(const struct bch_fs *, struct bkey_s_c, int, struct printbuf *);
 int bch2_alloc_v2_invalid(const struct bch_fs *, struct bkey_s_c, int, struct printbuf *);
 int bch2_alloc_v3_invalid(const struct bch_fs *, struct bkey_s_c, int, struct printbuf *);
diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c
index 471ae15..02a61df 100644
--- a/fs/bcachefs/alloc_foreground.c
+++ b/fs/bcachefs/alloc_foreground.c
@@ -204,7 +204,7 @@ static inline unsigned open_buckets_reserved(enum alloc_reserve reserve)
 static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
 					      u64 bucket,
 					      enum alloc_reserve reserve,
-					      struct bch_alloc_v4 *a,
+					      const struct bch_alloc_v4 *a,
 					      struct bucket_alloc_state *s,
 					      struct closure *cl)
 {
@@ -289,7 +289,8 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bc
 	struct btree_iter iter = { NULL };
 	struct bkey_s_c k;
 	struct open_bucket *ob;
-	struct bch_alloc_v4 a;
+	struct bch_alloc_v4 a_convert;
+	const struct bch_alloc_v4 *a;
 	u64 b = free_entry & ~(~0ULL << 56);
 	unsigned genbits = free_entry >> 56;
 	struct printbuf buf = PRINTBUF;
@@ -313,12 +314,12 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bc
 		goto err;
 	}
 
-	bch2_alloc_to_v4(k, &a);
+	a = bch2_alloc_to_v4(k, &a_convert);
 
-	if (genbits != (alloc_freespace_genbits(a) >> 56)) {
+	if (genbits != (alloc_freespace_genbits(*a) >> 56)) {
 		prt_printf(&buf, "bucket in freespace btree with wrong genbits (got %u should be %llu)\n"
 		       "  freespace key ",
-		       genbits, alloc_freespace_genbits(a) >> 56);
+		       genbits, alloc_freespace_genbits(*a) >> 56);
 		bch2_bkey_val_to_text(&buf, c, freespace_k);
 		prt_printf(&buf, "\n  ");
 		bch2_bkey_val_to_text(&buf, c, k);
@@ -328,7 +329,7 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bc
 
 	}
 
-	if (a.data_type != BCH_DATA_free) {
+	if (a->data_type != BCH_DATA_free) {
 		prt_printf(&buf, "non free bucket in freespace btree\n"
 		       "  freespace key ");
 		bch2_bkey_val_to_text(&buf, c, freespace_k);
@@ -339,7 +340,7 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bc
 		goto err;
 	}
 
-	ob = __try_alloc_bucket(c, ca, b, reserve, &a, s, cl);
+	ob = __try_alloc_bucket(c, ca, b, reserve, a, s, cl);
 	if (!ob)
 		iter.path->preserve = false;
 err:
@@ -397,7 +398,8 @@ bch2_bucket_alloc_early(struct btree_trans *trans,
 again:
 	for_each_btree_key_norestart(trans, iter, BTREE_ID_alloc, POS(ca->dev_idx, alloc_cursor),
 			   BTREE_ITER_SLOTS, k, ret) {
-		struct bch_alloc_v4 a;
+		struct bch_alloc_v4 a_convert;
+		const struct bch_alloc_v4 *a;
 
 		if (bkey_ge(k.k->p, POS(ca->dev_idx, ca->mi.nbuckets)))
 			break;
@@ -406,14 +408,14 @@ bch2_bucket_alloc_early(struct btree_trans *trans,
 		    is_superblock_bucket(ca, k.k->p.offset))
 			continue;
 
-		bch2_alloc_to_v4(k, &a);
+		a = bch2_alloc_to_v4(k, &a_convert);
 
-		if (a.data_type != BCH_DATA_free)
+		if (a->data_type != BCH_DATA_free)
 			continue;
 
 		s->buckets_seen++;
 
-		ob = __try_alloc_bucket(trans->c, ca, k.k->p.offset, reserve, &a, s, cl);
+		ob = __try_alloc_bucket(trans->c, ca, k.k->p.offset, reserve, a, s, cl);
 		if (ob)
 			break;
 	}
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index 6ee9321..d96efc8 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -934,6 +934,9 @@ struct bch_alloc_v4 {
 	struct bpos		backpointers[0];
 } __packed __aligned(8);
 
+#define BCH_ALLOC_V4_U64s_V0	6
+#define BCH_ALLOC_V4_U64s	(sizeof(struct bch_alloc_v4) / sizeof(u64))
+
 LE32_BITMASK(BCH_ALLOC_V3_NEED_DISCARD,struct bch_alloc_v3, flags,  0,  1)
 LE32_BITMASK(BCH_ALLOC_V3_NEED_INC_GEN,struct bch_alloc_v3, flags,  1,  2)
 
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index e43ccf8..959f408 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -1351,15 +1351,16 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
 	struct bch_dev *ca = bch_dev_bkey_exists(c, iter->pos.inode);
 	struct bucket gc, *b;
 	struct bkey_i_alloc_v4 *a;
-	struct bch_alloc_v4 old, new;
+	struct bch_alloc_v4 old_convert, new;
+	const struct bch_alloc_v4 *old;
 	enum bch_data_type type;
 	int ret;
 
 	if (bkey_ge(iter->pos, POS(ca->dev_idx, ca->mi.nbuckets)))
 		return 1;
 
-	bch2_alloc_to_v4(k, &old);
-	new = old;
+	old = bch2_alloc_to_v4(k, &old_convert);
+	new = *old;
 
 	percpu_down_read(&c->mark_lock);
 	b = gc_bucket(ca, iter->pos.offset);
@@ -1371,7 +1372,7 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
 	type = __alloc_data_type(b->dirty_sectors,
 				 b->cached_sectors,
 				 b->stripe,
-				 old,
+				 *old,
 				 b->data_type);
 	if (b->data_type != type) {
 		struct bch_dev_usage *u;
@@ -1393,7 +1394,7 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
 	    gc.data_type != BCH_DATA_btree)
 		return 0;
 
-	if (gen_after(old.gen, gc.gen))
+	if (gen_after(old->gen, gc.gen))
 		return 0;
 
 #define copy_bucket_field(_f)						\
@@ -1415,7 +1416,7 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
 	copy_bucket_field(stripe);
 #undef copy_bucket_field
 
-	if (!bch2_alloc_v4_cmp(old, new))
+	if (!bch2_alloc_v4_cmp(*old, new))
 		return 0;
 
 	a = bch2_alloc_to_v4_mut(trans, k);
@@ -1473,7 +1474,8 @@ static int bch2_gc_alloc_start(struct bch_fs *c, bool metadata_only)
 	struct btree_iter iter;
 	struct bkey_s_c k;
 	struct bucket *g;
-	struct bch_alloc_v4 a;
+	struct bch_alloc_v4 a_convert;
+	const struct bch_alloc_v4 *a;
 	unsigned i;
 	int ret;
 
@@ -1499,20 +1501,20 @@ static int bch2_gc_alloc_start(struct bch_fs *c, bool metadata_only)
 		ca = bch_dev_bkey_exists(c, k.k->p.inode);
 		g = gc_bucket(ca, k.k->p.offset);
 
-		bch2_alloc_to_v4(k, &a);
+		a = bch2_alloc_to_v4(k, &a_convert);
 
 		g->gen_valid	= 1;
-		g->gen		= a.gen;
+		g->gen		= a->gen;
 
 		if (metadata_only &&
-		    (a.data_type == BCH_DATA_user ||
-		     a.data_type == BCH_DATA_cached ||
-		     a.data_type == BCH_DATA_parity)) {
-			g->data_type		= a.data_type;
-			g->dirty_sectors	= a.dirty_sectors;
-			g->cached_sectors	= a.cached_sectors;
-			g->stripe		= a.stripe;
-			g->stripe_redundancy	= a.stripe_redundancy;
+		    (a->data_type == BCH_DATA_user ||
+		     a->data_type == BCH_DATA_cached ||
+		     a->data_type == BCH_DATA_parity)) {
+			g->data_type		= a->data_type;
+			g->dirty_sectors	= a->dirty_sectors;
+			g->cached_sectors	= a->cached_sectors;
+			g->stripe		= a->stripe;
+			g->stripe_redundancy	= a->stripe_redundancy;
 		}
 	}
 	bch2_trans_iter_exit(&trans, &iter);
@@ -1913,13 +1915,12 @@ static int bch2_alloc_write_oldest_gen(struct btree_trans *trans, struct btree_i
 				       struct bkey_s_c k)
 {
 	struct bch_dev *ca = bch_dev_bkey_exists(trans->c, iter->pos.inode);
-	struct bch_alloc_v4 a;
+	struct bch_alloc_v4 a_convert;
+	const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &a_convert);
 	struct bkey_i_alloc_v4 *a_mut;
 	int ret;
 
-	bch2_alloc_to_v4(k, &a);
-
-	if (a.oldest_gen == ca->oldest_gen[iter->pos.offset])
+	if (a->oldest_gen == ca->oldest_gen[iter->pos.offset])
 		return 0;
 
 	a_mut = bch2_alloc_to_v4_mut(trans, k);
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index 6c9dcfd..1539873 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -490,8 +490,10 @@ int bch2_mark_alloc(struct btree_trans *trans,
 {
 	bool gc = flags & BTREE_TRIGGER_GC;
 	u64 journal_seq = trans->journal_res.seq;
+	u64 bucket_journal_seq;
 	struct bch_fs *c = trans->c;
-	struct bch_alloc_v4 old_a, new_a;
+	struct bch_alloc_v4 old_a_convert, new_a_convert;
+	const struct bch_alloc_v4 *old_a, *new_a;
 	struct bch_dev *ca;
 	int ret = 0;
 
@@ -508,36 +510,38 @@ int bch2_mark_alloc(struct btree_trans *trans,
 
 	ca = bch_dev_bkey_exists(c, new.k->p.inode);
 
-	bch2_alloc_to_v4(old, &old_a);
-	bch2_alloc_to_v4(new, &new_a);
+	old_a = bch2_alloc_to_v4(old, &old_a_convert);
+	new_a = bch2_alloc_to_v4(new, &new_a_convert);
+
+	bucket_journal_seq = new_a->journal_seq;
 
 	if ((flags & BTREE_TRIGGER_INSERT) &&
-	    data_type_is_empty(old_a.data_type) !=
-	    data_type_is_empty(new_a.data_type) &&
+	    data_type_is_empty(old_a->data_type) !=
+	    data_type_is_empty(new_a->data_type) &&
 	    new.k->type == KEY_TYPE_alloc_v4) {
 		struct bch_alloc_v4 *v = (struct bch_alloc_v4 *) new.v;
 
-		BUG_ON(!journal_seq);
+		EBUG_ON(!journal_seq);
 
 		/*
 		 * If the btree updates referring to a bucket weren't flushed
 		 * before the bucket became empty again, then the we don't have
 		 * to wait on a journal flush before we can reuse the bucket:
 		 */
-		new_a.journal_seq = data_type_is_empty(new_a.data_type) &&
+		v->journal_seq = bucket_journal_seq =
+			data_type_is_empty(new_a->data_type) &&
 			(journal_seq == v->journal_seq ||
 			 bch2_journal_noflush_seq(&c->journal, v->journal_seq))
 			? 0 : journal_seq;
-		v->journal_seq = new_a.journal_seq;
 	}
 
-	if (!data_type_is_empty(old_a.data_type) &&
-	    data_type_is_empty(new_a.data_type) &&
-	    new_a.journal_seq) {
+	if (!data_type_is_empty(old_a->data_type) &&
+	    data_type_is_empty(new_a->data_type) &&
+	    bucket_journal_seq) {
 		ret = bch2_set_bucket_needs_journal_commit(&c->buckets_waiting_for_journal,
 				c->journal.flushed_seq_ondisk,
 				new.k->p.inode, new.k->p.offset,
-				new_a.journal_seq);
+				bucket_journal_seq);
 		if (ret) {
 			bch2_fs_fatal_error(c,
 				"error setting bucket_needs_journal_commit: %i", ret);
@@ -546,10 +550,10 @@ int bch2_mark_alloc(struct btree_trans *trans,
 	}
 
 	percpu_down_read(&c->mark_lock);
-	if (!gc && new_a.gen != old_a.gen)
-		*bucket_gen(ca, new.k->p.offset) = new_a.gen;
+	if (!gc && new_a->gen != old_a->gen)
+		*bucket_gen(ca, new.k->p.offset) = new_a->gen;
 
-	bch2_dev_usage_update(c, ca, old_a, new_a, journal_seq, gc);
+	bch2_dev_usage_update(c, ca, *old_a, *new_a, journal_seq, gc);
 
 	if (gc) {
 		struct bucket *g = gc_bucket(ca, new.k->p.offset);
@@ -557,12 +561,12 @@ int bch2_mark_alloc(struct btree_trans *trans,
 		bucket_lock(g);
 
 		g->gen_valid		= 1;
-		g->gen			= new_a.gen;
-		g->data_type		= new_a.data_type;
-		g->stripe		= new_a.stripe;
-		g->stripe_redundancy	= new_a.stripe_redundancy;
-		g->dirty_sectors	= new_a.dirty_sectors;
-		g->cached_sectors	= new_a.cached_sectors;
+		g->gen			= new_a->gen;
+		g->data_type		= new_a->data_type;
+		g->stripe		= new_a->stripe;
+		g->stripe_redundancy	= new_a->stripe_redundancy;
+		g->dirty_sectors	= new_a->dirty_sectors;
+		g->cached_sectors	= new_a->cached_sectors;
 
 		bucket_unlock(g);
 	}
@@ -574,9 +578,9 @@ int bch2_mark_alloc(struct btree_trans *trans,
 	 */
 
 	if ((flags & BTREE_TRIGGER_BUCKET_INVALIDATE) &&
-	    old_a.cached_sectors) {
+	    old_a->cached_sectors) {
 		ret = update_cached_sectors(c, new, ca->dev_idx,
-					    -((s64) old_a.cached_sectors),
+					    -((s64) old_a->cached_sectors),
 					    journal_seq, gc);
 		if (ret) {
 			bch2_fs_fatal_error(c, "%s(): no replicas entry while updating cached sectors",
@@ -585,20 +589,20 @@ int bch2_mark_alloc(struct btree_trans *trans,
 		}
 	}
 
-	if (new_a.data_type == BCH_DATA_free &&
-	    (!new_a.journal_seq || new_a.journal_seq < c->journal.flushed_seq_ondisk))
+	if (new_a->data_type == BCH_DATA_free &&
+	    (!new_a->journal_seq || new_a->journal_seq < c->journal.flushed_seq_ondisk))
 		closure_wake_up(&c->freelist_wait);
 
-	if (new_a.data_type == BCH_DATA_need_discard &&
-	    (!new_a.journal_seq || new_a.journal_seq < c->journal.flushed_seq_ondisk))
+	if (new_a->data_type == BCH_DATA_need_discard &&
+	    (!bucket_journal_seq || bucket_journal_seq < c->journal.flushed_seq_ondisk))
 		bch2_do_discards(c);
 
-	if (old_a.data_type != BCH_DATA_cached &&
-	    new_a.data_type == BCH_DATA_cached &&
+	if (old_a->data_type != BCH_DATA_cached &&
+	    new_a->data_type == BCH_DATA_cached &&
 	    should_invalidate_buckets(ca, bch2_dev_usage_read(ca)))
 		bch2_do_invalidates(c);
 
-	if (new_a.data_type == BCH_DATA_need_gc_gens)
+	if (new_a->data_type == BCH_DATA_need_gc_gens)
 		bch2_do_gc_gens(c);
 
 	return 0;
diff --git a/fs/bcachefs/lru.c b/fs/bcachefs/lru.c
index 9b4ce27..1282186 100644
--- a/fs/bcachefs/lru.c
+++ b/fs/bcachefs/lru.c
@@ -140,7 +140,8 @@ static int bch2_check_lru_key(struct btree_trans *trans,
 	struct bch_fs *c = trans->c;
 	struct btree_iter iter;
 	struct bkey_s_c k;
-	struct bch_alloc_v4 a;
+	struct bch_alloc_v4 a_convert;
+	const struct bch_alloc_v4 *a;
 	struct printbuf buf1 = PRINTBUF;
 	struct printbuf buf2 = PRINTBUF;
 	struct bpos alloc_pos;
@@ -160,10 +161,10 @@ static int bch2_check_lru_key(struct btree_trans *trans,
 	if (ret)
 		goto err;
 
-	bch2_alloc_to_v4(k, &a);
+	a = bch2_alloc_to_v4(k, &a_convert);
 
-	if (fsck_err_on(a.data_type != BCH_DATA_cached ||
-			a.io_time[READ] != lru_k.k->p.offset, c,
+	if (fsck_err_on(a->data_type != BCH_DATA_cached ||
+			a->io_time[READ] != lru_k.k->p.offset, c,
 			"incorrect lru entry %s\n"
 			"  for %s",
 			(bch2_bkey_val_to_text(&buf1, c, lru_k), buf1.buf),
diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c
index 9c55a88..a04e233 100644
--- a/fs/bcachefs/movinggc.c
+++ b/fs/bcachefs/movinggc.c
@@ -117,7 +117,6 @@ static int walk_buckets_to_copygc(struct bch_fs *c)
 	struct btree_trans trans;
 	struct btree_iter iter;
 	struct bkey_s_c k;
-	struct bch_alloc_v4 a;
 	int ret;
 
 	bch2_trans_init(&trans, c, 0, 0);
@@ -126,21 +125,23 @@ static int walk_buckets_to_copygc(struct bch_fs *c)
 			   BTREE_ITER_PREFETCH, k, ret) {
 		struct bch_dev *ca = bch_dev_bkey_exists(c, iter.pos.inode);
 		struct copygc_heap_entry e;
+		struct bch_alloc_v4 a_convert;
+		const struct bch_alloc_v4 *a;
 
-		bch2_alloc_to_v4(k, &a);
+		a = bch2_alloc_to_v4(k, &a_convert);
 
-		if (a.data_type != BCH_DATA_user ||
-		    a.dirty_sectors >= ca->mi.bucket_size ||
+		if (a->data_type != BCH_DATA_user ||
+		    a->dirty_sectors >= ca->mi.bucket_size ||
 		    bch2_bucket_is_open(c, iter.pos.inode, iter.pos.offset))
 			continue;
 
 		e = (struct copygc_heap_entry) {
 			.dev		= iter.pos.inode,
-			.gen		= a.gen,
-			.replicas	= 1 + a.stripe_redundancy,
-			.fragmentation	= div_u64((u64) a.dirty_sectors * (1ULL << 31),
+			.gen		= a->gen,
+			.replicas	= 1 + a->stripe_redundancy,
+			.fragmentation	= div_u64((u64) a->dirty_sectors * (1ULL << 31),
 						  ca->mi.bucket_size),
-			.sectors	= a.dirty_sectors,
+			.sectors	= a->dirty_sectors,
 			.offset		= bucket_to_sector(ca, iter.pos.offset),
 		};
 		heap_add_or_replace(h, e, -fragmentation_cmp, NULL);