bcachefs: don't call bch2_bucket_seq_cleanup from journal_buf_switch

journal_buf_switch is called from the foreground when getting a journal
reservation and thus is somewhat latency sensitive;
bch2_bucket_seq_cleanup has to run infrequently but is a bit expensive
when it does run.

Call it from the journal write path instead, and punt the journal write
to workqueue context.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index 57132c7..0c55cc9 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -719,7 +719,7 @@ struct bch_fs {
 
 	struct journal		journal;
 
-	unsigned		bucket_journal_seq;
+	u64			last_bucket_seq_cleanup;
 
 	/* The rest of this all shows up in sysfs */
 	atomic_long_t		read_realloc_races;
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index f347c93..4a910f7 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -147,6 +147,7 @@ static void bch2_disk_reservations_verify(struct bch_fs *c, int flags) {}
  */
 void bch2_bucket_seq_cleanup(struct bch_fs *c)
 {
+	u64 journal_seq = atomic64_read(&c->journal.seq);
 	u16 last_seq_ondisk = c->journal.last_seq_ondisk;
 	struct bch_dev *ca;
 	struct bucket_array *buckets;
@@ -154,6 +155,12 @@ void bch2_bucket_seq_cleanup(struct bch_fs *c)
 	struct bucket_mark m;
 	unsigned i;
 
+	if (journal_seq - c->last_bucket_seq_cleanup <
+	    (1U << (BUCKET_JOURNAL_SEQ_BITS - 2)))
+		return;
+
+	c->last_bucket_seq_cleanup = journal_seq;
+
 	for_each_member_device(ca, c, i) {
 		down_read(&ca->bucket_lock);
 		buckets = bucket_array(ca);
diff --git a/fs/bcachefs/buckets_types.h b/fs/bcachefs/buckets_types.h
index 5be9013..cad35a7 100644
--- a/fs/bcachefs/buckets_types.h
+++ b/fs/bcachefs/buckets_types.h
@@ -4,6 +4,8 @@
 
 #include "util.h"
 
+#define BUCKET_JOURNAL_SEQ_BITS		16
+
 struct bucket_mark {
 	union {
 	struct {
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index 3878ceb..a83c45b 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -32,14 +32,8 @@ void bch2_journal_buf_put_slowpath(struct journal *j, bool need_write_just_set)
 	    test_bit(JOURNAL_NEED_WRITE, &j->flags))
 		bch2_time_stats_update(j->delay_time,
 				       j->need_write_time);
-#if 0
-	closure_call(&j->io, bch2_journal_write, NULL, NULL);
-#else
-	/* Shut sparse up: */
-	closure_init(&j->io, NULL);
-	set_closure_fn(&j->io, bch2_journal_write, NULL);
-	bch2_journal_write(&j->io);
-#endif
+
+	closure_call(&j->io, bch2_journal_write, system_highpri_wq, NULL);
 }
 
 static void journal_pin_new_entry(struct journal *j, int count)
@@ -172,13 +166,6 @@ static enum {
 	cancel_delayed_work(&j->write_work);
 	spin_unlock(&j->lock);
 
-	if (c->bucket_journal_seq > 1 << 14) {
-		c->bucket_journal_seq = 0;
-		bch2_bucket_seq_cleanup(c);
-	}
-
-	c->bucket_journal_seq++;
-
 	/* ugh - might be called from __journal_res_get() under wait_event() */
 	__set_current_state(TASK_RUNNING);
 	bch2_journal_buf_put(j, old.idx, need_write_just_set);
@@ -943,6 +930,7 @@ void bch2_fs_journal_stop(struct journal *j)
 
 void bch2_fs_journal_start(struct journal *j)
 {
+	struct bch_fs *c = container_of(j, struct bch_fs, journal);
 	struct journal_seq_blacklist *bl;
 	u64 blacklist = 0;
 
@@ -964,6 +952,8 @@ void bch2_fs_journal_start(struct journal *j)
 	journal_pin_new_entry(j, 1);
 	bch2_journal_buf_init(j);
 
+	c->last_bucket_seq_cleanup = journal_cur_seq(j);
+
 	spin_unlock(&j->lock);
 
 	/*
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
index 320f4f2..d479d94 100644
--- a/fs/bcachefs/journal_io.c
+++ b/fs/bcachefs/journal_io.c
@@ -1384,6 +1384,8 @@ void bch2_journal_write(struct closure *cl)
 	extent_for_each_ptr(bkey_i_to_s_extent(&j->key), ptr)
 		ptr->offset += sectors;
 
+	bch2_bucket_seq_cleanup(c);
+
 	continue_at(cl, journal_write_done, system_highpri_wq);
 	return;
 err: