Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 1 | /* SPDX-License-Identifier: GPL-2.0 */ |
| 2 | #ifndef _BCACHEFS_H |
| 3 | #define _BCACHEFS_H |
| 4 | |
| 5 | /* |
| 6 | * SOME HIGH LEVEL CODE DOCUMENTATION: |
| 7 | * |
| 8 | * Bcache mostly works with cache sets, cache devices, and backing devices. |
| 9 | * |
| 10 | * Support for multiple cache devices hasn't quite been finished off yet, but |
| 11 | * it's about 95% plumbed through. A cache set and its cache devices are sort of |
| 12 | * like an md raid array and its component devices. Most of the code doesn't care |
| 13 | * about individual cache devices, the main abstraction is the cache set. |
| 14 | * |
| 15 | * Multiple cache devices is intended to give us the ability to mirror dirty |
| 16 | * cached data and metadata, without mirroring clean cached data. |
| 17 | * |
| 18 | * Backing devices are different, in that they have a lifetime independent of a |
| 19 | * cache set. When you register a newly formatted backing device it'll come up |
| 20 | * in passthrough mode, and then you can attach and detach a backing device from |
| 21 | * a cache set at runtime - while it's mounted and in use. Detaching implicitly |
| 22 | * invalidates any cached data for that backing device. |
| 23 | * |
| 24 | * A cache set can have multiple (many) backing devices attached to it. |
| 25 | * |
| 26 | * There's also flash only volumes - this is the reason for the distinction |
| 27 | * between struct cached_dev and struct bcache_device. A flash only volume |
| 28 | * works much like a bcache device that has a backing device, except the |
| 29 | * "cached" data is always dirty. The end result is that we get thin |
| 30 | * provisioning with very little additional code. |
| 31 | * |
| 32 | * Flash only volumes work but they're not production ready because the moving |
| 33 | * garbage collector needs more work. More on that later. |
| 34 | * |
| 35 | * BUCKETS/ALLOCATION: |
| 36 | * |
| 37 | * Bcache is primarily designed for caching, which means that in normal |
| 38 | * operation all of our available space will be allocated. Thus, we need an |
| 39 | * efficient way of deleting things from the cache so we can write new things to |
| 40 | * it. |
| 41 | * |
| 42 | * To do this, we first divide the cache device up into buckets. A bucket is the |
| 43 | * unit of allocation; they're typically around 1 mb - anywhere from 128k to 2M+ |
| 44 | * works efficiently. |
| 45 | * |
| 46 | * Each bucket has a 16 bit priority, and an 8 bit generation associated with |
| 47 | * it. The gens and priorities for all the buckets are stored contiguously and |
| 48 | * packed on disk (in a linked list of buckets - aside from the superblock, all |
| 49 | * of bcache's metadata is stored in buckets). |
| 50 | * |
| 51 | * The priority is used to implement an LRU. We reset a bucket's priority when |
| 52 | * we allocate it or on a cache hit, and every so often we decrement the priority |
| 53 | * of each bucket. It could be used to implement something more sophisticated, |
| 54 | * if anyone ever gets around to it. |
| 55 | * |
| 56 | * The generation is used for invalidating buckets. Each pointer also has an 8 |
| 57 | * bit generation embedded in it; for a pointer to be considered valid, its gen |
| 58 | * must match the gen of the bucket it points into. Thus, to reuse a bucket all |
| 59 | * we have to do is increment its gen (and write its new gen to disk; we batch |
| 60 | * this up). |
| 61 | * |
| 62 | * Bcache is entirely COW - we never write twice to a bucket, even buckets that |
| 63 | * contain metadata (including btree nodes). |
| 64 | * |
| 65 | * THE BTREE: |
| 66 | * |
| 67 | * Bcache is in large part designed around the btree. |
| 68 | * |
| 69 | * At a high level, the btree is just an index of key -> ptr tuples. |
| 70 | * |
| 71 | * Keys represent extents, and thus have a size field. Keys also have a variable |
| 72 | * number of pointers attached to them (potentially zero, which is handy for |
| 73 | * invalidating the cache). |
| 74 | * |
| 75 | * The key itself is an inode:offset pair. The inode number corresponds to a |
| 76 | * backing device or a flash only volume. The offset is the ending offset of the |
| 77 | * extent within the inode - not the starting offset; this makes lookups |
| 78 | * slightly more convenient. |
| 79 | * |
| 80 | * Pointers contain the cache device id, the offset on that device, and an 8 bit |
| 81 | * generation number. More on the gen later. |
| 82 | * |
| 83 | * Index lookups are not fully abstracted - cache lookups in particular are |
| 84 | * still somewhat mixed in with the btree code, but things are headed in that |
| 85 | * direction. |
| 86 | * |
| 87 | * Updates are fairly well abstracted, though. There are two different ways of |
| 88 | * updating the btree; insert and replace. |
| 89 | * |
| 90 | * BTREE_INSERT will just take a list of keys and insert them into the btree - |
| 91 | * overwriting (possibly only partially) any extents they overlap with. This is |
| 92 | * used to update the index after a write. |
| 93 | * |
| 94 | * BTREE_REPLACE is really cmpxchg(); it inserts a key into the btree iff it is |
| 95 | * overwriting a key that matches another given key. This is used for inserting |
| 96 | * data into the cache after a cache miss, and for background writeback, and for |
| 97 | * the moving garbage collector. |
| 98 | * |
| 99 | * There is no "delete" operation; deleting things from the index is |
| 100 | * accomplished either by invalidating pointers (by incrementing a bucket's |
| 101 | * gen) or by inserting a key with 0 pointers - which will overwrite anything |
| 102 | * previously present at that location in the index. |
| 103 | * |
| 104 | * This means that there are always stale/invalid keys in the btree. They're |
| 105 | * filtered out by the code that iterates through a btree node, and removed when |
| 106 | * a btree node is rewritten. |
| 107 | * |
| 108 | * BTREE NODES: |
| 109 | * |
Kent Overstreet | 3e3e02e | 2022-10-19 18:31:33 -0400 | [diff] [blame] | 110 | * Our unit of allocation is a bucket, and we can't arbitrarily allocate and |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 111 | * free smaller than a bucket - so, that's how big our btree nodes are. |
| 112 | * |
| 113 | * (If buckets are really big we'll only use part of the bucket for a btree node |
| 114 | * - no less than 1/4th - but a bucket still contains no more than a single |
| 115 | * btree node. I'd actually like to change this, but for now we rely on the |
| 116 | * bucket's gen for deleting btree nodes when we rewrite/split a node.) |
| 117 | * |
| 118 | * Anyways, btree nodes are big - big enough to be inefficient with a textbook |
| 119 | * btree implementation. |
| 120 | * |
| 121 | * The way this is solved is that btree nodes are internally log structured; we |
| 122 | * can append new keys to an existing btree node without rewriting it. This |
| 123 | * means each set of keys we write is sorted, but the node is not. |
| 124 | * |
| 125 | * We maintain this log structure in memory - keeping 1Mb of keys sorted would |
| 126 | * be expensive, and we have to distinguish between the keys we have written and |
| 127 | * the keys we haven't. So to do a lookup in a btree node, we have to search |
| 128 | * each sorted set. But we do merge written sets together lazily, so the cost of |
| 129 | * these extra searches is quite low (normally most of the keys in a btree node |
| 130 | * will be in one big set, and then there'll be one or two sets that are much |
| 131 | * smaller). |
| 132 | * |
| 133 | * This log structure makes bcache's btree more of a hybrid between a |
| 134 | * conventional btree and a compacting data structure, with some of the |
| 135 | * advantages of both. |
| 136 | * |
| 137 | * GARBAGE COLLECTION: |
| 138 | * |
| 139 | * We can't just invalidate any bucket - it might contain dirty data or |
| 140 | * metadata. If it once contained dirty data, other writes might overwrite it |
| 141 | * later, leaving no valid pointers into that bucket in the index. |
| 142 | * |
| 143 | * Thus, the primary purpose of garbage collection is to find buckets to reuse. |
| 144 | * It also counts how much valid data each bucket currently contains, so that |
| 145 | * allocation can reuse buckets sooner when they've been mostly overwritten. |
| 146 | * |
| 147 | * It also does some things that are really internal to the btree |
| 148 | * implementation. If a btree node contains pointers that are stale by more than |
| 149 | * some threshold, it rewrites the btree node to avoid the bucket's generation |
| 150 | * wrapping around. It also merges adjacent btree nodes if they're empty enough. |
| 151 | * |
| 152 | * THE JOURNAL: |
| 153 | * |
| 154 | * Bcache's journal is not necessary for consistency; we always strictly |
| 155 | * order metadata writes so that the btree and everything else is consistent on |
| 156 | * disk in the event of an unclean shutdown, and in fact bcache had writeback |
| 157 | * caching (with recovery from unclean shutdown) before journalling was |
| 158 | * implemented. |
| 159 | * |
| 160 | * Rather, the journal is purely a performance optimization; we can't complete a |
| 161 | * write until we've updated the index on disk, otherwise the cache would be |
| 162 | * inconsistent in the event of an unclean shutdown. This means that without the |
| 163 | * journal, on random write workloads we constantly have to update all the leaf |
| 164 | * nodes in the btree, and those writes will be mostly empty (appending at most |
| 165 | * a few keys each) - highly inefficient in terms of amount of metadata writes, |
| 166 | * and it puts more strain on the various btree resorting/compacting code. |
| 167 | * |
| 168 | * The journal is just a log of keys we've inserted; on startup we just reinsert |
| 169 | * all the keys in the open journal entries. That means that when we're updating |
| 170 | * a node in the btree, we can wait until a 4k block of keys fills up before |
| 171 | * writing them out. |
| 172 | * |
| 173 | * For simplicity, we only journal updates to leaf nodes; updates to parent |
| 174 | * nodes are rare enough (since our leaf nodes are huge) that it wasn't worth |
| 175 | * the complexity to deal with journalling them (in particular, journal replay) |
| 176 | * - updates to non leaf nodes just happen synchronously (see btree_split()). |
| 177 | */ |
| 178 | |
| 179 | #undef pr_fmt |
Kent Overstreet | 365f64f | 2022-01-04 00:06:49 -0500 | [diff] [blame] | 180 | #ifdef __KERNEL__ |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 181 | #define pr_fmt(fmt) "bcachefs: %s() " fmt "\n", __func__ |
Kent Overstreet | 365f64f | 2022-01-04 00:06:49 -0500 | [diff] [blame] | 182 | #else |
| 183 | #define pr_fmt(fmt) "%s() " fmt "\n", __func__ |
| 184 | #endif |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 185 | |
| 186 | #include <linux/backing-dev-defs.h> |
| 187 | #include <linux/bug.h> |
| 188 | #include <linux/bio.h> |
| 189 | #include <linux/closure.h> |
| 190 | #include <linux/kobject.h> |
| 191 | #include <linux/list.h> |
Kent Overstreet | 1dd7f9d | 2019-04-04 21:53:12 -0400 | [diff] [blame] | 192 | #include <linux/math64.h> |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 193 | #include <linux/mutex.h> |
| 194 | #include <linux/percpu-refcount.h> |
| 195 | #include <linux/percpu-rwsem.h> |
Kent Overstreet | e06af20 | 2023-12-15 22:16:51 -0500 | [diff] [blame] | 196 | #include <linux/refcount.h> |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 197 | #include <linux/rhashtable.h> |
| 198 | #include <linux/rwsem.h> |
Kent Overstreet | ef1b209 | 2021-05-18 23:53:43 -0400 | [diff] [blame] | 199 | #include <linux/semaphore.h> |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 200 | #include <linux/seqlock.h> |
| 201 | #include <linux/shrinker.h> |
Kent Overstreet | 876c7af | 2020-11-15 16:30:22 -0500 | [diff] [blame] | 202 | #include <linux/srcu.h> |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 203 | #include <linux/types.h> |
| 204 | #include <linux/workqueue.h> |
| 205 | #include <linux/zstd.h> |
| 206 | |
| 207 | #include "bcachefs_format.h" |
Kent Overstreet | 1d16c60 | 2023-11-09 14:22:46 -0500 | [diff] [blame] | 208 | #include "disk_accounting_types.h" |
Kent Overstreet | fc6c01e | 2021-11-28 13:42:05 -0500 | [diff] [blame] | 209 | #include "errcode.h" |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 210 | #include "fifo.h" |
Kent Overstreet | 350175b | 2022-12-14 20:52:11 -0500 | [diff] [blame] | 211 | #include "nocow_locking_types.h" |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 212 | #include "opts.h" |
Kent Overstreet | d255426 | 2024-03-23 20:07:46 -0400 | [diff] [blame] | 213 | #include "recovery_passes_types.h" |
Kent Overstreet | f5d26fa | 2023-10-25 15:51:16 -0400 | [diff] [blame] | 214 | #include "sb-errors_types.h" |
Kent Overstreet | a5b696e | 2023-06-19 21:01:13 -0400 | [diff] [blame] | 215 | #include "seqmutex.h" |
Kent Overstreet | f1ca1ab | 2024-03-13 20:16:40 -0400 | [diff] [blame] | 216 | #include "time_stats.h" |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 217 | #include "util.h" |
| 218 | |
Kent Overstreet | d94189a | 2023-02-09 12:21:45 -0500 | [diff] [blame] | 219 | #ifdef CONFIG_BCACHEFS_DEBUG |
| 220 | #define BCH_WRITE_REF_DEBUG |
| 221 | #endif |
| 222 | |
Kent Overstreet | b9fa375 | 2023-03-11 20:38:46 -0500 | [diff] [blame] | 223 | #ifndef dynamic_fault |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 224 | #define dynamic_fault(...) 0 |
Kent Overstreet | b9fa375 | 2023-03-11 20:38:46 -0500 | [diff] [blame] | 225 | #endif |
| 226 | |
| 227 | #define race_fault(...) dynamic_fault("bcachefs:race") |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 228 | |
Kent Overstreet | 7464403 | 2023-11-27 22:37:27 -0500 | [diff] [blame] | 229 | #define count_event(_c, _name) this_cpu_inc((_c)->counters[BCH_COUNTER_##_name]) |
| 230 | |
Kent Overstreet | 674cfc2 | 2022-08-27 12:48:36 -0400 | [diff] [blame] | 231 | #define trace_and_count(_c, _name, ...) \ |
| 232 | do { \ |
Kent Overstreet | 7464403 | 2023-11-27 22:37:27 -0500 | [diff] [blame] | 233 | count_event(_c, _name); \ |
Kent Overstreet | 674cfc2 | 2022-08-27 12:48:36 -0400 | [diff] [blame] | 234 | trace_##_name(__VA_ARGS__); \ |
| 235 | } while (0) |
| 236 | |
Kent Overstreet | cd575dd | 2018-11-01 15:13:19 -0400 | [diff] [blame] | 237 | #define bch2_fs_init_fault(name) \ |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 238 | dynamic_fault("bcachefs:bch_fs_init:" name) |
| 239 | #define bch2_meta_read_fault(name) \ |
| 240 | dynamic_fault("bcachefs:meta:read:" name) |
| 241 | #define bch2_meta_write_fault(name) \ |
| 242 | dynamic_fault("bcachefs:meta:write:" name) |
| 243 | |
| 244 | #ifdef __KERNEL__ |
Kent Overstreet | b2d1d56 | 2022-11-13 20:01:42 -0500 | [diff] [blame] | 245 | #define BCACHEFS_LOG_PREFIX |
| 246 | #endif |
| 247 | |
| 248 | #ifdef BCACHEFS_LOG_PREFIX |
Kent Overstreet | 7fec826 | 2022-11-15 20:25:08 -0500 | [diff] [blame] | 249 | |
| 250 | #define bch2_log_msg(_c, fmt) "bcachefs (%s): " fmt, ((_c)->name) |
| 251 | #define bch2_fmt_dev(_ca, fmt) "bcachefs (%s): " fmt "\n", ((_ca)->name) |
| 252 | #define bch2_fmt_dev_offset(_ca, _offset, fmt) "bcachefs (%s sector %llu): " fmt "\n", ((_ca)->name), (_offset) |
| 253 | #define bch2_fmt_inum(_c, _inum, fmt) "bcachefs (%s inum %llu): " fmt "\n", ((_c)->name), (_inum) |
| 254 | #define bch2_fmt_inum_offset(_c, _inum, _offset, fmt) \ |
| 255 | "bcachefs (%s inum %llu offset %llu): " fmt "\n", ((_c)->name), (_inum), (_offset) |
| 256 | |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 257 | #else |
Kent Overstreet | 7fec826 | 2022-11-15 20:25:08 -0500 | [diff] [blame] | 258 | |
| 259 | #define bch2_log_msg(_c, fmt) fmt |
| 260 | #define bch2_fmt_dev(_ca, fmt) "%s: " fmt "\n", ((_ca)->name) |
| 261 | #define bch2_fmt_dev_offset(_ca, _offset, fmt) "%s sector %llu: " fmt "\n", ((_ca)->name), (_offset) |
| 262 | #define bch2_fmt_inum(_c, _inum, fmt) "inum %llu: " fmt "\n", (_inum) |
| 263 | #define bch2_fmt_inum_offset(_c, _inum, _offset, fmt) \ |
| 264 | "inum %llu offset %llu: " fmt "\n", (_inum), (_offset) |
| 265 | |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 266 | #endif |
| 267 | |
Kent Overstreet | 7fec826 | 2022-11-15 20:25:08 -0500 | [diff] [blame] | 268 | #define bch2_fmt(_c, fmt) bch2_log_msg(_c, fmt "\n") |
| 269 | |
Kent Overstreet | 36008d5 | 2024-05-29 22:06:00 -0400 | [diff] [blame] | 270 | void bch2_print_str(struct bch_fs *, const char *); |
| 271 | |
Kent Overstreet | 2b41226d | 2023-12-04 20:15:23 -0500 | [diff] [blame] | 272 | __printf(2, 3) |
Kent Overstreet | b63570f | 2024-02-12 17:15:29 -0500 | [diff] [blame] | 273 | void bch2_print_opts(struct bch_opts *, const char *, ...); |
| 274 | |
| 275 | __printf(2, 3) |
Kent Overstreet | 2b41226d | 2023-12-04 20:15:23 -0500 | [diff] [blame] | 276 | void __bch2_print(struct bch_fs *c, const char *fmt, ...); |
| 277 | |
| 278 | #define maybe_dev_to_fs(_c) _Generic((_c), \ |
| 279 | struct bch_dev *: ((struct bch_dev *) (_c))->fs, \ |
| 280 | struct bch_fs *: (_c)) |
| 281 | |
| 282 | #define bch2_print(_c, ...) __bch2_print(maybe_dev_to_fs(_c), __VA_ARGS__) |
| 283 | |
| 284 | #define bch2_print_ratelimited(_c, ...) \ |
| 285 | do { \ |
| 286 | static DEFINE_RATELIMIT_STATE(_rs, \ |
| 287 | DEFAULT_RATELIMIT_INTERVAL, \ |
| 288 | DEFAULT_RATELIMIT_BURST); \ |
| 289 | \ |
| 290 | if (__ratelimit(&_rs)) \ |
| 291 | bch2_print(_c, __VA_ARGS__); \ |
| 292 | } while (0) |
| 293 | |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 294 | #define bch_info(c, fmt, ...) \ |
Kent Overstreet | 2b41226d | 2023-12-04 20:15:23 -0500 | [diff] [blame] | 295 | bch2_print(c, KERN_INFO bch2_fmt(c, fmt), ##__VA_ARGS__) |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 296 | #define bch_notice(c, fmt, ...) \ |
Kent Overstreet | 2b41226d | 2023-12-04 20:15:23 -0500 | [diff] [blame] | 297 | bch2_print(c, KERN_NOTICE bch2_fmt(c, fmt), ##__VA_ARGS__) |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 298 | #define bch_warn(c, fmt, ...) \ |
Kent Overstreet | 2b41226d | 2023-12-04 20:15:23 -0500 | [diff] [blame] | 299 | bch2_print(c, KERN_WARNING bch2_fmt(c, fmt), ##__VA_ARGS__) |
Kent Overstreet | ac7f0d7 | 2019-04-03 20:38:37 -0400 | [diff] [blame] | 300 | #define bch_warn_ratelimited(c, fmt, ...) \ |
Kent Overstreet | 2b41226d | 2023-12-04 20:15:23 -0500 | [diff] [blame] | 301 | bch2_print_ratelimited(c, KERN_WARNING bch2_fmt(c, fmt), ##__VA_ARGS__) |
Kent Overstreet | 7fec826 | 2022-11-15 20:25:08 -0500 | [diff] [blame] | 302 | |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 303 | #define bch_err(c, fmt, ...) \ |
Kent Overstreet | 2b41226d | 2023-12-04 20:15:23 -0500 | [diff] [blame] | 304 | bch2_print(c, KERN_ERR bch2_fmt(c, fmt), ##__VA_ARGS__) |
Kent Overstreet | 7fec826 | 2022-11-15 20:25:08 -0500 | [diff] [blame] | 305 | #define bch_err_dev(ca, fmt, ...) \ |
Kent Overstreet | 2b41226d | 2023-12-04 20:15:23 -0500 | [diff] [blame] | 306 | bch2_print(c, KERN_ERR bch2_fmt_dev(ca, fmt), ##__VA_ARGS__) |
Kent Overstreet | 7fec826 | 2022-11-15 20:25:08 -0500 | [diff] [blame] | 307 | #define bch_err_dev_offset(ca, _offset, fmt, ...) \ |
Kent Overstreet | 2b41226d | 2023-12-04 20:15:23 -0500 | [diff] [blame] | 308 | bch2_print(c, KERN_ERR bch2_fmt_dev_offset(ca, _offset, fmt), ##__VA_ARGS__) |
Kent Overstreet | 7fec826 | 2022-11-15 20:25:08 -0500 | [diff] [blame] | 309 | #define bch_err_inum(c, _inum, fmt, ...) \ |
Kent Overstreet | 2b41226d | 2023-12-04 20:15:23 -0500 | [diff] [blame] | 310 | bch2_print(c, KERN_ERR bch2_fmt_inum(c, _inum, fmt), ##__VA_ARGS__) |
Kent Overstreet | 7fec826 | 2022-11-15 20:25:08 -0500 | [diff] [blame] | 311 | #define bch_err_inum_offset(c, _inum, _offset, fmt, ...) \ |
Kent Overstreet | 2b41226d | 2023-12-04 20:15:23 -0500 | [diff] [blame] | 312 | bch2_print(c, KERN_ERR bch2_fmt_inum_offset(c, _inum, _offset, fmt), ##__VA_ARGS__) |
Kent Overstreet | 0fefe8d | 2020-12-03 13:57:22 -0500 | [diff] [blame] | 313 | |
Kent Overstreet | dfe9bfb | 2018-11-24 17:09:44 -0500 | [diff] [blame] | 314 | #define bch_err_ratelimited(c, fmt, ...) \ |
Kent Overstreet | 2b41226d | 2023-12-04 20:15:23 -0500 | [diff] [blame] | 315 | bch2_print_ratelimited(c, KERN_ERR bch2_fmt(c, fmt), ##__VA_ARGS__) |
Kent Overstreet | 7fec826 | 2022-11-15 20:25:08 -0500 | [diff] [blame] | 316 | #define bch_err_dev_ratelimited(ca, fmt, ...) \ |
Kent Overstreet | 2b41226d | 2023-12-04 20:15:23 -0500 | [diff] [blame] | 317 | bch2_print_ratelimited(ca, KERN_ERR bch2_fmt_dev(ca, fmt), ##__VA_ARGS__) |
Kent Overstreet | 7fec826 | 2022-11-15 20:25:08 -0500 | [diff] [blame] | 318 | #define bch_err_dev_offset_ratelimited(ca, _offset, fmt, ...) \ |
Kent Overstreet | 2b41226d | 2023-12-04 20:15:23 -0500 | [diff] [blame] | 319 | bch2_print_ratelimited(ca, KERN_ERR bch2_fmt_dev_offset(ca, _offset, fmt), ##__VA_ARGS__) |
Kent Overstreet | 0fefe8d | 2020-12-03 13:57:22 -0500 | [diff] [blame] | 320 | #define bch_err_inum_ratelimited(c, _inum, fmt, ...) \ |
Kent Overstreet | 2b41226d | 2023-12-04 20:15:23 -0500 | [diff] [blame] | 321 | bch2_print_ratelimited(c, KERN_ERR bch2_fmt_inum(c, _inum, fmt), ##__VA_ARGS__) |
Kent Overstreet | 7fec826 | 2022-11-15 20:25:08 -0500 | [diff] [blame] | 322 | #define bch_err_inum_offset_ratelimited(c, _inum, _offset, fmt, ...) \ |
Kent Overstreet | 2b41226d | 2023-12-04 20:15:23 -0500 | [diff] [blame] | 323 | bch2_print_ratelimited(c, KERN_ERR bch2_fmt_inum_offset(c, _inum, _offset, fmt), ##__VA_ARGS__) |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 324 | |
Kent Overstreet | cf904c8 | 2023-12-16 22:43:41 -0500 | [diff] [blame] | 325 | static inline bool should_print_err(int err) |
| 326 | { |
| 327 | return err && !bch2_err_matches(err, BCH_ERR_transaction_restart); |
| 328 | } |
| 329 | |
Kent Overstreet | 1bb3c2a | 2023-06-20 13:49:25 -0400 | [diff] [blame] | 330 | #define bch_err_fn(_c, _ret) \ |
Kent Overstreet | d2a990d | 2023-09-26 16:02:06 -0400 | [diff] [blame] | 331 | do { \ |
Kent Overstreet | cf904c8 | 2023-12-16 22:43:41 -0500 | [diff] [blame] | 332 | if (should_print_err(_ret)) \ |
Kent Overstreet | d2a990d | 2023-09-26 16:02:06 -0400 | [diff] [blame] | 333 | bch_err(_c, "%s(): error %s", __func__, bch2_err_str(_ret));\ |
| 334 | } while (0) |
| 335 | |
Kent Overstreet | a7dc10c | 2023-12-19 18:08:19 -0500 | [diff] [blame] | 336 | #define bch_err_fn_ratelimited(_c, _ret) \ |
| 337 | do { \ |
| 338 | if (should_print_err(_ret)) \ |
| 339 | bch_err_ratelimited(_c, "%s(): error %s", __func__, bch2_err_str(_ret));\ |
| 340 | } while (0) |
| 341 | |
Kent Overstreet | e691b39 | 2023-08-06 10:04:05 -0400 | [diff] [blame] | 342 | #define bch_err_msg(_c, _ret, _msg, ...) \ |
Kent Overstreet | d2a990d | 2023-09-26 16:02:06 -0400 | [diff] [blame] | 343 | do { \ |
Kent Overstreet | cf904c8 | 2023-12-16 22:43:41 -0500 | [diff] [blame] | 344 | if (should_print_err(_ret)) \ |
Kent Overstreet | d2a990d | 2023-09-26 16:02:06 -0400 | [diff] [blame] | 345 | bch_err(_c, "%s(): error " _msg " %s", __func__, \ |
| 346 | ##__VA_ARGS__, bch2_err_str(_ret)); \ |
| 347 | } while (0) |
Kent Overstreet | 1bb3c2a | 2023-06-20 13:49:25 -0400 | [diff] [blame] | 348 | |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 349 | #define bch_verbose(c, fmt, ...) \ |
| 350 | do { \ |
Kent Overstreet | 0b847a1 | 2018-12-19 12:58:56 -0500 | [diff] [blame] | 351 | if ((c)->opts.verbose) \ |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 352 | bch_info(c, fmt, ##__VA_ARGS__); \ |
| 353 | } while (0) |
| 354 | |
| 355 | #define pr_verbose_init(opts, fmt, ...) \ |
| 356 | do { \ |
Kent Overstreet | 0b847a1 | 2018-12-19 12:58:56 -0500 | [diff] [blame] | 357 | if (opt_get(opts, verbose)) \ |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 358 | pr_info(fmt, ##__VA_ARGS__); \ |
| 359 | } while (0) |
| 360 | |
| 361 | /* Parameters that are useful for debugging, but should always be compiled in: */ |
| 362 | #define BCH_DEBUG_PARAMS_ALWAYS() \ |
| 363 | BCH_DEBUG_PARAM(key_merging_disabled, \ |
| 364 | "Disables merging of extents") \ |
Kent Overstreet | 5577881 | 2024-04-05 16:21:39 -0400 | [diff] [blame] | 365 | BCH_DEBUG_PARAM(btree_node_merging_disabled, \ |
| 366 | "Disables merging of btree nodes") \ |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 367 | BCH_DEBUG_PARAM(btree_gc_always_rewrite, \ |
| 368 | "Causes mark and sweep to compact and rewrite every " \ |
| 369 | "btree node it traverses") \ |
| 370 | BCH_DEBUG_PARAM(btree_gc_rewrite_disabled, \ |
| 371 | "Disables rewriting of btree nodes during mark and sweep")\ |
| 372 | BCH_DEBUG_PARAM(btree_shrinker_disabled, \ |
Kent Overstreet | 6adaac0 | 2021-04-20 20:21:12 -0400 | [diff] [blame] | 373 | "Disables the shrinker callback for the btree node cache")\ |
| 374 | BCH_DEBUG_PARAM(verify_btree_ondisk, \ |
| 375 | "Reread btree nodes at various points to verify the " \ |
| 376 | "mergesort in the read path against modifications " \ |
Kent Overstreet | 1ce0cf5 | 2021-05-21 23:57:37 -0400 | [diff] [blame] | 377 | "done in memory") \ |
| 378 | BCH_DEBUG_PARAM(verify_all_btree_replicas, \ |
| 379 | "When reading btree nodes, read all replicas and " \ |
Kent Overstreet | a8c752b | 2022-03-17 20:51:27 -0400 | [diff] [blame] | 380 | "compare them") \ |
| 381 | BCH_DEBUG_PARAM(backpointers_no_use_write_buffer, \ |
| 382 | "Don't use the write buffer for backpointers, enabling "\ |
| 383 | "extra runtime checks") |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 384 | |
Kent Overstreet | a101957 | 2022-10-22 15:59:53 -0400 | [diff] [blame] | 385 | /* Parameters that should only be compiled in debug mode: */ |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 386 | #define BCH_DEBUG_PARAMS_DEBUG() \ |
| 387 | BCH_DEBUG_PARAM(expensive_debug_checks, \ |
| 388 | "Enables various runtime debugging checks that " \ |
| 389 | "significantly affect performance") \ |
Kent Overstreet | f13f5a8 | 2019-03-28 01:51:47 -0400 | [diff] [blame] | 390 | BCH_DEBUG_PARAM(debug_check_iterators, \ |
| 391 | "Enables extra verification for btree iterators") \ |
Kent Overstreet | 692d403 | 2020-11-02 18:36:08 -0500 | [diff] [blame] | 392 | BCH_DEBUG_PARAM(debug_check_btree_accounting, \ |
| 393 | "Verify btree accounting for keys within a node") \ |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 394 | BCH_DEBUG_PARAM(journal_seq_verify, \ |
| 395 | "Store the journal sequence number in the version " \ |
| 396 | "number of every btree key, and verify that btree " \ |
| 397 | "update ordering is preserved during recovery") \ |
| 398 | BCH_DEBUG_PARAM(inject_invalid_keys, \ |
| 399 | "Store the journal sequence number in the version " \ |
| 400 | "number of every btree key, and verify that btree " \ |
| 401 | "update ordering is preserved during recovery") \ |
Kent Overstreet | b29e197 | 2018-07-22 10:43:01 -0400 | [diff] [blame] | 402 | BCH_DEBUG_PARAM(test_alloc_startup, \ |
| 403 | "Force allocator startup to use the slowpath where it" \ |
| 404 | "can't find enough free buckets without invalidating" \ |
Kent Overstreet | cd575dd | 2018-11-01 15:13:19 -0400 | [diff] [blame] | 405 | "cached data") \ |
| 406 | BCH_DEBUG_PARAM(force_reconstruct_read, \ |
| 407 | "Force reads to use the reconstruct path, when reading" \ |
Kent Overstreet | 6122ab6 | 2019-03-21 19:03:57 -0400 | [diff] [blame] | 408 | "from erasure coded extents") \ |
| 409 | BCH_DEBUG_PARAM(test_restart_gc, \ |
Kent Overstreet | ad7e137 | 2019-08-28 13:20:31 -0400 | [diff] [blame] | 410 | "Test restarting mark and sweep gc when bucket gens change") |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 411 | |
| 412 | #define BCH_DEBUG_PARAMS_ALL() BCH_DEBUG_PARAMS_ALWAYS() BCH_DEBUG_PARAMS_DEBUG() |
| 413 | |
| 414 | #ifdef CONFIG_BCACHEFS_DEBUG |
| 415 | #define BCH_DEBUG_PARAMS() BCH_DEBUG_PARAMS_ALL() |
| 416 | #else |
| 417 | #define BCH_DEBUG_PARAMS() BCH_DEBUG_PARAMS_ALWAYS() |
| 418 | #endif |
| 419 | |
Kent Overstreet | 29364f3 | 2020-11-02 18:20:44 -0500 | [diff] [blame] | 420 | #define BCH_DEBUG_PARAM(name, description) extern bool bch2_##name; |
| 421 | BCH_DEBUG_PARAMS() |
| 422 | #undef BCH_DEBUG_PARAM |
| 423 | |
| 424 | #ifndef CONFIG_BCACHEFS_DEBUG |
Kent Overstreet | 96dea3d | 2023-09-12 18:41:22 -0400 | [diff] [blame] | 425 | #define BCH_DEBUG_PARAM(name, description) static const __maybe_unused bool bch2_##name; |
Kent Overstreet | 29364f3 | 2020-11-02 18:20:44 -0500 | [diff] [blame] | 426 | BCH_DEBUG_PARAMS_DEBUG() |
| 427 | #undef BCH_DEBUG_PARAM |
| 428 | #endif |
| 429 | |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 430 | #define BCH_TIME_STATS() \ |
| 431 | x(btree_node_mem_alloc) \ |
Kent Overstreet | dc3b63d | 2019-03-21 16:28:57 -0400 | [diff] [blame] | 432 | x(btree_node_split) \ |
Kent Overstreet | 991ba021 | 2021-12-10 15:41:38 -0500 | [diff] [blame] | 433 | x(btree_node_compact) \ |
| 434 | x(btree_node_merge) \ |
Kent Overstreet | dc3b63d | 2019-03-21 16:28:57 -0400 | [diff] [blame] | 435 | x(btree_node_sort) \ |
| 436 | x(btree_node_read) \ |
Kent Overstreet | c72e4d7 | 2024-01-03 16:42:33 -0500 | [diff] [blame] | 437 | x(btree_node_read_done) \ |
Kent Overstreet | 991ba021 | 2021-12-10 15:41:38 -0500 | [diff] [blame] | 438 | x(btree_interior_update_foreground) \ |
| 439 | x(btree_interior_update_total) \ |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 440 | x(btree_gc) \ |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 441 | x(data_write) \ |
| 442 | x(data_read) \ |
| 443 | x(data_promote) \ |
Kent Overstreet | 991ba021 | 2021-12-10 15:41:38 -0500 | [diff] [blame] | 444 | x(journal_flush_write) \ |
| 445 | x(journal_noflush_write) \ |
Kent Overstreet | 49a6720 | 2019-03-18 13:42:10 -0400 | [diff] [blame] | 446 | x(journal_flush_seq) \ |
Kent Overstreet | 066a264 | 2023-11-09 22:07:42 -0500 | [diff] [blame] | 447 | x(blocked_journal_low_on_space) \ |
| 448 | x(blocked_journal_low_on_pin) \ |
| 449 | x(blocked_journal_max_in_flight) \ |
Kent Overstreet | 06a8693 | 2024-08-10 15:48:18 -0400 | [diff] [blame] | 450 | x(blocked_key_cache_flush) \ |
Kent Overstreet | 49a6720 | 2019-03-18 13:42:10 -0400 | [diff] [blame] | 451 | x(blocked_allocate) \ |
Kent Overstreet | a8b3a67 | 2022-11-02 17:12:00 -0400 | [diff] [blame] | 452 | x(blocked_allocate_open_bucket) \ |
Kent Overstreet | 09caeab | 2023-11-02 18:57:19 -0400 | [diff] [blame] | 453 | x(blocked_write_buffer_full) \ |
Kent Overstreet | a8b3a67 | 2022-11-02 17:12:00 -0400 | [diff] [blame] | 454 | x(nocow_lock_contended) |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 455 | |
| 456 | enum bch_time_stats { |
| 457 | #define x(name) BCH_TIME_##name, |
| 458 | BCH_TIME_STATS() |
| 459 | #undef x |
| 460 | BCH_TIME_STAT_NR |
| 461 | }; |
| 462 | |
| 463 | #include "alloc_types.h" |
Kent Overstreet | 088d0de | 2024-05-27 18:40:50 -0400 | [diff] [blame] | 464 | #include "btree_gc_types.h" |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 465 | #include "btree_types.h" |
Kent Overstreet | 4409b80 | 2024-03-11 23:11:46 -0400 | [diff] [blame] | 466 | #include "btree_node_scan_types.h" |
Kent Overstreet | 920e69b | 2023-01-04 00:00:50 -0500 | [diff] [blame] | 467 | #include "btree_write_buffer_types.h" |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 468 | #include "buckets_types.h" |
Kent Overstreet | 21aec96 | 2022-01-04 22:32:09 -0500 | [diff] [blame] | 469 | #include "buckets_waiting_for_journal_types.h" |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 470 | #include "clock_types.h" |
Kent Overstreet | 37707bb | 2023-10-22 10:58:38 -0400 | [diff] [blame] | 471 | #include "disk_groups_types.h" |
Kent Overstreet | cd575dd | 2018-11-01 15:13:19 -0400 | [diff] [blame] | 472 | #include "ec_types.h" |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 473 | #include "journal_types.h" |
| 474 | #include "keylist_types.h" |
| 475 | #include "quota_types.h" |
| 476 | #include "rebalance_types.h" |
Kent Overstreet | 7a92056 | 2018-10-30 14:14:19 -0400 | [diff] [blame] | 477 | #include "replicas_types.h" |
Kent Overstreet | d155272 | 2024-04-11 21:18:35 -0400 | [diff] [blame] | 478 | #include "sb-members_types.h" |
Kent Overstreet | 2027875 | 2021-10-11 12:03:19 -0400 | [diff] [blame] | 479 | #include "subvolume_types.h" |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 480 | #include "super_types.h" |
Kent Overstreet | 96f37ea | 2023-12-31 10:04:54 -0500 | [diff] [blame] | 481 | #include "thread_with_file_types.h" |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 482 | |
/* Number of nodes btree coalesce will try to coalesce at once */
#define GC_MERGE_NODES		4U

/* Maximum number of nodes we might need to allocate atomically: */
#define BTREE_RESERVE_MAX	(BTREE_MAX_DEPTH + (BTREE_MAX_DEPTH - 1))

/* Size of the freelist we allocate btree nodes from: */
#define BTREE_NODE_RESERVE	(BTREE_RESERVE_MAX * 4)

/* Open buckets reserved for btree node writes, scaled by max replicas: */
#define BTREE_NODE_OPEN_BUCKET_RESERVE	(BTREE_RESERVE_MAX * BCH_REPLICAS_MAX)
| 493 | |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 494 | struct btree; |
| 495 | |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 496 | struct io_count { |
| 497 | u64 sectors[2][BCH_DATA_NR]; |
| 498 | }; |
| 499 | |
Kent Overstreet | 64ee143 | 2024-06-23 00:53:44 -0400 | [diff] [blame] | 500 | struct discard_in_flight { |
| 501 | bool in_progress:1; |
| 502 | u64 bucket:63; |
| 503 | }; |
| 504 | |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 505 | struct bch_dev { |
| 506 | struct kobject kobj; |
Kent Overstreet | 552aa54 | 2024-05-03 18:07:40 -0400 | [diff] [blame] | 507 | #ifdef CONFIG_BCACHEFS_DEBUG |
| 508 | atomic_long_t ref; |
| 509 | bool dying; |
| 510 | unsigned long last_put; |
| 511 | #else |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 512 | struct percpu_ref ref; |
Kent Overstreet | 552aa54 | 2024-05-03 18:07:40 -0400 | [diff] [blame] | 513 | #endif |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 514 | struct completion ref_completion; |
| 515 | struct percpu_ref io_ref; |
| 516 | struct completion io_ref_completion; |
| 517 | |
| 518 | struct bch_fs *fs; |
| 519 | |
| 520 | u8 dev_idx; |
| 521 | /* |
| 522 | * Cached version of this device's member info from superblock |
| 523 | * Committed by bch2_write_super() -> bch_fs_mi_update() |
| 524 | */ |
| 525 | struct bch_member_cpu mi; |
Kent Overstreet | 94119ee | 2023-10-25 16:29:37 -0400 | [diff] [blame] | 526 | atomic64_t errors[BCH_MEMBER_ERROR_NR]; |
| 527 | |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 528 | __uuid_t uuid; |
| 529 | char name[BDEVNAME_SIZE]; |
| 530 | |
| 531 | struct bch_sb_handle disk_sb; |
Kent Overstreet | 03e183c | 2019-03-21 23:13:46 -0400 | [diff] [blame] | 532 | struct bch_sb *sb_read_scratch; |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 533 | int sb_write_error; |
Kent Overstreet | eacb257 | 2022-01-02 21:45:35 -0500 | [diff] [blame] | 534 | dev_t dev; |
Kent Overstreet | a8b3a67 | 2022-11-02 17:12:00 -0400 | [diff] [blame] | 535 | atomic_t flush_seq; |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 536 | |
| 537 | struct bch_devs_mask self; |
| 538 | |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 539 | /* |
| 540 | * Buckets: |
Kent Overstreet | 9166b41 | 2018-11-26 00:13:33 -0500 | [diff] [blame] | 541 | * Per-bucket arrays are protected by c->mark_lock, bucket_lock and |
Kent Overstreet | b0d3ab5 | 2024-06-13 17:07:36 -0400 | [diff] [blame] | 542 | * gc_gens_lock, for device resize - holding any is sufficient for |
| 543 | * access: Or rcu_read_lock(), but only for dev_ptr_stale(): |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 544 | */ |
Kent Overstreet | 2c6a7bf | 2024-08-24 11:38:21 -0400 | [diff] [blame] | 545 | GENRADIX(struct bucket) buckets_gc; |
Kent Overstreet | 80bf2f3 | 2022-02-06 19:20:36 -0500 | [diff] [blame] | 546 | struct bucket_gens __rcu *bucket_gens; |
Kent Overstreet | c45c866 | 2021-12-24 04:51:10 -0500 | [diff] [blame] | 547 | u8 *oldest_gen; |
Kent Overstreet | 8eb7f3e | 2018-11-19 01:16:07 -0500 | [diff] [blame] | 548 | unsigned long *buckets_nouse; |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 549 | struct rw_semaphore bucket_lock; |
| 550 | |
Kent Overstreet | f5095b9 | 2024-01-01 19:42:37 -0500 | [diff] [blame] | 551 | struct bch_dev_usage __percpu *usage; |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 552 | |
| 553 | /* Allocator: */ |
Kent Overstreet | 09943313 | 2021-12-24 04:22:20 -0500 | [diff] [blame] | 554 | u64 new_fs_bucket_idx; |
Kent Overstreet | c670509 | 2024-04-20 16:25:34 -0400 | [diff] [blame] | 555 | u64 alloc_cursor[3]; |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 556 | |
Kent Overstreet | cb66fc5 | 2021-04-13 09:49:23 -0400 | [diff] [blame] | 557 | unsigned nr_open_buckets; |
Kent Overstreet | f25d821 | 2022-01-09 20:48:31 -0500 | [diff] [blame] | 558 | unsigned nr_btree_reserve; |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 559 | |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 560 | size_t inc_gen_needs_gc; |
| 561 | size_t inc_gen_really_needs_gc; |
Kent Overstreet | 21aec96 | 2022-01-04 22:32:09 -0500 | [diff] [blame] | 562 | size_t buckets_waiting_on_journal; |
Kent Overstreet | 430735c | 2018-11-19 01:31:41 -0500 | [diff] [blame] | 563 | |
Kent Overstreet | 64ee143 | 2024-06-23 00:53:44 -0400 | [diff] [blame] | 564 | struct work_struct invalidate_work; |
| 565 | struct work_struct discard_work; |
| 566 | struct mutex discard_buckets_in_flight_lock; |
| 567 | DARRAY(struct discard_in_flight) discard_buckets_in_flight; |
| 568 | struct work_struct discard_fast_work; |
| 569 | |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 570 | atomic64_t rebalance_work; |
| 571 | |
| 572 | struct journal_device journal; |
Kent Overstreet | a28bd48 | 2021-01-29 13:58:10 -0500 | [diff] [blame] | 573 | u64 prev_journal_sector; |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 574 | |
| 575 | struct work_struct io_error_work; |
| 576 | |
| 577 | /* The rest of this all shows up in sysfs */ |
| 578 | atomic64_t cur_latency[2]; |
Darrick J. Wong | 273960b | 2024-02-01 12:41:42 -0800 | [diff] [blame] | 579 | struct bch2_time_stats_quantiles io_latency[2]; |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 580 | |
| 581 | #define CONGESTED_MAX 1024 |
| 582 | atomic_t congested; |
| 583 | u64 congested_last; |
| 584 | |
| 585 | struct io_count __percpu *io_done; |
| 586 | }; |
| 587 | |
Kent Overstreet | 3c471b6 | 2023-11-26 17:05:02 -0500 | [diff] [blame] | 588 | /* |
Kent Overstreet | 3c471b6 | 2023-11-26 17:05:02 -0500 | [diff] [blame] | 589 | * initial_gc_unfixed |
| 590 | * error |
| 591 | * topology error |
| 592 | */ |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 593 | |
Kent Overstreet | 3c471b6 | 2023-11-26 17:05:02 -0500 | [diff] [blame] | 594 | #define BCH_FS_FLAGS() \ |
Kent Overstreet | a292be3 | 2024-03-27 22:50:19 -0400 | [diff] [blame] | 595 | x(new_fs) \ |
Kent Overstreet | 3c471b6 | 2023-11-26 17:05:02 -0500 | [diff] [blame] | 596 | x(started) \ |
Kent Overstreet | 1c0ee43 | 2024-09-26 16:19:58 -0400 | [diff] [blame] | 597 | x(clean_recovery) \ |
Kent Overstreet | 7773df1 | 2024-04-25 20:45:00 -0400 | [diff] [blame] | 598 | x(btree_running) \ |
Kent Overstreet | 5d9667d | 2023-11-17 00:23:07 -0500 | [diff] [blame] | 599 | x(accounting_replay_done) \ |
Kent Overstreet | 3c471b6 | 2023-11-26 17:05:02 -0500 | [diff] [blame] | 600 | x(may_go_rw) \ |
| 601 | x(rw) \ |
| 602 | x(was_rw) \ |
| 603 | x(stopping) \ |
| 604 | x(emergency_ro) \ |
| 605 | x(going_ro) \ |
| 606 | x(write_disable_complete) \ |
| 607 | x(clean_shutdown) \ |
Kent Overstreet | d55ddf6 | 2023-12-31 19:41:45 -0500 | [diff] [blame] | 608 | x(fsck_running) \ |
Kent Overstreet | 3c471b6 | 2023-11-26 17:05:02 -0500 | [diff] [blame] | 609 | x(initial_gc_unfixed) \ |
Kent Overstreet | 3c471b6 | 2023-11-26 17:05:02 -0500 | [diff] [blame] | 610 | x(need_delete_dead_snapshots) \ |
| 611 | x(error) \ |
| 612 | x(topology_error) \ |
| 613 | x(errors_fixed) \ |
Thomas Bertschinger | 07f9a27 | 2024-05-09 12:37:24 -0600 | [diff] [blame] | 614 | x(errors_not_fixed) \ |
| 615 | x(no_invalid_checks) |
Kent Overstreet | 1cab5a8 | 2022-04-21 13:13:57 -0400 | [diff] [blame] | 616 | |
Kent Overstreet | 3c471b6 | 2023-11-26 17:05:02 -0500 | [diff] [blame] | 617 | enum bch_fs_flags { |
| 618 | #define x(n) BCH_FS_##n, |
| 619 | BCH_FS_FLAGS() |
| 620 | #undef x |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 621 | }; |
| 622 | |
/* Per-btree debug handle; @id is presumably the btree ID - confirm */
struct btree_debug {
	unsigned		id;
};
| 626 | |
Kent Overstreet | 4aba7d4 | 2022-08-11 19:36:24 -0400 | [diff] [blame] | 627 | #define BCH_TRANSACTIONS_NR 128 |
| 628 | |
| 629 | struct btree_transaction_stats { |
Kent Overstreet | 89056f2 | 2023-12-23 22:43:33 -0500 | [diff] [blame] | 630 | struct bch2_time_stats duration; |
Kent Overstreet | 4aba7d4 | 2022-08-11 19:36:24 -0400 | [diff] [blame] | 631 | struct bch2_time_stats lock_hold_times; |
Kent Overstreet | 5c0bb66 | 2022-08-11 20:14:54 -0400 | [diff] [blame] | 632 | struct mutex lock; |
| 633 | unsigned nr_max_paths; |
Kent Overstreet | 24de63d | 2023-12-10 16:48:22 -0500 | [diff] [blame] | 634 | unsigned journal_entries_size; |
Kent Overstreet | 616928c | 2022-08-22 21:49:55 -0400 | [diff] [blame] | 635 | unsigned max_mem; |
Kent Overstreet | 5c0bb66 | 2022-08-11 20:14:54 -0400 | [diff] [blame] | 636 | char *max_paths_text; |
Daniel Hill | c807ca9 | 2022-07-14 20:33:09 +1200 | [diff] [blame] | 637 | }; |
| 638 | |
Kent Overstreet | 5663a41 | 2018-11-27 08:23:22 -0500 | [diff] [blame] | 639 | struct bch_fs_pcpu { |
| 640 | u64 sectors_available; |
| 641 | }; |
| 642 | |
Kent Overstreet | 1dd7f9d | 2019-04-04 21:53:12 -0400 | [diff] [blame] | 643 | struct journal_seq_blacklist_table { |
| 644 | size_t nr; |
| 645 | struct journal_seq_blacklist_table_entry { |
| 646 | u64 start; |
| 647 | u64 end; |
| 648 | bool dirty; |
Gustavo A. R. Silva | 274c2f8 | 2023-11-06 15:40:22 -0600 | [diff] [blame] | 649 | } entries[]; |
Kent Overstreet | 1dd7f9d | 2019-04-04 21:53:12 -0400 | [diff] [blame] | 650 | }; |
| 651 | |
Kent Overstreet | f1d786a | 2020-03-25 16:12:33 -0400 | [diff] [blame] | 652 | struct journal_keys { |
Kent Overstreet | 894d062 | 2024-02-24 00:15:56 -0500 | [diff] [blame] | 653 | /* must match layout in darray_types.h */ |
| 654 | size_t nr, size; |
Kent Overstreet | f1d786a | 2020-03-25 16:12:33 -0400 | [diff] [blame] | 655 | struct journal_key { |
Kent Overstreet | 1ffb876 | 2022-09-12 02:22:47 -0400 | [diff] [blame] | 656 | u64 journal_seq; |
| 657 | u32 journal_offset; |
Kent Overstreet | f1d786a | 2020-03-25 16:12:33 -0400 | [diff] [blame] | 658 | enum btree_id btree_id:8; |
| 659 | unsigned level:8; |
Kent Overstreet | 5b593ee | 2021-01-26 20:15:46 -0500 | [diff] [blame] | 660 | bool allocated; |
Kent Overstreet | dfd41fb | 2021-12-31 17:54:13 -0500 | [diff] [blame] | 661 | bool overwritten; |
Kent Overstreet | f1d786a | 2020-03-25 16:12:33 -0400 | [diff] [blame] | 662 | struct bkey_i *k; |
Kent Overstreet | 894d062 | 2024-02-24 00:15:56 -0500 | [diff] [blame] | 663 | } *data; |
Kent Overstreet | d1d7737 | 2022-04-04 01:09:26 -0400 | [diff] [blame] | 664 | /* |
| 665 | * Gap buffer: instead of all the empty space in the array being at the |
| 666 | * end of the buffer - from @nr to @size - the empty space is at @gap. |
| 667 | * This means that sequential insertions are O(n) instead of O(n^2). |
| 668 | */ |
| 669 | size_t gap; |
Kent Overstreet | 8a443d3 | 2023-11-17 23:13:49 -0500 | [diff] [blame] | 670 | atomic_t ref; |
| 671 | bool initial_ref_held; |
Kent Overstreet | f1d786a | 2020-03-25 16:12:33 -0400 | [diff] [blame] | 672 | }; |
| 673 | |
Kent Overstreet | 6bd68ec | 2023-09-12 17:16:02 -0400 | [diff] [blame] | 674 | struct btree_trans_buf { |
| 675 | struct btree_trans *trans; |
Kent Overstreet | 1a21bf9 | 2020-11-05 20:02:01 -0500 | [diff] [blame] | 676 | }; |
| 677 | |
Kent Overstreet | 14b393e | 2021-03-16 00:42:25 -0400 | [diff] [blame] | 678 | #define BCACHEFS_ROOT_SUBVOL_INUM \ |
| 679 | ((subvol_inum) { BCACHEFS_ROOT_SUBVOL, BCACHEFS_ROOT_INO }) |
| 680 | |
Kent Overstreet | d94189a | 2023-02-09 12:21:45 -0500 | [diff] [blame] | 681 | #define BCH_WRITE_REFS() \ |
| 682 | x(trans) \ |
| 683 | x(write) \ |
| 684 | x(promote) \ |
| 685 | x(node_rewrite) \ |
| 686 | x(stripe_create) \ |
| 687 | x(stripe_delete) \ |
| 688 | x(reflink) \ |
| 689 | x(fallocate) \ |
Kent Overstreet | 9e203c4 | 2024-04-13 00:26:01 -0400 | [diff] [blame] | 690 | x(fsync) \ |
| 691 | x(dio_write) \ |
Kent Overstreet | d94189a | 2023-02-09 12:21:45 -0500 | [diff] [blame] | 692 | x(discard) \ |
Kent Overstreet | a393f33 | 2024-02-16 01:08:25 -0500 | [diff] [blame] | 693 | x(discard_fast) \ |
Kent Overstreet | d94189a | 2023-02-09 12:21:45 -0500 | [diff] [blame] | 694 | x(invalidate) \ |
Kent Overstreet | d94189a | 2023-02-09 12:21:45 -0500 | [diff] [blame] | 695 | x(delete_dead_snapshots) \ |
Kent Overstreet | 10330402 | 2024-04-19 22:44:12 -0400 | [diff] [blame] | 696 | x(gc_gens) \ |
Kent Overstreet | d94189a | 2023-02-09 12:21:45 -0500 | [diff] [blame] | 697 | x(snapshot_delete_pagecache) \ |
Kent Overstreet | 183bcc8 | 2023-11-02 19:37:15 -0400 | [diff] [blame] | 698 | x(sysfs) \ |
| 699 | x(btree_write_buffer) |
Kent Overstreet | d94189a | 2023-02-09 12:21:45 -0500 | [diff] [blame] | 700 | |
/*
 * Indices for the per-class write ref counters, generated from
 * BCH_WRITE_REFS() above; BCH_WRITE_REF_NR is the class count.
 */
enum bch_write_ref {
#define x(n) BCH_WRITE_REF_##n,
	BCH_WRITE_REFS()
#undef x
	BCH_WRITE_REF_NR,
};
| 707 | |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 708 | struct bch_fs { |
| 709 | struct closure cl; |
| 710 | |
| 711 | struct list_head list; |
| 712 | struct kobject kobj; |
Daniel Hill | 104c697 | 2022-03-15 21:36:33 +1300 | [diff] [blame] | 713 | struct kobject counters_kobj; |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 714 | struct kobject internal; |
| 715 | struct kobject opts_dir; |
| 716 | struct kobject time_stats; |
| 717 | unsigned long flags; |
| 718 | |
| 719 | int minor; |
| 720 | struct device *chardev; |
| 721 | struct super_block *vfs_sb; |
Kent Overstreet | ddc7dd6 | 2021-05-27 19:15:44 -0400 | [diff] [blame] | 722 | dev_t dev; |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 723 | char name[40]; |
Kent Overstreet | 96f37ea | 2023-12-31 10:04:54 -0500 | [diff] [blame] | 724 | struct stdio_redirect *stdio; |
| 725 | struct task_struct *stdio_filter; |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 726 | |
Kent Overstreet | 1ada160 | 2020-06-15 14:58:47 -0400 | [diff] [blame] | 727 | /* ro/rw, add/remove/resize devices: */ |
| 728 | struct rw_semaphore state_lock; |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 729 | |
| 730 | /* Counts outstanding writes, for clean transition to read-only */ |
Kent Overstreet | d94189a | 2023-02-09 12:21:45 -0500 | [diff] [blame] | 731 | #ifdef BCH_WRITE_REF_DEBUG |
| 732 | atomic_long_t writes[BCH_WRITE_REF_NR]; |
| 733 | #else |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 734 | struct percpu_ref writes; |
Kent Overstreet | d94189a | 2023-02-09 12:21:45 -0500 | [diff] [blame] | 735 | #endif |
Kent Overstreet | 63508b7 | 2023-12-06 16:26:18 -0500 | [diff] [blame] | 736 | /* |
	 * Analogous to c->writes, for asynchronous ops that don't necessarily
| 738 | * need fs to be read-write |
| 739 | */ |
| 740 | refcount_t ro_ref; |
| 741 | wait_queue_head_t ro_ref_wait; |
| 742 | |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 743 | struct work_struct read_only_work; |
| 744 | |
| 745 | struct bch_dev __rcu *devs[BCH_SB_MEMBERS_MAX]; |
| 746 | |
Kent Overstreet | 2574e95 | 2024-06-06 13:25:28 -0400 | [diff] [blame] | 747 | struct bch_accounting_mem accounting; |
Kent Overstreet | 1d16c60 | 2023-11-09 14:22:46 -0500 | [diff] [blame] | 748 | |
Kent Overstreet | 73e6ab9 | 2018-12-01 10:32:48 -0500 | [diff] [blame] | 749 | struct bch_replicas_cpu replicas; |
| 750 | struct bch_replicas_cpu replicas_gc; |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 751 | struct mutex replicas_gc_lock; |
| 752 | |
Kent Overstreet | 4b8f89a | 2021-02-03 13:10:55 -0500 | [diff] [blame] | 753 | struct journal_entry_res btree_root_journal_res; |
Kent Overstreet | 4b8f89a | 2021-02-03 13:10:55 -0500 | [diff] [blame] | 754 | struct journal_entry_res clock_journal_res; |
Kent Overstreet | 180fb49d | 2021-01-21 21:52:06 -0500 | [diff] [blame] | 755 | |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 756 | struct bch_disk_groups_cpu __rcu *disk_groups; |
| 757 | |
| 758 | struct bch_opts opts; |
| 759 | |
| 760 | /* Updated by bch2_sb_update():*/ |
| 761 | struct { |
| 762 | __uuid_t uuid; |
| 763 | __uuid_t user_uuid; |
| 764 | |
Kent Overstreet | 26609b6 | 2018-11-01 15:10:01 -0400 | [diff] [blame] | 765 | u16 version; |
Kent Overstreet | 84cc758 | 2021-03-21 16:03:23 -0400 | [diff] [blame] | 766 | u16 version_min; |
Kent Overstreet | 24964e1 | 2023-06-28 19:59:56 -0400 | [diff] [blame] | 767 | u16 version_upgrade_complete; |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 768 | |
| 769 | u8 nr_devices; |
| 770 | u8 clean; |
| 771 | |
| 772 | u8 encryption_type; |
| 773 | |
| 774 | u64 time_base_lo; |
| 775 | u32 time_base_hi; |
Kent Overstreet | 595c1e9 | 2021-04-28 22:51:42 -0400 | [diff] [blame] | 776 | unsigned time_units_per_sec; |
| 777 | unsigned nsec_per_time_unit; |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 778 | u64 features; |
Kent Overstreet | 1df42b5 | 2019-02-06 11:56:51 -0500 | [diff] [blame] | 779 | u64 compat; |
Kent Overstreet | 8d65b15 | 2024-09-26 15:30:17 -0400 | [diff] [blame] | 780 | unsigned long errors_silent[BITS_TO_LONGS(BCH_FSCK_ERR_MAX)]; |
Kent Overstreet | 55936af | 2024-03-15 23:03:42 -0400 | [diff] [blame] | 781 | u64 btrees_lost_data; |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 782 | } sb; |
| 783 | |
Kent Overstreet | 595c1e9 | 2021-04-28 22:51:42 -0400 | [diff] [blame] | 784 | |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 785 | struct bch_sb_handle disk_sb; |
| 786 | |
| 787 | unsigned short block_bits; /* ilog2(block_size) */ |
| 788 | |
| 789 | u16 btree_foreground_merge_threshold; |
| 790 | |
| 791 | struct closure sb_write; |
| 792 | struct mutex sb_lock; |
| 793 | |
Kent Overstreet | 14b393e | 2021-03-16 00:42:25 -0400 | [diff] [blame] | 794 | /* snapshot.c: */ |
Kent Overstreet | 8479938 | 2023-07-12 13:55:03 -0400 | [diff] [blame] | 795 | struct snapshot_table __rcu *snapshots; |
Kent Overstreet | 14b393e | 2021-03-16 00:42:25 -0400 | [diff] [blame] | 796 | struct mutex snapshot_table_lock; |
Kent Overstreet | 37fad94 | 2023-09-29 01:15:33 -0400 | [diff] [blame] | 797 | struct rw_semaphore snapshot_create_lock; |
Kent Overstreet | 8479938 | 2023-07-12 13:55:03 -0400 | [diff] [blame] | 798 | |
Kent Overstreet | 14b393e | 2021-03-16 00:42:25 -0400 | [diff] [blame] | 799 | struct work_struct snapshot_delete_work; |
Kent Overstreet | 2027875 | 2021-10-11 12:03:19 -0400 | [diff] [blame] | 800 | struct work_struct snapshot_wait_for_pagecache_and_delete_work; |
Kent Overstreet | 91d961b | 2022-03-29 15:48:45 -0400 | [diff] [blame] | 801 | snapshot_id_list snapshots_unlinked; |
Kent Overstreet | 2027875 | 2021-10-11 12:03:19 -0400 | [diff] [blame] | 802 | struct mutex snapshots_unlinked_lock; |
Kent Overstreet | 14b393e | 2021-03-16 00:42:25 -0400 | [diff] [blame] | 803 | |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 804 | /* BTREE CACHE */ |
| 805 | struct bio_set btree_bio; |
Kent Overstreet | 161f73c | 2024-06-05 11:08:20 -0400 | [diff] [blame] | 806 | struct workqueue_struct *btree_read_complete_wq; |
| 807 | struct workqueue_struct *btree_write_submit_wq; |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 808 | |
Kent Overstreet | faa6cb6 | 2023-06-28 22:09:13 -0400 | [diff] [blame] | 809 | struct btree_root btree_roots_known[BTREE_ID_NR]; |
| 810 | DARRAY(struct btree_root) btree_roots_extra; |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 811 | struct mutex btree_root_lock; |
| 812 | |
| 813 | struct btree_cache btree_cache; |
| 814 | |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 815 | /* |
| 816 | * Cache of allocated btree nodes - if we allocate a btree node and |
| 817 | * don't use it, if we free it that space can't be reused until going |
| 818 | * _all_ the way through the allocator (which exposes us to a livelock |
| 819 | * when allocating btree reserves fail halfway through) - instead, we |
| 820 | * can stick them here: |
| 821 | */ |
| 822 | struct btree_alloc btree_reserve_cache[BTREE_NODE_RESERVE * 2]; |
| 823 | unsigned btree_reserve_cache_nr; |
| 824 | struct mutex btree_reserve_cache_lock; |
| 825 | |
| 826 | mempool_t btree_interior_update_pool; |
| 827 | struct list_head btree_interior_update_list; |
Kent Overstreet | ac7c51b | 2020-02-08 16:39:37 -0500 | [diff] [blame] | 828 | struct list_head btree_interior_updates_unwritten; |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 829 | struct mutex btree_interior_update_lock; |
| 830 | struct closure_waitlist btree_interior_update_wait; |
| 831 | |
Kent Overstreet | 00b8ccf | 2020-05-25 14:57:06 -0400 | [diff] [blame] | 832 | struct workqueue_struct *btree_interior_update_worker; |
| 833 | struct work_struct btree_interior_update_work; |
| 834 | |
Kent Overstreet | a0a466e | 2024-03-17 20:25:39 -0400 | [diff] [blame] | 835 | struct workqueue_struct *btree_node_rewrite_worker; |
| 836 | |
Kent Overstreet | a1f26d7 | 2023-02-11 12:57:04 -0500 | [diff] [blame] | 837 | struct list_head pending_node_rewrites; |
| 838 | struct mutex pending_node_rewrites_lock; |
| 839 | |
Kent Overstreet | 46fee69 | 2022-10-28 17:08:41 -0400 | [diff] [blame] | 840 | /* btree_io.c: */ |
| 841 | spinlock_t btree_write_error_lock; |
| 842 | struct btree_write_stats { |
| 843 | atomic64_t nr; |
| 844 | atomic64_t bytes; |
| 845 | } btree_write_stats[BTREE_WRITE_TYPE_NR]; |
| 846 | |
Kent Overstreet | 495aabe | 2020-06-02 16:36:11 -0400 | [diff] [blame] | 847 | /* btree_iter.c: */ |
Kent Overstreet | a5b696e | 2023-06-19 21:01:13 -0400 | [diff] [blame] | 848 | struct seqmutex btree_trans_lock; |
Kent Overstreet | 495aabe | 2020-06-02 16:36:11 -0400 | [diff] [blame] | 849 | struct list_head btree_trans_list; |
Kent Overstreet | 6bd68ec | 2023-09-12 17:16:02 -0400 | [diff] [blame] | 850 | mempool_t btree_trans_pool; |
Kent Overstreet | e131b6a | 2021-04-24 00:09:06 -0400 | [diff] [blame] | 851 | mempool_t btree_trans_mem_pool; |
Kent Overstreet | 6bd68ec | 2023-09-12 17:16:02 -0400 | [diff] [blame] | 852 | struct btree_trans_buf __percpu *btree_trans_bufs; |
Kent Overstreet | 581edb6 | 2018-08-08 21:22:46 -0400 | [diff] [blame] | 853 | |
Kent Overstreet | 876c7af | 2020-11-15 16:30:22 -0500 | [diff] [blame] | 854 | struct srcu_struct btree_trans_barrier; |
Kent Overstreet | 99fafb0 | 2021-12-20 18:18:35 -0500 | [diff] [blame] | 855 | bool btree_trans_barrier_initialized; |
Kent Overstreet | 876c7af | 2020-11-15 16:30:22 -0500 | [diff] [blame] | 856 | |
Kent Overstreet | 2ca88e5 | 2019-03-07 19:46:10 -0500 | [diff] [blame] | 857 | struct btree_key_cache btree_key_cache; |
Kent Overstreet | 7c8f6f9 | 2022-01-12 02:13:21 -0500 | [diff] [blame] | 858 | unsigned btree_key_cache_btrees; |
Kent Overstreet | 2ca88e5 | 2019-03-07 19:46:10 -0500 | [diff] [blame] | 859 | |
Kent Overstreet | 920e69b | 2023-01-04 00:00:50 -0500 | [diff] [blame] | 860 | struct btree_write_buffer btree_write_buffer; |
| 861 | |
Kent Overstreet | 731bdd2 | 2021-05-22 17:37:25 -0400 | [diff] [blame] | 862 | struct workqueue_struct *btree_update_wq; |
Kent Overstreet | 9f1833c | 2021-07-10 13:44:42 -0400 | [diff] [blame] | 863 | struct workqueue_struct *btree_io_complete_wq; |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 864 | /* copygc needs its own workqueue for index updates.. */ |
| 865 | struct workqueue_struct *copygc_wq; |
Brian Foster | 8bff987 | 2023-03-23 14:09:05 -0400 | [diff] [blame] | 866 | /* |
| 867 | * Use a dedicated wq for write ref holder tasks. Required to avoid |
| 868 | * dependency problems with other wq tasks that can block on ref |
| 869 | * draining, such as read-only transition. |
| 870 | */ |
| 871 | struct workqueue_struct *write_ref_wq; |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 872 | |
| 873 | /* ALLOCATION */ |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 874 | struct bch_devs_mask rw_devs[BCH_DATA_NR]; |
Kent Overstreet | 83ccd9b | 2024-09-06 19:12:53 -0400 | [diff] [blame] | 875 | unsigned long rw_devs_change_count; |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 876 | |
| 877 | u64 capacity; /* sectors */ |
Kent Overstreet | 26a170a | 2024-07-11 16:00:46 -0400 | [diff] [blame] | 878 | u64 reserved; /* sectors */ |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 879 | |
| 880 | /* |
| 881 | * When capacity _decreases_ (due to a disk being removed), we |
| 882 | * increment capacity_gen - this invalidates outstanding reservations |
| 883 | * and forces them to be revalidated |
| 884 | */ |
| 885 | u32 capacity_gen; |
Kent Overstreet | b092dad | 2018-11-04 21:55:35 -0500 | [diff] [blame] | 886 | unsigned bucket_size_max; |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 887 | |
| 888 | atomic64_t sectors_available; |
Kent Overstreet | fca1223 | 2020-12-03 14:17:33 -0500 | [diff] [blame] | 889 | struct mutex sectors_available_lock; |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 890 | |
Kent Overstreet | 5663a41 | 2018-11-27 08:23:22 -0500 | [diff] [blame] | 891 | struct bch_fs_pcpu __percpu *pcpu; |
Kent Overstreet | 9ca53b5 | 2018-07-23 05:32:01 -0400 | [diff] [blame] | 892 | |
Kent Overstreet | 5663a41 | 2018-11-27 08:23:22 -0500 | [diff] [blame] | 893 | struct percpu_rw_semaphore mark_lock; |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 894 | |
Kent Overstreet | 5e82a9a | 2019-02-10 19:34:47 -0500 | [diff] [blame] | 895 | seqcount_t usage_lock; |
Kent Overstreet | 8bb8d68 | 2023-12-27 22:09:25 -0500 | [diff] [blame] | 896 | struct bch_fs_usage_base __percpu *usage; |
Kent Overstreet | 5e82a9a | 2019-02-10 19:34:47 -0500 | [diff] [blame] | 897 | u64 __percpu *online_reserved; |
Kent Overstreet | 4d8100d | 2019-03-15 18:20:46 -0400 | [diff] [blame] | 898 | |
Kent Overstreet | cecf727 | 2024-08-07 13:58:57 -0400 | [diff] [blame] | 899 | unsigned long allocator_last_stuck; |
| 900 | |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 901 | struct io_clock io_clock[2]; |
| 902 | |
Kent Overstreet | 1dd7f9d | 2019-04-04 21:53:12 -0400 | [diff] [blame] | 903 | /* JOURNAL SEQ BLACKLIST */ |
| 904 | struct journal_seq_blacklist_table * |
| 905 | journal_seq_blacklist_table; |
Kent Overstreet | 1dd7f9d | 2019-04-04 21:53:12 -0400 | [diff] [blame] | 906 | |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 907 | /* ALLOCATOR */ |
| 908 | spinlock_t freelist_lock; |
Kent Overstreet | 90541a7 | 2018-07-21 23:36:11 -0400 | [diff] [blame] | 909 | struct closure_waitlist freelist_wait; |
Kent Overstreet | 9ddffaf | 2021-12-25 21:43:29 -0500 | [diff] [blame] | 910 | |
Kent Overstreet | 374153c | 2020-06-09 15:44:03 -0400 | [diff] [blame] | 911 | open_bucket_idx_t open_buckets_freelist; |
| 912 | open_bucket_idx_t open_buckets_nr_free; |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 913 | struct closure_waitlist open_buckets_wait; |
| 914 | struct open_bucket open_buckets[OPEN_BUCKETS_COUNT]; |
Kent Overstreet | 9ddffaf | 2021-12-25 21:43:29 -0500 | [diff] [blame] | 915 | open_bucket_idx_t open_buckets_hash[OPEN_BUCKETS_COUNT]; |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 916 | |
Kent Overstreet | 39a1ea1 | 2023-02-25 00:32:34 -0500 | [diff] [blame] | 917 | open_bucket_idx_t open_buckets_partial[OPEN_BUCKETS_COUNT]; |
| 918 | open_bucket_idx_t open_buckets_partial_nr; |
| 919 | |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 920 | struct write_point btree_write_point; |
| 921 | struct write_point rebalance_write_point; |
| 922 | |
Kent Overstreet | b092dad | 2018-11-04 21:55:35 -0500 | [diff] [blame] | 923 | struct write_point write_points[WRITE_POINT_MAX]; |
| 924 | struct hlist_head write_points_hash[WRITE_POINT_HASH_NR]; |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 925 | struct mutex write_points_hash_lock; |
Kent Overstreet | b092dad | 2018-11-04 21:55:35 -0500 | [diff] [blame] | 926 | unsigned write_points_nr; |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 927 | |
Kent Overstreet | 21aec96 | 2022-01-04 22:32:09 -0500 | [diff] [blame] | 928 | struct buckets_waiting_for_journal buckets_waiting_for_journal; |
| 929 | |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 930 | /* GARBAGE COLLECTION */ |
Kent Overstreet | 10330402 | 2024-04-19 22:44:12 -0400 | [diff] [blame] | 931 | struct work_struct gc_gens_work; |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 932 | unsigned long gc_count; |
| 933 | |
Kent Overstreet | ac516d0 | 2021-04-13 15:00:40 -0400 | [diff] [blame] | 934 | enum btree_id gc_gens_btree; |
| 935 | struct bpos gc_gens_pos; |
| 936 | |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 937 | /* |
| 938 | * Tracks GC's progress - everything in the range [ZERO_KEY..gc_cur_pos] |
| 939 | * has been marked by GC. |
| 940 | * |
Kent Overstreet | 41f8b09 | 2021-02-20 19:27:37 -0500 | [diff] [blame] | 941 | * gc_cur_phase is a superset of btree_ids (BTREE_ID_extents etc.) |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 942 | * |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 943 | * Protected by gc_pos_lock. Only written to by GC thread, so GC thread |
| 944 | * can read without a lock. |
| 945 | */ |
| 946 | seqcount_t gc_pos_lock; |
| 947 | struct gc_pos gc_pos; |
| 948 | |
| 949 | /* |
| 950 | * The allocation code needs gc_mark in struct bucket to be correct, but |
| 951 | * it's not while a gc is in progress. |
| 952 | */ |
| 953 | struct rw_semaphore gc_lock; |
Kent Overstreet | c45c866 | 2021-12-24 04:51:10 -0500 | [diff] [blame] | 954 | struct mutex gc_gens_lock; |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 955 | |
| 956 | /* IO PATH */ |
Kent Overstreet | ef1b209 | 2021-05-18 23:53:43 -0400 | [diff] [blame] | 957 | struct semaphore io_in_flight; |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 958 | struct bio_set bio_read; |
| 959 | struct bio_set bio_read_split; |
| 960 | struct bio_set bio_write; |
Kent Overstreet | dbd0408 | 2024-04-30 20:32:44 -0400 | [diff] [blame] | 961 | struct bio_set replica_set; |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 962 | struct mutex bio_bounce_pages_lock; |
Kent Overstreet | a8b3a67 | 2022-11-02 17:12:00 -0400 | [diff] [blame] | 963 | mempool_t bio_bounce_pages; |
| 964 | struct bucket_nocow_lock_table |
| 965 | nocow_locks; |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 966 | struct rhashtable promote_table; |
| 967 | |
| 968 | mempool_t compression_bounce[2]; |
Kent Overstreet | 1c3ff72 | 2019-12-28 20:17:06 -0500 | [diff] [blame] | 969 | mempool_t compress_workspace[BCH_COMPRESSION_TYPE_NR]; |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 970 | mempool_t decompress_workspace; |
Kent Overstreet | bbc3a46 | 2023-11-24 23:12:45 -0500 | [diff] [blame] | 971 | size_t zstd_workspace_size; |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 972 | |
| 973 | struct crypto_shash *sha256; |
| 974 | struct crypto_sync_skcipher *chacha20; |
| 975 | struct crypto_shash *poly1305; |
| 976 | |
| 977 | atomic64_t key_version; |
| 978 | |
Kent Overstreet | 35189e0 | 2019-11-09 16:01:15 -0500 | [diff] [blame] | 979 | mempool_t large_bkey_pool; |
| 980 | |
Kent Overstreet | b9fa375 | 2023-03-11 20:38:46 -0500 | [diff] [blame] | 981 | /* MOVE.C */ |
| 982 | struct list_head moving_context_list; |
| 983 | struct mutex moving_context_lock; |
| 984 | |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 985 | /* REBALANCE */ |
| 986 | struct bch_fs_rebalance rebalance; |
| 987 | |
Kent Overstreet | e6d1161 | 2020-07-11 16:28:54 -0400 | [diff] [blame] | 988 | /* COPYGC */ |
| 989 | struct task_struct *copygc_thread; |
Kent Overstreet | e6d1161 | 2020-07-11 16:28:54 -0400 | [diff] [blame] | 990 | struct write_point copygc_write_point; |
Kent Overstreet | 0fb11e08 | 2023-03-17 09:59:17 -0400 | [diff] [blame] | 991 | s64 copygc_wait_at; |
Kent Overstreet | 5bbe4bf | 2021-04-13 14:45:55 -0400 | [diff] [blame] | 992 | s64 copygc_wait; |
Daniel Hill | c91996c | 2022-06-16 02:06:43 +1200 | [diff] [blame] | 993 | bool copygc_running; |
| 994 | wait_queue_head_t copygc_running_wq; |
Kent Overstreet | e6d1161 | 2020-07-11 16:28:54 -0400 | [diff] [blame] | 995 | |
Kent Overstreet | dfe9bfb | 2018-11-24 17:09:44 -0500 | [diff] [blame] | 996 | /* STRIPES: */ |
Kent Overstreet | 990d42d | 2021-12-04 23:07:33 -0500 | [diff] [blame] | 997 | GENRADIX(struct stripe) stripes; |
| 998 | GENRADIX(struct gc_stripe) gc_stripes; |
Kent Overstreet | cd575dd | 2018-11-01 15:13:19 -0400 | [diff] [blame] | 999 | |
Kent Overstreet | 4b1e669 | 2023-02-18 21:07:25 -0500 | [diff] [blame] | 1000 | struct hlist_head ec_stripes_new[32]; |
| 1001 | spinlock_t ec_stripes_new_lock; |
| 1002 | |
Kent Overstreet | cd575dd | 2018-11-01 15:13:19 -0400 | [diff] [blame] | 1003 | ec_stripes_heap ec_stripes_heap; |
Kent Overstreet | 627a231 | 2023-02-18 20:49:37 -0500 | [diff] [blame] | 1004 | struct mutex ec_stripes_heap_lock; |
Kent Overstreet | cd575dd | 2018-11-01 15:13:19 -0400 | [diff] [blame] | 1005 | |
Kent Overstreet | dfe9bfb | 2018-11-24 17:09:44 -0500 | [diff] [blame] | 1006 | /* ERASURE CODING */ |
Kent Overstreet | 703e2a4 | 2020-07-06 20:59:46 -0400 | [diff] [blame] | 1007 | struct list_head ec_stripe_head_list; |
| 1008 | struct mutex ec_stripe_head_lock; |
| 1009 | |
| 1010 | struct list_head ec_stripe_new_list; |
| 1011 | struct mutex ec_stripe_new_lock; |
Kent Overstreet | b40901b | 2023-03-13 22:01:47 -0400 | [diff] [blame] | 1012 | wait_queue_head_t ec_stripe_new_wait; |
Kent Overstreet | 703e2a4 | 2020-07-06 20:59:46 -0400 | [diff] [blame] | 1013 | |
| 1014 | struct work_struct ec_stripe_create_work; |
Kent Overstreet | 4e1510c | 2019-08-22 17:09:16 -0400 | [diff] [blame] | 1015 | u64 ec_stripe_hint; |
Kent Overstreet | dfe9bfb | 2018-11-24 17:09:44 -0500 | [diff] [blame] | 1016 | |
Kent Overstreet | cd575dd | 2018-11-01 15:13:19 -0400 | [diff] [blame] | 1017 | struct work_struct ec_stripe_delete_work; |
Kent Overstreet | b40901b | 2023-03-13 22:01:47 -0400 | [diff] [blame] | 1018 | |
| 1019 | struct bio_set ec_bioset; |
Kent Overstreet | cd575dd | 2018-11-01 15:13:19 -0400 | [diff] [blame] | 1020 | |
Kent Overstreet | 7642609 | 2019-08-16 09:59:56 -0400 | [diff] [blame] | 1021 | /* REFLINK */ |
Kent Overstreet | 890b74f | 2021-05-23 02:31:33 -0400 | [diff] [blame] | 1022 | reflink_gc_table reflink_gc_table; |
| 1023 | size_t reflink_gc_nr; |
Kent Overstreet | 7642609 | 2019-08-16 09:59:56 -0400 | [diff] [blame] | 1024 | |
Kent Overstreet | 9edbcc7 | 2023-03-15 11:53:51 -0400 | [diff] [blame] | 1025 | /* fs.c */ |
| 1026 | struct list_head vfs_inodes_list; |
| 1027 | struct mutex vfs_inodes_lock; |
Kent Overstreet | 112d21f | 2024-06-08 21:41:01 -0400 | [diff] [blame] | 1028 | struct rhashtable vfs_inodes_table; |
Kent Overstreet | 9edbcc7 | 2023-03-15 11:53:51 -0400 | [diff] [blame] | 1029 | |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 1030 | /* VFS IO PATH - fs-io.c */ |
| 1031 | struct bio_set writepage_bioset; |
| 1032 | struct bio_set dio_write_bioset; |
| 1033 | struct bio_set dio_read_bioset; |
Kent Overstreet | a8b3a67 | 2022-11-02 17:12:00 -0400 | [diff] [blame] | 1034 | struct bio_set nocow_flush_bioset; |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 1035 | |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 1036 | /* QUOTAS */ |
| 1037 | struct bch_memquota_type quotas[QTYP_NR]; |
| 1038 | |
Kent Overstreet | 78328fe | 2023-07-08 22:33:29 -0400 | [diff] [blame] | 1039 | /* RECOVERY */ |
| 1040 | u64 journal_replay_seq_start; |
| 1041 | u64 journal_replay_seq_end; |
Kent Overstreet | 7f391b2 | 2023-12-06 14:36:18 -0500 | [diff] [blame] | 1042 | /* |
| 1043 | * Two different uses: |
| 1044 | * "Has this fsck pass?" - i.e. should this type of error be an |
| 1045 | * emergency read-only |
| 1046 | * And, in certain situations fsck will rewind to an earlier pass: used |
| 1047 | * for signaling to the toplevel code which pass we want to run now. |
| 1048 | */ |
Kent Overstreet | 067d228 | 2023-07-07 02:42:28 -0400 | [diff] [blame] | 1049 | enum bch_recovery_pass curr_recovery_pass; |
Kent Overstreet | 249bf59 | 2023-12-10 12:42:49 -0500 | [diff] [blame] | 1050 | /* bitmask of recovery passes that we actually ran */ |
Kent Overstreet | 0ed4ca1 | 2023-08-03 20:57:06 -0400 | [diff] [blame] | 1051 | u64 recovery_passes_complete; |
Kent Overstreet | 249bf59 | 2023-12-10 12:42:49 -0500 | [diff] [blame] | 1052 | /* never rewinds version of curr_recovery_pass */ |
| 1053 | enum bch_recovery_pass recovery_pass_done; |
Kent Overstreet | 267b801 | 2023-12-04 13:45:33 -0500 | [diff] [blame] | 1054 | struct semaphore online_fsck_mutex; |
Kent Overstreet | 067d228 | 2023-07-07 02:42:28 -0400 | [diff] [blame] | 1055 | |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 1056 | /* DEBUG JUNK */ |
Kent Overstreet | 75ef2c5 | 2022-02-26 11:48:34 -0500 | [diff] [blame] | 1057 | struct dentry *fs_debug_dir; |
| 1058 | struct dentry *btree_debug_dir; |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 1059 | struct btree_debug btree_debug[BTREE_ID_NR]; |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 1060 | struct btree *verify_data; |
| 1061 | struct btree_node *verify_ondisk; |
| 1062 | struct mutex verify_lock; |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 1063 | |
Kent Overstreet | b5e8a69 | 2020-11-02 23:51:33 -0500 | [diff] [blame] | 1064 | u64 *unused_inode_hints; |
| 1065 | unsigned inode_shard_bits; |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 1066 | |
| 1067 | /* |
| 1068 | * A btree node on disk could have too many bsets for an iterator to fit |
| 1069 | * on the stack - have to dynamically allocate them |
| 1070 | */ |
| 1071 | mempool_t fill_iter; |
| 1072 | |
| 1073 | mempool_t btree_bounce_pool; |
| 1074 | |
| 1075 | struct journal journal; |
Kent Overstreet | ce6201c | 2022-03-21 00:15:53 -0400 | [diff] [blame] | 1076 | GENRADIX(struct journal_replay *) journal_entries; |
| 1077 | u64 journal_entries_base_seq; |
Kent Overstreet | f1d786a | 2020-03-25 16:12:33 -0400 | [diff] [blame] | 1078 | struct journal_keys journal_keys; |
Kent Overstreet | 5b593ee | 2021-01-26 20:15:46 -0500 | [diff] [blame] | 1079 | struct list_head journal_iters; |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 1080 | |
Kent Overstreet | 4409b80 | 2024-03-11 23:11:46 -0400 | [diff] [blame] | 1081 | struct find_btree_nodes found_btree_nodes; |
| 1082 | |
Kent Overstreet | c692399 | 2018-07-21 22:57:20 -0400 | [diff] [blame] | 1083 | u64 last_bucket_seq_cleanup; |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 1084 | |
Daniel Hill | 104c697 | 2022-03-15 21:36:33 +1300 | [diff] [blame] | 1085 | u64 counters_on_mount[BCH_COUNTER_NR]; |
| 1086 | u64 __percpu *counters; |
| 1087 | |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 1088 | unsigned copy_gc_enabled:1; |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 1089 | |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 1090 | struct bch2_time_stats times[BCH_TIME_STAT_NR]; |
Daniel Hill | c807ca9 | 2022-07-14 20:33:09 +1200 | [diff] [blame] | 1091 | |
Kent Overstreet | 4aba7d4 | 2022-08-11 19:36:24 -0400 | [diff] [blame] | 1092 | struct btree_transaction_stats btree_transaction_stats[BCH_TRANSACTIONS_NR]; |
Kent Overstreet | f5d26fa | 2023-10-25 15:51:16 -0400 | [diff] [blame] | 1093 | |
| 1094 | /* ERRORS */ |
| 1095 | struct list_head fsck_error_msgs; |
| 1096 | struct mutex fsck_error_msgs_lock; |
| 1097 | bool fsck_alloc_msgs_err; |
| 1098 | |
| 1099 | bch_sb_errors_cpu fsck_error_counts; |
| 1100 | struct mutex fsck_error_counts_lock; |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 1101 | }; |
| 1102 | |
Kent Overstreet | d94189a | 2023-02-09 12:21:45 -0500 | [diff] [blame] | 1103 | extern struct wait_queue_head bch2_read_only_wait; |
| 1104 | |
/*
 * Take a filesystem write ref.
 *
 * With BCH_WRITE_REF_DEBUG, each ref type gets its own atomic counter so a
 * leaked ref can be traced back to its type; otherwise all write refs share a
 * single percpu refcount. Caller must already hold a ref (this is the
 * unconditional get, not a tryget).
 */
static inline void bch2_write_ref_get(struct bch_fs *c, enum bch_write_ref ref)
{
#ifdef BCH_WRITE_REF_DEBUG
	atomic_long_inc(&c->writes[ref]);
#else
	percpu_ref_get(&c->writes);
#endif
}
| 1113 | |
Kent Overstreet | 09caeab | 2023-11-02 18:57:19 -0400 | [diff] [blame] | 1114 | static inline bool __bch2_write_ref_tryget(struct bch_fs *c, enum bch_write_ref ref) |
| 1115 | { |
| 1116 | #ifdef BCH_WRITE_REF_DEBUG |
| 1117 | return !test_bit(BCH_FS_going_ro, &c->flags) && |
| 1118 | atomic_long_inc_not_zero(&c->writes[ref]); |
| 1119 | #else |
| 1120 | return percpu_ref_tryget(&c->writes); |
| 1121 | #endif |
| 1122 | } |
| 1123 | |
Kent Overstreet | d94189a | 2023-02-09 12:21:45 -0500 | [diff] [blame] | 1124 | static inline bool bch2_write_ref_tryget(struct bch_fs *c, enum bch_write_ref ref) |
| 1125 | { |
| 1126 | #ifdef BCH_WRITE_REF_DEBUG |
Kent Overstreet | 3c471b6 | 2023-11-26 17:05:02 -0500 | [diff] [blame] | 1127 | return !test_bit(BCH_FS_going_ro, &c->flags) && |
Kent Overstreet | d94189a | 2023-02-09 12:21:45 -0500 | [diff] [blame] | 1128 | atomic_long_inc_not_zero(&c->writes[ref]); |
| 1129 | #else |
| 1130 | return percpu_ref_tryget_live(&c->writes); |
| 1131 | #endif |
| 1132 | } |
| 1133 | |
/*
 * Drop a write ref.
 *
 * Debug path: when this type's counter hits zero, scan all the per-type
 * counters; only when every type reads zero do we mark write-disable complete
 * and wake whoever is waiting in the read-only transition.
 */
static inline void bch2_write_ref_put(struct bch_fs *c, enum bch_write_ref ref)
{
#ifdef BCH_WRITE_REF_DEBUG
	long v = atomic_long_dec_return(&c->writes[ref]);

	/* more puts than gets would be a refcounting bug: */
	BUG_ON(v < 0);
	if (v)
		return;
	for (unsigned i = 0; i < BCH_WRITE_REF_NR; i++)
		if (atomic_long_read(&c->writes[i]))
			return;

	set_bit(BCH_FS_write_disable_complete, &c->flags);
	wake_up(&bch2_read_only_wait);
#else
	percpu_ref_put(&c->writes);
#endif
}
| 1152 | |
Kent Overstreet | 63508b7 | 2023-12-06 16:26:18 -0500 | [diff] [blame] | 1153 | static inline bool bch2_ro_ref_tryget(struct bch_fs *c) |
| 1154 | { |
| 1155 | if (test_bit(BCH_FS_stopping, &c->flags)) |
| 1156 | return false; |
| 1157 | |
| 1158 | return refcount_inc_not_zero(&c->ro_ref); |
| 1159 | } |
| 1160 | |
| 1161 | static inline void bch2_ro_ref_put(struct bch_fs *c) |
| 1162 | { |
| 1163 | if (refcount_dec_and_test(&c->ro_ref)) |
| 1164 | wake_up(&c->ro_ref_wait); |
| 1165 | } |
| 1166 | |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 1167 | static inline void bch2_set_ra_pages(struct bch_fs *c, unsigned ra_pages) |
| 1168 | { |
| 1169 | #ifndef NO_BCACHEFS_FS |
| 1170 | if (c->vfs_sb) |
| 1171 | c->vfs_sb->s_bdi->ra_pages = ra_pages; |
| 1172 | #endif |
| 1173 | } |
| 1174 | |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 1175 | static inline unsigned bucket_bytes(const struct bch_dev *ca) |
| 1176 | { |
| 1177 | return ca->mi.bucket_size << 9; |
| 1178 | } |
| 1179 | |
/* Filesystem block size, in bytes. */
static inline unsigned block_bytes(const struct bch_fs *c)
{
	return c->opts.block_size;
}
| 1184 | |
Kent Overstreet | 8244f32 | 2021-12-14 14:24:41 -0500 | [diff] [blame] | 1185 | static inline unsigned block_sectors(const struct bch_fs *c) |
| 1186 | { |
| 1187 | return c->opts.block_size >> 9; |
| 1188 | } |
| 1189 | |
Kent Overstreet | 7c8f6f9 | 2022-01-12 02:13:21 -0500 | [diff] [blame] | 1190 | static inline bool btree_id_cached(const struct bch_fs *c, enum btree_id btree) |
| 1191 | { |
| 1192 | return c->btree_key_cache_btrees & (1U << btree); |
| 1193 | } |
| 1194 | |
Kent Overstreet | 8244f32 | 2021-12-14 14:24:41 -0500 | [diff] [blame] | 1195 | static inline struct timespec64 bch2_time_to_timespec(const struct bch_fs *c, s64 time) |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 1196 | { |
Kent Overstreet | 595c1e9 | 2021-04-28 22:51:42 -0400 | [diff] [blame] | 1197 | struct timespec64 t; |
Alyssa Ross | a3ed1cc | 2024-09-07 18:00:26 +0200 | [diff] [blame] | 1198 | s64 sec; |
Kent Overstreet | 595c1e9 | 2021-04-28 22:51:42 -0400 | [diff] [blame] | 1199 | s32 rem; |
| 1200 | |
| 1201 | time += c->sb.time_base_lo; |
| 1202 | |
Alyssa Ross | a3ed1cc | 2024-09-07 18:00:26 +0200 | [diff] [blame] | 1203 | sec = div_s64_rem(time, c->sb.time_units_per_sec, &rem); |
| 1204 | |
| 1205 | set_normalized_timespec64(&t, sec, rem * (s64)c->sb.nsec_per_time_unit); |
| 1206 | |
Kent Overstreet | 595c1e9 | 2021-04-28 22:51:42 -0400 | [diff] [blame] | 1207 | return t; |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 1208 | } |
| 1209 | |
Kent Overstreet | 8244f32 | 2021-12-14 14:24:41 -0500 | [diff] [blame] | 1210 | static inline s64 timespec_to_bch2_time(const struct bch_fs *c, struct timespec64 ts) |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 1211 | { |
Kent Overstreet | 595c1e9 | 2021-04-28 22:51:42 -0400 | [diff] [blame] | 1212 | return (ts.tv_sec * c->sb.time_units_per_sec + |
| 1213 | (int) ts.tv_nsec / c->sb.nsec_per_time_unit) - c->sb.time_base_lo; |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 1214 | } |
| 1215 | |
Kent Overstreet | 8244f32 | 2021-12-14 14:24:41 -0500 | [diff] [blame] | 1216 | static inline s64 bch2_current_time(const struct bch_fs *c) |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 1217 | { |
| 1218 | struct timespec64 now; |
| 1219 | |
Kent Overstreet | ea416023 | 2019-04-16 16:03:31 -0400 | [diff] [blame] | 1220 | ktime_get_coarse_real_ts64(&now); |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 1221 | return timespec_to_bch2_time(c, now); |
| 1222 | } |
| 1223 | |
Kent Overstreet | cff07e2 | 2024-06-17 10:06:03 -0400 | [diff] [blame] | 1224 | static inline u64 bch2_current_io_time(const struct bch_fs *c, int rw) |
| 1225 | { |
| 1226 | return max(1ULL, (u64) atomic64_read(&c->io_clock[rw].now) & LRU_TIME_MAX); |
| 1227 | } |
| 1228 | |
Kent Overstreet | 96f37ea | 2023-12-31 10:04:54 -0500 | [diff] [blame] | 1229 | static inline struct stdio_redirect *bch2_fs_stdio_redirect(struct bch_fs *c) |
| 1230 | { |
| 1231 | struct stdio_redirect *stdio = c->stdio; |
| 1232 | |
| 1233 | if (c->stdio_filter && c->stdio_filter != current) |
| 1234 | stdio = NULL; |
| 1235 | return stdio; |
| 1236 | } |
| 1237 | |
Kent Overstreet | 4e07447 | 2024-02-10 21:01:40 -0500 | [diff] [blame] | 1238 | static inline unsigned metadata_replicas_required(struct bch_fs *c) |
| 1239 | { |
| 1240 | return min(c->opts.metadata_replicas, |
| 1241 | c->opts.metadata_replicas_required); |
| 1242 | } |
| 1243 | |
| 1244 | static inline unsigned data_replicas_required(struct bch_fs *c) |
| 1245 | { |
| 1246 | return min(c->opts.data_replicas, |
| 1247 | c->opts.data_replicas_required); |
| 1248 | } |
| 1249 | |
Kent Overstreet | 45dd05b | 2023-03-04 22:36:02 -0500 | [diff] [blame] | 1250 | #define BKEY_PADDED_ONSTACK(key, pad) \ |
| 1251 | struct { struct bkey_i key; __u64 key ## _pad[pad]; } |
| 1252 | |
Kent Overstreet | 1c6fdbd | 2017-03-16 22:18:50 -0800 | [diff] [blame] | 1253 | #endif /* _BCACHEFS_H */ |