// SPDX-License-Identifier: GPL-2.0
/*
 * background writeback - scan btree for dirty data and write it to the backing
 * device
 *
 * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
 * Copyright 2012 Google, Inc.
 */

#include "bcache.h"
#include "btree.h"
#include "debug.h"
#include "writeback.h"

#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/sched/clock.h>
#include <trace/events/bcache.h>

static void update_gc_after_writeback(struct cache_set *c)
{
	if (c->gc_after_writeback != (BCH_ENABLE_AUTO_GC) ||
	    c->gc_stats.in_use < BCH_AUTO_GC_DIRTY_THRESHOLD)
		return;

	c->gc_after_writeback |= BCH_DO_AUTO_GC;
}

/* Rate limiting */
static uint64_t __calc_target_rate(struct cached_dev *dc)
{
	struct cache_set *c = dc->disk.c;

	/*
	 * This is the size of the cache, minus the amount used for
	 * flash-only devices
	 */
	uint64_t cache_sectors = c->nbuckets * c->cache->sb.bucket_size -
				atomic_long_read(&c->flash_dev_dirty_sectors);

	/*
	 * Unfortunately there is no control of global dirty data.  If the
	 * user states that they want 10% dirty data in the cache, and has,
	 * e.g., 5 backing volumes of equal size, we try and ensure each
	 * backing volume uses about 2% of the cache for dirty data.
	 */
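	/*
	 * Worked example (illustrative numbers, not taken from the code):
	 * with a 1 TiB cache (2^31 sectors), no flash-only volumes, and
	 * writeback_percent = 10, cache_dirty_target below is ~2^31 / 10
	 * sectors. A backing volume holding 1/5 of c->cached_dev_sectors
	 * gets bdev_share = (1/5) << WRITEBACK_SHARE_SHIFT, so its target
	 * works out to about 2% of the cache, as described above.
	 */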
	uint32_t bdev_share =
		div64_u64(bdev_nr_sectors(dc->bdev) << WRITEBACK_SHARE_SHIFT,
			  c->cached_dev_sectors);

	uint64_t cache_dirty_target =
		div_u64(cache_sectors * dc->writeback_percent, 100);

	/* Ensure each backing dev gets at least one dirty share */
	if (bdev_share < 1)
		bdev_share = 1;

	return (cache_dirty_target * bdev_share) >> WRITEBACK_SHARE_SHIFT;
}

static void __update_writeback_rate(struct cached_dev *dc)
{
	/*
	 * PI controller:
	 * Figures out the amount that should be written per second.
	 *
	 * First, the error (number of sectors that are dirty beyond our
	 * target) is calculated.  The error is accumulated (numerically
	 * integrated).
	 *
	 * Then, the proportional value and integral value are scaled
	 * based on configured values.  These are stored as inverses to
	 * avoid fixed point math and to make configuration easy -- e.g.
	 * the default value of 40 for writeback_rate_p_term_inverse
	 * attempts to write at a rate that would retire all the dirty
	 * blocks in 40 seconds.
	 *
	 * The writeback_rate_i_inverse value of 10000 means that 1/10000th
	 * of the error is accumulated in the integral term per second.
	 * This acts as a slow, long-term average that is not subject to
	 * variations in usage like the p term.
	 */
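	/*
	 * Illustrative example (assumes the default tuning mentioned above):
	 * with writeback_rate_p_term_inverse = 40, being 400000 sectors over
	 * target gives proportional_scaled = 400000 / 40 = 10000 sectors/sec.
	 * If the same error persists, the integral term winds up by
	 * error / 10000 per second (scaled by writeback_rate_update_seconds
	 * below), slowly nudging the long-term rate toward clearing the
	 * backlog.
	 */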
	int64_t target = __calc_target_rate(dc);
	int64_t dirty = bcache_dev_sectors_dirty(&dc->disk);
	int64_t error = dirty - target;
	int64_t proportional_scaled =
		div_s64(error, dc->writeback_rate_p_term_inverse);
	int64_t integral_scaled;
	uint32_t new_rate;

	/*
	 * We need to consider the number of dirty buckets as well when
	 * calculating proportional_scaled. Otherwise we might end up with an
	 * unreasonably small writeback rate in a highly fragmented situation,
	 * where very few dirty sectors consume a lot of dirty buckets. In the
	 * worst case the number of dirty buckets reaches
	 * cutoff_writeback_sync while the amount of dirty data has not even
	 * reached the writeback percent, so the rate stays at the minimum
	 * value, which leaves writes stuck in non-writeback mode.
	 */
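	/*
	 * Illustrative example (made-up numbers): suppose 500 dirty buckets
	 * of 1024 sectors each hold only 1000 dirty sectors in total. Then
	 * fragment = 500 * 1024 / 1000 = 512, and with in_use at 60% and
	 * writeback_rate_fp_term_mid = 10 (assuming the _MID threshold is
	 * 57), fp_term = 10 * (60 - 57) = 30, so
	 * fps = (1000 / 500) * 30 = 60 and overrides a smaller
	 * proportional_scaled, keeping writeback going despite little
	 * dirty data.
	 */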
	struct cache_set *c = dc->disk.c;

	int64_t dirty_buckets = c->nbuckets - c->avail_nbuckets;

	if (dc->writeback_consider_fragment &&
	    c->gc_stats.in_use > BCH_WRITEBACK_FRAGMENT_THRESHOLD_LOW && dirty > 0) {
		int64_t fragment =
			div_s64((dirty_buckets * c->cache->sb.bucket_size), dirty);
		int64_t fp_term;
		int64_t fps;

		if (c->gc_stats.in_use <= BCH_WRITEBACK_FRAGMENT_THRESHOLD_MID) {
			fp_term = (int64_t)dc->writeback_rate_fp_term_low *
			(c->gc_stats.in_use - BCH_WRITEBACK_FRAGMENT_THRESHOLD_LOW);
		} else if (c->gc_stats.in_use <= BCH_WRITEBACK_FRAGMENT_THRESHOLD_HIGH) {
			fp_term = (int64_t)dc->writeback_rate_fp_term_mid *
			(c->gc_stats.in_use - BCH_WRITEBACK_FRAGMENT_THRESHOLD_MID);
		} else {
			fp_term = (int64_t)dc->writeback_rate_fp_term_high *
			(c->gc_stats.in_use - BCH_WRITEBACK_FRAGMENT_THRESHOLD_HIGH);
		}
		fps = div_s64(dirty, dirty_buckets) * fp_term;
		if (fragment > 3 && fps > proportional_scaled) {
			/* Only overwrite the proportional term when fragment > 3 */
			proportional_scaled = fps;
		}
	}

	if ((error < 0 && dc->writeback_rate_integral > 0) ||
	    (error > 0 && time_before64(local_clock(),
			 dc->writeback_rate.next + NSEC_PER_MSEC))) {
		/*
		 * Only decrease the integral term if it's more than
		 * zero.  Only increase the integral term if the device
		 * is keeping up.  (Don't wind up the integral
		 * ineffectively in either case).
		 *
		 * It's necessary to scale this by
		 * writeback_rate_update_seconds to keep the integral
		 * term dimensioned properly.
		 */
		dc->writeback_rate_integral += error *
			dc->writeback_rate_update_seconds;
	}

	integral_scaled = div_s64(dc->writeback_rate_integral,
			dc->writeback_rate_i_term_inverse);

	new_rate = clamp_t(int32_t, (proportional_scaled + integral_scaled),
			dc->writeback_rate_minimum, NSEC_PER_SEC);

	dc->writeback_rate_proportional = proportional_scaled;
	dc->writeback_rate_integral_scaled = integral_scaled;
	dc->writeback_rate_change = new_rate -
			atomic_long_read(&dc->writeback_rate.rate);
	atomic_long_set(&dc->writeback_rate.rate, new_rate);
	dc->writeback_rate_target = target;
}

static bool set_at_max_writeback_rate(struct cache_set *c,
				      struct cached_dev *dc)
{
	/* Don't set max writeback rate if it is disabled */
	if (!c->idle_max_writeback_rate_enabled)
		return false;

	/* Don't set max writeback rate if gc is running */
	if (!c->gc_mark_valid)
		return false;
	/*
	 * Idle_counter is increased every time update_writeback_rate() is
	 * called. If all backing devices attached to the same cache set have
	 * identical dc->writeback_rate_update_seconds values, it is about 6
	 * rounds of update_writeback_rate() on each backing device before
	 * c->at_max_writeback_rate is set to 1, and then the max writeback
	 * rate is set for each dc->writeback_rate.rate.
	 * In order to avoid the extra locking cost of counting the exact
	 * number of dirty cached devices, c->attached_dev_nr is used to
	 * calculate the idle threshold. It might be bigger if not all cached
	 * devices are in writeback mode, but it still works well with a
	 * limited number of extra rounds of update_writeback_rate().
	 */
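	/*
	 * Example of the threshold arithmetic (assuming the default update
	 * period of 5 seconds): with 3 attached backing devices the cut-off
	 * below is 3 * 6 = 18 calls, i.e. roughly 6 update rounds per
	 * device, or about 30 seconds of idleness before the maximum rate
	 * kicks in.
	 */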
	if (atomic_inc_return(&c->idle_counter) <
	    atomic_read(&c->attached_dev_nr) * 6)
		return false;

	if (atomic_read(&c->at_max_writeback_rate) != 1)
		atomic_set(&c->at_max_writeback_rate, 1);

	atomic_long_set(&dc->writeback_rate.rate, INT_MAX);

	/* keep writeback_rate_target as existing value */
	dc->writeback_rate_proportional = 0;
	dc->writeback_rate_integral_scaled = 0;
	dc->writeback_rate_change = 0;

	/*
	 * Check c->idle_counter and c->at_max_writeback_rate again in case
	 * new I/O arrives before set_at_max_writeback_rate() returns.
	 * Then the writeback rate is set to 1, and its new value should be
	 * decided via __update_writeback_rate().
	 */
	if ((atomic_read(&c->idle_counter) <
	     atomic_read(&c->attached_dev_nr) * 6) ||
	    !atomic_read(&c->at_max_writeback_rate))
		return false;

	return true;
}

static void update_writeback_rate(struct work_struct *work)
{
	struct cached_dev *dc = container_of(to_delayed_work(work),
					     struct cached_dev,
					     writeback_rate_update);
	struct cache_set *c = dc->disk.c;

	/*
	 * should check BCACHE_DEV_RATE_DW_RUNNING before calling
	 * cancel_delayed_work_sync().
	 */
	set_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
	/* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
	smp_mb__after_atomic();

	/*
	 * CACHE_SET_IO_DISABLE might be set via sysfs interface,
	 * check it here too.
	 */
	if (!test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags) ||
	    test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
		clear_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
		/* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
		smp_mb__after_atomic();
		return;
	}

	if (atomic_read(&dc->has_dirty) && dc->writeback_percent) {
		/*
		 * If the whole cache set is idle, set_at_max_writeback_rate()
		 * will set the writeback rate to a maximum number. Then it is
		 * unnecessary to keep updating the writeback rate for an idle
		 * cache set that is already at the maximum rate.
		 */
		if (!set_at_max_writeback_rate(c, dc)) {
			down_read(&dc->writeback_lock);
			__update_writeback_rate(dc);
			update_gc_after_writeback(c);
			up_read(&dc->writeback_lock);
		}
	}

	/*
	 * CACHE_SET_IO_DISABLE might be set via sysfs interface,
	 * check it here too.
	 */
	if (test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags) &&
	    !test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
		schedule_delayed_work(&dc->writeback_rate_update,
				      dc->writeback_rate_update_seconds * HZ);
	}

	/*
	 * should check BCACHE_DEV_RATE_DW_RUNNING before calling
	 * cancel_delayed_work_sync().
	 */
	clear_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
	/* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
	smp_mb__after_atomic();
}

static unsigned int writeback_delay(struct cached_dev *dc,
				    unsigned int sectors)
{
	if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) ||
	    !dc->writeback_percent)
		return 0;

	return bch_next_delay(&dc->writeback_rate, sectors);
}

struct dirty_io {
	struct closure		cl;
	struct cached_dev	*dc;
	uint16_t		sequence;
	struct bio		bio;
};

static void dirty_init(struct keybuf_key *w)
{
	struct dirty_io *io = w->private;
	struct bio *bio = &io->bio;

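	/*
	 * Sizing example (assumes 4 KiB pages, so PAGE_SECTORS == 8): a key
	 * of KEY_SIZE = 16 sectors needs DIV_ROUND_UP(16, 8) = 2 inline bio
	 * vecs below, and bi_size becomes 16 << 9 = 8192 bytes.
	 */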
	bio_init(bio, NULL, bio->bi_inline_vecs,
		 DIV_ROUND_UP(KEY_SIZE(&w->key), PAGE_SECTORS), 0);
	if (!io->dc->writeback_percent)
		bio_set_prio(bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));

	bio->bi_iter.bi_size = KEY_SIZE(&w->key) << 9;
	bio->bi_private = w;
	bch_bio_map(bio, NULL);
}

static void dirty_io_destructor(struct closure *cl)
{
	struct dirty_io *io = container_of(cl, struct dirty_io, cl);

	kfree(io);
}

static void write_dirty_finish(struct closure *cl)
{
	struct dirty_io *io = container_of(cl, struct dirty_io, cl);
	struct keybuf_key *w = io->bio.bi_private;
	struct cached_dev *dc = io->dc;

	bio_free_pages(&io->bio);

	/* This is kind of a dumb way of signalling errors. */
	if (KEY_DIRTY(&w->key)) {
		int ret;
		unsigned int i;
		struct keylist keys;

		bch_keylist_init(&keys);

		bkey_copy(keys.top, &w->key);
		SET_KEY_DIRTY(keys.top, false);
		bch_keylist_push(&keys);

		for (i = 0; i < KEY_PTRS(&w->key); i++)
			atomic_inc(&PTR_BUCKET(dc->disk.c, &w->key, i)->pin);

		ret = bch_btree_insert(dc->disk.c, &keys, NULL, &w->key);

		if (ret)
			trace_bcache_writeback_collision(&w->key);

		atomic_long_inc(ret
				? &dc->disk.c->writeback_keys_failed
				: &dc->disk.c->writeback_keys_done);
	}

	bch_keybuf_del(&dc->writeback_keys, w);
	up(&dc->in_flight);

	closure_return_with_destructor(cl, dirty_io_destructor);
}

static void dirty_endio(struct bio *bio)
{
	struct keybuf_key *w = bio->bi_private;
	struct dirty_io *io = w->private;

	if (bio->bi_status) {
		SET_KEY_DIRTY(&w->key, false);
		bch_count_backing_io_errors(io->dc, bio);
	}

	closure_put(&io->cl);
}

static void write_dirty(struct closure *cl)
{
	struct dirty_io *io = container_of(cl, struct dirty_io, cl);
	struct keybuf_key *w = io->bio.bi_private;
	struct cached_dev *dc = io->dc;

	uint16_t next_sequence;

	if (atomic_read(&dc->writeback_sequence_next) != io->sequence) {
		/* Not our turn to write; wait for a write to complete */
		closure_wait(&dc->writeback_ordering_wait, cl);

		if (atomic_read(&dc->writeback_sequence_next) == io->sequence) {
			/*
			 * Edge case: it happened in indeterminate order
			 * relative to when we were added to the wait list.
			 */
			closure_wake_up(&dc->writeback_ordering_wait);
		}

		continue_at(cl, write_dirty, io->dc->writeback_write_wq);
		return;
	}

	next_sequence = io->sequence + 1;

	/*
	 * IO errors are signalled using the dirty bit on the key.
	 * If we failed to read, we should not attempt to write to the
	 * backing device.  Instead, immediately go to write_dirty_finish
	 * to clean up.
	 */
	if (KEY_DIRTY(&w->key)) {
		dirty_init(w);
		bio_set_op_attrs(&io->bio, REQ_OP_WRITE, 0);
		io->bio.bi_iter.bi_sector = KEY_START(&w->key);
		bio_set_dev(&io->bio, io->dc->bdev);
		io->bio.bi_end_io = dirty_endio;

		/* I/O request sent to backing device */
		closure_bio_submit(io->dc->disk.c, &io->bio, cl);
	}

	atomic_set(&dc->writeback_sequence_next, next_sequence);
	closure_wake_up(&dc->writeback_ordering_wait);

	continue_at(cl, write_dirty_finish, io->dc->writeback_write_wq);
}

static void read_dirty_endio(struct bio *bio)
{
	struct keybuf_key *w = bio->bi_private;
	struct dirty_io *io = w->private;

	/* is_read = 1 */
	bch_count_io_errors(io->dc->disk.c->cache,
			    bio->bi_status, 1,
			    "reading dirty data from cache");

	dirty_endio(bio);
}

static void read_dirty_submit(struct closure *cl)
{
	struct dirty_io *io = container_of(cl, struct dirty_io, cl);

	closure_bio_submit(io->dc->disk.c, &io->bio, cl);

	continue_at(cl, write_dirty, io->dc->writeback_write_wq);
}

static void read_dirty(struct cached_dev *dc)
{
	unsigned int delay = 0;
	struct keybuf_key *next, *keys[MAX_WRITEBACKS_IN_PASS], *w;
	size_t size;
	int nk, i;
	struct dirty_io *io;
	struct closure cl;
	uint16_t sequence = 0;

	BUG_ON(!llist_empty(&dc->writeback_ordering_wait.list));
	atomic_set(&dc->writeback_sequence_next, sequence);
	closure_init_stack(&cl);

	/*
	 * XXX: if we error, background writeback just spins. Should use some
	 * mempools.
	 */

	next = bch_keybuf_next(&dc->writeback_keys);

	while (!kthread_should_stop() &&
	       !test_bit(CACHE_SET_IO_DISABLE, &dc->disk.c->flags) &&
	       next) {
		size = 0;
		nk = 0;

		do {
			BUG_ON(ptr_stale(dc->disk.c, &next->key, 0));

			/*
			 * Don't combine too many operations, even if they
			 * are all small.
			 */
			if (nk >= MAX_WRITEBACKS_IN_PASS)
				break;

			/*
			 * If the current operation is very large, don't
			 * further combine operations.
			 */
			if (size >= MAX_WRITESIZE_IN_PASS)
				break;

			/*
			 * Operations are only eligible to be combined
			 * if they are contiguous.
			 *
			 * TODO: add a heuristic willing to fire a
			 * certain amount of non-contiguous IO per pass,
			 * so that we can benefit from backing device
			 * command queueing.
			 */
			if ((nk != 0) && bkey_cmp(&keys[nk-1]->key,
						&START_KEY(&next->key)))
				break;

			size += KEY_SIZE(&next->key);
			keys[nk++] = next;
		} while ((next = bch_keybuf_next(&dc->writeback_keys)));

		/* Now we have gathered a set of 1..5 keys to write back. */
		for (i = 0; i < nk; i++) {
			w = keys[i];

			io = kzalloc(struct_size(io, bio.bi_inline_vecs,
						DIV_ROUND_UP(KEY_SIZE(&w->key),
							     PAGE_SECTORS)),
				     GFP_KERNEL);
			if (!io)
				goto err;

			w->private = io;
			io->dc = dc;
			io->sequence = sequence++;

			dirty_init(w);
			bio_set_op_attrs(&io->bio, REQ_OP_READ, 0);
			io->bio.bi_iter.bi_sector = PTR_OFFSET(&w->key, 0);
			bio_set_dev(&io->bio, dc->disk.c->cache->bdev);
			io->bio.bi_end_io = read_dirty_endio;

			if (bch_bio_alloc_pages(&io->bio, GFP_KERNEL))
				goto err_free;

			trace_bcache_writeback(&w->key);

			down(&dc->in_flight);

			/*
			 * We've acquired a semaphore for the maximum
			 * simultaneous number of writebacks; from here
			 * everything happens asynchronously.
			 */
			closure_call(&io->cl, read_dirty_submit, NULL, &cl);
		}

		delay = writeback_delay(dc, size);

		while (!kthread_should_stop() &&
		       !test_bit(CACHE_SET_IO_DISABLE, &dc->disk.c->flags) &&
		       delay) {
			schedule_timeout_interruptible(delay);
			delay = writeback_delay(dc, 0);
		}
	}

	if (0) {
err_free:
		kfree(w->private);
err:
		bch_keybuf_del(&dc->writeback_keys, w);
	}

	/*
	 * Wait for outstanding writeback IOs to finish (and keybuf slots to be
	 * freed) before refilling again
	 */
	closure_sync(&cl);
}

/* Scan for dirty data */

void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned int inode,
				  uint64_t offset, int nr_sectors)
{
	struct bcache_device *d = c->devices[inode];
	unsigned int stripe_offset, sectors_dirty;
	int stripe;

	if (!d)
		return;

	stripe = offset_to_stripe(d, offset);
	if (stripe < 0)
		return;

	if (UUID_FLASH_ONLY(&c->uuids[inode]))
		atomic_long_add(nr_sectors, &c->flash_dev_dirty_sectors);

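	/*
	 * Stripe arithmetic example (the mask below assumes d->stripe_size
	 * is a power of two; illustrative values): with stripe_size = 2048
	 * sectors, an offset of 5000 maps to stripe 2 via offset_to_stripe()
	 * above, and stripe_offset = 5000 & 2047 = 904; the loop below then
	 * walks stripe by stripe, accounting s sectors to each.
	 */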
	stripe_offset = offset & (d->stripe_size - 1);

	while (nr_sectors) {
		int s = min_t(unsigned int, abs(nr_sectors),
			      d->stripe_size - stripe_offset);

		if (nr_sectors < 0)
			s = -s;

		if (stripe >= d->nr_stripes)
			return;

		sectors_dirty = atomic_add_return(s,
					d->stripe_sectors_dirty + stripe);
		if (sectors_dirty == d->stripe_size) {
			if (!test_bit(stripe, d->full_dirty_stripes))
				set_bit(stripe, d->full_dirty_stripes);
		} else {
			if (test_bit(stripe, d->full_dirty_stripes))
				clear_bit(stripe, d->full_dirty_stripes);
		}

		nr_sectors -= s;
		stripe_offset = 0;
		stripe++;
	}
}

static bool dirty_pred(struct keybuf *buf, struct bkey *k)
{
	struct cached_dev *dc = container_of(buf,
					     struct cached_dev,
					     writeback_keys);

	BUG_ON(KEY_INODE(k) != dc->disk.id);

	return KEY_DIRTY(k);
}

static void refill_full_stripes(struct cached_dev *dc)
{
	struct keybuf *buf = &dc->writeback_keys;
	unsigned int start_stripe, next_stripe;
	int stripe;
	bool wrapped = false;

	stripe = offset_to_stripe(&dc->disk, KEY_OFFSET(&buf->last_scanned));
	if (stripe < 0)
		stripe = 0;

	start_stripe = stripe;

	while (1) {
		stripe = find_next_bit(dc->disk.full_dirty_stripes,
				       dc->disk.nr_stripes, stripe);

		if (stripe == dc->disk.nr_stripes)
			goto next;

		next_stripe = find_next_zero_bit(dc->disk.full_dirty_stripes,
						 dc->disk.nr_stripes, stripe);

		buf->last_scanned = KEY(dc->disk.id,
					stripe * dc->disk.stripe_size, 0);

		bch_refill_keybuf(dc->disk.c, buf,
				  &KEY(dc->disk.id,
				       next_stripe * dc->disk.stripe_size, 0),
				  dirty_pred);

		if (array_freelist_empty(&buf->freelist))
			return;

		stripe = next_stripe;
next:
		if (wrapped && stripe > start_stripe)
			return;

		if (stripe == dc->disk.nr_stripes) {
			stripe = 0;
			wrapped = true;
		}
	}
}

/*
 * Returns true if we scanned the entire disk
 */
static bool refill_dirty(struct cached_dev *dc)
{
	struct keybuf *buf = &dc->writeback_keys;
	struct bkey start = KEY(dc->disk.id, 0, 0);
	struct bkey end = KEY(dc->disk.id, MAX_KEY_OFFSET, 0);
	struct bkey start_pos;

	/*
	 * Make sure the keybuf pos is inside the range for this disk - at
	 * bringup we might not be attached yet, so this disk's inode nr
	 * isn't initialized at that point.
	 */
	if (bkey_cmp(&buf->last_scanned, &start) < 0 ||
	    bkey_cmp(&buf->last_scanned, &end) > 0)
		buf->last_scanned = start;

	if (dc->partial_stripes_expensive) {
		refill_full_stripes(dc);
		if (array_freelist_empty(&buf->freelist))
			return false;
	}

	start_pos = buf->last_scanned;
	bch_refill_keybuf(dc->disk.c, buf, &end, dirty_pred);

	if (bkey_cmp(&buf->last_scanned, &end) < 0)
		return false;

	/*
	 * If we get to the end start scanning again from the beginning, and
	 * only scan up to where we initially started scanning from:
	 */
	buf->last_scanned = start;
	bch_refill_keybuf(dc->disk.c, buf, &start_pos, dirty_pred);

	return bkey_cmp(&buf->last_scanned, &start_pos) >= 0;
}

static int bch_writeback_thread(void *arg)
{
	struct cached_dev *dc = arg;
	struct cache_set *c = dc->disk.c;
	bool searched_full_index;

	bch_ratelimit_reset(&dc->writeback_rate);

	while (!kthread_should_stop() &&
	       !test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
		down_write(&dc->writeback_lock);
		set_current_state(TASK_INTERRUPTIBLE);
		/*
		 * If the bcache device is detaching, skip here and continue
		 * to perform writeback. Otherwise, if no dirty data on cache,
		 * or there is dirty data on cache but writeback is disabled,
		 * the writeback thread should sleep here and wait for others
		 * to wake it up.
		 */
		if (!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) &&
		    (!atomic_read(&dc->has_dirty) || !dc->writeback_running)) {
			up_write(&dc->writeback_lock);

			if (kthread_should_stop() ||
			    test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
				set_current_state(TASK_RUNNING);
				break;
			}

			schedule();
			continue;
		}
		set_current_state(TASK_RUNNING);

		searched_full_index = refill_dirty(dc);

		if (searched_full_index &&
		    RB_EMPTY_ROOT(&dc->writeback_keys.keys)) {
			atomic_set(&dc->has_dirty, 0);
			SET_BDEV_STATE(&dc->sb, BDEV_STATE_CLEAN);
			bch_write_bdev_super(dc, NULL);
			/*
			 * If bcache device is detaching via sysfs interface,
			 * writeback thread should stop after there is no dirty
			 * data on cache. BCACHE_DEV_DETACHING flag is set in
			 * bch_cached_dev_detach().
			 */
			if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags)) {
				struct closure cl;

				closure_init_stack(&cl);
				memset(&dc->sb.set_uuid, 0, 16);
				SET_BDEV_STATE(&dc->sb, BDEV_STATE_NONE);

				bch_write_bdev_super(dc, &cl);
				closure_sync(&cl);

				up_write(&dc->writeback_lock);
				break;
			}

			/*
			 * When the dirty data rate is high (e.g. 50%+), there
			 * might be heavy bucket fragmentation after writeback
			 * finishes, which hurts subsequent write performance.
			 * Users who really care about write performance may
			 * set BCH_ENABLE_AUTO_GC via sysfs; then when
			 * BCH_DO_AUTO_GC is set, the garbage collection
			 * thread will be woken up here. After a moving gc,
			 * the shrunk btree and the discarded free bucket SSD
			 * space may help subsequent write requests.
			 */
			if (c->gc_after_writeback ==
			    (BCH_ENABLE_AUTO_GC|BCH_DO_AUTO_GC)) {
				c->gc_after_writeback &= ~BCH_DO_AUTO_GC;
				force_wake_up_gc(c);
			}
		}

		up_write(&dc->writeback_lock);

		read_dirty(dc);

		if (searched_full_index) {
			unsigned int delay = dc->writeback_delay * HZ;

			while (delay &&
			       !kthread_should_stop() &&
			       !test_bit(CACHE_SET_IO_DISABLE, &c->flags) &&
			       !test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags))
				delay = schedule_timeout_interruptible(delay);

			bch_ratelimit_reset(&dc->writeback_rate);
		}
	}

	if (dc->writeback_write_wq) {
		flush_workqueue(dc->writeback_write_wq);
		destroy_workqueue(dc->writeback_write_wq);
	}
	cached_dev_put(dc);
	wait_for_kthread_stop();

	return 0;
}

/* Init */
#define INIT_KEYS_EACH_TIME	500000
#define INIT_KEYS_SLEEP_MS	100
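
/*
 * Throttling sketch: sectors_dirty_init_fn() below bails out with -EAGAIN
 * every INIT_KEYS_EACH_TIME keys while foreground searches are in flight,
 * and bch_root_node_dirty_init() then sleeps INIT_KEYS_SLEEP_MS before
 * resuming from the saved key, so the initial dirty scan yields to
 * normal I/O.
 */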

struct sectors_dirty_init {
	struct btree_op	op;
	unsigned int	inode;
	size_t		count;
	struct bkey	start;
};

static int sectors_dirty_init_fn(struct btree_op *_op, struct btree *b,
				 struct bkey *k)
{
	struct sectors_dirty_init *op = container_of(_op,
						struct sectors_dirty_init, op);
	if (KEY_INODE(k) > op->inode)
		return MAP_DONE;

	if (KEY_DIRTY(k))
		bcache_dev_sectors_dirty_add(b->c, KEY_INODE(k),
					     KEY_START(k), KEY_SIZE(k));

	op->count++;
	if (atomic_read(&b->c->search_inflight) &&
	    !(op->count % INIT_KEYS_EACH_TIME)) {
		bkey_copy_key(&op->start, k);
		return -EAGAIN;
	}

	return MAP_CONTINUE;
}

static int bch_root_node_dirty_init(struct cache_set *c,
				    struct bcache_device *d,
				    struct bkey *k)
{
	struct sectors_dirty_init op;
	int ret;

	bch_btree_op_init(&op.op, -1);
	op.inode = d->id;
	op.count = 0;
	op.start = KEY(op.inode, 0, 0);

	do {
		ret = bcache_btree(map_keys_recurse,
				   k,
				   c->root,
				   &op.op,
				   &op.start,
				   sectors_dirty_init_fn,
				   0);
		if (ret == -EAGAIN)
			schedule_timeout_interruptible(
				msecs_to_jiffies(INIT_KEYS_SLEEP_MS));
		else if (ret < 0) {
			pr_warn("sectors dirty init failed, ret=%d!\n", ret);
			break;
		}
	} while (ret == -EAGAIN);

	return ret;
}

static int bch_dirty_init_thread(void *arg)
{
	struct dirty_init_thrd_info *info = arg;
	struct bch_dirty_init_state *state = info->state;
	struct cache_set *c = state->c;
	struct btree_iter iter;
	struct bkey *k, *p;
	int cur_idx, prev_idx, skip_nr;

	k = p = NULL;
	cur_idx = prev_idx = 0;

	bch_btree_iter_init(&c->root->keys, &iter, NULL);
	k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad);
	BUG_ON(!k);

	p = k;

	while (k) {
		spin_lock(&state->idx_lock);
		cur_idx = state->key_idx;
		state->key_idx++;
		spin_unlock(&state->idx_lock);

		skip_nr = cur_idx - prev_idx;

		while (skip_nr) {
			k = bch_btree_iter_next_filter(&iter,
						       &c->root->keys,
						       bch_ptr_bad);
			if (k)
				p = k;
			else {
				atomic_set(&state->enough, 1);
				/* Update state->enough earlier */
				smp_mb__after_atomic();
				goto out;
			}
			skip_nr--;
			cond_resched();
		}

		if (p) {
			if (bch_root_node_dirty_init(c, state->d, p) < 0)
				goto out;
		}

		p = NULL;
		prev_idx = cur_idx;
		cond_resched();
	}

out:
	/* In order to wake up state->wait in time */
	smp_mb__before_atomic();
	if (atomic_dec_and_test(&state->started))
		wake_up(&state->wait);

	return 0;
}

static int bch_btree_dirty_init_thread_nr(void)
{
	int n = num_online_cpus()/2;

	if (n == 0)
		n = 1;
	else if (n > BCH_DIRTY_INIT_THRD_MAX)
		n = BCH_DIRTY_INIT_THRD_MAX;

	return n;
}
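
/*
 * Example: on an 8-CPU machine the helper above yields 8 / 2 = 4
 * dirty-init threads; a single-CPU machine still gets 1, and very large
 * machines are capped at BCH_DIRTY_INIT_THRD_MAX.
 */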

void bch_sectors_dirty_init(struct bcache_device *d)
{
	int i;
	struct bkey *k = NULL;
	struct btree_iter iter;
	struct sectors_dirty_init op;
	struct cache_set *c = d->c;
	struct bch_dirty_init_state *state;
	char name[32];

	/* Just count root keys if no leaf node */
	if (c->root->level == 0) {
		bch_btree_op_init(&op.op, -1);
		op.inode = d->id;
		op.count = 0;
		op.start = KEY(op.inode, 0, 0);

		for_each_key_filter(&c->root->keys,
				    k, &iter, bch_ptr_invalid)
			sectors_dirty_init_fn(&op.op, c->root, k);
		return;
	}

	state = kzalloc(sizeof(struct bch_dirty_init_state), GFP_KERNEL);
	if (!state) {
		pr_warn("sectors dirty init failed: cannot allocate memory\n");
		return;
	}

	state->c = c;
	state->d = d;
	state->total_threads = bch_btree_dirty_init_thread_nr();
	state->key_idx = 0;
	spin_lock_init(&state->idx_lock);
	atomic_set(&state->started, 0);
	atomic_set(&state->enough, 0);
	init_waitqueue_head(&state->wait);

	for (i = 0; i < state->total_threads; i++) {
		/* Fetch latest state->enough earlier */
		smp_mb__before_atomic();
		if (atomic_read(&state->enough))
			break;

		state->infos[i].state = state;
		atomic_inc(&state->started);
		snprintf(name, sizeof(name), "bch_dirty_init[%d]", i);

		state->infos[i].thread =
			kthread_run(bch_dirty_init_thread,
				    &state->infos[i],
				    name);
		if (IS_ERR(state->infos[i].thread)) {
			pr_err("failed to run thread bch_dirty_init[%d]\n", i);
			for (--i; i >= 0; i--)
				kthread_stop(state->infos[i].thread);
			goto out;
		}
	}

	/*
	 * Must wait for all threads to stop.
	 */
	wait_event_interruptible(state->wait,
				 atomic_read(&state->started) == 0);

out:
	kfree(state);
}

void bch_cached_dev_writeback_init(struct cached_dev *dc)
{
	sema_init(&dc->in_flight, 64);
	init_rwsem(&dc->writeback_lock);
	bch_keybuf_init(&dc->writeback_keys);

	dc->writeback_metadata = true;
	dc->writeback_running = false;
	dc->writeback_consider_fragment = true;
	dc->writeback_percent = 10;
	dc->writeback_delay = 30;
	atomic_long_set(&dc->writeback_rate.rate, 1024);
	dc->writeback_rate_minimum = 8;

	dc->writeback_rate_update_seconds = WRITEBACK_RATE_UPDATE_SECS_DEFAULT;
	dc->writeback_rate_p_term_inverse = 40;
	dc->writeback_rate_fp_term_low = 1;
	dc->writeback_rate_fp_term_mid = 10;
	dc->writeback_rate_fp_term_high = 1000;
	dc->writeback_rate_i_term_inverse = 10000;

	WARN_ON(test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags));
	INIT_DELAYED_WORK(&dc->writeback_rate_update, update_writeback_rate);
}

int bch_cached_dev_writeback_start(struct cached_dev *dc)
{
	dc->writeback_write_wq = alloc_workqueue("bcache_writeback_wq",
						 WQ_MEM_RECLAIM, 0);
	if (!dc->writeback_write_wq)
		return -ENOMEM;

	cached_dev_get(dc);
	dc->writeback_thread = kthread_create(bch_writeback_thread, dc,
					      "bcache_writeback");
	if (IS_ERR(dc->writeback_thread)) {
		cached_dev_put(dc);
		destroy_workqueue(dc->writeback_write_wq);
		return PTR_ERR(dc->writeback_thread);
	}
	dc->writeback_running = true;

	WARN_ON(test_and_set_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags));
	schedule_delayed_work(&dc->writeback_rate_update,
			      dc->writeback_rate_update_seconds * HZ);

	bch_writeback_queue(dc);

	return 0;
}