// SPDX-License-Identifier: GPL-2.0
/*
 * Zoned block device handling
 *
 * Copyright (c) 2015, Hannes Reinecke
 * Copyright (c) 2015, SUSE Linux GmbH
 *
 * Copyright (c) 2016, Damien Le Moal
 * Copyright (c) 2016, Western Digital
 * Copyright (c) 2024, Western Digital Corporation or its affiliates.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <linux/sched/mm.h>
#include <linux/spinlock.h>
#include <linux/atomic.h>
#include <linux/mempool.h>

#include "blk.h"
#include "blk-mq-sched.h"
#include "blk-mq-debugfs.h"

#define ZONE_COND_NAME(name) [BLK_ZONE_COND_##name] = #name
static const char *const zone_cond_name[] = {
	ZONE_COND_NAME(NOT_WP),
	ZONE_COND_NAME(EMPTY),
	ZONE_COND_NAME(IMP_OPEN),
	ZONE_COND_NAME(EXP_OPEN),
	ZONE_COND_NAME(CLOSED),
	ZONE_COND_NAME(READONLY),
	ZONE_COND_NAME(FULL),
	ZONE_COND_NAME(OFFLINE),
};
#undef ZONE_COND_NAME

/*
 * Per-zone write plug.
 * @node: hlist_node structure for managing the plug using a hash table.
 * @link: To list the plug in the zone write plug error list of the disk.
 * @ref: Zone write plug reference counter. A zone write plug reference is
 *       always at least 1 when the plug is hashed in the disk plug hash table.
 *       The reference is incremented whenever a new BIO needing plugging is
 *       submitted and when a function needs to manipulate a plug. The
 *       reference count is decremented whenever a plugged BIO completes and
 *       when a function that referenced the plug returns. The initial
 *       reference is dropped whenever the zone of the zone write plug is reset
 *       or finished, and when the zone becomes full (last write BIO to the
 *       zone completes).
 * @lock: Spinlock to atomically manipulate the plug.
 * @flags: Flags indicating the plug state.
 * @zone_no: The number of the zone the plug is managing.
 * @wp_offset: The zone write pointer location relative to the start of the
 *             zone as a number of 512B sectors.
 * @bio_list: The list of BIOs that are currently plugged.
 * @bio_work: Work struct to handle issuing of plugged BIOs.
 * @rcu_head: RCU head to free zone write plugs with an RCU grace period.
 * @disk: The gendisk the plug belongs to.
 */
struct blk_zone_wplug {
	struct hlist_node node;
	struct list_head link;
	atomic_t ref;
	spinlock_t lock;
	unsigned int flags;
	unsigned int zone_no;
	unsigned int wp_offset;
	struct bio_list bio_list;
	struct work_struct bio_work;
	struct rcu_head rcu_head;
	struct gendisk *disk;
};

/*
 * Zone write plug flags bits:
 *  - BLK_ZONE_WPLUG_PLUGGED: Indicates that the zone write plug is plugged,
 *    that is, that write BIOs are being throttled due to a write BIO already
 *    being executed or the zone write plug bio list is not empty.
 *  - BLK_ZONE_WPLUG_ERROR: Indicates that a write error happened which will be
 *    recovered with a report zone to update the zone write pointer offset.
 *  - BLK_ZONE_WPLUG_UNHASHED: Indicates that the zone write plug was removed
 *    from the disk hash table and that the initial reference to the zone
 *    write plug set when the plug was first added to the hash table has been
 *    dropped. This flag is set when a zone is reset, finished or becomes full,
 *    to prevent new references to the zone write plug from being taken for
 *    newly incoming BIOs. A zone write plug flagged with this flag will be
 *    freed once all remaining references from BIOs or functions are dropped.
 */
#define BLK_ZONE_WPLUG_PLUGGED	(1U << 0)
#define BLK_ZONE_WPLUG_ERROR	(1U << 1)
#define BLK_ZONE_WPLUG_UNHASHED	(1U << 2)

#define BLK_ZONE_WPLUG_BUSY	(BLK_ZONE_WPLUG_PLUGGED | BLK_ZONE_WPLUG_ERROR)

/**
 * blk_zone_cond_str - Return string XXX in BLK_ZONE_COND_XXX.
 * @zone_cond: BLK_ZONE_COND_XXX.
 *
 * Description: Centralized block layer function to convert BLK_ZONE_COND_XXX
 * into its string format. Useful for debugging and tracing zone conditions.
 * For an invalid BLK_ZONE_COND_XXX it returns the string "UNKNOWN".
 */
const char *blk_zone_cond_str(enum blk_zone_cond zone_cond)
{
	static const char *zone_cond_str = "UNKNOWN";

	if (zone_cond < ARRAY_SIZE(zone_cond_name) && zone_cond_name[zone_cond])
		zone_cond_str = zone_cond_name[zone_cond];

	return zone_cond_str;
}
EXPORT_SYMBOL_GPL(blk_zone_cond_str);
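
/*
 * Example (illustrative sketch, not part of this file): blk_zone_cond_str()
 * is typically used when logging or tracing the condition of a reported
 * zone, e.g. from a report_zones_cb callback. "my_zone_dump_cb" is a
 * hypothetical name used only for illustration:
 *
 *	static int my_zone_dump_cb(struct blk_zone *zone, unsigned int idx,
 *				   void *data)
 *	{
 *		pr_info("zone %u: start %llu, cond %s\n",
 *			idx, (unsigned long long)zone->start,
 *			blk_zone_cond_str(zone->cond));
 *		return 0;
 *	}
 */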

/**
 * blkdev_report_zones - Get zones information
 * @bdev: Target block device
 * @sector: Sector from which to report zones
 * @nr_zones: Maximum number of zones to report
 * @cb: Callback function called for each reported zone
 * @data: Private data for the callback
 *
 * Description:
 *    Get zone information starting from the zone containing @sector for at
 *    most @nr_zones, and call @cb for each zone reported by the device.
 *    To report all zones in a device starting from @sector, the BLK_ALL_ZONES
 *    constant can be passed to @nr_zones.
 *    Returns the number of zones reported by the device, or a negative errno
 *    value in case of failure.
 *
 *    Note: The caller must use memalloc_noXX_save/restore() calls to control
 *    memory allocations done within this function.
 */
int blkdev_report_zones(struct block_device *bdev, sector_t sector,
			unsigned int nr_zones, report_zones_cb cb, void *data)
{
	struct gendisk *disk = bdev->bd_disk;
	sector_t capacity = get_capacity(disk);

	if (!bdev_is_zoned(bdev) || WARN_ON_ONCE(!disk->fops->report_zones))
		return -EOPNOTSUPP;

	if (!nr_zones || sector >= capacity)
		return 0;

	return disk->fops->report_zones(disk, sector, nr_zones, cb, data);
}
EXPORT_SYMBOL_GPL(blkdev_report_zones);
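
/*
 * Example (illustrative sketch, not part of this file): a caller typically
 * pairs blkdev_report_zones() with a report_zones_cb that consumes each zone
 * descriptor. The hypothetical callback below simply counts zones that are
 * not full; the memalloc_noio_save()/restore() pair shows how a caller can
 * restrict the allocations done internally, as noted above:
 *
 *	static int count_not_full_cb(struct blk_zone *zone, unsigned int idx,
 *				     void *data)
 *	{
 *		unsigned int *count = data;
 *
 *		if (zone->cond != BLK_ZONE_COND_FULL)
 *			(*count)++;
 *		return 0;
 *	}
 *
 *	unsigned int count = 0;
 *	unsigned int noio_flag = memalloc_noio_save();
 *	int ret = blkdev_report_zones(bdev, 0, BLK_ALL_ZONES,
 *				      count_not_full_cb, &count);
 *	memalloc_noio_restore(noio_flag);
 */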

static int blkdev_zone_reset_all(struct block_device *bdev)
{
	struct bio bio;

	bio_init(&bio, bdev, NULL, 0, REQ_OP_ZONE_RESET_ALL | REQ_SYNC);
	return submit_bio_wait(&bio);
}

/**
 * blkdev_zone_mgmt - Execute a zone management operation on a range of zones
 * @bdev: Target block device
 * @op: Operation to be performed on the zones
 * @sector: Start sector of the first zone to operate on
 * @nr_sectors: Number of sectors, should be at least the length of one zone
 *              and must be zone size aligned.
 *
 * Description:
 *    Perform the specified operation on the range of zones specified by
 *    @sector..@sector+@nr_sectors. Specifying the entire disk sector range
 *    is valid, but the specified range should not contain conventional zones.
 *    The operation to execute on each zone can be a zone reset, open, close
 *    or finish request.
 */
int blkdev_zone_mgmt(struct block_device *bdev, enum req_op op,
		     sector_t sector, sector_t nr_sectors)
{
	sector_t zone_sectors = bdev_zone_sectors(bdev);
	sector_t capacity = bdev_nr_sectors(bdev);
	sector_t end_sector = sector + nr_sectors;
	struct bio *bio = NULL;
	int ret = 0;

	if (!bdev_is_zoned(bdev))
		return -EOPNOTSUPP;

	if (bdev_read_only(bdev))
		return -EPERM;

	if (!op_is_zone_mgmt(op))
		return -EOPNOTSUPP;

	if (end_sector <= sector || end_sector > capacity)
		/* Out of range */
		return -EINVAL;

	/* Check alignment (handle eventual smaller last zone) */
	if (!bdev_is_zone_start(bdev, sector))
		return -EINVAL;

	if (!bdev_is_zone_start(bdev, nr_sectors) && end_sector != capacity)
		return -EINVAL;

	/*
	 * In the case of a zone reset operation over all zones, use
	 * REQ_OP_ZONE_RESET_ALL.
	 */
	if (op == REQ_OP_ZONE_RESET && sector == 0 && nr_sectors == capacity)
		return blkdev_zone_reset_all(bdev);

	while (sector < end_sector) {
		bio = blk_next_bio(bio, bdev, 0, op | REQ_SYNC, GFP_KERNEL);
		bio->bi_iter.bi_sector = sector;
		sector += zone_sectors;

		/* This may take a while, so be nice to others */
		cond_resched();
	}

	ret = submit_bio_wait(bio);
	bio_put(bio);

	return ret;
}
EXPORT_SYMBOL_GPL(blkdev_zone_mgmt);
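
/*
 * Example (illustrative sketch, not part of this file): resetting and then
 * finishing a single zone, where "sector" is assumed to be the start sector
 * of a sequential write required zone:
 *
 *	sector_t zone_sectors = bdev_zone_sectors(bdev);
 *	int ret;
 *
 *	ret = blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET, sector, zone_sectors);
 *	if (!ret)
 *		ret = blkdev_zone_mgmt(bdev, REQ_OP_ZONE_FINISH, sector,
 *				       zone_sectors);
 *
 * Passing sector == 0 and nr_sectors == bdev_nr_sectors(bdev) with
 * REQ_OP_ZONE_RESET takes the REQ_OP_ZONE_RESET_ALL fast path above.
 */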

struct zone_report_args {
	struct blk_zone __user *zones;
};

static int blkdev_copy_zone_to_user(struct blk_zone *zone, unsigned int idx,
				    void *data)
{
	struct zone_report_args *args = data;

	if (copy_to_user(&args->zones[idx], zone, sizeof(struct blk_zone)))
		return -EFAULT;
	return 0;
}

/*
 * BLKREPORTZONE ioctl processing.
 * Called from blkdev_ioctl.
 */
int blkdev_report_zones_ioctl(struct block_device *bdev, unsigned int cmd,
			      unsigned long arg)
{
	void __user *argp = (void __user *)arg;
	struct zone_report_args args;
	struct blk_zone_report rep;
	int ret;

	if (!argp)
		return -EINVAL;

	if (!bdev_is_zoned(bdev))
		return -ENOTTY;

	if (copy_from_user(&rep, argp, sizeof(struct blk_zone_report)))
		return -EFAULT;

	if (!rep.nr_zones)
		return -EINVAL;

	args.zones = argp + sizeof(struct blk_zone_report);
	ret = blkdev_report_zones(bdev, rep.sector, rep.nr_zones,
				  blkdev_copy_zone_to_user, &args);
	if (ret < 0)
		return ret;

	rep.nr_zones = ret;
	rep.flags = BLK_ZONE_REP_CAPACITY;
	if (copy_to_user(argp, &rep, sizeof(struct blk_zone_report)))
		return -EFAULT;
	return 0;
}

static int blkdev_truncate_zone_range(struct block_device *bdev,
		blk_mode_t mode, const struct blk_zone_range *zrange)
{
	loff_t start, end;

	if (zrange->sector + zrange->nr_sectors <= zrange->sector ||
	    zrange->sector + zrange->nr_sectors > get_capacity(bdev->bd_disk))
		/* Out of range */
		return -EINVAL;

	start = zrange->sector << SECTOR_SHIFT;
	end = ((zrange->sector + zrange->nr_sectors) << SECTOR_SHIFT) - 1;

	return truncate_bdev_range(bdev, mode, start, end);
}

/*
 * BLKRESETZONE, BLKOPENZONE, BLKCLOSEZONE and BLKFINISHZONE ioctl processing.
 * Called from blkdev_ioctl.
 */
int blkdev_zone_mgmt_ioctl(struct block_device *bdev, blk_mode_t mode,
			   unsigned int cmd, unsigned long arg)
{
	void __user *argp = (void __user *)arg;
	struct blk_zone_range zrange;
	enum req_op op;
	int ret;

	if (!argp)
		return -EINVAL;

	if (!bdev_is_zoned(bdev))
		return -ENOTTY;

	if (!(mode & BLK_OPEN_WRITE))
		return -EBADF;

	if (copy_from_user(&zrange, argp, sizeof(struct blk_zone_range)))
		return -EFAULT;

	switch (cmd) {
	case BLKRESETZONE:
		op = REQ_OP_ZONE_RESET;

		/* Invalidate the page cache, including dirty pages. */
		filemap_invalidate_lock(bdev->bd_mapping);
		ret = blkdev_truncate_zone_range(bdev, mode, &zrange);
		if (ret)
			goto fail;
		break;
	case BLKOPENZONE:
		op = REQ_OP_ZONE_OPEN;
		break;
	case BLKCLOSEZONE:
		op = REQ_OP_ZONE_CLOSE;
		break;
	case BLKFINISHZONE:
		op = REQ_OP_ZONE_FINISH;
		break;
	default:
		return -ENOTTY;
	}

	ret = blkdev_zone_mgmt(bdev, op, zrange.sector, zrange.nr_sectors);

fail:
	if (cmd == BLKRESETZONE)
		filemap_invalidate_unlock(bdev->bd_mapping);

	return ret;
}

static inline bool disk_zone_is_conv(struct gendisk *disk, sector_t sector)
{
	if (!disk->conv_zones_bitmap)
		return false;
	return test_bit(disk_zone_no(disk, sector), disk->conv_zones_bitmap);
}

static bool disk_zone_is_last(struct gendisk *disk, struct blk_zone *zone)
{
	return zone->start + zone->len >= get_capacity(disk);
}

static bool disk_zone_is_full(struct gendisk *disk,
			      unsigned int zno, unsigned int offset_in_zone)
{
	if (zno < disk->nr_zones - 1)
		return offset_in_zone >= disk->zone_capacity;
	return offset_in_zone >= disk->last_zone_capacity;
}

static bool disk_zone_wplug_is_full(struct gendisk *disk,
				    struct blk_zone_wplug *zwplug)
{
	return disk_zone_is_full(disk, zwplug->zone_no, zwplug->wp_offset);
}
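
/*
 * Worked example (illustrative only, using made-up geometry): with 256 MiB
 * zones (524288 sectors) and a 236 MiB zone capacity (483328 sectors), a
 * zone write plug with wp_offset == 483328 is considered full even though
 * wp_offset is smaller than the zone size, since writes beyond the zone
 * capacity are not possible. The last zone of a device may be smaller than
 * the other zones, which is why its capacity is tracked separately in
 * disk->last_zone_capacity.
 */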

static bool disk_insert_zone_wplug(struct gendisk *disk,
				   struct blk_zone_wplug *zwplug)
{
	struct blk_zone_wplug *zwplg;
	unsigned long flags;
	unsigned int idx =
		hash_32(zwplug->zone_no, disk->zone_wplugs_hash_bits);

	/*
	 * Add the new zone write plug to the hash table, but carefully as we
	 * are racing with other submission contexts, so we may already have a
	 * zone write plug for the same zone.
	 */
	spin_lock_irqsave(&disk->zone_wplugs_lock, flags);
	hlist_for_each_entry_rcu(zwplg, &disk->zone_wplugs_hash[idx], node) {
		if (zwplg->zone_no == zwplug->zone_no) {
			spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags);
			return false;
		}
	}
	hlist_add_head_rcu(&zwplug->node, &disk->zone_wplugs_hash[idx]);
	spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags);

	return true;
}

static struct blk_zone_wplug *disk_get_zone_wplug(struct gendisk *disk,
						  sector_t sector)
{
	unsigned int zno = disk_zone_no(disk, sector);
	unsigned int idx = hash_32(zno, disk->zone_wplugs_hash_bits);
	struct blk_zone_wplug *zwplug;

	rcu_read_lock();

	hlist_for_each_entry_rcu(zwplug, &disk->zone_wplugs_hash[idx], node) {
		if (zwplug->zone_no == zno &&
		    atomic_inc_not_zero(&zwplug->ref)) {
			rcu_read_unlock();
			return zwplug;
		}
	}

	rcu_read_unlock();

	return NULL;
}

static void disk_free_zone_wplug_rcu(struct rcu_head *rcu_head)
{
	struct blk_zone_wplug *zwplug =
		container_of(rcu_head, struct blk_zone_wplug, rcu_head);

	mempool_free(zwplug, zwplug->disk->zone_wplugs_pool);
}

static inline void disk_put_zone_wplug(struct blk_zone_wplug *zwplug)
{
	if (atomic_dec_and_test(&zwplug->ref)) {
		WARN_ON_ONCE(!bio_list_empty(&zwplug->bio_list));
		WARN_ON_ONCE(!list_empty(&zwplug->link));
		WARN_ON_ONCE(!(zwplug->flags & BLK_ZONE_WPLUG_UNHASHED));

		call_rcu(&zwplug->rcu_head, disk_free_zone_wplug_rcu);
	}
}
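
/*
 * Illustrative sketch (not part of this file) of the reference counting
 * pattern used throughout this file: any code path that looks up a zone
 * write plug with disk_get_zone_wplug() must pair the lookup with
 * disk_put_zone_wplug() once it is done touching the plug:
 *
 *	struct blk_zone_wplug *zwplug;
 *
 *	zwplug = disk_get_zone_wplug(disk, sector);
 *	if (zwplug) {
 *		// ... use zwplug, taking zwplug->lock as needed ...
 *		disk_put_zone_wplug(zwplug);
 *	}
 *
 * The last put, after the plug was unhashed, frees the plug through an RCU
 * callback so that concurrent RCU-protected lookups remain safe.
 */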

static inline bool disk_should_remove_zone_wplug(struct gendisk *disk,
						 struct blk_zone_wplug *zwplug)
{
	/* If the zone write plug was already removed, we are done. */
	if (zwplug->flags & BLK_ZONE_WPLUG_UNHASHED)
		return false;

	/* If the zone write plug is still busy, it cannot be removed. */
	if (zwplug->flags & BLK_ZONE_WPLUG_BUSY)
		return false;

	/*
	 * Completions of BIOs with blk_zone_write_plug_bio_endio() may
	 * happen after handling a request completion with
	 * blk_zone_write_plug_finish_request() (e.g. with split BIOs
	 * that are chained). In such a case, disk_zone_wplug_unplug_bio()
	 * should not attempt to remove the zone write plug until all BIO
	 * completions are seen. Check by looking at the zone write plug
	 * reference count, which is 2 when the plug is unused (one reference
	 * taken when the plug was allocated and another reference taken by the
	 * caller context).
	 */
	if (atomic_read(&zwplug->ref) > 2)
		return false;

	/* We can remove zone write plugs for zones that are empty or full. */
	return !zwplug->wp_offset || disk_zone_wplug_is_full(disk, zwplug);
}

static void disk_remove_zone_wplug(struct gendisk *disk,
				   struct blk_zone_wplug *zwplug)
{
	unsigned long flags;

	/* If the zone write plug was already removed, we have nothing to do. */
	if (zwplug->flags & BLK_ZONE_WPLUG_UNHASHED)
		return;

	/*
	 * Mark the zone write plug as unhashed and drop the extra reference we
	 * took when the plug was inserted in the hash table.
	 */
	zwplug->flags |= BLK_ZONE_WPLUG_UNHASHED;
	spin_lock_irqsave(&disk->zone_wplugs_lock, flags);
	hlist_del_init_rcu(&zwplug->node);
	spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags);
	disk_put_zone_wplug(zwplug);
}

static void blk_zone_wplug_bio_work(struct work_struct *work);

/*
 * Get a reference on the write plug for the zone containing @sector.
 * If the plug does not exist, it is allocated and hashed.
 * Return a pointer to the zone write plug with the plug spinlock held.
 */
static struct blk_zone_wplug *disk_get_and_lock_zone_wplug(struct gendisk *disk,
					sector_t sector, gfp_t gfp_mask,
					unsigned long *flags)
{
	unsigned int zno = disk_zone_no(disk, sector);
	struct blk_zone_wplug *zwplug;

again:
	zwplug = disk_get_zone_wplug(disk, sector);
	if (zwplug) {
		/*
		 * Check that a BIO completion or a zone reset or finish
		 * operation has not already removed the zone write plug from
		 * the hash table and dropped its reference count. In such a
		 * case, we need to get a new plug, so start over from the
		 * beginning.
		 */
		spin_lock_irqsave(&zwplug->lock, *flags);
		if (zwplug->flags & BLK_ZONE_WPLUG_UNHASHED) {
			spin_unlock_irqrestore(&zwplug->lock, *flags);
			disk_put_zone_wplug(zwplug);
			goto again;
		}
		return zwplug;
	}

	/*
	 * Allocate and initialize a zone write plug with an extra reference
	 * so that it is not freed when the zone write plug becomes idle without
	 * the zone being full.
	 */
	zwplug = mempool_alloc(disk->zone_wplugs_pool, gfp_mask);
	if (!zwplug)
		return NULL;

	INIT_HLIST_NODE(&zwplug->node);
	INIT_LIST_HEAD(&zwplug->link);
	atomic_set(&zwplug->ref, 2);
	spin_lock_init(&zwplug->lock);
	zwplug->flags = 0;
	zwplug->zone_no = zno;
	zwplug->wp_offset = sector & (disk->queue->limits.chunk_sectors - 1);
	bio_list_init(&zwplug->bio_list);
	INIT_WORK(&zwplug->bio_work, blk_zone_wplug_bio_work);
	zwplug->disk = disk;

	spin_lock_irqsave(&zwplug->lock, *flags);

	/*
	 * Insert the new zone write plug in the hash table. This can fail only
	 * if another context already inserted a plug. Retry from the beginning
	 * in such a case.
	 */
	if (!disk_insert_zone_wplug(disk, zwplug)) {
		spin_unlock_irqrestore(&zwplug->lock, *flags);
		mempool_free(zwplug, disk->zone_wplugs_pool);
		goto again;
	}

	return zwplug;
}
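
/*
 * Illustrative sketch (not part of this file): disk_get_and_lock_zone_wplug()
 * returns with a reference on the plug and with zwplug->lock held, so a
 * hypothetical caller looks like:
 *
 *	unsigned long flags;
 *	struct blk_zone_wplug *zwplug;
 *
 *	zwplug = disk_get_and_lock_zone_wplug(disk, sector, GFP_NOIO, &flags);
 *	if (!zwplug)
 *		return -ENOMEM;
 *	// ... update zwplug->wp_offset / flags / bio_list ...
 *	spin_unlock_irqrestore(&zwplug->lock, flags);
 *
 * The reference taken here is not necessarily dropped right away: in
 * blk_zone_wplug_handle_write() below it is kept for the BIO being plugged
 * or issued and dropped later, when that BIO completes, following the
 * reference counting rules described for struct blk_zone_wplug above.
 */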

static inline void blk_zone_wplug_bio_io_error(struct blk_zone_wplug *zwplug,
					       struct bio *bio)
{
	struct request_queue *q = zwplug->disk->queue;

	bio_clear_flag(bio, BIO_ZONE_WRITE_PLUGGING);
	bio_io_error(bio);
	disk_put_zone_wplug(zwplug);
	blk_queue_exit(q);
}

/*
 * Abort (fail) all plugged BIOs of a zone write plug.
 */
static void disk_zone_wplug_abort(struct blk_zone_wplug *zwplug)
{
	struct bio *bio;

	while ((bio = bio_list_pop(&zwplug->bio_list)))
		blk_zone_wplug_bio_io_error(zwplug, bio);
}

/*
 * Abort (fail) all plugged BIOs of a zone write plug that are not aligned
 * with the assumed write pointer location of the zone when the BIO will
 * be unplugged.
 */
static void disk_zone_wplug_abort_unaligned(struct gendisk *disk,
					    struct blk_zone_wplug *zwplug)
{
	unsigned int wp_offset = zwplug->wp_offset;
	struct bio_list bl = BIO_EMPTY_LIST;
	struct bio *bio;

	while ((bio = bio_list_pop(&zwplug->bio_list))) {
		if (disk_zone_is_full(disk, zwplug->zone_no, wp_offset) ||
		    (bio_op(bio) != REQ_OP_ZONE_APPEND &&
		     bio_offset_from_zone_start(bio) != wp_offset)) {
			blk_zone_wplug_bio_io_error(zwplug, bio);
			continue;
		}

		wp_offset += bio_sectors(bio);
		bio_list_add(&bl, bio);
	}

	bio_list_merge(&zwplug->bio_list, &bl);
}

static inline void disk_zone_wplug_set_error(struct gendisk *disk,
					     struct blk_zone_wplug *zwplug)
{
	unsigned long flags;

	if (zwplug->flags & BLK_ZONE_WPLUG_ERROR)
		return;

	/*
	 * At this point, we already have a reference on the zone write plug.
	 * However, since we are going to add the plug to the disk zone write
	 * plugs work list, increase its reference count. This reference will
	 * be dropped in disk_zone_wplugs_work() once the error state is
	 * handled, or in disk_zone_wplug_clear_error() if the zone is reset or
	 * finished.
	 */
	zwplug->flags |= BLK_ZONE_WPLUG_ERROR;
	atomic_inc(&zwplug->ref);

	spin_lock_irqsave(&disk->zone_wplugs_lock, flags);
	list_add_tail(&zwplug->link, &disk->zone_wplugs_err_list);
	spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags);
}

static inline void disk_zone_wplug_clear_error(struct gendisk *disk,
					       struct blk_zone_wplug *zwplug)
{
	unsigned long flags;

	if (!(zwplug->flags & BLK_ZONE_WPLUG_ERROR))
		return;

	/*
	 * We are racing with the error handling work which drops the reference
	 * on the zone write plug after handling the error state. So remove the
	 * plug from the error list and drop its reference count only if the
	 * error handling has not yet started, that is, if the zone write plug
	 * is still listed.
	 */
	spin_lock_irqsave(&disk->zone_wplugs_lock, flags);
	if (!list_empty(&zwplug->link)) {
		list_del_init(&zwplug->link);
		zwplug->flags &= ~BLK_ZONE_WPLUG_ERROR;
		disk_put_zone_wplug(zwplug);
	}
	spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags);
}

/*
 * Set a zone write plug write pointer offset to either 0 (zone reset case)
 * or to the zone size (zone finish case). This aborts all plugged BIOs, which
 * is fine to do because doing a zone reset or zone finish while writes are in
 * flight is a user mistake that will most likely cause all plugged BIOs to
 * fail anyway.
 */
static void disk_zone_wplug_set_wp_offset(struct gendisk *disk,
					  struct blk_zone_wplug *zwplug,
					  unsigned int wp_offset)
{
	unsigned long flags;

	spin_lock_irqsave(&zwplug->lock, flags);

	/*
	 * Make sure that a BIO completion or another zone reset or finish
	 * operation has not already removed the plug from the hash table.
	 */
	if (zwplug->flags & BLK_ZONE_WPLUG_UNHASHED) {
		spin_unlock_irqrestore(&zwplug->lock, flags);
		return;
	}

	/* Update the zone write pointer and abort all plugged BIOs. */
	zwplug->wp_offset = wp_offset;
	disk_zone_wplug_abort(zwplug);

	/*
	 * Updating the write pointer offset puts back the zone
	 * in a good state. So clear the error flag and decrement the
	 * error count if we were in error state.
	 */
	disk_zone_wplug_clear_error(disk, zwplug);

	/*
	 * The zone write plug now has no BIO plugged: remove it from the
	 * hash table so that it cannot be seen. The plug will be freed
	 * when the last reference is dropped.
	 */
	if (disk_should_remove_zone_wplug(disk, zwplug))
		disk_remove_zone_wplug(disk, zwplug);

	spin_unlock_irqrestore(&zwplug->lock, flags);
}

static bool blk_zone_wplug_handle_reset_or_finish(struct bio *bio,
						  unsigned int wp_offset)
{
	struct gendisk *disk = bio->bi_bdev->bd_disk;
	sector_t sector = bio->bi_iter.bi_sector;
	struct blk_zone_wplug *zwplug;

	/* Conventional zones cannot be reset nor finished. */
	if (disk_zone_is_conv(disk, sector)) {
		bio_io_error(bio);
		return true;
	}

	/*
	 * If we have a zone write plug, set its write pointer offset to 0
	 * (reset case) or to the zone size (finish case). This will abort all
	 * BIOs plugged for the target zone. It is fine as resetting or
	 * finishing zones while writes are still in-flight will result in the
	 * writes failing anyway.
	 */
	zwplug = disk_get_zone_wplug(disk, sector);
	if (zwplug) {
		disk_zone_wplug_set_wp_offset(disk, zwplug, wp_offset);
		disk_put_zone_wplug(zwplug);
	}

	return false;
}

static bool blk_zone_wplug_handle_reset_all(struct bio *bio)
{
	struct gendisk *disk = bio->bi_bdev->bd_disk;
	struct blk_zone_wplug *zwplug;
	sector_t sector;

	/*
	 * Set the write pointer offset of all zone write plugs to 0. This will
	 * abort all plugged BIOs. It is fine as resetting zones while writes
	 * are still in-flight will result in the writes failing anyway.
	 */
	for (sector = 0; sector < get_capacity(disk);
	     sector += disk->queue->limits.chunk_sectors) {
		zwplug = disk_get_zone_wplug(disk, sector);
		if (zwplug) {
			disk_zone_wplug_set_wp_offset(disk, zwplug, 0);
			disk_put_zone_wplug(zwplug);
		}
	}

	return false;
}

static inline void blk_zone_wplug_add_bio(struct blk_zone_wplug *zwplug,
					  struct bio *bio, unsigned int nr_segs)
{
	/*
	 * Grab an extra reference on the BIO request queue usage counter.
	 * This reference will be reused to submit a request for the BIO for
	 * blk-mq devices and dropped when the BIO is failed and after
	 * it is issued in the case of BIO-based devices.
	 */
	percpu_ref_get(&bio->bi_bdev->bd_disk->queue->q_usage_counter);

	/*
	 * The BIO is being plugged and thus will have to wait for the on-going
	 * write and for all other writes already plugged. So polling makes
	 * no sense.
	 */
	bio_clear_polled(bio);

	/*
	 * Reuse the poll cookie field to store the number of segments when
	 * split to the hardware limits.
	 */
	bio->__bi_nr_segments = nr_segs;

	/*
	 * We always receive BIOs after they are split and ready to be issued.
	 * The block layer passes the parts of a split BIO in order, and the
	 * user must also issue writes sequentially. So simply add the new BIO
	 * at the tail of the list to preserve the sequential write order.
	 */
	bio_list_add(&zwplug->bio_list, bio);
}

/*
 * Called from bio_attempt_back_merge() when a BIO was merged with a request.
 */
void blk_zone_write_plug_bio_merged(struct bio *bio)
{
	struct blk_zone_wplug *zwplug;
	unsigned long flags;

	/*
	 * If the BIO was already plugged, then we were called through
	 * blk_zone_write_plug_init_request() -> blk_attempt_bio_merge().
	 * In this case, we already hold a reference on the zone write plug for
	 * the BIO and blk_zone_write_plug_init_request() will handle the
	 * zone write pointer offset update.
	 */
	if (bio_flagged(bio, BIO_ZONE_WRITE_PLUGGING))
		return;

	bio_set_flag(bio, BIO_ZONE_WRITE_PLUGGING);

	/*
	 * Get a reference on the zone write plug of the target zone and advance
	 * the zone write pointer offset. Given that this is a merge, we already
	 * have at least one request and one BIO referencing the zone write
	 * plug. So this should not fail.
	 */
	zwplug = disk_get_zone_wplug(bio->bi_bdev->bd_disk,
				     bio->bi_iter.bi_sector);
	if (WARN_ON_ONCE(!zwplug))
		return;

	spin_lock_irqsave(&zwplug->lock, flags);
	zwplug->wp_offset += bio_sectors(bio);
	spin_unlock_irqrestore(&zwplug->lock, flags);
}

/*
 * Attempt to merge plugged BIOs with a newly prepared request for a BIO that
 * already went through zone write plugging (either a new BIO or one that was
 * unplugged).
 */
void blk_zone_write_plug_init_request(struct request *req)
{
	sector_t req_back_sector = blk_rq_pos(req) + blk_rq_sectors(req);
	struct request_queue *q = req->q;
	struct gendisk *disk = q->disk;
	struct blk_zone_wplug *zwplug =
		disk_get_zone_wplug(disk, blk_rq_pos(req));
	unsigned long flags;
	struct bio *bio;

	if (WARN_ON_ONCE(!zwplug))
		return;

	/*
	 * Indicate that completion of this request needs to be handled with
	 * blk_zone_write_plug_finish_request(), which will drop the reference
	 * on the zone write plug we took above on entry to this function.
	 */
	req->rq_flags |= RQF_ZONE_WRITE_PLUGGING;

	if (blk_queue_nomerges(q))
		return;

	/*
	 * Walk through the list of plugged BIOs to check if they can be merged
	 * into the back of the request.
	 */
	spin_lock_irqsave(&zwplug->lock, flags);
	while (!disk_zone_wplug_is_full(disk, zwplug)) {
		bio = bio_list_peek(&zwplug->bio_list);
		if (!bio)
			break;

		if (bio->bi_iter.bi_sector != req_back_sector ||
		    !blk_rq_merge_ok(req, bio))
			break;

		WARN_ON_ONCE(bio_op(bio) != REQ_OP_WRITE_ZEROES &&
			     !bio->__bi_nr_segments);

		bio_list_pop(&zwplug->bio_list);
		if (bio_attempt_back_merge(req, bio, bio->__bi_nr_segments) !=
		    BIO_MERGE_OK) {
			bio_list_add_head(&zwplug->bio_list, bio);
			break;
		}

		/*
		 * Drop the extra reference on the queue usage we got when
		 * plugging the BIO and advance the write pointer offset.
		 */
		blk_queue_exit(q);
		zwplug->wp_offset += bio_sectors(bio);

		req_back_sector += bio_sectors(bio);
	}
	spin_unlock_irqrestore(&zwplug->lock, flags);
}

/*
 * Check and prepare a BIO for submission by incrementing the write pointer
 * offset of its zone write plug and changing zone append operations into
 * regular write when zone append emulation is needed.
 */
static bool blk_zone_wplug_prepare_bio(struct blk_zone_wplug *zwplug,
				       struct bio *bio)
{
	struct gendisk *disk = bio->bi_bdev->bd_disk;

	/*
	 * Check that the user is not attempting to write to a full zone.
	 * We know such BIO will fail, and that would potentially overflow our
	 * write pointer offset beyond the end of the zone.
	 */
	if (disk_zone_wplug_is_full(disk, zwplug))
		goto err;

	if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
		/*
		 * Use a regular write starting at the current write pointer.
		 * Similarly to native zone append operations, do not allow
		 * merging.
		 */
		bio->bi_opf &= ~REQ_OP_MASK;
		bio->bi_opf |= REQ_OP_WRITE | REQ_NOMERGE;
		bio->bi_iter.bi_sector += zwplug->wp_offset;

		/*
		 * Remember that this BIO is in fact a zone append operation
		 * so that we can restore its operation code on completion.
		 */
		bio_set_flag(bio, BIO_EMULATES_ZONE_APPEND);
	} else {
		/*
		 * Check for non-sequential writes early because we avoid a
		 * whole lot of error handling trouble if we don't send it off
		 * to the driver.
		 */
		if (bio_offset_from_zone_start(bio) != zwplug->wp_offset)
			goto err;
	}

	/* Advance the zone write pointer offset. */
	zwplug->wp_offset += bio_sectors(bio);

	return true;

err:
	/* We detected an invalid write BIO: schedule error recovery. */
	disk_zone_wplug_set_error(disk, zwplug);
	kblockd_schedule_work(&disk->zone_wplugs_work);
	return false;
}

static bool blk_zone_wplug_handle_write(struct bio *bio, unsigned int nr_segs)
{
	struct gendisk *disk = bio->bi_bdev->bd_disk;
	sector_t sector = bio->bi_iter.bi_sector;
	struct blk_zone_wplug *zwplug;
	gfp_t gfp_mask = GFP_NOIO;
	unsigned long flags;

	/*
	 * BIOs must be fully contained within a zone so that we use the correct
	 * zone write plug for the entire BIO. For blk-mq devices, the block
	 * layer should already have done any splitting required to ensure this
	 * and this BIO should thus not be straddling zone boundaries. For
	 * BIO-based devices, it is the responsibility of the driver to split
	 * the bio before submitting it.
	 */
	if (WARN_ON_ONCE(bio_straddles_zones(bio))) {
		bio_io_error(bio);
		return true;
	}

	/* Conventional zones do not need write plugging. */
	if (disk_zone_is_conv(disk, sector)) {
		/* Zone append to conventional zones is not allowed. */
		if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
			bio_io_error(bio);
			return true;
		}
		return false;
	}

	if (bio->bi_opf & REQ_NOWAIT)
		gfp_mask = GFP_NOWAIT;

	zwplug = disk_get_and_lock_zone_wplug(disk, sector, gfp_mask, &flags);
	if (!zwplug) {
		bio_io_error(bio);
		return true;
	}

	/* Indicate that this BIO is being handled using zone write plugging. */
	bio_set_flag(bio, BIO_ZONE_WRITE_PLUGGING);

	/*
	 * If the zone is already plugged or has a pending error, add the BIO
	 * to the plug BIO list. Otherwise, plug and let the BIO execute.
	 */
	if (zwplug->flags & BLK_ZONE_WPLUG_BUSY)
		goto plug;

	/*
	 * If an error is detected when preparing the BIO, add it to the BIO
	 * list so that error recovery can deal with it.
	 */
	if (!blk_zone_wplug_prepare_bio(zwplug, bio))
		goto plug;

	zwplug->flags |= BLK_ZONE_WPLUG_PLUGGED;

	spin_unlock_irqrestore(&zwplug->lock, flags);

	return false;

plug:
	zwplug->flags |= BLK_ZONE_WPLUG_PLUGGED;
	blk_zone_wplug_add_bio(zwplug, bio, nr_segs);

	spin_unlock_irqrestore(&zwplug->lock, flags);

	return true;
}

/**
 * blk_zone_plug_bio - Handle a zone write BIO with zone write plugging
 * @bio: The BIO being submitted
 * @nr_segs: The number of physical segments of @bio
 *
 * Handle write, write zeroes and zone append operations requiring emulation
 * using zone write plugging.
 *
 * Return true whenever @bio execution needs to be delayed through the zone
 * write plug. Otherwise, return false to let the submission path process
 * @bio normally.
 */
bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs)
{
	struct block_device *bdev = bio->bi_bdev;

	if (!bdev->bd_disk->zone_wplugs_hash)
		return false;

	/*
	 * If the BIO already has the plugging flag set, then it was already
	 * handled through this path and this is a submission from the zone
	 * plug bio submit work.
	 */
	if (bio_flagged(bio, BIO_ZONE_WRITE_PLUGGING))
		return false;

	/*
	 * We do not need to do anything special for empty flush BIOs, e.g.
	 * BIOs such as those issued by blkdev_issue_flush(). This is because
	 * it is the responsibility of the user to first wait for the
	 * completion of write operations for flush to have any effect on the
	 * persistence of the written data.
	 */
	if (op_is_flush(bio->bi_opf) && !bio_sectors(bio))
		return false;

	/*
	 * Regular writes and write zeroes need to be handled through the target
	 * zone write plug. This includes writes with REQ_FUA | REQ_PREFLUSH
	 * which may need to go through the flush machinery depending on the
	 * target device capabilities. Plugging such writes is fine as the flush
	 * machinery operates at the request level, below the plug, and
	 * completion of the flush sequence will go through the regular BIO
	 * completion, which will handle zone write plugging.
	 * Zone append operations for devices that requested emulation must
	 * also be plugged so that these BIOs can be changed into regular
	 * write BIOs.
	 * Zone reset, reset all and finish commands need special treatment
	 * to correctly track the write pointer offset of zones. These commands
	 * are not plugged as we do not need serialization with write
	 * operations. It is the responsibility of the user to not issue reset
	 * and finish commands when write operations are in flight.
	 */
	switch (bio_op(bio)) {
	case REQ_OP_ZONE_APPEND:
		if (!bdev_emulates_zone_append(bdev))
			return false;
		fallthrough;
	case REQ_OP_WRITE:
	case REQ_OP_WRITE_ZEROES:
		return blk_zone_wplug_handle_write(bio, nr_segs);
	case REQ_OP_ZONE_RESET:
		return blk_zone_wplug_handle_reset_or_finish(bio, 0);
	case REQ_OP_ZONE_FINISH:
		return blk_zone_wplug_handle_reset_or_finish(bio,
						bdev_zone_sectors(bdev));
	case REQ_OP_ZONE_RESET_ALL:
		return blk_zone_wplug_handle_reset_all(bio);
	default:
		return false;
	}

	return false;
}
EXPORT_SYMBOL_GPL(blk_zone_plug_bio);
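
/*
 * Example (illustrative sketch, not part of this file): a BIO-based driver
 * relying on zone append emulation would call blk_zone_plug_bio() from its
 * submission path, before processing the BIO, and stop processing a BIO that
 * got plugged. "my_driver_submit_bio" is a hypothetical function name:
 *
 *	static void my_driver_submit_bio(struct bio *bio)
 *	{
 *		// nr_segs is 0 here: a BIO-based driver has not split the
 *		// BIO to hardware limits at this point.
 *		if (blk_zone_plug_bio(bio, 0))
 *			return;
 *		// ... process the BIO ...
 *	}
 *
 * For blk-mq devices, the block layer core calls blk_zone_plug_bio() itself
 * during BIO submission, passing the number of physical segments of the BIO,
 * so such drivers do not need to do this.
 */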
| 1092 | |
Damien Le Moal | 9e78c38 | 2024-05-01 20:08:58 +0900 | [diff] [blame] | 1093 | static void disk_zone_wplug_schedule_bio_work(struct gendisk *disk, |
| 1094 | struct blk_zone_wplug *zwplug) |
| 1095 | { |
| 1096 | /* |
| 1097 | * Take a reference on the zone write plug and schedule the submission |
| 1098 | * of the next plugged BIO. blk_zone_wplug_bio_work() will release the |
| 1099 | * reference we take here. |
| 1100 | */ |
| 1101 | WARN_ON_ONCE(!(zwplug->flags & BLK_ZONE_WPLUG_PLUGGED)); |
| 1102 | atomic_inc(&zwplug->ref); |
| 1103 | queue_work(disk->zone_wplugs_wq, &zwplug->bio_work); |
| 1104 | } |
| 1105 | |
Damien Le Moal | dd291d7 | 2024-04-08 10:41:07 +0900 | [diff] [blame] | 1106 | static void disk_zone_wplug_unplug_bio(struct gendisk *disk, |
| 1107 | struct blk_zone_wplug *zwplug) |
| 1108 | { |
| 1109 | unsigned long flags; |
| 1110 | |
| 1111 | spin_lock_irqsave(&zwplug->lock, flags); |
| 1112 | |
| 1113 | /* |
| 1114 | * If we had an error, schedule error recovery. The recovery work |
| 1115 | * will restart submission of plugged BIOs. |
| 1116 | */ |
| 1117 | if (zwplug->flags & BLK_ZONE_WPLUG_ERROR) { |
| 1118 | spin_unlock_irqrestore(&zwplug->lock, flags); |
| 1119 | kblockd_schedule_work(&disk->zone_wplugs_work); |
| 1120 | return; |
| 1121 | } |
| 1122 | |
| 1123 | /* Schedule submission of the next plugged BIO if we have one. */ |
| 1124 | if (!bio_list_empty(&zwplug->bio_list)) { |
Damien Le Moal | 9e78c38 | 2024-05-01 20:08:58 +0900 | [diff] [blame] | 1125 | disk_zone_wplug_schedule_bio_work(disk, zwplug); |
Damien Le Moal | dd291d7 | 2024-04-08 10:41:07 +0900 | [diff] [blame] | 1126 | spin_unlock_irqrestore(&zwplug->lock, flags); |
Damien Le Moal | dd291d7 | 2024-04-08 10:41:07 +0900 | [diff] [blame] | 1127 | return; |
| 1128 | } |
| 1129 | |
| 1130 | zwplug->flags &= ~BLK_ZONE_WPLUG_PLUGGED; |
| 1131 | |
| 1132 | /* |
| 1133 | * If the zone is full (it was fully written or finished) or empty |
| 1134 | * (it was reset), remove its zone write plug from the hash table. |
| 1135 | */ |
| 1136 | if (disk_should_remove_zone_wplug(disk, zwplug)) |
| 1137 | disk_remove_zone_wplug(disk, zwplug); |
| 1138 | |
| 1139 | spin_unlock_irqrestore(&zwplug->lock, flags); |
| 1140 | } |
| 1141 | |
| 1142 | void blk_zone_write_plug_bio_endio(struct bio *bio) |
| 1143 | { |
| 1144 | struct gendisk *disk = bio->bi_bdev->bd_disk; |
| 1145 | struct blk_zone_wplug *zwplug = |
Damien Le Moal | b5a64ec | 2024-05-01 20:09:05 +0900 | [diff] [blame] | 1146 | disk_get_zone_wplug(disk, bio->bi_iter.bi_sector); |
Damien Le Moal | dd291d7 | 2024-04-08 10:41:07 +0900 | [diff] [blame] | 1147 | unsigned long flags; |
| 1148 | |
| 1149 | if (WARN_ON_ONCE(!zwplug)) |
| 1150 | return; |
| 1151 | |
| 1152 | /* Make sure we do not see this BIO again by clearing the plug flag. */ |
| 1153 | bio_clear_flag(bio, BIO_ZONE_WRITE_PLUGGING); |
| 1154 | |
| 1155 | /* |
Damien Le Moal | 9b1ce7f | 2024-04-08 10:41:10 +0900 | [diff] [blame] | 1156 | * If this is a regular write emulating a zone append operation, |
| 1157 | * restore the original operation code. |
| 1158 | */ |
| 1159 | if (bio_flagged(bio, BIO_EMULATES_ZONE_APPEND)) { |
| 1160 | bio->bi_opf &= ~REQ_OP_MASK; |
| 1161 | bio->bi_opf |= REQ_OP_ZONE_APPEND; |
| 1162 | } |
| 1163 | |
| 1164 | /* |
Damien Le Moal | dd291d7 | 2024-04-08 10:41:07 +0900 | [diff] [blame] | 1165 | * If the BIO failed, mark the plug as having an error to trigger |
| 1166 | * recovery. |
| 1167 | */ |
| 1168 | if (bio->bi_status != BLK_STS_OK) { |
| 1169 | spin_lock_irqsave(&zwplug->lock, flags); |
| 1170 | disk_zone_wplug_set_error(disk, zwplug); |
| 1171 | spin_unlock_irqrestore(&zwplug->lock, flags); |
| 1172 | } |
| 1173 | |
Damien Le Moal | 7b29518 | 2024-05-01 20:09:00 +0900 | [diff] [blame] | 1174 | /* Drop the reference we took when the BIO was issued. */ |
| 1175 | disk_put_zone_wplug(zwplug); |
| 1176 | |
Damien Le Moal | dd291d7 | 2024-04-08 10:41:07 +0900 | [diff] [blame] | 1177 | /* |
Damien Le Moal | 347bde9 | 2024-05-01 20:09:04 +0900 | [diff] [blame] | 1178 | * For BIO-based devices, blk_zone_write_plug_finish_request() |
Damien Le Moal | dd291d7 | 2024-04-08 10:41:07 +0900 | [diff] [blame] | 1179 | * is not called. So we need to schedule execution of the next |
| 1180 | * plugged BIO here. |
| 1181 | */ |
Linus Torvalds | 3413efa | 2024-05-21 13:02:56 -0700 | [diff] [blame] | 1182 | if (bdev_test_flag(bio->bi_bdev, BD_HAS_SUBMIT_BIO)) |
Damien Le Moal | dd291d7 | 2024-04-08 10:41:07 +0900 | [diff] [blame] | 1183 | disk_zone_wplug_unplug_bio(disk, zwplug); |
| 1184 | |
Damien Le Moal | 7b29518 | 2024-05-01 20:09:00 +0900 | [diff] [blame] | 1185 | /* Drop the reference we took when entering this function. */ |
Damien Le Moal | dd291d7 | 2024-04-08 10:41:07 +0900 | [diff] [blame] | 1186 | disk_put_zone_wplug(zwplug); |
| 1187 | } |
| 1188 | |
Damien Le Moal | 347bde9 | 2024-05-01 20:09:04 +0900 | [diff] [blame] | 1189 | void blk_zone_write_plug_finish_request(struct request *req) |
Damien Le Moal | dd291d7 | 2024-04-08 10:41:07 +0900 | [diff] [blame] | 1190 | { |
| 1191 | struct gendisk *disk = req->q->disk; |
Damien Le Moal | 347bde9 | 2024-05-01 20:09:04 +0900 | [diff] [blame] | 1192 | struct blk_zone_wplug *zwplug; |
Damien Le Moal | dd291d7 | 2024-04-08 10:41:07 +0900 | [diff] [blame] | 1193 | |
Damien Le Moal | 347bde9 | 2024-05-01 20:09:04 +0900 | [diff] [blame] | 1194 | zwplug = disk_get_zone_wplug(disk, req->__sector); |
Damien Le Moal | dd291d7 | 2024-04-08 10:41:07 +0900 | [diff] [blame] | 1195 | if (WARN_ON_ONCE(!zwplug)) |
| 1196 | return; |
| 1197 | |
| 1198 | req->rq_flags &= ~RQF_ZONE_WRITE_PLUGGING; |
| 1199 | |
Damien Le Moal | dd291d7 | 2024-04-08 10:41:07 +0900 | [diff] [blame] | 1200 | /* |
| 1201 | * Drop the reference we took when the request was initialized in |
Damien Le Moal | 096bc7e | 2024-05-01 20:09:02 +0900 | [diff] [blame] | 1202 | * blk_zone_write_plug_init_request(). |
Damien Le Moal | dd291d7 | 2024-04-08 10:41:07 +0900 | [diff] [blame] | 1203 | */ |
Damien Le Moal | 7b29518 | 2024-05-01 20:09:00 +0900 | [diff] [blame] | 1204 | disk_put_zone_wplug(zwplug); |
| 1205 | |
| 1206 | disk_zone_wplug_unplug_bio(disk, zwplug); |
| 1207 | |
| 1208 | /* Drop the reference we took when entering this function. */ |
Damien Le Moal | dd291d7 | 2024-04-08 10:41:07 +0900 | [diff] [blame] | 1209 | disk_put_zone_wplug(zwplug); |
| 1210 | } |
| 1211 | |
| 1212 | static void blk_zone_wplug_bio_work(struct work_struct *work) |
| 1213 | { |
| 1214 | struct blk_zone_wplug *zwplug = |
| 1215 | container_of(work, struct blk_zone_wplug, bio_work); |
| 1216 | struct block_device *bdev; |
| 1217 | unsigned long flags; |
| 1218 | struct bio *bio; |
| 1219 | |
| 1220 | /* |
| 1221 | * Submit the next plugged BIO. If we do not have any, clear |
| 1222 | * the plugged flag. |
| 1223 | */ |
| 1224 | spin_lock_irqsave(&zwplug->lock, flags); |
| 1225 | |
| 1226 | bio = bio_list_pop(&zwplug->bio_list); |
| 1227 | if (!bio) { |
| 1228 | zwplug->flags &= ~BLK_ZONE_WPLUG_PLUGGED; |
| 1229 | spin_unlock_irqrestore(&zwplug->lock, flags); |
Damien Le Moal | 9e78c38 | 2024-05-01 20:08:58 +0900 | [diff] [blame] | 1230 | goto put_zwplug; |
Damien Le Moal | dd291d7 | 2024-04-08 10:41:07 +0900 | [diff] [blame] | 1231 | } |
| 1232 | |
| 1233 | if (!blk_zone_wplug_prepare_bio(zwplug, bio)) { |
| 1234 | /* Error recovery will decide what to do with the BIO. */ |
| 1235 | bio_list_add_head(&zwplug->bio_list, bio); |
| 1236 | spin_unlock_irqrestore(&zwplug->lock, flags); |
Damien Le Moal | 9e78c38 | 2024-05-01 20:08:58 +0900 | [diff] [blame] | 1237 | goto put_zwplug; |
Damien Le Moal | dd291d7 | 2024-04-08 10:41:07 +0900 | [diff] [blame] | 1238 | } |
| 1239 | |
| 1240 | spin_unlock_irqrestore(&zwplug->lock, flags); |
| 1241 | |
| 1242 | bdev = bio->bi_bdev; |
| 1243 | submit_bio_noacct_nocheck(bio); |
| 1244 | |
| 1245 | /* |
| 1246 | * blk-mq devices will reuse the extra reference on the request queue |
| 1247 | * usage counter we took when the BIO was plugged, but the submission |
| 1248 | * path for BIO-based devices will not do that. So drop this extra |
| 1249 | * reference here. |
| 1250 | */ |
Linus Torvalds | 3413efa | 2024-05-21 13:02:56 -0700 | [diff] [blame] | 1251 | if (bdev_test_flag(bdev, BD_HAS_SUBMIT_BIO)) |
Damien Le Moal | dd291d7 | 2024-04-08 10:41:07 +0900 | [diff] [blame] | 1252 | blk_queue_exit(bdev->bd_disk->queue); |
Damien Le Moal | 9e78c38 | 2024-05-01 20:08:58 +0900 | [diff] [blame] | 1253 | |
| 1254 | put_zwplug: |
| 1255 | /* Drop the reference we took in disk_zone_wplug_schedule_bio_work(). */ |
| 1256 | disk_put_zone_wplug(zwplug); |
Damien Le Moal | dd291d7 | 2024-04-08 10:41:07 +0900 | [diff] [blame] | 1257 | } |
| 1258 | |
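| | /* |
| | * Return the write pointer offset of @zone, in sectors from the zone start. |
| | * For example, an implicitly open zone with start 524288 and wp 524544 |
| | * yields an offset of 256 sectors. Zones without a valid write pointer |
| | * (conventional, read-only and offline zones) return UINT_MAX. |
| | */ |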
| 1259 | static unsigned int blk_zone_wp_offset(struct blk_zone *zone) |
| 1260 | { |
| 1261 | switch (zone->cond) { |
| 1262 | case BLK_ZONE_COND_IMP_OPEN: |
| 1263 | case BLK_ZONE_COND_EXP_OPEN: |
| 1264 | case BLK_ZONE_COND_CLOSED: |
| 1265 | return zone->wp - zone->start; |
| 1266 | case BLK_ZONE_COND_FULL: |
| 1267 | return zone->len; |
| 1268 | case BLK_ZONE_COND_EMPTY: |
| 1269 | return 0; |
| 1270 | case BLK_ZONE_COND_NOT_WP: |
| 1271 | case BLK_ZONE_COND_OFFLINE: |
| 1272 | case BLK_ZONE_COND_READONLY: |
| 1273 | default: |
| 1274 | /* |
| 1275 | * Conventional, offline and read-only zones do not have a valid |
| 1276 | * write pointer. |
| 1277 | */ |
| 1278 | return UINT_MAX; |
| 1279 | } |
| 1280 | } |
| 1281 | |
| 1282 | static int blk_zone_wplug_report_zone_cb(struct blk_zone *zone, |
| 1283 | unsigned int idx, void *data) |
| 1284 | { |
| 1285 | struct blk_zone *zonep = data; |
| 1286 | |
| 1287 | *zonep = *zone; |
| 1288 | return 0; |
| 1289 | } |
| 1290 | |
| 1291 | static void disk_zone_wplug_handle_error(struct gendisk *disk, |
| 1292 | struct blk_zone_wplug *zwplug) |
| 1293 | { |
| 1294 | sector_t zone_start_sector = |
| 1295 | bdev_zone_sectors(disk->part0) * zwplug->zone_no; |
| 1296 | unsigned int noio_flag; |
| 1297 | struct blk_zone zone; |
| 1298 | unsigned long flags; |
| 1299 | int ret; |
| 1300 | |
| 1301 | /* Get the current zone information from the device. */ |
| 1302 | noio_flag = memalloc_noio_save(); |
| 1303 | ret = disk->fops->report_zones(disk, zone_start_sector, 1, |
| 1304 | blk_zone_wplug_report_zone_cb, &zone); |
| 1305 | memalloc_noio_restore(noio_flag); |
| 1306 | |
| 1307 | spin_lock_irqsave(&zwplug->lock, flags); |
| 1308 | |
| 1309 | /* |
| 1310 | * A zone reset or finish may have cleared the error already. In such |
| 1311 | * a case, do nothing, as the zone report may have seen the "old" write |
| 1312 | * pointer value before the reset/finish operation completed. |
| 1313 | */ |
| 1314 | if (!(zwplug->flags & BLK_ZONE_WPLUG_ERROR)) |
| 1315 | goto unlock; |
| 1316 | |
| 1317 | zwplug->flags &= ~BLK_ZONE_WPLUG_ERROR; |
| 1318 | |
| 1319 | if (ret != 1) { |
| 1320 | /* |
| 1321 | * We failed to get the zone information, meaning that something |
| 1322 | * is likely really wrong with the device. Abort all remaining |
| 1323 | * plugged BIOs as otherwise we could end up waiting forever on |
| 1324 | * plugged BIOs to complete if there is an ongoing queue freeze. |
| 1325 | */ |
| 1326 | disk_zone_wplug_abort(zwplug); |
| 1327 | goto unplug; |
| 1328 | } |
| 1329 | |
| 1330 | /* Update the zone write pointer offset. */ |
| 1331 | zwplug->wp_offset = blk_zone_wp_offset(&zone); |
| 1332 | disk_zone_wplug_abort_unaligned(disk, zwplug); |
| 1333 | |
| 1334 | /* Restart BIO submission if we still have any BIO left. */ |
| 1335 | if (!bio_list_empty(&zwplug->bio_list)) { |
Damien Le Moal | 9e78c38 | 2024-05-01 20:08:58 +0900 | [diff] [blame] | 1336 | disk_zone_wplug_schedule_bio_work(disk, zwplug); |
Damien Le Moal | dd291d7 | 2024-04-08 10:41:07 +0900 | [diff] [blame] | 1337 | goto unlock; |
| 1338 | } |
| 1339 | |
| 1340 | unplug: |
| 1341 | zwplug->flags &= ~BLK_ZONE_WPLUG_PLUGGED; |
| 1342 | if (disk_should_remove_zone_wplug(disk, zwplug)) |
| 1343 | disk_remove_zone_wplug(disk, zwplug); |
| 1344 | |
| 1345 | unlock: |
| 1346 | spin_unlock_irqrestore(&zwplug->lock, flags); |
| 1347 | } |
| 1348 | |
| 1349 | static void disk_zone_wplugs_work(struct work_struct *work) |
| 1350 | { |
| 1351 | struct gendisk *disk = |
| 1352 | container_of(work, struct gendisk, zone_wplugs_work); |
| 1353 | struct blk_zone_wplug *zwplug; |
| 1354 | unsigned long flags; |
| 1355 | |
| 1356 | spin_lock_irqsave(&disk->zone_wplugs_lock, flags); |
| 1357 | |
| 1358 | while (!list_empty(&disk->zone_wplugs_err_list)) { |
| 1359 | zwplug = list_first_entry(&disk->zone_wplugs_err_list, |
| 1360 | struct blk_zone_wplug, link); |
| 1361 | list_del_init(&zwplug->link); |
| 1362 | spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags); |
| 1363 | |
| 1364 | disk_zone_wplug_handle_error(disk, zwplug); |
| 1365 | disk_put_zone_wplug(zwplug); |
| 1366 | |
| 1367 | spin_lock_irqsave(&disk->zone_wplugs_lock, flags); |
| 1368 | } |
| 1369 | |
| 1370 | spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags); |
| 1371 | } |
| 1372 | |
| 1373 | static inline unsigned int disk_zone_wplugs_hash_size(struct gendisk *disk) |
| 1374 | { |
| 1375 | return 1U << disk->zone_wplugs_hash_bits; |
| 1376 | } |
| 1377 | |
| 1378 | void disk_init_zone_resources(struct gendisk *disk) |
| 1379 | { |
| 1380 | spin_lock_init(&disk->zone_wplugs_lock); |
| 1381 | INIT_LIST_HEAD(&disk->zone_wplugs_err_list); |
| 1382 | INIT_WORK(&disk->zone_wplugs_work, disk_zone_wplugs_work); |
| 1383 | } |
| 1384 | |
| 1385 | /* |
| 1386 | * For the size of a disk zone write plug hash table, use the size of the |
| 1387 | * zone write plug mempool, which is the maximum of the disk open zones and |
| 1388 | * active zones limits. But do not exceed 4KB (512 hlist head entries), that is, |
| 1389 | * 9 bits. For a disk that has no limits, mempool size defaults to 128. |
| 1390 | */ |
| 1391 | #define BLK_ZONE_WPLUG_MAX_HASH_BITS 9 |
| 1392 | #define BLK_ZONE_WPLUG_DEFAULT_POOL_SIZE 128 |
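| | /* |
| | * Example sizing (illustrative numbers): a disk limited to 128 open zones |
| | * gets a mempool of 128 plugs and min(ilog2(128) + 1, 9) = 8 hash bits, |
| | * that is, 256 hlist heads; larger pools are capped at 9 bits (512 heads). |
| | */ |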
| 1393 | |
| 1394 | static int disk_alloc_zone_resources(struct gendisk *disk, |
| 1395 | unsigned int pool_size) |
| 1396 | { |
| 1397 | unsigned int i; |
| 1398 | |
| 1399 | disk->zone_wplugs_hash_bits = |
| 1400 | min(ilog2(pool_size) + 1, BLK_ZONE_WPLUG_MAX_HASH_BITS); |
| 1401 | |
| 1402 | disk->zone_wplugs_hash = |
| 1403 | kcalloc(disk_zone_wplugs_hash_size(disk), |
| 1404 | sizeof(struct hlist_head), GFP_KERNEL); |
| 1405 | if (!disk->zone_wplugs_hash) |
| 1406 | return -ENOMEM; |
| 1407 | |
| 1408 | for (i = 0; i < disk_zone_wplugs_hash_size(disk); i++) |
| 1409 | INIT_HLIST_HEAD(&disk->zone_wplugs_hash[i]); |
| 1410 | |
| 1411 | disk->zone_wplugs_pool = mempool_create_kmalloc_pool(pool_size, |
| 1412 | sizeof(struct blk_zone_wplug)); |
Damien Le Moal | a8f59e5a | 2024-04-20 16:58:11 +0900 | [diff] [blame] | 1413 | if (!disk->zone_wplugs_pool) |
| 1414 | goto free_hash; |
| 1415 | |
| 1416 | disk->zone_wplugs_wq = |
| 1417 | alloc_workqueue("%s_zwplugs", WQ_MEM_RECLAIM | WQ_HIGHPRI, |
| 1418 | pool_size, disk->disk_name); |
| 1419 | if (!disk->zone_wplugs_wq) |
| 1420 | goto destroy_pool; |
Damien Le Moal | dd291d7 | 2024-04-08 10:41:07 +0900 | [diff] [blame] | 1421 | |
| 1422 | return 0; |
Damien Le Moal | a8f59e5a | 2024-04-20 16:58:11 +0900 | [diff] [blame] | 1423 | |
| 1424 | destroy_pool: |
| 1425 | mempool_destroy(disk->zone_wplugs_pool); |
| 1426 | disk->zone_wplugs_pool = NULL; |
| 1427 | free_hash: |
| 1428 | kfree(disk->zone_wplugs_hash); |
| 1429 | disk->zone_wplugs_hash = NULL; |
| 1430 | disk->zone_wplugs_hash_bits = 0; |
| 1431 | return -ENOMEM; |
Damien Le Moal | dd291d7 | 2024-04-08 10:41:07 +0900 | [diff] [blame] | 1432 | } |
| 1433 | |
| 1434 | static void disk_destroy_zone_wplugs_hash_table(struct gendisk *disk) |
| 1435 | { |
| 1436 | struct blk_zone_wplug *zwplug; |
| 1437 | unsigned int i; |
| 1438 | |
| 1439 | if (!disk->zone_wplugs_hash) |
| 1440 | return; |
| 1441 | |
| 1442 | /* Free all the zone write plugs we have. */ |
| 1443 | for (i = 0; i < disk_zone_wplugs_hash_size(disk); i++) { |
| 1444 | while (!hlist_empty(&disk->zone_wplugs_hash[i])) { |
| 1445 | zwplug = hlist_entry(disk->zone_wplugs_hash[i].first, |
| 1446 | struct blk_zone_wplug, node); |
| 1447 | atomic_inc(&zwplug->ref); |
| 1448 | disk_remove_zone_wplug(disk, zwplug); |
| 1449 | disk_put_zone_wplug(zwplug); |
| 1450 | } |
| 1451 | } |
| 1452 | |
| 1453 | kfree(disk->zone_wplugs_hash); |
| 1454 | disk->zone_wplugs_hash = NULL; |
| 1455 | disk->zone_wplugs_hash_bits = 0; |
| 1456 | } |
| 1457 | |
| 1458 | void disk_free_zone_resources(struct gendisk *disk) |
| 1459 | { |
Damien Le Moal | 1933192 | 2024-06-07 09:21:26 +0900 | [diff] [blame] | 1460 | if (!disk->zone_wplugs_pool) |
| 1461 | return; |
| 1462 | |
Damien Le Moal | dd291d7 | 2024-04-08 10:41:07 +0900 | [diff] [blame] | 1463 | cancel_work_sync(&disk->zone_wplugs_work); |
| 1464 | |
Damien Le Moal | a8f59e5a | 2024-04-20 16:58:11 +0900 | [diff] [blame] | 1465 | if (disk->zone_wplugs_wq) { |
| 1466 | destroy_workqueue(disk->zone_wplugs_wq); |
| 1467 | disk->zone_wplugs_wq = NULL; |
| 1468 | } |
| 1469 | |
Damien Le Moal | dd291d7 | 2024-04-08 10:41:07 +0900 | [diff] [blame] | 1470 | disk_destroy_zone_wplugs_hash_table(disk); |
| 1471 | |
| 1472 | /* |
| 1473 | * Wait for the zone write plugs to be RCU-freed before |
| 1474 | * destroying the mempool. |
| 1475 | */ |
| 1476 | rcu_barrier(); |
| 1477 | |
| 1478 | mempool_destroy(disk->zone_wplugs_pool); |
| 1479 | disk->zone_wplugs_pool = NULL; |
| 1480 | |
Damien Le Moal | 2f20872 | 2024-07-04 14:28:16 +0900 | [diff] [blame] | 1481 | bitmap_free(disk->conv_zones_bitmap); |
Christoph Hellwig | d86e716 | 2022-07-06 09:03:50 +0200 | [diff] [blame] | 1482 | disk->conv_zones_bitmap = NULL; |
Damien Le Moal | dd291d7 | 2024-04-08 10:41:07 +0900 | [diff] [blame] | 1483 | disk->zone_capacity = 0; |
Damien Le Moal | 29459c3 | 2024-05-30 14:40:34 +0900 | [diff] [blame] | 1484 | disk->last_zone_capacity = 0; |
Damien Le Moal | dd291d7 | 2024-04-08 10:41:07 +0900 | [diff] [blame] | 1485 | disk->nr_zones = 0; |
| 1486 | } |
| 1487 | |
Damien Le Moal | 946dd71 | 2024-04-08 10:41:11 +0900 | [diff] [blame] | 1488 | static inline bool disk_need_zone_resources(struct gendisk *disk) |
| 1489 | { |
| 1490 | /* |
| 1491 | * All mq zoned devices need zone resources so that the block layer |
| 1492 | * can automatically handle write BIO plugging. BIO-based device drivers |
| 1493 | * (e.g. DM devices) are normally responsible for handling zone write |
| 1494 | * ordering and do not need zone resources, unless the driver requires |
| 1495 | * zone append emulation. |
| 1496 | */ |
| 1497 | return queue_is_mq(disk->queue) || |
| 1498 | queue_emulates_zone_append(disk->queue); |
| 1499 | } |
| 1500 | |
Damien Le Moal | dd291d7 | 2024-04-08 10:41:07 +0900 | [diff] [blame] | 1501 | static int disk_revalidate_zone_resources(struct gendisk *disk, |
| 1502 | unsigned int nr_zones) |
| 1503 | { |
| 1504 | struct queue_limits *lim = &disk->queue->limits; |
| 1505 | unsigned int pool_size; |
| 1506 | |
Damien Le Moal | 946dd71 | 2024-04-08 10:41:11 +0900 | [diff] [blame] | 1507 | if (!disk_need_zone_resources(disk)) |
| 1508 | return 0; |
| 1509 | |
Damien Le Moal | dd291d7 | 2024-04-08 10:41:07 +0900 | [diff] [blame] | 1510 | /* |
| 1511 | * If the device has no limit on the maximum number of open and active |
| 1512 | * zones, use BLK_ZONE_WPLUG_DEFAULT_POOL_SIZE. |
| 1513 | */ |
| 1514 | pool_size = max(lim->max_open_zones, lim->max_active_zones); |
| 1515 | if (!pool_size) |
| 1516 | pool_size = min(BLK_ZONE_WPLUG_DEFAULT_POOL_SIZE, nr_zones); |
| 1517 | |
| 1518 | if (!disk->zone_wplugs_hash) |
| 1519 | return disk_alloc_zone_resources(disk, pool_size); |
| 1520 | |
Damien Le Moal | dd291d7 | 2024-04-08 10:41:07 +0900 | [diff] [blame] | 1521 | return 0; |
Damien Le Moal | bf50545 | 2018-10-12 19:08:50 +0900 | [diff] [blame] | 1522 | } |
| 1523 | |
Christoph Hellwig | d410035 | 2019-11-11 11:39:30 +0900 | [diff] [blame] | 1524 | struct blk_revalidate_zone_args { |
| 1525 | struct gendisk *disk; |
Christoph Hellwig | f216fdd | 2019-12-03 10:39:05 +0100 | [diff] [blame] | 1526 | unsigned long *conv_zones_bitmap; |
Christoph Hellwig | e94f581 | 2019-12-03 10:39:06 +0100 | [diff] [blame] | 1527 | unsigned int nr_zones; |
Damien Le Moal | ecfe43b1 | 2024-04-08 10:41:06 +0900 | [diff] [blame] | 1528 | unsigned int zone_capacity; |
Damien Le Moal | 29459c3 | 2024-05-30 14:40:34 +0900 | [diff] [blame] | 1529 | unsigned int last_zone_capacity; |
Christoph Hellwig | d410035 | 2019-11-11 11:39:30 +0900 | [diff] [blame] | 1530 | sector_t sector; |
| 1531 | }; |
| 1532 | |
Damien Le Moal | d9dd730 | 2019-11-11 11:39:22 +0900 | [diff] [blame] | 1533 | /* |
Damien Le Moal | 843283e | 2024-04-08 10:41:08 +0900 | [diff] [blame] | 1534 | * Update the disk zone resources information and device queue limits. |
| 1535 | * The disk queue is frozen when this is executed. |
| 1536 | */ |
| 1537 | static int disk_update_zone_resources(struct gendisk *disk, |
| 1538 | struct blk_revalidate_zone_args *args) |
| 1539 | { |
| 1540 | struct request_queue *q = disk->queue; |
Damien Le Moal | 6b7593b | 2024-05-01 20:08:55 +0900 | [diff] [blame] | 1541 | unsigned int nr_seq_zones, nr_conv_zones = 0; |
| 1542 | unsigned int pool_size; |
Damien Le Moal | 843283e | 2024-04-08 10:41:08 +0900 | [diff] [blame] | 1543 | struct queue_limits lim; |
| 1544 | |
| 1545 | disk->nr_zones = args->nr_zones; |
| 1546 | disk->zone_capacity = args->zone_capacity; |
Damien Le Moal | 29459c3 | 2024-05-30 14:40:34 +0900 | [diff] [blame] | 1547 | disk->last_zone_capacity = args->last_zone_capacity; |
Damien Le Moal | 843283e | 2024-04-08 10:41:08 +0900 | [diff] [blame] | 1548 | swap(disk->conv_zones_bitmap, args->conv_zones_bitmap); |
Damien Le Moal | 6b7593b | 2024-05-01 20:08:55 +0900 | [diff] [blame] | 1549 | if (disk->conv_zones_bitmap) |
| 1550 | nr_conv_zones = bitmap_weight(disk->conv_zones_bitmap, |
| 1551 | disk->nr_zones); |
| 1552 | if (nr_conv_zones >= disk->nr_zones) { |
| 1553 | pr_warn("%s: Invalid number of conventional zones %u / %u\n", |
| 1554 | disk->disk_name, nr_conv_zones, disk->nr_zones); |
| 1555 | return -ENODEV; |
| 1556 | } |
| 1557 | |
Damien Le Moal | e21d12c | 2024-06-11 11:36:36 +0900 | [diff] [blame] | 1558 | lim = queue_limits_start_update(q); |
| 1559 | |
| 1560 | /* |
| 1561 | * Some devices can advertise zone resource limits that are larger than |
| 1562 | * the number of sequential zones of the zoned block device, e.g. a |
| 1563 | * small ZNS namespace. In that case, assume that the zoned device has |
| 1564 | * no zone resource limits. |
| 1565 | */ |
| 1566 | nr_seq_zones = disk->nr_zones - nr_conv_zones; |
| 1567 | if (lim.max_open_zones >= nr_seq_zones) |
| 1568 | lim.max_open_zones = 0; |
| 1569 | if (lim.max_active_zones >= nr_seq_zones) |
| 1570 | lim.max_active_zones = 0; |
| 1571 | |
Damien Le Moal | 6b7593b | 2024-05-01 20:08:55 +0900 | [diff] [blame] | 1572 | if (!disk->zone_wplugs_pool) |
Damien Le Moal | e21d12c | 2024-06-11 11:36:36 +0900 | [diff] [blame] | 1573 | goto commit; |
Damien Le Moal | 843283e | 2024-04-08 10:41:08 +0900 | [diff] [blame] | 1574 | |
| 1575 | /* |
| 1576 | * If the device has no limit on the maximum number of open and active |
| 1577 | * zones, set its max open zone limit to the mempool size to indicate |
| 1578 | * to the user that there is a potential performance impact due to |
| 1579 | * dynamic zone write plug allocation when simultaneously writing to |
| 1580 | * more zones than the size of the mempool. |
| 1581 | */ |
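| | /* |
| | * For example (illustrative numbers): a device exposing 1024 sequential |
| | * zones and no open/active zone limits gets pool_size = min(128, 1024) = |
| | * 128; the mempool is resized to 128 plugs and, since 128 < 1024, |
| | * max_open_zones is reported as 128. |
| | */ |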
Damien Le Moal | 6b7593b | 2024-05-01 20:08:55 +0900 | [diff] [blame] | 1582 | pool_size = max(lim.max_open_zones, lim.max_active_zones); |
| 1583 | if (!pool_size) |
| 1584 | pool_size = min(BLK_ZONE_WPLUG_DEFAULT_POOL_SIZE, nr_seq_zones); |
| 1585 | |
| 1586 | mempool_resize(disk->zone_wplugs_pool, pool_size); |
| 1587 | |
| 1588 | if (!lim.max_open_zones && !lim.max_active_zones) { |
| 1589 | if (pool_size < nr_seq_zones) |
| 1590 | lim.max_open_zones = pool_size; |
| 1591 | else |
| 1592 | lim.max_open_zones = 0; |
Damien Le Moal | 843283e | 2024-04-08 10:41:08 +0900 | [diff] [blame] | 1593 | } |
| 1594 | |
Damien Le Moal | e21d12c | 2024-06-11 11:36:36 +0900 | [diff] [blame] | 1595 | commit: |
Damien Le Moal | 6b7593b | 2024-05-01 20:08:55 +0900 | [diff] [blame] | 1596 | return queue_limits_commit_update(q, &lim); |
Damien Le Moal | 843283e | 2024-04-08 10:41:08 +0900 | [diff] [blame] | 1597 | } |
| 1598 | |
Damien Le Moal | d758014 | 2024-05-01 20:09:07 +0900 | [diff] [blame] | 1599 | static int blk_revalidate_conv_zone(struct blk_zone *zone, unsigned int idx, |
| 1600 | struct blk_revalidate_zone_args *args) |
| 1601 | { |
| 1602 | struct gendisk *disk = args->disk; |
Damien Le Moal | d758014 | 2024-05-01 20:09:07 +0900 | [diff] [blame] | 1603 | |
| 1604 | if (zone->capacity != zone->len) { |
| 1605 | pr_warn("%s: Invalid conventional zone capacity\n", |
| 1606 | disk->disk_name); |
| 1607 | return -ENODEV; |
| 1608 | } |
| 1609 | |
Damien Le Moal | 29459c3 | 2024-05-30 14:40:34 +0900 | [diff] [blame] | 1610 | if (disk_zone_is_last(disk, zone)) |
| 1611 | args->last_zone_capacity = zone->capacity; |
| 1612 | |
Damien Le Moal | d758014 | 2024-05-01 20:09:07 +0900 | [diff] [blame] | 1613 | if (!disk_need_zone_resources(disk)) |
| 1614 | return 0; |
| 1615 | |
| 1616 | if (!args->conv_zones_bitmap) { |
| 1617 | args->conv_zones_bitmap = |
Damien Le Moal | 2f20872 | 2024-07-04 14:28:16 +0900 | [diff] [blame] | 1618 | bitmap_zalloc(args->nr_zones, GFP_NOIO); |
Damien Le Moal | d758014 | 2024-05-01 20:09:07 +0900 | [diff] [blame] | 1619 | if (!args->conv_zones_bitmap) |
| 1620 | return -ENOMEM; |
| 1621 | } |
| 1622 | |
| 1623 | set_bit(idx, args->conv_zones_bitmap); |
| 1624 | |
| 1625 | return 0; |
| 1626 | } |
| 1627 | |
| 1628 | static int blk_revalidate_seq_zone(struct blk_zone *zone, unsigned int idx, |
| 1629 | struct blk_revalidate_zone_args *args) |
| 1630 | { |
| 1631 | struct gendisk *disk = args->disk; |
| 1632 | struct blk_zone_wplug *zwplug; |
| 1633 | unsigned int wp_offset; |
| 1634 | unsigned long flags; |
| 1635 | |
| 1636 | /* |
| 1637 | * Remember the capacity of the first sequential zone and check |
Damien Le Moal | cd63999 | 2024-05-30 14:40:33 +0900 | [diff] [blame] | 1638 | * if it is constant for all zones, ignoring the last zone as it can be |
| 1639 | * smaller. |
Damien Le Moal | d758014 | 2024-05-01 20:09:07 +0900 | [diff] [blame] | 1640 | */ |
| 1641 | if (!args->zone_capacity) |
| 1642 | args->zone_capacity = zone->capacity; |
Damien Le Moal | 29459c3 | 2024-05-30 14:40:34 +0900 | [diff] [blame] | 1643 | if (disk_zone_is_last(disk, zone)) { |
| 1644 | args->last_zone_capacity = zone->capacity; |
| 1645 | } else if (zone->capacity != args->zone_capacity) { |
Damien Le Moal | d758014 | 2024-05-01 20:09:07 +0900 | [diff] [blame] | 1646 | pr_warn("%s: Invalid variable zone capacity\n", |
| 1647 | disk->disk_name); |
| 1648 | return -ENODEV; |
| 1649 | } |
| 1650 | |
| 1651 | /* |
| 1652 | * We need to track the write pointer of all zones that are neither |
| 1653 | * empty nor full. So make sure we have a zone write plug for |
| 1654 | * such zones if the device has a zone write plug hash table. |
| 1655 | */ |
| 1656 | if (!disk->zone_wplugs_hash) |
| 1657 | return 0; |
| 1658 | |
| 1659 | wp_offset = blk_zone_wp_offset(zone); |
| 1660 | if (!wp_offset || wp_offset >= zone->capacity) |
| 1661 | return 0; |
| 1662 | |
| 1663 | zwplug = disk_get_and_lock_zone_wplug(disk, zone->wp, GFP_NOIO, &flags); |
| 1664 | if (!zwplug) |
| 1665 | return -ENOMEM; |
| 1666 | spin_unlock_irqrestore(&zwplug->lock, flags); |
| 1667 | disk_put_zone_wplug(zwplug); |
| 1668 | |
| 1669 | return 0; |
| 1670 | } |
| 1671 | |
Damien Le Moal | 843283e | 2024-04-08 10:41:08 +0900 | [diff] [blame] | 1672 | /* |
Damien Le Moal | d9dd730 | 2019-11-11 11:39:22 +0900 | [diff] [blame] | 1673 | * Helper function to check the validity of zones of a zoned block device. |
| 1674 | */ |
Christoph Hellwig | d410035 | 2019-11-11 11:39:30 +0900 | [diff] [blame] | 1675 | static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx, |
| 1676 | void *data) |
Damien Le Moal | d9dd730 | 2019-11-11 11:39:22 +0900 | [diff] [blame] | 1677 | { |
Christoph Hellwig | d410035 | 2019-11-11 11:39:30 +0900 | [diff] [blame] | 1678 | struct blk_revalidate_zone_args *args = data; |
| 1679 | struct gendisk *disk = args->disk; |
Damien Le Moal | d758014 | 2024-05-01 20:09:07 +0900 | [diff] [blame] | 1680 | sector_t zone_sectors = disk->queue->limits.chunk_sectors; |
| 1681 | int ret; |
Damien Le Moal | 03e51c4 | 2023-07-03 11:48:12 +0900 | [diff] [blame] | 1682 | |
| 1683 | /* Check for bad zones and holes in the zone report */ |
| 1684 | if (zone->start != args->sector) { |
| 1685 | pr_warn("%s: Zone gap at sectors %llu..%llu\n", |
| 1686 | disk->disk_name, args->sector, zone->start); |
| 1687 | return -ENODEV; |
| 1688 | } |
| 1689 | |
Damien Le Moal | cd63999 | 2024-05-30 14:40:33 +0900 | [diff] [blame] | 1690 | if (zone->start >= get_capacity(disk) || !zone->len) { |
Damien Le Moal | 03e51c4 | 2023-07-03 11:48:12 +0900 | [diff] [blame] | 1691 | pr_warn("%s: Invalid zone start %llu, length %llu\n", |
| 1692 | disk->disk_name, zone->start, zone->len); |
| 1693 | return -ENODEV; |
| 1694 | } |
Damien Le Moal | d9dd730 | 2019-11-11 11:39:22 +0900 | [diff] [blame] | 1695 | |
| 1696 | /* |
| 1697 | * All zones must have the same size, with the exception of a possibly |
| 1698 | * smaller last zone. |
| 1699 | */ |
Damien Le Moal | cd63999 | 2024-05-30 14:40:33 +0900 | [diff] [blame] | 1700 | if (!disk_zone_is_last(disk, zone)) { |
Damien Le Moal | 03e51c4 | 2023-07-03 11:48:12 +0900 | [diff] [blame] | 1701 | if (zone->len != zone_sectors) { |
Christoph Hellwig | 6c6b354 | 2019-12-03 10:39:08 +0100 | [diff] [blame] | 1702 | pr_warn("%s: Invalid zoned device with non constant zone size\n", |
| 1703 | disk->disk_name); |
| 1704 | return -ENODEV; |
| 1705 | } |
Damien Le Moal | 03e51c4 | 2023-07-03 11:48:12 +0900 | [diff] [blame] | 1706 | } else if (zone->len > zone_sectors) { |
| 1707 | pr_warn("%s: Invalid zoned device with larger last zone size\n", |
| 1708 | disk->disk_name); |
Christoph Hellwig | d410035 | 2019-11-11 11:39:30 +0900 | [diff] [blame] | 1709 | return -ENODEV; |
Damien Le Moal | d9dd730 | 2019-11-11 11:39:22 +0900 | [diff] [blame] | 1710 | } |
| 1711 | |
Damien Le Moal | ecfe43b1 | 2024-04-08 10:41:06 +0900 | [diff] [blame] | 1712 | if (!zone->capacity || zone->capacity > zone->len) { |
| 1713 | pr_warn("%s: Invalid zone capacity\n", |
| 1714 | disk->disk_name); |
| 1715 | return -ENODEV; |
| 1716 | } |
| 1717 | |
Damien Le Moal | d9dd730 | 2019-11-11 11:39:22 +0900 | [diff] [blame] | 1718 | /* Check zone type */ |
| 1719 | switch (zone->type) { |
| 1720 | case BLK_ZONE_TYPE_CONVENTIONAL: |
Damien Le Moal | d758014 | 2024-05-01 20:09:07 +0900 | [diff] [blame] | 1721 | ret = blk_revalidate_conv_zone(zone, idx, args); |
Christoph Hellwig | e94f581 | 2019-12-03 10:39:06 +0100 | [diff] [blame] | 1722 | break; |
Damien Le Moal | d9dd730 | 2019-11-11 11:39:22 +0900 | [diff] [blame] | 1723 | case BLK_ZONE_TYPE_SEQWRITE_REQ: |
Damien Le Moal | d758014 | 2024-05-01 20:09:07 +0900 | [diff] [blame] | 1724 | ret = blk_revalidate_seq_zone(zone, idx, args); |
Damien Le Moal | d9dd730 | 2019-11-11 11:39:22 +0900 | [diff] [blame] | 1725 | break; |
Damien Le Moal | 587371e | 2024-01-07 16:22:12 +0900 | [diff] [blame] | 1726 | case BLK_ZONE_TYPE_SEQWRITE_PREF: |
Damien Le Moal | d9dd730 | 2019-11-11 11:39:22 +0900 | [diff] [blame] | 1727 | default: |
| 1728 | pr_warn("%s: Invalid zone type 0x%x at sectors %llu\n", |
| 1729 | disk->disk_name, (int)zone->type, zone->start); |
Damien Le Moal | d758014 | 2024-05-01 20:09:07 +0900 | [diff] [blame] | 1730 | ret = -ENODEV; |
Damien Le Moal | d9dd730 | 2019-11-11 11:39:22 +0900 | [diff] [blame] | 1731 | } |
| 1732 | |
Damien Le Moal | d758014 | 2024-05-01 20:09:07 +0900 | [diff] [blame] | 1733 | if (!ret) |
| 1734 | args->sector += zone->len; |
| 1735 | |
| 1736 | return ret; |
Christoph Hellwig | d410035 | 2019-11-11 11:39:30 +0900 | [diff] [blame] | 1737 | } |
| 1738 | |
Damien Le Moal | bf50545 | 2018-10-12 19:08:50 +0900 | [diff] [blame] | 1739 | /** |
Damien Le Moal | 02ccd7c | 2024-04-08 10:41:26 +0900 | [diff] [blame] | 1740 | * blk_revalidate_disk_zones - (re)allocate and initialize zone write plugs |
Damien Le Moal | bf50545 | 2018-10-12 19:08:50 +0900 | [diff] [blame] | 1741 | * @disk: Target disk |
| 1742 | * |
Damien Le Moal | 9b3c08b | 2024-04-08 10:41:20 +0900 | [diff] [blame] | 1743 | * Helper function for low-level device drivers to check, (re)allocate and |
| 1744 | * initialize resources used for managing zoned disks. This function should |
| 1745 | * normally be called by blk-mq based drivers when a zoned gendisk is probed |
| 1746 | * and when the zone configuration of the gendisk changes (e.g. after a format). |
Damien Le Moal | 03e51c4 | 2023-07-03 11:48:12 +0900 | [diff] [blame] | 1747 | * Before calling this function, the device driver must already have set the |
| 1748 | * device zone size (chunk_sectors limit) and the max zone append limit. |
Damien Le Moal | 946dd71 | 2024-04-08 10:41:11 +0900 | [diff] [blame] | 1749 | * BIO-based drivers can also use this function as long as the device queue |
| 1750 | * can be safely frozen. |
Damien Le Moal | bf50545 | 2018-10-12 19:08:50 +0900 | [diff] [blame] | 1751 | */ |
Damien Le Moal | 9b3c08b | 2024-04-08 10:41:20 +0900 | [diff] [blame] | 1752 | int blk_revalidate_disk_zones(struct gendisk *disk) |
Damien Le Moal | bf50545 | 2018-10-12 19:08:50 +0900 | [diff] [blame] | 1753 | { |
| 1754 | struct request_queue *q = disk->queue; |
Damien Le Moal | 03e51c4 | 2023-07-03 11:48:12 +0900 | [diff] [blame] | 1755 | sector_t zone_sectors = q->limits.chunk_sectors; |
| 1756 | sector_t capacity = get_capacity(disk); |
| 1757 | struct blk_revalidate_zone_args args = { }; |
Christoph Hellwig | 6c6b354 | 2019-12-03 10:39:08 +0100 | [diff] [blame] | 1758 | unsigned int noio_flag; |
Damien Le Moal | dd291d7 | 2024-04-08 10:41:07 +0900 | [diff] [blame] | 1759 | int ret = -ENOMEM; |
Damien Le Moal | bf50545 | 2018-10-12 19:08:50 +0900 | [diff] [blame] | 1760 | |
Christoph Hellwig | c98c3d09 | 2019-11-11 11:39:23 +0900 | [diff] [blame] | 1761 | if (WARN_ON_ONCE(!blk_queue_is_zoned(q))) |
| 1762 | return -EIO; |
Damien Le Moal | bf50545 | 2018-10-12 19:08:50 +0900 | [diff] [blame] | 1763 | |
Damien Le Moal | 03e51c4 | 2023-07-03 11:48:12 +0900 | [diff] [blame] | 1764 | if (!capacity) |
| 1765 | return -ENODEV; |
| 1766 | |
| 1767 | /* |
| 1768 | * Checks that the device driver indicated a valid zone size and that |
| 1769 | * the max zone append limit is set. |
| 1770 | */ |
| 1771 | if (!zone_sectors || !is_power_of_2(zone_sectors)) { |
| 1772 | pr_warn("%s: Invalid non power of two zone size (%llu)\n", |
| 1773 | disk->disk_name, zone_sectors); |
| 1774 | return -ENODEV; |
| 1775 | } |
| 1776 | |
Damien Le Moal | ccdbf0a | 2024-04-08 10:41:09 +0900 | [diff] [blame] | 1777 | if (!queue_max_zone_append_sectors(q)) { |
Damien Le Moal | 03e51c4 | 2023-07-03 11:48:12 +0900 | [diff] [blame] | 1778 | pr_warn("%s: Invalid 0 maximum zone append limit\n", |
| 1779 | disk->disk_name); |
| 1780 | return -ENODEV; |
| 1781 | } |
Johannes Thumshirn | 1a1206d | 2020-07-30 20:25:17 +0900 | [diff] [blame] | 1782 | |
Christoph Hellwig | e94f581 | 2019-12-03 10:39:06 +0100 | [diff] [blame] | 1783 | /* |
Christoph Hellwig | 6c6b354 | 2019-12-03 10:39:08 +0100 | [diff] [blame] | 1784 | * Ensure that all memory allocations in this context are done as if |
| 1785 | * GFP_NOIO was specified. |
Christoph Hellwig | e94f581 | 2019-12-03 10:39:06 +0100 | [diff] [blame] | 1786 | */ |
Damien Le Moal | 03e51c4 | 2023-07-03 11:48:12 +0900 | [diff] [blame] | 1787 | args.disk = disk; |
| 1788 | args.nr_zones = (capacity + zone_sectors - 1) >> ilog2(zone_sectors); |
Christoph Hellwig | 6c6b354 | 2019-12-03 10:39:08 +0100 | [diff] [blame] | 1789 | noio_flag = memalloc_noio_save(); |
Damien Le Moal | dd291d7 | 2024-04-08 10:41:07 +0900 | [diff] [blame] | 1790 | ret = disk_revalidate_zone_resources(disk, args.nr_zones); |
| 1791 | if (ret) { |
| 1792 | memalloc_noio_restore(noio_flag); |
| 1793 | return ret; |
| 1794 | } |
Christoph Hellwig | 6c6b354 | 2019-12-03 10:39:08 +0100 | [diff] [blame] | 1795 | ret = disk->fops->report_zones(disk, 0, UINT_MAX, |
| 1796 | blk_revalidate_zone_cb, &args); |
Damien Le Moal | 2afdeb2 | 2020-11-11 16:36:06 +0900 | [diff] [blame] | 1797 | if (!ret) { |
| 1798 | pr_warn("%s: No zones reported\n", disk->disk_name); |
| 1799 | ret = -ENODEV; |
| 1800 | } |
Christoph Hellwig | 6c6b354 | 2019-12-03 10:39:08 +0100 | [diff] [blame] | 1801 | memalloc_noio_restore(noio_flag); |
Damien Le Moal | bd976e5 | 2019-07-01 14:09:16 +0900 | [diff] [blame] | 1802 | |
Damien Le Moal | bf50545 | 2018-10-12 19:08:50 +0900 | [diff] [blame] | 1803 | /* |
Damien Le Moal | 2afdeb2 | 2020-11-11 16:36:06 +0900 | [diff] [blame] | 1804 | * If zones were reported, make sure that the entire disk capacity |
| 1805 | * has been checked. |
| 1806 | */ |
Damien Le Moal | 03e51c4 | 2023-07-03 11:48:12 +0900 | [diff] [blame] | 1807 | if (ret > 0 && args.sector != capacity) { |
Damien Le Moal | 2afdeb2 | 2020-11-11 16:36:06 +0900 | [diff] [blame] | 1808 | pr_warn("%s: Missing zones from sector %llu\n", |
| 1809 | disk->disk_name, args.sector); |
| 1810 | ret = -ENODEV; |
| 1811 | } |
| 1812 | |
| 1813 | /* |
Damien Le Moal | 02ccd7c | 2024-04-08 10:41:26 +0900 | [diff] [blame] | 1814 | * Set the new disk zone parameters only once the queue is frozen and |
| 1815 | * all I/Os are completed. |
Damien Le Moal | bf50545 | 2018-10-12 19:08:50 +0900 | [diff] [blame] | 1816 | */ |
| 1817 | blk_mq_freeze_queue(q); |
Damien Le Moal | 9b3c08b | 2024-04-08 10:41:20 +0900 | [diff] [blame] | 1818 | if (ret > 0) |
Damien Le Moal | 843283e | 2024-04-08 10:41:08 +0900 | [diff] [blame] | 1819 | ret = disk_update_zone_resources(disk, &args); |
Damien Le Moal | 9b3c08b | 2024-04-08 10:41:20 +0900 | [diff] [blame] | 1820 | else |
Christoph Hellwig | d410035 | 2019-11-11 11:39:30 +0900 | [diff] [blame] | 1821 | pr_warn("%s: failed to revalidate zones\n", disk->disk_name); |
Damien Le Moal | 843283e | 2024-04-08 10:41:08 +0900 | [diff] [blame] | 1822 | if (ret) |
| 1823 | disk_free_zone_resources(disk); |
Damien Le Moal | bf50545 | 2018-10-12 19:08:50 +0900 | [diff] [blame] | 1824 | blk_mq_unfreeze_queue(q); |
| 1825 | |
Christoph Hellwig | f216fdd | 2019-12-03 10:39:05 +0100 | [diff] [blame] | 1826 | kfree(args.conv_zones_bitmap); |
Damien Le Moal | ecfe43b1 | 2024-04-08 10:41:06 +0900 | [diff] [blame] | 1827 | |
Damien Le Moal | bf50545 | 2018-10-12 19:08:50 +0900 | [diff] [blame] | 1828 | return ret; |
| 1829 | } |
| 1830 | EXPORT_SYMBOL_GPL(blk_revalidate_disk_zones); |
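| | /* |
| | * Illustrative driver-side sketch (an assumption, not code from any real |
| | * driver; the queue limit field names used below should be checked against |
| | * the target kernel version): set the zone size and the zone append limit, |
| | * then revalidate the zones: |
| | * |
| | *	struct queue_limits lim = queue_limits_start_update(disk->queue); |
| | * |
| | *	lim.chunk_sectors = zone_size_sectors; |
| | *	lim.max_zone_append_sectors = zone_append_limit_sectors; |
| | *	ret = queue_limits_commit_update(disk->queue, &lim); |
| | *	if (!ret) |
| | *		ret = blk_revalidate_disk_zones(disk); |
| | */ |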
Damien Le Moal | d9f1439 | 2024-04-08 10:41:24 +0900 | [diff] [blame] | 1831 | |
| 1832 | #ifdef CONFIG_BLK_DEBUG_FS |
| 1833 | |
Damien Le Moal | a98b05b | 2024-04-08 10:41:25 +0900 | [diff] [blame] | 1834 | int queue_zone_wplugs_show(void *data, struct seq_file *m) |
Damien Le Moal | d9f1439 | 2024-04-08 10:41:24 +0900 | [diff] [blame] | 1835 | { |
| 1836 | struct request_queue *q = data; |
Damien Le Moal | a98b05b | 2024-04-08 10:41:25 +0900 | [diff] [blame] | 1837 | struct gendisk *disk = q->disk; |
| 1838 | struct blk_zone_wplug *zwplug; |
| 1839 | unsigned int zwp_wp_offset, zwp_flags; |
| 1840 | unsigned int zwp_zone_no, zwp_ref; |
| 1841 | unsigned int zwp_bio_list_size, i; |
| 1842 | unsigned long flags; |
Damien Le Moal | d9f1439 | 2024-04-08 10:41:24 +0900 | [diff] [blame] | 1843 | |
Johannes Thumshirn | 57787fa | 2024-04-25 05:02:39 -0700 | [diff] [blame] | 1844 | if (!disk->zone_wplugs_hash) |
| 1845 | return 0; |
| 1846 | |
Damien Le Moal | a98b05b | 2024-04-08 10:41:25 +0900 | [diff] [blame] | 1847 | rcu_read_lock(); |
| 1848 | for (i = 0; i < disk_zone_wplugs_hash_size(disk); i++) { |
| 1849 | hlist_for_each_entry_rcu(zwplug, |
| 1850 | &disk->zone_wplugs_hash[i], node) { |
| 1851 | spin_lock_irqsave(&zwplug->lock, flags); |
| 1852 | zwp_zone_no = zwplug->zone_no; |
| 1853 | zwp_flags = zwplug->flags; |
| 1854 | zwp_ref = atomic_read(&zwplug->ref); |
| 1855 | zwp_wp_offset = zwplug->wp_offset; |
| 1856 | zwp_bio_list_size = bio_list_size(&zwplug->bio_list); |
| 1857 | spin_unlock_irqrestore(&zwplug->lock, flags); |
Damien Le Moal | d9f1439 | 2024-04-08 10:41:24 +0900 | [diff] [blame] | 1858 | |
Damien Le Moal | a98b05b | 2024-04-08 10:41:25 +0900 | [diff] [blame] | 1859 | seq_printf(m, "%u 0x%x %u %u %u\n", |
| 1860 | zwp_zone_no, zwp_flags, zwp_ref, |
| 1861 | zwp_wp_offset, zwp_bio_list_size); |
| 1862 | } |
| 1863 | } |
| 1864 | rcu_read_unlock(); |
Damien Le Moal | d9f1439 | 2024-04-08 10:41:24 +0900 | [diff] [blame] | 1865 | |
| 1866 | return 0; |
| 1867 | } |
| 1868 | |
| 1869 | #endif |