| // SPDX-License-Identifier: GPL-2.0 |
| /* |
| * Bad block management |
| * |
| * - Heavily based on MD badblocks code from Neil Brown |
| * |
| * Copyright (c) 2015, Intel Corporation. |
| */ |
| |
| #include <linux/badblocks.h> |
| #include <linux/seqlock.h> |
| #include <linux/device.h> |
| #include <linux/kernel.h> |
| #include <linux/module.h> |
| #include <linux/stddef.h> |
| #include <linux/types.h> |
| #include <linux/slab.h> |
| |
| /** |
| * badblocks_check() - check a given range for bad sectors |
| * @bb: the badblocks structure that holds all badblock information |
| * @s: sector (start) at which to check for badblocks |
| * @sectors: number of sectors to check for badblocks |
| * @first_bad: pointer to store location of the first badblock |
| * @bad_sectors: pointer to store number of badblocks after @first_bad |
| * |
| * We can record which blocks on each device are 'bad' and so just |
| * fail those blocks, or that stripe, rather than the whole device. |
| * Entries in the bad-block table are 64bits wide. This comprises: |
| * Length of bad-range, in sectors: 0-511 for lengths 1-512 |
| * Start of bad-range, sector offset, 54 bits (allows 8 exbibytes) |
| * A 'shift' can be set so that larger blocks are tracked and |
| * consequently larger devices can be covered. |
| * 'Acknowledged' flag - 1 bit. - the most significant bit. |
| * |
| * Locking of the bad-block table uses a seqlock so badblocks_check |
| * might need to retry if it is very unlucky. |
| * We will sometimes want to check for bad blocks in a bi_end_io function, |
| * so we use the write_seqlock_irq variant. |
| * |
| * When looking for a bad block we specify a range and want to |
| * know if any block in the range is bad. So we binary-search |
| * to the last range that starts at-or-before the given endpoint, |
| * (or "before the sector after the target range") |
| * then see if it ends after the given start. |
| * |
| * Return: |
| * 0: there are no known bad blocks in the range |
| * 1: there are known bad block which are all acknowledged |
| * -1: there are bad blocks which have not yet been acknowledged in metadata. |
| * plus the start/length of the first bad section we overlap. |
| */ |
| int badblocks_check(struct badblocks *bb, sector_t s, int sectors, |
| sector_t *first_bad, int *bad_sectors) |
| { |
| int hi; |
| int lo; |
| u64 *p = bb->page; |
| int rv; |
| sector_t target = s + sectors; |
| unsigned seq; |
| |
| if (bb->shift > 0) { |
| /* round the start down, and the end up */ |
| s >>= bb->shift; |
| target += (1<<bb->shift) - 1; |
| target >>= bb->shift; |
| } |
| /* 'target' is now the first block after the bad range */ |
| |
| retry: |
| seq = read_seqbegin(&bb->lock); |
| lo = 0; |
| rv = 0; |
| hi = bb->count; |
| |
| /* Binary search between lo and hi for 'target' |
| * i.e. for the last range that starts before 'target' |
| */ |
| /* INVARIANT: ranges before 'lo' and at-or-after 'hi' |
| * are known not to be the last range before target. |
| * VARIANT: hi-lo is the number of possible |
| * ranges, and decreases until it reaches 1 |
| */ |
| while (hi - lo > 1) { |
| int mid = (lo + hi) / 2; |
| sector_t a = BB_OFFSET(p[mid]); |
| |
| if (a < target) |
| /* This could still be the one, earlier ranges |
| * could not. |
| */ |
| lo = mid; |
| else |
| /* This and later ranges are definitely out. */ |
| hi = mid; |
| } |
| /* 'lo' might be the last that started before target, but 'hi' isn't */ |
| if (hi > lo) { |
| /* need to check all range that end after 's' to see if |
| * any are unacknowledged. |
| */ |
| while (lo >= 0 && |
| BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) { |
| if (BB_OFFSET(p[lo]) < target) { |
| /* starts before the end, and finishes after |
| * the start, so they must overlap |
| */ |
| if (rv != -1 && BB_ACK(p[lo])) |
| rv = 1; |
| else |
| rv = -1; |
| *first_bad = BB_OFFSET(p[lo]); |
| *bad_sectors = BB_LEN(p[lo]); |
| } |
| lo--; |
| } |
| } |
| |
| if (read_seqretry(&bb->lock, seq)) |
| goto retry; |
| |
| return rv; |
| } |
| EXPORT_SYMBOL_GPL(badblocks_check); |
| |
| static void badblocks_update_acked(struct badblocks *bb) |
| { |
| u64 *p = bb->page; |
| int i; |
| bool unacked = false; |
| |
| if (!bb->unacked_exist) |
| return; |
| |
| for (i = 0; i < bb->count ; i++) { |
| if (!BB_ACK(p[i])) { |
| unacked = true; |
| break; |
| } |
| } |
| |
| if (!unacked) |
| bb->unacked_exist = 0; |
| } |
| |
| /** |
| * badblocks_set() - Add a range of bad blocks to the table. |
| * @bb: the badblocks structure that holds all badblock information |
| * @s: first sector to mark as bad |
| * @sectors: number of sectors to mark as bad |
| * @acknowledged: weather to mark the bad sectors as acknowledged |
| * |
| * This might extend the table, or might contract it if two adjacent ranges |
| * can be merged. We binary-search to find the 'insertion' point, then |
| * decide how best to handle it. |
| * |
| * Return: |
| * 0: success |
| * 1: failed to set badblocks (out of space) |
| */ |
| int badblocks_set(struct badblocks *bb, sector_t s, int sectors, |
| int acknowledged) |
| { |
| u64 *p; |
| int lo, hi; |
| int rv = 0; |
| unsigned long flags; |
| |
| if (bb->shift < 0) |
| /* badblocks are disabled */ |
| return 1; |
| |
| if (bb->shift) { |
| /* round the start down, and the end up */ |
| sector_t next = s + sectors; |
| |
| s >>= bb->shift; |
| next += (1<<bb->shift) - 1; |
| next >>= bb->shift; |
| sectors = next - s; |
| } |
| |
| write_seqlock_irqsave(&bb->lock, flags); |
| |
| p = bb->page; |
| lo = 0; |
| hi = bb->count; |
| /* Find the last range that starts at-or-before 's' */ |
| while (hi - lo > 1) { |
| int mid = (lo + hi) / 2; |
| sector_t a = BB_OFFSET(p[mid]); |
| |
| if (a <= s) |
| lo = mid; |
| else |
| hi = mid; |
| } |
| if (hi > lo && BB_OFFSET(p[lo]) > s) |
| hi = lo; |
| |
| if (hi > lo) { |
| /* we found a range that might merge with the start |
| * of our new range |
| */ |
| sector_t a = BB_OFFSET(p[lo]); |
| sector_t e = a + BB_LEN(p[lo]); |
| int ack = BB_ACK(p[lo]); |
| |
| if (e >= s) { |
| /* Yes, we can merge with a previous range */ |
| if (s == a && s + sectors >= e) |
| /* new range covers old */ |
| ack = acknowledged; |
| else |
| ack = ack && acknowledged; |
| |
| if (e < s + sectors) |
| e = s + sectors; |
| if (e - a <= BB_MAX_LEN) { |
| p[lo] = BB_MAKE(a, e-a, ack); |
| s = e; |
| } else { |
| /* does not all fit in one range, |
| * make p[lo] maximal |
| */ |
| if (BB_LEN(p[lo]) != BB_MAX_LEN) |
| p[lo] = BB_MAKE(a, BB_MAX_LEN, ack); |
| s = a + BB_MAX_LEN; |
| } |
| sectors = e - s; |
| } |
| } |
| if (sectors && hi < bb->count) { |
| /* 'hi' points to the first range that starts after 's'. |
| * Maybe we can merge with the start of that range |
| */ |
| sector_t a = BB_OFFSET(p[hi]); |
| sector_t e = a + BB_LEN(p[hi]); |
| int ack = BB_ACK(p[hi]); |
| |
| if (a <= s + sectors) { |
| /* merging is possible */ |
| if (e <= s + sectors) { |
| /* full overlap */ |
| e = s + sectors; |
| ack = acknowledged; |
| } else |
| ack = ack && acknowledged; |
| |
| a = s; |
| if (e - a <= BB_MAX_LEN) { |
| p[hi] = BB_MAKE(a, e-a, ack); |
| s = e; |
| } else { |
| p[hi] = BB_MAKE(a, BB_MAX_LEN, ack); |
| s = a + BB_MAX_LEN; |
| } |
| sectors = e - s; |
| lo = hi; |
| hi++; |
| } |
| } |
| if (sectors == 0 && hi < bb->count) { |
| /* we might be able to combine lo and hi */ |
| /* Note: 's' is at the end of 'lo' */ |
| sector_t a = BB_OFFSET(p[hi]); |
| int lolen = BB_LEN(p[lo]); |
| int hilen = BB_LEN(p[hi]); |
| int newlen = lolen + hilen - (s - a); |
| |
| if (s >= a && newlen < BB_MAX_LEN) { |
| /* yes, we can combine them */ |
| int ack = BB_ACK(p[lo]) && BB_ACK(p[hi]); |
| |
| p[lo] = BB_MAKE(BB_OFFSET(p[lo]), newlen, ack); |
| memmove(p + hi, p + hi + 1, |
| (bb->count - hi - 1) * 8); |
| bb->count--; |
| } |
| } |
| while (sectors) { |
| /* didn't merge (it all). |
| * Need to add a range just before 'hi' |
| */ |
| if (bb->count >= MAX_BADBLOCKS) { |
| /* No room for more */ |
| rv = 1; |
| break; |
| } else { |
| int this_sectors = sectors; |
| |
| memmove(p + hi + 1, p + hi, |
| (bb->count - hi) * 8); |
| bb->count++; |
| |
| if (this_sectors > BB_MAX_LEN) |
| this_sectors = BB_MAX_LEN; |
| p[hi] = BB_MAKE(s, this_sectors, acknowledged); |
| sectors -= this_sectors; |
| s += this_sectors; |
| } |
| } |
| |
| bb->changed = 1; |
| if (!acknowledged) |
| bb->unacked_exist = 1; |
| else |
| badblocks_update_acked(bb); |
| write_sequnlock_irqrestore(&bb->lock, flags); |
| |
| return rv; |
| } |
| EXPORT_SYMBOL_GPL(badblocks_set); |
| |
| /** |
| * badblocks_clear() - Remove a range of bad blocks to the table. |
| * @bb: the badblocks structure that holds all badblock information |
| * @s: first sector to mark as bad |
| * @sectors: number of sectors to mark as bad |
| * |
| * This may involve extending the table if we spilt a region, |
| * but it must not fail. So if the table becomes full, we just |
| * drop the remove request. |
| * |
| * Return: |
| * 0: success |
| * 1: failed to clear badblocks |
| */ |
| int badblocks_clear(struct badblocks *bb, sector_t s, int sectors) |
| { |
| u64 *p; |
| int lo, hi; |
| sector_t target = s + sectors; |
| int rv = 0; |
| |
| if (bb->shift > 0) { |
| /* When clearing we round the start up and the end down. |
| * This should not matter as the shift should align with |
| * the block size and no rounding should ever be needed. |
| * However it is better the think a block is bad when it |
| * isn't than to think a block is not bad when it is. |
| */ |
| s += (1<<bb->shift) - 1; |
| s >>= bb->shift; |
| target >>= bb->shift; |
| } |
| |
| write_seqlock_irq(&bb->lock); |
| |
| p = bb->page; |
| lo = 0; |
| hi = bb->count; |
| /* Find the last range that starts before 'target' */ |
| while (hi - lo > 1) { |
| int mid = (lo + hi) / 2; |
| sector_t a = BB_OFFSET(p[mid]); |
| |
| if (a < target) |
| lo = mid; |
| else |
| hi = mid; |
| } |
| if (hi > lo) { |
| /* p[lo] is the last range that could overlap the |
| * current range. Earlier ranges could also overlap, |
| * but only this one can overlap the end of the range. |
| */ |
| if ((BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > target) && |
| (BB_OFFSET(p[lo]) < target)) { |
| /* Partial overlap, leave the tail of this range */ |
| int ack = BB_ACK(p[lo]); |
| sector_t a = BB_OFFSET(p[lo]); |
| sector_t end = a + BB_LEN(p[lo]); |
| |
| if (a < s) { |
| /* we need to split this range */ |
| if (bb->count >= MAX_BADBLOCKS) { |
| rv = -ENOSPC; |
| goto out; |
| } |
| memmove(p+lo+1, p+lo, (bb->count - lo) * 8); |
| bb->count++; |
| p[lo] = BB_MAKE(a, s-a, ack); |
| lo++; |
| } |
| p[lo] = BB_MAKE(target, end - target, ack); |
| /* there is no longer an overlap */ |
| hi = lo; |
| lo--; |
| } |
| while (lo >= 0 && |
| (BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) && |
| (BB_OFFSET(p[lo]) < target)) { |
| /* This range does overlap */ |
| if (BB_OFFSET(p[lo]) < s) { |
| /* Keep the early parts of this range. */ |
| int ack = BB_ACK(p[lo]); |
| sector_t start = BB_OFFSET(p[lo]); |
| |
| p[lo] = BB_MAKE(start, s - start, ack); |
| /* now low doesn't overlap, so.. */ |
| break; |
| } |
| lo--; |
| } |
| /* 'lo' is strictly before, 'hi' is strictly after, |
| * anything between needs to be discarded |
| */ |
| if (hi - lo > 1) { |
| memmove(p+lo+1, p+hi, (bb->count - hi) * 8); |
| bb->count -= (hi - lo - 1); |
| } |
| } |
| |
| badblocks_update_acked(bb); |
| bb->changed = 1; |
| out: |
| write_sequnlock_irq(&bb->lock); |
| return rv; |
| } |
| EXPORT_SYMBOL_GPL(badblocks_clear); |
| |
| /** |
| * ack_all_badblocks() - Acknowledge all bad blocks in a list. |
| * @bb: the badblocks structure that holds all badblock information |
| * |
| * This only succeeds if ->changed is clear. It is used by |
| * in-kernel metadata updates |
| */ |
| void ack_all_badblocks(struct badblocks *bb) |
| { |
| if (bb->page == NULL || bb->changed) |
| /* no point even trying */ |
| return; |
| write_seqlock_irq(&bb->lock); |
| |
| if (bb->changed == 0 && bb->unacked_exist) { |
| u64 *p = bb->page; |
| int i; |
| |
| for (i = 0; i < bb->count ; i++) { |
| if (!BB_ACK(p[i])) { |
| sector_t start = BB_OFFSET(p[i]); |
| int len = BB_LEN(p[i]); |
| |
| p[i] = BB_MAKE(start, len, 1); |
| } |
| } |
| bb->unacked_exist = 0; |
| } |
| write_sequnlock_irq(&bb->lock); |
| } |
| EXPORT_SYMBOL_GPL(ack_all_badblocks); |
| |
| /** |
| * badblocks_show() - sysfs access to bad-blocks list |
| * @bb: the badblocks structure that holds all badblock information |
| * @page: buffer received from sysfs |
| * @unack: weather to show unacknowledged badblocks |
| * |
| * Return: |
| * Length of returned data |
| */ |
| ssize_t badblocks_show(struct badblocks *bb, char *page, int unack) |
| { |
| size_t len; |
| int i; |
| u64 *p = bb->page; |
| unsigned seq; |
| |
| if (bb->shift < 0) |
| return 0; |
| |
| retry: |
| seq = read_seqbegin(&bb->lock); |
| |
| len = 0; |
| i = 0; |
| |
| while (len < PAGE_SIZE && i < bb->count) { |
| sector_t s = BB_OFFSET(p[i]); |
| unsigned int length = BB_LEN(p[i]); |
| int ack = BB_ACK(p[i]); |
| |
| i++; |
| |
| if (unack && ack) |
| continue; |
| |
| len += snprintf(page+len, PAGE_SIZE-len, "%llu %u\n", |
| (unsigned long long)s << bb->shift, |
| length << bb->shift); |
| } |
| if (unack && len == 0) |
| bb->unacked_exist = 0; |
| |
| if (read_seqretry(&bb->lock, seq)) |
| goto retry; |
| |
| return len; |
| } |
| EXPORT_SYMBOL_GPL(badblocks_show); |
| |
| /** |
| * badblocks_store() - sysfs access to bad-blocks list |
| * @bb: the badblocks structure that holds all badblock information |
| * @page: buffer received from sysfs |
| * @len: length of data received from sysfs |
| * @unack: weather to show unacknowledged badblocks |
| * |
| * Return: |
| * Length of the buffer processed or -ve error. |
| */ |
| ssize_t badblocks_store(struct badblocks *bb, const char *page, size_t len, |
| int unack) |
| { |
| unsigned long long sector; |
| int length; |
| char newline; |
| |
| switch (sscanf(page, "%llu %d%c", §or, &length, &newline)) { |
| case 3: |
| if (newline != '\n') |
| return -EINVAL; |
| fallthrough; |
| case 2: |
| if (length <= 0) |
| return -EINVAL; |
| break; |
| default: |
| return -EINVAL; |
| } |
| |
| if (badblocks_set(bb, sector, length, !unack)) |
| return -ENOSPC; |
| else |
| return len; |
| } |
| EXPORT_SYMBOL_GPL(badblocks_store); |
| |
| static int __badblocks_init(struct device *dev, struct badblocks *bb, |
| int enable) |
| { |
| bb->dev = dev; |
| bb->count = 0; |
| if (enable) |
| bb->shift = 0; |
| else |
| bb->shift = -1; |
| if (dev) |
| bb->page = devm_kzalloc(dev, PAGE_SIZE, GFP_KERNEL); |
| else |
| bb->page = kzalloc(PAGE_SIZE, GFP_KERNEL); |
| if (!bb->page) { |
| bb->shift = -1; |
| return -ENOMEM; |
| } |
| seqlock_init(&bb->lock); |
| |
| return 0; |
| } |
| |
| /** |
| * badblocks_init() - initialize the badblocks structure |
| * @bb: the badblocks structure that holds all badblock information |
| * @enable: weather to enable badblocks accounting |
| * |
| * Return: |
| * 0: success |
| * -ve errno: on error |
| */ |
| int badblocks_init(struct badblocks *bb, int enable) |
| { |
| return __badblocks_init(NULL, bb, enable); |
| } |
| EXPORT_SYMBOL_GPL(badblocks_init); |
| |
| int devm_init_badblocks(struct device *dev, struct badblocks *bb) |
| { |
| if (!bb) |
| return -EINVAL; |
| return __badblocks_init(dev, bb, 1); |
| } |
| EXPORT_SYMBOL_GPL(devm_init_badblocks); |
| |
| /** |
| * badblocks_exit() - free the badblocks structure |
| * @bb: the badblocks structure that holds all badblock information |
| */ |
| void badblocks_exit(struct badblocks *bb) |
| { |
| if (!bb) |
| return; |
| if (bb->dev) |
| devm_kfree(bb->dev, bb->page); |
| else |
| kfree(bb->page); |
| bb->page = NULL; |
| } |
| EXPORT_SYMBOL_GPL(badblocks_exit); |