| // SPDX-License-Identifier: GPL-2.0 |
| |
| #include <linux/slab.h> |
| #include <trace/events/btrfs.h> |
| #include "messages.h" |
| #include "ctree.h" |
| #include "extent_io.h" |
| #include "extent-io-tree.h" |
| #include "btrfs_inode.h" |
| |
| static struct kmem_cache *extent_state_cache; |
| |
| static inline bool extent_state_in_tree(const struct extent_state *state) |
| { |
| return !RB_EMPTY_NODE(&state->rb_node); |
| } |
| |
| #ifdef CONFIG_BTRFS_DEBUG |
| static LIST_HEAD(states); |
| static DEFINE_SPINLOCK(leak_lock); |
| |
| static inline void btrfs_leak_debug_add_state(struct extent_state *state) |
| { |
| unsigned long flags; |
| |
| spin_lock_irqsave(&leak_lock, flags); |
| list_add(&state->leak_list, &states); |
| spin_unlock_irqrestore(&leak_lock, flags); |
| } |
| |
| static inline void btrfs_leak_debug_del_state(struct extent_state *state) |
| { |
| unsigned long flags; |
| |
| spin_lock_irqsave(&leak_lock, flags); |
| list_del(&state->leak_list); |
| spin_unlock_irqrestore(&leak_lock, flags); |
| } |
| |
| static inline void btrfs_extent_state_leak_debug_check(void) |
| { |
| struct extent_state *state; |
| |
| while (!list_empty(&states)) { |
| state = list_entry(states.next, struct extent_state, leak_list); |
| pr_err("BTRFS: state leak: start %llu end %llu state %u in tree %d refs %d\n", |
| state->start, state->end, state->state, |
| extent_state_in_tree(state), |
| refcount_read(&state->refs)); |
| list_del(&state->leak_list); |
| WARN_ON_ONCE(1); |
| kmem_cache_free(extent_state_cache, state); |
| } |
| } |
| |
| #define btrfs_debug_check_extent_io_range(tree, start, end) \ |
| __btrfs_debug_check_extent_io_range(__func__, (tree), (start), (end)) |
| static inline void __btrfs_debug_check_extent_io_range(const char *caller, |
| struct extent_io_tree *tree, |
| u64 start, u64 end) |
| { |
| const struct btrfs_inode *inode; |
| u64 isize; |
| |
| if (tree->owner != IO_TREE_INODE_IO) |
| return; |
| |
| inode = extent_io_tree_to_inode_const(tree); |
| isize = i_size_read(&inode->vfs_inode); |
| if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) { |
| btrfs_debug_rl(inode->root->fs_info, |
| "%s: ino %llu isize %llu odd range [%llu,%llu]", |
| caller, btrfs_ino(inode), isize, start, end); |
| } |
| } |
| #else |
| #define btrfs_leak_debug_add_state(state) do {} while (0) |
| #define btrfs_leak_debug_del_state(state) do {} while (0) |
| #define btrfs_extent_state_leak_debug_check() do {} while (0) |
| #define btrfs_debug_check_extent_io_range(c, s, e) do {} while (0) |
| #endif |
| |
| |
| /* |
| * The only tree allowed to set the inode is IO_TREE_INODE_IO. |
| */ |
| static bool is_inode_io_tree(const struct extent_io_tree *tree) |
| { |
| return tree->owner == IO_TREE_INODE_IO; |
| } |
| |
| /* Return the inode if it's valid for the given tree, otherwise NULL. */ |
| struct btrfs_inode *extent_io_tree_to_inode(struct extent_io_tree *tree) |
| { |
| if (tree->owner == IO_TREE_INODE_IO) |
| return tree->inode; |
| return NULL; |
| } |
| |
| /* Read-only access to the inode. */ |
| const struct btrfs_inode *extent_io_tree_to_inode_const(const struct extent_io_tree *tree) |
| { |
| if (tree->owner == IO_TREE_INODE_IO) |
| return tree->inode; |
| return NULL; |
| } |
| |
| /* For read-only access to fs_info. */ |
| const struct btrfs_fs_info *extent_io_tree_to_fs_info(const struct extent_io_tree *tree) |
| { |
| if (tree->owner == IO_TREE_INODE_IO) |
| return tree->inode->root->fs_info; |
| return tree->fs_info; |
| } |
| |
| void extent_io_tree_init(struct btrfs_fs_info *fs_info, |
| struct extent_io_tree *tree, unsigned int owner) |
| { |
| tree->state = RB_ROOT; |
| spin_lock_init(&tree->lock); |
| tree->fs_info = fs_info; |
| tree->owner = owner; |
| } |
| |
| /* |
| * Empty an io tree, removing and freeing every extent state record from the |
| * tree. This should be called once we are sure no other task can access the |
| * tree anymore, so no tree updates happen after we empty the tree and there |
| * aren't any waiters on any extent state record (EXTENT_LOCK_BITS are never |
| * set on any extent state when calling this function). |
| */ |
| void extent_io_tree_release(struct extent_io_tree *tree) |
| { |
| struct rb_root root; |
| struct extent_state *state; |
| struct extent_state *tmp; |
| |
| spin_lock(&tree->lock); |
| root = tree->state; |
| tree->state = RB_ROOT; |
| rbtree_postorder_for_each_entry_safe(state, tmp, &root, rb_node) { |
| /* Clear node to keep free_extent_state() happy. */ |
| RB_CLEAR_NODE(&state->rb_node); |
| ASSERT(!(state->state & EXTENT_LOCK_BITS)); |
| /* |
| * No need for a memory barrier here, as we are holding the tree |
| * lock and we only change the waitqueue while holding that lock |
| * (see wait_extent_bit()). |
| */ |
| ASSERT(!waitqueue_active(&state->wq)); |
| free_extent_state(state); |
| cond_resched_lock(&tree->lock); |
| } |
| /* |
| * Should still be empty even after a reschedule, no other task should |
| * be accessing the tree anymore. |
| */ |
| ASSERT(RB_EMPTY_ROOT(&tree->state)); |
| spin_unlock(&tree->lock); |
| } |
| |
| static struct extent_state *alloc_extent_state(gfp_t mask) |
| { |
| struct extent_state *state; |
| |
| /* |
| * The given mask might be not appropriate for the slab allocator, |
| * drop the unsupported bits |
| */ |
| mask &= ~(__GFP_DMA32|__GFP_HIGHMEM); |
| state = kmem_cache_alloc(extent_state_cache, mask); |
| if (!state) |
| return state; |
| state->state = 0; |
| RB_CLEAR_NODE(&state->rb_node); |
| btrfs_leak_debug_add_state(state); |
| refcount_set(&state->refs, 1); |
| init_waitqueue_head(&state->wq); |
| trace_alloc_extent_state(state, mask, _RET_IP_); |
| return state; |
| } |
| |
| static struct extent_state *alloc_extent_state_atomic(struct extent_state *prealloc) |
| { |
| if (!prealloc) |
| prealloc = alloc_extent_state(GFP_ATOMIC); |
| |
| return prealloc; |
| } |
| |
| void free_extent_state(struct extent_state *state) |
| { |
| if (!state) |
| return; |
| if (refcount_dec_and_test(&state->refs)) { |
| WARN_ON(extent_state_in_tree(state)); |
| btrfs_leak_debug_del_state(state); |
| trace_free_extent_state(state, _RET_IP_); |
| kmem_cache_free(extent_state_cache, state); |
| } |
| } |
| |
| static int add_extent_changeset(struct extent_state *state, u32 bits, |
| struct extent_changeset *changeset, |
| int set) |
| { |
| int ret; |
| |
| if (!changeset) |
| return 0; |
| if (set && (state->state & bits) == bits) |
| return 0; |
| if (!set && (state->state & bits) == 0) |
| return 0; |
| changeset->bytes_changed += state->end - state->start + 1; |
| ret = ulist_add(&changeset->range_changed, state->start, state->end, |
| GFP_ATOMIC); |
| return ret; |
| } |
| |
| static inline struct extent_state *next_state(struct extent_state *state) |
| { |
| struct rb_node *next = rb_next(&state->rb_node); |
| |
| if (next) |
| return rb_entry(next, struct extent_state, rb_node); |
| else |
| return NULL; |
| } |
| |
| static inline struct extent_state *prev_state(struct extent_state *state) |
| { |
| struct rb_node *next = rb_prev(&state->rb_node); |
| |
| if (next) |
| return rb_entry(next, struct extent_state, rb_node); |
| else |
| return NULL; |
| } |
| |
| /* |
| * Search @tree for an entry that contains @offset. Such entry would have |
| * entry->start <= offset && entry->end >= offset. |
| * |
| * @tree: the tree to search |
| * @offset: offset that should fall within an entry in @tree |
| * @node_ret: pointer where new node should be anchored (used when inserting an |
| * entry in the tree) |
| * @parent_ret: points to entry which would have been the parent of the entry, |
| * containing @offset |
| * |
| * Return a pointer to the entry that contains @offset byte address and don't change |
| * @node_ret and @parent_ret. |
| * |
| * If no such entry exists, return pointer to entry that ends before @offset |
| * and fill parameters @node_ret and @parent_ret, ie. does not return NULL. |
| */ |
| static inline struct extent_state *tree_search_for_insert(struct extent_io_tree *tree, |
| u64 offset, |
| struct rb_node ***node_ret, |
| struct rb_node **parent_ret) |
| { |
| struct rb_root *root = &tree->state; |
| struct rb_node **node = &root->rb_node; |
| struct rb_node *prev = NULL; |
| struct extent_state *entry = NULL; |
| |
| while (*node) { |
| prev = *node; |
| entry = rb_entry(prev, struct extent_state, rb_node); |
| |
| if (offset < entry->start) |
| node = &(*node)->rb_left; |
| else if (offset > entry->end) |
| node = &(*node)->rb_right; |
| else |
| return entry; |
| } |
| |
| if (node_ret) |
| *node_ret = node; |
| if (parent_ret) |
| *parent_ret = prev; |
| |
| /* Search neighbors until we find the first one past the end */ |
| while (entry && offset > entry->end) |
| entry = next_state(entry); |
| |
| return entry; |
| } |
| |
| /* |
| * Search offset in the tree or fill neighbor rbtree node pointers. |
| * |
| * @tree: the tree to search |
| * @offset: offset that should fall within an entry in @tree |
| * @next_ret: pointer to the first entry whose range ends after @offset |
| * @prev_ret: pointer to the first entry whose range begins before @offset |
| * |
| * Return a pointer to the entry that contains @offset byte address. If no |
| * such entry exists, then return NULL and fill @prev_ret and @next_ret. |
| * Otherwise return the found entry and other pointers are left untouched. |
| */ |
| static struct extent_state *tree_search_prev_next(struct extent_io_tree *tree, |
| u64 offset, |
| struct extent_state **prev_ret, |
| struct extent_state **next_ret) |
| { |
| struct rb_root *root = &tree->state; |
| struct rb_node **node = &root->rb_node; |
| struct extent_state *orig_prev; |
| struct extent_state *entry = NULL; |
| |
| ASSERT(prev_ret); |
| ASSERT(next_ret); |
| |
| while (*node) { |
| entry = rb_entry(*node, struct extent_state, rb_node); |
| |
| if (offset < entry->start) |
| node = &(*node)->rb_left; |
| else if (offset > entry->end) |
| node = &(*node)->rb_right; |
| else |
| return entry; |
| } |
| |
| orig_prev = entry; |
| while (entry && offset > entry->end) |
| entry = next_state(entry); |
| *next_ret = entry; |
| entry = orig_prev; |
| |
| while (entry && offset < entry->start) |
| entry = prev_state(entry); |
| *prev_ret = entry; |
| |
| return NULL; |
| } |
| |
| /* |
| * Inexact rb-tree search, return the next entry if @offset is not found |
| */ |
| static inline struct extent_state *tree_search(struct extent_io_tree *tree, u64 offset) |
| { |
| return tree_search_for_insert(tree, offset, NULL, NULL); |
| } |
| |
| static void extent_io_tree_panic(const struct extent_io_tree *tree, |
| const struct extent_state *state, |
| const char *opname, |
| int err) |
| { |
| btrfs_panic(extent_io_tree_to_fs_info(tree), err, |
| "extent io tree error on %s state start %llu end %llu", |
| opname, state->start, state->end); |
| } |
| |
| static void merge_prev_state(struct extent_io_tree *tree, struct extent_state *state) |
| { |
| struct extent_state *prev; |
| |
| prev = prev_state(state); |
| if (prev && prev->end == state->start - 1 && prev->state == state->state) { |
| if (is_inode_io_tree(tree)) |
| btrfs_merge_delalloc_extent(extent_io_tree_to_inode(tree), |
| state, prev); |
| state->start = prev->start; |
| rb_erase(&prev->rb_node, &tree->state); |
| RB_CLEAR_NODE(&prev->rb_node); |
| free_extent_state(prev); |
| } |
| } |
| |
| static void merge_next_state(struct extent_io_tree *tree, struct extent_state *state) |
| { |
| struct extent_state *next; |
| |
| next = next_state(state); |
| if (next && next->start == state->end + 1 && next->state == state->state) { |
| if (is_inode_io_tree(tree)) |
| btrfs_merge_delalloc_extent(extent_io_tree_to_inode(tree), |
| state, next); |
| state->end = next->end; |
| rb_erase(&next->rb_node, &tree->state); |
| RB_CLEAR_NODE(&next->rb_node); |
| free_extent_state(next); |
| } |
| } |
| |
| /* |
| * Utility function to look for merge candidates inside a given range. Any |
| * extents with matching state are merged together into a single extent in the |
| * tree. Extents with EXTENT_IO in their state field are not merged because |
| * the end_io handlers need to be able to do operations on them without |
| * sleeping (or doing allocations/splits). |
| * |
| * This should be called with the tree lock held. |
| */ |
| static void merge_state(struct extent_io_tree *tree, struct extent_state *state) |
| { |
| if (state->state & (EXTENT_LOCK_BITS | EXTENT_BOUNDARY)) |
| return; |
| |
| merge_prev_state(tree, state); |
| merge_next_state(tree, state); |
| } |
| |
| static void set_state_bits(struct extent_io_tree *tree, |
| struct extent_state *state, |
| u32 bits, struct extent_changeset *changeset) |
| { |
| u32 bits_to_set = bits & ~EXTENT_CTLBITS; |
| int ret; |
| |
| if (is_inode_io_tree(tree)) |
| btrfs_set_delalloc_extent(extent_io_tree_to_inode(tree), state, bits); |
| |
| ret = add_extent_changeset(state, bits_to_set, changeset, 1); |
| BUG_ON(ret < 0); |
| state->state |= bits_to_set; |
| } |
| |
| /* |
| * Insert an extent_state struct into the tree. 'bits' are set on the |
| * struct before it is inserted. |
| * |
| * Returns a pointer to the struct extent_state record containing the range |
| * requested for insertion, which may be the same as the given struct or it |
| * may be an existing record in the tree that was expanded to accommodate the |
| * requested range. In case of an extent_state different from the one that was |
| * given, the later can be freed or reused by the caller. |
| * |
| * On error it returns an error pointer. |
| * |
| * The tree lock is not taken internally. This is a utility function and |
| * probably isn't what you want to call (see set/clear_extent_bit). |
| */ |
| static struct extent_state *insert_state(struct extent_io_tree *tree, |
| struct extent_state *state, |
| u32 bits, |
| struct extent_changeset *changeset) |
| { |
| struct rb_node **node; |
| struct rb_node *parent = NULL; |
| const u64 start = state->start - 1; |
| const u64 end = state->end + 1; |
| const bool try_merge = !(bits & (EXTENT_LOCK_BITS | EXTENT_BOUNDARY)); |
| |
| set_state_bits(tree, state, bits, changeset); |
| |
| node = &tree->state.rb_node; |
| while (*node) { |
| struct extent_state *entry; |
| |
| parent = *node; |
| entry = rb_entry(parent, struct extent_state, rb_node); |
| |
| if (state->end < entry->start) { |
| if (try_merge && end == entry->start && |
| state->state == entry->state) { |
| if (is_inode_io_tree(tree)) |
| btrfs_merge_delalloc_extent( |
| extent_io_tree_to_inode(tree), |
| state, entry); |
| entry->start = state->start; |
| merge_prev_state(tree, entry); |
| state->state = 0; |
| return entry; |
| } |
| node = &(*node)->rb_left; |
| } else if (state->end > entry->end) { |
| if (try_merge && entry->end == start && |
| state->state == entry->state) { |
| if (is_inode_io_tree(tree)) |
| btrfs_merge_delalloc_extent( |
| extent_io_tree_to_inode(tree), |
| state, entry); |
| entry->end = state->end; |
| merge_next_state(tree, entry); |
| state->state = 0; |
| return entry; |
| } |
| node = &(*node)->rb_right; |
| } else { |
| return ERR_PTR(-EEXIST); |
| } |
| } |
| |
| rb_link_node(&state->rb_node, parent, node); |
| rb_insert_color(&state->rb_node, &tree->state); |
| |
| return state; |
| } |
| |
| /* |
| * Insert state to @tree to the location given by @node and @parent. |
| */ |
| static void insert_state_fast(struct extent_io_tree *tree, |
| struct extent_state *state, struct rb_node **node, |
| struct rb_node *parent, unsigned bits, |
| struct extent_changeset *changeset) |
| { |
| set_state_bits(tree, state, bits, changeset); |
| rb_link_node(&state->rb_node, parent, node); |
| rb_insert_color(&state->rb_node, &tree->state); |
| merge_state(tree, state); |
| } |
| |
| /* |
| * Split a given extent state struct in two, inserting the preallocated |
| * struct 'prealloc' as the newly created second half. 'split' indicates an |
| * offset inside 'orig' where it should be split. |
| * |
| * Before calling, |
| * the tree has 'orig' at [orig->start, orig->end]. After calling, there |
| * are two extent state structs in the tree: |
| * prealloc: [orig->start, split - 1] |
| * orig: [ split, orig->end ] |
| * |
| * The tree locks are not taken by this function. They need to be held |
| * by the caller. |
| */ |
| static int split_state(struct extent_io_tree *tree, struct extent_state *orig, |
| struct extent_state *prealloc, u64 split) |
| { |
| struct rb_node *parent = NULL; |
| struct rb_node **node; |
| |
| if (is_inode_io_tree(tree)) |
| btrfs_split_delalloc_extent(extent_io_tree_to_inode(tree), orig, |
| split); |
| |
| prealloc->start = orig->start; |
| prealloc->end = split - 1; |
| prealloc->state = orig->state; |
| orig->start = split; |
| |
| parent = &orig->rb_node; |
| node = &parent; |
| while (*node) { |
| struct extent_state *entry; |
| |
| parent = *node; |
| entry = rb_entry(parent, struct extent_state, rb_node); |
| |
| if (prealloc->end < entry->start) { |
| node = &(*node)->rb_left; |
| } else if (prealloc->end > entry->end) { |
| node = &(*node)->rb_right; |
| } else { |
| free_extent_state(prealloc); |
| return -EEXIST; |
| } |
| } |
| |
| rb_link_node(&prealloc->rb_node, parent, node); |
| rb_insert_color(&prealloc->rb_node, &tree->state); |
| |
| return 0; |
| } |
| |
| /* |
| * Utility function to clear some bits in an extent state struct. It will |
| * optionally wake up anyone waiting on this state (wake == 1). |
| * |
| * If no bits are set on the state struct after clearing things, the |
| * struct is freed and removed from the tree |
| */ |
| static struct extent_state *clear_state_bit(struct extent_io_tree *tree, |
| struct extent_state *state, |
| u32 bits, int wake, |
| struct extent_changeset *changeset) |
| { |
| struct extent_state *next; |
| u32 bits_to_clear = bits & ~EXTENT_CTLBITS; |
| int ret; |
| |
| if (is_inode_io_tree(tree)) |
| btrfs_clear_delalloc_extent(extent_io_tree_to_inode(tree), state, |
| bits); |
| |
| ret = add_extent_changeset(state, bits_to_clear, changeset, 0); |
| BUG_ON(ret < 0); |
| state->state &= ~bits_to_clear; |
| if (wake) |
| wake_up(&state->wq); |
| if (state->state == 0) { |
| next = next_state(state); |
| if (extent_state_in_tree(state)) { |
| rb_erase(&state->rb_node, &tree->state); |
| RB_CLEAR_NODE(&state->rb_node); |
| free_extent_state(state); |
| } else { |
| WARN_ON(1); |
| } |
| } else { |
| merge_state(tree, state); |
| next = next_state(state); |
| } |
| return next; |
| } |
| |
| /* |
| * Detect if extent bits request NOWAIT semantics and set the gfp mask accordingly, |
| * unset the EXTENT_NOWAIT bit. |
| */ |
| static void set_gfp_mask_from_bits(u32 *bits, gfp_t *mask) |
| { |
| *mask = (*bits & EXTENT_NOWAIT ? GFP_NOWAIT : GFP_NOFS); |
| *bits &= EXTENT_NOWAIT - 1; |
| } |
| |
| /* |
| * Clear some bits on a range in the tree. This may require splitting or |
| * inserting elements in the tree, so the gfp mask is used to indicate which |
| * allocations or sleeping are allowed. |
| * |
| * The range [start, end] is inclusive. |
| * |
| * This takes the tree lock, and returns 0 on success and < 0 on error. |
| */ |
| int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, |
| u32 bits, struct extent_state **cached_state, |
| struct extent_changeset *changeset) |
| { |
| struct extent_state *state; |
| struct extent_state *cached; |
| struct extent_state *prealloc = NULL; |
| u64 last_end; |
| int err; |
| int clear = 0; |
| int wake; |
| int delete = (bits & EXTENT_CLEAR_ALL_BITS); |
| gfp_t mask; |
| |
| set_gfp_mask_from_bits(&bits, &mask); |
| btrfs_debug_check_extent_io_range(tree, start, end); |
| trace_btrfs_clear_extent_bit(tree, start, end - start + 1, bits); |
| |
| if (delete) |
| bits |= ~EXTENT_CTLBITS; |
| |
| if (bits & EXTENT_DELALLOC) |
| bits |= EXTENT_NORESERVE; |
| |
| wake = ((bits & EXTENT_LOCK_BITS) ? 1 : 0); |
| if (bits & (EXTENT_LOCK_BITS | EXTENT_BOUNDARY)) |
| clear = 1; |
| again: |
| if (!prealloc) { |
| /* |
| * Don't care for allocation failure here because we might end |
| * up not needing the pre-allocated extent state at all, which |
| * is the case if we only have in the tree extent states that |
| * cover our input range and don't cover too any other range. |
| * If we end up needing a new extent state we allocate it later. |
| */ |
| prealloc = alloc_extent_state(mask); |
| } |
| |
| spin_lock(&tree->lock); |
| if (cached_state) { |
| cached = *cached_state; |
| |
| if (clear) { |
| *cached_state = NULL; |
| cached_state = NULL; |
| } |
| |
| if (cached && extent_state_in_tree(cached) && |
| cached->start <= start && cached->end > start) { |
| if (clear) |
| refcount_dec(&cached->refs); |
| state = cached; |
| goto hit_next; |
| } |
| if (clear) |
| free_extent_state(cached); |
| } |
| |
| /* This search will find the extents that end after our range starts. */ |
| state = tree_search(tree, start); |
| if (!state) |
| goto out; |
| hit_next: |
| if (state->start > end) |
| goto out; |
| WARN_ON(state->end < start); |
| last_end = state->end; |
| |
| /* The state doesn't have the wanted bits, go ahead. */ |
| if (!(state->state & bits)) { |
| state = next_state(state); |
| goto next; |
| } |
| |
| /* |
| * | ---- desired range ---- | |
| * | state | or |
| * | ------------- state -------------- | |
| * |
| * We need to split the extent we found, and may flip bits on second |
| * half. |
| * |
| * If the extent we found extends past our range, we just split and |
| * search again. It'll get split again the next time though. |
| * |
| * If the extent we found is inside our range, we clear the desired bit |
| * on it. |
| */ |
| |
| if (state->start < start) { |
| prealloc = alloc_extent_state_atomic(prealloc); |
| if (!prealloc) |
| goto search_again; |
| err = split_state(tree, state, prealloc, start); |
| if (err) |
| extent_io_tree_panic(tree, state, "split", err); |
| |
| prealloc = NULL; |
| if (err) |
| goto out; |
| if (state->end <= end) { |
| state = clear_state_bit(tree, state, bits, wake, changeset); |
| goto next; |
| } |
| goto search_again; |
| } |
| /* |
| * | ---- desired range ---- | |
| * | state | |
| * We need to split the extent, and clear the bit on the first half. |
| */ |
| if (state->start <= end && state->end > end) { |
| prealloc = alloc_extent_state_atomic(prealloc); |
| if (!prealloc) |
| goto search_again; |
| err = split_state(tree, state, prealloc, end + 1); |
| if (err) |
| extent_io_tree_panic(tree, state, "split", err); |
| |
| if (wake) |
| wake_up(&state->wq); |
| |
| clear_state_bit(tree, prealloc, bits, wake, changeset); |
| |
| prealloc = NULL; |
| goto out; |
| } |
| |
| state = clear_state_bit(tree, state, bits, wake, changeset); |
| next: |
| if (last_end == (u64)-1) |
| goto out; |
| start = last_end + 1; |
| if (start <= end && state && !need_resched()) |
| goto hit_next; |
| |
| search_again: |
| if (start > end) |
| goto out; |
| spin_unlock(&tree->lock); |
| if (gfpflags_allow_blocking(mask)) |
| cond_resched(); |
| goto again; |
| |
| out: |
| spin_unlock(&tree->lock); |
| if (prealloc) |
| free_extent_state(prealloc); |
| |
| return 0; |
| |
| } |
| |
| /* |
| * Wait for one or more bits to clear on a range in the state tree. |
| * The range [start, end] is inclusive. |
| * The tree lock is taken by this function |
| */ |
| static void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, |
| u32 bits, struct extent_state **cached_state) |
| { |
| struct extent_state *state; |
| |
| btrfs_debug_check_extent_io_range(tree, start, end); |
| |
| spin_lock(&tree->lock); |
| again: |
| /* |
| * Maintain cached_state, as we may not remove it from the tree if there |
| * are more bits than the bits we're waiting on set on this state. |
| */ |
| if (cached_state && *cached_state) { |
| state = *cached_state; |
| if (extent_state_in_tree(state) && |
| state->start <= start && start < state->end) |
| goto process_node; |
| } |
| while (1) { |
| /* |
| * This search will find all the extents that end after our |
| * range starts. |
| */ |
| state = tree_search(tree, start); |
| process_node: |
| if (!state) |
| break; |
| if (state->start > end) |
| goto out; |
| |
| if (state->state & bits) { |
| DEFINE_WAIT(wait); |
| |
| start = state->start; |
| refcount_inc(&state->refs); |
| prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE); |
| spin_unlock(&tree->lock); |
| schedule(); |
| spin_lock(&tree->lock); |
| finish_wait(&state->wq, &wait); |
| free_extent_state(state); |
| goto again; |
| } |
| start = state->end + 1; |
| |
| if (start > end) |
| break; |
| |
| if (!cond_resched_lock(&tree->lock)) { |
| state = next_state(state); |
| goto process_node; |
| } |
| } |
| out: |
| /* This state is no longer useful, clear it and free it up. */ |
| if (cached_state && *cached_state) { |
| state = *cached_state; |
| *cached_state = NULL; |
| free_extent_state(state); |
| } |
| spin_unlock(&tree->lock); |
| } |
| |
| static void cache_state_if_flags(struct extent_state *state, |
| struct extent_state **cached_ptr, |
| unsigned flags) |
| { |
| if (cached_ptr && !(*cached_ptr)) { |
| if (!flags || (state->state & flags)) { |
| *cached_ptr = state; |
| refcount_inc(&state->refs); |
| } |
| } |
| } |
| |
| static void cache_state(struct extent_state *state, |
| struct extent_state **cached_ptr) |
| { |
| return cache_state_if_flags(state, cached_ptr, EXTENT_LOCK_BITS | EXTENT_BOUNDARY); |
| } |
| |
| /* |
| * Find the first state struct with 'bits' set after 'start', and return it. |
| * tree->lock must be held. NULL will returned if nothing was found after |
| * 'start'. |
| */ |
| static struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree, |
| u64 start, u32 bits) |
| { |
| struct extent_state *state; |
| |
| /* |
| * This search will find all the extents that end after our range |
| * starts. |
| */ |
| state = tree_search(tree, start); |
| while (state) { |
| if (state->end >= start && (state->state & bits)) |
| return state; |
| state = next_state(state); |
| } |
| return NULL; |
| } |
| |
| /* |
| * Find the first offset in the io tree with one or more @bits set. |
| * |
| * Note: If there are multiple bits set in @bits, any of them will match. |
| * |
| * Return true if we find something, and update @start_ret and @end_ret. |
| * Return false if we found nothing. |
| */ |
| bool find_first_extent_bit(struct extent_io_tree *tree, u64 start, |
| u64 *start_ret, u64 *end_ret, u32 bits, |
| struct extent_state **cached_state) |
| { |
| struct extent_state *state; |
| bool ret = false; |
| |
| spin_lock(&tree->lock); |
| if (cached_state && *cached_state) { |
| state = *cached_state; |
| if (state->end == start - 1 && extent_state_in_tree(state)) { |
| while ((state = next_state(state)) != NULL) { |
| if (state->state & bits) |
| break; |
| } |
| /* |
| * If we found the next extent state, clear cached_state |
| * so that we can cache the next extent state below and |
| * avoid future calls going over the same extent state |
| * again. If we haven't found any, clear as well since |
| * it's now useless. |
| */ |
| free_extent_state(*cached_state); |
| *cached_state = NULL; |
| if (state) |
| goto got_it; |
| goto out; |
| } |
| free_extent_state(*cached_state); |
| *cached_state = NULL; |
| } |
| |
| state = find_first_extent_bit_state(tree, start, bits); |
| got_it: |
| if (state) { |
| cache_state_if_flags(state, cached_state, 0); |
| *start_ret = state->start; |
| *end_ret = state->end; |
| ret = true; |
| } |
| out: |
| spin_unlock(&tree->lock); |
| return ret; |
| } |
| |
| /* |
| * Find a contiguous area of bits |
| * |
| * @tree: io tree to check |
| * @start: offset to start the search from |
| * @start_ret: the first offset we found with the bits set |
| * @end_ret: the final contiguous range of the bits that were set |
| * @bits: bits to look for |
| * |
| * set_extent_bit and clear_extent_bit can temporarily split contiguous ranges |
| * to set bits appropriately, and then merge them again. During this time it |
| * will drop the tree->lock, so use this helper if you want to find the actual |
| * contiguous area for given bits. We will search to the first bit we find, and |
| * then walk down the tree until we find a non-contiguous area. The area |
| * returned will be the full contiguous area with the bits set. |
| */ |
| int find_contiguous_extent_bit(struct extent_io_tree *tree, u64 start, |
| u64 *start_ret, u64 *end_ret, u32 bits) |
| { |
| struct extent_state *state; |
| int ret = 1; |
| |
| ASSERT(!btrfs_fs_incompat(extent_io_tree_to_fs_info(tree), NO_HOLES)); |
| |
| spin_lock(&tree->lock); |
| state = find_first_extent_bit_state(tree, start, bits); |
| if (state) { |
| *start_ret = state->start; |
| *end_ret = state->end; |
| while ((state = next_state(state)) != NULL) { |
| if (state->start > (*end_ret + 1)) |
| break; |
| *end_ret = state->end; |
| } |
| ret = 0; |
| } |
| spin_unlock(&tree->lock); |
| return ret; |
| } |
| |
| /* |
| * Find a contiguous range of bytes in the file marked as delalloc, not more |
| * than 'max_bytes'. start and end are used to return the range, |
| * |
| * True is returned if we find something, false if nothing was in the tree. |
| */ |
| bool btrfs_find_delalloc_range(struct extent_io_tree *tree, u64 *start, |
| u64 *end, u64 max_bytes, |
| struct extent_state **cached_state) |
| { |
| struct extent_state *state; |
| u64 cur_start = *start; |
| bool found = false; |
| u64 total_bytes = 0; |
| |
| spin_lock(&tree->lock); |
| |
| /* |
| * This search will find all the extents that end after our range |
| * starts. |
| */ |
| state = tree_search(tree, cur_start); |
| if (!state) { |
| *end = (u64)-1; |
| goto out; |
| } |
| |
| while (state) { |
| if (found && (state->start != cur_start || |
| (state->state & EXTENT_BOUNDARY))) { |
| goto out; |
| } |
| if (!(state->state & EXTENT_DELALLOC)) { |
| if (!found) |
| *end = state->end; |
| goto out; |
| } |
| if (!found) { |
| *start = state->start; |
| *cached_state = state; |
| refcount_inc(&state->refs); |
| } |
| found = true; |
| *end = state->end; |
| cur_start = state->end + 1; |
| total_bytes += state->end - state->start + 1; |
| if (total_bytes >= max_bytes) |
| break; |
| state = next_state(state); |
| } |
| out: |
| spin_unlock(&tree->lock); |
| return found; |
| } |
| |
| /* |
| * Set some bits on a range in the tree. This may require allocations or |
| * sleeping. By default all allocations use GFP_NOFS, use EXTENT_NOWAIT for |
| * GFP_NOWAIT. |
| * |
| * If any of the exclusive bits are set, this will fail with -EEXIST if some |
| * part of the range already has the desired bits set. The extent_state of the |
| * existing range is returned in failed_state in this case, and the start of the |
| * existing range is returned in failed_start. failed_state is used as an |
| * optimization for wait_extent_bit, failed_start must be used as the source of |
| * truth as failed_state may have changed since we returned. |
| * |
| * [start, end] is inclusive This takes the tree lock. |
| */ |
| static int __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, |
| u32 bits, u64 *failed_start, |
| struct extent_state **failed_state, |
| struct extent_state **cached_state, |
| struct extent_changeset *changeset) |
| { |
| struct extent_state *state; |
| struct extent_state *prealloc = NULL; |
| struct rb_node **p = NULL; |
| struct rb_node *parent = NULL; |
| int ret = 0; |
| u64 last_start; |
| u64 last_end; |
| u32 exclusive_bits = (bits & EXTENT_LOCK_BITS); |
| gfp_t mask; |
| |
| set_gfp_mask_from_bits(&bits, &mask); |
| btrfs_debug_check_extent_io_range(tree, start, end); |
| trace_btrfs_set_extent_bit(tree, start, end - start + 1, bits); |
| |
| if (exclusive_bits) |
| ASSERT(failed_start); |
| else |
| ASSERT(failed_start == NULL && failed_state == NULL); |
| again: |
| if (!prealloc) { |
| /* |
| * Don't care for allocation failure here because we might end |
| * up not needing the pre-allocated extent state at all, which |
| * is the case if we only have in the tree extent states that |
| * cover our input range and don't cover too any other range. |
| * If we end up needing a new extent state we allocate it later. |
| */ |
| prealloc = alloc_extent_state(mask); |
| } |
| /* Optimistically preallocate the extent changeset ulist node. */ |
| if (changeset) |
| extent_changeset_prealloc(changeset, mask); |
| |
| spin_lock(&tree->lock); |
| if (cached_state && *cached_state) { |
| state = *cached_state; |
| if (state->start <= start && state->end > start && |
| extent_state_in_tree(state)) |
| goto hit_next; |
| } |
| /* |
| * This search will find all the extents that end after our range |
| * starts. |
| */ |
| state = tree_search_for_insert(tree, start, &p, &parent); |
| if (!state) { |
| prealloc = alloc_extent_state_atomic(prealloc); |
| if (!prealloc) |
| goto search_again; |
| prealloc->start = start; |
| prealloc->end = end; |
| insert_state_fast(tree, prealloc, p, parent, bits, changeset); |
| cache_state(prealloc, cached_state); |
| prealloc = NULL; |
| goto out; |
| } |
| hit_next: |
| last_start = state->start; |
| last_end = state->end; |
| |
| /* |
| * | ---- desired range ---- | |
| * | state | |
| * |
| * Just lock what we found and keep going |
| */ |
| if (state->start == start && state->end <= end) { |
| if (state->state & exclusive_bits) { |
| *failed_start = state->start; |
| cache_state(state, failed_state); |
| ret = -EEXIST; |
| goto out; |
| } |
| |
| set_state_bits(tree, state, bits, changeset); |
| cache_state(state, cached_state); |
| merge_state(tree, state); |
| if (last_end == (u64)-1) |
| goto out; |
| start = last_end + 1; |
| state = next_state(state); |
| if (start < end && state && state->start == start && |
| !need_resched()) |
| goto hit_next; |
| goto search_again; |
| } |
| |
| /* |
| * | ---- desired range ---- | |
| * | state | |
| * or |
| * | ------------- state -------------- | |
| * |
| * We need to split the extent we found, and may flip bits on second |
| * half. |
| * |
| * If the extent we found extends past our range, we just split and |
| * search again. It'll get split again the next time though. |
| * |
| * If the extent we found is inside our range, we set the desired bit |
| * on it. |
| */ |
| if (state->start < start) { |
| if (state->state & exclusive_bits) { |
| *failed_start = start; |
| cache_state(state, failed_state); |
| ret = -EEXIST; |
| goto out; |
| } |
| |
| /* |
| * If this extent already has all the bits we want set, then |
| * skip it, not necessary to split it or do anything with it. |
| */ |
| if ((state->state & bits) == bits) { |
| start = state->end + 1; |
| cache_state(state, cached_state); |
| goto search_again; |
| } |
| |
| prealloc = alloc_extent_state_atomic(prealloc); |
| if (!prealloc) |
| goto search_again; |
| ret = split_state(tree, state, prealloc, start); |
| if (ret) |
| extent_io_tree_panic(tree, state, "split", ret); |
| |
| prealloc = NULL; |
| if (ret) |
| goto out; |
| if (state->end <= end) { |
| set_state_bits(tree, state, bits, changeset); |
| cache_state(state, cached_state); |
| merge_state(tree, state); |
| if (last_end == (u64)-1) |
| goto out; |
| start = last_end + 1; |
| state = next_state(state); |
| if (start < end && state && state->start == start && |
| !need_resched()) |
| goto hit_next; |
| } |
| goto search_again; |
| } |
| /* |
| * | ---- desired range ---- | |
| * | state | or | state | |
| * |
| * There's a hole, we need to insert something in it and ignore the |
| * extent we found. |
| */ |
| if (state->start > start) { |
| u64 this_end; |
| struct extent_state *inserted_state; |
| |
| if (end < last_start) |
| this_end = end; |
| else |
| this_end = last_start - 1; |
| |
| prealloc = alloc_extent_state_atomic(prealloc); |
| if (!prealloc) |
| goto search_again; |
| |
| /* |
| * Avoid to free 'prealloc' if it can be merged with the later |
| * extent. |
| */ |
| prealloc->start = start; |
| prealloc->end = this_end; |
| inserted_state = insert_state(tree, prealloc, bits, changeset); |
| if (IS_ERR(inserted_state)) { |
| ret = PTR_ERR(inserted_state); |
| extent_io_tree_panic(tree, prealloc, "insert", ret); |
| } |
| |
| cache_state(inserted_state, cached_state); |
| if (inserted_state == prealloc) |
| prealloc = NULL; |
| start = this_end + 1; |
| goto search_again; |
| } |
| /* |
| * | ---- desired range ---- | |
| * | state | |
| * |
| * We need to split the extent, and set the bit on the first half |
| */ |
| if (state->start <= end && state->end > end) { |
| if (state->state & exclusive_bits) { |
| *failed_start = start; |
| cache_state(state, failed_state); |
| ret = -EEXIST; |
| goto out; |
| } |
| |
| prealloc = alloc_extent_state_atomic(prealloc); |
| if (!prealloc) |
| goto search_again; |
| ret = split_state(tree, state, prealloc, end + 1); |
| if (ret) |
| extent_io_tree_panic(tree, state, "split", ret); |
| |
| set_state_bits(tree, prealloc, bits, changeset); |
| cache_state(prealloc, cached_state); |
| merge_state(tree, prealloc); |
| prealloc = NULL; |
| goto out; |
| } |
| |
| search_again: |
| if (start > end) |
| goto out; |
| spin_unlock(&tree->lock); |
| if (gfpflags_allow_blocking(mask)) |
| cond_resched(); |
| goto again; |
| |
| out: |
| spin_unlock(&tree->lock); |
| if (prealloc) |
| free_extent_state(prealloc); |
| |
| return ret; |
| |
| } |
| |
| int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, |
| u32 bits, struct extent_state **cached_state) |
| { |
| return __set_extent_bit(tree, start, end, bits, NULL, NULL, |
| cached_state, NULL); |
| } |
| |
| /* |
| * Convert all bits in a given range from one bit to another |
| * |
| * @tree: the io tree to search |
| * @start: the start offset in bytes |
| * @end: the end offset in bytes (inclusive) |
| * @bits: the bits to set in this range |
| * @clear_bits: the bits to clear in this range |
| * @cached_state: state that we're going to cache |
| * |
| * This will go through and set bits for the given range. If any states exist |
| * already in this range they are set with the given bit and cleared of the |
| * clear_bits. This is only meant to be used by things that are mergeable, ie. |
| * converting from say DELALLOC to DIRTY. This is not meant to be used with |
| * boundary bits like LOCK. |
| * |
| * All allocations are done with GFP_NOFS. |
| */ |
| int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, |
| u32 bits, u32 clear_bits, |
| struct extent_state **cached_state) |
| { |
| struct extent_state *state; |
| struct extent_state *prealloc = NULL; |
| struct rb_node **p = NULL; |
| struct rb_node *parent = NULL; |
| int ret = 0; |
| u64 last_start; |
| u64 last_end; |
| bool first_iteration = true; |
| |
| btrfs_debug_check_extent_io_range(tree, start, end); |
| trace_btrfs_convert_extent_bit(tree, start, end - start + 1, bits, |
| clear_bits); |
| |
| again: |
| if (!prealloc) { |
| /* |
| * Best effort, don't worry if extent state allocation fails |
| * here for the first iteration. We might have a cached state |
| * that matches exactly the target range, in which case no |
| * extent state allocations are needed. We'll only know this |
| * after locking the tree. |
| */ |
| prealloc = alloc_extent_state(GFP_NOFS); |
| if (!prealloc && !first_iteration) |
| return -ENOMEM; |
| } |
| |
| spin_lock(&tree->lock); |
| if (cached_state && *cached_state) { |
| state = *cached_state; |
| if (state->start <= start && state->end > start && |
| extent_state_in_tree(state)) |
| goto hit_next; |
| } |
| |
| /* |
| * This search will find all the extents that end after our range |
| * starts. |
| */ |
| state = tree_search_for_insert(tree, start, &p, &parent); |
| if (!state) { |
| prealloc = alloc_extent_state_atomic(prealloc); |
| if (!prealloc) { |
| ret = -ENOMEM; |
| goto out; |
| } |
| prealloc->start = start; |
| prealloc->end = end; |
| insert_state_fast(tree, prealloc, p, parent, bits, NULL); |
| cache_state(prealloc, cached_state); |
| prealloc = NULL; |
| goto out; |
| } |
| hit_next: |
| last_start = state->start; |
| last_end = state->end; |
| |
| /* |
| * | ---- desired range ---- | |
| * | state | |
| * |
| * Just lock what we found and keep going. |
| */ |
| if (state->start == start && state->end <= end) { |
| set_state_bits(tree, state, bits, NULL); |
| cache_state(state, cached_state); |
| state = clear_state_bit(tree, state, clear_bits, 0, NULL); |
| if (last_end == (u64)-1) |
| goto out; |
| start = last_end + 1; |
| if (start < end && state && state->start == start && |
| !need_resched()) |
| goto hit_next; |
| goto search_again; |
| } |
| |
| /* |
| * | ---- desired range ---- | |
| * | state | |
| * or |
| * | ------------- state -------------- | |
| * |
| * We need to split the extent we found, and may flip bits on second |
| * half. |
| * |
| * If the extent we found extends past our range, we just split and |
| * search again. It'll get split again the next time though. |
| * |
| * If the extent we found is inside our range, we set the desired bit |
| * on it. |
| */ |
| if (state->start < start) { |
| prealloc = alloc_extent_state_atomic(prealloc); |
| if (!prealloc) { |
| ret = -ENOMEM; |
| goto out; |
| } |
| ret = split_state(tree, state, prealloc, start); |
| if (ret) |
| extent_io_tree_panic(tree, state, "split", ret); |
| prealloc = NULL; |
| if (ret) |
| goto out; |
| if (state->end <= end) { |
| set_state_bits(tree, state, bits, NULL); |
| cache_state(state, cached_state); |
| state = clear_state_bit(tree, state, clear_bits, 0, NULL); |
| if (last_end == (u64)-1) |
| goto out; |
| start = last_end + 1; |
| if (start < end && state && state->start == start && |
| !need_resched()) |
| goto hit_next; |
| } |
| goto search_again; |
| } |
| /* |
| * | ---- desired range ---- | |
| * | state | or | state | |
| * |
| * There's a hole, we need to insert something in it and ignore the |
| * extent we found. |
| */ |
| if (state->start > start) { |
| u64 this_end; |
| struct extent_state *inserted_state; |
| |
| if (end < last_start) |
| this_end = end; |
| else |
| this_end = last_start - 1; |
| |
| prealloc = alloc_extent_state_atomic(prealloc); |
| if (!prealloc) { |
| ret = -ENOMEM; |
| goto out; |
| } |
| |
| /* |
| * Avoid to free 'prealloc' if it can be merged with the later |
| * extent. |
| */ |
| prealloc->start = start; |
| prealloc->end = this_end; |
| inserted_state = insert_state(tree, prealloc, bits, NULL); |
| if (IS_ERR(inserted_state)) { |
| ret = PTR_ERR(inserted_state); |
| extent_io_tree_panic(tree, prealloc, "insert", ret); |
| } |
| cache_state(inserted_state, cached_state); |
| if (inserted_state == prealloc) |
| prealloc = NULL; |
| start = this_end + 1; |
| goto search_again; |
| } |
| /* |
| * | ---- desired range ---- | |
| * | state | |
| * |
| * We need to split the extent, and set the bit on the first half. |
| */ |
| if (state->start <= end && state->end > end) { |
| prealloc = alloc_extent_state_atomic(prealloc); |
| if (!prealloc) { |
| ret = -ENOMEM; |
| goto out; |
| } |
| |
| ret = split_state(tree, state, prealloc, end + 1); |
| if (ret) |
| extent_io_tree_panic(tree, state, "split", ret); |
| |
| set_state_bits(tree, prealloc, bits, NULL); |
| cache_state(prealloc, cached_state); |
| clear_state_bit(tree, prealloc, clear_bits, 0, NULL); |
| prealloc = NULL; |
| goto out; |
| } |
| |
| search_again: |
| if (start > end) |
| goto out; |
| spin_unlock(&tree->lock); |
| cond_resched(); |
| first_iteration = false; |
| goto again; |
| |
| out: |
| spin_unlock(&tree->lock); |
| if (prealloc) |
| free_extent_state(prealloc); |
| |
| return ret; |
| } |
| |
| /* |
| * Find the first range that has @bits not set. This range could start before |
| * @start. |
| * |
| * @tree: the tree to search |
| * @start: offset at/after which the found extent should start |
| * @start_ret: records the beginning of the range |
| * @end_ret: records the end of the range (inclusive) |
| * @bits: the set of bits which must be unset |
| * |
| * Since unallocated range is also considered one which doesn't have the bits |
| * set it's possible that @end_ret contains -1, this happens in case the range |
| * spans (last_range_end, end of device]. In this case it's up to the caller to |
| * trim @end_ret to the appropriate size. |
| */ |
| void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start, |
| u64 *start_ret, u64 *end_ret, u32 bits) |
| { |
| struct extent_state *state; |
| struct extent_state *prev = NULL, *next = NULL; |
| |
| spin_lock(&tree->lock); |
| |
| /* Find first extent with bits cleared */ |
| while (1) { |
| state = tree_search_prev_next(tree, start, &prev, &next); |
| if (!state && !next && !prev) { |
| /* |
| * Tree is completely empty, send full range and let |
| * caller deal with it |
| */ |
| *start_ret = 0; |
| *end_ret = -1; |
| goto out; |
| } else if (!state && !next) { |
| /* |
| * We are past the last allocated chunk, set start at |
| * the end of the last extent. |
| */ |
| *start_ret = prev->end + 1; |
| *end_ret = -1; |
| goto out; |
| } else if (!state) { |
| state = next; |
| } |
| |
| /* |
| * At this point 'state' either contains 'start' or start is |
| * before 'state' |
| */ |
| if (in_range(start, state->start, state->end - state->start + 1)) { |
| if (state->state & bits) { |
| /* |
| * |--range with bits sets--| |
| * | |
| * start |
| */ |
| start = state->end + 1; |
| } else { |
| /* |
| * 'start' falls within a range that doesn't |
| * have the bits set, so take its start as the |
| * beginning of the desired range |
| * |
| * |--range with bits cleared----| |
| * | |
| * start |
| */ |
| *start_ret = state->start; |
| break; |
| } |
| } else { |
| /* |
| * |---prev range---|---hole/unset---|---node range---| |
| * | |
| * start |
| * |
| * or |
| * |
| * |---hole/unset--||--first node--| |
| * 0 | |
| * start |
| */ |
| if (prev) |
| *start_ret = prev->end + 1; |
| else |
| *start_ret = 0; |
| break; |
| } |
| } |
| |
| /* |
| * Find the longest stretch from start until an entry which has the |
| * bits set |
| */ |
| while (state) { |
| if (state->end >= start && !(state->state & bits)) { |
| *end_ret = state->end; |
| } else { |
| *end_ret = state->start - 1; |
| break; |
| } |
| state = next_state(state); |
| } |
| out: |
| spin_unlock(&tree->lock); |
| } |
| |
| /* |
| * Count the number of bytes in the tree that have a given bit(s) set for a |
| * given range. |
| * |
| * @tree: The io tree to search. |
| * @start: The start offset of the range. This value is updated to the |
| * offset of the first byte found with the given bit(s), so it |
| * can end up being bigger than the initial value. |
| * @search_end: The end offset (inclusive value) of the search range. |
| * @max_bytes: The maximum byte count we are interested. The search stops |
| * once it reaches this count. |
| * @bits: The bits the range must have in order to be accounted for. |
| * If multiple bits are set, then only subranges that have all |
| * the bits set are accounted for. |
| * @contig: Indicate if we should ignore holes in the range or not. If |
| * this is true, then stop once we find a hole. |
| * @cached_state: A cached state to be used across multiple calls to this |
| * function in order to speedup searches. Use NULL if this is |
| * called only once or if each call does not start where the |
| * previous one ended. |
| * |
| * Returns the total number of bytes found within the given range that have |
| * all given bits set. If the returned number of bytes is greater than zero |
| * then @start is updated with the offset of the first byte with the bits set. |
| */ |
| u64 count_range_bits(struct extent_io_tree *tree, |
| u64 *start, u64 search_end, u64 max_bytes, |
| u32 bits, int contig, |
| struct extent_state **cached_state) |
| { |
| struct extent_state *state = NULL; |
| struct extent_state *cached; |
| u64 cur_start = *start; |
| u64 total_bytes = 0; |
| u64 last = 0; |
| int found = 0; |
| |
| if (WARN_ON(search_end < cur_start)) |
| return 0; |
| |
| spin_lock(&tree->lock); |
| |
| if (!cached_state || !*cached_state) |
| goto search; |
| |
| cached = *cached_state; |
| |
| if (!extent_state_in_tree(cached)) |
| goto search; |
| |
| if (cached->start <= cur_start && cur_start <= cached->end) { |
| state = cached; |
| } else if (cached->start > cur_start) { |
| struct extent_state *prev; |
| |
| /* |
| * The cached state starts after our search range's start. Check |
| * if the previous state record starts at or before the range we |
| * are looking for, and if so, use it - this is a common case |
| * when there are holes between records in the tree. If there is |
| * no previous state record, we can start from our cached state. |
| */ |
| prev = prev_state(cached); |
| if (!prev) |
| state = cached; |
| else if (prev->start <= cur_start && cur_start <= prev->end) |
| state = prev; |
| } |
| |
| /* |
| * This search will find all the extents that end after our range |
| * starts. |
| */ |
| search: |
| if (!state) |
| state = tree_search(tree, cur_start); |
| |
| while (state) { |
| if (state->start > search_end) |
| break; |
| if (contig && found && state->start > last + 1) |
| break; |
| if (state->end >= cur_start && (state->state & bits) == bits) { |
| total_bytes += min(search_end, state->end) + 1 - |
| max(cur_start, state->start); |
| if (total_bytes >= max_bytes) |
| break; |
| if (!found) { |
| *start = max(cur_start, state->start); |
| found = 1; |
| } |
| last = state->end; |
| } else if (contig && found) { |
| break; |
| } |
| state = next_state(state); |
| } |
| |
| if (cached_state) { |
| free_extent_state(*cached_state); |
| *cached_state = state; |
| if (state) |
| refcount_inc(&state->refs); |
| } |
| |
| spin_unlock(&tree->lock); |
| |
| return total_bytes; |
| } |
| |
| /* |
| * Check if the single @bit exists in the given range. |
| */ |
| bool test_range_bit_exists(struct extent_io_tree *tree, u64 start, u64 end, u32 bit) |
| { |
| struct extent_state *state = NULL; |
| bool bitset = false; |
| |
| ASSERT(is_power_of_2(bit)); |
| |
| spin_lock(&tree->lock); |
| state = tree_search(tree, start); |
| while (state && start <= end) { |
| if (state->start > end) |
| break; |
| |
| if (state->state & bit) { |
| bitset = true; |
| break; |
| } |
| |
| /* If state->end is (u64)-1, start will overflow to 0 */ |
| start = state->end + 1; |
| if (start > end || start == 0) |
| break; |
| state = next_state(state); |
| } |
| spin_unlock(&tree->lock); |
| return bitset; |
| } |
| |
| /* |
| * Check if the whole range [@start,@end) contains the single @bit set. |
| */ |
| bool test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, u32 bit, |
| struct extent_state *cached) |
| { |
| struct extent_state *state = NULL; |
| bool bitset = true; |
| |
| ASSERT(is_power_of_2(bit)); |
| |
| spin_lock(&tree->lock); |
| if (cached && extent_state_in_tree(cached) && cached->start <= start && |
| cached->end > start) |
| state = cached; |
| else |
| state = tree_search(tree, start); |
| while (state && start <= end) { |
| if (state->start > start) { |
| bitset = false; |
| break; |
| } |
| |
| if (state->start > end) |
| break; |
| |
| if ((state->state & bit) == 0) { |
| bitset = false; |
| break; |
| } |
| |
| if (state->end == (u64)-1) |
| break; |
| |
| /* |
| * Last entry (if state->end is (u64)-1 and overflow happens), |
| * or next entry starts after the range. |
| */ |
| start = state->end + 1; |
| if (start > end || start == 0) |
| break; |
| state = next_state(state); |
| } |
| |
| /* We ran out of states and were still inside of our range. */ |
| if (!state) |
| bitset = false; |
| spin_unlock(&tree->lock); |
| return bitset; |
| } |
| |
| /* Wrappers around set/clear extent bit */ |
| int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, |
| u32 bits, struct extent_changeset *changeset) |
| { |
| /* |
| * We don't support EXTENT_LOCK_BITS yet, as current changeset will |
| * record any bits changed, so for EXTENT_LOCK_BITS case, it will either |
| * fail with -EEXIST or changeset will record the whole range. |
| */ |
| ASSERT(!(bits & EXTENT_LOCK_BITS)); |
| |
| return __set_extent_bit(tree, start, end, bits, NULL, NULL, NULL, changeset); |
| } |
| |
| int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, |
| u32 bits, struct extent_changeset *changeset) |
| { |
| /* |
| * Don't support EXTENT_LOCK_BITS case, same reason as |
| * set_record_extent_bits(). |
| */ |
| ASSERT(!(bits & EXTENT_LOCK_BITS)); |
| |
| return __clear_extent_bit(tree, start, end, bits, NULL, changeset); |
| } |
| |
| bool __try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, u32 bits, |
| struct extent_state **cached) |
| { |
| int err; |
| u64 failed_start; |
| |
| err = __set_extent_bit(tree, start, end, bits, &failed_start, |
| NULL, cached, NULL); |
| if (err == -EEXIST) { |
| if (failed_start > start) |
| clear_extent_bit(tree, start, failed_start - 1, bits, cached); |
| return 0; |
| } |
| return 1; |
| } |
| |
| /* |
| * Either insert or lock state struct between start and end use mask to tell |
| * us if waiting is desired. |
| */ |
| int __lock_extent(struct extent_io_tree *tree, u64 start, u64 end, u32 bits, |
| struct extent_state **cached_state) |
| { |
| struct extent_state *failed_state = NULL; |
| int err; |
| u64 failed_start; |
| |
| err = __set_extent_bit(tree, start, end, bits, &failed_start, |
| &failed_state, cached_state, NULL); |
| while (err == -EEXIST) { |
| if (failed_start != start) |
| clear_extent_bit(tree, start, failed_start - 1, |
| bits, cached_state); |
| |
| wait_extent_bit(tree, failed_start, end, bits, &failed_state); |
| err = __set_extent_bit(tree, start, end, bits, |
| &failed_start, &failed_state, |
| cached_state, NULL); |
| } |
| return err; |
| } |
| |
| void __cold extent_state_free_cachep(void) |
| { |
| btrfs_extent_state_leak_debug_check(); |
| kmem_cache_destroy(extent_state_cache); |
| } |
| |
| int __init extent_state_init_cachep(void) |
| { |
| extent_state_cache = kmem_cache_create("btrfs_extent_state", |
| sizeof(struct extent_state), 0, 0, |
| NULL); |
| if (!extent_state_cache) |
| return -ENOMEM; |
| |
| return 0; |
| } |