Btrfs: Fix some data=ordered related data corruptions
Stress testing was showing data checksum errors, most of which were caused
by a lookup bug in the extent_map tree. The tree was caching the last
pointer returned, and searches would check the last pointer first.
However, search callers also expect the search to return the very first
matching extent in the range, which was not always the case when the
cached last pointer was checked first.
For now, the code to cache the last return value is just removed. The caching
would be easy to fix, but I think lookups are rare enough that it isn't required anymore.
This commit also replaces do_sync_mapping_range with a local copy of the
related functions.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 60852ad..3da12a4 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -485,7 +485,7 @@
fixup = kzalloc(sizeof(*fixup), GFP_NOFS);
if (!fixup)
return -EAGAIN;
-printk("queueing worker to fixup page %lu %Lu\n", inode->i_ino, page_offset(page));
+
SetPageChecked(page);
page_cache_get(page);
fixup->work.func = btrfs_writepage_fixup_worker;
@@ -502,11 +502,13 @@
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
struct extent_map *em;
+ struct extent_map *em_orig;
u64 alloc_hint = 0;
u64 clear_start;
u64 clear_end;
struct list_head list;
struct btrfs_key ins;
+ struct rb_node *rb;
int ret;
ret = btrfs_dec_test_ordered_pending(inode, start, end - start + 1);
@@ -535,6 +537,22 @@
mutex_lock(&BTRFS_I(inode)->extent_mutex);
+ spin_lock(&em_tree->lock);
+ clear_start = ordered_extent->file_offset;
+ clear_end = ordered_extent->file_offset + ordered_extent->len;
+ em = lookup_extent_mapping(em_tree, clear_start,
+ ordered_extent->len);
+ em_orig = em;
+ while(em && clear_start < extent_map_end(em) && clear_end > em->start) {
+ clear_bit(EXTENT_FLAG_PINNED, &em->flags);
+ rb = rb_next(&em->rb_node);
+ if (!rb)
+ break;
+ em = rb_entry(rb, struct extent_map, rb_node);
+ }
+ free_extent_map(em_orig);
+ spin_unlock(&em_tree->lock);
+
ret = btrfs_drop_extents(trans, root, inode,
ordered_extent->file_offset,
ordered_extent->file_offset +
@@ -548,22 +566,6 @@
ordered_extent->len, 0);
BUG_ON(ret);
- spin_lock(&em_tree->lock);
- clear_start = ordered_extent->file_offset;
- clear_end = ordered_extent->file_offset + ordered_extent->len;
- while(clear_start < clear_end) {
- em = lookup_extent_mapping(em_tree, clear_start,
- clear_end - clear_start);
- if (em) {
- clear_bit(EXTENT_FLAG_PINNED, &em->flags);
- clear_start = em->start + em->len;
- free_extent_map(em);
- } else {
- break;
- }
- }
- spin_unlock(&em_tree->lock);
-
btrfs_drop_extent_cache(inode, ordered_extent->file_offset,
ordered_extent->file_offset +
ordered_extent->len - 1);
@@ -2318,7 +2320,7 @@
u64 extent_end = 0;
u64 objectid = inode->i_ino;
u32 found_type;
- struct btrfs_path *path;
+ struct btrfs_path *path = NULL;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_file_extent_item *item;
struct extent_buffer *leaf;
@@ -2328,9 +2330,6 @@
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
struct btrfs_trans_handle *trans = NULL;
- path = btrfs_alloc_path();
- BUG_ON(!path);
-
again:
spin_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, start, len);
@@ -2354,6 +2353,12 @@
em->bdev = root->fs_info->fs_devices->latest_bdev;
em->start = EXTENT_MAP_HOLE;
em->len = (u64)-1;
+
+ if (!path) {
+ path = btrfs_alloc_path();
+ BUG_ON(!path);
+ }
+
ret = btrfs_lookup_file_extent(trans, root, path,
objectid, start, trans != NULL);
if (ret < 0) {
@@ -2530,7 +2535,8 @@
}
spin_unlock(&em_tree->lock);
out:
- btrfs_free_path(path);
+ if (path)
+ btrfs_free_path(path);
if (trans) {
ret = btrfs_end_transaction(trans, root);
if (!err) {
@@ -2643,8 +2649,8 @@
return extent_write_full_page(tree, page, btrfs_get_extent, wbc);
}
-static int btrfs_writepages(struct address_space *mapping,
- struct writeback_control *wbc)
+int btrfs_writepages(struct address_space *mapping,
+ struct writeback_control *wbc)
{
struct extent_io_tree *tree;
tree = &BTRFS_I(mapping->host)->io_tree;