Btrfs: Make the resizer work based on shrinking and growing devices

Signed-off-by: Chris Mason <chris.mason@oracle.com>
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index f00c4be..ac7106e 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -474,6 +474,7 @@
 	u64 pinned;
 	u64 flags;
 	int cached;
+	int ro;
 };
 
 struct btrfs_device;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 7183826..a9ce491 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -818,6 +818,10 @@
 		return fs_info->tree_root;
 	if (location->objectid == BTRFS_EXTENT_TREE_OBJECTID)
 		return fs_info->extent_root;
+	if (location->objectid == BTRFS_CHUNK_TREE_OBJECTID)
+		return fs_info->chunk_root;
+	if (location->objectid == BTRFS_DEV_TREE_OBJECTID)
+		return fs_info->dev_root;
 
 	root = radix_tree_lookup(&fs_info->fs_roots_radix,
 				 (unsigned long)location->objectid);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index c49592c..6540095 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -187,6 +187,7 @@
 
 	if (!cache)
 		goto out;
+
 	total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
 	free_space_cache = &root->fs_info->free_space_cache;
 
@@ -196,7 +197,7 @@
 		goto out;
 
 	last = max(search_start, cache->key.objectid);
-	if (!block_group_bits(cache, data)) {
+	if (!block_group_bits(cache, data) || cache->ro) {
 		goto new_group;
 	}
 
@@ -221,6 +222,8 @@
 			continue;
 		}
 		spin_unlock_irq(&free_space_cache->lock);
+		if (cache->ro)
+			goto new_group;
 		if (start + num > cache->key.objectid + cache->key.offset)
 			goto new_group;
 		if (start + num  > total_fs_bytes)
@@ -319,7 +322,7 @@
 	if (search_start && search_start < total_fs_bytes) {
 		struct btrfs_block_group_cache *shint;
 		shint = btrfs_lookup_block_group(info, search_start);
-		if (shint && block_group_bits(shint, data)) {
+		if (shint && block_group_bits(shint, data) && !shint->ro) {
 			used = btrfs_block_group_used(&shint->item);
 			if (used + shint->pinned <
 			    div_factor(shint->key.offset, factor)) {
@@ -327,7 +330,7 @@
 			}
 		}
 	}
-	if (hint && block_group_bits(hint, data) &&
+	if (hint && !hint->ro && block_group_bits(hint, data) &&
 	    hint->key.objectid < total_fs_bytes) {
 		used = btrfs_block_group_used(&hint->item);
 		if (used + hint->pinned <
@@ -364,7 +367,7 @@
 		if (cache->key.objectid > total_fs_bytes)
 			break;
 
-		if (block_group_bits(cache, data)) {
+		if (!cache->ro && block_group_bits(cache, data)) {
 			if (full_search)
 				free_check = cache->key.offset;
 			else
@@ -1020,6 +1023,7 @@
 	if (found) {
 		found->total_bytes += total_bytes;
 		found->bytes_used += bytes_used;
+		found->full = 0;
 		WARN_ON(found->total_bytes < found->bytes_used);
 		*space_info = found;
 		return 0;
@@ -1700,7 +1704,6 @@
 	u64 super_used;
 	u64 root_used;
 	u64 search_start = 0;
-	u64 new_hint;
 	u64 alloc_profile;
 	u32 sizes[2];
 	struct btrfs_fs_info *info = root->fs_info;
@@ -1724,7 +1727,7 @@
 		data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
 	}
 again:
-	if (root->ref_cows) {
+	if (root != root->fs_info->extent_root) {
 		if (!(data & BTRFS_BLOCK_GROUP_METADATA)) {
 			ret = do_chunk_alloc(trans, root->fs_info->extent_root,
 					     2 * 1024 * 1024,
@@ -1738,10 +1741,6 @@
 		BUG_ON(ret);
 	}
 
-	new_hint = max(hint_byte, root->fs_info->alloc_start);
-	if (new_hint < btrfs_super_total_bytes(&info->super_copy))
-		hint_byte = new_hint;
-
 	WARN_ON(num_bytes < root->sectorsize);
 	ret = find_free_extent(trans, root, num_bytes, empty_size,
 			       search_start, search_end, hint_byte, ins,
@@ -2473,15 +2472,16 @@
 	return ret;
 }
 
-int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size)
+int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start)
 {
 	struct btrfs_trans_handle *trans;
 	struct btrfs_root *tree_root = root->fs_info->tree_root;
 	struct btrfs_path *path;
 	u64 cur_byte;
 	u64 total_found;
+	u64 shrink_last_byte;
+	struct btrfs_block_group_cache *shrink_block_group;
 	struct btrfs_fs_info *info = root->fs_info;
-	struct extent_io_tree *block_group_cache;
 	struct btrfs_key key;
 	struct btrfs_key found_key;
 	struct extent_buffer *leaf;
@@ -2489,17 +2489,29 @@
 	int ret;
 	int progress = 0;
 
-	btrfs_set_super_total_bytes(&info->super_copy, new_size);
-	clear_extent_dirty(&info->free_space_cache, new_size, (u64)-1,
-			   GFP_NOFS);
-	block_group_cache = &info->block_group_cache;
+	shrink_block_group = btrfs_lookup_block_group(root->fs_info,
+						      shrink_start);
+	BUG_ON(!shrink_block_group);
+
+	shrink_last_byte = shrink_start + shrink_block_group->key.offset;
+
+	shrink_block_group->space_info->total_bytes -=
+		shrink_block_group->key.offset;
+printk("shrink_extent_tree %Lu -> %Lu type %Lu\n", shrink_start, shrink_last_byte, shrink_block_group->flags);
 	path = btrfs_alloc_path();
 	root = root->fs_info->extent_root;
 	path->reada = 2;
 
 again:
+	trans = btrfs_start_transaction(root, 1);
+	do_chunk_alloc(trans, root->fs_info->extent_root,
+			btrfs_block_group_used(&shrink_block_group->item) +
+			2 * 1024 * 1024, shrink_block_group->flags);
+	btrfs_end_transaction(trans, root);
+	shrink_block_group->ro = 1;
+
 	total_found = 0;
-	key.objectid = new_size;
+	key.objectid = shrink_start;
 	key.offset = 0;
 	key.type = 0;
 	cur_byte = key.objectid;
@@ -2511,10 +2523,12 @@
 	ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY);
 	if (ret < 0)
 		goto out;
+
 	if (ret == 0) {
 		leaf = path->nodes[0];
 		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
-		if (found_key.objectid + found_key.offset > new_size) {
+		if (found_key.objectid + found_key.offset > shrink_start &&
+		    found_key.objectid < shrink_last_byte) {
 			cur_byte = found_key.objectid;
 			key.objectid = cur_byte;
 		}
@@ -2543,6 +2557,9 @@
 
 		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
 
+		if (found_key.objectid >= shrink_last_byte)
+			break;
+
 		if (progress && need_resched()) {
 			memcpy(&key, &found_key, sizeof(key));
 			mutex_unlock(&root->fs_info->fs_mutex);
@@ -2583,68 +2600,31 @@
 		goto again;
 	}
 
+	/*
+	 * we've freed all the extents, now remove the block
+	 * group item from the tree
+	 */
 	trans = btrfs_start_transaction(root, 1);
-	key.objectid = new_size;
-	key.offset = 0;
-	key.type = 0;
-	while(1) {
-		u64 ptr;
+	memcpy(&key, &shrink_block_group->key, sizeof(key));
 
-		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
-		if (ret < 0)
-			goto out;
+	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+	if (ret > 0)
+		ret = -EIO;
+	if (ret < 0)
+		goto out;
 
-		leaf = path->nodes[0];
-		nritems = btrfs_header_nritems(leaf);
-bg_next:
-		if (path->slots[0] >= nritems) {
-			ret = btrfs_next_leaf(root, path);
-			if (ret < 0)
-				break;
-			if (ret == 1) {
-				ret = 0;
-				break;
-			}
-			leaf = path->nodes[0];
-			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+	leaf = path->nodes[0];
+	nritems = btrfs_header_nritems(leaf);
+	btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+	kfree(shrink_block_group);
 
-			/*
-			 * btrfs_next_leaf doesn't cow buffers, we have to
-			 * do the search again
-			 */
-			memcpy(&key, &found_key, sizeof(key));
-			btrfs_release_path(root, path);
-			goto resched_check;
-		}
+	clear_extent_bits(&info->block_group_cache, found_key.objectid,
+			  found_key.objectid + found_key.offset - 1,
+			  (unsigned int)-1, GFP_NOFS);
 
-		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
-		if (btrfs_key_type(&found_key) != BTRFS_BLOCK_GROUP_ITEM_KEY) {
-			printk("shrinker found key %Lu %u %Lu\n",
-				found_key.objectid, found_key.type,
-				found_key.offset);
-			path->slots[0]++;
-			goto bg_next;
-		}
-		ret = get_state_private(&info->block_group_cache,
-					found_key.objectid, &ptr);
-		if (!ret)
-			kfree((void *)(unsigned long)ptr);
-
-		clear_extent_bits(&info->block_group_cache, found_key.objectid,
-				  found_key.objectid + found_key.offset - 1,
-				  (unsigned int)-1, GFP_NOFS);
-
-		key.objectid = found_key.objectid + 1;
-		btrfs_del_item(trans, root, path);
-		btrfs_release_path(root, path);
-resched_check:
-		if (need_resched()) {
-			mutex_unlock(&root->fs_info->fs_mutex);
-			cond_resched();
-			mutex_lock(&root->fs_info->fs_mutex);
-		}
-	}
-	clear_extent_dirty(&info->free_space_cache, new_size, (u64)-1,
+	btrfs_del_item(trans, root, path);
+	clear_extent_dirty(&info->free_space_cache,
+			   shrink_start, shrink_last_byte - 1,
 			   GFP_NOFS);
 	btrfs_commit_transaction(trans, root);
 out:
@@ -2652,13 +2632,6 @@
 	return ret;
 }
 
-int btrfs_grow_extent_tree(struct btrfs_trans_handle *trans,
-			   struct btrfs_root *root, u64 new_size)
-{
-	btrfs_set_super_total_bytes(&root->fs_info->super_copy, new_size);
-	return 0;
-}
-
 int find_first_block_group(struct btrfs_root *root, struct btrfs_path *path,
 			   struct btrfs_key *key)
 {
@@ -2726,7 +2699,7 @@
 
 		leaf = path->nodes[0];
 		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
-		cache = kmalloc(sizeof(*cache), GFP_NOFS);
+		cache = kzalloc(sizeof(*cache), GFP_NOFS);
 		if (!cache) {
 			ret = -ENOMEM;
 			break;
@@ -2736,8 +2709,6 @@
 				   btrfs_item_ptr_offset(leaf, path->slots[0]),
 				   sizeof(cache->item));
 		memcpy(&cache->key, &found_key, sizeof(found_key));
-		cache->cached = 0;
-		cache->pinned = 0;
 
 		key.objectid = found_key.objectid + found_key.offset;
 		btrfs_release_path(root, path);
@@ -2789,12 +2760,10 @@
 	extent_root = root->fs_info->extent_root;
 	block_group_cache = &root->fs_info->block_group_cache;
 
-	cache = kmalloc(sizeof(*cache), GFP_NOFS);
+	cache = kzalloc(sizeof(*cache), GFP_NOFS);
 	BUG_ON(!cache);
 	cache->key.objectid = chunk_offset;
 	cache->key.offset = size;
-	cache->cached = 0;
-	cache->pinned = 0;
 
 	btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY);
 	memset(&cache->item, 0, sizeof(cache->item));
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index b31f52d..4d12aa5 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -16,6 +16,7 @@
  * Boston, MA 021110-1307, USA.
  */
 
+#include <linux/kernel.h>
 #include <linux/bio.h>
 #include <linux/buffer_head.h>
 #include <linux/fs.h>
@@ -2887,9 +2888,12 @@
 {
 	u64 new_size;
 	u64 old_size;
+	u64 devid = 1;
 	struct btrfs_ioctl_vol_args *vol_args;
 	struct btrfs_trans_handle *trans;
+	struct btrfs_device *device = NULL;
 	char *sizestr;
+	char *devstr = NULL;
 	int ret = 0;
 	int namelen;
 	int mod = 0;
@@ -2909,9 +2913,25 @@
 		goto out;
 	}
 
+	mutex_lock(&root->fs_info->fs_mutex);
 	sizestr = vol_args->name;
+	devstr = strchr(sizestr, ':');
+	if (devstr) {
+		char *end;
+		sizestr = devstr + 1;
+		*devstr = '\0';
+		devstr = vol_args->name;
+		devid = simple_strtoull(devstr, &end, 10);
+printk("resizing devid %Lu\n", devid);
+	}
+	device = btrfs_find_device(root, devid, NULL);
+	if (!device) {
+		printk("resizer unable to find device %Lu\n", devid);
+		ret = -EINVAL;
+		goto out_unlock;
+	}
 	if (!strcmp(sizestr, "max"))
-		new_size = root->fs_info->sb->s_bdev->bd_inode->i_size;
+		new_size = device->bdev->bd_inode->i_size;
 	else {
 		if (sizestr[0] == '-') {
 			mod = -1;
@@ -2923,12 +2943,11 @@
 		new_size = btrfs_parse_size(sizestr);
 		if (new_size == 0) {
 			ret = -EINVAL;
-			goto out;
+			goto out_unlock;
 		}
 	}
 
-	mutex_lock(&root->fs_info->fs_mutex);
-	old_size = btrfs_super_total_bytes(&root->fs_info->super_copy);
+	old_size = device->total_bytes;
 
 	if (mod < 0) {
 		if (new_size > old_size) {
@@ -2944,7 +2963,7 @@
 		ret = -EINVAL;
 		goto out_unlock;
 	}
-	if (new_size > root->fs_info->sb->s_bdev->bd_inode->i_size) {
+	if (new_size > device->bdev->bd_inode->i_size) {
 		ret = -EFBIG;
 		goto out_unlock;
 	}
@@ -2952,13 +2971,14 @@
 	do_div(new_size, root->sectorsize);
 	new_size *= root->sectorsize;
 
-printk("new size is %Lu\n", new_size);
+printk("new size for %s is %llu\n", device->name, (unsigned long long)new_size);
+
 	if (new_size > old_size) {
 		trans = btrfs_start_transaction(root, 1);
-		ret = btrfs_grow_extent_tree(trans, root, new_size);
+		ret = btrfs_grow_device(trans, device, new_size);
 		btrfs_commit_transaction(trans, root);
 	} else {
-		ret = btrfs_shrink_extent_tree(root, new_size);
+		ret = btrfs_shrink_device(device, new_size);
 	}
 
 out_unlock:
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index c63a982..a2c56de1 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -77,7 +77,7 @@
 	list_for_each(cur, head) {
 		dev = list_entry(cur, struct btrfs_device, dev_list);
 		if (dev->devid == devid &&
-		    !memcmp(dev->uuid, uuid, BTRFS_UUID_SIZE)) {
+		    (!uuid || !memcmp(dev->uuid, uuid, BTRFS_UUID_SIZE))) {
 			return dev;
 		}
 	}
@@ -293,6 +293,10 @@
 	 * so we make sure to start at an offset of at least 1MB
 	 */
 	search_start = max((u64)1024 * 1024, search_start);
+
+	if (root->fs_info->alloc_start + num_bytes <= device->total_bytes)
+		search_start = max(root->fs_info->alloc_start, search_start);
+
 	key.objectid = device->devid;
 	key.offset = search_start;
 	key.type = BTRFS_DEV_EXTENT_KEY;
@@ -380,6 +384,33 @@
 	return ret;
 }
 
+/* delete the DEV_EXTENT item keyed (device->devid, start) from the dev root */
+int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
+			  struct btrfs_device *device,
+			  u64 start)
+{
+	struct btrfs_root *dev_root = device->dev_root;
+	struct btrfs_path *path = btrfs_alloc_path();
+	struct btrfs_key dev_key;
+	int err;
+
+	if (!path)
+		return -ENOMEM;
+
+	dev_key.type = BTRFS_DEV_EXTENT_KEY;
+	dev_key.objectid = device->devid;
+	dev_key.offset = start;
+
+	/* any non-zero result from the search or the delete trips BUG_ON */
+	err = btrfs_search_slot(trans, dev_root, &dev_key, path, -1, 1);
+	BUG_ON(err);
+	err = btrfs_del_item(trans, dev_root, path);
+	BUG_ON(err);
+
+	btrfs_free_path(path);
+	return err;
+}
+
 int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
 			   struct btrfs_device *device,
 			   u64 chunk_tree, u64 chunk_objectid,
@@ -560,6 +591,7 @@
 	btrfs_free_path(path);
 	return ret;
 }
+
 int btrfs_update_device(struct btrfs_trans_handle *trans,
 			struct btrfs_device *device)
 {
@@ -606,6 +638,254 @@
 	return ret;
 }
 
+/* grow @device to @new_size bytes and add the delta to the super total */
+int btrfs_grow_device(struct btrfs_trans_handle *trans,
+		      struct btrfs_device *device, u64 new_size)
+{
+	struct btrfs_super_block *sb = &device->dev_root->fs_info->super_copy;
+	u64 diff = new_size - device->total_bytes;
+
+	btrfs_set_super_total_bytes(sb, btrfs_super_total_bytes(sb) + diff);
+	device->total_bytes = new_size;	/* as btrfs_shrink_device does */
+	return btrfs_update_device(trans, device);
+}
+
+/* drop the CHUNK_ITEM (chunk_objectid, chunk_offset) from the chunk root */
+static int btrfs_free_chunk(struct btrfs_trans_handle *trans,
+			    struct btrfs_root *root,
+			    u64 chunk_tree, u64 chunk_objectid,
+			    u64 chunk_offset)
+{
+	struct btrfs_root *chunk_root = root->fs_info->chunk_root;
+	struct btrfs_path *path = btrfs_alloc_path();
+	struct btrfs_key chunk_key;
+	int err;
+
+	if (!path)
+		return -ENOMEM;
+
+	chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
+	chunk_key.objectid = chunk_objectid;
+	chunk_key.offset = chunk_offset;
+
+	/* @chunk_tree is unused; a non-zero search or delete result is a BUG */
+	err = btrfs_search_slot(trans, chunk_root, &chunk_key, path, -1, 1);
+	BUG_ON(err);
+	err = btrfs_del_item(trans, chunk_root, path);
+	BUG_ON(err);
+
+	btrfs_free_path(path);
+	return 0;
+}
+
+/*
+ * remove every entry keyed (chunk_objectid, CHUNK_ITEM, chunk_offset) from
+ * the sys_chunk_array of fs_info->super_copy, compacting the array in place
+ * and shrinking its recorded size.  Returns 0 (also when nothing matched),
+ * or -EIO when an entry that is not a chunk item is found.
+ */
+int btrfs_del_sys_chunk(struct btrfs_root *root, u64 chunk_objectid,
+			u64 chunk_offset)
+{
+	struct btrfs_super_block *sb = &root->fs_info->super_copy;
+	u32 bytes_left = btrfs_super_sys_array_size(sb);
+	u8 *entry = sb->sys_chunk_array;
+	u32 pos = 0;
+
+	while (pos < bytes_left) {
+		struct btrfs_disk_key *dkey = (struct btrfs_disk_key *)entry;
+		struct btrfs_chunk *chunk;
+		struct btrfs_key key;
+		u32 entry_len;
+
+		btrfs_disk_key_to_cpu(&key, dkey);
+		if (key.type != BTRFS_CHUNK_ITEM_KEY)
+			return -EIO;
+
+		/* an entry is a disk key immediately followed by its chunk */
+		chunk = (struct btrfs_chunk *)(entry + sizeof(*dkey));
+		entry_len = sizeof(*dkey) + btrfs_chunk_item_size(
+				btrfs_stack_chunk_num_stripes(chunk));
+
+		if (key.objectid != chunk_objectid ||
+		    key.offset != chunk_offset) {
+			entry += entry_len;
+			pos += entry_len;
+			continue;
+		}
+
+		/* slide the tail down over the match and re-check this slot */
+		memmove(entry, entry + entry_len,
+			bytes_left - (pos + entry_len));
+		bytes_left -= entry_len;
+		btrfs_set_super_sys_array_size(sb, bytes_left);
+	}
+
+	return 0;
+}
+
+
+/*
+ * relocate all the extents inside the chunk at @chunk_offset, then delete
+ * its dev extents and chunk item.  Returns 0; each failure is a BUG_ON().
+ */
+int btrfs_relocate_chunk(struct btrfs_root *root,
+			 u64 chunk_tree, u64 chunk_objectid,
+			 u64 chunk_offset)
+{
+	struct extent_map_tree *em_tree;
+	struct btrfs_root *extent_root;
+	struct btrfs_trans_handle *trans;
+	struct extent_map *em;
+	struct map_lookup *map;
+	int ret;
+	int i;
+
+	root = root->fs_info->chunk_root;
+	extent_root = root->fs_info->extent_root;
+	em_tree = &root->fs_info->mapping_tree.map_tree;
+
+	/* step one, relocate all the extents inside this chunk */
+	ret = btrfs_shrink_extent_tree(extent_root, chunk_offset);
+	BUG_ON(ret);
+
+	trans = btrfs_start_transaction(root, 1);
+	BUG_ON(!trans);
+
+	/*
+	 * step two, delete the device extents and the
+	 * chunk tree entries
+	 */
+	spin_lock(&em_tree->lock);
+	em = lookup_extent_mapping(em_tree, chunk_offset, 1);
+	spin_unlock(&em_tree->lock);
+	/* NOTE(review): em is dereferenced unchecked; confirm it can't be NULL */
+	BUG_ON(em->start > chunk_offset || em->start + em->len < chunk_offset);
+	map = (struct map_lookup *)em->bdev;
+
+	for (i = 0; i < map->num_stripes; i++) {
+		ret = btrfs_free_dev_extent(trans, map->stripes[i].dev,
+					    map->stripes[i].physical);
+		BUG_ON(ret);
+	}
+	ret = btrfs_free_chunk(trans, root, chunk_tree, chunk_objectid,
+			       chunk_offset);
+	BUG_ON(ret);
+
+	if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
+		ret = btrfs_del_sys_chunk(root, chunk_objectid, chunk_offset);
+		BUG_ON(ret);
+		goto out;
+	}
+	/* NOTE(review): SYSTEM chunks skip the mapping removal below; intended? */
+	spin_lock(&em_tree->lock);
+	remove_extent_mapping(em_tree, em);
+	kfree(map);
+	em->bdev = NULL;
+
+	/* once for the tree */
+	free_extent_map(em);
+	spin_unlock(&em_tree->lock);
+
+out:
+	/* once for us */
+	free_extent_map(em);
+	btrfs_end_transaction(trans, root);
+	return 0;
+}
+
+/*
+ * shrinking a device means finding all of the device extents past
+ * the new size, and then following the back refs to the chunks.
+ * The chunk relocation code actually frees the device extent.
+ * Returns 0, or the error from whichever step failed first.
+ */
+int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
+{
+	struct btrfs_trans_handle *trans;
+	struct btrfs_root *root = device->dev_root;
+	struct btrfs_dev_extent *dev_extent = NULL;
+	struct btrfs_path *path;
+	u64 length;
+	u64 chunk_tree;
+	u64 chunk_objectid;
+	u64 chunk_offset;
+	int ret;
+	int slot;
+	struct extent_buffer *l;
+	struct btrfs_key key;
+	struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
+	u64 old_total = btrfs_super_total_bytes(super_copy);
+	u64 diff = device->total_bytes - new_size;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	trans = btrfs_start_transaction(root, 1);
+	if (!trans) {
+		ret = -ENOMEM;
+		goto done;
+	}
+
+	path->reada = 2;
+	/* NOTE(review): sizes drop before relocation; no rollback on error */
+	device->total_bytes = new_size;
+	ret = btrfs_update_device(trans, device);
+	if (ret) {
+		btrfs_end_transaction(trans, root);
+		goto done;
+	}
+	WARN_ON(diff > old_total);
+	btrfs_set_super_total_bytes(super_copy, old_total - diff);
+	btrfs_end_transaction(trans, root);
+
+	key.objectid = device->devid;
+	key.offset = (u64)-1;	/* search back from the highest offset */
+	key.type = BTRFS_DEV_EXTENT_KEY;
+
+	while (1) {
+		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+		if (ret < 0)
+			goto done;
+
+		ret = btrfs_previous_item(root, path, 0, key.type);
+		if (ret < 0)
+			goto done;
+		if (ret) {	/* presumably: no earlier item of this type */
+			ret = 0;
+			goto done;
+		}
+
+		l = path->nodes[0];
+		slot = path->slots[0];
+		btrfs_item_key_to_cpu(l, &key, path->slots[0]);
+
+		if (key.objectid != device->devid)
+			goto done;
+
+		dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
+		length = btrfs_dev_extent_length(l, dev_extent);
+		/* stop once this extent ends at or below the new size */
+		if (key.offset + length <= new_size)
+			goto done;
+
+		chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
+		chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
+		chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
+		btrfs_release_path(root, path);
+
+		ret = btrfs_relocate_chunk(root, chunk_tree, chunk_objectid,
+					   chunk_offset);
+		if (ret)
+			goto done;
+	}
+
+done:
+	btrfs_free_path(path);
+	return ret;
+}
+
 int btrfs_add_system_chunk(struct btrfs_trans_handle *trans,
 			   struct btrfs_root *root,
 			   struct btrfs_key *key,
@@ -658,6 +938,7 @@
 	u64 dev_offset;
 	struct btrfs_fs_info *info = extent_root->fs_info;
 	struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root;
+	struct btrfs_path *path;
 	struct btrfs_stripe *stripes;
 	struct btrfs_device *device = NULL;
 	struct btrfs_chunk *chunk;
@@ -724,6 +1005,10 @@
 		min_stripe_size = 1 * 1024 * 1024;
 	}
 
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
 	/* we don't want a chunk larger than 10% of the FS */
 	percent_max = div_factor(btrfs_super_total_bytes(&info->super_copy), 1);
 	max_chunk_size = min(percent_max, max_chunk_size);
@@ -759,11 +1044,19 @@
 
 		avail = device->total_bytes - device->bytes_used;
 		cur = cur->next;
+
 		if (avail >= min_free) {
-			list_move_tail(&device->dev_alloc_list, &private_devs);
-			index++;
-			if (type & BTRFS_BLOCK_GROUP_DUP)
+			u64 ignored_start = 0;
+			ret = find_free_dev_extent(trans, device, path,
+						   min_free,
+						   &ignored_start);
+			if (ret == 0) {
+				list_move_tail(&device->dev_alloc_list,
+					       &private_devs);
 				index++;
+				if (type & BTRFS_BLOCK_GROUP_DUP)
+					index++;
+			}
 		} else if (avail > max_avail)
 			max_avail = avail;
 		if (cur == dev_list)
@@ -785,30 +1078,37 @@
 			calc_size = max_avail;
 			goto again;
 		}
+		btrfs_free_path(path);
 		return -ENOSPC;
 	}
 	key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
 	key.type = BTRFS_CHUNK_ITEM_KEY;
 	ret = find_next_chunk(chunk_root, BTRFS_FIRST_CHUNK_TREE_OBJECTID,
 			      &key.offset);
-	if (ret)
+	if (ret) {
+		btrfs_free_path(path);
 		return ret;
+	}
 
 	chunk = kmalloc(btrfs_chunk_item_size(num_stripes), GFP_NOFS);
-	if (!chunk)
+	if (!chunk) {
+		btrfs_free_path(path);
 		return -ENOMEM;
+	}
 
 	map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
 	if (!map) {
 		kfree(chunk);
+		btrfs_free_path(path);
 		return -ENOMEM;
 	}
+	btrfs_free_path(path);
+	path = NULL;
 
 	stripes = &chunk->stripe;
 	*num_bytes = chunk_bytes_by_type(type, calc_size,
 					 num_stripes, sub_stripes);
 
-
 	index = 0;
 printk("new chunk type %Lu start %Lu size %Lu\n", type, key.offset, *num_bytes);
 	while(index < num_stripes) {
@@ -874,6 +1174,11 @@
 	em->len = *num_bytes;
 	em->block_start = 0;
 
+	if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
+		ret = btrfs_add_system_chunk(trans, chunk_root, &key,
+				    chunk, btrfs_chunk_item_size(num_stripes));
+		BUG_ON(ret);
+	}
 	kfree(chunk);
 
 	em_tree = &extent_root->fs_info->mapping_tree.map_tree;
@@ -1376,11 +1681,6 @@
 
 	array_size = btrfs_super_sys_array_size(super_copy);
 
-	/*
-	 * we do this loop twice, once for the device items and
-	 * once for all of the chunks.  This way there are device
-	 * structs filled in for every chunk
-	 */
 	ptr = super_copy->sys_chunk_array;
 	sb_ptr = offsetof(struct btrfs_super_block, sys_chunk_array);
 	cur = 0;
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index a2660d2..6fe8440 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -128,4 +128,9 @@
 int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len);
 int btrfs_unplug_page(struct btrfs_mapping_tree *map_tree,
 		      u64 logical, struct page *page);
+int btrfs_grow_device(struct btrfs_trans_handle *trans,
+		      struct btrfs_device *device, u64 new_size);
+struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid,
+				       u8 *uuid);
+int btrfs_shrink_device(struct btrfs_device *device, u64 new_size);
 #endif