Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable:
  Btrfs: avoid delayed metadata items during commits
  btrfs: fix uninitialized return value
  btrfs: fix wrong reservation when doing delayed inode operations
  btrfs: Remove unused sysfs code
  btrfs: fix dereference of ERR_PTR value
  Btrfs: fix relocation races
  Btrfs: set no_trans_join after trying to expand the transaction
  Btrfs: protect the pending_snapshots list with trans_lock
  Btrfs: fix path leakage on subvol deletion
  Btrfs: drop the delalloc_bytes check in shrink_delalloc
  Btrfs: check the return value from set_anon_super
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 378b5b4..3006287 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -967,6 +967,12 @@
 	struct srcu_struct subvol_srcu;
 
 	spinlock_t trans_lock;
+	/*
+	 * the reloc mutex goes with the trans lock, it is taken
+	 * during commit to protect us from the relocation code
+	 */
+	struct mutex reloc_mutex;
+
 	struct list_head trans_list;
 	struct list_head hashers;
 	struct list_head dead_roots;
@@ -1172,6 +1178,14 @@
 	u32 type;
 
 	u64 highest_objectid;
+
+	/* btrfs_record_root_in_trans is a multi-step process,
+	 * and it can race with the balancing code.   But the
+	 * race is very small, and only the first time the root
+	 * is added to each transaction.  So in_trans_setup
+	 * is used to tell us when more checks are required
+	 */
+	unsigned long in_trans_setup;
 	int ref_cows;
 	int track_dirty;
 	int in_radix;
@@ -1181,7 +1195,6 @@
 	struct btrfs_key defrag_max;
 	int defrag_running;
 	char *name;
-	int in_sysfs;
 
 	/* the dirty list is only used by non-reference counted roots */
 	struct list_head dirty_list;
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 6462c29..f1cbd028 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -297,7 +297,6 @@
 		item->data_len = data_len;
 		item->ins_or_del = 0;
 		item->bytes_reserved = 0;
-		item->block_rsv = NULL;
 		item->delayed_node = NULL;
 		atomic_set(&item->refs, 1);
 	}
@@ -593,10 +592,8 @@
 
 	num_bytes = btrfs_calc_trans_metadata_size(root, 1);
 	ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
-	if (!ret) {
+	if (!ret)
 		item->bytes_reserved = num_bytes;
-		item->block_rsv = dst_rsv;
-	}
 
 	return ret;
 }
@@ -604,10 +601,13 @@
 static void btrfs_delayed_item_release_metadata(struct btrfs_root *root,
 						struct btrfs_delayed_item *item)
 {
+	struct btrfs_block_rsv *rsv;
+
 	if (!item->bytes_reserved)
 		return;
 
-	btrfs_block_rsv_release(root, item->block_rsv,
+	rsv = &root->fs_info->global_block_rsv;
+	btrfs_block_rsv_release(root, rsv,
 				item->bytes_reserved);
 }
 
@@ -1014,6 +1014,7 @@
 	struct btrfs_delayed_root *delayed_root;
 	struct btrfs_delayed_node *curr_node, *prev_node;
 	struct btrfs_path *path;
+	struct btrfs_block_rsv *block_rsv;
 	int ret = 0;
 
 	path = btrfs_alloc_path();
@@ -1021,6 +1022,9 @@
 		return -ENOMEM;
 	path->leave_spinning = 1;
 
+	block_rsv = trans->block_rsv;
+	trans->block_rsv = &root->fs_info->global_block_rsv;
+
 	delayed_root = btrfs_get_delayed_root(root);
 
 	curr_node = btrfs_first_delayed_node(delayed_root);
@@ -1045,6 +1049,7 @@
 	}
 
 	btrfs_free_path(path);
+	trans->block_rsv = block_rsv;
 	return ret;
 }
 
@@ -1052,6 +1057,7 @@
 					      struct btrfs_delayed_node *node)
 {
 	struct btrfs_path *path;
+	struct btrfs_block_rsv *block_rsv;
 	int ret;
 
 	path = btrfs_alloc_path();
@@ -1059,6 +1065,9 @@
 		return -ENOMEM;
 	path->leave_spinning = 1;
 
+	block_rsv = trans->block_rsv;
+	trans->block_rsv = &node->root->fs_info->global_block_rsv;
+
 	ret = btrfs_insert_delayed_items(trans, path, node->root, node);
 	if (!ret)
 		ret = btrfs_delete_delayed_items(trans, path, node->root, node);
@@ -1066,6 +1075,7 @@
 		ret = btrfs_update_delayed_inode(trans, node->root, path, node);
 	btrfs_free_path(path);
 
+	trans->block_rsv = block_rsv;
 	return ret;
 }
 
@@ -1116,6 +1126,7 @@
 	struct btrfs_path *path;
 	struct btrfs_delayed_node *delayed_node = NULL;
 	struct btrfs_root *root;
+	struct btrfs_block_rsv *block_rsv;
 	unsigned long nr = 0;
 	int need_requeue = 0;
 	int ret;
@@ -1134,6 +1145,9 @@
 	if (IS_ERR(trans))
 		goto free_path;
 
+	block_rsv = trans->block_rsv;
+	trans->block_rsv = &root->fs_info->global_block_rsv;
+
 	ret = btrfs_insert_delayed_items(trans, path, root, delayed_node);
 	if (!ret)
 		ret = btrfs_delete_delayed_items(trans, path, root,
@@ -1176,6 +1190,7 @@
 
 	nr = trans->blocks_used;
 
+	trans->block_rsv = block_rsv;
 	btrfs_end_transaction_dmeta(trans, root);
 	__btrfs_btree_balance_dirty(root, nr);
 free_path:
@@ -1222,6 +1237,13 @@
 	return 0;
 }
 
+void btrfs_assert_delayed_root_empty(struct btrfs_root *root)
+{
+	struct btrfs_delayed_root *delayed_root;
+	delayed_root = btrfs_get_delayed_root(root);
+	WARN_ON(btrfs_first_delayed_node(delayed_root));
+}
+
 void btrfs_balance_delayed_items(struct btrfs_root *root)
 {
 	struct btrfs_delayed_root *delayed_root;
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
index eb7d240..d1a6a29 100644
--- a/fs/btrfs/delayed-inode.h
+++ b/fs/btrfs/delayed-inode.h
@@ -75,7 +75,6 @@
 	struct list_head tree_list;	/* used for batch insert/delete items */
 	struct list_head readdir_list;	/* used for readdir items */
 	u64 bytes_reserved;
-	struct btrfs_block_rsv *block_rsv;
 	struct btrfs_delayed_node *delayed_node;
 	atomic_t refs;
 	int ins_or_del;
@@ -138,4 +137,8 @@
 /* for init */
 int __init btrfs_delayed_inode_init(void);
 void btrfs_delayed_inode_exit(void);
+
+/* for debugging */
+void btrfs_assert_delayed_root_empty(struct btrfs_root *root);
+
 #endif
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 9f68c68..1ac8db5d 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1044,7 +1044,6 @@
 	root->last_trans = 0;
 	root->highest_objectid = 0;
 	root->name = NULL;
-	root->in_sysfs = 0;
 	root->inode_tree = RB_ROOT;
 	INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC);
 	root->block_rsv = NULL;
@@ -1300,19 +1299,21 @@
 		return root;
 
 	root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS);
-	if (!root->free_ino_ctl)
-		goto fail;
 	root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned),
 					GFP_NOFS);
-	if (!root->free_ino_pinned)
+	if (!root->free_ino_pinned || !root->free_ino_ctl) {
+		ret = -ENOMEM;
 		goto fail;
+	}
 
 	btrfs_init_free_ino_ctl(root);
 	mutex_init(&root->fs_commit_mutex);
 	spin_lock_init(&root->cache_lock);
 	init_waitqueue_head(&root->cache_wait);
 
-	set_anon_super(&root->anon_super, NULL);
+	ret = set_anon_super(&root->anon_super, NULL);
+	if (ret)
+		goto fail;
 
 	if (btrfs_root_refs(&root->root_item) == 0) {
 		ret = -ENOENT;
@@ -1618,6 +1619,7 @@
 	spin_lock_init(&fs_info->fs_roots_radix_lock);
 	spin_lock_init(&fs_info->delayed_iput_lock);
 	spin_lock_init(&fs_info->defrag_inodes_lock);
+	mutex_init(&fs_info->reloc_mutex);
 
 	init_completion(&fs_info->kobj_unregister);
 	fs_info->tree_root = tree_root;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index b42efc2..1f61bf5 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3314,10 +3314,6 @@
 	if (reserved == 0)
 		return 0;
 
-	/* nothing to shrink - nothing to reclaim */
-	if (root->fs_info->delalloc_bytes == 0)
-		return 0;
-
 	max_reclaim = min(reserved, to_reclaim);
 
 	while (loops < 1024) {
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 751ddf8..0a9b10c 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3076,6 +3076,7 @@
 	ret = btrfs_update_inode(trans, root, dir);
 	BUG_ON(ret);
 
+	btrfs_free_path(path);
 	return 0;
 }
 
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index b793d11..a3c4751 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -482,8 +482,10 @@
 	ret = btrfs_snap_reserve_metadata(trans, pending_snapshot);
 	BUG_ON(ret);
 
+	spin_lock(&root->fs_info->trans_lock);
 	list_add(&pending_snapshot->list,
 		 &trans->transaction->pending_snapshots);
+	spin_unlock(&root->fs_info->trans_lock);
 	if (async_transid) {
 		*async_transid = trans->transid;
 		ret = btrfs_commit_transaction_async(trans,
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index b1ef27c..5e0a3dc 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1368,7 +1368,7 @@
 	int ret;
 
 	if (!root->reloc_root)
-		return 0;
+		goto out;
 
 	reloc_root = root->reloc_root;
 	root_item = &reloc_root->root_item;
@@ -1390,6 +1390,8 @@
 	ret = btrfs_update_root(trans, root->fs_info->tree_root,
 				&reloc_root->root_key, root_item);
 	BUG_ON(ret);
+
+out:
 	return 0;
 }
 
@@ -2142,10 +2144,11 @@
 	u64 num_bytes = 0;
 	int ret;
 
-	spin_lock(&root->fs_info->trans_lock);
+	mutex_lock(&root->fs_info->reloc_mutex);
 	rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
 	rc->merging_rsv_size += rc->nodes_relocated * 2;
-	spin_unlock(&root->fs_info->trans_lock);
+	mutex_unlock(&root->fs_info->reloc_mutex);
+
 again:
 	if (!err) {
 		num_bytes = rc->merging_rsv_size;
@@ -2214,9 +2217,16 @@
 	int ret;
 again:
 	root = rc->extent_root;
-	spin_lock(&root->fs_info->trans_lock);
+
+	/*
+	 * this serializes us with btrfs_record_root_in_transaction,
+	 * we have to make sure nobody is in the middle of
+	 * adding their roots to the list while we are
+	 * doing this splice
+	 */
+	mutex_lock(&root->fs_info->reloc_mutex);
 	list_splice_init(&rc->reloc_roots, &reloc_roots);
-	spin_unlock(&root->fs_info->trans_lock);
+	mutex_unlock(&root->fs_info->reloc_mutex);
 
 	while (!list_empty(&reloc_roots)) {
 		found = 1;
@@ -3590,17 +3600,19 @@
 static void set_reloc_control(struct reloc_control *rc)
 {
 	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
-	spin_lock(&fs_info->trans_lock);
+
+	mutex_lock(&fs_info->reloc_mutex);
 	fs_info->reloc_ctl = rc;
-	spin_unlock(&fs_info->trans_lock);
+	mutex_unlock(&fs_info->reloc_mutex);
 }
 
 static void unset_reloc_control(struct reloc_control *rc)
 {
 	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
-	spin_lock(&fs_info->trans_lock);
+
+	mutex_lock(&fs_info->reloc_mutex);
 	fs_info->reloc_ctl = NULL;
-	spin_unlock(&fs_info->trans_lock);
+	mutex_unlock(&fs_info->reloc_mutex);
 }
 
 static int check_extent_flags(u64 flags)
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index c3c223a..daac9ae 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -28,152 +28,6 @@
 #include "disk-io.h"
 #include "transaction.h"
 
-static ssize_t root_blocks_used_show(struct btrfs_root *root, char *buf)
-{
-	return snprintf(buf, PAGE_SIZE, "%llu\n",
-		(unsigned long long)btrfs_root_used(&root->root_item));
-}
-
-static ssize_t root_block_limit_show(struct btrfs_root *root, char *buf)
-{
-	return snprintf(buf, PAGE_SIZE, "%llu\n",
-		(unsigned long long)btrfs_root_limit(&root->root_item));
-}
-
-static ssize_t super_blocks_used_show(struct btrfs_fs_info *fs, char *buf)
-{
-
-	return snprintf(buf, PAGE_SIZE, "%llu\n",
-		(unsigned long long)btrfs_super_bytes_used(&fs->super_copy));
-}
-
-static ssize_t super_total_blocks_show(struct btrfs_fs_info *fs, char *buf)
-{
-	return snprintf(buf, PAGE_SIZE, "%llu\n",
-		(unsigned long long)btrfs_super_total_bytes(&fs->super_copy));
-}
-
-static ssize_t super_blocksize_show(struct btrfs_fs_info *fs, char *buf)
-{
-	return snprintf(buf, PAGE_SIZE, "%llu\n",
-		(unsigned long long)btrfs_super_sectorsize(&fs->super_copy));
-}
-
-/* this is for root attrs (subvols/snapshots) */
-struct btrfs_root_attr {
-	struct attribute attr;
-	ssize_t (*show)(struct btrfs_root *, char *);
-	ssize_t (*store)(struct btrfs_root *, const char *, size_t);
-};
-
-#define ROOT_ATTR(name, mode, show, store) \
-static struct btrfs_root_attr btrfs_root_attr_##name = __ATTR(name, mode, \
-							      show, store)
-
-ROOT_ATTR(blocks_used,	0444,	root_blocks_used_show,	NULL);
-ROOT_ATTR(block_limit,	0644,	root_block_limit_show,	NULL);
-
-static struct attribute *btrfs_root_attrs[] = {
-	&btrfs_root_attr_blocks_used.attr,
-	&btrfs_root_attr_block_limit.attr,
-	NULL,
-};
-
-/* this is for super attrs (actual full fs) */
-struct btrfs_super_attr {
-	struct attribute attr;
-	ssize_t (*show)(struct btrfs_fs_info *, char *);
-	ssize_t (*store)(struct btrfs_fs_info *, const char *, size_t);
-};
-
-#define SUPER_ATTR(name, mode, show, store) \
-static struct btrfs_super_attr btrfs_super_attr_##name = __ATTR(name, mode, \
-								show, store)
-
-SUPER_ATTR(blocks_used,		0444,	super_blocks_used_show,		NULL);
-SUPER_ATTR(total_blocks,	0444,	super_total_blocks_show,	NULL);
-SUPER_ATTR(blocksize,		0444,	super_blocksize_show,		NULL);
-
-static struct attribute *btrfs_super_attrs[] = {
-	&btrfs_super_attr_blocks_used.attr,
-	&btrfs_super_attr_total_blocks.attr,
-	&btrfs_super_attr_blocksize.attr,
-	NULL,
-};
-
-static ssize_t btrfs_super_attr_show(struct kobject *kobj,
-				    struct attribute *attr, char *buf)
-{
-	struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info,
-						super_kobj);
-	struct btrfs_super_attr *a = container_of(attr,
-						  struct btrfs_super_attr,
-						  attr);
-
-	return a->show ? a->show(fs, buf) : 0;
-}
-
-static ssize_t btrfs_super_attr_store(struct kobject *kobj,
-				     struct attribute *attr,
-				     const char *buf, size_t len)
-{
-	struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info,
-						super_kobj);
-	struct btrfs_super_attr *a = container_of(attr,
-						  struct btrfs_super_attr,
-						  attr);
-
-	return a->store ? a->store(fs, buf, len) : 0;
-}
-
-static ssize_t btrfs_root_attr_show(struct kobject *kobj,
-				    struct attribute *attr, char *buf)
-{
-	struct btrfs_root *root = container_of(kobj, struct btrfs_root,
-						root_kobj);
-	struct btrfs_root_attr *a = container_of(attr,
-						 struct btrfs_root_attr,
-						 attr);
-
-	return a->show ? a->show(root, buf) : 0;
-}
-
-static ssize_t btrfs_root_attr_store(struct kobject *kobj,
-				     struct attribute *attr,
-				     const char *buf, size_t len)
-{
-	struct btrfs_root *root = container_of(kobj, struct btrfs_root,
-						root_kobj);
-	struct btrfs_root_attr *a = container_of(attr,
-						 struct btrfs_root_attr,
-						 attr);
-	return a->store ? a->store(root, buf, len) : 0;
-}
-
-static void btrfs_super_release(struct kobject *kobj)
-{
-	struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info,
-						super_kobj);
-	complete(&fs->kobj_unregister);
-}
-
-static void btrfs_root_release(struct kobject *kobj)
-{
-	struct btrfs_root *root = container_of(kobj, struct btrfs_root,
-						root_kobj);
-	complete(&root->kobj_unregister);
-}
-
-static const struct sysfs_ops btrfs_super_attr_ops = {
-	.show	= btrfs_super_attr_show,
-	.store	= btrfs_super_attr_store,
-};
-
-static const struct sysfs_ops btrfs_root_attr_ops = {
-	.show	= btrfs_root_attr_show,
-	.store	= btrfs_root_attr_store,
-};
-
 /* /sys/fs/btrfs/ entry */
 static struct kset *btrfs_kset;
 
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 2b3590b..51dcec8 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -126,28 +126,85 @@
  * to make sure the old root from before we joined the transaction is deleted
  * when the transaction commits
  */
-int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
+static int record_root_in_trans(struct btrfs_trans_handle *trans,
 			       struct btrfs_root *root)
 {
 	if (root->ref_cows && root->last_trans < trans->transid) {
 		WARN_ON(root == root->fs_info->extent_root);
 		WARN_ON(root->commit_root != root->node);
 
+		/*
+		 * see below for in_trans_setup usage rules
+		 * we have the reloc mutex held now, so there
+		 * is only one writer in this function
+		 */
+		root->in_trans_setup = 1;
+
+		/* make sure readers find in_trans_setup before
+		 * they find our root->last_trans update
+		 */
+		smp_wmb();
+
 		spin_lock(&root->fs_info->fs_roots_radix_lock);
 		if (root->last_trans == trans->transid) {
 			spin_unlock(&root->fs_info->fs_roots_radix_lock);
 			return 0;
 		}
-		root->last_trans = trans->transid;
 		radix_tree_tag_set(&root->fs_info->fs_roots_radix,
 			   (unsigned long)root->root_key.objectid,
 			   BTRFS_ROOT_TRANS_TAG);
 		spin_unlock(&root->fs_info->fs_roots_radix_lock);
+		root->last_trans = trans->transid;
+
+		/* this is pretty tricky.  We don't want to
+		 * take the relocation lock in btrfs_record_root_in_trans
+		 * unless we're really doing the first setup for this root in
+		 * this transaction.
+		 *
+		 * Normally we'd use root->last_trans as a flag to decide
+		 * if we want to take the expensive mutex.
+		 *
+		 * But, we have to set root->last_trans before we
+		 * init the relocation root, otherwise, we trip over warnings
+		 * in ctree.c.  The solution used here is to flag ourselves
+		 * with root->in_trans_setup.  When this is 1, we're still
+		 * fixing up the reloc trees and everyone must wait.
+		 *
+		 * When this is zero, they can trust root->last_trans and fly
+		 * through btrfs_record_root_in_trans without having to take the
+		 * lock.  smp_wmb() makes sure that all the writes above are
+		 * done before we pop in the zero below
+		 */
 		btrfs_init_reloc_root(trans, root);
+		smp_wmb();
+		root->in_trans_setup = 0;
 	}
 	return 0;
 }
 
+
+int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
+			       struct btrfs_root *root)
+{
+	if (!root->ref_cows)
+		return 0;
+
+	/*
+	 * see record_root_in_trans for comments about in_trans_setup usage
+	 * and barriers
+	 */
+	smp_rmb();
+	if (root->last_trans == trans->transid &&
+	    !root->in_trans_setup)
+		return 0;
+
+	mutex_lock(&root->fs_info->reloc_mutex);
+	record_root_in_trans(trans, root);
+	mutex_unlock(&root->fs_info->reloc_mutex);
+
+	return 0;
+}
+
 /* wait for commit against the current transaction to become unblocked
  * when this is done, it is safe to start a new transaction, but the current
  * transaction might not be fully on disk.
@@ -882,7 +939,7 @@
 	parent = dget_parent(dentry);
 	parent_inode = parent->d_inode;
 	parent_root = BTRFS_I(parent_inode)->root;
-	btrfs_record_root_in_trans(trans, parent_root);
+	record_root_in_trans(trans, parent_root);
 
 	/*
 	 * insert the directory item
@@ -900,7 +957,16 @@
 	ret = btrfs_update_inode(trans, parent_root, parent_inode);
 	BUG_ON(ret);
 
-	btrfs_record_root_in_trans(trans, root);
+	/*
+	 * pull in the delayed directory update
+	 * and the delayed inode item
+	 * otherwise we corrupt the FS during
+	 * snapshot
+	 */
+	ret = btrfs_run_delayed_items(trans, root);
+	BUG_ON(ret);
+
+	record_root_in_trans(trans, root);
 	btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
 	memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
 	btrfs_check_and_init_root_item(new_root_item);
@@ -961,14 +1027,6 @@
 	int ret;
 
 	list_for_each_entry(pending, head, list) {
-		/*
-		 * We must deal with the delayed items before creating
-		 * snapshots, or we will create a snapthot with inconsistent
-		 * information.
-		*/
-		ret = btrfs_run_delayed_items(trans, fs_info->fs_root);
-		BUG_ON(ret);
-
 		ret = create_pending_snapshot(trans, fs_info, pending);
 		BUG_ON(ret);
 	}
@@ -1241,21 +1299,42 @@
 			schedule_timeout(1);
 
 		finish_wait(&cur_trans->writer_wait, &wait);
-		spin_lock(&root->fs_info->trans_lock);
-		root->fs_info->trans_no_join = 1;
-		spin_unlock(&root->fs_info->trans_lock);
 	} while (atomic_read(&cur_trans->num_writers) > 1 ||
 		 (should_grow && cur_trans->num_joined != joined));
 
-	ret = create_pending_snapshots(trans, root->fs_info);
-	BUG_ON(ret);
+	/*
+	 * Ok now we need to make sure to block out any other joins while we
+	 * commit the transaction.  We could have started a join before setting
+	 * no_join so make sure to wait for num_writers to == 1 again.
+	 */
+	spin_lock(&root->fs_info->trans_lock);
+	root->fs_info->trans_no_join = 1;
+	spin_unlock(&root->fs_info->trans_lock);
+	wait_event(cur_trans->writer_wait,
+		   atomic_read(&cur_trans->num_writers) == 1);
+
+	/*
+	 * the reloc mutex makes sure that we stop
+	 * the balancing code from coming in and moving
+	 * extents around in the middle of the commit
+	 */
+	mutex_lock(&root->fs_info->reloc_mutex);
 
 	ret = btrfs_run_delayed_items(trans, root);
 	BUG_ON(ret);
 
+	ret = create_pending_snapshots(trans, root->fs_info);
+	BUG_ON(ret);
+
 	ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
 	BUG_ON(ret);
 
+	/*
+	 * make sure none of the code above managed to slip in a
+	 * delayed item
+	 */
+	btrfs_assert_delayed_root_empty(root);
+
 	WARN_ON(cur_trans != trans->transaction);
 
 	btrfs_scrub_pause(root);
@@ -1312,6 +1391,7 @@
 	root->fs_info->running_transaction = NULL;
 	root->fs_info->trans_no_join = 0;
 	spin_unlock(&root->fs_info->trans_lock);
+	mutex_unlock(&root->fs_info->reloc_mutex);
 
 	wake_up(&root->fs_info->transaction_wait);
 
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 592396c..4ce8a9f 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3177,7 +3177,7 @@
 		tmp_key.offset = (u64)-1;
 
 		wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key);
-		BUG_ON(!wc.replay_dest);
+		BUG_ON(IS_ERR_OR_NULL(wc.replay_dest));
 
 		wc.replay_dest->log_root = log;
 		btrfs_record_root_in_trans(trans, wc.replay_dest);