Btrfs: mount ro and remount support
This patch adds mount ro and remount support. The main
changes in this patch are: adding btrfs_remount and related
helper functions; splitting the transaction related code
out of close_ctree into btrfs_commit_super; updating the
allocator to properly handle read-only block groups.
Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index f575939..c4c6c12 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -541,6 +541,7 @@
u64 bytes_used;
u64 bytes_pinned;
u64 bytes_reserved;
+ u64 bytes_readonly;
int full;
int force_alloc;
struct list_head list;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 3b0e974..c599f0e 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1075,10 +1075,12 @@
kfree(root);
return ERR_PTR(ret);
}
- ret = btrfs_find_dead_roots(fs_info->tree_root,
- root->root_key.objectid, root);
- BUG_ON(ret);
-
+ if (!(fs_info->sb->s_flags & MS_RDONLY)) {
+ ret = btrfs_find_dead_roots(fs_info->tree_root,
+ root->root_key.objectid, root);
+ BUG_ON(ret);
+ btrfs_orphan_cleanup(root);
+ }
return root;
}
@@ -1700,7 +1702,8 @@
btrfs_read_block_groups(extent_root);
- fs_info->generation = btrfs_super_generation(disk_super) + 1;
+ fs_info->generation = generation + 1;
+ fs_info->last_trans_committed = generation;
fs_info->data_alloc_profile = (u64)-1;
fs_info->metadata_alloc_profile = (u64)-1;
fs_info->system_alloc_profile = fs_info->metadata_alloc_profile;
@@ -1715,6 +1718,9 @@
if (!fs_info->transaction_kthread)
goto fail_cleaner;
+ if (sb->s_flags & MS_RDONLY)
+ return tree_root;
+
if (btrfs_super_log_root(disk_super) != 0) {
u32 blocksize;
u64 bytenr = btrfs_super_log_root(disk_super);
@@ -1735,7 +1741,6 @@
ret = btrfs_recover_log_trees(log_tree_root);
BUG_ON(ret);
}
- fs_info->last_trans_committed = btrfs_super_generation(disk_super);
ret = btrfs_cleanup_reloc_trees(tree_root);
BUG_ON(ret);
@@ -1955,11 +1960,56 @@
return 0;
}
+/*
+ * Walk every root in fs_info->fs_roots_radix, in batches of up to
+ * ARRAY_SIZE(gang) entries, and for each one call btrfs_find_dead_roots()
+ * followed by btrfs_orphan_cleanup().
+ *
+ * Always returns 0; a failure from btrfs_find_dead_roots() trips BUG_ON().
+ */
+int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
+{
+	u64 root_objectid = 0;
+	struct btrfs_root *gang[8];
+	int found;
+	int i;
+	int ret;
+
+	while (1) {
+		found = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
+					       (void **)gang, root_objectid,
+					       ARRAY_SIZE(gang));
+		if (!found)
+			break;
+		/*
+		 * Keep the batch size in its own variable: reusing 'ret' for
+		 * the per-root status below would reset the loop bound to 0
+		 * after the first entry and force a new lookup for each root.
+		 */
+		for (i = 0; i < found; i++) {
+			root_objectid = gang[i]->root_key.objectid;
+			ret = btrfs_find_dead_roots(fs_info->tree_root,
+						    root_objectid, gang[i]);
+			BUG_ON(ret);
+			btrfs_orphan_cleanup(gang[i]);
+		}
+		/* resume the next lookup just past the last root handled */
+		root_objectid++;
+	}
+	return 0;
+}
+
+/*
+ * Clean old snapshots under cleaner_mutex, commit two transactions
+ * back to back, then write out the tree blocks and the super block.
+ *
+ * Returns the result of write_ctree_super(); a failure of the first
+ * commit or of btrfs_write_and_wait_transaction() trips BUG_ON().
+ */
+int btrfs_commit_super(struct btrfs_root *root)
+{
+	struct btrfs_fs_info *info = root->fs_info;
+	struct btrfs_trans_handle *handle;
+	int err;
+
+	mutex_lock(&info->cleaner_mutex);
+	btrfs_clean_old_snapshots(root);
+	mutex_unlock(&info->cleaner_mutex);
+
+	handle = btrfs_start_transaction(root, 1);
+	err = btrfs_commit_transaction(handle, root);
+	BUG_ON(err);
+
+	/* run commit again to drop the original snapshot */
+	handle = btrfs_start_transaction(root, 1);
+	btrfs_commit_transaction(handle, root);
+
+	err = btrfs_write_and_wait_transaction(NULL, root);
+	BUG_ON(err);
+
+	return write_ctree_super(NULL, root);
+}
+
int close_ctree(struct btrfs_root *root)
{
- int ret;
- struct btrfs_trans_handle *trans;
struct btrfs_fs_info *fs_info = root->fs_info;
+ int ret;
fs_info->closing = 1;
smp_mb();
@@ -1967,16 +2017,12 @@
kthread_stop(root->fs_info->transaction_kthread);
kthread_stop(root->fs_info->cleaner_kthread);
- btrfs_clean_old_snapshots(root);
- trans = btrfs_start_transaction(root, 1);
- ret = btrfs_commit_transaction(trans, root);
- /* run commit again to drop the original snapshot */
- trans = btrfs_start_transaction(root, 1);
- btrfs_commit_transaction(trans, root);
- ret = btrfs_write_and_wait_transaction(NULL, root);
- BUG_ON(ret);
-
- write_ctree_super(NULL, root);
+ if (!(fs_info->sb->s_flags & MS_RDONLY)) {
+ ret = btrfs_commit_super(root);
+ if (ret) {
+ printk("btrfs: commit super returns %d\n", ret);
+ }
+ }
if (fs_info->delalloc_bytes) {
printk("btrfs: at unmount delalloc count %Lu\n",
@@ -2000,12 +2046,10 @@
free_extent_buffer(root->fs_info->dev_root->node);
btrfs_free_block_groups(root->fs_info);
- fs_info->closing = 2;
+
del_fs_roots(fs_info);
- filemap_write_and_wait(fs_info->btree_inode->i_mapping);
-
- truncate_inode_pages(fs_info->btree_inode->i_mapping, 0);
+ iput(fs_info->btree_inode);
btrfs_stop_workers(&fs_info->fixup_workers);
btrfs_stop_workers(&fs_info->delalloc_workers);
@@ -2014,7 +2058,6 @@
btrfs_stop_workers(&fs_info->endio_write_workers);
btrfs_stop_workers(&fs_info->submit_workers);
- iput(fs_info->btree_inode);
#if 0
while(!list_empty(&fs_info->hashers)) {
struct btrfs_hasher *hasher;
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index b8d5948..717e948 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -38,6 +38,7 @@
int close_ctree(struct btrfs_root *root);
int write_ctree_super(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
+int btrfs_commit_super(struct btrfs_root *root);
struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
u64 bytenr, u32 blocksize);
struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
@@ -49,6 +50,7 @@
struct btrfs_key *location);
struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
struct btrfs_key *location);
+int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info);
int btrfs_insert_dev_radix(struct btrfs_root *root,
struct block_device *bdev,
u64 device_id,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index e785f0a..af2de30 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1794,7 +1794,7 @@
*space_info = found;
return 0;
}
- found = kmalloc(sizeof(*found), GFP_NOFS);
+ found = kzalloc(sizeof(*found), GFP_NOFS);
if (!found)
return -ENOMEM;
@@ -1807,6 +1807,7 @@
found->bytes_used = bytes_used;
found->bytes_pinned = 0;
found->bytes_reserved = 0;
+ found->bytes_readonly = 0;
found->full = 0;
found->force_alloc = 0;
*space_info = found;
@@ -1829,6 +1830,19 @@
}
}
+/*
+ * Flag a block group read-only.  On the first transition, its unused bytes
+ * (key.offset minus the used count from the block group item) are added to
+ * the owning space_info's bytes_readonly.  A group that is already marked
+ * read-only is left untouched.
+ *
+ * Takes space_info->lock first, then cache->lock.
+ */
+static void set_block_group_readonly(struct btrfs_block_group_cache *cache)
+{
+	u64 unused;
+
+	spin_lock(&cache->space_info->lock);
+	spin_lock(&cache->lock);
+	if (cache->ro)
+		goto unlock;
+
+	unused = cache->key.offset - btrfs_block_group_used(&cache->item);
+	cache->space_info->bytes_readonly += unused;
+	cache->ro = 1;
+unlock:
+	spin_unlock(&cache->lock);
+	spin_unlock(&cache->space_info->lock);
+}
+
static u64 reduce_alloc_profile(struct btrfs_root *root, u64 flags)
{
u64 num_devices = root->fs_info->fs_devices->num_devices;
@@ -1865,7 +1879,9 @@
u64 thresh;
u64 start;
u64 num_bytes;
- int ret = 0, waited = 0;
+ int ret = 0;
+
+ mutex_lock(&extent_root->fs_info->chunk_mutex);
flags = reduce_alloc_profile(extent_root, flags);
@@ -1887,46 +1903,28 @@
goto out;
}
- thresh = div_factor(space_info->total_bytes, 6);
+ thresh = space_info->total_bytes - space_info->bytes_readonly;
+ thresh = div_factor(thresh, 6);
if (!force &&
(space_info->bytes_used + space_info->bytes_pinned +
space_info->bytes_reserved + alloc_bytes) < thresh) {
spin_unlock(&space_info->lock);
goto out;
}
-
spin_unlock(&space_info->lock);
- ret = mutex_trylock(&extent_root->fs_info->chunk_mutex);
- if (!ret && !force) {
- goto out;
- } else if (!ret) {
- mutex_lock(&extent_root->fs_info->chunk_mutex);
- waited = 1;
- }
-
- if (waited) {
- spin_lock(&space_info->lock);
- if (space_info->full) {
- spin_unlock(&space_info->lock);
- goto out_unlock;
- }
- spin_unlock(&space_info->lock);
- }
-
ret = btrfs_alloc_chunk(trans, extent_root, &start, &num_bytes, flags);
if (ret) {
printk("space info full %Lu\n", flags);
space_info->full = 1;
- goto out_unlock;
+ goto out;
}
ret = btrfs_make_block_group(trans, extent_root, 0, flags,
BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, num_bytes);
BUG_ON(ret);
-out_unlock:
- mutex_unlock(&extent_root->fs_info->chunk_mutex);
out:
+ mutex_unlock(&extent_root->fs_info->chunk_mutex);
return ret;
}
@@ -1956,12 +1954,18 @@
if (alloc) {
old_val += num_bytes;
cache->space_info->bytes_used += num_bytes;
+ if (cache->ro) {
+ cache->space_info->bytes_readonly -= num_bytes;
+ WARN_ON(1);
+ }
btrfs_set_block_group_used(&cache->item, old_val);
spin_unlock(&cache->lock);
spin_unlock(&cache->space_info->lock);
} else {
old_val -= num_bytes;
cache->space_info->bytes_used -= num_bytes;
+ if (cache->ro)
+ cache->space_info->bytes_readonly += num_bytes;
btrfs_set_block_group_used(&cache->item, old_val);
spin_unlock(&cache->lock);
spin_unlock(&cache->space_info->lock);
@@ -5560,8 +5564,7 @@
BUG_ON(IS_ERR(reloc_inode));
__alloc_chunk_for_shrink(root, block_group, 1);
- block_group->ro = 1;
- block_group->space_info->total_bytes -= block_group->key.offset;
+ set_block_group_readonly(block_group);
btrfs_start_delalloc_inodes(info->tree_root);
btrfs_wait_ordered_extents(info->tree_root, 0);
@@ -5868,6 +5871,7 @@
block_group = btrfs_lookup_block_group(root->fs_info, group_start);
BUG_ON(!block_group);
+ BUG_ON(!block_group->ro);
memcpy(&key, &block_group->key, sizeof(key));
@@ -5881,6 +5885,11 @@
list_del(&block_group->list);
up_write(&block_group->space_info->groups_sem);
+ spin_lock(&block_group->space_info->lock);
+ block_group->space_info->total_bytes -= block_group->key.offset;
+ block_group->space_info->bytes_readonly -= block_group->key.offset;
+ spin_unlock(&block_group->space_info->lock);
+
/*
memset(shrink_block_group, 0, sizeof(*shrink_block_group));
kfree(shrink_block_group);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 2ed2dea..3e3620e 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1808,10 +1808,6 @@
struct inode *inode;
int ret = 0, nr_unlink = 0, nr_truncate = 0;
- /* don't do orphan cleanup if the fs is readonly. */
- if (root->fs_info->sb->s_flags & MS_RDONLY)
- return;
-
path = btrfs_alloc_path();
if (!path)
return;
@@ -3050,7 +3046,7 @@
struct btrfs_root *root = bi->root;
struct btrfs_root *sub_root = root;
struct btrfs_key location;
- int ret, new, do_orphan = 0;
+ int ret, new;
if (dentry->d_name.len > BTRFS_NAME_LEN)
return ERR_PTR(-ENAMETOOLONG);
@@ -3076,13 +3072,9 @@
if (new && root != sub_root) {
igrab(inode);
sub_root->inode = inode;
- do_orphan = 1;
}
}
- if (unlikely(do_orphan))
- btrfs_orphan_cleanup(sub_root);
-
return d_splice_alias(inode, dentry);
}
@@ -3237,7 +3229,7 @@
struct btrfs_trans_handle *trans;
int ret = 0;
- if (root->fs_info->closing > 1)
+ if (root->fs_info->btree_inode == inode)
return 0;
if (wait) {
@@ -4625,6 +4617,9 @@
struct inode *inode;
unsigned long flags;
+ if (root->fs_info->sb->s_flags & MS_RDONLY)
+ return -EROFS;
+
spin_lock_irqsave(&root->fs_info->delalloc_lock, flags);
while(!list_empty(head)) {
binode = list_entry(head->next, struct btrfs_inode,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 4d7cc7c..52863ce 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -378,6 +378,9 @@
int namelen;
int mod = 0;
+ if (root->fs_info->sb->s_flags & MS_RDONLY)
+ return -EROFS;
+
vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
if (!vol_args)
@@ -478,6 +481,9 @@
int namelen;
int ret;
+ if (root->fs_info->sb->s_flags & MS_RDONLY)
+ return -EROFS;
+
vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
if (!vol_args)
@@ -534,6 +540,11 @@
{
struct inode *inode = fdentry(file)->d_inode;
struct btrfs_root *root = BTRFS_I(inode)->root;
+ int ret;
+
+ ret = mnt_want_write(file->f_path.mnt);
+ if (ret)
+ return ret;
switch (inode->i_mode & S_IFMT) {
case S_IFDIR:
@@ -575,6 +586,9 @@
struct btrfs_ioctl_vol_args *vol_args;
int ret;
+ if (root->fs_info->sb->s_flags & MS_RDONLY)
+ return -EROFS;
+
vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
if (!vol_args)
@@ -621,6 +635,10 @@
* they don't overlap)?
*/
+ ret = mnt_want_write(file->f_path.mnt);
+ if (ret)
+ return ret;
+
src_file = fget(srcfd);
if (!src_file)
return -EBADF;
@@ -958,6 +976,10 @@
goto out;
}
+ ret = mnt_want_write(file->f_path.mnt);
+ if (ret)
+ goto out;
+
mutex_lock(&root->fs_info->trans_mutex);
root->fs_info->open_ioctl_trans++;
mutex_unlock(&root->fs_info->trans_mutex);
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index ab9d5e8..04a3bf8 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -370,6 +370,9 @@
int ret;
root = btrfs_sb(sb);
+ if (sb->s_flags & MS_RDONLY)
+ return 0;
+
sb->s_dirt = 0;
if (!wait) {
filemap_flush(root->fs_info->btree_inode->i_mapping);
@@ -438,7 +441,7 @@
up_write(&s->s_umount);
deactivate_super(s);
error = -EBUSY;
- goto error_bdev;
+ goto error_close_devices;
}
} else {
@@ -487,7 +490,7 @@
error_s:
error = PTR_ERR(s);
-error_bdev:
+error_close_devices:
btrfs_close_devices(fs_devices);
error_free_subvol_name:
kfree(subvol_name);
@@ -495,6 +498,35 @@
return error;
}
+/*
+ * remount_fs handler.  Only a change of the MS_RDONLY bit is acted on;
+ * 'data' is not examined.
+ *
+ * rw -> ro: set MS_RDONLY on the super block, then run btrfs_commit_super().
+ * ro -> rw: refuse with -EINVAL if the super block copy records a log root,
+ *           otherwise clean up reloc trees and fs roots and clear MS_RDONLY.
+ *
+ * Failures of the commit/cleanup helpers are reported with WARN_ON() only;
+ * the function still returns 0 in those cases.
+ */
+static int btrfs_remount(struct super_block *sb, int *flags, char *data)
+{
+	struct btrfs_root *root = btrfs_sb(sb);
+	int want_ro = *flags & MS_RDONLY;
+	int ret;
+
+	/* read-only state is not changing: nothing to do */
+	if (want_ro == (sb->s_flags & MS_RDONLY))
+		return 0;
+
+	if (want_ro) {
+		sb->s_flags |= MS_RDONLY;
+
+		ret = btrfs_commit_super(root);
+		WARN_ON(ret);
+		return 0;
+	}
+
+	if (btrfs_super_log_root(&root->fs_info->super_copy) != 0)
+		return -EINVAL;
+
+	ret = btrfs_cleanup_reloc_trees(root);
+	WARN_ON(ret);
+
+	ret = btrfs_cleanup_fs_roots(root->fs_info);
+	WARN_ON(ret);
+
+	sb->s_flags &= ~MS_RDONLY;
+	return 0;
+}
+
+
static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
struct btrfs_root *root = btrfs_sb(dentry->d_sb);
@@ -582,6 +614,7 @@
.alloc_inode = btrfs_alloc_inode,
.destroy_inode = btrfs_destroy_inode,
.statfs = btrfs_statfs,
+ .remount_fs = btrfs_remount,
.write_super_lockfs = btrfs_write_super_lockfs,
.unlockfs = btrfs_unlockfs,
};