ceph: fix pending vmtruncate race
The locking order for pending vmtruncate is wrong; it can lead to the
following race:
        write                   vmtruncate work
------------------------    ------------------------
lock i_mutex
check i_truncate_pending    check i_truncate_pending
truncate_inode_pages()      lock i_mutex (blocked)
copy data to page cache
unlock i_mutex
                            truncate_inode_pages()
The fix is to take i_mutex before calling __ceph_do_pending_vmtruncate().
Fixes: http://tracker.ceph.com/issues/5453
Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
Reviewed-by: Sage Weil <sage@inktank.com>
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index be0f7e2..4906ada 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1465,7 +1465,9 @@
struct inode *inode = &ci->vfs_inode;
dout("vmtruncate_work %p\n", inode);
- __ceph_do_pending_vmtruncate(inode, true);
+ mutex_lock(&inode->i_mutex);
+ __ceph_do_pending_vmtruncate(inode);
+ mutex_unlock(&inode->i_mutex);
iput(inode);
}
@@ -1492,7 +1494,7 @@
* Make sure any pending truncation is applied before doing anything
* that may depend on it.
*/
-void __ceph_do_pending_vmtruncate(struct inode *inode, bool needlock)
+void __ceph_do_pending_vmtruncate(struct inode *inode)
{
struct ceph_inode_info *ci = ceph_inode(inode);
u64 to;
@@ -1525,11 +1527,7 @@
ci->i_truncate_pending, to);
spin_unlock(&ci->i_ceph_lock);
- if (needlock)
- mutex_lock(&inode->i_mutex);
truncate_inode_pages(inode->i_mapping, to);
- if (needlock)
- mutex_unlock(&inode->i_mutex);
spin_lock(&ci->i_ceph_lock);
if (to == ci->i_truncate_size) {
@@ -1588,7 +1586,7 @@
if (ceph_snap(inode) != CEPH_NOSNAP)
return -EROFS;
- __ceph_do_pending_vmtruncate(inode, false);
+ __ceph_do_pending_vmtruncate(inode);
err = inode_change_ok(inode, attr);
if (err != 0)
@@ -1770,7 +1768,7 @@
ceph_cap_string(dirtied), mask);
ceph_mdsc_put_request(req);
- __ceph_do_pending_vmtruncate(inode, false);
+ __ceph_do_pending_vmtruncate(inode);
return err;
out:
spin_unlock(&ci->i_ceph_lock);