ceph: fix pending vmtruncate race

The locking order for pending vmtruncate is wrong; it can lead to the
following race:

        write                   vmtruncate work
------------------------    ----------------------
lock i_mutex
check i_truncate_pending    check i_truncate_pending
truncate_inode_pages()      lock i_mutex (blocked)
copy data to page cache
unlock i_mutex
                            truncate_inode_pages()

The fix is to take i_mutex before calling __ceph_do_pending_vmtruncate().

Fixes: http://tracker.ceph.com/issues/5453
Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
Reviewed-by: Sage Weil <sage@inktank.com>

commit: b415bf4f9fe25f39934f5c464125e4a2dffb6d08 [log] [tgz]
author: Yan, Zheng <zheng.z.yan@intel.com> Tue Jul 02 12:40:19 2013 +0800
committer: Sage Weil <sage@inktank.com> Wed Jul 03 15:32:56 2013 -0700
tree: f38df9c1746a1419e7942afb4534625175a5d353
parent: 5446429630257f4723829409337a26c076907d5d [diff] [blame]
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index be0f7e2..4906ada 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c

@@ -1465,7 +1465,9 @@
 	struct inode *inode = &ci->vfs_inode;
 
 	dout("vmtruncate_work %p\n", inode);
-	__ceph_do_pending_vmtruncate(inode, true);
+	mutex_lock(&inode->i_mutex);
+	__ceph_do_pending_vmtruncate(inode);
+	mutex_unlock(&inode->i_mutex);
 	iput(inode);
 }
 
@@ -1492,7 +1494,7 @@
  * Make sure any pending truncation is applied before doing anything
  * that may depend on it.
  */
-void __ceph_do_pending_vmtruncate(struct inode *inode, bool needlock)
+void __ceph_do_pending_vmtruncate(struct inode *inode)
 {
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	u64 to;
@@ -1525,11 +1527,7 @@
 	     ci->i_truncate_pending, to);
 	spin_unlock(&ci->i_ceph_lock);
 
-	if (needlock)
-		mutex_lock(&inode->i_mutex);
 	truncate_inode_pages(inode->i_mapping, to);
-	if (needlock)
-		mutex_unlock(&inode->i_mutex);
 
 	spin_lock(&ci->i_ceph_lock);
 	if (to == ci->i_truncate_size) {
@@ -1588,7 +1586,7 @@
 	if (ceph_snap(inode) != CEPH_NOSNAP)
 		return -EROFS;
 
-	__ceph_do_pending_vmtruncate(inode, false);
+	__ceph_do_pending_vmtruncate(inode);
 
 	err = inode_change_ok(inode, attr);
 	if (err != 0)
@@ -1770,7 +1768,7 @@
 	     ceph_cap_string(dirtied), mask);
 
 	ceph_mdsc_put_request(req);
-	__ceph_do_pending_vmtruncate(inode, false);
+	__ceph_do_pending_vmtruncate(inode);
 	return err;
 out:
 	spin_unlock(&ci->i_ceph_lock);
commit	b415bf4f9fe25f39934f5c464125e4a2dffb6d08	[log] [tgz]
author	Yan, Zheng <zheng.z.yan@intel.com>	Tue Jul 02 12:40:19 2013 +0800
committer	Sage Weil <sage@inktank.com>	Wed Jul 03 15:32:56 2013 -0700
tree	f38df9c1746a1419e7942afb4534625175a5d353
parent	5446429630257f4723829409337a26c076907d5d [diff] [blame]