fs: kill i_alloc_sem

i_alloc_sem is a rather special rw_semaphore. It's the last one that may be released by a non-owner, and its write side is always mirrored by real exclusion. Its intended use is to wait for all pending direct I/O requests to finish before starting a truncate.

Replace it with a hand-grown construct:

- exclusion for truncates is already guaranteed by i_mutex, so it can simply fall away
- the reader side is replaced by an i_dio_count member in struct inode that counts the number of pending direct I/O requests. Truncate can't proceed as long as it's non-zero
- when i_dio_count reaches zero we wake up a pending truncate using wake_up_bit on a new bit in i_flags
- new references to i_dio_count can't appear while we are waiting for it to read zero because the direct I/O count always needs i_mutex (or an equivalent like XFS's i_iolock) for starting a new operation.

This scheme is much simpler, and saves the space of a spinlock_t and a struct list_head in struct inode (typically 160 bits on a non-debug 64-bit system).

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

commit: bd5fe6c5eb9c548d7f07fe8f89a150bb6705e8e3 [log] [tgz]
author: Christoph Hellwig <hch@infradead.org> Fri Jun 24 14:29:43 2011 -0400
committer: Al Viro <viro@zeniv.linux.org.uk> Wed Jul 20 20:47:46 2011 -0400
tree: ef5341c7747f809aec7ae233f6e3ef90af39be5f
parent: f9b5570d7fdedff32a2e78102bfb54cd1b12b289 [diff] [blame]
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 1406c37..2c3a465 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c

@@ -2236,9 +2236,9 @@
 	ocfs2_iocb_clear_sem_locked(iocb);
 
 relock:
-	/* to match setattr's i_mutex -> i_alloc_sem -> rw_lock ordering */
+	/* to match setattr's i_mutex -> rw_lock ordering */
 	if (direct_io) {
-		down_read(&inode->i_alloc_sem);
+		atomic_inc(&inode->i_dio_count);
 		have_alloc_sem = 1;
 		/* communicate with ocfs2_dio_end_io */
 		ocfs2_iocb_set_sem_locked(iocb);
@@ -2290,7 +2290,7 @@
 	 */
 	if (direct_io && !can_do_direct) {
 		ocfs2_rw_unlock(inode, rw_level);
-		up_read(&inode->i_alloc_sem);
+		inode_dio_done(inode);
 
 		have_alloc_sem = 0;
 		rw_level = -1;
@@ -2361,8 +2361,7 @@
 	/*
 	 * deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io
 	 * function pointer which is called when o_direct io completes so that
-	 * it can unlock our rw lock.  (it's the clustered equivalent of
-	 * i_alloc_sem; protects truncate from racing with pending ios).
+	 * it can unlock our rw lock.
 	 * Unfortunately there are error cases which call end_io and others
 	 * that don't.  so we don't have to unlock the rw_lock if either an
 	 * async dio is going to do it in the future or an end_io after an
@@ -2379,7 +2378,7 @@
 
 out_sems:
 	if (have_alloc_sem) {
-		up_read(&inode->i_alloc_sem);
+		inode_dio_done(inode);
 		ocfs2_iocb_clear_sem_locked(iocb);
 	}
 
@@ -2531,8 +2530,8 @@
 	 * need locks to protect pending reads from racing with truncate.
 	 */
 	if (filp->f_flags & O_DIRECT) {
-		down_read(&inode->i_alloc_sem);
 		have_alloc_sem = 1;
+		atomic_inc(&inode->i_dio_count);
 		ocfs2_iocb_set_sem_locked(iocb);
 
 		ret = ocfs2_rw_lock(inode, 0);
@@ -2575,7 +2574,7 @@
 
 bail:
 	if (have_alloc_sem) {
-		up_read(&inode->i_alloc_sem);
+		inode_dio_done(inode);
 		ocfs2_iocb_clear_sem_locked(iocb);
 	}
 	if (rw_level != -1)
commit	bd5fe6c5eb9c548d7f07fe8f89a150bb6705e8e3	[log] [tgz]
author	Christoph Hellwig <hch@infradead.org>	Fri Jun 24 14:29:43 2011 -0400
committer	Al Viro <viro@zeniv.linux.org.uk>	Wed Jul 20 20:47:46 2011 -0400
tree	ef5341c7747f809aec7ae233f6e3ef90af39be5f
parent	f9b5570d7fdedff32a2e78102bfb54cd1b12b289 [diff] [blame]