NFSv4: Fix a race between open() and close()

 We must not remove the nfs4_state structure from the inode open lists
 before we are in sequence lock.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 78a53f5..5396902 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -214,7 +214,7 @@
 extern int nfs4_proc_setclientid_confirm(struct nfs4_client *);
 extern int nfs4_proc_async_renew(struct nfs4_client *);
 extern int nfs4_proc_renew(struct nfs4_client *);
-extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode);
+extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state);
 extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
 extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *);
 
@@ -248,6 +248,7 @@
 extern void nfs4_put_open_state(struct nfs4_state *);
 extern void nfs4_close_state(struct nfs4_state *, mode_t);
 extern struct nfs4_state *nfs4_find_state(struct inode *, struct rpc_cred *, mode_t mode);
+extern void nfs4_state_set_mode_locked(struct nfs4_state *, mode_t);
 extern void nfs4_schedule_state_recovery(struct nfs4_client *);
 extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
 extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 933e13b..02fddd0 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -217,13 +217,12 @@
 	/* Protect against nfs4_find_state() */
 	spin_lock(&state->owner->so_lock);
 	spin_lock(&inode->i_lock);
-	state->state |= open_flags;
-	/* NB! List reordering - see the reclaim code for why.  */
-	if ((open_flags & FMODE_WRITE) && 0 == state->nwriters++)
-		list_move(&state->open_states, &state->owner->so_states);
+	memcpy(&state->stateid, stateid, sizeof(state->stateid));
+	if ((open_flags & FMODE_WRITE))
+		state->nwriters++;
 	if (open_flags & FMODE_READ)
 		state->nreaders++;
-	memcpy(&state->stateid, stateid, sizeof(state->stateid));
+	nfs4_state_set_mode_locked(state, state->state | open_flags);
 	spin_unlock(&inode->i_lock);
 	spin_unlock(&state->owner->so_lock);
 }
@@ -896,7 +895,6 @@
 			break;
 		case -NFS4ERR_STALE_STATEID:
 		case -NFS4ERR_EXPIRED:
-			state->state = calldata->arg.open_flags;
 			nfs4_schedule_state_recovery(server->nfs4_state);
 			break;
 		default:
@@ -906,7 +904,6 @@
 			}
 	}
 	nfs_refresh_inode(calldata->inode, calldata->res.fattr);
-	state->state = calldata->arg.open_flags;
 	nfs4_free_closedata(calldata);
 }
 
@@ -920,24 +917,26 @@
 		.rpc_resp = &calldata->res,
 		.rpc_cred = state->owner->so_cred,
 	};
-	int mode = 0;
+	int mode = 0, old_mode;
 	int status;
 
 	status = nfs_wait_on_sequence(calldata->arg.seqid, task);
 	if (status != 0)
 		return;
-	/* Don't reorder reads */
-	smp_rmb();
 	/* Recalculate the new open mode in case someone reopened the file
 	 * while we were waiting in line to be scheduled.
 	 */
-	if (state->nreaders != 0)
-		mode |= FMODE_READ;
-	if (state->nwriters != 0)
-		mode |= FMODE_WRITE;
-	if (test_bit(NFS_DELEGATED_STATE, &state->flags))
-		state->state = mode;
-	if (mode == state->state) {
+	spin_lock(&state->owner->so_lock);
+	spin_lock(&calldata->inode->i_lock);
+	mode = old_mode = state->state;
+	if (state->nreaders == 0)
+		mode &= ~FMODE_READ;
+	if (state->nwriters == 0)
+		mode &= ~FMODE_WRITE;
+	nfs4_state_set_mode_locked(state, mode);
+	spin_unlock(&calldata->inode->i_lock);
+	spin_unlock(&state->owner->so_lock);
+	if (mode == old_mode || test_bit(NFS_DELEGATED_STATE, &state->flags)) {
 		nfs4_free_closedata(calldata);
 		task->tk_exit = NULL;
 		rpc_exit(task, 0);
@@ -961,7 +960,7 @@
  *
  * NOTE: Caller must be holding the sp->so_owner semaphore!
  */
-int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode) 
+int nfs4_do_close(struct inode *inode, struct nfs4_state *state) 
 {
 	struct nfs_server *server = NFS_SERVER(inode);
 	struct nfs4_closedata *calldata;
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 2d5a6a2..959374d 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -366,6 +366,23 @@
 	return state;
 }
 
+void
+nfs4_state_set_mode_locked(struct nfs4_state *state, mode_t mode)
+{
+	if (state->state == mode)
+		return;
+	/* NB! List reordering - see the reclaim code for why.  */
+	if ((mode & FMODE_WRITE) != (state->state & FMODE_WRITE)) {
+		if (mode & FMODE_WRITE)
+			list_move(&state->open_states, &state->owner->so_states);
+		else
+			list_move_tail(&state->open_states, &state->owner->so_states);
+	}
+	if (mode == 0)
+		list_del_init(&state->inode_states);
+	state->state = mode;
+}
+
 static struct nfs4_state *
 __nfs4_find_state(struct inode *inode, struct rpc_cred *cred, mode_t mode)
 {
@@ -376,10 +393,6 @@
 	list_for_each_entry(state, &nfsi->open_states, inode_states) {
 		if (state->owner->so_cred != cred)
 			continue;
-		if ((mode & FMODE_READ) != 0 && state->nreaders == 0)
-			continue;
-		if ((mode & FMODE_WRITE) != 0 && state->nwriters == 0)
-			continue;
 		if ((state->state & mode) != mode)
 			continue;
 		atomic_inc(&state->count);
@@ -400,7 +413,7 @@
 
 	list_for_each_entry(state, &nfsi->open_states, inode_states) {
 		/* Is this in the process of being freed? */
-		if (state->nreaders == 0 && state->nwriters == 0)
+		if (state->state == 0)
 			continue;
 		if (state->owner == owner) {
 			atomic_inc(&state->count);
@@ -481,7 +494,6 @@
 	spin_unlock(&inode->i_lock);
 	spin_unlock(&owner->so_lock);
 	iput(inode);
-	BUG_ON (state->state != 0);
 	nfs4_free_open_state(state);
 	nfs4_put_state_owner(owner);
 }
@@ -493,7 +505,7 @@
 {
 	struct inode *inode = state->inode;
 	struct nfs4_state_owner *owner = state->owner;
-	int newstate;
+	int oldstate, newstate = 0;
 
 	atomic_inc(&owner->so_count);
 	/* Protect against nfs4_find_state() */
@@ -503,30 +515,20 @@
 		state->nreaders--;
 	if (mode & FMODE_WRITE)
 		state->nwriters--;
-	if (state->nwriters == 0) {
-		if (state->nreaders == 0)
-			list_del_init(&state->inode_states);
-		/* See reclaim code */
-		list_move_tail(&state->open_states, &owner->so_states);
+	oldstate = newstate = state->state;
+	if (state->nreaders == 0)
+		newstate &= ~FMODE_READ;
+	if (state->nwriters == 0)
+		newstate &= ~FMODE_WRITE;
+	if (test_bit(NFS_DELEGATED_STATE, &state->flags)) {
+		nfs4_state_set_mode_locked(state, newstate);
+		oldstate = newstate;
 	}
 	spin_unlock(&inode->i_lock);
 	spin_unlock(&owner->so_lock);
-	newstate = 0;
-	if (state->state != 0) {
-		if (state->nreaders)
-			newstate |= FMODE_READ;
-		if (state->nwriters)
-			newstate |= FMODE_WRITE;
-		if (state->state == newstate)
-			goto out;
-		if (test_bit(NFS_DELEGATED_STATE, &state->flags)) {
-			state->state = newstate;
-			goto out;
-		}
-		if (nfs4_do_close(inode, state, newstate) == 0)
-			return;
-	}
-out:
+
+	if (oldstate != newstate && nfs4_do_close(inode, state) == 0)
+		return;
 	nfs4_put_open_state(state);
 	nfs4_put_state_owner(owner);
 }