NFS: Fix asynchronous read error handling

We must always call ->read_done() before we truncate the page data, or
decide to flag an error. The reasons are that
	in NFSv2, ->read_done() is where the eof flag gets set.
	in NFSv3/v4 ->read_done() handles EJUKEBOX-type errors, and
		  v4 state recovery.

However, we need to mark the pages as uptodate before we deal with short
read errors, since we may need to modify the nfs_read_data arguments.

We therefore split the current nfs_readpage_result() into two parts:
nfs_readpage_result(), which calls ->read_done() etc, and
nfs_readpage_retry(), which subsequently handles short reads.

Note: Removing the code that retries in case of a short read also fixes a
bug in nfs_direct_read_result(), which used to return a corrupted number of
bytes.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 8b58bbf..e879ee6e 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -461,6 +461,55 @@
 }
 
 /*
+ * This is the callback from RPC telling us whether a reply was
+ * received or some error occurred (timeout or socket shutdown).
+ */
+int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
+{
+	int status;
+
+	dprintk("%s: %4d, (status %d)\n", __FUNCTION__, task->tk_pid,
+			task->tk_status);
+
+	status = NFS_PROTO(data->inode)->read_done(task, data);
+	if (status != 0)
+		return status;
+
+	nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, data->res.count);
+
+	if (task->tk_status == -ESTALE) {
+		set_bit(NFS_INO_STALE, &NFS_FLAGS(data->inode));
+		nfs_mark_for_revalidate(data->inode);
+	}
+	spin_lock(&data->inode->i_lock);
+	NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME;
+	spin_unlock(&data->inode->i_lock);
+	return 0;
+}
+
+static int nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data)
+{
+	struct nfs_readargs *argp = &data->args;
+	struct nfs_readres *resp = &data->res;
+
+	if (resp->eof || resp->count == argp->count)
+		return 0;
+
+	/* This is a short read! */
+	nfs_inc_stats(data->inode, NFSIOS_SHORTREAD);
+	/* Has the server at least made some progress? */
+	if (resp->count == 0)
+		return 0;
+
+	/* Yes, so retry the read at the end of the data */
+	argp->offset += resp->count;
+	argp->pgbase += resp->count;
+	argp->count -= resp->count;
+	rpc_restart_call(task);
+	return -EAGAIN;
+}
+
+/*
  * Handle a read reply that fills part of a page.
  */
 static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata)
@@ -469,12 +518,16 @@
 	struct nfs_page *req = data->req;
 	struct page *page = req->wb_page;
  
-	if (likely(task->tk_status >= 0))
-		nfs_readpage_truncate_uninitialised_page(data);
-	else
-		SetPageError(page);
 	if (nfs_readpage_result(task, data) != 0)
 		return;
+
+	if (likely(task->tk_status >= 0)) {
+		nfs_readpage_truncate_uninitialised_page(data);
+		if (nfs_readpage_retry(task, data) != 0)
+			return;
+	}
+	if (unlikely(task->tk_status < 0))
+		SetPageError(page);
 	if (atomic_dec_and_test(&req->wb_complete)) {
 		if (!PageError(page))
 			SetPageUptodate(page);
@@ -502,25 +555,13 @@
 	count += base;
 	for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++)
 		SetPageUptodate(*pages);
-	if (count != 0)
+	if (count == 0)
+		return;
+	/* Was this a short read? */
+	if (data->res.eof || data->res.count == data->args.count)
 		SetPageUptodate(*pages);
 }
 
-static void nfs_readpage_set_pages_error(struct nfs_read_data *data)
-{
-	unsigned int count = data->args.count;
-	unsigned int base = data->args.pgbase;
-	struct page **pages;
-
-	pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
-	base &= ~PAGE_CACHE_MASK;
-	count += base;
-	for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++)
-		SetPageError(*pages);
-	if (count != 0)
-		SetPageError(*pages);
-}
-
 /*
  * This is the callback from RPC telling us whether a reply was
  * received or some error occurred (timeout or socket shutdown).
@@ -529,19 +570,20 @@
 {
 	struct nfs_read_data *data = calldata;
 
+	if (nfs_readpage_result(task, data) != 0)
+		return;
 	/*
-	 * Note: nfs_readpage_result may change the values of
+	 * Note: nfs_readpage_retry may change the values of
 	 * data->args. In the multi-page case, we therefore need
-	 * to ensure that we call the next nfs_readpage_set_page_uptodate()
-	 * first in the multi-page case.
+	 * to ensure that we call nfs_readpage_set_pages_uptodate()
+	 * first.
 	 */
 	if (likely(task->tk_status >= 0)) {
 		nfs_readpage_truncate_uninitialised_page(data);
 		nfs_readpage_set_pages_uptodate(data);
-	} else
-		nfs_readpage_set_pages_error(data);
-	if (nfs_readpage_result(task, data) != 0)
-		return;
+		if (nfs_readpage_retry(task, data) != 0)
+			return;
+	}
 	while (!list_empty(&data->pages)) {
 		struct nfs_page *req = nfs_list_entry(data->pages.next);
 
@@ -556,50 +598,6 @@
 };
 
 /*
- * This is the callback from RPC telling us whether a reply was
- * received or some error occurred (timeout or socket shutdown).
- */
-int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
-{
-	struct nfs_readargs *argp = &data->args;
-	struct nfs_readres *resp = &data->res;
-	int status;
-
-	dprintk("NFS: %4d nfs_readpage_result, (status %d)\n",
-		task->tk_pid, task->tk_status);
-
-	status = NFS_PROTO(data->inode)->read_done(task, data);
-	if (status != 0)
-		return status;
-
-	nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, resp->count);
-
-	if (task->tk_status < 0) {
-		if (task->tk_status == -ESTALE) {
-			set_bit(NFS_INO_STALE, &NFS_FLAGS(data->inode));
-			nfs_mark_for_revalidate(data->inode);
-		}
-	} else if (resp->count < argp->count && !resp->eof) {
-		/* This is a short read! */
-		nfs_inc_stats(data->inode, NFSIOS_SHORTREAD);
-		/* Has the server at least made some progress? */
-		if (resp->count != 0) {
-			/* Yes, so retry the read at the end of the data */
-			argp->offset += resp->count;
-			argp->pgbase += resp->count;
-			argp->count -= resp->count;
-			rpc_restart_call(task);
-			return -EAGAIN;
-		}
-		task->tk_status = -EIO;
-	}
-	spin_lock(&data->inode->i_lock);
-	NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME;
-	spin_unlock(&data->inode->i_lock);
-	return 0;
-}
-
-/*
  * Read a page over NFS.
  * We read the page synchronously in the following case:
  *  -	The error flag is set for this page. This happens only when a