ceph: fix mds sync() race with completing requests
The wait_unsafe_requests() helper dropped the mdsc mutex to wait
for each request to complete, and then examined r_node to get the
next request after retaking the lock. But the request completion
removes the request from the tree, so r_node was always undefined
at this point. Since it's a small race, it usually led to a
valid request, but not always. The result was an occasional
crash in rb_next() while dereferencing node->rb_left.
Fix this by clearing the rb_node when removing the request from
the request tree, and not walking off into the weeds when we
are done waiting for a request. Since the request we waited on
will _always_ be out of the request tree, take a ref on the next
request, in the hopes that it won't be. But if it is, it's ok:
we can start over from the beginning (and traverse over older read
requests again).
Signed-off-by: Sage Weil <sage@newdream.net>
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index a260010..5ec8641 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -529,6 +529,7 @@
{
dout("__unregister_request %p tid %lld\n", req, req->r_tid);
rb_erase(&req->r_node, &mdsc->request_tree);
+ RB_CLEAR_NODE(&req->r_node);
ceph_mdsc_put_request(req);
if (req->r_unsafe_dir) {
@@ -2682,29 +2683,41 @@
*/
static void wait_unsafe_requests(struct ceph_mds_client *mdsc, u64 want_tid)
{
- struct ceph_mds_request *req = NULL;
+ struct ceph_mds_request *req = NULL, *nextreq;
struct rb_node *n;
mutex_lock(&mdsc->mutex);
dout("wait_unsafe_requests want %lld\n", want_tid);
+restart:
req = __get_oldest_req(mdsc);
while (req && req->r_tid <= want_tid) {
+ /* find next request */
+ n = rb_next(&req->r_node);
+ if (n)
+ nextreq = rb_entry(n, struct ceph_mds_request, r_node);
+ else
+ nextreq = NULL;
if ((req->r_op & CEPH_MDS_OP_WRITE)) {
/* write op */
ceph_mdsc_get_request(req);
+ if (nextreq)
+ ceph_mdsc_get_request(nextreq);
mutex_unlock(&mdsc->mutex);
dout("wait_unsafe_requests wait on %llu (want %llu)\n",
req->r_tid, want_tid);
wait_for_completion(&req->r_safe_completion);
mutex_lock(&mdsc->mutex);
- n = rb_next(&req->r_node);
ceph_mdsc_put_request(req);
- } else {
- n = rb_next(&req->r_node);
+ if (!nextreq)
+ break; /* next dne before, so we're done! */
+ if (RB_EMPTY_NODE(&nextreq->r_node)) {
+ /* next request was removed from tree */
+ ceph_mdsc_put_request(nextreq);
+ goto restart;
+ }
+ ceph_mdsc_put_request(nextreq); /* won't go away */
}
- if (!n)
- break;
- req = rb_entry(n, struct ceph_mds_request, r_node);
+ req = nextreq;
}
mutex_unlock(&mdsc->mutex);
dout("wait_unsafe_requests done\n");