ceph: reset osd connections after fault
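A single OSD connection fault (e.g. a TCP disconnect) did not cause
the connection to be reopened, which caused all current and future
requests for that OSD to hang.  Fix this by resetting the OSD's
connection in kick_requests() when a specific OSD is being kicked.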
Signed-off-by: Sage Weil <sage@newdream.net>
diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c
index 7f8a26f..fa0f737 100644
--- a/fs/ceph/osd_client.c
+++ b/fs/ceph/osd_client.c
@@ -369,7 +369,6 @@
return;
dout("osd_reset osd%d\n", osd->o_osd);
osdc = osd->o_osdc;
- osd->o_incarnation++;
down_read(&osdc->map_sem);
kick_requests(osdc, osd);
up_read(&osdc->map_sem);
@@ -921,7 +920,9 @@
dout("kick_requests osd%d\n", kickosd ? kickosd->o_osd : -1);
mutex_lock(&osdc->request_mutex);
- if (!kickosd) {
+ if (kickosd) {
+ __reset_osd(osdc, kickosd);
+ } else {
for (p = rb_first(&osdc->osds); p; p = n) {
struct ceph_osd *osd =
rb_entry(p, struct ceph_osd, o_node);