drbd: differentiate between normal and forced detach
Aborting local requests (not waiting for completion from the lower level
disk) is dangerous: if the master bio has been completed to upper
layers, data pages may be re-used for other things already.
If local IO is still pending and later completes,
this may cause crashes or corrupt unrelated data.
Only abort local IO if explicitly requested.
Intended use case is a lower level device that turned into a tarpit,
not completing io requests, not even doing error completion.
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index e54e31b..6ace11e 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -411,7 +411,7 @@
+ mdev->ldev->md.al_offset + mdev->al_tr_pos;
if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE))
- drbd_chk_io_error(mdev, 1, true);
+ drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR);
if (++mdev->al_tr_pos >
div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT))
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index fcb956b..ba91b40 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -1096,7 +1096,7 @@
if (ctx->error) {
dev_alert(DEV, "we had at least one MD IO ERROR during bitmap IO\n");
- drbd_chk_io_error(mdev, 1, true);
+ drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR);
err = -EIO; /* ctx->error ? */
}
@@ -1212,7 +1212,7 @@
wait_until_done_or_disk_failure(mdev, mdev->ldev, &ctx->done);
if (ctx->error)
- drbd_chk_io_error(mdev, 1, true);
+ drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR);
/* that should force detach, so the in memory bitmap will be
* gone in a moment as well. */
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 356a6e5..79c69eb 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -832,6 +832,7 @@
BITMAP_IO_QUEUED, /* Started bitmap IO */
GO_DISKLESS, /* Disk is being detached, on io-error or admin request. */
WAS_IO_ERROR, /* Local disk failed returned IO error */
+ FORCE_DETACH, /* Force-detach from local disk, aborting any pending local IO */
RESYNC_AFTER_NEG, /* Resync after online grow after the attach&negotiate finished. */
NET_CONGESTED, /* The data socket is congested */
@@ -1838,12 +1839,20 @@
return _drbd_request_state(mdev, mask, val, CS_VERBOSE + CS_ORDERED);
}
+enum drbd_force_detach_flags {
+ DRBD_IO_ERROR,
+ DRBD_META_IO_ERROR,
+ DRBD_FORCE_DETACH,
+};
+
#define __drbd_chk_io_error(m,f) __drbd_chk_io_error_(m,f, __func__)
-static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, int forcedetach, const char *where)
+static inline void __drbd_chk_io_error_(struct drbd_conf *mdev,
+ enum drbd_force_detach_flags forcedetach,
+ const char *where)
{
switch (mdev->ldev->dc.on_io_error) {
case EP_PASS_ON:
- if (!forcedetach) {
+ if (forcedetach == DRBD_IO_ERROR) {
if (__ratelimit(&drbd_ratelimit_state))
dev_err(DEV, "Local IO failed in %s.\n", where);
if (mdev->state.disk > D_INCONSISTENT)
@@ -1854,6 +1863,8 @@
case EP_DETACH:
case EP_CALL_HELPER:
set_bit(WAS_IO_ERROR, &mdev->flags);
+ if (forcedetach == DRBD_FORCE_DETACH)
+ set_bit(FORCE_DETACH, &mdev->flags);
if (mdev->state.disk > D_FAILED) {
_drbd_set_state(_NS(mdev, disk, D_FAILED), CS_HARD, NULL);
dev_err(DEV,
@@ -1873,7 +1884,7 @@
*/
#define drbd_chk_io_error(m,e,f) drbd_chk_io_error_(m,e,f, __func__)
static inline void drbd_chk_io_error_(struct drbd_conf *mdev,
- int error, int forcedetach, const char *where)
+ int error, enum drbd_force_detach_flags forcedetach, const char *where)
{
if (error) {
unsigned long flags;
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 920ede2..5bebe8d 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -1630,9 +1630,21 @@
eh = mdev->ldev->dc.on_io_error;
was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags);
- /* Immediately allow completion of all application IO, that waits
- for completion from the local disk. */
- tl_abort_disk_io(mdev);
+ /* Immediately allow completion of all application IO,
+ * that waits for completion from the local disk,
+ * if this was a force-detach due to disk_timeout
+ * or administrator request (drbdsetup detach --force).
+ * Do NOT abort otherwise.
+ * Aborting local requests may cause serious problems,
+ * if requests are completed to upper layers already,
+ * and then later the already submitted local bio completes.
+ * This can cause DMA into former bio pages that meanwhile
+ * have been re-used for other things.
+ * So aborting local requests may cause crashes,
+ * or even worse, silent data corruption.
+ */
+ if (test_and_clear_bit(FORCE_DETACH, &mdev->flags))
+ tl_abort_disk_io(mdev);
/* current state still has to be D_FAILED,
* there is only one way out: to D_DISKLESS,
@@ -3870,7 +3882,7 @@
if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) {
/* this was a try anyways ... */
dev_err(DEV, "meta data update failed!\n");
- drbd_chk_io_error(mdev, 1, true);
+ drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR);
}
/* Update mdev->ldev->md.la_size_sect,
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 6d4de6a..40a1c4f0 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -950,6 +950,9 @@
* to realize a "hot spare" feature (not that I'd recommend that) */
wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt));
+ /* make sure there is no leftover from previous force-detach attempts */
+ clear_bit(FORCE_DETACH, &mdev->flags);
+
/* allocation not in the IO path, cqueue thread context */
nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL);
if (!nbc) {
@@ -1345,6 +1348,7 @@
}
if (dt.detach_force) {
+ set_bit(FORCE_DETACH, &mdev->flags);
drbd_force_state(mdev, NS(disk, D_FAILED));
reply->ret_code = SS_SUCCESS;
goto out;
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 8e93a6a..1f4b2db 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -455,7 +455,7 @@
req->rq_state |= RQ_LOCAL_COMPLETED;
req->rq_state &= ~RQ_LOCAL_PENDING;
- __drbd_chk_io_error(mdev, false);
+ __drbd_chk_io_error(mdev, DRBD_IO_ERROR);
_req_may_be_done_not_susp(req, m);
break;
@@ -477,7 +477,7 @@
break;
}
- __drbd_chk_io_error(mdev, false);
+ __drbd_chk_io_error(mdev, DRBD_IO_ERROR);
goto_queue_for_net_read:
@@ -1275,7 +1275,7 @@
time_after(now, req->start_time + dt) &&
!time_in_range(now, mdev->last_reattach_jif, mdev->last_reattach_jif + dt)) {
dev_warn(DEV, "Local backing device failed to meet the disk-timeout\n");
- __drbd_chk_io_error(mdev, 1);
+ __drbd_chk_io_error(mdev, DRBD_FORCE_DETACH);
}
nt = (time_after(now, req->start_time + et) ? now : req->start_time) + et;
spin_unlock_irq(&mdev->req_lock);
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 620c70f..a35393f 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -111,7 +111,7 @@
if (list_empty(&mdev->read_ee))
wake_up(&mdev->ee_wait);
if (test_bit(__EE_WAS_ERROR, &e->flags))
- __drbd_chk_io_error(mdev, false);
+ __drbd_chk_io_error(mdev, DRBD_IO_ERROR);
spin_unlock_irqrestore(&mdev->req_lock, flags);
drbd_queue_work(&mdev->data.work, &e->w);
@@ -154,7 +154,7 @@
: list_empty(&mdev->active_ee);
if (test_bit(__EE_WAS_ERROR, &e->flags))
- __drbd_chk_io_error(mdev, false);
+ __drbd_chk_io_error(mdev, DRBD_IO_ERROR);
spin_unlock_irqrestore(&mdev->req_lock, flags);
if (is_syncer_req)