[PATCH] md: fix up some rdev rcu locking in raid5/6
There is this "FIXME" comment with a typo in it!! that been annoying me for
days, so I just had to remove it.
conf->disks[i].rdev should only be accessed if
- we know we hold a reference or
- the mddev->reconfig_sem is down or
- we have a rcu_readlock
handle_stripe was referencing rdev in three places without any of these. For
the first two, get an rcu_readlock. For the last, the same access
(md_sync_acct call) is made a little later after the rdev has been claimed
under and rcu_readlock, if R5_Syncio is set. So just use that access...
However R5_Syncio isn't really needed as the 'syncing' variable contains the
same information. So use that instead.
Issues, comment, and fix are identical in raid5 and raid6.
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 0d016a8..0222ba1 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -961,11 +961,11 @@
syncing = test_bit(STRIPE_SYNCING, &sh->state);
/* Now to look around and see what can be done */
+ rcu_read_lock();
for (i=disks; i--; ) {
mdk_rdev_t *rdev;
dev = &sh->dev[i];
clear_bit(R5_Insync, &dev->flags);
- clear_bit(R5_Syncio, &dev->flags);
PRINTK("check %d: state 0x%lx read %p write %p written %p\n",
i, dev->flags, dev->toread, dev->towrite, dev->written);
@@ -1004,7 +1004,7 @@
non_overwrite++;
}
if (dev->written) written++;
- rdev = conf->disks[i].rdev; /* FIXME, should I be looking rdev */
+ rdev = rcu_dereference(conf->disks[i].rdev);
if (!rdev || !test_bit(In_sync, &rdev->flags)) {
/* The ReadError flag will just be confusing now */
clear_bit(R5_ReadError, &dev->flags);
@@ -1017,6 +1017,7 @@
} else
set_bit(R5_Insync, &dev->flags);
}
+ rcu_read_unlock();
PRINTK("locked=%d uptodate=%d to_read=%d"
" to_write=%d failed=%d failed_num=%d\n",
locked, uptodate, to_read, to_write, failed, failed_num);
@@ -1028,10 +1029,13 @@
int bitmap_end = 0;
if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
- mdk_rdev_t *rdev = conf->disks[i].rdev;
+ mdk_rdev_t *rdev;
+ rcu_read_lock();
+ rdev = rcu_dereference(conf->disks[i].rdev);
if (rdev && test_bit(In_sync, &rdev->flags))
/* multiple read failures in one stripe */
md_error(conf->mddev, rdev);
+ rcu_read_unlock();
}
spin_lock_irq(&conf->device_lock);
@@ -1180,9 +1184,6 @@
locked++;
PRINTK("Reading block %d (sync=%d)\n",
i, syncing);
- if (syncing)
- md_sync_acct(conf->disks[i].rdev->bdev,
- STRIPE_SECTORS);
}
}
}
@@ -1326,7 +1327,6 @@
clear_bit(STRIPE_DEGRADED, &sh->state);
locked++;
set_bit(STRIPE_INSYNC, &sh->state);
- set_bit(R5_Syncio, &dev->flags);
}
}
if (syncing && locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) {
@@ -1392,7 +1392,7 @@
rcu_read_unlock();
if (rdev) {
- if (test_bit(R5_Syncio, &sh->dev[i].flags))
+ if (syncing)
md_sync_acct(rdev->bdev, STRIPE_SECTORS);
bi->bi_bdev = rdev->bdev;
diff --git a/drivers/md/raid6main.c b/drivers/md/raid6main.c
index 7a51553..b5b7a8d 100644
--- a/drivers/md/raid6main.c
+++ b/drivers/md/raid6main.c
@@ -1060,11 +1060,11 @@
syncing = test_bit(STRIPE_SYNCING, &sh->state);
/* Now to look around and see what can be done */
+ rcu_read_lock();
for (i=disks; i--; ) {
mdk_rdev_t *rdev;
dev = &sh->dev[i];
clear_bit(R5_Insync, &dev->flags);
- clear_bit(R5_Syncio, &dev->flags);
PRINTK("check %d: state 0x%lx read %p write %p written %p\n",
i, dev->flags, dev->toread, dev->towrite, dev->written);
@@ -1103,7 +1103,7 @@
non_overwrite++;
}
if (dev->written) written++;
- rdev = conf->disks[i].rdev; /* FIXME, should I be looking rdev */
+ rdev = rcu_dereference(conf->disks[i].rdev);
if (!rdev || !test_bit(In_sync, &rdev->flags)) {
/* The ReadError flag will just be confusing now */
clear_bit(R5_ReadError, &dev->flags);
@@ -1117,6 +1117,7 @@
} else
set_bit(R5_Insync, &dev->flags);
}
+ rcu_read_unlock();
PRINTK("locked=%d uptodate=%d to_read=%d"
" to_write=%d failed=%d failed_num=%d,%d\n",
locked, uptodate, to_read, to_write, failed,
@@ -1129,10 +1130,13 @@
int bitmap_end = 0;
if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
- mdk_rdev_t *rdev = conf->disks[i].rdev;
+ mdk_rdev_t *rdev;
+ rcu_read_lock();
+ rdev = rcu_dereference(conf->disks[i].rdev);
if (rdev && test_bit(In_sync, &rdev->flags))
/* multiple read failures in one stripe */
md_error(conf->mddev, rdev);
+ rcu_read_unlock();
}
spin_lock_irq(&conf->device_lock);
@@ -1307,9 +1311,6 @@
locked++;
PRINTK("Reading block %d (sync=%d)\n",
i, syncing);
- if (syncing)
- md_sync_acct(conf->disks[i].rdev->bdev,
- STRIPE_SECTORS);
}
}
}
@@ -1463,14 +1464,12 @@
locked++;
set_bit(R5_LOCKED, &dev->flags);
set_bit(R5_Wantwrite, &dev->flags);
- set_bit(R5_Syncio, &dev->flags);
}
if (failed >= 1) {
dev = &sh->dev[failed_num[0]];
locked++;
set_bit(R5_LOCKED, &dev->flags);
set_bit(R5_Wantwrite, &dev->flags);
- set_bit(R5_Syncio, &dev->flags);
}
if (update_p) {
@@ -1478,14 +1477,12 @@
locked ++;
set_bit(R5_LOCKED, &dev->flags);
set_bit(R5_Wantwrite, &dev->flags);
- set_bit(R5_Syncio, &dev->flags);
}
if (update_q) {
dev = &sh->dev[qd_idx];
locked++;
set_bit(R5_LOCKED, &dev->flags);
set_bit(R5_Wantwrite, &dev->flags);
- set_bit(R5_Syncio, &dev->flags);
}
clear_bit(STRIPE_DEGRADED, &sh->state);
@@ -1557,7 +1554,7 @@
rcu_read_unlock();
if (rdev) {
- if (test_bit(R5_Syncio, &sh->dev[i].flags))
+ if (syncing)
md_sync_acct(rdev->bdev, STRIPE_SECTORS);
bi->bi_bdev = rdev->bdev;
diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h
index e9c1c0d..28fcd75 100644
--- a/include/linux/raid/raid5.h
+++ b/include/linux/raid/raid5.h
@@ -152,7 +152,6 @@
#define R5_Insync 3 /* rdev && rdev->in_sync at start */
#define R5_Wantread 4 /* want to schedule a read */
#define R5_Wantwrite 5
-#define R5_Syncio 6 /* this io need to be accounted as resync io */
#define R5_Overlap 7 /* There is a pending overlapping request on this block */
#define R5_ReadError 8 /* seen a read error here recently */
#define R5_ReWrite 9 /* have tried to over-write the readerror */