Merge tag 'block-5.14-2021-07-30' of git://git.kernel.dk/linux-block
Pull block fixes from Jens Axboe:
- gendisk freeing fix (Christoph)
- blk-iocost wake ordering fix (Tejun)
- tag allocation error handling fix (John)
- loop locking fix. While this isn't the prettiest fix in the world,
nobody has any good alternatives for 5.14. Something to likely
revisit for 5.15. (Tetsuo)
* tag 'block-5.14-2021-07-30' of git://git.kernel.dk/linux-block:
block: delay freeing the gendisk
blk-iocost: fix operation ordering in iocg_wake_fn()
blk-mq-sched: Fix blk_mq_sched_alloc_tags() error handling
loop: reintroduce global lock for safe loop_validate_file() traversal
diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index c2d6bc8..5fac375 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -1440,16 +1440,17 @@
return -1;
iocg_commit_bio(ctx->iocg, wait->bio, wait->abs_cost, cost);
+ wait->committed = true;
/*
* autoremove_wake_function() removes the wait entry only when it
- * actually changed the task state. We want the wait always
- * removed. Remove explicitly and use default_wake_function().
+ * actually changed the task state. We want the wait always removed.
+ * Remove explicitly and use default_wake_function(). Note that the
+ * order of operations is important as finish_wait() tests whether
+ * @wq_entry is removed without grabbing the lock.
*/
- list_del_init(&wq_entry->entry);
- wait->committed = true;
-
default_wake_function(wq_entry, mode, flags, key);
+ list_del_init_careful(&wq_entry->entry);
return 0;
}
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index c838d81..0f006ca 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -515,17 +515,6 @@
percpu_ref_put(&q->q_usage_counter);
}
-static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set,
- struct blk_mq_hw_ctx *hctx,
- unsigned int hctx_idx)
-{
- if (hctx->sched_tags) {
- blk_mq_free_rqs(set, hctx->sched_tags, hctx_idx);
- blk_mq_free_rq_map(hctx->sched_tags, set->flags);
- hctx->sched_tags = NULL;
- }
-}
-
static int blk_mq_sched_alloc_tags(struct request_queue *q,
struct blk_mq_hw_ctx *hctx,
unsigned int hctx_idx)
@@ -539,8 +528,10 @@
return -ENOMEM;
ret = blk_mq_alloc_rqs(set, hctx->sched_tags, hctx_idx, q->nr_requests);
- if (ret)
- blk_mq_sched_free_tags(set, hctx, hctx_idx);
+ if (ret) {
+ blk_mq_free_rq_map(hctx->sched_tags, set->flags);
+ hctx->sched_tags = NULL;
+ }
return ret;
}
diff --git a/block/genhd.c b/block/genhd.c
index af4d2ab..298ee78 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1079,10 +1079,9 @@
disk_release_events(disk);
kfree(disk->random);
xa_destroy(&disk->part_tbl);
- bdput(disk->part0);
if (test_bit(GD_QUEUE_REF, &disk->state) && disk->queue)
blk_put_queue(disk->queue);
- kfree(disk);
+ bdput(disk->part0); /* frees the disk */
}
struct class block_class = {
.name = "block",
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index f37b9e3..f0cdff0 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -88,6 +88,47 @@
static DEFINE_IDR(loop_index_idr);
static DEFINE_MUTEX(loop_ctl_mutex);
+static DEFINE_MUTEX(loop_validate_mutex);
+
+/**
+ * loop_global_lock_killable() - take locks for safe loop_validate_file() test
+ *
+ * @lo: struct loop_device
+ * @global: true if @lo is about to bind another "struct loop_device", false otherwise
+ *
+ * Returns 0 on success, -EINTR otherwise.
+ *
+ * Since loop_validate_file() traverses on other "struct loop_device" if
+ * is_loop_device() is true, we need a global lock for serializing concurrent
+ * loop_configure()/loop_change_fd()/__loop_clr_fd() calls.
+ */
+static int loop_global_lock_killable(struct loop_device *lo, bool global)
+{
+ int err;
+
+ if (global) {
+ err = mutex_lock_killable(&loop_validate_mutex);
+ if (err)
+ return err;
+ }
+ err = mutex_lock_killable(&lo->lo_mutex);
+ if (err && global)
+ mutex_unlock(&loop_validate_mutex);
+ return err;
+}
+
+/**
+ * loop_global_unlock() - release locks taken by loop_global_lock_killable()
+ *
+ * @lo: struct loop_device
+ * @global: true if @lo was about to bind another "struct loop_device", false otherwise
+ */
+static void loop_global_unlock(struct loop_device *lo, bool global)
+{
+ mutex_unlock(&lo->lo_mutex);
+ if (global)
+ mutex_unlock(&loop_validate_mutex);
+}
static int max_part;
static int part_shift;
@@ -672,13 +713,15 @@
while (is_loop_device(f)) {
struct loop_device *l;
+ lockdep_assert_held(&loop_validate_mutex);
if (f->f_mapping->host->i_rdev == bdev->bd_dev)
return -EBADF;
l = I_BDEV(f->f_mapping->host)->bd_disk->private_data;
- if (l->lo_state != Lo_bound) {
+ if (l->lo_state != Lo_bound)
return -EINVAL;
- }
+ /* Order wrt setting lo->lo_backing_file in loop_configure(). */
+ rmb();
f = l->lo_backing_file;
}
if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))
@@ -697,13 +740,18 @@
static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
unsigned int arg)
{
- struct file *file = NULL, *old_file;
- int error;
- bool partscan;
+ struct file *file = fget(arg);
+ struct file *old_file;
+ int error;
+ bool partscan;
+ bool is_loop;
- error = mutex_lock_killable(&lo->lo_mutex);
+ if (!file)
+ return -EBADF;
+ is_loop = is_loop_device(file);
+ error = loop_global_lock_killable(lo, is_loop);
if (error)
- return error;
+ goto out_putf;
error = -ENXIO;
if (lo->lo_state != Lo_bound)
goto out_err;
@@ -713,11 +761,6 @@
if (!(lo->lo_flags & LO_FLAGS_READ_ONLY))
goto out_err;
- error = -EBADF;
- file = fget(arg);
- if (!file)
- goto out_err;
-
error = loop_validate_file(file, bdev);
if (error)
goto out_err;
@@ -740,7 +783,16 @@
loop_update_dio(lo);
blk_mq_unfreeze_queue(lo->lo_queue);
partscan = lo->lo_flags & LO_FLAGS_PARTSCAN;
- mutex_unlock(&lo->lo_mutex);
+ loop_global_unlock(lo, is_loop);
+
+ /*
+ * Flush loop_validate_file() before fput(), for l->lo_backing_file
+ * might be pointing at old_file which might be the last reference.
+ */
+ if (!is_loop) {
+ mutex_lock(&loop_validate_mutex);
+ mutex_unlock(&loop_validate_mutex);
+ }
/*
* We must drop file reference outside of lo_mutex as dropping
* the file ref can take open_mutex which creates circular locking
@@ -752,9 +804,9 @@
return 0;
out_err:
- mutex_unlock(&lo->lo_mutex);
- if (file)
- fput(file);
+ loop_global_unlock(lo, is_loop);
+out_putf:
+ fput(file);
return error;
}
@@ -1136,22 +1188,22 @@
struct block_device *bdev,
const struct loop_config *config)
{
- struct file *file;
- struct inode *inode;
+ struct file *file = fget(config->fd);
+ struct inode *inode;
struct address_space *mapping;
- int error;
- loff_t size;
- bool partscan;
- unsigned short bsize;
+ int error;
+ loff_t size;
+ bool partscan;
+ unsigned short bsize;
+ bool is_loop;
+
+ if (!file)
+ return -EBADF;
+ is_loop = is_loop_device(file);
/* This is safe, since we have a reference from open(). */
__module_get(THIS_MODULE);
- error = -EBADF;
- file = fget(config->fd);
- if (!file)
- goto out;
-
/*
* If we don't hold exclusive handle for the device, upgrade to it
* here to avoid changing device under exclusive owner.
@@ -1162,7 +1214,7 @@
goto out_putf;
}
- error = mutex_lock_killable(&lo->lo_mutex);
+ error = loop_global_lock_killable(lo, is_loop);
if (error)
goto out_bdev;
@@ -1242,6 +1294,9 @@
size = get_loop_size(lo, file);
loop_set_size(lo, size);
+ /* Order wrt reading lo_state in loop_validate_file(). */
+ wmb();
+
lo->lo_state = Lo_bound;
if (part_shift)
lo->lo_flags |= LO_FLAGS_PARTSCAN;
@@ -1253,7 +1308,7 @@
* put /dev/loopXX inode. Later in __loop_clr_fd() we bdput(bdev).
*/
bdgrab(bdev);
- mutex_unlock(&lo->lo_mutex);
+ loop_global_unlock(lo, is_loop);
if (partscan)
loop_reread_partitions(lo);
if (!(mode & FMODE_EXCL))
@@ -1261,13 +1316,12 @@
return 0;
out_unlock:
- mutex_unlock(&lo->lo_mutex);
+ loop_global_unlock(lo, is_loop);
out_bdev:
if (!(mode & FMODE_EXCL))
bd_abort_claiming(bdev, loop_configure);
out_putf:
fput(file);
-out:
/* This is safe: open() is still holding a reference. */
module_put(THIS_MODULE);
return error;
@@ -1283,6 +1337,18 @@
int lo_number;
struct loop_worker *pos, *worker;
+ /*
+ * Flush loop_configure() and loop_change_fd(). It is acceptable for
+ * loop_validate_file() to succeed, for actual clear operation has not
+ * started yet.
+ */
+ mutex_lock(&loop_validate_mutex);
+ mutex_unlock(&loop_validate_mutex);
+ /*
+ * loop_validate_file() now fails because l->lo_state != Lo_bound
+ * became visible.
+ */
+
mutex_lock(&lo->lo_mutex);
if (WARN_ON_ONCE(lo->lo_state != Lo_rundown)) {
err = -ENXIO;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 0c424a0..9ef4f1f 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -812,6 +812,8 @@
free_percpu(bdev->bd_stats);
kfree(bdev->bd_meta_info);
+ if (!bdev_is_partition(bdev))
+ kfree(bdev->bd_disk);
kmem_cache_free(bdev_cachep, BDEV_I(inode));
}