block: simplify bdev/disk lookup in blkdev_get
To simplify block device lookup and a few other upcoming areas, make sure
that we always have a struct block_device available for each disk and
each partition, and only find existing block devices in bdget. The only
downside of this is that each device and partition uses a little more
memory. The upside will be that a lot of code can be simplified.
With that all we need to look up the block device is to lookup the inode
and do a few sanity checks on the gendisk, instead of the separate lookup
for the gendisk. For blk-cgroup which wants to access a gendisk without
opening it, a new blkdev_{get,put}_no_open low-level interface is added
to replace the previous get_gendisk use.
Note that the change to look up block device directly instead of the two
step lookup using struct gendisk causes a subtile change in behavior:
accessing a non-existing partition on an existing block device can now
cause a call to request_module. That call is harmless, and in practice
no recent system will access these nodes as they aren't created by udev
and static /dev/ setups are unusual.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
diff --git a/block/genhd.c b/block/genhd.c
index f46e892..bf8fa82 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -27,17 +27,11 @@
static struct kobject *block_depr;
-static DEFINE_XARRAY(bdev_map);
-static DEFINE_MUTEX(bdev_map_lock);
+DECLARE_RWSEM(bdev_lookup_sem);
/* for extended dynamic devt allocation, currently only one major is used */
#define NR_EXT_DEVT (1 << MINORBITS)
-
-/* For extended devt allocation. ext_devt_lock prevents look up
- * results from going away underneath its user.
- */
-static DEFINE_SPINLOCK(ext_devt_lock);
-static DEFINE_IDR(ext_devt_idr);
+static DEFINE_IDA(ext_devt_ida);
static void disk_check_events(struct disk_events *ev,
unsigned int *clearing_ptr);
@@ -580,14 +574,7 @@ int blk_alloc_devt(struct hd_struct *part, dev_t *devt)
return 0;
}
- /* allocate ext devt */
- idr_preload(GFP_KERNEL);
-
- spin_lock_bh(&ext_devt_lock);
- idx = idr_alloc(&ext_devt_idr, part, 0, NR_EXT_DEVT, GFP_NOWAIT);
- spin_unlock_bh(&ext_devt_lock);
-
- idr_preload_end();
+ idx = ida_alloc_range(&ext_devt_ida, 0, NR_EXT_DEVT, GFP_KERNEL);
if (idx < 0)
return idx == -ENOSPC ? -EBUSY : idx;
@@ -606,26 +593,8 @@ int blk_alloc_devt(struct hd_struct *part, dev_t *devt)
*/
void blk_free_devt(dev_t devt)
{
- if (devt == MKDEV(0, 0))
- return;
-
- if (MAJOR(devt) == BLOCK_EXT_MAJOR) {
- spin_lock_bh(&ext_devt_lock);
- idr_remove(&ext_devt_idr, blk_mangle_minor(MINOR(devt)));
- spin_unlock_bh(&ext_devt_lock);
- }
-}
-
-/*
- * We invalidate devt by assigning NULL pointer for devt in idr.
- */
-void blk_invalidate_devt(dev_t devt)
-{
- if (MAJOR(devt) == BLOCK_EXT_MAJOR) {
- spin_lock_bh(&ext_devt_lock);
- idr_replace(&ext_devt_idr, NULL, blk_mangle_minor(MINOR(devt)));
- spin_unlock_bh(&ext_devt_lock);
- }
+ if (MAJOR(devt) == BLOCK_EXT_MAJOR)
+ ida_free(&ext_devt_ida, blk_mangle_minor(MINOR(devt)));
}
static char *bdevt_str(dev_t devt, char *buf)
@@ -640,28 +609,6 @@ static char *bdevt_str(dev_t devt, char *buf)
return buf;
}
-static void blk_register_region(struct gendisk *disk)
-{
- int i;
-
- mutex_lock(&bdev_map_lock);
- for (i = 0; i < disk->minors; i++) {
- if (xa_insert(&bdev_map, disk_devt(disk) + i, disk, GFP_KERNEL))
- WARN_ON_ONCE(1);
- }
- mutex_unlock(&bdev_map_lock);
-}
-
-static void blk_unregister_region(struct gendisk *disk)
-{
- int i;
-
- mutex_lock(&bdev_map_lock);
- for (i = 0; i < disk->minors; i++)
- xa_erase(&bdev_map, disk_devt(disk) + i);
- mutex_unlock(&bdev_map_lock);
-}
-
static void disk_scan_partitions(struct gendisk *disk)
{
struct block_device *bdev;
@@ -805,7 +752,7 @@ static void __device_add_disk(struct device *parent, struct gendisk *disk,
ret = bdi_register(bdi, "%u:%u", MAJOR(devt), MINOR(devt));
WARN_ON(ret);
bdi_set_owner(bdi, dev);
- blk_register_region(disk);
+ bdev_add(disk->part0.bdev, devt);
}
register_disk(parent, disk, groups);
if (register_queue)
@@ -847,8 +794,8 @@ static void invalidate_partition(struct gendisk *disk, int partno)
__invalidate_device(bdev, true);
/*
- * Unhash the bdev inode for this device so that it gets evicted as soon
- * as last inode reference is dropped.
+ * Unhash the bdev inode for this device so that it can't be looked
+ * up any more even if openers still hold references to it.
*/
remove_inode_hash(bdev->bd_inode);
bdput(bdev);
@@ -890,7 +837,8 @@ void del_gendisk(struct gendisk *disk)
* Block lookups of the disk until all bdevs are unhashed and the
* disk is marked as dead (GENHD_FL_UP cleared).
*/
- down_write(&disk->lookup_sem);
+ down_write(&bdev_lookup_sem);
+
/* invalidate stuff */
disk_part_iter_init(&piter, disk,
DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE);
@@ -903,7 +851,7 @@ void del_gendisk(struct gendisk *disk)
invalidate_partition(disk, 0);
set_capacity(disk, 0);
disk->flags &= ~GENHD_FL_UP;
- up_write(&disk->lookup_sem);
+ up_write(&bdev_lookup_sem);
if (!(disk->flags & GENHD_FL_HIDDEN)) {
sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");
@@ -916,16 +864,6 @@ void del_gendisk(struct gendisk *disk)
}
blk_unregister_queue(disk);
-
- if (!(disk->flags & GENHD_FL_HIDDEN))
- blk_unregister_region(disk);
- /*
- * Remove gendisk pointer from idr so that it cannot be looked up
- * while RCU period before freeing gendisk is running to prevent
- * use-after-free issues. Note that the device number stays
- * "in-use" until we really free the gendisk.
- */
- blk_invalidate_devt(disk_devt(disk));
kobject_put(disk->part0.holder_dir);
kobject_put(disk->slave_dir);
@@ -964,7 +902,7 @@ static ssize_t disk_badblocks_store(struct device *dev,
return badblocks_store(disk->bb, page, len, 0);
}
-static void request_gendisk_module(dev_t devt)
+void blk_request_module(dev_t devt)
{
unsigned int major = MAJOR(devt);
struct blk_major_name **n;
@@ -984,84 +922,6 @@ static void request_gendisk_module(dev_t devt)
request_module("block-major-%d", MAJOR(devt));
}
-static bool get_disk_and_module(struct gendisk *disk)
-{
- struct module *owner;
-
- if (!disk->fops)
- return false;
- owner = disk->fops->owner;
- if (owner && !try_module_get(owner))
- return false;
- if (!kobject_get_unless_zero(&disk_to_dev(disk)->kobj)) {
- module_put(owner);
- return false;
- }
- return true;
-
-}
-
-/**
- * get_gendisk - get partitioning information for a given device
- * @devt: device to get partitioning information for
- * @partno: returned partition index
- *
- * This function gets the structure containing partitioning
- * information for the given device @devt.
- *
- * Context: can sleep
- */
-struct gendisk *get_gendisk(dev_t devt, int *partno)
-{
- struct gendisk *disk = NULL;
-
- might_sleep();
-
- if (MAJOR(devt) != BLOCK_EXT_MAJOR) {
- mutex_lock(&bdev_map_lock);
- disk = xa_load(&bdev_map, devt);
- if (!disk) {
- mutex_unlock(&bdev_map_lock);
- request_gendisk_module(devt);
- mutex_lock(&bdev_map_lock);
- disk = xa_load(&bdev_map, devt);
- }
- if (disk && !get_disk_and_module(disk))
- disk = NULL;
- if (disk)
- *partno = devt - disk_devt(disk);
- mutex_unlock(&bdev_map_lock);
- } else {
- struct hd_struct *part;
-
- spin_lock_bh(&ext_devt_lock);
- part = idr_find(&ext_devt_idr, blk_mangle_minor(MINOR(devt)));
- if (part && get_disk_and_module(part_to_disk(part))) {
- *partno = part->partno;
- disk = part_to_disk(part);
- }
- spin_unlock_bh(&ext_devt_lock);
- }
-
- if (!disk)
- return NULL;
-
- /*
- * Synchronize with del_gendisk() to not return disk that is being
- * destroyed.
- */
- down_read(&disk->lookup_sem);
- if (unlikely((disk->flags & GENHD_FL_HIDDEN) ||
- !(disk->flags & GENHD_FL_UP))) {
- up_read(&disk->lookup_sem);
- put_disk_and_module(disk);
- disk = NULL;
- } else {
- up_read(&disk->lookup_sem);
- }
- return disk;
-}
-
/**
* bdget_disk - do bdget() by gendisk and partition number
* @disk: gendisk of interest
@@ -1559,11 +1419,6 @@ int disk_expand_part_tbl(struct gendisk *disk, int partno)
*
* This function releases all allocated resources of the gendisk.
*
- * The struct gendisk refcount is incremented with get_gendisk() or
- * get_disk_and_module(), and its refcount is decremented with
- * put_disk_and_module() or put_disk(). Once the refcount reaches 0 this
- * function is called.
- *
* Drivers which used __device_add_disk() have a gendisk with a request_queue
* assigned. Since the request_queue sits on top of the gendisk for these
* drivers we also call blk_put_queue() for them, and we expect the
@@ -1748,16 +1603,17 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)
if (!disk)
return NULL;
- disk->part0.dkstats = alloc_percpu(struct disk_stats);
- if (!disk->part0.dkstats)
+ disk->part0.bdev = bdev_alloc(disk, 0);
+ if (!disk->part0.bdev)
goto out_free_disk;
- init_rwsem(&disk->lookup_sem);
+ disk->part0.dkstats = alloc_percpu(struct disk_stats);
+ if (!disk->part0.dkstats)
+ goto out_bdput;
+
disk->node_id = node_id;
- if (disk_expand_part_tbl(disk, 0)) {
- free_percpu(disk->part0.dkstats);
- goto out_free_disk;
- }
+ if (disk_expand_part_tbl(disk, 0))
+ goto out_free_bdstats;
ptbl = rcu_dereference_protected(disk->part_tbl, 1);
rcu_assign_pointer(ptbl->part[0], &disk->part0);
@@ -1773,7 +1629,7 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)
*/
hd_sects_seq_init(&disk->part0);
if (hd_ref_init(&disk->part0))
- goto out_free_part0;
+ goto out_free_bdstats;
disk->minors = minors;
rand_initialize_disk(disk);
@@ -1782,8 +1638,10 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)
device_initialize(disk_to_dev(disk));
return disk;
-out_free_part0:
- hd_free_part(&disk->part0);
+out_free_bdstats:
+ free_percpu(disk->part0.dkstats);
+out_bdput:
+ bdput(disk->part0.bdev);
out_free_disk:
kfree(disk);
return NULL;
@@ -1807,26 +1665,6 @@ void put_disk(struct gendisk *disk)
}
EXPORT_SYMBOL(put_disk);
-/**
- * put_disk_and_module - decrements the module and gendisk refcount
- * @disk: the struct gendisk to decrement the refcount for
- *
- * This is a counterpart of get_disk_and_module() and thus also of
- * get_gendisk().
- *
- * Context: Any context, but the last reference must not be dropped from
- * atomic context.
- */
-void put_disk_and_module(struct gendisk *disk)
-{
- if (disk) {
- struct module *owner = disk->fops->owner;
-
- put_disk(disk);
- module_put(owner);
- }
-}
-
static void set_disk_ro_uevent(struct gendisk *gd, int ro)
{
char event[] = "DISK_RO=1";