/*
 * Copyright (C) 2011 Red Hat, Inc.
 *
 * This file is released under the GPL.
 */
#include "dm-block-manager.h"
#include "dm-persistent-data-internal.h"
#include "../dm-bufio.h"

#include <linux/crc32c.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/rwsem.h>
#include <linux/device-mapper.h>
#include <linux/stacktrace.h>

#define DM_MSG_PREFIX "block manager"

/*----------------------------------------------------------------*/

/*
 * This is a read/write semaphore with a couple of differences.
 *
 * i) There is a restriction on the number of concurrent read locks that
 * may be held at once.  This is just an implementation detail.
 *
 * ii) Recursive locking attempts are detected and fail with -EINVAL.  A
 * stack trace is also emitted for the previous lock acquisition.
 *
 * iii) Priority is given to write locks.
 */
#define MAX_HOLDERS 4
#define MAX_STACK 10

typedef unsigned long stack_entries[MAX_STACK];

struct block_lock {
	spinlock_t lock;
	__s32 count;		/* < 0: write locked, 0: unlocked, > 0: reader count */
	struct list_head waiters;
	struct task_struct *holders[MAX_HOLDERS];

#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
	struct stack_trace traces[MAX_HOLDERS];
	stack_entries entries[MAX_HOLDERS];
#endif
};

struct waiter {
	struct list_head list;
	struct task_struct *task;	/* cleared by __wake_waiter() when woken */
	int wants_write;
};

/*
 * Returns the index of the first holder slot containing @task; pass NULL
 * to find a free slot.
 */
static unsigned __find_holder(struct block_lock *lock,
			      struct task_struct *task)
{
	unsigned i;

	for (i = 0; i < MAX_HOLDERS; i++)
		if (lock->holders[i] == task)
			break;

	BUG_ON(i == MAX_HOLDERS);
	return i;
}

/* call this *after* you increment lock->count */
static void __add_holder(struct block_lock *lock, struct task_struct *task)
{
	unsigned h = __find_holder(lock, NULL);
#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
	struct stack_trace *t;
#endif

	get_task_struct(task);
	lock->holders[h] = task;

#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
	t = lock->traces + h;
	t->nr_entries = 0;
	t->max_entries = MAX_STACK;
	t->entries = lock->entries[h];
	t->skip = 2;
	save_stack_trace(t);
#endif
}

/* call this *before* you decrement lock->count */
static void __del_holder(struct block_lock *lock, struct task_struct *task)
{
	unsigned h = __find_holder(lock, task);
	lock->holders[h] = NULL;
	put_task_struct(task);
}

static int __check_holder(struct block_lock *lock)
{
	unsigned i;
#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
	static struct stack_trace t;
	static stack_entries entries;
#endif

	for (i = 0; i < MAX_HOLDERS; i++) {
		if (lock->holders[i] == current) {
			DMERR("recursive lock detected in pool metadata");
#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
			DMERR("previously held here:");
			print_stack_trace(lock->traces + i, 4);

			DMERR("subsequent acquisition attempted here:");
			t.nr_entries = 0;
			t.max_entries = MAX_STACK;
			t.entries = entries;
			t.skip = 3;
			save_stack_trace(&t);
			print_stack_trace(&t, 4);
#endif
			return -EINVAL;
		}
	}

	return 0;
}

static void __wait(struct waiter *w)
{
	/*
	 * Sleep until __wake_waiter() clears w->task.  The task state is
	 * set before the check so a wake-up between the two is not lost.
	 */
	for (;;) {
		set_task_state(current, TASK_UNINTERRUPTIBLE);

		if (!w->task)
			break;

		schedule();
	}

	set_task_state(current, TASK_RUNNING);
}

static void __wake_waiter(struct waiter *w)
{
	struct task_struct *task;

	list_del(&w->list);
	task = w->task;
	/*
	 * The waiter may leave __wait() and free its stack frame as soon
	 * as it observes w->task == NULL, so the list removal above must
	 * be visible before the store below.
	 */
	smp_mb();
	w->task = NULL;
	wake_up_process(task);
}

/*
 * We either wake a few readers or a single writer.
 */
static void __wake_many(struct block_lock *lock)
{
	struct waiter *w, *tmp;

	BUG_ON(lock->count < 0);
	list_for_each_entry_safe(w, tmp, &lock->waiters, list) {
		if (lock->count >= MAX_HOLDERS)
			return;

		if (w->wants_write) {
			if (lock->count > 0)
				return; /* still read locked */

			lock->count = -1;
			__add_holder(lock, w->task);
			__wake_waiter(w);
			return;
		}

		lock->count++;
		__add_holder(lock, w->task);
		__wake_waiter(w);
	}
}

static void bl_init(struct block_lock *lock)
{
	int i;

	spin_lock_init(&lock->lock);
	lock->count = 0;
	INIT_LIST_HEAD(&lock->waiters);
	for (i = 0; i < MAX_HOLDERS; i++)
		lock->holders[i] = NULL;
}

static int __available_for_read(struct block_lock *lock)
{
	return lock->count >= 0 &&
		lock->count < MAX_HOLDERS &&
		list_empty(&lock->waiters);
}

static int bl_down_read(struct block_lock *lock)
{
	int r;
	struct waiter w;

	spin_lock(&lock->lock);
	r = __check_holder(lock);
	if (r) {
		spin_unlock(&lock->lock);
		return r;
	}

	if (__available_for_read(lock)) {
		lock->count++;
		__add_holder(lock, current);
		spin_unlock(&lock->lock);
		return 0;
	}

	get_task_struct(current);

	w.task = current;
	w.wants_write = 0;
	list_add_tail(&w.list, &lock->waiters);
	spin_unlock(&lock->lock);

	__wait(&w);
	put_task_struct(current);
	return 0;
}

static int bl_down_read_nonblock(struct block_lock *lock)
{
	int r;

	spin_lock(&lock->lock);
	r = __check_holder(lock);
	if (r)
		goto out;

	if (__available_for_read(lock)) {
		lock->count++;
		__add_holder(lock, current);
		r = 0;
	} else
		r = -EWOULDBLOCK;

out:
	spin_unlock(&lock->lock);
	return r;
}

static void bl_up_read(struct block_lock *lock)
{
	spin_lock(&lock->lock);
	BUG_ON(lock->count <= 0);
	__del_holder(lock, current);
	--lock->count;
	if (!list_empty(&lock->waiters))
		__wake_many(lock);
	spin_unlock(&lock->lock);
}

static int bl_down_write(struct block_lock *lock)
{
	int r;
	struct waiter w;

	spin_lock(&lock->lock);
	r = __check_holder(lock);
	if (r) {
		spin_unlock(&lock->lock);
		return r;
	}

	if (lock->count == 0 && list_empty(&lock->waiters)) {
		lock->count = -1;
		__add_holder(lock, current);
		spin_unlock(&lock->lock);
		return 0;
	}

	get_task_struct(current);
	w.task = current;
	w.wants_write = 1;

	/*
	 * Writers are given priority by being added to the head of the
	 * wait list.  We know there's only one mutator in the system, so
	 * we can ignore the ordering reversal this causes.
	 */
	list_add(&w.list, &lock->waiters);
	spin_unlock(&lock->lock);

	__wait(&w);
	put_task_struct(current);

	return 0;
}

static void bl_up_write(struct block_lock *lock)
{
	spin_lock(&lock->lock);
	__del_holder(lock, current);
	lock->count = 0;
	if (!list_empty(&lock->waiters))
		__wake_many(lock);
	spin_unlock(&lock->lock);
}

static void report_recursive_bug(dm_block_t b, int r)
{
	if (r == -EINVAL)
		DMERR("recursive acquisition of block %llu requested.",
		      (unsigned long long) b);
}
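
/*
 * Illustrative sketch of the lock's intended use (the bl_* helpers are
 * local to this file; the snippet is for exposition only, not built):
 *
 *	struct block_lock bl;
 *
 *	bl_init(&bl);
 *	if (!bl_down_read(&bl)) {	// 0 on success, -EINVAL on recursion
 *		// ... read the protected block ...
 *		bl_up_read(&bl);
 *	}
 */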

/*----------------------------------------------------------------*/

/*
 * Block manager is currently implemented using dm-bufio.  struct
 * dm_block_manager and struct dm_block map directly onto a couple of
 * structs in the bufio interface.  I want to retain the freedom to move
 * away from bufio in the future.  So these structs are just cast within
 * this .c file, rather than making it through to the public interface.
 */
static struct dm_buffer *to_buffer(struct dm_block *b)
{
	return (struct dm_buffer *) b;
}

dm_block_t dm_block_location(struct dm_block *b)
{
	return dm_bufio_get_block_number(to_buffer(b));
}
EXPORT_SYMBOL_GPL(dm_block_location);

void *dm_block_data(struct dm_block *b)
{
	return dm_bufio_get_block_data(to_buffer(b));
}
EXPORT_SYMBOL_GPL(dm_block_data);

struct buffer_aux {
	struct dm_block_validator *validator;
	struct block_lock lock;
	int write_locked;	/* tells dm_bm_unlock() to mark the buffer dirty */
};

static void dm_block_manager_alloc_callback(struct dm_buffer *buf)
{
	struct buffer_aux *aux = dm_bufio_get_aux_data(buf);

	aux->validator = NULL;
	bl_init(&aux->lock);
}

static void dm_block_manager_write_callback(struct dm_buffer *buf)
{
	struct buffer_aux *aux = dm_bufio_get_aux_data(buf);

	if (aux->validator) {
		aux->validator->prepare_for_write(aux->validator, (struct dm_block *) buf,
			dm_bufio_get_block_size(dm_bufio_get_client(buf)));
	}
}

/*----------------------------------------------------------------
 * Public interface
 *--------------------------------------------------------------*/
struct dm_block_manager {
	struct dm_bufio_client *bufio;
	bool read_only:1;
};

struct dm_block_manager *dm_block_manager_create(struct block_device *bdev,
						 unsigned block_size,
						 unsigned cache_size,
						 unsigned max_held_per_thread)
{
	int r;
	struct dm_block_manager *bm;

	bm = kmalloc(sizeof(*bm), GFP_KERNEL);
	if (!bm) {
		r = -ENOMEM;
		goto bad;
	}

	bm->bufio = dm_bufio_client_create(bdev, block_size, max_held_per_thread,
					   sizeof(struct buffer_aux),
					   dm_block_manager_alloc_callback,
					   dm_block_manager_write_callback);
	if (IS_ERR(bm->bufio)) {
		r = PTR_ERR(bm->bufio);
		kfree(bm);
		goto bad;
	}

	bm->read_only = false;

	return bm;

bad:
	return ERR_PTR(r);
}
EXPORT_SYMBOL_GPL(dm_block_manager_create);
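
/*
 * Illustrative sketch (not built): the typical create/destroy lifecycle.
 * The 4096 byte block size, cache size of 16 and single held block per
 * thread are assumptions chosen for the example only.
 *
 *	struct dm_block_manager *bm;
 *
 *	bm = dm_block_manager_create(bdev, 4096, 16, 1);
 *	if (IS_ERR(bm))
 *		return PTR_ERR(bm);
 *	// ... take read/write locks against bm ...
 *	dm_block_manager_destroy(bm);
 */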

void dm_block_manager_destroy(struct dm_block_manager *bm)
{
	dm_bufio_client_destroy(bm->bufio);
	kfree(bm);
}
EXPORT_SYMBOL_GPL(dm_block_manager_destroy);

unsigned dm_bm_block_size(struct dm_block_manager *bm)
{
	return dm_bufio_get_block_size(bm->bufio);
}
EXPORT_SYMBOL_GPL(dm_bm_block_size);

dm_block_t dm_bm_nr_blocks(struct dm_block_manager *bm)
{
	return dm_bufio_get_device_size(bm->bufio);
}

static int dm_bm_validate_buffer(struct dm_block_manager *bm,
				 struct dm_buffer *buf,
				 struct buffer_aux *aux,
				 struct dm_block_validator *v)
{
	if (unlikely(!aux->validator)) {
		int r;

		if (!v)
			return 0;

		r = v->check(v, (struct dm_block *) buf,
			     dm_bufio_get_block_size(bm->bufio));
		if (unlikely(r)) {
			DMERR_LIMIT("%s validator check failed for block %llu", v->name,
				    (unsigned long long) dm_bufio_get_block_number(buf));
			return r;
		}
		aux->validator = v;
	} else {
		if (unlikely(aux->validator != v)) {
			DMERR_LIMIT("validator mismatch (old=%s vs new=%s) for block %llu",
				    aux->validator->name, v ? v->name : "NULL",
				    (unsigned long long) dm_bufio_get_block_number(buf));
			return -EINVAL;
		}
	}

	return 0;
}

int dm_bm_read_lock(struct dm_block_manager *bm, dm_block_t b,
		    struct dm_block_validator *v,
		    struct dm_block **result)
{
	struct buffer_aux *aux;
	void *p;
	int r;

	p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result);
	if (unlikely(IS_ERR(p)))
		return PTR_ERR(p);

	aux = dm_bufio_get_aux_data(to_buffer(*result));
	r = bl_down_read(&aux->lock);
	if (unlikely(r)) {
		dm_bufio_release(to_buffer(*result));
		report_recursive_bug(b, r);
		return r;
	}

	aux->write_locked = 0;

	r = dm_bm_validate_buffer(bm, to_buffer(*result), aux, v);
	if (unlikely(r)) {
		bl_up_read(&aux->lock);
		dm_bufio_release(to_buffer(*result));
		return r;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(dm_bm_read_lock);
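
/*
 * Illustrative sketch (not built) of the canonical read path, assuming
 * a block manager 'bm', a caller-supplied validator 'v' and a
 * hypothetical consume_data() helper:
 *
 *	struct dm_block *blk;
 *	int r;
 *
 *	r = dm_bm_read_lock(bm, b, v, &blk);
 *	if (r)
 *		return r;
 *	consume_data(dm_block_data(blk));
 *	dm_bm_unlock(blk);
 */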

int dm_bm_write_lock(struct dm_block_manager *bm,
		     dm_block_t b, struct dm_block_validator *v,
		     struct dm_block **result)
{
	struct buffer_aux *aux;
	void *p;
	int r;

	if (bm->read_only)
		return -EPERM;

	p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result);
	if (unlikely(IS_ERR(p)))
		return PTR_ERR(p);

	aux = dm_bufio_get_aux_data(to_buffer(*result));
	r = bl_down_write(&aux->lock);
	if (r) {
		dm_bufio_release(to_buffer(*result));
		report_recursive_bug(b, r);
		return r;
	}

	aux->write_locked = 1;

	r = dm_bm_validate_buffer(bm, to_buffer(*result), aux, v);
	if (unlikely(r)) {
		bl_up_write(&aux->lock);
		dm_bufio_release(to_buffer(*result));
		return r;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(dm_bm_write_lock);
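
/*
 * Illustrative sketch (not built): updating a block in place.  No
 * explicit dirtying is needed, since dm_bm_unlock() marks a
 * write-locked buffer dirty.
 *
 *	r = dm_bm_write_lock(bm, b, v, &blk);
 *	if (r)
 *		return r;
 *	memset(dm_block_data(blk), 0, dm_bm_block_size(bm));
 *	dm_bm_unlock(blk);
 */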

int dm_bm_read_try_lock(struct dm_block_manager *bm,
			dm_block_t b, struct dm_block_validator *v,
			struct dm_block **result)
{
	struct buffer_aux *aux;
	void *p;
	int r;

	p = dm_bufio_get(bm->bufio, b, (struct dm_buffer **) result);
	if (unlikely(IS_ERR(p)))
		return PTR_ERR(p);
	if (unlikely(!p))
		return -EWOULDBLOCK;

	aux = dm_bufio_get_aux_data(to_buffer(*result));
	r = bl_down_read_nonblock(&aux->lock);
	if (r < 0) {
		dm_bufio_release(to_buffer(*result));
		report_recursive_bug(b, r);
		return r;
	}
	aux->write_locked = 0;

	r = dm_bm_validate_buffer(bm, to_buffer(*result), aux, v);
	if (unlikely(r)) {
		bl_up_read(&aux->lock);
		dm_bufio_release(to_buffer(*result));
		return r;
	}

	return 0;
}

int dm_bm_write_lock_zero(struct dm_block_manager *bm,
			  dm_block_t b, struct dm_block_validator *v,
			  struct dm_block **result)
{
	int r;
	struct buffer_aux *aux;
	void *p;

	if (bm->read_only)
		return -EPERM;

	p = dm_bufio_new(bm->bufio, b, (struct dm_buffer **) result);
	if (unlikely(IS_ERR(p)))
		return PTR_ERR(p);

	memset(p, 0, dm_bm_block_size(bm));

	aux = dm_bufio_get_aux_data(to_buffer(*result));
	r = bl_down_write(&aux->lock);
	if (r) {
		dm_bufio_release(to_buffer(*result));
		return r;
	}

	aux->write_locked = 1;
	aux->validator = v;

	return 0;
}
EXPORT_SYMBOL_GPL(dm_bm_write_lock_zero);

int dm_bm_unlock(struct dm_block *b)
{
	struct buffer_aux *aux = dm_bufio_get_aux_data(to_buffer(b));

	if (aux->write_locked) {
		dm_bufio_mark_buffer_dirty(to_buffer(b));
		bl_up_write(&aux->lock);
	} else
		bl_up_read(&aux->lock);

	dm_bufio_release(to_buffer(b));

	return 0;
}
EXPORT_SYMBOL_GPL(dm_bm_unlock);

int dm_bm_flush_and_unlock(struct dm_block_manager *bm,
			   struct dm_block *superblock)
{
	int r;

	if (bm->read_only)
		return -EPERM;

	r = dm_bufio_write_dirty_buffers(bm->bufio);
	if (unlikely(r)) {
		dm_bm_unlock(superblock);
		return r;
	}

	/*
	 * Unlocking the superblock marks it dirty, so the second flush
	 * ensures it reaches the disk only after the blocks it references
	 * have been written.
	 */
	dm_bm_unlock(superblock);

	return dm_bufio_write_dirty_buffers(bm->bufio);
}
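
/*
 * Illustrative commit sketch (not built), assuming the caller holds the
 * write lock on 'sblock', the superblock, and that abort_transaction()
 * is a hypothetical recovery path:
 *
 *	r = dm_bm_flush_and_unlock(bm, sblock);
 *	if (r)
 *		abort_transaction();
 */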

void dm_bm_set_read_only(struct dm_block_manager *bm)
{
	bm->read_only = true;
}
EXPORT_SYMBOL_GPL(dm_bm_set_read_only);

u32 dm_bm_checksum(const void *data, size_t len, u32 init_xor)
{
	return crc32c(~(u32) 0, data, len) ^ init_xor;
}
EXPORT_SYMBOL_GPL(dm_bm_checksum);
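
/*
 * Illustrative sketch (not built) of how on-disk structures typically
 * use this helper: checksum everything after the csum field itself,
 * salted with a per-structure XOR value.  'struct example_header' and
 * EXAMPLE_CSUM_XOR are hypothetical names for the example.
 *
 *	struct example_header {
 *		__le32 csum;
 *		__le32 flags;
 *		__le64 blocknr;
 *	} __packed;
 *
 *	#define EXAMPLE_CSUM_XOR 0xdeadbeef
 *
 *	hdr->csum = cpu_to_le32(dm_bm_checksum(&hdr->flags,
 *					       block_size - sizeof(__le32),
 *					       EXAMPLE_CSUM_XOR));
 */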

/*----------------------------------------------------------------*/

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
MODULE_DESCRIPTION("Immutable metadata library for dm");

/*----------------------------------------------------------------*/