| // SPDX-License-Identifier: GPL-2.0 |
| |
| #include <linux/export.h> |
| #include <linux/log2.h> |
| #include <linux/percpu.h> |
| #include <linux/preempt.h> |
| #include <linux/rcupdate.h> |
| #include <linux/sched.h> |
| #include <linux/sched/clock.h> |
| #include <linux/sched/rt.h> |
| #include <linux/sched/task.h> |
| #include <linux/slab.h> |
| |
| #include <trace/events/lock.h> |
| |
| #include "six.h" |
| |
| #ifdef DEBUG |
| #define EBUG_ON(cond) BUG_ON(cond) |
| #else |
| #define EBUG_ON(cond) do {} while (0) |
| #endif |
| |
| #define six_acquire(l, t, r, ip) lock_acquire(l, 0, t, r, 1, NULL, ip) |
| #define six_release(l, ip) lock_release(l, ip) |
| |
| static void do_six_unlock_type(struct six_lock *lock, enum six_lock_type type); |
| |
| #define SIX_LOCK_HELD_read_OFFSET 0 |
| #define SIX_LOCK_HELD_read ~(~0U << 26) |
| #define SIX_LOCK_HELD_intent (1U << 26) |
| #define SIX_LOCK_HELD_write (1U << 27) |
| #define SIX_LOCK_WAITING_read (1U << (28 + SIX_LOCK_read)) |
| #define SIX_LOCK_WAITING_write (1U << (28 + SIX_LOCK_write)) |
| #define SIX_LOCK_NOSPIN (1U << 31) |
| |
| struct six_lock_vals { |
| /* Value we add to the lock in order to take the lock: */ |
| u32 lock_val; |
| |
| /* If the lock has this value (used as a mask), taking the lock fails: */ |
| u32 lock_fail; |
| |
| /* Mask that indicates lock is held for this type: */ |
| u32 held_mask; |
| |
| /* Waitlist we wakeup when releasing the lock: */ |
| enum six_lock_type unlock_wakeup; |
| }; |
| |
| static const struct six_lock_vals l[] = { |
| [SIX_LOCK_read] = { |
| .lock_val = 1U << SIX_LOCK_HELD_read_OFFSET, |
| .lock_fail = SIX_LOCK_HELD_write, |
| .held_mask = SIX_LOCK_HELD_read, |
| .unlock_wakeup = SIX_LOCK_write, |
| }, |
| [SIX_LOCK_intent] = { |
| .lock_val = SIX_LOCK_HELD_intent, |
| .lock_fail = SIX_LOCK_HELD_intent, |
| .held_mask = SIX_LOCK_HELD_intent, |
| .unlock_wakeup = SIX_LOCK_intent, |
| }, |
| [SIX_LOCK_write] = { |
| .lock_val = SIX_LOCK_HELD_write, |
| .lock_fail = SIX_LOCK_HELD_read, |
| .held_mask = SIX_LOCK_HELD_write, |
| .unlock_wakeup = SIX_LOCK_read, |
| }, |
| }; |
| |
| static inline void six_set_bitmask(struct six_lock *lock, u32 mask) |
| { |
| if ((atomic_read(&lock->state) & mask) != mask) |
| atomic_or(mask, &lock->state); |
| } |
| |
| static inline void six_clear_bitmask(struct six_lock *lock, u32 mask) |
| { |
| if (atomic_read(&lock->state) & mask) |
| atomic_and(~mask, &lock->state); |
| } |
| |
| static inline void six_set_owner(struct six_lock *lock, enum six_lock_type type, |
| u32 old, struct task_struct *owner) |
| { |
| if (type != SIX_LOCK_intent) |
| return; |
| |
| if (!(old & SIX_LOCK_HELD_intent)) { |
| EBUG_ON(lock->owner); |
| lock->owner = owner; |
| } else { |
| EBUG_ON(lock->owner != current); |
| } |
| } |
| |
| static inline unsigned pcpu_read_count(struct six_lock *lock) |
| { |
| unsigned read_count = 0; |
| int cpu; |
| |
| for_each_possible_cpu(cpu) |
| read_count += *per_cpu_ptr(lock->readers, cpu); |
| return read_count; |
| } |
| |
| /* |
| * __do_six_trylock() - main trylock routine |
| * |
| * Returns 1 on success, 0 on failure |
| * |
| * In percpu reader mode, a failed trylock may cause a spurious trylock failure |
| * for anoter thread taking the competing lock type, and we may havve to do a |
| * wakeup: when a wakeup is required, we return -1 - wakeup_type. |
| */ |
| static int __do_six_trylock(struct six_lock *lock, enum six_lock_type type, |
| struct task_struct *task, bool try) |
| { |
| int ret; |
| u32 old; |
| |
| EBUG_ON(type == SIX_LOCK_write && lock->owner != task); |
| EBUG_ON(type == SIX_LOCK_write && |
| (try != !(atomic_read(&lock->state) & SIX_LOCK_HELD_write))); |
| |
| /* |
| * Percpu reader mode: |
| * |
| * The basic idea behind this algorithm is that you can implement a lock |
| * between two threads without any atomics, just memory barriers: |
| * |
| * For two threads you'll need two variables, one variable for "thread a |
| * has the lock" and another for "thread b has the lock". |
| * |
| * To take the lock, a thread sets its variable indicating that it holds |
| * the lock, then issues a full memory barrier, then reads from the |
| * other thread's variable to check if the other thread thinks it has |
| * the lock. If we raced, we backoff and retry/sleep. |
| * |
| * Failure to take the lock may cause a spurious trylock failure in |
| * another thread, because we temporarily set the lock to indicate that |
| * we held it. This would be a problem for a thread in six_lock(), when |
| * they are calling trylock after adding themself to the waitlist and |
| * prior to sleeping. |
| * |
| * Therefore, if we fail to get the lock, and there were waiters of the |
| * type we conflict with, we will have to issue a wakeup. |
| * |
| * Since we may be called under wait_lock (and by the wakeup code |
| * itself), we return that the wakeup has to be done instead of doing it |
| * here. |
| */ |
| if (type == SIX_LOCK_read && lock->readers) { |
| preempt_disable(); |
| this_cpu_inc(*lock->readers); /* signal that we own lock */ |
| |
| smp_mb(); |
| |
| old = atomic_read(&lock->state); |
| ret = !(old & l[type].lock_fail); |
| |
| this_cpu_sub(*lock->readers, !ret); |
| preempt_enable(); |
| |
| if (!ret) { |
| smp_mb(); |
| if (atomic_read(&lock->state) & SIX_LOCK_WAITING_write) |
| ret = -1 - SIX_LOCK_write; |
| } |
| } else if (type == SIX_LOCK_write && lock->readers) { |
| if (try) { |
| atomic_add(SIX_LOCK_HELD_write, &lock->state); |
| smp_mb__after_atomic(); |
| } |
| |
| ret = !pcpu_read_count(lock); |
| |
| if (try && !ret) { |
| old = atomic_sub_return(SIX_LOCK_HELD_write, &lock->state); |
| if (old & SIX_LOCK_WAITING_read) |
| ret = -1 - SIX_LOCK_read; |
| } |
| } else { |
| old = atomic_read(&lock->state); |
| do { |
| ret = !(old & l[type].lock_fail); |
| if (!ret || (type == SIX_LOCK_write && !try)) { |
| smp_mb(); |
| break; |
| } |
| } while (!atomic_try_cmpxchg_acquire(&lock->state, &old, old + l[type].lock_val)); |
| |
| EBUG_ON(ret && !(atomic_read(&lock->state) & l[type].held_mask)); |
| } |
| |
| if (ret > 0) |
| six_set_owner(lock, type, old, task); |
| |
| EBUG_ON(type == SIX_LOCK_write && try && ret <= 0 && |
| (atomic_read(&lock->state) & SIX_LOCK_HELD_write)); |
| |
| return ret; |
| } |
| |
| static void __six_lock_wakeup(struct six_lock *lock, enum six_lock_type lock_type) |
| { |
| struct six_lock_waiter *w, *next; |
| struct task_struct *task; |
| bool saw_one; |
| int ret; |
| again: |
| ret = 0; |
| saw_one = false; |
| raw_spin_lock(&lock->wait_lock); |
| |
| list_for_each_entry_safe(w, next, &lock->wait_list, list) { |
| if (w->lock_want != lock_type) |
| continue; |
| |
| if (saw_one && lock_type != SIX_LOCK_read) |
| goto unlock; |
| saw_one = true; |
| |
| ret = __do_six_trylock(lock, lock_type, w->task, false); |
| if (ret <= 0) |
| goto unlock; |
| |
| /* |
| * Similar to percpu_rwsem_wake_function(), we need to guard |
| * against the wakee noticing w->lock_acquired, returning, and |
| * then exiting before we do the wakeup: |
| */ |
| task = get_task_struct(w->task); |
| __list_del(w->list.prev, w->list.next); |
| /* |
| * The release barrier here ensures the ordering of the |
| * __list_del before setting w->lock_acquired; @w is on the |
| * stack of the thread doing the waiting and will be reused |
| * after it sees w->lock_acquired with no other locking: |
| * pairs with smp_load_acquire() in six_lock_slowpath() |
| */ |
| smp_store_release(&w->lock_acquired, true); |
| wake_up_process(task); |
| put_task_struct(task); |
| } |
| |
| six_clear_bitmask(lock, SIX_LOCK_WAITING_read << lock_type); |
| unlock: |
| raw_spin_unlock(&lock->wait_lock); |
| |
| if (ret < 0) { |
| lock_type = -ret - 1; |
| goto again; |
| } |
| } |
| |
| __always_inline |
| static void six_lock_wakeup(struct six_lock *lock, u32 state, |
| enum six_lock_type lock_type) |
| { |
| if (lock_type == SIX_LOCK_write && (state & SIX_LOCK_HELD_read)) |
| return; |
| |
| if (!(state & (SIX_LOCK_WAITING_read << lock_type))) |
| return; |
| |
| __six_lock_wakeup(lock, lock_type); |
| } |
| |
| __always_inline |
| static bool do_six_trylock(struct six_lock *lock, enum six_lock_type type, bool try) |
| { |
| int ret; |
| |
| ret = __do_six_trylock(lock, type, current, try); |
| if (ret < 0) |
| __six_lock_wakeup(lock, -ret - 1); |
| |
| return ret > 0; |
| } |
| |
| /** |
| * six_trylock_ip - attempt to take a six lock without blocking |
| * @lock: lock to take |
| * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write |
| * @ip: ip parameter for lockdep/lockstat, i.e. _THIS_IP_ |
| * |
| * Return: true on success, false on failure. |
| */ |
| bool six_trylock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long ip) |
| { |
| if (!do_six_trylock(lock, type, true)) |
| return false; |
| |
| if (type != SIX_LOCK_write) |
| six_acquire(&lock->dep_map, 1, type == SIX_LOCK_read, ip); |
| return true; |
| } |
| EXPORT_SYMBOL_GPL(six_trylock_ip); |
| |
| /** |
| * six_relock_ip - attempt to re-take a lock that was held previously |
| * @lock: lock to take |
| * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write |
| * @seq: lock sequence number obtained from six_lock_seq() while lock was |
| * held previously |
| * @ip: ip parameter for lockdep/lockstat, i.e. _THIS_IP_ |
| * |
| * Return: true on success, false on failure. |
| */ |
| bool six_relock_ip(struct six_lock *lock, enum six_lock_type type, |
| unsigned seq, unsigned long ip) |
| { |
| if (six_lock_seq(lock) != seq || !six_trylock_ip(lock, type, ip)) |
| return false; |
| |
| if (six_lock_seq(lock) != seq) { |
| six_unlock_ip(lock, type, ip); |
| return false; |
| } |
| |
| return true; |
| } |
| EXPORT_SYMBOL_GPL(six_relock_ip); |
| |
| #ifdef CONFIG_SIX_LOCK_SPIN_ON_OWNER |
| |
| static inline bool six_can_spin_on_owner(struct six_lock *lock) |
| { |
| struct task_struct *owner; |
| bool ret; |
| |
| if (need_resched()) |
| return false; |
| |
| rcu_read_lock(); |
| owner = READ_ONCE(lock->owner); |
| ret = !owner || owner_on_cpu(owner); |
| rcu_read_unlock(); |
| |
| return ret; |
| } |
| |
| static inline bool six_spin_on_owner(struct six_lock *lock, |
| struct task_struct *owner, |
| u64 end_time) |
| { |
| bool ret = true; |
| unsigned loop = 0; |
| |
| rcu_read_lock(); |
| while (lock->owner == owner) { |
| /* |
| * Ensure we emit the owner->on_cpu, dereference _after_ |
| * checking lock->owner still matches owner. If that fails, |
| * owner might point to freed memory. If it still matches, |
| * the rcu_read_lock() ensures the memory stays valid. |
| */ |
| barrier(); |
| |
| if (!owner_on_cpu(owner) || need_resched()) { |
| ret = false; |
| break; |
| } |
| |
| if (!(++loop & 0xf) && (time_after64(sched_clock(), end_time))) { |
| six_set_bitmask(lock, SIX_LOCK_NOSPIN); |
| ret = false; |
| break; |
| } |
| |
| cpu_relax(); |
| } |
| rcu_read_unlock(); |
| |
| return ret; |
| } |
| |
| static inline bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type type) |
| { |
| struct task_struct *task = current; |
| u64 end_time; |
| |
| if (type == SIX_LOCK_write) |
| return false; |
| |
| preempt_disable(); |
| if (!six_can_spin_on_owner(lock)) |
| goto fail; |
| |
| if (!osq_lock(&lock->osq)) |
| goto fail; |
| |
| end_time = sched_clock() + 10 * NSEC_PER_USEC; |
| |
| while (1) { |
| struct task_struct *owner; |
| |
| /* |
| * If there's an owner, wait for it to either |
| * release the lock or go to sleep. |
| */ |
| owner = READ_ONCE(lock->owner); |
| if (owner && !six_spin_on_owner(lock, owner, end_time)) |
| break; |
| |
| if (do_six_trylock(lock, type, false)) { |
| osq_unlock(&lock->osq); |
| preempt_enable(); |
| return true; |
| } |
| |
| /* |
| * When there's no owner, we might have preempted between the |
| * owner acquiring the lock and setting the owner field. If |
| * we're an RT task that will live-lock because we won't let |
| * the owner complete. |
| */ |
| if (!owner && (need_resched() || rt_task(task))) |
| break; |
| |
| /* |
| * The cpu_relax() call is a compiler barrier which forces |
| * everything in this loop to be re-loaded. We don't need |
| * memory barriers as we'll eventually observe the right |
| * values at the cost of a few extra spins. |
| */ |
| cpu_relax(); |
| } |
| |
| osq_unlock(&lock->osq); |
| fail: |
| preempt_enable(); |
| |
| /* |
| * If we fell out of the spin path because of need_resched(), |
| * reschedule now, before we try-lock again. This avoids getting |
| * scheduled out right after we obtained the lock. |
| */ |
| if (need_resched()) |
| schedule(); |
| |
| return false; |
| } |
| |
| #else /* CONFIG_SIX_LOCK_SPIN_ON_OWNER */ |
| |
| static inline bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type type) |
| { |
| return false; |
| } |
| |
| #endif |
| |
| noinline |
| static int six_lock_slowpath(struct six_lock *lock, enum six_lock_type type, |
| struct six_lock_waiter *wait, |
| six_lock_should_sleep_fn should_sleep_fn, void *p, |
| unsigned long ip) |
| { |
| int ret = 0; |
| |
| if (type == SIX_LOCK_write) { |
| EBUG_ON(atomic_read(&lock->state) & SIX_LOCK_HELD_write); |
| atomic_add(SIX_LOCK_HELD_write, &lock->state); |
| smp_mb__after_atomic(); |
| } |
| |
| trace_contention_begin(lock, 0); |
| lock_contended(&lock->dep_map, ip); |
| |
| if (six_optimistic_spin(lock, type)) |
| goto out; |
| |
| wait->task = current; |
| wait->lock_want = type; |
| wait->lock_acquired = false; |
| |
| raw_spin_lock(&lock->wait_lock); |
| six_set_bitmask(lock, SIX_LOCK_WAITING_read << type); |
| /* |
| * Retry taking the lock after taking waitlist lock, in case we raced |
| * with an unlock: |
| */ |
| ret = __do_six_trylock(lock, type, current, false); |
| if (ret <= 0) { |
| wait->start_time = local_clock(); |
| |
| if (!list_empty(&lock->wait_list)) { |
| struct six_lock_waiter *last = |
| list_last_entry(&lock->wait_list, |
| struct six_lock_waiter, list); |
| |
| if (time_before_eq64(wait->start_time, last->start_time)) |
| wait->start_time = last->start_time + 1; |
| } |
| |
| list_add_tail(&wait->list, &lock->wait_list); |
| } |
| raw_spin_unlock(&lock->wait_lock); |
| |
| if (unlikely(ret > 0)) { |
| ret = 0; |
| goto out; |
| } |
| |
| if (unlikely(ret < 0)) { |
| __six_lock_wakeup(lock, -ret - 1); |
| ret = 0; |
| } |
| |
| while (1) { |
| set_current_state(TASK_UNINTERRUPTIBLE); |
| |
| /* |
| * Ensures that writes to the waitlist entry happen after we see |
| * wait->lock_acquired: pairs with the smp_store_release in |
| * __six_lock_wakeup |
| */ |
| if (smp_load_acquire(&wait->lock_acquired)) |
| break; |
| |
| ret = should_sleep_fn ? should_sleep_fn(lock, p) : 0; |
| if (unlikely(ret)) { |
| bool acquired; |
| |
| /* |
| * If should_sleep_fn() returns an error, we are |
| * required to return that error even if we already |
| * acquired the lock - should_sleep_fn() might have |
| * modified external state (e.g. when the deadlock cycle |
| * detector in bcachefs issued a transaction restart) |
| */ |
| raw_spin_lock(&lock->wait_lock); |
| acquired = wait->lock_acquired; |
| if (!acquired) |
| list_del(&wait->list); |
| raw_spin_unlock(&lock->wait_lock); |
| |
| if (unlikely(acquired)) |
| do_six_unlock_type(lock, type); |
| break; |
| } |
| |
| schedule(); |
| } |
| |
| __set_current_state(TASK_RUNNING); |
| out: |
| if (ret && type == SIX_LOCK_write) { |
| six_clear_bitmask(lock, SIX_LOCK_HELD_write); |
| six_lock_wakeup(lock, atomic_read(&lock->state), SIX_LOCK_read); |
| } |
| trace_contention_end(lock, 0); |
| |
| return ret; |
| } |
| |
| /** |
| * six_lock_ip_waiter - take a lock, with full waitlist interface |
| * @lock: lock to take |
| * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write |
| * @wait: pointer to wait object, which will be added to lock's waitlist |
| * @should_sleep_fn: callback run after adding to waitlist, immediately prior |
| * to scheduling |
| * @p: passed through to @should_sleep_fn |
| * @ip: ip parameter for lockdep/lockstat, i.e. _THIS_IP_ |
| * |
| * This is the most general six_lock() variant, with parameters to support full |
| * cycle detection for deadlock avoidance. |
| * |
| * The code calling this function must implement tracking of held locks, and the |
| * @wait object should be embedded into the struct that tracks held locks - |
| * which must also be accessible in a thread-safe way. |
| * |
| * @should_sleep_fn should invoke the cycle detector; it should walk each |
| * lock's waiters, and for each waiter recursively walk their held locks. |
| * |
| * When this function must block, @wait will be added to @lock's waitlist before |
| * calling trylock, and before calling @should_sleep_fn, and @wait will not be |
| * removed from the lock waitlist until the lock has been successfully acquired, |
| * or we abort. |
| * |
| * @wait.start_time will be monotonically increasing for any given waitlist, and |
| * thus may be used as a loop cursor. |
| * |
| * Return: 0 on success, or the return code from @should_sleep_fn on failure. |
| */ |
| int six_lock_ip_waiter(struct six_lock *lock, enum six_lock_type type, |
| struct six_lock_waiter *wait, |
| six_lock_should_sleep_fn should_sleep_fn, void *p, |
| unsigned long ip) |
| { |
| int ret; |
| |
| wait->start_time = 0; |
| |
| if (type != SIX_LOCK_write) |
| six_acquire(&lock->dep_map, 0, type == SIX_LOCK_read, ip); |
| |
| ret = do_six_trylock(lock, type, true) ? 0 |
| : six_lock_slowpath(lock, type, wait, should_sleep_fn, p, ip); |
| |
| if (ret && type != SIX_LOCK_write) |
| six_release(&lock->dep_map, ip); |
| if (!ret) |
| lock_acquired(&lock->dep_map, ip); |
| |
| return ret; |
| } |
| EXPORT_SYMBOL_GPL(six_lock_ip_waiter); |
| |
| __always_inline |
| static void do_six_unlock_type(struct six_lock *lock, enum six_lock_type type) |
| { |
| u32 state; |
| |
| if (type == SIX_LOCK_intent) |
| lock->owner = NULL; |
| |
| if (type == SIX_LOCK_read && |
| lock->readers) { |
| smp_mb(); /* unlock barrier */ |
| this_cpu_dec(*lock->readers); |
| smp_mb(); /* between unlocking and checking for waiters */ |
| state = atomic_read(&lock->state); |
| } else { |
| u32 v = l[type].lock_val; |
| |
| if (type != SIX_LOCK_read) |
| v += atomic_read(&lock->state) & SIX_LOCK_NOSPIN; |
| |
| EBUG_ON(!(atomic_read(&lock->state) & l[type].held_mask)); |
| state = atomic_sub_return_release(v, &lock->state); |
| } |
| |
| six_lock_wakeup(lock, state, l[type].unlock_wakeup); |
| } |
| |
| /** |
| * six_unlock_ip - drop a six lock |
| * @lock: lock to unlock |
| * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write |
| * @ip: ip parameter for lockdep/lockstat, i.e. _THIS_IP_ |
| * |
| * When a lock is held multiple times (because six_lock_incement()) was used), |
| * this decrements the 'lock held' counter by one. |
| * |
| * For example: |
| * six_lock_read(&foo->lock); read count 1 |
| * six_lock_increment(&foo->lock, SIX_LOCK_read); read count 2 |
| * six_lock_unlock(&foo->lock, SIX_LOCK_read); read count 1 |
| * six_lock_unlock(&foo->lock, SIX_LOCK_read); read count 0 |
| */ |
| void six_unlock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long ip) |
| { |
| EBUG_ON(type == SIX_LOCK_write && |
| !(atomic_read(&lock->state) & SIX_LOCK_HELD_intent)); |
| EBUG_ON((type == SIX_LOCK_write || |
| type == SIX_LOCK_intent) && |
| lock->owner != current); |
| |
| if (type != SIX_LOCK_write) |
| six_release(&lock->dep_map, ip); |
| else |
| lock->seq++; |
| |
| if (type == SIX_LOCK_intent && |
| lock->intent_lock_recurse) { |
| --lock->intent_lock_recurse; |
| return; |
| } |
| |
| do_six_unlock_type(lock, type); |
| } |
| EXPORT_SYMBOL_GPL(six_unlock_ip); |
| |
| /** |
| * six_lock_downgrade - convert an intent lock to a read lock |
| * @lock: lock to dowgrade |
| * |
| * @lock will have read count incremented and intent count decremented |
| */ |
| void six_lock_downgrade(struct six_lock *lock) |
| { |
| six_lock_increment(lock, SIX_LOCK_read); |
| six_unlock_intent(lock); |
| } |
| EXPORT_SYMBOL_GPL(six_lock_downgrade); |
| |
| /** |
| * six_lock_tryupgrade - attempt to convert read lock to an intent lock |
| * @lock: lock to upgrade |
| * |
| * On success, @lock will have intent count incremented and read count |
| * decremented |
| * |
| * Return: true on success, false on failure |
| */ |
| bool six_lock_tryupgrade(struct six_lock *lock) |
| { |
| u32 old = atomic_read(&lock->state), new; |
| |
| do { |
| new = old; |
| |
| if (new & SIX_LOCK_HELD_intent) |
| return false; |
| |
| if (!lock->readers) { |
| EBUG_ON(!(new & SIX_LOCK_HELD_read)); |
| new -= l[SIX_LOCK_read].lock_val; |
| } |
| |
| new |= SIX_LOCK_HELD_intent; |
| } while (!atomic_try_cmpxchg_acquire(&lock->state, &old, new)); |
| |
| if (lock->readers) |
| this_cpu_dec(*lock->readers); |
| |
| six_set_owner(lock, SIX_LOCK_intent, old, current); |
| |
| return true; |
| } |
| EXPORT_SYMBOL_GPL(six_lock_tryupgrade); |
| |
| /** |
| * six_trylock_convert - attempt to convert a held lock from one type to another |
| * @lock: lock to upgrade |
| * @from: SIX_LOCK_read or SIX_LOCK_intent |
| * @to: SIX_LOCK_read or SIX_LOCK_intent |
| * |
| * On success, @lock will have intent count incremented and read count |
| * decremented |
| * |
| * Return: true on success, false on failure |
| */ |
| bool six_trylock_convert(struct six_lock *lock, |
| enum six_lock_type from, |
| enum six_lock_type to) |
| { |
| EBUG_ON(to == SIX_LOCK_write || from == SIX_LOCK_write); |
| |
| if (to == from) |
| return true; |
| |
| if (to == SIX_LOCK_read) { |
| six_lock_downgrade(lock); |
| return true; |
| } else { |
| return six_lock_tryupgrade(lock); |
| } |
| } |
| EXPORT_SYMBOL_GPL(six_trylock_convert); |
| |
| /** |
| * six_lock_increment - increase held lock count on a lock that is already held |
| * @lock: lock to increment |
| * @type: SIX_LOCK_read or SIX_LOCK_intent |
| * |
| * @lock must already be held, with a lock type that is greater than or equal to |
| * @type |
| * |
| * A corresponding six_unlock_type() call will be required for @lock to be fully |
| * unlocked. |
| */ |
| void six_lock_increment(struct six_lock *lock, enum six_lock_type type) |
| { |
| six_acquire(&lock->dep_map, 0, type == SIX_LOCK_read, _RET_IP_); |
| |
| /* XXX: assert already locked, and that we don't overflow: */ |
| |
| switch (type) { |
| case SIX_LOCK_read: |
| if (lock->readers) { |
| this_cpu_inc(*lock->readers); |
| } else { |
| EBUG_ON(!(atomic_read(&lock->state) & |
| (SIX_LOCK_HELD_read| |
| SIX_LOCK_HELD_intent))); |
| atomic_add(l[type].lock_val, &lock->state); |
| } |
| break; |
| case SIX_LOCK_intent: |
| EBUG_ON(!(atomic_read(&lock->state) & SIX_LOCK_HELD_intent)); |
| lock->intent_lock_recurse++; |
| break; |
| case SIX_LOCK_write: |
| BUG(); |
| break; |
| } |
| } |
| EXPORT_SYMBOL_GPL(six_lock_increment); |
| |
| /** |
| * six_lock_wakeup_all - wake up all waiters on @lock |
| * @lock: lock to wake up waiters for |
| * |
| * Wakeing up waiters will cause them to re-run should_sleep_fn, which may then |
| * abort the lock operation. |
| * |
| * This function is never needed in a bug-free program; it's only useful in |
| * debug code, e.g. to determine if a cycle detector is at fault. |
| */ |
| void six_lock_wakeup_all(struct six_lock *lock) |
| { |
| u32 state = atomic_read(&lock->state); |
| struct six_lock_waiter *w; |
| |
| six_lock_wakeup(lock, state, SIX_LOCK_read); |
| six_lock_wakeup(lock, state, SIX_LOCK_intent); |
| six_lock_wakeup(lock, state, SIX_LOCK_write); |
| |
| raw_spin_lock(&lock->wait_lock); |
| list_for_each_entry(w, &lock->wait_list, list) |
| wake_up_process(w->task); |
| raw_spin_unlock(&lock->wait_lock); |
| } |
| EXPORT_SYMBOL_GPL(six_lock_wakeup_all); |
| |
| /** |
| * six_lock_counts - return held lock counts, for each lock type |
| * @lock: lock to return counters for |
| * |
| * Return: the number of times a lock is held for read, intent and write. |
| */ |
| struct six_lock_count six_lock_counts(struct six_lock *lock) |
| { |
| struct six_lock_count ret; |
| |
| ret.n[SIX_LOCK_read] = !lock->readers |
| ? atomic_read(&lock->state) & SIX_LOCK_HELD_read |
| : pcpu_read_count(lock); |
| ret.n[SIX_LOCK_intent] = !!(atomic_read(&lock->state) & SIX_LOCK_HELD_intent) + |
| lock->intent_lock_recurse; |
| ret.n[SIX_LOCK_write] = !!(atomic_read(&lock->state) & SIX_LOCK_HELD_write); |
| |
| return ret; |
| } |
| EXPORT_SYMBOL_GPL(six_lock_counts); |
| |
| /** |
| * six_lock_readers_add - directly manipulate reader count of a lock |
| * @lock: lock to add/subtract readers for |
| * @nr: reader count to add/subtract |
| * |
| * When an upper layer is implementing lock reentrency, we may have both read |
| * and intent locks on the same lock. |
| * |
| * When we need to take a write lock, the read locks will cause self-deadlock, |
| * because six locks themselves do not track which read locks are held by the |
| * current thread and which are held by a different thread - it does no |
| * per-thread tracking of held locks. |
| * |
| * The upper layer that is tracking held locks may however, if trylock() has |
| * failed, count up its own read locks, subtract them, take the write lock, and |
| * then re-add them. |
| * |
| * As in any other situation when taking a write lock, @lock must be held for |
| * intent one (or more) times, so @lock will never be left unlocked. |
| */ |
| void six_lock_readers_add(struct six_lock *lock, int nr) |
| { |
| if (lock->readers) { |
| this_cpu_add(*lock->readers, nr); |
| } else { |
| EBUG_ON((int) (atomic_read(&lock->state) & SIX_LOCK_HELD_read) + nr < 0); |
| /* reader count starts at bit 0 */ |
| atomic_add(nr, &lock->state); |
| } |
| } |
| EXPORT_SYMBOL_GPL(six_lock_readers_add); |
| |
| /** |
| * six_lock_exit - release resources held by a lock prior to freeing |
| * @lock: lock to exit |
| * |
| * When a lock was initialized in percpu mode (SIX_OLCK_INIT_PCPU), this is |
| * required to free the percpu read counts. |
| */ |
| void six_lock_exit(struct six_lock *lock) |
| { |
| WARN_ON(lock->readers && pcpu_read_count(lock)); |
| WARN_ON(atomic_read(&lock->state) & SIX_LOCK_HELD_read); |
| |
| free_percpu(lock->readers); |
| lock->readers = NULL; |
| } |
| EXPORT_SYMBOL_GPL(six_lock_exit); |
| |
| void __six_lock_init(struct six_lock *lock, const char *name, |
| struct lock_class_key *key, enum six_lock_init_flags flags) |
| { |
| atomic_set(&lock->state, 0); |
| raw_spin_lock_init(&lock->wait_lock); |
| INIT_LIST_HEAD(&lock->wait_list); |
| #ifdef CONFIG_DEBUG_LOCK_ALLOC |
| debug_check_no_locks_freed((void *) lock, sizeof(*lock)); |
| lockdep_init_map(&lock->dep_map, name, key, 0); |
| #endif |
| |
| /* |
| * Don't assume that we have real percpu variables available in |
| * userspace: |
| */ |
| #ifdef __KERNEL__ |
| if (flags & SIX_LOCK_INIT_PCPU) { |
| /* |
| * We don't return an error here on memory allocation failure |
| * since percpu is an optimization, and locks will work with the |
| * same semantics in non-percpu mode: callers can check for |
| * failure if they wish by checking lock->readers, but generally |
| * will not want to treat it as an error. |
| */ |
| lock->readers = alloc_percpu(unsigned); |
| } |
| #endif |
| } |
| EXPORT_SYMBOL_GPL(__six_lock_init); |