| // SPDX-License-Identifier: GPL-2.0 |
| /* |
| * This file contains functions which emulate a local clock-event |
| * device via a broadcast event source. |
| * |
| * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de> |
| * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar |
| * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner |
| */ |
| #include <linux/cpu.h> |
| #include <linux/err.h> |
| #include <linux/hrtimer.h> |
| #include <linux/interrupt.h> |
| #include <linux/percpu.h> |
| #include <linux/profile.h> |
| #include <linux/sched.h> |
| #include <linux/smp.h> |
| #include <linux/module.h> |
| |
| #include "tick-internal.h" |
| |
| /* |
| * Broadcast support for broken x86 hardware, where the local apic |
| * timer stops in C3 state. |
| */ |
| |
| static struct tick_device tick_broadcast_device; |
| static cpumask_var_t tick_broadcast_mask __cpumask_var_read_mostly; |
| static cpumask_var_t tick_broadcast_on __cpumask_var_read_mostly; |
| static cpumask_var_t tmpmask __cpumask_var_read_mostly; |
| static int tick_broadcast_forced; |
| |
| static __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(tick_broadcast_lock); |
| |
| #ifdef CONFIG_TICK_ONESHOT |
| static DEFINE_PER_CPU(struct clock_event_device *, tick_oneshot_wakeup_device); |
| |
| static void tick_broadcast_setup_oneshot(struct clock_event_device *bc, bool from_periodic); |
| static void tick_broadcast_clear_oneshot(int cpu); |
| static void tick_resume_broadcast_oneshot(struct clock_event_device *bc); |
| # ifdef CONFIG_HOTPLUG_CPU |
| static void tick_broadcast_oneshot_offline(unsigned int cpu); |
| # endif |
| #else |
| static inline void |
| tick_broadcast_setup_oneshot(struct clock_event_device *bc, bool from_periodic) { BUG(); } |
| static inline void tick_broadcast_clear_oneshot(int cpu) { } |
| static inline void tick_resume_broadcast_oneshot(struct clock_event_device *bc) { } |
| # ifdef CONFIG_HOTPLUG_CPU |
| static inline void tick_broadcast_oneshot_offline(unsigned int cpu) { } |
| # endif |
| #endif |
| |
| /* |
| * Debugging: see timer_list.c |
| */ |
| struct tick_device *tick_get_broadcast_device(void) |
| { |
| return &tick_broadcast_device; |
| } |
| |
| struct cpumask *tick_get_broadcast_mask(void) |
| { |
| return tick_broadcast_mask; |
| } |
| |
static struct clock_event_device *tick_get_oneshot_wakeup_device(int cpu);

/*
 * Read-only accessor for the oneshot wakeup device of @cpu. The result is
 * whatever tick_get_oneshot_wakeup_device() reports, which may be NULL.
 */
const struct clock_event_device *tick_get_wakeup_device(int cpu)
{
	const struct clock_event_device *wd;

	wd = tick_get_oneshot_wakeup_device(cpu);
	return wd;
}
| |
/*
 * Put the broadcast device into periodic mode. A NULL @bc is tolerated
 * and results in a no-op.
 */
static void tick_broadcast_start_periodic(struct clock_event_device *bc)
{
	if (!bc)
		return;

	tick_setup_periodic(bc, 1);
}
| |
| /* |
| * Check, if the device can be utilized as broadcast device: |
| */ |
| static bool tick_check_broadcast_device(struct clock_event_device *curdev, |
| struct clock_event_device *newdev) |
| { |
| if ((newdev->features & CLOCK_EVT_FEAT_DUMMY) || |
| (newdev->features & CLOCK_EVT_FEAT_PERCPU) || |
| (newdev->features & CLOCK_EVT_FEAT_C3STOP)) |
| return false; |
| |
| if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT && |
| !(newdev->features & CLOCK_EVT_FEAT_ONESHOT)) |
| return false; |
| |
| return !curdev || newdev->rating > curdev->rating; |
| } |
| |
| #ifdef CONFIG_TICK_ONESHOT |
| static struct clock_event_device *tick_get_oneshot_wakeup_device(int cpu) |
| { |
| return per_cpu(tick_oneshot_wakeup_device, cpu); |
| } |
| |
/*
 * Event handler which is installed on a per CPU oneshot wakeup device.
 * The event is forwarded through tick_receive_broadcast().
 */
static void tick_oneshot_wakeup_handler(struct clock_event_device *wd)
{
	/*
	 * After an early wakeup the tick might have been reprogrammed
	 * already, which makes this invocation spurious. That's harmless.
	 */
	tick_receive_broadcast();
}
| |
| static bool tick_set_oneshot_wakeup_device(struct clock_event_device *newdev, |
| int cpu) |
| { |
| struct clock_event_device *curdev = tick_get_oneshot_wakeup_device(cpu); |
| |
| if (!newdev) |
| goto set_device; |
| |
| if ((newdev->features & CLOCK_EVT_FEAT_DUMMY) || |
| (newdev->features & CLOCK_EVT_FEAT_C3STOP)) |
| return false; |
| |
| if (!(newdev->features & CLOCK_EVT_FEAT_PERCPU) || |
| !(newdev->features & CLOCK_EVT_FEAT_ONESHOT)) |
| return false; |
| |
| if (!cpumask_equal(newdev->cpumask, cpumask_of(cpu))) |
| return false; |
| |
| if (curdev && newdev->rating <= curdev->rating) |
| return false; |
| |
| if (!try_module_get(newdev->owner)) |
| return false; |
| |
| newdev->event_handler = tick_oneshot_wakeup_handler; |
| set_device: |
| clockevents_exchange_device(curdev, newdev); |
| per_cpu(tick_oneshot_wakeup_device, cpu) = newdev; |
| return true; |
| } |
| #else |
/* Without CONFIG_TICK_ONESHOT no CPU ever has a oneshot wakeup device. */
static struct clock_event_device *tick_get_oneshot_wakeup_device(int cpu)
{
	struct clock_event_device *none = NULL;

	return none;
}
| |
/*
 * Without CONFIG_TICK_ONESHOT a wakeup device can never be installed, so
 * every request is rejected.
 */
static bool tick_set_oneshot_wakeup_device(struct clock_event_device *newdev,
					   int cpu)
{
	const bool installed = false;

	return installed;
}
| #endif |
| |
| /* |
| * Conditionally install/replace broadcast device |
| */ |
| void tick_install_broadcast_device(struct clock_event_device *dev, int cpu) |
| { |
| struct clock_event_device *cur = tick_broadcast_device.evtdev; |
| |
| if (tick_set_oneshot_wakeup_device(dev, cpu)) |
| return; |
| |
| if (!tick_check_broadcast_device(cur, dev)) |
| return; |
| |
| if (!try_module_get(dev->owner)) |
| return; |
| |
| clockevents_exchange_device(cur, dev); |
| if (cur) |
| cur->event_handler = clockevents_handle_noop; |
| tick_broadcast_device.evtdev = dev; |
| if (!cpumask_empty(tick_broadcast_mask)) |
| tick_broadcast_start_periodic(dev); |
| |
| if (!(dev->features & CLOCK_EVT_FEAT_ONESHOT)) |
| return; |
| |
| /* |
| * If the system already runs in oneshot mode, switch the newly |
| * registered broadcast device to oneshot mode explicitly. |
| */ |
| if (tick_broadcast_oneshot_active()) { |
| tick_broadcast_switch_to_oneshot(); |
| return; |
| } |
| |
| /* |
| * Inform all cpus about this. We might be in a situation |
| * where we did not switch to oneshot mode because the per cpu |
| * devices are affected by CLOCK_EVT_FEAT_C3STOP and the lack |
| * of a oneshot capable broadcast device. Without that |
| * notification the systems stays stuck in periodic mode |
| * forever. |
| */ |
| tick_clock_notify(); |
| } |
| |
| /* |
| * Check, if the device is the broadcast device |
| */ |
| int tick_is_broadcast_device(struct clock_event_device *dev) |
| { |
| return (dev && tick_broadcast_device.evtdev == dev); |
| } |
| |
| int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq) |
| { |
| int ret = -ENODEV; |
| |
| if (tick_is_broadcast_device(dev)) { |
| raw_spin_lock(&tick_broadcast_lock); |
| ret = __clockevents_update_freq(dev, freq); |
| raw_spin_unlock(&tick_broadcast_lock); |
| } |
| return ret; |
| } |
| |
| |
/*
 * Fallback broadcast function for devices which have no way to broadcast.
 * It ignores @mask and only emits a one-time critical message.
 */
static void err_broadcast(const struct cpumask *mask)
{
	pr_crit_once("Failed to broadcast timer tick. Some CPUs may be unresponsive.\n");
}
| |
| static void tick_device_setup_broadcast_func(struct clock_event_device *dev) |
| { |
| if (!dev->broadcast) |
| dev->broadcast = tick_broadcast; |
| if (!dev->broadcast) { |
| pr_warn_once("%s depends on broadcast, but no broadcast function available\n", |
| dev->name); |
| dev->broadcast = err_broadcast; |
| } |
| } |
| |
| /* |
| * Check, if the device is dysfunctional and a placeholder, which |
| * needs to be handled by the broadcast device. |
| */ |
| int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) |
| { |
| struct clock_event_device *bc = tick_broadcast_device.evtdev; |
| unsigned long flags; |
| int ret = 0; |
| |
| raw_spin_lock_irqsave(&tick_broadcast_lock, flags); |
| |
| /* |
| * Devices might be registered with both periodic and oneshot |
| * mode disabled. This signals, that the device needs to be |
| * operated from the broadcast device and is a placeholder for |
| * the cpu local device. |
| */ |
| if (!tick_device_is_functional(dev)) { |
| dev->event_handler = tick_handle_periodic; |
| tick_device_setup_broadcast_func(dev); |
| cpumask_set_cpu(cpu, tick_broadcast_mask); |
| if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) |
| tick_broadcast_start_periodic(bc); |
| else |
| tick_broadcast_setup_oneshot(bc, false); |
| ret = 1; |
| } else { |
| /* |
| * Clear the broadcast bit for this cpu if the |
| * device is not power state affected. |
| */ |
| if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) |
| cpumask_clear_cpu(cpu, tick_broadcast_mask); |
| else |
| tick_device_setup_broadcast_func(dev); |
| |
| /* |
| * Clear the broadcast bit if the CPU is not in |
| * periodic broadcast on state. |
| */ |
| if (!cpumask_test_cpu(cpu, tick_broadcast_on)) |
| cpumask_clear_cpu(cpu, tick_broadcast_mask); |
| |
| switch (tick_broadcast_device.mode) { |
| case TICKDEV_MODE_ONESHOT: |
| /* |
| * If the system is in oneshot mode we can |
| * unconditionally clear the oneshot mask bit, |
| * because the CPU is running and therefore |
| * not in an idle state which causes the power |
| * state affected device to stop. Let the |
| * caller initialize the device. |
| */ |
| tick_broadcast_clear_oneshot(cpu); |
| ret = 0; |
| break; |
| |
| case TICKDEV_MODE_PERIODIC: |
| /* |
| * If the system is in periodic mode, check |
| * whether the broadcast device can be |
| * switched off now. |
| */ |
| if (cpumask_empty(tick_broadcast_mask) && bc) |
| clockevents_shutdown(bc); |
| /* |
| * If we kept the cpu in the broadcast mask, |
| * tell the caller to leave the per cpu device |
| * in shutdown state. The periodic interrupt |
| * is delivered by the broadcast device, if |
| * the broadcast device exists and is not |
| * hrtimer based. |
| */ |
| if (bc && !(bc->features & CLOCK_EVT_FEAT_HRTIMER)) |
| ret = cpumask_test_cpu(cpu, tick_broadcast_mask); |
| break; |
| default: |
| break; |
| } |
| } |
| raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); |
| return ret; |
| } |
| |
| int tick_receive_broadcast(void) |
| { |
| struct tick_device *td = this_cpu_ptr(&tick_cpu_device); |
| struct clock_event_device *evt = td->evtdev; |
| |
| if (!evt) |
| return -ENODEV; |
| |
| if (!evt->event_handler) |
| return -EINVAL; |
| |
| evt->event_handler(evt); |
| return 0; |
| } |
| |
| /* |
| * Broadcast the event to the cpus, which are set in the mask (mangled). |
| */ |
| static bool tick_do_broadcast(struct cpumask *mask) |
| { |
| int cpu = smp_processor_id(); |
| struct tick_device *td; |
| bool local = false; |
| |
| /* |
| * Check, if the current cpu is in the mask |
| */ |
| if (cpumask_test_cpu(cpu, mask)) { |
| struct clock_event_device *bc = tick_broadcast_device.evtdev; |
| |
| cpumask_clear_cpu(cpu, mask); |
| /* |
| * We only run the local handler, if the broadcast |
| * device is not hrtimer based. Otherwise we run into |
| * a hrtimer recursion. |
| * |
| * local timer_interrupt() |
| * local_handler() |
| * expire_hrtimers() |
| * bc_handler() |
| * local_handler() |
| * expire_hrtimers() |
| */ |
| local = !(bc->features & CLOCK_EVT_FEAT_HRTIMER); |
| } |
| |
| if (!cpumask_empty(mask)) { |
| /* |
| * It might be necessary to actually check whether the devices |
| * have different broadcast functions. For now, just use the |
| * one of the first device. This works as long as we have this |
| * misfeature only on x86 (lapic) |
| */ |
| td = &per_cpu(tick_cpu_device, cpumask_first(mask)); |
| td->evtdev->broadcast(mask); |
| } |
| return local; |
| } |
| |
| /* |
| * Periodic broadcast: |
| * - invoke the broadcast handlers |
| */ |
| static bool tick_do_periodic_broadcast(void) |
| { |
| cpumask_and(tmpmask, cpu_online_mask, tick_broadcast_mask); |
| return tick_do_broadcast(tmpmask); |
| } |
| |
| /* |
| * Event handler for periodic broadcast ticks |
| */ |
| static void tick_handle_periodic_broadcast(struct clock_event_device *dev) |
| { |
| struct tick_device *td = this_cpu_ptr(&tick_cpu_device); |
| bool bc_local; |
| |
| raw_spin_lock(&tick_broadcast_lock); |
| |
| /* Handle spurious interrupts gracefully */ |
| if (clockevent_state_shutdown(tick_broadcast_device.evtdev)) { |
| raw_spin_unlock(&tick_broadcast_lock); |
| return; |
| } |
| |
| bc_local = tick_do_periodic_broadcast(); |
| |
| if (clockevent_state_oneshot(dev)) { |
| ktime_t next = ktime_add_ns(dev->next_event, TICK_NSEC); |
| |
| clockevents_program_event(dev, next, true); |
| } |
| raw_spin_unlock(&tick_broadcast_lock); |
| |
| /* |
| * We run the handler of the local cpu after dropping |
| * tick_broadcast_lock because the handler might deadlock when |
| * trying to switch to oneshot mode. |
| */ |
| if (bc_local) |
| td->evtdev->event_handler(td->evtdev); |
| } |
| |
| /** |
| * tick_broadcast_control - Enable/disable or force broadcast mode |
| * @mode: The selected broadcast mode |
| * |
| * Called when the system enters a state where affected tick devices |
| * might stop. Note: TICK_BROADCAST_FORCE cannot be undone. |
| */ |
| void tick_broadcast_control(enum tick_broadcast_mode mode) |
| { |
| struct clock_event_device *bc, *dev; |
| struct tick_device *td; |
| int cpu, bc_stopped; |
| unsigned long flags; |
| |
| /* Protects also the local clockevent device. */ |
| raw_spin_lock_irqsave(&tick_broadcast_lock, flags); |
| td = this_cpu_ptr(&tick_cpu_device); |
| dev = td->evtdev; |
| |
| /* |
| * Is the device not affected by the powerstate ? |
| */ |
| if (!dev || !(dev->features & CLOCK_EVT_FEAT_C3STOP)) |
| goto out; |
| |
| if (!tick_device_is_functional(dev)) |
| goto out; |
| |
| cpu = smp_processor_id(); |
| bc = tick_broadcast_device.evtdev; |
| bc_stopped = cpumask_empty(tick_broadcast_mask); |
| |
| switch (mode) { |
| case TICK_BROADCAST_FORCE: |
| tick_broadcast_forced = 1; |
| fallthrough; |
| case TICK_BROADCAST_ON: |
| cpumask_set_cpu(cpu, tick_broadcast_on); |
| if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) { |
| /* |
| * Only shutdown the cpu local device, if: |
| * |
| * - the broadcast device exists |
| * - the broadcast device is not a hrtimer based one |
| * - the broadcast device is in periodic mode to |
| * avoid a hiccup during switch to oneshot mode |
| */ |
| if (bc && !(bc->features & CLOCK_EVT_FEAT_HRTIMER) && |
| tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) |
| clockevents_shutdown(dev); |
| } |
| break; |
| |
| case TICK_BROADCAST_OFF: |
| if (tick_broadcast_forced) |
| break; |
| cpumask_clear_cpu(cpu, tick_broadcast_on); |
| if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_mask)) { |
| if (tick_broadcast_device.mode == |
| TICKDEV_MODE_PERIODIC) |
| tick_setup_periodic(dev, 0); |
| } |
| break; |
| } |
| |
| if (bc) { |
| if (cpumask_empty(tick_broadcast_mask)) { |
| if (!bc_stopped) |
| clockevents_shutdown(bc); |
| } else if (bc_stopped) { |
| if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) |
| tick_broadcast_start_periodic(bc); |
| else |
| tick_broadcast_setup_oneshot(bc, false); |
| } |
| } |
| out: |
| raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); |
| } |
| EXPORT_SYMBOL_GPL(tick_broadcast_control); |
| |
| /* |
| * Set the periodic handler depending on broadcast on/off |
| */ |
| void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast) |
| { |
| if (!broadcast) |
| dev->event_handler = tick_handle_periodic; |
| else |
| dev->event_handler = tick_handle_periodic_broadcast; |
| } |
| |
| #ifdef CONFIG_HOTPLUG_CPU |
| static void tick_shutdown_broadcast(void) |
| { |
| struct clock_event_device *bc = tick_broadcast_device.evtdev; |
| |
| if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) { |
| if (bc && cpumask_empty(tick_broadcast_mask)) |
| clockevents_shutdown(bc); |
| } |
| } |
| |
| /* |
| * Remove a CPU from broadcasting |
| */ |
| void tick_broadcast_offline(unsigned int cpu) |
| { |
| raw_spin_lock(&tick_broadcast_lock); |
| cpumask_clear_cpu(cpu, tick_broadcast_mask); |
| cpumask_clear_cpu(cpu, tick_broadcast_on); |
| tick_broadcast_oneshot_offline(cpu); |
| tick_shutdown_broadcast(); |
| raw_spin_unlock(&tick_broadcast_lock); |
| } |
| |
| #endif |
| |
| void tick_suspend_broadcast(void) |
| { |
| struct clock_event_device *bc; |
| unsigned long flags; |
| |
| raw_spin_lock_irqsave(&tick_broadcast_lock, flags); |
| |
| bc = tick_broadcast_device.evtdev; |
| if (bc) |
| clockevents_shutdown(bc); |
| |
| raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); |
| } |
| |
| /* |
| * This is called from tick_resume_local() on a resuming CPU. That's |
| * called from the core resume function, tick_unfreeze() and the magic XEN |
| * resume hackery. |
| * |
| * In none of these cases the broadcast device mode can change and the |
| * bit of the resuming CPU in the broadcast mask is safe as well. |
| */ |
| bool tick_resume_check_broadcast(void) |
| { |
| if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT) |
| return false; |
| else |
| return cpumask_test_cpu(smp_processor_id(), tick_broadcast_mask); |
| } |
| |
| void tick_resume_broadcast(void) |
| { |
| struct clock_event_device *bc; |
| unsigned long flags; |
| |
| raw_spin_lock_irqsave(&tick_broadcast_lock, flags); |
| |
| bc = tick_broadcast_device.evtdev; |
| |
| if (bc) { |
| clockevents_tick_resume(bc); |
| |
| switch (tick_broadcast_device.mode) { |
| case TICKDEV_MODE_PERIODIC: |
| if (!cpumask_empty(tick_broadcast_mask)) |
| tick_broadcast_start_periodic(bc); |
| break; |
| case TICKDEV_MODE_ONESHOT: |
| if (!cpumask_empty(tick_broadcast_mask)) |
| tick_resume_broadcast_oneshot(bc); |
| break; |
| } |
| } |
| raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); |
| } |
| |
| #ifdef CONFIG_TICK_ONESHOT |
| |
| static cpumask_var_t tick_broadcast_oneshot_mask __cpumask_var_read_mostly; |
| static cpumask_var_t tick_broadcast_pending_mask __cpumask_var_read_mostly; |
| static cpumask_var_t tick_broadcast_force_mask __cpumask_var_read_mostly; |
| |
| /* |
| * Exposed for debugging: see timer_list.c |
| */ |
| struct cpumask *tick_get_broadcast_oneshot_mask(void) |
| { |
| return tick_broadcast_oneshot_mask; |
| } |
| |
| /* |
| * Called before going idle with interrupts disabled. Checks whether a |
| * broadcast event from the other core is about to happen. We detected |
| * that in tick_broadcast_oneshot_control(). The callsite can use this |
| * to avoid a deep idle transition as we are about to get the |
| * broadcast IPI right away. |
| */ |
| noinstr int tick_check_broadcast_expired(void) |
| { |
| #ifdef _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H |
| return arch_test_bit(smp_processor_id(), cpumask_bits(tick_broadcast_force_mask)); |
| #else |
| return cpumask_test_cpu(smp_processor_id(), tick_broadcast_force_mask); |
| #endif |
| } |
| |
| /* |
| * Set broadcast interrupt affinity |
| */ |
| static void tick_broadcast_set_affinity(struct clock_event_device *bc, |
| const struct cpumask *cpumask) |
| { |
| if (!(bc->features & CLOCK_EVT_FEAT_DYNIRQ)) |
| return; |
| |
| if (cpumask_equal(bc->cpumask, cpumask)) |
| return; |
| |
| bc->cpumask = cpumask; |
| irq_set_affinity(bc->irq, bc->cpumask); |
| } |
| |
| static void tick_broadcast_set_event(struct clock_event_device *bc, int cpu, |
| ktime_t expires) |
| { |
| if (!clockevent_state_oneshot(bc)) |
| clockevents_switch_state(bc, CLOCK_EVT_STATE_ONESHOT); |
| |
| clockevents_program_event(bc, expires, 1); |
| tick_broadcast_set_affinity(bc, cpumask_of(cpu)); |
| } |
| |
| static void tick_resume_broadcast_oneshot(struct clock_event_device *bc) |
| { |
| clockevents_switch_state(bc, CLOCK_EVT_STATE_ONESHOT); |
| } |
| |
| /* |
| * Called from irq_enter() when idle was interrupted to reenable the |
| * per cpu device. |
| */ |
| void tick_check_oneshot_broadcast_this_cpu(void) |
| { |
| if (cpumask_test_cpu(smp_processor_id(), tick_broadcast_oneshot_mask)) { |
| struct tick_device *td = this_cpu_ptr(&tick_cpu_device); |
| |
| /* |
| * We might be in the middle of switching over from |
| * periodic to oneshot. If the CPU has not yet |
| * switched over, leave the device alone. |
| */ |
| if (td->mode == TICKDEV_MODE_ONESHOT) { |
| clockevents_switch_state(td->evtdev, |
| CLOCK_EVT_STATE_ONESHOT); |
| } |
| } |
| } |
| |
| /* |
| * Handle oneshot mode broadcasting |
| */ |
| static void tick_handle_oneshot_broadcast(struct clock_event_device *dev) |
| { |
| struct tick_device *td; |
| ktime_t now, next_event; |
| int cpu, next_cpu = 0; |
| bool bc_local; |
| |
| raw_spin_lock(&tick_broadcast_lock); |
| dev->next_event = KTIME_MAX; |
| next_event = KTIME_MAX; |
| cpumask_clear(tmpmask); |
| now = ktime_get(); |
| /* Find all expired events */ |
| for_each_cpu(cpu, tick_broadcast_oneshot_mask) { |
| /* |
| * Required for !SMP because for_each_cpu() reports |
| * unconditionally CPU0 as set on UP kernels. |
| */ |
| if (!IS_ENABLED(CONFIG_SMP) && |
| cpumask_empty(tick_broadcast_oneshot_mask)) |
| break; |
| |
| td = &per_cpu(tick_cpu_device, cpu); |
| if (td->evtdev->next_event <= now) { |
| cpumask_set_cpu(cpu, tmpmask); |
| /* |
| * Mark the remote cpu in the pending mask, so |
| * it can avoid reprogramming the cpu local |
| * timer in tick_broadcast_oneshot_control(). |
| */ |
| cpumask_set_cpu(cpu, tick_broadcast_pending_mask); |
| } else if (td->evtdev->next_event < next_event) { |
| next_event = td->evtdev->next_event; |
| next_cpu = cpu; |
| } |
| } |
| |
| /* |
| * Remove the current cpu from the pending mask. The event is |
| * delivered immediately in tick_do_broadcast() ! |
| */ |
| cpumask_clear_cpu(smp_processor_id(), tick_broadcast_pending_mask); |
| |
| /* Take care of enforced broadcast requests */ |
| cpumask_or(tmpmask, tmpmask, tick_broadcast_force_mask); |
| cpumask_clear(tick_broadcast_force_mask); |
| |
| /* |
| * Sanity check. Catch the case where we try to broadcast to |
| * offline cpus. |
| */ |
| if (WARN_ON_ONCE(!cpumask_subset(tmpmask, cpu_online_mask))) |
| cpumask_and(tmpmask, tmpmask, cpu_online_mask); |
| |
| /* |
| * Wakeup the cpus which have an expired event. |
| */ |
| bc_local = tick_do_broadcast(tmpmask); |
| |
| /* |
| * Two reasons for reprogram: |
| * |
| * - The global event did not expire any CPU local |
| * events. This happens in dyntick mode, as the maximum PIT |
| * delta is quite small. |
| * |
| * - There are pending events on sleeping CPUs which were not |
| * in the event mask |
| */ |
| if (next_event != KTIME_MAX) |
| tick_broadcast_set_event(dev, next_cpu, next_event); |
| |
| raw_spin_unlock(&tick_broadcast_lock); |
| |
| if (bc_local) { |
| td = this_cpu_ptr(&tick_cpu_device); |
| td->evtdev->event_handler(td->evtdev); |
| } |
| } |
| |
| static int broadcast_needs_cpu(struct clock_event_device *bc, int cpu) |
| { |
| if (!(bc->features & CLOCK_EVT_FEAT_HRTIMER)) |
| return 0; |
| if (bc->next_event == KTIME_MAX) |
| return 0; |
| return bc->bound_on == cpu ? -EBUSY : 0; |
| } |
| |
| static void broadcast_shutdown_local(struct clock_event_device *bc, |
| struct clock_event_device *dev) |
| { |
| /* |
| * For hrtimer based broadcasting we cannot shutdown the cpu |
| * local device if our own event is the first one to expire or |
| * if we own the broadcast timer. |
| */ |
| if (bc->features & CLOCK_EVT_FEAT_HRTIMER) { |
| if (broadcast_needs_cpu(bc, smp_processor_id())) |
| return; |
| if (dev->next_event < bc->next_event) |
| return; |
| } |
| clockevents_switch_state(dev, CLOCK_EVT_STATE_SHUTDOWN); |
| } |
| |
| static int ___tick_broadcast_oneshot_control(enum tick_broadcast_state state, |
| struct tick_device *td, |
| int cpu) |
| { |
| struct clock_event_device *bc, *dev = td->evtdev; |
| int ret = 0; |
| ktime_t now; |
| |
| raw_spin_lock(&tick_broadcast_lock); |
| bc = tick_broadcast_device.evtdev; |
| |
| if (state == TICK_BROADCAST_ENTER) { |
| /* |
| * If the current CPU owns the hrtimer broadcast |
| * mechanism, it cannot go deep idle and we do not add |
| * the CPU to the broadcast mask. We don't have to go |
| * through the EXIT path as the local timer is not |
| * shutdown. |
| */ |
| ret = broadcast_needs_cpu(bc, cpu); |
| if (ret) |
| goto out; |
| |
| /* |
| * If the broadcast device is in periodic mode, we |
| * return. |
| */ |
| if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) { |
| /* If it is a hrtimer based broadcast, return busy */ |
| if (bc->features & CLOCK_EVT_FEAT_HRTIMER) |
| ret = -EBUSY; |
| goto out; |
| } |
| |
| if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) { |
| WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask)); |
| |
| /* Conditionally shut down the local timer. */ |
| broadcast_shutdown_local(bc, dev); |
| |
| /* |
| * We only reprogram the broadcast timer if we |
| * did not mark ourself in the force mask and |
| * if the cpu local event is earlier than the |
| * broadcast event. If the current CPU is in |
| * the force mask, then we are going to be |
| * woken by the IPI right away; we return |
| * busy, so the CPU does not try to go deep |
| * idle. |
| */ |
| if (cpumask_test_cpu(cpu, tick_broadcast_force_mask)) { |
| ret = -EBUSY; |
| } else if (dev->next_event < bc->next_event) { |
| tick_broadcast_set_event(bc, cpu, dev->next_event); |
| /* |
| * In case of hrtimer broadcasts the |
| * programming might have moved the |
| * timer to this cpu. If yes, remove |
| * us from the broadcast mask and |
| * return busy. |
| */ |
| ret = broadcast_needs_cpu(bc, cpu); |
| if (ret) { |
| cpumask_clear_cpu(cpu, |
| tick_broadcast_oneshot_mask); |
| } |
| } |
| } |
| } else { |
| if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) { |
| clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT); |
| /* |
| * The cpu which was handling the broadcast |
| * timer marked this cpu in the broadcast |
| * pending mask and fired the broadcast |
| * IPI. So we are going to handle the expired |
| * event anyway via the broadcast IPI |
| * handler. No need to reprogram the timer |
| * with an already expired event. |
| */ |
| if (cpumask_test_and_clear_cpu(cpu, |
| tick_broadcast_pending_mask)) |
| goto out; |
| |
| /* |
| * Bail out if there is no next event. |
| */ |
| if (dev->next_event == KTIME_MAX) |
| goto out; |
| /* |
| * If the pending bit is not set, then we are |
| * either the CPU handling the broadcast |
| * interrupt or we got woken by something else. |
| * |
| * We are no longer in the broadcast mask, so |
| * if the cpu local expiry time is already |
| * reached, we would reprogram the cpu local |
| * timer with an already expired event. |
| * |
| * This can lead to a ping-pong when we return |
| * to idle and therefore rearm the broadcast |
| * timer before the cpu local timer was able |
| * to fire. This happens because the forced |
| * reprogramming makes sure that the event |
| * will happen in the future and depending on |
| * the min_delta setting this might be far |
| * enough out that the ping-pong starts. |
| * |
| * If the cpu local next_event has expired |
| * then we know that the broadcast timer |
| * next_event has expired as well and |
| * broadcast is about to be handled. So we |
| * avoid reprogramming and enforce that the |
| * broadcast handler, which did not run yet, |
| * will invoke the cpu local handler. |
| * |
| * We cannot call the handler directly from |
| * here, because we might be in a NOHZ phase |
| * and we did not go through the irq_enter() |
| * nohz fixups. |
| */ |
| now = ktime_get(); |
| if (dev->next_event <= now) { |
| cpumask_set_cpu(cpu, tick_broadcast_force_mask); |
| goto out; |
| } |
| /* |
| * We got woken by something else. Reprogram |
| * the cpu local timer device. |
| */ |
| tick_program_event(dev->next_event, 1); |
| } |
| } |
| out: |
| raw_spin_unlock(&tick_broadcast_lock); |
| return ret; |
| } |
| |
| static int tick_oneshot_wakeup_control(enum tick_broadcast_state state, |
| struct tick_device *td, |
| int cpu) |
| { |
| struct clock_event_device *dev, *wd; |
| |
| dev = td->evtdev; |
| if (td->mode != TICKDEV_MODE_ONESHOT) |
| return -EINVAL; |
| |
| wd = tick_get_oneshot_wakeup_device(cpu); |
| if (!wd) |
| return -ENODEV; |
| |
| switch (state) { |
| case TICK_BROADCAST_ENTER: |
| clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT_STOPPED); |
| clockevents_switch_state(wd, CLOCK_EVT_STATE_ONESHOT); |
| clockevents_program_event(wd, dev->next_event, 1); |
| break; |
| case TICK_BROADCAST_EXIT: |
| /* We may have transitioned to oneshot mode while idle */ |
| if (clockevent_get_state(wd) != CLOCK_EVT_STATE_ONESHOT) |
| return -ENODEV; |
| } |
| |
| return 0; |
| } |
| |
| int __tick_broadcast_oneshot_control(enum tick_broadcast_state state) |
| { |
| struct tick_device *td = this_cpu_ptr(&tick_cpu_device); |
| int cpu = smp_processor_id(); |
| |
| if (!tick_oneshot_wakeup_control(state, td, cpu)) |
| return 0; |
| |
| if (tick_broadcast_device.evtdev) |
| return ___tick_broadcast_oneshot_control(state, td, cpu); |
| |
| /* |
| * If there is no broadcast or wakeup device, tell the caller not |
| * to go into deep idle. |
| */ |
| return -EBUSY; |
| } |
| |
| /* |
| * Reset the one shot broadcast for a cpu |
| * |
| * Called with tick_broadcast_lock held |
| */ |
| static void tick_broadcast_clear_oneshot(int cpu) |
| { |
| cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask); |
| cpumask_clear_cpu(cpu, tick_broadcast_pending_mask); |
| } |
| |
| static void tick_broadcast_init_next_event(struct cpumask *mask, |
| ktime_t expires) |
| { |
| struct tick_device *td; |
| int cpu; |
| |
| for_each_cpu(cpu, mask) { |
| td = &per_cpu(tick_cpu_device, cpu); |
| if (td->evtdev) |
| td->evtdev->next_event = expires; |
| } |
| } |
| |
| static inline ktime_t tick_get_next_period(void) |
| { |
| ktime_t next; |
| |
| /* |
| * Protect against concurrent updates (store /load tearing on |
| * 32bit). It does not matter if the time is already in the |
| * past. The broadcast device which is about to be programmed will |
| * fire in any case. |
| */ |
| raw_spin_lock(&jiffies_lock); |
| next = tick_next_period; |
| raw_spin_unlock(&jiffies_lock); |
| return next; |
| } |
| |
| /** |
| * tick_broadcast_setup_oneshot - setup the broadcast device |
| */ |
| static void tick_broadcast_setup_oneshot(struct clock_event_device *bc, |
| bool from_periodic) |
| { |
| int cpu = smp_processor_id(); |
| ktime_t nexttick = 0; |
| |
| if (!bc) |
| return; |
| |
| /* |
| * When the broadcast device was switched to oneshot by the first |
| * CPU handling the NOHZ change, the other CPUs will reach this |
| * code via hrtimer_run_queues() -> tick_check_oneshot_change() |
| * too. Set up the broadcast device only once! |
| */ |
| if (bc->event_handler == tick_handle_oneshot_broadcast) { |
| /* |
| * The CPU which switched from periodic to oneshot mode |
| * set the broadcast oneshot bit for all other CPUs which |
| * are in the general (periodic) broadcast mask to ensure |
| * that CPUs which wait for the periodic broadcast are |
| * woken up. |
| * |
| * Clear the bit for the local CPU as the set bit would |
| * prevent the first tick_broadcast_enter() after this CPU |
| * switched to oneshot state to program the broadcast |
| * device. |
| * |
| * This code can also be reached via tick_broadcast_control(), |
| * but this cannot avoid the tick_broadcast_clear_oneshot() |
| * as that would break the periodic to oneshot transition of |
| * secondary CPUs. But that's harmless as the below only |
| * clears already cleared bits. |
| */ |
| tick_broadcast_clear_oneshot(cpu); |
| return; |
| } |
| |
| |
| bc->event_handler = tick_handle_oneshot_broadcast; |
| bc->next_event = KTIME_MAX; |
| |
| /* |
| * When the tick mode is switched from periodic to oneshot it must |
| * be ensured that CPUs which are waiting for periodic broadcast |
| * get their wake-up at the next tick. This is achieved by ORing |
| * tick_broadcast_mask into tick_broadcast_oneshot_mask. |
| * |
| * For other callers, e.g. broadcast device replacement, |
| * tick_broadcast_oneshot_mask must not be touched as this would |
| * set bits for CPUs which are already NOHZ, but not idle. Their |
| * next tick_broadcast_enter() would observe the bit set and fail |
| * to update the expiry time and the broadcast event device. |
| */ |
| if (from_periodic) { |
| cpumask_copy(tmpmask, tick_broadcast_mask); |
| /* Remove the local CPU as it is obviously not idle */ |
| cpumask_clear_cpu(cpu, tmpmask); |
| cpumask_or(tick_broadcast_oneshot_mask, tick_broadcast_oneshot_mask, tmpmask); |
| |
| /* |
| * Ensure that the oneshot broadcast handler will wake the |
| * CPUs which are still waiting for periodic broadcast. |
| */ |
| nexttick = tick_get_next_period(); |
| tick_broadcast_init_next_event(tmpmask, nexttick); |
| |
| /* |
| * If the underlying broadcast clock event device is |
| * already in oneshot state, then there is nothing to do. |
| * The device was already armed for the next tick |
| * in tick_handle_broadcast_periodic() |
| */ |
| if (clockevent_state_oneshot(bc)) |
| return; |
| } |
| |
| /* |
| * When switching from periodic to oneshot mode arm the broadcast |
| * device for the next tick. |
| * |
| * If the broadcast device has been replaced in oneshot mode and |
| * the oneshot broadcast mask is not empty, then arm it to expire |
| * immediately in order to reevaluate the next expiring timer. |
| * @nexttick is 0 and therefore in the past which will cause the |
| * clockevent code to force an event. |
| * |
| * For both cases the programming can be avoided when the oneshot |
| * broadcast mask is empty. |
| * |
| * tick_broadcast_set_event() implicitly switches the broadcast |
| * device to oneshot state. |
| */ |
| if (!cpumask_empty(tick_broadcast_oneshot_mask)) |
| tick_broadcast_set_event(bc, cpu, nexttick); |
| } |
| |
| /* |
| * Select oneshot operating mode for the broadcast device |
| */ |
| void tick_broadcast_switch_to_oneshot(void) |
| { |
| struct clock_event_device *bc; |
| enum tick_device_mode oldmode; |
| unsigned long flags; |
| |
| raw_spin_lock_irqsave(&tick_broadcast_lock, flags); |
| |
| oldmode = tick_broadcast_device.mode; |
| tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT; |
| bc = tick_broadcast_device.evtdev; |
| if (bc) |
| tick_broadcast_setup_oneshot(bc, oldmode == TICKDEV_MODE_PERIODIC); |
| |
| raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); |
| } |
| |
| #ifdef CONFIG_HOTPLUG_CPU |
| void hotplug_cpu__broadcast_tick_pull(int deadcpu) |
| { |
| struct tick_device *td = this_cpu_ptr(&tick_cpu_device); |
| struct clock_event_device *bc; |
| unsigned long flags; |
| |
| raw_spin_lock_irqsave(&tick_broadcast_lock, flags); |
| bc = tick_broadcast_device.evtdev; |
| |
| if (bc && broadcast_needs_cpu(bc, deadcpu)) { |
| /* |
| * If the broadcast force bit of the current CPU is set, |
| * then the current CPU has not yet reprogrammed the local |
| * timer device to avoid a ping-pong race. See |
| * ___tick_broadcast_oneshot_control(). |
| * |
| * If the broadcast device is hrtimer based then |
| * programming the broadcast event below does not have any |
| * effect because the local clockevent device is not |
| * running and not programmed because the broadcast event |
| * is not earlier than the pending event of the local clock |
| * event device. As a consequence all CPUs waiting for a |
| * broadcast event are stuck forever. |
| * |
| * Detect this condition and reprogram the cpu local timer |
| * device to avoid the starvation. |
| */ |
| if (tick_check_broadcast_expired()) { |
| cpumask_clear_cpu(smp_processor_id(), tick_broadcast_force_mask); |
| tick_program_event(td->evtdev->next_event, 1); |
| } |
| |
| /* This moves the broadcast assignment to this CPU: */ |
| clockevents_program_event(bc, bc->next_event, 1); |
| } |
| raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); |
| } |
| |
| /* |
| * Remove a dying CPU from broadcasting |
| */ |
| static void tick_broadcast_oneshot_offline(unsigned int cpu) |
| { |
| if (tick_get_oneshot_wakeup_device(cpu)) |
| tick_set_oneshot_wakeup_device(NULL, cpu); |
| |
| /* |
| * Clear the broadcast masks for the dead cpu, but do not stop |
| * the broadcast device! |
| */ |
| cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask); |
| cpumask_clear_cpu(cpu, tick_broadcast_pending_mask); |
| cpumask_clear_cpu(cpu, tick_broadcast_force_mask); |
| } |
| #endif |
| |
| /* |
| * Check, whether the broadcast device is in one shot mode |
| */ |
| int tick_broadcast_oneshot_active(void) |
| { |
| return tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT; |
| } |
| |
| /* |
| * Check whether the broadcast device supports oneshot. |
| */ |
| bool tick_broadcast_oneshot_available(void) |
| { |
| struct clock_event_device *bc = tick_broadcast_device.evtdev; |
| |
| return bc ? bc->features & CLOCK_EVT_FEAT_ONESHOT : false; |
| } |
| |
| #else |
| int __tick_broadcast_oneshot_control(enum tick_broadcast_state state) |
| { |
| struct clock_event_device *bc = tick_broadcast_device.evtdev; |
| |
| if (!bc || (bc->features & CLOCK_EVT_FEAT_HRTIMER)) |
| return -EBUSY; |
| |
| return 0; |
| } |
| #endif |
| |
| void __init tick_broadcast_init(void) |
| { |
| zalloc_cpumask_var(&tick_broadcast_mask, GFP_NOWAIT); |
| zalloc_cpumask_var(&tick_broadcast_on, GFP_NOWAIT); |
| zalloc_cpumask_var(&tmpmask, GFP_NOWAIT); |
| #ifdef CONFIG_TICK_ONESHOT |
| zalloc_cpumask_var(&tick_broadcast_oneshot_mask, GFP_NOWAIT); |
| zalloc_cpumask_var(&tick_broadcast_pending_mask, GFP_NOWAIT); |
| zalloc_cpumask_var(&tick_broadcast_force_mask, GFP_NOWAIT); |
| #endif |
| } |