| // SPDX-License-Identifier: GPL-2.0 |
| /* |
| * Common corrected MCE threshold handler code: |
| */ |
| #include <linux/interrupt.h> |
| #include <linux/kernel.h> |
| |
| #include <asm/irq_vectors.h> |
| #include <asm/traps.h> |
| #include <asm/apic.h> |
| #include <asm/mce.h> |
| #include <asm/trace/irq_vectors.h> |
| |
| #include "internal.h" |
| |
| static void default_threshold_interrupt(void) |
| { |
| pr_err("Unexpected threshold interrupt at vector %x\n", |
| THRESHOLD_APIC_VECTOR); |
| } |
| |
| void (*mce_threshold_vector)(void) = default_threshold_interrupt; |
| |
| DEFINE_IDTENTRY_SYSVEC(sysvec_threshold) |
| { |
| trace_threshold_apic_entry(THRESHOLD_APIC_VECTOR); |
| inc_irq_stat(irq_threshold_count); |
| mce_threshold_vector(); |
| trace_threshold_apic_exit(THRESHOLD_APIC_VECTOR); |
| apic_eoi(); |
| } |
| |
| DEFINE_PER_CPU(struct mca_storm_desc, storm_desc); |
| |
| void mce_inherit_storm(unsigned int bank) |
| { |
| struct mca_storm_desc *storm = this_cpu_ptr(&storm_desc); |
| |
| /* |
| * Previous CPU owning this bank had put it into storm mode, |
| * but the precise history of that storm is unknown. Assume |
| * the worst (all recent polls of the bank found a valid error |
| * logged). This will avoid the new owner prematurely declaring |
| * the storm has ended. |
| */ |
| storm->banks[bank].history = ~0ull; |
| storm->banks[bank].timestamp = jiffies; |
| } |
| |
| bool mce_get_storm_mode(void) |
| { |
| return __this_cpu_read(storm_desc.poll_mode); |
| } |
| |
| void mce_set_storm_mode(bool storm) |
| { |
| __this_cpu_write(storm_desc.poll_mode, storm); |
| } |
| |
| static void mce_handle_storm(unsigned int bank, bool on) |
| { |
| switch (boot_cpu_data.x86_vendor) { |
| case X86_VENDOR_INTEL: |
| mce_intel_handle_storm(bank, on); |
| break; |
| } |
| } |
| |
| void cmci_storm_begin(unsigned int bank) |
| { |
| struct mca_storm_desc *storm = this_cpu_ptr(&storm_desc); |
| |
| __set_bit(bank, this_cpu_ptr(mce_poll_banks)); |
| storm->banks[bank].in_storm_mode = true; |
| |
| /* |
| * If this is the first bank on this CPU to enter storm mode |
| * start polling. |
| */ |
| if (++storm->stormy_bank_count == 1) |
| mce_timer_kick(true); |
| } |
| |
| void cmci_storm_end(unsigned int bank) |
| { |
| struct mca_storm_desc *storm = this_cpu_ptr(&storm_desc); |
| |
| __clear_bit(bank, this_cpu_ptr(mce_poll_banks)); |
| storm->banks[bank].history = 0; |
| storm->banks[bank].in_storm_mode = false; |
| |
| /* If no banks left in storm mode, stop polling. */ |
| if (!this_cpu_dec_return(storm_desc.stormy_bank_count)) |
| mce_timer_kick(false); |
| } |
| |
| void mce_track_storm(struct mce *mce) |
| { |
| struct mca_storm_desc *storm = this_cpu_ptr(&storm_desc); |
| unsigned long now = jiffies, delta; |
| unsigned int shift = 1; |
| u64 history = 0; |
| |
| /* No tracking needed for banks that do not support CMCI */ |
| if (storm->banks[mce->bank].poll_only) |
| return; |
| |
| /* |
| * When a bank is in storm mode it is polled once per second and |
| * the history mask will record about the last minute of poll results. |
| * If it is not in storm mode, then the bank is only checked when |
| * there is a CMCI interrupt. Check how long it has been since |
| * this bank was last checked, and adjust the amount of "shift" |
| * to apply to history. |
| */ |
| if (!storm->banks[mce->bank].in_storm_mode) { |
| delta = now - storm->banks[mce->bank].timestamp; |
| shift = (delta + HZ) / HZ; |
| } |
| |
| /* If it has been a long time since the last poll, clear history. */ |
| if (shift < NUM_HISTORY_BITS) |
| history = storm->banks[mce->bank].history << shift; |
| |
| storm->banks[mce->bank].timestamp = now; |
| |
| /* History keeps track of corrected errors. VAL=1 && UC=0 */ |
| if ((mce->status & MCI_STATUS_VAL) && mce_is_correctable(mce)) |
| history |= 1; |
| |
| storm->banks[mce->bank].history = history; |
| |
| if (storm->banks[mce->bank].in_storm_mode) { |
| if (history & GENMASK_ULL(STORM_END_POLL_THRESHOLD, 0)) |
| return; |
| printk_deferred(KERN_NOTICE "CPU%d BANK%d CMCI storm subsided\n", smp_processor_id(), mce->bank); |
| mce_handle_storm(mce->bank, false); |
| cmci_storm_end(mce->bank); |
| } else { |
| if (hweight64(history) < STORM_BEGIN_THRESHOLD) |
| return; |
| printk_deferred(KERN_NOTICE "CPU%d BANK%d CMCI storm detected\n", smp_processor_id(), mce->bank); |
| mce_handle_storm(mce->bank, true); |
| cmci_storm_begin(mce->bank); |
| } |
| } |