| // SPDX-License-Identifier: GPL-2.0-only |
| /* |
| * itmt.c: Support Intel Turbo Boost Max Technology 3.0 |
| * |
| * (C) Copyright 2016 Intel Corporation |
| * Author: Tim Chen <tim.c.chen@linux.intel.com> |
| * |
| * On platforms supporting Intel Turbo Boost Max Technology 3.0, (ITMT), |
| * the maximum turbo frequencies of some cores in a CPU package may be |
| * higher than for the other cores in the same package. In that case, |
| * better performance can be achieved by making the scheduler prefer |
| * to run tasks on the CPUs with higher max turbo frequencies. |
| * |
| * This file provides functions and data structures for enabling the |
| * scheduler to favor scheduling on cores that can be boosted to a higher |
| * frequency under ITMT. |
| */ |
| |
| #include <linux/sched.h> |
| #include <linux/cpumask.h> |
| #include <linux/cpuset.h> |
| #include <linux/mutex.h> |
| #include <linux/sysctl.h> |
| #include <linux/nodemask.h> |
| |
| static DEFINE_MUTEX(itmt_update_mutex); |
| DEFINE_PER_CPU_READ_MOSTLY(int, sched_core_priority); |
| |
| /* Boolean to track if system has ITMT capabilities */ |
| static bool __read_mostly sched_itmt_capable; |
| |
| /* |
| * Boolean to control whether we want to move processes to cpu capable |
| * of higher turbo frequency for cpus supporting Intel Turbo Boost Max |
| * Technology 3.0. |
| * |
| * It can be set via /proc/sys/kernel/sched_itmt_enabled |
| */ |
| unsigned int __read_mostly sysctl_sched_itmt_enabled; |
| |
| static int sched_itmt_update_handler(struct ctl_table *table, int write, |
| void *buffer, size_t *lenp, loff_t *ppos) |
| { |
| unsigned int old_sysctl; |
| int ret; |
| |
| mutex_lock(&itmt_update_mutex); |
| |
| if (!sched_itmt_capable) { |
| mutex_unlock(&itmt_update_mutex); |
| return -EINVAL; |
| } |
| |
| old_sysctl = sysctl_sched_itmt_enabled; |
| ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); |
| |
| if (!ret && write && old_sysctl != sysctl_sched_itmt_enabled) { |
| x86_topology_update = true; |
| rebuild_sched_domains(); |
| } |
| |
| mutex_unlock(&itmt_update_mutex); |
| |
| return ret; |
| } |
| |
| static struct ctl_table itmt_kern_table[] = { |
| { |
| .procname = "sched_itmt_enabled", |
| .data = &sysctl_sched_itmt_enabled, |
| .maxlen = sizeof(unsigned int), |
| .mode = 0644, |
| .proc_handler = sched_itmt_update_handler, |
| .extra1 = SYSCTL_ZERO, |
| .extra2 = SYSCTL_ONE, |
| }, |
| {} |
| }; |
| |
| static struct ctl_table itmt_root_table[] = { |
| { |
| .procname = "kernel", |
| .mode = 0555, |
| .child = itmt_kern_table, |
| }, |
| {} |
| }; |
| |
| static struct ctl_table_header *itmt_sysctl_header; |
| |
| /** |
| * sched_set_itmt_support() - Indicate platform supports ITMT |
| * |
| * This function is used by the OS to indicate to scheduler that the platform |
| * is capable of supporting the ITMT feature. |
| * |
| * The current scheme has the pstate driver detects if the system |
| * is ITMT capable and call sched_set_itmt_support. |
| * |
| * This must be done only after sched_set_itmt_core_prio |
| * has been called to set the cpus' priorities. |
| * It must not be called with cpu hot plug lock |
| * held as we need to acquire the lock to rebuild sched domains |
| * later. |
| * |
| * Return: 0 on success |
| */ |
| int sched_set_itmt_support(void) |
| { |
| mutex_lock(&itmt_update_mutex); |
| |
| if (sched_itmt_capable) { |
| mutex_unlock(&itmt_update_mutex); |
| return 0; |
| } |
| |
| itmt_sysctl_header = register_sysctl_table(itmt_root_table); |
| if (!itmt_sysctl_header) { |
| mutex_unlock(&itmt_update_mutex); |
| return -ENOMEM; |
| } |
| |
| sched_itmt_capable = true; |
| |
| sysctl_sched_itmt_enabled = 1; |
| |
| x86_topology_update = true; |
| rebuild_sched_domains(); |
| |
| mutex_unlock(&itmt_update_mutex); |
| |
| return 0; |
| } |
| |
| /** |
| * sched_clear_itmt_support() - Revoke platform's support of ITMT |
| * |
| * This function is used by the OS to indicate that it has |
| * revoked the platform's support of ITMT feature. |
| * |
| * It must not be called with cpu hot plug lock |
| * held as we need to acquire the lock to rebuild sched domains |
| * later. |
| */ |
| void sched_clear_itmt_support(void) |
| { |
| mutex_lock(&itmt_update_mutex); |
| |
| if (!sched_itmt_capable) { |
| mutex_unlock(&itmt_update_mutex); |
| return; |
| } |
| sched_itmt_capable = false; |
| |
| if (itmt_sysctl_header) { |
| unregister_sysctl_table(itmt_sysctl_header); |
| itmt_sysctl_header = NULL; |
| } |
| |
| if (sysctl_sched_itmt_enabled) { |
| /* disable sched_itmt if we are no longer ITMT capable */ |
| sysctl_sched_itmt_enabled = 0; |
| x86_topology_update = true; |
| rebuild_sched_domains(); |
| } |
| |
| mutex_unlock(&itmt_update_mutex); |
| } |
| |
| int arch_asym_cpu_priority(int cpu) |
| { |
| return per_cpu(sched_core_priority, cpu); |
| } |
| |
| /** |
| * sched_set_itmt_core_prio() - Set CPU priority based on ITMT |
| * @prio: Priority of cpu core |
| * @core_cpu: The cpu number associated with the core |
| * |
| * The pstate driver will find out the max boost frequency |
| * and call this function to set a priority proportional |
| * to the max boost frequency. CPU with higher boost |
| * frequency will receive higher priority. |
| * |
| * No need to rebuild sched domain after updating |
| * the CPU priorities. The sched domains have no |
| * dependency on CPU priorities. |
| */ |
| void sched_set_itmt_core_prio(int prio, int core_cpu) |
| { |
| int cpu, i = 1; |
| |
| for_each_cpu(cpu, topology_sibling_cpumask(core_cpu)) { |
| int smt_prio; |
| |
| /* |
| * Ensure that the siblings are moved to the end |
| * of the priority chain and only used when |
| * all other high priority cpus are out of capacity. |
| */ |
| smt_prio = prio * smp_num_siblings / (i * i); |
| per_cpu(sched_core_priority, cpu) = smt_prio; |
| i++; |
| } |
| } |