kernel/sched/core_sched.c - linux - Git at Google

 // SPDX-License-Identifier: GPL-2.0-only

 /*
  * A simple wrapper around refcount. An allocated sched_core_cookie's
  * address is used to compute the cookie of the task.
  *
  * The object carries no payload besides the reference count: the cookie value
  * passed around as an unsigned long is this structure's address cast to an
  * integer, and a value of 0 means "no cookie".
  */
 struct sched_core_cookie {
 	refcount_t refcnt;	/* holders of this cookie; freed on the final put */
 };

 /*
  * Allocate a new cookie object that starts out with a single reference.
  *
  * A successful allocation also calls sched_core_get(); the balancing
  * sched_core_put() happens in sched_core_put_cookie() when the last
  * reference is dropped.
  *
  * Returns the cookie (the object's address as an unsigned long), or 0 if the
  * allocation failed.
  */
 static unsigned long sched_core_alloc_cookie(void)
 {
 	struct sched_core_cookie *new_ck;

 	new_ck = kmalloc(sizeof(*new_ck), GFP_KERNEL);
 	if (!new_ck)
 		return 0;

 	/* The caller owns the initial reference. */
 	refcount_set(&new_ck->refcnt, 1);
 	sched_core_get();

 	return (unsigned long)new_ck;
 }

 /*
  * Drop one reference on @cookie. A zero cookie is ignored.
  *
  * Once the final reference is gone the object is freed and sched_core_put()
  * is called to balance the sched_core_get() done at allocation time.
  */
 static void sched_core_put_cookie(unsigned long cookie)
 {
 	struct sched_core_cookie *ck = (void *)cookie;

 	if (!ck)
 		return;

 	/* Other holders remain; nothing more to do. */
 	if (!refcount_dec_and_test(&ck->refcnt))
 		return;

 	kfree(ck);
 	sched_core_put();
 }

 /*
  * Take an additional reference on @cookie. A zero cookie is passed through
  * untouched.
  *
  * Returns @cookie unchanged, so the call can be used inline in an assignment.
  */
 static unsigned long sched_core_get_cookie(unsigned long cookie)
 {
 	struct sched_core_cookie *ck = (void *)cookie;

 	if (!ck)
 		return cookie;

 	refcount_inc(&ck->refcnt);
 	return cookie;
 }

 /*
  * sched_core_update_cookie - replace the cookie on a task
  * @p: the task to update
  * @cookie: the new cookie
  *
  * Effectively exchange the task cookie; caller is responsible for lifetimes on
  * both ends.
  *
  * No reference counts are touched here: the caller must already hold a
  * reference on @cookie on behalf of @p, and must drop the reference @p held
  * on the cookie that is returned. The exchange is done with the task's rq
  * locked via task_rq_lock().
  *
  * Returns: the old cookie
  */
 static unsigned long sched_core_update_cookie(struct task_struct *p,
 					      unsigned long cookie)
 {
 	unsigned long old_cookie;
 	struct rq_flags rf;
 	struct rq *rq;

 	rq = task_rq_lock(p, &rf);

 	/*
 	 * Since creating a cookie implies sched_core_get(), and we cannot set
 	 * a cookie until after we've created it, similarly, we cannot destroy
 	 * a cookie until after we've removed it, we must have core scheduling
 	 * enabled here.
 	 */
 	SCHED_WARN_ON((p->core_cookie || cookie) && !sched_core_enabled(rq));

 	/* Take the task out of the core queue before its cookie changes. */
 	if (sched_core_enqueued(p))
 		sched_core_dequeue(rq, p, DEQUEUE_SAVE);

 	old_cookie = p->core_cookie;
 	p->core_cookie = cookie;

 	/*
 	 * Consider the cases: !prev_cookie and !cookie.
 	 *
 	 * The task is (re-)enqueued only when the new cookie is non-zero and
 	 * the task is queued on the runqueue, regardless of whether it was
 	 * enqueued before the change. A task whose cookie is being cleared is
 	 * dequeued above and is not added back.
 	 */
 	if (cookie && task_on_rq_queued(p))
 		sched_core_enqueue(rq, p);

 	/*
 	 * If task is currently running, it may not be compatible anymore after
 	 * the cookie change, so enter the scheduler on its CPU to schedule it
 	 * away.
 	 *
 	 * Note that it is possible that as a result of this cookie change, the
 	 * core has now entered/left forced idle state. Defer accounting to the
 	 * next scheduling edge, rather than always forcing a reschedule here.
 	 */
 	if (task_on_cpu(rq, p))
 		resched_curr(rq);

 	task_rq_unlock(rq, p, &rf);

 	return old_cookie;
 }

 /*
  * Take a new reference on @p's current cookie and return that cookie.
  *
  * The read of p->core_cookie and the reference increment are both done while
  * holding p->pi_lock (irqsave variant).
  *
  * Returns the cookie, which may be 0; the caller owns the new reference.
  */
 static unsigned long sched_core_clone_cookie(struct task_struct *p)
 {
 	unsigned long irq_flags;
 	unsigned long cloned;

 	raw_spin_lock_irqsave(&p->pi_lock, irq_flags);
 	cloned = sched_core_get_cookie(p->core_cookie);
 	raw_spin_unlock_irqrestore(&p->pi_lock, irq_flags);

 	return cloned;
 }

 /*
  * Initialise the core-scheduling state of the new task @p: clear its
  * core_node and give it a reference on the cookie of the current task.
  */
 void sched_core_fork(struct task_struct *p)
 {
 	unsigned long inherited;

 	RB_CLEAR_NODE(&p->core_node);

 	inherited = sched_core_clone_cookie(current);
 	p->core_cookie = inherited;
 }

 /*
  * Release the cookie reference held by @p. p->core_cookie itself is left
  * unmodified.
  */
 void sched_core_free(struct task_struct *p)
 {
 	unsigned long held = p->core_cookie;

 	sched_core_put_cookie(held);
 }

 /*
  * Install @cookie on @p.
  *
  * A reference on @cookie is taken for the task, the cookie is swapped in, and
  * the reference the task held on its previous cookie is dropped. The caller's
  * own reference on @cookie is not consumed.
  */
 static void __sched_core_set(struct task_struct *p, unsigned long cookie)
 {
 	unsigned long new_ref, old_ref;

 	new_ref = sched_core_get_cookie(cookie);
 	old_ref = sched_core_update_cookie(p, new_ref);
 	sched_core_put_cookie(old_ref);
 }

 /*
  * Called from prctl interface: PR_SCHED_CORE
  *
  * @cmd:   PR_SCHED_CORE_* operation (GET, CREATE, SHARE_TO or SHARE_FROM)
  * @pid:   pid of the target task as seen by the caller; 0 selects current
  * @type:  scope of the operation (PIDTYPE_PID, PIDTYPE_TGID or PIDTYPE_PGID)
  * @uaddr: user address that receives the cookie id for GET; must be 0 for
  *         every other command
  *
  * Returns 0 on success; -ENODEV when the sched_smt_present static branch is
  * off; -EINVAL for bad arguments; -ESRCH when @pid is not found; -EPERM when
  * the ptrace access check fails; -ENOMEM when a cookie cannot be allocated;
  * or the result of put_user() for GET.
  */
 int sched_core_share_pid(unsigned int cmd, pid_t pid, enum pid_type type,
 			 unsigned long uaddr)
 {
 	unsigned long cookie = 0, id = 0;
 	struct task_struct *task, *p;
 	struct pid *grp;
 	int err = 0;

 	if (!static_branch_likely(&sched_smt_present))
 		return -ENODEV;

 	/* @type is compared against PIDTYPE_* below, so the values must match. */
 	BUILD_BUG_ON(PR_SCHED_CORE_SCOPE_THREAD != PIDTYPE_PID);
 	BUILD_BUG_ON(PR_SCHED_CORE_SCOPE_THREAD_GROUP != PIDTYPE_TGID);
 	BUILD_BUG_ON(PR_SCHED_CORE_SCOPE_PROCESS_GROUP != PIDTYPE_PGID);

 	if (type > PIDTYPE_PGID || cmd >= PR_SCHED_CORE_MAX || pid < 0 ||
 	    (cmd != PR_SCHED_CORE_GET && uaddr))
 		return -EINVAL;

 	/*
 	 * Look the target up under RCU and pin it with a task reference so it
 	 * can still be used after rcu_read_unlock().
 	 */
 	rcu_read_lock();
 	if (pid == 0) {
 		task = current;
 	} else {
 		task = find_task_by_vpid(pid);
 		if (!task) {
 			rcu_read_unlock();
 			return -ESRCH;
 		}
 	}
 	get_task_struct(task);
 	rcu_read_unlock();

 	/*
 	 * Check if this process has the right to modify the specified
 	 * process. Use the regular "ptrace_may_access()" checks.
 	 */
 	if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS)) {
 		err = -EPERM;
 		goto out;
 	}

 	switch (cmd) {
 	case PR_SCHED_CORE_GET:
 		/* GET is per-task only and needs an 8-byte aligned @uaddr. */
 		if (type != PIDTYPE_PID || uaddr & 7) {
 			err = -EINVAL;
 			goto out;
 		}
 		cookie = sched_core_clone_cookie(task);
 		if (cookie) {
 			/* XXX improve ? */
 			ptr_to_hashval((void *)cookie, &id);
 		}
 		/* id stays 0 for a task without a cookie. */
 		err = put_user(id, (u64 __user *)uaddr);
 		goto out;

 	case PR_SCHED_CORE_CREATE:
 		/* Fresh cookie; it is installed on the target(s) below. */
 		cookie = sched_core_alloc_cookie();
 		if (!cookie) {
 			err = -ENOMEM;
 			goto out;
 		}
 		break;

 	case PR_SCHED_CORE_SHARE_TO:
 		/* Push the caller's cookie to the target(s) below. */
 		cookie = sched_core_clone_cookie(current);
 		break;

 	case PR_SCHED_CORE_SHARE_FROM:
 		/* Pull the target's cookie onto the caller; per-task only. */
 		if (type != PIDTYPE_PID) {
 			err = -EINVAL;
 			goto out;
 		}
 		cookie = sched_core_clone_cookie(task);
 		__sched_core_set(current, cookie);
 		goto out;

 	default:
 		err = -EINVAL;
 		goto out;
 	}

 	/* Single-task scope: no need to walk a group. */
 	if (type == PIDTYPE_PID) {
 		__sched_core_set(task, cookie);
 		goto out;
 	}

 	read_lock(&tasklist_lock);
 	grp = task_pid_type(task, type);

 	/*
 	 * First pass: check access to every task in the group before changing
 	 * any of them, so a permission failure leaves all cookies untouched.
 	 */
 	do_each_pid_thread(grp, type, p) {
 		if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS)) {
 			err = -EPERM;
 			goto out_tasklist;
 		}
 	} while_each_pid_thread(grp, type, p);

 	/* Second pass: install the cookie on every task in the group. */
 	do_each_pid_thread(grp, type, p) {
 		__sched_core_set(p, cookie);
 	} while_each_pid_thread(grp, type, p);
 out_tasklist:
 	read_unlock(&tasklist_lock);

 out:
 	/*
 	 * Drop the local reference; tasks that received the cookie hold their
 	 * own reference taken in __sched_core_set(). A zero cookie is ignored.
 	 */
 	sched_core_put_cookie(cookie);
 	put_task_struct(task);
 	return err;
 }

 #ifdef CONFIG_SCHEDSTATS

 /*
  * Charge the forced-idle time accumulated since core_forceidle_start to the
  * non-idle tasks found on the SMT siblings of @rq's CPU.
  *
  * REQUIRES: rq->core's clock recently updated.
  */
 void __sched_core_account_forceidle(struct rq *rq)
 {
 	const struct cpumask *smt_mask = cpu_smt_mask(cpu_of(rq));
 	u64 delta, now = rq_clock(rq->core);
 	struct rq *rq_i;
 	struct task_struct *p;
 	int i;

 	lockdep_assert_rq_held(rq);

 	WARN_ON_ONCE(!rq->core->core_forceidle_count);

 	/* A start value of 0 is treated as "no interval to account". */
 	if (rq->core->core_forceidle_start == 0)
 		return;

 	/* Nothing to charge unless the clock moved past the recorded start. */
 	delta = now - rq->core->core_forceidle_start;
 	if (unlikely((s64)delta <= 0))
 		return;

 	/* Restart the interval so the same time is not charged twice. */
 	rq->core->core_forceidle_start = now;

 	if (WARN_ON_ONCE(!rq->core->core_forceidle_occupation)) {
 		/* can't be forced idle without a running task */
 	} else if (rq->core->core_forceidle_count > 1 ||
 		   rq->core->core_forceidle_occupation > 1) {
 		/*
 		 * For larger SMT configurations, we need to scale the charged
 		 * forced idle amount since there can be more than one forced
 		 * idle sibling and more than one running cookied task.
 		 */
 		delta *= rq->core->core_forceidle_count;
 		delta = div_u64(delta, rq->core->core_forceidle_occupation);
 	}

 	for_each_cpu(i, smt_mask) {
 		rq_i = cpu_rq(i);
 		/* core_pick, when set, takes precedence over curr. */
 		p = rq_i->core_pick ?: rq_i->curr;

 		/* The idle task is never charged. */
 		if (p == rq_i->idle)
 			continue;

 		/*
 		 * Note: this will account forceidle to the current CPU, even
 		 * if it comes from our SMT sibling.
 		 */
 		__account_forceidle_time(p, delta);
 	}
 }

 /*
  * Tick-time hook for forced-idle accounting.
  *
  * Does nothing unless the core has a non-zero core_forceidle_count. Otherwise
  * the core rq's clock is updated (when @rq is not itself the core rq) and the
  * forced-idle time is accounted.
  */
 void __sched_core_tick(struct rq *rq)
 {
 	if (rq->core->core_forceidle_count) {
 		/* The accounting reads rq->core's clock, so refresh it first. */
 		if (rq->core != rq)
 			update_rq_clock(rq->core);

 		__sched_core_account_forceidle(rq);
 	}
 }

 #endif /* CONFIG_SCHEDSTATS */
	// SPDX-License-Identifier: GPL-2.0-only

	/*
	 * Thin wrapper around a reference count. The address of an allocated
	 * sched_core_cookie is what gets used as the task's cookie value.
	 */
	struct sched_core_cookie {
		refcount_t refcnt;
	};

	static unsigned long sched_core_alloc_cookie(void)
	{
	struct sched_core_cookie ck = kmalloc(sizeof(ck), GFP_KERNEL);
	if (!ck)
	return 0;

	refcount_set(&ck->refcnt, 1);
	sched_core_get();

	return (unsigned long)ck;
	}

	static void sched_core_put_cookie(unsigned long cookie)
	{
	struct sched_core_cookie ptr = (void )cookie;

	if (ptr && refcount_dec_and_test(&ptr->refcnt)) {
	kfree(ptr);
	sched_core_put();
	}
	}

	static unsigned long sched_core_get_cookie(unsigned long cookie)
	{
	struct sched_core_cookie ptr = (void )cookie;

	if (ptr)
	refcount_inc(&ptr->refcnt);

	return cookie;
	}

	/*
	 * sched_core_update_cookie - replace the cookie on a task
	 * @p: the task to update
	 * @cookie: the new cookie
	 *
	 * Effectively exchange the task cookie; caller is responsible for lifetimes on
	 * both ends. No reference counts are touched here. The exchange is done with
	 * the task's rq locked via task_rq_lock().
	 *
	 * Returns: the old cookie
	 */
	static unsigned long sched_core_update_cookie(struct task_struct *p,
						      unsigned long cookie)
	{
		unsigned long old_cookie;
		struct rq_flags rf;
		struct rq *rq;

		rq = task_rq_lock(p, &rf);

		/*
		 * Since creating a cookie implies sched_core_get(), and we cannot set
		 * a cookie until after we've created it, similarly, we cannot destroy
		 * a cookie until after we've removed it, we must have core scheduling
		 * enabled here.
		 */
		SCHED_WARN_ON((p->core_cookie || cookie) && !sched_core_enabled(rq));

		/* Take the task out of the core queue before its cookie changes. */
		if (sched_core_enqueued(p))
			sched_core_dequeue(rq, p, DEQUEUE_SAVE);

		old_cookie = p->core_cookie;
		p->core_cookie = cookie;

		/*
		 * Consider the cases: !prev_cookie and !cookie.
		 *
		 * The task is (re-)enqueued only when the new cookie is non-zero and
		 * the task is queued on the runqueue. A task whose cookie is being
		 * cleared is not added back.
		 */
		if (cookie && task_on_rq_queued(p))
			sched_core_enqueue(rq, p);

		/*
		 * If task is currently running, it may not be compatible anymore after
		 * the cookie change, so enter the scheduler on its CPU to schedule it
		 * away.
		 *
		 * Note that it is possible that as a result of this cookie change, the
		 * core has now entered/left forced idle state. Defer accounting to the
		 * next scheduling edge, rather than always forcing a reschedule here.
		 */
		if (task_on_cpu(rq, p))
			resched_curr(rq);

		task_rq_unlock(rq, p, &rf);

		return old_cookie;
	}

	/*
	 * Take a new reference on @p's current cookie and return that cookie.
	 *
	 * The read of p->core_cookie and the reference increment are both done
	 * while holding p->pi_lock (irqsave variant).
	 *
	 * Returns the cookie, which may be 0; the caller owns the new reference.
	 */
	static unsigned long sched_core_clone_cookie(struct task_struct *p)
	{
		unsigned long irq_flags;
		unsigned long cloned;

		raw_spin_lock_irqsave(&p->pi_lock, irq_flags);
		cloned = sched_core_get_cookie(p->core_cookie);
		raw_spin_unlock_irqrestore(&p->pi_lock, irq_flags);

		return cloned;
	}

	/*
	 * Initialise the core-scheduling state of the new task @p: clear its
	 * core_node and give it a reference on the cookie of the current task.
	 */
	void sched_core_fork(struct task_struct *p)
	{
		unsigned long inherited;

		RB_CLEAR_NODE(&p->core_node);

		inherited = sched_core_clone_cookie(current);
		p->core_cookie = inherited;
	}

	/*
	 * Release the cookie reference held by @p. p->core_cookie itself is left
	 * unmodified.
	 */
	void sched_core_free(struct task_struct *p)
	{
		unsigned long held = p->core_cookie;

		sched_core_put_cookie(held);
	}

	/*
	 * Install @cookie on @p.
	 *
	 * A reference on @cookie is taken for the task, the cookie is swapped in,
	 * and the reference the task held on its previous cookie is dropped. The
	 * caller's own reference on @cookie is not consumed.
	 */
	static void __sched_core_set(struct task_struct *p, unsigned long cookie)
	{
		unsigned long new_ref, old_ref;

		new_ref = sched_core_get_cookie(cookie);
		old_ref = sched_core_update_cookie(p, new_ref);
		sched_core_put_cookie(old_ref);
	}

	/*
	 * Called from prctl interface: PR_SCHED_CORE
	 *
	 * @cmd:   PR_SCHED_CORE_* operation (GET, CREATE, SHARE_TO or SHARE_FROM)
	 * @pid:   pid of the target task as seen by the caller; 0 selects current
	 * @type:  scope of the operation (PIDTYPE_PID, PIDTYPE_TGID or PIDTYPE_PGID)
	 * @uaddr: user address that receives the cookie id for GET; must be 0 for
	 *         every other command
	 *
	 * Returns 0 on success; -ENODEV when the sched_smt_present static branch is
	 * off; -EINVAL for bad arguments; -ESRCH when @pid is not found; -EPERM when
	 * the ptrace access check fails; -ENOMEM when a cookie cannot be allocated;
	 * or the result of put_user() for GET.
	 */
	int sched_core_share_pid(unsigned int cmd, pid_t pid, enum pid_type type,
				 unsigned long uaddr)
	{
		unsigned long cookie = 0, id = 0;
		struct task_struct *task, *p;
		struct pid *grp;
		int err = 0;

		if (!static_branch_likely(&sched_smt_present))
			return -ENODEV;

		/* @type is compared against PIDTYPE_* below, so the values must match. */
		BUILD_BUG_ON(PR_SCHED_CORE_SCOPE_THREAD != PIDTYPE_PID);
		BUILD_BUG_ON(PR_SCHED_CORE_SCOPE_THREAD_GROUP != PIDTYPE_TGID);
		BUILD_BUG_ON(PR_SCHED_CORE_SCOPE_PROCESS_GROUP != PIDTYPE_PGID);

		if (type > PIDTYPE_PGID || cmd >= PR_SCHED_CORE_MAX || pid < 0 ||
		    (cmd != PR_SCHED_CORE_GET && uaddr))
			return -EINVAL;

		/*
		 * Look the target up under RCU and pin it with a task reference so it
		 * can still be used after rcu_read_unlock().
		 */
		rcu_read_lock();
		if (pid == 0) {
			task = current;
		} else {
			task = find_task_by_vpid(pid);
			if (!task) {
				rcu_read_unlock();
				return -ESRCH;
			}
		}
		get_task_struct(task);
		rcu_read_unlock();

		/*
		 * Check if this process has the right to modify the specified
		 * process. Use the regular "ptrace_may_access()" checks.
		 */
		if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS)) {
			err = -EPERM;
			goto out;
		}

		switch (cmd) {
		case PR_SCHED_CORE_GET:
			/* GET is per-task only and needs an 8-byte aligned @uaddr. */
			if (type != PIDTYPE_PID || uaddr & 7) {
				err = -EINVAL;
				goto out;
			}
			cookie = sched_core_clone_cookie(task);
			if (cookie) {
				/* XXX improve ? */
				ptr_to_hashval((void *)cookie, &id);
			}
			/* id stays 0 for a task without a cookie. */
			err = put_user(id, (u64 __user *)uaddr);
			goto out;

		case PR_SCHED_CORE_CREATE:
			/* Fresh cookie; it is installed on the target(s) below. */
			cookie = sched_core_alloc_cookie();
			if (!cookie) {
				err = -ENOMEM;
				goto out;
			}
			break;

		case PR_SCHED_CORE_SHARE_TO:
			/* Push the caller's cookie to the target(s) below. */
			cookie = sched_core_clone_cookie(current);
			break;

		case PR_SCHED_CORE_SHARE_FROM:
			/* Pull the target's cookie onto the caller; per-task only. */
			if (type != PIDTYPE_PID) {
				err = -EINVAL;
				goto out;
			}
			cookie = sched_core_clone_cookie(task);
			__sched_core_set(current, cookie);
			goto out;

		default:
			err = -EINVAL;
			goto out;
		}

		/* Single-task scope: no need to walk a group. */
		if (type == PIDTYPE_PID) {
			__sched_core_set(task, cookie);
			goto out;
		}

		read_lock(&tasklist_lock);
		grp = task_pid_type(task, type);

		/*
		 * First pass: check access to every task in the group before changing
		 * any of them, so a permission failure leaves all cookies untouched.
		 */
		do_each_pid_thread(grp, type, p) {
			if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS)) {
				err = -EPERM;
				goto out_tasklist;
			}
		} while_each_pid_thread(grp, type, p);

		/* Second pass: install the cookie on every task in the group. */
		do_each_pid_thread(grp, type, p) {
			__sched_core_set(p, cookie);
		} while_each_pid_thread(grp, type, p);
	out_tasklist:
		read_unlock(&tasklist_lock);

	out:
		/*
		 * Drop the local reference; tasks that received the cookie hold their
		 * own reference taken in __sched_core_set(). A zero cookie is ignored.
		 */
		sched_core_put_cookie(cookie);
		put_task_struct(task);
		return err;
	}

	#ifdef CONFIG_SCHEDSTATS

	/*
	 * Charge the forced-idle time accumulated since core_forceidle_start to the
	 * non-idle tasks found on the SMT siblings of @rq's CPU.
	 *
	 * REQUIRES: rq->core's clock recently updated.
	 */
	void __sched_core_account_forceidle(struct rq *rq)
	{
		const struct cpumask *smt_mask = cpu_smt_mask(cpu_of(rq));
		u64 delta, now = rq_clock(rq->core);
		struct rq *rq_i;
		struct task_struct *p;
		int i;

		lockdep_assert_rq_held(rq);

		WARN_ON_ONCE(!rq->core->core_forceidle_count);

		/* A start value of 0 is treated as "no interval to account". */
		if (rq->core->core_forceidle_start == 0)
			return;

		/* Nothing to charge unless the clock moved past the recorded start. */
		delta = now - rq->core->core_forceidle_start;
		if (unlikely((s64)delta <= 0))
			return;

		/* Restart the interval so the same time is not charged twice. */
		rq->core->core_forceidle_start = now;

		if (WARN_ON_ONCE(!rq->core->core_forceidle_occupation)) {
			/* can't be forced idle without a running task */
		} else if (rq->core->core_forceidle_count > 1 ||
			   rq->core->core_forceidle_occupation > 1) {
			/*
			 * For larger SMT configurations, we need to scale the charged
			 * forced idle amount since there can be more than one forced
			 * idle sibling and more than one running cookied task.
			 */
			delta *= rq->core->core_forceidle_count;
			delta = div_u64(delta, rq->core->core_forceidle_occupation);
		}

		for_each_cpu(i, smt_mask) {
			rq_i = cpu_rq(i);
			/* core_pick, when set, takes precedence over curr. */
			p = rq_i->core_pick ?: rq_i->curr;

			/* The idle task is never charged. */
			if (p == rq_i->idle)
				continue;

			/*
			 * Note: this will account forceidle to the current CPU, even
			 * if it comes from our SMT sibling.
			 */
			__account_forceidle_time(p, delta);
		}
	}

	/*
	 * Tick-time hook for forced-idle accounting.
	 *
	 * Does nothing unless the core has a non-zero core_forceidle_count.
	 * Otherwise the core rq's clock is updated (when @rq is not itself the core
	 * rq) and the forced-idle time is accounted.
	 */
	void __sched_core_tick(struct rq *rq)
	{
		if (rq->core->core_forceidle_count) {
			/* The accounting reads rq->core's clock, so refresh it first. */
			if (rq->core != rq)
				update_rq_clock(rq->core);

			__sched_core_account_forceidle(rq);
		}
	}

	#endif /* CONFIG_SCHEDSTATS */