Blame - block/blk-rq-qos.c - linux

blob: d8cc820a365e3a51331e359345daabcaf499ef79 [file] [log] [blame]

Christoph Hellwig	3dcf60bc	2019-04-30 14:42:43 -0400	[diff] [blame]	1	// SPDX-License-Identifier: GPL-2.0
				2
Josef Bacik	a790504	2018-07-03 09:32:35 -0600	[diff] [blame]	3	#include "blk-rq-qos.h"
				4
Josef Bacik	a790504	2018-07-03 09:32:35 -0600	[diff] [blame]	5	/*
				6	* Increment 'v', if 'v' is below 'below'. Returns true if we succeeded,
				7	* false if 'v' + 1 would be bigger than 'below'.
				8	*/
Josef Bacik	22f1795	2018-07-19 21:42:13 -0400	[diff] [blame]	9	static bool atomic_inc_below(atomic_t *v, unsigned int below)
Josef Bacik	a790504	2018-07-03 09:32:35 -0600	[diff] [blame]	10	{
Josef Bacik	22f1795	2018-07-19 21:42:13 -0400	[diff] [blame]	11	unsigned int cur = atomic_read(v);
Josef Bacik	a790504	2018-07-03 09:32:35 -0600	[diff] [blame]	12
Uros Bizjak	f4b1e27	2022-07-12 17:05:47 +0200	[diff] [blame]	13	do {
Josef Bacik	a790504	2018-07-03 09:32:35 -0600	[diff] [blame]	14	if (cur >= below)
				15	return false;
Uros Bizjak	f4b1e27	2022-07-12 17:05:47 +0200	[diff] [blame]	16	} while (!atomic_try_cmpxchg(v, &cur, cur + 1));
Josef Bacik	a790504	2018-07-03 09:32:35 -0600	[diff] [blame]	17
				18	return true;
				19	}
				20
Josef Bacik	22f1795	2018-07-19 21:42:13 -0400	[diff] [blame]	21	bool rq_wait_inc_below(struct rq_wait *rq_wait, unsigned int limit)
Josef Bacik	a790504	2018-07-03 09:32:35 -0600	[diff] [blame]	22	{
				23	return atomic_inc_below(&rq_wait->inflight, limit);
				24	}
				25
Jens Axboe	e504545	2018-11-15 12:25:10 -0700	[diff] [blame]	26	void __rq_qos_cleanup(struct rq_qos rqos, struct bio bio)
Josef Bacik	a790504	2018-07-03 09:32:35 -0600	[diff] [blame]	27	{
Jens Axboe	e504545	2018-11-15 12:25:10 -0700	[diff] [blame]	28	do {
Josef Bacik	a790504	2018-07-03 09:32:35 -0600	[diff] [blame]	29	if (rqos->ops->cleanup)
Josef Bacik	c1c8038	2018-07-03 11:14:59 -0400	[diff] [blame]	30	rqos->ops->cleanup(rqos, bio);
Jens Axboe	e504545	2018-11-15 12:25:10 -0700	[diff] [blame]	31	rqos = rqos->next;
				32	} while (rqos);
Josef Bacik	a790504	2018-07-03 09:32:35 -0600	[diff] [blame]	33	}
				34
Jens Axboe	e504545	2018-11-15 12:25:10 -0700	[diff] [blame]	35	void __rq_qos_done(struct rq_qos rqos, struct request rq)
Josef Bacik	a790504	2018-07-03 09:32:35 -0600	[diff] [blame]	36	{
Jens Axboe	e504545	2018-11-15 12:25:10 -0700	[diff] [blame]	37	do {
Josef Bacik	a790504	2018-07-03 09:32:35 -0600	[diff] [blame]	38	if (rqos->ops->done)
				39	rqos->ops->done(rqos, rq);
Jens Axboe	e504545	2018-11-15 12:25:10 -0700	[diff] [blame]	40	rqos = rqos->next;
				41	} while (rqos);
Josef Bacik	a790504	2018-07-03 09:32:35 -0600	[diff] [blame]	42	}
				43
Jens Axboe	e504545	2018-11-15 12:25:10 -0700	[diff] [blame]	44	void __rq_qos_issue(struct rq_qos rqos, struct request rq)
Josef Bacik	a790504	2018-07-03 09:32:35 -0600	[diff] [blame]	45	{
Jens Axboe	e504545	2018-11-15 12:25:10 -0700	[diff] [blame]	46	do {
Josef Bacik	a790504	2018-07-03 09:32:35 -0600	[diff] [blame]	47	if (rqos->ops->issue)
				48	rqos->ops->issue(rqos, rq);
Jens Axboe	e504545	2018-11-15 12:25:10 -0700	[diff] [blame]	49	rqos = rqos->next;
				50	} while (rqos);
Josef Bacik	a790504	2018-07-03 09:32:35 -0600	[diff] [blame]	51	}
				52
Jens Axboe	e504545	2018-11-15 12:25:10 -0700	[diff] [blame]	53	void __rq_qos_requeue(struct rq_qos rqos, struct request rq)
Josef Bacik	a790504	2018-07-03 09:32:35 -0600	[diff] [blame]	54	{
Jens Axboe	e504545	2018-11-15 12:25:10 -0700	[diff] [blame]	55	do {
Josef Bacik	a790504	2018-07-03 09:32:35 -0600	[diff] [blame]	56	if (rqos->ops->requeue)
				57	rqos->ops->requeue(rqos, rq);
Jens Axboe	e504545	2018-11-15 12:25:10 -0700	[diff] [blame]	58	rqos = rqos->next;
				59	} while (rqos);
Josef Bacik	a790504	2018-07-03 09:32:35 -0600	[diff] [blame]	60	}
				61
Jens Axboe	e504545	2018-11-15 12:25:10 -0700	[diff] [blame]	62	void __rq_qos_throttle(struct rq_qos rqos, struct bio bio)
Josef Bacik	a790504	2018-07-03 09:32:35 -0600	[diff] [blame]	63	{
Jens Axboe	e504545	2018-11-15 12:25:10 -0700	[diff] [blame]	64	do {
Josef Bacik	a790504	2018-07-03 09:32:35 -0600	[diff] [blame]	65	if (rqos->ops->throttle)
Christoph Hellwig	d533756	2018-11-14 17:02:09 +0100	[diff] [blame]	66	rqos->ops->throttle(rqos, bio);
Jens Axboe	e504545	2018-11-15 12:25:10 -0700	[diff] [blame]	67	rqos = rqos->next;
				68	} while (rqos);
Josef Bacik	c1c8038	2018-07-03 11:14:59 -0400	[diff] [blame]	69	}
				70
Jens Axboe	e504545	2018-11-15 12:25:10 -0700	[diff] [blame]	71	void __rq_qos_track(struct rq_qos rqos, struct request rq, struct bio *bio)
Josef Bacik	c1c8038	2018-07-03 11:14:59 -0400	[diff] [blame]	72	{
Jens Axboe	e504545	2018-11-15 12:25:10 -0700	[diff] [blame]	73	do {
Josef Bacik	c1c8038	2018-07-03 11:14:59 -0400	[diff] [blame]	74	if (rqos->ops->track)
				75	rqos->ops->track(rqos, rq, bio);
Jens Axboe	e504545	2018-11-15 12:25:10 -0700	[diff] [blame]	76	rqos = rqos->next;
				77	} while (rqos);
Josef Bacik	a790504	2018-07-03 09:32:35 -0600	[diff] [blame]	78	}
				79
Tejun Heo	d3e65ff	2019-08-28 15:05:54 -0700	[diff] [blame]	80	void __rq_qos_merge(struct rq_qos rqos, struct request rq, struct bio *bio)
				81	{
				82	do {
				83	if (rqos->ops->merge)
				84	rqos->ops->merge(rqos, rq, bio);
				85	rqos = rqos->next;
				86	} while (rqos);
				87	}
				88
Jens Axboe	e504545	2018-11-15 12:25:10 -0700	[diff] [blame]	89	void __rq_qos_done_bio(struct rq_qos rqos, struct bio bio)
Josef Bacik	67b42d0	2018-07-03 11:15:00 -0400	[diff] [blame]	90	{
Jens Axboe	e504545	2018-11-15 12:25:10 -0700	[diff] [blame]	91	do {
Josef Bacik	67b42d0	2018-07-03 11:15:00 -0400	[diff] [blame]	92	if (rqos->ops->done_bio)
				93	rqos->ops->done_bio(rqos, bio);
Jens Axboe	e504545	2018-11-15 12:25:10 -0700	[diff] [blame]	94	rqos = rqos->next;
				95	} while (rqos);
Josef Bacik	67b42d0	2018-07-03 11:15:00 -0400	[diff] [blame]	96	}
				97
Tejun Heo	9677a3e	2019-08-28 15:05:55 -0700	[diff] [blame]	98	void __rq_qos_queue_depth_changed(struct rq_qos *rqos)
				99	{
				100	do {
				101	if (rqos->ops->queue_depth_changed)
				102	rqos->ops->queue_depth_changed(rqos);
				103	rqos = rqos->next;
				104	} while (rqos);
				105	}
				106
Josef Bacik	a790504	2018-07-03 09:32:35 -0600	[diff] [blame]	107	/*
				108	* Return true, if we can't increase the depth further by scaling
				109	*/
				110	bool rq_depth_calc_max_depth(struct rq_depth *rqd)
				111	{
				112	unsigned int depth;
				113	bool ret = false;
				114
				115	/*
				116	* For QD=1 devices, this is a special case. It's important for those
				117	* to have one request ready when one completes, so force a depth of
				118	* 2 for those devices. On the backend, it'll be a depth of 1 anyway,
				119	* since the device can't have more than that in flight. If we're
				120	* scaling down, then keep a setting of 1/1/1.
				121	*/
				122	if (rqd->queue_depth == 1) {
				123	if (rqd->scale_step > 0)
				124	rqd->max_depth = 1;
				125	else {
				126	rqd->max_depth = 2;
				127	ret = true;
				128	}
				129	} else {
				130	/*
				131	* scale_step == 0 is our default state. If we have suffered
				132	* latency spikes, step will be > 0, and we shrink the
				133	* allowed write depths. If step is < 0, we're only doing
				134	* writes, and we allow a temporarily higher depth to
				135	* increase performance.
				136	*/
				137	depth = min_t(unsigned int, rqd->default_depth,
				138	rqd->queue_depth);
				139	if (rqd->scale_step > 0)
				140	depth = 1 + ((depth - 1) >> min(31, rqd->scale_step));
				141	else if (rqd->scale_step < 0) {
				142	unsigned int maxd = 3 * rqd->queue_depth / 4;
				143
				144	depth = 1 + ((depth - 1) << -rqd->scale_step);
				145	if (depth > maxd) {
				146	depth = maxd;
				147	ret = true;
				148	}
				149	}
				150
				151	rqd->max_depth = depth;
				152	}
				153
				154	return ret;
				155	}
				156
Harshad Shirwadkar	b84477d	2019-10-05 11:59:27 -0700	[diff] [blame]	157	/* Returns true on success and false if scaling up wasn't possible */
				158	bool rq_depth_scale_up(struct rq_depth *rqd)
Josef Bacik	a790504	2018-07-03 09:32:35 -0600	[diff] [blame]	159	{
				160	/*
				161	* Hit max in previous round, stop here
				162	*/
				163	if (rqd->scaled_max)
Harshad Shirwadkar	b84477d	2019-10-05 11:59:27 -0700	[diff] [blame]	164	return false;
Josef Bacik	a790504	2018-07-03 09:32:35 -0600	[diff] [blame]	165
				166	rqd->scale_step--;
				167
				168	rqd->scaled_max = rq_depth_calc_max_depth(rqd);
Harshad Shirwadkar	b84477d	2019-10-05 11:59:27 -0700	[diff] [blame]	169	return true;
Josef Bacik	a790504	2018-07-03 09:32:35 -0600	[diff] [blame]	170	}
				171
				172	/*
				173	* Scale rwb down. If 'hard_throttle' is set, do it quicker, since we
Harshad Shirwadkar	b84477d	2019-10-05 11:59:27 -0700	[diff] [blame]	174	* had a latency violation. Returns true on success and returns false if
				175	* scaling down wasn't possible.
Josef Bacik	a790504	2018-07-03 09:32:35 -0600	[diff] [blame]	176	*/
Harshad Shirwadkar	b84477d	2019-10-05 11:59:27 -0700	[diff] [blame]	177	bool rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle)
Josef Bacik	a790504	2018-07-03 09:32:35 -0600	[diff] [blame]	178	{
				179	/*
				180	* Stop scaling down when we've hit the limit. This also prevents
				181	* ->scale_step from going to crazy values, if the device can't
				182	* keep up.
				183	*/
				184	if (rqd->max_depth == 1)
Harshad Shirwadkar	b84477d	2019-10-05 11:59:27 -0700	[diff] [blame]	185	return false;
Josef Bacik	a790504	2018-07-03 09:32:35 -0600	[diff] [blame]	186
				187	if (rqd->scale_step < 0 && hard_throttle)
				188	rqd->scale_step = 0;
				189	else
				190	rqd->scale_step++;
				191
				192	rqd->scaled_max = false;
				193	rq_depth_calc_max_depth(rqd);
Harshad Shirwadkar	b84477d	2019-10-05 11:59:27 -0700	[diff] [blame]	194	return true;
Josef Bacik	a790504	2018-07-03 09:32:35 -0600	[diff] [blame]	195	}
				196
Josef Bacik	84f6032	2018-12-04 12:59:02 -0500	[diff] [blame]	197	struct rq_qos_wait_data {
				198	struct wait_queue_entry wq;
				199	struct task_struct *task;
				200	struct rq_wait *rqw;
				201	acquire_inflight_cb_t *cb;
				202	void *private_data;
				203	bool got_token;
				204	};
				205
				206	static int rq_qos_wake_function(struct wait_queue_entry *curr,
				207	unsigned int mode, int wake_flags, void *key)
				208	{
				209	struct rq_qos_wait_data *data = container_of(curr,
				210	struct rq_qos_wait_data,
				211	wq);
				212
				213	/*
				214	* If we fail to get a budget, return -1 to interrupt the wake up loop
				215	* in __wake_up_common.
				216	*/
				217	if (!data->cb(data->rqw, data->private_data))
				218	return -1;
				219
				220	data->got_token = true;
Josef Bacik	ac38297	2019-07-16 16:19:29 -0400	[diff] [blame]	221	smp_wmb();
Josef Bacik	84f6032	2018-12-04 12:59:02 -0500	[diff] [blame]	222	list_del_init(&curr->entry);
				223	wake_up_process(data->task);
				224	return 1;
				225	}
				226
				227	/**
				228	* rq_qos_wait - throttle on a rqw if we need to
Bart Van Assche	83826a5	2019-05-30 17:00:50 -0700	[diff] [blame]	229	* @rqw: rqw to throttle on
				230	* @private_data: caller provided specific data
				231	* @acquire_inflight_cb: inc the rqw->inflight counter if we can
				232	* @cleanup_cb: the callback to cleanup in case we race with a waker
Josef Bacik	84f6032	2018-12-04 12:59:02 -0500	[diff] [blame]	233	*
				234	* This provides a uniform place for the rq_qos users to do their throttling.
				235	* Since you can end up with a lot of things sleeping at once, this manages the
				236	* waking up based on the resources available. The acquire_inflight_cb should
				237	* inc the rqw->inflight if we have the ability to do so, or return false if not
				238	* and then we will sleep until the room becomes available.
				239	*
				240	* cleanup_cb is in case that we race with a waker and need to cleanup the
				241	* inflight count accordingly.
				242	*/
				243	void rq_qos_wait(struct rq_wait rqw, void private_data,
				244	acquire_inflight_cb_t *acquire_inflight_cb,
				245	cleanup_cb_t *cleanup_cb)
				246	{
				247	struct rq_qos_wait_data data = {
				248	.wq = {
				249	.func = rq_qos_wake_function,
				250	.entry = LIST_HEAD_INIT(data.wq.entry),
				251	},
				252	.task = current,
				253	.rqw = rqw,
				254	.cb = acquire_inflight_cb,
				255	.private_data = private_data,
				256	};
				257	bool has_sleeper;
				258
				259	has_sleeper = wq_has_sleeper(&rqw->wait);
				260	if (!has_sleeper && acquire_inflight_cb(rqw, private_data))
				261	return;
				262
Jan Kara	11c7aa0	2021-06-07 13:26:13 +0200	[diff] [blame]	263	has_sleeper = !prepare_to_wait_exclusive(&rqw->wait, &data.wq,
				264	TASK_UNINTERRUPTIBLE);
Josef Bacik	84f6032	2018-12-04 12:59:02 -0500	[diff] [blame]	265	do {
Josef Bacik	ac38297	2019-07-16 16:19:29 -0400	[diff] [blame]	266	/* The memory barrier in set_task_state saves us here. */
Josef Bacik	84f6032	2018-12-04 12:59:02 -0500	[diff] [blame]	267	if (data.got_token)
				268	break;
				269	if (!has_sleeper && acquire_inflight_cb(rqw, private_data)) {
				270	finish_wait(&rqw->wait, &data.wq);
				271
				272	/*
				273	* We raced with wbt_wake_function() getting a token,
				274	* which means we now have two. Put our local token
				275	* and wake anyone else potentially waiting for one.
				276	*/
Josef Bacik	ac38297	2019-07-16 16:19:29 -0400	[diff] [blame]	277	smp_rmb();
Josef Bacik	84f6032	2018-12-04 12:59:02 -0500	[diff] [blame]	278	if (data.got_token)
				279	cleanup_cb(rqw, private_data);
				280	break;
				281	}
				282	io_schedule();
Josef Bacik	64e7ea8	2019-07-16 16:19:27 -0400	[diff] [blame]	283	has_sleeper = true;
Josef Bacik	d14a9b3	2019-07-16 16:19:28 -0400	[diff] [blame]	284	set_current_state(TASK_UNINTERRUPTIBLE);
Josef Bacik	84f6032	2018-12-04 12:59:02 -0500	[diff] [blame]	285	} while (1);
				286	finish_wait(&rqw->wait, &data.wq);
				287	}
				288
Josef Bacik	a790504	2018-07-03 09:32:35 -0600	[diff] [blame]	289	void rq_qos_exit(struct request_queue *q)
				290	{
Josef Bacik	a790504	2018-07-03 09:32:35 -0600	[diff] [blame]	291	while (q->rq_qos) {
				292	struct rq_qos *rqos = q->rq_qos;
				293	q->rq_qos = rqos->next;
				294	rqos->ops->exit(rqos);
				295	}
				296	}
Christoph Hellwig	b494f9c	2023-02-03 16:03:53 +0100	[diff] [blame]	297
Christoph Hellwig	ce57b55	2023-02-03 16:03:54 +0100	[diff] [blame]	298	int rq_qos_add(struct rq_qos rqos, struct gendisk disk, enum rq_qos_id id,
Christoph Hellwig	3963d84d	2023-02-03 16:03:55 +0100	[diff] [blame]	299	const struct rq_qos_ops *ops)
Christoph Hellwig	b494f9c	2023-02-03 16:03:53 +0100	[diff] [blame]	300	{
Christoph Hellwig	ce57b55	2023-02-03 16:03:54 +0100	[diff] [blame]	301	struct request_queue *q = disk->queue;
				302
Christoph Hellwig	ba91c84	2023-02-03 16:03:56 +0100	[diff] [blame]	303	rqos->disk = disk;
Christoph Hellwig	ce57b55	2023-02-03 16:03:54 +0100	[diff] [blame]	304	rqos->id = id;
				305	rqos->ops = ops;
				306
Christoph Hellwig	b494f9c	2023-02-03 16:03:53 +0100	[diff] [blame]	307	/*
				308	* No IO can be in-flight when adding rqos, so freeze queue, which
				309	* is fine since we only support rq_qos for blk-mq queue.
				310	*
				311	* Reuse ->queue_lock for protecting against other concurrent
				312	* rq_qos adding/deleting
				313	*/
				314	blk_mq_freeze_queue(q);
				315
				316	spin_lock_irq(&q->queue_lock);
				317	if (rq_qos_id(q, rqos->id))
				318	goto ebusy;
				319	rqos->next = q->rq_qos;
				320	q->rq_qos = rqos;
				321	spin_unlock_irq(&q->queue_lock);
				322
				323	blk_mq_unfreeze_queue(q);
				324
				325	if (rqos->ops->debugfs_attrs) {
				326	mutex_lock(&q->debugfs_mutex);
				327	blk_mq_debugfs_register_rqos(rqos);
				328	mutex_unlock(&q->debugfs_mutex);
				329	}
				330
				331	return 0;
				332	ebusy:
				333	spin_unlock_irq(&q->queue_lock);
				334	blk_mq_unfreeze_queue(q);
				335	return -EBUSY;
Christoph Hellwig	b494f9c	2023-02-03 16:03:53 +0100	[diff] [blame]	336	}
				337
Christoph Hellwig	ce57b55	2023-02-03 16:03:54 +0100	[diff] [blame]	338	void rq_qos_del(struct rq_qos *rqos)
Christoph Hellwig	b494f9c	2023-02-03 16:03:53 +0100	[diff] [blame]	339	{
Christoph Hellwig	ba91c84	2023-02-03 16:03:56 +0100	[diff] [blame]	340	struct request_queue *q = rqos->disk->queue;
Christoph Hellwig	b494f9c	2023-02-03 16:03:53 +0100	[diff] [blame]	341	struct rq_qos **cur;
				342
				343	/*
				344	* See comment in rq_qos_add() about freezing queue & using
				345	* ->queue_lock.
				346	*/
				347	blk_mq_freeze_queue(q);
				348
				349	spin_lock_irq(&q->queue_lock);
				350	for (cur = &q->rq_qos; cur; cur = &(cur)->next) {
				351	if (*cur == rqos) {
				352	*cur = rqos->next;
				353	break;
				354	}
				355	}
				356	spin_unlock_irq(&q->queue_lock);
				357
				358	blk_mq_unfreeze_queue(q);
				359
				360	mutex_lock(&q->debugfs_mutex);
				361	blk_mq_debugfs_unregister_rqos(rqos);
				362	mutex_unlock(&q->debugfs_mutex);
				363	}