/* block/blk-throttle.h - linux - Git at Google */

 #ifndef BLK_THROTTLE_H
 #define BLK_THROTTLE_H

 #include "blk-cgroup-rwstat.h"

 /*
  * To implement hierarchical throttling, throtl_grps form a tree and bios
  * are dispatched upwards level by level until they reach the top and get
  * issued.  When dispatching bios from the children and local group at each
  * level, if the bios are dispatched into a single bio_list, there's a risk
  * of a local or child group which can queue many bios at once filling up
  * the list starving others.
  *
  * To avoid such starvation, dispatched bios are queued separately
  * according to where they came from.  When they are again dispatched to
  * the parent, they're popped in round-robin order so that no single source
  * hogs the dispatch window.
  *
  * throtl_qnode is used to keep the queued bios separated by their sources.
  * Bios are queued to throtl_qnode which in turn is queued to
  * throtl_service_queue and then dispatched in round-robin order.
  *
  * It's also used to track the reference counts on blkg's.  A qnode always
  * belongs to a throtl_grp and gets queued on itself or the parent, so
  * incrementing the reference of the associated throtl_grp when a qnode is
  * queued and decrementing when dequeued is enough to keep the whole blkg
  * tree pinned while bios are in flight.
  */
 struct throtl_qnode {
 	/* list linkage; entry on a service_queue->queued[] list */
 	struct list_head	node;		/* service_queue->queued[] */
 	/* bios held by this qnode, kept apart from bios of other sources */
 	struct bio_list		bios;		/* queued bios */
 	/* owning group; a qnode always belongs to exactly one throtl_grp */
 	struct throtl_grp	*tg;		/* tg this qnode belongs to */
 };

 /*
  * Dispatch state for one level of the throttling tree.  struct throtl_grp
  * embeds one of these as ->service_queue; parent_sq points at the parent's.
  */
 struct throtl_service_queue {
 	struct throtl_service_queue *parent_sq;	/* the parent service_queue */

 	/*
 	 * Bios queued directly to this service_queue or dispatched from
 	 * children throtl_grp's.  queued[] holds one list of throtl_qnodes
 	 * per direction (READ/WRITE); nr_queued[] is sized to match.
 	 */
 	struct list_head	queued[2];	/* throtl_qnode [READ/WRITE] */
 	unsigned int		nr_queued[2];	/* number of queued bios */

 	/*
 	 * RB tree of active children throtl_grp's, which are sorted by
 	 * their ->disptime.  pending_timer is documented below as firing
 	 * on first_pending_disptime, the disptime of the first group.
 	 */
 	struct rb_root_cached	pending_tree;	/* RB tree of active tgs */
 	unsigned int		nr_pending;	/* # queued in the tree */
 	unsigned long		first_pending_disptime;	/* disptime of the first tg */
 	struct timer_list	pending_timer;	/* fires on first_pending_disptime */
 };

 /*
  * Index of a limit kind.  LIMIT_CNT is the number of kinds and is used as
  * the size of the second dimension of the bps/iops arrays in struct
  * throtl_grp, so it must stay last.
  */
 enum {
 	LIMIT_LOW = 0,	/* first limit slot */
 	LIMIT_MAX,	/* second limit slot */
 	LIMIT_CNT,	/* count of limit slots, not a limit itself */
 };

 /*
  * Per-group throttling state.  The embedded blkg_policy_data must stay the
  * first member; pd_to_tg() recovers the group from it with container_of().
  * Two-element arrays below are per direction: has_rules[] is indexed with
  * bio_data_dir() in blk_throtl_bio(), and the others are presumably indexed
  * the same way (READ/WRITE) - confirm against blk-throttle.c.
  */
 struct throtl_grp {
 	/* must be the first member */
 	struct blkg_policy_data pd;

 	/*
 	 * active throtl group service_queue member: links this group into a
 	 * service_queue's pending_tree, which holds active child groups
 	 * sorted by ->disptime
 	 */
 	struct rb_node rb_node;

 	/* throtl_data this group belongs to */
 	struct throtl_data *td;

 	/* this group's service queue */
 	struct throtl_service_queue service_queue;

 	/*
 	 * qnode_on_self is used when bios are directly queued to this
 	 * throtl_grp so that local bios compete fairly with bios
 	 * dispatched from children.  qnode_on_parent is used when bios are
 	 * dispatched from this throtl_grp into its parent and will compete
 	 * with the sibling qnode_on_parents and the parent's
 	 * qnode_on_self.
 	 */
 	struct throtl_qnode qnode_on_self[2];
 	struct throtl_qnode qnode_on_parent[2];

 	/*
 	 * Dispatch time in jiffies. This is the estimated time when group
 	 * will unthrottle and is ready to dispatch more bio. It is used as
 	 * key to sort active groups in service tree.
 	 */
 	unsigned long disptime;

 	/* group flag bits; the bit definitions are not visible in this header */
 	unsigned int flags;

 	/* are there any throtl rules between this group and td? */
 	bool has_rules[2];

 	/* internally used bytes per second rate limits */
 	uint64_t bps[2][LIMIT_CNT];
 	/* user configured bps limits */
 	uint64_t bps_conf[2][LIMIT_CNT];

 	/* internally used IOPS limits */
 	unsigned int iops[2][LIMIT_CNT];
 	/* user configured IOPS limits */
 	unsigned int iops_conf[2][LIMIT_CNT];

 	/* Number of bytes dispatched in current slice */
 	uint64_t bytes_disp[2];
 	/* Number of bio's dispatched in current slice */
 	unsigned int io_disp[2];

 	/* NOTE(review): presumably when the low limit was last exceeded - confirm */
 	unsigned long last_low_overflow_time[2];

 	/* NOTE(review): presumably snapshots of the dispatch counters above - confirm */
 	uint64_t last_bytes_disp[2];
 	unsigned int last_io_disp[2];

 	/* NOTE(review): presumably the time the snapshots were last taken - confirm */
 	unsigned long last_check_time;

 	unsigned long latency_target; /* us */
 	unsigned long latency_target_conf; /* us */
 	/* When did we start a new slice */
 	unsigned long slice_start[2];
 	/* end of the slice that began at slice_start[] */
 	unsigned long slice_end[2];

 	unsigned long last_finish_time; /* ns / 1024 */
 	unsigned long checked_last_finish_time; /* ns / 1024 */
 	unsigned long avg_idletime; /* ns / 1024 */
 	unsigned long idletime_threshold; /* us */
 	unsigned long idletime_threshold_conf; /* us */

 	unsigned int bio_cnt; /* total bios */
 	unsigned int bad_bio_cnt; /* bios exceeding latency threshold */
 	/* NOTE(review): presumably when bio_cnt/bad_bio_cnt are next reset - confirm */
 	unsigned long bio_cnt_reset_time;

 	/*
 	 * NOTE(review): presumably counts of bio splits charged through
 	 * blk_throtl_charge_bio_split(), plus a snapshot - confirm
 	 */
 	atomic_t io_split_cnt[2];
 	atomic_t last_io_split_cnt[2];

 	/* NOTE(review): presumably byte and I/O-count statistics - confirm */
 	struct blkg_rwstat stat_bytes;
 	struct blkg_rwstat stat_ios;
 };

 extern struct blkcg_policy blkcg_policy_throtl;

 /*
  * Map a policy-data pointer to the throtl_grp that embeds it as ->pd.
  * Returns NULL when @pd is NULL.
  */
 static inline struct throtl_grp *pd_to_tg(struct blkg_policy_data *pd)
 {
 	if (!pd)
 		return NULL;

 	return container_of(pd, struct throtl_grp, pd);
 }

 static inline struct throtl_grp *blkg_to_tg(struct blkcg_gq *blkg)
 {
 	return pd_to_tg(blkg_to_pd(blkg, &blkcg_policy_throtl));
 }

 /*
  * Internal throttling interface
  */
 #ifndef CONFIG_BLK_DEV_THROTTLING
 /*
  * Stubs used when CONFIG_BLK_DEV_THROTTLING is not defined: init reports
  * success (0), blk_throtl_bio() reports false, the rest do nothing.
  */
 static inline int blk_throtl_init(struct request_queue *q)
 {
 	return 0;
 }

 static inline void blk_throtl_exit(struct request_queue *q)
 {
 }

 static inline void blk_throtl_register_queue(struct request_queue *q)
 {
 }

 static inline void blk_throtl_charge_bio_split(struct bio *bio)
 {
 }

 static inline bool blk_throtl_bio(struct bio *bio)
 {
 	return false;
 }
 #else /* CONFIG_BLK_DEV_THROTTLING */
 /*
  * Out-of-line throttling entry points, declared when
  * CONFIG_BLK_DEV_THROTTLING is defined.  Their definitions are not in this
  * header.
  */
 int blk_throtl_init(struct request_queue *q);
 void blk_throtl_exit(struct request_queue *q);
 void blk_throtl_register_queue(struct request_queue *q);
 void blk_throtl_charge_bio_split(struct bio *bio);
 /* called by the inline blk_throtl_bio() once its early-out checks pass */
 bool __blk_throtl_bio(struct bio *bio);
 /*
  * Inline front end for bio throttling.  Returns false without calling
  * __blk_throtl_bio() when @bio already carries BIO_THROTTLED or when its
  * group has no rules for the bio's data direction; otherwise returns
  * whatever __blk_throtl_bio() returns.
  */
 static inline bool blk_throtl_bio(struct bio *bio)
 {
 	struct throtl_grp *tg = blkg_to_tg(bio->bi_blkg);

 	/* short-circuit keeps the check order: flag first, then rules */
 	if (bio_flagged(bio, BIO_THROTTLED) ||
 	    !tg->has_rules[bio_data_dir(bio)])
 		return false;

 	return __blk_throtl_bio(bio);
 }
 #endif /* CONFIG_BLK_DEV_THROTTLING */

 #endif
	#ifndef BLK_THROTTLE_H
	#define BLK_THROTTLE_H

	#include "blk-cgroup-rwstat.h"

	/*
	* To implement hierarchical throttling, throtl_grps form a tree and bios
	* are dispatched upwards level by level until they reach the top and get
	* issued. When dispatching bios from the children and local group at each
	* level, if the bios are dispatched into a single bio_list, there's a risk
	* of a local or child group which can queue many bios at once filling up
	* the list starving others.
	*
	* To avoid such starvation, dispatched bios are queued separately
	* according to where they came from. When they are again dispatched to
	* the parent, they're popped in round-robin order so that no single source
	* hogs the dispatch window.
	*
	* throtl_qnode is used to keep the queued bios separated by their sources.
	* Bios are queued to throtl_qnode which in turn is queued to
	* throtl_service_queue and then dispatched in round-robin order.
	*
	* It's also used to track the reference counts on blkg's. A qnode always
	* belongs to a throtl_grp and gets queued on itself or the parent, so
	* incrementing the reference of the associated throtl_grp when a qnode is
	* queued and decrementing when dequeued is enough to keep the whole blkg
	* tree pinned while bios are in flight.
	*/
	/*
	 * Holds the bios queued from one source so they stay separate from
	 * bios of other sources.  A qnode is linked onto a service_queue's
	 * queued[] list and always belongs to one throtl_grp.
	 */
	struct throtl_qnode {
		struct list_head node; /* service_queue->queued[] */
		struct bio_list bios; /* queued bios */
		/* pointer, not an embedded struct: throtl_grp itself embeds qnodes */
		struct throtl_grp *tg; /* tg this qnode belongs to */
	};

	struct throtl_service_queue {
	struct throtl_service_queue parent_sq; / the parent service_queue */

	/*
	* Bios queued directly to this service_queue or dispatched from
	* children throtl_grp's.
	*/
	struct list_head queued[2]; /* throtl_qnode [READ/WRITE] */
	unsigned int nr_queued[2]; /* number of queued bios */

	/*
	* RB tree of active children throtl_grp's, which are sorted by
	* their ->disptime.
	*/
	struct rb_root_cached pending_tree; /* RB tree of active tgs */
	unsigned int nr_pending; /* # queued in the tree */
	unsigned long first_pending_disptime; /* disptime of the first tg */
	struct timer_list pending_timer; /* fires on first_pending_disptime */
	};

	/*
	 * Index of a limit kind.  LIMIT_CNT is the number of kinds and is used
	 * as the size of the second dimension of the bps/iops arrays in struct
	 * throtl_grp, so it must stay last.
	 */
	enum {
		LIMIT_LOW = 0,	/* first limit slot */
		LIMIT_MAX,	/* second limit slot */
		LIMIT_CNT,	/* count of limit slots, not a limit itself */
	};

	/*
	 * Per-group throttling state.  The embedded blkg_policy_data must stay
	 * the first member.  Two-element arrays below are per direction:
	 * has_rules[] is indexed with bio_data_dir() in blk_throtl_bio(), and
	 * the others are presumably indexed the same way (READ/WRITE) - confirm
	 * against blk-throttle.c.
	 */
	struct throtl_grp {
	/* must be the first member */
	struct blkg_policy_data pd;

	/*
	 * active throtl group service_queue member: links this group into a
	 * service_queue's pending_tree, which holds active child groups
	 * sorted by ->disptime
	 */
	struct rb_node rb_node;

	/* throtl_data this group belongs to */
	struct throtl_data *td;

	/* this group's service queue */
	struct throtl_service_queue service_queue;

	/*
	 * qnode_on_self is used when bios are directly queued to this
	 * throtl_grp so that local bios compete fairly with bios
	 * dispatched from children. qnode_on_parent is used when bios are
	 * dispatched from this throtl_grp into its parent and will compete
	 * with the sibling qnode_on_parents and the parent's
	 * qnode_on_self.
	 */
	struct throtl_qnode qnode_on_self[2];
	struct throtl_qnode qnode_on_parent[2];

	/*
	 * Dispatch time in jiffies. This is the estimated time when group
	 * will unthrottle and is ready to dispatch more bio. It is used as
	 * key to sort active groups in service tree.
	 */
	unsigned long disptime;

	/* group flag bits; the bit definitions are not visible in this header */
	unsigned int flags;

	/* are there any throtl rules between this group and td? */
	bool has_rules[2];

	/* internally used bytes per second rate limits */
	uint64_t bps[2][LIMIT_CNT];
	/* user configured bps limits */
	uint64_t bps_conf[2][LIMIT_CNT];

	/* internally used IOPS limits */
	unsigned int iops[2][LIMIT_CNT];
	/* user configured IOPS limits */
	unsigned int iops_conf[2][LIMIT_CNT];

	/* Number of bytes dispatched in current slice */
	uint64_t bytes_disp[2];
	/* Number of bio's dispatched in current slice */
	unsigned int io_disp[2];

	/* NOTE(review): presumably when the low limit was last exceeded - confirm */
	unsigned long last_low_overflow_time[2];

	/* NOTE(review): presumably snapshots of the dispatch counters above - confirm */
	uint64_t last_bytes_disp[2];
	unsigned int last_io_disp[2];

	/* NOTE(review): presumably the time the snapshots were last taken - confirm */
	unsigned long last_check_time;

	unsigned long latency_target; /* us */
	unsigned long latency_target_conf; /* us */
	/* When did we start a new slice */
	unsigned long slice_start[2];
	/* end of the slice that began at slice_start[] */
	unsigned long slice_end[2];

	unsigned long last_finish_time; /* ns / 1024 */
	unsigned long checked_last_finish_time; /* ns / 1024 */
	unsigned long avg_idletime; /* ns / 1024 */
	unsigned long idletime_threshold; /* us */
	unsigned long idletime_threshold_conf; /* us */

	unsigned int bio_cnt; /* total bios */
	unsigned int bad_bio_cnt; /* bios exceeding latency threshold */
	/* NOTE(review): presumably when bio_cnt/bad_bio_cnt are next reset - confirm */
	unsigned long bio_cnt_reset_time;

	/*
	 * NOTE(review): presumably counts of bio splits charged through
	 * blk_throtl_charge_bio_split(), plus a snapshot - confirm
	 */
	atomic_t io_split_cnt[2];
	atomic_t last_io_split_cnt[2];

	/* NOTE(review): presumably byte and I/O-count statistics - confirm */
	struct blkg_rwstat stat_bytes;
	struct blkg_rwstat stat_ios;
	};

	extern struct blkcg_policy blkcg_policy_throtl;

	/*
	 * Map a policy-data pointer to the throtl_grp that embeds it as ->pd.
	 * Returns NULL when @pd is NULL.  Both the parameter and the return
	 * value are pointers: the body tests @pd for NULL and blk_throtl_bio()
	 * stores the result in a struct throtl_grp pointer.
	 */
	static inline struct throtl_grp *pd_to_tg(struct blkg_policy_data *pd)
	{
		return pd ? container_of(pd, struct throtl_grp, pd) : NULL;
	}

	/*
	 * Look up @blkg's policy data for blkcg_policy_throtl and convert it
	 * to the owning throtl_grp.  Takes and returns pointers:
	 * blk_throtl_bio() passes bio->bi_blkg and dereferences the result
	 * with ->has_rules.
	 */
	static inline struct throtl_grp *blkg_to_tg(struct blkcg_gq *blkg)
	{
		return pd_to_tg(blkg_to_pd(blkg, &blkcg_policy_throtl));
	}

	/*
	* Internal throttling interface
	*/
	#ifndef CONFIG_BLK_DEV_THROTTLING
	/*
	 * Stubs used when CONFIG_BLK_DEV_THROTTLING is not defined: init
	 * reports success (0), blk_throtl_bio() reports false, the rest do
	 * nothing.
	 */
	static inline int blk_throtl_init(struct request_queue *q)
	{
		return 0;
	}

	static inline void blk_throtl_exit(struct request_queue *q)
	{
	}

	static inline void blk_throtl_register_queue(struct request_queue *q)
	{
	}

	static inline void blk_throtl_charge_bio_split(struct bio *bio)
	{
	}

	static inline bool blk_throtl_bio(struct bio *bio)
	{
		return false;
	}
	#else /* CONFIG_BLK_DEV_THROTTLING */
	/*
	 * Out-of-line throttling entry points, declared when
	 * CONFIG_BLK_DEV_THROTTLING is defined.  Their definitions are not in
	 * this header.
	 */
	int blk_throtl_init(struct request_queue *q);
	void blk_throtl_exit(struct request_queue *q);
	void blk_throtl_register_queue(struct request_queue *q);
	void blk_throtl_charge_bio_split(struct bio *bio);
	/* called by the inline blk_throtl_bio() once its early-out checks pass */
	bool __blk_throtl_bio(struct bio *bio);
	/*
	 * Inline front end for bio throttling.  Returns false without calling
	 * __blk_throtl_bio() when @bio already carries BIO_THROTTLED or when
	 * its group has no rules for the bio's data direction; otherwise
	 * returns whatever __blk_throtl_bio() returns.
	 */
	static inline bool blk_throtl_bio(struct bio *bio)
	{
		struct throtl_grp *tg = blkg_to_tg(bio->bi_blkg);

		/* short-circuit keeps the check order: flag first, then rules */
		if (bio_flagged(bio, BIO_THROTTLED) ||
		    !tg->has_rules[bio_data_dir(bio)])
			return false;

		return __blk_throtl_bio(bio);
	}
	#endif /* CONFIG_BLK_DEV_THROTTLING */

	#endif