xref: /linux/block/mq-deadline.c (revision f4b369c6fe0ceaba2da2daff8c9eb415f85926dd)
// SPDX-License-Identifier: GPL-2.0
/*
 *  MQ Deadline i/o scheduler - adaptation of the legacy deadline scheduler,
 *  for the blk-mq scheduling framework
 *
 *  Copyright (C) 2016 Jens Axboe <axboe@kernel.dk>
 */
80f783995STejun Heo #include <linux/kernel.h>
90f783995STejun Heo #include <linux/fs.h>
100f783995STejun Heo #include <linux/blkdev.h>
110f783995STejun Heo #include <linux/bio.h>
120f783995STejun Heo #include <linux/module.h>
130f783995STejun Heo #include <linux/slab.h>
140f783995STejun Heo #include <linux/init.h>
150f783995STejun Heo #include <linux/compiler.h>
160f783995STejun Heo #include <linux/rbtree.h>
170f783995STejun Heo #include <linux/sbitmap.h>
180f783995STejun Heo 
190f783995STejun Heo #include <trace/events/block.h>
200f783995STejun Heo 
212e9bc346SChristoph Hellwig #include "elevator.h"
220f783995STejun Heo #include "blk.h"
230f783995STejun Heo #include "blk-mq.h"
240f783995STejun Heo #include "blk-mq-debugfs.h"
250f783995STejun Heo #include "blk-mq-sched.h"
260f783995STejun Heo 
270f783995STejun Heo /*
280f783995STejun Heo  * See Documentation/block/deadline-iosched.rst
290f783995STejun Heo  */
300f783995STejun Heo static const int read_expire = HZ / 2;  /* max time before a read is submitted. */
310f783995STejun Heo static const int write_expire = 5 * HZ; /* ditto for writes, these limits are SOFT! */
32322cff70SBart Van Assche /*
33322cff70SBart Van Assche  * Time after which to dispatch lower priority requests even if higher
34322cff70SBart Van Assche  * priority requests are pending.
35322cff70SBart Van Assche  */
36322cff70SBart Van Assche static const int prio_aging_expire = 10 * HZ;
370f783995STejun Heo static const int writes_starved = 2;    /* max times reads can starve a write */
380f783995STejun Heo static const int fifo_batch = 16;       /* # of sequential requests treated as one
390f783995STejun Heo 				     by the above parameters. For throughput. */
400f783995STejun Heo 
410f783995STejun Heo enum dd_data_dir {
420f783995STejun Heo 	DD_READ		= READ,
430f783995STejun Heo 	DD_WRITE	= WRITE,
440f783995STejun Heo };
450f783995STejun Heo 
460f783995STejun Heo enum { DD_DIR_COUNT = 2 };
470f783995STejun Heo 
/*
 * Scheduler priority levels, used as index into deadline_data.per_prio[].
 * A lower value is dispatched first. DD_PRIO_MAX is the highest valid index.
 */
enum dd_prio {
	DD_RT_PRIO	= 0,
	DD_BE_PRIO	= 1,
	DD_IDLE_PRIO	= 2,
	DD_PRIO_MAX	= 2,
};

/* Number of priority levels, i.e. DD_PRIO_MAX + 1. */
enum { DD_PRIO_COUNT = 3 };
560f783995STejun Heo 
57bce0363eSBart Van Assche /*
58bce0363eSBart Van Assche  * I/O statistics per I/O priority. It is fine if these counters overflow.
59bce0363eSBart Van Assche  * What matters is that these counters are at least as wide as
60bce0363eSBart Van Assche  * log2(max_outstanding_requests).
61bce0363eSBart Van Assche  */
620f783995STejun Heo struct io_stats_per_prio {
63bce0363eSBart Van Assche 	uint32_t inserted;
64bce0363eSBart Van Assche 	uint32_t merged;
65bce0363eSBart Van Assche 	uint32_t dispatched;
66bce0363eSBart Van Assche 	atomic_t completed;
670f783995STejun Heo };
680f783995STejun Heo 
690f783995STejun Heo /*
700f783995STejun Heo  * Deadline scheduler data per I/O priority (enum dd_prio). Requests are
710f783995STejun Heo  * present on both sort_list[] and fifo_list[].
720f783995STejun Heo  */
730f783995STejun Heo struct dd_per_prio {
740f783995STejun Heo 	struct rb_root sort_list[DD_DIR_COUNT];
750f783995STejun Heo 	struct list_head fifo_list[DD_DIR_COUNT];
7683c46ed6SBart Van Assche 	/* Position of the most recently dispatched request. */
7783c46ed6SBart Van Assche 	sector_t latest_pos[DD_DIR_COUNT];
78bce0363eSBart Van Assche 	struct io_stats_per_prio stats;
790f783995STejun Heo };
800f783995STejun Heo 
810f783995STejun Heo struct deadline_data {
820f783995STejun Heo 	/*
830f783995STejun Heo 	 * run time data
840f783995STejun Heo 	 */
850f783995STejun Heo 
86d60055cfSBart Van Assche 	struct list_head dispatch;
870f783995STejun Heo 	struct dd_per_prio per_prio[DD_PRIO_COUNT];
880f783995STejun Heo 
890f783995STejun Heo 	/* Data direction of latest dispatched request. */
900f783995STejun Heo 	enum dd_data_dir last_dir;
910f783995STejun Heo 	unsigned int batching;		/* number of sequential requests made */
920f783995STejun Heo 	unsigned int starved;		/* times reads have starved writes */
930f783995STejun Heo 
940f783995STejun Heo 	/*
950f783995STejun Heo 	 * settings that change how the i/o scheduler behaves
960f783995STejun Heo 	 */
970f783995STejun Heo 	int fifo_expire[DD_DIR_COUNT];
980f783995STejun Heo 	int fifo_batch;
990f783995STejun Heo 	int writes_starved;
1000f783995STejun Heo 	int front_merges;
101322cff70SBart Van Assche 	int prio_aging_expire;
1020f783995STejun Heo 
1030f783995STejun Heo 	spinlock_t lock;
1040f783995STejun Heo };
1050f783995STejun Heo 
1060f783995STejun Heo /* Maps an I/O priority class to a deadline scheduler priority. */
1070f783995STejun Heo static const enum dd_prio ioprio_class_to_prio[] = {
1080f783995STejun Heo 	[IOPRIO_CLASS_NONE]	= DD_BE_PRIO,
1090f783995STejun Heo 	[IOPRIO_CLASS_RT]	= DD_RT_PRIO,
1100f783995STejun Heo 	[IOPRIO_CLASS_BE]	= DD_BE_PRIO,
1110f783995STejun Heo 	[IOPRIO_CLASS_IDLE]	= DD_IDLE_PRIO,
1120f783995STejun Heo };
1130f783995STejun Heo 
1140f783995STejun Heo static inline struct rb_root *
deadline_rb_root(struct dd_per_prio * per_prio,struct request * rq)1150f783995STejun Heo deadline_rb_root(struct dd_per_prio *per_prio, struct request *rq)
1160f783995STejun Heo {
1170f783995STejun Heo 	return &per_prio->sort_list[rq_data_dir(rq)];
1180f783995STejun Heo }
1190f783995STejun Heo 
1200f783995STejun Heo /*
1210f783995STejun Heo  * Returns the I/O priority class (IOPRIO_CLASS_*) that has been assigned to a
1220f783995STejun Heo  * request.
1230f783995STejun Heo  */
dd_rq_ioclass(struct request * rq)1240f783995STejun Heo static u8 dd_rq_ioclass(struct request *rq)
1250f783995STejun Heo {
1260f783995STejun Heo 	return IOPRIO_PRIO_CLASS(req_get_ioprio(rq));
1270f783995STejun Heo }
1280f783995STejun Heo 
1290f783995STejun Heo /*
130fde02699SDamien Le Moal  * Return the first request for which blk_rq_pos() >= @pos.
1310effb390SBart Van Assche  */
deadline_from_pos(struct dd_per_prio * per_prio,enum dd_data_dir data_dir,sector_t pos)13283c46ed6SBart Van Assche static inline struct request *deadline_from_pos(struct dd_per_prio *per_prio,
13383c46ed6SBart Van Assche 				enum dd_data_dir data_dir, sector_t pos)
13483c46ed6SBart Van Assche {
13583c46ed6SBart Van Assche 	struct rb_node *node = per_prio->sort_list[data_dir].rb_node;
13683c46ed6SBart Van Assche 	struct request *rq, *res = NULL;
13783c46ed6SBart Van Assche 
13883c46ed6SBart Van Assche 	while (node) {
13983c46ed6SBart Van Assche 		rq = rb_entry_rq(node);
14083c46ed6SBart Van Assche 		if (blk_rq_pos(rq) >= pos) {
14183c46ed6SBart Van Assche 			res = rq;
14283c46ed6SBart Van Assche 			node = node->rb_left;
14383c46ed6SBart Van Assche 		} else {
14483c46ed6SBart Van Assche 			node = node->rb_right;
14583c46ed6SBart Van Assche 		}
14683c46ed6SBart Van Assche 	}
14783c46ed6SBart Van Assche 	return res;
14883c46ed6SBart Van Assche }
14983c46ed6SBart Van Assche 
/*
 * Insert @rq into the sort tree of @per_prio for the data direction of @rq.
 */
static void
deadline_add_rq_rb(struct dd_per_prio *per_prio, struct request *rq)
{
	struct rb_root *root = deadline_rb_root(per_prio, rq);

	elv_rb_add(root, rq);
}
1570f783995STejun Heo 
/*
 * Remove @rq from the sort tree of @per_prio for the data direction of @rq.
 */
static inline void
deadline_del_rq_rb(struct dd_per_prio *per_prio, struct request *rq)
{
	elv_rb_del(deadline_rb_root(per_prio, rq), rq);
}
1630f783995STejun Heo 
1640f783995STejun Heo /*
1650f783995STejun Heo  * remove rq from rbtree and fifo.
1660f783995STejun Heo  */
deadline_remove_request(struct request_queue * q,struct dd_per_prio * per_prio,struct request * rq)1670f783995STejun Heo static void deadline_remove_request(struct request_queue *q,
1680f783995STejun Heo 				    struct dd_per_prio *per_prio,
1690f783995STejun Heo 				    struct request *rq)
1700f783995STejun Heo {
1710f783995STejun Heo 	list_del_init(&rq->queuelist);
1720f783995STejun Heo 
1730f783995STejun Heo 	/*
1740f783995STejun Heo 	 * We might not be on the rbtree, if we are doing an insert merge
1750f783995STejun Heo 	 */
1760f783995STejun Heo 	if (!RB_EMPTY_NODE(&rq->rb_node))
1770f783995STejun Heo 		deadline_del_rq_rb(per_prio, rq);
1780f783995STejun Heo 
1790f783995STejun Heo 	elv_rqhash_del(q, rq);
1800f783995STejun Heo 	if (q->last_merge == rq)
1810f783995STejun Heo 		q->last_merge = NULL;
1820f783995STejun Heo }
1830f783995STejun Heo 
dd_request_merged(struct request_queue * q,struct request * req,enum elv_merge type)1840f783995STejun Heo static void dd_request_merged(struct request_queue *q, struct request *req,
1850f783995STejun Heo 			      enum elv_merge type)
1860f783995STejun Heo {
1870f783995STejun Heo 	struct deadline_data *dd = q->elevator->elevator_data;
1880f783995STejun Heo 	const u8 ioprio_class = dd_rq_ioclass(req);
1890f783995STejun Heo 	const enum dd_prio prio = ioprio_class_to_prio[ioprio_class];
1900f783995STejun Heo 	struct dd_per_prio *per_prio = &dd->per_prio[prio];
1910f783995STejun Heo 
1920f783995STejun Heo 	/*
1930f783995STejun Heo 	 * if the merge was a front merge, we need to reposition request
1940f783995STejun Heo 	 */
1950f783995STejun Heo 	if (type == ELEVATOR_FRONT_MERGE) {
1960f783995STejun Heo 		elv_rb_del(deadline_rb_root(per_prio, req), req);
1970f783995STejun Heo 		deadline_add_rq_rb(per_prio, req);
1980f783995STejun Heo 	}
1990f783995STejun Heo }
2000f783995STejun Heo 
2010f783995STejun Heo /*
2020f783995STejun Heo  * Callback function that is invoked after @next has been merged into @req.
2030f783995STejun Heo  */
dd_merged_requests(struct request_queue * q,struct request * req,struct request * next)2040f783995STejun Heo static void dd_merged_requests(struct request_queue *q, struct request *req,
2050f783995STejun Heo 			       struct request *next)
2060f783995STejun Heo {
2070f783995STejun Heo 	struct deadline_data *dd = q->elevator->elevator_data;
2080f783995STejun Heo 	const u8 ioprio_class = dd_rq_ioclass(next);
2090f783995STejun Heo 	const enum dd_prio prio = ioprio_class_to_prio[ioprio_class];
2100f783995STejun Heo 
211bce0363eSBart Van Assche 	lockdep_assert_held(&dd->lock);
212bce0363eSBart Van Assche 
213bce0363eSBart Van Assche 	dd->per_prio[prio].stats.merged++;
2140f783995STejun Heo 
2150f783995STejun Heo 	/*
2160f783995STejun Heo 	 * if next expires before rq, assign its expire time to rq
2170f783995STejun Heo 	 * and move into next position (next will be deleted) in fifo
2180f783995STejun Heo 	 */
2190f783995STejun Heo 	if (!list_empty(&req->queuelist) && !list_empty(&next->queuelist)) {
2200f783995STejun Heo 		if (time_before((unsigned long)next->fifo_time,
2210f783995STejun Heo 				(unsigned long)req->fifo_time)) {
2220f783995STejun Heo 			list_move(&req->queuelist, &next->queuelist);
2230f783995STejun Heo 			req->fifo_time = next->fifo_time;
2240f783995STejun Heo 		}
2250f783995STejun Heo 	}
2260f783995STejun Heo 
2270f783995STejun Heo 	/*
2280f783995STejun Heo 	 * kill knowledge of next, this one is a goner
2290f783995STejun Heo 	 */
2300f783995STejun Heo 	deadline_remove_request(q, &dd->per_prio[prio], next);
2310f783995STejun Heo }
2320f783995STejun Heo 
2330f783995STejun Heo /*
2340f783995STejun Heo  * move an entry to dispatch queue
2350f783995STejun Heo  */
2360f783995STejun Heo static void
deadline_move_request(struct deadline_data * dd,struct dd_per_prio * per_prio,struct request * rq)2370f783995STejun Heo deadline_move_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
2380f783995STejun Heo 		      struct request *rq)
2390f783995STejun Heo {
2400f783995STejun Heo 	/*
2410f783995STejun Heo 	 * take it off the sort and fifo list
2420f783995STejun Heo 	 */
2430f783995STejun Heo 	deadline_remove_request(rq->q, per_prio, rq);
2440f783995STejun Heo }
2450f783995STejun Heo 
24632f64cadSBart Van Assche /* Number of requests queued for a given priority level. */
dd_queued(struct deadline_data * dd,enum dd_prio prio)24732f64cadSBart Van Assche static u32 dd_queued(struct deadline_data *dd, enum dd_prio prio)
24832f64cadSBart Van Assche {
249bce0363eSBart Van Assche 	const struct io_stats_per_prio *stats = &dd->per_prio[prio].stats;
250bce0363eSBart Van Assche 
251bce0363eSBart Van Assche 	lockdep_assert_held(&dd->lock);
252bce0363eSBart Van Assche 
253bce0363eSBart Van Assche 	return stats->inserted - atomic_read(&stats->completed);
25432f64cadSBart Van Assche }
25532f64cadSBart Van Assche 
2560f783995STejun Heo /*
257e0d85cdeSBart Van Assche  * deadline_check_fifo returns true if and only if there are expired requests
258e0d85cdeSBart Van Assche  * in the FIFO list. Requires !list_empty(&dd->fifo_list[data_dir]).
2590f783995STejun Heo  */
deadline_check_fifo(struct dd_per_prio * per_prio,enum dd_data_dir data_dir)260e0d85cdeSBart Van Assche static inline bool deadline_check_fifo(struct dd_per_prio *per_prio,
2610f783995STejun Heo 				       enum dd_data_dir data_dir)
2620f783995STejun Heo {
2630f783995STejun Heo 	struct request *rq = rq_entry_fifo(per_prio->fifo_list[data_dir].next);
2640f783995STejun Heo 
265e0d85cdeSBart Van Assche 	return time_is_before_eq_jiffies((unsigned long)rq->fifo_time);
2660f783995STejun Heo }
2670f783995STejun Heo 
2680f783995STejun Heo /*
2690f783995STejun Heo  * For the specified data direction, return the next request to
2700f783995STejun Heo  * dispatch using arrival ordered lists.
2710f783995STejun Heo  */
2720f783995STejun Heo static struct request *
deadline_fifo_request(struct deadline_data * dd,struct dd_per_prio * per_prio,enum dd_data_dir data_dir)2730f783995STejun Heo deadline_fifo_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
2740f783995STejun Heo 		      enum dd_data_dir data_dir)
2750f783995STejun Heo {
2760f783995STejun Heo 	if (list_empty(&per_prio->fifo_list[data_dir]))
2770f783995STejun Heo 		return NULL;
2780f783995STejun Heo 
279fde02699SDamien Le Moal 	return rq_entry_fifo(per_prio->fifo_list[data_dir].next);
2800f783995STejun Heo }
2810f783995STejun Heo 
2820f783995STejun Heo /*
2830f783995STejun Heo  * For the specified data direction, return the next request to
2840f783995STejun Heo  * dispatch using sector position sorted lists.
2850f783995STejun Heo  */
2860f783995STejun Heo static struct request *
deadline_next_request(struct deadline_data * dd,struct dd_per_prio * per_prio,enum dd_data_dir data_dir)2870f783995STejun Heo deadline_next_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
2880f783995STejun Heo 		      enum dd_data_dir data_dir)
2890f783995STejun Heo {
290fde02699SDamien Le Moal 	return deadline_from_pos(per_prio, data_dir,
29183c46ed6SBart Van Assche 				 per_prio->latest_pos[data_dir]);
2920f783995STejun Heo }
2930f783995STejun Heo 
2940f783995STejun Heo /*
295322cff70SBart Van Assche  * Returns true if and only if @rq started after @latest_start where
296322cff70SBart Van Assche  * @latest_start is in jiffies.
297322cff70SBart Van Assche  */
started_after(struct deadline_data * dd,struct request * rq,unsigned long latest_start)298322cff70SBart Van Assche static bool started_after(struct deadline_data *dd, struct request *rq,
299322cff70SBart Van Assche 			  unsigned long latest_start)
300322cff70SBart Van Assche {
301322cff70SBart Van Assche 	unsigned long start_time = (unsigned long)rq->fifo_time;
302322cff70SBart Van Assche 
303322cff70SBart Van Assche 	start_time -= dd->fifo_expire[rq_data_dir(rq)];
304322cff70SBart Van Assche 
305322cff70SBart Van Assche 	return time_after(start_time, latest_start);
306322cff70SBart Van Assche }
307322cff70SBart Van Assche 
dd_start_request(struct deadline_data * dd,enum dd_data_dir data_dir,struct request * rq)30893a358afSBart Van Assche static struct request *dd_start_request(struct deadline_data *dd,
30993a358afSBart Van Assche 					enum dd_data_dir data_dir,
31093a358afSBart Van Assche 					struct request *rq)
31193a358afSBart Van Assche {
31293a358afSBart Van Assche 	u8 ioprio_class = dd_rq_ioclass(rq);
31393a358afSBart Van Assche 	enum dd_prio prio = ioprio_class_to_prio[ioprio_class];
31493a358afSBart Van Assche 
31593a358afSBart Van Assche 	dd->per_prio[prio].latest_pos[data_dir] = blk_rq_pos(rq);
31693a358afSBart Van Assche 	dd->per_prio[prio].stats.dispatched++;
31793a358afSBart Van Assche 	rq->rq_flags |= RQF_STARTED;
31893a358afSBart Van Assche 	return rq;
31993a358afSBart Van Assche }
32093a358afSBart Van Assche 
321322cff70SBart Van Assche /*
3220f783995STejun Heo  * deadline_dispatch_requests selects the best request according to
323322cff70SBart Van Assche  * read/write expire, fifo_batch, etc and with a start time <= @latest_start.
3240f783995STejun Heo  */
__dd_dispatch_request(struct deadline_data * dd,struct dd_per_prio * per_prio,unsigned long latest_start)3250f783995STejun Heo static struct request *__dd_dispatch_request(struct deadline_data *dd,
326322cff70SBart Van Assche 					     struct dd_per_prio *per_prio,
327322cff70SBart Van Assche 					     unsigned long latest_start)
3280f783995STejun Heo {
3290f783995STejun Heo 	struct request *rq, *next_rq;
3300f783995STejun Heo 	enum dd_data_dir data_dir;
3310f783995STejun Heo 
3320f783995STejun Heo 	lockdep_assert_held(&dd->lock);
3330f783995STejun Heo 
3340f783995STejun Heo 	/*
3350f783995STejun Heo 	 * batches are currently reads XOR writes
3360f783995STejun Heo 	 */
3370f783995STejun Heo 	rq = deadline_next_request(dd, per_prio, dd->last_dir);
33883c46ed6SBart Van Assche 	if (rq && dd->batching < dd->fifo_batch) {
33945b46b6fSBart Van Assche 		/* we have a next request and are still entitled to batch */
34083c46ed6SBart Van Assche 		data_dir = rq_data_dir(rq);
3410f783995STejun Heo 		goto dispatch_request;
34283c46ed6SBart Van Assche 	}
3430f783995STejun Heo 
3440f783995STejun Heo 	/*
3450f783995STejun Heo 	 * at this point we are not running a batch. select the appropriate
3460f783995STejun Heo 	 * data direction (read / write)
3470f783995STejun Heo 	 */
3480f783995STejun Heo 
3490f783995STejun Heo 	if (!list_empty(&per_prio->fifo_list[DD_READ])) {
3500f783995STejun Heo 		BUG_ON(RB_EMPTY_ROOT(&per_prio->sort_list[DD_READ]));
3510f783995STejun Heo 
3520f783995STejun Heo 		if (deadline_fifo_request(dd, per_prio, DD_WRITE) &&
3530f783995STejun Heo 		    (dd->starved++ >= dd->writes_starved))
3540f783995STejun Heo 			goto dispatch_writes;
3550f783995STejun Heo 
3560f783995STejun Heo 		data_dir = DD_READ;
3570f783995STejun Heo 
3580f783995STejun Heo 		goto dispatch_find_request;
3590f783995STejun Heo 	}
3600f783995STejun Heo 
3610f783995STejun Heo 	/*
3620f783995STejun Heo 	 * there are either no reads or writes have been starved
3630f783995STejun Heo 	 */
3640f783995STejun Heo 
3650f783995STejun Heo 	if (!list_empty(&per_prio->fifo_list[DD_WRITE])) {
3660f783995STejun Heo dispatch_writes:
3670f783995STejun Heo 		BUG_ON(RB_EMPTY_ROOT(&per_prio->sort_list[DD_WRITE]));
3680f783995STejun Heo 
3690f783995STejun Heo 		dd->starved = 0;
3700f783995STejun Heo 
3710f783995STejun Heo 		data_dir = DD_WRITE;
3720f783995STejun Heo 
3730f783995STejun Heo 		goto dispatch_find_request;
3740f783995STejun Heo 	}
3750f783995STejun Heo 
3760f783995STejun Heo 	return NULL;
3770f783995STejun Heo 
3780f783995STejun Heo dispatch_find_request:
3790f783995STejun Heo 	/*
3800f783995STejun Heo 	 * we are not running a batch, find best request for selected data_dir
3810f783995STejun Heo 	 */
3820f783995STejun Heo 	next_rq = deadline_next_request(dd, per_prio, data_dir);
3830f783995STejun Heo 	if (deadline_check_fifo(per_prio, data_dir) || !next_rq) {
3840f783995STejun Heo 		/*
3850f783995STejun Heo 		 * A deadline has expired, the last request was in the other
3860f783995STejun Heo 		 * direction, or we have run out of higher-sectored requests.
3870f783995STejun Heo 		 * Start again from the request with the earliest expiry time.
3880f783995STejun Heo 		 */
3890f783995STejun Heo 		rq = deadline_fifo_request(dd, per_prio, data_dir);
3900f783995STejun Heo 	} else {
3910f783995STejun Heo 		/*
3920f783995STejun Heo 		 * The last req was the same dir and we have a next request in
3930f783995STejun Heo 		 * sort order. No expired requests so continue on from here.
3940f783995STejun Heo 		 */
3950f783995STejun Heo 		rq = next_rq;
3960f783995STejun Heo 	}
3970f783995STejun Heo 
3980f783995STejun Heo 	if (!rq)
3990f783995STejun Heo 		return NULL;
4000f783995STejun Heo 
4010f783995STejun Heo 	dd->last_dir = data_dir;
4020f783995STejun Heo 	dd->batching = 0;
4030f783995STejun Heo 
4040f783995STejun Heo dispatch_request:
405322cff70SBart Van Assche 	if (started_after(dd, rq, latest_start))
406322cff70SBart Van Assche 		return NULL;
407322cff70SBart Van Assche 
4080f783995STejun Heo 	/*
4090f783995STejun Heo 	 * rq is the selected appropriate request.
4100f783995STejun Heo 	 */
4110f783995STejun Heo 	dd->batching++;
4120f783995STejun Heo 	deadline_move_request(dd, per_prio, rq);
41393a358afSBart Van Assche 	return dd_start_request(dd, data_dir, rq);
4140f783995STejun Heo }
4150f783995STejun Heo 
4160f783995STejun Heo /*
417322cff70SBart Van Assche  * Check whether there are any requests with priority other than DD_RT_PRIO
418322cff70SBart Van Assche  * that were inserted more than prio_aging_expire jiffies ago.
419322cff70SBart Van Assche  */
dd_dispatch_prio_aged_requests(struct deadline_data * dd,unsigned long now)420322cff70SBart Van Assche static struct request *dd_dispatch_prio_aged_requests(struct deadline_data *dd,
421322cff70SBart Van Assche 						      unsigned long now)
422322cff70SBart Van Assche {
423322cff70SBart Van Assche 	struct request *rq;
424322cff70SBart Van Assche 	enum dd_prio prio;
425322cff70SBart Van Assche 	int prio_cnt;
426322cff70SBart Van Assche 
427322cff70SBart Van Assche 	lockdep_assert_held(&dd->lock);
428322cff70SBart Van Assche 
429322cff70SBart Van Assche 	prio_cnt = !!dd_queued(dd, DD_RT_PRIO) + !!dd_queued(dd, DD_BE_PRIO) +
430322cff70SBart Van Assche 		   !!dd_queued(dd, DD_IDLE_PRIO);
431322cff70SBart Van Assche 	if (prio_cnt < 2)
432322cff70SBart Van Assche 		return NULL;
433322cff70SBart Van Assche 
434322cff70SBart Van Assche 	for (prio = DD_BE_PRIO; prio <= DD_PRIO_MAX; prio++) {
435322cff70SBart Van Assche 		rq = __dd_dispatch_request(dd, &dd->per_prio[prio],
436322cff70SBart Van Assche 					   now - dd->prio_aging_expire);
437322cff70SBart Van Assche 		if (rq)
438322cff70SBart Van Assche 			return rq;
439322cff70SBart Van Assche 	}
440322cff70SBart Van Assche 
441322cff70SBart Van Assche 	return NULL;
442322cff70SBart Van Assche }
443322cff70SBart Van Assche 
444322cff70SBart Van Assche /*
4450f783995STejun Heo  * Called from blk_mq_run_hw_queue() -> __blk_mq_sched_dispatch_requests().
4460f783995STejun Heo  *
4470f783995STejun Heo  * One confusing aspect here is that we get called for a specific
4480f783995STejun Heo  * hardware queue, but we may return a request that is for a
4490f783995STejun Heo  * different hardware queue. This is because mq-deadline has shared
4500f783995STejun Heo  * state for all hardware queues, in terms of sorting, FIFOs, etc.
4510f783995STejun Heo  */
dd_dispatch_request(struct blk_mq_hw_ctx * hctx)4520f783995STejun Heo static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
4530f783995STejun Heo {
4540f783995STejun Heo 	struct deadline_data *dd = hctx->queue->elevator->elevator_data;
455322cff70SBart Van Assche 	const unsigned long now = jiffies;
4567b05bf77SJens Axboe 	struct request *rq;
4570f783995STejun Heo 	enum dd_prio prio;
4580f783995STejun Heo 
4590f783995STejun Heo 	spin_lock(&dd->lock);
460d60055cfSBart Van Assche 
461d60055cfSBart Van Assche 	if (!list_empty(&dd->dispatch)) {
462d60055cfSBart Van Assche 		rq = list_first_entry(&dd->dispatch, struct request, queuelist);
463d60055cfSBart Van Assche 		list_del_init(&rq->queuelist);
464d60055cfSBart Van Assche 		dd_start_request(dd, rq_data_dir(rq), rq);
465d60055cfSBart Van Assche 		goto unlock;
466d60055cfSBart Van Assche 	}
467d60055cfSBart Van Assche 
468322cff70SBart Van Assche 	rq = dd_dispatch_prio_aged_requests(dd, now);
4697b05bf77SJens Axboe 	if (rq)
470322cff70SBart Van Assche 		goto unlock;
471322cff70SBart Van Assche 
472322cff70SBart Van Assche 	/*
473322cff70SBart Van Assche 	 * Next, dispatch requests in priority order. Ignore lower priority
474322cff70SBart Van Assche 	 * requests if any higher priority requests are pending.
475322cff70SBart Van Assche 	 */
476322cff70SBart Van Assche 	for (prio = 0; prio <= DD_PRIO_MAX; prio++) {
477322cff70SBart Van Assche 		rq = __dd_dispatch_request(dd, &dd->per_prio[prio], now);
478322cff70SBart Van Assche 		if (rq || dd_queued(dd, prio))
4790f783995STejun Heo 			break;
4800f783995STejun Heo 	}
481322cff70SBart Van Assche 
482322cff70SBart Van Assche unlock:
4830f783995STejun Heo 	spin_unlock(&dd->lock);
4840f783995STejun Heo 
4850f783995STejun Heo 	return rq;
4860f783995STejun Heo }
4870f783995STejun Heo 
/*
 * Set the shallow depth of an allocation to the queue's async_depth, unless
 * @opf describes a synchronous read, in which case @data is left unchanged.
 */
static void dd_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data)
{
	if (!blk_mq_is_sync_read(opf))
		data->shallow_depth = data->q->async_depth;
}
4930f783995STejun Heo 
494*988bb1b9SYu Kuai /* Called by blk_mq_init_sched() and blk_mq_update_nr_requests(). */
dd_depth_updated(struct request_queue * q)4957d337eefSYu Kuai static void dd_depth_updated(struct request_queue *q)
4960f783995STejun Heo {
497*988bb1b9SYu Kuai 	blk_mq_set_min_shallow_depth(q, q->async_depth);
4980f783995STejun Heo }
4990f783995STejun Heo 
dd_exit_sched(struct elevator_queue * e)5000f783995STejun Heo static void dd_exit_sched(struct elevator_queue *e)
5010f783995STejun Heo {
5020f783995STejun Heo 	struct deadline_data *dd = e->elevator_data;
5030f783995STejun Heo 	enum dd_prio prio;
5040f783995STejun Heo 
5050f783995STejun Heo 	for (prio = 0; prio <= DD_PRIO_MAX; prio++) {
5060f783995STejun Heo 		struct dd_per_prio *per_prio = &dd->per_prio[prio];
507bce0363eSBart Van Assche 		const struct io_stats_per_prio *stats = &per_prio->stats;
508bce0363eSBart Van Assche 		uint32_t queued;
5090f783995STejun Heo 
5100f783995STejun Heo 		WARN_ON_ONCE(!list_empty(&per_prio->fifo_list[DD_READ]));
5110f783995STejun Heo 		WARN_ON_ONCE(!list_empty(&per_prio->fifo_list[DD_WRITE]));
5120f783995STejun Heo 
513bce0363eSBart Van Assche 		spin_lock(&dd->lock);
514bce0363eSBart Van Assche 		queued = dd_queued(dd, prio);
515bce0363eSBart Van Assche 		spin_unlock(&dd->lock);
516bce0363eSBart Van Assche 
517bce0363eSBart Van Assche 		WARN_ONCE(queued != 0,
518bce0363eSBart Van Assche 			  "statistics for priority %d: i %u m %u d %u c %u\n",
519bce0363eSBart Van Assche 			  prio, stats->inserted, stats->merged,
520bce0363eSBart Van Assche 			  stats->dispatched, atomic_read(&stats->completed));
521bce0363eSBart Van Assche 	}
5220f783995STejun Heo 
5230f783995STejun Heo 	kfree(dd);
5240f783995STejun Heo }
5250f783995STejun Heo 
5260f783995STejun Heo /*
5270f783995STejun Heo  * initialize elevator private data (deadline_data).
5280f783995STejun Heo  */
dd_init_sched(struct request_queue * q,struct elevator_queue * eq)52949811586SNilay Shroff static int dd_init_sched(struct request_queue *q, struct elevator_queue *eq)
5300f783995STejun Heo {
5310f783995STejun Heo 	struct deadline_data *dd;
5320f783995STejun Heo 	enum dd_prio prio;
5330f783995STejun Heo 
5340f783995STejun Heo 	dd = kzalloc_node(sizeof(*dd), GFP_KERNEL, q->node);
5350f783995STejun Heo 	if (!dd)
53649811586SNilay Shroff 		return -ENOMEM;
5370f783995STejun Heo 
5380f783995STejun Heo 	eq->elevator_data = dd;
5390f783995STejun Heo 
540d60055cfSBart Van Assche 	INIT_LIST_HEAD(&dd->dispatch);
5410f783995STejun Heo 	for (prio = 0; prio <= DD_PRIO_MAX; prio++) {
5420f783995STejun Heo 		struct dd_per_prio *per_prio = &dd->per_prio[prio];
5430f783995STejun Heo 
5440f783995STejun Heo 		INIT_LIST_HEAD(&per_prio->fifo_list[DD_READ]);
5450f783995STejun Heo 		INIT_LIST_HEAD(&per_prio->fifo_list[DD_WRITE]);
5460f783995STejun Heo 		per_prio->sort_list[DD_READ] = RB_ROOT;
5470f783995STejun Heo 		per_prio->sort_list[DD_WRITE] = RB_ROOT;
5480f783995STejun Heo 	}
5490f783995STejun Heo 	dd->fifo_expire[DD_READ] = read_expire;
5500f783995STejun Heo 	dd->fifo_expire[DD_WRITE] = write_expire;
5510f783995STejun Heo 	dd->writes_starved = writes_starved;
5520f783995STejun Heo 	dd->front_merges = 1;
5530f783995STejun Heo 	dd->last_dir = DD_WRITE;
5540f783995STejun Heo 	dd->fifo_batch = fifo_batch;
555322cff70SBart Van Assche 	dd->prio_aging_expire = prio_aging_expire;
5560f783995STejun Heo 	spin_lock_init(&dd->lock);
5570f783995STejun Heo 
5584d337cebSMing Lei 	/* We dispatch from request queue wide instead of hw queue */
5594d337cebSMing Lei 	blk_queue_flag_set(QUEUE_FLAG_SQ_SCHED, q);
5604d337cebSMing Lei 
5610f783995STejun Heo 	q->elevator = eq;
562*988bb1b9SYu Kuai 	q->async_depth = q->nr_requests;
5637d337eefSYu Kuai 	dd_depth_updated(q);
5640f783995STejun Heo 	return 0;
5650f783995STejun Heo }
5660f783995STejun Heo 
5670f783995STejun Heo /*
5680f783995STejun Heo  * Try to merge @bio into an existing request. If @bio has been merged into
5690f783995STejun Heo  * an existing request, store the pointer to that request into *@rq.
5700f783995STejun Heo  */
dd_request_merge(struct request_queue * q,struct request ** rq,struct bio * bio)5710f783995STejun Heo static int dd_request_merge(struct request_queue *q, struct request **rq,
5720f783995STejun Heo 			    struct bio *bio)
5730f783995STejun Heo {
5740f783995STejun Heo 	struct deadline_data *dd = q->elevator->elevator_data;
5750f783995STejun Heo 	const u8 ioprio_class = IOPRIO_PRIO_CLASS(bio->bi_ioprio);
5760f783995STejun Heo 	const enum dd_prio prio = ioprio_class_to_prio[ioprio_class];
5770f783995STejun Heo 	struct dd_per_prio *per_prio = &dd->per_prio[prio];
5780f783995STejun Heo 	sector_t sector = bio_end_sector(bio);
5790f783995STejun Heo 	struct request *__rq;
5800f783995STejun Heo 
5810f783995STejun Heo 	if (!dd->front_merges)
5820f783995STejun Heo 		return ELEVATOR_NO_MERGE;
5830f783995STejun Heo 
5840f783995STejun Heo 	__rq = elv_rb_find(&per_prio->sort_list[bio_data_dir(bio)], sector);
5850f783995STejun Heo 	if (__rq) {
5860f783995STejun Heo 		BUG_ON(sector != blk_rq_pos(__rq));
5870f783995STejun Heo 
5880f783995STejun Heo 		if (elv_bio_merge_ok(__rq, bio)) {
5890f783995STejun Heo 			*rq = __rq;
59067936911SLinus Torvalds 			if (blk_discard_mergable(__rq))
59167936911SLinus Torvalds 				return ELEVATOR_DISCARD_MERGE;
5920f783995STejun Heo 			return ELEVATOR_FRONT_MERGE;
5930f783995STejun Heo 		}
5940f783995STejun Heo 	}
5950f783995STejun Heo 
5960f783995STejun Heo 	return ELEVATOR_NO_MERGE;
5970f783995STejun Heo }
5980f783995STejun Heo 
5990f783995STejun Heo /*
6000f783995STejun Heo  * Attempt to merge a bio into an existing request. This function is called
6010f783995STejun Heo  * before @bio is associated with a request.
6020f783995STejun Heo  */
dd_bio_merge(struct request_queue * q,struct bio * bio,unsigned int nr_segs)6030f783995STejun Heo static bool dd_bio_merge(struct request_queue *q, struct bio *bio,
6040f783995STejun Heo 		unsigned int nr_segs)
6050f783995STejun Heo {
6060f783995STejun Heo 	struct deadline_data *dd = q->elevator->elevator_data;
6070f783995STejun Heo 	struct request *free = NULL;
6080f783995STejun Heo 	bool ret;
6090f783995STejun Heo 
6100f783995STejun Heo 	spin_lock(&dd->lock);
6110f783995STejun Heo 	ret = blk_mq_sched_try_merge(q, bio, nr_segs, &free);
6120f783995STejun Heo 	spin_unlock(&dd->lock);
6130f783995STejun Heo 
6140f783995STejun Heo 	if (free)
6150f783995STejun Heo 		blk_mq_free_request(free);
6160f783995STejun Heo 
6170f783995STejun Heo 	return ret;
6180f783995STejun Heo }
6190f783995STejun Heo 
6200f783995STejun Heo /*
6210f783995STejun Heo  * add rq to rbtree and fifo
6220f783995STejun Heo  */
dd_insert_request(struct blk_mq_hw_ctx * hctx,struct request * rq,blk_insert_t flags,struct list_head * free)6230f783995STejun Heo static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
624b2097bd2SBart Van Assche 			      blk_insert_t flags, struct list_head *free)
6250f783995STejun Heo {
6260f783995STejun Heo 	struct request_queue *q = hctx->queue;
6270f783995STejun Heo 	struct deadline_data *dd = q->elevator->elevator_data;
6280f783995STejun Heo 	const enum dd_data_dir data_dir = rq_data_dir(rq);
6290f783995STejun Heo 	u16 ioprio = req_get_ioprio(rq);
6300f783995STejun Heo 	u8 ioprio_class = IOPRIO_PRIO_CLASS(ioprio);
6310f783995STejun Heo 	struct dd_per_prio *per_prio;
6320f783995STejun Heo 	enum dd_prio prio;
6330f783995STejun Heo 
6340f783995STejun Heo 	lockdep_assert_held(&dd->lock);
6350f783995STejun Heo 
6360f783995STejun Heo 	prio = ioprio_class_to_prio[ioprio_class];
637bce0363eSBart Van Assche 	per_prio = &dd->per_prio[prio];
6381b0cab32SChristoph Hellwig 	if (!rq->elv.priv[0])
639bce0363eSBart Van Assche 		per_prio->stats.inserted++;
6401b0cab32SChristoph Hellwig 	rq->elv.priv[0] = per_prio;
6410f783995STejun Heo 
642b2097bd2SBart Van Assche 	if (blk_mq_sched_try_insert_merge(q, rq, free))
6430f783995STejun Heo 		return;
6440f783995STejun Heo 
6450f783995STejun Heo 	trace_block_rq_insert(rq);
6460f783995STejun Heo 
64793fffe16SChristoph Hellwig 	if (flags & BLK_MQ_INSERT_AT_HEAD) {
648d60055cfSBart Van Assche 		list_add(&rq->queuelist, &dd->dispatch);
649725f22a1SBart Van Assche 		rq->fifo_time = jiffies;
6500f783995STejun Heo 	} else {
6510f783995STejun Heo 		deadline_add_rq_rb(per_prio, rq);
6520f783995STejun Heo 
6530f783995STejun Heo 		if (rq_mergeable(rq)) {
6540f783995STejun Heo 			elv_rqhash_add(q, rq);
6550f783995STejun Heo 			if (!q->last_merge)
6560f783995STejun Heo 				q->last_merge = rq;
6570f783995STejun Heo 		}
6580f783995STejun Heo 
6590f783995STejun Heo 		/*
6600f783995STejun Heo 		 * set expire time and add to fifo list
6610f783995STejun Heo 		 */
6620f783995STejun Heo 		rq->fifo_time = jiffies + dd->fifo_expire[data_dir];
663e01424faSBart Van Assche 		list_add_tail(&rq->queuelist, &per_prio->fifo_list[data_dir]);
6640f783995STejun Heo 	}
6650f783995STejun Heo }
6660f783995STejun Heo 
6670f783995STejun Heo /*
6680aeb7ebfSCaleb Sander Mateos  * Called from blk_mq_insert_request() or blk_mq_dispatch_list().
6690f783995STejun Heo  */
dd_insert_requests(struct blk_mq_hw_ctx * hctx,struct list_head * list,blk_insert_t flags)6700f783995STejun Heo static void dd_insert_requests(struct blk_mq_hw_ctx *hctx,
67193fffe16SChristoph Hellwig 			       struct list_head *list,
67293fffe16SChristoph Hellwig 			       blk_insert_t flags)
6730f783995STejun Heo {
6740f783995STejun Heo 	struct request_queue *q = hctx->queue;
6750f783995STejun Heo 	struct deadline_data *dd = q->elevator->elevator_data;
676b2097bd2SBart Van Assche 	LIST_HEAD(free);
6770f783995STejun Heo 
6780f783995STejun Heo 	spin_lock(&dd->lock);
6790f783995STejun Heo 	while (!list_empty(list)) {
6800f783995STejun Heo 		struct request *rq;
6810f783995STejun Heo 
6820f783995STejun Heo 		rq = list_first_entry(list, struct request, queuelist);
6830f783995STejun Heo 		list_del_init(&rq->queuelist);
684b2097bd2SBart Van Assche 		dd_insert_request(hctx, rq, flags, &free);
6850f783995STejun Heo 	}
6860f783995STejun Heo 	spin_unlock(&dd->lock);
687b2097bd2SBart Van Assche 
688b2097bd2SBart Van Assche 	blk_mq_free_requests(&free);
6890f783995STejun Heo }
6900f783995STejun Heo 
691b6d2b054SBart Van Assche /* Callback from inside blk_mq_rq_ctx_init(). */
dd_prepare_request(struct request * rq)6920f783995STejun Heo static void dd_prepare_request(struct request *rq)
6930f783995STejun Heo {
694b6d2b054SBart Van Assche 	rq->elv.priv[0] = NULL;
6950f783995STejun Heo }
6960f783995STejun Heo 
6970f783995STejun Heo /*
6980f783995STejun Heo  * Callback from inside blk_mq_free_request().
6990f783995STejun Heo  */
dd_finish_request(struct request * rq)7000f783995STejun Heo static void dd_finish_request(struct request *rq)
7010f783995STejun Heo {
7021b0cab32SChristoph Hellwig 	struct dd_per_prio *per_prio = rq->elv.priv[0];
7030f783995STejun Heo 
704b6d2b054SBart Van Assche 	/*
705b6d2b054SBart Van Assche 	 * The block layer core may call dd_finish_request() without having
706e2c7275dSBart Van Assche 	 * called dd_insert_requests(). Skip requests that bypassed I/O
707e2c7275dSBart Van Assche 	 * scheduling. See also blk_mq_request_bypass_insert().
708b6d2b054SBart Van Assche 	 */
7091b0cab32SChristoph Hellwig 	if (per_prio)
710bce0363eSBart Van Assche 		atomic_inc(&per_prio->stats.completed);
7110f783995STejun Heo }
7120f783995STejun Heo 
dd_has_work_for_prio(struct dd_per_prio * per_prio)7130f783995STejun Heo static bool dd_has_work_for_prio(struct dd_per_prio *per_prio)
7140f783995STejun Heo {
715d60055cfSBart Van Assche 	return !list_empty_careful(&per_prio->fifo_list[DD_READ]) ||
7160f783995STejun Heo 		!list_empty_careful(&per_prio->fifo_list[DD_WRITE]);
7170f783995STejun Heo }
7180f783995STejun Heo 
dd_has_work(struct blk_mq_hw_ctx * hctx)7190f783995STejun Heo static bool dd_has_work(struct blk_mq_hw_ctx *hctx)
7200f783995STejun Heo {
7210f783995STejun Heo 	struct deadline_data *dd = hctx->queue->elevator->elevator_data;
7220f783995STejun Heo 	enum dd_prio prio;
7230f783995STejun Heo 
724d60055cfSBart Van Assche 	if (!list_empty_careful(&dd->dispatch))
725d60055cfSBart Van Assche 		return true;
726d60055cfSBart Van Assche 
7270f783995STejun Heo 	for (prio = 0; prio <= DD_PRIO_MAX; prio++)
7280f783995STejun Heo 		if (dd_has_work_for_prio(&dd->per_prio[prio]))
7290f783995STejun Heo 			return true;
7300f783995STejun Heo 
7310f783995STejun Heo 	return false;
7320f783995STejun Heo }
7330f783995STejun Heo 
7340f783995STejun Heo /*
7350f783995STejun Heo  * sysfs parts below
7360f783995STejun Heo  */
7370f783995STejun Heo #define SHOW_INT(__FUNC, __VAR)						\
7380f783995STejun Heo static ssize_t __FUNC(struct elevator_queue *e, char *page)		\
7390f783995STejun Heo {									\
7400f783995STejun Heo 	struct deadline_data *dd = e->elevator_data;			\
7410f783995STejun Heo 									\
7420f783995STejun Heo 	return sysfs_emit(page, "%d\n", __VAR);				\
7430f783995STejun Heo }
7440f783995STejun Heo #define SHOW_JIFFIES(__FUNC, __VAR) SHOW_INT(__FUNC, jiffies_to_msecs(__VAR))
7450f783995STejun Heo SHOW_JIFFIES(deadline_read_expire_show, dd->fifo_expire[DD_READ]);
7460f783995STejun Heo SHOW_JIFFIES(deadline_write_expire_show, dd->fifo_expire[DD_WRITE]);
747322cff70SBart Van Assche SHOW_JIFFIES(deadline_prio_aging_expire_show, dd->prio_aging_expire);
7480f783995STejun Heo SHOW_INT(deadline_writes_starved_show, dd->writes_starved);
7490f783995STejun Heo SHOW_INT(deadline_front_merges_show, dd->front_merges);
7500f783995STejun Heo SHOW_INT(deadline_fifo_batch_show, dd->fifo_batch);
7510f783995STejun Heo #undef SHOW_INT
7520f783995STejun Heo #undef SHOW_JIFFIES
7530f783995STejun Heo 
7540f783995STejun Heo #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV)			\
7550f783995STejun Heo static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count)	\
7560f783995STejun Heo {									\
7570f783995STejun Heo 	struct deadline_data *dd = e->elevator_data;			\
7580f783995STejun Heo 	int __data, __ret;						\
7590f783995STejun Heo 									\
7600f783995STejun Heo 	__ret = kstrtoint(page, 0, &__data);				\
7610f783995STejun Heo 	if (__ret < 0)							\
7620f783995STejun Heo 		return __ret;						\
7630f783995STejun Heo 	if (__data < (MIN))						\
7640f783995STejun Heo 		__data = (MIN);						\
7650f783995STejun Heo 	else if (__data > (MAX))					\
7660f783995STejun Heo 		__data = (MAX);						\
7670f783995STejun Heo 	*(__PTR) = __CONV(__data);					\
7680f783995STejun Heo 	return count;							\
7690f783995STejun Heo }
7700f783995STejun Heo #define STORE_INT(__FUNC, __PTR, MIN, MAX)				\
7710f783995STejun Heo 	STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, )
7720f783995STejun Heo #define STORE_JIFFIES(__FUNC, __PTR, MIN, MAX)				\
7730f783995STejun Heo 	STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, msecs_to_jiffies)
7740f783995STejun Heo STORE_JIFFIES(deadline_read_expire_store, &dd->fifo_expire[DD_READ], 0, INT_MAX);
7750f783995STejun Heo STORE_JIFFIES(deadline_write_expire_store, &dd->fifo_expire[DD_WRITE], 0, INT_MAX);
776322cff70SBart Van Assche STORE_JIFFIES(deadline_prio_aging_expire_store, &dd->prio_aging_expire, 0, INT_MAX);
7770f783995STejun Heo STORE_INT(deadline_writes_starved_store, &dd->writes_starved, INT_MIN, INT_MAX);
7780f783995STejun Heo STORE_INT(deadline_front_merges_store, &dd->front_merges, 0, 1);
7790f783995STejun Heo STORE_INT(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX);
7800f783995STejun Heo #undef STORE_FUNCTION
7810f783995STejun Heo #undef STORE_INT
7820f783995STejun Heo #undef STORE_JIFFIES
7830f783995STejun Heo 
7840f783995STejun Heo #define DD_ATTR(name) \
7850f783995STejun Heo 	__ATTR(name, 0644, deadline_##name##_show, deadline_##name##_store)
7860f783995STejun Heo 
7878686e1deSThomas Weißschuh static const struct elv_fs_entry deadline_attrs[] = {
7880f783995STejun Heo 	DD_ATTR(read_expire),
7890f783995STejun Heo 	DD_ATTR(write_expire),
7900f783995STejun Heo 	DD_ATTR(writes_starved),
7910f783995STejun Heo 	DD_ATTR(front_merges),
7920f783995STejun Heo 	DD_ATTR(fifo_batch),
793322cff70SBart Van Assche 	DD_ATTR(prio_aging_expire),
7940f783995STejun Heo 	__ATTR_NULL
7950f783995STejun Heo };
7960f783995STejun Heo 
7970f783995STejun Heo #ifdef CONFIG_BLK_DEBUG_FS
7980f783995STejun Heo #define DEADLINE_DEBUGFS_DDIR_ATTRS(prio, data_dir, name)		\
7990f783995STejun Heo static void *deadline_##name##_fifo_start(struct seq_file *m,		\
8000f783995STejun Heo 					  loff_t *pos)			\
8010f783995STejun Heo 	__acquires(&dd->lock)						\
8020f783995STejun Heo {									\
8030f783995STejun Heo 	struct request_queue *q = m->private;				\
8040f783995STejun Heo 	struct deadline_data *dd = q->elevator->elevator_data;		\
8050f783995STejun Heo 	struct dd_per_prio *per_prio = &dd->per_prio[prio];		\
8060f783995STejun Heo 									\
8070f783995STejun Heo 	spin_lock(&dd->lock);						\
8080f783995STejun Heo 	return seq_list_start(&per_prio->fifo_list[data_dir], *pos);	\
8090f783995STejun Heo }									\
8100f783995STejun Heo 									\
8110f783995STejun Heo static void *deadline_##name##_fifo_next(struct seq_file *m, void *v,	\
8120f783995STejun Heo 					 loff_t *pos)			\
8130f783995STejun Heo {									\
8140f783995STejun Heo 	struct request_queue *q = m->private;				\
8150f783995STejun Heo 	struct deadline_data *dd = q->elevator->elevator_data;		\
8160f783995STejun Heo 	struct dd_per_prio *per_prio = &dd->per_prio[prio];		\
8170f783995STejun Heo 									\
8180f783995STejun Heo 	return seq_list_next(v, &per_prio->fifo_list[data_dir], pos);	\
8190f783995STejun Heo }									\
8200f783995STejun Heo 									\
8210f783995STejun Heo static void deadline_##name##_fifo_stop(struct seq_file *m, void *v)	\
8220f783995STejun Heo 	__releases(&dd->lock)						\
8230f783995STejun Heo {									\
8240f783995STejun Heo 	struct request_queue *q = m->private;				\
8250f783995STejun Heo 	struct deadline_data *dd = q->elevator->elevator_data;		\
8260f783995STejun Heo 									\
8270f783995STejun Heo 	spin_unlock(&dd->lock);						\
8280f783995STejun Heo }									\
8290f783995STejun Heo 									\
8300f783995STejun Heo static const struct seq_operations deadline_##name##_fifo_seq_ops = {	\
8310f783995STejun Heo 	.start	= deadline_##name##_fifo_start,				\
8320f783995STejun Heo 	.next	= deadline_##name##_fifo_next,				\
8330f783995STejun Heo 	.stop	= deadline_##name##_fifo_stop,				\
8340f783995STejun Heo 	.show	= blk_mq_debugfs_rq_show,				\
8350f783995STejun Heo };									\
8360f783995STejun Heo 									\
8370f783995STejun Heo static int deadline_##name##_next_rq_show(void *data,			\
8380f783995STejun Heo 					  struct seq_file *m)		\
8390f783995STejun Heo {									\
8400f783995STejun Heo 	struct request_queue *q = data;					\
8410f783995STejun Heo 	struct deadline_data *dd = q->elevator->elevator_data;		\
8420f783995STejun Heo 	struct dd_per_prio *per_prio = &dd->per_prio[prio];		\
84383c46ed6SBart Van Assche 	struct request *rq;						\
8440f783995STejun Heo 									\
84583c46ed6SBart Van Assche 	rq = deadline_from_pos(per_prio, data_dir,			\
84683c46ed6SBart Van Assche 			       per_prio->latest_pos[data_dir]);		\
8470f783995STejun Heo 	if (rq)								\
8480f783995STejun Heo 		__blk_mq_debugfs_rq_show(m, rq);			\
8490f783995STejun Heo 	return 0;							\
8500f783995STejun Heo }
8510f783995STejun Heo 
8520f783995STejun Heo DEADLINE_DEBUGFS_DDIR_ATTRS(DD_RT_PRIO, DD_READ, read0);
8530f783995STejun Heo DEADLINE_DEBUGFS_DDIR_ATTRS(DD_RT_PRIO, DD_WRITE, write0);
8540f783995STejun Heo DEADLINE_DEBUGFS_DDIR_ATTRS(DD_BE_PRIO, DD_READ, read1);
8550f783995STejun Heo DEADLINE_DEBUGFS_DDIR_ATTRS(DD_BE_PRIO, DD_WRITE, write1);
8560f783995STejun Heo DEADLINE_DEBUGFS_DDIR_ATTRS(DD_IDLE_PRIO, DD_READ, read2);
8570f783995STejun Heo DEADLINE_DEBUGFS_DDIR_ATTRS(DD_IDLE_PRIO, DD_WRITE, write2);
8580f783995STejun Heo #undef DEADLINE_DEBUGFS_DDIR_ATTRS
8590f783995STejun Heo 
deadline_batching_show(void * data,struct seq_file * m)8600f783995STejun Heo static int deadline_batching_show(void *data, struct seq_file *m)
8610f783995STejun Heo {
8620f783995STejun Heo 	struct request_queue *q = data;
8630f783995STejun Heo 	struct deadline_data *dd = q->elevator->elevator_data;
8640f783995STejun Heo 
8650f783995STejun Heo 	seq_printf(m, "%u\n", dd->batching);
8660f783995STejun Heo 	return 0;
8670f783995STejun Heo }
8680f783995STejun Heo 
deadline_starved_show(void * data,struct seq_file * m)8690f783995STejun Heo static int deadline_starved_show(void *data, struct seq_file *m)
8700f783995STejun Heo {
8710f783995STejun Heo 	struct request_queue *q = data;
8720f783995STejun Heo 	struct deadline_data *dd = q->elevator->elevator_data;
8730f783995STejun Heo 
8740f783995STejun Heo 	seq_printf(m, "%u\n", dd->starved);
8750f783995STejun Heo 	return 0;
8760f783995STejun Heo }
8770f783995STejun Heo 
dd_queued_show(void * data,struct seq_file * m)8780f783995STejun Heo static int dd_queued_show(void *data, struct seq_file *m)
8790f783995STejun Heo {
8800f783995STejun Heo 	struct request_queue *q = data;
8810f783995STejun Heo 	struct deadline_data *dd = q->elevator->elevator_data;
882bce0363eSBart Van Assche 	u32 rt, be, idle;
8830f783995STejun Heo 
884bce0363eSBart Van Assche 	spin_lock(&dd->lock);
885bce0363eSBart Van Assche 	rt = dd_queued(dd, DD_RT_PRIO);
886bce0363eSBart Van Assche 	be = dd_queued(dd, DD_BE_PRIO);
887bce0363eSBart Van Assche 	idle = dd_queued(dd, DD_IDLE_PRIO);
888bce0363eSBart Van Assche 	spin_unlock(&dd->lock);
889bce0363eSBart Van Assche 
890bce0363eSBart Van Assche 	seq_printf(m, "%u %u %u\n", rt, be, idle);
891bce0363eSBart Van Assche 
8920f783995STejun Heo 	return 0;
8930f783995STejun Heo }
8940f783995STejun Heo 
8950f783995STejun Heo /* Number of requests owned by the block driver for a given priority. */
dd_owned_by_driver(struct deadline_data * dd,enum dd_prio prio)8960f783995STejun Heo static u32 dd_owned_by_driver(struct deadline_data *dd, enum dd_prio prio)
8970f783995STejun Heo {
898bce0363eSBart Van Assche 	const struct io_stats_per_prio *stats = &dd->per_prio[prio].stats;
899bce0363eSBart Van Assche 
900bce0363eSBart Van Assche 	lockdep_assert_held(&dd->lock);
901bce0363eSBart Van Assche 
902bce0363eSBart Van Assche 	return stats->dispatched + stats->merged -
903bce0363eSBart Van Assche 		atomic_read(&stats->completed);
9040f783995STejun Heo }
9050f783995STejun Heo 
dd_owned_by_driver_show(void * data,struct seq_file * m)9060f783995STejun Heo static int dd_owned_by_driver_show(void *data, struct seq_file *m)
9070f783995STejun Heo {
9080f783995STejun Heo 	struct request_queue *q = data;
9090f783995STejun Heo 	struct deadline_data *dd = q->elevator->elevator_data;
910bce0363eSBart Van Assche 	u32 rt, be, idle;
9110f783995STejun Heo 
912bce0363eSBart Van Assche 	spin_lock(&dd->lock);
913bce0363eSBart Van Assche 	rt = dd_owned_by_driver(dd, DD_RT_PRIO);
914bce0363eSBart Van Assche 	be = dd_owned_by_driver(dd, DD_BE_PRIO);
915bce0363eSBart Van Assche 	idle = dd_owned_by_driver(dd, DD_IDLE_PRIO);
916bce0363eSBart Van Assche 	spin_unlock(&dd->lock);
917bce0363eSBart Van Assche 
918bce0363eSBart Van Assche 	seq_printf(m, "%u %u %u\n", rt, be, idle);
919bce0363eSBart Van Assche 
9200f783995STejun Heo 	return 0;
9210f783995STejun Heo }
9220f783995STejun Heo 
/*
 * seq_file start callback for the dispatch list. Takes dd->lock, which
 * deadline_dispatch_stop() releases.
 */
static void *deadline_dispatch_start(struct seq_file *m, loff_t *pos)
	__acquires(&dd->lock)
{
	struct request_queue *queue = m->private;
	struct deadline_data *dd = queue->elevator->elevator_data;
	struct list_head *head = &dd->dispatch;

	spin_lock(&dd->lock);

	return seq_list_start(head, *pos);
}
9320f783995STejun Heo 
/* seq_file next callback: advance from @v along the dispatch list. */
static void *deadline_dispatch_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct request_queue *queue = m->private;
	struct deadline_data *dd = queue->elevator->elevator_data;
	struct list_head *head = &dd->dispatch;

	return seq_list_next(v, head, pos);
}
940d60055cfSBart Van Assche 
deadline_dispatch_stop(struct seq_file * m,void * v)941d60055cfSBart Van Assche static void deadline_dispatch_stop(struct seq_file *m, void *v)
942d60055cfSBart Van Assche 	__releases(&dd->lock)
943d60055cfSBart Van Assche {
944d60055cfSBart Van Assche 	struct request_queue *q = m->private;
945d60055cfSBart Van Assche 	struct deadline_data *dd = q->elevator->elevator_data;
946d60055cfSBart Van Assche 
947d60055cfSBart Van Assche 	spin_unlock(&dd->lock);
948d60055cfSBart Van Assche }
949d60055cfSBart Van Assche 
950d60055cfSBart Van Assche static const struct seq_operations deadline_dispatch_seq_ops = {
951d60055cfSBart Van Assche 	.start	= deadline_dispatch_start,
952d60055cfSBart Van Assche 	.next	= deadline_dispatch_next,
953d60055cfSBart Van Assche 	.stop	= deadline_dispatch_stop,
954d60055cfSBart Van Assche 	.show	= blk_mq_debugfs_rq_show,
955d60055cfSBart Van Assche };
9560f783995STejun Heo 
9570f783995STejun Heo #define DEADLINE_QUEUE_DDIR_ATTRS(name)					\
9580f783995STejun Heo 	{#name "_fifo_list", 0400,					\
9590f783995STejun Heo 			.seq_ops = &deadline_##name##_fifo_seq_ops}
9600f783995STejun Heo #define DEADLINE_NEXT_RQ_ATTR(name)					\
9610f783995STejun Heo 	{#name "_next_rq", 0400, deadline_##name##_next_rq_show}
9620f783995STejun Heo static const struct blk_mq_debugfs_attr deadline_queue_debugfs_attrs[] = {
9630f783995STejun Heo 	DEADLINE_QUEUE_DDIR_ATTRS(read0),
9640f783995STejun Heo 	DEADLINE_QUEUE_DDIR_ATTRS(write0),
9650f783995STejun Heo 	DEADLINE_QUEUE_DDIR_ATTRS(read1),
9660f783995STejun Heo 	DEADLINE_QUEUE_DDIR_ATTRS(write1),
9670f783995STejun Heo 	DEADLINE_QUEUE_DDIR_ATTRS(read2),
9680f783995STejun Heo 	DEADLINE_QUEUE_DDIR_ATTRS(write2),
9690f783995STejun Heo 	DEADLINE_NEXT_RQ_ATTR(read0),
9700f783995STejun Heo 	DEADLINE_NEXT_RQ_ATTR(write0),
9710f783995STejun Heo 	DEADLINE_NEXT_RQ_ATTR(read1),
9720f783995STejun Heo 	DEADLINE_NEXT_RQ_ATTR(write1),
9730f783995STejun Heo 	DEADLINE_NEXT_RQ_ATTR(read2),
9740f783995STejun Heo 	DEADLINE_NEXT_RQ_ATTR(write2),
9750f783995STejun Heo 	{"batching", 0400, deadline_batching_show},
9760f783995STejun Heo 	{"starved", 0400, deadline_starved_show},
977d60055cfSBart Van Assche 	{"dispatch", 0400, .seq_ops = &deadline_dispatch_seq_ops},
9780f783995STejun Heo 	{"owned_by_driver", 0400, dd_owned_by_driver_show},
9790f783995STejun Heo 	{"queued", 0400, dd_queued_show},
9800f783995STejun Heo 	{},
9810f783995STejun Heo };
9820f783995STejun Heo #undef DEADLINE_QUEUE_DDIR_ATTRS
9830f783995STejun Heo #endif
9840f783995STejun Heo 
9850f783995STejun Heo static struct elevator_type mq_deadline = {
9860f783995STejun Heo 	.ops = {
9870f783995STejun Heo 		.depth_updated		= dd_depth_updated,
9880f783995STejun Heo 		.limit_depth		= dd_limit_depth,
9890f783995STejun Heo 		.insert_requests	= dd_insert_requests,
9900f783995STejun Heo 		.dispatch_request	= dd_dispatch_request,
9910f783995STejun Heo 		.prepare_request	= dd_prepare_request,
9920f783995STejun Heo 		.finish_request		= dd_finish_request,
9930f783995STejun Heo 		.next_request		= elv_rb_latter_request,
9940f783995STejun Heo 		.former_request		= elv_rb_former_request,
9950f783995STejun Heo 		.bio_merge		= dd_bio_merge,
9960f783995STejun Heo 		.request_merge		= dd_request_merge,
9970f783995STejun Heo 		.requests_merged	= dd_merged_requests,
9980f783995STejun Heo 		.request_merged		= dd_request_merged,
9990f783995STejun Heo 		.has_work		= dd_has_work,
10000f783995STejun Heo 		.init_sched		= dd_init_sched,
10010f783995STejun Heo 		.exit_sched		= dd_exit_sched,
10020f783995STejun Heo 	},
10030f783995STejun Heo 
10040f783995STejun Heo #ifdef CONFIG_BLK_DEBUG_FS
10050f783995STejun Heo 	.queue_debugfs_attrs = deadline_queue_debugfs_attrs,
10060f783995STejun Heo #endif
10070f783995STejun Heo 	.elevator_attrs = deadline_attrs,
10080f783995STejun Heo 	.elevator_name = "mq-deadline",
10090f783995STejun Heo 	.elevator_alias = "deadline",
10100f783995STejun Heo 	.elevator_owner = THIS_MODULE,
10110f783995STejun Heo };
10120f783995STejun Heo MODULE_ALIAS("mq-deadline-iosched");
10130f783995STejun Heo 
deadline_init(void)10140f783995STejun Heo static int __init deadline_init(void)
10150f783995STejun Heo {
10160f783995STejun Heo 	return elv_register(&mq_deadline);
10170f783995STejun Heo }
10180f783995STejun Heo 
deadline_exit(void)10190f783995STejun Heo static void __exit deadline_exit(void)
10200f783995STejun Heo {
10210f783995STejun Heo 	elv_unregister(&mq_deadline);
10220f783995STejun Heo }
10230f783995STejun Heo 
10240f783995STejun Heo module_init(deadline_init);
10250f783995STejun Heo module_exit(deadline_exit);
10260f783995STejun Heo 
10270f783995STejun Heo MODULE_AUTHOR("Jens Axboe, Damien Le Moal and Bart Van Assche");
10280f783995STejun Heo MODULE_LICENSE("GPL");
10290f783995STejun Heo MODULE_DESCRIPTION("MQ deadline IO scheduler");
1030