xref: /linux/block/blk-mq-tag.c (revision f4b369c6fe0ceaba2da2daff8c9eb415f85926dd)
13dcf60bcSChristoph Hellwig // SPDX-License-Identifier: GPL-2.0
275bb4625SJens Axboe /*
388459642SOmar Sandoval  * Tag allocation using scalable bitmaps. Uses active queue tracking to support
488459642SOmar Sandoval  * fairer distribution of tags between multiple submitters when a shared tag map
588459642SOmar Sandoval  * is used.
675bb4625SJens Axboe  *
775bb4625SJens Axboe  * Copyright (C) 2013-2014 Jens Axboe
875bb4625SJens Axboe  */
9320ae51fSJens Axboe #include <linux/kernel.h>
10320ae51fSJens Axboe #include <linux/module.h>
11ad0d05dbSMing Lei #include <linux/slab.h>
12ad0d05dbSMing Lei #include <linux/mm.h>
13ad0d05dbSMing Lei #include <linux/kmemleak.h>
14320ae51fSJens Axboe 
15f9934a80SMing Lei #include <linux/delay.h>
16320ae51fSJens Axboe #include "blk.h"
17320ae51fSJens Axboe #include "blk-mq.h"
18d97e594cSJohn Garry #include "blk-mq-sched.h"
19320ae51fSJens Axboe 
200d2602caSJens Axboe /*
21180dccb0SLaibin Qiu  * Recalculate wakeup batch when tag is shared by hctx.
22180dccb0SLaibin Qiu  */
blk_mq_update_wake_batch(struct blk_mq_tags * tags,unsigned int users)23180dccb0SLaibin Qiu static void blk_mq_update_wake_batch(struct blk_mq_tags *tags,
24180dccb0SLaibin Qiu 		unsigned int users)
25180dccb0SLaibin Qiu {
26180dccb0SLaibin Qiu 	if (!users)
27180dccb0SLaibin Qiu 		return;
28180dccb0SLaibin Qiu 
29180dccb0SLaibin Qiu 	sbitmap_queue_recalculate_wake_batch(&tags->bitmap_tags,
30180dccb0SLaibin Qiu 			users);
31180dccb0SLaibin Qiu 	sbitmap_queue_recalculate_wake_batch(&tags->breserved_tags,
32180dccb0SLaibin Qiu 			users);
33180dccb0SLaibin Qiu }
34180dccb0SLaibin Qiu 
35180dccb0SLaibin Qiu /*
360d2602caSJens Axboe  * If a previously inactive queue goes active, bump the active user count.
37d263ed99SJianchao Wang  * We need to do this before try to allocate driver tag, then even if fail
38d263ed99SJianchao Wang  * to get tag when first time, the other shared-tag users could reserve
39d263ed99SJianchao Wang  * budget for it.
400d2602caSJens Axboe  */
__blk_mq_tag_busy(struct blk_mq_hw_ctx * hctx)41ee78ec10SLiu Song void __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
420d2602caSJens Axboe {
43180dccb0SLaibin Qiu 	unsigned int users;
44b313a8c8SLi Lingfeng 	unsigned long flags;
454f1731dfSYu Kuai 	struct blk_mq_tags *tags = hctx->tags;
46180dccb0SLaibin Qiu 
473e94d54eSTian Lan 	/*
483e94d54eSTian Lan 	 * calling test_bit() prior to test_and_set_bit() is intentional,
493e94d54eSTian Lan 	 * it avoids dirtying the cacheline if the queue is already active.
503e94d54eSTian Lan 	 */
51079a2e3eSJohn Garry 	if (blk_mq_is_shared_tags(hctx->flags)) {
52f1b49fdcSJohn Garry 		struct request_queue *q = hctx->queue;
53f1b49fdcSJohn Garry 
543e94d54eSTian Lan 		if (test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags) ||
553e94d54eSTian Lan 		    test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags))
56ee78ec10SLiu Song 			return;
57180dccb0SLaibin Qiu 	} else {
583e94d54eSTian Lan 		if (test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) ||
593e94d54eSTian Lan 		    test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
60ee78ec10SLiu Song 			return;
61180dccb0SLaibin Qiu 	}
62180dccb0SLaibin Qiu 
63b313a8c8SLi Lingfeng 	spin_lock_irqsave(&tags->lock, flags);
644f1731dfSYu Kuai 	users = tags->active_queues + 1;
654f1731dfSYu Kuai 	WRITE_ONCE(tags->active_queues, users);
664f1731dfSYu Kuai 	blk_mq_update_wake_batch(tags, users);
67b313a8c8SLi Lingfeng 	spin_unlock_irqrestore(&tags->lock, flags);
680d2602caSJens Axboe }
690d2602caSJens Axboe 
700d2602caSJens Axboe /*
71aed3ea94SJens Axboe  * Wakeup all potentially sleeping on tags
720d2602caSJens Axboe  */
blk_mq_tag_wakeup_all(struct blk_mq_tags * tags,bool include_reserve)73aed3ea94SJens Axboe void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool include_reserve)
740d2602caSJens Axboe {
75ae0f1a73SJohn Garry 	sbitmap_queue_wake_all(&tags->bitmap_tags);
7688459642SOmar Sandoval 	if (include_reserve)
77ae0f1a73SJohn Garry 		sbitmap_queue_wake_all(&tags->breserved_tags);
780d2602caSJens Axboe }
790d2602caSJens Axboe 
800d2602caSJens Axboe /*
81e3a2b3f9SJens Axboe  * If a previously busy queue goes inactive, potential waiters could now
82e3a2b3f9SJens Axboe  * be allowed to queue. Wake them up and check.
83e3a2b3f9SJens Axboe  */
__blk_mq_tag_idle(struct blk_mq_hw_ctx * hctx)84e3a2b3f9SJens Axboe void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
85e3a2b3f9SJens Axboe {
86e3a2b3f9SJens Axboe 	struct blk_mq_tags *tags = hctx->tags;
87180dccb0SLaibin Qiu 	unsigned int users;
88e3a2b3f9SJens Axboe 
89079a2e3eSJohn Garry 	if (blk_mq_is_shared_tags(hctx->flags)) {
90e155b0c2SJohn Garry 		struct request_queue *q = hctx->queue;
91e155b0c2SJohn Garry 
92f1b49fdcSJohn Garry 		if (!test_and_clear_bit(QUEUE_FLAG_HCTX_ACTIVE,
93f1b49fdcSJohn Garry 					&q->queue_flags))
94f1b49fdcSJohn Garry 			return;
95f1b49fdcSJohn Garry 	} else {
96e3a2b3f9SJens Axboe 		if (!test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
97e3a2b3f9SJens Axboe 			return;
98f1b49fdcSJohn Garry 	}
99e3a2b3f9SJens Axboe 
1004f1731dfSYu Kuai 	spin_lock_irq(&tags->lock);
1014f1731dfSYu Kuai 	users = tags->active_queues - 1;
1024f1731dfSYu Kuai 	WRITE_ONCE(tags->active_queues, users);
103180dccb0SLaibin Qiu 	blk_mq_update_wake_batch(tags, users);
1044f1731dfSYu Kuai 	spin_unlock_irq(&tags->lock);
105079a2e3eSJohn Garry 
106aed3ea94SJens Axboe 	blk_mq_tag_wakeup_all(tags, false);
107e3a2b3f9SJens Axboe }
108e3a2b3f9SJens Axboe 
__blk_mq_get_tag(struct blk_mq_alloc_data * data,struct sbitmap_queue * bt)109200e86b3SJens Axboe static int __blk_mq_get_tag(struct blk_mq_alloc_data *data,
110200e86b3SJens Axboe 			    struct sbitmap_queue *bt)
1114bb659b1SJens Axboe {
11228500850SMing Lei 	if (!data->q->elevator && !(data->flags & BLK_MQ_REQ_RESERVED) &&
11328500850SMing Lei 			!hctx_may_queue(data->hctx, bt))
11476647368SChristoph Hellwig 		return BLK_MQ_NO_TAG;
11542fdc5e4SChristoph Hellwig 
116229a9287SOmar Sandoval 	if (data->shallow_depth)
1173f607293SJohn Garry 		return sbitmap_queue_get_shallow(bt, data->shallow_depth);
118229a9287SOmar Sandoval 	else
119f4a644dbSOmar Sandoval 		return __sbitmap_queue_get(bt);
1204bb659b1SJens Axboe }
1214bb659b1SJens Axboe 
blk_mq_get_tags(struct blk_mq_alloc_data * data,int nr_tags,unsigned int * offset)122349302daSJens Axboe unsigned long blk_mq_get_tags(struct blk_mq_alloc_data *data, int nr_tags,
123349302daSJens Axboe 			      unsigned int *offset)
124349302daSJens Axboe {
125349302daSJens Axboe 	struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
126349302daSJens Axboe 	struct sbitmap_queue *bt = &tags->bitmap_tags;
127349302daSJens Axboe 	unsigned long ret;
128349302daSJens Axboe 
129349302daSJens Axboe 	if (data->shallow_depth ||data->flags & BLK_MQ_REQ_RESERVED ||
130349302daSJens Axboe 	    data->hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)
131349302daSJens Axboe 		return 0;
132349302daSJens Axboe 	ret = __sbitmap_queue_get_batch(bt, nr_tags, offset);
133349302daSJens Axboe 	*offset += tags->nr_reserved_tags;
134349302daSJens Axboe 	return ret;
135349302daSJens Axboe }
136349302daSJens Axboe 
blk_mq_get_tag(struct blk_mq_alloc_data * data)1374941115bSJens Axboe unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
1384bb659b1SJens Axboe {
1394941115bSJens Axboe 	struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
1404941115bSJens Axboe 	struct sbitmap_queue *bt;
14188459642SOmar Sandoval 	struct sbq_wait_state *ws;
1425d2ee712SJens Axboe 	DEFINE_SBQ_WAIT(wait);
1434941115bSJens Axboe 	unsigned int tag_offset;
1444bb659b1SJens Axboe 	int tag;
1454bb659b1SJens Axboe 
1464941115bSJens Axboe 	if (data->flags & BLK_MQ_REQ_RESERVED) {
1474941115bSJens Axboe 		if (unlikely(!tags->nr_reserved_tags)) {
1484941115bSJens Axboe 			WARN_ON_ONCE(1);
149419c3d5eSChristoph Hellwig 			return BLK_MQ_NO_TAG;
1504941115bSJens Axboe 		}
151ae0f1a73SJohn Garry 		bt = &tags->breserved_tags;
1524941115bSJens Axboe 		tag_offset = 0;
1534941115bSJens Axboe 	} else {
154ae0f1a73SJohn Garry 		bt = &tags->bitmap_tags;
1554941115bSJens Axboe 		tag_offset = tags->nr_reserved_tags;
1564941115bSJens Axboe 	}
1574941115bSJens Axboe 
158200e86b3SJens Axboe 	tag = __blk_mq_get_tag(data, bt);
15976647368SChristoph Hellwig 	if (tag != BLK_MQ_NO_TAG)
1604941115bSJens Axboe 		goto found_tag;
1614bb659b1SJens Axboe 
1626f3b0e8bSChristoph Hellwig 	if (data->flags & BLK_MQ_REQ_NOWAIT)
163419c3d5eSChristoph Hellwig 		return BLK_MQ_NO_TAG;
1644bb659b1SJens Axboe 
1654941115bSJens Axboe 	ws = bt_wait_ptr(bt, data->hctx);
16635d37c66SJens Axboe 	do {
167e6fc4649SMing Lei 		struct sbitmap_queue *bt_prev;
168e6fc4649SMing Lei 
169b3223207SBart Van Assche 		/*
170b3223207SBart Van Assche 		 * We're out of tags on this hardware queue, kick any
171b3223207SBart Van Assche 		 * pending IO submits before going to sleep waiting for
1728cecb07dSJens Axboe 		 * some to complete.
173b3223207SBart Van Assche 		 */
1744941115bSJens Axboe 		blk_mq_run_hw_queue(data->hctx, false);
175b3223207SBart Van Assche 
176080ff351SJens Axboe 		/*
177080ff351SJens Axboe 		 * Retry tag allocation after running the hardware queue,
178080ff351SJens Axboe 		 * as running the queue may also have found completions.
179080ff351SJens Axboe 		 */
180200e86b3SJens Axboe 		tag = __blk_mq_get_tag(data, bt);
18176647368SChristoph Hellwig 		if (tag != BLK_MQ_NO_TAG)
182080ff351SJens Axboe 			break;
183080ff351SJens Axboe 
1845d2ee712SJens Axboe 		sbitmap_prepare_to_wait(bt, ws, &wait, TASK_UNINTERRUPTIBLE);
1854e5dff41SJens Axboe 
1864e5dff41SJens Axboe 		tag = __blk_mq_get_tag(data, bt);
18776647368SChristoph Hellwig 		if (tag != BLK_MQ_NO_TAG)
1884e5dff41SJens Axboe 			break;
1894e5dff41SJens Axboe 
190e6fc4649SMing Lei 		bt_prev = bt;
1914bb659b1SJens Axboe 		io_schedule();
192cb96a42cSMing Lei 
1935d2ee712SJens Axboe 		sbitmap_finish_wait(bt, ws, &wait);
1945d2ee712SJens Axboe 
195cb96a42cSMing Lei 		data->ctx = blk_mq_get_ctx(data->q);
19661667cb6SGuixin Liu 		data->hctx = blk_mq_map_queue(data->cmd_flags, data->ctx);
1974941115bSJens Axboe 		tags = blk_mq_tags_from_data(data);
1984941115bSJens Axboe 		if (data->flags & BLK_MQ_REQ_RESERVED)
199ae0f1a73SJohn Garry 			bt = &tags->breserved_tags;
2004941115bSJens Axboe 		else
201ae0f1a73SJohn Garry 			bt = &tags->bitmap_tags;
2024941115bSJens Axboe 
203e6fc4649SMing Lei 		/*
204e6fc4649SMing Lei 		 * If destination hw queue is changed, fake wake up on
205e6fc4649SMing Lei 		 * previous queue for compensating the wake up miss, so
206e6fc4649SMing Lei 		 * other allocations on previous queue won't be starved.
207e6fc4649SMing Lei 		 */
208e6fc4649SMing Lei 		if (bt != bt_prev)
2094acb8341SKeith Busch 			sbitmap_queue_wake_up(bt_prev, 1);
210e6fc4649SMing Lei 
2114941115bSJens Axboe 		ws = bt_wait_ptr(bt, data->hctx);
2124bb659b1SJens Axboe 	} while (1);
2134bb659b1SJens Axboe 
2145d2ee712SJens Axboe 	sbitmap_finish_wait(bt, ws, &wait);
2154941115bSJens Axboe 
2164941115bSJens Axboe found_tag:
217bf0beec0SMing Lei 	/*
218bf0beec0SMing Lei 	 * Give up this allocation if the hctx is inactive.  The caller will
219bf0beec0SMing Lei 	 * retry on an active hctx.
220bf0beec0SMing Lei 	 */
221bf0beec0SMing Lei 	if (unlikely(test_bit(BLK_MQ_S_INACTIVE, &data->hctx->state))) {
222bf0beec0SMing Lei 		blk_mq_put_tag(tags, data->ctx, tag + tag_offset);
223bf0beec0SMing Lei 		return BLK_MQ_NO_TAG;
224bf0beec0SMing Lei 	}
2254941115bSJens Axboe 	return tag + tag_offset;
2264bb659b1SJens Axboe }
2274bb659b1SJens Axboe 
blk_mq_put_tag(struct blk_mq_tags * tags,struct blk_mq_ctx * ctx,unsigned int tag)228cae740a0SJohn Garry void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx,
229cae740a0SJohn Garry 		    unsigned int tag)
230320ae51fSJens Axboe {
231415b806dSSagi Grimberg 	if (!blk_mq_tag_is_reserved(tags, tag)) {
2324bb659b1SJens Axboe 		const int real_tag = tag - tags->nr_reserved_tags;
2334bb659b1SJens Axboe 
23470114c39SJens Axboe 		BUG_ON(real_tag >= tags->nr_tags);
235ae0f1a73SJohn Garry 		sbitmap_queue_clear(&tags->bitmap_tags, real_tag, ctx->cpu);
23670114c39SJens Axboe 	} else {
237ae0f1a73SJohn Garry 		sbitmap_queue_clear(&tags->breserved_tags, tag, ctx->cpu);
23870114c39SJens Axboe 	}
239320ae51fSJens Axboe }
240320ae51fSJens Axboe 
blk_mq_put_tags(struct blk_mq_tags * tags,int * tag_array,int nr_tags)241f794f335SJens Axboe void blk_mq_put_tags(struct blk_mq_tags *tags, int *tag_array, int nr_tags)
242f794f335SJens Axboe {
243f794f335SJens Axboe 	sbitmap_queue_clear_batch(&tags->bitmap_tags, tags->nr_reserved_tags,
244f794f335SJens Axboe 					tag_array, nr_tags);
245f794f335SJens Axboe }
246f794f335SJens Axboe 
24788459642SOmar Sandoval struct bt_iter_data {
24888459642SOmar Sandoval 	struct blk_mq_hw_ctx *hctx;
249fea9f92fSJohn Garry 	struct request_queue *q;
250fc39f8d2SJohn Garry 	busy_tag_iter_fn *fn;
25188459642SOmar Sandoval 	void *data;
25288459642SOmar Sandoval 	bool reserved;
25388459642SOmar Sandoval };
25488459642SOmar Sandoval 
blk_mq_find_and_get_req(struct blk_mq_tags * tags,unsigned int bitnr)2552e315dc0SMing Lei static struct request *blk_mq_find_and_get_req(struct blk_mq_tags *tags,
2562e315dc0SMing Lei 		unsigned int bitnr)
2572e315dc0SMing Lei {
258bd63141dSMing Lei 	struct request *rq;
2592e315dc0SMing Lei 
260bd63141dSMing Lei 	rq = tags->rqs[bitnr];
2610a467d0fSJens Axboe 	if (!rq || rq->tag != bitnr || !req_ref_inc_not_zero(rq))
262bd63141dSMing Lei 		rq = NULL;
2632e315dc0SMing Lei 	return rq;
2642e315dc0SMing Lei }
2652e315dc0SMing Lei 
bt_iter(struct sbitmap * bitmap,unsigned int bitnr,void * data)26688459642SOmar Sandoval static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
26788459642SOmar Sandoval {
26888459642SOmar Sandoval 	struct bt_iter_data *iter_data = data;
26988459642SOmar Sandoval 	struct blk_mq_hw_ctx *hctx = iter_data->hctx;
270fea9f92fSJohn Garry 	struct request_queue *q = iter_data->q;
271fea9f92fSJohn Garry 	struct blk_mq_tag_set *set = q->tag_set;
272fea9f92fSJohn Garry 	struct blk_mq_tags *tags;
27388459642SOmar Sandoval 	struct request *rq;
2742e315dc0SMing Lei 	bool ret = true;
27588459642SOmar Sandoval 
276fea9f92fSJohn Garry 	if (blk_mq_is_shared_tags(set->flags))
277fea9f92fSJohn Garry 		tags = set->shared_tags;
278fea9f92fSJohn Garry 	else
279fea9f92fSJohn Garry 		tags = hctx->tags;
280fea9f92fSJohn Garry 
2814cf6e6c0SJohn Garry 	if (!iter_data->reserved)
28288459642SOmar Sandoval 		bitnr += tags->nr_reserved_tags;
2837f5562d5SJens Axboe 	/*
2847f5562d5SJens Axboe 	 * We can hit rq == NULL here, because the tagging functions
285c7b1bf5cSBart Van Assche 	 * test and set the bit before assigning ->rqs[].
2867f5562d5SJens Axboe 	 */
2872e315dc0SMing Lei 	rq = blk_mq_find_and_get_req(tags, bitnr);
2882e315dc0SMing Lei 	if (!rq)
28988459642SOmar Sandoval 		return true;
2902e315dc0SMing Lei 
291fea9f92fSJohn Garry 	if (rq->q == q && (!hctx || rq->mq_hctx == hctx))
2922dd6532eSJohn Garry 		ret = iter_data->fn(rq, iter_data->data);
2932e315dc0SMing Lei 	blk_mq_put_rq_ref(rq);
2942e315dc0SMing Lei 	return ret;
29588459642SOmar Sandoval }
29688459642SOmar Sandoval 
297c7b1bf5cSBart Van Assche /**
298c7b1bf5cSBart Van Assche  * bt_for_each - iterate over the requests associated with a hardware queue
299c7b1bf5cSBart Van Assche  * @hctx:	Hardware queue to examine.
300fea55691SBart Van Assche  * @q:		Request queue @hctx is associated with (@hctx->queue).
301c7b1bf5cSBart Van Assche  * @bt:		sbitmap to examine. This is either the breserved_tags member
302c7b1bf5cSBart Van Assche  *		or the bitmap_tags member of struct blk_mq_tags.
303c7b1bf5cSBart Van Assche  * @fn:		Pointer to the function that will be called for each request
304c7b1bf5cSBart Van Assche  *		associated with @hctx that has been assigned a driver tag.
305fea55691SBart Van Assche  *		@fn will be called as follows: @fn(rq, @data) where rq is a
306fea55691SBart Van Assche  *		pointer to a request. Return %true to continue iterating tags;
307fea55691SBart Van Assche  *		%false to stop.
308fea55691SBart Van Assche  * @data:	Will be passed as second argument to @fn.
309c7b1bf5cSBart Van Assche  * @reserved:	Indicates whether @bt is the breserved_tags member or the
310c7b1bf5cSBart Van Assche  *		bitmap_tags member of struct blk_mq_tags.
311c7b1bf5cSBart Van Assche  */
bt_for_each(struct blk_mq_hw_ctx * hctx,struct request_queue * q,struct sbitmap_queue * bt,busy_tag_iter_fn * fn,void * data,bool reserved)312fea9f92fSJohn Garry static void bt_for_each(struct blk_mq_hw_ctx *hctx, struct request_queue *q,
313fea9f92fSJohn Garry 			struct sbitmap_queue *bt, busy_tag_iter_fn *fn,
314fea9f92fSJohn Garry 			void *data, bool reserved)
315320ae51fSJens Axboe {
31688459642SOmar Sandoval 	struct bt_iter_data iter_data = {
31788459642SOmar Sandoval 		.hctx = hctx,
31888459642SOmar Sandoval 		.fn = fn,
31988459642SOmar Sandoval 		.data = data,
32088459642SOmar Sandoval 		.reserved = reserved,
321fea9f92fSJohn Garry 		.q = q,
32288459642SOmar Sandoval 	};
32388459642SOmar Sandoval 
32488459642SOmar Sandoval 	sbitmap_for_each_set(&bt->sb, bt_iter, &iter_data);
32588459642SOmar Sandoval }
32688459642SOmar Sandoval 
32788459642SOmar Sandoval struct bt_tags_iter_data {
32888459642SOmar Sandoval 	struct blk_mq_tags *tags;
32988459642SOmar Sandoval 	busy_tag_iter_fn *fn;
33088459642SOmar Sandoval 	void *data;
331602380d2SMing Lei 	unsigned int flags;
33288459642SOmar Sandoval };
33388459642SOmar Sandoval 
334602380d2SMing Lei #define BT_TAG_ITER_RESERVED		(1 << 0)
335602380d2SMing Lei #define BT_TAG_ITER_STARTED		(1 << 1)
33622f614bcSMing Lei #define BT_TAG_ITER_STATIC_RQS		(1 << 2)
337602380d2SMing Lei 
bt_tags_iter(struct sbitmap * bitmap,unsigned int bitnr,void * data)33888459642SOmar Sandoval static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
33988459642SOmar Sandoval {
34088459642SOmar Sandoval 	struct bt_tags_iter_data *iter_data = data;
34188459642SOmar Sandoval 	struct blk_mq_tags *tags = iter_data->tags;
34281481eb4SChristoph Hellwig 	struct request *rq;
3432e315dc0SMing Lei 	bool ret = true;
3442e315dc0SMing Lei 	bool iter_static_rqs = !!(iter_data->flags & BT_TAG_ITER_STATIC_RQS);
3454bb659b1SJens Axboe 
3464cf6e6c0SJohn Garry 	if (!(iter_data->flags & BT_TAG_ITER_RESERVED))
34788459642SOmar Sandoval 		bitnr += tags->nr_reserved_tags;
3484bb659b1SJens Axboe 
3497f5562d5SJens Axboe 	/*
3507f5562d5SJens Axboe 	 * We can hit rq == NULL here, because the tagging functions
35122f614bcSMing Lei 	 * test and set the bit before assigning ->rqs[].
3527f5562d5SJens Axboe 	 */
3532e315dc0SMing Lei 	if (iter_static_rqs)
35422f614bcSMing Lei 		rq = tags->static_rqs[bitnr];
35522f614bcSMing Lei 	else
3562e315dc0SMing Lei 		rq = blk_mq_find_and_get_req(tags, bitnr);
357602380d2SMing Lei 	if (!rq)
35888459642SOmar Sandoval 		return true;
3592e315dc0SMing Lei 
3602e315dc0SMing Lei 	if (!(iter_data->flags & BT_TAG_ITER_STARTED) ||
3612e315dc0SMing Lei 	    blk_mq_request_started(rq))
3622dd6532eSJohn Garry 		ret = iter_data->fn(rq, iter_data->data);
3632e315dc0SMing Lei 	if (!iter_static_rqs)
3642e315dc0SMing Lei 		blk_mq_put_rq_ref(rq);
3652e315dc0SMing Lei 	return ret;
36681481eb4SChristoph Hellwig }
3674bb659b1SJens Axboe 
368c7b1bf5cSBart Van Assche /**
369c7b1bf5cSBart Van Assche  * bt_tags_for_each - iterate over the requests in a tag map
370c7b1bf5cSBart Van Assche  * @tags:	Tag map to iterate over.
371c7b1bf5cSBart Van Assche  * @bt:		sbitmap to examine. This is either the breserved_tags member
372c7b1bf5cSBart Van Assche  *		or the bitmap_tags member of struct blk_mq_tags.
373c7b1bf5cSBart Van Assche  * @fn:		Pointer to the function that will be called for each started
374fea55691SBart Van Assche  *		request. @fn will be called as follows: @fn(rq, @data) where rq
375fea55691SBart Van Assche  *		is a pointer to a request. Return %true to continue iterating
376fea55691SBart Van Assche  *		tags; %false to stop.
377c7b1bf5cSBart Van Assche  * @data:	Will be passed as second argument to @fn.
378602380d2SMing Lei  * @flags:	BT_TAG_ITER_*
379c7b1bf5cSBart Van Assche  */
bt_tags_for_each(struct blk_mq_tags * tags,struct sbitmap_queue * bt,busy_tag_iter_fn * fn,void * data,unsigned int flags)38088459642SOmar Sandoval static void bt_tags_for_each(struct blk_mq_tags *tags, struct sbitmap_queue *bt,
381602380d2SMing Lei 			     busy_tag_iter_fn *fn, void *data, unsigned int flags)
382f26cdc85SKeith Busch {
38388459642SOmar Sandoval 	struct bt_tags_iter_data iter_data = {
38488459642SOmar Sandoval 		.tags = tags,
38588459642SOmar Sandoval 		.fn = fn,
38688459642SOmar Sandoval 		.data = data,
387602380d2SMing Lei 		.flags = flags,
38888459642SOmar Sandoval 	};
389f26cdc85SKeith Busch 
39088459642SOmar Sandoval 	if (tags->rqs)
39188459642SOmar Sandoval 		sbitmap_for_each_set(&bt->sb, bt_tags_iter, &iter_data);
392f26cdc85SKeith Busch }
393f26cdc85SKeith Busch 
/*
 * Walk the reserved bitmap (if the map has reserved tags) and then the
 * normal bitmap of @tags. Callers must not pass BT_TAG_ITER_RESERVED in
 * @flags; it is added here for the reserved pass.
 */
static void __blk_mq_all_tag_iter(struct blk_mq_tags *tags,
		busy_tag_iter_fn *fn, void *priv, unsigned int flags)
{
	struct sbitmap_queue *resv = &tags->breserved_tags;
	struct sbitmap_queue *normal = &tags->bitmap_tags;

	WARN_ON_ONCE(flags & BT_TAG_ITER_RESERVED);

	if (tags->nr_reserved_tags)
		bt_tags_for_each(tags, resv, fn, priv,
				 flags | BT_TAG_ITER_RESERVED);
	bt_tags_for_each(tags, normal, fn, priv, flags);
}
404602380d2SMing Lei 
405c7b1bf5cSBart Van Assche /**
406602380d2SMing Lei  * blk_mq_all_tag_iter - iterate over all requests in a tag map
407c7b1bf5cSBart Van Assche  * @tags:	Tag map to iterate over.
408602380d2SMing Lei  * @fn:		Pointer to the function that will be called for each
409fea55691SBart Van Assche  *		request. @fn will be called as follows: @fn(rq, @priv) where rq
410fea55691SBart Van Assche  *		is a pointer to a request. Return %true to continue iterating
411fea55691SBart Van Assche  *		tags; %false to stop.
412c7b1bf5cSBart Van Assche  * @priv:	Will be passed as second argument to @fn.
41322f614bcSMing Lei  *
41422f614bcSMing Lei  * Caller has to pass the tag map from which requests are allocated.
415c7b1bf5cSBart Van Assche  */
blk_mq_all_tag_iter(struct blk_mq_tags * tags,busy_tag_iter_fn * fn,void * priv)416602380d2SMing Lei void blk_mq_all_tag_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn,
417602380d2SMing Lei 		void *priv)
418f26cdc85SKeith Busch {
419a8a5e383SBaolin Wang 	__blk_mq_all_tag_iter(tags, fn, priv, BT_TAG_ITER_STATIC_RQS);
420f26cdc85SKeith Busch }
421f26cdc85SKeith Busch 
422c7b1bf5cSBart Van Assche /**
423c7b1bf5cSBart Van Assche  * blk_mq_tagset_busy_iter - iterate over all started requests in a tag set
424c7b1bf5cSBart Van Assche  * @tagset:	Tag set to iterate over.
425c7b1bf5cSBart Van Assche  * @fn:		Pointer to the function that will be called for each started
4260b507305SBart Van Assche  *		request. @fn will be called as follows: @fn(rq, @priv) where
4270b507305SBart Van Assche  *		rq is a pointer to a request. Return true to continue iterating
4280b507305SBart Van Assche  *		tags, false to stop.
429c7b1bf5cSBart Van Assche  * @priv:	Will be passed as second argument to @fn.
4302e315dc0SMing Lei  *
4312e315dc0SMing Lei  * We grab one request reference before calling @fn and release it after
4322e315dc0SMing Lei  * @fn returns.
433c7b1bf5cSBart Van Assche  */
blk_mq_tagset_busy_iter(struct blk_mq_tag_set * tagset,busy_tag_iter_fn * fn,void * priv)434e0489487SSagi Grimberg void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
435e0489487SSagi Grimberg 		busy_tag_iter_fn *fn, void *priv)
436e0489487SSagi Grimberg {
4370994c64eSJohn Garry 	unsigned int flags = tagset->flags;
438995412e2SMing Lei 	int i, nr_tags, srcu_idx;
439995412e2SMing Lei 
440995412e2SMing Lei 	srcu_idx = srcu_read_lock(&tagset->tags_srcu);
441e0489487SSagi Grimberg 
4420994c64eSJohn Garry 	nr_tags = blk_mq_is_shared_tags(flags) ? 1 : tagset->nr_hw_queues;
4430994c64eSJohn Garry 
4440994c64eSJohn Garry 	for (i = 0; i < nr_tags; i++) {
445e0489487SSagi Grimberg 		if (tagset->tags && tagset->tags[i])
446602380d2SMing Lei 			__blk_mq_all_tag_iter(tagset->tags[i], fn, priv,
447602380d2SMing Lei 					      BT_TAG_ITER_STARTED);
448e0489487SSagi Grimberg 	}
449995412e2SMing Lei 	srcu_read_unlock(&tagset->tags_srcu, srcu_idx);
450e0489487SSagi Grimberg }
451e0489487SSagi Grimberg EXPORT_SYMBOL(blk_mq_tagset_busy_iter);
452e0489487SSagi Grimberg 
blk_mq_tagset_count_completed_rqs(struct request * rq,void * data)4532dd6532eSJohn Garry static bool blk_mq_tagset_count_completed_rqs(struct request *rq, void *data)
454f9934a80SMing Lei {
455f9934a80SMing Lei 	unsigned *count = data;
456f9934a80SMing Lei 
457f9934a80SMing Lei 	if (blk_mq_request_completed(rq))
458f9934a80SMing Lei 		(*count)++;
459f9934a80SMing Lei 	return true;
460f9934a80SMing Lei }
461f9934a80SMing Lei 
462f9934a80SMing Lei /**
4639cf1adc6SBhaskar Chowdhury  * blk_mq_tagset_wait_completed_request - Wait until all scheduled request
4649cf1adc6SBhaskar Chowdhury  * completions have finished.
465f9934a80SMing Lei  * @tagset:	Tag set to drain completed request
466f9934a80SMing Lei  *
467f9934a80SMing Lei  * Note: This function has to be run after all IO queues are shutdown
468f9934a80SMing Lei  */
blk_mq_tagset_wait_completed_request(struct blk_mq_tag_set * tagset)469f9934a80SMing Lei void blk_mq_tagset_wait_completed_request(struct blk_mq_tag_set *tagset)
470f9934a80SMing Lei {
471f9934a80SMing Lei 	while (true) {
472f9934a80SMing Lei 		unsigned count = 0;
473f9934a80SMing Lei 
474f9934a80SMing Lei 		blk_mq_tagset_busy_iter(tagset,
475f9934a80SMing Lei 				blk_mq_tagset_count_completed_rqs, &count);
476f9934a80SMing Lei 		if (!count)
477f9934a80SMing Lei 			break;
478f9934a80SMing Lei 		msleep(5);
479f9934a80SMing Lei 	}
480f9934a80SMing Lei }
481f9934a80SMing Lei EXPORT_SYMBOL(blk_mq_tagset_wait_completed_request);
482f9934a80SMing Lei 
483c7b1bf5cSBart Van Assche /**
484c7b1bf5cSBart Van Assche  * blk_mq_queue_tag_busy_iter - iterate over all requests with a driver tag
485c7b1bf5cSBart Van Assche  * @q:		Request queue to examine.
486c7b1bf5cSBart Van Assche  * @fn:		Pointer to the function that will be called for each request
487fea55691SBart Van Assche  *		on @q. @fn will be called as follows: @fn(rq, @priv) where rq
488fea55691SBart Van Assche  *		is a pointer to a request and hctx points to the hardware queue
489fea55691SBart Van Assche  *		associated with the request.
490fea55691SBart Van Assche  * @priv:	Will be passed as second argument to @fn.
491c7b1bf5cSBart Van Assche  *
492c7b1bf5cSBart Van Assche  * Note: if @q->tag_set is shared with other request queues then @fn will be
493c7b1bf5cSBart Van Assche  * called for all requests on all queues that share that tag set and not only
494c7b1bf5cSBart Van Assche  * for requests associated with @q.
495c7b1bf5cSBart Van Assche  */
blk_mq_queue_tag_busy_iter(struct request_queue * q,busy_tag_iter_fn * fn,void * priv)496fc39f8d2SJohn Garry void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_tag_iter_fn *fn,
49781481eb4SChristoph Hellwig 		void *priv)
498320ae51fSJens Axboe {
499995412e2SMing Lei 	int srcu_idx;
500995412e2SMing Lei 
501f5bbbbe4SJianchao Wang 	/*
502*d0c98769SFengnan Chang 	 * __blk_mq_update_nr_hw_queues() updates nr_hw_queues and queue_hw_ctx
503c7b1bf5cSBart Van Assche 	 * while the queue is frozen. So we can use q_usage_counter to avoid
50476cffccdSyangerkun 	 * racing with it.
505f5bbbbe4SJianchao Wang 	 */
506530ca2c9SKeith Busch 	if (!percpu_ref_tryget(&q->q_usage_counter))
507f5bbbbe4SJianchao Wang 		return;
5080bf6cd5bSChristoph Hellwig 
509995412e2SMing Lei 	srcu_idx = srcu_read_lock(&q->tag_set->tags_srcu);
510fea9f92fSJohn Garry 	if (blk_mq_is_shared_tags(q->tag_set->flags)) {
511fea9f92fSJohn Garry 		struct blk_mq_tags *tags = q->tag_set->shared_tags;
512fea9f92fSJohn Garry 		struct sbitmap_queue *bresv = &tags->breserved_tags;
513fea9f92fSJohn Garry 		struct sbitmap_queue *btags = &tags->bitmap_tags;
514fea9f92fSJohn Garry 
515fea9f92fSJohn Garry 		if (tags->nr_reserved_tags)
516fea9f92fSJohn Garry 			bt_for_each(NULL, q, bresv, fn, priv, true);
517fea9f92fSJohn Garry 		bt_for_each(NULL, q, btags, fn, priv, false);
518fea9f92fSJohn Garry 	} else {
519fea9f92fSJohn Garry 		struct blk_mq_hw_ctx *hctx;
5204f481208SMing Lei 		unsigned long i;
521fea9f92fSJohn Garry 
5220bf6cd5bSChristoph Hellwig 		queue_for_each_hw_ctx(q, hctx, i) {
52381481eb4SChristoph Hellwig 			struct blk_mq_tags *tags = hctx->tags;
524fea9f92fSJohn Garry 			struct sbitmap_queue *bresv = &tags->breserved_tags;
525fea9f92fSJohn Garry 			struct sbitmap_queue *btags = &tags->bitmap_tags;
526320ae51fSJens Axboe 
5270bf6cd5bSChristoph Hellwig 			/*
528c7b1bf5cSBart Van Assche 			 * If no software queues are currently mapped to this
5290bf6cd5bSChristoph Hellwig 			 * hardware queue, there's nothing to check
5300bf6cd5bSChristoph Hellwig 			 */
5310bf6cd5bSChristoph Hellwig 			if (!blk_mq_hw_queue_mapped(hctx))
5320bf6cd5bSChristoph Hellwig 				continue;
5330bf6cd5bSChristoph Hellwig 
534320ae51fSJens Axboe 			if (tags->nr_reserved_tags)
535fea9f92fSJohn Garry 				bt_for_each(hctx, q, bresv, fn, priv, true);
536fea9f92fSJohn Garry 			bt_for_each(hctx, q, btags, fn, priv, false);
537fea9f92fSJohn Garry 		}
538320ae51fSJens Axboe 	}
539995412e2SMing Lei 	srcu_read_unlock(&q->tag_set->tags_srcu, srcu_idx);
540530ca2c9SKeith Busch 	blk_queue_exit(q);
5410bf6cd5bSChristoph Hellwig }
542320ae51fSJens Axboe 
/*
 * Initialise @bt with @depth bits on NUMA node @node using GFP_KERNEL.
 * Returns the result of sbitmap_queue_init_node().
 */
static int bt_alloc(struct sbitmap_queue *bt, unsigned int depth,
		    bool round_robin, int node)
{
	/* NOTE(review): -1 presumably lets sbitmap pick its own shift. */
	const int shift = -1;

	return sbitmap_queue_init_node(bt, depth, shift, round_robin,
				       GFP_KERNEL, node);
}
5494bb659b1SJens Axboe 
blk_mq_init_tags(unsigned int total_tags,unsigned int reserved_tags,unsigned int flags,int node)550320ae51fSJens Axboe struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
551ce32496eSChristoph Hellwig 		unsigned int reserved_tags, unsigned int flags, int node)
552320ae51fSJens Axboe {
55368ed4512SChristoph Hellwig 	unsigned int depth = total_tags - reserved_tags;
554ce32496eSChristoph Hellwig 	bool round_robin = flags & BLK_MQ_F_TAG_RR;
555320ae51fSJens Axboe 	struct blk_mq_tags *tags;
556320ae51fSJens Axboe 
557320ae51fSJens Axboe 	if (total_tags > BLK_MQ_TAG_MAX) {
558320ae51fSJens Axboe 		pr_err("blk-mq: tag depth too large\n");
559320ae51fSJens Axboe 		return NULL;
560320ae51fSJens Axboe 	}
561320ae51fSJens Axboe 
562320ae51fSJens Axboe 	tags = kzalloc_node(sizeof(*tags), GFP_KERNEL, node);
563320ae51fSJens Axboe 	if (!tags)
564320ae51fSJens Axboe 		return NULL;
565320ae51fSJens Axboe 
566320ae51fSJens Axboe 	tags->nr_tags = total_tags;
567320ae51fSJens Axboe 	tags->nr_reserved_tags = reserved_tags;
568bd63141dSMing Lei 	spin_lock_init(&tags->lock);
569670bfe68SYu Kuai 	INIT_LIST_HEAD(&tags->page_list);
570670bfe68SYu Kuai 
57168ed4512SChristoph Hellwig 	if (bt_alloc(&tags->bitmap_tags, depth, round_robin, node))
57268ed4512SChristoph Hellwig 		goto out_free_tags;
57368ed4512SChristoph Hellwig 	if (bt_alloc(&tags->breserved_tags, reserved_tags, round_robin, node))
57468ed4512SChristoph Hellwig 		goto out_free_bitmap_tags;
575320ae51fSJens Axboe 
57668ed4512SChristoph Hellwig 	return tags;
57768ed4512SChristoph Hellwig 
57868ed4512SChristoph Hellwig out_free_bitmap_tags:
57968ed4512SChristoph Hellwig 	sbitmap_queue_free(&tags->bitmap_tags);
58068ed4512SChristoph Hellwig out_free_tags:
5814d063237SHannes Reinecke 	kfree(tags);
5824d063237SHannes Reinecke 	return NULL;
5834d063237SHannes Reinecke }
584320ae51fSJens Axboe 
blk_mq_free_tags_callback(struct rcu_head * head)585ad0d05dbSMing Lei static void blk_mq_free_tags_callback(struct rcu_head *head)
586ad0d05dbSMing Lei {
587ad0d05dbSMing Lei 	struct blk_mq_tags *tags = container_of(head, struct blk_mq_tags,
588ad0d05dbSMing Lei 						rcu_head);
589ad0d05dbSMing Lei 	struct page *page;
590ad0d05dbSMing Lei 
591ad0d05dbSMing Lei 	while (!list_empty(&tags->page_list)) {
592ad0d05dbSMing Lei 		page = list_first_entry(&tags->page_list, struct page, lru);
593ad0d05dbSMing Lei 		list_del_init(&page->lru);
594ad0d05dbSMing Lei 		/*
595ad0d05dbSMing Lei 		 * Remove kmemleak object previously allocated in
596ad0d05dbSMing Lei 		 * blk_mq_alloc_rqs().
597ad0d05dbSMing Lei 		 */
598ad0d05dbSMing Lei 		kmemleak_free(page_address(page));
599ad0d05dbSMing Lei 		__free_pages(page, page->private);
600ad0d05dbSMing Lei 	}
601ad0d05dbSMing Lei 	kfree(tags);
602ad0d05dbSMing Lei }
603ad0d05dbSMing Lei 
blk_mq_free_tags(struct blk_mq_tag_set * set,struct blk_mq_tags * tags)6049ad8e5afSMing Lei void blk_mq_free_tags(struct blk_mq_tag_set *set, struct blk_mq_tags *tags)
605320ae51fSJens Axboe {
606ae0f1a73SJohn Garry 	sbitmap_queue_free(&tags->bitmap_tags);
607ae0f1a73SJohn Garry 	sbitmap_queue_free(&tags->breserved_tags);
608670bfe68SYu Kuai 
609670bfe68SYu Kuai 	/* if tags pages is not allocated yet, free tags directly */
610670bfe68SYu Kuai 	if (list_empty(&tags->page_list)) {
611670bfe68SYu Kuai 		kfree(tags);
612670bfe68SYu Kuai 		return;
613670bfe68SYu Kuai 	}
614670bfe68SYu Kuai 
615ad0d05dbSMing Lei 	call_srcu(&set->tags_srcu, &tags->rcu_head, blk_mq_free_tags_callback);
616320ae51fSJens Axboe }
617320ae51fSJens Axboe 
blk_mq_tag_resize_shared_tags(struct blk_mq_tag_set * set,unsigned int size)618079a2e3eSJohn Garry void blk_mq_tag_resize_shared_tags(struct blk_mq_tag_set *set, unsigned int size)
61932bc15afSJohn Garry {
620079a2e3eSJohn Garry 	struct blk_mq_tags *tags = set->shared_tags;
621e155b0c2SJohn Garry 
622ae0f1a73SJohn Garry 	sbitmap_queue_resize(&tags->bitmap_tags, size - set->reserved_tags);
62332bc15afSJohn Garry }
62432bc15afSJohn Garry 
blk_mq_tag_update_sched_shared_tags(struct request_queue * q,unsigned int nr)625dc96cefeSYu Kuai void blk_mq_tag_update_sched_shared_tags(struct request_queue *q,
626dc96cefeSYu Kuai 					 unsigned int nr)
627a7e7388dSJohn Garry {
628079a2e3eSJohn Garry 	sbitmap_queue_resize(&q->sched_shared_tags->bitmap_tags,
629dc96cefeSYu Kuai 			     nr - q->tag_set->reserved_tags);
630a7e7388dSJohn Garry }
631a7e7388dSJohn Garry 
632205fb5f5SBart Van Assche /**
633205fb5f5SBart Van Assche  * blk_mq_unique_tag() - return a tag that is unique queue-wide
634205fb5f5SBart Van Assche  * @rq: request for which to compute a unique tag
635205fb5f5SBart Van Assche  *
636205fb5f5SBart Van Assche  * The tag field in struct request is unique per hardware queue but not over
637205fb5f5SBart Van Assche  * all hardware queues. Hence this function that returns a tag with the
638205fb5f5SBart Van Assche  * hardware context index in the upper bits and the per hardware queue tag in
639205fb5f5SBart Van Assche  * the lower bits.
640205fb5f5SBart Van Assche  *
641205fb5f5SBart Van Assche  * Note: When called for a request that is queued on a non-multiqueue request
642205fb5f5SBart Van Assche  * queue, the hardware context index is set to zero.
643205fb5f5SBart Van Assche  */
blk_mq_unique_tag(struct request * rq)644205fb5f5SBart Van Assche u32 blk_mq_unique_tag(struct request *rq)
645205fb5f5SBart Van Assche {
646ea4f995eSJens Axboe 	return (rq->mq_hctx->queue_num << BLK_MQ_UNIQUE_TAG_BITS) |
647205fb5f5SBart Van Assche 		(rq->tag & BLK_MQ_UNIQUE_TAG_MASK);
648205fb5f5SBart Van Assche }
649205fb5f5SBart Van Assche EXPORT_SYMBOL(blk_mq_unique_tag);
650