13dcf60bcSChristoph Hellwig // SPDX-License-Identifier: GPL-2.0
275bb4625SJens Axboe /*
388459642SOmar Sandoval * Tag allocation using scalable bitmaps. Uses active queue tracking to support
488459642SOmar Sandoval * fairer distribution of tags between multiple submitters when a shared tag map
588459642SOmar Sandoval * is used.
675bb4625SJens Axboe *
775bb4625SJens Axboe * Copyright (C) 2013-2014 Jens Axboe
875bb4625SJens Axboe */
9320ae51fSJens Axboe #include <linux/kernel.h>
10320ae51fSJens Axboe #include <linux/module.h>
11ad0d05dbSMing Lei #include <linux/slab.h>
12ad0d05dbSMing Lei #include <linux/mm.h>
13ad0d05dbSMing Lei #include <linux/kmemleak.h>
14320ae51fSJens Axboe
15f9934a80SMing Lei #include <linux/delay.h>
16320ae51fSJens Axboe #include "blk.h"
17320ae51fSJens Axboe #include "blk-mq.h"
18d97e594cSJohn Garry #include "blk-mq-sched.h"
19320ae51fSJens Axboe
200d2602caSJens Axboe /*
21180dccb0SLaibin Qiu * Recalculate wakeup batch when tag is shared by hctx.
22180dccb0SLaibin Qiu */
blk_mq_update_wake_batch(struct blk_mq_tags * tags,unsigned int users)23180dccb0SLaibin Qiu static void blk_mq_update_wake_batch(struct blk_mq_tags *tags,
24180dccb0SLaibin Qiu unsigned int users)
25180dccb0SLaibin Qiu {
26180dccb0SLaibin Qiu if (!users)
27180dccb0SLaibin Qiu return;
28180dccb0SLaibin Qiu
29180dccb0SLaibin Qiu sbitmap_queue_recalculate_wake_batch(&tags->bitmap_tags,
30180dccb0SLaibin Qiu users);
31180dccb0SLaibin Qiu sbitmap_queue_recalculate_wake_batch(&tags->breserved_tags,
32180dccb0SLaibin Qiu users);
33180dccb0SLaibin Qiu }
34180dccb0SLaibin Qiu
35180dccb0SLaibin Qiu /*
360d2602caSJens Axboe * If a previously inactive queue goes active, bump the active user count.
37d263ed99SJianchao Wang * We need to do this before try to allocate driver tag, then even if fail
38d263ed99SJianchao Wang * to get tag when first time, the other shared-tag users could reserve
39d263ed99SJianchao Wang * budget for it.
400d2602caSJens Axboe */
__blk_mq_tag_busy(struct blk_mq_hw_ctx * hctx)41ee78ec10SLiu Song void __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
420d2602caSJens Axboe {
43180dccb0SLaibin Qiu unsigned int users;
44b313a8c8SLi Lingfeng unsigned long flags;
454f1731dfSYu Kuai struct blk_mq_tags *tags = hctx->tags;
46180dccb0SLaibin Qiu
473e94d54eSTian Lan /*
483e94d54eSTian Lan * calling test_bit() prior to test_and_set_bit() is intentional,
493e94d54eSTian Lan * it avoids dirtying the cacheline if the queue is already active.
503e94d54eSTian Lan */
51079a2e3eSJohn Garry if (blk_mq_is_shared_tags(hctx->flags)) {
52f1b49fdcSJohn Garry struct request_queue *q = hctx->queue;
53f1b49fdcSJohn Garry
543e94d54eSTian Lan if (test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags) ||
553e94d54eSTian Lan test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags))
56ee78ec10SLiu Song return;
57180dccb0SLaibin Qiu } else {
583e94d54eSTian Lan if (test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) ||
593e94d54eSTian Lan test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
60ee78ec10SLiu Song return;
61180dccb0SLaibin Qiu }
62180dccb0SLaibin Qiu
63b313a8c8SLi Lingfeng spin_lock_irqsave(&tags->lock, flags);
644f1731dfSYu Kuai users = tags->active_queues + 1;
654f1731dfSYu Kuai WRITE_ONCE(tags->active_queues, users);
664f1731dfSYu Kuai blk_mq_update_wake_batch(tags, users);
67b313a8c8SLi Lingfeng spin_unlock_irqrestore(&tags->lock, flags);
680d2602caSJens Axboe }
690d2602caSJens Axboe
700d2602caSJens Axboe /*
71aed3ea94SJens Axboe * Wakeup all potentially sleeping on tags
720d2602caSJens Axboe */
blk_mq_tag_wakeup_all(struct blk_mq_tags * tags,bool include_reserve)73aed3ea94SJens Axboe void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool include_reserve)
740d2602caSJens Axboe {
75ae0f1a73SJohn Garry sbitmap_queue_wake_all(&tags->bitmap_tags);
7688459642SOmar Sandoval if (include_reserve)
77ae0f1a73SJohn Garry sbitmap_queue_wake_all(&tags->breserved_tags);
780d2602caSJens Axboe }
790d2602caSJens Axboe
800d2602caSJens Axboe /*
81e3a2b3f9SJens Axboe * If a previously busy queue goes inactive, potential waiters could now
82e3a2b3f9SJens Axboe * be allowed to queue. Wake them up and check.
83e3a2b3f9SJens Axboe */
__blk_mq_tag_idle(struct blk_mq_hw_ctx * hctx)84e3a2b3f9SJens Axboe void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
85e3a2b3f9SJens Axboe {
86e3a2b3f9SJens Axboe struct blk_mq_tags *tags = hctx->tags;
87180dccb0SLaibin Qiu unsigned int users;
88e3a2b3f9SJens Axboe
89079a2e3eSJohn Garry if (blk_mq_is_shared_tags(hctx->flags)) {
90e155b0c2SJohn Garry struct request_queue *q = hctx->queue;
91e155b0c2SJohn Garry
92f1b49fdcSJohn Garry if (!test_and_clear_bit(QUEUE_FLAG_HCTX_ACTIVE,
93f1b49fdcSJohn Garry &q->queue_flags))
94f1b49fdcSJohn Garry return;
95f1b49fdcSJohn Garry } else {
96e3a2b3f9SJens Axboe if (!test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
97e3a2b3f9SJens Axboe return;
98f1b49fdcSJohn Garry }
99e3a2b3f9SJens Axboe
1004f1731dfSYu Kuai spin_lock_irq(&tags->lock);
1014f1731dfSYu Kuai users = tags->active_queues - 1;
1024f1731dfSYu Kuai WRITE_ONCE(tags->active_queues, users);
103180dccb0SLaibin Qiu blk_mq_update_wake_batch(tags, users);
1044f1731dfSYu Kuai spin_unlock_irq(&tags->lock);
105079a2e3eSJohn Garry
106aed3ea94SJens Axboe blk_mq_tag_wakeup_all(tags, false);
107e3a2b3f9SJens Axboe }
108e3a2b3f9SJens Axboe
__blk_mq_get_tag(struct blk_mq_alloc_data * data,struct sbitmap_queue * bt)109200e86b3SJens Axboe static int __blk_mq_get_tag(struct blk_mq_alloc_data *data,
110200e86b3SJens Axboe struct sbitmap_queue *bt)
1114bb659b1SJens Axboe {
11228500850SMing Lei if (!data->q->elevator && !(data->flags & BLK_MQ_REQ_RESERVED) &&
11328500850SMing Lei !hctx_may_queue(data->hctx, bt))
11476647368SChristoph Hellwig return BLK_MQ_NO_TAG;
11542fdc5e4SChristoph Hellwig
116229a9287SOmar Sandoval if (data->shallow_depth)
1173f607293SJohn Garry return sbitmap_queue_get_shallow(bt, data->shallow_depth);
118229a9287SOmar Sandoval else
119f4a644dbSOmar Sandoval return __sbitmap_queue_get(bt);
1204bb659b1SJens Axboe }
1214bb659b1SJens Axboe
blk_mq_get_tags(struct blk_mq_alloc_data * data,int nr_tags,unsigned int * offset)122349302daSJens Axboe unsigned long blk_mq_get_tags(struct blk_mq_alloc_data *data, int nr_tags,
123349302daSJens Axboe unsigned int *offset)
124349302daSJens Axboe {
125349302daSJens Axboe struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
126349302daSJens Axboe struct sbitmap_queue *bt = &tags->bitmap_tags;
127349302daSJens Axboe unsigned long ret;
128349302daSJens Axboe
129349302daSJens Axboe if (data->shallow_depth ||data->flags & BLK_MQ_REQ_RESERVED ||
130349302daSJens Axboe data->hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)
131349302daSJens Axboe return 0;
132349302daSJens Axboe ret = __sbitmap_queue_get_batch(bt, nr_tags, offset);
133349302daSJens Axboe *offset += tags->nr_reserved_tags;
134349302daSJens Axboe return ret;
135349302daSJens Axboe }
136349302daSJens Axboe
blk_mq_get_tag(struct blk_mq_alloc_data * data)1374941115bSJens Axboe unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
1384bb659b1SJens Axboe {
1394941115bSJens Axboe struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
1404941115bSJens Axboe struct sbitmap_queue *bt;
14188459642SOmar Sandoval struct sbq_wait_state *ws;
1425d2ee712SJens Axboe DEFINE_SBQ_WAIT(wait);
1434941115bSJens Axboe unsigned int tag_offset;
1444bb659b1SJens Axboe int tag;
1454bb659b1SJens Axboe
1464941115bSJens Axboe if (data->flags & BLK_MQ_REQ_RESERVED) {
1474941115bSJens Axboe if (unlikely(!tags->nr_reserved_tags)) {
1484941115bSJens Axboe WARN_ON_ONCE(1);
149419c3d5eSChristoph Hellwig return BLK_MQ_NO_TAG;
1504941115bSJens Axboe }
151ae0f1a73SJohn Garry bt = &tags->breserved_tags;
1524941115bSJens Axboe tag_offset = 0;
1534941115bSJens Axboe } else {
154ae0f1a73SJohn Garry bt = &tags->bitmap_tags;
1554941115bSJens Axboe tag_offset = tags->nr_reserved_tags;
1564941115bSJens Axboe }
1574941115bSJens Axboe
158200e86b3SJens Axboe tag = __blk_mq_get_tag(data, bt);
15976647368SChristoph Hellwig if (tag != BLK_MQ_NO_TAG)
1604941115bSJens Axboe goto found_tag;
1614bb659b1SJens Axboe
1626f3b0e8bSChristoph Hellwig if (data->flags & BLK_MQ_REQ_NOWAIT)
163419c3d5eSChristoph Hellwig return BLK_MQ_NO_TAG;
1644bb659b1SJens Axboe
1654941115bSJens Axboe ws = bt_wait_ptr(bt, data->hctx);
16635d37c66SJens Axboe do {
167e6fc4649SMing Lei struct sbitmap_queue *bt_prev;
168e6fc4649SMing Lei
169b3223207SBart Van Assche /*
170b3223207SBart Van Assche * We're out of tags on this hardware queue, kick any
171b3223207SBart Van Assche * pending IO submits before going to sleep waiting for
1728cecb07dSJens Axboe * some to complete.
173b3223207SBart Van Assche */
1744941115bSJens Axboe blk_mq_run_hw_queue(data->hctx, false);
175b3223207SBart Van Assche
176080ff351SJens Axboe /*
177080ff351SJens Axboe * Retry tag allocation after running the hardware queue,
178080ff351SJens Axboe * as running the queue may also have found completions.
179080ff351SJens Axboe */
180200e86b3SJens Axboe tag = __blk_mq_get_tag(data, bt);
18176647368SChristoph Hellwig if (tag != BLK_MQ_NO_TAG)
182080ff351SJens Axboe break;
183080ff351SJens Axboe
1845d2ee712SJens Axboe sbitmap_prepare_to_wait(bt, ws, &wait, TASK_UNINTERRUPTIBLE);
1854e5dff41SJens Axboe
1864e5dff41SJens Axboe tag = __blk_mq_get_tag(data, bt);
18776647368SChristoph Hellwig if (tag != BLK_MQ_NO_TAG)
1884e5dff41SJens Axboe break;
1894e5dff41SJens Axboe
190e6fc4649SMing Lei bt_prev = bt;
1914bb659b1SJens Axboe io_schedule();
192cb96a42cSMing Lei
1935d2ee712SJens Axboe sbitmap_finish_wait(bt, ws, &wait);
1945d2ee712SJens Axboe
195cb96a42cSMing Lei data->ctx = blk_mq_get_ctx(data->q);
19661667cb6SGuixin Liu data->hctx = blk_mq_map_queue(data->cmd_flags, data->ctx);
1974941115bSJens Axboe tags = blk_mq_tags_from_data(data);
1984941115bSJens Axboe if (data->flags & BLK_MQ_REQ_RESERVED)
199ae0f1a73SJohn Garry bt = &tags->breserved_tags;
2004941115bSJens Axboe else
201ae0f1a73SJohn Garry bt = &tags->bitmap_tags;
2024941115bSJens Axboe
203e6fc4649SMing Lei /*
204e6fc4649SMing Lei * If destination hw queue is changed, fake wake up on
205e6fc4649SMing Lei * previous queue for compensating the wake up miss, so
206e6fc4649SMing Lei * other allocations on previous queue won't be starved.
207e6fc4649SMing Lei */
208e6fc4649SMing Lei if (bt != bt_prev)
2094acb8341SKeith Busch sbitmap_queue_wake_up(bt_prev, 1);
210e6fc4649SMing Lei
2114941115bSJens Axboe ws = bt_wait_ptr(bt, data->hctx);
2124bb659b1SJens Axboe } while (1);
2134bb659b1SJens Axboe
2145d2ee712SJens Axboe sbitmap_finish_wait(bt, ws, &wait);
2154941115bSJens Axboe
2164941115bSJens Axboe found_tag:
217bf0beec0SMing Lei /*
218bf0beec0SMing Lei * Give up this allocation if the hctx is inactive. The caller will
219bf0beec0SMing Lei * retry on an active hctx.
220bf0beec0SMing Lei */
221bf0beec0SMing Lei if (unlikely(test_bit(BLK_MQ_S_INACTIVE, &data->hctx->state))) {
222bf0beec0SMing Lei blk_mq_put_tag(tags, data->ctx, tag + tag_offset);
223bf0beec0SMing Lei return BLK_MQ_NO_TAG;
224bf0beec0SMing Lei }
2254941115bSJens Axboe return tag + tag_offset;
2264bb659b1SJens Axboe }
2274bb659b1SJens Axboe
blk_mq_put_tag(struct blk_mq_tags * tags,struct blk_mq_ctx * ctx,unsigned int tag)228cae740a0SJohn Garry void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx,
229cae740a0SJohn Garry unsigned int tag)
230320ae51fSJens Axboe {
231415b806dSSagi Grimberg if (!blk_mq_tag_is_reserved(tags, tag)) {
2324bb659b1SJens Axboe const int real_tag = tag - tags->nr_reserved_tags;
2334bb659b1SJens Axboe
23470114c39SJens Axboe BUG_ON(real_tag >= tags->nr_tags);
235ae0f1a73SJohn Garry sbitmap_queue_clear(&tags->bitmap_tags, real_tag, ctx->cpu);
23670114c39SJens Axboe } else {
237ae0f1a73SJohn Garry sbitmap_queue_clear(&tags->breserved_tags, tag, ctx->cpu);
23870114c39SJens Axboe }
239320ae51fSJens Axboe }
240320ae51fSJens Axboe
blk_mq_put_tags(struct blk_mq_tags * tags,int * tag_array,int nr_tags)241f794f335SJens Axboe void blk_mq_put_tags(struct blk_mq_tags *tags, int *tag_array, int nr_tags)
242f794f335SJens Axboe {
243f794f335SJens Axboe sbitmap_queue_clear_batch(&tags->bitmap_tags, tags->nr_reserved_tags,
244f794f335SJens Axboe tag_array, nr_tags);
245f794f335SJens Axboe }
246f794f335SJens Axboe
24788459642SOmar Sandoval struct bt_iter_data {
24888459642SOmar Sandoval struct blk_mq_hw_ctx *hctx;
249fea9f92fSJohn Garry struct request_queue *q;
250fc39f8d2SJohn Garry busy_tag_iter_fn *fn;
25188459642SOmar Sandoval void *data;
25288459642SOmar Sandoval bool reserved;
25388459642SOmar Sandoval };
25488459642SOmar Sandoval
blk_mq_find_and_get_req(struct blk_mq_tags * tags,unsigned int bitnr)2552e315dc0SMing Lei static struct request *blk_mq_find_and_get_req(struct blk_mq_tags *tags,
2562e315dc0SMing Lei unsigned int bitnr)
2572e315dc0SMing Lei {
258bd63141dSMing Lei struct request *rq;
2592e315dc0SMing Lei
260bd63141dSMing Lei rq = tags->rqs[bitnr];
2610a467d0fSJens Axboe if (!rq || rq->tag != bitnr || !req_ref_inc_not_zero(rq))
262bd63141dSMing Lei rq = NULL;
2632e315dc0SMing Lei return rq;
2642e315dc0SMing Lei }
2652e315dc0SMing Lei
bt_iter(struct sbitmap * bitmap,unsigned int bitnr,void * data)26688459642SOmar Sandoval static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
26788459642SOmar Sandoval {
26888459642SOmar Sandoval struct bt_iter_data *iter_data = data;
26988459642SOmar Sandoval struct blk_mq_hw_ctx *hctx = iter_data->hctx;
270fea9f92fSJohn Garry struct request_queue *q = iter_data->q;
271fea9f92fSJohn Garry struct blk_mq_tag_set *set = q->tag_set;
272fea9f92fSJohn Garry struct blk_mq_tags *tags;
27388459642SOmar Sandoval struct request *rq;
2742e315dc0SMing Lei bool ret = true;
27588459642SOmar Sandoval
276fea9f92fSJohn Garry if (blk_mq_is_shared_tags(set->flags))
277fea9f92fSJohn Garry tags = set->shared_tags;
278fea9f92fSJohn Garry else
279fea9f92fSJohn Garry tags = hctx->tags;
280fea9f92fSJohn Garry
2814cf6e6c0SJohn Garry if (!iter_data->reserved)
28288459642SOmar Sandoval bitnr += tags->nr_reserved_tags;
2837f5562d5SJens Axboe /*
2847f5562d5SJens Axboe * We can hit rq == NULL here, because the tagging functions
285c7b1bf5cSBart Van Assche * test and set the bit before assigning ->rqs[].
2867f5562d5SJens Axboe */
2872e315dc0SMing Lei rq = blk_mq_find_and_get_req(tags, bitnr);
2882e315dc0SMing Lei if (!rq)
28988459642SOmar Sandoval return true;
2902e315dc0SMing Lei
291fea9f92fSJohn Garry if (rq->q == q && (!hctx || rq->mq_hctx == hctx))
2922dd6532eSJohn Garry ret = iter_data->fn(rq, iter_data->data);
2932e315dc0SMing Lei blk_mq_put_rq_ref(rq);
2942e315dc0SMing Lei return ret;
29588459642SOmar Sandoval }
29688459642SOmar Sandoval
297c7b1bf5cSBart Van Assche /**
298c7b1bf5cSBart Van Assche * bt_for_each - iterate over the requests associated with a hardware queue
299c7b1bf5cSBart Van Assche * @hctx: Hardware queue to examine.
300fea55691SBart Van Assche * @q: Request queue @hctx is associated with (@hctx->queue).
301c7b1bf5cSBart Van Assche * @bt: sbitmap to examine. This is either the breserved_tags member
302c7b1bf5cSBart Van Assche * or the bitmap_tags member of struct blk_mq_tags.
303c7b1bf5cSBart Van Assche * @fn: Pointer to the function that will be called for each request
304c7b1bf5cSBart Van Assche * associated with @hctx that has been assigned a driver tag.
305fea55691SBart Van Assche * @fn will be called as follows: @fn(rq, @data) where rq is a
306fea55691SBart Van Assche * pointer to a request. Return %true to continue iterating tags;
307fea55691SBart Van Assche * %false to stop.
308fea55691SBart Van Assche * @data: Will be passed as second argument to @fn.
309c7b1bf5cSBart Van Assche * @reserved: Indicates whether @bt is the breserved_tags member or the
310c7b1bf5cSBart Van Assche * bitmap_tags member of struct blk_mq_tags.
311c7b1bf5cSBart Van Assche */
bt_for_each(struct blk_mq_hw_ctx * hctx,struct request_queue * q,struct sbitmap_queue * bt,busy_tag_iter_fn * fn,void * data,bool reserved)312fea9f92fSJohn Garry static void bt_for_each(struct blk_mq_hw_ctx *hctx, struct request_queue *q,
313fea9f92fSJohn Garry struct sbitmap_queue *bt, busy_tag_iter_fn *fn,
314fea9f92fSJohn Garry void *data, bool reserved)
315320ae51fSJens Axboe {
31688459642SOmar Sandoval struct bt_iter_data iter_data = {
31788459642SOmar Sandoval .hctx = hctx,
31888459642SOmar Sandoval .fn = fn,
31988459642SOmar Sandoval .data = data,
32088459642SOmar Sandoval .reserved = reserved,
321fea9f92fSJohn Garry .q = q,
32288459642SOmar Sandoval };
32388459642SOmar Sandoval
32488459642SOmar Sandoval sbitmap_for_each_set(&bt->sb, bt_iter, &iter_data);
32588459642SOmar Sandoval }
32688459642SOmar Sandoval
32788459642SOmar Sandoval struct bt_tags_iter_data {
32888459642SOmar Sandoval struct blk_mq_tags *tags;
32988459642SOmar Sandoval busy_tag_iter_fn *fn;
33088459642SOmar Sandoval void *data;
331602380d2SMing Lei unsigned int flags;
33288459642SOmar Sandoval };
33388459642SOmar Sandoval
334602380d2SMing Lei #define BT_TAG_ITER_RESERVED (1 << 0)
335602380d2SMing Lei #define BT_TAG_ITER_STARTED (1 << 1)
33622f614bcSMing Lei #define BT_TAG_ITER_STATIC_RQS (1 << 2)
337602380d2SMing Lei
bt_tags_iter(struct sbitmap * bitmap,unsigned int bitnr,void * data)33888459642SOmar Sandoval static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
33988459642SOmar Sandoval {
34088459642SOmar Sandoval struct bt_tags_iter_data *iter_data = data;
34188459642SOmar Sandoval struct blk_mq_tags *tags = iter_data->tags;
34281481eb4SChristoph Hellwig struct request *rq;
3432e315dc0SMing Lei bool ret = true;
3442e315dc0SMing Lei bool iter_static_rqs = !!(iter_data->flags & BT_TAG_ITER_STATIC_RQS);
3454bb659b1SJens Axboe
3464cf6e6c0SJohn Garry if (!(iter_data->flags & BT_TAG_ITER_RESERVED))
34788459642SOmar Sandoval bitnr += tags->nr_reserved_tags;
3484bb659b1SJens Axboe
3497f5562d5SJens Axboe /*
3507f5562d5SJens Axboe * We can hit rq == NULL here, because the tagging functions
35122f614bcSMing Lei * test and set the bit before assigning ->rqs[].
3527f5562d5SJens Axboe */
3532e315dc0SMing Lei if (iter_static_rqs)
35422f614bcSMing Lei rq = tags->static_rqs[bitnr];
35522f614bcSMing Lei else
3562e315dc0SMing Lei rq = blk_mq_find_and_get_req(tags, bitnr);
357602380d2SMing Lei if (!rq)
35888459642SOmar Sandoval return true;
3592e315dc0SMing Lei
3602e315dc0SMing Lei if (!(iter_data->flags & BT_TAG_ITER_STARTED) ||
3612e315dc0SMing Lei blk_mq_request_started(rq))
3622dd6532eSJohn Garry ret = iter_data->fn(rq, iter_data->data);
3632e315dc0SMing Lei if (!iter_static_rqs)
3642e315dc0SMing Lei blk_mq_put_rq_ref(rq);
3652e315dc0SMing Lei return ret;
36681481eb4SChristoph Hellwig }
3674bb659b1SJens Axboe
368c7b1bf5cSBart Van Assche /**
369c7b1bf5cSBart Van Assche * bt_tags_for_each - iterate over the requests in a tag map
370c7b1bf5cSBart Van Assche * @tags: Tag map to iterate over.
371c7b1bf5cSBart Van Assche * @bt: sbitmap to examine. This is either the breserved_tags member
372c7b1bf5cSBart Van Assche * or the bitmap_tags member of struct blk_mq_tags.
373c7b1bf5cSBart Van Assche * @fn: Pointer to the function that will be called for each started
374fea55691SBart Van Assche * request. @fn will be called as follows: @fn(rq, @data) where rq
375fea55691SBart Van Assche * is a pointer to a request. Return %true to continue iterating
376fea55691SBart Van Assche * tags; %false to stop.
377c7b1bf5cSBart Van Assche * @data: Will be passed as second argument to @fn.
378602380d2SMing Lei * @flags: BT_TAG_ITER_*
379c7b1bf5cSBart Van Assche */
bt_tags_for_each(struct blk_mq_tags * tags,struct sbitmap_queue * bt,busy_tag_iter_fn * fn,void * data,unsigned int flags)38088459642SOmar Sandoval static void bt_tags_for_each(struct blk_mq_tags *tags, struct sbitmap_queue *bt,
381602380d2SMing Lei busy_tag_iter_fn *fn, void *data, unsigned int flags)
382f26cdc85SKeith Busch {
38388459642SOmar Sandoval struct bt_tags_iter_data iter_data = {
38488459642SOmar Sandoval .tags = tags,
38588459642SOmar Sandoval .fn = fn,
38688459642SOmar Sandoval .data = data,
387602380d2SMing Lei .flags = flags,
38888459642SOmar Sandoval };
389f26cdc85SKeith Busch
39088459642SOmar Sandoval if (tags->rqs)
39188459642SOmar Sandoval sbitmap_for_each_set(&bt->sb, bt_tags_iter, &iter_data);
392f26cdc85SKeith Busch }
393f26cdc85SKeith Busch
/*
 * Walk the reserved bitmap (if @tags has reserved tags) and then the
 * regular bitmap of @tags, calling @fn(rq, @priv) as selected by @flags.
 * Callers must not pass BT_TAG_ITER_RESERVED themselves; it is added here
 * for the reserved pass.
 */
static void __blk_mq_all_tag_iter(struct blk_mq_tags *tags,
				  busy_tag_iter_fn *fn, void *priv,
				  unsigned int flags)
{
	const bool has_reserved = tags->nr_reserved_tags;

	WARN_ON_ONCE(flags & BT_TAG_ITER_RESERVED);

	if (has_reserved)
		bt_tags_for_each(tags, &tags->breserved_tags, fn, priv,
				 flags | BT_TAG_ITER_RESERVED);

	bt_tags_for_each(tags, &tags->bitmap_tags, fn, priv, flags);
}
404602380d2SMing Lei
405c7b1bf5cSBart Van Assche /**
406602380d2SMing Lei * blk_mq_all_tag_iter - iterate over all requests in a tag map
407c7b1bf5cSBart Van Assche * @tags: Tag map to iterate over.
408602380d2SMing Lei * @fn: Pointer to the function that will be called for each
409fea55691SBart Van Assche * request. @fn will be called as follows: @fn(rq, @priv) where rq
410fea55691SBart Van Assche * is a pointer to a request. Return %true to continue iterating
411fea55691SBart Van Assche * tags; %false to stop.
412c7b1bf5cSBart Van Assche * @priv: Will be passed as second argument to @fn.
41322f614bcSMing Lei *
41422f614bcSMing Lei * Caller has to pass the tag map from which requests are allocated.
415c7b1bf5cSBart Van Assche */
blk_mq_all_tag_iter(struct blk_mq_tags * tags,busy_tag_iter_fn * fn,void * priv)416602380d2SMing Lei void blk_mq_all_tag_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn,
417602380d2SMing Lei void *priv)
418f26cdc85SKeith Busch {
419a8a5e383SBaolin Wang __blk_mq_all_tag_iter(tags, fn, priv, BT_TAG_ITER_STATIC_RQS);
420f26cdc85SKeith Busch }
421f26cdc85SKeith Busch
422c7b1bf5cSBart Van Assche /**
423c7b1bf5cSBart Van Assche * blk_mq_tagset_busy_iter - iterate over all started requests in a tag set
424c7b1bf5cSBart Van Assche * @tagset: Tag set to iterate over.
425c7b1bf5cSBart Van Assche * @fn: Pointer to the function that will be called for each started
4260b507305SBart Van Assche * request. @fn will be called as follows: @fn(rq, @priv) where
4270b507305SBart Van Assche * rq is a pointer to a request. Return true to continue iterating
4280b507305SBart Van Assche * tags, false to stop.
429c7b1bf5cSBart Van Assche * @priv: Will be passed as second argument to @fn.
4302e315dc0SMing Lei *
4312e315dc0SMing Lei * We grab one request reference before calling @fn and release it after
4322e315dc0SMing Lei * @fn returns.
433c7b1bf5cSBart Van Assche */
blk_mq_tagset_busy_iter(struct blk_mq_tag_set * tagset,busy_tag_iter_fn * fn,void * priv)434e0489487SSagi Grimberg void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
435e0489487SSagi Grimberg busy_tag_iter_fn *fn, void *priv)
436e0489487SSagi Grimberg {
4370994c64eSJohn Garry unsigned int flags = tagset->flags;
438995412e2SMing Lei int i, nr_tags, srcu_idx;
439995412e2SMing Lei
440995412e2SMing Lei srcu_idx = srcu_read_lock(&tagset->tags_srcu);
441e0489487SSagi Grimberg
4420994c64eSJohn Garry nr_tags = blk_mq_is_shared_tags(flags) ? 1 : tagset->nr_hw_queues;
4430994c64eSJohn Garry
4440994c64eSJohn Garry for (i = 0; i < nr_tags; i++) {
445e0489487SSagi Grimberg if (tagset->tags && tagset->tags[i])
446602380d2SMing Lei __blk_mq_all_tag_iter(tagset->tags[i], fn, priv,
447602380d2SMing Lei BT_TAG_ITER_STARTED);
448e0489487SSagi Grimberg }
449995412e2SMing Lei srcu_read_unlock(&tagset->tags_srcu, srcu_idx);
450e0489487SSagi Grimberg }
451e0489487SSagi Grimberg EXPORT_SYMBOL(blk_mq_tagset_busy_iter);
452e0489487SSagi Grimberg
blk_mq_tagset_count_completed_rqs(struct request * rq,void * data)4532dd6532eSJohn Garry static bool blk_mq_tagset_count_completed_rqs(struct request *rq, void *data)
454f9934a80SMing Lei {
455f9934a80SMing Lei unsigned *count = data;
456f9934a80SMing Lei
457f9934a80SMing Lei if (blk_mq_request_completed(rq))
458f9934a80SMing Lei (*count)++;
459f9934a80SMing Lei return true;
460f9934a80SMing Lei }
461f9934a80SMing Lei
462f9934a80SMing Lei /**
4639cf1adc6SBhaskar Chowdhury * blk_mq_tagset_wait_completed_request - Wait until all scheduled request
4649cf1adc6SBhaskar Chowdhury * completions have finished.
465f9934a80SMing Lei * @tagset: Tag set to drain completed request
466f9934a80SMing Lei *
467f9934a80SMing Lei * Note: This function has to be run after all IO queues are shutdown
468f9934a80SMing Lei */
blk_mq_tagset_wait_completed_request(struct blk_mq_tag_set * tagset)469f9934a80SMing Lei void blk_mq_tagset_wait_completed_request(struct blk_mq_tag_set *tagset)
470f9934a80SMing Lei {
471f9934a80SMing Lei while (true) {
472f9934a80SMing Lei unsigned count = 0;
473f9934a80SMing Lei
474f9934a80SMing Lei blk_mq_tagset_busy_iter(tagset,
475f9934a80SMing Lei blk_mq_tagset_count_completed_rqs, &count);
476f9934a80SMing Lei if (!count)
477f9934a80SMing Lei break;
478f9934a80SMing Lei msleep(5);
479f9934a80SMing Lei }
480f9934a80SMing Lei }
481f9934a80SMing Lei EXPORT_SYMBOL(blk_mq_tagset_wait_completed_request);
482f9934a80SMing Lei
483c7b1bf5cSBart Van Assche /**
484c7b1bf5cSBart Van Assche * blk_mq_queue_tag_busy_iter - iterate over all requests with a driver tag
485c7b1bf5cSBart Van Assche * @q: Request queue to examine.
486c7b1bf5cSBart Van Assche * @fn: Pointer to the function that will be called for each request
487fea55691SBart Van Assche * on @q. @fn will be called as follows: @fn(rq, @priv) where rq
488fea55691SBart Van Assche * is a pointer to a request and hctx points to the hardware queue
489fea55691SBart Van Assche * associated with the request.
490fea55691SBart Van Assche * @priv: Will be passed as second argument to @fn.
491c7b1bf5cSBart Van Assche *
492c7b1bf5cSBart Van Assche * Note: if @q->tag_set is shared with other request queues then @fn will be
493c7b1bf5cSBart Van Assche * called for all requests on all queues that share that tag set and not only
494c7b1bf5cSBart Van Assche * for requests associated with @q.
495c7b1bf5cSBart Van Assche */
blk_mq_queue_tag_busy_iter(struct request_queue * q,busy_tag_iter_fn * fn,void * priv)496fc39f8d2SJohn Garry void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_tag_iter_fn *fn,
49781481eb4SChristoph Hellwig void *priv)
498320ae51fSJens Axboe {
499995412e2SMing Lei int srcu_idx;
500995412e2SMing Lei
501f5bbbbe4SJianchao Wang /*
502*d0c98769SFengnan Chang * __blk_mq_update_nr_hw_queues() updates nr_hw_queues and queue_hw_ctx
503c7b1bf5cSBart Van Assche * while the queue is frozen. So we can use q_usage_counter to avoid
50476cffccdSyangerkun * racing with it.
505f5bbbbe4SJianchao Wang */
506530ca2c9SKeith Busch if (!percpu_ref_tryget(&q->q_usage_counter))
507f5bbbbe4SJianchao Wang return;
5080bf6cd5bSChristoph Hellwig
509995412e2SMing Lei srcu_idx = srcu_read_lock(&q->tag_set->tags_srcu);
510fea9f92fSJohn Garry if (blk_mq_is_shared_tags(q->tag_set->flags)) {
511fea9f92fSJohn Garry struct blk_mq_tags *tags = q->tag_set->shared_tags;
512fea9f92fSJohn Garry struct sbitmap_queue *bresv = &tags->breserved_tags;
513fea9f92fSJohn Garry struct sbitmap_queue *btags = &tags->bitmap_tags;
514fea9f92fSJohn Garry
515fea9f92fSJohn Garry if (tags->nr_reserved_tags)
516fea9f92fSJohn Garry bt_for_each(NULL, q, bresv, fn, priv, true);
517fea9f92fSJohn Garry bt_for_each(NULL, q, btags, fn, priv, false);
518fea9f92fSJohn Garry } else {
519fea9f92fSJohn Garry struct blk_mq_hw_ctx *hctx;
5204f481208SMing Lei unsigned long i;
521fea9f92fSJohn Garry
5220bf6cd5bSChristoph Hellwig queue_for_each_hw_ctx(q, hctx, i) {
52381481eb4SChristoph Hellwig struct blk_mq_tags *tags = hctx->tags;
524fea9f92fSJohn Garry struct sbitmap_queue *bresv = &tags->breserved_tags;
525fea9f92fSJohn Garry struct sbitmap_queue *btags = &tags->bitmap_tags;
526320ae51fSJens Axboe
5270bf6cd5bSChristoph Hellwig /*
528c7b1bf5cSBart Van Assche * If no software queues are currently mapped to this
5290bf6cd5bSChristoph Hellwig * hardware queue, there's nothing to check
5300bf6cd5bSChristoph Hellwig */
5310bf6cd5bSChristoph Hellwig if (!blk_mq_hw_queue_mapped(hctx))
5320bf6cd5bSChristoph Hellwig continue;
5330bf6cd5bSChristoph Hellwig
534320ae51fSJens Axboe if (tags->nr_reserved_tags)
535fea9f92fSJohn Garry bt_for_each(hctx, q, bresv, fn, priv, true);
536fea9f92fSJohn Garry bt_for_each(hctx, q, btags, fn, priv, false);
537fea9f92fSJohn Garry }
538320ae51fSJens Axboe }
539995412e2SMing Lei srcu_read_unlock(&q->tag_set->tags_srcu, srcu_idx);
540530ca2c9SKeith Busch blk_queue_exit(q);
5410bf6cd5bSChristoph Hellwig }
542320ae51fSJens Axboe
/*
 * Initialise @bt as an sbitmap queue of @depth bits on NUMA node @node.
 * Passes -1 as the shift argument and allocates with GFP_KERNEL.
 * Returns the result of sbitmap_queue_init_node().
 */
static int bt_alloc(struct sbitmap_queue *bt, unsigned int depth,
		    bool round_robin, int node)
{
	const int shift = -1;

	return sbitmap_queue_init_node(bt, depth, shift, round_robin,
				       GFP_KERNEL, node);
}
5494bb659b1SJens Axboe
blk_mq_init_tags(unsigned int total_tags,unsigned int reserved_tags,unsigned int flags,int node)550320ae51fSJens Axboe struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
551ce32496eSChristoph Hellwig unsigned int reserved_tags, unsigned int flags, int node)
552320ae51fSJens Axboe {
55368ed4512SChristoph Hellwig unsigned int depth = total_tags - reserved_tags;
554ce32496eSChristoph Hellwig bool round_robin = flags & BLK_MQ_F_TAG_RR;
555320ae51fSJens Axboe struct blk_mq_tags *tags;
556320ae51fSJens Axboe
557320ae51fSJens Axboe if (total_tags > BLK_MQ_TAG_MAX) {
558320ae51fSJens Axboe pr_err("blk-mq: tag depth too large\n");
559320ae51fSJens Axboe return NULL;
560320ae51fSJens Axboe }
561320ae51fSJens Axboe
562320ae51fSJens Axboe tags = kzalloc_node(sizeof(*tags), GFP_KERNEL, node);
563320ae51fSJens Axboe if (!tags)
564320ae51fSJens Axboe return NULL;
565320ae51fSJens Axboe
566320ae51fSJens Axboe tags->nr_tags = total_tags;
567320ae51fSJens Axboe tags->nr_reserved_tags = reserved_tags;
568bd63141dSMing Lei spin_lock_init(&tags->lock);
569670bfe68SYu Kuai INIT_LIST_HEAD(&tags->page_list);
570670bfe68SYu Kuai
57168ed4512SChristoph Hellwig if (bt_alloc(&tags->bitmap_tags, depth, round_robin, node))
57268ed4512SChristoph Hellwig goto out_free_tags;
57368ed4512SChristoph Hellwig if (bt_alloc(&tags->breserved_tags, reserved_tags, round_robin, node))
57468ed4512SChristoph Hellwig goto out_free_bitmap_tags;
575320ae51fSJens Axboe
57668ed4512SChristoph Hellwig return tags;
57768ed4512SChristoph Hellwig
57868ed4512SChristoph Hellwig out_free_bitmap_tags:
57968ed4512SChristoph Hellwig sbitmap_queue_free(&tags->bitmap_tags);
58068ed4512SChristoph Hellwig out_free_tags:
5814d063237SHannes Reinecke kfree(tags);
5824d063237SHannes Reinecke return NULL;
5834d063237SHannes Reinecke }
584320ae51fSJens Axboe
blk_mq_free_tags_callback(struct rcu_head * head)585ad0d05dbSMing Lei static void blk_mq_free_tags_callback(struct rcu_head *head)
586ad0d05dbSMing Lei {
587ad0d05dbSMing Lei struct blk_mq_tags *tags = container_of(head, struct blk_mq_tags,
588ad0d05dbSMing Lei rcu_head);
589ad0d05dbSMing Lei struct page *page;
590ad0d05dbSMing Lei
591ad0d05dbSMing Lei while (!list_empty(&tags->page_list)) {
592ad0d05dbSMing Lei page = list_first_entry(&tags->page_list, struct page, lru);
593ad0d05dbSMing Lei list_del_init(&page->lru);
594ad0d05dbSMing Lei /*
595ad0d05dbSMing Lei * Remove kmemleak object previously allocated in
596ad0d05dbSMing Lei * blk_mq_alloc_rqs().
597ad0d05dbSMing Lei */
598ad0d05dbSMing Lei kmemleak_free(page_address(page));
599ad0d05dbSMing Lei __free_pages(page, page->private);
600ad0d05dbSMing Lei }
601ad0d05dbSMing Lei kfree(tags);
602ad0d05dbSMing Lei }
603ad0d05dbSMing Lei
blk_mq_free_tags(struct blk_mq_tag_set * set,struct blk_mq_tags * tags)6049ad8e5afSMing Lei void blk_mq_free_tags(struct blk_mq_tag_set *set, struct blk_mq_tags *tags)
605320ae51fSJens Axboe {
606ae0f1a73SJohn Garry sbitmap_queue_free(&tags->bitmap_tags);
607ae0f1a73SJohn Garry sbitmap_queue_free(&tags->breserved_tags);
608670bfe68SYu Kuai
609670bfe68SYu Kuai /* if tags pages is not allocated yet, free tags directly */
610670bfe68SYu Kuai if (list_empty(&tags->page_list)) {
611670bfe68SYu Kuai kfree(tags);
612670bfe68SYu Kuai return;
613670bfe68SYu Kuai }
614670bfe68SYu Kuai
615ad0d05dbSMing Lei call_srcu(&set->tags_srcu, &tags->rcu_head, blk_mq_free_tags_callback);
616320ae51fSJens Axboe }
617320ae51fSJens Axboe
blk_mq_tag_resize_shared_tags(struct blk_mq_tag_set * set,unsigned int size)618079a2e3eSJohn Garry void blk_mq_tag_resize_shared_tags(struct blk_mq_tag_set *set, unsigned int size)
61932bc15afSJohn Garry {
620079a2e3eSJohn Garry struct blk_mq_tags *tags = set->shared_tags;
621e155b0c2SJohn Garry
622ae0f1a73SJohn Garry sbitmap_queue_resize(&tags->bitmap_tags, size - set->reserved_tags);
62332bc15afSJohn Garry }
62432bc15afSJohn Garry
blk_mq_tag_update_sched_shared_tags(struct request_queue * q,unsigned int nr)625dc96cefeSYu Kuai void blk_mq_tag_update_sched_shared_tags(struct request_queue *q,
626dc96cefeSYu Kuai unsigned int nr)
627a7e7388dSJohn Garry {
628079a2e3eSJohn Garry sbitmap_queue_resize(&q->sched_shared_tags->bitmap_tags,
629dc96cefeSYu Kuai nr - q->tag_set->reserved_tags);
630a7e7388dSJohn Garry }
631a7e7388dSJohn Garry
632205fb5f5SBart Van Assche /**
633205fb5f5SBart Van Assche * blk_mq_unique_tag() - return a tag that is unique queue-wide
634205fb5f5SBart Van Assche * @rq: request for which to compute a unique tag
635205fb5f5SBart Van Assche *
636205fb5f5SBart Van Assche * The tag field in struct request is unique per hardware queue but not over
637205fb5f5SBart Van Assche * all hardware queues. Hence this function that returns a tag with the
638205fb5f5SBart Van Assche * hardware context index in the upper bits and the per hardware queue tag in
639205fb5f5SBart Van Assche * the lower bits.
640205fb5f5SBart Van Assche *
641205fb5f5SBart Van Assche * Note: When called for a request that is queued on a non-multiqueue request
642205fb5f5SBart Van Assche * queue, the hardware context index is set to zero.
643205fb5f5SBart Van Assche */
blk_mq_unique_tag(struct request * rq)644205fb5f5SBart Van Assche u32 blk_mq_unique_tag(struct request *rq)
645205fb5f5SBart Van Assche {
646ea4f995eSJens Axboe return (rq->mq_hctx->queue_num << BLK_MQ_UNIQUE_TAG_BITS) |
647205fb5f5SBart Van Assche (rq->tag & BLK_MQ_UNIQUE_TAG_MASK);
648205fb5f5SBart Van Assche }
649205fb5f5SBart Van Assche EXPORT_SYMBOL(blk_mq_unique_tag);
650