xref: /linux/block/blk-zoned.c (revision 0fc8f6200d2313278fbf4539bbab74677c685531)
13dcf60bcSChristoph Hellwig // SPDX-License-Identifier: GPL-2.0
26a0cb1bcSHannes Reinecke /*
36a0cb1bcSHannes Reinecke  * Zoned block device handling
46a0cb1bcSHannes Reinecke  *
56a0cb1bcSHannes Reinecke  * Copyright (c) 2015, Hannes Reinecke
66a0cb1bcSHannes Reinecke  * Copyright (c) 2015, SUSE Linux GmbH
76a0cb1bcSHannes Reinecke  *
86a0cb1bcSHannes Reinecke  * Copyright (c) 2016, Damien Le Moal
96a0cb1bcSHannes Reinecke  * Copyright (c) 2016, Western Digital
10dd291d77SDamien Le Moal  * Copyright (c) 2024, Western Digital Corporation or its affiliates.
116a0cb1bcSHannes Reinecke  */
126a0cb1bcSHannes Reinecke 
136a0cb1bcSHannes Reinecke #include <linux/kernel.h>
146a0cb1bcSHannes Reinecke #include <linux/blkdev.h>
15bf505456SDamien Le Moal #include <linux/blk-mq.h>
16dd291d77SDamien Le Moal #include <linux/spinlock.h>
174122fef1SDamien Le Moal #include <linux/refcount.h>
18dd291d77SDamien Le Moal #include <linux/mempool.h>
191365b690SDamien Le Moal #include <linux/kthread.h>
201365b690SDamien Le Moal #include <linux/freezer.h>
216a0cb1bcSHannes Reinecke 
224cc21a00SJohannes Thumshirn #include <trace/events/block.h>
234cc21a00SJohannes Thumshirn 
24a2d6b3a2SDamien Le Moal #include "blk.h"
25dd291d77SDamien Le Moal #include "blk-mq-sched.h"
26d9f1439aSDamien Le Moal #include "blk-mq-debugfs.h"
27a2d6b3a2SDamien Le Moal 
2802694e86SChaitanya Kulkarni #define ZONE_COND_NAME(name) [BLK_ZONE_COND_##name] = #name
2902694e86SChaitanya Kulkarni static const char *const zone_cond_name[] = {
3002694e86SChaitanya Kulkarni 	ZONE_COND_NAME(NOT_WP),
3102694e86SChaitanya Kulkarni 	ZONE_COND_NAME(EMPTY),
3202694e86SChaitanya Kulkarni 	ZONE_COND_NAME(IMP_OPEN),
3302694e86SChaitanya Kulkarni 	ZONE_COND_NAME(EXP_OPEN),
3402694e86SChaitanya Kulkarni 	ZONE_COND_NAME(CLOSED),
3502694e86SChaitanya Kulkarni 	ZONE_COND_NAME(READONLY),
3602694e86SChaitanya Kulkarni 	ZONE_COND_NAME(FULL),
3702694e86SChaitanya Kulkarni 	ZONE_COND_NAME(OFFLINE),
380bf0e2e4SDamien Le Moal 	ZONE_COND_NAME(ACTIVE),
3902694e86SChaitanya Kulkarni };
4002694e86SChaitanya Kulkarni #undef ZONE_COND_NAME
4102694e86SChaitanya Kulkarni 
42dd291d77SDamien Le Moal /*
43dd291d77SDamien Le Moal  * Per-zone write plug.
44dd291d77SDamien Le Moal  * @node: hlist_node structure for managing the plug using a hash table.
451365b690SDamien Le Moal  * @entry: list_head structure for listing the plug in the disk list of active
461365b690SDamien Le Moal  *         zone write plugs.
47ca1a897fSDamien Le Moal  * @bio_list: The list of BIOs that are currently plugged.
48ca1a897fSDamien Le Moal  * @bio_work: Work struct to handle issuing of plugged BIOs
49ca1a897fSDamien Le Moal  * @rcu_head: RCU head to free zone write plugs with an RCU grace period.
50ca1a897fSDamien Le Moal  * @disk: The gendisk the plug belongs to.
51ca1a897fSDamien Le Moal  * @lock: Spinlock to atomically manipulate the plug.
52dd291d77SDamien Le Moal  * @ref: Zone write plug reference counter. A zone write plug reference is
53dd291d77SDamien Le Moal  *       always at least 1 when the plug is hashed in the disk plug hash table.
54dd291d77SDamien Le Moal  *       The reference is incremented whenever a new BIO needing plugging is
55dd291d77SDamien Le Moal  *       submitted and when a function needs to manipulate a plug. The
56dd291d77SDamien Le Moal  *       reference count is decremented whenever a plugged BIO completes and
57dd291d77SDamien Le Moal  *       when a function that referenced the plug returns. The initial
58dd291d77SDamien Le Moal  *       reference is dropped whenever the zone of the zone write plug is reset,
59dd291d77SDamien Le Moal  *       finished and when the zone becomes full (last write BIO to the zone
60dd291d77SDamien Le Moal  *       completes).
61dd291d77SDamien Le Moal  * @flags: Flags indicating the plug state.
62dd291d77SDamien Le Moal  * @zone_no: The number of the zone the plug is managing.
63dd291d77SDamien Le Moal  * @wp_offset: The zone write pointer location relative to the start of the zone
64dd291d77SDamien Le Moal  *             as a number of 512B sectors.
650bf0e2e4SDamien Le Moal  * @cond: Condition of the zone
66dd291d77SDamien Le Moal  */
67dd291d77SDamien Le Moal struct blk_zone_wplug {
68dd291d77SDamien Le Moal 	struct hlist_node	node;
691365b690SDamien Le Moal 	struct list_head	entry;
70dd291d77SDamien Le Moal 	struct bio_list		bio_list;
71dd291d77SDamien Le Moal 	struct work_struct	bio_work;
72dd291d77SDamien Le Moal 	struct rcu_head		rcu_head;
73dd291d77SDamien Le Moal 	struct gendisk		*disk;
74ca1a897fSDamien Le Moal 	spinlock_t		lock;
75ca1a897fSDamien Le Moal 	refcount_t		ref;
76ca1a897fSDamien Le Moal 	unsigned int		flags;
77ca1a897fSDamien Le Moal 	unsigned int		zone_no;
78ca1a897fSDamien Le Moal 	unsigned int		wp_offset;
790bf0e2e4SDamien Le Moal 	enum blk_zone_cond	cond;
80dd291d77SDamien Le Moal };
81dd291d77SDamien Le Moal 
disk_need_zone_resources(struct gendisk * disk)8231f0656aSDamien Le Moal static inline bool disk_need_zone_resources(struct gendisk *disk)
8331f0656aSDamien Le Moal {
8431f0656aSDamien Le Moal 	/*
8531f0656aSDamien Le Moal 	 * All request-based zoned devices need zone resources so that the
8631f0656aSDamien Le Moal 	 * block layer can automatically handle write BIO plugging. BIO-based
8731f0656aSDamien Le Moal 	 * device drivers (e.g. DM devices) are normally responsible for
8831f0656aSDamien Le Moal 	 * handling zone write ordering and do not need zone resources, unless
8931f0656aSDamien Le Moal 	 * the driver requires zone append emulation.
9031f0656aSDamien Le Moal 	 */
9131f0656aSDamien Le Moal 	return queue_is_mq(disk->queue) ||
9231f0656aSDamien Le Moal 		queue_emulates_zone_append(disk->queue);
9331f0656aSDamien Le Moal }
9431f0656aSDamien Le Moal 
disk_zone_wplugs_hash_size(struct gendisk * disk)95efae226cSDamien Le Moal static inline unsigned int disk_zone_wplugs_hash_size(struct gendisk *disk)
96efae226cSDamien Le Moal {
97efae226cSDamien Le Moal 	return 1U << disk->zone_wplugs_hash_bits;
98efae226cSDamien Le Moal }
99efae226cSDamien Le Moal 
/*
 * Bits of the flags field of struct blk_zone_wplug.
 *
 * BLK_ZONE_WPLUG_PLUGGED
 *	The zone write plug is plugged: write BIOs are being throttled because
 *	a write BIO is already being executed or because the plug BIO list is
 *	not empty.
 *
 * BLK_ZONE_WPLUG_NEED_WP_UPDATE
 *	Track of the zone write pointer offset was lost and the offset needs
 *	to be updated.
 *
 * BLK_ZONE_WPLUG_DEAD
 *	The zone write plug will be removed from the disk hash table of zone
 *	write plugs when its last reference is dropped. When set, the initial
 *	extra reference on the plug has already been dropped, so the reference
 *	count is the current number of active users (code contexts, or BIOs
 *	and requests in flight). Set when a zone is reset, finished or becomes
 *	full.
 */
#define BLK_ZONE_WPLUG_PLUGGED		(1U << 0)
#define BLK_ZONE_WPLUG_NEED_WP_UPDATE	(1U << 1)
#define BLK_ZONE_WPLUG_DEAD		(1U << 2)
118dd291d77SDamien Le Moal 
11902694e86SChaitanya Kulkarni /**
12041ee77b7SDamien Le Moal  * blk_zone_cond_str - Return a zone condition name string
12141ee77b7SDamien Le Moal  * @zone_cond: a zone condition BLK_ZONE_COND_name
12202694e86SChaitanya Kulkarni  *
12341ee77b7SDamien Le Moal  * Convert a BLK_ZONE_COND_name zone condition into the string "name". Useful
12441ee77b7SDamien Le Moal  * for the debugging and tracing zone conditions. For an invalid zone
12541ee77b7SDamien Le Moal  * conditions, the string "UNKNOWN" is returned.
12602694e86SChaitanya Kulkarni  */
blk_zone_cond_str(enum blk_zone_cond zone_cond)12702694e86SChaitanya Kulkarni const char *blk_zone_cond_str(enum blk_zone_cond zone_cond)
12802694e86SChaitanya Kulkarni {
12902694e86SChaitanya Kulkarni 	static const char *zone_cond_str = "UNKNOWN";
13002694e86SChaitanya Kulkarni 
13102694e86SChaitanya Kulkarni 	if (zone_cond < ARRAY_SIZE(zone_cond_name) && zone_cond_name[zone_cond])
13202694e86SChaitanya Kulkarni 		zone_cond_str = zone_cond_name[zone_cond];
13302694e86SChaitanya Kulkarni 
13402694e86SChaitanya Kulkarni 	return zone_cond_str;
13502694e86SChaitanya Kulkarni }
13602694e86SChaitanya Kulkarni EXPORT_SYMBOL_GPL(blk_zone_cond_str);
13702694e86SChaitanya Kulkarni 
/*
 * Record the condition @cond for zone number @zno in the @zones_cond array.
 *
 * The implicit open, explicit open and closed conditions are all stored as
 * BLK_ZONE_COND_ACTIVE. Any other condition is stored unchanged. Nothing is
 * done if @zones_cond is NULL.
 */
static void blk_zone_set_cond(u8 *zones_cond, unsigned int zno,
			      enum blk_zone_cond cond)
{
	if (!zones_cond)
		return;

	/* Fold the three "zone has been written" conditions into ACTIVE. */
	if (cond == BLK_ZONE_COND_IMP_OPEN ||
	    cond == BLK_ZONE_COND_EXP_OPEN ||
	    cond == BLK_ZONE_COND_CLOSED)
		cond = BLK_ZONE_COND_ACTIVE;

	zones_cond[zno] = cond;
}
1600bf0e2e4SDamien Le Moal 
/*
 * Update the recorded condition of the zone of @disk containing @sector to
 * @cond. The disk zones_cond array is accessed under the RCU read lock;
 * nothing is done if the disk has no such array.
 */
static void disk_zone_set_cond(struct gendisk *disk, sector_t sector,
			       enum blk_zone_cond cond)
{
	unsigned int zno;
	u8 *conds;
	u8 cur;

	rcu_read_lock();

	conds = rcu_dereference(disk->zones_cond);
	if (!conds)
		goto unlock;

	zno = disk_zone_no(disk, sector);
	cur = conds[zno];

	/*
	 * The condition of conventional, read-only and offline zones never
	 * changes: leave such zones alone.
	 */
	if (cur != BLK_ZONE_COND_NOT_WP &&
	    cur != BLK_ZONE_COND_READONLY &&
	    cur != BLK_ZONE_COND_OFFLINE)
		blk_zone_set_cond(conds, zno, cond);

unlock:
	rcu_read_unlock();
}
1880bf0e2e4SDamien Le Moal 
1896e945ffbSDamien Le Moal /**
1906e945ffbSDamien Le Moal  * bdev_zone_is_seq - check if a sector belongs to a sequential write zone
1916e945ffbSDamien Le Moal  * @bdev:       block device to check
1926e945ffbSDamien Le Moal  * @sector:     sector number
1936e945ffbSDamien Le Moal  *
1946e945ffbSDamien Le Moal  * Check if @sector on @bdev is contained in a sequential write required zone.
1956e945ffbSDamien Le Moal  */
bdev_zone_is_seq(struct block_device * bdev,sector_t sector)1966e945ffbSDamien Le Moal bool bdev_zone_is_seq(struct block_device *bdev, sector_t sector)
1976e945ffbSDamien Le Moal {
1986e945ffbSDamien Le Moal 	struct gendisk *disk = bdev->bd_disk;
1996e945ffbSDamien Le Moal 	unsigned int zno = disk_zone_no(disk, sector);
2006e945ffbSDamien Le Moal 	bool is_seq = false;
2016e945ffbSDamien Le Moal 	u8 *zones_cond;
2026e945ffbSDamien Le Moal 
2036e945ffbSDamien Le Moal 	if (!bdev_is_zoned(bdev))
2046e945ffbSDamien Le Moal 		return false;
2056e945ffbSDamien Le Moal 
2066e945ffbSDamien Le Moal 	rcu_read_lock();
2076e945ffbSDamien Le Moal 	zones_cond = rcu_dereference(disk->zones_cond);
2086e945ffbSDamien Le Moal 	if (zones_cond && zno < disk->nr_zones)
2096e945ffbSDamien Le Moal 		is_seq = zones_cond[zno] != BLK_ZONE_COND_NOT_WP;
2106e945ffbSDamien Le Moal 	rcu_read_unlock();
2116e945ffbSDamien Le Moal 
2126e945ffbSDamien Le Moal 	return is_seq;
2136e945ffbSDamien Le Moal }
2146e945ffbSDamien Le Moal EXPORT_SYMBOL_GPL(bdev_zone_is_seq);
2156e945ffbSDamien Le Moal 
216fdb9aed8SDamien Le Moal /*
217fdb9aed8SDamien Le Moal  * Zone report arguments for block device drivers report_zones operation.
218fdb9aed8SDamien Le Moal  * @cb: report_zones_cb callback for each reported zone.
219fdb9aed8SDamien Le Moal  * @data: Private data passed to report_zones_cb.
220fdb9aed8SDamien Le Moal  */
221fdb9aed8SDamien Le Moal struct blk_report_zones_args {
222fdb9aed8SDamien Le Moal 	report_zones_cb cb;
223fdb9aed8SDamien Le Moal 	void		*data;
224f2284eecSDamien Le Moal 	bool		report_active;
225b76b840fSDamien Le Moal };
226b76b840fSDamien Le Moal 
/*
 * Common zone report helper: validate the request and hand it to the
 * report_zones operation of the disk of @bdev.
 *
 * Returns -EOPNOTSUPP if @bdev is not zoned or if its disk has no
 * report_zones operation (the latter also triggers a one-time warning), 0 if
 * @nr_zones is 0 or @sector is at or beyond the disk capacity, and the value
 * returned by the driver report_zones operation otherwise.
 */
static int blkdev_do_report_zones(struct block_device *bdev, sector_t sector,
				  unsigned int nr_zones,
				  struct blk_report_zones_args *args)
{
	struct gendisk *disk = bdev->bd_disk;

	if (!bdev_is_zoned(bdev))
		return -EOPNOTSUPP;
	if (WARN_ON_ONCE(!disk->fops->report_zones))
		return -EOPNOTSUPP;

	/* Nothing to report. */
	if (nr_zones == 0)
		return 0;
	if (sector >= get_capacity(disk))
		return 0;

	return disk->fops->report_zones(disk, sector, nr_zones, args);
}
2411af3f4e0SDamien Le Moal 
242a91e1380SDamien Le Moal /**
2436a0cb1bcSHannes Reinecke  * blkdev_report_zones - Get zones information
2446a0cb1bcSHannes Reinecke  * @bdev:	Target block device
2456a0cb1bcSHannes Reinecke  * @sector:	Sector from which to report zones
246d4100351SChristoph Hellwig  * @nr_zones:	Maximum number of zones to report
247d4100351SChristoph Hellwig  * @cb:		Callback function called for each reported zone
248d4100351SChristoph Hellwig  * @data:	Private data for the callback
2496a0cb1bcSHannes Reinecke  *
2506a0cb1bcSHannes Reinecke  * Description:
251d4100351SChristoph Hellwig  *    Get zone information starting from the zone containing @sector for at most
252d4100351SChristoph Hellwig  *    @nr_zones, and call @cb for each zone reported by the device.
253d4100351SChristoph Hellwig  *    To report all zones in a device starting from @sector, the BLK_ALL_ZONES
254d4100351SChristoph Hellwig  *    constant can be passed to @nr_zones.
255d4100351SChristoph Hellwig  *    Returns the number of zones reported by the device, or a negative errno
256d4100351SChristoph Hellwig  *    value in case of failure.
257d4100351SChristoph Hellwig  *
258d4100351SChristoph Hellwig  *    Note: The caller must use memalloc_noXX_save/restore() calls to control
259d4100351SChristoph Hellwig  *    memory allocations done within this function.
2606a0cb1bcSHannes Reinecke  */
blkdev_report_zones(struct block_device * bdev,sector_t sector,unsigned int nr_zones,report_zones_cb cb,void * data)261e76239a3SChristoph Hellwig int blkdev_report_zones(struct block_device *bdev, sector_t sector,
262d4100351SChristoph Hellwig 			unsigned int nr_zones, report_zones_cb cb, void *data)
2636a0cb1bcSHannes Reinecke {
264fdb9aed8SDamien Le Moal 	struct blk_report_zones_args args = {
265fdb9aed8SDamien Le Moal 		.cb = cb,
266fdb9aed8SDamien Le Moal 		.data = data,
267fe0418ebSDamien Le Moal 	};
2686a0cb1bcSHannes Reinecke 
2691af3f4e0SDamien Le Moal 	return blkdev_do_report_zones(bdev, sector, nr_zones, &args);
2706a0cb1bcSHannes Reinecke }
2716a0cb1bcSHannes Reinecke EXPORT_SYMBOL_GPL(blkdev_report_zones);
2726a0cb1bcSHannes Reinecke 
blkdev_zone_reset_all(struct block_device * bdev)27371f4ecdbSJohannes Thumshirn static int blkdev_zone_reset_all(struct block_device *bdev)
2741ee533ecSDamien Le Moal {
2751ee533ecSDamien Le Moal 	struct bio bio;
2761ee533ecSDamien Le Moal 
27749add496SChristoph Hellwig 	bio_init(&bio, bdev, NULL, 0, REQ_OP_ZONE_RESET_ALL | REQ_SYNC);
2784020d22fSJohannes Thumshirn 	trace_blkdev_zone_mgmt(&bio, 0);
2791ee533ecSDamien Le Moal 	return submit_bio_wait(&bio);
2806e33dbf2SChaitanya Kulkarni }
2816e33dbf2SChaitanya Kulkarni 
2826a0cb1bcSHannes Reinecke /**
2836c1b1da5SAjay Joshi  * blkdev_zone_mgmt - Execute a zone management operation on a range of zones
2846a0cb1bcSHannes Reinecke  * @bdev:	Target block device
2856c1b1da5SAjay Joshi  * @op:		Operation to be performed on the zones
2866c1b1da5SAjay Joshi  * @sector:	Start sector of the first zone to operate on
2876c1b1da5SAjay Joshi  * @nr_sectors:	Number of sectors, should be at least the length of one zone and
2886c1b1da5SAjay Joshi  *		must be zone size aligned.
2896a0cb1bcSHannes Reinecke  *
2906a0cb1bcSHannes Reinecke  * Description:
2916c1b1da5SAjay Joshi  *    Perform the specified operation on the range of zones specified by
2926a0cb1bcSHannes Reinecke  *    @sector..@sector+@nr_sectors. Specifying the entire disk sector range
2936a0cb1bcSHannes Reinecke  *    is valid, but the specified range should not contain conventional zones.
2946c1b1da5SAjay Joshi  *    The operation to execute on each zone can be a zone reset, open, close
2956c1b1da5SAjay Joshi  *    or finish request.
2966a0cb1bcSHannes Reinecke  */
blkdev_zone_mgmt(struct block_device * bdev,enum req_op op,sector_t sector,sector_t nr_sectors)297ff07a02eSBart Van Assche int blkdev_zone_mgmt(struct block_device *bdev, enum req_op op,
29871f4ecdbSJohannes Thumshirn 		     sector_t sector, sector_t nr_sectors)
2996a0cb1bcSHannes Reinecke {
300375c140cSChristoph Hellwig 	sector_t zone_sectors = bdev_zone_sectors(bdev);
301375c140cSChristoph Hellwig 	sector_t capacity = bdev_nr_sectors(bdev);
3026a0cb1bcSHannes Reinecke 	sector_t end_sector = sector + nr_sectors;
303a2d6b3a2SDamien Le Moal 	struct bio *bio = NULL;
3041ee533ecSDamien Le Moal 	int ret = 0;
3056a0cb1bcSHannes Reinecke 
306edd1dbc8SChristoph Hellwig 	if (!bdev_is_zoned(bdev))
3076a0cb1bcSHannes Reinecke 		return -EOPNOTSUPP;
3086a0cb1bcSHannes Reinecke 
309a2d6b3a2SDamien Le Moal 	if (bdev_read_only(bdev))
310a2d6b3a2SDamien Le Moal 		return -EPERM;
311a2d6b3a2SDamien Le Moal 
3126c1b1da5SAjay Joshi 	if (!op_is_zone_mgmt(op))
3136c1b1da5SAjay Joshi 		return -EOPNOTSUPP;
3146c1b1da5SAjay Joshi 
31511bde986SAlexey Dobriyan 	if (end_sector <= sector || end_sector > capacity)
3166a0cb1bcSHannes Reinecke 		/* Out of range */
3176a0cb1bcSHannes Reinecke 		return -EINVAL;
3186a0cb1bcSHannes Reinecke 
3196a0cb1bcSHannes Reinecke 	/* Check alignment (handle eventual smaller last zone) */
320e29b2100SPankaj Raghav 	if (!bdev_is_zone_start(bdev, sector))
3216a0cb1bcSHannes Reinecke 		return -EINVAL;
3226a0cb1bcSHannes Reinecke 
323e29b2100SPankaj Raghav 	if (!bdev_is_zone_start(bdev, nr_sectors) && end_sector != capacity)
3246a0cb1bcSHannes Reinecke 		return -EINVAL;
3256a0cb1bcSHannes Reinecke 
3261ee533ecSDamien Le Moal 	/*
327f2a7bea2SDamien Le Moal 	 * In the case of a zone reset operation over all zones, use
328f2a7bea2SDamien Le Moal 	 * REQ_OP_ZONE_RESET_ALL.
3291ee533ecSDamien Le Moal 	 */
330f2a7bea2SDamien Le Moal 	if (op == REQ_OP_ZONE_RESET && sector == 0 && nr_sectors == capacity)
33171f4ecdbSJohannes Thumshirn 		return blkdev_zone_reset_all(bdev);
3321ee533ecSDamien Le Moal 
3336a0cb1bcSHannes Reinecke 	while (sector < end_sector) {
33471f4ecdbSJohannes Thumshirn 		bio = blk_next_bio(bio, bdev, 0, op | REQ_SYNC, GFP_KERNEL);
335c7a1d926SDamien Le Moal 		bio->bi_iter.bi_sector = sector;
3366a0cb1bcSHannes Reinecke 		sector += zone_sectors;
3376a0cb1bcSHannes Reinecke 
3386a0cb1bcSHannes Reinecke 		/* This may take a while, so be nice to others */
3396a0cb1bcSHannes Reinecke 		cond_resched();
3406a0cb1bcSHannes Reinecke 	}
3416a0cb1bcSHannes Reinecke 
3424020d22fSJohannes Thumshirn 	trace_blkdev_zone_mgmt(bio, nr_sectors);
343a2d6b3a2SDamien Le Moal 	ret = submit_bio_wait(bio);
344a2d6b3a2SDamien Le Moal 	bio_put(bio);
345a2d6b3a2SDamien Le Moal 
346a2d6b3a2SDamien Le Moal 	return ret;
3476a0cb1bcSHannes Reinecke }
3486c1b1da5SAjay Joshi EXPORT_SYMBOL_GPL(blkdev_zone_mgmt);
3493ed05a98SShaun Tancheff 
350d4100351SChristoph Hellwig struct zone_report_args {
351d4100351SChristoph Hellwig 	struct blk_zone __user *zones;
352d4100351SChristoph Hellwig };
353d4100351SChristoph Hellwig 
blkdev_copy_zone_to_user(struct blk_zone * zone,unsigned int idx,void * data)354d4100351SChristoph Hellwig static int blkdev_copy_zone_to_user(struct blk_zone *zone, unsigned int idx,
355d4100351SChristoph Hellwig 				    void *data)
356d4100351SChristoph Hellwig {
357d4100351SChristoph Hellwig 	struct zone_report_args *args = data;
358d4100351SChristoph Hellwig 
359d4100351SChristoph Hellwig 	if (copy_to_user(&args->zones[idx], zone, sizeof(struct blk_zone)))
360d4100351SChristoph Hellwig 		return -EFAULT;
361d4100351SChristoph Hellwig 	return 0;
362d4100351SChristoph Hellwig }
363d4100351SChristoph Hellwig 
36456c4bddbSBart Van Assche /*
365b30ffcdcSDamien Le Moal  * Mask of valid input flags for BLKREPORTZONEV2 ioctl.
366b30ffcdcSDamien Le Moal  */
367b30ffcdcSDamien Le Moal #define BLK_ZONE_REPV2_INPUT_FLAGS	BLK_ZONE_REP_CACHED
368b30ffcdcSDamien Le Moal 
369b30ffcdcSDamien Le Moal /*
370b30ffcdcSDamien Le Moal  * BLKREPORTZONE and BLKREPORTZONEV2 ioctl processing.
3713ed05a98SShaun Tancheff  * Called from blkdev_ioctl.
3723ed05a98SShaun Tancheff  */
blkdev_report_zones_ioctl(struct block_device * bdev,unsigned int cmd,unsigned long arg)3735e4ea834SChristoph Hellwig int blkdev_report_zones_ioctl(struct block_device *bdev, unsigned int cmd,
3745e4ea834SChristoph Hellwig 		unsigned long arg)
3753ed05a98SShaun Tancheff {
3763ed05a98SShaun Tancheff 	void __user *argp = (void __user *)arg;
377d4100351SChristoph Hellwig 	struct zone_report_args args;
3783ed05a98SShaun Tancheff 	struct blk_zone_report rep;
3793ed05a98SShaun Tancheff 	int ret;
3803ed05a98SShaun Tancheff 
3813ed05a98SShaun Tancheff 	if (!argp)
3823ed05a98SShaun Tancheff 		return -EINVAL;
3833ed05a98SShaun Tancheff 
384edd1dbc8SChristoph Hellwig 	if (!bdev_is_zoned(bdev))
3853ed05a98SShaun Tancheff 		return -ENOTTY;
3863ed05a98SShaun Tancheff 
3873ed05a98SShaun Tancheff 	if (copy_from_user(&rep, argp, sizeof(struct blk_zone_report)))
3883ed05a98SShaun Tancheff 		return -EFAULT;
3893ed05a98SShaun Tancheff 
3903ed05a98SShaun Tancheff 	if (!rep.nr_zones)
3913ed05a98SShaun Tancheff 		return -EINVAL;
3923ed05a98SShaun Tancheff 
393d4100351SChristoph Hellwig 	args.zones = argp + sizeof(struct blk_zone_report);
394b30ffcdcSDamien Le Moal 
395b30ffcdcSDamien Le Moal 	switch (cmd) {
396b30ffcdcSDamien Le Moal 	case BLKREPORTZONE:
397d4100351SChristoph Hellwig 		ret = blkdev_report_zones(bdev, rep.sector, rep.nr_zones,
398d4100351SChristoph Hellwig 					  blkdev_copy_zone_to_user, &args);
399b30ffcdcSDamien Le Moal 		break;
400b30ffcdcSDamien Le Moal 	case BLKREPORTZONEV2:
401b30ffcdcSDamien Le Moal 		if (rep.flags & ~BLK_ZONE_REPV2_INPUT_FLAGS)
402b30ffcdcSDamien Le Moal 			return -EINVAL;
403b30ffcdcSDamien Le Moal 		ret = blkdev_report_zones_cached(bdev, rep.sector, rep.nr_zones,
404b30ffcdcSDamien Le Moal 					 blkdev_copy_zone_to_user, &args);
405b30ffcdcSDamien Le Moal 		break;
406b30ffcdcSDamien Le Moal 	default:
407b30ffcdcSDamien Le Moal 		return -EINVAL;
408b30ffcdcSDamien Le Moal 	}
409b30ffcdcSDamien Le Moal 
410d4100351SChristoph Hellwig 	if (ret < 0)
4113ed05a98SShaun Tancheff 		return ret;
412d4100351SChristoph Hellwig 
413d4100351SChristoph Hellwig 	rep.nr_zones = ret;
41482394db7SMatias Bjørling 	rep.flags = BLK_ZONE_REP_CAPACITY;
415d4100351SChristoph Hellwig 	if (copy_to_user(argp, &rep, sizeof(struct blk_zone_report)))
416d4100351SChristoph Hellwig 		return -EFAULT;
417d4100351SChristoph Hellwig 	return 0;
4183ed05a98SShaun Tancheff }
4193ed05a98SShaun Tancheff 
/*
 * BLKRESETZONE processing: truncate the byte range of @bdev covered by
 * @zrange, then reset the zones in that range.
 *
 * The whole operation runs with the inode lock and the invalidate lock of the
 * block device mapping held. Returns -EINVAL if the sector range wraps around
 * or extends beyond the disk capacity, the truncate_bdev_range() error if the
 * truncation fails, and the result of blkdev_zone_mgmt() otherwise.
 */
static int blkdev_reset_zone(struct block_device *bdev, blk_mode_t mode,
			     struct blk_zone_range *zrange)
{
	loff_t start, end;
	int ret;

	inode_lock(bdev->bd_mapping->host);
	filemap_invalidate_lock(bdev->bd_mapping);

	if (zrange->sector + zrange->nr_sectors <= zrange->sector ||
	    zrange->sector + zrange->nr_sectors > get_capacity(bdev->bd_disk)) {
		/* Out of range */
		ret = -EINVAL;
		goto out_unlock;
	}

	/* Byte offsets of the first and last byte of the sector range. */
	start = zrange->sector << SECTOR_SHIFT;
	end = ((zrange->sector + zrange->nr_sectors) << SECTOR_SHIFT) - 1;

	ret = truncate_bdev_range(bdev, mode, start, end);
	if (!ret)
		ret = blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET, zrange->sector,
				       zrange->nr_sectors);

out_unlock:
	filemap_invalidate_unlock(bdev->bd_mapping);
	inode_unlock(bdev->bd_mapping->host);
	return ret;
}
447e5113505SShin'ichiro Kawasaki 
44856c4bddbSBart Van Assche /*
449e876df1fSAjay Joshi  * BLKRESETZONE, BLKOPENZONE, BLKCLOSEZONE and BLKFINISHZONE ioctl processing.
4503ed05a98SShaun Tancheff  * Called from blkdev_ioctl.
4513ed05a98SShaun Tancheff  */
blkdev_zone_mgmt_ioctl(struct block_device * bdev,blk_mode_t mode,unsigned int cmd,unsigned long arg)45205bdb996SChristoph Hellwig int blkdev_zone_mgmt_ioctl(struct block_device *bdev, blk_mode_t mode,
4533ed05a98SShaun Tancheff 			   unsigned int cmd, unsigned long arg)
4543ed05a98SShaun Tancheff {
4553ed05a98SShaun Tancheff 	void __user *argp = (void __user *)arg;
4563ed05a98SShaun Tancheff 	struct blk_zone_range zrange;
457ff07a02eSBart Van Assche 	enum req_op op;
4583ed05a98SShaun Tancheff 
4593ed05a98SShaun Tancheff 	if (!argp)
4603ed05a98SShaun Tancheff 		return -EINVAL;
4613ed05a98SShaun Tancheff 
462edd1dbc8SChristoph Hellwig 	if (!bdev_is_zoned(bdev))
4633ed05a98SShaun Tancheff 		return -ENOTTY;
4643ed05a98SShaun Tancheff 
46505bdb996SChristoph Hellwig 	if (!(mode & BLK_OPEN_WRITE))
4663ed05a98SShaun Tancheff 		return -EBADF;
4673ed05a98SShaun Tancheff 
4683ed05a98SShaun Tancheff 	if (copy_from_user(&zrange, argp, sizeof(struct blk_zone_range)))
4693ed05a98SShaun Tancheff 		return -EFAULT;
4703ed05a98SShaun Tancheff 
471e876df1fSAjay Joshi 	switch (cmd) {
472e876df1fSAjay Joshi 	case BLKRESETZONE:
473*539fb773SChristoph Hellwig 		return blkdev_reset_zone(bdev, mode, &zrange);
474e876df1fSAjay Joshi 	case BLKOPENZONE:
475e876df1fSAjay Joshi 		op = REQ_OP_ZONE_OPEN;
476e876df1fSAjay Joshi 		break;
477e876df1fSAjay Joshi 	case BLKCLOSEZONE:
478e876df1fSAjay Joshi 		op = REQ_OP_ZONE_CLOSE;
479e876df1fSAjay Joshi 		break;
480e876df1fSAjay Joshi 	case BLKFINISHZONE:
481e876df1fSAjay Joshi 		op = REQ_OP_ZONE_FINISH;
482e876df1fSAjay Joshi 		break;
483e876df1fSAjay Joshi 	default:
484e876df1fSAjay Joshi 		return -ENOTTY;
485e876df1fSAjay Joshi 	}
486e876df1fSAjay Joshi 
487*539fb773SChristoph Hellwig 	return blkdev_zone_mgmt(bdev, op, zrange.sector, zrange.nr_sectors);
4883ed05a98SShaun Tancheff }
489bf505456SDamien Le Moal 
disk_zone_is_last(struct gendisk * disk,struct blk_zone * zone)490cd639993SDamien Le Moal static bool disk_zone_is_last(struct gendisk *disk, struct blk_zone *zone)
491cd639993SDamien Le Moal {
492cd639993SDamien Le Moal 	return zone->start + zone->len >= get_capacity(disk);
493cd639993SDamien Le Moal }
494cd639993SDamien Le Moal 
disk_zone_wplug_is_full(struct gendisk * disk,struct blk_zone_wplug * zwplug)49529459c3eSDamien Le Moal static bool disk_zone_wplug_is_full(struct gendisk *disk,
49629459c3eSDamien Le Moal 				    struct blk_zone_wplug *zwplug)
49729459c3eSDamien Le Moal {
498c30e8c4bSDamien Le Moal 	if (zwplug->zone_no < disk->nr_zones - 1)
499c30e8c4bSDamien Le Moal 		return zwplug->wp_offset >= disk->zone_capacity;
500c30e8c4bSDamien Le Moal 	return zwplug->wp_offset >= disk->last_zone_capacity;
50129459c3eSDamien Le Moal }
50229459c3eSDamien Le Moal 
disk_insert_zone_wplug(struct gendisk * disk,struct blk_zone_wplug * zwplug)503dd291d77SDamien Le Moal static bool disk_insert_zone_wplug(struct gendisk *disk,
504dd291d77SDamien Le Moal 				   struct blk_zone_wplug *zwplug)
505dd291d77SDamien Le Moal {
506dd291d77SDamien Le Moal 	struct blk_zone_wplug *zwplg;
507dd291d77SDamien Le Moal 	unsigned long flags;
5080bf0e2e4SDamien Le Moal 	u8 *zones_cond;
509dd291d77SDamien Le Moal 	unsigned int idx =
510dd291d77SDamien Le Moal 		hash_32(zwplug->zone_no, disk->zone_wplugs_hash_bits);
511dd291d77SDamien Le Moal 
512dd291d77SDamien Le Moal 	/*
513dd291d77SDamien Le Moal 	 * Add the new zone write plug to the hash table, but carefully as we
514dd291d77SDamien Le Moal 	 * are racing with other submission context, so we may already have a
515dd291d77SDamien Le Moal 	 * zone write plug for the same zone.
516dd291d77SDamien Le Moal 	 */
517b7cbc30eSDamien Le Moal 	spin_lock_irqsave(&disk->zone_wplugs_hash_lock, flags);
518dd291d77SDamien Le Moal 	hlist_for_each_entry_rcu(zwplg, &disk->zone_wplugs_hash[idx], node) {
519dd291d77SDamien Le Moal 		if (zwplg->zone_no == zwplug->zone_no) {
520b7cbc30eSDamien Le Moal 			spin_unlock_irqrestore(&disk->zone_wplugs_hash_lock,
521b7cbc30eSDamien Le Moal 					       flags);
522dd291d77SDamien Le Moal 			return false;
523dd291d77SDamien Le Moal 		}
524dd291d77SDamien Le Moal 	}
5250bf0e2e4SDamien Le Moal 
5260bf0e2e4SDamien Le Moal 	/*
5270bf0e2e4SDamien Le Moal 	 * Set the zone condition: if we do not yet have a zones_cond array
5280bf0e2e4SDamien Le Moal 	 * attached to the disk, then this is a zone write plug insert from the
5290bf0e2e4SDamien Le Moal 	 * first call to blk_revalidate_disk_zones(), in which case the zone is
5300bf0e2e4SDamien Le Moal 	 * necessarilly in the active condition.
5310bf0e2e4SDamien Le Moal 	 */
5320bf0e2e4SDamien Le Moal 	zones_cond = rcu_dereference_check(disk->zones_cond,
533b7cbc30eSDamien Le Moal 				lockdep_is_held(&disk->zone_wplugs_hash_lock));
5340bf0e2e4SDamien Le Moal 	if (zones_cond)
5350bf0e2e4SDamien Le Moal 		zwplug->cond = zones_cond[zwplug->zone_no];
5360bf0e2e4SDamien Le Moal 	else
5370bf0e2e4SDamien Le Moal 		zwplug->cond = BLK_ZONE_COND_ACTIVE;
5380bf0e2e4SDamien Le Moal 
539dd291d77SDamien Le Moal 	hlist_add_head_rcu(&zwplug->node, &disk->zone_wplugs_hash[idx]);
540a6aa36e9SDamien Le Moal 	atomic_inc(&disk->nr_zone_wplugs);
541b7cbc30eSDamien Le Moal 	spin_unlock_irqrestore(&disk->zone_wplugs_hash_lock, flags);
542dd291d77SDamien Le Moal 
543dd291d77SDamien Le Moal 	return true;
544dd291d77SDamien Le Moal }
545dd291d77SDamien Le Moal 
/*
 * Search the disk zone write plug hash table for the plug of the zone
 * containing @sector. A matching plug is only returned if its reference count
 * could be incremented from a non-zero value; the caller then owns that
 * reference. Returns NULL if no such plug was found.
 */
static struct blk_zone_wplug *disk_get_hashed_zone_wplug(struct gendisk *disk,
							 sector_t sector)
{
	unsigned int zno = disk_zone_no(disk, sector);
	unsigned int idx = hash_32(zno, disk->zone_wplugs_hash_bits);
	struct blk_zone_wplug *found = NULL;
	struct blk_zone_wplug *pos;

	rcu_read_lock();
	hlist_for_each_entry_rcu(pos, &disk->zone_wplugs_hash[idx], node) {
		if (pos->zone_no != zno)
			continue;
		/* A plug whose reference count already hit zero is skipped. */
		if (refcount_inc_not_zero(&pos->ref)) {
			found = pos;
			break;
		}
	}
	rcu_read_unlock();

	return found;
}
567dd291d77SDamien Le Moal 
/*
 * Get a reference on the zone write plug of the zone containing @sector.
 * The hash table lookup is skipped entirely when the disk counter of zone
 * write plugs reads zero.
 */
static inline struct blk_zone_wplug *disk_get_zone_wplug(struct gendisk *disk,
							 sector_t sector)
{
	if (atomic_read(&disk->nr_zone_wplugs))
		return disk_get_hashed_zone_wplug(disk, sector);

	return NULL;
}
576a6aa36e9SDamien Le Moal 
disk_free_zone_wplug_rcu(struct rcu_head * rcu_head)577dd291d77SDamien Le Moal static void disk_free_zone_wplug_rcu(struct rcu_head *rcu_head)
578dd291d77SDamien Le Moal {
579dd291d77SDamien Le Moal 	struct blk_zone_wplug *zwplug =
580dd291d77SDamien Le Moal 		container_of(rcu_head, struct blk_zone_wplug, rcu_head);
581dd291d77SDamien Le Moal 
582dd291d77SDamien Le Moal 	mempool_free(zwplug, zwplug->disk->zone_wplugs_pool);
583dd291d77SDamien Le Moal }
584dd291d77SDamien Le Moal 
disk_free_zone_wplug(struct blk_zone_wplug * zwplug)585b7d4ffb5SDamien Le Moal static void disk_free_zone_wplug(struct blk_zone_wplug *zwplug)
586dd291d77SDamien Le Moal {
587b7d4ffb5SDamien Le Moal 	struct gendisk *disk = zwplug->disk;
58879ae35a4SDamien Le Moal 	unsigned long flags;
58979ae35a4SDamien Le Moal 
590b7d4ffb5SDamien Le Moal 	WARN_ON_ONCE(!(zwplug->flags & BLK_ZONE_WPLUG_DEAD));
591b7d4ffb5SDamien Le Moal 	WARN_ON_ONCE(zwplug->flags & BLK_ZONE_WPLUG_PLUGGED);
592b7d4ffb5SDamien Le Moal 	WARN_ON_ONCE(!bio_list_empty(&zwplug->bio_list));
59379ae35a4SDamien Le Moal 
594b7cbc30eSDamien Le Moal 	spin_lock_irqsave(&disk->zone_wplugs_hash_lock, flags);
5950bf0e2e4SDamien Le Moal 	blk_zone_set_cond(rcu_dereference_check(disk->zones_cond,
596b7cbc30eSDamien Le Moal 				lockdep_is_held(&disk->zone_wplugs_hash_lock)),
5970bf0e2e4SDamien Le Moal 			  zwplug->zone_no, zwplug->cond);
59879ae35a4SDamien Le Moal 	hlist_del_init_rcu(&zwplug->node);
599a6aa36e9SDamien Le Moal 	atomic_dec(&disk->nr_zone_wplugs);
600b7cbc30eSDamien Le Moal 	spin_unlock_irqrestore(&disk->zone_wplugs_hash_lock, flags);
601b7d4ffb5SDamien Le Moal 
602b7d4ffb5SDamien Le Moal 	call_rcu(&zwplug->rcu_head, disk_free_zone_wplug_rcu);
603b7d4ffb5SDamien Le Moal }
604b7d4ffb5SDamien Le Moal 
disk_put_zone_wplug(struct blk_zone_wplug * zwplug)605b7d4ffb5SDamien Le Moal static inline void disk_put_zone_wplug(struct blk_zone_wplug *zwplug)
606b7d4ffb5SDamien Le Moal {
607b7d4ffb5SDamien Le Moal 	if (refcount_dec_and_test(&zwplug->ref))
608b7d4ffb5SDamien Le Moal 		disk_free_zone_wplug(zwplug);
609b7d4ffb5SDamien Le Moal }
610b7d4ffb5SDamien Le Moal 
611b7d4ffb5SDamien Le Moal /*
612b7d4ffb5SDamien Le Moal  * Flag the zone write plug as dead and drop the initial reference we got when
613b7d4ffb5SDamien Le Moal  * the zone write plug was added to the hash table. The zone write plug will be
614b7d4ffb5SDamien Le Moal  * unhashed when its last reference is dropped.
615b7d4ffb5SDamien Le Moal  */
disk_mark_zone_wplug_dead(struct blk_zone_wplug * zwplug)616b7d4ffb5SDamien Le Moal static void disk_mark_zone_wplug_dead(struct blk_zone_wplug *zwplug)
617b7d4ffb5SDamien Le Moal {
618b7d4ffb5SDamien Le Moal 	lockdep_assert_held(&zwplug->lock);
619b7d4ffb5SDamien Le Moal 
620b7d4ffb5SDamien Le Moal 	if (!(zwplug->flags & BLK_ZONE_WPLUG_DEAD)) {
621b7d4ffb5SDamien Le Moal 		zwplug->flags |= BLK_ZONE_WPLUG_DEAD;
62279ae35a4SDamien Le Moal 		disk_put_zone_wplug(zwplug);
62379ae35a4SDamien Le Moal 	}
624b7d4ffb5SDamien Le Moal }
62579ae35a4SDamien Le Moal 
6261365b690SDamien Le Moal static bool disk_zone_wplug_submit_bio(struct gendisk *disk,
6271365b690SDamien Le Moal 				       struct blk_zone_wplug *zwplug);
6281365b690SDamien Le Moal 
blk_zone_wplug_bio_work(struct work_struct * work)6291365b690SDamien Le Moal static void blk_zone_wplug_bio_work(struct work_struct *work)
6301365b690SDamien Le Moal {
6311365b690SDamien Le Moal 	struct blk_zone_wplug *zwplug =
6321365b690SDamien Le Moal 		container_of(work, struct blk_zone_wplug, bio_work);
6331365b690SDamien Le Moal 
6341365b690SDamien Le Moal 	disk_zone_wplug_submit_bio(zwplug->disk, zwplug);
6351365b690SDamien Le Moal 
6361365b690SDamien Le Moal 	/* Drop the reference we took in disk_zone_wplug_schedule_work(). */
6371365b690SDamien Le Moal 	disk_put_zone_wplug(zwplug);
6381365b690SDamien Le Moal }
639dd291d77SDamien Le Moal 
640dd291d77SDamien Le Moal /*
6411084e41dSDamien Le Moal  * Get a zone write plug for the zone containing @sector.
6421084e41dSDamien Le Moal  * If the plug does not exist, it is allocated and inserted in the disk hash
6431084e41dSDamien Le Moal  * table.
644dd291d77SDamien Le Moal  */
disk_get_or_alloc_zone_wplug(struct gendisk * disk,sector_t sector,gfp_t gfp_mask)6451084e41dSDamien Le Moal static struct blk_zone_wplug *disk_get_or_alloc_zone_wplug(struct gendisk *disk,
6461084e41dSDamien Le Moal 					sector_t sector, gfp_t gfp_mask)
647dd291d77SDamien Le Moal {
648dd291d77SDamien Le Moal 	unsigned int zno = disk_zone_no(disk, sector);
649dd291d77SDamien Le Moal 	struct blk_zone_wplug *zwplug;
650dd291d77SDamien Le Moal 
651dd291d77SDamien Le Moal again:
652dd291d77SDamien Le Moal 	zwplug = disk_get_zone_wplug(disk, sector);
6531084e41dSDamien Le Moal 	if (zwplug)
654dd291d77SDamien Le Moal 		return zwplug;
655dd291d77SDamien Le Moal 
656dd291d77SDamien Le Moal 	/*
657dd291d77SDamien Le Moal 	 * Allocate and initialize a zone write plug with an extra reference
658dd291d77SDamien Le Moal 	 * so that it is not freed when the zone write plug becomes idle without
659dd291d77SDamien Le Moal 	 * the zone being full.
660dd291d77SDamien Le Moal 	 */
661dd291d77SDamien Le Moal 	zwplug = mempool_alloc(disk->zone_wplugs_pool, gfp_mask);
662dd291d77SDamien Le Moal 	if (!zwplug)
663dd291d77SDamien Le Moal 		return NULL;
664dd291d77SDamien Le Moal 
665dd291d77SDamien Le Moal 	INIT_HLIST_NODE(&zwplug->node);
6664122fef1SDamien Le Moal 	refcount_set(&zwplug->ref, 2);
667dd291d77SDamien Le Moal 	spin_lock_init(&zwplug->lock);
668dd291d77SDamien Le Moal 	zwplug->flags = 0;
669dd291d77SDamien Le Moal 	zwplug->zone_no = zno;
670790eb09eSLongPing Wei 	zwplug->wp_offset = bdev_offset_from_zone_start(disk->part0, sector);
671dd291d77SDamien Le Moal 	bio_list_init(&zwplug->bio_list);
672dd291d77SDamien Le Moal 	INIT_WORK(&zwplug->bio_work, blk_zone_wplug_bio_work);
6731365b690SDamien Le Moal 	INIT_LIST_HEAD(&zwplug->entry);
674dd291d77SDamien Le Moal 	zwplug->disk = disk;
675dd291d77SDamien Le Moal 
676dd291d77SDamien Le Moal 	/*
677dd291d77SDamien Le Moal 	 * Insert the new zone write plug in the hash table. This can fail only
678dd291d77SDamien Le Moal 	 * if another context already inserted a plug. Retry from the beginning
679dd291d77SDamien Le Moal 	 * in such case.
680dd291d77SDamien Le Moal 	 */
681dd291d77SDamien Le Moal 	if (!disk_insert_zone_wplug(disk, zwplug)) {
682dd291d77SDamien Le Moal 		mempool_free(zwplug, disk->zone_wplugs_pool);
683dd291d77SDamien Le Moal 		goto again;
684dd291d77SDamien Le Moal 	}
685dd291d77SDamien Le Moal 
686dd291d77SDamien Le Moal 	return zwplug;
687dd291d77SDamien Le Moal }
688dd291d77SDamien Le Moal 
blk_zone_wplug_bio_io_error(struct blk_zone_wplug * zwplug,struct bio * bio)689c9c8aea0SDamien Le Moal static inline void blk_zone_wplug_bio_io_error(struct blk_zone_wplug *zwplug,
690c9c8aea0SDamien Le Moal 					       struct bio *bio)
691dd291d77SDamien Le Moal {
692c9c8aea0SDamien Le Moal 	struct request_queue *q = zwplug->disk->queue;
693dd291d77SDamien Le Moal 
694dd291d77SDamien Le Moal 	bio_clear_flag(bio, BIO_ZONE_WRITE_PLUGGING);
695dd291d77SDamien Le Moal 	bio_io_error(bio);
696c9c8aea0SDamien Le Moal 	disk_put_zone_wplug(zwplug);
697fd0ae475SBart Van Assche 	/* Drop the reference taken by disk_zone_wplug_add_bio(). */
698dd291d77SDamien Le Moal 	blk_queue_exit(q);
699dd291d77SDamien Le Moal }
700dd291d77SDamien Le Moal 
701dd291d77SDamien Le Moal /*
702dd291d77SDamien Le Moal  * Abort (fail) all plugged BIOs of a zone write plug.
703dd291d77SDamien Le Moal  */
disk_zone_wplug_abort(struct blk_zone_wplug * zwplug)704dd291d77SDamien Le Moal static void disk_zone_wplug_abort(struct blk_zone_wplug *zwplug)
705dd291d77SDamien Le Moal {
7061365b690SDamien Le Moal 	struct gendisk *disk = zwplug->disk;
707dd291d77SDamien Le Moal 	struct bio *bio;
708dd291d77SDamien Le Moal 
709552c1149SDamien Le Moal 	lockdep_assert_held(&zwplug->lock);
710552c1149SDamien Le Moal 
711a6aa36e9SDamien Le Moal 	if (bio_list_empty(&zwplug->bio_list))
712a6aa36e9SDamien Le Moal 		return;
713a6aa36e9SDamien Le Moal 
714a6aa36e9SDamien Le Moal 	pr_warn_ratelimited("%s: zone %u: Aborting plugged BIOs\n",
715a6aa36e9SDamien Le Moal 			    zwplug->disk->disk_name, zwplug->zone_no);
716c9c8aea0SDamien Le Moal 	while ((bio = bio_list_pop(&zwplug->bio_list)))
717c9c8aea0SDamien Le Moal 		blk_zone_wplug_bio_io_error(zwplug, bio);
718552c1149SDamien Le Moal 
719552c1149SDamien Le Moal 	zwplug->flags &= ~BLK_ZONE_WPLUG_PLUGGED;
7201365b690SDamien Le Moal 
7211365b690SDamien Le Moal 	/*
7221365b690SDamien Le Moal 	 * If we are using the per disk zone write plugs worker thread, remove
7231365b690SDamien Le Moal 	 * the zone write plug from the work list and drop the reference we
7241365b690SDamien Le Moal 	 * took when the zone write plug was added to that list.
7251365b690SDamien Le Moal 	 */
7261365b690SDamien Le Moal 	if (blk_queue_zoned_qd1_writes(disk->queue)) {
7271365b690SDamien Le Moal 		spin_lock(&disk->zone_wplugs_list_lock);
7281365b690SDamien Le Moal 		if (!list_empty(&zwplug->entry)) {
7291365b690SDamien Le Moal 			list_del_init(&zwplug->entry);
7301365b690SDamien Le Moal 			disk_put_zone_wplug(zwplug);
7311365b690SDamien Le Moal 		}
7321365b690SDamien Le Moal 		spin_unlock(&disk->zone_wplugs_list_lock);
7331365b690SDamien Le Moal 	}
734dd291d77SDamien Le Moal }
735dd291d77SDamien Le Moal 
736dd291d77SDamien Le Moal /*
7370bf0e2e4SDamien Le Moal  * Update a zone write plug condition based on the write pointer offset.
7380bf0e2e4SDamien Le Moal  */
disk_zone_wplug_update_cond(struct gendisk * disk,struct blk_zone_wplug * zwplug)7390bf0e2e4SDamien Le Moal static void disk_zone_wplug_update_cond(struct gendisk *disk,
7400bf0e2e4SDamien Le Moal 					struct blk_zone_wplug *zwplug)
7410bf0e2e4SDamien Le Moal {
7420bf0e2e4SDamien Le Moal 	lockdep_assert_held(&zwplug->lock);
7430bf0e2e4SDamien Le Moal 
7440bf0e2e4SDamien Le Moal 	if (disk_zone_wplug_is_full(disk, zwplug))
7450bf0e2e4SDamien Le Moal 		zwplug->cond = BLK_ZONE_COND_FULL;
7460bf0e2e4SDamien Le Moal 	else if (!zwplug->wp_offset)
7470bf0e2e4SDamien Le Moal 		zwplug->cond = BLK_ZONE_COND_EMPTY;
7480bf0e2e4SDamien Le Moal 	else
7490bf0e2e4SDamien Le Moal 		zwplug->cond = BLK_ZONE_COND_ACTIVE;
7500bf0e2e4SDamien Le Moal }
7510bf0e2e4SDamien Le Moal 
7520bf0e2e4SDamien Le Moal /*
753fe0418ebSDamien Le Moal  * Set a zone write plug write pointer offset to the specified value.
754fe0418ebSDamien Le Moal  * This aborts all plugged BIOs, which is fine as this function is called for
755fe0418ebSDamien Le Moal  * a zone reset operation, a zone finish operation or if the zone needs a wp
756fe0418ebSDamien Le Moal  * update from a report zone after a write error.
757dd291d77SDamien Le Moal  */
disk_zone_wplug_set_wp_offset(struct gendisk * disk,struct blk_zone_wplug * zwplug,unsigned int wp_offset)758dd291d77SDamien Le Moal static void disk_zone_wplug_set_wp_offset(struct gendisk *disk,
759dd291d77SDamien Le Moal 					  struct blk_zone_wplug *zwplug,
760dd291d77SDamien Le Moal 					  unsigned int wp_offset)
761dd291d77SDamien Le Moal {
762fe0418ebSDamien Le Moal 	lockdep_assert_held(&zwplug->lock);
763dd291d77SDamien Le Moal 
764dd291d77SDamien Le Moal 	/* Update the zone write pointer and abort all plugged BIOs. */
765fe0418ebSDamien Le Moal 	zwplug->flags &= ~BLK_ZONE_WPLUG_NEED_WP_UPDATE;
766dd291d77SDamien Le Moal 	zwplug->wp_offset = wp_offset;
7670bf0e2e4SDamien Le Moal 	disk_zone_wplug_update_cond(disk, zwplug);
7680bf0e2e4SDamien Le Moal 
769dd291d77SDamien Le Moal 	disk_zone_wplug_abort(zwplug);
770b7d4ffb5SDamien Le Moal 	if (!zwplug->wp_offset || disk_zone_wplug_is_full(disk, zwplug))
771b7d4ffb5SDamien Le Moal 		disk_mark_zone_wplug_dead(zwplug);
772dd291d77SDamien Le Moal }
773dd291d77SDamien Le Moal 
blk_zone_wp_offset(struct blk_zone * zone)774b76b840fSDamien Le Moal static unsigned int blk_zone_wp_offset(struct blk_zone *zone)
775b76b840fSDamien Le Moal {
776b76b840fSDamien Le Moal 	switch (zone->cond) {
777b76b840fSDamien Le Moal 	case BLK_ZONE_COND_IMP_OPEN:
778b76b840fSDamien Le Moal 	case BLK_ZONE_COND_EXP_OPEN:
779b76b840fSDamien Le Moal 	case BLK_ZONE_COND_CLOSED:
780bbac6e0fSDamien Le Moal 	case BLK_ZONE_COND_ACTIVE:
781b76b840fSDamien Le Moal 		return zone->wp - zone->start;
782b76b840fSDamien Le Moal 	case BLK_ZONE_COND_EMPTY:
783b76b840fSDamien Le Moal 		return 0;
784bbac6e0fSDamien Le Moal 	case BLK_ZONE_COND_FULL:
785b76b840fSDamien Le Moal 	case BLK_ZONE_COND_NOT_WP:
786b76b840fSDamien Le Moal 	case BLK_ZONE_COND_OFFLINE:
787b76b840fSDamien Le Moal 	case BLK_ZONE_COND_READONLY:
788b76b840fSDamien Le Moal 	default:
789b76b840fSDamien Le Moal 		/*
790bbac6e0fSDamien Le Moal 		 * Conventional, full, offline and read-only zones do not have
791bbac6e0fSDamien Le Moal 		 * a valid write pointer.
792b76b840fSDamien Le Moal 		 */
793b76b840fSDamien Le Moal 		return UINT_MAX;
794b76b840fSDamien Le Moal 	}
795b76b840fSDamien Le Moal }
796b76b840fSDamien Le Moal 
disk_zone_wplug_sync_wp_offset(struct gendisk * disk,struct blk_zone * zone)797e2b0ec77SDamien Le Moal static unsigned int disk_zone_wplug_sync_wp_offset(struct gendisk *disk,
798b76b840fSDamien Le Moal 						   struct blk_zone *zone)
799b76b840fSDamien Le Moal {
800b76b840fSDamien Le Moal 	struct blk_zone_wplug *zwplug;
801e2b0ec77SDamien Le Moal 	unsigned int wp_offset = blk_zone_wp_offset(zone);
802b76b840fSDamien Le Moal 
803b76b840fSDamien Le Moal 	zwplug = disk_get_zone_wplug(disk, zone->start);
804e2b0ec77SDamien Le Moal 	if (zwplug) {
805e2b0ec77SDamien Le Moal 		unsigned long flags;
806b76b840fSDamien Le Moal 
807b76b840fSDamien Le Moal 		spin_lock_irqsave(&zwplug->lock, flags);
808fe0418ebSDamien Le Moal 		if (zwplug->flags & BLK_ZONE_WPLUG_NEED_WP_UPDATE)
809e2b0ec77SDamien Le Moal 			disk_zone_wplug_set_wp_offset(disk, zwplug, wp_offset);
810b76b840fSDamien Le Moal 		spin_unlock_irqrestore(&zwplug->lock, flags);
811b76b840fSDamien Le Moal 		disk_put_zone_wplug(zwplug);
812b76b840fSDamien Le Moal 	}
813b76b840fSDamien Le Moal 
814e2b0ec77SDamien Le Moal 	return wp_offset;
815e2b0ec77SDamien Le Moal }
816e2b0ec77SDamien Le Moal 
817fdb9aed8SDamien Le Moal /**
818fdb9aed8SDamien Le Moal  * disk_report_zone - Report one zone
819fdb9aed8SDamien Le Moal  * @disk:	Target disk
820fdb9aed8SDamien Le Moal  * @zone:	The zone to report
821fdb9aed8SDamien Le Moal  * @idx:	The index of the zone in the overall zone report
822fdb9aed8SDamien Le Moal  * @args:	report zones callback and data
823fdb9aed8SDamien Le Moal  *
824fdb9aed8SDamien Le Moal  * Description:
825fdb9aed8SDamien Le Moal  *    Helper function for block device drivers to report one zone of a zone
826fdb9aed8SDamien Le Moal  *    report initiated with blkdev_report_zones(). The zone being reported is
827fdb9aed8SDamien Le Moal  *    specified by @zone and used to update, if necessary, the zone write plug
828fdb9aed8SDamien Le Moal  *    information for the zone. If @args specifies a user callback function,
829fdb9aed8SDamien Le Moal  *    this callback is executed.
830fdb9aed8SDamien Le Moal  */
disk_report_zone(struct gendisk * disk,struct blk_zone * zone,unsigned int idx,struct blk_report_zones_args * args)831fdb9aed8SDamien Le Moal int disk_report_zone(struct gendisk *disk, struct blk_zone *zone,
832fdb9aed8SDamien Le Moal 		     unsigned int idx, struct blk_report_zones_args *args)
833b76b840fSDamien Le Moal {
834881880b6SDamien Le Moal 	if (args && args->report_active) {
835f2284eecSDamien Le Moal 		/*
836f2284eecSDamien Le Moal 		 * If we come here, then this is a report zones as a fallback
837f2284eecSDamien Le Moal 		 * for a cached report. So collapse the implicit open, explicit
838f2284eecSDamien Le Moal 		 * open and closed conditions into the active zone condition.
839f2284eecSDamien Le Moal 		 */
840f2284eecSDamien Le Moal 		switch (zone->cond) {
841f2284eecSDamien Le Moal 		case BLK_ZONE_COND_IMP_OPEN:
842f2284eecSDamien Le Moal 		case BLK_ZONE_COND_EXP_OPEN:
843f2284eecSDamien Le Moal 		case BLK_ZONE_COND_CLOSED:
844f2284eecSDamien Le Moal 			zone->cond = BLK_ZONE_COND_ACTIVE;
845f2284eecSDamien Le Moal 			break;
846f2284eecSDamien Le Moal 		default:
847f2284eecSDamien Le Moal 			break;
848f2284eecSDamien Le Moal 		}
849f2284eecSDamien Le Moal 	}
850f2284eecSDamien Le Moal 
851fdb9aed8SDamien Le Moal 	if (disk->zone_wplugs_hash)
852fdb9aed8SDamien Le Moal 		disk_zone_wplug_sync_wp_offset(disk, zone);
853b76b840fSDamien Le Moal 
854fdb9aed8SDamien Le Moal 	if (args && args->cb)
855fdb9aed8SDamien Le Moal 		return args->cb(zone, idx, args->data);
856fdb9aed8SDamien Le Moal 
857fdb9aed8SDamien Le Moal 	return 0;
858b76b840fSDamien Le Moal }
859fdb9aed8SDamien Le Moal EXPORT_SYMBOL_GPL(disk_report_zone);
860b76b840fSDamien Le Moal 
blkdev_report_zone_cb(struct blk_zone * zone,unsigned int idx,void * data)861f2284eecSDamien Le Moal static int blkdev_report_zone_cb(struct blk_zone *zone, unsigned int idx,
862f2284eecSDamien Le Moal 				 void *data)
863f2284eecSDamien Le Moal {
864f2284eecSDamien Le Moal 	memcpy(data, zone, sizeof(struct blk_zone));
865f2284eecSDamien Le Moal 	return 0;
866f2284eecSDamien Le Moal }
867f2284eecSDamien Le Moal 
/*
 * Get the information of a single zone with a regular one-zone report
 * starting at @sector, requesting the active condition to be reported. The
 * result is stored in @zone. Returns 0 on success, the negative error code of
 * the report on failure, or -EIO if the report did not return any zone.
 */
static int blkdev_report_zone_fallback(struct block_device *bdev,
				       sector_t sector, struct blk_zone *zone)
{
	struct blk_report_zones_args args = {
		.cb = blkdev_report_zone_cb,
		.data = zone,
		.report_active = true,
	};
	int nr_reported = blkdev_do_report_zones(bdev, sector, 1, &args);

	if (nr_reported < 0)
		return nr_reported;

	return nr_reported ? 0 : -EIO;
}
885f2284eecSDamien Le Moal 
88615638d52SChristoph Hellwig /*
88715638d52SChristoph Hellwig  * For devices that natively support zone append operations, we do not use zone
88815638d52SChristoph Hellwig  * write plugging for zone append writes, which makes the zone condition
88915638d52SChristoph Hellwig  * tracking invalid once zone append was used.  In that case fall back to a
89015638d52SChristoph Hellwig  * regular report zones to get correct information.
89115638d52SChristoph Hellwig  */
blkdev_has_cached_report_zones(struct block_device * bdev)89215638d52SChristoph Hellwig static inline bool blkdev_has_cached_report_zones(struct block_device *bdev)
89315638d52SChristoph Hellwig {
89415638d52SChristoph Hellwig 	return disk_need_zone_resources(bdev->bd_disk) &&
89515638d52SChristoph Hellwig 		(bdev_emulates_zone_append(bdev) ||
89615638d52SChristoph Hellwig 		 !test_bit(GD_ZONE_APPEND_USED, &bdev->bd_disk->state));
89715638d52SChristoph Hellwig }
89815638d52SChristoph Hellwig 
899f2284eecSDamien Le Moal /**
900f2284eecSDamien Le Moal  * blkdev_get_zone_info - Get a single zone information from cached data
901f2284eecSDamien Le Moal  * @bdev:   Target block device
902f2284eecSDamien Le Moal  * @sector: Sector contained by the target zone
903f2284eecSDamien Le Moal  * @zone:   zone structure to return the zone information
904f2284eecSDamien Le Moal  *
905f2284eecSDamien Le Moal  * Description:
906f2284eecSDamien Le Moal  *    Get the zone information for the zone containing @sector using the zone
907f2284eecSDamien Le Moal  *    write plug of the target zone, if one exist, or the disk zone condition
908f2284eecSDamien Le Moal  *    array otherwise. The zone condition may be reported as being
909f2284eecSDamien Le Moal  *    the BLK_ZONE_COND_ACTIVE condition for a zone that is in the implicit
910f2284eecSDamien Le Moal  *    open, explicit open or closed condition.
911f2284eecSDamien Le Moal  *
912f2284eecSDamien Le Moal  *    Returns 0 on success and a negative error code on failure.
913f2284eecSDamien Le Moal  */
blkdev_get_zone_info(struct block_device * bdev,sector_t sector,struct blk_zone * zone)914f2284eecSDamien Le Moal int blkdev_get_zone_info(struct block_device *bdev, sector_t sector,
915f2284eecSDamien Le Moal 			 struct blk_zone *zone)
916f2284eecSDamien Le Moal {
917f2284eecSDamien Le Moal 	struct gendisk *disk = bdev->bd_disk;
918f2284eecSDamien Le Moal 	sector_t zone_sectors = bdev_zone_sectors(bdev);
919f2284eecSDamien Le Moal 	struct blk_zone_wplug *zwplug;
920f2284eecSDamien Le Moal 	unsigned long flags;
921f2284eecSDamien Le Moal 	u8 *zones_cond;
922f2284eecSDamien Le Moal 
923f2284eecSDamien Le Moal 	if (!bdev_is_zoned(bdev))
924f2284eecSDamien Le Moal 		return -EOPNOTSUPP;
925f2284eecSDamien Le Moal 
926f2284eecSDamien Le Moal 	if (sector >= get_capacity(disk))
927f2284eecSDamien Le Moal 		return -EINVAL;
928f2284eecSDamien Le Moal 
929f2284eecSDamien Le Moal 	memset(zone, 0, sizeof(*zone));
93025976c31SDamien Le Moal 	sector = bdev_zone_start(bdev, sector);
931f2284eecSDamien Le Moal 
93215638d52SChristoph Hellwig 	if (!blkdev_has_cached_report_zones(bdev))
93315638d52SChristoph Hellwig 		return blkdev_report_zone_fallback(bdev, sector, zone);
93415638d52SChristoph Hellwig 
935f2284eecSDamien Le Moal 	rcu_read_lock();
936f2284eecSDamien Le Moal 	zones_cond = rcu_dereference(disk->zones_cond);
937f2284eecSDamien Le Moal 	if (!disk->zone_wplugs_hash || !zones_cond) {
938f2284eecSDamien Le Moal 		rcu_read_unlock();
939f2284eecSDamien Le Moal 		return blkdev_report_zone_fallback(bdev, sector, zone);
940f2284eecSDamien Le Moal 	}
941f2284eecSDamien Le Moal 	zone->cond = zones_cond[disk_zone_no(disk, sector)];
942f2284eecSDamien Le Moal 	rcu_read_unlock();
943f2284eecSDamien Le Moal 
944f2284eecSDamien Le Moal 	zone->start = sector;
945f2284eecSDamien Le Moal 	zone->len = zone_sectors;
946f2284eecSDamien Le Moal 
947f2284eecSDamien Le Moal 	/*
948f2284eecSDamien Le Moal 	 * If this is a conventional zone, we do not have a zone write plug and
949f2284eecSDamien Le Moal 	 * can report the zone immediately.
950f2284eecSDamien Le Moal 	 */
951f2284eecSDamien Le Moal 	if (zone->cond == BLK_ZONE_COND_NOT_WP) {
952f2284eecSDamien Le Moal 		zone->type = BLK_ZONE_TYPE_CONVENTIONAL;
953f2284eecSDamien Le Moal 		zone->capacity = zone_sectors;
954f2284eecSDamien Le Moal 		zone->wp = ULLONG_MAX;
955f2284eecSDamien Le Moal 		return 0;
956f2284eecSDamien Le Moal 	}
957f2284eecSDamien Le Moal 
958f2284eecSDamien Le Moal 	/*
959f2284eecSDamien Le Moal 	 * This is a sequential write required zone. If the zone is read-only or
960f2284eecSDamien Le Moal 	 * offline, only set the zone write pointer to an invalid value and
961f2284eecSDamien Le Moal 	 * report the zone.
962f2284eecSDamien Le Moal 	 */
963f2284eecSDamien Le Moal 	zone->type = BLK_ZONE_TYPE_SEQWRITE_REQ;
964f2284eecSDamien Le Moal 	if (disk_zone_is_last(disk, zone))
965f2284eecSDamien Le Moal 		zone->capacity = disk->last_zone_capacity;
966f2284eecSDamien Le Moal 	else
967f2284eecSDamien Le Moal 		zone->capacity = disk->zone_capacity;
968f2284eecSDamien Le Moal 
969f2284eecSDamien Le Moal 	if (zone->cond == BLK_ZONE_COND_READONLY ||
970f2284eecSDamien Le Moal 	    zone->cond == BLK_ZONE_COND_OFFLINE) {
971f2284eecSDamien Le Moal 		zone->wp = ULLONG_MAX;
972f2284eecSDamien Le Moal 		return 0;
973f2284eecSDamien Le Moal 	}
974f2284eecSDamien Le Moal 
975f2284eecSDamien Le Moal 	/*
976f2284eecSDamien Le Moal 	 * If the zone does not have a zone write plug, it is either full or
977f2284eecSDamien Le Moal 	 * empty, as we otherwise would have a zone write plug for it. In this
978f2284eecSDamien Le Moal 	 * case, set the write pointer accordingly and report the zone.
979f2284eecSDamien Le Moal 	 * Otherwise, if we have a zone write plug, use it.
980f2284eecSDamien Le Moal 	 */
981f2284eecSDamien Le Moal 	zwplug = disk_get_zone_wplug(disk, sector);
982f2284eecSDamien Le Moal 	if (!zwplug) {
983f2284eecSDamien Le Moal 		if (zone->cond == BLK_ZONE_COND_FULL)
984f2284eecSDamien Le Moal 			zone->wp = ULLONG_MAX;
985f2284eecSDamien Le Moal 		else
986f2284eecSDamien Le Moal 			zone->wp = sector;
987f2284eecSDamien Le Moal 		return 0;
988f2284eecSDamien Le Moal 	}
989f2284eecSDamien Le Moal 
990f2284eecSDamien Le Moal 	spin_lock_irqsave(&zwplug->lock, flags);
991f2284eecSDamien Le Moal 	if (zwplug->flags & BLK_ZONE_WPLUG_NEED_WP_UPDATE) {
992f2284eecSDamien Le Moal 		spin_unlock_irqrestore(&zwplug->lock, flags);
993f2284eecSDamien Le Moal 		disk_put_zone_wplug(zwplug);
994f2284eecSDamien Le Moal 		return blkdev_report_zone_fallback(bdev, sector, zone);
995f2284eecSDamien Le Moal 	}
996f2284eecSDamien Le Moal 	zone->cond = zwplug->cond;
997f2284eecSDamien Le Moal 	zone->wp = sector + zwplug->wp_offset;
998f2284eecSDamien Le Moal 	spin_unlock_irqrestore(&zwplug->lock, flags);
999f2284eecSDamien Le Moal 
1000f2284eecSDamien Le Moal 	disk_put_zone_wplug(zwplug);
1001f2284eecSDamien Le Moal 
1002f2284eecSDamien Le Moal 	return 0;
1003f2284eecSDamien Le Moal }
1004f2284eecSDamien Le Moal EXPORT_SYMBOL_GPL(blkdev_get_zone_info);
1005f2284eecSDamien Le Moal 
100631f0656aSDamien Le Moal /**
100731f0656aSDamien Le Moal  * blkdev_report_zones_cached - Get cached zones information
100831f0656aSDamien Le Moal  * @bdev:     Target block device
100931f0656aSDamien Le Moal  * @sector:   Sector from which to report zones
101031f0656aSDamien Le Moal  * @nr_zones: Maximum number of zones to report
101131f0656aSDamien Le Moal  * @cb:       Callback function called for each reported zone
101231f0656aSDamien Le Moal  * @data:     Private data for the callback function
101331f0656aSDamien Le Moal  *
101431f0656aSDamien Le Moal  * Description:
101531f0656aSDamien Le Moal  *    Similar to blkdev_report_zones() but instead of calling into the low level
101631f0656aSDamien Le Moal  *    device driver to get the zone report from the device, use
101731f0656aSDamien Le Moal  *    blkdev_get_zone_info() to generate the report from the disk zone write
101831f0656aSDamien Le Moal  *    plugs and zones condition array. Since calling this function without a
101931f0656aSDamien Le Moal  *    callback does not make sense, @cb must be specified.
102031f0656aSDamien Le Moal  */
blkdev_report_zones_cached(struct block_device * bdev,sector_t sector,unsigned int nr_zones,report_zones_cb cb,void * data)102131f0656aSDamien Le Moal int blkdev_report_zones_cached(struct block_device *bdev, sector_t sector,
102231f0656aSDamien Le Moal 			unsigned int nr_zones, report_zones_cb cb, void *data)
102331f0656aSDamien Le Moal {
102431f0656aSDamien Le Moal 	struct gendisk *disk = bdev->bd_disk;
102531f0656aSDamien Le Moal 	sector_t capacity = get_capacity(disk);
102631f0656aSDamien Le Moal 	sector_t zone_sectors = bdev_zone_sectors(bdev);
102731f0656aSDamien Le Moal 	unsigned int idx = 0;
102831f0656aSDamien Le Moal 	struct blk_zone zone;
102931f0656aSDamien Le Moal 	int ret;
103031f0656aSDamien Le Moal 
103131f0656aSDamien Le Moal 	if (!cb || !bdev_is_zoned(bdev) ||
103231f0656aSDamien Le Moal 	    WARN_ON_ONCE(!disk->fops->report_zones))
103331f0656aSDamien Le Moal 		return -EOPNOTSUPP;
103431f0656aSDamien Le Moal 
103531f0656aSDamien Le Moal 	if (!nr_zones || sector >= capacity)
103631f0656aSDamien Le Moal 		return 0;
103731f0656aSDamien Le Moal 
103815638d52SChristoph Hellwig 	if (!blkdev_has_cached_report_zones(bdev)) {
103931f0656aSDamien Le Moal 		struct blk_report_zones_args args = {
104031f0656aSDamien Le Moal 			.cb = cb,
104131f0656aSDamien Le Moal 			.data = data,
104231f0656aSDamien Le Moal 			.report_active = true,
104331f0656aSDamien Le Moal 		};
104431f0656aSDamien Le Moal 
104531f0656aSDamien Le Moal 		return blkdev_do_report_zones(bdev, sector, nr_zones, &args);
104631f0656aSDamien Le Moal 	}
104731f0656aSDamien Le Moal 
104825976c31SDamien Le Moal 	for (sector = bdev_zone_start(bdev, sector);
104931f0656aSDamien Le Moal 	     sector < capacity && idx < nr_zones;
105031f0656aSDamien Le Moal 	     sector += zone_sectors, idx++) {
105131f0656aSDamien Le Moal 		ret = blkdev_get_zone_info(bdev, sector, &zone);
105231f0656aSDamien Le Moal 		if (ret)
105331f0656aSDamien Le Moal 			return ret;
105431f0656aSDamien Le Moal 
105531f0656aSDamien Le Moal 		ret = cb(&zone, idx, data);
105631f0656aSDamien Le Moal 		if (ret)
105731f0656aSDamien Le Moal 			return ret;
105831f0656aSDamien Le Moal 	}
105931f0656aSDamien Le Moal 
106031f0656aSDamien Le Moal 	return idx;
106131f0656aSDamien Le Moal }
106231f0656aSDamien Le Moal EXPORT_SYMBOL_GPL(blkdev_report_zones_cached);
106331f0656aSDamien Le Moal 
blk_zone_reset_bio_endio(struct bio * bio)1064efae226cSDamien Le Moal static void blk_zone_reset_bio_endio(struct bio *bio)
1065dd291d77SDamien Le Moal {
1066dd291d77SDamien Le Moal 	struct gendisk *disk = bio->bi_bdev->bd_disk;
10670bf0e2e4SDamien Le Moal 	sector_t sector = bio->bi_iter.bi_sector;
1068dd291d77SDamien Le Moal 	struct blk_zone_wplug *zwplug;
1069dd291d77SDamien Le Moal 
1070dd291d77SDamien Le Moal 	/*
1071efae226cSDamien Le Moal 	 * If we have a zone write plug, set its write pointer offset to 0.
1072efae226cSDamien Le Moal 	 * This will abort all BIOs plugged for the target zone. It is fine as
1073efae226cSDamien Le Moal 	 * resetting zones while writes are still in-flight will result in the
1074dd291d77SDamien Le Moal 	 * writes failing anyway.
1075dd291d77SDamien Le Moal 	 */
10760bf0e2e4SDamien Le Moal 	zwplug = disk_get_zone_wplug(disk, sector);
1077dd291d77SDamien Le Moal 	if (zwplug) {
1078fe0418ebSDamien Le Moal 		unsigned long flags;
1079dd291d77SDamien Le Moal 
1080fe0418ebSDamien Le Moal 		spin_lock_irqsave(&zwplug->lock, flags);
1081dd291d77SDamien Le Moal 		disk_zone_wplug_set_wp_offset(disk, zwplug, 0);
1082fe0418ebSDamien Le Moal 		spin_unlock_irqrestore(&zwplug->lock, flags);
1083dd291d77SDamien Le Moal 		disk_put_zone_wplug(zwplug);
10840bf0e2e4SDamien Le Moal 	} else {
10850bf0e2e4SDamien Le Moal 		disk_zone_set_cond(disk, sector, BLK_ZONE_COND_EMPTY);
1086dd291d77SDamien Le Moal 	}
1087dd291d77SDamien Le Moal }
1088dd291d77SDamien Le Moal 
blk_zone_reset_all_bio_endio(struct bio * bio)1089efae226cSDamien Le Moal static void blk_zone_reset_all_bio_endio(struct bio *bio)
1090efae226cSDamien Le Moal {
1091efae226cSDamien Le Moal 	struct gendisk *disk = bio->bi_bdev->bd_disk;
10920bf0e2e4SDamien Le Moal 	sector_t capacity = get_capacity(disk);
1093efae226cSDamien Le Moal 	struct blk_zone_wplug *zwplug;
1094efae226cSDamien Le Moal 	unsigned long flags;
10950bf0e2e4SDamien Le Moal 	sector_t sector;
1096efae226cSDamien Le Moal 	unsigned int i;
1097efae226cSDamien Le Moal 
1098c2b8d206SDamien Le Moal 	if (atomic_read(&disk->nr_zone_wplugs)) {
1099efae226cSDamien Le Moal 		/* Update the condition of all zone write plugs. */
1100efae226cSDamien Le Moal 		rcu_read_lock();
1101efae226cSDamien Le Moal 		for (i = 0; i < disk_zone_wplugs_hash_size(disk); i++) {
1102c2b8d206SDamien Le Moal 			hlist_for_each_entry_rcu(zwplug,
1103c2b8d206SDamien Le Moal 						 &disk->zone_wplugs_hash[i],
1104efae226cSDamien Le Moal 						 node) {
1105efae226cSDamien Le Moal 				spin_lock_irqsave(&zwplug->lock, flags);
1106efae226cSDamien Le Moal 				disk_zone_wplug_set_wp_offset(disk, zwplug, 0);
1107efae226cSDamien Le Moal 				spin_unlock_irqrestore(&zwplug->lock, flags);
1108efae226cSDamien Le Moal 			}
1109efae226cSDamien Le Moal 		}
1110efae226cSDamien Le Moal 		rcu_read_unlock();
1111c2b8d206SDamien Le Moal 	}
11120bf0e2e4SDamien Le Moal 
11130bf0e2e4SDamien Le Moal 	/* Update the cached zone conditions. */
11140bf0e2e4SDamien Le Moal 	for (sector = 0; sector < capacity;
11150bf0e2e4SDamien Le Moal 	     sector += bdev_zone_sectors(bio->bi_bdev))
11160bf0e2e4SDamien Le Moal 		disk_zone_set_cond(disk, sector, BLK_ZONE_COND_EMPTY);
111715638d52SChristoph Hellwig 	clear_bit(GD_ZONE_APPEND_USED, &disk->state);
1118efae226cSDamien Le Moal }
1119efae226cSDamien Le Moal 
blk_zone_finish_bio_endio(struct bio * bio)1120efae226cSDamien Le Moal static void blk_zone_finish_bio_endio(struct bio *bio)
1121efae226cSDamien Le Moal {
1122efae226cSDamien Le Moal 	struct block_device *bdev = bio->bi_bdev;
1123efae226cSDamien Le Moal 	struct gendisk *disk = bdev->bd_disk;
11240bf0e2e4SDamien Le Moal 	sector_t sector = bio->bi_iter.bi_sector;
1125efae226cSDamien Le Moal 	struct blk_zone_wplug *zwplug;
1126efae226cSDamien Le Moal 
1127efae226cSDamien Le Moal 	/*
1128efae226cSDamien Le Moal 	 * If we have a zone write plug, set its write pointer offset to the
1129efae226cSDamien Le Moal 	 * zone size. This will abort all BIOs plugged for the target zone. It
1130efae226cSDamien Le Moal 	 * is fine as resetting zones while writes are still in-flight will
1131efae226cSDamien Le Moal 	 * result in the writes failing anyway.
1132efae226cSDamien Le Moal 	 */
11330bf0e2e4SDamien Le Moal 	zwplug = disk_get_zone_wplug(disk, sector);
1134efae226cSDamien Le Moal 	if (zwplug) {
1135efae226cSDamien Le Moal 		unsigned long flags;
1136efae226cSDamien Le Moal 
1137efae226cSDamien Le Moal 		spin_lock_irqsave(&zwplug->lock, flags);
1138efae226cSDamien Le Moal 		disk_zone_wplug_set_wp_offset(disk, zwplug,
1139efae226cSDamien Le Moal 					      bdev_zone_sectors(bdev));
1140efae226cSDamien Le Moal 		spin_unlock_irqrestore(&zwplug->lock, flags);
1141efae226cSDamien Le Moal 		disk_put_zone_wplug(zwplug);
11420bf0e2e4SDamien Le Moal 	} else {
11430bf0e2e4SDamien Le Moal 		disk_zone_set_cond(disk, sector, BLK_ZONE_COND_FULL);
1144efae226cSDamien Le Moal 	}
1145efae226cSDamien Le Moal }
1146efae226cSDamien Le Moal 
blk_zone_mgmt_bio_endio(struct bio * bio)1147efae226cSDamien Le Moal void blk_zone_mgmt_bio_endio(struct bio *bio)
1148efae226cSDamien Le Moal {
1149efae226cSDamien Le Moal 	/* If the BIO failed, we have nothing to do. */
1150efae226cSDamien Le Moal 	if (bio->bi_status != BLK_STS_OK)
1151efae226cSDamien Le Moal 		return;
1152efae226cSDamien Le Moal 
1153efae226cSDamien Le Moal 	switch (bio_op(bio)) {
1154efae226cSDamien Le Moal 	case REQ_OP_ZONE_RESET:
1155efae226cSDamien Le Moal 		blk_zone_reset_bio_endio(bio);
1156efae226cSDamien Le Moal 		return;
1157efae226cSDamien Le Moal 	case REQ_OP_ZONE_RESET_ALL:
1158efae226cSDamien Le Moal 		blk_zone_reset_all_bio_endio(bio);
1159efae226cSDamien Le Moal 		return;
1160efae226cSDamien Le Moal 	case REQ_OP_ZONE_FINISH:
1161efae226cSDamien Le Moal 		blk_zone_finish_bio_endio(bio);
1162efae226cSDamien Le Moal 		return;
1163efae226cSDamien Le Moal 	default:
1164efae226cSDamien Le Moal 		return;
1165efae226cSDamien Le Moal 	}
1166dd291d77SDamien Le Moal }
1167dd291d77SDamien Le Moal 
disk_zone_wplug_schedule_work(struct gendisk * disk,struct blk_zone_wplug * zwplug)11681365b690SDamien Le Moal static void disk_zone_wplug_schedule_work(struct gendisk *disk,
1169cae00567SDamien Le Moal 					  struct blk_zone_wplug *zwplug)
1170cae00567SDamien Le Moal {
1171faa3be1aSBart Van Assche 	lockdep_assert_held(&zwplug->lock);
1172faa3be1aSBart Van Assche 
1173cae00567SDamien Le Moal 	/*
11740a8b8af8SDamien Le Moal 	 * Schedule the submission of the next plugged BIO. Taking a reference
11750a8b8af8SDamien Le Moal 	 * to the zone write plug is required as the bio_work belongs to the
11760a8b8af8SDamien Le Moal 	 * plug, and thus we must ensure that the write plug does not go away
11770a8b8af8SDamien Le Moal 	 * while the work is being scheduled but has not run yet.
11780a8b8af8SDamien Le Moal 	 * blk_zone_wplug_bio_work() will release the reference we take here,
11790a8b8af8SDamien Le Moal 	 * and we also drop this reference if the work is already scheduled.
1180cae00567SDamien Le Moal 	 */
1181cae00567SDamien Le Moal 	WARN_ON_ONCE(!(zwplug->flags & BLK_ZONE_WPLUG_PLUGGED));
11821365b690SDamien Le Moal 	WARN_ON_ONCE(blk_queue_zoned_qd1_writes(disk->queue));
1183cae00567SDamien Le Moal 	refcount_inc(&zwplug->ref);
11840a8b8af8SDamien Le Moal 	if (!queue_work(disk->zone_wplugs_wq, &zwplug->bio_work))
11850a8b8af8SDamien Le Moal 		disk_put_zone_wplug(zwplug);
1186cae00567SDamien Le Moal }
1187cae00567SDamien Le Moal 
disk_zone_wplug_add_bio(struct gendisk * disk,struct blk_zone_wplug * zwplug,struct bio * bio,unsigned int nr_segs)1188cae00567SDamien Le Moal static inline void disk_zone_wplug_add_bio(struct gendisk *disk,
1189cae00567SDamien Le Moal 				struct blk_zone_wplug *zwplug,
1190dd291d77SDamien Le Moal 				struct bio *bio, unsigned int nr_segs)
1191dd291d77SDamien Le Moal {
1192dd291d77SDamien Le Moal 	/*
1193dd291d77SDamien Le Moal 	 * Grab an extra reference on the BIO request queue usage counter.
1194dd291d77SDamien Le Moal 	 * This reference will be reused to submit a request for the BIO for
1195dd291d77SDamien Le Moal 	 * blk-mq devices and dropped when the BIO is failed and after
1196dd291d77SDamien Le Moal 	 * it is issued in the case of BIO-based devices.
1197dd291d77SDamien Le Moal 	 */
1198dd291d77SDamien Le Moal 	percpu_ref_get(&bio->bi_bdev->bd_disk->queue->q_usage_counter);
1199dd291d77SDamien Le Moal 
1200dd291d77SDamien Le Moal 	/*
1201dd291d77SDamien Le Moal 	 * The BIO is being plugged and thus will have to wait for the on-going
1202dd291d77SDamien Le Moal 	 * write and for all other writes already plugged. So polling makes
1203dd291d77SDamien Le Moal 	 * no sense.
1204dd291d77SDamien Le Moal 	 */
1205dd291d77SDamien Le Moal 	bio_clear_polled(bio);
1206dd291d77SDamien Le Moal 
1207dd291d77SDamien Le Moal 	/*
1208dd291d77SDamien Le Moal 	 * Reuse the poll cookie field to store the number of segments when
1209dd291d77SDamien Le Moal 	 * split to the hardware limits.
1210dd291d77SDamien Le Moal 	 */
1211dd291d77SDamien Le Moal 	bio->__bi_nr_segments = nr_segs;
1212dd291d77SDamien Le Moal 
1213dd291d77SDamien Le Moal 	/*
1214dd291d77SDamien Le Moal 	 * We always receive BIOs after they are split and ready to be issued.
1215dd291d77SDamien Le Moal 	 * The block layer passes the parts of a split BIO in order, and the
1216dd291d77SDamien Le Moal 	 * user must also issue write sequentially. So simply add the new BIO
1217dd291d77SDamien Le Moal 	 * at the tail of the list to preserve the sequential write order.
1218dd291d77SDamien Le Moal 	 */
1219dd291d77SDamien Le Moal 	bio_list_add(&zwplug->bio_list, bio);
12202e92ac61SJohannes Thumshirn 	trace_disk_zone_wplug_add_bio(zwplug->disk->queue, zwplug->zone_no,
12212e92ac61SJohannes Thumshirn 				      bio->bi_iter.bi_sector, bio_sectors(bio));
12221365b690SDamien Le Moal 
12231365b690SDamien Le Moal 	/*
12241365b690SDamien Le Moal 	 * If we are using the disk zone write plugs worker instead of the per
12251365b690SDamien Le Moal 	 * zone write plug BIO work, add the zone write plug to the work list
12261365b690SDamien Le Moal 	 * if it is not already there. Make sure to also get an extra reference
12271365b690SDamien Le Moal 	 * on the zone write plug so that it does not go away until it is
12281365b690SDamien Le Moal 	 * removed from the work list.
12291365b690SDamien Le Moal 	 */
12301365b690SDamien Le Moal 	if (blk_queue_zoned_qd1_writes(disk->queue)) {
12311365b690SDamien Le Moal 		spin_lock(&disk->zone_wplugs_list_lock);
12321365b690SDamien Le Moal 		if (list_empty(&zwplug->entry)) {
12331365b690SDamien Le Moal 			list_add_tail(&zwplug->entry, &disk->zone_wplugs_list);
12341365b690SDamien Le Moal 			refcount_inc(&zwplug->ref);
12351365b690SDamien Le Moal 		}
12361365b690SDamien Le Moal 		spin_unlock(&disk->zone_wplugs_list_lock);
12371365b690SDamien Le Moal 	}
1238dd291d77SDamien Le Moal }
1239dd291d77SDamien Le Moal 
1240dd291d77SDamien Le Moal /*
1241dd291d77SDamien Le Moal  * Called from bio_attempt_back_merge() when a BIO was merged with a request.
1242dd291d77SDamien Le Moal  */
blk_zone_write_plug_bio_merged(struct bio * bio)1243dd291d77SDamien Le Moal void blk_zone_write_plug_bio_merged(struct bio *bio)
1244dd291d77SDamien Le Moal {
12450bf0e2e4SDamien Le Moal 	struct gendisk *disk = bio->bi_bdev->bd_disk;
1246dd291d77SDamien Le Moal 	struct blk_zone_wplug *zwplug;
1247dd291d77SDamien Le Moal 	unsigned long flags;
1248dd291d77SDamien Le Moal 
1249dd291d77SDamien Le Moal 	/*
1250dd291d77SDamien Le Moal 	 * If the BIO was already plugged, then we were called through
1251096bc7eaSDamien Le Moal 	 * blk_zone_write_plug_init_request() -> blk_attempt_bio_merge().
1252096bc7eaSDamien Le Moal 	 * For this case, we already hold a reference on the zone write plug for
1253096bc7eaSDamien Le Moal 	 * the BIO and blk_zone_write_plug_init_request() will handle the
1254dd291d77SDamien Le Moal 	 * zone write pointer offset update.
1255dd291d77SDamien Le Moal 	 */
1256dd291d77SDamien Le Moal 	if (bio_flagged(bio, BIO_ZONE_WRITE_PLUGGING))
1257dd291d77SDamien Le Moal 		return;
1258dd291d77SDamien Le Moal 
1259dd291d77SDamien Le Moal 	bio_set_flag(bio, BIO_ZONE_WRITE_PLUGGING);
1260dd291d77SDamien Le Moal 
1261dd291d77SDamien Le Moal 	/*
1262c4c3ffdaSDamien Le Moal 	 * Get a reference on the zone write plug of the target zone and advance
1263c4c3ffdaSDamien Le Moal 	 * the zone write pointer offset. Given that this is a merge, we already
1264c4c3ffdaSDamien Le Moal 	 * have at least one request and one BIO referencing the zone write
1265c4c3ffdaSDamien Le Moal 	 * plug. So this should not fail.
1266dd291d77SDamien Le Moal 	 */
12670bf0e2e4SDamien Le Moal 	zwplug = disk_get_zone_wplug(disk, bio->bi_iter.bi_sector);
1268c4c3ffdaSDamien Le Moal 	if (WARN_ON_ONCE(!zwplug))
1269c4c3ffdaSDamien Le Moal 		return;
1270c4c3ffdaSDamien Le Moal 
1271dd291d77SDamien Le Moal 	spin_lock_irqsave(&zwplug->lock, flags);
1272dd291d77SDamien Le Moal 	zwplug->wp_offset += bio_sectors(bio);
12730bf0e2e4SDamien Le Moal 	disk_zone_wplug_update_cond(disk, zwplug);
1274dd291d77SDamien Le Moal 	spin_unlock_irqrestore(&zwplug->lock, flags);
1275dd291d77SDamien Le Moal }
1276dd291d77SDamien Le Moal 
1277dd291d77SDamien Le Moal /*
1278dd291d77SDamien Le Moal  * Attempt to merge plugged BIOs with a newly prepared request for a BIO that
1279dd291d77SDamien Le Moal  * already went through zone write plugging (either a new BIO or one that was
1280dd291d77SDamien Le Moal  * unplugged).
1281dd291d77SDamien Le Moal  */
blk_zone_write_plug_init_request(struct request * req)1282096bc7eaSDamien Le Moal void blk_zone_write_plug_init_request(struct request *req)
1283dd291d77SDamien Le Moal {
1284dd291d77SDamien Le Moal 	sector_t req_back_sector = blk_rq_pos(req) + blk_rq_sectors(req);
1285dd291d77SDamien Le Moal 	struct request_queue *q = req->q;
1286dd291d77SDamien Le Moal 	struct gendisk *disk = q->disk;
1287dd291d77SDamien Le Moal 	struct blk_zone_wplug *zwplug =
1288dd291d77SDamien Le Moal 		disk_get_zone_wplug(disk, blk_rq_pos(req));
1289dd291d77SDamien Le Moal 	unsigned long flags;
1290dd291d77SDamien Le Moal 	struct bio *bio;
1291dd291d77SDamien Le Moal 
1292096bc7eaSDamien Le Moal 	if (WARN_ON_ONCE(!zwplug))
1293096bc7eaSDamien Le Moal 		return;
1294096bc7eaSDamien Le Moal 
1295dd291d77SDamien Le Moal 	/*
12967b295187SDamien Le Moal 	 * Indicate that completion of this request needs to be handled with
1297347bde9dSDamien Le Moal 	 * blk_zone_write_plug_finish_request(), which will drop the reference
12987b295187SDamien Le Moal 	 * on the zone write plug we took above on entry to this function.
1299dd291d77SDamien Le Moal 	 */
1300dd291d77SDamien Le Moal 	req->rq_flags |= RQF_ZONE_WRITE_PLUGGING;
1301dd291d77SDamien Le Moal 
1302dd291d77SDamien Le Moal 	if (blk_queue_nomerges(q))
1303dd291d77SDamien Le Moal 		return;
1304dd291d77SDamien Le Moal 
1305dd291d77SDamien Le Moal 	/*
1306dd291d77SDamien Le Moal 	 * Walk through the list of plugged BIOs to check if they can be merged
1307dd291d77SDamien Le Moal 	 * into the back of the request.
1308dd291d77SDamien Le Moal 	 */
1309dd291d77SDamien Le Moal 	spin_lock_irqsave(&zwplug->lock, flags);
131029459c3eSDamien Le Moal 	while (!disk_zone_wplug_is_full(disk, zwplug)) {
1311dd291d77SDamien Le Moal 		bio = bio_list_peek(&zwplug->bio_list);
1312dd291d77SDamien Le Moal 		if (!bio)
1313dd291d77SDamien Le Moal 			break;
1314dd291d77SDamien Le Moal 
1315dd291d77SDamien Le Moal 		if (bio->bi_iter.bi_sector != req_back_sector ||
1316dd291d77SDamien Le Moal 		    !blk_rq_merge_ok(req, bio))
1317dd291d77SDamien Le Moal 			break;
1318dd291d77SDamien Le Moal 
1319dd291d77SDamien Le Moal 		WARN_ON_ONCE(bio_op(bio) != REQ_OP_WRITE_ZEROES &&
1320dd291d77SDamien Le Moal 			     !bio->__bi_nr_segments);
1321dd291d77SDamien Le Moal 
1322dd291d77SDamien Le Moal 		bio_list_pop(&zwplug->bio_list);
1323dd291d77SDamien Le Moal 		if (bio_attempt_back_merge(req, bio, bio->__bi_nr_segments) !=
1324dd291d77SDamien Le Moal 		    BIO_MERGE_OK) {
1325dd291d77SDamien Le Moal 			bio_list_add_head(&zwplug->bio_list, bio);
1326dd291d77SDamien Le Moal 			break;
1327dd291d77SDamien Le Moal 		}
1328dd291d77SDamien Le Moal 
1329fa855563SBart Van Assche 		/* Drop the reference taken by disk_zone_wplug_add_bio(). */
1330dd291d77SDamien Le Moal 		blk_queue_exit(q);
1331dd291d77SDamien Le Moal 		zwplug->wp_offset += bio_sectors(bio);
13320bf0e2e4SDamien Le Moal 		disk_zone_wplug_update_cond(disk, zwplug);
1333dd291d77SDamien Le Moal 
1334dd291d77SDamien Le Moal 		req_back_sector += bio_sectors(bio);
1335dd291d77SDamien Le Moal 	}
1336dd291d77SDamien Le Moal 	spin_unlock_irqrestore(&zwplug->lock, flags);
1337dd291d77SDamien Le Moal }
1338dd291d77SDamien Le Moal 
1339dd291d77SDamien Le Moal /*
1340dd291d77SDamien Le Moal  * Check and prepare a BIO for submission by incrementing the write pointer
13419b1ce7f0SDamien Le Moal  * offset of its zone write plug and changing zone append operations into
13429b1ce7f0SDamien Le Moal  * regular write when zone append emulation is needed.
1343dd291d77SDamien Le Moal  */
blk_zone_wplug_prepare_bio(struct blk_zone_wplug * zwplug,struct bio * bio)1344dd291d77SDamien Le Moal static bool blk_zone_wplug_prepare_bio(struct blk_zone_wplug *zwplug,
1345dd291d77SDamien Le Moal 				       struct bio *bio)
1346dd291d77SDamien Le Moal {
1347dd291d77SDamien Le Moal 	struct gendisk *disk = bio->bi_bdev->bd_disk;
1348dd291d77SDamien Le Moal 
1349cbac56e5SBart Van Assche 	lockdep_assert_held(&zwplug->lock);
1350cbac56e5SBart Van Assche 
1351dd291d77SDamien Le Moal 	/*
1352fe0418ebSDamien Le Moal 	 * If we lost track of the zone write pointer due to a write error,
1353fe0418ebSDamien Le Moal 	 * the user must either execute a report zones, reset the zone or finish
1354fe0418ebSDamien Le Moal 	 * the to recover a reliable write pointer position. Fail BIOs if the
1355fe0418ebSDamien Le Moal 	 * user did not do that as we cannot handle emulated zone append
1356fe0418ebSDamien Le Moal 	 * otherwise.
1357fe0418ebSDamien Le Moal 	 */
1358fe0418ebSDamien Le Moal 	if (zwplug->flags & BLK_ZONE_WPLUG_NEED_WP_UPDATE)
1359fe0418ebSDamien Le Moal 		return false;
1360fe0418ebSDamien Le Moal 
1361fe0418ebSDamien Le Moal 	/*
1362dd291d77SDamien Le Moal 	 * Check that the user is not attempting to write to a full zone.
1363dd291d77SDamien Le Moal 	 * We know such BIO will fail, and that would potentially overflow our
1364dd291d77SDamien Le Moal 	 * write pointer offset beyond the end of the zone.
1365dd291d77SDamien Le Moal 	 */
136629459c3eSDamien Le Moal 	if (disk_zone_wplug_is_full(disk, zwplug))
1367fe0418ebSDamien Le Moal 		return false;
1368dd291d77SDamien Le Moal 
13699b1ce7f0SDamien Le Moal 	if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
13709b1ce7f0SDamien Le Moal 		/*
13719b1ce7f0SDamien Le Moal 		 * Use a regular write starting at the current write pointer.
13729b1ce7f0SDamien Le Moal 		 * Similarly to native zone append operations, do not allow
13739b1ce7f0SDamien Le Moal 		 * merging.
13749b1ce7f0SDamien Le Moal 		 */
13759b1ce7f0SDamien Le Moal 		bio->bi_opf &= ~REQ_OP_MASK;
13769b1ce7f0SDamien Le Moal 		bio->bi_opf |= REQ_OP_WRITE | REQ_NOMERGE;
13779b1ce7f0SDamien Le Moal 		bio->bi_iter.bi_sector += zwplug->wp_offset;
13789b1ce7f0SDamien Le Moal 
13799b1ce7f0SDamien Le Moal 		/*
13809b1ce7f0SDamien Le Moal 		 * Remember that this BIO is in fact a zone append operation
13819b1ce7f0SDamien Le Moal 		 * so that we can restore its operation code on completion.
13829b1ce7f0SDamien Le Moal 		 */
13839b1ce7f0SDamien Le Moal 		bio_set_flag(bio, BIO_EMULATES_ZONE_APPEND);
13849b1ce7f0SDamien Le Moal 	} else {
1385dd291d77SDamien Le Moal 		/*
1386fe0418ebSDamien Le Moal 		 * Check for non-sequential writes early as we know that BIOs
1387fe0418ebSDamien Le Moal 		 * with a start sector not unaligned to the zone write pointer
1388fe0418ebSDamien Le Moal 		 * will fail.
1389dd291d77SDamien Le Moal 		 */
1390dd291d77SDamien Le Moal 		if (bio_offset_from_zone_start(bio) != zwplug->wp_offset)
1391fe0418ebSDamien Le Moal 			return false;
13929b1ce7f0SDamien Le Moal 	}
1393dd291d77SDamien Le Moal 
1394dd291d77SDamien Le Moal 	/* Advance the zone write pointer offset. */
1395dd291d77SDamien Le Moal 	zwplug->wp_offset += bio_sectors(bio);
13960bf0e2e4SDamien Le Moal 	disk_zone_wplug_update_cond(disk, zwplug);
1397dd291d77SDamien Le Moal 
1398dd291d77SDamien Le Moal 	return true;
1399dd291d77SDamien Le Moal }
1400dd291d77SDamien Le Moal 
blk_zone_wplug_handle_write(struct bio * bio,unsigned int nr_segs)1401dd291d77SDamien Le Moal static bool blk_zone_wplug_handle_write(struct bio *bio, unsigned int nr_segs)
1402dd291d77SDamien Le Moal {
1403dd291d77SDamien Le Moal 	struct gendisk *disk = bio->bi_bdev->bd_disk;
1404dd291d77SDamien Le Moal 	sector_t sector = bio->bi_iter.bi_sector;
1405dd291d77SDamien Le Moal 	struct blk_zone_wplug *zwplug;
1406dd291d77SDamien Le Moal 	gfp_t gfp_mask = GFP_NOIO;
1407dd291d77SDamien Le Moal 	unsigned long flags;
1408dd291d77SDamien Le Moal 
1409dd291d77SDamien Le Moal 	/*
1410dd291d77SDamien Le Moal 	 * BIOs must be fully contained within a zone so that we use the correct
1411dd291d77SDamien Le Moal 	 * zone write plug for the entire BIO. For blk-mq devices, the block
1412dd291d77SDamien Le Moal 	 * layer should already have done any splitting required to ensure this
1413dd291d77SDamien Le Moal 	 * and this BIO should thus not be straddling zone boundaries. For
1414dd291d77SDamien Le Moal 	 * BIO-based devices, it is the responsibility of the driver to split
1415dd291d77SDamien Le Moal 	 * the bio before submitting it.
1416dd291d77SDamien Le Moal 	 */
1417dd291d77SDamien Le Moal 	if (WARN_ON_ONCE(bio_straddles_zones(bio))) {
1418dd291d77SDamien Le Moal 		bio_io_error(bio);
1419dd291d77SDamien Le Moal 		return true;
1420dd291d77SDamien Le Moal 	}
1421dd291d77SDamien Le Moal 
1422dd291d77SDamien Le Moal 	/* Conventional zones do not need write plugging. */
1423f3d9bf05SDamien Le Moal 	if (!bdev_zone_is_seq(bio->bi_bdev, sector)) {
14249b1ce7f0SDamien Le Moal 		/* Zone append to conventional zones is not allowed. */
14259b1ce7f0SDamien Le Moal 		if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
14269b1ce7f0SDamien Le Moal 			bio_io_error(bio);
14279b1ce7f0SDamien Le Moal 			return true;
14289b1ce7f0SDamien Le Moal 		}
1429dd291d77SDamien Le Moal 		return false;
14309b1ce7f0SDamien Le Moal 	}
1431dd291d77SDamien Le Moal 
1432dd291d77SDamien Le Moal 	if (bio->bi_opf & REQ_NOWAIT)
1433dd291d77SDamien Le Moal 		gfp_mask = GFP_NOWAIT;
1434dd291d77SDamien Le Moal 
14351084e41dSDamien Le Moal 	zwplug = disk_get_or_alloc_zone_wplug(disk, sector, gfp_mask);
1436dd291d77SDamien Le Moal 	if (!zwplug) {
1437cae00567SDamien Le Moal 		if (bio->bi_opf & REQ_NOWAIT)
1438cae00567SDamien Le Moal 			bio_wouldblock_error(bio);
1439cae00567SDamien Le Moal 		else
1440dd291d77SDamien Le Moal 			bio_io_error(bio);
1441dd291d77SDamien Le Moal 		return true;
1442dd291d77SDamien Le Moal 	}
1443dd291d77SDamien Le Moal 
14441084e41dSDamien Le Moal 	spin_lock_irqsave(&zwplug->lock, flags);
14451084e41dSDamien Le Moal 
1446b7d4ffb5SDamien Le Moal 	/*
1447b7d4ffb5SDamien Le Moal 	 * If we got a zone write plug marked as dead, then the user is issuing
1448b7d4ffb5SDamien Le Moal 	 * writes to a full zone, or without synchronizing with zone reset or
1449b7d4ffb5SDamien Le Moal 	 * zone finish operations. In such case, fail the BIO to signal this
1450b7d4ffb5SDamien Le Moal 	 * invalid usage.
1451b7d4ffb5SDamien Le Moal 	 */
1452b7d4ffb5SDamien Le Moal 	if (zwplug->flags & BLK_ZONE_WPLUG_DEAD) {
1453b7d4ffb5SDamien Le Moal 		spin_unlock_irqrestore(&zwplug->lock, flags);
1454b7d4ffb5SDamien Le Moal 		disk_put_zone_wplug(zwplug);
1455b7d4ffb5SDamien Le Moal 		bio_io_error(bio);
1456b7d4ffb5SDamien Le Moal 		return true;
1457b7d4ffb5SDamien Le Moal 	}
1458b7d4ffb5SDamien Le Moal 
1459dd291d77SDamien Le Moal 	/* Indicate that this BIO is being handled using zone write plugging. */
1460dd291d77SDamien Le Moal 	bio_set_flag(bio, BIO_ZONE_WRITE_PLUGGING);
1461dd291d77SDamien Le Moal 
1462dd291d77SDamien Le Moal 	/*
1463f2333391SBart Van Assche 	 * Add REQ_NOWAIT BIOs to the plug list to ensure that we will not see a
1464f2333391SBart Van Assche 	 * BLK_STS_AGAIN failure if we let the caller submit the BIO.
1465dd291d77SDamien Le Moal 	 */
1466f2333391SBart Van Assche 	if (bio->bi_opf & REQ_NOWAIT) {
1467f2333391SBart Van Assche 		bio->bi_opf &= ~REQ_NOWAIT;
1468f2333391SBart Van Assche 		goto queue_bio;
1469f2333391SBart Van Assche 	}
1470f2333391SBart Van Assche 
14711365b690SDamien Le Moal 	/*
14721365b690SDamien Le Moal 	 * For rotational devices, we will use the gendisk zone write plugs
14731365b690SDamien Le Moal 	 * work instead of the per zone write plug BIO work, so queue the BIO.
14741365b690SDamien Le Moal 	 */
14751365b690SDamien Le Moal 	if (blk_queue_zoned_qd1_writes(disk->queue))
14761365b690SDamien Le Moal 		goto queue_bio;
14771365b690SDamien Le Moal 
1478f2333391SBart Van Assche 	/* If the zone is already plugged, add the BIO to the BIO plug list. */
1479f2333391SBart Van Assche 	if (zwplug->flags & BLK_ZONE_WPLUG_PLUGGED)
1480f2333391SBart Van Assche 		goto queue_bio;
1481dd291d77SDamien Le Moal 
1482fe0418ebSDamien Le Moal 	if (!blk_zone_wplug_prepare_bio(zwplug, bio)) {
1483fe0418ebSDamien Le Moal 		spin_unlock_irqrestore(&zwplug->lock, flags);
1484fe0418ebSDamien Le Moal 		bio_io_error(bio);
1485fe0418ebSDamien Le Moal 		return true;
1486fe0418ebSDamien Le Moal 	}
1487dd291d77SDamien Le Moal 
1488f2333391SBart Van Assche 	/* Otherwise, plug and let the caller submit the BIO. */
1489dd291d77SDamien Le Moal 	zwplug->flags |= BLK_ZONE_WPLUG_PLUGGED;
1490dd291d77SDamien Le Moal 
1491dd291d77SDamien Le Moal 	spin_unlock_irqrestore(&zwplug->lock, flags);
1492dd291d77SDamien Le Moal 
1493dd291d77SDamien Le Moal 	return false;
1494dd291d77SDamien Le Moal 
1495f2333391SBart Van Assche queue_bio:
1496cae00567SDamien Le Moal 	disk_zone_wplug_add_bio(disk, zwplug, bio, nr_segs);
1497dd291d77SDamien Le Moal 
1498f2333391SBart Van Assche 	if (!(zwplug->flags & BLK_ZONE_WPLUG_PLUGGED)) {
1499f2333391SBart Van Assche 		zwplug->flags |= BLK_ZONE_WPLUG_PLUGGED;
15001365b690SDamien Le Moal 		if (blk_queue_zoned_qd1_writes(disk->queue))
15011365b690SDamien Le Moal 			wake_up_process(disk->zone_wplugs_worker);
15021365b690SDamien Le Moal 		else
15031365b690SDamien Le Moal 			disk_zone_wplug_schedule_work(disk, zwplug);
1504f2333391SBart Van Assche 	}
1505f2333391SBart Van Assche 
1506dd291d77SDamien Le Moal 	spin_unlock_irqrestore(&zwplug->lock, flags);
1507dd291d77SDamien Le Moal 
1508dd291d77SDamien Le Moal 	return true;
1509dd291d77SDamien Le Moal }
1510dd291d77SDamien Le Moal 
blk_zone_wplug_handle_native_zone_append(struct bio * bio)1511a6aa36e9SDamien Le Moal static void blk_zone_wplug_handle_native_zone_append(struct bio *bio)
1512a6aa36e9SDamien Le Moal {
1513a6aa36e9SDamien Le Moal 	struct gendisk *disk = bio->bi_bdev->bd_disk;
1514a6aa36e9SDamien Le Moal 	struct blk_zone_wplug *zwplug;
1515a6aa36e9SDamien Le Moal 	unsigned long flags;
1516a6aa36e9SDamien Le Moal 
151715638d52SChristoph Hellwig 	if (!test_bit(GD_ZONE_APPEND_USED, &disk->state))
151815638d52SChristoph Hellwig 		set_bit(GD_ZONE_APPEND_USED, &disk->state);
151915638d52SChristoph Hellwig 
1520a6aa36e9SDamien Le Moal 	/*
1521a6aa36e9SDamien Le Moal 	 * We have native support for zone append operations, so we are not
1522a6aa36e9SDamien Le Moal 	 * going to handle @bio through plugging. However, we may already have a
1523a6aa36e9SDamien Le Moal 	 * zone write plug for the target zone if that zone was previously
1524a6aa36e9SDamien Le Moal 	 * partially written using regular writes. In such case, we risk leaving
1525a6aa36e9SDamien Le Moal 	 * the plug in the disk hash table if the zone is fully written using
1526a6aa36e9SDamien Le Moal 	 * zone append operations. Avoid this by removing the zone write plug.
1527a6aa36e9SDamien Le Moal 	 */
1528a6aa36e9SDamien Le Moal 	zwplug = disk_get_zone_wplug(disk, bio->bi_iter.bi_sector);
1529a6aa36e9SDamien Le Moal 	if (likely(!zwplug))
1530a6aa36e9SDamien Le Moal 		return;
1531a6aa36e9SDamien Le Moal 
1532a6aa36e9SDamien Le Moal 	spin_lock_irqsave(&zwplug->lock, flags);
1533a6aa36e9SDamien Le Moal 
1534a6aa36e9SDamien Le Moal 	/*
1535a6aa36e9SDamien Le Moal 	 * We are about to remove the zone write plug. But if the user
1536a6aa36e9SDamien Le Moal 	 * (mistakenly) has issued regular writes together with native zone
1537a6aa36e9SDamien Le Moal 	 * append, we must aborts the writes as otherwise the plugged BIOs would
1538a6aa36e9SDamien Le Moal 	 * not be executed by the plug BIO work as disk_get_zone_wplug() will
1539a6aa36e9SDamien Le Moal 	 * return NULL after the plug is removed. Aborting the plugged write
1540a6aa36e9SDamien Le Moal 	 * BIOs is consistent with the fact that these writes will most likely
1541a6aa36e9SDamien Le Moal 	 * fail anyway as there is no ordering guarantees between zone append
1542a6aa36e9SDamien Le Moal 	 * operations and regular write operations.
1543a6aa36e9SDamien Le Moal 	 */
1544a6aa36e9SDamien Le Moal 	if (!bio_list_empty(&zwplug->bio_list)) {
1545a6aa36e9SDamien Le Moal 		pr_warn_ratelimited("%s: zone %u: Invalid mix of zone append and regular writes\n",
1546a6aa36e9SDamien Le Moal 				    disk->disk_name, zwplug->zone_no);
1547a6aa36e9SDamien Le Moal 		disk_zone_wplug_abort(zwplug);
1548a6aa36e9SDamien Le Moal 	}
1549b7d4ffb5SDamien Le Moal 	disk_mark_zone_wplug_dead(zwplug);
1550a6aa36e9SDamien Le Moal 	spin_unlock_irqrestore(&zwplug->lock, flags);
1551a6aa36e9SDamien Le Moal 
1552a6aa36e9SDamien Le Moal 	disk_put_zone_wplug(zwplug);
1553a6aa36e9SDamien Le Moal }
1554a6aa36e9SDamien Le Moal 
blk_zone_wplug_handle_zone_mgmt(struct bio * bio)1555efae226cSDamien Le Moal static bool blk_zone_wplug_handle_zone_mgmt(struct bio *bio)
1556efae226cSDamien Le Moal {
1557efae226cSDamien Le Moal 	if (bio_op(bio) != REQ_OP_ZONE_RESET_ALL &&
1558efae226cSDamien Le Moal 	    !bdev_zone_is_seq(bio->bi_bdev, bio->bi_iter.bi_sector)) {
1559efae226cSDamien Le Moal 		/*
1560efae226cSDamien Le Moal 		 * Zone reset and zone finish operations do not apply to
1561efae226cSDamien Le Moal 		 * conventional zones.
1562efae226cSDamien Le Moal 		 */
1563efae226cSDamien Le Moal 		bio_io_error(bio);
1564efae226cSDamien Le Moal 		return true;
1565efae226cSDamien Le Moal 	}
1566efae226cSDamien Le Moal 
1567efae226cSDamien Le Moal 	/*
1568efae226cSDamien Le Moal 	 * No-wait zone management BIOs do not make much sense as the callers
1569efae226cSDamien Le Moal 	 * issue these as blocking operations in most cases. To avoid issues
1570efae226cSDamien Le Moal 	 * with the BIO execution potentially failing with BLK_STS_AGAIN, warn
1571efae226cSDamien Le Moal 	 * about REQ_NOWAIT being set and ignore that flag.
1572efae226cSDamien Le Moal 	 */
1573efae226cSDamien Le Moal 	if (WARN_ON_ONCE(bio->bi_opf & REQ_NOWAIT))
1574efae226cSDamien Le Moal 		bio->bi_opf &= ~REQ_NOWAIT;
1575efae226cSDamien Le Moal 
1576efae226cSDamien Le Moal 	return false;
1577efae226cSDamien Le Moal }
1578efae226cSDamien Le Moal 
1579dd291d77SDamien Le Moal /**
1580dd291d77SDamien Le Moal  * blk_zone_plug_bio - Handle a zone write BIO with zone write plugging
1581dd291d77SDamien Le Moal  * @bio: The BIO being submitted
1582dd291d77SDamien Le Moal  * @nr_segs: The number of physical segments of @bio
1583dd291d77SDamien Le Moal  *
15849b1ce7f0SDamien Le Moal  * Handle write, write zeroes and zone append operations requiring emulation
15859b1ce7f0SDamien Le Moal  * using zone write plugging.
1586dd291d77SDamien Le Moal  *
1587dd291d77SDamien Le Moal  * Return true whenever @bio execution needs to be delayed through the zone
1588dd291d77SDamien Le Moal  * write plug. Otherwise, return false to let the submission path process
1589dd291d77SDamien Le Moal  * @bio normally.
1590dd291d77SDamien Le Moal  */
blk_zone_plug_bio(struct bio * bio,unsigned int nr_segs)1591dd291d77SDamien Le Moal bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs)
1592dd291d77SDamien Le Moal {
1593dd291d77SDamien Le Moal 	struct block_device *bdev = bio->bi_bdev;
1594dd291d77SDamien Le Moal 
1595f7029141SDamien Le Moal 	if (WARN_ON_ONCE(!bdev->bd_disk->zone_wplugs_hash))
1596dd291d77SDamien Le Moal 		return false;
1597dd291d77SDamien Le Moal 
1598dd291d77SDamien Le Moal 	/*
1599dd291d77SDamien Le Moal 	 * Regular writes and write zeroes need to be handled through the target
1600dd291d77SDamien Le Moal 	 * zone write plug. This includes writes with REQ_FUA | REQ_PREFLUSH
1601dd291d77SDamien Le Moal 	 * which may need to go through the flush machinery depending on the
1602dd291d77SDamien Le Moal 	 * target device capabilities. Plugging such writes is fine as the flush
1603dd291d77SDamien Le Moal 	 * machinery operates at the request level, below the plug, and
1604dd291d77SDamien Le Moal 	 * completion of the flush sequence will go through the regular BIO
1605dd291d77SDamien Le Moal 	 * completion, which will handle zone write plugging.
16069b1ce7f0SDamien Le Moal 	 * Zone append operations for devices that requested emulation must
16079b1ce7f0SDamien Le Moal 	 * also be plugged so that these BIOs can be changed into regular
16089b1ce7f0SDamien Le Moal 	 * write BIOs.
1609dd291d77SDamien Le Moal 	 * Zone reset, reset all and finish commands need special treatment
1610dd291d77SDamien Le Moal 	 * to correctly track the write pointer offset of zones. These commands
1611dd291d77SDamien Le Moal 	 * are not plugged as we do not need serialization with write
1612dd291d77SDamien Le Moal 	 * operations. It is the responsibility of the user to not issue reset
1613dd291d77SDamien Le Moal 	 * and finish commands when write operations are in flight.
1614dd291d77SDamien Le Moal 	 */
1615dd291d77SDamien Le Moal 	switch (bio_op(bio)) {
16169b1ce7f0SDamien Le Moal 	case REQ_OP_ZONE_APPEND:
1617a6aa36e9SDamien Le Moal 		if (!bdev_emulates_zone_append(bdev)) {
1618a6aa36e9SDamien Le Moal 			blk_zone_wplug_handle_native_zone_append(bio);
16199b1ce7f0SDamien Le Moal 			return false;
1620a6aa36e9SDamien Le Moal 		}
16219b1ce7f0SDamien Le Moal 		fallthrough;
1622dd291d77SDamien Le Moal 	case REQ_OP_WRITE:
1623dd291d77SDamien Le Moal 	case REQ_OP_WRITE_ZEROES:
1624dd291d77SDamien Le Moal 		return blk_zone_wplug_handle_write(bio, nr_segs);
1625dd291d77SDamien Le Moal 	case REQ_OP_ZONE_RESET:
1626dd291d77SDamien Le Moal 	case REQ_OP_ZONE_FINISH:
1627dd291d77SDamien Le Moal 	case REQ_OP_ZONE_RESET_ALL:
1628efae226cSDamien Le Moal 		return blk_zone_wplug_handle_zone_mgmt(bio);
1629dd291d77SDamien Le Moal 	default:
1630dd291d77SDamien Le Moal 		return false;
1631dd291d77SDamien Le Moal 	}
1632dd291d77SDamien Le Moal 
1633dd291d77SDamien Le Moal 	return false;
1634dd291d77SDamien Le Moal }
1635dd291d77SDamien Le Moal EXPORT_SYMBOL_GPL(blk_zone_plug_bio);
1636dd291d77SDamien Le Moal 
disk_zone_wplug_unplug_bio(struct gendisk * disk,struct blk_zone_wplug * zwplug)1637dd291d77SDamien Le Moal static void disk_zone_wplug_unplug_bio(struct gendisk *disk,
1638dd291d77SDamien Le Moal 				       struct blk_zone_wplug *zwplug)
1639dd291d77SDamien Le Moal {
1640dd291d77SDamien Le Moal 	unsigned long flags;
1641dd291d77SDamien Le Moal 
1642dd291d77SDamien Le Moal 	spin_lock_irqsave(&zwplug->lock, flags);
1643dd291d77SDamien Le Moal 
16441365b690SDamien Le Moal 	/*
16451365b690SDamien Le Moal 	 * For rotational devices, signal the BIO completion to the zone write
16461365b690SDamien Le Moal 	 * plug work. Otherwise, schedule submission of the next plugged BIO
16471365b690SDamien Le Moal 	 * if we have one.
16481365b690SDamien Le Moal 	 */
16491365b690SDamien Le Moal 	if (bio_list_empty(&zwplug->bio_list))
1650dd291d77SDamien Le Moal 		zwplug->flags &= ~BLK_ZONE_WPLUG_PLUGGED;
16511365b690SDamien Le Moal 
16521365b690SDamien Le Moal 	if (blk_queue_zoned_qd1_writes(disk->queue))
16531365b690SDamien Le Moal 		complete(&disk->zone_wplugs_worker_bio_done);
16541365b690SDamien Le Moal 	else if (!bio_list_empty(&zwplug->bio_list))
16551365b690SDamien Le Moal 		disk_zone_wplug_schedule_work(disk, zwplug);
16561365b690SDamien Le Moal 
1657b7d4ffb5SDamien Le Moal 	if (!zwplug->wp_offset || disk_zone_wplug_is_full(disk, zwplug))
1658b7d4ffb5SDamien Le Moal 		disk_mark_zone_wplug_dead(zwplug);
16591365b690SDamien Le Moal 
1660dd291d77SDamien Le Moal 	spin_unlock_irqrestore(&zwplug->lock, flags);
1661dd291d77SDamien Le Moal }
1662dd291d77SDamien Le Moal 
blk_zone_append_update_request_bio(struct request * rq,struct bio * bio)16635022dae7SJohannes Thumshirn void blk_zone_append_update_request_bio(struct request *rq, struct bio *bio)
16645022dae7SJohannes Thumshirn {
16655022dae7SJohannes Thumshirn 	/*
16665022dae7SJohannes Thumshirn 	 * For zone append requests, the request sector indicates the location
16675022dae7SJohannes Thumshirn 	 * at which the BIO data was written. Return this value to the BIO
16685022dae7SJohannes Thumshirn 	 * issuer through the BIO iter sector.
16695022dae7SJohannes Thumshirn 	 * For plugged zone writes, which include emulated zone append, we need
16705022dae7SJohannes Thumshirn 	 * the original BIO sector so that blk_zone_write_plug_bio_endio() can
16715022dae7SJohannes Thumshirn 	 * lookup the zone write plug.
16725022dae7SJohannes Thumshirn 	 */
16735022dae7SJohannes Thumshirn 	bio->bi_iter.bi_sector = rq->__sector;
16744cc21a00SJohannes Thumshirn 	trace_blk_zone_append_update_request_bio(rq);
16755022dae7SJohannes Thumshirn }
16765022dae7SJohannes Thumshirn 
blk_zone_write_plug_bio_endio(struct bio * bio)1677dd291d77SDamien Le Moal void blk_zone_write_plug_bio_endio(struct bio *bio)
1678dd291d77SDamien Le Moal {
1679dd291d77SDamien Le Moal 	struct gendisk *disk = bio->bi_bdev->bd_disk;
1680dd291d77SDamien Le Moal 	struct blk_zone_wplug *zwplug =
1681b5a64ec2SDamien Le Moal 		disk_get_zone_wplug(disk, bio->bi_iter.bi_sector);
1682dd291d77SDamien Le Moal 	unsigned long flags;
1683dd291d77SDamien Le Moal 
1684dd291d77SDamien Le Moal 	if (WARN_ON_ONCE(!zwplug))
1685dd291d77SDamien Le Moal 		return;
1686dd291d77SDamien Le Moal 
1687dd291d77SDamien Le Moal 	/* Make sure we do not see this BIO again by clearing the plug flag. */
1688dd291d77SDamien Le Moal 	bio_clear_flag(bio, BIO_ZONE_WRITE_PLUGGING);
1689dd291d77SDamien Le Moal 
1690dd291d77SDamien Le Moal 	/*
16919b1ce7f0SDamien Le Moal 	 * If this is a regular write emulating a zone append operation,
16929b1ce7f0SDamien Le Moal 	 * restore the original operation code.
16939b1ce7f0SDamien Le Moal 	 */
16949b1ce7f0SDamien Le Moal 	if (bio_flagged(bio, BIO_EMULATES_ZONE_APPEND)) {
16959b1ce7f0SDamien Le Moal 		bio->bi_opf &= ~REQ_OP_MASK;
16969b1ce7f0SDamien Le Moal 		bio->bi_opf |= REQ_OP_ZONE_APPEND;
1697f705d33cSDamien Le Moal 		bio_clear_flag(bio, BIO_EMULATES_ZONE_APPEND);
16989b1ce7f0SDamien Le Moal 	}
16999b1ce7f0SDamien Le Moal 
17009b1ce7f0SDamien Le Moal 	/*
1701fe0418ebSDamien Le Moal 	 * If the BIO failed, abort all plugged BIOs and mark the plug as
1702fe0418ebSDamien Le Moal 	 * needing a write pointer update.
1703dd291d77SDamien Le Moal 	 */
1704dd291d77SDamien Le Moal 	if (bio->bi_status != BLK_STS_OK) {
1705dd291d77SDamien Le Moal 		spin_lock_irqsave(&zwplug->lock, flags);
1706fe0418ebSDamien Le Moal 		disk_zone_wplug_abort(zwplug);
1707fe0418ebSDamien Le Moal 		zwplug->flags |= BLK_ZONE_WPLUG_NEED_WP_UPDATE;
1708dd291d77SDamien Le Moal 		spin_unlock_irqrestore(&zwplug->lock, flags);
1709dd291d77SDamien Le Moal 	}
1710dd291d77SDamien Le Moal 
17117b295187SDamien Le Moal 	/* Drop the reference we took when the BIO was issued. */
17127b295187SDamien Le Moal 	disk_put_zone_wplug(zwplug);
17137b295187SDamien Le Moal 
1714dd291d77SDamien Le Moal 	/*
1715347bde9dSDamien Le Moal 	 * For BIO-based devices, blk_zone_write_plug_finish_request()
1716dd291d77SDamien Le Moal 	 * is not called. So we need to schedule execution of the next
1717dd291d77SDamien Le Moal 	 * plugged BIO here.
1718dd291d77SDamien Le Moal 	 */
17193413efa8SLinus Torvalds 	if (bdev_test_flag(bio->bi_bdev, BD_HAS_SUBMIT_BIO))
1720dd291d77SDamien Le Moal 		disk_zone_wplug_unplug_bio(disk, zwplug);
1721dd291d77SDamien Le Moal 
17227b295187SDamien Le Moal 	/* Drop the reference we took when entering this function. */
1723dd291d77SDamien Le Moal 	disk_put_zone_wplug(zwplug);
1724dd291d77SDamien Le Moal }
1725dd291d77SDamien Le Moal 
blk_zone_write_plug_finish_request(struct request * req)1726347bde9dSDamien Le Moal void blk_zone_write_plug_finish_request(struct request *req)
1727dd291d77SDamien Le Moal {
1728dd291d77SDamien Le Moal 	struct gendisk *disk = req->q->disk;
1729347bde9dSDamien Le Moal 	struct blk_zone_wplug *zwplug;
1730dd291d77SDamien Le Moal 
1731347bde9dSDamien Le Moal 	zwplug = disk_get_zone_wplug(disk, req->__sector);
1732dd291d77SDamien Le Moal 	if (WARN_ON_ONCE(!zwplug))
1733dd291d77SDamien Le Moal 		return;
1734dd291d77SDamien Le Moal 
1735dd291d77SDamien Le Moal 	req->rq_flags &= ~RQF_ZONE_WRITE_PLUGGING;
1736dd291d77SDamien Le Moal 
1737dd291d77SDamien Le Moal 	/*
1738dd291d77SDamien Le Moal 	 * Drop the reference we took when the request was initialized in
1739096bc7eaSDamien Le Moal 	 * blk_zone_write_plug_init_request().
1740dd291d77SDamien Le Moal 	 */
17417b295187SDamien Le Moal 	disk_put_zone_wplug(zwplug);
17427b295187SDamien Le Moal 
17437b295187SDamien Le Moal 	disk_zone_wplug_unplug_bio(disk, zwplug);
17447b295187SDamien Le Moal 
17457b295187SDamien Le Moal 	/* Drop the reference we took when entering this function. */
1746dd291d77SDamien Le Moal 	disk_put_zone_wplug(zwplug);
1747dd291d77SDamien Le Moal }
1748dd291d77SDamien Le Moal 
disk_zone_wplug_submit_bio(struct gendisk * disk,struct blk_zone_wplug * zwplug)17491365b690SDamien Le Moal static bool disk_zone_wplug_submit_bio(struct gendisk *disk,
17501365b690SDamien Le Moal 				       struct blk_zone_wplug *zwplug)
1751dd291d77SDamien Le Moal {
1752dd291d77SDamien Le Moal 	struct block_device *bdev;
1753dd291d77SDamien Le Moal 	unsigned long flags;
1754dd291d77SDamien Le Moal 	struct bio *bio;
1755198f36f9SBart Van Assche 	bool prepared;
1756dd291d77SDamien Le Moal 
1757dd291d77SDamien Le Moal 	/*
1758dd291d77SDamien Le Moal 	 * Submit the next plugged BIO. If we do not have any, clear
1759dd291d77SDamien Le Moal 	 * the plugged flag.
1760dd291d77SDamien Le Moal 	 */
1761fe0418ebSDamien Le Moal again:
1762198f36f9SBart Van Assche 	spin_lock_irqsave(&zwplug->lock, flags);
1763dd291d77SDamien Le Moal 	bio = bio_list_pop(&zwplug->bio_list);
1764dd291d77SDamien Le Moal 	if (!bio) {
1765dd291d77SDamien Le Moal 		zwplug->flags &= ~BLK_ZONE_WPLUG_PLUGGED;
1766dd291d77SDamien Le Moal 		spin_unlock_irqrestore(&zwplug->lock, flags);
17671365b690SDamien Le Moal 		return false;
1768dd291d77SDamien Le Moal 	}
1769dd291d77SDamien Le Moal 
17702e92ac61SJohannes Thumshirn 	trace_blk_zone_wplug_bio(zwplug->disk->queue, zwplug->zone_no,
17712e92ac61SJohannes Thumshirn 				 bio->bi_iter.bi_sector, bio_sectors(bio));
17722e92ac61SJohannes Thumshirn 
1773198f36f9SBart Van Assche 	prepared = blk_zone_wplug_prepare_bio(zwplug, bio);
1774198f36f9SBart Van Assche 	spin_unlock_irqrestore(&zwplug->lock, flags);
1775198f36f9SBart Van Assche 
1776198f36f9SBart Van Assche 	if (!prepared) {
1777fe0418ebSDamien Le Moal 		blk_zone_wplug_bio_io_error(zwplug, bio);
1778fe0418ebSDamien Le Moal 		goto again;
1779dd291d77SDamien Le Moal 	}
1780dd291d77SDamien Le Moal 
1781dd291d77SDamien Le Moal 	/*
1782dd291d77SDamien Le Moal 	 * blk-mq devices will reuse the extra reference on the request queue
1783dd291d77SDamien Le Moal 	 * usage counter we took when the BIO was plugged, but the submission
1784dd291d77SDamien Le Moal 	 * path for BIO-based devices will not do that. So drop this extra
1785dd291d77SDamien Le Moal 	 * reference here.
1786dd291d77SDamien Le Moal 	 */
17871365b690SDamien Le Moal 	if (blk_queue_zoned_qd1_writes(disk->queue))
17881365b690SDamien Le Moal 		reinit_completion(&disk->zone_wplugs_worker_bio_done);
17891365b690SDamien Le Moal 	bdev = bio->bi_bdev;
1790cf625013SChristoph Hellwig 	if (bdev_test_flag(bdev, BD_HAS_SUBMIT_BIO)) {
1791cf625013SChristoph Hellwig 		bdev->bd_disk->fops->submit_bio(bio);
1792dd291d77SDamien Le Moal 		blk_queue_exit(bdev->bd_disk->queue);
1793cf625013SChristoph Hellwig 	} else {
1794cf625013SChristoph Hellwig 		blk_mq_submit_bio(bio);
1795cf625013SChristoph Hellwig 	}
17969e78c38aSDamien Le Moal 
17971365b690SDamien Le Moal 	return true;
17981365b690SDamien Le Moal }
17991365b690SDamien Le Moal 
disk_get_zone_wplugs_work(struct gendisk * disk)18001365b690SDamien Le Moal static struct blk_zone_wplug *disk_get_zone_wplugs_work(struct gendisk *disk)
18011365b690SDamien Le Moal {
18021365b690SDamien Le Moal 	struct blk_zone_wplug *zwplug;
18031365b690SDamien Le Moal 
18041365b690SDamien Le Moal 	spin_lock_irq(&disk->zone_wplugs_list_lock);
18051365b690SDamien Le Moal 	zwplug = list_first_entry_or_null(&disk->zone_wplugs_list,
18061365b690SDamien Le Moal 					  struct blk_zone_wplug, entry);
18071365b690SDamien Le Moal 	if (zwplug)
18081365b690SDamien Le Moal 		list_del_init(&zwplug->entry);
18091365b690SDamien Le Moal 	spin_unlock_irq(&disk->zone_wplugs_list_lock);
18101365b690SDamien Le Moal 
18111365b690SDamien Le Moal 	return zwplug;
18121365b690SDamien Le Moal }
18131365b690SDamien Le Moal 
disk_zone_wplugs_worker(void * data)18141365b690SDamien Le Moal static int disk_zone_wplugs_worker(void *data)
18151365b690SDamien Le Moal {
18161365b690SDamien Le Moal 	struct gendisk *disk = data;
18171365b690SDamien Le Moal 	struct blk_zone_wplug *zwplug;
18181365b690SDamien Le Moal 	unsigned int noio_flag;
18191365b690SDamien Le Moal 
18201365b690SDamien Le Moal 	noio_flag = memalloc_noio_save();
18211365b690SDamien Le Moal 	set_user_nice(current, MIN_NICE);
18221365b690SDamien Le Moal 	set_freezable();
18231365b690SDamien Le Moal 
18241365b690SDamien Le Moal 	for (;;) {
18251365b690SDamien Le Moal 		set_current_state(TASK_INTERRUPTIBLE | TASK_FREEZABLE);
18261365b690SDamien Le Moal 
18271365b690SDamien Le Moal 		zwplug = disk_get_zone_wplugs_work(disk);
18281365b690SDamien Le Moal 		if (zwplug) {
18291365b690SDamien Le Moal 			/*
18301365b690SDamien Le Moal 			 * Process all BIOs of this zone write plug and then
18311365b690SDamien Le Moal 			 * drop the reference we took when adding the zone write
18321365b690SDamien Le Moal 			 * plug to the active list.
18331365b690SDamien Le Moal 			 */
18341365b690SDamien Le Moal 			set_current_state(TASK_RUNNING);
18351365b690SDamien Le Moal 			while (disk_zone_wplug_submit_bio(disk, zwplug))
18361365b690SDamien Le Moal 				blk_wait_io(&disk->zone_wplugs_worker_bio_done);
18379e78c38aSDamien Le Moal 			disk_put_zone_wplug(zwplug);
18381365b690SDamien Le Moal 			continue;
18391365b690SDamien Le Moal 		}
18401365b690SDamien Le Moal 
18411365b690SDamien Le Moal 		/*
18421365b690SDamien Le Moal 		 * Only sleep if nothing sets the state to running. Else check
18431365b690SDamien Le Moal 		 * for zone write plugs work again as a newly submitted BIO
18441365b690SDamien Le Moal 		 * might have added a zone write plug to the work list.
18451365b690SDamien Le Moal 		 */
18461365b690SDamien Le Moal 		if (get_current_state() == TASK_RUNNING) {
18471365b690SDamien Le Moal 			try_to_freeze();
18481365b690SDamien Le Moal 		} else {
18491365b690SDamien Le Moal 			if (kthread_should_stop()) {
18501365b690SDamien Le Moal 				set_current_state(TASK_RUNNING);
18511365b690SDamien Le Moal 				break;
18521365b690SDamien Le Moal 			}
18531365b690SDamien Le Moal 			schedule();
18541365b690SDamien Le Moal 		}
18551365b690SDamien Le Moal 	}
18561365b690SDamien Le Moal 
18571365b690SDamien Le Moal 	WARN_ON_ONCE(!list_empty(&disk->zone_wplugs_list));
18581365b690SDamien Le Moal 	memalloc_noio_restore(noio_flag);
18591365b690SDamien Le Moal 
18601365b690SDamien Le Moal 	return 0;
1861dd291d77SDamien Le Moal }
1862dd291d77SDamien Le Moal 
disk_init_zone_resources(struct gendisk * disk)1863dd291d77SDamien Le Moal void disk_init_zone_resources(struct gendisk *disk)
1864dd291d77SDamien Le Moal {
1865b7cbc30eSDamien Le Moal 	spin_lock_init(&disk->zone_wplugs_hash_lock);
18661365b690SDamien Le Moal 	spin_lock_init(&disk->zone_wplugs_list_lock);
18671365b690SDamien Le Moal 	INIT_LIST_HEAD(&disk->zone_wplugs_list);
18681365b690SDamien Le Moal 	init_completion(&disk->zone_wplugs_worker_bio_done);
1869dd291d77SDamien Le Moal }
1870dd291d77SDamien Le Moal 
/*
 * Sizing of the zone write plugs hash table and mempool of a disk.
 *
 * The hash table is sized from the zone write plug mempool size, which is
 * the maximum of the disk open zones and active zones limits, and is capped
 * at 9 bits, that is, 512 hlist head entries (4KB). A disk that has no such
 * limits gets a default mempool size of 128.
 */
#define BLK_ZONE_WPLUG_MAX_HASH_BITS		9
#define BLK_ZONE_WPLUG_DEFAULT_POOL_SIZE	128
1879dd291d77SDamien Le Moal 
disk_alloc_zone_resources(struct gendisk * disk,unsigned int pool_size)1880dd291d77SDamien Le Moal static int disk_alloc_zone_resources(struct gendisk *disk,
1881dd291d77SDamien Le Moal 				     unsigned int pool_size)
1882dd291d77SDamien Le Moal {
1883dd291d77SDamien Le Moal 	unsigned int i;
18841365b690SDamien Le Moal 	int ret = -ENOMEM;
1885dd291d77SDamien Le Moal 
1886a6aa36e9SDamien Le Moal 	atomic_set(&disk->nr_zone_wplugs, 0);
1887dd291d77SDamien Le Moal 	disk->zone_wplugs_hash_bits =
1888dd291d77SDamien Le Moal 		min(ilog2(pool_size) + 1, BLK_ZONE_WPLUG_MAX_HASH_BITS);
1889dd291d77SDamien Le Moal 
1890dd291d77SDamien Le Moal 	disk->zone_wplugs_hash =
189169050f8dSKees Cook 		kzalloc_objs(struct hlist_head,
1892189f164eSKees Cook 			     disk_zone_wplugs_hash_size(disk));
1893dd291d77SDamien Le Moal 	if (!disk->zone_wplugs_hash)
1894dd291d77SDamien Le Moal 		return -ENOMEM;
1895dd291d77SDamien Le Moal 
1896dd291d77SDamien Le Moal 	for (i = 0; i < disk_zone_wplugs_hash_size(disk); i++)
1897dd291d77SDamien Le Moal 		INIT_HLIST_HEAD(&disk->zone_wplugs_hash[i]);
1898dd291d77SDamien Le Moal 
1899dd291d77SDamien Le Moal 	disk->zone_wplugs_pool = mempool_create_kmalloc_pool(pool_size,
1900dd291d77SDamien Le Moal 						sizeof(struct blk_zone_wplug));
1901a8f59e5aSDamien Le Moal 	if (!disk->zone_wplugs_pool)
1902a8f59e5aSDamien Le Moal 		goto free_hash;
1903a8f59e5aSDamien Le Moal 
1904a8f59e5aSDamien Le Moal 	disk->zone_wplugs_wq =
1905a8f59e5aSDamien Le Moal 		alloc_workqueue("%s_zwplugs", WQ_MEM_RECLAIM | WQ_HIGHPRI,
1906a8f59e5aSDamien Le Moal 				pool_size, disk->disk_name);
1907a8f59e5aSDamien Le Moal 	if (!disk->zone_wplugs_wq)
1908a8f59e5aSDamien Le Moal 		goto destroy_pool;
1909a8f59e5aSDamien Le Moal 
19101365b690SDamien Le Moal 	disk->zone_wplugs_worker =
19111365b690SDamien Le Moal 		kthread_create(disk_zone_wplugs_worker, disk,
19121365b690SDamien Le Moal 			       "%s_zwplugs_worker", disk->disk_name);
19131365b690SDamien Le Moal 	if (IS_ERR(disk->zone_wplugs_worker)) {
19141365b690SDamien Le Moal 		ret = PTR_ERR(disk->zone_wplugs_worker);
19151365b690SDamien Le Moal 		disk->zone_wplugs_worker = NULL;
19161365b690SDamien Le Moal 		goto destroy_wq;
19171365b690SDamien Le Moal 	}
19181365b690SDamien Le Moal 	wake_up_process(disk->zone_wplugs_worker);
19191365b690SDamien Le Moal 
1920a8f59e5aSDamien Le Moal 	return 0;
1921a8f59e5aSDamien Le Moal 
19221365b690SDamien Le Moal destroy_wq:
19231365b690SDamien Le Moal 	destroy_workqueue(disk->zone_wplugs_wq);
19241365b690SDamien Le Moal 	disk->zone_wplugs_wq = NULL;
1925a8f59e5aSDamien Le Moal destroy_pool:
1926a8f59e5aSDamien Le Moal 	mempool_destroy(disk->zone_wplugs_pool);
1927a8f59e5aSDamien Le Moal 	disk->zone_wplugs_pool = NULL;
1928a8f59e5aSDamien Le Moal free_hash:
1929dd291d77SDamien Le Moal 	kfree(disk->zone_wplugs_hash);
1930dd291d77SDamien Le Moal 	disk->zone_wplugs_hash = NULL;
1931dd291d77SDamien Le Moal 	disk->zone_wplugs_hash_bits = 0;
19321365b690SDamien Le Moal 	return ret;
1933dd291d77SDamien Le Moal }
1934dd291d77SDamien Le Moal 
disk_destroy_zone_wplugs_hash_table(struct gendisk * disk)1935dd291d77SDamien Le Moal static void disk_destroy_zone_wplugs_hash_table(struct gendisk *disk)
1936dd291d77SDamien Le Moal {
1937dd291d77SDamien Le Moal 	struct blk_zone_wplug *zwplug;
1938dd291d77SDamien Le Moal 	unsigned int i;
1939dd291d77SDamien Le Moal 
1940dd291d77SDamien Le Moal 	if (!disk->zone_wplugs_hash)
1941dd291d77SDamien Le Moal 		return;
1942dd291d77SDamien Le Moal 
1943dd291d77SDamien Le Moal 	/* Free all the zone write plugs we have. */
1944dd291d77SDamien Le Moal 	for (i = 0; i < disk_zone_wplugs_hash_size(disk); i++) {
1945dd291d77SDamien Le Moal 		while (!hlist_empty(&disk->zone_wplugs_hash[i])) {
1946dd291d77SDamien Le Moal 			zwplug = hlist_entry(disk->zone_wplugs_hash[i].first,
1947dd291d77SDamien Le Moal 					     struct blk_zone_wplug, node);
1948b7d4ffb5SDamien Le Moal 			spin_lock_irq(&zwplug->lock);
1949b7d4ffb5SDamien Le Moal 			disk_mark_zone_wplug_dead(zwplug);
1950b7d4ffb5SDamien Le Moal 			spin_unlock_irq(&zwplug->lock);
1951dd291d77SDamien Le Moal 		}
1952dd291d77SDamien Le Moal 	}
1953dd291d77SDamien Le Moal 
1954a6aa36e9SDamien Le Moal 	WARN_ON_ONCE(atomic_read(&disk->nr_zone_wplugs));
1955dd291d77SDamien Le Moal 	kfree(disk->zone_wplugs_hash);
1956dd291d77SDamien Le Moal 	disk->zone_wplugs_hash = NULL;
1957dd291d77SDamien Le Moal 	disk->zone_wplugs_hash_bits = 0;
1958c6886cf6SChristoph Hellwig 
1959c6886cf6SChristoph Hellwig 	/*
1960c6886cf6SChristoph Hellwig 	 * Wait for the zone write plugs to be RCU-freed before destroying the
1961c6886cf6SChristoph Hellwig 	 * mempool.
1962c6886cf6SChristoph Hellwig 	 */
1963c6886cf6SChristoph Hellwig 	rcu_barrier();
1964c6886cf6SChristoph Hellwig 	mempool_destroy(disk->zone_wplugs_pool);
1965c6886cf6SChristoph Hellwig 	disk->zone_wplugs_pool = NULL;
1966dd291d77SDamien Le Moal }
1967dd291d77SDamien Le Moal 
/*
 * Install @zones_cond (which may be NULL) as the zone condition array of
 * @disk and free the array it replaces after an RCU grace period.
 */
static void disk_set_zones_cond_array(struct gendisk *disk, u8 *zones_cond)
{
	unsigned long irq_flags;
	u8 *old_cond;

	/* The array pointer is replaced under the hash table lock. */
	spin_lock_irqsave(&disk->zone_wplugs_hash_lock, irq_flags);
	old_cond = rcu_replace_pointer(disk->zones_cond, zones_cond,
				lockdep_is_held(&disk->zone_wplugs_hash_lock));
	spin_unlock_irqrestore(&disk->zone_wplugs_hash_lock, irq_flags);

	kfree_rcu_mightsleep(old_cond);
}
1979d7cb6d74SDamien Le Moal 
disk_free_zone_resources(struct gendisk * disk)1980dd291d77SDamien Le Moal void disk_free_zone_resources(struct gendisk *disk)
1981dd291d77SDamien Le Moal {
19821365b690SDamien Le Moal 	if (disk->zone_wplugs_worker)
19831365b690SDamien Le Moal 		kthread_stop(disk->zone_wplugs_worker);
19841365b690SDamien Le Moal 	WARN_ON_ONCE(!list_empty(&disk->zone_wplugs_list));
19851365b690SDamien Le Moal 
1986a8f59e5aSDamien Le Moal 	if (disk->zone_wplugs_wq) {
1987a8f59e5aSDamien Le Moal 		destroy_workqueue(disk->zone_wplugs_wq);
1988a8f59e5aSDamien Le Moal 		disk->zone_wplugs_wq = NULL;
1989a8f59e5aSDamien Le Moal 	}
1990a8f59e5aSDamien Le Moal 
1991dd291d77SDamien Le Moal 	disk_destroy_zone_wplugs_hash_table(disk);
1992dd291d77SDamien Le Moal 
19936e945ffbSDamien Le Moal 	disk_set_zones_cond_array(disk, NULL);
1994dd291d77SDamien Le Moal 	disk->zone_capacity = 0;
199529459c3eSDamien Le Moal 	disk->last_zone_capacity = 0;
1996dd291d77SDamien Le Moal 	disk->nr_zones = 0;
1997dd291d77SDamien Le Moal }
1998dd291d77SDamien Le Moal 
19996e945ffbSDamien Le Moal struct blk_revalidate_zone_args {
20006e945ffbSDamien Le Moal 	struct gendisk	*disk;
20016e945ffbSDamien Le Moal 	u8		*zones_cond;
20026e945ffbSDamien Le Moal 	unsigned int	nr_zones;
20036e945ffbSDamien Le Moal 	unsigned int	nr_conv_zones;
20046e945ffbSDamien Le Moal 	unsigned int	zone_capacity;
20056e945ffbSDamien Le Moal 	unsigned int	last_zone_capacity;
20066e945ffbSDamien Le Moal 	sector_t	sector;
20076e945ffbSDamien Le Moal };
20086e945ffbSDamien Le Moal 
disk_revalidate_zone_resources(struct gendisk * disk,struct blk_revalidate_zone_args * args)2009dd291d77SDamien Le Moal static int disk_revalidate_zone_resources(struct gendisk *disk,
20106e945ffbSDamien Le Moal 				struct blk_revalidate_zone_args *args)
2011dd291d77SDamien Le Moal {
2012dd291d77SDamien Le Moal 	struct queue_limits *lim = &disk->queue->limits;
2013dd291d77SDamien Le Moal 	unsigned int pool_size;
20142a2f520fSJackie Liu 	int ret = 0;
2015dd291d77SDamien Le Moal 
20166e945ffbSDamien Le Moal 	args->disk = disk;
20176e945ffbSDamien Le Moal 	args->nr_zones =
20186e945ffbSDamien Le Moal 		DIV_ROUND_UP_ULL(get_capacity(disk), lim->chunk_sectors);
20196e945ffbSDamien Le Moal 
20206e945ffbSDamien Le Moal 	/* Cached zone conditions: 1 byte per zone */
20216e945ffbSDamien Le Moal 	args->zones_cond = kzalloc(args->nr_zones, GFP_NOIO);
20226e945ffbSDamien Le Moal 	if (!args->zones_cond)
20236e945ffbSDamien Le Moal 		return -ENOMEM;
20246e945ffbSDamien Le Moal 
2025946dd71eSDamien Le Moal 	if (!disk_need_zone_resources(disk))
2026946dd71eSDamien Le Moal 		return 0;
2027946dd71eSDamien Le Moal 
2028dd291d77SDamien Le Moal 	/*
2029dd291d77SDamien Le Moal 	 * If the device has no limit on the maximum number of open and active
2030dd291d77SDamien Le Moal 	 * zones, use BLK_ZONE_WPLUG_DEFAULT_POOL_SIZE.
2031dd291d77SDamien Le Moal 	 */
2032dd291d77SDamien Le Moal 	pool_size = max(lim->max_open_zones, lim->max_active_zones);
2033dd291d77SDamien Le Moal 	if (!pool_size)
20346e945ffbSDamien Le Moal 		pool_size =
20356e945ffbSDamien Le Moal 			min(BLK_ZONE_WPLUG_DEFAULT_POOL_SIZE, args->nr_zones);
2036dd291d77SDamien Le Moal 
20372a2f520fSJackie Liu 	if (!disk->zone_wplugs_hash) {
20382a2f520fSJackie Liu 		ret = disk_alloc_zone_resources(disk, pool_size);
20392a2f520fSJackie Liu 		if (ret)
20402a2f520fSJackie Liu 			kfree(args->zones_cond);
20412a2f520fSJackie Liu 	}
2042dd291d77SDamien Le Moal 
20432a2f520fSJackie Liu 	return ret;
2044bf505456SDamien Le Moal }
2045bf505456SDamien Le Moal 
2046d9dd7308SDamien Le Moal /*
2047843283e9SDamien Le Moal  * Update the disk zone resources information and device queue limits.
2048843283e9SDamien Le Moal  * The disk queue is frozen when this is executed.
2049843283e9SDamien Le Moal  */
disk_update_zone_resources(struct gendisk * disk,struct blk_revalidate_zone_args * args)2050843283e9SDamien Le Moal static int disk_update_zone_resources(struct gendisk *disk,
2051843283e9SDamien Le Moal 				      struct blk_revalidate_zone_args *args)
2052843283e9SDamien Le Moal {
2053843283e9SDamien Le Moal 	struct request_queue *q = disk->queue;
20546e945ffbSDamien Le Moal 	unsigned int nr_seq_zones;
2055bba4322eSDamien Le Moal 	unsigned int pool_size, memflags;
2056843283e9SDamien Le Moal 	struct queue_limits lim;
2057bba4322eSDamien Le Moal 	int ret = 0;
2058bba4322eSDamien Le Moal 
2059bba4322eSDamien Le Moal 	lim = queue_limits_start_update(q);
2060bba4322eSDamien Le Moal 
2061bba4322eSDamien Le Moal 	memflags = blk_mq_freeze_queue(q);
2062843283e9SDamien Le Moal 
2063843283e9SDamien Le Moal 	disk->nr_zones = args->nr_zones;
20646e945ffbSDamien Le Moal 	if (args->nr_conv_zones >= disk->nr_zones) {
206507a1bc5cSBart Van Assche 		queue_limits_cancel_update(q);
20666b7593b5SDamien Le Moal 		pr_warn("%s: Invalid number of conventional zones %u / %u\n",
20676e945ffbSDamien Le Moal 			disk->disk_name, args->nr_conv_zones, disk->nr_zones);
2068bba4322eSDamien Le Moal 		ret = -ENODEV;
2069bba4322eSDamien Le Moal 		goto unfreeze;
20706b7593b5SDamien Le Moal 	}
20716b7593b5SDamien Le Moal 
20726e945ffbSDamien Le Moal 	disk->zone_capacity = args->zone_capacity;
20736e945ffbSDamien Le Moal 	disk->last_zone_capacity = args->last_zone_capacity;
20746e945ffbSDamien Le Moal 	disk_set_zones_cond_array(disk, args->zones_cond);
20752a2f520fSJackie Liu 	args->zones_cond = NULL;
20766e945ffbSDamien Le Moal 
2077e21d12c7SDamien Le Moal 	/*
20786e945ffbSDamien Le Moal 	 * Some devices can advertise zone resource limits that are larger than
2079e21d12c7SDamien Le Moal 	 * the number of sequential zones of the zoned block device, e.g. a
2080e21d12c7SDamien Le Moal 	 * small ZNS namespace. For such case, assume that the zoned device has
2081e21d12c7SDamien Le Moal 	 * no zone resource limits.
2082e21d12c7SDamien Le Moal 	 */
20836e945ffbSDamien Le Moal 	nr_seq_zones = disk->nr_zones - args->nr_conv_zones;
2084e21d12c7SDamien Le Moal 	if (lim.max_open_zones >= nr_seq_zones)
2085e21d12c7SDamien Le Moal 		lim.max_open_zones = 0;
2086e21d12c7SDamien Le Moal 	if (lim.max_active_zones >= nr_seq_zones)
2087e21d12c7SDamien Le Moal 		lim.max_active_zones = 0;
2088e21d12c7SDamien Le Moal 
20896b7593b5SDamien Le Moal 	if (!disk->zone_wplugs_pool)
2090e21d12c7SDamien Le Moal 		goto commit;
2091843283e9SDamien Le Moal 
2092843283e9SDamien Le Moal 	/*
2093843283e9SDamien Le Moal 	 * If the device has no limit on the maximum number of open and active
2094843283e9SDamien Le Moal 	 * zones, set its max open zone limit to the mempool size to indicate
2095843283e9SDamien Le Moal 	 * to the user that there is a potential performance impact due to
2096843283e9SDamien Le Moal 	 * dynamic zone write plug allocation when simultaneously writing to
2097843283e9SDamien Le Moal 	 * more zones than the size of the mempool.
2098843283e9SDamien Le Moal 	 */
20996b7593b5SDamien Le Moal 	pool_size = max(lim.max_open_zones, lim.max_active_zones);
21006b7593b5SDamien Le Moal 	if (!pool_size)
21016b7593b5SDamien Le Moal 		pool_size = min(BLK_ZONE_WPLUG_DEFAULT_POOL_SIZE, nr_seq_zones);
21026b7593b5SDamien Le Moal 
21036b7593b5SDamien Le Moal 	mempool_resize(disk->zone_wplugs_pool, pool_size);
21046b7593b5SDamien Le Moal 
21056b7593b5SDamien Le Moal 	if (!lim.max_open_zones && !lim.max_active_zones) {
21066b7593b5SDamien Le Moal 		if (pool_size < nr_seq_zones)
21076b7593b5SDamien Le Moal 			lim.max_open_zones = pool_size;
21086b7593b5SDamien Le Moal 		else
21096b7593b5SDamien Le Moal 			lim.max_open_zones = 0;
2110843283e9SDamien Le Moal 	}
2111843283e9SDamien Le Moal 
2112e21d12c7SDamien Le Moal commit:
2113bba4322eSDamien Le Moal 	ret = queue_limits_commit_update(q, &lim);
2114bba4322eSDamien Le Moal 
2115bba4322eSDamien Le Moal unfreeze:
2116bba4322eSDamien Le Moal 	if (ret)
2117bba4322eSDamien Le Moal 		disk_free_zone_resources(disk);
2118bba4322eSDamien Le Moal 
2119bba4322eSDamien Le Moal 	blk_mq_unfreeze_queue(q, memflags);
2120bba4322eSDamien Le Moal 
2121bba4322eSDamien Le Moal 	return ret;
2122843283e9SDamien Le Moal }
2123843283e9SDamien Le Moal 
blk_revalidate_zone_cond(struct blk_zone * zone,unsigned int idx,struct blk_revalidate_zone_args * args)21246e945ffbSDamien Le Moal static int blk_revalidate_zone_cond(struct blk_zone *zone, unsigned int idx,
21256e945ffbSDamien Le Moal 				    struct blk_revalidate_zone_args *args)
21266e945ffbSDamien Le Moal {
21276e945ffbSDamien Le Moal 	enum blk_zone_cond cond = zone->cond;
21286e945ffbSDamien Le Moal 
21296e945ffbSDamien Le Moal 	/* Check that the zone condition is consistent with the zone type. */
21306e945ffbSDamien Le Moal 	switch (cond) {
21316e945ffbSDamien Le Moal 	case BLK_ZONE_COND_NOT_WP:
21326e945ffbSDamien Le Moal 		if (zone->type != BLK_ZONE_TYPE_CONVENTIONAL)
21336e945ffbSDamien Le Moal 			goto invalid_condition;
21346e945ffbSDamien Le Moal 		break;
21356e945ffbSDamien Le Moal 	case BLK_ZONE_COND_IMP_OPEN:
21366e945ffbSDamien Le Moal 	case BLK_ZONE_COND_EXP_OPEN:
21376e945ffbSDamien Le Moal 	case BLK_ZONE_COND_CLOSED:
21386e945ffbSDamien Le Moal 	case BLK_ZONE_COND_EMPTY:
21396e945ffbSDamien Le Moal 	case BLK_ZONE_COND_FULL:
21406e945ffbSDamien Le Moal 	case BLK_ZONE_COND_OFFLINE:
21416e945ffbSDamien Le Moal 	case BLK_ZONE_COND_READONLY:
21426e945ffbSDamien Le Moal 		if (zone->type != BLK_ZONE_TYPE_SEQWRITE_REQ)
21436e945ffbSDamien Le Moal 			goto invalid_condition;
21446e945ffbSDamien Le Moal 		break;
21456e945ffbSDamien Le Moal 	default:
21466e945ffbSDamien Le Moal 		pr_warn("%s: Invalid zone condition 0x%X\n",
21476e945ffbSDamien Le Moal 			args->disk->disk_name, cond);
21486e945ffbSDamien Le Moal 		return -ENODEV;
21496e945ffbSDamien Le Moal 	}
21506e945ffbSDamien Le Moal 
21510bf0e2e4SDamien Le Moal 	blk_zone_set_cond(args->zones_cond, idx, cond);
21526e945ffbSDamien Le Moal 
21536e945ffbSDamien Le Moal 	return 0;
21546e945ffbSDamien Le Moal 
21556e945ffbSDamien Le Moal invalid_condition:
21566e945ffbSDamien Le Moal 	pr_warn("%s: Invalid zone condition 0x%x for type 0x%x\n",
21576e945ffbSDamien Le Moal 		args->disk->disk_name, cond, zone->type);
21586e945ffbSDamien Le Moal 
21596e945ffbSDamien Le Moal 	return -ENODEV;
21606e945ffbSDamien Le Moal }
21616e945ffbSDamien Le Moal 
blk_revalidate_conv_zone(struct blk_zone * zone,unsigned int idx,struct blk_revalidate_zone_args * args)2162d7580149SDamien Le Moal static int blk_revalidate_conv_zone(struct blk_zone *zone, unsigned int idx,
2163d7580149SDamien Le Moal 				    struct blk_revalidate_zone_args *args)
2164d7580149SDamien Le Moal {
2165d7580149SDamien Le Moal 	struct gendisk *disk = args->disk;
2166d7580149SDamien Le Moal 
2167d7580149SDamien Le Moal 	if (zone->capacity != zone->len) {
2168d7580149SDamien Le Moal 		pr_warn("%s: Invalid conventional zone capacity\n",
2169d7580149SDamien Le Moal 			disk->disk_name);
2170d7580149SDamien Le Moal 		return -ENODEV;
2171d7580149SDamien Le Moal 	}
2172d7580149SDamien Le Moal 
217329459c3eSDamien Le Moal 	if (disk_zone_is_last(disk, zone))
217429459c3eSDamien Le Moal 		args->last_zone_capacity = zone->capacity;
217529459c3eSDamien Le Moal 
21766e945ffbSDamien Le Moal 	args->nr_conv_zones++;
2177d7580149SDamien Le Moal 
2178d7580149SDamien Le Moal 	return 0;
2179d7580149SDamien Le Moal }
2180d7580149SDamien Le Moal 
blk_revalidate_seq_zone(struct blk_zone * zone,unsigned int idx,struct blk_revalidate_zone_args * args)2181d7580149SDamien Le Moal static int blk_revalidate_seq_zone(struct blk_zone *zone, unsigned int idx,
2182d7580149SDamien Le Moal 				   struct blk_revalidate_zone_args *args)
2183d7580149SDamien Le Moal {
2184d7580149SDamien Le Moal 	struct gendisk *disk = args->disk;
2185d7580149SDamien Le Moal 	struct blk_zone_wplug *zwplug;
2186d7580149SDamien Le Moal 	unsigned int wp_offset;
2187d7580149SDamien Le Moal 
2188d7580149SDamien Le Moal 	/*
2189d7580149SDamien Le Moal 	 * Remember the capacity of the first sequential zone and check
2190cd639993SDamien Le Moal 	 * if it is constant for all zones, ignoring the last zone as it can be
2191cd639993SDamien Le Moal 	 * smaller.
2192d7580149SDamien Le Moal 	 */
2193d7580149SDamien Le Moal 	if (!args->zone_capacity)
2194d7580149SDamien Le Moal 		args->zone_capacity = zone->capacity;
219529459c3eSDamien Le Moal 	if (disk_zone_is_last(disk, zone)) {
219629459c3eSDamien Le Moal 		args->last_zone_capacity = zone->capacity;
219729459c3eSDamien Le Moal 	} else if (zone->capacity != args->zone_capacity) {
2198d7580149SDamien Le Moal 		pr_warn("%s: Invalid variable zone capacity\n",
2199d7580149SDamien Le Moal 			disk->disk_name);
2200d7580149SDamien Le Moal 		return -ENODEV;
2201d7580149SDamien Le Moal 	}
2202d7580149SDamien Le Moal 
2203d7580149SDamien Le Moal 	/*
2204a6aa36e9SDamien Le Moal 	 * If the device needs zone append emulation, we need to track the
2205a6aa36e9SDamien Le Moal 	 * write pointer of all zones that are not empty nor full. So make sure
2206a6aa36e9SDamien Le Moal 	 * we have a zone write plug for such zone if the device has a zone
2207a6aa36e9SDamien Le Moal 	 * write plug hash table.
2208d7580149SDamien Le Moal 	 */
22092c38ec93SJohannes Thumshirn 	if (!disk->zone_wplugs_hash)
2210d7580149SDamien Le Moal 		return 0;
2211d7580149SDamien Le Moal 
2212e2b0ec77SDamien Le Moal 	wp_offset = disk_zone_wplug_sync_wp_offset(disk, zone);
2213d7580149SDamien Le Moal 	if (!wp_offset || wp_offset >= zone->capacity)
2214d7580149SDamien Le Moal 		return 0;
2215d7580149SDamien Le Moal 
22161084e41dSDamien Le Moal 	zwplug = disk_get_or_alloc_zone_wplug(disk, zone->wp, GFP_NOIO);
2217d7580149SDamien Le Moal 	if (!zwplug)
2218d7580149SDamien Le Moal 		return -ENOMEM;
2219d7580149SDamien Le Moal 	disk_put_zone_wplug(zwplug);
2220d7580149SDamien Le Moal 
2221d7580149SDamien Le Moal 	return 0;
2222d7580149SDamien Le Moal }
2223d7580149SDamien Le Moal 
2224843283e9SDamien Le Moal /*
2225d9dd7308SDamien Le Moal  * Helper function to check the validity of zones of a zoned block device.
2226d9dd7308SDamien Le Moal  */
blk_revalidate_zone_cb(struct blk_zone * zone,unsigned int idx,void * data)2227d4100351SChristoph Hellwig static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx,
2228d4100351SChristoph Hellwig 				  void *data)
2229d9dd7308SDamien Le Moal {
2230d4100351SChristoph Hellwig 	struct blk_revalidate_zone_args *args = data;
2231d4100351SChristoph Hellwig 	struct gendisk *disk = args->disk;
2232d7580149SDamien Le Moal 	sector_t zone_sectors = disk->queue->limits.chunk_sectors;
2233d7580149SDamien Le Moal 	int ret;
223403e51c4aSDamien Le Moal 
223503e51c4aSDamien Le Moal 	/* Check for bad zones and holes in the zone report */
223603e51c4aSDamien Le Moal 	if (zone->start != args->sector) {
223703e51c4aSDamien Le Moal 		pr_warn("%s: Zone gap at sectors %llu..%llu\n",
223803e51c4aSDamien Le Moal 			disk->disk_name, args->sector, zone->start);
223903e51c4aSDamien Le Moal 		return -ENODEV;
224003e51c4aSDamien Le Moal 	}
224103e51c4aSDamien Le Moal 
2242cd639993SDamien Le Moal 	if (zone->start >= get_capacity(disk) || !zone->len) {
224303e51c4aSDamien Le Moal 		pr_warn("%s: Invalid zone start %llu, length %llu\n",
224403e51c4aSDamien Le Moal 			disk->disk_name, zone->start, zone->len);
224503e51c4aSDamien Le Moal 		return -ENODEV;
224603e51c4aSDamien Le Moal 	}
2247d9dd7308SDamien Le Moal 
2248d9dd7308SDamien Le Moal 	/*
2249d9dd7308SDamien Le Moal 	 * All zones must have the same size, with the exception on an eventual
2250d9dd7308SDamien Le Moal 	 * smaller last zone.
2251d9dd7308SDamien Le Moal 	 */
2252cd639993SDamien Le Moal 	if (!disk_zone_is_last(disk, zone)) {
225303e51c4aSDamien Le Moal 		if (zone->len != zone_sectors) {
22546c6b3549SChristoph Hellwig 			pr_warn("%s: Invalid zoned device with non constant zone size\n",
22556c6b3549SChristoph Hellwig 				disk->disk_name);
22566c6b3549SChristoph Hellwig 			return -ENODEV;
22576c6b3549SChristoph Hellwig 		}
225803e51c4aSDamien Le Moal 	} else if (zone->len > zone_sectors) {
2259d9dd7308SDamien Le Moal 		pr_warn("%s: Invalid zoned device with larger last zone size\n",
2260d9dd7308SDamien Le Moal 			disk->disk_name);
2261d4100351SChristoph Hellwig 		return -ENODEV;
2262d9dd7308SDamien Le Moal 	}
2263d9dd7308SDamien Le Moal 
2264ecfe43b1SDamien Le Moal 	if (!zone->capacity || zone->capacity > zone->len) {
2265ecfe43b1SDamien Le Moal 		pr_warn("%s: Invalid zone capacity\n",
2266ecfe43b1SDamien Le Moal 			disk->disk_name);
2267ecfe43b1SDamien Le Moal 		return -ENODEV;
2268ecfe43b1SDamien Le Moal 	}
2269ecfe43b1SDamien Le Moal 
22706e945ffbSDamien Le Moal 	/* Check zone condition */
22716e945ffbSDamien Le Moal 	ret = blk_revalidate_zone_cond(zone, idx, args);
22726e945ffbSDamien Le Moal 	if (ret)
22736e945ffbSDamien Le Moal 		return ret;
22746e945ffbSDamien Le Moal 
2275d9dd7308SDamien Le Moal 	/* Check zone type */
2276d9dd7308SDamien Le Moal 	switch (zone->type) {
2277d9dd7308SDamien Le Moal 	case BLK_ZONE_TYPE_CONVENTIONAL:
2278d7580149SDamien Le Moal 		ret = blk_revalidate_conv_zone(zone, idx, args);
2279e94f5819SChristoph Hellwig 		break;
2280d9dd7308SDamien Le Moal 	case BLK_ZONE_TYPE_SEQWRITE_REQ:
2281d7580149SDamien Le Moal 		ret = blk_revalidate_seq_zone(zone, idx, args);
2282d9dd7308SDamien Le Moal 		break;
2283587371edSDamien Le Moal 	case BLK_ZONE_TYPE_SEQWRITE_PREF:
2284d9dd7308SDamien Le Moal 	default:
2285d9dd7308SDamien Le Moal 		pr_warn("%s: Invalid zone type 0x%x at sectors %llu\n",
2286d9dd7308SDamien Le Moal 			disk->disk_name, (int)zone->type, zone->start);
2287d7580149SDamien Le Moal 		ret = -ENODEV;
2288d9dd7308SDamien Le Moal 	}
2289d9dd7308SDamien Le Moal 
2290d7580149SDamien Le Moal 	if (!ret)
2291d4100351SChristoph Hellwig 		args->sector += zone->len;
2292d7580149SDamien Le Moal 
2293d7580149SDamien Le Moal 	return ret;
2294d4100351SChristoph Hellwig }
2295d4100351SChristoph Hellwig 
2296bf505456SDamien Le Moal /**
229702ccd7c3SDamien Le Moal  * blk_revalidate_disk_zones - (re)allocate and initialize zone write plugs
2298bf505456SDamien Le Moal  * @disk:	Target disk
2299bf505456SDamien Le Moal  *
23009b3c08b9SDamien Le Moal  * Helper function for low-level device drivers to check, (re) allocate and
23019b3c08b9SDamien Le Moal  * initialize resources used for managing zoned disks. This function should
23029b3c08b9SDamien Le Moal  * normally be called by blk-mq based drivers when a zoned gendisk is probed
23039b3c08b9SDamien Le Moal  * and when the zone configuration of the gendisk changes (e.g. after a format).
230403e51c4aSDamien Le Moal  * Before calling this function, the device driver must already have set the
230503e51c4aSDamien Le Moal  * device zone size (chunk_sector limit) and the max zone append limit.
2306946dd71eSDamien Le Moal  * BIO based drivers can also use this function as long as the device queue
2307946dd71eSDamien Le Moal  * can be safely frozen.
2308bf505456SDamien Le Moal  */
blk_revalidate_disk_zones(struct gendisk * disk)23099b3c08b9SDamien Le Moal int blk_revalidate_disk_zones(struct gendisk *disk)
2310bf505456SDamien Le Moal {
2311bf505456SDamien Le Moal 	struct request_queue *q = disk->queue;
231203e51c4aSDamien Le Moal 	sector_t zone_sectors = q->limits.chunk_sectors;
231303e51c4aSDamien Le Moal 	sector_t capacity = get_capacity(disk);
231403e51c4aSDamien Le Moal 	struct blk_revalidate_zone_args args = { };
2315bba4322eSDamien Le Moal 	unsigned int memflags, noio_flag;
2316fdb9aed8SDamien Le Moal 	struct blk_report_zones_args rep_args = {
2317fdb9aed8SDamien Le Moal 		.cb = blk_revalidate_zone_cb,
2318fdb9aed8SDamien Le Moal 		.data = &args,
2319fdb9aed8SDamien Le Moal 	};
2320dd291d77SDamien Le Moal 	int ret = -ENOMEM;
2321bf505456SDamien Le Moal 
2322c98c3d09SChristoph Hellwig 	if (WARN_ON_ONCE(!blk_queue_is_zoned(q)))
2323c98c3d09SChristoph Hellwig 		return -EIO;
2324bf505456SDamien Le Moal 
232503e51c4aSDamien Le Moal 	if (!capacity)
232603e51c4aSDamien Le Moal 		return -ENODEV;
232703e51c4aSDamien Le Moal 
232803e51c4aSDamien Le Moal 	/*
232903e51c4aSDamien Le Moal 	 * Checks that the device driver indicated a valid zone size and that
233003e51c4aSDamien Le Moal 	 * the max zone append limit is set.
233103e51c4aSDamien Le Moal 	 */
233203e51c4aSDamien Le Moal 	if (!zone_sectors || !is_power_of_2(zone_sectors)) {
233303e51c4aSDamien Le Moal 		pr_warn("%s: Invalid non power of two zone size (%llu)\n",
233403e51c4aSDamien Le Moal 			disk->disk_name, zone_sectors);
233503e51c4aSDamien Le Moal 		return -ENODEV;
233603e51c4aSDamien Le Moal 	}
233703e51c4aSDamien Le Moal 
2338e94f5819SChristoph Hellwig 	/*
23396c6b3549SChristoph Hellwig 	 * Ensure that all memory allocations in this context are done as if
23406c6b3549SChristoph Hellwig 	 * GFP_NOIO was specified.
2341e94f5819SChristoph Hellwig 	 */
23426c6b3549SChristoph Hellwig 	noio_flag = memalloc_noio_save();
23436e945ffbSDamien Le Moal 	ret = disk_revalidate_zone_resources(disk, &args);
2344dd291d77SDamien Le Moal 	if (ret) {
2345dd291d77SDamien Le Moal 		memalloc_noio_restore(noio_flag);
2346dd291d77SDamien Le Moal 		return ret;
2347dd291d77SDamien Le Moal 	}
2348fe0418ebSDamien Le Moal 
2349fdb9aed8SDamien Le Moal 	ret = disk->fops->report_zones(disk, 0, UINT_MAX, &rep_args);
23502afdeb23SDamien Le Moal 	if (!ret) {
23512afdeb23SDamien Le Moal 		pr_warn("%s: No zones reported\n", disk->disk_name);
23522afdeb23SDamien Le Moal 		ret = -ENODEV;
23532afdeb23SDamien Le Moal 	}
2354e94f5819SChristoph Hellwig 	memalloc_noio_restore(noio_flag);
2355bd976e52SDamien Le Moal 
23562a2f520fSJackie Liu 	if (ret <= 0)
23572a2f520fSJackie Liu 		goto free_resources;
23582a2f520fSJackie Liu 
2359bf505456SDamien Le Moal 	/*
23602afdeb23SDamien Le Moal 	 * If zones where reported, make sure that the entire disk capacity
23612afdeb23SDamien Le Moal 	 * has been checked.
23622afdeb23SDamien Le Moal 	 */
23632a2f520fSJackie Liu 	if (args.sector != capacity) {
23642afdeb23SDamien Le Moal 		pr_warn("%s: Missing zones from sector %llu\n",
23652afdeb23SDamien Le Moal 			disk->disk_name, args.sector);
23662afdeb23SDamien Le Moal 		ret = -ENODEV;
23672a2f520fSJackie Liu 		goto free_resources;
23682afdeb23SDamien Le Moal 	}
23692afdeb23SDamien Le Moal 
23702a2f520fSJackie Liu 	ret = disk_update_zone_resources(disk, &args);
23712a2f520fSJackie Liu 	if (ret)
23722a2f520fSJackie Liu 		goto free_resources;
23731e1a9cecSChristoph Hellwig 
23742a2f520fSJackie Liu 	return 0;
23752a2f520fSJackie Liu 
23762a2f520fSJackie Liu free_resources:
2377bba4322eSDamien Le Moal 	pr_warn("%s: failed to revalidate zones\n", disk->disk_name);
2378bba4322eSDamien Le Moal 
23792a2f520fSJackie Liu 	kfree(args.zones_cond);
2380bba4322eSDamien Le Moal 	memflags = blk_mq_freeze_queue(q);
2381843283e9SDamien Le Moal 	disk_free_zone_resources(disk);
23821e1a9cecSChristoph Hellwig 	blk_mq_unfreeze_queue(q, memflags);
2383bf505456SDamien Le Moal 
2384bf505456SDamien Le Moal 	return ret;
2385bf505456SDamien Le Moal }
2386bf505456SDamien Le Moal EXPORT_SYMBOL_GPL(blk_revalidate_disk_zones);
2387d9f1439aSDamien Le Moal 
2388b76b840fSDamien Le Moal /**
2389b76b840fSDamien Le Moal  * blk_zone_issue_zeroout - zero-fill a block range in a zone
2390b76b840fSDamien Le Moal  * @bdev:	blockdev to write
2391b76b840fSDamien Le Moal  * @sector:	start sector
2392b76b840fSDamien Le Moal  * @nr_sects:	number of sectors to write
2393b76b840fSDamien Le Moal  * @gfp_mask:	memory allocation flags (for bio_alloc)
2394b76b840fSDamien Le Moal  *
2395b76b840fSDamien Le Moal  * Description:
2396b76b840fSDamien Le Moal  *  Zero-fill a block range in a zone (@sector must be equal to the zone write
2397b76b840fSDamien Le Moal  *  pointer), handling potential errors due to the (initially unknown) lack of
2398b76b840fSDamien Le Moal  *  hardware offload (See blkdev_issue_zeroout()).
2399b76b840fSDamien Le Moal  */
blk_zone_issue_zeroout(struct block_device * bdev,sector_t sector,sector_t nr_sects,gfp_t gfp_mask)2400b76b840fSDamien Le Moal int blk_zone_issue_zeroout(struct block_device *bdev, sector_t sector,
2401b76b840fSDamien Le Moal 			   sector_t nr_sects, gfp_t gfp_mask)
2402b76b840fSDamien Le Moal {
2403fdb9aed8SDamien Le Moal 	struct gendisk *disk = bdev->bd_disk;
2404b76b840fSDamien Le Moal 	int ret;
2405b76b840fSDamien Le Moal 
2406b76b840fSDamien Le Moal 	if (WARN_ON_ONCE(!bdev_is_zoned(bdev)))
2407b76b840fSDamien Le Moal 		return -EIO;
2408b76b840fSDamien Le Moal 
2409b76b840fSDamien Le Moal 	ret = blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask,
2410b76b840fSDamien Le Moal 				   BLKDEV_ZERO_NOFALLBACK);
2411b76b840fSDamien Le Moal 	if (ret != -EOPNOTSUPP)
2412b76b840fSDamien Le Moal 		return ret;
2413b76b840fSDamien Le Moal 
2414b76b840fSDamien Le Moal 	/*
2415b76b840fSDamien Le Moal 	 * The failed call to blkdev_issue_zeroout() advanced the zone write
2416b76b840fSDamien Le Moal 	 * pointer. Undo this using a report zone to update the zone write
2417b76b840fSDamien Le Moal 	 * pointer to the correct current value.
2418b76b840fSDamien Le Moal 	 */
2419fdb9aed8SDamien Le Moal 	ret = disk->fops->report_zones(disk, sector, 1, NULL);
2420b76b840fSDamien Le Moal 	if (ret != 1)
2421b76b840fSDamien Le Moal 		return ret < 0 ? ret : -EIO;
2422b76b840fSDamien Le Moal 
2423b76b840fSDamien Le Moal 	/*
2424b76b840fSDamien Le Moal 	 * Retry without BLKDEV_ZERO_NOFALLBACK to force the fallback to a
2425b76b840fSDamien Le Moal 	 * regular write with zero-pages.
2426b76b840fSDamien Le Moal 	 */
2427b76b840fSDamien Le Moal 	return blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask, 0);
2428b76b840fSDamien Le Moal }
2429b76b840fSDamien Le Moal EXPORT_SYMBOL_GPL(blk_zone_issue_zeroout);
2430b76b840fSDamien Le Moal 
2431d9f1439aSDamien Le Moal #ifdef CONFIG_BLK_DEBUG_FS
queue_zone_wplug_show(struct blk_zone_wplug * zwplug,struct seq_file * m)2432cb01ecb7SBart Van Assche static void queue_zone_wplug_show(struct blk_zone_wplug *zwplug,
2433cb01ecb7SBart Van Assche 				  struct seq_file *m)
2434d9f1439aSDamien Le Moal {
2435a98b05b0SDamien Le Moal 	unsigned int zwp_wp_offset, zwp_flags;
2436a98b05b0SDamien Le Moal 	unsigned int zwp_zone_no, zwp_ref;
2437cb01ecb7SBart Van Assche 	unsigned int zwp_bio_list_size;
24381efbbc64SDamien Le Moal 	enum blk_zone_cond zwp_cond;
2439a98b05b0SDamien Le Moal 	unsigned long flags;
2440d9f1439aSDamien Le Moal 
2441a98b05b0SDamien Le Moal 	spin_lock_irqsave(&zwplug->lock, flags);
2442a98b05b0SDamien Le Moal 	zwp_zone_no = zwplug->zone_no;
2443a98b05b0SDamien Le Moal 	zwp_flags = zwplug->flags;
24444122fef1SDamien Le Moal 	zwp_ref = refcount_read(&zwplug->ref);
24451efbbc64SDamien Le Moal 	zwp_cond = zwplug->cond;
2446a98b05b0SDamien Le Moal 	zwp_wp_offset = zwplug->wp_offset;
2447a98b05b0SDamien Le Moal 	zwp_bio_list_size = bio_list_size(&zwplug->bio_list);
2448a98b05b0SDamien Le Moal 	spin_unlock_irqrestore(&zwplug->lock, flags);
2449d9f1439aSDamien Le Moal 
24502b39d4a6SDamien Le Moal 	seq_printf(m,
24511efbbc64SDamien Le Moal 		"Zone no: %u, flags: 0x%x, ref: %u, cond: %s, wp ofst: %u, pending BIO: %u\n",
24521efbbc64SDamien Le Moal 		zwp_zone_no, zwp_flags, zwp_ref, blk_zone_cond_str(zwp_cond),
2453a98b05b0SDamien Le Moal 		zwp_wp_offset, zwp_bio_list_size);
2454a98b05b0SDamien Le Moal }
2455cb01ecb7SBart Van Assche 
queue_zone_wplugs_show(void * data,struct seq_file * m)2456cb01ecb7SBart Van Assche int queue_zone_wplugs_show(void *data, struct seq_file *m)
2457cb01ecb7SBart Van Assche {
2458cb01ecb7SBart Van Assche 	struct request_queue *q = data;
2459cb01ecb7SBart Van Assche 	struct gendisk *disk = q->disk;
2460cb01ecb7SBart Van Assche 	struct blk_zone_wplug *zwplug;
2461cb01ecb7SBart Van Assche 	unsigned int i;
2462cb01ecb7SBart Van Assche 
2463cb01ecb7SBart Van Assche 	if (!disk->zone_wplugs_hash)
2464cb01ecb7SBart Van Assche 		return 0;
2465cb01ecb7SBart Van Assche 
2466cb01ecb7SBart Van Assche 	rcu_read_lock();
2467cb01ecb7SBart Van Assche 	for (i = 0; i < disk_zone_wplugs_hash_size(disk); i++)
2468cb01ecb7SBart Van Assche 		hlist_for_each_entry_rcu(zwplug, &disk->zone_wplugs_hash[i],
2469cb01ecb7SBart Van Assche 					 node)
2470cb01ecb7SBart Van Assche 			queue_zone_wplug_show(zwplug, m);
2471a98b05b0SDamien Le Moal 	rcu_read_unlock();
2472d9f1439aSDamien Le Moal 
2473d9f1439aSDamien Le Moal 	return 0;
2474d9f1439aSDamien Le Moal }
2475d9f1439aSDamien Le Moal 
2476d9f1439aSDamien Le Moal #endif
2477