@@ -41,6 +41,11 @@ static const char *const zone_cond_name[] = {
4141/*
4242 * Per-zone write plug.
4343 * @node: hlist_node structure for managing the plug using a hash table.
44+ * @bio_list: The list of BIOs that are currently plugged.
45+ * @bio_work: Work struct to handle issuing of plugged BIOs.
46+ * @rcu_head: RCU head to free zone write plugs with an RCU grace period.
47+ * @disk: The gendisk the plug belongs to.
48+ * @lock: Spinlock to atomically manipulate the plug.
4449 * @ref: Zone write plug reference counter. A zone write plug reference is
4550 * always at least 1 when the plug is hashed in the disk plug hash table.
4651 * The reference is incremented whenever a new BIO needing plugging is
@@ -50,27 +55,22 @@ static const char *const zone_cond_name[] = {
5055 * reference is dropped whenever the zone of the zone write plug is reset,
5156 * finished and when the zone becomes full (last write BIO to the zone
5257 * completes).
53- * @lock: Spinlock to atomically manipulate the plug.
5458 * @flags: Flags indicating the plug state.
5559 * @zone_no: The number of the zone the plug is managing.
5660 * @wp_offset: The zone write pointer location relative to the start of the zone
5761 * as a number of 512B sectors.
58- * @bio_list: The list of BIOs that are currently plugged.
59- * @bio_work: Work struct to handle issuing of plugged BIOs
60- * @rcu_head: RCU head to free zone write plugs with an RCU grace period.
61- * @disk: The gendisk the plug belongs to.
6262 */
6363struct blk_zone_wplug {
6464 struct hlist_node node ;
65- refcount_t ref ;
66- spinlock_t lock ;
67- unsigned int flags ;
68- unsigned int zone_no ;
69- unsigned int wp_offset ;
7065 struct bio_list bio_list ;
7166 struct work_struct bio_work ;
7267 struct rcu_head rcu_head ;
7368 struct gendisk * disk ;
69+ spinlock_t lock ;
70+ refcount_t ref ;
71+ unsigned int flags ;
72+ unsigned int zone_no ;
73+ unsigned int wp_offset ;
7474};
7575
7676static inline unsigned int disk_zone_wplugs_hash_size (struct gendisk * disk )
@@ -85,17 +85,17 @@ static inline unsigned int disk_zone_wplugs_hash_size(struct gendisk *disk)
8585 * being executed or the zone write plug bio list is not empty.
8686 * - BLK_ZONE_WPLUG_NEED_WP_UPDATE: Indicates that we lost track of a zone
8787 * write pointer offset and need to update it.
88- * - BLK_ZONE_WPLUG_UNHASHED : Indicates that the zone write plug was removed
89- * from the disk hash table and that the initial reference to the zone
90- * write plug set when the plug was first added to the hash table has been
91- * dropped. This flag is set when a zone is reset, finished or become full,
92- * to prevent new references to the zone write plug to be taken for
93- * newly incoming BIOs. A zone write plug flagged with this flag will be
94- * freed once all remaining references from BIOs or functions are dropped .
88+ * - BLK_ZONE_WPLUG_DEAD: Indicates that the zone write plug will be
89+ * removed from the disk hash table of zone write plugs when the last
90+ * reference on the zone write plug is dropped. If set, this flag also
91+ * indicates that the initial extra reference on the zone write plug was
92+ * dropped, meaning that the reference count indicates the current number of
93+ * active users (code context or BIOs and requests in flight). This flag is
94+ * set when a zone is reset, finished or becomes full.
9595 */
9696#define BLK_ZONE_WPLUG_PLUGGED (1U << 0)
9797#define BLK_ZONE_WPLUG_NEED_WP_UPDATE (1U << 1)
98- #define BLK_ZONE_WPLUG_UNHASHED (1U << 2)
98+ #define BLK_ZONE_WPLUG_DEAD (1U << 2)
9999
100100/**
101101 * blk_zone_cond_str - Return string XXX in BLK_ZONE_COND_XXX.
@@ -163,7 +163,6 @@ int blkdev_report_zones(struct block_device *bdev, sector_t sector,
163163 unsigned int nr_zones , report_zones_cb cb , void * data )
164164{
165165 struct gendisk * disk = bdev -> bd_disk ;
166- sector_t capacity = get_capacity (disk );
167166 struct disk_report_zones_cb_args args = {
168167 .disk = disk ,
169168 .user_cb = cb ,
@@ -173,7 +172,7 @@ int blkdev_report_zones(struct block_device *bdev, sector_t sector,
173172 if (!bdev_is_zoned (bdev ) || WARN_ON_ONCE (!disk -> fops -> report_zones ))
174173 return - EOPNOTSUPP ;
175174
176- if (!nr_zones || sector >= capacity )
175+ if (!nr_zones || sector >= get_capacity ( disk ) )
177176 return 0 ;
178177
179178 return disk -> fops -> report_zones (disk , sector , nr_zones ,
@@ -480,65 +479,42 @@ static void disk_free_zone_wplug_rcu(struct rcu_head *rcu_head)
480479 mempool_free (zwplug , zwplug -> disk -> zone_wplugs_pool );
481480}
482481
483- static inline void disk_put_zone_wplug (struct blk_zone_wplug * zwplug )
482+ static void disk_free_zone_wplug (struct blk_zone_wplug * zwplug )
484483{
485- if (refcount_dec_and_test (& zwplug -> ref )) {
486- WARN_ON_ONCE (!bio_list_empty (& zwplug -> bio_list ));
487- WARN_ON_ONCE (zwplug -> flags & BLK_ZONE_WPLUG_PLUGGED );
488- WARN_ON_ONCE (!(zwplug -> flags & BLK_ZONE_WPLUG_UNHASHED ));
489-
490- call_rcu (& zwplug -> rcu_head , disk_free_zone_wplug_rcu );
491- }
492- }
493-
494- static inline bool disk_should_remove_zone_wplug (struct gendisk * disk ,
495- struct blk_zone_wplug * zwplug )
496- {
497- /* If the zone write plug was already removed, we are done. */
498- if (zwplug -> flags & BLK_ZONE_WPLUG_UNHASHED )
499- return false;
484+ struct gendisk * disk = zwplug -> disk ;
485+ unsigned long flags ;
500486
501- /* If the zone write plug is still plugged, it cannot be removed. */
502- if (zwplug -> flags & BLK_ZONE_WPLUG_PLUGGED )
503- return false ;
487+ WARN_ON_ONCE (!( zwplug -> flags & BLK_ZONE_WPLUG_DEAD ));
488+ WARN_ON_ONCE (zwplug -> flags & BLK_ZONE_WPLUG_PLUGGED );
489+ WARN_ON_ONCE (! bio_list_empty ( & zwplug -> bio_list )) ;
504490
505- /*
506- * Completions of BIOs with blk_zone_write_plug_bio_endio() may
507- * happen after handling a request completion with
508- * blk_zone_write_plug_finish_request() (e.g. with split BIOs
509- * that are chained). In such case, disk_zone_wplug_unplug_bio()
510- * should not attempt to remove the zone write plug until all BIO
511- * completions are seen. Check by looking at the zone write plug
512- * reference count, which is 2 when the plug is unused (one reference
513- * taken when the plug was allocated and another reference taken by the
514- * caller context).
515- */
516- if (refcount_read (& zwplug -> ref ) > 2 )
517- return false;
491+ spin_lock_irqsave (& disk -> zone_wplugs_lock , flags );
492+ hlist_del_init_rcu (& zwplug -> node );
493+ atomic_dec (& disk -> nr_zone_wplugs );
494+ spin_unlock_irqrestore (& disk -> zone_wplugs_lock , flags );
518495
519- /* We can remove zone write plugs for zones that are empty or full. */
520- return !zwplug -> wp_offset || disk_zone_wplug_is_full (disk , zwplug );
496+ call_rcu (& zwplug -> rcu_head , disk_free_zone_wplug_rcu );
521497}
522498
523- static void disk_remove_zone_wplug (struct gendisk * disk ,
524- struct blk_zone_wplug * zwplug )
499+ static inline void disk_put_zone_wplug (struct blk_zone_wplug * zwplug )
525500{
526- unsigned long flags ;
501+ if (refcount_dec_and_test (& zwplug -> ref ))
502+ disk_free_zone_wplug (zwplug );
503+ }
527504
528- /* If the zone write plug was already removed, we have nothing to do. */
529- if (zwplug -> flags & BLK_ZONE_WPLUG_UNHASHED )
530- return ;
505+ /*
506+ * Flag the zone write plug as dead and drop the initial reference we got when
507+ * the zone write plug was added to the hash table. The zone write plug will be
508+ * unhashed when its last reference is dropped.
509+ */
510+ static void disk_mark_zone_wplug_dead (struct blk_zone_wplug * zwplug )
511+ {
512+ lockdep_assert_held (& zwplug -> lock );
531513
532- /*
533- * Mark the zone write plug as unhashed and drop the extra reference we
534- * took when the plug was inserted in the hash table.
535- */
536- zwplug -> flags |= BLK_ZONE_WPLUG_UNHASHED ;
537- spin_lock_irqsave (& disk -> zone_wplugs_lock , flags );
538- hlist_del_init_rcu (& zwplug -> node );
539- atomic_dec (& disk -> nr_zone_wplugs );
540- spin_unlock_irqrestore (& disk -> zone_wplugs_lock , flags );
541- disk_put_zone_wplug (zwplug );
514+ if (!(zwplug -> flags & BLK_ZONE_WPLUG_DEAD )) {
515+ zwplug -> flags |= BLK_ZONE_WPLUG_DEAD ;
516+ disk_put_zone_wplug (zwplug );
517+ }
542518}
543519
544520static void blk_zone_wplug_bio_work (struct work_struct * work );
@@ -558,18 +534,7 @@ static struct blk_zone_wplug *disk_get_and_lock_zone_wplug(struct gendisk *disk,
558534again :
559535 zwplug = disk_get_zone_wplug (disk , sector );
560536 if (zwplug ) {
561- /*
562- * Check that a BIO completion or a zone reset or finish
563- * operation has not already removed the zone write plug from
564- * the hash table and dropped its reference count. In such case,
565- * we need to get a new plug so start over from the beginning.
566- */
567537 spin_lock_irqsave (& zwplug -> lock , * flags );
568- if (zwplug -> flags & BLK_ZONE_WPLUG_UNHASHED ) {
569- spin_unlock_irqrestore (& zwplug -> lock , * flags );
570- disk_put_zone_wplug (zwplug );
571- goto again ;
572- }
573538 return zwplug ;
574539 }
575540
@@ -655,14 +620,8 @@ static void disk_zone_wplug_set_wp_offset(struct gendisk *disk,
655620 zwplug -> flags &= ~BLK_ZONE_WPLUG_NEED_WP_UPDATE ;
656621 zwplug -> wp_offset = wp_offset ;
657622 disk_zone_wplug_abort (zwplug );
658-
659- /*
660- * The zone write plug now has no BIO plugged: remove it from the
661- * hash table so that it cannot be seen. The plug will be freed
662- * when the last reference is dropped.
663- */
664- if (disk_should_remove_zone_wplug (disk , zwplug ))
665- disk_remove_zone_wplug (disk , zwplug );
623+ if (!zwplug -> wp_offset || disk_zone_wplug_is_full (disk , zwplug ))
624+ disk_mark_zone_wplug_dead (zwplug );
666625}
667626
668627static unsigned int blk_zone_wp_offset (struct blk_zone * zone )
@@ -1077,6 +1036,19 @@ static bool blk_zone_wplug_handle_write(struct bio *bio, unsigned int nr_segs)
10771036 return true;
10781037 }
10791038
1039+ /*
1040+ * If we got a zone write plug marked as dead, then the user is issuing
1041+ * writes to a full zone, or without synchronizing with zone reset or
1042+ * zone finish operations. In such case, fail the BIO to signal this
1043+ * invalid usage.
1044+ */
1045+ if (zwplug -> flags & BLK_ZONE_WPLUG_DEAD ) {
1046+ spin_unlock_irqrestore (& zwplug -> lock , flags );
1047+ disk_put_zone_wplug (zwplug );
1048+ bio_io_error (bio );
1049+ return true;
1050+ }
1051+
10801052 /* Indicate that this BIO is being handled using zone write plugging. */
10811053 bio_set_flag (bio , BIO_ZONE_WRITE_PLUGGING );
10821054
@@ -1145,7 +1117,7 @@ static void blk_zone_wplug_handle_native_zone_append(struct bio *bio)
11451117 disk -> disk_name , zwplug -> zone_no );
11461118 disk_zone_wplug_abort (zwplug );
11471119 }
1148- disk_remove_zone_wplug ( disk , zwplug );
1120+ disk_mark_zone_wplug_dead ( zwplug );
11491121 spin_unlock_irqrestore (& zwplug -> lock , flags );
11501122
11511123 disk_put_zone_wplug (zwplug );
@@ -1250,14 +1222,8 @@ static void disk_zone_wplug_unplug_bio(struct gendisk *disk,
12501222 }
12511223
12521224 zwplug -> flags &= ~BLK_ZONE_WPLUG_PLUGGED ;
1253-
1254- /*
1255- * If the zone is full (it was fully written or finished, or empty
1256- * (it was reset), remove its zone write plug from the hash table.
1257- */
1258- if (disk_should_remove_zone_wplug (disk , zwplug ))
1259- disk_remove_zone_wplug (disk , zwplug );
1260-
1225+ if (!zwplug -> wp_offset || disk_zone_wplug_is_full (disk , zwplug ))
1226+ disk_mark_zone_wplug_dead (zwplug );
12611227 spin_unlock_irqrestore (& zwplug -> lock , flags );
12621228}
12631229
@@ -1451,9 +1417,9 @@ static void disk_destroy_zone_wplugs_hash_table(struct gendisk *disk)
14511417 while (!hlist_empty (& disk -> zone_wplugs_hash [i ])) {
14521418 zwplug = hlist_entry (disk -> zone_wplugs_hash [i ].first ,
14531419 struct blk_zone_wplug , node );
1454- refcount_inc (& zwplug -> ref );
1455- disk_remove_zone_wplug ( disk , zwplug );
1456- disk_put_zone_wplug ( zwplug );
1420+ spin_lock_irq (& zwplug -> lock );
1421+ disk_mark_zone_wplug_dead ( zwplug );
1422+ spin_unlock_irq ( & zwplug -> lock );
14571423 }
14581424 }
14591425
0 commit comments