[PATCH 4/8] blk-cgroup: don't nest queue_lock under rcu in blkg_lookup_create()

From: Yu Kuai

Date: Sun Jun 07 2026 - 23:45:55 EST


From: Yu Kuai <yukuai@xxxxxxx>

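blkg_lookup_create() is currently called with the RCU read lock held and
takes queue_lock nested inside it. Change this in two steps:

1) hold the RCU read lock and do blkg_lookup() in the fast path;
2) take queue_lock directly in the slow path, without nesting it under
the RCU read lock. The slow path now uses spin_lock_irq() rather than
spin_lock_irqsave().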

This is a preparation for protecting blkcg with blkcg_mutex instead of
queue_lock.

Signed-off-by: Yu Kuai <yukuai@xxxxxxx>
---
block/blk-cgroup.c | 57 +++++++++++++++++++++++++++++-----------------
1 file changed, 36 insertions(+), 21 deletions(-)

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 46fc65050c38..e2896d582235 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -466,26 +466,21 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg, struct gendisk *disk,
static struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
struct gendisk *disk)
{
struct request_queue *q = disk->queue;
struct blkcg_gq *blkg;
- unsigned long flags;
-
- WARN_ON_ONCE(!rcu_read_lock_held());

- blkg = blkg_lookup(blkcg, q);
- if (blkg)
- return blkg;
-
- spin_lock_irqsave(&q->queue_lock, flags);
+ rcu_read_lock();
blkg = blkg_lookup(blkcg, q);
if (blkg) {
if (blkcg != &blkcg_root &&
blkg != rcu_dereference(blkcg->blkg_hint))
rcu_assign_pointer(blkcg->blkg_hint, blkg);
- goto found;
+ rcu_read_unlock();
+ return blkg;
}
+ rcu_read_unlock();

/*
* Create blkgs walking down from blkcg_root to @blkcg, so that all
* non-root blkgs have access to their parents. Returns the closest
* blkg to the intended blkg should blkg_create() fail.
@@ -513,12 +508,10 @@ static struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
}
if (pos == blkcg)
break;
}

-found:
- spin_unlock_irqrestore(&q->queue_lock, flags);
return blkg;
}

static void blkg_destroy(struct blkcg_gq *blkg)
{
@@ -2098,10 +2091,22 @@ void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta)
return;
blkcg_scale_delay(blkg, now);
atomic64_add(delta, &blkg->delay_nsec);
}

+static inline struct blkcg_gq *blkg_lookup_tryget(struct blkcg_gq *blkg)
+{
+retry:
+ if (blkg_tryget(blkg))
+ return blkg;
+
+ blkg = blkg->parent;
+ if (blkg)
+ goto retry;
+
+ return NULL;
+}
/**
* blkg_tryget_closest - try and get a blkg ref on the closet blkg
* @bio: target bio
* @css: target css
*
@@ -2110,24 +2115,34 @@ void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta)
* up taking a reference on or %NULL if no reference was taken.
*/
static inline struct blkcg_gq *blkg_tryget_closest(struct bio *bio,
struct cgroup_subsys_state *css)
{
- struct blkcg_gq *blkg, *ret_blkg = NULL;
+ struct request_queue *q = bio->bi_bdev->bd_queue;
+ struct blkcg *blkcg = css_to_blkcg(css);
+ struct blkcg_gq *blkg;

rcu_read_lock();
- blkg = blkg_lookup_create(css_to_blkcg(css), bio->bi_bdev->bd_disk);
- while (blkg) {
- if (blkg_tryget(blkg)) {
- ret_blkg = blkg;
- break;
- }
- blkg = blkg->parent;
- }
+ blkg = blkg_lookup(blkcg, q);
+ if (likely(blkg))
+ blkg = blkg_lookup_tryget(blkg);
rcu_read_unlock();

- return ret_blkg;
+ if (blkg)
+ return blkg;
+
+ /*
+ * Fast path failed, we're probably issuing IO in this cgroup the first
+ * time, hold lock to create new blkg.
+ */
+ spin_lock_irq(&q->queue_lock);
+ blkg = blkg_lookup_create(blkcg, bio->bi_bdev->bd_disk);
+ if (blkg)
+ blkg = blkg_lookup_tryget(blkg);
+ spin_unlock_irq(&q->queue_lock);
+
+ return blkg;
}

/**
* bio_associate_blkg_from_css - associate a bio with a specified css
* @bio: target bio
--
2.51.0