Re: [PATCH v4] drm/gem: Make the GEM LRU lock part of drm_device
From: Boris Brezillon
Date: Mon May 18 2026 - 11:01:41 EST
On Mon, 18 May 2026 16:52:25 +0200
Boris Brezillon <boris.brezillon@xxxxxxxxxxxxx> wrote:
> +Mark for the silent conflict resolution needed to reconcile
> drm-misc-fixes and drm-next/drm-misc-next.
>
> On Mon, 18 May 2026 13:41:45 +0200
> Boris Brezillon <boris.brezillon@xxxxxxxxxxxxx> wrote:
>
> > Recently, a few races have been discovered in the GEM LRU logic, all
> > of them caused by the fact the LRU lock is accessed through
> > gem->lru->lock, and that very same lock also protects changes to
> > gem->lru, leading to situations where gem->lru needs to first be
> > accessed without the lock held, to then get the lru to access the lock
> > through and finally take the lock and do the expected operation.
> >
> > Currently, the only driver making use of this API (MSM) declares a
> > device-wide lock, and the user we're about to add (panthor) will
> > do the same. There's no evidence that we will ever have a driver
> > that wants different pools of LRUs protected by different locks under
> > the same drm_device. So we're better off moving this lock to drm_device
> > and always locking it through obj->dev->gem_lru_mutex, or directly
> > through dev->gem_lru_mutex.
> >
> > If anyone ever needs more fine-grained locking, this can be revisited
> > to pass some drm_gem_lru_pool object representing the pool of LRUs
> > under a specific lock, but for now, the per-device lock seems to be
> > enough.
> >
> > Fixes: e7c2af13f811 ("drm/gem: Add LRU/shrinker helper")
> > Reported-by: Chia-I Wu <olvaffe@xxxxxxxxx>
> > Closes: https://gitlab.freedesktop.org/panfrost/linux/-/work_items/86
> > Reviewed-by: Rob Clark <rob.clark@xxxxxxxxxxxxxxxx>
> > Reviewed-by: Liviu Dudau <liviu.dudau@xxxxxxx>
> > Reviewed-by: Steven Price <steven.price@xxxxxxx>
> > Reviewed-by: Chia-I Wu <olvaffe@xxxxxxxxx>
> > Signed-off-by: Boris Brezillon <boris.brezillon@xxxxxxxxxxxxx>
>
> Queued to drm-misc-next.
s/drm-misc-next/drm-misc-fixes/, sorry for the typo.
>
> Note for the linux-next maintainers: below is the conflict
> resolution currently stored in drm-tip.
>
> Note for the drm-misc maintainers: we'll need a backmerge of the
> next -rc into drm-misc-next so we can resolve the conflict there.
>
> --->8---
> diff --git a/drivers/gpu/drm/panthor/panthor_device.h b/drivers/gpu/drm/panthor/panthor_device.h
> index 4e4607bca7cc..a412a50eec76 100644
> --- a/drivers/gpu/drm/panthor/panthor_device.h
> +++ b/drivers/gpu/drm/panthor/panthor_device.h
> @@ -187,9 +187,6 @@ struct panthor_device {
> /** @reclaim.shrinker: Shrinker instance */
> struct shrinker *shrinker;
>
> - /** @reclaim.lock: Lock protecting all LRUs */
> - struct mutex lock;
> -
> /**
> * @reclaim.unused: BOs with unused pages
> *
> diff --git a/drivers/gpu/drm/panthor/panthor_gem.c b/drivers/gpu/drm/panthor/panthor_gem.c
> index 13295d7a593d..abe0c5bb1bca 100644
> --- a/drivers/gpu/drm/panthor/panthor_gem.c
> +++ b/drivers/gpu/drm/panthor/panthor_gem.c
> @@ -1495,13 +1495,13 @@ panthor_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
> if (!can_swap())
> goto out;
>
> - freed += drm_gem_lru_scan(&ptdev->reclaim.unused,
> + freed += drm_gem_lru_scan(&ptdev->base, &ptdev->reclaim.unused,
> sc->nr_to_scan - freed, &remaining,
> panthor_gem_try_evict_no_resv_wait, NULL);
> if (freed >= sc->nr_to_scan)
> goto out;
>
> - freed += drm_gem_lru_scan(&ptdev->reclaim.mmapped,
> + freed += drm_gem_lru_scan(&ptdev->base, &ptdev->reclaim.mmapped,
> sc->nr_to_scan - freed, &remaining,
> panthor_gem_try_evict_no_resv_wait, NULL);
> if (freed >= sc->nr_to_scan)
> @@ -1515,7 +1515,7 @@ panthor_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
> if (freed >= sc->nr_to_scan)
> goto out;
>
> - freed += drm_gem_lru_scan(&ptdev->reclaim.gpu_mapped_shared,
> + freed += drm_gem_lru_scan(&ptdev->base, &ptdev->reclaim.gpu_mapped_shared,
> sc->nr_to_scan - freed, &remaining,
> panthor_gem_try_evict, NULL);
>
> @@ -1544,21 +1544,16 @@ panthor_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
> int panthor_gem_shrinker_init(struct panthor_device *ptdev)
> {
> struct shrinker *shrinker;
> - int ret;
> -
> - ret = drmm_mutex_init(&ptdev->base, &ptdev->reclaim.lock);
> - if (ret)
> - return ret;
>
> INIT_LIST_HEAD(&ptdev->reclaim.vms);
> - drm_gem_lru_init(&ptdev->reclaim.unused, &ptdev->reclaim.lock);
> - drm_gem_lru_init(&ptdev->reclaim.mmapped, &ptdev->reclaim.lock);
> - drm_gem_lru_init(&ptdev->reclaim.gpu_mapped_shared, &ptdev->reclaim.lock);
> + drm_gem_lru_init(&ptdev->reclaim.unused);
> + drm_gem_lru_init(&ptdev->reclaim.mmapped);
> + drm_gem_lru_init(&ptdev->reclaim.gpu_mapped_shared);
> ptdev->reclaim.gpu_mapped_count = 0;
>
> /* Teach lockdep about lock ordering wrt. shrinker: */
> fs_reclaim_acquire(GFP_KERNEL);
> - might_lock(&ptdev->reclaim.lock);
> + might_lock(&ptdev->base.gem_lru_mutex);
> fs_reclaim_release(GFP_KERNEL);
>
> shrinker = shrinker_alloc(0, "drm-panthor-gem");
> diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c
> index 452d0b6d4668..9d4500850561 100644
> --- a/drivers/gpu/drm/panthor/panthor_mmu.c
> +++ b/drivers/gpu/drm/panthor/panthor_mmu.c
> @@ -715,10 +715,10 @@ int panthor_vm_active(struct panthor_vm *vm)
> * never became active in the first place will be reclaimed last, but
> * that's an acceptable trade-off.
> */
> - mutex_lock(&ptdev->reclaim.lock);
> + mutex_lock(&ptdev->base.gem_lru_mutex);
> if (vm->reclaim.lru.count)
> list_move_tail(&vm->reclaim.lru_node, &ptdev->reclaim.vms);
> - mutex_unlock(&ptdev->reclaim.lock);
> + mutex_unlock(&ptdev->base.gem_lru_mutex);
>
> /* Make sure we don't race with lock/unlock_region() calls
> * happening around VM bind operations.
> @@ -1962,9 +1962,9 @@ static void panthor_vm_free(struct drm_gpuvm *gpuvm)
> struct panthor_vm *vm = container_of(gpuvm, struct panthor_vm, base);
> struct panthor_device *ptdev = vm->ptdev;
>
> - mutex_lock(&ptdev->reclaim.lock);
> + mutex_lock(&ptdev->base.gem_lru_mutex);
> list_del_init(&vm->reclaim.lru_node);
> - mutex_unlock(&ptdev->reclaim.lock);
> + mutex_unlock(&ptdev->base.gem_lru_mutex);
>
> mutex_lock(&vm->heaps.lock);
> if (drm_WARN_ON(&ptdev->base, vm->heaps.pool))
> @@ -2360,11 +2360,11 @@ void panthor_vm_update_bo_reclaim_lru_locked(struct panthor_gem_object *bo)
> drm_WARN_ON(&ptdev->base, vm);
> vm = container_of(vm_bo->vm, struct panthor_vm, base);
>
> - mutex_lock(&ptdev->reclaim.lock);
> + mutex_lock(&ptdev->base.gem_lru_mutex);
> drm_gem_lru_move_tail_locked(&vm->reclaim.lru, &bo->base);
> if (list_empty(&vm->reclaim.lru_node))
> list_move(&vm->reclaim.lru_node, &ptdev->reclaim.vms);
> - mutex_unlock(&ptdev->reclaim.lock);
> + mutex_unlock(&ptdev->base.gem_lru_mutex);
> }
> }
>
> @@ -2774,7 +2774,7 @@ panthor_vm_create(struct panthor_device *ptdev, bool for_mcu,
> vm->kernel_auto_va.start = auto_kernel_va_start;
> vm->kernel_auto_va.end = vm->kernel_auto_va.start + auto_kernel_va_size - 1;
>
> - drm_gem_lru_init(&vm->reclaim.lru, &ptdev->reclaim.lock);
> + drm_gem_lru_init(&vm->reclaim.lru);
> INIT_LIST_HEAD(&vm->reclaim.lru_node);
> INIT_LIST_HEAD(&vm->node);
> INIT_LIST_HEAD(&vm->as.lru_node);
> @@ -3140,7 +3140,7 @@ panthor_mmu_reclaim_priv_bos(struct panthor_device *ptdev,
> LIST_HEAD(remaining_vms);
> LIST_HEAD(vms);
>
> - mutex_lock(&ptdev->reclaim.lock);
> + mutex_lock(&ptdev->base.gem_lru_mutex);
> list_splice_init(&ptdev->reclaim.vms, &vms);
>
> while (freed < nr_to_scan) {
> @@ -3156,12 +3156,13 @@ panthor_mmu_reclaim_priv_bos(struct panthor_device *ptdev,
> continue;
> }
>
> - mutex_unlock(&ptdev->reclaim.lock);
> + mutex_unlock(&ptdev->base.gem_lru_mutex);
>
> - freed += drm_gem_lru_scan(&vm->reclaim.lru, nr_to_scan - freed,
> + freed += drm_gem_lru_scan(&ptdev->base, &vm->reclaim.lru,
> + nr_to_scan - freed,
> remaining, shrink, NULL);
>
> - mutex_lock(&ptdev->reclaim.lock);
> + mutex_lock(&ptdev->base.gem_lru_mutex);
>
> /* If the VM is still in the temporary list, remove it so we
> * can proceed with the next VM.
> @@ -3177,11 +3178,11 @@ panthor_mmu_reclaim_priv_bos(struct panthor_device *ptdev,
> list_add_tail(&vm->reclaim.lru_node, &remaining_vms);
> }
>
> - mutex_unlock(&ptdev->reclaim.lock);
> + mutex_unlock(&ptdev->base.gem_lru_mutex);
>
> panthor_vm_put(vm);
>
> - mutex_lock(&ptdev->reclaim.lock);
> + mutex_lock(&ptdev->base.gem_lru_mutex);
> }
>
> /* Re-insert VMs with remaining data to reclaim at the beginning of
> @@ -3192,7 +3193,7 @@ panthor_mmu_reclaim_priv_bos(struct panthor_device *ptdev,
> */
> list_splice_tail(&vms, &remaining_vms);
> list_splice(&remaining_vms, &ptdev->reclaim.vms);
> - mutex_unlock(&ptdev->reclaim.lock);
> + mutex_unlock(&ptdev->base.gem_lru_mutex);
>
> return freed;
> }