Re: [RFC PATCH v2 1/3] mm/zsmalloc: encode class index in obj value for lockless class lookup
From: Nhat Pham
Date: Fri May 29 2026 - 18:52:23 EST
On Wed, May 27, 2026 at 5:01 AM Wenchao Hao <haowenchao22@xxxxxxxxx> wrote:
>
> Encode the size_class index (class_idx) into the obj value so that
> zs_free() can determine the correct size_class without dereferencing
> the handle->obj->PFN->zpdesc->zspage->class chain under pool->lock.
> class_idx is invariant across page migration (only PFN is rewritten),
> so a lockless read of obj always yields a valid class_idx.
>
> The space below the PFN field in obj is over-provisioned on 64-bit
> systems, with more bits than obj_idx needs. Split that space into
> class_idx and obj_idx subfields:
>
> |<-- _PFN_BITS -->|<-- ZS_OBJ_CLASS_BITS -->|<-- ZS_OBJ_IDX_BITS -->|
> +-----------------+-------------------------+-----------------------+
> |       PFN       |        class_idx        |        obj_idx        |
> +-----------------+-------------------------+-----------------------+
> MSB               ^                                               LSB
>                   |
>                   +-- ZS_OBJ_PFN_SHIFT
>
> The macro layout changes as follows:
>
> Before           After              Meaning
> ---------------- ------------------ ----------------------------
> OBJ_INDEX_BITS   ZS_OBJ_IDX_BITS    width of obj_idx subfield
> OBJ_INDEX_MASK   ZS_OBJ_IDX_MASK    mask of obj_idx subfield
> (n/a)            ZS_OBJ_CLASS_BITS  width of class_idx subfield
> (n/a)            ZS_OBJ_CLASS_MASK  mask of class_idx subfield
> (n/a)            ZS_OBJ_PFN_SHIFT   bit offset of PFN in obj
>
> On 32-bit systems there is no spare room for class_idx, so the
> encoding is disabled (ZS_OBJ_CLASS_BITS = 0) and the obj layout
> remains [PFN | obj_idx].
>
> Signed-off-by: Wenchao Hao <haowenchao@xxxxxxxxxx>
LGTM.
Reviewed-by: Nhat Pham <nphamcs@xxxxxxxxx>
> ---
> mm/zsmalloc.c | 80 ++++++++++++++++++++++++++++++++++++++++++---------
> 1 file changed, 66 insertions(+), 14 deletions(-)
>
> diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
> index 63128ddb7959..6b0014b43408 100644
> --- a/mm/zsmalloc.c
> +++ b/mm/zsmalloc.c
> @@ -67,8 +67,8 @@
> #define MAX_POSSIBLE_PHYSMEM_BITS MAX_PHYSMEM_BITS
> #else
> /*
> - * If this definition of MAX_PHYSMEM_BITS is used, OBJ_INDEX_BITS will just
> - * be PAGE_SHIFT
> + * If this definition of MAX_PHYSMEM_BITS is used, ZS_OBJ_PFN_SHIFT will
> + * just be PAGE_SHIFT
> */
> #define MAX_POSSIBLE_PHYSMEM_BITS BITS_PER_LONG
> #endif
> @@ -88,8 +88,27 @@
> #define OBJ_TAG_BITS 1
> #define OBJ_TAG_MASK OBJ_ALLOCATED_TAG
>
> -#define OBJ_INDEX_BITS (BITS_PER_LONG - _PFN_BITS)
> -#define OBJ_INDEX_MASK ((_AC(1, UL) << OBJ_INDEX_BITS) - 1)
> +/*
> + * obj is encoded as [PFN | class_idx | obj_idx] within an unsigned long:
> + *
> + * |<-- _PFN_BITS -->|<-- ZS_OBJ_CLASS_BITS -->|<-- ZS_OBJ_IDX_BITS -->|
> + * +-----------------+-------------------------+-----------------------+
> + * |       PFN       |        class_idx        |        obj_idx        |
> + * +-----------------+-------------------------+-----------------------+
> + * MSB               ^                                               LSB
> + *                   |
> + *                   +-- ZS_OBJ_PFN_SHIFT
> + *
> + * Encoding class_idx into obj lets zs_free() locate the size_class
> + * without holding pool->lock; class_idx is invariant across page
> + * migration (only PFN changes), so a lockless read of the obj value
> + * always yields a valid class_idx.
> + *
> + * On 32-bit systems there is no spare room for class_idx, so
> + * ZS_OBJ_CLASS_BITS is 0 and the layout collapses to the original
> + * [PFN | obj_idx] without any ifdef in callers.
> + */
> +#define ZS_OBJ_PFN_SHIFT (BITS_PER_LONG - _PFN_BITS)
>
> #define HUGE_BITS 1
> #define FULLNESS_BITS 4
> @@ -98,9 +117,29 @@
>
> #define ZS_MAX_PAGES_PER_ZSPAGE (_AC(CONFIG_ZSMALLOC_CHAIN_SIZE, UL))
>
> +/*
> + * Reuse the width that struct zspage already reserves for its
> + * class field (zspage->class:CLASS_BITS + 1) for the class_idx
> + * field encoded in obj. On 32-bit there is no spare room, so set
> + * it to 0; the encoded class_idx then folds to a constant 0 and
> + * the layout collapses back to [PFN | obj_idx].
> + */
> +#if BITS_PER_LONG >= 64
> +#define ZS_OBJ_CLASS_BITS (CLASS_BITS + 1)
> +#else
> +#define ZS_OBJ_CLASS_BITS 0
> +#endif
> +#define ZS_OBJ_CLASS_MASK ((_AC(1, UL) << ZS_OBJ_CLASS_BITS) - 1)
> +
> +#define ZS_OBJ_IDX_BITS (ZS_OBJ_PFN_SHIFT - ZS_OBJ_CLASS_BITS)
> +#define ZS_OBJ_IDX_MASK ((_AC(1, UL) << ZS_OBJ_IDX_BITS) - 1)
> +
> +static_assert(ZS_OBJ_IDX_BITS > 0,
> + "zsmalloc: PFN + class_idx leave no room for obj_idx");
> +
> /* ZS_MIN_ALLOC_SIZE must be multiple of ZS_ALIGN */
> #define ZS_MIN_ALLOC_SIZE \
> - MAX(32, (ZS_MAX_PAGES_PER_ZSPAGE << PAGE_SHIFT >> OBJ_INDEX_BITS))
> + MAX(32, (ZS_MAX_PAGES_PER_ZSPAGE << PAGE_SHIFT >> ZS_OBJ_IDX_BITS))
> /* each chunk includes extra space to keep handle */
> #define ZS_MAX_ALLOC_SIZE PAGE_SIZE
>
> @@ -721,26 +760,38 @@ static struct zpdesc *get_next_zpdesc(struct zpdesc *zpdesc)
> static void obj_to_location(unsigned long obj, struct zpdesc **zpdesc,
> unsigned int *obj_idx)
> {
> - *zpdesc = pfn_zpdesc(obj >> OBJ_INDEX_BITS);
> - *obj_idx = (obj & OBJ_INDEX_MASK);
> + *zpdesc = pfn_zpdesc(obj >> ZS_OBJ_PFN_SHIFT);
> + *obj_idx = (obj & ZS_OBJ_IDX_MASK);
> }
>
> static void obj_to_zpdesc(unsigned long obj, struct zpdesc **zpdesc)
> {
> - *zpdesc = pfn_zpdesc(obj >> OBJ_INDEX_BITS);
> + *zpdesc = pfn_zpdesc(obj >> ZS_OBJ_PFN_SHIFT);
> +}
> +
> +/*
> + * On 32-bit systems ZS_OBJ_CLASS_BITS is 0 and ZS_OBJ_CLASS_MASK is 0,
> + * so this collapses to a constant 0. No ifdef needed at the call site.
nit: I think this comment is a bit redundant - you already explain
this in the data layout section above.
> + */
> +static unsigned int obj_to_class_idx(unsigned long obj)
> +{
> + return (obj >> ZS_OBJ_IDX_BITS) & ZS_OBJ_CLASS_MASK;
> }
>
> /**
> - * location_to_obj - get obj value encoded from (<zpdesc>, <obj_idx>)
> + * location_to_obj - encode (<zpdesc>, <obj_idx>, <class_idx>) into obj value
> * @zpdesc: zpdesc object resides in zspage
> * @obj_idx: object index
> + * @class_idx: size class index; ignored on 32-bit (ZS_OBJ_CLASS_BITS == 0)
> */
> -static unsigned long location_to_obj(struct zpdesc *zpdesc, unsigned int obj_idx)
> +static unsigned long location_to_obj(struct zpdesc *zpdesc, unsigned int obj_idx,
> + unsigned int class_idx)
> {
> unsigned long obj;
>
> - obj = zpdesc_pfn(zpdesc) << OBJ_INDEX_BITS;
> - obj |= obj_idx & OBJ_INDEX_MASK;
> + obj = zpdesc_pfn(zpdesc) << ZS_OBJ_PFN_SHIFT;
> + obj |= (unsigned long)(class_idx & ZS_OBJ_CLASS_MASK) << ZS_OBJ_IDX_BITS;
> + obj |= obj_idx & ZS_OBJ_IDX_MASK;
>
> return obj;
> }
> @@ -1276,7 +1327,7 @@ static unsigned long obj_malloc(struct zs_pool *pool,
> kunmap_local(vaddr);
> mod_zspage_inuse(zspage, 1);
>
> - obj = location_to_obj(m_zpdesc, obj);
> + obj = location_to_obj(m_zpdesc, obj, zspage->class);
> record_obj(handle, obj);
>
> return obj;
> @@ -1762,7 +1813,8 @@ static int zs_page_migrate(struct page *newpage, struct page *page,
>
> old_obj = handle_to_obj(handle);
> obj_to_location(old_obj, &dummy, &obj_idx);
> - new_obj = (unsigned long)location_to_obj(newzpdesc, obj_idx);
> + new_obj = location_to_obj(newzpdesc, obj_idx,
> + obj_to_class_idx(old_obj));
> record_obj(handle, new_obj);
> }
> }
> --
> 2.34.1
>