Re: [PATCH] gpu: nova-core: gsp: fix undefined behavior in command queue code
From: Eliot Courtney
Date: Thu Mar 19 2026 - 02:41:49 EST
On Thu Mar 19, 2026 at 2:36 PM JST, Alexandre Courbot wrote:
> `driver_read_area` and `driver_write_area` are internal methods that
> return slices containing the area of the command queue buffer that the
> driver has exclusive read or write access to, respectively.
>
> While their returned value is correct and safe to use, internally they
> temporarily create a reference to the whole command-buffer slice,
> including GSP-owned regions. These regions can change without notice,
> and thus creating a slice to them is undefined behavior.
>
> Fix this by replacing the slice logic with pointer arithmetic and
> creating slices to valid regions only. It relies on unsafe code, but
> should be mostly replaced by `IoView` and `IoSlice` once they land.
>
> Fixes: 75f6b1de8133 ("gpu: nova-core: gsp: Add GSP command queue bindings and handling")
> Suggested-by: Danilo Krummrich <dakr@xxxxxxxxxx>
> Link: https://lore.kernel.org/all/DH47AVPEKN06.3BERUSJIB4M1R@xxxxxxxxxx/
> Signed-off-by: Alexandre Courbot <acourbot@xxxxxxxxxx>
> ---
> drivers/gpu/nova-core/gsp/cmdq.rs | 135 ++++++++++++++++++++++++++++----------
> 1 file changed, 100 insertions(+), 35 deletions(-)
>
> diff --git a/drivers/gpu/nova-core/gsp/cmdq.rs b/drivers/gpu/nova-core/gsp/cmdq.rs
> index d36a62ba1c60..4200e7986774 100644
> --- a/drivers/gpu/nova-core/gsp/cmdq.rs
> +++ b/drivers/gpu/nova-core/gsp/cmdq.rs
> @@ -251,38 +251,77 @@ fn new(dev: &device::Device<device::Bound>) -> Result<Self> {
> /// As the message queue is a circular buffer, the region may be discontiguous in memory. In
> /// that case the second slice will have a non-zero length.
> fn driver_write_area(&mut self) -> (&mut [[u8; GSP_PAGE_SIZE]], &mut [[u8; GSP_PAGE_SIZE]]) {
> - let tx = self.cpu_write_ptr() as usize;
> - let rx = self.gsp_read_ptr() as usize;
> + let tx = num::u32_as_usize(self.cpu_write_ptr());
> + let rx = num::u32_as_usize(self.gsp_read_ptr());
> + // Number of pages between `tx` and the end of the command queue.
> + // PANIC: Per the invariant of `cpu_write_ptr`, `tx < MSGQ_NUM_PAGES`.
> + let after_tx_len = num::u32_as_usize(MSGQ_NUM_PAGES) - tx;
>
> + // Pointer to the start of the CPU message queue.
> + //
> // SAFETY:
> - // - The `CoherentAllocation` contains exactly one object.
> - // - We will only access the driver-owned part of the shared memory.
> - // - Per the safety statement of the function, no concurrent access will be performed.
> - let gsp_mem = &mut unsafe { self.0.as_slice_mut(0, 1) }.unwrap()[0];
> - // PANIC: per the invariant of `cpu_write_ptr`, `tx` is `< MSGQ_NUM_PAGES`.
> - let (before_tx, after_tx) = gsp_mem.cpuq.msgq.data.split_at_mut(tx);
> + // - `self.0` contains exactly one element.
> + // - `cpuq.msgq.data[0]` is within the bounds of that element.
> + let data = unsafe { &raw mut (*self.0.start_ptr_mut()).cpuq.msgq.data[0] };
>
> - // The area starting at `tx` and ending at `rx - 2` modulo MSGQ_NUM_PAGES, inclusive,
> - // belongs to the driver for writing.
> + // Safety/Panic comments to be referenced by the code below.
> + //
> + // SAFETY[1]:
> + // - `data` points to an array of `MSGQ_NUM_PAGES` elements.
> + // - The area starting at `tx` and ending at `rx - 2` modulo `MSGQ_NUM_PAGES`,
> + // inclusive, belongs to the driver for writing and is not accessed concurrently by
> + // the GSP.
> + // - `tx + after_tx_len` == `MSGQ_NUM_PAGES`.
> + //
> + // PANIC[1]:
> + // - Per the invariant of `cpu_write_ptr`, `tx < MSGQ_NUM_PAGES`.
> + // - Per the invariant of `gsp_read_ptr`, `rx < MSGQ_NUM_PAGES`.
>
> if rx == 0 {
> - // Since `rx` is zero, leave an empty slot at end of the buffer.
> - let last = after_tx.len() - 1;
> - (&mut after_tx[..last], &mut [])
> + (
> + // SAFETY: See SAFETY[1].
> + unsafe {
> + core::slice::from_raw_parts_mut(
> + data.add(tx),
> + // Since `rx` is zero, leave an empty slot at end of the buffer.
> + // PANIC: See PANIC[1].
> + after_tx_len - 1,
> + )
> + },
> + &mut [],
> + )
> } else if rx <= tx {
> // The area is discontiguous and we leave an empty slot before `rx`.
> - // PANIC:
> - // - The index `rx - 1` is non-negative because `rx != 0` in this branch.
> - // - The index does not exceed `before_tx.len()` (which equals `tx`) because
> - // `rx <= tx` in this branch.
> - (after_tx, &mut before_tx[..(rx - 1)])
> + (
> + // SAFETY: See SAFETY[1].
> + unsafe { core::slice::from_raw_parts_mut(data.add(tx), after_tx_len) },
> + // SAFETY: See SAFETY[1].
> + unsafe {
> + core::slice::from_raw_parts_mut(
> + data,
> + // Leave one empty slot before `rx`.
> + // PANIC:
> + // - See PANIC[1].
> + // - `rx - 1` is non-negative because `rx != 0` in this branch.
> + rx - 1,
> + )
> + },
> + )
> } else {
> // The area is contiguous and we leave an empty slot before `rx`.
> - // PANIC:
> - // - The index `rx - tx - 1` is non-negative because `rx > tx` in this branch.
> - // - The index does not exceed `after_tx.len()` (which is `MSGQ_NUM_PAGES - tx`)
> - // because `rx < MSGQ_NUM_PAGES` by the `gsp_read_ptr` invariant.
> - (&mut after_tx[..(rx - tx - 1)], &mut [])
> + (
> + // SAFETY: See SAFETY[1].
> + unsafe {
> + core::slice::from_raw_parts_mut(
> + data.add(tx),
> + // PANIC:
> + // - See PANIC[1].
> + // - `rx - tx - 1` is non-negative because `rx > tx` in this branch.
> + rx - tx - 1,
> + )
> + },
> + &mut [],
> + )
> }
> }
>
> @@ -308,24 +347,50 @@ fn driver_write_area_size(&self) -> usize {
> let tx = self.gsp_write_ptr() as usize;
> let rx = self.cpu_read_ptr() as usize;
Should we use u32_as_usize here too?
Reviewed-by: Eliot Courtney <ecourtney@xxxxxxxxxx>