Re: [PATCH v9 05/31] gpu: nova-core: set DMA mask width based on GPU architecture

From: Alexandre Courbot

Date: Mon Mar 30 2026 - 10:43:56 EST

On Thu Mar 26, 2026 at 10:38 AM JST, John Hubbard wrote:
> Replace the hardcoded 47-bit DMA mask with per-architecture values.
> Add Architecture::dma_mask() with an exhaustive match, so new
> architectures get a compile-time reminder to specify their width.
>
> Set the DMA mask in Gpu::new(). Gpu owns all DMA allocations for
> the device, so no concurrent allocations can exist while the
> constructor is still running.
>
> Move Spec creation into probe() so the dev_info is printed early,
> and pass Spec into Gpu::new().
>
> Signed-off-by: John Hubbard <jhubbard@xxxxxxxxxx>
> ---
> drivers/gpu/nova-core/driver.rs | 24 ++++++--------------
> drivers/gpu/nova-core/gpu.rs | 39 +++++++++++++++++++++++----------
> 2 files changed, 35 insertions(+), 28 deletions(-)
>
> diff --git a/drivers/gpu/nova-core/driver.rs b/drivers/gpu/nova-core/driver.rs
> index 84b0e1703150..bb82e63af044 100644
> --- a/drivers/gpu/nova-core/driver.rs
> +++ b/drivers/gpu/nova-core/driver.rs
> @@ -4,8 +4,6 @@
> auxiliary,
> device::Core,
> devres::Devres,
> - dma::Device,
> - dma::DmaMask,
> pci,
> pci::{
> Class,
> @@ -23,7 +21,10 @@
> },
> };
>
> -use crate::gpu::Gpu;
> +use crate::gpu::{
> + Gpu,
> + Spec, //
> +};
>
> /// Counter for generating unique auxiliary device IDs.
> static AUXILIARY_ID_COUNTER: Atomic<u32> = Atomic::new(0);
> @@ -38,14 +39,6 @@ pub(crate) struct NovaCore {
>
> const BAR0_SIZE: usize = SZ_16M;
>
> -// For now we only support Ampere which can use up to 47-bit DMA addresses.
> -//
> -// TODO: Add an abstraction for this to support newer GPUs which may support
> -// larger DMA addresses. Limiting these GPUs to smaller address widths won't
> -// have any adverse affects, unless installed on systems which require larger
> -// DMA addresses. These systems should be quite rare.
> -const GPU_DMA_BITS: u32 = 47;
> -
> pub(crate) type Bar0 = pci::Bar<BAR0_SIZE>;
>
> kernel::pci_device_table!(
> @@ -84,18 +77,15 @@ fn probe(pdev: &pci::Device<Core>, _info: &Self::IdInfo) -> impl PinInit<Self, E
> pdev.enable_device_mem()?;
> pdev.set_master();
>
> - // SAFETY: No concurrent DMA allocations or mappings can be made because
> - // the device is still being probed and therefore isn't being used by
> - // other threads of execution.
> - unsafe { pdev.dma_set_mask_and_coherent(DmaMask::new::<GPU_DMA_BITS>())? };
> -
> let bar = Arc::pin_init(
> pdev.iomap_region_sized::<BAR0_SIZE>(0, c"nova-core/bar0"),
> GFP_KERNEL,
> )?;
> + let spec = Spec::new(pdev.as_ref(), bar.access(pdev.as_ref())?)?;
> + dev_info!(pdev, "NVIDIA ({})\n", spec);

Now that we have moved to DMA mask into `Gpu::new`, what is the point of
creating `spec` here? Its sole purpose is to be passed to `Gpu::new`,
and that doesn't change by the end of the series.

>
> Ok(try_pin_init!(Self {
> - gpu <- Gpu::new(pdev, bar.clone(), bar.access(pdev.as_ref())?),
> + gpu <- Gpu::new(pdev, bar.clone(), bar.access(pdev.as_ref())?, spec),
> _reg <- auxiliary::Registration::new(
> pdev.as_ref(),
> c"nova-drm",
> diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs
> index 685ae4c81268..f70bfbda1614 100644
> --- a/drivers/gpu/nova-core/gpu.rs
> +++ b/drivers/gpu/nova-core/gpu.rs
> @@ -3,6 +3,10 @@
> use kernel::{
> device,
> devres::Devres,
> + dma::{
> + Device,
> + DmaMask, //
> + },
> fmt,
> pci,
> prelude::*,
> @@ -160,6 +164,16 @@ pub(crate) enum Architecture {
> BlackwellGB20x = 0x1b,
> }
>
> +impl Architecture {
> + /// Returns the DMA mask supported by this architecture.
> + pub(crate) const fn dma_mask(&self) -> DmaMask {
> + match self {
> + Self::Turing | Self::Ampere | Self::Ada => DmaMask::new::<47>(),
> + Self::Hopper | Self::BlackwellGB10x | Self::BlackwellGB20x => DmaMask::new::<52>(),
> + }
> + }
> +}
> +
> impl TryFrom<u8> for Architecture {
> type Error = Error;
>
> @@ -212,7 +226,7 @@ pub(crate) struct Spec {
> }
>
> impl Spec {
> - fn new(dev: &device::Device, bar: &Bar0) -> Result<Spec> {
> + pub(crate) fn new(dev: &device::Device, bar: &Bar0) -> Result<Spec> {
> // Some brief notes about boot0 and boot42, in chronological order:
> //
> // NV04 through NV50:
> @@ -244,7 +258,6 @@ fn new(dev: &device::Device, bar: &Bar0) -> Result<Spec> {
> }
>
> /// Returns this GPU's chipset.
> - #[expect(dead_code)]
> pub(crate) fn chipset(self) -> Chipset {
> self.chipset
> }
> @@ -292,36 +305,40 @@ pub(crate) struct Gpu {
>
> impl Gpu {
> pub(crate) fn new<'a>(
> - pdev: &'a pci::Device<device::Bound>,
> + pdev: &'a pci::Device<device::Core>,
> devres_bar: Arc<Devres<Bar0>>,
> bar: &'a Bar0,
> + spec: Spec,
> ) -> impl PinInit<Self, Error> + 'a {
> - try_pin_init!(Self {
> - spec: Spec::new(pdev.as_ref(), bar).inspect(|spec| {
> - dev_info!(pdev,"NVIDIA ({})\n", spec);
> - })?,
> + let dma_mask = spec.chipset().arch().dma_mask();
>
> + try_pin_init!(Self {
> // We must wait for GFW_BOOT completion before doing any significant setup on the GPU.
> _: {
> + // SAFETY: `Gpu` owns all DMA allocations for this device, and we are
> + // still constructing it, so no concurrent DMA allocations can exist.
> + unsafe { pdev.dma_set_mask_and_coherent(dma_mask)? };
> +
> gfw::wait_gfw_boot_completion(bar)
> .inspect_err(|_| dev_err!(pdev, "GFW boot did not complete\n"))?;
> },
>
> - sysmem_flush: SysmemFlush::register(pdev.as_ref(), bar, spec.chipset)?,
> + sysmem_flush: SysmemFlush::register(pdev.as_ref(), bar, spec.chipset())?,
>
> gsp_falcon: Falcon::new(
> pdev.as_ref(),
> - spec.chipset,
> + spec.chipset(),
> )
> .inspect(|falcon| falcon.clear_swgen0_intr(bar))?,
>
> - sec2_falcon: Falcon::new(pdev.as_ref(), spec.chipset)?,
> + sec2_falcon: Falcon::new(pdev.as_ref(), spec.chipset())?,

Ok, so you've removed the local variable as per my v8 feedback - now you
can also access `chipset` (the member, not the method - which should not
be needed anyway as `spec` doesn't need to be created in `driver.rs`)
directly, and remove some noise from the diff.