[PATCH v1 15/16] gpu: nova-core: mm: Add BAR1 user interface

From: Joel Fernandes

Date: Mon May 18 2026 - 14:22:24 EST


Add the BAR1 user interface for CPU access to GPU virtual memory through
the BAR1 aperture.

Signed-off-by: Joel Fernandes <joelagnelf@xxxxxxxxxx>
---
drivers/gpu/nova-core/driver.rs | 22 ++-
drivers/gpu/nova-core/gpu.rs | 41 +++++-
drivers/gpu/nova-core/gsp/commands.rs | 1 -
drivers/gpu/nova-core/mm.rs | 1 +
drivers/gpu/nova-core/mm/bar_user.rs | 194 ++++++++++++++++++++++++++
5 files changed, 255 insertions(+), 4 deletions(-)
create mode 100644 drivers/gpu/nova-core/mm/bar_user.rs

diff --git a/drivers/gpu/nova-core/driver.rs b/drivers/gpu/nova-core/driver.rs
index b14d4b599783..207ba164cf4e 100644
--- a/drivers/gpu/nova-core/driver.rs
+++ b/drivers/gpu/nova-core/driver.rs
@@ -2,10 +2,12 @@

use kernel::{
auxiliary,
+ device::Bound,
device::Core,
devres::Devres,
dma::Device,
dma::DmaMask,
+ io::resource,
pci,
pci::{
Class,
@@ -47,9 +49,27 @@ pub(crate) struct NovaCore {
const GPU_DMA_BITS: u32 = 47;

pub(crate) type Bar0 = pci::Bar<BAR0_SIZE>;
-#[expect(dead_code)]
pub(crate) type Bar1 = pci::Bar;

+/// Returns the Linux PCI resource index that holds BAR1 for an NVIDIA GPU.
+///
+/// On Maxwell through Ada, BAR0 is a 32-bit memory BAR occupying a single
+/// Linux PCI resource slot, so BAR1 lives at index 1. Starting with Blackwell
+/// (and on some Ampere GA100 / Hopper SKUs) BAR0 is a 64-bit memory BAR that
+/// consumes two consecutive resource slots: index 0 holds the low 32 bits and
+/// index 1 holds the high 32 bits (with no `flags` or size of its own),
+/// shifting BAR1 to index 2.
+pub(crate) fn bar1_resource_index(pdev: &pci::Device<Bound>) -> Result<u32> {
+ // Probe the `IORESOURCE_MEM_64` flag of BAR0 as a robust way of detecting
+ // whether BAR0 is 64-bit, and hence whether BAR1 is shifted to index 2.
+ let flags0 = pdev.resource_flags(0)?;
+ if flags0.contains(resource::Flags::IORESOURCE_MEM_64) {
+ Ok(2)
+ } else {
+ Ok(1)
+ }
+}
+
kernel::pci_device_table!(
PCI_TABLE,
MODULE_PCI_TABLE,
diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs
index f789d956cc49..b0eebe6406e5 100644
--- a/drivers/gpu/nova-core/gpu.rs
+++ b/drivers/gpu/nova-core/gpu.rs
@@ -19,7 +19,10 @@

use crate::{
bounded_enum,
- driver::Bar0,
+ driver::{
+ Bar0,
+ Bar1, //
+ },
falcon::{
gsp::Gsp as GspFalcon,
sec2::Sec2 as Sec2Falcon,
@@ -31,8 +34,11 @@
Gsp, //
},
mm::{
+ bar_user::BarUser,
+ pagetable::MmuVersion,
GpuMm,
- IntoVramRange, //
+ IntoVramRange,
+ VramAddress, //
},
regs,
};
@@ -145,6 +151,11 @@ pub(crate) const fn arch(self) -> Architecture {
pub(crate) const fn needs_fwsec_bootloader(self) -> bool {
matches!(self.arch(), Architecture::Turing) || matches!(self, Self::GA100)
}
+
+ /// Returns the MMU version for this chipset, derived from its architecture.
+ pub(crate) fn mmu_version(self) -> MmuVersion {
+ MmuVersion::from(self.arch())
+ }
}

// TODO
@@ -263,6 +274,8 @@ pub(crate) struct Gpu {
spec: Spec,
/// MMIO mapping of PCI BAR 0
bar: Arc<Devres<Bar0>>,
+ /// MMIO mapping of PCI BAR 1.
+ bar1: Arc<Devres<Bar1>>,
/// System memory page required for flushing all pending GPU-side memory writes done through
/// PCIE into system memory, via sysmembar (A GPU-initiated HW memory-barrier operation).
sysmem_flush: SysmemFlush,
@@ -276,6 +289,8 @@ pub(crate) struct Gpu {
#[pin]
gsp: Gsp,
gsp_static_info: GetGspStaticInfoReply,
+ /// BAR1 user interface for CPU access to GPU virtual memory.
+ bar_user: Arc<BarUser>,
}

impl Gpu {
@@ -348,6 +363,28 @@ pub(crate) fn new<'a>(
)?
},

+ bar1: {
+ let bar1_idx = crate::driver::bar1_resource_index(pdev)?;
+ Arc::pin_init(pdev.iomap_region(bar1_idx, c"nova-core/bar1"), GFP_KERNEL)?
+ },
+
+ // Create BAR1 user interface for CPU access to GPU virtual memory.
+ bar_user: {
+ let pdb_addr = VramAddress::new(gsp_static_info.bar1_pde_base);
+ let bar1_idx = crate::driver::bar1_resource_index(pdev)?;
+ let bar1_size = pdev.resource_len(bar1_idx)?;
+ Arc::pin_init(
+ BarUser::new(
+ pdb_addr,
+ spec.chipset,
+ bar1_size,
+ mm.clone(),
+ bar1.clone(),
+ )?,
+ GFP_KERNEL,
+ )?
+ },
+
bar: devres_bar,
})
}
diff --git a/drivers/gpu/nova-core/gsp/commands.rs b/drivers/gpu/nova-core/gsp/commands.rs
index bee7539eff60..301c95686efd 100644
--- a/drivers/gpu/nova-core/gsp/commands.rs
+++ b/drivers/gpu/nova-core/gsp/commands.rs
@@ -194,7 +194,6 @@ fn init(&self) -> impl Init<Self::Command, Self::InitError> {
pub(crate) struct GetGspStaticInfoReply {
gpu_name: [u8; 64],
/// BAR1 Page Directory Entry base address.
- #[expect(dead_code)]
pub(crate) bar1_pde_base: u64,
/// Usable FB (VRAM) region for driver memory allocation.
pub(crate) usable_fb_region: Range<u64>,
diff --git a/drivers/gpu/nova-core/mm.rs b/drivers/gpu/nova-core/mm.rs
index 502c7fdceba2..4741ef60593b 100644
--- a/drivers/gpu/nova-core/mm.rs
+++ b/drivers/gpu/nova-core/mm.rs
@@ -31,6 +31,7 @@ macro_rules! impl_pfn_bounded {
};
}

+pub(crate) mod bar_user;
pub(super) mod pagetable;
pub(crate) mod pramin;
pub(super) mod tlb;
diff --git a/drivers/gpu/nova-core/mm/bar_user.rs b/drivers/gpu/nova-core/mm/bar_user.rs
new file mode 100644
index 000000000000..bb9742c036b7
--- /dev/null
+++ b/drivers/gpu/nova-core/mm/bar_user.rs
@@ -0,0 +1,194 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! BAR1 user interface for CPU access to GPU virtual memory. Used for USERD
+//! (GPU work submission) and for applications to access GPU buffers via mmap().
+
+use kernel::{
+ device,
+ devres::Devres,
+ io::Io,
+ new_mutex,
+ prelude::*,
+ sync::{
+ Arc,
+ Mutex, //
+ },
+};
+
+use crate::{
+ driver::Bar1,
+ gpu::Chipset,
+ mm::{
+ vmm::{
+ MappedRange,
+ Vmm, //
+ },
+ GpuMm,
+ Pfn,
+ Vfn,
+ VirtualAddress,
+ VramAddress,
+ PAGE_SIZE, //
+ },
+ num::IntoSafeCast,
+};
+
+/// BAR1 user interface for virtual memory mappings.
+///
+/// Owns the [`Vmm`] for the BAR1 address space, serialized by a [`Mutex`].
+#[pin_data]
+pub(crate) struct BarUser {
+ #[pin]
+ vmm: Mutex<Vmm>,
+ mm: Arc<GpuMm>,
+ bar1: Arc<Devres<Bar1>>,
+}
+
+impl BarUser {
+ /// Create a pin-initializer for [`BarUser`]; fails if [`Vmm::new`] fails.
+ pub(crate) fn new(
+ pdb_addr: VramAddress,
+ chipset: Chipset,
+ va_size: u64,
+ mm: Arc<GpuMm>,
+ bar1: Arc<Devres<Bar1>>,
+ ) -> Result<impl PinInit<Self>> {
+ let vmm = Vmm::new(pdb_addr, chipset.mmu_version(), va_size)?;
+ Ok(pin_init!(Self {
+ vmm <- new_mutex!(vmm, "bar_user_vmm"),
+ mm,
+ bar1,
+ }))
+ }
+
+ /// Map `pfns` to a contiguous BAR1 virtual range; `EINVAL` if `pfns` is empty.
+ pub(crate) fn map(
+ self: &Arc<Self>,
+ dev: &device::Device<device::Bound>,
+ pfns: &[Pfn],
+ writable: bool,
+ ) -> Result<BarUserAccess> {
+ if pfns.is_empty() {
+ return Err(EINVAL);
+ }
+ let mut vmm = self.vmm.lock();
+ let mapped = vmm.map_pages(dev, &self.mm, pfns, None, writable)?;
+
+ Ok(BarUserAccess {
+ bar_user: self.clone(),
+ mapped: Some(mapped),
+ })
+ }
+}
+
+/// Access object for a mapped BAR1 region.
+pub(crate) struct BarUserAccess {
+ bar_user: Arc<BarUser>,
+ /// Taken (via [`Option::take`]) by [`BarUserAccess::release`]; `Some` at
+ /// drop time means `release()` was never called.
+ mapped: Option<MappedRange>,
+}
+
+impl BarUserAccess {
+ /// Tear down the BAR1 mapping using a caller-supplied bound device.
+ pub(crate) fn release(mut self, dev: &device::Device<device::Bound>) -> Result {
+ let mapped = self.mapped.take().ok_or(EINVAL)?;
+ let mut vmm = self.bar_user.vmm.lock();
+ vmm.unmap_pages(dev, &self.bar_user.mm, mapped)?;
+ Ok(())
+ }
+
+ /// Returns the active mapping.
+ fn mapped(&self) -> &MappedRange {
+ // `mapped` is only `None` after `take()` in `release`, which consumes
+ // `self`; hence unwrap() cannot panic here.
+ self.mapped.as_ref().unwrap()
+ }
+
+ /// Get the base virtual address of this mapping.
+ pub(crate) fn base(&self) -> VirtualAddress {
+ VirtualAddress::from(self.mapped().vfn_start)
+ }
+
+ /// Get the total size of the mapped region in bytes.
+ pub(crate) fn size(&self) -> usize {
+ self.mapped().num_pages * PAGE_SIZE
+ }
+
+ /// Get the starting virtual frame number.
+ pub(crate) fn vfn_start(&self) -> Vfn {
+ self.mapped().vfn_start
+ }
+
+ /// Get the number of pages in this mapping.
+ pub(crate) fn num_pages(&self) -> usize {
+ self.mapped().num_pages
+ }
+
+ /// Translate a mapping offset to a BAR1 aperture offset; `EINVAL` if out of range.
+ fn bar_offset(&self, offset: usize) -> Result<usize> {
+ if offset >= self.size() {
+ return Err(EINVAL);
+ }
+
+ let base_vfn: usize = self.mapped().vfn_start.raw().into_safe_cast();
+ let base = base_vfn.checked_mul(PAGE_SIZE).ok_or(EOVERFLOW)?;
+ base.checked_add(offset).ok_or(EOVERFLOW)
+ }
+
+ // Fallible accessors with runtime bounds checking.
+
+ /// Read a 32-bit value at byte `offset` within this mapping.
+ pub(crate) fn try_read32(
+ &self,
+ dev: &device::Device<device::Bound>,
+ offset: usize,
+ ) -> Result<u32> {
+ let off = self.bar_offset(offset)?;
+ self.bar_user.bar1.access(dev)?.try_read32(off)
+ }
+
+ /// Write a 32-bit value at byte `offset` within this mapping.
+ pub(crate) fn try_write32(
+ &self,
+ dev: &device::Device<device::Bound>,
+ value: u32,
+ offset: usize,
+ ) -> Result {
+ let off = self.bar_offset(offset)?;
+ self.bar_user.bar1.access(dev)?.try_write32(value, off)
+ }
+
+ /// Read a 64-bit value at byte `offset` within this mapping.
+ pub(crate) fn try_read64(
+ &self,
+ dev: &device::Device<device::Bound>,
+ offset: usize,
+ ) -> Result<u64> {
+ let off = self.bar_offset(offset)?;
+ self.bar_user.bar1.access(dev)?.try_read64(off)
+ }
+
+ /// Write a 64-bit value at byte `offset` within this mapping.
+ pub(crate) fn try_write64(
+ &self,
+ dev: &device::Device<device::Bound>,
+ value: u64,
+ offset: usize,
+ ) -> Result {
+ let off = self.bar_offset(offset)?;
+ self.bar_user.bar1.access(dev)?.try_write64(value, off)
+ }
+}
+
+impl Drop for BarUserAccess {
+ fn drop(&mut self) {
+ if self.mapped.is_some() {
+ kernel::pr_warn!(
+ "BarUserAccess dropped without calling release(). BarUser address space will leak.\n"
+ );
+ }
+ // NOTE(review): the inner `MappedRange`'s own `MustUnmapGuard` is expected
+ // to fire as well, identifying the leaked VA range; confirm in vmm.rs.
+ }
+}
--
2.34.1