Re: [PATCH v11 06/22] gpu: nova-core: Blackwell: use correct sysmem flush registers
From: Eliot Courtney
Date: Mon Jun 01 2026 - 03:34:40 EST
On Sat May 30, 2026 at 12:09 PM JST, John Hubbard wrote:
> Blackwell GPUs moved the sysmem flush page registers away from the
> Ampere/Ada location. GB10x routes the flush through a pair of HSHUB0
> register sets (primary and egress) that must both be programmed to
> the same address. GB20x routes it through FBHUB0.
>
> Implement these paths in the GB10x and GB20x framebuffer HALs.
>
> Signed-off-by: John Hubbard <jhubbard@xxxxxxxxxx>
> ---
> drivers/gpu/nova-core/fb/hal/gb100.rs | 46 +++++++++++++++++++++++++--
> drivers/gpu/nova-core/fb/hal/gb202.rs | 40 +++++++++++++++++++++--
> drivers/gpu/nova-core/regs.rs | 37 +++++++++++++++++++++
> 3 files changed, 117 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/nova-core/fb/hal/gb100.rs b/drivers/gpu/nova-core/fb/hal/gb100.rs
> index 8d63350abf8a..70f4c11b1e77 100644
> --- a/drivers/gpu/nova-core/fb/hal/gb100.rs
> +++ b/drivers/gpu/nova-core/fb/hal/gb100.rs
> @@ -4,6 +4,8 @@
> //! Blackwell GB10x framebuffer HAL.
>
> use kernel::{
> + io::Io,
> + num::Bounded,
> prelude::*,
> ptr::{
> const_align_up,
> @@ -15,11 +17,45 @@
> use crate::{
> driver::Bar0,
> fb::hal::FbHal,
> - num::usize_into_u32, //
> + num::usize_into_u32,
> + regs, //
> };
>
> struct Gb100;
>
> +fn read_sysmem_flush_page_gb100(bar: &Bar0) -> u64 {
> + let lo = u64::from(
> + bar.read(regs::NV_PFB_HSHUB0_PCIE_FLUSH_SYSMEM_ADDR_LO)
> + .adr(),
> + );
> + let hi = u64::from(
> + bar.read(regs::NV_PFB_HSHUB0_PCIE_FLUSH_SYSMEM_ADDR_HI)
> + .adr(),
> + );
> +
> + lo | (hi << 32)
> +}
> +
> +/// Write the sysmem flush page address through the GB10x HSHUB0 registers.
> +///
> +/// Both the primary and EG (egress) register pairs must be programmed to the same address,
> +/// as required by hardware.
> +fn write_sysmem_flush_page_gb100(bar: &Bar0, addr: Bounded<u64, 52>) {
> + // CAST: lower 32 bits. Hardware ignores bits 7:0.
> + let addr_lo = *addr as u32;
> + let addr_hi = addr.shr::<32, 20>().cast::<u32>();
> +
> + // Write HI first. The hardware will trigger the flush on the LO write.
> +
> + // Primary HSHUB pair.
> + bar.write_reg(regs::NV_PFB_HSHUB0_PCIE_FLUSH_SYSMEM_ADDR_HI::zeroed().with_adr(addr_hi));
> + bar.write_reg(regs::NV_PFB_HSHUB0_PCIE_FLUSH_SYSMEM_ADDR_LO::zeroed().with_adr(addr_lo));
> +
> + // EG (egress) pair -- must match the primary pair.
> + bar.write_reg(regs::NV_PFB_HSHUB0_EG_PCIE_FLUSH_SYSMEM_ADDR_HI::zeroed().with_adr(addr_hi));
> + bar.write_reg(regs::NV_PFB_HSHUB0_EG_PCIE_FLUSH_SYSMEM_ADDR_LO::zeroed().with_adr(addr_lo));
> +}
> +
> pub(super) const fn pmu_reserved_size_gb100() -> u32 {
> usize_into_u32::<{ const_align_up(SZ_8M + SZ_16M + SZ_4K, Alignment::new::<SZ_128K>()).unwrap() }>(
> )
> @@ -27,11 +63,15 @@ pub(super) const fn pmu_reserved_size_gb100() -> u32 {
>
> impl FbHal for Gb100 {
> fn read_sysmem_flush_page(&self, bar: &Bar0) -> u64 {
> - super::ga100::read_sysmem_flush_page_ga100(bar)
> + read_sysmem_flush_page_gb100(bar)
> }
>
> fn write_sysmem_flush_page(&self, bar: &Bar0, addr: u64) -> Result {
> - super::ga100::write_sysmem_flush_page_ga100(bar, addr);
> + let addr: Bounded<u64, 52> = Bounded::<u64, 64>::from(addr)
> + .try_shrink::<52>()
> + .ok_or(EINVAL)?;
This could be written more simply as:
`let addr = Bounded::<u64, 52>::try_new(addr).ok_or(EINVAL)?;`
> +
> + write_sysmem_flush_page_gb100(bar, addr);
>
> Ok(())
> }
> diff --git a/drivers/gpu/nova-core/fb/hal/gb202.rs b/drivers/gpu/nova-core/fb/hal/gb202.rs
> index 542c1d7429e9..5a6b815eec3d 100644
> --- a/drivers/gpu/nova-core/fb/hal/gb202.rs
> +++ b/drivers/gpu/nova-core/fb/hal/gb202.rs
> @@ -4,24 +4,58 @@
> //! Blackwell GB20x framebuffer HAL.
>
> use kernel::{
> + io::Io,
> + num::Bounded,
> prelude::*,
> sizes::SizeConstants, //
> };
>
> use crate::{
> driver::Bar0,
> - fb::hal::FbHal, //
> + fb::hal::FbHal,
> + regs, //
> };
>
> struct Gb202;
>
> +fn read_sysmem_flush_page_gb202(bar: &Bar0) -> u64 {
> + let lo = u64::from(
> + bar.read(regs::NV_PFB_FBHUB0_PCIE_FLUSH_SYSMEM_ADDR_LO)
> + .adr(),
> + );
> + let hi = u64::from(
> + bar.read(regs::NV_PFB_FBHUB0_PCIE_FLUSH_SYSMEM_ADDR_HI)
> + .adr(),
> + );
> +
> + lo | (hi << 32)
> +}
> +
> +/// Write the sysmem flush page address through the GB20x FBHUB0 registers.
> +fn write_sysmem_flush_page_gb202(bar: &Bar0, addr: Bounded<u64, 52>) {
> + // Write HI first. The hardware will trigger the flush on the LO write.
> + bar.write_reg(
> + regs::NV_PFB_FBHUB0_PCIE_FLUSH_SYSMEM_ADDR_HI::zeroed()
> + .with_adr(addr.shr::<32, 20>().cast::<u32>()),
> + );
> + bar.write_reg(
> + regs::NV_PFB_FBHUB0_PCIE_FLUSH_SYSMEM_ADDR_LO::zeroed()
> + // CAST: lower 32 bits. Hardware ignores bits 7:0.
> + .with_adr(*addr as u32),
> + );
> +}
> +
> impl FbHal for Gb202 {
> fn read_sysmem_flush_page(&self, bar: &Bar0) -> u64 {
> - super::ga100::read_sysmem_flush_page_ga100(bar)
> + read_sysmem_flush_page_gb202(bar)
> }
>
> fn write_sysmem_flush_page(&self, bar: &Bar0, addr: u64) -> Result {
> - super::ga100::write_sysmem_flush_page_ga100(bar, addr);
> + let addr: Bounded<u64, 52> = Bounded::<u64, 64>::from(addr)
> + .try_shrink::<52>()
> + .ok_or(EINVAL)?;
Same here: `Bounded::<u64, 52>::try_new(addr).ok_or(EINVAL)?` would be simpler.
> +
> + write_sysmem_flush_page_gb202(bar, addr);
>
> Ok(())
> }
> diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs
> index 356fbf364ea5..65be6ec71ed4 100644
> --- a/drivers/gpu/nova-core/regs.rs
> +++ b/drivers/gpu/nova-core/regs.rs
> @@ -1,4 +1,5 @@
> // SPDX-License-Identifier: GPL-2.0
> +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
>
> use kernel::{
> io::{
> @@ -145,6 +146,42 @@ fn fmt(&self, f: &mut kernel::fmt::Formatter<'_>) -> kernel::fmt::Result {
> /// Bits 12..40 of the higher (exclusive) bound of the WPR2 region.
> 31:4 hi_val;
> }
> +
> + // Blackwell GB10x sysmem flush registers (HSHUB0).
> + //
> + // GB10x GPUs use two pairs of HSHUB registers for sysmembar: a primary pair and an EG
> + // (egress) pair. Both must be programmed to the same address. Hardware ignores bits 7:0
> + // of each LO register. HSHUB0 base is 0x00891000.
> +
> + pub(crate) NV_PFB_HSHUB0_PCIE_FLUSH_SYSMEM_ADDR_LO(u32) @ 0x00891e50 {
> + 31:0 adr => u32;
> + }
> +
> + pub(crate) NV_PFB_HSHUB0_PCIE_FLUSH_SYSMEM_ADDR_HI(u32) @ 0x00891e54 {
> + 19:0 adr;
> + }
> +
> + pub(crate) NV_PFB_HSHUB0_EG_PCIE_FLUSH_SYSMEM_ADDR_LO(u32) @ 0x008916c0 {
> + 31:0 adr => u32;
> + }
> +
> + pub(crate) NV_PFB_HSHUB0_EG_PCIE_FLUSH_SYSMEM_ADDR_HI(u32) @ 0x008916c4 {
> + 19:0 adr;
> + }
> +
> + // Blackwell GB20x sysmem flush registers (FBHUB0).
> + //
> + // Unlike the older NV_PFB_NISO_FLUSH_SYSMEM_ADDR registers which encode the address with an
> + // 8-bit right-shift, these registers take the raw address split into lower/upper 32-bit halves.
> + // The hardware ignores bits 7:0 of the LO register.
> +
> + pub(crate) NV_PFB_FBHUB0_PCIE_FLUSH_SYSMEM_ADDR_LO(u32) @ 0x008a1d58 {
> + 31:0 adr => u32;
> + }
> +
> + pub(crate) NV_PFB_FBHUB0_PCIE_FLUSH_SYSMEM_ADDR_HI(u32) @ 0x008a1d5c {
> + 19:0 adr;
> + }
> }
It might be nice to move these register definitions into the HAL modules
where they are used, if they aren't used anywhere else, and reduce their
visibility accordingly. I am also curious where the HSHUB0 base address
0x00891000 comes from.