Re: [PATCH v13 7/9] gpu: nova-core: Hopper/Blackwell: add GSP lockdown release polling

From: Alexandre Courbot

Date: Wed Jun 03 2026 - 11:27:07 EST


On Wed Jun 3, 2026 at 8:53 PM JST, Eliot Courtney wrote:
> On Wed Jun 3, 2026 at 7:17 PM JST, Alexandre Courbot wrote:
>> On Wed Jun 3, 2026 at 4:30 PM JST, Alexandre Courbot wrote:
>>> From: John Hubbard <jhubbard@xxxxxxxxxx>
>>>
>>> On Hopper and Blackwell, FSP boots GSP with hardware lockdown enabled.
>>> After FSP Chain of Trust completes, the driver must poll for lockdown
>>> release before proceeding with GSP initialization. Add the register
>>> bit and helper functions needed for this polling.
>>>
>>> Signed-off-by: John Hubbard <jhubbard@xxxxxxxxxx>
>>> Signed-off-by: Alexandre Courbot <acourbot@xxxxxxxxxx>
>>> ---
>>> drivers/gpu/nova-core/falcon/gsp.rs | 6 +++
>>> drivers/gpu/nova-core/fsp.rs | 6 +++
>>> drivers/gpu/nova-core/gsp/hal/gh100.rs | 88 +++++++++++++++++++++++++++++++++-
>>> drivers/gpu/nova-core/regs.rs | 2 +
>>> 4 files changed, 100 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/gpu/nova-core/falcon/gsp.rs b/drivers/gpu/nova-core/falcon/gsp.rs
>>> index df6d5a382c7a..136d6b24103f 100644
>>> --- a/drivers/gpu/nova-core/falcon/gsp.rs
>>> +++ b/drivers/gpu/nova-core/falcon/gsp.rs
>>> @@ -57,4 +57,10 @@ pub(crate) fn check_reload_completed(&self, bar: &Bar0, timeout: Delta) -> Resul
>>> )
>>> .map(|_| true)
>>> }
>>> +
>>> + /// Returns whether the RISC-V branch privilege lockdown bit is set.
>>> + pub(crate) fn riscv_branch_privilege_lockdown(&self, bar: &Bar0) -> bool {
>>> + bar.read(regs::NV_PFALCON_FALCON_HWCFG2::of::<Gsp>())
>>> + .riscv_br_priv_lockdown()
>>> + }
>>> }
>>> diff --git a/drivers/gpu/nova-core/fsp.rs b/drivers/gpu/nova-core/fsp.rs
>>> index 883ac4f8b811..872898ffe0a3 100644
>>> --- a/drivers/gpu/nova-core/fsp.rs
>>> +++ b/drivers/gpu/nova-core/fsp.rs
>>> @@ -184,6 +184,12 @@ pub(crate) fn new(
>>> resume,
>>> })
>>> }
>>> +
>>> + /// DMA address of the FMC boot parameters, needed after boot for lockdown
>>> + /// release polling.
>>> + pub(crate) fn boot_params_dma_handle(&self) -> u64 {
>>> + self.fmc_boot_params.dma_handle()
>>> + }
>>> }
>>>
>>> /// FSP interface for Hopper/Blackwell GPUs.
>>> diff --git a/drivers/gpu/nova-core/gsp/hal/gh100.rs b/drivers/gpu/nova-core/gsp/hal/gh100.rs
>>> index f41f3fea15ff..def41745a30f 100644
>>> --- a/drivers/gpu/nova-core/gsp/hal/gh100.rs
>>> +++ b/drivers/gpu/nova-core/gsp/hal/gh100.rs
>>> @@ -5,7 +5,9 @@
>>>
>>> use kernel::{
>>> device,
>>> - dma::Coherent, //
>>> + dma::Coherent,
>>> + io::poll::read_poll_timeout,
>>> + time::Delta, //
>>> };
>>>
>>> use crate::{
>>> @@ -33,6 +35,86 @@
>>> },
>>> };
>>>
>>> +/// GSP lockdown pattern written by firmware to mbox0 while RISC-V branch privilege
>>> +/// lockdown is active. The low byte varies, the upper 24 bits are fixed.
>>> +const GSP_LOCKDOWN_PATTERN: u32 = 0xbadf_4100;
>>> +const GSP_LOCKDOWN_MASK: u32 = 0xffff_ff00;
>
> nit: these constants can be moved into impl GspMbox or made local to
> `is_locked_down`
>
>>> +
>>> +/// GSP falcon mailbox state, used to track lockdown release status.
>>> +struct GspMbox {
>>> + mbox0: u32,
>>> + mbox1: u32,
>>> +}
>>> +
>>> +impl GspMbox {
>>> + /// Reads both mailboxes from the GSP falcon.
>>> + fn read(gsp_falcon: &Falcon<GspEngine>, bar: &Bar0) -> Self {
>>> + Self {
>>> + mbox0: gsp_falcon.read_mailbox0(bar),
>>> + mbox1: gsp_falcon.read_mailbox1(bar),
>>> + }
>>> + }
>>> +
>>> + /// Returns `true` if the lockdown pattern is present in `mbox0`.
>>> + fn is_locked_down(&self) -> bool {
>>> + (self.mbox0 & GSP_LOCKDOWN_MASK) == GSP_LOCKDOWN_PATTERN
>>> + }
>>> +
>>> + /// Combines mailbox0 and mailbox1 into a 64-bit address.
>>> + fn combined_addr(&self) -> u64 {
>>> + (u64::from(self.mbox1) << 32) | u64::from(self.mbox0)
>>> + }
>>> +
>>> + /// Returns `true` if GSP lockdown has been released.
>>> + ///
>>> + /// Checks the lockdown pattern, validates the boot params address,
>>> + /// and verifies the `HWCFG2` lockdown bit is clear.
>>> + fn lockdown_released(
>>> + &self,
>>> + gsp_falcon: &Falcon<GspEngine>,
>>> + bar: &Bar0,
>>> + fmc_boot_params_addr: u64,
>>> + ) -> bool {
>>> + if self.is_locked_down() {
>>> + return false;
>>> + }
>>> +
>>> + if self.mbox0 != 0 && self.combined_addr() != fmc_boot_params_addr {
>>> + return true;
>>> + }
>>
>> This looks like a bug - if the mailboxes still contain the boot
>> parameters address, we will keep going and might return true on the next
>> line, which the caller will interpret as an error. OpenRM does the
>> opposite check and has an additional test for `mailbox0 != 0`, which we
>> can translate into this logic:
>>
>> if self.mbox0 != 0 {
>> return self.combined_addr() != fmc_boot_params_addr;
>> }
>>
>> I'll fix it and add a few comments explaining what the code does as it
>> can be a bit convoluted.
>
> Yeah, I agree. I think the logic openrm uses is like:
>
> 1. wait until HWCFG2 != 0 && (HWCFG2 & 0xffffff00) != 0xbadf4100
> 2. wait until mbox0 == 0 || addr != fmc_boot_params_addr
> 3. wait until riscv_br_priv_lockdown == 0 || mbox0 != 0
>
> So several things are different to openrm (not sure if they are wrong
> though):
>
> 1. `is_locked_down` is checking the wrong register (should check HWCFG2
> AFAICT). I think we should name this more like
> 'gsp_mailboxes_readable' or something, since IIUC it is meant to test
> when it's valid to read the mailboxes.
> 2. other logic is wrong as you mentioned.
>
> Maybe we could structure lockdown_released like this:
>
> // can't read the mailboxes yet (we have already read them at this
> // point, but the result is not valid, so maybe this should be
> // restructured)
> if !gsp_mailboxes_readable {
> return false;
> }
>
> // we can read the registers and we are still waiting for mbox0 to
> // be zero
> if mbox0 != 0 && addr == fmc_boot_params_addr {
> return false;
> }
>
> // either lockdown is released or some error happened
> return riscv_br_priv_lockdown == 0 || mbox0 != 0
>
> I think your suggested change to the `if self.mbox0 != 0` check is also
> correct. I think we should also update the function's name and its doc
> comment to say that it returns true on lockdown release OR when an error
> happened.
>
> I don't know whether simpler logic would work; I'm just commenting on
> how this compares to openrm.

Wow, the wrong register check in `is_locked_down` is pretty significant.
Thanks for catching this.

I'll fix the logic when applying; the resulting diff is a bit longer
than I would like, but not too scary.

diff --git a/drivers/gpu/nova-core/falcon/gsp.rs b/drivers/gpu/nova-core/falcon/gsp.rs
index 136d6b24103f..98a1c1dc8465 100644
--- a/drivers/gpu/nova-core/falcon/gsp.rs
+++ b/drivers/gpu/nova-core/falcon/gsp.rs
@@ -24,6 +24,10 @@
regs,
};

+/// Pattern returned by GSP register reads while the PRIV target mask still blocks CPU access.
+const GSP_TARGET_MASK_LOCKED_PATTERN: u32 = 0xbadf_4100;
+const GSP_TARGET_MASK_LOCKED_MASK: u32 = 0xffff_ff00;
+
/// Type specifying the `Gsp` falcon engine. Cannot be instantiated.
pub(crate) struct Gsp(());

@@ -63,4 +67,13 @@ pub(crate) fn riscv_branch_privilege_lockdown(&self, bar: &Bar0) -> bool {
bar.read(regs::NV_PFALCON_FALCON_HWCFG2::of::<Gsp>())
.riscv_br_priv_lockdown()
}
+
+ /// Returns whether GSP registers can be read by the CPU.
+ pub(crate) fn priv_target_mask_released(&self, bar: &Bar0) -> bool {
+ let hwcfg2 = bar
+ .read(regs::NV_PFALCON_FALCON_HWCFG2::of::<Gsp>())
+ .into_raw();
+
+ hwcfg2 != 0 && (hwcfg2 & GSP_TARGET_MASK_LOCKED_MASK) != GSP_TARGET_MASK_LOCKED_PATTERN
+ }
}
diff --git a/drivers/gpu/nova-core/gsp/hal/gh100.rs b/drivers/gpu/nova-core/gsp/hal/gh100.rs
index acdfb7fc06fc..57e31ef4819d 100644
--- a/drivers/gpu/nova-core/gsp/hal/gh100.rs
+++ b/drivers/gpu/nova-core/gsp/hal/gh100.rs
@@ -35,11 +35,6 @@
},
};

-/// GSP lockdown pattern written by firmware to mbox0 while RISC-V branch privilege
-/// lockdown is active. The low byte varies, the upper 24 bits are fixed.
-const GSP_LOCKDOWN_PATTERN: u32 = 0xbadf_4100;
-const GSP_LOCKDOWN_MASK: u32 = 0xffff_ff00;
-
/// GSP falcon mailbox state, used to track lockdown release status.
struct GspMbox {
mbox0: u32,
@@ -55,30 +50,21 @@ fn read(gsp_falcon: &Falcon<GspEngine>, bar: &Bar0) -> Self {
}
}

- /// Returns `true` if the lockdown pattern is present in `mbox0`.
- fn is_locked_down(&self) -> bool {
- (self.mbox0 & GSP_LOCKDOWN_MASK) == GSP_LOCKDOWN_PATTERN
- }
-
/// Combines mailbox0 and mailbox1 into a 64-bit address.
fn combined_addr(&self) -> u64 {
(u64::from(self.mbox1) << 32) | u64::from(self.mbox0)
}

- /// Returns `true` if GSP lockdown has been released.
+ /// Returns `true` if GSP lockdown has been released or a GSP-FMC error happened.
///
/// Returns `true` both on successful lockdown release and on GSP-FMC-reported errors, since
/// either condition should stop the poll loop.
- fn lockdown_released(
+ fn lockdown_released_or_error(
&self,
gsp_falcon: &Falcon<GspEngine>,
bar: &Bar0,
fmc_boot_params_addr: u64,
) -> bool {
- if self.is_locked_down() {
- return false;
- }
-
// GSP-FMC normally clears the boot parameters address from the mailboxes early during
// boot. If the address is still there, keep polling rather than treating it as an error.
// Any other non-zero mailbox0 value is a GSP-FMC error code.
@@ -100,14 +86,25 @@ fn wait_for_gsp_lockdown_release(
dev_dbg!(dev, "Waiting for GSP lockdown release\n");

let mbox = read_poll_timeout(
- || Ok(GspMbox::read(gsp_falcon, bar)),
- |mbox| mbox.lockdown_released(gsp_falcon, bar, fmc_boot_params_addr),
+ || {
+ // While the PRIV target mask is still locked to FSP, GSP register and mailbox reads
+ // are not meaningful. Wait until HWCFG2 says the CPU can read them.
+ Ok(match gsp_falcon.priv_target_mask_released(bar) {
+ false => None,
+ true => Some(GspMbox::read(gsp_falcon, bar)),
+ })
+ },
+ |mbox| match mbox {
+ None => false,
+ Some(mbox) => mbox.lockdown_released_or_error(gsp_falcon, bar, fmc_boot_params_addr),
+ },
Delta::from_millis(10),
Delta::from_secs(30),
)
.inspect_err(|_| {
dev_err!(dev, "GSP lockdown release timeout\n");
- })?;
+ })?
+ .ok_or(EIO)?;

// If polling stopped with a non-zero mailbox0, it was not the boot parameters address
// anymore and therefore represents a GSP-FMC error code.