[RFC PATCH v2 7/8] timekeeping: Drive time_offset skew via per-tick ntp_error transfer

From: David Woodhouse

Date: Sun May 17 2026 - 18:03:52 EST


From: David Woodhouse <dwmw@xxxxxxxxxxxx>

Instead of inflating tick_length to effect the time_offset slew,
transfer the skew to ntp_error per-tick and drain time_offset at the
equivalent per-tick rate:

- ntp_error += skew_delta << shift (biases dithering to deliver skew)
- time_offset -= skew_delta / NTP_INTERVAL_FREQ (per-tick drain)

This simplifies the accounting and makes the skew towards time_offset
very nearly cycle-accurate.

Compute mult from (ntp_tick + skew_delta) so the dithering has enough
bandwidth to deliver the skew rate by selecting between mult and mult+1.
This applies a skew equivalent to the old tick_length += delta approach
but without modifying tick_length.

To eliminate remainder error in the per-tick division, skew_delta is
rounded to a multiple of NTP_INTERVAL_FREQ in second_overflow().

second_overflow() computes skew_delta (the exponential decay rate)
but no longer drains time_offset or inflates tick_length directly.

Signed-off-by: David Woodhouse <dwmw@xxxxxxxxxxxx>
---
include/linux/timekeeper_internal.h | 1 +
kernel/time/ntp.c | 29 +++++++++++++++++++++++++++--
kernel/time/ntp_internal.h | 2 ++
kernel/time/timekeeping.c | 27 ++++++++++++++++++++++-----
4 files changed, 52 insertions(+), 7 deletions(-)

diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
index 2f4cfcfcaac0..006437761262 100644
--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -185,6 +185,7 @@ struct timekeeper {
u32 ntp_error_shift;
u32 ntp_err_mult;
u32 skip_second_overflow;
+ s64 skew_delta;
s32 tai_offset;
};

diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 79e76bb6942b..f4bf7e78c230 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -63,6 +63,7 @@ struct ntp_data {
int time_state;
int time_status;
s64 time_offset;
+ s64 skew_delta;
long time_constant;
long time_maxerror;
long time_esterror;
@@ -371,6 +372,48 @@ u64 ntp_tick_length_base(unsigned int tkid)
return tk_ntp_data[tkid].tick_length_base;
}

+/**
+ * ntp_get_skew_delta - Read the current time_offset skew rate
+ * @tkid:	Index into tk_ntp_data selecting the NTP state to read
+ *
+ * Returns the skew_delta value currently stored for @tkid.
+ */
+s64 ntp_get_skew_delta(unsigned int tkid)
+{
+	return tk_ntp_data[tkid].skew_delta;
+}
+
+/**
+ * ntp_drain_time_offset - Move time_offset towards zero by @amount
+ * @tkid:	Index into tk_ntp_data selecting the NTP state to update
+ * @amount:	Signed amount to remove, in the same units as time_offset
+ *
+ * Drains only while time_offset has the same sign as @amount, and never
+ * takes time_offset past zero. When time_offset is zero or has the
+ * opposite sign (for instance if it was replaced after @amount was
+ * derived), it is left untouched instead of being cleared, so a freshly
+ * requested offset is not discarded.
+ *
+ * Returns the part of @amount that could not be drained (zero when the
+ * whole amount was applied).
+ */
+s64 ntp_drain_time_offset(unsigned int tkid, s64 amount)
+{
+	struct ntp_data *ntpdata = &tk_ntp_data[tkid];
+	s64 offset = ntpdata->time_offset;
+	s64 drained;
+
+	if (amount > 0 && offset > 0)
+		drained = amount < offset ? amount : offset;
+	else if (amount < 0 && offset < 0)
+		drained = amount > offset ? amount : offset;
+	else
+		drained = 0;
+
+	ntpdata->time_offset = offset - drained;
+	return amount - drained;
+}
+
void ntp_set_time_offset(unsigned int tkid, s64 offset_ns)
{
struct ntp_data *ntpdata = &tk_ntp_data[tkid];
@@ -501,8 +521,13 @@ int second_overflow(unsigned int tkid, time64_t secs)
if (delta < ntpdata->time_offset)
delta = ntpdata->time_offset;
}
- ntpdata->time_offset -= delta;
- ntpdata->tick_length += delta;
+ /*
+ * Set the per-tick skew rate for the tick code. This is in the
+ * same units as tick_length (ns << NTP_SCALE_SHIFT), and is
+ * rounded to a multiple of NTP_INTERVAL_FREQ so that the per-tick
+ * division in the tick code is exact.
+ */
+ ntpdata->skew_delta = delta - delta % NTP_INTERVAL_FREQ;

/* Check PPS signal */
pps_dec_valid(ntpdata);
diff --git a/kernel/time/ntp_internal.h b/kernel/time/ntp_internal.h
index 44306ffe25ff..d0460449eb50 100644
--- a/kernel/time/ntp_internal.h
+++ b/kernel/time/ntp_internal.h
@@ -7,6 +7,8 @@ extern void ntp_clear(unsigned int tkid);
/* Returns how long ticks are at present, in ns / 2^NTP_SCALE_SHIFT. */
extern u64 ntp_tick_length(unsigned int tkid);
extern u64 ntp_tick_length_base(unsigned int tkid);
+extern s64 ntp_get_skew_delta(unsigned int tkid);
+extern s64 ntp_drain_time_offset(unsigned int tkid, s64 amount);
extern void ntp_set_time_offset(unsigned int tkid, s64 offset_ns);
extern void ntp_set_tick_length(unsigned int tkid, u64 tick_length);
extern ktime_t ntp_get_next_leap(unsigned int tkid);
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 1cc98fdda4f8..f20bc76f43ca 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -2354,20 +2354,23 @@ EXPORT_SYMBOL_GPL(timekeeping_set_reference);
static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
{
u64 ntp_tl = ntp_tick_length(tk->id);
+ s64 skew = ntp_get_skew_delta(tk->id);
u32 mult;

/*
- * Determine the multiplier from the current NTP tick length.
- * Avoid expensive division when the tick length doesn't change.
+ * Determine the multiplier from the current NTP tick length plus
+ * skew_delta. The skew biases mult so that ±1 dithering can deliver
+ * the time_offset slew rate. Recompute when either changes.
*/
- if (likely(tk->ntp_tick == ntp_tl)) {
+ if (likely(tk->ntp_tick == ntp_tl && tk->skew_delta == skew)) {
mult = tk->tkr_mono.mult - tk->ntp_err_mult;
} else {
if (unlikely(!tk->cycle_interval))
return;
tk->ntp_tick = ntp_tl;
- mult = div64_u64(tk->ntp_tick >> tk->ntp_error_shift,
- tk->cycle_interval);
+ tk->skew_delta = skew;
+ mult = div64_u64((tk->ntp_tick + skew) >> tk->ntp_error_shift,
+ tk->cycle_interval);
if (tk_ref_pending && tk->cs_id == tk_ref.cs_id) {
u64 d = tk->tkr_mono.cycle_last - tk_ref.counter_value;
__uint128_t p = (__uint128_t)d * tk_ref.period_frac_sec;
@@ -2512,6 +2515,20 @@ static u64 logarithmic_accumulation(struct timekeeper *tk, u64 offset,
tk->ntp_error -= tk->xtime_interval <<
(tk->ntp_error_shift + shift);

+ /*
+ * During clock skew driven by ntpdata->time_offset, transfer a
+ * *portion* of the requested total delta into ntp_error from
+ * time_offset each tick. The second_overflow() function sets
+ * the rate of skew, and the value of 'mult' has been selected
+ * in order to allow the dithering to keep ntp_error around zero
+ * even while this adjustment is being applied.
+ */
+ if (tk->skew_delta) {
+ s64 drain = div_s64(tk->skew_delta << shift, NTP_INTERVAL_FREQ);
+ tk->ntp_error += tk->skew_delta << shift;
+ ntp_drain_time_offset(tk->id, drain);
+ }
+
return offset;
}

--
2.51.0