@@ -221,6 +221,27 @@ typedef struct {
221221
222222/* Time/timer syscall handlers. */
223223
224+ #define LINUX_COARSE_CLOCK_RES_NS 1000000
225+
226+ static bool linux_clock_getres_fixed (int clockid , linux_timespec_t * ts )
227+ {
228+ switch (clockid ) {
229+ case 0 : /* CLOCK_REALTIME */
230+ case 1 : /* CLOCK_MONOTONIC */
231+ case 4 : /* CLOCK_MONOTONIC_RAW */
232+ case 7 : /* CLOCK_BOOTTIME */
233+ * ts = (linux_timespec_t ) {.tv_sec = 0 , .tv_nsec = 1 };
234+ return true;
235+ case 5 : /* CLOCK_REALTIME_COARSE */
236+ case 6 : /* CLOCK_MONOTONIC_COARSE */
237+ * ts = (linux_timespec_t ) {.tv_sec = 0 ,
238+ .tv_nsec = LINUX_COARSE_CLOCK_RES_NS };
239+ return true;
240+ default :
241+ return false;
242+ }
243+ }
244+
224245int64_t sys_clock_getres (guest_t * g , int clockid , uint64_t tp_gva )
225246{
226247 int mac_clockid = translate_clockid (clockid );
@@ -231,9 +252,16 @@ int64_t sys_clock_getres(guest_t *g, int clockid, uint64_t tp_gva)
231252 if (!tp_gva )
232253 return 0 ;
233254
234- struct timespec ts ;
235- if (clock_getres (mac_clockid , & ts ) < 0 )
236- return linux_errno ();
255+ linux_timespec_t ts ;
256+ if (!linux_clock_getres_fixed (clockid , & ts )) {
257+ struct timespec host_ts ;
258+ if (clock_getres (mac_clockid , & host_ts ) < 0 )
259+ return linux_errno ();
260+ ts = (linux_timespec_t ) {
261+ .tv_sec = host_ts .tv_sec ,
262+ .tv_nsec = host_ts .tv_nsec ,
263+ };
264+ }
237265
238266 if (guest_write_small (g , tp_gva , & ts , sizeof (ts )) < 0 )
239267 return - LINUX_EFAULT ;
@@ -247,64 +275,46 @@ int64_t sys_clock_gettime(guest_t *g, int clockid, uint64_t tp_gva)
247275 if (mac_clockid < 0 )
248276 return - LINUX_EINVAL ;
249277
250- /* If this trap came from the __kernel_clock_gettime vDSO svc_fallback,
251- * the trampoline parked the guest's CNTVCT_EL0 read in X9 before
252- * issuing SVC, and ELR_EL1 holds the address immediately after that
253- * SVC. Pair X9 with both the MONOTONIC and REALTIME wall_clocks and
254- * seed the vvar so subsequent calls hit the fast path for either
255- * clockid. Skip the seed for any other trap (raw
256- * syscall(SYS_clock_gettime, ...) from guest code, etc.): X9 is
257- * then arbitrary guest state, and seeding from it would poison the
258- * anchor and break every later fast-path call.
259- *
260- * Skip the gate entirely once the anchor is published: vdso_seed_anchor
261- * is a one-shot CAS that can never fire again, so the HVF reads of
262- * ELR_EL1 and X9 below would be pure waste on every subsequent trap.
263- * Both clockid 0 (REALTIME) and clockid 1 (MONOTONIC) take the vDSO
264- * fast path, so either may be the first caller; either way both
265- * anchor pairs are seeded from a single set of host clock_gettime
266- * calls.
278+ /* When the trap came from the __kernel_clock_gettime vDSO
279+ * svc_fallback, the trampoline parked the guest's CNTVCT_EL0 read in
280+ * X9 before SVC, and ELR_EL1 holds SVC_PC + 4. Use X9 to seed (or
281+ * refresh) the vvar anchor so subsequent calls hit the fast path.
282+ * Reject any other trap: X9 would then be arbitrary guest state and
283+ * seeding from it would poison the anchor.
267284 *
268- * Order matters: read X9 first, then sample both host wall clocks
269- * back-to-back, then write to guest and seed. Sampling host clocks
270- * before checking X9 would bake a permanent positive bias (~50-200 ns)
271- * into the anchor because every host call ages the X9 timestamp by
272- * the seeding gate's HVF round-trip. The back-to-back wall-clock
273- * reads minimize MONO/REAL skew within the anchor.
285+ * Order matters: read X9 first, then sample host wall clocks
286+ * back-to-back, then write the guest result and seed. Sampling host
287+ * clocks before checking X9 would bake a permanent positive bias
288+ * into the anchor from the HVF round-trip in the seeding gate.
274289 */
275- bool seed_eligible = (clockid == 0 /* CLOCK_REALTIME */ ||
276- clockid == 1 /* CLOCK_MONOTONIC */ ) &&
277- current_thread && ! vdso_anchor_is_seeded ( g ) ;
290+ bool from_trampoline = (clockid == 0 /* CLOCK_REALTIME */ ||
291+ clockid == 1 /* CLOCK_MONOTONIC */ ) &&
292+ current_thread ;
278293
279294 uint64_t guest_cntvct = 0 ;
280- if (seed_eligible ) {
295+ if (from_trampoline ) {
281296 uint64_t elr = 0 ;
282297 if (hv_vcpu_get_sys_reg (current_thread -> vcpu , HV_SYS_REG_ELR_EL1 ,
283298 & elr ) != HV_SUCCESS ||
284299 elr != vdso_clock_gettime_svc_pc () + 4 ||
285300 hv_vcpu_get_reg (current_thread -> vcpu , HV_REG_X9 , & guest_cntvct ) !=
286301 HV_SUCCESS ||
287- guest_cntvct == 0 ) {
288- /* Trap came from a path other than the vDSO trampoline; X9 is
289- * arbitrary, fall through to the non-seeding path.
290- */
291- seed_eligible = false;
292- }
302+ guest_cntvct == 0 )
303+ from_trampoline = false;
293304 }
294305
295306 struct timespec ts ;
296307 if (clock_gettime (mac_clockid , & ts ) < 0 )
297308 return linux_errno ();
298309
299- /* For the seeding path, sample the OTHER clockid back-to-back so both
300- * anchor pairs reflect roughly the same host moment. If the second
301- * clock_gettime fails (unreachable on macOS but defensive), skip
302- * seeding rather than fail the user's request: the user already has
303- * the value they asked for.
310+ /* Sample the OTHER clockid back-to-back so both anchor pairs reflect
311+ * roughly the same host moment. If the second clock_gettime fails
312+ * (defensive; unreachable on macOS), skip seeding rather than fail
313+ * the user's request.
304314 */
305315 struct timespec ts_other ;
306316 bool can_seed = false;
307- if (seed_eligible ) {
317+ if (from_trampoline ) {
308318 int other_mac = (clockid == 1 ) ? CLOCK_REALTIME : CLOCK_MONOTONIC ;
309319 if (clock_gettime (other_mac , & ts_other ) == 0 )
310320 can_seed = true;
@@ -316,8 +326,17 @@ int64_t sys_clock_gettime(guest_t *g, int clockid, uint64_t tp_gva)
316326 if (can_seed ) {
317327 const struct timespec * ts_mono = (clockid == 1 ) ? & ts : & ts_other ;
318328 const struct timespec * ts_real = (clockid == 0 ) ? & ts : & ts_other ;
319- vdso_seed_anchor (g , guest_cntvct , ts_mono -> tv_sec , ts_mono -> tv_nsec ,
320- ts_real -> tv_sec , ts_real -> tv_nsec );
329+
330+ /* Publish when the vvar is unseeded, has aged out, or has
331+ * drifted relative to the freshly-sampled REALTIME (catches
332+ * macOS NTP steps).
333+ */
334+ if (!vdso_anchor_is_seeded (g ) ||
335+ vdso_anchor_age_exceeded (g , guest_cntvct ) ||
336+ vdso_realtime_drift_exceeded (g , guest_cntvct , ts_real -> tv_sec ,
337+ ts_real -> tv_nsec ))
338+ vdso_seed_anchor (g , guest_cntvct , ts_mono -> tv_sec , ts_mono -> tv_nsec ,
339+ ts_real -> tv_sec , ts_real -> tv_nsec );
321340 }
322341
323342 return 0 ;
@@ -391,13 +410,55 @@ int64_t sys_clock_nanosleep(guest_t *g,
391410
392411int64_t sys_gettimeofday (guest_t * g , uint64_t tv_gva , uint64_t tz_gva )
393412{
394- (void ) tz_gva ; /* timezone is obsolete */
413+ bool from_trampoline = current_thread ;
414+ uint64_t guest_cntvct = 0 ;
415+ if (from_trampoline ) {
416+ uint64_t elr = 0 ;
417+ if (hv_vcpu_get_sys_reg (current_thread -> vcpu , HV_SYS_REG_ELR_EL1 ,
418+ & elr ) != HV_SUCCESS ||
419+ elr != vdso_gettimeofday_svc_pc () + 4 ||
420+ hv_vcpu_get_reg (current_thread -> vcpu , HV_REG_X9 , & guest_cntvct ) !=
421+ HV_SUCCESS ||
422+ guest_cntvct == 0 )
423+ from_trampoline = false;
424+ }
425+
395426 struct timeval tv ;
396427 if (gettimeofday (& tv , NULL ) < 0 )
397428 return linux_errno ();
398429
399- if (tv_gva && guest_write_small (g , tv_gva , & tv , sizeof (tv )) < 0 )
430+ struct timespec ts_mono ;
431+ struct timespec ts_real ;
432+ bool can_seed = false;
433+ if (from_trampoline && clock_gettime (CLOCK_MONOTONIC , & ts_mono ) == 0 &&
434+ clock_gettime (CLOCK_REALTIME , & ts_real ) == 0 )
435+ can_seed = true;
436+
437+ linux_timeval_t ltv = {
438+ .tv_sec = tv .tv_sec ,
439+ .tv_usec = tv .tv_usec ,
440+ };
441+ if (tv_gva && guest_write_small (g , tv_gva , & ltv , sizeof (ltv )) < 0 )
400442 return - LINUX_EFAULT ;
443+
444+ /* tz is obsolete on Linux but the kernel still zeroes a non-null
445+ * pointer (struct timezone has two int32 fields, 8 bytes total).
446+ * Matching the vDSO fast path's `str xzr, [tz]` here keeps SVC and
447+ * fast-path callers observationally identical.
448+ */
449+ if (tz_gva ) {
450+ const uint64_t tz_zero = 0 ;
451+ if (guest_write_small (g , tz_gva , & tz_zero , sizeof (tz_zero )) < 0 )
452+ return - LINUX_EFAULT ;
453+ }
454+
455+ if (can_seed && (!vdso_anchor_is_seeded (g ) ||
456+ vdso_anchor_age_exceeded (g , guest_cntvct ) ||
457+ vdso_realtime_drift_exceeded (
458+ g , guest_cntvct , ts_real .tv_sec , ts_real .tv_nsec )))
459+ vdso_seed_anchor (g , guest_cntvct , ts_mono .tv_sec , ts_mono .tv_nsec ,
460+ ts_real .tv_sec , ts_real .tv_nsec );
461+
401462 return 0 ;
402463}
403464
0 commit comments