99 * src/core/shim.S.
1010 */
1111
12+ #include <pthread.h>
1213#include <stdint.h>
14+ #include <stdio.h>
1315#include <stdlib.h>
1416#include <string.h>
1517#include <sched.h>
4648 * fast path; if they drift here the shim reads from the wrong
4749 * place. Catch the drift at compile time.
4850 */
51+ _Static_assert (SHIM_GLOBALS_OFF_STATS_EN == 0x04 ,
52+ "shim.S COUNTER_INC hard-codes STATS_EN byte off 0x04" );
53+ _Static_assert (SHIM_GLOBALS_OFF_STATS_EN >= 4 &&
54+ SHIM_GLOBALS_OFF_STATS_EN < SHIM_IDENTITY_BASE ,
55+ "STATS_EN byte must sit in the attention/identity padding" );
4956_Static_assert (SHIM_URANDOM_OFF_BITMAP == 0x38 ,
5057 "shim.S urandom fast path hard-codes BITMAP off 0x38" );
5158_Static_assert (SHIM_URANDOM_OFF_RING_HEAD == 0xB8 ,
@@ -60,6 +67,35 @@ _Static_assert(SHIM_URANDOM_OFF_RING_LOCK == 0x10C0,
6067 "shim.S urandom fast path hard-codes RING_LOCK off 0x10C0" );
6168_Static_assert (FD_TABLE_SIZE == 1024 ,
6269 "shim.S urandom fast path hard-codes FD_TABLE_SIZE 1024" );
70+ _Static_assert (SHIM_URANDOM_INLINE_LIMIT == 256 ,
71+ "shim.S urandom/getrandom fast path hard-codes 256-byte cap" );
72+
73+ /* shim.S COUNTER_INC macro hardcodes (SHIM_COUNTERS_OFF & 0xFFF) and the
74+ * 0x1, lsl #12 carry. Keep the literal in sync so a layout shift fails
75+ * the build rather than silently routing increments to the wrong slot.
76+ */
77+ _Static_assert (SHIM_COUNTERS_OFF == 0x10C8 ,
78+ "shim.S COUNTER_INC hard-codes SHIM_COUNTERS_OFF=0x10C8" );
79+ /* shim.S splits SHIM_COUNTERS_OFF into a shifted-add carry (0x1000) plus
80+ * an imm12 load/store offset (0xC8 + slot byte). Pin the split so any
81+ * future layout shift fails the build instead of silently routing
82+ * increments to the wrong slot.
83+ */
84+ _Static_assert ((SHIM_COUNTERS_OFF & 0xFFF ) == 0xC8 ,
85+ "shim.S SHIM_COUNTERS_OFF_LO12 hard-coded to 0xC8" );
86+ _Static_assert ((SHIM_COUNTERS_OFF & ~0xFFF ) == 0x1000 ,
87+ "shim.S SHIM_COUNTERS_OFF_HI hard-coded to 0x1000" );
88+ _Static_assert (SHIM_IDENTITY_OFF_PGID == 0x1148 ,
89+ "shim.S getpgid fast path hard-codes PGID off 0x1148" );
90+ _Static_assert (SHIM_IDENTITY_OFF_SID == 0x1150 ,
91+ "shim.S getsid fast path hard-codes SID off 0x1150" );
92+ _Static_assert (SHIM_GLOBALS_SIZE >= SHIM_IDENTITY_OFF_SID + 8 ,
93+ "SHIM_GLOBALS_SIZE must cover the PGID/SID slots" );
94+ _Static_assert (SHIM_GLOBALS_SIZE <= BLOCK_2MIB ,
95+ "SHIM_GLOBALS_SIZE must fit inside the 2 MiB shim_data block" );
96+ _Static_assert (SHIM_COUNTERS_OFF + SHIM_COUNTERS_N * 8 <=
97+ SHIM_IDENTITY_OFF_PGID ,
98+ "counter array must not overlap the PGID slot" );
6399
64100static uint8_t * cache_base (const guest_t * g )
65101{
@@ -114,6 +150,13 @@ void shim_globals_publish_creds(guest_t *g,
114150 store_u64 (page , SHIM_IDENTITY_OFF_EGID , egid );
115151}
116152
153+ void shim_globals_publish_pgsid (guest_t * g , int64_t pgid , int64_t sid )
154+ {
155+ uint8_t * page = cache_base (g );
156+ store_u64 (page , SHIM_IDENTITY_OFF_PGID , (uint64_t ) pgid );
157+ store_u64 (page , SHIM_IDENTITY_OFF_SID , (uint64_t ) sid );
158+ }
159+
117160uint64_t shim_globals_gva (const guest_t * g )
118161{
119162 return g -> shim_data_base ;
@@ -242,9 +285,18 @@ void shim_globals_rebuild_urandom_bitmap(void)
242285}
243286
244287/* arc4random_buf is documented as deadlock-free and re-entrant. Used
245- * by both the initial fill at bootstrap and by the slow-path refill
246- * that runs from sys_read when the shim's fast path falls through due
247- * to an empty ring.
288+ * by the initial fill at bootstrap and by the slow-path refill that
289+ * runs from sys_read/sys_getrandom when the shim's fast path falls
290+ * through due to an empty ring.
291+ *
292+ * Entropy is generated OUTSIDE the ring_lock: arc4random_buf can take
293+ * microseconds, and any sibling vCPU that hits the fast path while the
294+ * lock is held spins (yield) until release. Generate up to a full ring
295+ * into a stack scratch buffer, then take the lock only to re-read
296+ * head/fill and copy the publishable prefix into the ring. The recheck
297+ * after lock acquire matters: a concurrent fast path may have advanced
298+ * head while entropy was being generated, raising the publishable
299+ * count beyond the pre-lock estimate.
248300 */
249301void shim_globals_refill_urandom_ring (guest_t * g )
250302{
@@ -254,13 +306,31 @@ void shim_globals_refill_urandom_ring(guest_t *g)
254306 uint32_t * lock_p = (uint32_t * ) (base + SHIM_URANDOM_OFF_RING_LOCK );
255307 uint8_t * ring = base + SHIM_URANDOM_OFF_RING ;
256308
309+ /* Pre-lock estimate: skip the arc4random_buf + lock when the ring
310+ * is already full. Both cursors are read RELAXED so a torn snapshot
311+ * (head_pre observed past a producer step but tail_pre observed
312+ * before it) can make tail_pre - head_pre wrap to a huge unsigned
313+ * value. A loose ">= RING_SIZE" check would treat that garbage as
314+ * "already full" and skip a genuinely-needed refill. Only the exact
315+ * == RING_SIZE value is a safe full-detection; any other (valid or
316+ * torn) reading falls through to the lock-held recheck below.
317+ */
318+ uint32_t head_pre = __atomic_load_n (head_p , __ATOMIC_RELAXED );
319+ uint32_t tail_pre = __atomic_load_n (tail_p , __ATOMIC_RELAXED );
320+ uint32_t fill_pre = tail_pre - head_pre ;
321+ if (fill_pre == SHIM_URANDOM_RING_SIZE )
322+ return ;
323+
324+ uint8_t scratch [SHIM_URANDOM_RING_SIZE ];
325+ arc4random_buf (scratch , sizeof (scratch ));
326+
257327 urandom_ring_lock (lock_p );
258328
259329 uint32_t head = __atomic_load_n (head_p , __ATOMIC_ACQUIRE );
260330 uint32_t tail = __atomic_load_n (tail_p , __ATOMIC_RELAXED );
261331 uint32_t fill = tail - head ;
262332 if (fill >= SHIM_URANDOM_RING_SIZE )
263- goto out ; /* already full */
333+ goto out ; /* concurrent refill caught up */
264334 uint32_t to_fill = SHIM_URANDOM_RING_SIZE - fill ;
265335
266336 /* Producer writes from ring[tail & (SIZE-1)] forward, wrapping
@@ -270,9 +340,9 @@ void shim_globals_refill_urandom_ring(guest_t *g)
270340 uint32_t first = SHIM_URANDOM_RING_SIZE - pos ;
271341 if (first > to_fill )
272342 first = to_fill ;
273- arc4random_buf (ring + pos , first );
343+ memcpy (ring + pos , scratch , first );
274344 if (to_fill > first )
275- arc4random_buf (ring , to_fill - first );
345+ memcpy (ring , scratch + first , to_fill - first );
276346
277347 /* Release-store the new tail so any fast-path consumer that loads
278348 * tail with an acquiring read sees the bytes already in the ring.
@@ -359,3 +429,78 @@ void shim_globals_set_trace_enabled(guest_t *g, bool enabled)
359429 else
360430 shim_globals_attn_and (g , ~ATTN_BIT_TRACE );
361431}
432+
433+ static const char * const counter_names [SHIM_COUNTERS_N ] = {
434+ [SHIM_COUNTER_ATTN_BAIL ] = "ATTN_BAIL" ,
435+ [SHIM_COUNTER_URANDOM_FD_OOR ] = "URANDOM_FD_OOR" ,
436+ [SHIM_COUNTER_URANDOM_FD_BMISS ] = "URANDOM_FD_BMISS" ,
437+ [SHIM_COUNTER_URANDOM_LEN_ZERO ] = "URANDOM_LEN_ZERO" ,
438+ [SHIM_COUNTER_URANDOM_LEN_OVER ] = "URANDOM_LEN_OVER" ,
439+ [SHIM_COUNTER_URANDOM_RING_LOW ] = "URANDOM_RING_LOW" ,
440+ [SHIM_COUNTER_URANDOM_RING_WRAP ] = "URANDOM_RING_WRAP" ,
441+ [SHIM_COUNTER_URANDOM_PROBE_FAIL ] = "URANDOM_PROBE_FAIL" ,
442+ [SHIM_COUNTER_IDENTITY_HIT ] = "IDENTITY_HIT" ,
443+ [SHIM_COUNTER_URANDOM_HIT ] = "URANDOM_HIT" ,
444+ [SHIM_COUNTER_GETRANDOM_HIT ] = "GETRANDOM_HIT" ,
445+ [SHIM_COUNTER_PGSID_HIT ] = "PGSID_HIT" ,
446+ /* Slots 12..15 (SHIM_COUNTERS_N == 16) are intentionally unnamed;
447+ * the dump prints "(reserved)" so they appear in the output when
448+ * non-zero, which would flag an out-of-band increment. Bind a name
449+ * here when a future EL1 service claims one of these slots.
450+ */
451+ };
452+
453+ uint64_t shim_globals_counter_get (const guest_t * g , unsigned slot )
454+ {
455+ if (slot >= SHIM_COUNTERS_N )
456+ return 0 ;
457+ const uint8_t * page = (const uint8_t * ) g -> host_base + g -> shim_data_base ;
458+ const uint64_t * slot_p =
459+ (const uint64_t * ) (page + SHIM_COUNTERS_OFF ) + slot ;
460+ return __atomic_load_n (slot_p , __ATOMIC_RELAXED );
461+ }
462+
463+ void shim_globals_counters_dump (const guest_t * g )
464+ {
465+ fprintf (stderr , "shim-stats (pid=%lld)\n" , (long long ) proc_get_pid ());
466+ for (unsigned i = 0 ; i < SHIM_COUNTERS_N ; i ++ ) {
467+ const char * name = counter_names [i ];
468+ uint64_t v = shim_globals_counter_get (g , i );
469+ if (!name && v == 0 )
470+ continue ;
471+ fprintf (stderr , " %-20s %llu\n" , name ? name : "(reserved)" ,
472+ (unsigned long long ) v );
473+ }
474+ }
475+
/* Cached result of the ELFUSE_SHIM_STATS lookup, filled in exactly once
 * through stats_once.
 */
static bool stats_enabled_cache;
static pthread_once_t stats_once = PTHREAD_ONCE_INIT;

/* pthread_once callback: stats are on when ELFUSE_SHIM_STATS is set to
 * anything other than the empty string or the exact string "0".
 */
static void stats_resolve(void)
{
    const char *raw = getenv("ELFUSE_SHIM_STATS");

    if (raw == NULL || raw[0] == '\0') {
        stats_enabled_cache = false;
        return;
    }

    stats_enabled_cache = (strcmp(raw, "0") != 0);
}

/* Report whether shim statistics are enabled.
 *
 * The environment is consulted on the first call only; every later call
 * returns the value resolved then.
 */
bool shim_globals_stats_enabled(void)
{
    pthread_once(&stats_once, stats_resolve);

    return stats_enabled_cache;
}
490+
491+ void shim_globals_publish_stats_gate (guest_t * g )
492+ {
493+ uint8_t * slot = cache_base (g ) + SHIM_GLOBALS_OFF_STATS_EN ;
494+ uint8_t v = shim_globals_stats_enabled () ? 1 : 0 ;
495+ /* One-shot bring-up publish. Every caller (bootstrap, fork-child
496+ * receive, execve) runs before the guest vCPU starts executing,
497+ * so the host-side ordering between this store and the first
498+ * hv_vcpu_run is what makes the shim observe the published value;
499+ * the release semantics here are conservative, not load-bearing.
500+ * A future runtime setter that mutates the gate after guest entry
501+ * would also need the shim side to upgrade its ldrb to ldarb (or
502+ * gate the read on the attention flag) -- a release-store alone
503+ * does not synchronize with a plain ldrb on the same address.
504+ */
505+ __atomic_store_n (slot , v , __ATOMIC_RELEASE );
506+ }
0 commit comments