Skip to content

Commit bc4c7e0

Browse files
author
Trung
committed
Improve Linux distro rootfs compatibility on Apple Silicon
Improve compatibility with real Linux distro rootfs environments on Apple Silicon hosts. Package-manager and shell workflows need behavior closer to Linux for credentials, script execution, fork/clone state, wait handling, pipes, /proc, and shared mappings. Preserve dynamic guest UID/GID state in auxv instead of always reporting fixed guest IDs, and allow the initial guest identity to be configured with ELFUSE_GUEST_UID and ELFUSE_GUEST_GID. This lets distro workflows such as apt post-install scripts run with root-like guest credentials when needed. Probe ELF binaries quietly before falling back to shebang handling, so script execution does not emit misleading "not an ELF" diagnostics. Extend fork IPC state and child restore handling to carry more complete CPU state: the remaining TLS-related registers (TPIDRRO_EL0 and TPIDR2_EL0), the PAC keys, and the original SPSR, together with the clone flags and child TID handling. Add child process monitoring so that the exit of a host child process wakes Linux-style wait handling and drives the corresponding signal behavior. Align non-fixed file-backed MAP_SHARED mappings to 2 MiB stage-2 boundaries to avoid HVF mapping issues on Apple Silicon. Improve sysroot symlink creation for absolute guest symlink targets, and add minor Linux-compatibility handling for sync_file_range and pipe F_SETNOSIGPIPE. These changes were tested with an Ubuntu arm64 rootfs using shell pipelines, /proc checks, and apt-get update smoke testing.
1 parent 8441714 commit bc4c7e0

19 files changed

Lines changed: 570 additions & 161 deletions

File tree

src/core/elf.c

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
* segments, and copies them into guest memory.
99
*/
1010

11+
#include <stdbool.h>
1112
#include <stdio.h>
1213
#include <stdlib.h>
1314
#include <string.h>
@@ -19,41 +20,46 @@
1920
#include "debug/log.h"
2021
#include "utils.h"
2122

22-
int elf_load(const char *path, elf_info_t *info)
23+
static int elf_load_impl(const char *path, elf_info_t *info, bool quiet)
2324
{
2425
memset(info, 0, sizeof(*info));
2526

2627
FILE *f = fopen(path, "rb");
2728
if (!f) {
28-
perror(path);
29+
if (!quiet)
30+
perror(path);
2931
return -1;
3032
}
3133

3234
elf64_ehdr_t ehdr;
3335
if (fread(&ehdr, sizeof(ehdr), 1, f) != 1) {
34-
log_error("%s: failed to read ELF header", path);
36+
if (!quiet)
37+
log_error("%s: failed to read ELF header", path);
3538
fclose(f);
3639
return -1;
3740
}
3841

3942
/* Reject non-ELF inputs before interpreting the rest of the header. */
4043
if (ehdr.e_ident[0] != ELFMAG0 || ehdr.e_ident[1] != ELFMAG1 ||
4144
ehdr.e_ident[2] != ELFMAG2 || ehdr.e_ident[3] != ELFMAG3) {
42-
log_error("%s: not an ELF file", path);
45+
if (!quiet)
46+
log_error("%s: not an ELF file", path);
4347
fclose(f);
4448
return -1;
4549
}
4650

4751
/* elfuse only implements the 64-bit Linux ABI. */
4852
if (ehdr.e_ident[EI_CLASS] != ELFCLASS64) {
49-
log_error("%s: not a 64-bit ELF", path);
53+
if (!quiet)
54+
log_error("%s: not a 64-bit ELF", path);
5055
fclose(f);
5156
return -1;
5257
}
5358

5459
/* aarch64-linux user binaries are little-endian in the supported mode. */
5560
if (ehdr.e_ident[EI_DATA] != ELFDATA2LSB) {
56-
log_error("%s: not little-endian", path);
61+
if (!quiet)
62+
log_error("%s: not little-endian", path);
5763
fclose(f);
5864
return -1;
5965
}
@@ -62,8 +68,9 @@ int elf_load(const char *path, elf_info_t *info)
6268
* diagnostic instead of a generic parse failure.
6369
*/
6470
if (ehdr.e_machine != EM_AARCH64 && ehdr.e_machine != EM_X86_64) {
65-
log_error("%s: unsupported architecture (e_machine=%u)", path,
66-
ehdr.e_machine);
71+
if (!quiet)
72+
log_error("%s: unsupported architecture (e_machine=%u)", path,
73+
ehdr.e_machine);
6774
fclose(f);
6875
return -1;
6976
}
@@ -72,7 +79,8 @@ int elf_load(const char *path, elf_info_t *info)
7279
* the load base that keeps them away from elfuse's reserved regions.
7380
*/
7481
if (ehdr.e_type != ET_EXEC && ehdr.e_type != ET_DYN) {
75-
log_error("%s: not an executable (e_type=%u)", path, ehdr.e_type);
82+
if (!quiet)
83+
log_error("%s: not an executable (e_type=%u)", path, ehdr.e_type);
7684
fclose(f);
7785
return -1;
7886
}
@@ -204,6 +212,16 @@ int elf_load(const char *path, elf_info_t *info)
204212
return 0;
205213
}
206214

215+
int elf_load(const char *path, elf_info_t *info)
216+
{
217+
return elf_load_impl(path, info, false);
218+
}
219+
220+
int elf_load_quiet(const char *path, elf_info_t *info)
221+
{
222+
return elf_load_impl(path, info, true);
223+
}
224+
207225
int elf_map_segments(const elf_info_t *info,
208226
const char *path,
209227
void *guest_base,

src/core/elf.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ typedef struct {
105105
* Returns 0 on success, -1 on failure. Does NOT copy to guest yet.
106106
*/
107107
int elf_load(const char *path, elf_info_t *info);
108+
int elf_load_quiet(const char *path, elf_info_t *info);
108109

109110
/* Copy ELF segments into guest memory. Call after elf_load() and guest_init().
110111
* Also copies program headers into guest memory for AT_PHDR. load_base is added

src/core/guest.c

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1661,6 +1661,19 @@ int guest_get_used_regions(const guest_t *g,
16611661
n++;
16621662
}
16631663

1664+
/* Interpreter high block. The dynamic linker stores process-global state
1665+
* such as __stack_chk_guard in its high mapping just above interp_base.
1666+
* Fork children that take the region-copy path must inherit those bytes;
1667+
* otherwise libc's post-fork canary check observes zeroed guard storage
1668+
* and aborts before the child can exec.
1669+
*/
1670+
if (n < max && g->interp_base > 0 &&
1671+
g->interp_base <= g->guest_size - BLOCK_2MIB) {
1672+
out[n].offset = g->interp_base;
1673+
out[n].size = BLOCK_2MIB;
1674+
n++;
1675+
}
1676+
16641677
/* ELF + brk region: from elf_load_min (set by ELF loader) to brk_current.
16651678
* The lower bound is the actual ELF load address, not ELF_DEFAULT_BASE:
16661679
* ET_EXECs linked below 0x400000 (e.g. at 0x200000) have segments below the

src/core/stack.c

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
#include <sys/random.h>
1717

1818
#include "core/stack.h"
19-
#include "syscall/abi.h" /* GUEST_UID, GUEST_GID */
19+
#include "syscall/proc.h"
2020

2121
/* Linux aarch64 HWCAP bits (from asm/hwcap.h). Only the bits the VZ-sanitized
2222
* ID registers actually advertise are listed here; HWCAP bits left out (e.g.,
@@ -284,12 +284,12 @@ uint64_t build_linux_stack(guest_t *g,
284284
AUX(AT_PHENT, elf_info->phentsize);
285285
AUX(AT_PHNUM, elf_info->phnum);
286286
AUX(AT_ENTRY, elf_info->entry + elf_load_base);
287-
AUX(AT_UID, GUEST_UID);
288-
AUX(AT_EUID, GUEST_UID);
289-
AUX(AT_GID, GUEST_GID);
290-
AUX(AT_EGID, GUEST_GID);
291-
/* Bionic's __libc_init_AT_SECURE aborts when AT_SECURE is absent. elfuse
292-
* never elevates privileges, so AT_SECURE is always 0.
287+
AUX(AT_UID, proc_get_uid());
288+
AUX(AT_EUID, proc_get_euid());
289+
AUX(AT_GID, proc_get_gid());
290+
AUX(AT_EGID, proc_get_egid());
291+
/* Bionic's __libc_init_AT_SECURE aborts when AT_SECURE is absent.
292+
* elfuse never elevates privileges, so AT_SECURE is always 0.
293293
*/
294294
AUX(AT_SECURE, 0);
295295
AUX(AT_HWCAP2, query_hwcap2());

src/runtime/fork-state.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ int fork_ipc_read_all(int fd, void *buf, size_t len)
7171
* message comfortably below that limit and stream large fd sets in multiple
7272
* chunks.
7373
*/
74-
#define FORK_IPC_FD_CHUNK 120
74+
#define FORK_IPC_FD_CHUNK 32
7575

7676
int fork_ipc_send_fds(int sock, const int *fds, int count)
7777
{

src/runtime/fork-state.h

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,14 +18,30 @@
1818
/* Magic values for IPC frame delimiters */
1919
#define IPC_MAGIC_HEADER 0x454C464BU /* "ELFK" */
2020
#define IPC_MAGIC_SENTINEL 0x454C4F4BU /* "ELOK" */
21-
/* Bumped to 11 when regions_tracker_stale was added to process state so forked
22-
* children preserve mprotect fast-path correctness.
21+
/* Bumped to 13 when pointer-authentication key registers and the remaining
22+
* EL0 TLS registers were added so forked children and clone-created vCPUs
23+
* resume with the same userspace CPU context as the parent. New Ubuntu arm64
24+
* userspace can use PAC in libc and TLS-adjacent state during fork return.
25+
*
26+
* Bumped to 12 when clone_flags/child_tid_gva were added so fork-process
27+
* children can apply CLONE_CHILD_SETTID/CLEARTID inside their own snapshot.
28+
*
29+
* Bumped to 11 when regions_tracker_stale was added to process state so
30+
* forked children preserve mprotect fast-path correctness.
2331
*
2432
* Bumped to 10 when the rosetta placement / kbuf / ttbr1 tuple was added so a
2533
* rosetta-aware child rejects an older parent's header instead of trying to
2634
* interpret unknown trailing fields.
2735
*/
28-
#define IPC_VERSION 11
36+
#define IPC_VERSION 13
37+
38+
typedef struct {
39+
uint64_t apiakeylo_el1, apiakeyhi_el1;
40+
uint64_t apibkeylo_el1, apibkeyhi_el1;
41+
uint64_t apdakeylo_el1, apdakeyhi_el1;
42+
uint64_t apdbkeylo_el1, apdbkeyhi_el1;
43+
uint64_t apgakeylo_el1, apgakeyhi_el1;
44+
} ipc_pauth_keys_t;
2945

3046
typedef struct {
3147
uint32_t magic;
@@ -60,6 +76,8 @@ typedef struct {
6076
uint64_t rosetta_entry;
6177
uint64_t kbuf_gpa;
6278
uint64_t ttbr1;
79+
uint64_t clone_flags;
80+
uint64_t child_tid_gva;
6381
} ipc_header_t;
6482

6583
typedef struct {
@@ -74,8 +92,10 @@ typedef struct {
7492
* access faults.
7593
*/
7694
uint64_t ttbr1_el1;
77-
uint64_t sctlr_el1, tcr_el1, mair_el1, cpacr_el1, tpidr_el0, sp_el1;
95+
uint64_t sctlr_el1, tcr_el1, mair_el1, cpacr_el1;
96+
uint64_t tpidr_el0, tpidrro_el0, tpidr2_el0, sp_el1;
7897
uint64_t x[31];
98+
ipc_pauth_keys_t pauth_keys;
7999
vcpu_simd_state_t simd_state;
80100
} ipc_registers_t;
81101

0 commit comments

Comments
 (0)