Skip to content

Commit c503893

Browse files
author
Trung
committed
Align guest MAP_SHARED allocations to stage-2 block boundaries (2 MiB)
1 parent 8441714 commit c503893

19 files changed

Lines changed: 556 additions & 144 deletions

File tree

src/core/elf.c

Lines changed: 27 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
* segments, and copies them into guest memory.
99
*/
1010

11+
#include <stdbool.h>
1112
#include <stdio.h>
1213
#include <stdlib.h>
1314
#include <string.h>
@@ -19,41 +20,46 @@
1920
#include "debug/log.h"
2021
#include "utils.h"
2122

22-
int elf_load(const char *path, elf_info_t *info)
23+
static int elf_load_impl(const char *path, elf_info_t *info, bool quiet)
2324
{
2425
memset(info, 0, sizeof(*info));
2526

2627
FILE *f = fopen(path, "rb");
2728
if (!f) {
28-
perror(path);
29+
if (!quiet)
30+
perror(path);
2931
return -1;
3032
}
3133

3234
elf64_ehdr_t ehdr;
3335
if (fread(&ehdr, sizeof(ehdr), 1, f) != 1) {
34-
log_error("%s: failed to read ELF header", path);
36+
if (!quiet)
37+
log_error("%s: failed to read ELF header", path);
3538
fclose(f);
3639
return -1;
3740
}
3841

3942
/* Reject non-ELF inputs before interpreting the rest of the header. */
4043
if (ehdr.e_ident[0] != ELFMAG0 || ehdr.e_ident[1] != ELFMAG1 ||
4144
ehdr.e_ident[2] != ELFMAG2 || ehdr.e_ident[3] != ELFMAG3) {
42-
log_error("%s: not an ELF file", path);
45+
if (!quiet)
46+
log_error("%s: not an ELF file", path);
4347
fclose(f);
4448
return -1;
4549
}
4650

4751
/* elfuse only implements the 64-bit Linux ABI. */
4852
if (ehdr.e_ident[EI_CLASS] != ELFCLASS64) {
49-
log_error("%s: not a 64-bit ELF", path);
53+
if (!quiet)
54+
log_error("%s: not a 64-bit ELF", path);
5055
fclose(f);
5156
return -1;
5257
}
5358

5459
/* aarch64-linux user binaries are little-endian in the supported mode. */
5560
if (ehdr.e_ident[EI_DATA] != ELFDATA2LSB) {
56-
log_error("%s: not little-endian", path);
61+
if (!quiet)
62+
log_error("%s: not little-endian", path);
5763
fclose(f);
5864
return -1;
5965
}
@@ -62,8 +68,9 @@ int elf_load(const char *path, elf_info_t *info)
6268
* diagnostic instead of a generic parse failure.
6369
*/
6470
if (ehdr.e_machine != EM_AARCH64 && ehdr.e_machine != EM_X86_64) {
65-
log_error("%s: unsupported architecture (e_machine=%u)", path,
66-
ehdr.e_machine);
71+
if (!quiet)
72+
log_error("%s: unsupported architecture (e_machine=%u)", path,
73+
ehdr.e_machine);
6774
fclose(f);
6875
return -1;
6976
}
@@ -72,7 +79,8 @@ int elf_load(const char *path, elf_info_t *info)
7279
* the load base that keeps them away from elfuse's reserved regions.
7380
*/
7481
if (ehdr.e_type != ET_EXEC && ehdr.e_type != ET_DYN) {
75-
log_error("%s: not an executable (e_type=%u)", path, ehdr.e_type);
82+
if (!quiet)
83+
log_error("%s: not an executable (e_type=%u)", path, ehdr.e_type);
7684
fclose(f);
7785
return -1;
7886
}
@@ -204,6 +212,16 @@ int elf_load(const char *path, elf_info_t *info)
204212
return 0;
205213
}
206214

215+
int elf_load(const char *path, elf_info_t *info)
216+
{
217+
return elf_load_impl(path, info, false);
218+
}
219+
220+
int elf_load_quiet(const char *path, elf_info_t *info)
221+
{
222+
return elf_load_impl(path, info, true);
223+
}
224+
207225
int elf_map_segments(const elf_info_t *info,
208226
const char *path,
209227
void *guest_base,

src/core/elf.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -105,6 +105,7 @@ typedef struct {
105105
* Returns 0 on success, -1 on failure. Does NOT copy to guest yet.
106106
*/
107107
int elf_load(const char *path, elf_info_t *info);
108+
int elf_load_quiet(const char *path, elf_info_t *info);
108109

109110
/* Copy ELF segments into guest memory. Call after elf_load() and guest_init().
110111
* Also copies program headers into guest memory for AT_PHDR. load_base is added

src/core/guest.c

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1661,6 +1661,19 @@ int guest_get_used_regions(const guest_t *g,
16611661
n++;
16621662
}
16631663

1664+
/* Interpreter high block. The dynamic linker stores process-global state
1665+
* such as __stack_chk_guard in its high mapping just above interp_base.
1666+
* Fork children that take the region-copy path must inherit those bytes;
1667+
* otherwise libc's post-fork canary check observes zeroed guard storage
1668+
* and aborts before the child can exec.
1669+
*/
1670+
if (n < max && g->interp_base > 0 &&
1671+
g->interp_base <= g->guest_size - BLOCK_2MIB) {
1672+
out[n].offset = g->interp_base;
1673+
out[n].size = BLOCK_2MIB;
1674+
n++;
1675+
}
1676+
16641677
/* ELF + brk region: from elf_load_min (set by ELF loader) to brk_current.
16651678
* The lower bound is the actual ELF load address, not ELF_DEFAULT_BASE:
16661679
* ET_EXECs linked below 0x400000 (e.g. at 0x200000) have segments below the

src/core/stack.c

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@
1616
#include <sys/random.h>
1717

1818
#include "core/stack.h"
19-
#include "syscall/abi.h" /* GUEST_UID, GUEST_GID */
19+
#include "syscall/proc.h"
2020

2121
/* Linux aarch64 HWCAP bits (from asm/hwcap.h). Only the bits the VZ-sanitized
2222
* ID registers actually advertise are listed here; HWCAP bits left out (e.g.,
@@ -284,12 +284,12 @@ uint64_t build_linux_stack(guest_t *g,
284284
AUX(AT_PHENT, elf_info->phentsize);
285285
AUX(AT_PHNUM, elf_info->phnum);
286286
AUX(AT_ENTRY, elf_info->entry + elf_load_base);
287-
AUX(AT_UID, GUEST_UID);
288-
AUX(AT_EUID, GUEST_UID);
289-
AUX(AT_GID, GUEST_GID);
290-
AUX(AT_EGID, GUEST_GID);
291-
/* Bionic's __libc_init_AT_SECURE aborts when AT_SECURE is absent. elfuse
292-
* never elevates privileges, so AT_SECURE is always 0.
287+
AUX(AT_UID, proc_get_uid());
288+
AUX(AT_EUID, proc_get_euid());
289+
AUX(AT_GID, proc_get_gid());
290+
AUX(AT_EGID, proc_get_egid());
291+
/* Bionic's __libc_init_AT_SECURE aborts when AT_SECURE is absent.
292+
* elfuse never elevates privileges, so AT_SECURE is always 0.
293293
*/
294294
AUX(AT_SECURE, 0);
295295
AUX(AT_HWCAP2, query_hwcap2());

src/runtime/fork-state.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -71,7 +71,7 @@ int fork_ipc_read_all(int fd, void *buf, size_t len)
7171
* message comfortably below that limit and stream large fd sets in multiple
7272
* chunks.
7373
*/
74-
#define FORK_IPC_FD_CHUNK 120
74+
#define FORK_IPC_FD_CHUNK 32
7575

7676
int fork_ipc_send_fds(int sock, const int *fds, int count)
7777
{

src/runtime/fork-state.h

Lines changed: 24 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -18,14 +18,30 @@
1818
/* Magic values for IPC frame delimiters */
1919
#define IPC_MAGIC_HEADER 0x454C464BU /* "ELFK" */
2020
#define IPC_MAGIC_SENTINEL 0x454C4F4BU /* "ELOK" */
21-
/* Bumped to 11 when regions_tracker_stale was added to process state so forked
22-
* children preserve mprotect fast-path correctness.
21+
/* Bumped to 13 when pointer-authentication key registers and the remaining
22+
* EL0 TLS registers were added so forked children and clone-created vCPUs
23+
* resume with the same userspace CPU context as the parent. New Ubuntu arm64
24+
* userspace can use PAC in libc and TLS-adjacent state during fork return.
25+
*
26+
* Bumped to 12 when clone_flags/child_tid_gva were added so fork-process
27+
* children can apply CLONE_CHILD_SETTID/CLEARTID inside their own snapshot.
28+
*
29+
* Bumped to 11 when regions_tracker_stale was added to process state so
30+
* forked children preserve mprotect fast-path correctness.
2331
*
2432
* Bumped to 10 when the rosetta placement / kbuf / ttbr1 tuple was added so a
2533
* rosetta-aware child rejects an older parent's header instead of trying to
2634
* interpret unknown trailing fields.
2735
*/
28-
#define IPC_VERSION 11
36+
#define IPC_VERSION 13
37+
38+
typedef struct {
39+
uint64_t apiakeylo_el1, apiakeyhi_el1;
40+
uint64_t apibkeylo_el1, apibkeyhi_el1;
41+
uint64_t apdakeylo_el1, apdakeyhi_el1;
42+
uint64_t apdbkeylo_el1, apdbkeyhi_el1;
43+
uint64_t apgakeylo_el1, apgakeyhi_el1;
44+
} ipc_pauth_keys_t;
2945

3046
typedef struct {
3147
uint32_t magic;
@@ -60,6 +76,8 @@ typedef struct {
6076
uint64_t rosetta_entry;
6177
uint64_t kbuf_gpa;
6278
uint64_t ttbr1;
79+
uint64_t clone_flags;
80+
uint64_t child_tid_gva;
6381
} ipc_header_t;
6482

6583
typedef struct {
@@ -74,8 +92,10 @@ typedef struct {
7492
* access faults.
7593
*/
7694
uint64_t ttbr1_el1;
77-
uint64_t sctlr_el1, tcr_el1, mair_el1, cpacr_el1, tpidr_el0, sp_el1;
95+
uint64_t sctlr_el1, tcr_el1, mair_el1, cpacr_el1;
96+
uint64_t tpidr_el0, tpidrro_el0, tpidr2_el0, sp_el1;
7897
uint64_t x[31];
98+
ipc_pauth_keys_t pauth_keys;
7999
vcpu_simd_state_t simd_state;
80100
} ipc_registers_t;
81101

0 commit comments

Comments
 (0)