Skip to content

Commit a87481f

Browse files
committed
Defensive
1 parent 265d40e commit a87481f

5 files changed

Lines changed: 126 additions & 4 deletions

File tree

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
import { fork } from 'node:child_process';
2+
import { fileURLToPath } from 'node:url';
3+
import { CompileError } from './errors.js';
4+
5+
// Run the worker under tsx (the service itself runs via tsx, so it's installed).
6+
// Absolute filesystem path of the worker script, resolved relative to this module's URL.
const WORKER_PATH = fileURLToPath(new URL('./compile-worker.ts', import.meta.url));
7+
/**
 * Parse a millisecond timeout from an environment-style string.
 *
 * Node's `setTimeout` coerces NaN, values below 1 and values above 2^31-1 to a
 * 1 ms delay, so a typo in the environment variable would otherwise make every
 * compile "time out" immediately. Unusable values fall back to the default and
 * oversized values are clamped to the largest delay the timer supports.
 *
 * @param raw - Raw string value (for example from `process.env`), or undefined when unset.
 * @param fallbackMs - Value returned when `raw` is missing, non-numeric, zero or negative.
 * @returns A positive integer number of milliseconds no larger than 2147483647.
 */
function parseTimeoutMs(raw: string | undefined, fallbackMs: number): number {
  const MAX_TIMER_DELAY_MS = 2_147_483_647;
  const parsed = Number.parseInt(raw ?? '', 10);
  if (!Number.isFinite(parsed) || parsed <= 0) return fallbackMs;
  return Math.min(parsed, MAX_TIMER_DELAY_MS);
}

// Wall-clock budget for one compile; overridable via the COMPILE_TIMEOUT_MS env var.
const COMPILE_TIMEOUT_MS = parseTimeoutMs(process.env.COMPILE_TIMEOUT_MS, 20000);
8+
9+
/**
 * Turn the worker's IPC reply into the compiled bytes, or throw the matching error.
 *
 * The reply is validated instead of trusted: a success reply without a string
 * `tapB64` field throws a plain Error rather than letting `Buffer.from` throw
 * from inside an event listener.
 *
 * @param reply - Whatever the child sent over the IPC channel.
 * @returns The bytes decoded from the base64 `tapB64` field of a success reply.
 * @throws CompileError when the reply flags the failure as a compile error.
 * @throws Error for any other failure reply or a malformed success reply.
 */
function decodeWorkerReply(reply: unknown): Buffer {
  const fields: Record<string, unknown> =
    typeof reply === 'object' && reply !== null ? (reply as Record<string, unknown>) : {};

  if (fields.ok) {
    if (typeof fields.tapB64 !== 'string') {
      throw new Error('compile worker sent a malformed success reply');
    }
    return Buffer.from(fields.tapB64, 'base64');
  }

  const message = typeof fields.error === 'string' ? fields.error : 'compile failed';
  throw fields.compile ? new CompileError(message) : new Error(message);
}

/**
 * Compile in a forked child and SIGKILL it if it overruns. This is the only way
 * to bound an in-process WASM compiler: a synchronous hang blocks the event
 * loop, so a main-thread timer can't interrupt it, but killing a separate
 * process always works. The service is single-client and sequential, so one
 * child per request is fine.
 *
 * The promise settles exactly once, on whichever happens first: a reply from
 * the child, a spawn/IPC error, the child exiting, or the timeout. In every
 * case the timer is cleared and the child is killed.
 *
 * @param lang - Source language identifier, forwarded to the worker unchanged.
 * @param code - Source text to compile, forwarded to the worker unchanged.
 * @returns The bytes decoded from the worker's base64 `tapB64` reply field.
 * @throws CompileError when the worker reports a compile failure, or on timeout.
 * @throws Error when the worker fails for another reason, cannot be started,
 *   exits without replying, or sends a malformed reply.
 */
export function compileProjectIsolated(lang: string, code: string): Promise<Buffer> {
  return new Promise((resolve, reject) => {
    const child = fork(WORKER_PATH, { execArgv: ['--import', 'tsx'] });
    let settled = false;

    // Single exit path: the first event to arrive wins, later ones are ignored.
    const finish = (action: () => void): void => {
      if (settled) return;
      settled = true;
      clearTimeout(timer);
      child.kill('SIGKILL');
      action();
    };

    const timer = setTimeout(
      () => finish(() => reject(new CompileError(`Compilation timed out after ${COMPILE_TIMEOUT_MS}ms`))),
      COMPILE_TIMEOUT_MS,
    );

    child.on('message', (reply: unknown) =>
      finish(() => {
        // Decode inside try/catch so a bad reply rejects the promise instead of
        // throwing out of the listener after `settled` is already true.
        try {
          resolve(decodeWorkerReply(reply));
        } catch (err) {
          reject(err);
        }
      }),
    );
    child.on('error', (err) => finish(() => reject(err)));
    child.on('exit', (exitCode, signal) =>
      finish(() => {
        // `exitCode` is null when the child was terminated by a signal.
        const cause = exitCode !== null ? `code ${exitCode}` : `signal ${signal}`;
        reject(new Error(`compile worker exited unexpectedly (${cause})`));
      }),
    );

    child.send({ lang, code });
  });
}
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
import { compileProject } from './compile.js';
2+
import { CompileError } from './errors.js';
3+
4+
// Worker script for a single compile. compile-isolated.ts forks one of these
// per request and SIGKILLs it on timeout, so a WASM compiler that hangs
// synchronously takes down only this child, not the service's event loop.
interface CompileRequest {
  lang: string;
  code: string;
}

/** Send one reply to the parent and exit with status 0 when the send callback fires. */
const replyAndExit = (payload: object): void => {
  process.send!(payload, () => process.exit(0));
};

process.on('message', async (request: CompileRequest) => {
  try {
    const tapBytes = await compileProject(request.lang, request.code);
    // Payloads are small; sending base64 text sidesteps Buffer-serialisation quirks over IPC.
    replyAndExit({ ok: true, tapB64: tapBytes.toString('base64') });
  } catch (failure) {
    // `compile` tells the parent whether to rebuild this as a CompileError.
    const isCompileFailure = failure instanceof CompileError;
    const detail = failure instanceof Error ? failure.message : String(failure);
    replyAndExit({ ok: false, compile: isCompileFailure, error: detail });
  }
});

apps/gif-service/src/routes/project.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import { Router, Request, Response } from 'express';
22
import { GIFGenerator } from '../gif-generator.js';
33
import { fetchProject } from '../hasura.js';
4-
import { compileProject } from '../compile.js';
4+
import { compileProjectIsolated } from '../compile-isolated.js';
55
import { CompileError } from '../errors.js';
66

77
const router = Router();
@@ -56,7 +56,7 @@ async function handle(format: Format, req: Request, res: Response): Promise<void
5656

5757
let tap: Buffer;
5858
try {
59-
tap = await compileProject(project.lang, project.code);
59+
tap = await compileProjectIsolated(project.lang, project.code);
6060
} catch (err) {
6161
if (err instanceof CompileError) {
6262
res.status(400).json({ error: 'Project failed to compile', detail: err.message });

apps/gif-service/src/routes/source.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import { Router, Request, Response } from 'express';
22
import { GIFGenerator } from '../gif-generator.js';
3-
import { compileProject } from '../compile.js';
3+
import { compileProjectIsolated } from '../compile-isolated.js';
44
import { CompileError } from '../errors.js';
55

66
const router = Router();
@@ -41,7 +41,7 @@ async function handle(format: Format, req: Request, res: Response): Promise<void
4141

4242
let tap: Buffer;
4343
try {
44-
tap = await compileProject(source.lang, source.code);
44+
tap = await compileProjectIsolated(source.lang, source.code);
4545
} catch (err) {
4646
if (err instanceof CompileError) {
4747
res.status(400).json({ error: 'Compilation failed', detail: err.message });

docker-compose.yaml

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,13 +42,28 @@ services:
4242
- graphql-engine
4343
- serve
4444

45+
# Untrusted-input compilers. Cap resources and drop privileges. Not read_only:
46+
# these compilers write intermediate files; confirm a writable path per image
47+
# before tightening further.
4548
z88dk:
4649
image: ghcr.io/stever/zxcoder-api-z88dk
4750
restart: always
51+
mem_limit: 512m
52+
cpus: 1.0
53+
pids_limit: 256
54+
cap_drop: [ALL]
55+
security_opt:
56+
- no-new-privileges:true
4857

4958
zxbasic:
5059
image: ghcr.io/stever/zxcoder-api-zxbasic
5160
restart: always
61+
mem_limit: 512m
62+
cpus: 1.0
63+
pids_limit: 256
64+
cap_drop: [ALL]
65+
security_opt:
66+
- no-new-privileges:true
5267

5368
# Headless emulator: compiles a program to .tap and renders it running to
5469
# GIF/MP4. Renders inline BASIC, or a public project looked up from Hasura
@@ -64,6 +79,36 @@ services:
6479
- hasura
6580
environment:
6681
HASURA_URL: http://hasura:8080/v1/graphql
82+
# Untrusted execution: cap CPU/mem/processes and drop privileges. Root FS is
83+
# read-only with a writable tmpfs for the temp mp4/f32le files it creates.
84+
# NOTE: verify `user: node` + `read_only` on first deploy — the image must
85+
# let the node user read /app and tsx must cache under /tmp.
86+
mem_limit: 1g
87+
cpus: 2.0
88+
pids_limit: 512
89+
user: node
90+
read_only: true
91+
tmpfs:
92+
- /tmp
93+
cap_drop: [ALL]
94+
security_opt:
95+
- no-new-privileges:true
96+
healthcheck:
97+
test: ["CMD", "node", "-e", "fetch('http://localhost:5001/health').then(r=>process.exit(r.ok?0:1)).catch(()=>process.exit(1))"]
98+
interval: 30s
99+
timeout: 5s
100+
retries: 3
101+
start_period: 20s
102+
# NETWORK ISOLATION (prod): gif-service + zxbasic + z88dk need no inbound
103+
# internet, only intra-stack traffic (gif-service -> hasura; hasura ->
104+
# zxbasic/z88dk). In the prod compose put them on an `internal: true`
105+
# network that also carries hasura. Not wired here to avoid breaking dev
106+
# connectivity that can't be validated in this repo.
107+
#
108+
# AUTO-RESTART ON HANG: plain `restart: always` restarts on exit, NOT on an
109+
# unhealthy healthcheck. To recover a wedged event loop automatically, run
110+
# an autoheal sidecar (or Swarm). With compile now isolated in a killable
111+
# child and the render wall-clock guard, the remaining wedge risk is small.
67112

68113
# GoToSocial: the bot's federated identity, served at social.zxplay.org but
69114
# presenting handles as @user@zxplay.org via the account-domain webfinger trick.

0 commit comments

Comments
 (0)