diff --git a/ci.jsonnet b/ci.jsonnet index bff43aa641..9ccdccde2a 100644 --- a/ci.jsonnet +++ b/ci.jsonnet @@ -5,7 +5,7 @@ (import "ci/python-gate.libsonnet") + (import "ci/python-bench.libsonnet") + { - overlay: "26571215e27b3c415afb8119d38a0418c14b29c9", + overlay: "12367561e6a2b54df8d3b1bd400e431de01eef0f", specVersion: "8", // Until buildbot issues around CI tiers are resolved, we cannot use them // tierConfig: self.tierConfig, @@ -310,15 +310,17 @@ "tox-example": gpgate_ee + require(GPYEE_NATIVE_STANDALONE) + platform_spec(no_jobs) + platform_spec({ "linux:amd64:jdk-latest" : tier3, }), - // "python-svm-graalos-standalone-build": gpgate_ee + internet_access_env + platform_spec(no_jobs) + platform_spec({ - // "linux:amd64:jdk-latest": tier3 + $.ol8 + task_spec({ - // environment +: { - // GRAALPY_GRAALOS_TOOLCHAIN_URL: $.overlay_imports.GRAALPY_GRAALOS_TOOLCHAIN_URL, - // GRAALPY_GRAALOS_RUNTIME_URL: $.overlay_imports.GRAALPY_GRAALOS_RUNTIME_URL, - // GRAALPY_GRAALOS_ARTIFACT_BASE_URL: $.overlay_imports.GRAALPY_GRAALOS_ARTIFACT_BASE_URL, - // }, - // }), - // }), + "python-svm-graalos-standalone-build": gpgate_ee + internet_access_env + platform_spec(no_jobs) + platform_spec({ + "linux:amd64:jdk-latest": tier3 + $.ol8 + task_spec({ + capabilities+: ["mpk", "!fast", "!x82", "!x82_16_367"], + deploysArtifacts: true, + environment +: { + GRAALPY_GRAALOS_TOOLCHAIN_URL: $.overlay_imports.GRAALPY_GRAALOS_TOOLCHAIN_URL, + GRAALPY_GRAALOS_RUNTIME_URL: $.overlay_imports.GRAALPY_GRAALOS_RUNTIME_URL, + GRAALPY_GRAALOS_ARTIFACT_BASE_URL: $.overlay_imports.GRAALPY_GRAALOS_ARTIFACT_BASE_URL, + }, + }), + }), }, local need_pgo = task_spec({runAfter: ["python-pgo-profile-post_merge-linux-amd64-jdk-latest"]}), diff --git a/graalpython/com.oracle.graal.python.test/src/tests/graalos/GRAALOS_DEMO.md b/graalpython/com.oracle.graal.python.test/src/tests/graalos/GRAALOS_DEMO.md new file mode 100644 index 0000000000..65fe1a72e0 --- /dev/null +++ 
b/graalpython/com.oracle.graal.python.test/src/tests/graalos/GRAALOS_DEMO.md @@ -0,0 +1,78 @@ +# GraalOS Standalone Sandbox Demo + +This demo shows a small chat-style Python evaluator running inside the +GraalPy GraalOS standalone. + +The story: + +1. `rich` renders a friendly terminal UI. +2. The demo treats each entered expression as untrusted Python code, such as + code produced by an LLM agent or pasted by a human operator. +3. The process is inside the GraalOS sandbox, so file, subprocess, + network, and native library attempts remain contained. + +## Setup + +We can install the `rich` wheel directly into the standalone's `site-packages` +using any standard Python. While we could run `ensurepip` and `pip` inside the +sandbox by configuring the appropriate network access, we intentionally +do this outside the sandbox. The sandboxed standalone has no +outbound network mapping by default, which is one of the things the demo can +show. + +```bash +python3 -m pip install \ + --target GRAALPY_NATIVE_GRAALOS_STANDALONE/lib/python3.12/site-packages \ + --only-binary=:all: \ + --python-version 3.12 \ + --implementation py --implementation graalpy \ + --abi none --abi graalpy250_312_native \ + --platform any --platform graalos_x86_64 \ + --no-compile \ + rich +``` + +There should be a file `test_graalos_sandbox_chat.py` in this directory. If +not, copy it from the GraalPy source repository.
From inside the +sandbox that file is available as `/test_graalos_sandbox_chat.py`, so run: + +```bash +./bin/graalpy /test_graalos_sandbox_chat.py +``` + +For a non-interactive walkthrough: + +```bash +./bin/graalpy /test_graalos_sandbox_chat.py --demo +``` + +## Demo Beats + +Start with a normal expression: + +```python +sum([i*i for i in range(1000)]) +``` + +Then move on to untrusted code that tries to access host resources: + +```python +open('/etc/passwd').read() +open('/etc/passwd').read().splitlines()[:3] +open('/etc/shadow').read() +__import__('subprocess').run(['/bin/sh', '-c', 'id'], capture_output=True, text=True) +__import__('socket').create_connection(('example.com', 80), timeout=2) +__import__('ctypes').CDLL('libc.so').system(b'cat /etc/shadow') +``` + +Expected result: harmless operations work or fail normally; sensitive host +resources are unavailable because the process only sees the sandboxed virtual +filesystem, process namespace, and configured network policy. The native +`system()` probe returns `-1`, which the demo renders as blocked. + +## Why This Is Useful + +This is a deliberately unsafe application pattern: it evaluates untrusted Python +code directly. That is useful for demonstrating the actual containment boundary. +GraalOS is that boundary, and it mediates filesystem, subprocess, native, and +network behavior even when the application itself offers no extra guardrails. diff --git a/graalpython/com.oracle.graal.python.test/src/tests/graalos/test_graalos_sandbox_chat.py b/graalpython/com.oracle.graal.python.test/src/tests/graalos/test_graalos_sandbox_chat.py new file mode 100644 index 0000000000..09c331ecee --- /dev/null +++ b/graalpython/com.oracle.graal.python.test/src/tests/graalos/test_graalos_sandbox_chat.py @@ -0,0 +1,208 @@ +#!/usr/bin/env python3 +# Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved. +# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+# +# The Universal Permissive License (UPL), Version 1.0 +# +# Subject to the condition set forth below, permission is hereby granted to any +# person obtaining a copy of this software, associated documentation and/or +# data (collectively the "Software"), free of charge and under any and all +# copyright rights in the Software, and any and all patent rights owned or +# freely licensable by each licensor hereunder covering either (i) the +# unmodified Software as contributed to or provided by such licensor, or (ii) +# the Larger Works (as defined below), to deal in both +# +# (a) the Software, and +# +# (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if +# one is included with the Software each a "Larger Work" to which the Software +# is contributed by such licensors), +# +# without restriction, including without limitation the rights to copy, create +# derivative works of, display, perform, and distribute the Software and make, +# use, sell, offer for sale, import, export, have made, and have sold the +# Software and the Larger Work(s), and to sublicense the foregoing rights on +# either these or other terms. +# +# This license is subject to the following condition: +# +# The above copyright notice and either this complete permission notice or at a +# minimum a reference to the UPL must be included in all copies or substantial +# portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+"""Small Rich demo for the GraalOS standalone sandbox.""" + +from __future__ import annotations + +import argparse +import io +import sysconfig +import textwrap +import time +import unittest +from dataclasses import dataclass + + +console = None + + +@dataclass +class EvalResult: + mode: str + ok: bool + output: str + elapsed_ms: float +def render_message(role: str, body: str, style: str) -> None: + from rich.panel import Panel + from rich.text import Text + console.print(Panel(Text(body), title=role, title_align="left", border_style=style)) + + +def unsafe_eval(expr: str): + start = time.perf_counter() + try: + value = eval(expr) + elapsed = (time.perf_counter() - start) * 1000 + if value == -1: + return EvalResult("python eval", False, "-1 (operation denied by sandbox/runtime)", elapsed) + return EvalResult("python eval", True, repr(value), elapsed) + except Exception as exc: + elapsed = (time.perf_counter() - start) * 1000 + return EvalResult("python eval", False, f"{type(exc).__name__}: {exc}", elapsed) + + +def render_result(result) -> None: + from rich.table import Table + table = Table.grid(padding=(0, 1)) + table.add_column(style="bold") + table.add_column() + table.add_row("mode", result.mode) + table.add_row("status", "[green]ok[/green]" if result.ok else "[red]blocked/error[/red]") + table.add_row("time", f"{result.elapsed_ms:.1f} ms") + console.print(table) + render_message("sandbox", result.output, "green" if result.ok else "red") + + +def evaluate(line: str) -> None: + line = line.strip() + if not line: + return + render_result(unsafe_eval(line)) + + +def demo_script() -> list[str]: + return [ + "sum([i*i for i in range(1000)])", + "sin(pi / 4) ** 2 + cos(pi / 4) ** 2", + "open('/etc/passwd').read()", + "open('/etc/passwd').read().splitlines()[:3]", + "open('/etc/shadow').read()", + "__import__('subprocess').run(['/bin/sh', '-c', 'id'], capture_output=True, text=True)", + "__import__('socket').create_connection(('example.com', 80), timeout=2)", + 
"__import__('ctypes').CDLL('libc.so').system(b'cat /etc/shadow')", + ] + + +def print_intro() -> None: + body = textwrap.dedent( + """ + Type Python expressions and get chat-style results. + + This demo treats each expression as untrusted Python code, such as + code proposed by an LLM agent or pasted by a human operator. + GraalOS sandboxes that code, so filesystem, subprocess, native + library, and network attempts remain contained. + + Commands: /demo, /help, /quit + """ + ).strip() + render_message("graalos sandbox chat", body, "cyan") + + +def print_help() -> None: + examples = "\n".join(demo_script()) + from rich.syntax import Syntax + console.print(Syntax(examples, "python", theme="ansi_dark", word_wrap=True)) + + +def interactive() -> int: + print_intro() + while True: + try: + line = console.input("[bold cyan]you>[/bold cyan] ") + except (EOFError, KeyboardInterrupt): + console.print() + return 0 + command = line.strip() + if command in {"/quit", "/exit"}: + return 0 + if command == "/help": + print_help() + continue + if command == "/demo": + run_demo() + continue + evaluate(line) + + +def run_demo() -> None: + for line in demo_script(): + render_message("you", line, "blue") + evaluate(line) + + +def main(argv: list[str] | None = None) -> int: + from rich.console import Console + global console + if console is None: + console = Console() + parser = argparse.ArgumentParser() + parser.add_argument("--demo", action="store_true", help="run the prepared demo script and exit") + args = parser.parse_args(argv) + + if args.demo: + print_intro() + run_demo() + return 0 + return interactive() + + +def skip_unless_graalos(): + soabi = sysconfig.get_config_var("SOABI") or "" + if "graalos" not in soabi: + raise unittest.SkipTest(f"requires GraalOS SOABI, got {soabi!r}") + + +class GraalOSSandboxChatTests(unittest.TestCase): + + def setUp(self): + skip_unless_graalos() + + def test_demo_packages(self): + import rich + + self.assertTrue(rich.get_console()) + + def 
test_sandbox_chat_demo(self): + from rich.console import Console + global console + output = io.StringIO() + console = Console(file=output, force_terminal=False, color_system=None, width=120) + self.assertEqual(main(["--demo"]), 0) + stdout = output.getvalue() + self.assertIn("sum([i*i for i in range(1000)])", stdout) + self.assertIn("__import__('socket').create_connection", stdout) + self.assertIn("gaierror", stdout) + self.assertIn("FileNotFoundError", stdout) + self.assertIn("operation denied", stdout) + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/graalpython/com.oracle.graal.python.test/src/tests/test_graalos_standalone.py b/graalpython/com.oracle.graal.python.test/src/tests/graalos/test_graalos_standalone.py similarity index 77% rename from graalpython/com.oracle.graal.python.test/src/tests/test_graalos_standalone.py rename to graalpython/com.oracle.graal.python.test/src/tests/graalos/test_graalos_standalone.py index 49b73eddb8..ff7c02798b 100644 --- a/graalpython/com.oracle.graal.python.test/src/tests/test_graalos_standalone.py +++ b/graalpython/com.oracle.graal.python.test/src/tests/graalos/test_graalos_standalone.py @@ -41,19 +41,26 @@ import unittest -def test_graalos_sqlite3_native_extension_smoke(): +def skip_unless_graalos(): soabi = sysconfig.get_config_var("SOABI") or "" if "graalos" not in soabi: raise unittest.SkipTest(f"requires GraalOS SOABI, got {soabi!r}") - import _sqlite3 - import sqlite3 - - assert _sqlite3.sqlite_version - conn = sqlite3.connect(":memory:") - try: - conn.execute("create table values_for_sum(value integer)") - conn.executemany("insert into values_for_sum(value) values (?)", [(1,), (2,), (3,)]) - assert conn.execute("select sum(value) from values_for_sum").fetchone()[0] == 6 - finally: - conn.close() + +class GraalOSStandaloneTests(unittest.TestCase): + + def setUp(self): + skip_unless_graalos() + + def test_sqlite3_native_extension_smoke(self): + import _sqlite3 + import sqlite3 + + 
self.assertTrue(_sqlite3.sqlite_version) + conn = sqlite3.connect(":memory:") + try: + conn.execute("create table values_for_sum(value integer)") + conn.executemany("insert into values_for_sum(value) values (?)", [(1,), (2,), (3,)]) + self.assertEqual(conn.execute("select sum(value) from values_for_sum").fetchone()[0], 6) + finally: + conn.close() diff --git a/graalpython/graalpy_graalos_standalone_payload/CMakeLists.txt b/graalpython/graalpy_graalos_standalone_payload/CMakeLists.txt index 268e254b0d..8c7b295d63 100644 --- a/graalpython/graalpy_graalos_standalone_payload/CMakeLists.txt +++ b/graalpython/graalpy_graalos_standalone_payload/CMakeLists.txt @@ -49,7 +49,8 @@ file(REMOVE_RECURSE "${PAYLOAD_DIR}/bin" "${PAYLOAD_DIR}/libexec" "${PAYLOAD_DIR}/lib" - "${PAYLOAD_DIR}/config.json") + "${PAYLOAD_DIR}/config.json" + "${PAYLOAD_DIR}/README_GRAALOS_STANDALONE.md") file(MAKE_DIRECTORY "${PAYLOAD_DIR}/bin" "${PAYLOAD_DIR}/libexec" @@ -169,6 +170,7 @@ _write_launcher("${PAYLOAD_DIR}/bin/${GRAALPY_CONFIG_LAUNCHER}" "/bin/graalpy-co _write_launcher("${PAYLOAD_DIR}/libexec/${GRAALPY_POLYGLOT_GET_LAUNCHER}" "/libexec/graalpy-polyglot-get") _copy_file("${CMAKE_CURRENT_LIST_DIR}/config.json" "${PAYLOAD_DIR}/config.json") +_copy_file("${CMAKE_CURRENT_LIST_DIR}/README_GRAALOS_STANDALONE.md" "${PAYLOAD_DIR}/README_GRAALOS_STANDALONE.md") _copy_executable("${CMAKE_CURRENT_LIST_DIR}/graalpy-sandbox-launcher.sh" "${GRAALOS_DIR}/graalpy-sandbox-launcher") _copy_executable( "${CMAKE_CURRENT_LIST_DIR}/graalpy-sandbox-expand-config.sh" diff --git a/graalpython/graalpy_graalos_standalone_payload/README_GRAALOS_STANDALONE.md b/graalpython/graalpy_graalos_standalone_payload/README_GRAALOS_STANDALONE.md new file mode 100644 index 0000000000..57a8562a9c --- /dev/null +++ b/graalpython/graalpy_graalos_standalone_payload/README_GRAALOS_STANDALONE.md @@ -0,0 +1,289 @@ +# GraalPy GraalOS Standalone + +This package runs GraalPy through `graalhost` with a sandboxed default +configuration. 
The goal is to feel like a normal Python installation for local +command-line use, while making resource usage, filesystem, and network access +explicit and sandboxing the entire execution to prevent untrusted Python code +or native extensions from compromising the system. + +## Quick Start + +Run Python normally: + +```bash +bin/graalpy +bin/graalpy -c 'print(42)' +bin/python +bin/python3 +``` + +Show Python help: + +```bash +bin/graalpy --help +``` + +The launcher passes Python's own options through unchanged. After Python prints +its help, the launcher appends a short section describing the additional +`--graalhost.*` options. + +Enable graalhost diagnostics for one run: + +```bash +bin/graalpy --graalhost.verbose -c 'print(42)' +``` + +## What Is Sandboxed By Default + +By default, the standalone: + +- keeps `stdin`, `stdout`, and `stderr` attached to your terminal +- allows access to the standalone tree and the small set of system files needed + for runtime startup +- denies general outbound network access +- denies bind and listen on TCP and UDP ports + +This means `bin/graalpy` behaves like a local Python process, but it does not +automatically get unrestricted filesystem or internet access. + +## Package Layout + +- `bin/graalpy`, `bin/python`, `bin/python3`: launch GraalPy inside graalhost +- `config.json`: sandbox and launcher configuration +- `lib/graalos/graalpy-sandbox-launcher`: shell wrapper used by the launchers +- `lib/graalos/graalpy-sandbox-expand-config`: fills in generated filesystem + mappings +- `lib/graalos/graalhost`: embedded GraalOS runtime + +## `config.json` + +`config.json` is the main file you edit to change sandbox behavior. The +launcher expands it before starting `graalhost`. 
+ +Common top-level fields: + +- `env`: environment variables visible inside the sandboxed process +- `working_dir`: initial working directory inside the virtual filesystem +- `fds`: how standard input, output, error, and other file descriptors are + wired +- `allowed_ports`: explicit bind and listen allowlist +- `netmappings`: outbound and inbound network policy +- `allow_runtime_codegen`: allow runtime-generated code after GraalOS + binsweep verification +- `allow_signal_self_snapshot`: allows the process to create a snapshot by + signaling itself +- `memlimit`: memory budget in GiB +- `testing_default_mappings`: keep this enabled for the packaged standalone + +The launcher also understands a `graalhost` section: + +```json +"graalhost": { + "seccomp": null, + "log_level": null, + "log_to": null, + "visorcalloutput": null, + "extra_args": [] +} +``` + +Meaning: + +- `seccomp`: forwarded as `--seccomp` +- `log_level`: forwarded as `--log_level` +- `log_to`: forwarded as `--log_to` +- `visorcalloutput`: forwarded as `--visorcalloutput` +- `extra_args`: additional raw graalhost arguments, one item per array entry + +If you do not set graalhost logging options, the launcher stays quiet by +default. `--graalhost.verbose` overrides that for a single invocation. + +## Launcher Options + +These options are consumed by the launcher and are not passed to Python: + +- `--graalhost.verbose` +- `--graalhost.run_snapshot=PATH` +- `--graalhost.log_level=LEVEL` +- `--graalhost.log_to=DEST` +- `--graalhost.visorcalloutput=DEST` +- `--graalhost.seccomp=MODE` +- `--graalhost.extra_arg=ARG` + +`--graalhost.run_snapshot=PATH` restores a previously created GraalOS snapshot +instead of starting a fresh Python process. It should be used by itself: + +```bash +bin/graalpy --graalhost.run_snapshot=/path/to/persistIso... 
+``` + +## Common Scenarios + +### Use It Like Normal Python + +```bash +bin/graalpy -c 'print("hello")' +printf 'hello\n' | bin/graalpy -c 'print(input())' +``` + +The default config keeps the terminal connected: + +```json +"fds": { + "stdin": "stdin", + "stdout": "stdout", + "stderr": "stderr" +} +``` + +### Redirect Standard Streams + +To write output to files, edit `config.json`: + +```json +"fds": { + "stdin": "null", + "stdout": "file:/tmp/graalpy.stdout", + "stderr": "file:/tmp/graalpy.stderr" +} +``` + +Use `append:/path` instead of `file:/path` if you want append mode. + +### Keep Networking Disabled + +This is the default. If you do not add `netmappings`, the process does not get +general outbound network access. If `allowed_ports` is empty, it also cannot +bind or listen on ports. + +### Allow an Outbound TCP Connection + +To allow connections to `127.0.0.1:6010`, add: + +```json +"netmappings": [ + { + "networks": [ + { + "ips": ["127.0.0.1/32"], + "protocols": [ + { "type": "tcp", "outgoing_ports": ["6010"] } + ] + } + ] + } +] +``` + +If you use hostnames instead of literal IPs, your network policy also needs to +allow DNS. + +### Allow an Incoming Listener + +To allow listening on `127.0.0.1:6006`, add: + +```json +"allowed_ports": [6006], +"netmappings": [ + { + "networks": [ + { + "ips": ["127.0.0.1/32"], + "protocols": [ + { "type": "tcp", "incoming_ports": ["6006"] } + ] + } + ] + } +] +``` + +### Create and Resume a Snapshot + +If you want to resume a warm Python process later, enable self-snapshotting in +`config.json`: + +```json +"allow_signal_self_snapshot": true +``` + +Then run a Python program that saves its snapshot path and signals itself with +`SIGSTOP` when it is ready: + +```python +import os +import signal + +print("Ready to snapshot") +os.kill(os.getpid(), signal.SIGSTOP) +``` + +After GraalOS writes the snapshot file, resume it with: + +```bash +bin/graalpy --graalhost.run_snapshot=/path/to/persistIso... 
+``` + +Restoring a snapshot uses the saved process state. It does not take additional +Python command-line arguments on the same invocation. + +When `allow_signal_self_snapshot` is enabled, the launcher keeps the generated +expanded endpoint config under `tmp/graalpy-sandbox.*` instead of deleting it +at process exit. Snapshot restore needs that original directory to remain +available because the saved endpoint configuration records it as +`endpoint_config_path`. + +### Show Graalhost Diagnostics + +For launcher-level troubleshooting: + +```bash +bin/graalpy --graalhost.verbose -c 'print("hello")' +``` + +For more control, use one-run overrides such as: + +```bash +bin/graalpy \ + --graalhost.log_level=debug \ + --graalhost.log_to=stderr \ + --graalhost.visorcalloutput=@stderr \ + -c 'print("hello")' +``` + +### Install extra packages + +You may install additional packages directly into the standalone from the +outside, by selecting compatible tags, for example: + +```bash +python3 -m pip install \ + --target GRAALPY_NATIVE_GRAALOS_STANDALONE/lib/python3.12/site-packages \ + --only-binary=:all: \ + --python-version 3.12 \ + --implementation py --implementation graalpy \ + --abi none --abi graalpy250_312_native \ + --platform any --platform graalos_x86_64 \ + --no-compile \ + rich asteval +``` + +You may have to set `--extra-index-url` to an index that provides +pre-built binary wheels for GraalOS, since building these requires a +special toolchain. + +## Notes About Graalhost + +The standalone wraps `graalhost`, which is the GraalOS runtime responsible for: + +- launching the Python isolate +- applying filesystem and network policy +- routing file descriptors +- creating and restoring snapshots +- emitting host-side diagnostics + +The launcher covers common usage. 
If you need the full host CLI, run: + +```bash +lib/graalos/graalhost --help +``` diff --git a/graalpython/graalpy_graalos_standalone_payload/config.json b/graalpython/graalpy_graalos_standalone_payload/config.json index 909725a003..3bfd8b5e0d 100644 --- a/graalpython/graalpy_graalos_standalone_payload/config.json +++ b/graalpython/graalpy_graalos_standalone_payload/config.json @@ -6,12 +6,20 @@ }, "working_dir": "/", "allow_runtime_codegen": true, + "memlimit": 64.0, + "fds": { + "stdin": "stdin", + "stdout": "stdout", + "stderr": "stderr" + }, "testing_default_mappings": true, "allowed_ports": [], "graalhost": { "seccomp": null, "log_level": null, - "extra_args": [] + "log_to": null, + "visorcalloutput": null, + "extra_args": ["--disable_core_scheduling"] }, "fsmappings": [] } diff --git a/graalpython/graalpy_graalos_standalone_payload/graalpy-sandbox-fsmappings.sh b/graalpython/graalpy_graalos_standalone_payload/graalpy-sandbox-fsmappings.sh index a418f208f1..f216b01fa6 100644 --- a/graalpython/graalpy_graalos_standalone_payload/graalpy-sandbox-fsmappings.sh +++ b/graalpython/graalpy_graalos_standalone_payload/graalpy-sandbox-fsmappings.sh @@ -75,6 +75,32 @@ emit_pseudo_mapping() { emit_mapping "$outfile" "$concrete" "$virt" "$extra" } +emit_musl_interpreter_mapping() { + local outfile="$1" + local executable="$2" + local safe_libc="$3" + local interp + + if ! 
command -v readelf >/dev/null 2>&1; then + return 0 + fi + + interp="$(readelf -l "$executable" 2>/dev/null | sed -n 's/.*Requesting program interpreter: \([^]]*\).*/\1/p' | head -n 1)" + case "$interp" in + /*) + case "${emitted_musl_interpreters:-}" in + *" +${interp} +"*) return 0 ;; + esac + emitted_musl_interpreters="${emitted_musl_interpreters:-} +${interp} +" + emit_mapping "$outfile" "$safe_libc" "$interp" ' "verif": true,' + ;; + esac +} + graalpy_sandbox_emit_fsmappings() { local standalone_home="$1" local outfile="$2" @@ -93,6 +119,7 @@ graalpy_sandbox_emit_fsmappings() { fi need_comma=false + emitted_musl_interpreters="" emit_mapping "$outfile" "$standalone_home" "/" ' "using": {"handler": "host_fs"}, "mutable": true, "allow_set_x_bit": true, @@ -106,11 +133,13 @@ graalpy_sandbox_emit_fsmappings() { while IFS= read -r file; do virt="/bin/${file#"$native_bin"/}" emit_mapping "$outfile" "$file" "$virt" ' "verif": true,' + emit_musl_interpreter_mapping "$outfile" "$file" "$safe_libc" done < <(find "$native_bin" -maxdepth 1 -type f -perm -111 | sort) else while IFS= read -r file; do virt="/${file#"$standalone_home"/}" emit_mapping "$outfile" "$file" "$virt" ' "verif": true,' + emit_musl_interpreter_mapping "$outfile" "$file" "$safe_libc" done < <(find "${standalone_home}/bin" -maxdepth 1 -type f -perm -111 | sort) fi diff --git a/graalpython/graalpy_graalos_standalone_payload/graalpy-sandbox-launcher.sh b/graalpython/graalpy_graalos_standalone_payload/graalpy-sandbox-launcher.sh index 2d30dc5295..e526c07537 100644 --- a/graalpython/graalpy_graalos_standalone_payload/graalpy-sandbox-launcher.sh +++ b/graalpython/graalpy_graalos_standalone_payload/graalpy-sandbox-launcher.sh @@ -64,6 +64,10 @@ graalhost="${standalone_home}/lib/graalos/graalhost" libc="${standalone_home}/lib/graalos/libc.so" expand_config="${standalone_home}/lib/graalos/graalpy-sandbox-expand-config" config="${standalone_home}/config.json" +tmp_root="${standalone_home}/tmp" 
+launcher_verbose=false +launcher_show_help=false +cleanup_tmpdir=true if [ ! -x "$graalhost" ]; then echo "missing or non-executable GraalHost binary: $graalhost" >&2 @@ -85,20 +89,213 @@ if [ ! -f "$config" ]; then exit 126 fi -tmpdir="$(mktemp -d "${TMPDIR:-/tmp}/graalpy-sandbox.XXXXXXXXXX")" -trap 'rm -rf "$tmpdir"' EXIT +mkdir -p "$tmp_root" +tmp_base="${TMPDIR:-}" +if [ -z "$tmp_base" ] || [ ! -d "$tmp_base" ]; then + tmp_base="$tmp_root" +fi + +# Snapshot restore reuses the endpoint config persisted in the snapshot. When +# self-snapshotting is enabled, keep the generated config directory so the +# snapshotted endpoint_config_path still exists on resume. +if grep -Eq '^[[:space:]]*"allow_signal_self_snapshot"[[:space:]]*:[[:space:]]*true([[:space:]]*[,}])' "$config"; then + cleanup_tmpdir=false +fi + +tmpdir="$(mktemp -d "${tmp_base}/graalpy-sandbox.XXXXXXXXXX")" +if [ "$cleanup_tmpdir" = "true" ]; then + trap 'rm -rf "$tmpdir"' EXIT +fi endpoint_config="${tmpdir}/config.json" "$expand_config" "$standalone_home" "$config" "$endpoint_config" +graalhost_config="$( + awk ' + /"graalhost"[[:space:]]*:/ { in_obj = 1 } + in_obj { print } + in_obj && /^[[:space:]]*}[[:space:]]*,?[[:space:]]*$/ { exit } + ' "$config" +)" + +print_graalhost_help() { + cat <<'EOF' + +Additional graalhost launcher options: + --graalhost.verbose + Enable graalhost verbose logging on stderr for this launch. + --graalhost.run_snapshot=PATH + Restore and run a GraalOS snapshot instead of starting a new Python process. + --graalhost.log_level=LEVEL + Override graalhost log level for this launch. + --graalhost.log_to=DEST + Override graalhost log sink(s) for this launch. + --graalhost.visorcalloutput=DEST + Override graalhost visorcall logging destination for this launch. + --graalhost.seccomp=MODE + Override graalhost seccomp mode for this launch. + --graalhost.extra_arg=ARG + Append one raw graalhost argument for this launch. May be repeated. 
+EOF +} + +extract_graalhost_string() { + local key="$1" + printf '%s\n' "$graalhost_config" | sed -n "s/^[[:space:]]*\"${key}\"[[:space:]]*:[[:space:]]*\"\\([^\"]*\\)\".*/\\1/p" | head -n 1 +} + +extract_graalhost_scalar() { + local key="$1" + printf '%s\n' "$graalhost_config" | sed -n "s/^[[:space:]]*\"${key}\"[[:space:]]*:[[:space:]]*\\([^,][^,}]*\\).*/\\1/p" | head -n 1 | sed 's/[[:space:]]*$//' +} + +extract_graalhost_array() { + local key="$1" + printf '%s\n' "$graalhost_config" | awk -v key="$key" ' + $0 ~ ("\"" key "\"[[:space:]]*:[[:space:]]*\\[") { + in_arr = 1 + line = substr($0, index($0, "[") + 1) + } + in_arr { + if (!length(line)) { + line = $0 + } + while (match(line, /"([^"]*)"/)) { + print substr(line, RSTART + 1, RLENGTH - 2) + line = substr(line, RSTART + RLENGTH) + } + line = "" + if ($0 ~ /\]/) { + exit + } + } + ' +} + graalhost_args=() -log_level="$(sed -n 's/^[[:space:]]*"log_level"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/p' "$config" | head -n 1)" +python_args=() +cli_run_snapshot="" +cli_log_level="" +cli_log_to="" +cli_visorcalloutput="" +cli_seccomp="" +cli_extra_args=() +for arg in "$@"; do + case "$arg" in + --graalhost.verbose) + launcher_verbose=true + ;; + --graalhost.run_snapshot=*) + cli_run_snapshot="${arg#--graalhost.run_snapshot=}" + ;; + --graalhost.log_level=*) + cli_log_level="${arg#--graalhost.log_level=}" + ;; + --graalhost.log_to=*) + cli_log_to="${arg#--graalhost.log_to=}" + ;; + --graalhost.visorcalloutput=*) + cli_visorcalloutput="${arg#--graalhost.visorcalloutput=}" + ;; + --graalhost.seccomp=*) + cli_seccomp="${arg#--graalhost.seccomp=}" + ;; + --graalhost.extra_arg=*) + cli_extra_args+=("${arg#--graalhost.extra_arg=}") + ;; + -h|--help) + launcher_show_help=true + python_args+=("$arg") + ;; + *) + python_args+=("$arg") + ;; + esac +done + +if [ "$launcher_verbose" = "true" ]; then + graalhost_args+=(--verbose --log_to stderr) +else + graalhost_args+=(--log_level off --log_to visorbase --visorcalloutput @none) +fi 
+ +seccomp="$(extract_graalhost_scalar seccomp)" +case "$seccomp" in + "" | "null") ;; + *) graalhost_args+=(--seccomp "$seccomp") ;; +esac + +if [ "$launcher_verbose" != "true" ]; then + log_level="$(extract_graalhost_string log_level)" + log_to="$(extract_graalhost_string log_to)" + visorcalloutput="$(extract_graalhost_string visorcalloutput)" +else + log_level="" + log_to="" + visorcalloutput="" +fi + if [ -n "$log_level" ]; then graalhost_args+=(--log_level "$log_level") fi -exec "$graalhost" \ - ${graalhost_args[@]+"${graalhost_args[@]}"} \ - --musl_path "$libc" \ - --run_config=@"$endpoint_config" \ - --run_virtual "$virtual_executable" \ - "$@" +if [ -n "$log_to" ]; then + graalhost_args+=(--log_to "$log_to") +fi + +if [ -n "$visorcalloutput" ]; then + graalhost_args+=(--visorcalloutput "$visorcalloutput") +fi + +while IFS= read -r extra_arg; do + [ -n "$extra_arg" ] || continue + graalhost_args+=("$extra_arg") +done < <(extract_graalhost_array extra_args) + +if [ -n "$cli_seccomp" ]; then + graalhost_args+=(--seccomp "$cli_seccomp") +fi + +if [ -n "$cli_log_level" ]; then + graalhost_args+=(--log_level "$cli_log_level") +fi + +if [ -n "$cli_log_to" ]; then + graalhost_args+=(--log_to "$cli_log_to") +fi + +if [ -n "$cli_visorcalloutput" ]; then + graalhost_args+=(--visorcalloutput "$cli_visorcalloutput") +fi + +for extra_arg in "${cli_extra_args[@]}"; do + graalhost_args+=("$extra_arg") +done + +set +e +if [ -n "$cli_run_snapshot" ]; then + if [ "${#python_args[@]}" -gt 0 ]; then + echo "--graalhost.run_snapshot cannot be combined with Python arguments" >&2 + status=2 + else + "$graalhost" \ + ${graalhost_args[@]+"${graalhost_args[@]}"} \ + --musl_path "$libc" \ + --run "$cli_run_snapshot" + status=$? + fi +else + "$graalhost" \ + ${graalhost_args[@]+"${graalhost_args[@]}"} \ + --musl_path "$libc" \ + --run_config=@"$endpoint_config" \ + --run_virtual "$virtual_executable" \ + "${python_args[@]}" + status=$? 
+fi +set -e + +if [ "$launcher_show_help" = "true" ]; then + print_graalhost_help +fi + +exit "$status" diff --git a/mx.graalpython/graalos_versions.json b/mx.graalpython/graalos_versions.json index 55c7eb4e65..6c770ec25a 100644 --- a/mx.graalpython/graalos_versions.json +++ b/mx.graalpython/graalos_versions.json @@ -1,4 +1,4 @@ { - "runtime": "graalos/graalos_prod_pkeyson_sandboxon-runtime-2026_06_21_v1.0.0_13143_gdf587994b631-1.el8.x86_64.tar.gz", - "toolchain": "graal/graalvm-graalos-java25-linux-amd64-25.1.3-dev-g03c51fe.tar.gz" + "runtime": "graalos/graalos_prod_pkeyson_sandboxon-runtime-2026_06_25_v1.0.0_13202_g5af02a07e23e-1.el8.x86_64.tar.gz", + "toolchain": "graal/graalvm-graalos-java25-linux-amd64-25.2.4-dev-gbc3c7bd.tar.gz" } diff --git a/mx.graalpython/mx_graalpython.py b/mx.graalpython/mx_graalpython.py index 68c3b6c0d3..e4567b026c 100644 --- a/mx.graalpython/mx_graalpython.py +++ b/mx.graalpython/mx_graalpython.py @@ -1572,7 +1572,8 @@ def graalpytest(args): def run_python_unittests(python_binary, args=None, paths=None, exclude=None, env=None, cwd=None, lock=None, out=None, err=None, nonZeroIsFatal=True, timeout=None, - report: Union[Task, bool, None] = False, parallel=None, runner_args=None): + report: Union[Task, bool, None] = False, parallel=None, runner_args=None, test_runner=None, + reportfile=None, runner_reportfile=None): if lock: lock.acquire() @@ -1609,7 +1610,7 @@ def run_python_unittests(python_binary, args=None, paths=None, exclude=None, env # index in in that case env["PIP_EXTRA_INDEX_URL"] = pip_index - args += [_python_test_runner(), "run", "--durations", "10", "-n", parallelism, f"--subprocess-args={shlex.join(args)}"] + args += [test_runner or _python_test_runner(), "run", "--durations", "10", "-n", parallelism, f"--subprocess-args={shlex.join(args)}"] if runner_args: args += runner_args @@ -1623,12 +1624,14 @@ def run_python_unittests(python_binary, args=None, paths=None, exclude=None, env # at once it generates so much data we run 
out of heap space args.append('--separate-workers') - reportfile = None t0 = time.time() if report: - with tempfile.NamedTemporaryFile(prefix="test-report-", suffix=".json", delete=False) as report_tmp: - reportfile = os.path.abspath(report_tmp.name) - args += ["--mx-report", reportfile] + if reportfile is None: + with tempfile.NamedTemporaryFile(prefix="test-report-", suffix=".json", delete=False) as report_tmp: + reportfile = os.path.abspath(report_tmp.name) + else: + reportfile = os.path.abspath(reportfile) + args += ["--mx-report", runner_reportfile or reportfile] if paths is not None: args += paths diff --git a/mx.graalpython/mx_graalpython_graalos.py b/mx.graalpython/mx_graalpython_graalos.py index 4ee60c0a79..ee3375c938 100644 --- a/mx.graalpython/mx_graalpython_graalos.py +++ b/mx.graalpython/mx_graalpython_graalos.py @@ -41,7 +41,6 @@ # pylint: disable=cyclic-import -import base64 import gzip import json import os @@ -49,6 +48,7 @@ import shutil import sys import tarfile +import tempfile import urllib.parse import urllib.request from pathlib import Path @@ -94,7 +94,7 @@ def update_graalos_versions(): content = json.dumps(versions, indent=2, sort_keys=True) content += "\n" mx.update_file(GRAALOS_VERSIONS_PATH.as_posix(), content, showDiff=True) - SUITE.vc.git_command(SUITE.dir, ["add", GRAALOS_VERSIONS_PATH.relative_to(SUITE.dir)], abortOnError=True) + SUITE.vc.git_command(SUITE.dir, ["add", str(GRAALOS_VERSIONS_PATH.relative_to(SUITE.dir))], abortOnError=True) def resolve_latest_graalos_artifact_name(source, on_fail=mx.abort): @@ -192,8 +192,115 @@ def _ensure_graalos_runtime_inputs(runtime_home: Path, on_fail=mx.abort): on_fail("Extracted GraalOS runtime artifact is missing required files:\n" + "\n".join([str(p) for p in missing])) +def _prepare_graalos_demo(standalone_home: Path, env): + demo_wheels = standalone_home / "demo-wheels" + site_packages = standalone_home / "lib" / "python3.12" / "site-packages" + demo_wheels.mkdir(parents=True, 
exist_ok=True) + site_packages.mkdir(parents=True, exist_ok=True) + + run([ + sys.executable, "-m", "pip", "download", + "--only-binary=:all:", + "--implementation", "py", + "--python-version", "3.12", + "--abi", "none", + "--platform", "any", + "--dest", str(demo_wheels), + "rich", + ], env=env) + # Work around GRAALOS-8260 by installing pure-Python demo wheels from the + # host. Remove this once in-sandbox ensurepip/pip subprocesses work there. + run([ + sys.executable, "-m", "pip", "install", + "--target", str(site_packages), + "--no-index", + "--find-links", str(demo_wheels), + "--ignore-requires-python", + "--no-compile", + "--upgrade", + "rich", + ], env=env) + + from mx_graalpython import _python_unittest_root + graalos_tests = Path(_python_unittest_root()) / "graalos" + shutil.copy2(graalos_tests / "test_graalos_sandbox_chat.py", standalone_home / "test_graalos_sandbox_chat.py") + shutil.copy2(graalos_tests / "GRAALOS_DEMO.md", standalone_home / "GRAALOS_DEMO.md") + + +def _stage_graalos_test_harness(standalone_home: Path): + from mx_graalpython import _python_test_runner, _python_unittest_root + graalos_tests = Path(_python_unittest_root()) / "graalos" + harness_dir = standalone_home / "test-harness" + tests_dir = harness_dir / "tests" + tests_dir.mkdir(parents=True, exist_ok=True) + shutil.copy2( + _python_test_runner(), + harness_dir / "runner.py", + ) + shutil.copy2( + graalos_tests / "test_graalos_standalone.py", + tests_dir / "test_graalos_standalone.py", + ) + shutil.copy2( + graalos_tests / "test_graalos_sandbox_chat.py", + tests_dir / "test_graalos_sandbox_chat.py", + ) + + +def _set_graalos_standalone_env(standalone_home: Path, key, value, on_fail=mx.abort): + config_path = standalone_home / "config.json" + original_config = config_path.read_text(encoding="utf-8") + _ = json.dumps({key: value}) # Validate that both values can be represented in JSON. 
+ if re.search(rf'^\s*"{re.escape(key)}"\s*:', original_config, flags=re.MULTILINE): + return config_path, original_config + env_match = re.search(r'("env"\s*:\s*\{\n)(.*?)(\n\s*\})', original_config, flags=re.DOTALL) + if not env_match: + on_fail(f"Could not find env object in GraalOS standalone config: {config_path}") + env_body = env_match.group(2) + indent_match = re.search(r'^(\s*)"', env_body, flags=re.MULTILINE) + indent = indent_match.group(1) if indent_match else " " + separator = "," if env_body.strip() else "" + entry = f'{separator}\n{indent}{json.dumps(key)}: {json.dumps(value)}' + config = original_config[:env_match.end(2)] + entry + original_config[env_match.end(2):] + config_path.write_text(config, encoding="utf-8") + return config_path, original_config + + +def _upload_graalos_standalone_artifact(standalone_home: Path, work_dir: Path): + script = os.environ.get("ARTIFACT_UPLOADER_SCRIPT") + if not script: + mx.log("Skipping GRAALPY_NATIVE_GRAALOS_STANDALONE artifact upload: ARTIFACT_UPLOADER_SCRIPT is not set") + return + + revision = str(SUITE.vc.tip(SUITE.dir)).strip() + short_revision = revision[:10] + archive_base = work_dir / f"graalpy-native-graalos-standalone-linux-amd64-dev-g{short_revision}" + archive_path = shutil.make_archive( + str(archive_base), + "gztar", + root_dir=str(standalone_home.parent), + base_dir=standalone_home.name, + ) + artifact_name = Path(archive_path).name + upload_cmd = [ + sys.executable, + script, + archive_path, + f"graalpy/{artifact_name}", + "graalpy", + "--artifact-type", "graalpy-native-graalos-standalone", + "--version", f"dev-g{short_revision}", + "--revision", revision, + "--edition", "ee", + "--lifecycle", "snapshot", + "--platform", "linux-amd64", + ] + if repo_key := os.environ.get("ARTIFACT_REPO_KEY_LOCATION"): + upload_cmd += ["--artifact-repo-key", repo_key] + run(upload_cmd) + + def graalpy_graalos_standalone_build_and_test(report=None, on_fail=mx.abort): - del report # This gate executes an 
in-sandbox smoke test directly instead of using the source-tree test runner. artifact_base_url = os.environ.get("GRAALPY_GRAALOS_ARTIFACT_BASE_URL") if not artifact_base_url: mx.log("Skipping GRAALPY_NATIVE_GRAALOS_STANDALONE build: GRAALPY_GRAALOS_ARTIFACT_BASE_URL is not configured") @@ -221,9 +328,10 @@ def graalpy_graalos_standalone_build_and_test(report=None, on_fail=mx.abort): _download_graalos_standalone_artifact(versions["runtime"], runtime_tarball, on_fail=on_fail) _extract_tarball(runtime_tarball, runtime_root, on_fail=on_fail) graalos_runtime_home = _find_graalos_runtime_home(runtime_root, on_fail=on_fail) + assert graalos_runtime_home _ensure_graalos_runtime_inputs(graalos_runtime_home, on_fail=on_fail) - from mx_graalpython import extend_os_env, run_mx, _graalpy_launcher + from mx_graalpython import extend_os_env, run_mx, run_python_unittests, _graalpy_launcher env = extend_os_env( JAVA_HOME=str(graalvm_home), MUSL_TOOLCHAIN=str(musl_toolchain), @@ -244,17 +352,38 @@ def graalpy_graalos_standalone_build_and_test(report=None, on_fail=mx.abort): if not launcher.exists(): on_fail(f"GRAALPY_NATIVE_GRAALOS_STANDALONE launcher was not built: {launcher}") - test_path = Path(SUITE.dir) / "graalpython" / "com.oracle.graal.python.test" / "src" / "tests" / "test_graalos_standalone.py" - with open(test_path, "r", encoding="utf-8") as f: - smoke_test = f.read() - smoke_test += """ -try: - test_graalos_sqlite3_native_extension_smoke() -except unittest.SkipTest as e: - print(f"skipped: {e}") -""" - smoke_test_arg = base64.b64encode(smoke_test.encode("utf-8")).decode("ascii") - smoke_test_command = f"import base64; exec(base64.b64decode({smoke_test_arg!r}).decode('utf-8'))" - result = run([str(launcher), "-c", smoke_test_command], env=env, nonZeroIsFatal=(on_fail == mx.abort)) # pylint: disable=comparison-with-callable - if result != 0: - on_fail("Testing GraalOS standalone failed") + _prepare_graalos_demo(standalone_home, env) + 
_stage_graalos_test_harness(standalone_home) + config_path, original_config = _set_graalos_standalone_env( + standalone_home, + "GRAALPYTEST_ALLOW_NO_JAVA_ASSERTIONS", + "true", + on_fail=on_fail, + ) + reportfile = None + runner_reportfile = None + if report: + report_dir = standalone_home / "tmp" + report_dir.mkdir(parents=True, exist_ok=True) + with tempfile.NamedTemporaryFile( + prefix="test-report-", suffix=".json", dir=report_dir, delete=False + ) as report_tmp: + reportfile = report_tmp.name + runner_reportfile = f"/tmp/{os.path.basename(reportfile)}" + try: + run_python_unittests( + str(launcher), + paths=[ + "/test-harness/tests/test_graalos_standalone.py", + "/test-harness/tests/test_graalos_sandbox_chat.py", + ], + env=env, + report=report, + parallel=0, + test_runner="/test-harness/runner.py", + reportfile=reportfile, + runner_reportfile=runner_reportfile, + ) + finally: + config_path.write_text(original_config, encoding="utf-8") + _upload_graalos_standalone_artifact(standalone_home, work_dir) diff --git a/mx.graalpython/mx_pominit.py b/mx.graalpython/mx_pominit.py index afe7727075..30270bd52d 100644 --- a/mx.graalpython/mx_pominit.py +++ b/mx.graalpython/mx_pominit.py @@ -58,8 +58,8 @@ LOCAL_GROUP_ID = "${project.groupId}" LOCAL_VERSION = "${project.version}" GRAALVM_VERSION = "${graalvm.version}" -DEFAULT_GRAALVM_VERSION = "25.0.0" -CURRENT_GRAALVM_VERSION = "25.1.3" +DEFAULT_GRAALVM_VERSION = "25.1.3" +CURRENT_GRAALVM_VERSION = "25.2.4" XML_UPL_HEADER = """