From 64b4494ab9f22506188404151185b675e54c0be9 Mon Sep 17 00:00:00 2001 From: Joe Licata Date: Wed, 6 May 2026 20:20:56 +0000 Subject: [PATCH 1/5] refactor: Replace MinIO with S3-compatible storage - Updated configuration and environment variables to transition from MinIO to S3 storage, including changes to .env.example and Docker Compose files. - Introduced a new S3Config class for managing S3 settings and removed the MinIO configuration. - Refactored file management and state archival services to utilize the S3 client, ensuring compatibility with S3 operations. - Adjusted health checks and service dependencies to reflect the new S3 storage integration. - Updated documentation and comments throughout the codebase to replace references to MinIO with S3. --- .env.example | 13 +- docker-compose.prod.yml | 52 ++-- docker-compose.yml | 52 ++-- garage.toml | 11 + requirements.txt | 4 +- src/api/exec.py | 2 +- src/api/files.py | 2 +- src/api/health.py | 14 +- src/config/__init__.py | 56 ++-- src/config/minio.py | 31 -- src/config/resources.py | 2 +- src/config/s3.py | 29 ++ src/dependencies/services.py | 2 +- src/main.py | 4 +- src/services/cleanup.py | 6 +- src/services/execution/runner.py | 4 +- src/services/file.py | 274 +++++++++--------- src/services/health.py | 110 +++---- src/services/orchestrator.py | 16 +- src/services/sandbox/egress_firewall.py | 4 +- src/services/sandbox/egress_proxy.py | 2 +- src/services/session.py | 10 +- src/services/state.py | 12 +- src/services/state_archival.py | 161 +++++----- src/utils/config_validator.py | 40 +-- src/utils/logging.py | 3 +- tests/conftest.py | 51 ++-- tests/functional/test_concurrent_file_exec.py | 2 +- tests/integration/test_api_contracts.py | 2 +- tests/integration/test_librechat_compat.py | 2 +- tests/integration/test_session_behavior.py | 2 +- tests/unit/test_file_service.py | 65 ++--- tests/unit/test_runner_nested_paths.py | 2 +- tests/unit/test_upload_read_only.py | 5 +- 34 files changed, 512 insertions(+), 535 deletions(-) create mode 100644 garage.toml delete mode 100644 src/config/minio.py create mode 100644 src/config/s3.py diff --git a/.env.example b/.env.example index 9393a23..af7ec6b 100644 --- a/.env.example +++ b/.env.example @@ -35,12 +35,13 @@ REDIS_PORT=6379 # REDIS_PASSWORD= # REDIS_URL=redis://localhost:6379/0 # Alternative to individual settings -# ── MinIO / S3 ───────────────────────────────────────────────── -MINIO_ENDPOINT=localhost:9000 -MINIO_ACCESS_KEY=minioadmin -MINIO_SECRET_KEY=minioadmin -# MINIO_SECURE=false -# MINIO_BUCKET=code-interpreter-files +# ── S3 Storage (Garage) ──────────────────────────────────────── +S3_ENDPOINT=localhost:3900 +S3_ACCESS_KEY=minioadmin +S3_SECRET_KEY=minioadmin +# S3_SECURE=false +# S3_BUCKET=code-interpreter-files +# S3_REGION=garage # ── Execution Limits ─────────────────────────────────────────── # MAX_EXECUTION_TIME=30 # Seconds (default: 30) diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml index 5d6b33a..940eb85 100644 --- a/docker-compose.prod.yml +++ b/docker-compose.prod.yml @@ -9,7 +9,7 @@ services: - SYS_ADMIN # NET_ADMIN required to install iptables egress rules for sandbox uid # when ENABLE_SANDBOX_NETWORK=true. Restricts sandbox traffic to the - # inline allowlist proxy and prevents SSRF to Redis/MinIO/etc. + # inline allowlist proxy and prevents SSRF to Redis/S3/etc. - NET_ADMIN security_opt: - apparmor:unconfined @@ -19,7 +19,7 @@ services: - .env environment: - REDIS_HOST=redis - - MINIO_ENDPOINT=minio:9000 + - S3_ENDPOINT=garage:3900 volumes: - sandbox-data:/var/lib/code-interpreter/sandboxes # Persistent skill-deps cache: pip/npm/go/cargo install here when @@ -34,8 +34,8 @@ services: depends_on: redis: condition: service_healthy - minio-init: - condition: service_completed_successfully + garage: + condition: service_healthy healthcheck: test: ["CMD-SHELL", "curl -fs http://localhost:8000/health || curl -fsk https://localhost:8000/health"] interval: 30s @@ -63,43 +63,33 @@ services: timeout: 5s retries: 5 - minio: - image: minio/minio:latest - container_name: code-interpreter-minio + # Garage S3-compatible object storage (replaces MinIO) + garage: + image: dxflrs/garage:v2.3.0 + container_name: code-interpreter-garage restart: unless-stopped + command: > + server + --single-node + --default-bucket ${S3_BUCKET:-code-interpreter-files} ports: - - "127.0.0.1:${MINIO_PORT:-9000}:9000" - - "127.0.0.1:${MINIO_CONSOLE_PORT:-9001}:9001" + - "127.0.0.1:${S3_PORT:-3900}:3900" + - "127.0.0.1:${GARAGE_ADMIN_PORT:-3903}:3903" environment: - MINIO_ROOT_USER: ${MINIO_ACCESS_KEY:-minioadmin} - MINIO_ROOT_PASSWORD: ${MINIO_SECRET_KEY:-minioadmin} - command: server /data --console-address ":9001" + GARAGE_DEFAULT_ACCESS_KEY: ${S3_ACCESS_KEY:-minioadmin} + GARAGE_DEFAULT_SECRET_KEY: ${S3_SECRET_KEY:-minioadmin} volumes: - - minio-data:/data + - garage-data:/var/lib/garage + - ./garage.toml:/etc/garage.toml healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"] + test: ["CMD-SHELL", "curl -sf http://localhost:3900 || exit 1"] interval: 10s timeout: 5s retries: 5 - - minio-init: - image: minio/mc:latest - depends_on: - minio: - condition: service_healthy - entrypoint: > - /bin/sh -c " - mc alias set myminio http://minio:9000 $${MINIO_ACCESS_KEY:-minioadmin} $${MINIO_SECRET_KEY:-minioadmin}; - mc mb --ignore-existing myminio/$${MINIO_BUCKET:-code-interpreter-files}; - exit 0; - " - environment: - MINIO_ACCESS_KEY: ${MINIO_ACCESS_KEY:-minioadmin} - MINIO_SECRET_KEY: ${MINIO_SECRET_KEY:-minioadmin} - MINIO_BUCKET: ${MINIO_BUCKET:-code-interpreter-files} + start_period: 10s volumes: sandbox-data: skill-deps: redis-data: - minio-data: + garage-data: diff --git a/docker-compose.yml b/docker-compose.yml index bf824e8..7484c34 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -23,7 +23,7 @@ services: environment: # Container-specific overrides (service discovery within compose network) - REDIS_HOST=redis - - MINIO_ENDPOINT=minio:9000 + - S3_ENDPOINT=garage:3900 volumes: - sandbox-data:/var/lib/code-interpreter/sandboxes # SSL_CERTS_PATH is a host path; SSL_CERT_FILE and SSL_KEY_FILE must point @@ -34,8 +34,8 @@ services: depends_on: redis: condition: service_healthy - minio-init: - condition: service_completed_successfully + garage: + condition: service_healthy healthcheck: test: ["CMD-SHELL", "curl -fs http://localhost:8000/health || curl -fsk https://localhost:8000/health"] interval: 30s @@ -65,44 +65,32 @@ services: timeout: 5s retries: 5 - # MinIO for file storage - minio: - image: minio/minio:latest - container_name: code-interpreter-minio + # Garage S3-compatible object storage (replaces MinIO) + garage: + image: dxflrs/garage:v2.3.0 + container_name: code-interpreter-garage restart: unless-stopped + command: > + server + --single-node + --default-bucket ${S3_BUCKET:-code-interpreter-files} ports: - - "127.0.0.1:${MINIO_PORT:-9000}:9000" - - "127.0.0.1:${MINIO_CONSOLE_PORT:-9001}:9001" + - "127.0.0.1:${S3_PORT:-3900}:3900" + - "127.0.0.1:${GARAGE_ADMIN_PORT:-3903}:3903" environment: - MINIO_ROOT_USER: ${MINIO_ACCESS_KEY:-minioadmin} - MINIO_ROOT_PASSWORD: ${MINIO_SECRET_KEY:-minioadmin} - command: server /data --console-address ":9001" + GARAGE_DEFAULT_ACCESS_KEY: ${S3_ACCESS_KEY:-minioadmin} + GARAGE_DEFAULT_SECRET_KEY: ${S3_SECRET_KEY:-minioadmin} volumes: - - minio-data:/data + - garage-data:/var/lib/garage + - ./garage.toml:/etc/garage.toml healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"] + test: ["CMD-SHELL", "curl -sf http://localhost:3900 || exit 1"] interval: 10s timeout: 5s retries: 5 - - # MinIO bucket initialization - minio-init: - image: minio/mc:latest - depends_on: - minio: - condition: service_healthy - entrypoint: > - /bin/sh -c " - mc alias set myminio http://minio:9000 $${MINIO_ACCESS_KEY:-minioadmin} $${MINIO_SECRET_KEY:-minioadmin}; - mc mb --ignore-existing myminio/$${MINIO_BUCKET:-code-interpreter-files}; - exit 0; - " - environment: - MINIO_ACCESS_KEY: ${MINIO_ACCESS_KEY:-minioadmin} - MINIO_SECRET_KEY: ${MINIO_SECRET_KEY:-minioadmin} - MINIO_BUCKET: ${MINIO_BUCKET:-code-interpreter-files} + start_period: 10s volumes: sandbox-data: redis-data: - minio-data: + garage-data: diff --git a/garage.toml b/garage.toml new file mode 100644 index 0000000..d890c7e --- /dev/null +++ b/garage.toml @@ -0,0 +1,11 @@ +metadata_dir = "/var/lib/garage/meta" +data_dir = "/var/lib/garage/data" +db_engine = "sqlite" +replication_factor = 1 + +[s3_api] +s3_region = "garage" +api_bind_addr = "[::]:3900" + +[admin] +api_bind_addr = "[::]:3903" diff --git a/requirements.txt b/requirements.txt index 0d81d69..694e1cb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,8 +16,8 @@ redis==7.2.0 # SQLite async support for metrics aiosqlite>=0.19.0 -# MinIO/S3 client -minio==7.2.20 +# S3 storage client (Garage/any S3-compatible backend) +boto3>=1.35.0 # Date/time parsing utilities python-dateutil==2.9.0.post0 diff --git a/src/api/exec.py b/src/api/exec.py index 281530f..2dc6952 100644 --- a/src/api/exec.py +++ b/src/api/exec.py @@ -64,7 +64,7 @@ async def execute_code( within the same session, whether the caller supplies `session_id` directly or the orchestrator reuses a session through same-user file references or `entity_id` continuity. State is stored in Redis (2 hour TTL) with - automatic archival to MinIO for long-term storage (7 day TTL). + automatic archival to S3 for long-term storage (configurable TTL). Returns a streaming response that sends keepalive whitespace before the JSON body to prevent client socket timeouts during long operations. diff --git a/src/api/files.py b/src/api/files.py index 49227bb..59f85fa 100644 --- a/src/api/files.py +++ b/src/api/files.py @@ -135,7 +135,7 @@ async def upload_file( # Sanitize filename to match what will be used in container sanitized_name = OutputProcessor.sanitize_filename(file.filename) - # Store with sanitized name so MinIO, sandbox, and cleanup all use the same name + # Store with sanitized name so S3, sandbox, and cleanup all use the same name file_id = await file_service.store_uploaded_file( session_id=session_id, filename=sanitized_name, diff --git a/src/api/health.py b/src/api/health.py index 3ff6ff8..e7c43e3 100644 --- a/src/api/health.py +++ b/src/api/health.py @@ -116,11 +116,11 @@ async def redis_health_check(_: str = Depends(verify_api_key)): ) -@router.get("/health/minio", summary="MinIO health check") -async def minio_health_check(_: str = Depends(verify_api_key)): - """Check MinIO/S3 connectivity and performance.""" +@router.get("/health/s3", summary="S3 storage health check") +async def s3_health_check(_: str = Depends(verify_api_key)): + """Check S3 storage connectivity and performance.""" try: - result = await health_service.check_minio() + result = await health_service.check_s3() if result.status == HealthStatus.UNHEALTHY: return JSONResponse(status_code=503, content=result.to_dict()) @@ -128,13 +128,13 @@ async def minio_health_check(_: str = Depends(verify_api_key)): return JSONResponse(status_code=200, content=result.to_dict()) except Exception as e: - logger.error("MinIO health check failed", error=str(e)) + logger.error("S3 health check failed", error=str(e)) return JSONResponse( status_code=503, content={ - "service": "minio", + "service": "s3", "status": "unhealthy", - "error": str(e) if settings.api_debug else "MinIO check failed", + "error": str(e) if settings.api_debug else "S3 check failed", }, ) diff --git a/src/config/__init__.py b/src/config/__init__.py index cba93ed..bd13e27 100644 --- a/src/config/__init__.py +++ b/src/config/__init__.py @@ -29,7 +29,7 @@ # Import grouped configurations from .api import APIConfig from .redis import RedisConfig -from .minio import MinIOConfig +from .s3 import S3Config from .security import SecurityConfig from .resources import ResourcesConfig from .logging import LoggingConfig @@ -143,12 +143,13 @@ class Settings(BaseSettings): redis_socket_timeout: int = Field(default=5, ge=1) redis_socket_connect_timeout: int = Field(default=5, ge=1) - # MinIO/S3 Configuration - minio_endpoint: str = Field(default="localhost:9000") - minio_access_key: str = Field(default="test-access-key", min_length=3) - minio_secret_key: str = Field(default="test-secret-key", min_length=8) - minio_secure: bool = Field(default=False) - minio_bucket: str = Field(default="code-interpreter-files") + # S3 Storage Configuration + s3_endpoint: str = Field(default="localhost:3900") + s3_access_key: str = Field(default="test-access-key", min_length=3) + s3_secret_key: str = Field(default="test-secret-key", min_length=8) + s3_secure: bool = Field(default=False) + s3_bucket: str = Field(default="code-interpreter-files") + s3_region: str = Field(default="garage") # Sandbox (nsjail) Configuration nsjail_binary: str = Field( @@ -196,7 +197,7 @@ class Settings(BaseSettings): # Session Configuration session_ttl_hours: int = Field(default=24, ge=1, le=168) session_cleanup_interval_minutes: int = Field(default=60, ge=1, le=1440) - enable_orphan_minio_cleanup: bool = Field(default=True) + enable_orphan_s3_cleanup: bool = Field(default=True) # Sandbox Pool Configuration sandbox_pool_enabled: bool = Field(default=True) @@ -250,24 +251,24 @@ class Settings(BaseSettings): default=100, ge=1, le=500, - description="Max state size (MB, raw bytes) for Redis storage. Larger states go directly to MinIO", + description="Max state size (MB, raw bytes) for Redis storage. Larger states go directly to S3 cold storage", ) - # State Archival Configuration - Hybrid Redis + MinIO storage + # State Archival Configuration - Hybrid Redis + S3 storage state_archive_enabled: bool = Field( - default=True, description="Enable archiving inactive states from Redis to MinIO" + default=True, description="Enable archiving inactive states from Redis to S3" ) state_archive_after_seconds: int = Field( default=3600, ge=300, le=86400, - description="Archive state to MinIO after this many seconds of inactivity. Default: 1 hour", + description="Archive state to S3 after this many seconds of inactivity. Default: 1 hour", ) state_archive_ttl_days: int = Field( default=1, ge=1, le=30, - description="Keep archived states in MinIO for N days. Default: 1 (24 hours)", + description="Keep archived states in S3 for N days. Default: 1 (24 hours)", ) state_archive_check_interval_seconds: int = Field( default=300, @@ -449,12 +450,12 @@ def parse_api_keys(cls, v): """Parse comma-separated API keys into a list.""" return [key.strip() for key in v.split(",") if key.strip()] if v else None - @validator("minio_endpoint") - def validate_minio_endpoint(cls, v): - """Ensure MinIO endpoint doesn't include protocol.""" + @validator("s3_endpoint") + def validate_s3_endpoint(cls, v): + """Ensure S3 endpoint doesn't include protocol.""" if v.startswith(("http://", "https://")): raise ValueError( - "MinIO endpoint should not include protocol (use minio_secure instead)" + "S3 endpoint should not include protocol (use s3_secure instead)" ) return v @@ -505,14 +506,15 @@ def redis(self) -> RedisConfig: ) @property - def minio(self) -> MinIOConfig: - """Access MinIO configuration group.""" - return MinIOConfig( - minio_endpoint=self.minio_endpoint, - minio_access_key=self.minio_access_key, - minio_secret_key=self.minio_secret_key, - minio_secure=self.minio_secure, - minio_bucket=self.minio_bucket, + def s3(self) -> S3Config: + """Access S3 storage configuration group.""" + return S3Config( + s3_endpoint=self.s3_endpoint, + s3_access_key=self.s3_access_key, + s3_secret_key=self.s3_secret_key, + s3_secure=self.s3_secure, + s3_bucket=self.s3_bucket, + s3_region=self.s3_region, ) @property @@ -539,7 +541,7 @@ def resources(self) -> ResourcesConfig: max_filename_length=self.max_filename_length, session_ttl_hours=self.session_ttl_hours, session_cleanup_interval_minutes=self.session_cleanup_interval_minutes, - enable_orphan_minio_cleanup=self.enable_orphan_minio_cleanup, + enable_orphan_s3_cleanup=self.enable_orphan_s3_cleanup, ) @property @@ -612,7 +614,7 @@ def is_file_allowed(self, filename: str) -> bool: # Grouped configs "APIConfig", "RedisConfig", - "MinIOConfig", + "S3Config", "SecurityConfig", "ResourcesConfig", "LoggingConfig", diff --git a/src/config/minio.py b/src/config/minio.py deleted file mode 100644 index 11a8494..0000000 --- a/src/config/minio.py +++ /dev/null @@ -1,31 +0,0 @@ -"""MinIO/S3 configuration.""" - -from pydantic import Field, validator -from pydantic_settings import BaseSettings - - -class MinIOConfig(BaseSettings): - """MinIO/S3 storage settings.""" - - endpoint: str = Field(default="localhost:9000", alias="minio_endpoint") - access_key: str = Field( - default="test-access-key", min_length=3, alias="minio_access_key" - ) - secret_key: str = Field( - default="test-secret-key", min_length=8, alias="minio_secret_key" - ) - secure: bool = Field(default=False, alias="minio_secure") - bucket: str = Field(default="code-interpreter-files", alias="minio_bucket") - - @validator("endpoint") - def validate_endpoint(cls, v): - """Ensure endpoint doesn't include protocol.""" - if v.startswith(("http://", "https://")): - raise ValueError( - "MinIO endpoint should not include protocol (use secure instead)" - ) - return v - - class Config: - env_prefix = "" - extra = "ignore" diff --git a/src/config/resources.py b/src/config/resources.py index b4c5c44..8a57ab8 100644 --- a/src/config/resources.py +++ b/src/config/resources.py @@ -20,7 +20,7 @@ class ResourcesConfig(BaseSettings): # Session Lifecycle session_ttl_hours: int = Field(default=24, ge=1, le=168) session_cleanup_interval_minutes: int = Field(default=60, ge=1, le=1440) - enable_orphan_minio_cleanup: bool = Field(default=True) + enable_orphan_s3_cleanup: bool = Field(default=True) def get_session_ttl_minutes(self) -> int: """Get session TTL in minutes.""" diff --git a/src/config/s3.py b/src/config/s3.py new file mode 100644 index 0000000..0293279 --- /dev/null +++ b/src/config/s3.py @@ -0,0 +1,29 @@ +"""S3-compatible object storage configuration.""" + +from pydantic import Field +from pydantic_settings import BaseSettings + + +class S3Config(BaseSettings): + """S3-compatible storage settings (Garage, AWS S3, etc.).""" + + endpoint: str = Field(default="localhost:3900", alias="s3_endpoint") + access_key: str = Field( + default="test-access-key", min_length=3, alias="s3_access_key" + ) + secret_key: str = Field( + default="test-secret-key", min_length=8, alias="s3_secret_key" + ) + secure: bool = Field(default=False, alias="s3_secure") + bucket: str = Field(default="code-interpreter-files", alias="s3_bucket") + region: str = Field(default="garage", alias="s3_region") + + @property + def endpoint_url(self) -> str: + """Construct the full endpoint URL for boto3.""" + scheme = "https" if self.secure else "http" + return f"{scheme}://{self.endpoint}" + + class Config: + env_prefix = "" + extra = "ignore" diff --git a/src/dependencies/services.py b/src/dependencies/services.py index 5de3703..521d661 100644 --- a/src/dependencies/services.py +++ b/src/dependencies/services.py @@ -53,7 +53,7 @@ def get_state_service() -> StateService: @lru_cache() def get_state_archival_service() -> StateArchivalService: - """Get state archival service instance for MinIO cold storage.""" + """Get state archival service instance for S3 cold storage.""" state_service = get_state_service() return StateArchivalService(state_service=state_service) diff --git a/src/main.py b/src/main.py index a9802be..9df350e 100644 --- a/src/main.py +++ b/src/main.py @@ -184,7 +184,7 @@ async def _startup_egress_proxy(app: FastAPI) -> None: # Network-level enforcement so a malicious skill can't `socket.create_connection` # around the application-level proxy. Without these iptables rules, sandbox # processes — sharing the API container's net namespace — can directly reach - # Redis/MinIO and any internal docker network. Refuse to enable network if the + # Redis/S3 and any internal docker network. Refuse to enable network if the # firewall can't be installed (better to fail loudly than to silently leak SSRF). from .config.languages import SANDBOX_USER_ID from .services.sandbox.egress_firewall import install_sandbox_egress_rules @@ -201,7 +201,7 @@ async def _startup_egress_proxy(app: FastAPI) -> None: "ENABLE_SANDBOX_NETWORK=true but the iptables egress firewall could " "not be installed. The container needs CAP_NET_ADMIN (cap_add: NET_ADMIN " "in compose) and an iptables binary. Without these rules, sandboxes " - "could SSRF Redis/MinIO via direct sockets — refusing to enable network." + "could SSRF Redis/S3 via direct sockets — refusing to enable network." ) logger.info( diff --git a/src/services/cleanup.py b/src/services/cleanup.py index 6d43183..0461ea5 100644 --- a/src/services/cleanup.py +++ b/src/services/cleanup.py @@ -7,7 +7,7 @@ immediately after execution by the orchestrator. This scheduler handles: - File cleanup when sessions are explicitly deleted - Legacy cleanup for non-pooled containers -- Periodic state archival from Redis to MinIO +- Periodic state archival from Redis to S3 """ import asyncio @@ -27,7 +27,7 @@ class CleanupScheduler: With the simplified container pool architecture: - Containers are destroyed immediately after execution (no TTL tracking) - This scheduler handles file cleanup and session-level resource cleanup - - Periodic state archival from Redis to MinIO + - Periodic state archival from Redis to S3 """ def __init__(self, delay_seconds: int = 5): @@ -178,7 +178,7 @@ def pending_count(self) -> int: return len(self._pending_cleanups) async def _archival_loop(self): - """Background loop for archiving inactive states to MinIO.""" + """Background loop for archiving inactive states to S3.""" interval = settings.state_archive_check_interval_seconds while True: diff --git a/src/services/execution/runner.py b/src/services/execution/runner.py index 848aac4..89c24ea 100644 --- a/src/services/execution/runner.py +++ b/src/services/execution/runner.py @@ -701,7 +701,7 @@ async def _mount_files_to_sandbox( ) -> None: """Mount files to sandbox workspace. - Uses streaming (MinIO fget_object) to transfer files directly to the + Uses streaming (S3 download_file) to transfer files directly to the sandbox data directory without loading entire files into memory. This avoids blocking the asyncio event loop during large file transfers. @@ -763,7 +763,7 @@ def _set_file_perms(path, uid, read_only=False): size_mb=round(file_size / 1024 / 1024, 1), ) - # Stream directly from MinIO to sandbox directory (non-blocking) + # Stream directly from S3 to sandbox directory (non-blocking) success = await file_service.stream_file_to_path( session_id, file_id, dest_path ) diff --git a/src/services/file.py b/src/services/file.py index 0f3bf9a..5d12c01 100644 --- a/src/services/file.py +++ b/src/services/file.py @@ -1,15 +1,15 @@ -"""File management service with MinIO/S3 storage integration.""" +"""File management service with S3-compatible storage integration.""" # Standard library imports import asyncio -from datetime import datetime, timedelta +from datetime import datetime from typing import List, Optional, Tuple, Dict, Any # Third-party imports +import boto3 import redis.asyncio as redis import structlog -from minio import Minio -from minio.error import S3Error +from botocore.exceptions import ClientError # Local application imports from .interfaces import FileServiceInterface @@ -21,16 +21,16 @@ class FileService(FileServiceInterface): - """File management service with MinIO/S3 storage and Redis metadata.""" + """File management service with S3 storage and Redis metadata.""" def __init__(self): - """Initialize the file service with MinIO and Redis clients.""" - # Initialize MinIO client - self.minio_client = Minio( - settings.minio_endpoint, - access_key=settings.minio_access_key, - secret_key=settings.minio_secret_key, - secure=settings.minio_secure, + """Initialize the file service with S3 and Redis clients.""" + self.s3_client = boto3.client( + "s3", + endpoint_url=settings.s3.endpoint_url, + aws_access_key_id=settings.s3_access_key, + aws_secret_access_key=settings.s3_secret_key, + region_name=settings.s3_region, ) # Initialize Redis client @@ -38,24 +38,28 @@ def __init__(self): settings.get_redis_url(), decode_responses=True ) - self.bucket_name = settings.minio_bucket + self.bucket_name = settings.s3_bucket async def _ensure_bucket_exists(self) -> None: - """Ensure the MinIO bucket exists.""" + """Ensure the S3 bucket exists.""" try: - # Run in thread pool since minio client is synchronous loop = asyncio.get_event_loop() - bucket_exists = await loop.run_in_executor( - None, self.minio_client.bucket_exists, self.bucket_name - ) - - if not bucket_exists: + try: await loop.run_in_executor( - None, self.minio_client.make_bucket, self.bucket_name + None, + lambda: self.s3_client.head_bucket(Bucket=self.bucket_name), ) - logger.info("Created MinIO bucket", bucket=self.bucket_name) + except ClientError as e: + if e.response["Error"]["Code"] in ("404", "NoSuchBucket"): + await loop.run_in_executor( + None, + lambda: self.s3_client.create_bucket(Bucket=self.bucket_name), + ) + logger.info("Created S3 bucket", bucket=self.bucket_name) + else: + raise - except S3Error as e: + except ClientError as e: logger.error( "Failed to ensure bucket exists", error=str(e), bucket=self.bucket_name ) @@ -109,13 +113,13 @@ async def _has_link_references(self, session_id: str, file_id: str) -> bool: return bool(await self.redis_client.smembers(links_key)) async def _delete_object(self, object_key: str) -> None: - """Delete a backing object from MinIO.""" + """Delete a backing object from S3.""" loop = asyncio.get_event_loop() await loop.run_in_executor( None, - self.minio_client.remove_object, - self.bucket_name, - object_key, + lambda: self.s3_client.delete_object( + Bucket=self.bucket_name, Key=object_key + ), ) async def _find_linked_file( @@ -268,10 +272,11 @@ async def upload_file( loop = asyncio.get_event_loop() upload_url = await loop.run_in_executor( None, - self.minio_client.presigned_put_object, - self.bucket_name, - object_key, - timedelta(hours=1), + lambda: self.s3_client.generate_presigned_url( + "put_object", + Params={"Bucket": self.bucket_name, "Key": object_key}, + ExpiresIn=3600, + ), ) # Store initial metadata @@ -297,7 +302,7 @@ async def upload_file( return file_id, upload_url - except S3Error as e: + except ClientError as e: logger.error( "Failed to generate upload URL", error=str(e), session_id=session_id ) @@ -314,31 +319,36 @@ async def confirm_upload(self, session_id: str, file_id: str) -> FileInfo: try: # Get object info to confirm upload and get size loop = asyncio.get_event_loop() - stat = await loop.run_in_executor( - None, self.minio_client.stat_object, self.bucket_name, object_key + head = await loop.run_in_executor( + None, + lambda: self.s3_client.head_object( + Bucket=self.bucket_name, Key=object_key + ), ) + file_size = head["ContentLength"] + # Update metadata with actual file size - metadata["size"] = stat.size + metadata["size"] = file_size await self._store_file_metadata(session_id, file_id, metadata) logger.debug( "Confirmed file upload", session_id=session_id, file_id=file_id, - size=stat.size, + size=file_size, ) return FileInfo( file_id=file_id, filename=metadata["filename"], - size=stat.size, + size=file_size, content_type=metadata["content_type"], created_at=metadata["created_at"], path=metadata["path"], ) - except S3Error as e: + except ClientError as e: logger.error( "Failed to confirm upload", error=str(e), @@ -464,15 +474,16 @@ async def download_file(self, session_id: str, file_id: str) -> Optional[str]: loop = asyncio.get_event_loop() download_url = await loop.run_in_executor( None, - self.minio_client.presigned_get_object, - self.bucket_name, - object_key, - timedelta(hours=1), + lambda: self.s3_client.generate_presigned_url( + "get_object", + Params={"Bucket": self.bucket_name, "Key": object_key}, + ExpiresIn=3600, + ), ) return download_url - except S3Error as e: + except ClientError as e: logger.error( "Failed to generate download URL", error=str(e), @@ -517,7 +528,7 @@ async def delete_file(self, session_id: str, file_id: str) -> bool: source_file_id=metadata["source_file_id"], object_key=metadata["object_key"], ) - except S3Error as e: + except ClientError as e: logger.warning( "Failed to delete orphaned shared object", source_session_id=metadata["source_session_id"], @@ -539,7 +550,6 @@ async def delete_file(self, session_id: str, file_id: str) -> bool: object_key = metadata["object_key"] try: - # Delete from MinIO await self._delete_object(object_key) # Delete metadata from Redis @@ -548,7 +558,7 @@ async def delete_file(self, session_id: str, file_id: str) -> bool: logger.debug("Deleted file", session_id=session_id, file_id=file_id) return True - except S3Error as e: + except ClientError as e: logger.error( "Failed to delete file", error=str(e), @@ -571,36 +581,39 @@ async def cleanup_session_files(self, session_id: str) -> int: # Clean up session files set await self.redis_client.delete(session_files_key) - # If no files were tracked in Redis, fall back to prefix-based deletion in MinIO + # If no files were tracked in Redis, fall back to prefix-based deletion if deleted_count == 0: try: loop = asyncio.get_event_loop() - # List objects under both uploads and outputs prefixes prefixes = [ f"sessions/{session_id}/uploads/", f"sessions/{session_id}/outputs/", ] for prefix in prefixes: - # MinIO list_objects returns an iterator; use recursive to get all - objects = await loop.run_in_executor( - None, - lambda: list( - self.minio_client.list_objects( - self.bucket_name, prefix=prefix, recursive=True - ) - ), - ) - for obj in objects: - await loop.run_in_executor( - None, - self.minio_client.remove_object, - self.bucket_name, - obj.object_name, + + def _list_prefix(p: str = prefix) -> list: + return list( + self.s3_client.get_paginator("list_objects_v2") + .paginate(Bucket=self.bucket_name, Prefix=p) + .search("Contents[]") ) + + objects = await loop.run_in_executor(None, _list_prefix) + for entry in objects: + if entry is None: + continue + key = entry["Key"] + + def _delete(k: str = key) -> None: + self.s3_client.delete_object( + Bucket=self.bucket_name, Key=k + ) + + await loop.run_in_executor(None, _delete) deleted_count += 1 except Exception as e: logger.error( - "Prefix-based MinIO cleanup failed", + "Prefix-based S3 cleanup failed", session_id=session_id, error=str(e), ) @@ -643,20 +656,19 @@ async def store_execution_output_file( object_key = self._get_file_key(session_id, file_id, "outputs") try: - # Convert bytes to BytesIO for MinIO import io content_stream = io.BytesIO(content) - # Upload file content directly loop = asyncio.get_event_loop() await loop.run_in_executor( None, - self.minio_client.put_object, - self.bucket_name, - object_key, - content_stream, - len(content), + lambda: self.s3_client.put_object( + Bucket=self.bucket_name, + Key=object_key, + Body=content_stream, + ContentLength=len(content), + ), ) now = datetime.utcnow() @@ -670,7 +682,7 @@ async def store_execution_output_file( "created_at": now.isoformat(), "size": len(content), "path": f"/outputs/{filename}", - "type": "output", # Mark as execution output + "type": "output", } await self._store_file_metadata(session_id, file_id, metadata) @@ -685,7 +697,7 @@ async def store_execution_output_file( return file_id - except S3Error as e: + except ClientError as e: logger.error( "Failed to store output file", error=str(e), @@ -706,17 +718,15 @@ async def get_file_content(self, session_id: str, file_id: str) -> Optional[byte loop = asyncio.get_event_loop() def _download(): - response = self.minio_client.get_object(self.bucket_name, object_key) - try: - return response.read() - finally: - response.close() - response.release_conn() + response = self.s3_client.get_object( + Bucket=self.bucket_name, Key=object_key + ) + return response["Body"].read() content = await loop.run_in_executor(None, _download) return content - except S3Error as e: + except ClientError as e: logger.error( "Failed to get file content", error=str(e), @@ -728,9 +738,9 @@ def _download(): async def stream_file_to_path( self, session_id: str, file_id: str, dest_path: str ) -> bool: - """Stream file content from MinIO directly to a local file path. + """Stream file content from S3 directly to a local file path. - Uses MinIO's fget_object for efficient disk-to-disk transfer + Uses boto3's download_file for efficient disk-to-disk transfer without loading the entire file into memory. Runs in a thread pool executor to avoid blocking the async event loop. @@ -752,13 +762,12 @@ async def stream_file_to_path( loop = asyncio.get_event_loop() await loop.run_in_executor( None, - self.minio_client.fget_object, - self.bucket_name, - object_key, - dest_path, + lambda: self.s3_client.download_file( + self.bucket_name, object_key, dest_path + ), ) return True - except S3Error as e: + except ClientError as e: logger.error( "Failed to stream file to path", error=str(e), @@ -801,7 +810,6 @@ async def store_uploaded_file( object_key = self._get_file_key(session_id, file_id, "uploads") try: - # Upload file content directly from io import BytesIO content_stream = BytesIO(content) @@ -809,12 +817,13 @@ async def store_uploaded_file( loop = asyncio.get_event_loop() await loop.run_in_executor( None, - self.minio_client.put_object, - self.bucket_name, - object_key, - content_stream, - len(content), - content_type or "application/octet-stream", + lambda: self.s3_client.put_object( + Bucket=self.bucket_name, + Key=object_key, + Body=content_stream, + ContentLength=len(content), + ContentType=content_type or "application/octet-stream", + ), ) # Store metadata @@ -827,10 +836,8 @@ async def store_uploaded_file( "created_at": datetime.utcnow().isoformat(), "size": len(content), "path": f"/{filename}", - "type": "upload", # Mark as uploaded file - "is_agent_file": ( - "1" if is_agent_file else "0" - ), # Read-only if agent file + "type": "upload", + "is_agent_file": ("1" if is_agent_file else "0"), "is_read_only": "1" if (is_read_only or is_agent_file) else "0", "original_filename": original_filename or filename, } @@ -847,7 +854,7 @@ async def store_uploaded_file( return file_id - except S3Error as e: + except ClientError as e: logger.error( "Failed to store uploaded file", error=str(e), @@ -857,7 +864,7 @@ async def store_uploaded_file( raise async def cleanup_orphan_objects(self, batch_limit: int = 1000) -> int: - """Delete MinIO objects under sessions/ whose sessions are not active in Redis. + """Delete S3 objects under sessions/ whose sessions are not active in Redis. Safety guards: - Skip if the session index is empty (avoid mass-deletes on cold start). @@ -870,53 +877,51 @@ async def cleanup_orphan_objects(self, batch_limit: int = 1000) -> int: active_session_ids = await self.redis_client.smembers("sessions:index") active_session_ids = active_session_ids or set() - # Guard 1: if index is empty, skip to avoid accidental bulk deletes if not active_session_ids: - logger.debug("Skipping orphan MinIO cleanup: empty sessions index") + logger.debug("Skipping orphan S3 cleanup: empty sessions index") return 0 loop = asyncio.get_event_loop() - # List all objects under the sessions/ prefix + + # List all objects under the sessions/ prefix using paginator objects = await loop.run_in_executor( None, lambda: list( - self.minio_client.list_objects( - self.bucket_name, prefix="sessions/", recursive=True - ) + self.s3_client.get_paginator("list_objects_v2") + .paginate(Bucket=self.bucket_name, Prefix="sessions/") + .search("Contents[]") ), ) deleted_count = 0 - # Cache existence checks to minimize Redis round-trips for unknown session IDs + # Cache existence checks to minimize Redis round-trips checked_missing_sessions: Dict[str, bool] = {} - # Determine age cutoff based on TTL (older than TTL are safe to remove) + # Determine age cutoff based on TTL ttl_minutes = settings.get_session_ttl_minutes() ttl_seconds = ttl_minutes * 60 now_ts = datetime.utcnow().timestamp() - for obj in objects: + for entry in objects: + if entry is None: + continue if deleted_count >= batch_limit: break - object_key = getattr(obj, "object_name", None) + object_key = entry.get("Key") if not object_key: continue parts = object_key.split("/") - # Expecting sessions/// if len(parts) < 3 or parts[0] != "sessions": continue object_session_id = parts[1] - # Guard 2: only delete if object is older than TTL (requires last_modified) try: - # minio list_objects entries typically have last_modified; if missing, skip - last_modified = getattr(obj, "last_modified", None) + last_modified = entry.get("LastModified") if last_modified is None: continue - # last_modified may be datetime; convert to timestamp obj_ts = ( last_modified.timestamp() if hasattr(last_modified, "timestamp") @@ -925,7 +930,6 @@ async def cleanup_orphan_objects(self, batch_limit: int = 1000) -> int: if obj_ts is None: continue if (now_ts - obj_ts) < ttl_seconds: - # Too new; skip to avoid racing with active sessions continue except Exception as e: logger.debug( @@ -935,7 +939,6 @@ async def cleanup_orphan_objects(self, batch_limit: int = 1000) -> int: ) continue - # Skip if known active if object_session_id in active_session_ids: continue @@ -945,7 +948,6 @@ async def cleanup_orphan_objects(self, batch_limit: int = 1000) -> int: ): continue - # Double-check via Redis existence in case index is stale if object_session_id not in checked_missing_sessions: try: exists = await self.redis_client.exists( @@ -961,34 +963,31 @@ async def cleanup_orphan_objects(self, batch_limit: int = 1000) -> int: checked_missing_sessions[object_session_id] = False if checked_missing_sessions.get(object_session_id, False): - # Session exists; keep the object continue - # Delete orphaned object try: - await loop.run_in_executor( - None, - self.minio_client.remove_object, - self.bucket_name, - object_key, - ) + + def _delete_orphan(k: str = object_key) -> None: + self.s3_client.delete_object(Bucket=self.bucket_name, Key=k) + + await loop.run_in_executor(None, _delete_orphan) deleted_count += 1 except Exception as e: logger.error( - "Failed to delete orphan MinIO object", + "Failed to delete orphan S3 object", object_key=object_key, error=str(e), ) if deleted_count > 0: - logger.info("Deleted orphan MinIO objects", deleted_count=deleted_count) + logger.info("Deleted orphan S3 objects", deleted_count=deleted_count) else: - logger.debug("No orphan MinIO objects found") + logger.debug("No orphan S3 objects found") return deleted_count except Exception as e: - logger.error("Orphan MinIO objects cleanup failed", error=str(e)) + logger.error("Orphan S3 objects cleanup failed", error=str(e)) return 0 async def update_file_content( @@ -999,7 +998,7 @@ async def update_file_content( ) -> bool: """Update the content of an existing file. - Overwrites the MinIO object and updates metadata. Used to persist + Overwrites the S3 object and updates metadata. Used to persist in-place edits to mounted files after execution. Args: @@ -1038,7 +1037,6 @@ async def update_file_content( ) return False - # Overwrite content in MinIO import io loop = asyncio.get_event_loop() @@ -1047,12 +1045,12 @@ async def update_file_content( await loop.run_in_executor( None, - lambda: self.minio_client.put_object( - self.bucket_name, - object_key, - content_stream, - len(content), - content_type, + lambda: self.s3_client.put_object( + Bucket=self.bucket_name, + Key=object_key, + Body=content_stream, + ContentLength=len(content), + ContentType=content_type, ), ) diff --git a/src/services/health.py b/src/services/health.py index 70e69c5..0027c51 100644 --- a/src/services/health.py +++ b/src/services/health.py @@ -11,10 +11,10 @@ from typing import Dict, Any, Optional # Third-party imports +import boto3 import redis.asyncio as redis import structlog -from minio import Minio -from minio.error import S3Error +from botocore.exceptions import ClientError # Local application imports from ..config import settings @@ -75,7 +75,7 @@ class HealthCheckService: def __init__(self): """Initialize health check service.""" self._redis_client: Optional[redis.Redis] = None - self._minio_client: Optional[Minio] = None + self._s3_client = None self._sandbox_pool = None self._last_check_time: Optional[datetime] = None self._cached_results: Dict[str, HealthCheckResult] = {} @@ -104,10 +104,10 @@ async def check_all_services( # Run all health checks concurrently tasks = [ self.check_redis(), - self.check_minio(), + self.check_s3(), self.check_nsjail(), ] - service_names = ["redis", "minio", "nsjail"] + service_names = ["redis", "s3", "nsjail"] # Add sandbox pool check if pool is configured if self._sandbox_pool and settings.sandbox_pool_enabled: @@ -218,122 +218,124 @@ async def check_redis(self) -> HealthCheckResult: error=str(e), ) - async def check_minio(self) -> HealthCheckResult: - """Check MinIO/S3 connectivity and performance.""" + async def check_s3(self) -> HealthCheckResult: + """Check S3 storage connectivity and performance.""" start_time = time.time() try: - # Create MinIO client if not exists - if not self._minio_client: - self._minio_client = Minio( - settings.minio_endpoint, - access_key=settings.minio_access_key, - secret_key=settings.minio_secret_key, - secure=settings.minio_secure, + if not self._s3_client: + self._s3_client = boto3.client( + "s3", + endpoint_url=settings.s3.endpoint_url, + aws_access_key_id=settings.s3_access_key, + aws_secret_access_key=settings.s3_secret_key, + region_name=settings.s3_region, ) - # Test basic connectivity by listing buckets loop = asyncio.get_event_loop() - buckets = await loop.run_in_executor(None, self._minio_client.list_buckets) - - # Check if our bucket exists - bucket_exists = await loop.run_in_executor( - None, self._minio_client.bucket_exists, settings.minio_bucket + buckets_resp = await loop.run_in_executor( + None, self._s3_client.list_buckets ) + buckets = buckets_resp.get("Buckets", []) - if not bucket_exists: - # Try to create the bucket + # Check if our bucket exists + try: + await loop.run_in_executor( + None, + lambda: self._s3_client.head_bucket(Bucket=settings.s3_bucket), + ) + bucket_exists = True + except ClientError: + bucket_exists = False await loop.run_in_executor( - None, self._minio_client.make_bucket, settings.minio_bucket + None, + lambda: self._s3_client.create_bucket(Bucket=settings.s3_bucket), ) - logger.info(f"Created missing bucket: {settings.minio_bucket}") + logger.info(f"Created missing bucket: {settings.s3_bucket}") # Test read/write operations test_object = f"health_check/test_{int(time.time())}.txt" test_content = b"health check test content" - # Create a BytesIO object for the upload from io import BytesIO test_data = BytesIO(test_content) - # Upload test object await loop.run_in_executor( None, - self._minio_client.put_object, - settings.minio_bucket, - test_object, - test_data, - len(test_content), + lambda: self._s3_client.put_object( + Bucket=settings.s3_bucket, + Key=test_object, + Body=test_data, + ContentLength=len(test_content), + ), ) - # Download test object response = await loop.run_in_executor( - None, self._minio_client.get_object, settings.minio_bucket, test_object + None, + lambda: self._s3_client.get_object( + Bucket=settings.s3_bucket, Key=test_object + ), ) - downloaded_content = response.read() - response.close() - response.release_conn() + downloaded_content = response["Body"].read() - # Clean up test object await loop.run_in_executor( None, - self._minio_client.remove_object, - settings.minio_bucket, - test_object, + lambda: self._s3_client.delete_object( + Bucket=settings.s3_bucket, Key=test_object + ), ) if downloaded_content != test_content: - raise Exception("MinIO read/write test failed") + raise Exception("S3 read/write test failed") response_time = (time.time() - start_time) * 1000 - # Determine status based on response time status = HealthStatus.HEALTHY - if response_time > 2000: # > 2 seconds + if response_time > 2000: status = HealthStatus.DEGRADED details = { - "endpoint": settings.minio_endpoint, - "bucket": settings.minio_bucket, + "endpoint": settings.s3_endpoint, + "bucket": settings.s3_bucket, "bucket_exists": bucket_exists, "total_buckets": len(buckets), - "secure": settings.minio_secure, + "secure": settings.s3_secure, } return HealthCheckResult( - service="minio", + service="s3", status=status, response_time_ms=response_time, details=details, ) - except S3Error as e: + except ClientError as e: response_time = (time.time() - start_time) * 1000 logger.error( - "MinIO health check failed", + "S3 health check failed", error=str(e), response_time_ms=response_time, ) return HealthCheckResult( - service="minio", + service="s3", status=HealthStatus.UNHEALTHY, response_time_ms=response_time, - error=f"S3 Error: {e.message if hasattr(e, 'message') else str(e)}", + error=str(e), ) except Exception as e: response_time = (time.time() - start_time) * 1000 logger.error( - "MinIO health check failed", + "S3 health check failed", error=str(e), response_time_ms=response_time, ) return HealthCheckResult( - service="minio", + service="s3", status=HealthStatus.UNHEALTHY, response_time_ms=response_time, error=str(e), @@ -470,7 +472,7 @@ async def check_sandbox_pool(self) -> HealthCheckResult: details = { "enabled": True, - "architecture": "stateless", # Sandboxes destroyed after each execution + "architecture": "stateless", "total_available": total_available, "total_acquisitions": total_acquisitions, "pool_hits": pool_hits, diff --git a/src/services/orchestrator.py b/src/services/orchestrator.py index bbcf08e..afb64c2 100644 --- a/src/services/orchestrator.py +++ b/src/services/orchestrator.py @@ -500,11 +500,11 @@ async def _auto_mount_session_files( return mounted async def _load_state(self, ctx: ExecutionContext) -> None: - """Load previous state from Redis (or MinIO fallback) for Python sessions. + """Load previous state from Redis (or S3 fallback) for Python sessions. Priority order: 1. Redis hot storage (within 2-hour TTL) - 2. MinIO cold storage (archived state) + 2. S3 cold storage (archived state) """ if not settings.state_persistence_enabled: return @@ -531,14 +531,14 @@ async def _load_state(self, ctx: ExecutionContext) -> None: ) return - # Try MinIO fallback (cold storage) + # Try S3 fallback (cold storage) if self.state_archival_service and settings.state_archive_enabled: ctx.initial_state = await self.state_archival_service.restore_state( ctx.session_id ) if ctx.initial_state: logger.debug( - "Restored state from MinIO", + "Restored state from S3", session_id=ctx.session_id[:12], state_size=len(ctx.initial_state), ) @@ -574,9 +574,9 @@ async def _save_state(self, ctx: ExecutionContext) -> None: max_redis_bytes = settings.state_max_redis_size_mb * 1024 * 1024 if raw_size > max_redis_bytes: - # Large state: store blob in MinIO, pointer in Redis + # Large state: store blob in S3, pointer in Redis logger.info( - "State exceeds Redis threshold, storing in MinIO", + "State exceeds Redis threshold, storing in S3", session_id=ctx.session_id[:12], state_size_mb=round(raw_size / 1024 / 1024, 1), threshold_mb=settings.state_max_redis_size_mb, @@ -593,9 +593,9 @@ async def _save_state(self, ctx: ExecutionContext) -> None: ttl_seconds=settings.state_ttl_seconds, ) else: - # MinIO failed, fall back to Redis anyway + # S3 archival failed, fall back to Redis anyway logger.warning( - "MinIO archival failed, falling back to Redis", + "S3 archival failed, falling back to Redis", session_id=ctx.session_id[:12], ) await self.state_service.save_state( diff --git a/src/services/sandbox/egress_firewall.py b/src/services/sandbox/egress_firewall.py index f7ed832..6afd3e5 100644 --- a/src/services/sandbox/egress_firewall.py +++ b/src/services/sandbox/egress_firewall.py @@ -2,7 +2,7 @@ Without this, enabling ENABLE_SANDBOX_NETWORK shares the API container's network namespace with sandbox processes, which gives them direct access to -internal services like Redis/MinIO on the docker bridge — full SSRF. +internal services like Redis/S3 on the docker bridge — full SSRF. The hostname-allowlist proxy only protects HTTPS_PROXY-aware clients (pip, npm, requests with proxy support). Raw socket calls — `socket.create_connection`, @@ -134,7 +134,7 @@ def install_sandbox_egress_rules(sandbox_uid: int, proxy_port: int) -> bool: "ACCEPT", ], # Drop everything else from the sandbox uid. This is what blocks - # direct connections to Redis/MinIO/internet. + # direct connections to Redis/S3/internet. [ "-A", "OUTPUT", diff --git a/src/services/sandbox/egress_proxy.py b/src/services/sandbox/egress_proxy.py index f03380c..4f87bfa 100644 --- a/src/services/sandbox/egress_proxy.py +++ b/src/services/sandbox/egress_proxy.py @@ -9,7 +9,7 @@ the upstream. Allowlist enforcement happens on the requested host name. - Refuses to open tunnels to private IP ranges (RFC 1918, loopback, link-local) even if a public hostname resolves to one. This stops trivial SSRF against - Redis/MinIO/etc. on the same docker network. + Redis/S3/etc. on the same docker network. - Refuses any request whose host doesn't match the allowlist. Allowlist defaults cover Python (PyPI), Node (npmjs), Go modules, and diff --git a/src/services/session.py b/src/services/session.py index 58fbbca..638fd37 100644 --- a/src/services/session.py +++ b/src/services/session.py @@ -99,21 +99,19 @@ async def _cleanup_loop(self) -> None: else: logger.debug("No expired sessions to clean up") - # Opportunistically prune orphan MinIO objects (configurable) - if self._file_service and settings.enable_orphan_minio_cleanup: + # Opportunistically prune orphan S3 objects (configurable) + if self._file_service and settings.enable_orphan_s3_cleanup: try: deleted_orphans = ( await self._file_service.cleanup_orphan_objects() ) if deleted_orphans: logger.info( - "Pruned orphan MinIO objects", + "Pruned orphan S3 objects", deleted_orphans=deleted_orphans, ) except Exception as e: - logger.error( - "Failed pruning orphan MinIO objects", error=str(e) - ) + logger.error("Failed pruning orphan S3 objects", error=str(e)) # Wait for the configured cleanup interval await asyncio.sleep(settings.session_cleanup_interval_minutes * 60) diff --git a/src/services/state.py b/src/services/state.py index d26aca9..e3c99c6 100644 --- a/src/services/state.py +++ b/src/services/state.py @@ -9,7 +9,7 @@ Hybrid storage: - Hot storage: Redis with configurable TTL (default 2 hours) -- Cold storage: MinIO for long-term archival (handled by StateArchivalService) +- Cold storage: S3 for long-term archival (handled by StateArchivalService) Storage format: - Redis storage: Base64-encoded @@ -158,11 +158,11 @@ async def save_state_pointer( state_b64: str, ttl_seconds: Optional[int] = None, ) -> Tuple[bool, Optional[str]]: - """Save only hash and metadata to Redis (state blob stored in MinIO). + """Save only hash and metadata to Redis (state blob stored in S3). Used when state exceeds the Redis size threshold. The full state - is stored in MinIO; Redis only holds the hash and metadata for - fast lookups. The orchestrator's _load_state MinIO fallback + is stored in S3; Redis only holds the hash and metadata for + fast lookups. The orchestrator's _load_state S3 fallback handles retrieval. Args: @@ -192,7 +192,7 @@ async def save_state_pointer( "size_bytes": len(raw_bytes), "hash": state_hash, "created_at": now.isoformat(), - "storage": "minio", + "storage": "s3", } ) pipe.setex(self._meta_key(session_id), ttl_seconds, meta) @@ -200,7 +200,7 @@ async def save_state_pointer( await pipe.execute() logger.info( - "Saved state pointer to Redis (blob in MinIO)", + "Saved state pointer to Redis (blob in S3)", session_id=session_id[:12], state_size=len(raw_bytes), hash=state_hash[:12], diff --git a/src/services/state_archival.py b/src/services/state_archival.py index e95a7b8..2d05d12 100644 --- a/src/services/state_archival.py +++ b/src/services/state_archival.py @@ -1,18 +1,18 @@ -"""State archival service for MinIO cold storage. +"""State archival service for S3 cold storage. -This service handles archiving Python session states from Redis to MinIO +This service handles archiving Python session states from Redis to S3 for long-term storage, and restoring them on demand. Hybrid storage architecture: - Hot storage: Redis with 2-hour TTL (fast access) -- Cold storage: MinIO with 7-day TTL (long-term archival) +- Cold storage: S3 with configurable TTL (long-term archival) When a state is accessed: 1. Check Redis first (hot storage) -2. If not found, check MinIO (cold storage) -3. If found in MinIO, restore to Redis +2. If not found, check S3 (cold storage) +3. If found in S3, restore to Redis -States are archived to MinIO when: +States are archived to S3 when: - TTL in Redis drops below archive_after_seconds threshold - This indicates the session has been inactive for a while """ @@ -22,9 +22,9 @@ from datetime import datetime, timezone from typing import Optional, Dict, Any +import boto3 import structlog -from minio import Minio -from minio.error import S3Error +from botocore.exceptions import ClientError from ..config import settings from .state import StateService @@ -33,74 +33,79 @@ class StateArchivalService: - """Manages archiving and restoring Python session states to/from MinIO. + """Manages archiving and restoring Python session states to/from S3. - States are stored in MinIO under the path: + States are stored in S3 under the path: states/{session_id}/state.dat - Metadata is stored as object tags/custom metadata: + Metadata is stored as S3 object metadata: - archived_at: ISO timestamp - original_size: Size before any host-side compression - session_id: The session identifier """ - # MinIO path prefix for archived states STATE_PREFIX = "states" def __init__( self, state_service: Optional[StateService] = None, - minio_client: Optional[Minio] = None, + s3_client: Optional[Any] = None, ): """Initialize the archival service. Args: state_service: StateService instance for Redis operations - minio_client: Optional MinIO client (creates new one if not provided) + s3_client: Optional boto3 S3 client (creates new one if not provided) """ self.state_service = state_service or StateService() - self.minio_client = minio_client or Minio( - settings.minio_endpoint, - access_key=settings.minio_access_key, - secret_key=settings.minio_secret_key, - secure=settings.minio_secure, + self.s3_client = s3_client or boto3.client( + "s3", + endpoint_url=settings.s3.endpoint_url, + aws_access_key_id=settings.s3_access_key, + aws_secret_access_key=settings.s3_secret_key, + region_name=settings.s3_region, ) - self.bucket_name = settings.minio_bucket + self.bucket_name = settings.s3_bucket self._bucket_checked = False def _get_state_object_key(self, session_id: str) -> str: - """Generate MinIO object key for a session state.""" + """Generate S3 object key for a session state.""" return f"{self.STATE_PREFIX}/{session_id}/state.dat" async def _ensure_bucket_exists(self) -> None: - """Ensure the MinIO bucket exists.""" + """Ensure the S3 bucket exists.""" if self._bucket_checked: return try: loop = asyncio.get_event_loop() - bucket_exists = await loop.run_in_executor( - None, self.minio_client.bucket_exists, self.bucket_name - ) - - if not bucket_exists: + try: await loop.run_in_executor( - None, self.minio_client.make_bucket, self.bucket_name - ) - logger.info( - "Created MinIO bucket for state archival", bucket=self.bucket_name + None, + lambda: self.s3_client.head_bucket(Bucket=self.bucket_name), ) + except ClientError as e: + if e.response["Error"]["Code"] in ("404", "NoSuchBucket"): + await loop.run_in_executor( + None, + lambda: self.s3_client.create_bucket(Bucket=self.bucket_name), + ) + logger.info( + "Created S3 bucket for state archival", bucket=self.bucket_name + ) + else: + raise self._bucket_checked = True - except S3Error as e: + except ClientError as e: logger.error( "Failed to ensure bucket exists", error=str(e), bucket=self.bucket_name ) raise async def archive_state(self, session_id: str, state_data: str) -> bool: - """Archive a session state to MinIO. + """Archive a session state to S3. Args: session_id: Session identifier @@ -115,31 +120,29 @@ async def archive_state(self, session_id: str, state_data: str) -> bool: object_key = self._get_state_object_key(session_id) state_bytes = state_data.encode("utf-8") - # Create metadata metadata = { "archived_at": datetime.now(timezone.utc).isoformat(), "original_size": str(len(state_bytes)), "session_id": session_id, } - # Upload to MinIO loop = asyncio.get_event_loop() data_stream = io.BytesIO(state_bytes) await loop.run_in_executor( None, - lambda: self.minio_client.put_object( - self.bucket_name, - object_key, - data_stream, - len(state_bytes), - content_type="application/octet-stream", - metadata=metadata, + lambda: self.s3_client.put_object( + Bucket=self.bucket_name, + Key=object_key, + Body=data_stream, + ContentLength=len(state_bytes), + ContentType="application/octet-stream", + Metadata=metadata, ), ) logger.info( - "Archived state to MinIO", + "Archived state to S3", session_id=session_id[:12], size_bytes=len(state_bytes), object_key=object_key, @@ -153,7 +156,7 @@ async def archive_state(self, session_id: str, state_data: str) -> bool: return False async def restore_state(self, session_id: str) -> Optional[str]: - """Restore a session state from MinIO. + """Restore a session state from S3. If found, the state is also saved back to Redis for fast access. @@ -169,17 +172,16 @@ async def restore_state(self, session_id: str) -> Optional[str]: object_key = self._get_state_object_key(session_id) loop = asyncio.get_event_loop() - # Check if object exists try: response = await loop.run_in_executor( None, - lambda: self.minio_client.get_object(self.bucket_name, object_key), + lambda: self.s3_client.get_object( + Bucket=self.bucket_name, Key=object_key + ), ) - state_bytes = response.read() - response.close() - response.release_conn() - except S3Error as e: - if e.code == "NoSuchKey": + state_bytes = response["Body"].read() + except ClientError as e: + if e.response["Error"]["Code"] == "NoSuchKey": logger.debug("No archived state found", session_id=session_id[:12]) return None raise @@ -197,18 +199,17 @@ async def restore_state(self, session_id: str) -> Optional[str]: session_id, state_data, ttl_seconds=settings.state_ttl_seconds ) else: - # Too large for Redis — save only pointer await self.state_service.save_state_pointer( session_id, state_data, ttl_seconds=settings.state_ttl_seconds ) logger.info( - "State too large for Redis, kept in MinIO only", + "State too large for Redis, kept in S3 only", session_id=session_id[:12], state_size_mb=round(raw_size / 1024 / 1024, 1), ) logger.info( - "Restored state from MinIO", + "Restored state from S3", session_id=session_id[:12], size_bytes=len(state_bytes), ) @@ -221,7 +222,7 @@ async def restore_state(self, session_id: str) -> Optional[str]: return None async def delete_archived_state(self, session_id: str) -> bool: - """Delete an archived state from MinIO. + """Delete an archived state from S3. Args: session_id: Session identifier @@ -235,23 +236,17 @@ async def delete_archived_state(self, session_id: str) -> bool: object_key = self._get_state_object_key(session_id) loop = asyncio.get_event_loop() + # boto3 delete_object is idempotent — no error on missing key await loop.run_in_executor( None, - lambda: self.minio_client.remove_object(self.bucket_name, object_key), + lambda: self.s3_client.delete_object( + Bucket=self.bucket_name, Key=object_key + ), ) logger.debug("Deleted archived state", session_id=session_id[:12]) return True - except S3Error as e: - if e.code == "NoSuchKey": - return True # Already doesn't exist - logger.error( - "Failed to delete archived state", - session_id=session_id[:12], - error=str(e), - ) - return False except Exception as e: logger.error( "Failed to delete archived state", @@ -261,7 +256,7 @@ async def delete_archived_state(self, session_id: str) -> bool: return False async def has_archived_state(self, session_id: str) -> bool: - """Check if a session has archived state in MinIO. + """Check if a session has archived state in S3. Args: session_id: Session identifier @@ -278,11 +273,13 @@ async def has_archived_state(self, session_id: str) -> bool: try: await loop.run_in_executor( None, - lambda: self.minio_client.stat_object(self.bucket_name, object_key), + lambda: self.s3_client.head_object( + Bucket=self.bucket_name, Key=object_key + ), ) return True - except S3Error as e: - if e.code == "NoSuchKey": + except ClientError as e: + if e.response["Error"]["Code"] == "404": return False raise @@ -295,7 +292,7 @@ async def has_archived_state(self, session_id: str) -> bool: return False async def archive_inactive_states(self) -> Dict[str, Any]: - """Archive inactive states from Redis to MinIO. + """Archive inactive states from Redis to S3. This is the main archival task that runs periodically. It finds states with low TTL (indicating inactivity) and archives them. @@ -313,22 +310,18 @@ async def archive_inactive_states(self) -> Dict[str, Any]: } try: - # Find states ready for archival states_to_archive = await self.state_service.get_states_for_archival() for session_id, remaining_ttl, size in states_to_archive: try: - # Check if already archived if await self.has_archived_state(session_id): summary["already_archived"] += 1 continue - # Get the state data state_data = await self.state_service.get_state(session_id) if not state_data: continue - # Archive to MinIO if await self.archive_state(session_id, state_data): summary["archived"] += 1 else: @@ -379,22 +372,22 @@ async def cleanup_expired_archives(self) -> Dict[str, Any]: ttl_days = settings.state_archive_ttl_days cutoff = datetime.now(timezone.utc).timestamp() - (ttl_days * 24 * 3600) - # List all archived states objects = await loop.run_in_executor( None, lambda: list( - self.minio_client.list_objects( - self.bucket_name, prefix=prefix, recursive=True - ) + self.s3_client.get_paginator("list_objects_v2") + .paginate(Bucket=self.bucket_name, Prefix=prefix) + .search("Contents[]") ), ) - for obj in objects: + for entry in objects: + if entry is None: + continue try: - # Check object age - if obj.last_modified and obj.last_modified.timestamp() < cutoff: - # Extract session_id from path - parts = obj.object_name.split("/") + last_modified = entry.get("LastModified") + if last_modified and last_modified.timestamp() < cutoff: + parts = entry["Key"].split("/") if len(parts) >= 2: session_id = parts[1] if await self.delete_archived_state(session_id): @@ -405,7 +398,7 @@ async def cleanup_expired_archives(self) -> Dict[str, Any]: except Exception as e: logger.warning( "Failed to cleanup archived state", - object_name=obj.object_name, + object_name=entry.get("Key"), error=str(e), ) summary["failed"] += 1 diff --git a/src/utils/config_validator.py b/src/utils/config_validator.py index e44f65d..237ed6a 100644 --- a/src/utils/config_validator.py +++ b/src/utils/config_validator.py @@ -4,8 +4,8 @@ import shutil from typing import List, Dict, Any import redis -from minio import Minio -from minio.error import S3Error +import boto3 +from botocore.exceptions import ClientError from ..config import settings @@ -38,7 +38,7 @@ def validate_all(self) -> bool: # Validate external services self._validate_redis_connection() - self._validate_minio_connection() + self._validate_s3_connection() self._validate_nsjail() # Log results @@ -121,40 +121,40 @@ def _validate_redis_connection(self): else: self.errors.append(f"Redis validation error: {e}") - def _validate_minio_connection(self): - """Validate MinIO/S3 connection.""" + def _validate_s3_connection(self): + """Validate S3 storage connection.""" try: - client = Minio( - settings.minio_endpoint, - access_key=settings.minio_access_key, - secret_key=settings.minio_secret_key, - secure=settings.minio_secure, + client = boto3.client( + "s3", + endpoint_url=settings.s3.endpoint_url, + aws_access_key_id=settings.s3_access_key, + aws_secret_access_key=settings.s3_secret_key, + region_name=settings.s3_region, ) # Test connection by listing buckets - buckets = list(client.list_buckets()) + response = client.list_buckets() + buckets = response.get("Buckets", []) # Check if our bucket exists bucket_exists = any( - bucket.name == settings.minio_bucket for bucket in buckets + bucket["Name"] == settings.s3_bucket for bucket in buckets ) if not bucket_exists: self.warnings.append( - f"MinIO bucket '{settings.minio_bucket}' does not exist - will be created" + f"S3 bucket '{settings.s3_bucket}' does not exist - will be created" ) - except S3Error as e: - # Treat as warning in development mode to allow startup without MinIO + except ClientError as e: if settings.api_debug: - self.warnings.append(f"MinIO S3 error: {e}") + self.warnings.append(f"S3 error: {e}") else: - self.errors.append(f"MinIO S3 error: {e}") + self.errors.append(f"S3 error: {e}") except Exception as e: - # Treat as warning in development mode if settings.api_debug: - self.warnings.append(f"MinIO validation error: {e}") + self.warnings.append(f"S3 validation error: {e}") else: - self.errors.append(f"MinIO validation error: {e}") + self.errors.append(f"S3 validation error: {e}") def _validate_nsjail(self): """Validate nsjail sandbox availability.""" diff --git a/src/utils/logging.py b/src/utils/logging.py index 5d178e6..8155fa7 100644 --- a/src/utils/logging.py +++ b/src/utils/logging.py @@ -95,7 +95,8 @@ def configure_third_party_loggers() -> None: """Configure logging levels for third-party libraries.""" # Reduce noise from third-party libraries logging.getLogger("urllib3").setLevel(logging.WARNING) - logging.getLogger("minio").setLevel(logging.WARNING) + logging.getLogger("botocore").setLevel(logging.WARNING) + logging.getLogger("boto3").setLevel(logging.WARNING) # Suppress uvicorn access logs - RequestLoggingMiddleware handles this # with status-aware levels (DEBUG for 2xx, WARNING for 4xx, ERROR for 5xx). diff --git a/tests/conftest.py b/tests/conftest.py index 668b465..b23f054 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -5,7 +5,6 @@ import pytest import redis.asyncio as redis -from minio import Minio # Set test environment before importing config # These match the docker-compose infrastructure settings @@ -13,10 +12,10 @@ os.environ.setdefault("API_KEY", "test-api-key-for-testing-12345") os.environ.setdefault("REDIS_HOST", "localhost") os.environ.setdefault("REDIS_PORT", "6379") -os.environ.setdefault("MINIO_ENDPOINT", "localhost:9000") -os.environ.setdefault("MINIO_ACCESS_KEY", "minioadmin") -os.environ.setdefault("MINIO_SECRET_KEY", "minioadmin") -os.environ.setdefault("MINIO_SECURE", "false") +os.environ.setdefault("S3_ENDPOINT", "localhost:3900") +os.environ.setdefault("S3_ACCESS_KEY", "minioadmin") +os.environ.setdefault("S3_SECRET_KEY", "minioadmin") +os.environ.setdefault("S3_SECURE", "false") from src.services.session import SessionService from src.services.execution import CodeExecutionService @@ -48,19 +47,17 @@ def mock_redis(): @pytest.fixture -def mock_minio(): - """Mock MinIO client for testing.""" - mock_client = MagicMock(spec=Minio) - - # Mock common MinIO operations - mock_client.bucket_exists.return_value = True - mock_client.make_bucket.return_value = None - mock_client.presigned_put_object.return_value = "https://example.com/upload" - mock_client.presigned_get_object.return_value = "https://example.com/download" - mock_client.stat_object.return_value = MagicMock(size=1024) - mock_client.put_object.return_value = None - mock_client.get_object.return_value = MagicMock() - mock_client.remove_object.return_value = None +def mock_s3_client(): + """Mock S3 client for testing.""" + mock_client = MagicMock() + + mock_client.head_bucket.return_value = {} + mock_client.create_bucket.return_value = {} + mock_client.generate_presigned_url.return_value = "https://example.com/presigned" + mock_client.head_object.return_value = {"ContentLength": 1024} + mock_client.put_object.return_value = {} + mock_client.get_object.return_value = {"Body": MagicMock(read=MagicMock(return_value=b""))} + mock_client.delete_object.return_value = {} return mock_client @@ -116,11 +113,12 @@ def execution_service(mock_sandbox_manager): @pytest.fixture -def file_service(mock_minio, mock_redis): +def file_service(mock_s3_client, mock_redis): """Create FileService instance with mocked dependencies.""" - with patch("src.services.file.Minio", return_value=mock_minio), patch( - "src.services.file.redis.Redis", return_value=mock_redis + with patch("src.services.file.boto3") as mock_boto3, patch( + "src.services.file.redis.from_url", return_value=mock_redis ): + mock_boto3.client.return_value = mock_s3_client service = FileService() yield service @@ -136,11 +134,12 @@ def mock_settings(): mock_settings.redis_url = None mock_settings.session_ttl_hours = 24 mock_settings.session_cleanup_interval_minutes = 60 - mock_settings.minio_endpoint = "localhost:9000" - mock_settings.minio_access_key = "test_key" - mock_settings.minio_secret_key = "test_secret" - mock_settings.minio_secure = False - mock_settings.minio_bucket = "test-bucket" + mock_settings.s3_endpoint = "localhost:3900" + mock_settings.s3_access_key = "test_key" + mock_settings.s3_secret_key = "test_secret" + mock_settings.s3_secure = False + mock_settings.s3_bucket = "test-bucket" + mock_settings.s3_region = "garage" mock_settings.api_key = "test-api-key-12345" mock_settings.max_execution_time = 30 mock_settings.max_file_size_mb = 10 diff --git a/tests/functional/test_concurrent_file_exec.py b/tests/functional/test_concurrent_file_exec.py index fe71c16..8e3d3a0 100644 --- a/tests/functional/test_concurrent_file_exec.py +++ b/tests/functional/test_concurrent_file_exec.py @@ -1,7 +1,7 @@ """Functional tests for concurrent execution with large file uploads. Regression test for event loop blocking bug: when large files (>40MB) are -downloaded from MinIO during file mounting, response.read() blocks the +downloaded from S3 during file mounting, response.read() blocks the asyncio event loop, starving all concurrent HTTP connections. This manifests as "socket hang up" errors in clients like LibreChat. diff --git a/tests/integration/test_api_contracts.py b/tests/integration/test_api_contracts.py index e48fbcd..59f1221 100644 --- a/tests/integration/test_api_contracts.py +++ b/tests/integration/test_api_contracts.py @@ -133,7 +133,7 @@ def mock_file_service(): created_at=datetime.utcnow(), path="/test.txt", ) - service.download_file.return_value = "https://minio.example.com/download-url" + service.download_file.return_value = "https://s3.example.com/download-url" service.validate_uploads = MagicMock(return_value=None) return service diff --git a/tests/integration/test_librechat_compat.py b/tests/integration/test_librechat_compat.py index 2c04a33..093e96e 100644 --- a/tests/integration/test_librechat_compat.py +++ b/tests/integration/test_librechat_compat.py @@ -2035,7 +2035,7 @@ def test_nested_filename_preserved_in_response( assert response.status_code == 200 result = response.json() assert result["files"][0]["filename"] == "skills/weather_lookup/SKILL.md" - # The stored filename also preserves the path so MinIO/sandbox round-trip works. + # The stored filename also preserves the path so S3/sandbox round-trip works. assert "skills/weather_lookup/SKILL.md" in setup_mocks["stored"] diff --git a/tests/integration/test_session_behavior.py b/tests/integration/test_session_behavior.py index 7881f00..effa95e 100644 --- a/tests/integration/test_session_behavior.py +++ b/tests/integration/test_session_behavior.py @@ -498,7 +498,7 @@ def test_generated_file_downloadable(self, client, auth_headers): path="/output.txt", ) ] - mock_file_service.download_file.return_value = "https://minio.test/download" + mock_file_service.download_file.return_value = "https://s3.test/download" from src.dependencies.services import ( get_session_service, diff --git a/tests/unit/test_file_service.py b/tests/unit/test_file_service.py index a261fff..58aebb6 100644 --- a/tests/unit/test_file_service.py +++ b/tests/unit/test_file_service.py @@ -9,12 +9,14 @@ @pytest.fixture -def mock_minio_client(): - """Mock MinIO client.""" +def mock_s3_client(): + """Mock S3 client.""" client = MagicMock() - client.bucket_exists = MagicMock(return_value=True) + client.head_bucket = MagicMock(return_value={}) client.put_object = MagicMock() client.get_object = MagicMock() + client.delete_object = MagicMock() + client.head_object = MagicMock(return_value={"ContentLength": 1024}) return client @@ -35,14 +37,14 @@ def mock_redis_client(): @pytest.fixture -def file_service(mock_minio_client, mock_redis_client): +def file_service(mock_s3_client, mock_redis_client): """Create FileService with mocked clients.""" - with patch("src.services.file.Minio") as mock_minio_class: - mock_minio_class.return_value = mock_minio_client + with patch("src.services.file.boto3") as mock_boto3: + mock_boto3.client.return_value = mock_s3_client with patch("src.services.file.redis.from_url") as mock_redis_from_url: mock_redis_from_url.return_value = mock_redis_client service = FileService() - service.minio_client = mock_minio_client + service.s3_client = mock_s3_client service.redis_client = mock_redis_client return service @@ -52,7 +54,7 @@ class TestUpdateFileContent: @pytest.mark.asyncio async def test_update_file_content_rejects_read_only_file( - self, file_service, mock_minio_client, mock_redis_client + self, file_service, mock_s3_client, mock_redis_client ): """Read-only linked aliases must not overwrite the source object.""" session_id = "test-session" @@ -73,13 +75,13 @@ async def test_update_file_content_rejects_read_only_file( ) assert result is False - mock_minio_client.put_object.assert_not_called() + mock_s3_client.put_object.assert_not_called() @pytest.mark.asyncio async def test_update_file_content_success( - self, file_service, mock_minio_client, mock_redis_client + self, file_service, mock_s3_client, mock_redis_client ): - """Test that update_file_content overwrites file in MinIO.""" + """Test that update_file_content overwrites file in S3.""" session_id = "test-session-123" file_id = "test-file-456" new_content = b"modified file content" @@ -99,14 +101,12 @@ async def test_update_file_content_success( ) assert result is True - # Verify MinIO put_object was called - mock_minio_client.put_object.assert_called_once() - # Verify metadata was updated + mock_s3_client.put_object.assert_called_once() mock_redis_client.hset.assert_called() @pytest.mark.asyncio async def test_update_file_content_updates_metadata( - self, file_service, mock_minio_client, mock_redis_client + self, file_service, mock_s3_client, mock_redis_client ): """Test that update_file_content updates file size metadata.""" session_id = "test-session-123" @@ -177,10 +177,10 @@ async def test_update_file_content_no_object_key( assert result is False @pytest.mark.asyncio - async def test_update_file_content_minio_error( - self, file_service, mock_minio_client, mock_redis_client + async def test_update_file_content_s3_error( + self, file_service, mock_s3_client, mock_redis_client ): - """Test handling of MinIO error during update.""" + """Test handling of S3 error during update.""" session_id = "test-session" file_id = "file-id" @@ -191,8 +191,7 @@ async def test_update_file_content_minio_error( "content_type": "text/plain", } - # Mock MinIO error - mock_minio_client.put_object.side_effect = Exception("MinIO connection error") + mock_s3_client.put_object.side_effect = Exception("S3 connection error") result = await file_service.update_file_content( session_id=session_id, @@ -204,7 +203,7 @@ async def test_update_file_content_minio_error( @pytest.mark.asyncio async def test_update_file_content_preserves_content_type( - self, file_service, mock_minio_client, mock_redis_client + self, file_service, mock_s3_client, mock_redis_client ): """Test that content_type is preserved from original metadata.""" session_id = "test-session" @@ -225,14 +224,12 @@ async def test_update_file_content_preserves_content_type( ) assert result is True - # Verify put_object was called with preserved content_type - put_call = mock_minio_client.put_object.call_args - # The content_type should be "image/png" from the metadata - assert "image/png" in str(put_call) + put_call = mock_s3_client.put_object.call_args + assert put_call.kwargs.get("ContentType") == "image/png" @pytest.mark.asyncio async def test_update_file_content_only_updates_size( - self, file_service, mock_minio_client, mock_redis_client + self, file_service, mock_s3_client, mock_redis_client ): """Test that update_file_content only updates size metadata.""" session_id = "test-session" @@ -354,7 +351,7 @@ async def test_link_file_into_session_reuses_existing_alias( @pytest.mark.asyncio async def test_delete_linked_file_only_removes_metadata( - self, file_service, mock_minio_client, mock_redis_client + self, file_service, mock_s3_client, mock_redis_client ): """Deleting a linked alias must not delete the shared object.""" mock_redis_client.hgetall.return_value = { @@ -375,13 +372,13 @@ async def test_delete_linked_file_only_removes_metadata( result = await file_service.delete_file("target-session", "linked-file") assert result is True - mock_minio_client.remove_object.assert_not_called() + mock_s3_client.delete_object.assert_not_called() mock_redis_client.delete.assert_called_once() assert mock_redis_client.srem.call_count == 2 @pytest.mark.asyncio async def test_delete_source_file_keeps_object_when_aliases_exist( - self, file_service, mock_minio_client, mock_redis_client + self, file_service, mock_s3_client, mock_redis_client ): """Deleting the source metadata must not delete a shared object still referenced by aliases.""" mock_redis_client.hgetall.return_value = { @@ -400,12 +397,12 @@ async def test_delete_source_file_keeps_object_when_aliases_exist( result = await file_service.delete_file("source-session", "source-file") assert result is True - mock_minio_client.remove_object.assert_not_called() + mock_s3_client.delete_object.assert_not_called() mock_redis_client.delete.assert_called_once() @pytest.mark.asyncio async def test_delete_last_linked_file_cleans_orphaned_shared_object( - self, file_service, mock_minio_client, mock_redis_client + self, file_service, mock_s3_client, mock_redis_client ): """The final alias cleanup should delete the shared object once the source is gone.""" mock_redis_client.hgetall.side_effect = [ @@ -430,7 +427,7 @@ async def test_delete_last_linked_file_cleans_orphaned_shared_object( result = await file_service.delete_file("target-session", "linked-file") assert result is True - mock_minio_client.remove_object.assert_called_once_with( - file_service.bucket_name, - "sessions/source/uploads/source-file", + mock_s3_client.delete_object.assert_called_once_with( + Bucket=file_service.bucket_name, + Key="sessions/source/uploads/source-file", ) diff --git a/tests/unit/test_runner_nested_paths.py b/tests/unit/test_runner_nested_paths.py index d05270f..88c376d 100644 --- a/tests/unit/test_runner_nested_paths.py +++ b/tests/unit/test_runner_nested_paths.py @@ -327,7 +327,7 @@ async def test_new_file_alongside_unchanged_mount(self, runner, tmp_path): class TestMountFilesNestedPaths: """The mount path is harder to fully exercise because it pulls bytes from - MinIO. We patch FileService.stream_file_to_path and just confirm that + S3. We patch FileService.stream_file_to_path and just confirm that parent directories are created at the right nested location.""" async def test_nested_filename_creates_parent_dirs(self, runner, tmp_path): diff --git a/tests/unit/test_upload_read_only.py b/tests/unit/test_upload_read_only.py index ff0f6f8..d044ca5 100644 --- a/tests/unit/test_upload_read_only.py +++ b/tests/unit/test_upload_read_only.py @@ -64,10 +64,9 @@ def file_service(self): from src.services.file import FileService svc = FileService() - # Don't actually talk to MinIO — patch the storage and metadata bits. svc._ensure_bucket_exists = AsyncMock() - svc.minio_client = MagicMock() - svc.minio_client.put_object = MagicMock() + svc.s3_client = MagicMock() + svc.s3_client.put_object = MagicMock() svc._store_file_metadata = AsyncMock() return svc From 7631955361905cb753c219d31cac6b468b90bd17 Mon Sep 17 00:00:00 2001 From: Joe Licata Date: Wed, 6 May 2026 20:44:49 +0000 Subject: [PATCH 2/5] chore: Update S3 configuration and health check commands - Changed S3 access and secret keys in .env.example and test configuration to new values. - Updated Docker Compose files to reflect the new S3 access keys and added default bucket environment variable. - Modified health check command in Docker Compose to use the new status command for better service monitoring. - Added RPC settings in garage.toml for improved service configuration. --- .env.example | 4 ++-- docker-compose.prod.yml | 12 +++++------- docker-compose.yml | 12 +++++------- garage.toml | 3 +++ tests/conftest.py | 4 ++-- 5 files changed, 17 insertions(+), 18 deletions(-) diff --git a/.env.example b/.env.example index af7ec6b..0dd127b 100644 --- a/.env.example +++ b/.env.example @@ -37,8 +37,8 @@ REDIS_PORT=6379 # ── S3 Storage (Garage) ──────────────────────────────────────── S3_ENDPOINT=localhost:3900 -S3_ACCESS_KEY=minioadmin -S3_SECRET_KEY=minioadmin +S3_ACCESS_KEY=GKminioadmin0000 +S3_SECRET_KEY=minioadminsecret # S3_SECURE=false # S3_BUCKET=code-interpreter-files # S3_REGION=garage diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml index 940eb85..798ee4e 100644 --- a/docker-compose.prod.yml +++ b/docker-compose.prod.yml @@ -68,21 +68,19 @@ services: image: dxflrs/garage:v2.3.0 container_name: code-interpreter-garage restart: unless-stopped - command: > - server - --single-node - --default-bucket ${S3_BUCKET:-code-interpreter-files} + command: /garage server --single-node --default-bucket ports: - "127.0.0.1:${S3_PORT:-3900}:3900" - "127.0.0.1:${GARAGE_ADMIN_PORT:-3903}:3903" environment: - GARAGE_DEFAULT_ACCESS_KEY: ${S3_ACCESS_KEY:-minioadmin} - GARAGE_DEFAULT_SECRET_KEY: ${S3_SECRET_KEY:-minioadmin} + GARAGE_DEFAULT_ACCESS_KEY: ${S3_ACCESS_KEY:-GKminioadmin0000} + GARAGE_DEFAULT_SECRET_KEY: ${S3_SECRET_KEY:-minioadminsecret} + GARAGE_DEFAULT_BUCKET: ${S3_BUCKET:-code-interpreter-files} volumes: - garage-data:/var/lib/garage - ./garage.toml:/etc/garage.toml healthcheck: - test: ["CMD-SHELL", "curl -sf http://localhost:3900 || exit 1"] + test: ["CMD", "/garage", "status"] interval: 10s timeout: 5s retries: 5 diff --git a/docker-compose.yml b/docker-compose.yml index 7484c34..37e3b94 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -70,21 +70,19 @@ services: image: dxflrs/garage:v2.3.0 container_name: code-interpreter-garage restart: unless-stopped - command: > - server - --single-node - --default-bucket ${S3_BUCKET:-code-interpreter-files} + command: /garage server --single-node --default-bucket ports: - "127.0.0.1:${S3_PORT:-3900}:3900" - "127.0.0.1:${GARAGE_ADMIN_PORT:-3903}:3903" environment: - GARAGE_DEFAULT_ACCESS_KEY: ${S3_ACCESS_KEY:-minioadmin} - GARAGE_DEFAULT_SECRET_KEY: ${S3_SECRET_KEY:-minioadmin} + GARAGE_DEFAULT_ACCESS_KEY: ${S3_ACCESS_KEY:-GKminioadmin0000} + GARAGE_DEFAULT_SECRET_KEY: ${S3_SECRET_KEY:-minioadminsecret} + GARAGE_DEFAULT_BUCKET: ${S3_BUCKET:-code-interpreter-files} volumes: - garage-data:/var/lib/garage - ./garage.toml:/etc/garage.toml healthcheck: - test: ["CMD-SHELL", "curl -sf http://localhost:3900 || exit 1"] + test: ["CMD", "/garage", "status"] interval: 10s timeout: 5s retries: 5 diff --git a/garage.toml b/garage.toml index d890c7e..dbfb1c3 100644 --- a/garage.toml +++ b/garage.toml @@ -3,6 +3,9 @@ data_dir = "/var/lib/garage/data" db_engine = "sqlite" replication_factor = 1 +rpc_bind_addr = "[::]:3901" +rpc_secret = "0000000000000000000000000000000000000000000000000000000000000000" + [s3_api] s3_region = "garage" api_bind_addr = "[::]:3900" diff --git a/tests/conftest.py b/tests/conftest.py index b23f054..37e432c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -13,8 +13,8 @@ os.environ.setdefault("REDIS_HOST", "localhost") os.environ.setdefault("REDIS_PORT", "6379") os.environ.setdefault("S3_ENDPOINT", "localhost:3900") -os.environ.setdefault("S3_ACCESS_KEY", "minioadmin") -os.environ.setdefault("S3_SECRET_KEY", "minioadmin") +os.environ.setdefault("S3_ACCESS_KEY", "GKminioadmin0000") +os.environ.setdefault("S3_SECRET_KEY", "minioadminsecret") os.environ.setdefault("S3_SECURE", "false") from src.services.session import SessionService From 25e8fed88ccbed0bbb67c52cd4a63246a380b2cf Mon Sep 17 00:00:00 2001 From: Joe Licata Date: Wed, 6 May 2026 20:56:55 +0000 Subject: [PATCH 3/5] refactor: Update mounted file edit tests for new output handling - Enhanced functional tests to verify that edits to mounted files produce new outputs with unique file_ids instead of in-place overwrites. - Updated test descriptions for clarity on expected behavior regarding modified files. - Introduced a helper function to locate modified files based on the original file_id, ensuring accurate assertions in test cases. --- tests/functional/test_mounted_file_edits.py | 57 +++++++++++++++++---- 1 file changed, 47 insertions(+), 10 deletions(-) diff --git a/tests/functional/test_mounted_file_edits.py b/tests/functional/test_mounted_file_edits.py index 9e66ab4..32089af 100644 --- a/tests/functional/test_mounted_file_edits.py +++ b/tests/functional/test_mounted_file_edits.py @@ -1,14 +1,29 @@ -"""Functional tests for mounted file edit persistence against a live API.""" +"""Functional tests for mounted file edit persistence against a live API. + +Modified mounted files surface as new generated outputs with fresh file_ids +(not in-place overwrites of the original S3 object). The exec response +includes a `modified_from` reference back to the original upload. LibreChat +downloads the new file_id to capture the edited content. +""" import pytest +def _find_modified_file(exec_result, original_file_id): + """Find the generated file entry that was modified from the original.""" + for f in exec_result.get("files", []): + modified_from = f.get("modified_from") + if modified_from and modified_from.get("id") == original_file_id: + return f + return None + + class TestMountedFileEdits: - """Verify in-place edits to mounted files persist after execution.""" + """Verify in-place edits to mounted files surface as new generated outputs.""" @pytest.mark.asyncio async def test_overwrite_mounted_file_persists(self, async_client, auth_headers): - """Overwriting a mounted user file should persist the new content.""" + """Overwriting a mounted file should produce a new output with modified content.""" upload = await async_client.post( "/upload", headers={"x-api-key": auth_headers["x-api-key"]}, @@ -36,10 +51,18 @@ async def test_overwrite_mounted_file_persists(self, async_client, auth_headers) }, ) assert execute.status_code == 200, execute.text - assert "File modified" in execute.json()["stdout"] + exec_result = execute.json() + assert "File modified" in exec_result["stdout"] + + modified = _find_modified_file(exec_result, file_id) + assert modified is not None, ( + f"No modified_from entry for {file_id} in files: {exec_result['files']}" + ) + assert modified.get("inherited") is not True + assert modified["name"] == "test.txt" download = await async_client.get( - f"/download/{session_id}/{file_id}", + f"/download/{session_id}/{modified['id']}", headers=auth_headers, ) assert download.status_code == 200 @@ -47,7 +70,7 @@ async def test_overwrite_mounted_file_persists(self, async_client, auth_headers) @pytest.mark.asyncio async def test_append_to_mounted_file_persists(self, async_client, auth_headers): - """Appending to a mounted file should persist all new lines.""" + """Appending to a mounted file should produce a new output with all lines.""" upload = await async_client.post( "/upload", headers={"x-api-key": auth_headers["x-api-key"]}, @@ -74,9 +97,16 @@ async def test_append_to_mounted_file_persists(self, async_client, auth_headers) }, ) assert execute.status_code == 200, execute.text + exec_result = execute.json() + + modified = _find_modified_file(exec_result, file_id) + assert modified is not None, ( + f"No modified_from entry for {file_id} in files: {exec_result['files']}" + ) + assert modified.get("inherited") is not True download = await async_client.get( - f"/download/{session_id}/{file_id}", + f"/download/{session_id}/{modified['id']}", headers=auth_headers, ) assert download.status_code == 200 @@ -123,7 +153,7 @@ async def test_delete_mounted_file_does_not_error( @pytest.mark.asyncio async def test_edit_csv_file_persists(self, async_client, auth_headers): - """Editing a mounted CSV file should persist the transformed data.""" + """Editing a mounted CSV file should produce a new output with transformed data.""" upload = await async_client.post( "/upload", headers={"x-api-key": auth_headers["x-api-key"]}, @@ -153,10 +183,17 @@ async def test_edit_csv_file_persists(self, async_client, auth_headers): }, ) assert execute.status_code == 200, execute.text - assert "csv updated" in execute.json()["stdout"] + exec_result = execute.json() + assert "csv updated" in exec_result["stdout"] + + modified = _find_modified_file(exec_result, file_id) + assert modified is not None, ( + f"No modified_from entry for {file_id} in files: {exec_result['files']}" + ) + assert modified.get("inherited") is not True download = await async_client.get( - f"/download/{session_id}/{file_id}", + f"/download/{session_id}/{modified['id']}", headers=auth_headers, ) assert download.status_code == 200 From 16807f0e7ccef5a8d39bd701e2f3dfbd744e53b0 Mon Sep 17 00:00:00 2001 From: Joe Licata Date: Wed, 6 May 2026 21:08:33 +0000 Subject: [PATCH 4/5] chore: Update tmpfs configuration and directory structure in Docker setup - Added a temporary filesystem configuration for /tmp with size and mode settings in both Docker Compose files. - Changed the directory for empty_proc from /tmp to /var/lib/code-interpreter in the Dockerfile and related service files. - Updated the sandbox execution commands to reflect the new empty_proc path and incorporated dynamic tmpfs size settings. --- Dockerfile | 2 +- docker-compose.prod.yml | 1 + docker-compose.yml | 1 + src/services/programmatic.py | 6 +++++- src/services/sandbox/executor.py | 8 +++++++- src/services/sandbox/pool.py | 6 +++++- 6 files changed, 20 insertions(+), 4 deletions(-) diff --git a/Dockerfile b/Dockerfile index 52c6e34..f9e68ae 100644 --- a/Dockerfile +++ b/Dockerfile @@ -233,7 +233,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ # ============================================ RUN mkdir -p /var/lib/code-interpreter/sandboxes && \ mkdir -p /mnt/data && \ - mkdir -p /tmp/empty_proc + mkdir -p /var/lib/code-interpreter/empty_proc RUN groupadd -g 1001 codeuser && \ useradd -u 1001 -g codeuser -m codeuser && \ diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml index 798ee4e..ce2b87a 100644 --- a/docker-compose.prod.yml +++ b/docker-compose.prod.yml @@ -30,6 +30,7 @@ services: # to the mounted files inside the container under /app/ssl. - ${SSL_CERTS_PATH:-./ssl}:/app/ssl:ro tmpfs: + - /tmp:size=512m,mode=1777 - /app/data:size=100m depends_on: redis: diff --git a/docker-compose.yml b/docker-compose.yml index 37e3b94..3e3890d 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -30,6 +30,7 @@ services: # to the mounted files inside the container under /app/ssl. - ${SSL_CERTS_PATH:-./ssl}:/app/ssl:ro tmpfs: + - /tmp:size=512m,mode=1777 - /app/data:size=100m depends_on: redis: diff --git a/src/services/programmatic.py b/src/services/programmatic.py index 9d34218..e8e081f 100644 --- a/src/services/programmatic.py +++ b/src/services/programmatic.py @@ -186,6 +186,8 @@ async def start_execution( shlex.quote(str(a)) for a in [settings.nsjail_binary] + nsjail_args ) + tmpfs_size = settings.sandbox_tmpfs_size_mb + wrapper_cmd = ( f"mount --bind {shlex.quote(str(sandbox_info.data_dir))} /mnt/data && " f"mount -t tmpfs -o size=1k tmpfs /var/lib/code-interpreter/sandboxes && " @@ -194,7 +196,9 @@ async def start_execution( f"mount -t tmpfs -o size=1k tmpfs /app/ssl && " f"mount -t tmpfs -o size=1k tmpfs /app/dashboard && " f"mount -t tmpfs -o size=1k tmpfs /app/src && " - f"mount --bind /tmp/empty_proc /proc && " + f"mount --bind /var/lib/code-interpreter/empty_proc /proc && " + # BUG-007: Ephemeral /tmp — prevent cross-session data persistence + f"mount -t tmpfs -o size={tmpfs_size}m,mode=1777 tmpfs /tmp && " f"{nsjail_cmd}" ) diff --git a/src/services/sandbox/executor.py b/src/services/sandbox/executor.py index 96a0019..2d60782 100644 --- a/src/services/sandbox/executor.py +++ b/src/services/sandbox/executor.py @@ -103,7 +103,11 @@ async def execute_command( if lang in ("java", "rs", "bash"): proc_mask = "" else: - proc_mask = "mount --bind /tmp/empty_proc /proc && " + proc_mask = ( + "mount --bind /var/lib/code-interpreter/empty_proc /proc && " + ) + + tmpfs_size = settings.sandbox_tmpfs_size_mb wrapper_cmd = ( # Bind sandbox dir to /mnt/data (before hiding sandboxes dir) @@ -120,6 +124,8 @@ async def execute_command( f"mount -t tmpfs -o size=1k tmpfs /app/src && " # BUG-003: Hide /proc (except Java which needs /proc/self/exe) f"{proc_mask}" + # BUG-007: Ephemeral /tmp — prevent cross-session data persistence + f"mount -t tmpfs -o size={tmpfs_size}m,mode=1777 tmpfs /tmp && " # Execute nsjail f"{nsjail_cmd}" ) diff --git a/src/services/sandbox/pool.py b/src/services/sandbox/pool.py index 26c8621..f4dd202 100644 --- a/src/services/sandbox/pool.py +++ b/src/services/sandbox/pool.py @@ -390,6 +390,8 @@ async def _start_repl_process( nsjail_cmd = " ".join( shlex.quote(str(a)) for a in [settings.nsjail_binary] + nsjail_args ) + tmpfs_size = settings.sandbox_tmpfs_size_mb + wrapper_cmd = ( # Bind sandbox dir to /mnt/data (before hiding sandboxes dir) f"mount --bind {shlex.quote(str(sandbox_info.data_dir))} /mnt/data && " @@ -404,7 +406,9 @@ async def _start_repl_process( f"mount -t tmpfs -o size=1k tmpfs /app/dashboard && " f"mount -t tmpfs -o size=1k tmpfs /app/src && " # BUG-003: Hide /proc (REPL is Python-only, always safe to mask) - f"mount --bind /tmp/empty_proc /proc && " + f"mount --bind /var/lib/code-interpreter/empty_proc /proc && " + # BUG-007: Ephemeral /tmp — prevent cross-session data persistence + f"mount -t tmpfs -o size={tmpfs_size}m,mode=1777 tmpfs /tmp && " # Execute nsjail f"{nsjail_cmd}" ) From 76987db031a43f34431803e6b8ff7533deaaf850 Mon Sep 17 00:00:00 2001 From: Joe Licata Date: Thu, 7 May 2026 00:50:55 +0000 Subject: [PATCH 5/5] chore: Enhance tmpfs security settings in Docker and service files - Updated tmpfs mount options for /tmp in Docker Compose files to include noexec, nosuid, and nodev for improved security. - Refactored sandbox execution commands to apply the new tmpfs settings consistently across service files. - Introduced dynamic handling of skill dependencies with updated mount options to enhance security and isolation. --- docker-compose.prod.yml | 2 +- docker-compose.yml | 2 +- src/services/programmatic.py | 15 +++++++++++++-- src/services/sandbox/executor.py | 15 +++++++++++++-- src/services/sandbox/pool.py | 15 +++++++++++++-- 5 files changed, 41 insertions(+), 8 deletions(-) diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml index ce2b87a..3bae193 100644 --- a/docker-compose.prod.yml +++ b/docker-compose.prod.yml @@ -30,7 +30,7 @@ services: # to the mounted files inside the container under /app/ssl. - ${SSL_CERTS_PATH:-./ssl}:/app/ssl:ro tmpfs: - - /tmp:size=512m,mode=1777 + - /tmp:size=512m,mode=1777,noexec,nosuid,nodev - /app/data:size=100m depends_on: redis: diff --git a/docker-compose.yml b/docker-compose.yml index 3e3890d..8807149 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -30,7 +30,7 @@ services: # to the mounted files inside the container under /app/ssl. - ${SSL_CERTS_PATH:-./ssl}:/app/ssl:ro tmpfs: - - /tmp:size=512m,mode=1777 + - /tmp:size=512m,mode=1777,noexec,nosuid,nodev - /app/data:size=100m depends_on: redis: diff --git a/src/services/programmatic.py b/src/services/programmatic.py index e8e081f..719e2dd 100644 --- a/src/services/programmatic.py +++ b/src/services/programmatic.py @@ -187,6 +187,8 @@ async def start_execution( ) tmpfs_size = settings.sandbox_tmpfs_size_mb + noexec_tmpfs = "noexec,nosuid,nodev," + deps_path = settings.skill_deps_path wrapper_cmd = ( f"mount --bind {shlex.quote(str(sandbox_info.data_dir))} /mnt/data && " @@ -197,8 +199,17 @@ async def start_execution( f"mount -t tmpfs -o size=1k tmpfs /app/dashboard && " f"mount -t tmpfs -o size=1k tmpfs /app/src && " f"mount --bind /var/lib/code-interpreter/empty_proc /proc && " - # BUG-007: Ephemeral /tmp — prevent cross-session data persistence - f"mount -t tmpfs -o size={tmpfs_size}m,mode=1777 tmpfs /tmp && " + # BUG-007: Ephemeral /tmp with noexec,nosuid,nodev + f"mount -t tmpfs -o {noexec_tmpfs}size={tmpfs_size}m,mode=1777 tmpfs /tmp && " + # BUG-008: Lock down other writable paths + f"mount -t tmpfs -o {noexec_tmpfs}size=1m,mode=1777 tmpfs /var/tmp && " + f"mount -t tmpfs -o {noexec_tmpfs}size=1m,mode=1777 tmpfs /run/lock && " + f"mount -t tmpfs -o {noexec_tmpfs}size=1m,mode=1733 tmpfs /var/lib/php/sessions && " + # BUG-008: skill-deps nosuid,nodev (not noexec — installed CLIs need exec) + f"(test -d {shlex.quote(deps_path)} && " + f"mount --bind {shlex.quote(deps_path)} {shlex.quote(deps_path)} && " + f"mount -o remount,bind,nosuid,nodev {shlex.quote(deps_path)} " + f"|| true) && " f"{nsjail_cmd}" ) diff --git a/src/services/sandbox/executor.py b/src/services/sandbox/executor.py index 2d60782..fae13f6 100644 --- a/src/services/sandbox/executor.py +++ b/src/services/sandbox/executor.py @@ -108,6 +108,8 @@ async def execute_command( ) tmpfs_size = settings.sandbox_tmpfs_size_mb + noexec_tmpfs = "noexec,nosuid,nodev," + deps_path = settings.skill_deps_path wrapper_cmd = ( # Bind sandbox dir to /mnt/data (before hiding sandboxes dir) @@ -124,8 +126,17 @@ async def execute_command( f"mount -t tmpfs -o size=1k tmpfs /app/src && " # BUG-003: Hide /proc (except Java which needs /proc/self/exe) f"{proc_mask}" - # BUG-007: Ephemeral /tmp — prevent cross-session data persistence - f"mount -t tmpfs -o size={tmpfs_size}m,mode=1777 tmpfs /tmp && " + # BUG-007: Ephemeral /tmp with noexec,nosuid,nodev + f"mount -t tmpfs -o {noexec_tmpfs}size={tmpfs_size}m,mode=1777 tmpfs /tmp && " + # BUG-008: Lock down other writable paths + f"mount -t tmpfs -o {noexec_tmpfs}size=1m,mode=1777 tmpfs /var/tmp && " + f"mount -t tmpfs -o {noexec_tmpfs}size=1m,mode=1777 tmpfs /run/lock && " + f"mount -t tmpfs -o {noexec_tmpfs}size=1m,mode=1733 tmpfs /var/lib/php/sessions && " + # BUG-008: skill-deps nosuid,nodev (not noexec — installed CLIs need exec) + f"(test -d {shlex.quote(deps_path)} && " + f"mount --bind {shlex.quote(deps_path)} {shlex.quote(deps_path)} && " + f"mount -o remount,bind,nosuid,nodev {shlex.quote(deps_path)} " + f"|| true) && " # Execute nsjail f"{nsjail_cmd}" ) diff --git a/src/services/sandbox/pool.py b/src/services/sandbox/pool.py index f4dd202..9f3fa63 100644 --- a/src/services/sandbox/pool.py +++ b/src/services/sandbox/pool.py @@ -391,6 +391,8 @@ async def _start_repl_process( shlex.quote(str(a)) for a in [settings.nsjail_binary] + nsjail_args ) tmpfs_size = settings.sandbox_tmpfs_size_mb + noexec_tmpfs = "noexec,nosuid,nodev," + deps_path = settings.skill_deps_path wrapper_cmd = ( # Bind sandbox dir to /mnt/data (before hiding sandboxes dir) @@ -407,8 +409,17 @@ async def _start_repl_process( f"mount -t tmpfs -o size=1k tmpfs /app/src && " # BUG-003: Hide /proc (REPL is Python-only, always safe to mask) f"mount --bind /var/lib/code-interpreter/empty_proc /proc && " - # BUG-007: Ephemeral /tmp — prevent cross-session data persistence - f"mount -t tmpfs -o size={tmpfs_size}m,mode=1777 tmpfs /tmp && " + # BUG-007: Ephemeral /tmp with noexec,nosuid,nodev + f"mount -t tmpfs -o {noexec_tmpfs}size={tmpfs_size}m,mode=1777 tmpfs /tmp && " + # BUG-008: Lock down other writable paths + f"mount -t tmpfs -o {noexec_tmpfs}size=1m,mode=1777 tmpfs /var/tmp && " + f"mount -t tmpfs -o {noexec_tmpfs}size=1m,mode=1777 tmpfs /run/lock && " + f"mount -t tmpfs -o {noexec_tmpfs}size=1m,mode=1733 tmpfs /var/lib/php/sessions && " + # BUG-008: skill-deps nosuid,nodev (not noexec — installed CLIs need exec) + f"(test -d {shlex.quote(deps_path)} && " + f"mount --bind {shlex.quote(deps_path)} {shlex.quote(deps_path)} && " + f"mount -o remount,bind,nosuid,nodev {shlex.quote(deps_path)} " + f"|| true) && " # Execute nsjail f"{nsjail_cmd}" )