Skip to content

Commit 2308dc3

Browse files
authored
Merge pull request usnavy13#43 from usnavy13/dev
Merge dev into main: File mounting, session management, and testing improvements
2 parents 290889d + 0350b5a commit 2308dc3

27 files changed

Lines changed: 2755 additions & 113 deletions

docker/repl_server.py

Lines changed: 15 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -291,7 +291,8 @@ def execute_code(
291291
timeout: int = 30,
292292
working_dir: str = "/mnt/data",
293293
initial_state: str = None,
294-
capture_state: bool = False
294+
capture_state: bool = False,
295+
args: list = None
295296
) -> dict:
296297
"""Execute code in isolated namespace and capture output.
297298
@@ -301,6 +302,7 @@ def execute_code(
301302
working_dir: Working directory for execution
302303
initial_state: Base64-encoded cloudpickle state to restore before execution
303304
capture_state: Whether to capture and return state after execution
305+
args: Optional list of command line arguments
304306
305307
Returns:
306308
Dict with exit_code, stdout, stderr, execution_time_ms, and optionally state/state_errors
@@ -330,6 +332,12 @@ def execute_code(
330332

331333
exit_code = 0
332334

335+
# Save and set sys.argv if args provided
336+
original_argv = sys.argv
337+
if args is not None:
338+
# Set sys.argv to [script_name] + args (matches file-based execution)
339+
sys.argv = ['/mnt/data/code.py'] + list(args)
340+
333341
# Set up timeout handler
334342
old_handler = signal.signal(signal.SIGALRM, timeout_handler)
335343
signal.alarm(timeout)
@@ -370,6 +378,9 @@ def execute_code(
370378
signal.alarm(0)
371379
signal.signal(signal.SIGALRM, old_handler)
372380

381+
# Restore sys.argv
382+
sys.argv = original_argv
383+
373384
# Restore working directory
374385
try:
375386
os.chdir(original_dir)
@@ -503,14 +514,16 @@ def main():
503514
working_dir = request.get("working_dir", "/mnt/data")
504515
initial_state = request.get("initial_state")
505516
capture_state = request.get("capture_state", False)
517+
args = request.get("args") # List of command line arguments
506518

507519
# Execute code with optional state persistence
508520
response = execute_code(
509521
code,
510522
timeout,
511523
working_dir,
512524
initial_state=initial_state,
513-
capture_state=capture_state
525+
capture_state=capture_state,
526+
args=args
514527
)
515528

516529
# Send response

src/api/files.py

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,9 @@
1414

1515
# Local application imports
1616
from ..config import settings
17-
from ..dependencies import FileServiceDep
17+
from ..dependencies import FileServiceDep, SessionServiceDep
18+
from ..models import SessionCreate
1819
from ..services.execution.output import OutputProcessor
19-
from ..utils.id_generator import generate_session_id
2020

2121
logger = structlog.get_logger(__name__)
2222
router = APIRouter()
@@ -55,6 +55,7 @@ async def upload_file(
5555
files: Optional[List[UploadFile]] = File(None),
5656
entity_id: Optional[str] = Form(None),
5757
file_service: FileServiceDep = None,
58+
session_service: SessionServiceDep = None,
5859
):
5960
"""Upload files with multipart form handling - LibreChat compatible.
6061
@@ -112,8 +113,17 @@ async def upload_file(
112113

113114
uploaded_files = []
114115

115-
# Create a session ID for this upload
116-
session_id = generate_session_id()
116+
# Create a real session for file uploads
117+
# This enables session reuse when files are referenced in /exec
118+
metadata = {}
119+
if entity_id:
120+
metadata["entity_id"] = entity_id
121+
session = await session_service.create_session(SessionCreate(metadata=metadata))
122+
session_id = session.session_id
123+
124+
# Determine if this is an agent file (uploaded with entity_id)
125+
# Agent files are read-only and cannot be modified by user code
126+
is_agent_file = entity_id is not None and len(entity_id) > 0
117127

118128
for file in upload_files:
119129
# Read file content
@@ -125,6 +135,7 @@ async def upload_file(
125135
filename=file.filename,
126136
content=content,
127137
content_type=file.content_type,
138+
is_agent_file=is_agent_file,
128139
)
129140

130141
# Sanitize filename to match what will be used in container

src/models/exec.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ class FileRef(BaseModel):
1414
id: str
1515
name: str
1616
path: Optional[str] = None # Make path optional
17+
session_id: Optional[str] = None # Session ID for cross-message file persistence
1718

1819

1920
class RequestFile(BaseModel):
@@ -22,6 +23,10 @@ class RequestFile(BaseModel):
2223
id: str
2324
session_id: str
2425
name: str
26+
restore_state: bool = Field(
27+
default=False,
28+
description="If true, restore Python state from when this file was last used",
29+
)
2530

2631

2732
class ExecRequest(BaseModel):

src/models/execution.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,9 @@ class ExecuteCodeRequest(BaseModel):
7979
timeout: Optional[int] = Field(
8080
default=None, description="Execution timeout in seconds"
8181
)
82+
args: Optional[List[str]] = Field(
83+
default=None, description="Command line arguments to pass to the executed code"
84+
)
8285

8386

8487
class ExecuteCodeResponse(BaseModel):

src/models/files.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,18 @@ class FileInfo(BaseModel):
4040
content_type: str
4141
created_at: datetime
4242
path: str = Field(..., description="File path in the session")
43+
# State restoration fields (for Python state-file linking)
44+
execution_id: Optional[str] = Field(
45+
default=None, description="ID of the execution that created/last used this file"
46+
)
47+
state_hash: Optional[str] = Field(
48+
default=None,
49+
description="SHA256 hash of the Python state when this file was last used",
50+
)
51+
last_used_at: Optional[datetime] = Field(
52+
default=None,
53+
description="Timestamp of when this file was last used in an execution",
54+
)
4355

4456
class Config:
4557
json_encoders = {datetime: lambda v: v.isoformat()}

src/services/container/manager.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -365,7 +365,7 @@ async def copy_to_container(
365365
return False
366366

367367
async def copy_content_to_container(
368-
self, container: Container, content: bytes, dest_path: str
368+
self, container: Container, content: bytes, dest_path: str, language: str = "py"
369369
) -> bool:
370370
"""Copy content directly to container without tempfiles.
371371
@@ -376,19 +376,25 @@ async def copy_content_to_container(
376376
container: Target container
377377
content: File content as bytes
378378
dest_path: Destination path in container (e.g., /mnt/data/file.py)
379+
language: Programming language (used to set correct file ownership)
379380
380381
Returns:
381382
True if successful, False otherwise
382383
"""
383384
try:
384385
loop = asyncio.get_event_loop()
385386

387+
# Get user ID for this language's container
388+
user_id = self.get_user_id_for_language(language)
389+
386390
# Build in-memory tar archive
387391
tar_buffer = io.BytesIO()
388392
with tarfile.open(fileobj=tar_buffer, mode="w") as tar:
389393
tarinfo = tarfile.TarInfo(name=dest_path.split("/")[-1])
390394
tarinfo.size = len(content)
391395
tarinfo.mode = 0o644
396+
tarinfo.uid = user_id
397+
tarinfo.gid = user_id
392398
tar.addfile(tarinfo, io.BytesIO(content))
393399

394400
tar_buffer.seek(0)

src/services/container/repl_executor.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ async def execute(
4343
code: str,
4444
timeout: int = None,
4545
working_dir: str = "/mnt/data",
46+
args: Optional[List[str]] = None,
4647
) -> Tuple[int, str, str]:
4748
"""Execute code in running REPL.
4849
@@ -51,6 +52,7 @@ async def execute(
5152
code: Python code to execute
5253
timeout: Maximum execution time in seconds
5354
working_dir: Working directory for code execution
55+
args: Optional list of command line arguments
5456
5557
Returns:
5658
Tuple of (exit_code, stdout, stderr)
@@ -62,6 +64,8 @@ async def execute(
6264

6365
# Build request
6466
request = {"code": code, "timeout": timeout, "working_dir": working_dir}
67+
if args:
68+
request["args"] = args
6569
request_json = json.dumps(request)
6670
request_bytes = request_json.encode("utf-8") + DELIMITER
6771

@@ -109,6 +113,7 @@ async def execute_with_state(
109113
working_dir: str = "/mnt/data",
110114
initial_state: Optional[str] = None,
111115
capture_state: bool = False,
116+
args: Optional[List[str]] = None,
112117
) -> Tuple[int, str, str, Optional[str], List[str]]:
113118
"""Execute code in running REPL with optional state persistence.
114119
@@ -119,6 +124,7 @@ async def execute_with_state(
119124
working_dir: Working directory for code execution
120125
initial_state: Base64-encoded state to restore before execution
121126
capture_state: Whether to capture state after execution
127+
args: Optional list of command line arguments
122128
123129
Returns:
124130
Tuple of (exit_code, stdout, stderr, new_state, state_errors)
@@ -138,6 +144,9 @@ async def execute_with_state(
138144
if capture_state:
139145
request["capture_state"] = True
140146

147+
if args:
148+
request["args"] = args
149+
141150
request_json = json.dumps(request)
142151
request_bytes = request_json.encode("utf-8") + DELIMITER
143152

src/services/execution/runner.py

Lines changed: 40 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""Code execution runner - core execution logic."""
22

33
import asyncio
4+
import shlex
45
from datetime import datetime, timedelta
56
from pathlib import Path
67
from typing import Any, Dict, List, Optional, Tuple
@@ -152,7 +153,7 @@ async def execute(
152153

153154
# Mount files if provided
154155
if files:
155-
await self._mount_files_to_container(container, files)
156+
await self._mount_files_to_container(container, files, request.language)
156157

157158
# Execute the code
158159
start_time = datetime.utcnow()
@@ -185,11 +186,16 @@ async def execute(
185186
request.timeout or settings.max_execution_time,
186187
initial_state=initial_state,
187188
capture_state=capture_state,
189+
args=request.args,
188190
)
189191
else:
190192
# Standard execution (no state persistence)
191193
exit_code, stdout, stderr = await self._execute_code_in_container(
192-
container, request.code, request.language, request.timeout
194+
container,
195+
request.code,
196+
request.language,
197+
request.timeout,
198+
args=request.args,
193199
)
194200
end_time = datetime.utcnow()
195201

@@ -435,12 +441,20 @@ async def _execute_code_in_container(
435441
code: str,
436442
language: str,
437443
timeout: Optional[int] = None,
444+
args: Optional[List[str]] = None,
438445
) -> Tuple[int, str, str]:
439446
"""Execute code in the container.
440447
441448
For REPL-enabled containers (Python with REPL mode), uses the fast
442449
REPL executor which communicates with the pre-warmed Python interpreter.
443450
For other containers, uses the standard execution path.
451+
452+
Args:
453+
container: Docker container to execute in
454+
code: Code to execute
455+
language: Programming language
456+
timeout: Execution timeout in seconds
457+
args: Optional list of command line arguments
444458
"""
445459
language = language.lower()
446460
lang_config = get_language(language)
@@ -454,7 +468,9 @@ async def _execute_code_in_container(
454468
logger.debug(
455469
"Using REPL executor", container_id=container.id[:12], language=language
456470
)
457-
return await self._execute_via_repl(container, code, execution_timeout)
471+
return await self._execute_via_repl(
472+
container, code, execution_timeout, args=args
473+
)
458474

459475
# Standard execution path for non-REPL containers
460476
exec_command = lang_config.execution_command
@@ -480,13 +496,20 @@ async def _execute_code_in_container(
480496
# Direct memory-to-container transfer (no tempfiles)
481497
dest_path = f"/mnt/data/{code_filename}"
482498
if not await self.container_manager.copy_content_to_container(
483-
container, code.encode("utf-8"), dest_path
499+
container, code.encode("utf-8"), dest_path, language=language
484500
):
485501
return 1, "", "Failed to write code file to container"
486502

503+
# Build execution command with args if provided
504+
final_command = exec_command
505+
if args:
506+
# Safely quote each argument to prevent shell injection
507+
quoted_args = " ".join(shlex.quote(arg) for arg in args)
508+
final_command = f"{exec_command} {quoted_args}"
509+
487510
return await self.container_manager.execute_command(
488511
container,
489-
exec_command,
512+
final_command,
490513
timeout=execution_timeout,
491514
language=language,
492515
working_dir="/mnt/data",
@@ -521,21 +544,26 @@ def _is_repl_container(self, container: Container, language: str) -> bool:
521544
return False
522545

523546
async def _execute_via_repl(
524-
self, container: Container, code: str, timeout: int
547+
self,
548+
container: Container,
549+
code: str,
550+
timeout: int,
551+
args: Optional[List[str]] = None,
525552
) -> Tuple[int, str, str]:
526553
"""Execute code via REPL server in container.
527554
528555
Args:
529556
container: Docker container with REPL server running
530557
code: Python code to execute
531558
timeout: Maximum execution time in seconds
559+
args: Optional list of command line arguments
532560
533561
Returns:
534562
Tuple of (exit_code, stdout, stderr)
535563
"""
536564
repl_executor = REPLExecutor(self.container_manager.client)
537565
return await repl_executor.execute(
538-
container, code, timeout=timeout, working_dir="/mnt/data"
566+
container, code, timeout=timeout, working_dir="/mnt/data", args=args
539567
)
540568

541569
async def _execute_via_repl_with_state(
@@ -545,6 +573,7 @@ async def _execute_via_repl_with_state(
545573
timeout: int,
546574
initial_state: Optional[str] = None,
547575
capture_state: bool = True,
576+
args: Optional[List[str]] = None,
548577
) -> Tuple[int, str, str, Optional[str], List[str]]:
549578
"""Execute code via REPL server with state persistence.
550579
@@ -554,6 +583,7 @@ async def _execute_via_repl_with_state(
554583
timeout: Maximum execution time in seconds
555584
initial_state: Base64-encoded state to restore before execution
556585
capture_state: Whether to capture state after execution
586+
args: Optional list of command line arguments
557587
558588
Returns:
559589
Tuple of (exit_code, stdout, stderr, new_state, state_errors)
@@ -566,10 +596,11 @@ async def _execute_via_repl_with_state(
566596
working_dir="/mnt/data",
567597
initial_state=initial_state,
568598
capture_state=capture_state,
599+
args=args,
569600
)
570601

571602
async def _mount_files_to_container(
572-
self, container: Container, files: List[Dict[str, Any]]
603+
self, container: Container, files: List[Dict[str, Any]], language: str = "py"
573604
) -> None:
574605
"""Mount files to container workspace."""
575606
try:
@@ -599,7 +630,7 @@ async def _mount_files_to_container(
599630
dest_path = f"/mnt/data/{normalized_filename}"
600631

601632
if await self.container_manager.copy_content_to_container(
602-
container, file_content, dest_path
633+
container, file_content, dest_path, language=language
603634
):
604635
logger.info(
605636
"Mounted file",

0 commit comments

Comments
 (0)