Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions api/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -433,8 +433,9 @@ def _effective_storage_provider() -> str:
"""
from lab.storage import STORAGE_PROVIDER

if STORAGE_PROVIDER == "localfs":
return "localfs"
# juicefs and localfs are always active when configured — no TFL_REMOTE_STORAGE_ENABLED gate.
if STORAGE_PROVIDER in ("localfs", "juicefs"):
return STORAGE_PROVIDER
remote_enabled = os.getenv("TFL_REMOTE_STORAGE_ENABLED", "false").lower() == "true"
return STORAGE_PROVIDER if remote_enabled else "localfs"

Expand Down
2 changes: 1 addition & 1 deletion api/localprovider_pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ dependencies = [
"soundfile==0.13.1",
"tensorboardX==2.6.2.2",
"timm==1.0.15",
"transformerlab==0.1.40",
"transformerlab==0.1.41",
"transformerlab-inference==0.2.52",
"transformers==4.57.1",
"wandb==0.23.1",
Expand Down
2 changes: 1 addition & 1 deletion api/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ dependencies = [
"python-dotenv==1.2.2",
"python-multipart==0.0.29",
"sqlalchemy[asyncio]==2.0.49",
"transformerlab==0.1.40",
"transformerlab==0.1.41",
"transformerlab-inference==0.2.52",
"uvicorn==0.35.0",
"watchfiles==1.2.0",
Expand Down
6 changes: 5 additions & 1 deletion api/scripts/on_server_start.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,11 @@ async def main() -> None:
# Buckets/containers/local workspace dirs must exist before seed_default_experiments(),
# which writes experiment metadata to remote storage (e.g. Azure block upload).
tfl_remote_storage_enabled = os.getenv("TFL_REMOTE_STORAGE_ENABLED", "false").lower() == "true"
if tfl_remote_storage_enabled or (os.getenv("TFL_STORAGE_PROVIDER") == "localfs" and os.getenv("TFL_STORAGE_URI")):
if (
tfl_remote_storage_enabled
or os.getenv("TFL_STORAGE_PROVIDER") == "juicefs"
or (os.getenv("TFL_STORAGE_PROVIDER") == "localfs" and os.getenv("TFL_STORAGE_URI"))
):
print("✅ CHECKING STORAGE FOR EXISTING TEAMS")
try:
async with async_session() as session:
Expand Down
100 changes: 100 additions & 0 deletions api/test/api/test_launch_template_juicefs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
import pytest

from transformerlab.services.compute_provider.launch_juicefs import (
build_juicefs_install_command,
build_juicefs_pod_config,
)


def test_build_juicefs_pod_config_env_vars(monkeypatch):
    """The returned env vars mirror the JuiceFS settings and the URI is the mount point."""
    metadata_url = "redis://localhost:6379/1"
    monkeypatch.setenv("TFL_JUICEFS_METADATA_URL", metadata_url)
    monkeypatch.setenv("TFL_JUICEFS_VOLUME_NAME", "myvol")

    # The mount command is not under test here, so it is discarded.
    pod_env, _, uri = build_juicefs_pod_config(team_id="team-abc", mount_point="/mnt/juicefs")

    expected_env = {
        "TFL_JUICEFS_METADATA_URL": metadata_url,
        "TFL_JUICEFS_VOLUME_NAME": "myvol",
        "TFL_JUICEFS_MOUNT_POINT": "/mnt/juicefs",
        "TFL_REMOTE_STORAGE_ENABLED": "true",
    }
    for key, value in expected_env.items():
        assert pod_env[key] == value
    assert uri == "/mnt/juicefs"


def test_build_juicefs_pod_config_mount_command(monkeypatch):
    """With no token configured, the command is mkdir plus an org-scoped background mount."""
    monkeypatch.setenv("TFL_JUICEFS_VOLUME_NAME", "myvol")
    # Make sure neither optional hosted-JuiceFS setting leaks in from the environment.
    for optional_var in ("TFL_JUICEFS_TOKEN", "TFL_JUICEFS_CONSOLE_URL"):
        monkeypatch.delenv(optional_var, raising=False)

    _, command, _ = build_juicefs_pod_config(team_id="team-abc", mount_point="/mnt/juicefs")

    expected = "mkdir -p /mnt/juicefs && juicefs mount myvol /mnt/juicefs --subdir orgs/team-abc --background"
    assert command == expected


def test_build_juicefs_pod_config_mount_command_includes_token(monkeypatch):
    """With a token configured, a `juicefs auth` step is prepended to the mount command."""
    monkeypatch.setenv("TFL_JUICEFS_VOLUME_NAME", "myvol")
    monkeypatch.setenv("TFL_JUICEFS_TOKEN", "hosted-token")
    # The exact-substring assertions below only hold when no --console-url flag is
    # inserted between --token and --access-key, so an ambient console URL must
    # not leak into this test.
    monkeypatch.delenv("TFL_JUICEFS_CONSOLE_URL", raising=False)

    env_vars, mount_cmd, _ = build_juicefs_pod_config(
        team_id="team-abc",
        mount_point="/mnt/juicefs",
    )

    assert env_vars["TFL_JUICEFS_TOKEN"] == "hosted-token"
    assert 'if [ -n "$ACCESS_KEY" ] && [ -n "$SECRET_KEY" ]; then ' in mount_cmd
    assert (
        'juicefs auth myvol --token "$TFL_JUICEFS_TOKEN" --access-key "$ACCESS_KEY" --secret-key "$SECRET_KEY"'
        in mount_cmd
    )
    assert 'else juicefs auth myvol --token "$TFL_JUICEFS_TOKEN"; fi' in mount_cmd
    assert '--access-key "$ACCESS_KEY" --secret-key "$SECRET_KEY"' in mount_cmd
    assert mount_cmd.endswith("juicefs mount myvol /mnt/juicefs --subdir orgs/team-abc --background")
    assert "mkdir -p /mnt/juicefs && " in mount_cmd


def test_build_juicefs_pod_config_mount_command_includes_console_url(monkeypatch):
    """A configured console URL is exported and passed to both `juicefs auth` variants."""
    settings = {
        "TFL_JUICEFS_VOLUME_NAME": "myvol",
        "TFL_JUICEFS_TOKEN": "hosted-token",
        "TFL_JUICEFS_CONSOLE_URL": "http://juicefs-console:8080",
    }
    for name, value in settings.items():
        monkeypatch.setenv(name, value)

    pod_env, command, _ = build_juicefs_pod_config(team_id="team-abc", mount_point="/mnt/juicefs")

    console_flag = '--console-url "$TFL_JUICEFS_CONSOLE_URL"'
    assert pod_env["TFL_JUICEFS_CONSOLE_URL"] == "http://juicefs-console:8080"
    assert console_flag in command
    # One occurrence per branch of the if/else auth command.
    assert command.count(console_flag) == 2


def test_build_juicefs_pod_config_custom_mount_point(monkeypatch):
    """A non-default mount point flows into the env vars and the storage URI."""
    monkeypatch.setenv("TFL_JUICEFS_VOLUME_NAME", "testvol")
    custom_mount = "/custom/mount"

    pod_env, command, uri = build_juicefs_pod_config(team_id="team-xyz", mount_point=custom_mount)

    assert pod_env["TFL_JUICEFS_MOUNT_POINT"] == "/custom/mount"
    assert "orgs/team-xyz" in command
    assert uri == "/custom/mount"


def test_build_juicefs_pod_config_raises_on_empty_volume_name(monkeypatch):
    """A missing volume name is reported as a ValueError naming the variable."""
    monkeypatch.delenv("TFL_JUICEFS_VOLUME_NAME", raising=False)

    expect_error = pytest.raises(ValueError, match="TFL_JUICEFS_VOLUME_NAME")
    with expect_error:
        build_juicefs_pod_config(team_id="team-abc", mount_point="/mnt/juicefs")


def test_build_juicefs_install_command_installs_when_missing():
    """The install snippet probes for juicefs, downloads it, and covers both install targets."""
    snippet = build_juicefs_install_command()

    required_fragments = (
        "command -v juicefs",
        "https://juicefs.com/static/juicefs",
        "mv /tmp/juicefs /usr/local/bin/juicefs",
        "$HOME/.local/bin/juicefs",
    )
    for fragment in required_fragments:
        assert fragment in snippet
112 changes: 112 additions & 0 deletions api/test/api/test_remote_workspace_juicefs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
import os
import pytest
from unittest.mock import patch
from transformerlab.shared import remote_workspace


def test_validate_juicefs_config_raises_on_missing_metadata_url(monkeypatch):
    """Validation exits when TFL_JUICEFS_METADATA_URL is absent."""
    monkeypatch.setattr(remote_workspace, "STORAGE_PROVIDER", "juicefs", raising=False)
    present = {
        "TFL_JUICEFS_VOLUME_NAME": "myvol",
        "TFL_JUICEFS_STORAGE_BACKEND": "aws",
    }
    for name, value in present.items():
        monkeypatch.setenv(name, value)
    monkeypatch.delenv("TFL_JUICEFS_METADATA_URL", raising=False)

    with pytest.raises(SystemExit):
        remote_workspace._validate_juicefs_config()


def test_validate_juicefs_config_raises_on_missing_volume_name(monkeypatch):
    """Validation exits when TFL_JUICEFS_VOLUME_NAME is absent."""
    monkeypatch.setattr(remote_workspace, "STORAGE_PROVIDER", "juicefs", raising=False)
    present = {
        "TFL_JUICEFS_METADATA_URL": "redis://localhost:6379/1",
        "TFL_JUICEFS_STORAGE_BACKEND": "aws",
    }
    for name, value in present.items():
        monkeypatch.setenv(name, value)
    monkeypatch.delenv("TFL_JUICEFS_VOLUME_NAME", raising=False)

    with pytest.raises(SystemExit):
        remote_workspace._validate_juicefs_config()


def test_validate_juicefs_config_raises_on_missing_storage_backend(monkeypatch):
    """Validation exits when TFL_JUICEFS_STORAGE_BACKEND is absent."""
    monkeypatch.setattr(remote_workspace, "STORAGE_PROVIDER", "juicefs", raising=False)
    present = {
        "TFL_JUICEFS_METADATA_URL": "redis://localhost:6379/1",
        "TFL_JUICEFS_VOLUME_NAME": "myvol",
    }
    for name, value in present.items():
        monkeypatch.setenv(name, value)
    monkeypatch.delenv("TFL_JUICEFS_STORAGE_BACKEND", raising=False)

    with pytest.raises(SystemExit):
        remote_workspace._validate_juicefs_config()


def test_validate_juicefs_config_raises_when_not_mounted(monkeypatch, tmp_path):
    """Validation exits when the configured mount point is not reported as a mount."""
    monkeypatch.setattr(remote_workspace, "STORAGE_PROVIDER", "juicefs", raising=False)
    settings = {
        "TFL_JUICEFS_METADATA_URL": "redis://localhost:6379/1",
        "TFL_JUICEFS_VOLUME_NAME": "myvol",
        "TFL_JUICEFS_STORAGE_BACKEND": "aws",
        "TFL_JUICEFS_MOUNT_POINT": str(tmp_path / "not_a_mount"),
    }
    for name, value in settings.items():
        monkeypatch.setenv(name, value)

    # Force the mount check to report "not mounted" regardless of the host.
    not_mounted = patch("transformerlab.shared.remote_workspace.os.path.ismount", return_value=False)
    with not_mounted, pytest.raises(SystemExit):
        remote_workspace._validate_juicefs_config()


def test_validate_juicefs_config_raises_on_invalid_storage_backend(monkeypatch, tmp_path):
    """Validation exits when the storage backend value ("s3") is not accepted."""
    monkeypatch.setattr(remote_workspace, "STORAGE_PROVIDER", "juicefs", raising=False)
    settings = {
        "TFL_JUICEFS_METADATA_URL": "redis://localhost:6379/1",
        "TFL_JUICEFS_VOLUME_NAME": "myvol",
        "TFL_JUICEFS_STORAGE_BACKEND": "s3",
        "TFL_JUICEFS_MOUNT_POINT": str(tmp_path),
    }
    for name, value in settings.items():
        monkeypatch.setenv(name, value)

    # Pretend the mount exists so only the backend value can trigger the exit.
    mounted = patch("transformerlab.shared.remote_workspace.os.path.ismount", return_value=True)
    with mounted, pytest.raises(SystemExit):
        remote_workspace._validate_juicefs_config()


def test_validate_juicefs_config_passes_when_valid(monkeypatch, tmp_path):
    """A complete configuration with a mounted volume validates without exiting."""
    monkeypatch.setattr(remote_workspace, "STORAGE_PROVIDER", "juicefs", raising=False)
    settings = {
        "TFL_JUICEFS_METADATA_URL": "redis://localhost:6379/1",
        "TFL_JUICEFS_VOLUME_NAME": "myvol",
        "TFL_JUICEFS_STORAGE_BACKEND": "aws",
        "TFL_JUICEFS_MOUNT_POINT": str(tmp_path),
    }
    for name, value in settings.items():
        monkeypatch.setenv(name, value)

    mounted = patch("transformerlab.shared.remote_workspace.os.path.ismount", return_value=True)
    with mounted:
        # Passing means simply returning; any SystemExit fails the test.
        remote_workspace._validate_juicefs_config()


def test_create_juicefs_directory_creates_dir_and_sets_quota(monkeypatch, tmp_path):
    """The org directory is created under the mount and a quota is set via the juicefs CLI."""
    mount_root = str(tmp_path)
    settings = {
        "TFL_JUICEFS_MOUNT_POINT": mount_root,
        "TFL_JUICEFS_VOLUME_NAME": "myvol",
        "TFL_JUICEFS_QUOTA_GB": "50",
    }
    for name, value in settings.items():
        monkeypatch.setenv(name, value)

    with patch("transformerlab.shared.remote_workspace.subprocess.run") as run_mock:
        created = remote_workspace._create_juicefs_directory("team-abc")

    assert created is True
    assert os.path.isdir(os.path.join(mount_root, "orgs", "team-abc"))
    # The capacity argument comes from TFL_JUICEFS_QUOTA_GB.
    expected_argv = ["juicefs", "quota", "set", "myvol", "--path", "/orgs/team-abc", "--capacity", "50"]
    run_mock.assert_called_once_with(
        expected_argv,
        check=True,
        capture_output=True,
        text=True,
    )


def test_create_juicefs_directory_returns_false_on_error(monkeypatch, tmp_path):
    """A failure in the juicefs subprocess call is reported as False, not raised."""
    settings = {
        "TFL_JUICEFS_MOUNT_POINT": str(tmp_path),
        "TFL_JUICEFS_VOLUME_NAME": "myvol",
        "TFL_JUICEFS_QUOTA_GB": "100",
    }
    for name, value in settings.items():
        monkeypatch.setenv(name, value)

    failing_run = patch(
        "transformerlab.shared.remote_workspace.subprocess.run",
        side_effect=Exception("juicefs not found"),
    )
    with failing_run:
        outcome = remote_workspace._create_juicefs_directory("team-abc")

    assert outcome is False


def test_create_bucket_for_team_routes_to_juicefs(monkeypatch, tmp_path):
    """With the juicefs provider, create_bucket_for_team creates the org directory."""
    mount_root = str(tmp_path)
    monkeypatch.setattr(remote_workspace, "STORAGE_PROVIDER", "juicefs", raising=False)
    settings = {
        "TFL_JUICEFS_MOUNT_POINT": mount_root,
        "TFL_JUICEFS_VOLUME_NAME": "myvol",
        "TFL_JUICEFS_QUOTA_GB": "100",
    }
    for name, value in settings.items():
        monkeypatch.setenv(name, value)

    # subprocess.run is stubbed so no real juicefs binary is needed.
    with patch("transformerlab.shared.remote_workspace.subprocess.run"):
        created = remote_workspace.create_bucket_for_team("team-xyz")

    assert created is True
    team_dir = os.path.join(mount_root, "orgs", "team-xyz")
    assert os.path.isdir(team_dir)
133 changes: 133 additions & 0 deletions api/transformerlab/services/compute_provider/launch_juicefs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
"""JuiceFS helpers for remote pod launch: install, auth/mount, and backing-storage credentials."""

import asyncio
import os
import shlex

from transformerlab.shared.models.models import ProviderType
from transformerlab.services.compute_provider.launch_credentials import (
RUNPOD_AWS_CREDENTIALS_DIR,
generate_aws_credentials_setup,
generate_azure_credentials_setup,
generate_gcp_credentials_setup,
get_aws_credentials_from_file,
)


def build_juicefs_install_command() -> str:
    """Return a shell snippet that installs the JuiceFS binary if not already present.

    The snippet is a single ``if ...; fi`` compound command:

    * it does nothing when ``juicefs`` already resolves via ``command -v``;
    * otherwise it downloads the binary to ``/tmp/juicefs`` and moves it to
      ``/usr/local/bin``; if that move fails, it moves the binary to
      ``$HOME/.local/bin`` instead and prepends that directory to ``PATH``.

    The fallback is a brace group (``{ ...; }``) rather than a subshell
    (``( ... )``). An ``export`` inside a subshell is discarded when the
    subshell exits, which would leave the freshly installed binary off ``PATH``
    for commands that follow in the same shell.
    """
    return (
        "if ! command -v juicefs >/dev/null 2>&1; then "
        "curl -fsSL https://juicefs.com/static/juicefs -o /tmp/juicefs; "
        "chmod +x /tmp/juicefs; "
        "mv /tmp/juicefs /usr/local/bin/juicefs 2>/dev/null || "
        '{ mkdir -p "$HOME/.local/bin"; '
        'mv /tmp/juicefs "$HOME/.local/bin/juicefs"; '
        'export PATH="$HOME/.local/bin:$PATH"; }; '
        "fi"
    )


def build_juicefs_pod_config(
    team_id: str,
    mount_point: str,
) -> tuple[dict[str, str], str, str]:
    """Return (env_vars, mount_command, tfl_storage_uri) for a JuiceFS pod launch.

    Mounts only this org's subdir so the pod cannot access other orgs' data.
    tfl_storage_uri equals mount_point (already org-scoped).

    Args:
        team_id: Team/org identifier; becomes the ``orgs/<team_id>`` path passed
            to ``juicefs mount --subdir``.
        mount_point: Directory on the pod where the volume is mounted.

    Returns:
        A tuple of the environment variables for the pod, the shell command
        that creates the mount point and mounts the volume (preceded by a
        ``juicefs auth`` step when ``TFL_JUICEFS_TOKEN`` is set), and the
        storage URI, which is ``mount_point`` itself.

    Raises:
        ValueError: If ``TFL_JUICEFS_VOLUME_NAME`` is unset, if ``mount_point``
            is empty, or if ``team_id`` is blank, contains ``/``, or is
            ``.``/``..`` (any of which would point the mount outside the
            team's own subdirectory).
    """
    volume_name = os.getenv("TFL_JUICEFS_VOLUME_NAME", "")
    if not volume_name:
        raise ValueError("TFL_JUICEFS_VOLUME_NAME must be set when TFL_STORAGE_PROVIDER=juicefs")
    if not mount_point:
        raise ValueError("mount_point must be a non-empty path for the JuiceFS mount")

    # The per-org isolation relies on the subdir being exactly one path segment
    # below "orgs/"; reject anything that would widen or escape it.
    team_segment = str(team_id)
    if not team_segment.strip() or "/" in team_segment or team_segment in (".", ".."):
        raise ValueError(f"team_id is not a valid JuiceFS org subdirectory name: {team_id!r}")

    env_vars: dict[str, str] = {
        "TFL_JUICEFS_METADATA_URL": os.getenv("TFL_JUICEFS_METADATA_URL", ""),
        "TFL_JUICEFS_VOLUME_NAME": volume_name,
        "TFL_JUICEFS_MOUNT_POINT": mount_point,
        "TFL_REMOTE_STORAGE_ENABLED": "true",
    }

    # Optional settings are forwarded only when configured.
    juicefs_token = os.getenv("TFL_JUICEFS_TOKEN", "")
    if juicefs_token:
        env_vars["TFL_JUICEFS_TOKEN"] = juicefs_token
    juicefs_console_url = os.getenv("TFL_JUICEFS_CONSOLE_URL", "")
    if juicefs_console_url:
        env_vars["TFL_JUICEFS_CONSOLE_URL"] = juicefs_console_url

    quoted_volume = shlex.quote(volume_name)
    quoted_mount = shlex.quote(mount_point)
    mount_cmd = (
        f"mkdir -p {quoted_mount} && "
        f"juicefs mount {quoted_volume} {quoted_mount}"
        f" --subdir {shlex.quote(f'orgs/{team_segment}')} --background"
    )
    if juicefs_token:
        # Secrets are referenced as shell variables ("$TFL_JUICEFS_TOKEN", ...)
        # instead of being embedded, so their values never appear in the command text.
        console_flag = ' --console-url "$TFL_JUICEFS_CONSOLE_URL"' if juicefs_console_url else ""
        auth_cmd = (
            'if [ -n "$ACCESS_KEY" ] && [ -n "$SECRET_KEY" ]; then '
            f'juicefs auth {quoted_volume} --token "$TFL_JUICEFS_TOKEN"{console_flag} '
            '--access-key "$ACCESS_KEY" --secret-key "$SECRET_KEY"; '
            "else "
            f'juicefs auth {quoted_volume} --token "$TFL_JUICEFS_TOKEN"{console_flag}; '
            "fi"
        )
        mount_cmd = f"{auth_cmd} && {mount_cmd}"

    return env_vars, mount_cmd, mount_point


async def build_juicefs_backend_credentials_setup(
provider_type: str,
) -> tuple[list[str], dict[str, str]]:
"""Return (setup_commands, env_vars) for the JuiceFS backing object-storage credentials.

Called unconditionally when STORAGE_PROVIDER=juicefs, regardless of
TFL_REMOTE_STORAGE_ENABLED, because JuiceFS needs its own backend credentials
even when the cloud-bucket provider path is not active.

The backend is selected by TFL_JUICEFS_STORAGE_BACKEND ("aws", "gcp" or
"azure"). Any other value, including unset, returns an empty list and an
empty dict.

Args:
provider_type: Compute provider type value; compared against
ProviderType.RUNPOD.value to decide which AWS credentials directory
is passed to the setup helper.

Returns:
A (setup_commands, env_vars) tuple: the command strings produced by the
generate_*_credentials_setup helpers, and environment variables to
forward alongside them.
"""
setup_commands: list[str] = []
env_vars: dict[str, str] = {}

backend = os.getenv("TFL_JUICEFS_STORAGE_BACKEND", "")
if backend == "aws":
# Imported inside the branch rather than at module top — presumably to
# avoid an import cycle with remote_workspace; TODO confirm.
from transformerlab.shared.remote_workspace import get_default_aws_profile

aws_profile = get_default_aws_profile()
# Run the credentials lookup in a worker thread so it does not block the event loop.
aws_access_key_id, aws_secret_access_key = await asyncio.to_thread(get_aws_credentials_from_file, aws_profile)
if aws_access_key_id and aws_secret_access_key:
# RunPod gets a dedicated credentials directory; every other provider passes None.
aws_credentials_dir = RUNPOD_AWS_CREDENTIALS_DIR if provider_type == ProviderType.RUNPOD.value else None
setup_commands.append(
generate_aws_credentials_setup(
aws_access_key_id,
aws_secret_access_key,
aws_profile,
aws_credentials_dir=aws_credentials_dir,
)
)
if aws_credentials_dir:
env_vars["AWS_SHARED_CREDENTIALS_FILE"] = f"{aws_credentials_dir}/credentials"
env_vars["AWS_PROFILE"] = aws_profile
# ACCESS_KEY / SECRET_KEY are the variable names the `juicefs auth` command
# built by build_juicefs_pod_config checks for and passes on.
env_vars["ACCESS_KEY"] = aws_access_key_id
env_vars["SECRET_KEY"] = aws_secret_access_key
elif backend == "gcp":
# GCP adds a setup command only; no environment variables are set in this branch.
gcp_sa_json_path = os.getenv("TFL_GCP_SERVICE_ACCOUNT_JSON_PATH")
if gcp_sa_json_path:
setup_commands.append(generate_gcp_credentials_setup(gcp_sa_json_path))
elif backend == "azure":
azure_connection_string = os.getenv("AZURE_STORAGE_CONNECTION_STRING")
azure_account = os.getenv("AZURE_STORAGE_ACCOUNT")
azure_key = os.getenv("AZURE_STORAGE_KEY")
azure_sas = os.getenv("AZURE_STORAGE_SAS_TOKEN")
# A setup command is generated only when a connection string or an account name is configured.
if azure_connection_string or azure_account:
setup_commands.append(
generate_azure_credentials_setup(azure_connection_string, azure_account, azure_key, azure_sas)
)
# Each Azure setting is forwarded under its original name, and only when it has a value.
if azure_connection_string:
env_vars["AZURE_STORAGE_CONNECTION_STRING"] = azure_connection_string
if azure_account:
env_vars["AZURE_STORAGE_ACCOUNT"] = azure_account
if azure_key:
env_vars["AZURE_STORAGE_KEY"] = azure_key
if azure_sas:
env_vars["AZURE_STORAGE_SAS_TOKEN"] = azure_sas

return setup_commands, env_vars
Loading
Loading