diff --git a/.gitignore b/.gitignore
index 344e57de05..c36ec92c3e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -21,3 +21,6 @@ build/
.fleet
.env
.aider*
+uv.lock
+.local/
+.claude/settings.local.json
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000000..9aba61b002
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,162 @@
+[project]
+name = "dstack"
+dynamic = ["version", "readme"]
+authors = [{ name = "Andrey Cheptsov", email = "andrey@dstack.ai" }]
+description = "dstack is an open-source orchestration engine for running AI workloads on any cloud or on-premises."
+requires-python = ">=3.9"
+classifiers = [
+ "Development Status :: 4 - Beta",
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
+ "License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)",
+ "Programming Language :: Python :: 3",
+]
+dependencies = [
+ "pyyaml",
+ "requests",
+ "typing-extensions>=4.0.0",
+ "cryptography",
+ "packaging",
+ "python-dateutil",
+ "cachetools",
+ "gitpython",
+ "jsonschema",
+ "paramiko>=3.2.0",
+ "cursor",
+ "rich",
+ "rich-argparse",
+ "tqdm",
+ "simple-term-menu",
+ "pydantic>=1.10.10,<2.0.0",
+ "pydantic-duality>=1.2.4",
+ "websocket-client",
+ "python-multipart>=0.0.16",
+ "filelock",
+ "psutil",
+ "gpuhunt==0.1.6",
+ "argcomplete>=3.5.0",
+ "gitignore-parser>=0.1.12",
+]
+
+[project.urls]
+Homepage = "https://dstack.ai"
+Source = "https://github.com/dstackai/dstack"
+Documentation = "https://dstack.ai/docs"
+Issues = "https://github.com/dstackai/dstack/issues"
+Changelog = "https://github.com/dstackai/dstack/releases"
+Discord = "https://discord.gg/u8SmfwPpMd"
+
+[build-system]
+requires = ["hatchling", "hatch-fancy-pypi-readme"]
+build-backend = "hatchling.build"
+
+[project.scripts]
+dstack = "dstack._internal.cli.main:main"
+
+[tool.hatch.version]
+path = "src/dstack/version.py"
+
+[tool.hatch.build.targets.sdist]
+artifacts = ["src/dstack/_internal/server/statics/**"]
+
+[tool.hatch.build.targets.wheel]
+artifacts = ["src/dstack/_internal/server/statics/**"]
+
+[tool.hatch.metadata.hooks.fancy-pypi-readme]
+content-type = "text/markdown"
+
+[[tool.hatch.metadata.hooks.fancy-pypi-readme.fragments]]
+path = "README.md"
+
+[[tool.hatch.metadata.hooks.fancy-pypi-readme.substitutions]]
+pattern = '<picture>\s*|<source[^>]*>\s*|\s*</picture>|<video[^>]*>\s*|</video>\s*|### Demo\s*'
+replacement = ''
+ignore-case = true
+
+[dependency-groups]
+dev = [
+ "build>=1.2.2.post1",
+ "httpx>=0.28.1",
+ "pre-commit>=4.2.0",
+ "pytest-asyncio>=0.23.8",
+ "pytest-httpbin>=2.1.0",
+ "httpbin>=0.10.2", # indirect to make compatible with Werkzeug 3
+ "pytest~=7.2",
+ "pytest-socket>=0.7.0",
+ "requests-mock>=1.12.1",
+ "openai>=1.68.2",
+ "freezegun>=1.5.1",
+ "ruff==0.11.6", # should match .pre-commit-config.yaml
+ "testcontainers>=4.9.2",
+ "pytest-xdist>=3.6.1",
+]
+
+[project.optional-dependencies]
+gateway = [
+ "fastapi",
+ "starlette>=0.26.0",
+ "uvicorn",
+ "aiorwlock",
+ "aiocache",
+ "httpx",
+ "jinja2",
+]
+server = [
+ "fastapi",
+ "starlette>=0.26.0",
+ "uvicorn",
+ "aiorwlock",
+ "aiocache",
+ "httpx",
+ "jinja2",
+ "watchfiles",
+ "sqlalchemy[asyncio]>=2.0.0",
+ "sqlalchemy_utils>=0.40.0",
+ "alembic>=1.10.2",
+ "apscheduler<4",
+ "aiosqlite",
+ "docker>=6.0.0",
+ "python-dxf==12.1.0",
+ "sentry-sdk[fastapi]",
+ "alembic-postgresql-enum",
+ "asyncpg",
+ "python-json-logger>=3.1.0",
+ "prometheus-client",
+ "grpcio>=1.50",
+ "backports.entry-points-selectable",
+]
+aws = ["boto3>=1.38.13", "botocore", "dstack[server]"]
+azure = [
+ "azure-identity>=1.12.0",
+ "azure-mgmt-subscription>=3.1.1",
+ "azure-mgmt-compute>=29.1.0",
+ "azure-mgmt-network>=23.0.0,<28.0.0",
+ "azure-mgmt-resource>=22.0.0",
+ "azure-mgmt-authorization>=3.0.0",
+ "azure-mgmt-msi>=7.0.0",
+ "dstack[server]",
+]
+gcp = [
+ "google-auth>=2.3.0",
+ "google-cloud-storage>=2.0.0",
+ "google-cloud-compute>=1.5.0",
+ "google-cloud-logging>=2.0.0",
+ "google-api-python-client>=2.80.0",
+ "google-cloud-billing>=1.11.0",
+ "google-cloud-tpu>=1.18.3",
+ "dstack[server]",
+]
+datacrunch = ["datacrunch", "dstack[server]"]
+kubernetes = ["kubernetes", "dstack[server]"]
+lambda = ["boto3>=1.38.13", "botocore", "dstack[server]"]
+oci = [
+ "oci>=2.150.0",
+ "cryptography>=44.0.3",
+ # pyopenssl is indirect to avoid uv falling back to the old version
+ # due to an upper limit from oci
+ "pyopenssl>=23.2.0",
+ "dstack[server]",
+]
+nebius = ["nebius>=0.2.19,<0.3; python_version >= '3.10'", "dstack[server]"]
+all = [
+ "dstack[gateway,server,aws,azure,gcp,datacrunch,kubernetes,lambda,nebius,oci]",
+]
diff --git a/src/dstack/_internal/core/models/repos/local.py b/src/dstack/_internal/core/models/repos/local.py
index 1bc815f127..cbeb077492 100644
--- a/src/dstack/_internal/core/models/repos/local.py
+++ b/src/dstack/_internal/core/models/repos/local.py
@@ -5,10 +5,14 @@
from typing_extensions import Literal
from dstack._internal.core.models.repos.base import BaseRepoInfo, Repo
+from dstack._internal.utils.common import sizeof_fmt
from dstack._internal.utils.hash import get_sha256, slugify
from dstack._internal.utils.ignore import GitIgnore
+from dstack._internal.utils.logging import get_logger
from dstack._internal.utils.path import PathLike
+logger = get_logger(__name__)
+
class LocalRepoInfo(BaseRepoInfo):
repo_type: Literal["local"] = "local"
@@ -75,6 +79,8 @@ def write_code_file(self, fp: BinaryIO) -> str:
arcname="",
filter=TarIgnore(self.run_repo_data.repo_dir, globs=[".git"]),
)
+
+ logger.debug(f"Code file size: {sizeof_fmt(fp.tell())} bytes")
return get_sha256(fp)
def get_repo_info(self) -> LocalRepoInfo:
diff --git a/src/dstack/_internal/utils/ignore.py b/src/dstack/_internal/utils/ignore.py
index cfe83e2e77..2d55e84a38 100644
--- a/src/dstack/_internal/utils/ignore.py
+++ b/src/dstack/_internal/utils/ignore.py
@@ -1,7 +1,8 @@
-import fnmatch
-from itertools import zip_longest
+import os
from pathlib import Path
-from typing import Dict, List, Optional
+from typing import List
+
+from gitignore_parser import parse_gitignore_str
from dstack._internal.utils.path import PathLike
@@ -16,75 +17,83 @@ def __init__(
if ignore_files is not None
else [".gitignore", ".git/info/exclude", ".dstackignore"]
)
- self.ignore_globs: Dict[str, List[str]] = {".": globs or []}
- self.load_recursive()
+ self.parser = None
+ self._create_combined_parser(globs or [])
- def load_ignore_file(self, path: str, ignore_file: Path):
- if path not in self.ignore_globs:
- self.ignore_globs[path] = []
- with ignore_file.open("r") as f:
- for line in f:
- line = self.rstrip(line.rstrip("\n")).rstrip("/")
- line = line.replace("\\ ", " ")
- if line.startswith("#") or not line:
- continue
- self.ignore_globs[path].append(line)
+ def _create_combined_parser(self, additional_globs: List[str]):
+ """
+ Build `self.parser` from all ignore files and the additional globs.
+
+ Patterns are gathered by `_collect_patterns_recursive` starting at `self.root_dir`,
+ `additional_globs` are appended after them, and the newline-joined result is passed
+ to `parse_gitignore_str` with `self.root_dir` as the base directory.
+ """
+ all_patterns = []
- def load_recursive(self, path: Optional[Path] = None):
- path = path or self.root_dir
- for ignore_file in self.ignore_files:
- ignore_file = path / ignore_file
- if ignore_file.exists():
- self.load_ignore_file(str(path.relative_to(self.root_dir)), ignore_file)
+ # Collect patterns from all ignore files recursively
+ self._collect_patterns_recursive(self.root_dir, all_patterns)
- for subdir in path.iterdir():
- if not subdir.is_dir() or self.ignore(subdir.relative_to(self.root_dir)):
- continue
- self.load_recursive(subdir)
+ # Add additional glob patterns
+ all_patterns.extend(additional_globs)
- @staticmethod
- def rstrip(value: str) -> str:
- end = len(value) - 1
- while end >= 0:
- if not value[end].isspace():
- break
- if end > 0 and value[end - 1] == "\\":
- break # escaped space
- end -= 1
- else:
- return ""
- return value[: end + 1]
+ self.parser = parse_gitignore_str("\n".join(all_patterns), self.root_dir)
- @staticmethod
- def fnmatch(name: str, pattern: str, sep="/") -> bool:
- if pattern.startswith(sep):
- name = sep + name
- for n, p in zip_longest(
- reversed(name.split(sep)), reversed(pattern.split(sep)), fillvalue=None
- ):
- if p == "**":
- raise NotImplementedError()
- if p is None:
- return True
- if n is None or not fnmatch.fnmatch(n, p):
- return False
- return True
+    def _collect_patterns_recursive(self, path: Path, patterns: List[str]):
+        """
+        Recursively collect patterns from all ignore files under `path`.
+
+        Every pattern is rewritten to be relative to `self.root_dir` and appended to
+        `patterns` in place, so that all of them can be parsed as a single gitignore
+        based at the root directory. Unreadable files and directories are skipped.
+
+        Args:
+            path: directory to scan; `self.root_dir` or one of its descendants.
+            patterns: output list that the collected patterns are appended to.
+        """
+        if path == self.root_dir:
+            prefix = ""
+        else:
+            # gitignore patterns always use "/" as the separator, so they must not be
+            # built with os.path.join, which produces "\\" on Windows
+            prefix = path.relative_to(self.root_dir).as_posix()
+
+        for ignore_file_name in self.ignore_files:
+            ignore_file = path / ignore_file_name
+            if not ignore_file.exists():
+                continue
+            try:
+                with ignore_file.open("r", encoding="utf-8", errors="ignore") as f:
+                    for line in f:
+                        line = line.strip()
+                        if line and not line.startswith("#"):
+                            patterns.extend(self._prefix_pattern(prefix, line))
+            except OSError:
+                # Skip files we can't read (decoding errors are already ignored above)
+                continue
+
+        # Ignored directories are traversed too: self.ignore() cannot be used while
+        # the parser is still being built, and prefixed patterns only affect the
+        # subtree of the directory they were found in
+        try:
+            for subdir in path.iterdir():
+                if subdir.is_dir():
+                    self._collect_patterns_recursive(subdir, patterns)
+        except OSError:
+            # Skip directories we can't read (PermissionError is an OSError)
+            pass
+
+    @staticmethod
+    def _prefix_pattern(prefix: str, line: str) -> List[str]:
+        """
+        Rewrite one ignore-file pattern as root-relative patterns.
+
+        `prefix` is the POSIX path of the ignore file's directory relative to the
+        root ("" for the root itself); `line` is a stripped, non-comment pattern.
+        """
+        if not prefix:
+            # Root directory patterns are used as is
+            return [line]
+        # The negation marker must stay in front of the rewritten pattern
+        negation = "!" if line.startswith("!") else ""
+        body = line[len(negation) :]
+        if not body.strip("/"):
+            # Nothing left to match (a bare "!" or "/"); prefixing it would
+            # otherwise ignore the whole directory
+            return []
+        if body.startswith("/"):
+            # Explicitly anchored to the directory of the ignore file
+            return [f"{negation}{prefix}/{body[1:]}"]
+        if "/" in body.rstrip("/"):
+            # A separator in the middle also anchors the pattern to that directory
+            return [f"{negation}{prefix}/{body}"]
+        # Otherwise the pattern matches at any depth below the directory
+        return [f"{negation}{prefix}/{body}", f"{negation}{prefix}/**/{body}"]
- def ignore(self, path: PathLike, sep="/") -> bool:
- if not path:
+
+ def ignore(self, path: PathLike) -> bool:
+ """
+ Check if a path should be ignored.
+
+ Relative paths are interpreted relative to `self.root_dir`.
+ Returns False for an empty path, when no parser has been built,
+ and for absolute paths that are not located under `self.root_dir`.
+ """
+ if not path or not self.parser:
return False
+
path = Path(path)
if path.is_absolute():
- path = path.relative_to(self.root_dir)
+ try:
+ path = path.relative_to(self.root_dir)
+ except ValueError:
+ # The path is outside of root_dir, so no collected pattern applies to it
+ return False
- tokens = ("." + sep + str(path)).split(sep)
- for i in range(1, len(tokens)):
- parent = sep.join(tokens[:-i])
- globs = self.ignore_globs.get(parent)
- if not globs:
- continue
- name = sep.join(tokens[-i:])
- for glob in globs:
- if self.fnmatch(name, glob, sep=sep):
- return True
- return False
+ # Convert to absolute path for gitignore_parser
+ abs_path = str(self.root_dir / path)
+ return self.parser(abs_path)
diff --git a/src/tests/_internal/utils/test_gitignore.py b/src/tests/_internal/utils/test_gitignore.py
new file mode 100644
index 0000000000..602a677722
--- /dev/null
+++ b/src/tests/_internal/utils/test_gitignore.py
@@ -0,0 +1,256 @@
+import tempfile
+from pathlib import Path
+
+from dstack._internal.utils.ignore import GitIgnore
+
+
+class TestGitIgnore:
+ """Tests for GitIgnore: pattern sources, nested ignore files, and path handling."""
+
+ def test_basic_gitignore_functionality(self):
+ """Test basic .gitignore pattern matching."""
+ with tempfile.TemporaryDirectory() as tmpdir:
+ test_dir = Path(tmpdir) / "test"
+ test_dir.mkdir()
+
+ # Create .gitignore
+ gitignore_file = test_dir / ".gitignore"
+ gitignore_file.write_text("*.log\ntemp/\n__pycache__/\n")
+
+ git_ignore = GitIgnore(test_dir)
+
+ # Test file patterns
+ assert git_ignore.ignore("test.log") is True
+ assert git_ignore.ignore("debug.log") is True
+ assert git_ignore.ignore("test.txt") is False
+ assert git_ignore.ignore("script.py") is False
+
+ # Test directory patterns
+ assert git_ignore.ignore("temp") is True
+ assert git_ignore.ignore("temp/") is True
+ assert git_ignore.ignore("temp/file.txt") is True
+ assert git_ignore.ignore("__pycache__") is True
+ assert git_ignore.ignore("__pycache__/module.pyc") is True
+
+ def test_nested_gitignore_files(self):
+ """Test that nested .gitignore files are loaded recursively."""
+ with tempfile.TemporaryDirectory() as tmpdir:
+ test_dir = Path(tmpdir) / "test"
+ test_dir.mkdir()
+
+ # Root .gitignore
+ (test_dir / ".gitignore").write_text("*.log\n")
+
+ # Nested directory with its own .gitignore
+ subdir = test_dir / "subdir"
+ subdir.mkdir()
+ (subdir / ".gitignore").write_text("*.tmp\n")
+
+ # Create actual files for testing (gitignore_parser may need them)
+ (test_dir / "test.log").touch()
+ (subdir / "test.log").touch()
+ (subdir / "file.tmp").touch()
+ (test_dir / "file.tmp").touch()
+
+ git_ignore = GitIgnore(test_dir)
+
+ # Test patterns from root .gitignore
+ assert git_ignore.ignore("test.log") is True
+ assert git_ignore.ignore("subdir/test.log") is True
+
+ # Test patterns from nested .gitignore
+ assert git_ignore.ignore("subdir/file.tmp") is True
+ # Files outside the subdir should not be matched by subdir's .gitignore
+ assert git_ignore.ignore("file.tmp") is False
+
+ def test_dstackignore_file(self):
+ """Test that .dstackignore files are processed."""
+ with tempfile.TemporaryDirectory() as tmpdir:
+ test_dir = Path(tmpdir) / "test"
+ test_dir.mkdir()
+
+ # Create .dstackignore
+ dstackignore_file = test_dir / ".dstackignore"
+ dstackignore_file.write_text("*.cache\ndata/\n")
+
+ git_ignore = GitIgnore(test_dir)
+
+ assert git_ignore.ignore("file.cache") is True
+ assert git_ignore.ignore("data") is True
+ assert git_ignore.ignore("data/dataset.csv") is True
+ assert git_ignore.ignore("file.txt") is False
+
+ def test_git_info_exclude(self):
+ """Test that .git/info/exclude files are processed."""
+ with tempfile.TemporaryDirectory() as tmpdir:
+ test_dir = Path(tmpdir) / "test"
+ test_dir.mkdir()
+
+ # Create .git/info/exclude in the root directory
+ git_info_dir = test_dir / ".git" / "info"
+ git_info_dir.mkdir(parents=True)
+ exclude_file = git_info_dir / "exclude"
+ exclude_file.write_text("*.exclude\nbuild/\n")
+
+ git_ignore = GitIgnore(test_dir)
+
+ # .git/info/exclude should apply to the entire repository
+ assert git_ignore.ignore("file.exclude") is True
+ assert git_ignore.ignore("build") is True
+ assert git_ignore.ignore("build/output.txt") is True
+ assert git_ignore.ignore("subdir/file.exclude") is True
+ assert git_ignore.ignore("file.txt") is False
+
+ def test_custom_ignore_files(self):
+ """Test custom ignore file names."""
+ with tempfile.TemporaryDirectory() as tmpdir:
+ test_dir = Path(tmpdir) / "test"
+ test_dir.mkdir()
+
+ # Create custom ignore file
+ custom_ignore = test_dir / ".myignore"
+ custom_ignore.write_text("*.custom\n")
+
+ git_ignore = GitIgnore(test_dir, ignore_files=[".myignore"])
+
+ assert git_ignore.ignore("file.custom") is True
+ assert git_ignore.ignore("file.txt") is False
+
+ def test_additional_globs(self):
+ """Test additional glob patterns passed to constructor."""
+ with tempfile.TemporaryDirectory() as tmpdir:
+ test_dir = Path(tmpdir) / "test"
+ test_dir.mkdir()
+
+ git_ignore = GitIgnore(test_dir, globs=["*.pyc", "node_modules/"])
+
+ assert git_ignore.ignore("module.pyc") is True
+ assert git_ignore.ignore("node_modules") is True
+ assert git_ignore.ignore("node_modules/package.json") is True
+ assert git_ignore.ignore("script.py") is False
+
+ def test_combined_ignore_sources(self):
+ """Test combination of .gitignore, custom files, and globs."""
+ with tempfile.TemporaryDirectory() as tmpdir:
+ test_dir = Path(tmpdir) / "test"
+ test_dir.mkdir()
+
+ # Create .gitignore
+ (test_dir / ".gitignore").write_text("*.log\n")
+
+ # Create .dstackignore
+ (test_dir / ".dstackignore").write_text("*.cache\n")
+
+ git_ignore = GitIgnore(test_dir, globs=["*.tmp"])
+
+ assert git_ignore.ignore("file.log") is True # from .gitignore
+ assert git_ignore.ignore("file.cache") is True # from .dstackignore
+ assert git_ignore.ignore("file.tmp") is True # from globs
+ assert git_ignore.ignore("file.txt") is False
+
+ def test_absolute_paths(self):
+ """Test handling of absolute paths."""
+ with tempfile.TemporaryDirectory() as tmpdir:
+ test_dir = Path(tmpdir) / "test"
+ test_dir.mkdir()
+
+ # Create .gitignore
+ (test_dir / ".gitignore").write_text("*.log\n")
+
+ git_ignore = GitIgnore(test_dir)
+
+ # Test absolute path within repo
+ abs_path = test_dir / "test.log"
+ assert git_ignore.ignore(abs_path) is True
+
+ # Test absolute path outside repo
+ outside_path = Path(tmpdir) / "outside.log"
+ assert git_ignore.ignore(outside_path) is False
+
+ def test_empty_path(self):
+ """Test handling of empty paths."""
+ with tempfile.TemporaryDirectory() as tmpdir:
+ test_dir = Path(tmpdir) / "test"
+ test_dir.mkdir()
+
+ git_ignore = GitIgnore(test_dir)
+
+ assert git_ignore.ignore("") is False
+ assert git_ignore.ignore(None) is False
+
+ def test_nonexistent_ignore_files(self):
+ """Test that nonexistent ignore files are handled gracefully."""
+ with tempfile.TemporaryDirectory() as tmpdir:
+ test_dir = Path(tmpdir) / "test"
+ test_dir.mkdir()
+
+ # No ignore files exist
+ git_ignore = GitIgnore(test_dir)
+
+ # Should not ignore anything
+ assert git_ignore.ignore("any_file.txt") is False
+ assert git_ignore.ignore("any_dir/") is False
+
+ def test_malformed_ignore_files(self):
+ """Test that comments, blank lines, and whitespace-only lines in ignore files are skipped."""
+ with tempfile.TemporaryDirectory() as tmpdir:
+ test_dir = Path(tmpdir) / "test"
+ test_dir.mkdir()
+
+ # Create a file that might cause parsing issues
+ gitignore_file = test_dir / ".gitignore"
+ gitignore_file.write_text("*.log\n# comment\n\n \n*.tmp\n")
+
+ git_ignore = GitIgnore(test_dir)
+
+ # Should still work for valid patterns
+ assert git_ignore.ignore("test.log") is True
+ assert git_ignore.ignore("test.tmp") is True
+ assert git_ignore.ignore("test.txt") is False
+
+ def test_directory_traversal_stops_at_ignored_dirs(self):
+ """Test that ignore files inside an ignored directory do not affect paths outside it."""
+ with tempfile.TemporaryDirectory() as tmpdir:
+ test_dir = Path(tmpdir) / "test"
+ test_dir.mkdir()
+
+ # Create root .gitignore that ignores 'ignored_dir'
+ (test_dir / ".gitignore").write_text("ignored_dir/\n")
+
+ # Create ignored directory with its own .gitignore
+ ignored_dir = test_dir / "ignored_dir"
+ ignored_dir.mkdir()
+ (ignored_dir / ".gitignore").write_text("*.should_not_apply\n")
+
+ # Create a subdirectory in the ignored directory
+ subdir = ignored_dir / "subdir"
+ subdir.mkdir()
+ (subdir / ".gitignore").write_text("*.also_should_not_apply\n")
+
+ git_ignore = GitIgnore(test_dir)
+
+ # The ignored directory itself should be ignored
+ assert git_ignore.ignore("ignored_dir") is True
+ assert git_ignore.ignore("ignored_dir/file.txt") is True
+
+ # Patterns from .gitignore files inside ignored directories should not apply
+ # to files outside those directories
+ assert git_ignore.ignore("file.should_not_apply") is False
+ assert git_ignore.ignore("file.also_should_not_apply") is False
+
+ def test_relative_path_handling(self):
+ """Test various relative path formats."""
+ with tempfile.TemporaryDirectory() as tmpdir:
+ test_dir = Path(tmpdir) / "test"
+ test_dir.mkdir()
+
+ (test_dir / ".gitignore").write_text("*.log\ntemp/\n")
+
+ git_ignore = GitIgnore(test_dir)
+
+ # Test different path formats
+ assert git_ignore.ignore("file.log") is True
+ assert git_ignore.ignore("./file.log") is True
+ assert git_ignore.ignore("subdir/file.log") is True
+ assert git_ignore.ignore("./subdir/file.log") is True
+ assert git_ignore.ignore("temp") is True
+ assert git_ignore.ignore("./temp") is True
+ assert git_ignore.ignore("temp/") is True