diff --git a/pyproject.toml b/pyproject.toml
index d66185fc4..5c143cf5d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -34,6 +34,7 @@ dependencies = [
     "psutil",
     "gpuhunt==0.1.6",
     "argcomplete>=3.5.0",
+    "ignore-python>=0.2.0",
 ]
 
 [project.urls]
diff --git a/src/dstack/_internal/core/models/repos/local.py b/src/dstack/_internal/core/models/repos/local.py
index 1bc815f12..b0a5e0c59 100644
--- a/src/dstack/_internal/core/models/repos/local.py
+++ b/src/dstack/_internal/core/models/repos/local.py
@@ -2,13 +2,18 @@
 from pathlib import Path
 from typing import BinaryIO, Optional
 
+import ignore
+import ignore.overrides
 from typing_extensions import Literal
 
 from dstack._internal.core.models.repos.base import BaseRepoInfo, Repo
+from dstack._internal.utils.common import sizeof_fmt
 from dstack._internal.utils.hash import get_sha256, slugify
-from dstack._internal.utils.ignore import GitIgnore
+from dstack._internal.utils.logging import get_logger
 from dstack._internal.utils.path import PathLike
 
+logger = get_logger(__name__)
+
 
 class LocalRepoInfo(BaseRepoInfo):
     repo_type: Literal["local"] = "local"
@@ -69,22 +74,40 @@ def __init__(
         self.run_repo_data = repo_data
 
     def write_code_file(self, fp: BinaryIO) -> str:
+        """
+        Write the contents of the repo directory to `fp` as a tar archive.
+
+        Entries excluded by git ignore rules or `.dstackignore` files, as well as
+        the top-level `.git` directory, are skipped. Archive members are stored
+        under paths relative to the repo directory.
+
+        Args:
+            fp: Binary file object the archive is written to.
+
+        Returns:
+            The value returned by `get_sha256(fp)` once the archive is written.
+        """
+        repo_path = Path(self.run_repo_data.repo_dir)
         with tarfile.TarFile(mode="w", fileobj=fp) as t:
-            t.add(
-                self.run_repo_data.repo_dir,
-                arcname="",
-                filter=TarIgnore(self.run_repo_data.repo_dir, globs=[".git"]),
-            )
+            for entry in (
+                ignore.WalkBuilder(repo_path)
+                # in override globs a leading `!` means "ignore" (inverse of .gitignore)
+                .overrides(ignore.overrides.OverrideBuilder(repo_path).add("!/.git/").build())
+                .hidden(False)  # do not ignore files that start with a dot
+                .require_git(False)  # respect git ignore rules even if not a git repo
+                .add_custom_ignore_filename(".dstackignore")
+                .build()
+            ):
+                entry_path = entry.path()
+                arcname = entry_path.relative_to(repo_path.absolute())
+                if arcname != Path("."):
+                    # Pass the real on-disk path as `name` so the entry is found regardless
+                    # of the current working directory; `arcname` keeps it repo-relative.
+                    t.add(entry_path, arcname=arcname.as_posix(), recursive=False)
+        logger.debug("Code file size: %s", sizeof_fmt(fp.tell()))
         return get_sha256(fp)
 
     def get_repo_info(self) -> LocalRepoInfo:
         return LocalRepoInfo(
             repo_dir=self.run_repo_data.repo_dir,
         )
-
-
-class TarIgnore(GitIgnore):
-    def __call__(self, tarinfo: tarfile.TarInfo) -> Optional[tarfile.TarInfo]:
-        if self.ignore(tarinfo.path):
-            return None
-        return tarinfo
diff --git a/src/dstack/_internal/utils/ignore.py b/src/dstack/_internal/utils/ignore.py
deleted file mode 100644
index f3be17f63..000000000
--- a/src/dstack/_internal/utils/ignore.py
+++ /dev/null
@@ -1,92 +0,0 @@
-import fnmatch
-from itertools import zip_longest
-from pathlib import Path
-from typing import Dict, List, Optional
-
-from dstack._internal.utils.path import PathLike
-
-
-class GitIgnore:
-    def __init__(
-        self, root_dir: PathLike, ignore_files: List[str] = None, globs: List[str] = None
-    ):
-        self.root_dir = Path(root_dir)
-        self.ignore_files = (
-            ignore_files
-            if ignore_files is not None
-            else [".gitignore", ".git/info/exclude", ".dstackignore"]
-        )
-        self.ignore_globs: Dict[str, List[str]] = {".": globs or []}
-        self.load_recursive()
-
-    def load_ignore_file(self, path: str, ignore_file: Path):
-        if path != "." and not path.startswith("./"):
-            path = "./" + path
-        if path not in self.ignore_globs:
-            self.ignore_globs[path] = []
-        with ignore_file.open("r") as f:
-            for line in f:
-                line = self.rstrip(line.rstrip("\n")).rstrip("/")
-                line = line.replace("\\ ", " ")
-                if line.startswith("#") or not line:
-                    continue
-                self.ignore_globs[path].append(line)
-
-    def load_recursive(self, path: Optional[Path] = None):
-        path = path or self.root_dir
-        for ignore_file in self.ignore_files:
-            ignore_file = path / ignore_file
-            if ignore_file.exists():
-                self.load_ignore_file(str(path.relative_to(self.root_dir)), ignore_file)
-
-        for subdir in path.iterdir():
-            if not subdir.is_dir() or self.ignore(subdir.relative_to(self.root_dir)):
-                continue
-            self.load_recursive(subdir)
-
-    @staticmethod
-    def rstrip(value: str) -> str:
-        end = len(value) - 1
-        while end >= 0:
-            if not value[end].isspace():
-                break
-            if end > 0 and value[end - 1] == "\\":
-                break  # escaped space
-            end -= 1
-        else:
-            return ""
-        return value[: end + 1]
-
-    @staticmethod
-    def fnmatch(name: str, pattern: str, sep="/") -> bool:
-        if pattern.startswith(sep):
-            name = sep + name
-        for n, p in zip_longest(
-            reversed(name.split(sep)), reversed(pattern.split(sep)), fillvalue=None
-        ):
-            if p == "**":
-                raise NotImplementedError()
-            if p is None:
-                return True
-            if n is None or not fnmatch.fnmatch(n, p):
-                return False
-        return True
-
-    def ignore(self, path: PathLike, sep="/") -> bool:
-        if not path:
-            return False
-        path = Path(path)
-        if path.is_absolute():
-            path = path.relative_to(self.root_dir)
-
-        tokens = ("." + sep + str(path)).split(sep)
-        for i in range(1, len(tokens)):
-            parent = sep.join(tokens[:-i])
-            globs = self.ignore_globs.get(parent)
-            if not globs:
-                continue
-            name = sep.join(tokens[-i:])
-            for glob in globs:
-                if self.fnmatch(name, glob, sep=sep):
-                    return True
-        return False