Skip to content

Commit c9a535a

Browse files
committed
Update gitignore logic to catch more cases
1 parent 9a0948d commit c9a535a

4 files changed

Lines changed: 345 additions & 97 deletions

File tree

pyproject.toml

Lines changed: 10 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ dependencies = [
3434
"psutil",
3535
"gpuhunt==0.1.6",
3636
"argcomplete>=3.5.0",
37+
"gitignore-parser>=0.1.12",
3738
]
3839

3940
[project.urls]
@@ -55,14 +56,10 @@ dstack = "dstack._internal.cli.main:main"
5556
path = "src/dstack/version.py"
5657

5758
[tool.hatch.build.targets.sdist]
58-
artifacts = [
59-
"src/dstack/_internal/server/statics/**",
60-
]
59+
artifacts = ["src/dstack/_internal/server/statics/**"]
6160

6261
[tool.hatch.build.targets.wheel]
63-
artifacts = [
64-
"src/dstack/_internal/server/statics/**",
65-
]
62+
artifacts = ["src/dstack/_internal/server/statics/**"]
6663

6764
[tool.hatch.metadata.hooks.fancy-pypi-readme]
6865
content-type = "text/markdown"
@@ -82,13 +79,13 @@ dev = [
8279
"pre-commit>=4.2.0",
8380
"pytest-asyncio>=0.23.8",
8481
"pytest-httpbin>=2.1.0",
85-
"httpbin>=0.10.2", # indirect to make compatible with Werkzeug 3
82+
"httpbin>=0.10.2", # indirect to make compatible with Werkzeug 3
8683
"pytest~=7.2",
8784
"pytest-socket>=0.7.0",
8885
"requests-mock>=1.12.1",
8986
"openai>=1.68.2",
9087
"freezegun>=1.5.1",
91-
"ruff==0.11.6", # should match .pre-commit-config.yaml
88+
"ruff==0.11.6", # should match .pre-commit-config.yaml
9289
"testcontainers>=4.9.2",
9390
"pytest-xdist>=3.6.1",
9491
]
@@ -127,11 +124,7 @@ server = [
127124
"grpcio>=1.50",
128125
"backports.entry-points-selectable",
129126
]
130-
aws = [
131-
"boto3>=1.38.13",
132-
"botocore",
133-
"dstack[server]",
134-
]
127+
aws = ["boto3>=1.38.13", "botocore", "dstack[server]"]
135128
azure = [
136129
"azure-identity>=1.12.0",
137130
"azure-mgmt-subscription>=3.1.1",
@@ -152,19 +145,9 @@ gcp = [
152145
"google-cloud-tpu>=1.18.3",
153146
"dstack[server]",
154147
]
155-
datacrunch = [
156-
"datacrunch",
157-
"dstack[server]",
158-
]
159-
kubernetes = [
160-
"kubernetes",
161-
"dstack[server]",
162-
]
163-
lambda = [
164-
"boto3>=1.38.13",
165-
"botocore",
166-
"dstack[server]",
167-
]
148+
datacrunch = ["datacrunch", "dstack[server]"]
149+
kubernetes = ["kubernetes", "dstack[server]"]
150+
lambda = ["boto3>=1.38.13", "botocore", "dstack[server]"]
168151
oci = [
169152
"oci>=2.150.0",
170153
"cryptography>=44.0.3",
@@ -173,10 +156,7 @@ oci = [
173156
"pyopenssl>=23.2.0",
174157
"dstack[server]",
175158
]
176-
nebius = [
177-
"nebius>=0.2.19,<0.3; python_version >= '3.10'",
178-
"dstack[server]",
179-
]
159+
nebius = ["nebius>=0.2.19,<0.3; python_version >= '3.10'", "dstack[server]"]
180160
all = [
181161
"dstack[gateway,server,aws,azure,gcp,datacrunch,kubernetes,lambda,nebius,oci]",
182162
]

src/dstack/_internal/core/models/repos/local.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,14 @@
55
from typing_extensions import Literal
66

77
from dstack._internal.core.models.repos.base import BaseRepoInfo, Repo
8+
from dstack._internal.utils.common import sizeof_fmt
89
from dstack._internal.utils.hash import get_sha256, slugify
910
from dstack._internal.utils.ignore import GitIgnore
11+
from dstack._internal.utils.logging import get_logger
1012
from dstack._internal.utils.path import PathLike
1113

14+
logger = get_logger(__name__)
15+
1216

1317
class LocalRepoInfo(BaseRepoInfo):
1418
repo_type: Literal["local"] = "local"
@@ -75,6 +79,7 @@ def write_code_file(self, fp: BinaryIO) -> str:
7579
arcname="",
7680
filter=TarIgnore(self.run_repo_data.repo_dir, globs=[".git"]),
7781
)
82+
logger.debug(f"Code file size: {sizeof_fmt(fp.tell())} bytes")
7883
return get_sha256(fp)
7984

8085
def get_repo_info(self) -> LocalRepoInfo:
src/dstack/_internal/utils/ignore.py

Lines changed: 74 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
1-
import fnmatch
2-
from itertools import zip_longest
1+
import os
32
from pathlib import Path
4-
from typing import Dict, List, Optional
3+
from typing import List
4+
5+
from gitignore_parser import parse_gitignore_str
56

67
from dstack._internal.utils.path import PathLike
78

@@ -16,77 +17,83 @@ def __init__(
1617
if ignore_files is not None
1718
else [".gitignore", ".git/info/exclude", ".dstackignore"]
1819
)
19-
self.ignore_globs: Dict[str, List[str]] = {".": globs or []}
20-
self.load_recursive()
20+
self.parser = None
21+
self._create_combined_parser(globs or [])
2122

22-
def load_ignore_file(self, path: str, ignore_file: Path):
23-
if path != "." and not path.startswith("./"):
24-
path = "./" + path
25-
if path not in self.ignore_globs:
26-
self.ignore_globs[path] = []
27-
with ignore_file.open("r") as f:
28-
for line in f:
29-
line = self.rstrip(line.rstrip("\n")).rstrip("/")
30-
line = line.replace("\\ ", " ")
31-
if line.startswith("#") or not line:
32-
continue
33-
self.ignore_globs[path].append(line)
23+
def _create_combined_parser(self, additional_globs: List[str]):
24+
"""Create a single parser from all ignore files and additional globs."""
25+
all_patterns = []
3426

35-
def load_recursive(self, path: Optional[Path] = None):
36-
path = path or self.root_dir
37-
for ignore_file in self.ignore_files:
38-
ignore_file = path / ignore_file
39-
if ignore_file.exists():
40-
self.load_ignore_file(str(path.relative_to(self.root_dir)), ignore_file)
27+
# Collect patterns from all ignore files recursively
28+
self._collect_patterns_recursive(self.root_dir, all_patterns)
4129

42-
for subdir in path.iterdir():
43-
if not subdir.is_dir() or self.ignore(subdir.relative_to(self.root_dir)):
44-
continue
45-
self.load_recursive(subdir)
30+
# Add additional glob patterns
31+
all_patterns.extend(additional_globs)
4632

47-
@staticmethod
48-
def rstrip(value: str) -> str:
49-
end = len(value) - 1
50-
while end >= 0:
51-
if not value[end].isspace():
52-
break
53-
if end > 0 and value[end - 1] == "\\":
54-
break # escaped space
55-
end -= 1
56-
else:
57-
return ""
58-
return value[: end + 1]
33+
self.parser = parse_gitignore_str("\n".join(all_patterns), self.root_dir)
5934

60-
@staticmethod
61-
def fnmatch(name: str, pattern: str, sep="/") -> bool:
62-
if pattern.startswith(sep):
63-
name = sep + name
64-
for n, p in zip_longest(
65-
reversed(name.split(sep)), reversed(pattern.split(sep)), fillvalue=None
66-
):
67-
if p == "**":
68-
raise NotImplementedError()
69-
if p is None:
70-
return True
71-
if n is None or not fnmatch.fnmatch(n, p):
72-
return False
73-
return True
35+
def _collect_patterns_recursive(self, path: Path, patterns: List[str]):
36+
"""
37+
Recursively collect patterns from all ignore files and combine them into a single gitignore,
38+
with the root directory as the base path.
39+
"""
40+
for ignore_file_name in self.ignore_files:
41+
ignore_file = path / ignore_file_name
42+
if ignore_file.exists():
43+
try:
44+
# Get relative path from root to this directory
45+
if path == self.root_dir:
46+
prefix = ""
47+
else:
48+
prefix = path.relative_to(self.root_dir)
7449

75-
def ignore(self, path: PathLike, sep="/") -> bool:
76-
if not path:
50+
# Read patterns and prefix them with directory path
51+
with ignore_file.open("r", encoding="utf-8", errors="ignore") as f:
52+
for line in f:
53+
line = line.strip()
54+
if line and not line.startswith("#"):
55+
if prefix:
56+
# Prefix patterns with directory path for subdirectories
57+
if line.startswith("/"):
58+
# Absolute pattern within subdirectory
59+
patterns.append(os.path.join(prefix, line[1:]))
60+
else:
61+
# Relative pattern within subdirectory
62+
# Add pattern that matches files directly in the subdirectory
63+
patterns.append(os.path.join(prefix, line))
64+
# Add pattern that matches files in deeper subdirectories
65+
patterns.append(os.path.join(prefix, "**", line))
66+
else:
67+
# Root directory patterns
68+
patterns.append(line)
69+
except (OSError, UnicodeDecodeError):
70+
# Skip files we can't read
71+
continue
72+
73+
# Recursively process subdirectories
74+
# Note: We need to check if directories should be ignored, but we can't
75+
# use self.ignore() yet since we're still building the parser
76+
# So we'll process all directories and let gitignore_parser handle the logic
77+
try:
78+
for subdir in path.iterdir():
79+
if subdir.is_dir():
80+
self._collect_patterns_recursive(subdir, patterns)
81+
except (OSError, PermissionError):
82+
# Skip directories we can't read
83+
pass
84+
85+
def ignore(self, path: PathLike) -> bool:
86+
"""Check if a path should be ignored."""
87+
if not path or not self.parser:
7788
return False
89+
7890
path = Path(path)
7991
if path.is_absolute():
80-
path = path.relative_to(self.root_dir)
92+
try:
93+
path = path.relative_to(self.root_dir)
94+
except ValueError:
95+
return False
8196

82-
tokens = ("." + sep + str(path)).split(sep)
83-
for i in range(1, len(tokens)):
84-
parent = sep.join(tokens[:-i])
85-
globs = self.ignore_globs.get(parent)
86-
if not globs:
87-
continue
88-
name = sep.join(tokens[-i:])
89-
for glob in globs:
90-
if self.fnmatch(name, glob, sep=sep):
91-
return True
92-
return False
97+
# Convert to absolute path for gitignore_parser
98+
abs_path = str(self.root_dir / path)
99+
return self.parser(abs_path)

0 commit comments

Comments
 (0)