Skip to content

Commit cf21ceb

Browse files
✨ NEW: Replace gitignore-parser with ignore-python and add follow_links config
Agent-Logs-Url: https://github.com/useblocks/sphinx-codelinks/sessions/e9503678-3d44-4fee-9eb5-0d065cf4b5af
Co-authored-by: chrisjsewell <2997570+chrisjsewell@users.noreply.github.com>
1 parent a2a79e4 commit cf21ceb

6 files changed

Lines changed: 111 additions & 43 deletions

File tree

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@ readme = "README.md"
1212
requires-python = ">= 3.12"
1313
dependencies = [
1414
"comment-parser>=1.2.4",
15-
"gitignore-parser>=0.1.11",
15+
"ignore-python>=0.3.3",
1616
"typer>=0.16.0",
1717
"click < 8.2", # click 8.2.* produces empty errors if no args are given
1818
"jsonschema",

src/sphinx_codelinks/cmd.py

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -170,7 +170,7 @@ def analyse( # noqa: PLR0912 # for CLI, so it needs the branches
170170

171171

172172
@app.command(no_args_is_help=True)
173-
def discover(
173+
def discover( # noqa: PLR0913 # CLI command requires multiple parameters
174174
src_dir: Annotated[
175175
Path,
176176
typer.Argument(
@@ -203,9 +203,13 @@ def discover(
203203
gitignore: Annotated[
204204
bool,
205205
typer.Option(
206-
help="Respect .gitignore in the given directory. Nested .gitignore Not supported"
206+
help="Respect .gitignore files in the given directory and its parents"
207207
),
208208
] = True,
209+
follow_links: Annotated[
210+
bool,
211+
typer.Option(help="Follow symbolic links during file discovery"),
212+
] = False,
209213
comment_type: Annotated[
210214
CommentType,
211215
typer.Option(
@@ -222,6 +226,7 @@ def discover(
222226
"exclude": exclude,
223227
"include": include,
224228
"gitignore": gitignore,
229+
"follow_links": follow_links,
225230
"comment_type": comment_type,
226231
}
227232

src/sphinx_codelinks/source_discover/config.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,7 @@ class SourceDiscoverSectionConfigType(TypedDict, total=False):
3030
exclude: list[str]
3131
include: list[str]
3232
gitignore: bool
33+
follow_links: bool
3334
comment_type: CommentType
3435

3536

@@ -40,6 +41,7 @@ class SourceDiscoverConfigType(TypedDict, total=False):
4041
exclude: list[str]
4142
include: list[str]
4243
gitignore: bool
44+
follow_links: bool
4345
comment_type: CommentType
4446

4547

@@ -69,6 +71,9 @@ def field_names(cls) -> set[str]:
6971
gitignore: bool = field(default=True, metadata={"schema": {"type": "boolean"}})
7072
"""Whether to respect .gitignore to exclude files."""
7173

74+
follow_links: bool = field(default=False, metadata={"schema": {"type": "boolean"}})
75+
"""Whether to follow symbolic links during file discovery."""
76+
7277
comment_type: str = field(
7378
default="cpp",
7479
metadata={

src/sphinx_codelinks/source_discover/source_discover.py

Lines changed: 50 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,8 @@
1-
from collections.abc import Callable
2-
import fnmatch
31
import os
42
from pathlib import Path
53

6-
from gitignore_parser import ( # type: ignore[import-untyped] # library has no stub
7-
parse_gitignore,
8-
)
4+
from ignore import WalkBuilder
5+
from ignore.overrides import OverrideBuilder
96

107
from sphinx_codelinks.source_discover.config import (
118
COMMENT_FILETYPE,
@@ -17,51 +14,65 @@
1714
class SourceDiscover:
1815
def __init__(self, src_discover_config: SourceDiscoverConfig):
1916
self.src_discover_config = src_discover_config
20-
# Only gitignore at source root is considered.
21-
# TODO: Support nested gitignore files
22-
gitignore_path = self.src_discover_config.src_dir / ".gitignore"
23-
self.gitignore_matcher: Callable[[str], bool] | None = (
24-
parse_gitignore(gitignore_path)
25-
if self.src_discover_config.gitignore and gitignore_path.exists()
26-
else None
27-
)
2817
# normalize the file types to lower case with leading dot
2918
self.file_types = {
3019
f".{ext}" for ext in COMMENT_FILETYPE[src_discover_config.comment_type]
3120
}
3221

3322
self.source_paths = self._discover()
3423

24+
def _build_overrides(self) -> OverrideBuilder | None:
25+
"""Build an OverrideBuilder for include/exclude patterns.
26+
27+
Include patterns are added as whitelist globs.
28+
Exclude patterns are added as negated globs (prefixed with ``!``).
29+
"""
30+
src_dir = str(self.src_discover_config.src_dir)
31+
has_include = bool(self.src_discover_config.include)
32+
has_exclude = bool(self.src_discover_config.exclude)
33+
34+
if not has_include and not has_exclude:
35+
return None
36+
37+
ob = OverrideBuilder(src_dir)
38+
39+
if has_include:
40+
for pattern in self.src_discover_config.include:
41+
ob.add(pattern)
42+
43+
if has_exclude:
44+
for pattern in self.src_discover_config.exclude:
45+
ob.add(f"!{pattern}")
46+
47+
return ob
48+
3549
def _discover(self) -> list[Path]:
3650
"""Discover source files recursively in the given directory."""
51+
src_dir = self.src_discover_config.src_dir
52+
if not src_dir.is_dir():
53+
return []
54+
55+
builder = WalkBuilder(str(src_dir))
56+
builder.hidden(False)
57+
builder.git_ignore(self.src_discover_config.gitignore)
58+
builder.git_global(False)
59+
builder.git_exclude(False)
60+
builder.follow_links(self.src_discover_config.follow_links)
61+
62+
override_builder = self._build_overrides()
63+
if override_builder is not None:
64+
builder.overrides(override_builder.build())
65+
3766
discovered_files = []
38-
for filepath in self.src_discover_config.src_dir.rglob("*"):
39-
if filepath.is_file():
40-
if self.file_types and filepath.suffix.lower() not in self.file_types:
41-
continue
42-
rel_filepath = str(
43-
filepath.relative_to(self.src_discover_config.src_dir)
44-
)
45-
if self.src_discover_config.include and self._matches_any(
46-
rel_filepath, self.src_discover_config.include
47-
):
48-
# "includes" has the highest priority over "gitignore" and "excludes"
49-
discovered_files.append(filepath)
50-
continue
51-
if self.gitignore_matcher and self.gitignore_matcher(
52-
str(filepath.absolute())
53-
):
54-
continue
55-
if self.src_discover_config.exclude and self._matches_any(
56-
rel_filepath, self.src_discover_config.exclude
57-
):
58-
continue
59-
discovered_files.append(filepath)
67+
for entry in builder.build():
68+
filepath = entry.path()
69+
if not filepath.is_file():
70+
continue
71+
if self.file_types and filepath.suffix.lower() not in self.file_types:
72+
continue
73+
discovered_files.append(filepath.resolve())
74+
6075
sorted_filepaths = sorted(
6176
discovered_files, key=lambda x: os.path.normcase(os.path.normpath(x))
6277
)
6378
return sorted_filepaths
64-
65-
def _matches_any(self, rel_filepath: str, patterns: list[str]) -> bool:
66-
"""Check if the given file path matches any of the given patterns."""
67-
return any(fnmatch.fnmatch(rel_filepath, pattern) for pattern in patterns)

src/sphinx_codelinks/sphinx_extension/directives/src_trace.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -213,6 +213,7 @@ def get_src_files(
213213
gitignore=src_discover_config.gitignore,
214214
include=src_discover_config.include,
215215
exclude=src_discover_config.exclude,
216+
follow_links=src_discover_config.follow_links,
216217
comment_type=src_discover_config.comment_type,
217218
)
218219
source_discover = SourceDiscover(src_discover)

tests/test_source_discover.py

Lines changed: 47 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -60,6 +60,15 @@
6060
"Schema validation error in field 'comment_type': ['cpp', 'hpp'] is not of type 'string'"
6161
],
6262
),
63+
(
64+
{
65+
"src_dir": "/path/to/root",
66+
"follow_links": "not_a_bool",
67+
},
68+
[
69+
"Schema validation error in field 'follow_links': 'not_a_bool' is not of type 'boolean'"
70+
],
71+
),
6372
],
6473
)
6574
def test_schema_negative(config, msgs):
@@ -86,6 +95,10 @@ def test_schema_negative(config, msgs):
8695
"gitignore": True,
8796
"comment_type": "python",
8897
},
98+
{
99+
"src_dir": "/path/to/root",
100+
"follow_links": True,
101+
},
89102
],
90103
)
91104
def test_schema_positive(config):
@@ -117,7 +130,7 @@ def test_schema_positive(config):
117130
"exclude": ["charge/*.cpp"],
118131
"include": ["**/*.cpp"],
119132
},
120-
4,
133+
2,
121134
"",
122135
),
123136
(
@@ -174,3 +187,36 @@ def test_comment_filetype(
174187
)
175188
source_discover = SourceDiscover(config)
176189
assert len(source_discover.source_paths) == nums_files
190+
191+
192+
def test_follow_links(tmp_path: Path) -> None:
193+
"""Test that follow_links controls whether symbolic links are followed."""
194+
# Create a real directory with a source file
195+
real_dir = tmp_path / "real"
196+
real_dir.mkdir()
197+
(real_dir / "source.cpp").write_text("// test")
198+
199+
# Create a project directory with a symlink to the real directory
200+
project_dir = tmp_path / "project"
201+
project_dir.mkdir()
202+
(project_dir / "direct.cpp").write_text("// direct")
203+
link = project_dir / "linked"
204+
link.symlink_to(real_dir)
205+
206+
# Without follow_links, symlinked files should not be discovered
207+
config_no_follow = SourceDiscoverConfig(
208+
src_dir=project_dir, gitignore=False, follow_links=False
209+
)
210+
discover_no_follow = SourceDiscover(config_no_follow)
211+
discovered_names = {p.name for p in discover_no_follow.source_paths}
212+
assert "direct.cpp" in discovered_names
213+
assert "source.cpp" not in discovered_names
214+
215+
# With follow_links, symlinked files should be discovered
216+
config_follow = SourceDiscoverConfig(
217+
src_dir=project_dir, gitignore=False, follow_links=True
218+
)
219+
discover_follow = SourceDiscover(config_follow)
220+
discovered_names = {p.name for p in discover_follow.source_paths}
221+
assert "direct.cpp" in discovered_names
222+
assert "source.cpp" in discovered_names

0 commit comments

Comments
 (0)