|
1 | | -from collections.abc import Callable |
2 | | -import fnmatch |
3 | 1 | import os |
4 | 2 | from pathlib import Path |
5 | 3 |
|
6 | | -from gitignore_parser import ( # type: ignore[import-untyped] # library has no stub |
7 | | - parse_gitignore, |
8 | | -) |
| 4 | +from ignore import WalkBuilder |
| 5 | +from ignore.overrides import OverrideBuilder |
9 | 6 |
|
10 | 7 | from sphinx_codelinks.source_discover.config import ( |
11 | 8 | COMMENT_FILETYPE, |
|
class SourceDiscover:
    """Discover source files below the configured source directory.

    The directory walk is delegated to ``ignore.WalkBuilder``. Discovery runs
    once, at construction time, and the sorted result is stored in
    ``source_paths``.
    """

    def __init__(self, src_discover_config: SourceDiscoverConfig):
        """Store the configuration and run the discovery.

        Args:
            src_discover_config: Settings read here and in the helpers:
                ``src_dir``, ``comment_type``, ``include``, ``exclude``,
                ``gitignore`` and ``follow_links``.
        """
        self.src_discover_config = src_discover_config
        # Normalize the file types to lower case with a leading dot so that
        # they compare equal to ``Path.suffix.lower()`` in ``_discover``.
        self.file_types = {
            f".{ext.lower()}"
            for ext in COMMENT_FILETYPE[src_discover_config.comment_type]
        }

        self.source_paths = self._discover()

    def _build_overrides(self) -> OverrideBuilder | None:
        """Build an OverrideBuilder for include/exclude patterns.

        Include patterns are added as whitelist globs.
        Exclude patterns are added as negated globs (prefixed with ``!``).

        Returns:
            The populated builder, or ``None`` when neither ``include`` nor
            ``exclude`` is configured.
        """
        include = self.src_discover_config.include
        exclude = self.src_discover_config.exclude

        if not include and not exclude:
            return None

        ob = OverrideBuilder(str(self.src_discover_config.src_dir))

        # NOTE(review): excludes are registered after includes. If the
        # binding follows gitignore-style "last match wins" semantics, an
        # exclude overrides an include for the same file - confirm that this
        # is the intended priority.
        for pattern in include or ():
            ob.add(pattern)
        for pattern in exclude or ():
            ob.add(f"!{pattern}")

        return ob

    def _discover(self) -> list[Path]:
        """Discover source files recursively in the given directory.

        Returns:
            Resolved paths of all regular files accepted by the walker whose
            suffix is in ``file_types`` (any suffix when ``file_types`` is
            empty), without duplicates, sorted by normalized path. An empty
            list is returned when ``src_dir`` is not a directory.
        """
        src_dir = self.src_discover_config.src_dir
        if not src_dir.is_dir():
            return []

        builder = WalkBuilder(str(src_dir))
        # Presumably ``hidden(False)`` disables the skipping of hidden files,
        # as in the Rust ``ignore`` crate - verify against the binding.
        builder.hidden(False)
        builder.git_ignore(self.src_discover_config.gitignore)
        builder.git_global(False)
        builder.git_exclude(False)
        builder.follow_links(self.src_discover_config.follow_links)

        override_builder = self._build_overrides()
        if override_builder is not None:
            builder.overrides(override_builder.build())

        # A set is used because ``resolve()`` maps a symlink and its target to
        # the same path; the same file must not be reported twice.
        discovered_files: set[Path] = set()
        for entry in builder.build():
            filepath = entry.path()
            if not filepath.is_file():
                continue
            if self.file_types and filepath.suffix.lower() not in self.file_types:
                continue
            discovered_files.add(filepath.resolve())

        return sorted(
            discovered_files, key=lambda x: os.path.normcase(os.path.normpath(x))
        )
|
0 commit comments