Skip to content

Commit b0fcba0

Browse files
committed
feat: add whitelist file support (-w/--whitelist-file, .treemapperwhitelist)
1 parent 6501ee4 commit b0fcba0

7 files changed

Lines changed: 76 additions & 5 deletions

File tree

src/treemapper/__init__.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from typing import Any
66

77
from .diffctx import build_diff_context
8-
from .ignore import get_ignore_specs
8+
from .ignore import get_ignore_specs, get_whitelist_spec
99
from .tree import TreeBuildContext, build_tree
1010
from .version import __version__
1111
from .writer import write_tree_json, write_tree_markdown, write_tree_text, write_tree_yaml
@@ -31,12 +31,14 @@ def map_directory(
3131
max_file_bytes: int | None = None,
3232
ignore_file: str | Path | None = None,
3333
no_default_ignores: bool = False,
34+
whitelist_file: str | Path | None = None,
3435
) -> dict[str, Any]:
3536
root_dir = Path(path).resolve()
3637
if not root_dir.is_dir():
3738
raise ValueError(f"'{path}' is not a directory")
3839

3940
ignore_path = Path(ignore_file).resolve() if ignore_file else None
41+
whitelist_path = Path(whitelist_file).resolve() if whitelist_file else None
4042

4143
ctx = TreeBuildContext(
4244
base_dir=root_dir,
@@ -45,6 +47,7 @@ def map_directory(
4547
max_depth=max_depth,
4648
no_content=no_content,
4749
max_file_bytes=max_file_bytes,
50+
whitelist_spec=get_whitelist_spec(whitelist_path, root_dir),
4851
)
4952

5053
return {

src/treemapper/cli.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,10 +81,20 @@ def _resolve_ignore_file(ignore_file_arg: str | None) -> Path | None:
8181
return ignore_file
8282

8383

84+
def _resolve_whitelist_file(whitelist_file_arg: str | None) -> Path | None:
85+
if not whitelist_file_arg:
86+
return None
87+
whitelist_file = Path(whitelist_file_arg).resolve()
88+
if not whitelist_file.is_file():
89+
_exit_error(f"Whitelist file '{whitelist_file_arg}' does not exist.")
90+
return whitelist_file
91+
92+
8493
@dataclass
8594
class ParsedArgs:
8695
root_dir: Path
8796
ignore_file: Path | None
97+
whitelist_file: Path | None
8898
output_file: Path | None
8999
no_default_ignores: bool
90100
verbosity: int
@@ -132,6 +142,7 @@ def parse_args() -> ParsedArgs:
132142
parser.add_argument("-v", "--version", action="version", version=f"%(prog)s {__version__}")
133143
parser.add_argument("directory", nargs="?", default=".", help="The directory to analyze")
134144
parser.add_argument("-i", "--ignore-file", default=None, help="Path to custom ignore file")
145+
parser.add_argument("-w", "--whitelist-file", default=None, help="Path to whitelist file (only matching files are included)")
135146
parser.add_argument(
136147
"-o",
137148
"--output-file",
@@ -211,13 +222,15 @@ def parse_args() -> ParsedArgs:
211222
output_format = "yaml" if args.format == "yml" else args.format
212223
output_file, force_stdout = _resolve_output_file(args.output_file, output_format)
213224
ignore_file = _resolve_ignore_file(args.ignore_file)
225+
whitelist_file = _resolve_whitelist_file(args.whitelist_file)
214226

215227
log_level_map = {"error": 0, "warning": 1, "info": 2, "debug": 3}
216228
verbosity = log_level_map[args.log_level]
217229

218230
return ParsedArgs(
219231
root_dir=root_dir,
220232
ignore_file=ignore_file,
233+
whitelist_file=whitelist_file,
221234
output_file=output_file,
222235
no_default_ignores=args.no_default_ignores,
223236
verbosity=verbosity,

src/treemapper/diffctx/__init__.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
import pathspec
1010

11-
from ..ignore import get_ignore_specs, should_ignore
11+
from ..ignore import get_ignore_specs, get_whitelist_spec, is_whitelisted, should_ignore
1212
from ..tokens import count_tokens
1313
from .config import LIMITS
1414
from .config.extensions import CODE_EXTENSIONS, CONFIG_EXTENSIONS, DOC_EXTENSIONS
@@ -220,6 +220,7 @@ def build_diff_context(
220220
ignore_file: Path | None = None,
221221
no_default_ignores: bool = False,
222222
full: bool = False,
223+
whitelist_file: Path | None = None,
223224
) -> dict[str, Any]:
224225
_validate_inputs(root_dir, alpha, tau, budget_tokens)
225226
root_dir = root_dir.resolve()
@@ -229,6 +230,7 @@ def build_diff_context(
229230
base_rev, head_rev = split_diff_range(diff_range)
230231
is_working_tree_diff = base_rev is None and head_rev is None
231232
combined_spec = get_ignore_specs(root_dir, ignore_file, no_default_ignores, None)
233+
wl_spec = get_whitelist_spec(whitelist_file, root_dir)
232234

233235
untracked: list[Path] = []
234236
if is_working_tree_diff:
@@ -247,13 +249,15 @@ def build_diff_context(
247249
changed_files = [_normalize_path(p, root_dir) for p in changed_files]
248250
changed_files.extend(untracked)
249251
changed_files = _filter_ignored(changed_files, root_dir, combined_spec)
252+
changed_files = _filter_whitelist(changed_files, root_dir, wl_spec)
250253

251254
preferred_revs = _build_preferred_revs(base_rev, head_rev)
252255

253256
seen_frag_ids: set[FragmentId] = set()
254257
all_fragments = _process_files_for_fragments(changed_files, root_dir, preferred_revs, seen_frag_ids)
255258

256259
all_candidate_files = _collect_candidate_files(root_dir, set(changed_files), combined_spec)
260+
all_candidate_files = _filter_whitelist(all_candidate_files, root_dir, wl_spec)
257261

258262
edge_discovered = discover_all_related_files(changed_files, all_candidate_files, root_dir)
259263
edge_discovered = [_normalize_path(p, root_dir) for p in edge_discovered]
@@ -586,6 +590,24 @@ def _collect_expansion_files(
586590
return list(expansion_files)
587591

588592

593+
def _filter_whitelist(
594+
files: list[Path],
595+
root_dir: Path,
596+
wl_spec: pathspec.PathSpec | None,
597+
) -> list[Path]:
598+
if wl_spec is None:
599+
return files
600+
result: list[Path] = []
601+
for file_path in files:
602+
try:
603+
rel_path = file_path.relative_to(root_dir).as_posix()
604+
if is_whitelisted(rel_path, wl_spec):
605+
result.append(file_path)
606+
except ValueError:
607+
pass
608+
return result
609+
610+
589611
def _filter_ignored(
590612
files: list[Path],
591613
root_dir: Path,

src/treemapper/ignore.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -272,3 +272,28 @@ def should_ignore(relative_path_str: str, combined_spec: pathspec.PathSpec) -> b
272272
if logging.getLogger().isEnabledFor(logging.DEBUG):
273273
logging.debug("Checking ignore for '%s': %s", relative_path_str, is_ignored)
274274
return is_ignored
275+
276+
277+
# File name looked up inside the root directory when no whitelist file is
# passed explicitly.
DEFAULT_WHITELIST_FILENAME = ".treemapperwhitelist"


def get_whitelist_spec(whitelist_file: Path | None, root_dir: Path | None = None) -> pathspec.PathSpec | None:
    """Build the whitelist ``PathSpec``, or ``None`` when no whitelist applies.

    Args:
        whitelist_file: Explicit whitelist file. When falsy, the default
            file ``root_dir / DEFAULT_WHITELIST_FILENAME`` is used instead,
            provided ``root_dir`` is given and that file exists.
        root_dir: Directory searched for the default whitelist file.

    Returns:
        A ``pathspec.PathSpec`` compiled with the ``"gitignore"`` pattern
        style from the patterns that ``read_ignore_file`` yields, or
        ``None`` when there is no whitelist file to read or it yields no
        patterns.
    """
    source = whitelist_file
    if not source:
        fallback = root_dir / DEFAULT_WHITELIST_FILENAME if root_dir else None
        if fallback is None or not fallback.is_file():
            return None
        source = fallback

    # An empty pattern list is treated the same as "no whitelist".
    patterns = read_ignore_file(source)
    return pathspec.PathSpec.from_lines("gitignore", patterns) if patterns else None
292+
293+
294+
def is_whitelisted(relative_path_str: str, whitelist_spec: pathspec.PathSpec | None, is_dir: bool = False) -> bool:
    """Tell whether a path is allowed by the whitelist.

    Args:
        relative_path_str: Path string handed to the spec for matching.
        whitelist_spec: Compiled whitelist, or ``None`` when no whitelist
            is active.
        is_dir: Whether the path is a directory.

    Returns:
        ``True`` when there is no whitelist or the path is a directory
        (directories are never rejected here); otherwise the result of
        ``whitelist_spec.match_file(relative_path_str)``.
    """
    if whitelist_spec is None or is_dir:
        return True
    return whitelist_spec.match_file(relative_path_str)

src/treemapper/tree.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
import pathspec
99

10-
from .ignore import should_ignore
10+
from .ignore import is_whitelisted, should_ignore
1111

1212
BINARY_DETECTION_SAMPLE_SIZE = 8192
1313
MAX_SAFE_FILE_SIZE = 100 * 1024 * 1024 # 100 MB - prevent OOM when --max-file-bytes 0
@@ -108,6 +108,7 @@ class TreeBuildContext:
108108
max_depth: int | None = None
109109
no_content: bool = False
110110
max_file_bytes: int | None = None
111+
whitelist_spec: pathspec.PathSpec | None = None
111112
_resolved_output_file: Path | None = None
112113

113114
def __post_init__(self) -> None:
@@ -161,6 +162,9 @@ def _process_entry(entry: Path, ctx: TreeBuildContext, current_depth: int) -> di
161162
if should_ignore(path_to_check, ctx.combined_spec):
162163
return None
163164

165+
if not is_whitelisted(relative_path, ctx.whitelist_spec, is_dir=is_dir):
166+
return None
167+
164168
if entry.is_symlink() or not entry.exists():
165169
logging.debug("Skipping '%s': symlink or not exists", path_to_check)
166170
return None
@@ -176,6 +180,8 @@ def _create_node(entry: Path, ctx: TreeBuildContext, current_depth: int, is_dir:
176180
children = build_tree(entry, ctx, current_depth + 1)
177181
if children:
178182
node["children"] = children
183+
elif ctx.whitelist_spec is not None:
184+
return None
179185
elif not ctx.no_content:
180186
node["content"] = _read_file_content(entry, ctx.max_file_bytes)
181187

src/treemapper/treemapper.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,14 +28,15 @@ def _build_diff_tree(args: ParsedArgs) -> dict[str, Any]:
2828
ignore_file=args.ignore_file,
2929
no_default_ignores=args.no_default_ignores,
3030
full=args.full_diff,
31+
whitelist_file=args.whitelist_file,
3132
)
3233
except GitError as e:
3334
print(f"Error: {e}", file=sys.stderr)
3435
sys.exit(1)
3536

3637

3738
def _build_standard_tree(args: ParsedArgs) -> dict[str, Any]:
38-
from .ignore import get_ignore_specs
39+
from .ignore import get_ignore_specs, get_whitelist_spec
3940
from .tree import TreeBuildContext, build_tree
4041

4142
ctx = TreeBuildContext(
@@ -45,6 +46,7 @@ def _build_standard_tree(args: ParsedArgs) -> dict[str, Any]:
4546
max_depth=args.max_depth,
4647
no_content=args.no_content,
4748
max_file_bytes=args.max_file_bytes,
49+
whitelist_spec=get_whitelist_spec(args.whitelist_file, args.root_dir),
4850
)
4951
return {
5052
"name": args.root_dir.name,

src/treemapper/writer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ def _write_yaml_content(file: TextIO, content: str, base_indent: str) -> None:
7575
content_indent = base_indent + " "
7676
if not content:
7777
file.write(f'{base_indent}content: ""\n')
78-
elif _has_problematic_chars(content):
78+
elif _has_problematic_chars(content) or not content.strip():
7979
file.write(f'{base_indent}content: "{_escape_yaml_content(content)}"\n')
8080
else:
8181
file.write(f"{base_indent}content: |2\n")

0 commit comments

Comments
 (0)