Skip to content

Commit 0858b52

Browse files
committed
feat(diffctx): restore domain stopwords, add JS forward import discovery
Restore ~550 domain-specific stopwords (React hooks, IaC terms, OOP keywords, SQL, networking, etc.) that were removed in the stopwords rewrite. Add forward import resolution to JS/TS edge builder so changed files discover their own import targets, not just importers. Raises quality floor from 83% to 87% (avg 87.2%, 524 perfect).
1 parent 22dc631 commit 0858b52

837 files changed

Lines changed: 108360 additions & 45812 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -405,3 +405,6 @@ benchmarks/results/
405405
*.prof
406406
*.lprof
407407
profile_*.txt
408+
409+
# Quality score tracking
410+
.scores/

.gitleaks.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,4 +16,6 @@ paths = [
1616
'''tests/cases/diff/fragments/fragments_020_config_json_top_level_keys\.yaml''',
1717
'''tests/cases/diff/helm/helm_025_template_secret\.yaml''',
1818
'''tests/cases/diff/kubernetes/kubernetes_002_replicas_change\.yaml''',
19+
'''tests/cases/diff/kubernetes/kubernetes_026_ingress_rules\.yaml''',
20+
'''tests/cases/diff/kubernetes/kubernetes_027_ingress_tls\.yaml''',
1921
]

.pre-commit-config.yaml

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -164,16 +164,6 @@ repos:
164164
]
165165
files: ^src/
166166

167-
# ============================================================================
168-
# SPELL CHECKING
169-
# ============================================================================
170-
- repo: https://github.com/codespell-project/codespell
171-
rev: v2.4.1
172-
hooks:
173-
- id: codespell
174-
args: ["--write-changes", "--ignore-words-list=crate,nd,ser,llm,async,cli,theses,datas,hel,afterall,aftereach"]
175-
exclude: ^(\.git/|\.venv/|venv/)
176-
177167
# ============================================================================
178168
# YAML LINTING
179169
# ============================================================================

.treemapper/review-ux.whitelist

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# UX / Usability Review
2+
# CLI interface, output formatting, user feedback, error handling
3+
4+
src/treemapper/cli.py
5+
src/treemapper/writer.py
6+
src/treemapper/logger.py
7+
src/treemapper/clipboard.py
8+
src/treemapper/tokens.py
9+
src/treemapper/version.py
10+
src/treemapper/diffctx/render.py
11+
src/treemapper/__init__.py
12+
README.md

PAPER_DEVIATIONS.md

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
# Deviations from Paper
2+
3+
This document tracks implementation deviations from the research
4+
paper [Context-Selection for Git Diff](https://nikolay-eremeev.com/blog/context-selection-git-diff/).
5+
6+
## 1. Caller Importance Weighting for Impact Needs
7+
8+
**Paper reference:** Section 4.2.1 (Impact need scoring)
9+
10+
**Problem:** The paper's `m(f, n)` assigns a flat 0.8 to any
11+
fragment that mentions a symbol for `impact` needs. This cannot
12+
distinguish production callers (`handler.ts`) from peripheral
13+
code (`examples/parsing.ts`) — both receive identical scores.
14+
15+
**Extension:** For impact needs only, the match strength is
16+
scaled by a file importance factor:
17+
18+
```text
19+
m'(f, n) = m(f, n) * I(f) where n.type == "impact"
20+
```
21+
22+
`I(f)` is computed from the following layers:
23+
24+
| Layer | Signal | Importance |
25+
|-------|--------|------------|
26+
| Path patterns | `examples/`, `demo/`, `vendor/`, etc. | 0.15 |
27+
| Generated code | `generated/`, `__generated__/` paths | 0.10 |
28+
| Script dirs | `scripts/`, `tools/`, `bin/` | 0.40 |
29+
| Graph topology | Leaf node (in=0, out>0) | 0.25 |
30+
| Graph topology | Isolated (in=0, out=0) | 0.50 |
31+
| Graph topology | Production (in>0) | min(1.0, 0.7 + 0.1*in) |
32+
33+
Path-based layers take priority over graph topology.
34+
35+
**Submodularity preservation:** Since `I(f) in [0, 1]` is a
36+
constant per-fragment multiplier, `m'(f, n) <= m(f, n)`. The
37+
augmented score `a(f, n) = m'(f, n) + eta * R(f)` remains
38+
monotone submodular — scaling a non-negative input to `phi(max(...))` by
39+
a constant in [0, 1] preserves concavity of the max-of-concave
40+
composition.
41+
42+
**Scope:** Only impact needs are affected. Definition, signature,
43+
test, invariant, and background needs use unmodified `m(f, n)`.

src/treemapper/__init__.py

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from __future__ import annotations
22

33
import io
4+
import logging
45
from pathlib import Path
56
from typing import Any
67

@@ -10,19 +11,35 @@
1011
from .version import __version__
1112
from .writer import write_tree_json, write_tree_markdown, write_tree_text, write_tree_yaml
1213

14+
logging.getLogger("treemapper").addHandler(logging.NullHandler())
15+
1316
__all__ = [
1417
"__version__",
1518
"build_diff_context",
1619
"map_directory",
1720
"to_json",
1821
"to_markdown",
19-
"to_md",
2022
"to_text",
21-
"to_txt",
2223
"to_yaml",
2324
]
2425

2526

27+
def _root_display_name(user_path: str | Path, resolved: Path) -> str:
28+
original_name = Path(user_path).name
29+
if original_name:
30+
return original_name
31+
return str(resolved)
32+
33+
34+
def _resolve_path_if_exists(path: str | Path | None, label: str) -> Path | None:
35+
if path is None:
36+
return None
37+
resolved = Path(path).resolve()
38+
if not resolved.is_file():
39+
raise FileNotFoundError(f"{label} '{path}' does not exist")
40+
return resolved
41+
42+
2643
def map_directory(
2744
path: str | Path,
2845
*,
@@ -37,8 +54,8 @@ def map_directory(
3754
if not root_dir.is_dir():
3855
raise ValueError(f"'{path}' is not a directory")
3956

40-
ignore_path = Path(ignore_file).resolve() if ignore_file else None
41-
whitelist_path = Path(whitelist_file).resolve() if whitelist_file else None
57+
ignore_path = _resolve_path_if_exists(ignore_file, "Ignore file")
58+
whitelist_path = _resolve_path_if_exists(whitelist_file, "Whitelist file")
4259

4360
ctx = TreeBuildContext(
4461
base_dir=root_dir,
@@ -51,7 +68,7 @@ def map_directory(
5168
)
5269

5370
return {
54-
"name": root_dir.name,
71+
"name": _root_display_name(path, root_dir),
5572
"type": "directory",
5673
"children": build_tree(root_dir, ctx),
5774
}

0 commit comments

Comments
 (0)