Skip to content

Commit acc4ccb

Browse files
committed
fix: ensure changed files appear in diffctx output, reduce noise
1 parent bd23e08 commit acc4ccb

19 files changed

Lines changed: 935 additions & 1 deletion

src/treemapper/diffctx/__init__.py

Lines changed: 65 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,7 @@ def _apply_same_file_floor(
179179
rel[frag.id] = _SAME_FILE_FLOOR
180180

181181

182-
_HUB_REVERSE_THRESHOLD = 5
182+
_HUB_REVERSE_THRESHOLD = 3
183183

184184

185185
def _find_hub_noise_paths(
@@ -292,6 +292,66 @@ def _cap_context_fragments(
292292
return result
293293

294294

295+
# Minimum relevance score a fragment must reach to survive
# _filter_low_relevance_fragments when it does not live in the same file as a
# core fragment (fragments in those files are kept regardless of score).
_LOW_RELEVANCE_THRESHOLD = 0.005
296+
297+
298+
def _filter_low_relevance_fragments(
    fragments: list[Fragment],
    core_ids: set[FragmentId],
    rel: dict[FragmentId, float],
) -> list[Fragment]:
    """Drop fragments whose relevance score is below ``_LOW_RELEVANCE_THRESHOLD``.

    A fragment is kept unconditionally when its path matches the path of any
    id in ``core_ids``. Otherwise it is kept only if its score in ``rel``
    reaches the threshold; a fragment with no entry in ``rel`` is scored 0.0.

    Args:
        fragments: Candidate fragments, in the order they should be returned.
        core_ids: Ids of the core fragments; only their ``path`` is read here.
        rel: Relevance score per fragment id.

    Returns:
        A new list with the surviving fragments in their original order.
    """
    core_paths = {core_id.path for core_id in core_ids}

    survivors: list[Fragment] = []
    for fragment in fragments:
        # Fragments from a core file bypass the score check entirely.
        if fragment.path in core_paths:
            survivors.append(fragment)
            continue
        if rel.get(fragment.id, 0.0) >= _LOW_RELEVANCE_THRESHOLD:
            survivors.append(fragment)

    dropped_count = len(fragments) - len(survivors)
    if dropped_count:
        logging.debug("diffctx: filtered %d low-relevance fragments (threshold=%.4f)", dropped_count, _LOW_RELEVANCE_THRESHOLD)
    return survivors
313+
314+
315+
def _ensure_changed_files_represented(
    selected: list[Fragment],
    all_fragments: list[Fragment],
    changed_files: list[Path],
    remaining_budget: int,
) -> list[Fragment]:
    """Append one fragment for each changed file that ``selected`` does not cover.

    For every path in ``changed_files`` that no selected fragment belongs to,
    the largest fragment of that file which still fits into the remaining
    token budget is injected. Choosing among the fragments that fit (instead
    of giving up when the single largest one is too big) means a changed file
    is only left out when none of its fragments is affordable.

    Args:
        selected: Fragments already chosen; never mutated.
        all_fragments: Every known fragment; the injection candidates.
        changed_files: Paths that should each appear in the result.
        remaining_budget: Tokens still available for injected fragments.

    Returns:
        ``selected`` itself when every changed file is already covered,
        otherwise a new list of ``selected`` followed by the injected
        fragments (ordered by path).
    """
    selected_paths = {f.path for f in selected}
    missing_paths = set(changed_files) - selected_paths

    if not missing_paths:
        return selected

    frags_by_path: dict[Path, list[Fragment]] = defaultdict(list)
    for frag in all_fragments:
        if frag.path in missing_paths:
            frags_by_path[frag.path].append(frag)

    added: list[Fragment] = []
    budget_left = remaining_budget
    selected_ids = {f.id for f in selected}

    # Sorted iteration keeps the outcome deterministic when the budget cannot
    # cover every missing file.
    for path in sorted(missing_paths):
        # Only non-empty fragments that fit the budget and are not already
        # selected are eligible; .get avoids inserting keys into the defaultdict.
        affordable = [
            f
            for f in frags_by_path.get(path, ())
            if 0 < f.token_count <= budget_left and f.id not in selected_ids
        ]
        if not affordable:
            continue
        best = max(affordable, key=lambda f: f.token_count)
        added.append(best)
        selected_ids.add(best.id)
        budget_left -= best.token_count

    if added:
        logging.debug("diffctx: injected %d fragments to cover %d missing changed files", len(added), len(missing_paths))

    return selected + added
353+
354+
295355
def _select_with_ppr(
296356
all_fragments: list[Fragment],
297357
core_ids: set[FragmentId],
@@ -307,6 +367,7 @@ def _select_with_ppr(
307367
_apply_same_file_floor(rel_scores, core_ids, all_fragments)
308368

309369
filtered_fragments = _filter_unrelated_fragments(all_fragments, core_ids, graph)
370+
filtered_fragments = _filter_low_relevance_fragments(filtered_fragments, core_ids, rel_scores)
310371
filtered_fragments = _cap_context_fragments(filtered_fragments, core_ids, rel_scores)
311372

312373
needs = needs_from_diff(filtered_fragments, core_ids, graph, diff_text)
@@ -410,6 +471,9 @@ def build_diff_context(
410471
repo_root=root_dir,
411472
seed_weights=seed_weights,
412473
)
474+
effective_budget = budget_tokens if budget_tokens is not None else _UNLIMITED_BUDGET
475+
remaining = effective_budget - result.used_tokens
476+
selected = _ensure_changed_files_represented(selected, all_fragments, changed_files, remaining)
413477
_log_ppr_mode(selected, core_ids, budget_tokens, result, alpha, tau)
414478

415479
if no_content:
Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
src/repository.py: |
2+
from typing import List, Optional
3+
4+
class UserRepository:
5+
def __init__(self, db_connection):
6+
self.db = db_connection
7+
8+
def find_by_id(self, user_id: int) -> Optional[dict]:
9+
return self.db.query("SELECT * FROM users WHERE id = %s", user_id)
10+
11+
def find_all(self) -> List[dict]:
12+
return self.db.query("SELECT * FROM users")
13+
14+
def create(self, user_data: dict) -> int:
15+
return self.db.insert("users", user_data)
16+
17+
def update(self, user_id: int, user_data: dict) -> bool:
18+
return self.db.update("users", user_id, user_data)
19+
20+
def delete(self, user_id: int) -> bool:
21+
return self.db.delete("users", user_id)
22+
23+
src/models.py: |
24+
from dataclasses import dataclass
25+
from typing import Optional
26+
from datetime import datetime
27+
28+
@dataclass
29+
class User:
30+
id: int
31+
name: str
32+
email: str
33+
created_at: datetime
34+
updated_at: Optional[datetime] = None
35+
36+
@dataclass
37+
class UserProfile:
38+
user_id: int
39+
bio: Optional[str] = None
40+
avatar_url: Optional[str] = None
41+
42+
src/validators.py: |
43+
import re
44+
from typing import List
45+
46+
def validate_email(email: str) -> bool:
47+
pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
48+
return bool(re.match(pattern, email))
49+
50+
def validate_username(name: str) -> bool:
51+
return len(name) >= 3 and len(name) <= 50
52+
53+
def validate_password(password: str) -> List[str]:
54+
errors = []
55+
if len(password) < 8:
56+
errors.append("Password must be at least 8 characters")
57+
if not any(c.isupper() for c in password):
58+
errors.append("Password must contain uppercase letter")
59+
return errors
60+
61+
src/user_service.py: |
62+
from typing import List, Optional
63+
from src.repository import UserRepository
64+
from src.models import User, UserProfile
65+
from src.validators import validate_email, validate_username, validate_password
66+
67+
class UserService:
68+
def __init__(self, repository: UserRepository):
69+
self.repository = repository
70+
71+
def register_user(self, name: str, email: str, password: str) -> User:
72+
if not validate_email(email):
73+
raise ValueError("Invalid email address")
74+
if not validate_username(name):
75+
raise ValueError("Invalid username")
76+
password_errors = validate_password(password)
77+
if password_errors:
78+
raise ValueError(", ".join(password_errors))
79+
user_data = {"name": name, "email": email, "password": password}
80+
user_id = self.repository.create(user_data)
81+
return User(id=user_id, name=name, email=email, created_at=None)
82+
83+
def get_user(self, user_id: int) -> Optional[User]:
84+
data = self.repository.find_by_id(user_id)
85+
if data is None:
86+
return None
87+
return User(**data)
88+
89+
def list_users(self) -> List[User]:
90+
rows = self.repository.find_all()
91+
return [User(**row) for row in rows]
92+
93+
def update_user(self, user_id: int, name: str, email: str) -> bool:
94+
if not validate_email(email):
95+
raise ValueError("Invalid email address")
96+
if not validate_username(name):
97+
raise ValueError("Invalid username")
98+
return self.repository.update(user_id, {"name": name, "email": email})
99+
100+
def delete_user(self, user_id: int) -> bool:
101+
return self.repository.delete(user_id)
102+
103+
def get_user_profile(self, user_id: int) -> Optional[UserProfile]:
104+
data = self.repository.find_by_id(user_id)
105+
if data is None:
106+
return None
107+
return UserProfile(user_id=user_id, bio=data.get("bio"), avatar_url=data.get("avatar_url"))
108+
109+
scripts/GARBAGE_REG014_seed_data.py: |
110+
GARBAGE_REG014_SEED_MARKER = "seed_data_garbage"
111+
def GARBAGE_REG014_populate_database():
112+
return "GARBAGE_REG014_POPULATE_RESULT"
113+
114+
scripts/GARBAGE_REG014_migrate.py: |
115+
GARBAGE_REG014_MIGRATE_MARKER = "migrate_garbage"
116+
def GARBAGE_REG014_run_migrations():
117+
return "GARBAGE_REG014_MIGRATION_RESULT"
118+
119+
utils/GARBAGE_REG014_helpers.py: |
120+
GARBAGE_REG014_HELPER_MARKER = "helper_garbage"
121+
def GARBAGE_REG014_format_output():
122+
return "GARBAGE_REG014_FORMAT_RESULT"
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
src/repository.py: |
2+
from typing import List, Optional
3+
4+
class UserRepository:
5+
def __init__(self, db_connection):
6+
self.db = db_connection
7+
8+
def find_by_id(self, user_id: int) -> Optional[dict]:
9+
return self.db.query("SELECT * FROM users WHERE id = %s", user_id)
10+
11+
def find_all(self) -> List[dict]:
12+
return self.db.query("SELECT * FROM users")
13+
14+
def create(self, user_data: dict) -> int:
15+
return self.db.insert("users", user_data)
16+
17+
def update(self, user_id: int, user_data: dict) -> bool:
18+
return self.db.update("users", user_id, user_data)
19+
20+
def delete(self, user_id: int) -> bool:
21+
return self.db.delete("users", user_id)
22+
23+
src/models.py: |
24+
from dataclasses import dataclass
25+
from typing import Optional
26+
from datetime import datetime
27+
28+
@dataclass
29+
class User:
30+
id: int
31+
name: str
32+
email: str
33+
created_at: datetime
34+
updated_at: Optional[datetime] = None
35+
36+
@dataclass
37+
class UserProfile:
38+
user_id: int
39+
bio: Optional[str] = None
40+
avatar_url: Optional[str] = None
41+
42+
src/validators.py: |
43+
import re
44+
from typing import List
45+
46+
def validate_email(email: str) -> bool:
47+
pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
48+
return bool(re.match(pattern, email))
49+
50+
def validate_username(name: str) -> bool:
51+
return len(name) >= 3 and len(name) <= 50
52+
53+
def validate_password(password: str) -> List[str]:
54+
errors = []
55+
if len(password) < 8:
56+
errors.append("Password must be at least 8 characters")
57+
if not any(c.isupper() for c in password):
58+
errors.append("Password must contain uppercase letter")
59+
return errors
60+
61+
scripts/GARBAGE_REG014_seed_data.py: |
62+
GARBAGE_REG014_SEED_MARKER = "seed_data_garbage"
63+
def GARBAGE_REG014_populate_database():
64+
return "GARBAGE_REG014_POPULATE_RESULT"
65+
66+
scripts/GARBAGE_REG014_migrate.py: |
67+
GARBAGE_REG014_MIGRATE_MARKER = "migrate_garbage"
68+
def GARBAGE_REG014_run_migrations():
69+
return "GARBAGE_REG014_MIGRATION_RESULT"
70+
71+
utils/GARBAGE_REG014_helpers.py: |
72+
GARBAGE_REG014_HELPER_MARKER = "helper_garbage"
73+
def GARBAGE_REG014_format_output():
74+
return "GARBAGE_REG014_FORMAT_RESULT"
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
must_include_files:
2+
- src/user_service.py
3+
must_include_content:
4+
- class UserService
5+
- def register_user
6+
- def get_user
7+
must_not_include:
8+
- GARBAGE_REG014_SEED_MARKER
9+
- GARBAGE_REG014_POPULATE_RESULT
10+
- GARBAGE_REG014_MIGRATE_MARKER
11+
- GARBAGE_REG014_MIGRATION_RESULT
12+
- GARBAGE_REG014_HELPER_MARKER
13+
- GARBAGE_REG014_FORMAT_RESULT
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
src/app.py: |
2+
from flask import Flask
3+
4+
def create_app():
5+
app = Flask(__name__)
6+
app.config["DEBUG"] = False
7+
return app
8+
9+
def run_server(app):
10+
app.run(host="0.0.0.0", port=8080)
11+
12+
src/routes.py: |
13+
from flask import jsonify
14+
15+
def register_routes(app):
16+
@app.route("/health")
17+
def health():
18+
return jsonify({"status": "ok"})
19+
20+
@app.route("/api/version")
21+
def version():
22+
return jsonify({"version": "1.0.0"})
23+
24+
.github/workflows/ci.yml: |
25+
name: CI
26+
on:
27+
push:
28+
branches: [main]
29+
pull_request:
30+
branches: [main]
31+
jobs:
32+
lint:
33+
runs-on: ubuntu-latest
34+
steps:
35+
- uses: actions/checkout@v4
36+
- name: Install dependencies
37+
run: pip install ruff
38+
- name: Run linter
39+
run: ruff check .
40+
test:
41+
runs-on: ubuntu-latest
42+
needs: lint
43+
steps:
44+
- uses: actions/checkout@v4
45+
- name: Install dependencies
46+
run: pip install -e ".[dev]"
47+
- name: Run tests
48+
run: pytest --tb=short
49+
50+
README.md: |
51+
# My Application
52+
53+
A web application built with Flask.
54+
55+
## Installation
56+
57+
```bash
58+
pip install -e ".[dev]"
59+
```
60+
61+
## Usage
62+
63+
```bash
64+
python -m src.app
65+
```
66+
67+
## Development
68+
69+
Run tests with:
70+
71+
```bash
72+
pytest
73+
```
74+
75+
frontend/GARBAGE_REG015_component.tsx: |
76+
export const GARBAGE_REG015_COMPONENT_MARKER = "garbage_component";
77+
export function GARBAGE_REG015_RenderWidget() {
78+
return "GARBAGE_REG015_WIDGET_RESULT";
79+
}
80+
81+
frontend/GARBAGE_REG015_styles.css: |
82+
.GARBAGE_REG015_STYLE_MARKER { color: red; }
83+
84+
data/GARBAGE_REG015_fixtures.json: |
85+
{"GARBAGE_REG015_JSON_MARKER": "garbage_json_value"}

0 commit comments

Comments
 (0)