Skip to content

Commit f919fb9

Browse files
author
jgstern-agent
committed
feat(ir): add is_example_file + is_config_file booleans to Symbol
Per the WI-gumoz-zoboz audit verdict (DOCUMENT + EXTEND), surface two sibling role flags on Symbol that mirror is_test_file (WI-rigun) and is_generated_file (WI-tizij): - is_example_file: True when the file matches an EXAMPLE_PATTERN (examples/, demos/, samples/, tutorials/). - is_config_file: True when the basename matches a known dependency/build manifest (pyproject.toml, package.json, Cargo.toml, go.mod, etc.). The integer supply_chain_tier is unchanged. Within tier 2 (INTERNAL_DEP), the role flags is_test_file / is_example_file / is_config_file are mutually exclusive — at most one is True per Symbol — making the four-way distinction inside tier 2 (test / example / config / pure internal_dep) derivable directly from those bits. The is_config bit is suppressed on a file when is_test or is_example already fires (e.g., examples/foo/package.json is is_example, not is_config) to preserve mutual exclusion. Wired through Symbol.to_dict / from_dict (round-trips via supply_chain sub-object) and both supply-chain-classification call sites in cli.py (_classify_symbols) and sketch.py (_run_analysis). Also: closes a pre-existing per-package isolation coverage gap on mainstream/py.py:1724 by adding a _detect_source_roots test covering the skip-list / dot-prefixed directory branch (the gap was verified to exist on plain dev before this PR). Closes WI-jobuj. Signed-off-by: jgstern-agent <josh-agent@iterabloom.com>
1 parent 3b45c0d commit f919fb9

7 files changed

Lines changed: 392 additions & 14 deletions

File tree

.ci/affected-tests.txt

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,16 @@
11
# Test selection manifest
2-
# Generated by smart-test at 2026-05-02T04:22:28-04:00
2+
# Generated by smart-test at 2026-05-02T06:02:41-04:00
33
# Mode: targeted
4-
# Baseline: 989a35bd7dc16209d641af307605bdcfc6907a11
5-
# Changed files: 6
6-
# Changed source files: 2
7-
# Selected tests: 168
4+
# Baseline: 3b45c0d3eddc739bf5c14a3c7eeed6167aee64ff
5+
# Changed files: 9
6+
# Changed source files: 4
7+
# Selected tests: 171
88
#
99
# === CHANGED_SOURCE_FILES ===
10-
packages/hypergumbo-core/src/hypergumbo_core/datamodels.py
10+
packages/hypergumbo-core/src/hypergumbo_core/cli.py
1111
packages/hypergumbo-core/src/hypergumbo_core/ir.py
12+
packages/hypergumbo-core/src/hypergumbo_core/sketch.py
13+
packages/hypergumbo-core/src/hypergumbo_core/supply_chain.py
1214
# === SELECTED_TESTS ===
1315
packages/hypergumbo-core/tests/BRANCHES_test_compact.py
1416
packages/hypergumbo-core/tests/BRANCHES_test_database_query.py
@@ -70,6 +72,7 @@ packages/hypergumbo-core/tests/test_grpc_linker.py
7072
packages/hypergumbo-core/tests/test_handler_slices.py
7173
packages/hypergumbo-core/tests/test_http_linker.py
7274
packages/hypergumbo-core/tests/test_inheritance_linker.py
75+
packages/hypergumbo-core/tests/test_install_extras.py
7376
packages/hypergumbo-core/tests/test_ipc_linker.py
7477
packages/hypergumbo-core/tests/test_ir.py
7578
packages/hypergumbo-core/tests/test_jackson_dispatch_linker.py
@@ -102,6 +105,7 @@ packages/hypergumbo-core/tests/test_route_handler_linker.py
102105
packages/hypergumbo-core/tests/test_router_routes_linker.py
103106
packages/hypergumbo-core/tests/test_rubyffi_linker.py
104107
packages/hypergumbo-core/tests/test_run_behavior_map.py
108+
packages/hypergumbo-core/tests/test_rust_analyzer_install.py
105109
packages/hypergumbo-core/tests/test_rust_trait_dispatch_linker.py
106110
packages/hypergumbo-core/tests/test_schema_compliance.py
107111
packages/hypergumbo-core/tests/test_schema.py
@@ -163,6 +167,7 @@ packages/hypergumbo-lang-mainstream/tests/test_html_analysis.py
163167
packages/hypergumbo-lang-mainstream/tests/test_java.py
164168
packages/hypergumbo-lang-mainstream/tests/test_js_ts.py
165169
packages/hypergumbo-lang-mainstream/tests/test_jupyter.py
170+
packages/hypergumbo-lang-mainstream/tests/test_jvm_deps.py
166171
packages/hypergumbo-lang-mainstream/tests/test_kotlin.py
167172
packages/hypergumbo-lang-mainstream/tests/test_manifest_targets.py
168173
packages/hypergumbo-lang-mainstream/tests/test_markdown.py

packages/hypergumbo-core/src/hypergumbo_core/cli.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6094,6 +6094,8 @@ def _classify_symbols(
60946094
symbol.supply_chain_tier = classification.tier.value
60956095
symbol.supply_chain_reason = classification.reason
60966096
symbol.is_test_file = classification.is_test
6097+
symbol.is_example_file = classification.is_example
6098+
symbol.is_config_file = classification.is_config
60976099
symbol.is_generated_file = classification.is_generated
60986100
# WI-zimum: fold in modifier-derived export signal. The analyzer
60996101
# may have already set Symbol.is_exported at extraction time

packages/hypergumbo-core/src/hypergumbo_core/ir.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,19 @@ class Symbol:
223223
supply_chain_tier: Position in dependency graph (1=first_party, 2=internal_dep,
224224
3=external_dep, 4=derived). See §14 of spec.
225225
supply_chain_reason: Why this tier was assigned (e.g., "matches ^src/")
226+
is_test_file: True if the file holds test code (WI-rigun). Independent
227+
of tier — co-located test files can be tier 1.
228+
is_example_file: True if the file is example/demo/sample/tutorial code
229+
(WI-jobuj). Set when the path matches an EXAMPLE_PATTERN.
230+
is_config_file: True if the file is a dependency/build manifest such as
231+
``pyproject.toml`` / ``package.json`` / ``Cargo.toml`` (WI-jobuj).
232+
Within tier 2, ``is_test_file`` / ``is_example_file`` /
233+
``is_config_file`` are mutually exclusive — at most one is True
234+
per Symbol.
235+
is_generated_file: True if the file is generated code (WI-tizij).
236+
Independent of the role flags above.
237+
is_exported: True if the symbol is part of the package's public API
238+
(WI-zimum).
226239
cyclomatic_complexity: McCabe cyclomatic complexity (decision points + 1).
227240
Counts if/elif/else, for, while, except, with, and/or, match/case.
228241
lines_of_code: Number of source lines in the symbol body (end_line - start_line + 1).
@@ -251,6 +264,8 @@ class Symbol:
251264
supply_chain_tier: int = 1 # Default to first_party
252265
supply_chain_reason: str = ""
253266
is_test_file: bool = False # WI-rigun: independent of tier
267+
is_example_file: bool = False # WI-jobuj: example/demo/sample/tutorial code
268+
is_config_file: bool = False # WI-jobuj: dependency/build manifest
254269
is_generated_file: bool = False # WI-tizij: generated code flag
255270
is_exported: bool = False # WI-zimum: public API / externally reachable
256271
cyclomatic_complexity: Optional[int] = None
@@ -291,6 +306,8 @@ def to_dict(self) -> dict:
291306
"tier_name": _TIER_NAMES.get(self.supply_chain_tier, "first_party"),
292307
"reason": self.supply_chain_reason,
293308
"is_test_file": self.is_test_file,
309+
"is_example_file": self.is_example_file,
310+
"is_config_file": self.is_config_file,
294311
"is_generated_file": self.is_generated_file,
295312
"is_exported": self.is_exported,
296313
},
@@ -325,6 +342,8 @@ def from_dict(cls, d: dict) -> "Symbol":
325342
supply_chain_tier=supply_chain.get("tier", 1),
326343
supply_chain_reason=supply_chain.get("reason", ""),
327344
is_test_file=supply_chain.get("is_test_file", False),
345+
is_example_file=supply_chain.get("is_example_file", False),
346+
is_config_file=supply_chain.get("is_config_file", False),
328347
is_generated_file=supply_chain.get("is_generated_file", False),
329348
is_exported=supply_chain.get("is_exported", False),
330349
cyclomatic_complexity=d.get("cyclomatic_complexity"),

packages/hypergumbo-core/src/hypergumbo_core/sketch.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5025,6 +5025,8 @@ def _run_analysis(
50255025
symbol.supply_chain_tier = classification.tier.value
50265026
symbol.supply_chain_reason = classification.reason
50275027
symbol.is_test_file = classification.is_test
5028+
symbol.is_example_file = classification.is_example
5029+
symbol.is_config_file = classification.is_config
50285030
symbol.is_generated_file = classification.is_generated
50295031
# WI-zimum: fold in modifier-derived export signal. Analyzer
50305032
# may have already set is_exported (WI-gipag Python __all__);

packages/hypergumbo-core/src/hypergumbo_core/supply_chain.py

Lines changed: 82 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -163,19 +163,28 @@ def from_dict(cls, data: dict) -> "SupplyChainConfig":
163163
class FileClassification:
164164
"""Classification result for a file.
165165
166-
`tier` and `is_test` are independent axes (WI-rigun-patuz). The
167-
classifier may demote a file to INTERNAL_DEP because it is a test
168-
file, but consumers that want a clean 'real third-party deps' or
169-
'production code only' query should compose `tier` with `is_test`
170-
rather than re-deriving test-ness from path patterns. The historical
171-
'tests = tier 2' framing (89154fa20) is preserved on `tier`; the new
172-
`is_test` flag exposes the test-detection result as a separate bit.
166+
`tier` and the boolean role flags (`is_test`, `is_example`,
167+
`is_config`, `is_generated`) are independent axes (WI-rigun-patuz,
168+
WI-jobuj). The classifier may demote a file to INTERNAL_DEP because
169+
it is a test/example/config file, but consumers that want a clean
170+
'real third-party deps' or 'production code only' query should
171+
compose `tier` with the role flag rather than re-deriving the role
172+
from path patterns. The historical 'tests = tier 2' framing
173+
(89154fa20) is preserved on `tier`; the role flags expose the
174+
detection results as separate bits.
175+
176+
Within tier 2 (INTERNAL_DEP), at most one of `is_test`,
177+
`is_example`, `is_config` is True per file — the four-way
178+
distinction (test / example / config / pure internal_dep) is
179+
derivable from those bits.
173180
"""
174181

175182
tier: Tier
176183
reason: str
177184
package_name: Optional[str] = None
178185
is_test: bool = False
186+
is_example: bool = False
187+
is_config: bool = False
179188
is_generated: bool = False
180189

181190

@@ -295,6 +304,46 @@ class FileClassification:
295304
r"^tutorials?/", # tutorials/ or tutorial/
296305
]
297306

307+
# WI-jobuj: dependency/build manifest filenames. Files matching these
308+
# names are flagged with ``is_config=True`` independently of tier (see
309+
# FileClassification). The set is intentionally narrow — canonical
310+
# package-manager and build-tool manifests, not arbitrary dotfiles —
311+
# so the bit means "this file declares dependencies / build config",
312+
# not "this file happens to configure something."
313+
CONFIG_FILE_NAMES = frozenset({
314+
# Python
315+
"pyproject.toml",
316+
"setup.py",
317+
"setup.cfg",
318+
"Pipfile",
319+
"Pipfile.lock",
320+
# Node
321+
"package.json",
322+
"package-lock.json",
323+
"yarn.lock",
324+
# Rust
325+
"Cargo.toml",
326+
"Cargo.lock",
327+
# Go
328+
"go.mod",
329+
"go.sum",
330+
# Ruby
331+
"Gemfile",
332+
"Gemfile.lock",
333+
# Java / Kotlin / JVM
334+
"pom.xml",
335+
"build.gradle",
336+
"build.gradle.kts",
337+
"settings.gradle",
338+
"settings.gradle.kts",
339+
# PHP
340+
"composer.json",
341+
"composer.lock",
342+
# Elixir
343+
"mix.exs",
344+
"mix.lock",
345+
})
346+
298347
# Patterns for documentation directories (tier 2) — not production code.
299348
# Checked with re.search to match at any depth (e.g., Sources/Lib/Documentation.docc/).
300349
# Swift DocC (.docc) bundles contain tutorial fragments, articles, and extension files
@@ -397,6 +446,18 @@ def _is_generated_file(rel_path: str) -> bool:
397446
return False
398447

399448

449+
def _is_config_file(rel_path: str) -> bool:
450+
"""Check if a file path matches a known dependency/build manifest filename.
451+
452+
WI-jobuj: matches the basename against ``CONFIG_FILE_NAMES``.
453+
Path-position-agnostic — a ``pyproject.toml`` at the repo root and
454+
one inside a workspace are both flagged.
455+
"""
456+
rel_norm = rel_path.replace("\\", "/")
457+
basename = rel_norm.rsplit("/", 1)[-1]
458+
return basename in CONFIG_FILE_NAMES
459+
460+
400461
def _has_generated_header(path: Path) -> bool:
401462
"""Check the first few KB of *path* for a generated-file marker.
402463
@@ -437,6 +498,11 @@ def classify_file(
437498
FileClassification with tier, reason, and optional package_name.
438499
The ``is_generated`` flag is set independently of tier when the
439500
file path matches known generated-code patterns (WI-tizij).
501+
The ``is_config`` flag (WI-jobuj) is set independently of tier
502+
when the basename matches a dependency/build manifest filename
503+
(``pyproject.toml``, ``package.json``, ``Cargo.toml``, etc.) —
504+
but NOT when ``is_test`` or ``is_example`` is already True, so
505+
the role flags remain mutually exclusive within tier 2.
440506
"""
441507
# Get relative path for pattern matching
442508
try:
@@ -449,10 +515,16 @@ def classify_file(
449515
# WI-pofin: fall back to a content-based header scan when the path
450516
# is inconclusive. Path check first because it's cheaper.
451517
generated = _is_generated_file(rel) or _has_generated_header(path)
518+
config_filename = _is_config_file(rel)
452519

453520
result = _classify_file_core(rel, path, repo_root, package_roots, config)
454521
if generated:
455522
result.is_generated = True
523+
# WI-jobuj: is_config is mutually exclusive with is_test / is_example
524+
# within tier 2 (test and example detection wins on a tie, e.g., a
525+
# package.json under examples/ is is_example=True, not is_config).
526+
if config_filename and not result.is_test and not result.is_example:
527+
result.is_config = True
456528
return result
457529

458530

@@ -502,7 +574,9 @@ def _classify_file_core(
502574
# 4. Check example/demo patterns (lower priority than workspace packages)
503575
for pattern in EXAMPLE_PATTERNS:
504576
if re.match(pattern, rel):
505-
return FileClassification(Tier.INTERNAL_DEP, f"path matches {pattern}")
577+
return FileClassification(
578+
Tier.INTERNAL_DEP, f"path matches {pattern}", is_example=True
579+
)
506580

507581
# 4b. Check documentation patterns (DocC bundles, etc.)
508582
for pattern in DOCUMENTATION_PATTERNS:

0 commit comments

Comments
 (0)